Neuron®
The Neuron® is the basis for the creation of open and secure federated networks for smart societies.
Loading...
Searching...
No Matches
TSV.cs
1using System;
2using System.Collections.Generic;
3using System.Text;
6
7namespace Waher.Content
8{
12 public static class TSV
13 {
14 #region Encoding/Decoding
15
/// <summary>
/// Parses a TSV string.
/// </summary>
/// <param name="Tsv">TAB-separated values to parse.</param>
/// <returns>Parsed records, each one an array of field values.</returns>
/// <exception cref="Exception">If anything other than whitespace (characters up to and
/// including space, or the non-breaking space, code 160) remains after the records.</exception>
public static string[][] Parse(string Tsv)
{
    int Length = Tsv.Length;
    int Index = 0;
    string[][] Records = Parse(Tsv, ref Index, Length);

    // Only whitespace may follow the parsed records.
    // NOTE(review): the private overload loops until the position reaches the length,
    // so this scan looks like a safeguard that is not expected to trigger - confirm.
    for (; Index < Length; Index++)
    {
        char c = Tsv[Index];

        if (c > ' ' && c != 160)
            throw new Exception("Unexpected content at end of string.");
    }

    return Records;
}
36
/// <summary>
/// Parses TSV content one character at a time, starting at <paramref name="Pos"/>.
///
/// Fields are separated by TAB characters and records by CR or LF characters; empty
/// lines produce no record. A field can contain a section enclosed in double quotes.
/// Inside such a section, TAB, CR and LF are taken literally, and a backslash starts
/// an escape sequence: \a \b \f \n \r \t \v, \x followed by two hexadecimal digits,
/// \u followed by four hexadecimal digits. Any other escaped character is taken as is.
///
/// A TAB directly followed by a line break, or by the end of the input, ends the record
/// with an empty field, so that the number of fields always is the number of TABs plus
/// one. An empty quoted field ("") is kept also when it is the last thing in the input.
/// </summary>
/// <param name="Tsv">String to parse.</param>
/// <param name="Pos">Position to start at. Is advanced to <paramref name="Len"/> when the method returns.</param>
/// <param name="Len">Position at which parsing stops.</param>
/// <returns>Parsed records.</returns>
private static string[][] Parse(string Tsv, ref int Pos, int Len)
{
    // Parser states.
    const int StartOfField = 0;     // Nothing read of the current field yet.
    const int Unquoted = 1;         // Inside unquoted content (also after a closing quote).
    const int Quoted = 2;           // Inside a quoted section.
    const int Escape = 3;           // Directly after a backslash in a quoted section.
    const int Hex1 = 4;             // Expecting hex digit 1 of 4 (\u).
    const int Hex2 = 5;             // Expecting hex digit 2 of 4 (\u).
    const int Hex3 = 6;             // Expecting hex digit 3 of 4 (\u), or 1 of 2 (\x).
    const int Hex4 = 7;             // Expecting hex digit 4 of 4 (\u), or 2 of 2 (\x).

    List<string[]> Records = new List<string[]>();
    List<string> Fields = new List<string>();
    StringBuilder sb = new StringBuilder();
    int State = StartOfField;
    int i = 0;
    char ch;
    bool FieldPending = false;      // true if a field has been started but not yet added to Fields.

    while (Pos < Len)
    {
        ch = Tsv[Pos++];
        switch (State)
        {
            case StartOfField:
                if (ch == '"')
                    State = Quoted;
                else if (ch == '\t')
                    Fields.Add(string.Empty);
                else if (ch == '\r' || ch == '\n')
                {
                    // Fields are cleared at every record end, so a non-empty list here
                    // means the previous character was a TAB: the record ends with an
                    // empty field. An empty list means an empty line (or the LF of a
                    // CR LF pair), which is skipped.
                    if (Fields.Count > 0)
                    {
                        Fields.Add(string.Empty);
                        Records.Add(Fields.ToArray());
                        Fields.Clear();
                    }
                }
                else
                {
                    sb.Append(ch);
                    FieldPending = true;
                    State = Unquoted;
                }
                break;

            case Unquoted:
                if (ch == '\t')
                {
                    Fields.Add(sb.ToString());
                    sb.Clear();
                    FieldPending = false;
                    State = StartOfField;
                }
                else if (ch == '\r' || ch == '\n')
                {
                    Fields.Add(sb.ToString());
                    sb.Clear();
                    FieldPending = false;
                    State = StartOfField;

                    Records.Add(Fields.ToArray());
                    Fields.Clear();
                }
                else
                {
                    sb.Append(ch);
                    FieldPending = true;
                }
                break;

            case Quoted:
                if (ch == '\\')
                    State = Escape;
                else if (ch == '"')
                {
                    // A closed quoted section is a field, even if it is empty.
                    FieldPending = true;
                    State = Unquoted;
                }
                else
                {
                    sb.Append(ch);
                    FieldPending = true;
                }
                break;

            case Escape:
                FieldPending = true;
                State = Quoted;     // Default continuation; overridden for \x and \u.

                switch (ch)
                {
                    case 'a':
                        sb.Append('\a');
                        break;

                    case 'b':
                        sb.Append('\b');
                        break;

                    case 'f':
                        sb.Append('\f');
                        break;

                    case 'n':
                        sb.Append('\n');
                        break;

                    case 'r':
                        sb.Append('\r');
                        break;

                    case 't':
                        sb.Append('\t');
                        break;

                    case 'v':
                        sb.Append('\v');
                        break;

                    case 'x':
                        // Two hex digits: reuse the last two digit states.
                        i = 0;
                        State = Hex3;
                        break;

                    case 'u':
                        i = 0;
                        State = Hex1;
                        break;

                    default:
                        sb.Append(ch);
                        break;
                }
                break;

            // NOTE(review): how JSON.HexDigit treats a character that is not a
            // hexadecimal digit is not visible here - confirm.
            case Hex1:
                i = JSON.HexDigit(ch);
                State = Hex2;
                break;

            case Hex2:
                i = (i << 4) | JSON.HexDigit(ch);
                State = Hex3;
                break;

            case Hex3:
                i = (i << 4) | JSON.HexDigit(ch);
                State = Hex4;
                break;

            case Hex4:
                i = (i << 4) | JSON.HexDigit(ch);
                sb.Append((char)i);
                FieldPending = true;
                State = Quoted;
                break;
        }
    }

    // Flush what remains when the input does not end with a line break.
    if (FieldPending)
        Fields.Add(sb.ToString());
    else if (State == StartOfField && Fields.Count > 0)
        Fields.Add(string.Empty);   // Input ended directly after a TAB.

    if (Fields.Count > 0)
        Records.Add(Fields.ToArray());

    return Records.ToArray();
}
195
/// <summary>
/// Encodes records as a TAB-separated values string, quoting fields that
/// contain characters that cannot be written as they are.
/// </summary>
/// <param name="Records">Records to encode.</param>
/// <returns>TSV-encoded string.</returns>
public static string Encode(string[][] Records) => Encode(Records, true);
205
/// <summary>
/// Encodes records as a TAB-separated values string. Fields are separated by TAB
/// characters and every record is ended with a line break. A null field is written
/// as an empty field.
/// </summary>
/// <param name="Records">Records to encode.</param>
/// <param name="QuoteIllegalCharacters">If true, a field containing a TAB, a double quote
/// or a control character is enclosed in double quotes, with backslashes, double quotes
/// and (when a control character other than TAB is present) the control characters
/// \a \b \f \n \r \t \v written as backslash escape sequences. If false, fields are
/// written as they are, and a control character (TAB included) causes an exception.</param>
/// <returns>TSV-encoded string.</returns>
/// <exception cref="InvalidOperationException">If a field contains a control character
/// and <paramref name="QuoteIllegalCharacters"/> is false.</exception>
public static string Encode(string[][] Records, bool QuoteIllegalCharacters)
{
    StringBuilder sb = new StringBuilder();
    bool First;

    foreach (string[] Record in Records)
    {
        First = true;

        foreach (string Field in Record)
        {
            bool Tab = false;
            bool Control = false;
            bool Quote = false;

            if (First)
                First = false;
            else
                sb.Append('\t');

            if (Field is null)
                continue;

            foreach (char ch in Field)
            {
                if (ch == '\t' && QuoteIllegalCharacters)
                    Tab = true;
                else if (ch == '"' && QuoteIllegalCharacters)
                    Quote = true;
                else if (ch < ' ')
                {
                    if (!QuoteIllegalCharacters)
                        throw new InvalidOperationException("String is not properly quoted.");

                    Control = true;
                }
            }

            if (Tab || Quote || Control)
            {
                // Inside quotes a backslash starts an escape sequence when parsed, so
                // literal backslashes must be doubled. This is done first, so that the
                // backslashes introduced by the replacements below are left alone.
                string Escaped = Field.Replace("\\", "\\\\");

                if (Quote)
                    Escaped = Escaped.Replace("\"", "\\\"");

                if (Control)
                {
                    Escaped = Escaped.
                        Replace("\a", "\\a").
                        Replace("\b", "\\b").
                        Replace("\f", "\\f").
                        Replace("\n", "\\n").
                        Replace("\r", "\\r").
                        Replace("\t", "\\t").
                        Replace("\v", "\\v");
                }

                sb.Append('"');
                sb.Append(Escaped);
                sb.Append('"');
            }
            else
                sb.Append(Field);   // Backslashes have no special meaning outside quotes.
        }

        sb.AppendLine();
    }

    return sb.ToString();
}
283
/// <summary>
/// Encodes a matrix as a TAB-separated values string, quoting fields where needed.
/// String elements are written as they are, double-precision elements are encoded
/// using <see cref="CommonTypes.Encode(double)"/>, and other elements using their
/// ToString() method (a null value gives an empty field).
/// </summary>
/// <param name="Matrix">Matrix to encode.</param>
/// <returns>TSV-encoded string.</returns>
public static string Encode(IMatrix Matrix)
{
    return Encode(Matrix, (Element) =>
    {
        object Value = Element.AssociatedObjectValue;

        switch (Value)
        {
            case string Text:
                return Text;

            case double Number:
                return CommonTypes.Encode(Number);

            default:
                return Value?.ToString();
        }
    }, true);
}
301
/// <summary>
/// Encodes a matrix as a TAB-separated values string. If the matrix is an
/// <see cref="ObjectMatrix"/> with column names, these are written as the first record.
/// Each matrix row then becomes one record.
/// </summary>
/// <param name="Matrix">Matrix to encode.</param>
/// <param name="ElementToString">Callback converting each matrix element to its field value.</param>
/// <param name="QuoteIllegalCharacters">Passed on to the record encoder; controls whether
/// fields containing characters that cannot be written as they are get quoted.</param>
/// <returns>TSV-encoded string.</returns>
/// <exception cref="ArgumentNullException">If <paramref name="ElementToString"/> is null.</exception>
public static string Encode(IMatrix Matrix, ToString ElementToString, bool QuoteIllegalCharacters)
{
    if (ElementToString is null)
        throw new ArgumentNullException(nameof(ElementToString));

    List<string[]> Lines = new List<string[]>();

    // Header record, when the matrix carries column names.
    if (Matrix is ObjectMatrix Named && !(Named.ColumnNames is null))
        Lines.Add(Named.ColumnNames);

    int RowCount = Matrix.Rows;
    int ColumnCount = Matrix.Columns;
    List<string> Cells = new List<string>();

    for (int y = 0; y < RowCount; y++)
    {
        Cells.Clear();

        for (int x = 0; x < ColumnCount; x++)
            Cells.Add(ElementToString(Matrix.GetElement(x, y)));

        Lines.Add(Cells.ToArray());
    }

    return Encode(Lines.ToArray(), QuoteIllegalCharacters);
}
338
339 #endregion
340 }
341}
Helps with parsing of common data types.
Definition: CommonTypes.cs:13
static string Encode(bool x)
Encodes a Boolean for use in XML and other formats.
Definition: CommonTypes.cs:594
Helps with common TSV-related tasks. (TSV=TAB Separated Values)
Definition: TSV.cs:13
static string Encode(string[][] Records)
Encodes records as a TAB-separated values string.
Definition: TSV.cs:201
static string Encode(IMatrix Matrix, ToString ElementToString, bool QuoteIllegalCharacters)
Encodes a matrix as a TAB-separated values string.
Definition: TSV.cs:310
static string Encode(string[][] Records, bool QuoteIllegalCharacters)
Encodes records as a TAB-separated values string.
Definition: TSV.cs:213
static string[][] Parse(string Tsv)
Parses a TSV string.
Definition: TSV.cs:21
static string Encode(IMatrix Matrix)
Encodes a matrix as a TAB-separated values string.
Definition: TSV.cs:289
Basic interface for matrices.
Definition: IMatrix.cs:9
IElement GetElement(int Column, int Row)
Gets an element of the matrix.
delegate string ToString(IElement Element)
Delegate for callback methods that convert an element value to a string.