Neuron®
The Neuron® is the basis for the creation of open and secure federated networks for smart societies.
Loading...
Searching...
No Matches
TokenizationProcess.cs
1using System.Collections.Generic;
2using System.Diagnostics;
3
5{
/// <summary>
/// Contains information about a tokenization process.
/// </summary>
public class TokenizationProcess
{
    // NOTE(review): the constructor and ToArray() headers were missing from this
    // listing and have been restored from the member summary (names, parameter
    // lists and return type). The `public` accessibility is assumed - confirm
    // against the original TokenizationProcess.cs.

    /// <summary>
    /// Contains information about a tokenization process.
    /// </summary>
    public TokenizationProcess()
    {
        this.TokenCounts = new Dictionary<string, List<uint>>();
    }

    /// <summary>
    /// Accumulated token counts.
    /// </summary>
    public Dictionary<string, List<uint>> TokenCounts { get; }

    /// <summary>
    /// Document Index Offset. Used to identify sequences of tokens in a document.
    /// </summary>
    public uint DocumentIndexOffset { get; set; }

    /// <summary>
    /// Generates an array of token counts.
    /// </summary>
    /// <returns>
    /// One <see cref="TokenCount"/> per entry in <see cref="TokenCounts"/>, in the
    /// dictionary's enumeration order. An empty array if there are no entries.
    /// </returns>
    public TokenCount[] ToArray()
    {
        int c = this.TokenCounts.Count;
        if (c == 0)
            return new TokenCount[0];

        int i = 0;
        TokenCount[] Counts = new TokenCount[c];

        // Each entry's list is copied to an array, so the TokenCount receives a snapshot of the list.
        foreach (KeyValuePair<string, List<uint>> P in this.TokenCounts)
            Counts[i++] = new TokenCount(P.Key, P.Value.ToArray());

        return Counts;
    }
}
48}
Represents a token and a corresponding occurrence count.
Definition: TokenCount.cs:12
Contains information about a tokenization process.
uint DocumentIndexOffset
Document Index Offset. Used to identify sequences of tokens in a document.
Dictionary<string, List<uint>> TokenCounts
Accumulated token counts.
TokenizationProcess()
Contains information about a tokenization process.
TokenCount[] ToArray()
Generates an array of token counts.