2using System.Collections.Generic;
3using System.Globalization;
5using System.Threading.Tasks;
29 if (Type == typeof(
string))
32 return Grade.NotAtAll;
42 if (Value is
string s)
45 return Task.CompletedTask;
55 if (
string.IsNullOrEmpty(Text))
58 UnicodeCategory Category;
59 StringBuilder sb =
new StringBuilder();
63 foreach (
char ch
in Text.ToLower().Normalize(NormalizationForm.FormD))
65 Category = CharUnicodeInfo.GetUnicodeCategory(ch);
66 if (Category == UnicodeCategory.NonSpacingMark)
69 if (
char.IsLetterOrDigit(ch))
78 Token = sb.ToString();
82 if (!Process.TokenCounts.TryGetValue(Token, out List<uint> DocIndex))
84 DocIndex =
new List<uint>();
85 Process.TokenCounts[Token] = DocIndex;
88 DocIndex.Add(++Process.DocumentIndexOffset);
95 Token = sb.ToString();
98 if (!Process.TokenCounts.TryGetValue(Token, out List<uint> DocIndex))
100 DocIndex =
new List<uint>();
101 Process.TokenCounts[Token] = DocIndex;
104 DocIndex.Add(++Process.DocumentIndexOffset);
Grade Supports(Type Type)
How well the tokenizer can tokenize objects of type Type.
static void Tokenize(string Text, TokenizationProcess Process)
Tokenizes a string.
StringTokenizer()
Tokenizes strings.
Task Tokenize(object Value, TokenizationProcess Process)
Tokenizes an object.
Contains information about a tokenization process.
Interface for full-text-search tokenizers