Neuron®
The Neuron® is the basis for the creation of open and secure federated networks for smart societies.
Loading...
Searching...
No Matches
HtmlDocument.cs
1using System;
2using System.Collections.Generic;
3using System.Globalization;
4using System.Text;
5using System.Xml;
7
8namespace Waher.Content.Html
9{
13 public class HtmlDocument
14 {
// Raw HTML text handed to the constructor; it is the input of Parse().
private readonly string htmlText;

// Document-level references. All start out as null; AssertParsed() uses a null root
// as its signal that Parse() has to be run.
private HtmlElement root = null;
private Elements.Html html = null;
private Title title = null;
private Body body = null;
private Head head = null;

// Per-element-type lists. Each starts out as null; the visible parsing code creates a
// list only when the first item of that kind is recorded (see dtd and
// processingInstructions in Parse(), address/article/aside in CreateElement()).
private LinkedList<Main> main = null;
private LinkedList<Header> header = null;
private LinkedList<Footer> footer = null;
private LinkedList<Details> details = null;
private LinkedList<Summary> summary = null;
private LinkedList<Article> article = null;
private LinkedList<DtdInstruction> dtd = null;
private LinkedList<ProcessingInstruction> processingInstructions = null;
private LinkedList<Link> link = null;
private LinkedList<Meta> meta = null;
private LinkedList<Style> style = null;
private LinkedList<Address> address = null;
private LinkedList<Aside> aside = null;
private LinkedList<Nav> nav = null;
private LinkedList<Section> section = null;
private LinkedList<Dialog> dialog = null;
private LinkedList<Figure> figure = null;
private LinkedList<Elements.Audio> audio = null;
private LinkedList<Elements.Video> video = null;
private LinkedList<Img> img = null;
private LinkedList<Picture> picture = null;
private LinkedList<Cite> cite = null;
private LinkedList<Data> data = null;
private LinkedList<Time> time = null;
private LinkedList<Elements.Script> script = null;
private LinkedList<Form> form = null;
47
/// <summary>
/// Creates a document wrapper around an HTML string. The text is only stored here;
/// it is parsed later, when one of the parsed properties is first read.
/// </summary>
/// <param name="Html">HTML text of the document.</param>
public HtmlDocument(string Html) => this.htmlText = Html;
56
/// <summary>
/// HTML text, exactly as it was passed to the constructor.
/// </summary>
public string HtmlText
{
	get { return this.htmlText; }
}
61
// Set once Parse() has been started, so that the text is parsed at most once.
private bool parsed = false;

/// <summary>
/// Makes sure the HTML text has been parsed before any parsed member is returned.
/// </summary>
/// <remarks>
/// A null root alone is not a reliable "not parsed yet" signal: Parse() itself never
/// assigns the root, so a text without elements would be parsed again on every property
/// access, appending duplicate entries to the DTD and processing instruction lists.
/// The flag is raised before parsing starts, which also prevents a nested call made
/// while parsing is in progress from starting a second parse.
/// </remarks>
private void AssertParsed()
{
	if (this.parsed || !(this.root is null))
		return;

	this.parsed = true;
	this.Parse();
}
67
/// <summary>
/// Root element recorded for the document. Reading the property parses the HTML text
/// first, if needed. May be null.
/// </summary>
// NOTE(review): the declaration line was missing from this listing; the name and type
// are reconstructed from the backing field (HtmlElement root) - confirm against the
// original file.
public HtmlElement Root
{
	get
	{
		this.AssertParsed();
		return this.root;
	}
}
79
/// <summary>
/// HTML element recorded for the document. Reading the property parses the HTML text
/// first, if needed. May be null.
/// </summary>
// NOTE(review): the declaration line was missing from this listing; the name and type
// are reconstructed from the backing field (Elements.Html html) - confirm against the
// original file.
public Elements.Html Html
{
	get
	{
		this.AssertParsed();
		return this.html;
	}
}
91
/// <summary>
/// Title element recorded for the document. Reading the property parses the HTML text
/// first, if needed. May be null.
/// </summary>
public Title Title
{
	get { AssertParsed(); return title; }
}
103
/// <summary>
/// Head element recorded for the document. Reading the property parses the HTML text
/// first, if needed. May be null.
/// </summary>
public Head Head
{
	get { AssertParsed(); return head; }
}
115
/// <summary>
/// Body element recorded for the document. Reading the property parses the HTML text
/// first, if needed. May be null.
/// </summary>
public Body Body
{
	get { AssertParsed(); return body; }
}
127
/// <summary>
/// Main elements recorded for the document. Reading the property parses the HTML text
/// first, if needed. May be null (the backing list starts out as null).
/// </summary>
public IEnumerable<Main> Main
{
	get { AssertParsed(); return main; }
}
139
/// <summary>
/// Header elements recorded for the document. Reading the property parses the HTML text
/// first, if needed. May be null (the backing list starts out as null).
/// </summary>
public IEnumerable<Header> Header
{
	get { AssertParsed(); return header; }
}
151
/// <summary>
/// Footer elements recorded for the document. Reading the property parses the HTML text
/// first, if needed. May be null (the backing list starts out as null).
/// </summary>
public IEnumerable<Footer> Footer
{
	get { AssertParsed(); return footer; }
}
163
/// <summary>
/// Details elements recorded for the document. Reading the property parses the HTML text
/// first, if needed. May be null (the backing list starts out as null).
/// </summary>
public IEnumerable<Details> Details
{
	get { AssertParsed(); return details; }
}
175
/// <summary>
/// Summary elements recorded for the document. Reading the property parses the HTML text
/// first, if needed. May be null (the backing list starts out as null).
/// </summary>
public IEnumerable<Summary> Summary
{
	get { AssertParsed(); return summary; }
}
187
/// <summary>
/// Article elements recorded for the document. Reading the property parses the HTML text
/// first, if needed. Null if no such element was created (the list is created on the
/// first one).
/// </summary>
public IEnumerable<Article> Article
{
	get { AssertParsed(); return article; }
}
199
/// <summary>
/// Link elements recorded for the document. Reading the property parses the HTML text
/// first, if needed. May be null (the backing list starts out as null).
/// </summary>
public IEnumerable<Link> Link
{
	get { AssertParsed(); return link; }
}
211
/// <summary>
/// Meta elements recorded for the document. Reading the property parses the HTML text
/// first, if needed. May be null (the backing list starts out as null).
/// </summary>
public IEnumerable<Meta> Meta
{
	get { AssertParsed(); return meta; }
}
223
/// <summary>
/// Style elements recorded for the document. Reading the property parses the HTML text
/// first, if needed. May be null (the backing list starts out as null).
/// </summary>
public IEnumerable<Style> Style
{
	get { AssertParsed(); return style; }
}
235
/// <summary>
/// Address elements recorded for the document. Reading the property parses the HTML text
/// first, if needed. Null if no such element was created (the list is created on the
/// first one).
/// </summary>
public IEnumerable<Address> Address
{
	get { AssertParsed(); return address; }
}
247
/// <summary>
/// Aside elements recorded for the document. Reading the property parses the HTML text
/// first, if needed. Null if no such element was created (the list is created on the
/// first one).
/// </summary>
public IEnumerable<Aside> Aside
{
	get { AssertParsed(); return aside; }
}
259
/// <summary>
/// Nav elements recorded for the document. Reading the property parses the HTML text
/// first, if needed. May be null (the backing list starts out as null).
/// </summary>
public IEnumerable<Nav> Nav
{
	get { AssertParsed(); return nav; }
}
271
/// <summary>
/// Section elements recorded for the document. Reading the property parses the HTML text
/// first, if needed. May be null (the backing list starts out as null).
/// </summary>
public IEnumerable<Section> Section
{
	get { AssertParsed(); return section; }
}
283
/// <summary>
/// Dialog elements recorded for the document. Reading the property parses the HTML text
/// first, if needed. May be null (the backing list starts out as null).
/// </summary>
public IEnumerable<Dialog> Dialog
{
	get { AssertParsed(); return dialog; }
}
295
/// <summary>
/// Figure elements recorded for the document. Reading the property parses the HTML text
/// first, if needed. May be null (the backing list starts out as null).
/// </summary>
public IEnumerable<Figure> Figure
{
	get { AssertParsed(); return figure; }
}
307
/// <summary>
/// Audio elements recorded for the document. Reading the property parses the HTML text
/// first, if needed. May be null (the backing list starts out as null).
/// </summary>
public IEnumerable<Elements.Audio> Audio
{
	get { AssertParsed(); return audio; }
}
319
/// <summary>
/// Video elements recorded for the document. Reading the property parses the HTML text
/// first, if needed. May be null (the backing list starts out as null).
/// </summary>
public IEnumerable<Elements.Video> Video
{
	get { AssertParsed(); return video; }
}
331
/// <summary>
/// Img elements recorded for the document. Reading the property parses the HTML text
/// first, if needed. May be null (the backing list starts out as null).
/// </summary>
public IEnumerable<Img> Img
{
	get { AssertParsed(); return img; }
}
343
/// <summary>
/// Picture elements recorded for the document. Reading the property parses the HTML text
/// first, if needed. May be null (the backing list starts out as null).
/// </summary>
public IEnumerable<Picture> Picture
{
	get { AssertParsed(); return picture; }
}
355
/// <summary>
/// Cite elements recorded for the document. Reading the property parses the HTML text
/// first, if needed. May be null (the backing list starts out as null).
/// </summary>
public IEnumerable<Cite> Cite
{
	get { AssertParsed(); return cite; }
}
367
/// <summary>
/// Data elements recorded for the document. Reading the property parses the HTML text
/// first, if needed. May be null (the backing list starts out as null).
/// </summary>
public IEnumerable<Data> Data
{
	get { AssertParsed(); return data; }
}
379
/// <summary>
/// Time elements recorded for the document. Reading the property parses the HTML text
/// first, if needed. May be null (the backing list starts out as null).
/// </summary>
public IEnumerable<Time> Time
{
	get { AssertParsed(); return time; }
}
391
/// <summary>
/// Script elements recorded for the document. Reading the property parses the HTML text
/// first, if needed. May be null (the backing list starts out as null).
/// </summary>
public IEnumerable<Elements.Script> Script
{
	get { AssertParsed(); return script; }
}
403
/// <summary>
/// Form elements recorded for the document. Reading the property parses the HTML text
/// first, if needed. May be null (the backing list starts out as null).
/// </summary>
public IEnumerable<Form> Form
{
	get { AssertParsed(); return form; }
}
415
/// <summary>
/// DTD instructions (&lt;!...&gt;) found while parsing. Reading the property parses the
/// HTML text first, if needed. Null if none was found (the list is created on the
/// first one).
/// </summary>
public IEnumerable<DtdInstruction> Dtd
{
	get { AssertParsed(); return dtd; }
}
427
/// <summary>
/// Processing instructions (&lt;?...?&gt;) found while parsing. Reading the property
/// parses the HTML text first, if needed. Null if none was found (the list is created
/// on the first one).
/// </summary>
public IEnumerable<ProcessingInstruction> ProcessingInstructions
{
	get { AssertParsed(); return processingInstructions; }
}
439
/// <summary>
/// Parses the HTML text into nodes using a character-driven state machine.
/// </summary>
/// <remarks>
/// The text is scanned once. <c>State</c> selects how the current character is
/// interpreted, <c>sb</c> accumulates the token being read (text, tag name, attribute
/// name/value, comment, ...) and <c>Empty</c> tells whether <c>sb</c> holds anything
/// that still has to be emitted. Elements are created through CreateElement(); text,
/// entities, comments, CDATA sections, DTD and processing instructions are added to the
/// current element (if there is one). Input that does not form valid markup is turned
/// back into text rather than rejected. After the loop, whatever is left in <c>sb</c>
/// is flushed according to the state the input ended in, and end positions of all
/// still-open elements are extended to the end of the text.
///
/// While the current element is a script element, '&lt;' only starts markup when it is
/// followed by '/' or '!'; anything else is kept as script text.
/// </remarks>
private void Parse()
{
	StringBuilder sb = new StringBuilder();
	HtmlElement CurrentElement = null;
	HtmlElement EmptyElement;
	HtmlAttribute CurrentAttribute = null;
	string Name = string.Empty;
	string s;
	int State = 0;
	char EndChar = '\x00';
	char ch;
	int Pos;
	int StartOfElement = 0;
	int StartOfText = 0;
	int StartOfAttribute = 0;
	int Len = this.htmlText.Length;
	bool Empty = true;
	bool CurrentElementIsScript = false;

	for (Pos = 0; Pos < Len; Pos++)
	{
		ch = this.htmlText[Pos];

		switch (State)
		{
			case 0: // Waiting for <
				if (ch == '<')
				{
					if (!Empty)
					{
						// Text cannot be a child of an empty element; attach it to the parent.
						if (!(CurrentElement is null) && CurrentElement.IsEmptyElement)
							CurrentElement = CurrentElement.Parent as HtmlElement;

						CurrentElement?.Add(new HtmlText(this, CurrentElement, StartOfText, Pos - 1, sb.ToString()));

						sb.Clear();
						Empty = true;
					}

					StartOfElement = Pos;
					State++;
				}
				else if (ch == '&')
				{
					if (!Empty)
					{
						if (!(CurrentElement is null) && CurrentElement.IsEmptyElement)
							CurrentElement = CurrentElement.Parent as HtmlElement;

						CurrentElement?.Add(new HtmlText(this, CurrentElement, StartOfText, Pos - 1, sb.ToString()));

						sb.Clear();
						Empty = true;
					}

					State = 10;
				}
				else
				{
					sb.Append(ch);
					Empty = false;
				}
				break;

			case 1: // Waiting for ?, !, /, attributes or >
				if (ch == '/')
				{
					if (Empty) // Closing tag
						State = 4;
					else if (CurrentElementIsScript)
					{
						sb.Insert(0, '<');
						sb.Append('/');
						StartOfText = Pos + 1 - sb.Length;
						State = 0;
					}
					else
						State = 3;
				}
				else if (ch == '!' && Empty)
					State++;
				else if (CurrentElementIsScript)
				{
					// Inside a script, "<x" is script text, not the start of a tag.
					sb.Insert(0, '<');
					sb.Append(ch);

					// The buffer now holds pending text. Without flagging it, a directly
					// following '<' or '&' would skip the flush in state 0 and the buffered
					// characters would leak into the next token (e.g. "1<2</script>" would
					// produce the closing tag name "<2SCRIPT" and never close the script).
					Empty = false;

					StartOfText = Pos + 1 - sb.Length;
					State = 0;
				}
				else if (ch == '>')
				{
					if (Empty)
					{
						// "<>" is not a tag; keep it as text.
						sb.Append("<>");
						Empty = false;
						StartOfText = Pos + 1 - sb.Length;
						State = 0;
					}
					else
					{
						CurrentElement = this.CreateElement(CurrentElement, sb.ToString(), StartOfElement, Pos);
						CurrentElementIsScript = CurrentElement is Elements.Script;

						sb.Clear();
						Empty = true;

						StartOfText = Pos + 1;
						State = 0;
					}
				}
				else if (ch <= ' ' || ch == 160)
				{
					if (Empty)
					{
						// '<' followed by whitespace is text.
						sb.Append('<');
						sb.Append(ch);
						Empty = false;
						StartOfText = Pos + 1 - sb.Length;
						State = 0;
					}
					else if (CurrentElementIsScript)
					{
						sb.Insert(0, '<');
						sb.Append(ch);
						StartOfText = Pos + 1 - sb.Length;
						State = 0;
					}
					else
					{
						// Tag name complete; attributes may follow.
						CurrentElement = this.CreateElement(CurrentElement, sb.ToString(), StartOfElement, Pos);
						CurrentElementIsScript = CurrentElement is Elements.Script;

						sb.Clear();
						Empty = true;

						State = 5;
					}
				}
				else if (ch == '?')
					State = 19;
				else if (IsNameCharacter(ch))
				{
					sb.Append(ch);
					Empty = false;
				}
				else
				{
					// Not a valid tag name character: treat what has been read as text.
					sb.Insert(0, '<');
					sb.Append(ch);
					Empty = false;
					StartOfText = Pos + 1 - sb.Length;
					State = 0;
				}
				break;

			case 2: // DTD, comment or CDATA?
				if (ch == '[')
					State = 21; // CDATA?
				else if (CurrentElementIsScript)
				{
					sb.Append("<!");
					sb.Append(ch);
					Empty = false;
					StartOfText = Pos + 1 - sb.Length;
					State = 0;
				}
				else if (ch == '>')
				{
					// "<!>": empty DTD instruction.
					if (this.dtd is null)
						this.dtd = new LinkedList<DtdInstruction>();

					DtdInstruction Dtd = new DtdInstruction(this, CurrentElement, Pos - 2, Pos, string.Empty);

					if (!(CurrentElement is null) && CurrentElement.IsEmptyElement)
						CurrentElement = CurrentElement.Parent as HtmlElement;

					CurrentElement?.Add(Dtd);
					this.dtd.AddLast(Dtd);

					StartOfText = Pos + 1;
					State = 0;
				}
				else if (ch == '-')
					State = 15; // Comment?
				else
				{
					sb.Append(ch);
					Empty = false;

					State = 14; // DTD
				}
				break;

			case 3: // Wait for > at end of empty element.
				if (ch == '>')
				{
					// Self-closed tag without attributes: CurrentElement is left unchanged.
					EmptyElement = this.CreateElement(CurrentElement, sb.ToString(), StartOfElement, Pos);
					EmptyElement.EndPosition = Pos;

					sb.Clear();
					Empty = true;

					StartOfText = Pos + 1;
					State = 0;
				}
				else
				{
					sb.Append('/');
					sb.Append(ch);
					Empty = false;
					State = 1;
				}
				break;

			case 4: // Closing tag
				if (ch == '>')
				{
					s = sb.ToString().ToUpper();
					sb.Clear();
					Empty = true;

					if (!(CurrentElement is null))
					{
						if (CurrentElement.EndPosition < Pos)
							CurrentElement.EndPosition = Pos;

						if (CurrentElement.Name == s)
						{
							CurrentElement = CurrentElement.Parent as HtmlElement;
							CurrentElementIsScript = CurrentElement is Elements.Script;
						}
						else
						{
							// The closing tag does not match the current element. First check
							// whether any ancestor matches; if none does, the tag is ignored.
							HtmlElement Loop = CurrentElement.Parent as HtmlElement;

							while (!(Loop is null) && Loop.Name != s)
								Loop = Loop.Parent as HtmlElement;

							if (!(Loop is null))
							{
								// Close every element up to and including the matching ancestor.
								Loop = CurrentElement.Parent as HtmlElement;

								while (!(Loop is null) && Loop.Name != s)
								{
									if (Loop.EndPosition < Pos)
										Loop.EndPosition = Pos;

									Loop = Loop.Parent as HtmlElement;
								}

								if (Loop.EndPosition < Pos)
									Loop.EndPosition = Pos;

								CurrentElement = Loop.Parent as HtmlElement;
								CurrentElementIsScript = CurrentElement is Elements.Script;
							}
						}
					}

					StartOfText = Pos + 1;
					State = 0;
				}
				else
				{
					sb.Append(ch);
					Empty = false;
				}
				break;

			case 5: // Waiting for attribute
				if (ch == '>')
				{
					if (CurrentElement.IsEmptyElement)
						CurrentElement.EndPosition = Pos;

					StartOfText = Pos + 1;
					State = 0;
				}
				else if (ch == '/')
					State = 9;
				else if (ch == '=')
				{
					// Value without a name.
					StartOfAttribute = Pos;
					Name = string.Empty;
					State = 7;
				}
				else if (ch > ' ' && ch != 160)
				{
					if (IsNameCharacter(ch))
					{
						StartOfAttribute = Pos;
						sb.Append(ch);
						Empty = false;
						State++;
					}
					else
					{
						sb.Clear();
						Empty = true;

						State = 13;
					}
				}
				break;

			case 6: // Attribute name
				if (ch == '>')
				{
					CurrentElement.AddAttribute(new HtmlAttribute(this, CurrentElement, StartOfAttribute, Pos - 1, sb.ToString(), string.Empty));

					sb.Clear();
					Empty = true;

					StartOfText = Pos + 1;
					State = 0;
				}
				else if (ch == '/')
				{
					CurrentElement.AddAttribute(new HtmlAttribute(this, CurrentElement, StartOfAttribute, Pos - 1, sb.ToString(), string.Empty));

					sb.Clear();
					Empty = true;

					State = 9;
				}
				else if (ch == '=')
				{
					Name = sb.ToString();

					sb.Clear();
					Empty = true;

					State = 7;
				}
				else
				{
					if (ch <= ' ' || ch == 160)
					{
						CurrentElement.AddAttribute(new HtmlAttribute(this, CurrentElement, StartOfAttribute, Pos - 1, sb.ToString(), string.Empty));

						sb.Clear();
						Empty = true;

						State--;
					}
					else if (IsNameCharacter(ch))
					{
						sb.Append(ch);
						Empty = false;
					}
					else
					{
						sb.Clear();
						Empty = true;

						State = 13;
					}
				}
				break;

			case 7: // Wait for value.
				if (ch == '"' || ch == '\'')
				{
					CurrentAttribute = new HtmlAttribute(this, CurrentElement, StartOfAttribute, Name);
					CurrentElement.AddAttribute(CurrentAttribute);

					EndChar = ch;
					State = 11;
				}
				else if (ch == '>')
				{
					CurrentElement.AddAttribute(new HtmlAttribute(this, CurrentElement, StartOfAttribute, Pos - 1, Name, Name));
					StartOfText = Pos + 1;
					State = 0;
				}
				else if (ch == '/')
				{
					CurrentElement.AddAttribute(new HtmlAttribute(this, CurrentElement, StartOfAttribute, Pos - 1, Name, Name));
					State = 9;
				}
				else if (ch > ' ' && ch != 160)
				{
					sb.Append(ch);
					Empty = false;
					State++;
				}
				else // Empty attribute
				{
					CurrentElement.AddAttribute(new HtmlAttribute(this, CurrentElement, StartOfAttribute, Pos - 1, Name, Name));

					sb.Clear();
					Empty = true;

					State = 5;
				}
				break;

			case 8: // Non-encapsulated attribute value
				if (ch <= ' ' || ch == 160)
				{
					CurrentElement.AddAttribute(new HtmlAttribute(this, CurrentElement, StartOfAttribute, Pos - 1, Name, sb.ToString()));

					sb.Clear();
					Empty = true;

					State = 5;
				}
				else if (ch == '>')
				{
					CurrentElement.AddAttribute(new HtmlAttribute(this, CurrentElement, StartOfAttribute, Pos - 1, Name, sb.ToString()));

					sb.Clear();
					Empty = true;

					StartOfText = Pos + 1;
					State = 0;
				}
				else if (ch == '/')
				{
					CurrentElement.AddAttribute(new HtmlAttribute(this, CurrentElement, StartOfAttribute, Pos - 1, Name, sb.ToString()));

					sb.Clear();
					Empty = true;

					State = 9;
				}
				else
				{
					sb.Append(ch);
					Empty = false;
				}
				break;

			case 9: // Waiting for > at end of empty element
				if (ch == '>')
				{
					CurrentElement.EndPosition = Pos;
					CurrentElement = CurrentElement.Parent as HtmlElement;
					CurrentElementIsScript = CurrentElement is Elements.Script;
					StartOfText = Pos + 1;
					State = 0;
				}
				break;

			case 10: // First character of entity
				if ((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z') || (ch >= '0' && ch <= '9'))
				{
					sb.Append(ch);
					Empty = false;
					State = 30;
				}
				else if (ch == '#')
					State = 31;
				else
				{
					// Not an entity: the '&' is ordinary text.
					sb.Insert(0, '&');
					sb.Append(ch);
					Empty = false;
					StartOfText = Pos + 1 - sb.Length;
					State = 0;
				}
				break;

			case 11: // Encapsulated attribute value
				if (ch == EndChar)
				{
					if (!Empty)
					{
						s = sb.ToString();

						if (CurrentAttribute.HasSegments)
							CurrentAttribute.Add(new HtmlText(this, CurrentAttribute, Pos - s.Length, Pos - 1, s));
						else
							CurrentAttribute.Value = s;

						CurrentAttribute.EndPosition = Pos;

						sb.Clear();
						Empty = true;
					}

					State = 5;
				}
				else if (ch == '&')
				{
					if (!Empty)
					{
						s = sb.ToString();

						CurrentAttribute.Add(new HtmlText(this, CurrentAttribute, Pos - s.Length, Pos - 1, s));

						sb.Clear();
						Empty = true;
					}

					State = 12;
				}
				else
				{
					sb.Append(ch);
					Empty = false;
				}
				break;

			case 12: // First character of entity in attribute value
				if ((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z') || (ch >= '0' && ch <= '9'))
				{
					sb.Append(ch);
					Empty = false;
					State = 34;
				}
				else if (ch == '#')
					State = 35;
				else
				{
					sb.Insert(0, '&');
					sb.Append(ch);
					Empty = false;
					State = 11;
				}
				break;

			case 13: // Ignore everything until end of tag.
				if (ch == '>')
				{
					StartOfText = Pos + 1;
					State = 0;
				}
				else if (ch == '/')
					State = 9;
				break;

			case 14: // Skip DTD
				if (ch == '>')
				{
					if (this.dtd is null)
						this.dtd = new LinkedList<DtdInstruction>();

					s = sb.ToString();
					DtdInstruction Dtd = new DtdInstruction(this, CurrentElement, Pos - s.Length - 2, Pos, s);

					if (!(CurrentElement is null) && CurrentElement.IsEmptyElement)
						CurrentElement = CurrentElement.Parent as HtmlElement;

					CurrentElement?.Add(Dtd);
					this.dtd.AddLast(Dtd);

					sb.Clear();
					Empty = true;

					StartOfText = Pos + 1;
					State = 0;
				}
				else
				{
					sb.Append(ch);
					Empty = false;
				}
				break;

			case 15: // Second hyphen in start of comment?
				if (ch == '-')
					State++;
				else
				{
					sb.Append("<!-");
					sb.Append(ch);
					Empty = false;

					StartOfText = Pos + 1 - sb.Length;
					State = 0;
				}
				break;

			case 16: // In comment
				if (ch == '-')
					State++;
				else
				{
					sb.Append(ch);
					Empty = false;
				}
				break;

			case 17: // Second hyphen?
				if (ch == '-')
					State++;
				else
				{
					// A single hyphen is part of the comment text.
					sb.Append('-');
					sb.Append(ch);
					Empty = false;

					State--;
				}
				break;

			case 18: // End of comment
				if (ch == '>')
				{
					if (!(CurrentElement is null) && CurrentElement.IsEmptyElement)
						CurrentElement = CurrentElement.Parent as HtmlElement;

					s = sb.ToString();
					CurrentElement?.Add(new Comment(this, CurrentElement, Pos - s.Length - 5, Pos, s));

					sb.Clear();
					Empty = true;

					StartOfText = Pos + 1;
					State = 0;
				}
				else if (ch == '-')
				{
					// "---": the first hyphen is comment text, the last two may still end it.
					sb.Append('-');
					Empty = false;
				}
				else
				{
					sb.Append("--");
					sb.Append(ch);
					Empty = false;

					State -= 2;
				}
				break;

			case 19: // In processing instruction
				if (ch == '?')
					State++;
				else
				{
					sb.Append(ch);
					Empty = false;
				}
				break;

			case 20: // End of processing instruction?
				if (ch == '>')
				{
					if (this.processingInstructions is null)
						this.processingInstructions = new LinkedList<ProcessingInstruction>();

					s = sb.ToString();
					ProcessingInstruction PI = new ProcessingInstruction(this, CurrentElement, Pos - s.Length - 3, Pos, s);

					if (!(CurrentElement is null) && CurrentElement.IsEmptyElement)
						CurrentElement = CurrentElement.Parent as HtmlElement;

					CurrentElement?.Add(PI);
					this.processingInstructions.AddLast(PI);

					sb.Clear();
					Empty = true;

					StartOfText = Pos + 1;
					State = 0;
				}
				else
				{
					sb.Append('?');
					sb.Append(ch);
					Empty = false;

					State--;
				}
				break;

			case 21: // <![ received
				if (ch == 'C')
					State++;
				else
				{
					sb.Append("<![");
					sb.Append(ch);
					Empty = false;
					StartOfText = Pos + 1 - sb.Length;
					State = 0;
				}
				break;

			case 22: // <![C received
				if (ch == 'D')
					State++;
				else
				{
					sb.Append("<![C");
					sb.Append(ch);
					Empty = false;
					StartOfText = Pos + 1 - sb.Length;
					State = 0;
				}
				break;

			case 23: // <![CD received
				if (ch == 'A')
					State++;
				else
				{
					sb.Append("<![CD");
					sb.Append(ch);
					Empty = false;
					StartOfText = Pos + 1 - sb.Length;
					State = 0;
				}
				break;

			case 24: // <![CDA received
				if (ch == 'T')
					State++;
				else
				{
					sb.Append("<![CDA");
					sb.Append(ch);
					Empty = false;
					StartOfText = Pos + 1 - sb.Length;
					State = 0;
				}
				break;

			case 25: // <![CDAT received
				if (ch == 'A')
					State++;
				else
				{
					sb.Append("<![CDAT");
					sb.Append(ch);
					Empty = false;
					StartOfText = Pos + 1 - sb.Length;
					State = 0;
				}
				break;

			case 26: // <![CDATA received
				if (ch == '[')
					State++;
				else
				{
					sb.Append("<![CDATA");
					sb.Append(ch);
					Empty = false;
					StartOfText = Pos + 1 - sb.Length;
					State = 0;
				}
				break;

			case 27: // In CDATA
				if (ch == ']')
					State++;
				else
				{
					sb.Append(ch);
					Empty = false;
				}
				break;

			case 28: // In CDATA, ] received
				if (ch == ']')
					State++;
				else
				{
					// The ']' that was consumed did not start the "]]>" terminator, so it is
					// content and is put back as ']' (it used to be restored as '[').
					sb.Append(']');
					sb.Append(ch);
					Empty = false;
					State--;
				}
				break;

			case 29: // In CDATA, ]] received
				if (ch == '>')
				{
					if (!(CurrentElement is null) && CurrentElement.IsEmptyElement)
						CurrentElement = CurrentElement.Parent as HtmlElement;

					s = sb.ToString();
					CurrentElement?.Add(new CDATA(this, CurrentElement, Pos - s.Length - 10, Pos, s));

					sb.Clear();
					Empty = true;

					StartOfText = Pos + 1;
					State = 0;
				}
				else if (ch == ']')
				{
					// "]]]": the first bracket is content, the last two may still be the
					// start of the terminator (same handling as "---" in a comment).
					sb.Append(']');
					Empty = false;
				}
				else
				{
					// "]]" was not followed by '>': both brackets are content.
					sb.Append("]]");
					sb.Append(ch);
					Empty = false;
					State -= 2;
				}
				break;

			case 30: // Entity name
				if ((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z') || (ch >= '0' && ch <= '9'))
					sb.Append(ch);
				else if (ch == ';')
				{
					if (!(CurrentElement is null) && CurrentElement.IsEmptyElement)
						CurrentElement = CurrentElement.Parent as HtmlElement;

					s = sb.ToString();
					CurrentElement?.Add(new HtmlEntity(this, CurrentElement, Pos - s.Length - 1, Pos, s));

					sb.Clear();
					Empty = true;

					StartOfText = Pos + 1;
					State = 0;
				}
				else
				{
					sb.Insert(0, '&');
					sb.Append(ch);
					Empty = false;
					StartOfText = Pos + 1 - sb.Length;
					State = 0;
				}
				break;

			case 31: // First entity number
				if (ch >= '0' && ch <= '9')
				{
					State++;
					sb.Append(ch);
				}
				else if (ch == 'x' || ch == 'X')
					State += 2;
				else if (ch == ';' && int.TryParse(s = sb.ToString(), out int Code))
				{
					if (!(CurrentElement is null) && CurrentElement.IsEmptyElement)
						CurrentElement = CurrentElement.Parent as HtmlElement;

					CurrentElement?.Add(new HtmlEntityUnicode(this, CurrentElement, Pos - s.Length - 1, Pos, "#" + s, Code));

					sb.Clear();
					Empty = true;

					StartOfText = Pos + 1;
					State = 0;
				}
				else
				{
					sb.Insert(0, "&#");
					sb.Append(ch);
					Empty = false;
					StartOfText = Pos + 1 - sb.Length;
					State = 0;
				}
				break;

			case 32: // Entity number
				if (ch >= '0' && ch <= '9')
					sb.Append(ch);
				else if (ch == ';' && int.TryParse(s = sb.ToString(), out int Code))
				{
					if (!(CurrentElement is null) && CurrentElement.IsEmptyElement)
						CurrentElement = CurrentElement.Parent as HtmlElement;

					CurrentElement?.Add(new HtmlEntityUnicode(this, CurrentElement, Pos - s.Length - 1, Pos, "#" + s, Code));

					sb.Clear();
					Empty = true;

					StartOfText = Pos + 1;
					State = 0;
				}
				else
				{
					sb.Insert(0, "&#");
					sb.Append(ch);
					Empty = false;
					StartOfText = Pos + 1 - sb.Length;
					State = 0;
				}
				break;

			case 33: // Hexadecimal entity number
				if ((ch >= '0' && ch <= '9') || (ch >= 'a' && ch <= 'f') || (ch >= 'A' && ch <= 'F'))
					sb.Append(ch);
				else if (ch == ';' && int.TryParse(s = sb.ToString(), NumberStyles.HexNumber, null, out int Code))
				{
					if (!(CurrentElement is null) && CurrentElement.IsEmptyElement)
						CurrentElement = CurrentElement.Parent as HtmlElement;

					CurrentElement?.Add(new HtmlEntityUnicode(this, CurrentElement, Pos - s.Length - 1, Pos, "#x" + s, Code));

					sb.Clear();
					Empty = true;

					StartOfText = Pos + 1;
					State = 0;
				}
				else
				{
					sb.Insert(0, "&#x");
					sb.Append(ch);
					Empty = false;
					StartOfText = Pos + 1 - sb.Length;
					State = 0;
				}
				break;

			case 34: // Entity name in attribute value
				if ((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z') || (ch >= '0' && ch <= '9'))
					sb.Append(ch);
				else if (ch == ';')
				{
					s = sb.ToString();
					CurrentAttribute.Add(new HtmlEntity(this, CurrentAttribute, Pos - s.Length - 1, Pos, s));

					sb.Clear();
					Empty = true;

					StartOfText = Pos + 1;
					State = 11;
				}
				else
				{
					sb.Insert(0, '&');
					sb.Append(ch);
					Empty = false;
					StartOfText = Pos + 1 - sb.Length;
					State = 11;
				}
				break;

			// NOTE(review): in states 35-37 the entity name is passed without the "#" / "#x"
			// prefix that states 31-33 add - confirm whether that difference is intended.
			case 35: // First entity number in attribute value
				if (ch >= '0' && ch <= '9')
				{
					State++;
					sb.Append(ch);
				}
				else if (ch == 'x' || ch == 'X')
					State += 2;
				else if (ch == ';' && int.TryParse(s = sb.ToString(), out int Code))
				{
					CurrentAttribute.Add(new HtmlEntityUnicode(this, CurrentAttribute, Pos - s.Length - 1, Pos, s, Code));

					sb.Clear();
					Empty = true;

					StartOfText = Pos + 1;
					State = 11;
				}
				else
				{
					sb.Insert(0, "&#");
					sb.Append(ch);
					Empty = false;
					StartOfText = Pos + 1 - sb.Length;
					State = 11;
				}
				break;

			case 36: // Entity number in attribute value
				if (ch >= '0' && ch <= '9')
					sb.Append(ch);
				else if (ch == ';' && int.TryParse(s = sb.ToString(), out int Code))
				{
					CurrentAttribute.Add(new HtmlEntityUnicode(this, CurrentAttribute, Pos - s.Length - 1, Pos, s, Code));

					sb.Clear();
					Empty = true;

					StartOfText = Pos + 1;
					State = 11;
				}
				else
				{
					sb.Insert(0, "&#");
					sb.Append(ch);
					Empty = false;
					StartOfText = Pos + 1 - sb.Length;
					State = 11;
				}
				break;

			case 37: // Hexadecimal entity number in attribute value
				if ((ch >= '0' && ch <= '9') || (ch >= 'a' && ch <= 'f') || (ch >= 'A' && ch <= 'F'))
					sb.Append(ch);
				else if (ch == ';' && int.TryParse(s = sb.ToString(), NumberStyles.HexNumber, null, out int Code))
				{
					CurrentAttribute.Add(new HtmlEntityUnicode(this, CurrentAttribute, Pos - s.Length - 1, Pos, s, Code));

					sb.Clear();
					Empty = true;

					StartOfText = Pos + 1;
					State = 11;
				}
				else
				{
					sb.Insert(0, "&#x");
					sb.Append(ch);
					Empty = false;
					StartOfText = Pos + 1 - sb.Length;
					State = 11;
				}
				break;

			default:
				throw new Exception("Internal error: Unrecognized state.");
		}
	}

	// End of input: flush what is still buffered, depending on where the text stopped.
	if (!Empty)
	{
		switch (State)
		{
			case 0: // Waiting for <
			case 1: // Waiting for !, /, attributes or >
			case 3: // Wait for > at end of empty element.
				if (!(CurrentElement is null) && CurrentElement.IsEmptyElement)
					CurrentElement = CurrentElement.Parent as HtmlElement;

				s = sb.ToString();
				CurrentElement?.Add(new HtmlText(this, CurrentElement, Pos - s.Length, Pos - 1, s));
				break;

			case 6: // Attribute name
				s = sb.ToString();
				CurrentElement.AddAttribute(new HtmlAttribute(this, CurrentElement, StartOfAttribute, Pos - 1, s, s));
				break;

			case 7: // Waiting for value
				CurrentElement.AddAttribute(new HtmlAttribute(this, CurrentElement, StartOfAttribute, Pos - 1, Name, Name));
				break;

			case 8: // Non-encapsulated attribute value
				CurrentElement.AddAttribute(new HtmlAttribute(this, CurrentElement, StartOfAttribute, Pos - 1, Name, sb.ToString()));
				break;

			case 10: // First character of entity
			case 30: // Entity name
				if (!(CurrentElement is null) && CurrentElement.IsEmptyElement)
					CurrentElement = CurrentElement.Parent as HtmlElement;

				sb.Insert(0, '&');
				s = sb.ToString();
				CurrentElement?.Add(new HtmlText(this, CurrentElement, Pos - s.Length, Pos - 1, s));
				break;

			case 11: // Encapsulated attribute value
				s = sb.ToString();

				if (CurrentAttribute.HasSegments)
					CurrentAttribute.Add(new HtmlText(this, CurrentAttribute, Pos - s.Length, Pos - 1, s));
				else
					CurrentAttribute.Value = s;

				CurrentAttribute.EndPosition = Pos - 1;
				break;

			// In the attribute states below, the text segment is added to the attribute, so
			// the attribute is also given as its parent, as the in-loop attribute paths do.
			case 12: // First character of entity in attribute value
			case 34: // Entity name in attribute value
				sb.Insert(0, '&');
				s = sb.ToString();
				CurrentAttribute.Add(new HtmlText(this, CurrentAttribute, Pos - s.Length, Pos - 1, s));
				break;

			case 16: // In comment
			case 17: // Second hyphen?
			case 18: // End of comment
				if (!(CurrentElement is null) && CurrentElement.IsEmptyElement)
					CurrentElement = CurrentElement.Parent as HtmlElement;

				s = sb.ToString();
				CurrentElement?.Add(new Comment(this, CurrentElement, Pos - s.Length - 5, Pos, s));
				break;

			case 19: // In processing instruction
			case 20: // End of processing instruction?
				if (this.processingInstructions is null)
					this.processingInstructions = new LinkedList<ProcessingInstruction>();

				s = sb.ToString();
				ProcessingInstruction PI = new ProcessingInstruction(this, CurrentElement, Pos - s.Length - 3, Pos, s);

				if (!(CurrentElement is null) && CurrentElement.IsEmptyElement)
					CurrentElement = CurrentElement.Parent as HtmlElement;

				CurrentElement?.Add(PI);
				this.processingInstructions.AddLast(PI);
				break;

			case 27: // In CDATA
			case 28: // In CDATA, ] received
			case 29: // In CDATA, ]] received
				if (!(CurrentElement is null) && CurrentElement.IsEmptyElement)
					CurrentElement = CurrentElement.Parent as HtmlElement;

				s = sb.ToString();
				CurrentElement?.Add(new CDATA(this, CurrentElement, Pos - s.Length, Pos - 1, s));
				break;

			case 31: // First entity number
			case 32: // Entity number
				if (!(CurrentElement is null) && CurrentElement.IsEmptyElement)
					CurrentElement = CurrentElement.Parent as HtmlElement;

				sb.Insert(0, "&#");
				s = sb.ToString();
				CurrentElement?.Add(new HtmlText(this, CurrentElement, Pos - s.Length, Pos - 1, s));
				break;

			case 33: // Hexadecimal entity number
				if (!(CurrentElement is null) && CurrentElement.IsEmptyElement)
					CurrentElement = CurrentElement.Parent as HtmlElement;

				sb.Insert(0, "&#x");
				s = sb.ToString();
				CurrentElement?.Add(new HtmlText(this, CurrentElement, Pos - s.Length, Pos - 1, s));
				break;

			case 35: // First entity number in attribute value
			case 36: // Entity number in attribute value
				sb.Insert(0, "&#");
				s = sb.ToString();
				CurrentAttribute.Add(new HtmlText(this, CurrentAttribute, Pos - s.Length, Pos - 1, s));
				break;

			case 37: // Hexadecimal entity number in attribute value
				sb.Insert(0, "&#x");
				s = sb.ToString();
				CurrentAttribute.Add(new HtmlText(this, CurrentAttribute, Pos - s.Length, Pos - 1, s));
				break;
		}
	}

	// Elements that were never closed extend to the last character of the text.
	while (!(CurrentElement is null))
	{
		if (CurrentElement.EndPosition < Pos - 1)
			CurrentElement.EndPosition = Pos - 1;

		CurrentElement = CurrentElement.Parent as HtmlElement;
	}
}
1578
/// <summary>
/// Checks if a character may be part of a tag or attribute name.
/// </summary>
/// <param name="ch">Character to test.</param>
/// <returns>
/// true for '-', '.', ':', '_', digits, ASCII letters, U+00B7 and the non-ASCII ranges
/// listed below; false for everything else.
/// </returns>
private static bool IsNameCharacter(char ch)
{
	// ASCII part: letters, digits (':' directly follows '9'), and a few punctuation marks.
	if (ch <= 'z')
	{
		return ch >= 'a'
			|| (ch >= 'A' && ch <= 'Z')
			|| (ch >= '0' && ch <= ':')
			|| ch == '_'
			|| ch == '-'
			|| ch == '.';
	}

	// Between 'z' and U+00C0 only the middle dot is accepted.
	if (ch < '\xc0')
		return ch == '\xb7';

	// U+00C0 - U+1FFF: everything except the multiplication sign, the division sign and
	// the Greek question mark.
	if (ch <= '\x1fff')
		return ch != '\xd7' && ch != '\xf7' && ch != '\x037e';

	// Above U+1FFF: four individual characters plus a set of closed ranges.
	return ch == '\x200c' || ch == '\x200d' || ch == '\x203f' || ch == '\x2040'
		|| (ch >= '\x2070' && ch <= '\x218f')
		|| (ch >= '\x2c00' && ch <= '\x2fef')
		|| (ch >= '\x3001' && ch <= '\xd7ff')
		|| (ch >= '\xf900' && ch <= '\xfdcf')
		|| (ch >= '\xfdf0' && ch <= '\xfffd');
}
1658
/// <summary>
/// Creates the element object corresponding to a tag name, attaches it to its
/// parent (or makes it the root, if there is no parent and no root has been
/// registered yet), and records it in the per-type bookkeeping fields of the
/// document.
/// </summary>
/// <param name="Parent">Prospective parent element. If it is an empty element,
/// its own parent is used instead. May be null.</param>
/// <param name="TagName">Tag name, in any letter case.</param>
/// <param name="Start">Start position, passed on to the element constructor.</param>
/// <param name="Pos">Position assigned as end position, if the created element
/// is an empty element.</param>
/// <returns>Newly created element.</returns>
private HtmlElement CreateElement(HtmlElement Parent, string TagName, int Start, int Pos)
{
    HtmlElement Result;

    // An empty element cannot have children; attach to its parent instead.
    if (!(Parent is null) && Parent.IsEmptyElement)
        Parent = Parent.Parent as HtmlElement;

    // Tag names are compared culture-independently. A culture-sensitive
    // upper-casing breaks the lookup under cultures with special casing rules
    // (for instance "title" -> "TİTLE" under Turkish cultures).
    TagName = TagName.ToUpperInvariant();

    switch (TagName)
    {
        case "A": Result = new A(this, Parent, Start); break;
        case "ABBR": Result = new Abbr(this, Parent, Start); break;
        case "ACRONYM": Result = new Acronym(this, Parent, Start); break;
        case "ADDRESS": Result = RegisterElement(ref this.address, new Address(this, Parent, Start)); break;
        case "APPLET": Result = new Applet(this, Parent, Start); break;
        case "AREA": Result = new Area(this, Parent, Start); break;
        case "ARTICLE": Result = RegisterElement(ref this.article, new Article(this, Parent, Start)); break;
        case "ASIDE": Result = RegisterElement(ref this.aside, new Aside(this, Parent, Start)); break;
        case "AUDIO": Result = RegisterElement(ref this.audio, new Elements.Audio(this, Parent, Start)); break;
        case "B": Result = new B(this, Parent, Start); break;
        case "BASE": Result = new Base(this, Parent, Start); break;
        case "BASEFONT": Result = new BaseFont(this, Parent, Start); break;
        case "BDI": Result = new Bdi(this, Parent, Start); break;
        case "BDO": Result = new Bdo(this, Parent, Start); break;
        case "BGSOUND": Result = new BgSound(this, Parent, Start); break;
        case "BIG": Result = new Big(this, Parent, Start); break;
        case "BLINK": Result = new BLink(this, Parent, Start); break;
        case "BLOCKQUOTE": Result = new BlockQuote(this, Parent, Start); break;
        case "BODY":
            // Only the first BODY element is remembered.
            Body BodyElement = new Body(this, Parent, Start);
            Result = BodyElement;
            if (this.body is null)
                this.body = BodyElement;
            break;

        case "BR": Result = new Br(this, Parent, Start); break;
        case "BUTTON": Result = new Button(this, Parent, Start); break;
        case "CANVAS": Result = new Canvas(this, Parent, Start); break;
        case "CAPTION": Result = new Caption(this, Parent, Start); break;
        case "CENTER": Result = new Center(this, Parent, Start); break;
        case "CITE": Result = RegisterElement(ref this.cite, new Cite(this, Parent, Start)); break;
        case "CODE": Result = new Code(this, Parent, Start); break;
        case "COL": Result = new Col(this, Parent, Start); break;
        case "COLGROUP": Result = new ColGroup(this, Parent, Start); break;
        case "COMMAND": Result = new Command(this, Parent, Start); break;
        case "CONTENT": Result = new Elements.Content(this, Parent, Start); break;
        case "DATA": Result = RegisterElement(ref this.data, new Data(this, Parent, Start)); break;
        case "DATALIST": Result = new DataList(this, Parent, Start); break;
        case "DD": Result = new Dd(this, Parent, Start); break;
        case "DEL": Result = new Del(this, Parent, Start); break;
        case "DETAILS": Result = RegisterElement(ref this.details, new Details(this, Parent, Start)); break;
        case "DFN": Result = new Dfn(this, Parent, Start); break;
        case "DIALOG": Result = RegisterElement(ref this.dialog, new Dialog(this, Parent, Start)); break;
        case "DIR": Result = new Dir(this, Parent, Start); break;
        case "DIV": Result = new Div(this, Parent, Start); break;
        case "DL": Result = new Dl(this, Parent, Start); break;
        case "DT": Result = new Dt(this, Parent, Start); break;
        case "ELEMENT": Result = new Element(this, Parent, Start); break;
        case "EM": Result = new Em(this, Parent, Start); break;
        case "EMBED": Result = new Embed(this, Parent, Start); break;
        case "FIELDSET": Result = new FieldSet(this, Parent, Start); break;
        case "FIGCAPTION": Result = new FigCaption(this, Parent, Start); break;
        case "FIGURE": Result = RegisterElement(ref this.figure, new Figure(this, Parent, Start)); break;
        case "FONT": Result = new Font(this, Parent, Start); break;
        case "FOOTER": Result = RegisterElement(ref this.footer, new Footer(this, Parent, Start)); break;
        case "FORM": Result = RegisterElement(ref this.form, new Form(this, Parent, Start)); break;
        case "FRAME": Result = new Frame(this, Parent, Start); break;
        case "FRAMESET": Result = new FrameSet(this, Parent, Start); break;
        case "H1": Result = new Hn(this, Parent, Start, 1); break;
        case "H2": Result = new Hn(this, Parent, Start, 2); break;
        case "H3": Result = new Hn(this, Parent, Start, 3); break;
        case "H4": Result = new Hn(this, Parent, Start, 4); break;
        case "H5": Result = new Hn(this, Parent, Start, 5); break;
        case "H6": Result = new Hn(this, Parent, Start, 6); break;
        case "H7": Result = new Hn(this, Parent, Start, 7); break;
        case "H8": Result = new Hn(this, Parent, Start, 8); break;
        case "H9": Result = new Hn(this, Parent, Start, 9); break;
        case "HEAD":
            // Only the first HEAD element is remembered.
            Head HeadElement = new Head(this, Parent, Start);
            Result = HeadElement;
            if (this.head is null)
                this.head = HeadElement;
            break;

        case "HEADER": Result = RegisterElement(ref this.header, new Header(this, Parent, Start)); break;
        case "HGROUP": Result = new HGroup(this, Parent, Start); break;
        case "HR": Result = new Hr(this, Parent, Start); break;
        case "HTML":
            // Only the first HTML element is remembered.
            Elements.Html HtmlElement = new Elements.Html(this, Parent, Start);
            Result = HtmlElement;
            if (this.html is null)
                this.html = HtmlElement;
            break;

        case "I": Result = new I(this, Parent, Start); break;
        case "IFRAME": Result = new IFrame(this, Parent, Start); break;
        case "IMAGE": Result = new Image(this, Parent, Start); break;
        case "IMG": Result = RegisterElement(ref this.img, new Img(this, Parent, Start)); break;
        case "INPUT": Result = new Input(this, Parent, Start); break;
        case "INS": Result = new Ins(this, Parent, Start); break;
        case "ISINDEX": Result = new IsIndex(this, Parent, Start); break;
        case "KBD": Result = new Kbd(this, Parent, Start); break;
        case "KEYGEN": Result = new Keygen(this, Parent, Start); break;
        case "LABEL": Result = new Label(this, Parent, Start); break;
        case "LEGEND": Result = new Legend(this, Parent, Start); break;
        case "LI": Result = new Li(this, Parent, Start); break;
        case "LINK": Result = RegisterElement(ref this.link, new Link(this, Parent, Start)); break;
        case "LISTING": Result = new Listing(this, Parent, Start); break;
        case "MAIN": Result = RegisterElement(ref this.main, new Main(this, Parent, Start)); break;
        case "MAP": Result = new Map(this, Parent, Start); break;
        case "MARK": Result = new Mark(this, Parent, Start); break;
        case "MARQUEE": Result = new Marquee(this, Parent, Start); break;
        case "MENU": Result = new Menu(this, Parent, Start); break;
        case "MENUITEM": Result = new MenuItem(this, Parent, Start); break;
        case "META": Result = RegisterElement(ref this.meta, new Meta(this, Parent, Start)); break;
        case "METER": Result = new Meter(this, Parent, Start); break;
        case "MULTICOL": Result = new MultiCol(this, Parent, Start); break;
        case "NAV": Result = RegisterElement(ref this.nav, new Nav(this, Parent, Start)); break;
        case "NEXTID": Result = new NextId(this, Parent, Start); break;
        case "NOBR": Result = new NoBr(this, Parent, Start); break;
        case "NOEMBED": Result = new NoEmbed(this, Parent, Start); break;
        case "NOFRAMES": Result = new NoFrames(this, Parent, Start); break;
        case "NOSCRIPT": Result = new NoScript(this, Parent, Start); break;
        case "OBJECT": Result = new Elements.Object(this, Parent, Start); break;
        case "OL": Result = new Ol(this, Parent, Start); break;
        case "OPTGROUP": Result = new OptGroup(this, Parent, Start); break;
        case "OPTION": Result = new Option(this, Parent, Start); break;
        case "OUTPUT": Result = new Output(this, Parent, Start); break;
        case "P": Result = new P(this, Parent, Start); break;
        case "PARAM": Result = new Param(this, Parent, Start); break;
        case "PICTURE": Result = RegisterElement(ref this.picture, new Picture(this, Parent, Start)); break;
        case "PLAINTEXT": Result = new PlainText(this, Parent, Start); break;
        case "PRE": Result = new Pre(this, Parent, Start); break;
        case "PROGRESS": Result = new Progress(this, Parent, Start); break;
        case "Q": Result = new Q(this, Parent, Start); break;
        case "RP": Result = new Rp(this, Parent, Start); break;
        case "RT": Result = new Rt(this, Parent, Start); break;
        case "RTC": Result = new Rtc(this, Parent, Start); break;
        case "RUBY": Result = new Ruby(this, Parent, Start); break;
        case "S": Result = new S(this, Parent, Start); break;
        case "SAMP": Result = new Samp(this, Parent, Start); break;
        case "SCRIPT": Result = RegisterElement(ref this.script, new Elements.Script(this, Parent, Start)); break;
        case "SECTION": Result = RegisterElement(ref this.section, new Section(this, Parent, Start)); break;
        case "SELECT": Result = new Select(this, Parent, Start); break;
        case "SHADOW": Result = new Shadow(this, Parent, Start); break;
        case "SLOT": Result = new Slot(this, Parent, Start); break;
        case "SMALL": Result = new Small(this, Parent, Start); break;
        case "SOURCE": Result = new Source(this, Parent, Start); break;
        case "SPACER": Result = new Spacer(this, Parent, Start); break;
        case "SPAN": Result = new Span(this, Parent, Start); break;
        case "STRIKE": Result = new Strike(this, Parent, Start); break;
        case "STRONG": Result = new Strong(this, Parent, Start); break;
        case "STYLE": Result = RegisterElement(ref this.style, new Style(this, Parent, Start)); break;
        case "SUB": Result = new Sub(this, Parent, Start); break;
        case "SUMMARY": Result = RegisterElement(ref this.summary, new Summary(this, Parent, Start)); break;
        case "SUP": Result = new Sup(this, Parent, Start); break;
        case "TABLE": Result = new Table(this, Parent, Start); break;
        case "TBODY": Result = new TBody(this, Parent, Start); break;
        case "TD": Result = new Td(this, Parent, Start); break;
        case "TEMPLATE": Result = new Template(this, Parent, Start); break;
        case "TEXTAREA": Result = new TextArea(this, Parent, Start); break;
        case "TFOOT": Result = new TFoot(this, Parent, Start); break;
        case "TH": Result = new Th(this, Parent, Start); break;
        case "THEAD": Result = new THead(this, Parent, Start); break;
        case "TIME": Result = RegisterElement(ref this.time, new Time(this, Parent, Start)); break;
        case "TITLE":
            // Only the first TITLE element is remembered.
            Title TitleElement = new Title(this, Parent, Start);
            Result = TitleElement;
            if (this.title is null)
                this.title = TitleElement;
            break;

        case "TR": Result = new Tr(this, Parent, Start); break;
        case "TRACK": Result = new Track(this, Parent, Start); break;
        case "TT": Result = new Tt(this, Parent, Start); break;
        case "U": Result = new U(this, Parent, Start); break;
        case "UL": Result = new Ul(this, Parent, Start); break;
        case "VAR": Result = new Var(this, Parent, Start); break;
        case "VIDEO": Result = RegisterElement(ref this.video, new Elements.Video(this, Parent, Start)); break;
        case "WBR": Result = new Wbr(this, Parent, Start); break;
        case "XMP": Result = new Xmp(this, Parent, Start); break;

        // Unrecognized tag: generic element carrying the upper-cased tag name.
        default: Result = new HtmlElement(this, Parent, Start, TagName); break;
    }

    if (Parent is null)
    {
        // The first parentless element becomes the root of the document.
        if (this.root is null)
            this.root = Result;
    }
    else
        Parent.Add(Result);

    // Empty elements have no content, so they end where they were created.
    if (Result.IsEmptyElement)
        Result.EndPosition = Pos;

    return Result;
}

/// <summary>
/// Appends an element to one of the per-type element lists of the document,
/// creating the list on first use.
/// </summary>
/// <typeparam name="T">Type of element.</typeparam>
/// <param name="List">List field to append to. Created if null.</param>
/// <param name="Element">Element to append.</param>
/// <returns>The element passed in <paramref name="Element"/>.</returns>
private static T RegisterElement<T>(ref LinkedList<T> List, T Element)
{
    if (List is null)
        List = new LinkedList<T>();

    List.AddLast(Element);

    return Element;
}
2025
/// <summary>
/// Exports the HTML document to XML.
/// </summary>
/// <param name="Output">XML output.</param>
public void Export(XmlWriter Output)
{
    // Parsing is lazy; make sure the document has been parsed, otherwise a
    // freshly created document would silently export nothing.
    this.AssertParsed();
    this.root?.Export(Output);
}
2034
/// <summary>
/// Exports the HTML document to XML.
/// </summary>
/// <param name="Output">Output that the root element exports itself to.</param>
public void Export(StringBuilder Output)
{
    // Parsing is lazy; make sure the document has been parsed, otherwise a
    // freshly created document would silently export nothing.
    this.AssertParsed();
    this.root?.Export(Output);
}
2043
/// <summary>
/// Gets meta-data about the page.
/// </summary>
/// <returns>Meta-data object created from this document.</returns>
public PageMetaData GetMetaData()
{
    return new PageMetaData(this);
}
2052
/// <summary>
/// Extracts the contents of the BODY element in a HTML string.
/// 
/// Only attribute-less <c>&lt;body&gt;</c> tags are recognized. Text up to
/// and including the start tag, and text from the end tag onwards, is
/// removed. If a tag is missing, the corresponding side of the string is left
/// untouched. If the remaining content consists of a single
/// <c>&lt;section&gt;</c> element, that wrapper is removed as well.
/// </summary>
/// <param name="Html">HTML string.</param>
/// <returns>Contents of the BODY element, trimmed of surrounding white space
/// where tags were removed.</returns>
public static string GetBody(string Html)
{
    const string BodyStart = "<body>";
    const string BodyEnd = "</body>";
    const string SectionStart = "<section>";
    const string SectionEnd = "</section>";

    // Markup is matched ordinally: tag names are not linguistic text, and a
    // culture-sensitive comparison fails for 'I'/'i' under e.g. Turkish cultures.
    int i = Html.IndexOf(BodyStart, StringComparison.OrdinalIgnoreCase);
    if (i >= 0) // Index 0 is a valid match: the string may begin with the tag.
        Html = Html.Substring(i + BodyStart.Length).TrimStart();

    i = Html.IndexOf(BodyEnd, StringComparison.OrdinalIgnoreCase);
    if (i >= 0) // Tolerate a missing end tag instead of throwing.
        Html = Html.Substring(0, i).TrimEnd();

    if (Html.StartsWith(SectionStart, StringComparison.OrdinalIgnoreCase) &&
        Html.EndsWith(SectionEnd, StringComparison.OrdinalIgnoreCase))
    {
        string Inner = Html.Substring(SectionStart.Length).TrimStart();
        Inner = Inner.Substring(0, Inner.Length - SectionEnd.Length).TrimEnd();

        // Only unwrap if the outer tags belong to one single section. A second
        // section start tag (in any letter case) means the first and last tags
        // belong to different sections.
        if (Inner.IndexOf(SectionStart, StringComparison.OrdinalIgnoreCase) < 0)
            Html = Inner;
    }

    return Html;
}
2079
2080 }
2081}
CDATA content.
Definition: CDATA.cs:12
Inline comment found in the document.
Definition: Comment.cs:12
Represents a DTD instruction inside the document.
ARTICLE element
Definition: Article.cs:11
Audio(HtmlDocument Document, HtmlElement Parent, int StartPosition)
AUDIO element
Definition: Audio.cs:18
Html(HtmlDocument Document, HtmlElement Parent, int StartPosition)
HTML element
Definition: Html.cs:18
Script(HtmlDocument Document, HtmlElement Parent, int StartPosition)
SCRIPT element
Definition: Script.cs:18
Video(HtmlDocument Document, HtmlElement Parent, int StartPosition)
VIDEO element
Definition: Video.cs:18
IEnumerable< Elements.Audio > Audio
AUDIO elements found in document, or null if none found.
IEnumerable< Main > Main
MAIN elements found in document, or null if none found.
Head Head
First HEAD element of document, if found, null otherwise.
HtmlDocument(string Html)
HTML document.
Definition: HtmlDocument.cs:52
HtmlElement Root
Root element.
Definition: HtmlDocument.cs:72
IEnumerable< Style > Style
STYLE elements found in document, or null if none found.
IEnumerable< Nav > Nav
NAV elements found in document, or null if none found.
void Export(StringBuilder Output)
Exports the HTML document to XML.
IEnumerable< Figure > Figure
FIGURE elements found in document, or null if none found.
static string GetBody(string Html)
Extracts the contents of the BODY element in a HTML string.
void Export(XmlWriter Output)
Exports the HTML document to XML.
IEnumerable< Time > Time
TIME elements found in document, or null if none found.
IEnumerable< Article > Article
ARTICLE elements found in document, or null if none found.
IEnumerable< Picture > Picture
PICTURE elements found in document, or null if none found.
IEnumerable< ProcessingInstruction > ProcessingInstructions
Processing instructions found in document, or null if none found.
Title Title
First TITLE element of document, if found, null otherwise.
Definition: HtmlDocument.cs:96
IEnumerable< Address > Address
ADDRESS elements found in document, or null if none found.
IEnumerable< Details > Details
DETAILS elements found in document, or null if none found.
IEnumerable< Aside > Aside
ASIDE elements found in document, or null if none found.
IEnumerable< Dialog > Dialog
DIALOG elements found in document, or null if none found.
IEnumerable< Elements.Script > Script
SCRIPT elements found in document, or null if none found.
PageMetaData GetMetaData()
Gets meta-data about the page.
IEnumerable< Elements.Video > Video
VIDEO elements found in document, or null if none found.
IEnumerable< Header > Header
HEADER elements found in document, or null if none found.
IEnumerable< DtdInstruction > Dtd
DTD instructions found in document, or null if none found.
IEnumerable< Meta > Meta
META elements found in document, or null if none found.
IEnumerable< Summary > Summary
SUMMARY elements found in document, or null if none found.
Elements.Html Html
First HTML element of document, if found, null otherwise.
Definition: HtmlDocument.cs:84
IEnumerable< Img > Img
IMG elements found in document, or null if none found.
Body Body
First BODY element of document, if found, null otherwise.
IEnumerable< Link > Link
LINK elements found in document, or null if none found.
IEnumerable< Form > Form
FORM elements found in document, or null if none found.
IEnumerable< Footer > Footer
FOOTER elements found in document, or null if none found.
IEnumerable< Data > Data
DATA elements found in document, or null if none found.
IEnumerable< Section > Section
SECTION elements found in document, or null if none found.
IEnumerable< Cite > Cite
CITE elements found in document, or null if none found.
Base class for all HTML elements.
Definition: HtmlElement.cs:12
virtual bool IsEmptyElement
If the element is an empty element.
Definition: HtmlElement.cs:273
override void Export(XmlWriter Output)
Exports the HTML document to XML.
Definition: HtmlElement.cs:211
override string ToString()
Definition: HtmlElement.cs:202
HTML Entity, as a unicode number string.
int EndPosition
End position of element.
Definition: HtmlNode.cs:69
Contains meta-data about a page.
Definition: PageMetaData.cs:17
Represents a Processing instruction inside the document.