Last active
July 1, 2023 22:40
-
-
Save DanielBaumert/1b4f0ed1793bef4458648457f1af07d5 to your computer and use it in GitHub Desktop.
HTML parse - example
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
using System.Collections; | |
using System.Runtime.CompilerServices; | |
using System.Text; | |
namespace EasyWeb; | |
public class HtmlParser | |
{ | |
private Stack<HtmlElement?> _rootElements = null!; | |
private HtmlElement? _rootElement; | |
private Stack<HtmlElement?> _previewSiblings = null!; | |
private HtmlElement? _previewSibling; | |
TagElement? _body; | |
private IDictionary<string, HtmlElement> _idElements = null!; | |
// Void elements: tags that never carry content or a closing tag. ParseTag returns
// immediately after their '>' instead of searching for children and an end tag.
private static readonly HashSet<string> s_inlineTags = new HashSet<string>
{
    "area",
    "base",
    "br",
    "col",
    "embed",
    "hr",
    "img",
    "input",
    "link",
    "meta",
    "source",
    "track",
    "wbr"
};
/// <summary>
/// Parses <paramref name="htmlSource"/> and returns the resulting document.
/// </summary>
/// <param name="htmlSource">The HTML text to parse.</param>
/// <returns>A document holding the top-level elements, the id lookup table and the body element (if one was seen).</returns>
public HtmlDocument ParseHtml(ReadOnlySpan<char> htmlSource)
{
    // Reset every piece of per-parse state. Without clearing the current root, the
    // current sibling and the body, a second call on the same parser instance would
    // chain its first element to the last element of the previous document and could
    // report the previous document's body.
    _rootElements = new Stack<HtmlElement?>();
    _previewSiblings = new Stack<HtmlElement?>();
    _idElements = new Dictionary<string, HtmlElement>();
    _rootElement = null;
    _previewSibling = null;
    _body = null;

    if (htmlSource.IsEmpty)
    {
        return new HtmlDocument(_idElements, Array.Empty<HtmlElement>(), null);
    }

    int i = 0;
    HtmlElement[] roots = ParseHtml(htmlSource, ref i);

    // _body is read only after parsing, because ParseTag assigns it while parsing.
    return new HtmlDocument(_idElements, roots, _body);
}
/// <summary>
/// Parses a run of sibling nodes (tags, comments and text) starting at <paramref name="i"/>
/// and stops at the end of the input or in front of the next closing-tag marker.
/// </summary>
/// <param name="htmlSource">The complete HTML text.</param>
/// <param name="i">Cursor into <paramref name="htmlSource"/>; advanced past everything consumed.</param>
/// <returns>The parsed sibling nodes in source order.</returns>
private HtmlElement[] ParseHtml(ReadOnlySpan<char> htmlSource, ref int i)
{
    List<HtmlElement> elements = new List<HtmlElement>();
    int length = htmlSource.Length;

    // skip whitespace in front of the first node (bounded, so blank input does not throw)
    while (i < length && IsSpace(htmlSource[i]))
    {
        i++;
    }

    // start of the text that has not been turned into an element yet
    int start = i;

    // A '<' that is the very last character can be neither a tag nor a closing tag;
    // the "i + 1 >= length" guard keeps the look-ahead inside the span.
    while (i < length && (i + 1 >= length || IsNotCloseTagSig(htmlSource, i)))
    {
        if (htmlSource[i] is not '<' || i + 1 >= length)
        {
            // ordinary text character (or a dangling '<' at the end of the input)
            i++;
            continue;
        }

        if (htmlSource[i + 1] is '<')
        {
            // "<<" sequence: skip ahead until the next '>' (the skipped range stays part of the text)
            i += 2;
            while (i < length && htmlSource[i] is not '>')
            {
                i++;
            }
            if (i < length)
            {
                i++;
            }
            continue;
        }

        if (i != start)
        {
            // text sits between the previous node and this tag-open char
            elements.Add(SetSibling(new TextElement { SpanStart = start, SpanEnd = i, Value = new string(htmlSource[start..i]), Root = _rootElement }));
            start = i;
        }

        string? idValue = null;
        HtmlElement element = ParseTagOrComment(htmlSource, ref i, ref idValue);
        elements.Add(element);
        if (idValue is not null)
        {
            // Duplicate ids must not abort the parse; the first element carrying an id is kept.
            _idElements.TryAdd(idValue, element);
        }

        // skip whitespace behind the element
        while (i < length && IsSpace(htmlSource[i]))
        {
            i++;
        }

        // the next node starts here
        start = i;
    }

    if (i != start)
    {
        // trailing text in front of the closing tag / end of input
        elements.Add(SetSibling(new TextElement { SpanStart = start, SpanEnd = i, Value = new string(htmlSource[start..i]), Root = _rootElement }));
    }

    return elements.ToArray();
}
/// <summary>
/// Dispatches on the character following a tag-open char: a lowercase letter starts a tag,
/// '!' starts either a DOCTYPE declaration or a comment.
/// </summary>
/// <param name="htmlSource">The complete HTML text.</param>
/// <param name="i">Cursor positioned on the tag-open char; advanced past the parsed node.</param>
/// <param name="idValue">Receives the value of the tag's id attribute, if it has one.</param>
/// <returns>The parsed element, already linked to its previous sibling.</returns>
/// <exception cref="FormatException">The markup is not a tag, DOCTYPE or comment, or it is not terminated.</exception>
private HtmlElement ParseTagOrComment(ReadOnlySpan<char> htmlSource, ref int i, ref string? idValue)
{
    int length = htmlSource.Length;
    char charAt = htmlSource[++i];
    switch (charAt)
    {
        case >= 'a' and <= 'z':
            return ParseTag(htmlSource, ref i, ref idValue);
        case '!':
            int commentStart = i - 1; // index of the tag-open char
            // "DOCTYPE" occupies i + 1 .. i + 7, so only probe when that range exists
            if (i + 7 < length && IsDocTypeElement(htmlSource, i + 1))
            {
                i += 7; // skip 'DOCTYPE'
                while (i < length && htmlSource[i] is not '>')
                {
                    i++;
                }
                if (i >= length)
                {
                    throw new FormatException("Unterminated DOCTYPE declaration.");
                }
                i++; // step past '>'
                // the whole declaration, including the angle brackets, is stored as the value
                string commentContent = new string(htmlSource[commentStart..i]);
                return SetSibling(new CommentElement { SpanStart = commentStart, SpanEnd = i, Value = commentContent, Root = _rootElement });
            }
            // the comment opener occupies i - 1 .. i + 2
            if (i + 2 < length && IsCommentOpen(htmlSource, commentStart))
            {
                commentStart = (i += 3); // first character behind the comment opener
                // the comment terminator needs three characters, so stop probing once they no longer fit
                while (i + 2 < length && !IsCommentClose(htmlSource, i))
                {
                    i++;
                }
                if (i + 2 >= length)
                {
                    throw new FormatException("Unterminated comment.");
                }
                string commentContent = new string(htmlSource[commentStart..i]); // text between opener and terminator
                i += 3; // skip the terminator
                // Root is assigned here as well, matching the DOCTYPE branch above
                return SetSibling(new CommentElement { SpanStart = commentStart, SpanEnd = i, Value = commentContent, Root = _rootElement });
            }
            throw new FormatException();
        default:
            throw new FormatException($"The character '{charAt}' isn't recognized!");
    }
}
/// <summary>
/// Parses one tag: its name, its attributes and - unless it is self-closing, a void tag or
/// a skipped tag (style, svg, script) - its child nodes and end tag.
/// </summary>
/// <param name="htmlSource">The complete HTML text.</param>
/// <param name="i">Cursor positioned on the first character of the tag name; advanced past the tag.</param>
/// <param name="idValue">Receives the value of the id attribute, if the tag has one.</param>
/// <returns>The parsed tag element, already linked to its previous sibling.</returns>
/// <exception cref="FormatException">The tag is malformed or the input ends before the tag is complete.</exception>
private HtmlElement ParseTag(ReadOnlySpan<char> htmlSource, ref int i, ref string? idValue)
{
    int length = htmlSource.Length;
    int tagStart = i - 1; // index of the tag-open char
    TagElement element = new TagElement { Tag = ParseTagName(htmlSource, ref i) };
    while (true)
    {
        if (i >= length)
        {
            throw new FormatException($"Unexpected end of input inside tag '{element.Tag}'.");
        }

        switch (htmlSource[i])
        {
            case ' ' or '\n' or '\r' or '\t':
                i++;
                continue;
            case >= 'a' and <= 'z' or '_': // attribute
                element.Attributes = ParseAttributes(htmlSource, ref i, element, ref idValue);
                break;
            case '/':
                i++;
                if (i >= length || htmlSource[i] is not '>')
                {
                    throw new FormatException();
                }
                // Step past the '>' so the caller does not pick it up as text; SpanEnd is the
                // position behind the tag, the same convention the '>' branch uses.
                i++;
                element.SpanStart = tagStart;
                element.SpanEnd = i;
                element.Root = _rootElement;
                return SetSibling(element);
            case '>':
                i++;
                element.SpanStart = tagStart;
                element.SpanEnd = i;
                element.Root = _rootElement;
                if (s_inlineTags.Contains(element.Tag))
                {
                    // void tag: no children, no end tag
                    return SetSibling(element);
                }
                if (CheckSkipTags(htmlSource, ref i, element.Tag, out int skipRange))
                {
                    // content was skipped; the span ends behind the matching end tag
                    i += skipRange;
                    element.SpanEnd = i;
                    return SetSibling(element);
                }
                // save last root
                _rootElements.Push(_rootElement);
                _rootElement = element;
                // clear sibling but save current
                _previewSiblings.Push(_previewSibling);
                _previewSibling = null;
                // get child elements
                element.Elements = ParseHtml(htmlSource, ref i);
                // restore sibling and root of the enclosing level
                _previewSibling = _previewSiblings.Pop();
                _rootElement = _rootElements.Pop();
                // an end tag has to follow; running out of input counts as malformed
                if (i + 1 >= length || IsNotCloseTagSig(htmlSource, i))
                {
                    throw new FormatException($"Missing end tag for '{element.Tag}'.");
                }
                i += 2; // skip the two-character end-tag marker
                int endTagNameOffset = i;
                while (i < length && i - endTagNameOffset < element.Tag.Length && htmlSource[i] == element.Tag[i - endTagNameOffset])
                {
                    i++;
                }
                // Consume the rest of the end tag up to and including '>' so that neither
                // whitespace before '>' nor a differing tag name leaves the cursor mid-tag.
                while (i < length && htmlSource[i] is not '>')
                {
                    i++;
                }
                if (i < length)
                {
                    i++;
                }
                if (element.Tag.Equals("body"))
                {
                    _body = element;
                }
                return SetSibling(element);
            default:
                throw new FormatException();
        }
    }
}
/// <summary>
/// Tells whether the two characters at <paramref name="i"/> are NOT the end-tag marker
/// (tag-open char followed by '/').
/// </summary>
/// <param name="htmlSource">The complete HTML text.</param>
/// <param name="i">Index of the first character to inspect.</param>
/// <returns><see langword="false" /> only when an end-tag marker starts at <paramref name="i"/>.</returns>
private static bool IsNotCloseTagSig(ReadOnlySpan<char> htmlSource, int i)
{
    // Fewer than two characters left: an end-tag marker cannot start here.
    if (i + 1 >= htmlSource.Length)
    {
        return true;
    }

    return htmlSource[i] is not '<' || htmlSource[i + 1] is not '/';
}
/// <summary>
/// Reads a tag name. The character at <paramref name="i"/> is taken as the first character
/// without being checked; the name then extends over every following ASCII letter or digit.
/// </summary>
/// <param name="htmlSource">The complete HTML text.</param>
/// <param name="i">Cursor on the first name character; left on the first character behind the name.</param>
/// <returns>The tag name.</returns>
private static string ParseTagName(ReadOnlySpan<char> htmlSource, ref int i)
{
    int nameBegin = i;
    for (i = nameBegin + 1; IsAsciiAlphaNumeric(htmlSource[i]); i++)
    {
        // cursor advance only
    }

    return htmlSource[nameBegin..i].ToString();
}
/// <summary>
/// Parses all attributes of a tag. "class" and "id" text attributes are stored on
/// <paramref name="element"/> itself; every other attribute is returned.
/// </summary>
/// <param name="htmlSource">The complete HTML text.</param>
/// <param name="i">Cursor on the first character of the first attribute; left on the '>' or '/' that ends the tag.</param>
/// <param name="element">The tag that receives the class list and the id.</param>
/// <param name="idValue">Receives the value of the id attribute, if present.</param>
/// <returns>The attributes other than text-valued "class" and "id".</returns>
/// <exception cref="FormatException">The input ends inside the attribute list.</exception>
private static HtmlAttributeBase[] ParseAttributes(ReadOnlySpan<char> htmlSource, ref int i, TagElement element, ref string? idValue)
{
    List<HtmlAttributeBase> attributes = new List<HtmlAttributeBase>();
    do
    {
        HtmlAttributeBase attribute = ParseAttribute(htmlSource, ref i);
        if (attribute is HtmlAttributeText textAttribute)
        {
            switch (textAttribute.Name.Trim())
            {
                case "class":
                    // drop the empty entries that consecutive spaces would produce
                    element.Classes = textAttribute.Value.Split(' ', StringSplitOptions.RemoveEmptyEntries);
                    break;
                case "id":
                    idValue = element.Id = textAttribute.Value;
                    break;
                default:
                    attributes.Add(attribute);
                    break;
            }
        }
        else
        {
            attributes.Add(attribute);
        }

        // Skip whitespace behind the attribute. IsSpace does not match a plain space, so it is
        // tested explicitly; otherwise the next attribute name would start with the separator
        // and a space in front of "/>" would be parsed as an attribute of its own.
        while (i < htmlSource.Length && (htmlSource[i] is ' ' || IsSpace(htmlSource[i])))
        {
            i++;
        }

        if (i >= htmlSource.Length)
        {
            throw new FormatException("Unexpected end of input inside an attribute list.");
        }
        // loop unless the tag ends here ('>' or the '/' of a self-closing tag)
    }
    while (htmlSource[i] is not ('>' or '/'));

    return attributes.ToArray();
}
/// <summary>
/// Parses a single attribute: its name and, when an '=' follows, its value.
/// </summary>
/// <param name="htmlSource">The complete HTML text.</param>
/// <param name="i">Cursor on the first character of the attribute name; advanced past the attribute.</param>
/// <returns>A value-less <see cref="HtmlAttribute"/> or the typed attribute produced by <c>ReadAttributeType</c>.</returns>
private static HtmlAttributeBase ParseAttribute(ReadOnlySpan<char> htmlSource, ref int i)
{
    int length = htmlSource.Length;
    int firstLetter = i;
    i++; // the first character is accepted as-is

    // Name characters: ASCII letters in either case, digits, '_', '-', ':' and '.'.
    // Accepting uppercase letters and digits keeps names such as "viewBox" or "data-v2" in one piece.
    while (i < length && htmlSource[i] is (>= 'a' and <= 'z') or (>= 'A' and <= 'Z') or (>= '0' and <= '9') or '_' or '-' or ':' or '.')
    {
        i++;
    }

    string attributeName = new string(htmlSource[firstLetter..i]);

    // Look ahead for '=' without moving the real cursor: when no value follows, the
    // whitespace behind the name must stay unconsumed for the caller.
    int virtualI = i;
    while (virtualI < length && (htmlSource[virtualI] is ' ' || IsSpace(htmlSource[virtualI])))
    {
        virtualI++;
    }

    if (virtualI < length && htmlSource[virtualI] is '=')
    {
        i = virtualI;
        return ReadAttributeType(htmlSource, ref i, firstLetter, attributeName);
    }

    // SpanEnd is the index of the last name character (inclusive)
    return new HtmlAttribute { SpanStart = firstLetter, SpanEnd = i - 1, Name = attributeName };
}
/// <summary>
/// Reads the value behind an attribute's '=' and wraps it in the attribute type that matches
/// its first character: t/T (true), f/F (false), a double quote, a single quote or a digit.
/// </summary>
/// <param name="htmlSource">The complete HTML text.</param>
/// <param name="i">Cursor on the '='; advanced past the value.</param>
/// <param name="attributeNameStart">Index of the first character of the attribute name.</param>
/// <param name="attributeName">The attribute name.</param>
/// <returns>The typed attribute.</returns>
/// <exception cref="FormatException">No value follows, or the value starts with an unsupported character.</exception>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static HtmlAttributeBase ReadAttributeType(ReadOnlySpan<char> htmlSource, ref int i, int attributeNameStart, string attributeName)
{
    i++; // step over '='

    // IsSpace does not match a plain space, so it is tested explicitly here.
    while (i < htmlSource.Length && (htmlSource[i] is ' ' || IsSpace(htmlSource[i])))
    {
        i++;
    }

    if (i >= htmlSource.Length)
    {
        throw new FormatException($"Attribute '{attributeName}' has no value.");
    }

    // In every branch the value is read first and SpanEnd is computed from the cursor afterwards.
    switch (htmlSource[i])
    {
        case 't' or 'T':
        {
            bool value = ReadTrue(htmlSource, ref i);
            int end = i++; // index of the literal's last character; cursor moves behind it
            return new HtmlAttributeBoolean { Name = attributeName, SpanStart = attributeNameStart, Value = value, SpanEnd = end };
        }
        case 'f' or 'F':
        {
            bool value = ReadFalse(htmlSource, ref i);
            int end = i++;
            return new HtmlAttributeBoolean { Name = attributeName, SpanStart = attributeNameStart, Value = value, SpanEnd = end };
        }
        case '"':
        {
            string value = ReadDoubleQuotedString(htmlSource, ref i);
            return new HtmlAttributeDoubleQuotedText { Name = attributeName, SpanStart = attributeNameStart, Value = value, SpanEnd = i - 1 };
        }
        case '\'':
        {
            string value = ReadSingleQuotedString(htmlSource, ref i);
            return new HtmlAttributeSingleQuotedText { Name = attributeName, SpanStart = attributeNameStart, Value = value, SpanEnd = i - 1 };
        }
        case >= '0' and <= '9':
        {
            uint value = ReadUInt(htmlSource, ref i);
            return new HtmlAttributeNumeric { Name = attributeName, SpanStart = attributeNameStart, Value = value, SpanEnd = i - 1 };
        }
        default:
            throw new FormatException();
    }
}
/// <summary>
/// Reads a double-quoted value.
/// </summary>
/// <param name="htmlSource">The complete HTML text.</param>
/// <param name="i">Cursor on the opening quote; left on the character behind the closing quote.</param>
/// <returns>The text between the quotes.</returns>
/// <exception cref="FormatException">The closing quote is missing.</exception>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static string ReadDoubleQuotedString(ReadOnlySpan<char> htmlSource, ref int i)
{
    int start = ++i; // first character behind the opening quote
    int valueLength = htmlSource[start..].IndexOf('"');
    if (valueLength < 0)
    {
        throw new FormatException("Unterminated double-quoted attribute value.");
    }

    i = start + valueLength + 1; // behind the closing quote
    return new string(htmlSource.Slice(start, valueLength));
}
/// <summary>
/// Reads a single-quoted value.
/// </summary>
/// <param name="htmlSource">The complete HTML text.</param>
/// <param name="i">Cursor on the opening quote; left on the character behind the closing quote.</param>
/// <returns>The text between the quotes.</returns>
/// <exception cref="FormatException">The closing quote is missing.</exception>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static string ReadSingleQuotedString(ReadOnlySpan<char> htmlSource, ref int i)
{
    int start = ++i; // first character behind the opening quote
    int valueLength = htmlSource[start..].IndexOf('\'');
    if (valueLength < 0)
    {
        throw new FormatException("Unterminated single-quoted attribute value.");
    }

    i = start + valueLength + 1; // behind the closing quote
    return new string(htmlSource.Slice(start, valueLength));
}
/// <summary>
/// Reads an unsigned decimal number.
/// </summary>
/// <param name="htmlSource">The complete HTML text.</param>
/// <param name="i">Cursor on the first digit; left on the first character behind the number.</param>
/// <returns>The parsed value.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static uint ReadUInt(ReadOnlySpan<char> htmlSource, ref int i)
{
    int start = i;
    i++; // the first character is accepted as-is
    while (i < htmlSource.Length && htmlSource[i] is >= '0' and <= '9')
    {
        i++;
    }

    // The cursor already sits behind the last digit, so the exclusive range end is i itself;
    // ending at i - 1 would cut off the last digit.
    return uint.Parse(new string(htmlSource[start..i]));
}
/// <summary>
/// Tells whether the uppercase word "DOCTYPE" starts at index <paramref name="i"/>.
/// Characters are compared left to right and the comparison stops at the first mismatch.
/// </summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static bool IsDocTypeElement(ReadOnlySpan<char> htmlSource, int i)
{
    const string Keyword = "DOCTYPE";
    for (int offset = 0; offset < Keyword.Length; offset++)
    {
        if (htmlSource[i + offset] != Keyword[offset])
        {
            return false;
        }
    }

    return true;
}
/// <summary>
/// Tells whether the four-character comment opener starts at index <paramref name="i"/>.
/// Characters are compared left to right and the comparison stops at the first mismatch.
/// </summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static bool IsCommentOpen(ReadOnlySpan<char> htmlSource, int i)
{
    const string Opener = "<!--";
    for (int offset = 0; offset < Opener.Length; offset++)
    {
        if (htmlSource[i + offset] != Opener[offset])
        {
            return false;
        }
    }

    return true;
}
/// <summary>
/// Tells whether the three-character comment terminator starts at index <paramref name="i"/>.
/// Characters are compared left to right and the comparison stops at the first mismatch.
/// </summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static bool IsCommentClose(ReadOnlySpan<char> htmlSource, int i)
{
    const string Terminator = "-->";
    for (int offset = 0; offset < Terminator.Length; offset++)
    {
        if (htmlSource[i + offset] != Terminator[offset])
        {
            return false;
        }
    }

    return true;
}
/// <summary>
/// Tells whether the lowercase end tag of a style element starts at index <paramref name="i"/>.
/// Characters are compared left to right and the comparison stops at the first mismatch.
/// </summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static bool IsStyleCloseTag(ReadOnlySpan<char> htmlSource, int i)
{
    const string EndTag = "</style>";
    for (int offset = 0; offset < EndTag.Length; offset++)
    {
        if (htmlSource[i + offset] != EndTag[offset])
        {
            return false;
        }
    }

    return true;
}
/// <summary>
/// Tells whether the lowercase end tag of an svg element starts at index <paramref name="i"/>.
/// Characters are compared left to right and the comparison stops at the first mismatch.
/// </summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static bool IsSvgCloseTag(ReadOnlySpan<char> htmlSource, int i)
{
    const string EndTag = "</svg>";
    for (int offset = 0; offset < EndTag.Length; offset++)
    {
        if (htmlSource[i + offset] != EndTag[offset])
        {
            return false;
        }
    }

    return true;
}
/// <summary>
/// Tells whether the lowercase end tag of a script element starts at index <paramref name="i"/>.
/// Characters are compared left to right and the comparison stops at the first mismatch.
/// </summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static bool IsScriptCloseTag(ReadOnlySpan<char> htmlSource, int i)
{
    const string EndTag = "</script>";
    for (int offset = 0; offset < EndTag.Length; offset++)
    {
        if (htmlSource[i + offset] != EndTag[offset])
        {
            return false;
        }
    }

    return true;
}
/// <summary>
/// Consumes the remainder ("rue") of a <c>true</c> literal whose first character sits at <paramref name="i"/>.
/// </summary>
/// <param name="htmlSource">The complete HTML text.</param>
/// <param name="i">Cursor on the first character of the literal; left on its last character ('e').</param>
/// <returns>Always <see langword="true" />.</returns>
/// <exception cref="FormatException">The characters behind the first one do not spell "rue".</exception>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static bool ReadTrue(ReadOnlySpan<char> htmlSource, ref int i)
{
    // Every expected character, including the final 'e', advances the cursor before it is
    // compared. Comparing the 'e' without advancing would re-test the 'u' and always fail.
    foreach (char expected in "rue")
    {
        if (++i >= htmlSource.Length || htmlSource[i] != expected)
        {
            throw new FormatException();
        }
    }

    return true;
}
/// <summary>
/// Consumes the remainder ("alse") of a <c>false</c> literal whose first character sits at <paramref name="i"/>.
/// </summary>
/// <param name="htmlSource">The complete HTML text.</param>
/// <param name="i">Cursor on the first character of the literal; left on its last character ('e').</param>
/// <returns>Always <see langword="false" />.</returns>
/// <exception cref="FormatException">The characters behind the first one do not spell "alse".</exception>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static bool ReadFalse(ReadOnlySpan<char> htmlSource, ref int i)
{
    // Every expected character, including the final 'e', advances the cursor before it is
    // compared. Comparing the 'e' without advancing would re-test the 's' and always fail.
    foreach (char expected in "alse")
    {
        if (++i >= htmlSource.Length || htmlSource[i] != expected)
        {
            throw new FormatException();
        }
    }

    return false;
}
/// <summary>
/// Check if the char is '\r', '\n' or '\t'.
/// Note that a plain space (' ') is not matched by this check.
/// </summary>
/// <param name="c">Char to check</param>
/// <returns><see langword="true" /> if the char is '\r', '\n' or '\t'; otherwise <see langword="false" /></returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static bool IsSpace(char c)
{
    switch (c)
    {
        case '\r':
        case '\n':
        case '\t':
            return true;
        default:
            return false;
    }
}
/// <summary>
/// Check if the char is an ASCII digit ('0' to '9').
/// </summary>
/// <param name="c">Char to check</param>
/// <returns><see langword="true" /> if the char is an ASCII digit; otherwise <see langword="false" /></returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static bool IsNumber(char c)
{
    // unsigned distance from '0': anything below '0' wraps around to a large value
    return (uint)(c - '0') <= 9u;
}
/// <summary>
/// Check if the char is an ASCII letter ('a' to 'z' or 'A' to 'Z').
/// </summary>
/// <param name="c">char to check</param>
/// <returns><see langword="true" />, if the char is an ASCII letter; otherwise <see langword="false" /></returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static bool IsAsciiAlpha(char c)
{
    // Both ranges are tested directly. Lower-casing first is culture-sensitive and also maps
    // some non-ASCII characters onto ASCII letters, which would classify them wrongly.
    return c is (>= 'a' and <= 'z') or (>= 'A' and <= 'Z');
}
/// <summary>
/// Check if the char is an ASCII letter ('a' to 'z' or 'A' to 'Z') or an ASCII digit ('0' to '9').
/// </summary>
/// <param name="c">char to check</param>
/// <returns><see langword="true" />, if the char is an ASCII letter or digit; otherwise <see langword="false" /></returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static bool IsAsciiAlphaNumeric(char c)
{
    // All ranges are tested directly. Lower-casing first is culture-sensitive and also maps
    // some non-ASCII characters onto ASCII letters, which would classify them wrongly.
    return c is (>= 'a' and <= 'z') or (>= 'A' and <= 'Z') or (>= '0' and <= '9');
}
/// <summary>
/// For the tags whose content is not parsed ("style", "svg", "script"), moves
/// <paramref name="i"/> forward to the start of the matching end tag.
/// </summary>
/// <param name="htmlSource">The complete HTML text.</param>
/// <param name="i">Cursor behind the opening tag; for a skipped tag it is left on the first character of the end tag.</param>
/// <param name="tagName">Name of the tag that was just opened.</param>
/// <param name="skipRange">Length of the end tag the caller still has to step over; 0 when nothing was skipped.</param>
/// <returns><see langword="true" /> when the tag is one of the skipped tags; otherwise <see langword="false" />.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static bool CheckSkipTags(ReadOnlySpan<char> htmlSource, ref int i, string tagName, out int skipRange)
{
    switch (tagName)
    {
        case "style":
            for (; !IsStyleCloseTag(htmlSource, i); i++)
            {
                // scanning only
            }
            skipRange = 8;
            return true;

        case "svg":
            for (; !IsSvgCloseTag(htmlSource, i); i++)
            {
                // scanning only
            }
            skipRange = 6;
            return true;

        case "script":
            for (; !IsScriptCloseTag(htmlSource, i); i++)
            {
                // scanning only
            }
            skipRange = 9;
            return true;

        default:
            skipRange = 0;
            return false;
    }
}
/// <summary>
/// Links <paramref name="nextElement"/> behind the most recently linked element of the
/// current level and makes it the new "most recent" element.
/// </summary>
/// <param name="nextElement">The element to append to the sibling chain.</param>
/// <returns><paramref name="nextElement"/> itself.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private HtmlElement SetSibling(HtmlElement nextElement)
{
    HtmlElement? previous = _previewSibling;

    nextElement.PreviewSibling = previous;
    if (previous is not null)
    {
        previous.NextSibling = nextElement;
    }

    _previewSibling = nextElement;
    return nextElement;
}
} | |
/// <summary>
/// Result of a parse: the top-level elements, the id lookup table and the body element.
/// </summary>
public class HtmlDocument
{
    /// <summary>Creates a document from already parsed parts.</summary>
    /// <param name="ids">Lookup table from id value to element.</param>
    /// <param name="root">The top-level elements.</param>
    /// <param name="body">The body element, or <see langword="null" /> when there is none.</param>
    public HtmlDocument(IDictionary<string, HtmlElement> ids, HtmlElement[] root, TagElement? body)
        => (IDs, Root, Body) = (ids, root, body);

    /// <summary>Lookup table from id value to element.</summary>
    public IDictionary<string, HtmlElement> IDs { get; }

    /// <summary>The top-level elements.</summary>
    public HtmlElement[] Root { get; }

    /// <summary>The body element, or <see langword="null" /> when there is none.</summary>
    public TagElement? Body { get; }
}
/// <summary>
/// Base class of every node the parser produces (tags, text and comments).
/// </summary>
public abstract class HtmlElement
{
    /// <summary>Start offset of the node in the parsed source.</summary>
    public int SpanStart { get; internal set; }

    /// <summary>End offset of the node in the parsed source.</summary>
    public int SpanEnd { get; internal set; }

    /// <summary>The enclosing element, or <see langword="null" /> for a top-level node.</summary>
    public HtmlElement? Root { get; internal set; }

    /// <summary>The sibling in front of this node, or <see langword="null" /> for the first node of a level.</summary>
    public HtmlElement? PreviewSibling { get; internal set; }

    /// <summary>The sibling behind this node, or <see langword="null" /> for the last node of a level.</summary>
    public HtmlElement? NextSibling { get; internal set; }

    /// <summary>Renders the node as markup.</summary>
    /// <param name="query">Callback that the implementations in this file invoke with the node before rendering it.</param>
    /// <returns>The markup of the node.</returns>
    public abstract string ToHtml(Action<HtmlElement> query);
}
public class TagElement : HtmlElement | |
{ | |
/// <summary>
/// Gets the child at <paramref name="index"/>, or <see langword="null" /> when the tag has no child array.
/// </summary>
/// <param name="index">Zero-based position in <see cref="Elements"/>.</param>
public HtmlElement? this[int index] => Elements?[index];
public string Tag { get; internal set; } = null!; | |
public HtmlAttributeBase[]? Attributes { get; internal set; } = null; | |
public HtmlElement[]? Elements { get; internal set; } = null; | |
public string[]? Classes { get; internal set; } = null; | |
public string? Id { get; internal set; } = null; | |
/// <summary>
/// Enumerates the child tags whose name equals <paramref name="tagName"/>, walking the
/// sibling chain that starts at the first child.
/// </summary>
/// <param name="tagName">The tag name to look for.</param>
/// <param name="comparison">How tag names are compared.</param>
/// <param name="processChildElements">
/// When <see langword="true" />, tags that do not match are searched recursively.
/// The children of a matching tag are not searched.
/// </param>
/// <returns>The matching tags in document order.</returns>
public IEnumerable<TagElement> GetElementsByTagName(string tagName, StringComparison comparison = StringComparison.OrdinalIgnoreCase, bool processChildElements = false)
{
    if (Elements is null || Elements.Length == 0)
    {
        yield break;
    }

    for (HtmlElement? node = Elements[0]; node is not null; node = node.NextSibling)
    {
        if (node is not TagElement tag)
        {
            continue;
        }

        if (tag.Tag.Equals(tagName, comparison))
        {
            yield return tag;
            continue;
        }

        if (processChildElements && tag.Elements != null)
        {
            foreach (TagElement nested in tag.GetElementsByTagName(tagName, comparison, processChildElements))
            {
                yield return nested;
            }
        }
    }
}
/// <summary>
/// Finds the first child tag whose name equals <paramref name="tagName"/>, walking the
/// sibling chain that starts at the first child.
/// </summary>
/// <param name="tagName">The tag name to look for.</param>
/// <param name="comparison">How tag names are compared.</param>
/// <param name="processChildElements">When <see langword="true" />, tags that do not match are searched recursively before their next sibling is examined.</param>
/// <returns>The first matching tag, or <see langword="null" /> when there is none.</returns>
public TagElement? GetFirstElementByTagName(string tagName, StringComparison comparison = StringComparison.OrdinalIgnoreCase, bool processChildElements = false)
{
    if (Elements is null || Elements.Length == 0)
    {
        return null;
    }

    for (HtmlElement? node = Elements[0]; node is not null; node = node.NextSibling)
    {
        if (node is not TagElement tag)
        {
            continue;
        }

        if (tag.Tag.Equals(tagName, comparison))
        {
            return tag;
        }

        if (processChildElements && tag.Elements != null)
        {
            TagElement? nested = tag.GetFirstElementByTagName(tagName, comparison, processChildElements);
            if (nested != null)
            {
                return nested;
            }
        }
    }

    return null;
}
/// <summary>
/// Finds the first child tag accepted by <paramref name="comp"/>, walking the sibling
/// chain that starts at the first child.
/// </summary>
/// <param name="comp">Predicate that decides whether a tag matches.</param>
/// <param name="processChildElements">When <see langword="true" />, tags that do not match are searched recursively before their next sibling is examined.</param>
/// <returns>The first matching tag, or <see langword="null" /> when there is none.</returns>
public TagElement? GetFirstElement(Func<TagElement, bool> comp, bool processChildElements = false)
{
    if (Elements == null || Elements.Length <= 0)
    {
        return null;
    }

    HtmlElement? node = Elements[0];
    do
    {
        if (node is TagElement tag)
        {
            if (comp(tag))
            {
                return tag;
            }

            if (processChildElements && tag.Elements != null)
            {
                TagElement? nested = tag.GetFirstElement(comp, processChildElements);
                if (nested != null)
                {
                    return nested;
                }
            }
        }

        node = node.NextSibling;
    }
    while (node != null);

    return null;
}
/// <summary>
/// Enumerates the child tags accepted by <paramref name="query"/>, walking the sibling
/// chain that starts at the first child.
/// </summary>
/// <param name="query">Predicate that decides whether a tag matches.</param>
/// <param name="processChildElements">
/// When <see langword="true" />, tags that do not match are searched recursively.
/// The children of a matching tag are not searched.
/// </param>
/// <returns>The matching tags in document order.</returns>
public IEnumerable<TagElement> Query(Func<TagElement, bool> query, bool processChildElements = false)
{
    if (Elements == null || Elements.Length <= 0)
    {
        yield break;
    }

    HtmlElement? node = Elements[0];
    do
    {
        if (node is TagElement tag)
        {
            if (query(tag))
            {
                yield return tag;
            }
            else if (processChildElements)
            {
                foreach (TagElement nested in tag.Query(query, true))
                {
                    yield return nested;
                }
            }
        }

        node = node.NextSibling;
    }
    while (node != null);
}
/// <summary>
/// Tells whether any ancestor of this tag is named <paramref name="tagName"/> (case-sensitive).
/// </summary>
/// <param name="tagName">The tag name to look for.</param>
/// <returns><see langword="true" /> when such an ancestor exists; otherwise <see langword="false" />.</returns>
public bool HasParentByTagName(string tagName)
{
    for (TagElement? ancestor = (TagElement?)Root; ancestor != null; ancestor = (TagElement?)ancestor.Root)
    {
        if (ancestor.Tag.Equals(tagName))
        {
            return true;
        }
    }

    return false;
}
/// <summary>
/// Looks for the nearest ancestor named <paramref name="tagName"/> (case-sensitive).
/// </summary>
/// <param name="tagName">The tag name to look for.</param>
/// <param name="parent">The nearest matching ancestor, or <see langword="null" /> when none was found.</param>
/// <returns><see langword="true" /> when such an ancestor exists; otherwise <see langword="false" />.</returns>
public bool TryGetParentByTagName(string tagName, out TagElement? parent)
{
    // the out parameter doubles as the loop cursor, so it is null once the chain is exhausted
    for (parent = (TagElement?)Root; parent is not null; parent = (TagElement?)parent.Root)
    {
        if (parent.Tag.Equals(tagName))
        {
            return true;
        }
    }

    return false;
}
/// <summary>
/// Returns the concatenated text of all text nodes below this tag.
/// </summary>
/// <returns>The collected text; empty when there are no text nodes.</returns>
public string ToText()
{
    StringBuilder collected = new StringBuilder();
    ToText(collected);

    string text = collected.ToString();
    return text;
}
/// <summary>
/// Appends the text of all text nodes below this tag to <paramref name="builder"/>,
/// walking the sibling chain that starts at the first child and descending into child tags.
/// </summary>
/// <param name="builder">Receives the text.</param>
public void ToText(StringBuilder builder)
{
    if (Elements is null || Elements.Length == 0)
    {
        return;
    }

    for (HtmlElement? node = Elements[0]; node is not null; node = node.NextSibling)
    {
        switch (node)
        {
            case TextElement text:
                builder.Append(text.Value);
                break;
            case TagElement tag:
                tag.ToText(builder);
                break;
        }
    }
}
/// <summary>
/// Renders this tag and its children as markup. <paramref name="query"/> is invoked with
/// this tag first and is passed on to every child.
/// </summary>
/// <param name="query">Callback invoked once per rendered node.</param>
/// <returns>The markup; a tag without children is written in self-closing form.</returns>
public override string ToHtml(Action<HtmlElement> query)
{
    query(this);

    StringBuilder markup = new StringBuilder();
    markup.Append('<').Append(Tag);

    if (Id != null)
    {
        markup.Append(" id=\"").Append(Id).Append('"');
    }

    if (Classes != null && Classes.Length > 0)
    {
        markup.Append(" class=\"").Append(string.Join(' ', Classes)).Append('"');
    }

    if (Attributes != null && Attributes.Length > 0)
    {
        // one separating space in front of every attribute
        foreach (HtmlAttributeBase attribute in Attributes)
        {
            markup.Append(' ').Append(attribute.ToString());
        }
    }

    if (Elements == null || Elements.Length == 0)
    {
        markup.Append("/>");
        return markup.ToString();
    }

    markup.Append('>');
    foreach (HtmlElement child in Elements)
    {
        markup.Append(child.ToHtml(query));
    }
    markup.Append("</").Append(Tag).Append('>');

    return markup.ToString();
}
/// <summary>
/// Renders this tag and its children as markup.
/// </summary>
/// <returns>The markup; a tag without children is written in self-closing form.</returns>
public override string ToString()
{
    StringBuilder markup = new StringBuilder();
    markup.Append('<').Append(Tag);

    if (Id != null)
    {
        markup.Append(" id=\"").Append(Id).Append('"');
    }

    if (Classes != null && Classes.Length > 0)
    {
        markup.Append(" class=\"").Append(string.Join(' ', Classes)).Append('"');
    }

    if (Attributes != null && Attributes.Length > 0)
    {
        // one separating space in front of every attribute
        foreach (HtmlAttributeBase attribute in Attributes)
        {
            markup.Append(' ').Append(attribute.ToString());
        }
    }

    if (Elements == null || Elements.Length == 0)
    {
        markup.Append("/>");
        return markup.ToString();
    }

    markup.Append('>');
    foreach (HtmlElement child in Elements)
    {
        markup.Append(child.ToString());
    }
    markup.Append("</").Append(Tag).Append('>');

    return markup.ToString();
}
} | |
/// <summary>
/// A run of text between tags.
/// </summary>
public class TextElement : HtmlElement
{
    /// <summary>The text exactly as it was sliced from the source.</summary>
    public string Value { get; internal set; } = null!;

    /// <summary>Invokes <paramref name="query"/> with this node and returns the text.</summary>
    /// <param name="query">Callback invoked with this node.</param>
    /// <returns>The text, or an empty string when no value is set.</returns>
    public override string ToHtml(Action<HtmlElement> query)
    {
        query(this);
        return Value is null ? string.Empty : Value;
    }

    /// <summary>Returns the text, or an empty string when no value is set.</summary>
    public override string ToString() => Value is null ? string.Empty : Value;
}
/// <summary>
/// A comment node. The parser also uses this type for DOCTYPE declarations, in which case
/// <see cref="Value"/> holds the complete declaration including its angle brackets.
/// </summary>
public class CommentElement : HtmlElement
{
    /// <summary>The comment text, or the complete DOCTYPE declaration.</summary>
    public string Value { get; internal set; } = null!;

    /// <summary>Invokes <paramref name="query"/> with this node and returns its markup.</summary>
    /// <param name="query">Callback invoked with this node.</param>
    /// <returns>The comment markup, or the DOCTYPE declaration unchanged.</returns>
    public override string ToHtml(Action<HtmlElement> query)
    {
        query(this);
        return Render();
    }

    /// <summary>Returns the comment markup, or the DOCTYPE declaration unchanged.</summary>
    public override string ToString()
    {
        return Render();
    }

    // A stored DOCTYPE declaration is already complete markup. Wrapping it in comment
    // delimiters would turn the declaration into an ordinary comment.
    private string Render()
    {
        if (Value is not null && Value.StartsWith("<!DOCTYPE", StringComparison.Ordinal))
        {
            return Value;
        }

        return $"<!-- {Value} -->";
    }
}
/// <summary>
/// Base class of all attribute kinds: a name plus the attribute's position in the source.
/// </summary>
public abstract class HtmlAttributeBase
{
    /// <summary>The attribute name as read from the source.</summary>
    public string Name { get; internal init; } = null!;

    /// <summary>Offset of the first character of the attribute in the parsed source.</summary>
    public int SpanStart { get; internal set; }

    /// <summary>Offset of the end of the attribute in the parsed source.</summary>
    public int SpanEnd { get; internal set; }
}
/// <summary>
/// An attribute that consists of a name only (no value).
/// </summary>
public class HtmlAttribute : HtmlAttributeBase
{
    /// <summary>Returns the attribute name.</summary>
    public override string ToString() => Name;
}
/// <summary>
/// Base class of the attributes that carry a quoted text value.
/// </summary>
public abstract class HtmlAttributeText : HtmlAttributeBase
{
    /// <summary>The text between the quotes.</summary>
    public string Value { get; set; } = null!;
}
/// <summary>
/// A text attribute whose value was written in double quotes.
/// </summary>
public class HtmlAttributeDoubleQuotedText : HtmlAttributeText
{
    /// <summary>Returns the attribute as name, '=' and the value in double quotes.</summary>
    public override string ToString() => Name + "=\"" + Value + "\"";
}
/// <summary>
/// A text attribute whose value was written in single quotes.
/// </summary>
public class HtmlAttributeSingleQuotedText : HtmlAttributeText
{
    /// <summary>Returns the attribute as name, '=' and the value in single quotes.</summary>
    public override string ToString() => Name + "='" + Value + "'";
}
/// <summary>
/// An attribute whose unquoted value is a boolean literal.
/// </summary>
public class HtmlAttributeBoolean : HtmlAttributeBase
{
    /// <summary>The boolean value; <see langword="false" /> unless set.</summary>
    public bool Value { get; internal init; }

    /// <summary>Returns the attribute as name, '=' and the value's <see cref="bool.ToString()"/> text.</summary>
    public override string ToString() => Name + "=" + Value.ToString();
}
/// <summary>
/// An attribute whose unquoted value is an unsigned number.
/// </summary>
public class HtmlAttributeNumeric : HtmlAttributeBase
{
    /// <summary>The numeric value; 0 unless set.</summary>
    public uint Value { get; internal init; }

    /// <summary>Returns the attribute as name, '=' and the number.</summary>
    public override string ToString() => Name + "=" + Value.ToString();
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment