Last active
August 11, 2021 13:20
-
-
Save mathiversen/69d744da9d80bf774ed549511e12d884 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
//
// HTML
//
// Entry rule: the complete input from start (SOI) to end (EOI).
// Any number of comments and an optional doctype may come before the nodes.
html = _{ SOI ~ node_comment* ~ doctype? ~ node* ~ EOI }
//
// DOCTYPE
//
// The doctype keyword. `^"..."` is pest's ASCII case-insensitive string match,
// so `doctype`, `DOCTYPE`, `Doctype`, ... are all accepted; HTML treats the
// keyword as case-insensitive.
doctype_name = _{ ^"doctype" }
// Doctype declaration: `<!`, the doctype keyword, optional attributes, `>`.
// Ex: <!DOCTYPE html>
doctype = _{
    chevron_left_bang
    ~ doctype_name
    ~ attr*
    ~ chevron_right
}
//
// NODES
//
// A node is tried as a comment, then an element, then text, then whitespace.
node = _{
    node_comment
    | node_element
    | node_text
    | WHITESPACE
}
// Comment: the opening tag, any characters that do not begin the closing
// tag, then the closing tag.
node_comment = _{
    comment_tag_start
    ~ (!comment_tag_end ~ ANY)*
    ~ comment_tag_end
}
// Text: one or more characters, each preceded by a check that no tag or
// comment starts at that point.
node_text = { (!(chevron_left | comment_tag_start) ~ ANY)+ }
// Element forms. The order matters: pest uses the first alternative that matches.
node_element = {
    el_void
    | el_void_xml
    | el_process_instruct
    | el_raw_text
    | el_normal
    | el_dangling
}
//
// COMMENTS
//
// Opening comment delimiter: `<!` immediately followed by `--`.
comment_tag_start = @{ chevron_left_bang ~ "--" }
// Closing comment delimiter: `--` immediately followed by `>`.
comment_tag_end   = @{ "--" ~ chevron_right }
//
// ATTRIBUTES
//
// Attribute name: an ASCII letter followed by any number of `text_chars`.
// The rule is atomic (`@`), like `el_name`, so implicit WHITESPACE is not
// skipped between characters. Without that, two adjacent bare attributes such
// as `disabled checked` would be consumed as one key.
attr_key = @{ ASCII_ALPHA ~ text_chars* }
// Body of a quoted value: any characters that do not match the quote
// currently on top of the stack (PEEK).
attr_value  = { (!PEEK ~ ANY)* }
// Quoted value: PUSH the opening quote, read the value, then POP to require
// the same quote character as the closing delimiter.
attr_quoted = _{ PUSH(quote) ~ attr_value ~ POP }
// Unquoted attribute value. Ex: width=100
// The rule is atomic (`@`) so implicit WHITESPACE is not skipped inside the
// value, and it stops at whitespace or `>` so the enclosing tag can still be
// closed. `!quote` keeps an unterminated quoted value from being read as an
// unquoted one.
// NOTE: as an atomic rule this produces its own `attr_non_quoted` pair in the
// parse tree, which is how consumers can read the unquoted value.
attr_non_quoted = @{ !quote ~ (!(WHITESPACE | chevron_right) ~ ANY)* }
// Attribute: a key, optionally followed by `=` and a value. The quoted form
// is tried before the unquoted form.
attr = {
    attr_key
    ~ (equal ~ (attr_quoted | attr_non_quoted))?
}
//
// ELEMENTS
//
// Element (tag) name: an ASCII letter followed by any number of `text_chars`.
// Atomic, so no implicit whitespace is accepted inside the name.
el_name = @{
    ASCII_ALPHA ~ text_chars*
}
// Void element aka self-closing element
// Ex: <hr>
// Each name is listed once; alternatives are tried in the order written.
el_void_name_html = @{
    "area"
    | "base"
    | "br"
    | "col"
    | "command"
    | "embed"
    | "hr"
    | "img"
    | "input"
    | "keygen"
    | "link"
    | "meta"
    | "param"
    | "source"
    | "track"
    | "wbr"
}
// NOTE: This should not have to be a rule, but people don't know what void elements are...
// SVG element names that are also accepted as void elements.
el_void_name_svg = @{ "path" | "polygon" | "rect" | "circle" }
// Any void element name (HTML or SVG).
// The trailing `!text_chars` requires the name to end here, so a longer tag
// name that merely starts with a void name (e.g. `colgroup` vs. "col") is not
// mistaken for a void element. The rule is atomic, so no whitespace is
// skipped before that check.
el_void_name = @{ (el_void_name_html | el_void_name_svg) ~ !text_chars }
// Void element by name: may end with either `>` or `/>`.
el_void = _{
    chevron_left
    ~ el_void_name
    ~ attr*
    ~ (chevron_right | chevron_right_closed)
}
// Any element name written in self-closing form: must end with `/>`.
el_void_xml = _{
    chevron_left
    ~ el_name
    ~ attr*
    ~ chevron_right_closed
}
// Normal (open) elements are the default kind: they can contain child nodes
// and have both a start tag and an end tag.
// Ex: <html lang="en"></html>
el_normal = _{
    el_normal_start
    ~ (!el_normal_end ~ node)*
    ~ el_normal_end
}
// Start tag: the element name is pushed on the stack.
el_normal_start = _{ chevron_left ~ PUSH(el_name) ~ attr* ~ chevron_right }
// End tag: POP requires the name that was pushed by the start tag.
el_normal_end = { chevron_left_closed ~ POP ~ chevron_right }
// Raw text elements hold text/script content that could otherwise be
// confused with normal HTML syntax, so their content is consumed
// character by character until the end tag.
el_raw_text_name = _{ "style" | "script" }
el_raw_text = _{
    el_raw_text_start
    ~ (!el_raw_text_end ~ ANY)*
    ~ el_raw_text_end
}
// Start tag: the matched name is pushed on the stack.
el_raw_text_start = _{ chevron_left ~ PUSH(el_raw_text_name) ~ attr* ~ chevron_right }
// End tag: POP requires the name that was pushed by the start tag.
el_raw_text_end = _{ chevron_left_closed ~ POP ~ chevron_right }
// XML processing instruction: `<?`, the literal `xml`, attributes, `?>`.
// Ex: <?xml version="1.0" ?>
el_process_instruct = {
    chevron_left_question
    ~ "xml"
    ~ attr*
    ~ chevron_right_question
}
// Catches a dangling end tag, i.e. an end tag not consumed by any other
// element rule.
// Ex: <div/></div>
el_dangling = {
    chevron_left_closed
    ~ el_name
    ~ chevron_right
}
//
// SYMBOLS / CHARACTERS
//
// One character allowed after the first letter of a name:
// an ASCII letter or digit, `_`, `-` or `:`.
text_chars = _{ ASCII_ALPHANUMERIC | "_" | "-" | ":" }
// Tag delimiters. All are silent, so they never appear in the parse tree.
chevron_left           = _{ "<" }
chevron_left_closed    = _{ "</" }
chevron_left_bang      = _{ "<!" }
chevron_left_question  = _{ "<?" }
chevron_right          = _{ ">" }
chevron_right_closed   = _{ "/>" }
chevron_right_question = _{ "?>" }
// Separator between an attribute key and its value.
equal        = _{ "=" }
// Quote characters that may delimit an attribute value.
quote_dubble = _{ "\"" }
quote_single = _{ "'" }
quote        = _{ quote_dubble | quote_single }
// pest's special implicit-whitespace rule: space, tab, carriage return, newline.
WHITESPACE = _{
    " "
    | "\t"
    | "\r"
    | "\n"
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment