Skip to content

Instantly share code, notes, and snippets.

@mathiversen
Last active August 11, 2021 13:20
Show Gist options
  • Star 2 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save mathiversen/69d744da9d80bf774ed549511e12d884 to your computer and use it in GitHub Desktop.
Save mathiversen/69d744da9d80bf774ed549511e12d884 to your computer and use it in GitHub Desktop.
//
// HTML
//
// Entry rule. The whole input must match, from SOI to EOI:
// any leading comments, an optional doctype, then zero or more nodes.
// Silent (`_`): the rule itself emits no token.
html = _{ SOI ~ node_comment* ~ doctype? ~ node* ~ EOI }
//
// DOCTYPE
//
// The doctype keyword. `^"..."` matches ASCII case-insensitively, so
// `<!doctype html>`, `<!DOCTYPE html>` and mixed-case spellings such as
// `<!Doctype html>` are all accepted.
doctype_name = _{ ^"doctype" }
// `<!` + keyword + zero or more attr-style words (e.g. `html`) + `>`.
// Silent: a doctype emits no token of its own, only the nested `attr` tokens.
doctype = _{ chevron_left_bang ~ doctype_name ~ attr* ~ chevron_right }
//
// NODES
//
// A node is one of: a comment, an element, a run of text, or a single
// whitespace character. Alternatives are tried in the order listed.
node = _{
    node_comment
  | node_element
  | node_text
  | WHITESPACE
}
// `<!--` ... `-->`: the body is every character up to the first `-->`.
// Silent: comments leave no token in the parse result.
node_comment = _{
    comment_tag_start
  ~ (!comment_tag_end ~ ANY)*
  ~ comment_tag_end
}
// One or more characters up to the next `<`. A comment opener (`<!--`)
// begins with `<` as well, so it also terminates the text run.
node_text = { (!chevron_left ~ ANY)+ }
// Any element form. Order matters: PEG choice commits to the first
// alternative that matches, so the more specific forms come first and
// the dangling-close-tag fallback comes last.
node_element = {
    el_void
  | el_void_xml
  | el_process_instruct
  | el_raw_text
  | el_normal
  | el_dangling
}
//
// COMMENTS
//
// Comment delimiters. Atomic (`@`), so no implicit whitespace is allowed
// between `<!` and `--`, or between `--` and `>`.
comment_tag_start = @{ chevron_left_bang ~ "--" }
comment_tag_end   = @{ "--" ~ chevron_right }
//
// ATTRIBUTES
//
// Attribute name: an ASCII letter followed by any number of `text_chars`.
// Atomic (`@`), like `el_name`: in a non-atomic rule pest skips implicit
// WHITESPACE between repetitions, which would merge a value-less attribute
// with the one after it (`disabled name="x"` -> key `disabled name`).
// The rule still emits a single `attr_key` token.
attr_key = @{ ASCII_ALPHA ~ text_chars* }
// Value body: every character up to the quote currently on top of the
// stack (PEEK matches the most recently pushed string).
attr_value = {
    (!PEEK ~ ANY)*
}
// Quoted value: PUSH remembers the opening quote and POP requires the
// same quote to close it, so `"..."` and `'...'` cannot be mixed.
attr_quoted = _{
    PUSH(quote)
  ~ attr_value
  ~ POP
}
// Unquoted attribute value, e.g. the `foo` in `class=foo`.
// The value ends at the first whitespace character or at `>`.
// Atomic (`@`) on purpose: in a non-atomic rule pest skips implicit
// WHITESPACE between repetitions, so the previous form
// `(!WHITESPACE ~ ANY)*` ran through whitespace and `>` to the end of the
// input, and any tag with an unquoted value failed to parse.
// NOTE: being atomic, this rule emits an `attr_non_quoted` token inside
// `attr`. Inputs that reached this rule could not parse successfully
// before, so parse trees of previously accepted inputs are unchanged.
attr_non_quoted = @{ (!(WHITESPACE | chevron_right) ~ ANY)* }
// An attribute: a key, optionally followed by `=` and a value.
// A quoted value is tried before an unquoted one.
attr = {
    attr_key
  ~ (equal ~ (attr_quoted | attr_non_quoted))?
}
//
// ELEMENTS
//
// Tag name: an ASCII letter followed by any number of `text_chars`.
// Atomic, so the name is matched as one contiguous token.
el_name = @{
    ASCII_ALPHA
  ~ text_chars*
}
// Void elements (written without a closing tag)
// Ex: <hr>
// Each name is listed once; a repeated alternative can never be reached
// in an ordered choice.
el_void_name_html = @{
    "area"
  | "base"
  | "br"
  | "col"
  | "command"
  | "embed"
  | "hr"
  | "img"
  | "input"
  | "keygen"
  | "link"
  | "meta"
  | "param"
  | "source"
  | "track"
  | "wbr"
}
// NOTE: This should not have to be a rule, but these SVG elements are
// often written without a closing tag, so they are treated as void here.
el_void_name_svg = @{ "circle" | "path" | "polygon" | "rect" }
// Name of a void element (HTML or SVG list).
// `!text_chars` requires the name to end here. Without it a void name
// also matched as a prefix of a longer tag name, e.g. `<colgroup>` was
// read as void `<col group>` and `<metadata>` as `<meta data>`.
// Atomic: no implicit whitespace is skipped before the boundary check,
// and only the single `el_void_name` token is emitted.
el_void_name = @{ (el_void_name_html | el_void_name_svg) ~ !text_chars }
// Void element by name: `<name attrs>` or `<name attrs/>`.
el_void = _{
    chevron_left
  ~ el_void_name
  ~ attr*
  ~ (chevron_right | chevron_right_closed)
}
// Any element written in self-closing XML form: `<name attrs/>`.
el_void_xml = _{
    chevron_left
  ~ el_name
  ~ attr*
  ~ chevron_right_closed
}
// Normal elements are the default kind: they have a start tag and an
// end tag and may contain child nodes.
// Ex: <html lang="en"></html>
el_normal = _{
    el_normal_start
  ~ (!el_normal_end ~ node)*
  ~ el_normal_end
}
// Start tag. PUSH stores the matched tag name on the stack so the end
// tag can be required to carry the same name.
el_normal_start = _{
    chevron_left
  ~ PUSH(el_name)
  ~ attr*
  ~ chevron_right
}
// End tag. POP matches (and removes) the name pushed by the start tag.
// Not silent: every closing tag emits an `el_normal_end` token.
el_normal_end = {
    chevron_left_closed
  ~ POP
  ~ chevron_right
}
// Raw text elements hold script/style content that could otherwise be
// mistaken for HTML syntax, so their body is consumed character by
// character up to the matching end tag instead of being parsed as nodes.
el_raw_text_name = _{ "style" | "script" }
el_raw_text = _{
    el_raw_text_start
  ~ (!el_raw_text_end ~ ANY)*
  ~ el_raw_text_end
}
// Start tag: PUSH stores which of the two names was matched.
el_raw_text_start = _{
    chevron_left
  ~ PUSH(el_raw_text_name)
  ~ attr*
  ~ chevron_right
}
// End tag: POP requires the same name that the start tag pushed.
el_raw_text_end = _{
    chevron_left_closed
  ~ POP
  ~ chevron_right
}
// XML processing instruction: `<?xml`, any attributes, then `?>`.
// Ex: <?xml version="1.0" ?>
el_process_instruct = {
    chevron_left_question
  ~ "xml"
  ~ attr*
  ~ chevron_right_question
}
// Fallback for a closing tag that has no matching open element.
// Ex: <div/></div>
el_dangling = {
    chevron_left_closed
  ~ el_name
  ~ chevron_right
}
//
// SYMBOLS / CHARACTERS
//
// Characters allowed after the first letter of a tag or attribute name:
// ASCII letters and digits, plus `_`, `-` and `:`.
text_chars = _{ ASCII_ALPHANUMERIC | "_" | "-" | ":" }
// Tag delimiters. All silent: they emit no tokens.
chevron_left           = _{ "<" }   // opens a start tag
chevron_left_closed    = _{ "</" }  // opens an end tag
chevron_left_bang      = _{ "<!" }  // opens a doctype or comment
chevron_left_question  = _{ "<?" }  // opens a processing instruction
chevron_right          = _{ ">" }   // closes a tag
chevron_right_closed   = _{ "/>" }  // closes a self-closing tag
chevron_right_question = _{ "?>" }  // closes a processing instruction
// Separator between an attribute key and its value.
equal        = _{ "=" }
// Quote characters accepted around an attribute value.
quote_dubble = _{ "\"" }
quote_single = _{ "'" }
quote        = _{ quote_dubble | quote_single }
// Special pest rule: because it is defined, pest skips these characters
// implicitly between the parts of sequences and repetitions in every
// non-atomic rule.
WHITESPACE = _{ "\n" | "\r" | "\t" | " " }
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment