Skip to content

Instantly share code, notes, and snippets.

@sjohnr
Last active February 6, 2016 06:21
Show Gist options
  • Save sjohnr/fe11ce667305d97929fb to your computer and use it in GitHub Desktop.
Save sjohnr/fe11ce667305d97929fb to your computer and use it in GitHub Desktop.
SGML (Sounds like sigmal)
<OUTER>
<GROUP1>
<TAG1>some data.
<TAG2>SOME_MORE_DATA
<TAG3>
<TAG3A>test data
</TAG3>
<TAG4>FINAL TAG!
</GROUP1>
</OUTER>
<OUTER><GROUP1><TAG1>some data.<TAG2>SOME_MORE_DATA<TAG3><TAG3A>test/data</TAG3><TAG4>FINAL TAG!@#$%^&*()_+-=;':"[]{}\|,./?`~</GROUP1></OUTER>
<OUTER>
<GROUP1>
<TAG1>
<TAG2>SOME_MORE_DATA
<TAG3>
<TAG3A>test/data
</TAG3>
<TAG4>FINAL TAG!@#$%^&*()_+-=;':"[]{}\|,./?`~
</GROUP1>
</OUTER>
<OUTER>
<TAG1>data
<TAG2>data
<TAG3>data
</OUTER>
<OUTER>
<TAG1>data
<GROUP1>
<TAG2>somedata!@#$%^&*()_+-=~`[]{},.?|\:;'"
<TAG3>some datA
<GROUP2>
<TAG4>SOME_MORE_DATA
</GROUP2>
</GROUP1>
</OUTER>
// Combined ANTLR4 grammar for a simplified SGML-style format in which leaf
// elements may omit their closing tag.
grammar SGML;
// rules
// Entry point: exactly one outer aggregate. EOF anchors the parse so that any
// input left after the outermost closing tag is reported as an error instead
// of being silently left unconsumed.
root : aggregate EOF ;
// A container: a start tag, zero or more nested aggregates or elements, and a
// mandatory end tag. Tag names are not compared here; nesting is determined
// purely by the sequence of START_TAG / END_TAG tokens.
aggregate : START_TAG ( aggregate | element )* END_TAG ;
// A leaf: a start tag followed by its content. The end tag is optional.
element : START_TAG content END_TAG? ;
// Leaf content: one or more WORD tokens (whitespace between them is skipped
// by the lexer).
content : WORD+ ;
// tokens
// Opening tag: an identifier wrapped in angle brackets, e.g. <TAG1>.
START_TAG : '<' IDENTIFIER '>' ;
// Closing tag: like an opening tag, with a slash before the identifier.
END_TAG : '<' '/' IDENTIFIER '>' ;
// A run of letters, digits and SYMBOL characters making up element content.
WORD : ( LOWER_ALPHA | UPPER_ALPHA | DIGIT | SYMBOL )+ ;
// Tag name: an uppercase letter followed by uppercase letters or digits.
// On its own this rule never wins: WORD is declared earlier and accepts every
// character IDENTIFIER does, so it only matters as part of the tag rules.
IDENTIFIER : [A-Z] [A-Z0-9]* ;
// skip tokens
// Runs of spaces and tabs are dropped and never reach the parser.
WHITESPACE : [ \t]+ -> skip ;
// Runs of carriage returns and line feeds are dropped as well.
NEWLINE : ( '\r' | '\n' )+ -> skip ;
// building blocks
// Fragments are only usable inside other lexer rules; they never produce
// tokens of their own.
fragment LOWER_ALPHA : [a-z] ;
fragment UPPER_ALPHA : [A-Z] ;
fragment ALPHA : [a-zA-Z] ;
fragment DIGIT : [0-9] ;
// Punctuation permitted inside element content.
// The hyphen is escaped: an unescaped "+-=" is read as the range '+'..'=',
// which also admits '<' and lets WORD swallow the start of a following tag
// when no whitespace separates them (e.g. "data.<TAG2>").
// '/' is listed explicitly because content such as "test/data" needs it and
// it was previously admitted only through that accidental range.
fragment SYMBOL : [.!@#$%^&*()_+\-=~`,?\\\[\]{}|:;'"/] ;
fragment SPACE : ' ' ;
fragment TAB : [\t] ;
fragment LT : '<' ;
fragment GT : '>' ;
fragment SLASH : '/' ;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment