Skip to content

Instantly share code, notes, and snippets.

@NolanDeveloper
Last active July 17, 2019 18:08
Show Gist options
  • Save NolanDeveloper/ccd5c6aa10c4bc04ea117d129ce72cba to your computer and use it in GitHub Desktop.
Save NolanDeveloper/ccd5c6aa10c4bc04ea117d129ce72cba to your computer and use it in GitHub Desktop.
Regular expression to match haskell lexemes. Try it out: https://regex101.com/!
(?#One lexeme per match, anchored by \G at the end of the previous match. The first alternative that matches wins, so keywords refuse a trailing prime, the wildcard refuses a trailing identifier character and reserved operators call OP_END to refuse a trailing symbol - longer names and operators then fall through to VARID and VARSYM. Capture group numbers of the token alternatives are unchanged.)\G((@(?&OP_END))|(\\(?&OP_END))|(case\b(?!'))|(class\b(?!'))|(,)|(data\b(?!'))|(default\b(?!'))|(deriving\b(?!'))|(do\b(?!'))|(else\b(?!'))|(foreign\b(?!'))|(`)|(if\b(?!'))|(import\b(?!'))|(infixl\b(?!'))|(infixr\b(?!'))|(infix\b(?!'))|(instance\b(?!'))|(in\b(?!'))|(<-(?&OP_END))|((?&NCOMMENT))|(\{)|(\[)|(\()|(let\b(?!'))|(module\b(?!'))|(newtype\b(?!'))|(of\b(?!'))|(->(?&OP_END))|(\})|(\])|(=>(?&OP_END))|(=(?&OP_END))|(\))|(;)|(then\b(?!'))|(~(?&OP_END))|(::(?&OP_END))|(\.\.(?&OP_END))|(type\b(?!'))|(_(?!(?&SMALL)|(?&LARGE)|(?&DIGIT)|'))|(\|(?&OP_END))|(where\b(?!'))|((?&WHITECHAR))|((?&CHAR))|((?&STRING))|((?&VARID))|((?&CONID))|((?&COMMENT))|((?&VARSYM))|((?&CONSYM))|(:)|((?&FLOAT))|((?&INTEGER))|$)(?(DEFINE)(?<OP_END>(?!(?&ASC_SYMBOL)|(?![(),;\[\]`{}_"'])[\p{S}\p{P}])))
(?(DEFINE)
(?# Library of named subpatterns. Nothing in this group matches on its own - the main alternation calls these by name. Names follow the productions of the Haskell 2010 lexical grammar. )
(?# Written to mean the same with or without the x flag: a literal space is spelled [ ] and a literal hash only appears inside a character class. )
(?# ---- letters and digits ---- )
(?<UNI_SMALL>\p{Ll})
(?<ASC_SMALL>[a-z])
(?# The underscore counts as a lowercase letter. )
(?<SMALL>(?&ASC_SMALL)|(?&UNI_SMALL)|_)
(?# uniLarge is any uppercase or titlecase letter. )
(?<UNI_LARGE>[\p{Lu}\p{Lt}])
(?<ASC_LARGE>[A-Z])
(?<LARGE>(?&ASC_LARGE)|(?&UNI_LARGE))
(?<UNI_DIGIT>\p{Nd})
(?<ASC_DIGIT>[0-9])
(?<DIGIT>(?&ASC_DIGIT)|(?&UNI_DIGIT))
(?<OCTIT>[0-7])
(?<HEXIT>(?&DIGIT)|[A-Fa-f])
(?<DECIMAL>(?&DIGIT)+)
(?<OCTAL>(?&OCTIT)+)
(?<HEXADECIMAL>(?&HEXIT)+)
(?# ---- symbol characters; the NO_x variants drop exactly one character ---- )
(?<ASC_SYMBOL>[!#$%&*+.\/<=>?@\\^|\-~:])
(?<ASC_SYMBOL_NO_BACKSLASH>[!#$%&*+.\/<=>?@^|\-~:])
(?<ASC_SYMBOL_NO_COLON>[!#$%&*+.\/<=>?@\\^|\-~])
(?<ASC_SYMBOL_NO_MINUS>[!#$%&*+.\/<=>?@\\^|~:])
(?# uniSymbol is any Unicode symbol or punctuation except the special characters, underscore, double quote and prime. Without the exclusion an operator would swallow a following bracket. )
(?<UNI_SYMBOL>(?![(),;\[\]`{}_"'])[\p{S}\p{P}])
(?# A negated class such as [^:\P{S}\P{P}] can never match because no character is both a symbol and punctuation - use a lookahead in front of UNI_SYMBOL instead. )
(?<UNI_SYMBOL_NO_COLON>(?!:)(?&UNI_SYMBOL))
(?<UNI_SYMBOL_NO_BACKSLASH>(?!\\)(?&UNI_SYMBOL))
(?<UNI_SYMBOL_NO_MINUS>(?!-)(?&UNI_SYMBOL))
(?<SYMBOL>(?&ASC_SYMBOL)|(?&UNI_SYMBOL))
(?<SYMBOL_NO_BACKSLASH>(?&ASC_SYMBOL_NO_BACKSLASH)|(?&UNI_SYMBOL_NO_BACKSLASH))
(?<SYMBOL_NO_COLON>(?&ASC_SYMBOL_NO_COLON)|(?&UNI_SYMBOL_NO_COLON))
(?<SYMBOL_NO_MINUS>(?&ASC_SYMBOL_NO_MINUS)|(?&UNI_SYMBOL_NO_MINUS))
(?# ---- identifiers and operators; reserved words and reserved operators are filtered by the ordering of the main alternation ---- )
(?<VARID>(?&SMALL)(?:(?&SMALL)|(?&LARGE)|(?&DIGIT)|')*)
(?<CONID>(?&LARGE)(?:(?&SMALL)|(?&LARGE)|(?&DIGIT)|')*)
(?<VARSYM>(?&SYMBOL_NO_COLON)(?&SYMBOL)*)
(?# At least one symbol after the colon - a bare colon is a reserved operator. )
(?<CONSYM>:(?&SYMBOL)+)
(?# ---- graphic characters ---- )
(?<SPECIAL>[(),;\[\]`{}])
(?<SPECIAL_NO_BRACES>[(),;\[\]`])
(?<GRAPHIC>(?&SMALL)|(?&LARGE)|(?&SYMBOL)|(?&DIGIT)|(?&SPECIAL)|"|')
(?# Body of a character literal: anything graphic except backslash and prime. )
(?<GRAPHIC_NO_BACKSLASH_AND_PRIME>(?&SMALL)|(?&LARGE)|(?&SYMBOL_NO_BACKSLASH)|(?&DIGIT)|(?&SPECIAL)|")
(?# Body of a string literal: anything graphic except backslash and double quote. )
(?<GRAPHIC_NO_BACKSLASH_AND_QUOTE>(?&SMALL)|(?&LARGE)|(?&SYMBOL_NO_BACKSLASH)|(?&DIGIT)|(?&SPECIAL)|')
(?<GRAPHIC_NO_SYMBOL>(?&SMALL)|(?&LARGE)|(?&DIGIT)|(?&SPECIAL)|"|')
(?<GRAPHIC_NO_BRACES_AND_MINUS>(?&SMALL)|(?&LARGE)|(?&SYMBOL_NO_MINUS)|(?&DIGIT)|(?&SPECIAL_NO_BRACES)|"|')
(?# ---- escapes, character and string literals ---- )
(?<CHARESC>[abfnrtv\\"'&])
(?# The empty escape with an ampersand is only legal inside strings. )
(?<CHARESC_NO_AMPERSAND>[abfnrtv\\"'])
(?<CNTRL>(?&ASC_LARGE)|[@\[\\\]^_])
(?# SOH must stay ahead of SO so the longer name wins. )
(?<ASCII>\^(?&CNTRL)|NUL|SOH|STX|ETX|EOT|ENQ|ACK|BEL|BS|HT|LF|VT|FF|CR|SO|SI|DLE|DC1|DC2|DC3|DC4|NAK|SYN|ETB|CAN|EM|SUB|ESC|FS|GS|RS|US|SP|DEL)
(?<ESCAPE>\\(?:(?&CHARESC)|(?&ASCII)|(?&DECIMAL)|o(?&OCTAL)|x(?&HEXADECIMAL)))
(?<ESCAPE_NO_AMPERSAND>\\(?:(?&CHARESC_NO_AMPERSAND)|(?&ASCII)|(?&DECIMAL)|o(?&OCTAL)|x(?&HEXADECIMAL)))
(?<CHAR>'(?:(?&GRAPHIC_NO_BACKSLASH_AND_PRIME)|[ ]|(?&ESCAPE_NO_AMPERSAND))')
(?<WHITECHAR>\n|\v|[ ]|\t|\p{Z})
(?# A gap is whitespace between two backslashes inside a string. )
(?<GAP>\\(?&WHITECHAR)+\\)
(?<STRING>"(?:(?&GRAPHIC_NO_BACKSLASH_AND_QUOTE)|[ ]|(?&ESCAPE)|(?&GAP))*")
(?# ---- numeric literals ---- )
(?# The sign of an exponent is optional and a vertical bar is not a sign. )
(?<EXPONENT>[eE][+-]?(?&DECIMAL))
(?# The decimal point is escaped so that only a real dot separates the two digit runs. )
(?<FLOAT>(?&DECIMAL)\.(?&DECIMAL)(?&EXPONENT)?|(?&DECIMAL)(?&EXPONENT))
(?# Prefixed forms are tried first - otherwise the leading zero alone is taken as a decimal literal. )
(?<INTEGER>0[oO](?&OCTAL)|0[xX](?&HEXADECIMAL)|(?&DECIMAL))
(?# ---- comments ---- )
(?<DASHES>-{2,})
(?<ANY>(?&GRAPHIC)|(?&WHITECHAR))
(?<ANY_NO_SYMBOL>(?&GRAPHIC_NO_SYMBOL)|(?&WHITECHAR))
(?<ANY_NO_BRACES_AND_MINUS>(?&GRAPHIC_NO_BRACES_AND_MINUS)|(?&WHITECHAR))
(?# One character of block comment text. A minus is allowed unless it starts the closer, an opening brace unless it starts a nested opener, and a closing brace always - the closer is already guarded by the minus rule. )
(?<ANY_SEQ>(?&ANY_NO_BRACES_AND_MINUS)|-(?!\})|\{(?!-)|\})
(?<ANY_SMALL>(?&GRAPHIC)|[ \t])
(?<ANY_SMALL_NO_SYMBOL>(?&GRAPHIC_NO_SYMBOL)|[ \t])
(?<NEWLINE>\r\n|\r|\n|\f)
(?# Line comment: dashes, then text whose first character is not a symbol, then a newline or the very end of input. )
(?<COMMENT>(?&DASHES)(?:(?&ANY_SMALL_NO_SYMBOL)(?&ANY_SMALL)*)?(?:(?&NEWLINE)|\z))
(?# Block comment: recursive, with any number of nested comments. )
(?<NCOMMENT>\{-(?&ANY_SEQ)*(?:(?&NCOMMENT)(?&ANY_SEQ)*)*-\})
)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment