Skip to content

Instantly share code, notes, and snippets.

@kelvingakuo
Last active March 27, 2022 08:57
Show Gist options
  • Save kelvingakuo/2d2b4c9d535a8278cad224c931a2df00 to your computer and use it in GitHub Desktop.
Save kelvingakuo/2d2b4c9d535a8278cad224c931a2df00 to your computer and use it in GitHub Desktop.
import re
# Reserved words of the supported SQL subset, as a regex alternation.
keywords = r"(SELECT|WHERE|FROM|AND|OR|NOT)"


def _emit(token_type):
    """Return a ``re.Scanner`` action that tags the matched text.

    The returned callable takes ``(scanner, text)`` and produces
    ``{"token_type": token_type, "token": text}``.
    """
    def action(scanner, text):
        return {"token_type": token_type, "token": text}
    return action


# Lexicon for ``re.Scanner``: (regex, action) pairs, tried in order at each
# position, so earlier rules win over later ones.
patterns = [
    # Word boundaries keep identifiers that merely start with a keyword
    # (ORDER, FROMAGE, NOTES, ...) from being split into keyword + name.
    (r"\b" + keywords + r"\b", _emit("keyword")),
    (r"[a-zA-Z_][a-zA-Z_0-9]*", _emit("name")),
    (r"\*", _emit("all_cols")),
    # Two-character operators come before their one-character prefixes.
    (r">=|>|<=|<|=", _emit("operator")),
    # Float must precede integer, otherwise "1.5" would start as integer "1".
    (r"[-+]?\d*\.\d+", _emit("float")),
    # Optional sign, consistent with the float rule above.
    (r"[-+]?\d+", _emit("integer")),
    (r"[,]", _emit("punctuation")),
    # One token per space or newline character.
    (r"[ \n]", _emit("whitespace")),
    (r";", _emit("terminal")),
    # No catch-all rule is registered: input that no rule accepts is not
    # skipped, it is left in the remainder returned by ``Scanner.scan``.
    # To silently drop such characters instead, append:
    #     (r".", lambda s, t: None)
]
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment