Skip to content

Instantly share code, notes, and snippets.

@nedbat

nedbat/output Secret

Created Jul 8, 2022
Embed
What would you like to do?
% python3 tokens.py
id     : text='x'
punct  : text='='
int    : text='123'
punct  : text='+'
string : text='"hello #99"'
id     : text='print'
punct  : text='('
id     : text='hello'
punct  : text='.'
id     : text='bye'
punct  : text='['
int    : text='0'
punct  : text=']'
punct  : text='+'
float  : text='3.14'
punct  : text=','
string : text="'single'"
punct  : text=')'
import re
# Tokenizer grammar: one regex alternative per lexical category.
#
# Flags: (?x) is verbose mode, so the whitespace used to lay out the pattern
# is not part of it; (?m) makes `$` match at the end of every line, which is
# what stops a `#` comment at its own newline.
#
# The named groups (string, float, int, id, punct) are the token kinds that
# show up in match.lastgroup.  The last two alternatives (comments and
# whitespace) are deliberately unnamed: when one of them matches, lastgroup is
# None, which is how tokens() recognizes and drops them.
#
# Alternatives are tried left to right, so `float` must come before `int`
# (otherwise "3.14" would start with the int "3") and before `punct`
# (otherwise ".5" would start with the punct ".").
TOKEN_RX = r"""(?xm)
(?P<string> ".*?"|'.*?' )|
(?P<float> \d*(\d\.|\.\d)\d* )|
(?P<int> \d+ )|
(?P<id> [_a-zA-Z][_a-zA-Z0-9]* )|
(?P<punct> [(){}:\[\]=.,+*/-] )|
( \#.*$ )|
( \s+ )
"""
def tokens(text, pattern=None):
    """Yield ``(kind, lexeme)`` pairs for each token found in *text*.

    *text* is scanned left to right with ``re.finditer``.  For every match
    whose last matched capturing group is named, the group name is yielded as
    the token kind together with the full matched text.

    Args:
        text: The source string to tokenize.
        pattern: Regular expression describing the grammar, passed to
            ``re.finditer``.  Defaults to the module-level ``TOKEN_RX``.
            Token kinds are the names of its named groups; alternatives
            wrapped in unnamed groups are treated as skippable filler.

    Yields:
        ``(kind, lexeme)`` tuples of two strings, in source order.

    Note:
        ``re.finditer`` passes over any characters that no alternative
        matches, so unrecognized input is skipped silently rather than
        reported as an error.
    """
    if pattern is None:
        # Resolved at call time so the default always tracks TOKEN_RX.
        pattern = TOKEN_RX
    for match in re.finditer(pattern, text):
        # lastgroup is None when the match came from an unnamed group
        # (comments / whitespace in TOKEN_RX), i.e. nothing to report.
        if match.lastgroup:
            yield (match.lastgroup, match[0])
# Sample input for the demo below.  It contains every token kind in the
# grammar (id, punct, int, float, string), a `#` inside a string literal, and
# a trailing `#` comment.
TEXT = """
x = 123 + "hello #99" # ignore me!
print(hello.bye[0] + 3.14, 'single')
"""

# Demo: print one line per token, with the kind left-aligned in a
# 7-character field followed by the lexeme.
for kind, text in tokens(TEXT):
    # `{text=}` prints the variable's name as well as its repr, so the loop
    # variable has to stay named `text` for the output to read `text='...'`.
    print(f"{kind:7}: {text=}")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment