nedbat/output Secret

## output
% python3 tokens.py
id     : text='x'
punct  : text='='
int    : text='123'
punct  : text='+'
string : text='"hello #99"'
id     : text='print'
punct  : text='('
id     : text='hello'
punct  : text='.'
id     : text='bye'
punct  : text='['
int    : text='0'
punct  : text=']'
punct  : text='+'
float  : text='3.14'
punct  : text=','
string : text="'single'"
punct  : text=')'

## tokens.py
import re

# Master pattern for the tokenizer: one alternative per kind of lexeme.
# (?x) is verbose mode: unescaped whitespace inside the pattern is ignored and
# a literal "#" has to be written as "\#".  (?m) is multiline mode, so the "$"
# in the comment alternative matches at the end of every line.
# The named groups (string, float, int, id, punct) give the token kind; the
# last two groups are unnamed and match comments and whitespace.
# "float" is listed before "int" because alternatives are tried in order: at
# the "3" of "3.14", "int" would otherwise match first and split the number.
TOKEN_RX = r"""(?xm)
    (?P<string> ".*?"|'.*?'             )|
    (?P<float>  \d*(\d\.|\.\d)\d*       )|
    (?P<int>    \d+                     )|
    (?P<id>     [_a-zA-Z][_a-zA-Z0-9]*  )|
    (?P<punct>  [(){}:\[\]=.,+*/-]      )|
    (           \#.*$                   )|
    (           \s+                     )
    """

def tokens(text):
    """Generate ``(kind, lexeme)`` pairs for the tokens found in *text*.

    *kind* is the name of the ``TOKEN_RX`` group that matched and *lexeme*
    is the exact substring it covered.  Matches produced by the unnamed
    groups of ``TOKEN_RX`` are dropped, so they never reach the caller.
    """
    for m in re.finditer(TOKEN_RX, text):
        kind = m.lastgroup
        if not kind:
            # Matched one of the unnamed alternatives: nothing to report.
            continue
        yield (kind, m.group(0))

# Sample input for the demo: two lines of code-like text containing a string
# with a "#" inside it, a trailing comment, an int, a float and punctuation.
TEXT = """
    x = 123 + "hello #99"  # ignore me!
    print(hello.bye[0] + 3.14, 'single')
    """

# Print one line per token: the kind left-aligned in a 7-character column,
# then the lexeme.  The "{text=}" form prints the variable's name followed by
# the repr of its value, so the loop variable must stay named "text" for the
# output to keep reading "text=...".
for kind, text in tokens(TEXT):
    print(f"{kind:7}: {text=}")
	% python3 tokens.py
	id     : text='x'
	punct  : text='='
	int    : text='123'
	punct  : text='+'
	string : text='"hello #99"'
	id     : text='print'
	punct  : text='('
	id     : text='hello'
	punct  : text='.'
	id     : text='bye'
	punct  : text='['
	int    : text='0'
	punct  : text=']'
	punct  : text='+'
	float  : text='3.14'
	punct  : text=','
	string : text="'single'"
	punct  : text=')'
	import re

	TOKEN_RX = r"""(?xm)
	    (?P<string> ".*?"|'.*?'             )|
	    (?P<float>  \d*(\d\.|\.\d)\d*       )|
	    (?P<int>    \d+                     )|
	    (?P<id>     [_a-zA-Z][_a-zA-Z0-9]*  )|
	    (?P<punct>  [(){}:\[\]=.,+*/-]      )|
	    (           \#.*$                   )|
	    (           \s+                     )
	    """

	def tokens(text):
	    for match in re.finditer(TOKEN_RX, text):
	        if match.lastgroup:
	            yield (match.lastgroup, match[0])

	TEXT = """
	    x = 123 + "hello #99"  # ignore me!
	    print(hello.bye[0] + 3.14, 'single')
	    """

	for kind, text in tokens(TEXT):
	    print(f"{kind:7}: {text=}")