Skip to content

Instantly share code, notes, and snippets.

@sma
Created March 1, 2009 10:52
Show Gist options
  • Save sma/72297 to your computer and use it in GitHub Desktop.
Save sma/72297 to your computer and use it in GitHub Desktop.
import re
class Scanner(object):
    """Regex-driven tokenizer built from an ordered sequence of rules.

    Each rule is a tuple ``(pattern[, token[, convert]])``:

    * ``pattern`` -- regular expression source matching one lexeme.
    * ``token`` -- name yielded for the lexeme.  When omitted, the matched
      text itself is used as the token name.  The name ``'IGNORE'``
      suppresses the token.
    * ``convert`` -- optional callable applied to the matched text to
      produce the token value.

    All patterns are joined into a single alternation, so at any position
    the first rule (in the order given) that matches wins.
    """

    def __init__(self, *rules):
        """Compile ``rules`` into one combined regular expression."""
        self.rules = rules
        self.re = re.compile("|".join("(" + r[0] + ")" for r in rules))
        # Every rule is wrapped in its own capturing group.  A rule may also
        # contain capturing groups of its own, which shifts the group numbers
        # of all later rules, so remember which group number wraps which rule
        # instead of assuming "group n == rule n - 1".
        self._rule_by_group = {}
        group_index = 1
        for rule in rules:
            self._rule_by_group[group_index] = rule
            group_index += 1 + re.compile(rule[0]).groups

    def scan(self, s):
        """Yield ``(token, value)`` pairs for the text ``s``.

        Tokens whose name is ``'IGNORE'`` are dropped.  If some part of the
        input is matched by no rule, ``('ERROR', rest_of_input)`` is yielded
        and scanning stops.  The final pair is always ``('END', '')``.
        """
        pos = 0
        for m in self.re.finditer(s):
            # finditer() silently skips text nothing matches; a gap between
            # the previous token and this match is therefore a scan error.
            if m.start() != pos:
                break
            # For a match, lastindex is the wrapping group of the rule that
            # matched (the outer group is the last one to close).
            rule = self._rule_by_group.get(m.lastindex)
            if rule is None:
                break
            value = m.group()
            token = rule[1] if len(rule) > 1 else value
            if len(rule) > 2:
                value = rule[2](value)
            if token != 'IGNORE':
                yield token, value
            pos = m.end()
        if pos != len(s):
            yield 'ERROR', s[pos:]
        yield 'END', ''

    def _unescape(s):
        """Strip the surrounding quotes from ``s`` and decode its escapes."""
        body = s[1:-1]
        if str is bytes:
            # Python 2: keep the original byte-string behaviour.
            if isinstance(body, bytes):
                return body.decode("string_escape")
        # Text strings have no "string_escape" codec.  Going through latin-1
        # with backslashreplace keeps non-latin-1 characters intact while the
        # unicode_escape codec interprets the backslash sequences.
        return body.encode("latin-1", "backslashreplace").decode("unicode_escape")

    # Predefined rules for common lexemes.
    Whitespace = (r'\s+', "IGNORE")
    Integer = (r'\d+', "integer", int)
    Float = (r'(?:\d+\.\d*|\.\d+)(?:[eE]-?\d+)?|\d+[eE]-?\d+', "float", float)
    Number = (r'(?:\d+(?:\.\d*)?|\.\d+)(?:[eE]-?\d+)?', "number", float)
    String = (r'"(?:\\.|[^"])*"|\'(?:\\.|[^\'])*\'', "string", _unescape)
    Name = (r'\w+', "name")

    # Historical name of the string converter, kept so the attribute still
    # exists.  Defined last so the builtin is not shadowed above.
    str = staticmethod(_unescape)
# Demo 1: tokenize a small expression language using the predefined rules.
scanner = Scanner(
    Scanner.Number,
    Scanner.String,
    Scanner.Name,
    (r'[-+*/=]=?',),  # operators, optionally followed by '='
    (r';',),
    Scanner.Whitespace,
)
for t in scanner.scan('foo = 5 * 30; bar = "bar\\n" - 6.05e-6;'):
    # print(t) with a single tuple argument prints the same text on
    # Python 2 and Python 3.
    print(t)

# Demo 2: tokenize a Lisp-like expression.  The negative lookahead makes a
# digit run count as an int only when no further symbol characters follow,
# so "1+" is scanned as a symbol.
for t in Scanner(
    (r'[()]',),
    (r'\d+(?![^()\s])', 'int', int),
    (r'[^()\s]+', 'sym',),
    (r'\s+', 'IGNORE'),
).scan("(a 1+ (c? 3))"):
    print(t)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment