Skip to content

Instantly share code, notes, and snippets.

@thepaul
Created October 24, 2011 19:18
Show Gist options
  • Save thepaul/1309885 to your computer and use it in GitHub Desktop.
Save thepaul/1309885 to your computer and use it in GitHub Desktop.
safer version of python's re.Scanner
# SaferScanner is just like re.Scanner, but it neuters any grouping in the lexicon
# regular expressions and throws an error on group references, named groups, or
# regex in-pattern flags. Any of those can break correct operation of Scanner.
import re
from sre_constants import BRANCH, SUBPATTERN, GROUPREF, GROUPREF_IGNORE, GROUPREF_EXISTS
class SaferScanner(re.Scanner):
    """``re.Scanner`` variant whose lexicon patterns may contain plain groups.

    Each lexicon phrase is parsed on its own and every capturing group inside
    it is turned into a non-capturing one before the phrases are combined.
    The only capturing groups left in the compiled pattern are the wrappers
    added here, one per lexicon entry and numbered in lexicon order, which is
    what the inherited ``scan`` method relies on to pick the action.

    Constructs that cannot be neutralised are rejected with ``ValueError``:
    group references, named groups and in-pattern global flags.

    NOTE: this class is built on private internals of the ``re`` module whose
    names and node shapes differ between Python versions; the small helpers
    below probe for the variant offered by the running interpreter.
    """

    def __init__(self, lexicon, flags=0):
        """Compile *lexicon* into a single scanner pattern.

        Args:
            lexicon: sequence of ``(phrase, action)`` pairs, as accepted by
                ``re.Scanner``.
            flags: ``re`` flags applied to every phrase.

        Raises:
            ValueError: if a phrase uses group references, named groups or
                in-pattern global flags.
        """
        parser, compiler = self._sre_modules()
        flags = int(flags)  # accept re.RegexFlag members as well as ints
        self.lexicon = lexicon

        state_cls = getattr(parser, "State", None) or parser.Pattern
        state = state_cls()
        state.flags = flags

        # The parser emits SUBPATTERN arguments either as
        # (group, add_flags, del_flags, pattern) or, on older interpreters,
        # as (group, pattern).  Build wrappers in whichever shape it uses.
        four_field = len(parser.parse("(a)").data[0][1]) == 4

        alternatives = []
        for phrase, _action in lexicon:
            gid = state.opengroup()
            body = self.subpat(phrase, flags)
            arg = (gid, 0, 0, body) if four_field else (gid, body)
            wrapper = parser.SubPattern(state, [(SUBPATTERN, arg)])
            self._close_group(state, gid, wrapper)
            alternatives.append(wrapper)

        p = parser.SubPattern(state, [(BRANCH, (None, alternatives))])
        self.p = p
        self.scanner = compiler.compile(p)

    @staticmethod
    def _sre_modules():
        """Return the ``(parser, compiler)`` modules that back ``re``."""
        # Newer interpreters expose them as re._parser / re._compiler, older
        # ones as re.sre_parse / re.sre_compile.
        parser = getattr(re, "_parser", None) or re.sre_parse
        compiler = getattr(re, "_compiler", None) or re.sre_compile
        return parser, compiler

    @staticmethod
    def _state_of(sub):
        """Return the parser state object attached to SubPattern *sub*."""
        # The attribute is called ``state`` on newer interpreters and
        # ``pattern`` on older ones.
        state = getattr(sub, "state", None)
        return sub.pattern if state is None else state

    @staticmethod
    def _close_group(state, gid, wrapper):
        """Close group *gid* on *state* using whichever signature it has."""
        try:
            state.closegroup(gid, wrapper)
        except TypeError:
            # Older interpreters: closegroup() takes the group id only.
            state.closegroup(gid)

    @classmethod
    def subpat(cls, phrase, flags):
        """Parse *phrase* and return it with all capturing groups removed.

        Raises:
            ValueError: see ``scrub_sub``.
        """
        parser = cls._sre_modules()[0]
        # The parser may add flags on its own (for example the implicit
        # UNICODE flag for str patterns).  Parsing an empty pattern of the
        # same type yields exactly those, so anything beyond this baseline
        # must have been set from inside the phrase.
        baseline = cls._state_of(parser.parse(phrase[:0], flags)).flags
        return cls.scrub_sub(parser.parse(phrase, flags), baseline)

    @classmethod
    def _scrub_item(cls, item, flags):
        """Scrub one element of an operator argument.

        Lists are descended into because the alternatives of a BRANCH are
        stored as a list nested inside the argument tuple.
        """
        parser = cls._sre_modules()[0]
        if isinstance(item, parser.SubPattern):
            return cls.scrub_sub(item, flags)
        if isinstance(item, list):
            return [cls._scrub_item(entry, flags) for entry in item]
        return item

    @classmethod
    def scrub_sub(cls, sub, flags=0):
        """Return a copy of parsed pattern *sub* without capturing groups.

        Args:
            sub: a parsed ``SubPattern``.
            flags: the flag value the pattern state is expected to carry;
                any difference is treated as an in-pattern flag setting.

        Raises:
            ValueError: if *sub* contains a group reference or a named
                group, or if its state flags differ from *flags*.
        """
        parser = cls._sre_modules()[0]
        scrubbedsub = []
        for op, arg in sub.data:
            if isinstance(arg, parser.SubPattern):
                # Some operators carry a bare sub-pattern as their argument.
                arg = cls.scrub_sub(arg, flags)
            elif isinstance(arg, (tuple, list)):
                items = [cls._scrub_item(entry, flags) for entry in arg]
                if op in (BRANCH, SUBPATTERN):
                    # First field is the group id; None means non-capturing.
                    items[0] = None
                # Keep the container type the compiler was handed originally.
                arg = tuple(items) if isinstance(arg, tuple) else items
            if op in (GROUPREF, GROUPREF_IGNORE, GROUPREF_EXISTS):
                raise ValueError("Group references not allowed in SaferScanner lexicon")
            scrubbedsub.append((op, arg))
        state = cls._state_of(sub)
        if state.groupdict:
            raise ValueError("Named captures not allowed in SaferScanner lexicon")
        if state.flags ^ flags:
            raise ValueError("RE flag setting not allowed in SaferScanner lexicon")
        return parser.SubPattern(state, scrubbedsub)
@d0ugal
Copy link

d0ugal commented May 22, 2013

Why does this inherit re.Scanner? It doesn't appear to use anything from it - or did I miss something?

@nicktimko
Copy link

If there are other methods in the class they will function as usual.

@thepaul
Copy link
Author

thepaul commented Dec 16, 2015

The scan method of re.Scanner is still meant to be used through this class.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment