Skip to content

Instantly share code, notes, and snippets.

@jeacom25b
Last active September 23, 2019 15:54
Show Gist options
  • Save jeacom25b/3096829522f2b99a2806f1d8571ba713 to your computer and use it in GitHub Desktop.
Save jeacom25b/3096829522f2b99a2806f1d8571ba713 to your computer and use it in GitHub Desktop.
'''
Created by Jeacom
This is an experimental utility module for constructing and concatenating regular expressions
so that we can reuse them to build more complex regexes out of simpler ones, all hidden behind
a readable Python interface so we don't have to read things like this:
(?:(?:(?:struct|enum)[ \t]+)?\b[a-zA-Z][a-zA-Z\d]*[ \t]+(\b[a-zA-Z][a-zA-Z\d]*))
instead we read things like this:
struc_enum = OrGroup("struct", "enum")
typed_name = (struc_enum + whitespace).optional() + valid_name + whitespace + valid_name.captured()
Warning: this is just a test, don't take it seriously.
'''
def _quantifier_choice(min, max):
    """Return the regex quantifier suffix for a ``min``/``max`` repetition pair.

    A ``max`` of 0 stands for "no upper bound", so:

        (0, 0) -> "*"        (0, 1) -> "?"        (1, 0) -> "+"
        (n, n) -> "{n}"      (n, 0) -> "{n,}"     (0, n) -> "{,n}"
        (m, n) -> "{m,n}"
    """
    # Shorthand quantifiers take priority over the brace forms.
    if not (min or max):
        return "*"
    if min == 0 and max == 1:
        return "?"
    if min == 1 and max == 0:
        return "+"

    # Exact repetition count.
    if min == max:
        return f"{{{max!s}}}"

    # Open-ended ranges: only one of the two bounds is meaningful.
    if min >= 0 and max == 0:
        return f"{{{min!s},}}"
    if min == 0 and max >= 0:
        return f"{{,{max!s}}}"

    return f"{{{min!s},{max!s}}}"
def _escape_if_needed(string):
    """Return ``string`` with every regex-significant character backslash-escaped.

    The characters treated as significant are ``\\ * - + / | ( ) [ ] { } . $ ^ ! ?``;
    all other characters are passed through unchanged.
    """
    special = r"\*-+/|()[]{}.$^!?"
    # One pass over the input: prefix each special character with a backslash.
    return "".join("\\" + ch if ch in special else ch for ch in string)
class NakedExpr:
    """A raw regex fragment that is emitted verbatim, without any wrapping."""

    # Class-level default; every instance overrides it in __init__.
    expr = ""

    def __init__(self, expr):
        self.expr = expr

    def __str__(self):
        # The fragment is rendered exactly as it was given.
        return self.expr

    def __repr__(self):
        return self.__str__()

    def __add__(self, other):
        # Concatenate both renderings and wrap the result in a non-capturing
        # group; the text is already regex syntax, so it must not be escaped.
        combined = "".join((str(self), str(other)))
        return Group(combined, escape=False)
class CharList:
    """A regex character class such as ``[a-z]``, with an optional quantifier.

    Instances are treated as immutable values: every operator and helper
    returns a new object instead of modifying ``self``.

    Class attributes:
        open / close: delimiters placed around ``contents`` when rendering.
        positive_open / negative_open: the two values ``open`` toggles
            between when the expression is negated with unary ``-``.
        quantifier: suffix appended after ``close`` (``""`` means none).
    """

    open = "["
    contents = ""
    close = "]"
    quantifier = ""
    negative_open = "[^"
    positive_open = "["

    def __init__(self, *args, escape=False):
        """Build the class body from ``args``.

        Other ``CharList`` instances contribute their raw ``contents``;
        strings are escaped only when ``escape`` is true; anything else is
        converted with ``str()``.
        """
        for content in args:
            if isinstance(content, CharList):
                self.contents += content.contents
            elif escape and isinstance(content, str):
                self.contents += _escape_if_needed(content)
            else:
                self.contents += str(content)

    def __str__(self):
        return f"{self.open}{self.contents}{self.close}{self.quantifier}"

    def __repr__(self):
        return str(self)

    def _clone(self):
        """Return a copy that keeps the delimiters and quantifier of ``self``."""
        new = type(self)(self.contents, escape=False)
        new.open = self.open
        new.close = self.close
        new.quantifier = self.quantifier
        return new

    def _is_plain_class(self):
        """True for an un-negated, un-quantified ``[...]`` character class."""
        return (
            self.open == CharList.positive_open
            and self.close == CharList.close
            and not self.quantifier
        )

    def __neg__(self):
        """Return the negated form (``[...]`` <-> ``[^...]``).

        Anything that is not already negative becomes negative, so subclasses
        whose default ``open`` is neither of the two toggle values still
        negate to ``negative_open``. The quantifier is preserved.
        """
        new = self._clone()
        if self.open == self.negative_open:
            new.open = self.positive_open
        else:
            new.open = self.negative_open
        return new

    def __sub__(self, other):
        """``a - b`` is ``a`` followed by the negation of ``b``."""
        return self + -other

    def __add__(self, other):
        """Combine with ``other``.

        Two plain character classes are merged into a single class
        (``[a-z] + [A-Z]`` -> ``[a-zA-Z]``). In every other case -- groups,
        negated or quantified classes, raw strings -- merging would discard
        the delimiters or the quantifier, so the two rendered expressions
        are concatenated inside a non-capturing group instead.
        """
        if (
            isinstance(other, CharList)
            and self._is_plain_class()
            and other._is_plain_class()
        ):
            return CharList(self.contents + other.contents)
        return Group(str(self) + str(other), escape=False)

    def __mul__(self, other):
        """``expr * n`` is shorthand for ``expr.repeated(n, n)``."""
        return self.repeated(other, other)

    def repeated(self, min=0, max=0, reluctant=False):
        """
        Return a copy carrying the quantifier for ``min``..``max`` repetitions.
        A ``max`` of 0 means "no upper bound"; ``reluctant`` appends ``?`` to
        make the quantifier non-greedy.

        repeated(0, 0) -> [...]*
        repeated(0, 1) -> [...]?
        repeated(1, 0) -> [...]+
        repeated(0, 0, True) -> [...]*?
        repeated(0, 1, True) -> [...]??
        repeated(1, 0, True) -> [...]+?
        repeated(10, 10) -> [...]{10}
        repeated(10, 15) -> [...]{10,15}
        repeated(10, 15, True) -> [...]{10,15}?
        """
        # Clone so that negation ("[^") and custom delimiters survive.
        new = self._clone()
        new.quantifier = _quantifier_choice(min, max)
        if reluctant:
            new.quantifier += "?"
        return new

    def optional(self):
        """
        optional() -> [...]?
        """
        return self.repeated(max=1)

    def once_or_more(self, reluctant=False):
        """
        once_or_more() -> [...]+
        """
        return self.repeated(min=1, max=0, reluctant=reluctant)

    def at_least(self, v, reluctant=False):
        """
        at_least(v) -> [...]{v,}
        at_least(1) -> [...]+
        """
        return self.repeated(min=v, reluctant=reluctant)

    def at_most(self, v):
        """
        at_most(v) -> [...]{,v}
        at_most(1) -> [...]?
        """
        return self.repeated(max=v)
class Group(CharList):
    """A non-capturing regex group ``(?:...)`` with an optional quantifier.

    ``positive_open`` / ``negative_open`` are the lookahead openers that the
    inherited negation operator switches ``open`` between.
    """

    open = "(?:"
    contents = ""
    close = ")"
    quantifier = ""
    positive_open = "(?="
    negative_open = "(?!"

    def __init__(self, *args, escape=True):
        """Concatenate ``args`` into the group body.

        A plain, un-quantified ``Group`` argument is flattened (its wrapper is
        dropped) to keep the output short. Strings are escaped when ``escape``
        is true; everything else is rendered with ``str()``.
        """
        for arg in args:
            # Flattening is only safe when the inner wrapper is redundant.
            # A body containing "|" needs its own parentheses, otherwise the
            # alternation would swallow the neighbouring expressions
            # ((?:a|b) followed by c must not become a|bc).
            flattenable = (
                type(arg) is Group
                and arg.open == "(?:"
                and arg.quantifier == ""
                and "|" not in arg.contents
            )
            if flattenable:
                self.contents += arg.contents
            elif isinstance(arg, str) and escape:
                self.contents += _escape_if_needed(arg)
            else:
                self.contents += str(arg)

    def __or__(self, other):
        """``a | b`` -> non-capturing group matching either operand."""
        return Group(str(self) + "|" + str(other), escape=False)

    def __ror__(self, other):
        """Reflected ``|`` so that a non-Group left operand also works."""
        return Group(str(other) + "|" + str(self), escape=False)

    def __str__(self):
        return f"{self.open}{self.contents}{self.close}{self.quantifier}"

    def __add__(self, other):
        """``a + b`` -> non-capturing group matching ``a`` followed by ``b``."""
        return Group(self, other, escape=False)

    def __radd__(self, other):
        """Reflected ``+`` so that e.g. a raw string can be the left operand."""
        return Group(other, self, escape=False)

    def captured(self):
        """Return a capturing group for this expression.

        Without a quantifier the body is re-wrapped directly: ``(?:x)`` ->
        ``(x)``. With a quantifier the whole quantified expression is
        captured, ``(?:x)+`` -> ``((?:x)+)``, so the repetition is not lost.
        """
        if self.quantifier:
            return Capture(str(self), escape=False)
        return Capture(self.contents, escape=False)
class OrGroup(Group):
    """Non-capturing group whose arguments are alternatives joined by ``|``."""

    def __init__(self, *args, escape=True):
        # Render each alternative first: plain strings are escaped when
        # requested, everything else goes through str().
        alternatives = []
        for arg in args:
            if escape and type(arg) == str:
                alternatives.append(_escape_if_needed(arg))
            else:
                alternatives.append(str(arg))
        self.contents = "|".join(alternatives)
class Capture(Group):
    """Capturing group: renders its contents as ``(...)``."""

    open, close = "(", ")"
class LookAhead(Group):
    """Lookahead assertion: ``(?=...)`` by default, ``(?!...)`` when negated."""

    # Delimiters for the positive and the negative variant.
    positive_open, positive_close = "(?=", ")"
    negative_open, negative_close = "(?!", ")"

    # A fresh instance starts out as a positive lookahead.
    open, close = "(?=", ")"
class LookBehind(Group):
    """Lookbehind assertion: ``(?<=...)`` by default, ``(?<!...)`` when negated."""

    open = "(?<="
    close = ")"
    positive_open = "(?<="
    positive_close = ")"
    # Negative lookbehind is spelled "(?<!"; without the "?" the text would
    # be parsed as an ordinary capturing group starting with a literal "<!".
    negative_open = "(?<!"
    negative_close = ")"
# --- Ready-made character classes -------------------------------------------
A_Z = CharList("A-Z")
a_z = CharList("a-z")
NUMBER = CharList(r"\d")
NON_NUMBER = CharList(r"\D")
WORD_CHAR = CharList(r"\w")
# Must use the upper-case \W; the lower-case \w would duplicate WORD_CHAR.
NON_WORD_CHAS = CharList(r"\W")
# Correctly spelled alias; the original (misspelled) name is kept for
# backward compatibility.
NON_WORD_CHAR = NON_WORD_CHAS
WHITESPACE = CharList(r"\s")
NON_WHITESPACE = CharList(r"\S")
# Inside a character class "." is a literal dot, so "[.]" cannot mean "any
# character". "[\s\S]" matches every character (newlines included).
ANY_CHAR = CharList(r"\s\S")
PERIOD = CharList(r"\.")
TAB_SPACE = CharList(r" \t")
ALPHA = a_z + A_Z
ALPHANUMERIC = ALPHA + NUMBER

# --- Raw fragments that must not be wrapped in brackets ---------------------
SPACE = NakedExpr(" ")
TAB = NakedExpr(r"\t")
LINE_START = NakedExpr("^")
LINE_END = NakedExpr("$")
WORD_BOUNDARY = NakedExpr(r"\b")
NOT_WORD_BOUNDARY = NakedExpr(r"\B")
if __name__ == "__main__":
    # Demo: build a regex for an optionally struct/enum-qualified C-style
    # declaration "<type> <name>" and print it; the name is captured.
    keywords = OrGroup("struct", "enum", "void", "double", "float", "Char", ",,,")

    # Building blocks.
    whitespace = TAB_SPACE.at_least(1)
    identifier_body = Group(ALPHA, ALPHANUMERIC.repeated())
    valid_name = r"\b" + identifier_body
    struc_enum = OrGroup("struct", "enum")

    # Assemble: optional qualifier, type name, whitespace, captured name.
    optional_qualifier = (struc_enum + whitespace).optional()
    typed_name = optional_qualifier + valid_name + whitespace + valid_name.captured()

    print(repr(typed_name))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment