Skip to content

Instantly share code, notes, and snippets.

@jvanburen
Last active August 29, 2015 14:08
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save jvanburen/7d6456a7397eb7058132 to your computer and use it in GitHub Desktop.
Save jvanburen/7d6456a7397eb7058132 to your computer and use it in GitHub Desktop.
Parenthesis Checker
#!/usr/bin/env python3
import argparse
from bisect import bisect
from collections import namedtuple
from itertools import chain
import re
from sys import stdin, stderr
__all__ = ("Settings", "ParenParser")
# A string literal that follows the imports is not a module docstring, so it
# would leave __doc__ as None. Bind it explicitly: _main() passes __doc__ to
# the argument parser as the program description.
__doc__ = """A program for checking the matching of parentheses in a given file or input"""
class Settings:
    """Stores the settings for the delimiter parser.

    Subclassing Settings allows people to programmatically change the
    functionality of the parser: override the class variables below and
    hand the subclass to the parser.
    """

    # Do not change
    Delims = namedtuple("Delims", ("open", "close"))

    # Change these class variables to modify the behavior of the parser
    flags = re.MULTILINE | re.IGNORECASE  # flags applied to every pattern
    ignore = ""  # regex for text to skip; an empty string disables it
    delims = Delims(r"\(", r"\)"), Delims(r"\[", r"\]")
    failfast = False  # stop after the first non-fatal error
    verify = True  # validate the settings when a parser is constructed

    # Modifying these may break the parser
    @classmethod
    def fail_if_invalid(cls):
        """Verify that the regex compilation will work as intended.

        Raises:
            AssertionError: if ``delims`` is empty, is not a tuple/list/set/
                frozenset of ``Delims``, or contains a pattern that is not a
                nonempty string or that defines named groups.
            re.error: if ``ignore`` or a delimiter pattern is not valid regex.
        """
        def require(condition, message):
            # Raised explicitly instead of via ``assert`` so the validation
            # still runs when Python is started with -O.
            if not condition:
                raise AssertionError(message)

        ds = cls.delims
        re.compile(cls.ignore, cls.flags)
        require(ds, "must contain at least one pair of delimiters")
        require(isinstance(ds, (tuple, list, set, frozenset)),
                "delims must be a tuple, list, set or frozenset")
        for d in ds:
            require(isinstance(d, cls.Delims), "each delimiter must be a Delims")
        patterns = list(chain.from_iterable(ds))
        require(all(isinstance(p, str) for p in patterns),
                "patterns must be strings")
        require(all(patterns), "delims must be nonempty strings")
        for pat in patterns:
            cpat = re.compile(pat, cls.flags)
            # compile_regex() wraps every pattern in its own named group and
            # the parser dispatches on that name, so patterns may not bring
            # named groups of their own.
            require(not cpat.groupindex, "patterns cannot contain named groups")

    @classmethod
    def compile_regex(cls):
        """Create the regex used for parsing by matching groups.

        The result is one alternation of named groups, in this order:
        ``ignore`` (only when ``cls.ignore`` is nonempty), then for the i-th
        entry of ``cls.delims`` either ``o<i>`` and ``c<i>`` (open and close),
        or a single ``e<i>`` when the open and close patterns are equal.
        """
        groups = [cls.makegroup(cls.ignore, "ignore")] if cls.ignore else []
        for i, delims in enumerate(cls.delims):
            if delims.open == delims.close:
                # one pattern serves as either opener or closer
                groups.append(cls.makegroup(delims.open, "e{}".format(i)))
            else:
                groups.append(cls.makegroup(delims.open, "o{}".format(i)))
                groups.append(cls.makegroup(delims.close, "c{}".format(i)))
        return re.compile("|".join(groups), cls.flags)

    @staticmethod
    def makegroup(patstr, name):
        """Create the regex capturing group with name name and pattern patstr."""
        return "(?P<{}>{})".format(name, patstr)
class ParenParser:
    """Checks that the delimiters in a string are balanced.

    The delimiters (and any text to ignore) come from a Settings class; the
    parser scans the string with the regex that class compiles and keeps a
    stack of the currently open delimiters.
    """

    renewln = re.compile('^', re.MULTILINE)  # matches at the start of each line
    NO_OPENING_ERR = "No opening delimiter to match closing {}"
    WRONG_CLOSE_ERR = "Block opened by {} closed by {}"
    NO_CLOSING_ERR = "No closing delimiter to match opening {}"

    def __init__(self, s, fn="<input>", settings=Settings):
        """Prepare to check the string s.

        Args:
            s: the text to check.
            fn: name used for the text in error messages.
            settings: the Settings class (or subclass) supplying the regex;
                it is validated first when its ``verify`` attribute is true.
        """
        self.s = s
        self.fn = fn
        # offsets at which each line begins, in increasing order
        self.lstarts = tuple(m.end() for m in ParenParser.renewln.finditer(s))
        self.settings = settings
        if settings.verify:
            settings.fail_if_invalid()
        self.regex = settings.compile_regex()

    def errors(self):
        """Return a list of all error messages regarding balanced delimiters.

        A closing delimiter with nothing open is fatal: scanning stops and
        the messages gathered so far plus that error are returned. A closer
        that does not match the most recent opener is recorded and scanning
        continues, unless ``settings.failfast`` is set. Delimiters still open
        at the end of the input are reported last.
        """
        stack, output = [], []
        for match in self.regex.finditer(self.s):
            group = match.lastgroup
            if group == 'ignore':
                continue
            # match type (open, close or either) and delim type (which
            # delim number)
            mtype, dtype = group[0], group[1:]
            if mtype == 'e':
                # act as opening, unless the innermost open environment was
                # opened by this same delimiter
                if stack and stack[-1].lastgroup == group:
                    mtype = 'c'
                else:
                    mtype = 'o'
            if mtype == 'o':
                stack.append(match)
            elif mtype == 'c':
                if not stack:  # fatal error
                    m1 = self.loc_str(match)
                    return output + [ParenParser.NO_OPENING_ERR.format(m1)]
                opening = stack.pop()
                otype = opening.lastgroup[1:]  # delim type of opening
                if otype != dtype:  # non-fatal error
                    m1, m2 = self.loc_str(opening, False), self.loc_str(match)
                    output.append(ParenParser.WRONG_CLOSE_ERR.format(m1, m2))
                    if self.settings.failfast:
                        return output
            else:
                # group is the group *name* (a str); report it directly
                raise AssertionError(group + " not recognized")
        # stack should be empty at this point
        for leftover in stack:
            m1 = self.loc_str(leftover)
            output.append(ParenParser.NO_CLOSING_ERR.format(m1))
            if self.settings.failfast:
                return output
        return output

    def getline(self, pos):
        """Which line (counting from 1) is the character at index pos on."""
        # number of line starts at or before pos
        return bisect(self.lstarts, pos)

    def getcol(self, pos, line):
        """Which column (counting from 1) in the line is index pos on."""
        # line should never be 0 (invariant provided by getline)
        return pos - self.lstarts[line-1] + 1

    def loc_str(self, m, includefn=True):
        """Format the location of the match m.

        The result is the matched text, optionally followed by the file name,
        followed by ``line L:C`` (with ``-C2`` appended when the match is
        longer than one character) or, when the match crosses lines,
        ``lines L1:C1-L2:C2``. The end position is derived from the match's
        end offset as returned by ``m.span()``.
        """
        output = '{delim}' + (' in "{fn}"' if includefn else "")
        chars = startch, endch = m.span()
        lines = startln, endln = tuple(map(self.getline, chars))
        delim = m.string[startch:endch]
        if startln == endln:
            output += ", line {lines[0]}:{chars[0]}"
            if endch - startch > 1:
                output += "-{chars[1]}"
        else:
            output += ", lines {lines[0]}:{chars[0]}-{lines[1]}:{chars[1]}"
        # make chars relative to line starts
        chars = (self.getcol(startch, startln), self.getcol(endch, endln))
        return output.format(fn=self.fn, delim=delim, chars=chars, lines=lines)
# Some settings classes
class Default(Settings):
    """The stock configuration: every value is inherited from Settings."""
class Python(Settings):
    """Settings for Python source: text from a "#" onward is not checked."""

    # From a "#" to the end of the line; the inherited flags include
    # re.MULTILINE, so "$" matches at the end of each line.
    ignore = r"#.*$"
class LaTeX(Settings):
    """Settings for LaTeX source: environments, math modes and brackets."""

    # Skip to the end of the line from a "%" or from a "\def\name" macro
    # definition (with optional "#1#2..." parameters).
    ignore = r"((\\def\\\w+(?:#\d)*)|%).*$" # macro defs

    # Plain bracket pairs.
    parens = Settings.Delims(r"\(", r"\)")
    brackets = Settings.Delims(r"\[", r"\]")
    braces = Settings.Delims(r"\{", r"\}")

    # Math modes: the same token both opens and closes.
    mathmode = Settings.Delims(r'\$', r'\$')
    displaymath = Settings.Delims(r'\$\$', r'\$\$')

    # \begin{name} ... \end{name} environments.
    envs = Settings.Delims(
        r"\\begin\{[^ \t\n\r\f\v{]+\}",
        r"\\end\{[^ \t\n\r\f\v{]+\}",
    )

    # The compiled regex tries these alternatives in order, so "$$" has to be
    # listed before "$".
    delims = (envs, displaymath, mathmode, parens, brackets, braces)
def _main(argv=None):
    """Command-line entry point: check every given file and report errors.

    Args:
        argv: list of command-line arguments to parse; when None the
            argument parser reads them from the process command line.

    Raises:
        SystemExit: always. Status 0 when no file has delimiter errors,
            1 when any file has errors, cannot be read, or the run is
            interrupted, and 2 when the requested format is unknown.
    """
    from argparse import ArgumentParser
    formats = {
        "standard": Default,
        "std": Default,
        "py": Python,
        "python": Python,
        "latex": LaTeX,
        "tex": LaTeX,
    }
    ap = ArgumentParser(description=__doc__)
    ap.add_argument('-ff', '--failfast', action='store_true',
                    help='fail on the first error encountered')
    ap.add_argument('-f', '--format', action='store', default="std",
                    help='what type of file to parse')
    ap.add_argument('files', metavar="file", type=str, nargs='+',
                    help='the files to parse')
    args = ap.parse_args(argv)

    settings = formats.get(args.format.lower())
    if settings is None:
        print("Format %s does not exist" % args.format, file=stderr)
        raise SystemExit(2)
    # NOTE: this sets the attribute on the settings class itself
    settings.failfast = args.failfast

    found_errors = False
    try:
        for fn in args.files:
            with open(fn) as f:
                text = f.read()
            errors = ParenParser(text, fn, settings).errors()
            print('\n'.join(errors) if errors else "No errors found.",
                  file=stderr)
            # remember a failure in any file, not only the last one
            found_errors = found_errors or bool(errors)
    except KeyboardInterrupt:
        print("Aborted.", file=stderr)
        raise SystemExit(1)
    except OSError as e:  # includes FileNotFoundError
        print(e, file=stderr)
        raise SystemExit(1)
    raise SystemExit(1 if found_errors else 0)
# Run the command-line interface only when executed as a script.
if __name__ == "__main__":
    _main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment