Last active
August 29, 2015 14:08
-
-
Save jvanburen/7d6456a7397eb7058132 to your computer and use it in GitHub Desktop.
Parenthesis Checker
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
import argparse | |
from bisect import bisect | |
from collections import namedtuple | |
from itertools import chain | |
import re | |
from sys import stdin, stderr | |
__all__ = ("Settings", "ParenParser")

# Bound explicitly: a bare string literal that follows the imports is just an
# expression statement, not the module docstring, so without this assignment
# ``__doc__`` is None and the command-line help has no description.
__doc__ = """A program for checking the matching of parenthesis in a given file or input"""
class Settings:
    """Stores the settings for the delimiter parser.

    Subclass ``Settings`` and override the class attributes below to change
    the behavior of the parser programmatically.

    Attributes:
        Delims: namedtuple type ``(open, close)`` holding the regex strings
            for one delimiter pair. Do not change.
        flags: ``re`` flags used when compiling every pattern.
        ignore: regex string for text to skip; the empty string disables it.
        delims: collection of ``Delims`` pairs. Their position in the
            collection becomes the numeric suffix of the regex group names,
            and earlier pairs win when two patterns match at the same spot
            because they come first in the alternation.
        failfast: when true, ``ParenParser.errors`` stops at the first error.
        verify: when true, ``ParenParser`` calls ``fail_if_invalid`` before
            compiling the regex.
    """

    # Do not change
    Delims = namedtuple("Delims", ("open", "close"))

    # Change these class variables to modify the behavior of the parser
    flags = re.MULTILINE | re.IGNORECASE
    ignore = ""
    delims = Delims(r"\(", r"\)"), Delims(r"\[", r"\]")
    failfast = False
    verify = True

    # Modifying these may break the parser
    @classmethod
    def fail_if_invalid(cls):
        """Verify that the regex compilation will work as intended.

        Returns:
            None when every check passes.

        Raises:
            AssertionError: if ``delims`` is empty, is not a tuple/list/set/
                frozenset, contains something other than ``Delims`` pairs,
                or a pattern is not a nonempty string or has named groups.
            re.error: if ``ignore`` or a delimiter pattern does not compile.
        """
        def require(condition, message):
            # Raised explicitly instead of using ``assert`` so the checks
            # still run under ``python -O``; the exception type is unchanged.
            if not condition:
                raise AssertionError(message)

        ds = cls.delims
        re.compile(cls.ignore, cls.flags)
        require(ds, "must contain at least one pair of delimiters")
        require(isinstance(ds, (tuple, list, set, frozenset)),
                "delims must be a tuple, list, set or frozenset")
        for d in ds:
            # isinstance (not an exact type match) so Delims subclasses work
            require(isinstance(d, cls.Delims),
                    "each entry of delims must be a Delims pair")

        patterns = list(chain.from_iterable(ds))
        require(all(isinstance(p, str) for p in patterns),
                "patterns must be strings")
        require(all(patterns), "delims must be nonempty strings")
        for pat in patterns:
            cpat = re.compile(pat, cls.flags)
            # compile_regex wraps each pattern in its own named group and the
            # parser dispatches on that name, so user-named groups are banned.
            require(not cpat.groupindex,
                    "pattens cannot contain named groups")

    @classmethod
    def compile_regex(cls):
        """Create the regex used for parsing by matching named groups.

        The result is one alternation. ``ignore`` (if nonempty) comes first
        as group ``ignore``. Each delimiter pair at index ``i`` follows as
        groups ``o<i>`` and ``c<i>`` (open and close), or as a single group
        ``e<i>`` when the open and close patterns are the same string.

        Returns:
            The compiled pattern, built with ``cls.flags``.
        """
        groups = [cls.makegroup(cls.ignore, "ignore")] if cls.ignore else []
        for i, delims in enumerate(cls.delims):
            if delims.open == delims.close:
                # "either": the same text both opens and closes
                groups.append(cls.makegroup(delims.open, "e{}".format(i)))
            else:
                groups.append(cls.makegroup(delims.open, "o{}".format(i)))
                groups.append(cls.makegroup(delims.close, "c{}".format(i)))
        return re.compile("|".join(groups), cls.flags)

    @staticmethod
    def makegroup(patstr, name):
        """Return ``patstr`` wrapped in a capturing group named ``name``."""
        return "(?P<{}>{})".format(name, patstr)
class ParenParser:
    """Checks a string for unbalanced or mismatched delimiters.

    The delimiters and ignorable text come from a ``Settings`` class. The
    text is scanned once with the regex built by ``settings.compile_regex()``
    and opening delimiters are tracked on a stack.

    Args:
        s: the text to check.
        fn: name shown in error messages as the source of ``s``.
        settings: a ``Settings`` class (not an instance) configuring the scan.
    """

    # Matches the empty string at the start of every line
    renewln = re.compile('^', re.MULTILINE)

    NO_OPENING_ERR = "No opening delimiter to match closing {}"
    WRONG_CLOSE_ERR = "Block opened by {} closed by {}"
    NO_CLOSING_ERR = "No closing delimiter to match opening {}"

    def __init__(self, s, fn="<input>", settings=Settings):
        self.s = s
        self.fn = fn
        # Index in s of the first character of each line, in ascending order
        self.lstarts = tuple(m.end() for m in ParenParser.renewln.finditer(s))
        self.settings = settings
        if settings.verify:
            settings.fail_if_invalid()
        self.regex = settings.compile_regex()

    def errors(self):
        """Return a list of all error messages regarding balanced delimiters.

        A closing delimiter with nothing open is treated as fatal: scanning
        stops and the messages collected so far plus that error are returned.
        A block closed by the wrong kind of delimiter is recorded and scanning
        continues, unless ``settings.failfast`` is set.

        Returns:
            A list of message strings; empty when everything is balanced.

        Raises:
            AssertionError: if the regex produces a match whose group name is
                not ``ignore`` or an ``o``/``c``/``e`` group.
        """
        stack, output = [], []
        for match in self.regex.finditer(self.s):
            group = match.lastgroup
            if group == 'ignore':
                continue
            if group is None or group[0] not in ('o', 'c', 'e'):
                # Only reachable with a customized compile_regex. Raised
                # explicitly so the check also runs under ``python -O``.
                raise AssertionError(str(group) + " not recognized")

            # match type (open or close) and delim type (which delim number)
            mtype, dtype = group[0], group[1:]
            if mtype == 'e':
                # Same text opens and closes: it closes only when the
                # innermost open block was opened by this very delimiter.
                if stack and stack[-1].lastgroup == group:
                    mtype = 'c'
                else:
                    mtype = 'o'

            if mtype == 'o':
                stack.append(match)
                continue

            # mtype == 'c'
            if not stack:  # fatal error
                m1 = self.loc_str(match)
                return output + [ParenParser.NO_OPENING_ERR.format(m1)]
            opening = stack.pop()
            if opening.lastgroup[1:] != dtype:  # non-fatal error
                m1, m2 = self.loc_str(opening, False), self.loc_str(match)
                output.append(ParenParser.WRONG_CLOSE_ERR.format(m1, m2))
                if self.settings.failfast:
                    return output

        # Anything still on the stack was opened but never closed
        for leftover in stack:
            m1 = self.loc_str(leftover)
            output.append(ParenParser.NO_CLOSING_ERR.format(m1))
            if self.settings.failfast:
                return output
        return output

    def getline(self, pos):
        """Return the 1-based number of the line containing index ``pos``."""
        # Counts the line starts at or before pos
        return bisect(self.lstarts, pos)

    def getcol(self, pos, line):
        """Return the 1-based column of index ``pos`` within line ``line``."""
        # line should never be 0 (invariant provided by getline)
        return pos - self.lstarts[line - 1] + 1

    def loc_str(self, m, includefn=True):
        """Format the matched text of ``m`` together with its location.

        Args:
            m: a match object from scanning the text.
            includefn: when true, the file name is included in the result.

        Returns:
            The matched text, optionally followed by the file name, then
            ``line L:C`` (with ``-C2`` appended when the match is longer than
            one character) or ``lines L1:C1-L2:C2`` when the span crosses
            lines. The end position is taken from the exclusive end of the
            match span, i.e. one past its last character.
        """
        startch, endch = m.span()
        startln, endln = self.getline(startch), self.getline(endch)
        # make chars relative to line starts
        startcol = self.getcol(startch, startln)
        endcol = self.getcol(endch, endln)

        # Pieces are formatted separately so braces in the matched text are
        # never interpreted as format fields.
        parts = [m.string[startch:endch]]
        if includefn:
            parts.append(' in "{}"'.format(self.fn))
        if startln == endln:
            parts.append(", line {}:{}".format(startln, startcol))
            if endch - startch > 1:
                parts.append("-{}".format(endcol))
        else:
            parts.append(", lines {}:{}-{}:{}".format(
                startln, startcol, endln, endcol))
        return "".join(parts)
# Some settings classes | |
class Default(Settings):
    """Settings with nothing overridden: every value is inherited as is."""
class Python(Settings):
    """Settings that skip everything from a ``#`` up to the end of its line."""

    # NOTE(review): the pattern does not look at string literals, so a '#'
    # inside a string also starts an ignored region.
    ignore = r"#.*$"
class LaTeX(Settings):
    """Settings for LaTeX sources.

    Ignores the rest of a line starting at a ``%`` or at a ``\\def\\name``
    macro definition, and checks environments, math delimiters, parentheses,
    brackets and braces.
    """

    # macro defs and comments, through to the end of the line
    ignore = r"((\\def\\\w+(?:#\d)*)|%).*$"

    # \begin{...} / \end{...}; the environment name itself is not compared
    envs = Settings.Delims(
        r"\\begin\{[^ \t\n\r\f\v{]+\}",
        r"\\end\{[^ \t\n\r\f\v{]+\}",
    )
    # Open and close are the same text for both math modes
    displaymath = Settings.Delims(r'\$\$', r'\$\$')
    mathmode = Settings.Delims(r'\$', r'\$')
    parens = Settings.Delims(r"\(", r"\)")
    brackets = Settings.Delims(r"\[", r"\]")
    braces = Settings.Delims(r"\{", r"\}")

    # Order matters: '$$' is listed before '$' and environments before bare
    # braces, so the longer form is listed ahead of its shorter prefix.
    delims = (envs, displaymath, mathmode, parens, brackets, braces)
def _main(argv=None):
    """Command-line entry point: check each given file and report to stderr.

    Args:
        argv: list of command-line arguments to parse. ``None`` (the default)
            makes argparse read them from ``sys.argv``.

    Raises:
        SystemExit: always. The status is 0 when no file has delimiter
            errors, 1 when at least one file has errors, 2 when the format
            is unknown or a file could not be read, and 130 when interrupted
            from the keyboard.
    """
    from argparse import ArgumentParser

    formats = {
        "standard": Default,
        "std": Default,
        "python": Python,
        "py": Python,
        "latex": LaTeX,
        "tex": LaTeX,
    }
    ap = ArgumentParser(description=__doc__)
    ap.add_argument('-ff', '--failfast', action='store_true',
                    help='fail on the first error encountered')
    ap.add_argument('-f', '--format', action='store', default="std",
                    help='what type of file to parse')
    ap.add_argument('files', metavar="file", type=str, nargs='+',
                    help='the files to parse')
    args = ap.parse_args(argv)

    # Only the lookup is guarded, so a KeyError raised anywhere else is not
    # misreported as an unknown format.
    try:
        settings = formats[args.format.lower()]
    except KeyError:
        print("Format %s does not exist" % args.format, file=stderr)
        raise SystemExit(2) from None
    settings.failfast = args.failfast

    status = 0
    try:
        for fn in args.files:
            try:
                with open(fn) as f:
                    text = f.read()
            except OSError as e:
                # Report the unreadable file and keep checking the others
                print(e, file=stderr)
                status = 2
                continue
            errors = ParenParser(text, fn, settings).errors()
            print('\n'.join(errors) if errors else "No errors found.",
                  file=stderr)
            if errors:
                status = max(status, 1)
    except KeyboardInterrupt:
        print("Aborted.", file=stderr)
        raise SystemExit(130) from None
    # Exit once, after every file, so the status covers the whole run
    raise SystemExit(status)


if __name__ == '__main__':
    _main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment