jvanburen/parenchecker.py

## parenchecker.py
#!/usr/bin/env python3
import argparse
from bisect import bisect
from collections import namedtuple
from itertools import chain
import re
from sys import stdin, stderr

# Names exported by a star-import of this module.
__all__ = ("Settings", "ParenParser")

# NOTE(review): this string comes after the import statements, so it is an
# ordinary expression statement and not the module docstring; it does not
# populate the module's __doc__.
"""A program for checking the matching of parenthesis in a given file or input"""

class Settings:
    """Store the settings for the delimiter parser.

    Subclassing Settings allows people to programmatically change the
    functionality of the parser by overriding the class variables below.
    """
    # Do not change
    Delims = namedtuple("Delims", ("open", "close"))

    # Change these class variables to modify the behavior of the parser
    flags = re.MULTILINE | re.IGNORECASE  # flags used for every pattern
    ignore = ""  # regex for text to skip; empty means skip nothing
    delims = Delims(r"\(", r"\)"), Delims(r"\[", r"\]")  # (open, close) regexes
    failfast = False  # stop collecting errors after the first one
    verify = True  # whether fail_if_invalid() should be run before parsing

    # Modifying these may break the parser
    @classmethod
    def fail_if_invalid(cls):
        """Verify that the regex compilation will work as intended.

        Raises AssertionError when ``delims`` is empty, is not a
        tuple/list/set/frozenset of ``Delims``, or holds a pattern that is not
        a non-empty string or that defines a named group.  A pattern that is
        not a valid regular expression raises ``re.error``.
        """
        def require(condition, message=""):
            # Raised explicitly instead of using ``assert`` so that the checks
            # are not stripped when Python runs with -O.
            if not condition:
                raise AssertionError(message)

        ds = cls.delims
        re.compile(cls.ignore, cls.flags)
        require(ds, "must contain at least one pair of delimiters")
        require(isinstance(ds, (tuple, list, set, frozenset)))
        for d in ds:
            require(isinstance(d, cls.Delims))
        for pat in chain.from_iterable(ds):
            require(isinstance(pat, str), "patterns must be strings")
            require(pat, "delims must be nonempty strings")
            # compile_regex() wraps each pattern in its own named group, so
            # patterns that bring their own named groups are rejected here.
            require(not re.compile(pat, cls.flags).groupindex,
                    "patterns cannot contain named groups")

    @classmethod
    def compile_regex(cls):
        """Create the regex used for parsing by matching groups.

        Each pattern becomes one named alternative: ``ignore`` for the ignore
        pattern (only when it is non-empty) and, for the i-th delimiter pair,
        either ``o<i>`` and ``c<i>`` (open and close) or a single ``e<i>`` when
        the open and close patterns are the same string.  Alternatives are
        joined in that order, so earlier ones win when several could match.
        """
        groups = [cls.makegroup(cls.ignore, "ignore")] if cls.ignore else []

        for i, pair in enumerate(cls.delims):
            if pair.open == pair.close:
                groups.append(cls.makegroup(pair.open, 'e' + str(i)))  # either
            else:
                groups.append(cls.makegroup(pair.open, 'o' + str(i)))
                groups.append(cls.makegroup(pair.close, 'c' + str(i)))

        return re.compile('|'.join(groups), cls.flags)

    @staticmethod
    def makegroup(patstr, name):
        """Return a named capturing group called ``name`` around ``patstr``."""
        return "(?P<{}>{})".format(name, patstr)

class ParenParser:
    """Check that the delimiters in a piece of text are balanced.

    What counts as a delimiter (and what text is skipped) comes from a
    settings class; ``errors()`` scans the text and reports the problems.
    """
    renewln = re.compile('^', re.MULTILINE)  # matches at the start of each line
    NO_OPENING_ERR = "No opening delimiter to match closing {}"
    WRONG_CLOSE_ERR = "Block opened by {} closed by {}"
    NO_CLOSING_ERR = "No closing delimiter to match opening {}"

    def __init__(self, s, fn="<input>", settings=Settings):
        """Prepare to check the text ``s``.

        s -- the text to scan
        fn -- name used for the text in error messages
        settings -- class supplying ``verify``, ``failfast``,
            ``fail_if_invalid()`` and ``compile_regex()``
        """
        self.s = s
        self.fn = fn
        # Offset of the first character of every line, in increasing order.
        self.lstarts = tuple(m.end() for m in ParenParser.renewln.finditer(s))
        self.settings = settings
        if settings.verify:
            settings.fail_if_invalid()
        self.regex = settings.compile_regex()

    def errors(self):
        """Return a list of all error messages regarding balanced delimiters.

        A closing delimiter with nothing open is fatal: scanning stops and the
        messages gathered so far plus that one are returned.  A closing
        delimiter of the wrong kind and delimiters still open at the end of
        the text are recorded and scanning continues, unless the settings
        have ``failfast`` set, in which case the list is returned at once.
        """
        stack, output = [], []
        for match in self.regex.finditer(self.s):
            group = match.lastgroup
            if group == 'ignore':
                continue
            # Group names are a kind letter ('o'pen, 'c'lose or 'e'ither)
            # followed by the number of the delimiter pair.
            mtype, dtype = group[0], group[1:]

            if mtype == 'e':
                # Identical open/close text: it closes only when the innermost
                # open delimiter is this same one, otherwise it opens.
                if stack and stack[-1].lastgroup == group:
                    mtype = 'c'
                else:
                    mtype = 'o'

            if mtype == 'o':
                stack.append(match)
            elif mtype == 'c':
                if not stack:  # fatal error
                    m1 = self.loc_str(match)
                    return output + [ParenParser.NO_OPENING_ERR.format(m1)]
                opening = stack.pop()
                if opening.lastgroup[1:] != dtype:  # non-fatal error
                    m1, m2 = self.loc_str(opening, False), self.loc_str(match)
                    output.append(ParenParser.WRONG_CLOSE_ERR.format(m1, m2))
                    if self.settings.failfast:
                        return output
            else:
                # ``group`` is the group name itself (a str), not a callable.
                raise AssertionError(group + " not recognized")

        # Anything still on the stack was opened but never closed.
        for leftover in stack:
            m1 = self.loc_str(leftover)
            output.append(ParenParser.NO_CLOSING_ERR.format(m1))
            if self.settings.failfast:
                return output
        return output

    def getline(self, pos):
        """Return the 1-based number of the line holding the character at pos."""
        return bisect(self.lstarts, pos)

    def getcol(self, pos, line):
        """Return the 1-based column of offset ``pos`` within line ``line``."""
        # line should never be 0 (invariant provided by getline)
        return pos - self.lstarts[line-1] + 1

    def loc_str(self, m, includefn=True):
        """Format the location of the match ``m`` for an error message.

        The result is the matched text, then ``in "<fn>"`` when ``includefn``
        is true, then ``line L:C`` (with ``-C2`` appended when the match is
        longer than one character) or ``lines L1:C1-L2:C2`` when the match
        starts and ends on different lines.  The end column is computed from
        the exclusive end offset, i.e. it is one past the last matched
        character.
        """
        startch, endch = m.span()
        startln, endln = self.getline(startch), self.getline(endch)

        template = '{delim}' + (' in "{fn}"' if includefn else "")
        if startln == endln:
            template += ", line {lines[0]}:{chars[0]}"
            if endch - startch > 1:
                template += "-{chars[1]}"
        else:
            template += ", lines {lines[0]}:{chars[0]}-{lines[1]}:{chars[1]}"

        return template.format(
            fn=self.fn,
            delim=m.string[startch:endch],
            # columns are relative to the start of their own line
            chars=(self.getcol(startch, startln), self.getcol(endch, endln)),
            lines=(startln, endln),
        )

# Some settings classes

class Default(Settings):
    """Stock configuration: everything is inherited unchanged from Settings."""

class Python(Settings):
    """Settings that additionally skip ``#`` comments."""
    # Everything from a '#' up to the end of that line is ignored.
    ignore = r"#.*$"

class LaTeX(Settings):
    """Settings for LaTeX sources.

    Besides round, square and curly brackets this pairs up
    ``\\begin{..}``/``\\end{..}``, ``$$`` and ``$``.  ``displaymath`` is listed
    before ``mathmode`` in ``delims`` so that ``$$`` is tried before ``$``.
    """
    # Skip to the end of the line from a ``\def\name#1#2...`` macro
    # definition or from a ``%``.
    ignore = r"((\\def\\\w+(?:#\d)*)|%).*$" # macro defs

    envs = Settings.Delims(
        open=r"\\begin\{[^ \t\n\r\f\v{]+\}",
        close=r"\\end\{[^ \t\n\r\f\v{]+\}",
    )
    displaymath = Settings.Delims(open=r'\$\$', close=r'\$\$')
    mathmode = Settings.Delims(open=r'\$', close=r'\$')
    parens = Settings.Delims(open=r"\(", close=r"\)")
    brackets = Settings.Delims(open=r"\[", close=r"\]")
    braces = Settings.Delims(open=r"\{", close=r"\}")

    delims = (envs, displaymath, mathmode, parens, brackets, braces)

def _main():
    """Command-line entry point: check every file named on the command line.

    Messages go to stderr.  The process exits with status 0 when no
    delimiter error was found, 1 when at least one was found, and 2 when the
    run could not be completed (unknown format, missing file, or interrupted).
    """
    import sys
    from argparse import ArgumentParser
    formats = {
        "standard": Default,
        "std": Default,
        "py": Python,
        "latex": LaTeX,
        "tex": LaTeX,
    }

    # Fall back to a fixed description when the module has no docstring.
    description = __doc__ or "Check that the delimiters in files are balanced."
    ap = ArgumentParser(description=description)
    ap.add_argument('-ff', '--failfast', action='store_true',
                   help='fail on the first error encountered')
    ap.add_argument('-f', '--format', action='store', default="std",
                   help='what type of file to parse')
    ap.add_argument('files', metavar="file", type=str, nargs='+',
                   help='the files to parse')
    args = ap.parse_args()

    # Only the format lookup is guarded, so a KeyError raised anywhere else
    # is not mistaken for an unknown format.
    try:
        settings = formats[args.format.lower()]
    except KeyError:
        print("Format %s does not exist" % args.format, file=sys.stderr)
        sys.exit(2)
    settings.failfast = args.failfast

    errors = []
    unreadable = False
    try:
        for fn in args.files:
            try:
                with open(fn) as f:
                    text = f.read()
            except FileNotFoundError as e:
                # Report the missing file and carry on with the others.
                print(e, file=sys.stderr)
                unreadable = True
                continue
            # Every file is checked, not only the last one.
            errors.extend(ParenParser(text, fn, settings).errors())
            if errors and args.failfast:
                break
    except KeyboardInterrupt:
        print("Aborted.", file=sys.stderr)
        sys.exit(2)

    if errors:
        print('\n'.join(errors), file=sys.stderr)
    elif not unreadable:
        print("No errors found.", file=sys.stderr)
    sys.exit(2 if unreadable else 1 if errors else 0)

# Run the command-line interface only when executed as a script.
if __name__ == '__main__':
    _main()
	#!/usr/bin/env python3
	import argparse
	from bisect import bisect
	from collections import namedtuple
	from itertools import chain
	import re
	from sys import stdin, stderr

	# Names exported by a star-import of this module.
	__all__ = ("Settings", "ParenParser")

	# NOTE(review): this string comes after the import statements, so it is an
	# ordinary expression statement and not the module docstring.
	"""A program for checking the matching of parenthesis in a given file or input"""

	class Settings:
	    """Store the settings for the delimiter parser.

	    Subclassing Settings allows people to programmatically change the
	    functionality of the parser by overriding the class variables below.
	    """
	    # Do not change
	    Delims = namedtuple("Delims", ("open", "close"))

	    # Change these class variables to modify the behavior of the parser
	    flags = re.MULTILINE | re.IGNORECASE  # flags used for every pattern
	    ignore = ""  # regex for text to skip; empty means skip nothing
	    delims = Delims(r"\(", r"\)"), Delims(r"\[", r"\]")  # (open, close) regexes
	    failfast = False  # stop collecting errors after the first one
	    verify = True  # whether fail_if_invalid() should be run before parsing

	    # Modifying these may break the parser
	    @classmethod
	    def fail_if_invalid(cls):
	        """Verify that the regex compilation will work as intended.

	        Raises AssertionError when ``delims`` is empty, is not a
	        tuple/list/set/frozenset of ``Delims``, or holds a pattern that is
	        not a non-empty string or that defines a named group.  A pattern
	        that is not a valid regular expression raises ``re.error``.
	        """
	        def require(condition, message=""):
	            # Raised explicitly instead of using ``assert`` so that the
	            # checks are not stripped when Python runs with -O.
	            if not condition:
	                raise AssertionError(message)

	        ds = cls.delims
	        re.compile(cls.ignore, cls.flags)
	        require(ds, "must contain at least one pair of delimiters")
	        require(isinstance(ds, (tuple, list, set, frozenset)))
	        for d in ds:
	            require(isinstance(d, cls.Delims))
	        for pat in chain.from_iterable(ds):
	            require(isinstance(pat, str), "patterns must be strings")
	            require(pat, "delims must be nonempty strings")
	            # compile_regex() wraps each pattern in its own named group, so
	            # patterns that bring their own named groups are rejected here.
	            require(not re.compile(pat, cls.flags).groupindex,
	                    "patterns cannot contain named groups")

	    @classmethod
	    def compile_regex(cls):
	        """Create the regex used for parsing by matching groups.

	        Each pattern becomes one named alternative: ``ignore`` for the
	        ignore pattern (only when it is non-empty) and, for the i-th
	        delimiter pair, either ``o<i>`` and ``c<i>`` (open and close) or a
	        single ``e<i>`` when the open and close patterns are the same
	        string.  Alternatives are joined in that order, so earlier ones
	        win when several could match.
	        """
	        groups = [cls.makegroup(cls.ignore, "ignore")] if cls.ignore else []

	        for i, pair in enumerate(cls.delims):
	            if pair.open == pair.close:
	                groups.append(cls.makegroup(pair.open, 'e' + str(i)))  # either
	            else:
	                groups.append(cls.makegroup(pair.open, 'o' + str(i)))
	                groups.append(cls.makegroup(pair.close, 'c' + str(i)))

	        # A bare '|' is required here: it is the regex alternation operator.
	        return re.compile('|'.join(groups), cls.flags)

	    @staticmethod
	    def makegroup(patstr, name):
	        """Return a named capturing group called ``name`` around ``patstr``."""
	        return "(?P<{}>{})".format(name, patstr)

	class ParenParser:
	    """Check that the delimiters in a piece of text are balanced.

	    What counts as a delimiter (and what text is skipped) comes from a
	    settings class; ``errors()`` scans the text and reports the problems.
	    """
	    renewln = re.compile('^', re.MULTILINE)  # matches at the start of each line
	    NO_OPENING_ERR = "No opening delimiter to match closing {}"
	    WRONG_CLOSE_ERR = "Block opened by {} closed by {}"
	    NO_CLOSING_ERR = "No closing delimiter to match opening {}"

	    def __init__(self, s, fn="<input>", settings=Settings):
	        """Prepare to check the text ``s``.

	        s -- the text to scan
	        fn -- name used for the text in error messages
	        settings -- class supplying ``verify``, ``failfast``,
	            ``fail_if_invalid()`` and ``compile_regex()``
	        """
	        self.s = s
	        self.fn = fn
	        # Offset of the first character of every line, in increasing order.
	        self.lstarts = tuple(m.end() for m in ParenParser.renewln.finditer(s))
	        self.settings = settings
	        if settings.verify:
	            settings.fail_if_invalid()
	        self.regex = settings.compile_regex()

	    def errors(self):
	        """Return a list of all error messages regarding balanced delimiters.

	        A closing delimiter with nothing open is fatal: scanning stops and
	        the messages gathered so far plus that one are returned.  A closing
	        delimiter of the wrong kind and delimiters still open at the end of
	        the text are recorded and scanning continues, unless the settings
	        have ``failfast`` set, in which case the list is returned at once.
	        """
	        stack, output = [], []
	        for match in self.regex.finditer(self.s):
	            group = match.lastgroup
	            if group == 'ignore':
	                continue
	            # Group names are a kind letter ('o'pen, 'c'lose or 'e'ither)
	            # followed by the number of the delimiter pair.
	            mtype, dtype = group[0], group[1:]

	            if mtype == 'e':
	                # Identical open/close text: it closes only when the
	                # innermost open delimiter is this same one.
	                if stack and stack[-1].lastgroup == group:
	                    mtype = 'c'
	                else:
	                    mtype = 'o'

	            if mtype == 'o':
	                stack.append(match)
	            elif mtype == 'c':
	                if not stack:  # fatal error
	                    m1 = self.loc_str(match)
	                    return output + [ParenParser.NO_OPENING_ERR.format(m1)]
	                opening = stack.pop()
	                if opening.lastgroup[1:] != dtype:  # non-fatal error
	                    m1, m2 = self.loc_str(opening, False), self.loc_str(match)
	                    output.append(ParenParser.WRONG_CLOSE_ERR.format(m1, m2))
	                    if self.settings.failfast:
	                        return output
	            else:
	                # ``group`` is the group name itself (a str), not a callable.
	                raise AssertionError(group + " not recognized")

	        # Anything still on the stack was opened but never closed.
	        for leftover in stack:
	            m1 = self.loc_str(leftover)
	            output.append(ParenParser.NO_CLOSING_ERR.format(m1))
	            if self.settings.failfast:
	                return output
	        return output

	    def getline(self, pos):
	        """Return the 1-based number of the line holding the character at pos."""
	        return bisect(self.lstarts, pos)

	    def getcol(self, pos, line):
	        """Return the 1-based column of offset ``pos`` within line ``line``."""
	        # line should never be 0 (invariant provided by getline)
	        return pos - self.lstarts[line-1] + 1

	    def loc_str(self, m, includefn=True):
	        """Format the location of the match ``m`` for an error message.

	        The result is the matched text, then ``in "<fn>"`` when
	        ``includefn`` is true, then ``line L:C`` (with ``-C2`` appended when
	        the match is longer than one character) or ``lines L1:C1-L2:C2``
	        when the match starts and ends on different lines.  The end column
	        is computed from the exclusive end offset, i.e. it is one past the
	        last matched character.
	        """
	        startch, endch = m.span()
	        startln, endln = self.getline(startch), self.getline(endch)

	        template = '{delim}' + (' in "{fn}"' if includefn else "")
	        if startln == endln:
	            template += ", line {lines[0]}:{chars[0]}"
	            if endch - startch > 1:
	                template += "-{chars[1]}"
	        else:
	            template += ", lines {lines[0]}:{chars[0]}-{lines[1]}:{chars[1]}"

	        return template.format(
	            fn=self.fn,
	            delim=m.string[startch:endch],
	            # columns are relative to the start of their own line
	            chars=(self.getcol(startch, startln), self.getcol(endch, endln)),
	            lines=(startln, endln),
	        )

	# Some settings classes

	class Default(Settings):
	    """Stock configuration: everything is inherited unchanged from Settings."""

	class Python(Settings):
	    """Settings that additionally skip ``#`` comments."""
	    # Everything from a '#' up to the end of that line is ignored.
	    ignore = r"#.*$"

	class LaTeX(Settings):
	    """Settings for LaTeX sources.

	    Besides round, square and curly brackets this pairs up
	    ``\\begin{..}``/``\\end{..}``, ``$$`` and ``$``.  ``displaymath`` is
	    listed before ``mathmode`` in ``delims`` so that ``$$`` is tried
	    before ``$``.
	    """
	    # Skip to the end of the line from a ``\def\name#1#2...`` macro
	    # definition or from a ``%``.  The two ``*`` quantifiers and the bare
	    # ``|`` are required: without them only a single following character
	    # would be skipped and the alternation would match a literal pipe.
	    ignore = r"((\\def\\\w+(?:#\d)*)|%).*$" # macro defs
	    envs = Settings.Delims(r"\\begin\{[^ \t\n\r\f\v{]+\}",
	                          r"\\end\{[^ \t\n\r\f\v{]+\}")
	    displaymath = Settings.Delims(r'\$\$', r'\$\$')
	    mathmode = Settings.Delims(r'\$', r'\$')
	    parens = Settings.Delims(r"\(", r"\)")
	    brackets = Settings.Delims(r"\[", r"\]")
	    braces = Settings.Delims(r"\{", r"\}")
	    delims = (envs, displaymath, mathmode, parens, brackets, braces)

	def _main():
	    """Command-line entry point: check every file named on the command line.

	    Messages go to stderr.  The process exits with status 0 when no
	    delimiter error was found, 1 when at least one was found, and 2 when
	    the run could not be completed (unknown format, missing file, or
	    interrupted).
	    """
	    import sys
	    from argparse import ArgumentParser
	    formats = {
	        "standard": Default,
	        "std": Default,
	        "py": Python,
	        "latex": LaTeX,
	        "tex": LaTeX,
	    }

	    # Fall back to a fixed description when the module has no docstring.
	    description = __doc__ or "Check that the delimiters in files are balanced."
	    ap = ArgumentParser(description=description)
	    ap.add_argument('-ff', '--failfast', action='store_true',
	                   help='fail on the first error encountered')
	    ap.add_argument('-f', '--format', action='store', default="std",
	                   help='what type of file to parse')
	    ap.add_argument('files', metavar="file", type=str, nargs='+',
	                   help='the files to parse')
	    args = ap.parse_args()

	    # Only the format lookup is guarded, so a KeyError raised anywhere else
	    # is not mistaken for an unknown format.
	    try:
	        settings = formats[args.format.lower()]
	    except KeyError:
	        print("Format %s does not exist" % args.format, file=sys.stderr)
	        sys.exit(2)
	    settings.failfast = args.failfast

	    errors = []
	    unreadable = False
	    try:
	        for fn in args.files:
	            try:
	                with open(fn) as f:
	                    text = f.read()
	            except FileNotFoundError as e:
	                # Report the missing file and carry on with the others.
	                print(e, file=sys.stderr)
	                unreadable = True
	                continue
	            # Every file is checked, not only the last one.
	            errors.extend(ParenParser(text, fn, settings).errors())
	            if errors and args.failfast:
	                break
	    except KeyboardInterrupt:
	        print("Aborted.", file=sys.stderr)
	        sys.exit(2)

	    if errors:
	        print('\n'.join(errors), file=sys.stderr)
	    elif not unreadable:
	        print("No errors found.", file=sys.stderr)
	    sys.exit(2 if unreadable else 1 if errors else 0)

	# Run the command-line interface only when executed as a script.
	if __name__ == '__main__':
	    _main()