Skip to content

Instantly share code, notes, and snippets.

@kitsu
Last active May 30, 2016 22:16
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save kitsu/1184b3caf4c209d67ab3f481de104dc3 to your computer and use it in GitHub Desktop.
Save kitsu/1184b3caf4c209d67ab3f481de104dc3 to your computer and use it in GitHub Desktop.
Flaky Autolisp parser with stats collection/reporting. See blog series starting at http://kitsu.github.io/2016/05/27/autolisp_parse_01/
"""Process a lisp file to extract useful data."""
import sys, os
from collections import namedtuple
substitutions = (
# Expand quote literal to quote fn
("'(", "(quote "),
# Expand parens with white-space for splitting
(')', ' ) '),
('(', ' ( '),
)
Stats = namedtuple('Stats', 'funcs, deps, strs, files')
def modify(line):
"""Perform replacements listed in global dictionary."""
for k, v in substitutions:
if k in line:
line = line.replace(k, v)
return line
def scrub_comment(line):
"""Carefully clean comments from line."""
if not line or line[0] == ';':
# Just skip blank lines and comments
return
semi = line.find(';')
if semi > 0:
if '"' not in line or line.rfind('"') < semi:
line = line[:semi]
else:
# Find left most semi not in a string literal...
parts = line.split(';')
first, rest = parts[0], parts[1:]
accepted = [first]
quotes = 0
#import pdb; pdb.set_trace()
while rest:
quotes += first.count('"')
# If we have collected an even number of quotes on left
if quotes%2 == 0:
break
first, rest = rest[0], rest[1:]
accepted.append(first)
line = ';'.join(accepted)
return line
def tokenize(infile):
"""Cheaty str.split tokenization."""
tokens = list()
# Clean and append all lines into one big string
for line in infile:
line = scrub_comment(line.strip())
if not line:
continue
# Tokenize the line
tokens.extend([token for token in
# AutoLisp is case-insensitive
modify(line).lower().split()
if token])
return tokens
# Model states as classes that know what to consume
Number = namedtuple('Number', 'value')
String = namedtuple('String', 'value')
Symbol = namedtuple('Symbol', 'value')
def consume_other(tokens):
"""Handle Symbol, String, and Number tokens."""
first = tokens[0]
try:
num = float(first)
return Number(num), tokens[1:]
except ValueError:
pass
if '"' == first[0]:
return consume_string(tokens)
return Symbol(first), tokens[1:]
def consume_string(tokens):
"""Consume tokens between double-quotes."""
first, rest = tokens[0], tokens[1:]
if first.count('"')%2 == 0 and '\\"' not in first:
return String(first.strip('"')), tokens[1:]
parts = [first]
while rest:
first, rest = rest[0], rest[1:]
parts.append(first)
quotes = first.count('"') - first.count('\\"') + first.count('\\\\"')
if quotes%2 == 1:
break
else:
print tokens
raise SyntaxError("Reached end of input without matching quote.")
whole = String(" ".join(parts).strip('"'))
return whole, rest
def consume_body(tokens):
"""Consume all expressions until unpaired closing bracket ')' found."""
children = list()
while tokens and not tokens[0] == ')':
# These are in order of specificity
if Defun.matches(tokens):
fn = Defun()
children.append(fn)
tokens = fn.consume(tokens)
elif Apply.matches(tokens):
ap = Apply()
children.append(ap)
tokens = ap.consume(tokens)
elif List.matches(tokens):
li = List()
children.append(li)
tokens = li.consume(tokens)
else:
# First token must be one of Symbol, String, Number
other, tokens = consume_other(tokens)
children.append(other)
# Omit last head if it exists (i.e. ')')
return children, tokens[1:]
class Program(object):
"""Base matcher."""
def __init__(self, tokens):
self.children = list()
self.consume(tokens)
def consume(self, tokens):
"""Consume tokens to build children"""
#import pdb; pdb.set_trace()
body, tokens = consume_body(tokens)
self.children = body
if tokens:
raise SyntaxError("Unmatched ')' in input.")
def __repr__(self):
return "Program({})".format(self.children)
class Defun(object):
"""Match function definition."""
no_params_err = "Defun '{}': expected parameter list, not {}"
nested_params_err = "Defun '{}': parameters must be simple list, found \n{}"
def __init__(self):
self.name = None
self.params = None
self.locals = None
self.body = None
self.deps = None
def __repr__(self):
return "Defun({0.name}, {0.params}, {0.locals}):\n{0.body}".format(self)
@classmethod
def matches(cls, tokens):
"""Match '(defun name (/) body)."""
if tokens[0] == '(' and tokens[1] == 'defun':
return True
return False
def consume(self, tokens):
"""Pull out key elements of fn, then process body."""
self.name = tokens[2]
if not tokens[3] == '(':
raise SyntaxError(self.no_params_err.format(self.name, tokens[3]))
params_close = 3 + tokens[3:].index(')')
next_list = 3 + tokens[4:].index('(')
if params_close > next_list:
raise SyntaxError(self.nested_params_err.format(self.name,
tokens[3:params_close + 1]))
self.parse_params(tokens[4:params_close])
self.body, tokens = consume_body(tokens[params_close + 1:])
self.discover_deps()
return tokens
def parse_params(self, params):
"""Separate params and locals."""
split = params.index('/') if '/' in params else None
if split:
self.params = params[:split]
self.locals = params[split + 1:]
else:
self.params = params
self.locals = list()
def discover_deps(self):
"""Walk through body looking for functions not in locals."""
fn_locals = self.locals
deps = set()
for sub_dep in (item for item in self.body if isinstance(item, Defun)):
deps.update(sub_dep.deps)
contents = [item for item in self.body if isinstance(item, Apply)]
while contents:
ap = contents.pop()
if ap.func not in fn_locals:
deps.add(ap.func)
contents.extend(item for item in ap.body
if isinstance(item, Apply))
self.deps = deps
class Apply(object):
"""Match function application."""
def __init__(self):
func = None
body = None
def __repr__(self):
return "Apply({0.func}):\n{0.body}".format(self)
@classmethod
def matches(cls, tokens):
"""Match '(symbol body)."""
return tokens[0] == '(' and not tokens[1] == '('
def consume(self, tokens):
self.func = tokens[1]
body, tokens = consume_body(tokens[2:])
self.body = body
return tokens
class List(object):
"""Match list literal (in cond)."""
def __init__(self):
body = None
def __repr__(self):
return "List():\n{0.body}".format(self)
@classmethod
def matches(cls, tokens):
"""Match '(body)."""
return tokens[0] == '('
def consume(self, tokens):
body, tokens = consume_body(tokens[1:])
self.body = body
return tokens
def parse(tokens):
"""Do basic tagging and build syntax tree from tokens."""
return Program(tokens)
def analyze(tree):
"""Collect info about program tree."""
extensions = set(('.dcl', '.lsp', '.mnr', '.dvb', '.mnl', '.vlx'))
stats = Stats(set(), set(), set(), set())
exprs = tree.children[:]
while exprs:
expr = exprs.pop()
body = None
if isinstance(expr, Defun):
stats.funcs.add(expr.name)
stats.deps.update(expr.deps)
elif isinstance(expr, String):
val = expr.value
stats.strs.add(val)
if val[-4:] in extensions:
stats.files.add(val)
if hasattr(expr, 'body'):
exprs.extend(item for item in expr.body)
return stats
def process_file(filename):
"""Walk through file maintaining stack of state."""
with open(filename, 'r') as infile:
tokens = tokenize(infile)
tree = parse(tokens)
return analyze(tree)
def output_stats(stats, target=None, prefix=''):
"""Pretty-print stats, optionally to a filelike."""
if not target:
target = sys.stdout
target.write("{}Functions:\n".format(prefix))
if stats.funcs:
for name in sorted(stats.funcs):
target.write("{}\t{}\n".format(prefix, name))
else:
target.write("{}None\n".format(prefix))
target.write("{}Dependencies:\n".format(prefix))
if stats.deps:
for name in sorted(stats.deps):
target.write("{}\t{}\n".format(prefix, name))
else:
target.write("{}None\n".format(prefix))
target.write("{}Strings:\n".format(prefix))
if stats.strs:
for name in sorted(stats.strs, key=len):
target.write("{}\t{}\n".format(prefix, name))
else:
target.write("{}None\n".format(prefix))
target.write("{}Possible file dependencies:\n".format(prefix))
if stats.files:
for name in stats.files:
target.write("{}\t{}\n".format(prefix, name))
else:
target.write("{}None\n".format(prefix))
if __name__ == "__main__":
filename = sys.argv[-1]
stats = process_file(filename)
output_stats(stats)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment