Skip to content

Instantly share code, notes, and snippets.

@jiffyclub
Created July 17, 2014 04:14
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save jiffyclub/d37ad86853a4dd2c56c8 to your computer and use it in GitHub Desktop.
Save jiffyclub/d37ad86853a4dd2c56c8 to your computer and use it in GitHub Desktop.
A `columns_in_formula` function that extracts the names of all the columns used in a patsy formula.
try:
    from StringIO import StringIO  # Python 2
except ImportError:
    from io import StringIO  # Python 3
from tokenize import generate_tokens, NAME

import patsy
def _tokens_from_patsy(node):
"""
Yields all the individual tokens from within a patsy formula
as parsed by patsy.parse_formula.parse_formula.
Parameters
----------
node : patsy.parse_formula.ParseNode
"""
for n in node.args:
for t in _tokens_from_patsy(n):
yield t
if node.token:
yield node.token
def columns_in_formula(formula):
    """
    Return the names of all the columns used in a patsy formula.

    Names are found lexically: every Python NAME token in a formula
    term is reported. For a term containing parentheses (for example a
    function call), only the text between the first ``(`` and the last
    ``)`` is inspected, so the name of the called function itself is
    not reported.

    Parameters
    ----------
    formula : str
        A patsy formula.

    Returns
    -------
    columns : list of str
        Unique column names, in the order they are first encountered
        while walking the parsed formula.

    """
    parsed = patsy.parse_formula.parse_formula(formula)

    columns = []
    seen = set()

    for token in _tokens_from_patsy(parsed):
        expr = token.extra
        # Tokens without ``extra`` carry no expression text to scan.
        if expr is None:
            continue

        if '(' in expr:
            # if there are parentheses in the expression we
            # want to drop them and everything outside
            # and start again from the top
            start = expr.find('(') + 1
            fin = expr.rfind(')')
            names = columns_in_formula(expr[start:fin])
        else:
            names = [
                tokval
                for toknum, tokval, _, _, _ in generate_tokens(
                    StringIO(expr).readline)
                if toknum == NAME
            ]

        # De-duplicate while keeping first-seen order so the result is
        # deterministic (a plain ``list(set(...))`` has arbitrary order).
        for name in names:
            if name not in seen:
                seen.add(name)
                columns.append(name)

    return columns
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment