Skip to content

Instantly share code, notes, and snippets.

@bogsio
Created August 13, 2014 22:43
Show Gist options
  • Save bogsio/dffc807cefb22afbe8e7 to your computer and use it in GitHub Desktop.
Save bogsio/dffc807cefb22afbe8e7 to your computer and use it in GitHub Desktop.
Parsing utilities
from nltk import Tree
import logging
def corpus2trees(text):
    """Parse a corpus of bracketed parses into a list of Trees.

    The corpus is split on blank lines (``"\\n\\n"``); chunks that are empty
    or whitespace-only are skipped. A chunk for which the parser raises
    ``ValueError`` is logged as an error and left out of the result, so one
    malformed parse does not abort the whole corpus.

    Args:
        text: The corpus as a single string, one parse per
            blank-line-separated chunk.

    Returns:
        A list with one ``Tree`` per successfully parsed chunk, in corpus
        order. Empty when there is nothing to parse.
    """
    chunks = [chunk for chunk in text.split("\n\n") if chunk.strip()]
    if not chunks:
        return []

    # NLTK 3 renamed Tree.parse to Tree.fromstring; prefer the new name and
    # fall back to the old one so the function works with either release.
    parse = getattr(Tree, "fromstring", None) or Tree.parse

    trees = []
    for chunk in chunks:
        try:
            tree = parse(chunk)
        except ValueError:
            logging.error('Malformed parse: "%s"', chunk)
        else:
            trees.append(tree)
    return trees
def trees2productions(trees):
    """Flatten the productions of every tree into a single list.

    Calls ``productions()`` on each item of ``trees`` in order and
    concatenates the results, preserving both tree order and the order of
    productions within each tree.
    """
    return [rule for tree in trees for rule in tree.productions()]
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment