Created
November 2, 2013 21:59
-
-
Save baojie/7283960 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding: utf-8 -*-
"""
Test MBSP parsing.

Help on CLiPS MBSP:
http://www.clips.ua.ac.be/pages/MBSP
"""
import MBSP | |
from time import time | |
# Path of the input file read by MBSP_parser(); one sentence per line.
sample_input = "sentences.txt"
def run(f):
    """Call ``f`` with no arguments and print how long the call took.

    Args:
        f: A zero-argument callable to time.

    The elapsed wall-clock time is printed as ``<elapsed> seconds``. It is
    reported even when ``f`` raises; the exception then propagates to the
    caller unchanged.
    """
    start_time = time()
    try:
        f()
    finally:
        # A single pre-formatted argument gives the same output whether
        # ``print`` is the Python 2 statement or the Python 3 function.
        print("%s seconds" % (time() - start_time))
def _to_ascii(text):
    """Return ``text`` with every non-ASCII character dropped.

    Byte strings are decoded first: calling ``.encode('ascii', 'ignore')``
    directly on a Python 2 byte string triggers an implicit strict ASCII
    decode, which raises on exactly the input this fallback is meant for.
    """
    if isinstance(text, bytes):
        # NOTE(review): assumes the sample file is UTF-8 encoded -- confirm.
        text = text.decode("utf-8", "ignore")
    ascii_text = text.encode("ascii", "ignore")
    if not isinstance(ascii_text, str):
        # Python 3 only: ``encode`` returned bytes, convert back to text.
        ascii_text = ascii_text.decode("ascii")
    return ascii_text


def _parse_with_ascii_fallback(text, number):
    """Parse ``text`` with MBSP, retrying once on an ASCII-only copy.

    Args:
        text: One line read from the sample file.
        number: 1-based sentence number, used only in the retry message.

    Returns:
        The value returned by ``MBSP.parse``.

    Raises:
        Exception: whatever ``MBSP.parse`` raises on the second attempt.
    """
    try:
        return MBSP.parse(text)
    except Exception:
        print("parsing failed, try using ascii parsing %d %s" % (number, text))
        return MBSP.parse(_to_ascii(text))


def MBSP_parser():
    """Parse every line of ``sample_input`` with MBSP and print the results.

    For each sentence this prints the numbered input line, the
    pretty-printed parse, the sentence relations, its subjects and objects,
    the chunks of the first split sentence and the repr of the split result.
    A sentence that raises during any of these steps is reported with a
    ``[Parsing error]`` line and skipped. A final line reports how many
    sentences were processed without error.
    """
    suc = 0
    # ``with`` closes the file even if an unexpected exception escapes.
    with open(sample_input, "r") as f_in:  # one sentence per line
        for number, text in enumerate(f_in, 1):
            print("%d) %s" % (number, text))
            try:
                # test parsing
                s = _parse_with_ascii_fallback(text, number)
                MBSP.pprint(s)
                sen = MBSP.Sentence(
                    s,
                    token=[MBSP.WORD, MBSP.POS, MBSP.CHUNK, MBSP.PNP,
                           MBSP.REL, MBSP.ANCHOR, MBSP.LEMMA],
                )
                print(sen.relations)
                # Two spaces after "=" reproduce the original
                # ``print "* Subject = ", x`` output exactly.
                print("* Subject =  %s" % (sen.subjects,))
                print("* Object =  %s" % (sen.objects,))
                # test prepositional phrase chunking
                pnp = MBSP.split(s)
                print(pnp[0].chunks)
                # print all tagged words
                print(repr(pnp))
                suc += 1
            except Exception:
                # Best effort: report the sentence and continue with the
                # next one. KeyboardInterrupt/SystemExit are not caught.
                print("[Parsing error] %s" % text)
    print("successfully parsed %d sentences" % suc)
MBSP.start(timeout=60)  # create a server
try:
    run(MBSP_parser)
finally:
    # Stop the server even when the timed run raises, so it is not left
    # running after the script exits.
    MBSP.stop()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment