Last active
August 29, 2015 13:57
-
-
Save mlauter/96c3be3a9b6386131c6a to your computer and use it in GitHub Desktop.
Implementation of a probablistic CKY (Cocke–Kasami–Younger) parser in python: Miriam Lauter
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
1.58 ROOT S Period | |
1.58 ROOT S Exclam | |
15.58 ROOT VP Exclam | |
15.39 ROOT V Exclam | |
0.0 Period . | |
0.0 Exclam ! | |
0.00 S NP VP | |
3.81 S NP V | |
0.00 SBAR Comp S | |
0.00 Comp that | |
3.81 VP V NP | |
3.81 VP V N | |
1.49 VP V VP | |
5.30 VP V V | |
7.62 VP V PP | |
5.81 VP VP PP | |
7.62 VP Modal V | |
3.81 VP Modal VP | |
1.81 VP VComp SBAR | |
3.81 VP V Adj | |
2.81 Adj Intens Adj | |
1.00 Intens very | |
1.00 Intens somewhat | |
1.00 Intens so | |
0.00 Plural -s | |
3.70 N N Plural | |
3.70 N Adj N | |
3.70 N N PP | |
3.70 NP Det N | |
3.81 VP VP ConjVP | |
3.81 V V ConjV | |
5.70 NP NP ConjNP | |
3.70 N N ConjN | |
2.81 Adj Adj ConjAdj | |
0.00 ConjAdj Conj Adj | |
0.00 ConjNP Conj NP | |
0.00 ConjVP Conj VP | |
0.00 ConjV Conj V | |
0.00 ConjN Conj N | |
0.00 Conj and | |
0.00 Conj or | |
0.00 Inf to | |
5.09 V Inf V | |
3 Modal will | |
3 Modal would | |
3 Modal should | |
3 Modal could | |
3 Modal can | |
3 Modal might | |
3 Modal may | |
3 Modal must | |
5.09 V V Vsuff | |
5.09 VComp VComp Vsuff | |
0.00 Vsuff -ed | |
0.00 Vsuff -ing | |
0.00 Vsuff -s | |
0.00 Vsuff -0 | |
4.09 V have | |
5.09 V be | |
5.09 V is | |
5.09 V fly | |
5.09 V go | |
5.09 V sleep | |
5.09 V kiss | |
5.09 V love | |
5.09 V spoon | |
4.09 V eat | |
4.09 V duck | |
4.09 V smile | |
3.50 V want | |
3.50 V fine | |
5.09 VComp say | |
5.09 VComp think | |
4.09 VComp understand | |
2.09 V understand | |
4.09 V entice | |
3.7 NP Papa | |
3.7 NP Homer | |
3.7 NP Marge | |
3.7 NP Bart | |
3.7 NP Grandma | |
3.7 NP her | |
3.7 NP she | |
3.7 NP him | |
3.7 NP he | |
2.7 NP it | |
3.7 NP they | |
3.7 NP them | |
3.7 N man | |
3.7 N monkey | |
3.7 N duck | |
3.7 N fly | |
3.7 N puppy | |
3.7 N woman | |
3.7 N smile | |
3.7 N fine | |
3.7 N bonbon | |
3.7 N spoon | |
3.7 N president | |
3.7 N pickle | |
3.7 N sandwich | |
3.7 N floor | |
3.7 N chief_of_staff | |
3.7 N chiefs_of_staff | |
4.09 Det a | |
4.09 Det all | |
4.09 Det every | |
4.09 Det each | |
4.09 Det two | |
3.09 Det some | |
3.09 Det the | |
3.09 Det his | |
3.09 Det her | |
3.09 Det its | |
3.09 Det their | |
2.81 Adj fine | |
2.81 Adj blue | |
2.81 Adj delicious | |
2.81 Adj perplexed | |
2.81 Adj fly | |
2.81 Adj pickled | |
2.81 Adj unavailable | |
2.81 Adj surprising | |
0.00 PP P NP | |
2.32 P with | |
2.32 P on | |
2.32 P onto | |
2.32 P under | |
2.32 P in |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#Lauter_cky.py | |
#An implementation of a probablistic version of the Cocke-Kasami-Younger | |
#parsing algorithm. | |
#Written in Fall 2011 for the course Language and Computation at Yale University | |
#by Miriam Lauter | |
# | |
#The parser takes a weighted grammar file in Chomsky Normal Form | |
#and a sentence file of sentences to be parsed | |
#and returns all possible parses of each sentence and their weights | |
#I have included the grammar and a sentence file (provided in the assignment) | |
# | |
#Example: | |
#python Lauter_cky.py english_cnf.gr sentences.sen | |
############################################ | |
import sys | |
from collections import defaultdict | |
import decimal | |
from decimal import * | |
###Read grammar into dictionary### | |
def load_grammar(gramfile): | |
grammar = defaultdict(lambda: []) | |
for line in gramfile: | |
line = line.split() | |
if len(line) > 0: | |
key = '' | |
prob=Decimal(line[0]) | |
for item in line[2:]: | |
key= key + item + ' ' | |
tup=(prob,line[1]) | |
grammar[key[:-1]].append(tup) | |
return grammar | |
###implement CKY algorithm and make the table### | |
def CKY_parse(sentence, grammar): | |
sentence=sentence.split() | |
l=len(sentence) | |
table=[[0 for y in range(l+1)] for x in range(l)] | |
a=0 | |
for a in range(l): | |
table[a][0]=a | |
a+=1 | |
for j in range(1,l+1): | |
if sentence[j-1] in grammar: | |
table[j-1][j]=grammar[sentence[j-1]] | |
for i in range(0, j-1): | |
i2=j-2-i | |
if table[i2][j]==0: | |
table[i2][j]=[] | |
B=[] | |
C=[] | |
lB=0 | |
lC=0 | |
for k in range(i2+1, j): | |
if table[i2][k] != 0 and table[i2][k] !=[]: | |
B1=table[i2][k] | |
lB=len(B1) | |
B=[] | |
C=[] | |
for r in range(0, len(B1)): | |
Bprob=B1[0] | |
if type(B1[r][1]) is str: | |
B.append(B1[r][1]) | |
if table[k][j] != 0 and table[k][j] !=[]: | |
C1=table[k][j] | |
lC=len(C1) | |
for q in range(0, lC): | |
if type(C1[q][1]) is str: | |
C.append(C1[q][1]) | |
for b in range(0,len(B)): | |
for c in range(0, len(C)): | |
BC=B[b]+' '+ C[c] | |
pointer=[[i2,k,b],[k,j,c]] | |
Bprob=table[i2][k][b][0] | |
Cprob=table[k][j][c][0] | |
if BC in grammar: | |
lgBC=len(grammar[BC]) | |
for e in range(0,lgBC): | |
triple=(grammar[BC][e][0]+Bprob+Cprob,grammar[BC][e][1],pointer) | |
table[i2][j].append(triple) | |
return table | |
###Print trees and weights for a given sentence### | |
def maketree(sentence, table): | |
l=len(sentence.split()) | |
parse=False | |
if len(table[0][l])>0: | |
for n in range(0,len(table[0][l])): | |
if 'ROOT' in table[0][l][n]: | |
weight=table[0][l][n][0] | |
tree = maketree2(sentence.split(),table,0, l, n) | |
if tree !=[]: | |
print 'Tree ' , (n+1) , ' weight: ' , weight | |
print 'Tree ' , (n+1) , ' parse:\n' , tree, '\n' | |
parse=True | |
else: parse=False | |
if parse==False: print 'No Parse!' | |
###Helper function to make single tree### | |
def maketree2(sentence, table,i, j, n): | |
tree='' | |
node=table[i][j][n][1] | |
prob=table[i][j][n][0] | |
tree=tree+'('+ node + ' ' | |
if len(table[i][j][n])==3: | |
pointer=table[i][j][n][2] | |
tree= tree + maketree2(sentence, table, pointer[0][0], pointer[0][1], pointer[0][2]) + ' ' + maketree2(sentence, table, pointer[1][0], pointer[1][1], pointer[1][2]) + ')' | |
else: | |
tree = tree + sentence[i] + ')' | |
return tree | |
###Main Part of Code### | |
gramfile=open(sys.argv[1]) | |
sentences=open(sys.argv[2]) | |
grammar = load_grammar(gramfile) | |
sentences = sentences.readlines() | |
for sentence in sentences: | |
table= CKY_parse(sentence, grammar) | |
print table | |
print 'PARSING:: ', sentence | |
maketree(sentence, table) | |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Grandma love -s Bart and Marge . | |
Grandma want -s to fly . | |
he be -s so fly ! | |
Grandma sleep -s with a spoon . | |
Grandma eat -ed with a spoon . | |
every fine fly and monkey want to understand him . | |
the duck eat -ed his delicious sandwich with a pickle . | |
the monkey think -ed that the president want -ed his pickle on his sandwich . | |
all monkey -s want to smile and kiss the puppy . | |
some duck -s and somewhat perplexed puppy -s be -ed unavailable . | |
the bonbon -s on the spoon entice -0 . | |
Papa say -s that Grandma might sleep on the floor ! | |
the fine and blue woman and every man must have eat -ed two sandwich -s and sleep -ed on the floor . |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment