Last active
August 29, 2015 14:24
-
-
Save inktrap/0aef00e282a13b1fbc2c to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
import nltk | |
import sys | |
import datetime | |
import matplotlib.pyplot as plt | |
import csv | |
# Number of benchmark runs; run i parses a sentence carrying i extra
# "with the telescope" prepositional phrases.
LIMIT = 14

# Toy context-free grammar.  The recursive rules ``VP -> VP PP`` and
# ``N -> N PP`` let every additional prepositional phrase attach in more than
# one place.
_GRAMMAR_RULES = """
S -> NP VP
VP -> VP PP
N -> 'man'
D -> 'the'
P -> 'with'
NP -> D N
VP -> V NP
N -> 'girl'
NP -> 'John'
V -> 'saw'
N -> N PP
PP -> P NP
N -> 'telescope'
NP -> 'Mary'
"""

grammar = nltk.CFG.fromstring(_GRAMMAR_RULES)
def parse_sentence(this_grammar, this_sent):
    """Parse one tokenized sentence with a freshly built Earley chart parser.

    Args:
        this_grammar: Grammar passed to ``nltk.parse.EarleyChartParser``.
        this_sent: The sentence as a list of word tokens.

    Returns:
        Whatever ``parse_one`` yields for the sentence (presumably the first
        parse tree, or ``None`` if there is no parse -- confirm against the
        NLTK documentation).
    """
    # A new parser is constructed on every call, so its construction cost is
    # part of whatever the caller measures around this function.
    return nltk.parse.EarleyChartParser(this_grammar).parse_one(this_sent)
def parse(LIMIT, grammar):
    """Time the parser on progressively longer sentences and log the results.

    Run ``i`` (for ``i`` in ``range(LIMIT)``) parses the sentence
    ``"the man saw the girl"`` followed by ``i`` copies of
    ``"with the telescope"`` once via ``parse_sentence`` and measures the
    elapsed wall-clock time.  All results are written to ``logfile.log`` in
    the current working directory as space-separated columns:
    ``Run Microseconds SentenceLength NormalizedMicroseconds``.

    Args:
        LIMIT: Number of runs to perform.
        grammar: Grammar handed unchanged to ``parse_sentence``.

    Returns:
        None.  The only output is the (over)written ``logfile.log``.
    """
    base_words = "the man saw the girl".split()
    extension = "with the telescope".split()

    log_lines = ["Run Microseconds SentenceLength NormalizedMicroseconds"]
    for i in range(LIMIT):
        sent = base_words + extension * i
        # print ' '.join(sent)
        start = datetime.datetime.now()
        parse_sentence(grammar, sent)
        dur = datetime.datetime.now() - start

        # ``timedelta.microseconds`` is only the sub-second component
        # (0..999999).  Fold in days and seconds so that a parse taking one
        # second or longer is not logged with its whole seconds dropped.
        total_usec = (dur.days * 86400 + dur.seconds) * 1000000 + dur.microseconds

        # Normalize the duration by the number of tokens in the sentence.
        sentlen = len(sent)
        log_lines.append("{0} {1} {2} {3}".format(
            i + 1, total_usec, sentlen, total_usec / float(sentlen)))

    with open('logfile.log', 'w') as fh:
        fh.write('\n'.join(log_lines))
def plot_data():
    """Plot the normalized parse times from ``logfile.log`` into ``earley.png``.

    Reads ``logfile.log`` from the current working directory as a
    space-delimited table whose first row is the header, extracts the
    ``NormalizedMicroseconds`` column and plots its values in file order.

    Raises:
        IOError/OSError: If ``logfile.log`` cannot be opened.
        StopIteration: If the log file is empty (no header row).
        ValueError: If the header has no ``NormalizedMicroseconds`` column or
            a value in that column is not a number.
    """
    # Text mode plus the ``next()`` builtin work on both Python 2.6+ and
    # Python 3; ``reader.next()`` and binary mode only work on Python 2.
    with open('logfile.log', 'r') as fh:
        reader = csv.reader(fh, delimiter=' ')
        headers = next(reader)
        dataIndex = headers.index('NormalizedMicroseconds')
        # Skip blank lines (csv yields them as empty rows) instead of
        # failing with an IndexError.
        data = [float(row[dataIndex]) for row in reader if row]

    ## plot it and save the output
    plt.plot(data)
    plt.ylabel('Time in microseconds (normalized by sentence length)')
    plt.xlabel('Runs')
    plt.savefig('earley.png')
if __name__ == '__main__':
    # Usage:
    #   <script>          run the benchmark and write logfile.log
    #   <script> --plot   plot an existing logfile.log to earley.png
    #
    # Check the argument count before indexing: with no arguments
    # ``sys.argv[1]`` raises IndexError, which made the default benchmark
    # branch unreachable unless some dummy argument was supplied.
    if len(sys.argv) > 1 and sys.argv[1] == '--plot':
        plot_data()
    else:
        parse(LIMIT, grammar)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Run Microseconds SentenceLength NormalizedMicroseconds | |
1 1567 5 313.4 | |
2 2507 8 313.375 | |
3 3781 11 343.727272727 | |
4 5553 14 396.642857143 | |
5 8172 17 480.705882353 | |
6 10476 20 523.8 | |
7 15129 23 657.782608696 | |
8 23250 26 894.230769231 | |
9 43597 29 1503.34482759 | |
10 141894 32 4434.1875 | |
11 434989 35 12428.2571429 | |
12 669095 38 17607.7631579 | |
13 428897 41 10460.902439 | |
14 940391 44 21372.5227273 |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment