Skip to content

Instantly share code, notes, and snippets.

@webcracy
Created December 5, 2011 16:03
Show Gist options
  • Save webcracy/1434087 to your computer and use it in GitHub Desktop.
Save webcracy/1434087 to your computer and use it in GitHub Desktop.
Manybots - Python Natural Language Parsing
from nltk import pos_tag, word_tokenize
from en import verb # from http://nodebox.net/code/index.php/Linguistics
import re, pprint
from nltk.probability import ConditionalFreqDist
import os
def ie_preprocess(sentence):
    """Tokenize *sentence* and return the part-of-speech tagged result.

    The text is first split with ``word_tokenize``; the resulting tokens are
    handed to ``pos_tag`` and whatever it produces is returned unchanged.
    """
    tokens = word_tokenize(sentence)
    return pos_tag(tokens)
def findtags(tag_prefix, tagged_text):
    """Collect the most frequent words for every tag starting with a prefix.

    Args:
        tag_prefix: Prefix that a tag must start with to be included
            (e.g. ``'NN'`` also matches ``'NNS'``).
        tagged_text: Iterable of ``(word, tag)`` pairs.

    Returns:
        A dict mapping each matching tag to a list of at most five words,
        most frequent first. Tags with no words are absent, so the dict is
        empty when nothing matches.
    """
    # Local import keeps this block self-contained; only the stdlib is needed.
    from collections import Counter, defaultdict

    # Counting explicitly avoids slicing ``dict.keys()``, which is not
    # subscriptable on Python 3, and does not depend on the key order of any
    # particular frequency-distribution implementation.
    words_by_tag = defaultdict(Counter)
    for word, tag in tagged_text:
        if tag.startswith(tag_prefix):
            words_by_tag[tag][word] += 1

    return {
        tag: [word for word, _count in counts.most_common(5)]
        for tag, counts in words_by_tag.items()
    }
def parse_sentence(input_sentence):
    """Pull a verb, an object, a target and tags out of a sentence.

    The input is split on the literal ``' tagged with '``: the first piece is
    the sentence that gets analysed, every remaining piece is kept as a tag.

    Args:
        input_sentence: The raw sentence, optionally followed by one or more
            ``' tagged with <tag>'`` suffixes.

    Returns:
        A dict with these keys:

        * ``'verbs'`` (only if a ``VBD`` token exists): the first ``VBD``
          word passed through ``verb.infinitive``.
        * ``'objects'`` (always): the first ``NNS`` word, lower-cased with
          trailing ``'s'`` characters stripped; otherwise the first ``NN``
          word; otherwise the empty string.
        * ``'target_values'`` (only if an ``IN`` or ``TO`` token exists):
          the text following the last whole-word occurrence of that token.
        * ``'tags'`` (only if any were given): the list of tag strings.
    """
    pieces = input_sentence.split(' tagged with ')
    # The sentence proper comes first; whatever follows are the tags.
    sentence = pieces.pop(0)
    tags = pieces

    tuples = ie_preprocess(sentence)
    result = {}

    # Verb: first past-tense token, normalised by the `en` library.
    verbs = findtags('VBD', tuples)
    if verbs:
        result['verbs'] = verb.infinitive(verbs['VBD'][0])

    # Object: prefer a plural noun, fall back to a singular one.
    plural_nouns = findtags('NNS', tuples)
    if plural_nouns:
        # Naive singularisation: drops every trailing 's'.
        the_object = plural_nouns['NNS'][0].lower().rstrip('s')
    else:
        # findtags matches by prefix, so the result may hold only e.g. 'NNP'
        # (or nothing at all); look the exact 'NN' tag up defensively instead
        # of raising KeyError.
        singular_nouns = findtags('NN', tuples).get('NN')
        the_object = singular_nouns[0] if singular_nouns else ''
    result['objects'] = the_object

    # Target marker: a preposition (IN) if present, otherwise 'to' (TO).
    target = None
    for tag in ('IN', 'TO'):
        found = findtags(tag, tuples)
        if found:
            target = found[tag][0]
            break

    # Everything after the marker is the target value. Match the marker as a
    # whole word only: a plain substring split on e.g. 'in' would also cut
    # inside words such as 'Berlin'.
    if target:
        marker = r'\b' + re.escape(target) + r'\b'
        target_value = re.split(marker, sentence)[-1]
        result['target_values'] = target_value.strip()

    if tags:
        result['tags'] = tags
    return result
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment