webcracy/botbot.py

## botbot.py
from nltk import pos_tag, word_tokenize
from en import verb # from http://nodebox.net/code/index.php/Linguistics
import re, pprint
from nltk.probability import ConditionalFreqDist
import os


def ie_preprocess(sentence):
    """Tokenize *sentence* and return its part-of-speech tagged tokens.

    The string is split with ``word_tokenize`` and the resulting tokens are
    handed to ``pos_tag``; whatever the tagger returns is passed back
    unchanged.
    """
    tokens = word_tokenize(sentence)
    return pos_tag(tokens)

def _top_words(freq_dist, limit):
    """Return up to *limit* words from *freq_dist* as a plain list."""
    most_common = getattr(freq_dist, 'most_common', None)
    if most_common is not None:
        # Counter-style distributions: take the most frequent words.
        return [word for word, _count in most_common(limit)]
    # No most_common(): slice a real list, because keys() may be a view
    # object that does not support slicing.
    return list(freq_dist.keys())[:limit]


def findtags(tag_prefix, tagged_text):
    """Group the words of *tagged_text* by tag, keeping tags with a prefix.

    Args:
        tag_prefix: string every kept tag must start with. Matching is by
            prefix, so 'NN' keeps any tag beginning with 'NN', not only
            the tag 'NN' itself.
        tagged_text: iterable of ``(word, tag)`` pairs.

    Returns:
        A dict mapping each matching tag to a list of at most five words
        seen with that tag (the most frequent ones when the frequency
        distribution provides ``most_common``). The dict is empty when no
        tag matches.
    """
    cfd = ConditionalFreqDist((tag, word) for (word, tag) in tagged_text
                              if tag.startswith(tag_prefix))
    return dict((tag, _top_words(cfd[tag], 5)) for tag in cfd.conditions())


def parse_sentence(input_sentence):
    """Pull a verb, an object, a target and tags out of a request sentence.

    The input may contain one or more ' tagged with ' separators. The text
    before the first separator is analysed; every later section is kept
    verbatim as a tag.

    Args:
        input_sentence: the raw sentence string.

    Returns:
        A dict with the following keys:

        'objects' (always present): the first word findtags reports for
            the NNS tag, lower-cased with trailing 's' characters removed;
            otherwise the first word reported for the NN tag; otherwise ''.
        'verbs' (only if a VBD word exists): ``verb.infinitive`` of the
            first word reported for the VBD tag.
        'target_values' (only if an IN or TO word exists): the stripped
            text that follows the last whole-word occurrence of that word
            in the sentence.
        'tags' (only if any ' tagged with ' section exists): list of the
            tag sections.
    """
    # Text before the first separator is the sentence; the rest are tags.
    sections = input_sentence.split(' tagged with ')
    sentence = sections[0]
    tags = sections[1:]

    tuples = ie_preprocess(sentence)
    result = {}

    # Only words tagged VBD are considered as the verb.
    past_verbs = findtags('VBD', tuples).get('VBD')
    if past_verbs:
        result['verbs'] = verb.infinitive(past_verbs[0])

    # Prefer a plural noun as the wanted object.
    plural_nouns = findtags('NNS', tuples).get('NNS')
    if plural_nouns:
        # NOTE: crude singularisation - rstrip removes *all* trailing 's'
        # characters, so 'glasses' becomes 'glasse'.
        the_object = plural_nouns[0].lower().rstrip('s')
    else:
        # findtags matches by prefix, so a lookup for 'NN' may come back
        # with only other NN* tags, or with nothing at all. Use .get() and
        # fall back to '' instead of raising KeyError.
        singular_nouns = findtags('NN', tuples).get('NN')
        the_object = singular_nouns[0] if singular_nouns else ''
    result['objects'] = the_object

    # The target marker is the first IN word, or failing that the first
    # TO word.
    target = None
    for marker_tag in ('IN', 'TO'):
        marker_words = findtags(marker_tag, tuples).get(marker_tag)
        if marker_words:
            target = marker_words[0]
            break

    # Everything after the marker is the target value. Match the marker
    # only as a whole word: a plain str.split would also cut inside longer
    # words that merely contain it (e.g. 'in' inside 'Beijing').
    if target:
        marker = re.compile(r'(?<!\w)' + re.escape(target) + r'(?!\w)',
                            re.UNICODE)
        result['target_values'] = marker.split(sentence)[-1].strip()

    if tags:
        result['tags'] = tags

    return result
	from nltk import pos_tag, word_tokenize
	from en import verb # from http://nodebox.net/code/index.php/Linguistics
	import re, pprint
	from nltk.probability import ConditionalFreqDist
	import os


	def ie_preprocess(sentence):
	    """Tokenize *sentence* and return its part-of-speech tagged tokens.

	    The string is split with ``word_tokenize`` and the resulting tokens
	    are handed to ``pos_tag``; whatever the tagger returns is passed back
	    unchanged.
	    """
	    tokens = word_tokenize(sentence)
	    return pos_tag(tokens)

	def _top_words(freq_dist, limit):
	    """Return up to *limit* words from *freq_dist* as a plain list."""
	    most_common = getattr(freq_dist, 'most_common', None)
	    if most_common is not None:
	        # Counter-style distributions: take the most frequent words.
	        return [word for word, _count in most_common(limit)]
	    # No most_common(): slice a real list, because keys() may be a view
	    # object that does not support slicing.
	    return list(freq_dist.keys())[:limit]


	def findtags(tag_prefix, tagged_text):
	    """Group the words of *tagged_text* by tag, keeping tags with a prefix.

	    Args:
	        tag_prefix: string every kept tag must start with. Matching is by
	            prefix, so 'NN' keeps any tag beginning with 'NN', not only
	            the tag 'NN' itself.
	        tagged_text: iterable of ``(word, tag)`` pairs.

	    Returns:
	        A dict mapping each matching tag to a list of at most five words
	        seen with that tag (the most frequent ones when the frequency
	        distribution provides ``most_common``). The dict is empty when no
	        tag matches.
	    """
	    cfd = ConditionalFreqDist((tag, word) for (word, tag) in tagged_text
	                              if tag.startswith(tag_prefix))
	    return dict((tag, _top_words(cfd[tag], 5)) for tag in cfd.conditions())


	def parse_sentence(input_sentence):
	    """Pull a verb, an object, a target and tags out of a request sentence.

	    The input may contain one or more ' tagged with ' separators. The text
	    before the first separator is analysed; every later section is kept
	    verbatim as a tag.

	    Args:
	        input_sentence: the raw sentence string.

	    Returns:
	        A dict with the following keys:

	        'objects' (always present): the first word findtags reports for
	            the NNS tag, lower-cased with trailing 's' characters removed;
	            otherwise the first word reported for the NN tag; otherwise ''.
	        'verbs' (only if a VBD word exists): ``verb.infinitive`` of the
	            first word reported for the VBD tag.
	        'target_values' (only if an IN or TO word exists): the stripped
	            text that follows the last whole-word occurrence of that word
	            in the sentence.
	        'tags' (only if any ' tagged with ' section exists): list of the
	            tag sections.
	    """
	    # Text before the first separator is the sentence; the rest are tags.
	    sections = input_sentence.split(' tagged with ')
	    sentence = sections[0]
	    tags = sections[1:]

	    tuples = ie_preprocess(sentence)
	    result = {}

	    # Only words tagged VBD are considered as the verb.
	    past_verbs = findtags('VBD', tuples).get('VBD')
	    if past_verbs:
	        result['verbs'] = verb.infinitive(past_verbs[0])

	    # Prefer a plural noun as the wanted object.
	    plural_nouns = findtags('NNS', tuples).get('NNS')
	    if plural_nouns:
	        # NOTE: crude singularisation - rstrip removes *all* trailing 's'
	        # characters, so 'glasses' becomes 'glasse'.
	        the_object = plural_nouns[0].lower().rstrip('s')
	    else:
	        # findtags matches by prefix, so a lookup for 'NN' may come back
	        # with only other NN* tags, or with nothing at all. Use .get() and
	        # fall back to '' instead of raising KeyError.
	        singular_nouns = findtags('NN', tuples).get('NN')
	        the_object = singular_nouns[0] if singular_nouns else ''
	    result['objects'] = the_object

	    # The target marker is the first IN word, or failing that the first
	    # TO word.
	    target = None
	    for marker_tag in ('IN', 'TO'):
	        marker_words = findtags(marker_tag, tuples).get(marker_tag)
	        if marker_words:
	            target = marker_words[0]
	            break

	    # Everything after the marker is the target value. Match the marker
	    # only as a whole word: a plain str.split would also cut inside longer
	    # words that merely contain it (e.g. 'in' inside 'Beijing').
	    if target:
	        marker = re.compile(r'(?<!\w)' + re.escape(target) + r'(?!\w)',
	                            re.UNICODE)
	        result['target_values'] = marker.split(sentence)[-1].strip()

	    if tags:
	        result['tags'] = tags

	    return result