# cesandoval/chatbot.py

## chatbot.py
from __future__ import print_function, unicode_literals
import random
import logging
import os

# Set NLTK_DATA to <current working directory>/nltk_data before textblob is imported below.
# NOTE(review): presumably this makes NLTK load its corpora from that directory -- confirm.
os.environ['NLTK_DATA'] = os.getcwd() + '/nltk_data'

from textblob import TextBlob
from config import FILTER_WORDS

# Configure basic logging and set the root logger to DEBUG so the logger.info() calls made
# throughout this module are emitted.
logging.basicConfig()
logger = logging.getLogger()
logger.setLevel(logging.DEBUG)

# start:example-hello.py
# Words that count as a greeting; check_for_greeting() compares each lower-cased input word
# against this tuple
GREETING_KEYWORDS = ("hello", "hi", "greetings", "sup", "what's up",)

# Sentences we'll respond with if the user greeted us (one is picked at random)
GREETING_RESPONSES = ["'sup ", "hey", "*nods*", "yo"]

def check_for_greeting(sentence):
    """Return a random greeting response if any word of the input, compared in lower case, is a
    greeting keyword. Returns None when no word matches."""
    if any(token.lower() in GREETING_KEYWORDS for token in sentence.words):
        return random.choice(GREETING_RESPONSES)
    return None
# start:example-none.py
# Sentences we'll respond with if we have no idea what the user just said. respond() picks one
# at random when no pronoun was found, or when every other strategy produced an empty reply.
NONE_RESPONSES = [
    "uh whatever",
    "meet me at the wall?",
    "code hard",
]
# end

# start:example-self.py
# If the user tries to tell us something about ourselves, use one of these responses.
# respond() chooses among them at random in its "pronoun is 'I' and no verb" branch.
COMMENTS_ABOUT_SELF = [
    "You're just jealous",
    "I worked really hard on that",
]
# end


class UnacceptableUtteranceException(Exception):
    """Signals that the response about to be returned would trigger our blacklist.

    It is meant to be raised and left uncaught."""


def starts_with_vowel(word):
    """Check for article compatibility -- 'a' vs. 'an'.

    Returns True when the first character of ``word`` is a vowel, ignoring case. An empty (or
    None) word returns False instead of raising IndexError."""
    # The bool(word) guard matters twice over: word[0] would fail on an empty string, and
    # '' in 'aeiou' is True in Python.
    return bool(word) and word[0].lower() in 'aeiou'


def chatbot(sentence):
    """Entry point: log the incoming sentence, then return whatever respond() produces for it"""
    logger.info("chatbot: respond to %s", sentence)
    return respond(sentence)


# start:example-pronoun.py
def find_pronoun(sent):
    """Choose the pronoun our reply should use, based on the pronouns tagged in the input.

    A 'you' (any case) tagged PRP maps to 'I'; a capital 'I' tagged PRP maps to 'You'. When
    several words qualify, the last one in the sentence wins. Returns None if none qualifies."""
    chosen = None
    for token, tag in sent.pos_tags:
        if tag != 'PRP':
            continue
        # Flip the perspective: the user's "you" is the bot, the user's "I" is the user
        if token.lower() == 'you':
            chosen = 'I'
        elif token == 'I':
            chosen = 'You'
    return chosen
# end

def find_verb(sent):
    """Return the first verb of the sentence together with its tag.

    Any tag beginning with 'VB' counts as a verb. The result is a (word, tag) pair, or
    (None, None) when the sentence contains no verb."""
    for token, tag in sent.pos_tags:
        if tag.startswith('VB'):
            return token, tag
    return None, None


def find_noun(sent):
    """Return the first word tagged exactly 'NN' in the sentence, or None if there is none.

    A non-empty match is also written to the log."""
    noun = next((token for token, tag in sent.pos_tags if tag == 'NN'), None)
    if noun:
        logger.info("Found noun: %s", noun)
    return noun

def find_adjective(sent):
    """Return the first word tagged exactly 'JJ' in the sentence, or None if there is none."""
    for token, tag in sent.pos_tags:
        if tag == 'JJ':
            return token
    return None


# start:example-construct-response.py
def construct_response(pronoun, noun, verb):
    """No special cases matched, so we're going to try to construct a full sentence that uses as much
    of the user's input as possible.

    Args:
        pronoun: Pronoun to open the reply with (e.g. 'I' or 'You'), or None.
        noun: Noun to echo back behind an 'a'/'an' article, or None.
        verb: A (word, part_of_speech) pair as produced by find_verb, or None.

    Returns:
        The available parts joined by single spaces, optionally followed by a random slang suffix.
    """
    resp = []

    if pronoun:
        resp.append(pronoun)

    # We always respond in the present tense, and the pronoun will always either be a passthrough
    # from the user, or 'you' or 'I', in which case we might need to change the tense for some
    # irregular verbs.
    if verb:
        verb_word = verb[0]
        if verb_word in ('be', 'am', 'is', "'m"):  # This would be an excellent place to use lemmas!
            # pronoun may be None here; guard it so we don't call .lower() on None
            if pronoun and pronoun.lower() == 'you':
                # The bot will always tell the person they aren't whatever they said they were
                resp.append("aren't really")
            else:
                resp.append(verb_word)
    if noun:
        article = "an" if starts_with_vowel(noun) else "a"
        resp.append(article + " " + noun)

    # One of the choices is the empty string ("no suffix"); skip it so the reply does not end in
    # a dangling space.
    suffix = random.choice(("tho", "bro", "lol", "bruh", "smh", ""))
    if suffix:
        resp.append(suffix)

    return " ".join(resp)
# end


# start:example-check-for-self.py
def check_for_comment_about_bot(pronoun, noun, adjective):
    """Build a boastful reply when the input was about the bot itself.

    That is the case when the reply pronoun is 'I' and a noun or adjective was found. A noun is
    preferred over an adjective; for a noun, a coin flip decides between the capitalized-plural
    templates and the lower-case ones. Returns the reply, or None when the input was not about
    the bot."""
    if pronoun != 'I' or not (noun or adjective):
        return None
    if not noun:
        return random.choice(SELF_VERBS_WITH_ADJECTIVE).format(adjective=adjective)
    if random.choice((True, False)):
        return random.choice(SELF_VERBS_WITH_NOUN_CAPS_PLURAL).format(noun=noun.pluralize().capitalize())
    return random.choice(SELF_VERBS_WITH_NOUN_LOWER).format(noun=noun)

# Template for responses that include a direct noun which is indefinite/uncountable.
# check_for_comment_about_bot() fills {noun} with the pluralized, capitalized noun.
SELF_VERBS_WITH_NOUN_CAPS_PLURAL = [
    "My last startup totally crushed the {noun} vertical",
    "Were you aware I was a serial entrepreneur in the {noun} sector?",
    "My startup is Uber for {noun}",
    "I really consider myself an expert on {noun}",
]

# Templates whose {noun} is filled with the noun exactly as it was found in the input
SELF_VERBS_WITH_NOUN_LOWER = [
    "Yeah but I know a lot about {noun}",
    "My bros always ask me about {noun}",
]

# Templates used when only an adjective was found; {adjective} is filled with it unchanged
SELF_VERBS_WITH_ADJECTIVE = [
    "I'm personally building the {adjective} Economy",
    "I consider myself to be a {adjective}preneur",
]
# end

def preprocess_text(sentence):
    """Normalize the raw input before parsing: a lone 'i' becomes 'I' and "i'm" becomes "I'm",
    so they can be correctly identified as pronouns. Words are split on single spaces and all
    other words are left untouched."""
    fixes = {'i': 'I', "i'm": "I'm"}
    return ' '.join(fixes.get(token, token) for token in sentence.split(' '))

# start:example-respond.py
def respond(sentence):
    """Parse the user's inbound sentence and find candidate terms that make up a best-fit response.

    Args:
        sentence: Raw text typed by the user.

    Returns:
        The reply string.

    Raises:
        UnacceptableUtteranceException: if filter_response() rejects the chosen reply.
    """
    cleaned = preprocess_text(sentence)
    parsed = TextBlob(cleaned)

    # Gather candidates from all the sentences, if more than one. This will help extract the most
    # relevant response text even across multiple sentences (for example if there was no obvious
    # direct noun in one sentence).
    pronoun, noun, adjective, verb = find_candidate_parts_of_speech(parsed)

    # find_verb() reports "no verb" as the pair (None, None), which is truthy as a tuple, so look
    # at the verb word itself rather than at the pair.
    has_verb = bool(verb and verb[0])

    # If we said something about the bot and used some kind of direct noun, construct the
    # sentence around that, discarding the other candidates
    resp = check_for_comment_about_bot(pronoun, noun, adjective)

    # If we just greeted the bot, we'll use a return greeting
    if not resp:
        resp = check_for_greeting(parsed)

    if not resp:
        # If we didn't override the final sentence, try to construct a new one:
        if not pronoun:
            resp = random.choice(NONE_RESPONSES)
        elif pronoun == 'I' and not has_verb:
            resp = random.choice(COMMENTS_ABOUT_SELF)
        else:
            resp = construct_response(pronoun, noun, verb)

    # If we got through all that with nothing, use a random response
    if not resp:
        resp = random.choice(NONE_RESPONSES)

    logger.info("Returning phrase '%s'", resp)
    # Check that we're not going to say anything obviously offensive
    filter_response(resp)

    return resp

def find_candidate_parts_of_speech(parsed):
    """Given a parsed input, find the best pronoun, direct noun, adjective, and verb to match their input.

    Every sentence is examined. A part of speech found in a later sentence replaces an earlier
    one, but a sentence that lacks a part no longer wipes out a candidate found earlier.

    Returns a tuple of pronoun, noun, adjective, verb. Pronoun, noun and adjective may be None if
    there was no good match. Verb is the (word, tag) pair from find_verb: (None, None) if no
    sentence had a verb, or None if there were no sentences at all."""
    pronoun = None
    noun = None
    adjective = None
    verb = None
    for sent in parsed.sentences:
        pronoun = find_pronoun(sent) or pronoun
        noun = find_noun(sent) or noun
        adjective = find_adjective(sent) or adjective
        candidate_verb = find_verb(sent)
        # Accept the first result unconditionally (keeps the historical (None, None) value), and
        # afterwards only replace it with a sentence that actually contains a verb.
        if verb is None or candidate_verb[0]:
            verb = candidate_verb
    logger.info("Pronoun=%s, noun=%s, adjective=%s, verb=%s", pronoun, noun, adjective, verb)
    return pronoun, noun, adjective, verb


# end

# start:example-filter.py
def filter_response(resp):
    """Reject replies that must not be sent.

    Raises UnacceptableUtteranceException if any space-separated word of the reply contains
    '@', '#' or '!', or begins (ignoring case) with an entry of FILTER_WORDS. Returns None
    otherwise."""
    for token in resp.split(' '):
        if any(symbol in token for symbol in ('@', '#', '!')):
            raise UnacceptableUtteranceException()
        lowered = token.lower()
        if any(lowered.startswith(banned) for banned in FILTER_WORDS):
            raise UnacceptableUtteranceException()
# end

if __name__ == '__main__':
    import sys
    # Usage:
    # python chatbot.py "I am an engineer"
    # sys.argv[0] is always the script name, so a sentence was supplied only when there are at
    # least two entries; otherwise fall back to the default sentence.
    if len(sys.argv) > 1:
        saying = sys.argv[1]
    else:
        saying = "How are you, bot?"
    print(chatbot(saying))
	from __future__ import print_function, unicode_literals
	import random
	import logging
	import os

	# Set NLTK_DATA to <current working directory>/nltk_data before textblob is imported below.
	# NOTE(review): presumably this makes NLTK load its corpora from that directory -- confirm.
	os.environ['NLTK_DATA'] = os.getcwd() + '/nltk_data'

	from textblob import TextBlob
	from config import FILTER_WORDS

	# Configure basic logging and set the root logger to DEBUG so the logger.info() calls made
	# throughout this module are emitted.
	logging.basicConfig()
	logger = logging.getLogger()
	logger.setLevel(logging.DEBUG)

	# start:example-hello.py
	# Words that count as a greeting; check_for_greeting() compares each lower-cased input word
	# against this tuple
	GREETING_KEYWORDS = ("hello", "hi", "greetings", "sup", "what's up",)

	# Sentences we'll respond with if the user greeted us (one is picked at random).
	# "*nods*" keeps its asterisks: they are part of the reply text.
	GREETING_RESPONSES = ["'sup ", "hey", "*nods*", "yo"]

	def check_for_greeting(sentence):
	    """Return a random greeting response if any word of the input, compared in lower case, is a
	    greeting keyword. Returns None when no word matches."""
	    if any(token.lower() in GREETING_KEYWORDS for token in sentence.words):
	        return random.choice(GREETING_RESPONSES)
	    return None
	# start:example-none.py
	# Sentences we'll respond with if we have no idea what the user just said. respond() picks one
	# at random when no pronoun was found, or when every other strategy produced an empty reply.
	NONE_RESPONSES = [
	"uh whatever",
	"meet me at the wall?",
	"code hard",
	]
	# end

	# start:example-self.py
	# If the user tries to tell us something about ourselves, use one of these responses.
	# respond() chooses among them at random in its "pronoun is 'I' and no verb" branch.
	COMMENTS_ABOUT_SELF = [
	"You're just jealous",
	"I worked really hard on that",
	]
	# end


	class UnacceptableUtteranceException(Exception):
	    """Signals that the response about to be returned would trigger our blacklist.

	    It is meant to be raised and left uncaught."""


	def starts_with_vowel(word):
	    """Check for article compatibility -- 'a' vs. 'an'.

	    Returns True when the first character of ``word`` is a vowel, ignoring case. An empty (or
	    None) word returns False instead of raising IndexError."""
	    # The bool(word) guard matters twice over: word[0] would fail on an empty string, and
	    # '' in 'aeiou' is True in Python.
	    return bool(word) and word[0].lower() in 'aeiou'


	def chatbot(sentence):
	    """Entry point: log the incoming sentence, then return whatever respond() produces for it"""
	    logger.info("chatbot: respond to %s", sentence)
	    return respond(sentence)


	# start:example-pronoun.py
	def find_pronoun(sent):
	    """Choose the pronoun our reply should use, based on the pronouns tagged in the input.

	    A 'you' (any case) tagged PRP maps to 'I'; a capital 'I' tagged PRP maps to 'You'. When
	    several words qualify, the last one in the sentence wins. Returns None if none qualifies."""
	    chosen = None
	    for token, tag in sent.pos_tags:
	        if tag != 'PRP':
	            continue
	        # Flip the perspective: the user's "you" is the bot, the user's "I" is the user
	        if token.lower() == 'you':
	            chosen = 'I'
	        elif token == 'I':
	            chosen = 'You'
	    return chosen
	# end

	def find_verb(sent):
	    """Return the first verb of the sentence together with its tag.

	    Any tag beginning with 'VB' counts as a verb. The result is a (word, tag) pair, or
	    (None, None) when the sentence contains no verb."""
	    for token, tag in sent.pos_tags:
	        if tag.startswith('VB'):
	            return token, tag
	    return None, None


	def find_noun(sent):
	    """Return the first word tagged exactly 'NN' in the sentence, or None if there is none.

	    A non-empty match is also written to the log."""
	    noun = next((token for token, tag in sent.pos_tags if tag == 'NN'), None)
	    if noun:
	        logger.info("Found noun: %s", noun)
	    return noun

	def find_adjective(sent):
	    """Return the first word tagged exactly 'JJ' in the sentence, or None if there is none."""
	    for token, tag in sent.pos_tags:
	        if tag == 'JJ':
	            return token
	    return None



	# start:example-construct-response.py
	def construct_response(pronoun, noun, verb):
	    """No special cases matched, so we're going to try to construct a full sentence that uses as much
	    of the user's input as possible.

	    Args:
	        pronoun: Pronoun to open the reply with (e.g. 'I' or 'You'), or None.
	        noun: Noun to echo back behind an 'a'/'an' article, or None.
	        verb: A (word, part_of_speech) pair as produced by find_verb, or None.

	    Returns:
	        The available parts joined by single spaces, optionally followed by a random slang suffix.
	    """
	    resp = []

	    if pronoun:
	        resp.append(pronoun)

	    # We always respond in the present tense, and the pronoun will always either be a passthrough
	    # from the user, or 'you' or 'I', in which case we might need to change the tense for some
	    # irregular verbs.
	    if verb:
	        verb_word = verb[0]
	        if verb_word in ('be', 'am', 'is', "'m"):  # This would be an excellent place to use lemmas!
	            # pronoun may be None here; guard it so we don't call .lower() on None
	            if pronoun and pronoun.lower() == 'you':
	                # The bot will always tell the person they aren't whatever they said they were
	                resp.append("aren't really")
	            else:
	                resp.append(verb_word)
	    if noun:
	        article = "an" if starts_with_vowel(noun) else "a"
	        resp.append(article + " " + noun)

	    # One of the choices is the empty string ("no suffix"); skip it so the reply does not end in
	    # a dangling space.
	    suffix = random.choice(("tho", "bro", "lol", "bruh", "smh", ""))
	    if suffix:
	        resp.append(suffix)

	    return " ".join(resp)
	# end


	# start:example-check-for-self.py
	def check_for_comment_about_bot(pronoun, noun, adjective):
	    """Build a boastful reply when the input was about the bot itself.

	    That is the case when the reply pronoun is 'I' and a noun or adjective was found. A noun is
	    preferred over an adjective; for a noun, a coin flip decides between the capitalized-plural
	    templates and the lower-case ones. Returns the reply, or None when the input was not about
	    the bot."""
	    if pronoun != 'I' or not (noun or adjective):
	        return None
	    if not noun:
	        return random.choice(SELF_VERBS_WITH_ADJECTIVE).format(adjective=adjective)
	    if random.choice((True, False)):
	        return random.choice(SELF_VERBS_WITH_NOUN_CAPS_PLURAL).format(noun=noun.pluralize().capitalize())
	    return random.choice(SELF_VERBS_WITH_NOUN_LOWER).format(noun=noun)

	# Template for responses that include a direct noun which is indefinite/uncountable.
	# check_for_comment_about_bot() fills {noun} with the pluralized, capitalized noun.
	SELF_VERBS_WITH_NOUN_CAPS_PLURAL = [
	"My last startup totally crushed the {noun} vertical",
	"Were you aware I was a serial entrepreneur in the {noun} sector?",
	"My startup is Uber for {noun}",
	"I really consider myself an expert on {noun}",
	]

	# Templates whose {noun} is filled with the noun exactly as it was found in the input
	SELF_VERBS_WITH_NOUN_LOWER = [
	"Yeah but I know a lot about {noun}",
	"My bros always ask me about {noun}",
	]

	# Templates used when only an adjective was found; {adjective} is filled with it unchanged
	SELF_VERBS_WITH_ADJECTIVE = [
	"I'm personally building the {adjective} Economy",
	"I consider myself to be a {adjective}preneur",
	]
	# end

	def preprocess_text(sentence):
	    """Normalize the raw input before parsing: a lone 'i' becomes 'I' and "i'm" becomes "I'm",
	    so they can be correctly identified as pronouns. Words are split on single spaces and all
	    other words are left untouched."""
	    fixes = {'i': 'I', "i'm": "I'm"}
	    return ' '.join(fixes.get(token, token) for token in sentence.split(' '))

	# start:example-respond.py
	def respond(sentence):
	    """Parse the user's inbound sentence and find candidate terms that make up a best-fit response.

	    Args:
	        sentence: Raw text typed by the user.

	    Returns:
	        The reply string.

	    Raises:
	        UnacceptableUtteranceException: if filter_response() rejects the chosen reply.
	    """
	    cleaned = preprocess_text(sentence)
	    parsed = TextBlob(cleaned)

	    # Gather candidates from all the sentences, if more than one. This will help extract the most
	    # relevant response text even across multiple sentences (for example if there was no obvious
	    # direct noun in one sentence).
	    pronoun, noun, adjective, verb = find_candidate_parts_of_speech(parsed)

	    # find_verb() reports "no verb" as the pair (None, None), which is truthy as a tuple, so look
	    # at the verb word itself rather than at the pair.
	    has_verb = bool(verb and verb[0])

	    # If we said something about the bot and used some kind of direct noun, construct the
	    # sentence around that, discarding the other candidates
	    resp = check_for_comment_about_bot(pronoun, noun, adjective)

	    # If we just greeted the bot, we'll use a return greeting
	    if not resp:
	        resp = check_for_greeting(parsed)

	    if not resp:
	        # If we didn't override the final sentence, try to construct a new one:
	        if not pronoun:
	            resp = random.choice(NONE_RESPONSES)
	        elif pronoun == 'I' and not has_verb:
	            resp = random.choice(COMMENTS_ABOUT_SELF)
	        else:
	            resp = construct_response(pronoun, noun, verb)

	    # If we got through all that with nothing, use a random response
	    if not resp:
	        resp = random.choice(NONE_RESPONSES)

	    logger.info("Returning phrase '%s'", resp)
	    # Check that we're not going to say anything obviously offensive
	    filter_response(resp)

	    return resp

	def find_candidate_parts_of_speech(parsed):
	    """Given a parsed input, find the best pronoun, direct noun, adjective, and verb to match their input.

	    Every sentence is examined. A part of speech found in a later sentence replaces an earlier
	    one, but a sentence that lacks a part no longer wipes out a candidate found earlier.

	    Returns a tuple of pronoun, noun, adjective, verb. Pronoun, noun and adjective may be None if
	    there was no good match. Verb is the (word, tag) pair from find_verb: (None, None) if no
	    sentence had a verb, or None if there were no sentences at all."""
	    pronoun = None
	    noun = None
	    adjective = None
	    verb = None
	    for sent in parsed.sentences:
	        pronoun = find_pronoun(sent) or pronoun
	        noun = find_noun(sent) or noun
	        adjective = find_adjective(sent) or adjective
	        candidate_verb = find_verb(sent)
	        # Accept the first result unconditionally (keeps the historical (None, None) value), and
	        # afterwards only replace it with a sentence that actually contains a verb.
	        if verb is None or candidate_verb[0]:
	            verb = candidate_verb
	    logger.info("Pronoun=%s, noun=%s, adjective=%s, verb=%s", pronoun, noun, adjective, verb)
	    return pronoun, noun, adjective, verb


	# end

	# start:example-filter.py
	def filter_response(resp):
	    """Reject replies that must not be sent.

	    Raises UnacceptableUtteranceException if any space-separated word of the reply contains
	    '@', '#' or '!', or begins (ignoring case) with an entry of FILTER_WORDS. Returns None
	    otherwise."""
	    for token in resp.split(' '):
	        if any(symbol in token for symbol in ('@', '#', '!')):
	            raise UnacceptableUtteranceException()
	        lowered = token.lower()
	        if any(lowered.startswith(banned) for banned in FILTER_WORDS):
	            raise UnacceptableUtteranceException()
	# end

	if __name__ == '__main__':
	    import sys
	    # Usage:
	    # python chatbot.py "I am an engineer"
	    # sys.argv[0] is always the script name, so a sentence was supplied only when there are at
	    # least two entries; otherwise fall back to the default sentence.
	    if len(sys.argv) > 1:
	        saying = sys.argv[1]
	    else:
	        saying = "How are you, bot?"
	    print(chatbot(saying))