Skip to content

Instantly share code, notes, and snippets.

@cesandoval
Last active June 13, 2019 13:53
Show Gist options
  • Save cesandoval/49a78a8aa99c9261026aea6893432431 to your computer and use it in GitHub Desktop.
Save cesandoval/49a78a8aa99c9261026aea6893432431 to your computer and use it in GitHub Desktop.
from __future__ import print_function, unicode_literals
import random
import logging
import os
# NLTK_DATA is pointed at "<current working directory>/nltk_data" *before* textblob is imported
# on the next line. NOTE(review): presumably this is so the NLTK data TextBlob relies on is
# loaded from that directory -- confirm before reordering these statements.
os.environ['NLTK_DATA'] = os.getcwd() + '/nltk_data'
from textblob import TextBlob
from config import FILTER_WORDS
# Set up the root logger (getLogger() with no name) and let everything from DEBUG up through.
logging.basicConfig()
logger = logging.getLogger()
logger.setLevel(logging.DEBUG)
# start:example-hello.py
# Sentences we'll respond with if the user greeted us
# Words that count as the user greeting the bot, and the replies the bot picks from
GREETING_KEYWORDS = ("hello", "hi", "greetings", "sup", "what's up",)
GREETING_RESPONSES = ["'sup ", "hey", "*nods*", "yo"]


def check_for_greeting(sentence):
    """Reply with a random greeting when the input contains a greeting word.

    `sentence` must expose a `words` iterable of strings. Each word is lower-cased and
    compared against GREETING_KEYWORDS; on the first hit a random entry of
    GREETING_RESPONSES is returned. Returns None when no word matches.
    """
    greeted = any(token.lower() in GREETING_KEYWORDS for token in sentence.words)
    if greeted:
        return random.choice(GREETING_RESPONSES)
    return None
# start:example-none.py
# Fallback replies, used when nothing useful could be extracted from the user's input
NONE_RESPONSES = [
    "uh whatever",
    "meet me at the wall?",
    "code hard",
]
# end
# start:example-self.py
# Canned comebacks for when the user says something about the bot itself
COMMENTS_ABOUT_SELF = [
    "You're just jealous",
    "I worked really hard on that",
]
# end
class UnacceptableUtteranceException(Exception):
    """Signals that a candidate response tripped the outgoing-word filter.

    Raised by the response filter and intentionally left uncaught, so a filtered
    response is never returned to the user.
    """
def starts_with_vowel(word):
    """Tell whether `word` begins with a vowel, to pick the article 'a' vs. 'an'.

    The check is case-insensitive, so capitalized words such as "Engineer" count as
    starting with a vowel. An empty (or None) word yields False instead of raising.

    Returns:
        bool: True if the first character is one of a, e, i, o, u (any case).
    """
    # Guard first: '' would otherwise raise IndexError on word[0].
    return bool(word) and word[0].lower() in 'aeiou'
def chatbot(sentence):
    """Entry point: log the incoming sentence and return the response chosen for it."""
    logger.info("chatbot: respond to %s", sentence)
    return respond(sentence)
# start:example-pronoun.py
def find_pronoun(sent):
    """Choose the pronoun the bot should answer with, based on the pronouns in `sent`.

    Walks the (word, tag) pairs of `sent.pos_tags` and looks only at words tagged 'PRP':
    any-case "you" maps to 'I' (the user is talking about the bot), and an exact "I" maps
    to 'You' (the user is talking about themselves). When several pronouns match, the
    last one in the sentence wins. Returns None if there is no candidate.
    """
    chosen = None
    for token, tag in sent.pos_tags:
        if tag != 'PRP':
            continue
        # Flip the point of view: the user's "you" is the bot's "I", and vice versa
        if token.lower() == 'you':
            chosen = 'I'
        elif token == 'I':
            chosen = 'You'
    return chosen
# end
def find_verb(sent):
    """Return the first verb of `sent` together with its part-of-speech tag.

    A word counts as a verb when its tag starts with 'VB'. The result is always a
    2-tuple: (word, tag) for the first match, or (None, None) if there is no verb.
    """
    for token, tag in sent.pos_tags:
        if tag.startswith('VB'):
            return token, tag
    return None, None
def find_noun(sent):
    """Return the first word of `sent` tagged exactly 'NN', or None if there is none.

    A found noun is also reported through the module logger.
    """
    noun = next((token for token, tag in sent.pos_tags if tag == 'NN'), None)
    if noun:
        logger.info("Found noun: %s", noun)
    return noun
def find_adjective(sent):
    """Return the first word of `sent` tagged exactly 'JJ', or None if there is none."""
    for token, tag in sent.pos_tags:
        if tag == 'JJ':
            return token
    return None
# start:example-construct-response.py
def construct_response(pronoun, noun, verb):
    """Build a sentence from the candidate parts when no special case matched.

    Args:
        pronoun: Pronoun to lead with ('I', 'You', ...); skipped when falsy.
        noun: Noun to mention, prefixed with 'a'/'an'; skipped when falsy.
        verb: (word, tag) pair as produced by find_verb, or a falsy value. Only the
            word is used, and only when it is a form of "to be".

    Returns:
        The parts joined by single spaces, followed by a random filler word (which may
        be the empty string).
    """
    parts = []

    if pronoun:
        parts.append(pronoun)

    # We always respond in the present tense, and the pronoun will always either be a
    # passthrough from the user, or 'you' or 'I', in which case we might need to change the
    # tense for some irregular verbs.
    if verb:
        verb_word = verb[0]
        if verb_word in ('be', 'am', 'is', "'m"):  # This would be an excellent place to use lemmas!
            # The pronoun is optional, so guard before calling .lower() on it
            if pronoun and pronoun.lower() == 'you':
                # The bot will always tell the person they aren't whatever they said they were
                parts.append("aren't really")
            else:
                parts.append(verb_word)

    if noun:
        article = "an" if starts_with_vowel(noun) else "a"
        parts.append(article + " " + noun)

    parts.append(random.choice(("tho", "bro", "lol", "bruh", "smh", "")))

    return " ".join(parts)
# end
# start:example-check-for-self.py
def check_for_comment_about_bot(pronoun, noun, adjective):
    """Produce a boastful reply when the user's input was about the bot itself.

    The input counts as being about the bot when the response pronoun is 'I' and a noun
    or an adjective was found. With a noun, a coin flip picks between a template filled
    with the pluralized, capitalized noun and one filled with the noun as-is; without a
    noun, an adjective template is used. Returns None when the input is not about the bot.
    """
    if pronoun != 'I' or not (noun or adjective):
        return None

    if not noun:
        return random.choice(SELF_VERBS_WITH_ADJECTIVE).format(adjective=adjective)

    if random.choice((True, False)):
        template = random.choice(SELF_VERBS_WITH_NOUN_CAPS_PLURAL)
        return template.format(noun=noun.pluralize().capitalize())

    return random.choice(SELF_VERBS_WITH_NOUN_LOWER).format(noun=noun)
# Reply templates for comments about the bot. Each has one placeholder, filled via str.format.

# Filled with the pluralized, capitalized form of the user's noun
SELF_VERBS_WITH_NOUN_CAPS_PLURAL = [
    "My last startup totally crushed the {noun} vertical",
    "Were you aware I was a serial entrepreneur in the {noun} sector?",
    "My startup is Uber for {noun}",
    "I really consider myself an expert on {noun}",
]

# Filled with the user's noun exactly as it was found
SELF_VERBS_WITH_NOUN_LOWER = [
    "Yeah but I know a lot about {noun}",
    "My bros always ask me about {noun}",
]

# Filled with the user's adjective when no noun was found
SELF_VERBS_WITH_ADJECTIVE = [
    "I'm personally building the {adjective} Economy",
    "I consider myself to be a {adjective}preneur",
]
# end
def preprocess_text(sentence):
    """Normalize input quirks before parsing.

    Splits on single spaces and capitalizes the stand-alone tokens "i" and "i'm" so the
    tagger can recognize them as pronouns; every other token, and the spacing, is kept.
    """
    fixes = {'i': 'I', "i'm": "I'm"}
    return ' '.join(fixes.get(token, token) for token in sentence.split(' '))
# start:example-respond.py
def respond(sentence):
    """Parse the user's inbound sentence and find candidate terms that make up a best-fit response.

    The reply is chosen in this order: a boast if the user talked about the bot, a greeting
    if the user greeted the bot, otherwise a sentence built from the candidate pronoun, noun
    and verb, with a canned fallback when nothing usable was found.

    Args:
        sentence: Raw text typed by the user.

    Returns:
        The response string.

    Raises:
        UnacceptableUtteranceException: If the chosen response fails filter_response.
    """
    cleaned = preprocess_text(sentence)
    parsed = TextBlob(cleaned)

    # Collect candidate parts of speech from the sentence(s) of the input
    pronoun, noun, adjective, verb = find_candidate_parts_of_speech(parsed)

    # find_verb yields a (word, tag) pair -- (None, None) when there is no verb -- and a
    # non-empty tuple is always truthy, so test the word itself to know if a verb exists.
    verb_word = verb[0] if verb else None

    # If we said something about the bot and used some kind of direct noun, construct the
    # sentence around that, discarding the other candidates
    resp = check_for_comment_about_bot(pronoun, noun, adjective)

    # If we just greeted the bot, we'll use a return greeting
    if not resp:
        resp = check_for_greeting(parsed)

    if not resp:
        # If we didn't override the final sentence, try to construct a new one:
        if not pronoun:
            resp = random.choice(NONE_RESPONSES)
        elif pronoun == 'I' and not verb_word:
            resp = random.choice(COMMENTS_ABOUT_SELF)
        else:
            resp = construct_response(pronoun, noun, verb)

    # If we got through all that with nothing, use a random response
    if not resp:
        resp = random.choice(NONE_RESPONSES)

    logger.info("Returning phrase '%s'", resp)
    # Check that we're not going to say anything obviously offensive
    filter_response(resp)

    return resp
def find_candidate_parts_of_speech(parsed):
    """Given a parsed input, find the best pronoun, direct noun, adjective, and verb to match it.

    Every sentence in `parsed.sentences` is inspected. A candidate found in a later sentence
    replaces the one from an earlier sentence, but a sentence that lacks a given part of
    speech no longer erases what an earlier sentence provided.

    Returns:
        A tuple (pronoun, noun, adjective, verb). pronoun, noun and adjective are None when
        no match was found. verb is the (word, tag) pair from find_verb -- (None, None) when
        no sentence contained a verb -- or None when the input has no sentences at all.
    """
    pronoun = None
    noun = None
    adjective = None
    verb = None
    for sent in parsed.sentences:
        # `or` keeps the earlier candidate when this sentence has nothing to offer
        pronoun = find_pronoun(sent) or pronoun
        noun = find_noun(sent) or noun
        adjective = find_adjective(sent) or adjective

        candidate_verb = find_verb(sent)
        # Always take the first pair (so the result stays a tuple), afterwards only
        # replace it when this sentence really contains a verb
        if verb is None or candidate_verb[0]:
            verb = candidate_verb
    logger.info("Pronoun=%s, noun=%s, adjective=%s, verb=%s", pronoun, noun, adjective, verb)
    return pronoun, noun, adjective, verb
# end
# start:example-filter.py
def filter_response(resp):
    """Reject a response that contains anything on our filter list.

    The response is split on single spaces. A token is rejected if it contains '@', '#'
    or '!', or if its lower-cased form starts with any entry of FILTER_WORDS.

    Raises:
        UnacceptableUtteranceException: On the first rejected token.
    """
    for token in resp.split(' '):
        if any(symbol in token for symbol in ('@', '#', '!')):
            raise UnacceptableUtteranceException()
        lowered = token.lower()
        if any(lowered.startswith(banned) for banned in FILTER_WORDS):
            raise UnacceptableUtteranceException()
# end
if __name__ == '__main__':
    import sys
    # Usage:
    # python chatbot.py "I am an engineer"
    # sys.argv[0] is the script name, so a sentence was passed only when there are
    # at least two entries; otherwise fall back to a default sentence.
    if len(sys.argv) > 1:
        saying = sys.argv[1]
    else:
        saying = "How are you, bot?"
    print(chatbot(saying))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment