Skip to content

Instantly share code, notes, and snippets.

@jkgiesler
Created April 26, 2015 17:06
Show Gist options
  • Save jkgiesler/0ce8ba0a0d83b26f00e1 to your computer and use it in GitHub Desktop.
Save jkgiesler/0ce8ba0a0d83b26f00e1 to your computer and use it in GitHub Desktop.
Markov bot based on Google search history.
import os
import json
import pickle
import random
# A Markov bot built from my search history (about 21k searches in total).
# The Markov-chain code is borrowed entirely from:
# http://stackoverflow.com/questions/5306729/how-do-markov-chain-chatbots-work
def parse_searches():
    """Collect every search query under ``./Searches`` into ``corpus.txt``.

    Every entry of the ``Searches`` directory (resolved against the current
    working directory) is parsed as a JSON document.  For each element of the
    document's top-level ``"event"`` list, the string found at
    ``element["query"]["query_text"]`` is written as one line of
    ``corpus.txt`` in the current working directory.  An existing
    ``corpus.txt`` is overwritten.

    The working directory is never changed, and both the corpus and every
    input file are closed even when parsing fails part-way through.

    Raises:
        FileNotFoundError: if the ``Searches`` directory does not exist.
        json.JSONDecodeError: if an input file is not valid JSON.
        KeyError: if a document lacks the ``event``/``query``/``query_text``
            keys.
    """
    searches_dir = os.path.join(os.getcwd(), 'Searches')
    # corpus.txt deliberately keeps the platform default encoding so that it
    # stays readable by train(), which opens it the same way.
    with open('corpus.txt', 'wt') as corpus:
        # Sorted so the corpus (and therefore the trained model) does not
        # depend on the arbitrary order os.listdir() happens to return.
        for file_name in sorted(os.listdir(searches_dir)):
            search_path = os.path.join(searches_dir, file_name)
            # JSON text is UTF-8; do not rely on the locale's encoding.
            with open(search_path, encoding='utf-8') as json_file:
                json_data = json.load(json_file)
            for search in json_data['event']:
                print(search['query']['query_text'], file=corpus)
    print('parsed all searches')
def _build_successor_table(words):
    """Map each distinct word to the words seen directly after it, in order."""
    # A word ending in one of these characters closes a phrase, so whatever
    # comes next is not recorded as its successor.
    terminators = tuple('(),.?!')
    # Every distinct word gets a key, even when it ends up with no successors.
    table = {word: [] for word in words}
    for current, successor in zip(words, words[1:]):
        if not current.endswith(terminators):
            table[current].append(successor)
    return table


def train():
    """Build the successor table from ``corpus.txt`` and pickle it to ``bot``.

    ``corpus.txt`` is read from the current working directory and split on
    whitespace into one flat word sequence (line boundaries are ignored).
    The result is a dict mapping every distinct word to the list of words
    that immediately follow each of its occurrences, duplicates included.
    Words ending in one of ``(),.?!`` and a word that only occurs at the very
    end of the corpus map to an empty list.  The dict is written to the file
    ``bot`` with pickle protocol 2.

    The table is built in a single pass over adjacent word pairs rather than
    rescanning the whole corpus once per distinct word.

    Raises:
        FileNotFoundError: if ``corpus.txt`` does not exist.
    """
    print('training markov bot')
    with open('corpus.txt') as corpus:
        words = [word for line in corpus for word in line.split()]
    follow = _build_successor_table(words)
    with open('bot', 'wb') as model_file:
        pickle.dump(follow, model_file, 2)
    print('successfully trained')
def nextword(a, successorlist):
    """Return a random successor of word ``a``, or ``'the'`` if it has none.

    Args:
        a: the word whose successor is wanted.
        successorlist: dict mapping a word to the list of words observed
            after it.

    Returns:
        A uniformly chosen element of ``successorlist[a]``.  ``'the'`` is
        returned when ``a`` is not a key, and also when its successor list is
        empty; an empty list would otherwise make ``random.choice`` raise
        ``IndexError``.
    """
    followers = successorlist.get(a)
    if followers:
        return random.choice(followers)
    return 'the'
def bot():
    """Run the interactive chat loop on top of the pickled successor table.

    The table is loaded from the file ``bot`` in the current working
    directory.  For each line typed at the ``>`` prompt, one of its words is
    picked at random as a seed and a reply of 4 to 8 words is produced by
    repeatedly calling ``nextword`` on the previously produced word.  The
    reply is printed with a single leading space.

    The loop stops, without printing a reply, when the user types ``quit``
    or when standard input reaches end-of-file.  Blank or whitespace-only
    lines are ignored, because there is no word to seed the reply from.

    Raises:
        FileNotFoundError: if the ``bot`` file does not exist.
    """
    print('Ask me a question followed by return "quit" to quit')
    # NOTE(security): unpickling can execute arbitrary code. 'bot' must only
    # ever be a file written by this program, never one from an untrusted
    # source.
    with open('bot', 'rb') as model_file:
        successorlist = pickle.load(model_file)
    while True:
        try:
            speech = input('>')
        except EOFError:
            # Piped or closed stdin: leave the loop instead of crashing.
            break
        if speech == 'quit':
            break
        candidates = speech.split()
        if not candidates:
            # random.choice() would raise IndexError on an empty list.
            continue
        s = random.choice(candidates)
        generated = []
        for _ in range(random.randint(4, 8)):
            s = nextword(s, successorlist)
            generated.append(s)
        # The leading space is kept so the reply format is unchanged.
        print(' ' + ' '.join(generated))
if __name__ == '__main__':
    # Script entry point: rebuild the corpus, retrain the model, then chat.
    # The stages run strictly in this order; each one consumes the file the
    # previous one wrote.
    for stage in (parse_searches, train, bot):
        stage()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment