Skip to content

Instantly share code, notes, and snippets.

@maxcountryman
Created March 10, 2012 15:47
Show Gist options
  • Save maxcountryman/2011842 to your computer and use it in GitHub Desktop.
Save maxcountryman/2011842 to your computer and use it in GitHub Desktop.
Markov Chain bot utilizing IrcTK
import os
import random
import re

from irctk import Bot
# Defaults applied by MarkovChain.__init__ when the matching argument is None.
_DEFAULT_CHAIN_LEN = 2  # number of leading words used as a cache key
_DEFAULT_WORD_RANGE = 26  # exclusive upper bound handed to random.randrange()
# Regex compiled by MarkovChain: any character followed by '?', '!' or '.'.
# generate_chain() stops as soon as the last seed word matches it.
PUNCTUATION = '.\?|.!|.\.'
# talk() only answers when random.random() falls below this threshold.
TALKATIVE = 0.3
class MarkovChain(object):
    '''Word-level Markov chain text generator.

    The chain is stored in `cache`, a dict that maps a tuple of
    `chain_length` consecutive words to the list of words that were seen
    directly after them in the corpus. Repeated successors are kept, so
    `random.choice` picks them proportionally to how often they occurred.
    '''

    def __init__(self, chain_length=None, cache=None, word_range=None):
        '''Set up the chain.

        chain_length -- number of words per cache key
                        (defaults to _DEFAULT_CHAIN_LEN).
        cache        -- existing key -> successors dict to start from
                        (defaults to a new empty dict).
        word_range   -- exclusive upper bound for the randomly chosen number
                        of generation steps (defaults to _DEFAULT_WORD_RANGE).
        '''
        if chain_length is None:
            chain_length = _DEFAULT_CHAIN_LEN
        if cache is None:
            cache = {}
        if word_range is None:
            word_range = _DEFAULT_WORD_RANGE

        self.chain_length = chain_length
        self.cache = cache
        self.word_range = word_range
        self.punctuation = re.compile(PUNCTUATION)

    def _build_cache(self, corpus):
        '''Add every (key, successor) pair found in `corpus` to the cache.'''
        group_size = self.chain_length + 1
        for group in self.group_words(corpus):
            # group_words() yields one short group when the corpus has fewer
            # than chain_length + 1 words. Such a group has no successor, so
            # recording it would make its last word follow itself.
            if len(group) < group_size:
                continue
            key = tuple(group[:self.chain_length])  # first N words are the key
            self.cache.setdefault(key, []).append(group[-1])  # tail is the value

    def group_words(self, words, chain_length=None):
        '''Yield sliding windows of `chain_length + 1` words from `words`.

        words        -- a string; it is split on whitespace.
        chain_length -- window size minus one (defaults to self.chain_length).

        When the text has `chain_length` words or fewer, the whole word list
        is yielded once (possibly empty), so the generator always yields at
        least one item.
        '''
        words = words.split()
        if chain_length is None:
            chain_length = self.chain_length

        if len(words) > chain_length:
            # range() rather than xrange() so this runs on Python 2 and 3.
            for start in range(len(words) - chain_length):
                yield words[start:start + chain_length + 1]
        else:
            yield words

    def load_corpus(self, corpus):
        '''Discard the current cache and rebuild it from the `corpus` string.'''
        self.cache = {}  # reset the cache
        return self._build_cache(corpus)

    def generate_chain(self, seed):
        '''Generates a Markov Chain based on a seed state.

        seed -- sequence of words used as the first cache key; it only
                matches when it holds exactly `chain_length` words.

        Yields generated words one at a time. Generation stops after a random
        number of steps in [1, word_range), when the last word of the current
        state matches the punctuation pattern, or when the state has no known
        successor. An empty seed yields nothing.
        '''
        seed = tuple(seed)
        for _ in range(random.randrange(1, self.word_range)):
            # An empty seed has no last word to inspect and can never match.
            if not seed or self.punctuation.search(seed[-1]):  # stop on punctuation
                break
            candidates = self.cache.get(seed)
            if not candidates:  # unknown state, or no recorded successors
                break
            word = random.choice(candidates)
            if word is None:  # only possible with a caller-supplied cache
                break
            # Slide the window: drop the oldest word, append the new one.
            seed = seed[1:] + (word,)
            yield word
class Settings(object):
    '''Configuration values handed to the bot via `bot.config.from_object`.'''

    # Identity shown on the network.
    NICK = 'markov'
    REALNAME = 'A Python Bot'

    # Connection details.
    SERVER = 'irc.voxinfinitus.net'
    PORT = 6697
    SSL = True
    TIMEOUT = 300

    # Channels listed for the bot.
    CHANNELS = [
        '#voxinfinitus',
        '#radioreddit',
    ]
# Module-level objects shared by the PRIVMSG handler and the __main__ block.
bot = Bot()
bot.config.from_object(Settings)  # configure the bot from the Settings class
mc = MarkovChain()  # default chain length and word range; cache starts empty
@bot.event('PRIVMSG')
def talk(context):
    '''Occasionally answer a PRIVMSG with generated Markov-chain text.

    context -- event context; the message text is read from
               context.line['message'].

    Returns the generated words joined by spaces, or None when the bot
    stays silent (the TALKATIVE roll failed or the message was empty).
    '''
    if random.random() >= TALKATIVE:
        return None

    if not mc.cache:
        # Lazily (re)load the corpus; `with` closes the log file afterwards.
        with open(os.path.abspath('irc.log')) as log_file:
            mc.load_corpus(log_file.read())

    # group_words() always yields at least one (possibly empty) group.
    groups = list(mc.group_words(context.line['message']))
    # Cache keys hold exactly chain_length words, but each group carries one
    # extra trailing word, so trim it or the seed can never match a key.
    seed = tuple(random.choice(groups)[:mc.chain_length])
    if not seed:  # empty or whitespace-only message: nothing to seed with
        return None

    return ' '.join(mc.generate_chain(seed))
if __name__ == '__main__':
    # Build the chain from the IRC log before starting the bot; the `with`
    # block makes sure the log file is closed once it has been read.
    with open(os.path.abspath('irc.log')) as corpus_file:
        mc.load_corpus(corpus_file.read())
    bot.run()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment