Created
March 10, 2012 15:47
-
-
Save maxcountryman/2011842 to your computer and use it in GitHub Desktop.
Markov Chain bot utilizing IrcTK
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os
import random
import re

from irctk import Bot
# Number of consecutive words that form one Markov state (the length of
# each cache key) when MarkovChain is not given an explicit chain_length.
_DEFAULT_CHAIN_LEN = 2

# Default exclusive upper bound handed to random.randrange(1, ...) when
# choosing how many words to generate, i.e. at most 25 words per chain.
_DEFAULT_WORD_RANGE = 26

# Regex source: any single character followed by '?', '!' or '.'.
# Written as a raw string so the backslashes reach the regex engine
# verbatim instead of triggering invalid-escape-sequence warnings.
PUNCTUATION = r'.\?|.!|.\.'

# Threshold compared against random.random() for every PRIVMSG; the bot
# only attempts a reply when the random draw is below this value.
TALKATIVE = 0.3
class MarkovChain(object):
    '''Word-level Markov chain text generator.

    The chain lives in ``cache``: a dict that maps a tuple of
    ``chain_length`` consecutive words to the list of words observed
    directly after that tuple in the corpus. Duplicates are kept, so
    ``random.choice`` over a list is weighted by observed frequency.
    '''

    def __init__(self, chain_length=None, cache=None, word_range=None):
        '''Create a chain, falling back to the module defaults.

        chain_length -- number of words per state (cache key length);
                        defaults to _DEFAULT_CHAIN_LEN.
        cache        -- pre-built transition dict to use; a fresh empty
                        dict is created when omitted.
        word_range   -- exclusive upper bound for the random number of
                        words produced by generate_chain(); defaults to
                        _DEFAULT_WORD_RANGE.
        '''
        self.chain_length = (
            _DEFAULT_CHAIN_LEN if chain_length is None else chain_length)
        self.cache = {} if cache is None else cache
        self.word_range = (
            _DEFAULT_WORD_RANGE if word_range is None else word_range)
        # Compiled once; generate_chain() uses it to stop at sentence ends.
        self.punctuation = re.compile(PUNCTUATION)

    def _build_cache(self, corpus):
        '''Add every (state -> next word) transition in corpus to the cache.'''
        window = self.chain_length + 1
        for group in self.group_words(corpus):
            # group_words() yields the raw, too-short word list when the
            # corpus has no complete window. Such a group has no successor
            # word, so recording it would invent a bogus transition.
            if len(group) < window:
                continue
            key = tuple(group[:self.chain_length])  # first N words are the state
            self.cache.setdefault(key, []).append(group[-1])  # tail follows it

    def group_words(self, words, chain_length=None):
        '''Yield sliding windows of chain_length + 1 words from a string.

        words        -- text to split on whitespace.
        chain_length -- state size to use for this call; defaults to
                        self.chain_length.

        When the text has chain_length words or fewer, the whole word list
        (possibly empty) is yielded once instead.
        '''
        words = words.split()
        if chain_length is None:
            chain_length = self.chain_length
        if len(words) > chain_length:
            # range() rather than xrange() so this runs on Python 2 and 3.
            for start in range(len(words) - chain_length):
                yield words[start:start + chain_length + 1]
        else:
            yield words

    def load_corpus(self, corpus):
        '''Discard the current cache and rebuild it from the corpus string.'''
        self.cache = {}  # reset the cache
        return self._build_cache(corpus)

    def generate_chain(self, seed):
        '''Generates a Markov Chain based on a seed state.

        seed -- sequence of words to start from. Only the last
                chain_length words are used as the initial state, so a
                longer seed is accepted.

        Yields successive words until a random word budget (between 1 and
        word_range - 1) is used up, the state ends in punctuation, or the
        state has no known successor. An empty seed yields nothing.

        random.randrange() raises ValueError on first iteration when
        word_range is not greater than 1.
        '''
        state = tuple(seed)[-self.chain_length:]
        for _ in range(random.randrange(1, self.word_range)):
            # Nothing to continue from, or the last word closes a sentence.
            if not state or self.punctuation.search(state[-1]):
                break
            followers = self.cache.get(state)
            if not followers:
                break
            word = random.choice(followers)
            if word is None:  # tolerate None entries in a caller-supplied cache
                break
            # Slide the window: drop the oldest word, append the new one.
            state = (state + (word,))[-self.chain_length:]
            yield word
class Settings(object):
    '''Connection settings loaded into the bot via config.from_object().'''

    # IRC server to connect to.
    SERVER = 'irc.voxinfinitus.net'
    # NOTE(review): 6697 together with SSL = True looks like the usual
    # IRC-over-TLS setup -- confirm against the server's documentation.
    PORT = 6697
    SSL = True
    # Presumably seconds; the unit is defined by irctk -- TODO confirm.
    TIMEOUT = 300
    # Identity the bot presents.
    NICK = 'markov'
    REALNAME = 'A Python Bot'
    # Channel names listed for the bot.
    CHANNELS = ['#voxinfinitus', '#radioreddit']
# Module-level singletons: the IRC bot, configured from Settings, and the
# Markov chain that the PRIVMSG handler and the __main__ block both use.
bot = Bot()
bot.config.from_object(Settings)
mc = MarkovChain()
@bot.event('PRIVMSG')
def talk(context):
    '''Occasionally answer a PRIVMSG with Markov-generated text.

    With probability TALKATIVE, picks a random run of words from the
    incoming message as the seed and returns the generated words joined
    by spaces. Returns None when the bot stays quiet or the message
    contains no words.
    '''
    if random.random() >= TALKATIVE:
        return None

    # Lazily (re)load the corpus if the chain is still empty.
    if not mc.cache:
        with open(os.path.abspath('irc.log')) as corpus_file:
            mc.load_corpus(corpus_file.read())

    groups = list(mc.group_words(context.line['message'], mc.chain_length))
    group = random.choice(groups)
    # group_words() yields windows of chain_length + 1 words, but cache
    # keys hold only chain_length words; keep the trailing words so the
    # seed can actually match a key.
    seed = tuple(group[-mc.chain_length:])
    if not seed:
        # Blank message: nothing to seed the chain with.
        return None

    return ' '.join(mc.generate_chain(seed))
if __name__ == '__main__':
    # Build the chain from the log before starting the bot; the context
    # manager closes the file handle instead of leaking it.
    with open(os.path.abspath('irc.log')) as corpus_file:
        mc.load_corpus(corpus_file.read())
    bot.run()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment