-
-
Save MitchRatquest/9b682d51b11a42c24256 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import multiprocessing | |
import signal | |
import sys | |
import random | |
import multiprocessing | |
class Markov(object):
    """Trigram (second-order) Markov chain text generator.

    The whole input file is read into a list of whitespace-separated words.
    ``cache`` maps every pair of consecutive words to the list of words seen
    immediately after that pair; duplicates are kept, so a follower that
    occurs more often is proportionally more likely to be picked.
    """

    def __init__(self, open_file):
        """Read all words from *open_file* and build the lookup table.

        open_file: a readable, seekable text file object.
        """
        self.cache = {}
        self.open_file = open_file
        self.words = self.file_to_words()
        self.word_size = len(self.words)
        self.database()

    def init_worker(self):
        """Make the calling process ignore SIGINT (Ctrl+C).

        Usable as a ``multiprocessing.Pool`` initializer; it is not called
        by this class itself.
        """
        signal.signal(signal.SIGINT, signal.SIG_IGN)

    def file_to_words(self):
        """Return the file's content as a list of whitespace-separated words."""
        # Rewind first so the result does not depend on the current position.
        self.open_file.seek(0)
        return self.open_file.read().split()

    def triples(self):
        """Yield every run of three consecutive words.

        For "What a lovely day" this yields (What, a, lovely) and then
        (a, lovely, day). Nothing is yielded for fewer than three words.
        """
        words = self.words
        # zip stops at the shortest slice, so short inputs yield nothing.
        for triple in zip(words, words[1:], words[2:]):
            yield triple

    def database(self):
        """(Re)build ``self.cache``: (w1, w2) -> list of observed followers.

        The table is built in-process. ``multiprocessing.Pool.map`` has to
        pickle the callable and its results, and neither a bound method of
        an object holding an open file nor a generator can be pickled.
        """
        # Start from scratch so repeated calls do not duplicate followers.
        self.cache = {}
        for w1, w2, w3 in self.triples():
            self.cache.setdefault((w1, w2), []).append(w3)

    def generate_markov_text(self, size=200):
        """Return a random text of up to ``size + 1`` space-joined words.

        The text starts at a random pair of adjacent source words and is
        extended one word at a time with a random recorded follower of the
        last two words. It is shorter than ``size + 1`` words only when the
        chain reaches a pair with no recorded follower (for example the
        final two words of the source).

        Raises:
            ValueError: if the source contains fewer than three words.
        """
        if self.word_size < 3:
            raise ValueError("need at least three words to generate text")
        # Any index up to word_size - 3 starts a pair that is a cache key.
        seed = random.randint(0, self.word_size - 3)
        w1, w2 = self.words[seed], self.words[seed + 1]
        gen_words = [w1]
        for _ in range(size):
            # Emit the pending word before advancing so no word is skipped.
            gen_words.append(w2)
            followers = self.cache.get((w1, w2))
            if not followers:
                # Dead end: this pair was never followed by anything.
                break
            w1, w2 = w2, random.choice(followers)
        return ' '.join(gen_words)
def main():
    """Build a Markov model from ``words.txt`` in the current directory.

    Returns the constructed ``Markov`` instance. Its word list and lookup
    table are read during construction, but its ``open_file`` is closed by
    the time this function returns.
    """
    # Read-only mode: the file is never written, so do not demand write
    # permission on it.
    with open('words.txt', 'r') as f:
        return Markov(f)
# Run the demo only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment