Skip to content

Instantly share code, notes, and snippets.

@SuviSree
Forked from Slater-Victoroff/PyMarkov
Created January 19, 2021 14:27
Show Gist options
  • Save SuviSree/8413fc9bc7642a1198c36f1e716eb512 to your computer and use it in GitHub Desktop.
Save SuviSree/8413fc9bc7642a1198c36f1e716eb512 to your computer and use it in GitHub Desktop.
Arbitrary-ply Markov chain constructor in Python
from collections import Counter
import cPickle as pickle
import random
import itertools
import string
def words(entry):
    """Split *entry* on whitespace into lowercase, ASCII-only tokens.

    *entry* may be a byte string (which is what iterating a file opened in
    binary mode yields) or a text string.  Characters outside the ASCII
    range are silently dropped, so a token made up solely of such
    characters comes back as an empty string.

    Returns a list of text strings, one per whitespace-separated token.
    """
    tokens = []
    for word in entry.split():
        if isinstance(word, bytes):
            # Byte input: discard every byte outside the ASCII range.
            text = word.decode('ascii', 'ignore')
        else:
            # Text input: calling .decode() on text either does not exist or
            # performs an implicit *strict* ASCII encode first, so strip the
            # non-ASCII characters explicitly instead.
            text = word.encode('ascii', 'ignore').decode('ascii')
        tokens.append(text.lower())
    return tokens
def letters(entry):
    """Split *entry* into lowercase ASCII characters, dropping punctuation.

    *entry* may be a byte string or a text string.  Characters outside the
    ASCII range are silently dropped.  Only the characters listed in
    ``string.punctuation`` are filtered out; whitespace (including any
    trailing newline) is kept as a token of its own.

    Returns a list of one-character text strings.
    """
    if isinstance(entry, bytes):
        # Byte input: discard every byte outside the ASCII range.
        text = entry.decode('ascii', 'ignore')
    else:
        # Text input has no safe .decode(); strip non-ASCII explicitly.
        text = entry.encode('ascii', 'ignore').decode('ascii')
    return [letter for letter in text.lower()
            if letter not in string.punctuation]
def ply_markov(entry, ply, current_dict, split_callback):
    """Fold the n-gram transitions found in *entry* into *current_dict*.

    entry          -- raw input handed unchanged to *split_callback*.
    ply            -- number of consecutive tokens that form a lookup key
                      (expected to be a positive integer).
    current_dict   -- mapping of ``ply``-length token tuples to a ``Counter``
                      of the tokens observed immediately after them; it is
                      updated in place.
    split_callback -- callable turning *entry* into a sequence of tokens.

    Returns *current_dict* (the same object that was passed in).
    """
    tokens = split_callback(entry)
    # Every window of `ply` tokens that still has a token following it.
    for start in range(len(tokens) - ply):
        prefix = tuple(tokens[start:start + ply])
        follower = tokens[start + ply]
        if prefix in current_dict:
            current_dict[prefix].update([follower])
        else:
            current_dict[prefix] = Counter([follower])
    return current_dict
def train(input_file, output_file, ply, split_callback=words):
    """Build Markov tables of order 1..*ply* from a file and pickle them.

    input_file     -- path of the training corpus; it is opened in binary
                      mode and processed one line at a time.
    output_file    -- path the pickled model is written to.
    ply            -- highest order to train; one table is built for every
                      order from 1 up to and including *ply*.
    split_callback -- tokenizer applied to each line (defaults to ``words``).

    The pickled model is a dict mapping each order to the table produced by
    ``ply_markov`` for that order.  The 1-based number of every line is
    printed as it is processed, as a progress indicator.
    """
    master_dict = {order: {} for order in range(1, ply + 1)}
    with open(input_file, "rb") as source:
        for counter, line in enumerate(source, 1):
            print(counter)
            # Only existing keys are reassigned, so the dict never changes
            # size while it is being iterated.
            for order, table in master_dict.items():
                master_dict[order] = ply_markov(line, order, table, split_callback)
    # Close the output explicitly so the pickle is fully flushed to disk.
    with open(output_file, 'wb') as sink:
        pickle.dump(master_dict, sink)
def get_check_tuple(current_output, ply):
    """Return the last *ply* items of *current_output* as a tuple.

    The items keep their original order, so the result can be used directly
    as a key into the order-*ply* table.  A *ply* of zero or less yields an
    empty tuple.

    Raises IndexError if *current_output* holds fewer than *ply* items.
    """
    if ply <= 0:
        # A plain ``[-0:]`` slice would return the whole sequence instead.
        return ()
    if ply > len(current_output):
        # A slice would silently return a shorter tuple here; fail loudly.
        raise IndexError("list index out of range")
    return tuple(current_output[-ply:])
def append_next_word(master_dict, current_output, ply):
    """Pick the next token at random and append it to *current_output*.

    master_dict    -- mapping of order -> {token tuple -> follower counts}.
    current_output -- list of tokens generated so far; modified in place.
    ply            -- highest order to consult; it is capped at the number
                      of tokens already generated.

    For every order from 1 to *ply*, the followers recorded for the last
    ``order`` tokens are collected.  Each follower is weighted by its count
    multiplied by the order, so longer matching contexts carry more weight.
    Orders that are absent from *master_dict* are skipped.

    If no follower is known for any context (a dead end in the chain), the
    chain restarts from the first token of a randomly chosen order-1 key
    instead of failing.

    Raises IndexError if a restart is needed but the model has no order-1
    entries to restart from.
    """
    ply = min(len(current_output), ply)
    candidates = []
    for order in range(1, ply + 1):
        context = tuple(current_output[-order:])
        followers = master_dict.get(order, {}).get(context, {})
        for token, count in followers.items():
            candidates.extend([token] * (count * order))
    if candidates:
        current_output.append(random.choice(candidates))
        return
    # Dead end: nothing was ever observed after the current context.
    seeds = list(master_dict.get(1, {}))
    current_output.append(random.choice(seeds)[0])
def generate(input_file, output_length, ply, join_char=" "):
    """Generate text from a pickled model written by ``train``.

    input_file    -- path of the pickled model.
    output_length -- number of tokens appended after the initial seed token,
                     so the result holds ``output_length + 1`` tokens.
    ply           -- highest order consulted when choosing each next token.
    join_char     -- separator placed between tokens in the result.

    The seed is the first token of a randomly chosen key of the order-1
    table.  Returns the generated tokens joined with *join_char*.
    """
    # NOTE: unpickling can execute arbitrary code; only load model files
    # that come from a trusted source.
    with open(input_file, 'rb') as source:
        master_dict = pickle.load(source)
    # Materialize the keys: a dict view cannot be indexed by random.choice.
    output = [random.choice(list(master_dict[1]))[0]]
    for _ in range(output_length):
        append_next_word(master_dict, output, ply)
    return join_char.join(output)
if __name__ == "__main__":
    # Guarded so that importing this module does not trigger a generation
    # run (which needs "markov.p" to exist).
    # A model has to be trained first; example invocation (note that it
    # writes to a different file than the one read below):
    #train("allData.txt", "markovLetters.p", 4, letters)
    print(generate("markov.p", 200, 3, " "))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment