Navigation Menu

Skip to content

Instantly share code, notes, and snippets.

@Slater-Victoroff
Last active March 28, 2022 13:55
Show Gist options
  • Star 7 You must be signed in to star a gist
  • Fork 4 You must be signed in to fork a gist
  • Save Slater-Victoroff/6227656 to your computer and use it in GitHub Desktop.
Save Slater-Victoroff/6227656 to your computer and use it in GitHub Desktop.
Arbitrary-ply Markov chain constructor in Python
from collections import Counter
import cPickle as pickle
import random
import itertools
import string
def words(entry):
    """Split *entry* into lowercase, ASCII-only word tokens.

    The input is split on whitespace; every piece then has its non-ASCII
    characters removed and is lowercased. Punctuation is left attached to
    the word it appears in.

    Args:
        entry: One chunk of text, given either as a byte string or as a
            text string.

    Returns:
        A list of text tokens in their original order. A piece that
        consists only of non-ASCII characters becomes an empty string.
    """
    tokens = []
    for piece in entry.split():
        if isinstance(piece, bytes):
            text = piece.decode('ascii', 'ignore')
        else:
            # Text strings cannot be lossily decoded; round-trip through
            # ASCII so non-ASCII characters are dropped here as well.
            text = piece.encode('ascii', 'ignore').decode('ascii')
        tokens.append(text.lower())
    return tokens
def letters(entry):
    """Split *entry* into lowercase, ASCII-only single characters.

    Non-ASCII characters are removed, the rest is lowercased, and every
    character listed in ``string.punctuation`` is dropped. Whitespace
    (including newlines) is kept, so it takes part in the model like any
    other character.

    Args:
        entry: One chunk of text, given either as a byte string or as a
            text string.

    Returns:
        A list of one-character text strings in their original order.
    """
    if isinstance(entry, bytes):
        text = entry.decode('ascii', 'ignore')
    else:
        # Text strings cannot be lossily decoded; round-trip through
        # ASCII so non-ASCII characters are dropped here as well.
        text = entry.encode('ascii', 'ignore').decode('ascii')
    return [char for char in text.lower() if char not in string.punctuation]
def ply_markov(entry, ply, current_dict, split_callback):
    """Add the order-*ply* transitions found in *entry* to *current_dict*.

    *entry* is tokenized with *split_callback*. For every run of *ply*
    consecutive tokens that is followed by another token, the run (as a
    tuple) is used as a key and the following token is counted in the
    ``Counter`` stored under that key.

    Args:
        entry: Raw input handed unchanged to *split_callback*.
        ply: Number of tokens in each context key; expected to be
            non-negative.
        current_dict: Mapping of context tuple -> ``Counter`` of follower
            tokens. It is updated in place.
        split_callback: Callable turning *entry* into a sequence of tokens.

    Returns:
        The same *current_dict* object, after the update. Nothing is added
        when *entry* has *ply* tokens or fewer.
    """
    tokens = split_callback(entry)
    for start in range(len(tokens) - ply):
        context = tuple(tokens[start:start + ply])
        follower = tokens[start + ply]
        if context in current_dict:
            current_dict[context][follower] += 1
        else:
            current_dict[context] = Counter([follower])
    return current_dict
def train(input_file, output_file, ply, split_callback=words):
    """Build transition tables of order 1..*ply* from a file and pickle them.

    The input is read line by line in binary mode. Each line is passed to
    ``ply_markov`` once per order, so contexts never span two lines. The
    1-based number of each line is printed as a progress indicator.

    Args:
        input_file: Path of the training text.
        output_file: Path the pickled model is written to.
        ply: Highest order to build; one table is created for every order
            from 1 to *ply* inclusive.
        split_callback: Tokenizer applied to each line (``words`` by
            default).

    The pickled object is a dict mapping each order to the table returned
    by ``ply_markov`` for that order.
    """
    master_dict = {order: {} for order in range(1, ply + 1)}
    with open(input_file, "rb") as source:
        for line_number, line in enumerate(source, 1):
            print(line_number)
            # Only values of existing keys are replaced, so the dict does
            # not change size while it is being iterated.
            for order, table in master_dict.items():
                master_dict[order] = ply_markov(line, order, table, split_callback)
    # Close the output explicitly so the pickle is flushed to disk before
    # the function returns.
    with open(output_file, 'wb') as sink:
        pickle.dump(master_dict, sink)
def get_check_tuple(current_output, ply):
    """Return the last *ply* items of *current_output* as a tuple.

    Args:
        current_output: Sequence of tokens generated so far.
        ply: Number of trailing items wanted.

    Returns:
        A tuple holding the final *ply* items in their original order, or
        an empty tuple when *ply* is zero or negative.

    Raises:
        IndexError: If *ply* is larger than ``len(current_output)``.
    """
    if ply <= 0:
        # ``seq[-0:]`` is the whole sequence, so "no context" needs its
        # own branch.
        return ()
    if ply > len(current_output):
        raise IndexError(
            "cannot take the last %d items of a sequence of length %d"
            % (ply, len(current_output))
        )
    return tuple(current_output[-ply:])
def append_next_word(master_dict, current_output, ply):
    """Choose the next token at random and append it to *current_output*.

    Every order from 1 up to ``min(ply, len(current_output))`` is
    consulted. For each order the trailing tokens of *current_output* form
    the context, and every follower recorded for that context receives a
    weight of ``count * order``, so longer matching contexts have more
    influence. The next token is drawn with probability proportional to
    its total weight.

    If no consulted order has a follower for the current context (a
    dead end, or an empty *current_output*), the first token of a randomly
    chosen order-1 context is appended instead, so generation can continue.

    Args:
        master_dict: Mapping of order -> {context tuple -> Counter of
            followers}. Orders missing from the mapping are skipped.
        current_output: List of tokens generated so far; modified in place.
        ply: Highest order to consult.

    Raises:
        IndexError: If there is no follower and *master_dict* has no
            order-1 contexts to fall back on.
    """
    ply = min(len(current_output), ply)

    # Sum the weights per token instead of building a list with one entry
    # per unit of weight; the resulting distribution is the same.
    weights = Counter()
    for order in range(1, ply + 1):
        context = tuple(current_output[-order:])
        followers = master_dict.get(order, {}).get(context, {})
        for token, count in followers.items():
            weights[token] += count * order

    total = sum(weights.values())
    if total <= 0:
        seeds = list(master_dict.get(1, ()))
        if not seeds:
            raise IndexError("no follower found and no order-1 contexts to restart from")
        current_output.append(random.choice(seeds)[0])
        return

    # Weighted draw: walk the tokens until the cumulative weight passes
    # the randomly chosen position.
    remaining = random.randrange(total)
    for token, weight in weights.items():
        remaining -= weight
        if remaining < 0:
            current_output.append(token)
            return
def generate(input_file, output_length, ply, join_char=" "):
    """Generate text from a pickled model written by ``train``.

    The output starts with the first token of a randomly chosen order-1
    context, after which ``append_next_word`` is called *output_length*
    times. The result therefore contains ``output_length + 1`` tokens.

    Args:
        input_file: Path of the pickled model.
        output_length: Number of tokens to append after the initial one.
        ply: Highest order passed on to ``append_next_word``.
        join_char: Separator placed between tokens in the result.

    Returns:
        The generated tokens joined with *join_char*.
    """
    # NOTE: unpickling can execute arbitrary code; only load model files
    # that come from a trusted source.
    with open(input_file, 'rb') as source:
        master_dict = pickle.load(source)
    # list() because a dict key view cannot be indexed by random.choice.
    output = [random.choice(list(master_dict[1]))[0]]
    for _ in range(output_length):
        append_next_word(master_dict, output, ply)
    return join_char.join(output)
# Example: build a letter-level model of order 4 from a text corpus.
# train("allData.txt", "markovLetters.p", 4, letters)

# Only generate when run as a script, so importing this module does not
# try to load "markov.p".
if __name__ == "__main__":
    print(generate("markov.p", 200, 3, " "))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment