Skip to content

Instantly share code, notes, and snippets.

@coblezc
Created May 8, 2016 17:26
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save coblezc/beebac421898d24e714e532271508880 to your computer and use it in GitHub Desktop.
# generate dictionaries to feed into markov.py
import markov

# All category corpora live here as <category>-terms.txt / <category>-defs.txt.
_CATEGORY_DIR = "/Users/user/Documents/itp/rwet/final/categories"


def _make_dictionary(category, term_order, def_order, count=100):
    """Build a {generated term: generated definition} dict for one category.

    Reads <category>-terms.txt and <category>-defs.txt from _CATEGORY_DIR,
    runs each corpus through the character-level Markov generator
    (n-gram lengths term_order / def_order, `count` generated strings each),
    and zips terms to definitions.

    The `with` blocks close the corpus files (the original left every file
    handle open).
    """
    with open("%s/%s-terms.txt" % (_CATEGORY_DIR, category)) as f:
        terms = markov.char_level_generate(f.readlines(), term_order, count)
    with open("%s/%s-defs.txt" % (_CATEGORY_DIR, category)) as f:
        defs = markov.char_level_generate(f.readlines(), def_order, count)
    return dict(zip(terms, defs))


# One generated glossary per category; the n-gram orders are tuned per corpus
# and preserved exactly from the original script.
cb_dictionary = _make_dictionary("cbspeak", 4, 4)
cops_dictionary = _make_dictionary("cops", 4, 3)
driving_dictionary = _make_dictionary("driving", 3, 3)
events_dictionary = _make_dictionary("events", 4, 5)
facts_dictionary = _make_dictionary("facts", 5, 5)
people_dictionary = _make_dictionary("people", 3, 5)
places_dictionary = _make_dictionary("places", 4, 5)
rigs_dictionary = _make_dictionary("rigs", 3, 4)
things_dictionary = _make_dictionary("things", 3, 4)
def handles():
    """Return the list of CB handles, one per line of handles.txt.

    Lines keep their trailing newlines (callers .strip() each handle);
    the original's append loop was a verbatim copy of readlines(), and its
    file handle was never closed -- the `with` block fixes the leak.
    """
    with open("/Users/user/Documents/itp/rwet/final/categories/handles.txt") as f:
        return f.readlines()
# generates dictionaries and then markov's them
# puts the output in madlib form, including glossary of terms used
# the program you actually run
import dictionaries
import random


def _pick(dictionary):
    "Return one random generated term (a key) from `dictionary`, stripped."
    return random.choice(dictionary.keys()).strip()


def _pick_handle():
    "Return one random CB handle, stripped."
    return random.choice(dictionaries.handles()).strip()


# intro, line 1
cb_speak_1 = _pick(dictionaries.cb_dictionary)
handle_1 = _pick_handle()
cb_speak_2 = _pick(dictionaries.cb_dictionary)
handle_2 = _pick_handle()
# intro, line 2
cb_speak_3 = _pick(dictionaries.cb_dictionary)
cb_speak_4 = _pick(dictionaries.cb_dictionary)
# intro, line 3
facts_1 = _pick(dictionaries.facts_dictionary)
places_1 = _pick(dictionaries.places_dictionary)
cb_speak_5 = _pick(dictionaries.cb_dictionary)
# intro, line 4
cb_speak_6 = _pick(dictionaries.cb_dictionary)
facts_2 = _pick(dictionaries.facts_dictionary)
# verse 1, line 1
events_1 = _pick(dictionaries.events_dictionary)
# verse 1, line 2
rigs_1 = _pick(dictionaries.rigs_dictionary)
events_2 = _pick(dictionaries.events_dictionary)
# verse 1, line 3
rigs_2 = _pick(dictionaries.rigs_dictionary)
handle_3 = _pick_handle()
# verse 1, line 4
people_1 = _pick(dictionaries.people_dictionary)
events_3 = _pick(dictionaries.events_dictionary)
places_2 = _pick(dictionaries.places_dictionary)
# verse 1, line 5
cops_1 = _pick(dictionaries.cops_dictionary)
places_3 = _pick(dictionaries.places_dictionary)
# verse 1, line 6
places_4 = _pick(dictionaries.places_dictionary)
# verse 1, line 7
# (The original re-picked handle_2 and handle_1 here, silently discarding the
# intro's picks; the duplicate picks are removed so one pick is used throughout.)
driving_1 = _pick(dictionaries.driving_dictionary)
# intro
# Each poem line is a tuple of string fragments, joined with "".join() at
# print time. The handles and generated terms were already stripped when
# picked, so the original's extra .strip() calls here were redundant.
line1 = (cb_speak_1, ", this here's the ", handle_1, ". You gotta ", cb_speak_2, " on me, ", handle_2, ", c'mon?")
line2 = ("Ah, yeah, ", cb_speak_3, ", ", handle_1, ", ", cb_speak_4, ".")
line3 = ("By golly, it's ", facts_1, " to ", places_1, ", ", cb_speak_5, ".")
line4 = ("Yeah, that's a big ", cb_speak_6, " there. Yeah, we definitely got the ", facts_2, ", good buddy.")
# One-element tuple (the original was a bare parenthesized string; join output
# is identical either way).
line5 = ("Mercy sakes alive, looks like we got us a convoy",)
# verse 1
line6 = ("Was ", events_1, " on the sixth of June")
line7 = ("In a ", rigs_1, " ", events_2)
line8 = (rigs_2, " ", handle_3, " with a reefer on")
# An earlier variant of line9 also included events_3:
#   ("And a ", people_1, " ", events_3, " to ", places_2)
line9 = ("And a ", people_1, " to ", places_2)
line10 = ("We is watchin' for ", cops_1, " on ", places_3)
line11 = ("'Bout a mile outta ", places_4)
line12 = ("I says, ", handle_2, " this here's the ", handle_1, ".")
line13 = ("And I'm about to ", driving_1, ".")
# print lines
print "".join(line1)
print "".join(line2)
print "".join(line3)
print "".join(line4)
print "".join(line5)
print ""
print "".join(line6)
print "".join(line7)
print "".join(line8)
print "".join(line9)
print "".join(line10)
print "".join(line11)
print "".join(line12)
print "".join(line13)
# spacer
print "."
print ".."
print "..."
print " ..."
print " ..."
print " ..."
print " ..."
print " ..."
print " ..."
print " ..."
print " ..."
print " ..."
print " ..."
print " ..."
print " ..."
print "..."
print ".."
print "."
# line 1
cb_def_1 = cb_speak_1 + ": " + random.choice(dictionaries.cb_dictionary.values()).strip()
handle_1_def = handle_1 + ": Driver #1's name"
cb_def_2 = cb_speak_2 + ": " + random.choice(dictionaries.cb_dictionary.values()).strip()
handle_2_def = handle_2 + ": Driver #2's name"
print cb_def_1
print handle_1_def
print cb_def_2
print handle_2_def
print ""
# line 2
cb_def_3 = cb_speak_3 + ": " + random.choice(dictionaries.cb_dictionary.values()).strip()
cb_def_4 = cb_speak_4 + ": " + random.choice(dictionaries.cb_dictionary.values()).strip()
print cb_def_3
print cb_def_4
print ""
# line 3
facts_def_1 = facts_1 + ": " + random.choice(dictionaries.cb_dictionary.values()).strip()
places_def_1 = places_1 + ": " + random.choice(dictionaries.cb_dictionary.values()).strip()
cb_def_5 = cb_speak_5 + ": " + random.choice(dictionaries.cb_dictionary.values()).strip()
print facts_def_1
print places_def_1
print cb_def_5
print ""
# line 4
cb_def_6 = cb_speak_6 + ": " + random.choice(dictionaries.cb_dictionary.values()).strip()
facts_def_2 = facts_2 + ": " + random.choice(dictionaries.cb_dictionary.values()).strip()
print cb_def_6
print facts_def_2
print ""
# line 5...no defs
print ""
print ""
print ""
# line 6
events_def_1 = events_1 + ": " + random.choice(dictionaries.cb_dictionary.values()).strip()
print events_def_1
print ""
# line 7
rigs_def_1 = rigs_1 + ": " + random.choice(dictionaries.cb_dictionary.values()).strip()
events_def_2 = events_2 + ": " + random.choice(dictionaries.cb_dictionary.values()).strip()
print rigs_def_1
print events_def_2
print ""
# line 8
rigs_def_2 = rigs_2 + ": " + random.choice(dictionaries.cb_dictionary.values()).strip()
handle_def_3 = handle_3 + ": " + random.choice(dictionaries.cb_dictionary.values()).strip()
print rigs_def_2
print handle_def_3
print ""
# line 9
people_def_1 = facts_1 + ": " + random.choice(dictionaries.cb_dictionary.values()).strip()
events_def_3 = events_2 + ": " + random.choice(dictionaries.cb_dictionary.values()).strip()
places_def_2 = places_2 + ": " + random.choice(dictionaries.cb_dictionary.values()).strip()
print people_def_1
print events_def_3
print places_def_2
print ""
# line 10
cops_def_1 = cops_1 + ": " + random.choice(dictionaries.cb_dictionary.values()).strip()
places_def_3 = places_3 + ": " + random.choice(dictionaries.cb_dictionary.values()).strip()
print cops_def_1
print places_def_3
print ""
# line 11
places_def_4 = places_4 + ": " + random.choice(dictionaries.cb_dictionary.values()).strip()
print places_def_4
print ""
# line 12
print handle_2_def
print handle_1_def
print ""
# line 13
driving_def_1 = driving_1 + ": " + random.choice(dictionaries.cb_dictionary.values()).strip()
print driving_def_1
# from Allison Parrish's https://github.com/aparrish/rwet-examples/blob/master/ngrams/markov.py
# take output of dictionaries.py and generate markov
import random
def tokenize(corpus):
    """Split each line of `corpus` into a list of whitespace-separated words.

    Returns one word list per input line; each line is stripped of
    surrounding whitespace before splitting.
    """
    return [raw.strip().split() for raw in corpus]
def build_model(tokens, n):
    "Builds a Markov model from the list of tokens, using n-grams of length n."
    model = {}
    if len(tokens) < n:
        # Not enough tokens for even one n-gram.
        return model
    # Map each n-gram (as a tuple) to the list of tokens seen to follow it.
    for start in range(len(tokens) - n):
        key = tuple(tokens[start:start + n])
        model.setdefault(key, []).append(tokens[start + n])
    # The n-gram that ends the sequence is followed by None (end marker,
    # consumed by generate()).
    model.setdefault(tuple(tokens[-n:]), []).append(None)
    return model
def generate(model, n, seed=None, max_iterations=100):
    """Generates a list of tokens from information in model, using n as the
    length of n-grams in the model. Starts the generation with the n-gram
    given as seed (a random n-gram from the model when seed is None). If more
    than max_iterations iterations are reached, the process is stopped.
    (This is to prevent infinite loops.)"""
    if seed is None:
        # list() so this also works on Python 3, where dict.keys() is a
        # view that random.choice cannot index.
        seed = random.choice(list(model.keys()))
    output = list(seed)
    current = tuple(seed)
    for _ in range(max_iterations):
        if current not in model:
            # Dead end: this n-gram was never observed.
            break
        next_token = random.choice(model[current])
        if next_token is None:
            # End-of-sequence marker placed by build_model.
            break
        output.append(next_token)
        current = tuple(output[-n:])
    return output
def merge_models(models):
    """Merges two or more Markov models (dicts of n-gram -> follower list).

    Returns a new dict; the input models are left unmodified. The original
    stored a *reference* to an input model's follower list and later
    extended it in place, mutating that input model -- copying the list
    fixes the aliasing. Also uses .items() instead of the Python-2-only
    .iteritems() (works on both 2 and 3).
    """
    merged_model = dict()
    for model in models:
        for key, val in model.items():
            if key in merged_model:
                merged_model[key].extend(val)
            else:
                merged_model[key] = list(val)
    return merged_model
def generate_from_token_lists(token_lines, n, count=14, max_iterations=100):
    """Generates text from a list of lists of tokens. This function is intended
    for input text where each line forms a distinct unit (e.g., poetry), and
    where the desired output is to recreate lines in that form. It does this
    by keeping track of the n-gram that comes at the beginning of each line,
    and then only generating lines that begin with one of these "beginnings."
    It also builds a separate Markov model for each line, and then merges
    those models together, to ensure that lines end with n-grams statistically
    likely to end lines in the original text."""
    # Collect each line's opening n-gram and its per-line model.
    beginnings = [line[:n] for line in token_lines]
    per_line_models = [build_model(line, n) for line in token_lines]
    combined_model = merge_models(per_line_models)
    # Generate `count` lines, each started from a randomly chosen beginning.
    return [
        generate(combined_model, n, random.choice(beginnings), max_iterations)
        for _ in range(count)
    ]
def char_level_generate(lines, n, count=14, max_iterations=100):
    """Generates Markov chain text from the given lines, using character-level
    n-grams of length n. Returns a list of count items."""
    # Treat every line as a sequence of individual characters.
    generated = generate_from_token_lists(
        [list(text) for text in lines], n, count, max_iterations)
    results = []
    for item in generated:
        results.append(''.join(item))
    return results
def word_level_generate(lines, n, count=14, max_iterations=100):
    """Generates Markov chain text from the given lines, using word-level
    n-grams of length n. Returns a list of count items."""
    # Treat every line as a sequence of whitespace-separated words.
    generated = generate_from_token_lists(
        [text.split() for text in lines], n, count, max_iterations)
    results = []
    for item in generated:
        results.append(' '.join(item))
    return results
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment