Created
May 8, 2016 17:26
-
-
Save coblezc/beebac421898d24e714e532271508880 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# generate dictionaries to feed into markov.py
import markov

# all the category word lists live in one directory
_CATEGORY_DIR = "/Users/user/Documents/itp/rwet/final/categories/"


def _read_lines(path):
    """Return every line of the file at path.

    The original bare open(...).readlines() calls never closed their file
    handles; "with" guarantees the descriptor is released."""
    with open(path) as f:
        return f.readlines()


# For each category: markov-generate 100 fake terms and 100 fake
# definitions from the real word lists, then zip them into a glossary
# dict.  The middle argument of char_level_generate is the character
# n-gram length, kept exactly as originally tuned per category.
# cb speak
cb_terms_file = _read_lines(_CATEGORY_DIR + "cbspeak-terms.txt")
cb_terms = markov.char_level_generate(cb_terms_file, 4, 100)
cb_defs_file = _read_lines(_CATEGORY_DIR + "cbspeak-defs.txt")
cb_defs = markov.char_level_generate(cb_defs_file, 4, 100)
cb_keys = cb_terms
cb_values = cb_defs
cb_dictionary = dict(zip(cb_keys, cb_values))
# cops
cops_terms_file = _read_lines(_CATEGORY_DIR + "cops-terms.txt")
cops_terms = markov.char_level_generate(cops_terms_file, 4, 100)
cops_defs_file = _read_lines(_CATEGORY_DIR + "cops-defs.txt")
cops_defs = markov.char_level_generate(cops_defs_file, 3, 100)
cops_keys = cops_terms
cops_values = cops_defs
cops_dictionary = dict(zip(cops_keys, cops_values))
# driving
driving_terms_file = _read_lines(_CATEGORY_DIR + "driving-terms.txt")
driving_terms = markov.char_level_generate(driving_terms_file, 3, 100)
driving_defs_file = _read_lines(_CATEGORY_DIR + "driving-defs.txt")
driving_defs = markov.char_level_generate(driving_defs_file, 3, 100)
driving_keys = driving_terms
driving_values = driving_defs
driving_dictionary = dict(zip(driving_keys, driving_values))
# events
events_terms_file = _read_lines(_CATEGORY_DIR + "events-terms.txt")
events_terms = markov.char_level_generate(events_terms_file, 4, 100)
events_defs_file = _read_lines(_CATEGORY_DIR + "events-defs.txt")
events_defs = markov.char_level_generate(events_defs_file, 5, 100)
events_keys = events_terms
events_values = events_defs
events_dictionary = dict(zip(events_keys, events_values))
# facts
facts_terms_file = _read_lines(_CATEGORY_DIR + "facts-terms.txt")
facts_terms = markov.char_level_generate(facts_terms_file, 5, 100)
facts_defs_file = _read_lines(_CATEGORY_DIR + "facts-defs.txt")
facts_defs = markov.char_level_generate(facts_defs_file, 5, 100)
facts_keys = facts_terms
facts_values = facts_defs
facts_dictionary = dict(zip(facts_keys, facts_values))
# people
people_terms_file = _read_lines(_CATEGORY_DIR + "people-terms.txt")
people_terms = markov.char_level_generate(people_terms_file, 3, 100)
people_defs_file = _read_lines(_CATEGORY_DIR + "people-defs.txt")
people_defs = markov.char_level_generate(people_defs_file, 5, 100)
people_keys = people_terms
people_values = people_defs
people_dictionary = dict(zip(people_keys, people_values))
# places
places_terms_file = _read_lines(_CATEGORY_DIR + "places-terms.txt")
places_terms = markov.char_level_generate(places_terms_file, 4, 100)
places_defs_file = _read_lines(_CATEGORY_DIR + "places-defs.txt")
places_defs = markov.char_level_generate(places_defs_file, 5, 100)
places_keys = places_terms
places_values = places_defs
places_dictionary = dict(zip(places_keys, places_values))
# rigs
rigs_terms_file = _read_lines(_CATEGORY_DIR + "rigs-terms.txt")
rigs_terms = markov.char_level_generate(rigs_terms_file, 3, 100)
rigs_defs_file = _read_lines(_CATEGORY_DIR + "rigs-defs.txt")
rigs_defs = markov.char_level_generate(rigs_defs_file, 4, 100)
rigs_keys = rigs_terms
rigs_values = rigs_defs
rigs_dictionary = dict(zip(rigs_keys, rigs_values))
# things
things_terms_file = _read_lines(_CATEGORY_DIR + "things-terms.txt")
things_terms = markov.char_level_generate(things_terms_file, 3, 100)
things_defs_file = _read_lines(_CATEGORY_DIR + "things-defs.txt")
things_defs = markov.char_level_generate(things_defs_file, 4, 100)
things_keys = things_terms
things_values = things_defs
things_dictionary = dict(zip(things_keys, things_values))
def handles():
    """Return the list of CB handles, one per line of the handles file
    (newlines preserved, exactly as the old append loop produced).

    Uses "with" so the file handle is closed; the original leaked it and
    also shadowed the function name with a local list."""
    path = "/Users/user/Documents/itp/rwet/final/categories/handles.txt"
    with open(path) as f:
        return f.readlines()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# generates dictionaries and then markov's them | |
# puts the output in madlib form, including glossary of terms used | |
# the program you actually run | |
import dictionaries | |
import random | |
# -- randomly pick generated terms to fill each madlib slot of the song --
# NOTE(review): Python 2 idiom -- random.choice(d.keys()) relies on keys()
# returning a list; under Python 3 this would need list(d.keys()).
# intro, line 1
cb_speak_1 = random.choice(dictionaries.cb_dictionary.keys()).strip()
handle_1 = random.choice(dictionaries.handles()).strip()
cb_speak_2 = random.choice(dictionaries.cb_dictionary.keys()).strip()
handle_2 = random.choice(dictionaries.handles()).strip()
# intro, line 2
cb_speak_3 = random.choice(dictionaries.cb_dictionary.keys()).strip()
cb_speak_4 = random.choice(dictionaries.cb_dictionary.keys()).strip()
# intro, line 3
facts_1 = random.choice(dictionaries.facts_dictionary.keys()).strip()
places_1 = random.choice(dictionaries.places_dictionary.keys()).strip()
cb_speak_5 = random.choice(dictionaries.cb_dictionary.keys()).strip()
# intro, line 4
cb_speak_6 = random.choice(dictionaries.cb_dictionary.keys()).strip()
facts_2 = random.choice(dictionaries.facts_dictionary.keys()).strip()
# verse 1, line 1
events_1 = random.choice(dictionaries.events_dictionary.keys()).strip()
# verse 1, line 2
rigs_1 = random.choice(dictionaries.rigs_dictionary.keys()).strip()
events_2 = random.choice(dictionaries.events_dictionary.keys()).strip()
# verse 1, line 3
rigs_2 = random.choice(dictionaries.rigs_dictionary.keys()).strip()
handle_3 = random.choice(dictionaries.handles()).strip()
# verse 1, line 4
people_1 = random.choice(dictionaries.people_dictionary.keys()).strip()
events_3 = random.choice(dictionaries.events_dictionary.keys()).strip()
places_2 = random.choice(dictionaries.places_dictionary.keys()).strip()
# verse 1, line 5
cops_1 = random.choice(dictionaries.cops_dictionary.keys()).strip()
places_3 = random.choice(dictionaries.places_dictionary.keys()).strip()
# verse 1, line 6
places_4 = random.choice(dictionaries.places_dictionary.keys()).strip()
# verse 1, line 7
# NOTE(review): these two assignments overwrite the handle_1/handle_2
# chosen for the intro BEFORE any line text is built below, so the picks
# at the top are never used and the intro and verse share the same
# handles -- confirm this is intended.
handle_2 = random.choice(dictionaries.handles()).strip()
handle_1 = random.choice(dictionaries.handles()).strip()
# verse 1, line 8
driving_1 = random.choice(dictionaries.driving_dictionary.keys()).strip()
# intro -- each lineN below is a tuple of string fragments that the print
# section glues together with "".join().  The handles were already
# .strip()ped when they were chosen above, so the redundant per-use
# .strip() calls have been dropped (output is byte-identical).
line1 = (cb_speak_1, ", this here's the ", handle_1, ". You gotta ", cb_speak_2, " on me, ", handle_2, ", c'mon?")
line2 = ("Ah, yeah, ", cb_speak_3, ", ", handle_1, ", ", cb_speak_4, ".")
line3 = ("By golly, it's ", facts_1, " to ", places_1, ", ", cb_speak_5, ".")
line4 = ("Yeah, that's a big ", cb_speak_6, " there. Yeah, we definitely got the ", facts_2, ", good buddy.")
# trailing comma makes this a real 1-tuple; the original was a bare
# parenthesized string, which only worked with "".join() by accident
line5 = ("Mercy sakes alive, looks like we got us a convoy",)
# verse 1
line6 = ("Was ", events_1, " on the sixth of June")
line7 = ("In a ", rigs_1, " ", events_2)
line8 = (rigs_2, " ", handle_3, " with a reefer on")
line9 = ("And a ", people_1, " to ", places_2)
line10 = ("We is watchin' for ", cops_1, " on ", places_3)
line11 = ("'Bout a mile outta ", places_4)
line12 = ("I says, ", handle_2, " this here's the ", handle_1, ".")
line13 = ("And I'm about to ", driving_1, ".")
# print lines -- glue each fragment tuple into one string per song line
print "".join(line1)
print "".join(line2)
print "".join(line3)
print "".join(line4)
print "".join(line5)
# blank line between the intro and verse 1
print ""
print "".join(line6)
print "".join(line7)
print "".join(line8)
print "".join(line9)
print "".join(line10)
print "".join(line11)
print "".join(line12)
print "".join(line13)
# spacer -- a purely decorative diagonal of dots separating the song from
# the glossary that follows (the exact indentation is cosmetic only)
print "."
print ".."
print "..."
print " ..."
print "  ..."
print "   ..."
print "    ..."
print "     ..."
print "      ..."
print "      ..."
print "     ..."
print "    ..."
print "   ..."
print "  ..."
print " ..."
print "..."
print ".."
print "."
# line 1 | |
cb_def_1 = cb_speak_1 + ": " + random.choice(dictionaries.cb_dictionary.values()).strip() | |
handle_1_def = handle_1 + ": Driver #1's name" | |
cb_def_2 = cb_speak_2 + ": " + random.choice(dictionaries.cb_dictionary.values()).strip() | |
handle_2_def = handle_2 + ": Driver #2's name" | |
print cb_def_1 | |
print handle_1_def | |
print cb_def_2 | |
print handle_2_def | |
print "" | |
# line 2 | |
cb_def_3 = cb_speak_3 + ": " + random.choice(dictionaries.cb_dictionary.values()).strip() | |
cb_def_4 = cb_speak_4 + ": " + random.choice(dictionaries.cb_dictionary.values()).strip() | |
print cb_def_3 | |
print cb_def_4 | |
print "" | |
# line 3 | |
facts_def_1 = facts_1 + ": " + random.choice(dictionaries.cb_dictionary.values()).strip() | |
places_def_1 = places_1 + ": " + random.choice(dictionaries.cb_dictionary.values()).strip() | |
cb_def_5 = cb_speak_5 + ": " + random.choice(dictionaries.cb_dictionary.values()).strip() | |
print facts_def_1 | |
print places_def_1 | |
print cb_def_5 | |
print "" | |
# line 4 | |
cb_def_6 = cb_speak_6 + ": " + random.choice(dictionaries.cb_dictionary.values()).strip() | |
facts_def_2 = facts_2 + ": " + random.choice(dictionaries.cb_dictionary.values()).strip() | |
print cb_def_6 | |
print facts_def_2 | |
print "" | |
# line 5...no defs | |
print "" | |
print "" | |
print "" | |
# line 6 | |
events_def_1 = events_1 + ": " + random.choice(dictionaries.cb_dictionary.values()).strip() | |
print events_def_1 | |
print "" | |
# line 7 | |
rigs_def_1 = rigs_1 + ": " + random.choice(dictionaries.cb_dictionary.values()).strip() | |
events_def_2 = events_2 + ": " + random.choice(dictionaries.cb_dictionary.values()).strip() | |
print rigs_def_1 | |
print events_def_2 | |
print "" | |
# line 8 | |
rigs_def_2 = rigs_2 + ": " + random.choice(dictionaries.cb_dictionary.values()).strip() | |
handle_def_3 = handle_3 + ": " + random.choice(dictionaries.cb_dictionary.values()).strip() | |
print rigs_def_2 | |
print handle_def_3 | |
print "" | |
# line 9 | |
people_def_1 = facts_1 + ": " + random.choice(dictionaries.cb_dictionary.values()).strip() | |
events_def_3 = events_2 + ": " + random.choice(dictionaries.cb_dictionary.values()).strip() | |
places_def_2 = places_2 + ": " + random.choice(dictionaries.cb_dictionary.values()).strip() | |
print people_def_1 | |
print events_def_3 | |
print places_def_2 | |
print "" | |
# line 10 | |
cops_def_1 = cops_1 + ": " + random.choice(dictionaries.cb_dictionary.values()).strip() | |
places_def_3 = places_3 + ": " + random.choice(dictionaries.cb_dictionary.values()).strip() | |
print cops_def_1 | |
print places_def_3 | |
print "" | |
# line 11 | |
places_def_4 = places_4 + ": " + random.choice(dictionaries.cb_dictionary.values()).strip() | |
print places_def_4 | |
print "" | |
# line 12 | |
print handle_2_def | |
print handle_1_def | |
print "" | |
# line 13 | |
driving_def_1 = driving_1 + ": " + random.choice(dictionaries.cb_dictionary.values()).strip() | |
print driving_def_1 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# from Allison Parrish's https://github.com/aparrish/rwet-examples/blob/master/ngrams/markov.py | |
# take output of dictionaries.py and generate markov | |
import random | |
def tokenize(corpus):
    """Split each line of corpus into a list of whitespace-separated words.

    Returns a list of token lists, one per input line.  str.split() with
    no argument already ignores leading/trailing whitespace, so the
    explicit strip() of the original loop is preserved implicitly."""
    return [line.split() for line in corpus]
def build_model(tokens, n):
    """Build a Markov model from tokens using n-grams of length n.

    The model maps each n-gram (as a tuple) to the list of tokens observed
    to follow it; the final n-gram of the sequence maps to a list
    containing None, marking a legal stopping point."""
    model = dict()
    if len(tokens) < n:
        # too short to contain even one n-gram
        return model
    for start in range(len(tokens) - n):
        gram = tuple(tokens[start:start + n])
        follower = tokens[start + n]
        model.setdefault(gram, []).append(follower)
    # the trailing n-gram has no follower; None means "may end here"
    model.setdefault(tuple(tokens[-n:]), []).append(None)
    return model
def generate(model, n, seed=None, max_iterations=100):
    """Generates a list of tokens from information in model, using n as the
    length of n-grams in the model. Starts the generation with the n-gram
    given as seed. If more than max_iterations iterations are reached, the
    process is stopped. (This is to prevent infinite loops.)"""
    if seed is None:
        # list() so this also works on Python 3, where dict.keys() is a
        # view and random.choice would raise; identical on Python 2
        seed = random.choice(list(model.keys()))
    output = list(seed)
    current = tuple(seed)
    for i in range(max_iterations):
        if current not in model:
            # dead end: the current n-gram was never seen
            break
        next_token = random.choice(model[current])
        if next_token is None:
            # None marks a legal stopping point (see build_model)
            break
        output.append(next_token)
        current = tuple(output[-n:])
    return output
def merge_models(models):
    """Merges two or more Markov models, concatenating the follower lists
    of n-grams that appear in several models.

    The input models are left unmodified."""
    merged_model = dict()
    for model in models:
        # .items() instead of the Python-2-only iteritems(); works on both
        for key, val in model.items():
            if key in merged_model:
                merged_model[key].extend(val)
            else:
                # FIX: copy the list -- the original stored val itself, so
                # a later extend() for the same key mutated the follower
                # list inside the source model as a side effect
                merged_model[key] = list(val)
    return merged_model
def generate_from_token_lists(token_lines, n, count=14, max_iterations=100):
    """Generate count token lists from a list of lists of tokens.

    Intended for input where each line is a distinct unit (e.g. poetry).
    The n-gram that opens each input line is remembered, and every
    generated line starts from one of those "beginnings"; a separate
    Markov model is built per input line and the models are merged, so
    generated lines tend to end with n-grams that ended lines in the
    original text."""
    beginnings = [line_tokens[:n] for line_tokens in token_lines]
    models = [build_model(line_tokens, n) for line_tokens in token_lines]
    combined_model = merge_models(models)
    return [
        generate(combined_model, n, random.choice(beginnings), max_iterations)
        for _ in range(count)
    ]
def char_level_generate(lines, n, count=14, max_iterations=100):
    """Markov-generate count strings from lines, modeling character-level
    n-grams of length n."""
    as_chars = [list(line) for line in lines]
    pieces = generate_from_token_lists(as_chars, n, count, max_iterations)
    # each generated item is a list of characters; glue back into strings
    return [''.join(chars) for chars in pieces]
def word_level_generate(lines, n, count=14, max_iterations=100):
    """Markov-generate count strings from lines, modeling word-level
    n-grams of length n."""
    as_words = [line.split() for line in lines]
    pieces = generate_from_token_lists(as_words, n, count, max_iterations)
    # each generated item is a list of words; rejoin with single spaces
    return [' '.join(words) for words in pieces]
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment