Created
May 8, 2016 22:14
-
-
Save danaabs/e846cd62fb152d6cd473b02f0a6fa2ee to your computer and use it in GitHub Desktop.
stock.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import random | |
import sys | |
from textblob import TextBlob | |
import itertools | |
def build_model(tokens, n):
    """Build a Markov model from `tokens`: a dict mapping each n-gram
    (tuple of n consecutive tokens) to the list of tokens observed
    immediately after it. A None successor marks end-of-sequence."""
    model = dict()
    if len(tokens) < n:
        return model
    for start in range(len(tokens) - n):
        ngram = tuple(tokens[start:start + n])
        successor = tokens[start + n]
        model.setdefault(ngram, []).append(successor)
    # The trailing n-gram has no successor; record the end-of-sequence marker.
    closing_gram = tuple(tokens[len(tokens) - n:])
    model.setdefault(closing_gram, []).append(None)
    return model
def generate(model, n, seed=None, max_iterations=100):
    """Generate a token sequence by walking the Markov `model`.

    Starts from `seed` (an n-gram sequence; a random model key when None)
    and repeatedly samples a successor, stopping at the None end-marker,
    at an unknown n-gram, or after `max_iterations` steps.
    Returns the generated list of tokens.
    """
    if seed is None:
        if not model:
            return []  # nothing to sample a seed from
        # BUG FIX: random.choice cannot index a dict_keys view on Python 3;
        # materialize the keys into a list first.
        seed = random.choice(list(model.keys()))
    output = list(seed)
    current = tuple(seed)
    for _ in range(max_iterations):
        if current not in model:
            break
        next_token = random.choice(model[current])
        if next_token is None:
            break  # end-of-sequence marker written by build_model
        output.append(next_token)
        current = tuple(output[-n:])  # slide the window forward
    return output
def merge_models(models):
    """Merge two or more Markov models into a new dict.

    Successor lists from the inputs are copied, so the merged model never
    aliases — and therefore never mutates — the input models' lists.
    """
    merged_model = dict()
    for model in models:
        for gram, successors in model.items():
            if gram in merged_model:
                merged_model[gram].extend(successors)
            else:
                # BUG FIX: assigning `successors` directly would alias the
                # input model's list, and a later extend() for the same gram
                # would silently mutate that input model. Copy instead.
                merged_model[gram] = list(successors)
    return merged_model
def generate_from_token_lists(token_lines, n, count=14, max_iterations=100):
    """Build one Markov model per token line, merge them into a single
    model, and sample `count` generated token sequences, each seeded
    with the opening n-gram of a randomly chosen line."""
    beginnings = [token_line[:n] for token_line in token_lines]
    per_line_models = [build_model(token_line, n) for token_line in token_lines]
    combined_model = merge_models(per_line_models)
    return [
        generate(combined_model, n, random.choice(beginnings), max_iterations)
        for _ in range(count)
    ]
def char_level_generate(lines, n, count=8, max_iterations=100):
    """Generates Markov chain text from the given lines, using character-level
    n-grams of length n. Returns a list of count items."""
    char_lines = [list(text) for text in lines]
    sequences = generate_from_token_lists(char_lines, n, count, max_iterations)
    return [''.join(chars) for chars in sequences]
def word_level_generate(lines, n, count=6, max_iterations=100):
    """Generates Markov chain text from the given lines, using word-level
    n-grams of length n. Returns a list of count items."""
    word_lines = [text.split() for text in lines]
    sequences = generate_from_token_lists(word_lines, n, count, max_iterations)
    return [' '.join(words) for words in sequences]
# --- Module-level corpus analysis (runs at import time, before __main__) ---
# Read the source corpus from stdin and part-of-speech tag it with TextBlob.
text = sys.stdin.read()
blob = TextBlob(text)
verbs = list()
nouns = list()
adj = list()
# Fixed pool for COLOR placeholder substitution (single phrase).
color = ['Baltic Sea']
# Bucket lemmatized words by Penn Treebank POS tag.
for word, tag in blob.tags:
    if (tag == 'VBG') or (tag == 'VB'):  # gerunds and base-form verbs
        verbs.append(word.lemmatize())
    if tag == 'JJ':  # adjectives
        adj.append(word.lemmatize())
    if (tag == 'NN') or (tag == 'NNS'):  # singular and plural nouns
        nouns.append(word.lemmatize())
# NOTE(review): random.choice raises IndexError if the corpus yields no
# verb/noun/adjective — assumes a non-trivial text on stdin; confirm upstream.
# r_verb/r_noun/r_adj appear unused below (only the commented print).
r_verb = random.choice(verbs)
r_noun = random.choice(nouns)
r_adj = random.choice(adj)
#print(r_verb)
if __name__ == '__main__':
    n = 3
    words = list()
    # Read the madlib template; each line becomes a list of its words.
    # FIX: `with` guarantees the file handle is closed (the original leaked
    # it), and the redundant inner `import sys` (already imported at the top
    # of the file) plus the unused `lines` accumulator are removed.
    with open('stock_madlib.txt', 'r') as template:
        for line in template:
            words.append(line.strip().split())
    # Substitute the placeholder tokens in every word. random.choice is
    # re-evaluated per word, so each placeholder occurrence may receive a
    # different replacement. str.replace is a no-op on words without the
    # placeholder.
    new_words1 = [[x.replace('NOUN', random.choice(nouns).upper()) for x in sublist] for sublist in words]
    new_words2 = [[x.replace('VERB', random.choice(verbs).upper()) for x in sublist] for sublist in new_words1]
    new_words3 = [[x.replace('JJ', random.choice(adj).upper()) for x in sublist] for sublist in new_words2]
    new_words4 = [[x.replace('COLOR', random.choice(color).upper()) for x in sublist] for sublist in new_words3]
    # Rejoin each line and feed the substituted template to the word-level
    # Markov generator.
    newest_list = [' '.join(x) for x in new_words4]
    for generated in word_level_generate(newest_list, n):
        print(generated)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment