Skip to content

Instantly share code, notes, and snippets.

@jcbozonier
Created June 17, 2009 15:24
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 2 You must be signed in to fork a gist
  • Save jcbozonier/131289 to your computer and use it in GitHub Desktop.
Save jcbozonier/131289 to your computer and use it in GitHub Desktop.
from __future__ import with_statement
import random
def create_chain(file_paths):
    """Build a first-order, word-level Markov chain from text files.

    Every whitespace-separated token is lower-cased and counted as a
    follower of the token that preceded it.  The "previous word" is carried
    across line and file boundaries, and the very first token is recorded
    as a follower of the empty string "".

    Args:
        file_paths: iterable of paths to text files, read in order.

    Returns:
        A dict mapping each word to a dict of follower counts.  Each inner
        dict also carries the special key "total count" holding the sum of
        all follower counts; it cannot collide with a real word because
        words never contain whitespace.
    """
    word_counter = {}
    previous_word = ""
    for path in file_paths:
        with open(path) as source:
            for line in source:
                # split() with no argument splits on any whitespace run, so
                # trailing newlines and tabs never end up inside a word and
                # blank lines contribute nothing.
                for word in line.split():
                    word = word.lower()
                    followers = word_counter.setdefault(
                        previous_word, {"total count": 0})
                    followers[word] = followers.get(word, 0) + 1
                    followers["total count"] += 1
                    previous_word = word
    return word_counter
def construct_sentence(markov_chain, word_count=300, initial_word=""):
    """Generate random text by walking a word-level Markov chain.

    Args:
        markov_chain: dict mapping each word to a dict of follower counts
            that also contains the special key "total count" (the sum of
            the follower counts).
        word_count: the walk performs ``word_count - 1`` steps; the first
            step emits both the starting word and its follower, so the
            result normally holds ``word_count`` words.
        initial_word: word to start from (lower-cased before lookup).  If it
            is not a state of the chain, a random state is used instead.

    Returns:
        The generated words joined by single spaces and prefixed with one
        leading space, or "" when nothing could be generated (empty chain
        or ``word_count`` <= 1).
    """
    if not markov_chain:
        # Nothing to walk; avoids asking for a random state that cannot exist.
        return ""

    states = list(markov_chain)
    current_word = initial_word.lower()
    words = []

    for _ in range(1, word_count):
        if current_word not in markov_chain:
            # Unknown start word or dead end (a word that was never followed
            # by anything): restart from a random state.  The restart word
            # itself is only emitted if it opens the sentence.
            current_word = random.choice(states)

        followers = markov_chain[current_word]
        # Draw a position in 1..total and walk the cumulative follower
        # counts until that position is covered, which selects each
        # follower with probability proportional to its count.
        target = random.randrange(1, followers["total count"] + 1)
        cumulative = 0
        for next_word, count in followers.items():
            # "total count" is bookkeeping, not a follower.
            if next_word == "total count":
                continue
            cumulative += count
            if target <= cumulative:
                if not words:
                    words.append(current_word)
                words.append(next_word)
                current_word = next_word
                break

    if not words:
        return ""
    # Leading space kept for compatibility with the original output format.
    return " " + " ".join(words)
# Train the chain on the local corpus.  The paths are specific to the
# original author's machine and must exist for the script to run.
markov = create_chain(
    (
        "/users/darkxanthos/documents/workspace/markovchain/src/documents/bible.txt",
        "/users/darkxanthos/documents/workspace/markovchain/src/documents/arabiannights.txt",
        "/users/darkxanthos/documents/workspace/markovchain/src/documents/alice.txt",
        "/users/darkxanthos/documents/workspace/markovchain/src/documents/taoteching.txt",
        "/users/darkxanthos/documents/workspace/markovchain/src/documents/communist_manifesto.txt",
        "/users/darkxanthos/documents/workspace/markovchain/src/documents/portrait.txt",
        "/users/darkxanthos/documents/workspace/markovchain/src/documents/ulysses.txt",
        "/users/darkxanthos/documents/workspace/markovchain/src/documents/dubliners.txt",
    )
)
# Parenthesised single-argument print behaves the same as the Python 2 print
# statement and is also valid on Python 3.
print(construct_sentence(markov_chain=markov, initial_word="i", word_count=300))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment