# lizadaly/example-markov.py

## example-markov.py
import random
from collections import Counter
import nltk

def load_cleaned_words():
    """Return the lower-cased alphanumeric tokens of the NLTK Gutenberg corpus.

    See https://gist.github.com/lizadaly/7071e0de589883a197433951bc7314c5 for
    comments on the setup here.

    Tokens for which ``str.isalnum()`` is false (punctuation and the like) are
    dropped; the remaining tokens keep their corpus order, file after file.
    """
    corpus = nltk.corpus.gutenberg
    cleaned_words = []
    for fileid in corpus.fileids():
        # Filter while extending so no separate unfiltered word list is built.
        cleaned_words.extend(
            w.lower() for w in corpus.words(fileid) if w.isalnum()
        )
    return cleaned_words


def build_successors(bigrams):
    """Group word pairs by their first word.

    Args:
        bigrams: iterable of ``(first, second)`` pairs.

    Returns:
        A dict mapping each ``first`` to the list of every ``second`` seen
        after it. Duplicates and input order are kept on purpose, so a uniform
        random pick from a list is weighted by how often each pair occurred,
        exactly like picking a random pair among those starting with ``first``.
    """
    successors = {}
    for first, second in bigrams:
        successors.setdefault(first, []).append(second)
    return successors


def generate_words(successors, start_word, length, choice=random.choice):
    """Walk the successor table and return at most ``length`` words.

    Args:
        successors: mapping as returned by ``build_successors``.
        start_word: first word of the result.
        length: maximum number of words to return.
        choice: callable that picks one element from a non-empty list;
            defaults to ``random.choice``.

    Returns:
        A list beginning with ``start_word`` in which every later word was
        picked from the successors of the word before it. The list is shorter
        than ``length`` when a word with no recorded successor is reached.
    """
    words = []
    current = start_word
    for _ in range(length):
        words.append(current)
        followers = successors.get(current)
        if not followers:
            # Dead end: nothing ever followed this word, so stop here instead
            # of asking `choice` to pick from an empty list.
            break
        current = choice(followers)
    return words


def main():
    """Print a 10 word sentence built from Gutenberg bigrams, starting at 'sun'."""
    # Decide on a start word for the sentence
    start_word = 'sun'

    # Index the pairs once, e.g. "sun" -> ["shines", "sets", ...], rather than
    # rescanning every bigram for each word of the sentence.
    successors = build_successors(nltk.bigrams(load_cleaned_words()))
    if start_word not in successors:
        raise SystemExit(
            "No word pair in the corpus starts with {!r}".format(start_word)
        )

    # Print a 10 word sentence using the set of bigrams
    print(" ".join(generate_words(successors, start_word, 10)))


# Build the corpus index and print the sentence only when run as a script.
if __name__ == "__main__":
    main()