# aparrish/poem.py

## poem.py
import nltk
import sys
from nltk.corpus import brown
import random
import re

def only_tokens(paired_list):
	"""Return a list holding the first item of every pair in paired_list.

	The script passes sequences of (token, POS tag) tuples, so this drops
	the tags and keeps the tokens in their original order.
	"""
	tokens = []
	for pair in paired_list:
		tokens.append(pair[0])
	return tokens

def clean(s):
	"""Tidy a space-joined token string before it is printed.

	Applied in order:
	  1. drop a single leading punctuation mark (one of ; ' . , ?)
	  2. drop any leading whitespace
	  3. remove the space in front of each of those punctuation marks

	Returns the cleaned string.
	"""
	substitutions = (
		(r"^[;'.,?]", ''),
		(r"^\s+", ''),
		(r" ([;'.,?])", r'\1'),
	)
	for pattern, replacement in substitutions:
		s = re.sub(pattern, replacement, s)
	return s

# Lookup tables filled in while reading the input:
#   first_to_gram: POS tag -> n-grams whose first token carries that tag
#   gram_to_after: n-gram  -> POS tags seen immediately after that n-gram
first_to_gram = {}
gram_to_after = {}

# unigram POS tagger trained on the tagged sentences of the Brown corpus
brown_tagged_sents = brown.tagged_sents()
tagger = nltk.UnigramTagger(brown_tagged_sents)

# with the slicing below, n-grams span low_order+1 .. high_order tokens
low_order = 4
high_order = 9

# break text into POS-tagged n-grams of various lengths
for line in sys.stdin:
	line = line.strip()
	toks = nltk.word_tokenize(line)
	tagged = tagger.tag(toks)
	# stop early enough that the token after the longest n-gram still exists
	for i in range(len(tagged) - high_order):
		for size in range(low_order + 1, high_order + 1):
			this_gram = tuple(tagged[i:i + size])
			# store this n-gram and the POS that came after it
			# (entries with a None tag are skipped -- presumably tokens
			# the tagger could not label)
			after = tagged[i + size][1]
			if after is not None:
				gram_to_after.setdefault(this_gram, []).append(after)
			# store the first POS of this n-gram along with the n-gram
			# itself, but only for n-grams that do not start the line
			if i == 0:
				continue
			first = tagged[i][1]
			if first is not None:
				first_to_gram.setdefault(first, []).append(this_gram)

# Print up to 50 poems, each an opening line plus at most 13 chained
# lines, followed by a blank line.
#
# The candidate opening n-grams are listed once up front: random.choice
# needs an indexable sequence (a Python 3 dict view is not one), and the
# table does not change while poems are generated.
starting_grams = list(gram_to_after)
for i in range(50):
	# no n-grams were collected (no input line was long enough):
	# there is nothing to print, so stop instead of raising IndexError
	if not starting_grams:
		break
	# randomly select an n-gram...
	current = random.choice(starting_grams)
	print(clean(' '.join(only_tokens(current))))
	# then chain together lines, selecting a random
	# line that begins with the part of speech that
	# followed the n-gram in the previous line
	for j in range(13):
		next_pos = random.choice(gram_to_after[current])
		if next_pos not in first_to_gram:
			# no n-gram starts with this part of speech: use up this
			# pass and draw again from the same n-gram, rather than
			# reading a next_choice that is unbound or left over from
			# an earlier poem
			continue
		next_choice = random.choice(first_to_gram[next_pos])
		print(clean(' '.join(only_tokens(next_choice))))
		# the chain can only continue from an n-gram with a known successor
		if next_choice not in gram_to_after:
			break
		current = next_choice
	print('')
	import nltk
	import sys
	from nltk.corpus import brown
	import random
	import re

	def only_tokens(paired_list):
		"""Return a list with the first item of every pair in paired_list.

		The script passes sequences of (token, POS tag) tuples, so this
		keeps the tokens, in order, and drops the tags.
		"""
		return [x[0] for x in paired_list]

	def clean(s):
		"""Tidy a space-joined token string before it is printed.

		Drops one leading punctuation mark (; ' . , ?), then any leading
		whitespace, then the space in front of each of those punctuation
		marks. Returns the cleaned string.
		"""
		s = re.sub(r"^[;'.,?]", '', s)
		s = re.sub(r"^\s+", '', s)
		s = re.sub(r" ([;'.,?])", r'\1', s)
		return s

	# Lookup tables filled in while reading the input:
	#   first_to_gram: POS tag -> n-grams whose first token carries that tag
	#   gram_to_after: n-gram  -> POS tags seen immediately after that n-gram
	first_to_gram = dict()
	gram_to_after = dict()

	# unigram POS tagger trained on the tagged sentences of the Brown corpus
	brown_tagged_sents = brown.tagged_sents()
	tagger = nltk.UnigramTagger(brown_tagged_sents)

	# with the slicing below, n-grams span low_order+1 .. high_order tokens
	low_order = 4
	high_order = 9

	# break text into POS-tagged n-grams of various lengths
	for line in sys.stdin:
		line = line.strip()
		toks = nltk.word_tokenize(line.strip())
		tagged = tagger.tag(toks)
		# stop early enough that the token after the longest n-gram exists
		for i in range(len(tagged) - high_order):
			for n in range(i+low_order, i+high_order):
				this_gram = tuple(tagged[i:n+1])
				# store this n-gram and the POS that came after it
				after = tagged[n+1][1]
				if after is not None:
					if this_gram not in gram_to_after:
						gram_to_after[this_gram] = []
					gram_to_after[this_gram].append(after)
				# store the first POS of this n-gram along with
				# the n-gram itself
				if i > 0:
					first = tagged[i][1]
					if first is not None:
						if first not in first_to_gram:
							first_to_gram[first] = []
						first_to_gram[first].append(this_gram)

	# Print up to 50 poems, each an opening line plus at most 13 chained
	# lines, followed by a blank line.
	#
	# The candidate opening n-grams are listed once up front: random.choice
	# needs an indexable sequence (a Python 3 dict view is not one), and
	# the table does not change while poems are generated.
	starting_grams = list(gram_to_after)
	for i in range(50):
		# no n-grams were collected: nothing to print, so stop instead of
		# raising IndexError
		if not starting_grams:
			break
		# randomly select an n-gram...
		current = random.choice(starting_grams)
		print(clean(' '.join(only_tokens(current))))
		# then chain together lines, selecting a random
		# line that begins with the part of speech that
		# followed the n-gram in the previous line
		for j in range(13):
			next_pos = random.choice(gram_to_after[current])
			if next_pos not in first_to_gram:
				# no n-gram starts with this part of speech: use up
				# this pass and draw again from the same n-gram, rather
				# than reading a next_choice that is unbound or stale
				continue
			next_choice = random.choice(first_to_gram[next_pos])
			print(clean(' '.join(only_tokens(next_choice))))
			# the chain can only continue from an n-gram with a known successor
			if next_choice not in gram_to_after:
				break
			current = next_choice
		print('')