Create a gist now

Instantly share code, notes, and snippets.

Embed
What would you like to do?
dramatically enjambed poems (using nltk POS-tagging)
import nltk
import sys
from nltk.corpus import brown
import random
import re
def only_tokens(paired_list):
    """Return the first element of every item in *paired_list*.

    In this script the items are (token, POS tag) pairs, so the result is
    the list of bare tokens in their original order.
    """
    return [pair[0] for pair in paired_list]
def clean(s):
    """Tidy a space-joined token string for display.

    Three substitutions are applied in order:

    1. drop a single leading punctuation character (one of ``; ' . , ?``),
    2. drop any leading whitespace,
    3. remove the space in front of each of those punctuation characters,
       so that ``"hello , world ."`` becomes ``"hello, world."``.

    Returns the cleaned string.
    """
    # Only one leading punctuation mark is removed; the order matters because
    # removing it may expose leading whitespace for the next step.
    s = re.sub(r"^[;'.,?]", '', s)
    s = re.sub(r"^\s+", '', s)
    # Re-attach punctuation that tokenization split off from the previous word.
    s = re.sub(r" ([;'.,?])", r'\1', s)
    return s
# Lookup tables filled from the text read on stdin. An "n-gram" here is a
# tuple of (token, POS tag) pairs as produced by the tagger.
#   first_to_gram: POS tag -> list of n-grams whose first token has that tag
#   gram_to_after: n-gram  -> list of POS tags seen right after that n-gram
first_to_gram = dict()
gram_to_after = dict()

# Unigram POS tagger trained on the tagged Brown corpus.
brown_tagged_sents = brown.tagged_sents()
tagger = nltk.UnigramTagger(brown_tagged_sents)

# With these bounds every stored n-gram holds low_order + 1 .. high_order
# tokens (5 to 9).
low_order = 4
high_order = 9

# break text into POS-tagged n-grams of various lengths
# NOTE(review): the nesting below was reconstructed from a paste that had lost
# its indentation; confirm that `if i > 0` is a sibling of `if after is not
# None` rather than nested inside it.
for line in sys.stdin:
    line = line.strip()
    toks = nltk.word_tokenize(line)
    tagged = tagger.tag(toks)
    # Stopping high_order tokens before the end guarantees tagged[n + 1]
    # below always exists; lines with too few tokens contribute nothing.
    for i in range(len(tagged) - high_order):
        for n in range(i + low_order, i + high_order):
            this_gram = tuple(tagged[i:n + 1])
            # store this n-gram and the POS that came after it
            # (the tag can be None, in which case nothing is recorded)
            after = tagged[n + 1][1]
            if after is not None:
                gram_to_after.setdefault(this_gram, []).append(after)
            # store the first POS of this n-gram along with
            # the n-gram itself; n-grams that start at the first token
            # of an input line are never indexed here
            if i > 0:
                first = tagged[i][1]
                if first is not None:
                    first_to_gram.setdefault(first, []).append(this_gram)
# Generate 50 poems, each separated by a blank line.
#
# The keys are copied into a list once: random.choice needs an indexable
# sequence, and on Python 3 dict.keys() returns a view that is not indexable.
seed_grams = list(gram_to_after)
if not seed_grams:
    # random.choice would otherwise fail with an opaque IndexError.
    sys.exit('no n-grams were collected from the input; nothing to generate')

for _ in range(50):
    # randomly select an n-gram...
    current = random.choice(seed_grams)
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print(clean(' '.join(only_tokens(current))))
    # then chain together lines, selecting a random
    # line that begins with the part of speech that
    # followed the n-gram in the previous line
    for _ in range(13):
        next_pos = random.choice(gram_to_after[current])
        if next_pos in first_to_gram:
            next_choice = random.choice(first_to_gram[next_pos])
            print(clean(' '.join(only_tokens(next_choice))))
            # NOTE(review): the original indentation was lost; the `else` is
            # attached to the nearest `if` here, so the chain stops as soon as
            # the line just printed has no recorded follower. Confirm against
            # the author's version.
            if next_choice in gram_to_after:
                current = next_choice
            else:
                break
    print('')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment