Skip to content

Instantly share code, notes, and snippets.

@ktibb
Created March 7, 2012 23:35
Show Gist options
  • Save ktibb/1997267 to your computer and use it in GitHub Desktop.
Save ktibb/1997267 to your computer and use it in GitHub Desktop.
RWET Midterm
import urllib
import BeautifulSoup
import re
import nltk
from random import choice
#import context_free
# Shared state used by the rest of the script:
#   part  -- maps a part-of-speech tag to a "|"-joined string of words
#   words -- scratch list holding the words of the tag inspected last
#   final -- "TAG->word|word|..." summary strings, one per tag
part = {}
words = []
final = []

# Download the page. The handle is closed explicitly so the connection is
# released even when read() fails part-way through.
response = urllib.urlopen('http://www.oprah.com/relationships/What-Kind-of-Woman-Watches-Porn-Researchers-Find-Answers')
try:
    html = response.read()
finally:
    response.close()
soup = BeautifulSoup.BeautifulSoup(html)
#texts = soup.findAll(text=True)
# First <div class="arial14"> in the page (presumably the article body --
# confirm against the live markup). find() gives None when nothing matches.
texts = soup.find("div", {"class": "arial14"})
def visible(element):
    """Return True when *element* should be kept as readable page text.

    An element is rejected when its parent tag is one of 'style', 'script',
    '[document]', 'head' or 'title', or when its string form starts with an
    HTML comment (``<!-- ... -->``).

    element -- a parsed node exposing ``parent.name`` and supporting ``str()``.
    """
    if element.parent.name in ['style', 'script', '[document]', 'head', 'title']:
        return False
    # DOTALL lets "." cross newlines; without it a comment spanning several
    # lines is not recognised and would be treated as visible text.
    if re.match('<!--.*-->', str(element), re.DOTALL):
        return False
    return True
# Keep only the children of the selected <div> that pass visible().
# soup.find() returns None when the <div> is absent; treat that as "no text"
# instead of letting filter() raise TypeError.
visible_texts = filter(visible, texts) if texts is not None else []
#print visible_texts
# Markup fragments and blanks that carry no words worth tagging.
skipped_lines = ['\n', ' <br /> ', '\bu', '<i>', '</i>', '']
for tag in visible_texts:
    line = tag.string
    if line is None or line in skipped_lines:
        continue
    # str methods return a new string, so the result has to be assigned.
    # (The original also called line.replace("u'", " ") and threw the result
    # away; it is dropped rather than applied because it would corrupt
    # contractions such as "you're".)
    line = line.strip(";:#-?.,")
    if not line:
        continue
    #add POStagging
    tagged = nltk.pos_tag(nltk.word_tokenize(line))
    # Gather all the words of the same part of speech into `part`
    # (POS tag as key, "|"-joined words as value).
    for word, pos in tagged:
        # Skip punctuation, hyphenated/apostrophised tokens and URLs.
        if re.search(r'\W', word) or re.search(r"http", word):
            continue
        if pos not in part:
            part[pos] = word
        elif word not in part[pos].split("|"):
            # Compare against the individual words already stored for this
            # tag; testing membership in part.values() compared the word with
            # whole "a|b|c" strings and so let duplicates through.
            part[pos] += "|" + word
# For every part-of-speech tag, record a "TAG->word|word|..." summary in
# `final`, split that tag's words into `words`, and draw one at random.
# Only the values from the last tag remain in `words` / `random_word`.
for pos_label, joined_words in part.items():
    final.append("->".join((pos_label, joined_words)))
    words = joined_words.split("|")
    random_word = choice(words)
#Combine the words together in a Nostradamus Poem
# Sequence of part-of-speech tags the poem follows; each entry is one word
# slot to be filled from `part`.
nostradamus = [
    'IN', 'CD', 'NNS', 'CC', 'CD', 'NNS', 'NNP', 'MD', 'VB', 'DT', 'NN', 'NNP',
    'NNP', 'VBZ', 'MD', 'VB', 'IN', 'PRP$', 'NN', 'TO', 'VB', 'RP', 'DT', 'NNS', 'CC', 'RB',
    'DT', 'NN', 'NN', 'MD', 'VB', 'DT', 'NN', 'CD', 'NNS', 'IN', 'DT', 'NN', 'MD', 'VB',
    'NNP', 'MD', 'VB', 'CC', 'VBN', 'VB', 'IN', 'DT', 'JJ', 'NNS', 'IN', 'DT', 'NN',
    'DT', 'JJ', 'NN', 'MD', 'VB', 'IN', 'CD', 'NNS', 'DT', 'NN', 'MD', 'VB', 'CD', 'NNS', 'TO', 'VB',
    'DT', 'JJ', 'NN', 'MD', 'VB', 'DT', 'NNP', 'DT', 'JJ', 'NN', 'MD', 'VB', 'NN',
    'IN', 'CD', 'NNS', 'CC', 'CD', 'NNS', 'NNP', 'MD', 'VB', 'DT', 'NN', 'NN',
    'DT', 'NNS', 'MD', 'VB', 'IN', 'PRP$', 'NN', 'TO', 'VB', 'RP', 'DT', 'NNS', 'CC', 'RB',
]
# Fill every slot with a random word carrying that tag. A tag for which the
# page produced no word is skipped, so a sparse page yields a shorter poem
# instead of aborting with KeyError.
poem_words = []
for part_of_speech in nostradamus:
    if part_of_speech not in part:
        continue
    array_of_words = part[part_of_speech].split("|")
    poem_words.append(choice(array_of_words))
# Every word is preceded by one space (so a non-empty poem starts with a
# space); building the string in one join avoids repeated concatenation.
poem = "".join(" " + poem_word for poem_word in poem_words)
# Parenthesised form prints the same text under Python 2 and Python 3.
print(poem)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment