# ktibb/1984_D1.py

## 1984_D1.py
import urllib
import BeautifulSoup
import re
from random import choice
#import context_free
import sys
import markov


# Download the opening page of the online edition of "1984".
# urllib.urlopen is the Python 2 API; the response is closed explicitly so
# the underlying connection is not left for the garbage collector to release.
response = urllib.urlopen('http://www.george-orwell.org/1984/0.html')
try:
    html = response.read()
finally:
    response.close()
soup = BeautifulSoup.BeautifulSoup(html)

# Walk the page's nested layout tables down to the single node used as the
# text source. The path is hard-coded to the page's markup at the time of
# writing; any layout change will surface here as an AttributeError or
# IndexError.
texts = (
    soup.html.body.table.contents[1].td.contents[3].tr.contents[3]
    .table.contents[5].tr.td.table.tr.td.tr.td.table.contents[3]
    .td.contents[4]
)
#print "from beautifulsoup: " + str(texts)
#print type(texts)

def visible(element):
    """Return True if *element* should be treated as visible page text.

    An element is rejected when its parent's tag name is one of the
    non-rendered containers ('style', 'script', '[document]', 'head',
    'title'), or when its string form begins with an HTML comment
    (``<!-- ... -->``), including comments that span several lines.
    """
    if element.parent.name in ('style', 'script', '[document]', 'head', 'title'):
        return False
    # re.DOTALL lets '.' cross newlines so multi-line comments are caught too;
    # without it only comments written on a single line were filtered out.
    if re.match(r'<!--.*-->', str(element), re.DOTALL):
        return False
    return True

# Keep only the child nodes of `texts` that pass visible(), then use the text
# of the first one as the corpus. Indexing the result relies on Python 2's
# filter() returning a list.
visible_texts = filter(visible, texts)
text = visible_texts[0].getText()

# n=2 and max=500 are handed straight to markov.MarkovGenerator -- presumably
# the n-gram order and a cap on output length; confirm against markov.py.
generator = markov.MarkovGenerator(n=2, max=500)
for line in text.split('. '):
    # Each '. '-separated piece is UTF-8 encoded before being fed to the model.
    stringline = line.encode('utf-8')
    generator.feed(stringline)

for i in range(1):
    print(generator.generate())


# NOTE(review): from here on the file repeats the script above statement for
# statement. This copy had lost its nesting (every line sat at a single tab
# stop, including the bodies of `def`, `if`/`elif` and `for`), which made the
# file unparsable; the indentation is restored here. It looks like an
# accidental paste -- confirm whether a second pass is wanted before
# removing it.
import urllib
import BeautifulSoup
import re
from random import choice
#import context_free
import sys
import markov


# Fetch and parse the page again; the response is closed explicitly.
response = urllib.urlopen('http://www.george-orwell.org/1984/0.html')
try:
    html = response.read()
finally:
    response.close()
soup = BeautifulSoup.BeautifulSoup(html)

# Same hard-coded path through the page's layout tables as above.
texts = (
    soup.html.body.table.contents[1].td.contents[3].tr.contents[3]
    .table.contents[5].tr.td.table.tr.td.tr.td.table.contents[3]
    .td.contents[4]
)


def visible(element):
    """Return True if *element* should be treated as visible page text.

    Rejects elements whose parent tag is 'style', 'script', '[document]',
    'head' or 'title', and elements whose string form begins with an HTML
    comment (single- or multi-line).
    """
    if element.parent.name in ('style', 'script', '[document]', 'head', 'title'):
        return False
    # re.DOTALL lets '.' cross newlines so multi-line comments are caught too.
    if re.match(r'<!--.*-->', str(element), re.DOTALL):
        return False
    return True


visible_texts = filter(visible, texts)
text = visible_texts[0].getText()

generator = markov.MarkovGenerator(n=2, max=500)
for line in text.split('. '):
    stringline = line.encode('utf-8')
    generator.feed(stringline)

for i in range(1):
    print(generator.generate())