# ktibb/oprah.py

## oprah.py
import urllib
import BeautifulSoup
import re

# Page whose content is scraped by this script.
PAGE_URL = 'http://www.oprah.com/relationships/What-Kind-of-Woman-Watches-Porn-Researchers-Find-Answers'

# Download the raw HTML. The response is closed explicitly instead of
# relying on garbage collection to release the connection.
response = urllib.urlopen(PAGE_URL)
try:
    html = response.read()
finally:
    response.close()

soup = BeautifulSoup.BeautifulSoup(html)

# Select the first <div class="arial14"> rather than every text node in
# the document (presumably the article body -- confirm against the page
# markup). find() returns None when no such element exists.
texts = soup.find("div", {"class": "arial14"})

def visible(element):
    """Return True if *element* should be treated as visible page text.

    An element is rejected when the tag name of its parent is one of the
    non-rendered containers ('style', 'script', '[document]', 'head',
    'title'), or when its string form starts with an HTML comment.

    element -- any object exposing ``parent.name`` and a meaningful
               ``str()`` (presumably a BeautifulSoup node).
    """
    if element.parent.name in ['style', 'script', '[document]', 'head', 'title']:
        return False
    # re.DOTALL lets '.' match newlines, so comments that span several
    # lines are recognised as well as single-line ones.
    if re.match(r'<!--.*-->', str(element), re.DOTALL):
        return False
    return True

visible_texts = filter(visible, texts)
print visible_texts

for line in visible_texts:

  if line not in ['\n',' <br /> ', '']:
    print "-----"
    line.strip(";:#-?.,")
    print line
	# NOTE: a second copy of the script above used to follow here with its
	# indentation flattened to a single tab; it repeated the same statements
	# and made the file unparseable, so it has been removed.