mateisuica/gist:e4f8bb3a79eb1f77a66cb9c3243a330f

## gistfile1.txt
from bs4 import BeautifulSoup

def parseContent(content) -> str:
    """Collect the text of heading, paragraph and link tags from HTML.

    The markup is parsed with BeautifulSoup's ``html.parser`` backend and
    every ``h1``-``h5``, ``p`` and ``a`` tag is visited in the order
    ``find_all`` returns them.

    Args:
        content: HTML markup, handed unchanged to ``BeautifulSoup``.

    Returns:
        The ``.string`` of each matched tag, each one preceded by a single
        space (so a non-empty result starts with a space). Tags whose
        ``.string`` is ``None`` contribute nothing. Returns ``""`` when no
        tag contributes text.
    """
    soup = BeautifulSoup(content, 'html.parser')

    # Keep the matched tags under their own name rather than rebinding the
    # ``content`` parameter.
    tags = soup.find_all(['h1', 'h2', 'h3', 'h4', 'h5', 'p', 'a'])

    # Per the Beautiful Soup docs, ``.string`` is None when a tag holds more
    # than one child, so such tags are skipped here.
    strings = (tag.string for tag in tags)

    # One join instead of repeated ``text = text + ...`` in a loop.
    return "".join(" " + s for s in strings if s is not None)
	from bs4 import BeautifulSoup

	def parseContent(content) -> str:
	    """Collect the text of heading, paragraph and link tags from HTML.

	    The markup is parsed with BeautifulSoup's ``html.parser`` backend and
	    every ``h1``-``h5``, ``p`` and ``a`` tag is visited in the order
	    ``find_all`` returns them.

	    Args:
	        content: HTML markup, handed unchanged to ``BeautifulSoup``.

	    Returns:
	        The ``.string`` of each matched tag, each one preceded by a single
	        space (so a non-empty result starts with a space). Tags whose
	        ``.string`` is ``None`` contribute nothing. Returns ``""`` when no
	        tag contributes text.
	    """
	    soup = BeautifulSoup(content, 'html.parser')

	    # Keep the matched tags under their own name rather than rebinding the
	    # ``content`` parameter.
	    tags = soup.find_all(['h1', 'h2', 'h3', 'h4', 'h5', 'p', 'a'])

	    # Per the Beautiful Soup docs, ``.string`` is None when a tag holds more
	    # than one child, so such tags are skipped here.
	    strings = (tag.string for tag in tags)

	    # One join instead of repeated ``text = text + ...`` in a loop.
	    return "".join(" " + s for s in strings if s is not None)