Skip to content

Instantly share code, notes, and snippets.

@snahor
Last active September 28, 2015 06:07
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save snahor/1396564 to your computer and use it in GitHub Desktop.
Save snahor/1396564 to your computer and use it in GitHub Desktop.
# Select an HTML-to-text backend once, at import time. Preference order:
# lxml, then Beautiful Soup 4, then the standard-library HTMLParser.
# Each branch defines the same public function: strip_html(html) -> text.
# The try bodies hold only the import so that an ImportError raised later
# (while defining a backend) is not mistaken for "library not installed".
try:
    from lxml.html import fromstring
except ImportError:
    try:
        from bs4 import BeautifulSoup
    except ImportError:
        try:
            from html.parser import HTMLParser  # Python 3 location
        except ImportError:
            from HTMLParser import HTMLParser  # Python 2 location

        class _Parser(HTMLParser):
            """HTMLParser subclass that collects the text between tags."""

            def __init__(self):
                # Call the base initializer explicitly: on Python 3 it sets
                # up state that a bare reset() does not, and on Python 2
                # HTMLParser is an old-style class, so super() is unusable.
                HTMLParser.__init__(self)
                self.items = []

            def handle_data(self, data):
                """Record one chunk of character data."""
                self.items.append(data)

            def get_text(self):
                """Return all recorded chunks joined into one string."""
                return ''.join(self.items)

        def strip_html(html):
            """Return the text content of ``html`` with the markup removed.

            Standard-library backend, used when neither lxml nor
            Beautiful Soup 4 can be imported.
            """
            parser = _Parser()
            parser.feed(html)
            # close() flushes input the parser is still buffering, e.g.
            # trailing text that ends in an unterminated '&'.
            parser.close()
            return parser.get_text()
    else:
        def strip_html(html):
            """Return the text content of ``html`` with the markup removed.

            Beautiful Soup 4 backend. The parser is named explicitly so the
            result does not depend on which optional parsers are installed.
            Unlike a raw search for every string node, ``get_text()`` leaves
            out HTML comments.
            """
            return BeautifulSoup(html, 'html.parser').get_text()
else:
    def strip_html(html):
        """Return the text content of ``html`` with the markup removed.

        lxml backend. Input that is empty or whitespace-only is returned
        unchanged, because lxml refuses to parse an empty document and
        there is no markup to strip from it anyway.
        """
        if not html.strip():
            return html
        return fromstring(html).text_content()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment