Skip to content

Instantly share code, notes, and snippets.

What would you like to do?
from bs4 import BeautifulSoup, FeatureNotFound
import urllib, sys
from lxml import etree
# Report the interpreter version and the lxml / libxml2 / libxslt versions
# (both the ones in use and the ones lxml was compiled against), one per
# line with the label left-aligned in a 20-character column.
for label, value in (
    ('Python', sys.version_info),
    ('lxml.etree', etree.LXML_VERSION),
    ('libxml used', etree.LIBXML_VERSION),
    ('libxml compiled', etree.LIBXML_COMPILED_VERSION),
    ('libxslt used', etree.LIBXSLT_VERSION),
    ('libxslt compiled', etree.LIBXSLT_COMPILED_VERSION),
):
    print("%-20s: %s" % (label, value))
# Download the page and parse it with the lxml parser.
# NOTE(review): the URL literal is empty in this copy of the script; fill in
# the page to scrape before running. `urllib.urlopen` is the Python 2
# spelling (Python 3 moved it to `urllib.request.urlopen`).
html = urllib.urlopen('').read()
soup = BeautifulSoup(html, 'lxml')
# Print the first element matched by the CSS selector: an <a> whose href
# starts with "/show/", directly inside an <h3>, directly inside a
# <td class="version">. Indexing with [0] raises IndexError when nothing
# matches.
print(soup.select('td.version > h3 > a[href^="/show/"]')[0])
class ParserBeautifulSoup(BeautifulSoup):
    """A ``BeautifulSoup`` that uses the first available parser from a list.

    The constructor tries each name in ``parsers`` in order and keeps the
    first one for which the parent initializer does not raise
    ``FeatureNotFound``.

    :param markup: markup handed to ``BeautifulSoup``.
    :param parsers: iterable of parser names, in order of preference.
        Generic feature names (``'fast'``, ``'permissive'``, ``'strict'``,
        ``'xml'``, ``'html'``, ``'html5'``) are rejected.
    :param kwargs: extra keyword arguments forwarded to ``BeautifulSoup``;
        ``features`` and ``builder`` are not allowed.
    :raises ValueError: if ``parsers`` contains a feature name, or if
        ``features`` / ``builder`` is passed as a keyword argument.
    :raises FeatureNotFound: if none of the requested parsers is available.
    """

    def __init__(self, markup, parsers, **kwargs):
        # reject features: only concrete parser names are accepted
        if set(parsers).intersection({'fast', 'permissive', 'strict', 'xml', 'html', 'html5'}):
            raise ValueError('Features not allowed, only parser names')

        # reject kwargs that would conflict with the parser chosen below
        if 'features' in kwargs:
            raise ValueError('Cannot use features kwarg')
        if 'builder' in kwargs:
            raise ValueError('Cannot use builder kwarg')

        # pick the first parser available
        for parser in parsers:
            try:
                super(ParserBeautifulSoup, self).__init__(markup, parser, **kwargs)
            except FeatureNotFound:
                # this parser is not available; fall through to the next one
                continue
            # initialization succeeded with this parser, nothing left to do
            return

        # every candidate was rejected (or the list was empty)
        raise FeatureNotFound('None of the requested parsers is available: %r' % (parsers,))
# Parse the same markup again, this time letting ParserBeautifulSoup choose
# from a preference list of parser names, and print the first element matched
# by the CSS selector (IndexError if nothing matches).
soup = ParserBeautifulSoup(html, ['lxml'])
print(soup.select('td.version > h3 > a[href^="/show/"]')[0])
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment