# fernandog/lxml_test.py

## lxml_test.py
from bs4 import BeautifulSoup, FeatureNotFound
import urllib, sys
from lxml import etree

# Report the interpreter and the lxml / libxml2 / libxslt versions in use.
print("%-20s: %s" % ('Python',           sys.version_info))
print("%-20s: %s" % ('lxml.etree',       etree.LXML_VERSION))
print("%-20s: %s" % ('libxml used',      etree.LIBXML_VERSION))
print("%-20s: %s" % ('libxml compiled',  etree.LIBXML_COMPILED_VERSION))
print("%-20s: %s" % ('libxslt used',     etree.LIBXSLT_VERSION))
print("%-20s: %s" % ('libxslt compiled', etree.LIBXSLT_COMPILED_VERSION))

# Download the sample page; close the handle even if the read fails so the
# connection is not left open.
_response = urllib.urlopen('https://gist.githubusercontent.com/fernandog/0087279c50576d1182b4eea9b80f4325/raw/3d28cd038dbae3ec296433063d021b6b3b4b811b/addic7ed.html')
try:
    html = _response.read()
finally:
    _response.close()

# Baseline: parse with stock BeautifulSoup on the lxml parser and print the
# first anchor matched by the selector.
soup = BeautifulSoup(html, 'lxml')
# Parenthesised like the prints above; the output is unchanged.
print(soup.select('td.version > h3 > a[href^="/show/"]')[0])

class ParserBeautifulSoup(BeautifulSoup):
    """BeautifulSoup variant that takes parser names in order of preference.

    The first entry of ``parsers`` that bs4 can load is used to parse the
    markup.
    """

    def __init__(self, markup, parsers, **kwargs):
        """Parse ``markup`` with the first usable parser in ``parsers``.

        :param markup: document handed unchanged to ``BeautifulSoup``.
        :param parsers: iterable of parser names, tried in order.
        :param kwargs: extra keyword arguments forwarded to ``BeautifulSoup``;
            ``features`` and ``builder`` are rejected.
        :raises ValueError: if ``parsers`` contains a feature name, or if a
            rejected keyword argument is supplied.
        :raises FeatureNotFound: if none of the parsers could be loaded.
        """
        # Materialise once: a one-shot iterator would otherwise be drained by
        # the validation below, leaving nothing for the selection loop.
        parsers = list(parsers)

        # reject features
        if set(parsers).intersection({'fast', 'permissive', 'strict', 'xml', 'html', 'html5'}):
            raise ValueError('Features not allowed, only parser names')

        # reject some kwargs
        if 'features' in kwargs:
            raise ValueError('Cannot use features kwarg')
        if 'builder' in kwargs:
            raise ValueError('Cannot use builder kwarg')

        # pick the first parser available
        for parser in parsers:
            try:
                super(ParserBeautifulSoup, self).__init__(markup, parser, **kwargs)
                return
            except FeatureNotFound:
                # This parser is not installed; fall through to the next one.
                pass

        # Say which parsers were tried so the failure can be diagnosed.
        raise FeatureNotFound('None of the requested parsers is available: %r' % (parsers,))

# Repeat the query through ParserBeautifulSoup, restricted to the lxml parser.
soup = ParserBeautifulSoup(html, ['lxml'])
# Parenthesised like the version prints earlier in the file; the output is
# unchanged.
print(soup.select('td.version > h3 > a[href^="/show/"]')[0])
	from bs4 import BeautifulSoup, FeatureNotFound
	import urllib, sys
	from lxml import etree

	# Report the interpreter and the lxml / libxml2 / libxslt versions in use.
	print("%-20s: %s" % ('Python', sys.version_info))
	print("%-20s: %s" % ('lxml.etree', etree.LXML_VERSION))
	print("%-20s: %s" % ('libxml used', etree.LIBXML_VERSION))
	print("%-20s: %s" % ('libxml compiled', etree.LIBXML_COMPILED_VERSION))
	print("%-20s: %s" % ('libxslt used', etree.LIBXSLT_VERSION))
	print("%-20s: %s" % ('libxslt compiled', etree.LIBXSLT_COMPILED_VERSION))

	# Download the sample page; close the handle even if the read fails so the
	# connection is not left open.
	_response = urllib.urlopen('https://gist.githubusercontent.com/fernandog/0087279c50576d1182b4eea9b80f4325/raw/3d28cd038dbae3ec296433063d021b6b3b4b811b/addic7ed.html')
	try:
		html = _response.read()
	finally:
		_response.close()

	# Baseline: parse with stock BeautifulSoup on the lxml parser and print the
	# first anchor matched by the selector.
	soup = BeautifulSoup(html, 'lxml')
	# Parenthesised like the prints above; the output is unchanged.
	print(soup.select('td.version > h3 > a[href^="/show/"]')[0])

	class ParserBeautifulSoup(BeautifulSoup):
		"""BeautifulSoup variant that takes parser names in order of preference.

		The first entry of ``parsers`` that bs4 can load is used to parse the
		markup.
		"""

		def __init__(self, markup, parsers, **kwargs):
			"""Parse ``markup`` with the first usable parser in ``parsers``.

			:param markup: document handed unchanged to ``BeautifulSoup``.
			:param parsers: iterable of parser names, tried in order.
			:param kwargs: extra keyword arguments forwarded to ``BeautifulSoup``;
				``features`` and ``builder`` are rejected.
			:raises ValueError: if ``parsers`` contains a feature name, or if a
				rejected keyword argument is supplied.
			:raises FeatureNotFound: if none of the parsers could be loaded.
			"""
			# Materialise once: a one-shot iterator would otherwise be drained by
			# the validation below, leaving nothing for the selection loop.
			parsers = list(parsers)

			# reject features
			if set(parsers).intersection({'fast', 'permissive', 'strict', 'xml', 'html', 'html5'}):
				raise ValueError('Features not allowed, only parser names')

			# reject some kwargs
			if 'features' in kwargs:
				raise ValueError('Cannot use features kwarg')
			if 'builder' in kwargs:
				raise ValueError('Cannot use builder kwarg')

			# pick the first parser available
			for parser in parsers:
				try:
					super(ParserBeautifulSoup, self).__init__(markup, parser, **kwargs)
					return
				except FeatureNotFound:
					# This parser is not installed; fall through to the next one.
					pass

			# Say which parsers were tried so the failure can be diagnosed.
			raise FeatureNotFound('None of the requested parsers is available: %r' % (parsers,))

	# Repeat the query through ParserBeautifulSoup, restricted to the lxml parser.
	soup = ParserBeautifulSoup(html, ['lxml'])
	# Parenthesised like the version prints earlier in the file; the output is
	# unchanged.
	print(soup.select('td.version > h3 > a[href^="/show/"]')[0])