# Gist shirk3y/458224083ce5464627bc by @shirk3y (last active January 12, 2017).
# lxml XPath extension function for selecting elements by CSS class.
from lxml import etree
# XPath 1.0 boolean expression template: true when the whitespace-separated
# @class attribute of the context node contains the token substituted for
# ``{}``. Padding both the attribute value and the token with spaces
# prevents partial-name matches (e.g. "tex" does not match "text").
# NOTE: the token is interpolated between single quotes, so it must not
# itself contain a single quote.
CLASS_EXPR = "contains(concat(' ', normalize-space(@class), ' '), ' {} ')"
def has_class(context, *classes):
    """
    Return True when the XPath context node carries every given CSS class.

    This lxml extension allows selecting by CSS class more easily. The
    ``class`` attribute is read directly from the context node rather than
    being interpolated into a nested XPath expression, so class names that
    contain quote characters cannot break (or inject into) the query, and
    no extra XPath expression has to be evaluated per candidate node.

    :param context: XPath evaluation context supplied by lxml; only its
        ``context_node`` attribute is used.
    :param classes: one or more class names that must all be present.
    :returns: ``False`` if the node has no ``class`` attribute or any of
        the names is missing, ``True`` otherwise.
    :raises ValueError: if called without any class name.

    >>> ns = etree.FunctionNamespace(None)
    >>> ns['has-class'] = has_class
    >>> root = etree.XML('''
    ... <a>
    ... <b class="one first text">I</b>
    ... <b class="two text">LOVE</b>
    ... <b class="three text">CSS</b>
    ... </a>
    ... ''')
    >>> len(root.xpath('//b[has-class("text")]'))
    3
    >>> len(root.xpath('//b[has-class("one")]'))
    1
    >>> len(root.xpath('//b[has-class("text", "first")]'))
    1
    >>> len(root.xpath('//b[not(has-class("first"))]'))
    2
    >>> len(root.xpath('//b[has-class("not-exists")]'))
    0
    >>> len(root.xpath('''//b[has-class("it's")]'''))
    0
    """
    if not classes:
        # Fail with a clear message instead of an opaque XPath syntax error.
        raise ValueError('has-class() requires at least one class name')

    raw = context.context_node.get('class')
    if raw is None:
        return False

    # Mirror XPath normalize-space(): tab, CR and LF are separators just
    # like the space character, runs of separators collapse to one space
    # and leading/trailing separators are dropped.
    for separator in '\t\r\n':
        raw = raw.replace(separator, ' ')
    normalized = ' '.join(part for part in raw.split(' ') if part)

    # Pad both haystack and needle with spaces so only whole class names
    # match ("tex" must not match "text").
    padded = ' {} '.format(normalized)
    return all(' {} '.format(name) in padded for name in classes)
if __name__ == '__main__':
    # Script entry point: run the examples embedded in the docstrings of
    # this module as a self-test.
    import doctest

    doctest.testmod()
# End of gist; comments and discussion are on the GitHub gist page.