Skip to content

Instantly share code, notes, and snippets.

@juanriaza
Forked from rmax/xpathfuncs.py
Last active August 29, 2015 14:07
Show Gist options
  • Save juanriaza/c5c6ba77b00c68d41f77 to your computer and use it in GitHub Desktop.
Save juanriaza/c5c6ba77b00c68d41f77 to your computer and use it in GitHub Desktop.
"""XPath extension functions for lxml, inspired by:
https://gist.github.com/shirk3y/458224083ce5464627bc
Usage:
import xpathfuncs; xpathfuncs.setup()
"""
import string
from lxml import etree
# XPath predicate template testing for a single CSS class; ``{}`` receives the
# class name.  Padding both sides with spaces makes the match whole-token.
CLASS_EXPR = "contains(concat(' ', normalize-space(@class), ' '), ' {} ')"

# XPath template for a case-insensitive ``contains()``: the first ``{}`` is the
# haystack expression, the second the (already lowercased) needle literal.
# The ASCII constants are used instead of ``string.uppercase`` /
# ``string.lowercase`` because those names are locale-dependent on Python 2
# and do not exist on Python 3; ``translate()`` needs two tables that line up
# character by character.
ICONTAINS_EXPR = "contains(translate({}, %r, %r), {})" % (
    string.ascii_uppercase,
    string.ascii_lowercase,
)

# Registry mapping XPath function names to their Python implementations.
FUNCTIONS = {}
def register(func):
    """Record *func* in ``FUNCTIONS`` under its XPath-style name.

    The key is the function's ``__name__`` with every underscore replaced by
    a hyphen (``has_class`` becomes ``has-class``).  The function itself is
    returned unchanged, so this can be applied as a decorator.
    """
    xpath_name = func.__name__.replace('_', '-')
    FUNCTIONS[xpath_name] = func
    return func
def setup():
    """Install every function found in ``FUNCTIONS`` into lxml.

    Each entry is assigned, under its registered name, to the function
    namespace obtained from ``etree.FunctionNamespace(None)``.
    """
    namespace = etree.FunctionNamespace(None)
    for xpath_name in FUNCTIONS:
        namespace[xpath_name] = FUNCTIONS[xpath_name]
@register
def has_class(context, *classes):
    """Return True if the context node carries every CSS class in *classes*.

    This lxml extension allows selecting by CSS class more easily.

    The ``class`` attribute is whitespace-normalised the same way XPath
    ``normalize-space()`` does it, then each requested class must appear in
    it as a whole space-delimited token.  The check is done in Python rather
    than by building an XPath string, so class names containing quote
    characters are handled correctly.

    Raises ``ValueError`` when called without any class name.

    >>> ns = etree.FunctionNamespace(None)
    >>> ns['has-class'] = has_class
    >>> root = etree.XML('''
    ... <a>
    ...     <b class="one first text">I</b>
    ...     <b class="two text">LOVE</b>
    ...     <b class="three text">CSS</b>
    ... </a>
    ... ''')
    >>> len(root.xpath('//b[has-class("text")]'))
    3
    >>> len(root.xpath('//b[has-class("one")]'))
    1
    >>> len(root.xpath('//b[has-class("text", "first")]'))
    1
    >>> len(root.xpath('//b[not(has-class("first"))]'))
    2
    >>> len(root.xpath('//b[has-class("not-exists")]'))
    0
    >>> len(root.xpath('//b[has-class("o\\'clock")]'))
    0
    """
    if not classes:
        raise ValueError('has-class must have at least 1 argument')

    attr = context.context_node.get('class')
    if attr is None:
        # No class attribute at all: nothing can match.
        return False

    # Mirror normalize-space(): treat tab / CR / LF like spaces, drop leading
    # and trailing whitespace, and collapse runs into a single space.
    for whitespace in '\t\r\n':
        attr = attr.replace(whitespace, ' ')
    normalized = ' '.join(token for token in attr.split(' ') if token)

    # Pad both sides so that a class only matches as a whole token.
    padded = ' ' + normalized + ' '
    return all(' ' + name + ' ' in padded for name in classes)
@register
def lower_case(context, s):
    """Return the lowercased string value of *s*.

    *s* may be a plain string or a node-set.  lxml hands node-sets (for
    example ``@href``, ``text()`` or ``.``) to extension functions as lists,
    so they are converted the way XPath ``string()`` would: the first node's
    string-value is used, and an empty node-set yields ``''``.

    >>> ns = etree.FunctionNamespace(None)
    >>> ns['lower-case'] = lower_case
    >>> root = etree.XML('<root name="BAR">Foo</root>')
    >>> root.xpath('lower-case("FOO")')
    'foo'
    >>> root.xpath('lower-case(@name)')
    'bar'
    >>> root.xpath('lower-case(.)')
    'foo'
    """
    if isinstance(s, list):
        # Node-set argument: only the first node counts, '' when empty.
        s = s[0] if s else ''
    if hasattr(s, 'xpath'):
        # Element node: take its XPath string-value (all descendant text).
        s = s.xpath('string()')
    return s.lower()
@register
def upper_case(context, s):
    """Return the uppercased string value of *s*.

    *s* may be a plain string or a node-set.  lxml hands node-sets (for
    example ``@href``, ``text()`` or ``.``) to extension functions as lists,
    so they are converted the way XPath ``string()`` would: the first node's
    string-value is used, and an empty node-set yields ``''``.

    >>> ns = etree.FunctionNamespace(None)
    >>> ns['upper-case'] = upper_case
    >>> root = etree.XML('<root name="bar">Foo</root>')
    >>> root.xpath('upper-case("foo")')
    'FOO'
    >>> root.xpath('upper-case(@name)')
    'BAR'
    >>> root.xpath('upper-case(.)')
    'FOO'
    """
    if isinstance(s, list):
        # Node-set argument: only the first node counts, '' when empty.
        s = s[0] if s else ''
    if hasattr(s, 'xpath'):
        # Element node: take its XPath string-value (all descendant text).
        s = s.xpath('string()')
    return s.upper()
@register
def icontains(context, value, text):
    """Like XPath ``contains()`` but ignoring case.

    *value* is the haystack and may be a string, a single element, or a
    node-set (which lxml passes as a list).  For a node-set the result is
    True as soon as any node matches; an empty node-set never matches.
    Elements are compared through their XPath string-value.

    The comparison lowercases both sides in Python instead of generating an
    XPath expression, so quotes, backslashes and newlines in either string
    cannot corrupt the query.

    >>> ns = etree.FunctionNamespace(None)
    >>> ns['icontains'] = icontains
    >>> root = etree.XML('<root><a>foo</a><a>\\'BAR\\'</a></root>')
    >>> root.xpath('icontains("FoOo","foO")')
    True
    >>> root.xpath('icontains(.,"thing")')
    False
    >>> root.xpath('icontains(.,"FOO")')
    True
    >>> root.xpath('icontains(./a,"bar")')
    True
    >>> root.xpath('icontains(./a/text(),"FOO")')
    True
    >>> root.xpath('icontains(normalize-space(a),"F\\'OO")')
    False
    """
    if isinstance(value, list):
        # Node-set: succeed if any single node contains the text.
        return any(icontains(context, item, text) for item in value)
    if hasattr(value, 'xpath'):
        # Element node: search its XPath string-value (all descendant text).
        value = value.xpath('string()')
    return text.lower() in value.lower()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment