Skip to content

Instantly share code, notes, and snippets.

@cipri-tom
Last active March 3, 2017 12:38
Show Gist options
  • Save cipri-tom/f85b246fc8a250ea4debf9178a2732b9 to your computer and use it in GitHub Desktop.
Save cipri-tom/f85b246fc8a250ea4debf9178a2732b9 to your computer and use it in GitHub Desktop.
lxml etree bug: XPath using a FunctionNamespace prefix works on the first call but fails on the second
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 52,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"from lxml import etree"
]
},
{
"cell_type": "code",
"execution_count": 53,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"etree.FunctionNamespace(\"http://exslt.org/regular-expressions\").prefix = 're'"
]
},
{
"cell_type": "code",
"execution_count": 54,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"text = \"\"\"\n",
"<monthEntity month=\"02\" year=\"1998\">\n",
" <article>\n",
" <full_text> Some article text </full_text>\n",
" <full_text> Some more article text fragment 2 </full_text>\n",
" </article>\n",
"</monthEntity>\n",
"\"\"\""
]
},
{
"cell_type": "code",
"execution_count": 55,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"t = etree.fromstring(text)"
]
},
{
"cell_type": "code",
"execution_count": 56,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"search = etree.XPath('//full_text[re:match(text(), \"fragment\")]')"
]
},
{
"cell_type": "code",
"execution_count": 57,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"[<Element full_text at 0x7f92d416cc88>]"
]
},
"execution_count": 57,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"search(t)"
]
},
{
"cell_type": "code",
"execution_count": 58,
"metadata": {
"collapsed": false
},
"outputs": [
{
"ename": "XPathFunctionError",
"evalue": "XPath function '{�O�\u0001}match' not found",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mXPathFunctionError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m<ipython-input-58-d7ae80ab29da>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0msearch\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mt\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
"\u001b[0;32msrc/lxml/xpath.pxi\u001b[0m in \u001b[0;36mlxml.etree.XPath.__call__ (src/lxml/lxml.etree.c:172853)\u001b[0;34m()\u001b[0m\n",
"\u001b[0;32msrc/lxml/xpath.pxi\u001b[0m in \u001b[0;36mlxml.etree._XPathEvaluatorBase._handle_result (src/lxml/lxml.etree.c:170157)\u001b[0;34m()\u001b[0m\n",
"\u001b[0;32msrc/lxml/lxml.etree.pyx\u001b[0m in \u001b[0;36mlxml.etree._ExceptionContext._raise_if_stored (src/lxml/lxml.etree.c:12061)\u001b[0;34m()\u001b[0m\n",
"\u001b[0;31mXPathFunctionError\u001b[0m: XPath function '{�O�\u0001}match' not found"
]
}
],
"source": [
"search(t)"
]
},
{
"cell_type": "code",
"execution_count": 51,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"'3.7.2'"
]
},
"execution_count": 51,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"etree.__version__"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.0"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
from lxml import etree
# Minimal reproduction: a compiled XPath that uses a FunctionNamespace prefix
# succeeds on its first evaluation and raises XPathFunctionError on the second,
# identical evaluation. Observed with etree.__version__ == '3.7.2'.
#
# Register 're' as the prefix of the EXSLT regular-expressions function
# namespace so that the XPath expression below can call re:match().
etree.FunctionNamespace("http://exslt.org/regular-expressions").prefix = 're'
# Sample document: two <full_text> elements; only the second contains "fragment".
text = """
<monthEntity month="02" year="1998">
<article>
<full_text> Some article text </full_text>
<full_text> Some more article text fragment 2 </full_text>
</article>
</monthEntity>
"""
t = etree.fromstring(text)
# Compile the expression once. No explicit `namespaces=` mapping is passed, so
# the expression relies on the 're' prefix set on the FunctionNamespace above.
# NOTE(review): passing namespaces={'re': 'http://exslt.org/regular-expressions'}
# to etree.XPath may sidestep the failure -- not verified here.
search = etree.XPath('//full_text[re:match(text(), "fragment")]')
search(t)  # first call works: returns a list with 1 element
search(t)  # second, identical call fails with:
# src/lxml/xpath.pxi in lxml.etree.XPath.__call__ (src/lxml/lxml.etree.c:172853)()
# ...
# XPathFunctionError: XPath function '{�{|}match' not found
#
# The namespace shown in curly braces before `match` is unreadable garbage and
# sometimes differs between runs of the program; the expected value would be
# the namespace URI registered above. This looks like a bug in the library
# rather than in this script.
# Reproduced using etree.__version__ 3.7.2
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment