Skip to content

Instantly share code, notes, and snippets.

@AustinMatherne
Last active July 1, 2025 01:50
Show Gist options
  • Select an option

  • Save AustinMatherne/533a4b6a31a63e11bfd8c09c03c05183 to your computer and use it in GitHub Desktop.

Select an option

Save AustinMatherne/533a4b6a31a63e11bfd8c09c03c05183 to your computer and use it in GitHub Desktop.
lxml.test.py
import sys
from io import BytesIO
from typing import Any
import requests
from lxml import etree
def print_versions() -> None:
    """Print the Python, lxml, libxml2 and libxslt versions in use.

    Each entry is written as a label left-aligned in a 20-character column,
    followed by ``": "`` and the value. A blank line terminates the report.
    """
    report = (
        ("Python", sys.version_info),
        ("lxml.etree", etree.LXML_VERSION),
        ("libxml used", etree.LIBXML_VERSION),
        ("libxml compiled", etree.LIBXML_COMPILED_VERSION),
        # LIBXML_FEATURES is looked up defensively; an empty list is shown
        # when the attribute is absent.
        ("libxml features", sorted(getattr(etree, "LIBXML_FEATURES", set()))),
        ("libxslt used", etree.LIBXSLT_VERSION),
        ("libxslt compiled", etree.LIBXSLT_COMPILED_VERSION),
    )
    for label, value in report:
        print(f"{label:<20}: {value}")
    print()
SCHEMA_URL = "http://www.xbrl.org/2013/inlineXBRL/xhtml-inlinexbrl-1_1.xsd"
INSTANCE_URL = "https://gist.githubusercontent.com/AustinMatherne/2287372e6490c94937e6e030bb84b827/raw/3241f6f37c73e645f5f43341d6e56024348812f2/instance.xhtml"
class Resolver(etree.Resolver):
    """lxml resolver that downloads ``http://`` / ``https://`` references.

    Remote references are fetched with ``requests`` and handed to lxml as an
    in-memory string; every other system URL is resolved as a local filename.
    """

    # Seconds to wait for a remote server before giving up. Without a timeout
    # ``requests`` can block forever on an unresponsive host.
    timeout: float = 30.0

    def resolve(self, system_url: str | None, public_id: str, context: Any) -> Any:  # type: ignore[override]
        """Resolve *system_url* for lxml.

        Args:
            system_url: Location of the referenced resource. May be ``None``
                when the reference carries no system identifier.
            public_id: Public identifier of the resource (unused here).
            context: Opaque lxml resolver context, passed through unchanged.

        Returns:
            The lxml input source for the resource, or ``None`` to let lxml
            fall back to its remaining/default resolvers.

        Raises:
            RuntimeError: If a remote resource does not answer with HTTP 200.
        """
        if system_url is None:
            # Nothing to fetch or open; defer to lxml's default resolution.
            return None

        if system_url.startswith(("http://", "https://")):
            resp = requests.get(system_url, timeout=self.timeout)
            if resp.status_code != 200:
                raise RuntimeError(f"Failed to fetch {system_url}: {resp.status_code}")
            # Keep the original URL as base so nested relative references
            # inside the fetched document resolve against it.
            return self.resolve_string(resp.content, context, base_url=system_url)

        return self.resolve_filename(system_url, context)
def _download(url: str, timeout: float = 30.0) -> bytes:
    """Return the response body of *url*.

    Raises:
        RuntimeError: If the server does not answer with HTTP 200, so that an
            error page is never passed on to the XML parser.
    """
    resp = requests.get(url, timeout=timeout)
    if resp.status_code != 200:
        raise RuntimeError(f"Failed to fetch {url}: {resp.status_code}")
    return resp.content


if __name__ == "__main__":
    print_versions()

    # One parser, with the custom resolver attached, is used for both the
    # schema and the instance document.
    parser = etree.XMLParser()
    resolver = Resolver()
    parser.resolvers.add(resolver)

    # Build the XML Schema; base_url is the schema's own URL so relative
    # references inside it are resolved against that location.
    schema_tree = etree.parse(BytesIO(_download(SCHEMA_URL)), parser=parser, base_url=SCHEMA_URL)
    schema = etree.XMLSchema(schema_tree)

    # Parse the instance document and validate it; assertValid raises when
    # the document does not conform to the schema.
    instance_tree = etree.parse(BytesIO(_download(INSTANCE_URL)), parser=parser, base_url=INSTANCE_URL)
    schema.assertValid(instance_tree)
@AustinMatherne
Copy link
Author

AustinMatherne commented Jul 1, 2025

With lxml 5.4.0 on macOS (arm) and Python 3.13.5, the above code runs to completion and the schema validation assertion passes.

> python lxml.test.py

Python              : sys.version_info(major=3, minor=13, micro=5, releaselevel='final', serial=0)
lxml.etree          : (5, 4, 0, 0)
libxml used         : (2, 13, 8)
libxml compiled     : (2, 13, 8)
libxml features     : []
libxslt used        : (1, 1, 43)
libxslt compiled    : (1, 1, 43)

With lxml 6.0.0 it raises the following exception:

> python lxml.test.py

Python              : sys.version_info(major=3, minor=13, micro=5, releaselevel='final', serial=0)
lxml.etree          : (6, 0, 0, 0)
libxml used         : (2, 14, 4)
libxml compiled     : (2, 14, 4)
libxml features     : ['catalog', 'html', 'iconv', 'regexp', 'schematron', 'xmlschema', 'xpath', 'zlib']
libxslt used        : (1, 1, 43)
libxslt compiled    : (1, 1, 43)

Traceback (most recent call last):
  File "/Users/austinmatherne/git/Arelle/lxml.test.py", line 43, in <module>
    schema = etree.XMLSchema(schema_tree)
  File "src/lxml/xmlschema.pxi", line 90, in lxml.etree.XMLSchema.__init__
lxml.etree.XMLSchemaParseError: Invalid argument, line 1, column 37

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment