Skip to content

Instantly share code, notes, and snippets.

@aucampia
Last active February 20, 2023 21:12
Show Gist options
  • Save aucampia/91e1bfb27351ebd63c3285d052a34d7e to your computer and use it in GitHub Desktop.
Save aucampia/91e1bfb27351ebd63c3285d052a34d7e to your computer and use it in GitHub Desktop.
20230220T221212 iwana@teekai.zoic.eu.org:~/sw/d/github.com/iafork/rdflib.reviews
$ git log -1
commit 8dc5b983de192168eb6a127c76689674cf833775 (HEAD -> iwana-20230220T2005-add_parser_typing, origin/iwana-20230220T2005-add_parser_typing)
Author: Iwan Aucamp <aucampia@gmail.com>
Date:   Mon Feb 20 19:36:30 2023 +0000

    feat: add parser type hints
    
    Add type hints to:
    - `rdflib/parser.py`
    - `rdflib/plugins/parsers/*.py`
    - some JSON-LD utils
    - `rdflib/exceptions.py`.
    
    This is mainly because the work I'm doing to fix
    <https://github.com/RDFLib/rdflib/issues/1844> is touching some of
    this parser stuff and the type hints are useful to avoid mistakes.
    
    No runtime changes are included in this PR.
20230220T221222 iwana@teekai.zoic.eu.org:~/sw/d/github.com/iafork/rdflib.reviews
$ git status
On branch iwana-20230220T2005-add_parser_typing
Your branch is up to date with 'origin/iwana-20230220T2005-add_parser_typing'.

nothing to commit, working tree clean
20230220T221225 iwana@teekai.zoic.eu.org:~/sw/d/github.com/iafork/rdflib.reviews
$ PYLOGGING_LEVEL=INFO task run -- git difftool -y -x $(readlink -f devtools/diffrtpy.py) upstream/main > /var/tmp/compact.diff
task: [venv:run] poetry run git difftool -y -x /home/iwana/sw/d/github.com/iafork/rdflib.reviews/devtools/diffrtpy.py upstream/main

--- a/pyproject.toml
+++ b/pyproject.toml
@@ -103,11 +103,14 @@
pep8-naming = ["-N802", "-N806", "-N815"]
[tool.flakeheaven.exceptions."rdflib/__init__.py"]
pycodestyle = ["-E402"]
[tool.flakeheaven.exceptions."test/utils/namespace/_*"]
pep8-naming = ["-N815"]
-
+[tool.flakeheaven.exceptions."rdflib/plugins/parsers/rdfxml.py"]
+pep8-naming = ["-N802"]
+[tool.flakeheaven.exceptions."rdflib/plugins/parsers/trix.py"]
+pep8-naming = ["-N802"]
[tool.black]
required-version = "22.12.0"
line-length = "88"
target-version = ['py37']
--- a/rdflib/exceptions.py
+++ b/rdflib/exceptions.py
@@ -1,6 +1,7 @@
-__all__ = ["Error", "ParserError"]
+__all__ = ["Error", "ParserError", "UniquenessError"]
+from typing import Any, Optional
class Error(Exception):
def __init__(self, msg=None):
Exception.__init__(self, msg)
--- a/rdflib/parser.py
+++ b/rdflib/parser.py
@@ -1,5 +1,6 @@
+from __future__ import annotations
import codecs
import os
import pathlib
import sys
from io import BufferedIOBase, BytesIO, RawIOBase, StringIO, TextIOBase, TextIOWrapper
@@ -22,12 +23,13 @@
from rdflib import __version__
from rdflib.namespace import Namespace
from rdflib.term import URIRef
if TYPE_CHECKING:
- from http.client import HTTPMessage, HTTPResponse
- from rdflib import Graph
+ from email.message import Message
+ from urllib.response import addinfourl
+ from rdflib.graph import Graph
__all__ = [
"Parser",
"InputSource",
"StringInputSource",
"URLInputSource",
@@ -75,11 +77,11 @@
def write(self, *args, **kwargs):
raise NotImplementedError()
-class InputSource(xmlreader.InputSource, object):
+class InputSource(xmlreader.InputSource):
def __init__(self, system_id=None):
xmlreader.InputSource.__init__(self, system_id=system_id)
self.content_type = None
self.auto_close = False
--- a/rdflib/plugins/parsers/hext.py
+++ b/rdflib/plugins/parsers/hext.py
@@ -1,10 +1,12 @@
+from __future__ import annotations
import json
import warnings
-from typing import List, Union
-from rdflib import BNode, ConjunctiveGraph, Literal, URIRef
-from rdflib.parser import Parser
+from typing import TYPE_CHECKING, Any, List, Optional, Union
+from rdflib.graph import ConjunctiveGraph, Graph
+from rdflib.parser import FileInputSource, InputSource, Parser
+from rdflib.term import BNode, Literal, URIRef
__all__ = ["HextuplesParser"]
class HextuplesParser(Parser):
@@ -51,10 +53,12 @@
graph.store.context_aware
), "Hextuples Parser needs a context-aware store!"
cg = ConjunctiveGraph(store=graph.store, identifier=graph.identifier)
cg.default_context = graph
if hasattr(source, "file"):
+ if TYPE_CHECKING:
+ assert isinstance(source, FileInputSource)
with open(source.file.name, encoding="utf-8") as fp:
for l in fp:
self._parse_hextuple(cg, self._load_json_line(l))
elif hasattr(source, "_InputSource__bytefile"):
if hasattr(source._InputSource__bytefile, "wrapped"):
--- a/rdflib/plugins/parsers/jsonld.py
+++ b/rdflib/plugins/parsers/jsonld.py
@@ -1,12 +1,13 @@
+from __future__ import annotations
import warnings
-from typing import Optional
+from typing import TYPE_CHECKING, Any, Dict, List, Optional, Union
import rdflib.parser
-from rdflib.graph import ConjunctiveGraph
+from rdflib.graph import ConjunctiveGraph, Graph
from rdflib.namespace import RDF, XSD
-from rdflib.parser import URLInputSource
-from rdflib.term import BNode, Literal, URIRef
+from rdflib.parser import InputSource, URLInputSource
+from rdflib.term import BNode, IdentifiedNode, Literal, Node, URIRef
from ..shared.jsonld.context import UNDEF, Context, Term
from ..shared.jsonld.keys import (
CONTEXT,
GRAPH,
ID,
@@ -48,10 +49,12 @@
base = kwargs.get("base") or sink.absolutize(
source.getPublicId() or source.getSystemId() or ""
)
context_data = kwargs.get("context")
if not context_data and hasattr(source, "url") and hasattr(source, "links"):
+ if TYPE_CHECKING:
+ assert isinstance(source, URLInputSource)
context_data = context_from_urlinputsource(source)
try:
version = float(kwargs.get("version", "1.0"))
except ValueError:
version = None
@@ -193,10 +196,12 @@
term_id = None
if TYPE in (key, term_id):
term = TYPE_TERM
if GRAPH in (key, term_id):
if dataset.context_aware and not no_id:
+ if TYPE_CHECKING:
+ assert isinstance(dataset, ConjunctiveGraph)
subgraph = dataset.get_context(subj)
else:
subgraph = graph
for onode in obj_nodes:
self._add_to_graph(dataset, subgraph, context, onode)
--- a/rdflib/plugins/parsers/notation3.py
+++ b/rdflib/plugins/parsers/notation3.py
@@ -1,20 +1,37 @@
#!/usr/bin/env python
+from __future__ import annotations
import codecs
import os
import re
import sys
import typing
from decimal import Decimal
-from typing import IO, TYPE_CHECKING, Any, Callable, Dict, Optional, TypeVar, Union
+from typing import (
+ IO,
+ TYPE_CHECKING,
+ Any,
+ Callable,
+ Dict,
+ Match,
+ MutableSequence,
+ NoReturn,
+ Optional,
+ Pattern,
+ Set,
+ Tuple,
+ TypeVar,
+ Union,
+)
from uuid import uuid4
from rdflib.compat import long_type
from rdflib.exceptions import ParserError
from rdflib.graph import ConjunctiveGraph, Graph, QuotedGraph
from rdflib.term import (
_XSD_PFX,
BNode,
+ IdentifiedNode,
Identifier,
Literal,
Node,
URIRef,
Variable,
@@ -37,19 +54,22 @@
]
from rdflib.parser import Parser
if TYPE_CHECKING:
from rdflib.parser import InputSource
-AnyT = TypeVar("AnyT")
-
-
-def splitFragP(uriref, punct=0):
+_AnyT = TypeVar("_AnyT")
+
+
+def splitFragP(uriref, punc=0):
i = uriref.rfind("#")
if i >= 0:
return uriref[:i], uriref[i:]
else:
return uriref, ""
+
+
+_StrT = TypeVar("_StrT", bound=str)
def join(here, there):
slashl = there.find("/")
colonl = there.find(":")
@@ -1451,10 +1471,12 @@
if isinstance(n, float):
s = Literal(str(n), datatype=DOUBLE_DATATYPE)
return s
if isinstance(f, Formula):
if n in f.existentials:
+ if TYPE_CHECKING:
+ assert isinstance(n, URIRef)
return f.existentials[n]
return n
def intern(self, something):
return something
--- a/rdflib/plugins/parsers/nquads.py
+++ b/rdflib/plugins/parsers/nquads.py
@@ -1,15 +1,16 @@
+from __future__ import annotations
from codecs import getreader
-from rdflib import ConjunctiveGraph
-from rdflib.plugins.parsers.ntriples import (
- ParseError,
- W3CNTriplesParser,
- r_tail,
- r_wspace,
-)
+from typing import Any, MutableMapping, Optional
+from rdflib.exceptions import ParserError as ParseError
+from rdflib.graph import ConjunctiveGraph
+from rdflib.parser import InputSource
+from rdflib.plugins.parsers.ntriples import W3CNTriplesParser, r_tail, r_wspace
+from rdflib.term import BNode
__all__ = ["NQuadsParser"]
+_BNodeContextType = MutableMapping[str, BNode]
class NQuadsParser(W3CNTriplesParser):
def parse(self, inputsource, sink, bnode_context=None, **kwargs):
assert (
--- a/rdflib/plugins/parsers/ntriples.py
+++ b/rdflib/plugins/parsers/ntriples.py
@@ -1,19 +1,33 @@
#!/usr/bin/env python3
+from __future__ import annotations
+
__doc__ = "N-Triples Parser\nLicense: GPL 2, W3C, BSD, or MIT\nAuthor: Sean B. Palmer, inamidst.com\n"
import codecs
import re
from io import BytesIO, StringIO, TextIOBase
-from typing import IO, TYPE_CHECKING, Optional, Pattern, TextIO, Union
+from typing import (
+ IO,
+ TYPE_CHECKING,
+ Any,
+ Match,
+ MutableMapping,
+ Optional,
+ Pattern,
+ TextIO,
+ Union,
+)
from rdflib.compat import _string_escape_map, decodeUnicodeEscape
from rdflib.exceptions import ParserError as ParseError
from rdflib.parser import InputSource, Parser
from rdflib.term import BNode as bNode
from rdflib.term import Literal
+from rdflib.term import URIRef
from rdflib.term import URIRef as URI
if TYPE_CHECKING:
+ import typing_extensions as te
from rdflib.graph import Graph, _ObjectType, _PredicateType, _SubjectType
__all__ = [
"unquote",
"uriquote",
"W3CNTriplesParser",
@@ -92,10 +106,13 @@
return uri
else:
return r_hibyte.sub(lambda m: "%%%02X" % ord(m.group(1)), uri)
+_BNodeContextType = MutableMapping[str, bNode]
+
+
class W3CNTriplesParser(object):
__slots__ = "_bnode_ids", "sink", "buffer", "file", "line"
def __init__(self, sink=None, bnode_context=None):
if bnode_context is not None:
--- a/rdflib/plugins/parsers/rdfxml.py
+++ b/rdflib/plugins/parsers/rdfxml.py
@@ -1,15 +1,21 @@
+from __future__ import annotations
+from typing import TYPE_CHECKING, Any, Dict, List, NoReturn, Optional, Tuple
from urllib.parse import urldefrag, urljoin
from xml.sax import handler, make_parser, xmlreader
from xml.sax.handler import ErrorHandler
from xml.sax.saxutils import escape, quoteattr
from rdflib.exceptions import Error, ParserError
+from rdflib.graph import Graph
from rdflib.namespace import RDF, is_ncname
-from rdflib.parser import Parser
+from rdflib.parser import InputSource, Parser
from rdflib.plugins.parsers.RDFVOC import RDFVOC
-from rdflib.term import BNode, Literal, URIRef
-
+from rdflib.term import BNode, Identifier, Literal, URIRef
+
+if TYPE_CHECKING:
+ from xml.sax.xmlreader import AttributesImpl, Locator
+ from rdflib.graph import _ObjectType, _SubjectType, _TripleType
__all__ = ["create_parser", "BagID", "ElementHandler", "RDFXMLHandler", "RDFXMLParser"]
RDFNS = RDFVOC
UNQUALIFIED = {
"about": RDFVOC.about,
"ID": RDFVOC.ID,
--- a/rdflib/plugins/parsers/trig.py
+++ b/rdflib/plugins/parsers/trig.py
@@ -1,7 +1,9 @@
-from rdflib import ConjunctiveGraph
-from rdflib.parser import Parser
+from __future__ import annotations
+from typing import Any, MutableSequence
+from rdflib.graph import ConjunctiveGraph, Graph
+from rdflib.parser import InputSource, Parser
from .notation3 import RDFSink, SinkParser
def becauseSubGraph(*args, **kwargs):
pass
--- a/rdflib/plugins/parsers/trix.py
+++ b/rdflib/plugins/parsers/trix.py
@@ -1,13 +1,18 @@
+from __future__ import annotations
+from typing import TYPE_CHECKING, Any, Dict, List, NoReturn, Optional, Tuple
from xml.sax import handler, make_parser
from xml.sax.handler import ErrorHandler
from rdflib.exceptions import ParserError
from rdflib.graph import Graph
from rdflib.namespace import Namespace
-from rdflib.parser import Parser
-from rdflib.term import BNode, Literal, URIRef
-
+from rdflib.parser import InputSource, Parser
+from rdflib.store import Store
+from rdflib.term import BNode, Identifier, Literal, URIRef
+
+if TYPE_CHECKING:
+ from xml.sax.xmlreader import AttributesImpl, Locator, XMLReader
__all__ = ["create_parser", "TriXHandler", "TriXParser"]
TRIXNS = Namespace("http://www.w3.org/2004/03/trix/trix-1/")
XMLNS = Namespace("http://www.w3.org/XML/1998/namespace")
@@ -102,10 +107,12 @@
else:
self.error("Unknown element %s in TriX namespace" % name[1])
self.chars = ""
def endElementNS(self, name, qname):
+ if TYPE_CHECKING:
+ assert self.triple is not None
if name[0] != str(TRIXNS):
self.error(
"Only elements in the TriX namespace are allowed. %s!=%s"
% (name[0], TRIXNS)
)
--- a/rdflib/plugins/shared/jsonld/context.py
+++ b/rdflib/plugins/shared/jsonld/context.py
@@ -1,7 +1,20 @@
+from __future__ import annotations
from collections import namedtuple
-from typing import TYPE_CHECKING, Any, Dict, List, Optional, Set, Tuple, Union
+from typing import (
+ TYPE_CHECKING,
+ Any,
+ Collection,
+ Dict,
+ Generator,
+ List,
+ Optional,
+ Set,
+ Tuple,
+ Union,
+)
+from urllib.parse import urljoin, urlsplit
from rdflib.namespace import RDF
from .errors import (
INVALID_CONTEXT_ENTRY,
INVALID_REMOTE_CONTEXT,
RECURSIVE_CONTEXT_INCLUSION,
@@ -28,11 +41,11 @@
TYPE,
VALUE,
VERSION,
VOCAB,
)
-from .util import norm_url, source_to_json, split_iri, urljoin, urlsplit
+from .util import norm_url, source_to_json, split_iri
NODE_KEYS = {GRAPH, ID, INCLUDED, JSON, LIST, NEST, NONE, REV, SET, TYPE, VALUE, LANG}
class Defined(int):
--- a/rdflib/plugins/shared/jsonld/util.py
+++ b/rdflib/plugins/shared/jsonld/util.py
@@ -1,8 +1,10 @@
-import typing as t
+from __future__ import annotations
+import pathlib
+from typing import IO, TYPE_CHECKING, Any, Optional, TextIO, Tuple, Union
-if t.TYPE_CHECKING:
+if TYPE_CHECKING:
import json
else:
try:
import json
@@ -12,12 +14,14 @@
from io import TextIOBase, TextIOWrapper
from posixpath import normpath, sep
from urllib.parse import urljoin, urlsplit, urlunsplit
from rdflib.parser import (
BytesIOWrapper,
+ InputSource,
PythonInputSource,
StringInputSource,
+ URLInputSource,
create_input_source,
)
def source_to_json(source):
@@ -74,5 +78,14 @@
for link in links:
if ' rel="http://www.w3.org/ns/json-ld#context"' in link:
i, j = link.index("<"), link.index(">")
if i > -1 and j > -1:
return urljoin(source.url, link[i + 1 : j])
+
+
+__all__ = [
+ "json",
+ "source_to_json",
+ "split_iri",
+ "norm_url",
+ "context_from_urlinputsource",
+]
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment