@aucampia
Last active July 30, 2022 21:16
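# Generate the runtime-only diff against upstream/master, using the helper script shown below: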
PYLOGGING_LEVEL=INFO task run -- git difftool -y -x $(readlink -f devtools/diffrtpy.py) upstream/master | tee /var/tmp/compact.diff
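# Update this gist with the generated diff (assumes the GitHub CLI, gh, is installed and authenticated):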
gh gist edit https://gist.github.com/aucampia/3f380070fbd8f0e8a99aea2df6cc1f06 -f compact.diff /var/tmp/compact.diff
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -123,24 +123,24 @@
<!-- -->
<!-- CHANGE BARRIER: END -->
<!-- -->
<!-- -->
-<!-- -->
-<!-- -->
-<!-- CHANGE BARRIER: START PR #2066 -->
-<!-- -->
-<!-- -->
-
-- Removed pre python 3.7 compatibility code.
- [PR #2066](https://github.com/RDFLib/rdflib/pull/2066).
- - Removed fallback in case the `shutil` module does not have the `move`
- function.
-
-<!-- -->
-<!-- -->
-<!-- CHANGE BARRIER: END PR #2066 -->
+
+<!-- -->
+<!-- -->
+<!-- CHANGE BARRIER: START PR #2057 -->
+<!-- -->
+<!-- -->
+
+- Added type hints.
+ [PR #2057](https://github.com/RDFLib/rdflib/pull/2057).
+ - `rdflib.store` and builtin stores have mostly complete type hints.
+
+<!-- -->
+<!-- -->
+<!-- CHANGE BARRIER: END PR #2057 -->
<!-- -->
<!-- -->
<!-- -->
<!-- -->
--- a/devtools/diffrtpy.py
+++ b/devtools/diffrtpy.py
@@ -0,0 +1,139 @@
+#!/usr/bin/env python
+"""
+This is a tool that can be used with git difftool to generate a diff that
+ignores type hints and comments.
+
+The name of this script, ``diffrtpy``, is short for "diff runtime python", as
+it will only compare the parts of the Python code that have a runtime impact.
+
+This is to make it easier to review PRs that contain type hints.
+
+To use this script:
+
+.. code-block:: bash
+ task run -- python -m pip install --upgrade strip-hints black python-minifier
+ PYLOGGING_LEVEL=INFO task run -- git difftool -y -x $(readlink -f devtools/diffrtpy.py) upstream/master | tee /var/tmp/compact.diff
+
+Then attach ``/var/tmp/compact.diff`` to the PR.
+"""
+
+
+import argparse
+import logging
+import os
+import sys
+from dataclasses import dataclass, field
+from difflib import unified_diff
+from pathlib import Path
+from typing import List
+
+import black
+import python_minifier
+from strip_hints import strip_string_to_string
+
+
+def clean_python(code: str) -> str:
+ code = strip_string_to_string(code, to_empty=True, strip_nl=True)
+ code = python_minifier.minify(
+ code,
+ remove_annotations=True,
+ remove_pass=False,
+ remove_literal_statements=True,
+ combine_imports=False,
+ hoist_literals=False,
+ rename_locals=False,
+ rename_globals=False,
+ remove_object_base=False,
+ convert_posargs_to_args=False,
+ preserve_shebang=True,
+ )
+ code = black.format_str(code, mode=black.FileMode())
+ return code
+
+
+@dataclass
+class Application:
+ parser: argparse.ArgumentParser = field(
+ default_factory=lambda: argparse.ArgumentParser(add_help=True)
+ )
+
+ def __post_init__(self) -> None:
+ parser = self.parser
+ parser.add_argument(
+ "-v",
+ "--verbose",
+ action="count",
+ dest="verbosity",
+ help="increase verbosity level",
+ )
+ parser.add_argument("lhs_file", nargs=1, type=str)
+ parser.add_argument("rhs_file", nargs=1, type=str)
+ parser.set_defaults(handler=self.handle)
+
+ def run(self, args: List[str]) -> None:
+ parse_result = self.parser.parse_args(args)
+
+ verbosity = parse_result.verbosity
+ if verbosity is not None:
+ root_logger = logging.getLogger("")
+ root_logger.propagate = True
+ new_level = (
+ root_logger.getEffectiveLevel()
+ - (min(1, verbosity)) * 10
+ - min(max(0, verbosity - 1), 9) * 1
+ )
+ root_logger.setLevel(new_level)
+
+ logging.debug(
+ "sys.executable = %s, args = %s, parse_result = %s, logging.level = %s",
+ sys.executable,
+ args,
+ parse_result,
+ logging.getLogger("").getEffectiveLevel(),
+ )
+
+ parse_result.handler(parse_result)
+
+ def handle(self, parse_result: argparse.Namespace) -> None:
+ logging.debug("entry ...")
+
+ base = os.environ["BASE"]
+
+ lhs_file: Path = Path(parse_result.lhs_file[0])
+ rhs_file: Path = Path(parse_result.rhs_file[0])
+
+ logging.debug(
+ "base = %s, lhs_file = %s, rhs_file = %s", base, lhs_file, rhs_file
+ )
+
+ lhs_file_content = lhs_file.read_text()
+ rhs_file_content = rhs_file.read_text()
+
+ if lhs_file.name.endswith(".py") and rhs_file.name.endswith(".py"):
+ lhs_file_content = clean_python(lhs_file_content)
+ rhs_file_content = clean_python(rhs_file_content)
+
+ lhs_file_lines = lhs_file_content.splitlines(keepends=True)
+ rhs_file_lines = rhs_file_content.splitlines(keepends=True)
+
+ sys.stdout.writelines(
+ unified_diff(lhs_file_lines, rhs_file_lines, f"a/{base}", f"b/{base}", n=5)
+ )
+
+
+def main() -> None:
+ logging.basicConfig(
+ level=os.environ.get("PYLOGGING_LEVEL", logging.INFO),
+ stream=sys.stderr,
+ datefmt="%Y-%m-%dT%H:%M:%S",
+ format=(
+ "%(asctime)s.%(msecs)03d %(process)d %(thread)d %(levelno)03d:%(levelname)-8s "
+ "%(name)-12s %(module)s:%(lineno)s:%(funcName)s %(message)s"
+ ),
+ )
+
+ Application().run(sys.argv[1:])
+
+
+if __name__ == "__main__":
+ main()
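
For context: ``git difftool -y -x <cmd>`` invokes ``<cmd>`` once per changed file, passing the two file versions as arguments and exporting the repo-relative path in the ``BASE`` environment variable, which is what ``handle`` above reads. A hypothetical standalone run against two versions of a single file (the file paths here are illustrative only):

    BASE=rdflib/store.py python devtools/diffrtpy.py /tmp/old_store.py /tmp/new_store.py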
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -61,24 +61,28 @@
("py:class", "http.client.HTTPMessage"),
("py:class", "importlib.metadata.EntryPoint"),
("py:class", "xml.dom.minidom.Document"),
("py:class", "xml.dom.minidom.DocumentFragment"),
("py:class", "rdflib.plugin.PluginT"),
+ ("py:class", "Identifier"),
("py:class", "Diagnostics"),
("py:class", "ParseAction"),
("py:class", "ParseFailAction"),
("py:class", "pyparsing.core.TokenConverter"),
("py:class", "pyparsing.results.ParseResults"),
+ ("py:class", "db.DBEnv"),
]
if sys.version_info < (3, 9):
nitpick_ignore.extend(
[
("py:class", "_TriplePatternType"),
("py:class", "_TripleType"),
("py:class", "_ObjectType"),
("py:class", "_PredicateType"),
("py:class", "_SubjectType"),
+ ("py:class", "_ContextType"),
+ ("py:class", "_ContextIdentifierType"),
("py:class", "TextIO"),
]
)
if sys.version_info < (3, 8):
nitpick_ignore.extend([("py:class", "importlib_metadata.EntryPoint")])
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -6,11 +6,12 @@
pycodestyle = [
"+*",
# mirrored from setup.cfg
"-E501",
"-E203",
- "-W503"
+ "-W503",
+ "-E231",
]
pyflakes = [
"+*",
]
pep8-naming = ["+*"]
@@ -61,10 +62,11 @@
[tool.pytest.ini_options]
addopts = [
"--doctest-modules",
"--ignore=admin",
+ "--ignore=devtools",
"--ignore=rdflib/extras/external_graph_libs.py",
"--ignore-glob=docs/*.py",
"--doctest-glob=docs/*.rst",
]
doctest_optionflags = "ALLOW_UNICODE"
--- a/rdflib/graph.py
+++ b/rdflib/graph.py
@@ -39,17 +39,18 @@
from rdflib.term import BNode, Genid, IdentifiedNode, Literal, Node, RDFLibGenid, URIRef
_SubjectType = Node
_PredicateType = Node
_ObjectType = Node
+_ContextIdentifierType = Node
_TripleType = Tuple["_SubjectType", "_PredicateType", "_ObjectType"]
-_QuadType = Tuple["_SubjectType", "_PredicateType", "_ObjectType", "Graph"]
+_QuadType = Tuple["_SubjectType", "_PredicateType", "_ObjectType", "_ContextType"]
_OptionalQuadType = Tuple[
- "_SubjectType", "_PredicateType", "_ObjectType", Optional["Graph"]
+ "_SubjectType", "_PredicateType", "_ObjectType", Optional["_ContextType"]
]
_OptionalIdentifiedQuadType = Tuple[
- "_SubjectType", "_PredicateType", "_ObjectType", Optional["Node"]
+ "_SubjectType", "_PredicateType", "_ObjectType", Optional["_ContextIdentifierType"]
]
_TriplePatternType = Tuple[
Optional["_SubjectType"], Optional["_PredicateType"], Optional["_ObjectType"]
]
_QuadPatternType = Tuple[
@@ -606,11 +607,15 @@
fd, name = tempfile.mkstemp()
stream = os.fdopen(fd, "wb")
serializer.serialize(stream, base=base, encoding=encoding, **args)
stream.close()
dest = url2pathname(path) if scheme == "file" else location
- shutil.move(name, dest)
+ if hasattr(shutil, "move"):
+ shutil.move(name, dest)
+ else:
+ shutil.copy(name, dest)
+ os.remove(name)
return self
def print(self, format="turtle", encoding="utf-8", out=None):
print(
self.serialize(None, format=format, encoding=encoding).decode(encoding),
@@ -843,10 +848,13 @@
add_to_cbd(resource)
return subgraph
+_ContextType = Graph
+
+
class ConjunctiveGraph(Graph):
def __init__(self, store="default", identifier=None, default_graph_base=None):
super(ConjunctiveGraph, self).__init__(store, identifier=identifier)
assert (
self.store.context_aware
--- a/rdflib/plugins/stores/auditable.py
+++ b/rdflib/plugins/stores/auditable.py
@@ -1,9 +1,22 @@
import threading
-from rdflib import ConjunctiveGraph, Graph
+from typing import TYPE_CHECKING, Any, Generator, Iterator, List, Optional, Tuple
+from rdflib.graph import ConjunctiveGraph, Graph
from rdflib.store import Store
+if TYPE_CHECKING:
+ from rdflib.graph import (
+ _ContextIdentifierType,
+ _ContextType,
+ _ObjectType,
+ _PredicateType,
+ _SubjectType,
+ _TriplePatternType,
+ _TripleType,
+ )
+ from rdflib.query import Result
+ from rdflib.term import URIRef
destructiveOpLocks = {"add": None, "remove": None}
class AuditableStore(Store):
def __init__(self, store):
--- a/rdflib/plugins/stores/berkeleydb.py
+++ b/rdflib/plugins/stores/berkeleydb.py
@@ -1,12 +1,16 @@
import logging
from os import mkdir
from os.path import abspath, exists
from threading import Thread
+from typing import TYPE_CHECKING, Any, Callable, Dict, Generator, List, Optional, Tuple
from urllib.request import pathname2url
from rdflib.store import NO_STORE, VALID_STORE, Store
-from rdflib.term import URIRef
+from rdflib.term import Identifier, Node, URIRef
+
+if TYPE_CHECKING:
+ from rdflib.graph import Graph, _ContextType, _TriplePatternType, _TripleType
def bb(u):
return u.encode("utf-8")
@@ -21,11 +25,26 @@
ENVSETFLAGS = db.DB_CDB_ALLDB
ENVFLAGS = db.DB_INIT_MPOOL | db.DB_INIT_CDB | db.DB_THREAD
CACHESIZE = 1024 * 1024 * 50
DBOPENFLAGS = db.DB_THREAD
logger = logging.getLogger(__name__)
-__all__ = ["BerkeleyDB"]
+__all__ = [
+ "BerkeleyDB",
+ "_ToKeyFunc",
+ "_FromKeyFunc",
+ "_GetPrefixFunc",
+ "_ResultsFromKeyFunc",
+]
+_ToKeyFunc = Callable[[Tuple[bytes, bytes, bytes], bytes], bytes]
+_FromKeyFunc = Callable[[bytes], Tuple[bytes, bytes, bytes, bytes]]
+_GetPrefixFunc = Callable[
+ [Tuple[str, str, str], Optional[str]], Generator[str, None, None]
+]
+_ResultsFromKeyFunc = Callable[
+ [bytes, Optional[Node], Optional[Node], Optional[Node], bytes],
+ Tuple[Tuple[Node, Node, Node], Generator[Node, None, None]],
+]
class BerkeleyDB(Store):
context_aware = True
formula_aware = True
--- a/rdflib/plugins/stores/memory.py
+++ b/rdflib/plugins/stores/memory.py
@@ -1,8 +1,34 @@
+from typing import (
+ TYPE_CHECKING,
+ Any,
+ Collection,
+ Dict,
+ Generator,
+ Iterator,
+ Optional,
+ Set,
+ Tuple,
+ Union,
+ overload,
+)
from rdflib.store import Store
from rdflib.util import _coalesce
+if TYPE_CHECKING:
+ from rdflib.graph import (
+ Graph,
+ _ContextType,
+ _ObjectType,
+ _PredicateType,
+ _SubjectType,
+ _TriplePatternType,
+ _TripleType,
+ )
+ from rdflib.plugins.sparql.sparql import Query, Update
+ from rdflib.query import Result
+ from rdflib.term import Identifier, URIRef, Variable
__all__ = ["SimpleMemory", "Memory"]
ANY = None
class SimpleMemory(Store):
@@ -449,10 +475,18 @@
del self.__tripleContexts[triple]
else:
self.__tripleContexts[triple] = ctxs
self.__contextTriples[ctx].remove(triple)
+ @overload
+ def __ctx_to_str(self, ctx):
+ ...
+
+ @overload
+ def __ctx_to_str(self, ctx):
+ ...
+
def __ctx_to_str(self, ctx):
if ctx is None:
return None
try:
ctx_str = "{}:{}".format(ctx.identifier.__class__.__name__, ctx.identifier)
--- a/rdflib/plugins/stores/sparqlconnector.py
+++ b/rdflib/plugins/stores/sparqlconnector.py
@@ -3,12 +3,12 @@
from io import BytesIO
from typing import TYPE_CHECKING, Optional, Tuple
from urllib.error import HTTPError
from urllib.parse import urlencode
from urllib.request import Request, urlopen
-from rdflib import BNode
from rdflib.query import Result
+from rdflib.term import BNode
log = logging.getLogger(__name__)
if TYPE_CHECKING:
import typing_extensions as te
--- a/rdflib/plugins/stores/sparqlstore.py
+++ b/rdflib/plugins/stores/sparqlstore.py
@@ -1,20 +1,47 @@
import collections
import re
-from typing import Any, Callable, Dict, Optional, Tuple, Union
-from rdflib import BNode, Variable
-from rdflib.graph import DATASET_DEFAULT_GRAPH_ID
+from typing import (
+ TYPE_CHECKING,
+ Any,
+ Callable,
+ Dict,
+ Generator,
+ Iterable,
+ Iterator,
+ List,
+ Optional,
+ Tuple,
+ Union,
+ overload,
+)
+from rdflib.graph import DATASET_DEFAULT_GRAPH_ID, Graph
from rdflib.plugins.stores.regexmatching import NATIVE_REGEX
from rdflib.store import Store
-from rdflib.term import Node
+from rdflib.term import BNode, Identifier, Node, URIRef, Variable
+
+if TYPE_CHECKING:
+ import typing_extensions as te
+ from rdflib.graph import (
+ _TripleType,
+ _ContextType,
+ _QuadType,
+ _TriplePatternType,
+ _SubjectType,
+ _PredicateType,
+ _ObjectType,
+ _ContextIdentifierType,
+ )
+ from rdflib.plugins.sparql.sparql import Query, Update
+ from rdflib.query import Result, ResultRow
from .sparqlconnector import SPARQLConnector
LIMIT = "LIMIT"
OFFSET = "OFFSET"
ORDERBY = "ORDER BY"
BNODE_IDENT_PATTERN = re.compile("(?P<label>_\\:[^\\s]+)")
-NodeToSparql = Callable[..., str]
+_NodeToSparql = Callable[["Node"], str]
def _node_to_sparql(node):
if isinstance(node, BNode):
raise Exception(
@@ -170,10 +197,12 @@
if result[0] == 401:
raise ValueError(
"It looks like you need to authenticate with this SPARQL Store. HTTP unauthorized"
)
for row in result:
+ if TYPE_CHECKING:
+ assert isinstance(row, ResultRow)
yield ((row.get(s, s), row.get(p, p), row.get(o, o)), None)
elif result.askAnswer:
yield ((s, p, o), None)
def triples_choices(self, _, context=None):
@@ -229,10 +258,18 @@
def add_graph(self, graph):
raise TypeError("The SPARQL store is read only")
def remove_graph(self, graph):
raise TypeError("The SPARQL store is read only")
+
+ @overload
+ def _is_contextual(self, graph):
+ ...
+
+ @overload
+ def _is_contextual(self, graph):
+ ...
def _is_contextual(self, graph):
if not self.context_aware or graph is None:
return False
if isinstance(graph, str):
@@ -376,10 +413,12 @@
assert not quoted
subject, predicate, obj = spo
nts = self.node_to_sparql
triple = "%s %s %s ." % (nts(subject), nts(predicate), nts(obj))
if self._is_contextual(context):
+ if TYPE_CHECKING:
+ assert context is not None
q = "INSERT DATA { GRAPH %s { %s } }" % (nts(context.identifier), triple)
else:
q = "INSERT DATA { %s }" % triple
self._transaction().append(q)
if self.autocommit:
@@ -417,10 +456,12 @@
if not obj:
obj = Variable("O")
nts = self.node_to_sparql
triple = "%s %s %s ." % (nts(subject), nts(predicate), nts(obj))
if self._is_contextual(context):
+ if TYPE_CHECKING:
+ assert context is not None
cid = nts(context.identifier)
q = "WITH %(graph)s DELETE { %(triple)s } WHERE { %(triple)s }" % {
"graph": cid,
"triple": triple,
}
@@ -442,10 +483,12 @@
raise Exception("Update endpoint is not set!")
self.debug = DEBUG
assert isinstance(query, str)
query = self._inject_prefixes(query, initNs)
if self._is_contextual(queryGraph):
+ if TYPE_CHECKING:
+ assert queryGraph is not None
query = self._insert_named_graph(query, queryGraph)
if initBindings:
v = list(initBindings)
values = "\nVALUES ( %s )\n{ ( %s ) }\n" % (
" ".join(("?" + str(x) for x in v)),
--- a/rdflib/query.py
+++ b/rdflib/query.py
@@ -153,11 +153,15 @@
return None
fd, name = tempfile.mkstemp()
stream = os.fdopen(fd, "wb")
serializer.serialize(stream, encoding=encoding, **args)
stream.close()
- shutil.move(name, path)
+ if hasattr(shutil, "move"):
+ shutil.move(name, path)
+ else:
+ shutil.copy(name, path)
+ os.remove(name)
return None
def __len__(self):
if self.type == "ASK":
return 1
--- a/rdflib/store.py
+++ b/rdflib/store.py
@@ -1,13 +1,37 @@
import pickle
from io import BytesIO
-from typing import TYPE_CHECKING, Iterable, Optional, Tuple
+from typing import (
+ TYPE_CHECKING,
+ Any,
+ Dict,
+ Generator,
+ Iterable,
+ Iterator,
+ List,
+ Mapping,
+ Optional,
+ Tuple,
+ Union,
+ overload,
+)
from rdflib.events import Dispatcher, Event
if TYPE_CHECKING:
- from rdflib.graph import Graph
- from rdflib.term import IdentifiedNode, Node, URIRef
+ from rdflib.graph import (
+ Graph,
+ _ContextType,
+ _ObjectType,
+ _PredicateType,
+ _QuadType,
+ _SubjectType,
+ _TriplePatternType,
+ _TripleType,
+ )
+ from rdflib.plugins.sparql.sparql import Query, Update
+ from rdflib.query import Result
+ from rdflib.term import Identifier, Node, URIRef, Variable
VALID_STORE = 1
CORRUPTED_STORE = 0
NO_STORE = -1
UNKNOWN = None
Pickler = pickle.Pickler
@@ -135,10 +159,22 @@
)
self.add((s, p, o), c)
def remove(self, triple, context=None):
self.dispatcher.dispatch(TripleRemovedEvent(triple=triple, context=context))
+
+ @overload
+ def triples_choices(self, triple, context=None):
+ ...
+
+ @overload
+ def triples_choices(self, triple, context=None):
+ ...
+
+ @overload
+ def triples_choices(self, triple, context=None):
+ ...
def triples_choices(self, triple, context=None):
subject, predicate, object_ = triple
if isinstance(object_, list):
assert not isinstance(subject, list), "object_ / subject are both lists"
--- a/setup.cfg
+++ b/setup.cfg
@@ -21,10 +21,12 @@
E501, # line too long
# Disabled based on black recommendations
# https://black.readthedocs.io/en/stable/faq.html#why-are-flake8-s-e203-and-w503-violated
E203, # Whitespace before ':'
W503, # Line break occurred before a binary operator
+ # Disabled because this conflicts with black
+ E231, # missing whitespace after ','
[coverage:run]
branch = True
source = rdflib
omit =
@@ -46,11 +48,11 @@
if 0:
if __name__ == .__main__.:
if __name__==.__main__.:
[mypy]
-files = rdflib,test
+files = rdflib,test,devtools
python_version = 3.7
warn_unused_configs = True
ignore_missing_imports = True
disallow_subclassing_any = False
warn_unreachable = True
--- a/tox.ini
+++ b/tox.ini
@@ -5,10 +5,11 @@
[testenv]
deps =
lxml: lxml
lxml: lxml-stubs
+ extensive: berkeleydb-stubs
setenv =
extensive: BERKELEYDB_DIR = /usr
COVERAGE_FILE = {env:COVERAGE_FILE:{toxinidir}/.coverage.{envname}}
MYPY_CACHE_DIR = {envdir}/.mypy_cache
extras =