Skip to content

Instantly share code, notes, and snippets.

@tonyfast
Last active September 20, 2019 13:23
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save tonyfast/6382152537773be2ffc208c3d4e526c2 to your computer and use it in GitHub Desktop.
Save tonyfast/6382152537773be2ffc208c3d4e526c2 to your computer and use it in GitHub Desktop.
# Package entry point: re-exports the object that the ``namespaces`` module binds to ``self``.
__all__ = 'Graph',
# The import runs inside importnb's ``Notebook`` context manager -- presumably because
# ``namespaces`` is a Jupyter notebook rather than a ``.py`` file (confirm against the repo).
with __import__('importnb').Notebook(lazy=True):
    # NOTE: despite its name, ``Graph`` is bound to the module-level ``self`` of ``namespaces``.
    from .namespaces import self as Graph
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 113,
"metadata": {},
"outputs": [],
"source": [
" with __import__('importnb').Notebook():\n",
" try:\n",
" from . import rdflib_patch\n",
" except:\n",
" import rdflib_patch\n",
" import rdflib, pandas, pydantic, jsonschema, json, abc, networkx, itertools, dataclasses, requests, typing, collections, requests_cache, inspect, IPython, pyld.jsonld as jsonld, abc\n",
" from toolz.curried import *; from rdflib.namespace import OWL, RDF, RDFS, SKOS, DC, DCTERMS\n",
" requests_cache.install_cache('rdf')\n",
" __all__ = tuple(\"Graph CC RDFS HYDRA OWL XHTML RDF XS XSD SW\".split())\n",
"\n",
" SCHEMA = rdflib.Namespace('http://schema.org/')"
]
},
{
"cell_type": "code",
"execution_count": 114,
"metadata": {},
"outputs": [],
"source": [
"# Capture pydantic's table only once: when the global already exists, keep the earlier snapshot.\n",
"if 'field_class_to_schema_enum_enabled' not in globals():\n",
"    field_class_to_schema_enum_enabled = pydantic.schema.field_class_to_schema_enum_enabled"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Metaclasses hold linked-data types in their annotations.\n",
"\n",
"The class of the metaclass stores the Python type annotations."
]
},
{
"cell_type": "code",
"execution_count": 115,
"metadata": {},
"outputs": [],
"source": [
"def split(object):\n",
"    \"\"\"Split a URI into ``(namespace, local name)``.\n",
"\n",
"    The cut is made at the last ``#`` when the URI contains one, otherwise at the last\n",
"    ``/``; the separator stays on the namespace half. A string containing neither\n",
"    separator comes back as ``('', object)``.\n",
"    \"\"\"\n",
"    separator = '#' if '#' in object else '/'\n",
"    namespace, found, local_name = object.rpartition(separator)\n",
"    return namespace + found, local_name"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Load in a bunch of namespaces."
]
},
{
"cell_type": "code",
"execution_count": 116,
"metadata": {},
"outputs": [],
"source": [
"class Namespace(rdflib.namespace.ClosedNamespace):\n",
"    \"\"\"A closed namespace whose attribute lookups are routed through the module-level graph.\"\"\"\n",
"\n",
"    def __getattr__(_, name):\n",
"        # The receiver is deliberately called ``_``: ``self`` below is NOT this namespace but the\n",
"        # module-level graph named ``self``, whose ``new`` maps the looked-up term to its class.\n",
"        return self.new(super().__getattr__(name))\n",
"\n",
"class Graph(rdflib.ConjunctiveGraph):\n",
"    \"\"\"A conjunctive graph that downloads vocabularies and generates Python classes for their terms.\n",
"\n",
"    The annotations below are not type hints: each one maps an attribute name to a namespace\n",
"    URI, and ``update`` reads them back to decide which namespaces to bind on the graph.\n",
"    \"\"\"\n",
"    # Cache of generated classes keyed by RDF term. It lives on the class, so every Graph\n",
"    # instance shares it.\n",
"    types = {}\n",
"    CC: rdflib.term.URIRef('http://creativecommons.org/ns#')\n",
"    RDFS: rdflib.term.URIRef('http://www.w3.org/2000/01/rdf-schema#')\n",
"    HYDRA: rdflib.term.URIRef('http://www.w3.org/ns/hydra/core#')\n",
"    OWL: rdflib.term.URIRef('http://www.w3.org/2002/07/owl#')\n",
"    XHTML: rdflib.term.URIRef('http://www.w3.org/1999/xhtml/vocab#')\n",
"    RDF: rdflib.term.URIRef('http://www.w3.org/1999/02/22-rdf-syntax-ns#')\n",
"    XSD: rdflib.term.URIRef('http://www.w3.org/2001/XMLSchema#')\n",
"    XS: rdflib.term.URIRef('http://www.w3.org/2001/XMLSchema-datatypes#')\n",
"    SW: rdflib.term.URIRef('http://www.w3.org/2003/06/sw-vocab-status/ns#')\n",
"    SCHEMA: rdflib.term.URIRef('http://schema.org/')\n",
"    PROV: rdflib.term.URIRef('http://www.w3.org/ns/prov#')\n",
"    DCAT: rdflib.term.URIRef('http://www.w3.org/ns/dcat#')\n",
"    QUDT: rdflib.term.URIRef('http://qudt.org/schema/qudt#')\n",
"\n",
"    def get(self, object, format=None):\n",
"        \"\"\"Download ``object``, parse it into this graph as ``format`` and refresh the namespaces.\n",
"\n",
"        A ``json-ld`` document is expanded with pyld and re-serialised before parsing; any\n",
"        other format is parsed from the raw response text. Returns ``self`` so calls chain.\n",
"\n",
"        Raises ``requests.HTTPError`` when the server answers with an error status.\n",
"        \"\"\"\n",
"        response = requests.get(str(object))\n",
"        # Fail here on a 4xx/5xx answer instead of handing an error page to the RDF parser.\n",
"        response.raise_for_status()\n",
"        if format == 'json-ld':\n",
"            data = json.dumps(jsonld.expand(response.json()))\n",
"        else:\n",
"            data = response.text\n",
"        self.parse(data=data, format=format)\n",
"        self.update()\n",
"        return self\n",
"\n",
"    def update(self):\n",
"        \"\"\"Bind a ``Namespace`` attribute for each annotated namespace URI found in the graph.\n",
"\n",
"        Returns ``self``.\n",
"        \"\"\"\n",
"        # Invert the class annotations (attribute name -> namespace URI) into URI -> name.\n",
"        names = pipe(self.__annotations__, itemmap(reversed), dict)\n",
"        pipe(\n",
"            # Flatten whatever iterating the graph yields into one stream of terms.\n",
"            self, concat,\n",
"            # Keep the distinct URI references that start with 'http'.\n",
"            filter(flip(isinstance)(rdflib.URIRef)), filter(flip(str.startswith)('http')), set,\n",
"            # Group by namespace half, keeping only the local names of each group.\n",
"            groupby(compose(first, split)),\n",
"            valmap(compose(list, map(compose(second, split)))),\n",
"            itemmap(lambda x: (rdflib.URIRef(x[0]), Namespace(*x))),\n",
"            # Namespaces without an annotation map to None and are dropped here.\n",
"            keymap(names.get), keyfilter(flip(isinstance)(str)),\n",
"            itemmap(do(lambda x: setattr(self, *x))),\n",
"        )\n",
"        return self\n",
"\n",
"    def enrich(self, type):\n",
"        \"\"\"Return the class generated for the RDF term ``type``, creating it on first use.\n",
"\n",
"        New classes are cached in ``self.types``. Their bases come from the entries of\n",
"        ``self[type]`` whose first element is ``rdfs:subPropertyOf``, ``rdfs:subClassOf`` or\n",
"        ``rdf:type`` (each last element is enriched recursively); with no such entry the\n",
"        single base is the class for ``rdfs:Resource``.\n",
"        \"\"\"\n",
"        if type not in self.types:\n",
"            # Every local bound before the ``locals()`` call below becomes a class attribute\n",
"            # (``x.type`` is read back when sorting bases): do not rename, add or remove locals.\n",
"            subject = self[type]\n",
"            object = self[:, :, type]\n",
"            __annotations__ = {}\n",
"            bases = pipe(subject, filter(compose({RDFS.subPropertyOf, RDFS.subClassOf, RDF.type}.__contains__, first)), map(last), map(self.enrich), set, tuple) or (self.enrich(RDFS.Resource),)\n",
"            # Local name -> property URI for the properties that name this term as their domain.\n",
"            __context__ = pipe(self[:, RDFS.domain, type] + self[:, SCHEMA.domainIncludes, type], map(juxt(compose(second, split), identity)), dict)\n",
"            # The parameter shadows the builtin ``type``, hence the explicit ``builtins`` lookup.\n",
"            # Bases are ordered most recently registered first.\n",
"            self.types[type] = __import__('builtins').type(second(split(type)), tuple(sorted(bases, key=lambda x: list(self.types).index(x.type), reverse=True)), {\n",
"                '__doc__': ''.join(self[type, RDFS.comment]), **locals()})\n",
"            # Replace the class's own context with the merged context of its whole MRO.\n",
"            self.types[type].__context__ = pipe(self.types[type], inspect.getmro, map(lambda x: getattr(x, '__context__', {})), lambda x: collections.ChainMap(*x), dict)\n",
"        return self.types[type]\n",
"\n",
"    def new(self, type):\n",
"        \"\"\"Return the class for ``type``; on first sight, build it and re-annotate every cached class.\"\"\"\n",
"        if type not in self.types:\n",
"            self.enrich(type)\n",
"            self.annotate()\n",
"        return self.types[type]\n",
"\n",
"    def annotate(self):\n",
"        \"\"\"Add type annotations from the context.\n",
"\n",
"        Each property in a class's ``__context__`` is annotated as ``str`` or any class from the\n",
"        property's ``rdfs:range`` / ``schema:rangeIncludes``, singly or as a list. The class is\n",
"        then handed to pydantic's dataclass decorator; an error raised there is ignored.\n",
"        \"\"\"\n",
"        # Snapshot the values: ``enrich`` below may register new classes while we iterate.\n",
"        for cls in list(self.types.values()):\n",
"            # NOTE(review): ``hasattr`` also sees an inherited ``__annotations__``, in which case\n",
"            # the update below writes into the parent's dict -- confirm this is intended.\n",
"            if not hasattr(cls, '__annotations__'): cls.__annotations__ = {}\n",
"            pipe(cls.__context__,\n",
"                 valmap(lambda x: self[x, RDFS.range] + self[x, SCHEMA.rangeIncludes]), valmap(map(self.enrich)), valmap(tuple), valmap((str,).__add__),\n",
"                 valmap(typing.Union.__getitem__), valmap(lambda x: typing.Union[x, typing.List[x]]),\n",
"                 cls.__annotations__.update)\n",
"            # Move ``value`` to the front; raises KeyError when no ``value`` annotation exists.\n",
"            cls.__annotations__ = {'value': cls.__annotations__.pop('value'), **cls.__annotations__, }\n",
"            # Give every annotated field a class-level default (None unless one is already set).\n",
"            for x in cls.__annotations__: setattr(cls, x, getattr(cls, x, None))\n",
"            # Best effort: skip classes pydantic rejects, but no longer swallow\n",
"            # KeyboardInterrupt / SystemExit.\n",
"            excepts(Exception, pydantic.dataclasses.dataclass)(cls)\n"
]
},
{
"cell_type": "code",
"execution_count": 117,
"metadata": {},
"outputs": [],
"source": [
"# Load every vocabulary into one graph by folding ``get`` over the (location, format) pairs\n",
"# in order. ``get`` returns the graph it was called on, so the fold ends with that same\n",
"# instance, and ``self`` is only bound once every download has succeeded.\n",
"self = __import__('functools').reduce(\n",
"    lambda graph, source: graph.get(*source),\n",
"    (\n",
"        (rdflib.namespace.RDF, 'ttl'),\n",
"        (rdflib.namespace.RDFS, 'ttl'),\n",
"        (rdflib.namespace.OWL, 'ttl'),\n",
"        ('https://www.w3.org/ns/prov.ttl', 'ttl'),\n",
"        ('https://raw.githubusercontent.com/AKSW/RDB2RDF-Seminar/master/sparqlmap/eclipse/workspace/xturtle.core/xsd.ttl', 'ttl'),\n",
"        ('https://www.w3.org/ns/hydra/core', 'json-ld'),\n",
"        ('http://www.w3.org/2003/06/sw-vocab-status/ns#', 'xml'),\n",
"        ('http://schema.org/version/latest/schema.ttl', 'ttl'),\n",
"        ('https://w3c.github.io/dxwg/dcat/rdf/dcat.ttl', 'ttl'),\n",
"        ('https://creativecommons.org/schema.rdf', 'xml'),\n",
"        ('http://qudt.org/2.0/schema/qudt', 'ttl'),\n",
"        ('http://www.linkedmodel.org/schema/vaem', 'xml'),\n",
"        ('http://qudt.org/1.1/schema/dimension', 'xml'),\n",
"        ('http://qudt.org/1.1/schema/quantity', 'xml'),\n",
"        ('http://www.linkedmodel.org/1.0/schema/dtype', 'xml'),\n",
"    ),\n",
"    Graph(),\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 127,
"metadata": {},
"outputs": [],
"source": [
"class WebType:\n",
"    \"\"\"Base class for the generated linked-data classes: schema export and JSON-LD style output.\n",
"\n",
"    The methods read ``__context__`` (and ``schema`` reads ``__pydantic_model__``), neither of\n",
"    which is defined here; they are expected to be provided on the subclasses.\n",
"    \"\"\"\n",
"\n",
"    @classmethod\n",
"    def schema(cls):\n",
"        \"\"\"Return the schema produced by the attached pydantic model.\"\"\"\n",
"        # NOTE(review): ``cls`` is also passed as the first argument of the model's ``schema``\n",
"        # -- confirm this is intended.\n",
"        return cls.__pydantic_model__.schema(cls)\n",
"\n",
"    @classmethod\n",
"    def __get_validators__(cls):\n",
"        \"\"\"Return no validators.\n",
"\n",
"        [custom types]: https://pydantic-docs.helpmanual.io/#custom-data-types\n",
"        \"\"\"\n",
"        return []\n",
"\n",
"    def dict(self, **ctx):\n",
"        \"\"\"Serialise the set attributes into a mapping with an ``@context`` entry.\n",
"\n",
"        ``ctx`` becomes the ``@context`` value. Every name in ``__context__`` whose attribute is\n",
"        not None is emitted; nested ``WebType`` values are serialised recursively, with their\n",
"        own context when ``ctx`` is non-empty. For each emitted value that validates as a URI\n",
"        or JSON pointer string, the matching ``ctx`` entry is rewritten to\n",
"        ``{'@type': '@id', '@id': <previous entry>}``.\n",
"        \"\"\"\n",
"        document = {'@context': ctx}\n",
"        for k in self.__context__:\n",
"            v = getattr(self, k, None)\n",
"            if v is None:\n",
"                continue\n",
"            document[k] = v.dict(**(v.__context__ if ctx else {})) if isinstance(v, WebType) else v\n",
"        reference = {'anyOf': [{'type': 'string', 'format': 'uri'}, {'type': 'string', 'format': 'json-pointer'}]}\n",
"        for k, v in document.items():\n",
"            # Without a context entry for ``k`` there is nothing to rewrite; skip it rather\n",
"            # than fail with KeyError (e.g. when ``dict()`` is called with no context).\n",
"            if k not in ctx:\n",
"                continue\n",
"            try:\n",
"                jsonschema.validate(v, reference, format_checker=jsonschema.draft7_format_checker)\n",
"            except jsonschema.ValidationError:\n",
"                continue\n",
"            ctx[k] = {'@type': '@id', '@id': ctx[k]}\n",
"        return document\n",
"\n",
"    def metadata(self):\n",
"        \"\"\"Return ``dict`` called with the instance's full context, expanded with pyld.\"\"\"\n",
"        return jsonld.expand(self.dict(**self.__context__))"
]
},
{
"cell_type": "code",
"execution_count": 128,
"metadata": {},
"outputs": [],
"source": [
"# Register the classes for rdfs:Resource and rdfs:Class by hand, then annotate them.\n",
"# rdfs:Resource derives from WebType; rdfs:Class derives from the rdfs:Resource class.\n",
"self.types[RDFS.Resource] = type(\n",
"    second(split(RDFS.Resource)),\n",
"    (WebType,),\n",
"    dict(\n",
"        __doc__=''.join(self[RDFS.Resource, RDFS.comment]),\n",
"        __context__=pipe(self[:, RDFS.domain, RDFS.Resource], map(juxt(compose(second, split), identity)), dict),\n",
"        type=RDFS.Resource,\n",
"        subject=self[RDFS.Resource],\n",
"        object=self[:, :, RDFS.Resource],\n",
"    ),\n",
")\n",
"self.types[RDFS.Class] = type(\n",
"    second(split(RDFS.Class)),\n",
"    (self.types[RDFS.Resource],),\n",
"    dict(\n",
"        __doc__=''.join(self[RDFS.Class, RDFS.comment]),\n",
"        __context__=pipe(self[:, RDFS.domain, RDFS.Class], map(juxt(compose(second, split), identity)), dict),\n",
"        type=RDFS.Class,\n",
"        subject=self[RDFS.Class],\n",
"        object=self[:, :, RDFS.Class],\n",
"    ),\n",
")\n",
"self.annotate()"
]
},
{
"cell_type": "code",
"execution_count": 129,
"metadata": {},
"outputs": [],
"source": [
"# Keep pydantic's table in a notebook global, captured only if that global does not exist yet,\n",
"# so re-running this cell rebuilds the patched tuple from the snapshot instead of stacking entries.\n",
"if 'field_class_to_schema_enum_enabled' not in globals():\n",
"    field_class_to_schema_enum_enabled = pydantic.schema.field_class_to_schema_enum_enabled\n",
"# Put the generated rdfs:Resource class, paired with an empty dict, ahead of the snapshot's entries.\n",
"pydantic.schema.field_class_to_schema_enum_enabled = ((self.RDFS.Resource, {}),) + field_class_to_schema_enum_enabled"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.3"
}
},
"nbformat": 4,
"nbformat_minor": 4
}
Display the source blob
Display the rendered blob
Raw
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
rdflib
strict-rfc3339
rfc3986
rfc3987
pyld
pandas
pyyaml
networkx
matplotlib
jsonschema
requests
importnb
toolz
idna
webcolors
jsonpointer
rdflib-jsonld
requests-cache
nest-asyncio
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment