Last active
September 20, 2019 13:23
-
-
Save tonyfast/6382152537773be2ffc208c3d4e526c2 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Public API of the package: only the name `Graph` is exported.
__all__ = 'Graph',

# The import is performed inside importnb's Notebook context manager;
# presumably `namespaces` is a Jupyter notebook that importnb makes
# importable as a module -- TODO confirm against the package layout.
with __import__('importnb').Notebook(lazy=True):
    # NOTE: the object bound to the name `self` in `.namespaces` is what gets
    # re-exported here under the name `Graph`.
    from .namespaces import self as Graph
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 113, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
" with __import__('importnb').Notebook():\n", | |
" try:\n", | |
" from . import rdflib_patch\n", | |
" except:\n", | |
" import rdflib_patch\n", | |
" import rdflib, pandas, pydantic, jsonschema, json, abc, networkx, itertools, dataclasses, requests, typing, collections, requests_cache, inspect, IPython, pyld.jsonld as jsonld, abc\n", | |
" from toolz.curried import *; from rdflib.namespace import OWL, RDF, RDFS, SKOS, DC, DCTERMS\n", | |
" requests_cache.install_cache('rdf')\n", | |
" __all__ = tuple(\"Graph CC RDFS HYDRA OWL XHTML RDF XS XSD SW\".split())\n", | |
"\n", | |
" SCHEMA = rdflib.Namespace('http://schema.org/')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 114, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"if 'field_class_to_schema_enum_enabled' not in globals(): field_class_to_schema_enum_enabled = pydantic.schema.field_class_to_schema_enum_enabled" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"Metaclasses hold linked data types in their annotations.\n", | |
"\n", | |
"The class of the metaclass stores the Python type annotations." | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 115, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"def split(object): ns, sep, pointer = object.rpartition('/#'['#' in object]); return ns+sep, pointer" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"Load in a bunch of namespaces." | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 116, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"class Namespace(rdflib.namespace.ClosedNamespace):\n", | |
" def __getattr__(_, name):\n", | |
" object = super().__getattr__(name)\n", | |
" return self.new(object)\n", | |
"\n", | |
"class Graph(rdflib.ConjunctiveGraph):\n", | |
" types = {}\n", | |
" CC: rdflib.term.URIRef('http://creativecommons.org/ns#')\n", | |
" RDFS: rdflib.term.URIRef('http://www.w3.org/2000/01/rdf-schema#')\n", | |
" HYDRA: rdflib.term.URIRef('http://www.w3.org/ns/hydra/core#')\n", | |
" OWL: rdflib.term.URIRef('http://www.w3.org/2002/07/owl#')\n", | |
" XHTML: rdflib.term.URIRef('http://www.w3.org/1999/xhtml/vocab#')\n", | |
" RDF: rdflib.term.URIRef('http://www.w3.org/1999/02/22-rdf-syntax-ns#')\n", | |
" XSD: rdflib.term.URIRef('http://www.w3.org/2001/XMLSchema#')\n", | |
" XS: rdflib.term.URIRef('http://www.w3.org/2001/XMLSchema-datatypes#')\n", | |
" SW: rdflib.term.URIRef('http://www.w3.org/2003/06/sw-vocab-status/ns#')\n", | |
" SCHEMA: rdflib.term.URIRef('http://schema.org/')\n", | |
" PROV: rdflib.term.URIRef('http://www.w3.org/ns/prov#')\n", | |
" DCAT: rdflib.term.URIRef('http://www.w3.org/ns/dcat#')\n", | |
" QUDT: rdflib.term.URIRef('http://qudt.org/schema/qudt#')\n", | |
" \n", | |
" def get(self, object, format=None):\n", | |
" if format in {'json-ld'}: object = pipe(object, requests.get, requests.Response.json, jsonld.expand, json.dumps)\n", | |
" else: object = requests.get(str(object)).text\n", | |
" self.parse(data=object, format=format); self.update()\n", | |
" return self\n", | |
" \n", | |
" def update(self): pipe(\n", | |
" self, concat, filter(flip(isinstance)(rdflib.URIRef)), filter(flip(str.startswith)('http')), \n", | |
" set, groupby(compose(first, split)), valmap(\n", | |
" compose(list, map(compose(second, split)))\n", | |
" ), itemmap(lambda x: (rdflib.URIRef(x[0]), Namespace(*x))), \n", | |
" keymap(pipe(self.__annotations__, itemmap(reversed), dict).get), keyfilter(flip(isinstance)(str)),\n", | |
" itemmap(do(lambda x: setattr(self, *x)))\n", | |
" ); return self\n", | |
" \n", | |
" def enrich(self, type):\n", | |
" if type not in self.types: \n", | |
" subject = self[type]\n", | |
" object = self[:, :, type]\n", | |
" __annotations__ = {}\n", | |
" bases = pipe(subject, filter(compose({RDFS.subPropertyOf, RDFS.subClassOf, RDF.type}.__contains__, first)), map(last), map(self.enrich), set, tuple) or (self.enrich(RDFS.Resource),)\n", | |
" __context__ = pipe(self[:, RDFS.domain, type] + self[:, SCHEMA.domainIncludes, type], map(juxt(compose(second, split), identity)), dict)\n", | |
" self.types[type] = __import__('builtins').type(second(split(type)), tuple(sorted(bases, key=lambda x: list(self.types).index(x.type), reverse=True)), {\n", | |
" '__doc__': ''.join(self[type, RDFS.comment]), **locals()})\n", | |
" self.types[type].__context__ = pipe(self.types[type], inspect.getmro, map(lambda x: getattr(x, '__context__', {})), lambda x: collections.ChainMap(*x), dict) \n", | |
" return self.types[type]\n", | |
" \n", | |
" def new(self, type): \n", | |
" if type not in self.types: self.enrich(type); self.annotate(); \n", | |
" return self.types[type]\n", | |
" \n", | |
" def annotate(self):\n", | |
" \"\"\"Add type annotations from the context.\"\"\"\n", | |
" for cls in list(self.types.values()):\n", | |
" if not hasattr(cls, '__annotations__'): cls.__annotations__ = {}\n", | |
" pipe(cls.__context__, \n", | |
" valmap(lambda x: self[x, RDFS.range] + self[x, SCHEMA.rangeIncludes]), valmap(map(self.enrich)), valmap(tuple), valmap((str,).__add__), \n", | |
" valmap(typing.Union.__getitem__), valmap(lambda x: typing.Union[x, typing.List[x]]),\n", | |
" cls.__annotations__.update)\n", | |
" cls.__annotations__ = {'value': cls.__annotations__.pop('value'), **cls.__annotations__, }\n", | |
" for x in cls.__annotations__: setattr(cls, x, getattr(cls, x, None))\n", | |
" excepts(BaseException, pydantic.dataclasses.dataclass)(cls)\n" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 117, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"self = (\n", | |
" Graph().get(rdflib.namespace.RDF, 'ttl')\n", | |
" .get(rdflib.namespace.RDFS, 'ttl')\n", | |
" .get(rdflib.namespace.OWL, 'ttl')\n", | |
" .get('https://www.w3.org/ns/prov.ttl', 'ttl')\n", | |
" .get('https://raw.githubusercontent.com/AKSW/RDB2RDF-Seminar/master/sparqlmap/eclipse/workspace/xturtle.core/xsd.ttl', 'ttl')\n", | |
" .get('https://www.w3.org/ns/hydra/core', 'json-ld')\n", | |
" .get('http://www.w3.org/2003/06/sw-vocab-status/ns#', 'xml')\n", | |
" .get('http://schema.org/version/latest/schema.ttl', 'ttl')\n", | |
" .get('https://w3c.github.io/dxwg/dcat/rdf/dcat.ttl', 'ttl')\n", | |
" .get('https://creativecommons.org/schema.rdf', 'xml')\n", | |
" .get('http://qudt.org/2.0/schema/qudt', 'ttl')\n", | |
" .get('http://www.linkedmodel.org/schema/vaem', 'xml') \n", | |
" .get('http://qudt.org/1.1/schema/dimension', 'xml')\n", | |
" .get('http://qudt.org/1.1/schema/quantity', 'xml')\n", | |
" .get('http://www.linkedmodel.org/1.0/schema/dtype', 'xml')\n", | |
")" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 127, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"class WebType:\n", | |
" @classmethod\n", | |
" def schema(cls): return cls.__pydantic_model__.schema(cls)\n", | |
"\n", | |
" def __get_validators__():\n", | |
" \"\"\"[custom types]: https://pydantic-docs.helpmanual.io/#custom-data-types\"\"\"\n", | |
" return []\n", | |
" \n", | |
" def dict(self, **ctx):\n", | |
" object = {'@context': ctx, **{\n", | |
" k: v.dict(**(ctx and v.__context__ or {})) if isinstance(v, WebType) else v\n", | |
" for k, v in [(x, getattr(self, x)) for x in self.__context__ if getattr(self, x, None) is not None]}}\n", | |
" for k, v in object.items():\n", | |
" try: object['@context'][k] = jsonschema.validate(v, {'anyOf': [{'type': 'string', 'format': 'uri'}, {'type': 'string', 'format': 'json-pointer'}]}, format_checker=jsonschema.draft7_format_checker) or {'@type': '@id', '@id': object['@context'][k]}\n", | |
" except jsonschema.ValidationError: ...\n", | |
" return object\n", | |
" \n", | |
" def metadata(self): return jsonld.expand(self.dict(**self.__context__))" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 128, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"self.types[RDFS.Resource] = type(second(split(RDFS.Resource)), (WebType,), {\n", | |
" '__doc__': ''.join(self[RDFS.Resource, RDFS.comment]), '__context__': pipe(\n", | |
" self[:, RDFS.domain, RDFS.Resource],\n", | |
" map(juxt(compose(second, split), identity)), dict\n", | |
" ), 'type': RDFS.Resource, 'subject': self[RDFS.Resource], 'object': self[:, :, RDFS.Resource]\n", | |
"})\n", | |
"self.types[RDFS.Class] = type(second(split(RDFS.Class)), (self.types[RDFS.Resource],), {\n", | |
" '__doc__': ''.join(self[RDFS.Class, RDFS.comment]), '__context__': pipe(\n", | |
" self[:, RDFS.domain, RDFS.Class],\n", | |
" map(juxt(compose(second, split), identity)), dict\n", | |
" ), 'type': RDFS.Class, 'subject': self[RDFS.Class], 'object': self[:, :, RDFS.Class]\n", | |
"})\n", | |
"self.annotate()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 129, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"if 'field_class_to_schema_enum_enabled' not in globals(): field_class_to_schema_enum_enabled = pydantic.schema.field_class_to_schema_enum_enabled\n", | |
"pydantic.schema.field_class_to_schema_enum_enabled = ((self.RDFS.Resource, {}),) + field_class_to_schema_enum_enabled" | |
] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.7.3" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 4 | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
rdflib | |
strict-rfc3339 | |
rfc3986 | |
rfc3987 | |
pyld | |
pandas | |
pyyaml | |
networkx | |
matplotlib | |
jsonschema | |
requests | |
importnb | |
toolz | |
idna | |
webcolors | |
jsonpointer | |
rdflib-jsonld | |
requests-cache | |
nest-asyncio |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment