Created
January 8, 2021 08:46
-
-
Save wjwillemse/36d1ec4b67710715f866e1492086239e to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"metadata": { | |
"scrolled": true | |
}, | |
"outputs": [], | |
"source": [ | |
"from os import listdir\n", | |
"from os.path import join, isfile\n", | |
"import pandas as pd\n", | |
"from rdflib import URIRef, term, Graph, Literal, Namespace\n", | |
"from rdflib.namespace import OWL,RDF, RDFS, SKOS, XSD" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"metadata": { | |
"scrolled": true | |
}, | |
"outputs": [], | |
"source": [ | |
"EIOPA_DATA_PATH = join(\"..\", \"data\", \"external\", \"eiopa\")\n", | |
"GLEIF_DATA_PATH = join(\"..\", \"data\", \"external\", \"gleif\")" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"# Read EIOPA Register in RDF" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"metadata": { | |
"scrolled": true | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"graph has 375388 statements.\n" | |
] | |
} | |
], | |
"source": [ | |
"g = Graph()\n", | |
"\n", | |
"with open(join(EIOPA_DATA_PATH,'eiopa_register.ttl'), \"rb\") as fp:\n", | |
" g.parse(data = fp.read(), format = 'turtle')\n", | |
"\n", | |
"with open(join(GLEIF_DATA_PATH,'gleif-L1-extract.ttl'), \"rb\") as fp:\n", | |
" g.parse(data = fp.read(), format = 'turtle')\n", | |
" \n", | |
"with open(join(GLEIF_DATA_PATH,'EntityLegalFormData.ttl'), \"rb\") as fp:\n", | |
" g.parse(data = fp.read(), format = 'turtle')\n", | |
"\n", | |
"print(\"graph has {} statements.\".format(len(g)))" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"# Example SPARQL queries" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 4, | |
"metadata": { | |
"scrolled": true | |
}, | |
"outputs": [], | |
"source": [ | |
"# function to visualize the name of the uri without namespace\n", | |
"def get_name(uri):\n", | |
" if isinstance(uri, term.URIRef):\n", | |
" return uri.n3().split(\"/\")[-1][0:-1]\n", | |
" else:\n", | |
" return uri" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## Extracting information in the register" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 5, | |
"metadata": { | |
"scrolled": true | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"L-72450067SU8C745IAV11 hasLegalJurisdiction NL \n", | |
"L-72450067SU8C745IAV11 hasEntityStatus EntityStatusActive \n", | |
"L-72450067SU8C745IAV11 hasLegalAddress L-72450067SU8C745IAV11-LAL \n", | |
"L-72450067SU8C745IAV11 hasRegisterIdentifier IURI-De-Nederlandsche-Bank-W1686 \n", | |
"L-72450067SU8C745IAV11 22-rdf-syntax-ns#type LegalEntity \n", | |
"L-72450067SU8C745IAV11 hasLegalName Achmea Schadeverzekeringen N.V. \n", | |
"L-72450067SU8C745IAV11 hasHeadquartersAddress L-72450067SU8C745IAV11-LAL \n", | |
"L-72450067SU8C745IAV11 hasLegalForm ELF-B5PM \n", | |
"L-72450067SU8C745IAV11 owl#a InsuranceUndertaking \n", | |
"L-72450067SU8C745IAV11 hasRegistrationIdentifier BID-RA000463-08053410 \n" | |
] | |
} | |
], | |
"source": [ | |
"query = \"\"\"SELECT DISTINCT ?s ?p ?o\n", | |
" WHERE {?s gleif-L1:hasLegalName \"Achmea Schadeverzekeringen N.V.\" . \n", | |
" ?s ?p ?o .\n", | |
"}\"\"\"\n", | |
"results = g.query(query)\n", | |
"\n", | |
"for row in results:\n", | |
" for item in row:\n", | |
" print(get_name(item) + \" \", end = '')\n", | |
" print(\"\")" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 6, | |
"metadata": { | |
"scrolled": true | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"ELF-B5PM hasAbbreviationLocal NV \n", | |
"ELF-B5PM hasNameLocal naamloze vennootschap \n", | |
"ELF-B5PM identifies ELF-B5PM \n", | |
"ELF-B5PM hasAbbreviationLocal nv \n", | |
"ELF-B5PM 22-rdf-syntax-ns#type EntityLegalForm \n", | |
"ELF-B5PM hasCoverageArea NL \n", | |
"ELF-B5PM hasNameTransliterated naamloze vennootschap \n", | |
"ELF-B5PM hasAbbreviationLocal N.V. \n", | |
"ELF-B5PM tag B5PM \n", | |
"ELF-B5PM 22-rdf-syntax-ns#type EntityLegalFormIdentifier \n", | |
"ELF-B5PM hasAbbreviationLocal n.v. \n" | |
] | |
} | |
], | |
"source": [ | |
"query = \"\"\"SELECT DISTINCT ?lf ?p ?o\n", | |
" WHERE {?s gleif-L1:hasLegalName \"Achmea Schadeverzekeringen N.V.\" . \n", | |
" ?s gleif-L1:hasLegalForm ?lf .\n", | |
" ?lf ?p ?o .}\"\"\"\n", | |
"results = g.query(query)\n", | |
"\n", | |
"for row in results:\n", | |
" for item in row:\n", | |
" print(get_name(item) + \" \", end = '')\n", | |
" print(\"\")" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 7, | |
"metadata": { | |
"scrolled": true | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"hasInsuranceUndertakingID W1686 \n", | |
"hasRegistrationStartDate 23/12/1991 01:00:00 \n", | |
"hasOperationStartDate 23/12/1991 01:00:00 \n", | |
"owl#a InsuranceUndertakingRegisterIdentifier \n", | |
"hasNCA De Nederlandsche Bank \n", | |
"hasRegistrationEndDate nan \n", | |
"hasCrossBorderStatus Domestic undertaking \n", | |
"identifies L-72450067SU8C745IAV11 \n", | |
"hasOperationEndDate nan \n", | |
"hasEUCountryWhereEntityOperates NL \n" | |
] | |
} | |
], | |
"source": [ | |
"query = \"\"\"SELECT DISTINCT ?p ?o\n", | |
" WHERE {?s gleif-L1:hasLegalName \"Achmea Schadeverzekeringen N.V.\" . \n", | |
" ?s eiopa-Base:hasRegisterIdentifier ?lf .\n", | |
" ?lf ?p ?o .\n", | |
"}\"\"\"\n", | |
"results = g.query(query)\n", | |
"\n", | |
"for row in results:\n", | |
" for item in row:\n", | |
" print(get_name(item) + \" \", end = '')\n", | |
" print(\"\")" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## Extracting geographical location of entities" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"The following query extracts the geographical location of all insurance undertakings within NL" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 8, | |
"metadata": { | |
"scrolled": true | |
}, | |
"outputs": [], | |
"source": [ | |
"query = \"\"\"SELECT DISTINCT ?name ?lat ?long\n", | |
" WHERE {?s gleif-base:hasLegalJurisdiction CountryCodes:NL ; \n", | |
" gleif-L1:hasLegalName ?name ;\n", | |
" gleif-L1:hasLegalAddress/gleif-base:hasCity ?city .\n", | |
" ?geo gleif-base:hasCity ?city ; \n", | |
" geo:lat ?lat ; \n", | |
" geo:long ?long .}\"\"\"\n", | |
"results = g.query(query)\n", | |
"\n", | |
"points = list()\n", | |
"for row in results:\n", | |
" new = (row[0].value, float(row[1].value), float(row[2].value))\n", | |
" if new not in points:\n", | |
" points.append(new)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 9, | |
"metadata": { | |
"scrolled": true | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"392" | |
] | |
}, | |
"execution_count": 9, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"len(results)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## Plotting geographical location" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 10, | |
"metadata": { | |
"scrolled": true | |
}, | |
"outputs": [], | |
"source": [ | |
"import pandas as pd\n", | |
"import folium" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 11, | |
"metadata": { | |
"scrolled": true | |
}, | |
"outputs": [], | |
"source": [ | |
"ave_lat = sum(p[1] for p in points)/len(points)\n", | |
"ave_lon = sum(p[2] for p in points)/len(points)\n", | |
"\n", | |
"# Load map centred on average coordinates\n", | |
"my_map = folium.Map(location=[ave_lat, ave_lon], zoom_start=8, tiles='Stamen Terrain')\n", | |
"\n", | |
"#add a markers\n", | |
"for each in points: \n", | |
" folium.Marker((each[1], each[2]), popup=each[0]).add_to(my_map)\n", | |
"\n", | |
"my_map.save(\"eiopa_register_nl.html\")" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"scrolled": true | |
}, | |
"outputs": [], | |
"source": [] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.7.9" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 4 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment