Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save wjwillemse/36d1ec4b67710715f866e1492086239e to your computer and use it in GitHub Desktop.
Save wjwillemse/36d1ec4b67710715f866e1492086239e to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"scrolled": true
},
"outputs": [],
"source": [
"from os import listdir\n",
"from os.path import join, isfile\n",
"import pandas as pd\n",
"from rdflib import URIRef, term, Graph, Literal, Namespace\n",
"from rdflib.namespace import OWL,RDF, RDFS, SKOS, XSD"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"scrolled": true
},
"outputs": [],
"source": [
"EIOPA_DATA_PATH = join(\"..\", \"data\", \"external\", \"eiopa\")\n",
"GLEIF_DATA_PATH = join(\"..\", \"data\", \"external\", \"gleif\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Read EIOPA Register in RDF"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"scrolled": true
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"graph has 375388 statements.\n"
]
}
],
"source": [
"g = Graph()\n",
"\n",
"with open(join(EIOPA_DATA_PATH,'eiopa_register.ttl'), \"rb\") as fp:\n",
" g.parse(data = fp.read(), format = 'turtle')\n",
"\n",
"with open(join(GLEIF_DATA_PATH,'gleif-L1-extract.ttl'), \"rb\") as fp:\n",
" g.parse(data = fp.read(), format = 'turtle')\n",
" \n",
"with open(join(GLEIF_DATA_PATH,'EntityLegalFormData.ttl'), \"rb\") as fp:\n",
" g.parse(data = fp.read(), format = 'turtle')\n",
"\n",
"print(\"graph has {} statements.\".format(len(g)))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Example SPARQL queries"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"scrolled": true
},
"outputs": [],
"source": [
"# function to visualize the name of the uri without namespace\n",
"def get_name(uri):\n",
" if isinstance(uri, term.URIRef):\n",
" return uri.n3().split(\"/\")[-1][0:-1]\n",
" else:\n",
" return uri"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Extracting information in the register"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {
"scrolled": true
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"L-72450067SU8C745IAV11 hasLegalJurisdiction NL \n",
"L-72450067SU8C745IAV11 hasEntityStatus EntityStatusActive \n",
"L-72450067SU8C745IAV11 hasLegalAddress L-72450067SU8C745IAV11-LAL \n",
"L-72450067SU8C745IAV11 hasRegisterIdentifier IURI-De-Nederlandsche-Bank-W1686 \n",
"L-72450067SU8C745IAV11 22-rdf-syntax-ns#type LegalEntity \n",
"L-72450067SU8C745IAV11 hasLegalName Achmea Schadeverzekeringen N.V. \n",
"L-72450067SU8C745IAV11 hasHeadquartersAddress L-72450067SU8C745IAV11-LAL \n",
"L-72450067SU8C745IAV11 hasLegalForm ELF-B5PM \n",
"L-72450067SU8C745IAV11 owl#a InsuranceUndertaking \n",
"L-72450067SU8C745IAV11 hasRegistrationIdentifier BID-RA000463-08053410 \n"
]
}
],
"source": [
"query = \"\"\"SELECT DISTINCT ?s ?p ?o\n",
" WHERE {?s gleif-L1:hasLegalName \"Achmea Schadeverzekeringen N.V.\" . \n",
" ?s ?p ?o .\n",
"}\"\"\"\n",
"results = g.query(query)\n",
"\n",
"for row in results:\n",
" for item in row:\n",
" print(get_name(item) + \" \", end = '')\n",
" print(\"\")"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {
"scrolled": true
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"ELF-B5PM hasAbbreviationLocal NV \n",
"ELF-B5PM hasNameLocal naamloze vennootschap \n",
"ELF-B5PM identifies ELF-B5PM \n",
"ELF-B5PM hasAbbreviationLocal nv \n",
"ELF-B5PM 22-rdf-syntax-ns#type EntityLegalForm \n",
"ELF-B5PM hasCoverageArea NL \n",
"ELF-B5PM hasNameTransliterated naamloze vennootschap \n",
"ELF-B5PM hasAbbreviationLocal N.V. \n",
"ELF-B5PM tag B5PM \n",
"ELF-B5PM 22-rdf-syntax-ns#type EntityLegalFormIdentifier \n",
"ELF-B5PM hasAbbreviationLocal n.v. \n"
]
}
],
"source": [
"query = \"\"\"SELECT DISTINCT ?lf ?p ?o\n",
" WHERE {?s gleif-L1:hasLegalName \"Achmea Schadeverzekeringen N.V.\" . \n",
" ?s gleif-L1:hasLegalForm ?lf .\n",
" ?lf ?p ?o .}\"\"\"\n",
"results = g.query(query)\n",
"\n",
"for row in results:\n",
" for item in row:\n",
" print(get_name(item) + \" \", end = '')\n",
" print(\"\")"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {
"scrolled": true
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"hasInsuranceUndertakingID W1686 \n",
"hasRegistrationStartDate 23/12/1991 01:00:00 \n",
"hasOperationStartDate 23/12/1991 01:00:00 \n",
"owl#a InsuranceUndertakingRegisterIdentifier \n",
"hasNCA De Nederlandsche Bank \n",
"hasRegistrationEndDate nan \n",
"hasCrossBorderStatus Domestic undertaking \n",
"identifies L-72450067SU8C745IAV11 \n",
"hasOperationEndDate nan \n",
"hasEUCountryWhereEntityOperates NL \n"
]
}
],
"source": [
"query = \"\"\"SELECT DISTINCT ?p ?o\n",
" WHERE {?s gleif-L1:hasLegalName \"Achmea Schadeverzekeringen N.V.\" . \n",
" ?s eiopa-Base:hasRegisterIdentifier ?lf .\n",
" ?lf ?p ?o .\n",
"}\"\"\"\n",
"results = g.query(query)\n",
"\n",
"for row in results:\n",
" for item in row:\n",
" print(get_name(item) + \" \", end = '')\n",
" print(\"\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Extracting geographical location of entities"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"The following query extracts the geographical location of all insurance undertakings within NL"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {
"scrolled": true
},
"outputs": [],
"source": [
"query = \"\"\"SELECT DISTINCT ?name ?lat ?long\n",
" WHERE {?s gleif-base:hasLegalJurisdiction CountryCodes:NL ; \n",
" gleif-L1:hasLegalName ?name ;\n",
" gleif-L1:hasLegalAddress/gleif-base:hasCity ?city .\n",
" ?geo gleif-base:hasCity ?city ; \n",
" geo:lat ?lat ; \n",
" geo:long ?long .}\"\"\"\n",
"results = g.query(query)\n",
"\n",
"points = list()\n",
"for row in results:\n",
" new = (row[0].value, float(row[1].value), float(row[2].value))\n",
" if new not in points:\n",
" points.append(new)"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"text/plain": [
"392"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"len(results)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Plotting geographical location"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {
"scrolled": true
},
"outputs": [],
"source": [
"import pandas as pd\n",
"import folium"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {
"scrolled": true
},
"outputs": [],
"source": [
"ave_lat = sum(p[1] for p in points)/len(points)\n",
"ave_lon = sum(p[2] for p in points)/len(points)\n",
"\n",
"# Load map centred on average coordinates\n",
"my_map = folium.Map(location=[ave_lat, ave_lon], zoom_start=8, tiles='Stamen Terrain')\n",
"\n",
"#add a markers\n",
"for each in points: \n",
" folium.Marker((each[1], each[2]), popup=each[0]).add_to(my_map)\n",
"\n",
"my_map.save(\"eiopa_register_nl.html\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"scrolled": true
},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.9"
}
},
"nbformat": 4,
"nbformat_minor": 4
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment