Created
November 15, 2022 22:33
-
-
Save MBcode/4cd3be921b6c7d33cdfa4c3dbef6886c to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"id": "85618885", | |
"metadata": { | |
"papermill": {}, | |
"tags": [ | |
"parameters" | |
] | |
}, | |
"outputs": [], | |
"source": [ | |
"#Set up the environment\n", | |
"%load_ext rpy2.ipython\n", | |
"%load_ext google.colab.data_table\n", | |
"import httpimport\n", | |
"with httpimport.github_repo('MBcode', 'ec'):\n", | |
" import ec\n", | |
"#github.com/nteract/papermill'parameters'tag used to inject them into template then post a gist runable by colab\n", | |
"url=ext=None\n", | |
"#q=\"norway\" #an example query" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"id": "eeeeabde", | |
"metadata": { | |
"tags": [ | |
"injected-parameters" | |
] | |
}, | |
"outputs": [], | |
"source": [ | |
"# Parameters\n", | |
"q = \"Norway\"\n" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"id": "80928938", | |
"metadata": { | |
"papermill": {}, | |
"tags": [] | |
}, | |
"outputs": [], | |
"source": [ | |
"#get Parameters: Name,Datasets,Tools,Queries\n", | |
"n,d,t,Q= ec.ndtq(name,datasets,queries,tools) if ec.IN_COLAB else ec.ndtq()\n", | |
"urn=d1= None if len(d)<1 else d[0]\n", | |
"q= None if len(Q)<1 else Q[0]\n", | |
"print(f'q={q},d1={d1}')\n", | |
"#Do a text search:\n", | |
"dfSPARQL=ec.txt_query(q) if q else f'get URNs from resource arrays instead, eg: {urn}'\n", | |
"dfSPARQL #same output from the UI's search" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 5, | |
"id": "87d8500f", | |
"metadata": { | |
"papermill": {}, | |
"tags": [] | |
}, | |
"outputs": [], | |
"source": [ | |
"#Get resource metadata\n", | |
"if not urn: #if collection not sent in, can go from dfSPARQL row/s of interest\n", | |
" dRows=[0,1,2]; d=list(map(lambda row: dfSPARQL['g'][row], dRows))\n", | |
"print(f'Using URNs:{d}') #datasets only right now\n", | |
"dfRDFs=list(map(ec.wget_rdf, d)) #dfRDF=ec.wget_rdf(urn)\n", | |
"#!ls -l or FilePaneBrowser on left, to see rdf/nt files\n", | |
"dfRDFs[0] #you can look at others (beyond 0)if there" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 6, | |
"id": "de411a08", | |
"metadata": { | |
"papermill": {}, | |
"tags": [] | |
}, | |
"outputs": [], | |
"source": [ | |
"#Get resource download URL from the metadata\n", | |
"ds_urls,ds_url= ec.getDatasetURLs(d) if d1 else ec.getDatasetURLs(dRows,dfSPARQL) \n", | |
"ds_urls #list of dataset's urls ;can use 1st or filter by ext .." | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 7, | |
"id": "de8678be", | |
"metadata": { | |
"papermill": {}, | |
"tags": [] | |
}, | |
"outputs": [], | |
"source": [ | |
"#Filter URLs to download, if you want:\n", | |
"ext='.tsv' #you can pick\n", | |
"ds_url=ec.collect_ext(ds_urls,ext)\n", | |
"#Download the resource\n", | |
"#df=ec.read_file(url,ext) #can cut+paste one you want to see, or get them all:\n", | |
"DFs=list(map(lambda u: ec.read_file(u,ext), ds_url)) #you can look at separately below\n", | |
"ds_url" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 7, | |
"id": "28224272", | |
"metadata": { | |
"papermill": {}, | |
"tags": [] | |
}, | |
"outputs": [], | |
"source": [ | |
"#dataset page functionality includes: search_relateddatafilename\n", | |
" #and tool matches: search_download search_webservice search_notebook\n", | |
"!ls -la\n", | |
"DFs[0]" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 7, | |
"id": "cef43edc", | |
"metadata": { | |
"papermill": {}, | |
"tags": [] | |
}, | |
"outputs": [], | |
"source": [ | |
"ec.viz() #shows .nt metadata from urn, can use for (rdf)xml as well " | |
] | |
} | |
], | |
"metadata": { | |
"celltoolbar": "Tags", | |
"kernelspec": { | |
"display_name": "Python 3", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.8.10" | |
}, | |
"papermill": { | |
"default_parameters": {}, | |
"environment_variables": {}, | |
"input_path": "sparql.ipynb", | |
"output_path": "q/Norway.ipynb", | |
"parameters": { | |
"q": "Norway" | |
}, | |
"version": "2.3.3" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 5 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment