Skip to content

Instantly share code, notes, and snippets.

@SimonGoring
Created October 2, 2018 04:54
Show Gist options
  • Save SimonGoring/efd8df78673077f0137bcd31d84e2fb7 to your computer and use it in GitHub Desktop.
Save SimonGoring/efd8df78673077f0137bcd31d84e2fb7 to your computer and use it in GitHub Desktop.
Loading yaml, csv, json and json-ld files in Python.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Working with data formats\n",
"\n",
"Some work with YAML, CSV, JSON and JSON-LD"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"{ 'gene': { 'annotation': { 'interaction': { 'genetic': '...',\n",
" 'physical': '...'},\n",
" 'ontology': { 'biological_process': ['...'],\n",
" 'molecular_function': [ { 'annotation_extension': '...',\n",
" 'evidence': { 'evidence_code': 'IEA',\n",
" 'with': 'UniProtKB-KW:KW-0067'},\n",
" 'reference': { 'id': 'GO_REF:0000037'},\n",
" 'term': { 'id': 'GO:0005524',\n",
" 'name': 'ATP '\n",
" 'binding'}},\n",
" { 'evidence': 'TAS',\n",
" 'reference': { 'author': 'JG, '\n",
" 'Goodrich '\n",
" 'KJ, '\n",
" 'Bähler '\n",
" 'J, '\n",
" 'Cech '\n",
" 'TR.',\n",
" 'citation': 'J '\n",
" 'Biol '\n",
" 'Chem '\n",
" '280:5249-5257 '\n",
" '2005',\n",
" 'id': 'PMID:15591066',\n",
" 'title': 'Expression '\n",
" 'of '\n",
" 'a '\n",
" 'RecQ '\n",
" 'helicase '\n",
" 'homolog '\n",
" 'affects '\n",
" 'progression '\n",
" 'through '\n",
" 'crisis '\n",
" 'in '\n",
" 'fission '\n",
" 'yeast '\n",
" 'lacking '\n",
" 'telomerase.'},\n",
" 'term': { 'id': 'GO:0043140',\n",
" 'name': 'ATP-dependent '\n",
" \"3'-5' \"\n",
" 'DNA '\n",
" 'helicase '\n",
" 'activity'}}]}},\n",
" 'gene_type': 'protein_coding',\n",
" 'location': '...',\n",
" 'name': 'tlh1',\n",
" 'organism': {'genus': 'Schizosaccharomyces', 'species': 'pombe'},\n",
" 'orthologs': None,\n",
" 'product': { 'name': 'RecQ type DNA helicase',\n",
" 'sequence': '...',\n",
" 'size': '297aa',\n",
" 'weight': '34.36kDa'},\n",
" 'protein_features': '...',\n",
" 'sequence': '...',\n",
" 'transcripts': [{'exons': ['...', '...'], 'uniquename': '...'}],\n",
" 'uniquename': 'SPAC212.11'}}\n"
]
}
],
"source": [
"import pprint\n",
"import yaml\n",
"\n",
"pp = pprint.PrettyPrinter(indent=2)\n",
"\n",
"yaml_file = \"\"\"\n",
" gene:\n",
" uniquename: \"SPAC212.11\"\n",
" name: \"tlh1\"\n",
" organism:\n",
" genus: \"Schizosaccharomyces\"\n",
" species: \"pombe\"\n",
" product:\n",
" name: \"RecQ type DNA helicase\"\n",
" size: \"297aa\"\n",
" weight: \"34.36kDa\"\n",
" sequence: \"...\"\n",
" location: ...\n",
" transcripts:\n",
" - uniquename: ...\n",
" exons:\n",
" - ...\n",
" - ...\n",
" gene_type: \"protein_coding\"\n",
" annotation:\n",
" ontology:\n",
" molecular_function:\n",
" - term:\n",
" name: \"ATP binding\"\n",
" id: \"GO:0005524\"\n",
" evidence:\n",
" evidence_code: \"IEA\"\n",
" with: \"UniProtKB-KW:KW-0067\"\n",
" reference:\n",
" id: \"GO_REF:0000037\"\n",
" annotation_extension:\n",
" ...\n",
" - term:\n",
" name: \"ATP-dependent 3'-5' DNA helicase activity\"\n",
" id: \"GO:0043140\"\n",
" evidence: \"TAS\"\n",
" reference:\n",
" id: \"PMID:15591066\"\n",
" title: \"Expression of a RecQ helicase homolog affects progression through crisis in fission yeast lacking telomerase.\"\n",
" citation: \"J Biol Chem 280:5249-5257 2005\"\n",
" author: \"JG, Goodrich KJ, Bähler J, Cech TR.\"\n",
" biological_process:\n",
" - ...\n",
" interaction:\n",
" genetic:\n",
" ...\n",
" physical:\n",
" ...\n",
" protein_features:\n",
" ...\n",
" sequence:\n",
" ...\n",
" orthologs:\n",
"...\n",
"\"\"\"\n",
"\n",
"data = yaml.load(yaml_file)\n",
"pp.pprint(data)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Importing CSV"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"['firstcolumn', 'secondcolumn', 'thirdcolumn', 'fourthcolumn']\n",
"[' This Field', ' 12', ' That Field', ' 12.76']\n",
"[' children', ' 10', ' 12', ' peanut']\n"
]
}
],
"source": [
"import csv\n",
"\n",
"csv_file = \"\"\"firstcolumn,secondcolumn,thirdcolumn,fourthcolumn\n",
" This Field, 12, That Field, 12.76\n",
" children, 10, 12, peanut\"\"\"\n",
"\n",
"csv_import = csv.reader(csv_file.split('\\n'), delimiter = ',')\n",
"\n",
"for rows in csv_import:\n",
" print(rows)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Importing JSON\n"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[ { 'avgCostPrice': None,\n",
" 'costPrice': 0,\n",
" 'dividend': 0,\n",
" 'lastUpdate': '2017-10-08',\n",
" 'marketValue': 0.0,\n",
" 'marketValueDateTime': '2017-10-06T16:30:00.000Z',\n",
" 'marketValuePerUnit': 78.0,\n",
" 'marketValueSource': 'XOSL',\n",
" 'profit': -100.16,\n",
" 'realizedProfit': -100.16,\n",
" 'redeemedVolume': 0,\n",
" 'security': { 'isin': 'NO0003097503',\n",
" 'securityGroup': 'AK',\n",
" 'securityName': 'AKVA GROUP ASA',\n",
" 'securityName20': 'AKVA GROUP',\n",
" 'securityName34': 'AKVA GROUP ASA',\n",
" 'securityTicker': 'AKVA',\n",
" 'securityType': 'stock',\n",
" 'uri': 'json/0/securities/NO0003097503'},\n",
" 'tradeAmountMissingForCostPrice': False,\n",
" 'transactionFee': 100.16,\n",
" 'unrealizedProfit': 0.0,\n",
" 'uri': 'json/0/positions/csdAccounts/097141071619/securities/NO0003097503',\n",
" 'volume': 0.0}]\n"
]
}
],
"source": [
"import json\n",
"\n",
"json_file = \"\"\"\n",
"[\n",
" {\n",
" \"profit\": -100.16,\n",
" \"costPrice\": 0,\n",
" \"realizedProfit\": -100.1600,\n",
" \"dividend\": 0,\n",
" \"lastUpdate\": \"2017-10-08\",\n",
" \"security\": {\n",
" \"securityName20\": \"AKVA GROUP\",\n",
" \"securityType\": \"stock\",\n",
" \"securityGroup\": \"AK\",\n",
" \"securityTicker\": \"AKVA\",\n",
" \"securityName\": \"AKVA GROUP ASA\",\n",
" \"uri\": \"json/0/securities/NO0003097503\",\n",
" \"securityName34\": \"AKVA GROUP ASA\",\n",
" \"isin\": \"NO0003097503\"\n",
" },\n",
" \"uri\": \"json/0/positions/csdAccounts/097141071619/securities/NO0003097503\",\n",
" \"redeemedVolume\": 0,\n",
" \"marketValueDateTime\": \"2017-10-06T16:30:00.000Z\",\n",
" \"marketValueSource\": \"XOSL\",\n",
" \"volume\": 0E-10,\n",
" \"marketValuePerUnit\": 78.0,\n",
" \"transactionFee\": 100.16,\n",
" \"unrealizedProfit\": 0.00,\n",
" \"marketValue\": 0.00,\n",
" \"avgCostPrice\": null,\n",
" \"tradeAmountMissingForCostPrice\": false\n",
"}]\"\"\"\n",
" \n",
"new_json = json.loads(json_file)\n",
"pp.pprint(new_json)"
]
},
{
"cell_type": "code",
"execution_count": 36,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[ { '@type': ['http://schema.org/Person'],\n",
" 'http://schema.org/address': [ { '@type': [ 'http://schema.org/PostalAddress'],\n",
" 'http://schema.org/addressLocality': [ { '@value': 'Vancouver'}],\n",
" 'http://schema.org/addressRegion': [ { '@value': 'BC'}],\n",
" 'http://schema.org/postalCode': [ { '@value': 'V5N4E8'}]}],\n",
" 'http://schema.org/alumniOf': [ {'@value': 'Simon Fraser University'},\n",
" { '@value': 'University of Northern '\n",
" 'British Columbia'},\n",
" { '@value': 'University of Wisconsin - '\n",
" 'Madison'}],\n",
" 'http://schema.org/colleague': [ { '@id': 'http://www.geography.wisc.edu/faculty/williams/lab/People.html'},\n",
" {'@id': 'http://www.andriadawson.org'}],\n",
" 'http://schema.org/email': [{'@value': 'mailto:goring@wisc.edu'}],\n",
" 'http://schema.org/familyName': [{'@value': 'Goring'}],\n",
" 'http://schema.org/givenName': [{'@value': 'Simon'}],\n",
" 'http://schema.org/image': [ { '@id': 'https://i1.rgstatic.net/ii/profile.image/AS%3A321585720823810%401453683418273_l/Simon_Goring.png'}],\n",
" 'http://schema.org/jobTitle': [{'@value': 'Assistant Scientist'}],\n",
" 'http://schema.org/name': [{'@value': 'Simon Goring'}],\n",
" 'http://schema.org/sameAs': [ { '@id': 'http://www.orcid.org/0000-0002-2700-4605#person'},\n",
" {'@id': 'http://twitter.com/sjGoring'},\n",
" {'@id': 'http://github.com/SimonGoring'}],\n",
" 'http://schema.org/url': [{'@id': 'http://www.goring.org'}],\n",
" 'http://schema.org/worksFor': [ { '@value': 'University of Wisconsin - '\n",
" 'Madison'}]}]\n"
]
}
],
"source": [
"import pyld\n",
"\n",
"jsonld_file = \"\"\"\n",
" {\n",
" \"@context\": \"http://schema.org\",\n",
" \"@type\": \"Person\",\n",
" \"address\": {\n",
" \"@type\": \"PostalAddress\",\n",
" \"addressLocality\": \"Vancouver\",\n",
" \"addressRegion\": \"BC\",\n",
" \"postalCode\": \"V5N4E8\"\n",
" },\n",
" \"colleague\": [\n",
" \"http://www.geography.wisc.edu/faculty/williams/lab/People.html\",\n",
" \"http://www.andriadawson.org\"\n",
" ],\n",
" \"email\": \"mailto:goring@wisc.edu\",\n",
" \"image\": \"https://i1.rgstatic.net/ii/profile.image/AS%3A321585720823810%401453683418273_l/Simon_Goring.png\",\n",
" \"jobTitle\": \"Assistant Scientist\",\n",
" \"name\": \"Simon Goring\",\n",
" \"familyName\": \"Goring\",\n",
" \"givenName\": \"Simon\",\n",
" \"worksFor\": \"University of Wisconsin - Madison\",\n",
" \"alumniOf\": [\"Simon Fraser University\", \"University of Northern British Columbia\", \"University of Wisconsin - Madison\"],\n",
" \"url\": \"http://www.goring.org\",\n",
" \"sameAs\" : [ \"http://www.orcid.org/0000-0002-2700-4605#person\",\n",
" \"http://twitter.com/sjGoring\",\n",
" \"http://github.com/SimonGoring\"]\n",
" }\"\"\"\n",
"\n",
"ld_in = json.loads(jsonld_file)\n",
"\n",
"pp.pprint(pyld.jsonld.expand(ld_in))\n"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.6"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment