Last active
October 20, 2022 16:01
-
-
Save adyork/966fdd2bd596336047dcf005255014d1 to your computer and use it in GitHub Desktop.
species_erddap_csv.ipynb
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"nbformat": 4, | |
"nbformat_minor": 0, | |
"metadata": { | |
"colab": { | |
"name": "species_erddap_csv.ipynb", | |
"provenance": [], | |
"collapsed_sections": [], | |
"authorship_tag": "ABX9TyNHuxK9qqot0L9XCY4sRzHm", | |
"include_colab_link": true | |
}, | |
"kernelspec": { | |
"name": "python3", | |
"display_name": "Python 3" | |
}, | |
"language_info": { | |
"name": "python" | |
} | |
}, | |
"cells": [ | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "view-in-github", | |
"colab_type": "text" | |
}, | |
"source": [ | |
"<a href=\"https://colab.research.google.com/gist/adyork/966fdd2bd596336047dcf005255014d1/species_erddap_csv.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "QUliR1El8x17" | |
}, | |
"source": [ | |
"Use the ERDDAP api to pull data in csv format from BCO-DMO then get a unique species list" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "C4Jm5Zcq8vc7" | |
}, | |
"source": [ | |
"# Resources\n", | |
"* BCO-DMO ERDDAP Documentation https://erddap.bco-dmo.org/erddap/tabledap/documentation.html\n", | |
"* See workshop materials for more on ERDDAP: https://k-rns.github.io/workshop_data_reuse/" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "LFuUfzV4Homj" | |
}, | |
"source": [ | |
"import requests\n", | |
"import pandas as pd" | |
], | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"id": "K0UJ0cLI6xyz", | |
"outputId": "d396311e-38c8-4794-838b-c726595d837c" | |
}, | |
"source": [ | |
"dataset_id = '752795'# '850202' #850190\n", | |
"# Dataset Landing Page (see metadata and file access)\n", | |
"# https://www.bco-dmo.org/dataset/752795\n", | |
"\n", | |
"url_csv = \"https://erddap.bco-dmo.org/erddap/tabledap/bcodmo_dataset_752795.csv\"\n", | |
"\n", | |
"response = requests.get(url_csv)\n", | |
"response.status_code" | |
], | |
"execution_count": null, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/plain": [ | |
"200" | |
] | |
}, | |
"metadata": {}, | |
"execution_count": 19 | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"id": "Q_pv92n2A5U0", | |
"outputId": "657a4f76-ae86-457f-8542-fd3728ec219e" | |
}, | |
"source": [ | |
"# We can get the response text as string with \n", | |
"csv_as_string = response.text\n", | |
"\n", | |
"# use type() to tell you what the type of your variable is\n", | |
"type(csv_as_string)\n" | |
], | |
"execution_count": null, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/plain": [ | |
"str" | |
] | |
}, | |
"metadata": {}, | |
"execution_count": 20 | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "cTyxc3C6NA7z", | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"outputId": "dd180145-d9f3-43e8-a1c7-a9480d14701b" | |
}, | |
"source": [ | |
"\n", | |
"#let's see just the first characters to see what we are working with\n", | |
"print(csv_as_string[0:500])" | |
], | |
"execution_count": null, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"name": "stdout", | |
"text": [ | |
"System,Group,Species,BM,HR,Refs\n", | |
"unitless,unitless,unitless,grams (g),square kilometers (km^2),unitless\n", | |
"M,B,Sterna forsteri,149,58.0,(150)\n", | |
"M,B,Ptychoramphus aleuticus,164,3008.0,(151|152)\n", | |
"M,B,Brachyramphus marmoratus,220,127.0,(153)\n", | |
"M,B,Calonectris diomedea,535,485776.0,(154)\n", | |
"M,B,Alca torda,600,2201.0,(155)\n", | |
"M,B,Uria aalge,907,815.0,(155|156)\n", | |
"M,B,Sula sula,956,5454.0,(157)\n", | |
"M,B,Melanitta nigra,1052,1298.0,(158)\n", | |
"M,B,Procellaria aequinoctialis,1213,683000.0,(159)\n", | |
"M,B,Procellaria conspicillata,1278,59\n" | |
] | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 419 | |
}, | |
"id": "E4l8N38tBAQL", | |
"outputId": "5f981e4b-9cff-4cd7-f7e1-ad3ca78f6316" | |
}, | |
"source": [ | |
"# import the StrinIO function so we can load the csv as a string since \n", | |
"# it isn't in a file we can call by filename.\n", | |
"from io import StringIO\n", | |
" \n", | |
"# let's read the data using the Pandas\n", | |
"# read_csv() function\n", | |
"\n", | |
"df = pd.read_csv(StringIO(csv_as_string),sep=',')\n", | |
" \n", | |
"# Print the dataframe\n", | |
"df" | |
], | |
"execution_count": null, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>System</th>\n", | |
" <th>Group</th>\n", | |
" <th>Species</th>\n", | |
" <th>BM</th>\n", | |
" <th>HR</th>\n", | |
" <th>Refs</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>unitless</td>\n", | |
" <td>unitless</td>\n", | |
" <td>unitless</td>\n", | |
" <td>grams (g)</td>\n", | |
" <td>square kilometers (km^2)</td>\n", | |
" <td>unitless</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>M</td>\n", | |
" <td>B</td>\n", | |
" <td>Sterna forsteri</td>\n", | |
" <td>149</td>\n", | |
" <td>58.0</td>\n", | |
" <td>(150)</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>M</td>\n", | |
" <td>B</td>\n", | |
" <td>Ptychoramphus aleuticus</td>\n", | |
" <td>164</td>\n", | |
" <td>3008.0</td>\n", | |
" <td>(151|152)</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>M</td>\n", | |
" <td>B</td>\n", | |
" <td>Brachyramphus marmoratus</td>\n", | |
" <td>220</td>\n", | |
" <td>127.0</td>\n", | |
" <td>(153)</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>M</td>\n", | |
" <td>B</td>\n", | |
" <td>Calonectris diomedea</td>\n", | |
" <td>535</td>\n", | |
" <td>485776.0</td>\n", | |
" <td>(154)</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>...</th>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>839</th>\n", | |
" <td>T</td>\n", | |
" <td>R</td>\n", | |
" <td>Conolophus pallidus</td>\n", | |
" <td>5600</td>\n", | |
" <td>0.0056</td>\n", | |
" <td>(291)</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>840</th>\n", | |
" <td>T</td>\n", | |
" <td>R</td>\n", | |
" <td>Varanus bengalensis</td>\n", | |
" <td>5744</td>\n", | |
" <td>0.094</td>\n", | |
" <td>(291)</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>841</th>\n", | |
" <td>T</td>\n", | |
" <td>R</td>\n", | |
" <td>Varanus albigularis</td>\n", | |
" <td>7250</td>\n", | |
" <td>12.0</td>\n", | |
" <td>(291)</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>842</th>\n", | |
" <td>T</td>\n", | |
" <td>R</td>\n", | |
" <td>Stigmochelys pardalis</td>\n", | |
" <td>10600</td>\n", | |
" <td>2.1</td>\n", | |
" <td>(297)</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>843</th>\n", | |
" <td>T</td>\n", | |
" <td>R</td>\n", | |
" <td>Gopherus agassizii</td>\n", | |
" <td>17000</td>\n", | |
" <td>0.29</td>\n", | |
" <td>(298)</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"<p>844 rows × 6 columns</p>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" System Group ... HR Refs\n", | |
"0 unitless unitless ... square kilometers (km^2) unitless\n", | |
"1 M B ... 58.0 (150)\n", | |
"2 M B ... 3008.0 (151|152)\n", | |
"3 M B ... 127.0 (153)\n", | |
"4 M B ... 485776.0 (154)\n", | |
".. ... ... ... ... ...\n", | |
"839 T R ... 0.0056 (291)\n", | |
"840 T R ... 0.094 (291)\n", | |
"841 T R ... 12.0 (291)\n", | |
"842 T R ... 2.1 (297)\n", | |
"843 T R ... 0.29 (298)\n", | |
"\n", | |
"[844 rows x 6 columns]" | |
] | |
}, | |
"metadata": {}, | |
"execution_count": 22 | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 419 | |
}, | |
"id": "pHgBJjsqEfID", | |
"outputId": "d558cb6d-7276-4a81-ca88-4a20cec9fc83" | |
}, | |
"source": [ | |
"# drop the first data row (it's units not data)\n", | |
"# Drop first row\n", | |
"# we are telling it to drop rows at index[0] which is the first row\n", | |
"df.drop(index=df.index[0],inplace=True)\n", | |
"df" | |
], | |
"execution_count": null, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>System</th>\n", | |
" <th>Group</th>\n", | |
" <th>Species</th>\n", | |
" <th>BM</th>\n", | |
" <th>HR</th>\n", | |
" <th>Refs</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>M</td>\n", | |
" <td>B</td>\n", | |
" <td>Sterna forsteri</td>\n", | |
" <td>149</td>\n", | |
" <td>58.0</td>\n", | |
" <td>(150)</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>M</td>\n", | |
" <td>B</td>\n", | |
" <td>Ptychoramphus aleuticus</td>\n", | |
" <td>164</td>\n", | |
" <td>3008.0</td>\n", | |
" <td>(151|152)</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>M</td>\n", | |
" <td>B</td>\n", | |
" <td>Brachyramphus marmoratus</td>\n", | |
" <td>220</td>\n", | |
" <td>127.0</td>\n", | |
" <td>(153)</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>M</td>\n", | |
" <td>B</td>\n", | |
" <td>Calonectris diomedea</td>\n", | |
" <td>535</td>\n", | |
" <td>485776.0</td>\n", | |
" <td>(154)</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>5</th>\n", | |
" <td>M</td>\n", | |
" <td>B</td>\n", | |
" <td>Alca torda</td>\n", | |
" <td>600</td>\n", | |
" <td>2201.0</td>\n", | |
" <td>(155)</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>...</th>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>839</th>\n", | |
" <td>T</td>\n", | |
" <td>R</td>\n", | |
" <td>Conolophus pallidus</td>\n", | |
" <td>5600</td>\n", | |
" <td>0.0056</td>\n", | |
" <td>(291)</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>840</th>\n", | |
" <td>T</td>\n", | |
" <td>R</td>\n", | |
" <td>Varanus bengalensis</td>\n", | |
" <td>5744</td>\n", | |
" <td>0.094</td>\n", | |
" <td>(291)</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>841</th>\n", | |
" <td>T</td>\n", | |
" <td>R</td>\n", | |
" <td>Varanus albigularis</td>\n", | |
" <td>7250</td>\n", | |
" <td>12.0</td>\n", | |
" <td>(291)</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>842</th>\n", | |
" <td>T</td>\n", | |
" <td>R</td>\n", | |
" <td>Stigmochelys pardalis</td>\n", | |
" <td>10600</td>\n", | |
" <td>2.1</td>\n", | |
" <td>(297)</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>843</th>\n", | |
" <td>T</td>\n", | |
" <td>R</td>\n", | |
" <td>Gopherus agassizii</td>\n", | |
" <td>17000</td>\n", | |
" <td>0.29</td>\n", | |
" <td>(298)</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"<p>843 rows × 6 columns</p>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" System Group Species BM HR Refs\n", | |
"1 M B Sterna forsteri 149 58.0 (150)\n", | |
"2 M B Ptychoramphus aleuticus 164 3008.0 (151|152)\n", | |
"3 M B Brachyramphus marmoratus 220 127.0 (153)\n", | |
"4 M B Calonectris diomedea 535 485776.0 (154)\n", | |
"5 M B Alca torda 600 2201.0 (155)\n", | |
".. ... ... ... ... ... ...\n", | |
"839 T R Conolophus pallidus 5600 0.0056 (291)\n", | |
"840 T R Varanus bengalensis 5744 0.094 (291)\n", | |
"841 T R Varanus albigularis 7250 12.0 (291)\n", | |
"842 T R Stigmochelys pardalis 10600 2.1 (297)\n", | |
"843 T R Gopherus agassizii 17000 0.29 (298)\n", | |
"\n", | |
"[843 rows x 6 columns]" | |
] | |
}, | |
"metadata": {}, | |
"execution_count": 23 | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"id": "dSi9Im67C-A0", | |
"outputId": "8e948773-9a64-4252-8b7a-67e2e45a5128" | |
}, | |
"source": [ | |
"# Get Unique Species List\n", | |
"\n", | |
"# let's get the values from the Species column with df.Species\n", | |
"# Then get the unique values only with .unique()\n", | |
"# Then make that a list with tolist()\n", | |
"sp_list = df.Species.unique().tolist()\n", | |
"\n", | |
"# And let's shorten the list for demonstration purposes.\n", | |
"sp_list = sp_list[0:20]\n", | |
"sp_list" | |
], | |
"execution_count": null, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/plain": [ | |
"['Sterna forsteri',\n", | |
" 'Ptychoramphus aleuticus',\n", | |
" 'Brachyramphus marmoratus',\n", | |
" 'Calonectris diomedea',\n", | |
" 'Alca torda',\n", | |
" 'Uria aalge',\n", | |
" 'Sula sula',\n", | |
" 'Melanitta nigra',\n", | |
" 'Procellaria aequinoctialis',\n", | |
" 'Procellaria conspicillata',\n", | |
" 'Papasula abbotti',\n", | |
" 'Somateria mollissima',\n", | |
" 'Eudyptes filholi',\n", | |
" 'Thalassarche chrysostoma',\n", | |
" 'Thalassarche melanophrys',\n", | |
" 'Pygoscelis antarctica',\n", | |
" 'Gaviia immer',\n", | |
" 'Pygoscelis papua',\n", | |
" 'Eudyptes chrysolophus',\n", | |
" 'Ctenochaetus striatus']" | |
] | |
}, | |
"metadata": {}, | |
"execution_count": 25 | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "v4Nd8d9dEeCE" | |
}, | |
"source": [ | |
"" | |
], | |
"execution_count": null, | |
"outputs": [] | |
} | |
] | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment