Skip to content

Instantly share code, notes, and snippets.

@adyork
Last active October 20, 2022 16:01
Show Gist options
  • Save adyork/966fdd2bd596336047dcf005255014d1 to your computer and use it in GitHub Desktop.
Save adyork/966fdd2bd596336047dcf005255014d1 to your computer and use it in GitHub Desktop.
species_erddap_csv.ipynb
Display the source blob
Display the rendered blob
Raw
{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"name": "species_erddap_csv.ipynb",
"provenance": [],
"collapsed_sections": [],
"authorship_tag": "ABX9TyNHuxK9qqot0L9XCY4sRzHm",
"include_colab_link": true
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
},
"language_info": {
"name": "python"
}
},
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "view-in-github",
"colab_type": "text"
},
"source": [
"<a href=\"https://colab.research.google.com/gist/adyork/966fdd2bd596336047dcf005255014d1/species_erddap_csv.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "QUliR1El8x17"
},
"source": [
"Use the ERDDAP API to pull data in CSV format from BCO-DMO, then build a unique species list."
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "C4Jm5Zcq8vc7"
},
"source": [
"# Resources\n",
"* BCO-DMO ERDDAP Documentation https://erddap.bco-dmo.org/erddap/tabledap/documentation.html\n",
"* See workshop materials for more on ERDDAP: https://k-rns.github.io/workshop_data_reuse/"
]
},
{
"cell_type": "code",
"metadata": {
"id": "LFuUfzV4Homj"
},
"source": [
"import requests\n",
"import pandas as pd"
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "K0UJ0cLI6xyz",
"outputId": "d396311e-38c8-4794-838b-c726595d837c"
},
"source": [
"dataset_id = '752795'# '850202' #850190\n",
"# Dataset Landing Page (see metadata and file access)\n",
"# https://www.bco-dmo.org/dataset/752795\n",
"\n",
"url_csv = \"https://erddap.bco-dmo.org/erddap/tabledap/bcodmo_dataset_752795.csv\"\n",
"\n",
"response = requests.get(url_csv)\n",
"response.status_code"
],
"execution_count": null,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"200"
]
},
"metadata": {},
"execution_count": 19
}
]
},
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "Q_pv92n2A5U0",
"outputId": "657a4f76-ae86-457f-8542-fd3728ec219e"
},
"source": [
"# We can get the response body as a string with response.text\n",
"csv_as_string = response.text\n",
"\n",
"# use type() to tell you what the type of your variable is\n",
"type(csv_as_string)\n"
],
"execution_count": null,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"str"
]
},
"metadata": {},
"execution_count": 20
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "cTyxc3C6NA7z",
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "dd180145-d9f3-43e8-a1c7-a9480d14701b"
},
"source": [
"\n",
"# Let's look at just the first 500 characters to see what we are working with\n",
"print(csv_as_string[0:500])"
],
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"System,Group,Species,BM,HR,Refs\n",
"unitless,unitless,unitless,grams (g),square kilometers (km^2),unitless\n",
"M,B,Sterna forsteri,149,58.0,(150)\n",
"M,B,Ptychoramphus aleuticus,164,3008.0,(151|152)\n",
"M,B,Brachyramphus marmoratus,220,127.0,(153)\n",
"M,B,Calonectris diomedea,535,485776.0,(154)\n",
"M,B,Alca torda,600,2201.0,(155)\n",
"M,B,Uria aalge,907,815.0,(155|156)\n",
"M,B,Sula sula,956,5454.0,(157)\n",
"M,B,Melanitta nigra,1052,1298.0,(158)\n",
"M,B,Procellaria aequinoctialis,1213,683000.0,(159)\n",
"M,B,Procellaria conspicillata,1278,59\n"
]
}
]
},
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 419
},
"id": "E4l8N38tBAQL",
"outputId": "5f981e4b-9cff-4cd7-f7e1-ad3ca78f6316"
},
"source": [
"# Import the StringIO class so pandas can read the CSV from a string,\n",
"# since the data isn't in a file we can open by filename.\n",
"from io import StringIO\n",
" \n",
"# Let's read the data using the pandas read_csv() function.\n",
"# Note: the second line of the CSV holds the units, so it is read in as the first data row.\n",
"\n",
"df = pd.read_csv(StringIO(csv_as_string),sep=',')\n",
" \n",
"# Print the dataframe\n",
"df"
],
"execution_count": null,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>System</th>\n",
" <th>Group</th>\n",
" <th>Species</th>\n",
" <th>BM</th>\n",
" <th>HR</th>\n",
" <th>Refs</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>unitless</td>\n",
" <td>unitless</td>\n",
" <td>unitless</td>\n",
" <td>grams (g)</td>\n",
" <td>square kilometers (km^2)</td>\n",
" <td>unitless</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>M</td>\n",
" <td>B</td>\n",
" <td>Sterna forsteri</td>\n",
" <td>149</td>\n",
" <td>58.0</td>\n",
" <td>(150)</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>M</td>\n",
" <td>B</td>\n",
" <td>Ptychoramphus aleuticus</td>\n",
" <td>164</td>\n",
" <td>3008.0</td>\n",
" <td>(151|152)</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>M</td>\n",
" <td>B</td>\n",
" <td>Brachyramphus marmoratus</td>\n",
" <td>220</td>\n",
" <td>127.0</td>\n",
" <td>(153)</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>M</td>\n",
" <td>B</td>\n",
" <td>Calonectris diomedea</td>\n",
" <td>535</td>\n",
" <td>485776.0</td>\n",
" <td>(154)</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>839</th>\n",
" <td>T</td>\n",
" <td>R</td>\n",
" <td>Conolophus pallidus</td>\n",
" <td>5600</td>\n",
" <td>0.0056</td>\n",
" <td>(291)</td>\n",
" </tr>\n",
" <tr>\n",
" <th>840</th>\n",
" <td>T</td>\n",
" <td>R</td>\n",
" <td>Varanus bengalensis</td>\n",
" <td>5744</td>\n",
" <td>0.094</td>\n",
" <td>(291)</td>\n",
" </tr>\n",
" <tr>\n",
" <th>841</th>\n",
" <td>T</td>\n",
" <td>R</td>\n",
" <td>Varanus albigularis</td>\n",
" <td>7250</td>\n",
" <td>12.0</td>\n",
" <td>(291)</td>\n",
" </tr>\n",
" <tr>\n",
" <th>842</th>\n",
" <td>T</td>\n",
" <td>R</td>\n",
" <td>Stigmochelys pardalis</td>\n",
" <td>10600</td>\n",
" <td>2.1</td>\n",
" <td>(297)</td>\n",
" </tr>\n",
" <tr>\n",
" <th>843</th>\n",
" <td>T</td>\n",
" <td>R</td>\n",
" <td>Gopherus agassizii</td>\n",
" <td>17000</td>\n",
" <td>0.29</td>\n",
" <td>(298)</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>844 rows × 6 columns</p>\n",
"</div>"
],
"text/plain": [
" System Group ... HR Refs\n",
"0 unitless unitless ... square kilometers (km^2) unitless\n",
"1 M B ... 58.0 (150)\n",
"2 M B ... 3008.0 (151|152)\n",
"3 M B ... 127.0 (153)\n",
"4 M B ... 485776.0 (154)\n",
".. ... ... ... ... ...\n",
"839 T R ... 0.0056 (291)\n",
"840 T R ... 0.094 (291)\n",
"841 T R ... 12.0 (291)\n",
"842 T R ... 2.1 (297)\n",
"843 T R ... 0.29 (298)\n",
"\n",
"[844 rows x 6 columns]"
]
},
"metadata": {},
"execution_count": 22
}
]
},
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 419
},
"id": "pHgBJjsqEfID",
"outputId": "d558cb6d-7276-4a81-ca88-4a20cec9fc83"
},
"source": [
"# Drop the first row: it holds the units, not data.\n",
"# df.index[0] is the label of the first row, so this removes only that row.\n",
"# Note: the drop happens in place, so re-running this cell removes another row each time.\n",
"df.drop(index=df.index[0],inplace=True)\n",
"df"
],
"execution_count": null,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>System</th>\n",
" <th>Group</th>\n",
" <th>Species</th>\n",
" <th>BM</th>\n",
" <th>HR</th>\n",
" <th>Refs</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>M</td>\n",
" <td>B</td>\n",
" <td>Sterna forsteri</td>\n",
" <td>149</td>\n",
" <td>58.0</td>\n",
" <td>(150)</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>M</td>\n",
" <td>B</td>\n",
" <td>Ptychoramphus aleuticus</td>\n",
" <td>164</td>\n",
" <td>3008.0</td>\n",
" <td>(151|152)</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>M</td>\n",
" <td>B</td>\n",
" <td>Brachyramphus marmoratus</td>\n",
" <td>220</td>\n",
" <td>127.0</td>\n",
" <td>(153)</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>M</td>\n",
" <td>B</td>\n",
" <td>Calonectris diomedea</td>\n",
" <td>535</td>\n",
" <td>485776.0</td>\n",
" <td>(154)</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>M</td>\n",
" <td>B</td>\n",
" <td>Alca torda</td>\n",
" <td>600</td>\n",
" <td>2201.0</td>\n",
" <td>(155)</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>839</th>\n",
" <td>T</td>\n",
" <td>R</td>\n",
" <td>Conolophus pallidus</td>\n",
" <td>5600</td>\n",
" <td>0.0056</td>\n",
" <td>(291)</td>\n",
" </tr>\n",
" <tr>\n",
" <th>840</th>\n",
" <td>T</td>\n",
" <td>R</td>\n",
" <td>Varanus bengalensis</td>\n",
" <td>5744</td>\n",
" <td>0.094</td>\n",
" <td>(291)</td>\n",
" </tr>\n",
" <tr>\n",
" <th>841</th>\n",
" <td>T</td>\n",
" <td>R</td>\n",
" <td>Varanus albigularis</td>\n",
" <td>7250</td>\n",
" <td>12.0</td>\n",
" <td>(291)</td>\n",
" </tr>\n",
" <tr>\n",
" <th>842</th>\n",
" <td>T</td>\n",
" <td>R</td>\n",
" <td>Stigmochelys pardalis</td>\n",
" <td>10600</td>\n",
" <td>2.1</td>\n",
" <td>(297)</td>\n",
" </tr>\n",
" <tr>\n",
" <th>843</th>\n",
" <td>T</td>\n",
" <td>R</td>\n",
" <td>Gopherus agassizii</td>\n",
" <td>17000</td>\n",
" <td>0.29</td>\n",
" <td>(298)</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>843 rows × 6 columns</p>\n",
"</div>"
],
"text/plain": [
" System Group Species BM HR Refs\n",
"1 M B Sterna forsteri 149 58.0 (150)\n",
"2 M B Ptychoramphus aleuticus 164 3008.0 (151|152)\n",
"3 M B Brachyramphus marmoratus 220 127.0 (153)\n",
"4 M B Calonectris diomedea 535 485776.0 (154)\n",
"5 M B Alca torda 600 2201.0 (155)\n",
".. ... ... ... ... ... ...\n",
"839 T R Conolophus pallidus 5600 0.0056 (291)\n",
"840 T R Varanus bengalensis 5744 0.094 (291)\n",
"841 T R Varanus albigularis 7250 12.0 (291)\n",
"842 T R Stigmochelys pardalis 10600 2.1 (297)\n",
"843 T R Gopherus agassizii 17000 0.29 (298)\n",
"\n",
"[843 rows x 6 columns]"
]
},
"metadata": {},
"execution_count": 23
}
]
},
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "dSi9Im67C-A0",
"outputId": "8e948773-9a64-4252-8b7a-67e2e45a5128"
},
"source": [
"# Get Unique Species List\n",
"\n",
"# Let's get the values from the Species column with df.Species\n",
"# Then keep only the unique values with .unique()\n",
"# Then turn that into a list with .tolist()\n",
"sp_list = df.Species.unique().tolist()\n",
"\n",
"# And let's shorten the list for demonstration purposes.\n",
"sp_list = sp_list[0:20]\n",
"sp_list"
],
"execution_count": null,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"['Sterna forsteri',\n",
" 'Ptychoramphus aleuticus',\n",
" 'Brachyramphus marmoratus',\n",
" 'Calonectris diomedea',\n",
" 'Alca torda',\n",
" 'Uria aalge',\n",
" 'Sula sula',\n",
" 'Melanitta nigra',\n",
" 'Procellaria aequinoctialis',\n",
" 'Procellaria conspicillata',\n",
" 'Papasula abbotti',\n",
" 'Somateria mollissima',\n",
" 'Eudyptes filholi',\n",
" 'Thalassarche chrysostoma',\n",
" 'Thalassarche melanophrys',\n",
" 'Pygoscelis antarctica',\n",
" 'Gaviia immer',\n",
" 'Pygoscelis papua',\n",
" 'Eudyptes chrysolophus',\n",
" 'Ctenochaetus striatus']"
]
},
"metadata": {},
"execution_count": 25
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "v4Nd8d9dEeCE"
},
"source": [
""
],
"execution_count": null,
"outputs": []
}
]
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment