Skip to content

Instantly share code, notes, and snippets.

@MathewBiddle
Created October 12, 2023 20:35
Show Gist options
  • Save MathewBiddle/be67a380228a7da3d084e6e4da7786a0 to your computer and use it in GitHub Desktop.
Save MathewBiddle/be67a380228a7da3d084e6e4da7786a0 to your computer and use it in GitHub Desktop.
glider_days.ipynb
Display the source blob
Display the rendered blob
Raw
{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"provenance": [],
"authorship_tag": "ABX9TyONKm936yZLuQxCxI6pa6Dx",
"include_colab_link": true
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
},
"language_info": {
"name": "python"
}
},
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "view-in-github",
"colab_type": "text"
},
"source": [
"<a href=\"https://colab.research.google.com/gist/MathewBiddle/be67a380228a7da3d084e6e4da7786a0/glider_days.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "a9GA9tAWmDuE",
"outputId": "a55b1eac-77d9-4e56-871f-6c5ad60c4bd8"
},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Glider days between 2021-06-01 and 2021-08-10: 2187\n",
"\n",
"\n"
]
},
{
"output_type": "stream",
"name": "stderr",
"text": [
"<ipython-input-2-470cdbbe7af9>:30: SettingWithCopyWarning: \n",
"A value is trying to be set on a copy of a slice from a DataFrame.\n",
"Try using .loc[row_indexer,col_indexer] = value instead\n",
"\n",
"See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
" glider_day_outside.loc[:, 'maxTime (UTC)'] = pd.to_datetime(end_date, utc=True)\n",
"<ipython-input-2-470cdbbe7af9>:30: DeprecationWarning: In a future version, `df.iloc[:, i] = newvals` will attempt to set the values inplace instead of always setting a new array. To retain the old behavior, use either `df[df.columns[i]] = newvals` or, if columns are non-unique, `df.isetitem(i, newvals)`\n",
" glider_day_outside.loc[:, 'maxTime (UTC)'] = pd.to_datetime(end_date, utc=True)\n",
"<ipython-input-2-470cdbbe7af9>:31: SettingWithCopyWarning: \n",
"A value is trying to be set on a copy of a slice from a DataFrame.\n",
"Try using .loc[row_indexer,col_indexer] = value instead\n",
"\n",
"See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
" glider_day_outside.loc[:, 'minTime (UTC)'] = pd.to_datetime(start_date, utc=True)\n",
"<ipython-input-2-470cdbbe7af9>:31: DeprecationWarning: In a future version, `df.iloc[:, i] = newvals` will attempt to set the values inplace instead of always setting a new array. To retain the old behavior, use either `df[df.columns[i]] = newvals` or, if columns are non-unique, `df.isetitem(i, newvals)`\n",
" glider_day_outside.loc[:, 'minTime (UTC)'] = pd.to_datetime(start_date, utc=True)\n",
"<ipython-input-2-470cdbbe7af9>:42: SettingWithCopyWarning: \n",
"A value is trying to be set on a copy of a slice from a DataFrame.\n",
"Try using .loc[row_indexer,col_indexer] = value instead\n",
"\n",
"See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
" glider_day_lower.loc[:,'minTime (UTC)'] = pd.to_datetime(start_date, utc=True)\n",
"<ipython-input-2-470cdbbe7af9>:42: DeprecationWarning: In a future version, `df.iloc[:, i] = newvals` will attempt to set the values inplace instead of always setting a new array. To retain the old behavior, use either `df[df.columns[i]] = newvals` or, if columns are non-unique, `df.isetitem(i, newvals)`\n",
" glider_day_lower.loc[:,'minTime (UTC)'] = pd.to_datetime(start_date, utc=True)\n",
"<ipython-input-2-470cdbbe7af9>:50: SettingWithCopyWarning: \n",
"A value is trying to be set on a copy of a slice from a DataFrame.\n",
"Try using .loc[row_indexer,col_indexer] = value instead\n",
"\n",
"See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
" glider_day_upper.loc[:,'maxTime (UTC)'] = pd.to_datetime(end_date, utc=True)\n",
"<ipython-input-2-470cdbbe7af9>:50: DeprecationWarning: In a future version, `df.iloc[:, i] = newvals` will attempt to set the values inplace instead of always setting a new array. To retain the old behavior, use either `df[df.columns[i]] = newvals` or, if columns are non-unique, `df.isetitem(i, newvals)`\n",
" glider_day_upper.loc[:,'maxTime (UTC)'] = pd.to_datetime(end_date, utc=True)\n"
]
}
],
"source": [
"import pandas as pd\n",
"\n",
"# ERDDAP listing of the datasets on the IOOS glider server with their time coverage.\n",
"ALL_DATASETS_URL = 'https://gliders.ioos.us/erddap/tabledap/allDatasets.csvp?minTime%2CmaxTime%2CdatasetID'\n",
"\n",
"# Reporting window (UTC). Both dates are parsed as midnight, so time on\n",
"# end_date itself is not counted.\n",
"start_date = '2021-06-01'\n",
"end_date = '2021-08-10'\n",
"\n",
"\n",
"def clip_to_window(deployments, start_date, end_date):\n",
"    \"\"\"Return the deployments that overlap the window, clipped to its edges.\n",
"\n",
"    A deployment is kept when it starts before the window ends and ends after\n",
"    the window starts. Its 'minTime (UTC)' / 'maxTime (UTC)' are then clamped\n",
"    to the window, which handles all four cases in one pass: entirely inside\n",
"    the window, spanning the whole window, overlapping only the start, and\n",
"    overlapping only the end. Deployments that begin or end exactly on a\n",
"    window edge are included as well.\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    deployments : pandas.DataFrame\n",
"        Must contain tz-aware 'minTime (UTC)' and 'maxTime (UTC)' columns.\n",
"    start_date, end_date : str\n",
"        Window bounds, parsed with pd.to_datetime(..., utc=True).\n",
"\n",
"    Returns\n",
"    -------\n",
"    pandas.DataFrame\n",
"        A new frame (the input is not modified) with the clipped times and a\n",
"        'glider_days' column holding the whole days of each clipped\n",
"        deployment (fractions of a day are truncated).\n",
"    \"\"\"\n",
"    window_start = pd.to_datetime(start_date, utc=True)\n",
"    window_end = pd.to_datetime(end_date, utc=True)\n",
"\n",
"    overlaps_window = (\n",
"        (deployments['minTime (UTC)'] < window_end)\n",
"        & (deployments['maxTime (UTC)'] > window_start)\n",
"    )\n",
"\n",
"    # Work on an explicit copy so the assignments below neither modify the\n",
"    # caller's frame nor trigger SettingWithCopyWarning.\n",
"    clipped = deployments.loc[overlaps_window].copy()\n",
"    clipped['minTime (UTC)'] = clipped['minTime (UTC)'].clip(lower=window_start)\n",
"    clipped['maxTime (UTC)'] = clipped['maxTime (UTC)'].clip(upper=window_end)\n",
"\n",
"    # Calculate the days between min time and max time.\n",
"    clipped['glider_days'] = (\n",
"        clipped['maxTime (UTC)'] - clipped['minTime (UTC)']\n",
"    ).dt.days\n",
"    return clipped\n",
"\n",
"\n",
"# Rows with a missing time or dataset ID cannot be placed in the window.\n",
"df_glider = pd.read_csv(ALL_DATASETS_URL).dropna(axis=0)\n",
"\n",
"# drop delayed datasets\n",
"df_glider = df_glider[~df_glider['datasetID'].str.contains('delayed')].copy()\n",
"\n",
"# Parse as tz-aware UTC so the comparisons against the window bounds are valid.\n",
"time_columns = ['minTime (UTC)', 'maxTime (UTC)']\n",
"df_glider[time_columns] = df_glider[time_columns].apply(pd.to_datetime, utc=True)\n",
"\n",
"glider_subset = clip_to_window(df_glider, start_date, end_date)\n",
"\n",
"# Calculate total glider days.\n",
"glider_days = glider_subset['glider_days'].sum()\n",
"\n",
"print(f\"Glider days between {start_date} and {end_date}: {glider_days}\\n\\n\")"
]
},
{
"cell_type": "code",
"source": [],
"metadata": {
"id": "AdN6uhC-mLhc"
},
"execution_count": null,
"outputs": []
}
]
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment