Created
April 26, 2023 17:03
-
-
Save MathewBiddle/c223df1df6e2f45318b42ee605720394 to your computer and use it in GitHub Desktop.
GTS_ATN_metrics_problem.ipynb
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"nbformat": 4, | |
"nbformat_minor": 0, | |
"metadata": { | |
"colab": { | |
"provenance": [], | |
"authorship_tag": "ABX9TyMK/YJpfvpEcZUC7rNfoSIW", | |
"include_colab_link": true | |
}, | |
"kernelspec": { | |
"name": "python3", | |
"display_name": "Python 3" | |
}, | |
"language_info": { | |
"name": "python" | |
} | |
}, | |
"cells": [ | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "view-in-github", | |
"colab_type": "text" | |
}, | |
"source": [ | |
"<a href=\"https://colab.research.google.com/gist/MathewBiddle/c223df1df6e2f45318b42ee605720394/gts_atn_metrics_problem.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"!pip install fiscalyear" | |
], | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"id": "uI1x6j8Tgps4", | |
"outputId": "5c032d6c-be39-4812-ef0f-571aa1694482" | |
}, | |
"execution_count": 1, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"name": "stdout", | |
"text": [ | |
"Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/\n", | |
"Requirement already satisfied: fiscalyear in /usr/local/lib/python3.9/dist-packages (0.4.0)\n" | |
] | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"id": "z3ZrLxphgdkr", | |
"outputId": "d2d118a5-f164-4921-bdee-fc1cce74fd44" | |
}, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"name": "stdout", | |
"text": [ | |
"4381 total messages from ATN to GTS.\n" | |
] | |
} | |
], | |
"source": [ | |
"import datetime as dt\n", | |
"import pandas as pd\n", | |
"from fiscalyear import *\n", | |
"import requests\n", | |
"from bs4 import BeautifulSoup\n", | |
"\n", | |
"# gather FY start/end dates for previous quarter\n", | |
"fq = FiscalQuarter.current().prev_fiscal_quarter\n", | |
"\n", | |
"start_date = fq.start.strftime('%Y-%m-%d')\n", | |
"end_date = fq.end.strftime('%Y-%m-%d')\n", | |
"\n", | |
"start = dt.datetime.strptime(start_date,'%Y-%m-%d')\n", | |
"end = dt.datetime.strptime(end_date,'%Y-%m-%d')\n", | |
"\n", | |
"# recursively search the https index for bufr messages\n", | |
"url = 'https://stage-ndbc-bufr.srv.axds.co/platforms/atn/smru/profiles/'\n", | |
"\n", | |
"html = requests.get(url).text\n", | |
"soup = BeautifulSoup(html, 'html.parser')\n", | |
"\n", | |
"df_out = pd.DataFrame()\n", | |
"\n", | |
"for deployment in soup.find_all('a'):\n", | |
"\n", | |
" depl_url = url+deployment.text\n", | |
" depl_html = requests.get(depl_url).text\n", | |
"\n", | |
" depl_soup = BeautifulSoup(depl_html, 'html.parser')\n", | |
"\n", | |
" # some content is not in an html node, so we have to parse line by line as we want the stuff after <a> nodes\n", | |
" files = depl_soup.get_text().split('\\r\\n')[1:-1]\n", | |
"\n", | |
" for file in files:\n", | |
"\n", | |
" content = file.split()\n", | |
"\n", | |
" if '.bufr' in content[0]:\n", | |
" # save the index file information to DF.\n", | |
" df_file = pd.DataFrame({'fname':[deployment.text+content[0]],\n", | |
" 'date':pd.to_datetime([content[1]+\"T\"+content[2]]),\n", | |
" 'size': [content[3]]})\n", | |
"\n", | |
" df_out = pd.concat([df_out, df_file])\n", | |
"\n", | |
"\n", | |
"print('{} total messages from ATN to GTS.'.format(df_out.shape[0]))" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"# mask for FY Quarter via logic\n", | |
"mask = (df_out['date'] >= start) & (df_out['date'] <= end)\n", | |
"\n", | |
"print('For {} ATN sent {} records to GTS. (via mask)'.format(fq, df_out[mask].shape[0]))" | |
], | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"id": "deiJDMNqgm7v", | |
"outputId": "ba37ec51-aac3-4552-8e0b-2424ab60f32b" | |
}, | |
"execution_count": 3, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"name": "stdout", | |
"text": [ | |
"For FY2023 Q2 ATN sent 926 records to GTS. (via mask)\n" | |
] | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"#mask for FY Qarter by grouper\n", | |
"group = df_out.groupby(pd.Grouper(key=\"date\", freq=\"M\"))\n", | |
"\n", | |
"s = group['fname'].count() \n", | |
"\n", | |
"s.index = s.index.to_period(\"M\")\n", | |
"\n", | |
"s = s.rename('total')\n", | |
"\n", | |
"print('For {} ATN sent {} records to GTS. (via group)'.format(s[-4:-1].index, s[-4:-1].sum()))" | |
], | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"id": "obEPz4djg7-9", | |
"outputId": "89b2cc6c-5bb3-4714-8de4-5b1e24285745" | |
}, | |
"execution_count": 4, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"name": "stdout", | |
"text": [ | |
"For PeriodIndex(['2023-01', '2023-02', '2023-03'], dtype='period[M]', name='date') ATN sent 932 records to GTS. (via group)\n" | |
] | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"source": [], | |
"metadata": { | |
"id": "Tj2jS56jg9nR" | |
}, | |
"execution_count": 4, | |
"outputs": [] | |
} | |
] | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment