Skip to content

Instantly share code, notes, and snippets.

@MathewBiddle
Created April 26, 2023 17:03
Show Gist options
  • Save MathewBiddle/c223df1df6e2f45318b42ee605720394 to your computer and use it in GitHub Desktop.
Save MathewBiddle/c223df1df6e2f45318b42ee605720394 to your computer and use it in GitHub Desktop.
GTS_ATN_metrics_problem.ipynb
Display the source blob
Display the rendered blob
Raw
{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"provenance": [],
"authorship_tag": "ABX9TyMK/YJpfvpEcZUC7rNfoSIW",
"include_colab_link": true
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
},
"language_info": {
"name": "python"
}
},
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "view-in-github",
"colab_type": "text"
},
"source": [
"<a href=\"https://colab.research.google.com/gist/MathewBiddle/c223df1df6e2f45318b42ee605720394/gts_atn_metrics_problem.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
]
},
{
"cell_type": "code",
"source": [
"# %pip installs into the environment of the running kernel; version pinned to the one this notebook was run with\n",
"%pip install fiscalyear==0.4.0"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "uI1x6j8Tgps4",
"outputId": "5c032d6c-be39-4812-ef0f-571aa1694482"
},
"execution_count": 1,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/\n",
"Requirement already satisfied: fiscalyear in /usr/local/lib/python3.9/dist-packages (0.4.0)\n"
]
}
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "z3ZrLxphgdkr",
"outputId": "d2d118a5-f164-4921-bdee-fc1cce74fd44"
},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"4381 total messages from ATN to GTS.\n"
]
}
],
"source": [
"import datetime as dt\n",
"import pandas as pd\n",
"from fiscalyear import *\n",
"import requests\n",
"from bs4 import BeautifulSoup\n",
"\n",
"# reporting window: the fiscal quarter immediately before the current one\n",
"fq = FiscalQuarter.current().prev_fiscal_quarter\n",
"\n",
"# day-resolution strings for the quarter's start and end\n",
"start_date, end_date = (edge.strftime('%Y-%m-%d') for edge in (fq.start, fq.end))\n",
"\n",
"# parsed back without a time component, so both datetimes sit at 00:00:00 of their day\n",
"start, end = (dt.datetime.strptime(label, '%Y-%m-%d') for label in (start_date, end_date))\n",
"\n",
"# walk the https index, one deployment directory at a time, looking for bufr messages\n",
"url = 'https://stage-ndbc-bufr.srv.axds.co/platforms/atn/smru/profiles/'\n",
"\n",
"html = requests.get(url).text\n",
"soup = BeautifulSoup(html, 'html.parser')\n",
"\n",
"# collect one plain dict per message and build the frame once at the end;\n",
"# growing a DataFrame with pd.concat inside the loop re-copies it on every iteration\n",
"records = []\n",
"\n",
"for deployment in soup.find_all('a'):\n",
"\n",
"    depl_url = url+deployment.text\n",
"    depl_html = requests.get(depl_url).text\n",
"\n",
"    depl_soup = BeautifulSoup(depl_html, 'html.parser')\n",
"\n",
"    # some content is not in an html node, so we have to parse line by line as we want the stuff after <a> nodes\n",
"    files = depl_soup.get_text().split('\\r\\n')[1:-1]\n",
"\n",
"    for file in files:\n",
"\n",
"        content = file.split()\n",
"\n",
"        # a blank listing line splits to an empty list; skip it instead of failing on content[0]\n",
"        if not content or '.bufr' not in content[0]:\n",
"            continue\n",
"\n",
"        # save the index file information: name, timestamp (fields 1 and 2 joined), size field as text\n",
"        records.append({'fname': deployment.text+content[0],\n",
"                        'date': pd.to_datetime(content[1]+\"T\"+content[2]),\n",
"                        'size': content[3]})\n",
"\n",
"# explicit columns keep 'fname'/'date'/'size' present even when no message was found\n",
"df_out = pd.DataFrame(records, columns=['fname', 'date', 'size'])\n",
"# no-op for parsed timestamps; gives an empty frame a datetime column as well\n",
"df_out['date'] = pd.to_datetime(df_out['date'])\n",
"\n",
"print('{} total messages from ATN to GTS.'.format(df_out.shape[0]))"
]
},
{
"cell_type": "code",
"source": [
"# mask for FY Quarter via logic\n",
"# `end` was parsed from a date-only string, so it is 00:00:00 on the quarter's final day;\n",
"# `<= end` would drop every message sent later that day. Use a half-open window that\n",
"# runs up to (but excludes) midnight of the following day instead.\n",
"mask = (df_out['date'] >= start) & (df_out['date'] < end + dt.timedelta(days=1))\n",
"\n",
"print('For {} ATN sent {} records to GTS. (via mask)'.format(fq, df_out[mask].shape[0]))"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "deiJDMNqgm7v",
"outputId": "ba37ec51-aac3-4552-8e0b-2424ab60f32b"
},
"execution_count": 3,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"For FY2023 Q2 ATN sent 926 records to GTS. (via mask)\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"# mask for FY Quarter by grouper: count messages per calendar month\n",
"group = df_out.groupby(pd.Grouper(key=\"date\", freq=\"M\"))\n",
"\n",
"s = group['fname'].count()\n",
"\n",
"s.index = s.index.to_period(\"M\")\n",
"\n",
"s = s.rename('total')\n",
"\n",
"# pick the quarter's months by label rather than by position: a positional slice such as\n",
"# s[-4:-1] only lines up with the previous quarter while the newest month in the data is\n",
"# the first month of the current quarter\n",
"quarter = s.loc[pd.Period(start, freq=\"M\"):pd.Period(end, freq=\"M\")]\n",
"\n",
"print('For {} ATN sent {} records to GTS. (via group)'.format(quarter.index, quarter.sum()))"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "obEPz4djg7-9",
"outputId": "89b2cc6c-5bb3-4714-8de4-5b1e24285745"
},
"execution_count": 4,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"For PeriodIndex(['2023-01', '2023-02', '2023-03'], dtype='period[M]', name='date') ATN sent 932 records to GTS. (via group)\n"
]
}
]
},
{
"cell_type": "code",
"source": [],
"metadata": {
"id": "Tj2jS56jg9nR"
},
"execution_count": 4,
"outputs": []
}
]
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment