Skip to content

Instantly share code, notes, and snippets.

@tam203
Created March 11, 2020 21:39
Show Gist options
  • Save tam203/1a7dcb98d6ffe7e1cc35d20304338c40 to your computer and use it in GitHub Desktop.
Save tam203/1a7dcb98d6ffe7e1cc35d20304338c40 to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# %conda install dask pandas numpy xlrd -y"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"file = 'midas_wxdrnl_194701-194712 (1).txt'"
]
},
{
"cell_type": "code",
"execution_count": 74,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"1947-01-01 09:00, 1226, DCNN, 0, 1, DLY3208, 148, 1001, , , 0, 0, 0, , , , , , , , , 0, 0, , , , , , ,\n",
"1947-01-01 23:59, 0044, DCNN, 24, 1, NCM, 9, 1001, .7, , , , , , , , , , , 9, 0, 0, 0, 0, 0, 0, 0, , ,\n",
"1947-01-01 23:59, 0088, DCNN, 24, 1, DLY3208, 13, 1001, 0, , , , , , , , , , , 0, , , , , , , , , ,\n",
"1947-01-01 23:59, 0293, DCNN, 24, 1, NCM, 32, 1001, 1.8, , , , , , , , , , , 9, 0, 0, 0, 0, 0, 0, 0, , ,\n"
]
}
],
"source": [
"! head -n 4 \"{file}\""
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"from dask import dataframe as dd\n",
"import numpy as np"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"collapsed": true,
"jupyter": {
"outputs_hidden": true
}
},
"outputs": [
{
"data": {
"text/plain": [
"['OB_END_TIME',\n",
" 'ID',\n",
" 'ID_TYPE',\n",
" 'OB_HOUR_COUNT',\n",
" 'VERSION_NUM',\n",
" 'MET_DOMAIN_NAME',\n",
" 'SRC_ID',\n",
" 'REC_ST_IND',\n",
" 'CS_24HR_SUN_DUR',\n",
" 'CONC_STATE_ID',\n",
" 'LYING_SNOW_FLAG',\n",
" 'SNOW_DEPTH',\n",
" 'FRSH_SNOW_AMT',\n",
" 'SNOW_DAY_ID',\n",
" 'HAIL_DAY_ID',\n",
" 'THUNDER_DAY_FLAG',\n",
" 'GALE_DAY_FLAG',\n",
" 'FRSH_MNT_SNWFALL_FLAG',\n",
" 'WMO_24HR_SUN_DUR',\n",
" 'CS_24HR_SUN_DUR_Q',\n",
" 'CONC_STATE_ID_Q',\n",
" 'SNOW_DEPTH_Q',\n",
" 'FRSH_SNW_AMT_Q',\n",
" 'SNOW_DAY_ID_Q',\n",
" 'HAIL_DAY_ID_Q',\n",
" 'THUNDER_DAY_FLAG_Q',\n",
" 'GALE_DAY_FLAG_Q',\n",
" 'WMO_24HR_SUN_DUR_Q',\n",
" 'METO_STMP_TIME',\n",
" 'MIDAS_STMP_ETIME',\n",
" 'DRV_24HR_SUN_DUR',\n",
" 'DRV_24HR_SUN_DUR_Q',\n",
" 'LYING_SNOW_HT',\n",
" 'LYING_SNOW_HT_Q']"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"headers = pd.read_excel(\"WD_Column_Headers.xlsx\", header=None)\n",
"headers = next(headers.iterrows())[1]\n",
"headers = [h.strip() for h in headers.to_list()]\n",
"headers"
]
},
{
"cell_type": "code",
"execution_count": 73,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>OB_END_TIME</th>\n",
" <th>ID</th>\n",
" <th>ID_TYPE</th>\n",
" <th>OB_HOUR_COUNT</th>\n",
" <th>VERSION_NUM</th>\n",
" <th>MET_DOMAIN_NAME</th>\n",
" <th>SRC_ID</th>\n",
" <th>REC_ST_IND</th>\n",
" <th>CS_24HR_SUN_DUR</th>\n",
" <th>CONC_STATE_ID</th>\n",
" <th>...</th>\n",
" <th>HAIL_DAY_ID_Q</th>\n",
" <th>THUNDER_DAY_FLAG_Q</th>\n",
" <th>GALE_DAY_FLAG_Q</th>\n",
" <th>WMO_24HR_SUN_DUR_Q</th>\n",
" <th>METO_STMP_TIME</th>\n",
" <th>MIDAS_STMP_ETIME</th>\n",
" <th>DRV_24HR_SUN_DUR</th>\n",
" <th>DRV_24HR_SUN_DUR_Q</th>\n",
" <th>LYING_SNOW_HT</th>\n",
" <th>LYING_SNOW_HT_Q</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1947-01-01 09:00:00</td>\n",
" <td>1226</td>\n",
" <td>DCNN</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>DLY3208</td>\n",
" <td>148</td>\n",
" <td>1001</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>1947-01-01 23:59:00</td>\n",
" <td>0044</td>\n",
" <td>DCNN</td>\n",
" <td>24</td>\n",
" <td>1</td>\n",
" <td>NCM</td>\n",
" <td>9</td>\n",
" <td>1001</td>\n",
" <td>0.7</td>\n",
" <td>NaN</td>\n",
" <td>...</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>1947-01-01 23:59:00</td>\n",
" <td>0088</td>\n",
" <td>DCNN</td>\n",
" <td>24</td>\n",
" <td>1</td>\n",
" <td>DLY3208</td>\n",
" <td>13</td>\n",
" <td>1001</td>\n",
" <td>0.0</td>\n",
" <td>NaN</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>1947-01-01 23:59:00</td>\n",
" <td>0293</td>\n",
" <td>DCNN</td>\n",
" <td>24</td>\n",
" <td>1</td>\n",
" <td>NCM</td>\n",
" <td>32</td>\n",
" <td>1001</td>\n",
" <td>1.8</td>\n",
" <td>NaN</td>\n",
" <td>...</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>1947-01-01 23:59:00</td>\n",
" <td>0425</td>\n",
" <td>DCNN</td>\n",
" <td>24</td>\n",
" <td>1</td>\n",
" <td>NCM</td>\n",
" <td>54</td>\n",
" <td>1001</td>\n",
" <td>0.0</td>\n",
" <td>NaN</td>\n",
" <td>...</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>5 rows × 34 columns</p>\n",
"</div>"
],
"text/plain": [
" OB_END_TIME ID ID_TYPE OB_HOUR_COUNT VERSION_NUM \\\n",
"0 1947-01-01 09:00:00 1226 DCNN 0 1 \n",
"1 1947-01-01 23:59:00 0044 DCNN 24 1 \n",
"2 1947-01-01 23:59:00 0088 DCNN 24 1 \n",
"3 1947-01-01 23:59:00 0293 DCNN 24 1 \n",
"4 1947-01-01 23:59:00 0425 DCNN 24 1 \n",
"\n",
" MET_DOMAIN_NAME SRC_ID REC_ST_IND CS_24HR_SUN_DUR CONC_STATE_ID ... \\\n",
"0 DLY3208 148 1001 NaN NaN ... \n",
"1 NCM 9 1001 0.7 NaN ... \n",
"2 DLY3208 13 1001 0.0 NaN ... \n",
"3 NCM 32 1001 1.8 NaN ... \n",
"4 NCM 54 1001 0.0 NaN ... \n",
"\n",
" HAIL_DAY_ID_Q THUNDER_DAY_FLAG_Q GALE_DAY_FLAG_Q WMO_24HR_SUN_DUR_Q \\\n",
"0 NaN NaN NaN NaN \n",
"1 0.0 0.0 0.0 NaN \n",
"2 NaN NaN NaN NaN \n",
"3 0.0 0.0 0.0 NaN \n",
"4 0.0 0.0 0.0 NaN \n",
"\n",
" METO_STMP_TIME MIDAS_STMP_ETIME DRV_24HR_SUN_DUR DRV_24HR_SUN_DUR_Q \\\n",
"0 NaN NaN NaN NaN \n",
"1 NaN NaN NaN NaN \n",
"2 NaN NaN NaN NaN \n",
"3 NaN NaN NaN NaN \n",
"4 NaN NaN NaN NaN \n",
"\n",
" LYING_SNOW_HT LYING_SNOW_HT_Q \n",
"0 NaN NaN \n",
"1 NaN NaN \n",
"2 NaN NaN \n",
"3 NaN NaN \n",
"4 NaN NaN \n",
"\n",
"[5 rows x 34 columns]"
]
},
"execution_count": 73,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df = dd.read_csv(file, header=None, names=headers,dtype={'ID':str},parse_dates=[\"OB_END_TIME\"],skipinitialspace=True)\n",
"\n",
"df.head()"
]
},
{
"cell_type": "code",
"execution_count": 45,
"metadata": {},
"outputs": [],
"source": [
"import datetime"
]
},
{
"cell_type": "code",
"execution_count": 69,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(datetime.datetime(1947, 1, 1, 0, 0), datetime.datetime(1947, 1, 2, 0, 0))"
]
},
"execution_count": 69,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"dt_start = datetime.datetime(1947,1,1)\n",
"dt_end = dt_start + datetime.timedelta(days=1)\n",
"site = \"0293\"\n",
"dt_start, dt_end"
]
},
{
"cell_type": "code",
"execution_count": 72,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>OB_END_TIME</th>\n",
" <th>ID</th>\n",
" <th>ID_TYPE</th>\n",
" <th>OB_HOUR_COUNT</th>\n",
" <th>VERSION_NUM</th>\n",
" <th>MET_DOMAIN_NAME</th>\n",
" <th>SRC_ID</th>\n",
" <th>REC_ST_IND</th>\n",
" <th>CS_24HR_SUN_DUR</th>\n",
" <th>CONC_STATE_ID</th>\n",
" <th>...</th>\n",
" <th>HAIL_DAY_ID_Q</th>\n",
" <th>THUNDER_DAY_FLAG_Q</th>\n",
" <th>GALE_DAY_FLAG_Q</th>\n",
" <th>WMO_24HR_SUN_DUR_Q</th>\n",
" <th>METO_STMP_TIME</th>\n",
" <th>MIDAS_STMP_ETIME</th>\n",
" <th>DRV_24HR_SUN_DUR</th>\n",
" <th>DRV_24HR_SUN_DUR_Q</th>\n",
" <th>LYING_SNOW_HT</th>\n",
" <th>LYING_SNOW_HT_Q</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>1947-01-01 23:59:00</td>\n",
" <td>0293</td>\n",
" <td>DCNN</td>\n",
" <td>24</td>\n",
" <td>1</td>\n",
" <td>NCM</td>\n",
" <td>32</td>\n",
" <td>1001</td>\n",
" <td>1.8</td>\n",
" <td>NaN</td>\n",
" <td>...</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>1 rows × 34 columns</p>\n",
"</div>"
],
"text/plain": [
" OB_END_TIME ID ID_TYPE OB_HOUR_COUNT VERSION_NUM \\\n",
"3 1947-01-01 23:59:00 0293 DCNN 24 1 \n",
"\n",
" MET_DOMAIN_NAME SRC_ID REC_ST_IND CS_24HR_SUN_DUR CONC_STATE_ID ... \\\n",
"3 NCM 32 1001 1.8 NaN ... \n",
"\n",
" HAIL_DAY_ID_Q THUNDER_DAY_FLAG_Q GALE_DAY_FLAG_Q WMO_24HR_SUN_DUR_Q \\\n",
"3 0.0 0.0 0.0 NaN \n",
"\n",
" METO_STMP_TIME MIDAS_STMP_ETIME DRV_24HR_SUN_DUR DRV_24HR_SUN_DUR_Q \\\n",
"3 NaN NaN NaN NaN \n",
"\n",
" LYING_SNOW_HT LYING_SNOW_HT_Q \n",
"3 NaN NaN \n",
"\n",
"[1 rows x 34 columns]"
]
},
"execution_count": 72,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"in_time_range = (df[\"OB_END_TIME\"] < dt_end ) & (df[\"OB_END_TIME\"] >= dt_start)\n",
"site_of_intrest = df[\"ID\"] == site\n",
"obs = df[ site_of_intrest & in_time_range]\n",
"obs.sum()\n",
"obs.compute()"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3.7",
"language": "python",
"name": "py3.7"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.6"
}
},
"nbformat": 4,
"nbformat_minor": 4
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment