Last active
June 25, 2017 11:39
-
-
Save seumasmorrison/d83d09c0dfb1bedd50ca57e6c7fd0baa to your computer and use it in GitHub Desktop.
Notebook for extracting daily wind observation data for one year from an airport weather station using the Weather Underground and creating a single time indexed and resampled pandas DataFrame then exporting to Excel Workbook.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"IPython Notebook for extracting daily observation data for a single weather station from the [Weather Undeground API](https://www.wunderground.com/weather/api/) ( free registration required ). The example used here is [Kirkwall Airport (EGPA)](https://www.wunderground.com/gb/kirkwall-airport) where daily observation ( 10 minute values ) weather parameters for wind speed metric & direction in degrees were extracted for months during 2016.\n", | |
"\n", | |
"Developed with Python 3.6" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"from calendar import monthrange\n", | |
"import json\n", | |
"import math\n", | |
"import pandas as pd\n", | |
"import time\n", | |
"import urllib3" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"http = urllib3.PoolManager()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"api_key = 'paste_your_own_api_key_here'" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"#Extract wind speed and direction\n", | |
"def get_daily_data(json_daily):\n", | |
" daily_data = {}\n", | |
" for index, x in enumerate(json_daily['history']['observations']):\n", | |
" daily_data[index] = {k: x[k] for k in ('date', 'wspdm', 'wdird')}\n", | |
" hourly_data_list = []\n", | |
" for hourly_data in daily_data:\n", | |
" date_time = pd.to_datetime(daily_data[hourly_data]['date']['pretty'])\n", | |
" record = {}\n", | |
" for x in ['wspdm', 'wdird']:\n", | |
" record[x] = daily_data[hourly_data][x]\n", | |
" hourly_data_list.append(pd.DataFrame(record, index=[date_time]))\n", | |
" daily_data = pd.concat(hourly_data_list)\n", | |
" return daily_data" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Write daily CSV files for a year from the Weather Underground API.\n", | |
"# Include 7 second delay between requests to remain on the free tier\n", | |
"year = 2016\n", | |
"prefix = 'http://api.wunderground.com/api/'\n", | |
"suffix = '/q/airport/EGPA.json'\n", | |
"for month_num in range(1,13):\n", | |
" range_tuple = monthrange(year, month_num)\n", | |
" month_str = \"%02d\" % (month_num,)\n", | |
" for day_num in range(range_tuple[0], range_tuple[1]):\n", | |
" day_str = \"%02d\" % (day_num,)\n", | |
" time.sleep(7)\n", | |
" r = http.request('GET', prefix + api_key + '/history_2016' + month_str + day_str + suffix)\n", | |
" json_daily = json.loads(r.data)\n", | |
" daily_data = get_daily_data(json_daily)\n", | |
" daily_data.to_csv(str(year) + month_str + day_str + '_EGPA.csv')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Read CSV files for a particular month into DataFrames, concatenate into one DataFrame & write to HDF\n", | |
"for month_num in range(1,13):\n", | |
" month_str = \"%02d\" % (month_num,)\n", | |
" month_data = !ls 2016$month_str*EGPA.csv\n", | |
" df_list = []\n", | |
" for x in month_data:\n", | |
" df = pd.read_csv(x, skiprows=1, names = ['date_time', 'wdird', 'wspdm'])\n", | |
" df.index = df.date_time\n", | |
" df.drop('date_time',axis=1, inplace=True)\n", | |
" df_list.append(df)\n", | |
" month_df = pd.concat(df_list)\n", | |
" month_df.to_hdf('Kirkwall_Airport_2016_EGPA.h5',month_str, format='table', append=True)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"#Concatenate all DataFrames from each month into one annual DataFrame\n", | |
"df_list = []\n", | |
"for x in pd.HDFStore('Kirkwall_Airport_2016_EGPA.h5').keys():\n", | |
" df_list.append(pd.HDFStore('Kirkwall_Airport_2016_EGPA.h5')[x])\n", | |
"annual_df = pd.concat(df_list)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"annual_df.to_excel('Kirkwall_Airport_2016_wind_speed_direction_10min.xlsx')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"annual_df.index = annual_df.to_datetime(annual_df.index)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"annual_df.to_hdf('Kirkwall_Airport_2016.h5','EGPA')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"annual_df_30min_resample = annual_df.resample('30min')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"def circular_mean(weights, angles):\n", | |
" x = y = 0.\n", | |
" for angle, weight in zip(angles, weights):\n", | |
" x += math.cos(math.radians(angle)) * weight\n", | |
" y += math.sin(math.radians(angle)) * weight\n", | |
"\n", | |
" mean = math.degrees(math.atan2(y, x))\n", | |
" if mean < 0:\n", | |
" mean = 360 + mean\n", | |
" return mean" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"dir_mean = []\n", | |
"for x in annual_df_30min_resample:\n", | |
" dir_mean.append(circular_mean(x[1]['wspdm'].values,x[1]['wdird'].values))" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"resampled_30min_df = annual_df_30min_resample.mean()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"mean_direction_df = pd.DataFrame(dir_mean, index = resampled_30min_df.index)\n", | |
"resampled_30min_df.join(mean_direction_df)\n", | |
"resampled_30min_df.drop('wdird',axis=1,inplace=True)\n", | |
"resampled_30min_df.columns = ['Wind_Speed_kph','Wind_Direction_degrees']\n", | |
"resampled_30min_df.to_excel('Kirkwall_Airport_2016_30_minute_mean_wind_speed_direction.xlsx')" | |
] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.6.1" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 2 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment