Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save seumasmorrison/deb738068e51dd9e29647fbc9dd17c83 to your computer and use it in GitHub Desktop.
Save seumasmorrison/deb738068e51dd9e29647fbc9dd17c83 to your computer and use it in GitHub Desktop.
{
"cells": [
{
"cell_type": "code",
"execution_count": 7,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"# Updated script for Python 3, based on https://gist.github.com/seumasmorrison/1abaa2308044814167a9\n",
"from datetime import datetime\n",
"import glob\n",
"import os\n",
"import pandas as pd\n",
"\n",
"# Column names passed to pd.read_csv when a .his file is loaded.\n",
"his_columns = [\n",
"    'date_time', 'tp', 'dirp', 'sprp', 'tz', 'hm0', 'ti', 't1', 'tc',\n",
"    'tdw2', 'tdw1', 'tpc', 'nu', 'eps', 'qp', 'ss', 'tref', 'tsea', 'bat',\n",
"]\n",
"\n",
"# Column names passed to pd.read_csv when a .hiw file is loaded.\n",
"hiw_columns = [\n",
"    'date_time', '% no reception errors', 'hmax', 'tmax', 'h(1/10)',\n",
"    't(1/10)', 'h1/3', 't1/3', 'Hav', 'Tav', 'Eps', '#Waves',\n",
"]\n",
"\n",
"# Glob patterns used to find the monthly files.\n",
"matching_string_buoy_his = '*$*.his'  # not referenced elsewhere in this cell\n",
"matching_string_computed_his = '*[!$]}*.his'\n",
"matching_string_hiw = '*.hiw'\n",
"\n",
"# File-type key -> glob pattern; load() iterates over this mapping.\n",
"matching_file_types = {\n",
"    'his': matching_string_computed_his,\n",
"    'hiw': matching_string_hiw,\n",
"}\n",
"\n",
"def strip_non_directories(path):\n",
"    \"\"\"Return the names of the entries in *path* that are directories.\"\"\"\n",
"    sub_dirs = []\n",
"    for entry in os.listdir(path):\n",
"        # isdir needs the full path; the bare entry name is what is returned.\n",
"        if os.path.isdir(os.path.join(path, entry)):\n",
"            sub_dirs.append(entry)\n",
"    return sub_dirs\n",
"\n",
"def get_historical_dataframe(buoy_path, matching_string):\n",
"    \"\"\"Build one time-indexed DataFrame from a buoy's monthly files.\n",
"\n",
"    Walks ``buoy_path/<year>/<month>/``, reads the first file (in sorted\n",
"    order) of each month directory that matches ``matching_string`` and\n",
"    concatenates the results in time order.  The combined frame is pickled\n",
"    and its 30- and 60-minute means are written to .xlsx files; every\n",
"    output path starts with ``buoy_path``.\n",
"\n",
"    Args:\n",
"        buoy_path: directory whose sub-directories are treated as years.\n",
"        matching_string: glob pattern for the monthly file.  A pattern\n",
"            ending in 'w' is read with hiw_columns, any other pattern\n",
"            with his_columns.\n",
"\n",
"    Returns:\n",
"        The 30-minute resampled DataFrame, or None when no month\n",
"        directory contained a matching file.\n",
"    \"\"\"\n",
"    print('buoy_path', buoy_path)\n",
"    # The column set depends only on the pattern, so pick it once.\n",
"    columns = hiw_columns if matching_string.endswith('w') else his_columns\n",
"    suffix = matching_string[-3:]\n",
"\n",
"    def parse_date_time(value):\n",
"        # read_csv yields float NaN (not the string 'nan') for a missing\n",
"        # timestamp; both forms map to the epoch placeholder.\n",
"        if pd.isnull(value) or value == 'nan':\n",
"            return datetime(1970, 1, 1)\n",
"        # The last five characters are dropped before parsing (presumably a\n",
"        # fraction/zone suffix -- TODO confirm against a data file).\n",
"        return datetime.strptime(value[:-5], '%Y-%m-%dT%H:%M:%S')\n",
"\n",
"    def resample_write_xlsx(df, period):\n",
"        # resample() only groups rows; mean() is needed to get a DataFrame\n",
"        # that can be written.  Text columns cannot be averaged, so only\n",
"        # the numeric ones are kept.\n",
"        numeric_df = df.select_dtypes(include='number')\n",
"        resampled_df = numeric_df.resample(period).mean()\n",
"        resampled_df.to_excel(\n",
"            buoy_path + '_' + period + '_' + suffix + '.xlsx')\n",
"        return resampled_df\n",
"\n",
"    df_list = []\n",
"    years = strip_non_directories(buoy_path)\n",
"    print('years', years)\n",
"    for year in years:\n",
"        year_path = os.path.join(buoy_path, year)\n",
"        for month in strip_non_directories(year_path):\n",
"            month_path = os.path.join(year_path, month)\n",
"            # glob order is unspecified; sort so the chosen file is stable.\n",
"            matches = sorted(\n",
"                glob.glob(os.path.join(month_path, matching_string)))\n",
"            if not matches:\n",
"                print('No file found matching', matching_string)\n",
"                continue\n",
"            df = pd.read_csv(matches[0], names=columns)\n",
"            df.index = pd.DatetimeIndex(\n",
"                [parse_date_time(value) for value in df['date_time'].values])\n",
"            df_list.append(df)\n",
"\n",
"    if not df_list:\n",
"        return None\n",
"\n",
"    large_df = pd.concat(df_list).sort_index()\n",
"    large_df.to_pickle(buoy_path + '_' + suffix + '_dataframe')\n",
"    thirty_min_resample = resample_write_xlsx(large_df, '30Min')\n",
"    resample_write_xlsx(large_df, '60Min')\n",
"    return thirty_min_resample\n",
"\n",
"\n",
"def load(buoy_path):\n",
"    \"\"\"Build the historical frame for each file type and store it in HDF5.\n",
"\n",
"    For every entry of matching_file_types, the frame returned by\n",
"    get_historical_dataframe() is written to ``<buoy_path>/hist.h5`` under\n",
"    the file-type key.  File types for which no frame is returned are\n",
"    skipped instead of raising.\n",
"    \"\"\"\n",
"    hdf_path = os.path.join(buoy_path, 'hist.h5')\n",
"    for key, value in matching_file_types.items():\n",
"        print(key)\n",
"        hist_df = get_historical_dataframe(buoy_path, value)\n",
"        if hist_df is None:\n",
"            # No matching files were found, so there is nothing to store.\n",
"            continue\n",
"        # key is passed by keyword: positional use is deprecated in pandas.\n",
"        hist_df.to_hdf(hdf_path, key=key)"
]
}
],
"metadata": {
"anaconda-cloud": {},
"kernelspec": {
"display_name": "Python [default]",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.5.2"
}
},
"nbformat": 4,
"nbformat_minor": 1
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment