Skip to content

Instantly share code, notes, and snippets.

@seumasmorrison
Last active October 12, 2015 21:19
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save seumasmorrison/4089043 to your computer and use it in GitHub Desktop.
Save seumasmorrison/4089043 to your computer and use it in GitHub Desktop.
Script for producing date-sorted pandas DataFrames and writing Excel (xlsx) files from historical files, which can be resampled (30 minutes specified). Used with his/hiw files from Datawell Directional Waverider MK II/III, produced by RFBuoy v2.1.27.
import os
import glob
import pandas as pd
from datetime import datetime
# Expected folder layout: <buoy_path><buoy name>\<year>\<month>\
# e.g. D:\Buoy_Data\Buoy_Name\Year\Month\
buoy_path = 'D:\\Buoy_Data\\'
buoy_names = ['Buoy_Name']

# Column names assigned, in order, to the fields of a .his file.
his_columns = [
    'date_time',
    'Tp', 'dirp', 'sprp', 'Tz', 'Hm0', 'TI', 'T1', 'Tc',
    'Tdw2', 'Tdw1', 'Tpc', 'nu', 'eps', 'QP', 'Ss', 'TRef', 'TSea', 'Bat',
]

# Column names for .hiw files (presumably paired with matching_string_hiw;
# the driver at the bottom of this script only processes .his files).
hiw_columns = [
    'date_time',
    '% no reception errors',
    'Hmax', 'Tmax', 'H(1/10)', 'T(1/10)', 'H1/3', 'T1/3', 'Hav', 'Tav',
    'Eps', '#Waves',
]

# Glob patterns used to pick one file per month folder.
# '*$*.his'     -> .his names that contain a '$'
# '*[!$]}*.his' -> .his names with a '}' preceded by any character but '$'
# '*.hiw'       -> every .hiw file
matching_string_buoy_his = '*$*.his'
matching_string_computed_his = '*[!$]}*.his'
matching_string_hiw = '*.hiw'
def _read_month_file(month_path, matching_string, columns):
    """Read the first file in month_path matching the pattern, or return None."""
    # Glob with the folder prefixed instead of os.chdir(): the lookup must not
    # change the working directory of the whole process.
    matches = sorted(glob.glob(os.path.join(month_path, matching_string)))
    if not matches:
        return None
    month_dataframe = pd.read_csv(matches[0], names=columns)
    # The last five characters of every timestamp are dropped before parsing
    # (presumably a fractional-second/zone suffix such as ".000Z" - confirm
    # against the file format).
    timestamps = [
        datetime.strptime(stamp[:-5], "%Y-%m-%dT%H:%M:%S")
        for stamp in month_dataframe['date_time'].values
    ]
    month_dataframe.index = pd.DatetimeIndex(timestamps)
    return month_dataframe


def get_buoy_dataframe(buoys_root_path, buoy_name, matching_string, columns):
    """Build, save and return a date-sorted DataFrame of a buoy's history.

    Walks ``<buoys_root_path><buoy_name>/<year>/<month>/``, reads the first
    file matching ``matching_string`` in each month folder, indexes its rows
    by the parsed ``date_time`` column and concatenates the months of *all*
    years. Two files are written next to the buoy folder, where ``<ext>`` is
    the last three characters of ``matching_string``:

    * ``<buoy folder>_<ext>_dataframe``      - pickle of the sorted frame
    * ``<buoy folder>_30_minute_<ext>.xlsx`` - 30-minute means of the
      numeric columns

    Args:
        buoys_root_path: Folder holding one sub-folder per buoy.
        buoy_name: Name of the buoy's sub-folder.
        matching_string: Glob pattern selecting the file inside a month folder.
        columns: Column names for the header-less files; must include
            ``'date_time'``.

    Returns:
        The concatenated DataFrame, sorted by its DatetimeIndex.

    Raises:
        ValueError: If no month folder contains a matching file.
    """
    buoy_path = os.path.join(buoys_root_path, buoy_name)
    # One list for the whole walk: resetting it per year would discard every
    # year except the last one listed.
    month_dataframes = []
    for year in sorted(os.listdir(buoy_path)):
        year_path = os.path.join(buoy_path, year)
        if not os.path.isdir(year_path):
            continue  # stray file beside the year folders
        for month in sorted(os.listdir(year_path)):
            month_path = os.path.join(year_path, month)
            if not os.path.isdir(month_path):
                continue
            print(month_path)  # single-argument form works on Python 2 and 3
            month_dataframe = _read_month_file(month_path, matching_string,
                                               columns)
            if month_dataframe is None:
                continue  # month folder without a matching file
            month_dataframes.append(month_dataframe)
    if not month_dataframes:
        raise ValueError('No files matching %r found under %s'
                         % (matching_string, buoy_path))

    buoy_history_df = pd.concat(month_dataframes).sort_index()
    suffix = matching_string[-3:]
    buoy_history_df.to_pickle(buoy_path + '_' + suffix + '_dataframe')
    # resample() returns a Resampler on current pandas, so the mean is asked
    # for explicitly; it is taken over numeric columns only because the raw
    # 'date_time' strings cannot be averaged.
    numeric_history = buoy_history_df.select_dtypes(include=['number'])
    thirty_min_resample = numeric_history.resample('30Min').mean()
    thirty_min_resample.to_excel(buoy_path + '_30_minute_' + suffix + '.xlsx')
    return buoy_history_df
# Process the computed .his history files of every configured buoy.
for buoy_name in buoy_names:
    get_buoy_dataframe(
        buoys_root_path=buoy_path,
        buoy_name=buoy_name,
        matching_string=matching_string_computed_his,
        columns=his_columns,
    )
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment