Last active
October 12, 2015 21:19
-
-
Save seumasmorrison/4089043 to your computer and use it in GitHub Desktop.
Script for producing date-sorted pandas DataFrames and writing Excel (xlsx) files from historical files, which can be resampled (30 minutes specified). Used with .his/.hiw files from Datawell Directional Waverider MK II/III buoys, produced by RFBuoy v2.1.27.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import glob
import os
from datetime import datetime

import pandas as pd
# Example below assumes the following folder hierarchy:
#   D:\Buoy_Data\Buoy_Name\Year\Month\
buoy_names = ['Buoy_Name']

# Root folder holding one sub-folder per buoy. The trailing separator is
# required: get_buoy_dataframe appends the buoy name by plain concatenation.
buoy_path = 'D:\\Buoy_Data\\'

# Column names applied, in order, to the .his files (which are read as
# header-less CSV because explicit names are supplied to read_csv).
his_columns = [
    'date_time', 'Tp', 'dirp', 'sprp', 'Tz', 'Hm0', 'TI', 'T1', 'Tc',
    'Tdw2', 'Tdw1', 'Tpc', 'nu', 'eps', 'QP', 'Ss', 'TRef', 'TSea', 'Bat',
]

# Column names applied, in order, to the .hiw files.
hiw_columns = [
    'date_time', '% no reception errors', 'Hmax', 'Tmax', 'H(1/10)',
    'T(1/10)', 'H1/3', 'T1/3', 'Hav', 'Tav', 'Eps', '#Waves',
]

# glob patterns used to pick the file to load from each month folder.
# '*$*.his'     -> .his files whose name contains a '$'
# '*[!$]}*.his' -> .his files whose name contains a '}' that is preceded by
#                  any character other than '$'
# NOTE(review): the variable names suggest '$' marks buoy-generated files
# and its absence marks computed ones -- confirm against RFBuoy's naming.
matching_string_buoy_his = '*$*.his'
matching_string_computed_his = '*[!$]}*.his'
matching_string_hiw = '*.hiw'
def get_buoy_dataframe(buoys_root_path, buoy_name, matching_string, columns):
    """Build, save and return a date-sorted DataFrame for one buoy.

    Walks ``<buoys_root_path><buoy_name>/<year>/<month>/``, loads the first
    file in each month folder that matches ``matching_string`` and
    concatenates the results across *all* years and months.

    Two files are written next to the buoy folder, where ``<ext>`` is the
    last three characters of ``matching_string``:

    * ``<buoys_root_path><buoy_name>_<ext>_dataframe`` -- pickle of the full,
      index-sorted DataFrame;
    * ``<buoys_root_path><buoy_name>_30_minute_<ext>.xlsx`` -- 30-minute mean
      of the numeric columns.

    Args:
        buoys_root_path: Root folder; must end with a path separator because
            ``buoy_name`` is appended by plain string concatenation.
        buoy_name: Name of the buoy's sub-folder under the root.
        matching_string: glob pattern selecting the file inside each month
            folder (for example ``'*.hiw'``).
        columns: Column names for the CSV files; must include ``'date_time'``.

    Returns:
        The concatenated DataFrame, indexed by timestamp and sorted by index.

    Raises:
        ValueError: If no month folder contains a matching file.
    """
    buoy_path = buoys_root_path + buoy_name
    # Collected across every year; creating this list inside the year loop
    # would silently drop all but the last year's data.
    month_dataframes = []
    for year in sorted(os.listdir(buoy_path)):
        year_path = os.path.join(buoy_path, year)
        if not os.path.isdir(year_path):
            continue
        for month in sorted(os.listdir(year_path)):
            month_path = os.path.join(year_path, month)
            if not os.path.isdir(month_path):
                continue
            print(month_path)
            # Glob with the full path rather than os.chdir(), so the caller's
            # working directory is left untouched. The folder part is escaped
            # so that only matching_string is treated as a pattern.
            pattern = os.path.join(glob.escape(month_path), matching_string)
            matches = sorted(glob.glob(pattern))
            if not matches:
                # A month without a matching file is skipped, not fatal.
                continue
            month_dataframe = pd.read_csv(matches[0], names=columns)
            # The last five characters of each timestamp are discarded before
            # parsing; the remainder must be 'YYYY-MM-DDTHH:MM:SS'.
            month_dataframe.index = pd.DatetimeIndex([
                datetime.strptime(date_time_string[:-5], "%Y-%m-%dT%H:%M:%S")
                for date_time_string in month_dataframe['date_time'].values
            ])
            month_dataframes.append(month_dataframe)

    if not month_dataframes:
        raise ValueError(
            'No files matching %r found under %r' % (matching_string, buoy_path)
        )

    buoy_history_df = pd.concat(month_dataframes).sort_index()
    suffix = matching_string[-3:]
    buoy_history_df.to_pickle(buoy_path + '_' + suffix + '_dataframe')

    # resample() alone yields a Resampler in current pandas, so the mean is
    # requested explicitly. Only numeric columns are averaged (the raw
    # 'date_time' strings cannot be).
    thirty_min_resample = (
        buoy_history_df.select_dtypes(include='number').resample('30Min').mean()
    )
    thirty_min_resample.to_excel(buoy_path + '_30_minute_' + suffix + '.xlsx')
    return buoy_history_df
# Process every configured buoy using the computed-.his file pattern and the
# .his column layout; runs at import time, as in the original script.
for buoy_name in buoy_names:
    get_buoy_dataframe(buoy_path, buoy_name, matching_string_computed_his,
                       his_columns)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment