Skip to content

Instantly share code, notes, and snippets.

@seumasmorrison
Created February 9, 2015 12:53
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save seumasmorrison/1abaa2308044814167a9 to your computer and use it in GitHub Desktop.
Save seumasmorrison/1abaa2308044814167a9 to your computer and use it in GitHub Desktop.
Module for concatenating, resampling and writing Datawell .his/.hiw files as Excel spreadsheets; modified from the hebtools version.
# -*- coding: utf-8 -*-
"""
Created on Fri Feb 06 14:38:11 2015
@author: le12jm
"""
from datetime import datetime
#from hebtools.common import wave_power
import glob
import os
import pandas as pd
import logging
import sys
# Route log records of level INFO and above to stderr.
logging.basicConfig(stream=sys.stderr, level=logging.INFO)

# Column names assigned (in order) to the header-less .his files.
his_columns = [
    'date_time', 'tp', 'dirp', 'sprp', 'tz', 'hm0', 'ti', 't1',
    'tc', 'tdw2', 'tdw1', 'tpc', 'nu', 'eps', 'qp', 'ss', 'tref', 'tsea',
    'bat',
]

# Column names assigned (in order) to the header-less .hiw files.
hiw_columns = [
    'date_time', '% no reception errors', 'hmax', 'tmax', 'h(1/10)',
    't(1/10)', 'h1/3', 't1/3', 'Hav', 'Tav', 'Eps', '#Waves',
]

# Glob pattern: any .his file whose name contains a '$'.
# NOTE(review): not referenced by the code visible in this file.
matching_string_buoy_his = '*$*.his'

# Glob pattern: any .his file whose name contains a '}' that is immediately
# preceded by a character other than '$'.
matching_string_computed_his = '*[!$]}*.his'

# Glob pattern: any .hiw file.
matching_string_hiw = '*.hiw'

# NOTE(review): not referenced by the code visible in this file; presumably a
# water depth for the commented-out wave_power import -- units unconfirmed.
depth = 65

# Maps a short file-type key to the glob pattern used to locate its files.
matching_file_types = dict(
    his=matching_string_computed_his,
    hiw=matching_string_hiw,
)
def strip_non_directories(path):
    """Return the names of the sub-directories directly inside *path*.

    Plain files (and anything else that is not a directory) are dropped.
    Only the entry names are returned, not full paths.

    The names are returned sorted because ``os.listdir`` gives them in
    arbitrary order; sorting makes the traversal order reproducible.

    Raises whatever ``os.listdir`` raises when *path* cannot be listed.
    """
    return sorted(
        entry for entry in os.listdir(path)
        if os.path.isdir(os.path.join(path, entry))
    )
def _parse_date_times(raw_values):
    """Convert raw ``date_time`` cells into a list of ``datetime`` objects.

    Each string has its last five characters removed and is then parsed as
    ``%Y-%m-%dT%H:%M:%S``. Missing cells become ``datetime(1970, 1, 1)``.
    """
    placeholder = datetime(1970, 1, 1)
    parsed = []
    for raw in raw_values:
        # read_csv yields float NaN for an empty cell, not the string 'nan',
        # so test for both; slicing a float would raise TypeError.
        if pd.isnull(raw) or raw == 'nan':
            parsed.append(placeholder)
        else:
            parsed.append(datetime.strptime(raw[:-5], "%Y-%m-%dT%H:%M:%S"))
    return parsed


def get_historical_dataframe(buoy_path, matching_string):
    """Concatenate the files under *buoy_path* and write resampled copies.

    *buoy_path* is walked two directory levels deep (the code calls the
    levels "years" and "months"). In each second-level directory the first
    file matching the glob pattern *matching_string* is read as a
    header-less CSV and indexed by its parsed ``date_time`` column. The
    column names are ``hiw_columns`` when the pattern ends in ``'w'`` and
    ``his_columns`` otherwise.

    Side effects (``<ext>`` is the last three characters of the pattern):

    * pickles the concatenated, index-sorted frame to
      ``<buoy_path>_<ext>_dataframe``;
    * writes ``<buoy_path>_30Min_<ext>.xlsx`` and
      ``<buoy_path>_60Min_<ext>.xlsx`` holding the per-period means of the
      numeric columns.

    Returns the 30-minute resampled DataFrame, or ``None`` (after logging a
    warning) when no matching file was found anywhere under *buoy_path*.
    """
    logging.info(("buoy_path", buoy_path))
    # The final character of the pattern tells '.hiw' apart from '.his'.
    columns = hiw_columns if matching_string[-1] == 'w' else his_columns
    extension = matching_string[-3:]

    df_list = []
    years = strip_non_directories(buoy_path)
    logging.info(("years", years))
    for year in years:
        year_path = os.path.join(buoy_path, year)
        for month in strip_non_directories(year_path):
            month_path = os.path.join(year_path, month)
            matches = glob.glob(month_path + os.sep + matching_string)
            if not matches:
                logging.warning("No file found matching %s in %s",
                                matching_string, month_path)
                continue
            # Only the first match in each directory is used.
            df = pd.read_csv(matches[0], names=columns)
            df.index = pd.DatetimeIndex(
                _parse_date_times(df['date_time'].values))
            df_list.append(df)

    if not df_list:
        logging.warning("No data found under %s for %s",
                        buoy_path, matching_string)
        return None

    large_df = pd.concat(df_list).sort_index()
    large_df.to_pickle(buoy_path + '_' + extension + '_dataframe')

    def resample_write_xlsx(df, period):
        """Write the per-*period* means of *df* to Excel and return them."""
        # resample() used to aggregate with the mean by default and drop
        # non-numeric columns; current pandas returns a Resampler and needs
        # both steps spelled out.
        numeric_df = df.select_dtypes(include=['number'])
        resampled_df = numeric_df.resample(period).mean()
        resampled_df.to_excel(
            buoy_path + '_' + period + '_' + extension + '.xlsx')
        return resampled_df

    thirty_min_resample = resample_write_xlsx(large_df, '30Min')
    resample_write_xlsx(large_df, '60Min')
    return thirty_min_resample
def load(buoy_path):
    """Build the historical frame for every file type and store it as HDF5.

    For each ``key -> glob pattern`` entry in ``matching_file_types`` this
    calls ``get_historical_dataframe(buoy_path, pattern)`` and writes the
    returned frame to ``<buoy_path>/hist.h5`` under *key*.

    A ``None`` result is logged and skipped instead of raising
    ``AttributeError`` on the ``to_hdf`` call.
    """
    hdf_path = os.path.join(buoy_path, 'hist.h5')
    # .items() works on Python 2 and 3; .iteritems() exists only on Python 2.
    for key, value in matching_file_types.items():
        logging.info("file type: %s", key)
        hist_df = get_historical_dataframe(buoy_path, value)
        if hist_df is None:
            logging.warning("Nothing to store for %s under %s",
                            key, buoy_path)
            continue
        # key is passed by keyword: newer pandas deprecates passing it
        # positionally.
        hist_df.to_hdf(hdf_path, key=key)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment