# seumasmorrison/historical_concat.py

## historical_concat.py
import os
import glob
import pandas as pd
from datetime import datetime

# Expected folder hierarchy: D:\Buoy_Data\<Buoy_Name>\<Year>\<Month>
buoy_names = ['Buoy_Name']
buoy_path = 'D:\\Buoy_Data\\'

# Column names handed to read_csv for the .his files (see the driver loop).
his_columns = [
    'date_time',
    'Tp', 'dirp', 'sprp', 'Tz', 'Hm0', 'TI', 'T1', 'Tc', 'Tdw2', 'Tdw1', 'Tpc',
    'nu', 'eps', 'QP', 'Ss', 'TRef', 'TSea', 'Bat',
]

# Column names for .hiw files; nothing in the visible code passes them yet.
hiw_columns = [
    'date_time',
    '% no reception errors',
    'Hmax', 'Tmax',
    'H(1/10)', 'T(1/10)',
    'H1/3', 'T1/3',
    'Hav', 'Tav',
    'Eps', '#Waves',
]

# Glob patterns used to pick one file per month folder:
#   - *.his names that contain a '$'
#   - *.his names that contain a '}' preceded by any character except '$'
#   - every *.hiw file
matching_string_buoy_his = '*$*.his'
matching_string_computed_his = '*[!$]}*.his'
matching_string_hiw = '*.hiw'

def get_buoy_dataframe(buoys_root_path, buoy_name, matching_string, columns):
    """Concatenate a buoy's monthly history files and export the result.

    Walks ``<buoys_root_path><buoy_name>/<year>/<month>/``, reads the first
    file (in sorted order) matching ``matching_string`` in every month
    folder, indexes its rows by timestamp and joins the months of *all*
    years into one time-sorted DataFrame.  Two files are written next to
    the buoy folder, where ``<ext>`` is the last three characters of
    ``matching_string``:

    * ``<buoy folder>_<ext>_dataframe`` - pickle of the full DataFrame
    * ``<buoy folder>_30_minute_<ext>.xlsx`` - 30-minute means of the
      numeric columns

    Args:
        buoys_root_path: Root directory holding one sub-folder per buoy.
            It is concatenated directly with ``buoy_name``, so it must end
            with a path separator.
        buoy_name: Name of the buoy sub-folder to process.
        matching_string: Glob pattern selecting the file to read in each
            month folder.
        columns: Column names passed to ``read_csv``; must contain
            ``'date_time'``.  Each ``date_time`` value has its last five
            characters dropped and is parsed as ``%Y-%m-%dT%H:%M:%S``.

    Returns:
        The concatenated, time-sorted DataFrame.

    Raises:
        ValueError: If no month folder contains a matching file.
    """
    # Plain concatenation is kept on purpose: existing callers pass a root
    # that already ends with a separator, so all paths stay the same.
    buoy_dir = buoys_root_path + buoy_name
    suffix = matching_string[-3:]

    # One list for the whole buoy; resetting it per year would silently
    # discard every year except the last one processed.
    month_dataframes = []
    for year in sorted(os.listdir(buoy_dir)):
        year_path = os.path.join(buoy_dir, year)
        if not os.path.isdir(year_path):
            continue
        for month in sorted(os.listdir(year_path)):
            month_path = os.path.join(year_path, month)
            if not os.path.isdir(month_path):
                continue
            print(month_path)
            # Glob with the full path instead of os.chdir() so the caller's
            # working directory is left untouched.
            matches = sorted(glob.glob(os.path.join(month_path, matching_string)))
            if not matches:
                print('No file matching %s in %s, skipping'
                      % (matching_string, month_path))
                continue
            month_dataframe = pd.read_csv(matches[0], names=columns)
            month_dataframe.index = pd.DatetimeIndex([
                datetime.strptime(date_time_string[:-5], "%Y-%m-%dT%H:%M:%S")
                for date_time_string in month_dataframe['date_time'].values
            ])
            month_dataframes.append(month_dataframe)

    if not month_dataframes:
        raise ValueError('No files matching %r found under %s'
                         % (matching_string, buoy_dir))

    buoy_history_df = pd.concat(month_dataframes).sort_index()
    buoy_history_df.to_pickle(buoy_dir + '_' + suffix + '_dataframe')

    # resample() returns a Resampler in current pandas, so the mean has to
    # be requested explicitly; only numeric columns can be averaged.
    numeric_history = buoy_history_df.select_dtypes(include=['number'])
    thirty_min_resample = numeric_history.resample('30min').mean()
    thirty_min_resample.to_excel(buoy_dir + '_30_minute_' + suffix + '.xlsx')
    return buoy_history_df

# Build and export the history for every configured buoy, selecting files
# with matching_string_computed_his and labelling columns with his_columns.
for buoy_name in buoy_names:
    get_buoy_dataframe(
        buoys_root_path=buoy_path,
        buoy_name=buoy_name,
        matching_string=matching_string_computed_his,
        columns=his_columns,
    )
	import os
	import glob
	import pandas as pd
	from datetime import datetime

	# Expected folder hierarchy: D:\Buoy_Data\<Buoy_Name>\<Year>\<Month>
	buoy_names = ['Buoy_Name']
	buoy_path = 'D:\\Buoy_Data\\'

	# Column names handed to read_csv for the .his files.
	his_columns = ['date_time', 'Tp', 'dirp', 'sprp', 'Tz', 'Hm0', 'TI', 'T1', 'Tc',
	               'Tdw2', 'Tdw1', 'Tpc', 'nu', 'eps', 'QP', 'Ss', 'TRef', 'TSea', 'Bat']

	# Column names for .hiw files; nothing in the visible code passes them yet.
	hiw_columns = ['date_time', '% no reception errors', 'Hmax', 'Tmax', 'H(1/10)',
	               'T(1/10)', 'H1/3', 'T1/3', 'Hav', 'Tav', 'Eps', '#Waves']

	# Glob patterns used to pick one file per month folder.  The '*'
	# wildcards are required: without them the first two patterns only match
	# files literally named '$.his' or '<char>}.his'.
	matching_string_buoy_his = '*$*.his'
	matching_string_computed_his = '*[!$]}*.his'
	matching_string_hiw = '*.hiw'

	def get_buoy_dataframe(buoys_root_path, buoy_name, matching_string, columns):
	    """Concatenate a buoy's monthly history files and export the result.

	    Walks ``<buoys_root_path><buoy_name>/<year>/<month>/``, reads the first
	    file (in sorted order) matching ``matching_string`` in every month
	    folder, indexes its rows by timestamp and joins the months of *all*
	    years into one time-sorted DataFrame.  Two files are written next to
	    the buoy folder, where ``<ext>`` is the last three characters of
	    ``matching_string``:

	    * ``<buoy folder>_<ext>_dataframe`` - pickle of the full DataFrame
	    * ``<buoy folder>_30_minute_<ext>.xlsx`` - 30-minute means of the
	      numeric columns

	    Args:
	        buoys_root_path: Root directory holding one sub-folder per buoy.
	            It is concatenated directly with ``buoy_name``, so it must end
	            with a path separator.
	        buoy_name: Name of the buoy sub-folder to process.
	        matching_string: Glob pattern selecting the file to read in each
	            month folder.
	        columns: Column names passed to ``read_csv``; must contain
	            ``'date_time'``.  Each ``date_time`` value has its last five
	            characters dropped and is parsed as ``%Y-%m-%dT%H:%M:%S``.

	    Returns:
	        The concatenated, time-sorted DataFrame.

	    Raises:
	        ValueError: If no month folder contains a matching file.
	    """
	    # Plain concatenation is kept on purpose: existing callers pass a root
	    # that already ends with a separator, so all paths stay the same.
	    buoy_dir = buoys_root_path + buoy_name
	    suffix = matching_string[-3:]
	    # One list for the whole buoy; resetting it per year would silently
	    # discard every year except the last one processed.
	    month_dataframes = []
	    for year in sorted(os.listdir(buoy_dir)):
	        year_path = os.path.join(buoy_dir, year)
	        if not os.path.isdir(year_path):
	            continue
	        for month in sorted(os.listdir(year_path)):
	            month_path = os.path.join(year_path, month)
	            if not os.path.isdir(month_path):
	                continue
	            print(month_path)
	            # Glob with the full path instead of os.chdir() so the caller's
	            # working directory is left untouched.
	            matches = sorted(glob.glob(os.path.join(month_path, matching_string)))
	            if not matches:
	                print('No file matching %s in %s, skipping'
	                      % (matching_string, month_path))
	                continue
	            month_dataframe = pd.read_csv(matches[0], names=columns)
	            month_dataframe.index = pd.DatetimeIndex([
	                datetime.strptime(date_time_string[:-5], "%Y-%m-%dT%H:%M:%S")
	                for date_time_string in month_dataframe['date_time'].values
	            ])
	            month_dataframes.append(month_dataframe)
	    if not month_dataframes:
	        raise ValueError('No files matching %r found under %s'
	                         % (matching_string, buoy_dir))
	    buoy_history_df = pd.concat(month_dataframes).sort_index()
	    buoy_history_df.to_pickle(buoy_dir + '_' + suffix + '_dataframe')
	    # resample() returns a Resampler in current pandas, so the mean has to
	    # be requested explicitly; only numeric columns can be averaged.
	    numeric_history = buoy_history_df.select_dtypes(include=['number'])
	    thirty_min_resample = numeric_history.resample('30min').mean()
	    thirty_min_resample.to_excel(buoy_dir + '_30_minute_' + suffix + '.xlsx')
	    return buoy_history_df

	# Build and export the history for every configured buoy.  The call has to
	# be indented under the loop; at the same level as the `for` it is a
	# syntax error.
	for buoy_name in buoy_names:
	    get_buoy_dataframe(buoy_path, buoy_name, matching_string_computed_his, his_columns)