# OneGneissGuy/SM_data.py

## SM_data.py
# -*- coding: utf-8 -*-
"""
:DESCRIPTION:Code for managing soil moisture data
:REQUIRES:
:TODO:More error handling
:AUTHOR: John Franco Saraceno
:ORGANIZATION: U.S. Geological Survey, United States Department of Interior
:CONTACT: saraceno@usgs.gov
:VERSION: 1.0
Fri Jul 29 18:48:40 2016
"""
# =============================================================================
# IMPORT STATEMENTS
# =============================================================================
import glob
import matplotlib.pyplot as plt
import os
import pandas as pd


def list_of_files(path, fmatch):
    """Return the regular files inside ``path`` whose names match ``fmatch``.

    :param path: directory to search (absolute or relative).
    :param fmatch: glob pattern applied inside ``path``, e.g. ``'*.dat'``.
    :returns: sorted list of matching paths, each already prefixed with
        ``path``; directories that happen to match the pattern are skipped.
    """
    # glob.glob() returns paths that already include ``path``.  Joining them
    # with ``path`` a second time breaks relative directories
    # ('data' + 'data/x.dat' -> 'data/data/x.dat'), so test them as returned.
    matches = glob.glob(os.path.join(path, fmatch))
    # sort so the processing order does not depend on the file system
    return sorted(name for name in matches if os.path.isfile(name))


def create_datetime_index(df):
    """Return a copy of ``df`` indexed by timestamps built from its columns.

    The timestamp of each row is assembled from three columns:
    ``Year`` (parsed with ``%Y``), ``JulianDay`` (1-based day of the year)
    and ``HRMin`` (time of day encoded as HHMM, e.g. 1330 for 13:30).

    :param df: DataFrame containing ``Year``, ``JulianDay`` and ``HRMin``.
    :returns: a copy of ``df`` whose index holds the assembled timestamps;
        ``df`` itself is not modified.
    :raises KeyError: if one of the three columns is missing.
    :raises ValueError: if an ``HRMin`` value cannot be converted to ``int``.
    """
    dataframe = df.copy()
    time = dataframe['HRMin'].values
    # split HHMM into (hours, minutes); the modulo folds 2400 back to 0000
    # NOTE(review): a 2400 reading therefore lands at 00:00 of the *same*
    # JulianDay -- confirm this matches the logger's midnight convention.
    timetuple = [divmod(int(f) % 2400, 100) for f in time]
    # lowercase 'h': the uppercase 'H' unit alias is deprecated in pandas
    hour = pd.to_timedelta([hrs for hrs, _ in timetuple], unit='h')
    minute = pd.to_timedelta([mins for _, mins in timetuple], unit='m')

    # JulianDay is 1-based, so day 1 must add a zero offset to 1 January
    day = (pd.to_timedelta(dataframe['JulianDay'], unit='D') -
           pd.Timedelta('1D'))
    year = pd.to_datetime(dataframe['Year'], format='%Y')
    # replace the index of the copy with the assembled timestamps
    dataframe.index = year + day + hour + minute
    return dataframe


def process_file(filename, cols):
    """Read one data file and reduce it to daily statistics.

    :param filename: path of the comma-delimited, header-less data file.
    :param cols: column names to assign to the file's columns, in order.
    :returns: DataFrame resampled to daily means (``PPTmm``, when present,
        holds the daily sum instead), or ``None`` when the file could not
        be processed.
    """
    try:
        # read in the .dat file as a pandas dataframe
        df = pd.read_csv(filename, delimiter=',', skiprows=0, index_col=False,
                         names=cols, na_values='-9999.000')
        # give the data a datetime index
        df = create_datetime_index(df)
        # resample dataframe to a daily mean
        df_resmpld = df.resample('D', label='left').mean()
        # precipitation accumulates, so report its daily sum, not its mean
        precip_col = 'PPTmm'
        if precip_col in df_resmpld.columns:
            df_resmpld[precip_col] = df[precip_col
                                        ].resample('D', label='left').sum()
        return df_resmpld
    except Exception:
        # best effort: report the file and carry on with the others.
        # ``Exception`` (not a bare except) lets Ctrl-C and SystemExit through.
        # print(...) with a single argument works on Python 2 and Python 3.
        print("Tried, but could not process this file: {}".format(filename))
        return None


def process_directory(directory, fmatch='*.dat'):
    """Process every matching file in ``directory`` into one DataFrame.

    :param directory: directory containing the data files.
    :param fmatch: glob pattern selecting the files to process; defaults to
        ``'*.dat'``.  A narrower pattern can be used to filter, e.g. by site
        or year, if the file names encode them.
    :returns: DataFrame of the per-file results concatenated and sorted by
        its index, which is named ``'Date'``.
    :raises ValueError: if no file matched or none could be processed.
    """
    # create list of column names for dataframe
    column_names = ['Year', 'JulianDay', 'HRMin', 'TmpC', 'RelHum', 'BattV',
                    'PPTmm', 'Temp5cm', 'Temp10cm', 'Temp15cm', 'Temp20cm',
                    'Temp50cm', 'Temp100cm', 'SM5cm', 'SM10cm', 'SM15cm',
                    'SM20cm', 'SM50cm', 'SM100cm']
    files = list_of_files(directory, fmatch)
    # process_file() returns None for files it could not read; drop those
    # explicitly instead of relying on pd.concat to skip them silently
    frames = [process_file(f, column_names) for f in files]
    frames = [frame for frame in frames if frame is not None]
    if not frames:
        raise ValueError("No processable files matching {!r} in {!r}"
                         .format(fmatch, directory))
    # concatenate the files into a master dataframe
    means = pd.concat(frames)
    # sort the master dataframe by date time
    means.sort_index(axis=0, ascending=True, inplace=True)
    # name the index
    means.index.name = 'Date'
    return means


def plot_sm_data(df):
    """Plot the six soil moisture columns of ``df`` against its index.

    :param df: DataFrame containing the columns ``SM5cm``, ``SM10cm``,
        ``SM15cm``, ``SM20cm``, ``SM50cm`` and ``SM100cm``.
    :returns: the matplotlib Axes the lines were drawn on.
    """
    # DataFrame.plot opens a figure of its own unless it is handed an Axes,
    # so a bare plt.figure() call would leave an empty extra figure behind.
    axes = plt.figure().add_subplot(111)
    # the index is used as the x axis by default
    return df.plot(y=['SM5cm', 'SM10cm', 'SM15cm',
                      'SM20cm', 'SM50cm', 'SM100cm'], ax=axes)


def main():
    """Process the data files in the current directory, then save and plot.

    Writes the combined daily values to ``Daily_means.csv`` in the current
    working directory and plots the soil moisture columns.
    """
    # set the data file containing directory
    directory = os.getcwd()
    df_master = process_directory(directory)
    # save the master dataframe to a csv file with dates as YYYY-MM-DD
    # (use '%b-%Y' instead for a Mon-YYYY format)
    date_fmt = '%Y-%m-%d'
    # the former ``errors=coerce`` argument referenced an undefined name
    # (NameError) and is not a meaningful to_csv option, so it is dropped
    df_master.to_csv('Daily_means.csv', date_format=date_fmt)
    plot_sm_data(df_master)
    # keep the figure on screen when run as a plain script
    plt.show()

# run the workflow only when executed as a script, not when imported
if __name__ == "__main__":
    main()
	# -*- coding: utf-8 -*-
	"""
	:DESCRIPTION:Code for managing soil moisture data
	:REQUIRES:
	:TODO:More error handling
	:AUTHOR: John Franco Saraceno
	:ORGANIZATION: U.S. Geological Survey, United States Department of Interior
	:CONTACT: saraceno@usgs.gov
	:VERSION: 1.0
	Fri Jul 29 18:48:40 2016
	"""
	# =============================================================================
	# IMPORT STATEMENTS
	# =============================================================================
	import glob
	import matplotlib.pyplot as plt
	import os
	import pandas as pd


	def list_of_files(path, fmatch):
		"""Return the regular files inside ``path`` whose names match ``fmatch``.

		:param path: directory to search (absolute or relative).
		:param fmatch: glob pattern applied inside ``path``, e.g. ``'*.dat'``.
		:returns: sorted list of matching paths, each already prefixed with
			``path``; directories that happen to match the pattern are skipped.
		"""
		# glob.glob() returns paths that already include ``path``.  Joining
		# them with ``path`` a second time breaks relative directories
		# ('data' + 'data/x.dat' -> 'data/data/x.dat'), so test them as returned.
		matches = glob.glob(os.path.join(path, fmatch))
		# sort so the processing order does not depend on the file system
		return sorted(name for name in matches if os.path.isfile(name))


	def create_datetime_index(df):
		"""Return a copy of ``df`` indexed by timestamps built from its columns.

		The timestamp of each row is assembled from three columns:
		``Year`` (parsed with ``%Y``), ``JulianDay`` (1-based day of the year)
		and ``HRMin`` (time of day encoded as HHMM, e.g. 1330 for 13:30).

		:param df: DataFrame containing ``Year``, ``JulianDay`` and ``HRMin``.
		:returns: a copy of ``df`` whose index holds the assembled timestamps;
			``df`` itself is not modified.
		:raises KeyError: if one of the three columns is missing.
		:raises ValueError: if an ``HRMin`` value cannot be converted to ``int``.
		"""
		dataframe = df.copy()
		time = dataframe['HRMin'].values
		# split HHMM into (hours, minutes); the modulo folds 2400 back to 0000
		# NOTE(review): a 2400 reading therefore lands at 00:00 of the *same*
		# JulianDay -- confirm this matches the logger's midnight convention.
		timetuple = [divmod(int(f) % 2400, 100) for f in time]
		# lowercase 'h': the uppercase 'H' unit alias is deprecated in pandas
		hour = pd.to_timedelta([hrs for hrs, _ in timetuple], unit='h')
		minute = pd.to_timedelta([mins for _, mins in timetuple], unit='m')

		# JulianDay is 1-based, so day 1 must add a zero offset to 1 January
		one_day = pd.Timedelta('1D')
		day = pd.to_timedelta(dataframe['JulianDay'], unit='D') - one_day
		year = pd.to_datetime(dataframe['Year'], format='%Y')
		# replace the index of the copy with the assembled timestamps
		dataframe.index = year + day + hour + minute
		return dataframe


	def process_file(filename, cols):
		"""Read one data file and reduce it to daily statistics.

		:param filename: path of the comma-delimited, header-less data file.
		:param cols: column names to assign to the file's columns, in order.
		:returns: DataFrame resampled to daily means (``PPTmm``, when present,
			holds the daily sum instead), or ``None`` when the file could not
			be processed.
		"""
		try:
			# read in the .dat file as a pandas dataframe
			df = pd.read_csv(
				filename, delimiter=',', skiprows=0, index_col=False,
				names=cols, na_values='-9999.000')
			# give the data a datetime index
			df = create_datetime_index(df)
			# resample dataframe to a daily mean
			df_resmpld = df.resample('D', label='left').mean()
			# precipitation accumulates, so report its daily sum, not its mean
			precip_col = 'PPTmm'
			if precip_col in df_resmpld.columns:
				daily_precip = df[precip_col].resample('D', label='left').sum()
				df_resmpld[precip_col] = daily_precip
			return df_resmpld
		except Exception:
			# best effort: report the file and carry on with the others.
			# ``Exception`` (not a bare except) lets Ctrl-C and SystemExit
			# through; print(...) works on both Python 2 and Python 3.
			print("Tried, but could not process this file: {}".format(filename))
			return None


	def process_directory(directory, fmatch='*.dat'):
		"""Process every matching file in ``directory`` into one DataFrame.

		:param directory: directory containing the data files.
		:param fmatch: glob pattern selecting the files to process; defaults
			to ``'*.dat'``.
		:returns: DataFrame of the per-file results concatenated and sorted by
			its index, which is named ``'Date'``.
		:raises ValueError: if no file matched or none could be processed.
		"""
		# create list of column names for dataframe
		column_names = [
			'Year', 'JulianDay', 'HRMin', 'TmpC', 'RelHum', 'BattV',
			'PPTmm', 'Temp5cm', 'Temp10cm', 'Temp15cm', 'Temp20cm',
			'Temp50cm', 'Temp100cm', 'SM5cm', 'SM10cm', 'SM15cm',
			'SM20cm', 'SM50cm', 'SM100cm']
		files = list_of_files(directory, fmatch)
		# process_file() returns None for files it could not read; drop those
		# explicitly instead of relying on pd.concat to skip them silently
		frames = [process_file(f, column_names) for f in files]
		frames = [frame for frame in frames if frame is not None]
		if not frames:
			raise ValueError(
				"No processable files matching {!r} in {!r}".format(
					fmatch, directory))
		# concatenate the files into a master dataframe
		means = pd.concat(frames)
		# sort the master dataframe by date time
		means.sort_index(axis=0, ascending=True, inplace=True)
		# name the index
		means.index.name = 'Date'
		return means


	def plot_sm_data(df):
		"""Plot the six soil moisture columns of ``df`` against its index.

		:param df: DataFrame containing the columns ``SM5cm``, ``SM10cm``,
			``SM15cm``, ``SM20cm``, ``SM50cm`` and ``SM100cm``.
		:returns: the matplotlib Axes the lines were drawn on.
		"""
		# DataFrame.plot opens a figure of its own unless it is handed an
		# Axes, so a bare plt.figure() call would leave an empty extra figure.
		axes = plt.figure().add_subplot(111)
		# the index is used as the x axis by default
		sm_columns = ['SM5cm', 'SM10cm', 'SM15cm',
			'SM20cm', 'SM50cm', 'SM100cm']
		return df.plot(y=sm_columns, ax=axes)


	def main():
		"""Process the data files in the current directory, then save and plot.

		Writes the combined daily values to ``Daily_means.csv`` in the current
		working directory and plots the soil moisture columns.
		"""
		# set the data file containing directory
		directory = os.getcwd()
		df_master = process_directory(directory)
		# save the master dataframe to a csv file with dates as YYYY-MM-DD
		# (use '%b-%Y' instead for a Mon-YYYY format)
		date_fmt = '%Y-%m-%d'
		# the former ``errors=coerce`` argument referenced an undefined name
		# (NameError) and is not a meaningful to_csv option, so it is dropped
		df_master.to_csv('Daily_means.csv', date_format=date_fmt)
		plot_sm_data(df_master)
		# keep the figure on screen when run as a plain script
		plt.show()

	# run the workflow only when executed as a script, not when imported
	if __name__ == "__main__":
		main()