# misgeatgit/reproduce_FFX_output.py

## reproduce_FFX_output.py
import pandas as pds
import numpy as np
import ffx

# Silence SKlearn warning on python3: every later call to warnings.warn is
# routed to the no-op below.
def warn(*args, **kwargs):
    """Accept any positional and keyword arguments and discard them."""
    return None


import warnings

warnings.warn = warn

# Directory holding the CSV splits; the data is meant to be used with the
# command line FFX.
ffx_dir = 'data/ffx'
test_X = pds.read_csv('{}/natural_data_ffx_test_X.csv'.format(ffx_dir))
test_Y = pds.read_csv('{}/natural_data_ffx_test_Y.csv'.format(ffx_dir))
train_X = pds.read_csv('{}/natural_data_ffx_train_X.csv'.format(ffx_dir))
train_Y = pds.read_csv('{}/natural_data_ffx_train_Y.csv'.format(ffx_dir))

# Candidate predictor lists. group_1 holds the variables found to be
# correlated with the Granger analysis; all_vars is a longer alternative.
group_1 = ['WMGHG', 'Land_Use', 'Ozone', 'TropAerInd']
all_vars = [
    'WMGHG', 'Ozone', 'Solar', 'Land_Use', 'SnowAlb_BC',
    'Orbital', 'TropAerDir', 'TropAerInd', 'StratAer',
]
predictors = group_1  # alternative: all_vars

# Keep only the selected predictor columns, in the order listed.
train_X = train_X[predictors]
test_X = test_X[predictors]

# Make sure the column names of both splits are identical to the predictors.
for position, name in enumerate(predictors):
    assert train_X.columns[position] == name
    assert test_X.columns[position] == name

# Stack the train rows followed by the test rows into single input and
# output arrays; the split is regenerated later with respect to the
# forecasting year K.
data_X = np.concatenate((train_X.to_numpy(), test_X.to_numpy()), axis=0)
data_Y = np.concatenate((train_Y.to_numpy(), test_Y.to_numpy()), axis=0)

# Predict K years ahead.
# Alternative horizon lists kept for reference:
#   [0,1,2,3,4,5,6,7,8,10,9] and [15,16,17,18,19,20]
Ks = range(1, 31)
Ks = [1]  # For debugging purpose; overrides the range assigned just above
for K in Ks:
    print('\nModels Predicting {} years ahead:'.format(K))

    # The last K input rows have no known Y value, so they are dropped.
    cur_data_X = data_X[:len(data_X) - K]
    # Rotate the Y values K rows towards the start, then drop the final K
    # rows (the ones that wrapped around from the beginning).
    shifted_Y = np.roll(data_Y, -K, axis=0)
    cur_data_Y = shifted_Y[:len(data_Y) - K]

    # Prepare FFX inputs: the first 80% of the rows are the training part,
    # the remaining rows are the test part.
    train_size = int(cur_data_X.shape[0] * 0.8)
    cur_train_X, cur_test_X = cur_data_X[:train_size], cur_data_X[train_size:]
    cur_train_Y, cur_test_Y = cur_data_Y[:train_size], cur_data_Y[train_size:]

    # Inputs and outputs of each part must have the same number of rows.
    assert cur_test_X.shape[0] == cur_test_Y.shape[0]
    assert cur_train_X.shape[0] == cur_train_Y.shape[0]

    models = ffx.run(
        cur_train_X, cur_train_Y, cur_test_X, cur_test_Y, predictors
    )
    best_performing_model = {'sq_err': float('inf'), 'model': None}
    for model in models:
        # NOTE(review): presumably the model's predictions for the test
        # inputs - confirm against the FFX API.
        yhat = model.simulate(cur_test_X)
        # Test targets reshaped to one dimension (one entry per row).
        y = cur_test_Y.reshape(cur_test_Y.shape[0])
        print(' * {}'.format(model))
	import pandas as pds
	import numpy as np
	import ffx

	# Silence SKlearn warning on python3 by replacing warnings.warn with a
	# function that ignores whatever it is given.
	def warn(*args, **kwargs):
	    """Accept any positional and keyword arguments and do nothing."""
	    pass
	import warnings
	warnings.warn = warn

	# NOTE(review): this section repeats the data-loading section that appears
	# earlier in the file - confirm whether the duplicate is intended.
	ffx_dir = 'data/ffx'
	# Data meant to be used with command line FFX
	test_X = (pds.read_csv('{}/natural_data_ffx_test_X.csv'.format(ffx_dir)))
	test_Y = pds.read_csv('{}/natural_data_ffx_test_Y.csv'.format(ffx_dir))
	train_X = (pds.read_csv('{}/natural_data_ffx_train_X.csv'.format(ffx_dir)))
	train_Y = pds.read_csv('{}/natural_data_ffx_train_Y.csv'.format(ffx_dir))

	# Train over variables found to be correlated with the Granger analysis
	group_1 = ['WMGHG', 'Land_Use', 'Ozone', 'TropAerInd']
	all_vars = ['WMGHG', 'Ozone', 'Solar', 'Land_Use', 'SnowAlb_BC',
	            'Orbital', 'TropAerDir', 'TropAerInd', 'StratAer']
	predictors = group_1  # alternative: all_vars
	# Keep only the selected predictor columns, in the order listed.
	train_X = train_X[predictors]
	test_X = test_X[predictors]
	# Make sure the column names of both splits are identical to the predictors.
	for i in range(len(predictors)):
	    assert(train_X.columns[i] == predictors[i])
	    assert(test_X.columns[i] == predictors[i])

	# Merge train-test input and output values (train rows first); they are
	# split again dynamically with respect to forecasting year K.
	data_X = np.append(train_X.to_numpy(), test_X.to_numpy(), axis=0)
	data_Y = np.append(train_Y.to_numpy(), test_Y.to_numpy(), axis=0)

	# NOTE(review): this section repeats the forecasting loop that appears
	# earlier in the file - confirm whether the duplicate is intended.
	# Predict K years ahead
	#Ks = [0,1,2,3,4,5,6,7,8,10,9]
	Ks = range(1,31)#[15,16,17,18,19,20]
	Ks = [1] # For debugging purpose; overrides the range assigned just above
	for K in Ks:
	    print('\nModels Predicting {} years ahead:'.format(K))
	    # Remove the last K rows of input data, as their Y value is unknown.
	    cur_data_X = data_X[:len(data_X) - K]
	    # Shift Y values K steps up and remove the last K points.
	    cur_data_Y = np.roll(data_Y, -K, axis=0)[:len(data_Y) - K]
	    train_size = int(cur_data_X.shape[0]*0.8) #80%
	    # Prepare FFX inputs: first train_size rows train, the rest test.
	    cur_train_X = cur_data_X[:train_size]
	    cur_train_Y = cur_data_Y[:train_size]
	    cur_test_X = cur_data_X[train_size: ]
	    cur_test_Y = cur_data_Y[train_size: ]

	    # Inputs and outputs of each part must have the same number of rows.
	    assert(cur_test_X.shape[0] == cur_test_Y.shape[0])
	    assert(cur_train_X.shape[0] == cur_train_Y.shape[0])

	    models = ffx.run(cur_train_X, cur_train_Y, cur_test_X,
	                     cur_test_Y, predictors)
	    best_performing_model={'sq_err':float('inf'), 'model':None}
	    for model in models:
	        # NOTE(review): presumably the model's predictions for the test
	        # inputs - confirm against the FFX API.
	        yhat = model.simulate(cur_test_X)
	        # Test targets reshaped to one dimension (one entry per row).
	        y = np.reshape(cur_test_Y, cur_test_Y.shape[0])
	        print(' * {}'.format(model))