Skip to content

Instantly share code, notes, and snippets.

Created November 14, 2017 08:22
Show Gist options
  • Save duarteocarmo/efac4835e3d23c63db4bfb4ba28b795a to your computer and use it in GitHub Desktop.
Save duarteocarmo/efac4835e3d23c63db4bfb4ba28b795a to your computer and use it in GitHub Desktop.
Neural Network Regression Problem.
# exercise 8.2.6
from matplotlib.pyplot import figure, plot, subplot, title, show, bar, legend, scatter
import matplotlib.pyplot as plt
import neurolab as nl
import numpy as np
from scipy import stats
from scipy.io import loadmat
from sklearn import model_selection

from Project_Clean_data import raw, header, is_binary
# Held-out samples used only for the final evaluation at the end of the
# script, and the row indices of `raw` that are excluded from training.
# NOTE(review): presumably final_cand holds the rows of `raw` that make up
# chest.txt -- confirm against Project_Clean_data.
X_chest = np.loadtxt('chest.txt', dtype=int)
final_cand = np.loadtxt('final_cand.txt', dtype=int)

# select attribute to predict
target_attribute_name = 'Number of sexual partners'
target_index = list(header).index(target_attribute_name)

# prepare data: the target column becomes y, every other column is a feature;
# the held-out rows are dropped from both.
y = np.delete(raw[:, target_index], final_cand)
X = np.delete(raw, target_index, 1)
X = np.delete(X, final_cand, 0)
X = X[:, 0:10]  # only the first 10 feature columns are used
N, M = X.shape

# Keep the attribute names aligned with the columns actually retained in X
# (previously all non-target names were kept although X has only M columns).
attributeNames = np.delete(header, target_index)[:M]

# NOTE(review): C is not used anywhere in this script; kept for compatibility.
C = 2

# Normalize data (column-wise z-score)
X = stats.zscore(X)

## Normalize and compute PCA (UNCOMMENT to experiment with PCA preprocessing)
# Y = stats.zscore(X,0);
# U,S,V = np.linalg.svd(Y,full_matrices=False)
# V = V.T
##Components to be included as features
# k_pca = 3
# X = X @ V[:,0:k_pca]
# N, M = X.shape
# --- Settings of the neural network used for regression ---
n_hidden_units = 5    # number of hidden units
n_train = 2           # number of networks trained in each k-fold
learning_goal = 100   # stop criterion 1 (train mse to be reached)
max_epochs = 64       # stop criterion 2 (max epochs in training)
show_error_freq = 5   # frequency of training status updates

# --- K-fold cross-validation ---
K = 5  # only five folds to speed up this example
CV = model_selection.KFold(n_splits=K, shuffle=True)

# --- Per-fold bookkeeping ---
errors = np.zeros(K)                     # test mean-square error of each fold
error_hist = np.zeros((max_epochs, K))   # training error per epoch, one column per fold
bestnet = []                             # best network found in each fold
# K-fold cross-validation: in every fold, train `n_train` randomly initialised
# networks, keep the one with the lowest final training error in bestnet[k],
# and record its mean-square error on the fold's test split in errors[k].
for k, (train_index, test_index) in enumerate(CV.split(X, y)):
    print('\nCrossvalidation fold: {0}/{1}'.format(k + 1, K))

    # extract training and test set for current CV fold
    X_train = X[train_index, :]
    y_train = y[train_index]
    X_test = X[test_index, :]
    y_test = y[test_index]

    best_train_error = 1e100
    for i in range(n_train):
        print('Training network {0}/{1}...'.format(i + 1, n_train))
        # Create randomly initialized network with 2 layers:
        # tanh hidden layer, linear output; each input declared in [-3, 3].
        ann = nl.net.newff(
            [[-3, 3]] * M,
            [n_hidden_units, 1],
            [nl.trans.TanSig(), nl.trans.PureLin()],
        )
        if i == 0:
            # Reserve this fold's slot so bestnet[k] exists even if no
            # network improves on the initial best_train_error.
            bestnet.append(ann)

        # train network
        train_error = ann.train(
            X_train,
            y_train.reshape(-1, 1),
            goal=learning_goal,
            epochs=max_epochs,
            show=show_error_freq,
        )
        if train_error[-1] < best_train_error:
            bestnet[k] = ann
            best_train_error = train_error[-1]
            # Clear the column first: a run that stops early is shorter than
            # max_epochs and would otherwise leave a previous run's tail behind.
            error_hist[:, k] = 0
            error_hist[:len(train_error), k] = train_error

    print('Best train error: {0}...'.format(best_train_error))
    y_est = bestnet[k].sim(X_test).squeeze()
    # Mean-square error on the test split of this fold
    errors[k] = np.mean(np.square(y_est - y_test))
# Print the average least squares error
print('Mean-square error: {0}'.format(np.mean(errors)))

# Figure 1: test error per fold (top) and the recorded training-error
# histories, one curve per fold (bottom).
figure(figsize=(6, 7))
subplot(2, 1, 1)
bar(range(0, K), errors)
title('Mean-square errors')
subplot(2, 1, 2)
plot(error_hist)
title('Training error as function of BP iterations')

# Figure 2: estimates vs. true targets for the last CV fold (top) and their
# difference (bottom). y_est / y_test still hold the last fold's values.
figure(figsize=(6, 7))
subplot(2, 1, 1)
plot(y_est)
plot(y_test)
title('Last CV-fold: est_y vs. test_y')
subplot(2, 1, 2)
plot((y_est - y_test))
title('Last CV-fold: prediction error (est_y-test_y)')
# Final evaluation: take the network from the fold with the lowest test error
# and apply it to the held-out samples loaded from chest.txt.
index = np.argmin(errors)
best_net = bestnet[index]

# Split the held-out samples the same way as the training data:
# target column -> y_chest, first 10 remaining columns -> features.
y_chest = X_chest[:, target_index]
X_chest = np.delete(X_chest, target_index, 1)
X_chest = X_chest[:, 0:10]

# The networks were trained on z-scored features, so the held-out features
# must be scaled with the training set's column means and standard deviations.
# They are recomputed here from the un-normalised training rows
# (population std, matching scipy.stats.zscore's default ddof=0).
train_features = np.delete(np.delete(raw, target_index, 1), final_cand, 0)[:, 0:10]
X_chest = (X_chest - train_features.mean(axis=0)) / train_features.std(axis=0)

y_est = best_net.sim(X_chest).squeeze()
final_error = np.abs(y_chest - y_est)
x_axis = np.arange(0, np.size(y_chest))

# Draw into a fresh figure; otherwise the scatter and its title would land on
# (and re-title) the last subplot of the previous figure.
plt.figure()
plt.scatter(x_axis, final_error)
plt.ylabel('Estimation Error')
plt.title('Error Quantity for final test subjects')
print('\nOn average, the prediction fails by {} {}'.format(final_error.mean(), target_attribute_name))

# Display all figures created by the script.
plt.show()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment