# Linear Regression using Neural Net in Keras
# Gist by @GKarmakar, December 20, 2017
# coding: utf-8
# In[1]:
get_ipython().magic('matplotlib inline')
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import Ridge
from sklearn.linear_model import SGDRegressor
from sklearn.svm import SVR
from sklearn.utils import shuffle
import warnings
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import KFold
from sklearn.ensemble import AdaBoostRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.preprocessing import PolynomialFeatures
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import Imputer
warnings.filterwarnings("ignore")
# In[2]:
from keras.models import Sequential
from keras.layers import Dense, Dropout, BatchNormalization
from keras.constraints import maxnorm
from keras import optimizers
from keras.wrappers.scikit_learn import KerasRegressor
def baseline_model_896(optimizer='adam', init='glorot_uniform'):
    # create model: one 896-unit hidden layer matching the 896 input features
    #optimizer = optimizers.SGD(lr=0.001, decay=1e-6, momentum=0.9, nesterov=True)
    model = Sequential()
    model.add(Dense(896, activation='relu', kernel_initializer=init, input_shape=(896,)))
    model.add(BatchNormalization())
    model.add(Dropout(0.5))
    model.add(Dense(1, kernel_initializer=init, activation='linear'))
    # accuracy is meaningless for regression, so track mean absolute error instead
    model.compile(loss='mean_squared_error', optimizer=optimizer, metrics=['mae'])
    return model
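# In[ ]:
# Sanity check (not in the original gist): build the model once and print the
# layer shapes; the 896-wide input is an assumption taken from the model name.
baseline_model_896().summary()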
# In[8]:
def train_data_nn(X_train, y_train):
    np.random.seed(42)
    # create model
    estimator = KerasRegressor(build_fn=baseline_model_896, epochs=200, batch_size=5, verbose=0)
    kfold = KFold(n_splits=10, shuffle=True, random_state=42)
    # cross_val_score uses KerasRegressor's default score, the negative MSE
    results = cross_val_score(estimator, X_train, y_train, cv=kfold)
    # grid search epochs, batch size and optimizer
    #optimizers = ['rmsprop', 'adam', 'sgd']
    #init = ['glorot_uniform', 'normal', 'uniform']
    #epochs = [50, 100, 150]
    #batches = [5, 10, 20]
    #param_grid = dict(optimizer=optimizers, epochs=epochs, batch_size=batches, init=init)
    #grid = GridSearchCV(estimator=estimator, param_grid=param_grid)
    #grid_result = grid.fit(X_train, y_train)
    # summarize results
    #print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
    #means = grid_result.cv_results_['mean_test_score']
    #stds = grid_result.cv_results_['std_test_score']
    #params = grid_result.cv_results_['params']
    #for mean, stdev, param in zip(means, stds, params):
    #    print("%f (%f) with: %r" % (mean, stdev, param))
    #return grid_result.best_estimator_
    print("CV MSE: %.4f (std %.4f)" % (-results.mean(), results.std()))
    return estimator
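# In[ ]:
# Hypothetical smoke test (not in the original gist): cross-validate the
# trainer on random data with the assumed 896-feature width before touching
# the real CSVs. Disabled by default because 10-fold CV at 200 epochs is slow.
RUN_SMOKE_TEST = False
if RUN_SMOKE_TEST:
    X_demo = np.random.rand(100, 896)
    y_demo = np.random.rand(100)
    train_data_nn(X_demo, y_demo)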
# In[9]:
def visualize_learning_curve(history):
    # list all data in history
    print(history.history.keys())
    # summarize history for mean absolute error (the metric compiled above)
    plt.plot(history.history['mean_absolute_error'])
    plt.plot(history.history['val_mean_absolute_error'])
    plt.title('model mean absolute error')
    plt.ylabel('mean absolute error')
    plt.xlabel('epoch')
    plt.legend(['train', 'validation'], loc='upper left')
    plt.show()
    # summarize history for loss
    plt.plot(history.history['loss'])
    plt.plot(history.history['val_loss'])
    plt.title('model loss')
    plt.ylabel('loss')
    plt.xlabel('epoch')
    plt.legend(['train', 'validation'], loc='upper left')
    plt.show()
# In[10]:
def train_and_predict_new(Xtrain, Xtest):
    X = Xtrain
    y = X['PeerRank']
    X.drop("PeerRank", inplace=True, axis=1)
    # drop columns that are entirely null, then columns with a single unique value
    null_cols = X.columns[X.isnull().all()]
    X.drop(null_cols, inplace=True, axis=1)
    nunique = X.apply(pd.Series.nunique)
    constant_cols = nunique[nunique == 1].index
    X.drop(constant_cols, inplace=True, axis=1)
    X_test = Xtest
    X_test.drop(null_cols, inplace=True, axis=1)
    X_test.drop(constant_cols, inplace=True, axis=1)
    print('Train size:', X.shape, ' Test size:', X_test.shape)
    # train and "validation" both alias the full training set here;
    # the validation_split in fit() carves out the actual hold-out slice
    X_train = X
    X_val = X
    y_train = y
    y_val = y
    # fit the scaler on the training data only, then reuse it for val/test
    scale = StandardScaler()
    X_train = scale.fit_transform(X_train)
    X_test = scale.transform(X_test)
    X_val = scale.transform(X_val)
    #print(np.all(np.isfinite(X_train)))
    #print(np.all(np.isfinite(X_test)))
    #print(np.any(np.isnan(X_train)))
    #print(np.any(np.isnan(X_test)))
    #print(np.any(np.isnan(y.values)))
    estimator = train_data_nn(X_train, y_train)
    history = estimator.fit(X_val, y_val, validation_split=0.3, epochs=200, batch_size=5, verbose=0)
    #visualize_learning_curve(history)
    pred = estimator.predict(X_test)
    test_df = pd.DataFrame({'y_pred': pred})
    return test_df
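# In[ ]:
# The cells below call encodeData and imputeMissingDataWithMeanValue, which the
# original gist never defines. These are minimal sketches reconstructed from
# the call sites: label-encode object columns, then mean-impute numeric ones.
def encodeData(df):
    # map each distinct string in an object column to an integer code
    for col in df.select_dtypes(include=['object']).columns:
        df[col] = LabelEncoder().fit_transform(df[col].astype(str))
    return df

def imputeMissingDataWithMeanValue(df):
    # replace NaNs in numeric columns with the column mean
    num_cols = df.select_dtypes(include=[np.number]).columns
    df[num_cols] = df[num_cols].fillna(df[num_cols].mean())
    return df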
# In[ ]:
df_train = pd.read_csv("./data/train.csv")
df_test = pd.read_csv("./data/test.csv")
#df_pred = pd.read_csv("./data/submission.csv")
#df_test['PeerRank'] = df_pred['y_pred'].values
train_num = len(df_train)
df_test.insert(0, 'PeerRank', 0)
dataset = pd.concat(objs=[df_train, df_test], axis=0)
dataset.drop(".id", axis=1, inplace=True)
# note: a shuffled copy would scramble the positional train/test re-split
# below, so the shuffle result is intentionally left unused
#dataset_shuffled = shuffle(dataset)
dataset = encodeData(dataset)
dataset = imputeMissingDataWithMeanValue(dataset)
dataset.fillna(0, inplace=True)
#df_train = dataset
df_train = dataset[:train_num]
#print(dataset.select_dtypes(include=['object']).dtypes)
df_test = dataset[train_num:]
df_test.drop('PeerRank', inplace=True, axis=1)
print("Train Data:", df_train.shape)
print("Test Data:", df_test.shape)
# In[ ]:
test_df = train_and_predict_new(df_train, df_test)
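# In[ ]:
# Follow-up sketch (not in the original gist): persist the predictions. The
# ./data/submission.csv path is an assumption mirroring the commented-out
# read in the data-loading cell above.
test_df.to_csv("./data/submission.csv", index=False)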