Batch SGD ElasticNet
from sklearn.datasets import load_boston | |
from sklearn.linear_model import (LinearRegression, Ridge, SGDRegressor, | |
Lasso, ElasticNetCV) | |
from sklearn.preprocessing import MinMaxScaler | |
import numpy as np | |
#from minepy import MINE | |
from sklearn.metrics import mean_squared_error | |
# np.random.seed(0)  # uncomment for reproducible draws
size = 1000


def _friedman1(X):
    """Return the noise-free Friedman #1 response for each row of X.

    Only the first five columns of X enter the formula; any further
    columns are ignored.
    """
    return (10 * np.sin(np.pi * X[:, 0] * X[:, 1])
            + 20 * (X[:, 2] - .5) ** 2
            + 10 * X[:, 3]
            + 5 * X[:, 4])


# Three independent training batches plus one held-out test set, each
# with 14 features drawn uniformly from [0, 1).
X1 = np.random.uniform(0, 1, (size, 14))
X2 = np.random.uniform(0, 1, (size, 14))
X3 = np.random.uniform(0, 1, (size, 14))
Xtrain = [X1, X2, X3]
X_test = np.random.uniform(0, 1, (size, 14))

### Friedman 1st regression problem
# The noise is drawn with one value PER SAMPLE. Calling
# np.random.normal(0, 1) without a size yields a single scalar, which
# would merely shift every target in the batch by the same constant.
Ytrue1 = _friedman1(X1)
Y1 = Ytrue1 + np.random.normal(0, 1, size)
Ytrue2 = _friedman1(X2)
Y2 = Ytrue2 + np.random.normal(0, 1, size)
Ytrue3 = _friedman1(X3)
Y3 = Ytrue3 + np.random.normal(0, 1, size)
Ytrain = [Y1, Y2, Y3]
Ytrue_test = _friedman1(X_test)

### Add 4 additional correlated variables (noisy copies of x1-x4)
# Columns 10-13 are overwritten with columns 0-3 plus small Gaussian
# noise. The test set receives the same treatment so that it follows the
# same feature distribution as the training batches. The targets above
# are unaffected because they depend on columns 0-4 only.
for _features in (X1, X2, X3, X_test):
    _features[:, 10:] = _features[:, :4] + np.random.normal(0, .025, (size, 4))

names = ["x%s" % i for i in range(1, 15)]
# Maps a method name to a {feature name: scaled score} dict.
ranks = {}
def rank_to_dict(ranks, names, order=1):
    """Min-max scale a score vector and key it by feature name.

    ranks -- one score per feature, in the same order as ``names``.
    names -- feature names used as dictionary keys.
    order -- multiplier applied before scaling; pass -1 to invert the
             ordering of the scores.

    Returns a dict mapping each name to its scaled score rounded to two
    decimals.
    """
    # MinMaxScaler works column-wise, so the scores are presented as a
    # single column and flattened back to one dimension afterwards.
    column = order * np.array([ranks]).T
    scaled = MinMaxScaler().fit_transform(column).T[0]
    return {name: round(score, 2) for name, score in zip(names, scaled)}
# Choose the regularisation strength and L1/L2 mix by cross-validation.
# Note that only the third batch (X3, Y3) is used for this search.
clf_ElasticNetCV = ElasticNetCV(
    alphas=[0.0001, 0.001, 0.01, 0.05, 0.1, 0.5, 1.0, 5.0, 10.0, 100],
    l1_ratio=[.1, .5, .7, .9, .95, .99, 1],
)
clf_ElasticNetCV.fit(X3, Y3)
# Single-argument print(...) calls behave the same under Python 2 and 3.
print('Best alpha: %s' % clf_ElasticNetCV.alpha_)
print('Best l1_ratio: %s' % clf_ElasticNetCV.l1_ratio_)

# Train an elastic-net SGD regressor incrementally, one batch at a time,
# reusing the hyper-parameters selected above.
#sgdEN = SGDRegressor(warm_start=True, penalty='elasticnet')
sgdEN = SGDRegressor(warm_start=True, penalty='elasticnet',
                     alpha=clf_ElasticNetCV.alpha_,
                     l1_ratio=clf_ElasticNetCV.l1_ratio_)
for X, Y in zip(Xtrain, Ytrain):
    sgdEN.partial_fit(X, Y)

# Rank features by coefficient MAGNITUDE: a large negative weight is as
# influential as a large positive one, so the sign must not push it to
# the bottom of the min-max scaled ranking.
ranks["SGDEN"] = rank_to_dict(np.abs(sgdEN.coef_), names)

# Error against the noise-free test response.
y_pred = sgdEN.predict(X_test)
print(mean_squared_error(Ytrue_test, y_pred))

# Mean score of every feature across all ranking methods. It is computed
# here but not included in the table printed below.
r = {}
for name in names:
    r[name] = round(np.mean([ranks[method][name] for method in ranks]), 2)

# Tab-separated table: one column per method, one row per feature.
methods = sorted(ranks.keys())
print("\t%s" % "\t".join(methods))
for name in names:
    print("%s\t%s" % (name, "\t".join(str(ranks[method][name])
                                       for method in methods)))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment