Skip to content

Instantly share code, notes, and snippets.

@alfredplpl
Last active August 29, 2015 14:21
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save alfredplpl/c5de11157df20ca11b69 to your computer and use it in GitHub Desktop.
Save alfredplpl/c5de11157df20ca11b69 to your computer and use it in GitHub Desktop.
多変量版のミニバッチSGDです。マルチタスク学習でないので、効率はすごく悪いです
# -*- coding: utf-8 -*-
# This code is distributed under the 3-Clause BSD license (New BSD license).
# 基本的に作者の名前を書いていただければ、商用利用も可能です。なお、保証はしません。
# 参考URL: http://osdn.jp/projects/opensource/wiki/licenses%2Fnew_BSD_license
from sklearn import linear_model
import Image
import numpy as np
from sklearn.cross_validation import ShuffleSplit
from sklearn.metrics import r2_score
class MultivariateMinibatchSGDR(object):
    """Multi-output linear regressor trained with mini-batch SGD.

    One independent ``linear_model.SGDRegressor`` is kept per target column
    and every regressor is updated with ``partial_fit`` on the same random
    mini-batches.  The targets are not learned jointly.

    Parameters
    ----------
    batch_size : int
        Controls the mini-batch size *as a fraction*: every mini-batch holds
        ``1.0 / batch_size`` of the training samples (it is passed to
        ``ShuffleSplit`` as ``test_size``), not ``batch_size`` samples.
    n_iter, alpha, penalty, verbose, loss
        Forwarded unchanged to every ``SGDRegressor`` created in ``fit``.
    """

    def __init__(self, batch_size=32, n_iter=5, alpha=0.0001, penalty="l2",
                 verbose=0, loss='squared_loss'):
        # Populated by fit(): one SGDRegressor per target column.
        self.clfs = None
        # Keyword arguments handed to each SGDRegressor.
        self.params = {"n_iter": n_iter, "alpha": alpha,
                       "penalty": penalty, "verbose": verbose, "loss": loss}
        self.batch_size = batch_size

    def fit(self, X, y, n_iter=20, random_state=0):
        """Train one regressor per target column on random mini-batches.

        Parameters
        ----------
        X : indexable of samples; must support ``X[index_array]`` and
            expose ``shape[0]``.
        y : array-like of targets, one column per output.  A 1-D ``y`` is
            treated as a single target column.
        n_iter : int
            Number of mini-batches drawn (``ShuffleSplit`` iterations).
        random_state : seed forwarded to ``ShuffleSplit``.

        Returns
        -------
        self
        """
        y = np.asarray(y)
        if y.ndim == 1:
            # Accept a single-target vector instead of failing on shape[1].
            y = y.reshape(-1, 1)
        self.ndim = y.shape[1]
        self.clfs = [
            linear_model.SGDRegressor(n_iter=self.params["n_iter"],
                                      alpha=self.params["alpha"],
                                      penalty=self.params["penalty"],
                                      verbose=self.params["verbose"],
                                      loss=self.params["loss"])
            for _ in range(self.ndim)
        ]
        # The "test" side of each shuffle split is used as the mini-batch,
        # so each batch is a 1/batch_size fraction of the samples.
        splits = ShuffleSplit(X.shape[0], n_iter=n_iter,
                              random_state=random_state,
                              test_size=1.0 / self.batch_size)
        for _, batch in splits:
            X_batch = X[batch]  # hoisted: identical for every target
            for i, clf in enumerate(self.clfs):
                clf.partial_fit(X=X_batch, y=y[batch, i])
        return self

    def predict(self, X):
        """Return predictions with shape ``(n_samples, n_targets)``.

        Each per-target regressor predicts the whole of ``X`` in one call
        and the resulting vectors are stacked as columns.
        """
        return np.column_stack([clf.predict(X) for clf in self.clfs])

    def score(self, X, y):
        """Return the R^2 score of ``predict(X)`` against the true ``y``."""
        # r2_score expects (y_true, y_pred); the metric is not symmetric.
        return r2_score(y, self.predict(X))

    def get_params(self, deep=True):
        """Return all constructor arguments as a new dict.

        ``batch_size`` is included so that an estimator rebuilt from these
        parameters keeps its mini-batch setting.  ``deep`` is accepted for
        API compatibility and is not used.
        """
        params = dict(self.params)  # copy: callers must not alias our state
        params["batch_size"] = self.batch_size
        return params
# The demo below runs only when this file is executed as a script;
# importing the module does not trigger it, so it is safe to import.
if __name__ == "__main__":
    from sklearn.datasets import make_regression
    from sklearn.cross_validation import cross_val_score

    # Synthetic multi-target regression problem: 20 features, 10 targets.
    X, y = make_regression(n_samples=100000, n_features=20, n_targets=10,
                           noise=10)
    clf = MultivariateMinibatchSGDR(batch_size=4)
    clfBaseline = linear_model.LinearRegression()
    # Single-argument parenthesised print behaves the same as the Python 2
    # statement and as the Python 3 function.
    print(cross_val_score(clf, X, y, cv=2))
    print(cross_val_score(clfBaseline, X, y, cv=2))
# Copyright (c) 2015, alfredplpl
# All rights reserved.
__author__ = 'alfredplpl'
# References: https://github.com/lisa-lab/pylearn2/blob/master/pylearn2/training_algorithms/sgd.py
# https://www.kaggle.com/c/criteo-display-ad-challenge/forums/t/9561/how-to-apply-python-linear-model-sgdregressor-to-do-logistic-regression
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment