Skip to content

Instantly share code, notes, and snippets.

@syhw
Created July 17, 2014 14:55
Show Gist options
  • Star 3 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save syhw/36165d49537e4dd5172c to your computer and use it in GitHub Desktop.
Save syhw/36165d49537e4dd5172c to your computer and use it in GitHub Desktop.
Trying dropout with simple off-the-selves scikit-learn models. Not really working.
from sklearn.datasets import fetch_20newsgroups, load_digits
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.cross_validation import train_test_split
import numpy as np
from sklearn.naive_bayes import MultinomialNB, BernoulliNB
from sklearn.linear_model import LogisticRegression, SGDClassifier
from sklearn import metrics
newsgroups_train = fetch_20newsgroups(subset='train')
vectorizer = TfidfVectorizer(encoding='latin-1', max_features=10000)
vectors = vectorizer.fit_transform(newsgroups_train.data)
dense_vectors = vectors.todense()
dense_vectors = np.asarray(dense_vectors)
newsgroups_test = fetch_20newsgroups(subset='test')
vectors_test = vectorizer.transform(newsgroups_test.data)
digits = load_digits()
d_train_x, d_test_x, d_train_y, d_test_y = train_test_split(digits.data,
digits.target, test_size=0.2)
DO_ALL = True
N_TIMES = 20 # number of datasets dropped out
DROPOUT_RATE = 0.5 # TODO explore 0.0->0.5
#class Dropout(object):
# def __init__(self, p=0.5):
# self.p = p
#
# def fit(self, X, y):
# return self
#
# def transform(self, X):
# return np.random.binomial(n=1, p=1.-self.p, size=X.shape) * X
#
# def get_params(self, **kwargs):
# return {"p": self.p}
for dname, x_train, y_train, x_test, y_test in (('digits', d_train_x,
d_train_y, d_test_x, d_test_y), ('20newsgroups', dense_vectors,
newsgroups_train.target, vectors_test, newsgroups_test.target)):
classifiers = [LogisticRegression(), SGDClassifier()]
# default penaly for LogisticRegression and SGDClassifier
# is L2 and dropout approximates an L2 ellipsis
if dname == '20newsgroups':
classifiers += [MultinomialNB(alpha=0.01), BernoulliNB(alpha=0.01)]
print "==> dataset name:", dname
print "-> without dropout"
if DO_ALL:
for clf in classifiers:
print clf
clf.fit(x_train, y_train)
pred = clf.predict(x_test)
print metrics.f1_score(pred, y_test)
tmp_l = [dense_vectors * np.random.binomial(n=1, p=0.5,
size=dense_vectors.shape) for _ in xrange(N_TIMES)]
X = np.concatenate(tmp_l, axis=0)
y = np.concatenate([newsgroups_train.target for _ in xrange(N_TIMES)], axis=0)
print "-> now with", N_TIMES, "dropouts, with rate", DROPOUT_RATE
classifiers = [LogisticRegression(C=1.E6), SGDClassifier(alpha=1.E-9)]
# default penaly for LogisticRegression and SGDClassifier
# is L2 and dropout approximates an L2 ellipsis ==> we try to remove the L2
if dname == '20newsgroups':
classifiers += [MultinomialNB(alpha=0.01), BernoulliNB(alpha=0.01)]
for clf in classifiers:
print clf
clf.fit(x_train, y_train)
pred = clf.predict(x_test)
print metrics.f1_score(pred, y_test)
# Three things to keep in mind:
# - dropout is usually done at the hidden units activations, not at the input
# - dropout is good when models are strongly overfitting
# - dropout is good with lots of data
# ==> dataset name: digits
# -> without dropout
# LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
# intercept_scaling=1, penalty=l2, random_state=None, tol=0.0001)
# 0.96389747273
# SGDClassifier(alpha=0.0001, class_weight=None, epsilon=0.1, eta0=0.0,
# fit_intercept=True, l1_ratio=0.15, learning_rate=optimal,
# loss=hinge, n_iter=5, n_jobs=1, penalty=l2, power_t=0.5,
# random_state=None, rho=None, shuffle=False, verbose=0,
# warm_start=False)
# 0.93351689353
# -> now with 20 dropouts, with rate 0.5
# LogisticRegression(C=1000000.0, class_weight=None, dual=False,
# fit_intercept=True, intercept_scaling=1, penalty=l2,
# random_state=None, tol=0.0001)
# 0.935424946443
# SGDClassifier(alpha=1e-09, class_weight=None, epsilon=0.1, eta0=0.0,
# fit_intercept=True, l1_ratio=0.15, learning_rate=optimal,
# loss=hinge, n_iter=5, n_jobs=1, penalty=l2, power_t=0.5,
# random_state=None, rho=None, shuffle=False, verbose=0,
# warm_start=False)
# 0.94890380291
# ==> dataset name: 20newsgroups
# -> without dropout
# LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
# intercept_scaling=1, penalty=l2, random_state=None, tol=0.0001)
# 0.810228116561
# SGDClassifier(alpha=0.0001, class_weight=None, epsilon=0.1, eta0=0.0,
# fit_intercept=True, l1_ratio=0.15, learning_rate=optimal,
# loss=hinge, n_iter=5, n_jobs=1, penalty=l2, power_t=0.5,
# random_state=None, rho=None, shuffle=False, verbose=0,
# warm_start=False)
# 0.813840047475
# MultinomialNB(alpha=0.01, class_prior=None, fit_prior=True)
# 0.806747433797
# BernoulliNB(alpha=0.01, binarize=0.0, class_prior=None, fit_prior=True)
# 0.71331798034
# -> now with 20 dropouts, with rate 0.5
# LogisticRegression(C=1000000.0, class_weight=None, dual=False,
# fit_intercept=True, intercept_scaling=1, penalty=l2,
# random_state=None, tol=0.0001)
# 0.813512528347
# SGDClassifier(alpha=1e-09, class_weight=None, epsilon=0.1, eta0=0.0,
# fit_intercept=True, l1_ratio=0.15, learning_rate=optimal,
# loss=hinge, n_iter=5, n_jobs=1, penalty=l2, power_t=0.5,
# random_state=None, rho=None, shuffle=False, verbose=0,
# warm_start=False)
# 0.764192166602
# MultinomialNB(alpha=0.01, class_prior=None, fit_prior=True)
# 0.806747433797
# BernoulliNB(alpha=0.01, binarize=0.0, class_prior=None, fit_prior=True)
# 0.71331798034
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment