Skip to content

Instantly share code, notes, and snippets.

@mrgloom
Created December 4, 2013 07:34
Show Gist options
  • Save mrgloom/7783666 to your computer and use it in GitHub Desktop.
Save mrgloom/7783666 to your computer and use it in GitHub Desktop.
MNIST classifier test with default params.
import numpy as np
from sklearn.svm import SVC
from sklearn.svm import LinearSVC
from sklearn.linear_model.stochastic_gradient import SGDClassifier
from sklearn.datasets import fetch_mldata
from sklearn.utils import shuffle
import time
#out-of-core \ online
#http://scikit-learn.org/stable/auto_examples/applications/plot_out_of_core_classification.html
# Load the first 70000 samples of "MNIST original" (all digits), divide the
# feature values by 255, shuffle features and labels together, then split the
# shuffled data 60000 / 10000 into train / test.
# NOTE(review): fetch_mldata is not available in later scikit-learn releases
# (fetch_openml is the documented replacement) -- confirm the pinned version.
mnist = fetch_mldata("MNIST original")
X_all, y_all = shuffle(mnist.data[:70000] / 255., mnist.target[:70000])
X_train, X_test = X_all[:60000], X_all[60000:70000]
y_train, y_test = y_all[:60000], y_all[60000:70000]
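# Results with default parameters (t: fit time in seconds, acc: test accuracy in %):
#   SVC()        t: 1267.66  acc: 94.04
#   LinearSVC()  t: 188.17   acc: 91.23
#   SGD          t: 6.09     acc: 87.19
#     Must we shuffle the data? The prediction rate fluctuates => does it depend on the order of the dataset?
#   online SGD   t: 4.64     acc: ~86 (max: 88.1 -- stopping criterion?); 10 passes => acc: 90.8
# TODO: test on more data => better predictions? (compare against the extra time)
# TODO: test with grid search => how long does it take?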
#test SVC
# clf = SVC()
# t0 = time.time()
# clf.fit(X_train, y_train)
# print (time.time()-t0)
# score= clf.score(X_test, y_test)
# print score
# test linearSVM
# clf = LinearSVC()
# t1 = time.time()
# clf.fit(X_train, y_train)
# print (time.time()-t1)
# score= clf.score(X_test, y_test)
# print score
# test SGD
# clf = SGDClassifier()
# t2 = time.time()
# clf.fit(X_train, y_train)
# print (time.time()-t2)
# score= clf.score(X_test, y_test)
# print score
# Test online (mini-batch) SGD: feed the training set to
# SGDClassifier.partial_fit in fixed-size chunks, repeated for n_pass passes.
# Open question from the original author: is more than 1 pass needed?
#
# NOTE(review): the loop nesting below was reconstructed because the source had
# lost its indentation. Scoring is done after every batch (the results note
# above reports a "max" accuracy distinct from the final one within a single
# pass) -- confirm against the original if per-pass scoring was intended.
step = 1000  # number of training rows handed to each partial_fit call
# Start offset of every batch; derived from the training set size instead of a
# hard-coded 60000 so the loop follows whatever split was made above.
batches = np.arange(0, X_train.shape[0], step)
clf = SGDClassifier()
# The complete label set is passed to partial_fit on every call so the
# classifier knows all ten digit classes regardless of a batch's contents.
all_classes = np.arange(10)
t3 = time.time()
max_acc = 0  # best test-set score observed so far
n_pass = 10  # number of full passes over the training data
for i in range(n_pass):
    for curr in batches:
        X_curr, y_curr = X_train[curr:curr + step], y_train[curr:curr + step]
        clf.partial_fit(X_curr, y_curr, classes=all_classes)
        score = clf.score(X_test, y_test)
        # max() keeps the old value on ties, like the original `<` check.
        max_acc = max(max_acc, score)
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print(score)
# Elapsed wall-clock time; it includes the score() calls made inside the loop.
print(time.time() - t3)
print(max_acc)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment