Skip to content

Instantly share code, notes, and snippets.

Last active Oct 13, 2021
What would you like to do?
mnist with sklearn
import random
import time

import numpy
from numpy import arange
#from classification import *
from sklearn import metrics
from sklearn.datasets import fetch_openml
from sklearn.ensemble import RandomForestClassifier
from sklearn.utils import shuffle

try:
    # Kept for backward compatibility only: fetch_mldata is missing from
    # recent scikit-learn releases and importing it raises ImportError.
    from sklearn.datasets import fetch_mldata
except ImportError:
    fetch_mldata = None
def run():
    """Train a random forest on MNIST and print its evaluation metrics.

    The dataset is downloaded with ``fetch_openml('mnist_784')``, shuffled,
    and split into the first ``n_train`` samples for training and the next
    ``n_test`` samples for testing.  A ``RandomForestClassifier`` is fitted
    on the training part, and precision, recall, F1 and mean accuracy on the
    test part are printed to stdout.

    Returns:
        None.  All results are reported through ``print``.
    """
    # fetch_mldata is no longer available; 'mnist_784' on OpenML replaces
    # 'MNIST original'.  as_frame=False asks for plain arrays so the integer
    # index arrays below can be used directly.
    mnist = fetch_openml('mnist_784', version=1, as_frame=False)
    # NOTE(review): OpenML appears to deliver the labels as strings; cast to
    # int so they are numeric class ids -- confirm against the fetched data.
    data, target = shuffle(mnist.data, mnist.target.astype(int))

    # Trunk the data
    n_train = 60000
    n_test = 10000

    # Define training and testing sets.  The test range starts at n_train
    # (not n_train + 1) so no sample is skipped and exactly n_test samples
    # are evaluated.
    train_idx = arange(0, n_train)
    test_idx = arange(n_train, n_train + n_test)
    X_train, y_train = data[train_idx], target[train_idx]
    X_test, y_test = data[test_idx], target[test_idx]

    # Apply a learning algorithm
    print("Applying a learning algorithm...")
    clf = RandomForestClassifier(n_estimators=10, n_jobs=2)
    clf.fit(X_train, y_train)

    # Make a prediction
    print("Making predictions...")
    y_pred = clf.predict(X_test)

    # Evaluate the prediction.  MNIST has ten classes, so an explicit
    # averaging mode is passed; the default 'binary' mode only applies to
    # two-class targets.
    print("Evaluating results...")
    print("Precision: \t {}".format(
        metrics.precision_score(y_test, y_pred, average="weighted")))
    print("Recall: \t {}".format(
        metrics.recall_score(y_test, y_pred, average="weighted")))
    print("F1 score: \t {}".format(
        metrics.f1_score(y_test, y_pred, average="weighted")))
    print("Mean accuracy: \t {}".format(clf.score(X_test, y_test)))
if __name__ == "__main__":
    # Script entry point: run the whole pipeline once and report the
    # wall-clock time it took.  run() reports its results via print and its
    # return value is not used, so it is not stored.
    start_time = time.time()
    run()
    end_time = time.time()
    # Single-argument print works both as a Python 2 statement and as a
    # Python 3 function call.
    print("Overall running time: {}".format(end_time - start_time))
Copy link

chenchunaidu commented Nov 5, 2018

It is showing a
"RemoteDisconnected: Remote end closed connection without response"
error in Jupyter Notebook. Can you say what is happening?

Copy link

timkofu commented Jul 18, 2019

Had the same issue with fetch_mldata(). After reading this SF answer I downloaded it from Kaggle.

Copy link

EyjanHuang commented Nov 19, 2019

There is a problem: if I use the original .idx file, transform it into a matrix, and train on it, it takes a very long time. Is there a better solution?

Copy link

codeshoper commented Sep 28, 2020

I can't import the dataset. Whenever I try to do so, this error pops up:
ImportError: cannot import name 'fetch_mldata' from 'sklearn.datasets'

Copy link

sri-vishnu-001 commented May 9, 2021

I can't import the dataset. Whenever I try to do so, this error pops up:
ImportError: cannot import name 'fetch_mldata' from 'sklearn.datasets'

Use fetch_openml in place of fetch_mldata, and use 'mnist_784' in place of 'MNIST original'.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment