Skip to content

Instantly share code, notes, and snippets.

@jsun
Created September 16, 2018 03:57
Show Gist options
  • Save jsun/6f9481906f7a0d92e708fe1ae5f2f38c to your computer and use it in GitHub Desktop.
Save jsun/6f9481906f7a0d92e708fe1ae5f2f38c to your computer and use it in GitHub Desktop.
from sklearn.datasets import fetch_mldata
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix
# Load the MNIST handwritten-digit data set (features in .data, labels in .target).
# NOTE(review): fetch_mldata was deprecated in scikit-learn 0.20 and removed in
# 0.22; on newer versions replace the import and this call with
# fetch_openml('mnist_784') -- confirm against the installed version.
mnist = fetch_mldata('MNIST original')

# Split into training (80%) and test (20%) sets; the fixed seed keeps the
# split reproducible between runs.
x_train, x_test, y_train, y_test = train_test_split(
    mnist.data, mnist.target, test_size=0.2, random_state=0)
print(x_train.shape)
print(x_test.shape)

# Standardization: the scaling parameters are estimated from the training set
# only, so no information from the test set leaks into preprocessing.
scaler = StandardScaler()
scaler.fit(x_train)
x_train = scaler.transform(x_train)

# PCA on the training set. A float n_components in (0, 1) keeps the smallest
# number of principal components whose cumulative explained variance reaches
# that fraction (here 80%) -- it is not "80% of the components".
pca = PCA(0.80)
pca.fit(x_train)
print(pca.n_components_)
x_train = pca.transform(x_train)

# Use logistic regression as the classifier on the reduced features.
clf = LogisticRegression()
clf.fit(x_train, y_train)

# Prediction: apply the scaler and PCA fitted on the training data to the
# test set (never re-fit them on test data), then classify.
x_test = scaler.transform(x_test)
x_test = pca.transform(x_test)
y_pred = clf.predict(x_test)

# Print the confusion matrix (rows: true labels, columns: predicted labels);
# a bare expression would be silently discarded when run as a script.
print(confusion_matrix(y_test, y_pred))
## array([[1277, 0, 2, 2, 4, 7, 13, 0, 7, 0],
## [ 0, 1555, 12, 3, 1, 7, 1, 3, 19, 3],
## [ 10, 12, 1211, 21, 19, 5, 14, 18, 33, 5],
## [ 6, 15, 37, 1260, 0, 46, 4, 11, 32, 16],
## [ 5, 9, 10, 4, 1252, 2, 13, 4, 9, 54],
## [ 14, 7, 14, 50, 20, 1086, 25, 7, 41, 16],
## [ 6, 3, 9, 0, 4, 12, 1357, 1, 3, 2],
## [ 9, 5, 18, 7, 18, 3, 0, 1351, 4, 46],
## [ 9, 33, 15, 46, 10, 41, 14, 4, 1196, 22],
## [ 11, 7, 10, 29, 58, 10, 1, 50, 7, 1236]])
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment