Created
September 16, 2018 03:57
-
-
Save jsun/6f9481906f7a0d92e708fe1ae5f2f38c to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from sklearn.datasets import fetch_mldata | |
from sklearn.model_selection import train_test_split | |
from sklearn.preprocessing import StandardScaler | |
from sklearn.decomposition import PCA | |
from sklearn.linear_model import LogisticRegression | |
from sklearn.metrics import confusion_matrix | |
# Pipeline: load MNIST -> train/test split -> standardize -> PCA -> logistic
# regression -> confusion matrix on the held-out test set.

# Load the data set. `mnist` was referenced below without ever being defined
# (NameError); fetch_mldata is the loader this file already imports.
# NOTE(review): fetch_mldata was deprecated in scikit-learn 0.20 and removed in
# 0.22 -- on newer versions use fetch_openml("mnist_784") instead; confirm
# against the installed scikit-learn version.
mnist = fetch_mldata("MNIST original")

# get training and test sets (20% held out for testing, fixed seed so the split
# is reproducible)
x_train, x_test, y_train, y_test = train_test_split(
    mnist.data, mnist.target, test_size=0.2, random_state=0
)
print(x_train.shape)
print(x_test.shape)

# standardization
# The scaling parameters are estimated from the training set only and are
# reused for the test set further down, so no test-set statistics leak into
# the model.
scaler = StandardScaler()
x_train = scaler.fit_transform(x_train)

# PCA on the training set: a float n_components in (0, 1) keeps the smallest
# number of principal components whose cumulative explained variance reaches
# that fraction (here 80%) -- it is NOT "the first 80% of the components".
pca = PCA(0.80)
pca.fit(x_train)
print(pca.n_components_)  # number of components actually retained
x_train = pca.transform(x_train)

# use logistic regression as the classifier (library default settings)
clf = LogisticRegression()
clf.fit(x_train, y_train)

# prediction: the test set goes through the scaler and PCA fitted on the
# training data (transform only, never re-fit)
x_test = scaler.transform(x_test)
x_test = pca.transform(x_test)
y_pred = clf.predict(x_test)

# Rows are true labels, columns are predicted labels. The result was previously
# discarded, which shows nothing when run as a script, so print it explicitly.
cm = confusion_matrix(y_test, y_pred)
print(cm)

# Result recorded by the original author (interactive repr; not re-verified):
## array([[1277,    0,    2,    2,    4,    7,   13,    0,    7,    0],
##        [   0, 1555,   12,    3,    1,    7,    1,    3,   19,    3],
##        [  10,   12, 1211,   21,   19,    5,   14,   18,   33,    5],
##        [   6,   15,   37, 1260,    0,   46,    4,   11,   32,   16],
##        [   5,    9,   10,    4, 1252,    2,   13,    4,    9,   54],
##        [  14,    7,   14,   50,   20, 1086,   25,    7,   41,   16],
##        [   6,    3,    9,    0,    4,   12, 1357,    1,    3,    2],
##        [   9,    5,   18,    7,   18,    3,    0, 1351,    4,   46],
##        [   9,   33,   15,   46,   10,   41,   14,    4, 1196,   22],
##        [  11,    7,   10,   29,   58,   10,    1,   50,    7, 1236]])
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment