Skip to content

Instantly share code, notes, and snippets.

@jsun
Last active September 16, 2018 06:05
Show Gist options
  • Save jsun/e8b419d7dc6bd1858a0e5828aff71adb to your computer and use it in GitHub Desktop.
Save jsun/e8b419d7dc6bd1858a0e5828aff71adb to your computer and use it in GitHub Desktop.
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import confusion_matrix
# Breast-cancer classification demo: standardize -> PCA -> random forest,
# evaluated with a confusion matrix on a held-out test split.

# Seed shared by the train/test split and the forest so that repeated runs
# produce the same split, the same model and the same confusion matrix.
RANDOM_STATE = 0

cancer = datasets.load_breast_cancer()
x = cancer.data
y = cancer.target
print(x.shape)
## (569, 30)

# Hold out 20% of the samples for testing.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.2, random_state=RANDOM_STATE
)

# training
# The scaler and the PCA are fitted on the training split only, so no
# information from the test split leaks into the preprocessing.
scaler = StandardScaler()
scaler.fit(x_train)
x_train_scaled = scaler.transform(x_train)

# A float n_components in (0, 1) asks PCA to keep as many components as are
# needed to explain that fraction of the variance (here 80%).
pca = PCA(n_components=0.80)
pca.fit(x_train_scaled)
x_train_scaled_pca = pca.transform(x_train_scaled)

clf = RandomForestClassifier(max_depth=3, random_state=RANDOM_STATE)
clf.fit(x_train_scaled_pca, y_train)

# test
# Reuse the transformers fitted on the training data; never refit on test data.
x_test_scaled = scaler.transform(x_test)
x_test_scaled_pca = pca.transform(x_test_scaled)
y_pred = clf.predict(x_test_scaled_pca)

# Print explicitly: a bare expression is only echoed in a REPL/notebook and
# would be silently discarded when this file is run as a script.
cm = confusion_matrix(y_test, y_pred)
print(cm)
## Rows are true classes, columns are predicted classes.
## Example from one run of the earlier, unseeded version of this script
## (exact counts depend on the split and the seed):
## [[33  7]
##  [ 3 71]]
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment