Skip to content

Instantly share code, notes, and snippets.

@jsun
Created September 16, 2018 05:51
Show Gist options
  • Save jsun/8967cf925503bd981e28eb4b452ef248 to your computer and use it in GitHub Desktop.
Save jsun/8967cf925503bd981e28eb4b452ef248 to your computer and use it in GitHub Desktop.
from sklearn import datasets
from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import confusion_matrix
# Load the breast-cancer dataset that ships with scikit-learn.
cancer = datasets.load_breast_cancer()
x = cancer.data
y = cancer.target
print(x.shape)
## (569, 30)

# One seed shared by the split and the forest, so that repeated runs give the
# same train/test partition, the same model and the same confusion matrix.
RANDOM_STATE = 0

# Hold out 20% of the samples for evaluation.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.2, random_state=RANDOM_STATE
)

# create a pipeline to process data:
#   1. standardize every feature,
#   2. project onto principal components (a float n_components in (0, 1) asks
#      PCA to keep enough components to explain that fraction of the variance),
#   3. classify with a shallow random forest.
# Fitting the pipeline fits each step on the training data only.
ppln = Pipeline([
    ('scale', StandardScaler()),
    ('pca', PCA(n_components=0.80)),
    ('clf', RandomForestClassifier(max_depth=3, random_state=RANDOM_STATE)),
])
ppln.fit(x_train, y_train)

# prediction
y_pred = ppln.predict(x_test)

# Print the matrix explicitly: a bare expression is only echoed in a REPL or
# notebook and would be silently discarded when this file runs as a script.
# Rows are the true classes, columns the predicted classes.
print(confusion_matrix(y_test, y_pred))
## Illustrative output only -- the exact counts depend on RANDOM_STATE and on
## the installed scikit-learn version:
## [[40  0]
##  [ 5 69]]
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment