# Install the plotting library first: pip install scikit-plot
import matplotlib.pyplot as plt
import scikitplot as skplt
from sklearn.model_selection import train_test_split, cross_val_predict
from sklearn.datasets import load_digits, load_iris, make_classification
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import LinearSVC
# Load the digits dataset and get out-of-fold predictions for a small random forest
X, y = load_digits(return_X_y=True)
random_forest_clf = RandomForestClassifier(n_estimators=5, max_depth=5, random_state=1)
predictions = cross_val_predict(random_forest_clf, X, y)

# Confusion matrix, normalized so each row sums to 1
plt.rcParams['figure.figsize'] = 10, 10
skplt.metrics.plot_confusion_matrix(y, predictions, normalize=True)
plt.savefig("confusion_matrix.png", dpi=300)
plt.show()
# For ROC curves we need predicted probabilities, so fit the classifier on a
# training split and score a held-out test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=1)
random_forest_clf.fit(X_train, y_train)
y_prob = random_forest_clf.predict_proba(X_test)

plt.rcParams['figure.figsize'] = 10, 6
skplt.metrics.plot_roc(y_test, y_prob)
plt.legend(bbox_to_anchor=(1, 1), loc=2)
plt.tight_layout()
plt.savefig("roc_curve.png", dpi=300)
plt.show()

# Precision-recall curves from the same probabilities
plt.rcParams['figure.figsize'] = 10, 6
skplt.metrics.plot_precision_recall(y_test, y_prob)
plt.legend(bbox_to_anchor=(1, 1), loc=2)
plt.tight_layout()
plt.savefig('p_r.png', dpi=300)
plt.show()
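# A minimal alternative sketch: cross_val_predict can also return out-of-fold
# probabilities directly (method='predict_proba'), so the same ROC plot can be
# drawn over the full digits dataset without a separate holdout split.
oof_probas = cross_val_predict(random_forest_clf, X, y, method='predict_proba')
skplt.metrics.plot_roc(y, oof_probas)
plt.show()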
# Generate a random binary classification dataset with 20 features
X, y = make_classification(n_samples=100000, n_features=20, n_informative=7,
                           n_redundant=10, random_state=42)
# Generate the training and testing split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5, random_state=42)
# Fit four classifiers and collect their test-set scores; LinearSVC has no
# predict_proba, so its decision_function scores are used instead
rf = RandomForestClassifier()
lr = LogisticRegression(max_iter=500)
nb = GaussianNB()
svm = LinearSVC()
rf_proba = rf.fit(X_train, y_train).predict_proba(X_test)
lr_proba = lr.fit(X_train, y_train).predict_proba(X_test)
nb_proba = nb.fit(X_train, y_train).predict_proba(X_test)
svm_scores = svm.fit(X_train, y_train).decision_function(X_test)
names = ['Random Forest', 'Logistic Regression', 'Gaussian Naive Bayes', 'Linear SVM']
proba_list = [rf_proba, lr_proba, nb_proba, svm_scores]
# Calibration (reliability) curves for the four models
skplt.metrics.plot_calibration_curve(y_test, proba_list, names)
plt.savefig('calibration_curve.png', dpi=300)
plt.show()
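# A minimal sketch of three related scikit-plot metrics that expect a binary
# target (which this dataset satisfies): KS statistic, cumulative gain and
# lift, reusing the random forest probabilities from above.
skplt.metrics.plot_ks_statistic(y_test, rf_proba)
plt.show()
skplt.metrics.plot_cumulative_gain(y_test, rf_proba)
plt.show()
skplt.metrics.plot_lift_curve(y_test, rf_proba)
plt.show()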
# Feature importances from a random forest fit on the iris dataset
# (feature order in load_iris: sepal length, sepal width, petal length, petal width)
X, y = load_iris(return_X_y=True)
rf = RandomForestClassifier()
rf.fit(X, y)
skplt.estimators.plot_feature_importances(
    rf, feature_names=['sepal length', 'sepal width', 'petal length', 'petal width'])
plt.savefig('feature_importance.png', dpi=300)
plt.show()
# Learning curve for the same random forest on the iris data
skplt.estimators.plot_learning_curve(rf, X, y)
plt.show()
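# A minimal sketch of scikit-plot's decomposition plots on the same iris data:
# a 2-D PCA projection colored by class and the explained-variance curve.
from sklearn.decomposition import PCA
pca = PCA(random_state=1).fit(X)
skplt.decomposition.plot_pca_2d_projection(pca, X, y)
plt.show()
skplt.decomposition.plot_pca_component_variance(pca)
plt.show()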