# coreyjs/build_roc_auc.py

## build_roc_auc.py
# Function for calculating auc and roc

def build_roc_auc(model, X_train, X_test, y_train, y_test):
    '''
    Fit a classifier, plot its ROC curve on the test set, and return the AUC.

    INPUT:
    model - an sklearn instantiated model; it is fitted here on the training
            data and must implement predict_proba
    X_train - the training data
    y_train - the training response values (must be categorical)
    X_test - the test data
    y_test - the test response values (must be categorical; only the second
             predict_proba column is scored against them, so this is meant
             for two-class problems)
    OUTPUT:
    auc - returns auc as a float, computed from the predicted probabilities
          (the same value that is shown in the plot legend)
    shows the roc curve
    '''
    # Imports stay function-local, as in the original block. The unused
    # `cycle` and `scipy.interp` imports were dropped; the latter fails to
    # import on recent SciPy releases.
    import matplotlib.pyplot as plt
    from sklearn.metrics import roc_auc_score, roc_curve

    # Column 1 of predict_proba is the probability of the second class,
    # which is treated as the positive class.
    positive_scores = model.fit(X_train, y_train).predict_proba(X_test)[:, 1]

    # A single curve describes the whole test set, so compute it once rather
    # than once per test sample (the old loop also required >= 3 samples
    # because the plot read entry 2 of the per-sample dicts).
    fpr, tpr, _ = roc_curve(y_test, positive_scores)

    # Score the raw probabilities: rounding them first would collapse the
    # ranking to a single 0.5 threshold and report a different, lower-fidelity
    # number than the area under the curve that is plotted.
    roc_auc = roc_auc_score(y_test, positive_scores)

    plt.plot(fpr, tpr, color='darkorange',
             lw=2, label='ROC curve (area = %0.2f)' % roc_auc)
    # Diagonal reference line: the expected curve of a random classifier.
    plt.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')
    plt.xlim([0.0, 1.0])
    plt.ylim([0.0, 1.05])
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.title('Receiver operating characteristic example')
    # Without a legend the `label` given above is never rendered.
    plt.legend(loc='lower right')
    plt.show()

    return roc_auc


# Plot the ROC curve and compute the AUC for the random forest model
build_roc_auc(
    model=rf_mod,
    X_train=training_data,
    X_test=testing_data,
    y_train=y_train,
    y_test=y_test,
)
	# Function for calculating auc and roc

	def build_roc_auc(model, X_train, X_test, y_train, y_test):
	    '''
	    Fit a classifier, plot its ROC curve on the test set, and return the AUC.

	    INPUT:
	    model - an sklearn instantiated model; it is fitted here on the training
	            data and must implement predict_proba
	    X_train - the training data
	    y_train - the training response values (must be categorical)
	    X_test - the test data
	    y_test - the test response values (must be categorical; only the second
	             predict_proba column is scored against them, so this is meant
	             for two-class problems)
	    OUTPUT:
	    auc - returns auc as a float, computed from the predicted probabilities
	          (the same value that is shown in the plot legend)
	    shows the roc curve
	    '''
	    # Imports stay function-local, as in the original block. The unused
	    # `cycle` and `scipy.interp` imports were dropped; the latter fails to
	    # import on recent SciPy releases.
	    import matplotlib.pyplot as plt
	    from sklearn.metrics import roc_auc_score, roc_curve

	    # Column 1 of predict_proba is the probability of the second class,
	    # which is treated as the positive class.
	    positive_scores = model.fit(X_train, y_train).predict_proba(X_test)[:, 1]

	    # A single curve describes the whole test set, so compute it once rather
	    # than once per test sample (the old loop also required >= 3 samples
	    # because the plot read entry 2 of the per-sample dicts).
	    fpr, tpr, _ = roc_curve(y_test, positive_scores)

	    # Score the raw probabilities: rounding them first would collapse the
	    # ranking to a single 0.5 threshold and report a different, lower-fidelity
	    # number than the area under the curve that is plotted.
	    roc_auc = roc_auc_score(y_test, positive_scores)

	    plt.plot(fpr, tpr, color='darkorange',
	             lw=2, label='ROC curve (area = %0.2f)' % roc_auc)
	    # Diagonal reference line: the expected curve of a random classifier.
	    plt.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')
	    plt.xlim([0.0, 1.0])
	    plt.ylim([0.0, 1.05])
	    plt.xlabel('False Positive Rate')
	    plt.ylabel('True Positive Rate')
	    plt.title('Receiver operating characteristic example')
	    # Without a legend the `label` given above is never rendered.
	    plt.legend(loc='lower right')
	    plt.show()

	    return roc_auc


	# Plot the ROC curve and compute the AUC for the random forest model
	build_roc_auc(
	    model=rf_mod,
	    X_train=training_data,
	    X_test=testing_data,
	    y_train=y_train,
	    y_test=y_test,
	)