|
# make predictions |
|
from pandas import read_csv |
|
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix |
|
from sklearn.model_selection import train_test_split |
|
from sklearn.svm import SVC |
|
import seaborn as sns |
|
import matplotlib.pyplot as plt |
|
import numpy as np |
|
|
|
# Load dataset
# The CSV has no header row, so column names are supplied explicitly.
url = "https://raw.githubusercontent.com/jbrownlee/Datasets/master/iris.csv"
names = ['sepal-length', 'sepal-width', 'petal-length', 'petal-width', 'class']
dataset = read_csv(url, names=names)

# Split-out validation dataset
# First four columns are the features, the last one is the class label.
array = dataset.values
X, y = array[:, :4], array[:, 4]
X_train, X_validation, Y_train, Y_validation = train_test_split(
    X, y, test_size=0.20, random_state=1)

# Make predictions on validation dataset
model = SVC(gamma='auto')
model.fit(X_train, Y_train)
predictions = model.predict(X_validation)
|
|
|
# Evaluate predictions: overall accuracy, the raw confusion matrix,
# and sklearn's per-class precision/recall/F1 report.
accuracy = accuracy_score(Y_validation, predictions)
print(accuracy)

# `cm` is reused below for the manual metric derivations and the heatmap.
cm = confusion_matrix(Y_validation, predictions)
print(cm)

report = classification_report(Y_validation, predictions)
print(report)
|
|
|
|
|
def calculate_metrics(confusion_matrix):
    """Derive per-class TP/TN/FP/FN counts from a square confusion matrix.

    Assumes the sklearn convention: rows are actual classes, columns are
    predicted classes.

    Parameters
    ----------
    confusion_matrix : numpy.ndarray
        Square (n_classes x n_classes) matrix of counts.

    Returns
    -------
    tuple of numpy.ndarray
        (TP, TN, FP, FN), each a 1-D array with one entry per class.
    """
    # True positives sit on the diagonal.
    TP = np.diag(confusion_matrix)
    # A column total counts every prediction of that class; removing the
    # diagonal leaves the wrong ones (false positives).
    FP = confusion_matrix.sum(axis=0) - TP
    # A row total counts every actual instance of that class; removing the
    # diagonal leaves the missed ones (false negatives).
    FN = confusion_matrix.sum(axis=1) - TP
    # Whatever remains of the grand total is a true negative.
    TN = confusion_matrix.sum() - (FP + FN + TP)

    return TP, TN, FP, FN
|
|
|
|
|
# https://stackoverflow.com/a/43331484
# Per-class counts for the 3x3 confusion matrix, computed per class index.

# True positives = the diagonal of the confusion matrix, top-left to bottom-right.
tps = [cm[k, k] for k in range(3)]

# True negatives = everything except for the row and column for the class:
# grand total minus the class's row and column, adding the diagonal cell
# back once (the row and the column each removed it).
tns = [cm.sum() - cm[k, :].sum() - cm[:, k].sum() + cm[k, k] for k in range(3)]

# False positives = everything in the column for the class, except for the
# true positive value.
fps = [cm[:, k].sum() - cm[k, k] for k in range(3)]

# False negatives = everything in the row for the class, except for the
# true positive value.
fns = [cm[k, :].sum() - cm[k, k] for k in range(3)]
|
|
|
# Print each count list on its own line, space-separated, in the order
# TP, TN, FP, FN (matching the lists built above).
for metric_values in (tps, tns, fps, fns):
    for value in metric_values:
        print(value, end=' ')
    print()

# Cross-check the hand-computed counts against the vectorised helper.
tps1, tns1, fps1, fns1 = calculate_metrics(cm)
print(f'TP: {tps1}, TN: {tns1}, FP: {fps1}, FN: {fns1}')
|
|
|
# Per-class precision, recall, and F1, derived from the manual counts.
# Looping over the zipped count lists covers every class (the duplicated
# per-class formulas previously stopped after the second class, leaving
# the third class's metrics uncomputed).
for class_idx, (tp, fp, fn) in enumerate(zip(tps, fps, fns), start=1):
    # precision: of everything predicted as this class, how much was right
    precision = tp / (tp + fp)
    # recall: of every actual instance of this class, how much was found
    recall = tp / (tp + fn)
    # F1: harmonic mean of precision and recall
    f1 = (2 * precision * recall) / (precision + recall)
    print(f'precision{class_idx}: {precision}, recall{class_idx}: {recall}, f1{class_idx}: {f1}')
|
|
|
# Plot the confusion matrix.
# sklearn's confusion_matrix puts actual (true) labels on the rows and
# predicted labels on the columns, so the heatmap's y-axis is the actual
# class and its x-axis is the prediction — the labels below reflect that
# orientation (they were previously swapped).
sns.heatmap(cm, annot=True)
plt.ylabel('Actual', fontsize=13)
plt.xlabel('Prediction', fontsize=13)
plt.title('Confusion Matrix', fontsize=17)
plt.show()