import numpy as np
from keras import backend as K
from keras.models import Sequential
from keras.layers.core import Dense, Dropout, Activation, Flatten
from keras.layers.convolutional import Convolution2D, MaxPooling2D
from keras.preprocessing.image import ImageDataGenerator
from sklearn.metrics import classification_report, confusion_matrix
# End-to-end example: train a small CNN on an image-folder dataset, then print
# a confusion matrix and a classification report for the validation set.
#
# NOTE(review): this copy of the script had lost several continuation lines
# (unclosed calls, missing dense head / compile / fit header). The pieces
# marked "reconstructed" below were rebuilt from the names the file already
# imports and defines -- verify them against your own setup.

# --- Configuration ---------------------------------------------------------
train_data_path = 'F://data//Train'
test_data_path = 'F://data//Validation'
img_rows = 150
img_cols = 150
epochs = 30
batch_size = 32
num_of_train_samples = 3000
num_of_test_samples = 600
# Display names used in the classification report; defined up front so the
# output layer can be sized from it.
# NOTE(review): assumed to match the class-index order of the generators --
# confirm against validation_generator.class_indices.
target_names = ['Cats', 'Dogs', 'Horse']

# --- Image generators ------------------------------------------------------
# NOTE(review): the original training generator call continued with further
# arguments that are missing from this copy; only the rescale is kept.
train_datagen = ImageDataGenerator(rescale=1. / 255)
test_datagen = ImageDataGenerator(rescale=1. / 255)

train_generator = train_datagen.flow_from_directory(train_data_path,
                                                    target_size=(img_rows, img_cols),
                                                    batch_size=batch_size,
                                                    class_mode='categorical')
# shuffle=False is essential: predictions are compared position-by-position
# with validation_generator.classes, which is in directory order.
validation_generator = test_datagen.flow_from_directory(test_data_path,
                                                        target_size=(img_rows, img_cols),
                                                        batch_size=batch_size,
                                                        class_mode='categorical',
                                                        shuffle=False)

# --- Build model -----------------------------------------------------------
model = Sequential()
model.add(Convolution2D(32, (3, 3), input_shape=(img_rows, img_cols, 3), padding='valid'))
model.add(Activation('relu'))  # reconstructed
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Convolution2D(32, (3, 3), padding='valid'))
model.add(Activation('relu'))  # reconstructed
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Convolution2D(64, (3, 3), padding='valid'))
model.add(Activation('relu'))  # reconstructed
model.add(MaxPooling2D(pool_size=(2, 2)))

# Classification head (reconstructed): one softmax unit per class.
model.add(Flatten())
model.add(Dense(64))
model.add(Activation('relu'))
model.add(Dropout(0.5))
model.add(Dense(len(target_names)))
model.add(Activation('softmax'))

# Reconstructed: loss matches class_mode='categorical' (one-hot labels).
model.compile(loss='categorical_crossentropy',
              optimizer='rmsprop',
              metrics=['accuracy'])

# --- Train -----------------------------------------------------------------
model.fit_generator(train_generator,
                    steps_per_epoch=num_of_train_samples // batch_size,
                    epochs=epochs,
                    validation_data=validation_generator,
                    validation_steps=num_of_test_samples // batch_size)

# --- Confusion Matrix and Classification Report ----------------------------
# Rewind the generator, then predict exactly one pass over the data.
# len(generator) is the number of batches per pass, so the number of
# predictions equals the number of labels even when the sample count is an
# exact multiple of the batch size (where "// batch_size + 1" over-reads).
validation_generator.reset()
Y_pred = model.predict_generator(validation_generator, steps=len(validation_generator))
y_pred = np.argmax(Y_pred, axis=1)  # most probable class index per sample
print('Confusion Matrix')
print(confusion_matrix(validation_generator.classes, y_pred))
print('Classification Report')
print(classification_report(validation_generator.classes, y_pred, target_names=target_names))
rish4 commented Mar 29, 2020

To plot a ROC curve and AUC score for multi-class classification:

def multiclass_roc_auc_score(y_test, y_pred, average="macro"):
    """Plot one-vs-rest ROC curves and return the multi-class ROC AUC score.

    Relies on two module-level names that must exist before the call:
    ``all_labels`` (sequence of class names, one per column of the binarized
    labels) and ``c_ax`` (the matplotlib axes the curves are drawn on).

    Args:
        y_test: 1-D array of true class labels.
        y_pred: 1-D array of predicted class labels.
        average: Averaging mode forwarded to ``roc_auc_score``.

    Returns:
        The result of ``roc_auc_score`` computed on the binarized labels.
    """
    # Imported here so the function works when pasted on its own.
    from sklearn.metrics import auc, roc_auc_score, roc_curve
    from sklearn.preprocessing import LabelBinarizer

    lb = LabelBinarizer()
    # The binarizer must learn the label set before it can transform anything;
    # calling transform() on an unfitted instance raises an error.
    lb.fit(y_test)
    y_test = lb.transform(y_test)
    y_pred = lb.transform(y_pred)

    for (idx, c_label) in enumerate(all_labels):  # all_labels: list of class names
        fpr, tpr, _ = roc_curve(y_test[:, idx].astype(int), y_pred[:, idx])
        c_ax.plot(fpr, tpr, label='%s (AUC:%0.2f)' % (c_label, auc(fpr, tpr)))
    # Chance-level diagonal; drawn from fixed end points so it does not depend
    # on whatever the last loop iteration left in ``fpr``.
    c_ax.plot([0, 1], [0, 1], 'b-', label='Random Guessing')
    return roc_auc_score(y_test, y_pred, average=average)

# calling
# Rewind the generator, predict class scores, reduce them to class indices,
# then plot/score with multiclass_roc_auc_score.
valid_generator.reset()
class_scores = model.predict_generator(valid_generator, verbose=True)
y_pred = np.argmax(class_scores, axis=1)  # highest-scoring class per row
multiclass_roc_auc_score(valid_generator.classes, y_pred)

@govindrajmohan @JoaoPF

What does all_labels mean? I am getting an error on that.

It is the number of labels in your dataset. all_labels has to be replaced by a number(the number of labels you have in your data)

mnavaidd commented Jun 3, 2020

ROC Curve

from sklearn.metrics import roc_curve, auc, roc_auc_score
import matplotlib.pyplot as plt

# Make a prediction over exactly one pass of the validation data.
# len(generator) is the number of batches per pass, so the number of
# predictions matches validation_generator.classes even when the sample count
# is an exact multiple of the batch size.
y_pred_keras = loaded_model.predict_generator(validation_generator, steps=len(validation_generator))
# NOTE(review): roc_curve expects one score per sample, i.e. a binary model
# with a single output column -- confirm the model's output shape. The
# generator must also have been created with shuffle=False for the scores to
# line up with validation_generator.classes.
fpr_keras, tpr_keras, thresholds_keras = roc_curve(validation_generator.classes, y_pred_keras)
auc_keras = auc(fpr_keras, tpr_keras)

plt.plot([0, 1], [0, 1], 'k--')  # chance-level diagonal
plt.plot(fpr_keras, tpr_keras, label='area = {:.3f}'.format(auc_keras))
plt.xlabel('False positive rate')
plt.ylabel('True positive rate')
plt.title('ROC curve')
plt.legend(loc='best')  # without a legend the AUC label above is never shown
plt.show()

I am getting an error like: Found input variables with inconsistent numbers of samples: [30, 150]
My training data has 600 images and my test data has 30 images.
Below is the code. Please help me solve this issue!

        steps_per_epoch=600, # No of images in training set
        validation_steps=30)# No of images in test set

# NOTE(review): the second argument of predict_generator is the number of
# batches (steps), not the number of images. 30 steps would produce 150
# predictions for 30 labels if test_set uses a batch size of 5 -- presumably
# the cause of the "[30, 150]" error above; confirm the batch size and pass
# len(test_set) as the step count instead.
Y_pred = CNN_model.predict_generator(test_set, 30 )
# Reduce the per-class scores to one predicted class index per sample.
y_pred = np.argmax(Y_pred, axis=1)
print('Confusion Matrix')
# Rows follow test_set.classes (true labels), columns the predicted labels.
print(confusion_matrix(test_set.classes, y_pred))

innat commented Jun 6, 2020

It's the list of target names in your dataset. For example:

# Class names, one per target class (the closing quote of the last entry was a backtick).
all_labels = ['cat', 'dog', 'human']


# Create a single 12x8 figure; c_ax is the axes the ROC curves are drawn on.
fig, c_ax = plt.subplots(nrows=1, ncols=1, figsize=(12, 8))

I updated the above solution. Please check.

If the target classes are only "cat", "dog" and "horse", why did you use 6 dense layers at the end? Wouldn't it be only 3?

bit-scientist commented Sep 2, 2020

Could someone guide me on how to get the labels of validation_set when it takes pairs of images as input and is constructed with ImageDataGenerator as follows:

GEN = ImageDataGenerator(rescale = 1./255)

def two_inputs(generator, X1, X2, batch_size, img_height, img_width):
    U = generator.flow_from_directory(X1,
                                            target_size=(img_height, img_width),
                                            shuffle= False,
    V = generator.flow_from_directory(X2,
                                            target_size=(img_height, img_width),
                                            shuffle= False,
    while True:
        X1i =
        X2i =
        yield [X1i[0], X2i[0]], X2i[1]   # Yield both images and their mutual label

In the following scenario I can get predictions by preds = base_model.predict_generator(val_flow) where val_flow is

val_flow = two_inputs(generator= GEN,
                      X1 = val_05_dirs,
                      X2 = val_06_dirs,
                      batch_size = batch_size,

I need to get fpr and tpr using fpr, tpr, _ = metrics.roc_curve(LABELS, preds).

Therefore I am trying to get the LABELS of the full val_flow, which reads from the two folders val_05_dirs and val_06_dirs.

Thanks in advance

innat commented Sep 2, 2020

if the target image is only "cat", "dog", "horse"; why did you use 6 dense layers at the end? wouldn't it be only 3?

It was a demonstration reply of my comment, not for the main post.

How do I plot confusion matrix for this?

BogoK commented Apr 28, 2021

I'm curious. How do you plot it like is done in this example?

Pratyusha001 commented May 4, 2021

The confusion matrix doesn't work with validation_generator. How can I plot the confusion matrix accurately?

validation_generator = test_datagen.flow_from_directory(
target_size=(img_height, img_width),
cnn_model_history = cnn_model.fit_generator(
steps_per_epoch=nb_train_samples// batch_size ,
validation_steps=nb_validation_samples// batch_size,shuffle='false',
workers = 4)
# NOTE(review): Model.predict takes batch_size as its second positional
# parameter, so this value is presumably not being used as a step count --
# pass it by keyword (steps=...) and confirm against the installed Keras
# version.
# NOTE(review): the predictions only line up with validation_generator.classes
# when the generator itself was created with shuffle=False (the boolean); the
# full generator definition is not visible here -- verify.
Y_pred = cnn_model.predict(validation_generator, nb_validation_samples // batch_size+1)
# Reduce the per-class scores to one predicted class index per sample.
y_pred = np.argmax(Y_pred, axis=1)
print('Confusion Matrix')

it shows the accuracy of 98 percent but gives the wrong values of confusion matrix
Confusion Matrix
[[34 38 32 35 30 16 20 31 30 33]
[33 24 42 35 21 20 22 30 35 36]
[26 23 31 35 19 22 17 25 29 29]
[32 39 26 31 32 23 22 25 28 41]
[28 20 16 18 15 11 21 23 19 25]
[30 20 14 22 15 8 19 19 23 25]
[16 26 21 21 17 17 14 25 19 20]
[30 45 25 35 15 18 22 42 30 37]
[31 31 36 33 28 25 21 34 31 29]
[35 35 21 28 19 17 21 37 48 38]]

Is the used code correct?

ghost commented May 4, 2021

Nice. Very helpful.

Random results like this happen when shuffle=True is set in test_generator. So it should be like this:

# NOTE(review): the original call was cut off after the opening parenthesis.
# Everything except shuffle=False is a placeholder taken from the script at the
# top of this page -- substitute your own directory, image size and batch size.
test_generator = test_datagen.flow_from_directory(
    test_data_path,
    target_size=(img_rows, img_cols),
    batch_size=batch_size,
    shuffle=False)  # keep directory order so predictions match test_generator.classes

wediye commented Jun 1, 2021

Thank you So much !!

Anushajadav commented Jun 3, 2021

Thank you.
But I got an error. Please tell me the solution.
WhatsApp Image 2021-06-03 at 11 11 48 PM
WhatsApp Image 2021-06-03 at 11 12 13 PM

ERROR IS -IndexError: index 131 is out of bounds for axis 1 with size 131

saikumarkethi commented Jun 15, 2021

Thank you.
But I got an error. Please tell me the solution.
WhatsApp Image 2021-06-03 at 11 11 48 PM
WhatsApp Image 2021-06-03 at 11 12 13 PM

ERROR IS -IndexError: index 131 is out of bounds for axis 1 with size 131

You need to import these libraries: "from sklearn.metrics import roc_auc_score", "from sklearn.metrics import roc_curve"
and "from sklearn.metrics import auc".

innat commented Jun 15, 2021

cc. @Anushajadav

Here is the complete end-to-end example for plotting roc on multi-class classification.

It's the list of target names in your dataset. For example:

# Class names, one per target class (the closing quote of the last entry was a backtick).
all_labels = ['cat', 'dog', 'human']


# Create a single 12x8 figure; c_ax is the axes the ROC curves are drawn on.
fig, c_ax = plt.subplots(nrows=1, ncols=1, figsize=(12, 8))

I updated the above solution. Please check.


Random results like this happen when shuffle=True is set in test_generator. So it should be like this:

# NOTE(review): the original call was cut off after the opening parenthesis.
# Everything except shuffle=False is a placeholder taken from the script at the
# top of this page -- substitute your own directory, image size and batch size.
test_generator = test_datagen.flow_from_directory(
    test_data_path,
    target_size=(img_rows, img_cols),
    batch_size=batch_size,
    shuffle=False)  # keep directory order so predictions match test_generator.classes

Thanks ,this was my problem.

gabrielakuhn commented Jul 8, 2022

I'm curious. How do you plot it like is done in this example?

def plot_confusion_matrix(cm, classes,
                          title='Confusion matrix',
                          cmap=None):
    """Draw a confusion matrix as an annotated heat map on the current figure.

    NOTE(review): the signature and the drawing calls were truncated in this
    copy; they are rebuilt here to match the call
    ``plot_confusion_matrix(cm, cm_plot_label, title=...)`` used below.

    Args:
        cm: 2-D array of integer counts (cells are formatted with ``'d'``).
        classes: Tick labels, one per row/column of ``cm``.
        title: Figure title.
        cmap: Matplotlib colormap; defaults to ``plt.cm.Blues``.
    """
    # Imported here so the function works when pasted on its own.
    import itertools
    import matplotlib.pyplot as plt

    if cmap is None:
        cmap = plt.cm.Blues

    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()

    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes, rotation=55)
    plt.yticks(tick_marks, classes)

    fmt = 'd'
    # Cells above half of the maximum count get white text for contrast.
    thresh = cm.max() / 2.

    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        plt.text(j, i,
                 format(cm[i, j], fmt),
                 horizontalalignment="center",
                 color="white" if cm[i, j] > thresh else "black")

    plt.ylabel('True Label')
    plt.xlabel('Predicted Label')
    plt.tight_layout()

# Tick labels for the plot, then the matrix itself (true labels vs. predictions).
cm_plot_label = ['Cats', 'Dogs', 'Horse']
cm = confusion_matrix(validation_generator.classes, y_pred)

plot_confusion_matrix(cm, cm_plot_label, title='Confusion matrix')

Hello, can anyone help me to solve this problem please? There is no prediction for Class 2, how do I solve this?
This is my code:

# NOTE(review): no shuffle=False is passed, so the batch order presumably does
# not match test_dataset.classes (flow_from_directory shuffles by default --
# confirm).
test_dataset = test.flow_from_directory('testdata/', target_size=(i_size, j_size), batch_size=128, class_mode='binary', color_mode="grayscale")
# NOTE(review): the second positional parameter of Model.predict is batch_size,
# so 129 is presumably not acting as a step count -- confirm against the
# installed Keras version.
Y_pred = model.predict(test_dataset, 129)
# NOTE(review): with class_mode='binary' the model presumably emits a single
# sigmoid column. argmax over axis 1 of an (n, 1) array is always 0, which
# would explain why no sample is ever predicted as the second class in the
# output below. Thresholding (e.g. Y_pred > 0.5) is the usual alternative --
# confirm the model's output shape first.
y_pred = np.argmax(Y_pred, axis=1)
print('Confusion Matrix')
print(confusion_matrix(test_dataset.classes, y_pred))
print('Classification Report')
labels_names = ['C1', 'C2']
print(classification_report(test_dataset.classes, y_pred, target_names=labels_names))

Found 3592 images belonging to 2 classes.
29/29 [==============================] - 6s 207ms/step
Confusion Matrix
[[1796 0]
[1796 0]]
Classification Report
precision recall f1-score support

      C1       0.50      1.00      0.67      1796
      C2       0.00      0.00      0.00      1796

accuracy                           0.50      3592

macro avg 0.25 0.50 0.33 3592
weighted avg 0.25 0.50 0.33 3592

UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use zero_division parameter to control this behavior. _warn_prf(average, modifier, msg_start, len(result))

it seems that some labels in y_test don't appear in y_pred, see this .

Swearys commented Dec 30, 2022

Hi!, very good gist.
I think you have to put shuffle=False when you do test_datagen.flow_from_directory() so the samples don't get shuffled and have the same order as validation_generator.classes

Very good comment man!
I was struggling to understand why my model had good metrics, but when predicting without the 'Shuffle = False' I got bad results. Thank you so much !

