# -*- coding: utf-8 -*-
"""Modelo_Acc+80.ipynb
Automatically generated by Colaboratory.
Original file is located at
https://colab.research.google.com/drive/1kLQn4QbQKLGSHXhAsmKrIMBcKLWHNWvg
"""
import matplotlib
matplotlib.use("Agg")
# import the necessary packages
from sklearn.preprocessing import LabelBinarizer
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from keras.preprocessing.image import ImageDataGenerator
from keras.optimizers import SGD
from keras.models import Sequential
from keras.layers.normalization import BatchNormalization
from keras.layers.convolutional import Conv2D
from keras.layers.convolutional import MaxPooling2D
from keras.layers.core import Activation
from keras.layers.core import Flatten
from keras.layers.core import Dropout
from keras.layers.core import Dense
from keras import backend as K
from imutils import paths
import matplotlib.pyplot as plt
import numpy as np
import argparse
import random
import pickle
import cv2
import os
# Install a Drive FUSE wrapper.
# https://github.com/astrada/google-drive-ocamlfuse
# !apt-get install -y -qq software-properties-common python-software-properties module-init-tools
# !add-apt-repository -y ppa:alessandro-strada/ppa 2>&1 > /dev/null
# !apt-get update -qq 2>&1 > /dev/null
# !apt-get -y install -qq google-drive-ocamlfuse fuse
# from google.colab import auth
# auth.authenticate_user()
# # Generate creds for the Drive FUSE library.
# from oauth2client.client import GoogleCredentials
# creds = GoogleCredentials.get_application_default()
# import getpass
# !google-drive-ocamlfuse -headless -id={creds.client_id} -secret={creds.client_secret} < /dev/null 2>&1 | grep URL
# vcode = getpass.getpass()
# !echo {vcode} | google-drive-ocamlfuse -headless -id={creds.client_id} -secret={creds.client_secret}
# # Create a directory and mount Google Drive using that directory.
# !mkdir -p drive
# !google-drive-ocamlfuse drive
# # print 'Files in Drive:'
# !ls drive/'Colab Notebooks'/Ojos
# import os
# os.chdir("drive/Neumonía/")
# !ls
image_data_folder_path = "train/"
imagePaths = list(paths.list_images(image_data_folder_path))
print(len(imagePaths))
data = []
labels = []
for imagePath in imagePaths:
    # load the image, resize it to 150x150 pixels (the input spatial
    # dimensions of the network below), and store it in the data list;
    # label PNEUMONIA images as 1, everything else as 0
    # (an earlier variant labeled only 'virus' images as 1)
    if imagePath.find('PNEUMONIA') != -1:
        labelName = 1
    else:
        labelName = 0
    image = cv2.imread(imagePath)
    image = cv2.resize(image, (150, 150))
    data.append(image)
    labels.append(labelName)
data = np.array(data, dtype="float") / 255.0
labels = np.array(labels)
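# Quick class-balance check (a sketch, not in the original notebook): count
# the 0 (normal) and 1 (pneumonia) labels that were just collected.
print("class counts (normal, pneumonia):", np.bincount(labels.astype(int)))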
# partition the data into training and testing splits using 75% of
# the data for training and the remaining 25% for testing
(trainX, testX, trainY, testY) = train_test_split(data, labels,
    test_size=0.25, random_state=42)
print("trainX.shape ->", trainX.shape)
# convert the labels from integers to one-hot vectors (for 2-class, binary
# classification use Keras' to_categorical function instead, since
# scikit-learn's LabelBinarizer returns a single column rather than a
# two-column one-hot matrix)
#lb = LabelBinarizer()
#trainY = lb.fit_transform(trainY)
#testY = lb.transform(testY)
# construct the image generator for data augmentation
aug = ImageDataGenerator(rotation_range=30, width_shift_range=0.1,
    height_shift_range=0.1, shear_range=0.2, zoom_range=0.2,
    horizontal_flip=True, fill_mode="nearest")
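# Quick sanity check (a sketch, not in the original notebook): pull a single
# augmented batch and confirm shapes and the [0, 1] pixel range survive
# augmentation. `n_preview` is a hypothetical name introduced here.
n_preview = 4
previewX, previewY = next(aug.flow(data[:n_preview], labels[:n_preview],
    batch_size=n_preview))
print(previewX.shape, previewX.min(), previewX.max())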
from keras.utils import to_categorical
trainY = to_categorical(trainY, num_classes=2)
testY = to_categorical(testY, num_classes=2)
height = 150
width = 150
depth = 3
inputShape = (height, width, depth)
classes = 2
#classes = len(lb.classes_)
chanDim = -1
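# Portability note (an assumption, not in the original notebook): chanDim = -1
# and the (height, width, depth) input shape assume Keras' default
# "channels_last" image data format; under "channels_first" both must change:
if K.image_data_format() == "channels_first":
    inputShape = (depth, height, width)
    chanDim = 1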
model = Sequential()
# CONV => RELU => POOL layer set
model.add(Conv2D(32, (3, 3), padding="same", input_shape=inputShape))
model.add(Activation("relu"))
model.add(BatchNormalization(axis=chanDim))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))
# (CONV => RELU) * 2 => POOL layer set
model.add(Conv2D(64, (3, 3), padding="same"))
model.add(Activation("relu"))
model.add(BatchNormalization(axis=chanDim))
model.add(Conv2D(64, (3, 3), padding="same"))
model.add(Activation("relu"))
model.add(BatchNormalization(axis=chanDim))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))
# (CONV => RELU) * 3 => POOL layer set
model.add(Conv2D(128, (3, 3), padding="same"))
model.add(Activation("relu"))
model.add(BatchNormalization(axis=chanDim))
model.add(Conv2D(128, (3, 3), padding="same"))
model.add(Activation("relu"))
model.add(BatchNormalization(axis=chanDim))
model.add(Conv2D(128, (3, 3), padding="same"))
model.add(Activation("relu"))
model.add(BatchNormalization(axis=chanDim))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))
# first (and only) set of FC => RELU layers
model.add(Flatten())
model.add(Dense(512))
model.add(Activation("relu"))
model.add(BatchNormalization())
model.add(Dropout(0.5))
# two-unit classifier head (sigmoid activation, paired with the
# binary_crossentropy loss used below)
model.add(Dense(2))
model.add(Activation("sigmoid"))
model.summary()
# initialize our initial learning rate, # of epochs to train for,
# and batch size
INIT_LR = 0.01
EPOCHS = 200
BS = 250
# initialize the model and optimizer (you'll want to use
# binary_crossentropy for 2-class classification)
print("[INFO] training network...")
opt = SGD(lr=INIT_LR, decay=INIT_LR / EPOCHS)
model.compile(loss="binary_crossentropy", optimizer=opt,
    metrics=["accuracy"])
from keras.callbacks import ReduceLROnPlateau , ModelCheckpoint
filepath="weights150x150_A_crop.hdf5"
checkpoint = ModelCheckpoint(filepath, monitor='val_acc', verbose=1, save_best_only=True, mode='max')
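# Note: monitor='val_acc' matches the metric name in Keras 2.2-era releases;
# newer Keras versions report it as 'val_accuracy'. ReduceLROnPlateau is
# imported above but never used; a minimal sketch of how it could be
# configured (an assumption, not part of the original training run -- it only
# takes effect if added to the callbacks list passed to fit_generator below):
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=5,
    verbose=1, min_lr=1e-5)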
# train the network
H = model.fit_generator(aug.flow(trainX, trainY, batch_size=BS),
    validation_data=(testX, testY), steps_per_epoch=len(trainX) // BS,
    callbacks=[checkpoint], epochs=EPOCHS)
# evaluate the network
print("[INFO] evaluating network...")
predictions = model.predict(testX, batch_size=128)
print(classification_report(testY.argmax(axis=1),
    predictions.argmax(axis=1)))  # , target_names=lb.classes_
pred = predictions.argmax(axis=1)
y_true = testY.argmax(axis=1)
from sklearn.metrics import confusion_matrix  # classification_report is already imported above
print(confusion_matrix(y_true, pred))
# precision = TP / (TP + FP)
p1 = 214 / (214 + 117)
print(p1)
p0 = 349 / (349 + 95)
print(p0)
# recall = TP / (TP + FN)
r1 = 214 / (214 + 131)
print(r1)
r0 = 349 / (349 + 8)
print(r0)
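# The hand-computed values above can be cross-checked with scikit-learn
# (a sketch using y_true and pred from above; not in the original notebook):
from sklearn.metrics import precision_score, recall_score
print("per-class precision:", precision_score(y_true, pred, average=None))
print("per-class recall:   ", recall_score(y_true, pred, average=None))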
#image_data_folder_path_test = "val/NORMAL/"
#image_data_folder_path_test = "val/PNEUMONIA/"
#image_data_folder_path_test = "test/PNEUMONIA/"
#image_data_folder_path_test = "test/NORMAL/"
#image_data_folder_path_test = "CROP/test/NORMAL/"
#image_data_folder_path_test = "CROP/test/PNEUMONIA/"
#image_data_folder_path_test = "CROP/NORMAL/"
#image_data_folder_path_test = "CROP/PNEUMONIA/"
#image_data_folder_path_test = "CROP/val/NORMAL/"
image_data_folder_path_test = "CROP/val/PNEUMONIA/"
imagePaths_test = list(paths.list_images(image_data_folder_path_test))
contOK = 0
contToT = 0
for imagePath in imagePaths_test:
    if contToT < 2000:
        image = cv2.imread(imagePath)
        image = cv2.resize(image, (150, 150))
        # scale the pixel values to [0, 1] and add a batch dimension
        image = image.astype("float") / 255.0
        image = image.reshape((1, 150, 150, 3))
        # make a prediction on the image and take the most likely class index
        preds = model.predict(image)[0]
        print(preds)
        i = preds.argmax(axis=0)
        # (earlier variant, kept for reference: label the image "Sana" /
        # "No Sana" -- healthy / not healthy -- from the two probabilities)
        # (sana, noSana) = model.predict(image)[0]
        # label = "Sana" if sana > noSana else "No Sana"
        # proba = sana if sana > noSana else noSana
        # print("{}: {:.2f}%".format(label, proba * 100))
        contToT = contToT + 1
        if i == 0:
            contOK = contOK + 1
print(contOK)
print(contToT)
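# A vectorized alternative (a sketch, not in the original notebook): load the
# folder into one array and predict with a single batched call instead of one
# model.predict per image.
testImages = []
for p in imagePaths_test[:2000]:
    img = cv2.resize(cv2.imread(p), (150, 150))
    testImages.append(img.astype("float") / 255.0)
testImages = np.array(testImages)
batchPreds = model.predict(testImages, batch_size=128).argmax(axis=1)
print((batchPreds == 0).sum(), "of", len(batchPreds), "classified as class 0 (normal)")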
# 194 correct
# NORMAL: 40 false positives
# using CROP/test/NORMAL (not used in training)
# out of 234: 149 classified as healthy (correct) ---- at 150px: 186 classified as healthy (correct), 80%
#              85 as sick (incorrect)                           48 as sick
# using CROP/test/PNEUMONIA (not used in training)
# out of 390: 13 classified as healthy (incorrect) ---- at 150px: 26 as healthy, 93%
#            360 classified as sick                             364 as sick
# using CROP/test/NORMAL
# HEALTHY ---- 188 healthy out of 234 ......... 80.34%
# using CROP/test/PNEUMONIA
# SICK ---- 380 sick out of 390 ........ 97.43%