Created
September 12, 2020 22:57
-
-
Save MuhammadArdiPutra/f7ad4ab7839ea2f1491ab8d1b32601cd to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
import cv2 | |
import pickle # Used to save variables | |
import numpy as np | |
import matplotlib.pyplot as plt | |
import seaborn as sns | |
from tqdm import tqdm # Used to display progress bar | |
from sklearn.preprocessing import OneHotEncoder | |
from sklearn.metrics import confusion_matrix | |
from keras.models import Model, load_model | |
from keras.layers import Dense, Input, Conv2D, MaxPool2D, Flatten | |
from keras.preprocessing.image import ImageDataGenerator # Used to generate images | |
np.random.seed(22) | |
# Do not forget to include the last slash | |
def load_normal(norm_path):
    """Load the readable images in ``norm_path`` as 200x200 grayscale arrays.

    Args:
        norm_path: Directory holding the images of the "normal" class. A
            trailing slash is accepted but not required.

    Returns:
        A ``(norm_images, norm_labels)`` tuple. ``norm_images`` is a uint8
        array of shape ``(n, 200, 200)``; ``norm_labels`` is an array holding
        the string ``'normal'`` once per loaded image.
    """
    # os.listdir() returns entries in arbitrary order; sorting keeps the
    # sample order (and everything seeded downstream) stable between runs.
    norm_files = sorted(os.listdir(norm_path))
    norm_images = []
    for file_name in tqdm(norm_files):
        # Read image
        image = cv2.imread(os.path.join(norm_path, file_name))
        # cv2.imread() returns None instead of raising when the entry cannot
        # be decoded (hidden files, sub-directories, ...). Skip those rather
        # than crash inside cv2.resize().
        if image is None:
            continue
        # Resize image to 200x200 px
        image = cv2.resize(image, dsize=(200, 200))
        # Convert to grayscale
        image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        norm_images.append(image)
    # Count labels from the images actually kept so both arrays stay aligned.
    norm_labels = np.full(len(norm_images), 'normal')
    # The reshape keeps a (0, 200, 200) shape when nothing was loaded, so the
    # result can still be stacked with other image arrays along axis 0.
    norm_images = np.array(norm_images, dtype=np.uint8).reshape(-1, 200, 200)
    return norm_images, norm_labels
def load_pneumonia(pneu_path):
    """Load the readable images in ``pneu_path`` as 200x200 grayscale arrays.

    The label of each image is the second underscore-separated field of its
    file name.

    Args:
        pneu_path: Directory holding the pneumonia images. A trailing slash
            is accepted but not required.

    Returns:
        A ``(pneu_images, pneu_labels)`` tuple. ``pneu_images`` is a uint8
        array of shape ``(n, 200, 200)``; ``pneu_labels`` is a string array
        with one label per loaded image, in the same order.

    Raises:
        IndexError: If a decodable image has no underscore in its file name.
    """
    # os.listdir() returns entries in arbitrary order; sorting keeps the
    # sample order (and everything seeded downstream) stable between runs.
    pneu_files = sorted(os.listdir(pneu_path))
    pneu_images = []
    pneu_labels = []
    for file_name in tqdm(pneu_files):
        # Read image
        image = cv2.imread(os.path.join(pneu_path, file_name))
        # cv2.imread() returns None instead of raising when the entry cannot
        # be decoded (hidden files, sub-directories, ...). Skip those rather
        # than crash inside cv2.resize().
        if image is None:
            continue
        # Parse the label only for files that really are images, so stray
        # non-image entries cannot break the loader or misalign the labels.
        label = file_name.split('_')[1]
        # Resize image to 200x200 px
        image = cv2.resize(image, dsize=(200, 200))
        # Convert to grayscale
        image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        pneu_images.append(image)
        pneu_labels.append(label)
    # The reshape keeps a (0, 200, 200) shape when nothing was loaded, so the
    # result can still be stacked with other image arrays along axis 0.
    pneu_images = np.array(pneu_images, dtype=np.uint8).reshape(-1, 200, 200)
    # dtype=str keeps an empty result a string array instead of float64.
    pneu_labels = np.array(pneu_labels, dtype=str)
    return pneu_images, pneu_labels
print('Loading images')
# Each loader hands back an (images, labels) pair for one class directory.
norm_images, norm_labels = load_normal(
    '/kaggle/input/chest-xray-pneumonia/chest_xray/train/NORMAL/')
pneu_images, pneu_labels = load_pneumonia(
    '/kaggle/input/chest-xray-pneumonia/chest_xray/train/PNEUMONIA/')

# Stack both classes into one training set: normal samples first,
# pneumonia samples after them, with the labels in the same order.
X_train = np.concatenate((norm_images, pneu_images), axis=0)
y_train = np.concatenate((norm_labels, pneu_labels))

print(X_train.shape)
print(y_train.shape)

# Number of samples available for each class
print(np.unique(y_train, return_counts=True))
print('Display several images')
fig, axes = plt.subplots(ncols=7, nrows=2, figsize=(16, 4))
# Draw 14 random sample positions, one per subplot of the 2x7 grid.
indices = np.random.choice(len(X_train), 14)
# axes.flat walks the grid row by row, pairing each subplot with one sample.
for ax, sample in zip(axes.flat, indices):
    ax.set_title(y_train[sample])
    ax.imshow(X_train[sample], cmap='gray')
    # Hide the axes; only the picture and its label matter here.
    ax.get_xaxis().set_visible(False)
    ax.get_yaxis().set_visible(False)
plt.show()
print('Loading test images')
# Same procedure as for the training split, pointed at the test directories.
norm_images_test, norm_labels_test = load_normal(
    '/kaggle/input/chest-xray-pneumonia/chest_xray/test/NORMAL/')
pneu_images_test, pneu_labels_test = load_pneumonia(
    '/kaggle/input/chest-xray-pneumonia/chest_xray/test/PNEUMONIA/')
X_test = np.concatenate((norm_images_test, pneu_images_test), axis=0)
y_test = np.concatenate((norm_labels_test, pneu_labels_test))

# Cache the four arrays on disk so later runs can skip the image decoding.
dataset = (X_train, X_test, y_train, y_test)
with open('pneumonia_data.pickle', 'wb') as f:
    pickle.dump(dataset, f)

# Reading the cache back restores the arrays in the order they were stored.
with open('pneumonia_data.pickle', 'rb') as f:
    dataset = pickle.load(f)
X_train, X_test, y_train, y_test = dataset
print('Label preprocessing')
# OneHotEncoder works on 2-D (samples, features) input, so turn each label
# vector into a single column.
y_train = y_train[:, np.newaxis]
y_test = y_test[:, np.newaxis]

# Request dense output. scikit-learn 1.2 renamed `sparse` to `sparse_output`
# and later removed the old keyword; older releases only know `sparse`, and
# an unknown keyword raises TypeError, so try the new spelling first.
try:
    one_hot_encoder = OneHotEncoder(sparse_output=False)
except TypeError:
    one_hot_encoder = OneHotEncoder(sparse=False)

# Learn the categories from the training labels only, then reuse the same
# mapping for the test labels.
y_train_one_hot = one_hot_encoder.fit_transform(y_train)
y_test_one_hot = one_hot_encoder.transform(y_test)

print('Reshaping X data')
# Append a trailing axis of size 1 (the single color channel):
# (no of samples, height, width) -> (no of samples, height, width, 1)
X_train = np.expand_dims(X_train, axis=-1)
X_test = np.expand_dims(X_test, axis=-1)
print('Data augmentation')
# Generate new images with some randomness: small rotations, zooms and
# horizontal/vertical shifts.
datagen = ImageDataGenerator(
    rotation_range=10,
    zoom_range=0.1,
    width_shift_range=0.1,
    height_shift_range=0.1)
# No datagen.fit(X_train) here: fit() only computes dataset statistics for
# featurewise_center, featurewise_std_normalization and zca_whitening. None
# of those is enabled, so the call would just build a float copy of the
# whole training set for nothing.
train_gen = datagen.flow(X_train, y_train_one_hot, batch_size=32)
print('CNN')
# Define the input shape of the neural network: (height, width, 1 channel)
input_shape = (X_train.shape[1], X_train.shape[2], 1)
print(input_shape)

input1 = Input(shape=input_shape)

# First convolutional stage: two 3x3 convolutions followed by 2x2 pooling
cnn = Conv2D(16, (3, 3), activation='relu', strides=(1, 1),
             padding='same')(input1)
cnn = Conv2D(32, (3, 3), activation='relu', strides=(1, 1),
             padding='same')(cnn)
cnn = MaxPool2D((2, 2))(cnn)

# Second convolutional stage: two 2x2 convolutions followed by 2x2 pooling
cnn = Conv2D(16, (2, 2), activation='relu', strides=(1, 1),
             padding='same')(cnn)
cnn = Conv2D(32, (2, 2), activation='relu', strides=(1, 1),
             padding='same')(cnn)
cnn = MaxPool2D((2, 2))(cnn)

# Classifier head
cnn = Flatten()(cnn)
cnn = Dense(100, activation='relu')(cnn)
cnn = Dense(50, activation='relu')(cnn)
# One softmax unit per one-hot column, so the output width always follows
# the classes the encoder produced instead of a hard-coded 3.
output1 = Dense(y_train_one_hot.shape[1], activation='softmax')(cnn)

model = Model(inputs=input1, outputs=output1)
model.compile(loss='categorical_crossentropy',
              optimizer='adam', metrics=['acc'])

# fit() accepts the generator directly; fit_generator() is deprecated and
# missing from newer Keras releases. The generator applies fresh random
# transformations on each epoch.
# NOTE(review): the test split is used as validation data here, so val_acc
# should not also drive model selection or tuning.
history = model.fit(train_gen, epochs=30,
                    validation_data=(X_test, y_test_one_hot))

# Saving model
model.save('pneumonia_cnn.h5')
# One figure per metric: training curve and validation curve together.
for announcement, title, train_key, val_key in (
        ('Displaying accuracy', 'Accuracy scores', 'acc', 'val_acc'),
        ('Displaying loss', 'Loss value', 'loss', 'val_loss')):
    print(announcement)
    plt.figure(figsize=(8, 6))
    plt.title(title)
    plt.plot(history.history[train_key])
    plt.plot(history.history[val_key])
    # Legend entries follow the order in which the curves were plotted.
    plt.legend([train_key, val_key])
    plt.show()
# Predicting test data: one row of class scores per test image
predictions = model.predict(X_test)
print(predictions)
# Map every row of scores back to a class label
predictions = one_hot_encoder.inverse_transform(predictions)

print('Model evaluation')
print(one_hot_encoder.categories_)
# Take the class names from the encoder rather than hard-coding them, so
# the tick labels always match the column order of the one-hot encoding.
classnames = list(one_hot_encoder.categories_[0])

# Display confusion matrix. Passing labels= fixes the row/column order and
# keeps the matrix square even if a class is missing from the test labels
# or from the predictions.
cm = confusion_matrix(np.ravel(y_test), np.ravel(predictions),
                      labels=classnames)
plt.figure(figsize=(8, 8))
plt.title('Confusion matrix')
sns.heatmap(cm, cbar=False, xticklabels=classnames, yticklabels=classnames,
            fmt='d', annot=True, cmap=plt.cm.Blues)
plt.xlabel('Predicted')
plt.ylabel('Actual')
plt.show()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Can you send the detection code?