Skip to content

Instantly share code, notes, and snippets.

@Lauler
Created August 3, 2019 16:14
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save Lauler/e58eb72924a0863ffe9db86827d5c068 to your computer and use it in GitHub Desktop.
import os
import numpy as np
import pandas as pd
import seaborn as sns
import tensorflow as tf
from efficientnet.tfkeras import EfficientNetB5, EfficientNetB3, preprocess_input
from tensorflow.keras.applications import DenseNet201
from tensorflow.keras import backend as K
from tensorflow.keras import optimizers
from tensorflow.keras.callbacks import ModelCheckpoint, ReduceLROnPlateau
from tensorflow.keras.layers import (
Add, Dense, Dropout, Flatten, GlobalAveragePooling2D, Input, BatchNormalization
)
from tensorflow.keras.models import Model, load_model
from tensorflow.keras.preprocessing.image import ImageDataGenerator
# from keras_contrib.callbacks import CyclicLR
# from keras_tqdm import TQDMNotebookCallback
from sklearn.model_selection import KFold
from tensorflow import keras
# import keras
# Opt in to TensorFlow's automatic mixed-precision graph rewrite via its
# environment-variable switch (set before any TF graph/session is built).
os.environ['TF_ENABLE_AUTO_MIXED_PRECISION'] = '1'
def get_train_valid(train_path,
valid_path,
pretrain):
if pretrain:
df_train = pd.read_csv(train_path)
df_train["filename"] = "aug_" + df_train["image"] + ".jpeg"
df_train = df_train.rename(columns={'level': 'diagnosis'})
df_train = df_train.iloc[:, :]
df_valid = pd.read_csv(valid_path)
df_valid["filename"] = "aug_" + df_valid["id_code"] + ".png"
df = {"train": df_train, "valid": df_valid}
else:
df = pd.read_csv(train_path)
df["filename"] = "aug_" + df["id_code"] + ".png"
return(df)
# Build model and custom loss function
def create_model(img_size, pretrain):
    """Return the regression network.

    When ``pretrain`` is True, builds an EfficientNetB3 backbone with a
    fresh regression head (single linear output unit). Otherwise, resumes
    from the ``effnet_pretrain.h5`` checkpoint on disk, registering the
    custom ``root_mse`` metric so Keras can deserialize it.
    """
    K.clear_session()
    tf.reset_default_graph()

    if not pretrain:
        return load_model("effnet_pretrain.h5",
                          custom_objects={"root_mse": root_mse})

    backbone = EfficientNetB3(include_top=False, weights="imagenet",
                              input_shape=(img_size, img_size, 3))
    x = backbone.output
    x = BatchNormalization()(x)
    x = GlobalAveragePooling2D()(x)
    x = Dropout(0.3)(x)
    # NOTE(review): 2028 units looks like a typo for 2048 — kept as-is to
    # preserve behavior / checkpoint compatibility; confirm with the author.
    x = Dense(2028, activation="relu")(x)
    x = BatchNormalization()(x)
    x = Dropout(0.3)(x)
    x = Dense(512, activation="relu")(x)
    x = Dropout(0.3)(x)
    x = Dense(1)(x)  # linear output: diagnosis treated as a regression target
    return Model(inputs=backbone.input, outputs=x)
# Cross validation set up
def root_mse(y_true, y_pred):
    """Root-mean-squared error, expressed with Keras backend ops so it can
    serve as both the training loss and a monitored metric."""
    squared_error = K.square(y_true - y_pred)
    return K.sqrt(K.mean(squared_error))
def regression_cv(
        df,
        nfolds,
        epochs,
        batch_size,
        learning_rate,
        base_lr,
        max_lr,
        train_img_folder,
        valid_img_folder,
        img_size,
        df_valid = None,
        start_fold = None,
        pretrain = False
        ):
    """Train the regression model with K-fold cross validation.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with ``filename`` and ``diagnosis`` columns. In pretrain mode
        the whole frame is used for training each (single) pass.
    nfolds : int
        Number of KFold splits.
    epochs, batch_size, learning_rate : training hyperparameters.
    base_lr, max_lr : cyclical-LR bounds — currently unused because the
        CyclicLR callback is commented out.
    train_img_folder, valid_img_folder : str
        Directories containing the augmented images named in ``filename``.
    img_size : int
        Square input resolution fed to the network.
    df_valid : pandas.DataFrame, optional
        Explicit validation frame; required when ``pretrain`` is True,
        otherwise overwritten by the per-fold split.
    start_fold : int, optional
        1-based fold to resume from; earlier folds are skipped.
    pretrain : bool
        If True, trains once on the full frame and breaks after one pass.

    Side effects: saves ``effnet_reg_fold<N>.h5`` checkpoints and prints
    progress. Returns None.
    """
    kf = KFold(n_splits=nfolds, shuffle=True, random_state=10)
    fold_generator = kf.split(df)
    fold_nr = 1
    if start_fold is not None:
        # Resume support: advance the split generator past completed folds so
        # the remaining folds see the same partitions as the original run.
        for completed_fold in range(1, start_fold):
            next(fold_generator) # Make the generator iterate over completed folds
            print(f"Skipped fold nr {completed_fold}. \n")
        fold_nr = start_fold
        print(f"Starting training at fold {start_fold}.")
    for train_index, valid_index in fold_generator:
        if pretrain:
            # Pretrain mode trains on the whole frame and validates on the
            # externally supplied df_valid; train_index/valid_index are
            # intentionally unused here.
            df_train = df
        else:
            # Split data into train/valid
            df_train, df_valid = df.iloc[train_index], df.iloc[valid_index]
        # np.ceil of an int length is a no-op numerically; kept for the later
        # steps-per-epoch division.
        nr_train_samples, nr_valid_samples = np.ceil(len(df_train)), np.ceil(len(df_valid))
        # Create generators. Only rotation is active; other augmentations were
        # experimented with and left commented out.
        train_datagen = ImageDataGenerator(
            rotation_range=360,
            # zoom_range=(0.9, 1.2),
            # horizontal_flip=True,
            # vertical_flip=True,
            # width_shift_range = 0.3,
            # height_shift_range = 0.3,
            preprocessing_function=preprocess_input
            # fill_mode="constant",
            # cval=0
            )
        # class_mode="other" yields raw numeric y values (regression target).
        train_generator = train_datagen.flow_from_dataframe(
            dataframe=df_train,
            directory=train_img_folder,
            x_col="filename",
            y_col="diagnosis",
            batch_size=batch_size,
            target_size=(img_size, img_size),
            shuffle=True,
            class_mode="other")
        valid_datagen = ImageDataGenerator(
            rotation_range=360,
            preprocessing_function=preprocess_input,
            )
        valid_generator = valid_datagen.flow_from_dataframe(
            dataframe=df_valid,
            directory=valid_img_folder,
            x_col="filename",
            y_col="diagnosis",
            batch_size=batch_size,
            target_size=(img_size, img_size),
            shuffle=False,
            class_mode="other")
        # Model checkpoints — saves the best model (default monitor val_loss)
        # per fold under a fold-numbered filename.
        save_checkpoints = ModelCheckpoint(filepath=f'effnet_reg_fold{fold_nr}.h5',
                                           verbose=1,
                                           save_best_only=True,
                                           #save_weights_only=True,
                                           mode="min")
        reduce_lr = ReduceLROnPlateau(monitor='val_root_mse', factor=0.4, patience=4)
        # Cyclical Learning Rate
        iterations_per_epoch = np.ceil(len(df_train)/batch_size)
        # Iterations/steps per half cycle (recommended 2x-8x times iterations_per_epoch)
        # steps_per_halfcycle = iterations_per_epoch * 4
        # clr = CyclicLR(base_lr = base_lr, max_lr = max_lr,
        #                step_size = steps_per_halfcycle)
        # Create model — a fresh model per fold so folds don't share weights.
        model = create_model(img_size=img_size, pretrain=pretrain)
        opt = tf.keras.optimizers.Adam(lr=learning_rate)
        # opt = tf.train.experimental.enable_mixed_precision_graph_rewrite(opt)
        # root_mse is both the loss and the monitored metric (see reduce_lr).
        model.compile(optimizer = opt,
                      loss=root_mse,
                      metrics=[root_mse])
        model.fit_generator(
            generator=train_generator,
            steps_per_epoch=np.ceil(nr_train_samples/batch_size),
            validation_data=valid_generator,
            validation_steps=np.ceil(nr_valid_samples/batch_size),
            epochs=epochs,
            callbacks=[save_checkpoints,
                       reduce_lr
                       # clr,
                       # TQDMNotebookCallback(leave_inner=True, leave_outer=True)
                       ],
            workers=8,
            verbose=2)
        print(f"Training fold number {fold_nr} finished.")
        fold_nr += 1
        if pretrain:
            # Pretraining is a single pass, not a cross-validation loop.
            break
# Script entry: build the 2019 metadata frame (pretrain=False returns a single
# DataFrame rather than a {"train", "valid"} dict) and run cross validation.
df = get_train_valid(train_path="data/train.csv",
                     valid_path="data/train.csv",
                     pretrain=False)
# Pretraining invocation kept for reference (uses the 2015 data and the
# dict returned by get_train_valid(pretrain=True)):
# regression_cv(
#     df=df["train"], nfolds=6, epochs=25, batch_size=16,
#     learning_rate=1e-4, base_lr=2e-5, max_lr=2e-4,
#     img_size=300, train_img_folder="data_2015/train_aug/", valid_img_folder="data/train_aug/",
#     df_valid=df["valid"], pretrain=True)
regression_cv(
    df=df, nfolds=6, epochs=28, batch_size=16,
    learning_rate=1e-4, base_lr=2e-5, max_lr=2e-4,  # base_lr/max_lr unused while CyclicLR is disabled
    img_size=300, train_img_folder="data/train_aug/", valid_img_folder="data/train_aug/",
    df_valid=None, pretrain=False)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment