Sample of how to build a functional model in TensorFlow. This is starter code where random images are created, the model is trained, and predictions are made. The network is built from simple convolutional blocks (Conv2D + MaxPool + BatchNorm -> Flatten + Dense layer).
from tensorflow.keras import Model
from tensorflow.keras.layers import Convolution2D
from tensorflow.keras.layers import MaxPool2D
from tensorflow.keras.layers import BatchNormalization
from tensorflow.keras.layers import Input
from tensorflow.keras.layers import Flatten
from tensorflow.keras.layers import Dropout
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import concatenate
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.activations import relu
from tensorflow.keras.activations import softmax
from tensorflow.keras.regularizers import l2
import numpy as np

# categorical focal loss
from tensorflow.keras import backend as K
def categorical_focal_loss(y, y_pred):
    # gamma/alpha values taken from a paper on multiclass classification;
    # 2.0 and 0.25 are the values commonly used in the original focal loss paper
    gamma = 2.5
    alpha = 0.5
    def focal_loss(y_true, y_pred):
        epsilon = K.epsilon()
        y_pred = K.clip(y_pred, epsilon, 1.0 - epsilon)
        cross_entropy = -y_true * K.log(y_pred)
        weight = alpha * y_true * K.pow((1 - y_pred), gamma)
        loss = weight * cross_entropy
        loss = K.sum(loss, axis=1)
        return loss
    return focal_loss(y, y_pred)
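# quick sanity check of the focal loss on a dummy one-hot target (illustrative only;
# the _y_true/_y_pred names below are hypothetical and not part of the original gist)
_y_true = K.constant([[0., 0., 1., 0., 0.]])
_y_pred = K.constant([[0.1, 0.1, 0.6, 0.1, 0.1]])
print('focal loss on dummy sample:', K.eval(categorical_focal_loss(_y_true, _y_pred)))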
# function to build a functional model, merging 3 different sub-networks (example)
def get_model_functional(input_size, filters, kernel_size, regularizerL2):
    def conv_block(num_filters, x):
        hidden_conv = Convolution2D(num_filters, (kernel_size, kernel_size), padding='same',
                                    kernel_regularizer=l2(regularizerL2), activation=relu)(x)
        hidden_max_pool = MaxPool2D()(hidden_conv)  # pool_size defaults to (2, 2)
        hidden_norm = BatchNormalization()(hidden_max_pool)
        return hidden_norm

    # network 1: three convolutional blocks
    visible_n1 = Input(shape=(input_size, input_size, 1))
    first_block_n1 = conv_block(filters[0], visible_n1)
    second_block_n1 = conv_block(filters[1], first_block_n1)
    third_block_n1 = conv_block(filters[2], second_block_n1)
    flat_n1 = Flatten()(third_block_n1)

    # network 2: a single convolutional block
    visible_n2 = Input(shape=(input_size, input_size, 1))
    first_block_n2 = conv_block(filters[0], visible_n2)
    flat_n2 = Flatten()(first_block_n2)

    # network 3: two convolutional blocks
    visible_n3 = Input(shape=(input_size, input_size, 1))
    first_block_n3 = conv_block(filters[0], visible_n3)
    second_block_n3 = conv_block(filters[1], first_block_n3)
    flat_n3 = Flatten()(second_block_n3)

    # merge the three branches (note: flat_n2, not flat_n1 twice)
    merge = concatenate([flat_n1, flat_n2, flat_n3])
    drop_layer = Dropout(0.5)(merge)
    prediction = Dense(5, activation=softmax)(drop_layer)
    return [visible_n1, visible_n2, visible_n3], prediction
PATCH_SIZE = 64
NUM_CLASSES = 5
filters_size = [64, 128, 256]
kernel_size = 3
regularizerL2 = 0.0005

inputs, output = get_model_functional(PATCH_SIZE, filters_size, kernel_size, regularizerL2)
model = Model(inputs=inputs, outputs=output)
model.summary()  # summary() already prints the model description

metrics_fcn = ['mse', 'acc']
optimizer_fcn = Adam(learning_rate=1e-4, beta_1=0.9, beta_2=0.999)  # hyperparameters taken from another paper
loss_fcn = categorical_focal_loss
model.compile(optimizer=optimizer_fcn, loss=loss_fcn, metrics=metrics_fcn)
# lists of images for each network
list_images_n1 = []
list_images_n2 = []
list_images_n3 = []
list_prob_vector = []

# create random images in [0, 1) to feed the networks (labels are random vectors, for demonstration only)
for i in range(0, 150):
    list_images_n1.append(np.random.rand(PATCH_SIZE, PATCH_SIZE))
    list_images_n2.append(np.random.rand(PATCH_SIZE, PATCH_SIZE))
    list_images_n3.append(np.random.rand(PATCH_SIZE, PATCH_SIZE))
    list_prob_vector.append(np.random.rand(NUM_CLASSES))

# input values for the networks
x_n1 = np.asarray(list_images_n1, dtype=np.float32).reshape(-1, PATCH_SIZE, PATCH_SIZE, 1)
x_n2 = np.asarray(list_images_n2, dtype=np.float32).reshape(-1, PATCH_SIZE, PATCH_SIZE, 1)
x_n3 = np.asarray(list_images_n3, dtype=np.float32).reshape(-1, PATCH_SIZE, PATCH_SIZE, 1)
y = np.asarray(list_prob_vector, dtype=np.float32).reshape(-1, NUM_CLASSES)
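# shapes at this point (with 150 samples): each x_n* is (150, 64, 64, 1) and y is (150, 5)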
# training, using 10% of the data for validation
batch_size = 32
num_epochs = 50
history = model.fit(x=[x_n1, x_n2, x_n3], y=y, batch_size=batch_size, epochs=num_epochs,
                    validation_split=0.10, verbose=True, use_multiprocessing=True)
print(history.history.keys())

# just to see something: plot training/validation accuracy
import matplotlib.pyplot as plt
plt.figure(0)
plt.plot(history.history['acc'])
plt.plot(history.history['val_acc'])
plt.title('model accuracy')
plt.ylabel('accuracy')
plt.xlabel('epoch')
plt.legend(['acc', 'val acc'], loc='upper left')
plt.show()
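# optional: also plot the training/validation loss curves ('loss'/'val_loss' are the default Keras history keys)
plt.figure(1)
plt.plot(history.history['loss'])
plt.plot(history.history['val_loss'])
plt.title('model loss')
plt.ylabel('loss')
plt.xlabel('epoch')
plt.legend(['loss', 'val loss'], loc='upper left')
plt.show()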
# predict on a single random image per branch
image_to_predict_n1 = np.random.rand(PATCH_SIZE, PATCH_SIZE)
image_to_predict_n2 = np.random.rand(PATCH_SIZE, PATCH_SIZE)
image_to_predict_n3 = np.random.rand(PATCH_SIZE, PATCH_SIZE)

# reshape them; the leading -1 lets NumPy infer the number of samples to evaluate (1 here)
inputs_pred = [image_to_predict_n1.reshape(-1, PATCH_SIZE, PATCH_SIZE, 1),
               image_to_predict_n2.reshape(-1, PATCH_SIZE, PATCH_SIZE, 1),
               image_to_predict_n3.reshape(-1, PATCH_SIZE, PATCH_SIZE, 1)]
y_predict = model.predict(inputs_pred, use_multiprocessing=True)
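# the output is a (1, NUM_CLASSES) array of softmax scores; argmax gives the predicted class index
predicted_class = np.argmax(y_predict, axis=1)
print('predicted class:', predicted_class, 'scores:', y_predict)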