Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
TensorBoard example for custom model
# Load the TensorBoard notebook extension.
# NOTE: this is an IPython/Jupyter magic, not plain Python; it only works inside a notebook cell.
%load_ext tensorboard
# Clear out any prior log data (optional).
# NOTE: this is a notebook shell escape; it recursively deletes the local "logs" directory.
!rm -rf logs
import datetime
import io
import itertools
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from tqdm.auto import tqdm
from sklearn import metrics
# Set the global TensorFlow random seed
tf.random.set_seed(1234)
BATCH_SIZE = 100

# NOTE: `process_data` and `dataset` are not defined in this file; they must
# already exist in the notebook session before this cell runs.
train_images, train_labels = process_data(dataset['train']['X'], dataset['train']['y'])
train_ds = tf.data.Dataset.from_tensor_slices((train_images, train_labels))  # load train Dataset
train_ds = train_ds.shuffle(buffer_size=len(train_images)).batch(batch_size=BATCH_SIZE)

# TODO: load test_images and test_labels (the lists below are empty placeholders)
test_images = []
test_labels = []
# The test set is batched like the training set: `TBCompatibleModel.evaluate`
# indexes labels as `y[:, 0]`, which requires a leading batch axis.
test_ds = tf.data.Dataset.from_tensor_slices((test_images, test_labels))  # load test Dataset
test_ds = test_ds.batch(batch_size=BATCH_SIZE)

# Path where TensorBoard logs are saved; a timestamp keeps each run in its own
# sub-directory. The model reads this module-level name when it creates its writers.
tfboard_dir = "logs/fit/" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
# For brevity we use existing layers from tf.keras here instead of custom ones; replace them with your own layers as needed.
class TBCompatibleModel(tf.keras.Model):
    """Keras model with a hand-written train/eval loop that logs to TensorBoard.

    Each TensorBoard "run" gets its own summary writer. Two writers are enough
    to track train and validation metrics; add one more writer per extra kind
    of run (here: confusion matrix and misclassified-image grid).

    NOTE: ``fit`` and ``evaluate`` call ``self.train_step(x, y)`` and
    ``self.test_step(x, y)``, which are not defined in this class as shown and
    must be implemented by the user:

    * ``train_step(x, y)`` must return a mapping with ``"loss"`` and ``"f1"``.
    * ``test_step(x, y)`` must return ``(mapping, predictions)`` where the
      mapping has the same two keys.
    """

    def __init__(self, log_dir=None):
        """Create the sample layers and one summary writer per TensorBoard run.

        Args:
            log_dir: Base directory for the TensorBoard logs. When ``None``
                the module-level ``tfboard_dir`` is used.
        """
        super().__init__()
        if log_dir is None:
            log_dir = tfboard_dir

        # Sample network structure.
        self.fc1 = tf.keras.layers.Dense(128, activation='relu')
        self.out = tf.keras.layers.Dense(10, activation='softmax')

        # Important: one writer per "run" shown in TensorBoard. The sub-directory
        # must be part of the path passed to `create_file_writer`.
        self.train_summary_writer = tf.summary.create_file_writer(log_dir + '/train')
        self.val_summary_writer = tf.summary.create_file_writer(log_dir + '/val')
        self.cm_writer = tf.summary.create_file_writer(log_dir + '/cm')
        self.img_writer = tf.summary.create_file_writer(log_dir + '/image')

    def call(self, inputs, training=False):
        """Run the forward pass through the two sample dense layers.

        Args:
            inputs: Input batch. NOTE(review): presumably flat feature vectors;
                add a flatten/reshape step if your inputs are images.
            training: Unused by this sample network; kept for the Keras
                ``call`` signature.

        Returns:
            The output of the final softmax layer.
        """
        hidden = self.fc1(inputs)
        return self.out(hidden)

    def fit(self, train_data, val_data=None, epochs=1, validation_data=None):
        """Train for ``epochs`` epochs and log mean loss / f1 per epoch.

        Args:
            train_data: Iterable of ``(x, y)`` training batches.
            val_data: Optional iterable of ``(x, y)`` validation batches.
            epochs: Number of passes over ``train_data``.
            validation_data: Keras-style alias for ``val_data``; only used
                when ``val_data`` is ``None``.

        Returns:
            The string ``'Training done'``.
        """
        if val_data is None:
            val_data = validation_data

        for epoch in tqdm(range(epochs)):
            train_losses = []
            train_f1 = []
            for x, y in train_data:
                # `train_step` returns the metrics (not just predictions), so
                # they can simply be appended here.
                train_step_result = self.train_step(x, y)
                train_losses.append(train_step_result["loss"])
                train_f1.append(train_step_result["f1"])

            # Important: use the *train* writer and the *train* metrics.
            # The mean is taken so each epoch contributes one value per metric.
            with self.train_summary_writer.as_default():
                # Scalars are rendered as charts in TensorBoard.
                tf.summary.scalar('loss', np.mean(train_losses), step=epoch)
                tf.summary.scalar('f1', np.mean(train_f1), step=epoch)

            if val_data is None:
                continue

            # Same procedure for validation, using `test_step` instead of
            # `train_step` (no gradients are needed when validating).
            val_losses = []
            val_f1 = []
            for x, y in val_data:
                # `test_step` returns a tuple; the predictions are only needed
                # by `evaluate`, so they are ignored here.
                val_step_result, _ = self.test_step(x, y)
                val_losses.append(val_step_result["loss"])
                val_f1.append(val_step_result["f1"])

            with self.val_summary_writer.as_default():
                tf.summary.scalar('loss', np.mean(val_losses), step=epoch)
                tf.summary.scalar('f1', np.mean(val_f1), step=epoch)

        return 'Training done'

    def evaluate(self, test_data, class_names, images):
        """Evaluate on ``test_data`` and log diagnostic images to TensorBoard.

        Logs a confusion-matrix image and a grid of misclassified samples.
        Relies on the helpers ``plot_confusion_matrix``, ``plot_to_image`` and
        ``image_grid``, which are not defined in this class and must be
        available at module level.

        Args:
            test_data: Iterable of ``(x, y)`` batches; labels are read from
                the first column of ``y`` (``y[:, 0]``).
            class_names: Class names passed on to the plotting helpers.
            images: Images passed on to ``image_grid``.

        Returns:
            Tuple ``(mean_loss, mean_f1)`` over all test batches.

        Raises:
            ValueError: If ``test_data`` yields no batches.
        """
        test_losses = []
        test_f1 = []
        batch_labels = []
        batch_preds = []
        for x, y in test_data:
            test_result, pred_val = self.test_step(x, y)
            # Keep labels and predictions for the confusion matrix.
            # NOTE(review): assumes `pred_val` holds predicted class indices
            # (one per sample), not probabilities -- confirm in `test_step`.
            batch_labels.append(y[:, 0].numpy())
            batch_preds.append(pred_val.numpy())
            test_losses.append(test_result["loss"])
            test_f1.append(test_result["f1"])

        if not batch_labels:
            raise ValueError("test_data yielded no batches; nothing to evaluate")

        # Flatten the per-batch arrays so sklearn and the element-wise
        # comparison below see one entry per sample.
        labels = np.concatenate(batch_labels)
        preds = np.concatenate(batch_preds)

        # Calculate the confusion matrix and log it as an image summary
        # (an "image", not a "scalar").
        cm = metrics.confusion_matrix(labels, preds)
        figure = plot_confusion_matrix(cm, class_names=class_names)
        cm_image = plot_to_image(figure)
        with self.cm_writer.as_default():
            tf.summary.image("Confusion Matrix", cm_image, step=1)

        # Indices of the samples whose prediction differs from the label.
        miss_class = (labels != preds).nonzero()
        # Prepare the plot, convert it to an image and log it.
        figure = image_grid(labels, preds, miss_class, class_names, images)
        with self.img_writer.as_default():
            tf.summary.image("Missmatch data", plot_to_image(figure), step=0)

        return (np.mean(test_losses), np.mean(test_f1))
### You have to implement rest of the mothods in "TBCompatibleModel" class to run this script
optimizer = tf.keras.optimizers.Adam(learning_rate=1e-3)
model = TBCompatibleModel()
model.compile(optimizer=optimizer)
model.fit(train_ds, epochs = 25, validation_data=val_ds)
loss, f1 = model.evaluate(test_ds, class_names, test_images)
### Show tensorboard in the notebook
%tensorboard --logdir logs/fit
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment