burnpiro/tb_example.py

## tb_example.py
# Load the TensorBoard notebook extension. `%load_ext` is an IPython magic,
# so this cell has to run inside Jupyter/IPython, not as a plain Python script.
%load_ext tensorboard

# Clear out any prior log data (optional). `!` hands the command to the
# system shell; this removes the whole `logs` directory, earlier runs included.
!rm -rf logs

import datetime
import io
import itertools
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from tqdm.auto import tqdm
from sklearn import metrics

# Set TensorFlow's global random seed.
tf.random.set_seed(1234)

# Number of samples per batch, shared by the train and test pipelines.
BATCH_SIZE = 100

# NOTE: `process_data` and `dataset` are not defined in this file; they are
# expected to be provided before this code runs (e.g. by an earlier cell).
train_images, train_labels = process_data(dataset['train']['X'], dataset['train']['y'])
# Build the training dataset, shuffle with a buffer covering every sample,
# then group the samples into batches.
train_ds = tf.data.Dataset.from_tensor_slices((train_images, train_labels))
train_ds = train_ds.shuffle(buffer_size=len(train_images)).batch(batch_size=BATCH_SIZE)

# Placeholders: load your own test_images and test_labels here.
test_images = []
test_labels = []
# Build the test dataset. It is batched like the training data because
# `TBCompatibleModel.evaluate` indexes labels as `y[:, 0]`, which needs a
# leading batch axis.
test_ds = tf.data.Dataset.from_tensor_slices((test_images, test_labels))
test_ds = test_ds.batch(batch_size=BATCH_SIZE)

# Directory for this run's TensorBoard logs. The timestamp keeps every run in
# its own folder under logs/fit. Passing it to the model as a constructor
# argument is possible but optional.
tfboard_dir = "logs/fit/" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")

# We use existing tf.keras layers here instead of custom ones, but you should use your own layers.
class TBCompatibleModel(tf.keras.Model):
  """Sample Keras model that writes its own TensorBoard summaries.

  The class owns one summary writer per TensorBoard "run" (train, val,
  confusion matrix, images) and drives them from custom `fit` and `evaluate`
  loops.

  This is a template. `call` is a placeholder, and `fit` / `evaluate` invoke
  `self.train_step(x, y)` and `self.test_step(x, y)`, which this class does
  not define and which must be implemented before the loops can run. The
  helpers `plot_confusion_matrix`, `plot_to_image` and `image_grid`, and the
  module-level `tfboard_dir`, are also expected to exist.
  """

  def __init__(self):
    """Create the sample layers and one summary writer per TensorBoard run."""
    super(TBCompatibleModel, self).__init__()

    # Sample network structure; swap in your own layers as needed.
    self.fc1 = tf.keras.layers.Dense(128, activation='relu')
    self.out = tf.keras.layers.Dense(10, activation='softmax')

    # Important: each writer logs to its own sub-directory of `tfboard_dir`,
    # one per "run". Two writers are enough for train/val scalars; add one
    # more per extra kind of run. The suffix must be part of the path given
    # to create_file_writer: the returned writer object cannot be
    # concatenated with a string.
    self.train_summary_writer = tf.summary.create_file_writer(tfboard_dir + '/train')
    self.val_summary_writer = tf.summary.create_file_writer(tfboard_dir + '/val')
    self.cm_writer = tf.summary.create_file_writer(tfboard_dir + '/cm')
    self.img_writer = tf.summary.create_file_writer(tfboard_dir + '/image')

  def call(self, inputs, training=False):
    """Forward pass placeholder; currently returns None. Implement it."""
    pass

  def fit(self, train_data, val_data=None, epochs=1, validation_data=None):
    """Train for `epochs` epochs, logging mean loss and F1 once per epoch.

    Loss and F1 are only examples; any other per-step value can be
    collected and logged the same way.

    Args:
      train_data: Iterable of `(x, y)` batches passed to `self.train_step`.
      val_data: Optional iterable of `(x, y)` batches passed to
        `self.test_step`. Validation is skipped when it is None.
      epochs: Number of passes over `train_data`.
      validation_data: Keras-style alias for `val_data`; used only when
        `val_data` is None.

    Returns:
      The string 'Training done'.
    """
    if val_data is None:
      val_data = validation_data

    for epoch in tqdm(range(epochs)):
      train_losses = []
      train_f1 = []

      # `self.train_step` is expected to return a mapping with "loss" and
      # "f1" entries, so the metrics can simply be appended per batch.
      for x, y in train_data:
        train_step_result = self.train_step(x, y)
        train_losses.append(train_step_result["loss"])
        train_f1.append(train_step_result["f1"])

      # Important: log once per epoch, after the batch loop, so every epoch
      # contributes exactly one point per metric. Skipped when no batch was
      # seen, which avoids taking the mean of an empty list.
      if train_losses:
        with self.train_summary_writer.as_default():
          # Scalars (tf.summary.scalar) are rendered as line charts.
          tf.summary.scalar('loss', np.mean(train_losses), step=epoch)
          tf.summary.scalar('f1', np.mean(train_f1), step=epoch)

      # Validation mirrors training but calls `self.test_step`, which
      # returns a `(metrics, predictions)` tuple. The predictions are only
      # needed by `evaluate`, so they are ignored here.
      if val_data is not None:
        val_losses = []
        val_f1 = []
        for x, y in val_data:
          val_step_result, _ = self.test_step(x, y)
          val_losses.append(val_step_result["loss"])
          val_f1.append(val_step_result["f1"])

        if val_losses:
          with self.val_summary_writer.as_default():
            tf.summary.scalar('loss', np.mean(val_losses), step=epoch)
            tf.summary.scalar('f1', np.mean(val_f1), step=epoch)

    # Returned only after every epoch has finished.
    return 'Training done'

  def evaluate(self, test_data, class_names, images):
    """Evaluate on `test_data` and log two image summaries to TensorBoard.

    A confusion-matrix image is written with `self.cm_writer`, and a grid
    built from the misclassified samples with `self.img_writer`.

    Args:
      test_data: Iterable of `(x, y)` batches passed to `self.test_step`.
        Labels are read as `y[:, 0]`, so `y` needs a batch axis and a
        second axis.
      class_names: Forwarded to `plot_confusion_matrix` and `image_grid`.
      images: Forwarded to `image_grid`.

    Returns:
      Tuple `(mean_loss, mean_f1)` averaged over the test batches.

    Raises:
      ValueError: If `test_data` yields no batches.
    """
    test_losses = []
    test_f1 = []
    labels = []
    preds = []

    for x, y in test_data:
      test_result, pred_val = self.test_step(x, y)
      # Keep labels and predictions for the confusion matrix and the
      # mismatch grid.
      labels.append(y[:, 0].numpy())
      preds.append(pred_val.numpy())

      test_losses.append(test_result["loss"])
      test_f1.append(test_result["f1"])

    if not labels:
      raise ValueError("test_data yielded no batches; nothing to evaluate")

    # Merge the per-batch arrays into one flat array each, so they can be
    # compared element-wise and handed to sklearn.
    # NOTE(review): assumes `pred_val` holds one predicted class id per
    # sample, shaped like the labels - confirm against `test_step`.
    labels = np.concatenate(labels)
    preds = np.concatenate(preds)

    # Calculate the confusion matrix and render it as an image.
    cm = metrics.confusion_matrix(labels, preds)
    figure = plot_confusion_matrix(cm, class_names=class_names)
    cm_image = plot_to_image(figure)

    # This summary is stored as an "image", not a "scalar".
    with self.cm_writer.as_default():
      tf.summary.image("Confusion Matrix", cm_image, step=1)

    # Indices of the samples whose prediction differs from the label.
    miss_class = (labels != preds).nonzero()

    # Prepare the plot, convert it to an image and log it.
    figure = image_grid(labels, preds, miss_class, class_names, images)
    with self.img_writer.as_default():
      tf.summary.image("Missmatch data", plot_to_image(figure), step=0)

    return (np.mean(test_losses), np.mean(test_f1))

### You have to implement rest of the mothods in "TBCompatibleModel" class to run this script
optimizer = tf.keras.optimizers.Adam(learning_rate=1e-3)
model = TBCompatibleModel()
model.compile(optimizer=optimizer)

model.fit(train_ds, epochs = 25, validation_data=val_ds)
loss, f1 = model.evaluate(test_ds, class_names, test_images)

### Show tensorboard in the notebook
%tensorboard --logdir logs/fit
	# Load the TensorBoard notebook extension. `%load_ext` is an IPython magic,
	# so this cell has to run inside Jupyter/IPython, not as a plain Python script.
	%load_ext tensorboard

	# Clear out any prior log data (optional). `!` hands the command to the
	# system shell; this removes the whole `logs` directory, earlier runs included.
	!rm -rf logs

	import datetime
	import io
	import itertools
	import numpy as np
	import matplotlib.pyplot as plt
	import tensorflow as tf
	from tqdm.auto import tqdm
	from sklearn import metrics

	# Set TensorFlow's global random seed.
	tf.random.set_seed(1234)

	# Number of samples per batch, shared by the train and test pipelines.
	BATCH_SIZE = 100

	# NOTE: `process_data` and `dataset` are not defined in this file; they are
	# expected to be provided before this code runs (e.g. by an earlier cell).
	train_images, train_labels = process_data(dataset['train']['X'], dataset['train']['y'])
	# Build the training dataset, shuffle with a buffer covering every sample,
	# then group the samples into batches.
	train_ds = tf.data.Dataset.from_tensor_slices((train_images, train_labels))
	train_ds = train_ds.shuffle(buffer_size=len(train_images)).batch(batch_size=BATCH_SIZE)

	# Placeholders: load your own test_images and test_labels here.
	test_images = []
	test_labels = []
	# Build the test dataset. It is batched like the training data because
	# `TBCompatibleModel.evaluate` indexes labels as `y[:, 0]`, which needs a
	# leading batch axis.
	test_ds = tf.data.Dataset.from_tensor_slices((test_images, test_labels))
	test_ds = test_ds.batch(batch_size=BATCH_SIZE)

	# Directory for this run's TensorBoard logs. The timestamp keeps every run in
	# its own folder under logs/fit. Passing it to the model as a constructor
	# argument is possible but optional.
	tfboard_dir = "logs/fit/" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")

	# We use existing tf.keras layers here instead of custom ones, but you should use your own layers.
	class TBCompatibleModel(tf.keras.Model):
	  """Sample Keras model that writes its own TensorBoard summaries.

	  The class owns one summary writer per TensorBoard "run" (train, val,
	  confusion matrix, images) and drives them from custom `fit` and `evaluate`
	  loops.

	  This is a template. `call` is a placeholder, and `fit` / `evaluate` invoke
	  `self.train_step(x, y)` and `self.test_step(x, y)`, which this class does
	  not define and which must be implemented before the loops can run. The
	  helpers `plot_confusion_matrix`, `plot_to_image` and `image_grid`, and the
	  module-level `tfboard_dir`, are also expected to exist.
	  """

	  def __init__(self):
	    """Create the sample layers and one summary writer per TensorBoard run."""
	    super(TBCompatibleModel, self).__init__()

	    # Sample network structure; swap in your own layers as needed.
	    self.fc1 = tf.keras.layers.Dense(128, activation='relu')
	    self.out = tf.keras.layers.Dense(10, activation='softmax')

	    # Important: each writer logs to its own sub-directory of `tfboard_dir`,
	    # one per "run". Two writers are enough for train/val scalars; add one
	    # more per extra kind of run. The suffix must be part of the path given
	    # to create_file_writer: the returned writer object cannot be
	    # concatenated with a string.
	    self.train_summary_writer = tf.summary.create_file_writer(tfboard_dir + '/train')
	    self.val_summary_writer = tf.summary.create_file_writer(tfboard_dir + '/val')
	    self.cm_writer = tf.summary.create_file_writer(tfboard_dir + '/cm')
	    self.img_writer = tf.summary.create_file_writer(tfboard_dir + '/image')

	  def call(self, inputs, training=False):
	    """Forward pass placeholder; currently returns None. Implement it."""
	    pass

	  def fit(self, train_data, val_data=None, epochs=1, validation_data=None):
	    """Train for `epochs` epochs, logging mean loss and F1 once per epoch.

	    Loss and F1 are only examples; any other per-step value can be
	    collected and logged the same way.

	    Args:
	      train_data: Iterable of `(x, y)` batches passed to `self.train_step`.
	      val_data: Optional iterable of `(x, y)` batches passed to
	        `self.test_step`. Validation is skipped when it is None.
	      epochs: Number of passes over `train_data`.
	      validation_data: Keras-style alias for `val_data`; used only when
	        `val_data` is None.

	    Returns:
	      The string 'Training done'.
	    """
	    if val_data is None:
	      val_data = validation_data

	    for epoch in tqdm(range(epochs)):
	      train_losses = []
	      train_f1 = []

	      # `self.train_step` is expected to return a mapping with "loss" and
	      # "f1" entries, so the metrics can simply be appended per batch.
	      for x, y in train_data:
	        train_step_result = self.train_step(x, y)
	        train_losses.append(train_step_result["loss"])
	        train_f1.append(train_step_result["f1"])

	      # Important: log once per epoch, after the batch loop, so every epoch
	      # contributes exactly one point per metric. Skipped when no batch was
	      # seen, which avoids taking the mean of an empty list.
	      if train_losses:
	        with self.train_summary_writer.as_default():
	          # Scalars (tf.summary.scalar) are rendered as line charts.
	          tf.summary.scalar('loss', np.mean(train_losses), step=epoch)
	          tf.summary.scalar('f1', np.mean(train_f1), step=epoch)

	      # Validation mirrors training but calls `self.test_step`, which
	      # returns a `(metrics, predictions)` tuple. The predictions are only
	      # needed by `evaluate`, so they are ignored here.
	      if val_data is not None:
	        val_losses = []
	        val_f1 = []
	        for x, y in val_data:
	          val_step_result, _ = self.test_step(x, y)
	          val_losses.append(val_step_result["loss"])
	          val_f1.append(val_step_result["f1"])

	        if val_losses:
	          with self.val_summary_writer.as_default():
	            tf.summary.scalar('loss', np.mean(val_losses), step=epoch)
	            tf.summary.scalar('f1', np.mean(val_f1), step=epoch)

	    # Returned only after every epoch has finished.
	    return 'Training done'

	  def evaluate(self, test_data, class_names, images):
	    """Evaluate on `test_data` and log two image summaries to TensorBoard.

	    A confusion-matrix image is written with `self.cm_writer`, and a grid
	    built from the misclassified samples with `self.img_writer`.

	    Args:
	      test_data: Iterable of `(x, y)` batches passed to `self.test_step`.
	        Labels are read as `y[:, 0]`, so `y` needs a batch axis and a
	        second axis.
	      class_names: Forwarded to `plot_confusion_matrix` and `image_grid`.
	      images: Forwarded to `image_grid`.

	    Returns:
	      Tuple `(mean_loss, mean_f1)` averaged over the test batches.

	    Raises:
	      ValueError: If `test_data` yields no batches.
	    """
	    test_losses = []
	    test_f1 = []
	    labels = []
	    preds = []

	    for x, y in test_data:
	      test_result, pred_val = self.test_step(x, y)
	      # Keep labels and predictions for the confusion matrix and the
	      # mismatch grid.
	      labels.append(y[:, 0].numpy())
	      preds.append(pred_val.numpy())

	      test_losses.append(test_result["loss"])
	      test_f1.append(test_result["f1"])

	    if not labels:
	      raise ValueError("test_data yielded no batches; nothing to evaluate")

	    # Merge the per-batch arrays into one flat array each, so they can be
	    # compared element-wise and handed to sklearn.
	    # NOTE(review): assumes `pred_val` holds one predicted class id per
	    # sample, shaped like the labels - confirm against `test_step`.
	    labels = np.concatenate(labels)
	    preds = np.concatenate(preds)

	    # Calculate the confusion matrix and render it as an image.
	    cm = metrics.confusion_matrix(labels, preds)
	    figure = plot_confusion_matrix(cm, class_names=class_names)
	    cm_image = plot_to_image(figure)

	    # This summary is stored as an "image", not a "scalar".
	    with self.cm_writer.as_default():
	      tf.summary.image("Confusion Matrix", cm_image, step=1)

	    # Indices of the samples whose prediction differs from the label.
	    miss_class = (labels != preds).nonzero()

	    # Prepare the plot, convert it to an image and log it.
	    figure = image_grid(labels, preds, miss_class, class_names, images)
	    with self.img_writer.as_default():
	      tf.summary.image("Missmatch data", plot_to_image(figure), step=0)

	    return (np.mean(test_losses), np.mean(test_f1))

	### You have to implement rest of the mothods in "TBCompatibleModel" class to run this script
	optimizer = tf.keras.optimizers.Adam(learning_rate=1e-3)
	model = TBCompatibleModel()
	model.compile(optimizer=optimizer)

	model.fit(train_ds, epochs = 25, validation_data=val_ds)
	loss, f1 = model.evaluate(test_ds, class_names, test_images)

	### Show tensorboard in the notebook
	%tensorboard --logdir logs/fit