trungnt13/DL_UEF19_Tut1.py

## DL_UEF19_Tut1.py
from __future__ import absolute_import, division, print_function

import os

# These variables are read by TensorFlow's native runtime, so they are set
# before `tensorflow` is imported; setting them afterwards does not filter the
# log messages emitted while the library is being loaded.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
os.environ['TF_FORCE_GPU_ALLOW_GROWTH'] = 'true'

import numpy as np
import tensorflow as tf
from matplotlib import pyplot as plt
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from tensorflow import keras

# Seed both TensorFlow's and NumPy's global random generators.
tf.random.set_seed(8)
np.random.seed(8)


# ===========================================================================
# Helper functions for visualization
# ===========================================================================
def show_sample_images(images,
                       labels=None,
                       n=5,
                       labels_name=None,
                       seed=8,
                       axes=None,
                       interpolation=None,
                       shuffle=True):
  """Draw sample images on a grid of axes and return the image artists.

  Args:
    images: indexable collection of arrays. A 1-D array is reshaped to a
      square and a 3-D array with a trailing singleton channel is squeezed;
      both are drawn with the 'Greys' colormap. Any other array is handed to
      `imshow` unchanged with the default colormap.
    labels: optional array of per-image labels; a 2-D array is reduced with
      `argmax` along axis 1.
    n: maximum number of columns; with more than `n` images an `n` x `n`
      grid is created.
    labels_name: optional lookup indexed by `int(label)` to build titles.
    seed: seed of the permutation that selects the images.
    axes: existing axes to draw on - a single axes object, a flat sequence
      or a sequence of rows. A new figure is created when omitted.
    interpolation: forwarded to `imshow`.
    shuffle: pick images in a seeded random order instead of input order.

  Returns:
    List of the objects returned by `imshow`, one per drawn image.
  """
  if labels is not None and labels.ndim == 2:
    labels = np.argmax(labels, axis=1)
  n_images = len(images)
  if shuffle:
    # A private generator yields the same permutation as seeding the global
    # one, without resetting NumPy's global random state for the caller.
    indices = np.random.RandomState(seed).permutation(n_images)
  else:
    indices = np.arange(n_images, dtype='int32')
  if axes is None:
    # pyplot is only needed when this function has to create the figure
    from matplotlib import pyplot as plt
    nrows = n if n_images > n else 1
    ncols = min(n, n_images)
    _, axes = plt.subplots(nrows=nrows,
                           ncols=ncols,
                           figsize=(int(ncols * 1.2), int(nrows * 1.2) + 1))
  # `plt.subplots` returns a bare axes for a 1x1 grid, a 1-D array for one
  # row and a 2-D array otherwise; normalise all of them to a flat list.
  if isinstance(axes, (np.ndarray, tuple, list)):
    flat_axes = []
    for item in axes:
      if isinstance(item, (np.ndarray, tuple, list)):
        flat_axes.extend(item)
      else:
        flat_axes.append(item)
  else:
    flat_axes = [axes]
  fig_images = []
  for count, ax in enumerate(flat_axes):
    ax.axis('off')
    if count >= len(indices):
      # more axes than images: leave the remaining axes blank
      continue
    idx = indices[count]
    img = images[idx]
    cmap = 'Greys'
    if img.ndim == 1:
      # flattened image, assumed to be square
      dim = int(np.sqrt(len(img)))
      img = img.reshape(dim, dim)
    elif img.ndim == 3 and img.shape[-1] == 1:
      img = np.squeeze(img, axis=-1)
    else:
      cmap = None
    fig_images.append(ax.imshow(img, cmap=cmap, interpolation=interpolation))
    if labels is not None:
      lab = labels[idx] if labels_name is None else \
        labels_name[int(labels[idx])]
      ax.set_title(lab, fontsize=8)
  return fig_images


def show_animation(images_list,
                   labels=None,
                   n=5,
                   labels_name=None,
                   fps=1.5,
                   seed=8,
                   delta=False,
                   cmap=None,
                   interpolation=None):
  """Build a matplotlib `ArtistAnimation` from a sequence of frames.

  Args:
    images_list: either a sequence whose items are lists/tuples of images
      (every item becomes one frame drawn as a grid by `show_sample_images`)
      or a sequence of single images (every item becomes one frame).
    labels, n, labels_name, seed: forwarded to `show_sample_images` in the
      grid mode; unused in the single-image mode.
    fps: frames per second, converted to the frame interval.
    delta: single-image mode only; animate the difference between
      consecutive images instead of the images themselves.
    cmap: single-image mode colormap, 'Blues' when omitted.
    interpolation: single-image mode interpolation, 'bilinear' when omitted.

  Returns:
    The created `matplotlib.animation.ArtistAnimation`.
  """
  import matplotlib.animation as animation
  from matplotlib import pyplot as plt

  # one entry per frame; each entry is the list of artists shown in that frame
  frames = []
  grid_mode = isinstance(images_list[0], (tuple, list))
  if grid_mode:
    # every frame is drawn on the same set of axes
    n_images = len(images_list[0])
    ncols = min(n, n_images)
    nrows = n if n_images > n else 1
    fig, axes = plt.subplots(nrows=nrows,
                             ncols=ncols,
                             figsize=(int(ncols * 1.2), int(nrows * 1.2)))
    for frame_images in images_list:
      artists = show_sample_images(frame_images,
                                   labels,
                                   n=n,
                                   labels_name=labels_name,
                                   seed=seed,
                                   axes=axes)
      frames.append(artists)
  else:
    fig = plt.figure(figsize=(8, 8))
    ax = plt.gca()
    if delta:
      sequence = [
          after - before
          for before, after in zip(images_list, images_list[1:])
      ]
    else:
      sequence = images_list
    # one shared colour scale for all frames
    vmin = np.min([np.min(frame) for frame in sequence])
    vmax = np.max([np.max(frame) for frame in sequence])
    colormap = 'Blues' if cmap is None else cmap
    interp = 'bilinear' if interpolation is None else interpolation
    for frame in sequence:
      artist = ax.imshow(frame,
                         cmap=colormap,
                         interpolation=interp,
                         vmin=vmin,
                         vmax=vmax)
      ax.axis('off')
      frames.append([artist])
  return animation.ArtistAnimation(
      fig,
      frames,
      interval=1 / fps * 1000,  # in millisecond
      blit=True,
      repeat=False,
      repeat_delay=1000)


def show_misclassification(images,
                           y_true,
                           y_pred,
                           n=5,
                           labels_name=None,
                           title=None):
  """Plot samples whose predicted class differs from the true class.

  Args:
    images: array of images, indexable with a boolean mask.
    y_true: true labels; a 2-D array is reduced with `argmax` along axis 1.
    y_pred: predicted labels; a 2-D array is reduced the same way.
    n: forwarded to `show_sample_images`.
    labels_name: optional lookup indexed by `int(label)` used in the captions.
    title: optional figure title.

  Returns:
    None. Nothing is drawn when every prediction matches.
  """
  if y_true.ndim == 2:
    y_true = np.argmax(y_true, axis=1)
  if y_pred.ndim == 2:
    y_pred = np.argmax(y_pred, axis=1)
  wrong = np.asarray(y_true) != np.asarray(y_pred)
  if not np.any(wrong):
    return

  def describe(value):
    # raw label, or its readable name when a lookup is available
    if labels_name is None:
      return str(value)
    return str(labels_name[int(value)])

  captions = np.array([
      "True:%s\nPred:%s" % (describe(true), describe(pred))
      for true, pred in zip(y_true[wrong], y_pred[wrong])
  ])
  # the captions are already final strings, hence labels_name=None
  show_sample_images(images=images[wrong],
                     labels=captions,
                     n=n,
                     labels_name=None)
  if title is not None:
    plt.suptitle(str(title))

def show_confusion_matrix(y_true, y_pred, labels=None, title=None, ax=None):
  """Draw a confusion-matrix heatmap titled with accuracy and weighted F1.

  Args:
    y_true: true labels; a 2-D array is reduced with `argmax` along axis 1.
    y_pred: predicted labels; a 2-D array is reduced the same way.
    labels: tick labels; defaults to '#<class>' for every class that occurs
      in `y_true` or `y_pred`.
    title: optional prefix, shown in square brackets before the scores.
    ax: axes to draw on, or a tuple/list of `plt.subplot` arguments. A new
      6x6 inch figure is created when omitted.
  """
  from sklearn.metrics import confusion_matrix, accuracy_score, f1_score
  import seaborn as sns
  from matplotlib import pyplot as plt
  if y_true.ndim == 2:
    y_true = np.argmax(y_true, axis=1)
  if y_pred.ndim == 2:
    y_pred = np.argmax(y_pred, axis=1)
  if labels is None:
    labels = np.unique(np.concatenate([y_true, y_pred]))
    labels = ['#%d' % i for i in sorted(labels)]
  cm = confusion_matrix(y_true, y_pred)
  acc = accuracy_score(y_true, y_pred)
  f1 = f1_score(y_true, y_pred, average='weighted')
  if ax is None:
    plt.figure(figsize=(6, 6))
    ax = plt.gca()
  elif isinstance(ax, (tuple, list)):
    ax = plt.subplot(*ax)
  sns.heatmap(cm,
              cmap='Blues',
              annot=True,
              fmt="d",
              linewidths=.2,
              xticklabels=labels,
              yticklabels=labels,
              cbar=False,
              ax=ax)
  # Title the axes that was drawn on; `plt.title` would title whichever axes
  # is current, which differs when the caller passes its own axes object.
  ax.set_title("%sAccuracy:%.2f  F1-score:%.2f" %
               (('[%s]' % str(title)) if title is not None else '', acc, f1))


def evaluate_classifier(model,
                        X_train,
                        y_train,
                        X_test,
                        y_test,
                        labels_name=None):
  """Plot confusion matrices and misclassified samples for train and test.

  Args:
    model: object whose `predict` method returns the predictions.
    X_train, y_train: training inputs and labels.
    X_test, y_test: test inputs and labels.
    labels_name: optional class names forwarded to the plotting helpers.
  """
  plt.figure(figsize=(18, 5))
  splits = [
      ('Train', X_train, y_train, model.predict(X_train)),
      ('Test', X_test, y_test, model.predict(X_test)),
  ]
  # both confusion matrices share the figure created above, side by side
  for position, (split_name, _, y_true, y_pred) in enumerate(splits, start=1):
    show_confusion_matrix(y_true=y_true,
                          y_pred=y_pred,
                          title=split_name,
                          labels=labels_name,
                          ax=(1, 2, position))
  for split_name, inputs, y_true, y_pred in splits:
    show_misclassification(images=inputs,
                           y_true=y_true,
                           y_pred=y_pred,
                           labels_name=labels_name,
                           title=split_name)


# ===========================================================================
# Downloading the data
# ===========================================================================
def download_images(target_size=None):
  """Fetch the sample image and return it as a NumPy array.

  The address is stored base64-encoded; the file is retrieved through
  `tf.keras.utils.get_file` under the basename of the decoded URL.

  Args:
    target_size: forwarded to `load_img`.

  Returns:
    The loaded image converted with `np.array`.
  """
  import base64
  encoded = b'aHR0cHM6Ly9haS1kYXRhc2V0cy5zMy5hbWF6b25hd3MuY29tL3N0ZXRpbmQuanBlZw==\n'
  address = base64.decodebytes(encoded).decode('utf-8')
  local_path = tf.keras.utils.get_file(os.path.basename(address),
                                       origin=address)
  picture = keras.preprocessing.image.load_img(local_path,
                                               target_size=target_size)
  return np.array(picture)


def deprocess(img):
  """Rescale an image linearly (-1 maps to 0, +1 to 255) and cast to uint8."""
  rescaled = 255 * (img + 1.0) / 2.0
  return tf.cast(rescaled, tf.uint8)


def download_fmnist():
  """Load Fashion-MNIST through `tensorflow_datasets` as flat float32 arrays.

  Returns:
    dict with `X_train`/`X_test` (every image flattened to one row),
    `y_train`/`y_test` (labels stored as float32) and `labels_name` (array of
    the ten class names).
  """
  import tensorflow_datasets as tfds

  def to_arrays(split):
    # gather the split batch by batch, then flatten every image to a vector
    images, targets = [], []
    for batch in split.batch(256):
      images.append(batch['image'].numpy().astype('float32'))
      targets.append(batch['label'].numpy().astype('float32'))
    images = np.concatenate(images, axis=0)
    targets = np.concatenate(targets, axis=0)
    return images.reshape(-1, np.prod(images.shape[1:])), targets

  train = tfds.load('fashion_mnist', split='train')
  test = tfds.load('fashion_mnist', split='test')
  X_train, y_train = to_arrays(train)
  X_test, y_test = to_arrays(test)
  class_names = np.array([
      'T-shirt/top', 'Trouser', 'Pullover', 'Dress', 'Coat',
      'Sandal', 'Shirt', 'Sneaker', 'Bag', 'Ankle boot'
  ])
  return dict(X_train=X_train,
              y_train=y_train,
              X_test=X_test,
              y_test=y_test,
              labels_name=class_names)


def download_digits():
  """Download the digits archive and return its arrays as a dict.

  The archive is written to 'digits.npz' in the current working directory on
  every call.

  Returns:
    dict mapping every array name stored in the archive to its array.
  """
  import base64
  from urllib.request import urlretrieve
  url = b'aHR0cHM6Ly9haS1kYXRhc2V0cy5zMy5hbWF6b25hd3MuY29tL2RpZ2l0cy5ucHo=\n'
  urlretrieve(base64.decodebytes(url).decode('utf-8'), 'digits.npz')
  # read every array while the archive is open so that the underlying file
  # handle is released before returning
  with np.load('digits.npz') as archive:
    return {name: archive[name] for name in archive.files}


# ===========================================================================
# It should work. Wait! But why?
# ===========================================================================
# Load the digits archive and list what it contains.
dataset = download_digits()
for i, j in dataset.items():
  print('%-15s:' % i, j.dtype, j.shape)

X_train = dataset['X_train']
y_train = dataset['y_train']
X_test = dataset['X_test']
y_test = dataset['y_test']
show_sample_images(images=X_train, labels=y_train, n=5)

# Scale the inputs by the training maximum and one-hot encode the targets.
# The factor is kept so the test inputs can be scaled identically below.
input_scale = np.max(X_train)
X_train = X_train / input_scale
y_train = tf.one_hot(y_train, 10).numpy()

model = keras.Sequential([
    keras.layers.Dense(64, activation='relu'),
    keras.layers.Dense(64, activation='relu'),
    keras.layers.Dense(10, activation='softmax'),
])

model.compile(optimizer='adam',
              loss="categorical_crossentropy",
              metrics=["accuracy"])
# `fit` returns the History object that holds the per-epoch metrics
history = model.fit(
    x=X_train,
    y=y_train,
    batch_size=128,
    epochs=12,
    validation_split=0.2,
).history

# ====== show the learning curve ====== #
train_loss = history['loss']
valid_loss = history['val_loss']
train_acc = history['accuracy']
valid_acc = history['val_accuracy']
plt.figure(figsize=(8, 4))
plt.subplot(1, 2, 1)
plt.plot(train_loss, label="Train Loss")
plt.plot(valid_loss, label="Valid Loss")
plt.legend()
plt.grid()
plt.subplot(1, 2, 2)
plt.plot(train_acc, label="Train Accuracy")
plt.plot(valid_acc, label="Valid Accuracy")
plt.legend()
plt.grid()

# ====== evaluating the classifier ====== #
# The model was trained on scaled inputs, so the test inputs get the same
# scaling; `X_test` itself is left untouched for the code that follows.
evaluate_classifier(model, X_train, y_train, X_test / input_scale, y_test)

# ===========================================================================
# What does artificial neural network learn?
# ===========================================================================
# Train on the 'X_train_dot' variant of the training images
# (NOTE(review): presumably digits carrying an extra marker - confirm against
# the archive) while `X_test` is still the one loaded earlier.
X_train = dataset['X_train_dot']
y_train = tf.one_hot(dataset['y_train'], 10).numpy()
show_sample_images(X_train, y_train)
show_sample_images(X_test, y_test)

model = keras.Sequential([
    keras.layers.Dense(64, activation='relu'),
    keras.layers.Dense(64, activation='relu'),
    keras.layers.Dense(64, activation='relu'),
    keras.layers.Dense(10, activation='softmax'),
])
opt = keras.optimizers.Adam(learning_rate=0.001)
fn_loss = keras.losses.categorical_crossentropy

X_train, X_valid, y_train, y_valid = train_test_split(X_train,
                                                      y_train,
                                                      test_size=0.2)
total_iter = 0
n_epoch = 25
batch_size = 64

# snapshots collected during training, used for the animations below
hidden_images1 = []
hidden_images2 = []
kernel_images = []

for epoch in range(n_epoch):
  for batch_start in range(0, X_train.shape[0], batch_size):
    x = X_train[batch_start:batch_start + batch_size]
    y_true = y_train[batch_start:batch_start + batch_size]
    with tf.GradientTape() as tape:
      y_pred = model(x)
      loss = tf.reduce_mean(fn_loss(y_true, y_pred))
    # get the gradients
    grads = tape.gradient(loss, model.trainable_variables)
    opt.apply_gradients(zip(grads, model.trainable_variables))
    # store the kernel (i.e. the weight) of every layer after each update
    kernel_images.append(
        [l.kernel.numpy() for l in model.layers if hasattr(l, 'kernel')])
    # monitoring the training
    total_iter += 1
    if total_iter % 10 == 0:
      y_train_pred = model(X_train).numpy()
      train_acc = accuracy_score(y_true=np.argmax(y_train, axis=1),
                                 y_pred=np.argmax(y_train_pred, axis=1))
      y_valid_pred = model(X_valid).numpy()
      valid_acc = accuracy_score(y_true=np.argmax(y_valid, axis=1),
                                 y_pred=np.argmax(y_valid_pred, axis=1))
      print("Iter#%d Loss:%.4f Train_acc:%.4f Valid_acc:%.4f" %
            (total_iter, loss.numpy(), train_acc, valid_acc))
    # record every one of the first 49 iterations, then every 5th iteration
    if total_iter < 50 or total_iter % 5 == 0:
      # store hidden activation images of the validation set
      h1 = model.layers[0](X_valid)
      h2 = model.layers[1](h1)
      # convert to list of images
      hidden_images1.append([i for i in h1.numpy()])
      hidden_images2.append([i for i in h2.numpy()])
# ====== show classifier results ====== #
evaluate_classifier(model, X_train, y_train, X_test, y_test)
# ====== show animation ====== #
# Each animation is bound to its own name: matplotlib stops an animation
# whose object has been garbage-collected, so rebinding a single variable
# would drop all but the last one.
ani_kernel = show_animation([k[0] for k in kernel_images],
                            fps=12,
                            delta=False)
ani_kernel_delta = show_animation([k[0] for k in kernel_images],
                                  fps=12,
                                  delta=True)
ani_hidden1 = show_animation(hidden_images1,
                             labels=np.argmax(y_valid, -1),
                             fps=20,
                             delta=False)
ani_hidden2 = show_animation(hidden_images2,
                             labels=np.argmax(y_valid, -1),
                             fps=20,
                             delta=False)

# ===========================================================================
# Handling uncertainty: attention-based model
# ===========================================================================
# Switch to the '*_shift' variants of the inputs; the targets are the same
# training labels, one-hot encoded again.
X_train, X_test = dataset['X_train_shift'], dataset['X_test_shift']
y_train = tf.one_hot(dataset['y_train'], 10).numpy()
show_sample_images(X_train, y_train)


class SelfAttention(keras.layers.Layer):
  """Per-feature attention computed by a tiny two-layer network.

  Every input feature is scored independently: the scalar is projected to
  `units` values, passed through ReLU and projected back to one score. The
  scores are normalised with a softmax over the last axis and used to
  re-weight the input with a residual connection.

  The layer returns the re-weighted input and the attention weights
  concatenated along the last axis, so the output is twice as wide as the
  input.
  """

  def __init__(self, units, **kwargs):
    """Create the layer.

    Args:
      units: width of the hidden projection used to score each feature.
      **kwargs: standard `keras.layers.Layer` arguments (e.g. `name`).
    """
    super().__init__(**kwargs)
    self.units = units

  def build(self, input_shape):
    """Create the weights of the two scoring projections."""
    super().build(input_shape)
    # The weight name is passed by keyword and every weight gets a distinct
    # name, so the call does not depend on the position of `name` in
    # `add_weight` and no two weights of the layer share a name.
    self.kernel = self.add_weight(name='kernel',
                                  shape=[1, self.units],
                                  initializer='glorot_uniform',
                                  dtype=self.dtype,
                                  trainable=True)
    self.bias = self.add_weight(name='bias',
                                shape=(self.units,),
                                initializer='zeros',
                                dtype=self.dtype,
                                trainable=True)
    self.kernel1 = self.add_weight(name='kernel1',
                                   shape=[self.units, 1],
                                   initializer='glorot_uniform',
                                   dtype=self.dtype,
                                   trainable=True)
    self.bias1 = self.add_weight(name='bias1',
                                 shape=(1,),
                                 initializer='zeros',
                                 dtype=self.dtype,
                                 trainable=True)

  def call(self, inputs):
    """Return `[inputs * attention + inputs, attention]` joined on axis -1."""
    # add a trailing axis so every feature is scored on its own
    x = tf.expand_dims(inputs, axis=-1)
    alpha = tf.nn.relu(x @ self.kernel + self.bias)
    alpha = alpha @ self.kernel1 + self.bias1
    alpha = tf.squeeze(alpha, axis=-1)
    attention = tf.nn.softmax(alpha, axis=-1)
    outputs = inputs * attention + inputs
    return tf.concat([outputs, attention], axis=-1)


# SelfAttention emits [re-weighted input, attention]; the classifier keeps
# only the first half of that output.
classifier_layers = [
    SelfAttention(64),
    keras.layers.Lambda(lambda x: tf.split(x, 2, axis=-1)[0]),
]
classifier_layers += [
    keras.layers.Dense(256, activation='relu') for _ in range(3)
]
classifier_layers.append(keras.layers.Dense(10, activation='softmax'))
model = keras.Sequential(classifier_layers)

model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])
model.fit(x=X_train,
          y=y_train,
          batch_size=128,
          epochs=80,
          validation_split=0.2)

evaluate_classifier(model, X_train, y_train, X_test, y_test)
# Reuse the trained attention layer and keep the second half of its output,
# i.e. the attention weights.
attention_only = keras.layers.Lambda(lambda x: tf.split(x, 2, axis=-1)[1])
model_att = keras.Sequential(model.layers[:1] + [attention_only])
att = model_att(X_test).numpy()
# both calls use the same default seed, so they show the same samples
show_sample_images(X_test, y_test, interpolation='bilinear')
show_sample_images(att, y_test, interpolation='bilinear')
# ===========================================================================
# Fashion MNIST
# ===========================================================================
# Load Fashion-MNIST and report the dtype and shape of every entry.
dataset = download_fmnist()
for name, value in dataset.items():
  print(name, value.dtype, value.shape)
X_train, X_test = dataset['X_train'], dataset['X_test']
y_test = dataset['y_test']
labels_name = dataset['labels_name']
# the labels are stored as floats; cast to integers before one-hot encoding
y_train = tf.one_hot(dataset['y_train'].astype('int32'), 10).numpy()
print(labels_name)
# side length of the square image behind every flattened row
img_dim = int(np.sqrt(X_train.shape[1]))

# show_sample_images(X_train, y_train, labels_name=dataset['labels_name'], n=6)

# The hidden layers are named so they can be looked up by name later.
hidden_layers = [
    keras.layers.Dense(512,
                       activation='relu',
                       name='L1',
                       input_shape=X_train.shape[1:]),
    keras.layers.Dense(512, activation='relu', name='L2'),
    keras.layers.Dense(512, activation='relu', name='L3'),
]
model = keras.Sequential(
    hidden_layers + [keras.layers.Dense(10, activation='softmax')])

model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])
model.fit(x=X_train,
          y=y_train,
          batch_size=256,
          epochs=25,
          validation_split=0.2,
          verbose=1)

evaluate_classifier(model, X_train, y_train, X_test, y_test, labels_name)

# ====== adversarial example ====== #
# Start from Gaussian noise and optimise the input itself, with the model
# left untouched, until it is classified as the target class.
X_adversarial = tf.Variable(np.random.randn(1, X_train.shape[1]),
                            dtype='float32',
                            trainable=True)
print("Adversarial prediction:",
      ['%.2f' % i for i in model(X_adversarial).numpy().ravel()])

opt = tf.optimizers.Adam(learning_rate=0.01)
# one-hot target: index 8 is 'Bag' in the Fashion-MNIST label list
y_target = tf.convert_to_tensor([[0, 0, 0, 0, 0, 0, 0, 0, 1, 0]],
                                dtype='float32')
fn_ce = keras.losses.categorical_crossentropy

adversarial_images = []
for epoch in range(200):
  with tf.GradientTape() as tape:
    y_adversarial = model(X_adversarial)
    loss = tf.reduce_mean(fn_ce(y_target, y_adversarial))
  grad = tape.gradient(loss, X_adversarial)
  opt.apply_gradients([(grad, X_adversarial)])
  if epoch % 10 == 0:
    print("Epoch#%d  Adversarial_Loss:%.4f" % (epoch, loss))

  # keep a square snapshot of the input after every update
  dim = int(np.sqrt(X_train.shape[1]))
  img = np.squeeze(X_adversarial.numpy(), axis=0).reshape(dim, dim)
  adversarial_images.append(img)
print("Adversarial prediction:",
      ['%.2f' % i for i in model(X_adversarial).numpy().ravel()])

# Save the evolution of the input, then its step-to-step differences.
for use_delta, target_path in ((False, '/tmp/tmp1.mp4'),
                               (True, '/tmp/tmp2.mp4')):
  ani = show_animation(adversarial_images,
                       fps=24,
                       delta=use_delta,
                       cmap='Greys',
                       interpolation='nearest')
  ani.save(target_path)

# ====== adversarial example: convert 'T-shirt/top' to 'Sneaker' ====== #
# take the first training sample labelled as class 0 ('T-shirt/top')
X_tshirt = None
for x, y in zip(X_train, y_train):
  if np.argmax(y) == 0:
    X_tshirt = x
    break
X_tshirt = tf.convert_to_tensor(np.expand_dims(X_tshirt, 0), dtype='float32')
X_adversarial = tf.Variable(X_tshirt, dtype='float32', trainable=True)
print("Original prediction:",
      ['%.2f' % i for i in model(X_adversarial).numpy().ravel()])

opt = tf.optimizers.SGD(learning_rate=15.0)
# 'Sneaker' is index 7 in the Fashion-MNIST label list (index 8 is 'Bag')
sneaker_index = 7
y_target = tf.one_hot([sneaker_index], depth=10, dtype=tf.float32)
fn_ce = keras.losses.categorical_crossentropy
# pixel-space penalty that keeps the adversarial image close to the original
fn_mse = keras.losses.mean_squared_error
for epoch in range(400):
  with tf.GradientTape() as tape:
    y_adversarial = model(X_adversarial)
    loss = tf.reduce_mean(fn_ce(y_target, y_adversarial)) +\
      0.001 * tf.reduce_mean(fn_mse(X_tshirt, X_adversarial))
  grad = tape.gradient(loss, X_adversarial)
  opt.apply_gradients([(grad, X_adversarial)])
  if epoch % 10 == 0:
    print("Epoch#%d  Adversarial_Loss:%.4f  Grad:%.2f" %
          (epoch, loss, tf.norm(grad).numpy()))

print("Adversarial prediction:",
      ['%.2f' % i for i in model(X_adversarial).numpy().ravel()])

# Show the original, the adversarial image and their difference side by side.
X_tshirt = X_tshirt.numpy().reshape(img_dim, img_dim)
X_adversarial = X_adversarial.numpy().reshape(img_dim, img_dim)
plt.figure(figsize=(12, 4))

plt.subplot(1, 3, 1)
plt.imshow(X_tshirt, cmap='Greys')
plt.axis('off')
plt.title("Original")

plt.subplot(1, 3, 2)
plt.imshow(X_adversarial, cmap='Greys')
plt.axis('off')
plt.title("Adversarial")

# the difference is drawn on the colour scale of the original image
plt.subplot(1, 3, 3)
plt.imshow(X_adversarial - X_tshirt,
           cmap='Greys',
           vmin=np.min(X_tshirt),
           vmax=np.max(X_tshirt))
plt.axis('off')
plt.title("Differences")


# ===========================================================================
# Deep Dream
# ===========================================================================
@tf.function
def deep_dream_optimizing(dream_model, img, learning_rate):
  """Run one gradient-ascent step that increases the mean activations.

  Args:
    dream_model: model returning the activations to maximise.
    img: image tensor without batch dimension.
    learning_rate: step size applied to the normalised gradient.

  Returns:
    Tuple `(loss, img)` with the summed mean activation computed before the
    update and the updated image.
  """
  with tf.GradientTape() as tape:
    # `img` is a plain tensor, so the tape has to be told to track it
    tape.watch(img)
    # the model expects a batch, hence the added leading axis
    layer_activations = dream_model(tf.expand_dims(img, axis=0))
    # one mean per returned activation, summed into a single objective
    loss = tf.reduce_sum(
        [tf.math.reduce_mean(act) for act in layer_activations])
  # gradient of the objective with respect to the image pixels
  gradients = tape.gradient(loss, img)
  # normalise by the standard deviation; the epsilon avoids division by zero
  gradients = gradients / (tf.math.reduce_std(gradients) + 1e-8)
  # ascent, not descent: the step is added to the image
  return loss, img + gradients * learning_rate


# ====== Deep dream ====== #
def deep_dream(model,
               layers_name,
               img,
               n_epoch=100,
               learning_rate=0.01,
               octave_scale=1,
               octave_step=1):
  """Repeatedly apply `deep_dream_optimizing` to an image.

  Args:
    model: Keras model providing the layers to maximise.
    layers_name: names of the layers whose outputs are maximised.
    img: input image; converted to a float32 tensor.
    n_epoch: number of optimisation steps per octave.
    learning_rate: step size passed to `deep_dream_optimizing`.
    octave_scale: when greater than 1, the image is resized at octave `k` to
      the original size (all axes but the last) times `octave_scale**k`.
    octave_step: number of octaves; at least one is always run.

  Returns:
    Tuple `(loss, img)` from the last optimisation step.
  """
  img = tf.convert_to_tensor(img, dtype='float32')
  # original size without the last axis, kept as float for the scaling
  base_shape = tf.cast(tf.shape(img)[:-1], tf.float32)
  # feature extractor exposing the outputs of the requested layers
  targets = [model.get_layer(layer).output for layer in layers_name]
  dream_model = keras.Model(inputs=model.input, outputs=targets)

  n_octaves = max(octave_step, 1)
  for octave in range(n_octaves):
    if octave_scale > 1:
      scaled = tf.cast(base_shape * (octave_scale**octave), tf.int32)
      img = tf.image.resize(img, scaled)
    # optimise the (possibly resized) image
    for _ in range(n_epoch):
      loss, img = deep_dream_optimizing(dream_model, img, learning_rate)
  return loss, img


# Dream on one training image with the dense model; every call runs
# the default 100 steps, which is what the printed counter assumes.
img = X_train[1]
images = [img]
for i in range(80):
  loss, img = deep_dream(model, ['L1', 'L2', 'L3'], img)
  images.append(img.numpy().reshape(img_dim, img_dim))
  print("Iter#%d  Loss:%.4f" % ((i + 1) * 100, loss.numpy()))

# first versus last image
plt.figure(figsize=(12, 6))
for position, picture in ((1, images[0]), (2, images[-1])):
  plt.subplot(1, 2, position)
  plt.imshow(picture.reshape(img_dim, img_dim),
             cmap='Blues',
             interpolation='bilinear')
  plt.axis('off')

# ====== Using inception ====== #
model = keras.applications.InceptionV3(include_top=False, weights='imagenet')
img = tf.keras.applications.inception_v3.preprocess_input(download_images())
images = [img]
loss, img = deep_dream(model, ['mixed3', 'mixed5'],
                       img,
                       n_epoch=50,
                       octave_scale=1.3,
                       octave_step=3)
images.append(img.numpy())

# original versus dreamed picture, mapped back to uint8 pixels
plt.figure(figsize=(12, 6))
for position, picture in ((1, images[0]), (2, images[-1])):
  plt.subplot(1, 2, position)
  plt.imshow(deprocess(picture))
  plt.axis('off')
	from __future__ import absolute_import, division, print_function

	import os

	# These variables are read by TensorFlow's native runtime, so they are set
	# before `tensorflow` is imported; setting them afterwards does not filter
	# the log messages emitted while the library is being loaded.
	os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
	os.environ['TF_FORCE_GPU_ALLOW_GROWTH'] = 'true'

	import numpy as np
	import tensorflow as tf
	from matplotlib import pyplot as plt
	from sklearn.metrics import accuracy_score
	from sklearn.model_selection import train_test_split
	from tensorflow import keras

	# Seed both TensorFlow's and NumPy's global random generators.
	tf.random.set_seed(8)
	np.random.seed(8)


	# ===========================================================================
	# Helper functions for visualization
	# ===========================================================================
	def show_sample_images(images,
	                       labels=None,
	                       n=5,
	                       labels_name=None,
	                       seed=8,
	                       axes=None,
	                       interpolation=None,
	                       shuffle=True):
	  """Draw sample images on a grid of axes and return the image artists.

	  Args:
	    images: indexable collection of arrays. A 1-D array is reshaped to a
	      square and a 3-D array with a trailing singleton channel is squeezed;
	      both are drawn with the 'Greys' colormap. Any other array is handed
	      to `imshow` unchanged with the default colormap.
	    labels: optional array of per-image labels; a 2-D array is reduced
	      with `argmax` along axis 1.
	    n: maximum number of columns; with more than `n` images an `n` x `n`
	      grid is created.
	    labels_name: optional lookup indexed by `int(label)` to build titles.
	    seed: seed of the permutation that selects the images.
	    axes: existing axes to draw on - a single axes object, a flat sequence
	      or a sequence of rows. A new figure is created when omitted.
	    interpolation: forwarded to `imshow`.
	    shuffle: pick images in a seeded random order instead of input order.

	  Returns:
	    List of the objects returned by `imshow`, one per drawn image.
	  """
	  if labels is not None and labels.ndim == 2:
	    labels = np.argmax(labels, axis=1)
	  n_images = len(images)
	  if shuffle:
	    # A private generator yields the same permutation as seeding the global
	    # one, without resetting NumPy's global random state for the caller.
	    indices = np.random.RandomState(seed).permutation(n_images)
	  else:
	    indices = np.arange(n_images, dtype='int32')
	  if axes is None:
	    # pyplot is only needed when this function has to create the figure
	    from matplotlib import pyplot as plt
	    nrows = n if n_images > n else 1
	    ncols = min(n, n_images)
	    _, axes = plt.subplots(nrows=nrows,
	                           ncols=ncols,
	                           figsize=(int(ncols * 1.2), int(nrows * 1.2) + 1))
	  # `plt.subplots` returns a bare axes for a 1x1 grid, a 1-D array for one
	  # row and a 2-D array otherwise; normalise all of them to a flat list.
	  if isinstance(axes, (np.ndarray, tuple, list)):
	    flat_axes = []
	    for item in axes:
	      if isinstance(item, (np.ndarray, tuple, list)):
	        flat_axes.extend(item)
	      else:
	        flat_axes.append(item)
	  else:
	    flat_axes = [axes]
	  fig_images = []
	  for count, ax in enumerate(flat_axes):
	    ax.axis('off')
	    if count >= len(indices):
	      # more axes than images: leave the remaining axes blank
	      continue
	    idx = indices[count]
	    img = images[idx]
	    cmap = 'Greys'
	    if img.ndim == 1:
	      # flattened image, assumed to be square
	      dim = int(np.sqrt(len(img)))
	      img = img.reshape(dim, dim)
	    elif img.ndim == 3 and img.shape[-1] == 1:
	      img = np.squeeze(img, axis=-1)
	    else:
	      cmap = None
	    fig_images.append(ax.imshow(img, cmap=cmap, interpolation=interpolation))
	    if labels is not None:
	      lab = labels[idx] if labels_name is None else \
	        labels_name[int(labels[idx])]
	      ax.set_title(lab, fontsize=8)
	  return fig_images


	def show_animation(images_list,
	                   labels=None,
	                   n=5,
	                   labels_name=None,
	                   fps=1.5,
	                   seed=8,
	                   delta=False,
	                   cmap=None,
	                   interpolation=None):
	  """Build a matplotlib `ArtistAnimation` from a sequence of frames.

	  Args:
	    images_list: either a sequence whose items are lists/tuples of images
	      (every item becomes one frame drawn as a grid by
	      `show_sample_images`) or a sequence of single images.
	    labels, n, labels_name, seed: forwarded to `show_sample_images` in the
	      grid mode; unused in the single-image mode.
	    fps: frames per second, converted to the frame interval.
	    delta: single-image mode only; animate the difference between
	      consecutive images instead of the images themselves.
	    cmap: single-image mode colormap, 'Blues' when omitted.
	    interpolation: single-image mode interpolation, 'bilinear' when omitted.

	  Returns:
	    The created `matplotlib.animation.ArtistAnimation`.
	  """
	  from matplotlib import pyplot as plt
	  import matplotlib.animation as animation
	  # ims is a list of lists, each row is a list of artists to draw in the
	  # current frame
	  ims = []
	  # images_list is list of image list
	  if isinstance(images_list[0], (tuple, list)):
	    # have to reuse the same axes
	    n_images = len(images_list[0])
	    nrows = n if n_images > n else 1
	    ncols = min(n, n_images)
	    fig, axes = plt.subplots(nrows=nrows,
	                             ncols=ncols,
	                             figsize=(int(ncols * 1.2), int(nrows * 1.2)))
	    for images in images_list:
	      ims.append(
	          show_sample_images(images,
	                             labels,
	                             n=n,
	                             labels_name=labels_name,
	                             seed=seed,
	                             axes=axes))
	  # single image
	  else:
	    fig = plt.figure(figsize=(8, 8))
	    ax = plt.gca()
	    img_generator = [img2 - img1
	                     for img1, img2 in zip(images_list, images_list[1:])] \
	      if delta else images_list
	    # one shared colour scale for all frames
	    vmin = np.min([np.min(i) for i in img_generator])
	    vmax = np.max([np.max(i) for i in img_generator])
	    for img in img_generator:
	      img = ax.imshow(
	          img,
	          cmap='Blues' if cmap is None else cmap,
	          interpolation='bilinear' if interpolation is None else interpolation,
	          vmin=vmin,
	          vmax=vmax)
	      ax.axis('off')
	      ims.append([img])
	  ani = animation.ArtistAnimation(
	      fig,
	      ims,
	      interval=1 / fps * 1000,  # in millisecond
	      blit=True,
	      repeat=False,
	      repeat_delay=1000)
	  return ani


	def show_misclassification(images,
	                           y_true,
	                           y_pred,
	                           n=5,
	                           labels_name=None,
	                           title=None):
	  """Plot samples whose predicted class differs from the true class.

	  Args:
	    images: array of images, indexable with a boolean mask.
	    y_true: true labels; a 2-D array is reduced with `argmax` along axis 1.
	    y_pred: predicted labels; a 2-D array is reduced the same way.
	    n: forwarded to `show_sample_images`.
	    labels_name: optional lookup indexed by `int(label)` used in captions.
	    title: optional figure title.

	  Returns:
	    None. Nothing is drawn when every prediction matches.
	  """
	  if y_true.ndim == 2:
	    y_true = np.argmax(y_true, axis=1)
	  if y_pred.ndim == 2:
	    y_pred = np.argmax(y_pred, axis=1)
	  indices = np.asarray(y_true) != np.asarray(y_pred)
	  if np.sum(indices) == 0:
	    return
	  images = images[indices]
	  y_true = y_true[indices]
	  y_pred = y_pred[indices]
	  labels = np.array([
	      "True:%s\nPred:%s" %
	      ((str(true), str(pred)) if labels_name is None else
	       (str(labels_name[int(true)]), str(labels_name[int(pred)])))
	      for true, pred in zip(y_true, y_pred)
	  ])
	  # the captions are already final strings, hence labels_name=None
	  show_sample_images(images=images, labels=labels, n=n, labels_name=None)
	  if title is not None:
	    plt.suptitle(str(title))


	def show_confusion_matrix(y_true, y_pred, labels=None, title=None, ax=None):
	  """Draw a confusion-matrix heatmap titled with accuracy and weighted F1.

	  Args:
	    y_true: true labels; a 2-D array is reduced with `argmax` along axis 1.
	    y_pred: predicted labels; a 2-D array is reduced the same way.
	    labels: tick labels; defaults to '#<class>' for every class that
	      occurs in `y_true` or `y_pred`.
	    title: optional prefix, shown in square brackets before the scores.
	    ax: axes to draw on, or a tuple/list of `plt.subplot` arguments. A new
	      6x6 inch figure is created when omitted.
	  """
	  from sklearn.metrics import confusion_matrix, accuracy_score, f1_score
	  import seaborn as sns
	  from matplotlib import pyplot as plt
	  if y_true.ndim == 2:
	    y_true = np.argmax(y_true, axis=1)
	  if y_pred.ndim == 2:
	    y_pred = np.argmax(y_pred, axis=1)
	  if labels is None:
	    labels = np.unique(np.concatenate([y_true, y_pred]))
	    labels = ['#%d' % i for i in sorted(labels)]
	  cm = confusion_matrix(y_true, y_pred)
	  acc = accuracy_score(y_true, y_pred)
	  f1 = f1_score(y_true, y_pred, average='weighted')
	  if ax is None:
	    plt.figure(figsize=(6, 6))
	    ax = plt.gca()
	  elif isinstance(ax, (tuple, list)):
	    ax = plt.subplot(*ax)
	  sns.heatmap(cm,
	              cmap='Blues',
	              annot=True,
	              fmt="d",
	              linewidths=.2,
	              xticklabels=labels,
	              yticklabels=labels,
	              cbar=False,
	              ax=ax)
	  # Title the axes that was drawn on; `plt.title` would title whichever
	  # axes is current, which differs when the caller passes its own axes.
	  ax.set_title("%sAccuracy:%.2f  F1-score:%.2f" %
	               (('[%s]' % str(title)) if title is not None else '', acc, f1))


	def evaluate_classifier(model,
	                        X_train,
	                        y_train,
	                        X_test,
	                        y_test,
	                        labels_name=None):
	  """Plot confusion matrices and misclassified samples for train and test.

	  Args:
	    model: object whose `predict` method returns the predictions.
	    X_train, y_train: training inputs and labels.
	    X_test, y_test: test inputs and labels.
	    labels_name: optional class names forwarded to the plotting helpers.
	  """
	  plt.figure(figsize=(18, 5))
	  y_train_pred = model.predict(X_train)
	  y_test_pred = model.predict(X_test)
	  # both confusion matrices share the figure created above, side by side
	  show_confusion_matrix(y_true=y_train,
	                        y_pred=y_train_pred,
	                        title='Train',
	                        labels=labels_name,
	                        ax=(1, 2, 1))
	  show_confusion_matrix(y_true=y_test,
	                        y_pred=y_test_pred,
	                        title='Test',
	                        labels=labels_name,
	                        ax=(1, 2, 2))
	  show_misclassification(images=X_train,
	                         y_true=y_train,
	                         y_pred=y_train_pred,
	                         labels_name=labels_name,
	                         title='Train')
	  show_misclassification(images=X_test,
	                         y_true=y_test,
	                         y_pred=y_test_pred,
	                         labels_name=labels_name,
	                         title='Test')


	# ===========================================================================
	# Downloading the data
	# ===========================================================================
	def download_images(target_size=None):
	  """Fetch the sample image and return it as a NumPy array.

	  The address is stored base64-encoded; the file is retrieved through
	  `tf.keras.utils.get_file` under the basename of the decoded URL.

	  Args:
	    target_size: forwarded to `load_img`.

	  Returns:
	    The loaded image converted with `np.array`.
	  """
	  import base64
	  url = b'aHR0cHM6Ly9haS1kYXRhc2V0cy5zMy5hbWF6b25hd3MuY29tL3N0ZXRpbmQuanBlZw==\n'
	  url = str(base64.decodebytes(url), 'utf-8')
	  name = os.path.basename(url)
	  image_path = tf.keras.utils.get_file(name, origin=url)
	  img = keras.preprocessing.image.load_img(image_path, target_size=target_size)
	  return np.array(img)


	def deprocess(img):
	  """Rescale an image linearly (-1 maps to 0, +1 to 255) and cast to uint8."""
	  img = 255 * (img + 1.0) / 2.0
	  return tf.cast(img, tf.uint8)


	def download_fmnist():
	  """Load Fashion-MNIST through `tensorflow_datasets` as flat float32 arrays.

	  Returns:
	    dict with `X_train`/`X_test` (every image flattened to one row),
	    `y_train`/`y_test` (labels stored as float32) and `labels_name` (array
	    of the ten class names).
	  """
	  import tensorflow_datasets as tfds
	  train = tfds.load('fashion_mnist', split='train')
	  test = tfds.load('fashion_mnist', split='test')

	  X_train, y_train = [], []
	  for data in train.batch(256):
	    X_train.append(data['image'].numpy().astype('float32'))
	    y_train.append(data['label'].numpy().astype('float32'))
	  X_train = np.concatenate(X_train, axis=0)
	  y_train = np.concatenate(y_train, axis=0)

	  X_test, y_test = [], []
	  for data in test.batch(256):
	    X_test.append(data['image'].numpy().astype('float32'))
	    y_test.append(data['label'].numpy().astype('float32'))
	  X_test = np.concatenate(X_test, axis=0)
	  y_test = np.concatenate(y_test, axis=0)

	  # flatten every image to a single row
	  X_train = X_train.reshape(-1, np.prod(X_train.shape[1:]))
	  X_test = X_test.reshape(-1, np.prod(X_test.shape[1:]))
	  return dict(X_train=X_train,
	              y_train=y_train,
	              X_test=X_test,
	              y_test=y_test,
	              labels_name=np.array([
	                  'T-shirt/top', 'Trouser', 'Pullover', 'Dress', 'Coat',
	                  'Sandal', 'Shirt', 'Sneaker', 'Bag', 'Ankle boot'
	              ]))


	def download_digits():
	  """Download the digits archive and return its arrays as a plain dict.

	  The base64-encoded URL is fetched to ``digits.npz`` (relative path, so the
	  current working directory) and every array stored in the archive is read
	  into memory.

	  Returns:
	    A dict mapping each array name in the archive to its ``np.ndarray``.
	  """
	  import base64
	  # the file only runs on Python 3, so the stdlib module replaces `six.moves`
	  from urllib.request import urlretrieve
	  url = b'aHR0cHM6Ly9haS1kYXRhc2V0cy5zMy5hbWF6b25hd3MuY29tL2RpZ2l0cy5ucHo=\n'
	  urlretrieve(str(base64.decodebytes(url), 'utf-8'), 'digits.npz')
	  # `np.load` on an .npz keeps the file open and reads members lazily;
	  # load every array inside the context manager so the handle gets closed
	  with np.load('digits.npz') as archive:
	    return {name: archive[name] for name in archive.files}


	# ===========================================================================
	# It should work. Wait! But why?
	# ===========================================================================
	dataset = download_digits()
	for name, array in dataset.items():
	  print('%-15s:' % name, array.dtype, array.shape)

	X_train, y_train = dataset['X_train'], dataset['y_train']
	X_test, y_test = dataset['X_test'], dataset['y_test']
	show_sample_images(images=X_train, labels=y_train, n=5)

	# scale the training inputs by their maximum and one-hot encode the labels
	# NOTE(review): only X_train is rescaled; X_test reaches evaluate_classifier
	# below unscaled -- confirm this is intentional for the demonstration
	X_train = X_train / np.max(X_train)
	y_train = tf.one_hot(y_train, 10).numpy()

	# two hidden ReLU layers followed by a 10-way softmax
	model = keras.Sequential()
	model.add(keras.layers.Dense(64, activation='relu'))
	model.add(keras.layers.Dense(64, activation='relu'))
	model.add(keras.layers.Dense(10, activation='softmax'))

	model.compile(optimizer='adam',
	              loss="categorical_crossentropy",
	              metrics=["accuracy"])
	model.fit(x=X_train,
	          y=y_train,
	          batch_size=128,
	          epochs=12,
	          validation_split=0.2)

	# ====== show the learning curve ====== #
	history = model.history.history
	train_loss, valid_loss = history['loss'], history['val_loss']
	train_acc, valid_acc = history['accuracy'], history['val_accuracy']
	plt.figure(figsize=(8, 4))
	# left panel: losses, right panel: accuracies
	curves = (
	    ((train_loss, "Train Loss"), (valid_loss, "Valid Loss")),
	    ((train_acc, "Train Accuracy"), (valid_acc, "Valid Accuracy")),
	)
	for panel, pair in enumerate(curves, start=1):
	  plt.subplot(1, 2, panel)
	  for values, label in pair:
	    plt.plot(values, label=label)
	  plt.legend()
	  plt.grid()

	# ====== evaluating the classifier ====== #
	evaluate_classifier(model, X_train, y_train, X_test, y_test)

	# ===========================================================================
	# What does an artificial neural network learn?
	# ===========================================================================
	X_train = dataset['X_train_dot']
	y_train = tf.one_hot(dataset['y_train'], 10).numpy()
	show_sample_images(X_train, y_train)
	show_sample_images(X_test, y_test)

	# three hidden ReLU layers followed by a 10-way softmax
	model = keras.Sequential(
	    [keras.layers.Dense(64, activation='relu') for _ in range(3)] +
	    [keras.layers.Dense(10, activation='softmax')])
	opt = keras.optimizers.Adam(learning_rate=0.001)
	fn_loss = keras.losses.categorical_crossentropy

	X_train, X_valid, y_train, y_valid = train_test_split(X_train,
	                                                      y_train,
	                                                      test_size=0.2)
	total_iter = 0
	n_epoch = 25
	batch_size = 64

	# per-snapshot activations of the first two layers, and per-step kernels
	hidden_images1 = []
	hidden_images2 = []
	kernel_images = []

	for epoch in range(n_epoch):
	  for batch_start in range(0, X_train.shape[0], batch_size):
	    batch = slice(batch_start, batch_start + batch_size)
	    x, y_true = X_train[batch], y_train[batch]
	    with tf.GradientTape() as tape:
	      y_pred = model(x)
	      loss = tf.reduce_mean(fn_loss(y_true, y_pred))
	    # one optimizer step on all trainable variables
	    grads = tape.gradient(loss, model.trainable_variables)
	    opt.apply_gradients(list(zip(grads, model.trainable_variables)))
	    # snapshot the kernel (i.e. the weight matrix) of every layer that has one
	    kernel_images.append([
	        layer.kernel.numpy()
	        for layer in model.layers
	        if hasattr(layer, 'kernel')
	    ])
	    total_iter += 1
	    # report train/validation accuracy every 10th step
	    if total_iter % 10 == 0:
	      y_train_pred = model(X_train).numpy()
	      y_valid_pred = model(X_valid).numpy()
	      train_acc = accuracy_score(y_true=np.argmax(y_train, axis=1),
	                                 y_pred=np.argmax(y_train_pred, axis=1))
	      valid_acc = accuracy_score(y_true=np.argmax(y_valid, axis=1),
	                                 y_pred=np.argmax(y_valid_pred, axis=1))
	      print("Iter#%d Loss:%.4f Train_acc:%.4f Valid_acc:%.4f" %
	            (total_iter, loss.numpy(), train_acc, valid_acc))
	    # record every step for the first 49 steps, afterwards every 5th step
	    if total_iter < 50 or total_iter % 5 == 0:
	      # activations of the first two layers on the validation set,
	      # stored as one list entry per validation sample
	      h1 = model.layers[0](X_valid)
	      h2 = model.layers[1](h1)
	      hidden_images1.append(list(h1.numpy()))
	      hidden_images2.append(list(h2.numpy()))
	# ====== show classifier results ====== #
	evaluate_classifier(model, X_train, y_train, X_test, y_test)
	# ====== show animation ====== #
	# NOTE(review): `ani` is rebound on every call, so only the last animation
	# object stays referenced -- confirm show_animation does not rely on the
	# caller keeping each returned object alive
	for delta in (False, True):
	  ani = show_animation([k[0] for k in kernel_images], fps=12, delta=delta)
	for frames in (hidden_images1, hidden_images2):
	  ani = show_animation(frames,
	                       labels=np.argmax(y_valid, -1),
	                       fps=20,
	                       delta=False)

	# ===========================================================================
	# Handling uncertainty: attention-based model
	# ===========================================================================
	# switch to the shifted variants of the digit images; the training labels
	# are one-hot encoded again from the raw labels
	X_train, X_test = dataset['X_train_shift'], dataset['X_test_shift']
	y_train = tf.one_hot(dataset['y_train'], 10).numpy()
	show_sample_images(X_train, y_train)


	class SelfAttention(keras.layers.Layer):
	  """Element-wise attention over the last axis of the input.

	  Every input element is scored on its own by a small two-layer network
	  (1 -> ``units`` -> 1, ReLU in between) whose weights are shared by all
	  elements.  A softmax over the last axis turns the scores into attention
	  weights.  The output is ``inputs * attention + inputs`` concatenated with
	  the attention weights, so its last axis is twice as long as the input's.

	  Arguments:
	    units: width of the hidden layer of the scoring network.
	    **kwargs: forwarded to ``keras.layers.Layer`` (e.g. ``name``).
	  """

	  def __init__(self, units, **kwargs):
	    super().__init__(**kwargs)
	    self.units = units

	  def build(self, input_shape):
	    super().build(input_shape)
	    # each weight gets its own name, passed by keyword: duplicated names
	    # collide in name-based weight files, and the positional form breaks on
	    # `add_weight` signatures whose first positional parameter is the shape
	    self.kernel = self.add_weight(name='kernel',
	                                  shape=[1, self.units],
	                                  initializer='glorot_uniform',
	                                  dtype=self.dtype,
	                                  trainable=True)
	    self.bias = self.add_weight(name='bias',
	                                shape=(self.units,),
	                                initializer='zeros',
	                                dtype=self.dtype,
	                                trainable=True)
	    self.kernel1 = self.add_weight(name='kernel1',
	                                   shape=[self.units, 1],
	                                   initializer='glorot_uniform',
	                                   dtype=self.dtype,
	                                   trainable=True)
	    self.bias1 = self.add_weight(name='bias1',
	                                 shape=(1,),
	                                 initializer='zeros',
	                                 dtype=self.dtype,
	                                 trainable=True)

	  def call(self, inputs):
	    # give every element its own trailing axis of size 1 so the shared
	    # scoring network is applied to each element independently
	    x = tf.expand_dims(inputs, axis=-1)
	    alpha = tf.nn.relu(x @ self.kernel + self.bias)
	    alpha = alpha @ self.kernel1 + self.bias1
	    # drop the size-1 score axis again: one score per input element
	    alpha = tf.squeeze(alpha, axis=-1)
	    attention = tf.nn.softmax(alpha, axis=-1)
	    # residual form: attended input added on top of the raw input
	    outputs = inputs * attention + inputs
	    # the attention weights ride along in the second half of the last axis
	    return tf.concat([outputs, attention], axis=-1)

	  def get_config(self):
	    # expose `units` so the layer can be rebuilt from its config
	    config = super().get_config()
	    config['units'] = self.units
	    return config


	# the attention layer emits [attended inputs, attention weights]; the Lambda
	# keeps only the first half for the classifier that follows
	attention_front = [
	    SelfAttention(64),
	    keras.layers.Lambda(lambda x: tf.split(x, 2, axis=-1)[0]),
	]
	classifier_layers = [
	    keras.layers.Dense(256, activation='relu') for _ in range(3)
	]
	classifier_layers.append(keras.layers.Dense(10, activation='softmax'))
	model = keras.Sequential(attention_front + classifier_layers)

	model.compile(optimizer='adam',
	              loss='categorical_crossentropy',
	              metrics=['accuracy'])
	model.fit(x=X_train,
	          y=y_train,
	          batch_size=128,
	          epochs=80,
	          validation_split=0.2)

	evaluate_classifier(model, X_train, y_train, X_test, y_test)
	# reuse the trained attention layer, this time keeping the second half of its
	# output, i.e. the attention weights themselves
	model_att = keras.Sequential([
	    model.layers[0],
	    keras.layers.Lambda(lambda x: tf.split(x, 2, axis=-1)[1]),
	])
	att = model_att(X_test).numpy()
	# test images first, then their attention maps
	for picture_set in (X_test, att):
	  show_sample_images(picture_set, y_test, interpolation='bilinear')
	# ===========================================================================
	# Fashion MNIST
	# ===========================================================================
	dataset = download_fmnist()
	for name, value in dataset.items():
	  print(name, value.dtype, value.shape)
	X_train, y_train = dataset['X_train'], dataset['y_train']
	X_test, y_test = dataset['X_test'], dataset['y_test']
	labels_name = dataset['labels_name']
	# labels arrive as float32, so cast back to integers before one-hot encoding
	y_train = tf.one_hot(y_train.astype('int32'), 10).numpy()
	print(labels_name)
	# side length of the flattened images, taken as the square root of the
	# feature count
	img_dim = int(np.sqrt(X_train.shape[1]))

	# show_sample_images(X_train, y_train, labels_name=dataset['labels_name'], n=6)

	# the hidden layers are named so they can be looked up by name later on
	model = keras.Sequential()
	model.add(
	    keras.layers.Dense(512,
	                       activation='relu',
	                       name='L1',
	                       input_shape=X_train.shape[1:]))
	model.add(keras.layers.Dense(512, activation='relu', name='L2'))
	model.add(keras.layers.Dense(512, activation='relu', name='L3'))
	model.add(keras.layers.Dense(10, activation='softmax'))

	model.compile(optimizer='adam',
	              loss='categorical_crossentropy',
	              metrics=['accuracy'])
	model.fit(x=X_train,
	          y=y_train,
	          batch_size=256,
	          epochs=25,
	          validation_split=0.2,
	          verbose=1)

	evaluate_classifier(model, X_train, y_train, X_test, y_test, labels_name)

	# ====== adversarial example ====== #
	# start from Gaussian noise and optimize the *input* (not the weights)
	X_adversarial = tf.Variable(np.random.randn(1, X_train.shape[1]),
	                            dtype='float32',
	                            trainable=True)


	def _adversarial_prediction():
	  # class probabilities of the current adversarial input, as 2-decimal strings
	  return ['%.2f' % p for p in model(X_adversarial).numpy().ravel()]


	print("Adversarial prediction:", _adversarial_prediction())

	opt = tf.optimizers.Adam(learning_rate=0.01)
	# one-hot target with index 8 set, i.e. the 'Bag' class
	y_target = tf.convert_to_tensor([[0, 0, 0, 0, 0, 0, 0, 0, 1, 0]],
	                                dtype='float32')
	fn_ce = keras.losses.categorical_crossentropy

	adversarial_images = []
	dim = int(np.sqrt(X_train.shape[1]))
	for epoch in range(200):
	  with tf.GradientTape() as tape:
	    y_adversarial = model(X_adversarial)
	    loss = tf.reduce_mean(fn_ce(y_target, y_adversarial))
	  grad = tape.gradient(loss, X_adversarial)
	  opt.apply_gradients([(grad, X_adversarial)])
	  if epoch % 10 == 0:
	    print("Epoch#%d Adversarial_Loss:%.4f" % (epoch, loss))
	  # keep one square frame per step for the animations below
	  img = np.squeeze(X_adversarial.numpy(), axis=0).reshape(dim, dim)
	  adversarial_images.append(img)
	print("Adversarial prediction:", _adversarial_prediction())

	# render the frames twice (delta off, then on) and save each run to its file
	for delta, video_path in ((False, '/tmp/tmp1.mp4'), (True, '/tmp/tmp2.mp4')):
	  ani = show_animation(adversarial_images,
	                       fps=24,
	                       delta=delta,
	                       cmap='Greys',
	                       interpolation='nearest')
	  ani.save(video_path)

	# ====== adversarial example: convert 'T-shirt/top' to 'Sneaker' ====== #
	# take the first training image whose one-hot label is class 0 ('T-shirt/top')
	tshirt_rows = np.flatnonzero(np.argmax(y_train, axis=1) == 0)
	if tshirt_rows.size == 0:
	  raise ValueError("no 'T-shirt/top' sample found in the training data")
	X_tshirt = tf.convert_to_tensor(np.expand_dims(X_train[tshirt_rows[0]], 0),
	                                dtype='float32')
	X_adversarial = tf.Variable(X_tshirt, dtype='float32', trainable=True)
	print("Original prediction:",
	      ['%.2f' % i for i in model(X_adversarial).numpy().ravel()])

	opt = tf.optimizers.SGD(learning_rate=15.0)
	# one-hot target for 'Sneaker', looked up by name: the previous hard-coded
	# vector had index 8 set, which is 'Bag', contradicting the stated goal
	sneaker_index = list(labels_name).index('Sneaker')
	y_target = tf.one_hot([sneaker_index], len(labels_name), dtype='float32')
	fn_ce = keras.losses.categorical_crossentropy
	# pixel-space penalty that keeps the adversarial image close to the original;
	# this name was previously bound to categorical_crossentropy by mistake
	fn_mse = keras.losses.mean_squared_error
	for epoch in range(400):
	  with tf.GradientTape() as tape:
	    y_adversarial = model(X_adversarial)
	    loss = tf.reduce_mean(fn_ce(y_target, y_adversarial)) + \
	        0.001 * tf.reduce_mean(fn_mse(X_tshirt, X_adversarial))
	  # the gradient is taken w.r.t. the input image, the model stays fixed
	  grad = tape.gradient(loss, X_adversarial)
	  opt.apply_gradients([(grad, X_adversarial)])
	  if epoch % 10 == 0:
	    print("Epoch#%d Adversarial_Loss:%.4f Grad:%.2f" %
	          (epoch, loss.numpy(), tf.norm(grad).numpy()))

	print("Adversarial prediction:",
	      ['%.2f' % i for i in model(X_adversarial).numpy().ravel()])

	# back to square NumPy images for plotting
	X_tshirt = X_tshirt.numpy().reshape(img_dim, img_dim)
	X_adversarial = X_adversarial.numpy().reshape(img_dim, img_dim)
	plt.figure(figsize=(12, 4))
	# the difference panel reuses the original image's value range as its
	# colour limits
	panels = [
	    (X_tshirt, "Original", {}),
	    (X_adversarial, "Adversarial", {}),
	    (X_adversarial - X_tshirt, "Differences",
	     dict(vmin=np.min(X_tshirt), vmax=np.max(X_tshirt))),
	]
	for position, (picture, title, extra) in enumerate(panels, start=1):
	  plt.subplot(1, 3, position)
	  plt.imshow(picture, cmap='Greys', **extra)
	  plt.axis('off')
	  plt.title(title)


	# ===========================================================================
	# Deep Dream
	# ===========================================================================
	@tf.function
	def deep_dream_optimizing(dream_model, img, learning_rate):
	  """Run one gradient-ascent step of deep dream on ``img``.

	  The loss is the sum, over the outputs of ``dream_model``, of each output's
	  mean activation.  The gradient of that loss with respect to ``img`` is
	  divided by its standard deviation and added to the image.

	  Arguments:
	    dream_model: model returning the activations to maximize, either as a
	      list/tuple of tensors or as a single tensor.
	    img: image tensor without batch axis (one is added before the call).
	    learning_rate: step size of the ascent update.

	  Returns:
	    Tuple ``(loss, img)`` with the loss before the update and the updated
	    image.
	  """
	  with tf.GradientTape() as tape:
	    # `img` is a plain tensor, and `GradientTape` only watches
	    # `tf.Variable`s by default
	    tape.watch(img)
	    # add batch dimension
	    layer_activations = dream_model(tf.expand_dims(img, axis=0))
	    # a single-output model may hand back a bare tensor; wrap it so the
	    # reduction below walks over layers, never over the batch axis
	    if not isinstance(layer_activations, (list, tuple)):
	      layer_activations = [layer_activations]
	    # mean activation of each layer, summed into one scalar objective
	    losses = [tf.math.reduce_mean(act) for act in layer_activations]
	    loss = tf.reduce_sum(losses)
	  # gradient of the loss with respect to the pixels of the input image
	  gradients = tape.gradient(loss, img)
	  # normalize the gradients; the epsilon guards against a zero deviation
	  gradients /= tf.math.reduce_std(gradients) + 1e-8
	  # update the image; note this is gradient ascent, not descent
	  img = img + gradients * learning_rate
	  return loss, img


	# ====== Deep dream ====== #
	def deep_dream(model,
	               layers_name,
	               img,
	               n_epoch=100,
	               learning_rate=0.01,
	               octave_scale=1,
	               octave_step=1):
	  """Amplify the activations of the named layers in ``img``.

	  A feature-extraction model exposing the outputs of ``layers_name`` is built
	  from ``model``.  For each octave the image is optionally resized and then
	  updated ``n_epoch`` times with ``deep_dream_optimizing``.

	  Arguments:
	    model: Keras model providing ``input`` and the named layers.
	    layers_name: names of the layers whose activations are maximized.
	    img: input image; converted to a float32 tensor.
	    n_epoch: number of update steps per octave.
	    learning_rate: step size forwarded to ``deep_dream_optimizing``.
	    octave_scale: resize factor per octave; resizing only happens when > 1.
	    octave_step: number of octaves (at least one is always run).

	  Returns:
	    Tuple ``(loss, img)`` from the last update step.
	    NOTE(review): with ``n_epoch == 0`` no step runs and ``loss`` is never
	    assigned, so the return statement fails.
	  """
	  img = tf.convert_to_tensor(img, dtype='float32')
	  # every axis but the last, as floats so it can be scaled per octave
	  base_shape = tf.cast(tf.shape(img)[:-1], tf.float32)
	  # feature-extraction model exposing the activations to maximize
	  dream_model = keras.Model(
	      inputs=model.input,
	      outputs=[model.get_layer(layer_name).output for layer_name in layers_name])

	  n_octaves = max(octave_step, 1)
	  for octave in range(n_octaves):
	    if octave_scale > 1:
	      # octave k works on the base shape scaled by octave_scale ** k
	      scaled_shape = base_shape * (octave_scale**octave)
	      img = tf.image.resize(img, tf.cast(scaled_shape, tf.int32))
	    # optimize the (possibly resized) image
	    for _ in range(n_epoch):
	      loss, img = deep_dream_optimizing(dream_model, img, learning_rate)
	  return loss, img


	# dream on the second training image using the three named hidden layers
	img = X_train[1]
	images = [img]
	for step in range(1, 81):
	  # each call runs deep_dream with its default number of update steps
	  loss, img = deep_dream(model, ['L1', 'L2', 'L3'], img)
	  images.append(img.numpy().reshape(img_dim, img_dim))
	  print("Iter#%d Loss:%.4f" % (step * 100, loss.numpy()))

	# starting image on the left, final dreamed image on the right
	plt.figure(figsize=(12, 6))
	for panel, picture in enumerate((images[0], images[-1]), start=1):
	  plt.subplot(1, 2, panel)
	  plt.imshow(picture.reshape(img_dim, img_dim),
	             cmap='Blues',
	             interpolation='bilinear')
	  plt.axis('off')

	# ====== Using Inception ====== #
	model = keras.applications.InceptionV3(include_top=False, weights='imagenet')
	# downloaded photo, run through the InceptionV3 input preprocessing
	img = tf.keras.applications.inception_v3.preprocess_input(download_images())
	images = [img]
	# three octaves, each 1.3x the size of the previous one, 50 steps per octave
	loss, img = deep_dream(model, ['mixed3', 'mixed5'],
	                       img,
	                       n_epoch=50,
	                       octave_scale=1.3,
	                       octave_step=3)
	images.append(img.numpy())

	# preprocessed input on the left, dreamed result on the right
	plt.figure(figsize=(12, 6))
	for panel, picture in enumerate((images[0], images[-1]), start=1):
	  plt.subplot(1, 2, panel)
	  plt.imshow(deprocess(picture))
	  plt.axis('off')