# Optimization Visualization
import math
from itertools import product
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
import tensorflow.keras as keras
from matplotlib import gridspec
# # Activation Model #
def make_activation_model(model, layer_name, filter):
    """Build a sub-model exposing one filter's feature map.

    Args:
        model: A Keras model.
        layer_name: Name of the layer whose output to expose.
        filter: Channel index selected from the layer's 4-D output.

    Returns:
        A `keras.Model` mapping the original inputs to the selected
        (batch, height, width) feature map.
    """
    target_layer = model.get_layer(layer_name)
    # Slice a single channel out of the (batch, h, w, channels) output.
    single_channel = target_layer.output[:, :, :, filter]
    return keras.Model(inputs=model.inputs, outputs=single_channel)
def show_feature_map(image, model, layer_name, filter, ax=None):
    """Plot one filter's activation for `image`; new axes are created if
    `ax` is None. Returns the axes used."""
    activation_model = make_activation_model(model, layer_name, filter)
    batched = tf.expand_dims(image, axis=0)  # model expects a batch dim
    fmap = tf.squeeze(activation_model(batched))
    if ax is None:
        _, ax = plt.subplots()
    ax.imshow(fmap, cmap="magma", vmin=0.0, vmax=1.0)
    ax.axis("off")
    return ax
def show_feature_maps(
    image,
    model,
    layer_name,
    offset=0,
    rows=None,
    cols=3,
    width=12,
    cmap="magma",
):
    """Plot a grid of feature maps for `image` from one layer.

    Args:
        image: Input image tensor (fed to the model with a batch dim added).
        model: A Keras model.
        layer_name: Layer whose filters are visualized.
        offset: First filter index to show.
        rows: Number of grid rows; derived from the filter count if None.
        cols: Number of grid columns.
        width: Figure width in inches (height is scaled by rows/cols).
        cmap: Unused here; `show_feature_map` hard-codes "magma".

    Returns:
        The matplotlib figure containing the grid.
    """
    if rows is None:
        num_filters = model.get_layer(layer_name).output.shape[-1]
        # BUG FIX: guard against rows == 0 when num_filters < cols.
        rows = max(1, math.floor(num_filters / cols))
    fig, axs = plt.subplots(
        rows,
        cols,
        figsize=(width, (width * rows) / cols),
        gridspec_kw=dict(wspace=0.01, hspace=0.01),
        # BUG FIX: without squeeze=False, a single row or column yields a
        # 1-D axs array and `axs[r, c]` raises IndexError.
        squeeze=False,
    )
    for f, (r, c) in enumerate(product(range(rows), range(cols))):
        axs[r, c] = show_feature_map(
            image, model, layer_name, f + offset, ax=axs[r, c]
        )
    return fig
# # Optimization Model #
class OptVis(object):
    """Gradient-ascent feature visualization for one filter of a conv layer.

    Holds a trainable image buffer (pixel- or FFT-parameterized) plus an
    activation sub-model; `fit` optimizes the buffer to maximize the mean
    activation of the selected filter (or its center neuron).
    """

    def __init__(
        self,
        model,
        layer,
        filter,
        neuron=False,
        size=[128, 128],
        fft=True,
        scale=0.01,
    ):
        # NOTE(review): `size=[128, 128]` is a mutable default argument —
        # safe only as long as it is never mutated in place.
        # Create activation model
        activations = model.get_layer(layer).output
        if len(activations.shape) == 4:
            # Keep a single channel of the (batch, h, w, c) layer output.
            activations = activations[:, :, :, filter]
        else:
            raise ValueError("Activation shapes other than 4 not implemented.")
        if neuron:
            # Restrict the objective to the feature map's center position.
            _, y, x = activations.shape
            # find center
            # TODO: need to compute this from selected size, not activations
            yc = int(round(y / 2))
            xc = int(round(x / 2))
            activations = activations[:, yc, xc]
        self.activation_model = keras.Model(
            inputs=model.inputs, outputs=activations
        )
        # Create random initialization buffer
        self.shape = [1, *size, 3]  # (batch, height, width, rgb)
        self.fft = fft
        # Trainable buffer; in FFT mode it stores a spectrum as two real
        # planes (real/imag) rather than pixels.
        self.image = init_buffer(
            height=size[0], width=size[1], fft=fft, scale=scale
        )
        # Per-frequency scale used when decoding the spectrum to RGB.
        self.fft_scale = fft_scale(size[0], size[1], decay_power=1.0)

    def __call__(self):
        # NOTE(review): feeds the raw buffer to the model; in FFT mode the
        # buffer is a spectrum, not an image — confirm intended usage.
        image = self.activation_model(self.image)
        return image

    def compile(self, optimizer):
        # Optimizer used by train_step to update the image buffer.
        self.optimizer = optimizer

    @tf.function
    def train_step(self):
        """Run one gradient-ascent step on the buffer; returns {"loss": ...}."""
        # Compute loss
        with tf.GradientTape() as tape:
            image = self.image
            if self.fft:
                # Decode the spectrum buffer into an RGB image.
                image = fft_to_rgb(
                    shape=self.shape, buffer=image, fft_scale=self.fft_scale
                )
            image = to_valid_rgb(image)
            # Random jitter/scale/rotation regularizes the visualization.
            image = random_transform(
                tf.squeeze(image),
                jitter=8,
                scale=1.1,
                rotate=1.0,
                fill_method="reflect",
            )
            image = tf.expand_dims(image, 0)
            loss = clip_gradients(score(self.activation_model(image)))
        # Apply gradient
        grads = tape.gradient(loss, self.image)
        # Negated gradient => gradient ascent (maximize the activation).
        self.optimizer.apply_gradients([(-grads, self.image)])
        return {"loss": loss}

    @tf.function
    def fit(self, epochs=1, log=False):
        """Run `epochs` optimization steps and return the decoded RGB image."""
        for epoch in tf.range(epochs):
            loss = self.train_step()
            if log:
                # NOTE(review): inside tf.function, Python print executes
                # only during tracing, not on every step.
                print("Score: {}".format(loss["loss"]))
        image = self.image
        if self.fft:
            image = fft_to_rgb(
                shape=self.shape, buffer=image, fft_scale=self.fft_scale
            )
        return to_valid_rgb(image)
# # Loss and Gradients #
def score(x):
    """Mean activation — the objective maximized during optimization."""
    return tf.math.reduce_mean(x)
@tf.custom_gradient
def clip_gradients(y):
    """Identity on the forward pass; clips gradient L2 norm to 1.0 backward."""

    def grad(dy):
        # Bound the incoming gradient's norm to stabilize the ascent.
        return tf.clip_by_norm(dy, 1.0)

    return y, grad
# unused
def normalize_gradients(grads, method="l2"):
    """Rescale `grads` by one of several schemes ("l2", "std", "clip").

    Unknown methods return the gradients unchanged.
    """
    if method == "l2":
        return tf.math.l2_normalize(grads)
    if method == "std":
        return grads / (tf.math.reduce_std(grads) + 1e-8)
    if method == "clip":
        return tf.clip_by_norm(grads, 1.0)
    return grads
# # Color Transforms #
# ImageNet statistics
# 3x3 color-correlation matrix (square root of the RGB covariance) used to
# decorrelate the color channels during optimization.
color_correlation_svd_sqrt = np.asarray(
    [[0.26, 0.09, 0.02], [0.27, 0.00, -0.05], [0.27, -0.09, 0.03]]
).astype("float32")
# Normalize so the largest column norm is 1.
max_norm_svd_sqrt = np.max(np.linalg.norm(color_correlation_svd_sqrt, axis=0))
color_correlation_normalized = color_correlation_svd_sqrt / max_norm_svd_sqrt
# Per-channel mean / std (RGB, in [0, 1]) — standard ImageNet values.
color_mean = np.asarray([0.485, 0.456, 0.406])
color_std = np.asarray([0.229, 0.224, 0.225])
def correlate_color(image):
    """Map a decorrelated-color image back into correlated RGB space."""
    flat = tf.reshape(image, [-1, 3])
    # Apply the normalized color-correlation matrix per pixel.
    flat = tf.matmul(flat, color_correlation_normalized.T)
    return tf.reshape(flat, tf.shape(image))
def normalize(image):
    """Standardize an RGB image with the ImageNet channel statistics."""
    centered = image - color_mean
    return centered / color_std
def to_valid_rgb(image, crop=False):
    """Convert an optimization buffer to a valid RGB image in (0, 1)."""
    if crop:
        # Trim a 25-pixel border on each side.
        image = image[:, 25:-25, 25:-25, :]
    correlated = correlate_color(image)
    # Sigmoid squashes values into the displayable (0, 1) range.
    return tf.nn.sigmoid(correlated)
# # Spatial Transforms #
# Adapted from https://github.com/tensorflow/lucid/blob/master/lucid/optvis/param/spatial.py
# and https://github.com/elichen/Feature-visualization/blob/master/optvis.py
def rfft2d_freqs(h, w):
"""Computes 2D spectrum frequencies."""
fy = np.fft.fftfreq(h)[:, np.newaxis]
# when we have an odd input dimension we need to keep one additional
# frequency and later cut off 1 pixel
if w % 2 == 1:
fx = np.fft.fftfreq(w)[: w // 2 + 2]
else:
fx = np.fft.fftfreq(w)[: w // 2 + 1]
return np.sqrt(fx * fx + fy * fy)
def fft_scale(h, w, decay_power=1.0):
    """Per-frequency scale with a 1/f^decay_power falloff, as complex64."""
    freqs = rfft2d_freqs(h, w)
    # Floor at the lowest representable frequency so DC doesn't blow up.
    frequency_floor = 1.0 / max(w, h)
    scale = 1.0 / np.maximum(freqs, frequency_floor) ** decay_power
    scale *= np.sqrt(w * h)  # normalize by image area
    return tf.convert_to_tensor(scale, dtype=tf.complex64)
def fft_to_rgb(shape, buffer, fft_scale):
    """Convert FFT spectrum buffer to RGB image buffer."""
    batch, height, width, channels = shape
    # buffer[0] / buffer[1] hold the real / imaginary spectrum planes.
    spectrum = tf.complex(buffer[0], buffer[1]) * fft_scale
    image = tf.signal.irfft2d(spectrum)
    # Channels-first -> channels-last: (b, c, h, w) -> (b, h, w, c).
    image = tf.transpose(image, (0, 2, 3, 1))
    # in case of odd spatial input dimensions we need to crop
    image = image[:batch, :height, :width, :channels]
    # Scale down to keep values in a reasonable range.
    return image / 4.0
# # Affine Transforms #
@tf.function
def random_transform(image, jitter=0, rotate=0, scale=1, **kwargs):
    """Apply a random jitter / rotation / zoom to `image`.

    Extra keyword args are forwarded to `apply_affine_transform`.
    """
    # Draw the four random parameters in the same order as before so the
    # RNG stream is unchanged.
    shift_x = tf.random.uniform([], -jitter, jitter)
    shift_y = tf.random.uniform([], -jitter, jitter)
    angle = tf.random.uniform([], -rotate, rotate)
    zoom = tf.random.uniform([], 1.0, scale)
    return apply_affine_transform(
        image, theta=angle, tx=shift_x, ty=shift_y, zx=zoom, zy=zoom, **kwargs,
    )
@tf.function
def apply_affine_transform(
    x,
    theta=0,
    tx=0,
    ty=0,
    shear=0,
    zx=1,
    zy=1,
    row_axis=0,
    col_axis=1,
    channel_axis=2,
    fill_method="reflect",
    cval=0.0,
    interpolation_method="nearest",
):
    """ Apply an affine transformation to an image x. """
    # Promote every transform parameter to a float32 tensor.
    theta, tx, ty, shear, zx, zy = (
        tf.convert_to_tensor(v, dtype=tf.float32)
        for v in (theta, tx, ty, shear, zx, zy)
    )
    # Build the inverse transform, then pull pixels through it.
    inverse_matrix = _get_inverse_affine_transform(
        theta, tx, ty, shear, zx, zy,
    )
    return _apply_inverse_affine_transform(
        x,
        inverse_matrix,
        fill_method=fill_method,
        interpolation_method=interpolation_method,
    )
@tf.function
def _get_inverse_affine_transform(theta, tx, ty, shear, zx, zy):
    """ Construct the inverse of the affine transformation matrix with the given transformations.
    The transformation is taken with respect to the usual right-handed coordinate system.

    Args:
        theta: rotation in degrees.
        tx, ty: translation in pixels.
        shear: shear angle in degrees.
        zx, zy: zoom factors (must be nonzero).

    Returns:
        A 3x3 float32 homogeneous transform matrix.
    """
    # Start from the identity; each enabled component is composed onto it.
    transform_matrix = tf.eye(3, dtype=tf.float32)
    if theta != 0:
        theta = theta * math.pi / 180  # convert degrees to radians
        rotation_matrix = tf.convert_to_tensor(
            [
                [tf.math.cos(theta), tf.math.sin(theta), 0],
                [-tf.math.sin(theta), tf.math.cos(theta), 0],
                [0, 0, 1],
            ],
            dtype=tf.float32,
        )
        # Rotation replaces the identity as the first factor.
        transform_matrix = rotation_matrix
    if tx != 0 or ty != 0:
        shift_matrix = tf.convert_to_tensor(
            [[1, 0, -tx], [0, 1, -ty], [0, 0, 1]], dtype=tf.float32
        )
        # BUG FIX: transform_matrix is initialized to tf.eye above and is
        # never None, so the former `if transform_matrix is None` branches
        # were dead code (a leftover from an implementation that started
        # from None). Composing with the identity is equivalent.
        transform_matrix = tf.matmul(transform_matrix, shift_matrix)
    if shear != 0:
        shear = shear * math.pi / 180  # convert degrees to radians
        shear_matrix = tf.convert_to_tensor(
            [
                [1, tf.math.sin(shear), 0],
                [0, tf.math.cos(shear), 0],
                [0, 0, 1],
            ],
            dtype=tf.float32,
        )
        transform_matrix = tf.matmul(transform_matrix, shear_matrix)
    if zx != 1 or zy != 1:
        # TODO: assert zx != 0 and zy != 0 before dividing.
        zoom_matrix = tf.convert_to_tensor(
            [[1 / zx, 0, 0], [0, 1 / zy, 0], [0, 0, 1]], dtype=tf.float32
        )
        transform_matrix = tf.matmul(transform_matrix, zoom_matrix)
    return transform_matrix
@tf.function
def _apply_inverse_affine_transform(A, Ti, fill_method, interpolation_method):
    """Perform an affine transformation of the image A defined by a
    transform whose inverse is Ti. The matrix Ti is assumed to be in
    homogeneous coordinate form.
    Available fill methods are "replicate" and "reflect" (default).
    Available interpolation method is "nearest".
    """
    nrows, ncols, _ = A.shape
    # Create centered coordinate grid
    # x cycles over columns, y over rows, both shifted so (0, 0) is the
    # image center.
    x = tf.range(ncols * nrows) % ncols
    x = tf.cast(x, dtype=tf.float32) - ((ncols - 1) / 2)  # center
    y = tf.range(ncols * nrows) // ncols
    y = tf.cast(y, dtype=tf.float32) - ((nrows - 1) / 2)  # center
    y = -y  # left-handed to right-handed coordinates
    z = tf.ones([ncols * nrows], dtype=tf.float32)
    # Homogeneous coordinates: one column per output pixel.
    grid = tf.stack([x, y, z])
    # apply transformation
    # x, y, _ = tf.matmul(Ti, grid)
    xy = tf.matmul(Ti, grid)
    x = xy[0, :]
    y = xy[1, :]
    # convert coordinates to (approximate) indices
    i = -y + ((nrows - 1) / 2)
    j = x + ((ncols - 1) / 2)
    # Out-of-bounds source coordinates are filled per `fill_method`:
    # replicate: 111|1234|444
    if fill_method == "replicate":
        i = tf.clip_by_value(i, 0.0, nrows - 1)
        j = tf.clip_by_value(j, 0.0, ncols - 1)
    # reflect: 432|1234|321
    elif fill_method == "reflect":
        i = _reflect_index(i, nrows - 1)
        j = _reflect_index(j, ncols - 1)
    # nearest neighbor interpolation
    grid = tf.stack([i, j])
    grid = tf.round(grid)
    grid = tf.cast(grid, dtype=tf.int32)
    # Gather source pixels at the rounded (i, j) pairs, then restore shape.
    B = tf.gather_nd(A, tf.transpose(grid))
    B = tf.reshape(B, A.shape)
    return B
@tf.function
def _reflect_index(i, n):
    """Reflect the index i across dimensions [0, n]."""
    # Wrap into a period of 2n, then fold about n.
    wrapped = tf.math.floormod(i - n, 2 * n)
    folded = tf.math.abs(wrapped - n)
    return tf.math.floor(folded)
# # Buffer Initializers #
def init_buffer(
    height, width=None, batches=1, channels=3, scale=0.01, fft=True
):
    """Initialize an image buffer.

    Returns a trainable `tf.Variable` holding either a pixel-space buffer
    or (when `fft=True`) an FFT spectrum buffer.
    """
    width = width or height  # default to a square buffer
    shape = [batches, height, width, channels]
    initializer = init_fft if fft else init_pixel
    return tf.Variable(initializer(shape, scale), trainable=True)
def init_pixel(shape, scale=None):
    """Uniform-random pixel-space buffer.

    NOTE(review): `scale` is currently unused (the VarianceScaling
    initializer below is commented out).
    """
    batches, height, width, channels = shape
    # initializer = tf.initializers.VarianceScaling(scale=scale)
    return tf.random.uniform(
        shape=[batches, height, width, channels], dtype=tf.float32
    )
def init_fft(shape, scale=0.1):
    """Initialize FFT image buffer."""
    batch, height, width, channels = shape
    freqs = rfft2d_freqs(height, width)
    # Leading axis of size 2 holds the real and imaginary planes.
    buffer_size = (2, batch, channels) + freqs.shape
    spectrum = np.random.normal(size=buffer_size, scale=scale)
    return spectrum.astype(np.float32)
# # Plotting #
def read_image(filename, size=[256, 256]):
    """Read a JPEG file into a float32 tensor resized to `size`."""
    raw = tf.io.read_file(filename)
    decoded = tf.io.decode_jpeg(raw)
    # Convert to float32 in [0, 1] before resizing.
    floats = tf.image.convert_image_dtype(decoded, dtype=tf.float32)
    return tf.image.resize(floats, size=size)
def visualize(
    model,
    layer,
    filter,
    neuron=False,
    size=[150, 150],
    fft=True,
    lr=0.05,
    epochs=500,
    log=False,
    ax=None,
):
    """Optimize and display a visualization for one filter.

    Builds an `OptVis`, runs `epochs` steps of Adam, and shows the result
    on `ax` (new axes if None). Returns the axes used.
    """
    vis = OptVis(model, layer, filter, neuron=neuron, size=size, fft=fft)
    vis.compile(optimizer=tf.optimizers.Adam(lr))
    result = vis.fit(epochs=epochs, log=log)
    if ax is None:
        _, ax = plt.subplots()
    ax.imshow(tf.squeeze(result).numpy())
    ax.axis("off")
    return ax
def visualize_layer(
    model,
    layer,
    init_filter=0,
    neuron=False,
    size=[150, 150],
    fft=True,
    lr=0.05,
    epochs=500,
    log=False,
    rows=2,
    cols=4,
    width=16,
):
    """Optimize and plot a `rows * cols` grid of filter visualizations.

    Args:
        model, layer: Model and layer name to visualize.
        init_filter: First filter index of the grid.
        neuron / size / fft / lr / epochs / log: Forwarded to OptVis / fit.
        rows, cols, width: Grid layout and figure width in inches.
    """
    gs = gridspec.GridSpec(rows, cols, wspace=0.01, hspace=0.01)
    plt.figure(figsize=(width, (width * rows) / cols))
    for f, (r, c) in enumerate(product(range(rows), range(cols))):
        optvis = OptVis(
            model, layer, f + init_filter, neuron=neuron, size=size, fft=fft
        )
        optvis.compile(optimizer=tf.optimizers.Adam(lr))
        # BUG FIX: `log` was accepted but never forwarded to fit().
        image = optvis.fit(epochs=epochs, log=log)
        plt.subplot(gs[r, c])
        plt.imshow(tf.squeeze(image))
        plt.axis("off")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment