Skip to content

Instantly share code, notes, and snippets.

@thomasdullien
Created July 4, 2024 11:58
Show Gist options
  • Save thomasdullien/651b012406c25760c36be9fca59b6682 to your computer and use it in GitHub Desktop.
Save thomasdullien/651b012406c25760c36be9fca59b6682 to your computer and use it in GitHub Desktop.
Visualising creases in a ReLU network.
from PIL import Image, ImageOps, ImageDraw
import numpy as np
import pandas as pd
import os, sys
import logging
import argparse

# Timestamped log lines, INFO level and above.
logging.basicConfig(
    level=logging.INFO,
    datefmt='%Y-%m-%d %H:%M:%S',
    format='%(asctime)s %(levelname)-8s %(message)s',
)

# Command-line options, one (flag, help text, type, default) row per option.
_CLI_OPTIONS = (
    ("--inputimage", "Input PNG image to approximate", str,
     "/home/thomasdullien/Downloads/black_circle.png"),
    ("--layer_neurons", "How many neurons in the first layer", int, 10),
    ("--number_of_layers", "How many layers?", int, 1),
    ("--draw_size", "How big the output drawing should be", int, 300),
    ("--draw_interval", "How often (in epochs) should the boundaries be drawn?", int, 500),
    ("--epochs", "How many epochs to train", int, 300000),
    ("--random_seed", "Random seed", int, 1),
)

parser = argparse.ArgumentParser()
for _flag, _help, _type, _default in _CLI_OPTIONS:
    parser.add_argument(_flag, help=_help, type=_type, default=_default)
# Step 1: Load the image from disk
def load_image(file_path):
    """Open the image file at ``file_path`` with PIL and return the image object."""
    image = Image.open(file_path)
    return image
# Step 2: Grayscale the PNG file
def grayscale_image(img):
    """Return the result of ``ImageOps.grayscale`` applied to ``img``."""
    return ImageOps.grayscale(img)
# Step 3: Truncate the PNG file so it is square
def truncate_image(img):
    """Crop ``img`` to a centred square whose side is the shorter image side.

    The crop box is passed to ``img.crop`` as ``(left, top, right, bottom)``;
    when the surplus on the longer side is odd, the extra pixel is trimmed
    from the right/bottom edge.
    """
    width, height = img.size
    side = min(width, height)
    # Offset of the square inside the image along each axis.
    x0 = (width - side) // 2
    y0 = (height - side) // 2
    return img.crop((x0, y0, x0 + side, y0 + side))
# Step 4: Convert the PNG file to (x, y, z) triples
def image_to_triples(img):
    """Flatten a single-channel image into rows of ``(x, y, z)``.

    ``x`` is the pixel's column index divided by the image width, ``y`` is its
    row index divided by the image height, and ``z`` is the pixel value
    divided by 255. Rows are emitted image row by image row (every column of
    row 0, then row 1, and so on).

    Args:
        img: Anything ``np.asarray`` converts to a 2-D array, such as the
            grayscale image produced earlier in this script.

    Returns:
        Array of shape ``(height * width, 3)``. An empty image yields an
        array of shape ``(0, 3)`` so column slicing keeps working.

    Raises:
        ValueError: If the converted image is not 2-dimensional.
    """
    pixels = np.asarray(img)
    if pixels.ndim != 2:
        raise ValueError(
            "expected a 2-D (single channel) image, got %d dimension(s)" % pixels.ndim)
    height, width = pixels.shape
    # Row/column index of every pixel; ravel() walks them in row-major order,
    # which matches iterating y in the outer loop and x in the inner loop.
    ys, xs = np.mgrid[0:height, 0:width]
    return np.column_stack(
        (xs.ravel() / width, ys.ravel() / height, pixels.ravel() / 255.0))
# Parse the command line and mirror each option used below into a global.
args = parser.parse_args()
g_rand = args.random_seed
g_epochs = args.epochs
g_layer_neurons = args.layer_neurons
g_draw_size = args.draw_size
g_draw_interval = args.draw_interval
g_file_path = args.inputimage

# Final path component of the input file; used when naming the output images.
filename = os.path.basename(g_file_path)

# Input pipeline: load -> grayscale -> crop to a square -> (x, y, z) rows.
img = load_image(g_file_path)
grayscale_img = grayscale_image(img)
square_img = truncate_image(grayscale_img)
triples = image_to_triples(square_img)
import torch
import torch.nn as nn
import torch.optim as optim
# Seed PyTorch's random number generator with the --random_seed value.
torch.manual_seed(g_rand)
# Make sure the data is all moved to the GPU: new tensors (training data and
# model parameters) are created on CUDA by default.
# torch.set_default_tensor_type() is deprecated as of PyTorch 2.1, so prefer
# torch.set_default_device() and keep the legacy call only for older releases
# that do not provide it.
if hasattr(torch, "set_default_device"):
    torch.set_default_device("cuda")
else:
    torch.set_default_tensor_type("torch.cuda.FloatTensor")
# Step 5: Create and train a 1-layer ReLU network
class SimpleNN(nn.Module):
    """Fully connected network: 2 inputs -> one hidden ReLU layer -> 1 output.

    Args:
        layer_neurons: Width of the hidden layer. When omitted, the
            module-level ``g_layer_neurons`` setting is used, so existing
            ``SimpleNN()`` calls behave as before.
    """

    def __init__(self, layer_neurons=None):
        super().__init__()
        if layer_neurons is None:
            # Fall back to the command-line setting.
            layer_neurons = g_layer_neurons
        self.fc = nn.Linear(2, layer_neurons)
        self.relu = nn.ReLU()
        self.out = nn.Linear(layer_neurons, 1)

    def forward(self, x):
        """Apply ``fc``, then ReLU, then ``out`` to ``x`` and return the result."""
        return self.out(self.relu(self.fc(x)))
def train_network(triples):
    """Fit a ``SimpleNN`` to the image data with full-batch Adam and MSE loss.

    Trains for ``g_epochs`` epochs. After every epoch the loss is logged, and
    every ``g_draw_interval`` epochs (starting with epoch 0) a snapshot image
    is written via ``write_model_and_decision_boundaries``. A draw interval
    of 0 disables the snapshots instead of raising ``ZeroDivisionError``.

    Args:
        triples: 2-D array whose rows are ``(x, y, z)``; columns 0 and 1 are
            the network inputs and column 2 is the regression target.

    Returns:
        The trained model.
    """
    model = SimpleNN()
    criterion = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=0.01)
    x = torch.tensor(triples[:, :2], dtype=torch.float32)
    y = torch.tensor(triples[:, 2], dtype=torch.float32).unsqueeze(1)
    for epoch in range(g_epochs):
        optimizer.zero_grad()
        outputs = model(x)
        loss = criterion(outputs, y)
        loss.backward()
        optimizer.step()
        # Convert to a plain float once, so logging and drawing never handle
        # a tensor that is still attached to the autograd graph.
        loss_value = loss.item()
        logging.info("Epoch %d: Training loss is now %f", epoch, loss_value)
        if g_draw_interval and epoch % g_draw_interval == 0:
            write_model_and_decision_boundaries(
                model, epoch, loss_value, optimizer.param_groups[0]['lr'])
    return model
def write_model_and_decision_boundaries(model, train_step, loss, learn_rate):
    """Render the model's current state to a PNG in the working directory.

    The file name is built from the input file name, the random seed, the
    hidden-layer width and the zero-padded training step. ``loss``,
    ``train_step`` and ``learn_rate`` are forwarded to ``create_new_image``.
    """
    logging.info("Writing the model and polytopes: Beginning calculation of derivatives.")
    images = calculate_derivative(model)
    logging.info("Creating new image.")
    name_fields = (filename, g_rand, g_layer_neurons, train_step)
    output_path = "./%s-%d-%d-step-%08.08d.png" % name_fields
    create_new_image(images[0], images[1], output_path, loss, train_step, learn_rate)
    logging.info("Done writing image.")
# Step 6: Calculate the derivative of the model
def calculate_derivative(model, draw_size=None):
    """Evaluate ``model`` and its input gradient on a regular grid.

    The grid has ``size * size`` points ``(x / size, y / size)`` for integer
    ``x`` and ``y`` in ``range(size)``. The grid tensor is cached in the
    global ``g_points`` and rebuilt only when the requested size changes.

    The gradient is taken of the *mean* of all outputs, so each per-point
    gradient carries a ``1 / (size * size)`` factor. That factor is the same
    for every point, so comparing neighbouring entries is unaffected.

    Args:
        model: Callable mapping an ``(N, 2)`` float tensor to ``N`` outputs.
        draw_size: Grid side length; defaults to the global ``g_draw_size``.

    Returns:
        Tuple ``(derivative_image, value_image)`` of float64 arrays of shape
        ``(size, size)``, both indexed ``[x, y]``. ``derivative_image`` holds
        the squared norm of the gradient at each grid point and
        ``value_image`` holds the model output there.
    """
    global g_points, g_points_int
    size = g_draw_size if draw_size is None else draw_size
    float_size = float(size)

    # The points at which to evaluate the model, built once per grid size.
    cached = globals().get("g_points")
    if cached is None or cached.shape[0] != size * size:
        # Integer grid coordinates; still published as a global for
        # backward compatibility although this function no longer needs it.
        g_points_int = [[x, y] for x in range(size) for y in range(size)]
        g_points = torch.tensor(
            [[x / float_size, y / float_size] for x, y in g_points_int],
            dtype=torch.float32)
        g_points.requires_grad = True

    # Evaluate the model on all grid points in one batch.
    output = model(g_points)
    # autograd.grad returns the gradient with respect to the grid directly.
    # It does not accumulate into the model parameters' .grad, and there is
    # no shared .grad buffer that could be zeroed while NumPy still views it.
    (grid_grad,) = torch.autograd.grad(output.mean(), g_points)
    gradients = grid_grad.detach().cpu().numpy()
    values = output.detach().cpu().numpy()

    # Points are ordered x-major (index = x * size + y), so a plain reshape
    # puts the entry for grid point (x, y) at position [x, y].
    squared_norm = gradients[:, 0] ** 2 + gradients[:, 1] ** 2
    derivative_image = squared_norm.reshape(size, size).astype(np.float64)
    value_image = values.reshape(size, size).astype(np.float64)
    return (derivative_image, value_image)
# Step 7: Create a new PNG file from the data points
def create_new_image(derivative_image, value_image, filename, loss, epoch, learn_rate):
    """Draw a side-by-side snapshot and save it to ``filename``.

    The canvas is ``2 * g_draw_size`` wide and ``g_draw_size`` high. The right
    half shows ``value_image`` as gray levels (value * 255, truncated to int).
    The left half shows the same gray levels, except that a pixel is painted
    red when its ``derivative_image`` entry differs from the entry at
    ``x + 1`` or at ``y + 1``. The last row and column are never painted
    because they have no such neighbour. A caption with the loss, epoch and
    learning rate is drawn in the top-left corner of the right half.
    """
    red = (255, 0, 0)
    canvas = Image.new("RGB", (2 * g_draw_size, g_draw_size))
    interior = range(g_draw_size - 1)
    for x in interior:
        for y in interior:
            level = int(value_image[x, y] * 255)
            gray = (level, level, level)
            here = derivative_image[x, y]
            same_as_neighbours = (here == derivative_image[x + 1, y]
                                  and here == derivative_image[x, y + 1])
            # Right half: plain model output.
            canvas.putpixel((g_draw_size + x, y), gray)
            # Left half: model output with the creases marked in red.
            canvas.putpixel((x, y), gray if same_as_neighbours else red)
    caption = "Loss: %f\nEpoch: %d\nLR %f" % (loss, epoch, learn_rate)
    ImageDraw.Draw(canvas).text((g_draw_size + 5, 5), caption, fill=(255,0,0,255))
    canvas.save(filename)
# Train the network. train_network() renders a snapshot image itself every
# g_draw_interval epochs (through write_model_and_decision_boundaries), so no
# separate derivative/drawing pass is needed once training has finished.
print("About to train the network.")
model = train_network(triples)
# NOTE: snapshots are saved as "./<input file name>-<seed>-<neurons>-step-<epoch>.png"
# in the current working directory; there is no single "output.png".
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment