# thomasdullien/relu_viz.py

## relu_viz.py
from PIL import Image, ImageOps, ImageDraw
import numpy as np
import pandas as pd
import os, sys
import logging

# Root logger: timestamped messages, INFO level and above.
logging.basicConfig(
    level=logging.INFO,
    datefmt='%Y-%m-%d %H:%M:%S',
    format='%(asctime)s %(levelname)-8s %(message)s',
)

# Command-line options: input image, hidden-layer width, drawing and training settings.
import argparse

parser = argparse.ArgumentParser()
parser.add_argument(
    "--inputimage",
    type=str,
    default="/home/thomasdullien/Downloads/black_circle.png",
    help="Input PNG image to approximate",
)
parser.add_argument(
    "--layer_neurons",
    type=int,
    default=10,
    help="How many neurons in the first layer",
)
# NOTE(review): --number_of_layers is accepted, but nothing in the visible
# code reads it; the network below always has a single hidden layer.
parser.add_argument(
    "--number_of_layers",
    type=int,
    default=1,
    help="How many layers?",
)
parser.add_argument(
    "--draw_size",
    type=int,
    default=300,
    help="How big the output drawing should be",
)
parser.add_argument(
    "--draw_interval",
    type=int,
    default=500,
    help="How often (in epochs) should the boundaries be drawn?",
)
parser.add_argument(
    "--epochs",
    type=int,
    default=300000,
    help="How many epochs to train",
)
parser.add_argument(
    "--random_seed",
    type=int,
    default=1,
    help="Random seed",
)

# Open an image file with Pillow
def load_image(file_path):
    """Return the PIL image obtained from ``Image.open(file_path)``."""
    opened = Image.open(file_path)
    return opened

# Step 2: Convert the image to grayscale
def grayscale_image(img):
    """Return the grayscale version of ``img`` produced by ``ImageOps.grayscale``."""
    return ImageOps.grayscale(img)

# Step 3: Crop the image to a centered square
def truncate_image(img):
    """Crop ``img`` to a centered square whose side is its shorter dimension.

    ``img`` must expose ``size``, ``width``, ``height`` and ``crop(box)``.
    The result of ``img.crop`` is returned unchanged.
    """
    side = min(img.size)
    # Offsets of the square's top-left corner; integer division keeps the
    # box on whole pixels when the surplus is odd.
    x0 = (img.width - side) // 2
    y0 = (img.height - side) // 2
    box = (x0, y0, x0 + side, y0 + side)
    return img.crop(box)

# Step 4: Convert the image to (x, y, z) triples
def image_to_triples(img):
    """Turn a single-channel image into an array of ``(x, y, z)`` samples.

    Args:
        img: Anything ``np.asarray`` converts to a 2-D array (rows = y,
            columns = x), e.g. a grayscale PIL image.

    Returns:
        Float array of shape ``(height * width, 3)``. Rows are ordered
        row-by-row (y outer, x inner) and hold ``x / width``, ``y / height``
        and ``pixel / 255.0``. An empty image yields shape ``(0, 3)`` so that
        column slicing of the result keeps working.

    Raises:
        ValueError: If the array is not 2-D.
    """
    pixels = np.asarray(img)
    height, width = pixels.shape
    # Index grids with the same shape as the image: ys varies along rows,
    # xs along columns, so ravel() yields the row-major traversal order.
    ys, xs = np.mgrid[0:height, 0:width]
    return np.column_stack(
        (xs.ravel() / width, ys.ravel() / height, pixels.ravel() / 255.0)
    )

# Parse the command line and copy the options into module-level settings.
args = parser.parse_args()
g_file_path = args.inputimage
g_layer_neurons, g_epochs = args.layer_neurons, args.epochs
g_draw_size, g_draw_interval = args.draw_size, args.draw_interval
g_rand = args.random_seed

# Base name of the input file; used as the prefix of the output image names.
filename = os.path.split(g_file_path)[1]

# Image pipeline: load -> grayscale -> centered square crop -> (x, y, z) samples.
img = load_image(g_file_path)
grayscale_img = grayscale_image(img)
square_img = truncate_image(grayscale_img)
triples = image_to_triples(square_img)

import torch
import torch.nn as nn
import torch.optim as optim

torch.manual_seed(g_rand)

# Create tensors and modules on the GPU by default when one is present.
# Without this guard the script cannot run on a machine that has no CUDA
# device; in that case PyTorch's CPU default is kept and a warning is logged.
# NOTE(review): torch.set_default_tensor_type is deprecated in recent PyTorch
# releases in favour of torch.set_default_device / torch.set_default_dtype;
# consider migrating once the minimum supported PyTorch version is settled.
if torch.cuda.is_available():
    torch.set_default_tensor_type("torch.cuda.FloatTensor")
else:
    logging.warning("CUDA is not available; falling back to CPU tensors.")

# Step 5: Create and train a 1-layer ReLU network
class SimpleNN(nn.Module):
    """Fully connected network: 2 inputs -> ``g_layer_neurons`` ReLU units -> 1 output."""

    def __init__(self):
        super().__init__()
        hidden_units = g_layer_neurons
        self.fc = nn.Linear(2, hidden_units)
        self.relu = nn.ReLU()
        self.out = nn.Linear(hidden_units, 1)

    def forward(self, x):
        """Apply the hidden layer, the ReLU and the output layer to ``x``."""
        return self.out(self.relu(self.fc(x)))

def train_network(triples):
    """Fit a ``SimpleNN`` to the image samples with full-batch Adam.

    Args:
        triples: 2-D array whose first two columns are the (x, y) inputs and
            whose third column is the target intensity.

    Returns:
        The trained ``SimpleNN`` instance.

    Runs ``g_epochs`` epochs with an MSE loss, logs the loss after every
    epoch and, every ``g_draw_interval`` epochs, writes a visualisation via
    ``write_model_and_decision_boundaries``. A draw interval of 0 disables
    drawing instead of raising ``ZeroDivisionError``.
    """
    model = SimpleNN()
    criterion = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=0.01)
    inputs = torch.tensor(triples[:, :2], dtype=torch.float32)
    targets = torch.tensor(triples[:, 2], dtype=torch.float32).unsqueeze(1)

    for epoch in range(g_epochs):
        optimizer.zero_grad()
        loss = criterion(model(inputs), targets)
        loss.backward()
        optimizer.step()
        # Convert once to a plain float so logging and drawing do not each
        # have to pull the value out of the tensor.
        loss_value = loss.item()
        logging.info("Epoch %d: Training loss is now %f", epoch, loss_value)
        if g_draw_interval and epoch % g_draw_interval == 0:
            write_model_and_decision_boundaries(
                model, epoch, loss_value, optimizer.param_groups[0]['lr'])
    return model

def write_model_and_decision_boundaries(model, train_step, loss, learn_rate):
    """Render the current model state to a PNG in the working directory.

    The file name combines the input file name, the random seed, the hidden
    layer width and the zero-padded training step. ``loss`` and
    ``learn_rate`` are only forwarded to ``create_new_image`` for the caption.
    """
    logging.info("Writing the model and polytopes: Beginning calculation of derivatives.")
    images = calculate_derivative(model)
    logging.info("Creating new image.")
    out_path = "./%s-%d-%d-step-%08.08d.png" % (filename, g_rand, g_layer_neurons, train_step)
    create_new_image(images[0], images[1], out_path, loss, train_step, learn_rate)
    logging.info("Done writing image.")

# Step 6: Calculate the derivative of the model
def calculate_derivative(model):
    """Evaluate ``model`` and its input gradient on a regular grid.

    The grid has ``g_draw_size`` x ``g_draw_size`` points with coordinates
    ``(x / g_draw_size, y / g_draw_size)``. It is built on the first call and
    cached in the module globals ``g_points`` / ``g_points_int``.

    Args:
        model: Callable mapping an ``(N, 2)`` float tensor to ``N`` outputs
            (one value per point).

    Returns:
        ``(derivative_image, value_image)``: two float64 arrays of shape
        ``(g_draw_size, g_draw_size)`` indexed ``[x, y]``. ``value_image``
        holds the model output; ``derivative_image`` holds the squared norm
        of the gradient of the *mean* output with respect to each point.
    """
    global g_points, g_points_int
    side = g_draw_size
    if 'g_points' not in globals():
        # x is the outer loop, so point (x, y) lives at flat index x * side + y.
        g_points_int = [[x, y] for x in range(side) for y in range(side)]
        g_points = torch.tensor(
            [[x / float(side), y / float(side)] for x, y in g_points_int],
            dtype=torch.float32)
    g_points.requires_grad = True
    # Evaluate the model on all grid points in one batch.
    output = model(g_points)
    output.mean().backward()
    # Copy before zeroing: for a CPU tensor .numpy() shares memory with the
    # gradient, so without the copy the zero_() below would wipe the values
    # we are about to use.
    gradients = g_points.grad.detach().cpu().numpy().copy()
    g_points.grad.data.zero_()
    output_cpu = output.detach().cpu().numpy()
    # The flat point order matches a row-major (x, y) layout, so a reshape
    # replaces the per-point Python loop.
    value_image = output_cpu.reshape(side, side).astype(np.float64)
    squared_norm = gradients[:, 0] ** 2 + gradients[:, 1] ** 2
    derivative_image = squared_norm.reshape(side, side).astype(np.float64)
    return (derivative_image, value_image)

# Step 7: Render the model output and region boundaries to a PNG file
def create_new_image(derivative_image, value_image, filename, loss, epoch, learn_rate):
    """Save a two-panel RGB image of width ``2 * g_draw_size``.

    Left panel: the model output in gray, with a red pixel wherever the
    derivative value differs from the neighbour at ``x + 1`` or ``y + 1``.
    Right panel: the model output in gray plus a red text caption with the
    loss, epoch and learning rate. The last row and column are not drawn
    because the neighbour comparison needs ``x + 1`` and ``y + 1``.
    """
    canvas = Image.new("RGB", (g_draw_size * 2, g_draw_size))
    limit = g_draw_size - 1
    for col in range(limit):
        for row in range(limit):
            shade = int(value_image[col, row] * 255)
            gray = (shade, shade, shade)
            canvas.putpixel((col + g_draw_size, row), gray)
            here = derivative_image[col, row]
            same_region = (here == derivative_image[col + 1, row]
                           and here == derivative_image[col, row + 1])
            canvas.putpixel((col, row), gray if same_region else (255, 0, 0))
    caption = "Loss: %f\nEpoch: %d\nLR %f" % (loss, epoch, learn_rate)
    ImageDraw.Draw(canvas).text((g_draw_size + 5, 5), caption, fill=(255,0,0,255))
    canvas.save(filename)

# Train the network. Progress images are written during training by
# write_model_and_decision_boundaries, whenever epoch % g_draw_interval == 0.
print("About to train the network.")
model = train_network(triples)
# Legacy post-training rendering, kept for reference only. The create_new_image
# call below does not match the function's current six-parameter signature.
#print("Done training the network. Beginning calculation of derivatives.")
#derivative_image, value_image = calculate_derivative(model)
#print("Creating new image.")
#create_new_image(derivative_image, value_image)

# Output images are saved in the current directory as
# "./<input filename>-<seed>-<neurons>-step-<epoch>.png"; no "output.png" is written.
	from PIL import Image, ImageOps, ImageDraw
	import numpy as np
	import pandas as pd
	import os, sys
	import logging

	# NOTE(review): from here on the file repeats the script above, tab-prefixed
	# and with its nested indentation stripped; it is not valid Python as written.
	# Root logger: timestamped messages, INFO level and above.
	logging.basicConfig(
	format='%(asctime)s %(levelname)-8s %(message)s',
	level=logging.INFO,
	datefmt='%Y-%m-%d %H:%M:%S')

	# Command-line options: input image, hidden-layer width, drawing and training settings.
	import argparse
	parser = argparse.ArgumentParser()
	parser.add_argument("--inputimage", help="Input PNG image to approximate", type=str,
	default = "/home/thomasdullien/Downloads/black_circle.png")
	parser.add_argument("--layer_neurons", help="How many neurons in the first layer",
	type=int, default=10)
	# NOTE(review): --number_of_layers is accepted, but nothing in the visible code reads it.
	parser.add_argument("--number_of_layers", help="How many layers?", type=int, default=1)
	parser.add_argument("--draw_size", help="How big the output drawing should be", type=int, default=300)
	parser.add_argument("--draw_interval", help="How often (in epochs) should the boundaries be drawn?", type=int, default=500)
	parser.add_argument("--epochs", help="How many epochs to train", type=int, default=300000)
	parser.add_argument("--random_seed", help="Random seed", type=int, default=1)

	# Open an image file with Pillow (function body indentation restored)
	def load_image(file_path):
		"""Return the PIL image obtained from ``Image.open(file_path)``."""
		return Image.open(file_path)

	# Step 2: Convert the image to grayscale (function body indentation restored)
	def grayscale_image(img):
		"""Return the grayscale version of ``img`` produced by ``ImageOps.grayscale``."""
		gray_img = ImageOps.grayscale(img)
		return gray_img

	# Step 3: Crop the image to a centered square (function body indentation restored)
	def truncate_image(img):
		"""Crop ``img`` to a centered square whose side is its shorter dimension.

		``img`` must expose ``size``, ``width``, ``height`` and ``crop(box)``.
		The result of ``img.crop`` is returned unchanged.
		"""
		min_side = min(img.size)
		left = (img.width - min_side) // 2
		top = (img.height - min_side) // 2
		right = (img.width + min_side) // 2
		bottom = (img.height + min_side) // 2
		return img.crop((left, top, right, bottom))

	# Step 4: Convert the image to (x, y, z) triples (structure restored, vectorised)
	def image_to_triples(img):
		"""Turn a single-channel image into an array of ``(x, y, z)`` samples.

		Args:
			img: Anything ``np.asarray`` converts to a 2-D array (rows = y,
				columns = x), e.g. a grayscale PIL image.

		Returns:
			Float array of shape ``(height * width, 3)``. Rows are ordered
			row-by-row (y outer, x inner) and hold ``x / width``,
			``y / height`` and ``pixel / 255.0``. An empty image yields
			shape ``(0, 3)``.

		Raises:
			ValueError: If the array is not 2-D.
		"""
		pixels = np.asarray(img)
		height, width = pixels.shape
		# ys varies along rows and xs along columns, so ravel() gives the
		# row-major traversal order.
		ys, xs = np.mgrid[0:height, 0:width]
		return np.column_stack(
			(xs.ravel() / width, ys.ravel() / height, pixels.ravel() / 255.0)
		)

	# Parse the command line and copy the options into module-level settings.
	args = parser.parse_args()
	g_file_path = args.inputimage
	g_layer_neurons = args.layer_neurons
	g_epochs = args.epochs
	g_draw_size = args.draw_size
	g_draw_interval = args.draw_interval
	g_rand = args.random_seed

	# Load the input image; `filename` (the base name of the input path) is
	# used as the prefix of the output image names.
	filename = os.path.split(g_file_path)[1]
	img = load_image(g_file_path)
	grayscale_img = grayscale_image(img)

	# Crop to a centered square.
	square_img = truncate_image(grayscale_img)

	# Convert the image to (x, y, z) training samples.
	triples = image_to_triples(square_img)

	import torch
	import torch.nn as nn
	import torch.optim as optim

	torch.manual_seed(g_rand)

	# Create tensors and modules on the GPU by default when one is present.
	# Without this guard the script cannot run on a machine that has no CUDA
	# device; in that case PyTorch's CPU default is kept and a warning is logged.
	# NOTE(review): torch.set_default_tensor_type is deprecated in recent PyTorch
	# releases in favour of torch.set_default_device / torch.set_default_dtype.
	if torch.cuda.is_available():
		torch.set_default_tensor_type("torch.cuda.FloatTensor")
	else:
		logging.warning("CUDA is not available; falling back to CPU tensors.")

	# Step 5: Create and train a 1-layer ReLU network (class structure restored)
	class SimpleNN(nn.Module):
		"""Fully connected network: 2 inputs -> ``g_layer_neurons`` ReLU units -> 1 output."""

		def __init__(self):
			super(SimpleNN, self).__init__()
			self.fc = nn.Linear(2, g_layer_neurons)
			self.relu = nn.ReLU()
			self.out = nn.Linear(g_layer_neurons, 1)

		def forward(self, x):
			"""Apply the hidden layer, the ReLU and the output layer to ``x``."""
			x = self.fc(x)
			x = self.relu(x)
			x = self.out(x)
			return x

	def train_network(triples):
		"""Fit a ``SimpleNN`` to the image samples with full-batch Adam.

		Args:
			triples: 2-D array whose first two columns are the (x, y) inputs
				and whose third column is the target intensity.

		Returns:
			The trained ``SimpleNN`` instance.

		Runs ``g_epochs`` epochs with an MSE loss, logs the loss after every
		epoch and, every ``g_draw_interval`` epochs, writes a visualisation
		via ``write_model_and_decision_boundaries``. A draw interval of 0
		disables drawing instead of raising ``ZeroDivisionError``.
		"""
		model = SimpleNN()
		criterion = nn.MSELoss()
		optimizer = optim.Adam(model.parameters(), lr=0.01)
		x = torch.tensor(triples[:, :2], dtype=torch.float32)
		y = torch.tensor(triples[:, 2], dtype=torch.float32).unsqueeze(1)

		for epoch in range(g_epochs):
			optimizer.zero_grad()
			outputs = model(x)
			loss = criterion(outputs, y)
			loss.backward()
			optimizer.step()
			# Convert once to a plain float for logging and for the caption.
			loss_value = loss.item()
			logging.info("Epoch %d: Training loss is now %f", epoch, loss_value)
			if g_draw_interval and epoch % g_draw_interval == 0:
				write_model_and_decision_boundaries(
					model, epoch, loss_value, optimizer.param_groups[0]['lr'])
		return model

	def write_model_and_decision_boundaries(model, train_step, loss, learn_rate):
		"""Render the current model state to a PNG in the working directory.

		The file name combines the input file name, the random seed, the
		hidden layer width and the zero-padded training step. ``loss`` and
		``learn_rate`` are only forwarded to ``create_new_image``.
		"""
		logging.info("Writing the model and polytopes: Beginning calculation of derivatives.")
		derivative_image, value_image = calculate_derivative(model)
		logging.info("Creating new image.")
		create_new_image(derivative_image, value_image,
			"./%s-%d-%d-step-%08.08d.png" % (filename, g_rand, g_layer_neurons, train_step), loss, train_step, learn_rate)
		logging.info("Done writing image.")

	# Step 6: Calculate the derivative of the model (structure and ** operators restored)
	def calculate_derivative(model):
		"""Evaluate ``model`` and its input gradient on a regular grid.

		The grid has ``g_draw_size`` x ``g_draw_size`` points with coordinates
		``(x / g_draw_size, y / g_draw_size)``. It is built on the first call
		and cached in the module globals ``g_points`` / ``g_points_int``.

		Args:
			model: Callable mapping an ``(N, 2)`` float tensor to ``N``
				outputs (one value per point).

		Returns:
			``(derivative_image, value_image)``: two float64 arrays of shape
			``(g_draw_size, g_draw_size)`` indexed ``[x, y]``. ``value_image``
			holds the model output; ``derivative_image`` holds the squared
			norm of the gradient of the *mean* output w.r.t. each point.
		"""
		global g_points, g_points_int
		side = g_draw_size
		if 'g_points' not in globals():
			# x is the outer loop, so point (x, y) lives at flat index x * side + y.
			g_points_int = [[x, y] for x in range(side) for y in range(side)]
			g_points = torch.tensor(
				[[x / float(side), y / float(side)] for x, y in g_points_int],
				dtype=torch.float32)
		g_points.requires_grad = True
		# Evaluate the model on all grid points in one batch.
		output = model(g_points)
		output.mean().backward()
		# Copy before zeroing: for a CPU tensor .numpy() shares memory with
		# the gradient, so zero_() below would otherwise wipe these values.
		gradients = g_points.grad.detach().cpu().numpy().copy()
		g_points.grad.data.zero_()
		output_cpu = output.detach().cpu().numpy()
		# The flat point order matches a row-major (x, y) layout, so a
		# reshape replaces the per-point Python loop.
		value_image = output_cpu.reshape(side, side).astype(np.float64)
		squared_norm = gradients[:, 0] ** 2 + gradients[:, 1] ** 2
		derivative_image = squared_norm.reshape(side, side).astype(np.float64)
		return (derivative_image, value_image)

	# Step 7: Render the model output and region boundaries to a PNG file (structure restored)
	def create_new_image(derivative_image, value_image, filename, loss, epoch, learn_rate):
		"""Save a two-panel RGB image of width ``2 * g_draw_size``.

		Left panel: the model output in gray, with a red pixel wherever the
		derivative value differs from the neighbour at ``x + 1`` or ``y + 1``.
		Right panel: the model output in gray plus a red text caption with the
		loss, epoch and learning rate. The last row and column are not drawn
		because the neighbour comparison needs ``x + 1`` and ``y + 1``.
		"""
		new_image = Image.new("RGB", (g_draw_size * 2, g_draw_size))
		for x in range(g_draw_size-1):
			for y in range(g_draw_size-1):
				pixel_color = int(value_image[x,y] * 255)
				new_image.putpixel((x + g_draw_size, y), (pixel_color, pixel_color, pixel_color))
				if derivative_image[x, y] != derivative_image[x+1, y] or derivative_image[x, y] != derivative_image[x, y+1]:
					new_image.putpixel((x, y), (255, 0, 0))
				else:
					new_image.putpixel((x, y), (pixel_color, pixel_color, pixel_color))
		draw = ImageDraw.Draw(new_image)
		draw.text((g_draw_size + 5, 5), "Loss: %f\nEpoch: %d\nLR %f" % (loss, epoch, learn_rate), fill=(255,0,0,255))
		new_image.save(filename)

	# Train the network. Progress images are written during training by
	# write_model_and_decision_boundaries, whenever epoch % g_draw_interval == 0.
	print("About to train the network.")
	model = train_network(triples)
	# Legacy post-training rendering, kept for reference only. The create_new_image
	# call below does not match the function's current six-parameter signature.
	#print("Done training the network. Beginning calculation of derivatives.")
	#derivative_image, value_image = calculate_derivative(model)
	#print("Creating new image.")
	#create_new_image(derivative_image, value_image)

	# Output images are saved in the current directory as
	# "./<input filename>-<seed>-<neurons>-step-<epoch>.png"; no "output.png" is written.