@danoneata
Last active March 15, 2019 10:35
Model-agnostic meta learning
"""This code is inspired by the homework 2 from CSC421/2516 Winter 2019,
but I'm taking a more functional approach.
http://www.cs.toronto.edu/~rgrosse/courses/csc421_2019/homeworks/hw2.pdf
http://www.cs.toronto.edu/~rgrosse/courses/csc421_2019/homeworks/maml.py
"""
import autograd.numpy as np
import autograd as ag
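
# Rough structure of the script (my summary, not part of the original gist):
#   predict / loss   -- a small ReLU MLP regressor and its squared-error loss
#   step / update    -- inner loop: a few SGD steps that adapt the shared
#                       parameters to one sampled task
#   meta_objective   -- average loss *after* adaptation, over a batch of tasks
#   main             -- outer loop: gradient descent on the meta-objective
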
def predict(params, x):
    """Predict using a two-layer ReLU MLP."""
    relu = lambda z: np.maximum(z, 0.0)
    H1 = relu(np.outer(x, params["W1"]) + params["b1"])
    H2 = relu(np.dot(H1, params["W2"]) + params["b2"])
    return np.dot(H2, params["w3"]) + params["b3"]

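# Note on shapes (my reading of the code above): x is a 1-D array of scalar
# inputs, so np.outer(x, params["W1"]) has shape (npts, nhid) and predict
# returns a 1-D array of npts predictions.
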
def loss(params, task):
    """Mean squared error of the model on one task's data."""
    x, y = task
    return 0.5 * np.mean((y - predict(params, x)) ** 2)

def step(cost, params, λ):
    """Perform one gradient descent step."""
    grad = ag.grad(cost)(params)
    return {k: params[k] - λ * grad[k] for k in params}

def update(params, task, nr_steps=5, α=0.1):
    """Adapt the parameters to the current task with a few gradient steps (inner loop)."""
    for _ in range(nr_steps):
        params = step(lambda p: loss(p, task), params, α)
    return params

def meta_objective(params, tasks):
    """Average loss after adapting the shared parameters to each task."""
    # Use the built-in sum: np.sum over a generator is deprecated / unreliable.
    return sum(loss(update(params, t), t) for t in tasks) / len(tasks)

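# Note (my addition): because `update` itself calls ag.grad, taking the
# gradient of meta_objective with autograd propagates through the inner-loop
# SGD steps, i.e. this is the full (second-order) MAML objective
#     min_θ  (1/|T|) Σ_τ  L_τ(U_τ(θ)),   with U_τ(θ) = θ after a few SGD steps on L_τ,
# rather than the first-order approximation that ignores the inner gradients.
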
def random_init(std, nhid):
    """Sample Gaussian initial parameters; inputs are scalar, so W1 and w3 are vectors."""
    return {
        "W1": np.random.normal(0.0, std, size=nhid),
        "b1": np.random.normal(0.0, std, size=nhid),
        "W2": np.random.normal(0.0, std, size=(nhid, nhid)),
        "b2": np.random.normal(0.0, std, size=nhid),
        "w3": np.random.normal(0.0, std, size=nhid),
        "b3": np.random.normal(0.0, std),
    }

class ToyDataGen:
    """Samples a random piecewise-constant function (one height per bin), and
    then samples noisy observations of the function."""

    def __init__(self, xmin, xmax, ymin, ymax, std, nr_pieces):
        self.xmin = xmin
        self.xmax = xmax
        self.ymin = ymin
        self.ymax = ymax
        self.std = std
        self.nr_pieces = nr_pieces

    def sample_dataset(self, npts):
        x = np.random.uniform(self.xmin, self.xmax, size=npts)
        heights = np.random.uniform(self.ymin, self.ymax, size=self.nr_pieces)
        bins = np.floor((x - self.xmin) / (self.xmax - self.xmin) * self.nr_pieces)
        bins = bins.astype(int)
        y = np.random.normal(heights[bins], self.std)
        return x, y

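# Quick usage sketch (my addition; values match the constants below):
#     gen = ToyDataGen(-3, 3, -3, 3, 0.1, 6)
#     x, y = gen.sample_dataset(100)  # both arrays have shape (100,)
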
# Some constants
SEED = 0
PRINT_EVERY = 100
DISPLAY_EVERY = 1000
XMIN, XMAX = -3, +3
YMIN, YMAX = -3, +3
NOISE = 0.1
BINS = 6
NR_DATA = 100
INIT_STD = 0.1
NR_HID = 50
NR_STEPS = 12000

def main():
    np.random.seed(SEED)
    data_gen = ToyDataGen(XMIN, XMAX, YMIN, YMAX, NOISE, BINS)
    params = random_init(INIT_STD, NR_HID)
    tasks_val = [data_gen.sample_dataset(NR_DATA)]
    β = 0.01  # Learning rate for the meta-learner
    # Optimize the meta-objective
    for i in range(NR_STEPS):
        tasks = [data_gen.sample_dataset(NR_DATA)]
        params = step(lambda p: meta_objective(p, tasks), params, β)
        if (i + 1) % PRINT_EVERY == 0:
            loss_val = meta_objective(params, tasks_val)
            print(f"Iteration {i + 1} Meta-objective: {loss_val:1.3f}")
if __name__ == "__main__":
    main()