# tdeboissiere/YellowFin_MLP.py

## YellowFin_MLP.py
import numpy as np

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable
import torch.optim as optim
from yellowfin_tuner import YFOptimizer
from tqdm import tqdm


# Synthetic data set: every feature and every target is the constant 1.0,
# stored as float32 so it can be wrapped in a FloatTensor without a cast.
iteration = 0
npts = 10000

X_train = np.ones((npts, 2016), dtype=np.float32)
y_train = np.ones((npts, 42), dtype=np.float32)


# Training hyper-parameters
learning_rate = 1E-3
training_epochs = 1000
n_batch_per_epoch = 1000
batch_size = 4096


# network
class MLPNet(nn.Module):
    """Fully connected network with three ReLU hidden layers.

    The input goes through ``fc1``, ``fc2`` and ``fc3``, each followed by a
    ReLU, and then through ``fc4``, whose output is returned without an
    activation.

    Args:
        input_dim: Size of the last dimension of the input.
        output_dim: Size of the last dimension of the output.
        hidden_dim: Width shared by the three hidden layers. Defaults to
            256, the width this network has always used.
    """

    def __init__(self, input_dim, output_dim, hidden_dim=256):
        super(MLPNet, self).__init__()

        # Attribute names and creation order are part of the contract:
        # state_dict keys are derived from the names, and seeded weight
        # initialisation depends on the order the layers are built in.
        self.fc1 = nn.Linear(input_dim, hidden_dim)
        self.fc2 = nn.Linear(hidden_dim, hidden_dim)
        self.fc3 = nn.Linear(hidden_dim, hidden_dim)
        self.fc4 = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        """Return the network output for ``x``."""
        for hidden_layer in (self.fc1, self.fc2, self.fc3):
            x = F.relu(hidden_layer(x))
        # The last layer stays linear (no activation on the output).
        return self.fc4(x)


model = MLPNet(X_train.shape[-1], y_train.shape[-1]).cuda()
# YFOptimizer is the optimizer actually used for training. To compare against
# a baseline, swap in optim.Adam(model.parameters(), lr=1E-4) here instead;
# building both only for the first to be overwritten was dead code.
optimizer = YFOptimizer(model.parameters(), lr=1.0, mu=0, weight_decay=5e-4)

# Averaging over all elements is MSELoss's default reduction, so the
# deprecated size_average=True flag is not needed.
loss_fn = torch.nn.MSELoss().cuda()


# Train
# Number of valid window starts. np.random.randint excludes its upper bound,
# so the "+ 1" is what makes the final window (the one ending on the last
# sample) reachable, and what lets batch_size equal the data set size.
n_start_positions = X_train.shape[0] - batch_size + 1
if n_start_positions < 1:
    raise ValueError(
        "batch_size (%d) is larger than the number of training samples (%d)"
        % (batch_size, X_train.shape[0])
    )

for epoch in range(training_epochs):
    for i in tqdm(range(n_batch_per_epoch)):
        # Each batch is a random contiguous window of the training arrays.
        start = np.random.randint(0, n_start_positions)
        stop = start + batch_size
        batch_x = torch.FloatTensor(X_train[start:stop])
        batch_y = torch.FloatTensor(y_train[start:stop])
        # Move the batch to the GPU and wrap it for autograd.
        x, y_true = Variable(batch_x.cuda()), Variable(batch_y.cuda())

        # Forward pass. The mean squared error is written out by hand;
        # loss_fn(y_pred, y_true) computes the same quantity.
        y_pred = model(x)
        loss = torch.mean(torch.pow(y_true - y_pred, 2))

        # Backward pass: clear stale gradients, backpropagate, update.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
	# NOTE: the flattened, tab-indented repeat of the script that followed here was removed: it duplicated the code above line for line, and its lost indentation made the file unparseable.