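# Benchmark 1: a small regression MLP trained with PyTorch Lightning, comparing
# the wall-clock time of trainer.fit() when the dataset tensors stay on the CPU
# versus when they are pre-loaded onto the GPU via BinaryDataset.cuda().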
# Number of inputs
NIN = 1000
NHID = 10
# Number of examples
EXAMPLES = 100000
import timeit
import logging
from collections import OrderedDict
import pytorch_lightning as pl
from pytorch_lightning import LightningModule
from pytorch_lightning import Trainer
from torch import optim
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader
from torch.utils.data.distributed import DistributedSampler
import numpy as np
np.random.seed(0)
X = np.random.random((EXAMPLES, NIN))
Y = np.random.random((EXAMPLES, 1))
class BinaryDataset(torch.utils.data.Dataset):
    def __init__(self, x, y):
        self.x = torch.Tensor(x).float()
        self.y = torch.Tensor(y).float()

    def __len__(self):
        return self.x.shape[0]

    def __getitem__(self, index):
        return self.x[index, :], self.y[index]

    def cuda(self):
        self.x = self.x.to('cuda')
        self.y = self.y.to('cuda')
binaryDataset = BinaryDataset(X, Y)
class BinaryModule(pl.LightningModule):
    def __init__(self, binaryDataset):
        super().__init__()
        self.dataset = binaryDataset
        # build model
        self.__build_model()

    def __build_model(self):
        self.fc1 = nn.Linear(NIN, NHID)
        self.do1 = nn.Dropout(0.2)
        self.out = nn.Linear(NHID, 1)

    def forward(self, x):
        x = F.relu(self.fc1(x))
        x = self.do1(x)
        x = self.out(x)
        return x

    def loss(self, pred, true):
        loss_val = F.mse_loss(pred, true)
        return loss_val

    def _step(self, batch, batch_idx, name, training_step=False):
        x, y = batch
        pred = self.forward(x)
        loss_val = self.loss(pred, y)
        # in DP mode (default) make sure if result is scalar, there's another dim in the beginning
        if self.trainer.use_dp or self.trainer.use_ddp2:
            loss_val = loss_val.unsqueeze(0)
        tqdm_dict = OrderedDict({name: loss_val})
        if training_step:
            return OrderedDict({
                'loss': loss_val,
                'progress_bar': tqdm_dict,
                'log': tqdm_dict
            })
        else:
            return tqdm_dict

    def training_step(self, batch, batch_idx):
        return self._step(batch, batch_idx, name="train_loss", training_step=True)

    def _epoch_end(self, outputs, name):
        # With DP training I think you have to average the things individually? Not sure.
        # Look at the pytorch lightning siamese network code
        # if self.trainer.use_dp or self.trainer.use_ddp2:
        #     val_acc = torch.mean(val_acc)
        avg_loss = torch.stack([x[name] for x in outputs]).mean()
        tqdm_dict = {name: avg_loss}
        result = OrderedDict({name: avg_loss, 'progress_bar': tqdm_dict, 'log': tqdm_dict})
        return result

    # ---------------------
    # TRAINING SETUP
    # ---------------------
    def configure_optimizers(self):
        optimizer = optim.SGD(self.parameters(),
                              lr=0.01, momentum=0.90)
        scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer,
                                                         T_max=10)
        return [optimizer], [scheduler]

    def __dataloader(self, train, dataset):
        # when using multi-node (ddp) we need to add the datasampler
        train_sampler = None
        if self.use_ddp:
            train_sampler = DistributedSampler(dataset)
        should_shuffle = train and train_sampler is None
        loader = DataLoader(
            dataset=dataset,
            batch_size=len(dataset),
            shuffle=should_shuffle,
            sampler=train_sampler,
            num_workers=0,
            drop_last=True
        )
        return loader

    @pl.data_loader
    def train_dataloader(self):
        logging.info('training data loader called')
        return self.__dataloader(train=True, dataset=self.dataset)
def fit():
    trainer_gpu.fit(model_gpu)

# Baseline: dataset tensors stay on the CPU and are transferred batch by batch
model_gpu = BinaryModule(binaryDataset)
trainer_gpu = Trainer(max_epochs=10, gpus=1)
print("Don't load to GPU", timeit.timeit(fit, number=1))

# Pre-load the dataset tensors onto the GPU before training
binaryDataset.cuda()
model_gpu = BinaryModule(binaryDataset)
trainer_gpu = Trainer(max_epochs=10, gpus=1)
print("Load to GPU", timeit.timeit(fit, number=1))
# Number of inputs
NIN = 100
NHID = 1000
# Size of the learned representation
NOUT = 200
# Number of examples
EXAMPLES = 100000
# Batch size
BATCH_SIZE = 1000
import timeit
import logging
from collections import OrderedDict
import pytorch_lightning as pl
from pytorch_lightning import LightningModule
from pytorch_lightning import Trainer
from torch import optim
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader
from torch.utils.data.distributed import DistributedSampler
import numpy as np
np.random.seed(0)
X1 = np.random.random((EXAMPLES, NIN))
X2 = np.random.random((EXAMPLES, NIN))
DIST = np.random.random((EXAMPLES,))
class TableDistanceDataset(torch.utils.data.Dataset):
    def __init__(self, x1, x2, dist):
        self.dist = torch.Tensor(dist).float()
        # use the constructor arguments (the original referenced the module-level
        # X1/X2 globals here instead of x1/x2)
        self.X1 = torch.Tensor(x1).float()
        self.X2 = torch.Tensor(x2).float()

    def __len__(self):
        return self.X1.shape[0]

    def __getitem__(self, index):
        return self.X1[index, :], self.X2[index, :], self.dist[index]

    def cuda(self):
        self.dist = self.dist.to('cuda')
        self.X1 = self.X1.to('cuda')
        self.X2 = self.X2.to('cuda')
tableDistanceDataset = TableDistanceDataset(X1, X2, DIST)
class Table2Representation(pl.LightningModule):
    def __init__(self):
        super().__init__()
        # build model
        self.__build_model()

    def __build_model(self):
        self.fc1 = nn.Linear(NIN, NHID)
        self.do1 = nn.Dropout(0.2)
        self.out = nn.Linear(NHID, NOUT)

    def forward(self, x):
        x = F.relu(self.fc1(x))
        x = self.do1(x)
        x = self.out(x)
        return x
# Based upon https://github.com/PyTorchLightning/Siamese-Neural-Networks/blob/master/model.py
class TableDistanceModule(pl.LightningModule):
    def __init__(self, tableDistanceDataset):
        super().__init__()
        self.dataset = tableDistanceDataset
        self.table2Representation = Table2Representation()
        # build model
        self.__build_model()

    def __build_model(self):
        pass

    def forward(self, x1, x2):
        z1 = self.table2Representation.forward(x1)
        z2 = self.table2Representation.forward(x2)
        dis = torch.mean(torch.abs(z1 - z2), dim=1)
        return dis

    def loss(self, pred_dists, true_dists):
        loss_val = F.mse_loss(pred_dists, true_dists)
        return loss_val

    def _step(self, batch, batch_idx, name, training_step=False):
        X1, X2, dist = batch
        pred = self.forward(X1, X2)
        loss_val = self.loss(pred, dist)
        # in DP mode (default) make sure if result is scalar, there's another dim in the beginning
        if self.trainer.use_dp or self.trainer.use_ddp2:
            loss_val = loss_val.unsqueeze(0)
        tqdm_dict = OrderedDict({name: loss_val})
        if training_step:
            return OrderedDict({
                'loss': loss_val,
                'progress_bar': tqdm_dict,
                'log': tqdm_dict
            })
        else:
            return tqdm_dict

    def training_step(self, batch, batch_idx):
        return self._step(batch, batch_idx, name="train_loss", training_step=True)

    def _epoch_end(self, outputs, name):
        # With DP training I think you have to average the things individually? Not sure.
        # Look at the pytorch lightning siamese network code
        # if self.trainer.use_dp or self.trainer.use_ddp2:
        #     val_acc = torch.mean(val_acc)
        avg_loss = torch.stack([x[name] for x in outputs]).mean()
        tqdm_dict = {name: avg_loss}
        result = OrderedDict({name: avg_loss, 'progress_bar': tqdm_dict, 'log': tqdm_dict})
        return result

    # ---------------------
    # TRAINING SETUP
    # ---------------------
    def configure_optimizers(self):
        optimizer = optim.SGD(self.parameters(),
                              lr=0.01, momentum=0.90)
        scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer,
                                                         T_max=10)
        return [optimizer], [scheduler]
    def __dataloader(self, train, dataset):
        # when using multi-node (ddp) we need to add the datasampler
        train_sampler = None
        batch_size = BATCH_SIZE
        if self.use_ddp:
            train_sampler = DistributedSampler(dataset)
        should_shuffle = train and train_sampler is None
        loader = DataLoader(
            dataset=dataset,
            # use the configured mini-batch size (the original passed len(dataset)
            # here, which left batch_size unused)
            batch_size=batch_size,
            shuffle=should_shuffle,
            sampler=train_sampler,
            num_workers=0,
            drop_last=True
        )
        return loader

    @pl.data_loader
    def train_dataloader(self):
        logging.info('training data loader called')
        return self.__dataloader(train=True, dataset=self.dataset)
def fit():
    trainer_gpu.fit(model_gpu)

# Baseline: dataset tensors stay on the CPU
model_gpu = TableDistanceModule(tableDistanceDataset)
trainer_gpu = Trainer(max_epochs=10, gpus=1)
print("Don't load", timeit.timeit(fit, number=1))

# Pre-load the dataset tensors onto the GPU before training
tableDistanceDataset.cuda()
model_gpu = TableDistanceModule(tableDistanceDataset)
trainer_gpu = Trainer(max_epochs=10, gpus=1)
print("Load", timeit.timeit(fit, number=1))