"""Benchmarking the ImagenetFfcvDataModule from
https://gist.github.com/lebrice/37d89c29388d7fc9ce267eed1ba6dbda
"""
from __future__ import annotations

import datetime
import itertools

# NOTE: Need to import cv2 to prevent a loading error for GLIBCXX with ffcv.
import cv2  # noqa
import torch
import tqdm
from pytorch_lightning import LightningModule, Trainer
from torch import Tensor, nn
from torch.optim import Adam
from torch.utils.data import DataLoader
from torchvision.models import resnet18
from typing_extensions import ParamSpec

from imagenet_ffcv import ImagenetFfcvDataModule

try:
    from imagenet import ImagenetDataModule
except ImportError:
    from pl_bolts.datamodules import ImagenetDataModule

P = ParamSpec("P")
MAX_BATCHES = 50


class Model(LightningModule):
    def __init__(self):
        super().__init__()
        self.encoder = resnet18()
        self.loss = nn.CrossEntropyLoss()

    def forward(self, x: Tensor) -> Tensor:  # type: ignore
        return self.encoder(x)

    def training_step(self, batch: tuple[Tensor, Tensor], batch_idx: int) -> Tensor:  # type: ignore
        x, y = batch
        # One could check here that `x` has the same memory address as the source
        # tensor, i.e. that no extra allocation or copy was made between the
        # loader and the model.
        logits = self(x)
        loss = self.loss(logits, y)
        return loss

    def configure_optimizers(self):
        return Adam(self.parameters(), lr=0.001)


def _trainer() -> Trainer:
    return Trainer(
        accelerator="gpu",
        devices=1,
        strategy=None,
        limit_val_batches=0,
        limit_train_batches=MAX_BATCHES,
        max_epochs=1,
        enable_checkpointing=False,
        log_every_n_steps=0,
        logger=False,
    )


def main():
    datamodule_torch = ImagenetDataModule(batch_size=512, num_workers=8)
    datamodule_torch.prepare_data()

    datamodule_ffcv = ImagenetFfcvDataModule(batch_size=512, num_workers=8)
    datamodule_ffcv.prepare_data()

    print("Pure for-loops over 200 batches:")
    print("Pure for-loop (PyTorch):\n", for_loop(datamodule_torch, max_batches=200))
    print("Pure for-loop (FFCV):\n", for_loop(datamodule_ffcv, max_batches=200))

    print(f"Training on {MAX_BATCHES} batches:")
    # TODO: Maybe caching has an impact? Run everything twice, and only take the
    # second value (see the `timed_twice` sketch below).
    # print("Manual train loop (FFCV):", manual_loop(datamodule_ffcv))
    # print("Manual train loop (PyTorch):", manual_loop(datamodule_torch))
    # print("PL + DataLoaders:", train_time(datamodule_torch))
    # print("PL + FFCV:", train_time(datamodule_ffcv))
    # print("PL + DataLoaders (hidden):", train_time(datamodule_torch, obfuscate=True))
    # print("PL + DataLoaders (no optimizations):", pl_without_dataloader_optimizations())


def for_loop(datamodule: ImagenetDataModule, max_batches: int = 1000) -> datetime.timedelta:
    """Time a bare iteration over the first `max_batches` batches of the train loader."""
    loader = datamodule.train_dataloader()
    start_time = datetime.datetime.now()
    for _ in tqdm.tqdm(
        itertools.islice(loader, max_batches),
        leave=False,
        total=max_batches,
    ):
        pass
    return datetime.datetime.now() - start_time


def train_time(datamodule: ImagenetDataModule, obfuscate: bool = False) -> datetime.timedelta:
    """Time `Trainer.fit` on `MAX_BATCHES` batches from the datamodule's train loader."""
    loader = datamodule.train_dataloader()
    trainer = _trainer()
    model = Model()

    def _obfuscate(loader):
        # Wrap the loader in a plain generator, so PL can't tell it is a DataLoader.
        def _inner_hidden_fn():
            yield from loader

        return _inner_hidden_fn()

    if obfuscate:
        loader = _obfuscate(loader)
    start_time = datetime.datetime.now()
    # NOTE: By passing a generator rather than the DataLoader itself, the idea is
    # that PL won't apply the DataLoader-specific optimizations it might normally
    # apply.
    trainer.fit(model, train_dataloaders=loader)  # type: ignore
    return datetime.datetime.now() - start_time
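

# `pl_without_dataloader_optimizations` is referenced (commented out) in `main`,
# but is not defined in this gist. Below is a minimal, hypothetical sketch of
# what it might look like, following the NOTE in `train_time`: pass an `islice`
# of the loader to `Trainer.fit`, so that PL receives a plain iterable rather
# than a DataLoader. This is a reconstruction, not the author's original code.
def pl_without_dataloader_optimizations() -> datetime.timedelta:
    datamodule = ImagenetDataModule(batch_size=512, num_workers=8)
    datamodule.prepare_data()
    loader = datamodule.train_dataloader()
    trainer = _trainer()
    model = Model()
    start_time = datetime.datetime.now()
    trainer.fit(model, train_dataloaders=itertools.islice(loader, MAX_BATCHES))  # type: ignore
    return datetime.datetime.now() - start_time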


def manual_loop(datamodule: ImagenetDataModule) -> datetime.timedelta:
    """Time a hand-written training loop (no PL Trainer) over `MAX_BATCHES` batches."""
    model = Model()
    model.cuda()
    model.train()  # Train mode, since `training_step` and `backward` are called below.
    loader = datamodule.train_dataloader()
    optimizer = model.configure_optimizers()
    start_time = datetime.datetime.now()
    for i, batch in enumerate(
        tqdm.tqdm(itertools.islice(loader, MAX_BATCHES), total=MAX_BATCHES)
    ):
        if isinstance(loader, DataLoader):
            # The FFCV loader presumably already yields CUDA tensors (hence the
            # assert below); a plain DataLoader's batches are moved manually.
            batch = tuple(v.cuda() for v in batch)
        assert all(v.device.type == "cuda" for v in batch)
        # _ = model.validation_step(batch, batch_idx=i)
        optimizer.zero_grad()
        loss = model.training_step(batch, batch_idx=i)
        loss.backward()
        optimizer.step()
    return datetime.datetime.now() - start_time
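

# NOTE (hypothetical addition, not in the original gist): CUDA kernels launch
# asynchronously, so wall-clock timings like the ones above can under-count GPU
# work that is still in flight. A common fix is to synchronize before reading
# the clock; a sketch:
def _now_synced() -> datetime.datetime:
    # Block until all queued CUDA work has finished, then read the clock.
    if torch.cuda.is_available():
        torch.cuda.synchronize()
    return datetime.datetime.now()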


if __name__ == "__main__":
    main()