"""Benchmarking the ImagenetFfcvDataModule from
https://gist.github.com/lebrice/37d89c29388d7fc9ce267eed1ba6dbda
"""
from __future__ import annotations

import datetime
import itertools

# NOTE: Need to import cv2 to prevent a loading error for GLIBCXX with ffcv.
import cv2  # noqa
import torch
import tqdm
from pytorch_lightning import LightningModule, Trainer
from torch import Tensor, nn
from torch.optim import Adam
from torch.utils.data import DataLoader
from torchvision.models import resnet18

from imagenet_ffcv import ImagenetFfcvDataModule

try:
    from imagenet import ImagenetDataModule
except ImportError:
    from pl_bolts.datamodules import ImagenetDataModule

MAX_BATCHES = 50

class Model(LightningModule):
    def __init__(self):
        super().__init__()
        self.encoder = resnet18()
        self.loss = nn.CrossEntropyLoss()

    def forward(self, x: Tensor) -> Tensor:  # type: ignore
        return self.encoder(x)

    def training_step(self, batch: tuple[Tensor, Tensor], batch_idx: int) -> Tensor:  # type: ignore
        x, y = batch
        # NOTE: ideally we would also check here that the batch already lives on
        # the GPU, i.e. that no extra allocation or copy happened between the
        # loader and the model (see the sketch below this class).
        logits = self(x)
        loss = self.loss(logits, y)
        return loss

    def configure_optimizers(self):
        return Adam(self.parameters(), lr=0.001)
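
# A minimal sketch of the "no extra copy" check mentioned in `training_step`
# (hypothetical helper, not part of the benchmark): if the loader already
# yields CUDA tensors, `.cuda()` is a no-op that returns the same storage,
# which we can verify by comparing data pointers.
def _is_already_on_gpu(x: Tensor) -> bool:
    """Return True if moving `x` to the GPU is a no-op (same underlying storage)."""
    return x.cuda().data_ptr() == x.data_ptr()
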
def _trainer():
    return Trainer(
        accelerator="gpu",
        devices=1,
        strategy=None,
        limit_val_batches=0,
        limit_train_batches=MAX_BATCHES,
        max_epochs=1,
        enable_checkpointing=False,
        log_every_n_steps=0,
        logger=False,
    )
def main():
    datamodule_torch = ImagenetDataModule(batch_size=512, num_workers=8)
    datamodule_torch.prepare_data()
    datamodule_ffcv = ImagenetFfcvDataModule(batch_size=512, num_workers=8)
    datamodule_ffcv.prepare_data()

    print("Pure for-loops over 200 batches:")
    print("Pure for-loop (PyTorch):\n", for_loop(datamodule_torch, max_batches=200))
    print("Pure for-loop (FFCV):\n", for_loop(datamodule_ffcv, max_batches=200))

    print(f"Training on {MAX_BATCHES} batches:")
    # TODO: Maybe caching has an impact? Run everything twice, and only take the
    # second value (see the `_second_run` sketch below).
    # print("Manual train loop (FFCV):", manual_loop(datamodule_ffcv))
    # print("Manual train loop (PyTorch):", manual_loop(datamodule_torch))
    # print("PL + DataLoaders:", train_time(datamodule_torch))
    # print("PL + FFCV:", train_time(datamodule_ffcv))
    # print("PL + DataLoaders (hidden):", train_time(datamodule_torch, obfuscate=True))
    # print("PL + DataLoaders (no optimizations):", pl_without_dataloader_optimizations())
def for_loop(datamodule: ImagenetDataModule, max_batches=1000):
    loader = datamodule.train_dataloader()
    start_time = datetime.datetime.now()
    for _ in tqdm.tqdm(
        itertools.islice(loader, max_batches),
        leave=False,
        total=max_batches,
    ):
        pass
    return datetime.datetime.now() - start_time
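
# NOTE: `datetime.now()` is precise enough for the multi-second timings here;
# for finer-grained measurements, a monotonic clock such as `time.perf_counter`
# is the usual choice (sketch, hypothetical helper):
def _timed(fn, *args, **kwargs) -> float:
    """Return the wall-clock duration of `fn(*args, **kwargs)` in seconds."""
    import time

    start = time.perf_counter()
    fn(*args, **kwargs)
    return time.perf_counter() - start
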
def train_time(datamodule: ImagenetDataModule, obfuscate=False):
    loader = datamodule.train_dataloader()
    trainer = _trainer()
    model = Model()

    def _obfuscate(loader):
        def _inner_hidden_fn():
            yield from loader

        return _inner_hidden_fn()

    if obfuscate:
        loader = _obfuscate(loader)

    start_time = datetime.datetime.now()
    # NOTE: By wrapping the DataLoader in a generator, PL no longer receives a
    # DataLoader instance, and therefore won't apply the optimizations that it
    # might normally apply (see the illustration below).
    trainer.fit(model, train_dataloaders=loader)  # type: ignore
    return datetime.datetime.now() - start_time
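
# Illustration of why the "obfuscation" above works (assumption: PL detects
# DataLoaders via `isinstance` checks, which a plain generator fails). This is
# a standalone toy example, not part of the benchmark.
def _demo_obfuscation():
    loader = DataLoader(list(range(10)), batch_size=2)
    wrapped = (batch for batch in loader)
    assert isinstance(loader, DataLoader)
    assert not isinstance(wrapped, DataLoader)
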
def manual_loop(datamodule: ImagenetDataModule):
    model = Model()
    model.cuda()
    model.train()  # train mode, since we call training_step below.
    loader = datamodule.train_dataloader()
    optimizer = model.configure_optimizers()
    start_time = datetime.datetime.now()
    for i, batch in enumerate(
        tqdm.tqdm(itertools.islice(loader, MAX_BATCHES), total=MAX_BATCHES)
    ):
        if isinstance(loader, DataLoader):
            # The FFCV loader already yields CUDA tensors; a regular DataLoader doesn't.
            batch = tuple(v.cuda() for v in batch)
        assert all(v.device.type == "cuda" for v in batch)
        optimizer.zero_grad()
        loss = model.training_step(batch, batch_idx=i)
        loss.backward()
        optimizer.step()
    return datetime.datetime.now() - start_time
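
# NOTE (caveat about CUDA timing, an assumption worth flagging): GPU work is
# asynchronous, so host-side wall-clock deltas can misattribute time between
# data loading and compute. For stricter GPU timings, one would synchronize
# before reading the clock (hypothetical helper):
def _synchronized_now() -> datetime.datetime:
    """Wait for pending CUDA work to finish before taking a host-side timestamp."""
    if torch.cuda.is_available():
        torch.cuda.synchronize()
    return datetime.datetime.now()
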
if __name__ == "__main__":
    main()