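# Example: sharding a LightningModule with FairScale Fully Sharded Data Parallel
# (FSDP) in PyTorch Lightning. The layers are built inside the
# ``configure_sharded_model`` hook so they are sharded as they are created, and
# ``checkpoint_wrapper`` adds activation checkpointing for extra memory savings.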
import torch
import torch.nn as nn
import pytorch_lightning as pl
from pytorch_lightning import Trainer
from fairscale.nn import checkpoint_wrapper, auto_wrap, wrap


class MyModel(pl.LightningModule):
    ...

    def configure_sharded_model(self):
        # Modules created within the sharded-model context are sharded across
        # processes as soon as they are wrapped with ``wrap`` or ``auto_wrap``.

        # ``wrap`` puts the layer into a Fully Sharded wrapper directly.
        linear_layer = wrap(nn.Linear(32, 32))

        # For best memory efficiency, combine ``auto_wrap`` with FairScale
        # activation checkpointing.
        block = auto_wrap(
            checkpoint_wrapper(
                nn.Sequential(
                    nn.Linear(32, 32),
                    nn.ReLU(),
                )
            )
        )

        self.model = nn.Sequential(
            linear_layer,
            nn.ReLU(),
            block,
        )

    def configure_optimizers(self):
        return torch.optim.AdamW(self.model.parameters())


model = MyModel()
trainer = Trainer(gpus=4, plugins='fsdp', precision=16)
trainer.fit(model)

trainer.test()
trainer.predict()
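# A minimal, hypothetical usage sketch (not part of the original gist): the
# ``fit``/``test``/``predict`` calls above assume the elided ``...`` part of
# ``MyModel`` defines its own steps and dataloaders. Dataloaders can also be
# passed explicitly; ``RandomDataset`` below is an illustrative stand-in for
# real data.
#
# from torch.utils.data import DataLoader, Dataset
#
# class RandomDataset(Dataset):
#     def __len__(self):
#         return 64
#
#     def __getitem__(self, idx):
#         # 32-dimensional random inputs matching the nn.Linear(32, 32) layers.
#         return torch.randn(32)
#
# loader = DataLoader(RandomDataset(), batch_size=8)
# trainer.test(model, loader)       # evaluate on an explicit dataloader
# trainer.predict(model, loader)    # run inference on an explicit dataloader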