Skip to content

Instantly share code, notes, and snippets.

@eavae
Created April 22, 2024 12:00
Show Gist options
  • Save eavae/fd993ded22f57f03a966cb541243c3bb to your computer and use it in GitHub Desktop.
Simple DeepSpeed with Zero-1
import torch
import enum
import deepspeed
import argparse
from torch import nn
from torch.utils.data import Dataset, DataLoader
from torch.multiprocessing import spawn
class BoolOps(enum.Enum):
    """Boolean operation codes the toy model must learn.

    Values are 0-based so they line up with the op codes drawn by
    ``torch.randint(0, 3, ...)`` in ``BoolDataset`` (range {0, 1, 2})
    and with the 3-row ``nn.Embedding`` lookup table in ``Network``.
    """

    # NOTE: the original values were 1/2/3. Those never matched the
    # sampled op codes {0, 1, 2}: the XOR branch was dead code and
    # op 0 fell through, silently keeping its default label of 0.
    AND = 0
    OR = 1
    XOR = 2
class BoolDataset(Dataset):
    """Synthetic dataset of ((bit, bit), op-code) -> boolean-result samples."""

    def __init__(self, size=128):
        self.size = size
        # Two random bits per sample plus a random op code in {0, 1, 2}.
        self.data = torch.randint(0, 2, (size, 2))
        self.ops = torch.randint(0, 3, (size,))
        # Labels default to 0; an op code that matches no branch keeps it.
        self.labels = torch.zeros(size, dtype=torch.long)
        for idx, (bits, op) in enumerate(zip(self.data, self.ops)):
            if op == BoolOps.AND.value:
                self.labels[idx] = bits.all()
            elif op == BoolOps.OR.value:
                self.labels[idx] = bits.any()
            elif op == BoolOps.XOR.value:
                self.labels[idx] = bits[0] ^ bits[1]

    def __len__(self):
        return self.size

    def __getitem__(self, idx):
        return self.data[idx], self.ops[idx], self.labels[idx]
class Network(nn.Module):
    """Tiny classifier mapping (two bits, op code) to 2-class logits.

    ``forward`` returns the cross-entropy loss directly (not the logits),
    which keeps the DeepSpeed training loop a one-liner.

    Args:
        embed_dim: width of the op-code embedding / bit projection.
    """

    def __init__(self, embed_dim=1024):
        super().__init__()
        # 3 rows: one embedding per op code; max_norm renorms rows to <= 1.
        self.cat_embedding = nn.Embedding(3, embed_dim, max_norm=1.0)
        self.real_embedding = nn.Linear(2, embed_dim)
        self.fc1 = nn.Linear(embed_dim, embed_dim * 2)
        # FIX: the original stacked fc2(fc1(x)) with no nonlinearity, which
        # collapses to a single affine map — XOR (a product of the bits) is
        # then unrepresentable. ReLU restores the needed capacity.
        self.act = nn.ReLU()
        self.fc2 = nn.Linear(embed_dim * 2, 2)

    def forward(self, bools, ops, labels):
        # Sum the op-code embedding with the projected (float) bit pair.
        x = self.cat_embedding(ops) + self.real_embedding(bools)
        x = self.act(self.fc1(x))
        logits = self.fc2(x)
        loss_fn = nn.CrossEntropyLoss()
        return loss_fn(logits, labels)
def main(config):
    """Train the toy network under DeepSpeed ZeRO stage 1 and print how
    the optimizer partitions its state.

    Args:
        config: DeepSpeed config (path or dict), forwarded verbatim to
            ``deepspeed.initialize``.
    """
    deepspeed.init_distributed(dist_backend="nccl")

    dataset = BoolDataset()
    model = Network()
    model_engine, optimizer, _, _ = deepspeed.initialize(
        model=model,
        model_parameters=model.parameters(),
        config=config,
    )

    # FIX: each rank loads its own micro-batches. train_batch_size() is the
    # *global* batch (micro * grad_accum * world_size) and would over-size
    # every rank's loader; the per-GPU micro batch size is what belongs here.
    dataloader = DataLoader(
        dataset,
        batch_size=model_engine.train_micro_batch_size_per_gpu(),
        shuffle=True,
    )

    for bools, ops, labels in dataloader:
        # NOTE: no optimizer.zero_grad() — model_engine.step() zeroes the
        # gradients itself; the original explicit call was redundant.
        model_dtype, _ = model_engine.get_data_types()
        loss = model_engine(
            bools.to(model_engine.device, dtype=model_dtype),
            ops.to(model_engine.device),
            labels.to(model_engine.device),
        )
        model_engine.backward(loss)
        model_engine.step()

        # Size of the fp32 master copy of the weights kept by the ZeRO-1
        # optimizer (this rank's partition only).
        print(
            "Optimizer Backup Model Size: ",
            sum(
                p.numel()
                for p in optimizer.state_dict()["single_partition_of_fp32_groups"]
            ),
        )
        # Element count of the Adam first-moment (momentum) state.
        print(
            "Optimizer State Momentum Size: ",
            sum(
                state_value["exp_avg"].numel()
                for state_value in optimizer.state_dict()["base_optimizer_state"][
                    "state"
                ].values()
            ),
        )
        # Element count of the Adam second-moment (variance) state.
        print(
            "Optimizer State Variance Size: ",
            sum(
                state_value["exp_avg_sq"].numel()
                for state_value in optimizer.state_dict()["base_optimizer_state"][
                    "state"
                ].values()
            ),
        )
        ...
    ...
if __name__ == "__main__":
    # The DeepSpeed launcher injects --local_rank into every rank's argv;
    # accept it here even though init_distributed does the actual setup.
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument("--local_rank", type=int, default=0)
    arg_parser = deepspeed.add_config_arguments(arg_parser)
    cli_args = arg_parser.parse_args()
    main(cli_args.deepspeed_config)
"""
bash:
deepspeed example_deepspeed_zero_1.py --deepspeed --deepspeed_config deepspeed_config.json
deepspeed_config.json:
{
"train_micro_batch_size_per_gpu": 32,
"zero_optimization": {
"stage": 1
},
"optimizer": {
"type": "Adam",
"params": {
"lr": 1e-3
}
}
}
"""
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment