Skip to content

Instantly share code, notes, and snippets.

@Dounm
Created January 6, 2023 02:09
Show Gist options
  • Save Dounm/454193118da6dbd40261c9019721ec19 to your computer and use it in GitHub Desktop.
Save Dounm/454193118da6dbd40261c9019721ec19 to your computer and use it in GitHub Desktop.
Show the error when broadcast_object_list() is passed lists of different lengths on different ranks
#!/usr/bin/env python3
import torch
from torch import distributed as dist
# Minimal reproduction of what happens when dist.broadcast_object_list() is
# called with lists of different lengths on different ranks.
# Intended to be launched with two processes, e.g.:
#   torchrun --nproc_per_node=2 --master_port=33400 t2.py
dist.init_process_group(backend="nccl")
rank = dist.get_rank()
# Select the CUDA device whose index equals this process's rank.
torch.cuda.set_device(f"cuda:{rank}")

if rank == 0:
    print("I am rank 0")
    a = torch.LongTensor(data=[864]).to("cuda")
    b = ["From Rank 0"]
else:
    print("I am rank 1")
    a = torch.LongTensor(data=[0]).to("cuda")
    # Deliberately empty: this length mismatch with rank 0's one-element list
    # is the misuse being demonstrated.  The PyTorch documentation requires
    # every rank to pass a list of the same size; `b = [None]` here would make
    # the call well-formed.
    b = []

# Step 1: object broadcast with mismatched list lengths.  In the recorded run
# no exception was raised and the non-source rank's list simply stayed empty.
print(f"before BR Rank{rank}: {b=}")
dist.broadcast_object_list(b, src=0)
print(f"after BR Rank{rank}: {b=}")

# Step 2: an ordinary tensor broadcast issued afterwards.  In the recorded run
# rank 1 ended up with 26 instead of 864.
# NOTE(review): 26 matches the byte length of the pickled string
# "From Rank 0", which suggests rank 1 received a size tensor sent inside the
# earlier broadcast_object_list() call rather than `a` -- not confirmed here.
print(f"before BR Rank{rank}: {a=}")
dist.broadcast(a, src=0)
print(f"after BR Rank{rank}: {a=}")
## Output
## torchrun --nproc_per_node=2 --master_port=33400 t2.py
# I am rank 1
# I am rank 0
# before BR Rank0: b=['From Rank 0']
# before BR Rank1: b=[]
# after BR Rank0: b=['From Rank 0']
# before BR Rank0: a=tensor([864], device='cuda:0')
# after BR Rank0: a=tensor([864], device='cuda:0')
# after BR Rank1: b=[]
# before BR Rank1: a=tensor([0], device='cuda:1')
# after BR Rank1: a=tensor([26], device='cuda:1')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment