Gist by @kmaehashi, last active July 22, 2022.
#!/usr/bin/env python
# Simple "cupyx.distributed" example using a sparse matrix.
# To try this script on a single node (with 2+ GPUs), run:
# $ mpiexec -n 2 ./sparse_reduce.py
import os

import cupy
import cupyx.distributed
import cupyx.scipy.sparse  # needed explicitly; not pulled in by cupyx.distributed
import mpi4py.MPI  # "import mpi4py" alone does not expose mpi4py.MPI
import scipy.sparse


def main():
    comm_world = mpi4py.MPI.COMM_WORLD
    workers = comm_world.Get_size()
    rank = comm_world.Get_rank()
    pid = os.getpid()
    print(f'[{pid}] Size: {workers}')
    print(f'[{pid}] Rank: {rank}')

    # Bind each MPI rank to its own GPU before creating the communicator.
    cupy.cuda.Device(rank).use()
    comm = cupyx.distributed.init_process_group(workers, rank, use_mpi=True)

    # Move a random sparse matrix to the current GPU, then sum the
    # matrices from all ranks into rank 0 (in-place reduce).
    sm_gpu = cupyx.scipy.sparse.csr_matrix(generate(rank))
    comm.reduce(sm_gpu, sm_gpu, root=0, op='sum')

    if rank == 0:
        # Recompute the same sum on CPU and verify element-wise equality.
        expected = sum([generate(n) for n in range(workers)])
        actual = sm_gpu.get()
        assert (expected != actual).nnz == 0
        print('Success!')


def generate(seed):
    # Deterministic random CSR matrix (density 0.01 by default).
    return scipy.sparse.random(1000, 1000, format='csr', random_state=seed)


if __name__ == '__main__':
    main()
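
With two GPUs available, the run command from the header comment should print something like the following (a hypothetical transcript; the PIDs will differ and the lines from the two ranks may interleave):

$ mpiexec -n 2 ./sparse_reduce.py
[12345] Size: 2
[12345] Rank: 0
[12346] Size: 2
[12346] Rank: 1
Success!

Passing use_mpi=True tells init_process_group to exchange the NCCL communicator ID over MPI, so the fallback TCP store that cupyx.distributed would otherwise start on rank 0 is not needed; the reduction itself still runs over NCCL.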