Skip to content

Instantly share code, notes, and snippets.

@kouyoumin
Last active Sep 5, 2019
Embed
What would you like to do?
Test code for unified memory and cross-GPU access in PyTorch
from __future__ import print_function
import os
import sys
import resource
import torch
import numpy as np
# Number of 1 GB tensors allocated when no count is given on the command line.
DEFAULT_ALLOCATION_GB = 48
# 256 * 1024 * 1024 elements per tensor; that is 1 GB at 4 bytes per element
# (assumes torch's default float32 dtype is in effect).
ONE_GB_SHAPE = (256, 1024, 1024)
BYTES_PER_GB = 1024 ** 3


def _requested_gb(argv):
    """Return how many 1 GB tensors to allocate.

    Args:
        argv: Command-line style argument list; ``argv[1]``, when present,
            is parsed as the tensor count.

    Returns:
        ``int(argv[1])`` if given, otherwise ``DEFAULT_ALLOCATION_GB``.

    Raises:
        ValueError: If ``argv[1]`` is not a valid integer.
    """
    if len(argv) > 1:
        return int(argv[1])
    return DEFAULT_ALLOCATION_GB


def _allocate_round_robin(count, total_gpus):
    """Allocate ``count`` random tensors, cycling through the CUDA devices.

    After every allocation the per-device ``torch.cuda.memory_allocated``
    figures (in whole GB) are printed together with the running total.

    Args:
        count: Number of tensors of shape ``ONE_GB_SHAPE`` to create.
        total_gpus: Number of CUDA devices to spread them over; must be > 0.

    Returns:
        The list of allocated tensors, in allocation order.
    """
    tensors = []
    allocated_gpu = [0] * total_gpus
    for i in range(count):
        # Tensor i goes to device i mod N, so neighbours in the list sit on
        # different devices whenever more than one GPU is present.
        device = 'cuda:%d' % (i % total_gpus)
        tensors.append(torch.rand(ONE_GB_SHAPE, device=device))
        for j in range(total_gpus):
            in_use = torch.cuda.memory_allocated(device='cuda:%d' % (j))
            allocated_gpu[j] = in_use // BYTES_PER_GB
        print('Allocated %d(%r) GB' % (i + 1, allocated_gpu))
    return tensors


def _check_adjacent_sums(tensors):
    """Add each tensor to its successor and compare with a CPU/NumPy sum.

    NOTE(review): stock PyTorch normally rejects operands that live on
    different devices; this script presumably targets a build where
    cross-GPU access is enabled -- confirm.

    Args:
        tensors: Sequence of tensors to test pairwise.

    Raises:
        AssertionError: If a device-side sum differs from the NumPy sum.
    """
    pair_count = len(tensors) - 1
    for i in range(pair_count):
        left, right = tensors[i], tensors[i + 1]
        result = left + right
        expected = left.cpu().numpy() + right.cpu().numpy()
        np.testing.assert_array_almost_equal(result.cpu().numpy(), expected)
        # '\r' redraws the same console line; flush because no newline is
        # written until the loop is over.
        print('\rProgress: %d/%d' % (i + 1, pair_count), end='')
        sys.stdout.flush()
    print('\nTest done')


def main(argv=None):
    """Run the allocation and cross-GPU addition test.

    Args:
        argv: Argument list in ``sys.argv`` form; defaults to ``sys.argv``.

    Returns:
        0 when the test ran to completion, 1 when no CUDA device is
        available.

    Raises:
        ValueError: If the requested tensor count is not an integer.
    """
    if argv is None:
        argv = sys.argv
    to_be_allocated = _requested_gb(argv)

    total_gpus = torch.cuda.device_count()
    if total_gpus == 0:
        # Without this guard the round-robin `i % total_gpus` fails with an
        # unhelpful ZeroDivisionError.
        print('No CUDA devices found; nothing to test.', file=sys.stderr)
        return 1

    tensors = _allocate_round_robin(to_be_allocated, total_gpus)
    os.system('nvidia-smi')
    print('Testing cross gpu computation')
    _check_adjacent_sums(tensors)
    return 0


if __name__ == '__main__':
    sys.exit(main())
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment