Test code for unified memory and cross-GPU access in PyTorch
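
The script below allocates 1 GB tensors round-robin across all visible GPUs until the requested total is reached, then adds each pair of adjacent tensors, which land on different devices whenever more than one GPU is present, and checks every result against a CPU-side reference. A stock PyTorch build normally rejects binary ops between tensors on different CUDA devices with a RuntimeError, so completing the cross-GPU phase implies the build in use allows cross-device access (for example, through CUDA unified memory).
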
from __future__ import print_function
import sys

import torch
import numpy as np

allocated_total = 0
tensors = []
total_gpus = torch.cuda.device_count()
# Number of 1 GB tensors to allocate; defaults to 48, overridable on the command line.
to_be_allocated = 48
if len(sys.argv) > 1:
    to_be_allocated = int(sys.argv[1])
allocated_gpu = [0] * total_gpus
for i in range(to_be_allocated):
    # Allocate 1 GB of float32 values (256 * 1024 * 1024 * 4 bytes) per tensor,
    # placing tensors round-robin across all visible GPUs.
    tensors.append(torch.rand((256, 1024, 1024), device='cuda:%d' % (i % total_gpus)))
    # Report how many whole GB PyTorch has allocated on each device so far.
    for j in range(total_gpus):
        allocated_gpu[j] = torch.cuda.memory_allocated(device='cuda:%d' % j) // 1024 // 1024 // 1024
    allocated_total = i + 1
    print('Allocated %d(%r) GB' % (allocated_total, allocated_gpu))
print('Testing cross gpu computation')
for i in range(len(tensors) - 1):
    # Adjacent tensors sit on different GPUs (round-robin placement), so this
    # add exercises cross-device access; verify against a CPU-side reference.
    result = tensors[i] + tensors[i + 1]
    np.testing.assert_array_almost_equal(
        result.cpu().numpy(),
        tensors[i].cpu().numpy() + tensors[i + 1].cpu().numpy())
    print('\rProgress: %d/%d' % (i + 1, len(tensors) - 1), end='')
print('\nTest done')
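
To run the test, pass the number of 1 GB tensors to allocate as an optional command-line argument (the filename below is arbitrary; save the script under any name):

python test_unified_memory.py 16

Each allocation prints a running total together with a per-GPU breakdown from torch.cuda.memory_allocated, and the cross-GPU phase prints a progress counter followed by "Test done" once every elementwise sum matches its CPU reference.

For reference, a minimal sketch (assuming at least two visible CUDA devices) of the single op the test exercises; on a stock PyTorch build the addition raises a RuntimeError, while on a unified-memory-enabled build it is expected to succeed:

import torch

# Assumes >= 2 CUDA devices. Stock PyTorch rejects binary ops between
# tensors on different devices; a unified-memory build should allow them.
a = torch.rand(4, device='cuda:0')
b = torch.rand(4, device='cuda:1')
try:
    print((a + b).cpu())
except RuntimeError as e:
    print('Cross-device add rejected:', e)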