# Run this script in an IPython/Jupyter notebook: it relies on the %timeit magic.
# Benchmarks torch.sparse.sum against dense Tensor.sum, forward and backward, on CPU and CUDA.
import torch
import random

random.seed(1)
torch.manual_seed(1)
nnzs = [
    1000,
    10000,
]
dims_to_sum = [
    [0, 1],
    [2, 3],
    [0, 2, 3],
]
keep_dim = [
    # True,
    False,
]
sizes = [
    [1000, 1000, 2, 2],
    # [10000, 1000, 2, 2],
]
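
# The benchmark sweeps every combination of (nnz, size, dims to sum over, keepdim).
# In each size, dims 0 and 1 are the sparse dims (indexed by I below) and dims 2
# and 3 are dense dims (carried in the values tensor V); the commented-out entries
# above can be re-enabled to widen the sweep.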
all_results = {}
for nnz in nnzs:
    for d in sizes:
        for d_to_sum in dims_to_sum:
            for k in keep_dim:
                results = {}
                print("------ nnz = %d, sizes = %s, d_to_sum = %s, keep_dim = %s --------" % (nnz, d, d_to_sum, k))
                # Random COO indices of shape (2, nnz) for the two sparse dims,
                # and values of shape (nnz, d[2], d[3]) for the two dense dims.
                I = torch.cat([torch.randint(0, d[0], size=(nnz,)),
                               torch.randint(0, d[1], size=(nnz,))], 0).reshape(2, nnz)
                V = torch.randn(nnz, d[2], d[3])
                size = torch.Size(d)
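                # COO tensors may carry duplicate indices; coalesce() merges them
                # (summing duplicate values) so the timed ops see canonical input.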
print("======== CPU sparse ========")
S = torch.sparse_coo_tensor(I, V, size).coalesce()
res = %timeit -o torch.sparse.sum(S)
results['CPU_sparse_sumAll'] = ' '.join(str(res).split()[:2])
res = %timeit -o torch.sparse.sum(S, d_to_sum, k)
results['CPU_sparse_sumD'] = ' '.join(str(res).split()[:2])
print("======== CPU sparse backward ========")
S = torch.sparse_coo_tensor(I, V, size).coalesce().requires_grad_(True)
S_sum = torch.sparse.sum(S)
res = %timeit -o S_sum.backward(retain_graph=True)
results['CPU_sparse_backward_sumAll'] = ' '.join(str(res).split()[:2])
S = torch.sparse_coo_tensor(I, V, size).coalesce().requires_grad_(True)
S_sum = torch.sparse.sum(S, d_to_sum, k)
data = S_sum.clone().detach()
res = %timeit -o S_sum.backward(data, retain_graph=True)
results['CPU_sparse_backward_sumD'] = ' '.join(str(res).split()[:2])
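                # CUDA kernels launch asynchronously, so torch.cuda.synchronize()
                # is run once before timing and again inside each %timeit statement;
                # otherwise %timeit would measure only the kernel launch.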
print("======== CUDA sparse ========")
S = torch.sparse_coo_tensor(I, V, size).coalesce().cuda()
torch.cuda.synchronize()
res = %timeit -o torch.sparse.sum(S); torch.cuda.synchronize();
results['CUDA_sparse_sumAll'] = ' '.join(str(res).split()[:2])
torch.cuda.synchronize()
res = %timeit -o torch.sparse.sum(S, d_to_sum, k); torch.cuda.synchronize();
results['CUDA_sparse_sumD'] = ' '.join(str(res).split()[:2])
print("======== CUDA sparse backward ========")
S = torch.sparse_coo_tensor(I, V, size).coalesce().cuda().requires_grad_(True)
S_sum = torch.sparse.sum(S)
torch.cuda.synchronize()
res = %timeit -o S_sum.backward(retain_graph=True); torch.cuda.synchronize();
results['CUDA_sparse_backward_sumAll'] = ' '.join(str(res).split()[:2])
S = torch.sparse_coo_tensor(I, V, size).coalesce().cuda().requires_grad_(True)
S_sum = torch.sparse.sum(S, d_to_sum, k)
data = S_sum.clone().detach()
torch.cuda.synchronize()
res = %timeit -o S_sum.backward(data, retain_graph=True); torch.cuda.synchronize();
results['CUDA_sparse_backward_sumD'] = ' '.join(str(res).split()[:2])
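                # Dense baselines: a fully materialized random tensor of the same
                # shape (not the same values as S) serves as the comparison point.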
print("======== CPU dense ========")
a = torch.randn(d)
res = %timeit -o a.sum()
results['CPU_dense_sumAll'] = ' '.join(str(res).split()[:2])
res = %timeit -o a.sum(d_to_sum, k)
results['CPU_dense_sumD'] = ' '.join(str(res).split()[:2])
print("======== CPU dense backward ========")
a = torch.randn(d).requires_grad_(True)
a_sum = a.sum()
res = %timeit -o a_sum.backward(retain_graph=True)
results['CPU_dense_backward_sumAll'] = ' '.join(str(res).split()[:2])
a = torch.randn(d).requires_grad_(True)
a_sum = a.sum(d_to_sum, k)
data = a_sum.clone().detach()
res = %timeit -o a_sum.backward(data, retain_graph=True)
results['CPU_dense_backward_sumD'] = ' '.join(str(res).split()[:2])
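                # The CUDA dense runs repeat the synchronize-inside-%timeit pattern
                # used for the sparse CUDA benchmarks above.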
print("======== CUDA dense ========")
a = torch.randn(d).cuda()
torch.cuda.synchronize()
res = %timeit -o a.sum(); torch.cuda.synchronize();
results['CUDA_dense_sumAll'] = ' '.join(str(res).split()[:2])
torch.cuda.synchronize()
res = %timeit -o a.sum(d_to_sum, k); torch.cuda.synchronize();
results['CUDA_dense_sumD'] = ' '.join(str(res).split()[:2])
print("======== CUDA dense backward ========")
a = torch.randn(d).cuda().requires_grad_(True)
a_sum = a.sum()
torch.cuda.synchronize()
res = %timeit -o a_sum.backward(retain_graph=True); torch.cuda.synchronize();
results['CUDA_dense_backward_sumAll'] = ' '.join(str(res).split()[:2])
a = torch.randn(d).cuda().requires_grad_(True)
a_sum = a.sum(d_to_sum, k)
data = a_sum.clone().detach()
torch.cuda.synchronize()
res = %timeit -o a_sum.backward(data, retain_graph=True); torch.cuda.synchronize();
results['CUDA_dense_backward_sumD'] = ' '.join(str(res).split()[:2])
                all_results[', '.join([str(nnz), str(d), str(d_to_sum), str(k)])] = results
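
# Summary table: each row pairs sparse vs dense timings for one configuration,
# first for CPU, then for CUDA.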
print("(nnz, sizes, sum_dims, keepdim, sum all or dims, bk=backward) , CPU (sparse vs dense) , CUDA(sparse vs dense)")
for p, res in all_results.items():
print("(" + p + ", sumAll) , " + "%s vs %s , %s vs %s" %
(res['CPU_sparse_sumAll'], res['CPU_dense_sumAll'], res['CUDA_sparse_sumAll'], res['CUDA_dense_sumAll']))
print("(" + p + ", sumD) , " + "%s vs %s , %s vs %s" %
(res['CPU_sparse_sumD'], res['CPU_dense_sumD'], res['CUDA_sparse_sumD'], res['CUDA_dense_sumD']))
print("(" + p + ", sumAll, bk) , " + "%s vs %s , %s vs %s" %
(res['CPU_sparse_backward_sumAll'], res['CPU_dense_backward_sumAll'], res['CUDA_sparse_backward_sumAll'], res['CUDA_dense_backward_sumAll']))
print("(" + p + ", sumD, bk) , " + "%s vs %s , %s vs %s" %
(res['CPU_sparse_backward_sumD'], res['CPU_dense_backward_sumD'], res['CUDA_sparse_backward_sumD'], res['CUDA_dense_backward_sumD']))