-
-
Save cpuhrsch/7fec60079cbe2daeff59c0577f933320 to your computer and use it in GitHub Desktop.
sparse.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import torch | |
import torch.nn.functional as F | |
import itertools | |
import torch.utils.benchmark as benchmark | |
import math | |
# Element type used for every tensor this script allocates (weights, inputs, bias).
dtype = torch.float16
# Device on which every tensor is allocated; the benchmark targets CUDA.
device = "cuda"
def create_blocked_tensor(M, N, blocksize, sparsity):
    """Return a dense 0/1 tensor whose zeros come in square blocks.

    A Bernoulli sample is drawn for each cell of an
    ``(M // blocksize, N // blocksize)`` grid, with probability
    ``1 - sparsity`` of being one. Each cell is then expanded into a
    ``blocksize x blocksize`` tile along both dimensions.

    The tensor is created with the module-level ``dtype`` and ``device``.

    Args:
        M: Requested number of rows.
        N: Requested number of columns.
        blocksize: Side length of each square tile.
        sparsity: Probability, in ``[0, 1]``, that a tile is all zeros.

    Returns:
        A contiguous tensor of shape
        ``(M // blocksize * blocksize, N // blocksize * blocksize)``.
    """
    assert 0.0 <= sparsity <= 1.0, \
        "sparsity should be a value between 0 and 1"
    keep_probability = 1 - sparsity
    grid_shape = (M // blocksize, N // blocksize)
    probabilities = torch.full(
        grid_shape, keep_probability, dtype=dtype, device=device)
    mask = torch.bernoulli(probabilities)
    # Grow every grid cell into a full tile, rows first, then columns.
    for axis in (0, 1):
        mask = torch.repeat_interleave(mask, blocksize, dim=axis)
    return mask.contiguous()
def benchmark_in_us(f, *args, **kwargs):
    """Time ``f(*args, **kwargs)`` and return the mean runtime in microseconds.

    The call is measured with ``torch.utils.benchmark.Timer`` using
    ``blocked_autorange``. The mean, reported in seconds, is scaled to
    microseconds and truncated to an ``int``.

    Args:
        f: Callable to benchmark.
        *args: Positional arguments forwarded to ``f``.
        **kwargs: Keyword arguments forwarded to ``f``.

    Returns:
        Mean time of one call, in whole microseconds.
    """
    timer = benchmark.Timer(
        stmt="f(*args, **kwargs)",
        globals={"f": f, "args": args, "kwargs": kwargs},
    )
    measurement = timer.blocked_autorange()
    mean_seconds = measurement.mean
    return int(mean_seconds * 1e6)
def run_benchmark(x, b, weightsize, batchsize, seqlen, blocksize, sparsity):
    """Compare ``F.linear`` with a dense weight against its BSR-sparse copy.

    A square block-sparse weight of side ``weightsize`` is generated and
    converted to BSR layout. ``F.linear(x, weight, b)`` is then timed for
    both the dense and the sparse weight.

    Args:
        x: Input tensor passed to ``F.linear``.
        b: Bias tensor passed to ``F.linear``.
        weightsize: Side length of the square weight matrix.
        batchsize: Only reported in the output row.
        seqlen: Only reported in the output row.
        blocksize: Block size for both the sparsity pattern and BSR layout.
        sparsity: Fraction of zero blocks, in ``[0, 1]``.

    Returns:
        A ``(row, ratio)`` tuple. ``row`` is a comma-separated string of
        weightsize, batchsize, blocksize, seqlen, sparsity, dense_time,
        sparse_time and ratio. ``ratio`` is ``dense_time / sparse_time``.
    """
    weight = create_blocked_tensor(
        weightsize, weightsize, blocksize=blocksize, sparsity=sparsity)
    weight_bsr = weight.to_sparse_bsr(blocksize=blocksize)

    dense_time = benchmark_in_us(F.linear, x, weight, b)
    sparse_time = benchmark_in_us(F.linear, x, weight_bsr, b)
    ratio = dense_time / sparse_time

    fields = (weightsize, batchsize, blocksize, seqlen,
              sparsity, dense_time, sparse_time, ratio)
    row = ",".join(str(field) for field in fields)
    return row, ratio
def create_experiments():
    """Enumerate every benchmark configuration to run.

    Returns:
        A list of ``(weightsize, batchsize, seqlen, blocksize, sparsity)``
        tuples, the Cartesian product of the parameter grids below.
        ``sparsity`` is an integer percentage.
    """
    # Weight sizes from 2**13 down to 2**10, largest first.
    shapes = [2 ** exponent for exponent in (13, 12, 11, 10)]
    batchsizes = [64, 128, 256]
    seqlens = [256, 512]
    blocksizes = [32, 64]
    # 10%..90% in steps of ten, plus two very sparse settings.
    sparsity = [*range(10, 100, 10), 95, 99]
    grid = itertools.product(shapes, batchsizes, seqlens, blocksizes, sparsity)
    return list(grid)
# Driver: run every configuration and report those where the sparse
# weight was faster than the dense one (ratio above 1).
positives = []
experiments = create_experiments()
for weightsize, batchsize, seqlen, blocksize, sparsity in experiments:
    x = torch.randn(batchsize, seqlen, weightsize, dtype=dtype, device=device)
    b = torch.randn(weightsize, dtype=dtype, device=device)
    # The grid stores sparsity as a percentage; run_benchmark takes a fraction.
    result, ratio = run_benchmark(
        x, b, weightsize, batchsize, seqlen, blocksize, sparsity / 100.0)
    if ratio > 1.0:
        positives.append(result)

# CSV header followed by one line per winning configuration.
print(",".join(["weightsize", "batchsize", "blocksize", "seqlen", "sparsity", "dense_time", "sparse_time", "ratio"]))
print(*positives, sep="\n")
Author
cpuhrsch
commented
Nov 29, 2023
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment