Skip to content

Instantly share code, notes, and snippets.

@cpuhrsch
Created August 16, 2023 20:26
Show Gist options
  • Save cpuhrsch/7fec60079cbe2daeff59c0577f933320 to your computer and use it in GitHub Desktop.
Save cpuhrsch/7fec60079cbe2daeff59c0577f933320 to your computer and use it in GitHub Desktop.
sparse.py
import torch
import torch.nn.functional as F
import itertools
import torch.utils.benchmark as benchmark
import math
# Element type used for every tensor this script creates (half precision).
dtype = torch.float16
# Device on which all tensors are allocated and all timings are taken.
device = "cuda"
def create_blocked_tensor(M, N, blocksize, sparsity):
    """Build a dense (M, N) tensor of ones and zeros with block-structured zeros.

    A coarse ``(M // blocksize, N // blocksize)`` grid is sampled from a
    Bernoulli distribution that keeps each entry with probability
    ``1 - sparsity``; every grid entry is then repeated ``blocksize`` times
    along both dimensions so it becomes a ``blocksize x blocksize`` tile.

    The tensor is created with the module-level ``dtype`` and ``device``.

    Args:
        M: number of rows; must be a multiple of ``blocksize``.
        N: number of columns; must be a multiple of ``blocksize``.
        blocksize: edge length of each square tile.
        sparsity: probability in [0, 1] that a tile is entirely zero.

    Returns:
        A contiguous (M, N) tensor whose entries are 0 or 1.

    Raises:
        AssertionError: if ``sparsity`` is outside [0, 1].
        ValueError: if ``M`` or ``N`` is not a multiple of ``blocksize``.
    """
    # Raised explicitly instead of via ``assert`` so the check still runs
    # under ``python -O``; exception type and message are unchanged.
    if not (sparsity <= 1.0 and sparsity >= 0.0):
        raise AssertionError("sparsity should be a value between 0 and 1")
    # The floor divisions below would otherwise silently produce a tensor
    # smaller than the requested (M, N) shape.
    if M % blocksize != 0 or N % blocksize != 0:
        raise ValueError(
            f"M ({M}) and N ({N}) must both be multiples of "
            f"blocksize ({blocksize})"
        )
    keep_probability = torch.full(
        (M // blocksize, N // blocksize),
        1 - sparsity,
        dtype=dtype,
        device=device,
    )
    A = torch.bernoulli(keep_probability)
    # Expand each grid entry into a full blocksize x blocksize tile.
    A = torch.repeat_interleave(A, blocksize, dim=0)
    A = torch.repeat_interleave(A, blocksize, dim=1)
    return A.contiguous()
def benchmark_in_us(f, *args, **kwargs):
    """Time ``f(*args, **kwargs)`` and return its mean runtime as an int.

    The call is measured with ``torch.utils.benchmark.Timer`` via
    ``blocked_autorange``; the reported mean is multiplied by 1e6 and
    truncated to an integer (microseconds, given that the Timer reports
    seconds).
    """
    # Names the timed statement is allowed to see.
    namespace = {"args": args, "kwargs": kwargs, "f": f}
    timer = benchmark.Timer(stmt="f(*args, **kwargs)", globals=namespace)
    measurement = timer.blocked_autorange()
    return int(measurement.mean * 1e6)
def run_benchmark(x, b, weightsize, batchsize, seqlen, blocksize, sparsity):
    """Compare ``F.linear`` with a dense weight against the same weight in BSR layout.

    A square ``weightsize x weightsize`` block-sparse weight is generated,
    converted with ``to_sparse_bsr``, and both variants are timed with
    ``benchmark_in_us`` on the input ``x`` and bias ``b``.

    ``batchsize`` and ``seqlen`` are not used in the computation; they are
    only written into the returned CSV row.

    Returns:
        A ``(row, ratio)`` tuple where ``row`` is the comma-separated string
        ``weightsize,batchsize,blocksize,seqlen,sparsity,dense_time,
        sparse_time,ratio`` and ``ratio`` is ``dense_time / sparse_time``.
    """
    dense_weight = create_blocked_tensor(
        weightsize, weightsize, blocksize=blocksize, sparsity=sparsity
    )
    bsr_weight = dense_weight.to_sparse_bsr(blocksize=blocksize)

    dense_time = benchmark_in_us(F.linear, x, dense_weight, b)
    sparse_time = benchmark_in_us(F.linear, x, bsr_weight, b)
    # A ratio above 1 means the dense call took longer than the sparse one.
    ratio = dense_time / sparse_time

    fields = (
        weightsize,
        batchsize,
        blocksize,
        seqlen,
        sparsity,
        dense_time,
        sparse_time,
        ratio,
    )
    row = ",".join(str(field) for field in fields)
    return row, ratio
def create_experiments():
    """Return the full benchmark grid as a list of tuples.

    Each tuple is ``(weightsize, batchsize, seqlen, blocksize, sparsity)``,
    enumerated in ``itertools.product`` order (last field varies fastest).
    Sparsity values are integers from 10 to 99; the caller decides how to
    scale them.
    """
    # Powers of two from 2**13 down to 2**10.
    weight_sizes = [2 ** exponent for exponent in (13, 12, 11, 10)]
    batch_sizes = [64, 128, 256]
    sequence_lengths = [256, 512]
    block_sizes = [32, 64]
    sparsity_levels = [*range(10, 100, 10), 95, 99]
    grid = itertools.product(
        weight_sizes,
        batch_sizes,
        sequence_lengths,
        block_sizes,
        sparsity_levels,
    )
    return list(grid)
# Run every configuration and keep the CSV rows whose dense/sparse time
# ratio is above 1.
positives = []
experiments = create_experiments()
for weightsize, batchsize, seqlen, blocksize, sparsity in experiments:
    # Fresh random input and bias for each configuration.
    x = torch.randn(batchsize, seqlen, weightsize, dtype=dtype, device=device)
    b = torch.randn(weightsize, dtype=dtype, device=device)
    # The grid holds sparsity as an integer; pass it on divided by 100.
    result, ratio = run_benchmark(
        x, b, weightsize, batchsize, seqlen, blocksize, sparsity / 100.0
    )
    if ratio > 1.0:
        positives.append(result)

# Header line first, then one line per retained configuration.
print(",".join([
    "weightsize",
    "batchsize",
    "blocksize",
    "seqlen",
    "sparsity",
    "dense_time",
    "sparse_time",
    "ratio",
]))
print("\n".join(positives))
@cpuhrsch
Copy link
Author

weightsize,batchsize,blocksize,seqlen,sparsity,dense_time,sparse_time,ratio
8192,64,32,256,0.9,8889,5777,1.5386879002942704
8192,64,32,256,0.95,8754,3725,2.3500671140939597
8192,64,32,256,0.99,8644,2115,4.086997635933806
8192,64,64,256,0.7,9064,7436,1.2189349112426036
8192,64,64,256,0.8,9017,5420,1.6636531365313654
8192,64,64,256,0.9,8955,3564,2.5126262626262625
8192,64,64,256,0.95,8853,2550,3.471764705882353
8192,64,64,256,0.99,8656,1831,4.727471327143637
8192,64,32,512,0.9,18208,11594,1.5704674831809557
8192,64,32,512,0.95,17650,7479,2.3599411686054284
8192,64,32,512,0.99,17599,4288,4.104244402985074
8192,64,64,512,0.7,18582,14593,1.2733502364147193
8192,64,64,512,0.8,18128,11104,1.6325648414985592
8192,64,64,512,0.9,17801,7205,2.470645385149202
8192,64,64,512,0.95,17923,5223,3.4315527474631438
8192,64,64,512,0.99,17364,3621,4.795360397680199
8192,128,32,256,0.9,18032,11606,1.5536791314837153
8192,128,32,256,0.95,17941,7552,2.3756620762711864
8192,128,32,256,0.99,17781,4295,4.139930151338766
8192,128,64,256,0.6,18338,18318,1.0010918222513374
8192,128,64,256,0.7,18538,14733,1.2582637616235661
8192,128,64,256,0.8,18382,10948,1.6790281329923273
8192,128,64,256,0.9,17892,7082,2.5264049703473597
8192,128,64,256,0.95,17741,5271,3.365774995257067
8192,128,64,256,0.99,17482,3538,4.941209723007349
8192,128,32,512,0.9,36043,23189,1.5543145456897667
8192,128,32,512,0.95,36123,14969,2.413187253657559
8192,128,32,512,0.99,35881,8599,4.172694499360391
8192,128,64,512,0.7,36677,29638,1.237499156488292
8192,128,64,512,0.8,36307,21571,1.6831394001205322
8192,128,64,512,0.9,36529,14294,2.5555477822862738
8192,128,64,512,0.95,36195,11293,3.205082794651554
8192,128,64,512,0.99,34607,7156,4.8360816098378985
8192,256,32,256,0.9,36080,22860,1.5783027121609798
8192,256,32,256,0.95,36289,15240,2.381167979002625
8192,256,32,256,0.99,35913,8663,4.14556158374697
8192,256,64,256,0.7,36563,29162,1.253789177697003
8192,256,64,256,0.8,37023,22289,1.6610435640899097
8192,256,64,256,0.9,36352,14292,2.5435208508256366
8192,256,64,256,0.95,36309,10336,3.5128676470588234
8192,256,64,256,0.99,34966,7290,4.796433470507544
8192,256,32,512,0.9,72892,46382,1.5715579319563624
8192,256,32,512,0.95,72469,30274,2.393770231882143
8192,256,32,512,0.99,71144,17447,4.077721098183069
8192,256,64,512,0.7,73834,59517,1.2405531192768453
8192,256,64,512,0.8,73092,45125,1.6197673130193906
8192,256,64,512,0.9,72559,28481,2.5476282433903306
8192,256,64,512,0.95,71975,20830,3.4553528564570333
8192,256,64,512,0.99,69997,14575,4.80253859348199
4096,64,32,256,0.9,2414,1897,1.2725355824986821
4096,64,32,256,0.95,2399,1436,1.6706128133704736
4096,64,32,256,0.99,2330,948,2.457805907172996
4096,64,64,256,0.7,2401,2216,1.0834837545126355
4096,64,64,256,0.8,2377,1730,1.3739884393063584
4096,64,64,256,0.9,2383,1294,1.8415765069551777
4096,64,64,256,0.95,2357,1074,2.1945996275605215
4096,64,64,256,0.99,2298,829,2.7720144752714115
4096,64,32,512,0.9,4809,3753,1.2813749000799362
4096,64,32,512,0.95,4745,2793,1.698890082348729
4096,64,32,512,0.99,4637,1909,2.429020429544264
4096,64,64,512,0.7,4867,4362,1.115772581384686
4096,64,64,512,0.8,4739,3442,1.3768158047646717
4096,64,64,512,0.9,4783,2620,1.8255725190839696
4096,64,64,512,0.95,4671,2103,2.2211126961483596
4096,64,64,512,0.99,4614,1716,2.6888111888111887
4096,128,32,256,0.9,4780,3774,1.2665606783253842
4096,128,32,256,0.95,4699,2809,1.6728373086507653
4096,128,32,256,0.99,4643,1927,2.4094447327451998
4096,128,64,256,0.7,4833,4288,1.127098880597015
4096,128,64,256,0.8,4818,3404,1.4153936545240893
4096,128,64,256,0.9,4762,2583,1.8435927216415022
4096,128,64,256,0.95,4667,2181,2.1398441082072446
4096,128,64,256,0.99,4625,1659,2.787823990355636
4096,128,32,512,0.9,9593,7581,1.26540034296267
4096,128,32,512,0.95,9481,5612,1.689415538132573
4096,128,32,512,0.99,9241,3678,2.5125067971723762
4096,128,64,512,0.7,9706,8472,1.1456562795089706
4096,128,64,512,0.8,9639,6794,1.418751839858699
4096,128,64,512,0.9,9537,5279,1.8065921576056072
4096,128,64,512,0.95,9369,4168,2.2478406909788866
4096,128,64,512,0.99,9219,3355,2.7478390461997018
4096,256,32,256,0.9,9582,7570,1.2657859973579921
4096,256,32,256,0.95,9472,5557,1.7045168256253374
4096,256,32,256,0.99,9276,3848,2.4106029106029108
4096,256,64,256,0.7,9722,8700,1.1174712643678162
4096,256,64,256,0.8,9605,6765,1.4198078344419809
4096,256,64,256,0.9,9504,5112,1.8591549295774648
4096,256,64,256,0.95,9396,4327,2.1714813958862953
4096,256,64,256,0.99,9200,3379,2.722699023379698
4096,256,32,512,0.9,18763,15159,1.2377465532027179
4096,256,32,512,0.95,18864,10810,1.7450508788159111
4096,256,32,512,0.99,18438,7587,2.4302095689996044
4096,256,64,512,0.7,19078,17674,1.0794387235487157
4096,256,64,512,0.8,19036,13722,1.3872613321673226
4096,256,64,512,0.9,19026,10310,1.8453928225024248
4096,256,64,512,0.95,18572,8607,2.1577785523411177
4096,256,64,512,0.99,18233,6672,2.7327637889688248
2048,64,32,256,0.95,629,577,1.0901213171577122
2048,64,32,256,0.99,617,456,1.3530701754385965
2048,64,64,256,0.9,628,536,1.171641791044776
2048,64,64,256,0.95,627,493,1.2718052738336714
2048,64,64,256,0.99,607,414,1.4661835748792271
2048,64,32,512,0.95,1231,1130,1.0893805309734512
2048,64,32,512,0.99,1161,892,1.3015695067264574
2048,64,64,512,0.9,1237,1079,1.1464318813716403
2048,64,64,512,0.95,1224,929,1.317545748116254
2048,64,64,512,0.99,1199,794,1.5100755667506298
2048,128,32,256,0.95,1234,1121,1.1008028545941124
2048,128,32,256,0.99,1215,883,1.37599093997735
2048,128,64,256,0.9,1236,1099,1.1246587807097361
2048,128,64,256,0.95,1236,951,1.2996845425867507
2048,128,64,256,0.99,1195,777,1.537966537966538
2048,128,32,512,0.95,2505,2224,1.1263489208633093
2048,128,32,512,0.99,2378,1677,1.4180083482409065
2048,128,64,512,0.9,2512,2131,1.1787893007977475
2048,128,64,512,0.95,2375,1901,1.2493424513413993
2048,128,64,512,0.99,2422,1632,1.4840686274509804
2048,256,32,256,0.95,2509,2259,1.1106684373616644
2048,256,32,256,0.99,2432,1734,1.4025374855824684
2048,256,64,256,0.9,2515,2192,1.1473540145985401
2048,256,64,256,0.95,2431,1827,1.3305966064586754
2048,256,64,256,0.99,2438,1579,1.5440151994933502
2048,256,32,512,0.95,4964,4520,1.0982300884955751
2048,256,32,512,0.99,4764,3452,1.3800695249130939
2048,256,64,512,0.9,4978,4188,1.1886341929321873
2048,256,64,512,0.95,4826,3706,1.3022126281705342
2048,256,64,512,0.99,4797,3225,1.4874418604651163

@cpuhrsch
Copy link
Author

weightsize,batchsize,blocksize,seqlen,sparsity,dense_time,sparse_time,ratio
8192,64,32,256,0.9,8995,5828,1.543411118737131
8192,64,32,256,0.95,8925,3750,2.38
8192,64,32,256,0.99,8734,2148,4.06610800744879
8192,64,64,256,0.7,9161,7392,1.2393127705627707
8192,64,64,256,0.8,9079,5463,1.6619073768991397
8192,64,64,256,0.9,8915,3657,2.4377905386929175
8192,64,64,256,0.95,8899,2633,3.379794910748196
8192,64,64,256,0.99,8687,1837,4.728905824714208
8192,64,32,512,0.9,18169,11478,1.5829412789684614
8192,64,32,512,0.95,18020,7511,2.3991479163892957
8192,64,32,512,0.99,17807,4351,4.092622385658469
8192,64,64,512,0.7,18609,14751,1.2615415904006508
8192,64,64,512,0.8,18286,10838,1.687211662668389
8192,64,64,512,0.9,17933,7036,2.54874928936896
8192,64,64,512,0.95,18086,5168,3.499613003095975
8192,64,64,512,0.99,17533,3647,4.80751302440362
8192,128,32,256,0.9,18329,11610,1.5787252368647717
8192,128,32,256,0.95,17930,7503,2.3897107823537254
8192,128,32,256,0.99,17506,4356,4.0188246097337
8192,128,64,256,0.7,18754,14814,1.265964628054543
8192,128,64,256,0.8,18430,11019,1.6725655685633904
8192,128,64,256,0.9,18130,7350,2.466666666666667
8192,128,64,256,0.95,18083,5132,3.523577552611068
8192,128,64,256,0.99,17411,3676,4.736398258977149
8192,128,32,512,0.9,35676,23119,1.543146329858558
8192,128,32,512,0.95,35746,15047,2.3756230477836113
8192,128,32,512,0.99,35719,8648,4.13031914893617
8192,128,64,512,0.6,37149,37083,1.0017797912790227
8192,128,64,512,0.7,36588,28952,1.2637468914064658
8192,128,64,512,0.8,36522,21827,1.6732487286388418
8192,128,64,512,0.9,36616,14011,2.6133752051959176
8192,128,64,512,0.95,35960,10188,3.529642716921869
8192,128,64,512,0.99,34942,7092,4.926959954878736
8192,256,32,256,0.9,35969,23386,1.5380569571538527
8192,256,32,256,0.95,36707,15123,2.427230046948357
8192,256,32,256,0.99,35863,8643,4.149369431910216
8192,256,64,256,0.6,37325,36614,1.0194188015513193
8192,256,64,256,0.7,36952,29522,1.25167671566967
8192,256,64,256,0.8,36610,21741,1.6839151832942367
8192,256,64,256,0.9,36841,14212,2.592245989304813
8192,256,64,256,0.95,36250,10454,3.4675722211593647
8192,256,64,256,0.99,35005,7384,4.740655471289274
8192,256,32,512,0.9,72132,46334,1.5567833556351707
8192,256,32,512,0.95,72559,29951,2.42259023071016
8192,256,32,512,0.99,71383,17507,4.077397612383618
8192,256,64,512,0.6,74475,74128,1.0046810921649039
8192,256,64,512,0.7,73362,59413,1.2347802669449448
8192,256,64,512,0.8,72960,44036,1.6568262330820238
8192,256,64,512,0.9,72577,28181,2.575387672545332
8192,256,64,512,0.95,71984,20419,3.5253440423135314
8192,256,64,512,0.99,70058,14476,4.839596573639127
4096,64,32,256,0.9,2396,1920,1.2479166666666666
4096,64,32,256,0.95,2405,1405,1.7117437722419928
4096,64,32,256,0.99,2333,946,2.466173361522199
4096,64,64,256,0.7,2447,2191,1.1168416248288453
4096,64,64,256,0.8,2423,1777,1.3635340461451886
4096,64,64,256,0.9,2419,1327,1.8229088168801808
4096,64,64,256,0.95,2366,1114,2.1238779174147218
4096,64,64,256,0.99,2316,852,2.7183098591549295
4096,64,32,512,0.9,4822,3798,1.269615587151132
4096,64,32,512,0.95,4707,2754,1.7091503267973855
4096,64,32,512,0.99,4647,1928,2.4102697095435683
4096,64,64,512,0.7,4837,4281,1.1298761971501985
4096,64,64,512,0.8,4819,3547,1.3586129123202706
4096,64,64,512,0.9,4729,2503,1.889332800639233
4096,64,64,512,0.95,4717,2137,2.2072999532054283
4096,64,64,512,0.99,4588,1628,2.8181818181818183
4096,128,32,256,0.9,4772,3785,1.260766182298547
4096,128,32,256,0.95,4765,2806,1.6981468282252317
4096,128,32,256,0.99,4581,1880,2.4367021276595744
4096,128,64,256,0.7,4856,4339,1.1191518783129752
4096,128,64,256,0.8,4798,3387,1.4165928550339533
4096,128,64,256,0.9,4767,2603,1.8313484441029582
4096,128,64,256,0.95,4705,2205,2.133786848072562
4096,128,64,256,0.99,4604,1688,2.727488151658768
4096,128,32,512,0.9,9558,7558,1.2646202699126754
4096,128,32,512,0.95,9446,5571,1.6955663256147908
4096,128,32,512,0.99,9222,3715,2.4823687752355315
4096,128,64,512,0.7,9626,8740,1.1013729977116704
4096,128,64,512,0.8,9590,6967,1.376489163197933
4096,128,64,512,0.9,9378,5033,1.863302205444069
4096,128,64,512,0.95,9357,4278,2.187237026647966
4096,128,64,512,0.99,9164,3263,2.808458473797119
4096,256,32,256,0.9,9536,7571,1.2595429929996038
4096,256,32,256,0.95,9420,5570,1.6912028725314183
4096,256,32,256,0.99,9197,3782,2.4317821258593337
4096,256,64,256,0.7,9625,8631,1.1151662611516626
4096,256,64,256,0.8,9571,6799,1.4077070157376084
4096,256,64,256,0.9,9407,5070,1.8554240631163708
4096,256,64,256,0.95,9351,4223,2.214302628463178
4096,256,64,256,0.99,9159,3312,2.7653985507246377
4096,256,32,512,0.9,19059,15134,1.2593498083784855
4096,256,32,512,0.95,18950,11193,1.6930224247297418
4096,256,32,512,0.99,18282,7495,2.439226150767178
4096,256,64,512,0.7,19043,17552,1.0849475843208751
4096,256,64,512,0.8,18931,13722,1.3796093863868242
4096,256,64,512,0.9,18658,10418,1.7909387598387407
4096,256,64,512,0.95,18668,8685,2.149453080023028
4096,256,64,512,0.99,18244,6712,2.7181168057210967
2048,64,32,256,0.95,633,584,1.0839041095890412
2048,64,32,256,0.99,616,453,1.359823399558499
2048,64,64,256,0.9,631,566,1.1148409893992932
2048,64,64,256,0.95,623,484,1.287190082644628
2048,64,64,256,0.99,607,409,1.4841075794621026
2048,64,32,512,0.95,1236,1143,1.0813648293963254
2048,64,32,512,0.99,1200,893,1.343784994400896
2048,64,64,512,0.8,1255,1232,1.0186688311688312
2048,64,64,512,0.9,1238,1098,1.127504553734062
2048,64,64,512,0.95,1230,955,1.287958115183246
2048,64,64,512,0.99,1197,808,1.4814356435643565
2048,128,32,256,0.95,1243,1133,1.0970873786407767
2048,128,32,256,0.99,1220,880,1.3863636363636365
2048,128,64,256,0.9,1237,1122,1.1024955436720143
2048,128,64,256,0.95,1227,931,1.317937701396348
2048,128,64,256,0.99,1199,782,1.5332480818414322
2048,128,32,512,0.95,2443,2261,1.0804953560371517
2048,128,32,512,0.99,2429,1728,1.4056712962962963
2048,128,64,512,0.9,2495,2087,1.195495927168184
2048,128,64,512,0.95,2392,1882,1.2709883103081827
2048,128,64,512,0.99,2463,1601,1.53841349156777
2048,256,32,256,0.95,2430,2299,1.0569812962157459
2048,256,32,256,0.99,2417,1733,1.3946912867859205
2048,256,64,256,0.8,2564,2537,1.0106424911312575
2048,256,64,256,0.9,2501,2127,1.1758345086976962
2048,256,64,256,0.95,2433,1850,1.315135135135135
2048,256,64,256,0.99,2436,1583,1.5388502842703726
2048,256,32,512,0.95,4958,4487,1.1049699130822375
2048,256,32,512,0.99,4820,3453,1.3958876339415
2048,256,64,512,0.9,4931,4298,1.1472778036295952
2048,256,64,512,0.95,4859,3715,1.3079407806191117
2048,256,64,512,0.99,4834,3115,1.5518459069020867
1024,256,64,256,0.99,611,331,1.8459214501510575

@cpuhrsch
Copy link
Author

weightsize,batchsize,blocksize,seqlen,sparsity,dense_time,sparse_time,ratio
8192,64,32,256,0.8,8116,6962,1.1657569663889686
8192,64,32,256,0.9,8075,4323,1.8679157992135091
8192,64,32,256,0.95,8028,3092,2.596377749029754
8192,64,32,256,0.99,7923,2273,3.4857017157941046
8192,64,64,256,0.6,8194,7285,1.12477693891558
8192,64,64,256,0.7,8173,5903,1.3845502286972726
8192,64,64,256,0.8,8111,4533,1.7893227443194353
8192,64,64,256,0.9,8045,3158,2.5474984167194425
8192,64,64,256,0.95,8007,2554,3.135082223962412
8192,64,64,256,0.99,7927,2095,3.7837708830548924
8192,64,32,512,0.8,16155,13954,1.1577325498065072
8192,64,32,512,0.9,15981,8689,1.8392220048336978
8192,64,32,512,0.95,15946,6101,2.6136698901819373
8192,64,32,512,0.99,15630,4500,3.473333333333333
8192,64,64,512,0.6,16314,14352,1.1367056856187292
8192,64,64,512,0.7,16267,11749,1.3845433653927994
8192,64,64,512,0.8,16112,9062,1.7779739571838447
8192,64,64,512,0.9,15972,6381,2.5030559473436766
8192,64,64,512,0.95,15963,5113,3.122041854097399
8192,64,64,512,0.99,15588,4137,3.7679477882523567
8192,128,32,256,0.8,16150,13770,1.1728395061728396
8192,128,32,256,0.9,15978,8720,1.8323394495412844
8192,128,32,256,0.95,15969,6128,2.6059073107049606
8192,128,32,256,0.99,15742,4505,3.4943396226415095
8192,128,64,256,0.6,16410,14559,1.1271378528745106
8192,128,64,256,0.7,16327,11780,1.385993208828523
8192,128,64,256,0.8,16144,9074,1.779149217544633
8192,128,64,256,0.9,15972,6359,2.511715678565812
8192,128,64,256,0.95,15961,5113,3.121650694308625
8192,128,64,256,0.99,15608,4142,3.7682279092225976
8192,128,32,512,0.8,32404,27878,1.162350240332879
8192,128,32,512,0.9,32161,17185,1.871457666569683
8192,128,32,512,0.95,31930,12193,2.6187156565242353
8192,128,32,512,0.99,31512,9054,3.4804506295559974
8192,128,64,512,0.6,33158,30366,1.0919449384179674
8192,128,64,512,0.7,32586,24185,1.3473640686375852
8192,128,64,512,0.8,32412,19299,1.679465257267216
8192,128,64,512,0.9,32142,13657,2.3535183422420736
8192,128,64,512,0.95,31820,11025,2.886167800453515
8192,128,64,512,0.99,31493,8801,3.578343370071583
8192,256,32,256,0.8,32555,28101,1.1584996975196613
8192,256,32,256,0.9,32223,17222,1.8710370456392986
8192,256,32,256,0.95,31817,12146,2.6195455293923926
8192,256,32,256,0.99,31600,9035,3.497509684560044
8192,256,64,256,0.6,33230,29815,1.1145396612443401
8192,256,64,256,0.7,32948,24538,1.3427337191295134
8192,256,64,256,0.8,32554,19472,1.6718364831552999
8192,256,64,256,0.9,32163,13611,2.3630152082874147
8192,256,64,256,0.95,31905,11054,2.8862855075085943
8192,256,64,256,0.99,31508,8637,3.6480259349311104
8192,256,32,512,0.8,65233,57490,1.1346842929205079
8192,256,32,512,0.9,65038,35076,1.8542023035693922
8192,256,32,512,0.95,64529,24502,2.6336217451636603
8192,256,32,512,0.99,63651,17601,3.6163286176921767
8192,256,64,512,0.6,65928,59839,1.1017563796186434
8192,256,64,512,0.7,65471,47404,1.3811281748375666
8192,256,64,512,0.8,65051,35744,1.8199138316920322
8192,256,64,512,0.9,64728,25610,2.5274502147598596
8192,256,64,512,0.95,64389,20339,3.1657898618417817
8192,256,64,512,0.99,63083,16722,3.772455447912929
4096,64,32,256,0.8,2212,2189,1.0105070808588397
4096,64,32,256,0.9,2188,1541,1.4198572355613237
4096,64,32,256,0.95,2166,1259,1.720413026211279
4096,64,32,256,0.99,2166,1071,2.022408963585434
4096,64,64,256,0.7,2212,1935,1.1431524547803618
4096,64,64,256,0.8,2188,1582,1.3830594184576486
4096,64,64,256,0.9,2188,1288,1.6987577639751552
4096,64,64,256,0.95,2166,1163,1.8624247635425624
4096,64,64,256,0.99,2166,991,2.1856710393541876
4096,64,32,512,0.8,4402,4348,1.0124195032198713
4096,64,32,512,0.9,4371,3095,1.4122778675282714
4096,64,32,512,0.95,4350,2470,1.7611336032388665
4096,64,32,512,0.99,4306,2100,2.0504761904761906
4096,64,64,512,0.7,4408,3866,1.1401965856182101
4096,64,64,512,0.8,4398,3209,1.3705204113430975
4096,64,64,512,0.9,4343,2509,1.730968513351933
4096,64,64,512,0.95,4307,2303,1.8701693443334781
4096,64,64,512,0.99,4306,1958,2.199182839632278
4096,128,32,256,0.8,4394,4360,1.0077981651376147
4096,128,32,256,0.9,4362,3076,1.418075422626788
4096,128,32,256,0.95,4329,2485,1.7420523138832997
4096,128,32,256,0.99,4306,2105,2.045605700712589
4096,128,64,256,0.7,4416,3810,1.1590551181102362
4096,128,64,256,0.8,4408,3136,1.405612244897959
4096,128,64,256,0.9,4349,2536,1.7149053627760253
4096,128,64,256,0.95,4307,2299,1.8734232274902132
4096,128,64,256,0.99,4306,1928,2.233402489626556
4096,128,32,512,0.8,8766,8581,1.0215592588276425
4096,128,32,512,0.9,8642,6050,1.4284297520661158
4096,128,32,512,0.95,8586,4974,1.7261761158021713
4096,128,32,512,0.99,8584,4214,2.0370194589463693
4096,128,64,512,0.7,8765,7611,1.1516226514255683
4096,128,64,512,0.8,8742,6232,1.402759948652118
4096,128,64,512,0.9,8673,5086,1.7052693668895005
4096,128,64,512,0.95,8586,4560,1.8828947368421052
4096,128,64,512,0.99,8586,3915,2.193103448275862
4096,256,32,256,0.8,8751,8588,1.018979972054029
4096,256,32,256,0.9,8674,6071,1.4287596771536815
4096,256,32,256,0.95,8673,4979,1.741916047399076
4096,256,32,256,0.99,8582,4227,2.030281523539153
4096,256,64,256,0.7,8769,7577,1.1573181998152302
4096,256,64,256,0.8,8735,6252,1.3971529110684582
4096,256,64,256,0.9,8674,5007,1.7323746754543639
4096,256,64,256,0.95,8583,4518,1.899734395750332
4096,256,64,256,0.99,8584,3882,2.211231324059763
4096,256,32,512,0.9,17291,12352,1.3998542746113989
4096,256,32,512,0.95,17293,9887,1.749064428036816
4096,256,32,512,0.99,17110,8242,2.075952438728464
4096,256,64,512,0.7,17475,15210,1.1489151873767258
4096,256,64,512,0.8,17466,12491,1.3982867664718597
4096,256,64,512,0.9,17289,10010,1.7271728271728273
4096,256,64,512,0.95,17114,9198,1.8606218743205045
4096,256,64,512,0.99,17113,7887,2.169773044250032
2048,64,32,256,0.99,557,507,1.0986193293885602
2048,64,64,256,0.95,565,539,1.0482374768089053
2048,64,64,256,0.99,559,472,1.1843220338983051
2048,64,32,512,0.99,1092,968,1.128099173553719
2048,64,64,512,0.95,1100,1074,1.0242085661080074
2048,64,64,512,0.99,1090,965,1.1295336787564767
2048,128,32,256,0.99,1091,1009,1.081268582755203
2048,128,64,256,0.99,1090,929,1.1733046286329387
2048,128,32,512,0.99,2181,1964,1.1104887983706722
2048,128,64,512,0.95,2203,2115,1.0416075650118204
2048,128,64,512,0.99,2177,1865,1.1672922252010725
2048,256,32,256,0.99,2177,1962,1.1095820591233436
2048,256,64,256,0.95,2205,2174,1.0142594296228151
2048,256,64,256,0.99,2177,1836,1.1857298474945535
2048,256,32,512,0.99,4377,3853,1.1359979236958215
2048,256,64,512,0.95,4434,4351,1.0190760744656402
2048,256,64,512,0.99,4335,3684,1.1767100977198697

@cpuhrsch
Copy link
Author

weightsize,batchsize,blocksize,seqlen,sparsity,dense_time,sparse_time,ratio
8192,64,32,256,0.8,8131,6321,1.2863470969783262
8192,64,32,256,0.9,8104,3244,2.4981504315659677
8192,64,32,256,0.95,8042,2020,3.981188118811881
8192,64,32,256,0.99,7946,1558,5.100128369704749
8192,64,64,256,0.5,8266,7610,1.0862023653088042
8192,64,64,256,0.6,8218,6288,1.3069338422391859
8192,64,64,256,0.7,8187,4898,1.671498570845243
8192,64,64,256,0.8,8142,3861,2.108780108780109
8192,64,64,256,0.9,8061,2157,3.737134909596662
8192,64,64,256,0.95,8014,1569,5.107711918419375
8192,64,64,256,0.99,7926,1085,7.305069124423963
8192,64,32,512,0.8,16140,11955,1.3500627352572145
8192,64,32,512,0.9,15958,6533,2.442675646716669
8192,64,32,512,0.95,15940,3980,4.005025125628141
8192,64,32,512,0.99,15757,2418,6.516542597187758
8192,64,64,512,0.5,16490,15432,1.068558838776568
8192,64,64,512,0.6,16373,12775,1.2816438356164384
8192,64,64,512,0.7,16286,9777,1.6657461388974122
8192,64,64,512,0.8,16191,7034,2.3018197327267558
8192,64,64,512,0.9,16140,4284,3.7675070028011204
8192,64,64,512,0.95,15977,3033,5.267721727662381
8192,64,64,512,0.99,15611,2074,7.5270009643201545
8192,128,32,256,0.8,16149,11899,1.3571728716698883
8192,128,32,256,0.9,15947,6578,2.4242930982061415
8192,128,32,256,0.95,15955,3993,3.995742549461558
8192,128,32,256,0.99,15769,2390,6.597907949790795
8192,128,64,256,0.5,16743,15667,1.068679389800217
8192,128,64,256,0.6,16420,12649,1.298126334097557
8192,128,64,256,0.7,16309,9862,1.653721354694788
8192,128,64,256,0.8,16239,6947,2.337555779473154
8192,128,64,256,0.9,16181,4160,3.8896634615384613
8192,128,64,256,0.95,16046,3057,5.248936866208702
8192,128,64,256,0.99,15728,2035,7.728746928746928
8192,128,32,512,0.8,32701,23634,1.3836422103748836
8192,128,32,512,0.9,32654,13049,2.5024139780826116
8192,128,32,512,0.95,32069,8202,3.9099000243842967
8192,128,32,512,0.99,31674,4982,6.357687675632276
8192,128,64,512,0.5,33014,31721,1.0407616405535765
8192,128,64,512,0.6,33110,26052,1.2709196990634117
8192,128,64,512,0.7,32806,20657,1.5881299317422666
8192,128,64,512,0.8,32533,15117,2.15208043924059
8192,128,64,512,0.9,32418,9437,3.43520186499947
8192,128,64,512,0.95,31939,6906,4.624818997972778
8192,128,64,512,0.99,31457,4441,7.083314568790813
8192,256,32,256,0.8,32576,23851,1.3658127541822145
8192,256,32,256,0.9,32333,13163,2.456354934285497
8192,256,32,256,0.95,32046,7768,4.125386199794026
8192,256,32,256,0.99,31725,4901,6.47316874107325
8192,256,64,256,0.5,32959,32037,1.0287792240222242
8192,256,64,256,0.6,33096,25863,1.279665932026447
8192,256,64,256,0.7,33006,20750,1.5906506024096385
8192,256,64,256,0.8,32514,15121,2.1502546127901594
8192,256,64,256,0.9,32649,9827,3.322377124249517
8192,256,64,256,0.95,31853,6768,4.7064125295508275
8192,256,64,256,0.99,31504,4632,6.801381692573402
8192,256,32,512,0.8,65223,49911,1.3067860792210133
8192,256,32,512,0.9,65173,26720,2.439109281437126
8192,256,32,512,0.95,64679,16855,3.837377632749926
8192,256,32,512,0.99,63603,9025,7.0474238227146815
8192,256,64,512,0.5,66261,62287,1.0638014352914733
8192,256,64,512,0.6,65846,50688,1.2990451388888888
8192,256,64,512,0.7,65337,39494,1.6543525598825137
8192,256,64,512,0.8,65206,28054,2.3243031296784773
8192,256,64,512,0.9,64540,17535,3.68063872255489
8192,256,64,512,0.95,64093,12162,5.269939154744286
8192,256,64,512,0.99,63471,8309,7.638825370080635
4096,64,32,256,0.8,2211,1679,1.3168552709946397
4096,64,32,256,0.9,2188,980,2.23265306122449
4096,64,32,256,0.95,2188,737,2.9687924016282223
4096,64,32,256,0.99,2165,526,4.11596958174905
4096,64,64,256,0.5,2243,2113,1.0615238996687175
4096,64,64,256,0.6,2237,1708,1.309718969555035
4096,64,64,256,0.7,2236,1344,1.6636904761904763
4096,64,64,256,0.8,2195,1071,2.049486461251167
4096,64,64,256,0.9,2188,770,2.8415584415584414
4096,64,64,256,0.95,2165,633,3.420221169036335
4096,64,64,256,0.99,2165,435,4.977011494252873
4096,64,32,512,0.8,4415,3305,1.3358547655068078
4096,64,32,512,0.9,4393,2208,1.9895833333333333
4096,64,32,512,0.95,4326,1426,3.0336605890603088
4096,64,32,512,0.99,4306,1046,4.1166347992351815
4096,64,64,512,0.5,4461,4165,1.0710684273709483
4096,64,64,512,0.6,4444,3386,1.312463083284111
4096,64,64,512,0.7,4397,2734,1.6082662765179225
4096,64,64,512,0.8,4382,2065,2.1220338983050846
4096,64,64,512,0.9,4350,1518,2.8656126482213438
4096,64,64,512,0.95,4305,1240,3.471774193548387
4096,64,64,512,0.99,4307,880,4.894318181818182
4096,128,32,256,0.8,4371,3320,1.316566265060241
4096,128,32,256,0.9,4351,2016,2.158234126984127
4096,128,32,256,0.95,4311,1406,3.0661450924608817
4096,128,32,256,0.99,4306,1031,4.176527643064985
4096,128,64,256,0.5,4427,4040,1.0957920792079208
4096,128,64,256,0.6,4413,3434,1.2850902737332557
4096,128,64,256,0.7,4392,2831,1.5513952666902155
4096,128,64,256,0.8,4351,2089,2.082814743896601
4096,128,64,256,0.9,4340,1531,2.834748530372306
4096,128,64,256,0.95,4306,1237,3.481002425222312
4096,128,64,256,0.99,4306,941,4.575982996811902
4096,128,32,512,0.8,8683,6357,1.3658958628283782
4096,128,32,512,0.9,8673,4071,2.130434782608696
4096,128,32,512,0.95,8586,2852,3.0105189340813463
4096,128,32,512,0.99,8584,2112,4.0643939393939394
4096,128,64,512,0.5,8765,8143,1.0763846248311433
4096,128,64,512,0.6,8764,6915,1.2673897324656545
4096,128,64,512,0.7,8723,5530,1.577396021699819
4096,128,64,512,0.8,8679,4137,2.097897026831037
4096,128,64,512,0.9,8585,3061,2.804639006860503
4096,128,64,512,0.95,8584,2434,3.5267050123253902
4096,128,64,512,0.99,8585,1769,4.853024307518372
4096,256,32,256,0.8,8684,6390,1.358998435054773
4096,256,32,256,0.9,8657,3967,2.1822535921351145
4096,256,32,256,0.95,8585,2907,2.953216374269006
4096,256,32,256,0.99,8584,2085,4.117026378896883
4096,256,64,256,0.5,8808,8458,1.0413809411208323
4096,256,64,256,0.6,8732,6778,1.288285629979345
4096,256,64,256,0.7,8694,5561,1.5633878798777199
4096,256,64,256,0.8,8674,4333,2.001846295868913
4096,256,64,256,0.9,8585,3088,2.7801165803108807
4096,256,64,256,0.95,8584,2505,3.426746506986028
4096,256,64,256,0.99,8586,1873,4.584089695675387
4096,256,32,512,0.8,17291,13604,1.271023228462217
4096,256,32,512,0.9,17291,7995,2.162726704190119
4096,256,32,512,0.95,17114,5813,2.944090830896267
4096,256,32,512,0.99,17116,4014,4.2640757349277525
4096,256,64,512,0.5,17605,16694,1.0545705043728286
4096,256,64,512,0.6,17454,13535,1.2895456224602881
4096,256,64,512,0.7,17467,11141,1.56781258414864
4096,256,64,512,0.8,17292,8272,2.0904255319148937
4096,256,64,512,0.9,17150,6105,2.809172809172809
4096,256,64,512,0.95,17117,5010,3.416566866267465
4096,256,64,512,0.99,17112,3362,5.08982748364069
2048,64,32,256,0.8,581,519,1.1194605009633911
2048,64,32,256,0.9,576,366,1.5737704918032787
2048,64,32,256,0.95,569,317,1.7949526813880126
2048,64,32,256,0.99,562,277,2.0288808664259927
2048,64,64,256,0.6,584,539,1.0834879406307978
2048,64,64,256,0.7,583,452,1.2898230088495575
2048,64,64,256,0.8,577,380,1.518421052631579
2048,64,64,256,0.9,573,324,1.7685185185185186
2048,64,64,256,0.95,568,281,2.02135231316726
2048,64,64,256,0.99,557,276,2.0181159420289854
2048,64,32,512,0.8,1139,1006,1.13220675944334
2048,64,32,512,0.9,1125,731,1.5389876880984952
2048,64,32,512,0.95,1117,646,1.7291021671826625
2048,64,32,512,0.99,1090,458,2.3799126637554586
2048,64,64,512,0.5,1151,1139,1.0105355575065846
2048,64,64,512,0.6,1143,1042,1.0969289827255277
2048,64,64,512,0.7,1137,910,1.2494505494505495
2048,64,64,512,0.8,1127,762,1.479002624671916
2048,64,64,512,0.9,1123,662,1.6963746223564955
2048,64,64,512,0.95,1104,569,1.9402460456942003
2048,64,64,512,0.99,1092,403,2.7096774193548385
2048,128,32,256,0.8,1138,1004,1.1334661354581674
2048,128,32,256,0.9,1127,729,1.5459533607681757
2048,128,32,256,0.95,1114,631,1.7654516640253566
2048,128,32,256,0.99,1090,468,2.3290598290598292
2048,128,64,256,0.6,1144,1087,1.0524379024839006
2048,128,64,256,0.7,1139,891,1.2783389450056117
2048,128,64,256,0.8,1128,793,1.4224464060529634
2048,128,64,256,0.9,1117,661,1.6898638426626325
2048,128,64,256,0.95,1102,541,2.0369685767097967
2048,128,64,256,0.99,1086,411,2.6423357664233578
2048,128,32,512,0.8,2253,2040,1.1044117647058824
2048,128,32,512,0.9,2227,1426,1.5617110799438991
2048,128,32,512,0.95,2219,1237,1.793856103476152
2048,128,32,512,0.99,2172,931,2.3329752953813103
2048,128,64,512,0.6,2279,2091,1.0899091343854614
2048,128,64,512,0.7,2258,1790,1.2614525139664805
2048,128,64,512,0.8,2246,1481,1.5165428764348414
2048,128,64,512,0.9,2227,1248,1.7844551282051282
2048,128,64,512,0.95,2177,1077,2.021355617455896
2048,128,64,512,0.99,2154,716,3.0083798882681565
2048,256,32,256,0.8,2252,1977,1.139099645928174
2048,256,32,256,0.9,2229,1440,1.5479166666666666
2048,256,32,256,0.95,2227,1231,1.809098294069862
2048,256,32,256,0.99,2158,894,2.413870246085011
2048,256,64,256,0.6,2284,2072,1.1023166023166022
2048,256,64,256,0.7,2256,1734,1.301038062283737
2048,256,64,256,0.8,2230,1446,1.5421853388658369
2048,256,64,256,0.9,2229,1242,1.7946859903381642
2048,256,64,256,0.95,2195,1099,1.997270245677889
2048,256,64,256,0.99,2158,824,2.6189320388349513
2048,256,32,512,0.9,4489,3418,1.313341135166764
2048,256,32,512,0.95,4434,2562,1.730679156908665
2048,256,32,512,0.99,4340,1676,2.5894988066825775
2048,256,64,512,0.6,4548,4105,1.107917174177832
2048,256,64,512,0.7,4523,3457,1.3083598495805613
2048,256,64,512,0.8,4522,2920,1.5486301369863014
2048,256,64,512,0.9,4440,2338,1.8990590248075279
2048,256,64,512,0.95,4416,2213,1.9954812471757795
2048,256,64,512,0.99,4326,1450,2.983448275862069
1024,64,32,512,0.95,282,278,1.014388489208633
1024,64,64,512,0.9,282,277,1.0180505415162455
1024,64,64,512,0.95,282,277,1.0180505415162455
1024,64,64,512,0.99,279,82,3.402439024390244
1024,128,32,256,0.99,279,277,1.0072202166064983
1024,128,64,256,0.9,285,278,1.025179856115108
1024,128,64,256,0.95,282,276,1.0217391304347827
1024,128,64,256,0.99,279,276,1.0108695652173914
1024,128,32,512,0.95,567,560,1.0125
1024,128,32,512,0.99,556,388,1.4329896907216495
1024,128,64,512,0.9,568,550,1.0327272727272727
1024,128,64,512,0.95,559,485,1.1525773195876288
1024,128,64,512,0.99,553,375,1.4746666666666666
1024,256,32,256,0.95,566,556,1.0179856115107915
1024,256,32,256,0.99,558,457,1.2210065645514223
1024,256,64,256,0.95,564,504,1.119047619047619
1024,256,64,256,0.99,558,389,1.4344473007712082
1024,256,32,512,0.95,1115,1061,1.0508953817153628
1024,256,32,512,0.99,1102,797,1.3826850690087829
1024,256,64,512,0.95,1100,931,1.1815252416756177
1024,256,64,512,0.99,1087,742,1.4649595687331536

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment