Created
April 16, 2020 23:50
-
-
Save jowens/69accb35536621d0a03abdba063c650f to your computer and use it in GitHub Desktop.
This file has been truncated, but you can view the full file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"config": {"view": {"continuousWidth": 400, "continuousHeight": 300}}, | |
"layer": [ | |
{ | |
"mark": "point", | |
"encoding": { | |
"x": { | |
"type": "quantitative", | |
"axis": {"title": "MxNxK"}, | |
"field": "MxNxK", | |
"scale": {"type": "log"} | |
}, | |
"y": { | |
"type": "quantitative", | |
"aggregate": "mean", | |
"axis": {"title": "Runtime (ms)"}, | |
"field": "perf", | |
"scale": {"type": "log"} | |
} | |
}, | |
"selection": { | |
"selector001": { | |
"type": "interval", | |
"bind": "scales", | |
"encodings": ["x", "y"] | |
} | |
} | |
}, | |
{ | |
"mark": {"type": "errorband", "extent": "ci"}, | |
"encoding": { | |
"x": { | |
"type": "quantitative", | |
"axis": {"title": "MxNxK"}, | |
"field": "MxNxK", | |
"scale": {"type": "log"} | |
}, | |
"y": { | |
"type": "quantitative", | |
"aggregate": "mean", | |
"axis": {"title": "Runtime (ms)"}, | |
"field": "perf", | |
"scale": {"type": "log"} | |
} | |
}, | |
"selection": { | |
"selector002": { | |
"type": "interval", | |
"bind": "scales", | |
"encodings": ["x", "y"] | |
} | |
} | |
} | |
], | |
"data": {"name": "data-0e4914f6c5c15972c83660a50d164be0"}, | |
"encoding": { | |
"color": { | |
"type": "nominal", | |
"field": "schedule", | |
"legend": {"title": "Schedule"}, | |
"scale": { | |
"domain": ["roofline", "stream-kxmxn", "output-mxn", "cublas"], | |
"range": ["#377eb8", "#e41a1c", "#ff7f00", "#4daf4a"] | |
} | |
}, | |
"shape": { | |
"type": "nominal", | |
"field": "schedule", | |
"legend": {"title": "Schedule"} | |
} | |
}, | |
"$schema": "https://vega.github.io/schema/vega-lite/v4.8.1.json", | |
"datasets": { | |
"data-0e4914f6c5c15972c83660a50d164be0": [ | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 6.613455154265933e-7, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 6.551492247803546, | |
"compute_intensity": 7.501831501831502, | |
"tile_compute_intensity": 0.64, | |
"MxNxK": 524288, | |
"size_m": 1024, | |
"size_n": 64, | |
"size_k": 8, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_1024x64x8.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.000009486887393705613, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.613679913574441, | |
"compute_intensity": 7.937984496124031, | |
"tile_compute_intensity": 0.9142857142857143, | |
"MxNxK": 8388608, | |
"size_m": 1024, | |
"size_n": 1024, | |
"size_k": 8, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_1024x1024x8.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0003006613543235933, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.1054563464628848, | |
"compute_intensity": 15.953261927945473, | |
"tile_compute_intensity": 1.9248120300751879, | |
"MxNxK": 536870912, | |
"size_m": 4096, | |
"size_n": 8192, | |
"size_k": 16, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_4096x8192x16.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.000008566797545815716, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 2.5030590631757748, | |
"compute_intensity": 97.52380952380952, | |
"tile_compute_intensity": 2.909090909090909, | |
"MxNxK": 33554432, | |
"size_m": 256, | |
"size_n": 1024, | |
"size_k": 128, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_256x1024x128.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.00006853438036652573, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.7266195150506327, | |
"compute_intensity": 56.79029462738301, | |
"tile_compute_intensity": 3.9384615384615387, | |
"MxNxK": 268435456, | |
"size_m": 16384, | |
"size_n": 256, | |
"size_k": 64, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_16384x256x64.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0010965500858644117, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.2103735787124164, | |
"compute_intensity": 630.1538461538462, | |
"tile_compute_intensity": 12.19047619047619, | |
"MxNxK": 4294967296, | |
"size_m": 4096, | |
"size_n": 512, | |
"size_k": 2048, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_4096x512x2048.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 1.641961279679818e-7, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 21.55471073845189, | |
"compute_intensity": 7.529411764705882, | |
"tile_compute_intensity": 0.5714285714285714, | |
"MxNxK": 131072, | |
"size_m": 128, | |
"size_n": 128, | |
"size_k": 8, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_128x128x8.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.000017133595091631432, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 2.3261900870207946, | |
"compute_intensity": 157.53846153846155, | |
"tile_compute_intensity": 3.2, | |
"MxNxK": 67108864, | |
"size_m": 256, | |
"size_n": 1024, | |
"size_k": 256, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_256x1024x256.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.00006853438036652573, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.5717425067308954, | |
"compute_intensity": 204.8, | |
"tile_compute_intensity": 1.8686131386861313, | |
"MxNxK": 268435456, | |
"size_m": 128, | |
"size_n": 1024, | |
"size_k": 2048, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_128x1024x2048.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.008772400686915293, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.1655299607326786, | |
"compute_intensity": 1638.4, | |
"tile_compute_intensity": 30.11764705882353, | |
"MxNxK": 34359738368, | |
"size_m": 4096, | |
"size_n": 2048, | |
"size_k": 4096, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_4096x2048x4096.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.000034267190183262865, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 3.065229293022569, | |
"compute_intensity": 186.1818181818182, | |
"tile_compute_intensity": 2.6391752577319587, | |
"MxNxK": 134217728, | |
"size_m": 512, | |
"size_n": 128, | |
"size_k": 2048, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_512x128x2048.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0000023717218484264033, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 3.9829290215553703, | |
"compute_intensity": 42.22680412371134, | |
"tile_compute_intensity": 1.5238095238095237, | |
"MxNxK": 8388608, | |
"size_m": 2048, | |
"size_n": 64, | |
"size_k": 64, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_2048x64x64.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.000017133595091631432, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 2.5572215966204097, | |
"compute_intensity": 204.8, | |
"tile_compute_intensity": 3.764705882352941, | |
"MxNxK": 67108864, | |
"size_m": 512, | |
"size_n": 256, | |
"size_k": 512, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_512x256x512.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.000002408209876863733, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 2.2828575371087947, | |
"compute_intensity": 7.876923076923077, | |
"tile_compute_intensity": 0.8421052631578947, | |
"MxNxK": 2097152, | |
"size_m": 512, | |
"size_n": 512, | |
"size_k": 8, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_512x512x8.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.00013706876073305146, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.5475795055831563, | |
"compute_intensity": 315.0769230769231, | |
"tile_compute_intensity": 3.9689922480620154, | |
"MxNxK": 536870912, | |
"size_m": 512, | |
"size_n": 256, | |
"size_k": 4096, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_512x256x4096.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.00015069555744616993, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.0569654793841783, | |
"compute_intensity": 7.9669341113542425, | |
"tile_compute_intensity": 0.9394495412844037, | |
"MxNxK": 134217728, | |
"size_m": 1024, | |
"size_n": 16384, | |
"size_k": 8, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_1024x16384x8.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.00006853438036652573, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.5935476240345885, | |
"compute_intensity": 117.02857142857142, | |
"tile_compute_intensity": 1.8686131386861313, | |
"MxNxK": 268435456, | |
"size_m": 2048, | |
"size_n": 64, | |
"size_k": 2048, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_2048x64x2048.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.004386200343457647, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.120088930644338, | |
"compute_intensity": 468.1142857142857, | |
"tile_compute_intensity": 3.8714555765595464, | |
"MxNxK": 17179869184, | |
"size_m": 256, | |
"size_n": 4096, | |
"size_k": 16384, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_256x4096x16384.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.008772400686915293, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.129901895901066, | |
"compute_intensity": 862.3157894736842, | |
"tile_compute_intensity": 12.720496894409937, | |
"MxNxK": 34359738368, | |
"size_m": 4096, | |
"size_n": 512, | |
"size_k": 16384, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_4096x512x16384.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0000026271380474877087, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 3.197395714073759, | |
"compute_intensity": 30.11764705882353, | |
"tile_compute_intensity": 2.2857142857142856, | |
"MxNxK": 8388608, | |
"size_m": 512, | |
"size_n": 512, | |
"size_k": 32, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_512x512x32.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.000002141699386453929, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 6.57123015703832, | |
"compute_intensity": 78.76923076923077, | |
"tile_compute_intensity": 1.5238095238095237, | |
"MxNxK": 8388608, | |
"size_m": 512, | |
"size_n": 64, | |
"size_k": 256, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_512x64x256.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0021931001717288233, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.2952550190004157, | |
"compute_intensity": 655.36, | |
"tile_compute_intensity": 7.529411764705882, | |
"MxNxK": 8589934592, | |
"size_m": 512, | |
"size_n": 8192, | |
"size_k": 2048, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_512x8192x2048.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0000010708496932269645, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 7.993652590257787, | |
"compute_intensity": 60.23529411764706, | |
"tile_compute_intensity": 1.4545454545454546, | |
"MxNxK": 4194304, | |
"size_m": 512, | |
"size_n": 64, | |
"size_k": 128, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_512x64x128.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.004386200343457647, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.120748428088651, | |
"compute_intensity": 1170.2857142857142, | |
"tile_compute_intensity": 12.641975308641975, | |
"MxNxK": 17179869184, | |
"size_m": 1024, | |
"size_n": 2048, | |
"size_k": 8192, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_1024x2048x8192.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.00000483466376794613, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.7943750278699315, | |
"compute_intensity": 7.861804222648752, | |
"tile_compute_intensity": 0.7901234567901234, | |
"MxNxK": 4194304, | |
"size_m": 256, | |
"size_n": 2048, | |
"size_k": 8, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_256x2048x8.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.00006853438036652573, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.458608077023638, | |
"compute_intensity": 61.59398496240601, | |
"tile_compute_intensity": 5.12, | |
"MxNxK": 268435456, | |
"size_m": 1024, | |
"size_n": 4096, | |
"size_k": 64, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_1024x4096x64.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.00007735462028713808, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.4855635669187341, | |
"compute_intensity": 31.44721689059501, | |
"tile_compute_intensity": 3.1604938271604937, | |
"MxNxK": 268435456, | |
"size_m": 1024, | |
"size_n": 8192, | |
"size_k": 32, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_1024x8192x32.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.004386200343457647, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.1192309469762824, | |
"compute_intensity": 248.24242424242425, | |
"tile_compute_intensity": 23.272727272727273, | |
"MxNxK": 17179869184, | |
"size_m": 8192, | |
"size_n": 8192, | |
"size_k": 256, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_8192x8192x256.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0002741375214661029, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.195592682482062, | |
"compute_intensity": 113.3840830449827, | |
"tile_compute_intensity": 5.278350515463917, | |
"MxNxK": 1073741824, | |
"size_m": 512, | |
"size_n": 16384, | |
"size_k": 128, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_512x16384x128.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.000008566797545815716, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 11.32409209211117, | |
"compute_intensity": 146.28571428571428, | |
"tile_compute_intensity": 1.9692307692307693, | |
"MxNxK": 33554432, | |
"size_m": 256, | |
"size_n": 128, | |
"size_k": 1024, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_256x128x1024.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 7.297605687465856e-7, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 6.621349574513979, | |
"compute_intensity": 28.444444444444443, | |
"tile_compute_intensity": 1.6, | |
"MxNxK": 2097152, | |
"size_m": 256, | |
"size_n": 256, | |
"size_k": 32, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_256x256x32.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0002741375214661029, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.3747377529075056, | |
"compute_intensity": 327.68, | |
"tile_compute_intensity": 3.764705882352941, | |
"MxNxK": 1073741824, | |
"size_m": 256, | |
"size_n": 4096, | |
"size_k": 1024, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_256x4096x1024.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.00006853438036652573, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.4544058085690694, | |
"compute_intensity": 62.06060606060606, | |
"tile_compute_intensity": 5.818181818181818, | |
"MxNxK": 268435456, | |
"size_m": 2048, | |
"size_n": 2048, | |
"size_k": 64, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_2048x2048x64.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0005482750429322058, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.2235143519360572, | |
"compute_intensity": 203.527950310559, | |
"tile_compute_intensity": 6.320987654320987, | |
"MxNxK": 2147483648, | |
"size_m": 512, | |
"size_n": 16384, | |
"size_k": 256, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_512x16384x256.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0021931001717288233, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.1467835866304656, | |
"compute_intensity": 126.03076923076924, | |
"tile_compute_intensity": 13.473684210526315, | |
"MxNxK": 8589934592, | |
"size_m": 8192, | |
"size_n": 8192, | |
"size_k": 128, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_8192x8192x128.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.017544801373830587, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.3133191338662902, | |
"compute_intensity": 1489.4545454545455, | |
"tile_compute_intensity": 21.11340206185567, | |
"MxNxK": 68719476736, | |
"size_m": 4096, | |
"size_n": 1024, | |
"size_k": 16384, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_4096x1024x16384.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.00013706876073305146, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.3392168723353555, | |
"compute_intensity": 60.12477064220184, | |
"tile_compute_intensity": 3.9689922480620154, | |
"MxNxK": 536870912, | |
"size_m": 512, | |
"size_n": 16384, | |
"size_k": 64, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_512x16384x64.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.00006853438036652573, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.3945000843089936, | |
"compute_intensity": 292.57142857142856, | |
"tile_compute_intensity": 5.818181818181818, | |
"MxNxK": 268435456, | |
"size_m": 512, | |
"size_n": 1024, | |
"size_k": 512, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_512x1024x512.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.000002955530303423672, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 2.8616184520161214, | |
"compute_intensity": 28.248275862068965, | |
"tile_compute_intensity": 1.8823529411764706, | |
"MxNxK": 8388608, | |
"size_m": 2048, | |
"size_n": 128, | |
"size_k": 32, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_2048x128x32.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.004386200343457647, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.282945815574234, | |
"compute_intensity": 1170.2857142857142, | |
"tile_compute_intensity": 13.837837837837839, | |
"MxNxK": 17179869184, | |
"size_m": 1024, | |
"size_n": 4096, | |
"size_k": 4096, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_1024x4096x4096.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.000010526796204169499, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.640917174025763, | |
"compute_intensity": 15.044995408631772, | |
"tile_compute_intensity": 1.3195876288659794, | |
"MxNxK": 16777216, | |
"size_m": 8192, | |
"size_n": 128, | |
"size_k": 16, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_8192x128x16.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.000034267190183262865, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 2.306953145341202, | |
"compute_intensity": 256, | |
"tile_compute_intensity": 4.923076923076923, | |
"MxNxK": 134217728, | |
"size_m": 512, | |
"size_n": 512, | |
"size_k": 512, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_512x512x512.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.000010513113193505498, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.4622499225287777, | |
"compute_intensity": 7.527682058350563, | |
"tile_compute_intensity": 0.6649350649350649, | |
"MxNxK": 8388608, | |
"size_m": 16384, | |
"size_n": 64, | |
"size_k": 8, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_16384x64x8.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0021931001717288233, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.157635016182315, | |
"compute_intensity": 655.36, | |
"tile_compute_intensity": 21.333333333333332, | |
"MxNxK": 8589934592, | |
"size_m": 8192, | |
"size_n": 1024, | |
"size_k": 1024, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_8192x1024x1024.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0000051083239812261, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 2.4963175920275287, | |
"compute_intensity": 30.567164179104477, | |
"tile_compute_intensity": 2.4615384615384617, | |
"MxNxK": 16777216, | |
"size_m": 512, | |
"size_n": 1024, | |
"size_k": 32, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_512x1024x32.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0003015370670060892, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.4117323124608676, | |
"compute_intensity": 15.929995138551288, | |
"tile_compute_intensity": 1.9248120300751879, | |
"MxNxK": 536870912, | |
"size_m": 16384, | |
"size_n": 2048, | |
"size_k": 16, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_16384x2048x16.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0000029281642820956753, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 2.2446829159788386, | |
"compute_intensity": 14.197573656845753, | |
"tile_compute_intensity": 0.9846153846153847, | |
"MxNxK": 4194304, | |
"size_m": 4096, | |
"size_n": 64, | |
"size_k": 16, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_4096x64x16.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0010965500858644117, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.195726875136539, | |
"compute_intensity": 63.38104448742747, | |
"tile_compute_intensity": 7.013698630136986, | |
"MxNxK": 4294967296, | |
"size_m": 4096, | |
"size_n": 16384, | |
"size_k": 64, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_4096x16384x64.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0010965500858644117, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.17356283494815, | |
"compute_intensity": 431.1578947368421, | |
"tile_compute_intensity": 18.285714285714285, | |
"MxNxK": 4294967296, | |
"size_m": 2048, | |
"size_n": 4096, | |
"size_k": 512, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_2048x4096x512.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.008772400686915293, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.0851229112188325, | |
"compute_intensity": 862.3157894736842, | |
"tile_compute_intensity": 36.57142857142857, | |
"MxNxK": 34359738368, | |
"size_m": 4096, | |
"size_n": 8192, | |
"size_k": 1024, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_4096x8192x1024.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.00239594989930879, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.3994950458015878, | |
"compute_intensity": 15.984390243902439, | |
"tile_compute_intensity": 1.9768339768339769, | |
"MxNxK": 4294967296, | |
"size_m": 16384, | |
"size_n": 16384, | |
"size_k": 16, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_16384x16384x16.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.000004971493874586116, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.766229581227142, | |
"compute_intensity": 7.750236518448439, | |
"tile_compute_intensity": 0.6632124352331606, | |
"MxNxK": 4194304, | |
"size_m": 128, | |
"size_n": 4096, | |
"size_k": 8, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_128x4096x8.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0005482750429322058, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.353382096064251, | |
"compute_intensity": 390.0952380952381, | |
"tile_compute_intensity": 16, | |
"MxNxK": 2147483648, | |
"size_m": 4096, | |
"size_n": 1024, | |
"size_k": 512, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_4096x1024x512.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.008772400686915293, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.127009874332965, | |
"compute_intensity": 474.8985507246377, | |
"tile_compute_intensity": 36.57142857142857, | |
"MxNxK": 34359738368, | |
"size_m": 16384, | |
"size_n": 4096, | |
"size_k": 512, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_16384x4096x512.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0003015370670060892, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.103614859113013, | |
"compute_intensity": 15.929995138551288, | |
"tile_compute_intensity": 1.8754578754578755, | |
"MxNxK": 536870912, | |
"size_m": 2048, | |
"size_n": 16384, | |
"size_k": 16, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_2048x16384x16.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.004386200343457647, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.1140415981227691, | |
"compute_intensity": 468.1142857142857, | |
"tile_compute_intensity": 3.9233716475095783, | |
"MxNxK": 17179869184, | |
"size_m": 256, | |
"size_n": 8192, | |
"size_k": 8192, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_256x8192x8192.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0002741375214661029, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.3659246927907513, | |
"compute_intensity": 372.3636363636364, | |
"tile_compute_intensity": 3.5310344827586206, | |
"MxNxK": 1073741824, | |
"size_m": 256, | |
"size_n": 1024, | |
"size_k": 4096, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_256x1024x4096.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 5.354248466134823e-7, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 10.441054825328882, | |
"compute_intensity": 46.54545454545455, | |
"tile_compute_intensity": 1.6, | |
"MxNxK": 2097152, | |
"size_m": 256, | |
"size_n": 128, | |
"size_k": 64, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_256x128x64.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.008772400686915293, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.1121607776585947, | |
"compute_intensity": 481.88235294117646, | |
"tile_compute_intensity": 3.930902111324376, | |
"MxNxK": 34359738368, | |
"size_m": 256, | |
"size_n": 8192, | |
"size_k": 16384, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_256x8192x16384.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.00007647890760464218, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.4311501596107339, | |
"compute_intensity": 31.62934362934363, | |
"tile_compute_intensity": 3.4594594594594597, | |
"MxNxK": 268435456, | |
"size_m": 2048, | |
"size_n": 4096, | |
"size_k": 32, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_2048x4096x32.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0002741375214661029, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.2312069033879274, | |
"compute_intensity": 62.77394636015325, | |
"tile_compute_intensity": 6.2439024390243905, | |
"MxNxK": 1073741824, | |
"size_m": 2048, | |
"size_n": 8192, | |
"size_k": 64, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_2048x8192x64.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0002741375214661029, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.3743175090756368, | |
"compute_intensity": 512, | |
"tile_compute_intensity": 9.846153846153847, | |
"MxNxK": 1073741824, | |
"size_m": 1024, | |
"size_n": 1024, | |
"size_k": 1024, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_1024x1024x1024.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.000014029646934153111, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.6691510372260017, | |
"compute_intensity": 25.580015612802498, | |
"tile_compute_intensity": 1.3264248704663213, | |
"MxNxK": 33554432, | |
"size_m": 16384, | |
"size_n": 64, | |
"size_k": 32, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_16384x64x32.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.000004283398772907858, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 5.645610333572172, | |
"compute_intensity": 128, | |
"tile_compute_intensity": 1.5609756097560976, | |
"MxNxK": 16777216, | |
"size_m": 128, | |
"size_n": 256, | |
"size_k": 512, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_128x256x512.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0010965500858644117, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.1717039269554026, | |
"compute_intensity": 399.609756097561, | |
"tile_compute_intensity": 12.19047619047619, | |
"MxNxK": 4294967296, | |
"size_m": 1024, | |
"size_n": 8192, | |
"size_k": 512, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_1024x8192x512.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.000017133595091631432, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 2.3749365030122616, | |
"compute_intensity": 157.53846153846155, | |
"tile_compute_intensity": 4.571428571428571, | |
"MxNxK": 67108864, | |
"size_m": 1024, | |
"size_n": 256, | |
"size_k": 256, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_1024x256x256.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.00006853438036652573, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.4083208236033584, | |
"compute_intensity": 167.18367346938774, | |
"tile_compute_intensity": 1.9104477611940298, | |
"MxNxK": 268435456, | |
"size_m": 128, | |
"size_n": 4096, | |
"size_k": 512, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_128x4096x512.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.00002103534839412033, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.5280945878373742, | |
"compute_intensity": 15.05190629306385, | |
"tile_compute_intensity": 0.9980506822612085, | |
"MxNxK": 33554432, | |
"size_m": 128, | |
"size_n": 16384, | |
"size_k": 16, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_128x16384x16.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.000034267190183262865, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 2.8835045811449524, | |
"compute_intensity": 256, | |
"tile_compute_intensity": 3.1219512195121952, | |
"MxNxK": 134217728, | |
"size_m": 256, | |
"size_n": 512, | |
"size_k": 1024, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_256x512x1024.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0011991425665643896, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.3995093567303631, | |
"compute_intensity": 15.976596782057532, | |
"tile_compute_intensity": 1.9692307692307693, | |
"MxNxK": 2147483648, | |
"size_m": 16384, | |
"size_n": 8192, | |
"size_k": 16, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_16384x8192x16.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.000008566797545815716, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 2.61325195592407, | |
"compute_intensity": 97.52380952380952, | |
"tile_compute_intensity": 4, | |
"MxNxK": 33554432, | |
"size_m": 1024, | |
"size_n": 256, | |
"size_k": 128, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_1024x256x128.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.000008566797545815716, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 2.1377416436973964, | |
"compute_intensity": 56.10958904109589, | |
"tile_compute_intensity": 3.5555555555555554, | |
"MxNxK": 33554432, | |
"size_m": 2048, | |
"size_n": 256, | |
"size_k": 64, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_2048x256x64.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0000014960091659305007, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 3.8652169375249548, | |
"compute_intensity": 28.054794520547944, | |
"tile_compute_intensity": 1.28, | |
"MxNxK": 4194304, | |
"size_m": 128, | |
"size_n": 1024, | |
"size_k": 32, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_128x1024x32.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.00007560319492214628, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.1854737230422148, | |
"compute_intensity": 15.906796116504854, | |
"tile_compute_intensity": 1.8823529411764706, | |
"MxNxK": 134217728, | |
"size_m": 4096, | |
"size_n": 2048, | |
"size_k": 16, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_4096x2048x16.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.000005847206557082019, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.8245977538772877, | |
"compute_intensity": 14.209887250650477, | |
"tile_compute_intensity": 0.9922480620155039, | |
"MxNxK": 8388608, | |
"size_m": 8192, | |
"size_n": 64, | |
"size_k": 16, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_8192x64x16.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0010965500858644117, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.2150748230182844, | |
"compute_intensity": 431.1578947368421, | |
"tile_compute_intensity": 7.013698630136986, | |
"MxNxK": 4294967296, | |
"size_m": 4096, | |
"size_n": 256, | |
"size_k": 4096, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_4096x256x4096.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.000034267190183262865, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.4954480374164743, | |
"compute_intensity": 186.1818181818182, | |
"tile_compute_intensity": 6.4, | |
"MxNxK": 134217728, | |
"size_m": 1024, | |
"size_n": 512, | |
"size_k": 256, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_1024x512x256.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.000004779931725290136, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.7727449615505204, | |
"compute_intensity": 7.9073359073359075, | |
"tile_compute_intensity": 0.8648648648648649, | |
"MxNxK": 4194304, | |
"size_m": 512, | |
"size_n": 1024, | |
"size_k": 8, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_512x1024x8.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.000008566797545815716, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 3.3670458990817824, | |
"compute_intensity": 146.28571428571428, | |
"tile_compute_intensity": 1.7297297297297298, | |
"MxNxK": 33554432, | |
"size_m": 128, | |
"size_n": 512, | |
"size_k": 512, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_128x512x512.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0021931001717288233, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 2.053585353335932, | |
"compute_intensity": 126.03076923076924, | |
"tile_compute_intensity": 1.967339097022094, | |
"MxNxK": 8589934592, | |
"size_m": 8192, | |
"size_n": 64, | |
"size_k": 16384, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_8192x64x16384.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0005482750429322058, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 2.0709023208717845, | |
"compute_intensity": 390.0952380952381, | |
"tile_compute_intensity": 5.305699481865285, | |
"MxNxK": 2147483648, | |
"size_m": 1024, | |
"size_n": 256, | |
"size_k": 8192, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_1024x256x8192.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.008772400686915293, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.06927314501595, | |
"compute_intensity": 799.219512195122, | |
"tile_compute_intensity": 24.38095238095238, | |
"MxNxK": 34359738368, | |
"size_m": 2048, | |
"size_n": 16384, | |
"size_k": 1024, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_2048x16384x1024.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.000017133595091631432, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.703320272951015, | |
"compute_intensity": 107.78947368421052, | |
"tile_compute_intensity": 5.333333333333333, | |
"MxNxK": 67108864, | |
"size_m": 1024, | |
"size_n": 512, | |
"size_k": 128, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_1024x512x128.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.000002141699386453929, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 11.114911902855777, | |
"compute_intensity": 85.33333333333333, | |
"tile_compute_intensity": 1.3061224489795917, | |
"MxNxK": 8388608, | |
"size_m": 256, | |
"size_n": 64, | |
"size_k": 512, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_256x64x512.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.008772400686915293, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.1061495412624387, | |
"compute_intensity": 474.8985507246377, | |
"tile_compute_intensity": 30.11764705882353, | |
"MxNxK": 34359738368, | |
"size_m": 4096, | |
"size_n": 16384, | |
"size_k": 512, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_4096x16384x512.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.000008566797545815716, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 11.33604441102209, | |
"compute_intensity": 146.28571428571428, | |
"tile_compute_intensity": 1.5802469135802468, | |
"MxNxK": 33554432, | |
"size_m": 128, | |
"size_n": 256, | |
"size_k": 1024, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_128x256x1024.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.004386200343457647, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.1362143863133172, | |
"compute_intensity": 468.1142857142857, | |
"tile_compute_intensity": 7.086505190311419, | |
"MxNxK": 17179869184, | |
"size_m": 4096, | |
"size_n": 256, | |
"size_k": 16384, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_4096x256x16384.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.000023388826228328075, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.914350059644594, | |
"compute_intensity": 28.419774501300953, | |
"tile_compute_intensity": 1.9844961240310077, | |
"MxNxK": 67108864, | |
"size_m": 16384, | |
"size_n": 128, | |
"size_k": 32, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_16384x128x32.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.000009961231763390895, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.7466113857740306, | |
"compute_intensity": 15.485822306238186, | |
"tile_compute_intensity": 1.5609756097560976, | |
"MxNxK": 16777216, | |
"size_m": 4096, | |
"size_n": 256, | |
"size_k": 16, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_4096x256x16.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.000009961231763390895, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.6939269649661468, | |
"compute_intensity": 15.485822306238186, | |
"tile_compute_intensity": 1.3195876288659794, | |
"MxNxK": 16777216, | |
"size_m": 256, | |
"size_n": 4096, | |
"size_k": 16, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_256x4096x16.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0010965500858644117, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.3194224366152645, | |
"compute_intensity": 337.8144329896907, | |
"tile_compute_intensity": 7.314285714285714, | |
"MxNxK": 4294967296, | |
"size_m": 16384, | |
"size_n": 256, | |
"size_k": 1024, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_16384x256x1024.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.000001222348952650531, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 3.6022446707846063, | |
"compute_intensity": 7.816793893129771, | |
"tile_compute_intensity": 0.7619047619047619, | |
"MxNxK": 1048576, | |
"size_m": 256, | |
"size_n": 512, | |
"size_k": 8, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_256x512x8.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.00013706876073305146, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.4424993529704386, | |
"compute_intensity": 409.6, | |
"tile_compute_intensity": 5.224489795918367, | |
"MxNxK": 536870912, | |
"size_m": 512, | |
"size_n": 512, | |
"size_k": 2048, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_512x512x2048.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0000051083239812261, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 2.4812834657482483, | |
"compute_intensity": 30.567164179104477, | |
"tile_compute_intensity": 2.6666666666666665, | |
"MxNxK": 16777216, | |
"size_m": 1024, | |
"size_n": 512, | |
"size_k": 32, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_1024x512x32.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.000004283398772907858, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 4.062568125877826, | |
"compute_intensity": 81.92, | |
"tile_compute_intensity": 1.6842105263157894, | |
"MxNxK": 16777216, | |
"size_m": 1024, | |
"size_n": 64, | |
"size_k": 256, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_1024x64x256.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.000002141699386453929, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 5.042724523579948, | |
"compute_intensity": 62.06060606060606, | |
"tile_compute_intensity": 1.6, | |
"MxNxK": 8388608, | |
"size_m": 1024, | |
"size_n": 64, | |
"size_k": 128, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_1024x64x128.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.000019557583242408497, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.9999198107665932, | |
"compute_intensity": 31.267175572519083, | |
"tile_compute_intensity": 3.0476190476190474, | |
"MxNxK": 67108864, | |
"size_m": 1024, | |
"size_n": 2048, | |
"size_k": 32, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_1024x2048x32.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0010965500858644117, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.1950060567450536, | |
"compute_intensity": 819.2, | |
"tile_compute_intensity": 15.058823529411764, | |
"MxNxK": 4294967296, | |
"size_m": 2048, | |
"size_n": 1024, | |
"size_k": 2048, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_2048x1024x2048.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.00013706876073305146, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 2.425900664949112, | |
"compute_intensity": 118.72463768115942, | |
"tile_compute_intensity": 1.7746967071057191, | |
"MxNxK": 536870912, | |
"size_m": 1024, | |
"size_n": 64, | |
"size_k": 8192, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_1024x64x8192.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0002741375214661029, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.3886752737840893, | |
"compute_intensity": 221.40540540540542, | |
"tile_compute_intensity": 3.710144927536232, | |
"MxNxK": 1073741824, | |
"size_m": 4096, | |
"size_n": 128, | |
"size_k": 2048, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_4096x128x2048.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.000034267190183262865, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 2.277723946826653, | |
"compute_intensity": 163.84, | |
"tile_compute_intensity": 3.3684210526315788, | |
"MxNxK": 134217728, | |
"size_m": 2048, | |
"size_n": 128, | |
"size_k": 512, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_2048x128x512.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.017544801373830587, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.2727182969336432, | |
"compute_intensity": 489.07462686567163, | |
"tile_compute_intensity": 7.728301886792453, | |
"MxNxK": 68719476736, | |
"size_m": 16384, | |
"size_n": 256, | |
"size_k": 16384, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_16384x256x16384.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.00013706876073305146, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.4120329491408465, | |
"compute_intensity": 315.0769230769231, | |
"tile_compute_intensity": 6.095238095238095, | |
"MxNxK": 536870912, | |
"size_m": 2048, | |
"size_n": 256, | |
"size_k": 1024, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_2048x256x1024.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 3.1927024882663123e-7, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 11.756810952289273, | |
"compute_intensity": 7.641791044776119, | |
"tile_compute_intensity": 0.6153846153846154, | |
"MxNxK": 262144, | |
"size_m": 128, | |
"size_n": 256, | |
"size_k": 8, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_128x256x8.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.000008566797545815716, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 2.1340063317075844, | |
"compute_intensity": 58.51428571428571, | |
"tile_compute_intensity": 4, | |
"MxNxK": 33554432, | |
"size_m": 1024, | |
"size_n": 512, | |
"size_k": 64, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_1024x512x64.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.000034267190183262865, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.5152451909624784, | |
"compute_intensity": 163.84, | |
"tile_compute_intensity": 3.3684210526315788, | |
"MxNxK": 134217728, | |
"size_m": 256, | |
"size_n": 2048, | |
"size_k": 256, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_256x2048x256.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.000019010262815848558, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.453530681910854, | |
"compute_intensity": 7.930300096805421, | |
"tile_compute_intensity": 0.927536231884058, | |
"MxNxK": 16777216, | |
"size_m": 4096, | |
"size_n": 512, | |
"size_k": 8, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_4096x512x8.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.00013706876073305146, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.2712801423651239, | |
"compute_intensity": 118.72463768115942, | |
"tile_compute_intensity": 7.529411764705882, | |
"MxNxK": 536870912, | |
"size_m": 1024, | |
"size_n": 4096, | |
"size_k": 128, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_1024x4096x128.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0010965500858644117, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.1965819711145615, | |
"compute_intensity": 399.609756097561, | |
"tile_compute_intensity": 3.878787878787879, | |
"MxNxK": 4294967296, | |
"size_m": 256, | |
"size_n": 8192, | |
"size_k": 2048, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_256x8192x2048.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.000034267190183262865, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.5612836331594215, | |
"compute_intensity": 84.89119170984456, | |
"tile_compute_intensity": 1.7655172413793103, | |
"MxNxK": 134217728, | |
"size_m": 128, | |
"size_n": 8192, | |
"size_k": 128, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_128x8192x128.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.000019885975498344463, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.4958883830107685, | |
"compute_intensity": 15.500473036896878, | |
"tile_compute_intensity": 1.3264248704663213, | |
"MxNxK": 33554432, | |
"size_m": 256, | |
"size_n": 8192, | |
"size_k": 16, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_256x8192x16.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.000004283398772907858, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 3.8810302645317605, | |
"compute_intensity": 113.77777777777777, | |
"tile_compute_intensity": 2.4615384615384617, | |
"MxNxK": 16777216, | |
"size_m": 512, | |
"size_n": 128, | |
"size_k": 256, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_512x128x256.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.00006853438036652573, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.4321337348006202, | |
"compute_intensity": 127.0077519379845, | |
"tile_compute_intensity": 1.8686131386861313, | |
"MxNxK": 268435456, | |
"size_m": 128, | |
"size_n": 8192, | |
"size_k": 256, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_128x8192x256.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.000002141699386453929, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 4.714013671194087, | |
"compute_intensity": 78.76923076923077, | |
"tile_compute_intensity": 2.2857142857142856, | |
"MxNxK": 8388608, | |
"size_m": 512, | |
"size_n": 128, | |
"size_k": 128, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_512x128x128.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.00006853438036652573, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.4677596763307375, | |
"compute_intensity": 195.04761904761904, | |
"tile_compute_intensity": 1.9104477611940298, | |
"MxNxK": 268435456, | |
"size_m": 128, | |
"size_n": 2048, | |
"size_k": 1024, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_128x2048x1024.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0002741375214661029, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.4053675660201446, | |
"compute_intensity": 221.40540540540542, | |
"tile_compute_intensity": 9.846153846153847, | |
"MxNxK": 1073741824, | |
"size_m": 1024, | |
"size_n": 4096, | |
"size_k": 256, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_1024x4096x256.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0005482750429322058, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.3630997675498888, | |
"compute_intensity": 203.527950310559, | |
"tile_compute_intensity": 1.9616858237547892, | |
"MxNxK": 2147483648, | |
"size_m": 128, | |
"size_n": 16384, | |
"size_k": 1024, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_128x16384x1024.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.000034267190183262865, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.7173277778448703, | |
"compute_intensity": 61.134328358208954, | |
"tile_compute_intensity": 4.923076923076923, | |
"MxNxK": 134217728, | |
"size_m": 1024, | |
"size_n": 2048, | |
"size_k": 64, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_1024x2048x64.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.000017133595091631432, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.731522133270276, | |
"compute_intensity": 99.90243902439025, | |
"tile_compute_intensity": 3.0476190476190474, | |
"MxNxK": 67108864, | |
"size_m": 256, | |
"size_n": 2048, | |
"size_k": 128, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_256x2048x128.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.00003765564534732382, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.2436063464020062, | |
"compute_intensity": 7.968871595330739, | |
"tile_compute_intensity": 0.9552238805970149, | |
"MxNxK": 33554432, | |
"size_m": 2048, | |
"size_n": 2048, | |
"size_k": 8, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_2048x2048x8.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0005482750429322058, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.1471965263051922, | |
"compute_intensity": 123.18796992481202, | |
"tile_compute_intensity": 10.24, | |
"MxNxK": 2147483648, | |
"size_m": 2048, | |
"size_n": 8192, | |
"size_k": 128, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_2048x8192x128.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.000002141699386453929, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 3.851894491282986, | |
"compute_intensity": 49.951219512195124, | |
"tile_compute_intensity": 1.5238095238095237, | |
"MxNxK": 8388608, | |
"size_m": 128, | |
"size_n": 1024, | |
"size_k": 64, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_128x1024x64.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.008772400686915293, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.133463606760656, | |
"compute_intensity": 250.13740458015266, | |
"tile_compute_intensity": 25.6, | |
"MxNxK": 34359738368, | |
"size_m": 16384, | |
"size_n": 8192, | |
"size_k": 256, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_16384x8192x256.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.004386200343457647, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.1560219865026764, | |
"compute_intensity": 126.51737451737452, | |
"tile_compute_intensity": 14.222222222222221, | |
"MxNxK": 17179869184, | |
"size_m": 16384, | |
"size_n": 8192, | |
"size_k": 128, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_16384x8192x128.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.000034267190183262865, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.9023210151116623, | |
"compute_intensity": 51.1201248049922, | |
"tile_compute_intensity": 2.6391752577319587, | |
"MxNxK": 134217728, | |
"size_m": 16384, | |
"size_n": 128, | |
"size_k": 64, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_16384x128x64.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0010965500858644117, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.121282480880646, | |
"compute_intensity": 237.44927536231884, | |
"tile_compute_intensity": 15.058823529411764, | |
"MxNxK": 4294967296, | |
"size_m": 2048, | |
"size_n": 8192, | |
"size_k": 256, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_2048x8192x256.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.000002141699386453929, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 45.135000172658884, | |
"compute_intensity": 78.76923076923077, | |
"tile_compute_intensity": 0.9922480620155039, | |
"MxNxK": 8388608, | |
"size_m": 128, | |
"size_n": 64, | |
"size_k": 1024, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_128x64x1024.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.000019885975498344463, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.5335431016516246, | |
"compute_intensity": 15.500473036896878, | |
"tile_compute_intensity": 1.5802469135802468, | |
"MxNxK": 33554432, | |
"size_m": 8192, | |
"size_n": 256, | |
"size_k": 16, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_8192x256x16.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.000009933865742062898, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.5684528126312118, | |
"compute_intensity": 7.75390440132513, | |
"tile_compute_intensity": 0.6649350649350649, | |
"MxNxK": 8388608, | |
"size_m": 128, | |
"size_n": 8192, | |
"size_k": 8, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_128x8192x8.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.00013706876073305146, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.4525147113256598, | |
"compute_intensity": 341.3333333333333, | |
"tile_compute_intensity": 5.224489795918367, | |
"MxNxK": 536870912, | |
"size_m": 1024, | |
"size_n": 256, | |
"size_k": 2048, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_1024x256x2048.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.00001871835858834992, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.749726145626669, | |
"compute_intensity": 42.6111833550065, | |
"tile_compute_intensity": 1.5900621118012421, | |
"MxNxK": 67108864, | |
"size_m": 16384, | |
"size_n": 64, | |
"size_k": 64, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_16384x64x64.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.004386200343457647, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.1432335114630678, | |
"compute_intensity": 248.24242424242425, | |
"tile_compute_intensity": 3.8714555765595464, | |
"MxNxK": 17179869184, | |
"size_m": 8192, | |
"size_n": 128, | |
"size_k": 16384, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_8192x128x16384.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.000005327252151850076, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 2.4015382922925177, | |
"compute_intensity": 29.8978102189781, | |
"tile_compute_intensity": 2.4615384615384617, | |
"MxNxK": 16777216, | |
"size_m": 2048, | |
"size_n": 256, | |
"size_k": 32, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_2048x256x32.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0006007389001921893, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.4041907958322308, | |
"compute_intensity": 15.961032635168047, | |
"tile_compute_intensity": 1.9541984732824427, | |
"MxNxK": 1073741824, | |
"size_m": 16384, | |
"size_n": 4096, | |
"size_k": 16, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_16384x4096x16.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 3.7400229148262517e-7, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 13.125053568818041, | |
"compute_intensity": 14.027397260273972, | |
"tile_compute_intensity": 0.8888888888888888, | |
"MxNxK": 524288, | |
"size_m": 512, | |
"size_n": 64, | |
"size_k": 16, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_512x64x16.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0006036579424671757, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.2170866689837623, | |
"compute_intensity": 31.844509232264333, | |
"tile_compute_intensity": 3.7372262773722627, | |
"MxNxK": 2147483648, | |
"size_m": 4096, | |
"size_n": 16384, | |
"size_k": 32, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_4096x16384x32.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0003002234979823453, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.0298355046978815, | |
"compute_intensity": 7.982460414129111, | |
"tile_compute_intensity": 0.9678638941398866, | |
"MxNxK": 268435456, | |
"size_m": 2048, | |
"size_n": 16384, | |
"size_k": 8, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_2048x16384x8.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.00013706876073305146, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.305762133981854, | |
"compute_intensity": 102.0809968847352, | |
"tile_compute_intensity": 3.1801242236024843, | |
"MxNxK": 536870912, | |
"size_m": 256, | |
"size_n": 16384, | |
"size_k": 128, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_256x16384x128.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0010965500858644117, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 2.044179977034163, | |
"compute_intensity": 240.94117647058823, | |
"tile_compute_intensity": 3.750915750915751, | |
"MxNxK": 4294967296, | |
"size_m": 4096, | |
"size_n": 128, | |
"size_k": 8192, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_4096x128x8192.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.017544801373830587, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.073912049189928, | |
"compute_intensity": 1820.4444444444443, | |
"tile_compute_intensity": 26.94736842105263, | |
"MxNxK": 68719476736, | |
"size_m": 2048, | |
"size_n": 8192, | |
"size_k": 4096, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_2048x8192x4096.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.000034267190183262865, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 3.0686846689432947, | |
"compute_intensity": 113.77777777777777, | |
"tile_compute_intensity": 1.7655172413793103, | |
"MxNxK": 134217728, | |
"size_m": 1024, | |
"size_n": 64, | |
"size_k": 2048, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_1024x64x2048.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.004386200343457647, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.125074017894043, | |
"compute_intensity": 1170.2857142857142, | |
"tile_compute_intensity": 15.753846153846155, | |
"MxNxK": 17179869184, | |
"size_m": 2048, | |
"size_n": 1024, | |
"size_k": 8192, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_2048x1024x8192.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.00015237400675428709, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.3086956780385608, | |
"compute_intensity": 31.690522243713733, | |
"tile_compute_intensity": 3.506849315068493, | |
"MxNxK": 536870912, | |
"size_m": 2048, | |
"size_n": 8192, | |
"size_k": 32, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_2048x8192x32.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.008772400686915293, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.0915342625486857, | |
"compute_intensity": 1260.3076923076924, | |
"tile_compute_intensity": 36.57142857142857, | |
"MxNxK": 34359738368, | |
"size_m": 8192, | |
"size_n": 2048, | |
"size_k": 2048, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_8192x2048x2048.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.00013706876073305146, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.2993186783333979, | |
"compute_intensity": 62.534351145038165, | |
"tile_compute_intensity": 6.095238095238095, | |
"MxNxK": 536870912, | |
"size_m": 2048, | |
"size_n": 4096, | |
"size_k": 64, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_2048x4096x64.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0002741375214661029, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.4051808853527137, | |
"compute_intensity": 169.78238341968913, | |
"tile_compute_intensity": 3.5310344827586206, | |
"MxNxK": 1073741824, | |
"size_m": 256, | |
"size_n": 16384, | |
"size_k": 256, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_256x16384x256.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 4.013683128106221e-7, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 10.795072453767208, | |
"compute_intensity": 26.94736842105263, | |
"tile_compute_intensity": 1.3333333333333333, | |
"MxNxK": 1048576, | |
"size_m": 256, | |
"size_n": 128, | |
"size_k": 32, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_256x128x32.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.00013706876073305146, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.3684460708499044, | |
"compute_intensity": 102.0809968847352, | |
"tile_compute_intensity": 1.9248120300751879, | |
"MxNxK": 536870912, | |
"size_m": 16384, | |
"size_n": 64, | |
"size_k": 512, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_16384x64x512.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 6.750285260905918e-7, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 6.077372657394267, | |
"compute_intensity": 14.840579710144928, | |
"tile_compute_intensity": 0.9411764705882353, | |
"MxNxK": 1048576, | |
"size_m": 128, | |
"size_n": 512, | |
"size_k": 16, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_128x512x16.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0005482750429322058, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 2.0678089253899077, | |
"compute_intensity": 481.88235294117646, | |
"tile_compute_intensity": 5.305699481865285, | |
"MxNxK": 2147483648, | |
"size_m": 512, | |
"size_n": 512, | |
"size_k": 8192, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_512x512x8192.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.00013706876073305146, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.3605784744227953, | |
"compute_intensity": 315.0769230769231, | |
"tile_compute_intensity": 6.4, | |
"MxNxK": 536870912, | |
"size_m": 512, | |
"size_n": 2048, | |
"size_k": 512, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_512x2048x512.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0021931001717288233, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.1532241545888886, | |
"compute_intensity": 504.12307692307695, | |
"tile_compute_intensity": 7.420289855072464, | |
"MxNxK": 8589934592, | |
"size_m": 512, | |
"size_n": 16384, | |
"size_k": 1024, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_512x16384x1024.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0005482750429322058, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.355331456700681, | |
"compute_intensity": 481.88235294117646, | |
"tile_compute_intensity": 7.111111111111111, | |
"MxNxK": 2147483648, | |
"size_m": 512, | |
"size_n": 4096, | |
"size_k": 1024, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_512x4096x1024.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.00013706876073305146, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.551618364754678, | |
"compute_intensity": 215.57894736842104, | |
"tile_compute_intensity": 3.1801242236024843, | |
"MxNxK": 536870912, | |
"size_m": 1024, | |
"size_n": 128, | |
"size_k": 4096, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_1024x128x4096.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0005482750429322058, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.3703253625370027, | |
"compute_intensity": 123.18796992481202, | |
"tile_compute_intensity": 1.9616858237547892, | |
"MxNxK": 2147483648, | |
"size_m": 8192, | |
"size_n": 64, | |
"size_k": 4096, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_8192x64x4096.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.004386200343457647, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.0619983743662147, | |
"compute_intensity": 668.734693877551, | |
"tile_compute_intensity": 13.837837837837839, | |
"MxNxK": 17179869184, | |
"size_m": 1024, | |
"size_n": 16384, | |
"size_k": 1024, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_1024x16384x1024.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0005482750429322058, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.2058765110782972, | |
"compute_intensity": 585.1428571428571, | |
"tile_compute_intensity": 7.876923076923077, | |
"MxNxK": 2147483648, | |
"size_m": 1024, | |
"size_n": 512, | |
"size_k": 4096, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_1024x512x4096.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.000004283398772907858, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 3.042817421083696, | |
"compute_intensity": 50.5679012345679, | |
"tile_compute_intensity": 2.4615384615384617, | |
"MxNxK": 16777216, | |
"size_m": 2048, | |
"size_n": 128, | |
"size_k": 64, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_2048x128x64.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0005482750429322058, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 2.0704878524199817, | |
"compute_intensity": 224.43835616438355, | |
"tile_compute_intensity": 3.1950078003120126, | |
"MxNxK": 2147483648, | |
"size_m": 1024, | |
"size_n": 128, | |
"size_k": 16384, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_1024x128x16384.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.00006853438036652573, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.4466549087921194, | |
"compute_intensity": 195.04761904761904, | |
"tile_compute_intensity": 8, | |
"MxNxK": 268435456, | |
"size_m": 2048, | |
"size_n": 512, | |
"size_k": 256, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_2048x512x256.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.00013706876073305146, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 2.3821502924430638, | |
"compute_intensity": 199.8048780487805, | |
"tile_compute_intensity": 2.6597402597402597, | |
"MxNxK": 536870912, | |
"size_m": 512, | |
"size_n": 128, | |
"size_k": 8192, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_512x128x8192.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0005482750429322058, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.374148239480464, | |
"compute_intensity": 334.3673469387755, | |
"tile_compute_intensity": 7.111111111111111, | |
"MxNxK": 2147483648, | |
"size_m": 8192, | |
"size_n": 256, | |
"size_k": 1024, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_8192x256x1024.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0002741375214661029, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 2.1516091805567084, | |
"compute_intensity": 221.40540540540542, | |
"tile_compute_intensity": 1.8788990825688074, | |
"MxNxK": 1073741824, | |
"size_m": 128, | |
"size_n": 1024, | |
"size_k": 8192, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_128x1024x8192.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0010965500858644117, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.2025935472389564, | |
"compute_intensity": 237.44927536231884, | |
"tile_compute_intensity": 1.9768339768339769, | |
"MxNxK": 4294967296, | |
"size_m": 128, | |
"size_n": 8192, | |
"size_k": 4096, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_128x8192x4096.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.00007954390199337785, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.5038538211486665, | |
"compute_intensity": 31.000946073793756, | |
"tile_compute_intensity": 2.6528497409326426, | |
"MxNxK": 268435456, | |
"size_m": 512, | |
"size_n": 16384, | |
"size_k": 32, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_512x16384x32.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.004386200343457647, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.1317568120870027, | |
"compute_intensity": 819.2, | |
"tile_compute_intensity": 12.641975308641975, | |
"MxNxK": 17179869184, | |
"size_m": 4096, | |
"size_n": 512, | |
"size_k": 8192, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_4096x512x8192.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.000008566797545815716, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 2.2139428576101623, | |
"compute_intensity": 58.51428571428571, | |
"tile_compute_intensity": 3.5555555555555554, | |
"MxNxK": 33554432, | |
"size_m": 512, | |
"size_n": 1024, | |
"size_k": 64, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_512x1024x64.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.000034267190183262865, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.552412139785528, | |
"compute_intensity": 110.70270270270271, | |
"tile_compute_intensity": 6.4, | |
"MxNxK": 134217728, | |
"size_m": 2048, | |
"size_n": 512, | |
"size_k": 128, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_2048x512x128.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0003002234979823453, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.0249643584828698, | |
"compute_intensity": 7.982460414129111, | |
"tile_compute_intensity": 0.9808429118773946, | |
"MxNxK": 268435456, | |
"size_m": 16384, | |
"size_n": 2048, | |
"size_k": 8, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_16384x2048x8.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 1.9156214929597874e-7, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 18.742743501543572, | |
"compute_intensity": 13.837837837837839, | |
"tile_compute_intensity": 0.8, | |
"MxNxK": 262144, | |
"size_m": 256, | |
"size_n": 64, | |
"size_k": 16, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_256x64x16.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0010965500858644117, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.1781502702938915, | |
"compute_intensity": 431.1578947368421, | |
"tile_compute_intensity": 21.333333333333332, | |
"MxNxK": 4294967296, | |
"size_m": 4096, | |
"size_n": 2048, | |
"size_k": 512, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_4096x2048x512.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.000034267190183262865, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 4.263997075628031, | |
"compute_intensity": 84.89119170984456, | |
"tile_compute_intensity": 0.9995119570522206, | |
"MxNxK": 134217728, | |
"size_m": 128, | |
"size_n": 64, | |
"size_k": 16384, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_128x64x16384.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.004386200343457647, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.1420530407639684, | |
"compute_intensity": 126.51737451737452, | |
"tile_compute_intensity": 1.9825750242013553, | |
"MxNxK": 17179869184, | |
"size_m": 16384, | |
"size_n": 64, | |
"size_k": 16384, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_16384x64x16384.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0000010708496932269645, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 6.266425301387257, | |
"compute_intensity": 51.2, | |
"tile_compute_intensity": 2, | |
"MxNxK": 4194304, | |
"size_m": 256, | |
"size_n": 256, | |
"size_k": 64, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_256x256x64.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0010965500858644117, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.1933777392946514, | |
"compute_intensity": 819.2, | |
"tile_compute_intensity": 12.19047619047619, | |
"MxNxK": 4294967296, | |
"size_m": 1024, | |
"size_n": 2048, | |
"size_k": 2048, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_1024x2048x2048.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 5.354248466134823e-7, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 14.666483912875476, | |
"compute_intensity": 64, | |
"tile_compute_intensity": 1.2307692307692308, | |
"MxNxK": 2097152, | |
"size_m": 128, | |
"size_n": 128, | |
"size_k": 128, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_128x128x128.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.008772400686915293, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.2950577218255106, | |
"compute_intensity": 862.3157894736842, | |
"tile_compute_intensity": 7.6992481203007515, | |
"MxNxK": 34359738368, | |
"size_m": 512, | |
"size_n": 8192, | |
"size_k": 8192, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_512x8192x8192.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0000010708496932269645, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 12.012889738140837, | |
"compute_intensity": 85.33333333333333, | |
"tile_compute_intensity": 1.28, | |
"MxNxK": 4194304, | |
"size_m": 128, | |
"size_n": 128, | |
"size_k": 256, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_128x128x256.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0005482750429322058, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.248622986637561, | |
"compute_intensity": 585.1428571428571, | |
"tile_compute_intensity": 6.320987654320987, | |
"MxNxK": 2147483648, | |
"size_m": 512, | |
"size_n": 1024, | |
"size_k": 4096, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_512x1024x4096.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.00003798403760325979, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.2508833715147816, | |
"compute_intensity": 7.934140435835351, | |
"tile_compute_intensity": 0.9343065693430657, | |
"MxNxK": 33554432, | |
"size_m": 8192, | |
"size_n": 512, | |
"size_k": 8, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_8192x512x8.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0005482750429322058, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 2.0921470564449454, | |
"compute_intensity": 123.18796992481202, | |
"tile_compute_intensity": 1.8806244260789715, | |
"MxNxK": 2147483648, | |
"size_m": 2048, | |
"size_n": 64, | |
"size_k": 16384, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_2048x64x16384.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0002741375214661029, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 2.1827527476990514, | |
"compute_intensity": 122.26865671641791, | |
"tile_compute_intensity": 1.8788990825688074, | |
"MxNxK": 1073741824, | |
"size_m": 2048, | |
"size_n": 64, | |
"size_k": 8192, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_2048x64x8192.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.00003853135802981973, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.6543408888208557, | |
"compute_intensity": 31.50769230769231, | |
"tile_compute_intensity": 3.3684210526315788, | |
"MxNxK": 134217728, | |
"size_m": 2048, | |
"size_n": 2048, | |
"size_k": 32, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_2048x2048x32.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.000017133595091631432, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 2.5807544018844655, | |
"compute_intensity": 157.53846153846155, | |
"tile_compute_intensity": 3.0476190476190474, | |
"MxNxK": 67108864, | |
"size_m": 1024, | |
"size_n": 128, | |
"size_k": 512, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_1024x128x512.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.000004283398772907858, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 3.0331056099101845, | |
"compute_intensity": 56.888888888888886, | |
"tile_compute_intensity": 3.2, | |
"MxNxK": 16777216, | |
"size_m": 512, | |
"size_n": 512, | |
"size_k": 64, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_512x512x64.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.00007604105126339423, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.1727980346685678, | |
"compute_intensity": 15.860600193610843, | |
"tile_compute_intensity": 1.855072463768116, | |
"MxNxK": 134217728, | |
"size_m": 8192, | |
"size_n": 1024, | |
"size_k": 16, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_8192x1024x16.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.000017133595091631432, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 2.9520484904106628, | |
"compute_intensity": 99.90243902439025, | |
"tile_compute_intensity": 1.8285714285714285, | |
"MxNxK": 67108864, | |
"size_m": 2048, | |
"size_n": 64, | |
"size_k": 512, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_2048x64x512.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.000004998859895914112, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 2.0561487874408018, | |
"compute_intensity": 15.456603773584906, | |
"tile_compute_intensity": 1.3061224489795917, | |
"MxNxK": 8388608, | |
"size_m": 256, | |
"size_n": 2048, | |
"size_k": 16, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_256x2048x16.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.00013706876073305146, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.3149371745374567, | |
"compute_intensity": 61.82641509433962, | |
"tile_compute_intensity": 5.224489795918367, | |
"MxNxK": 536870912, | |
"size_m": 1024, | |
"size_n": 8192, | |
"size_k": 64, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_1024x8192x64.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.00003882326225731836, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.6412428440179738, | |
"compute_intensity": 31.386973180076627, | |
"tile_compute_intensity": 3.1219512195121952, | |
"MxNxK": 134217728, | |
"size_m": 1024, | |
"size_n": 4096, | |
"size_k": 32, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_1024x4096x32.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.008772400686915293, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.0768307394248053, | |
"compute_intensity": 1365.3333333333333, | |
"tile_compute_intensity": 36.57142857142857, | |
"MxNxK": 34359738368, | |
"size_m": 4096, | |
"size_n": 4096, | |
"size_k": 2048, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_4096x4096x2048.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0001506225813892953, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.1034945372909573, | |
"compute_intensity": 15.937743190661479, | |
"tile_compute_intensity": 1.9104477611940298, | |
"MxNxK": 268435456, | |
"size_m": 4096, | |
"size_n": 4096, | |
"size_k": 16, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_4096x4096x16.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.000042107184816677995, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.9123768169535125, | |
"compute_intensity": 30.089990817263544, | |
"tile_compute_intensity": 2.6391752577319587, | |
"MxNxK": 134217728, | |
"size_m": 16384, | |
"size_n": 256, | |
"size_k": 32, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_16384x256x32.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.000034267190183262865, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.521688816475054, | |
"compute_intensity": 163.84, | |
"tile_compute_intensity": 5.333333333333333, | |
"MxNxK": 134217728, | |
"size_m": 2048, | |
"size_n": 256, | |
"size_k": 256, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_2048x256x256.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.000005272520109194081, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.991305878681544, | |
"compute_intensity": 15.03119266055046, | |
"tile_compute_intensity": 1.3061224489795917, | |
"MxNxK": 8388608, | |
"size_m": 4096, | |
"size_n": 128, | |
"size_k": 16, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_4096x128x16.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0011991425665643896, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.0471909066524594, | |
"compute_intensity": 15.976596782057532, | |
"tile_compute_intensity": 1.9616858237547892, | |
"MxNxK": 2147483648, | |
"size_m": 8192, | |
"size_n": 16384, | |
"size_k": 16, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_8192x16384x16.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0000017696693792104705, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 3.835292814779369, | |
"compute_intensity": 25.440993788819874, | |
"tile_compute_intensity": 1.28, | |
"MxNxK": 4194304, | |
"size_m": 2048, | |
"size_n": 64, | |
"size_k": 32, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_2048x64x32.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0010965500858644117, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.3188621398167926, | |
"compute_intensity": 682.6666666666666, | |
"tile_compute_intensity": 18.285714285714285, | |
"MxNxK": 4294967296, | |
"size_m": 2048, | |
"size_n": 2048, | |
"size_k": 1024, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_2048x2048x1024.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.004386200343457647, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.0839026923157487, | |
"compute_intensity": 468.1142857142857, | |
"tile_compute_intensity": 32, | |
"MxNxK": 17179869184, | |
"size_m": 8192, | |
"size_n": 4096, | |
"size_k": 512, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_8192x4096x512.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.000017133595091631432, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.7173277778448703, | |
"compute_intensity": 99.90243902439025, | |
"tile_compute_intensity": 4.571428571428571, | |
"MxNxK": 67108864, | |
"size_m": 2048, | |
"size_n": 256, | |
"size_k": 128, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_2048x256x128.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.000008566797545815716, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 11.33903266061394, | |
"compute_intensity": 102.4, | |
"tile_compute_intensity": 1.5802469135802468, | |
"MxNxK": 33554432, | |
"size_m": 512, | |
"size_n": 64, | |
"size_k": 1024, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_512x64x1024.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0005482750429322058, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.3619149653157396, | |
"compute_intensity": 334.3673469387755, | |
"tile_compute_intensity": 3.8208955223880596, | |
"MxNxK": 2147483648, | |
"size_m": 256, | |
"size_n": 8192, | |
"size_k": 1024, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_256x8192x1024.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0000013181300272985204, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 3.7871832481683008, | |
"compute_intensity": 7.51559633027523, | |
"tile_compute_intensity": 0.6530612244897959, | |
"MxNxK": 1048576, | |
"size_m": 2048, | |
"size_n": 64, | |
"size_k": 8, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_2048x64x8.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.000017133595091631432, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.7924083981606924, | |
"compute_intensity": 60.23529411764706, | |
"tile_compute_intensity": 4.571428571428571, | |
"MxNxK": 67108864, | |
"size_m": 1024, | |
"size_n": 1024, | |
"size_k": 64, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_1024x1024x64.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.017544801373830587, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.1582004089043747, | |
"compute_intensity": 489.07462686567163, | |
"tile_compute_intensity": 42.666666666666664, | |
"MxNxK": 68719476736, | |
"size_m": 16384, | |
"size_n": 8192, | |
"size_k": 512, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_16384x8192x512.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0010965500858644117, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 2.0358833038409663, | |
"compute_intensity": 237.44927536231884, | |
"tile_compute_intensity": 1.9375591296121097, | |
"MxNxK": 4294967296, | |
"size_m": 128, | |
"size_n": 2048, | |
"size_k": 16384, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_128x2048x16384.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.00001890079873053657, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.4436639111137552, | |
"compute_intensity": 7.953398058252427, | |
"tile_compute_intensity": 0.9411764705882353, | |
"MxNxK": 16777216, | |
"size_m": 2048, | |
"size_n": 1024, | |
"size_k": 8, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_2048x1024x8.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0006001550917371922, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.0568841805663305, | |
"compute_intensity": 15.968810916179336, | |
"tile_compute_intensity": 1.9541984732824427, | |
"MxNxK": 1073741824, | |
"size_m": 8192, | |
"size_n": 8192, | |
"size_k": 16, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_8192x8192x16.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.00013706876073305146, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.4570437983463218, | |
"compute_intensity": 215.57894736842104, | |
"tile_compute_intensity": 1.9248120300751879, | |
"MxNxK": 536870912, | |
"size_m": 128, | |
"size_n": 2048, | |
"size_k": 2048, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_128x2048x2048.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.004386200343457647, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.2895046089598925, | |
"compute_intensity": 1170.2857142857142, | |
"tile_compute_intensity": 20.48, | |
"MxNxK": 17179869184, | |
"size_m": 4096, | |
"size_n": 1024, | |
"size_k": 4096, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_4096x1024x4096.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0021931001717288233, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.078649899213568, | |
"compute_intensity": 442.81081081081084, | |
"tile_compute_intensity": 19.692307692307693, | |
"MxNxK": 8589934592, | |
"size_m": 2048, | |
"size_n": 8192, | |
"size_k": 512, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_2048x8192x512.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.000002141699386453929, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 4.025214666251466, | |
"compute_intensity": 53.89473684210526, | |
"tile_compute_intensity": 2.2857142857142856, | |
"MxNxK": 8388608, | |
"size_m": 256, | |
"size_n": 512, | |
"size_k": 64, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_256x512x64.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0010965500858644117, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.2055263361957598, | |
"compute_intensity": 225.98620689655172, | |
"tile_compute_intensity": 1.9768339768339769, | |
"MxNxK": 4294967296, | |
"size_m": 128, | |
"size_n": 16384, | |
"size_k": 2048, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_128x16384x2048.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.00006853438036652573, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.3978151524669369, | |
"compute_intensity": 240.94117647058823, | |
"tile_compute_intensity": 3.5555555555555554, | |
"MxNxK": 268435456, | |
"size_m": 256, | |
"size_n": 2048, | |
"size_k": 512, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_256x2048x512.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.00015193615041303915, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.1736298033817962, | |
"compute_intensity": 15.868280871670702, | |
"tile_compute_intensity": 1.7716262975778547, | |
"MxNxK": 268435456, | |
"size_m": 1024, | |
"size_n": 16384, | |
"size_k": 16, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_1024x16384x16.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.000008566797545815716, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 3.4006640467183304, | |
"compute_intensity": 146.28571428571428, | |
"tile_compute_intensity": 2.56, | |
"MxNxK": 33554432, | |
"size_m": 512, | |
"size_n": 128, | |
"size_k": 512, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_512x128x512.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0002741375214661029, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 2.152916454821083, | |
"compute_intensity": 327.68, | |
"tile_compute_intensity": 3.190031152647975, | |
"MxNxK": 1073741824, | |
"size_m": 256, | |
"size_n": 512, | |
"size_k": 8192, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_256x512x8192.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.017544801373830587, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.1109101530781038, | |
"compute_intensity": 1310.72, | |
"tile_compute_intensity": 26.94736842105263, | |
"MxNxK": 68719476736, | |
"size_m": 16384, | |
"size_n": 1024, | |
"size_k": 4096, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_16384x1024x4096.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.00006853438036652573, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 2.949900728482551, | |
"compute_intensity": 127.0077519379845, | |
"tile_compute_intensity": 1.332465842550423, | |
"MxNxK": 268435456, | |
"size_m": 128, | |
"size_n": 128, | |
"size_k": 16384, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_128x128x16384.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.000004283398772907858, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 2.9486865397557125, | |
"compute_intensity": 50.5679012345679, | |
"tile_compute_intensity": 1.5609756097560976, | |
"MxNxK": 16777216, | |
"size_m": 128, | |
"size_n": 2048, | |
"size_k": 64, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_128x2048x64.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.00013706876073305146, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.358757446128717, | |
"compute_intensity": 168.90721649484536, | |
"tile_compute_intensity": 1.9248120300751879, | |
"MxNxK": 536870912, | |
"size_m": 128, | |
"size_n": 8192, | |
"size_k": 512, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_128x8192x512.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.008772400686915293, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.1457738293715554, | |
"compute_intensity": 1260.3076923076924, | |
"tile_compute_intensity": 14.628571428571428, | |
"MxNxK": 34359738368, | |
"size_m": 1024, | |
"size_n": 8192, | |
"size_k": 4096, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_1024x8192x4096.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0021931001717288233, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.1226627967164449, | |
"compute_intensity": 910.2222222222222, | |
"tile_compute_intensity": 10.556701030927835, | |
"MxNxK": 8589934592, | |
"size_m": 1024, | |
"size_n": 1024, | |
"size_k": 8192, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_1024x1024x8192.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0005482750429322058, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.3386274438403738, | |
"compute_intensity": 390.0952380952381, | |
"tile_compute_intensity": 11.636363636363637, | |
"MxNxK": 2147483648, | |
"size_m": 1024, | |
"size_n": 4096, | |
"size_k": 512, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_1024x4096x512.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.00006853438036652573, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.5071676424835547, | |
"compute_intensity": 60.014652014652015, | |
"tile_compute_intensity": 3.9384615384615387, | |
"MxNxK": 268435456, | |
"size_m": 512, | |
"size_n": 8192, | |
"size_k": 64, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_512x8192x64.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.000019995439583656446, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.971969755299062, | |
"compute_intensity": 30.91320754716981, | |
"tile_compute_intensity": 2.6122448979591835, | |
"MxNxK": 67108864, | |
"size_m": 512, | |
"size_n": 4096, | |
"size_k": 32, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_512x4096x32.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.000004283398772907858, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 24.045206495911085, | |
"compute_intensity": 81.92, | |
"tile_compute_intensity": 0.9961089494163424, | |
"MxNxK": 16777216, | |
"size_m": 128, | |
"size_n": 64, | |
"size_k": 2048, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_128x64x2048.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.004386200343457647, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.2961319425178057, | |
"compute_intensity": 780.1904761904761, | |
"tile_compute_intensity": 13.837837837837839, | |
"MxNxK": 17179869184, | |
"size_m": 8192, | |
"size_n": 512, | |
"size_k": 4096, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_8192x512x4096.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.00006853438036652573, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 2.751460028324196, | |
"compute_intensity": 167.18367346938774, | |
"tile_compute_intensity": 1.5975039001560063, | |
"MxNxK": 268435456, | |
"size_m": 128, | |
"size_n": 256, | |
"size_k": 8192, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_128x256x8192.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.000034267190183262865, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.5447546652743533, | |
"compute_intensity": 110.70270270270271, | |
"tile_compute_intensity": 4.923076923076923, | |
"MxNxK": 134217728, | |
"size_m": 512, | |
"size_n": 2048, | |
"size_k": 128, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_512x2048x128.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.000034267190183262865, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.6000377923159317, | |
"compute_intensity": 63.875243664717345, | |
"tile_compute_intensity": 1.7655172413793103, | |
"MxNxK": 134217728, | |
"size_m": 16384, | |
"size_n": 64, | |
"size_k": 128, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_16384x64x128.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.00006853438036652573, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.4650515326720939, | |
"compute_intensity": 292.57142857142856, | |
"tile_compute_intensity": 5.12, | |
"MxNxK": 268435456, | |
"size_m": 1024, | |
"size_n": 256, | |
"size_k": 1024, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_1024x256x1024.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0005482750429322058, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.3458705348318054, | |
"compute_intensity": 254.015503875969, | |
"tile_compute_intensity": 3.7372262773722627, | |
"MxNxK": 2147483648, | |
"size_m": 256, | |
"size_n": 16384, | |
"size_k": 512, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_256x16384x512.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.00007954390199337785, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.851153861119139, | |
"compute_intensity": 31.000946073793756, | |
"tile_compute_intensity": 3.1604938271604937, | |
"MxNxK": 268435456, | |
"size_m": 16384, | |
"size_n": 512, | |
"size_k": 32, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_16384x512x32.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.000002490307940847724, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 2.106084998238796, | |
"compute_intensity": 7.742911153119093, | |
"tile_compute_intensity": 0.7804878048780488, | |
"MxNxK": 2097152, | |
"size_m": 2048, | |
"size_n": 128, | |
"size_k": 8, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_2048x128x8.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0002741375214661029, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 2.1909938753251414, | |
"compute_intensity": 252.06153846153848, | |
"tile_compute_intensity": 2.6631989596879064, | |
"MxNxK": 1073741824, | |
"size_m": 256, | |
"size_n": 256, | |
"size_k": 16384, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_256x256x16384.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.00006853438036652573, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.4374565968521325, | |
"compute_intensity": 85.11168831168831, | |
"tile_compute_intensity": 1.8686131386861313, | |
"MxNxK": 268435456, | |
"size_m": 16384, | |
"size_n": 64, | |
"size_k": 256, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_16384x64x256.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.00006853438036652573, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.452584865207051, | |
"compute_intensity": 195.04761904761904, | |
"tile_compute_intensity": 5.818181818181818, | |
"MxNxK": 268435456, | |
"size_m": 512, | |
"size_n": 2048, | |
"size_k": 256, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_512x2048x256.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 1.824401421866464e-7, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 27.871059555999512, | |
"compute_intensity": 36.57142857142857, | |
"tile_compute_intensity": 0.8888888888888888, | |
"MxNxK": 524288, | |
"size_m": 128, | |
"size_n": 64, | |
"size_k": 64, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_128x64x64.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0010965500858644117, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 2.031050669638595, | |
"compute_intensity": 431.1578947368421, | |
"tile_compute_intensity": 6.3602484472049685, | |
"MxNxK": 4294967296, | |
"size_m": 2048, | |
"size_n": 256, | |
"size_k": 8192, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_2048x256x8192.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0000014960091659305007, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 4.089814548486245, | |
"compute_intensity": 28.054794520547944, | |
"tile_compute_intensity": 1.7777777777777777, | |
"MxNxK": 4194304, | |
"size_m": 1024, | |
"size_n": 128, | |
"size_k": 32, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_1024x128x32.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.000017133595091631432, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 5.733204164121806, | |
"compute_intensity": 107.78947368421052, | |
"tile_compute_intensity": 1.7534246575342465, | |
"MxNxK": 67108864, | |
"size_m": 1024, | |
"size_n": 64, | |
"size_k": 1024, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_1024x64x1024.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 8.665906753865705e-8, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 55.98029300226937, | |
"compute_intensity": 7.314285714285714, | |
"tile_compute_intensity": 0.5, | |
"MxNxK": 65536, | |
"size_m": 128, | |
"size_n": 64, | |
"size_k": 8, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_128x64x8.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.004386200343457647, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.1152920528391401, | |
"compute_intensity": 780.1904761904761, | |
"tile_compute_intensity": 7.086505190311419, | |
"MxNxK": 17179869184, | |
"size_m": 512, | |
"size_n": 2048, | |
"size_k": 16384, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_512x2048x16384.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.000002141699386453929, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 4.074521463973465, | |
"compute_intensity": 53.89473684210526, | |
"tile_compute_intensity": 2.6666666666666665, | |
"MxNxK": 8388608, | |
"size_m": 512, | |
"size_n": 256, | |
"size_k": 64, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_512x256x64.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.004386200343457647, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.0761781214773354, | |
"compute_intensity": 780.1904761904761, | |
"tile_compute_intensity": 32, | |
"MxNxK": 17179869184, | |
"size_m": 8192, | |
"size_n": 2048, | |
"size_k": 1024, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_8192x2048x1024.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0010965500858644117, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.1946879861811188, | |
"compute_intensity": 630.1538461538462, | |
"tile_compute_intensity": 7.314285714285714, | |
"MxNxK": 4294967296, | |
"size_m": 512, | |
"size_n": 4096, | |
"size_k": 2048, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_512x4096x2048.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.004386200343457647, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.1432182236923045, | |
"compute_intensity": 668.734693877551, | |
"tile_compute_intensity": 7.641791044776119, | |
"MxNxK": 17179869184, | |
"size_m": 512, | |
"size_n": 16384, | |
"size_k": 2048, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_512x16384x2048.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.008772400686915293, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.3150764631155216, | |
"compute_intensity": 474.8985507246377, | |
"tile_compute_intensity": 7.6992481203007515, | |
"MxNxK": 34359738368, | |
"size_m": 16384, | |
"size_n": 256, | |
"size_k": 8192, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_16384x256x8192.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.000004283398772907858, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 3.1877492249296036, | |
"compute_intensity": 81.92, | |
"tile_compute_intensity": 2.6666666666666665, | |
"MxNxK": 16777216, | |
"size_m": 1024, | |
"size_n": 128, | |
"size_k": 128, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_1024x128x128.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.008772400686915293, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.0646429888441313, | |
"compute_intensity": 992.969696969697, | |
"tile_compute_intensity": 14.628571428571428, | |
"MxNxK": 34359738368, | |
"size_m": 1024, | |
"size_n": 16384, | |
"size_k": 2048, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_1024x16384x2048.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0010965500858644117, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.217928879955657, | |
"compute_intensity": 399.609756097561, | |
"tile_compute_intensity": 7.314285714285714, | |
"MxNxK": 4294967296, | |
"size_m": 8192, | |
"size_n": 256, | |
"size_k": 2048, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_8192x256x2048.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.000017133595091631432, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.8204235778125226, | |
"compute_intensity": 59.36231884057971, | |
"tile_compute_intensity": 3.764705882352941, | |
"MxNxK": 67108864, | |
"size_m": 512, | |
"size_n": 2048, | |
"size_k": 64, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_512x2048x64.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 3.3295325949062974e-7, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 12.475024572259086, | |
"compute_intensity": 7.474452554744525, | |
"tile_compute_intensity": 0.6153846153846154, | |
"MxNxK": 262144, | |
"size_m": 512, | |
"size_n": 64, | |
"size_k": 8, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_512x64x8.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.008772400686915293, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.1197233830589775, | |
"compute_intensity": 1260.3076923076924, | |
"tile_compute_intensity": 12.720496894409937, | |
"MxNxK": 34359738368, | |
"size_m": 1024, | |
"size_n": 2048, | |
"size_k": 16384, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_1024x2048x16384.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.000002141699386453929, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 11.070087479521554, | |
"compute_intensity": 102.4, | |
"tile_compute_intensity": 1.3061224489795917, | |
"MxNxK": 8388608, | |
"size_m": 128, | |
"size_n": 128, | |
"size_k": 512, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_128x128x512.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.00015003877293429804, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.0572414056563584, | |
"compute_intensity": 7.984405458089668, | |
"tile_compute_intensity": 0.9770992366412213, | |
"MxNxK": 134217728, | |
"size_m": 4096, | |
"size_n": 4096, | |
"size_k": 8, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_4096x4096x8.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0005482750429322058, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.2976085713094128, | |
"compute_intensity": 409.6, | |
"tile_compute_intensity": 3.7372262773722627, | |
"MxNxK": 2147483648, | |
"size_m": 256, | |
"size_n": 2048, | |
"size_k": 4096, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_256x2048x4096.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.000017133595091631432, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 6.763040720880788, | |
"compute_intensity": 99.90243902439025, | |
"tile_compute_intensity": 1.3298701298701299, | |
"MxNxK": 67108864, | |
"size_m": 256, | |
"size_n": 64, | |
"size_k": 4096, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_256x64x4096.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.000008566797545815716, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 2.78059563170885, | |
"compute_intensity": 120.47058823529412, | |
"tile_compute_intensity": 2.909090909090909, | |
"MxNxK": 33554432, | |
"size_m": 1024, | |
"size_n": 128, | |
"size_k": 256, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_1024x128x256.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0005482750429322058, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.3532186867812044, | |
"compute_intensity": 585.1428571428571, | |
"tile_compute_intensity": 11.636363636363637, | |
"MxNxK": 2147483648, | |
"size_m": 1024, | |
"size_n": 2048, | |
"size_k": 1024, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_1024x2048x1024.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0002741375214661029, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.2131604543904906, | |
"compute_intensity": 122.26865671641791, | |
"tile_compute_intensity": 10.666666666666666, | |
"MxNxK": 1073741824, | |
"size_m": 4096, | |
"size_n": 2048, | |
"size_k": 128, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_4096x2048x128.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.000019557583242408497, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.9956657514027423, | |
"compute_intensity": 31.267175572519083, | |
"tile_compute_intensity": 3.2, | |
"MxNxK": 67108864, | |
"size_m": 2048, | |
"size_n": 1024, | |
"size_k": 32, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_2048x1024x32.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 7.66248597183915e-7, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 6.481447252859274, | |
"compute_intensity": 27.675675675675677, | |
"tile_compute_intensity": 1.6, | |
"MxNxK": 2097152, | |
"size_m": 512, | |
"size_n": 128, | |
"size_k": 32, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_512x128x32.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.00013706876073305146, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.3600648053251514, | |
"compute_intensity": 315.0769230769231, | |
"tile_compute_intensity": 9.142857142857142, | |
"MxNxK": 536870912, | |
"size_m": 2048, | |
"size_n": 512, | |
"size_k": 512, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_2048x512x512.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0005482750429322058, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 2.0762834463163165, | |
"compute_intensity": 334.3673469387755, | |
"tile_compute_intensity": 3.1950078003120126, | |
"MxNxK": 2147483648, | |
"size_m": 256, | |
"size_n": 512, | |
"size_k": 16384, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_256x512x16384.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.000004283398772907858, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 3.0854003174957936, | |
"compute_intensity": 55.351351351351354, | |
"tile_compute_intensity": 3.2, | |
"MxNxK": 16777216, | |
"size_m": 1024, | |
"size_n": 256, | |
"size_k": 64, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_1024x256x64.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.00000483466376794613, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.815555331111213, | |
"compute_intensity": 7.861804222648752, | |
"tile_compute_intensity": 0.8648648648648649, | |
"MxNxK": 4194304, | |
"size_m": 2048, | |
"size_n": 256, | |
"size_k": 8, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_2048x256x8.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0010965500858644117, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.2227060535229821, | |
"compute_intensity": 237.44927536231884, | |
"tile_compute_intensity": 3.8496240601503757, | |
"MxNxK": 4294967296, | |
"size_m": 8192, | |
"size_n": 128, | |
"size_k": 4096, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_8192x128x4096.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0005482750429322058, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.2250668250570615, | |
"compute_intensity": 234.05714285714285, | |
"tile_compute_intensity": 16, | |
"MxNxK": 2147483648, | |
"size_m": 4096, | |
"size_n": 2048, | |
"size_k": 256, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_4096x2048x256.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.00015069555744616993, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.049936784032701, | |
"compute_intensity": 7.9669341113542425, | |
"tile_compute_intensity": 0.9660377358490566, | |
"MxNxK": 134217728, | |
"size_m": 16384, | |
"size_n": 1024, | |
"size_k": 8, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_16384x1024x8.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 3.466362701546282e-7, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 10.348600454014873, | |
"compute_intensity": 14.628571428571428, | |
"tile_compute_intensity": 0.8888888888888888, | |
"MxNxK": 524288, | |
"size_m": 128, | |
"size_n": 256, | |
"size_k": 16, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_128x256x16.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0005989874748271975, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.01540958475008, | |
"compute_intensity": 7.9921951219512195, | |
"tile_compute_intensity": 0.9884169884169884, | |
"MxNxK": 536870912, | |
"size_m": 8192, | |
"size_n": 8192, | |
"size_k": 8, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_8192x8192x8.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.000004779931725290136, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.808226721321689, | |
"compute_intensity": 7.9073359073359075, | |
"tile_compute_intensity": 0.8888888888888888, | |
"MxNxK": 4194304, | |
"size_m": 1024, | |
"size_n": 512, | |
"size_k": 8, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_1024x512x8.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0010965500858644117, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.1668304404322691, | |
"compute_intensity": 125.06870229007633, | |
"tile_compute_intensity": 12.8, | |
"MxNxK": 4294967296, | |
"size_m": 8192, | |
"size_n": 4096, | |
"size_k": 128, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_8192x4096x128.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.00006853438036652573, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.3924456627145971, | |
"compute_intensity": 240.94117647058823, | |
"tile_compute_intensity": 5.818181818181818, | |
"MxNxK": 268435456, | |
"size_m": 2048, | |
"size_n": 256, | |
"size_k": 512, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_2048x256x512.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0021931001717288233, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.1271044037155173, | |
"compute_intensity": 455.1111111111111, | |
"tile_compute_intensity": 3.8641509433962264, | |
"MxNxK": 8589934592, | |
"size_m": 256, | |
"size_n": 4096, | |
"size_k": 8192, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_256x4096x8192.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0010965500858644117, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 2.038369604959424, | |
"compute_intensity": 237.44927536231884, | |
"tile_compute_intensity": 3.5493934142114383, | |
"MxNxK": 4294967296, | |
"size_m": 2048, | |
"size_n": 128, | |
"size_k": 16384, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_2048x128x16384.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.00006853438036652573, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 2.8283614221379167, | |
"compute_intensity": 167.18367346938774, | |
"tile_compute_intensity": 1.996101364522417, | |
"MxNxK": 268435456, | |
"size_m": 256, | |
"size_n": 128, | |
"size_k": 8192, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_256x128x8192.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.000042107184816677995, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.6637160306237755, | |
"compute_intensity": 30.089990817263544, | |
"tile_compute_intensity": 1.9922178988326849, | |
"MxNxK": 134217728, | |
"size_m": 256, | |
"size_n": 16384, | |
"size_k": 32, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_256x16384x32.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0021931001717288233, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.1198554524119597, | |
"compute_intensity": 244.53731343283582, | |
"tile_compute_intensity": 21.333333333333332, | |
"MxNxK": 8589934592, | |
"size_m": 8192, | |
"size_n": 4096, | |
"size_k": 256, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_8192x4096x256.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0000025176739621757206, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 2.374255036641246, | |
"compute_intensity": 15.398496240601503, | |
"tile_compute_intensity": 1.28, | |
"MxNxK": 4194304, | |
"size_m": 256, | |
"size_n": 1024, | |
"size_k": 16, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_256x1024x16.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.004386200343457647, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.0563041044172161, | |
"compute_intensity": 819.2, | |
"tile_compute_intensity": 32, | |
"MxNxK": 17179869184, | |
"size_m": 4096, | |
"size_n": 4096, | |
"size_k": 1024, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_4096x4096x1024.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.00015091448561679391, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.1238788055190343, | |
"compute_intensity": 15.922254616132166, | |
"tile_compute_intensity": 1.9104477611940298, | |
"MxNxK": 268435456, | |
"size_m": 8192, | |
"size_n": 2048, | |
"size_k": 16, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_8192x2048x16.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.000010070695848702883, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 2.1295845960014304, | |
"compute_intensity": 30.796992481203006, | |
"tile_compute_intensity": 2.909090909090909, | |
"MxNxK": 33554432, | |
"size_m": 2048, | |
"size_n": 512, | |
"size_k": 32, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_2048x512x32.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.000008566797545815716, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 2.8265404787758985, | |
"compute_intensity": 83.59183673469387, | |
"tile_compute_intensity": 1.7777777777777777, | |
"MxNxK": 33554432, | |
"size_m": 2048, | |
"size_n": 64, | |
"size_k": 256, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_2048x64x256.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.017544801373830587, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.1758380798980153, | |
"compute_intensity": 1489.4545454545455, | |
"tile_compute_intensity": 24.975609756097562, | |
"MxNxK": 68719476736, | |
"size_m": 8192, | |
"size_n": 1024, | |
"size_k": 8192, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_8192x1024x8192.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0021931001717288233, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.2946216806305197, | |
"compute_intensity": 1024, | |
"tile_compute_intensity": 19.692307692307693, | |
"MxNxK": 8589934592, | |
"size_m": 2048, | |
"size_n": 2048, | |
"size_k": 2048, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_2048x2048x2048.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.000017133595091631432, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 6.057619492651442, | |
"compute_intensity": 107.78947368421052, | |
"tile_compute_intensity": 1.5900621118012421, | |
"MxNxK": 67108864, | |
"size_m": 512, | |
"size_n": 64, | |
"size_k": 2048, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_512x64x2048.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.000004283398772907858, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 3.313256726972009, | |
"compute_intensity": 63.01538461538462, | |
"tile_compute_intensity": 1.6842105263157894, | |
"MxNxK": 16777216, | |
"size_m": 2048, | |
"size_n": 64, | |
"size_k": 128, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_2048x64x128.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.000005874572578410015, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 2.1799715952540306, | |
"compute_intensity": 28.346020761245676, | |
"tile_compute_intensity": 1.3195876288659794, | |
"MxNxK": 16777216, | |
"size_m": 128, | |
"size_n": 4096, | |
"size_k": 32, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_128x4096x32.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.000017133595091631432, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.8693566744714205, | |
"compute_intensity": 51.0404984423676, | |
"tile_compute_intensity": 1.5900621118012421, | |
"MxNxK": 67108864, | |
"size_m": 128, | |
"size_n": 8192, | |
"size_k": 64, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_128x8192x64.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.00003860433408669438, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.3419426076447487, | |
"compute_intensity": 15.738712776176753, | |
"tile_compute_intensity": 1.7534246575342465, | |
"MxNxK": 67108864, | |
"size_m": 8192, | |
"size_n": 512, | |
"size_k": 16, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_8192x512x16.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.017544801373830587, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.1144780639780587, | |
"compute_intensity": 885.6216216216217, | |
"tile_compute_intensity": 51.2, | |
"MxNxK": 68719476736, | |
"size_m": 16384, | |
"size_n": 4096, | |
"size_k": 1024, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_16384x4096x1024.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.00001985860947701646, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.3577587142796355, | |
"compute_intensity": 7.755739644970414, | |
"tile_compute_intensity": 0.7975077881619937, | |
"MxNxK": 16777216, | |
"size_m": 16384, | |
"size_n": 128, | |
"size_k": 8, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_16384x128x8.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0002997856416410974, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.03082720778901, | |
"compute_intensity": 7.988298391028766, | |
"tile_compute_intensity": 0.9846153846153847, | |
"MxNxK": 268435456, | |
"size_m": 8192, | |
"size_n": 4096, | |
"size_k": 8, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_8192x4096x8.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.000017133595091631432, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.7823228859240798, | |
"compute_intensity": 59.36231884057971, | |
"tile_compute_intensity": 4.571428571428571, | |
"MxNxK": 67108864, | |
"size_m": 2048, | |
"size_n": 512, | |
"size_k": 64, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_2048x512x64.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0005482750429322058, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.3709381473484281, | |
"compute_intensity": 481.88235294117646, | |
"tile_compute_intensity": 11.636363636363637, | |
"MxNxK": 2147483648, | |
"size_m": 4096, | |
"size_n": 512, | |
"size_k": 1024, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_4096x512x1024.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.00001985860947701646, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.3640431490244729, | |
"compute_intensity": 7.755739644970414, | |
"tile_compute_intensity": 0.6657997399219766, | |
"MxNxK": 16777216, | |
"size_m": 128, | |
"size_n": 16384, | |
"size_k": 8, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_128x16384x8.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.000004283398772907858, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 5.761406364169306, | |
"compute_intensity": 93.0909090909091, | |
"tile_compute_intensity": 1.5609756097560976, | |
"MxNxK": 16777216, | |
"size_m": 512, | |
"size_n": 64, | |
"size_k": 512, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_512x64x512.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0002741375214661029, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 2.2105809069552116, | |
"compute_intensity": 119.5912408759124, | |
"tile_compute_intensity": 1.7762359063313096, | |
"MxNxK": 1073741824, | |
"size_m": 1024, | |
"size_n": 64, | |
"size_k": 16384, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_1024x64x16384.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.000011712657128382701, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.921750151576348, | |
"compute_intensity": 28.395147313691506, | |
"tile_compute_intensity": 1.3264248704663213, | |
"MxNxK": 33554432, | |
"size_m": 128, | |
"size_n": 8192, | |
"size_k": 32, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_128x8192x32.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.000008566797545815716, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 2.147453454870908, | |
"compute_intensity": 50.88198757763975, | |
"tile_compute_intensity": 1.5802469135802468, | |
"MxNxK": 33554432, | |
"size_m": 128, | |
"size_n": 4096, | |
"size_k": 64, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_128x4096x64.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.00006853438036652573, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.7425413885722818, | |
"compute_intensity": 195.04761904761904, | |
"tile_compute_intensity": 1.7716262975778547, | |
"MxNxK": 268435456, | |
"size_m": 128, | |
"size_n": 512, | |
"size_k": 4096, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_128x512x4096.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.000017133595091631432, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 5.6805357554730955, | |
"compute_intensity": 170.66666666666666, | |
"tile_compute_intensity": 2.6122448979591835, | |
"MxNxK": 67108864, | |
"size_m": 512, | |
"size_n": 128, | |
"size_k": 1024, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_512x128x1024.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0000024264538910823974, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 2.1483201577368787, | |
"compute_intensity": 7.846743295019157, | |
"tile_compute_intensity": 0.7804878048780488, | |
"MxNxK": 2097152, | |
"size_m": 256, | |
"size_n": 1024, | |
"size_k": 8, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_256x1024x8.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.004386200343457647, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.1375896911575878, | |
"compute_intensity": 468.1142857142857, | |
"tile_compute_intensity": 7.474452554744525, | |
"MxNxK": 17179869184, | |
"size_m": 8192, | |
"size_n": 256, | |
"size_k": 8192, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_8192x256x8192.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.000034267190183262865, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 2.880516331553103, | |
"compute_intensity": 186.1818181818182, | |
"tile_compute_intensity": 1.855072463768116, | |
"MxNxK": 134217728, | |
"size_m": 128, | |
"size_n": 1024, | |
"size_k": 1024, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_128x1024x1024.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.000004283398772907858, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 3.1608542991464774, | |
"compute_intensity": 93.0909090909091, | |
"tile_compute_intensity": 3.2, | |
"MxNxK": 16777216, | |
"size_m": 512, | |
"size_n": 256, | |
"size_k": 128, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_512x256x128.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.000034267190183262865, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 2.8904149932581644, | |
"compute_intensity": 110.70270270270271, | |
"tile_compute_intensity": 1.855072463768116, | |
"MxNxK": 134217728, | |
"size_m": 2048, | |
"size_n": 64, | |
"size_k": 1024, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_2048x64x1024.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0010965500858644117, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.2286913856409003, | |
"compute_intensity": 123.65283018867925, | |
"tile_compute_intensity": 1.9768339768339769, | |
"MxNxK": 4294967296, | |
"size_m": 16384, | |
"size_n": 64, | |
"size_k": 4096, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_16384x64x4096.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0010965500858644117, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.3086074429170673, | |
"compute_intensity": 496.4848484848485, | |
"tile_compute_intensity": 7.314285714285714, | |
"MxNxK": 4294967296, | |
"size_m": 512, | |
"size_n": 8192, | |
"size_k": 1024, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_512x8192x1024.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.008772400686915293, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.1825477126218704, | |
"compute_intensity": 799.219512195122, | |
"tile_compute_intensity": 14.628571428571428, | |
"MxNxK": 34359738368, | |
"size_m": 16384, | |
"size_n": 512, | |
"size_k": 4096, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_16384x512x4096.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.00001890079873053657, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.4375688410725742, | |
"compute_intensity": 7.953398058252427, | |
"tile_compute_intensity": 0.927536231884058, | |
"MxNxK": 16777216, | |
"size_m": 1024, | |
"size_n": 2048, | |
"size_k": 8, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_1024x2048x8.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0010965500858644117, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.2039387861340598, | |
"compute_intensity": 682.6666666666666, | |
"tile_compute_intensity": 10.448979591836734, | |
"MxNxK": 4294967296, | |
"size_m": 2048, | |
"size_n": 512, | |
"size_k": 4096, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_2048x512x4096.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.000034267190183262865, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 2.2917314517205085, | |
"compute_intensity": 227.55555555555554, | |
"tile_compute_intensity": 3.3684210526315788, | |
"MxNxK": 134217728, | |
"size_m": 256, | |
"size_n": 1024, | |
"size_k": 512, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_256x1024x512.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.008772400686915293, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.1475798246910496, | |
"compute_intensity": 1638.4, | |
"tile_compute_intensity": 24.38095238095238, | |
"MxNxK": 34359738368, | |
"size_m": 2048, | |
"size_n": 4096, | |
"size_k": 4096, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_2048x4096x4096.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.000008566797545815716, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 2.7589306523038206, | |
"compute_intensity": 146.28571428571428, | |
"tile_compute_intensity": 3.5555555555555554, | |
"MxNxK": 33554432, | |
"size_m": 512, | |
"size_n": 256, | |
"size_k": 256, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_512x256x256.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.00006853438036652573, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.529999928118416, | |
"compute_intensity": 56.79029462738301, | |
"tile_compute_intensity": 2.6528497409326426, | |
"MxNxK": 268435456, | |
"size_m": 256, | |
"size_n": 16384, | |
"size_k": 64, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_256x16384x64.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.000034267190183262865, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.718541796707589, | |
"compute_intensity": 61.134328358208954, | |
"tile_compute_intensity": 5.333333333333333, | |
"MxNxK": 134217728, | |
"size_m": 2048, | |
"size_n": 1024, | |
"size_k": 64, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_2048x1024x64.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.000034267190183262865, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.4996503058710429, | |
"compute_intensity": 186.1818181818182, | |
"tile_compute_intensity": 5.333333333333333, | |
"MxNxK": 134217728, | |
"size_m": 512, | |
"size_n": 1024, | |
"size_k": 256, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_512x1024x256.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0000010708496932269645, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 6.239531055060609, | |
"compute_intensity": 48.76190476190476, | |
"tile_compute_intensity": 2, | |
"MxNxK": 4194304, | |
"size_m": 512, | |
"size_n": 128, | |
"size_k": 64, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_512x128x64.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.000017133595091631432, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 6.054257541996491, | |
"compute_intensity": 157.53846153846155, | |
"tile_compute_intensity": 1.9844961240310077, | |
"MxNxK": 67108864, | |
"size_m": 256, | |
"size_n": 128, | |
"size_k": 2048, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_256x128x2048.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.000008566797545815716, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 2.6386522473189125, | |
"compute_intensity": 102.4, | |
"tile_compute_intensity": 4, | |
"MxNxK": 33554432, | |
"size_m": 512, | |
"size_n": 512, | |
"size_k": 128, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_512x512x128.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0005482750429322058, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.3609110683689551, | |
"compute_intensity": 390.0952380952381, | |
"tile_compute_intensity": 3.8208955223880596, | |
"MxNxK": 2147483648, | |
"size_m": 256, | |
"size_n": 4096, | |
"size_k": 2048, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_256x4096x2048.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.000017133595091631432, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 2.392492639228498, | |
"compute_intensity": 170.66666666666666, | |
"tile_compute_intensity": 4.571428571428571, | |
"MxNxK": 67108864, | |
"size_m": 512, | |
"size_n": 512, | |
"size_k": 256, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_512x512x256.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0002997856416410974, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.0296210593303479, | |
"compute_intensity": 7.988298391028766, | |
"tile_compute_intensity": 0.9808429118773946, | |
"MxNxK": 268435456, | |
"size_m": 4096, | |
"size_n": 8192, | |
"size_k": 8, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_4096x8192x8.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.000017133595091631432, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.7040673353489775, | |
"compute_intensity": 107.78947368421052, | |
"tile_compute_intensity": 4.571428571428571, | |
"MxNxK": 67108864, | |
"size_m": 512, | |
"size_n": 1024, | |
"size_k": 128, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_512x1024x128.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.000002700114104362367, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 3.230678340309104, | |
"compute_intensity": 29.681159420289855, | |
"tile_compute_intensity": 2.2857142857142856, | |
"MxNxK": 8388608, | |
"size_m": 1024, | |
"size_n": 256, | |
"size_k": 32, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_1024x256x32.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.008772400686915293, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.1077622312138382, | |
"compute_intensity": 481.88235294117646, | |
"tile_compute_intensity": 36.57142857142857, | |
"MxNxK": 34359738368, | |
"size_m": 8192, | |
"size_n": 8192, | |
"size_k": 512, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_8192x8192x512.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.00015193615041303915, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.427294290636588, | |
"compute_intensity": 15.868280871670702, | |
"tile_compute_intensity": 1.8686131386861313, | |
"MxNxK": 268435456, | |
"size_m": 16384, | |
"size_n": 1024, | |
"size_k": 16, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_16384x1024x16.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.000010070695848702883, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 2.0603144627507635, | |
"compute_intensity": 30.796992481203006, | |
"tile_compute_intensity": 2.56, | |
"MxNxK": 33554432, | |
"size_m": 512, | |
"size_n": 2048, | |
"size_k": 32, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_512x2048x32.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.00006853438036652573, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 2.7338570096109525, | |
"compute_intensity": 112.21917808219177, | |
"tile_compute_intensity": 1.5975039001560063, | |
"MxNxK": 268435456, | |
"size_m": 512, | |
"size_n": 64, | |
"size_k": 8192, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_512x64x8192.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.00001933865507178452, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.5833986679434213, | |
"compute_intensity": 15.723608445297504, | |
"tile_compute_intensity": 1.7297297297297298, | |
"MxNxK": 33554432, | |
"size_m": 4096, | |
"size_n": 512, | |
"size_k": 16, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_4096x512x16.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.008772400686915293, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.296824988125738, | |
"compute_intensity": 1638.4, | |
"tile_compute_intensity": 20.897959183673468, | |
"MxNxK": 34359738368, | |
"size_m": 2048, | |
"size_n": 2048, | |
"size_k": 8192, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_2048x2048x8192.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.004386200343457647, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.153947860670407, | |
"compute_intensity": 963.7647058823529, | |
"tile_compute_intensity": 23.272727272727273, | |
"MxNxK": 17179869184, | |
"size_m": 8192, | |
"size_n": 1024, | |
"size_k": 2048, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_8192x1024x2048.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0010965500858644117, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.1745141589499206, | |
"compute_intensity": 337.8144329896907, | |
"tile_compute_intensity": 7.013698630136986, | |
"MxNxK": 4294967296, | |
"size_m": 512, | |
"size_n": 16384, | |
"size_k": 512, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_512x16384x512.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.004386200343457647, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.2900050286562084, | |
"compute_intensity": 448.8767123287671, | |
"tile_compute_intensity": 3.9384615384615387, | |
"MxNxK": 17179869184, | |
"size_m": 256, | |
"size_n": 16384, | |
"size_k": 4096, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_256x16384x4096.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.00006853438036652573, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.7261060158171082, | |
"compute_intensity": 240.94117647058823, | |
"tile_compute_intensity": 2.6528497409326426, | |
"MxNxK": 268435456, | |
"size_m": 256, | |
"size_n": 256, | |
"size_k": 4096, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_256x256x4096.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0005482750429322058, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 2.077946925639474, | |
"compute_intensity": 334.3673469387755, | |
"tile_compute_intensity": 3.992202729044834, | |
"MxNxK": 2147483648, | |
"size_m": 512, | |
"size_n": 256, | |
"size_k": 16384, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_512x256x16384.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.004386200343457647, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.1254723492544845, | |
"compute_intensity": 246.37593984962405, | |
"tile_compute_intensity": 1.9883495145631067, | |
"MxNxK": 17179869184, | |
"size_m": 128, | |
"size_n": 16384, | |
"size_k": 8192, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_128x16384x8192.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0010965500858644117, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 2.018691865889492, | |
"compute_intensity": 496.4848484848485, | |
"tile_compute_intensity": 5.3194805194805195, | |
"MxNxK": 4294967296, | |
"size_m": 512, | |
"size_n": 512, | |
"size_k": 16384, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_512x512x16384.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.000002141699386453929, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 6.258954677407632, | |
"compute_intensity": 102.4, | |
"tile_compute_intensity": 1.5238095238095237, | |
"MxNxK": 8388608, | |
"size_m": 128, | |
"size_n": 256, | |
"size_k": 256, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_128x256x256.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0005482750429322058, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.2237186135398657, | |
"compute_intensity": 224.43835616438355, | |
"tile_compute_intensity": 10.24, | |
"MxNxK": 2147483648, | |
"size_m": 1024, | |
"size_n": 8192, | |
"size_k": 256, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_1024x8192x256.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.000034267190183262865, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.5320544346448726, | |
"compute_intensity": 126.03076923076924, | |
"tile_compute_intensity": 1.855072463768116, | |
"MxNxK": 134217728, | |
"size_m": 128, | |
"size_n": 4096, | |
"size_k": 256, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_128x4096x256.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.000034267190183262865, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 2.308353844871352, | |
"compute_intensity": 227.55555555555554, | |
"tile_compute_intensity": 4.923076923076923, | |
"MxNxK": 134217728, | |
"size_m": 1024, | |
"size_n": 256, | |
"size_k": 512, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_1024x256x512.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.000002141699386453929, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 4.047626877918578, | |
"compute_intensity": 49.951219512195124, | |
"tile_compute_intensity": 2.2857142857142856, | |
"MxNxK": 8388608, | |
"size_m": 1024, | |
"size_n": 128, | |
"size_k": 64, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_1024x128x64.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0005482750429322058, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 2.0633321565180793, | |
"compute_intensity": 234.05714285714285, | |
"tile_compute_intensity": 1.9357277882797732, | |
"MxNxK": 2147483648, | |
"size_m": 128, | |
"size_n": 2048, | |
"size_k": 8192, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_128x2048x8192.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.017544801373830587, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.0853915513239663, | |
"compute_intensity": 1489.4545454545455, | |
"tile_compute_intensity": 42.666666666666664, | |
"MxNxK": 68719476736, | |
"size_m": 4096, | |
"size_n": 8192, | |
"size_k": 2048, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_4096x8192x2048.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.000017133595091631432, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.7537474944058378, | |
"compute_intensity": 84.45360824742268, | |
"tile_compute_intensity": 1.7534246575342465, | |
"MxNxK": 67108864, | |
"size_m": 128, | |
"size_n": 4096, | |
"size_k": 128, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_128x4096x128.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0000010708496932269645, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 7.581269390387177, | |
"compute_intensity": 73.14285714285714, | |
"tile_compute_intensity": 1.4545454545454546, | |
"MxNxK": 4194304, | |
"size_m": 128, | |
"size_n": 256, | |
"size_k": 128, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_128x256x128.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0002741375214661029, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.3985389434805156, | |
"compute_intensity": 122.26865671641791, | |
"tile_compute_intensity": 1.9320754716981132, | |
"MxNxK": 1073741824, | |
"size_m": 4096, | |
"size_n": 64, | |
"size_k": 4096, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_4096x64x4096.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0002741375214661029, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.204323953025233, | |
"compute_intensity": 119.5912408759124, | |
"tile_compute_intensity": 7.757575757575758, | |
"MxNxK": 1073741824, | |
"size_m": 1024, | |
"size_n": 8192, | |
"size_k": 128, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_1024x8192x128.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0000013865450806185128, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 4.475007666252459, | |
"compute_intensity": 29.257142857142856, | |
"tile_compute_intensity": 2, | |
"MxNxK": 4194304, | |
"size_m": 512, | |
"size_n": 256, | |
"size_k": 32, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_512x256x32.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0005482750429322058, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.3607243877015245, | |
"compute_intensity": 682.6666666666666, | |
"tile_compute_intensity": 10.24, | |
"MxNxK": 2147483648, | |
"size_m": 1024, | |
"size_n": 1024, | |
"size_k": 2048, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_1024x1024x2048.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0021931001717288233, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.1548626638864659, | |
"compute_intensity": 125.5478927203065, | |
"tile_compute_intensity": 13.473684210526315, | |
"MxNxK": 8589934592, | |
"size_m": 16384, | |
"size_n": 4096, | |
"size_k": 128, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_16384x4096x128.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0005482750429322058, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.2191661702027596, | |
"compute_intensity": 234.05714285714285, | |
"tile_compute_intensity": 14.222222222222221, | |
"MxNxK": 2147483648, | |
"size_m": 2048, | |
"size_n": 4096, | |
"size_k": 256, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_2048x4096x256.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.000034267190183262865, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.7427282391038321, | |
"compute_intensity": 59.7956204379562, | |
"tile_compute_intensity": 3.878787878787879, | |
"MxNxK": 134217728, | |
"size_m": 512, | |
"size_n": 4096, | |
"size_k": 64, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_512x4096x64.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.00003984492705356358, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.71520957712011, | |
"compute_intensity": 30.97164461247637, | |
"tile_compute_intensity": 2.6391752577319587, | |
"MxNxK": 134217728, | |
"size_m": 512, | |
"size_n": 8192, | |
"size_k": 32, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_512x8192x32.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0002741375214661029, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.4071068896726995, | |
"compute_intensity": 221.40540540540542, | |
"tile_compute_intensity": 12.8, | |
"MxNxK": 1073741824, | |
"size_m": 4096, | |
"size_n": 1024, | |
"size_k": 256, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_4096x1024x256.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0010965500858644117, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.2011840147745882, | |
"compute_intensity": 682.6666666666666, | |
"tile_compute_intensity": 7.013698630136986, | |
"MxNxK": 4294967296, | |
"size_m": 512, | |
"size_n": 2048, | |
"size_k": 4096, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_512x2048x4096.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.00013706876073305146, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.3154039611380937, | |
"compute_intensity": 118.72463768115942, | |
"tile_compute_intensity": 9.142857142857142, | |
"MxNxK": 536870912, | |
"size_m": 4096, | |
"size_n": 1024, | |
"size_k": 128, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_4096x1024x128.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.00006853438036652573, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.5548867202797338, | |
"compute_intensity": 292.57142857142856, | |
"tile_compute_intensity": 3.1604938271604937, | |
"MxNxK": 268435456, | |
"size_m": 256, | |
"size_n": 512, | |
"size_k": 2048, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_256x512x2048.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0002741375214661029, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.1905965540645835, | |
"compute_intensity": 122.26865671641791, | |
"tile_compute_intensity": 9.846153846153847, | |
"MxNxK": 1073741824, | |
"size_m": 2048, | |
"size_n": 4096, | |
"size_k": 128, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_2048x4096x128.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 1.277080995306525e-7, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 32.12325547479827, | |
"compute_intensity": 23.272727272727273, | |
"tile_compute_intensity": 0.8, | |
"MxNxK": 262144, | |
"size_m": 128, | |
"size_n": 64, | |
"size_k": 32, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_128x64x32.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.004386200343457647, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.1192754513756153, | |
"compute_intensity": 246.37593984962405, | |
"tile_compute_intensity": 23.272727272727273, | |
"MxNxK": 17179869184, | |
"size_m": 16384, | |
"size_n": 4096, | |
"size_k": 256, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_16384x4096x256.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.00007593158717808223, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.1089139431328212, | |
"compute_intensity": 7.9360620004843785, | |
"tile_compute_intensity": 0.8873483535528596, | |
"MxNxK": 67108864, | |
"size_m": 512, | |
"size_n": 16384, | |
"size_k": 8, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_512x16384x8.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.00013706876073305146, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.5587621701682086, | |
"compute_intensity": 60.12477064220184, | |
"tile_compute_intensity": 5.224489795918367, | |
"MxNxK": 536870912, | |
"size_m": 16384, | |
"size_n": 512, | |
"size_k": 64, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_16384x512x64.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 7.38882575855918e-7, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 6.240775103252198, | |
"compute_intensity": 14.124137931034483, | |
"tile_compute_intensity": 0.9411764705882353, | |
"MxNxK": 1048576, | |
"size_m": 1024, | |
"size_n": 64, | |
"size_k": 16, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_1024x64x16.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.008772400686915293, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.1603372995288328, | |
"compute_intensity": 127.0077519379845, | |
"tile_compute_intensity": 14.628571428571428, | |
"MxNxK": 34359738368, | |
"size_m": 16384, | |
"size_n": 16384, | |
"size_k": 128, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_16384x16384x128.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.017544801373830587, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.306094048471535, | |
"compute_intensity": 1820.4444444444443, | |
"tile_compute_intensity": 21.11340206185567, | |
"MxNxK": 68719476736, | |
"size_m": 2048, | |
"size_n": 2048, | |
"size_k": 16384, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_2048x2048x16384.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0002741375214661029, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.3911266678259708, | |
"compute_intensity": 227.55555555555554, | |
"tile_compute_intensity": 3.5310344827586206, | |
"MxNxK": 1073741824, | |
"size_m": 2048, | |
"size_n": 128, | |
"size_k": 4096, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_2048x128x4096.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.000008566797545815716, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 3.531774686609574, | |
"compute_intensity": 97.52380952380952, | |
"tile_compute_intensity": 1.7297297297297298, | |
"MxNxK": 33554432, | |
"size_m": 1024, | |
"size_n": 64, | |
"size_k": 512, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_1024x64x512.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.000008566797545815716, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 2.5826220578793713, | |
"compute_intensity": 83.59183673469387, | |
"tile_compute_intensity": 2.909090909090909, | |
"MxNxK": 33554432, | |
"size_m": 2048, | |
"size_n": 128, | |
"size_k": 128, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_2048x128x128.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0021931001717288233, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.1511974208459852, | |
"compute_intensity": 239.1824817518248, | |
"tile_compute_intensity": 3.9083969465648853, | |
"MxNxK": 8589934592, | |
"size_m": 16384, | |
"size_n": 128, | |
"size_k": 4096, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_16384x128x4096.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.000017133595091631432, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 2.3471081738919817, | |
"compute_intensity": 124.12121212121212, | |
"tile_compute_intensity": 3.2, | |
"MxNxK": 67108864, | |
"size_m": 2048, | |
"size_n": 128, | |
"size_k": 256, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_2048x128x256.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.017544801373830587, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.0735572879761617, | |
"compute_intensity": 1310.72, | |
"tile_compute_intensity": 26.94736842105263, | |
"MxNxK": 68719476736, | |
"size_m": 2048, | |
"size_n": 16384, | |
"size_k": 2048, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_2048x16384x2048.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0005482750429322058, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.2949121482750208, | |
"compute_intensity": 234.05714285714285, | |
"tile_compute_intensity": 1.9616858237547892, | |
"MxNxK": 2147483648, | |
"size_m": 128, | |
"size_n": 4096, | |
"size_k": 4096, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_128x4096x4096.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.017544801373830587, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.155261589771326, | |
"compute_intensity": 252.06153846153848, | |
"tile_compute_intensity": 26.94736842105263, | |
"MxNxK": 68719476736, | |
"size_m": 16384, | |
"size_n": 16384, | |
"size_k": 256, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_16384x16384x256.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.00003809350168857177, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.341289124586616, | |
"compute_intensity": 15.845261121856867, | |
"tile_compute_intensity": 1.8285714285714285, | |
"MxNxK": 67108864, | |
"size_m": 4096, | |
"size_n": 1024, | |
"size_k": 16, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_4096x1024x16.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.000017133595091631432, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 6.067704665159816, | |
"compute_intensity": 157.53846153846155, | |
"tile_compute_intensity": 1.5900621118012421, | |
"MxNxK": 67108864, | |
"size_m": 128, | |
"size_n": 256, | |
"size_k": 2048, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_128x256x2048.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.000017133595091631432, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 5.457535421447756, | |
"compute_intensity": 84.45360824742268, | |
"tile_compute_intensity": 0.9990243902439024, | |
"MxNxK": 67108864, | |
"size_m": 128, | |
"size_n": 64, | |
"size_k": 8192, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_128x64x8192.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.000002955530303423672, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 2.8421295236211193, | |
"compute_intensity": 28.248275862068965, | |
"tile_compute_intensity": 1.3061224489795917, | |
"MxNxK": 8388608, | |
"size_m": 128, | |
"size_n": 2048, | |
"size_k": 32, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_128x2048x32.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.017544801373830587, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.1600982157805082, | |
"compute_intensity": 885.6216216216217, | |
"tile_compute_intensity": 7.816793893129771, | |
"MxNxK": 68719476736, | |
"size_m": 512, | |
"size_n": 16384, | |
"size_k": 8192, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_512x16384x8192.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.00006853438036652573, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.389784231688841, | |
"compute_intensity": 292.57142857142856, | |
"tile_compute_intensity": 7.111111111111111, | |
"MxNxK": 268435456, | |
"size_m": 1024, | |
"size_n": 512, | |
"size_k": 512, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_1024x512x512.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0021931001717288233, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.307597685658157, | |
"compute_intensity": 404.5432098765432, | |
"tile_compute_intensity": 7.529411764705882, | |
"MxNxK": 8589934592, | |
"size_m": 16384, | |
"size_n": 256, | |
"size_k": 2048, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_16384x256x2048.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 5.354248466134823e-7, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 14.971288089070061, | |
"compute_intensity": 56.888888888888886, | |
"tile_compute_intensity": 1.2307692307692308, | |
"MxNxK": 2097152, | |
"size_m": 256, | |
"size_n": 64, | |
"size_k": 128, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_256x64x128.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0021931001717288233, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.1272137962085342, | |
"compute_intensity": 125.5478927203065, | |
"tile_compute_intensity": 12.487804878048781, | |
"MxNxK": 8589934592, | |
"size_m": 4096, | |
"size_n": 16384, | |
"size_k": 128, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_4096x16384x128.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.004386200343457647, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.1111773493382209, | |
"compute_intensity": 819.2, | |
"tile_compute_intensity": 7.474452554744525, | |
"MxNxK": 17179869184, | |
"size_m": 512, | |
"size_n": 4096, | |
"size_k": 8192, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_512x4096x8192.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0010965500858644117, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 2.0213300255308644, | |
"compute_intensity": 399.609756097561, | |
"tile_compute_intensity": 3.5493934142114383, | |
"MxNxK": 4294967296, | |
"size_m": 256, | |
"size_n": 1024, | |
"size_k": 16384, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_256x1024x16384.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.000004283398772907858, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 3.7480514590532517, | |
"compute_intensity": 113.77777777777777, | |
"tile_compute_intensity": 1.6842105263157894, | |
"MxNxK": 16777216, | |
"size_m": 128, | |
"size_n": 512, | |
"size_k": 256, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_128x512x256.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.000009924743734953566, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 2.108669042347244, | |
"compute_intensity": 31.03030303030303, | |
"tile_compute_intensity": 2.909090909090909, | |
"MxNxK": 33554432, | |
"size_m": 1024, | |
"size_n": 1024, | |
"size_k": 32, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_1024x1024x32.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.00003854960204403839, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.1751716558637166, | |
"compute_intensity": 7.875030040855563, | |
"tile_compute_intensity": 0.8858131487889274, | |
"MxNxK": 33554432, | |
"size_m": 16384, | |
"size_n": 256, | |
"size_k": 8, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_16384x256x8.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.00006853438036652573, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.4668724760341123, | |
"compute_intensity": 341.3333333333333, | |
"tile_compute_intensity": 5.12, | |
"MxNxK": 268435456, | |
"size_m": 512, | |
"size_n": 512, | |
"size_k": 1024, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_512x512x1024.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.000017133595091631432, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 6.78526574228811, | |
"compute_intensity": 124.12121212121212, | |
"tile_compute_intensity": 1.3298701298701299, | |
"MxNxK": 67108864, | |
"size_m": 128, | |
"size_n": 128, | |
"size_k": 4096, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_128x128x4096.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.000009523375422142943, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.6024570554786115, | |
"compute_intensity": 7.922630560928433, | |
"tile_compute_intensity": 0.8767123287671232, | |
"MxNxK": 8388608, | |
"size_m": 512, | |
"size_n": 2048, | |
"size_k": 8, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_512x2048x8.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.00013706876073305146, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.412009507892343, | |
"compute_intensity": 199.8048780487805, | |
"tile_compute_intensity": 3.657142857142857, | |
"MxNxK": 536870912, | |
"size_m": 4096, | |
"size_n": 128, | |
"size_k": 1024, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_4096x128x1024.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.000002141699386453929, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 4.509316196054707, | |
"compute_intensity": 78.76923076923077, | |
"tile_compute_intensity": 1.6, | |
"MxNxK": 8388608, | |
"size_m": 128, | |
"size_n": 512, | |
"size_k": 128, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_128x512x128.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0005482750429322058, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.3653585356801539, | |
"compute_intensity": 585.1428571428571, | |
"tile_compute_intensity": 14.222222222222221, | |
"MxNxK": 2147483648, | |
"size_m": 2048, | |
"size_n": 1024, | |
"size_k": 1024, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_2048x1024x1024.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.004386200343457647, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.112095804632851, | |
"compute_intensity": 246.37593984962405, | |
"tile_compute_intensity": 20.48, | |
"MxNxK": 17179869184, | |
"size_m": 4096, | |
"size_n": 16384, | |
"size_k": 256, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_4096x16384x256.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0002741375214661029, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.3856869392601798, | |
"compute_intensity": 372.3636363636364, | |
"tile_compute_intensity": 6.2439024390243905, | |
"MxNxK": 1073741824, | |
"size_m": 2048, | |
"size_n": 256, | |
"size_k": 2048, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_2048x256x2048.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0003006613543235933, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.0844041091568042, | |
"compute_intensity": 15.953261927945473, | |
"tile_compute_intensity": 1.9393939393939394, | |
"MxNxK": 536870912, | |
"size_m": 8192, | |
"size_n": 4096, | |
"size_k": 16, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_8192x4096x16.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 1.824401421866464e-7, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 19.785119560575716, | |
"compute_intensity": 14.222222222222221, | |
"tile_compute_intensity": 0.8, | |
"MxNxK": 262144, | |
"size_m": 128, | |
"size_n": 128, | |
"size_k": 16, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_128x128x16.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0000010708496932269645, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 12.422285367876073, | |
"compute_intensity": 73.14285714285714, | |
"tile_compute_intensity": 1.28, | |
"MxNxK": 4194304, | |
"size_m": 256, | |
"size_n": 64, | |
"size_k": 256, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_256x64x256.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0000013865450806185128, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 4.2488342815418045, | |
"compute_intensity": 29.257142857142856, | |
"tile_compute_intensity": 1.7777777777777777, | |
"MxNxK": 4194304, | |
"size_m": 256, | |
"size_n": 512, | |
"size_k": 32, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_256x512x32.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0005992793790546961, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.0225507581948179, | |
"compute_intensity": 7.990246281394782, | |
"tile_compute_intensity": 0.982725527831094, | |
"MxNxK": 536870912, | |
"size_m": 4096, | |
"size_n": 16384, | |
"size_k": 8, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_4096x16384x8.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.00003798403760325979, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.2080021562714234, | |
"compute_intensity": 7.934140435835351, | |
"tile_compute_intensity": 0.8858131487889274, | |
"MxNxK": 33554432, | |
"size_m": 512, | |
"size_n": 8192, | |
"size_k": 8, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_512x8192x8.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0002741375214661029, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.3542749868088828, | |
"compute_intensity": 169.78238341968913, | |
"tile_compute_intensity": 1.9320754716981132, | |
"MxNxK": 1073741824, | |
"size_m": 128, | |
"size_n": 16384, | |
"size_k": 512, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_128x16384x512.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.000001249714973978528, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 3.612984052593058, | |
"compute_intensity": 7.728301886792453, | |
"tile_compute_intensity": 0.7619047619047619, | |
"MxNxK": 1048576, | |
"size_m": 1024, | |
"size_n": 128, | |
"size_k": 8, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_1024x128x8.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.00013706876073305146, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 2.405729810340007, | |
"compute_intensity": 168.90721649484536, | |
"tile_compute_intensity": 1.5987509758001561, | |
"MxNxK": 536870912, | |
"size_m": 128, | |
"size_n": 256, | |
"size_k": 16384, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_128x256x16384.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0010965500858644117, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.1843165076991746, | |
"compute_intensity": 399.609756097561, | |
"tile_compute_intensity": 18.285714285714285, | |
"MxNxK": 4294967296, | |
"size_m": 8192, | |
"size_n": 1024, | |
"size_k": 512, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_8192x1024x512.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.004386200343457647, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.1274516909080219, | |
"compute_intensity": 780.1904761904761, | |
"tile_compute_intensity": 10.61139896373057, | |
"MxNxK": 17179869184, | |
"size_m": 2048, | |
"size_n": 512, | |
"size_k": 16384, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_2048x512x16384.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.00030241277968858513, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.2455558109157576, | |
"compute_intensity": 31.813592233009707, | |
"tile_compute_intensity": 3.710144927536232, | |
"MxNxK": 1073741824, | |
"size_m": 4096, | |
"size_n": 8192, | |
"size_k": 32, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_4096x8192x32.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0002741375214661029, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.245704721062983, | |
"compute_intensity": 63.01538461538462, | |
"tile_compute_intensity": 6.7368421052631575, | |
"MxNxK": 1073741824, | |
"size_m": 4096, | |
"size_n": 4096, | |
"size_k": 64, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_4096x4096x64.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0021931001717288233, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.3148553849638738, | |
"compute_intensity": 63.627184466019415, | |
"tile_compute_intensity": 7.420289855072464, | |
"MxNxK": 8589934592, | |
"size_m": 8192, | |
"size_n": 16384, | |
"size_k": 64, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_8192x16384x64.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.000008566797545815716, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 2.2180517007989557, | |
"compute_intensity": 50.88198757763975, | |
"tile_compute_intensity": 2.56, | |
"MxNxK": 33554432, | |
"size_m": 4096, | |
"size_n": 128, | |
"size_k": 64, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_4096x128x64.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.00013706876073305146, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.2820426480503673, | |
"compute_intensity": 112.99310344827586, | |
"tile_compute_intensity": 5.224489795918367, | |
"MxNxK": 536870912, | |
"size_m": 512, | |
"size_n": 8192, | |
"size_k": 128, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_512x8192x128.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.004386200343457647, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.2853395407475143, | |
"compute_intensity": 1365.3333333333333, | |
"tile_compute_intensity": 20.48, | |
"MxNxK": 17179869184, | |
"size_m": 2048, | |
"size_n": 2048, | |
"size_k": 4096, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_2048x2048x4096.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.000034267190183262865, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 3.403558871044405, | |
"compute_intensity": 110.70270270270271, | |
"tile_compute_intensity": 1.5950155763239875, | |
"MxNxK": 134217728, | |
"size_m": 512, | |
"size_n": 64, | |
"size_k": 4096, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_512x64x4096.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.00007604105126339423, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.1785213516386386, | |
"compute_intensity": 15.860600193610843, | |
"tile_compute_intensity": 1.7655172413793103, | |
"MxNxK": 134217728, | |
"size_m": 1024, | |
"size_n": 8192, | |
"size_k": 16, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_1024x8192x16.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.00013706876073305146, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.4420558377541857, | |
"compute_intensity": 102.0809968847352, | |
"tile_compute_intensity": 5.224489795918367, | |
"MxNxK": 536870912, | |
"size_m": 16384, | |
"size_n": 256, | |
"size_k": 128, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_16384x256x128.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0002741375214661029, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.4111341131520292, | |
"compute_intensity": 169.78238341968913, | |
"tile_compute_intensity": 6.2439024390243905, | |
"MxNxK": 1073741824, | |
"size_m": 16384, | |
"size_n": 256, | |
"size_k": 256, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_16384x256x256.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.00013706876073305146, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 2.406430160105082, | |
"compute_intensity": 248.24242424242425, | |
"tile_compute_intensity": 2.6597402597402597, | |
"MxNxK": 536870912, | |
"size_m": 256, | |
"size_n": 256, | |
"size_k": 8192, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_256x256x8192.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0021931001717288233, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 2.0018229999011217, | |
"compute_intensity": 655.36, | |
"tile_compute_intensity": 6.38006230529595, | |
"MxNxK": 8589934592, | |
"size_m": 512, | |
"size_n": 1024, | |
"size_k": 16384, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_512x1024x16384.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.008772400686915293, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.299316130371604, | |
"compute_intensity": 474.8985507246377, | |
"tile_compute_intensity": 3.9536679536679538, | |
"MxNxK": 34359738368, | |
"size_m": 256, | |
"size_n": 16384, | |
"size_k": 8192, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_256x16384x8192.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.00013706876073305146, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.4886076093205336, | |
"compute_intensity": 61.82641509433962, | |
"tile_compute_intensity": 6.095238095238095, | |
"MxNxK": 536870912, | |
"size_m": 8192, | |
"size_n": 1024, | |
"size_k": 64, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_8192x1024x64.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0000024811859337383914, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 2.48913234835193, | |
"compute_intensity": 15.515151515151516, | |
"tile_compute_intensity": 1.4545454545454546, | |
"MxNxK": 4194304, | |
"size_m": 512, | |
"size_n": 512, | |
"size_k": 16, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_512x512x16.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.000008566797545815716, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 2.1138354770984793, | |
"compute_intensity": 56.10958904109589, | |
"tile_compute_intensity": 2.56, | |
"MxNxK": 33554432, | |
"size_m": 256, | |
"size_n": 2048, | |
"size_k": 64, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_256x2048x64.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0002741375214661029, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.4020525827942611, | |
"compute_intensity": 202.2716049382716, | |
"tile_compute_intensity": 6.2439024390243905, | |
"MxNxK": 1073741824, | |
"size_m": 512, | |
"size_n": 8192, | |
"size_k": 256, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_512x8192x256.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0002741375214661029, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.3241587578618281, | |
"compute_intensity": 113.3840830449827, | |
"tile_compute_intensity": 7.757575757575758, | |
"MxNxK": 1073741824, | |
"size_m": 16384, | |
"size_n": 512, | |
"size_k": 128, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_16384x512x128.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0002741375214661029, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.353843107284822, | |
"compute_intensity": 252.06153846153848, | |
"tile_compute_intensity": 3.710144927536232, | |
"MxNxK": 1073741824, | |
"size_m": 256, | |
"size_n": 8192, | |
"size_k": 512, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_256x8192x512.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0002741375214661029, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.368434265293593, | |
"compute_intensity": 372.3636363636364, | |
"tile_compute_intensity": 3.710144927536232, | |
"MxNxK": 1073741824, | |
"size_m": 256, | |
"size_n": 2048, | |
"size_k": 2048, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_256x2048x2048.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.000008566797545815716, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 12.060703090523388, | |
"compute_intensity": 97.52380952380952, | |
"tile_compute_intensity": 1.3264248704663213, | |
"MxNxK": 33554432, | |
"size_m": 256, | |
"size_n": 64, | |
"size_k": 2048, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_256x64x2048.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.008772400686915293, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.1354881322700046, | |
"compute_intensity": 250.13740458015266, | |
"tile_compute_intensity": 24.38095238095238, | |
"MxNxK": 34359738368, | |
"size_m": 8192, | |
"size_n": 16384, | |
"size_k": 256, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_8192x16384x256.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.000034267190183262865, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 2.2992022455642522, | |
"compute_intensity": 163.84, | |
"tile_compute_intensity": 1.8823529411764706, | |
"MxNxK": 134217728, | |
"size_m": 128, | |
"size_n": 2048, | |
"size_k": 512, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_128x2048x512.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0005482750429322058, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 2.082668468707633, | |
"compute_intensity": 224.43835616438355, | |
"tile_compute_intensity": 1.8806244260789715, | |
"MxNxK": 2147483648, | |
"size_m": 128, | |
"size_n": 1024, | |
"size_k": 16384, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_128x1024x16384.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0010965500858644117, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 2.0615696464132007, | |
"compute_intensity": 125.06870229007633, | |
"tile_compute_intensity": 1.9375591296121097, | |
"MxNxK": 4294967296, | |
"size_m": 4096, | |
"size_n": 64, | |
"size_k": 16384, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_4096x64x16384.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.00013706876073305146, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.4199004606358956, | |
"compute_intensity": 112.99310344827586, | |
"tile_compute_intensity": 1.9393939393939394, | |
"MxNxK": 536870912, | |
"size_m": 8192, | |
"size_n": 64, | |
"size_k": 1024, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_8192x64x1024.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.017544801373830587, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.0623375930130383, | |
"compute_intensity": 1310.72, | |
"tile_compute_intensity": 15.058823529411764, | |
"MxNxK": 68719476736, | |
"size_m": 1024, | |
"size_n": 16384, | |
"size_k": 4096, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_1024x16384x4096.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.00013706876073305146, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.400500024741197, | |
"compute_intensity": 409.6, | |
"tile_compute_intensity": 7.529411764705882, | |
"MxNxK": 536870912, | |
"size_m": 1024, | |
"size_n": 512, | |
"size_k": 1024, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_1024x512x1024.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.004386200343457647, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.1471651014430677, | |
"compute_intensity": 126.51737451737452, | |
"tile_compute_intensity": 13.837837837837839, | |
"MxNxK": 17179869184, | |
"size_m": 8192, | |
"size_n": 16384, | |
"size_k": 128, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_8192x16384x128.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 6.567845118719272e-7, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 5.934366325676873, | |
"compute_intensity": 15.058823529411764, | |
"tile_compute_intensity": 1.1428571428571428, | |
"MxNxK": 1048576, | |
"size_m": 256, | |
"size_n": 256, | |
"size_k": 16, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_256x256x16.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0002741375214661029, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.37738746330901, | |
"compute_intensity": 202.2716049382716, | |
"tile_compute_intensity": 1.9541984732824427, | |
"MxNxK": 1073741824, | |
"size_m": 128, | |
"size_n": 8192, | |
"size_k": 1024, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_128x8192x1024.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0021931001717288233, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.146068543619046, | |
"compute_intensity": 442.81081081081084, | |
"tile_compute_intensity": 7.420289855072464, | |
"MxNxK": 8589934592, | |
"size_m": 8192, | |
"size_n": 256, | |
"size_k": 4096, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_8192x256x4096.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.00013706876073305146, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 2.407784147002344, | |
"compute_intensity": 199.8048780487805, | |
"tile_compute_intensity": 1.7746967071057191, | |
"MxNxK": 536870912, | |
"size_m": 128, | |
"size_n": 512, | |
"size_k": 8192, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_128x512x8192.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0005482750429322058, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.2446949638040725, | |
"compute_intensity": 203.527950310559, | |
"tile_compute_intensity": 10.24, | |
"MxNxK": 2147483648, | |
"size_m": 16384, | |
"size_n": 512, | |
"size_k": 256, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_16384x512x256.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.000034267190183262865, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 2.478685940112101, | |
"compute_intensity": 101.1358024691358, | |
"tile_compute_intensity": 1.8823529411764706, | |
"MxNxK": 134217728, | |
"size_m": 4096, | |
"size_n": 64, | |
"size_k": 512, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_4096x64x512.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.000011712657128382701, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.8485301206750022, | |
"compute_intensity": 28.395147313691506, | |
"tile_compute_intensity": 1.9692307692307693, | |
"MxNxK": 33554432, | |
"size_m": 8192, | |
"size_n": 128, | |
"size_k": 32, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_8192x128x32.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 3.283922559359636e-7, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 16.44862171547781, | |
"compute_intensity": 39.38461538461539, | |
"tile_compute_intensity": 1.1428571428571428, | |
"MxNxK": 1048576, | |
"size_m": 256, | |
"size_n": 64, | |
"size_k": 64, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_256x64x64.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.017544801373830587, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.2145789047211375, | |
"compute_intensity": 885.6216216216217, | |
"tile_compute_intensity": 14.840579710144928, | |
"MxNxK": 68719476736, | |
"size_m": 16384, | |
"size_n": 512, | |
"size_k": 8192, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_16384x512x8192.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.00013706876073305146, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.3935896126279845, | |
"compute_intensity": 409.6, | |
"tile_compute_intensity": 6.095238095238095, | |
"MxNxK": 536870912, | |
"size_m": 512, | |
"size_n": 1024, | |
"size_k": 1024, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_512x1024x1024.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0003041642050535769, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.8018163509381235, | |
"compute_intensity": 31.721200387221685, | |
"tile_compute_intensity": 3.710144927536232, | |
"MxNxK": 1073741824, | |
"size_m": 16384, | |
"size_n": 2048, | |
"size_k": 32, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_16384x2048x32.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.000004889395810602124, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 2.0079372997072378, | |
"compute_intensity": 15.633587786259541, | |
"tile_compute_intensity": 1.5238095238095237, | |
"MxNxK": 8388608, | |
"size_m": 512, | |
"size_n": 1024, | |
"size_k": 16, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_512x1024x16.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.00013706876073305146, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 2.4122432500057345, | |
"compute_intensity": 168.90721649484536, | |
"tile_compute_intensity": 1.9980487804878049, | |
"MxNxK": 536870912, | |
"size_m": 256, | |
"size_n": 128, | |
"size_k": 16384, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_256x128x16384.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0000035210947442022757, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 2.4565087941250385, | |
"compute_intensity": 25.5202492211838, | |
"tile_compute_intensity": 1.3061224489795917, | |
"MxNxK": 8388608, | |
"size_m": 4096, | |
"size_n": 64, | |
"size_k": 32, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_4096x64x32.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0021931001717288233, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.1583674702659925, | |
"compute_intensity": 125.5478927203065, | |
"tile_compute_intensity": 1.9806576402321083, | |
"MxNxK": 8589934592, | |
"size_m": 16384, | |
"size_n": 64, | |
"size_k": 8192, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_16384x64x8192.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.004386200343457647, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.0867823138031198, | |
"compute_intensity": 448.8767123287671, | |
"tile_compute_intensity": 28.444444444444443, | |
"MxNxK": 17179869184, | |
"size_m": 16384, | |
"size_n": 2048, | |
"size_k": 512, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_16384x2048x512.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0005482750429322058, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.3766637572274913, | |
"compute_intensity": 224.43835616438355, | |
"tile_compute_intensity": 3.8208955223880596, | |
"MxNxK": 2147483648, | |
"size_m": 8192, | |
"size_n": 128, | |
"size_k": 2048, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_8192x128x2048.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0005482750429322058, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.3374542772984166, | |
"compute_intensity": 409.6, | |
"tile_compute_intensity": 16, | |
"MxNxK": 2147483648, | |
"size_m": 2048, | |
"size_n": 2048, | |
"size_k": 512, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_2048x2048x512.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.017544801373830587, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.1612138833179837, | |
"compute_intensity": 2048, | |
"tile_compute_intensity": 24.975609756097562, | |
"MxNxK": 68719476736, | |
"size_m": 2048, | |
"size_n": 4096, | |
"size_k": 8192, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_2048x4096x8192.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.000002645382061706373, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 2.3757628682482297, | |
"compute_intensity": 15.003663003663004, | |
"tile_compute_intensity": 1.28, | |
"MxNxK": 4194304, | |
"size_m": 2048, | |
"size_n": 128, | |
"size_k": 16, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_2048x128x16.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0021931001717288233, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.1249828857938822, | |
"compute_intensity": 744.7272727272727, | |
"tile_compute_intensity": 10.556701030927835, | |
"MxNxK": 8589934592, | |
"size_m": 2048, | |
"size_n": 512, | |
"size_k": 8192, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_2048x512x8192.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0005482750429322058, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.1423055437777405, | |
"compute_intensity": 120.02930402930403, | |
"tile_compute_intensity": 7.876923076923077, | |
"MxNxK": 2147483648, | |
"size_m": 1024, | |
"size_n": 16384, | |
"size_k": 128, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_1024x16384x128.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0005482750429322058, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 2.085820297446649, | |
"compute_intensity": 234.05714285714285, | |
"tile_compute_intensity": 3.5432525951557095, | |
"MxNxK": 2147483648, | |
"size_m": 2048, | |
"size_n": 128, | |
"size_k": 8192, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_2048x128x8192.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0003041642050535769, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.2387098741852238, | |
"compute_intensity": 31.721200387221685, | |
"tile_compute_intensity": 3.5310344827586206, | |
"MxNxK": 1073741824, | |
"size_m": 2048, | |
"size_n": 16384, | |
"size_k": 32, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_2048x16384x32.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 1.6875713152264795e-7, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 20.89630314447214, | |
"compute_intensity": 7.420289855072464, | |
"tile_compute_intensity": 0.5714285714285714, | |
"MxNxK": 131072, | |
"size_m": 256, | |
"size_n": 64, | |
"size_k": 8, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_256x64x8.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.00007560319492214628, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.1774317044220806, | |
"compute_intensity": 15.906796116504854, | |
"tile_compute_intensity": 1.855072463768116, | |
"MxNxK": 134217728, | |
"size_m": 2048, | |
"size_n": 4096, | |
"size_k": 16, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_2048x4096x16.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.00006853438036652573, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.4530518216718074, | |
"compute_intensity": 167.18367346938774, | |
"tile_compute_intensity": 5.818181818181818, | |
"MxNxK": 268435456, | |
"size_m": 4096, | |
"size_n": 256, | |
"size_k": 256, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_4096x256x256.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0000010708496932269645, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 6.0423045436290925, | |
"compute_intensity": 48.76190476190476, | |
"tile_compute_intensity": 1.4545454545454546, | |
"MxNxK": 4194304, | |
"size_m": 128, | |
"size_n": 512, | |
"size_k": 64, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_128x512x64.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.000008566797545815716, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 13.505163694618627, | |
"compute_intensity": 83.59183673469387, | |
"tile_compute_intensity": 0.9980506822612085, | |
"MxNxK": 33554432, | |
"size_m": 128, | |
"size_n": 64, | |
"size_k": 4096, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_128x64x4096.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0021931001717288233, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.3057052295017908, | |
"compute_intensity": 655.36, | |
"tile_compute_intensity": 13.473684210526315, | |
"MxNxK": 8589934592, | |
"size_m": 8192, | |
"size_n": 512, | |
"size_k": 2048, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_8192x512x2048.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.000034267190183262865, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.5521318639882025, | |
"compute_intensity": 101.1358024691358, | |
"tile_compute_intensity": 4.923076923076923, | |
"MxNxK": 134217728, | |
"size_m": 4096, | |
"size_n": 256, | |
"size_k": 128, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_4096x256x128.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0010965500858644117, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 2.0344037873593264, | |
"compute_intensity": 240.94117647058823, | |
"tile_compute_intensity": 1.965451055662188, | |
"MxNxK": 4294967296, | |
"size_m": 128, | |
"size_n": 4096, | |
"size_k": 8192, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_128x4096x8192.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.000004283398772907858, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 5.7576707124512545, | |
"compute_intensity": 128, | |
"tile_compute_intensity": 1.9393939393939394, | |
"MxNxK": 16777216, | |
"size_m": 256, | |
"size_n": 128, | |
"size_k": 512, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_256x128x512.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 6.202964834345979e-7, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 9.610887776946955, | |
"compute_intensity": 40.96, | |
"tile_compute_intensity": 1.3333333333333333, | |
"MxNxK": 2097152, | |
"size_m": 512, | |
"size_n": 64, | |
"size_k": 64, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_512x64x64.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0024006203669487687, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.7726604540220186, | |
"compute_intensity": 31.937621832358673, | |
"tile_compute_intensity": 3.9083969465648853, | |
"MxNxK": 8589934592, | |
"size_m": 16384, | |
"size_n": 16384, | |
"size_k": 32, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_16384x16384x32.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0010965500858644117, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.1215947760645146, | |
"compute_intensity": 225.98620689655172, | |
"tile_compute_intensity": 10.448979591836734, | |
"MxNxK": 4294967296, | |
"size_m": 1024, | |
"size_n": 16384, | |
"size_k": 256, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_1024x16384x256.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.000009523375422142943, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.6226180668212593, | |
"compute_intensity": 7.922630560928433, | |
"tile_compute_intensity": 0.9142857142857143, | |
"MxNxK": 8388608, | |
"size_m": 2048, | |
"size_n": 512, | |
"size_k": 8, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_2048x512x8.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.004386200343457647, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.1429168847441495, | |
"compute_intensity": 246.37593984962405, | |
"tile_compute_intensity": 3.9233716475095783, | |
"MxNxK": 17179869184, | |
"size_m": 16384, | |
"size_n": 128, | |
"size_k": 8192, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_16384x128x8192.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.00006853438036652573, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.4353088349240202, | |
"compute_intensity": 127.0077519379845, | |
"tile_compute_intensity": 3.4594594594594597, | |
"MxNxK": 268435456, | |
"size_m": 8192, | |
"size_n": 128, | |
"size_k": 256, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_8192x128x256.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.000034267190183262865, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 3.5216895141012006, | |
"compute_intensity": 126.03076923076924, | |
"tile_compute_intensity": 1.3315994798439532, | |
"MxNxK": 134217728, | |
"size_m": 128, | |
"size_n": 128, | |
"size_k": 8192, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_128x128x8192.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.000023388826228328075, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.805443261958237, | |
"compute_intensity": 28.419774501300953, | |
"tile_compute_intensity": 1.3298701298701299, | |
"MxNxK": 67108864, | |
"size_m": 128, | |
"size_n": 16384, | |
"size_k": 32, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_128x16384x32.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0010965500858644117, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.3962365202715155, | |
"compute_intensity": 63.38104448742747, | |
"tile_compute_intensity": 7.314285714285714, | |
"MxNxK": 4294967296, | |
"size_m": 16384, | |
"size_n": 4096, | |
"size_k": 64, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_16384x4096x64.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0012026454172943731, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.793691134823464, | |
"compute_intensity": 31.906523855890946, | |
"tile_compute_intensity": 3.878787878787879, | |
"MxNxK": 4294967296, | |
"size_m": 16384, | |
"size_n": 8192, | |
"size_k": 32, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_16384x8192x32.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.000034267190183262865, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 2.878648505694077, | |
"compute_intensity": 186.1818181818182, | |
"tile_compute_intensity": 3.1219512195121952, | |
"MxNxK": 134217728, | |
"size_m": 1024, | |
"size_n": 128, | |
"size_k": 1024, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_1024x128x1024.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0021931001717288233, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.121342273050742, | |
"compute_intensity": 744.7272727272727, | |
"tile_compute_intensity": 7.420289855072464, | |
"MxNxK": 8589934592, | |
"size_m": 512, | |
"size_n": 4096, | |
"size_k": 4096, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_512x4096x4096.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.008772400686915293, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.1358139316513807, | |
"compute_intensity": 799.219512195122, | |
"tile_compute_intensity": 7.757575757575758, | |
"MxNxK": 34359738368, | |
"size_m": 512, | |
"size_n": 16384, | |
"size_k": 4096, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_512x16384x4096.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.000034267190183262865, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.841528175487021, | |
"compute_intensity": 56.69204152249135, | |
"tile_compute_intensity": 3.878787878787879, | |
"MxNxK": 134217728, | |
"size_m": 8192, | |
"size_n": 256, | |
"size_k": 64, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_8192x256x64.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.000011685291107054704, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.546970400230194, | |
"compute_intensity": 14.216052060737526, | |
"tile_compute_intensity": 0.9961089494163424, | |
"MxNxK": 16777216, | |
"size_m": 16384, | |
"size_n": 64, | |
"size_k": 16, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_16384x64x16.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.00013706876073305146, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 2.391862273480695, | |
"compute_intensity": 112.99310344827586, | |
"tile_compute_intensity": 1.5987509758001561, | |
"MxNxK": 536870912, | |
"size_m": 512, | |
"size_n": 64, | |
"size_k": 16384, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_512x64x16384.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0023936146654888013, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.0191541807854434, | |
"compute_intensity": 7.996095656417765, | |
"tile_compute_intensity": 0.9941747572815534, | |
"MxNxK": 2147483648, | |
"size_m": 16384, | |
"size_n": 16384, | |
"size_k": 8, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_16384x16384x8.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.004386200343457647, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.0560802235075948, | |
"compute_intensity": 780.1904761904761, | |
"tile_compute_intensity": 23.272727272727273, | |
"MxNxK": 17179869184, | |
"size_m": 2048, | |
"size_n": 8192, | |
"size_k": 1024, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_2048x8192x1024.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.00007516533858089832, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.1210699303805904, | |
"compute_intensity": 7.976630963972736, | |
"tile_compute_intensity": 0.9696969696969697, | |
"MxNxK": 67108864, | |
"size_m": 4096, | |
"size_n": 2048, | |
"size_k": 8, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_4096x2048x8.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.000034267190183262865, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.5764117316502206, | |
"compute_intensity": 84.89119170984456, | |
"tile_compute_intensity": 3.1219512195121952, | |
"MxNxK": 134217728, | |
"size_m": 8192, | |
"size_n": 128, | |
"size_k": 128, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_8192x128x128.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0002741375214661029, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.38965581441443, | |
"compute_intensity": 227.55555555555554, | |
"tile_compute_intensity": 1.9320754716981132, | |
"MxNxK": 1073741824, | |
"size_m": 128, | |
"size_n": 2048, | |
"size_k": 4096, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_128x2048x4096.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0021931001717288233, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.301443168877126, | |
"compute_intensity": 910.2222222222222, | |
"tile_compute_intensity": 19.692307692307693, | |
"MxNxK": 8589934592, | |
"size_m": 4096, | |
"size_n": 1024, | |
"size_k": 2048, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_4096x1024x2048.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0000753842667515223, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.1209129373536388, | |
"compute_intensity": 7.964997569275644, | |
"tile_compute_intensity": 0.9377289377289377, | |
"MxNxK": 67108864, | |
"size_m": 1024, | |
"size_n": 8192, | |
"size_k": 8, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_1024x8192x8.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0010965500858644117, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.1408260272961006, | |
"compute_intensity": 125.06870229007633, | |
"tile_compute_intensity": 12.19047619047619, | |
"MxNxK": 4294967296, | |
"size_m": 4096, | |
"size_n": 8192, | |
"size_k": 128, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_4096x8192x128.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.004386200343457647, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.1176996219381672, | |
"compute_intensity": 963.7647058823529, | |
"tile_compute_intensity": 10.61139896373057, | |
"MxNxK": 17179869184, | |
"size_m": 1024, | |
"size_n": 1024, | |
"size_k": 16384, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_1024x1024x16384.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0021931001717288233, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.1320375974766872, | |
"compute_intensity": 239.1824817518248, | |
"tile_compute_intensity": 1.9844961240310077, | |
"MxNxK": 8589934592, | |
"size_m": 128, | |
"size_n": 16384, | |
"size_k": 4096, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_128x16384x4096.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.000010581528246825492, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 2.015890282681882, | |
"compute_intensity": 30.007326007326007, | |
"tile_compute_intensity": 2.56, | |
"MxNxK": 33554432, | |
"size_m": 4096, | |
"size_n": 256, | |
"size_k": 32, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_4096x256x32.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.00002103534839412033, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.6225640430049932, | |
"compute_intensity": 15.05190629306385, | |
"tile_compute_intensity": 1.3264248704663213, | |
"MxNxK": 33554432, | |
"size_m": 16384, | |
"size_n": 128, | |
"size_k": 16, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_16384x128x16.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.000001331813037962519, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 3.582484636173203, | |
"compute_intensity": 14.94890510948905, | |
"tile_compute_intensity": 0.9696969696969697, | |
"MxNxK": 2097152, | |
"size_m": 128, | |
"size_n": 1024, | |
"size_k": 16, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_128x1024x16.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0005482750429322058, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 2.0918319585031036, | |
"compute_intensity": 124.12121212121212, | |
"tile_compute_intensity": 1.9357277882797732, | |
"MxNxK": 2147483648, | |
"size_m": 4096, | |
"size_n": 64, | |
"size_k": 8192, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_4096x64x8192.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.00007647890760464218, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.442029089365667, | |
"compute_intensity": 31.62934362934363, | |
"tile_compute_intensity": 3.5555555555555554, | |
"MxNxK": 268435456, | |
"size_m": 4096, | |
"size_n": 2048, | |
"size_k": 32, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_4096x2048x32.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.000007023945474185887, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.95764619097598, | |
"compute_intensity": 25.5600624024961, | |
"tile_compute_intensity": 1.3195876288659794, | |
"MxNxK": 16777216, | |
"size_m": 8192, | |
"size_n": 64, | |
"size_k": 32, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_8192x64x32.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0021931001717288233, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.1177368221803574, | |
"compute_intensity": 244.53731343283582, | |
"tile_compute_intensity": 19.692307692307693, | |
"MxNxK": 8589934592, | |
"size_m": 4096, | |
"size_n": 8192, | |
"size_k": 256, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_4096x8192x256.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.008772400686915293, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.0995207638595088, | |
"compute_intensity": 992.969696969697, | |
"tile_compute_intensity": 25.6, | |
"MxNxK": 34359738368, | |
"size_m": 16384, | |
"size_n": 1024, | |
"size_k": 2048, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_16384x1024x2048.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.000019119726901160545, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.577428374828343, | |
"compute_intensity": 15.814671814671815, | |
"tile_compute_intensity": 1.7297297297297298, | |
"MxNxK": 33554432, | |
"size_m": 1024, | |
"size_n": 2048, | |
"size_k": 16, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_1024x2048x16.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0000024264538910823974, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 2.281518740737827, | |
"compute_intensity": 7.846743295019157, | |
"tile_compute_intensity": 0.8421052631578947, | |
"MxNxK": 2097152, | |
"size_m": 1024, | |
"size_n": 256, | |
"size_k": 8, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_1024x256x8.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.00013706876073305146, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.4476822469874076, | |
"compute_intensity": 199.8048780487805, | |
"tile_compute_intensity": 9.142857142857142, | |
"MxNxK": 536870912, | |
"size_m": 4096, | |
"size_n": 512, | |
"size_k": 256, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_4096x512x256.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.004386200343457647, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.0783711521933188, | |
"compute_intensity": 448.8767123287671, | |
"tile_compute_intensity": 20.48, | |
"MxNxK": 17179869184, | |
"size_m": 2048, | |
"size_n": 16384, | |
"size_k": 512, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_2048x16384x512.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.004386200343457647, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.1333894610724544, | |
"compute_intensity": 963.7647058823529, | |
"tile_compute_intensity": 14.222222222222221, | |
"MxNxK": 17179869184, | |
"size_m": 1024, | |
"size_n": 8192, | |
"size_k": 2048, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_1024x8192x2048.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0021931001717288233, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 2.029941796665903, | |
"compute_intensity": 442.81081081081084, | |
"tile_compute_intensity": 6.38006230529595, | |
"MxNxK": 8589934592, | |
"size_m": 2048, | |
"size_n": 256, | |
"size_k": 16384, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_2048x256x16384.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.000034267190183262865, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.5680070248769638, | |
"compute_intensity": 113.77777777777777, | |
"tile_compute_intensity": 6.4, | |
"MxNxK": 134217728, | |
"size_m": 1024, | |
"size_n": 1024, | |
"size_k": 128, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_1024x1024x128.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.000034267190183262865, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.7699029310919248, | |
"compute_intensity": 56.69204152249135, | |
"tile_compute_intensity": 2.6391752577319587, | |
"MxNxK": 134217728, | |
"size_m": 256, | |
"size_n": 8192, | |
"size_k": 64, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_256x8192x64.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.000010581528246825492, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.9176058795536317, | |
"compute_intensity": 30.007326007326007, | |
"tile_compute_intensity": 1.9692307692307693, | |
"MxNxK": 33554432, | |
"size_m": 256, | |
"size_n": 4096, | |
"size_k": 32, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_256x4096x32.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.00006853438036652573, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.4491296591504448, | |
"compute_intensity": 204.8, | |
"tile_compute_intensity": 8, | |
"MxNxK": 268435456, | |
"size_m": 1024, | |
"size_n": 1024, | |
"size_k": 256, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_1024x1024x256.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0002741375214661029, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 2.1882272984093585, | |
"compute_intensity": 202.2716049382716, | |
"tile_compute_intensity": 1.7762359063313096, | |
"MxNxK": 1073741824, | |
"size_m": 128, | |
"size_n": 512, | |
"size_k": 16384, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_128x512x16384.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.000004889395810602124, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 2.0602953998491014, | |
"compute_intensity": 15.633587786259541, | |
"tile_compute_intensity": 1.6, | |
"MxNxK": 8388608, | |
"size_m": 1024, | |
"size_n": 512, | |
"size_k": 16, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_1024x512x16.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.00006853438036652573, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.5179067918523543, | |
"compute_intensity": 112.21917808219177, | |
"tile_compute_intensity": 7.111111111111111, | |
"MxNxK": 268435456, | |
"size_m": 4096, | |
"size_n": 512, | |
"size_k": 128, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_4096x512x128.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0021931001717288233, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.1219113178513729, | |
"compute_intensity": 744.7272727272727, | |
"tile_compute_intensity": 7.062068965517241, | |
"MxNxK": 8589934592, | |
"size_m": 512, | |
"size_n": 2048, | |
"size_k": 8192, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_512x2048x8192.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0021931001717288233, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.0927143085874864, | |
"compute_intensity": 442.81081081081084, | |
"tile_compute_intensity": 25.6, | |
"MxNxK": 8589934592, | |
"size_m": 8192, | |
"size_n": 2048, | |
"size_k": 512, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_8192x2048x512.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0010965500858644117, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.2232838463257718, | |
"compute_intensity": 225.98620689655172, | |
"tile_compute_intensity": 3.878787878787879, | |
"MxNxK": 4294967296, | |
"size_m": 16384, | |
"size_n": 128, | |
"size_k": 2048, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_16384x128x2048.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.00003854960204403839, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.1996595922653435, | |
"compute_intensity": 7.875030040855563, | |
"tile_compute_intensity": 0.7987519500780031, | |
"MxNxK": 33554432, | |
"size_m": 256, | |
"size_n": 16384, | |
"size_k": 8, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_256x16384x8.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.00003772862140419848, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.206426316974397, | |
"compute_intensity": 7.961127308066083, | |
"tile_compute_intensity": 0.9343065693430657, | |
"MxNxK": 33554432, | |
"size_m": 1024, | |
"size_n": 4096, | |
"size_k": 8, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_1024x4096x8.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.000034267190183262865, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 3.4072007577684422, | |
"compute_intensity": 163.84, | |
"tile_compute_intensity": 1.5950155763239875, | |
"MxNxK": 134217728, | |
"size_m": 128, | |
"size_n": 256, | |
"size_k": 4096, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_128x256x4096.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0002741375214661029, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.3825352803852833, | |
"compute_intensity": 221.40540540540542, | |
"tile_compute_intensity": 1.9541984732824427, | |
"MxNxK": 1073741824, | |
"size_m": 128, | |
"size_n": 4096, | |
"size_k": 2048, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_128x4096x2048.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.008772400686915293, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.1421953868961856, | |
"compute_intensity": 250.13740458015266, | |
"tile_compute_intensity": 1.9902818270165208, | |
"MxNxK": 34359738368, | |
"size_m": 128, | |
"size_n": 16384, | |
"size_k": 16384, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_128x16384x16384.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.008772400686915293, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.3101015677129921, | |
"compute_intensity": 862.3157894736842, | |
"tile_compute_intensity": 14.027397260273972, | |
"MxNxK": 34359738368, | |
"size_m": 8192, | |
"size_n": 512, | |
"size_k": 8192, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_8192x512x8192.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0002741375214661029, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.385149998778152, | |
"compute_intensity": 327.68, | |
"tile_compute_intensity": 6.7368421052631575, | |
"MxNxK": 1073741824, | |
"size_m": 4096, | |
"size_n": 256, | |
"size_k": 1024, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_4096x256x1024.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.017544801373830587, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.110042317291112, | |
"compute_intensity": 885.6216216216217, | |
"tile_compute_intensity": 39.38461538461539, | |
"MxNxK": 68719476736, | |
"size_m": 4096, | |
"size_n": 16384, | |
"size_k": 1024, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_4096x16384x1024.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.000017133595091631432, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.8051086287901732, | |
"compute_intensity": 56.49655172413793, | |
"tile_compute_intensity": 2.6122448979591835, | |
"MxNxK": 67108864, | |
"size_m": 256, | |
"size_n": 4096, | |
"size_k": 64, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_256x4096x64.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.000021090080436776326, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.9354692406245886, | |
"compute_intensity": 30.06238532110092, | |
"tile_compute_intensity": 1.9844961240310077, | |
"MxNxK": 67108864, | |
"size_m": 256, | |
"size_n": 8192, | |
"size_k": 32, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_256x8192x32.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.00013706876073305146, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.4562967359483596, | |
"compute_intensity": 215.57894736842104, | |
"tile_compute_intensity": 3.506849315068493, | |
"MxNxK": 536870912, | |
"size_m": 2048, | |
"size_n": 128, | |
"size_k": 2048, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_2048x128x2048.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0000026316990510423747, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 2.1680289337735568, | |
"compute_intensity": 7.522497704315886, | |
"tile_compute_intensity": 0.6597938144329897, | |
"MxNxK": 2097152, | |
"size_m": 4096, | |
"size_n": 64, | |
"size_k": 8, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_4096x64x8.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.000017133595091631432, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.853481513582659, | |
"compute_intensity": 56.49655172413793, | |
"tile_compute_intensity": 3.764705882352941, | |
"MxNxK": 67108864, | |
"size_m": 4096, | |
"size_n": 256, | |
"size_k": 64, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_4096x256x64.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0002741375214661029, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.3644654750714027, | |
"compute_intensity": 169.78238341968913, | |
"tile_compute_intensity": 3.710144927536232, | |
"MxNxK": 1073741824, | |
"size_m": 16384, | |
"size_n": 128, | |
"size_k": 512, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_16384x128x512.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0005482750429322058, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.254255171250849, | |
"compute_intensity": 409.6, | |
"tile_compute_intensity": 6.320987654320987, | |
"MxNxK": 2147483648, | |
"size_m": 2048, | |
"size_n": 256, | |
"size_k": 4096, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_2048x256x4096.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.00015018472504804733, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.057833243403967, | |
"compute_intensity": 7.980516317584024, | |
"tile_compute_intensity": 0.9770992366412213, | |
"MxNxK": 134217728, | |
"size_m": 8192, | |
"size_n": 2048, | |
"size_k": 8, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_8192x2048x8.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.004386200343457647, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.3062882031602279, | |
"compute_intensity": 448.8767123287671, | |
"tile_compute_intensity": 7.641791044776119, | |
"MxNxK": 17179869184, | |
"size_m": 16384, | |
"size_n": 256, | |
"size_k": 4096, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_16384x256x4096.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.000009377423308393628, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 2.1529153893738178, | |
"compute_intensity": 42.555844155844156, | |
"tile_compute_intensity": 1.5802469135802468, | |
"MxNxK": 33554432, | |
"size_m": 8192, | |
"size_n": 64, | |
"size_k": 64, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_8192x64x64.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.000008566797545815716, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 12.063317639052135, | |
"compute_intensity": 120.47058823529412, | |
"tile_compute_intensity": 1.3264248704663213, | |
"MxNxK": 33554432, | |
"size_m": 128, | |
"size_n": 128, | |
"size_k": 2048, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_128x128x2048.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.00013706876073305146, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.355302325004171, | |
"compute_intensity": 62.534351145038165, | |
"tile_compute_intensity": 6.4, | |
"MxNxK": 536870912, | |
"size_m": 4096, | |
"size_n": 2048, | |
"size_k": 64, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_4096x2048x64.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0002741375214661029, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 2.1986396292120647, | |
"compute_intensity": 202.2716049382716, | |
"tile_compute_intensity": 2.6631989596879064, | |
"MxNxK": 1073741824, | |
"size_m": 512, | |
"size_n": 128, | |
"size_k": 16384, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_512x128x16384.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.000017133595091631432, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 2.536677040948204, | |
"compute_intensity": 157.53846153846155, | |
"tile_compute_intensity": 1.8285714285714285, | |
"MxNxK": 67108864, | |
"size_m": 128, | |
"size_n": 1024, | |
"size_k": 512, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_128x1024x512.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.000004998859895914112, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 2.0106984678105277, | |
"compute_intensity": 15.456603773584906, | |
"tile_compute_intensity": 1.5238095238095237, | |
"MxNxK": 8388608, | |
"size_m": 2048, | |
"size_n": 256, | |
"size_k": 16, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_2048x256x16.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.017544801373830587, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.3036514873638723, | |
"compute_intensity": 489.07462686567163, | |
"tile_compute_intensity": 3.9613152804642167, | |
"MxNxK": 68719476736, | |
"size_m": 256, | |
"size_n": 16384, | |
"size_k": 16384, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_256x16384x16384.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.00013706876073305146, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.4590983048727784, | |
"compute_intensity": 118.72463768115942, | |
"tile_compute_intensity": 1.9248120300751879, | |
"MxNxK": 536870912, | |
"size_m": 4096, | |
"size_n": 64, | |
"size_k": 2048, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_4096x64x2048.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0021931001717288233, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.3834443931533358, | |
"compute_intensity": 63.627184466019415, | |
"tile_compute_intensity": 7.529411764705882, | |
"MxNxK": 8589934592, | |
"size_m": 16384, | |
"size_n": 8192, | |
"size_k": 64, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_16384x8192x64.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.00006853438036652573, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.4618766024128134, | |
"compute_intensity": 292.57142857142856, | |
"tile_compute_intensity": 3.4594594594594597, | |
"MxNxK": 268435456, | |
"size_m": 256, | |
"size_n": 1024, | |
"size_k": 1024, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_256x1024x1024.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0006036579424671757, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.7918226982804595, | |
"compute_intensity": 31.844509232264333, | |
"tile_compute_intensity": 3.8208955223880596, | |
"MxNxK": 2147483648, | |
"size_m": 16384, | |
"size_n": 4096, | |
"size_k": 32, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_16384x4096x32.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.000034267190183262865, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.832563426711472, | |
"compute_intensity": 51.1201248049922, | |
"tile_compute_intensity": 1.5950155763239875, | |
"MxNxK": 134217728, | |
"size_m": 128, | |
"size_n": 16384, | |
"size_k": 64, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_128x16384x64.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.000021090080436776326, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 2.0884130813022375, | |
"compute_intensity": 30.06238532110092, | |
"tile_compute_intensity": 2.6122448979591835, | |
"MxNxK": 67108864, | |
"size_m": 8192, | |
"size_n": 256, | |
"size_k": 32, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_8192x256x32.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.008772400686915293, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.3037113644660283, | |
"compute_intensity": 1365.3333333333333, | |
"tile_compute_intensity": 20.897959183673468, | |
"MxNxK": 34359738368, | |
"size_m": 4096, | |
"size_n": 1024, | |
"size_k": 8192, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_4096x1024x8192.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.00015091448561679391, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.1195743976741588, | |
"compute_intensity": 15.922254616132166, | |
"tile_compute_intensity": 1.8686131386861313, | |
"MxNxK": 268435456, | |
"size_m": 2048, | |
"size_n": 8192, | |
"size_k": 16, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_2048x8192x16.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0002741375214661029, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.3532128264690786, | |
"compute_intensity": 372.3636363636364, | |
"tile_compute_intensity": 12.8, | |
"MxNxK": 1073741824, | |
"size_m": 2048, | |
"size_n": 1024, | |
"size_k": 512, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_2048x1024x512.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0005482750429322058, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.3571524000626993, | |
"compute_intensity": 585.1428571428571, | |
"tile_compute_intensity": 6.918918918918919, | |
"MxNxK": 2147483648, | |
"size_m": 512, | |
"size_n": 2048, | |
"size_k": 2048, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_512x2048x2048.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.00013706876073305146, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.533315166140479, | |
"compute_intensity": 315.0769230769231, | |
"tile_compute_intensity": 3.1801242236024843, | |
"MxNxK": 536870912, | |
"size_m": 256, | |
"size_n": 512, | |
"size_k": 4096, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_256x512x4096.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 3.466362701546282e-7, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 12.425704680147806, | |
"compute_intensity": 14.628571428571428, | |
"tile_compute_intensity": 1, | |
"MxNxK": 524288, | |
"size_m": 256, | |
"size_n": 128, | |
"size_k": 16, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_256x128x16.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 5.354248466134823e-7, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 24.061638471383866, | |
"compute_intensity": 64, | |
"tile_compute_intensity": 0.9696969696969697, | |
"MxNxK": 2097152, | |
"size_m": 128, | |
"size_n": 64, | |
"size_k": 256, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_128x64x256.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 7.66248597183915e-7, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 6.072180653366879, | |
"compute_intensity": 27.675675675675677, | |
"tile_compute_intensity": 1.2307692307692308, | |
"MxNxK": 2097152, | |
"size_m": 128, | |
"size_n": 512, | |
"size_k": 32, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_128x512x32.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0021931001717288233, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.0870981761335, | |
"compute_intensity": 455.1111111111111, | |
"tile_compute_intensity": 25.6, | |
"MxNxK": 8589934592, | |
"size_m": 4096, | |
"size_n": 4096, | |
"size_k": 512, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_4096x4096x512.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0005482750429322058, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.2064717998854046, | |
"compute_intensity": 62.89443378119002, | |
"tile_compute_intensity": 6.320987654320987, | |
"MxNxK": 2147483648, | |
"size_m": 2048, | |
"size_n": 16384, | |
"size_k": 64, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_2048x16384x64.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.008772400686915293, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.0804755987709351, | |
"compute_intensity": 862.3157894736842, | |
"tile_compute_intensity": 42.666666666666664, | |
"MxNxK": 34359738368, | |
"size_m": 8192, | |
"size_n": 4096, | |
"size_k": 1024, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_8192x4096x1024.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.004386200343457647, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.3815314683705586, | |
"compute_intensity": 63.750972762645915, | |
"tile_compute_intensity": 7.641791044776119, | |
"MxNxK": 17179869184, | |
"size_m": 16384, | |
"size_n": 16384, | |
"size_k": 64, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_16384x16384x64.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 4.561003554666161e-7, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 9.366360375345359, | |
"compute_intensity": 24.975609756097562, | |
"tile_compute_intensity": 1.1428571428571428, | |
"MxNxK": 1048576, | |
"size_m": 512, | |
"size_n": 64, | |
"size_k": 32, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_512x64x32.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.017544801373830587, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.158421317191903, | |
"compute_intensity": 1489.4545454545455, | |
"tile_compute_intensity": 14.840579710144928, | |
"MxNxK": 68719476736, | |
"size_m": 1024, | |
"size_n": 8192, | |
"size_k": 8192, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_1024x8192x8192.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0010965500858644117, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.1996373170336498, | |
"compute_intensity": 819.2, | |
"tile_compute_intensity": 10.448979591836734, | |
"MxNxK": 4294967296, | |
"size_m": 1024, | |
"size_n": 1024, | |
"size_k": 4096, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_1024x1024x4096.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0006007389001921893, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.0602995303671603, | |
"compute_intensity": 15.961032635168047, | |
"tile_compute_intensity": 1.9320754716981132, | |
"MxNxK": 1073741824, | |
"size_m": 4096, | |
"size_n": 16384, | |
"size_k": 16, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_4096x16384x16.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 2.9190422749863426e-7, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 26.595024628092848, | |
"compute_intensity": 51.2, | |
"tile_compute_intensity": 0.9411764705882353, | |
"MxNxK": 1048576, | |
"size_m": 128, | |
"size_n": 64, | |
"size_k": 128, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_128x64x128.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0021931001717288233, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.095234242801623, | |
"compute_intensity": 404.5432098765432, | |
"tile_compute_intensity": 19.692307692307693, | |
"MxNxK": 8589934592, | |
"size_m": 16384, | |
"size_n": 1024, | |
"size_k": 512, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_16384x1024x512.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0010965500858644117, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.19199445083676, | |
"compute_intensity": 431.1578947368421, | |
"tile_compute_intensity": 3.8496240601503757, | |
"MxNxK": 4294967296, | |
"size_m": 256, | |
"size_n": 4096, | |
"size_k": 4096, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_256x4096x4096.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0002741375214661029, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.3481467140343286, | |
"compute_intensity": 327.68, | |
"tile_compute_intensity": 6.7368421052631575, | |
"MxNxK": 1073741824, | |
"size_m": 512, | |
"size_n": 4096, | |
"size_k": 512, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_512x4096x512.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0005482750429322058, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.4146767992302247, | |
"compute_intensity": 62.89443378119002, | |
"tile_compute_intensity": 6.918918918918919, | |
"MxNxK": 2147483648, | |
"size_m": 16384, | |
"size_n": 2048, | |
"size_k": 64, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_16384x2048x64.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.000004706955668415478, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 2.7826055966291983, | |
"compute_intensity": 42.44559585492228, | |
"tile_compute_intensity": 1.5609756097560976, | |
"MxNxK": 16777216, | |
"size_m": 4096, | |
"size_n": 64, | |
"size_k": 64, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_4096x64x64.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.000017133595091631432, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.8910216538764506, | |
"compute_intensity": 51.0404984423676, | |
"tile_compute_intensity": 2.6122448979591835, | |
"MxNxK": 67108864, | |
"size_m": 8192, | |
"size_n": 128, | |
"size_k": 64, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_8192x128x64.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 2.3717218484264035e-7, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 17.607461951067204, | |
"compute_intensity": 24.38095238095238, | |
"tile_compute_intensity": 1, | |
"MxNxK": 524288, | |
"size_m": 256, | |
"size_n": 64, | |
"size_k": 32, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_256x64x32.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.000001249714973978528, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 3.4593488987314416, | |
"compute_intensity": 7.728301886792453, | |
"tile_compute_intensity": 0.6530612244897959, | |
"MxNxK": 1048576, | |
"size_m": 128, | |
"size_n": 1024, | |
"size_k": 8, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_128x1024x8.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.000004283398772907858, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 3.7831640712139643, | |
"compute_intensity": 128, | |
"tile_compute_intensity": 2.4615384615384617, | |
"MxNxK": 16777216, | |
"size_m": 256, | |
"size_n": 256, | |
"size_k": 256, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_256x256x256.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.000002141699386453929, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 4.536211121837833, | |
"compute_intensity": 85.33333333333333, | |
"tile_compute_intensity": 2.2857142857142856, | |
"MxNxK": 8388608, | |
"size_m": 256, | |
"size_n": 256, | |
"size_k": 128, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_256x256x128.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.00006853438036652573, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.403418205383715, | |
"compute_intensity": 167.18367346938774, | |
"tile_compute_intensity": 3.5555555555555554, | |
"MxNxK": 268435456, | |
"size_m": 4096, | |
"size_n": 128, | |
"size_k": 512, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_4096x128x512.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0000771356921165141, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.2666717024764271, | |
"compute_intensity": 15.7462758289284, | |
"tile_compute_intensity": 1.5950155763239875, | |
"MxNxK": 134217728, | |
"size_m": 512, | |
"size_n": 16384, | |
"size_k": 16, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_512x16384x16.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.008772400686915293, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.1427527110725646, | |
"compute_intensity": 1260.3076923076924, | |
"tile_compute_intensity": 15.875968992248062, | |
"MxNxK": 34359738368, | |
"size_m": 2048, | |
"size_n": 1024, | |
"size_k": 16384, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_2048x1024x16384.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.000034267190183262865, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 3.3896446215522054, | |
"compute_intensity": 163.84, | |
"tile_compute_intensity": 1.9922178988326849, | |
"MxNxK": 134217728, | |
"size_m": 256, | |
"size_n": 128, | |
"size_k": 4096, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_256x128x4096.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.008772400686915293, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.1608750043993992, | |
"compute_intensity": 1260.3076923076924, | |
"tile_compute_intensity": 24.38095238095238, | |
"MxNxK": 34359738368, | |
"size_m": 8192, | |
"size_n": 1024, | |
"size_k": 4096, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_8192x1024x4096.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0021931001717288233, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.2989422594443836, | |
"compute_intensity": 404.5432098765432, | |
"tile_compute_intensity": 3.9083969465648853, | |
"MxNxK": 8589934592, | |
"size_m": 256, | |
"size_n": 16384, | |
"size_k": 2048, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_256x16384x2048.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.000034267190183262865, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 3.043377632950108, | |
"compute_intensity": 227.55555555555554, | |
"tile_compute_intensity": 2.6391752577319587, | |
"MxNxK": 134217728, | |
"size_m": 256, | |
"size_n": 256, | |
"size_k": 2048, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_256x256x2048.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.00003794754957482245, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.3518449481455685, | |
"compute_intensity": 15.875968992248062, | |
"tile_compute_intensity": 1.8285714285714285, | |
"MxNxK": 67108864, | |
"size_m": 2048, | |
"size_n": 2048, | |
"size_k": 16, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_2048x2048x16.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0002741375214661029, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.3636134366475328, | |
"compute_intensity": 252.06153846153848, | |
"tile_compute_intensity": 6.7368421052631575, | |
"MxNxK": 1073741824, | |
"size_m": 8192, | |
"size_n": 256, | |
"size_k": 512, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_8192x256x512.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0010965500858644117, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.3071774418262883, | |
"compute_intensity": 630.1538461538462, | |
"tile_compute_intensity": 12.8, | |
"MxNxK": 4294967296, | |
"size_m": 1024, | |
"size_n": 4096, | |
"size_k": 1024, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_1024x4096x1024.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0021931001717288233, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 2.0098468713182553, | |
"compute_intensity": 442.81081081081084, | |
"tile_compute_intensity": 3.757798165137615, | |
"MxNxK": 8589934592, | |
"size_m": 256, | |
"size_n": 2048, | |
"size_k": 16384, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_256x2048x16384.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.000034267190183262865, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 3.063828593492419, | |
"compute_intensity": 186.1818181818182, | |
"tile_compute_intensity": 1.7655172413793103, | |
"MxNxK": 134217728, | |
"size_m": 128, | |
"size_n": 512, | |
"size_k": 2048, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_128x512x2048.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0021931001717288233, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.1542031664421528, | |
"compute_intensity": 744.7272727272727, | |
"tile_compute_intensity": 25.6, | |
"MxNxK": 8589934592, | |
"size_m": 4096, | |
"size_n": 2048, | |
"size_k": 1024, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_4096x2048x1024.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.000019010262815848558, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.3937734289957873, | |
"compute_intensity": 7.930300096805421, | |
"tile_compute_intensity": 0.8827586206896552, | |
"MxNxK": 16777216, | |
"size_m": 512, | |
"size_n": 4096, | |
"size_k": 8, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_512x4096x8.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.00003809350168857177, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.3548978301632146, | |
"compute_intensity": 15.845261121856867, | |
"tile_compute_intensity": 1.7534246575342465, | |
"MxNxK": 67108864, | |
"size_m": 1024, | |
"size_n": 4096, | |
"size_k": 16, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_1024x4096x16.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.00006853438036652573, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.4479623529206136, | |
"compute_intensity": 101.7639751552795, | |
"tile_compute_intensity": 1.9104477611940298, | |
"MxNxK": 268435456, | |
"size_m": 8192, | |
"size_n": 64, | |
"size_k": 512, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_8192x64x512.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.00007593158717808223, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.109588303846308, | |
"compute_intensity": 7.9360620004843785, | |
"tile_compute_intensity": 0.9377289377289377, | |
"MxNxK": 67108864, | |
"size_m": 16384, | |
"size_n": 512, | |
"size_k": 8, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_16384x512x8.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.00007735462028713808, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.672298241532908, | |
"compute_intensity": 31.44721689059501, | |
"tile_compute_intensity": 3.4594594594594597, | |
"MxNxK": 268435456, | |
"size_m": 8192, | |
"size_n": 1024, | |
"size_k": 32, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_8192x1024x32.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.000005327252151850076, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 2.370302460780659, | |
"compute_intensity": 29.8978102189781, | |
"tile_compute_intensity": 1.9393939393939394, | |
"MxNxK": 16777216, | |
"size_m": 256, | |
"size_n": 2048, | |
"size_k": 32, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_256x2048x32.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0010965500858644117, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 2.0164418457613866, | |
"compute_intensity": 431.1578947368421, | |
"tile_compute_intensity": 3.750915750915751, | |
"MxNxK": 4294967296, | |
"size_m": 256, | |
"size_n": 2048, | |
"size_k": 8192, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_256x2048x8192.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.000034267190183262865, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 3.553066134815623, | |
"compute_intensity": 101.1358024691358, | |
"tile_compute_intensity": 1.3315994798439532, | |
"MxNxK": 134217728, | |
"size_m": 256, | |
"size_n": 64, | |
"size_k": 8192, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_256x64x8192.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0005482750429322058, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.3510999716175425, | |
"compute_intensity": 334.3673469387755, | |
"tile_compute_intensity": 11.636363636363637, | |
"MxNxK": 2147483648, | |
"size_m": 8192, | |
"size_n": 512, | |
"size_k": 512, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_8192x512x512.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0005482750429322058, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.3636660095925464, | |
"compute_intensity": 585.1428571428571, | |
"tile_compute_intensity": 10.24, | |
"MxNxK": 2147483648, | |
"size_m": 2048, | |
"size_n": 512, | |
"size_k": 2048, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_2048x512x2048.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.000008566797545815716, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 2.7185887732214904, | |
"compute_intensity": 146.28571428571428, | |
"tile_compute_intensity": 2.909090909090909, | |
"MxNxK": 33554432, | |
"size_m": 256, | |
"size_n": 512, | |
"size_k": 256, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_256x512x256.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0000014686431446025037, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 3.66052156296311, | |
"compute_intensity": 14.173010380622838, | |
"tile_compute_intensity": 0.9696969696969697, | |
"MxNxK": 2097152, | |
"size_m": 2048, | |
"size_n": 64, | |
"size_k": 16, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_2048x64x16.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.00013706876073305146, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.4294023197575925, | |
"compute_intensity": 215.57894736842104, | |
"tile_compute_intensity": 9.142857142857142, | |
"MxNxK": 536870912, | |
"size_m": 1024, | |
"size_n": 2048, | |
"size_k": 256, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_1024x2048x256.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.008772400686915293, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.0803169456832369, | |
"compute_intensity": 799.219512195122, | |
"tile_compute_intensity": 36.57142857142857, | |
"MxNxK": 34359738368, | |
"size_m": 16384, | |
"size_n": 2048, | |
"size_k": 1024, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_16384x2048x1024.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.000034267190183262865, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.5525053951871837, | |
"compute_intensity": 101.1358024691358, | |
"tile_compute_intensity": 3.1219512195121952, | |
"MxNxK": 134217728, | |
"size_m": 256, | |
"size_n": 4096, | |
"size_k": 128, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_256x4096x128.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.004386200343457647, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.1412221504229876, | |
"compute_intensity": 1170.2857142857142, | |
"tile_compute_intensity": 23.272727272727273, | |
"MxNxK": 17179869184, | |
"size_m": 2048, | |
"size_n": 4096, | |
"size_k": 2048, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_2048x4096x2048.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0021931001717288233, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.1521195282191863, | |
"compute_intensity": 244.53731343283582, | |
"tile_compute_intensity": 3.8641509433962264, | |
"MxNxK": 8589934592, | |
"size_m": 8192, | |
"size_n": 128, | |
"size_k": 8192, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_8192x128x8192.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0001517901982992898, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.294502634995624, | |
"compute_intensity": 31.751937984496124, | |
"tile_compute_intensity": 3.657142857142857, | |
"MxNxK": 536870912, | |
"size_m": 4096, | |
"size_n": 4096, | |
"size_k": 32, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_4096x4096x32.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0005482750429322058, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.2138375327711814, | |
"compute_intensity": 63.25868725868726, | |
"tile_compute_intensity": 6.918918918918919, | |
"MxNxK": 2147483648, | |
"size_m": 4096, | |
"size_n": 8192, | |
"size_k": 64, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_4096x8192x64.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.000005874572578410015, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 2.2693055525078774, | |
"compute_intensity": 28.346020761245676, | |
"tile_compute_intensity": 1.9393939393939394, | |
"MxNxK": 16777216, | |
"size_m": 4096, | |
"size_n": 128, | |
"size_k": 32, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_4096x128x32.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.00003973546296825159, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.4637605191245353, | |
"compute_intensity": 15.50780880265026, | |
"tile_compute_intensity": 1.5900621118012421, | |
"MxNxK": 67108864, | |
"size_m": 16384, | |
"size_n": 256, | |
"size_k": 16, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_16384x256x16.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0012026454172943731, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.475725330176873, | |
"compute_intensity": 31.906523855890946, | |
"tile_compute_intensity": 3.8496240601503757, | |
"MxNxK": 4294967296, | |
"size_m": 8192, | |
"size_n": 16384, | |
"size_k": 32, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_8192x16384x32.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0005482750429322058, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.3816656610250357, | |
"compute_intensity": 120.02930402930403, | |
"tile_compute_intensity": 1.9692307692307693, | |
"MxNxK": 2147483648, | |
"size_m": 16384, | |
"size_n": 64, | |
"size_k": 2048, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_16384x64x2048.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.000034267190183262865, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 2.8773412314297024, | |
"compute_intensity": 256, | |
"tile_compute_intensity": 3.878787878787879, | |
"MxNxK": 134217728, | |
"size_m": 512, | |
"size_n": 256, | |
"size_k": 1024, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_512x256x1024.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.00006853438036652573, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.4354022601897953, | |
"compute_intensity": 167.18367346938774, | |
"tile_compute_intensity": 3.4594594594594597, | |
"MxNxK": 268435456, | |
"size_m": 256, | |
"size_n": 4096, | |
"size_k": 256, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_256x4096x256.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.004386200343457647, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.1572972413805078, | |
"compute_intensity": 668.734693877551, | |
"tile_compute_intensity": 14.222222222222221, | |
"MxNxK": 17179869184, | |
"size_m": 16384, | |
"size_n": 512, | |
"size_k": 2048, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_16384x512x2048.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.00006853438036652573, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.5052532738557611, | |
"compute_intensity": 117.02857142857142, | |
"tile_compute_intensity": 7.111111111111111, | |
"MxNxK": 268435456, | |
"size_m": 1024, | |
"size_n": 2048, | |
"size_k": 128, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_1024x2048x128.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.000004283398772907858, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 3.090629754281531, | |
"compute_intensity": 93.0909090909091, | |
"tile_compute_intensity": 2.6666666666666665, | |
"MxNxK": 16777216, | |
"size_m": 256, | |
"size_n": 512, | |
"size_k": 128, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_256x512x128.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 2.9190422749863426e-7, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 17.73732384369749, | |
"compute_intensity": 42.666666666666664, | |
"tile_compute_intensity": 1.1428571428571428, | |
"MxNxK": 1048576, | |
"size_m": 128, | |
"size_n": 128, | |
"size_k": 64, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_128x128x64.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0006024903255571812, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.4857266662642554, | |
"compute_intensity": 31.875486381322958, | |
"tile_compute_intensity": 3.8208955223880596, | |
"MxNxK": 2147483648, | |
"size_m": 8192, | |
"size_n": 8192, | |
"size_k": 32, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_8192x8192x32.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0005482750429322058, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.3401273440163648, | |
"compute_intensity": 63.25868725868726, | |
"tile_compute_intensity": 7.111111111111111, | |
"MxNxK": 2147483648, | |
"size_m": 8192, | |
"size_n": 4096, | |
"size_k": 64, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_8192x4096x64.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0000753842667515223, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.1197243729979034, | |
"compute_intensity": 7.964997569275644, | |
"tile_compute_intensity": 0.9624060150375939, | |
"MxNxK": 67108864, | |
"size_m": 8192, | |
"size_n": 1024, | |
"size_k": 8, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_8192x1024x8.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.00013706876073305146, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.446865030708054, | |
"compute_intensity": 127.50194552529183, | |
"tile_compute_intensity": 3.506849315068493, | |
"MxNxK": 536870912, | |
"size_m": 16384, | |
"size_n": 128, | |
"size_k": 256, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_16384x128x256.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0005482750429322058, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.3754439629847064, | |
"compute_intensity": 203.527950310559, | |
"tile_compute_intensity": 3.8208955223880596, | |
"MxNxK": 2147483648, | |
"size_m": 16384, | |
"size_n": 128, | |
"size_k": 1024, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_16384x128x1024.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.00013706876073305146, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.4049356864960838, | |
"compute_intensity": 199.8048780487805, | |
"tile_compute_intensity": 1.9393939393939394, | |
"MxNxK": 536870912, | |
"size_m": 128, | |
"size_n": 4096, | |
"size_k": 1024, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_128x4096x1024.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.000009651083521673596, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.6008150339850462, | |
"compute_intensity": 7.869356388088376, | |
"tile_compute_intensity": 0.7950310559006211, | |
"MxNxK": 8388608, | |
"size_m": 256, | |
"size_n": 4096, | |
"size_k": 8, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_256x4096x8.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0005482750429322058, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.3749653708277578, | |
"compute_intensity": 390.0952380952381, | |
"tile_compute_intensity": 6.918918918918919, | |
"MxNxK": 2147483648, | |
"size_m": 4096, | |
"size_n": 256, | |
"size_k": 2048, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_4096x256x2048.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.00006853438036652573, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.5702950945678584, | |
"compute_intensity": 85.11168831168831, | |
"tile_compute_intensity": 3.1604938271604937, | |
"MxNxK": 268435456, | |
"size_m": 16384, | |
"size_n": 128, | |
"size_k": 128, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_16384x128x128.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.00006853438036652573, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.4580476952931063, | |
"compute_intensity": 195.04761904761904, | |
"tile_compute_intensity": 3.4594594594594597, | |
"MxNxK": 268435456, | |
"size_m": 2048, | |
"size_n": 128, | |
"size_k": 1024, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_2048x128x1024.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0011973911411993978, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.0214980863374974, | |
"compute_intensity": 7.9941449133935105, | |
"tile_compute_intensity": 0.9903288201160542, | |
"MxNxK": 1073741824, | |
"size_m": 8192, | |
"size_n": 16384, | |
"size_k": 8, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_8192x16384x8.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.00006853438036652573, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.5099224987750859, | |
"compute_intensity": 61.59398496240601, | |
"tile_compute_intensity": 5.818181818181818, | |
"MxNxK": 268435456, | |
"size_m": 4096, | |
"size_n": 1024, | |
"size_k": 64, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_4096x1024x64.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0021931001717288233, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.1205383061727165, | |
"compute_intensity": 1024, | |
"tile_compute_intensity": 15.515151515151516, | |
"MxNxK": 8589934592, | |
"size_m": 2048, | |
"size_n": 1024, | |
"size_k": 4096, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_2048x1024x4096.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0010965500858644117, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.145614751623558, | |
"compute_intensity": 237.44927536231884, | |
"tile_compute_intensity": 18.285714285714285, | |
"MxNxK": 4294967296, | |
"size_m": 8192, | |
"size_n": 2048, | |
"size_k": 256, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_8192x2048x256.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0005482750429322058, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.3376818952186689, | |
"compute_intensity": 234.05714285714285, | |
"tile_compute_intensity": 3.7372262773722627, | |
"MxNxK": 2147483648, | |
"size_m": 4096, | |
"size_n": 128, | |
"size_k": 4096, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_4096x128x4096.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 3.1927024882663123e-7, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 11.20555395804783, | |
"compute_intensity": 7.641791044776119, | |
"tile_compute_intensity": 0.6666666666666666, | |
"MxNxK": 262144, | |
"size_m": 256, | |
"size_n": 128, | |
"size_k": 8, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_256x128x8.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0002741375214661029, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 2.173461010493289, | |
"compute_intensity": 221.40540540540542, | |
"tile_compute_intensity": 3.190031152647975, | |
"MxNxK": 1073741824, | |
"size_m": 1024, | |
"size_n": 128, | |
"size_k": 8192, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_1024x128x8192.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.00015441733634677753, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.3747678919130564, | |
"compute_intensity": 31.477425552353505, | |
"tile_compute_intensity": 3.1801242236024843, | |
"MxNxK": 536870912, | |
"size_m": 1024, | |
"size_n": 16384, | |
"size_k": 32, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_1024x16384x32.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0002741375214661029, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.3835508130242598, | |
"compute_intensity": 455.1111111111111, | |
"tile_compute_intensity": 5.278350515463917, | |
"MxNxK": 1073741824, | |
"size_m": 512, | |
"size_n": 512, | |
"size_k": 4096, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_512x512x4096.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.000004283398772907858, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 22.620541176856538, | |
"compute_intensity": 113.77777777777777, | |
"tile_compute_intensity": 1.3195876288659794, | |
"MxNxK": 16777216, | |
"size_m": 128, | |
"size_n": 128, | |
"size_k": 1024, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_128x128x1024.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.000001222348952650531, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 3.4006656206515413, | |
"compute_intensity": 7.816793893129771, | |
"tile_compute_intensity": 0.8, | |
"MxNxK": 1048576, | |
"size_m": 512, | |
"size_n": 256, | |
"size_k": 8, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_512x256x8.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0005482750429322058, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.192662696283232, | |
"compute_intensity": 123.18796992481202, | |
"tile_compute_intensity": 11.636363636363637, | |
"MxNxK": 2147483648, | |
"size_m": 8192, | |
"size_n": 2048, | |
"size_k": 128, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_8192x2048x128.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0000010708496932269645, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 21.60228459685833, | |
"compute_intensity": 73.14285714285714, | |
"tile_compute_intensity": 0.9846153846153847, | |
"MxNxK": 4194304, | |
"size_m": 128, | |
"size_n": 64, | |
"size_k": 512, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_128x64x512.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0000012041049384318665, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 6.189493561667562, | |
"compute_intensity": 41.795918367346935, | |
"tile_compute_intensity": 1.4545454545454546, | |
"MxNxK": 4194304, | |
"size_m": 1024, | |
"size_n": 64, | |
"size_k": 64, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_1024x64x64.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0005482750429322058, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.2078083757104086, | |
"compute_intensity": 120.02930402930403, | |
"tile_compute_intensity": 10.24, | |
"MxNxK": 2147483648, | |
"size_m": 16384, | |
"size_n": 1024, | |
"size_k": 128, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_16384x1024x128.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0000771356921165141, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.4603252340609512, | |
"compute_intensity": 15.7462758289284, | |
"tile_compute_intensity": 1.7655172413793103, | |
"MxNxK": 134217728, | |
"size_m": 16384, | |
"size_n": 512, | |
"size_k": 16, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_16384x512x16.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.00013706876073305146, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.4330676477301332, | |
"compute_intensity": 215.57894736842104, | |
"tile_compute_intensity": 10.666666666666666, | |
"MxNxK": 536870912, | |
"size_m": 2048, | |
"size_n": 1024, | |
"size_k": 256, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_2048x1024x256.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0010965500858644117, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.13423079805679, | |
"compute_intensity": 240.94117647058823, | |
"tile_compute_intensity": 18.285714285714285, | |
"MxNxK": 4294967296, | |
"size_m": 4096, | |
"size_n": 4096, | |
"size_k": 256, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_4096x4096x256.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.00013706876073305146, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.4438533398677007, | |
"compute_intensity": 341.3333333333333, | |
"tile_compute_intensity": 3.506849315068493, | |
"MxNxK": 536870912, | |
"size_m": 256, | |
"size_n": 1024, | |
"size_k": 2048, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_256x1024x2048.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.00006853438036652573, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.502871948763211, | |
"compute_intensity": 112.21917808219177, | |
"tile_compute_intensity": 1.9104477611940298, | |
"MxNxK": 268435456, | |
"size_m": 4096, | |
"size_n": 64, | |
"size_k": 1024, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_4096x64x1024.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.00001933865507178452, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.5433544901717868, | |
"compute_intensity": 15.723608445297504, | |
"tile_compute_intensity": 1.5802469135802468, | |
"MxNxK": 33554432, | |
"size_m": 512, | |
"size_n": 4096, | |
"size_k": 16, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_512x4096x16.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0005482750429322058, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.156429830253815, | |
"compute_intensity": 124.12121212121212, | |
"tile_compute_intensity": 11.636363636363637, | |
"MxNxK": 2147483648, | |
"size_m": 4096, | |
"size_n": 4096, | |
"size_k": 128, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_4096x4096x128.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.00006853438036652573, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.502871948763211, | |
"compute_intensity": 117.02857142857142, | |
"tile_compute_intensity": 8, | |
"MxNxK": 268435456, | |
"size_m": 2048, | |
"size_n": 1024, | |
"size_k": 128, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_2048x1024x128.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0010965500858644117, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.154296591707928, | |
"compute_intensity": 225.98620689655172, | |
"tile_compute_intensity": 15.058823529411764, | |
"MxNxK": 4294967296, | |
"size_m": 16384, | |
"size_n": 1024, | |
"size_k": 256, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_16384x1024x256.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.000008566797545815716, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 2.7137328676347345, | |
"compute_intensity": 120.47058823529412, | |
"tile_compute_intensity": 1.7777777777777777, | |
"MxNxK": 33554432, | |
"size_m": 128, | |
"size_n": 1024, | |
"size_k": 256, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_128x1024x256.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0002741375214661029, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.3721696622154649, | |
"compute_intensity": 455.1111111111111, | |
"tile_compute_intensity": 6.7368421052631575, | |
"MxNxK": 1073741824, | |
"size_m": 512, | |
"size_n": 2048, | |
"size_k": 1024, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_512x2048x1024.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.017544801373830587, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.115484848614876, | |
"compute_intensity": 1489.4545454545455, | |
"tile_compute_intensity": 51.2, | |
"MxNxK": 68719476736, | |
"size_m": 8192, | |
"size_n": 4096, | |
"size_k": 2048, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_8192x4096x2048.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.000001331813037962519, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 3.74346840274397, | |
"compute_intensity": 14.94890510948905, | |
"tile_compute_intensity": 1.2307692307692308, | |
"MxNxK": 2097152, | |
"size_m": 1024, | |
"size_n": 128, | |
"size_k": 16, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_1024x128x16.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0011973911411993978, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.02112669012946, | |
"compute_intensity": 7.9941449133935105, | |
"tile_compute_intensity": 0.9922480620155039, | |
"MxNxK": 1073741824, | |
"size_m": 16384, | |
"size_n": 8192, | |
"size_k": 8, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_16384x8192x8.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.000004283398772907858, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 3.116777277938455, | |
"compute_intensity": 81.92, | |
"tile_compute_intensity": 1.6842105263157894, | |
"MxNxK": 16777216, | |
"size_m": 128, | |
"size_n": 1024, | |
"size_k": 128, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_128x1024x128.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.00006853438036652573, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.5631045765214402, | |
"compute_intensity": 101.7639751552795, | |
"tile_compute_intensity": 5.12, | |
"MxNxK": 268435456, | |
"size_m": 8192, | |
"size_n": 256, | |
"size_k": 128, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_8192x256x128.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.000034267190183262865, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.789793679767824, | |
"compute_intensity": 59.7956204379562, | |
"tile_compute_intensity": 4.923076923076923, | |
"MxNxK": 134217728, | |
"size_m": 4096, | |
"size_n": 512, | |
"size_k": 64, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_4096x512x64.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.00013706876073305146, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.4461414095585952, | |
"compute_intensity": 168.90721649484536, | |
"tile_compute_intensity": 6.095238095238095, | |
"MxNxK": 536870912, | |
"size_m": 8192, | |
"size_n": 256, | |
"size_k": 256, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_8192x256x256.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0002741375214661029, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.4478222150219509, | |
"compute_intensity": 61.94328922495274, | |
"tile_compute_intensity": 6.2439024390243905, | |
"MxNxK": 1073741824, | |
"size_m": 16384, | |
"size_n": 1024, | |
"size_k": 64, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_16384x1024x64.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.00013706876073305146, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.428818666642677, | |
"compute_intensity": 199.8048780487805, | |
"tile_compute_intensity": 6.095238095238095, | |
"MxNxK": 536870912, | |
"size_m": 512, | |
"size_n": 4096, | |
"size_k": 256, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_512x4096x256.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.000002490307940847724, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 2.106084998238796, | |
"compute_intensity": 7.742911153119093, | |
"tile_compute_intensity": 0.6597938144329897, | |
"MxNxK": 2097152, | |
"size_m": 128, | |
"size_n": 2048, | |
"size_k": 8, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_128x2048x8.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.000017133595091631432, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.8039880351932298, | |
"compute_intensity": 84.45360824742268, | |
"tile_compute_intensity": 3.0476190476190474, | |
"MxNxK": 67108864, | |
"size_m": 4096, | |
"size_n": 128, | |
"size_k": 128, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_4096x128x128.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.000008566797545815716, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 3.415605294677579, | |
"compute_intensity": 170.66666666666666, | |
"tile_compute_intensity": 2.56, | |
"MxNxK": 33554432, | |
"size_m": 256, | |
"size_n": 256, | |
"size_k": 512, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_256x256x512.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.000017133595091631432, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 5.6805357554730955, | |
"compute_intensity": 204.8, | |
"tile_compute_intensity": 2.6122448979591835, | |
"MxNxK": 67108864, | |
"size_m": 256, | |
"size_n": 256, | |
"size_k": 1024, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_256x256x1024.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0010965500858644117, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.312275658579641, | |
"compute_intensity": 337.8144329896907, | |
"tile_compute_intensity": 3.8496240601503757, | |
"MxNxK": 4294967296, | |
"size_m": 256, | |
"size_n": 16384, | |
"size_k": 1024, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_256x16384x1024.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.000019119726901160545, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.5616959467001847, | |
"compute_intensity": 15.814671814671815, | |
"tile_compute_intensity": 1.7777777777777777, | |
"MxNxK": 33554432, | |
"size_m": 2048, | |
"size_n": 1024, | |
"size_k": 16, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_2048x1024x16.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 6.750285260905918e-7, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 7.01599943137294, | |
"compute_intensity": 14.840579710144928, | |
"tile_compute_intensity": 1.1428571428571428, | |
"MxNxK": 1048576, | |
"size_m": 512, | |
"size_n": 128, | |
"size_k": 16, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_512x128x16.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.000034267190183262865, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.5467157465350345, | |
"compute_intensity": 126.03076923076924, | |
"tile_compute_intensity": 3.3684210526315788, | |
"MxNxK": 134217728, | |
"size_m": 4096, | |
"size_n": 128, | |
"size_k": 256, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_4096x128x256.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.00006853438036652573, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.500864154869642, | |
"compute_intensity": 112.21917808219177, | |
"tile_compute_intensity": 5.12, | |
"MxNxK": 268435456, | |
"size_m": 512, | |
"size_n": 4096, | |
"size_k": 128, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_512x4096x128.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0021931001717288233, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.295694202681619, | |
"compute_intensity": 910.2222222222222, | |
"tile_compute_intensity": 13.473684210526315, | |
"MxNxK": 8589934592, | |
"size_m": 1024, | |
"size_n": 4096, | |
"size_k": 2048, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_1024x4096x2048.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.008772400686915293, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.1777727472880999, | |
"compute_intensity": 250.13740458015266, | |
"tile_compute_intensity": 3.930902111324376, | |
"MxNxK": 34359738368, | |
"size_m": 16384, | |
"size_n": 128, | |
"size_k": 16384, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_16384x128x16384.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0002741375214661029, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.3460222234906005, | |
"compute_intensity": 372.3636363636364, | |
"tile_compute_intensity": 10.666666666666666, | |
"MxNxK": 1073741824, | |
"size_m": 1024, | |
"size_n": 2048, | |
"size_k": 512, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_1024x2048x512.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.000017133595091631432, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 2.52640501790828, | |
"compute_intensity": 204.8, | |
"tile_compute_intensity": 3.0476190476190474, | |
"MxNxK": 67108864, | |
"size_m": 256, | |
"size_n": 512, | |
"size_k": 512, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_256x512x512.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.00013706876073305146, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.5476027769675402, | |
"compute_intensity": 215.57894736842104, | |
"tile_compute_intensity": 1.8754578754578755, | |
"MxNxK": 536870912, | |
"size_m": 128, | |
"size_n": 1024, | |
"size_k": 4096, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_128x1024x4096.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.004386200343457647, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.1496288106336194, | |
"compute_intensity": 1170.2857142857142, | |
"tile_compute_intensity": 28.444444444444443, | |
"MxNxK": 17179869184, | |
"size_m": 4096, | |
"size_n": 2048, | |
"size_k": 2048, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_4096x2048x2048.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0021931001717288233, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.1358941924478876, | |
"compute_intensity": 244.53731343283582, | |
"tile_compute_intensity": 1.9806576402321083, | |
"MxNxK": 8589934592, | |
"size_m": 128, | |
"size_n": 8192, | |
"size_k": 8192, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_128x8192x8192.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 6.202964834345979e-7, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 5.984235173687275, | |
"compute_intensity": 7.757575757575758, | |
"tile_compute_intensity": 0.7272727272727273, | |
"MxNxK": 524288, | |
"size_m": 256, | |
"size_n": 256, | |
"size_k": 8, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_256x256x8.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.00003973546296825159, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.3847579620384758, | |
"compute_intensity": 15.50780880265026, | |
"tile_compute_intensity": 1.3298701298701299, | |
"MxNxK": 67108864, | |
"size_m": 256, | |
"size_n": 16384, | |
"size_k": 16, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_256x16384x16.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.000017133595091631432, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 5.696597766893407, | |
"compute_intensity": 170.66666666666666, | |
"tile_compute_intensity": 1.7534246575342465, | |
"MxNxK": 67108864, | |
"size_m": 128, | |
"size_n": 512, | |
"size_k": 1024, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_128x512x1024.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.00006853438036652573, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.733529927163845, | |
"compute_intensity": 195.04761904761904, | |
"tile_compute_intensity": 2.6528497409326426, | |
"MxNxK": 268435456, | |
"size_m": 512, | |
"size_n": 128, | |
"size_k": 4096, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_512x128x4096.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.000002700114104362367, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 3.0102431176191606, | |
"compute_intensity": 29.681159420289855, | |
"tile_compute_intensity": 1.8823529411764706, | |
"MxNxK": 8388608, | |
"size_m": 256, | |
"size_n": 1024, | |
"size_k": 32, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_256x1024x32.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0010965500858644117, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 2.057422923525738, | |
"compute_intensity": 125.06870229007633, | |
"tile_compute_intensity": 1.965451055662188, | |
"MxNxK": 4294967296, | |
"size_m": 8192, | |
"size_n": 64, | |
"size_k": 8192, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_8192x64x8192.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0010965500858644117, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 2.0183592719433325, | |
"compute_intensity": 630.1538461538462, | |
"tile_compute_intensity": 7.937984496124031, | |
"MxNxK": 4294967296, | |
"size_m": 1024, | |
"size_n": 512, | |
"size_k": 8192, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_1024x512x8192.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.00015018472504804733, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.0547224987668262, | |
"compute_intensity": 7.980516317584024, | |
"tile_compute_intensity": 0.9660377358490566, | |
"MxNxK": 134217728, | |
"size_m": 2048, | |
"size_n": 8192, | |
"size_k": 8, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_2048x8192x8.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.00015237400675428709, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.5581450650182387, | |
"compute_intensity": 31.690522243713733, | |
"tile_compute_intensity": 3.657142857142857, | |
"MxNxK": 536870912, | |
"size_m": 8192, | |
"size_n": 2048, | |
"size_k": 32, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_8192x2048x32.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 4.013683128106221e-7, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 10.053609429295157, | |
"compute_intensity": 26.94736842105263, | |
"tile_compute_intensity": 1.1428571428571428, | |
"MxNxK": 1048576, | |
"size_m": 128, | |
"size_n": 256, | |
"size_k": 32, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_128x256x32.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 6.294184905439302e-7, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 7.483733154823168, | |
"compute_intensity": 7.6992481203007515, | |
"tile_compute_intensity": 0.7272727272727273, | |
"MxNxK": 524288, | |
"size_m": 512, | |
"size_n": 128, | |
"size_k": 8, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_512x128x8.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 6.294184905439302e-7, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 6.502509968226466, | |
"compute_intensity": 7.6992481203007515, | |
"tile_compute_intensity": 0.64, | |
"MxNxK": 524288, | |
"size_m": 128, | |
"size_n": 512, | |
"size_k": 8, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_128x512x8.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.00013706876073305146, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.361045345955492, | |
"compute_intensity": 248.24242424242425, | |
"tile_compute_intensity": 3.657142857142857, | |
"MxNxK": 536870912, | |
"size_m": 256, | |
"size_n": 4096, | |
"size_k": 512, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_256x4096x512.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0021931001717288233, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.1384739188321202, | |
"compute_intensity": 744.7272727272727, | |
"tile_compute_intensity": 12.487804878048781, | |
"MxNxK": 8589934592, | |
"size_m": 4096, | |
"size_n": 512, | |
"size_k": 4096, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_4096x512x4096.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.017544801373830587, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.1197176076789115, | |
"compute_intensity": 1310.72, | |
"tile_compute_intensity": 42.666666666666664, | |
"MxNxK": 68719476736, | |
"size_m": 16384, | |
"size_n": 2048, | |
"size_k": 2048, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_16384x2048x2048.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0021931001717288233, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 2.0355242110921505, | |
"compute_intensity": 244.53731343283582, | |
"tile_compute_intensity": 3.757798165137615, | |
"MxNxK": 8589934592, | |
"size_m": 4096, | |
"size_n": 128, | |
"size_k": 16384, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_4096x128x16384.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.00013706876073305146, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.335388135079768, | |
"compute_intensity": 112.99310344827586, | |
"tile_compute_intensity": 7.529411764705882, | |
"MxNxK": 536870912, | |
"size_m": 8192, | |
"size_n": 512, | |
"size_k": 128, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_8192x512x128.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0002741375214661029, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.3845197179624082, | |
"compute_intensity": 455.1111111111111, | |
"tile_compute_intensity": 9.846153846153847, | |
"MxNxK": 1073741824, | |
"size_m": 2048, | |
"size_n": 512, | |
"size_k": 1024, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_2048x512x1024.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.000005272520109194081, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.9087646115563475, | |
"compute_intensity": 15.03119266055046, | |
"tile_compute_intensity": 0.9922480620155039, | |
"MxNxK": 8388608, | |
"size_m": 128, | |
"size_n": 4096, | |
"size_k": 16, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_128x4096x16.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.000034267190183262865, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.6172203973331873, | |
"compute_intensity": 84.89119170984456, | |
"tile_compute_intensity": 1.855072463768116, | |
"MxNxK": 134217728, | |
"size_m": 8192, | |
"size_n": 64, | |
"size_k": 256, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_8192x64x256.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.00003984492705356358, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.842824318676884, | |
"compute_intensity": 30.97164461247637, | |
"tile_compute_intensity": 3.1219512195121952, | |
"MxNxK": 134217728, | |
"size_m": 8192, | |
"size_n": 512, | |
"size_k": 32, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_8192x512x32.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0021931001717288233, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.1528271821414036, | |
"compute_intensity": 744.7272727272727, | |
"tile_compute_intensity": 21.333333333333332, | |
"MxNxK": 8589934592, | |
"size_m": 2048, | |
"size_n": 4096, | |
"size_k": 1024, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_2048x4096x1024.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0002741375214661029, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.3859904864418893, | |
"compute_intensity": 113.3840830449827, | |
"tile_compute_intensity": 1.9541984732824427, | |
"MxNxK": 1073741824, | |
"size_m": 16384, | |
"size_n": 64, | |
"size_k": 1024, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_16384x64x1024.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.00006853438036652573, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.5498439641614274, | |
"compute_intensity": 292.57142857142856, | |
"tile_compute_intensity": 3.9384615384615387, | |
"MxNxK": 268435456, | |
"size_m": 512, | |
"size_n": 256, | |
"size_k": 2048, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_512x256x2048.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.00013706876073305146, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.5563343023068905, | |
"compute_intensity": 120.47058823529412, | |
"tile_compute_intensity": 1.8754578754578755, | |
"MxNxK": 536870912, | |
"size_m": 2048, | |
"size_n": 64, | |
"size_k": 4096, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_2048x64x4096.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.00000970581556432959, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.7754303643442821, | |
"compute_intensity": 15.693486590038313, | |
"tile_compute_intensity": 1.6842105263157894, | |
"MxNxK": 16777216, | |
"size_m": 2048, | |
"size_n": 512, | |
"size_k": 16, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_2048x512x16.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.000017133595091631432, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.9104454460875933, | |
"compute_intensity": 63.750972762645915, | |
"tile_compute_intensity": 1.7534246575342465, | |
"MxNxK": 67108864, | |
"size_m": 8192, | |
"size_n": 64, | |
"size_k": 128, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_8192x64x128.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0021931001717288233, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.1612639083012033, | |
"compute_intensity": 504.12307692307695, | |
"tile_compute_intensity": 13.473684210526315, | |
"MxNxK": 8589934592, | |
"size_m": 16384, | |
"size_n": 512, | |
"size_k": 1024, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_16384x512x1024.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0002741375214661029, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.421079402557919, | |
"compute_intensity": 202.2716049382716, | |
"tile_compute_intensity": 9.846153846153847, | |
"MxNxK": 1073741824, | |
"size_m": 8192, | |
"size_n": 512, | |
"size_k": 256, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_8192x512x256.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.004386200343457647, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.3001787853029483, | |
"compute_intensity": 780.1904761904761, | |
"tile_compute_intensity": 7.641791044776119, | |
"MxNxK": 17179869184, | |
"size_m": 512, | |
"size_n": 8192, | |
"size_k": 4096, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_512x8192x4096.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0010965500858644117, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 2.013660830395438, | |
"compute_intensity": 630.1538461538462, | |
"tile_compute_intensity": 6.3602484472049685, | |
"MxNxK": 4294967296, | |
"size_m": 512, | |
"size_n": 1024, | |
"size_k": 8192, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_512x1024x8192.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.000010526796204169499, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.599878928028192, | |
"compute_intensity": 15.044995408631772, | |
"tile_compute_intensity": 0.9961089494163424, | |
"MxNxK": 16777216, | |
"size_m": 128, | |
"size_n": 8192, | |
"size_k": 16, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_128x8192x16.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.00006853438036652573, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.6552276639558547, | |
"compute_intensity": 60.014652014652015, | |
"tile_compute_intensity": 5.12, | |
"MxNxK": 268435456, | |
"size_m": 8192, | |
"size_n": 512, | |
"size_k": 64, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_8192x512x64.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.017544801373830587, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.2947195223634043, | |
"compute_intensity": 1489.4545454545455, | |
"tile_compute_intensity": 14.124137931034483, | |
"MxNxK": 68719476736, | |
"size_m": 1024, | |
"size_n": 4096, | |
"size_k": 16384, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_1024x4096x16384.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.017544801373830587, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.0889079084636253, | |
"compute_intensity": 2048, | |
"tile_compute_intensity": 39.38461538461539, | |
"MxNxK": 68719476736, | |
"size_m": 4096, | |
"size_n": 4096, | |
"size_k": 4096, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_4096x4096x4096.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.00007516533858089832, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.1218361942139115, | |
"compute_intensity": 7.976630963972736, | |
"tile_compute_intensity": 0.9624060150375939, | |
"MxNxK": 67108864, | |
"size_m": 2048, | |
"size_n": 4096, | |
"size_k": 8, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_2048x4096x8.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0005482750429322058, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.352920914979561, | |
"compute_intensity": 254.015503875969, | |
"tile_compute_intensity": 6.918918918918919, | |
"MxNxK": 2147483648, | |
"size_m": 16384, | |
"size_n": 256, | |
"size_k": 512, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_16384x256x512.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0005482750429322058, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.3416156934322234, | |
"compute_intensity": 334.3673469387755, | |
"tile_compute_intensity": 6.918918918918919, | |
"MxNxK": 2147483648, | |
"size_m": 512, | |
"size_n": 8192, | |
"size_k": 512, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_512x8192x512.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.008772400686915293, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.2906387067543437, | |
"compute_intensity": 1365.3333333333333, | |
"tile_compute_intensity": 14.027397260273972, | |
"MxNxK": 34359738368, | |
"size_m": 1024, | |
"size_n": 4096, | |
"size_k": 8192, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_1024x4096x8192.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.000009651083521673596, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.6870231147397998, | |
"compute_intensity": 7.869356388088376, | |
"tile_compute_intensity": 0.8767123287671232, | |
"MxNxK": 8388608, | |
"size_m": 4096, | |
"size_n": 256, | |
"size_k": 8, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_4096x256x8.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.000004283398772907858, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 2.920297828904901, | |
"compute_intensity": 55.351351351351354, | |
"tile_compute_intensity": 2.4615384615384617, | |
"MxNxK": 16777216, | |
"size_m": 256, | |
"size_n": 1024, | |
"size_k": 64, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_256x1024x64.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0002741375214661029, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.3791851352866442, | |
"compute_intensity": 62.77394636015325, | |
"tile_compute_intensity": 6.7368421052631575, | |
"MxNxK": 1073741824, | |
"size_m": 8192, | |
"size_n": 2048, | |
"size_k": 64, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_8192x2048x64.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.00013706876073305146, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.4348653197077674, | |
"compute_intensity": 127.50194552529183, | |
"tile_compute_intensity": 1.8754578754578755, | |
"MxNxK": 536870912, | |
"size_m": 128, | |
"size_n": 16384, | |
"size_k": 256, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_128x16384x256.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0002741375214661029, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.2915095151634282, | |
"compute_intensity": 119.5912408759124, | |
"tile_compute_intensity": 9.846153846153847, | |
"MxNxK": 1073741824, | |
"size_m": 8192, | |
"size_n": 1024, | |
"size_k": 128, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_8192x1024x128.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.004386200343457647, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.122845910237364, | |
"compute_intensity": 248.24242424242425, | |
"tile_compute_intensity": 1.9825750242013553, | |
"MxNxK": 17179869184, | |
"size_m": 128, | |
"size_n": 8192, | |
"size_k": 16384, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_128x8192x16384.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0005482750429322058, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.23970478063078, | |
"compute_intensity": 224.43835616438355, | |
"tile_compute_intensity": 14.222222222222221, | |
"MxNxK": 2147483648, | |
"size_m": 8192, | |
"size_n": 1024, | |
"size_k": 256, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_8192x1024x256.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.00006853438036652573, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.5336419847065725, | |
"compute_intensity": 85.11168831168831, | |
"tile_compute_intensity": 1.7716262975778547, | |
"MxNxK": 268435456, | |
"size_m": 128, | |
"size_n": 16384, | |
"size_k": 128, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_128x16384x128.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.017544801373830587, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.1477456120717708, | |
"compute_intensity": 910.2222222222222, | |
"tile_compute_intensity": 51.2, | |
"MxNxK": 68719476736, | |
"size_m": 8192, | |
"size_n": 8192, | |
"size_k": 1024, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_8192x8192x1024.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 5.354248466134823e-7, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 10.829530669551737, | |
"compute_intensity": 46.54545454545455, | |
"tile_compute_intensity": 1.3333333333333333, | |
"MxNxK": 2097152, | |
"size_m": 128, | |
"size_n": 256, | |
"size_k": 64, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_128x256x64.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0021931001717288233, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 2.0194216021472564, | |
"compute_intensity": 655.36, | |
"tile_compute_intensity": 7.968871595330739, | |
"MxNxK": 8589934592, | |
"size_m": 1024, | |
"size_n": 512, | |
"size_k": 16384, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_1024x512x16384.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.017544801373830587, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.2969953618376882, | |
"compute_intensity": 910.2222222222222, | |
"tile_compute_intensity": 7.728301886792453, | |
"MxNxK": 68719476736, | |
"size_m": 512, | |
"size_n": 8192, | |
"size_k": 16384, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_512x8192x16384.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.000005258837098530083, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.7786441868622151, | |
"compute_intensity": 7.525953146531925, | |
"tile_compute_intensity": 0.6632124352331606, | |
"MxNxK": 4194304, | |
"size_m": 8192, | |
"size_n": 64, | |
"size_k": 8, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_8192x64x8.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0000025176739621757206, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 2.6093928251788308, | |
"compute_intensity": 15.398496240601503, | |
"tile_compute_intensity": 1.4545454545454546, | |
"MxNxK": 4194304, | |
"size_m": 1024, | |
"size_n": 256, | |
"size_k": 16, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_1024x256x16.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0021931001717288233, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.1254044885387076, | |
"compute_intensity": 239.1824817518248, | |
"tile_compute_intensity": 19.692307692307693, | |
"MxNxK": 8589934592, | |
"size_m": 16384, | |
"size_n": 2048, | |
"size_k": 256, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_16384x2048x256.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.00013706876073305146, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.2818092547500488, | |
"compute_intensity": 120.47058823529412, | |
"tile_compute_intensity": 9.142857142857142, | |
"MxNxK": 536870912, | |
"size_m": 2048, | |
"size_n": 2048, | |
"size_k": 128, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_2048x2048x128.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.000004283398772907858, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 22.669100572452333, | |
"compute_intensity": 93.0909090909091, | |
"tile_compute_intensity": 1.3195876288659794, | |
"MxNxK": 16777216, | |
"size_m": 256, | |
"size_n": 64, | |
"size_k": 1024, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_256x64x1024.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.00013706876073305146, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.4037451088818684, | |
"compute_intensity": 315.0769230769231, | |
"tile_compute_intensity": 3.657142857142857, | |
"MxNxK": 536870912, | |
"size_m": 256, | |
"size_n": 2048, | |
"size_k": 1024, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_256x2048x1024.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0002741375214661029, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.3930293158295126, | |
"compute_intensity": 227.55555555555554, | |
"tile_compute_intensity": 12.8, | |
"MxNxK": 1073741824, | |
"size_m": 2048, | |
"size_n": 2048, | |
"size_k": 256, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_2048x2048x256.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0010965500858644117, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.172199930184609, | |
"compute_intensity": 123.65283018867925, | |
"tile_compute_intensity": 12.19047619047619, | |
"MxNxK": 4294967296, | |
"size_m": 16384, | |
"size_n": 2048, | |
"size_k": 128, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_16384x2048x128.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0002741375214661029, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.3886169254590097, | |
"compute_intensity": 119.5912408759124, | |
"tile_compute_intensity": 1.9541984732824427, | |
"MxNxK": 1073741824, | |
"size_m": 8192, | |
"size_n": 64, | |
"size_k": 2048, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_8192x64x2048.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0002741375214661029, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.3785547695388412, | |
"compute_intensity": 512, | |
"tile_compute_intensity": 7.757575757575758, | |
"MxNxK": 1073741824, | |
"size_m": 1024, | |
"size_n": 512, | |
"size_k": 2048, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_1024x512x2048.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0005482750429322058, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.3620900952230381, | |
"compute_intensity": 224.43835616438355, | |
"tile_compute_intensity": 1.9692307692307693, | |
"MxNxK": 2147483648, | |
"size_m": 128, | |
"size_n": 8192, | |
"size_k": 2048, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_128x8192x2048.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.017544801373830587, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.167284402291877, | |
"compute_intensity": 2048, | |
"tile_compute_intensity": 31.03030303030303, | |
"MxNxK": 68719476736, | |
"size_m": 4096, | |
"size_n": 2048, | |
"size_k": 8192, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_4096x2048x8192.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0002741375214661029, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.3704421441192216, | |
"compute_intensity": 512, | |
"tile_compute_intensity": 6.2439024390243905, | |
"MxNxK": 1073741824, | |
"size_m": 512, | |
"size_n": 1024, | |
"size_k": 2048, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_512x1024x2048.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.000008566797545815716, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 2.621469642301657, | |
"compute_intensity": 63.50387596899225, | |
"tile_compute_intensity": 1.7297297297297298, | |
"MxNxK": 33554432, | |
"size_m": 4096, | |
"size_n": 64, | |
"size_k": 128, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_4096x64x128.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.00013706876073305146, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.4336513008450487, | |
"compute_intensity": 168.90721649484536, | |
"tile_compute_intensity": 3.506849315068493, | |
"MxNxK": 536870912, | |
"size_m": 256, | |
"size_n": 8192, | |
"size_k": 256, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_256x8192x256.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.017544801373830587, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.15328997693523, | |
"compute_intensity": 489.07462686567163, | |
"tile_compute_intensity": 39.38461538461539, | |
"MxNxK": 68719476736, | |
"size_m": 8192, | |
"size_n": 16384, | |
"size_k": 512, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_8192x16384x512.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0002741375214661029, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.3842746040378382, | |
"compute_intensity": 202.2716049382716, | |
"tile_compute_intensity": 3.764705882352941, | |
"MxNxK": 1073741824, | |
"size_m": 8192, | |
"size_n": 128, | |
"size_k": 1024, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_8192x128x1024.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.000008566797545815716, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 2.513144405548268, | |
"compute_intensity": 83.59183673469387, | |
"tile_compute_intensity": 1.7297297297297298, | |
"MxNxK": 33554432, | |
"size_m": 128, | |
"size_n": 2048, | |
"size_k": 128, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_128x2048x128.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.008772400686915293, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.1652414465255525, | |
"compute_intensity": 481.88235294117646, | |
"tile_compute_intensity": 7.501831501831502, | |
"MxNxK": 34359738368, | |
"size_m": 8192, | |
"size_n": 256, | |
"size_k": 16384, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_8192x256x16384.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0002741375214661029, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 2.153336698652952, | |
"compute_intensity": 327.68, | |
"tile_compute_intensity": 3.9844357976653697, | |
"MxNxK": 1073741824, | |
"size_m": 512, | |
"size_n": 256, | |
"size_k": 8192, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_512x256x8192.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.000009933865742062898, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.5739291177577628, | |
"compute_intensity": 7.75390440132513, | |
"tile_compute_intensity": 0.7950310559006211, | |
"MxNxK": 8388608, | |
"size_m": 8192, | |
"size_n": 128, | |
"size_k": 8, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_8192x128x8.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.000017133595091631432, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 2.380913002195961, | |
"compute_intensity": 124.12121212121212, | |
"tile_compute_intensity": 1.8285714285714285, | |
"MxNxK": 67108864, | |
"size_m": 128, | |
"size_n": 2048, | |
"size_k": 256, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_128x2048x256.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.00000970581556432959, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.7328991625804369, | |
"compute_intensity": 15.693486590038313, | |
"tile_compute_intensity": 1.5609756097560976, | |
"MxNxK": 16777216, | |
"size_m": 512, | |
"size_n": 2048, | |
"size_k": 16, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_512x2048x16.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.00015441733634677753, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.8214950069375722, | |
"compute_intensity": 31.477425552353505, | |
"tile_compute_intensity": 3.506849315068493, | |
"MxNxK": 536870912, | |
"size_m": 16384, | |
"size_n": 1024, | |
"size_k": 32, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_16384x1024x32.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0021931001717288233, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.1202931922481465, | |
"compute_intensity": 1024, | |
"tile_compute_intensity": 12.487804878048781, | |
"MxNxK": 8589934592, | |
"size_m": 1024, | |
"size_n": 2048, | |
"size_k": 4096, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_1024x2048x4096.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.000019995439583656446, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 2.0006162026846575, | |
"compute_intensity": 30.91320754716981, | |
"tile_compute_intensity": 3.0476190476190474, | |
"MxNxK": 67108864, | |
"size_m": 4096, | |
"size_n": 512, | |
"size_k": 32, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_4096x512x32.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.00006853438036652573, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.5227628673032296, | |
"compute_intensity": 101.7639751552795, | |
"tile_compute_intensity": 3.1604938271604937, | |
"MxNxK": 268435456, | |
"size_m": 256, | |
"size_n": 8192, | |
"size_k": 128, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_256x8192x128.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0010965500858644117, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.1889798723063736, | |
"compute_intensity": 337.8144329896907, | |
"tile_compute_intensity": 12.19047619047619, | |
"MxNxK": 4294967296, | |
"size_m": 16384, | |
"size_n": 512, | |
"size_k": 512, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_16384x512x512.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.000017133595091631432, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 2.481580764438176, | |
"compute_intensity": 84.45360824742268, | |
"tile_compute_intensity": 1.8285714285714285, | |
"MxNxK": 67108864, | |
"size_m": 4096, | |
"size_n": 64, | |
"size_k": 256, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_4096x64x256.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.00013706876073305146, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.3650374924941262, | |
"compute_intensity": 168.90721649484536, | |
"tile_compute_intensity": 3.657142857142857, | |
"MxNxK": 536870912, | |
"size_m": 8192, | |
"size_n": 128, | |
"size_k": 512, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_8192x128x512.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0021931001717288233, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.1486614344725472, | |
"compute_intensity": 455.1111111111111, | |
"tile_compute_intensity": 7.062068965517241, | |
"MxNxK": 8589934592, | |
"size_m": 4096, | |
"size_n": 256, | |
"size_k": 8192, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_4096x256x8192.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.000001277080995306525, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 3.8262255891517793, | |
"compute_intensity": 15.283582089552239, | |
"tile_compute_intensity": 1.3333333333333333, | |
"MxNxK": 2097152, | |
"size_m": 512, | |
"size_n": 256, | |
"size_k": 16, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_512x256x16.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0021931001717288233, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 2.0168811143746495, | |
"compute_intensity": 244.53731343283582, | |
"tile_compute_intensity": 1.967339097022094, | |
"MxNxK": 8589934592, | |
"size_m": 128, | |
"size_n": 4096, | |
"size_k": 16384, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_128x4096x16384.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.00003772862140419848, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.2535841133721053, | |
"compute_intensity": 7.961127308066083, | |
"tile_compute_intensity": 0.9552238805970149, | |
"MxNxK": 33554432, | |
"size_m": 4096, | |
"size_n": 1024, | |
"size_k": 8, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_4096x1024x8.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0010965500858644117, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.318669598837236, | |
"compute_intensity": 496.4848484848485, | |
"tile_compute_intensity": 12.8, | |
"MxNxK": 4294967296, | |
"size_m": 8192, | |
"size_n": 512, | |
"size_k": 1024, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_8192x512x1024.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.017544801373830587, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.3170869748350043, | |
"compute_intensity": 910.2222222222222, | |
"tile_compute_intensity": 14.124137931034483, | |
"MxNxK": 68719476736, | |
"size_m": 8192, | |
"size_n": 512, | |
"size_k": 16384, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_8192x512x16384.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0002741375214661029, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.386597495873249, | |
"compute_intensity": 372.3636363636364, | |
"tile_compute_intensity": 5.278350515463917, | |
"MxNxK": 1073741824, | |
"size_m": 1024, | |
"size_n": 256, | |
"size_k": 4096, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_1024x256x4096.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.000019283923029128524, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.4251042367189735, | |
"compute_intensity": 7.8731379144642, | |
"tile_compute_intensity": 0.8827586206896552, | |
"MxNxK": 16777216, | |
"size_m": 8192, | |
"size_n": 256, | |
"size_k": 8, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_8192x256x8.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.017544801373830587, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.0923314348619277, | |
"compute_intensity": 1820.4444444444443, | |
"tile_compute_intensity": 39.38461538461539, | |
"MxNxK": 68719476736, | |
"size_m": 8192, | |
"size_n": 2048, | |
"size_k": 4096, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_8192x2048x4096.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 1.0034207820265552e-7, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 39.704181331700546, | |
"compute_intensity": 13.473684210526315, | |
"tile_compute_intensity": 0.6666666666666666, | |
"MxNxK": 131072, | |
"size_m": 128, | |
"size_n": 64, | |
"size_k": 16, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_128x64x16.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0000010708496932269645, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 7.563339213379601, | |
"compute_intensity": 73.14285714285714, | |
"tile_compute_intensity": 1.7777777777777777, | |
"MxNxK": 4194304, | |
"size_m": 256, | |
"size_n": 128, | |
"size_k": 128, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_256x128x128.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 8.939566967145676e-7, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 6.271444724037163, | |
"compute_intensity": 25.28395061728395, | |
"tile_compute_intensity": 1.2307692307692308, | |
"MxNxK": 2097152, | |
"size_m": 1024, | |
"size_n": 64, | |
"size_k": 32, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_1024x64x32.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.00013706876073305146, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.3672320519871857, | |
"compute_intensity": 248.24242424242425, | |
"tile_compute_intensity": 6.4, | |
"MxNxK": 536870912, | |
"size_m": 4096, | |
"size_n": 256, | |
"size_k": 512, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_4096x256x512.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.00006853438036652573, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.5607699640617776, | |
"compute_intensity": 204.8, | |
"tile_compute_intensity": 3.1604938271604937, | |
"MxNxK": 268435456, | |
"size_m": 1024, | |
"size_n": 128, | |
"size_k": 2048, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_1024x128x2048.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0010965500858644117, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 2.0274845423118957, | |
"compute_intensity": 399.609756097561, | |
"tile_compute_intensity": 5.3194805194805195, | |
"MxNxK": 4294967296, | |
"size_m": 1024, | |
"size_n": 256, | |
"size_k": 16384, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_1024x256x16384.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.00003860433408669438, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.3348966526792314, | |
"compute_intensity": 15.738712776176753, | |
"tile_compute_intensity": 1.5900621118012421, | |
"MxNxK": 67108864, | |
"size_m": 512, | |
"size_n": 8192, | |
"size_k": 16, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_512x8192x16.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.000002141699386453929, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 6.483076114622274, | |
"compute_intensity": 102.4, | |
"tile_compute_intensity": 1.8823529411764706, | |
"MxNxK": 8388608, | |
"size_m": 256, | |
"size_n": 128, | |
"size_k": 256, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_256x128x256.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0021931001717288233, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.0782646473903348, | |
"compute_intensity": 404.5432098765432, | |
"tile_compute_intensity": 12.487804878048781, | |
"MxNxK": 8589934592, | |
"size_m": 1024, | |
"size_n": 16384, | |
"size_k": 512, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_1024x16384x512.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.008772400686915293, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.0607453716880495, | |
"compute_intensity": 1260.3076923076924, | |
"tile_compute_intensity": 25.6, | |
"MxNxK": 34359738368, | |
"size_m": 2048, | |
"size_n": 8192, | |
"size_k": 2048, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_2048x8192x2048.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0005992793790546961, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.0243342875304067, | |
"compute_intensity": 7.990246281394782, | |
"tile_compute_intensity": 0.9884169884169884, | |
"MxNxK": 536870912, | |
"size_m": 16384, | |
"size_n": 4096, | |
"size_k": 8, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_16384x4096x8.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.00006853438036652573, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.773078031215325, | |
"compute_intensity": 117.02857142857142, | |
"tile_compute_intensity": 1.7716262975778547, | |
"MxNxK": 268435456, | |
"size_m": 1024, | |
"size_n": 64, | |
"size_k": 4096, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_1024x64x4096.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0002741375214661029, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.3665549736064948, | |
"compute_intensity": 327.68, | |
"tile_compute_intensity": 10.666666666666666, | |
"MxNxK": 1073741824, | |
"size_m": 4096, | |
"size_n": 512, | |
"size_k": 512, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_4096x512x512.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0010965500858644117, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.3226149478109228, | |
"compute_intensity": 630.1538461538462, | |
"tile_compute_intensity": 18.285714285714285, | |
"MxNxK": 4294967296, | |
"size_m": 4096, | |
"size_n": 1024, | |
"size_k": 1024, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_4096x1024x1024.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0021931001717288233, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.1069480724886374, | |
"compute_intensity": 239.1824817518248, | |
"tile_compute_intensity": 15.515151515151516, | |
"MxNxK": 8589934592, | |
"size_m": 2048, | |
"size_n": 16384, | |
"size_k": 256, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_2048x16384x256.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.00003882326225731836, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.7419660206480312, | |
"compute_intensity": 31.386973180076627, | |
"tile_compute_intensity": 3.3684210526315788, | |
"MxNxK": 134217728, | |
"size_m": 4096, | |
"size_n": 1024, | |
"size_k": 32, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_4096x1024x32.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.000004971493874586116, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.8692168158646534, | |
"compute_intensity": 7.750236518448439, | |
"tile_compute_intensity": 0.7901234567901234, | |
"MxNxK": 4194304, | |
"size_m": 4096, | |
"size_n": 128, | |
"size_k": 8, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_4096x128x8.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.000002645382061706373, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 2.2971351532150153, | |
"compute_intensity": 15.003663003663004, | |
"tile_compute_intensity": 0.9846153846153847, | |
"MxNxK": 4194304, | |
"size_m": 128, | |
"size_n": 2048, | |
"size_k": 16, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_128x2048x16.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.00013706876073305146, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.358383914929736, | |
"compute_intensity": 341.3333333333333, | |
"tile_compute_intensity": 9.142857142857142, | |
"MxNxK": 536870912, | |
"size_m": 1024, | |
"size_n": 1024, | |
"size_k": 512, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_1024x1024x512.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.000019283923029128524, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.3900906072605284, | |
"compute_intensity": 7.8731379144642, | |
"tile_compute_intensity": 0.7975077881619937, | |
"MxNxK": 16777216, | |
"size_m": 256, | |
"size_n": 8192, | |
"size_k": 8, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_256x8192x8.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0010965500858644117, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.1298241830663531, | |
"compute_intensity": 123.65283018867925, | |
"tile_compute_intensity": 10.448979591836734, | |
"MxNxK": 4294967296, | |
"size_m": 2048, | |
"size_n": 16384, | |
"size_k": 128, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_2048x16384x128.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0021931001717288233, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.120431801369733, | |
"compute_intensity": 442.81081081081084, | |
"tile_compute_intensity": 3.9083969465648853, | |
"MxNxK": 8589934592, | |
"size_m": 256, | |
"size_n": 8192, | |
"size_k": 4096, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_256x8192x4096.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0005482750429322058, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 2.0634023103994705, | |
"compute_intensity": 390.0952380952381, | |
"tile_compute_intensity": 3.5432525951557095, | |
"MxNxK": 2147483648, | |
"size_m": 256, | |
"size_n": 1024, | |
"size_k": 8192, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_256x1024x8192.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0002741375214661029, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.2425763335724704, | |
"compute_intensity": 61.94328922495274, | |
"tile_compute_intensity": 5.278350515463917, | |
"MxNxK": 1073741824, | |
"size_m": 1024, | |
"size_n": 16384, | |
"size_k": 64, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_1024x16384x64.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.004386200343457647, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.073750848140436, | |
"compute_intensity": 668.734693877551, | |
"tile_compute_intensity": 23.272727272727273, | |
"MxNxK": 17179869184, | |
"size_m": 16384, | |
"size_n": 1024, | |
"size_k": 1024, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_16384x1024x1024.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.00030241277968858513, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.5087933754084861, | |
"compute_intensity": 31.813592233009707, | |
"tile_compute_intensity": 3.764705882352941, | |
"MxNxK": 1073741824, | |
"size_m": 8192, | |
"size_n": 4096, | |
"size_k": 32, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_8192x4096x32.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0010965500858644117, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.323770618348562, | |
"compute_intensity": 63.50387596899225, | |
"tile_compute_intensity": 7.314285714285714, | |
"MxNxK": 4294967296, | |
"size_m": 8192, | |
"size_n": 8192, | |
"size_k": 64, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_8192x8192x64.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 2.189281706239757e-7, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 17.730016418728564, | |
"compute_intensity": 25.6, | |
"tile_compute_intensity": 1, | |
"MxNxK": 524288, | |
"size_m": 128, | |
"size_n": 128, | |
"size_k": 32, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_128x128x32.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.000001277080995306525, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 3.6934230008845703, | |
"compute_intensity": 15.283582089552239, | |
"tile_compute_intensity": 1.2307692307692308, | |
"MxNxK": 2097152, | |
"size_m": 256, | |
"size_n": 512, | |
"size_k": 16, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_256x512x16.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.00006853438036652573, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 2.9504606006207235, | |
"compute_intensity": 101.7639751552795, | |
"tile_compute_intensity": 1.332465842550423, | |
"MxNxK": 268435456, | |
"size_m": 256, | |
"size_n": 64, | |
"size_k": 16384, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_256x64x16384.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.0021931001717288233, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.1519677546283316, | |
"compute_intensity": 655.36, | |
"tile_compute_intensity": 13.473684210526315, | |
"MxNxK": 8589934592, | |
"size_m": 1024, | |
"size_n": 8192, | |
"size_k": 1024, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_1024x8192x1024.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.008772400686915293, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.1204164286669098, | |
"compute_intensity": 862.3157894736842, | |
"tile_compute_intensity": 7.501831501831502, | |
"MxNxK": 34359738368, | |
"size_m": 512, | |
"size_n": 4096, | |
"size_k": 16384, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_512x4096x16384.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.004386200343457647, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.0749962918652791, | |
"compute_intensity": 468.1142857142857, | |
"tile_compute_intensity": 28.444444444444443, | |
"MxNxK": 17179869184, | |
"size_m": 4096, | |
"size_n": 8192, | |
"size_k": 512, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_4096x8192x512.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "roofline", | |
"metric": "sol-sec", | |
"perf": 0.000009632839507454932, | |
"perf_norm_to_sol": 1, | |
"perf_norm_to_cublas": 1.7579863016908581, | |
"compute_intensity": 15.753846153846155, | |
"tile_compute_intensity": 1.6842105263157894, | |
"MxNxK": 16777216, | |
"size_m": 1024, | |
"size_n": 1024, | |
"size_k": 16, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_1024x1024x16.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.000006217599730007351, | |
"perf_norm_to_sol": 0.10636669199447026, | |
"perf_norm_to_cublas": 0.7076685900397454, | |
"compute_intensity": 7.501831501831502, | |
"tile_compute_intensity": 0.64, | |
"MxNxK": 524288, | |
"size_m": 1024, | |
"size_n": 64, | |
"size_k": 8, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_1024x64x8.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00001526400010334328, | |
"perf_norm_to_sol": 0.6215203963230908, | |
"perf_norm_to_cublas": 1.015094370561506, | |
"compute_intensity": 7.937984496124031, | |
"tile_compute_intensity": 0.9142857142857143, | |
"MxNxK": 8388608, | |
"size_m": 1024, | |
"size_n": 1024, | |
"size_k": 8, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_1024x1024x8.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00041714240796864033, | |
"perf_norm_to_sol": 0.7207642967487405, | |
"perf_norm_to_cublas": 0.7975712946936644, | |
"compute_intensity": 15.953261927945473, | |
"tile_compute_intensity": 1.9248120300751879, | |
"MxNxK": 536870912, | |
"size_m": 4096, | |
"size_n": 8192, | |
"size_k": 16, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_4096x8192x16.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.000022463999630417675, | |
"perf_norm_to_sol": 0.3813567346313401, | |
"perf_norm_to_cublas": 0.958119646880255, | |
"compute_intensity": 97.52380952380952, | |
"tile_compute_intensity": 2.909090909090909, | |
"MxNxK": 33554432, | |
"size_m": 256, | |
"size_n": 1024, | |
"size_k": 128, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_256x1024x128.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00013026880333200097, | |
"perf_norm_to_sol": 0.5260997154618832, | |
"perf_norm_to_cublas": 0.8910069298449822, | |
"compute_intensity": 56.79029462738301, | |
"tile_compute_intensity": 3.9384615384615387, | |
"MxNxK": 268435456, | |
"size_m": 16384, | |
"size_n": 256, | |
"size_k": 64, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_16384x256x64.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.001369574386626482, | |
"perf_norm_to_sol": 0.8006502578990395, | |
"perf_norm_to_cublas": 0.96894100808442, | |
"compute_intensity": 630.1538461538462, | |
"tile_compute_intensity": 12.19047619047619, | |
"MxNxK": 4294967296, | |
"size_m": 4096, | |
"size_n": 512, | |
"size_k": 2048, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_4096x512x2048.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.000006019200009177439, | |
"perf_norm_to_sol": 0.027278729352344648, | |
"perf_norm_to_cublas": 0.5922381324026766, | |
"compute_intensity": 7.529411764705882, | |
"tile_compute_intensity": 0.5714285714285714, | |
"MxNxK": 131072, | |
"size_m": 128, | |
"size_n": 128, | |
"size_k": 8, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_128x128x8.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00003185599925927818, | |
"perf_norm_to_sol": 0.537845162293605, | |
"perf_norm_to_cublas": 1.250828777637265, | |
"compute_intensity": 157.53846153846155, | |
"tile_compute_intensity": 3.2, | |
"MxNxK": 67108864, | |
"size_m": 256, | |
"size_n": 1024, | |
"size_k": 256, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_256x1024x256.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00009491519886069, | |
"perf_norm_to_sol": 0.7220590715625617, | |
"perf_norm_to_cublas": 1.1340480710929828, | |
"compute_intensity": 204.8, | |
"tile_compute_intensity": 1.8686131386861313, | |
"MxNxK": 268435456, | |
"size_m": 128, | |
"size_n": 1024, | |
"size_k": 2048, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_128x1024x2048.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.01085987538099289, | |
"perf_norm_to_sol": 0.8077809716185947, | |
"perf_norm_to_cublas": 0.9325543202268844, | |
"compute_intensity": 1638.4, | |
"tile_compute_intensity": 30.11764705882353, | |
"MxNxK": 34359738368, | |
"size_m": 4096, | |
"size_n": 2048, | |
"size_k": 4096, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_4096x2048x4096.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00005304319784045219, | |
"perf_norm_to_sol": 0.6460242138178511, | |
"perf_norm_to_cublas": 1.9693532754978957, | |
"compute_intensity": 186.1818181818182, | |
"tile_compute_intensity": 2.6391752577319587, | |
"MxNxK": 134217728, | |
"size_m": 512, | |
"size_n": 128, | |
"size_k": 2048, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_512x128x2048.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.000014291200204752386, | |
"perf_norm_to_sol": 0.1659567996001982, | |
"perf_norm_to_cublas": 0.6612180650979071, | |
"compute_intensity": 42.22680412371134, | |
"tile_compute_intensity": 1.5238095238095237, | |
"MxNxK": 8388608, | |
"size_m": 2048, | |
"size_n": 64, | |
"size_k": 64, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_2048x64x64.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.000031577597837895155, | |
"perf_norm_to_sol": 0.5425870320974832, | |
"perf_norm_to_cublas": 1.3878192402197977, | |
"compute_intensity": 204.8, | |
"tile_compute_intensity": 3.764705882352941, | |
"MxNxK": 67108864, | |
"size_m": 512, | |
"size_n": 256, | |
"size_k": 512, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_512x256x512.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.000006492800457635894, | |
"perf_norm_to_sol": 0.370904649323012, | |
"perf_norm_to_cublas": 0.8713651171601893, | |
"compute_intensity": 7.876923076923077, | |
"tile_compute_intensity": 0.8421052631578947, | |
"MxNxK": 2097152, | |
"size_m": 512, | |
"size_n": 512, | |
"size_k": 8, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_512x512x8.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.0001837695948779583, | |
"perf_norm_to_sol": 0.7458729003787544, | |
"perf_norm_to_cublas": 1.1538971250413665, | |
"compute_intensity": 315.0769230769231, | |
"tile_compute_intensity": 3.9689922480620154, | |
"MxNxK": 536870912, | |
"size_m": 512, | |
"size_n": 256, | |
"size_k": 4096, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_512x256x4096.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.0001869536004960537, | |
"perf_norm_to_sol": 0.8060585998147218, | |
"perf_norm_to_cublas": 0.8522499762130001, | |
"compute_intensity": 7.9669341113542425, | |
"tile_compute_intensity": 0.9394495412844037, | |
"MxNxK": 134217728, | |
"size_m": 1024, | |
"size_n": 16384, | |
"size_k": 8, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_1024x16384x8.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00009616320021450519, | |
"perf_norm_to_sol": 0.7126882239115421, | |
"perf_norm_to_cublas": 1.1278825028848636, | |
"compute_intensity": 117.02857142857142, | |
"tile_compute_intensity": 1.8686131386861313, | |
"MxNxK": 268435456, | |
"size_m": 2048, | |
"size_n": 64, | |
"size_k": 2048, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_2048x64x2048.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.005438639968633652, | |
"perf_norm_to_sol": 0.8064884546052403, | |
"perf_norm_to_cublas": 0.9048114702972854, | |
"compute_intensity": 468.1142857142857, | |
"tile_compute_intensity": 3.8714555765595464, | |
"MxNxK": 17179869184, | |
"size_m": 256, | |
"size_n": 4096, | |
"size_k": 16384, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_256x4096x16384.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.010810470581054688, | |
"perf_norm_to_sol": 0.8114726015987588, | |
"perf_norm_to_cublas": 0.9464015744645189, | |
"compute_intensity": 862.3157894736842, | |
"tile_compute_intensity": 12.720496894409937, | |
"MxNxK": 34359738368, | |
"size_m": 4096, | |
"size_n": 512, | |
"size_k": 16384, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_4096x512x16384.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.000013385599595494569, | |
"perf_norm_to_sol": 0.1962659968083889, | |
"perf_norm_to_cublas": 0.626344755250685, | |
"compute_intensity": 30.11764705882353, | |
"tile_compute_intensity": 2.2857142857142856, | |
"MxNxK": 8388608, | |
"size_m": 512, | |
"size_n": 512, | |
"size_k": 32, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_512x512x32.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00001457920006942004, | |
"perf_norm_to_sol": 0.14690102174715033, | |
"perf_norm_to_cublas": 0.9550043668122271, | |
"compute_intensity": 78.76923076923077, | |
"tile_compute_intensity": 1.5238095238095237, | |
"MxNxK": 8388608, | |
"size_m": 512, | |
"size_n": 64, | |
"size_k": 256, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_512x64x256.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.0027106527239084246, | |
"perf_norm_to_sol": 0.809067186063822, | |
"perf_norm_to_cublas": 1.049314196273875, | |
"compute_intensity": 655.36, | |
"tile_compute_intensity": 7.529411764705882, | |
"MxNxK": 8589934592, | |
"size_m": 512, | |
"size_n": 8192, | |
"size_k": 2048, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_512x8192x2048.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.000008665600034873933, | |
"perf_norm_to_sol": 0.12357478869523468, | |
"perf_norm_to_cublas": 0.9963072768811855, | |
"compute_intensity": 60.23529411764706, | |
"tile_compute_intensity": 1.4545454545454546, | |
"MxNxK": 4194304, | |
"size_m": 512, | |
"size_n": 64, | |
"size_k": 128, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_512x64x128.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00538102425634861, | |
"perf_norm_to_sol": 0.8151236891903515, | |
"perf_norm_to_cublas": 0.9103943992267824, | |
"compute_intensity": 1170.2857142857142, | |
"tile_compute_intensity": 12.641975308641975, | |
"MxNxK": 17179869184, | |
"size_m": 1024, | |
"size_n": 2048, | |
"size_k": 8192, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_1024x2048x8192.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.000007993599865585566, | |
"perf_norm_to_sol": 0.6048168346229787, | |
"perf_norm_to_cublas": 1.0496397337780077, | |
"compute_intensity": 7.861804222648752, | |
"tile_compute_intensity": 0.7901234567901234, | |
"MxNxK": 4194304, | |
"size_m": 256, | |
"size_n": 2048, | |
"size_k": 8, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_256x2048x8.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00011688319500535727, | |
"perf_norm_to_sol": 0.5863493067877251, | |
"perf_norm_to_cublas": 0.8570607727543005, | |
"compute_intensity": 61.59398496240601, | |
"tile_compute_intensity": 5.12, | |
"MxNxK": 268435456, | |
"size_m": 1024, | |
"size_n": 4096, | |
"size_k": 64, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_1024x4096x64.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00014087040908634664, | |
"perf_norm_to_sol": 0.5491190150496652, | |
"perf_norm_to_cublas": 0.8069373975470461, | |
"compute_intensity": 31.44721689059501, | |
"tile_compute_intensity": 3.1604938271604937, | |
"MxNxK": 268435456, | |
"size_m": 1024, | |
"size_n": 8192, | |
"size_k": 32, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_1024x8192x32.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.005527308583259583, | |
"perf_norm_to_sol": 0.7935508353454371, | |
"perf_norm_to_cublas": 0.8841661153517874, | |
"compute_intensity": 248.24242424242425, | |
"tile_compute_intensity": 23.272727272727273, | |
"MxNxK": 17179869184, | |
"size_m": 8192, | |
"size_n": 8192, | |
"size_k": 256, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_8192x8192x256.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.0003909856081008911, | |
"perf_norm_to_sol": 0.7011447884172853, | |
"perf_norm_to_cublas": 0.8396422041381781, | |
"compute_intensity": 113.3840830449827, | |
"tile_compute_intensity": 5.278350515463917, | |
"MxNxK": 1073741824, | |
"size_m": 512, | |
"size_n": 16384, | |
"size_k": 128, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_512x16384x128.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00002288320101797581, | |
"perf_norm_to_sol": 0.3743705934797366, | |
"perf_norm_to_cublas": 4.242902863782603, | |
"compute_intensity": 146.28571428571428, | |
"tile_compute_intensity": 1.9692307692307693, | |
"MxNxK": 33554432, | |
"size_m": 256, | |
"size_n": 128, | |
"size_k": 1024, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_256x128x1024.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.0000066912005422636865, | |
"perf_norm_to_sol": 0.10906272561062738, | |
"perf_norm_to_cublas": 0.739837343562357, | |
"compute_intensity": 28.444444444444443, | |
"tile_compute_intensity": 1.6, | |
"MxNxK": 2097152, | |
"size_m": 256, | |
"size_n": 256, | |
"size_k": 32, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_256x256x32.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.0003578144125640392, | |
"perf_norm_to_sol": 0.7661444364459177, | |
"perf_norm_to_cublas": 1.0523086525439986, | |
"compute_intensity": 327.68, | |
"tile_compute_intensity": 3.764705882352941, | |
"MxNxK": 1073741824, | |
"size_m": 256, | |
"size_n": 4096, | |
"size_k": 1024, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_256x4096x1024.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00011594239622354507, | |
"perf_norm_to_sol": 0.5911071583718749, | |
"perf_norm_to_cublas": 0.860068442038939, | |
"compute_intensity": 62.06060606060606, | |
"tile_compute_intensity": 5.818181818181818, | |
"MxNxK": 268435456, | |
"size_m": 2048, | |
"size_n": 2048, | |
"size_k": 64, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_2048x2048x64.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.0007077888119965791, | |
"perf_norm_to_sol": 0.774630841346014, | |
"perf_norm_to_cublas": 0.9467004797281833, | |
"compute_intensity": 203.527950310559, | |
"tile_compute_intensity": 6.320987654320987, | |
"MxNxK": 2147483648, | |
"size_m": 512, | |
"size_n": 16384, | |
"size_k": 256, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_512x16384x256.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.002887840010225773, | |
"perf_norm_to_sol": 0.7594257867344132, | |
"perf_norm_to_cublas": 0.8733115053799436, | |
"compute_intensity": 126.03076923076924, | |
"tile_compute_intensity": 13.473684210526315, | |
"MxNxK": 8589934592, | |
"size_m": 8192, | |
"size_n": 8192, | |
"size_k": 128, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_8192x8192x128.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.022187910974025726, | |
"perf_norm_to_sol": 0.7907369645736098, | |
"perf_norm_to_cublas": 1.035417599196242, | |
"compute_intensity": 1489.4545454545455, | |
"tile_compute_intensity": 21.11340206185567, | |
"MxNxK": 68719476736, | |
"size_m": 4096, | |
"size_n": 1024, | |
"size_k": 16384, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_4096x1024x16384.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00021119038574397565, | |
"perf_norm_to_sol": 0.6490293592210148, | |
"perf_norm_to_cublas": 0.871418494870988, | |
"compute_intensity": 60.12477064220184, | |
"tile_compute_intensity": 3.9689922480620154, | |
"MxNxK": 536870912, | |
"size_m": 512, | |
"size_n": 16384, | |
"size_k": 64, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_512x16384x64.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00009743040427565575, | |
"perf_norm_to_sol": 0.7034188236828443, | |
"perf_norm_to_cublas": 0.9772719234603799, | |
"compute_intensity": 292.57142857142856, | |
"tile_compute_intensity": 5.818181818181818, | |
"MxNxK": 268435456, | |
"size_m": 512, | |
"size_n": 1024, | |
"size_k": 512, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_512x1024x512.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.000013350399967748672, | |
"perf_norm_to_sol": 0.22138140509374374, | |
"perf_norm_to_cublas": 0.6347075670417169, | |
"compute_intensity": 28.248275862068965, | |
"tile_compute_intensity": 1.8823529411764706, | |
"MxNxK": 8388608, | |
"size_m": 2048, | |
"size_n": 128, | |
"size_k": 32, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_2048x128x32.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.005378531292080879, | |
"perf_norm_to_sol": 0.8155015012956607, | |
"perf_norm_to_cublas": 1.0605137802385876, | |
"compute_intensity": 1170.2857142857142, | |
"tile_compute_intensity": 13.837837837837839, | |
"MxNxK": 17179869184, | |
"size_m": 1024, | |
"size_n": 4096, | |
"size_k": 4096, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_1024x4096x4096.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.000018041599832940847, | |
"perf_norm_to_sol": 0.5834735445661191, | |
"perf_norm_to_cublas": 0.9563675639471327, | |
"compute_intensity": 15.044995408631772, | |
"tile_compute_intensity": 1.3195876288659794, | |
"MxNxK": 16777216, | |
"size_m": 8192, | |
"size_n": 128, | |
"size_k": 16, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_8192x128x16.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00005201920284889638, | |
"perf_norm_to_sol": 0.6587411630047665, | |
"perf_norm_to_cublas": 1.4985234698966559, | |
"compute_intensity": 256, | |
"tile_compute_intensity": 4.923076923076923, | |
"MxNxK": 134217728, | |
"size_m": 512, | |
"size_n": 512, | |
"size_k": 512, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_512x512x512.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00001606079895282164, | |
"perf_norm_to_sol": 0.6545822050563993, | |
"perf_norm_to_cublas": 0.9796772589252215, | |
"compute_intensity": 7.527682058350563, | |
"tile_compute_intensity": 0.6649350649350649, | |
"MxNxK": 8388608, | |
"size_m": 16384, | |
"size_n": 64, | |
"size_k": 8, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_16384x64x8.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.002741273678839207, | |
"perf_norm_to_sol": 0.8000296317212269, | |
"perf_norm_to_cublas": 0.9267808245659249, | |
"compute_intensity": 655.36, | |
"tile_compute_intensity": 21.333333333333332, | |
"MxNxK": 8589934592, | |
"size_m": 8192, | |
"size_n": 1024, | |
"size_k": 1024, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_8192x1024x1024.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.000016710400814190507, | |
"perf_norm_to_sol": 0.30569727429207444, | |
"perf_norm_to_cublas": 0.757755610321075, | |
"compute_intensity": 30.567164179104477, | |
"tile_compute_intensity": 2.4615384615384617, | |
"MxNxK": 16777216, | |
"size_m": 512, | |
"size_n": 1024, | |
"size_k": 32, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_512x1024x32.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00046759364195168016, | |
"perf_norm_to_sol": 0.6448699040207421, | |
"perf_norm_to_cublas": 0.911759172959968, | |
"compute_intensity": 15.929995138551288, | |
"tile_compute_intensity": 1.9248120300751879, | |
"MxNxK": 536870912, | |
"size_m": 16384, | |
"size_n": 2048, | |
"size_k": 16, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_16384x2048x16.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.000008611199882579968, | |
"perf_norm_to_sol": 0.34004137890460623, | |
"perf_norm_to_cublas": 0.7491639095530995, | |
"compute_intensity": 14.197573656845753, | |
"tile_compute_intensity": 0.9846153846153847, | |
"MxNxK": 4194304, | |
"size_m": 4096, | |
"size_n": 64, | |
"size_k": 16, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_4096x64x16.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.0015269824303686618, | |
"perf_norm_to_sol": 0.7181157189868058, | |
"perf_norm_to_cublas": 0.8583077338660535, | |
"compute_intensity": 63.38104448742747, | |
"tile_compute_intensity": 7.013698630136986, | |
"MxNxK": 4294967296, | |
"size_m": 4096, | |
"size_n": 16384, | |
"size_k": 64, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_4096x16384x64.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.0013737695291638375, | |
"perf_norm_to_sol": 0.7982052757654634, | |
"perf_norm_to_cublas": 0.9376547803913269, | |
"compute_intensity": 431.1578947368421, | |
"tile_compute_intensity": 18.285714285714285, | |
"MxNxK": 4294967296, | |
"size_m": 2048, | |
"size_n": 4096, | |
"size_k": 512, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_2048x4096x512.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.010842486470937728, | |
"perf_norm_to_sol": 0.8090764706443392, | |
"perf_norm_to_cublas": 0.8780713111299685, | |
"compute_intensity": 862.3157894736842, | |
"tile_compute_intensity": 36.57142857142857, | |
"MxNxK": 34359738368, | |
"size_m": 4096, | |
"size_n": 8192, | |
"size_k": 1024, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_4096x8192x1024.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.003716016188263893, | |
"perf_norm_to_sol": 0.6447630413655887, | |
"perf_norm_to_cublas": 0.9019835881426812, | |
"compute_intensity": 15.984390243902439, | |
"tile_compute_intensity": 1.9768339768339769, | |
"MxNxK": 4294967296, | |
"size_m": 16384, | |
"size_n": 16384, | |
"size_k": 16, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_16384x16384x16.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.000008521600102540106, | |
"perf_norm_to_sol": 0.5833991051873251, | |
"perf_norm_to_cublas": 1.0236575347775845, | |
"compute_intensity": 7.750236518448439, | |
"tile_compute_intensity": 0.6632124352331606, | |
"MxNxK": 4194304, | |
"size_m": 128, | |
"size_n": 4096, | |
"size_k": 8, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_128x4096x8.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.000695663969963789, | |
"perf_norm_to_sol": 0.7881320100000937, | |
"perf_norm_to_cublas": 1.0609398585071426, | |
"compute_intensity": 390.0952380952381, | |
"tile_compute_intensity": 16, | |
"MxNxK": 2147483648, | |
"size_m": 4096, | |
"size_n": 1024, | |
"size_k": 512, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_4096x1024x512.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.011061504483222961, | |
"perf_norm_to_sol": 0.7930567401767488, | |
"perf_norm_to_cublas": 0.8943619708348769, | |
"compute_intensity": 474.8985507246377, | |
"tile_compute_intensity": 36.57142857142857, | |
"MxNxK": 34359738368, | |
"size_m": 16384, | |
"size_n": 4096, | |
"size_k": 512, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_16384x4096x512.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.0004245087970048189, | |
"perf_norm_to_sol": 0.7103199489236174, | |
"perf_norm_to_cublas": 0.7844473888790103, | |
"compute_intensity": 15.929995138551288, | |
"tile_compute_intensity": 1.8754578754578755, | |
"MxNxK": 536870912, | |
"size_m": 2048, | |
"size_n": 16384, | |
"size_k": 16, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_2048x16384x16.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.005405321344733238, | |
"perf_norm_to_sol": 0.8114596827312426, | |
"perf_norm_to_cublas": 0.9127515136495, | |
"compute_intensity": 468.1142857142857, | |
"tile_compute_intensity": 3.9233716475095783, | |
"MxNxK": 17179869184, | |
"size_m": 256, | |
"size_n": 8192, | |
"size_k": 8192, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_256x8192x8192.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00035061121452599763, | |
"perf_norm_to_sol": 0.7818846349131133, | |
"perf_norm_to_cublas": 1.0826807233616165, | |
"compute_intensity": 372.3636363636364, | |
"tile_compute_intensity": 3.5310344827586206, | |
"MxNxK": 1073741824, | |
"size_m": 256, | |
"size_n": 1024, | |
"size_k": 4096, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_256x1024x4096.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.000006828799814684316, | |
"perf_norm_to_sol": 0.07840687399594448, | |
"perf_norm_to_cublas": 0.8172446209449519, | |
"compute_intensity": 46.54545454545455, | |
"tile_compute_intensity": 1.6, | |
"MxNxK": 2097152, | |
"size_m": 256, | |
"size_n": 128, | |
"size_k": 64, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_256x128x64.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.010853850096464158, | |
"perf_norm_to_sol": 0.8082293940813744, | |
"perf_norm_to_cublas": 0.9125603875014405, | |
"compute_intensity": 481.88235294117646, | |
"tile_compute_intensity": 3.930902111324376, | |
"MxNxK": 34359738368, | |
"size_m": 256, | |
"size_n": 8192, | |
"size_k": 16384, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_256x8192x16384.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00013688959879800677, | |
"perf_norm_to_sol": 0.5586904211582493, | |
"perf_norm_to_cublas": 0.7957829108542107, | |
"compute_intensity": 31.62934362934363, | |
"tile_compute_intensity": 3.4594594594594597, | |
"MxNxK": 268435456, | |
"size_m": 2048, | |
"size_n": 4096, | |
"size_k": 32, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_2048x4096x32.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00040315836668014526, | |
"perf_norm_to_sol": 0.679974779448385, | |
"perf_norm_to_cublas": 0.8411344851324131, | |
"compute_intensity": 62.77394636015325, | |
"tile_compute_intensity": 6.2439024390243905, | |
"MxNxK": 1073741824, | |
"size_m": 2048, | |
"size_n": 8192, | |
"size_k": 64, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_2048x8192x64.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.0003655072068795562, | |
"perf_norm_to_sol": 0.7500194696747474, | |
"perf_norm_to_cublas": 1.0321131783049435, | |
"compute_intensity": 512, | |
"tile_compute_intensity": 9.846153846153847, | |
"MxNxK": 1073741824, | |
"size_m": 1024, | |
"size_n": 1024, | |
"size_k": 1024, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_1024x1024x1024.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00002574399986770004, | |
"perf_norm_to_sol": 0.5449676431887939, | |
"perf_norm_to_cublas": 0.8960845489793705, | |
"compute_intensity": 25.580015612802498, | |
"tile_compute_intensity": 1.3264248704663213, | |
"MxNxK": 33554432, | |
"size_m": 16384, | |
"size_n": 64, | |
"size_k": 32, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_16384x64x32.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00001797440054360777, | |
"perf_norm_to_sol": 0.23830551469663125, | |
"perf_norm_to_cublas": 1.3505429199069878, | |
"compute_intensity": 128, | |
"tile_compute_intensity": 1.5609756097560976, | |
"MxNxK": 16777216, | |
"size_m": 128, | |
"size_n": 256, | |
"size_k": 512, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_128x256x512.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.0013724831864237786, | |
"perf_norm_to_sol": 0.7989533836998367, | |
"perf_norm_to_cublas": 0.936885209303487, | |
"compute_intensity": 399.609756097561, | |
"tile_compute_intensity": 12.19047619047619, | |
"MxNxK": 4294967296, | |
"size_m": 1024, | |
"size_n": 8192, | |
"size_k": 512, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_1024x8192x512.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.000031574402237311004, | |
"perf_norm_to_sol": 0.5426419465634386, | |
"perf_norm_to_cublas": 1.2798215521193799, | |
"compute_intensity": 157.53846153846155, | |
"tile_compute_intensity": 4.571428571428571, | |
"MxNxK": 67108864, | |
"size_m": 1024, | |
"size_n": 256, | |
"size_k": 256, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_1024x256x256.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00009775359649211168, | |
"perf_norm_to_sol": 0.7010931855796853, | |
"perf_norm_to_cublas": 0.9887063428832067, | |
"compute_intensity": 167.18367346938774, | |
"tile_compute_intensity": 1.9104477611940298, | |
"MxNxK": 268435456, | |
"size_m": 128, | |
"size_n": 4096, | |
"size_k": 512, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_128x4096x512.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.0000340319995302707, | |
"perf_norm_to_sol": 0.618104980149928, | |
"perf_norm_to_cublas": 0.948472057431833, | |
"compute_intensity": 15.05190629306385, | |
"tile_compute_intensity": 0.9980506822612085, | |
"MxNxK": 33554432, | |
"size_m": 128, | |
"size_n": 16384, | |
"size_k": 16, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_128x16384x16.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00005219840095378458, | |
"perf_norm_to_sol": 0.6564796920427188, | |
"perf_norm_to_cublas": 1.894127057388825, | |
"compute_intensity": 256, | |
"tile_compute_intensity": 3.1219512195121952, | |
"MxNxK": 134217728, | |
"size_m": 256, | |
"size_n": 512, | |
"size_k": 1024, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_256x512x1024.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.0018814494833350182, | |
"perf_norm_to_sol": 0.6373503924425409, | |
"perf_norm_to_cublas": 0.891021987524942, | |
"compute_intensity": 15.976596782057532, | |
"tile_compute_intensity": 1.9692307692307693, | |
"MxNxK": 2147483648, | |
"size_m": 16384, | |
"size_n": 8192, | |
"size_k": 16, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_16384x8192x16.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.000022281600104179232, | |
"perf_norm_to_sol": 0.38447856104413664, | |
"perf_norm_to_cublas": 0.997845713622547, | |
"compute_intensity": 97.52380952380952, | |
"tile_compute_intensity": 4, | |
"MxNxK": 33554432, | |
"size_m": 1024, | |
"size_n": 256, | |
"size_k": 128, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_1024x256x128.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.000022972800070419908, | |
"perf_norm_to_sol": 0.37291046453002663, | |
"perf_norm_to_cublas": 0.7996935792132003, | |
"compute_intensity": 56.10958904109589, | |
"tile_compute_intensity": 3.5555555555555554, | |
"MxNxK": 33554432, | |
"size_m": 2048, | |
"size_n": 256, | |
"size_k": 64, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_2048x256x64.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.000009014399984152987, | |
"perf_norm_to_sol": 0.1659577086173716, | |
"perf_norm_to_cublas": 0.6769612916697372, | |
"compute_intensity": 28.054794520547944, | |
"tile_compute_intensity": 1.28, | |
"MxNxK": 4194304, | |
"size_m": 128, | |
"size_n": 1024, | |
"size_k": 32, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_128x1024x32.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.000122857594396919, | |
"perf_norm_to_sol": 0.6153725806960961, | |
"perf_norm_to_cublas": 0.722553621884614, | |
"compute_intensity": 15.906796116504854, | |
"tile_compute_intensity": 1.8823529411764706, | |
"MxNxK": 134217728, | |
"size_m": 4096, | |
"size_n": 2048, | |
"size_k": 16, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_4096x2048x16.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.000013385599595494569, | |
"perf_norm_to_sol": 0.43682813872978227, | |
"perf_norm_to_cublas": 0.7857996954725512, | |
"compute_intensity": 14.209887250650477, | |
"tile_compute_intensity": 0.9922480620155039, | |
"MxNxK": 8388608, | |
"size_m": 8192, | |
"size_n": 64, | |
"size_k": 16, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_8192x64x16.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.0013684543780982494, | |
"perf_norm_to_sol": 0.801305548372241, | |
"perf_norm_to_cublas": 0.9684035305822569, | |
"compute_intensity": 431.1578947368421, | |
"tile_compute_intensity": 7.013698630136986, | |
"MxNxK": 4294967296, | |
"size_m": 4096, | |
"size_n": 256, | |
"size_k": 4096, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_4096x256x4096.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00005567040061578154, | |
"perf_norm_to_sol": 0.6155369784342587, | |
"perf_norm_to_cublas": 0.9176868879064652, | |
"compute_intensity": 186.1818181818182, | |
"tile_compute_intensity": 6.4, | |
"MxNxK": 134217728, | |
"size_m": 1024, | |
"size_n": 512, | |
"size_k": 256, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_1024x512x256.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.000008076799713307991, | |
"perf_norm_to_sol": 0.5918101098154425, | |
"perf_norm_to_cublas": 1.070523046854203, | |
"compute_intensity": 7.9073359073359075, | |
"tile_compute_intensity": 0.8648648648648649, | |
"MxNxK": 4194304, | |
"size_m": 512, | |
"size_n": 1024, | |
"size_k": 8, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_512x1024x8.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.000022684800205752255, | |
"perf_norm_to_sol": 0.3776448312576898, | |
"perf_norm_to_cublas": 1.2729581016617233, | |
"compute_intensity": 146.28571428571428, | |
"tile_compute_intensity": 1.7297297297297298, | |
"MxNxK": 33554432, | |
"size_m": 128, | |
"size_n": 512, | |
"size_k": 512, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_128x512x512.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.002711286395788193, | |
"perf_norm_to_sol": 0.8088780938582003, | |
"perf_norm_to_cublas": 1.6490119474244076, | |
"compute_intensity": 126.03076923076924, | |
"tile_compute_intensity": 1.967339097022094, | |
"MxNxK": 8589934592, | |
"size_m": 8192, | |
"size_n": 64, | |
"size_k": 16384, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_8192x64x16384.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.0006839584093540907, | |
"perf_norm_to_sol": 0.8016204427546697, | |
"perf_norm_to_cublas": 1.6594834045314184, | |
"compute_intensity": 390.0952380952381, | |
"tile_compute_intensity": 5.305699481865285, | |
"MxNxK": 2147483648, | |
"size_m": 1024, | |
"size_n": 256, | |
"size_k": 8192, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_1024x256x8192.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.010875286161899566, | |
"perf_norm_to_sol": 0.806636308812589, | |
"perf_norm_to_cublas": 0.8629721163106765, | |
"compute_intensity": 799.219512195122, | |
"tile_compute_intensity": 24.38095238095238, | |
"MxNxK": 34359738368, | |
"size_m": 2048, | |
"size_n": 16384, | |
"size_k": 1024, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_2048x16384x1024.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00003216640034224838, | |
"perf_norm_to_sol": 0.5326550347359701, | |
"perf_norm_to_cublas": 0.903601253351002, | |
"compute_intensity": 107.78947368421052, | |
"tile_compute_intensity": 5.333333333333333, | |
"MxNxK": 67108864, | |
"size_m": 1024, | |
"size_n": 512, | |
"size_k": 128, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_1024x512x128.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.000015913599054329097, | |
"perf_norm_to_sol": 0.13458296763303876, | |
"perf_norm_to_cublas": 1.4896441412023071, | |
"compute_intensity": 85.33333333333333, | |
"tile_compute_intensity": 1.3061224489795917, | |
"MxNxK": 8388608, | |
"size_m": 256, | |
"size_n": 64, | |
"size_k": 512, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_256x64x512.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.011085740476846694, | |
"perf_norm_to_sol": 0.7913229346507827, | |
"perf_norm_to_cublas": 0.8787022997266825, | |
"compute_intensity": 474.8985507246377, | |
"tile_compute_intensity": 30.11764705882353, | |
"MxNxK": 34359738368, | |
"size_m": 4096, | |
"size_n": 16384, | |
"size_k": 512, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_4096x16384x512.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.000023737600713502616, | |
"perf_norm_to_sol": 0.3608956797787352, | |
"perf_norm_to_cublas": 4.094769394118752, | |
"compute_intensity": 146.28571428571428, | |
"tile_compute_intensity": 1.5802469135802468, | |
"MxNxK": 33554432, | |
"size_m": 128, | |
"size_n": 256, | |
"size_k": 1024, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_128x256x1024.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.005543804913759232, | |
"perf_norm_to_sol": 0.7911895190560344, | |
"perf_norm_to_cublas": 0.8990371156529654, | |
"compute_intensity": 468.1142857142857, | |
"tile_compute_intensity": 7.086505190311419, | |
"MxNxK": 17179869184, | |
"size_m": 4096, | |
"size_n": 256, | |
"size_k": 16384, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_4096x256x16384.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00006739519885741174, | |
"perf_norm_to_sol": 0.34703994683377815, | |
"perf_norm_to_cublas": 0.6590855287778362, | |
"compute_intensity": 28.419774501300953, | |
"tile_compute_intensity": 1.9844961240310077, | |
"MxNxK": 67108864, | |
"size_m": 16384, | |
"size_n": 128, | |
"size_k": 32, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_16384x128x32.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.000017472000035922974, | |
"perf_norm_to_sol": 0.5701254431610745, | |
"perf_norm_to_cublas": 1.013003692032102, | |
"compute_intensity": 15.485822306238186, | |
"tile_compute_intensity": 1.5609756097560976, | |
"MxNxK": 16777216, | |
"size_m": 4096, | |
"size_n": 256, | |
"size_k": 16, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_4096x256x16.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.000018192001152783633, | |
"perf_norm_to_sol": 0.5475610780657127, | |
"perf_norm_to_cublas": 0.9236586833781175, | |
"compute_intensity": 15.485822306238186, | |
"tile_compute_intensity": 1.3195876288659794, | |
"MxNxK": 16777216, | |
"size_m": 256, | |
"size_n": 4096, | |
"size_k": 16, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_256x4096x16.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.001377071999013424, | |
"perf_norm_to_sol": 0.7962910339110906, | |
"perf_norm_to_cublas": 1.0509207978954427, | |
"compute_intensity": 337.8144329896907, | |
"tile_compute_intensity": 7.314285714285714, | |
"MxNxK": 4294967296, | |
"size_m": 16384, | |
"size_n": 256, | |
"size_k": 1024, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_16384x256x1024.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.000006326400034595281, | |
"perf_norm_to_sol": 0.1932139836188415, | |
"perf_norm_to_cublas": 0.7030854619089865, | |
"compute_intensity": 7.816793893129771, | |
"tile_compute_intensity": 0.7619047619047619, | |
"MxNxK": 1048576, | |
"size_m": 256, | |
"size_n": 512, | |
"size_k": 8, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_256x512x8.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00018319999799132347, | |
"perf_norm_to_sol": 0.7481919336022218, | |
"perf_norm_to_cublas": 1.0838078486295781, | |
"compute_intensity": 409.6, | |
"tile_compute_intensity": 5.224489795918367, | |
"MxNxK": 536870912, | |
"size_m": 512, | |
"size_n": 512, | |
"size_k": 2048, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_512x512x2048.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00001648320030653849, | |
"perf_norm_to_sol": 0.30991093272098075, | |
"perf_norm_to_cublas": 0.778101306690136, | |
"compute_intensity": 30.567164179104477, | |
"tile_compute_intensity": 2.6666666666666665, | |
"MxNxK": 16777216, | |
"size_m": 1024, | |
"size_n": 512, | |
"size_k": 32, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_1024x512x32.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.000016291199426632374, | |
"perf_norm_to_sol": 0.2629271584451593, | |
"perf_norm_to_cublas": 1.077195040350678, | |
"compute_intensity": 81.92, | |
"tile_compute_intensity": 1.6842105263157894, | |
"MxNxK": 16777216, | |
"size_m": 1024, | |
"size_n": 64, | |
"size_k": 256, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_1024x64x256.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.000013379199663177132, | |
"perf_norm_to_sol": 0.16007679385699097, | |
"perf_norm_to_cublas": 0.8096149211082392, | |
"compute_intensity": 62.06060606060606, | |
"tile_compute_intensity": 1.6, | |
"MxNxK": 8388608, | |
"size_m": 1024, | |
"size_n": 64, | |
"size_k": 128, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_1024x64x128.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00005987200420349836, | |
"perf_norm_to_sol": 0.32665656516081243, | |
"perf_norm_to_cublas": 0.6451095535144507, | |
"compute_intensity": 31.267175572519083, | |
"tile_compute_intensity": 3.0476190476190474, | |
"MxNxK": 67108864, | |
"size_m": 1024, | |
"size_n": 2048, | |
"size_k": 32, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_1024x2048x32.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.0013623840175569057, | |
"perf_norm_to_sol": 0.8048759173135336, | |
"perf_norm_to_cublas": 0.9629965833103589, | |
"compute_intensity": 819.2, | |
"tile_compute_intensity": 15.058823529411764, | |
"MxNxK": 4294967296, | |
"size_m": 2048, | |
"size_n": 1024, | |
"size_k": 2048, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_2048x1024x2048.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00018199679907411338, | |
"perf_norm_to_sol": 0.7531383047964147, | |
"perf_norm_to_cublas": 1.811759382440397, | |
"compute_intensity": 118.72463768115942, | |
"tile_compute_intensity": 1.7746967071057191, | |
"MxNxK": 536870912, | |
"size_m": 1024, | |
"size_n": 64, | |
"size_k": 8192, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_1024x64x8192.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.0003566400147974491, | |
"perf_norm_to_sol": 0.7686673118320645, | |
"perf_norm_to_cublas": 1.069277689701736, | |
"compute_intensity": 221.40540540540542, | |
"tile_compute_intensity": 3.710144927536232, | |
"MxNxK": 1073741824, | |
"size_m": 4096, | |
"size_n": 128, | |
"size_k": 2048, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_4096x128x2048.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.000054675195133313534, | |
"perf_norm_to_sol": 0.6267410678591965, | |
"perf_norm_to_cublas": 1.4187055066965455, | |
"compute_intensity": 163.84, | |
"tile_compute_intensity": 3.3684210526315788, | |
"MxNxK": 134217728, | |
"size_m": 2048, | |
"size_n": 128, | |
"size_k": 512, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_2048x128x512.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.022810059785842895, | |
"perf_norm_to_sol": 0.7691694602536641, | |
"perf_norm_to_cublas": 0.9827092761673766, | |
"compute_intensity": 489.07462686567163, | |
"tile_compute_intensity": 7.728301886792453, | |
"MxNxK": 68719476736, | |
"size_m": 16384, | |
"size_n": 256, | |
"size_k": 16384, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_16384x256x16384.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00018832000205293298, | |
"perf_norm_to_sol": 0.7278502508433713, | |
"perf_norm_to_cublas": 1.0278844118018875, | |
"compute_intensity": 315.0769230769231, | |
"tile_compute_intensity": 6.095238095238095, | |
"MxNxK": 536870912, | |
"size_m": 2048, | |
"size_n": 256, | |
"size_k": 1024, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_2048x256x1024.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.000006131199916126206, | |
"perf_norm_to_sol": 0.05207304494947075, | |
"perf_norm_to_cublas": 0.5897703561447915, | |
"compute_intensity": 7.641791044776119, | |
"tile_compute_intensity": 0.6153846153846154, | |
"MxNxK": 262144, | |
"size_m": 128, | |
"size_n": 256, | |
"size_k": 8, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_128x256x8.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.0000226431991904974, | |
"perf_norm_to_sol": 0.3783386558473114, | |
"perf_norm_to_cublas": 0.8094969064839931, | |
"compute_intensity": 58.51428571428571, | |
"tile_compute_intensity": 4, | |
"MxNxK": 33554432, | |
"size_m": 1024, | |
"size_n": 512, | |
"size_k": 64, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_1024x512x64.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.000057036796351894734, | |
"perf_norm_to_sol": 0.6007909345372012, | |
"perf_norm_to_cublas": 0.9076527064867939, | |
"compute_intensity": 163.84, | |
"tile_compute_intensity": 3.3684210526315788, | |
"MxNxK": 134217728, | |
"size_m": 256, | |
"size_n": 2048, | |
"size_k": 256, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_256x2048x256.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00002698239986784756, | |
"perf_norm_to_sol": 0.7045430691471345, | |
"perf_norm_to_cublas": 1.0079459981046441, | |
"compute_intensity": 7.930300096805421, | |
"tile_compute_intensity": 0.927536231884058, | |
"MxNxK": 16777216, | |
"size_m": 4096, | |
"size_n": 512, | |
"size_k": 8, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_4096x512x8.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.0001961759990081191, | |
"perf_norm_to_sol": 0.6987030086559091, | |
"perf_norm_to_cublas": 0.887007599966531, | |
"compute_intensity": 118.72463768115942, | |
"tile_compute_intensity": 7.529411764705882, | |
"MxNxK": 536870912, | |
"size_m": 1024, | |
"size_n": 4096, | |
"size_k": 128, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_1024x4096x128.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.0013576160185039044, | |
"perf_norm_to_sol": 0.8077026721243404, | |
"perf_norm_to_cublas": 0.9670033341585894, | |
"compute_intensity": 399.609756097561, | |
"tile_compute_intensity": 3.878787878787879, | |
"MxNxK": 4294967296, | |
"size_m": 256, | |
"size_n": 8192, | |
"size_k": 2048, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_256x8192x2048.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00006643839878961444, | |
"perf_norm_to_sol": 0.5157738718504375, | |
"perf_norm_to_cublas": 0.8020903221736758, | |
"compute_intensity": 84.89119170984456, | |
"tile_compute_intensity": 1.7655172413793103, | |
"MxNxK": 134217728, | |
"size_m": 128, | |
"size_n": 8192, | |
"size_k": 128, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_128x8192x128.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.000033603201154619454, | |
"perf_norm_to_sol": 0.5917881277692117, | |
"perf_norm_to_cublas": 0.886106080977851, | |
"compute_intensity": 15.500473036896878, | |
"tile_compute_intensity": 1.3264248704663213, | |
"MxNxK": 33554432, | |
"size_m": 256, | |
"size_n": 8192, | |
"size_k": 16, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_256x8192x16.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00001710399956209585, | |
"perf_norm_to_sol": 0.25043258200264995, | |
"perf_norm_to_cublas": 0.9586529574871198, | |
"compute_intensity": 113.77777777777777, | |
"tile_compute_intensity": 2.4615384615384617, | |
"MxNxK": 16777216, | |
"size_m": 512, | |
"size_n": 128, | |
"size_k": 256, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_512x128x256.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00010448959656059743, | |
"perf_norm_to_sol": 0.6558966884974053, | |
"perf_norm_to_cublas": 0.940434320291885, | |
"compute_intensity": 127.0077519379845, | |
"tile_compute_intensity": 1.8686131386861313, | |
"MxNxK": 268435456, | |
"size_m": 128, | |
"size_n": 8192, | |
"size_k": 256, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_128x8192x256.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.000014259199087973683, | |
"perf_norm_to_sol": 0.1501977336343003, | |
"perf_norm_to_cublas": 0.6932226822196531, | |
"compute_intensity": 78.76923076923077, | |
"tile_compute_intensity": 2.2857142857142856, | |
"MxNxK": 8388608, | |
"size_m": 512, | |
"size_n": 128, | |
"size_k": 128, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_512x128x128.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00009564160136505961, | |
"perf_norm_to_sol": 0.7165749986235921, | |
"perf_norm_to_cublas": 1.0535665836655388, | |
"compute_intensity": 195.04761904761904, | |
"tile_compute_intensity": 1.9104477611940298, | |
"MxNxK": 268435456, | |
"size_m": 128, | |
"size_n": 2048, | |
"size_k": 1024, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_128x2048x1024.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.0003663104027509689, | |
"perf_norm_to_sol": 0.7483749284960153, | |
"perf_norm_to_cublas": 1.0469633974943737, | |
"compute_intensity": 221.40540540540542, | |
"tile_compute_intensity": 9.846153846153847, | |
"MxNxK": 1073741824, | |
"size_m": 1024, | |
"size_n": 4096, | |
"size_k": 256, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_1024x4096x256.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.0006927231792360544, | |
"perf_norm_to_sol": 0.7914778361204137, | |
"perf_norm_to_cublas": 1.082859122444385, | |
"compute_intensity": 203.527950310559, | |
"tile_compute_intensity": 1.9616858237547892, | |
"MxNxK": 2147483648, | |
"size_m": 128, | |
"size_n": 16384, | |
"size_k": 1024, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_128x16384x1024.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00007431359845213592, | |
"perf_norm_to_sol": 0.46111601237199895, | |
"perf_norm_to_cublas": 0.7933084248043332, | |
"compute_intensity": 61.134328358208954, | |
"tile_compute_intensity": 4.923076923076923, | |
"MxNxK": 134217728, | |
"size_m": 1024, | |
"size_n": 2048, | |
"size_k": 64, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_1024x2048x64.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00003367039898876101, | |
"perf_norm_to_sol": 0.5088622530832061, | |
"perf_norm_to_cublas": 0.8754989709190745, | |
"compute_intensity": 99.90243902439025, | |
"tile_compute_intensity": 3.0476190476190474, | |
"MxNxK": 67108864, | |
"size_m": 256, | |
"size_n": 2048, | |
"size_k": 128, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_256x2048x128.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00005031359614804387, | |
"perf_norm_to_sol": 0.748418881379995, | |
"perf_norm_to_cublas": 0.9278764225497784, | |
"compute_intensity": 7.968871595330739, | |
"tile_compute_intensity": 0.9552238805970149, | |
"MxNxK": 33554432, | |
"size_m": 2048, | |
"size_n": 2048, | |
"size_k": 8, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_2048x2048x8.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.0007243648171424866, | |
"perf_norm_to_sol": 0.7569045734372781, | |
"perf_norm_to_cublas": 0.8626901692610565, | |
"compute_intensity": 123.18796992481202, | |
"tile_compute_intensity": 10.24, | |
"MxNxK": 2147483648, | |
"size_m": 2048, | |
"size_n": 8192, | |
"size_k": 128, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_2048x8192x128.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.000013104001118335873, | |
"perf_norm_to_sol": 0.16343858391900928, | |
"perf_norm_to_cublas": 0.6283271928565027, | |
"compute_intensity": 49.951219512195124, | |
"tile_compute_intensity": 1.5238095238095237, | |
"MxNxK": 8388608, | |
"size_m": 128, | |
"size_n": 1024, | |
"size_k": 64, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_128x1024x64.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.011094802618026733, | |
"perf_norm_to_sol": 0.7906765887535464, | |
"perf_norm_to_cublas": 0.8913637000986893, | |
"compute_intensity": 250.13740458015266, | |
"tile_compute_intensity": 25.6, | |
"MxNxK": 34359738368, | |
"size_m": 16384, | |
"size_n": 8192, | |
"size_k": 256, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_16384x8192x256.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.005756979435682296, | |
"perf_norm_to_sol": 0.7618926543790598, | |
"perf_norm_to_cublas": 0.8767781255245484, | |
"compute_intensity": 126.51737451737452, | |
"tile_compute_intensity": 14.222222222222221, | |
"MxNxK": 17179869184, | |
"size_m": 16384, | |
"size_n": 8192, | |
"size_k": 128, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_16384x8192x128.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00008321920176967978, | |
"perf_norm_to_sol": 0.4117702339671783, | |
"perf_norm_to_cublas": 0.7725140676857651, | |
"compute_intensity": 51.1201248049922, | |
"tile_compute_intensity": 2.6391752577319587, | |
"MxNxK": 134217728, | |
"size_m": 16384, | |
"size_n": 128, | |
"size_k": 64, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_16384x128x64.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.0013902432285249234, | |
"perf_norm_to_sol": 0.7887469353315059, | |
"perf_norm_to_cublas": 0.8828981680472504, | |
"compute_intensity": 237.44927536231884, | |
"tile_compute_intensity": 15.058823529411764, | |
"MxNxK": 4294967296, | |
"size_m": 2048, | |
"size_n": 8192, | |
"size_k": 256, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_2048x8192x256.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.000015116800204850733, | |
"perf_norm_to_sol": 0.14167676739993512, | |
"perf_norm_to_cublas": 6.417443045397917, | |
"compute_intensity": 78.76923076923077, | |
"tile_compute_intensity": 0.9922480620155039, | |
"MxNxK": 8388608, | |
"size_m": 128, | |
"size_n": 64, | |
"size_k": 1024, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_128x64x1024.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.0000331584014929831, | |
"perf_norm_to_sol": 0.5997266033030175, | |
"perf_norm_to_cublas": 0.9379463431503517, | |
"compute_intensity": 15.500473036896878, | |
"tile_compute_intensity": 1.5802469135802468, | |
"MxNxK": 33554432, | |
"size_m": 8192, | |
"size_n": 256, | |
"size_k": 16, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_8192x256x16.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.000015420799900311977, | |
"perf_norm_to_sol": 0.6441861515797197, | |
"perf_norm_to_cublas": 0.999169961044977, | |
"compute_intensity": 7.75390440132513, | |
"tile_compute_intensity": 0.6649350649350649, | |
"MxNxK": 8388608, | |
"size_m": 128, | |
"size_n": 8192, | |
"size_k": 8, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_128x8192x8.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00018517440184950829, | |
"perf_norm_to_sol": 0.7402144106529778, | |
"perf_norm_to_cublas": 1.0715952612665625, | |
"compute_intensity": 341.3333333333333, | |
"tile_compute_intensity": 5.224489795918367, | |
"MxNxK": 536870912, | |
"size_m": 1024, | |
"size_n": 256, | |
"size_k": 2048, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_1024x256x2048.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00005739200278185308, | |
"perf_norm_to_sol": 0.3261492486940798, | |
"perf_norm_to_cublas": 0.5750766414692444, | |
"compute_intensity": 42.6111833550065, | |
"tile_compute_intensity": 1.5900621118012421, | |
"MxNxK": 67108864, | |
"size_m": 16384, | |
"size_n": 64, | |
"size_k": 64, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_16384x64x64.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.005569206178188324, | |
"perf_norm_to_sol": 0.7875808873149832, | |
"perf_norm_to_cublas": 0.8976440635291997, | |
"compute_intensity": 248.24242424242425, | |
"tile_compute_intensity": 3.8714555765595464, | |
"MxNxK": 17179869184, | |
"size_m": 8192, | |
"size_n": 128, | |
"size_k": 16384, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_8192x128x16384.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.000016617600340396167, | |
"perf_norm_to_sol": 0.32057890686538637, | |
"perf_norm_to_cublas": 0.7718081693043705, | |
"compute_intensity": 29.8978102189781, | |
"tile_compute_intensity": 2.4615384615384617, | |
"MxNxK": 16777216, | |
"size_m": 2048, | |
"size_n": 256, | |
"size_k": 32, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_2048x256x32.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.0009627263993024826, | |
"perf_norm_to_sol": 0.6239975351537451, | |
"perf_norm_to_cublas": 0.874283703240221, | |
"compute_intensity": 15.961032635168047, | |
"tile_compute_intensity": 1.9541984732824427, | |
"MxNxK": 1073741824, | |
"size_m": 16384, | |
"size_n": 4096, | |
"size_k": 16, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_16384x4096x16.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.000006528000085381791, | |
"perf_norm_to_sol": 0.05729201694101259, | |
"perf_norm_to_cublas": 0.5671568204021059, | |
"compute_intensity": 14.027397260273972, | |
"tile_compute_intensity": 0.8888888888888888, | |
"MxNxK": 524288, | |
"size_m": 512, | |
"size_n": 64, | |
"size_k": 16, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_512x64x16.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.0010217344388365746, | |
"perf_norm_to_sol": 0.5908168693565297, | |
"perf_norm_to_cublas": 0.7200931746938917, | |
"compute_intensity": 31.844509232264333, | |
"tile_compute_intensity": 3.7372262773722627, | |
"MxNxK": 2147483648, | |
"size_m": 4096, | |
"size_n": 16384, | |
"size_k": 32, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_4096x16384x32.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00037446720525622366, | |
"perf_norm_to_sol": 0.8017350885958674, | |
"perf_norm_to_cublas": 0.8251424899106176, | |
"compute_intensity": 7.982460414129111, | |
"tile_compute_intensity": 0.9678638941398866, | |
"MxNxK": 268435456, | |
"size_m": 2048, | |
"size_n": 16384, | |
"size_k": 8, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_2048x16384x8.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00020080960821360349, | |
"perf_norm_to_sol": 0.6825806890039338, | |
"perf_norm_to_cublas": 0.880754673752975, | |
"compute_intensity": 102.0809968847352, | |
"tile_compute_intensity": 3.1801242236024843, | |
"MxNxK": 536870912, | |
"size_m": 256, | |
"size_n": 16384, | |
"size_k": 128, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_256x16384x128.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.0013605407439172268, | |
"perf_norm_to_sol": 0.8059663709204757, | |
"perf_norm_to_cublas": 1.6500710022353124, | |
"compute_intensity": 240.94117647058823, | |
"tile_compute_intensity": 3.750915750915751, | |
"MxNxK": 4294967296, | |
"size_m": 4096, | |
"size_n": 128, | |
"size_k": 8192, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_4096x128x8192.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.02252197712659836, | |
"perf_norm_to_sol": 0.7790080451289622, | |
"perf_norm_to_cublas": 0.8398359084209509, | |
"compute_intensity": 1820.4444444444443, | |
"tile_compute_intensity": 26.94736842105263, | |
"MxNxK": 68719476736, | |
"size_m": 2048, | |
"size_n": 8192, | |
"size_k": 4096, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_2048x8192x4096.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00005330240237526596, | |
"perf_norm_to_sol": 0.6428826592469676, | |
"perf_norm_to_cublas": 1.9753854937103066, | |
"compute_intensity": 113.77777777777777, | |
"tile_compute_intensity": 1.7655172413793103, | |
"MxNxK": 134217728, | |
"size_m": 1024, | |
"size_n": 64, | |
"size_k": 2048, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_1024x64x2048.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.005458499118685722, | |
"perf_norm_to_sol": 0.8035542826127158, | |
"perf_norm_to_cublas": 0.9035737609304375, | |
"compute_intensity": 1170.2857142857142, | |
"tile_compute_intensity": 15.753846153846155, | |
"MxNxK": 17179869184, | |
"size_m": 2048, | |
"size_n": 1024, | |
"size_k": 8192, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_2048x1024x8192.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.0002459231996908784, | |
"perf_norm_to_sol": 0.6195999683877682, | |
"perf_norm_to_cublas": 0.8015640691094315, | |
"compute_intensity": 31.690522243713733, | |
"tile_compute_intensity": 3.506849315068493, | |
"MxNxK": 536870912, | |
"size_m": 2048, | |
"size_n": 8192, | |
"size_k": 32, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_2048x8192x32.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.010850406438112258, | |
"perf_norm_to_sol": 0.8084859066755389, | |
"perf_norm_to_cublas": 0.8746268755104057, | |
"compute_intensity": 1260.3076923076924, | |
"tile_compute_intensity": 36.57142857142857, | |
"MxNxK": 34359738368, | |
"size_m": 8192, | |
"size_n": 2048, | |
"size_k": 2048, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_8192x2048x2048.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00020696001593023539, | |
"perf_norm_to_sol": 0.6622958551532788, | |
"perf_norm_to_cublas": 0.8723772382638278, | |
"compute_intensity": 62.534351145038165, | |
"tile_compute_intensity": 6.095238095238095, | |
"MxNxK": 536870912, | |
"size_m": 2048, | |
"size_n": 4096, | |
"size_k": 64, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_2048x4096x64.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.0003648672020062804, | |
"perf_norm_to_sol": 0.7513350609720855, | |
"perf_norm_to_cublas": 1.0548670796564874, | |
"compute_intensity": 169.78238341968913, | |
"tile_compute_intensity": 3.5310344827586206, | |
"MxNxK": 1073741824, | |
"size_m": 256, | |
"size_n": 16384, | |
"size_k": 256, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_256x16384x256.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.0000069567999162245545, | |
"perf_norm_to_sol": 0.05769438788580887, | |
"perf_norm_to_cublas": 0.8114075487015279, | |
"compute_intensity": 26.94736842105263, | |
"tile_compute_intensity": 1.3333333333333333, | |
"MxNxK": 1048576, | |
"size_m": 256, | |
"size_n": 128, | |
"size_k": 32, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_256x128x32.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00019230080069974065, | |
"perf_norm_to_sol": 0.7127830993645796, | |
"perf_norm_to_cublas": 0.9772523107860859, | |
"compute_intensity": 102.0809968847352, | |
"tile_compute_intensity": 1.9248120300751879, | |
"MxNxK": 536870912, | |
"size_m": 16384, | |
"size_n": 64, | |
"size_k": 512, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_16384x64x512.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.000006703999679302796, | |
"perf_norm_to_sol": 0.10069041741970869, | |
"perf_norm_to_cublas": 0.6248209907045947, | |
"compute_intensity": 14.840579710144928, | |
"tile_compute_intensity": 0.9411764705882353, | |
"MxNxK": 1048576, | |
"size_m": 128, | |
"size_n": 512, | |
"size_k": 16, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_128x512x16.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.0006763679906725883, | |
"perf_norm_to_sol": 0.8106164846550391, | |
"perf_norm_to_cublas": 1.6586285116612227, | |
"compute_intensity": 481.88235294117646, | |
"tile_compute_intensity": 5.305699481865285, | |
"MxNxK": 2147483648, | |
"size_m": 512, | |
"size_n": 512, | |
"size_k": 8192, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_512x512x8192.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.0001902112038806081, | |
"perf_norm_to_sol": 0.7206134966638815, | |
"perf_norm_to_cublas": 0.9762116670659154, | |
"compute_intensity": 315.0769230769231, | |
"tile_compute_intensity": 6.4, | |
"MxNxK": 536870912, | |
"size_m": 512, | |
"size_n": 2048, | |
"size_k": 512, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_512x2048x512.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.002713926322758198, | |
"perf_norm_to_sol": 0.8080912710629328, | |
"perf_norm_to_cublas": 0.9334219429261521, | |
"compute_intensity": 504.12307692307695, | |
"tile_compute_intensity": 7.420289855072464, | |
"MxNxK": 8589934592, | |
"size_m": 512, | |
"size_n": 16384, | |
"size_k": 1024, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_512x16384x1024.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.0006933055818080902, | |
"perf_norm_to_sol": 0.790812965189671, | |
"perf_norm_to_cublas": 1.072155224015329, | |
"compute_intensity": 481.88235294117646, | |
"tile_compute_intensity": 7.111111111111111, | |
"MxNxK": 2147483648, | |
"size_m": 512, | |
"size_n": 4096, | |
"size_k": 1024, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_512x4096x1024.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00018537599826231598, | |
"perf_norm_to_sol": 0.7394094274227052, | |
"perf_norm_to_cublas": 1.1469705144107105, | |
"compute_intensity": 215.57894736842104, | |
"tile_compute_intensity": 3.1801242236024843, | |
"MxNxK": 536870912, | |
"size_m": 1024, | |
"size_n": 128, | |
"size_k": 4096, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_1024x128x4096.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.0006876640021800995, | |
"perf_norm_to_sol": 0.7973007765333228, | |
"perf_norm_to_cublas": 1.0814537518676184, | |
"compute_intensity": 123.18796992481202, | |
"tile_compute_intensity": 1.9616858237547892, | |
"MxNxK": 2147483648, | |
"size_m": 8192, | |
"size_n": 64, | |
"size_k": 4096, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_8192x64x4096.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.005415113270282745, | |
"perf_norm_to_sol": 0.8099923537201698, | |
"perf_norm_to_cublas": 0.8692158768482626, | |
"compute_intensity": 668.734693877551, | |
"tile_compute_intensity": 13.837837837837839, | |
"MxNxK": 17179869184, | |
"size_m": 1024, | |
"size_n": 16384, | |
"size_k": 1024, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_1024x16384x1024.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.0006820864044129848, | |
"perf_norm_to_sol": 0.8038205121593952, | |
"perf_norm_to_cublas": 0.976270462777281, | |
"compute_intensity": 585.1428571428571, | |
"tile_compute_intensity": 7.876923076923077, | |
"MxNxK": 2147483648, | |
"size_m": 1024, | |
"size_n": 512, | |
"size_k": 4096, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_1024x512x4096.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00001646080054342747, | |
"perf_norm_to_sol": 0.2602181322595607, | |
"perf_norm_to_cublas": 0.7873249965345848, | |
"compute_intensity": 50.5679012345679, | |
"tile_compute_intensity": 2.4615384615384617, | |
"MxNxK": 16777216, | |
"size_m": 2048, | |
"size_n": 128, | |
"size_k": 64, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_2048x128x64.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.0006871424149721861, | |
"perf_norm_to_sol": 0.7979059813305204, | |
"perf_norm_to_cublas": 1.6583182837994774, | |
"compute_intensity": 224.43835616438355, | |
"tile_compute_intensity": 3.1950078003120126, | |
"MxNxK": 2147483648, | |
"size_m": 1024, | |
"size_n": 128, | |
"size_k": 16384, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_1024x128x16384.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00010650240583345293, | |
"perf_norm_to_sol": 0.6435007719327852, | |
"perf_norm_to_cublas": 0.9296014739923661, | |
"compute_intensity": 195.04761904761904, | |
"tile_compute_intensity": 8, | |
"MxNxK": 268435456, | |
"size_m": 2048, | |
"size_n": 512, | |
"size_k": 256, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_2048x512x256.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00018235520692542196, | |
"perf_norm_to_sol": 0.7516580581606789, | |
"perf_norm_to_cublas": 1.7973010164009087, | |
"compute_intensity": 199.8048780487805, | |
"tile_compute_intensity": 2.6597402597402597, | |
"MxNxK": 536870912, | |
"size_m": 512, | |
"size_n": 128, | |
"size_k": 8192, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_512x128x8192.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.0007007616106420756, | |
"perf_norm_to_sol": 0.7823987995430381, | |
"perf_norm_to_cublas": 1.076310104612738, | |
"compute_intensity": 334.3673469387755, | |
"tile_compute_intensity": 7.111111111111111, | |
"MxNxK": 2147483648, | |
"size_m": 8192, | |
"size_n": 256, | |
"size_k": 1024, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_8192x256x1024.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00035070718731731174, | |
"perf_norm_to_sol": 0.7816706682377446, | |
"perf_norm_to_cublas": 1.6842586407974796, | |
"compute_intensity": 221.40540540540542, | |
"tile_compute_intensity": 1.8788990825688074, | |
"MxNxK": 1073741824, | |
"size_m": 128, | |
"size_n": 1024, | |
"size_k": 8192, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_128x1024x8192.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.0013536607846617698, | |
"perf_norm_to_sol": 0.8100626820909194, | |
"perf_norm_to_cublas": 0.9748215010820221, | |
"compute_intensity": 237.44927536231884, | |
"tile_compute_intensity": 1.9768339768339769, | |
"MxNxK": 4294967296, | |
"size_m": 128, | |
"size_n": 8192, | |
"size_k": 4096, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_128x8192x4096.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00015536319697275757, | |
"perf_norm_to_sol": 0.5119867738517612, | |
"perf_norm_to_cublas": 0.7700768275859072, | |
"compute_intensity": 31.000946073793756, | |
"tile_compute_intensity": 2.6528497409326426, | |
"MxNxK": 268435456, | |
"size_m": 512, | |
"size_n": 16384, | |
"size_k": 32, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_512x16384x32.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.005477670207619667, | |
"perf_norm_to_sol": 0.8007419536423095, | |
"perf_norm_to_cublas": 0.9119544200496614, | |
"compute_intensity": 819.2, | |
"tile_compute_intensity": 12.641975308641975, | |
"MxNxK": 17179869184, | |
"size_m": 4096, | |
"size_n": 512, | |
"size_k": 8192, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_4096x512x8192.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.0000228320001042448, | |
"perf_norm_to_sol": 0.37521012205247073, | |
"perf_norm_to_cublas": 0.7929922509394659, | |
"compute_intensity": 58.51428571428571, | |
"tile_compute_intensity": 3.5555555555555554, | |
"MxNxK": 33554432, | |
"size_m": 512, | |
"size_n": 1024, | |
"size_k": 64, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_512x1024x64.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00006587520474568009, | |
"perf_norm_to_sol": 0.5201834334413968, | |
"perf_norm_to_cublas": 0.8036042955220489, | |
"compute_intensity": 110.70270270270271, | |
"tile_compute_intensity": 6.4, | |
"MxNxK": 134217728, | |
"size_m": 2048, | |
"size_n": 512, | |
"size_k": 128, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_2048x512x128.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.0004065375775098801, | |
"perf_norm_to_sol": 0.7384889235117484, | |
"perf_norm_to_cublas": 0.7575703341271942, | |
"compute_intensity": 7.982460414129111, | |
"tile_compute_intensity": 0.9808429118773946, | |
"MxNxK": 268435456, | |
"size_m": 16384, | |
"size_n": 2048, | |
"size_k": 8, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_16384x2048x8.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.0000066592001530807465, | |
"perf_norm_to_sol": 0.028766540258946312, | |
"perf_norm_to_cublas": 0.6934166526902698, | |
"compute_intensity": 13.837837837837839, | |
"tile_compute_intensity": 0.8, | |
"MxNxK": 262144, | |
"size_m": 256, | |
"size_n": 64, | |
"size_k": 16, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_256x64x16.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.0013664287514984607, | |
"perf_norm_to_sol": 0.8024934228454332, | |
"perf_norm_to_cublas": 0.9449380063968464, | |
"compute_intensity": 431.1578947368421, | |
"tile_compute_intensity": 21.333333333333332, | |
"MxNxK": 4294967296, | |
"size_m": 4096, | |
"size_n": 2048, | |
"size_k": 512, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_4096x2048x512.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.000053759996080771086, | |
"perf_norm_to_sol": 0.6374105781514292, | |
"perf_norm_to_cublas": 2.7256549093174165, | |
"compute_intensity": 84.89119170984456, | |
"tile_compute_intensity": 0.9995119570522206, | |
"MxNxK": 134217728, | |
"size_m": 128, | |
"size_n": 64, | |
"size_k": 16384, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_128x64x16384.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.005568825453519821, | |
"perf_norm_to_sol": 0.7876347319676384, | |
"perf_norm_to_cublas": 0.8999097045348821, | |
"compute_intensity": 126.51737451737452, | |
"tile_compute_intensity": 1.9825750242013553, | |
"MxNxK": 17179869184, | |
"size_m": 16384, | |
"size_n": 64, | |
"size_k": 16384, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_16384x64x16384.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.000008582399459555745, | |
"perf_norm_to_sol": 0.12477276294039982, | |
"perf_norm_to_cublas": 0.7576436387929018, | |
"compute_intensity": 51.2, | |
"tile_compute_intensity": 2, | |
"MxNxK": 4194304, | |
"size_m": 256, | |
"size_n": 256, | |
"size_k": 64, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_256x256x64.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.0013556735590100288, | |
"perf_norm_to_sol": 0.8088599785520342, | |
"perf_norm_to_cublas": 0.9641353782114495, | |
"compute_intensity": 819.2, | |
"tile_compute_intensity": 12.19047619047619, | |
"MxNxK": 4294967296, | |
"size_m": 1024, | |
"size_n": 2048, | |
"size_k": 2048, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_1024x2048x2048.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.000006672000017715618, | |
"perf_norm_to_sol": 0.08024952715704621, | |
"perf_norm_to_cublas": 1.1592325653192224, | |
"compute_intensity": 64, | |
"tile_compute_intensity": 1.2307692307692308, | |
"MxNxK": 2097152, | |
"size_m": 128, | |
"size_n": 128, | |
"size_k": 128, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_128x128x128.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.010837190598249436, | |
"perf_norm_to_sol": 0.8094718467286462, | |
"perf_norm_to_cublas": 1.048953722993839, | |
"compute_intensity": 862.3157894736842, | |
"tile_compute_intensity": 7.6992481203007515, | |
"MxNxK": 34359738368, | |
"size_m": 512, | |
"size_n": 8192, | |
"size_k": 8192, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_512x8192x8192.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.000008969599730335175, | |
"perf_norm_to_sol": 0.11938656410779984, | |
"perf_norm_to_cublas": 1.4398859027019102, | |
"compute_intensity": 85.33333333333333, | |
"tile_compute_intensity": 1.28, | |
"MxNxK": 4194304, | |
"size_m": 128, | |
"size_n": 128, | |
"size_k": 256, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_128x128x256.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.0006752895656973124, | |
"perf_norm_to_sol": 0.811911024222698, | |
"perf_norm_to_cublas": 1.0219213110598575, | |
"compute_intensity": 585.1428571428571, | |
"tile_compute_intensity": 6.320987654320987, | |
"MxNxK": 2147483648, | |
"size_m": 512, | |
"size_n": 1024, | |
"size_k": 4096, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_512x1024x4096.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.000053804804338142276, | |
"perf_norm_to_sol": 0.705959961577871, | |
"perf_norm_to_cublas": 0.8684429056011216, | |
"compute_intensity": 7.934140435835351, | |
"tile_compute_intensity": 0.9343065693430657, | |
"MxNxK": 33554432, | |
"size_m": 8192, | |
"size_n": 512, | |
"size_k": 8, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_8192x512x8.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.0006842432077974081, | |
"perf_norm_to_sol": 0.8012867890893848, | |
"perf_norm_to_cublas": 1.6766624170895204, | |
"compute_intensity": 123.18796992481202, | |
"tile_compute_intensity": 1.8806244260789715, | |
"MxNxK": 2147483648, | |
"size_m": 2048, | |
"size_n": 64, | |
"size_k": 16384, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_2048x64x16384.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.0003541023936122656, | |
"perf_norm_to_sol": 0.7741758497297184, | |
"perf_norm_to_cublas": 1.6798305062233942, | |
"compute_intensity": 122.26865671641791, | |
"tile_compute_intensity": 1.8788990825688074, | |
"MxNxK": 1073741824, | |
"size_m": 2048, | |
"size_n": 64, | |
"size_k": 8192, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_2048x64x8192.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00008681280305609108, | |
"perf_norm_to_sol": 0.44384418741696463, | |
"perf_norm_to_cublas": 0.726639339920557, | |
"compute_intensity": 31.50769230769231, | |
"tile_compute_intensity": 3.3684210526315788, | |
"MxNxK": 134217728, | |
"size_m": 2048, | |
"size_n": 2048, | |
"size_k": 32, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_2048x2048x32.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00003136320156045258, | |
"perf_norm_to_sol": 0.5462961126148561, | |
"perf_norm_to_cublas": 1.4001631726766686, | |
"compute_intensity": 157.53846153846155, | |
"tile_compute_intensity": 3.0476190476190474, | |
"MxNxK": 67108864, | |
"size_m": 1024, | |
"size_n": 128, | |
"size_k": 512, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_1024x128x512.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00001648640027269721, | |
"perf_norm_to_sol": 0.2598140711166346, | |
"perf_norm_to_cublas": 0.7862965524643564, | |
"compute_intensity": 56.888888888888886, | |
"tile_compute_intensity": 3.2, | |
"MxNxK": 16777216, | |
"size_m": 512, | |
"size_n": 512, | |
"size_k": 64, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_512x512x64.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.0001331328065134585, | |
"perf_norm_to_sol": 0.5711668915783518, | |
"perf_norm_to_cublas": 0.6722670848540764, | |
"compute_intensity": 15.860600193610843, | |
"tile_compute_intensity": 1.855072463768116, | |
"MxNxK": 134217728, | |
"size_m": 8192, | |
"size_n": 1024, | |
"size_k": 16, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_8192x1024x16.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00003339520189911127, | |
"perf_norm_to_sol": 0.5130555923390719, | |
"perf_norm_to_cublas": 1.5160981250257093, | |
"compute_intensity": 99.90243902439025, | |
"tile_compute_intensity": 1.8285714285714285, | |
"MxNxK": 67108864, | |
"size_m": 2048, | |
"size_n": 64, | |
"size_k": 512, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_2048x64x512.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.000013484800001606346, | |
"perf_norm_to_sol": 0.3707033026310093, | |
"perf_norm_to_cublas": 0.7342192741222211, | |
"compute_intensity": 15.456603773584906, | |
"tile_compute_intensity": 1.3061224489795917, | |
"MxNxK": 8388608, | |
"size_m": 256, | |
"size_n": 2048, | |
"size_k": 16, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_256x2048x16.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.0002081983955577016, | |
"perf_norm_to_sol": 0.6583564698752121, | |
"perf_norm_to_cublas": 0.8725831193351236, | |
"compute_intensity": 61.82641509433962, | |
"tile_compute_intensity": 5.224489795918367, | |
"MxNxK": 536870912, | |
"size_m": 1024, | |
"size_n": 8192, | |
"size_k": 64, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_1024x8192x64.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00008881599642336368, | |
"perf_norm_to_sol": 0.4371201565116441, | |
"perf_norm_to_cublas": 0.7184651910099518, | |
"compute_intensity": 31.386973180076627, | |
"tile_compute_intensity": 3.1219512195121952, | |
"MxNxK": 134217728, | |
"size_m": 1024, | |
"size_n": 4096, | |
"size_k": 32, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_1024x4096x32.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.010797932744026184, | |
"perf_norm_to_sol": 0.8124148292892924, | |
"perf_norm_to_cublas": 0.8795849508098538, | |
"compute_intensity": 1365.3333333333333, | |
"tile_compute_intensity": 36.57142857142857, | |
"MxNxK": 34359738368, | |
"size_m": 4096, | |
"size_n": 4096, | |
"size_k": 2048, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_4096x4096x2048.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00020264319609850646, | |
"perf_norm_to_sol": 0.7432896060131052, | |
"perf_norm_to_cublas": 0.8141521663972909, | |
"compute_intensity": 15.937743190661479, | |
"tile_compute_intensity": 1.9104477611940298, | |
"MxNxK": 268435456, | |
"size_m": 4096, | |
"size_n": 4096, | |
"size_k": 16, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_4096x4096x16.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.000107881601434201, | |
"perf_norm_to_sol": 0.39030923027556236, | |
"perf_norm_to_cublas": 0.7478124161334909, | |
"compute_intensity": 30.089990817263544, | |
"tile_compute_intensity": 2.6391752577319587, | |
"MxNxK": 134217728, | |
"size_m": 16384, | |
"size_n": 256, | |
"size_k": 32, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_16384x256x32.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00005757439648732543, | |
"perf_norm_to_sol": 0.5951810574481059, | |
"perf_norm_to_cublas": 0.9016786224045885, | |
"compute_intensity": 163.84, | |
"tile_compute_intensity": 5.333333333333333, | |
"MxNxK": 134217728, | |
"size_m": 2048, | |
"size_n": 256, | |
"size_k": 256, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_2048x256x256.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00001340160088147968, | |
"perf_norm_to_sol": 0.3934246479821998, | |
"perf_norm_to_cublas": 0.7999043923075169, | |
"compute_intensity": 15.03119266055046, | |
"tile_compute_intensity": 1.3061224489795917, | |
"MxNxK": 8388608, | |
"size_m": 4096, | |
"size_n": 128, | |
"size_k": 16, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_4096x128x16.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.0017687231302261353, | |
"perf_norm_to_sol": 0.6779707609811585, | |
"perf_norm_to_cublas": 0.7101384723287024, | |
"compute_intensity": 15.976596782057532, | |
"tile_compute_intensity": 1.9616858237547892, | |
"MxNxK": 2147483648, | |
"size_m": 8192, | |
"size_n": 16384, | |
"size_k": 16, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_8192x16384x16.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00000865600013639778, | |
"perf_norm_to_sol": 0.20444424114195123, | |
"perf_norm_to_cublas": 0.7866913020366293, | |
"compute_intensity": 25.440993788819874, | |
"tile_compute_intensity": 1.28, | |
"MxNxK": 4194304, | |
"size_m": 2048, | |
"size_n": 64, | |
"size_k": 32, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_2048x64x32.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.001363577600568533, | |
"perf_norm_to_sol": 0.8041713837241193, | |
"perf_norm_to_cublas": 1.0554963584866728, | |
"compute_intensity": 682.6666666666666, | |
"tile_compute_intensity": 18.285714285714285, | |
"MxNxK": 4294967296, | |
"size_m": 2048, | |
"size_n": 2048, | |
"size_k": 1024, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_2048x2048x1024.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.005447027087211609, | |
"perf_norm_to_sol": 0.8052466553279046, | |
"perf_norm_to_cublas": 0.8721804336443404, | |
"compute_intensity": 468.1142857142857, | |
"tile_compute_intensity": 32, | |
"MxNxK": 17179869184, | |
"size_m": 8192, | |
"size_n": 4096, | |
"size_k": 512, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_8192x4096x512.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.000033910400816239417, | |
"perf_norm_to_sol": 0.5052607659956143, | |
"perf_norm_to_cublas": 0.8709068555403692, | |
"compute_intensity": 99.90243902439025, | |
"tile_compute_intensity": 4.571428571428571, | |
"MxNxK": 67108864, | |
"size_m": 2048, | |
"size_n": 256, | |
"size_k": 128, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_2048x256x128.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00002386559935985133, | |
"perf_norm_to_sol": 0.3589600837860157, | |
"perf_norm_to_cublas": 4.082729874395705, | |
"compute_intensity": 102.4, | |
"tile_compute_intensity": 1.5802469135802468, | |
"MxNxK": 33554432, | |
"size_m": 512, | |
"size_n": 64, | |
"size_k": 1024, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_512x64x1024.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.0006925439927726984, | |
"perf_norm_to_sol": 0.7916826203879247, | |
"perf_norm_to_cublas": 1.076120530095307, | |
"compute_intensity": 334.3673469387755, | |
"tile_compute_intensity": 3.8208955223880596, | |
"MxNxK": 2147483648, | |
"size_m": 256, | |
"size_n": 8192, | |
"size_k": 1024, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_256x8192x1024.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.000006355199730023742, | |
"perf_norm_to_sol": 0.20740969336829893, | |
"perf_norm_to_cublas": 0.8298086786680894, | |
"compute_intensity": 7.51559633027523, | |
"tile_compute_intensity": 0.6530612244897959, | |
"MxNxK": 1048576, | |
"size_m": 2048, | |
"size_n": 64, | |
"size_k": 8, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_2048x64x8.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00004672640061471611, | |
"perf_norm_to_sol": 0.36667911215560955, | |
"perf_norm_to_cublas": 0.6549102965549386, | |
"compute_intensity": 60.23529411764706, | |
"tile_compute_intensity": 4.571428571428571, | |
"MxNxK": 67108864, | |
"size_m": 1024, | |
"size_n": 1024, | |
"size_k": 64, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_1024x1024x64.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.0224470779299736, | |
"perf_norm_to_sol": 0.781607362373122, | |
"perf_norm_to_cublas": 0.9071157759845524, | |
"compute_intensity": 489.07462686567163, | |
"tile_compute_intensity": 42.666666666666664, | |
"MxNxK": 68719476736, | |
"size_m": 16384, | |
"size_n": 8192, | |
"size_k": 512, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_16384x8192x512.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.0013458432629704475, | |
"perf_norm_to_sol": 0.8147680462018928, | |
"perf_norm_to_cublas": 1.6564234625955703, | |
"compute_intensity": 237.44927536231884, | |
"tile_compute_intensity": 1.9375591296121097, | |
"MxNxK": 4294967296, | |
"size_m": 128, | |
"size_n": 2048, | |
"size_k": 16384, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_128x2048x16384.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00002659840101841837, | |
"perf_norm_to_sol": 0.7105990588475034, | |
"perf_norm_to_cublas": 0.9933829667361517, | |
"compute_intensity": 7.953398058252427, | |
"tile_compute_intensity": 0.9411764705882353, | |
"MxNxK": 16777216, | |
"size_m": 2048, | |
"size_n": 1024, | |
"size_k": 8, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_2048x1024x8.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.0009052224457263946, | |
"perf_norm_to_sol": 0.66299183650445, | |
"perf_norm_to_cublas": 0.7006984848996148, | |
"compute_intensity": 15.968810916179336, | |
"tile_compute_intensity": 1.9541984732824427, | |
"MxNxK": 1073741824, | |
"size_m": 8192, | |
"size_n": 8192, | |
"size_k": 16, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_8192x8192x16.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00017987519968301057, | |
"perf_norm_to_sol": 0.7620214513985486, | |
"perf_norm_to_cublas": 1.1082706918741776, | |
"compute_intensity": 215.57894736842104, | |
"tile_compute_intensity": 1.9248120300751879, | |
"MxNxK": 536870912, | |
"size_m": 128, | |
"size_n": 2048, | |
"size_k": 2048, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_128x2048x2048.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.005393001437187195, | |
"perf_norm_to_sol": 0.8133134015527611, | |
"perf_norm_to_cublas": 1.0502862248983333, | |
"compute_intensity": 1170.2857142857142, | |
"tile_compute_intensity": 20.48, | |
"MxNxK": 17179869184, | |
"size_m": 4096, | |
"size_n": 1024, | |
"size_k": 4096, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_4096x1024x4096.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.0027450527995824814, | |
"perf_norm_to_sol": 0.7989282290170852, | |
"perf_norm_to_cublas": 0.8625296609508197, | |
"compute_intensity": 442.81081081081084, | |
"tile_compute_intensity": 19.692307692307693, | |
"MxNxK": 8589934592, | |
"size_m": 2048, | |
"size_n": 8192, | |
"size_k": 512, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_2048x8192x512.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00001293440000154078, | |
"perf_norm_to_sol": 0.1655816571467408, | |
"perf_norm_to_cublas": 0.6422563231482827, | |
"compute_intensity": 53.89473684210526, | |
"tile_compute_intensity": 2.2857142857142856, | |
"MxNxK": 8388608, | |
"size_m": 256, | |
"size_n": 512, | |
"size_k": 64, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_256x512x64.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.001354800071567297, | |
"perf_norm_to_sol": 0.8093814791402177, | |
"perf_norm_to_cublas": 0.9748237014296021, | |
"compute_intensity": 225.98620689655172, | |
"tile_compute_intensity": 1.9768339768339769, | |
"MxNxK": 4294967296, | |
"size_m": 128, | |
"size_n": 16384, | |
"size_k": 2048, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_128x16384x2048.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00009786239825189113, | |
"perf_norm_to_sol": 0.7003137220296085, | |
"perf_norm_to_cublas": 0.975704696325953, | |
"compute_intensity": 240.94117647058823, | |
"tile_compute_intensity": 3.5555555555555554, | |
"MxNxK": 268435456, | |
"size_m": 256, | |
"size_n": 2048, | |
"size_k": 512, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_256x2048x512.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00021330881863832474, | |
"perf_norm_to_sol": 0.7122825553248885, | |
"perf_norm_to_cublas": 0.8391213860895063, | |
"compute_intensity": 15.868280871670702, | |
"tile_compute_intensity": 1.7716262975778547, | |
"MxNxK": 268435456, | |
"size_m": 1024, | |
"size_n": 16384, | |
"size_k": 16, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_1024x16384x16.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.000023334400611929596, | |
"perf_norm_to_sol": 0.36713167345879816, | |
"perf_norm_to_cublas": 1.2859296857840568, | |
"compute_intensity": 146.28571428571428, | |
"tile_compute_intensity": 2.56, | |
"MxNxK": 33554432, | |
"size_m": 512, | |
"size_n": 128, | |
"size_k": 512, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_512x128x512.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.0003507263958454132, | |
"perf_norm_to_sol": 0.7816278578214919, | |
"perf_norm_to_cublas": 1.68634702073557, | |
"compute_intensity": 327.68, | |
"tile_compute_intensity": 3.190031152647975, | |
"MxNxK": 1073741824, | |
"size_m": 256, | |
"size_n": 512, | |
"size_k": 8192, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_256x512x8192.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.02232952564954758, | |
"perf_norm_to_sol": 0.7857220815698817, | |
"perf_norm_to_cublas": 0.8716155277268586, | |
"compute_intensity": 1310.72, | |
"tile_compute_intensity": 26.94736842105263, | |
"MxNxK": 68719476736, | |
"size_m": 16384, | |
"size_n": 1024, | |
"size_k": 4096, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_16384x1024x4096.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00009535359567962587, | |
"perf_norm_to_sol": 0.7187393393825569, | |
"perf_norm_to_cublas": 2.1179945357062606, | |
"compute_intensity": 127.0077519379845, | |
"tile_compute_intensity": 1.332465842550423, | |
"MxNxK": 268435456, | |
"size_m": 128, | |
"size_n": 128, | |
"size_k": 16384, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_128x128x16384.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00001645119918975979, | |
"perf_norm_to_sol": 0.2603700024235377, | |
"perf_norm_to_cublas": 0.7809765578215575, | |
"compute_intensity": 50.5679012345679, | |
"tile_compute_intensity": 1.5609756097560976, | |
"MxNxK": 16777216, | |
"size_m": 128, | |
"size_n": 2048, | |
"size_k": 64, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_128x2048x64.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.0001875519985333085, | |
"perf_norm_to_sol": 0.7308307125754706, | |
"perf_norm_to_cublas": 0.9950520756714495, | |
"compute_intensity": 168.90721649484536, | |
"tile_compute_intensity": 1.9248120300751879, | |
"MxNxK": 536870912, | |
"size_m": 128, | |
"size_n": 8192, | |
"size_k": 512, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_128x8192x512.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.010832707583904266, | |
"perf_norm_to_sol": 0.8098068390537679, | |
"perf_norm_to_cublas": 0.9307261626912887, | |
"compute_intensity": 1260.3076923076924, | |
"tile_compute_intensity": 14.628571428571428, | |
"MxNxK": 34359738368, | |
"size_m": 1024, | |
"size_n": 8192, | |
"size_k": 4096, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_1024x8192x4096.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.0027121376246213914, | |
"perf_norm_to_sol": 0.8086242201794518, | |
"perf_norm_to_cublas": 0.915454417713241, | |
"compute_intensity": 910.2222222222222, | |
"tile_compute_intensity": 10.556701030927835, | |
"MxNxK": 8589934592, | |
"size_m": 1024, | |
"size_n": 1024, | |
"size_k": 8192, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_1024x1024x8192.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.000695100799202919, | |
"perf_norm_to_sol": 0.7887705546604461, | |
"perf_norm_to_cublas": 1.052739379504871, | |
"compute_intensity": 390.0952380952381, | |
"tile_compute_intensity": 11.636363636363637, | |
"MxNxK": 2147483648, | |
"size_m": 1024, | |
"size_n": 4096, | |
"size_k": 512, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_1024x4096x512.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.0001231551985256374, | |
"perf_norm_to_sol": 0.5564879208266538, | |
"perf_norm_to_cublas": 0.8339395843689983, | |
"compute_intensity": 60.014652014652015, | |
"tile_compute_intensity": 3.9384615384615387, | |
"MxNxK": 268435456, | |
"size_m": 512, | |
"size_n": 8192, | |
"size_k": 64, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_512x8192x64.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00006312959594652056, | |
"perf_norm_to_sol": 0.3167363783002085, | |
"perf_norm_to_cublas": 0.6224655809158052, | |
"compute_intensity": 30.91320754716981, | |
"tile_compute_intensity": 2.6122448979591835, | |
"MxNxK": 67108864, | |
"size_m": 512, | |
"size_n": 4096, | |
"size_k": 32, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_512x4096x32.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.000018185601220466197, | |
"perf_norm_to_sol": 0.23553792481093738, | |
"perf_norm_to_cublas": 5.664965091761656, | |
"compute_intensity": 81.92, | |
"tile_compute_intensity": 0.9961089494163424, | |
"MxNxK": 16777216, | |
"size_m": 128, | |
"size_n": 64, | |
"size_k": 2048, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_128x64x2048.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.005413241684436798, | |
"perf_norm_to_sol": 0.8102724022221435, | |
"perf_norm_to_cublas": 1.0560651459700807, | |
"compute_intensity": 780.1904761904761, | |
"tile_compute_intensity": 13.837837837837839, | |
"MxNxK": 17179869184, | |
"size_m": 8192, | |
"size_n": 512, | |
"size_k": 4096, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_8192x512x4096.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00009485119953751564, | |
"perf_norm_to_sol": 0.7225462693217596, | |
"perf_norm_to_cublas": 1.9436253813366693, | |
"compute_intensity": 167.18367346938774, | |
"tile_compute_intensity": 1.5975039001560063, | |
"MxNxK": 268435456, | |
"size_m": 128, | |
"size_n": 256, | |
"size_k": 8192, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_128x256x8192.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00006492480169981719, | |
"perf_norm_to_sol": 0.5277981493374257, | |
"perf_norm_to_cublas": 0.818423729038454, | |
"compute_intensity": 110.70270270270271, | |
"tile_compute_intensity": 4.923076923076923, | |
"MxNxK": 134217728, | |
"size_m": 512, | |
"size_n": 2048, | |
"size_k": 128, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_512x2048x128.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00007206400041468442, | |
"perf_norm_to_sol": 0.47551051823484763, | |
"perf_norm_to_cublas": 0.7585701903339775, | |
"compute_intensity": 63.875243664717345, | |
"tile_compute_intensity": 1.7655172413793103, | |
"MxNxK": 134217728, | |
"size_m": 16384, | |
"size_n": 64, | |
"size_k": 128, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_16384x64x128.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00009691519662737846, | |
"perf_norm_to_sol": 0.7071582450585961, | |
"perf_norm_to_cublas": 1.03490062164923, | |
"compute_intensity": 292.57142857142856, | |
"tile_compute_intensity": 5.12, | |
"MxNxK": 268435456, | |
"size_m": 1024, | |
"size_n": 256, | |
"size_k": 1024, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_1024x256x1024.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.0006992415990680456, | |
"perf_norm_to_sol": 0.7840995782615778, | |
"perf_norm_to_cublas": 1.0544269853308772, | |
"compute_intensity": 254.015503875969, | |
"tile_compute_intensity": 3.7372262773722627, | |
"MxNxK": 2147483648, | |
"size_m": 256, | |
"size_n": 16384, | |
"size_k": 512, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_256x16384x512.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00016287039034068584, | |
"perf_norm_to_sol": 0.4883877408716898, | |
"perf_norm_to_cublas": 0.8957305907025216, | |
"compute_intensity": 31.000946073793756, | |
"tile_compute_intensity": 3.1604938271604937, | |
"MxNxK": 268435456, | |
"size_m": 16384, | |
"size_n": 512, | |
"size_k": 32, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_16384x512x32.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.000006323200068436563, | |
"perf_norm_to_sol": 0.3938366513624268, | |
"perf_norm_to_cublas": 0.8613360619708327, | |
"compute_intensity": 7.742911153119093, | |
"tile_compute_intensity": 0.7804878048780488, | |
"MxNxK": 2097152, | |
"size_m": 2048, | |
"size_n": 128, | |
"size_k": 8, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_2048x128x8.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.0003495712066069245, | |
"perf_norm_to_sol": 0.7842108168089398, | |
"perf_norm_to_cublas": 1.712726861231697, | |
"compute_intensity": 252.06153846153848, | |
"tile_compute_intensity": 2.6631989596879064, | |
"MxNxK": 1073741824, | |
"size_m": 256, | |
"size_n": 256, | |
"size_k": 16384, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_256x256x16384.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00010925439419224858, | |
"perf_norm_to_sol": 0.6272917521827981, | |
"perf_norm_to_cublas": 0.9037549863445259, | |
"compute_intensity": 85.11168831168831, | |
"tile_compute_intensity": 1.8686131386861313, | |
"MxNxK": 268435456, | |
"size_m": 16384, | |
"size_n": 64, | |
"size_k": 256, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_16384x64x256.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00010403840569779277, | |
"perf_norm_to_sol": 0.6587411630047665, | |
"perf_norm_to_cublas": 0.9502952444776007, | |
"compute_intensity": 195.04761904761904, | |
"tile_compute_intensity": 5.818181818181818, | |
"MxNxK": 268435456, | |
"size_m": 512, | |
"size_n": 2048, | |
"size_k": 256, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_512x2048x256.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.000006611199933104217, | |
"perf_norm_to_sol": 0.02759561713950218, | |
"perf_norm_to_cublas": 0.7821877926087927, | |
"compute_intensity": 36.57142857142857, | |
"tile_compute_intensity": 0.8888888888888888, | |
"MxNxK": 524288, | |
"size_m": 128, | |
"size_n": 64, | |
"size_k": 64, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_128x64x64.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.0013690208084881305, | |
"perf_norm_to_sol": 0.8009740093544523, | |
"perf_norm_to_cublas": 1.6356543230772171, | |
"compute_intensity": 431.1578947368421, | |
"tile_compute_intensity": 6.3602484472049685, | |
"MxNxK": 4294967296, | |
"size_m": 2048, | |
"size_n": 256, | |
"size_k": 8192, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_2048x256x8192.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.000008774399611866102, | |
"perf_norm_to_sol": 0.17049704049350173, | |
"perf_norm_to_cublas": 0.70423053535723, | |
"compute_intensity": 28.054794520547944, | |
"tile_compute_intensity": 1.7777777777777777, | |
"MxNxK": 4194304, | |
"size_m": 1024, | |
"size_n": 128, | |
"size_k": 32, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_1024x128x32.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.0000322912004776299, | |
"perf_norm_to_sol": 0.5305964113505451, | |
"perf_norm_to_cublas": 3.0394409388155497, | |
"compute_intensity": 107.78947368421052, | |
"tile_compute_intensity": 1.7534246575342465, | |
"MxNxK": 67108864, | |
"size_m": 1024, | |
"size_n": 64, | |
"size_k": 1024, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_1024x64x1024.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.000006246400153031572, | |
"perf_norm_to_sol": 0.013873441568836846, | |
"perf_norm_to_cublas": 0.9001024113373731, | |
"compute_intensity": 7.314285714285714, | |
"tile_compute_intensity": 0.5, | |
"MxNxK": 65536, | |
"size_m": 128, | |
"size_n": 64, | |
"size_k": 8, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_128x64x8.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.005380825325846672, | |
"perf_norm_to_sol": 0.815153824523653, | |
"perf_norm_to_cublas": 0.9141639913423132, | |
"compute_intensity": 780.1904761904761, | |
"tile_compute_intensity": 7.086505190311419, | |
"MxNxK": 17179869184, | |
"size_m": 512, | |
"size_n": 2048, | |
"size_k": 16384, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_512x2048x16384.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.000013411199324764311, | |
"perf_norm_to_sol": 0.15969484418139968, | |
"perf_norm_to_cublas": 0.6473395596795436, | |
"compute_intensity": 53.89473684210526, | |
"tile_compute_intensity": 2.6666666666666665, | |
"MxNxK": 8388608, | |
"size_m": 512, | |
"size_n": 256, | |
"size_k": 64, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_512x256x64.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.005439683049917221, | |
"perf_norm_to_sol": 0.8063338071736356, | |
"perf_norm_to_cublas": 0.8682099717861301, | |
"compute_intensity": 780.1904761904761, | |
"tile_compute_intensity": 32, | |
"MxNxK": 17179869184, | |
"size_m": 8192, | |
"size_n": 2048, | |
"size_k": 1024, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_8192x2048x1024.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.0013632000423967837, | |
"perf_norm_to_sol": 0.8043941107399417, | |
"perf_norm_to_cublas": 0.9656337744886352, | |
"compute_intensity": 630.1538461538462, | |
"tile_compute_intensity": 7.314285714285714, | |
"MxNxK": 4294967296, | |
"size_m": 512, | |
"size_n": 4096, | |
"size_k": 2048, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_512x4096x2048.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.005370166525244713, | |
"perf_norm_to_sol": 0.8167717561156582, | |
"perf_norm_to_cublas": 0.9369619957946453, | |
"compute_intensity": 668.734693877551, | |
"tile_compute_intensity": 7.641791044776119, | |
"MxNxK": 17179869184, | |
"size_m": 512, | |
"size_n": 16384, | |
"size_k": 2048, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_512x16384x2048.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.011002134531736374, | |
"perf_norm_to_sol": 0.7973362497623285, | |
"perf_norm_to_cublas": 1.0419348383520752, | |
"compute_intensity": 474.8985507246377, | |
"tile_compute_intensity": 7.6992481203007515, | |
"MxNxK": 34359738368, | |
"size_m": 16384, | |
"size_n": 256, | |
"size_k": 8192, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_16384x256x8192.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.000016457599122077228, | |
"perf_norm_to_sol": 0.2602687513005372, | |
"perf_norm_to_cublas": 0.8448376844543066, | |
"compute_intensity": 81.92, | |
"tile_compute_intensity": 2.6666666666666665, | |
"MxNxK": 16777216, | |
"size_m": 1024, | |
"size_n": 128, | |
"size_k": 128, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_1024x128x128.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.010827804356813431, | |
"perf_norm_to_sol": 0.8101735493027478, | |
"perf_norm_to_cublas": 0.8625597638120522, | |
"compute_intensity": 992.969696969697, | |
"tile_compute_intensity": 14.628571428571428, | |
"MxNxK": 34359738368, | |
"size_m": 1024, | |
"size_n": 16384, | |
"size_k": 2048, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_1024x16384x2048.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.0013721376657485963, | |
"perf_norm_to_sol": 0.7991545697174398, | |
"perf_norm_to_cublas": 0.9717718792692647, | |
"compute_intensity": 399.609756097561, | |
"tile_compute_intensity": 7.314285714285714, | |
"MxNxK": 4294967296, | |
"size_m": 8192, | |
"size_n": 256, | |
"size_k": 2048, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_8192x256x2048.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00004887999966740608, | |
"perf_norm_to_sol": 0.35052363355592187, | |
"perf_norm_to_cublas": 0.6321440139469747, | |
"compute_intensity": 59.36231884057971, | |
"tile_compute_intensity": 3.764705882352941, | |
"MxNxK": 67108864, | |
"size_m": 512, | |
"size_n": 2048, | |
"size_k": 64, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_512x2048x64.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.000006134399882284924, | |
"perf_norm_to_sol": 0.054276419190105396, | |
"perf_norm_to_cublas": 0.6035471835968333, | |
"compute_intensity": 7.474452554744525, | |
"tile_compute_intensity": 0.6153846153846154, | |
"MxNxK": 262144, | |
"size_m": 512, | |
"size_n": 64, | |
"size_k": 8, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_512x64x8.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.010841174423694611, | |
"perf_norm_to_sol": 0.8091743886845156, | |
"perf_norm_to_cublas": 0.8991713707679411, | |
"compute_intensity": 1260.3076923076924, | |
"tile_compute_intensity": 12.720496894409937, | |
"MxNxK": 34359738368, | |
"size_m": 1024, | |
"size_n": 2048, | |
"size_k": 16384, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_1024x2048x16384.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.000014915199426468461, | |
"perf_norm_to_sol": 0.1435917365377815, | |
"perf_norm_to_cublas": 1.5850676613545813, | |
"compute_intensity": 102.4, | |
"tile_compute_intensity": 1.3061224489795917, | |
"MxNxK": 8388608, | |
"size_m": 128, | |
"size_n": 128, | |
"size_k": 512, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_128x128x512.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00018047039629891514, | |
"perf_norm_to_sol": 0.8313760927625334, | |
"perf_norm_to_cublas": 0.8789474896255645, | |
"compute_intensity": 7.984405458089668, | |
"tile_compute_intensity": 0.9770992366412213, | |
"MxNxK": 134217728, | |
"size_m": 4096, | |
"size_n": 4096, | |
"size_k": 8, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_4096x4096x8.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.0006765215657651424, | |
"perf_norm_to_sol": 0.8104324690848688, | |
"perf_norm_to_cublas": 1.0088642150192144, | |
"compute_intensity": 409.6, | |
"tile_compute_intensity": 3.7372262773722627, | |
"MxNxK": 2147483648, | |
"size_m": 256, | |
"size_n": 2048, | |
"size_k": 4096, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_256x2048x4096.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.000033280000207014385, | |
"perf_norm_to_sol": 0.5148315800797443, | |
"perf_norm_to_cublas": 3.4803846917700607, | |
"compute_intensity": 99.90243902439025, | |
"tile_compute_intensity": 1.3298701298701299, | |
"MxNxK": 67108864, | |
"size_m": 256, | |
"size_n": 64, | |
"size_k": 4096, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_256x64x4096.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00002229759993497282, | |
"perf_norm_to_sol": 0.384202675211652, | |
"perf_norm_to_cublas": 1.0673076985829086, | |
"compute_intensity": 120.47058823529412, | |
"tile_compute_intensity": 2.909090909090909, | |
"MxNxK": 33554432, | |
"size_m": 1024, | |
"size_n": 128, | |
"size_k": 256, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_1024x128x256.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.0006922752130776644, | |
"perf_norm_to_sol": 0.7919899955607632, | |
"perf_norm_to_cublas": 1.0723919696751696, | |
"compute_intensity": 585.1428571428571, | |
"tile_compute_intensity": 11.636363636363637, | |
"MxNxK": 2147483648, | |
"size_m": 1024, | |
"size_n": 2048, | |
"size_k": 1024, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_1024x2048x1024.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00037797761615365744, | |
"perf_norm_to_sol": 0.7252744865046694, | |
"perf_norm_to_cublas": 0.8838025590095857, | |
"compute_intensity": 122.26865671641791, | |
"tile_compute_intensity": 10.666666666666666, | |
"MxNxK": 1073741824, | |
"size_m": 4096, | |
"size_n": 2048, | |
"size_k": 128, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_4096x2048x128.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00005942400312051177, | |
"perf_norm_to_sol": 0.32911924837419243, | |
"perf_norm_to_cublas": 0.6440495051114873, | |
"compute_intensity": 31.267175572519083, | |
"tile_compute_intensity": 3.2, | |
"MxNxK": 67108864, | |
"size_m": 2048, | |
"size_n": 1024, | |
"size_k": 32, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_2048x1024x32.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.000006796800153097138, | |
"perf_norm_to_sol": 0.11273666724403453, | |
"perf_norm_to_cublas": 0.7396421605166049, | |
"compute_intensity": 27.675675675675677, | |
"tile_compute_intensity": 1.6, | |
"MxNxK": 2097152, | |
"size_m": 512, | |
"size_n": 128, | |
"size_k": 32, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_512x128x32.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00019503680523484944, | |
"perf_norm_to_sol": 0.7027840748724479, | |
"perf_norm_to_cublas": 0.9517793678635207, | |
"compute_intensity": 315.0769230769231, | |
"tile_compute_intensity": 9.142857142857142, | |
"MxNxK": 536870912, | |
"size_m": 2048, | |
"size_n": 512, | |
"size_k": 512, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_2048x512x512.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.0006783423945307731, | |
"perf_norm_to_sol": 0.80825707983571, | |
"perf_norm_to_cublas": 1.6683538711244341, | |
"compute_intensity": 334.3673469387755, | |
"tile_compute_intensity": 3.1950078003120126, | |
"MxNxK": 2147483648, | |
"size_m": 256, | |
"size_n": 512, | |
"size_k": 16384, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_256x512x16384.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.000016521599900443106, | |
"perf_norm_to_sol": 0.25926053159010215, | |
"perf_norm_to_cublas": 0.8037962023707637, | |
"compute_intensity": 55.351351351351354, | |
"tile_compute_intensity": 3.2, | |
"MxNxK": 16777216, | |
"size_m": 1024, | |
"size_n": 256, | |
"size_k": 64, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_1024x256x64.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00000781439957791008, | |
"perf_norm_to_sol": 0.6186865311588194, | |
"perf_norm_to_cublas": 1.1281736762535746, | |
"compute_intensity": 7.861804222648752, | |
"tile_compute_intensity": 0.8648648648648649, | |
"MxNxK": 4194304, | |
"size_m": 2048, | |
"size_n": 256, | |
"size_k": 8, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_2048x256x8.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.0013698016293346882, | |
"perf_norm_to_sol": 0.8005174343360982, | |
"perf_norm_to_cublas": 0.9781877822290566, | |
"compute_intensity": 237.44927536231884, | |
"tile_compute_intensity": 3.8496240601503757, | |
"MxNxK": 4294967296, | |
"size_m": 8192, | |
"size_n": 128, | |
"size_k": 4096, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_8192x128x4096.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.0007000095676630736, | |
"perf_norm_to_sol": 0.7832393559456317, | |
"perf_norm_to_cublas": 0.9534543978479824, | |
"compute_intensity": 234.05714285714285, | |
"tile_compute_intensity": 16, | |
"MxNxK": 2147483648, | |
"size_m": 4096, | |
"size_n": 2048, | |
"size_k": 256, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_4096x2048x256.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.0002065952168777585, | |
"perf_norm_to_sol": 0.7294242321947649, | |
"perf_norm_to_cublas": 0.7665928075303637, | |
"compute_intensity": 7.9669341113542425, | |
"tile_compute_intensity": 0.9660377358490566, | |
"MxNxK": 134217728, | |
"size_m": 16384, | |
"size_n": 1024, | |
"size_k": 8, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_16384x1024x8.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.000006550399848492816, | |
"perf_norm_to_sol": 0.05291833753238528, | |
"perf_norm_to_cublas": 0.5686370275206571, | |
"compute_intensity": 14.628571428571428, | |
"tile_compute_intensity": 0.8888888888888888, | |
"MxNxK": 524288, | |
"size_m": 128, | |
"size_n": 256, | |
"size_k": 16, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_128x256x16.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.0007845696061849594, | |
"perf_norm_to_sol": 0.7634599532090316, | |
"perf_norm_to_cublas": 0.7756446507060805, | |
"compute_intensity": 7.9921951219512195, | |
"tile_compute_intensity": 0.9884169884169884, | |
"MxNxK": 536870912, | |
"size_m": 8192, | |
"size_n": 8192, | |
"size_k": 8, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_8192x8192x8.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.000007868800457799807, | |
"perf_norm_to_sol": 0.6074536711058819, | |
"perf_norm_to_cublas": 1.0915005616841444, | |
"compute_intensity": 7.9073359073359075, | |
"tile_compute_intensity": 0.8888888888888888, | |
"MxNxK": 4194304, | |
"size_m": 1024, | |
"size_n": 512, | |
"size_k": 8, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_1024x512x8.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.0014307583682239057, | |
"perf_norm_to_sol": 0.7664117926674309, | |
"perf_norm_to_cublas": 0.890130420849891, | |
"compute_intensity": 125.06870229007633, | |
"tile_compute_intensity": 12.8, | |
"MxNxK": 4294967296, | |
"size_m": 8192, | |
"size_n": 4096, | |
"size_k": 128, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_8192x4096x128.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.0001034496002830565, | |
"perf_norm_to_sol": 0.6624905285182686, | |
"perf_norm_to_cublas": 0.9216468966166054, | |
"compute_intensity": 240.94117647058823, | |
"tile_compute_intensity": 5.818181818181818, | |
"MxNxK": 268435456, | |
"size_m": 2048, | |
"size_n": 256, | |
"size_k": 512, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_2048x256x512.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.0026978399604558946, | |
"perf_norm_to_sol": 0.8129096624983723, | |
"perf_norm_to_cublas": 0.9157288103950879, | |
"compute_intensity": 455.1111111111111, | |
"tile_compute_intensity": 3.8641509433962264, | |
"MxNxK": 8589934592, | |
"size_m": 256, | |
"size_n": 4096, | |
"size_k": 8192, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_256x4096x8192.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.0013574080541729927, | |
"perf_norm_to_sol": 0.8078264177771436, | |
"perf_norm_to_cublas": 1.645085872042499, | |
"compute_intensity": 237.44927536231884, | |
"tile_compute_intensity": 3.5493934142114383, | |
"MxNxK": 4294967296, | |
"size_m": 2048, | |
"size_n": 128, | |
"size_k": 16384, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_2048x128x16384.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00009524159831926226, | |
"perf_norm_to_sol": 0.7195845258370145, | |
"perf_norm_to_cublas": 1.9774888521732348, | |
"compute_intensity": 167.18367346938774, | |
"tile_compute_intensity": 1.996101364522417, | |
"MxNxK": 268435456, | |
"size_m": 256, | |
"size_n": 128, | |
"size_k": 8192, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_256x128x8192.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00009566720109432936, | |
"perf_norm_to_sol": 0.440142330234577, | |
"perf_norm_to_cublas": 0.7393296826990357, | |
"compute_intensity": 30.089990817263544, | |
"tile_compute_intensity": 1.9922178988326849, | |
"MxNxK": 134217728, | |
"size_m": 256, | |
"size_n": 16384, | |
"size_k": 32, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_256x16384x32.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.002775651216506958, | |
"perf_norm_to_sol": 0.7901209484413352, | |
"perf_norm_to_cublas": 0.8841272357234736, | |
"compute_intensity": 244.53731343283582, | |
"tile_compute_intensity": 21.333333333333332, | |
"MxNxK": 8589934592, | |
"size_m": 8192, | |
"size_n": 4096, | |
"size_k": 256, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_8192x4096x256.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.000008815999899525196, | |
"perf_norm_to_sol": 0.28558008063399765, | |
"perf_norm_to_cublas": 0.6878402867862963, | |
"compute_intensity": 15.398496240601503, | |
"tile_compute_intensity": 1.28, | |
"MxNxK": 4194304, | |
"size_m": 256, | |
"size_n": 1024, | |
"size_k": 16, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_256x1024x16.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.005408287793397903, | |
"perf_norm_to_sol": 0.811014596673654, | |
"perf_norm_to_cublas": 0.8697708365793572, | |
"compute_intensity": 819.2, | |
"tile_compute_intensity": 32, | |
"MxNxK": 17179869184, | |
"size_m": 4096, | |
"size_n": 4096, | |
"size_k": 1024, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_4096x4096x1024.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.0002195103792473674, | |
"perf_norm_to_sol": 0.6875050106251587, | |
"perf_norm_to_cublas": 0.7728326852037766, | |
"compute_intensity": 15.922254616132166, | |
"tile_compute_intensity": 1.9104477611940298, | |
"MxNxK": 268435456, | |
"size_m": 8192, | |
"size_n": 2048, | |
"size_k": 16, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_8192x2048x16.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00002286079979967326, | |
"perf_norm_to_sol": 0.4405224636474363, | |
"perf_norm_to_cublas": 0.9224523995243231, | |
"compute_intensity": 30.796992481203006, | |
"tile_compute_intensity": 2.909090909090909, | |
"MxNxK": 33554432, | |
"size_m": 2048, | |
"size_n": 512, | |
"size_k": 32, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_2048x512x32.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.000022540800273418427, | |
"perf_norm_to_sol": 0.3800573822535591, | |
"perf_norm_to_cublas": 1.0694207587902327, | |
"compute_intensity": 83.59183673469387, | |
"tile_compute_intensity": 1.7777777777777777, | |
"MxNxK": 33554432, | |
"size_m": 2048, | |
"size_n": 64, | |
"size_k": 256, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_2048x64x256.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.022207337617874145, | |
"perf_norm_to_sol": 0.7900452398089001, | |
"perf_norm_to_cublas": 0.9211183155817599, | |
"compute_intensity": 1489.4545454545455, | |
"tile_compute_intensity": 24.975609756097562, | |
"MxNxK": 68719476736, | |
"size_m": 8192, | |
"size_n": 1024, | |
"size_k": 8192, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_8192x1024x8192.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.0026959840208292006, | |
"perf_norm_to_sol": 0.8134692768150362, | |
"perf_norm_to_cublas": 1.0523611585334596, | |
"compute_intensity": 1024, | |
"tile_compute_intensity": 19.692307692307693, | |
"MxNxK": 8589934592, | |
"size_m": 2048, | |
"size_n": 2048, | |
"size_k": 2048, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_2048x2048x2048.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00003385280142538249, | |
"perf_norm_to_sol": 0.506120450013476, | |
"perf_norm_to_cublas": 3.0747707951368217, | |
"compute_intensity": 107.78947368421052, | |
"tile_compute_intensity": 1.5900621118012421, | |
"MxNxK": 67108864, | |
"size_m": 512, | |
"size_n": 64, | |
"size_k": 2048, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_512x64x2048.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.000016659199900459498, | |
"perf_norm_to_sol": 0.2571191172746365, | |
"perf_norm_to_cublas": 0.8511333088460414, | |
"compute_intensity": 63.01538461538462, | |
"tile_compute_intensity": 1.6842105263157894, | |
"MxNxK": 16777216, | |
"size_m": 2048, | |
"size_n": 64, | |
"size_k": 128, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_2048x64x128.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.000016857600712683052, | |
"perf_norm_to_sol": 0.348482128538624, | |
"perf_norm_to_cublas": 0.7617691121420904, | |
"compute_intensity": 28.346020761245676, | |
"tile_compute_intensity": 1.3195876288659794, | |
"MxNxK": 16777216, | |
"size_m": 128, | |
"size_n": 4096, | |
"size_k": 32, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_128x4096x32.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00005271040135994554, | |
"perf_norm_to_sol": 0.325051501213785, | |
"perf_norm_to_cublas": 0.6085478475255803, | |
"compute_intensity": 51.0404984423676, | |
"tile_compute_intensity": 1.5900621118012421, | |
"MxNxK": 67108864, | |
"size_m": 128, | |
"size_n": 8192, | |
"size_k": 64, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_128x8192x64.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.0000769503996707499, | |
"perf_norm_to_sol": 0.5016781491957412, | |
"perf_norm_to_cublas": 0.6834531797254125, | |
"compute_intensity": 15.738712776176753, | |
"tile_compute_intensity": 1.7534246575342465, | |
"MxNxK": 67108864, | |
"size_m": 8192, | |
"size_n": 512, | |
"size_k": 16, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_8192x512x16.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.022324566543102265, | |
"perf_norm_to_sol": 0.7858966193120329, | |
"perf_norm_to_cublas": 0.907271327146211, | |
"compute_intensity": 885.6216216216217, | |
"tile_compute_intensity": 51.2, | |
"MxNxK": 68719476736, | |
"size_m": 16384, | |
"size_n": 4096, | |
"size_k": 1024, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_16384x4096x1024.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.000029475201154127716, | |
"perf_norm_to_sol": 0.673739574267009, | |
"perf_norm_to_cublas": 0.9070676355567686, | |
"compute_intensity": 7.755739644970414, | |
"tile_compute_intensity": 0.7975077881619937, | |
"MxNxK": 16777216, | |
"size_m": 16384, | |
"size_n": 128, | |
"size_k": 8, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_16384x128x8.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.0003875551978126168, | |
"perf_norm_to_sol": 0.7735301792702157, | |
"perf_norm_to_cublas": 0.7969961494995996, | |
"compute_intensity": 7.988298391028766, | |
"tile_compute_intensity": 0.9846153846153847, | |
"MxNxK": 268435456, | |
"size_m": 8192, | |
"size_n": 4096, | |
"size_k": 8, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_8192x4096x8.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00004643520223908126, | |
"perf_norm_to_sol": 0.36897858231381375, | |
"perf_norm_to_cublas": 0.6648748788311938, | |
"compute_intensity": 59.36231884057971, | |
"tile_compute_intensity": 4.571428571428571, | |
"MxNxK": 67108864, | |
"size_m": 2048, | |
"size_n": 512, | |
"size_k": 64, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_2048x512x64.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.0006953888107091189, | |
"perf_norm_to_sol": 0.7884438669254764, | |
"perf_norm_to_cublas": 1.0809077742110433, | |
"compute_intensity": 481.88235294117646, | |
"tile_compute_intensity": 11.636363636363637, | |
"MxNxK": 2147483648, | |
"size_m": 4096, | |
"size_n": 512, | |
"size_k": 1024, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_4096x512x1024.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.000029142401763238014, | |
"perf_norm_to_sol": 0.6814335221356844, | |
"perf_norm_to_cublas": 0.922696849864874, | |
"compute_intensity": 7.755739644970414, | |
"tile_compute_intensity": 0.6657997399219766, | |
"MxNxK": 16777216, | |
"size_m": 128, | |
"size_n": 16384, | |
"size_k": 8, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_128x16384x8.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00001855040027294308, | |
"perf_norm_to_sol": 0.23090600255971097, | |
"perf_norm_to_cublas": 1.3327583293600793, | |
"compute_intensity": 93.0909090909091, | |
"tile_compute_intensity": 1.5609756097560976, | |
"MxNxK": 16777216, | |
"size_m": 512, | |
"size_n": 64, | |
"size_k": 512, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_512x64x512.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00035262079909443856, | |
"perf_norm_to_sol": 0.7774286774067564, | |
"perf_norm_to_cublas": 1.7110369741777918, | |
"compute_intensity": 119.5912408759124, | |
"tile_compute_intensity": 1.7762359063313096, | |
"MxNxK": 1073741824, | |
"size_m": 1024, | |
"size_n": 64, | |
"size_k": 16384, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_1024x64x16384.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00002430399908917025, | |
"perf_norm_to_sol": 0.4819230401305359, | |
"perf_norm_to_cublas": 0.8967742035918634, | |
"compute_intensity": 28.395147313691506, | |
"tile_compute_intensity": 1.3264248704663213, | |
"MxNxK": 33554432, | |
"size_m": 128, | |
"size_n": 8192, | |
"size_k": 32, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_128x8192x32.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00002352640003664419, | |
"perf_norm_to_sol": 0.36413550447464405, | |
"perf_norm_to_cublas": 0.7920293527291243, | |
"compute_intensity": 50.88198757763975, | |
"tile_compute_intensity": 1.5802469135802468, | |
"MxNxK": 33554432, | |
"size_m": 128, | |
"size_n": 4096, | |
"size_k": 64, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_128x4096x64.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00009468479547649621, | |
"perf_norm_to_sol": 0.7238161103018715, | |
"perf_norm_to_cublas": 1.268376840966872, | |
"compute_intensity": 195.04761904761904, | |
"tile_compute_intensity": 1.7716262975778547, | |
"MxNxK": 268435456, | |
"size_m": 128, | |
"size_n": 512, | |
"size_k": 4096, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_128x512x4096.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00003163839865010232, | |
"perf_norm_to_sol": 0.541544320277285, | |
"perf_norm_to_cublas": 3.0750481723129637, | |
"compute_intensity": 170.66666666666666, | |
"tile_compute_intensity": 2.6122448979591835, | |
"MxNxK": 67108864, | |
"size_m": 512, | |
"size_n": 128, | |
"size_k": 1024, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_512x128x1024.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.000006924799527041614, | |
"perf_norm_to_sol": 0.3504005973901482, | |
"perf_norm_to_cublas": 0.9459334992895098, | |
"compute_intensity": 7.846743295019157, | |
"tile_compute_intensity": 0.7804878048780488, | |
"MxNxK": 2097152, | |
"size_m": 256, | |
"size_n": 1024, | |
"size_k": 8, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_256x1024x8.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.005450300872325897, | |
"perf_norm_to_sol": 0.8047629747797483, | |
"perf_norm_to_cublas": 0.9138724886436181, | |
"compute_intensity": 468.1142857142857, | |
"tile_compute_intensity": 7.474452554744525, | |
"MxNxK": 17179869184, | |
"size_m": 8192, | |
"size_n": 256, | |
"size_k": 8192, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_8192x256x8192.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00005279039614833891, | |
"perf_norm_to_sol": 0.6491178828621293, | |
"perf_norm_to_cublas": 1.8751895538817076, | |
"compute_intensity": 186.1818181818182, | |
"tile_compute_intensity": 1.855072463768116, | |
"MxNxK": 134217728, | |
"size_m": 128, | |
"size_n": 1024, | |
"size_k": 1024, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_128x1024x1024.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.000016697600949555634, | |
"perf_norm_to_sol": 0.25652779616953597, | |
"perf_norm_to_cublas": 0.8133384240399412, | |
"compute_intensity": 93.0909090909091, | |
"tile_compute_intensity": 3.2, | |
"MxNxK": 16777216, | |
"size_m": 512, | |
"size_n": 256, | |
"size_k": 128, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_512x256x128.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00005442240508273244, | |
"perf_norm_to_sol": 0.6296522568447719, | |
"perf_norm_to_cublas": 1.8165459461899198, | |
"compute_intensity": 110.70270270270271, | |
"tile_compute_intensity": 1.855072463768116, | |
"MxNxK": 134217728, | |
"size_m": 2048, | |
"size_n": 64, | |
"size_k": 1024, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_2048x64x1024.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.001369846425950527, | |
"perf_norm_to_sol": 0.8004912558745576, | |
"perf_norm_to_cublas": 0.9851942541968944, | |
"compute_intensity": 123.65283018867925, | |
"tile_compute_intensity": 1.9768339768339769, | |
"MxNxK": 4294967296, | |
"size_m": 16384, | |
"size_n": 64, | |
"size_k": 4096, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_16384x64x4096.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.0013666016049683094, | |
"perf_norm_to_sol": 0.8023919201308416, | |
"perf_norm_to_cublas": 1.0508753951693268, | |
"compute_intensity": 496.4848484848485, | |
"tile_compute_intensity": 7.314285714285714, | |
"MxNxK": 4294967296, | |
"size_m": 512, | |
"size_n": 8192, | |
"size_k": 1024, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_512x8192x1024.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.011011151969432831, | |
"perf_norm_to_sol": 0.7966832817554099, | |
"perf_norm_to_cublas": 0.9444746225670918, | |
"compute_intensity": 799.219512195122, | |
"tile_compute_intensity": 14.628571428571428, | |
"MxNxK": 34359738368, | |
"size_m": 16384, | |
"size_n": 512, | |
"size_k": 4096, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_16384x512x4096.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.000026963200070895255, | |
"perf_norm_to_sol": 0.7009849973608496, | |
"perf_norm_to_cublas": 0.9905055704661403, | |
"compute_intensity": 7.953398058252427, | |
"tile_compute_intensity": 0.927536231884058, | |
"MxNxK": 16777216, | |
"size_m": 1024, | |
"size_n": 2048, | |
"size_k": 8, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_1024x2048x8.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.001369119994342327, | |
"perf_norm_to_sol": 0.800915982817965, | |
"perf_norm_to_cublas": 0.9574196367629711, | |
"compute_intensity": 682.6666666666666, | |
"tile_compute_intensity": 10.448979591836734, | |
"MxNxK": 4294967296, | |
"size_m": 2048, | |
"size_n": 512, | |
"size_k": 4096, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_2048x512x4096.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.000052582402713596824, | |
"perf_norm_to_sol": 0.6516855148272259, | |
"perf_norm_to_cublas": 1.4959225503753326, | |
"compute_intensity": 227.55555555555554, | |
"tile_compute_intensity": 3.3684210526315788, | |
"MxNxK": 134217728, | |
"size_m": 256, | |
"size_n": 1024, | |
"size_k": 512, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_256x1024x512.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.01088053435087204, | |
"perf_norm_to_sol": 0.8062472305151275, | |
"perf_norm_to_cublas": 0.9314989736096686, | |
"compute_intensity": 1638.4, | |
"tile_compute_intensity": 24.38095238095238, | |
"MxNxK": 34359738368, | |
"size_m": 2048, | |
"size_n": 4096, | |
"size_k": 4096, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_2048x4096x4096.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00002245759969810024, | |
"perf_norm_to_sol": 0.3814654130886663, | |
"perf_norm_to_cublas": 1.0579937658530612, | |
"compute_intensity": 146.28571428571428, | |
"tile_compute_intensity": 3.5555555555555554, | |
"MxNxK": 33554432, | |
"size_m": 512, | |
"size_n": 256, | |
"size_k": 256, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_512x256x256.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00012289920123293997, | |
"perf_norm_to_sol": 0.5576470772712951, | |
"perf_norm_to_cublas": 0.8656199449822727, | |
"compute_intensity": 56.79029462738301, | |
"tile_compute_intensity": 2.6528497409326426, | |
"MxNxK": 268435456, | |
"size_m": 256, | |
"size_n": 16384, | |
"size_k": 64, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_256x16384x64.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00007366399513557554, | |
"perf_norm_to_sol": 0.465182347498198, | |
"perf_norm_to_cublas": 0.8079930919487351, | |
"compute_intensity": 61.134328358208954, | |
"tile_compute_intensity": 5.333333333333333, | |
"MxNxK": 134217728, | |
"size_m": 2048, | |
"size_n": 1024, | |
"size_k": 64, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_2048x1024x64.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00005574399838224053, | |
"perf_norm_to_sol": 0.6147242963859593, | |
"perf_norm_to_cublas": 0.9167050060636462, | |
"compute_intensity": 186.1818181818182, | |
"tile_compute_intensity": 5.333333333333333, | |
"MxNxK": 134217728, | |
"size_m": 512, | |
"size_n": 1024, | |
"size_k": 256, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_512x1024x256.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.000008476799848722294, | |
"perf_norm_to_sol": 0.12632711782010206, | |
"perf_norm_to_cublas": 0.7908644534426855, | |
"compute_intensity": 48.76190476190476, | |
"tile_compute_intensity": 2, | |
"MxNxK": 4194304, | |
"size_m": 512, | |
"size_n": 128, | |
"size_k": 64, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_512x128x64.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00003166079986840487, | |
"perf_norm_to_sol": 0.5411611571042301, | |
"perf_norm_to_cublas": 3.281685971903655, | |
"compute_intensity": 157.53846153846155, | |
"tile_compute_intensity": 1.9844961240310077, | |
"MxNxK": 67108864, | |
"size_m": 256, | |
"size_n": 128, | |
"size_k": 2048, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_256x128x2048.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.000022447999799624086, | |
"perf_norm_to_sol": 0.38162854696564885, | |
"perf_norm_to_cublas": 0.9974340969631563, | |
"compute_intensity": 102.4, | |
"tile_compute_intensity": 4, | |
"MxNxK": 33554432, | |
"size_m": 512, | |
"size_n": 512, | |
"size_k": 128, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_512x512x128.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.000687289610505104, | |
"perf_norm_to_sol": 0.7977350952959504, | |
"perf_norm_to_cublas": 1.082885574458421, | |
"compute_intensity": 390.0952380952381, | |
"tile_compute_intensity": 3.8208955223880596, | |
"MxNxK": 2147483648, | |
"size_m": 256, | |
"size_n": 4096, | |
"size_k": 2048, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_256x4096x2048.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00003191680007148534, | |
"perf_norm_to_sol": 0.5368205789194603, | |
"perf_norm_to_cublas": 1.2848405361441166, | |
"compute_intensity": 170.66666666666666, | |
"tile_compute_intensity": 4.571428571428571, | |
"MxNxK": 67108864, | |
"size_m": 512, | |
"size_n": 512, | |
"size_k": 256, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_512x512x256.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.0003679072018712759, | |
"perf_norm_to_sol": 0.8148403731057894, | |
"perf_norm_to_cublas": 0.8393681635420044, | |
"compute_intensity": 7.988298391028766, | |
"tile_compute_intensity": 0.9808429118773946, | |
"MxNxK": 268435456, | |
"size_m": 4096, | |
"size_n": 8192, | |
"size_k": 8, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_4096x8192x8.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.000032652801019139587, | |
"perf_norm_to_sol": 0.52472053106833, | |
"perf_norm_to_cublas": 0.8942570723594641, | |
"compute_intensity": 107.78947368421052, | |
"tile_compute_intensity": 4.571428571428571, | |
"MxNxK": 67108864, | |
"size_m": 512, | |
"size_n": 1024, | |
"size_k": 128, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_512x1024x128.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.000013049600238446146, | |
"perf_norm_to_sol": 0.20691163369184387, | |
"perf_norm_to_cublas": 0.6809710428088087, | |
"compute_intensity": 29.681159420289855, | |
"tile_compute_intensity": 2.2857142857142856, | |
"MxNxK": 8388608, | |
"size_m": 1024, | |
"size_n": 256, | |
"size_k": 32, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_1024x256x32.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.010958310961723328, | |
"perf_norm_to_sol": 0.8005248908848016, | |
"perf_norm_to_cublas": 0.8737311323718063, | |
"compute_intensity": 481.88235294117646, | |
"tile_compute_intensity": 36.57142857142857, | |
"MxNxK": 34359738368, | |
"size_m": 8192, | |
"size_n": 8192, | |
"size_k": 512, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_8192x8192x512.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00023690559901297094, | |
"perf_norm_to_sol": 0.6413362581807128, | |
"perf_norm_to_cublas": 0.9136871305780491, | |
"compute_intensity": 15.868280871670702, | |
"tile_compute_intensity": 1.8686131386861313, | |
"MxNxK": 268435456, | |
"size_m": 16384, | |
"size_n": 1024, | |
"size_k": 16, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_16384x1024x16.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.000023321600747294723, | |
"perf_norm_to_sol": 0.43181837978557586, | |
"perf_norm_to_cublas": 0.8848792267697927, | |
"compute_intensity": 30.796992481203006, | |
"tile_compute_intensity": 2.56, | |
"MxNxK": 33554432, | |
"size_m": 512, | |
"size_n": 2048, | |
"size_k": 32, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_512x2048x32.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.0000970080029219389, | |
"perf_norm_to_sol": 0.7064817159639342, | |
"perf_norm_to_cublas": 1.943262413049729, | |
"compute_intensity": 112.21917808219177, | |
"tile_compute_intensity": 1.5975039001560063, | |
"MxNxK": 268435456, | |
"size_m": 512, | |
"size_n": 64, | |
"size_k": 8192, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_512x64x8192.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.000031878400477580726, | |
"perf_norm_to_sol": 0.606638187050348, | |
"perf_norm_to_cublas": 0.946797847413596, | |
"compute_intensity": 15.723608445297504, | |
"tile_compute_intensity": 1.7297297297297298, | |
"MxNxK": 33554432, | |
"size_m": 4096, | |
"size_n": 512, | |
"size_k": 16, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_4096x512x16.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.010708656162023544, | |
"perf_norm_to_sol": 0.819187819105178, | |
"perf_norm_to_cublas": 1.06192369467154, | |
"compute_intensity": 1638.4, | |
"tile_compute_intensity": 20.897959183673468, | |
"MxNxK": 34359738368, | |
"size_m": 2048, | |
"size_n": 2048, | |
"size_k": 8192, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_2048x2048x8192.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.005415251106023788, | |
"perf_norm_to_sol": 0.8099717367821685, | |
"perf_norm_to_cublas": 0.932169083326649, | |
"compute_intensity": 963.7647058823529, | |
"tile_compute_intensity": 23.272727272727273, | |
"MxNxK": 17179869184, | |
"size_m": 8192, | |
"size_n": 1024, | |
"size_k": 2048, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_8192x1024x2048.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.0013679136522114278, | |
"perf_norm_to_sol": 0.8016222983751071, | |
"perf_norm_to_cublas": 0.9431893452695391, | |
"compute_intensity": 337.8144329896907, | |
"tile_compute_intensity": 7.013698630136986, | |
"MxNxK": 4294967296, | |
"size_m": 512, | |
"size_n": 16384, | |
"size_k": 512, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_512x16384x512.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.005370575934648514, | |
"perf_norm_to_sol": 0.8167094920229834, | |
"perf_norm_to_cublas": 1.0593652564105234, | |
"compute_intensity": 448.8767123287671, | |
"tile_compute_intensity": 3.9384615384615387, | |
"MxNxK": 17179869184, | |
"size_m": 256, | |
"size_n": 16384, | |
"size_k": 4096, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_256x16384x4096.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00009543040068820119, | |
"perf_norm_to_sol": 0.7181608782137197, | |
"perf_norm_to_cublas": 1.2402252955781516, | |
"compute_intensity": 240.94117647058823, | |
"tile_compute_intensity": 2.6528497409326426, | |
"MxNxK": 268435456, | |
"size_m": 256, | |
"size_n": 256, | |
"size_k": 4096, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_256x256x4096.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.0006851456128060818, | |
"perf_norm_to_sol": 0.800231414584545, | |
"perf_norm_to_cublas": 1.654800423287903, | |
"compute_intensity": 334.3673469387755, | |
"tile_compute_intensity": 3.992202729044834, | |
"MxNxK": 2147483648, | |
"size_m": 512, | |
"size_n": 256, | |
"size_k": 16384, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_512x256x16384.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.005456770956516266, | |
"perf_norm_to_sol": 0.8038087686674507, | |
"perf_norm_to_cublas": 0.9101218412895549, | |
"compute_intensity": 246.37593984962405, | |
"tile_compute_intensity": 1.9883495145631067, | |
"MxNxK": 17179869184, | |
"size_m": 128, | |
"size_n": 16384, | |
"size_k": 8192, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_128x16384x8192.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.0013561472296714783, | |
"perf_norm_to_sol": 0.8085774625886653, | |
"perf_norm_to_cublas": 1.6391070437701558, | |
"compute_intensity": 496.4848484848485, | |
"tile_compute_intensity": 5.3194805194805195, | |
"MxNxK": 4294967296, | |
"size_m": 512, | |
"size_n": 512, | |
"size_k": 16384, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_512x512x16384.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00001526400010334328, | |
"perf_norm_to_sol": 0.14031049344560945, | |
"perf_norm_to_cublas": 0.8918238047557039, | |
"compute_intensity": 102.4, | |
"tile_compute_intensity": 1.5238095238095237, | |
"MxNxK": 8388608, | |
"size_m": 128, | |
"size_n": 256, | |
"size_k": 256, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_128x256x256.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.0007082080002874136, | |
"perf_norm_to_sol": 0.7741723373778582, | |
"perf_norm_to_cublas": 0.9456972876818595, | |
"compute_intensity": 224.43835616438355, | |
"tile_compute_intensity": 10.24, | |
"MxNxK": 2147483648, | |
"size_m": 1024, | |
"size_n": 8192, | |
"size_k": 256, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_1024x8192x256.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.000057865603594109416, | |
"perf_norm_to_sol": 0.5921858246502623, | |
"perf_norm_to_cublas": 0.9114084920032571, | |
"compute_intensity": 126.03076923076924, | |
"tile_compute_intensity": 1.855072463768116, | |
"MxNxK": 134217728, | |
"size_m": 128, | |
"size_n": 4096, | |
"size_k": 256, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_128x4096x256.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.0000525951967574656, | |
"perf_norm_to_sol": 0.6515269890762188, | |
"perf_norm_to_cublas": 1.4948893211946366, | |
"compute_intensity": 227.55555555555554, | |
"tile_compute_intensity": 4.923076923076923, | |
"MxNxK": 134217728, | |
"size_m": 1024, | |
"size_n": 256, | |
"size_k": 512, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_1024x256x512.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.000013046400272287429, | |
"perf_norm_to_sol": 0.16416017765476884, | |
"perf_norm_to_cublas": 0.6651949747351433, | |
"compute_intensity": 49.951219512195124, | |
"tile_compute_intensity": 2.2857142857142856, | |
"MxNxK": 8388608, | |
"size_m": 1024, | |
"size_n": 128, | |
"size_k": 64, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_1024x128x64.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.0006756896153092384, | |
"perf_norm_to_sol": 0.8114303231984414, | |
"perf_norm_to_cublas": 1.6788773289969265, | |
"compute_intensity": 234.05714285714285, | |
"tile_compute_intensity": 1.9357277882797732, | |
"MxNxK": 2147483648, | |
"size_m": 128, | |
"size_n": 2048, | |
"size_k": 8192, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_128x2048x8192.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.02231219857931137, | |
"perf_norm_to_sol": 0.78633225280178, | |
"perf_norm_to_cublas": 0.8610694132679929, | |
"compute_intensity": 1489.4545454545455, | |
"tile_compute_intensity": 42.666666666666664, | |
"MxNxK": 68719476736, | |
"size_m": 4096, | |
"size_n": 8192, | |
"size_k": 2048, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_4096x8192x2048.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00003613120061345399, | |
"perf_norm_to_sol": 0.4742049752216505, | |
"perf_norm_to_cublas": 0.8304843995603236, | |
"compute_intensity": 84.45360824742268, | |
"tile_compute_intensity": 1.7534246575342465, | |
"MxNxK": 67108864, | |
"size_m": 128, | |
"size_n": 4096, | |
"size_k": 128, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_128x4096x128.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.000009308799781138077, | |
"perf_norm_to_sol": 0.11503627947791616, | |
"perf_norm_to_cublas": 0.9027157202911352, | |
"compute_intensity": 73.14285714285714, | |
"tile_compute_intensity": 1.4545454545454546, | |
"MxNxK": 4194304, | |
"size_m": 128, | |
"size_n": 256, | |
"size_k": 128, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_128x256x128.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00035050881560891865, | |
"perf_norm_to_sol": 0.7821130575271258, | |
"perf_norm_to_cublas": 1.0931491123530057, | |
"compute_intensity": 122.26865671641791, | |
"tile_compute_intensity": 1.9320754716981132, | |
"MxNxK": 1073741824, | |
"size_m": 4096, | |
"size_n": 64, | |
"size_k": 4096, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_4096x64x4096.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00037402561865746976, | |
"perf_norm_to_sol": 0.732937819741049, | |
"perf_norm_to_cublas": 0.8651129543450381, | |
"compute_intensity": 119.5912408759124, | |
"tile_compute_intensity": 7.757575757575758, | |
"MxNxK": 1073741824, | |
"size_m": 1024, | |
"size_n": 8192, | |
"size_k": 128, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_1024x8192x128.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.000008838400390231982, | |
"perf_norm_to_sol": 0.15687737819061626, | |
"perf_norm_to_cublas": 0.7013033647927274, | |
"compute_intensity": 29.257142857142856, | |
"tile_compute_intensity": 2, | |
"MxNxK": 4194304, | |
"size_m": 512, | |
"size_n": 256, | |
"size_k": 32, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_512x256x32.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.0006844704039394855, | |
"perf_norm_to_sol": 0.8010208181049113, | |
"perf_norm_to_cublas": 1.08799099795971, | |
"compute_intensity": 682.6666666666666, | |
"tile_compute_intensity": 10.24, | |
"MxNxK": 2147483648, | |
"size_m": 1024, | |
"size_n": 1024, | |
"size_k": 2048, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_1024x1024x2048.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.0028702815994620322, | |
"perf_norm_to_sol": 0.7640714319249613, | |
"perf_norm_to_cublas": 0.8809839797732689, | |
"compute_intensity": 125.5478927203065, | |
"tile_compute_intensity": 13.473684210526315, | |
"MxNxK": 8589934592, | |
"size_m": 16384, | |
"size_n": 4096, | |
"size_k": 128, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_16384x4096x128.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.0007064640056341886, | |
"perf_norm_to_sol": 0.7760834785064846, | |
"perf_norm_to_cublas": 0.9431308626299083, | |
"compute_intensity": 234.05714285714285, | |
"tile_compute_intensity": 14.222222222222221, | |
"MxNxK": 2147483648, | |
"size_m": 2048, | |
"size_n": 4096, | |
"size_k": 256, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_2048x4096x256.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00007852159906178713, | |
"perf_norm_to_sol": 0.43640464015892844, | |
"perf_norm_to_cublas": 0.7524248497469367, | |
"compute_intensity": 59.7956204379562, | |
"tile_compute_intensity": 3.878787878787879, | |
"MxNxK": 134217728, | |
"size_m": 512, | |
"size_n": 4096, | |
"size_k": 64, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_512x4096x64.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00009585279622115195, | |
"perf_norm_to_sol": 0.41568872922218364, | |
"perf_norm_to_cublas": 0.7002070456238194, | |
"compute_intensity": 30.97164461247637, | |
"tile_compute_intensity": 2.6391752577319587, | |
"MxNxK": 134217728, | |
"size_m": 512, | |
"size_n": 8192, | |
"size_k": 32, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_512x8192x32.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.0003721888177096844, | |
"perf_norm_to_sol": 0.7365549646360852, | |
"perf_norm_to_cublas": 1.0273581097937692, | |
"compute_intensity": 221.40540540540542, | |
"tile_compute_intensity": 12.8, | |
"MxNxK": 1073741824, | |
"size_m": 4096, | |
"size_n": 1024, | |
"size_k": 256, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_4096x1024x256.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.0013583552092313767, | |
"perf_norm_to_sol": 0.8072631358957227, | |
"perf_norm_to_cublas": 0.9662415287577266, | |
"compute_intensity": 682.6666666666666, | |
"tile_compute_intensity": 7.013698630136986, | |
"MxNxK": 4294967296, | |
"size_m": 512, | |
"size_n": 2048, | |
"size_k": 4096, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_512x2048x4096.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00019849599339067937, | |
"perf_norm_to_sol": 0.6905366621847778, | |
"perf_norm_to_cublas": 0.8892472063864213, | |
"compute_intensity": 118.72463768115942, | |
"tile_compute_intensity": 9.142857142857142, | |
"MxNxK": 536870912, | |
"size_m": 4096, | |
"size_n": 1024, | |
"size_k": 128, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_4096x1024x128.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00009453120292164385, | |
"perf_norm_to_sol": 0.7249921533668975, | |
"perf_norm_to_cublas": 1.1261297102393875, | |
"compute_intensity": 292.57142857142856, | |
"tile_compute_intensity": 3.1604938271604937, | |
"MxNxK": 268435456, | |
"size_m": 256, | |
"size_n": 512, | |
"size_k": 2048, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_256x512x2048.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00037679998204112055, | |
"perf_norm_to_sol": 0.7275412275263485, | |
"perf_norm_to_cublas": 0.8642803048746879, | |
"compute_intensity": 122.26865671641791, | |
"tile_compute_intensity": 9.846153846153847, | |
"MxNxK": 1073741824, | |
"size_m": 2048, | |
"size_n": 4096, | |
"size_k": 128, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_2048x4096x128.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.000006764799763914198, | |
"perf_norm_to_sol": 0.018878326630137977, | |
"perf_norm_to_cublas": 0.8027436496165086, | |
"compute_intensity": 23.272727272727273, | |
"tile_compute_intensity": 0.8, | |
"MxNxK": 262144, | |
"size_m": 128, | |
"size_n": 64, | |
"size_k": 32, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_128x64x32.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.005530016124248504, | |
"perf_norm_to_sol": 0.7931623063854456, | |
"perf_norm_to_cublas": 0.8835741718629713, | |
"compute_intensity": 246.37593984962405, | |
"tile_compute_intensity": 23.272727272727273, | |
"MxNxK": 17179869184, | |
"size_m": 16384, | |
"size_n": 4096, | |
"size_k": 256, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_16384x4096x256.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00010567678837105631, | |
"perf_norm_to_sol": 0.7185266353048919, | |
"perf_norm_to_cublas": 0.801144734046028, | |
"compute_intensity": 7.9360620004843785, | |
"tile_compute_intensity": 0.8873483535528596, | |
"MxNxK": 67108864, | |
"size_m": 512, | |
"size_n": 16384, | |
"size_k": 8, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_512x16384x8.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.0002187871839851141, | |
"perf_norm_to_sol": 0.626493555227519, | |
"perf_norm_to_cublas": 0.9714352849330806, | |
"compute_intensity": 60.12477064220184, | |
"tile_compute_intensity": 5.224489795918367, | |
"MxNxK": 536870912, | |
"size_m": 16384, | |
"size_n": 512, | |
"size_k": 64, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_16384x512x64.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.000006768000457668677, | |
"perf_norm_to_sol": 0.10917295004297849, | |
"perf_norm_to_cublas": 0.6643025718809844, | |
"compute_intensity": 14.124137931034483, | |
"tile_compute_intensity": 0.9411764705882353, | |
"MxNxK": 1048576, | |
"size_m": 1024, | |
"size_n": 64, | |
"size_k": 16, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_1024x64x16.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.011716591566801072, | |
"perf_norm_to_sol": 0.7487160951971618, | |
"perf_norm_to_cublas": 0.8604156891028933, | |
"compute_intensity": 127.0077519379845, | |
"tile_compute_intensity": 14.628571428571428, | |
"MxNxK": 34359738368, | |
"size_m": 16384, | |
"size_n": 16384, | |
"size_k": 128, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_16384x16384x128.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.022310467064380647, | |
"perf_norm_to_sol": 0.7863932800331825, | |
"perf_norm_to_cublas": 1.028081188055196, | |
"compute_intensity": 1820.4444444444443, | |
"tile_compute_intensity": 21.11340206185567, | |
"MxNxK": 68719476736, | |
"size_m": 2048, | |
"size_n": 2048, | |
"size_k": 16384, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_2048x2048x16384.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.0003516895929351449, | |
"perf_norm_to_sol": 0.7794871584859682, | |
"perf_norm_to_cublas": 1.0837010879808762, | |
"compute_intensity": 227.55555555555554, | |
"tile_compute_intensity": 3.5310344827586206, | |
"MxNxK": 1073741824, | |
"size_m": 2048, | |
"size_n": 128, | |
"size_k": 4096, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_2048x128x4096.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.000022575999901164322, | |
"perf_norm_to_sol": 0.37946481145111527, | |
"perf_norm_to_cublas": 1.3379163444062434, | |
"compute_intensity": 97.52380952380952, | |
"tile_compute_intensity": 1.7297297297297298, | |
"MxNxK": 33554432, | |
"size_m": 1024, | |
"size_n": 64, | |
"size_k": 512, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_1024x64x512.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.000022470399562735112, | |
"perf_norm_to_sol": 0.3812481180807699, | |
"perf_norm_to_cublas": 0.9710909034273197, | |
"compute_intensity": 83.59183673469387, | |
"tile_compute_intensity": 2.909090909090909, | |
"MxNxK": 33554432, | |
"size_m": 2048, | |
"size_n": 128, | |
"size_k": 128, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_2048x128x128.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.0027335872873663902, | |
"perf_norm_to_sol": 0.8022791815957389, | |
"perf_norm_to_cublas": 0.9227868124709769, | |
"compute_intensity": 239.1824817518248, | |
"tile_compute_intensity": 3.9083969465648853, | |
"MxNxK": 8589934592, | |
"size_m": 16384, | |
"size_n": 128, | |
"size_k": 4096, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_16384x128x4096.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00003221119986847043, | |
"perf_norm_to_sol": 0.5319142149809346, | |
"perf_norm_to_cublas": 1.2504470673358583, | |
"compute_intensity": 124.12121212121212, | |
"tile_compute_intensity": 3.2, | |
"MxNxK": 67108864, | |
"size_m": 2048, | |
"size_n": 128, | |
"size_k": 256, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_2048x128x256.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.0220623642206192, | |
"perf_norm_to_sol": 0.7952366844453344, | |
"perf_norm_to_cublas": 0.8581497307609545, | |
"compute_intensity": 1310.72, | |
"tile_compute_intensity": 26.94736842105263, | |
"MxNxK": 68719476736, | |
"size_m": 2048, | |
"size_n": 16384, | |
"size_k": 2048, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_2048x16384x2048.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.0006798783782869577, | |
"perf_norm_to_sol": 0.8064310624403975, | |
"perf_norm_to_cublas": 1.04623891724794, | |
"compute_intensity": 234.05714285714285, | |
"tile_compute_intensity": 1.9616858237547892, | |
"MxNxK": 2147483648, | |
"size_m": 128, | |
"size_n": 4096, | |
"size_k": 4096, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_128x4096x4096.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.023440156877040864, | |
"perf_norm_to_sol": 0.7484933426796024, | |
"perf_norm_to_cublas": 0.8725945894779815, | |
"compute_intensity": 252.06153846153848, | |
"tile_compute_intensity": 26.94736842105263, | |
"MxNxK": 68719476736, | |
"size_m": 16384, | |
"size_n": 16384, | |
"size_k": 256, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_16384x16384x256.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00007319999858736991, | |
"perf_norm_to_sol": 0.5204030385752563, | |
"perf_norm_to_cublas": 0.7077158709074924, | |
"compute_intensity": 15.845261121856867, | |
"tile_compute_intensity": 1.8285714285714285, | |
"MxNxK": 67108864, | |
"size_m": 4096, | |
"size_n": 1024, | |
"size_k": 16, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_4096x1024x16.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00003261759993620217, | |
"perf_norm_to_sol": 0.5252868121855561, | |
"perf_norm_to_cublas": 3.1870889406050797, | |
"compute_intensity": 157.53846153846155, | |
"tile_compute_intensity": 1.5900621118012421, | |
"MxNxK": 67108864, | |
"size_m": 128, | |
"size_n": 256, | |
"size_k": 2048, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_128x256x2048.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00003295679925940931, | |
"perf_norm_to_sol": 0.5198804336783317, | |
"perf_norm_to_cublas": 2.8257111747132386, | |
"compute_intensity": 84.45360824742268, | |
"tile_compute_intensity": 0.9990243902439024, | |
"MxNxK": 67108864, | |
"size_m": 128, | |
"size_n": 64, | |
"size_k": 8192, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_128x64x8192.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00001327359932474792, | |
"perf_norm_to_sol": 0.22266231118738403, | |
"perf_norm_to_cublas": 0.6345226709326093, | |
"compute_intensity": 28.248275862068965, | |
"tile_compute_intensity": 1.3061224489795917, | |
"MxNxK": 8388608, | |
"size_m": 128, | |
"size_n": 2048, | |
"size_k": 32, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_128x2048x32.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.022296051681041717, | |
"perf_norm_to_sol": 0.7869017180628843, | |
"perf_norm_to_cublas": 0.9084355975725387, | |
"compute_intensity": 885.6216216216217, | |
"tile_compute_intensity": 7.816793893129771, | |
"MxNxK": 68719476736, | |
"size_m": 512, | |
"size_n": 16384, | |
"size_k": 8192, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_512x16384x8192.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00009911040542647243, | |
"perf_norm_to_sol": 0.6914953084050262, | |
"perf_norm_to_cublas": 0.9598023449527745, | |
"compute_intensity": 292.57142857142856, | |
"tile_compute_intensity": 7.111111111111111, | |
"MxNxK": 268435456, | |
"size_m": 1024, | |
"size_n": 512, | |
"size_k": 512, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_1024x512x512.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.0027292095124721526, | |
"perf_norm_to_sol": 0.8035660735119912, | |
"perf_norm_to_cublas": 1.0500599495015537, | |
"compute_intensity": 404.5432098765432, | |
"tile_compute_intensity": 7.529411764705882, | |
"MxNxK": 8589934592, | |
"size_m": 16384, | |
"size_n": 256, | |
"size_k": 2048, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_16384x256x2048.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.000006787200254620984, | |
"perf_norm_to_sol": 0.07888743908048752, | |
"perf_norm_to_cublas": 1.1942479715127918, | |
"compute_intensity": 56.888888888888886, | |
"tile_compute_intensity": 1.2307692307692308, | |
"MxNxK": 2097152, | |
"size_m": 256, | |
"size_n": 64, | |
"size_k": 128, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_256x64x128.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.002867603115737438, | |
"perf_norm_to_sol": 0.7647851125886511, | |
"perf_norm_to_cublas": 0.8632090114097609, | |
"compute_intensity": 125.5478927203065, | |
"tile_compute_intensity": 12.487804878048781, | |
"MxNxK": 8589934592, | |
"size_m": 4096, | |
"size_n": 16384, | |
"size_k": 128, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_4096x16384x128.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.005422383919358254, | |
"perf_norm_to_sol": 0.808906268661397, | |
"perf_norm_to_cublas": 0.9044960081002494, | |
"compute_intensity": 819.2, | |
"tile_compute_intensity": 7.474452554744525, | |
"MxNxK": 17179869184, | |
"size_m": 512, | |
"size_n": 4096, | |
"size_k": 8192, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_512x4096x8192.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.0013446495868265629, | |
"perf_norm_to_sol": 0.8154913344020892, | |
"perf_norm_to_cublas": 1.6490029654250762, | |
"compute_intensity": 399.609756097561, | |
"tile_compute_intensity": 3.5493934142114383, | |
"MxNxK": 4294967296, | |
"size_m": 256, | |
"size_n": 1024, | |
"size_k": 16384, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_256x1024x16384.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.000017113600915763526, | |
"perf_norm_to_sol": 0.2502920801993444, | |
"perf_norm_to_cublas": 0.9474569565244381, | |
"compute_intensity": 113.77777777777777, | |
"tile_compute_intensity": 1.6842105263157894, | |
"MxNxK": 16777216, | |
"size_m": 128, | |
"size_n": 512, | |
"size_k": 256, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_128x512x256.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.000022758400882594286, | |
"perf_norm_to_sol": 0.4360914365711893, | |
"perf_norm_to_cublas": 0.9119797276810303, | |
"compute_intensity": 31.03030303030303, | |
"tile_compute_intensity": 2.909090909090909, | |
"MxNxK": 33554432, | |
"size_m": 1024, | |
"size_n": 1024, | |
"size_k": 32, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_1024x1024x32.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00005600320291705429, | |
"perf_norm_to_sol": 0.6883463808513554, | |
"perf_norm_to_cublas": 0.8216101667169545, | |
"compute_intensity": 7.875030040855563, | |
"tile_compute_intensity": 0.8858131487889274, | |
"MxNxK": 33554432, | |
"size_m": 16384, | |
"size_n": 256, | |
"size_k": 8, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_16384x256x8.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00009510079980827868, | |
"perf_norm_to_sol": 0.7206498841722643, | |
"perf_norm_to_cublas": 1.0594569423925888, | |
"compute_intensity": 341.3333333333333, | |
"tile_compute_intensity": 5.12, | |
"MxNxK": 268435456, | |
"size_m": 512, | |
"size_n": 512, | |
"size_k": 1024, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_512x512x1024.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.000032572800409980116, | |
"perf_norm_to_sol": 0.5260092738720064, | |
"perf_norm_to_cublas": 3.5727478256876006, | |
"compute_intensity": 124.12121212121212, | |
"tile_compute_intensity": 1.3298701298701299, | |
"MxNxK": 67108864, | |
"size_m": 128, | |
"size_n": 128, | |
"size_k": 4096, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_128x128x4096.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.000016003201017156245, | |
"perf_norm_to_sol": 0.5950919076710591, | |
"perf_norm_to_cublas": 0.9558087627926669, | |
"compute_intensity": 7.922630560928433, | |
"tile_compute_intensity": 0.8767123287671232, | |
"MxNxK": 8388608, | |
"size_m": 512, | |
"size_n": 2048, | |
"size_k": 8, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_512x2048x8.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00018526079365983606, | |
"perf_norm_to_sol": 0.739869229885349, | |
"perf_norm_to_cublas": 1.0442015005736223, | |
"compute_intensity": 199.8048780487805, | |
"tile_compute_intensity": 3.657142857142857, | |
"MxNxK": 536870912, | |
"size_m": 4096, | |
"size_n": 128, | |
"size_k": 1024, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_4096x128x1024.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.000013302400475367904, | |
"perf_norm_to_sol": 0.1610009704954922, | |
"perf_norm_to_cublas": 0.743324501659494, | |
"compute_intensity": 78.76923076923077, | |
"tile_compute_intensity": 1.6, | |
"MxNxK": 8388608, | |
"size_m": 128, | |
"size_n": 512, | |
"size_k": 128, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_128x512x128.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.0006926591973751784, | |
"perf_norm_to_sol": 0.7915509459917458, | |
"perf_norm_to_cublas": 1.0743384272252636, | |
"compute_intensity": 585.1428571428571, | |
"tile_compute_intensity": 14.222222222222221, | |
"MxNxK": 2147483648, | |
"size_m": 2048, | |
"size_n": 1024, | |
"size_k": 1024, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_2048x1024x1024.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.005527455732226372, | |
"perf_norm_to_sol": 0.7935297098600108, | |
"perf_norm_to_cublas": 0.8819166187424141, | |
"compute_intensity": 246.37593984962405, | |
"tile_compute_intensity": 20.48, | |
"MxNxK": 17179869184, | |
"size_m": 4096, | |
"size_n": 16384, | |
"size_k": 256, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_4096x16384x256.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.0003599104005843401, | |
"perf_norm_to_sol": 0.7616826883052593, | |
"perf_norm_to_cublas": 1.05520482100117, | |
"compute_intensity": 372.3636363636364, | |
"tile_compute_intensity": 6.2439024390243905, | |
"MxNxK": 1073741824, | |
"size_m": 2048, | |
"size_n": 256, | |
"size_k": 2048, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_2048x256x2048.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00043779839761555196, | |
"perf_norm_to_sol": 0.6867575485911575, | |
"perf_norm_to_cublas": 0.747339429497953, | |
"compute_intensity": 15.953261927945473, | |
"tile_compute_intensity": 1.9393939393939394, | |
"MxNxK": 536870912, | |
"size_m": 8192, | |
"size_n": 4096, | |
"size_k": 16, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_8192x4096x16.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.000006473599933087826, | |
"perf_norm_to_sol": 0.02818217747039934, | |
"perf_norm_to_cublas": 0.5378151199112354, | |
"compute_intensity": 14.222222222222221, | |
"tile_compute_intensity": 0.8, | |
"MxNxK": 262144, | |
"size_m": 128, | |
"size_n": 128, | |
"size_k": 16, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_128x128x16.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.000008729600085644052, | |
"perf_norm_to_sol": 0.12266881446127087, | |
"perf_norm_to_cublas": 1.5392229171325478, | |
"compute_intensity": 73.14285714285714, | |
"tile_compute_intensity": 1.28, | |
"MxNxK": 4194304, | |
"size_m": 256, | |
"size_n": 64, | |
"size_k": 256, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_256x64x256.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.000008524800068698824, | |
"perf_norm_to_sol": 0.162648398724282, | |
"perf_norm_to_cublas": 0.6940690635584525, | |
"compute_intensity": 29.257142857142856, | |
"tile_compute_intensity": 1.7777777777777777, | |
"MxNxK": 4194304, | |
"size_m": 256, | |
"size_n": 512, | |
"size_k": 32, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_256x512x32.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.0007379360031336546, | |
"perf_norm_to_sol": 0.81210210168612, | |
"perf_norm_to_cublas": 0.8308015590278446, | |
"compute_intensity": 7.990246281394782, | |
"tile_compute_intensity": 0.982725527831094, | |
"MxNxK": 536870912, | |
"size_m": 4096, | |
"size_n": 16384, | |
"size_k": 8, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_4096x16384x8.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.000054169603390619156, | |
"perf_norm_to_sol": 0.701205754255858, | |
"perf_norm_to_cublas": 0.8494210406815107, | |
"compute_intensity": 7.934140435835351, | |
"tile_compute_intensity": 0.8858131487889274, | |
"MxNxK": 33554432, | |
"size_m": 512, | |
"size_n": 8192, | |
"size_k": 8, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_512x8192x8.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.0003599296091124415, | |
"perf_norm_to_sol": 0.7616420392367963, | |
"perf_norm_to_cublas": 1.0333131313248927, | |
"compute_intensity": 169.78238341968913, | |
"tile_compute_intensity": 1.9320754716981132, | |
"MxNxK": 1073741824, | |
"size_m": 128, | |
"size_n": 16384, | |
"size_k": 512, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_128x16384x512.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.000006335999933071434, | |
"perf_norm_to_sol": 0.19724037045131676, | |
"perf_norm_to_cublas": 0.707575746440224, | |
"compute_intensity": 7.728301886792453, | |
"tile_compute_intensity": 0.7619047619047619, | |
"MxNxK": 1048576, | |
"size_m": 1024, | |
"size_n": 128, | |
"size_k": 8, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_1024x128x8.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00018119679298251868, | |
"perf_norm_to_sol": 0.7564635028958566, | |
"perf_norm_to_cublas": 1.8236967967101936, | |
"compute_intensity": 168.90721649484536, | |
"tile_compute_intensity": 1.5987509758001561, | |
"MxNxK": 536870912, | |
"size_m": 128, | |
"size_n": 256, | |
"size_k": 16384, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_128x256x16384.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.001378041598945856, | |
"perf_norm_to_sol": 0.7957307578401308, | |
"perf_norm_to_cublas": 0.9411547605265503, | |
"compute_intensity": 399.609756097561, | |
"tile_compute_intensity": 18.285714285714285, | |
"MxNxK": 4294967296, | |
"size_m": 8192, | |
"size_n": 1024, | |
"size_k": 512, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_8192x1024x512.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.005474310368299484, | |
"perf_norm_to_sol": 0.8012334062856865, | |
"perf_norm_to_cublas": 0.8977256194117859, | |
"compute_intensity": 780.1904761904761, | |
"tile_compute_intensity": 10.61139896373057, | |
"MxNxK": 17179869184, | |
"size_m": 2048, | |
"size_n": 512, | |
"size_k": 16384, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_2048x512x16384.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.0005105696152895689, | |
"perf_norm_to_sol": 0.5923046938801325, | |
"perf_norm_to_cublas": 0.7408823319910193, | |
"compute_intensity": 31.813592233009707, | |
"tile_compute_intensity": 3.710144927536232, | |
"MxNxK": 1073741824, | |
"size_m": 4096, | |
"size_n": 8192, | |
"size_k": 32, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_4096x8192x32.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00040339198894798753, | |
"perf_norm_to_sol": 0.6795809757675917, | |
"perf_norm_to_cublas": 0.8475567361366916, | |
"compute_intensity": 63.01538461538462, | |
"tile_compute_intensity": 6.7368421052631575, | |
"MxNxK": 1073741824, | |
"size_m": 4096, | |
"size_n": 4096, | |
"size_k": 64, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_4096x4096x64.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.0031511902809143065, | |
"perf_norm_to_sol": 0.6959592967177162, | |
"perf_norm_to_cublas": 0.9154726990894339, | |
"compute_intensity": 63.627184466019415, | |
"tile_compute_intensity": 7.420289855072464, | |
"MxNxK": 8589934592, | |
"size_m": 8192, | |
"size_n": 16384, | |
"size_k": 64, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_8192x16384x64.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.000025129600544460118, | |
"perf_norm_to_sol": 0.34090464472998905, | |
"perf_norm_to_cublas": 0.7538519757738319, | |
"compute_intensity": 50.88198757763975, | |
"tile_compute_intensity": 2.56, | |
"MxNxK": 33554432, | |
"size_m": 4096, | |
"size_n": 128, | |
"size_k": 64, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_4096x128x64.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.0002001215936616063, | |
"perf_norm_to_sol": 0.6849273895191269, | |
"perf_norm_to_cublas": 0.8875883826205851, | |
"compute_intensity": 112.99310344827586, | |
"tile_compute_intensity": 5.224489795918367, | |
"MxNxK": 536870912, | |
"size_m": 512, | |
"size_n": 8192, | |
"size_k": 128, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_512x8192x128.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.005387142673134804, | |
"perf_norm_to_sol": 0.8141979170760101, | |
"perf_norm_to_cublas": 1.0594517627099258, | |
"compute_intensity": 1365.3333333333333, | |
"tile_compute_intensity": 20.48, | |
"MxNxK": 17179869184, | |
"size_m": 2048, | |
"size_n": 2048, | |
"size_k": 4096, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_2048x2048x4096.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00005351679865270853, | |
"perf_norm_to_sol": 0.6403071754279639, | |
"perf_norm_to_cublas": 2.177409769465602, | |
"compute_intensity": 110.70270270270271, | |
"tile_compute_intensity": 1.5950155763239875, | |
"MxNxK": 134217728, | |
"size_m": 512, | |
"size_n": 64, | |
"size_k": 4096, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_512x64x4096.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00012826559832319618, | |
"perf_norm_to_sol": 0.592840576565124, | |
"perf_norm_to_cublas": 0.6981513589075843, | |
"compute_intensity": 15.860600193610843, | |
"tile_compute_intensity": 1.7655172413793103, | |
"MxNxK": 134217728, | |
"size_m": 1024, | |
"size_n": 8192, | |
"size_k": 16, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_1024x8192x16.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00020630080252885817, | |
"perf_norm_to_sol": 0.6644121547412678, | |
"perf_norm_to_cublas": 0.9482076285992115, | |
"compute_intensity": 102.0809968847352, | |
"tile_compute_intensity": 5.224489795918367, | |
"MxNxK": 536870912, | |
"size_m": 16384, | |
"size_n": 256, | |
"size_k": 128, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_16384x256x128.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.0003734431928023696, | |
"perf_norm_to_sol": 0.7340809171240661, | |
"perf_norm_to_cublas": 1.036152284553224, | |
"compute_intensity": 169.78238341968913, | |
"tile_compute_intensity": 6.2439024390243905, | |
"MxNxK": 1073741824, | |
"size_m": 16384, | |
"size_n": 256, | |
"size_k": 256, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_16384x256x256.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.0001820608042180538, | |
"perf_norm_to_sol": 0.7528735321243804, | |
"perf_norm_to_cublas": 1.8190846366215945, | |
"compute_intensity": 248.24242424242425, | |
"tile_compute_intensity": 2.6597402597402597, | |
"MxNxK": 536870912, | |
"size_m": 256, | |
"size_n": 256, | |
"size_k": 8192, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_256x256x8192.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.0027267711237072946, | |
"perf_norm_to_sol": 0.8042846547190595, | |
"perf_norm_to_cublas": 1.6157742057802038, | |
"compute_intensity": 655.36, | |
"tile_compute_intensity": 6.38006230529595, | |
"MxNxK": 8589934592, | |
"size_m": 512, | |
"size_n": 1024, | |
"size_k": 16384, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_512x1024x16384.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.010817008465528489, | |
"perf_norm_to_sol": 0.8109821412150202, | |
"perf_norm_to_cublas": 1.0519670185502574, | |
"compute_intensity": 474.8985507246377, | |
"tile_compute_intensity": 3.9536679536679538, | |
"MxNxK": 34359738368, | |
"size_m": 256, | |
"size_n": 16384, | |
"size_k": 8192, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_256x16384x8192.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00021225600503385066, | |
"perf_norm_to_sol": 0.6457709439654802, | |
"perf_norm_to_cublas": 0.9580430756864114, | |
"compute_intensity": 61.82641509433962, | |
"tile_compute_intensity": 6.095238095238095, | |
"MxNxK": 536870912, | |
"size_m": 8192, | |
"size_n": 1024, | |
"size_k": 64, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_8192x1024x64.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.000008627200440969317, | |
"perf_norm_to_sol": 0.2876003578119735, | |
"perf_norm_to_cublas": 0.7266320006166752, | |
"compute_intensity": 15.515151515151516, | |
"tile_compute_intensity": 1.4545454545454546, | |
"MxNxK": 4194304, | |
"size_m": 512, | |
"size_n": 512, | |
"size_k": 16, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_512x512x16.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.000022662398987449705, | |
"perf_norm_to_sol": 0.3780181237899816, | |
"perf_norm_to_cublas": 0.8000564677828632, | |
"compute_intensity": 56.10958904109589, | |
"tile_compute_intensity": 2.56, | |
"MxNxK": 33554432, | |
"size_m": 256, | |
"size_n": 2048, | |
"size_k": 64, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_256x2048x64.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.0003665632102638483, | |
"perf_norm_to_sol": 0.7478587970374376, | |
"perf_norm_to_cublas": 1.045900950026582, | |
"compute_intensity": 202.2716049382716, | |
"tile_compute_intensity": 6.2439024390243905, | |
"MxNxK": 1073741824, | |
"size_m": 512, | |
"size_n": 8192, | |
"size_k": 256, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_512x8192x256.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.0003891648026183248, | |
"perf_norm_to_sol": 0.7044252707893642, | |
"perf_norm_to_cublas": 0.9315785735990563, | |
"compute_intensity": 113.3840830449827, | |
"tile_compute_intensity": 7.757575757575758, | |
"MxNxK": 1073741824, | |
"size_m": 16384, | |
"size_n": 512, | |
"size_k": 128, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_16384x512x128.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00036002560518682, | |
"perf_norm_to_sol": 0.7614389574426266, | |
"perf_norm_to_cublas": 1.032379912231677, | |
"compute_intensity": 252.06153846153848, | |
"tile_compute_intensity": 3.710144927536232, | |
"MxNxK": 1073741824, | |
"size_m": 256, | |
"size_n": 8192, | |
"size_k": 512, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_256x8192x512.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00035406078677624464, | |
"perf_norm_to_sol": 0.7742668256548536, | |
"perf_norm_to_cublas": 1.0611872521461623, | |
"compute_intensity": 372.3636363636364, | |
"tile_compute_intensity": 3.710144927536232, | |
"MxNxK": 1073741824, | |
"size_m": 256, | |
"size_n": 2048, | |
"size_k": 2048, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_256x2048x2048.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.000024016000679694116, | |
"perf_norm_to_sol": 0.3567120795869676, | |
"perf_norm_to_cublas": 4.299133950206227, | |
"compute_intensity": 97.52380952380952, | |
"tile_compute_intensity": 1.3264248704663213, | |
"MxNxK": 33554432, | |
"size_m": 256, | |
"size_n": 64, | |
"size_k": 2048, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_256x64x2048.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.011065717786550522, | |
"perf_norm_to_sol": 0.7927547815811308, | |
"perf_norm_to_cublas": 0.9132794533857208, | |
"compute_intensity": 250.13740458015266, | |
"tile_compute_intensity": 24.38095238095238, | |
"MxNxK": 34359738368, | |
"size_m": 8192, | |
"size_n": 16384, | |
"size_k": 256, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_8192x16384x256.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.0000536640000063926, | |
"perf_norm_to_sol": 0.6385508008940979, | |
"perf_norm_to_cublas": 1.4733451887621816, | |
"compute_intensity": 163.84, | |
"tile_compute_intensity": 1.8823529411764706, | |
"MxNxK": 134217728, | |
"size_m": 128, | |
"size_n": 2048, | |
"size_k": 512, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_128x2048x512.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.0006791135761886835, | |
"perf_norm_to_sol": 0.8073392465649578, | |
"perf_norm_to_cublas": 1.672905370069213, | |
"compute_intensity": 224.43835616438355, | |
"tile_compute_intensity": 1.8806244260789715, | |
"MxNxK": 2147483648, | |
"size_m": 128, | |
"size_n": 1024, | |
"size_k": 16384, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_128x1024x16384.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.0013577568344771862, | |
"perf_norm_to_sol": 0.8076189034884482, | |
"perf_norm_to_cublas": 1.6639656904616926, | |
"compute_intensity": 125.06870229007633, | |
"tile_compute_intensity": 1.9375591296121097, | |
"MxNxK": 4294967296, | |
"size_m": 4096, | |
"size_n": 64, | |
"size_k": 16384, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_4096x64x16384.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.0001846815925091505, | |
"perf_norm_to_sol": 0.7421896187421063, | |
"perf_norm_to_cublas": 1.0508897866292217, | |
"compute_intensity": 112.99310344827586, | |
"tile_compute_intensity": 1.9393939393939394, | |
"MxNxK": 536870912, | |
"size_m": 8192, | |
"size_n": 64, | |
"size_k": 1024, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_8192x64x1024.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.02175392657518387, | |
"perf_norm_to_sol": 0.8065119330615509, | |
"perf_norm_to_cublas": 0.8621528339324573, | |
"compute_intensity": 1310.72, | |
"tile_compute_intensity": 15.058823529411764, | |
"MxNxK": 68719476736, | |
"size_m": 1024, | |
"size_n": 16384, | |
"size_k": 4096, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_1024x16384x4096.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00018798079108819365, | |
"perf_norm_to_sol": 0.7291636551776391, | |
"perf_norm_to_cublas": 1.0185210931747826, | |
"compute_intensity": 409.6, | |
"tile_compute_intensity": 7.529411764705882, | |
"MxNxK": 536870912, | |
"size_m": 1024, | |
"size_n": 512, | |
"size_k": 1024, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_1024x512x1024.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.005731024220585823, | |
"perf_norm_to_sol": 0.7653431872966837, | |
"perf_norm_to_cublas": 0.8816194719470445, | |
"compute_intensity": 126.51737451737452, | |
"tile_compute_intensity": 13.837837837837839, | |
"MxNxK": 17179869184, | |
"size_m": 8192, | |
"size_n": 16384, | |
"size_k": 128, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_8192x16384x128.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.000006588800169993192, | |
"perf_norm_to_sol": 0.09968195952626771, | |
"perf_norm_to_cublas": 0.5828071935330909, | |
"compute_intensity": 15.058823529411764, | |
"tile_compute_intensity": 1.1428571428571428, | |
"MxNxK": 1048576, | |
"size_m": 256, | |
"size_n": 256, | |
"size_k": 16, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_256x256x16.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00035569278988987205, | |
"perf_norm_to_sol": 0.7707143053166191, | |
"perf_norm_to_cublas": 1.0635964560585602, | |
"compute_intensity": 202.2716049382716, | |
"tile_compute_intensity": 1.9541984732824427, | |
"MxNxK": 1073741824, | |
"size_m": 128, | |
"size_n": 8192, | |
"size_k": 1024, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_128x8192x1024.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.0027326496317982675, | |
"perf_norm_to_sol": 0.8025544680916945, | |
"perf_norm_to_cublas": 0.9199288436943559, | |
"compute_intensity": 442.81081081081084, | |
"tile_compute_intensity": 7.420289855072464, | |
"MxNxK": 8589934592, | |
"size_m": 8192, | |
"size_n": 256, | |
"size_k": 4096, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_8192x256x4096.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00018012479413300752, | |
"perf_norm_to_sol": 0.7609655372143678, | |
"perf_norm_to_cublas": 1.8175133051312249, | |
"compute_intensity": 199.8048780487805, | |
"tile_compute_intensity": 1.7746967071057191, | |
"MxNxK": 536870912, | |
"size_m": 128, | |
"size_n": 512, | |
"size_k": 8192, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_128x512x8192.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.0007073279935866595, | |
"perf_norm_to_sol": 0.7751355070114767, | |
"perf_norm_to_cublas": 0.9611517711973329, | |
"compute_intensity": 203.527950310559, | |
"tile_compute_intensity": 10.24, | |
"MxNxK": 2147483648, | |
"size_m": 16384, | |
"size_n": 512, | |
"size_k": 256, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_16384x512x256.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00005495999939739704, | |
"perf_norm_to_sol": 0.6234932780018515, | |
"perf_norm_to_cublas": 1.5443959451204272, | |
"compute_intensity": 101.1358024691358, | |
"tile_compute_intensity": 1.8823529411764706, | |
"MxNxK": 134217728, | |
"size_m": 4096, | |
"size_n": 64, | |
"size_k": 512, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_4096x64x512.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00002380799996899441, | |
"perf_norm_to_sol": 0.4919630856702078, | |
"perf_norm_to_cublas": 0.9018817188691974, | |
"compute_intensity": 28.395147313691506, | |
"tile_compute_intensity": 1.9692307692307693, | |
"MxNxK": 33554432, | |
"size_m": 8192, | |
"size_n": 128, | |
"size_k": 32, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_8192x128x32.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.000006598400068469345, | |
"perf_norm_to_sol": 0.04976846698113925, | |
"perf_norm_to_cublas": 0.802133850095691, | |
"compute_intensity": 39.38461538461539, | |
"tile_compute_intensity": 1.1428571428571428, | |
"MxNxK": 1048576, | |
"size_m": 256, | |
"size_n": 64, | |
"size_k": 64, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_256x64x64.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.022355611622333526, | |
"perf_norm_to_sol": 0.7848052502532795, | |
"perf_norm_to_cublas": 0.9308569043079675, | |
"compute_intensity": 885.6216216216217, | |
"tile_compute_intensity": 14.840579710144928, | |
"MxNxK": 68719476736, | |
"size_m": 16384, | |
"size_n": 512, | |
"size_k": 8192, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_16384x512x8192.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00018532160902395844, | |
"perf_norm_to_sol": 0.7396264335009695, | |
"perf_norm_to_cublas": 1.0323415313959634, | |
"compute_intensity": 409.6, | |
"tile_compute_intensity": 6.095238095238095, | |
"MxNxK": 536870912, | |
"size_m": 512, | |
"size_n": 1024, | |
"size_k": 1024, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_512x1024x1024.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.0005866847932338715, | |
"perf_norm_to_sol": 0.518445694453729, | |
"perf_norm_to_cublas": 0.9334293465492389, | |
"compute_intensity": 31.721200387221685, | |
"tile_compute_intensity": 3.710144927536232, | |
"MxNxK": 1073741824, | |
"size_m": 16384, | |
"size_n": 2048, | |
"size_k": 32, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_16384x2048x32.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00001361279864795506, | |
"perf_norm_to_sol": 0.3591763851834111, | |
"perf_norm_to_cublas": 0.7618712694999403, | |
"compute_intensity": 15.633587786259541, | |
"tile_compute_intensity": 1.5238095238095237, | |
"MxNxK": 8388608, | |
"size_m": 512, | |
"size_n": 1024, | |
"size_k": 16, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_512x1024x16.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00018118079751729965, | |
"perf_norm_to_sol": 0.7565302869359748, | |
"perf_norm_to_cublas": 1.8143733159111632, | |
"compute_intensity": 168.90721649484536, | |
"tile_compute_intensity": 1.9980487804878049, | |
"MxNxK": 536870912, | |
"size_m": 256, | |
"size_n": 128, | |
"size_k": 16384, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_256x128x16384.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.000013251201016828417, | |
"perf_norm_to_sol": 0.2657189140614988, | |
"perf_norm_to_cublas": 0.6537067342136463, | |
"compute_intensity": 25.5202492211838, | |
"tile_compute_intensity": 1.3061224489795917, | |
"MxNxK": 8388608, | |
"size_m": 4096, | |
"size_n": 64, | |
"size_k": 32, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_4096x64x32.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.002728569693863392, | |
"perf_norm_to_sol": 0.8037545006312831, | |
"perf_norm_to_cublas": 0.9300099522851448, | |
"compute_intensity": 125.5478927203065, | |
"tile_compute_intensity": 1.9806576402321083, | |
"MxNxK": 8589934592, | |
"size_m": 16384, | |
"size_n": 64, | |
"size_k": 8192, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_16384x64x8192.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.005446160212159157, | |
"perf_norm_to_sol": 0.8053748278768899, | |
"perf_norm_to_cublas": 0.8784011683086032, | |
"compute_intensity": 448.8767123287671, | |
"tile_compute_intensity": 28.444444444444443, | |
"MxNxK": 17179869184, | |
"size_m": 16384, | |
"size_n": 2048, | |
"size_k": 512, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_16384x2048x512.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.0006955776363611222, | |
"perf_norm_to_sol": 0.7882298312528819, | |
"perf_norm_to_cublas": 1.0819485779869824, | |
"compute_intensity": 224.43835616438355, | |
"tile_compute_intensity": 3.8208955223880596, | |
"MxNxK": 2147483648, | |
"size_m": 8192, | |
"size_n": 128, | |
"size_k": 2048, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_8192x128x2048.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.0006951231975108385, | |
"perf_norm_to_sol": 0.7887451388408845, | |
"perf_norm_to_cublas": 1.0545008503343827, | |
"compute_intensity": 409.6, | |
"tile_compute_intensity": 16, | |
"MxNxK": 2147483648, | |
"size_m": 2048, | |
"size_n": 2048, | |
"size_k": 512, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_2048x2048x512.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.022547215223312378, | |
"perf_norm_to_sol": 0.7781360669183831, | |
"perf_norm_to_cublas": 0.9084337771409764, | |
"compute_intensity": 2048, | |
"tile_compute_intensity": 24.975609756097562, | |
"MxNxK": 68719476736, | |
"size_m": 2048, | |
"size_n": 4096, | |
"size_k": 8192, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_2048x4096x8192.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00000897919962881133, | |
"perf_norm_to_sol": 0.2946122339476898, | |
"perf_norm_to_cublas": 0.6931575916799706, | |
"compute_intensity": 15.003663003663004, | |
"tile_compute_intensity": 1.28, | |
"MxNxK": 4194304, | |
"size_m": 2048, | |
"size_n": 128, | |
"size_k": 16, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_2048x128x16.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.002730252780020237, | |
"perf_norm_to_sol": 0.803259019742695, | |
"perf_norm_to_cublas": 0.9101305074869697, | |
"compute_intensity": 744.7272727272727, | |
"tile_compute_intensity": 10.556701030927835, | |
"MxNxK": 8589934592, | |
"size_m": 2048, | |
"size_n": 512, | |
"size_k": 8192, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_2048x512x8192.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.0007272895891219378, | |
"perf_norm_to_sol": 0.7538607057391574, | |
"perf_norm_to_cublas": 0.8622215748386921, | |
"compute_intensity": 120.02930402930403, | |
"tile_compute_intensity": 7.876923076923077, | |
"MxNxK": 2147483648, | |
"size_m": 1024, | |
"size_n": 16384, | |
"size_k": 128, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_1024x16384x128.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.0006859360262751579, | |
"perf_norm_to_sol": 0.7993092969755602, | |
"perf_norm_to_cublas": 1.6649249129213644, | |
"compute_intensity": 234.05714285714285, | |
"tile_compute_intensity": 3.5432525951557095, | |
"MxNxK": 2147483648, | |
"size_m": 2048, | |
"size_n": 128, | |
"size_k": 8192, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_2048x128x8192.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.0005088543985038996, | |
"perf_norm_to_sol": 0.5977430989058179, | |
"perf_norm_to_cublas": 0.7425747181554171, | |
"compute_intensity": 31.721200387221685, | |
"tile_compute_intensity": 3.5310344827586206, | |
"MxNxK": 1073741824, | |
"size_m": 2048, | |
"size_n": 16384, | |
"size_k": 32, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_2048x16384x32.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.000006022399975336157, | |
"perf_norm_to_sol": 0.02802157482295558, | |
"perf_norm_to_cublas": 0.7035069006763038, | |
"compute_intensity": 7.420289855072464, | |
"tile_compute_intensity": 0.5714285714285714, | |
"MxNxK": 131072, | |
"size_m": 256, | |
"size_n": 64, | |
"size_k": 8, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_256x64x8.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.0001233567949384451, | |
"perf_norm_to_sol": 0.6128822896207071, | |
"perf_norm_to_cublas": 0.7208747928987829, | |
"compute_intensity": 15.906796116504854, | |
"tile_compute_intensity": 1.855072463768116, | |
"MxNxK": 134217728, | |
"size_m": 2048, | |
"size_n": 4096, | |
"size_k": 16, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_2048x4096x16.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.0001066112075932324, | |
"perf_norm_to_sol": 0.6428440490798478, | |
"perf_norm_to_cublas": 0.9205186423469395, | |
"compute_intensity": 167.18367346938774, | |
"tile_compute_intensity": 5.818181818181818, | |
"MxNxK": 268435456, | |
"size_m": 4096, | |
"size_n": 256, | |
"size_k": 256, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_4096x256x256.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00000852800003485754, | |
"perf_norm_to_sol": 0.12556867833606347, | |
"perf_norm_to_cublas": 0.7497185770861599, | |
"compute_intensity": 48.76190476190476, | |
"tile_compute_intensity": 1.4545454545454546, | |
"MxNxK": 4194304, | |
"size_m": 128, | |
"size_n": 512, | |
"size_k": 64, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_128x512x64.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.000024111999664455652, | |
"perf_norm_to_sol": 0.3552918739644947, | |
"perf_norm_to_cublas": 4.811015326826654, | |
"compute_intensity": 83.59183673469387, | |
"tile_compute_intensity": 0.9980506822612085, | |
"MxNxK": 33554432, | |
"size_m": 128, | |
"size_n": 64, | |
"size_k": 4096, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_128x64x4096.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.0027325952425599096, | |
"perf_norm_to_sol": 0.8025704420367488, | |
"perf_norm_to_cublas": 1.0456978009420124, | |
"compute_intensity": 655.36, | |
"tile_compute_intensity": 13.473684210526315, | |
"MxNxK": 8589934592, | |
"size_m": 8192, | |
"size_n": 512, | |
"size_k": 2048, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_8192x512x2048.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00006655679899267853, | |
"perf_norm_to_sol": 0.5148563437828849, | |
"perf_norm_to_cublas": 0.8005193289600275, | |
"compute_intensity": 101.1358024691358, | |
"tile_compute_intensity": 4.923076923076923, | |
"MxNxK": 134217728, | |
"size_m": 4096, | |
"size_n": 256, | |
"size_k": 128, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_4096x256x128.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.0013469087891280652, | |
"perf_norm_to_sol": 0.814123491297636, | |
"perf_norm_to_cublas": 1.6569425257615942, | |
"compute_intensity": 240.94117647058823, | |
"tile_compute_intensity": 1.965451055662188, | |
"MxNxK": 4294967296, | |
"size_m": 128, | |
"size_n": 4096, | |
"size_k": 8192, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_128x4096x8192.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.000017046398716047408, | |
"perf_norm_to_sol": 0.25127880934026753, | |
"perf_norm_to_cublas": 1.4501596182956131, | |
"compute_intensity": 128, | |
"tile_compute_intensity": 1.9393939393939394, | |
"MxNxK": 16777216, | |
"size_m": 256, | |
"size_n": 128, | |
"size_k": 512, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_256x128x512.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.000006672000017715618, | |
"perf_norm_to_sol": 0.09297009619118334, | |
"perf_norm_to_cublas": 0.8781774380537599, | |
"compute_intensity": 40.96, | |
"tile_compute_intensity": 1.3333333333333333, | |
"MxNxK": 2097152, | |
"size_m": 512, | |
"size_n": 64, | |
"size_k": 64, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_512x64x64.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.004637769609689713, | |
"perf_norm_to_sol": 0.5176238944541665, | |
"perf_norm_to_cublas": 0.9175190358296267, | |
"compute_intensity": 31.937621832358673, | |
"tile_compute_intensity": 3.9083969465648853, | |
"MxNxK": 8589934592, | |
"size_m": 16384, | |
"size_n": 16384, | |
"size_k": 32, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_16384x16384x32.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.0013892672024667263, | |
"perf_norm_to_sol": 0.7893010674385906, | |
"perf_norm_to_cublas": 0.8865335674532315, | |
"compute_intensity": 225.98620689655172, | |
"tile_compute_intensity": 10.448979591836734, | |
"MxNxK": 4294967296, | |
"size_m": 1024, | |
"size_n": 16384, | |
"size_k": 256, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_1024x16384x256.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.000015094398986548185, | |
"perf_norm_to_sol": 0.6309211403932, | |
"perf_norm_to_cublas": 1.022472059595912, | |
"compute_intensity": 7.922630560928433, | |
"tile_compute_intensity": 0.9142857142857143, | |
"MxNxK": 8388608, | |
"size_m": 2048, | |
"size_n": 512, | |
"size_k": 8, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_2048x512x8.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.005531059205532074, | |
"perf_norm_to_sol": 0.7930127269421817, | |
"perf_norm_to_cublas": 0.903205520402195, | |
"compute_intensity": 246.37593984962405, | |
"tile_compute_intensity": 3.9233716475095783, | |
"MxNxK": 17179869184, | |
"size_m": 16384, | |
"size_n": 128, | |
"size_k": 8192, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_16384x128x8192.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00010900800116360187, | |
"perf_norm_to_sol": 0.6287096326412559, | |
"perf_norm_to_cublas": 0.9019521736883093, | |
"compute_intensity": 127.0077519379845, | |
"tile_compute_intensity": 3.4594594594594597, | |
"MxNxK": 268435456, | |
"size_m": 8192, | |
"size_n": 128, | |
"size_k": 256, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_8192x128x256.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00005297600291669369, | |
"perf_norm_to_sol": 0.6468436329020334, | |
"perf_norm_to_cublas": 2.225369862989616, | |
"compute_intensity": 126.03076923076924, | |
"tile_compute_intensity": 1.3315994798439532, | |
"MxNxK": 134217728, | |
"size_m": 128, | |
"size_n": 128, | |
"size_k": 8192, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_128x128x8192.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00006488639628514648, | |
"perf_norm_to_sol": 0.36045808624576287, | |
"perf_norm_to_cublas": 0.6439315703987604, | |
"compute_intensity": 28.419774501300953, | |
"tile_compute_intensity": 1.3298701298701299, | |
"MxNxK": 67108864, | |
"size_m": 128, | |
"size_n": 16384, | |
"size_k": 32, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_128x16384x32.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.001541263982653618, | |
"perf_norm_to_sol": 0.7114615654460856, | |
"perf_norm_to_cublas": 0.9885227052722078, | |
"compute_intensity": 63.38104448742747, | |
"tile_compute_intensity": 7.314285714285714, | |
"MxNxK": 4294967296, | |
"size_m": 16384, | |
"size_n": 4096, | |
"size_k": 64, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_16384x4096x64.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00230992641299963, | |
"perf_norm_to_sol": 0.5206423072727407, | |
"perf_norm_to_cublas": 0.9345475478704974, | |
"compute_intensity": 31.906523855890946, | |
"tile_compute_intensity": 3.878787878787879, | |
"MxNxK": 4294967296, | |
"size_m": 16384, | |
"size_n": 8192, | |
"size_k": 32, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_16384x8192x32.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00005301439668983221, | |
"perf_norm_to_sol": 0.6463751796280476, | |
"perf_norm_to_cublas": 1.8605060012270793, | |
"compute_intensity": 186.1818181818182, | |
"tile_compute_intensity": 3.1219512195121952, | |
"MxNxK": 134217728, | |
"size_m": 1024, | |
"size_n": 128, | |
"size_k": 1024, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_1024x128x1024.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.0027002431452274323, | |
"perf_norm_to_sol": 0.8121861824202894, | |
"perf_norm_to_cublas": 0.9094991177375452, | |
"compute_intensity": 744.7272727272727, | |
"tile_compute_intensity": 7.420289855072464, | |
"MxNxK": 8589934592, | |
"size_m": 512, | |
"size_n": 4096, | |
"size_k": 4096, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_512x4096x4096.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.01086776927113533, | |
"perf_norm_to_sol": 0.8071942335226685, | |
"perf_norm_to_cublas": 0.9279855383820339, | |
"compute_intensity": 799.219512195122, | |
"tile_compute_intensity": 7.757575757575758, | |
"MxNxK": 34359738368, | |
"size_m": 512, | |
"size_n": 16384, | |
"size_k": 4096, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_512x16384x4096.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00008769279811531306, | |
"perf_norm_to_sol": 0.3907640184796324, | |
"perf_norm_to_cublas": 0.7183988087225469, | |
"compute_intensity": 56.69204152249135, | |
"tile_compute_intensity": 3.878787878787879, | |
"MxNxK": 134217728, | |
"size_m": 8192, | |
"size_n": 256, | |
"size_k": 64, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_8192x256x64.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.000019523200171533973, | |
"perf_norm_to_sol": 0.5985335910294347, | |
"perf_norm_to_cublas": 0.9111621504115796, | |
"compute_intensity": 14.216052060737526, | |
"tile_compute_intensity": 0.9961089494163424, | |
"MxNxK": 16777216, | |
"size_m": 16384, | |
"size_n": 64, | |
"size_k": 16, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_16384x64x16.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.000182956806384027, | |
"perf_norm_to_sol": 0.7491864524861875, | |
"perf_norm_to_cublas": 1.793629878587695, | |
"compute_intensity": 112.99310344827586, | |
"tile_compute_intensity": 1.5987509758001561, | |
"MxNxK": 536870912, | |
"size_m": 512, | |
"size_n": 64, | |
"size_k": 16384, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_512x64x16384.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.003228684514760971, | |
"perf_norm_to_sol": 0.7413591060215456, | |
"perf_norm_to_cublas": 0.7550705935916899, | |
"compute_intensity": 7.996095656417765, | |
"tile_compute_intensity": 0.9941747572815534, | |
"MxNxK": 2147483648, | |
"size_m": 16384, | |
"size_n": 16384, | |
"size_k": 8, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_16384x16384x8.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.005422703921794891, | |
"perf_norm_to_sol": 0.8088585338079519, | |
"perf_norm_to_cublas": 0.861618278070355, | |
"compute_intensity": 780.1904761904761, | |
"tile_compute_intensity": 23.272727272727273, | |
"MxNxK": 17179869184, | |
"size_m": 2048, | |
"size_n": 8192, | |
"size_k": 1024, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_2048x8192x1024.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00009215679601766169, | |
"perf_norm_to_sol": 0.8156244772929498, | |
"perf_norm_to_cublas": 0.9116983843970254, | |
"compute_intensity": 7.976630963972736, | |
"tile_compute_intensity": 0.9696969696969697, | |
"MxNxK": 67108864, | |
"size_m": 4096, | |
"size_n": 2048, | |
"size_k": 8, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_4096x2048x8.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00007106239791028201, | |
"perf_norm_to_sol": 0.4822126918166485, | |
"perf_norm_to_cublas": 0.7569234758908744, | |
"compute_intensity": 84.89119170984456, | |
"tile_compute_intensity": 3.1219512195121952, | |
"MxNxK": 134217728, | |
"size_m": 8192, | |
"size_n": 128, | |
"size_k": 128, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_8192x128x128.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.0003487008158117533, | |
"perf_norm_to_sol": 0.7861682824800057, | |
"perf_norm_to_cublas": 1.089952142631171, | |
"compute_intensity": 227.55555555555554, | |
"tile_compute_intensity": 1.9320754716981132, | |
"MxNxK": 1073741824, | |
"size_m": 128, | |
"size_n": 2048, | |
"size_k": 4096, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_128x2048x4096.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.002725190296769142, | |
"perf_norm_to_sol": 0.8047512037338678, | |
"perf_norm_to_cublas": 1.0478123683851155, | |
"compute_intensity": 910.2222222222222, | |
"tile_compute_intensity": 19.692307692307693, | |
"MxNxK": 8589934592, | |
"size_m": 4096, | |
"size_n": 1024, | |
"size_k": 2048, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_4096x1024x2048.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.0000954464019741863, | |
"perf_norm_to_sol": 0.7898073179532755, | |
"perf_norm_to_cublas": 0.8816173155444341, | |
"compute_intensity": 7.964997569275644, | |
"tile_compute_intensity": 0.9377289377289377, | |
"MxNxK": 67108864, | |
"size_m": 1024, | |
"size_n": 8192, | |
"size_k": 8, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_1024x8192x8.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.0014388063922524453, | |
"perf_norm_to_sol": 0.762124836092622, | |
"perf_norm_to_cublas": 0.8709130377202091, | |
"compute_intensity": 125.06870229007633, | |
"tile_compute_intensity": 12.19047619047619, | |
"MxNxK": 4294967296, | |
"size_m": 4096, | |
"size_n": 8192, | |
"size_k": 128, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_4096x8192x128.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.005440540611743927, | |
"perf_norm_to_sol": 0.8062067093092944, | |
"perf_norm_to_cublas": 0.9052547393446058, | |
"compute_intensity": 963.7647058823529, | |
"tile_compute_intensity": 10.61139896373057, | |
"MxNxK": 17179869184, | |
"size_m": 1024, | |
"size_n": 1024, | |
"size_k": 16384, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_1024x1024x16384.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.002700294554233551, | |
"perf_norm_to_sol": 0.8121707197796096, | |
"perf_norm_to_cublas": 0.9179181816827314, | |
"compute_intensity": 239.1824817518248, | |
"tile_compute_intensity": 1.9844961240310077, | |
"MxNxK": 8589934592, | |
"size_m": 128, | |
"size_n": 16384, | |
"size_k": 4096, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_128x16384x4096.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.000022694400104228408, | |
"perf_norm_to_sol": 0.4662616415603754, | |
"perf_norm_to_cublas": 0.9486745288199979, | |
"compute_intensity": 30.007326007326007, | |
"tile_compute_intensity": 2.56, | |
"MxNxK": 33554432, | |
"size_m": 4096, | |
"size_n": 256, | |
"size_k": 32, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_4096x256x32.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00003510720271151513, | |
"perf_norm_to_sol": 0.5991747211241286, | |
"perf_norm_to_cublas": 0.9342811995580108, | |
"compute_intensity": 15.05190629306385, | |
"tile_compute_intensity": 1.3264248704663213, | |
"MxNxK": 33554432, | |
"size_m": 16384, | |
"size_n": 128, | |
"size_k": 16, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_16384x128x16.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.000007052800356177613, | |
"perf_norm_to_sol": 0.1888346430784719, | |
"perf_norm_to_cublas": 0.7586206754256422, | |
"compute_intensity": 14.94890510948905, | |
"tile_compute_intensity": 0.9696969696969697, | |
"MxNxK": 2097152, | |
"size_m": 128, | |
"size_n": 1024, | |
"size_k": 16, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_128x1024x16.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.0006792960222810507, | |
"perf_norm_to_sol": 0.8071224104788935, | |
"perf_norm_to_cublas": 1.6883031684552765, | |
"compute_intensity": 124.12121212121212, | |
"tile_compute_intensity": 1.9357277882797732, | |
"MxNxK": 2147483648, | |
"size_m": 4096, | |
"size_n": 64, | |
"size_k": 8192, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_4096x64x8192.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00013603839324787258, | |
"perf_norm_to_sol": 0.5621862018415025, | |
"perf_norm_to_cublas": 0.8063605414146581, | |
"compute_intensity": 31.62934362934363, | |
"tile_compute_intensity": 3.5555555555555554, | |
"MxNxK": 268435456, | |
"size_m": 4096, | |
"size_n": 2048, | |
"size_k": 32, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_4096x2048x32.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.000019267199968453495, | |
"perf_norm_to_sol": 0.36455455311027596, | |
"perf_norm_to_cublas": 0.6973924872958122, | |
"compute_intensity": 25.5600624024961, | |
"tile_compute_intensity": 1.3195876288659794, | |
"MxNxK": 16777216, | |
"size_m": 8192, | |
"size_n": 64, | |
"size_k": 32, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_8192x64x32.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.002770681492984295, | |
"perf_norm_to_sol": 0.7915381747349963, | |
"perf_norm_to_cublas": 0.8829966393911173, | |
"compute_intensity": 244.53731343283582, | |
"tile_compute_intensity": 19.692307692307693, | |
"MxNxK": 8589934592, | |
"size_m": 4096, | |
"size_n": 8192, | |
"size_k": 256, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_4096x8192x256.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.010844102501869202, | |
"perf_norm_to_sol": 0.8089558988771262, | |
"perf_norm_to_cublas": 0.879656465797871, | |
"compute_intensity": 992.969696969697, | |
"tile_compute_intensity": 25.6, | |
"MxNxK": 34359738368, | |
"size_m": 16384, | |
"size_n": 1024, | |
"size_k": 2048, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_16384x1024x2048.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00003153280122205615, | |
"perf_norm_to_sol": 0.6063440658670987, | |
"perf_norm_to_cublas": 0.9549421722358062, | |
"compute_intensity": 15.814671814671815, | |
"tile_compute_intensity": 1.7297297297297298, | |
"MxNxK": 33554432, | |
"size_m": 1024, | |
"size_n": 2048, | |
"size_k": 16, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_1024x2048x16.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00000643519961158745, | |
"perf_norm_to_sol": 0.37705961548002925, | |
"perf_norm_to_cublas": 0.8582794833065766, | |
"compute_intensity": 7.846743295019157, | |
"tile_compute_intensity": 0.8421052631578947, | |
"MxNxK": 2097152, | |
"size_m": 1024, | |
"size_n": 256, | |
"size_k": 8, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_1024x256x8.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00019477440509945153, | |
"perf_norm_to_sol": 0.7037308657832344, | |
"perf_norm_to_cublas": 1.0126012835818363, | |
"compute_intensity": 199.8048780487805, | |
"tile_compute_intensity": 9.142857142857142, | |
"MxNxK": 536870912, | |
"size_m": 4096, | |
"size_n": 512, | |
"size_k": 256, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_4096x512x256.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.005453347042202949, | |
"perf_norm_to_sol": 0.8043134444797383, | |
"perf_norm_to_cublas": 0.8709173078487684, | |
"compute_intensity": 448.8767123287671, | |
"tile_compute_intensity": 20.48, | |
"MxNxK": 17179869184, | |
"size_m": 2048, | |
"size_n": 16384, | |
"size_k": 512, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_2048x16384x512.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.0053881313651800156, | |
"perf_norm_to_sol": 0.8140485163006239, | |
"perf_norm_to_cublas": 0.9376817270599997, | |
"compute_intensity": 963.7647058823529, | |
"tile_compute_intensity": 14.222222222222221, | |
"MxNxK": 17179869184, | |
"size_m": 1024, | |
"size_n": 8192, | |
"size_k": 2048, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_1024x8192x2048.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.0027593184262514115, | |
"perf_norm_to_sol": 0.7947977844326554, | |
"perf_norm_to_cublas": 1.6197229400502686, | |
"compute_intensity": 442.81081081081084, | |
"tile_compute_intensity": 6.38006230529595, | |
"MxNxK": 8589934592, | |
"size_m": 2048, | |
"size_n": 256, | |
"size_k": 16384, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_2048x256x16384.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.0000649407971650362, | |
"perf_norm_to_sol": 0.5276681482085679, | |
"perf_norm_to_cublas": 0.8102887365704547, | |
"compute_intensity": 113.77777777777777, | |
"tile_compute_intensity": 6.4, | |
"MxNxK": 134217728, | |
"size_m": 1024, | |
"size_n": 1024, | |
"size_k": 128, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_1024x1024x128.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.0000795360014308244, | |
"perf_norm_to_sol": 0.43083873424371716, | |
"perf_norm_to_cublas": 0.7566283701891828, | |
"compute_intensity": 56.69204152249135, | |
"tile_compute_intensity": 2.6391752577319587, | |
"MxNxK": 134217728, | |
"size_m": 256, | |
"size_n": 8192, | |
"size_k": 64, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_256x8192x64.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.000023168000916484742, | |
"perf_norm_to_sol": 0.4567303102658465, | |
"perf_norm_to_cublas": 0.9026242752542896, | |
"compute_intensity": 30.007326007326007, | |
"tile_compute_intensity": 1.9692307692307693, | |
"MxNxK": 33554432, | |
"size_m": 256, | |
"size_n": 4096, | |
"size_k": 32, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_256x4096x32.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00010435199365019799, | |
"perf_norm_to_sol": 0.6567615813480503, | |
"perf_norm_to_cublas": 0.9461515514457759, | |
"compute_intensity": 204.8, | |
"tile_compute_intensity": 8, | |
"MxNxK": 268435456, | |
"size_m": 1024, | |
"size_n": 1024, | |
"size_k": 256, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_1024x1024x256.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00035139839164912703, | |
"perf_norm_to_sol": 0.7801331138129697, | |
"perf_norm_to_cublas": 1.7059975566584766, | |
"compute_intensity": 202.2716049382716, | |
"tile_compute_intensity": 1.7762359063313096, | |
"MxNxK": 1073741824, | |
"size_m": 128, | |
"size_n": 512, | |
"size_k": 16384, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_128x512x16384.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.000013072000001557171, | |
"perf_norm_to_sol": 0.3740357871802086, | |
"perf_norm_to_cublas": 0.7605874868223458, | |
"compute_intensity": 15.633587786259541, | |
"tile_compute_intensity": 1.6, | |
"MxNxK": 8388608, | |
"size_m": 1024, | |
"size_n": 512, | |
"size_k": 16, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_1024x512x16.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00011193279642611742, | |
"perf_norm_to_sol": 0.6122814988524178, | |
"perf_norm_to_cublas": 0.9211241156223939, | |
"compute_intensity": 112.21917808219177, | |
"tile_compute_intensity": 7.111111111111111, | |
"MxNxK": 268435456, | |
"size_m": 4096, | |
"size_n": 512, | |
"size_k": 128, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_4096x512x128.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.0026962272822856903, | |
"perf_norm_to_sol": 0.8133958832541938, | |
"perf_norm_to_cublas": 0.9117344351673131, | |
"compute_intensity": 744.7272727272727, | |
"tile_compute_intensity": 7.062068965517241, | |
"MxNxK": 8589934592, | |
"size_m": 512, | |
"size_n": 2048, | |
"size_k": 8192, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_512x2048x8192.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.0027282431721687315, | |
"perf_norm_to_sol": 0.8038506956055119, | |
"perf_norm_to_cublas": 0.8770807469360581, | |
"compute_intensity": 442.81081081081084, | |
"tile_compute_intensity": 25.6, | |
"MxNxK": 8589934592, | |
"size_m": 8192, | |
"size_n": 2048, | |
"size_k": 512, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_8192x2048x512.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.001375507190823555, | |
"perf_norm_to_sol": 0.7971969126587234, | |
"perf_norm_to_cublas": 0.9747886778849995, | |
"compute_intensity": 225.98620689655172, | |
"tile_compute_intensity": 3.878787878787879, | |
"MxNxK": 4294967296, | |
"size_m": 16384, | |
"size_n": 128, | |
"size_k": 2048, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_16384x128x2048.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00005570239736698568, | |
"perf_norm_to_sol": 0.6920636070663342, | |
"perf_norm_to_cublas": 0.8260470475304138, | |
"compute_intensity": 7.875030040855563, | |
"tile_compute_intensity": 0.7987519500780031, | |
"MxNxK": 33554432, | |
"size_m": 256, | |
"size_n": 16384, | |
"size_k": 8, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_256x16384x8.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00005197759601287544, | |
"perf_norm_to_sol": 0.7258631467844852, | |
"perf_norm_to_cublas": 0.8945392574406056, | |
"compute_intensity": 7.961127308066083, | |
"tile_compute_intensity": 0.9343065693430657, | |
"MxNxK": 33554432, | |
"size_m": 1024, | |
"size_n": 4096, | |
"size_k": 8, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_1024x4096x8.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00005290560075081885, | |
"perf_norm_to_sol": 0.6477043960744079, | |
"perf_norm_to_cublas": 2.2108509378197847, | |
"compute_intensity": 163.84, | |
"tile_compute_intensity": 1.5950155763239875, | |
"MxNxK": 134217728, | |
"size_m": 128, | |
"size_n": 256, | |
"size_k": 4096, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_128x256x4096.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00034959681797772644, | |
"perf_norm_to_sol": 0.78415336573106, | |
"perf_norm_to_cublas": 1.0829846336370483, | |
"compute_intensity": 221.40540540540542, | |
"tile_compute_intensity": 1.9541984732824427, | |
"MxNxK": 1073741824, | |
"size_m": 128, | |
"size_n": 4096, | |
"size_k": 2048, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_128x4096x2048.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.010881190747022628, | |
"perf_norm_to_sol": 0.8061985945164729, | |
"perf_norm_to_cublas": 0.9079859497974835, | |
"compute_intensity": 250.13740458015266, | |
"tile_compute_intensity": 1.9902818270165208, | |
"MxNxK": 34359738368, | |
"size_m": 128, | |
"size_n": 16384, | |
"size_k": 16384, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_128x16384x16384.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.011035491526126862, | |
"perf_norm_to_sol": 0.7949261404574837, | |
"perf_norm_to_cublas": 1.0360181549767864, | |
"compute_intensity": 862.3157894736842, | |
"tile_compute_intensity": 14.027397260273972, | |
"MxNxK": 34359738368, | |
"size_m": 8192, | |
"size_n": 512, | |
"size_k": 8192, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_8192x512x8192.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00036258238833397627, | |
"perf_norm_to_sol": 0.7560696004175295, | |
"perf_norm_to_cublas": 1.0445956554362912, | |
"compute_intensity": 327.68, | |
"tile_compute_intensity": 6.7368421052631575, | |
"MxNxK": 1073741824, | |
"size_m": 4096, | |
"size_n": 256, | |
"size_k": 1024, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_4096x256x1024.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.02220543920993805, | |
"perf_norm_to_sol": 0.7901127830868757, | |
"perf_norm_to_cublas": 0.8738782744494664, | |
"compute_intensity": 885.6216216216217, | |
"tile_compute_intensity": 39.38461538461539, | |
"MxNxK": 68719476736, | |
"size_m": 4096, | |
"size_n": 16384, | |
"size_k": 1024, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_4096x16384x1024.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00005023360135965049, | |
"perf_norm_to_sol": 0.3410783743925192, | |
"perf_norm_to_cublas": 0.6248566784012222, | |
"compute_intensity": 56.49655172413793, | |
"tile_compute_intensity": 2.6122448979591835, | |
"MxNxK": 67108864, | |
"size_m": 256, | |
"size_n": 4096, | |
"size_k": 64, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_256x4096x64.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00006526079960167408, | |
"perf_norm_to_sol": 0.32316613595760046, | |
"perf_norm_to_cublas": 0.6263116631524869, | |
"compute_intensity": 30.06238532110092, | |
"tile_compute_intensity": 1.9844961240310077, | |
"MxNxK": 67108864, | |
"size_m": 256, | |
"size_n": 8192, | |
"size_k": 32, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_256x8192x32.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00018456639954820276, | |
"perf_norm_to_sol": 0.7426528396749352, | |
"perf_norm_to_cublas": 1.0864643307836512, | |
"compute_intensity": 215.57894736842104, | |
"tile_compute_intensity": 3.506849315068493, | |
"MxNxK": 536870912, | |
"size_m": 2048, | |
"size_n": 128, | |
"size_k": 2048, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_2048x128x2048.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.000006428799679270014, | |
"perf_norm_to_sol": 0.4093608733102106, | |
"perf_norm_to_cublas": 0.8695869302571677, | |
"compute_intensity": 7.522497704315886, | |
"tile_compute_intensity": 0.6597938144329897, | |
"MxNxK": 2097152, | |
"size_m": 4096, | |
"size_n": 64, | |
"size_k": 8, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_4096x64x8.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00005013759946450591, | |
"perf_norm_to_sol": 0.34173146051319986, | |
"perf_norm_to_cublas": 0.6284145814501623, | |
"compute_intensity": 56.49655172413793, | |
"tile_compute_intensity": 3.764705882352941, | |
"MxNxK": 67108864, | |
"size_m": 4096, | |
"size_n": 256, | |
"size_k": 64, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_4096x256x64.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.0003654784057289362, | |
"perf_norm_to_sol": 0.7500785741892014, | |
"perf_norm_to_cublas": 1.0254438727776418, | |
"compute_intensity": 169.78238341968913, | |
"tile_compute_intensity": 3.710144927536232, | |
"MxNxK": 1073741824, | |
"size_m": 16384, | |
"size_n": 128, | |
"size_k": 512, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_16384x128x512.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.0006895423866808414, | |
"perf_norm_to_sol": 0.7951288470769209, | |
"perf_norm_to_cublas": 0.992486599976634, | |
"compute_intensity": 409.6, | |
"tile_compute_intensity": 6.320987654320987, | |
"MxNxK": 2147483648, | |
"size_m": 2048, | |
"size_n": 256, | |
"size_k": 4096, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_2048x256x4096.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00019658240489661694, | |
"perf_norm_to_sol": 0.7639784706420178, | |
"perf_norm_to_cublas": 0.8095129187566252, | |
"compute_intensity": 7.980516317584024, | |
"tile_compute_intensity": 0.9770992366412213, | |
"MxNxK": 134217728, | |
"size_m": 8192, | |
"size_n": 2048, | |
"size_k": 8, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_8192x2048x8.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.0054210305213928224, | |
"perf_norm_to_sol": 0.8091082177361921, | |
"perf_norm_to_cublas": 1.054462805305784, | |
"compute_intensity": 448.8767123287671, | |
"tile_compute_intensity": 7.641791044776119, | |
"MxNxK": 17179869184, | |
"size_m": 16384, | |
"size_n": 256, | |
"size_k": 4096, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_16384x256x4096.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00002759360068012029, | |
"perf_norm_to_sol": 0.3398405092942277, | |
"perf_norm_to_cublas": 0.7280528639695293, | |
"compute_intensity": 42.555844155844156, | |
"tile_compute_intensity": 1.5802469135802468, | |
"MxNxK": 33554432, | |
"size_m": 8192, | |
"size_n": 64, | |
"size_k": 64, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_8192x64x64.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.000023500800307374447, | |
"perf_norm_to_sol": 0.3645321620441791, | |
"perf_norm_to_cublas": 4.402232695529129, | |
"compute_intensity": 120.47058823529412, | |
"tile_compute_intensity": 1.3264248704663213, | |
"MxNxK": 33554432, | |
"size_m": 128, | |
"size_n": 128, | |
"size_k": 2048, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_128x128x2048.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.0002059871796518564, | |
"perf_norm_to_sol": 0.665423746102619, | |
"perf_norm_to_cublas": 0.8950304179389318, | |
"compute_intensity": 62.534351145038165, | |
"tile_compute_intensity": 6.4, | |
"MxNxK": 536870912, | |
"size_m": 4096, | |
"size_n": 2048, | |
"size_k": 64, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_4096x2048x64.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00035108160227537155, | |
"perf_norm_to_sol": 0.7808370466848975, | |
"perf_norm_to_cublas": 1.7030434701873618, | |
"compute_intensity": 202.2716049382716, | |
"tile_compute_intensity": 2.6631989596879064, | |
"MxNxK": 1073741824, | |
"size_m": 512, | |
"size_n": 128, | |
"size_k": 16384, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_512x128x16384.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.0000319135986501351, | |
"perf_norm_to_sol": 0.5368744302222057, | |
"perf_norm_to_cublas": 1.359169805394755, | |
"compute_intensity": 157.53846153846155, | |
"tile_compute_intensity": 1.8285714285714285, | |
"MxNxK": 67108864, | |
"size_m": 128, | |
"size_n": 1024, | |
"size_k": 512, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_128x1024x512.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.000013891200069338083, | |
"perf_norm_to_sol": 0.35985803033303426, | |
"perf_norm_to_cublas": 0.7406127827897875, | |
"compute_intensity": 15.456603773584906, | |
"tile_compute_intensity": 1.5238095238095237, | |
"MxNxK": 8388608, | |
"size_m": 2048, | |
"size_n": 256, | |
"size_k": 16, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_2048x256x16.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.02223135679960251, | |
"perf_norm_to_sol": 0.7891916598695534, | |
"perf_norm_to_cublas": 1.0311082134303216, | |
"compute_intensity": 489.07462686567163, | |
"tile_compute_intensity": 3.9613152804642167, | |
"MxNxK": 68719476736, | |
"size_m": 256, | |
"size_n": 16384, | |
"size_k": 16384, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_256x16384x16384.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00018240000354126097, | |
"perf_norm_to_sol": 0.7514734543414904, | |
"perf_norm_to_cublas": 1.0959122458257544, | |
"compute_intensity": 118.72463768115942, | |
"tile_compute_intensity": 1.9248120300751879, | |
"MxNxK": 536870912, | |
"size_m": 4096, | |
"size_n": 64, | |
"size_k": 2048, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_4096x64x2048.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.0031851518899202345, | |
"perf_norm_to_sol": 0.688538646671492, | |
"perf_norm_to_cublas": 0.9533194262461028, | |
"compute_intensity": 63.627184466019415, | |
"tile_compute_intensity": 7.529411764705882, | |
"MxNxK": 8589934592, | |
"size_m": 16384, | |
"size_n": 8192, | |
"size_k": 64, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_16384x8192x64.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00009457599953748286, | |
"perf_norm_to_sol": 0.7246487555160739, | |
"perf_norm_to_cublas": 1.0586026013118017, | |
"compute_intensity": 292.57142857142856, | |
"tile_compute_intensity": 3.4594594594594597, | |
"MxNxK": 268435456, | |
"size_m": 256, | |
"size_n": 1024, | |
"size_k": 1024, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_256x1024x1024.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.0010409312322735786, | |
"perf_norm_to_sol": 0.5799210589048035, | |
"perf_norm_to_cublas": 1.0409171977166507, | |
"compute_intensity": 31.844509232264333, | |
"tile_compute_intensity": 3.8208955223880596, | |
"MxNxK": 2147483648, | |
"size_m": 16384, | |
"size_n": 4096, | |
"size_k": 32, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_16384x4096x32.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00007924159872345627, | |
"perf_norm_to_sol": 0.4324394097959996, | |
"perf_norm_to_cublas": 0.7829422526865224, | |
"compute_intensity": 51.1201248049922, | |
"tile_compute_intensity": 1.5950155763239875, | |
"MxNxK": 134217728, | |
"size_m": 128, | |
"size_n": 16384, | |
"size_k": 64, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_128x16384x64.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00006430079811252654, | |
"perf_norm_to_sol": 0.32799095899041003, | |
"perf_norm_to_cublas": 0.6773166203078124, | |
"compute_intensity": 30.06238532110092, | |
"tile_compute_intensity": 2.6122448979591835, | |
"MxNxK": 67108864, | |
"size_m": 8192, | |
"size_n": 256, | |
"size_k": 32, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_8192x256x32.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.010869033634662628, | |
"perf_norm_to_sol": 0.807100334931256, | |
"perf_norm_to_cublas": 1.04841402314341, | |
"compute_intensity": 1365.3333333333333, | |
"tile_compute_intensity": 20.897959183673468, | |
"MxNxK": 34359738368, | |
"size_m": 4096, | |
"size_n": 1024, | |
"size_k": 8192, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_4096x1024x8192.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00020801599603146315, | |
"perf_norm_to_sol": 0.7254946181829572, | |
"perf_norm_to_cublas": 0.8135220325358042, | |
"compute_intensity": 15.922254616132166, | |
"tile_compute_intensity": 1.8686131386861313, | |
"MxNxK": 268435456, | |
"size_m": 2048, | |
"size_n": 8192, | |
"size_k": 16, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_2048x8192x16.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00037083839997649194, | |
"perf_norm_to_sol": 0.7392371488051964, | |
"perf_norm_to_cublas": 0.9995685419354385, | |
"compute_intensity": 372.3636363636364, | |
"tile_compute_intensity": 12.8, | |
"MxNxK": 1073741824, | |
"size_m": 2048, | |
"size_n": 1024, | |
"size_k": 512, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_2048x1024x512.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.0006899103987962008, | |
"perf_norm_to_sol": 0.7947047093200375, | |
"perf_norm_to_cublas": 1.080182033763748, | |
"compute_intensity": 585.1428571428571, | |
"tile_compute_intensity": 6.918918918918919, | |
"MxNxK": 2147483648, | |
"size_m": 512, | |
"size_n": 2048, | |
"size_k": 2048, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_512x2048x2048.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.0001819776021875441, | |
"perf_norm_to_sol": 0.753217753643055, | |
"perf_norm_to_cublas": 1.166015950734137, | |
"compute_intensity": 315.0769230769231, | |
"tile_compute_intensity": 3.1801242236024843, | |
"MxNxK": 536870912, | |
"size_m": 256, | |
"size_n": 512, | |
"size_k": 4096, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_256x512x4096.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.000006656000186922028, | |
"perf_norm_to_sol": 0.05207876508713338, | |
"perf_norm_to_cublas": 0.558653825447202, | |
"compute_intensity": 14.628571428571428, | |
"tile_compute_intensity": 1, | |
"MxNxK": 524288, | |
"size_m": 256, | |
"size_n": 128, | |
"size_k": 16, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_256x128x16.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.000006880000000819564, | |
"perf_norm_to_sol": 0.07782337885896817, | |
"perf_norm_to_cublas": 1.8818604454408936, | |
"compute_intensity": 64, | |
"tile_compute_intensity": 0.9696969696969697, | |
"MxNxK": 2097152, | |
"size_m": 128, | |
"size_n": 64, | |
"size_k": 256, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_128x64x256.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.0000067519998992793265, | |
"perf_norm_to_sol": 0.11348468729475253, | |
"perf_norm_to_cublas": 0.6900474338132462, | |
"compute_intensity": 27.675675675675677, | |
"tile_compute_intensity": 1.2307692307692308, | |
"MxNxK": 2097152, | |
"size_m": 128, | |
"size_n": 512, | |
"size_k": 32, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_128x512x32.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.0027302879840135573, | |
"perf_norm_to_sol": 0.8032486626209074, | |
"perf_norm_to_cublas": 0.8669889635247954, | |
"compute_intensity": 455.1111111111111, | |
"tile_compute_intensity": 25.6, | |
"MxNxK": 8589934592, | |
"size_m": 4096, | |
"size_n": 4096, | |
"size_k": 512, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_4096x4096x512.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.0007775328122079372, | |
"perf_norm_to_sol": 0.7051471453343366, | |
"perf_norm_to_cublas": 0.8528555660495443, | |
"compute_intensity": 62.89443378119002, | |
"tile_compute_intensity": 6.320987654320987, | |
"MxNxK": 2147483648, | |
"size_m": 2048, | |
"size_n": 16384, | |
"size_k": 64, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_2048x16384x64.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.010815942287445068, | |
"perf_norm_to_sol": 0.8110620835225908, | |
"perf_norm_to_cublas": 0.8748345378135516, | |
"compute_intensity": 862.3157894736842, | |
"tile_compute_intensity": 42.666666666666664, | |
"MxNxK": 34359738368, | |
"size_m": 8192, | |
"size_n": 4096, | |
"size_k": 1024, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_8192x4096x1024.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00634743720293045, | |
"perf_norm_to_sol": 0.6910191000286934, | |
"perf_norm_to_cublas": 0.956381187564456, | |
"compute_intensity": 63.750972762645915, | |
"tile_compute_intensity": 7.641791044776119, | |
"MxNxK": 17179869184, | |
"size_m": 16384, | |
"size_n": 16384, | |
"size_k": 64, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_16384x16384x64.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.000006646400288445875, | |
"perf_norm_to_sol": 0.06862366629640144, | |
"perf_norm_to_cublas": 0.672604691779506, | |
"compute_intensity": 24.975609756097562, | |
"tile_compute_intensity": 1.1428571428571428, | |
"MxNxK": 1048576, | |
"size_m": 512, | |
"size_n": 64, | |
"size_k": 32, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_512x64x32.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.02169468253850937, | |
"perf_norm_to_sol": 0.808714363194184, | |
"perf_norm_to_cublas": 0.9424958556719512, | |
"compute_intensity": 1489.4545454545455, | |
"tile_compute_intensity": 14.840579710144928, | |
"MxNxK": 68719476736, | |
"size_m": 1024, | |
"size_n": 8192, | |
"size_k": 8192, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_1024x8192x8192.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00135635519400239, | |
"perf_norm_to_sol": 0.8084534867512584, | |
"perf_norm_to_cublas": 0.9656655808430697, | |
"compute_intensity": 819.2, | |
"tile_compute_intensity": 10.448979591836734, | |
"MxNxK": 4294967296, | |
"size_m": 1024, | |
"size_n": 1024, | |
"size_k": 4096, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_1024x1024x4096.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.0008375647477805615, | |
"perf_norm_to_sol": 0.7172447285825602, | |
"perf_norm_to_cublas": 0.7616252040550708, | |
"compute_intensity": 15.961032635168047, | |
"tile_compute_intensity": 1.9320754716981132, | |
"MxNxK": 1073741824, | |
"size_m": 4096, | |
"size_n": 16384, | |
"size_k": 16, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_4096x16384x16.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.000006758399831596762, | |
"perf_norm_to_sol": 0.04319132261662418, | |
"perf_norm_to_cublas": 1.143939382521122, | |
"compute_intensity": 51.2, | |
"tile_compute_intensity": 0.9411764705882353, | |
"MxNxK": 1048576, | |
"size_m": 128, | |
"size_n": 64, | |
"size_k": 128, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_128x64x128.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.0027578016743063927, | |
"perf_norm_to_sol": 0.7952349119812628, | |
"perf_norm_to_cublas": 0.8734098414577766, | |
"compute_intensity": 404.5432098765432, | |
"tile_compute_intensity": 19.692307692307693, | |
"MxNxK": 8589934592, | |
"size_m": 16384, | |
"size_n": 1024, | |
"size_k": 512, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_16384x1024x512.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.0013509440235793591, | |
"perf_norm_to_sol": 0.8116917257304825, | |
"perf_norm_to_cublas": 0.9719851939082224, | |
"compute_intensity": 431.1578947368421, | |
"tile_compute_intensity": 3.8496240601503757, | |
"MxNxK": 4294967296, | |
"size_m": 256, | |
"size_n": 4096, | |
"size_k": 4096, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_256x4096x4096.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.0003624799894168973, | |
"perf_norm_to_sol": 0.7562831865756057, | |
"perf_norm_to_cublas": 1.0167734031962314, | |
"compute_intensity": 327.68, | |
"tile_compute_intensity": 6.7368421052631575, | |
"MxNxK": 1073741824, | |
"size_m": 512, | |
"size_n": 4096, | |
"size_k": 512, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_512x4096x512.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.0007640895899385213, | |
"perf_norm_to_sol": 0.71755334734546, | |
"perf_norm_to_cublas": 1.0142014305948328, | |
"compute_intensity": 62.89443378119002, | |
"tile_compute_intensity": 6.918918918918919, | |
"MxNxK": 2147483648, | |
"size_m": 16384, | |
"size_n": 2048, | |
"size_k": 64, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_16384x2048x64.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00001637440000195056, | |
"perf_norm_to_sol": 0.2874582071926161, | |
"perf_norm_to_cublas": 0.7955833785533081, | |
"compute_intensity": 42.44559585492228, | |
"tile_compute_intensity": 1.5609756097560976, | |
"MxNxK": 16777216, | |
"size_m": 4096, | |
"size_n": 64, | |
"size_k": 64, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_4096x64x64.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00005603200406767428, | |
"perf_norm_to_sol": 0.3057823002535807, | |
"perf_norm_to_cublas": 0.5735579108113513, | |
"compute_intensity": 51.0404984423676, | |
"tile_compute_intensity": 2.6122448979591835, | |
"MxNxK": 67108864, | |
"size_m": 8192, | |
"size_n": 128, | |
"size_k": 64, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_8192x128x64.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.000006985600339248776, | |
"perf_norm_to_sol": 0.03395158230139252, | |
"perf_norm_to_cublas": 0.7540082029502123, | |
"compute_intensity": 24.38095238095238, | |
"tile_compute_intensity": 1, | |
"MxNxK": 524288, | |
"size_m": 256, | |
"size_n": 64, | |
"size_k": 32, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_256x64x32.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.0000062240003899205474, | |
"perf_norm_to_sol": 0.20078966833009487, | |
"perf_norm_to_cublas": 0.7089973686613162, | |
"compute_intensity": 7.728301886792453, | |
"tile_compute_intensity": 0.6530612244897959, | |
"MxNxK": 1048576, | |
"size_m": 128, | |
"size_n": 1024, | |
"size_k": 8, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_128x1024x8.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.000016572800814174115, | |
"perf_norm_to_sol": 0.2584595579791451, | |
"perf_norm_to_cublas": 0.972388434122048, | |
"compute_intensity": 128, | |
"tile_compute_intensity": 2.4615384615384617, | |
"MxNxK": 16777216, | |
"size_m": 256, | |
"size_n": 256, | |
"size_k": 256, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_256x256x256.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.000013555200712289662, | |
"perf_norm_to_sol": 0.15799835295040543, | |
"perf_norm_to_cublas": 0.7200188554817074, | |
"compute_intensity": 85.33333333333333, | |
"tile_compute_intensity": 2.2857142857142856, | |
"MxNxK": 8388608, | |
"size_m": 256, | |
"size_n": 256, | |
"size_k": 128, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_256x256x128.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00009973120177164674, | |
"perf_norm_to_sol": 0.6871909607932734, | |
"perf_norm_to_cublas": 0.9620098764480972, | |
"compute_intensity": 167.18367346938774, | |
"tile_compute_intensity": 3.5555555555555554, | |
"MxNxK": 268435456, | |
"size_m": 4096, | |
"size_n": 128, | |
"size_k": 512, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_4096x128x512.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00014289600076153875, | |
"perf_norm_to_sol": 0.5398030155178114, | |
"perf_norm_to_cublas": 0.6850520580059444, | |
"compute_intensity": 15.7462758289284, | |
"tile_compute_intensity": 1.5950155763239875, | |
"MxNxK": 134217728, | |
"size_m": 512, | |
"size_n": 16384, | |
"size_k": 16, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_512x16384x16.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.01082865297794342, | |
"perf_norm_to_sol": 0.8101100575282586, | |
"perf_norm_to_cublas": 0.9145860470902243, | |
"compute_intensity": 1260.3076923076924, | |
"tile_compute_intensity": 15.875968992248062, | |
"MxNxK": 34359738368, | |
"size_m": 2048, | |
"size_n": 1024, | |
"size_k": 16384, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_2048x1024x16384.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00005304639926180243, | |
"perf_norm_to_sol": 0.6459852253899904, | |
"perf_norm_to_cublas": 2.18851432773763, | |
"compute_intensity": 163.84, | |
"tile_compute_intensity": 1.9922178988326849, | |
"MxNxK": 134217728, | |
"size_m": 256, | |
"size_n": 128, | |
"size_k": 4096, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_256x128x4096.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.01090458184480667, | |
"perf_norm_to_sol": 0.8044692416237095, | |
"perf_norm_to_cublas": 0.9324952196253017, | |
"compute_intensity": 1260.3076923076924, | |
"tile_compute_intensity": 24.38095238095238, | |
"MxNxK": 34359738368, | |
"size_m": 8192, | |
"size_n": 1024, | |
"size_k": 4096, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_8192x1024x4096.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.0026996033266186714, | |
"perf_norm_to_sol": 0.8123786743424052, | |
"perf_norm_to_cublas": 1.0562001906386949, | |
"compute_intensity": 404.5432098765432, | |
"tile_compute_intensity": 3.9083969465648853, | |
"MxNxK": 8589934592, | |
"size_m": 256, | |
"size_n": 16384, | |
"size_k": 2048, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_256x16384x2048.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.000052182399667799476, | |
"perf_norm_to_sol": 0.6566809959184061, | |
"perf_norm_to_cublas": 1.9967499800331694, | |
"compute_intensity": 227.55555555555554, | |
"tile_compute_intensity": 2.6391752577319587, | |
"MxNxK": 134217728, | |
"size_m": 256, | |
"size_n": 256, | |
"size_k": 2048, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_256x256x2048.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00007336639682762325, | |
"perf_norm_to_sol": 0.5172333822523881, | |
"perf_norm_to_cublas": 0.6986522245550331, | |
"compute_intensity": 15.875968992248062, | |
"tile_compute_intensity": 1.8285714285714285, | |
"MxNxK": 67108864, | |
"size_m": 2048, | |
"size_n": 2048, | |
"size_k": 16, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_2048x2048x16.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.0003669440047815442, | |
"perf_norm_to_sol": 0.7470827098791475, | |
"perf_norm_to_cublas": 1.0223772971343819, | |
"compute_intensity": 252.06153846153848, | |
"tile_compute_intensity": 6.7368421052631575, | |
"MxNxK": 1073741824, | |
"size_m": 8192, | |
"size_n": 256, | |
"size_k": 512, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_8192x256x512.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.0013652799651026725, | |
"perf_norm_to_sol": 0.8031686642248121, | |
"perf_norm_to_cublas": 1.050835555884199, | |
"compute_intensity": 630.1538461538462, | |
"tile_compute_intensity": 12.8, | |
"MxNxK": 4294967296, | |
"size_m": 1024, | |
"size_n": 4096, | |
"size_k": 1024, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_1024x4096x1024.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.0027099167928099632, | |
"perf_norm_to_sol": 0.809286904139502, | |
"perf_norm_to_cublas": 1.6312045944824833, | |
"compute_intensity": 442.81081081081084, | |
"tile_compute_intensity": 3.757798165137615, | |
"MxNxK": 8589934592, | |
"size_m": 256, | |
"size_n": 2048, | |
"size_k": 16384, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_256x2048x16384.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00005237439763732255, | |
"perf_norm_to_sol": 0.6542736857911604, | |
"perf_norm_to_cublas": 2.003665985652355, | |
"compute_intensity": 186.1818181818182, | |
"tile_compute_intensity": 1.7655172413793103, | |
"MxNxK": 134217728, | |
"size_m": 128, | |
"size_n": 512, | |
"size_k": 2048, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_128x512x2048.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.002703343890607357, | |
"perf_norm_to_sol": 0.8112546018834852, | |
"perf_norm_to_cublas": 0.9315869874510343, | |
"compute_intensity": 744.7272727272727, | |
"tile_compute_intensity": 25.6, | |
"MxNxK": 8589934592, | |
"size_m": 4096, | |
"size_n": 2048, | |
"size_k": 1024, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_4096x2048x1024.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.000028604798717424274, | |
"perf_norm_to_sol": 0.6645829954492454, | |
"perf_norm_to_cublas": 0.9243764125962224, | |
"compute_intensity": 7.930300096805421, | |
"tile_compute_intensity": 0.8827586206896552, | |
"MxNxK": 16777216, | |
"size_m": 512, | |
"size_n": 4096, | |
"size_k": 8, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_512x4096x8.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.0000737663998734206, | |
"perf_norm_to_sol": 0.5164072227184502, | |
"perf_norm_to_cublas": 0.6994621071885985, | |
"compute_intensity": 15.845261121856867, | |
"tile_compute_intensity": 1.7534246575342465, | |
"MxNxK": 67108864, | |
"size_m": 1024, | |
"size_n": 4096, | |
"size_k": 16, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_1024x4096x16.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00009970240062102675, | |
"perf_norm_to_sol": 0.6873894704604752, | |
"perf_norm_to_cublas": 0.9920402794320797, | |
"compute_intensity": 101.7639751552795, | |
"tile_compute_intensity": 1.9104477611940298, | |
"MxNxK": 268435456, | |
"size_m": 8192, | |
"size_n": 64, | |
"size_k": 512, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_8192x64x512.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00010524480603635311, | |
"perf_norm_to_sol": 0.7214758622088613, | |
"perf_norm_to_cublas": 0.7996290011791436, | |
"compute_intensity": 7.9360620004843785, | |
"tile_compute_intensity": 0.9377289377289377, | |
"MxNxK": 67108864, | |
"size_m": 16384, | |
"size_n": 512, | |
"size_k": 8, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_16384x512x8.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00014819200150668622, | |
"perf_norm_to_sol": 0.5219891728343243, | |
"perf_norm_to_cublas": 0.8634420133993255, | |
"compute_intensity": 31.44721689059501, | |
"tile_compute_intensity": 3.4594594594594597, | |
"MxNxK": 268435456, | |
"size_m": 8192, | |
"size_n": 1024, | |
"size_k": 32, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_8192x1024x32.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.000016527999832760543, | |
"perf_norm_to_sol": 0.3223168081893855, | |
"perf_norm_to_cublas": 0.76902224035929, | |
"compute_intensity": 29.8978102189781, | |
"tile_compute_intensity": 1.9393939393939394, | |
"MxNxK": 16777216, | |
"size_m": 256, | |
"size_n": 2048, | |
"size_k": 32, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_256x2048x32.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.0013463328592479228, | |
"perf_norm_to_sol": 0.8144717543898894, | |
"perf_norm_to_cublas": 1.644386236186578, | |
"compute_intensity": 431.1578947368421, | |
"tile_compute_intensity": 3.750915750915751, | |
"MxNxK": 4294967296, | |
"size_m": 256, | |
"size_n": 2048, | |
"size_k": 8192, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_256x2048x8192.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.000053907197434455155, | |
"perf_norm_to_sol": 0.6356700369171994, | |
"perf_norm_to_cublas": 2.2501485230941265, | |
"compute_intensity": 101.1358024691358, | |
"tile_compute_intensity": 1.3315994798439532, | |
"MxNxK": 134217728, | |
"size_m": 256, | |
"size_n": 64, | |
"size_k": 8192, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_256x64x8192.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.0007005344144999981, | |
"perf_norm_to_sol": 0.7826525458046677, | |
"perf_norm_to_cublas": 1.0550436450427425, | |
"compute_intensity": 334.3673469387755, | |
"tile_compute_intensity": 11.636363636363637, | |
"MxNxK": 2147483648, | |
"size_m": 8192, | |
"size_n": 512, | |
"size_k": 512, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_8192x512x512.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.0006905375979840755, | |
"perf_norm_to_sol": 0.7939828975754766, | |
"perf_norm_to_cublas": 1.0683386986551247, | |
"compute_intensity": 585.1428571428571, | |
"tile_compute_intensity": 10.24, | |
"MxNxK": 2147483648, | |
"size_m": 2048, | |
"size_n": 512, | |
"size_k": 2048, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_2048x512x2048.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00002242560003651306, | |
"perf_norm_to_sol": 0.3820097358317, | |
"perf_norm_to_cublas": 1.0398116120851963, | |
"compute_intensity": 146.28571428571428, | |
"tile_compute_intensity": 2.909090909090909, | |
"MxNxK": 33554432, | |
"size_m": 256, | |
"size_n": 512, | |
"size_k": 256, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_256x512x256.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.0000068640001700259745, | |
"perf_norm_to_sol": 0.2139631567924256, | |
"perf_norm_to_cublas": 0.7864801707899565, | |
"compute_intensity": 14.173010380622838, | |
"tile_compute_intensity": 0.9696969696969697, | |
"MxNxK": 2097152, | |
"size_m": 2048, | |
"size_n": 64, | |
"size_k": 16, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_2048x64x16.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00019494720036163926, | |
"perf_norm_to_sol": 0.7031071001726639, | |
"perf_norm_to_cublas": 1.00013131621907, | |
"compute_intensity": 215.57894736842104, | |
"tile_compute_intensity": 9.142857142857142, | |
"MxNxK": 536870912, | |
"size_m": 1024, | |
"size_n": 2048, | |
"size_k": 256, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_1024x2048x256.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.010776422172784805, | |
"perf_norm_to_sol": 0.8140364720555822, | |
"perf_norm_to_cublas": 0.8881873169620107, | |
"compute_intensity": 799.219512195122, | |
"tile_compute_intensity": 36.57142857142857, | |
"MxNxK": 34359738368, | |
"size_m": 16384, | |
"size_n": 2048, | |
"size_k": 1024, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_16384x2048x1024.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00006780159892514349, | |
"perf_norm_to_sol": 0.5054038654913675, | |
"perf_norm_to_cublas": 0.7839815264573334, | |
"compute_intensity": 101.1358024691358, | |
"tile_compute_intensity": 3.1219512195121952, | |
"MxNxK": 134217728, | |
"size_m": 256, | |
"size_n": 4096, | |
"size_k": 128, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_256x4096x128.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.005389955267310143, | |
"perf_norm_to_sol": 0.8137730511530905, | |
"perf_norm_to_cublas": 0.9275861833019768, | |
"compute_intensity": 1170.2857142857142, | |
"tile_compute_intensity": 23.272727272727273, | |
"MxNxK": 17179869184, | |
"size_m": 2048, | |
"size_n": 4096, | |
"size_k": 2048, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_2048x4096x2048.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.002726246416568756, | |
"perf_norm_to_sol": 0.8044394514011142, | |
"perf_norm_to_cublas": 0.9232280371216357, | |
"compute_intensity": 244.53731343283582, | |
"tile_compute_intensity": 3.8641509433962264, | |
"MxNxK": 8589934592, | |
"size_m": 8192, | |
"size_n": 128, | |
"size_k": 8192, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_8192x128x8192.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00023793599102646114, | |
"perf_norm_to_sol": 0.6379455148607974, | |
"perf_norm_to_cublas": 0.8224463716743843, | |
"compute_intensity": 31.751937984496124, | |
"tile_compute_intensity": 3.657142857142857, | |
"MxNxK": 536870912, | |
"size_m": 4096, | |
"size_n": 4096, | |
"size_k": 32, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_4096x4096x32.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.0007775487843900919, | |
"perf_norm_to_sol": 0.705132660405703, | |
"perf_norm_to_cublas": 0.8561140006251148, | |
"compute_intensity": 63.25868725868726, | |
"tile_compute_intensity": 6.918918918918919, | |
"MxNxK": 2147483648, | |
"size_m": 4096, | |
"size_n": 8192, | |
"size_k": 64, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_4096x8192x64.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.000016617600340396167, | |
"perf_norm_to_sol": 0.35351509592689867, | |
"perf_norm_to_cublas": 0.8444059159012526, | |
"compute_intensity": 28.346020761245676, | |
"tile_compute_intensity": 1.9393939393939394, | |
"MxNxK": 16777216, | |
"size_m": 4096, | |
"size_n": 128, | |
"size_k": 32, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_4096x128x32.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00008088640170171857, | |
"perf_norm_to_sol": 0.49125022416972397, | |
"perf_norm_to_cublas": 0.7240969423391094, | |
"compute_intensity": 15.50780880265026, | |
"tile_compute_intensity": 1.5900621118012421, | |
"MxNxK": 67108864, | |
"size_m": 16384, | |
"size_n": 256, | |
"size_k": 16, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_16384x256x16.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.0022314880043268204, | |
"perf_norm_to_sol": 0.5389432589207123, | |
"perf_norm_to_cublas": 0.7965210960408874, | |
"compute_intensity": 31.906523855890946, | |
"tile_compute_intensity": 3.8496240601503757, | |
"MxNxK": 4294967296, | |
"size_m": 8192, | |
"size_n": 16384, | |
"size_k": 32, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_8192x16384x32.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.0006968608126044273, | |
"perf_norm_to_sol": 0.7867784111479862, | |
"perf_norm_to_cublas": 1.0851727621007243, | |
"compute_intensity": 120.02930402930403, | |
"tile_compute_intensity": 1.9692307692307693, | |
"MxNxK": 2147483648, | |
"size_m": 16384, | |
"size_n": 64, | |
"size_k": 2048, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_16384x64x2048.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.000052127998787909743, | |
"perf_norm_to_sol": 0.6573663094699617, | |
"perf_norm_to_cublas": 1.8912217512279017, | |
"compute_intensity": 256, | |
"tile_compute_intensity": 3.878787878787879, | |
"MxNxK": 134217728, | |
"size_m": 512, | |
"size_n": 256, | |
"size_k": 1024, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_512x256x1024.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00010528001002967358, | |
"perf_norm_to_sol": 0.6509723958727687, | |
"perf_norm_to_cublas": 0.9379331018886085, | |
"compute_intensity": 167.18367346938774, | |
"tile_compute_intensity": 3.4594594594594597, | |
"MxNxK": 268435456, | |
"size_m": 256, | |
"size_n": 4096, | |
"size_k": 256, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_256x4096x256.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.005445056036114693, | |
"perf_norm_to_sol": 0.8055381458640433, | |
"perf_norm_to_cublas": 0.9291334612393519, | |
"compute_intensity": 668.734693877551, | |
"tile_compute_intensity": 14.222222222222221, | |
"MxNxK": 17179869184, | |
"size_m": 16384, | |
"size_n": 512, | |
"size_k": 2048, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_16384x512x2048.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00011120958952233195, | |
"perf_norm_to_sol": 0.6162632256884949, | |
"perf_norm_to_cublas": 0.9236613732620205, | |
"compute_intensity": 117.02857142857142, | |
"tile_compute_intensity": 7.111111111111111, | |
"MxNxK": 268435456, | |
"size_m": 1024, | |
"size_n": 2048, | |
"size_k": 128, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_1024x2048x128.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.0000162240001372993, | |
"perf_norm_to_sol": 0.2640161943206743, | |
"perf_norm_to_cublas": 0.8254437508924528, | |
"compute_intensity": 93.0909090909091, | |
"tile_compute_intensity": 2.6666666666666665, | |
"MxNxK": 16777216, | |
"size_m": 256, | |
"size_n": 512, | |
"size_k": 128, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_256x512x128.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.0000066016000346280634, | |
"perf_norm_to_sol": 0.0442171937056893, | |
"perf_norm_to_cublas": 0.7794474257618892, | |
"compute_intensity": 42.666666666666664, | |
"tile_compute_intensity": 1.1428571428571428, | |
"MxNxK": 1048576, | |
"size_m": 128, | |
"size_n": 128, | |
"size_k": 64, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_128x128x64.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.001088822353631258, | |
"perf_norm_to_sol": 0.5533412530959309, | |
"perf_norm_to_cublas": 0.8229044538937644, | |
"compute_intensity": 31.875486381322958, | |
"tile_compute_intensity": 3.8208955223880596, | |
"MxNxK": 2147483648, | |
"size_m": 8192, | |
"size_n": 8192, | |
"size_k": 32, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_8192x8192x32.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.0007765184156596661, | |
"perf_norm_to_sol": 0.7060683067850188, | |
"perf_norm_to_cublas": 0.946748921748331, | |
"compute_intensity": 63.25868725868726, | |
"tile_compute_intensity": 7.111111111111111, | |
"MxNxK": 2147483648, | |
"size_m": 8192, | |
"size_n": 4096, | |
"size_k": 64, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_8192x4096x64.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00010125759290531277, | |
"perf_norm_to_sol": 0.7444801381168034, | |
"perf_norm_to_cublas": 0.8355403065155155, | |
"compute_intensity": 7.964997569275644, | |
"tile_compute_intensity": 0.9624060150375939, | |
"MxNxK": 67108864, | |
"size_m": 8192, | |
"size_n": 1024, | |
"size_k": 8, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_8192x1024x8.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00019776320550590754, | |
"perf_norm_to_sol": 0.6930953631258621, | |
"perf_norm_to_cublas": 1.0012944637099837, | |
"compute_intensity": 127.50194552529183, | |
"tile_compute_intensity": 3.506849315068493, | |
"MxNxK": 536870912, | |
"size_m": 16384, | |
"size_n": 128, | |
"size_k": 256, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_16384x128x256.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.0006998303811997175, | |
"perf_norm_to_sol": 0.7834398986684449, | |
"perf_norm_to_cublas": 1.0787482359635798, | |
"compute_intensity": 203.527950310559, | |
"tile_compute_intensity": 3.8208955223880596, | |
"MxNxK": 2147483648, | |
"size_m": 16384, | |
"size_n": 128, | |
"size_k": 1024, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_16384x128x1024.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00018233599839732051, | |
"perf_norm_to_sol": 0.7517372429901134, | |
"perf_norm_to_cublas": 1.058230965431348, | |
"compute_intensity": 199.8048780487805, | |
"tile_compute_intensity": 1.9393939393939394, | |
"MxNxK": 536870912, | |
"size_m": 128, | |
"size_n": 4096, | |
"size_k": 1024, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_128x4096x1024.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00001530239969724789, | |
"perf_norm_to_sol": 0.6306908532397912, | |
"perf_norm_to_cublas": 0.9997908846833159, | |
"compute_intensity": 7.869356388088376, | |
"tile_compute_intensity": 0.7950310559006211, | |
"MxNxK": 8388608, | |
"size_m": 256, | |
"size_n": 4096, | |
"size_k": 8, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_256x4096x8.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.0006949727889150381, | |
"perf_norm_to_sol": 0.7889158419974248, | |
"perf_norm_to_cublas": 1.0831111975019208, | |
"compute_intensity": 390.0952380952381, | |
"tile_compute_intensity": 6.918918918918919, | |
"MxNxK": 2147483648, | |
"size_m": 4096, | |
"size_n": 256, | |
"size_k": 2048, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_4096x256x2048.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.000120630394667387, | |
"perf_norm_to_sol": 0.5681352577473936, | |
"perf_norm_to_cublas": 0.889540335176208, | |
"compute_intensity": 85.11168831168831, | |
"tile_compute_intensity": 3.1604938271604937, | |
"MxNxK": 268435456, | |
"size_m": 16384, | |
"size_n": 128, | |
"size_k": 128, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_16384x128x128.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.0000973824004177004, | |
"perf_norm_to_sol": 0.7037655682398726, | |
"perf_norm_to_cublas": 1.032005833294332, | |
"compute_intensity": 195.04761904761904, | |
"tile_compute_intensity": 3.4594594594594597, | |
"MxNxK": 268435456, | |
"size_m": 2048, | |
"size_n": 128, | |
"size_k": 1024, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_2048x128x1024.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.0015641951933503151, | |
"perf_norm_to_sol": 0.7654998214351574, | |
"perf_norm_to_cublas": 0.781989349714309, | |
"compute_intensity": 7.9941449133935105, | |
"tile_compute_intensity": 0.9903288201160542, | |
"MxNxK": 1073741824, | |
"size_m": 8192, | |
"size_n": 16384, | |
"size_k": 8, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_8192x16384x8.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00011605439940467476, | |
"perf_norm_to_sol": 0.5905366855378781, | |
"perf_norm_to_cublas": 0.8892656904200091, | |
"compute_intensity": 61.59398496240601, | |
"tile_compute_intensity": 5.818181818181818, | |
"MxNxK": 268435456, | |
"size_m": 4096, | |
"size_n": 1024, | |
"size_k": 64, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_4096x1024x64.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.0027151072397828104, | |
"perf_norm_to_sol": 0.8077397973806206, | |
"perf_norm_to_cublas": 0.9032742224689796, | |
"compute_intensity": 1024, | |
"tile_compute_intensity": 15.515151515151516, | |
"MxNxK": 8589934592, | |
"size_m": 2048, | |
"size_n": 1024, | |
"size_k": 4096, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_2048x1024x4096.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.001384518388658762, | |
"perf_norm_to_sol": 0.7920083220611345, | |
"perf_norm_to_cublas": 0.9090444582401378, | |
"compute_intensity": 237.44927536231884, | |
"tile_compute_intensity": 18.285714285714285, | |
"MxNxK": 4294967296, | |
"size_m": 8192, | |
"size_n": 2048, | |
"size_k": 256, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_8192x2048x256.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.0006848768331110478, | |
"perf_norm_to_sol": 0.8005454651483401, | |
"perf_norm_to_cublas": 1.0643011669190756, | |
"compute_intensity": 234.05714285714285, | |
"tile_compute_intensity": 3.7372262773722627, | |
"MxNxK": 2147483648, | |
"size_m": 4096, | |
"size_n": 128, | |
"size_k": 4096, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_4096x128x4096.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.000006150399713078514, | |
"perf_norm_to_sol": 0.0519104877277682, | |
"perf_norm_to_cublas": 0.5915713213404165, | |
"compute_intensity": 7.641791044776119, | |
"tile_compute_intensity": 0.6666666666666666, | |
"MxNxK": 262144, | |
"size_m": 256, | |
"size_n": 128, | |
"size_k": 8, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_256x128x8.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00035368320532143117, | |
"perf_norm_to_sol": 0.775093409416949, | |
"perf_norm_to_cublas": 1.6715341575547684, | |
"compute_intensity": 221.40540540540542, | |
"tile_compute_intensity": 3.190031152647975, | |
"MxNxK": 1073741824, | |
"size_m": 1024, | |
"size_n": 128, | |
"size_k": 8192, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_1024x128x8192.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.0002501599956303835, | |
"perf_norm_to_sol": 0.6172743006237188, | |
"perf_norm_to_cublas": 0.8355740584068375, | |
"compute_intensity": 31.477425552353505, | |
"tile_compute_intensity": 3.1801242236024843, | |
"MxNxK": 536870912, | |
"size_m": 1024, | |
"size_n": 16384, | |
"size_k": 32, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_1024x16384x32.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.0003530911868438125, | |
"perf_norm_to_sol": 0.7763929876487283, | |
"perf_norm_to_cublas": 1.0761185958517656, | |
"compute_intensity": 455.1111111111111, | |
"tile_compute_intensity": 5.278350515463917, | |
"MxNxK": 1073741824, | |
"size_m": 512, | |
"size_n": 512, | |
"size_k": 4096, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_512x512x4096.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00001836159935919568, | |
"perf_norm_to_sol": 0.23328026546678177, | |
"perf_norm_to_cublas": 5.303764346078755, | |
"compute_intensity": 113.77777777777777, | |
"tile_compute_intensity": 1.3195876288659794, | |
"MxNxK": 16777216, | |
"size_m": 128, | |
"size_n": 128, | |
"size_k": 1024, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_128x128x1024.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.000006243200186872855, | |
"perf_norm_to_sol": 0.19578884483324427, | |
"perf_norm_to_cublas": 0.705791875126521, | |
"compute_intensity": 7.816793893129771, | |
"tile_compute_intensity": 0.8, | |
"MxNxK": 1048576, | |
"size_m": 512, | |
"size_n": 256, | |
"size_k": 8, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_512x256x8.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.0007224319968372583, | |
"perf_norm_to_sol": 0.7589296229022305, | |
"perf_norm_to_cublas": 0.9006733161408109, | |
"compute_intensity": 123.18796992481202, | |
"tile_compute_intensity": 11.636363636363637, | |
"MxNxK": 2147483648, | |
"size_m": 8192, | |
"size_n": 2048, | |
"size_k": 128, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_8192x2048x128.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.000009171199781121687, | |
"perf_norm_to_sol": 0.11676222509417343, | |
"perf_norm_to_cublas": 2.534194074146827, | |
"compute_intensity": 73.14285714285714, | |
"tile_compute_intensity": 0.9846153846153847, | |
"MxNxK": 4194304, | |
"size_m": 128, | |
"size_n": 64, | |
"size_k": 512, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_128x64x512.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.000008566399628762156, | |
"perf_norm_to_sol": 0.14056137824682124, | |
"perf_norm_to_cublas": 0.8502055026640178, | |
"compute_intensity": 41.795918367346935, | |
"tile_compute_intensity": 1.4545454545454546, | |
"MxNxK": 4194304, | |
"size_m": 1024, | |
"size_n": 64, | |
"size_k": 64, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_1024x64x64.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.0007259903941303492, | |
"perf_norm_to_sol": 0.7552097760039576, | |
"perf_norm_to_cublas": 0.9106588777478629, | |
"compute_intensity": 120.02930402930403, | |
"tile_compute_intensity": 10.24, | |
"MxNxK": 2147483648, | |
"size_m": 16384, | |
"size_n": 1024, | |
"size_k": 128, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_16384x1024x128.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00014120959676802158, | |
"perf_norm_to_sol": 0.5462496450806543, | |
"perf_norm_to_cublas": 0.7957533059869354, | |
"compute_intensity": 15.7462758289284, | |
"tile_compute_intensity": 1.7655172413793103, | |
"MxNxK": 134217728, | |
"size_m": 16384, | |
"size_n": 512, | |
"size_k": 16, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_16384x512x16.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00019565760158002377, | |
"perf_norm_to_sol": 0.7005542316074567, | |
"perf_norm_to_cublas": 1.0004251835262505, | |
"compute_intensity": 215.57894736842104, | |
"tile_compute_intensity": 10.666666666666666, | |
"MxNxK": 536870912, | |
"size_m": 2048, | |
"size_n": 1024, | |
"size_k": 256, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_2048x1024x256.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.0013891360722482204, | |
"perf_norm_to_sol": 0.7893755750577561, | |
"perf_norm_to_cublas": 0.8897063403177918, | |
"compute_intensity": 240.94117647058823, | |
"tile_compute_intensity": 18.285714285714285, | |
"MxNxK": 4294967296, | |
"size_m": 4096, | |
"size_n": 4096, | |
"size_k": 256, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_4096x4096x256.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00018051520455628634, | |
"perf_norm_to_sol": 0.759319754089258, | |
"perf_norm_to_cublas": 1.0948574042098684, | |
"compute_intensity": 341.3333333333333, | |
"tile_compute_intensity": 3.506849315068493, | |
"MxNxK": 536870912, | |
"size_m": 256, | |
"size_n": 1024, | |
"size_k": 2048, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_256x1024x2048.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.0000959615979809314, | |
"perf_norm_to_sol": 0.7141854846992466, | |
"perf_norm_to_cublas": 1.0694612186998762, | |
"compute_intensity": 112.21917808219177, | |
"tile_compute_intensity": 1.9104477611940298, | |
"MxNxK": 268435456, | |
"size_m": 4096, | |
"size_n": 64, | |
"size_k": 1024, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_4096x64x1024.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.000031830399530008434, | |
"perf_norm_to_sol": 0.6075530108741741, | |
"perf_norm_to_cublas": 0.9384739214482573, | |
"compute_intensity": 15.723608445297504, | |
"tile_compute_intensity": 1.5802469135802468, | |
"MxNxK": 33554432, | |
"size_m": 512, | |
"size_n": 4096, | |
"size_k": 16, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_512x4096x16.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.0007224287837743759, | |
"perf_norm_to_sol": 0.7589329983056702, | |
"perf_norm_to_cublas": 0.8810280236482566, | |
"compute_intensity": 124.12121212121212, | |
"tile_compute_intensity": 11.636363636363637, | |
"MxNxK": 2147483648, | |
"size_m": 4096, | |
"size_n": 4096, | |
"size_k": 128, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_4096x4096x128.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.0001114911981858313, | |
"perf_norm_to_sol": 0.6147066448446808, | |
"perf_norm_to_cublas": 0.9271260864939108, | |
"compute_intensity": 117.02857142857142, | |
"tile_compute_intensity": 8, | |
"MxNxK": 268435456, | |
"size_m": 2048, | |
"size_n": 1024, | |
"size_k": 128, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_2048x1024x128.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.0013980287127196788, | |
"perf_norm_to_sol": 0.7843544813405294, | |
"perf_norm_to_cublas": 0.8965973382974427, | |
"compute_intensity": 225.98620689655172, | |
"tile_compute_intensity": 15.058823529411764, | |
"MxNxK": 4294967296, | |
"size_m": 16384, | |
"size_n": 1024, | |
"size_k": 256, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_16384x1024x256.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.000022604799596592784, | |
"perf_norm_to_sol": 0.37898135346030615, | |
"perf_norm_to_cublas": 1.0273216642311331, | |
"compute_intensity": 120.47058823529412, | |
"tile_compute_intensity": 1.7777777777777777, | |
"MxNxK": 33554432, | |
"size_m": 128, | |
"size_n": 1024, | |
"size_k": 256, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_128x1024x256.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00036280960775911806, | |
"perf_norm_to_sol": 0.7555960911821067, | |
"perf_norm_to_cublas": 1.037811516648683, | |
"compute_intensity": 455.1111111111111, | |
"tile_compute_intensity": 6.7368421052631575, | |
"MxNxK": 1073741824, | |
"size_m": 512, | |
"size_n": 2048, | |
"size_k": 1024, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_512x2048x1024.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.021831241250038148, | |
"perf_norm_to_sol": 0.8036556956558725, | |
"perf_norm_to_cublas": 0.9034660328129076, | |
"compute_intensity": 1489.4545454545455, | |
"tile_compute_intensity": 51.2, | |
"MxNxK": 68719476736, | |
"size_m": 8192, | |
"size_n": 4096, | |
"size_k": 2048, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_8192x4096x2048.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00000668479988235049, | |
"perf_norm_to_sol": 0.19923005346485118, | |
"perf_norm_to_cublas": 0.7515557790955317, | |
"compute_intensity": 14.94890510948905, | |
"tile_compute_intensity": 1.2307692307692308, | |
"MxNxK": 2097152, | |
"size_m": 1024, | |
"size_n": 128, | |
"size_k": 16, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_1024x128x16.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.0016608929261565208, | |
"perf_norm_to_sol": 0.7209321698842355, | |
"perf_norm_to_cublas": 0.7364019540082032, | |
"compute_intensity": 7.9941449133935105, | |
"tile_compute_intensity": 0.9922480620155039, | |
"MxNxK": 1073741824, | |
"size_m": 16384, | |
"size_n": 8192, | |
"size_k": 8, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_16384x8192x8.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.000016246399900410326, | |
"perf_norm_to_sol": 0.26365218135493973, | |
"perf_norm_to_cublas": 0.8193815525560123, | |
"compute_intensity": 81.92, | |
"tile_compute_intensity": 1.6842105263157894, | |
"MxNxK": 16777216, | |
"size_m": 128, | |
"size_n": 1024, | |
"size_k": 128, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_128x1024x128.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00011861120583489537, | |
"perf_norm_to_sol": 0.5778069608526224, | |
"perf_norm_to_cublas": 0.8913290066127666, | |
"compute_intensity": 101.7639751552795, | |
"tile_compute_intensity": 5.12, | |
"MxNxK": 268435456, | |
"size_m": 8192, | |
"size_n": 256, | |
"size_k": 128, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_8192x256x128.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00007320960285142064, | |
"perf_norm_to_sol": 0.46806960902121475, | |
"perf_norm_to_cublas": 0.8415508271479905, | |
"compute_intensity": 59.7956204379562, | |
"tile_compute_intensity": 4.923076923076923, | |
"MxNxK": 134217728, | |
"size_m": 4096, | |
"size_n": 512, | |
"size_k": 64, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_4096x512x64.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.0001978783868253231, | |
"perf_norm_to_sol": 0.692691925238145, | |
"perf_norm_to_cublas": 1.0016818821265843, | |
"compute_intensity": 168.90721649484536, | |
"tile_compute_intensity": 6.095238095238095, | |
"MxNxK": 536870912, | |
"size_m": 8192, | |
"size_n": 256, | |
"size_k": 256, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_8192x256x256.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.0004277823958545923, | |
"perf_norm_to_sol": 0.6408340411448, | |
"perf_norm_to_cublas": 0.9266019902777504, | |
"compute_intensity": 61.94328922495274, | |
"tile_compute_intensity": 6.2439024390243905, | |
"MxNxK": 1073741824, | |
"size_m": 16384, | |
"size_n": 1024, | |
"size_k": 64, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_16384x1024x64.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00019497280009090902, | |
"perf_norm_to_sol": 0.7030147829294193, | |
"perf_norm_to_cublas": 1.0047267631858445, | |
"compute_intensity": 199.8048780487805, | |
"tile_compute_intensity": 6.095238095238095, | |
"MxNxK": 536870912, | |
"size_m": 512, | |
"size_n": 4096, | |
"size_k": 256, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_512x4096x256.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00000655999974696897, | |
"perf_norm_to_sol": 0.3796201275767372, | |
"perf_norm_to_cublas": 0.7980488010474718, | |
"compute_intensity": 7.742911153119093, | |
"tile_compute_intensity": 0.6597938144329897, | |
"MxNxK": 2097152, | |
"size_m": 128, | |
"size_n": 2048, | |
"size_k": 8, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_128x2048x8.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00003771199844777584, | |
"perf_norm_to_sol": 0.45432742354819255, | |
"perf_norm_to_cublas": 0.8107763974963544, | |
"compute_intensity": 84.45360824742268, | |
"tile_compute_intensity": 3.0476190476190474, | |
"MxNxK": 67108864, | |
"size_m": 4096, | |
"size_n": 128, | |
"size_k": 128, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_4096x128x128.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.000022681600239593535, | |
"perf_norm_to_sol": 0.37769811015632454, | |
"perf_norm_to_cublas": 1.283860055610314, | |
"compute_intensity": 170.66666666666666, | |
"tile_compute_intensity": 2.56, | |
"MxNxK": 33554432, | |
"size_m": 256, | |
"size_n": 256, | |
"size_k": 512, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_256x256x512.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00003160960040986538, | |
"perf_norm_to_sol": 0.5420376996060989, | |
"perf_norm_to_cublas": 3.0835188292470215, | |
"compute_intensity": 204.8, | |
"tile_compute_intensity": 2.6122448979591835, | |
"MxNxK": 67108864, | |
"size_m": 256, | |
"size_n": 256, | |
"size_k": 1024, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_256x256x1024.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.0013591552153229714, | |
"perf_norm_to_sol": 0.8067879764592172, | |
"perf_norm_to_cublas": 1.0582126623752606, | |
"compute_intensity": 337.8144329896907, | |
"tile_compute_intensity": 3.8496240601503757, | |
"MxNxK": 4294967296, | |
"size_m": 256, | |
"size_n": 16384, | |
"size_k": 1024, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_256x16384x1024.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00003174080047756434, | |
"perf_norm_to_sol": 0.6023706590095335, | |
"perf_norm_to_cublas": 0.9457606856227402, | |
"compute_intensity": 15.814671814671815, | |
"tile_compute_intensity": 1.7777777777777777, | |
"MxNxK": 33554432, | |
"size_m": 2048, | |
"size_n": 1024, | |
"size_k": 16, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_2048x1024x16.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.000006620799831580371, | |
"perf_norm_to_sol": 0.10195573695957273, | |
"perf_norm_to_cublas": 0.595456724381558, | |
"compute_intensity": 14.840579710144928, | |
"tile_compute_intensity": 1.1428571428571428, | |
"MxNxK": 1048576, | |
"size_m": 512, | |
"size_n": 128, | |
"size_k": 16, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_512x128x16.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.000058009603526443244, | |
"perf_norm_to_sol": 0.5907158142813788, | |
"perf_norm_to_cublas": 0.9062223120389438, | |
"compute_intensity": 126.03076923076924, | |
"tile_compute_intensity": 3.3684210526315788, | |
"MxNxK": 134217728, | |
"size_m": 4096, | |
"size_n": 128, | |
"size_k": 256, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_4096x128x256.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.000114592001773417, | |
"perf_norm_to_sol": 0.5980729833312355, | |
"perf_norm_to_cublas": 0.8974308413229686, | |
"compute_intensity": 112.21917808219177, | |
"tile_compute_intensity": 5.12, | |
"MxNxK": 268435456, | |
"size_m": 512, | |
"size_n": 4096, | |
"size_k": 128, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_512x4096x128.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.002698761597275734, | |
"perf_norm_to_sol": 0.812632050916483, | |
"perf_norm_to_cublas": 1.0478204436427254, | |
"compute_intensity": 910.2222222222222, | |
"tile_compute_intensity": 13.473684210526315, | |
"MxNxK": 8589934592, | |
"size_m": 1024, | |
"size_n": 4096, | |
"size_k": 2048, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_1024x4096x2048.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.011158080399036407, | |
"perf_norm_to_sol": 0.7861926400595628, | |
"perf_norm_to_cublas": 0.9044712643233008, | |
"compute_intensity": 250.13740458015266, | |
"tile_compute_intensity": 3.930902111324376, | |
"MxNxK": 34359738368, | |
"size_m": 16384, | |
"size_n": 128, | |
"size_k": 16384, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_16384x128x16384.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00036571838427335024, | |
"perf_norm_to_sol": 0.7495863846461799, | |
"perf_norm_to_cublas": 1.0064399870507597, | |
"compute_intensity": 372.3636363636364, | |
"tile_compute_intensity": 10.666666666666666, | |
"MxNxK": 1073741824, | |
"size_m": 1024, | |
"size_n": 2048, | |
"size_k": 512, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_1024x2048x512.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00003169920237269253, | |
"perf_norm_to_sol": 0.5405055587894311, | |
"perf_norm_to_cublas": 1.3654349621819717, | |
"compute_intensity": 204.8, | |
"tile_compute_intensity": 3.0476190476190474, | |
"MxNxK": 67108864, | |
"size_m": 256, | |
"size_n": 512, | |
"size_k": 512, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_256x512x512.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00018005119636654854, | |
"perf_norm_to_sol": 0.761276589654015, | |
"perf_norm_to_cublas": 1.1798422115725307, | |
"compute_intensity": 215.57894736842104, | |
"tile_compute_intensity": 1.8754578754578755, | |
"MxNxK": 536870912, | |
"size_m": 128, | |
"size_n": 1024, | |
"size_k": 4096, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_128x1024x4096.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00536707192659378, | |
"perf_norm_to_sol": 0.8172426983368853, | |
"perf_norm_to_cublas": 0.9340082725740405, | |
"compute_intensity": 1170.2857142857142, | |
"tile_compute_intensity": 28.444444444444443, | |
"MxNxK": 17179869184, | |
"size_m": 4096, | |
"size_n": 2048, | |
"size_k": 2048, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_4096x2048x2048.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.0027176415547728538, | |
"perf_norm_to_sol": 0.8069865460649859, | |
"perf_norm_to_cublas": 0.9131846954814342, | |
"compute_intensity": 244.53731343283582, | |
"tile_compute_intensity": 1.9806576402321083, | |
"MxNxK": 8589934592, | |
"size_m": 128, | |
"size_n": 8192, | |
"size_k": 8192, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_128x8192x8192.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.000006201599899213761, | |
"perf_norm_to_sol": 0.10002200940328947, | |
"perf_norm_to_cublas": 0.6119710951503705, | |
"compute_intensity": 7.757575757575758, | |
"tile_compute_intensity": 0.7272727272727273, | |
"MxNxK": 524288, | |
"size_m": 256, | |
"size_n": 256, | |
"size_k": 8, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_256x256x8.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00008783040102571249, | |
"perf_norm_to_sol": 0.4524112665342262, | |
"perf_norm_to_cublas": 0.6298319825612421, | |
"compute_intensity": 15.50780880265026, | |
"tile_compute_intensity": 1.3298701298701299, | |
"MxNxK": 67108864, | |
"size_m": 256, | |
"size_n": 16384, | |
"size_k": 16, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_256x16384x16.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00003214080061297864, | |
"perf_norm_to_sol": 0.5330792875368758, | |
"perf_norm_to_cublas": 3.0402230453312105, | |
"compute_intensity": 170.66666666666666, | |
"tile_compute_intensity": 1.7534246575342465, | |
"MxNxK": 67108864, | |
"size_m": 128, | |
"size_n": 512, | |
"size_k": 1024, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_128x512x1024.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.0000965824001468718, | |
"perf_norm_to_sol": 0.7095949185597609, | |
"perf_norm_to_cublas": 1.2388841378121622, | |
"compute_intensity": 195.04761904761904, | |
"tile_compute_intensity": 2.6528497409326426, | |
"MxNxK": 268435456, | |
"size_m": 512, | |
"size_n": 128, | |
"size_k": 4096, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_512x128x4096.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.000013123199460096658, | |
"perf_norm_to_sol": 0.2057512051518022, | |
"perf_norm_to_cublas": 0.6232625955928649, | |
"compute_intensity": 29.681159420289855, | |
"tile_compute_intensity": 1.8823529411764706, | |
"MxNxK": 8388608, | |
"size_m": 256, | |
"size_n": 1024, | |
"size_k": 32, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_256x1024x32.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.001373215951025486, | |
"perf_norm_to_sol": 0.7985270525335315, | |
"perf_norm_to_cublas": 1.6473098233858, | |
"compute_intensity": 125.06870229007633, | |
"tile_compute_intensity": 1.965451055662188, | |
"MxNxK": 4294967296, | |
"size_m": 8192, | |
"size_n": 64, | |
"size_k": 8192, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_8192x64x8192.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00136219197884202, | |
"perf_norm_to_sol": 0.8049893868825841, | |
"perf_norm_to_cublas": 1.6306847367008683, | |
"compute_intensity": 630.1538461538462, | |
"tile_compute_intensity": 7.937984496124031, | |
"MxNxK": 4294967296, | |
"size_m": 1024, | |
"size_n": 512, | |
"size_k": 8192, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_1024x512x8192.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00018352639162912964, | |
"perf_norm_to_sol": 0.8183276732838556, | |
"perf_norm_to_cublas": 0.865235816230301, | |
"compute_intensity": 7.980516317584024, | |
"tile_compute_intensity": 0.9660377358490566, | |
"MxNxK": 134217728, | |
"size_m": 2048, | |
"size_n": 8192, | |
"size_k": 8, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_2048x8192x8.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00026662400923669336, | |
"perf_norm_to_sol": 0.5714939445645282, | |
"perf_norm_to_cublas": 0.8836413564298699, | |
"compute_intensity": 31.690522243713733, | |
"tile_compute_intensity": 3.657142857142857, | |
"MxNxK": 536870912, | |
"size_m": 8192, | |
"size_n": 2048, | |
"size_k": 32, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_8192x2048x32.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.0000066016000346280634, | |
"perf_norm_to_sol": 0.060798641345322785, | |
"perf_norm_to_cublas": 0.6422685235700423, | |
"compute_intensity": 26.94736842105263, | |
"tile_compute_intensity": 1.1428571428571428, | |
"MxNxK": 1048576, | |
"size_m": 128, | |
"size_n": 256, | |
"size_k": 32, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_128x256x32.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.0000062719998823013155, | |
"perf_norm_to_sol": 0.10035371529901634, | |
"perf_norm_to_cublas": 0.7489796316105563, | |
"compute_intensity": 7.6992481203007515, | |
"tile_compute_intensity": 0.7272727272727273, | |
"MxNxK": 524288, | |
"size_m": 512, | |
"size_n": 128, | |
"size_k": 8, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_512x128x8.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.0000062719998823013155, | |
"perf_norm_to_sol": 0.10035371529901634, | |
"perf_norm_to_cublas": 0.6596938738222247, | |
"compute_intensity": 7.6992481203007515, | |
"tile_compute_intensity": 0.64, | |
"MxNxK": 524288, | |
"size_m": 128, | |
"size_n": 512, | |
"size_k": 8, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_128x512x8.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00018820799887180327, | |
"perf_norm_to_sol": 0.7282833968518788, | |
"perf_norm_to_cublas": 0.9875711822840483, | |
"compute_intensity": 248.24242424242425, | |
"tile_compute_intensity": 3.657142857142857, | |
"MxNxK": 536870912, | |
"size_m": 256, | |
"size_n": 4096, | |
"size_k": 512, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_256x4096x512.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.0027192672714591026, | |
"perf_norm_to_sol": 0.8065040883429053, | |
"perf_norm_to_cublas": 0.9253162983240925, | |
"compute_intensity": 744.7272727272727, | |
"tile_compute_intensity": 12.487804878048781, | |
"MxNxK": 8589934592, | |
"size_m": 4096, | |
"size_n": 512, | |
"size_k": 4096, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_4096x512x4096.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.02180553525686264, | |
"perf_norm_to_sol": 0.8046031049987129, | |
"perf_norm_to_cublas": 0.8878822604134214, | |
"compute_intensity": 1310.72, | |
"tile_compute_intensity": 42.666666666666664, | |
"MxNxK": 68719476736, | |
"size_m": 16384, | |
"size_n": 2048, | |
"size_k": 2048, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_16384x2048x2048.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.002738921530544758, | |
"perf_norm_to_sol": 0.8007166862106585, | |
"perf_norm_to_cublas": 1.637199102613198, | |
"compute_intensity": 244.53731343283582, | |
"tile_compute_intensity": 3.757798165137615, | |
"MxNxK": 8589934592, | |
"size_m": 4096, | |
"size_n": 128, | |
"size_k": 16384, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_4096x128x16384.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00020344960503280163, | |
"perf_norm_to_sol": 0.6737234054150768, | |
"perf_norm_to_cublas": 0.9012551249734781, | |
"compute_intensity": 112.99310344827586, | |
"tile_compute_intensity": 7.529411764705882, | |
"MxNxK": 536870912, | |
"size_m": 8192, | |
"size_n": 512, | |
"size_k": 128, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_8192x512x128.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.0003739519976079464, | |
"perf_norm_to_sol": 0.7330821154043156, | |
"perf_norm_to_cublas": 1.0145644300184369, | |
"compute_intensity": 455.1111111111111, | |
"tile_compute_intensity": 9.846153846153847, | |
"MxNxK": 1073741824, | |
"size_m": 2048, | |
"size_n": 512, | |
"size_k": 1024, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_2048x512x1024.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.000013510399730876088, | |
"perf_norm_to_sol": 0.3902564109294627, | |
"perf_norm_to_cublas": 0.7626717473715775, | |
"compute_intensity": 15.03119266055046, | |
"tile_compute_intensity": 0.9922480620155039, | |
"MxNxK": 8388608, | |
"size_m": 128, | |
"size_n": 4096, | |
"size_k": 16, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_128x4096x16.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00006027839845046401, | |
"perf_norm_to_sol": 0.568482094152239, | |
"perf_norm_to_cublas": 0.9184053303720155, | |
"compute_intensity": 84.89119170984456, | |
"tile_compute_intensity": 1.855072463768116, | |
"MxNxK": 134217728, | |
"size_m": 8192, | |
"size_n": 64, | |
"size_k": 256, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_8192x64x256.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00009722880204208196, | |
"perf_norm_to_sol": 0.4098058005108214, | |
"perf_norm_to_cublas": 0.7469720777589217, | |
"compute_intensity": 30.97164461247637, | |
"tile_compute_intensity": 3.1219512195121952, | |
"MxNxK": 134217728, | |
"size_m": 8192, | |
"size_n": 512, | |
"size_k": 32, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_8192x512x32.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.002710575982928276, | |
"perf_norm_to_sol": 0.8090900921211529, | |
"perf_norm_to_cublas": 0.9288381611861334, | |
"compute_intensity": 744.7272727272727, | |
"tile_compute_intensity": 21.333333333333332, | |
"MxNxK": 8589934592, | |
"size_m": 2048, | |
"size_n": 4096, | |
"size_k": 1024, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_2048x4096x1024.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00035883840173482894, | |
"perf_norm_to_sol": 0.7639581497988125, | |
"perf_norm_to_cublas": 1.0601942305705843, | |
"compute_intensity": 113.3840830449827, | |
"tile_compute_intensity": 1.9541984732824427, | |
"MxNxK": 1073741824, | |
"size_m": 16384, | |
"size_n": 64, | |
"size_k": 1024, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_16384x64x1024.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00009696639608591795, | |
"perf_norm_to_sol": 0.7067848567435694, | |
"perf_norm_to_cublas": 1.1071546999465744, | |
"compute_intensity": 292.57142857142856, | |
"tile_compute_intensity": 3.9384615384615387, | |
"MxNxK": 268435456, | |
"size_m": 512, | |
"size_n": 256, | |
"size_k": 2048, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_512x256x2048.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00018252161098644137, | |
"perf_norm_to_sol": 0.7509727751813106, | |
"perf_norm_to_cublas": 1.170062009172186, | |
"compute_intensity": 120.47058823529412, | |
"tile_compute_intensity": 1.8754578754578755, | |
"MxNxK": 536870912, | |
"size_m": 2048, | |
"size_n": 64, | |
"size_k": 4096, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_2048x64x4096.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.000017561599088367073, | |
"perf_norm_to_sol": 0.5526726532983428, | |
"perf_norm_to_cublas": 0.9803207410791253, | |
"compute_intensity": 15.693486590038313, | |
"tile_compute_intensity": 1.6842105263157894, | |
"MxNxK": 16777216, | |
"size_m": 2048, | |
"size_n": 512, | |
"size_k": 16, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_2048x512x16.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00003916159912478179, | |
"perf_norm_to_sol": 0.43751009852887107, | |
"perf_norm_to_cublas": 0.8520183417989372, | |
"compute_intensity": 63.750972762645915, | |
"tile_compute_intensity": 1.7534246575342465, | |
"MxNxK": 67108864, | |
"size_m": 8192, | |
"size_n": 64, | |
"size_k": 128, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_8192x64x128.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.0027463968843221666, | |
"perf_norm_to_sol": 0.7985372340932066, | |
"perf_norm_to_cublas": 0.9281723770713168, | |
"compute_intensity": 504.12307692307695, | |
"tile_compute_intensity": 13.473684210526315, | |
"MxNxK": 8589934592, | |
"size_m": 16384, | |
"size_n": 512, | |
"size_k": 1024, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_16384x512x1024.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.0003751136129721999, | |
"perf_norm_to_sol": 0.7308119779871054, | |
"perf_norm_to_cublas": 1.0357779622341423, | |
"compute_intensity": 202.2716049382716, | |
"tile_compute_intensity": 9.846153846153847, | |
"MxNxK": 1073741824, | |
"size_m": 8192, | |
"size_n": 512, | |
"size_k": 256, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_8192x512x256.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.005377955362200737, | |
"perf_norm_to_sol": 0.8155888340550953, | |
"perf_norm_to_cublas": 1.0601090761307281, | |
"compute_intensity": 780.1904761904761, | |
"tile_compute_intensity": 7.641791044776119, | |
"MxNxK": 17179869184, | |
"size_m": 512, | |
"size_n": 8192, | |
"size_k": 4096, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_512x8192x4096.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.001351667195558548, | |
"perf_norm_to_sol": 0.8112574526241169, | |
"perf_norm_to_cublas": 1.6365211332742065, | |
"compute_intensity": 630.1538461538462, | |
"tile_compute_intensity": 6.3602484472049685, | |
"MxNxK": 4294967296, | |
"size_m": 512, | |
"size_n": 1024, | |
"size_k": 8192, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_512x1024x8192.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.000019648000306915492, | |
"perf_norm_to_sol": 0.5357693424131508, | |
"perf_norm_to_cublas": 0.8819218156002484, | |
"compute_intensity": 15.044995408631772, | |
"tile_compute_intensity": 0.9961089494163424, | |
"MxNxK": 16777216, | |
"size_m": 128, | |
"size_n": 8192, | |
"size_k": 16, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_128x8192x16.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.0001248672022484243, | |
"perf_norm_to_sol": 0.5488581399475583, | |
"perf_norm_to_cublas": 0.8931600946521497, | |
"compute_intensity": 60.014652014652015, | |
"tile_compute_intensity": 5.12, | |
"MxNxK": 268435456, | |
"size_m": 8192, | |
"size_n": 512, | |
"size_k": 64, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_8192x512x64.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.021829110383987427, | |
"perf_norm_to_sol": 0.8037341451486927, | |
"perf_norm_to_cublas": 1.0397299714907258, | |
"compute_intensity": 1489.4545454545455, | |
"tile_compute_intensity": 14.124137931034483, | |
"MxNxK": 68719476736, | |
"size_m": 1024, | |
"size_n": 4096, | |
"size_k": 16384, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_1024x4096x16384.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.022148627042770385, | |
"perf_norm_to_sol": 0.7921394558656154, | |
"perf_norm_to_cublas": 0.8629006171548764, | |
"compute_intensity": 2048, | |
"tile_compute_intensity": 39.38461538461539, | |
"MxNxK": 68719476736, | |
"size_m": 4096, | |
"size_n": 4096, | |
"size_k": 4096, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_4096x4096x4096.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00009337280062027276, | |
"perf_norm_to_sol": 0.805002506956814, | |
"perf_norm_to_cublas": 0.898522908799616, | |
"compute_intensity": 7.976630963972736, | |
"tile_compute_intensity": 0.9624060150375939, | |
"MxNxK": 67108864, | |
"size_m": 2048, | |
"size_n": 4096, | |
"size_k": 8, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_2048x4096x8.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.0007061344105750323, | |
"perf_norm_to_sol": 0.7764457229689805, | |
"perf_norm_to_cublas": 1.0492189425288811, | |
"compute_intensity": 254.015503875969, | |
"tile_compute_intensity": 6.918918918918919, | |
"MxNxK": 2147483648, | |
"size_m": 16384, | |
"size_n": 256, | |
"size_k": 512, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_16384x256x512.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.000698339194059372, | |
"perf_norm_to_sol": 0.7851128042021255, | |
"perf_norm_to_cublas": 1.0491818210915453, | |
"compute_intensity": 334.3673469387755, | |
"tile_compute_intensity": 6.918918918918919, | |
"MxNxK": 2147483648, | |
"size_m": 512, | |
"size_n": 8192, | |
"size_k": 512, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_512x8192x512.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.010708486288785934, | |
"perf_norm_to_sol": 0.819200814227298, | |
"perf_norm_to_cublas": 1.0617588764021648, | |
"compute_intensity": 1365.3333333333333, | |
"tile_compute_intensity": 14.027397260273972, | |
"MxNxK": 34359738368, | |
"size_m": 1024, | |
"size_n": 4096, | |
"size_k": 8192, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_1024x4096x8192.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.000014815999020356684, | |
"perf_norm_to_sol": 0.6513960691016064, | |
"perf_norm_to_cublas": 1.069762569811826, | |
"compute_intensity": 7.869356388088376, | |
"tile_compute_intensity": 0.8767123287671232, | |
"MxNxK": 8388608, | |
"size_m": 4096, | |
"size_n": 256, | |
"size_k": 8, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_4096x256x8.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00001625279983272776, | |
"perf_norm_to_sol": 0.2635483619433072, | |
"perf_norm_to_cublas": 0.7702303704082404, | |
"compute_intensity": 55.351351351351354, | |
"tile_compute_intensity": 2.4615384615384617, | |
"MxNxK": 16777216, | |
"size_m": 256, | |
"size_n": 1024, | |
"size_k": 64, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_256x1024x64.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.0004183359909802675, | |
"perf_norm_to_sol": 0.6553046531419137, | |
"perf_norm_to_cublas": 0.9009944346965397, | |
"compute_intensity": 62.77394636015325, | |
"tile_compute_intensity": 6.7368421052631575, | |
"MxNxK": 1073741824, | |
"size_m": 8192, | |
"size_n": 2048, | |
"size_k": 64, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_8192x2048x64.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00019393919501453638, | |
"perf_norm_to_sol": 0.7067615224595405, | |
"perf_norm_to_cublas": 1.0116984858261926, | |
"compute_intensity": 127.50194552529183, | |
"tile_compute_intensity": 1.8754578754578755, | |
"MxNxK": 536870912, | |
"size_m": 128, | |
"size_n": 16384, | |
"size_k": 256, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_128x16384x256.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00038893758319318293, | |
"perf_norm_to_sol": 0.7048367998161301, | |
"perf_norm_to_cublas": 0.9089869810620242, | |
"compute_intensity": 119.5912408759124, | |
"tile_compute_intensity": 9.846153846153847, | |
"MxNxK": 1073741824, | |
"size_m": 8192, | |
"size_n": 1024, | |
"size_k": 128, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_8192x1024x128.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.005475347116589546, | |
"perf_norm_to_sol": 0.8010816940113377, | |
"perf_norm_to_cublas": 0.9019564392510349, | |
"compute_intensity": 248.24242424242425, | |
"tile_compute_intensity": 1.9825750242013553, | |
"MxNxK": 17179869184, | |
"size_m": 128, | |
"size_n": 8192, | |
"size_k": 16384, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_128x8192x16384.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.0007073376327753067, | |
"perf_norm_to_sol": 0.7751249439125646, | |
"perf_norm_to_cublas": 0.9559678234834061, | |
"compute_intensity": 224.43835616438355, | |
"tile_compute_intensity": 14.222222222222221, | |
"MxNxK": 2147483648, | |
"size_m": 8192, | |
"size_n": 1024, | |
"size_k": 256, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_8192x1024x256.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.0001143679954111576, | |
"perf_norm_to_sol": 0.5992443963028454, | |
"perf_norm_to_cublas": 0.9182708789862031, | |
"compute_intensity": 85.11168831168831, | |
"tile_compute_intensity": 1.7716262975778547, | |
"MxNxK": 268435456, | |
"size_m": 128, | |
"size_n": 16384, | |
"size_k": 128, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_128x16384x128.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.022190290689468383, | |
"perf_norm_to_sol": 0.790652164919923, | |
"perf_norm_to_cublas": 0.8952844184110668, | |
"compute_intensity": 910.2222222222222, | |
"tile_compute_intensity": 51.2, | |
"MxNxK": 68719476736, | |
"size_m": 8192, | |
"size_n": 8192, | |
"size_k": 1024, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_8192x8192x1024.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.000006992000271566212, | |
"perf_norm_to_sol": 0.07657677714785711, | |
"perf_norm_to_cublas": 0.7867276631648, | |
"compute_intensity": 46.54545454545455, | |
"tile_compute_intensity": 1.3333333333333333, | |
"MxNxK": 2097152, | |
"size_m": 128, | |
"size_n": 256, | |
"size_k": 64, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_128x256x64.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.0027484385296702384, | |
"perf_norm_to_sol": 0.7979440500682963, | |
"perf_norm_to_cublas": 1.6116414909708316, | |
"compute_intensity": 655.36, | |
"tile_compute_intensity": 7.968871595330739, | |
"MxNxK": 8589934592, | |
"size_m": 1024, | |
"size_n": 512, | |
"size_k": 16384, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_1024x512x16384.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.022173671424388884, | |
"perf_norm_to_sol": 0.7912447622243112, | |
"perf_norm_to_cublas": 1.0261112211357966, | |
"compute_intensity": 910.2222222222222, | |
"tile_compute_intensity": 7.728301886792453, | |
"MxNxK": 68719476736, | |
"size_m": 512, | |
"size_n": 8192, | |
"size_k": 16384, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_512x8192x16384.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.000008537599933333695, | |
"perf_norm_to_sol": 0.6159619962980221, | |
"perf_norm_to_cublas": 1.0843327946133914, | |
"compute_intensity": 7.525953146531925, | |
"tile_compute_intensity": 0.6632124352331606, | |
"MxNxK": 4194304, | |
"size_m": 8192, | |
"size_n": 64, | |
"size_k": 8, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_8192x64x8.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00000870079966261983, | |
"perf_norm_to_sol": 0.2893612150377502, | |
"perf_norm_to_cublas": 0.7613093111656116, | |
"compute_intensity": 15.398496240601503, | |
"tile_compute_intensity": 1.4545454545454546, | |
"MxNxK": 4194304, | |
"size_m": 1024, | |
"size_n": 256, | |
"size_k": 16, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_1024x256x16.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.0027596129104495047, | |
"perf_norm_to_sol": 0.7947129698605433, | |
"perf_norm_to_cublas": 0.8916798886899915, | |
"compute_intensity": 239.1824817518248, | |
"tile_compute_intensity": 19.692307692307693, | |
"MxNxK": 8589934592, | |
"size_m": 16384, | |
"size_n": 2048, | |
"size_k": 256, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_16384x2048x256.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00019899520557373763, | |
"perf_norm_to_sol": 0.688804337460586, | |
"perf_norm_to_cublas": 0.8911973738878705, | |
"compute_intensity": 120.47058823529412, | |
"tile_compute_intensity": 9.142857142857142, | |
"MxNxK": 536870912, | |
"size_m": 2048, | |
"size_n": 2048, | |
"size_k": 128, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_2048x2048x128.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00001854720030678436, | |
"perf_norm_to_sol": 0.23094584099256416, | |
"perf_norm_to_cublas": 5.228433258099168, | |
"compute_intensity": 93.0909090909091, | |
"tile_compute_intensity": 1.3195876288659794, | |
"MxNxK": 16777216, | |
"size_m": 256, | |
"size_n": 64, | |
"size_k": 1024, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_256x64x1024.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00018360000103712082, | |
"perf_norm_to_sol": 0.7465618734138159, | |
"perf_norm_to_cublas": 1.0434858779845693, | |
"compute_intensity": 315.0769230769231, | |
"tile_compute_intensity": 3.657142857142857, | |
"MxNxK": 536870912, | |
"size_m": 256, | |
"size_n": 2048, | |
"size_k": 1024, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_256x2048x1024.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.0003697087988257408, | |
"perf_norm_to_sol": 0.7414957997667656, | |
"perf_norm_to_cublas": 1.0293680180808813, | |
"compute_intensity": 227.55555555555554, | |
"tile_compute_intensity": 12.8, | |
"MxNxK": 1073741824, | |
"size_m": 2048, | |
"size_n": 2048, | |
"size_k": 256, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_2048x2048x256.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00142819844186306, | |
"perf_norm_to_sol": 0.7677855217612346, | |
"perf_norm_to_cublas": 0.9000497158034959, | |
"compute_intensity": 123.65283018867925, | |
"tile_compute_intensity": 12.19047619047619, | |
"MxNxK": 4294967296, | |
"size_m": 16384, | |
"size_n": 2048, | |
"size_k": 128, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_16384x2048x128.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.000355644803494215, | |
"perf_norm_to_sol": 0.7708182961558782, | |
"perf_norm_to_cublas": 1.0736824663426652, | |
"compute_intensity": 119.5912408759124, | |
"tile_compute_intensity": 1.9541984732824427, | |
"MxNxK": 1073741824, | |
"size_m": 8192, | |
"size_n": 64, | |
"size_k": 2048, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_8192x64x2048.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00036083520390093324, | |
"perf_norm_to_sol": 0.7597305321167248, | |
"perf_norm_to_cublas": 1.0475430181689553, | |
"compute_intensity": 512, | |
"tile_compute_intensity": 7.757575757575758, | |
"MxNxK": 1073741824, | |
"size_m": 1024, | |
"size_n": 512, | |
"size_k": 2048, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_1024x512x2048.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.0006880512461066246, | |
"perf_norm_to_sol": 0.7968520455920253, | |
"perf_norm_to_cublas": 1.085942353802044, | |
"compute_intensity": 224.43835616438355, | |
"tile_compute_intensity": 1.9692307692307693, | |
"MxNxK": 2147483648, | |
"size_m": 128, | |
"size_n": 8192, | |
"size_k": 2048, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_128x8192x2048.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.022379513084888458, | |
"perf_norm_to_sol": 0.783967073246001, | |
"perf_norm_to_cublas": 0.9176382650436256, | |
"compute_intensity": 2048, | |
"tile_compute_intensity": 31.03030303030303, | |
"MxNxK": 68719476736, | |
"size_m": 4096, | |
"size_n": 2048, | |
"size_k": 8192, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_4096x2048x8192.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00035793918650597336, | |
"perf_norm_to_sol": 0.7658773663261037, | |
"perf_norm_to_cublas": 1.0539086059914902, | |
"compute_intensity": 512, | |
"tile_compute_intensity": 6.2439024390243905, | |
"MxNxK": 1073741824, | |
"size_m": 512, | |
"size_n": 1024, | |
"size_k": 2048, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_512x1024x2048.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.000022803198953624813, | |
"perf_norm_to_sol": 0.37568402412477886, | |
"perf_norm_to_cublas": 0.9886332301632413, | |
"compute_intensity": 63.50387596899225, | |
"tile_compute_intensity": 1.7297297297297298, | |
"MxNxK": 33554432, | |
"size_m": 4096, | |
"size_n": 64, | |
"size_k": 128, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_4096x64x128.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00019541759975254535, | |
"perf_norm_to_sol": 0.7014146162199298, | |
"perf_norm_to_cublas": 1.0046013867068773, | |
"compute_intensity": 168.90721649484536, | |
"tile_compute_intensity": 3.506849315068493, | |
"MxNxK": 536870912, | |
"size_m": 256, | |
"size_n": 8192, | |
"size_k": 256, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_256x8192x256.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.0223203644156456, | |
"perf_norm_to_sol": 0.786044575577469, | |
"perf_norm_to_cublas": 0.904102512440297, | |
"compute_intensity": 489.07462686567163, | |
"tile_compute_intensity": 39.38461538461539, | |
"MxNxK": 68719476736, | |
"size_m": 8192, | |
"size_n": 16384, | |
"size_k": 512, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_8192x16384x512.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.0003619263879954815, | |
"perf_norm_to_sol": 0.7574399948685847, | |
"perf_norm_to_cublas": 1.0490619905793772, | |
"compute_intensity": 202.2716049382716, | |
"tile_compute_intensity": 3.764705882352941, | |
"MxNxK": 1073741824, | |
"size_m": 8192, | |
"size_n": 128, | |
"size_k": 1024, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_8192x128x1024.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.000022268800239544363, | |
"perf_norm_to_sol": 0.3846995551472511, | |
"perf_norm_to_cublas": 0.961488735250199, | |
"compute_intensity": 83.59183673469387, | |
"tile_compute_intensity": 1.7297297297297298, | |
"MxNxK": 33554432, | |
"size_m": 128, | |
"size_n": 2048, | |
"size_k": 128, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_128x2048x128.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.01103825941681862, | |
"perf_norm_to_sol": 0.7947268093327364, | |
"perf_norm_to_cublas": 0.9228820258191176, | |
"compute_intensity": 481.88235294117646, | |
"tile_compute_intensity": 7.501831501831502, | |
"MxNxK": 34359738368, | |
"size_m": 8192, | |
"size_n": 256, | |
"size_k": 16384, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_8192x256x16384.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.000352345616556704, | |
"perf_norm_to_sol": 0.7780358505523941, | |
"perf_norm_to_cublas": 1.6799959876110224, | |
"compute_intensity": 327.68, | |
"tile_compute_intensity": 3.9844357976653697, | |
"MxNxK": 1073741824, | |
"size_m": 512, | |
"size_n": 256, | |
"size_k": 8192, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_512x256x8192.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.000015404800069518388, | |
"perf_norm_to_sol": 0.6448552202711884, | |
"perf_norm_to_cublas": 1.0087245555444508, | |
"compute_intensity": 7.75390440132513, | |
"tile_compute_intensity": 0.7950310559006211, | |
"MxNxK": 8388608, | |
"size_m": 8192, | |
"size_n": 128, | |
"size_k": 8, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_8192x128x8.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00003245759871788323, | |
"perf_norm_to_sol": 0.5278762375662528, | |
"perf_norm_to_cublas": 1.255249971620321, | |
"compute_intensity": 124.12121212121212, | |
"tile_compute_intensity": 1.8285714285714285, | |
"MxNxK": 67108864, | |
"size_m": 128, | |
"size_n": 2048, | |
"size_k": 256, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_128x2048x256.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.000017862400272861124, | |
"perf_norm_to_sol": 0.5433656964386765, | |
"perf_norm_to_cublas": 0.9503762459930583, | |
"compute_intensity": 15.693486590038313, | |
"tile_compute_intensity": 1.5609756097560976, | |
"MxNxK": 16777216, | |
"size_m": 512, | |
"size_n": 2048, | |
"size_k": 16, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_512x2048x16.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00028533758595585825, | |
"perf_norm_to_sol": 0.541174187864146, | |
"perf_norm_to_cublas": 0.9875628674167835, | |
"compute_intensity": 31.477425552353505, | |
"tile_compute_intensity": 3.506849315068493, | |
"MxNxK": 536870912, | |
"size_m": 16384, | |
"size_n": 1024, | |
"size_k": 32, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_16384x1024x32.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.0027024161070585253, | |
"perf_norm_to_sol": 0.8115331188267404, | |
"perf_norm_to_cublas": 0.9065298060734938, | |
"compute_intensity": 1024, | |
"tile_compute_intensity": 12.487804878048781, | |
"MxNxK": 8589934592, | |
"size_m": 1024, | |
"size_n": 2048, | |
"size_k": 4096, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_1024x2048x4096.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00005883520352654159, | |
"perf_norm_to_sol": 0.339855025310419, | |
"perf_norm_to_cublas": 0.6691504223812653, | |
"compute_intensity": 30.91320754716981, | |
"tile_compute_intensity": 3.0476190476190474, | |
"MxNxK": 67108864, | |
"size_m": 4096, | |
"size_n": 512, | |
"size_k": 32, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_4096x512x32.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00011712000705301761, | |
"perf_norm_to_sol": 0.5851637315518752, | |
"perf_norm_to_cublas": 0.8835245491987899, | |
"compute_intensity": 101.7639751552795, | |
"tile_compute_intensity": 3.1604938271604937, | |
"MxNxK": 268435456, | |
"size_m": 256, | |
"size_n": 8192, | |
"size_k": 128, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_256x8192x128.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.0013866592198610306, | |
"perf_norm_to_sol": 0.7907855586712261, | |
"perf_norm_to_cublas": 0.9388458299136149, | |
"compute_intensity": 337.8144329896907, | |
"tile_compute_intensity": 12.19047619047619, | |
"MxNxK": 4294967296, | |
"size_m": 16384, | |
"size_n": 512, | |
"size_k": 512, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_16384x512x512.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00003311040054541081, | |
"perf_norm_to_sol": 0.5174686747788738, | |
"perf_norm_to_cublas": 1.2714796371901143, | |
"compute_intensity": 84.45360824742268, | |
"tile_compute_intensity": 1.8285714285714285, | |
"MxNxK": 67108864, | |
"size_m": 4096, | |
"size_n": 64, | |
"size_k": 256, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_4096x64x256.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00019331839866936206, | |
"perf_norm_to_sol": 0.7090311200409023, | |
"perf_norm_to_cublas": 0.9706018767760229, | |
"compute_intensity": 168.90721649484536, | |
"tile_compute_intensity": 3.657142857142857, | |
"MxNxK": 536870912, | |
"size_m": 8192, | |
"size_n": 128, | |
"size_k": 512, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_8192x128x512.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.002718716859817505, | |
"perf_norm_to_sol": 0.8066673672947451, | |
"perf_norm_to_cublas": 0.92701849821869, | |
"compute_intensity": 455.1111111111111, | |
"tile_compute_intensity": 7.062068965517241, | |
"MxNxK": 8589934592, | |
"size_m": 4096, | |
"size_n": 256, | |
"size_k": 8192, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_4096x256x8192.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.000006742400000803173, | |
"perf_norm_to_sol": 0.1894104465997858, | |
"perf_norm_to_cublas": 0.7755102040816327, | |
"compute_intensity": 15.283582089552239, | |
"tile_compute_intensity": 1.3333333333333333, | |
"MxNxK": 2097152, | |
"size_m": 512, | |
"size_n": 256, | |
"size_k": 16, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_512x256x16.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.002716095931828022, | |
"perf_norm_to_sol": 0.8074457702430249, | |
"perf_norm_to_cublas": 1.6329721534098816, | |
"compute_intensity": 244.53731343283582, | |
"tile_compute_intensity": 1.967339097022094, | |
"MxNxK": 8589934592, | |
"size_m": 128, | |
"size_n": 4096, | |
"size_k": 16384, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_128x4096x16384.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.000052537600276991724, | |
"perf_norm_to_sol": 0.7181260888446274, | |
"perf_norm_to_cublas": 0.8918869616307132, | |
"compute_intensity": 7.961127308066083, | |
"tile_compute_intensity": 0.9552238805970149, | |
"MxNxK": 33554432, | |
"size_m": 4096, | |
"size_n": 1024, | |
"size_k": 8, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_4096x1024x8.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.0013783935457468033, | |
"perf_norm_to_sol": 0.7955275829953985, | |
"perf_norm_to_cublas": 1.0467095186964017, | |
"compute_intensity": 496.4848484848485, | |
"tile_compute_intensity": 12.8, | |
"MxNxK": 4294967296, | |
"size_m": 8192, | |
"size_n": 512, | |
"size_k": 1024, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_8192x512x1024.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.022254575788974763, | |
"perf_norm_to_sol": 0.7883682681798201, | |
"perf_norm_to_cublas": 1.0321779484991858, | |
"compute_intensity": 910.2222222222222, | |
"tile_compute_intensity": 14.124137931034483, | |
"MxNxK": 68719476736, | |
"size_m": 8192, | |
"size_n": 512, | |
"size_k": 16384, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_8192x512x16384.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.0003536063944920897, | |
"perf_norm_to_sol": 0.7752617761900668, | |
"perf_norm_to_cublas": 1.076125354647136, | |
"compute_intensity": 372.3636363636364, | |
"tile_compute_intensity": 5.278350515463917, | |
"MxNxK": 1073741824, | |
"size_m": 1024, | |
"size_n": 256, | |
"size_k": 4096, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_1024x256x4096.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00002748480183072388, | |
"perf_norm_to_sol": 0.7016213232278792, | |
"perf_norm_to_cublas": 0.9908021278934351, | |
"compute_intensity": 7.8731379144642, | |
"tile_compute_intensity": 0.8827586206896552, | |
"MxNxK": 16777216, | |
"size_m": 8192, | |
"size_n": 256, | |
"size_k": 8, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_8192x256x8.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.022195692360401153, | |
"perf_norm_to_sol": 0.7904597472765428, | |
"perf_norm_to_cublas": 0.8608272273150744, | |
"compute_intensity": 1820.4444444444443, | |
"tile_compute_intensity": 39.38461538461539, | |
"MxNxK": 68719476736, | |
"size_m": 8192, | |
"size_n": 2048, | |
"size_k": 4096, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_8192x2048x4096.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.000006409599882317707, | |
"perf_norm_to_sol": 0.015654967555692713, | |
"perf_norm_to_cublas": 0.6425362234196706, | |
"compute_intensity": 13.473684210526315, | |
"tile_compute_intensity": 0.6666666666666666, | |
"MxNxK": 131072, | |
"size_m": 128, | |
"size_n": 64, | |
"size_k": 16, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_128x64x16.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.000008540799899492413, | |
"perf_norm_to_sol": 0.12538049197132062, | |
"perf_norm_to_cublas": 0.9524166269536543, | |
"compute_intensity": 73.14285714285714, | |
"tile_compute_intensity": 1.7777777777777777, | |
"MxNxK": 4194304, | |
"size_m": 256, | |
"size_n": 128, | |
"size_k": 128, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_256x128x128.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.000006736000068485737, | |
"perf_norm_to_sol": 0.13271328498004756, | |
"perf_norm_to_cublas": 0.839904954564151, | |
"compute_intensity": 25.28395061728395, | |
"tile_compute_intensity": 1.2307692307692308, | |
"MxNxK": 2097152, | |
"size_m": 1024, | |
"size_n": 64, | |
"size_k": 32, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_1024x64x32.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00019182399846613408, | |
"perf_norm_to_sol": 0.7145548097687606, | |
"perf_norm_to_cublas": 0.9767787164552801, | |
"compute_intensity": 248.24242424242425, | |
"tile_compute_intensity": 6.4, | |
"MxNxK": 536870912, | |
"size_m": 4096, | |
"size_n": 256, | |
"size_k": 512, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_4096x256x512.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00009615999879315495, | |
"perf_norm_to_sol": 0.7127119511923733, | |
"perf_norm_to_cublas": 1.1136106292420136, | |
"compute_intensity": 204.8, | |
"tile_compute_intensity": 3.1604938271604937, | |
"MxNxK": 268435456, | |
"size_m": 1024, | |
"size_n": 128, | |
"size_k": 2048, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_1024x128x2048.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.0013632736168801784, | |
"perf_norm_to_sol": 0.8043506984121371, | |
"perf_norm_to_cublas": 1.6340971628781698, | |
"compute_intensity": 399.609756097561, | |
"tile_compute_intensity": 5.3194805194805195, | |
"MxNxK": 4294967296, | |
"size_m": 1024, | |
"size_n": 256, | |
"size_k": 16384, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_1024x256x16384.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00008048639865592122, | |
"perf_norm_to_sol": 0.47963798519210227, | |
"perf_norm_to_cublas": 0.6569258789287228, | |
"compute_intensity": 15.738712776176753, | |
"tile_compute_intensity": 1.5900621118012421, | |
"MxNxK": 67108864, | |
"size_m": 512, | |
"size_n": 8192, | |
"size_k": 16, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_512x8192x16.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.000013952000881545245, | |
"perf_norm_to_sol": 0.15350482017864714, | |
"perf_norm_to_cublas": 0.9830274948992161, | |
"compute_intensity": 102.4, | |
"tile_compute_intensity": 1.8823529411764706, | |
"MxNxK": 8388608, | |
"size_m": 256, | |
"size_n": 128, | |
"size_k": 256, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_256x128x256.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.0027243455871939657, | |
"perf_norm_to_sol": 0.8050007245915093, | |
"perf_norm_to_cublas": 0.8680848413386946, | |
"compute_intensity": 404.5432098765432, | |
"tile_compute_intensity": 12.487804878048781, | |
"MxNxK": 8589934592, | |
"size_m": 1024, | |
"size_n": 16384, | |
"size_k": 512, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_1024x16384x512.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.010859663784503936, | |
"perf_norm_to_sol": 0.8077967109288376, | |
"perf_norm_to_cublas": 0.8615576833438143, | |
"compute_intensity": 1260.3076923076924, | |
"tile_compute_intensity": 25.6, | |
"MxNxK": 34359738368, | |
"size_m": 2048, | |
"size_n": 8192, | |
"size_k": 2048, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_2048x8192x2048.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.0008135167881846428, | |
"perf_norm_to_sol": 0.7366527498368952, | |
"perf_norm_to_cublas": 0.7544448985526161, | |
"compute_intensity": 7.990246281394782, | |
"tile_compute_intensity": 0.9884169884169884, | |
"MxNxK": 536870912, | |
"size_m": 16384, | |
"size_n": 4096, | |
"size_k": 8, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_16384x4096x8.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00009583359933458268, | |
"perf_norm_to_sol": 0.7151393753588705, | |
"perf_norm_to_cublas": 1.2684319513131823, | |
"compute_intensity": 117.02857142857142, | |
"tile_compute_intensity": 1.7716262975778547, | |
"MxNxK": 268435456, | |
"size_m": 1024, | |
"size_n": 64, | |
"size_k": 4096, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_1024x64x4096.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.0003705503884702921, | |
"perf_norm_to_sol": 0.7398117233065084, | |
"perf_norm_to_cublas": 1.0108206600834506, | |
"compute_intensity": 327.68, | |
"tile_compute_intensity": 10.666666666666666, | |
"MxNxK": 1073741824, | |
"size_m": 4096, | |
"size_n": 512, | |
"size_k": 512, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_4096x512x512.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.0013676895759999752, | |
"perf_norm_to_sol": 0.8017536326272561, | |
"perf_norm_to_cublas": 1.0558630290992004, | |
"compute_intensity": 630.1538461538462, | |
"tile_compute_intensity": 18.285714285714285, | |
"MxNxK": 4294967296, | |
"size_m": 4096, | |
"size_n": 1024, | |
"size_k": 1024, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_4096x1024x1024.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.0027657633647322653, | |
"perf_norm_to_sol": 0.7929457016078172, | |
"perf_norm_to_cublas": 0.8776849286386259, | |
"compute_intensity": 239.1824817518248, | |
"tile_compute_intensity": 15.515151515151516, | |
"MxNxK": 8589934592, | |
"size_m": 2048, | |
"size_n": 16384, | |
"size_k": 256, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_2048x16384x256.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00008695040014572442, | |
"perf_norm_to_sol": 0.4464989487368955, | |
"perf_norm_to_cublas": 0.7530546496795245, | |
"compute_intensity": 31.386973180076627, | |
"tile_compute_intensity": 3.3684210526315788, | |
"MxNxK": 134217728, | |
"size_m": 4096, | |
"size_n": 1024, | |
"size_k": 32, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_4096x1024x32.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.000008144000457832589, | |
"perf_norm_to_sol": 0.6104486241530994, | |
"perf_norm_to_cublas": 1.1473476929256596, | |
"compute_intensity": 7.750236518448439, | |
"tile_compute_intensity": 0.7901234567901234, | |
"MxNxK": 4194304, | |
"size_m": 4096, | |
"size_n": 128, | |
"size_k": 8, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_4096x128x8.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00000876159974723123, | |
"perf_norm_to_sol": 0.30192911546117424, | |
"perf_norm_to_cublas": 0.7016070037737553, | |
"compute_intensity": 15.003663003663004, | |
"tile_compute_intensity": 0.9846153846153847, | |
"MxNxK": 4194304, | |
"size_m": 128, | |
"size_n": 2048, | |
"size_k": 16, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_128x2048x16.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.0001919072004966438, | |
"perf_norm_to_sol": 0.7142450120596107, | |
"perf_norm_to_cublas": 0.9684681003958638, | |
"compute_intensity": 341.3333333333333, | |
"tile_compute_intensity": 9.142857142857142, | |
"MxNxK": 536870912, | |
"size_m": 1024, | |
"size_n": 1024, | |
"size_k": 512, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_1024x1024x512.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00002901760162785649, | |
"perf_norm_to_sol": 0.6645595068965392, | |
"perf_norm_to_cublas": 0.9019628354889868, | |
"compute_intensity": 7.8731379144642, | |
"tile_compute_intensity": 0.7975077881619937, | |
"MxNxK": 16777216, | |
"size_m": 256, | |
"size_n": 8192, | |
"size_k": 8, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_256x8192x8.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.0014356384053826332, | |
"perf_norm_to_sol": 0.7638065976454245, | |
"perf_norm_to_cublas": 0.8640169834025838, | |
"compute_intensity": 123.65283018867925, | |
"tile_compute_intensity": 10.448979591836734, | |
"MxNxK": 4294967296, | |
"size_m": 2048, | |
"size_n": 16384, | |
"size_k": 128, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_2048x16384x128.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.0027028609067201613, | |
"perf_norm_to_sol": 0.8113995678712461, | |
"perf_norm_to_cublas": 0.909489600695313, | |
"compute_intensity": 442.81081081081084, | |
"tile_compute_intensity": 3.9083969465648853, | |
"MxNxK": 8589934592, | |
"size_m": 256, | |
"size_n": 8192, | |
"size_k": 4096, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_256x8192x4096.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.0006741983816027641, | |
"perf_norm_to_sol": 0.813225095006603, | |
"perf_norm_to_cublas": 1.676676819792379, | |
"compute_intensity": 390.0952380952381, | |
"tile_compute_intensity": 3.5432525951557095, | |
"MxNxK": 2147483648, | |
"size_m": 256, | |
"size_n": 1024, | |
"size_k": 8192, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_256x1024x8192.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.000403024023398757, | |
"perf_norm_to_sol": 0.6802014409817646, | |
"perf_norm_to_cublas": 0.8461471136209362, | |
"compute_intensity": 61.94328922495274, | |
"tile_compute_intensity": 5.278350515463917, | |
"MxNxK": 1073741824, | |
"size_m": 1024, | |
"size_n": 16384, | |
"size_k": 64, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_1024x16384x64.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.005423814430832863, | |
"perf_norm_to_sol": 0.8086929225534207, | |
"perf_norm_to_cublas": 0.872902469263758, | |
"compute_intensity": 668.734693877551, | |
"tile_compute_intensity": 23.272727272727273, | |
"MxNxK": 17179869184, | |
"size_m": 16384, | |
"size_n": 1024, | |
"size_k": 1024, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_16384x1024x1024.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.0005555456038564444, | |
"perf_norm_to_sol": 0.5443527544621342, | |
"perf_norm_to_cublas": 0.8206937138204384, | |
"compute_intensity": 31.813592233009707, | |
"tile_compute_intensity": 3.764705882352941, | |
"MxNxK": 1073741824, | |
"size_m": 8192, | |
"size_n": 4096, | |
"size_k": 32, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_8192x4096x32.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.001564505510032177, | |
"perf_norm_to_sol": 0.7008924409872223, | |
"perf_norm_to_cublas": 0.9271949400997098, | |
"compute_intensity": 63.50387596899225, | |
"tile_compute_intensity": 7.314285714285714, | |
"MxNxK": 4294967296, | |
"size_m": 8192, | |
"size_n": 8192, | |
"size_k": 64, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_8192x8192x64.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.0000065664004068821665, | |
"perf_norm_to_sol": 0.03334066719332554, | |
"perf_norm_to_cublas": 0.6408381489567848, | |
"compute_intensity": 25.6, | |
"tile_compute_intensity": 1, | |
"MxNxK": 524288, | |
"size_m": 128, | |
"size_n": 128, | |
"size_k": 32, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_128x128x32.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.000006710399611620233, | |
"perf_norm_to_sol": 0.19031370249471216, | |
"perf_norm_to_cublas": 0.6862184546114084, | |
"compute_intensity": 15.283582089552239, | |
"tile_compute_intensity": 1.2307692307692308, | |
"MxNxK": 2097152, | |
"size_m": 256, | |
"size_n": 512, | |
"size_k": 16, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_256x512x16.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00009671999723650515, | |
"perf_norm_to_sol": 0.7085854251933199, | |
"perf_norm_to_cublas": 2.08724565517292, | |
"compute_intensity": 101.7639751552795, | |
"tile_compute_intensity": 1.332465842550423, | |
"MxNxK": 268435456, | |
"size_m": 256, | |
"size_n": 64, | |
"size_k": 16384, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_256x64x16384.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.0027265504002571108, | |
"perf_norm_to_sol": 0.8043497642743066, | |
"perf_norm_to_cublas": 0.9271367044913613, | |
"compute_intensity": 655.36, | |
"tile_compute_intensity": 13.473684210526315, | |
"MxNxK": 8589934592, | |
"size_m": 1024, | |
"size_n": 8192, | |
"size_k": 1024, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_1024x8192x1024.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.010897347331047058, | |
"perf_norm_to_sol": 0.8050033113950869, | |
"perf_norm_to_cublas": 0.8947308290918499, | |
"compute_intensity": 862.3157894736842, | |
"tile_compute_intensity": 7.501831501831502, | |
"MxNxK": 34359738368, | |
"size_m": 512, | |
"size_n": 4096, | |
"size_k": 16384, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_512x4096x16384.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.005470060557126999, | |
"perf_norm_to_sol": 0.8018559022610492, | |
"perf_norm_to_cublas": 0.8742338211480005, | |
"compute_intensity": 468.1142857142857, | |
"tile_compute_intensity": 28.444444444444443, | |
"MxNxK": 17179869184, | |
"size_m": 4096, | |
"size_n": 8192, | |
"size_k": 512, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_4096x8192x512.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "stream-kxmxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00001716800034046173, | |
"perf_norm_to_sol": 0.5610926908448476, | |
"perf_norm_to_cublas": 0.9955265236790714, | |
"compute_intensity": 15.753846153846155, | |
"tile_compute_intensity": 1.6842105263157894, | |
"MxNxK": 16777216, | |
"size_m": 1024, | |
"size_n": 1024, | |
"size_k": 16, | |
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_1024x1024x16.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "output-mxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.000005708800017600879, | |
"perf_norm_to_sol": 0.11584667765337547, | |
"perf_norm_to_cublas": 0.7589686105798857, | |
"compute_intensity": 7.501831501831502, | |
"tile_compute_intensity": 0.64, | |
"MxNxK": 524288, | |
"size_m": 1024, | |
"size_n": 64, | |
"size_k": 8, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_1024x64x8.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "output-mxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00001482880034018308, | |
"perf_norm_to_sol": 0.6397609500478638, | |
"perf_norm_to_cublas": 1.032369394581539, | |
"compute_intensity": 7.937984496124031, | |
"tile_compute_intensity": 0.9142857142857143, | |
"MxNxK": 8388608, | |
"size_m": 1024, | |
"size_n": 1024, | |
"size_k": 8, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_1024x1024x8.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "output-mxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.0003310976084321737, | |
"perf_norm_to_sol": 0.9080746784831719, | |
"perf_norm_to_cublas": 1.003836916391466, | |
"compute_intensity": 15.953261927945473, | |
"tile_compute_intensity": 1.9248120300751879, | |
"MxNxK": 536870912, | |
"size_m": 4096, | |
"size_n": 8192, | |
"size_k": 16, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_4096x8192x16.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "output-mxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.000029600001289509238, | |
"perf_norm_to_sol": 0.289418823398901, | |
"perf_norm_to_cublas": 0.7244324089622882, | |
"compute_intensity": 97.52380952380952, | |
"tile_compute_intensity": 2.909090909090909, | |
"MxNxK": 33554432, | |
"size_m": 256, | |
"size_n": 1024, | |
"size_k": 128, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_256x1024x128.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "output-mxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00011566720204427838, | |
"perf_norm_to_sol": 0.592513514248319, | |
"perf_norm_to_cublas": 1.0230453966323787, | |
"compute_intensity": 56.79029462738301, | |
"tile_compute_intensity": 3.9384615384615387, | |
"MxNxK": 268435456, | |
"size_m": 16384, | |
"size_n": 256, | |
"size_k": 64, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_16384x256x64.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "output-mxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.0016135327517986298, | |
"perf_norm_to_sol": 0.6795958028382568, | |
"perf_norm_to_cublas": 0.8225648039592787, | |
"compute_intensity": 630.1538461538462, | |
"tile_compute_intensity": 12.19047619047619, | |
"MxNxK": 4294967296, | |
"size_m": 4096, | |
"size_n": 512, | |
"size_k": 2048, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_4096x512x2048.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "output-mxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.000005648000296787359, | |
"perf_norm_to_sol": 0.029071550874630488, | |
"perf_norm_to_cublas": 0.6266288698208482, | |
"compute_intensity": 7.529411764705882, | |
"tile_compute_intensity": 0.5714285714285714, | |
"MxNxK": 131072, | |
"size_m": 128, | |
"size_n": 128, | |
"size_k": 8, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_128x128x8.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "output-mxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.000054643204202875494, | |
"perf_norm_to_sol": 0.31355399709026965, | |
"perf_norm_to_cublas": 0.7293861997771323, | |
"compute_intensity": 157.53846153846155, | |
"tile_compute_intensity": 3.2, | |
"MxNxK": 67108864, | |
"size_m": 256, | |
"size_n": 1024, | |
"size_k": 256, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_256x1024x256.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "output-mxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00042353281751275065, | |
"perf_norm_to_sol": 0.1618159857576148, | |
"perf_norm_to_cublas": 0.2543330630838044, | |
"compute_intensity": 204.8, | |
"tile_compute_intensity": 1.8686131386861313, | |
"MxNxK": 268435456, | |
"size_m": 128, | |
"size_n": 1024, | |
"size_k": 2048, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_128x1024x2048.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "output-mxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.011351747065782547, | |
"perf_norm_to_sol": 0.7727797876467711, | |
"perf_norm_to_cublas": 0.9006979955509488, | |
"compute_intensity": 1638.4, | |
"tile_compute_intensity": 30.11764705882353, | |
"MxNxK": 34359738368, | |
"size_m": 4096, | |
"size_n": 2048, | |
"size_k": 4096, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_4096x2048x4096.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "output-mxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00041446080431342127, | |
"perf_norm_to_sol": 0.08267896463702637, | |
"perf_norm_to_cublas": 0.2534299843221903, | |
"compute_intensity": 186.1818181818182, | |
"tile_compute_intensity": 2.6391752577319587, | |
"MxNxK": 134217728, | |
"size_m": 512, | |
"size_n": 128, | |
"size_k": 2048, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_512x128x2048.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "output-mxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.000016633600171189756, | |
"perf_norm_to_sol": 0.14258620046274448, | |
"perf_norm_to_cublas": 0.5679107158963768, | |
"compute_intensity": 42.22680412371134, | |
"tile_compute_intensity": 1.5238095238095237, | |
"MxNxK": 8388608, | |
"size_m": 2048, | |
"size_n": 64, | |
"size_k": 64, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_2048x64x64.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "output-mxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00010367679642513395, | |
"perf_norm_to_sol": 0.16525968859390608, | |
"perf_norm_to_cublas": 0.42260564472310025, | |
"compute_intensity": 204.8, | |
"tile_compute_intensity": 3.764705882352941, | |
"MxNxK": 67108864, | |
"size_m": 512, | |
"size_n": 256, | |
"size_k": 512, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_512x256x512.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "output-mxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.000005849599983775988, | |
"perf_norm_to_sol": 0.4116879587566608, | |
"perf_norm_to_cublas": 0.9398249595845779, | |
"compute_intensity": 7.876923076923077, | |
"tile_compute_intensity": 0.8421052631578947, | |
"MxNxK": 2097152, | |
"size_m": 512, | |
"size_n": 512, | |
"size_k": 8, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_512x512x8.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "output-mxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.0008234239183366298, | |
"perf_norm_to_sol": 0.16646196167089647, | |
"perf_norm_to_cublas": 0.2576131203410483, | |
"compute_intensity": 315.0769230769231, | |
"tile_compute_intensity": 3.9689922480620154, | |
"MxNxK": 536870912, | |
"size_m": 512, | |
"size_n": 256, | |
"size_k": 4096, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_512x256x4096.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "output-mxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00016498879995197058, | |
"perf_norm_to_sol": 0.9133684073709154, | |
"perf_norm_to_cublas": 0.9653988765511631, | |
"compute_intensity": 7.9669341113542425, | |
"tile_compute_intensity": 0.9394495412844037, | |
"MxNxK": 134217728, | |
"size_m": 1024, | |
"size_n": 16384, | |
"size_k": 8, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_1024x16384x8.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "output-mxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00042679039761424066, | |
"perf_norm_to_sol": 0.16058088642488932, | |
"perf_norm_to_cublas": 0.25589329002775046, | |
"compute_intensity": 117.02857142857142, | |
"tile_compute_intensity": 1.8686131386861313, | |
"MxNxK": 268435456, | |
"size_m": 2048, | |
"size_n": 64, | |
"size_k": 2048, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_2048x64x2048.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "output-mxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.0065150074660778046, | |
"perf_norm_to_sol": 0.6732456357564618, | |
"perf_norm_to_cublas": 0.7540949842154228, | |
"compute_intensity": 468.1142857142857, | |
"tile_compute_intensity": 3.8714555765595464, | |
"MxNxK": 17179869184, | |
"size_m": 256, | |
"size_n": 4096, | |
"size_k": 16384, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_256x4096x16384.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "output-mxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.013405045866966248, | |
"perf_norm_to_sol": 0.6544103447294367, | |
"perf_norm_to_cublas": 0.7394194892070608, | |
"compute_intensity": 862.3157894736842, | |
"tile_compute_intensity": 12.720496894409937, | |
"MxNxK": 34359738368, | |
"size_m": 4096, | |
"size_n": 512, | |
"size_k": 16384, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_4096x512x16384.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "output-mxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00001087040000129491, | |
"perf_norm_to_sol": 0.2416781394589672, | |
"perf_norm_to_cublas": 0.772740647291422, | |
"compute_intensity": 30.11764705882353, | |
"tile_compute_intensity": 2.2857142857142856, | |
"MxNxK": 8388608, | |
"size_m": 512, | |
"size_n": 512, | |
"size_k": 32, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_512x512x32.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "output-mxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00005385599797591567, | |
"perf_norm_to_sol": 0.03976714696498047, | |
"perf_norm_to_cublas": 0.2613190753956545, | |
"compute_intensity": 78.76923076923077, | |
"tile_compute_intensity": 1.5238095238095237, | |
"MxNxK": 8388608, | |
"size_m": 512, | |
"size_n": 64, | |
"size_k": 256, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_512x64x256.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "output-mxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.002881222404539585, | |
"perf_norm_to_sol": 0.7611700395892477, | |
"perf_norm_to_cublas": 0.9859093140907181, | |
"compute_intensity": 655.36, | |
"tile_compute_intensity": 7.529411764705882, | |
"MxNxK": 8589934592, | |
"size_m": 512, | |
"size_n": 8192, | |
"size_k": 2048, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_512x8192x2048.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "output-mxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00002896319783758372, | |
"perf_norm_to_sol": 0.03697277141950776, | |
"perf_norm_to_cublas": 0.29554749002655734, | |
"compute_intensity": 60.23529411764706, | |
"tile_compute_intensity": 1.4545454545454546, | |
"MxNxK": 4194304, | |
"size_m": 512, | |
"size_n": 64, | |
"size_k": 128, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_512x64x128.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "output-mxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00637568011879921, | |
"perf_norm_to_sol": 0.6879580314144963, | |
"perf_norm_to_cublas": 0.7710278822987596, | |
"compute_intensity": 1170.2857142857142, | |
"tile_compute_intensity": 12.641975308641975, | |
"MxNxK": 17179869184, | |
"size_m": 1024, | |
"size_n": 2048, | |
"size_k": 8192, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_1024x2048x8192.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "output-mxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00000758399983169511, | |
"perf_norm_to_sol": 0.6374820510597938, | |
"perf_norm_to_cublas": 1.1438818731369989, | |
"compute_intensity": 7.861804222648752, | |
"tile_compute_intensity": 0.7901234567901234, | |
"MxNxK": 4194304, | |
"size_m": 256, | |
"size_n": 2048, | |
"size_k": 8, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_256x2048x8.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "output-mxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00011082240380346775, | |
"perf_norm_to_sol": 0.6184162950306012, | |
"perf_norm_to_cublas": 0.902027002894668, | |
"compute_intensity": 61.59398496240601, | |
"tile_compute_intensity": 5.12, | |
"MxNxK": 268435456, | |
"size_m": 1024, | |
"size_n": 4096, | |
"size_k": 64, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_1024x4096x64.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "output-mxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00011982719879597426, | |
"perf_norm_to_sol": 0.6455514362715529, | |
"perf_norm_to_cublas": 0.95900769429708, | |
"compute_intensity": 31.44721689059501, | |
"tile_compute_intensity": 3.1604938271604937, | |
"MxNxK": 268435456, | |
"size_m": 1024, | |
"size_n": 8192, | |
"size_k": 32, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_1024x8192x32.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "output-mxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.005586972832679749, | |
"perf_norm_to_sol": 0.7850763686913865, | |
"perf_norm_to_cublas": 0.8786817675791616, | |
"compute_intensity": 248.24242424242425, | |
"tile_compute_intensity": 23.272727272727273, | |
"MxNxK": 17179869184, | |
"size_m": 8192, | |
"size_n": 8192, | |
"size_k": 256, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_8192x8192x256.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "output-mxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00037006400525569917, | |
"perf_norm_to_sol": 0.7407840740325043, | |
"perf_norm_to_cublas": 0.8856760182125122, | |
"compute_intensity": 113.3840830449827, | |
"tile_compute_intensity": 5.278350515463917, | |
"MxNxK": 1073741824, | |
"size_m": 512, | |
"size_n": 16384, | |
"size_k": 128, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_512x16384x128.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "output-mxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00020231681410223245, | |
"perf_norm_to_sol": 0.04234347789544985, | |
"perf_norm_to_cublas": 0.4795014431883478, | |
"compute_intensity": 146.28571428571428, | |
"tile_compute_intensity": 1.9692307692307693, | |
"MxNxK": 33554432, | |
"size_m": 256, | |
"size_n": 128, | |
"size_k": 1024, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_256x128x1024.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "output-mxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.000010326399933546782, | |
"perf_norm_to_sol": 0.07066940787135838, | |
"perf_norm_to_cublas": 0.4679268537401737, | |
"compute_intensity": 28.444444444444443, | |
"tile_compute_intensity": 1.6, | |
"MxNxK": 2097152, | |
"size_m": 256, | |
"size_n": 256, | |
"size_k": 32, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_256x256x32.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "output-mxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.0004032704047858715, | |
"perf_norm_to_sol": 0.6797858662890585, | |
"perf_norm_to_cublas": 0.9345272942805023, | |
"compute_intensity": 327.68, | |
"tile_compute_intensity": 3.764705882352941, | |
"MxNxK": 1073741824, | |
"size_m": 256, | |
"size_n": 4096, | |
"size_k": 1024, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_256x4096x1024.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "output-mxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.0001097952015697956, | |
"perf_norm_to_sol": 0.6242019631701224, | |
"perf_norm_to_cublas": 0.9078429609548424, | |
"compute_intensity": 62.06060606060606, | |
"tile_compute_intensity": 5.818181818181818, | |
"MxNxK": 268435456, | |
"size_m": 2048, | |
"size_n": 2048, | |
"size_k": 64, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_2048x2048x64.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "output-mxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.0007407391909509897, | |
"perf_norm_to_sol": 0.7401728565600925, | |
"perf_norm_to_cublas": 0.9056121129147817, | |
"compute_intensity": 203.527950310559, | |
"tile_compute_intensity": 6.320987654320987, | |
"MxNxK": 2147483648, | |
"size_m": 512, | |
"size_n": 16384, | |
"size_k": 256, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_512x16384x256.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "output-mxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.0028715040534734726, | |
"perf_norm_to_sol": 0.7637461521518565, | |
"perf_norm_to_cublas": 0.8758515516399232, | |
"compute_intensity": 126.03076923076924, | |
"tile_compute_intensity": 13.473684210526315, | |
"MxNxK": 8589934592, | |
"size_m": 8192, | |
"size_n": 8192, | |
"size_k": 128, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_8192x8192x128.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "output-mxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.023811429738998413, | |
"perf_norm_to_sol": 0.7368226757545631, | |
"perf_norm_to_cublas": 0.9676833183350251, | |
"compute_intensity": 1489.4545454545455, | |
"tile_compute_intensity": 21.11340206185567, | |
"MxNxK": 68719476736, | |
"size_m": 4096, | |
"size_n": 1024, | |
"size_k": 16384, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_4096x1024x16384.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "output-mxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00020199359860271216, | |
"perf_norm_to_sol": 0.6785797257003324, | |
"perf_norm_to_cublas": 0.9087654178825826, | |
"compute_intensity": 60.12477064220184, | |
"tile_compute_intensity": 3.9689922480620154, | |
"MxNxK": 536870912, | |
"size_m": 512, | |
"size_n": 16384, | |
"size_k": 64, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_512x16384x64.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "output-mxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00011349120177328587, | |
"perf_norm_to_sol": 0.603873950541404, | |
"perf_norm_to_cublas": 0.842102274941993, | |
"compute_intensity": 292.57142857142856, | |
"tile_compute_intensity": 5.818181818181818, | |
"MxNxK": 268435456, | |
"size_m": 512, | |
"size_n": 1024, | |
"size_k": 512, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_512x1024x512.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "output-mxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00001053119995049201, | |
"perf_norm_to_sol": 0.28064516079058893, | |
"perf_norm_to_cublas": 0.8030993705873806, | |
"compute_intensity": 28.248275862068965, | |
"tile_compute_intensity": 1.8823529411764706, | |
"MxNxK": 8388608, | |
"size_m": 2048, | |
"size_n": 128, | |
"size_k": 32, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_2048x128x32.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "output-mxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.0058562207967042925, | |
"perf_norm_to_sol": 0.7489813816320023, | |
"perf_norm_to_cublas": 0.9609025295077859, | |
"compute_intensity": 1170.2857142857142, | |
"tile_compute_intensity": 13.837837837837839, | |
"MxNxK": 17179869184, | |
"size_m": 1024, | |
"size_n": 4096, | |
"size_k": 4096, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_1024x4096x4096.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "output-mxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00001786880020517856, | |
"perf_norm_to_sol": 0.589116005736005, | |
"perf_norm_to_cublas": 0.9666905713056705, | |
"compute_intensity": 15.044995408631772, | |
"tile_compute_intensity": 1.3195876288659794, | |
"MxNxK": 16777216, | |
"size_m": 8192, | |
"size_n": 128, | |
"size_k": 16, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_8192x128x16.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "output-mxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00010432959534227848, | |
"perf_norm_to_sol": 0.328451290075851, | |
"perf_norm_to_cublas": 0.75772173673186, | |
"compute_intensity": 256, | |
"tile_compute_intensity": 4.923076923076923, | |
"MxNxK": 134217728, | |
"size_m": 512, | |
"size_n": 512, | |
"size_k": 512, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_512x512x512.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "output-mxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.000015273600001819432, | |
"perf_norm_to_sol": 0.6883192693440412, | |
"perf_norm_to_cublas": 1.0064947982733892, | |
"compute_intensity": 7.527682058350563, | |
"tile_compute_intensity": 0.6649350649350649, | |
"MxNxK": 8388608, | |
"size_m": 16384, | |
"size_n": 64, | |
"size_k": 8, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_16384x64x8.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "output-mxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.0028304064646363257, | |
"perf_norm_to_sol": 0.7748357697489259, | |
"perf_norm_to_cublas": 0.8969770188519344, | |
"compute_intensity": 655.36, | |
"tile_compute_intensity": 21.333333333333332, | |
"MxNxK": 8589934592, | |
"size_m": 8192, | |
"size_n": 1024, | |
"size_k": 1024, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_8192x1024x1024.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "output-mxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.000012588800746016205, | |
"perf_norm_to_sol": 0.40578321035406467, | |
"perf_norm_to_cublas": 1.0129637665562587, | |
"compute_intensity": 30.567164179104477, | |
"tile_compute_intensity": 2.4615384615384617, | |
"MxNxK": 16777216, | |
"size_m": 512, | |
"size_n": 1024, | |
"size_k": 32, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_512x1024x32.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "output-mxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00046207043342292307, | |
"perf_norm_to_sol": 0.6525781465227377, | |
"perf_norm_to_cublas": 0.9212656558519714, | |
"compute_intensity": 15.929995138551288, | |
"tile_compute_intensity": 1.9248120300751879, | |
"MxNxK": 536870912, | |
"size_m": 16384, | |
"size_n": 2048, | |
"size_k": 16, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_16384x2048x16.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "output-mxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.000007625600119354203, | |
"perf_norm_to_sol": 0.3839913234715559, | |
"perf_norm_to_cublas": 0.8619387636807055, | |
"compute_intensity": 14.197573656845753, | |
"tile_compute_intensity": 0.9846153846153847, | |
"MxNxK": 4194304, | |
"size_m": 4096, | |
"size_n": 64, | |
"size_k": 16, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_4096x64x16.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "output-mxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.0015071647241711617, | |
"perf_norm_to_sol": 0.7275582212604131, | |
"perf_norm_to_cublas": 0.8699609183876126, | |
"compute_intensity": 63.38104448742747, | |
"tile_compute_intensity": 7.013698630136986, | |
"MxNxK": 4294967296, | |
"size_m": 4096, | |
"size_n": 16384, | |
"size_k": 64, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_4096x16384x64.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "output-mxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.0014242367818951608, | |
"perf_norm_to_sol": 0.7699211955509865, | |
"perf_norm_to_cublas": 0.9035509009374845, | |
"compute_intensity": 431.1578947368421, | |
"tile_compute_intensity": 18.285714285714285, | |
"MxNxK": 4294967296, | |
"size_m": 2048, | |
"size_n": 4096, | |
"size_k": 512, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_2048x4096x512.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "output-mxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.010891581326723099, | |
"perf_norm_to_sol": 0.8054294802345846, | |
"perf_norm_to_cublas": 0.8739899823736237, | |
"compute_intensity": 862.3157894736842, | |
"tile_compute_intensity": 36.57142857142857, | |
"MxNxK": 34359738368, | |
"size_m": 4096, | |
"size_n": 8192, | |
"size_k": 1024, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_4096x8192x1024.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "output-mxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.0036572255194187164, | |
"perf_norm_to_sol": 0.6551277427621157, | |
"perf_norm_to_cublas": 0.916848030362758, | |
"compute_intensity": 15.984390243902439, | |
"tile_compute_intensity": 1.9768339768339769, | |
"MxNxK": 4294967296, | |
"size_m": 16384, | |
"size_n": 16384, | |
"size_k": 16, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_16384x16384x16.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "output-mxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.000007827200170140713, | |
"perf_norm_to_sol": 0.6351560924111055, | |
"perf_norm_to_cublas": 1.1218314791131347, | |
"compute_intensity": 7.750236518448439, | |
"tile_compute_intensity": 0.6632124352331606, | |
"MxNxK": 4194304, | |
"size_m": 128, | |
"size_n": 4096, | |
"size_k": 8, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_128x4096x8.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "output-mxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.0007437407970428467, | |
"perf_norm_to_sol": 0.7371856500439089, | |
"perf_norm_to_cublas": 0.997693860244913, | |
"compute_intensity": 390.0952380952381, | |
"tile_compute_intensity": 16, | |
"MxNxK": 2147483648, | |
"size_m": 4096, | |
"size_n": 1024, | |
"size_k": 512, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_4096x1024x512.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "output-mxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.011275728046894074, | |
"perf_norm_to_sol": 0.7779897360447312, | |
"perf_norm_to_cublas": 0.8768021146521091, | |
"compute_intensity": 474.8985507246377, | |
"tile_compute_intensity": 36.57142857142857, | |
"MxNxK": 34359738368, | |
"size_m": 16384, | |
"size_n": 4096, | |
"size_k": 512, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_16384x4096x512.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "output-mxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.0003310207976028323, | |
"perf_norm_to_sol": 0.9109308816537912, | |
"perf_norm_to_cublas": 1.0053168566180415, | |
"compute_intensity": 15.929995138551288, | |
"tile_compute_intensity": 1.8754578754578755, | |
"MxNxK": 536870912, | |
"size_m": 2048, | |
"size_n": 16384, | |
"size_k": 16, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_2048x16384x16.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "output-mxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.006434805691242218, | |
"perf_norm_to_sol": 0.6816367974292173, | |
"perf_norm_to_cublas": 0.7593717471473315, | |
"compute_intensity": 468.1142857142857, | |
"tile_compute_intensity": 3.9233716475095783, | |
"MxNxK": 17179869184, | |
"size_m": 256, | |
"size_n": 8192, | |
"size_k": 8192, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_256x8192x8192.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "output-mxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.000840278435498476, | |
"perf_norm_to_sol": 0.3262460511716894, | |
"perf_norm_to_cublas": 0.4456275372208856, | |
"compute_intensity": 372.3636363636364, | |
"tile_compute_intensity": 3.5310344827586206, | |
"MxNxK": 1073741824, | |
"size_m": 256, | |
"size_n": 1024, | |
"size_k": 4096, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_256x1024x4096.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "output-mxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00001657599932514131, | |
"perf_norm_to_sol": 0.032301210690892555, | |
"perf_norm_to_cublas": 0.33725871174810856, | |
"compute_intensity": 46.54545454545455, | |
"tile_compute_intensity": 1.6, | |
"MxNxK": 2097152, | |
"size_m": 256, | |
"size_n": 128, | |
"size_k": 64, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_256x128x64.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "output-mxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.01284896731376648, | |
"perf_norm_to_sol": 0.682731963798871, | |
"perf_norm_to_cublas": 0.7593077117909319, | |
"compute_intensity": 481.88235294117646, | |
"tile_compute_intensity": 3.930902111324376, | |
"MxNxK": 34359738368, | |
"size_m": 256, | |
"size_n": 8192, | |
"size_k": 16384, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_256x8192x16384.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "output-mxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00011842240346595644, | |
"perf_norm_to_sol": 0.6458145196033621, | |
"perf_norm_to_cublas": 0.9242575528092811, | |
"compute_intensity": 31.62934362934363, | |
"tile_compute_intensity": 3.4594594594594597, | |
"MxNxK": 268435456, | |
"size_m": 2048, | |
"size_n": 4096, | |
"size_k": 32, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_2048x4096x32.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "output-mxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.0003887712024152279, | |
"perf_norm_to_sol": 0.7051384458597573, | |
"perf_norm_to_cublas": 0.8681713223867674, | |
"compute_intensity": 62.77394636015325, | |
"tile_compute_intensity": 6.2439024390243905, | |
"MxNxK": 1073741824, | |
"size_m": 2048, | |
"size_n": 8192, | |
"size_k": 64, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_2048x8192x64.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "output-mxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00040195840410888194, | |
"perf_norm_to_sol": 0.6820047016403342, | |
"perf_norm_to_cublas": 0.937291002736217, | |
"compute_intensity": 512, | |
"tile_compute_intensity": 9.846153846153847, | |
"MxNxK": 1073741824, | |
"size_m": 1024, | |
"size_n": 1024, | |
"size_k": 1024, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_1024x1024x1024.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "output-mxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.000025587200070731342, | |
"perf_norm_to_sol": 0.5483072354681483, | |
"perf_norm_to_cublas": 0.9152075908001813, | |
"compute_intensity": 25.580015612802498, | |
"tile_compute_intensity": 1.3264248704663213, | |
"MxNxK": 33554432, | |
"size_m": 16384, | |
"size_n": 64, | |
"size_k": 32, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_16384x64x32.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "output-mxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00010346239432692527, | |
"perf_norm_to_sol": 0.04140053785506815, | |
"perf_norm_to_cublas": 0.23373130433001865, | |
"compute_intensity": 128, | |
"tile_compute_intensity": 1.5609756097560976, | |
"MxNxK": 16777216, | |
"size_m": 128, | |
"size_n": 256, | |
"size_k": 512, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_128x256x512.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "output-mxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.0013990592211484908, | |
"perf_norm_to_sol": 0.7837767474662375, | |
"perf_norm_to_cublas": 0.9183542928625235, | |
"compute_intensity": 399.609756097561, | |
"tile_compute_intensity": 12.19047619047619, | |
"MxNxK": 4294967296, | |
"size_m": 1024, | |
"size_n": 8192, | |
"size_k": 512, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_1024x8192x512.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "output-mxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00005411839811131358, | |
"perf_norm_to_sol": 0.3165946459906324, | |
"perf_norm_to_cublas": 0.7518921814213975, | |
"compute_intensity": 157.53846153846155, | |
"tile_compute_intensity": 4.571428571428571, | |
"MxNxK": 67108864, | |
"size_m": 1024, | |
"size_n": 256, | |
"size_k": 256, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_1024x256x256.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "output-mxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00011446399148553609, | |
"perf_norm_to_sol": 0.5987418355508412, | |
"perf_norm_to_cublas": 0.8432205949687471, | |
"compute_intensity": 167.18367346938774, | |
"tile_compute_intensity": 1.9104477611940298, | |
"MxNxK": 268435456, | |
"size_m": 128, | |
"size_n": 4096, | |
"size_k": 512, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_128x4096x512.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "output-mxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00003103039925917983, | |
"perf_norm_to_sol": 0.677894867495054, | |
"perf_norm_to_cublas": 1.0358874781419258, | |
"compute_intensity": 15.05190629306385, | |
"tile_compute_intensity": 0.9980506822612085, | |
"MxNxK": 33554432, | |
"size_m": 128, | |
"size_n": 16384, | |
"size_k": 16, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_128x16384x16.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "output-mxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00021359040401875974, | |
"perf_norm_to_sol": 0.16043412783775232, | |
"perf_norm_to_cublas": 0.46261254259215373, | |
"compute_intensity": 256, | |
"tile_compute_intensity": 3.1219512195121952, | |
"MxNxK": 134217728, | |
"size_m": 256, | |
"size_n": 512, | |
"size_k": 1024, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_256x512x1024.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "output-mxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.0018336672335863113, | |
"perf_norm_to_sol": 0.653958659783155, | |
"perf_norm_to_cublas": 0.9152212632813738, | |
"compute_intensity": 15.976596782057532, | |
"tile_compute_intensity": 1.9692307692307693, | |
"MxNxK": 2147483648, | |
"size_m": 16384, | |
"size_n": 8192, | |
"size_k": 16, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_16384x8192x16.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "output-mxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00002935999946203083, | |
"perf_norm_to_sol": 0.29178466290146016, | |
"perf_norm_to_cublas": 0.7625068410358862, | |
"compute_intensity": 97.52380952380952, | |
"tile_compute_intensity": 4, | |
"MxNxK": 33554432, | |
"size_m": 1024, | |
"size_n": 256, | |
"size_k": 128, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_1024x256x128.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "output-mxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.000018819198885466903, | |
"perf_norm_to_sol": 0.4552158462192252, | |
"perf_norm_to_cublas": 0.9731338713337877, | |
"compute_intensity": 56.10958904109589, | |
"tile_compute_intensity": 3.5555555555555554, | |
"MxNxK": 33554432, | |
"size_m": 2048, | |
"size_n": 256, | |
"size_k": 64, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_2048x256x64.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "output-mxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.000010598399967420846, | |
"perf_norm_to_sol": 0.1411542469173825, | |
"perf_norm_to_cublas": 0.5455917859886464, | |
"compute_intensity": 28.054794520547944, | |
"tile_compute_intensity": 1.28, | |
"MxNxK": 4194304, | |
"size_m": 128, | |
"size_n": 1024, | |
"size_k": 32, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_128x1024x32.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "output-mxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.0000892351963557303, | |
"perf_norm_to_sol": 0.8472351494667987, | |
"perf_norm_to_cublas": 1.004375006930633, | |
"compute_intensity": 15.906796116504854, | |
"tile_compute_intensity": 1.8823529411764706, | |
"MxNxK": 134217728, | |
"size_m": 4096, | |
"size_n": 2048, | |
"size_k": 16, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_4096x2048x16.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "output-mxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.000009529600356472656, | |
"perf_norm_to_sol": 0.6135836066945349, | |
"perf_norm_to_cublas": 1.1195432705907737, | |
"compute_intensity": 14.209887250650477, | |
"tile_compute_intensity": 0.9922480620155039, | |
"MxNxK": 8388608, | |
"size_m": 8192, | |
"size_n": 64, | |
"size_k": 16, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_8192x64x16.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "output-mxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.0015968607738614082, | |
"perf_norm_to_sol": 0.6866911028272158, | |
"perf_norm_to_cublas": 0.8343810702360097, | |
"compute_intensity": 431.1578947368421, | |
"tile_compute_intensity": 7.013698630136986, | |
"MxNxK": 4294967296, | |
"size_m": 4096, | |
"size_n": 256, | |
"size_k": 4096, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_4096x256x4096.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "output-mxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.00005928640021011233, | |
"perf_norm_to_sol": 0.577994110990365, | |
"perf_norm_to_cublas": 0.8643601589188211, | |
"compute_intensity": 186.1818181818182, | |
"tile_compute_intensity": 6.4, | |
"MxNxK": 134217728, | |
"size_m": 1024, | |
"size_n": 512, | |
"size_k": 256, | |
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_1024x512x256.json\">JSON output</a>" | |
}, | |
{ | |
"engine": "CuTe", | |
"schedule": "output-mxn", | |
"metric": "cute-statistics_cute-sec", | |
"perf": 0.000007843200000934302, | |
"perf_norm_to_sol": 0.6094364194105388, | |
"perf_norm_to_cublas": 1.0803753418954225, | |
"compute_intensity": 7.9073359073359075, |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment