Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save jowens/69accb35536621d0a03abdba063c650f to your computer and use it in GitHub Desktop.
Save jowens/69accb35536621d0a03abdba063c650f to your computer and use it in GitHub Desktop.
This file has been truncated, but you can view the full file.
{
"config": {"view": {"continuousWidth": 400, "continuousHeight": 300}},
"layer": [
{
"mark": "point",
"encoding": {
"x": {
"type": "quantitative",
"axis": {"title": "MxNxK"},
"field": "MxNxK",
"scale": {"type": "log"}
},
"y": {
"type": "quantitative",
"aggregate": "mean",
"axis": {"title": "Runtime (ms)"},
"field": "perf",
"scale": {"type": "log"}
}
},
"selection": {
"selector001": {
"type": "interval",
"bind": "scales",
"encodings": ["x", "y"]
}
}
},
{
"mark": {"type": "errorband", "extent": "ci"},
"encoding": {
"x": {
"type": "quantitative",
"axis": {"title": "MxNxK"},
"field": "MxNxK",
"scale": {"type": "log"}
},
"y": {
"type": "quantitative",
"aggregate": "mean",
"axis": {"title": "Runtime (ms)"},
"field": "perf",
"scale": {"type": "log"}
}
},
"selection": {
"selector002": {
"type": "interval",
"bind": "scales",
"encodings": ["x", "y"]
}
}
}
],
"data": {"name": "data-0e4914f6c5c15972c83660a50d164be0"},
"encoding": {
"color": {
"type": "nominal",
"field": "schedule",
"legend": {"title": "Schedule"},
"scale": {
"domain": ["roofline", "stream-kxmxn", "output-mxn", "cublas"],
"range": ["#377eb8", "#e41a1c", "#ff7f00", "#4daf4a"]
}
},
"shape": {
"type": "nominal",
"field": "schedule",
"legend": {"title": "Schedule"}
}
},
"$schema": "https://vega.github.io/schema/vega-lite/v4.8.1.json",
"datasets": {
"data-0e4914f6c5c15972c83660a50d164be0": [
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 6.613455154265933e-7,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 6.551492247803546,
"compute_intensity": 7.501831501831502,
"tile_compute_intensity": 0.64,
"MxNxK": 524288,
"size_m": 1024,
"size_n": 64,
"size_k": 8,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_1024x64x8.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.000009486887393705613,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.613679913574441,
"compute_intensity": 7.937984496124031,
"tile_compute_intensity": 0.9142857142857143,
"MxNxK": 8388608,
"size_m": 1024,
"size_n": 1024,
"size_k": 8,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_1024x1024x8.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0003006613543235933,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.1054563464628848,
"compute_intensity": 15.953261927945473,
"tile_compute_intensity": 1.9248120300751879,
"MxNxK": 536870912,
"size_m": 4096,
"size_n": 8192,
"size_k": 16,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_4096x8192x16.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.000008566797545815716,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 2.5030590631757748,
"compute_intensity": 97.52380952380952,
"tile_compute_intensity": 2.909090909090909,
"MxNxK": 33554432,
"size_m": 256,
"size_n": 1024,
"size_k": 128,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_256x1024x128.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.00006853438036652573,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.7266195150506327,
"compute_intensity": 56.79029462738301,
"tile_compute_intensity": 3.9384615384615387,
"MxNxK": 268435456,
"size_m": 16384,
"size_n": 256,
"size_k": 64,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_16384x256x64.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0010965500858644117,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.2103735787124164,
"compute_intensity": 630.1538461538462,
"tile_compute_intensity": 12.19047619047619,
"MxNxK": 4294967296,
"size_m": 4096,
"size_n": 512,
"size_k": 2048,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_4096x512x2048.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 1.641961279679818e-7,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 21.55471073845189,
"compute_intensity": 7.529411764705882,
"tile_compute_intensity": 0.5714285714285714,
"MxNxK": 131072,
"size_m": 128,
"size_n": 128,
"size_k": 8,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_128x128x8.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.000017133595091631432,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 2.3261900870207946,
"compute_intensity": 157.53846153846155,
"tile_compute_intensity": 3.2,
"MxNxK": 67108864,
"size_m": 256,
"size_n": 1024,
"size_k": 256,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_256x1024x256.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.00006853438036652573,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.5717425067308954,
"compute_intensity": 204.8,
"tile_compute_intensity": 1.8686131386861313,
"MxNxK": 268435456,
"size_m": 128,
"size_n": 1024,
"size_k": 2048,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_128x1024x2048.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.008772400686915293,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.1655299607326786,
"compute_intensity": 1638.4,
"tile_compute_intensity": 30.11764705882353,
"MxNxK": 34359738368,
"size_m": 4096,
"size_n": 2048,
"size_k": 4096,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_4096x2048x4096.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.000034267190183262865,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 3.065229293022569,
"compute_intensity": 186.1818181818182,
"tile_compute_intensity": 2.6391752577319587,
"MxNxK": 134217728,
"size_m": 512,
"size_n": 128,
"size_k": 2048,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_512x128x2048.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0000023717218484264033,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 3.9829290215553703,
"compute_intensity": 42.22680412371134,
"tile_compute_intensity": 1.5238095238095237,
"MxNxK": 8388608,
"size_m": 2048,
"size_n": 64,
"size_k": 64,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_2048x64x64.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.000017133595091631432,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 2.5572215966204097,
"compute_intensity": 204.8,
"tile_compute_intensity": 3.764705882352941,
"MxNxK": 67108864,
"size_m": 512,
"size_n": 256,
"size_k": 512,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_512x256x512.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.000002408209876863733,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 2.2828575371087947,
"compute_intensity": 7.876923076923077,
"tile_compute_intensity": 0.8421052631578947,
"MxNxK": 2097152,
"size_m": 512,
"size_n": 512,
"size_k": 8,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_512x512x8.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.00013706876073305146,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.5475795055831563,
"compute_intensity": 315.0769230769231,
"tile_compute_intensity": 3.9689922480620154,
"MxNxK": 536870912,
"size_m": 512,
"size_n": 256,
"size_k": 4096,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_512x256x4096.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.00015069555744616993,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.0569654793841783,
"compute_intensity": 7.9669341113542425,
"tile_compute_intensity": 0.9394495412844037,
"MxNxK": 134217728,
"size_m": 1024,
"size_n": 16384,
"size_k": 8,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_1024x16384x8.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.00006853438036652573,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.5935476240345885,
"compute_intensity": 117.02857142857142,
"tile_compute_intensity": 1.8686131386861313,
"MxNxK": 268435456,
"size_m": 2048,
"size_n": 64,
"size_k": 2048,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_2048x64x2048.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.004386200343457647,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.120088930644338,
"compute_intensity": 468.1142857142857,
"tile_compute_intensity": 3.8714555765595464,
"MxNxK": 17179869184,
"size_m": 256,
"size_n": 4096,
"size_k": 16384,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_256x4096x16384.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.008772400686915293,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.129901895901066,
"compute_intensity": 862.3157894736842,
"tile_compute_intensity": 12.720496894409937,
"MxNxK": 34359738368,
"size_m": 4096,
"size_n": 512,
"size_k": 16384,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_4096x512x16384.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0000026271380474877087,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 3.197395714073759,
"compute_intensity": 30.11764705882353,
"tile_compute_intensity": 2.2857142857142856,
"MxNxK": 8388608,
"size_m": 512,
"size_n": 512,
"size_k": 32,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_512x512x32.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.000002141699386453929,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 6.57123015703832,
"compute_intensity": 78.76923076923077,
"tile_compute_intensity": 1.5238095238095237,
"MxNxK": 8388608,
"size_m": 512,
"size_n": 64,
"size_k": 256,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_512x64x256.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0021931001717288233,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.2952550190004157,
"compute_intensity": 655.36,
"tile_compute_intensity": 7.529411764705882,
"MxNxK": 8589934592,
"size_m": 512,
"size_n": 8192,
"size_k": 2048,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_512x8192x2048.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0000010708496932269645,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 7.993652590257787,
"compute_intensity": 60.23529411764706,
"tile_compute_intensity": 1.4545454545454546,
"MxNxK": 4194304,
"size_m": 512,
"size_n": 64,
"size_k": 128,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_512x64x128.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.004386200343457647,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.120748428088651,
"compute_intensity": 1170.2857142857142,
"tile_compute_intensity": 12.641975308641975,
"MxNxK": 17179869184,
"size_m": 1024,
"size_n": 2048,
"size_k": 8192,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_1024x2048x8192.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.00000483466376794613,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.7943750278699315,
"compute_intensity": 7.861804222648752,
"tile_compute_intensity": 0.7901234567901234,
"MxNxK": 4194304,
"size_m": 256,
"size_n": 2048,
"size_k": 8,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_256x2048x8.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.00006853438036652573,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.458608077023638,
"compute_intensity": 61.59398496240601,
"tile_compute_intensity": 5.12,
"MxNxK": 268435456,
"size_m": 1024,
"size_n": 4096,
"size_k": 64,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_1024x4096x64.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.00007735462028713808,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.4855635669187341,
"compute_intensity": 31.44721689059501,
"tile_compute_intensity": 3.1604938271604937,
"MxNxK": 268435456,
"size_m": 1024,
"size_n": 8192,
"size_k": 32,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_1024x8192x32.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.004386200343457647,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.1192309469762824,
"compute_intensity": 248.24242424242425,
"tile_compute_intensity": 23.272727272727273,
"MxNxK": 17179869184,
"size_m": 8192,
"size_n": 8192,
"size_k": 256,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_8192x8192x256.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0002741375214661029,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.195592682482062,
"compute_intensity": 113.3840830449827,
"tile_compute_intensity": 5.278350515463917,
"MxNxK": 1073741824,
"size_m": 512,
"size_n": 16384,
"size_k": 128,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_512x16384x128.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.000008566797545815716,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 11.32409209211117,
"compute_intensity": 146.28571428571428,
"tile_compute_intensity": 1.9692307692307693,
"MxNxK": 33554432,
"size_m": 256,
"size_n": 128,
"size_k": 1024,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_256x128x1024.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 7.297605687465856e-7,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 6.621349574513979,
"compute_intensity": 28.444444444444443,
"tile_compute_intensity": 1.6,
"MxNxK": 2097152,
"size_m": 256,
"size_n": 256,
"size_k": 32,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_256x256x32.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0002741375214661029,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.3747377529075056,
"compute_intensity": 327.68,
"tile_compute_intensity": 3.764705882352941,
"MxNxK": 1073741824,
"size_m": 256,
"size_n": 4096,
"size_k": 1024,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_256x4096x1024.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.00006853438036652573,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.4544058085690694,
"compute_intensity": 62.06060606060606,
"tile_compute_intensity": 5.818181818181818,
"MxNxK": 268435456,
"size_m": 2048,
"size_n": 2048,
"size_k": 64,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_2048x2048x64.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0005482750429322058,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.2235143519360572,
"compute_intensity": 203.527950310559,
"tile_compute_intensity": 6.320987654320987,
"MxNxK": 2147483648,
"size_m": 512,
"size_n": 16384,
"size_k": 256,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_512x16384x256.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0021931001717288233,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.1467835866304656,
"compute_intensity": 126.03076923076924,
"tile_compute_intensity": 13.473684210526315,
"MxNxK": 8589934592,
"size_m": 8192,
"size_n": 8192,
"size_k": 128,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_8192x8192x128.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.017544801373830587,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.3133191338662902,
"compute_intensity": 1489.4545454545455,
"tile_compute_intensity": 21.11340206185567,
"MxNxK": 68719476736,
"size_m": 4096,
"size_n": 1024,
"size_k": 16384,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_4096x1024x16384.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.00013706876073305146,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.3392168723353555,
"compute_intensity": 60.12477064220184,
"tile_compute_intensity": 3.9689922480620154,
"MxNxK": 536870912,
"size_m": 512,
"size_n": 16384,
"size_k": 64,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_512x16384x64.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.00006853438036652573,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.3945000843089936,
"compute_intensity": 292.57142857142856,
"tile_compute_intensity": 5.818181818181818,
"MxNxK": 268435456,
"size_m": 512,
"size_n": 1024,
"size_k": 512,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_512x1024x512.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.000002955530303423672,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 2.8616184520161214,
"compute_intensity": 28.248275862068965,
"tile_compute_intensity": 1.8823529411764706,
"MxNxK": 8388608,
"size_m": 2048,
"size_n": 128,
"size_k": 32,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_2048x128x32.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.004386200343457647,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.282945815574234,
"compute_intensity": 1170.2857142857142,
"tile_compute_intensity": 13.837837837837839,
"MxNxK": 17179869184,
"size_m": 1024,
"size_n": 4096,
"size_k": 4096,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_1024x4096x4096.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.000010526796204169499,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.640917174025763,
"compute_intensity": 15.044995408631772,
"tile_compute_intensity": 1.3195876288659794,
"MxNxK": 16777216,
"size_m": 8192,
"size_n": 128,
"size_k": 16,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_8192x128x16.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.000034267190183262865,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 2.306953145341202,
"compute_intensity": 256,
"tile_compute_intensity": 4.923076923076923,
"MxNxK": 134217728,
"size_m": 512,
"size_n": 512,
"size_k": 512,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_512x512x512.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.000010513113193505498,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.4622499225287777,
"compute_intensity": 7.527682058350563,
"tile_compute_intensity": 0.6649350649350649,
"MxNxK": 8388608,
"size_m": 16384,
"size_n": 64,
"size_k": 8,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_16384x64x8.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0021931001717288233,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.157635016182315,
"compute_intensity": 655.36,
"tile_compute_intensity": 21.333333333333332,
"MxNxK": 8589934592,
"size_m": 8192,
"size_n": 1024,
"size_k": 1024,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_8192x1024x1024.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0000051083239812261,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 2.4963175920275287,
"compute_intensity": 30.567164179104477,
"tile_compute_intensity": 2.4615384615384617,
"MxNxK": 16777216,
"size_m": 512,
"size_n": 1024,
"size_k": 32,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_512x1024x32.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0003015370670060892,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.4117323124608676,
"compute_intensity": 15.929995138551288,
"tile_compute_intensity": 1.9248120300751879,
"MxNxK": 536870912,
"size_m": 16384,
"size_n": 2048,
"size_k": 16,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_16384x2048x16.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0000029281642820956753,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 2.2446829159788386,
"compute_intensity": 14.197573656845753,
"tile_compute_intensity": 0.9846153846153847,
"MxNxK": 4194304,
"size_m": 4096,
"size_n": 64,
"size_k": 16,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_4096x64x16.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0010965500858644117,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.195726875136539,
"compute_intensity": 63.38104448742747,
"tile_compute_intensity": 7.013698630136986,
"MxNxK": 4294967296,
"size_m": 4096,
"size_n": 16384,
"size_k": 64,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_4096x16384x64.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0010965500858644117,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.17356283494815,
"compute_intensity": 431.1578947368421,
"tile_compute_intensity": 18.285714285714285,
"MxNxK": 4294967296,
"size_m": 2048,
"size_n": 4096,
"size_k": 512,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_2048x4096x512.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.008772400686915293,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.0851229112188325,
"compute_intensity": 862.3157894736842,
"tile_compute_intensity": 36.57142857142857,
"MxNxK": 34359738368,
"size_m": 4096,
"size_n": 8192,
"size_k": 1024,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_4096x8192x1024.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.00239594989930879,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.3994950458015878,
"compute_intensity": 15.984390243902439,
"tile_compute_intensity": 1.9768339768339769,
"MxNxK": 4294967296,
"size_m": 16384,
"size_n": 16384,
"size_k": 16,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_16384x16384x16.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.000004971493874586116,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.766229581227142,
"compute_intensity": 7.750236518448439,
"tile_compute_intensity": 0.6632124352331606,
"MxNxK": 4194304,
"size_m": 128,
"size_n": 4096,
"size_k": 8,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_128x4096x8.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0005482750429322058,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.353382096064251,
"compute_intensity": 390.0952380952381,
"tile_compute_intensity": 16,
"MxNxK": 2147483648,
"size_m": 4096,
"size_n": 1024,
"size_k": 512,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_4096x1024x512.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.008772400686915293,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.127009874332965,
"compute_intensity": 474.8985507246377,
"tile_compute_intensity": 36.57142857142857,
"MxNxK": 34359738368,
"size_m": 16384,
"size_n": 4096,
"size_k": 512,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_16384x4096x512.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0003015370670060892,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.103614859113013,
"compute_intensity": 15.929995138551288,
"tile_compute_intensity": 1.8754578754578755,
"MxNxK": 536870912,
"size_m": 2048,
"size_n": 16384,
"size_k": 16,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_2048x16384x16.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.004386200343457647,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.1140415981227691,
"compute_intensity": 468.1142857142857,
"tile_compute_intensity": 3.9233716475095783,
"MxNxK": 17179869184,
"size_m": 256,
"size_n": 8192,
"size_k": 8192,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_256x8192x8192.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0002741375214661029,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.3659246927907513,
"compute_intensity": 372.3636363636364,
"tile_compute_intensity": 3.5310344827586206,
"MxNxK": 1073741824,
"size_m": 256,
"size_n": 1024,
"size_k": 4096,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_256x1024x4096.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 5.354248466134823e-7,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 10.441054825328882,
"compute_intensity": 46.54545454545455,
"tile_compute_intensity": 1.6,
"MxNxK": 2097152,
"size_m": 256,
"size_n": 128,
"size_k": 64,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_256x128x64.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.008772400686915293,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.1121607776585947,
"compute_intensity": 481.88235294117646,
"tile_compute_intensity": 3.930902111324376,
"MxNxK": 34359738368,
"size_m": 256,
"size_n": 8192,
"size_k": 16384,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_256x8192x16384.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.00007647890760464218,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.4311501596107339,
"compute_intensity": 31.62934362934363,
"tile_compute_intensity": 3.4594594594594597,
"MxNxK": 268435456,
"size_m": 2048,
"size_n": 4096,
"size_k": 32,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_2048x4096x32.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0002741375214661029,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.2312069033879274,
"compute_intensity": 62.77394636015325,
"tile_compute_intensity": 6.2439024390243905,
"MxNxK": 1073741824,
"size_m": 2048,
"size_n": 8192,
"size_k": 64,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_2048x8192x64.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0002741375214661029,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.3743175090756368,
"compute_intensity": 512,
"tile_compute_intensity": 9.846153846153847,
"MxNxK": 1073741824,
"size_m": 1024,
"size_n": 1024,
"size_k": 1024,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_1024x1024x1024.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.000014029646934153111,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.6691510372260017,
"compute_intensity": 25.580015612802498,
"tile_compute_intensity": 1.3264248704663213,
"MxNxK": 33554432,
"size_m": 16384,
"size_n": 64,
"size_k": 32,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_16384x64x32.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.000004283398772907858,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 5.645610333572172,
"compute_intensity": 128,
"tile_compute_intensity": 1.5609756097560976,
"MxNxK": 16777216,
"size_m": 128,
"size_n": 256,
"size_k": 512,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_128x256x512.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0010965500858644117,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.1717039269554026,
"compute_intensity": 399.609756097561,
"tile_compute_intensity": 12.19047619047619,
"MxNxK": 4294967296,
"size_m": 1024,
"size_n": 8192,
"size_k": 512,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_1024x8192x512.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.000017133595091631432,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 2.3749365030122616,
"compute_intensity": 157.53846153846155,
"tile_compute_intensity": 4.571428571428571,
"MxNxK": 67108864,
"size_m": 1024,
"size_n": 256,
"size_k": 256,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_1024x256x256.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.00006853438036652573,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.4083208236033584,
"compute_intensity": 167.18367346938774,
"tile_compute_intensity": 1.9104477611940298,
"MxNxK": 268435456,
"size_m": 128,
"size_n": 4096,
"size_k": 512,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_128x4096x512.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.00002103534839412033,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.5280945878373742,
"compute_intensity": 15.05190629306385,
"tile_compute_intensity": 0.9980506822612085,
"MxNxK": 33554432,
"size_m": 128,
"size_n": 16384,
"size_k": 16,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_128x16384x16.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.000034267190183262865,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 2.8835045811449524,
"compute_intensity": 256,
"tile_compute_intensity": 3.1219512195121952,
"MxNxK": 134217728,
"size_m": 256,
"size_n": 512,
"size_k": 1024,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_256x512x1024.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0011991425665643896,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.3995093567303631,
"compute_intensity": 15.976596782057532,
"tile_compute_intensity": 1.9692307692307693,
"MxNxK": 2147483648,
"size_m": 16384,
"size_n": 8192,
"size_k": 16,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_16384x8192x16.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.000008566797545815716,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 2.61325195592407,
"compute_intensity": 97.52380952380952,
"tile_compute_intensity": 4,
"MxNxK": 33554432,
"size_m": 1024,
"size_n": 256,
"size_k": 128,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_1024x256x128.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.000008566797545815716,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 2.1377416436973964,
"compute_intensity": 56.10958904109589,
"tile_compute_intensity": 3.5555555555555554,
"MxNxK": 33554432,
"size_m": 2048,
"size_n": 256,
"size_k": 64,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_2048x256x64.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0000014960091659305007,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 3.8652169375249548,
"compute_intensity": 28.054794520547944,
"tile_compute_intensity": 1.28,
"MxNxK": 4194304,
"size_m": 128,
"size_n": 1024,
"size_k": 32,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_128x1024x32.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.00007560319492214628,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.1854737230422148,
"compute_intensity": 15.906796116504854,
"tile_compute_intensity": 1.8823529411764706,
"MxNxK": 134217728,
"size_m": 4096,
"size_n": 2048,
"size_k": 16,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_4096x2048x16.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.000005847206557082019,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.8245977538772877,
"compute_intensity": 14.209887250650477,
"tile_compute_intensity": 0.9922480620155039,
"MxNxK": 8388608,
"size_m": 8192,
"size_n": 64,
"size_k": 16,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_8192x64x16.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0010965500858644117,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.2150748230182844,
"compute_intensity": 431.1578947368421,
"tile_compute_intensity": 7.013698630136986,
"MxNxK": 4294967296,
"size_m": 4096,
"size_n": 256,
"size_k": 4096,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_4096x256x4096.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.000034267190183262865,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.4954480374164743,
"compute_intensity": 186.1818181818182,
"tile_compute_intensity": 6.4,
"MxNxK": 134217728,
"size_m": 1024,
"size_n": 512,
"size_k": 256,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_1024x512x256.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.000004779931725290136,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.7727449615505204,
"compute_intensity": 7.9073359073359075,
"tile_compute_intensity": 0.8648648648648649,
"MxNxK": 4194304,
"size_m": 512,
"size_n": 1024,
"size_k": 8,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_512x1024x8.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.000008566797545815716,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 3.3670458990817824,
"compute_intensity": 146.28571428571428,
"tile_compute_intensity": 1.7297297297297298,
"MxNxK": 33554432,
"size_m": 128,
"size_n": 512,
"size_k": 512,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_128x512x512.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0021931001717288233,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 2.053585353335932,
"compute_intensity": 126.03076923076924,
"tile_compute_intensity": 1.967339097022094,
"MxNxK": 8589934592,
"size_m": 8192,
"size_n": 64,
"size_k": 16384,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_8192x64x16384.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0005482750429322058,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 2.0709023208717845,
"compute_intensity": 390.0952380952381,
"tile_compute_intensity": 5.305699481865285,
"MxNxK": 2147483648,
"size_m": 1024,
"size_n": 256,
"size_k": 8192,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_1024x256x8192.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.008772400686915293,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.06927314501595,
"compute_intensity": 799.219512195122,
"tile_compute_intensity": 24.38095238095238,
"MxNxK": 34359738368,
"size_m": 2048,
"size_n": 16384,
"size_k": 1024,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_2048x16384x1024.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.000017133595091631432,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.703320272951015,
"compute_intensity": 107.78947368421052,
"tile_compute_intensity": 5.333333333333333,
"MxNxK": 67108864,
"size_m": 1024,
"size_n": 512,
"size_k": 128,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_1024x512x128.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.000002141699386453929,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 11.114911902855777,
"compute_intensity": 85.33333333333333,
"tile_compute_intensity": 1.3061224489795917,
"MxNxK": 8388608,
"size_m": 256,
"size_n": 64,
"size_k": 512,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_256x64x512.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.008772400686915293,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.1061495412624387,
"compute_intensity": 474.8985507246377,
"tile_compute_intensity": 30.11764705882353,
"MxNxK": 34359738368,
"size_m": 4096,
"size_n": 16384,
"size_k": 512,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_4096x16384x512.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.000008566797545815716,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 11.33604441102209,
"compute_intensity": 146.28571428571428,
"tile_compute_intensity": 1.5802469135802468,
"MxNxK": 33554432,
"size_m": 128,
"size_n": 256,
"size_k": 1024,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_128x256x1024.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.004386200343457647,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.1362143863133172,
"compute_intensity": 468.1142857142857,
"tile_compute_intensity": 7.086505190311419,
"MxNxK": 17179869184,
"size_m": 4096,
"size_n": 256,
"size_k": 16384,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_4096x256x16384.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.000023388826228328075,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.914350059644594,
"compute_intensity": 28.419774501300953,
"tile_compute_intensity": 1.9844961240310077,
"MxNxK": 67108864,
"size_m": 16384,
"size_n": 128,
"size_k": 32,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_16384x128x32.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.000009961231763390895,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.7466113857740306,
"compute_intensity": 15.485822306238186,
"tile_compute_intensity": 1.5609756097560976,
"MxNxK": 16777216,
"size_m": 4096,
"size_n": 256,
"size_k": 16,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_4096x256x16.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.000009961231763390895,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.6939269649661468,
"compute_intensity": 15.485822306238186,
"tile_compute_intensity": 1.3195876288659794,
"MxNxK": 16777216,
"size_m": 256,
"size_n": 4096,
"size_k": 16,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_256x4096x16.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0010965500858644117,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.3194224366152645,
"compute_intensity": 337.8144329896907,
"tile_compute_intensity": 7.314285714285714,
"MxNxK": 4294967296,
"size_m": 16384,
"size_n": 256,
"size_k": 1024,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_16384x256x1024.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.000001222348952650531,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 3.6022446707846063,
"compute_intensity": 7.816793893129771,
"tile_compute_intensity": 0.7619047619047619,
"MxNxK": 1048576,
"size_m": 256,
"size_n": 512,
"size_k": 8,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_256x512x8.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.00013706876073305146,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.4424993529704386,
"compute_intensity": 409.6,
"tile_compute_intensity": 5.224489795918367,
"MxNxK": 536870912,
"size_m": 512,
"size_n": 512,
"size_k": 2048,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_512x512x2048.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0000051083239812261,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 2.4812834657482483,
"compute_intensity": 30.567164179104477,
"tile_compute_intensity": 2.6666666666666665,
"MxNxK": 16777216,
"size_m": 1024,
"size_n": 512,
"size_k": 32,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_1024x512x32.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.000004283398772907858,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 4.062568125877826,
"compute_intensity": 81.92,
"tile_compute_intensity": 1.6842105263157894,
"MxNxK": 16777216,
"size_m": 1024,
"size_n": 64,
"size_k": 256,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_1024x64x256.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.000002141699386453929,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 5.042724523579948,
"compute_intensity": 62.06060606060606,
"tile_compute_intensity": 1.6,
"MxNxK": 8388608,
"size_m": 1024,
"size_n": 64,
"size_k": 128,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_1024x64x128.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.000019557583242408497,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.9999198107665932,
"compute_intensity": 31.267175572519083,
"tile_compute_intensity": 3.0476190476190474,
"MxNxK": 67108864,
"size_m": 1024,
"size_n": 2048,
"size_k": 32,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_1024x2048x32.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0010965500858644117,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.1950060567450536,
"compute_intensity": 819.2,
"tile_compute_intensity": 15.058823529411764,
"MxNxK": 4294967296,
"size_m": 2048,
"size_n": 1024,
"size_k": 2048,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_2048x1024x2048.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.00013706876073305146,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 2.425900664949112,
"compute_intensity": 118.72463768115942,
"tile_compute_intensity": 1.7746967071057191,
"MxNxK": 536870912,
"size_m": 1024,
"size_n": 64,
"size_k": 8192,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_1024x64x8192.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0002741375214661029,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.3886752737840893,
"compute_intensity": 221.40540540540542,
"tile_compute_intensity": 3.710144927536232,
"MxNxK": 1073741824,
"size_m": 4096,
"size_n": 128,
"size_k": 2048,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_4096x128x2048.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.000034267190183262865,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 2.277723946826653,
"compute_intensity": 163.84,
"tile_compute_intensity": 3.3684210526315788,
"MxNxK": 134217728,
"size_m": 2048,
"size_n": 128,
"size_k": 512,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_2048x128x512.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.017544801373830587,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.2727182969336432,
"compute_intensity": 489.07462686567163,
"tile_compute_intensity": 7.728301886792453,
"MxNxK": 68719476736,
"size_m": 16384,
"size_n": 256,
"size_k": 16384,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_16384x256x16384.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.00013706876073305146,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.4120329491408465,
"compute_intensity": 315.0769230769231,
"tile_compute_intensity": 6.095238095238095,
"MxNxK": 536870912,
"size_m": 2048,
"size_n": 256,
"size_k": 1024,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_2048x256x1024.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 3.1927024882663123e-7,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 11.756810952289273,
"compute_intensity": 7.641791044776119,
"tile_compute_intensity": 0.6153846153846154,
"MxNxK": 262144,
"size_m": 128,
"size_n": 256,
"size_k": 8,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_128x256x8.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.000008566797545815716,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 2.1340063317075844,
"compute_intensity": 58.51428571428571,
"tile_compute_intensity": 4,
"MxNxK": 33554432,
"size_m": 1024,
"size_n": 512,
"size_k": 64,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_1024x512x64.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.000034267190183262865,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.5152451909624784,
"compute_intensity": 163.84,
"tile_compute_intensity": 3.3684210526315788,
"MxNxK": 134217728,
"size_m": 256,
"size_n": 2048,
"size_k": 256,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_256x2048x256.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.000019010262815848558,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.453530681910854,
"compute_intensity": 7.930300096805421,
"tile_compute_intensity": 0.927536231884058,
"MxNxK": 16777216,
"size_m": 4096,
"size_n": 512,
"size_k": 8,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_4096x512x8.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.00013706876073305146,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.2712801423651239,
"compute_intensity": 118.72463768115942,
"tile_compute_intensity": 7.529411764705882,
"MxNxK": 536870912,
"size_m": 1024,
"size_n": 4096,
"size_k": 128,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_1024x4096x128.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0010965500858644117,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.1965819711145615,
"compute_intensity": 399.609756097561,
"tile_compute_intensity": 3.878787878787879,
"MxNxK": 4294967296,
"size_m": 256,
"size_n": 8192,
"size_k": 2048,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_256x8192x2048.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.000034267190183262865,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.5612836331594215,
"compute_intensity": 84.89119170984456,
"tile_compute_intensity": 1.7655172413793103,
"MxNxK": 134217728,
"size_m": 128,
"size_n": 8192,
"size_k": 128,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_128x8192x128.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.000019885975498344463,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.4958883830107685,
"compute_intensity": 15.500473036896878,
"tile_compute_intensity": 1.3264248704663213,
"MxNxK": 33554432,
"size_m": 256,
"size_n": 8192,
"size_k": 16,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_256x8192x16.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.000004283398772907858,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 3.8810302645317605,
"compute_intensity": 113.77777777777777,
"tile_compute_intensity": 2.4615384615384617,
"MxNxK": 16777216,
"size_m": 512,
"size_n": 128,
"size_k": 256,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_512x128x256.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.00006853438036652573,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.4321337348006202,
"compute_intensity": 127.0077519379845,
"tile_compute_intensity": 1.8686131386861313,
"MxNxK": 268435456,
"size_m": 128,
"size_n": 8192,
"size_k": 256,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_128x8192x256.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.000002141699386453929,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 4.714013671194087,
"compute_intensity": 78.76923076923077,
"tile_compute_intensity": 2.2857142857142856,
"MxNxK": 8388608,
"size_m": 512,
"size_n": 128,
"size_k": 128,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_512x128x128.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.00006853438036652573,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.4677596763307375,
"compute_intensity": 195.04761904761904,
"tile_compute_intensity": 1.9104477611940298,
"MxNxK": 268435456,
"size_m": 128,
"size_n": 2048,
"size_k": 1024,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_128x2048x1024.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0002741375214661029,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.4053675660201446,
"compute_intensity": 221.40540540540542,
"tile_compute_intensity": 9.846153846153847,
"MxNxK": 1073741824,
"size_m": 1024,
"size_n": 4096,
"size_k": 256,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_1024x4096x256.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0005482750429322058,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.3630997675498888,
"compute_intensity": 203.527950310559,
"tile_compute_intensity": 1.9616858237547892,
"MxNxK": 2147483648,
"size_m": 128,
"size_n": 16384,
"size_k": 1024,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_128x16384x1024.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.000034267190183262865,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.7173277778448703,
"compute_intensity": 61.134328358208954,
"tile_compute_intensity": 4.923076923076923,
"MxNxK": 134217728,
"size_m": 1024,
"size_n": 2048,
"size_k": 64,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_1024x2048x64.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.000017133595091631432,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.731522133270276,
"compute_intensity": 99.90243902439025,
"tile_compute_intensity": 3.0476190476190474,
"MxNxK": 67108864,
"size_m": 256,
"size_n": 2048,
"size_k": 128,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_256x2048x128.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.00003765564534732382,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.2436063464020062,
"compute_intensity": 7.968871595330739,
"tile_compute_intensity": 0.9552238805970149,
"MxNxK": 33554432,
"size_m": 2048,
"size_n": 2048,
"size_k": 8,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_2048x2048x8.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0005482750429322058,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.1471965263051922,
"compute_intensity": 123.18796992481202,
"tile_compute_intensity": 10.24,
"MxNxK": 2147483648,
"size_m": 2048,
"size_n": 8192,
"size_k": 128,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_2048x8192x128.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.000002141699386453929,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 3.851894491282986,
"compute_intensity": 49.951219512195124,
"tile_compute_intensity": 1.5238095238095237,
"MxNxK": 8388608,
"size_m": 128,
"size_n": 1024,
"size_k": 64,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_128x1024x64.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.008772400686915293,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.133463606760656,
"compute_intensity": 250.13740458015266,
"tile_compute_intensity": 25.6,
"MxNxK": 34359738368,
"size_m": 16384,
"size_n": 8192,
"size_k": 256,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_16384x8192x256.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.004386200343457647,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.1560219865026764,
"compute_intensity": 126.51737451737452,
"tile_compute_intensity": 14.222222222222221,
"MxNxK": 17179869184,
"size_m": 16384,
"size_n": 8192,
"size_k": 128,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_16384x8192x128.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.000034267190183262865,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.9023210151116623,
"compute_intensity": 51.1201248049922,
"tile_compute_intensity": 2.6391752577319587,
"MxNxK": 134217728,
"size_m": 16384,
"size_n": 128,
"size_k": 64,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_16384x128x64.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0010965500858644117,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.121282480880646,
"compute_intensity": 237.44927536231884,
"tile_compute_intensity": 15.058823529411764,
"MxNxK": 4294967296,
"size_m": 2048,
"size_n": 8192,
"size_k": 256,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_2048x8192x256.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.000002141699386453929,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 45.135000172658884,
"compute_intensity": 78.76923076923077,
"tile_compute_intensity": 0.9922480620155039,
"MxNxK": 8388608,
"size_m": 128,
"size_n": 64,
"size_k": 1024,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_128x64x1024.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.000019885975498344463,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.5335431016516246,
"compute_intensity": 15.500473036896878,
"tile_compute_intensity": 1.5802469135802468,
"MxNxK": 33554432,
"size_m": 8192,
"size_n": 256,
"size_k": 16,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_8192x256x16.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.000009933865742062898,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.5684528126312118,
"compute_intensity": 7.75390440132513,
"tile_compute_intensity": 0.6649350649350649,
"MxNxK": 8388608,
"size_m": 128,
"size_n": 8192,
"size_k": 8,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_128x8192x8.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.00013706876073305146,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.4525147113256598,
"compute_intensity": 341.3333333333333,
"tile_compute_intensity": 5.224489795918367,
"MxNxK": 536870912,
"size_m": 1024,
"size_n": 256,
"size_k": 2048,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_1024x256x2048.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.00001871835858834992,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.749726145626669,
"compute_intensity": 42.6111833550065,
"tile_compute_intensity": 1.5900621118012421,
"MxNxK": 67108864,
"size_m": 16384,
"size_n": 64,
"size_k": 64,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_16384x64x64.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.004386200343457647,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.1432335114630678,
"compute_intensity": 248.24242424242425,
"tile_compute_intensity": 3.8714555765595464,
"MxNxK": 17179869184,
"size_m": 8192,
"size_n": 128,
"size_k": 16384,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_8192x128x16384.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.000005327252151850076,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 2.4015382922925177,
"compute_intensity": 29.8978102189781,
"tile_compute_intensity": 2.4615384615384617,
"MxNxK": 16777216,
"size_m": 2048,
"size_n": 256,
"size_k": 32,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_2048x256x32.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0006007389001921893,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.4041907958322308,
"compute_intensity": 15.961032635168047,
"tile_compute_intensity": 1.9541984732824427,
"MxNxK": 1073741824,
"size_m": 16384,
"size_n": 4096,
"size_k": 16,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_16384x4096x16.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 3.7400229148262517e-7,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 13.125053568818041,
"compute_intensity": 14.027397260273972,
"tile_compute_intensity": 0.8888888888888888,
"MxNxK": 524288,
"size_m": 512,
"size_n": 64,
"size_k": 16,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_512x64x16.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0006036579424671757,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.2170866689837623,
"compute_intensity": 31.844509232264333,
"tile_compute_intensity": 3.7372262773722627,
"MxNxK": 2147483648,
"size_m": 4096,
"size_n": 16384,
"size_k": 32,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_4096x16384x32.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0003002234979823453,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.0298355046978815,
"compute_intensity": 7.982460414129111,
"tile_compute_intensity": 0.9678638941398866,
"MxNxK": 268435456,
"size_m": 2048,
"size_n": 16384,
"size_k": 8,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_2048x16384x8.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.00013706876073305146,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.305762133981854,
"compute_intensity": 102.0809968847352,
"tile_compute_intensity": 3.1801242236024843,
"MxNxK": 536870912,
"size_m": 256,
"size_n": 16384,
"size_k": 128,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_256x16384x128.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0010965500858644117,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 2.044179977034163,
"compute_intensity": 240.94117647058823,
"tile_compute_intensity": 3.750915750915751,
"MxNxK": 4294967296,
"size_m": 4096,
"size_n": 128,
"size_k": 8192,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_4096x128x8192.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.017544801373830587,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.073912049189928,
"compute_intensity": 1820.4444444444443,
"tile_compute_intensity": 26.94736842105263,
"MxNxK": 68719476736,
"size_m": 2048,
"size_n": 8192,
"size_k": 4096,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_2048x8192x4096.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.000034267190183262865,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 3.0686846689432947,
"compute_intensity": 113.77777777777777,
"tile_compute_intensity": 1.7655172413793103,
"MxNxK": 134217728,
"size_m": 1024,
"size_n": 64,
"size_k": 2048,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_1024x64x2048.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.004386200343457647,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.125074017894043,
"compute_intensity": 1170.2857142857142,
"tile_compute_intensity": 15.753846153846155,
"MxNxK": 17179869184,
"size_m": 2048,
"size_n": 1024,
"size_k": 8192,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_2048x1024x8192.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.00015237400675428709,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.3086956780385608,
"compute_intensity": 31.690522243713733,
"tile_compute_intensity": 3.506849315068493,
"MxNxK": 536870912,
"size_m": 2048,
"size_n": 8192,
"size_k": 32,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_2048x8192x32.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.008772400686915293,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.0915342625486857,
"compute_intensity": 1260.3076923076924,
"tile_compute_intensity": 36.57142857142857,
"MxNxK": 34359738368,
"size_m": 8192,
"size_n": 2048,
"size_k": 2048,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_8192x2048x2048.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.00013706876073305146,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.2993186783333979,
"compute_intensity": 62.534351145038165,
"tile_compute_intensity": 6.095238095238095,
"MxNxK": 536870912,
"size_m": 2048,
"size_n": 4096,
"size_k": 64,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_2048x4096x64.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0002741375214661029,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.4051808853527137,
"compute_intensity": 169.78238341968913,
"tile_compute_intensity": 3.5310344827586206,
"MxNxK": 1073741824,
"size_m": 256,
"size_n": 16384,
"size_k": 256,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_256x16384x256.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 4.013683128106221e-7,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 10.795072453767208,
"compute_intensity": 26.94736842105263,
"tile_compute_intensity": 1.3333333333333333,
"MxNxK": 1048576,
"size_m": 256,
"size_n": 128,
"size_k": 32,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_256x128x32.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.00013706876073305146,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.3684460708499044,
"compute_intensity": 102.0809968847352,
"tile_compute_intensity": 1.9248120300751879,
"MxNxK": 536870912,
"size_m": 16384,
"size_n": 64,
"size_k": 512,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_16384x64x512.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 6.750285260905918e-7,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 6.077372657394267,
"compute_intensity": 14.840579710144928,
"tile_compute_intensity": 0.9411764705882353,
"MxNxK": 1048576,
"size_m": 128,
"size_n": 512,
"size_k": 16,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_128x512x16.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0005482750429322058,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 2.0678089253899077,
"compute_intensity": 481.88235294117646,
"tile_compute_intensity": 5.305699481865285,
"MxNxK": 2147483648,
"size_m": 512,
"size_n": 512,
"size_k": 8192,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_512x512x8192.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.00013706876073305146,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.3605784744227953,
"compute_intensity": 315.0769230769231,
"tile_compute_intensity": 6.4,
"MxNxK": 536870912,
"size_m": 512,
"size_n": 2048,
"size_k": 512,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_512x2048x512.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0021931001717288233,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.1532241545888886,
"compute_intensity": 504.12307692307695,
"tile_compute_intensity": 7.420289855072464,
"MxNxK": 8589934592,
"size_m": 512,
"size_n": 16384,
"size_k": 1024,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_512x16384x1024.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0005482750429322058,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.355331456700681,
"compute_intensity": 481.88235294117646,
"tile_compute_intensity": 7.111111111111111,
"MxNxK": 2147483648,
"size_m": 512,
"size_n": 4096,
"size_k": 1024,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_512x4096x1024.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.00013706876073305146,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.551618364754678,
"compute_intensity": 215.57894736842104,
"tile_compute_intensity": 3.1801242236024843,
"MxNxK": 536870912,
"size_m": 1024,
"size_n": 128,
"size_k": 4096,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_1024x128x4096.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0005482750429322058,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.3703253625370027,
"compute_intensity": 123.18796992481202,
"tile_compute_intensity": 1.9616858237547892,
"MxNxK": 2147483648,
"size_m": 8192,
"size_n": 64,
"size_k": 4096,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_8192x64x4096.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.004386200343457647,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.0619983743662147,
"compute_intensity": 668.734693877551,
"tile_compute_intensity": 13.837837837837839,
"MxNxK": 17179869184,
"size_m": 1024,
"size_n": 16384,
"size_k": 1024,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_1024x16384x1024.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0005482750429322058,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.2058765110782972,
"compute_intensity": 585.1428571428571,
"tile_compute_intensity": 7.876923076923077,
"MxNxK": 2147483648,
"size_m": 1024,
"size_n": 512,
"size_k": 4096,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_1024x512x4096.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.000004283398772907858,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 3.042817421083696,
"compute_intensity": 50.5679012345679,
"tile_compute_intensity": 2.4615384615384617,
"MxNxK": 16777216,
"size_m": 2048,
"size_n": 128,
"size_k": 64,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_2048x128x64.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0005482750429322058,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 2.0704878524199817,
"compute_intensity": 224.43835616438355,
"tile_compute_intensity": 3.1950078003120126,
"MxNxK": 2147483648,
"size_m": 1024,
"size_n": 128,
"size_k": 16384,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_1024x128x16384.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.00006853438036652573,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.4466549087921194,
"compute_intensity": 195.04761904761904,
"tile_compute_intensity": 8,
"MxNxK": 268435456,
"size_m": 2048,
"size_n": 512,
"size_k": 256,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_2048x512x256.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.00013706876073305146,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 2.3821502924430638,
"compute_intensity": 199.8048780487805,
"tile_compute_intensity": 2.6597402597402597,
"MxNxK": 536870912,
"size_m": 512,
"size_n": 128,
"size_k": 8192,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_512x128x8192.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0005482750429322058,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.374148239480464,
"compute_intensity": 334.3673469387755,
"tile_compute_intensity": 7.111111111111111,
"MxNxK": 2147483648,
"size_m": 8192,
"size_n": 256,
"size_k": 1024,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_8192x256x1024.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0002741375214661029,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 2.1516091805567084,
"compute_intensity": 221.40540540540542,
"tile_compute_intensity": 1.8788990825688074,
"MxNxK": 1073741824,
"size_m": 128,
"size_n": 1024,
"size_k": 8192,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_128x1024x8192.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0010965500858644117,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.2025935472389564,
"compute_intensity": 237.44927536231884,
"tile_compute_intensity": 1.9768339768339769,
"MxNxK": 4294967296,
"size_m": 128,
"size_n": 8192,
"size_k": 4096,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_128x8192x4096.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.00007954390199337785,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.5038538211486665,
"compute_intensity": 31.000946073793756,
"tile_compute_intensity": 2.6528497409326426,
"MxNxK": 268435456,
"size_m": 512,
"size_n": 16384,
"size_k": 32,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_512x16384x32.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.004386200343457647,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.1317568120870027,
"compute_intensity": 819.2,
"tile_compute_intensity": 12.641975308641975,
"MxNxK": 17179869184,
"size_m": 4096,
"size_n": 512,
"size_k": 8192,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_4096x512x8192.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.000008566797545815716,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 2.2139428576101623,
"compute_intensity": 58.51428571428571,
"tile_compute_intensity": 3.5555555555555554,
"MxNxK": 33554432,
"size_m": 512,
"size_n": 1024,
"size_k": 64,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_512x1024x64.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.000034267190183262865,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.552412139785528,
"compute_intensity": 110.70270270270271,
"tile_compute_intensity": 6.4,
"MxNxK": 134217728,
"size_m": 2048,
"size_n": 512,
"size_k": 128,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_2048x512x128.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0003002234979823453,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.0249643584828698,
"compute_intensity": 7.982460414129111,
"tile_compute_intensity": 0.9808429118773946,
"MxNxK": 268435456,
"size_m": 16384,
"size_n": 2048,
"size_k": 8,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_16384x2048x8.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 1.9156214929597874e-7,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 18.742743501543572,
"compute_intensity": 13.837837837837839,
"tile_compute_intensity": 0.8,
"MxNxK": 262144,
"size_m": 256,
"size_n": 64,
"size_k": 16,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_256x64x16.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0010965500858644117,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.1781502702938915,
"compute_intensity": 431.1578947368421,
"tile_compute_intensity": 21.333333333333332,
"MxNxK": 4294967296,
"size_m": 4096,
"size_n": 2048,
"size_k": 512,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_4096x2048x512.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.000034267190183262865,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 4.263997075628031,
"compute_intensity": 84.89119170984456,
"tile_compute_intensity": 0.9995119570522206,
"MxNxK": 134217728,
"size_m": 128,
"size_n": 64,
"size_k": 16384,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_128x64x16384.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.004386200343457647,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.1420530407639684,
"compute_intensity": 126.51737451737452,
"tile_compute_intensity": 1.9825750242013553,
"MxNxK": 17179869184,
"size_m": 16384,
"size_n": 64,
"size_k": 16384,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_16384x64x16384.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0000010708496932269645,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 6.266425301387257,
"compute_intensity": 51.2,
"tile_compute_intensity": 2,
"MxNxK": 4194304,
"size_m": 256,
"size_n": 256,
"size_k": 64,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_256x256x64.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0010965500858644117,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.1933777392946514,
"compute_intensity": 819.2,
"tile_compute_intensity": 12.19047619047619,
"MxNxK": 4294967296,
"size_m": 1024,
"size_n": 2048,
"size_k": 2048,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_1024x2048x2048.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 5.354248466134823e-7,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 14.666483912875476,
"compute_intensity": 64,
"tile_compute_intensity": 1.2307692307692308,
"MxNxK": 2097152,
"size_m": 128,
"size_n": 128,
"size_k": 128,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_128x128x128.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.008772400686915293,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.2950577218255106,
"compute_intensity": 862.3157894736842,
"tile_compute_intensity": 7.6992481203007515,
"MxNxK": 34359738368,
"size_m": 512,
"size_n": 8192,
"size_k": 8192,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_512x8192x8192.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0000010708496932269645,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 12.012889738140837,
"compute_intensity": 85.33333333333333,
"tile_compute_intensity": 1.28,
"MxNxK": 4194304,
"size_m": 128,
"size_n": 128,
"size_k": 256,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_128x128x256.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0005482750429322058,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.248622986637561,
"compute_intensity": 585.1428571428571,
"tile_compute_intensity": 6.320987654320987,
"MxNxK": 2147483648,
"size_m": 512,
"size_n": 1024,
"size_k": 4096,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_512x1024x4096.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.00003798403760325979,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.2508833715147816,
"compute_intensity": 7.934140435835351,
"tile_compute_intensity": 0.9343065693430657,
"MxNxK": 33554432,
"size_m": 8192,
"size_n": 512,
"size_k": 8,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_8192x512x8.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0005482750429322058,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 2.0921470564449454,
"compute_intensity": 123.18796992481202,
"tile_compute_intensity": 1.8806244260789715,
"MxNxK": 2147483648,
"size_m": 2048,
"size_n": 64,
"size_k": 16384,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_2048x64x16384.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0002741375214661029,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 2.1827527476990514,
"compute_intensity": 122.26865671641791,
"tile_compute_intensity": 1.8788990825688074,
"MxNxK": 1073741824,
"size_m": 2048,
"size_n": 64,
"size_k": 8192,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_2048x64x8192.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.00003853135802981973,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.6543408888208557,
"compute_intensity": 31.50769230769231,
"tile_compute_intensity": 3.3684210526315788,
"MxNxK": 134217728,
"size_m": 2048,
"size_n": 2048,
"size_k": 32,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_2048x2048x32.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.000017133595091631432,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 2.5807544018844655,
"compute_intensity": 157.53846153846155,
"tile_compute_intensity": 3.0476190476190474,
"MxNxK": 67108864,
"size_m": 1024,
"size_n": 128,
"size_k": 512,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_1024x128x512.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.000004283398772907858,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 3.0331056099101845,
"compute_intensity": 56.888888888888886,
"tile_compute_intensity": 3.2,
"MxNxK": 16777216,
"size_m": 512,
"size_n": 512,
"size_k": 64,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_512x512x64.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.00007604105126339423,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.1727980346685678,
"compute_intensity": 15.860600193610843,
"tile_compute_intensity": 1.855072463768116,
"MxNxK": 134217728,
"size_m": 8192,
"size_n": 1024,
"size_k": 16,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_8192x1024x16.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.000017133595091631432,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 2.9520484904106628,
"compute_intensity": 99.90243902439025,
"tile_compute_intensity": 1.8285714285714285,
"MxNxK": 67108864,
"size_m": 2048,
"size_n": 64,
"size_k": 512,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_2048x64x512.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.000004998859895914112,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 2.0561487874408018,
"compute_intensity": 15.456603773584906,
"tile_compute_intensity": 1.3061224489795917,
"MxNxK": 8388608,
"size_m": 256,
"size_n": 2048,
"size_k": 16,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_256x2048x16.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.00013706876073305146,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.3149371745374567,
"compute_intensity": 61.82641509433962,
"tile_compute_intensity": 5.224489795918367,
"MxNxK": 536870912,
"size_m": 1024,
"size_n": 8192,
"size_k": 64,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_1024x8192x64.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.00003882326225731836,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.6412428440179738,
"compute_intensity": 31.386973180076627,
"tile_compute_intensity": 3.1219512195121952,
"MxNxK": 134217728,
"size_m": 1024,
"size_n": 4096,
"size_k": 32,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_1024x4096x32.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.008772400686915293,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.0768307394248053,
"compute_intensity": 1365.3333333333333,
"tile_compute_intensity": 36.57142857142857,
"MxNxK": 34359738368,
"size_m": 4096,
"size_n": 4096,
"size_k": 2048,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_4096x4096x2048.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0001506225813892953,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.1034945372909573,
"compute_intensity": 15.937743190661479,
"tile_compute_intensity": 1.9104477611940298,
"MxNxK": 268435456,
"size_m": 4096,
"size_n": 4096,
"size_k": 16,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_4096x4096x16.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.000042107184816677995,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.9123768169535125,
"compute_intensity": 30.089990817263544,
"tile_compute_intensity": 2.6391752577319587,
"MxNxK": 134217728,
"size_m": 16384,
"size_n": 256,
"size_k": 32,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_16384x256x32.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.000034267190183262865,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.521688816475054,
"compute_intensity": 163.84,
"tile_compute_intensity": 5.333333333333333,
"MxNxK": 134217728,
"size_m": 2048,
"size_n": 256,
"size_k": 256,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_2048x256x256.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.000005272520109194081,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.991305878681544,
"compute_intensity": 15.03119266055046,
"tile_compute_intensity": 1.3061224489795917,
"MxNxK": 8388608,
"size_m": 4096,
"size_n": 128,
"size_k": 16,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_4096x128x16.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0011991425665643896,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.0471909066524594,
"compute_intensity": 15.976596782057532,
"tile_compute_intensity": 1.9616858237547892,
"MxNxK": 2147483648,
"size_m": 8192,
"size_n": 16384,
"size_k": 16,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_8192x16384x16.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0000017696693792104705,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 3.835292814779369,
"compute_intensity": 25.440993788819874,
"tile_compute_intensity": 1.28,
"MxNxK": 4194304,
"size_m": 2048,
"size_n": 64,
"size_k": 32,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_2048x64x32.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0010965500858644117,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.3188621398167926,
"compute_intensity": 682.6666666666666,
"tile_compute_intensity": 18.285714285714285,
"MxNxK": 4294967296,
"size_m": 2048,
"size_n": 2048,
"size_k": 1024,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_2048x2048x1024.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.004386200343457647,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.0839026923157487,
"compute_intensity": 468.1142857142857,
"tile_compute_intensity": 32,
"MxNxK": 17179869184,
"size_m": 8192,
"size_n": 4096,
"size_k": 512,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_8192x4096x512.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.000017133595091631432,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.7173277778448703,
"compute_intensity": 99.90243902439025,
"tile_compute_intensity": 4.571428571428571,
"MxNxK": 67108864,
"size_m": 2048,
"size_n": 256,
"size_k": 128,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_2048x256x128.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.000008566797545815716,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 11.33903266061394,
"compute_intensity": 102.4,
"tile_compute_intensity": 1.5802469135802468,
"MxNxK": 33554432,
"size_m": 512,
"size_n": 64,
"size_k": 1024,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_512x64x1024.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0005482750429322058,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.3619149653157396,
"compute_intensity": 334.3673469387755,
"tile_compute_intensity": 3.8208955223880596,
"MxNxK": 2147483648,
"size_m": 256,
"size_n": 8192,
"size_k": 1024,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_256x8192x1024.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0000013181300272985204,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 3.7871832481683008,
"compute_intensity": 7.51559633027523,
"tile_compute_intensity": 0.6530612244897959,
"MxNxK": 1048576,
"size_m": 2048,
"size_n": 64,
"size_k": 8,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_2048x64x8.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.000017133595091631432,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.7924083981606924,
"compute_intensity": 60.23529411764706,
"tile_compute_intensity": 4.571428571428571,
"MxNxK": 67108864,
"size_m": 1024,
"size_n": 1024,
"size_k": 64,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_1024x1024x64.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.017544801373830587,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.1582004089043747,
"compute_intensity": 489.07462686567163,
"tile_compute_intensity": 42.666666666666664,
"MxNxK": 68719476736,
"size_m": 16384,
"size_n": 8192,
"size_k": 512,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_16384x8192x512.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0010965500858644117,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 2.0358833038409663,
"compute_intensity": 237.44927536231884,
"tile_compute_intensity": 1.9375591296121097,
"MxNxK": 4294967296,
"size_m": 128,
"size_n": 2048,
"size_k": 16384,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_128x2048x16384.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.00001890079873053657,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.4436639111137552,
"compute_intensity": 7.953398058252427,
"tile_compute_intensity": 0.9411764705882353,
"MxNxK": 16777216,
"size_m": 2048,
"size_n": 1024,
"size_k": 8,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_2048x1024x8.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0006001550917371922,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.0568841805663305,
"compute_intensity": 15.968810916179336,
"tile_compute_intensity": 1.9541984732824427,
"MxNxK": 1073741824,
"size_m": 8192,
"size_n": 8192,
"size_k": 16,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_8192x8192x16.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.00013706876073305146,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.4570437983463218,
"compute_intensity": 215.57894736842104,
"tile_compute_intensity": 1.9248120300751879,
"MxNxK": 536870912,
"size_m": 128,
"size_n": 2048,
"size_k": 2048,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_128x2048x2048.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.004386200343457647,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.2895046089598925,
"compute_intensity": 1170.2857142857142,
"tile_compute_intensity": 20.48,
"MxNxK": 17179869184,
"size_m": 4096,
"size_n": 1024,
"size_k": 4096,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_4096x1024x4096.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0021931001717288233,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.078649899213568,
"compute_intensity": 442.81081081081084,
"tile_compute_intensity": 19.692307692307693,
"MxNxK": 8589934592,
"size_m": 2048,
"size_n": 8192,
"size_k": 512,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_2048x8192x512.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.000002141699386453929,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 4.025214666251466,
"compute_intensity": 53.89473684210526,
"tile_compute_intensity": 2.2857142857142856,
"MxNxK": 8388608,
"size_m": 256,
"size_n": 512,
"size_k": 64,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_256x512x64.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0010965500858644117,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.2055263361957598,
"compute_intensity": 225.98620689655172,
"tile_compute_intensity": 1.9768339768339769,
"MxNxK": 4294967296,
"size_m": 128,
"size_n": 16384,
"size_k": 2048,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_128x16384x2048.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.00006853438036652573,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.3978151524669369,
"compute_intensity": 240.94117647058823,
"tile_compute_intensity": 3.5555555555555554,
"MxNxK": 268435456,
"size_m": 256,
"size_n": 2048,
"size_k": 512,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_256x2048x512.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.00015193615041303915,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.1736298033817962,
"compute_intensity": 15.868280871670702,
"tile_compute_intensity": 1.7716262975778547,
"MxNxK": 268435456,
"size_m": 1024,
"size_n": 16384,
"size_k": 16,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_1024x16384x16.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.000008566797545815716,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 3.4006640467183304,
"compute_intensity": 146.28571428571428,
"tile_compute_intensity": 2.56,
"MxNxK": 33554432,
"size_m": 512,
"size_n": 128,
"size_k": 512,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_512x128x512.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0002741375214661029,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 2.152916454821083,
"compute_intensity": 327.68,
"tile_compute_intensity": 3.190031152647975,
"MxNxK": 1073741824,
"size_m": 256,
"size_n": 512,
"size_k": 8192,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_256x512x8192.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.017544801373830587,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.1109101530781038,
"compute_intensity": 1310.72,
"tile_compute_intensity": 26.94736842105263,
"MxNxK": 68719476736,
"size_m": 16384,
"size_n": 1024,
"size_k": 4096,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_16384x1024x4096.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.00006853438036652573,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 2.949900728482551,
"compute_intensity": 127.0077519379845,
"tile_compute_intensity": 1.332465842550423,
"MxNxK": 268435456,
"size_m": 128,
"size_n": 128,
"size_k": 16384,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_128x128x16384.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.000004283398772907858,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 2.9486865397557125,
"compute_intensity": 50.5679012345679,
"tile_compute_intensity": 1.5609756097560976,
"MxNxK": 16777216,
"size_m": 128,
"size_n": 2048,
"size_k": 64,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_128x2048x64.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.00013706876073305146,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.358757446128717,
"compute_intensity": 168.90721649484536,
"tile_compute_intensity": 1.9248120300751879,
"MxNxK": 536870912,
"size_m": 128,
"size_n": 8192,
"size_k": 512,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_128x8192x512.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.008772400686915293,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.1457738293715554,
"compute_intensity": 1260.3076923076924,
"tile_compute_intensity": 14.628571428571428,
"MxNxK": 34359738368,
"size_m": 1024,
"size_n": 8192,
"size_k": 4096,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_1024x8192x4096.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0021931001717288233,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.1226627967164449,
"compute_intensity": 910.2222222222222,
"tile_compute_intensity": 10.556701030927835,
"MxNxK": 8589934592,
"size_m": 1024,
"size_n": 1024,
"size_k": 8192,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_1024x1024x8192.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0005482750429322058,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.3386274438403738,
"compute_intensity": 390.0952380952381,
"tile_compute_intensity": 11.636363636363637,
"MxNxK": 2147483648,
"size_m": 1024,
"size_n": 4096,
"size_k": 512,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_1024x4096x512.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.00006853438036652573,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.5071676424835547,
"compute_intensity": 60.014652014652015,
"tile_compute_intensity": 3.9384615384615387,
"MxNxK": 268435456,
"size_m": 512,
"size_n": 8192,
"size_k": 64,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_512x8192x64.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.000019995439583656446,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.971969755299062,
"compute_intensity": 30.91320754716981,
"tile_compute_intensity": 2.6122448979591835,
"MxNxK": 67108864,
"size_m": 512,
"size_n": 4096,
"size_k": 32,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_512x4096x32.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.000004283398772907858,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 24.045206495911085,
"compute_intensity": 81.92,
"tile_compute_intensity": 0.9961089494163424,
"MxNxK": 16777216,
"size_m": 128,
"size_n": 64,
"size_k": 2048,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_128x64x2048.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.004386200343457647,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.2961319425178057,
"compute_intensity": 780.1904761904761,
"tile_compute_intensity": 13.837837837837839,
"MxNxK": 17179869184,
"size_m": 8192,
"size_n": 512,
"size_k": 4096,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_8192x512x4096.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.00006853438036652573,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 2.751460028324196,
"compute_intensity": 167.18367346938774,
"tile_compute_intensity": 1.5975039001560063,
"MxNxK": 268435456,
"size_m": 128,
"size_n": 256,
"size_k": 8192,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_128x256x8192.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.000034267190183262865,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.5447546652743533,
"compute_intensity": 110.70270270270271,
"tile_compute_intensity": 4.923076923076923,
"MxNxK": 134217728,
"size_m": 512,
"size_n": 2048,
"size_k": 128,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_512x2048x128.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.000034267190183262865,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.6000377923159317,
"compute_intensity": 63.875243664717345,
"tile_compute_intensity": 1.7655172413793103,
"MxNxK": 134217728,
"size_m": 16384,
"size_n": 64,
"size_k": 128,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_16384x64x128.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.00006853438036652573,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.4650515326720939,
"compute_intensity": 292.57142857142856,
"tile_compute_intensity": 5.12,
"MxNxK": 268435456,
"size_m": 1024,
"size_n": 256,
"size_k": 1024,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_1024x256x1024.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0005482750429322058,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.3458705348318054,
"compute_intensity": 254.015503875969,
"tile_compute_intensity": 3.7372262773722627,
"MxNxK": 2147483648,
"size_m": 256,
"size_n": 16384,
"size_k": 512,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_256x16384x512.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.00007954390199337785,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.851153861119139,
"compute_intensity": 31.000946073793756,
"tile_compute_intensity": 3.1604938271604937,
"MxNxK": 268435456,
"size_m": 16384,
"size_n": 512,
"size_k": 32,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_16384x512x32.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.000002490307940847724,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 2.106084998238796,
"compute_intensity": 7.742911153119093,
"tile_compute_intensity": 0.7804878048780488,
"MxNxK": 2097152,
"size_m": 2048,
"size_n": 128,
"size_k": 8,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_2048x128x8.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0002741375214661029,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 2.1909938753251414,
"compute_intensity": 252.06153846153848,
"tile_compute_intensity": 2.6631989596879064,
"MxNxK": 1073741824,
"size_m": 256,
"size_n": 256,
"size_k": 16384,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_256x256x16384.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.00006853438036652573,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.4374565968521325,
"compute_intensity": 85.11168831168831,
"tile_compute_intensity": 1.8686131386861313,
"MxNxK": 268435456,
"size_m": 16384,
"size_n": 64,
"size_k": 256,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_16384x64x256.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.00006853438036652573,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.452584865207051,
"compute_intensity": 195.04761904761904,
"tile_compute_intensity": 5.818181818181818,
"MxNxK": 268435456,
"size_m": 512,
"size_n": 2048,
"size_k": 256,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_512x2048x256.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 1.824401421866464e-7,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 27.871059555999512,
"compute_intensity": 36.57142857142857,
"tile_compute_intensity": 0.8888888888888888,
"MxNxK": 524288,
"size_m": 128,
"size_n": 64,
"size_k": 64,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_128x64x64.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0010965500858644117,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 2.031050669638595,
"compute_intensity": 431.1578947368421,
"tile_compute_intensity": 6.3602484472049685,
"MxNxK": 4294967296,
"size_m": 2048,
"size_n": 256,
"size_k": 8192,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_2048x256x8192.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0000014960091659305007,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 4.089814548486245,
"compute_intensity": 28.054794520547944,
"tile_compute_intensity": 1.7777777777777777,
"MxNxK": 4194304,
"size_m": 1024,
"size_n": 128,
"size_k": 32,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_1024x128x32.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.000017133595091631432,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 5.733204164121806,
"compute_intensity": 107.78947368421052,
"tile_compute_intensity": 1.7534246575342465,
"MxNxK": 67108864,
"size_m": 1024,
"size_n": 64,
"size_k": 1024,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_1024x64x1024.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 8.665906753865705e-8,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 55.98029300226937,
"compute_intensity": 7.314285714285714,
"tile_compute_intensity": 0.5,
"MxNxK": 65536,
"size_m": 128,
"size_n": 64,
"size_k": 8,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_128x64x8.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.004386200343457647,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.1152920528391401,
"compute_intensity": 780.1904761904761,
"tile_compute_intensity": 7.086505190311419,
"MxNxK": 17179869184,
"size_m": 512,
"size_n": 2048,
"size_k": 16384,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_512x2048x16384.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.000002141699386453929,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 4.074521463973465,
"compute_intensity": 53.89473684210526,
"tile_compute_intensity": 2.6666666666666665,
"MxNxK": 8388608,
"size_m": 512,
"size_n": 256,
"size_k": 64,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_512x256x64.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.004386200343457647,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.0761781214773354,
"compute_intensity": 780.1904761904761,
"tile_compute_intensity": 32,
"MxNxK": 17179869184,
"size_m": 8192,
"size_n": 2048,
"size_k": 1024,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_8192x2048x1024.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0010965500858644117,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.1946879861811188,
"compute_intensity": 630.1538461538462,
"tile_compute_intensity": 7.314285714285714,
"MxNxK": 4294967296,
"size_m": 512,
"size_n": 4096,
"size_k": 2048,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_512x4096x2048.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.004386200343457647,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.1432182236923045,
"compute_intensity": 668.734693877551,
"tile_compute_intensity": 7.641791044776119,
"MxNxK": 17179869184,
"size_m": 512,
"size_n": 16384,
"size_k": 2048,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_512x16384x2048.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.008772400686915293,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.3150764631155216,
"compute_intensity": 474.8985507246377,
"tile_compute_intensity": 7.6992481203007515,
"MxNxK": 34359738368,
"size_m": 16384,
"size_n": 256,
"size_k": 8192,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_16384x256x8192.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.000004283398772907858,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 3.1877492249296036,
"compute_intensity": 81.92,
"tile_compute_intensity": 2.6666666666666665,
"MxNxK": 16777216,
"size_m": 1024,
"size_n": 128,
"size_k": 128,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_1024x128x128.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.008772400686915293,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.0646429888441313,
"compute_intensity": 992.969696969697,
"tile_compute_intensity": 14.628571428571428,
"MxNxK": 34359738368,
"size_m": 1024,
"size_n": 16384,
"size_k": 2048,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_1024x16384x2048.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0010965500858644117,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.217928879955657,
"compute_intensity": 399.609756097561,
"tile_compute_intensity": 7.314285714285714,
"MxNxK": 4294967296,
"size_m": 8192,
"size_n": 256,
"size_k": 2048,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_8192x256x2048.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.000017133595091631432,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.8204235778125226,
"compute_intensity": 59.36231884057971,
"tile_compute_intensity": 3.764705882352941,
"MxNxK": 67108864,
"size_m": 512,
"size_n": 2048,
"size_k": 64,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_512x2048x64.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 3.3295325949062974e-7,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 12.475024572259086,
"compute_intensity": 7.474452554744525,
"tile_compute_intensity": 0.6153846153846154,
"MxNxK": 262144,
"size_m": 512,
"size_n": 64,
"size_k": 8,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_512x64x8.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.008772400686915293,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.1197233830589775,
"compute_intensity": 1260.3076923076924,
"tile_compute_intensity": 12.720496894409937,
"MxNxK": 34359738368,
"size_m": 1024,
"size_n": 2048,
"size_k": 16384,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_1024x2048x16384.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.000002141699386453929,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 11.070087479521554,
"compute_intensity": 102.4,
"tile_compute_intensity": 1.3061224489795917,
"MxNxK": 8388608,
"size_m": 128,
"size_n": 128,
"size_k": 512,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_128x128x512.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.00015003877293429804,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.0572414056563584,
"compute_intensity": 7.984405458089668,
"tile_compute_intensity": 0.9770992366412213,
"MxNxK": 134217728,
"size_m": 4096,
"size_n": 4096,
"size_k": 8,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_4096x4096x8.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0005482750429322058,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.2976085713094128,
"compute_intensity": 409.6,
"tile_compute_intensity": 3.7372262773722627,
"MxNxK": 2147483648,
"size_m": 256,
"size_n": 2048,
"size_k": 4096,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_256x2048x4096.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.000017133595091631432,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 6.763040720880788,
"compute_intensity": 99.90243902439025,
"tile_compute_intensity": 1.3298701298701299,
"MxNxK": 67108864,
"size_m": 256,
"size_n": 64,
"size_k": 4096,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_256x64x4096.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.000008566797545815716,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 2.78059563170885,
"compute_intensity": 120.47058823529412,
"tile_compute_intensity": 2.909090909090909,
"MxNxK": 33554432,
"size_m": 1024,
"size_n": 128,
"size_k": 256,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_1024x128x256.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0005482750429322058,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.3532186867812044,
"compute_intensity": 585.1428571428571,
"tile_compute_intensity": 11.636363636363637,
"MxNxK": 2147483648,
"size_m": 1024,
"size_n": 2048,
"size_k": 1024,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_1024x2048x1024.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0002741375214661029,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.2131604543904906,
"compute_intensity": 122.26865671641791,
"tile_compute_intensity": 10.666666666666666,
"MxNxK": 1073741824,
"size_m": 4096,
"size_n": 2048,
"size_k": 128,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_4096x2048x128.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.000019557583242408497,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.9956657514027423,
"compute_intensity": 31.267175572519083,
"tile_compute_intensity": 3.2,
"MxNxK": 67108864,
"size_m": 2048,
"size_n": 1024,
"size_k": 32,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_2048x1024x32.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 7.66248597183915e-7,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 6.481447252859274,
"compute_intensity": 27.675675675675677,
"tile_compute_intensity": 1.6,
"MxNxK": 2097152,
"size_m": 512,
"size_n": 128,
"size_k": 32,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_512x128x32.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.00013706876073305146,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.3600648053251514,
"compute_intensity": 315.0769230769231,
"tile_compute_intensity": 9.142857142857142,
"MxNxK": 536870912,
"size_m": 2048,
"size_n": 512,
"size_k": 512,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_2048x512x512.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0005482750429322058,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 2.0762834463163165,
"compute_intensity": 334.3673469387755,
"tile_compute_intensity": 3.1950078003120126,
"MxNxK": 2147483648,
"size_m": 256,
"size_n": 512,
"size_k": 16384,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_256x512x16384.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.000004283398772907858,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 3.0854003174957936,
"compute_intensity": 55.351351351351354,
"tile_compute_intensity": 3.2,
"MxNxK": 16777216,
"size_m": 1024,
"size_n": 256,
"size_k": 64,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_1024x256x64.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.00000483466376794613,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.815555331111213,
"compute_intensity": 7.861804222648752,
"tile_compute_intensity": 0.8648648648648649,
"MxNxK": 4194304,
"size_m": 2048,
"size_n": 256,
"size_k": 8,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_2048x256x8.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0010965500858644117,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.2227060535229821,
"compute_intensity": 237.44927536231884,
"tile_compute_intensity": 3.8496240601503757,
"MxNxK": 4294967296,
"size_m": 8192,
"size_n": 128,
"size_k": 4096,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_8192x128x4096.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0005482750429322058,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.2250668250570615,
"compute_intensity": 234.05714285714285,
"tile_compute_intensity": 16,
"MxNxK": 2147483648,
"size_m": 4096,
"size_n": 2048,
"size_k": 256,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_4096x2048x256.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.00015069555744616993,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.049936784032701,
"compute_intensity": 7.9669341113542425,
"tile_compute_intensity": 0.9660377358490566,
"MxNxK": 134217728,
"size_m": 16384,
"size_n": 1024,
"size_k": 8,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_16384x1024x8.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 3.466362701546282e-7,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 10.348600454014873,
"compute_intensity": 14.628571428571428,
"tile_compute_intensity": 0.8888888888888888,
"MxNxK": 524288,
"size_m": 128,
"size_n": 256,
"size_k": 16,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_128x256x16.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0005989874748271975,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.01540958475008,
"compute_intensity": 7.9921951219512195,
"tile_compute_intensity": 0.9884169884169884,
"MxNxK": 536870912,
"size_m": 8192,
"size_n": 8192,
"size_k": 8,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_8192x8192x8.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.000004779931725290136,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.808226721321689,
"compute_intensity": 7.9073359073359075,
"tile_compute_intensity": 0.8888888888888888,
"MxNxK": 4194304,
"size_m": 1024,
"size_n": 512,
"size_k": 8,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_1024x512x8.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0010965500858644117,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.1668304404322691,
"compute_intensity": 125.06870229007633,
"tile_compute_intensity": 12.8,
"MxNxK": 4294967296,
"size_m": 8192,
"size_n": 4096,
"size_k": 128,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_8192x4096x128.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.00006853438036652573,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.3924456627145971,
"compute_intensity": 240.94117647058823,
"tile_compute_intensity": 5.818181818181818,
"MxNxK": 268435456,
"size_m": 2048,
"size_n": 256,
"size_k": 512,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_2048x256x512.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0021931001717288233,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.1271044037155173,
"compute_intensity": 455.1111111111111,
"tile_compute_intensity": 3.8641509433962264,
"MxNxK": 8589934592,
"size_m": 256,
"size_n": 4096,
"size_k": 8192,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_256x4096x8192.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0010965500858644117,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 2.038369604959424,
"compute_intensity": 237.44927536231884,
"tile_compute_intensity": 3.5493934142114383,
"MxNxK": 4294967296,
"size_m": 2048,
"size_n": 128,
"size_k": 16384,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_2048x128x16384.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.00006853438036652573,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 2.8283614221379167,
"compute_intensity": 167.18367346938774,
"tile_compute_intensity": 1.996101364522417,
"MxNxK": 268435456,
"size_m": 256,
"size_n": 128,
"size_k": 8192,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_256x128x8192.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.000042107184816677995,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.6637160306237755,
"compute_intensity": 30.089990817263544,
"tile_compute_intensity": 1.9922178988326849,
"MxNxK": 134217728,
"size_m": 256,
"size_n": 16384,
"size_k": 32,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_256x16384x32.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0021931001717288233,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.1198554524119597,
"compute_intensity": 244.53731343283582,
"tile_compute_intensity": 21.333333333333332,
"MxNxK": 8589934592,
"size_m": 8192,
"size_n": 4096,
"size_k": 256,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_8192x4096x256.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0000025176739621757206,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 2.374255036641246,
"compute_intensity": 15.398496240601503,
"tile_compute_intensity": 1.28,
"MxNxK": 4194304,
"size_m": 256,
"size_n": 1024,
"size_k": 16,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_256x1024x16.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.004386200343457647,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.0563041044172161,
"compute_intensity": 819.2,
"tile_compute_intensity": 32,
"MxNxK": 17179869184,
"size_m": 4096,
"size_n": 4096,
"size_k": 1024,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_4096x4096x1024.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.00015091448561679391,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.1238788055190343,
"compute_intensity": 15.922254616132166,
"tile_compute_intensity": 1.9104477611940298,
"MxNxK": 268435456,
"size_m": 8192,
"size_n": 2048,
"size_k": 16,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_8192x2048x16.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.000010070695848702883,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 2.1295845960014304,
"compute_intensity": 30.796992481203006,
"tile_compute_intensity": 2.909090909090909,
"MxNxK": 33554432,
"size_m": 2048,
"size_n": 512,
"size_k": 32,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_2048x512x32.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.000008566797545815716,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 2.8265404787758985,
"compute_intensity": 83.59183673469387,
"tile_compute_intensity": 1.7777777777777777,
"MxNxK": 33554432,
"size_m": 2048,
"size_n": 64,
"size_k": 256,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_2048x64x256.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.017544801373830587,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.1758380798980153,
"compute_intensity": 1489.4545454545455,
"tile_compute_intensity": 24.975609756097562,
"MxNxK": 68719476736,
"size_m": 8192,
"size_n": 1024,
"size_k": 8192,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_8192x1024x8192.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0021931001717288233,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.2946216806305197,
"compute_intensity": 1024,
"tile_compute_intensity": 19.692307692307693,
"MxNxK": 8589934592,
"size_m": 2048,
"size_n": 2048,
"size_k": 2048,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_2048x2048x2048.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.000017133595091631432,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 6.057619492651442,
"compute_intensity": 107.78947368421052,
"tile_compute_intensity": 1.5900621118012421,
"MxNxK": 67108864,
"size_m": 512,
"size_n": 64,
"size_k": 2048,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_512x64x2048.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.000004283398772907858,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 3.313256726972009,
"compute_intensity": 63.01538461538462,
"tile_compute_intensity": 1.6842105263157894,
"MxNxK": 16777216,
"size_m": 2048,
"size_n": 64,
"size_k": 128,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_2048x64x128.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.000005874572578410015,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 2.1799715952540306,
"compute_intensity": 28.346020761245676,
"tile_compute_intensity": 1.3195876288659794,
"MxNxK": 16777216,
"size_m": 128,
"size_n": 4096,
"size_k": 32,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_128x4096x32.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.000017133595091631432,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.8693566744714205,
"compute_intensity": 51.0404984423676,
"tile_compute_intensity": 1.5900621118012421,
"MxNxK": 67108864,
"size_m": 128,
"size_n": 8192,
"size_k": 64,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_128x8192x64.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.00003860433408669438,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.3419426076447487,
"compute_intensity": 15.738712776176753,
"tile_compute_intensity": 1.7534246575342465,
"MxNxK": 67108864,
"size_m": 8192,
"size_n": 512,
"size_k": 16,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_8192x512x16.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.017544801373830587,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.1144780639780587,
"compute_intensity": 885.6216216216217,
"tile_compute_intensity": 51.2,
"MxNxK": 68719476736,
"size_m": 16384,
"size_n": 4096,
"size_k": 1024,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_16384x4096x1024.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.00001985860947701646,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.3577587142796355,
"compute_intensity": 7.755739644970414,
"tile_compute_intensity": 0.7975077881619937,
"MxNxK": 16777216,
"size_m": 16384,
"size_n": 128,
"size_k": 8,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_16384x128x8.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0002997856416410974,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.03082720778901,
"compute_intensity": 7.988298391028766,
"tile_compute_intensity": 0.9846153846153847,
"MxNxK": 268435456,
"size_m": 8192,
"size_n": 4096,
"size_k": 8,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_8192x4096x8.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.000017133595091631432,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.7823228859240798,
"compute_intensity": 59.36231884057971,
"tile_compute_intensity": 4.571428571428571,
"MxNxK": 67108864,
"size_m": 2048,
"size_n": 512,
"size_k": 64,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_2048x512x64.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0005482750429322058,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.3709381473484281,
"compute_intensity": 481.88235294117646,
"tile_compute_intensity": 11.636363636363637,
"MxNxK": 2147483648,
"size_m": 4096,
"size_n": 512,
"size_k": 1024,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_4096x512x1024.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.00001985860947701646,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.3640431490244729,
"compute_intensity": 7.755739644970414,
"tile_compute_intensity": 0.6657997399219766,
"MxNxK": 16777216,
"size_m": 128,
"size_n": 16384,
"size_k": 8,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_128x16384x8.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.000004283398772907858,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 5.761406364169306,
"compute_intensity": 93.0909090909091,
"tile_compute_intensity": 1.5609756097560976,
"MxNxK": 16777216,
"size_m": 512,
"size_n": 64,
"size_k": 512,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_512x64x512.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0002741375214661029,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 2.2105809069552116,
"compute_intensity": 119.5912408759124,
"tile_compute_intensity": 1.7762359063313096,
"MxNxK": 1073741824,
"size_m": 1024,
"size_n": 64,
"size_k": 16384,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_1024x64x16384.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.000011712657128382701,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.921750151576348,
"compute_intensity": 28.395147313691506,
"tile_compute_intensity": 1.3264248704663213,
"MxNxK": 33554432,
"size_m": 128,
"size_n": 8192,
"size_k": 32,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_128x8192x32.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.000008566797545815716,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 2.147453454870908,
"compute_intensity": 50.88198757763975,
"tile_compute_intensity": 1.5802469135802468,
"MxNxK": 33554432,
"size_m": 128,
"size_n": 4096,
"size_k": 64,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_128x4096x64.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.00006853438036652573,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.7425413885722818,
"compute_intensity": 195.04761904761904,
"tile_compute_intensity": 1.7716262975778547,
"MxNxK": 268435456,
"size_m": 128,
"size_n": 512,
"size_k": 4096,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_128x512x4096.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.000017133595091631432,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 5.6805357554730955,
"compute_intensity": 170.66666666666666,
"tile_compute_intensity": 2.6122448979591835,
"MxNxK": 67108864,
"size_m": 512,
"size_n": 128,
"size_k": 1024,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_512x128x1024.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0000024264538910823974,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 2.1483201577368787,
"compute_intensity": 7.846743295019157,
"tile_compute_intensity": 0.7804878048780488,
"MxNxK": 2097152,
"size_m": 256,
"size_n": 1024,
"size_k": 8,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_256x1024x8.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.004386200343457647,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.1375896911575878,
"compute_intensity": 468.1142857142857,
"tile_compute_intensity": 7.474452554744525,
"MxNxK": 17179869184,
"size_m": 8192,
"size_n": 256,
"size_k": 8192,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_8192x256x8192.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.000034267190183262865,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 2.880516331553103,
"compute_intensity": 186.1818181818182,
"tile_compute_intensity": 1.855072463768116,
"MxNxK": 134217728,
"size_m": 128,
"size_n": 1024,
"size_k": 1024,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_128x1024x1024.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.000004283398772907858,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 3.1608542991464774,
"compute_intensity": 93.0909090909091,
"tile_compute_intensity": 3.2,
"MxNxK": 16777216,
"size_m": 512,
"size_n": 256,
"size_k": 128,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_512x256x128.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.000034267190183262865,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 2.8904149932581644,
"compute_intensity": 110.70270270270271,
"tile_compute_intensity": 1.855072463768116,
"MxNxK": 134217728,
"size_m": 2048,
"size_n": 64,
"size_k": 1024,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_2048x64x1024.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0010965500858644117,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.2286913856409003,
"compute_intensity": 123.65283018867925,
"tile_compute_intensity": 1.9768339768339769,
"MxNxK": 4294967296,
"size_m": 16384,
"size_n": 64,
"size_k": 4096,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_16384x64x4096.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0010965500858644117,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.3086074429170673,
"compute_intensity": 496.4848484848485,
"tile_compute_intensity": 7.314285714285714,
"MxNxK": 4294967296,
"size_m": 512,
"size_n": 8192,
"size_k": 1024,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_512x8192x1024.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.008772400686915293,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.1825477126218704,
"compute_intensity": 799.219512195122,
"tile_compute_intensity": 14.628571428571428,
"MxNxK": 34359738368,
"size_m": 16384,
"size_n": 512,
"size_k": 4096,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_16384x512x4096.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.00001890079873053657,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.4375688410725742,
"compute_intensity": 7.953398058252427,
"tile_compute_intensity": 0.927536231884058,
"MxNxK": 16777216,
"size_m": 1024,
"size_n": 2048,
"size_k": 8,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_1024x2048x8.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0010965500858644117,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.2039387861340598,
"compute_intensity": 682.6666666666666,
"tile_compute_intensity": 10.448979591836734,
"MxNxK": 4294967296,
"size_m": 2048,
"size_n": 512,
"size_k": 4096,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_2048x512x4096.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.000034267190183262865,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 2.2917314517205085,
"compute_intensity": 227.55555555555554,
"tile_compute_intensity": 3.3684210526315788,
"MxNxK": 134217728,
"size_m": 256,
"size_n": 1024,
"size_k": 512,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_256x1024x512.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.008772400686915293,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.1475798246910496,
"compute_intensity": 1638.4,
"tile_compute_intensity": 24.38095238095238,
"MxNxK": 34359738368,
"size_m": 2048,
"size_n": 4096,
"size_k": 4096,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_2048x4096x4096.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.000008566797545815716,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 2.7589306523038206,
"compute_intensity": 146.28571428571428,
"tile_compute_intensity": 3.5555555555555554,
"MxNxK": 33554432,
"size_m": 512,
"size_n": 256,
"size_k": 256,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_512x256x256.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.00006853438036652573,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.529999928118416,
"compute_intensity": 56.79029462738301,
"tile_compute_intensity": 2.6528497409326426,
"MxNxK": 268435456,
"size_m": 256,
"size_n": 16384,
"size_k": 64,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_256x16384x64.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.000034267190183262865,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.718541796707589,
"compute_intensity": 61.134328358208954,
"tile_compute_intensity": 5.333333333333333,
"MxNxK": 134217728,
"size_m": 2048,
"size_n": 1024,
"size_k": 64,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_2048x1024x64.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.000034267190183262865,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.4996503058710429,
"compute_intensity": 186.1818181818182,
"tile_compute_intensity": 5.333333333333333,
"MxNxK": 134217728,
"size_m": 512,
"size_n": 1024,
"size_k": 256,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_512x1024x256.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0000010708496932269645,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 6.239531055060609,
"compute_intensity": 48.76190476190476,
"tile_compute_intensity": 2,
"MxNxK": 4194304,
"size_m": 512,
"size_n": 128,
"size_k": 64,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_512x128x64.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.000017133595091631432,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 6.054257541996491,
"compute_intensity": 157.53846153846155,
"tile_compute_intensity": 1.9844961240310077,
"MxNxK": 67108864,
"size_m": 256,
"size_n": 128,
"size_k": 2048,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_256x128x2048.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.000008566797545815716,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 2.6386522473189125,
"compute_intensity": 102.4,
"tile_compute_intensity": 4,
"MxNxK": 33554432,
"size_m": 512,
"size_n": 512,
"size_k": 128,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_512x512x128.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0005482750429322058,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.3609110683689551,
"compute_intensity": 390.0952380952381,
"tile_compute_intensity": 3.8208955223880596,
"MxNxK": 2147483648,
"size_m": 256,
"size_n": 4096,
"size_k": 2048,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_256x4096x2048.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.000017133595091631432,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 2.392492639228498,
"compute_intensity": 170.66666666666666,
"tile_compute_intensity": 4.571428571428571,
"MxNxK": 67108864,
"size_m": 512,
"size_n": 512,
"size_k": 256,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_512x512x256.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0002997856416410974,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.0296210593303479,
"compute_intensity": 7.988298391028766,
"tile_compute_intensity": 0.9808429118773946,
"MxNxK": 268435456,
"size_m": 4096,
"size_n": 8192,
"size_k": 8,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_4096x8192x8.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.000017133595091631432,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.7040673353489775,
"compute_intensity": 107.78947368421052,
"tile_compute_intensity": 4.571428571428571,
"MxNxK": 67108864,
"size_m": 512,
"size_n": 1024,
"size_k": 128,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_512x1024x128.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.000002700114104362367,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 3.230678340309104,
"compute_intensity": 29.681159420289855,
"tile_compute_intensity": 2.2857142857142856,
"MxNxK": 8388608,
"size_m": 1024,
"size_n": 256,
"size_k": 32,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_1024x256x32.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.008772400686915293,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.1077622312138382,
"compute_intensity": 481.88235294117646,
"tile_compute_intensity": 36.57142857142857,
"MxNxK": 34359738368,
"size_m": 8192,
"size_n": 8192,
"size_k": 512,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_8192x8192x512.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.00015193615041303915,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.427294290636588,
"compute_intensity": 15.868280871670702,
"tile_compute_intensity": 1.8686131386861313,
"MxNxK": 268435456,
"size_m": 16384,
"size_n": 1024,
"size_k": 16,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_16384x1024x16.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.000010070695848702883,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 2.0603144627507635,
"compute_intensity": 30.796992481203006,
"tile_compute_intensity": 2.56,
"MxNxK": 33554432,
"size_m": 512,
"size_n": 2048,
"size_k": 32,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_512x2048x32.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.00006853438036652573,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 2.7338570096109525,
"compute_intensity": 112.21917808219177,
"tile_compute_intensity": 1.5975039001560063,
"MxNxK": 268435456,
"size_m": 512,
"size_n": 64,
"size_k": 8192,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_512x64x8192.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.00001933865507178452,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.5833986679434213,
"compute_intensity": 15.723608445297504,
"tile_compute_intensity": 1.7297297297297298,
"MxNxK": 33554432,
"size_m": 4096,
"size_n": 512,
"size_k": 16,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_4096x512x16.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.008772400686915293,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.296824988125738,
"compute_intensity": 1638.4,
"tile_compute_intensity": 20.897959183673468,
"MxNxK": 34359738368,
"size_m": 2048,
"size_n": 2048,
"size_k": 8192,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_2048x2048x8192.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.004386200343457647,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.153947860670407,
"compute_intensity": 963.7647058823529,
"tile_compute_intensity": 23.272727272727273,
"MxNxK": 17179869184,
"size_m": 8192,
"size_n": 1024,
"size_k": 2048,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_8192x1024x2048.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0010965500858644117,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.1745141589499206,
"compute_intensity": 337.8144329896907,
"tile_compute_intensity": 7.013698630136986,
"MxNxK": 4294967296,
"size_m": 512,
"size_n": 16384,
"size_k": 512,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_512x16384x512.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.004386200343457647,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.2900050286562084,
"compute_intensity": 448.8767123287671,
"tile_compute_intensity": 3.9384615384615387,
"MxNxK": 17179869184,
"size_m": 256,
"size_n": 16384,
"size_k": 4096,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_256x16384x4096.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.00006853438036652573,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.7261060158171082,
"compute_intensity": 240.94117647058823,
"tile_compute_intensity": 2.6528497409326426,
"MxNxK": 268435456,
"size_m": 256,
"size_n": 256,
"size_k": 4096,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_256x256x4096.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0005482750429322058,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 2.077946925639474,
"compute_intensity": 334.3673469387755,
"tile_compute_intensity": 3.992202729044834,
"MxNxK": 2147483648,
"size_m": 512,
"size_n": 256,
"size_k": 16384,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_512x256x16384.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.004386200343457647,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.1254723492544845,
"compute_intensity": 246.37593984962405,
"tile_compute_intensity": 1.9883495145631067,
"MxNxK": 17179869184,
"size_m": 128,
"size_n": 16384,
"size_k": 8192,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_128x16384x8192.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0010965500858644117,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 2.018691865889492,
"compute_intensity": 496.4848484848485,
"tile_compute_intensity": 5.3194805194805195,
"MxNxK": 4294967296,
"size_m": 512,
"size_n": 512,
"size_k": 16384,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_512x512x16384.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.000002141699386453929,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 6.258954677407632,
"compute_intensity": 102.4,
"tile_compute_intensity": 1.5238095238095237,
"MxNxK": 8388608,
"size_m": 128,
"size_n": 256,
"size_k": 256,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_128x256x256.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0005482750429322058,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.2237186135398657,
"compute_intensity": 224.43835616438355,
"tile_compute_intensity": 10.24,
"MxNxK": 2147483648,
"size_m": 1024,
"size_n": 8192,
"size_k": 256,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_1024x8192x256.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.000034267190183262865,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.5320544346448726,
"compute_intensity": 126.03076923076924,
"tile_compute_intensity": 1.855072463768116,
"MxNxK": 134217728,
"size_m": 128,
"size_n": 4096,
"size_k": 256,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_128x4096x256.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.000034267190183262865,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 2.308353844871352,
"compute_intensity": 227.55555555555554,
"tile_compute_intensity": 4.923076923076923,
"MxNxK": 134217728,
"size_m": 1024,
"size_n": 256,
"size_k": 512,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_1024x256x512.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.000002141699386453929,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 4.047626877918578,
"compute_intensity": 49.951219512195124,
"tile_compute_intensity": 2.2857142857142856,
"MxNxK": 8388608,
"size_m": 1024,
"size_n": 128,
"size_k": 64,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_1024x128x64.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0005482750429322058,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 2.0633321565180793,
"compute_intensity": 234.05714285714285,
"tile_compute_intensity": 1.9357277882797732,
"MxNxK": 2147483648,
"size_m": 128,
"size_n": 2048,
"size_k": 8192,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_128x2048x8192.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.017544801373830587,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.0853915513239663,
"compute_intensity": 1489.4545454545455,
"tile_compute_intensity": 42.666666666666664,
"MxNxK": 68719476736,
"size_m": 4096,
"size_n": 8192,
"size_k": 2048,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_4096x8192x2048.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.000017133595091631432,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.7537474944058378,
"compute_intensity": 84.45360824742268,
"tile_compute_intensity": 1.7534246575342465,
"MxNxK": 67108864,
"size_m": 128,
"size_n": 4096,
"size_k": 128,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_128x4096x128.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0000010708496932269645,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 7.581269390387177,
"compute_intensity": 73.14285714285714,
"tile_compute_intensity": 1.4545454545454546,
"MxNxK": 4194304,
"size_m": 128,
"size_n": 256,
"size_k": 128,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_128x256x128.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0002741375214661029,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.3985389434805156,
"compute_intensity": 122.26865671641791,
"tile_compute_intensity": 1.9320754716981132,
"MxNxK": 1073741824,
"size_m": 4096,
"size_n": 64,
"size_k": 4096,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_4096x64x4096.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0002741375214661029,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.204323953025233,
"compute_intensity": 119.5912408759124,
"tile_compute_intensity": 7.757575757575758,
"MxNxK": 1073741824,
"size_m": 1024,
"size_n": 8192,
"size_k": 128,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_1024x8192x128.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0000013865450806185128,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 4.475007666252459,
"compute_intensity": 29.257142857142856,
"tile_compute_intensity": 2,
"MxNxK": 4194304,
"size_m": 512,
"size_n": 256,
"size_k": 32,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_512x256x32.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0005482750429322058,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.3607243877015245,
"compute_intensity": 682.6666666666666,
"tile_compute_intensity": 10.24,
"MxNxK": 2147483648,
"size_m": 1024,
"size_n": 1024,
"size_k": 2048,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_1024x1024x2048.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0021931001717288233,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.1548626638864659,
"compute_intensity": 125.5478927203065,
"tile_compute_intensity": 13.473684210526315,
"MxNxK": 8589934592,
"size_m": 16384,
"size_n": 4096,
"size_k": 128,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_16384x4096x128.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0005482750429322058,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.2191661702027596,
"compute_intensity": 234.05714285714285,
"tile_compute_intensity": 14.222222222222221,
"MxNxK": 2147483648,
"size_m": 2048,
"size_n": 4096,
"size_k": 256,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_2048x4096x256.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.000034267190183262865,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.7427282391038321,
"compute_intensity": 59.7956204379562,
"tile_compute_intensity": 3.878787878787879,
"MxNxK": 134217728,
"size_m": 512,
"size_n": 4096,
"size_k": 64,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_512x4096x64.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.00003984492705356358,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.71520957712011,
"compute_intensity": 30.97164461247637,
"tile_compute_intensity": 2.6391752577319587,
"MxNxK": 134217728,
"size_m": 512,
"size_n": 8192,
"size_k": 32,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_512x8192x32.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0002741375214661029,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.4071068896726995,
"compute_intensity": 221.40540540540542,
"tile_compute_intensity": 12.8,
"MxNxK": 1073741824,
"size_m": 4096,
"size_n": 1024,
"size_k": 256,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_4096x1024x256.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0010965500858644117,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.2011840147745882,
"compute_intensity": 682.6666666666666,
"tile_compute_intensity": 7.013698630136986,
"MxNxK": 4294967296,
"size_m": 512,
"size_n": 2048,
"size_k": 4096,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_512x2048x4096.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.00013706876073305146,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.3154039611380937,
"compute_intensity": 118.72463768115942,
"tile_compute_intensity": 9.142857142857142,
"MxNxK": 536870912,
"size_m": 4096,
"size_n": 1024,
"size_k": 128,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_4096x1024x128.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.00006853438036652573,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.5548867202797338,
"compute_intensity": 292.57142857142856,
"tile_compute_intensity": 3.1604938271604937,
"MxNxK": 268435456,
"size_m": 256,
"size_n": 512,
"size_k": 2048,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_256x512x2048.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0002741375214661029,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.1905965540645835,
"compute_intensity": 122.26865671641791,
"tile_compute_intensity": 9.846153846153847,
"MxNxK": 1073741824,
"size_m": 2048,
"size_n": 4096,
"size_k": 128,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_2048x4096x128.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 1.277080995306525e-7,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 32.12325547479827,
"compute_intensity": 23.272727272727273,
"tile_compute_intensity": 0.8,
"MxNxK": 262144,
"size_m": 128,
"size_n": 64,
"size_k": 32,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_128x64x32.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.004386200343457647,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.1192754513756153,
"compute_intensity": 246.37593984962405,
"tile_compute_intensity": 23.272727272727273,
"MxNxK": 17179869184,
"size_m": 16384,
"size_n": 4096,
"size_k": 256,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_16384x4096x256.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.00007593158717808223,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.1089139431328212,
"compute_intensity": 7.9360620004843785,
"tile_compute_intensity": 0.8873483535528596,
"MxNxK": 67108864,
"size_m": 512,
"size_n": 16384,
"size_k": 8,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_512x16384x8.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.00013706876073305146,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.5587621701682086,
"compute_intensity": 60.12477064220184,
"tile_compute_intensity": 5.224489795918367,
"MxNxK": 536870912,
"size_m": 16384,
"size_n": 512,
"size_k": 64,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_16384x512x64.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 7.38882575855918e-7,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 6.240775103252198,
"compute_intensity": 14.124137931034483,
"tile_compute_intensity": 0.9411764705882353,
"MxNxK": 1048576,
"size_m": 1024,
"size_n": 64,
"size_k": 16,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_1024x64x16.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.008772400686915293,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.1603372995288328,
"compute_intensity": 127.0077519379845,
"tile_compute_intensity": 14.628571428571428,
"MxNxK": 34359738368,
"size_m": 16384,
"size_n": 16384,
"size_k": 128,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_16384x16384x128.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.017544801373830587,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.306094048471535,
"compute_intensity": 1820.4444444444443,
"tile_compute_intensity": 21.11340206185567,
"MxNxK": 68719476736,
"size_m": 2048,
"size_n": 2048,
"size_k": 16384,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_2048x2048x16384.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0002741375214661029,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.3911266678259708,
"compute_intensity": 227.55555555555554,
"tile_compute_intensity": 3.5310344827586206,
"MxNxK": 1073741824,
"size_m": 2048,
"size_n": 128,
"size_k": 4096,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_2048x128x4096.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.000008566797545815716,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 3.531774686609574,
"compute_intensity": 97.52380952380952,
"tile_compute_intensity": 1.7297297297297298,
"MxNxK": 33554432,
"size_m": 1024,
"size_n": 64,
"size_k": 512,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_1024x64x512.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.000008566797545815716,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 2.5826220578793713,
"compute_intensity": 83.59183673469387,
"tile_compute_intensity": 2.909090909090909,
"MxNxK": 33554432,
"size_m": 2048,
"size_n": 128,
"size_k": 128,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_2048x128x128.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0021931001717288233,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.1511974208459852,
"compute_intensity": 239.1824817518248,
"tile_compute_intensity": 3.9083969465648853,
"MxNxK": 8589934592,
"size_m": 16384,
"size_n": 128,
"size_k": 4096,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_16384x128x4096.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.000017133595091631432,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 2.3471081738919817,
"compute_intensity": 124.12121212121212,
"tile_compute_intensity": 3.2,
"MxNxK": 67108864,
"size_m": 2048,
"size_n": 128,
"size_k": 256,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_2048x128x256.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.017544801373830587,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.0735572879761617,
"compute_intensity": 1310.72,
"tile_compute_intensity": 26.94736842105263,
"MxNxK": 68719476736,
"size_m": 2048,
"size_n": 16384,
"size_k": 2048,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_2048x16384x2048.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0005482750429322058,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.2949121482750208,
"compute_intensity": 234.05714285714285,
"tile_compute_intensity": 1.9616858237547892,
"MxNxK": 2147483648,
"size_m": 128,
"size_n": 4096,
"size_k": 4096,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_128x4096x4096.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.017544801373830587,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.155261589771326,
"compute_intensity": 252.06153846153848,
"tile_compute_intensity": 26.94736842105263,
"MxNxK": 68719476736,
"size_m": 16384,
"size_n": 16384,
"size_k": 256,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_16384x16384x256.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.00003809350168857177,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.341289124586616,
"compute_intensity": 15.845261121856867,
"tile_compute_intensity": 1.8285714285714285,
"MxNxK": 67108864,
"size_m": 4096,
"size_n": 1024,
"size_k": 16,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_4096x1024x16.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.000017133595091631432,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 6.067704665159816,
"compute_intensity": 157.53846153846155,
"tile_compute_intensity": 1.5900621118012421,
"MxNxK": 67108864,
"size_m": 128,
"size_n": 256,
"size_k": 2048,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_128x256x2048.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.000017133595091631432,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 5.457535421447756,
"compute_intensity": 84.45360824742268,
"tile_compute_intensity": 0.9990243902439024,
"MxNxK": 67108864,
"size_m": 128,
"size_n": 64,
"size_k": 8192,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_128x64x8192.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.000002955530303423672,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 2.8421295236211193,
"compute_intensity": 28.248275862068965,
"tile_compute_intensity": 1.3061224489795917,
"MxNxK": 8388608,
"size_m": 128,
"size_n": 2048,
"size_k": 32,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_128x2048x32.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.017544801373830587,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.1600982157805082,
"compute_intensity": 885.6216216216217,
"tile_compute_intensity": 7.816793893129771,
"MxNxK": 68719476736,
"size_m": 512,
"size_n": 16384,
"size_k": 8192,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_512x16384x8192.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.00006853438036652573,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.389784231688841,
"compute_intensity": 292.57142857142856,
"tile_compute_intensity": 7.111111111111111,
"MxNxK": 268435456,
"size_m": 1024,
"size_n": 512,
"size_k": 512,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_1024x512x512.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0021931001717288233,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.307597685658157,
"compute_intensity": 404.5432098765432,
"tile_compute_intensity": 7.529411764705882,
"MxNxK": 8589934592,
"size_m": 16384,
"size_n": 256,
"size_k": 2048,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_16384x256x2048.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 5.354248466134823e-7,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 14.971288089070061,
"compute_intensity": 56.888888888888886,
"tile_compute_intensity": 1.2307692307692308,
"MxNxK": 2097152,
"size_m": 256,
"size_n": 64,
"size_k": 128,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_256x64x128.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0021931001717288233,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.1272137962085342,
"compute_intensity": 125.5478927203065,
"tile_compute_intensity": 12.487804878048781,
"MxNxK": 8589934592,
"size_m": 4096,
"size_n": 16384,
"size_k": 128,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_4096x16384x128.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.004386200343457647,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.1111773493382209,
"compute_intensity": 819.2,
"tile_compute_intensity": 7.474452554744525,
"MxNxK": 17179869184,
"size_m": 512,
"size_n": 4096,
"size_k": 8192,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_512x4096x8192.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0010965500858644117,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 2.0213300255308644,
"compute_intensity": 399.609756097561,
"tile_compute_intensity": 3.5493934142114383,
"MxNxK": 4294967296,
"size_m": 256,
"size_n": 1024,
"size_k": 16384,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_256x1024x16384.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.000004283398772907858,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 3.7480514590532517,
"compute_intensity": 113.77777777777777,
"tile_compute_intensity": 1.6842105263157894,
"MxNxK": 16777216,
"size_m": 128,
"size_n": 512,
"size_k": 256,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_128x512x256.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.000009924743734953566,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 2.108669042347244,
"compute_intensity": 31.03030303030303,
"tile_compute_intensity": 2.909090909090909,
"MxNxK": 33554432,
"size_m": 1024,
"size_n": 1024,
"size_k": 32,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_1024x1024x32.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.00003854960204403839,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.1751716558637166,
"compute_intensity": 7.875030040855563,
"tile_compute_intensity": 0.8858131487889274,
"MxNxK": 33554432,
"size_m": 16384,
"size_n": 256,
"size_k": 8,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_16384x256x8.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.00006853438036652573,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.4668724760341123,
"compute_intensity": 341.3333333333333,
"tile_compute_intensity": 5.12,
"MxNxK": 268435456,
"size_m": 512,
"size_n": 512,
"size_k": 1024,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_512x512x1024.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.000017133595091631432,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 6.78526574228811,
"compute_intensity": 124.12121212121212,
"tile_compute_intensity": 1.3298701298701299,
"MxNxK": 67108864,
"size_m": 128,
"size_n": 128,
"size_k": 4096,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_128x128x4096.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.000009523375422142943,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.6024570554786115,
"compute_intensity": 7.922630560928433,
"tile_compute_intensity": 0.8767123287671232,
"MxNxK": 8388608,
"size_m": 512,
"size_n": 2048,
"size_k": 8,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_512x2048x8.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.00013706876073305146,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.412009507892343,
"compute_intensity": 199.8048780487805,
"tile_compute_intensity": 3.657142857142857,
"MxNxK": 536870912,
"size_m": 4096,
"size_n": 128,
"size_k": 1024,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_4096x128x1024.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.000002141699386453929,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 4.509316196054707,
"compute_intensity": 78.76923076923077,
"tile_compute_intensity": 1.6,
"MxNxK": 8388608,
"size_m": 128,
"size_n": 512,
"size_k": 128,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_128x512x128.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0005482750429322058,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.3653585356801539,
"compute_intensity": 585.1428571428571,
"tile_compute_intensity": 14.222222222222221,
"MxNxK": 2147483648,
"size_m": 2048,
"size_n": 1024,
"size_k": 1024,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_2048x1024x1024.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.004386200343457647,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.112095804632851,
"compute_intensity": 246.37593984962405,
"tile_compute_intensity": 20.48,
"MxNxK": 17179869184,
"size_m": 4096,
"size_n": 16384,
"size_k": 256,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_4096x16384x256.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0002741375214661029,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.3856869392601798,
"compute_intensity": 372.3636363636364,
"tile_compute_intensity": 6.2439024390243905,
"MxNxK": 1073741824,
"size_m": 2048,
"size_n": 256,
"size_k": 2048,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_2048x256x2048.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0003006613543235933,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.0844041091568042,
"compute_intensity": 15.953261927945473,
"tile_compute_intensity": 1.9393939393939394,
"MxNxK": 536870912,
"size_m": 8192,
"size_n": 4096,
"size_k": 16,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_8192x4096x16.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 1.824401421866464e-7,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 19.785119560575716,
"compute_intensity": 14.222222222222221,
"tile_compute_intensity": 0.8,
"MxNxK": 262144,
"size_m": 128,
"size_n": 128,
"size_k": 16,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_128x128x16.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0000010708496932269645,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 12.422285367876073,
"compute_intensity": 73.14285714285714,
"tile_compute_intensity": 1.28,
"MxNxK": 4194304,
"size_m": 256,
"size_n": 64,
"size_k": 256,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_256x64x256.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0000013865450806185128,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 4.2488342815418045,
"compute_intensity": 29.257142857142856,
"tile_compute_intensity": 1.7777777777777777,
"MxNxK": 4194304,
"size_m": 256,
"size_n": 512,
"size_k": 32,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_256x512x32.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0005992793790546961,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.0225507581948179,
"compute_intensity": 7.990246281394782,
"tile_compute_intensity": 0.982725527831094,
"MxNxK": 536870912,
"size_m": 4096,
"size_n": 16384,
"size_k": 8,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_4096x16384x8.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.00003798403760325979,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.2080021562714234,
"compute_intensity": 7.934140435835351,
"tile_compute_intensity": 0.8858131487889274,
"MxNxK": 33554432,
"size_m": 512,
"size_n": 8192,
"size_k": 8,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_512x8192x8.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0002741375214661029,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.3542749868088828,
"compute_intensity": 169.78238341968913,
"tile_compute_intensity": 1.9320754716981132,
"MxNxK": 1073741824,
"size_m": 128,
"size_n": 16384,
"size_k": 512,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_128x16384x512.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.000001249714973978528,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 3.612984052593058,
"compute_intensity": 7.728301886792453,
"tile_compute_intensity": 0.7619047619047619,
"MxNxK": 1048576,
"size_m": 1024,
"size_n": 128,
"size_k": 8,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_1024x128x8.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.00013706876073305146,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 2.405729810340007,
"compute_intensity": 168.90721649484536,
"tile_compute_intensity": 1.5987509758001561,
"MxNxK": 536870912,
"size_m": 128,
"size_n": 256,
"size_k": 16384,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_128x256x16384.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0010965500858644117,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.1843165076991746,
"compute_intensity": 399.609756097561,
"tile_compute_intensity": 18.285714285714285,
"MxNxK": 4294967296,
"size_m": 8192,
"size_n": 1024,
"size_k": 512,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_8192x1024x512.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.004386200343457647,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.1274516909080219,
"compute_intensity": 780.1904761904761,
"tile_compute_intensity": 10.61139896373057,
"MxNxK": 17179869184,
"size_m": 2048,
"size_n": 512,
"size_k": 16384,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_2048x512x16384.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.00030241277968858513,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.2455558109157576,
"compute_intensity": 31.813592233009707,
"tile_compute_intensity": 3.710144927536232,
"MxNxK": 1073741824,
"size_m": 4096,
"size_n": 8192,
"size_k": 32,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_4096x8192x32.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0002741375214661029,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.245704721062983,
"compute_intensity": 63.01538461538462,
"tile_compute_intensity": 6.7368421052631575,
"MxNxK": 1073741824,
"size_m": 4096,
"size_n": 4096,
"size_k": 64,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_4096x4096x64.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0021931001717288233,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.3148553849638738,
"compute_intensity": 63.627184466019415,
"tile_compute_intensity": 7.420289855072464,
"MxNxK": 8589934592,
"size_m": 8192,
"size_n": 16384,
"size_k": 64,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_8192x16384x64.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.000008566797545815716,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 2.2180517007989557,
"compute_intensity": 50.88198757763975,
"tile_compute_intensity": 2.56,
"MxNxK": 33554432,
"size_m": 4096,
"size_n": 128,
"size_k": 64,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_4096x128x64.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.00013706876073305146,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.2820426480503673,
"compute_intensity": 112.99310344827586,
"tile_compute_intensity": 5.224489795918367,
"MxNxK": 536870912,
"size_m": 512,
"size_n": 8192,
"size_k": 128,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_512x8192x128.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.004386200343457647,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.2853395407475143,
"compute_intensity": 1365.3333333333333,
"tile_compute_intensity": 20.48,
"MxNxK": 17179869184,
"size_m": 2048,
"size_n": 2048,
"size_k": 4096,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_2048x2048x4096.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.000034267190183262865,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 3.403558871044405,
"compute_intensity": 110.70270270270271,
"tile_compute_intensity": 1.5950155763239875,
"MxNxK": 134217728,
"size_m": 512,
"size_n": 64,
"size_k": 4096,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_512x64x4096.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.00007604105126339423,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.1785213516386386,
"compute_intensity": 15.860600193610843,
"tile_compute_intensity": 1.7655172413793103,
"MxNxK": 134217728,
"size_m": 1024,
"size_n": 8192,
"size_k": 16,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_1024x8192x16.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.00013706876073305146,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.4420558377541857,
"compute_intensity": 102.0809968847352,
"tile_compute_intensity": 5.224489795918367,
"MxNxK": 536870912,
"size_m": 16384,
"size_n": 256,
"size_k": 128,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_16384x256x128.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0002741375214661029,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.4111341131520292,
"compute_intensity": 169.78238341968913,
"tile_compute_intensity": 6.2439024390243905,
"MxNxK": 1073741824,
"size_m": 16384,
"size_n": 256,
"size_k": 256,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_16384x256x256.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.00013706876073305146,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 2.406430160105082,
"compute_intensity": 248.24242424242425,
"tile_compute_intensity": 2.6597402597402597,
"MxNxK": 536870912,
"size_m": 256,
"size_n": 256,
"size_k": 8192,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_256x256x8192.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0021931001717288233,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 2.0018229999011217,
"compute_intensity": 655.36,
"tile_compute_intensity": 6.38006230529595,
"MxNxK": 8589934592,
"size_m": 512,
"size_n": 1024,
"size_k": 16384,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_512x1024x16384.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.008772400686915293,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.299316130371604,
"compute_intensity": 474.8985507246377,
"tile_compute_intensity": 3.9536679536679538,
"MxNxK": 34359738368,
"size_m": 256,
"size_n": 16384,
"size_k": 8192,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_256x16384x8192.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.00013706876073305146,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.4886076093205336,
"compute_intensity": 61.82641509433962,
"tile_compute_intensity": 6.095238095238095,
"MxNxK": 536870912,
"size_m": 8192,
"size_n": 1024,
"size_k": 64,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_8192x1024x64.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0000024811859337383914,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 2.48913234835193,
"compute_intensity": 15.515151515151516,
"tile_compute_intensity": 1.4545454545454546,
"MxNxK": 4194304,
"size_m": 512,
"size_n": 512,
"size_k": 16,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_512x512x16.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.000008566797545815716,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 2.1138354770984793,
"compute_intensity": 56.10958904109589,
"tile_compute_intensity": 2.56,
"MxNxK": 33554432,
"size_m": 256,
"size_n": 2048,
"size_k": 64,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_256x2048x64.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0002741375214661029,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.4020525827942611,
"compute_intensity": 202.2716049382716,
"tile_compute_intensity": 6.2439024390243905,
"MxNxK": 1073741824,
"size_m": 512,
"size_n": 8192,
"size_k": 256,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_512x8192x256.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0002741375214661029,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.3241587578618281,
"compute_intensity": 113.3840830449827,
"tile_compute_intensity": 7.757575757575758,
"MxNxK": 1073741824,
"size_m": 16384,
"size_n": 512,
"size_k": 128,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_16384x512x128.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0002741375214661029,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.353843107284822,
"compute_intensity": 252.06153846153848,
"tile_compute_intensity": 3.710144927536232,
"MxNxK": 1073741824,
"size_m": 256,
"size_n": 8192,
"size_k": 512,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_256x8192x512.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0002741375214661029,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.368434265293593,
"compute_intensity": 372.3636363636364,
"tile_compute_intensity": 3.710144927536232,
"MxNxK": 1073741824,
"size_m": 256,
"size_n": 2048,
"size_k": 2048,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_256x2048x2048.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.000008566797545815716,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 12.060703090523388,
"compute_intensity": 97.52380952380952,
"tile_compute_intensity": 1.3264248704663213,
"MxNxK": 33554432,
"size_m": 256,
"size_n": 64,
"size_k": 2048,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_256x64x2048.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.008772400686915293,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.1354881322700046,
"compute_intensity": 250.13740458015266,
"tile_compute_intensity": 24.38095238095238,
"MxNxK": 34359738368,
"size_m": 8192,
"size_n": 16384,
"size_k": 256,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_8192x16384x256.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.000034267190183262865,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 2.2992022455642522,
"compute_intensity": 163.84,
"tile_compute_intensity": 1.8823529411764706,
"MxNxK": 134217728,
"size_m": 128,
"size_n": 2048,
"size_k": 512,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_128x2048x512.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0005482750429322058,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 2.082668468707633,
"compute_intensity": 224.43835616438355,
"tile_compute_intensity": 1.8806244260789715,
"MxNxK": 2147483648,
"size_m": 128,
"size_n": 1024,
"size_k": 16384,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_128x1024x16384.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0010965500858644117,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 2.0615696464132007,
"compute_intensity": 125.06870229007633,
"tile_compute_intensity": 1.9375591296121097,
"MxNxK": 4294967296,
"size_m": 4096,
"size_n": 64,
"size_k": 16384,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_4096x64x16384.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.00013706876073305146,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.4199004606358956,
"compute_intensity": 112.99310344827586,
"tile_compute_intensity": 1.9393939393939394,
"MxNxK": 536870912,
"size_m": 8192,
"size_n": 64,
"size_k": 1024,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_8192x64x1024.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.017544801373830587,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.0623375930130383,
"compute_intensity": 1310.72,
"tile_compute_intensity": 15.058823529411764,
"MxNxK": 68719476736,
"size_m": 1024,
"size_n": 16384,
"size_k": 4096,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_1024x16384x4096.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.00013706876073305146,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.400500024741197,
"compute_intensity": 409.6,
"tile_compute_intensity": 7.529411764705882,
"MxNxK": 536870912,
"size_m": 1024,
"size_n": 512,
"size_k": 1024,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_1024x512x1024.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.004386200343457647,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.1471651014430677,
"compute_intensity": 126.51737451737452,
"tile_compute_intensity": 13.837837837837839,
"MxNxK": 17179869184,
"size_m": 8192,
"size_n": 16384,
"size_k": 128,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_8192x16384x128.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 6.567845118719272e-7,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 5.934366325676873,
"compute_intensity": 15.058823529411764,
"tile_compute_intensity": 1.1428571428571428,
"MxNxK": 1048576,
"size_m": 256,
"size_n": 256,
"size_k": 16,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_256x256x16.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0002741375214661029,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.37738746330901,
"compute_intensity": 202.2716049382716,
"tile_compute_intensity": 1.9541984732824427,
"MxNxK": 1073741824,
"size_m": 128,
"size_n": 8192,
"size_k": 1024,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_128x8192x1024.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0021931001717288233,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.146068543619046,
"compute_intensity": 442.81081081081084,
"tile_compute_intensity": 7.420289855072464,
"MxNxK": 8589934592,
"size_m": 8192,
"size_n": 256,
"size_k": 4096,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_8192x256x4096.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.00013706876073305146,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 2.407784147002344,
"compute_intensity": 199.8048780487805,
"tile_compute_intensity": 1.7746967071057191,
"MxNxK": 536870912,
"size_m": 128,
"size_n": 512,
"size_k": 8192,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_128x512x8192.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0005482750429322058,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.2446949638040725,
"compute_intensity": 203.527950310559,
"tile_compute_intensity": 10.24,
"MxNxK": 2147483648,
"size_m": 16384,
"size_n": 512,
"size_k": 256,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_16384x512x256.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.000034267190183262865,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 2.478685940112101,
"compute_intensity": 101.1358024691358,
"tile_compute_intensity": 1.8823529411764706,
"MxNxK": 134217728,
"size_m": 4096,
"size_n": 64,
"size_k": 512,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_4096x64x512.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.000011712657128382701,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.8485301206750022,
"compute_intensity": 28.395147313691506,
"tile_compute_intensity": 1.9692307692307693,
"MxNxK": 33554432,
"size_m": 8192,
"size_n": 128,
"size_k": 32,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_8192x128x32.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 3.283922559359636e-7,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 16.44862171547781,
"compute_intensity": 39.38461538461539,
"tile_compute_intensity": 1.1428571428571428,
"MxNxK": 1048576,
"size_m": 256,
"size_n": 64,
"size_k": 64,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_256x64x64.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.017544801373830587,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.2145789047211375,
"compute_intensity": 885.6216216216217,
"tile_compute_intensity": 14.840579710144928,
"MxNxK": 68719476736,
"size_m": 16384,
"size_n": 512,
"size_k": 8192,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_16384x512x8192.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.00013706876073305146,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.3935896126279845,
"compute_intensity": 409.6,
"tile_compute_intensity": 6.095238095238095,
"MxNxK": 536870912,
"size_m": 512,
"size_n": 1024,
"size_k": 1024,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_512x1024x1024.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0003041642050535769,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.8018163509381235,
"compute_intensity": 31.721200387221685,
"tile_compute_intensity": 3.710144927536232,
"MxNxK": 1073741824,
"size_m": 16384,
"size_n": 2048,
"size_k": 32,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_16384x2048x32.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.000004889395810602124,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 2.0079372997072378,
"compute_intensity": 15.633587786259541,
"tile_compute_intensity": 1.5238095238095237,
"MxNxK": 8388608,
"size_m": 512,
"size_n": 1024,
"size_k": 16,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_512x1024x16.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.00013706876073305146,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 2.4122432500057345,
"compute_intensity": 168.90721649484536,
"tile_compute_intensity": 1.9980487804878049,
"MxNxK": 536870912,
"size_m": 256,
"size_n": 128,
"size_k": 16384,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_256x128x16384.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0000035210947442022757,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 2.4565087941250385,
"compute_intensity": 25.5202492211838,
"tile_compute_intensity": 1.3061224489795917,
"MxNxK": 8388608,
"size_m": 4096,
"size_n": 64,
"size_k": 32,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_4096x64x32.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0021931001717288233,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.1583674702659925,
"compute_intensity": 125.5478927203065,
"tile_compute_intensity": 1.9806576402321083,
"MxNxK": 8589934592,
"size_m": 16384,
"size_n": 64,
"size_k": 8192,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_16384x64x8192.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.004386200343457647,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.0867823138031198,
"compute_intensity": 448.8767123287671,
"tile_compute_intensity": 28.444444444444443,
"MxNxK": 17179869184,
"size_m": 16384,
"size_n": 2048,
"size_k": 512,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_16384x2048x512.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0005482750429322058,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.3766637572274913,
"compute_intensity": 224.43835616438355,
"tile_compute_intensity": 3.8208955223880596,
"MxNxK": 2147483648,
"size_m": 8192,
"size_n": 128,
"size_k": 2048,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_8192x128x2048.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0005482750429322058,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.3374542772984166,
"compute_intensity": 409.6,
"tile_compute_intensity": 16,
"MxNxK": 2147483648,
"size_m": 2048,
"size_n": 2048,
"size_k": 512,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_2048x2048x512.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.017544801373830587,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.1612138833179837,
"compute_intensity": 2048,
"tile_compute_intensity": 24.975609756097562,
"MxNxK": 68719476736,
"size_m": 2048,
"size_n": 4096,
"size_k": 8192,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_2048x4096x8192.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.000002645382061706373,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 2.3757628682482297,
"compute_intensity": 15.003663003663004,
"tile_compute_intensity": 1.28,
"MxNxK": 4194304,
"size_m": 2048,
"size_n": 128,
"size_k": 16,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_2048x128x16.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0021931001717288233,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.1249828857938822,
"compute_intensity": 744.7272727272727,
"tile_compute_intensity": 10.556701030927835,
"MxNxK": 8589934592,
"size_m": 2048,
"size_n": 512,
"size_k": 8192,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_2048x512x8192.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0005482750429322058,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.1423055437777405,
"compute_intensity": 120.02930402930403,
"tile_compute_intensity": 7.876923076923077,
"MxNxK": 2147483648,
"size_m": 1024,
"size_n": 16384,
"size_k": 128,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_1024x16384x128.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0005482750429322058,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 2.085820297446649,
"compute_intensity": 234.05714285714285,
"tile_compute_intensity": 3.5432525951557095,
"MxNxK": 2147483648,
"size_m": 2048,
"size_n": 128,
"size_k": 8192,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_2048x128x8192.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0003041642050535769,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.2387098741852238,
"compute_intensity": 31.721200387221685,
"tile_compute_intensity": 3.5310344827586206,
"MxNxK": 1073741824,
"size_m": 2048,
"size_n": 16384,
"size_k": 32,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_2048x16384x32.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 1.6875713152264795e-7,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 20.89630314447214,
"compute_intensity": 7.420289855072464,
"tile_compute_intensity": 0.5714285714285714,
"MxNxK": 131072,
"size_m": 256,
"size_n": 64,
"size_k": 8,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_256x64x8.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.00007560319492214628,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.1774317044220806,
"compute_intensity": 15.906796116504854,
"tile_compute_intensity": 1.855072463768116,
"MxNxK": 134217728,
"size_m": 2048,
"size_n": 4096,
"size_k": 16,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_2048x4096x16.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.00006853438036652573,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.4530518216718074,
"compute_intensity": 167.18367346938774,
"tile_compute_intensity": 5.818181818181818,
"MxNxK": 268435456,
"size_m": 4096,
"size_n": 256,
"size_k": 256,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_4096x256x256.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0000010708496932269645,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 6.0423045436290925,
"compute_intensity": 48.76190476190476,
"tile_compute_intensity": 1.4545454545454546,
"MxNxK": 4194304,
"size_m": 128,
"size_n": 512,
"size_k": 64,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_128x512x64.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.000008566797545815716,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 13.505163694618627,
"compute_intensity": 83.59183673469387,
"tile_compute_intensity": 0.9980506822612085,
"MxNxK": 33554432,
"size_m": 128,
"size_n": 64,
"size_k": 4096,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_128x64x4096.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0021931001717288233,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.3057052295017908,
"compute_intensity": 655.36,
"tile_compute_intensity": 13.473684210526315,
"MxNxK": 8589934592,
"size_m": 8192,
"size_n": 512,
"size_k": 2048,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_8192x512x2048.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.000034267190183262865,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.5521318639882025,
"compute_intensity": 101.1358024691358,
"tile_compute_intensity": 4.923076923076923,
"MxNxK": 134217728,
"size_m": 4096,
"size_n": 256,
"size_k": 128,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_4096x256x128.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0010965500858644117,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 2.0344037873593264,
"compute_intensity": 240.94117647058823,
"tile_compute_intensity": 1.965451055662188,
"MxNxK": 4294967296,
"size_m": 128,
"size_n": 4096,
"size_k": 8192,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_128x4096x8192.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.000004283398772907858,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 5.7576707124512545,
"compute_intensity": 128,
"tile_compute_intensity": 1.9393939393939394,
"MxNxK": 16777216,
"size_m": 256,
"size_n": 128,
"size_k": 512,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_256x128x512.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 6.202964834345979e-7,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 9.610887776946955,
"compute_intensity": 40.96,
"tile_compute_intensity": 1.3333333333333333,
"MxNxK": 2097152,
"size_m": 512,
"size_n": 64,
"size_k": 64,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_512x64x64.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0024006203669487687,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.7726604540220186,
"compute_intensity": 31.937621832358673,
"tile_compute_intensity": 3.9083969465648853,
"MxNxK": 8589934592,
"size_m": 16384,
"size_n": 16384,
"size_k": 32,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_16384x16384x32.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0010965500858644117,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.1215947760645146,
"compute_intensity": 225.98620689655172,
"tile_compute_intensity": 10.448979591836734,
"MxNxK": 4294967296,
"size_m": 1024,
"size_n": 16384,
"size_k": 256,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_1024x16384x256.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.000009523375422142943,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.6226180668212593,
"compute_intensity": 7.922630560928433,
"tile_compute_intensity": 0.9142857142857143,
"MxNxK": 8388608,
"size_m": 2048,
"size_n": 512,
"size_k": 8,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_2048x512x8.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.004386200343457647,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.1429168847441495,
"compute_intensity": 246.37593984962405,
"tile_compute_intensity": 3.9233716475095783,
"MxNxK": 17179869184,
"size_m": 16384,
"size_n": 128,
"size_k": 8192,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_16384x128x8192.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.00006853438036652573,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.4353088349240202,
"compute_intensity": 127.0077519379845,
"tile_compute_intensity": 3.4594594594594597,
"MxNxK": 268435456,
"size_m": 8192,
"size_n": 128,
"size_k": 256,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_8192x128x256.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.000034267190183262865,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 3.5216895141012006,
"compute_intensity": 126.03076923076924,
"tile_compute_intensity": 1.3315994798439532,
"MxNxK": 134217728,
"size_m": 128,
"size_n": 128,
"size_k": 8192,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_128x128x8192.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.000023388826228328075,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.805443261958237,
"compute_intensity": 28.419774501300953,
"tile_compute_intensity": 1.3298701298701299,
"MxNxK": 67108864,
"size_m": 128,
"size_n": 16384,
"size_k": 32,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_128x16384x32.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0010965500858644117,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.3962365202715155,
"compute_intensity": 63.38104448742747,
"tile_compute_intensity": 7.314285714285714,
"MxNxK": 4294967296,
"size_m": 16384,
"size_n": 4096,
"size_k": 64,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_16384x4096x64.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0012026454172943731,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.793691134823464,
"compute_intensity": 31.906523855890946,
"tile_compute_intensity": 3.878787878787879,
"MxNxK": 4294967296,
"size_m": 16384,
"size_n": 8192,
"size_k": 32,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_16384x8192x32.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.000034267190183262865,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 2.878648505694077,
"compute_intensity": 186.1818181818182,
"tile_compute_intensity": 3.1219512195121952,
"MxNxK": 134217728,
"size_m": 1024,
"size_n": 128,
"size_k": 1024,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_1024x128x1024.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0021931001717288233,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.121342273050742,
"compute_intensity": 744.7272727272727,
"tile_compute_intensity": 7.420289855072464,
"MxNxK": 8589934592,
"size_m": 512,
"size_n": 4096,
"size_k": 4096,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_512x4096x4096.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.008772400686915293,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.1358139316513807,
"compute_intensity": 799.219512195122,
"tile_compute_intensity": 7.757575757575758,
"MxNxK": 34359738368,
"size_m": 512,
"size_n": 16384,
"size_k": 4096,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_512x16384x4096.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.000034267190183262865,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.841528175487021,
"compute_intensity": 56.69204152249135,
"tile_compute_intensity": 3.878787878787879,
"MxNxK": 134217728,
"size_m": 8192,
"size_n": 256,
"size_k": 64,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_8192x256x64.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.000011685291107054704,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.546970400230194,
"compute_intensity": 14.216052060737526,
"tile_compute_intensity": 0.9961089494163424,
"MxNxK": 16777216,
"size_m": 16384,
"size_n": 64,
"size_k": 16,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_16384x64x16.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.00013706876073305146,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 2.391862273480695,
"compute_intensity": 112.99310344827586,
"tile_compute_intensity": 1.5987509758001561,
"MxNxK": 536870912,
"size_m": 512,
"size_n": 64,
"size_k": 16384,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_512x64x16384.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0023936146654888013,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.0191541807854434,
"compute_intensity": 7.996095656417765,
"tile_compute_intensity": 0.9941747572815534,
"MxNxK": 2147483648,
"size_m": 16384,
"size_n": 16384,
"size_k": 8,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_16384x16384x8.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.004386200343457647,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.0560802235075948,
"compute_intensity": 780.1904761904761,
"tile_compute_intensity": 23.272727272727273,
"MxNxK": 17179869184,
"size_m": 2048,
"size_n": 8192,
"size_k": 1024,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_2048x8192x1024.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.00007516533858089832,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.1210699303805904,
"compute_intensity": 7.976630963972736,
"tile_compute_intensity": 0.9696969696969697,
"MxNxK": 67108864,
"size_m": 4096,
"size_n": 2048,
"size_k": 8,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_4096x2048x8.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.000034267190183262865,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.5764117316502206,
"compute_intensity": 84.89119170984456,
"tile_compute_intensity": 3.1219512195121952,
"MxNxK": 134217728,
"size_m": 8192,
"size_n": 128,
"size_k": 128,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_8192x128x128.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0002741375214661029,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.38965581441443,
"compute_intensity": 227.55555555555554,
"tile_compute_intensity": 1.9320754716981132,
"MxNxK": 1073741824,
"size_m": 128,
"size_n": 2048,
"size_k": 4096,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_128x2048x4096.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0021931001717288233,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.301443168877126,
"compute_intensity": 910.2222222222222,
"tile_compute_intensity": 19.692307692307693,
"MxNxK": 8589934592,
"size_m": 4096,
"size_n": 1024,
"size_k": 2048,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_4096x1024x2048.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0000753842667515223,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.1209129373536388,
"compute_intensity": 7.964997569275644,
"tile_compute_intensity": 0.9377289377289377,
"MxNxK": 67108864,
"size_m": 1024,
"size_n": 8192,
"size_k": 8,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_1024x8192x8.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0010965500858644117,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.1408260272961006,
"compute_intensity": 125.06870229007633,
"tile_compute_intensity": 12.19047619047619,
"MxNxK": 4294967296,
"size_m": 4096,
"size_n": 8192,
"size_k": 128,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_4096x8192x128.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.004386200343457647,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.1176996219381672,
"compute_intensity": 963.7647058823529,
"tile_compute_intensity": 10.61139896373057,
"MxNxK": 17179869184,
"size_m": 1024,
"size_n": 1024,
"size_k": 16384,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_1024x1024x16384.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0021931001717288233,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.1320375974766872,
"compute_intensity": 239.1824817518248,
"tile_compute_intensity": 1.9844961240310077,
"MxNxK": 8589934592,
"size_m": 128,
"size_n": 16384,
"size_k": 4096,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_128x16384x4096.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.000010581528246825492,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 2.015890282681882,
"compute_intensity": 30.007326007326007,
"tile_compute_intensity": 2.56,
"MxNxK": 33554432,
"size_m": 4096,
"size_n": 256,
"size_k": 32,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_4096x256x32.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.00002103534839412033,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.6225640430049932,
"compute_intensity": 15.05190629306385,
"tile_compute_intensity": 1.3264248704663213,
"MxNxK": 33554432,
"size_m": 16384,
"size_n": 128,
"size_k": 16,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_16384x128x16.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.000001331813037962519,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 3.582484636173203,
"compute_intensity": 14.94890510948905,
"tile_compute_intensity": 0.9696969696969697,
"MxNxK": 2097152,
"size_m": 128,
"size_n": 1024,
"size_k": 16,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_128x1024x16.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0005482750429322058,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 2.0918319585031036,
"compute_intensity": 124.12121212121212,
"tile_compute_intensity": 1.9357277882797732,
"MxNxK": 2147483648,
"size_m": 4096,
"size_n": 64,
"size_k": 8192,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_4096x64x8192.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.00007647890760464218,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.442029089365667,
"compute_intensity": 31.62934362934363,
"tile_compute_intensity": 3.5555555555555554,
"MxNxK": 268435456,
"size_m": 4096,
"size_n": 2048,
"size_k": 32,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_4096x2048x32.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.000007023945474185887,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.95764619097598,
"compute_intensity": 25.5600624024961,
"tile_compute_intensity": 1.3195876288659794,
"MxNxK": 16777216,
"size_m": 8192,
"size_n": 64,
"size_k": 32,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_8192x64x32.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0021931001717288233,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.1177368221803574,
"compute_intensity": 244.53731343283582,
"tile_compute_intensity": 19.692307692307693,
"MxNxK": 8589934592,
"size_m": 4096,
"size_n": 8192,
"size_k": 256,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_4096x8192x256.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.008772400686915293,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.0995207638595088,
"compute_intensity": 992.969696969697,
"tile_compute_intensity": 25.6,
"MxNxK": 34359738368,
"size_m": 16384,
"size_n": 1024,
"size_k": 2048,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_16384x1024x2048.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.000019119726901160545,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.577428374828343,
"compute_intensity": 15.814671814671815,
"tile_compute_intensity": 1.7297297297297298,
"MxNxK": 33554432,
"size_m": 1024,
"size_n": 2048,
"size_k": 16,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_1024x2048x16.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0000024264538910823974,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 2.281518740737827,
"compute_intensity": 7.846743295019157,
"tile_compute_intensity": 0.8421052631578947,
"MxNxK": 2097152,
"size_m": 1024,
"size_n": 256,
"size_k": 8,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_1024x256x8.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.00013706876073305146,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.4476822469874076,
"compute_intensity": 199.8048780487805,
"tile_compute_intensity": 9.142857142857142,
"MxNxK": 536870912,
"size_m": 4096,
"size_n": 512,
"size_k": 256,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_4096x512x256.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.004386200343457647,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.0783711521933188,
"compute_intensity": 448.8767123287671,
"tile_compute_intensity": 20.48,
"MxNxK": 17179869184,
"size_m": 2048,
"size_n": 16384,
"size_k": 512,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_2048x16384x512.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.004386200343457647,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.1333894610724544,
"compute_intensity": 963.7647058823529,
"tile_compute_intensity": 14.222222222222221,
"MxNxK": 17179869184,
"size_m": 1024,
"size_n": 8192,
"size_k": 2048,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_1024x8192x2048.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0021931001717288233,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 2.029941796665903,
"compute_intensity": 442.81081081081084,
"tile_compute_intensity": 6.38006230529595,
"MxNxK": 8589934592,
"size_m": 2048,
"size_n": 256,
"size_k": 16384,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_2048x256x16384.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.000034267190183262865,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.5680070248769638,
"compute_intensity": 113.77777777777777,
"tile_compute_intensity": 6.4,
"MxNxK": 134217728,
"size_m": 1024,
"size_n": 1024,
"size_k": 128,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_1024x1024x128.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.000034267190183262865,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.7699029310919248,
"compute_intensity": 56.69204152249135,
"tile_compute_intensity": 2.6391752577319587,
"MxNxK": 134217728,
"size_m": 256,
"size_n": 8192,
"size_k": 64,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_256x8192x64.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.000010581528246825492,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.9176058795536317,
"compute_intensity": 30.007326007326007,
"tile_compute_intensity": 1.9692307692307693,
"MxNxK": 33554432,
"size_m": 256,
"size_n": 4096,
"size_k": 32,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_256x4096x32.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.00006853438036652573,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.4491296591504448,
"compute_intensity": 204.8,
"tile_compute_intensity": 8,
"MxNxK": 268435456,
"size_m": 1024,
"size_n": 1024,
"size_k": 256,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_1024x1024x256.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0002741375214661029,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 2.1882272984093585,
"compute_intensity": 202.2716049382716,
"tile_compute_intensity": 1.7762359063313096,
"MxNxK": 1073741824,
"size_m": 128,
"size_n": 512,
"size_k": 16384,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_128x512x16384.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.000004889395810602124,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 2.0602953998491014,
"compute_intensity": 15.633587786259541,
"tile_compute_intensity": 1.6,
"MxNxK": 8388608,
"size_m": 1024,
"size_n": 512,
"size_k": 16,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_1024x512x16.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.00006853438036652573,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.5179067918523543,
"compute_intensity": 112.21917808219177,
"tile_compute_intensity": 7.111111111111111,
"MxNxK": 268435456,
"size_m": 4096,
"size_n": 512,
"size_k": 128,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_4096x512x128.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0021931001717288233,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.1219113178513729,
"compute_intensity": 744.7272727272727,
"tile_compute_intensity": 7.062068965517241,
"MxNxK": 8589934592,
"size_m": 512,
"size_n": 2048,
"size_k": 8192,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_512x2048x8192.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0021931001717288233,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.0927143085874864,
"compute_intensity": 442.81081081081084,
"tile_compute_intensity": 25.6,
"MxNxK": 8589934592,
"size_m": 8192,
"size_n": 2048,
"size_k": 512,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_8192x2048x512.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0010965500858644117,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.2232838463257718,
"compute_intensity": 225.98620689655172,
"tile_compute_intensity": 3.878787878787879,
"MxNxK": 4294967296,
"size_m": 16384,
"size_n": 128,
"size_k": 2048,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_16384x128x2048.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.00003854960204403839,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.1996595922653435,
"compute_intensity": 7.875030040855563,
"tile_compute_intensity": 0.7987519500780031,
"MxNxK": 33554432,
"size_m": 256,
"size_n": 16384,
"size_k": 8,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_256x16384x8.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.00003772862140419848,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.206426316974397,
"compute_intensity": 7.961127308066083,
"tile_compute_intensity": 0.9343065693430657,
"MxNxK": 33554432,
"size_m": 1024,
"size_n": 4096,
"size_k": 8,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_1024x4096x8.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.000034267190183262865,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 3.4072007577684422,
"compute_intensity": 163.84,
"tile_compute_intensity": 1.5950155763239875,
"MxNxK": 134217728,
"size_m": 128,
"size_n": 256,
"size_k": 4096,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_128x256x4096.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0002741375214661029,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.3825352803852833,
"compute_intensity": 221.40540540540542,
"tile_compute_intensity": 1.9541984732824427,
"MxNxK": 1073741824,
"size_m": 128,
"size_n": 4096,
"size_k": 2048,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_128x4096x2048.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.008772400686915293,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.1421953868961856,
"compute_intensity": 250.13740458015266,
"tile_compute_intensity": 1.9902818270165208,
"MxNxK": 34359738368,
"size_m": 128,
"size_n": 16384,
"size_k": 16384,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_128x16384x16384.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.008772400686915293,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.3101015677129921,
"compute_intensity": 862.3157894736842,
"tile_compute_intensity": 14.027397260273972,
"MxNxK": 34359738368,
"size_m": 8192,
"size_n": 512,
"size_k": 8192,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_8192x512x8192.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0002741375214661029,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.385149998778152,
"compute_intensity": 327.68,
"tile_compute_intensity": 6.7368421052631575,
"MxNxK": 1073741824,
"size_m": 4096,
"size_n": 256,
"size_k": 1024,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_4096x256x1024.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.017544801373830587,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.110042317291112,
"compute_intensity": 885.6216216216217,
"tile_compute_intensity": 39.38461538461539,
"MxNxK": 68719476736,
"size_m": 4096,
"size_n": 16384,
"size_k": 1024,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_4096x16384x1024.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.000017133595091631432,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.8051086287901732,
"compute_intensity": 56.49655172413793,
"tile_compute_intensity": 2.6122448979591835,
"MxNxK": 67108864,
"size_m": 256,
"size_n": 4096,
"size_k": 64,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_256x4096x64.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.000021090080436776326,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.9354692406245886,
"compute_intensity": 30.06238532110092,
"tile_compute_intensity": 1.9844961240310077,
"MxNxK": 67108864,
"size_m": 256,
"size_n": 8192,
"size_k": 32,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_256x8192x32.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.00013706876073305146,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.4562967359483596,
"compute_intensity": 215.57894736842104,
"tile_compute_intensity": 3.506849315068493,
"MxNxK": 536870912,
"size_m": 2048,
"size_n": 128,
"size_k": 2048,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_2048x128x2048.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0000026316990510423747,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 2.1680289337735568,
"compute_intensity": 7.522497704315886,
"tile_compute_intensity": 0.6597938144329897,
"MxNxK": 2097152,
"size_m": 4096,
"size_n": 64,
"size_k": 8,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_4096x64x8.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.000017133595091631432,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.853481513582659,
"compute_intensity": 56.49655172413793,
"tile_compute_intensity": 3.764705882352941,
"MxNxK": 67108864,
"size_m": 4096,
"size_n": 256,
"size_k": 64,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_4096x256x64.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0002741375214661029,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.3644654750714027,
"compute_intensity": 169.78238341968913,
"tile_compute_intensity": 3.710144927536232,
"MxNxK": 1073741824,
"size_m": 16384,
"size_n": 128,
"size_k": 512,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_16384x128x512.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0005482750429322058,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.254255171250849,
"compute_intensity": 409.6,
"tile_compute_intensity": 6.320987654320987,
"MxNxK": 2147483648,
"size_m": 2048,
"size_n": 256,
"size_k": 4096,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_2048x256x4096.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.00015018472504804733,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.057833243403967,
"compute_intensity": 7.980516317584024,
"tile_compute_intensity": 0.9770992366412213,
"MxNxK": 134217728,
"size_m": 8192,
"size_n": 2048,
"size_k": 8,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_8192x2048x8.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.004386200343457647,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.3062882031602279,
"compute_intensity": 448.8767123287671,
"tile_compute_intensity": 7.641791044776119,
"MxNxK": 17179869184,
"size_m": 16384,
"size_n": 256,
"size_k": 4096,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_16384x256x4096.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.000009377423308393628,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 2.1529153893738178,
"compute_intensity": 42.555844155844156,
"tile_compute_intensity": 1.5802469135802468,
"MxNxK": 33554432,
"size_m": 8192,
"size_n": 64,
"size_k": 64,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_8192x64x64.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.000008566797545815716,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 12.063317639052135,
"compute_intensity": 120.47058823529412,
"tile_compute_intensity": 1.3264248704663213,
"MxNxK": 33554432,
"size_m": 128,
"size_n": 128,
"size_k": 2048,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_128x128x2048.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.00013706876073305146,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.355302325004171,
"compute_intensity": 62.534351145038165,
"tile_compute_intensity": 6.4,
"MxNxK": 536870912,
"size_m": 4096,
"size_n": 2048,
"size_k": 64,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_4096x2048x64.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0002741375214661029,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 2.1986396292120647,
"compute_intensity": 202.2716049382716,
"tile_compute_intensity": 2.6631989596879064,
"MxNxK": 1073741824,
"size_m": 512,
"size_n": 128,
"size_k": 16384,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_512x128x16384.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.000017133595091631432,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 2.536677040948204,
"compute_intensity": 157.53846153846155,
"tile_compute_intensity": 1.8285714285714285,
"MxNxK": 67108864,
"size_m": 128,
"size_n": 1024,
"size_k": 512,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_128x1024x512.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.000004998859895914112,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 2.0106984678105277,
"compute_intensity": 15.456603773584906,
"tile_compute_intensity": 1.5238095238095237,
"MxNxK": 8388608,
"size_m": 2048,
"size_n": 256,
"size_k": 16,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_2048x256x16.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.017544801373830587,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.3036514873638723,
"compute_intensity": 489.07462686567163,
"tile_compute_intensity": 3.9613152804642167,
"MxNxK": 68719476736,
"size_m": 256,
"size_n": 16384,
"size_k": 16384,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_256x16384x16384.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.00013706876073305146,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.4590983048727784,
"compute_intensity": 118.72463768115942,
"tile_compute_intensity": 1.9248120300751879,
"MxNxK": 536870912,
"size_m": 4096,
"size_n": 64,
"size_k": 2048,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_4096x64x2048.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0021931001717288233,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.3834443931533358,
"compute_intensity": 63.627184466019415,
"tile_compute_intensity": 7.529411764705882,
"MxNxK": 8589934592,
"size_m": 16384,
"size_n": 8192,
"size_k": 64,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_16384x8192x64.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.00006853438036652573,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.4618766024128134,
"compute_intensity": 292.57142857142856,
"tile_compute_intensity": 3.4594594594594597,
"MxNxK": 268435456,
"size_m": 256,
"size_n": 1024,
"size_k": 1024,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_256x1024x1024.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0006036579424671757,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.7918226982804595,
"compute_intensity": 31.844509232264333,
"tile_compute_intensity": 3.8208955223880596,
"MxNxK": 2147483648,
"size_m": 16384,
"size_n": 4096,
"size_k": 32,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_16384x4096x32.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.000034267190183262865,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.832563426711472,
"compute_intensity": 51.1201248049922,
"tile_compute_intensity": 1.5950155763239875,
"MxNxK": 134217728,
"size_m": 128,
"size_n": 16384,
"size_k": 64,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_128x16384x64.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.000021090080436776326,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 2.0884130813022375,
"compute_intensity": 30.06238532110092,
"tile_compute_intensity": 2.6122448979591835,
"MxNxK": 67108864,
"size_m": 8192,
"size_n": 256,
"size_k": 32,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_8192x256x32.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.008772400686915293,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.3037113644660283,
"compute_intensity": 1365.3333333333333,
"tile_compute_intensity": 20.897959183673468,
"MxNxK": 34359738368,
"size_m": 4096,
"size_n": 1024,
"size_k": 8192,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_4096x1024x8192.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.00015091448561679391,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.1195743976741588,
"compute_intensity": 15.922254616132166,
"tile_compute_intensity": 1.8686131386861313,
"MxNxK": 268435456,
"size_m": 2048,
"size_n": 8192,
"size_k": 16,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_2048x8192x16.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0002741375214661029,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.3532128264690786,
"compute_intensity": 372.3636363636364,
"tile_compute_intensity": 12.8,
"MxNxK": 1073741824,
"size_m": 2048,
"size_n": 1024,
"size_k": 512,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_2048x1024x512.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0005482750429322058,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.3571524000626993,
"compute_intensity": 585.1428571428571,
"tile_compute_intensity": 6.918918918918919,
"MxNxK": 2147483648,
"size_m": 512,
"size_n": 2048,
"size_k": 2048,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_512x2048x2048.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.00013706876073305146,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.533315166140479,
"compute_intensity": 315.0769230769231,
"tile_compute_intensity": 3.1801242236024843,
"MxNxK": 536870912,
"size_m": 256,
"size_n": 512,
"size_k": 4096,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_256x512x4096.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 3.466362701546282e-7,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 12.425704680147806,
"compute_intensity": 14.628571428571428,
"tile_compute_intensity": 1,
"MxNxK": 524288,
"size_m": 256,
"size_n": 128,
"size_k": 16,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_256x128x16.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 5.354248466134823e-7,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 24.061638471383866,
"compute_intensity": 64,
"tile_compute_intensity": 0.9696969696969697,
"MxNxK": 2097152,
"size_m": 128,
"size_n": 64,
"size_k": 256,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_128x64x256.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 7.66248597183915e-7,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 6.072180653366879,
"compute_intensity": 27.675675675675677,
"tile_compute_intensity": 1.2307692307692308,
"MxNxK": 2097152,
"size_m": 128,
"size_n": 512,
"size_k": 32,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_128x512x32.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0021931001717288233,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.0870981761335,
"compute_intensity": 455.1111111111111,
"tile_compute_intensity": 25.6,
"MxNxK": 8589934592,
"size_m": 4096,
"size_n": 4096,
"size_k": 512,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_4096x4096x512.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0005482750429322058,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.2064717998854046,
"compute_intensity": 62.89443378119002,
"tile_compute_intensity": 6.320987654320987,
"MxNxK": 2147483648,
"size_m": 2048,
"size_n": 16384,
"size_k": 64,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_2048x16384x64.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.008772400686915293,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.0804755987709351,
"compute_intensity": 862.3157894736842,
"tile_compute_intensity": 42.666666666666664,
"MxNxK": 34359738368,
"size_m": 8192,
"size_n": 4096,
"size_k": 1024,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_8192x4096x1024.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.004386200343457647,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.3815314683705586,
"compute_intensity": 63.750972762645915,
"tile_compute_intensity": 7.641791044776119,
"MxNxK": 17179869184,
"size_m": 16384,
"size_n": 16384,
"size_k": 64,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_16384x16384x64.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 4.561003554666161e-7,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 9.366360375345359,
"compute_intensity": 24.975609756097562,
"tile_compute_intensity": 1.1428571428571428,
"MxNxK": 1048576,
"size_m": 512,
"size_n": 64,
"size_k": 32,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_512x64x32.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.017544801373830587,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.158421317191903,
"compute_intensity": 1489.4545454545455,
"tile_compute_intensity": 14.840579710144928,
"MxNxK": 68719476736,
"size_m": 1024,
"size_n": 8192,
"size_k": 8192,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_1024x8192x8192.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0010965500858644117,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.1996373170336498,
"compute_intensity": 819.2,
"tile_compute_intensity": 10.448979591836734,
"MxNxK": 4294967296,
"size_m": 1024,
"size_n": 1024,
"size_k": 4096,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_1024x1024x4096.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0006007389001921893,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.0602995303671603,
"compute_intensity": 15.961032635168047,
"tile_compute_intensity": 1.9320754716981132,
"MxNxK": 1073741824,
"size_m": 4096,
"size_n": 16384,
"size_k": 16,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_4096x16384x16.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 2.9190422749863426e-7,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 26.595024628092848,
"compute_intensity": 51.2,
"tile_compute_intensity": 0.9411764705882353,
"MxNxK": 1048576,
"size_m": 128,
"size_n": 64,
"size_k": 128,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_128x64x128.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0021931001717288233,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.095234242801623,
"compute_intensity": 404.5432098765432,
"tile_compute_intensity": 19.692307692307693,
"MxNxK": 8589934592,
"size_m": 16384,
"size_n": 1024,
"size_k": 512,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_16384x1024x512.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0010965500858644117,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.19199445083676,
"compute_intensity": 431.1578947368421,
"tile_compute_intensity": 3.8496240601503757,
"MxNxK": 4294967296,
"size_m": 256,
"size_n": 4096,
"size_k": 4096,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_256x4096x4096.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0002741375214661029,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.3481467140343286,
"compute_intensity": 327.68,
"tile_compute_intensity": 6.7368421052631575,
"MxNxK": 1073741824,
"size_m": 512,
"size_n": 4096,
"size_k": 512,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_512x4096x512.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0005482750429322058,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.4146767992302247,
"compute_intensity": 62.89443378119002,
"tile_compute_intensity": 6.918918918918919,
"MxNxK": 2147483648,
"size_m": 16384,
"size_n": 2048,
"size_k": 64,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_16384x2048x64.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.000004706955668415478,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 2.7826055966291983,
"compute_intensity": 42.44559585492228,
"tile_compute_intensity": 1.5609756097560976,
"MxNxK": 16777216,
"size_m": 4096,
"size_n": 64,
"size_k": 64,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_4096x64x64.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.000017133595091631432,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.8910216538764506,
"compute_intensity": 51.0404984423676,
"tile_compute_intensity": 2.6122448979591835,
"MxNxK": 67108864,
"size_m": 8192,
"size_n": 128,
"size_k": 64,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_8192x128x64.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 2.3717218484264035e-7,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 17.607461951067204,
"compute_intensity": 24.38095238095238,
"tile_compute_intensity": 1,
"MxNxK": 524288,
"size_m": 256,
"size_n": 64,
"size_k": 32,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_256x64x32.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.000001249714973978528,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 3.4593488987314416,
"compute_intensity": 7.728301886792453,
"tile_compute_intensity": 0.6530612244897959,
"MxNxK": 1048576,
"size_m": 128,
"size_n": 1024,
"size_k": 8,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_128x1024x8.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.000004283398772907858,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 3.7831640712139643,
"compute_intensity": 128,
"tile_compute_intensity": 2.4615384615384617,
"MxNxK": 16777216,
"size_m": 256,
"size_n": 256,
"size_k": 256,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_256x256x256.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.000002141699386453929,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 4.536211121837833,
"compute_intensity": 85.33333333333333,
"tile_compute_intensity": 2.2857142857142856,
"MxNxK": 8388608,
"size_m": 256,
"size_n": 256,
"size_k": 128,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_256x256x128.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.00006853438036652573,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.403418205383715,
"compute_intensity": 167.18367346938774,
"tile_compute_intensity": 3.5555555555555554,
"MxNxK": 268435456,
"size_m": 4096,
"size_n": 128,
"size_k": 512,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_4096x128x512.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0000771356921165141,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.2666717024764271,
"compute_intensity": 15.7462758289284,
"tile_compute_intensity": 1.5950155763239875,
"MxNxK": 134217728,
"size_m": 512,
"size_n": 16384,
"size_k": 16,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_512x16384x16.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.008772400686915293,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.1427527110725646,
"compute_intensity": 1260.3076923076924,
"tile_compute_intensity": 15.875968992248062,
"MxNxK": 34359738368,
"size_m": 2048,
"size_n": 1024,
"size_k": 16384,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_2048x1024x16384.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.000034267190183262865,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 3.3896446215522054,
"compute_intensity": 163.84,
"tile_compute_intensity": 1.9922178988326849,
"MxNxK": 134217728,
"size_m": 256,
"size_n": 128,
"size_k": 4096,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_256x128x4096.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.008772400686915293,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.1608750043993992,
"compute_intensity": 1260.3076923076924,
"tile_compute_intensity": 24.38095238095238,
"MxNxK": 34359738368,
"size_m": 8192,
"size_n": 1024,
"size_k": 4096,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_8192x1024x4096.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0021931001717288233,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.2989422594443836,
"compute_intensity": 404.5432098765432,
"tile_compute_intensity": 3.9083969465648853,
"MxNxK": 8589934592,
"size_m": 256,
"size_n": 16384,
"size_k": 2048,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_256x16384x2048.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.000034267190183262865,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 3.043377632950108,
"compute_intensity": 227.55555555555554,
"tile_compute_intensity": 2.6391752577319587,
"MxNxK": 134217728,
"size_m": 256,
"size_n": 256,
"size_k": 2048,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_256x256x2048.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.00003794754957482245,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.3518449481455685,
"compute_intensity": 15.875968992248062,
"tile_compute_intensity": 1.8285714285714285,
"MxNxK": 67108864,
"size_m": 2048,
"size_n": 2048,
"size_k": 16,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_2048x2048x16.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0002741375214661029,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.3636134366475328,
"compute_intensity": 252.06153846153848,
"tile_compute_intensity": 6.7368421052631575,
"MxNxK": 1073741824,
"size_m": 8192,
"size_n": 256,
"size_k": 512,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_8192x256x512.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0010965500858644117,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.3071774418262883,
"compute_intensity": 630.1538461538462,
"tile_compute_intensity": 12.8,
"MxNxK": 4294967296,
"size_m": 1024,
"size_n": 4096,
"size_k": 1024,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_1024x4096x1024.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0021931001717288233,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 2.0098468713182553,
"compute_intensity": 442.81081081081084,
"tile_compute_intensity": 3.757798165137615,
"MxNxK": 8589934592,
"size_m": 256,
"size_n": 2048,
"size_k": 16384,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_256x2048x16384.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.000034267190183262865,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 3.063828593492419,
"compute_intensity": 186.1818181818182,
"tile_compute_intensity": 1.7655172413793103,
"MxNxK": 134217728,
"size_m": 128,
"size_n": 512,
"size_k": 2048,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_128x512x2048.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0021931001717288233,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.1542031664421528,
"compute_intensity": 744.7272727272727,
"tile_compute_intensity": 25.6,
"MxNxK": 8589934592,
"size_m": 4096,
"size_n": 2048,
"size_k": 1024,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_4096x2048x1024.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.000019010262815848558,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.3937734289957873,
"compute_intensity": 7.930300096805421,
"tile_compute_intensity": 0.8827586206896552,
"MxNxK": 16777216,
"size_m": 512,
"size_n": 4096,
"size_k": 8,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_512x4096x8.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.00003809350168857177,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.3548978301632146,
"compute_intensity": 15.845261121856867,
"tile_compute_intensity": 1.7534246575342465,
"MxNxK": 67108864,
"size_m": 1024,
"size_n": 4096,
"size_k": 16,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_1024x4096x16.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.00006853438036652573,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.4479623529206136,
"compute_intensity": 101.7639751552795,
"tile_compute_intensity": 1.9104477611940298,
"MxNxK": 268435456,
"size_m": 8192,
"size_n": 64,
"size_k": 512,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_8192x64x512.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.00007593158717808223,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.109588303846308,
"compute_intensity": 7.9360620004843785,
"tile_compute_intensity": 0.9377289377289377,
"MxNxK": 67108864,
"size_m": 16384,
"size_n": 512,
"size_k": 8,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_16384x512x8.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.00007735462028713808,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.672298241532908,
"compute_intensity": 31.44721689059501,
"tile_compute_intensity": 3.4594594594594597,
"MxNxK": 268435456,
"size_m": 8192,
"size_n": 1024,
"size_k": 32,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_8192x1024x32.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.000005327252151850076,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 2.370302460780659,
"compute_intensity": 29.8978102189781,
"tile_compute_intensity": 1.9393939393939394,
"MxNxK": 16777216,
"size_m": 256,
"size_n": 2048,
"size_k": 32,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_256x2048x32.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0010965500858644117,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 2.0164418457613866,
"compute_intensity": 431.1578947368421,
"tile_compute_intensity": 3.750915750915751,
"MxNxK": 4294967296,
"size_m": 256,
"size_n": 2048,
"size_k": 8192,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_256x2048x8192.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.000034267190183262865,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 3.553066134815623,
"compute_intensity": 101.1358024691358,
"tile_compute_intensity": 1.3315994798439532,
"MxNxK": 134217728,
"size_m": 256,
"size_n": 64,
"size_k": 8192,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_256x64x8192.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0005482750429322058,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.3510999716175425,
"compute_intensity": 334.3673469387755,
"tile_compute_intensity": 11.636363636363637,
"MxNxK": 2147483648,
"size_m": 8192,
"size_n": 512,
"size_k": 512,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_8192x512x512.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0005482750429322058,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.3636660095925464,
"compute_intensity": 585.1428571428571,
"tile_compute_intensity": 10.24,
"MxNxK": 2147483648,
"size_m": 2048,
"size_n": 512,
"size_k": 2048,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_2048x512x2048.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.000008566797545815716,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 2.7185887732214904,
"compute_intensity": 146.28571428571428,
"tile_compute_intensity": 2.909090909090909,
"MxNxK": 33554432,
"size_m": 256,
"size_n": 512,
"size_k": 256,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_256x512x256.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0000014686431446025037,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 3.66052156296311,
"compute_intensity": 14.173010380622838,
"tile_compute_intensity": 0.9696969696969697,
"MxNxK": 2097152,
"size_m": 2048,
"size_n": 64,
"size_k": 16,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_2048x64x16.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.00013706876073305146,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.4294023197575925,
"compute_intensity": 215.57894736842104,
"tile_compute_intensity": 9.142857142857142,
"MxNxK": 536870912,
"size_m": 1024,
"size_n": 2048,
"size_k": 256,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_1024x2048x256.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.008772400686915293,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.0803169456832369,
"compute_intensity": 799.219512195122,
"tile_compute_intensity": 36.57142857142857,
"MxNxK": 34359738368,
"size_m": 16384,
"size_n": 2048,
"size_k": 1024,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_16384x2048x1024.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.000034267190183262865,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.5525053951871837,
"compute_intensity": 101.1358024691358,
"tile_compute_intensity": 3.1219512195121952,
"MxNxK": 134217728,
"size_m": 256,
"size_n": 4096,
"size_k": 128,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_256x4096x128.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.004386200343457647,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.1412221504229876,
"compute_intensity": 1170.2857142857142,
"tile_compute_intensity": 23.272727272727273,
"MxNxK": 17179869184,
"size_m": 2048,
"size_n": 4096,
"size_k": 2048,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_2048x4096x2048.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0021931001717288233,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.1521195282191863,
"compute_intensity": 244.53731343283582,
"tile_compute_intensity": 3.8641509433962264,
"MxNxK": 8589934592,
"size_m": 8192,
"size_n": 128,
"size_k": 8192,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_8192x128x8192.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0001517901982992898,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.294502634995624,
"compute_intensity": 31.751937984496124,
"tile_compute_intensity": 3.657142857142857,
"MxNxK": 536870912,
"size_m": 4096,
"size_n": 4096,
"size_k": 32,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_4096x4096x32.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0005482750429322058,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.2138375327711814,
"compute_intensity": 63.25868725868726,
"tile_compute_intensity": 6.918918918918919,
"MxNxK": 2147483648,
"size_m": 4096,
"size_n": 8192,
"size_k": 64,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_4096x8192x64.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.000005874572578410015,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 2.2693055525078774,
"compute_intensity": 28.346020761245676,
"tile_compute_intensity": 1.9393939393939394,
"MxNxK": 16777216,
"size_m": 4096,
"size_n": 128,
"size_k": 32,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_4096x128x32.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.00003973546296825159,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.4637605191245353,
"compute_intensity": 15.50780880265026,
"tile_compute_intensity": 1.5900621118012421,
"MxNxK": 67108864,
"size_m": 16384,
"size_n": 256,
"size_k": 16,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_16384x256x16.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0012026454172943731,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.475725330176873,
"compute_intensity": 31.906523855890946,
"tile_compute_intensity": 3.8496240601503757,
"MxNxK": 4294967296,
"size_m": 8192,
"size_n": 16384,
"size_k": 32,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_8192x16384x32.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0005482750429322058,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.3816656610250357,
"compute_intensity": 120.02930402930403,
"tile_compute_intensity": 1.9692307692307693,
"MxNxK": 2147483648,
"size_m": 16384,
"size_n": 64,
"size_k": 2048,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_16384x64x2048.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.000034267190183262865,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 2.8773412314297024,
"compute_intensity": 256,
"tile_compute_intensity": 3.878787878787879,
"MxNxK": 134217728,
"size_m": 512,
"size_n": 256,
"size_k": 1024,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_512x256x1024.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.00006853438036652573,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.4354022601897953,
"compute_intensity": 167.18367346938774,
"tile_compute_intensity": 3.4594594594594597,
"MxNxK": 268435456,
"size_m": 256,
"size_n": 4096,
"size_k": 256,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_256x4096x256.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.004386200343457647,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.1572972413805078,
"compute_intensity": 668.734693877551,
"tile_compute_intensity": 14.222222222222221,
"MxNxK": 17179869184,
"size_m": 16384,
"size_n": 512,
"size_k": 2048,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_16384x512x2048.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.00006853438036652573,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.5052532738557611,
"compute_intensity": 117.02857142857142,
"tile_compute_intensity": 7.111111111111111,
"MxNxK": 268435456,
"size_m": 1024,
"size_n": 2048,
"size_k": 128,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_1024x2048x128.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.000004283398772907858,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 3.090629754281531,
"compute_intensity": 93.0909090909091,
"tile_compute_intensity": 2.6666666666666665,
"MxNxK": 16777216,
"size_m": 256,
"size_n": 512,
"size_k": 128,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_256x512x128.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 2.9190422749863426e-7,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 17.73732384369749,
"compute_intensity": 42.666666666666664,
"tile_compute_intensity": 1.1428571428571428,
"MxNxK": 1048576,
"size_m": 128,
"size_n": 128,
"size_k": 64,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_128x128x64.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0006024903255571812,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.4857266662642554,
"compute_intensity": 31.875486381322958,
"tile_compute_intensity": 3.8208955223880596,
"MxNxK": 2147483648,
"size_m": 8192,
"size_n": 8192,
"size_k": 32,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_8192x8192x32.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0005482750429322058,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.3401273440163648,
"compute_intensity": 63.25868725868726,
"tile_compute_intensity": 7.111111111111111,
"MxNxK": 2147483648,
"size_m": 8192,
"size_n": 4096,
"size_k": 64,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_8192x4096x64.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0000753842667515223,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.1197243729979034,
"compute_intensity": 7.964997569275644,
"tile_compute_intensity": 0.9624060150375939,
"MxNxK": 67108864,
"size_m": 8192,
"size_n": 1024,
"size_k": 8,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_8192x1024x8.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.00013706876073305146,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.446865030708054,
"compute_intensity": 127.50194552529183,
"tile_compute_intensity": 3.506849315068493,
"MxNxK": 536870912,
"size_m": 16384,
"size_n": 128,
"size_k": 256,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_16384x128x256.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0005482750429322058,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.3754439629847064,
"compute_intensity": 203.527950310559,
"tile_compute_intensity": 3.8208955223880596,
"MxNxK": 2147483648,
"size_m": 16384,
"size_n": 128,
"size_k": 1024,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_16384x128x1024.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.00013706876073305146,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.4049356864960838,
"compute_intensity": 199.8048780487805,
"tile_compute_intensity": 1.9393939393939394,
"MxNxK": 536870912,
"size_m": 128,
"size_n": 4096,
"size_k": 1024,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_128x4096x1024.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.000009651083521673596,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.6008150339850462,
"compute_intensity": 7.869356388088376,
"tile_compute_intensity": 0.7950310559006211,
"MxNxK": 8388608,
"size_m": 256,
"size_n": 4096,
"size_k": 8,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_256x4096x8.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0005482750429322058,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.3749653708277578,
"compute_intensity": 390.0952380952381,
"tile_compute_intensity": 6.918918918918919,
"MxNxK": 2147483648,
"size_m": 4096,
"size_n": 256,
"size_k": 2048,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_4096x256x2048.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.00006853438036652573,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.5702950945678584,
"compute_intensity": 85.11168831168831,
"tile_compute_intensity": 3.1604938271604937,
"MxNxK": 268435456,
"size_m": 16384,
"size_n": 128,
"size_k": 128,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_16384x128x128.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.00006853438036652573,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.4580476952931063,
"compute_intensity": 195.04761904761904,
"tile_compute_intensity": 3.4594594594594597,
"MxNxK": 268435456,
"size_m": 2048,
"size_n": 128,
"size_k": 1024,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_2048x128x1024.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0011973911411993978,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.0214980863374974,
"compute_intensity": 7.9941449133935105,
"tile_compute_intensity": 0.9903288201160542,
"MxNxK": 1073741824,
"size_m": 8192,
"size_n": 16384,
"size_k": 8,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_8192x16384x8.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.00006853438036652573,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.5099224987750859,
"compute_intensity": 61.59398496240601,
"tile_compute_intensity": 5.818181818181818,
"MxNxK": 268435456,
"size_m": 4096,
"size_n": 1024,
"size_k": 64,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_4096x1024x64.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0021931001717288233,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.1205383061727165,
"compute_intensity": 1024,
"tile_compute_intensity": 15.515151515151516,
"MxNxK": 8589934592,
"size_m": 2048,
"size_n": 1024,
"size_k": 4096,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_2048x1024x4096.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0010965500858644117,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.145614751623558,
"compute_intensity": 237.44927536231884,
"tile_compute_intensity": 18.285714285714285,
"MxNxK": 4294967296,
"size_m": 8192,
"size_n": 2048,
"size_k": 256,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_8192x2048x256.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0005482750429322058,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.3376818952186689,
"compute_intensity": 234.05714285714285,
"tile_compute_intensity": 3.7372262773722627,
"MxNxK": 2147483648,
"size_m": 4096,
"size_n": 128,
"size_k": 4096,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_4096x128x4096.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 3.1927024882663123e-7,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 11.20555395804783,
"compute_intensity": 7.641791044776119,
"tile_compute_intensity": 0.6666666666666666,
"MxNxK": 262144,
"size_m": 256,
"size_n": 128,
"size_k": 8,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_256x128x8.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0002741375214661029,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 2.173461010493289,
"compute_intensity": 221.40540540540542,
"tile_compute_intensity": 3.190031152647975,
"MxNxK": 1073741824,
"size_m": 1024,
"size_n": 128,
"size_k": 8192,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_1024x128x8192.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.00015441733634677753,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.3747678919130564,
"compute_intensity": 31.477425552353505,
"tile_compute_intensity": 3.1801242236024843,
"MxNxK": 536870912,
"size_m": 1024,
"size_n": 16384,
"size_k": 32,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_1024x16384x32.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0002741375214661029,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.3835508130242598,
"compute_intensity": 455.1111111111111,
"tile_compute_intensity": 5.278350515463917,
"MxNxK": 1073741824,
"size_m": 512,
"size_n": 512,
"size_k": 4096,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_512x512x4096.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.000004283398772907858,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 22.620541176856538,
"compute_intensity": 113.77777777777777,
"tile_compute_intensity": 1.3195876288659794,
"MxNxK": 16777216,
"size_m": 128,
"size_n": 128,
"size_k": 1024,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_128x128x1024.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.000001222348952650531,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 3.4006656206515413,
"compute_intensity": 7.816793893129771,
"tile_compute_intensity": 0.8,
"MxNxK": 1048576,
"size_m": 512,
"size_n": 256,
"size_k": 8,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_512x256x8.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0005482750429322058,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.192662696283232,
"compute_intensity": 123.18796992481202,
"tile_compute_intensity": 11.636363636363637,
"MxNxK": 2147483648,
"size_m": 8192,
"size_n": 2048,
"size_k": 128,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_8192x2048x128.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0000010708496932269645,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 21.60228459685833,
"compute_intensity": 73.14285714285714,
"tile_compute_intensity": 0.9846153846153847,
"MxNxK": 4194304,
"size_m": 128,
"size_n": 64,
"size_k": 512,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_128x64x512.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0000012041049384318665,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 6.189493561667562,
"compute_intensity": 41.795918367346935,
"tile_compute_intensity": 1.4545454545454546,
"MxNxK": 4194304,
"size_m": 1024,
"size_n": 64,
"size_k": 64,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_1024x64x64.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0005482750429322058,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.2078083757104086,
"compute_intensity": 120.02930402930403,
"tile_compute_intensity": 10.24,
"MxNxK": 2147483648,
"size_m": 16384,
"size_n": 1024,
"size_k": 128,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_16384x1024x128.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0000771356921165141,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.4603252340609512,
"compute_intensity": 15.7462758289284,
"tile_compute_intensity": 1.7655172413793103,
"MxNxK": 134217728,
"size_m": 16384,
"size_n": 512,
"size_k": 16,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_16384x512x16.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.00013706876073305146,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.4330676477301332,
"compute_intensity": 215.57894736842104,
"tile_compute_intensity": 10.666666666666666,
"MxNxK": 536870912,
"size_m": 2048,
"size_n": 1024,
"size_k": 256,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_2048x1024x256.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0010965500858644117,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.13423079805679,
"compute_intensity": 240.94117647058823,
"tile_compute_intensity": 18.285714285714285,
"MxNxK": 4294967296,
"size_m": 4096,
"size_n": 4096,
"size_k": 256,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_4096x4096x256.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.00013706876073305146,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.4438533398677007,
"compute_intensity": 341.3333333333333,
"tile_compute_intensity": 3.506849315068493,
"MxNxK": 536870912,
"size_m": 256,
"size_n": 1024,
"size_k": 2048,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_256x1024x2048.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.00006853438036652573,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.502871948763211,
"compute_intensity": 112.21917808219177,
"tile_compute_intensity": 1.9104477611940298,
"MxNxK": 268435456,
"size_m": 4096,
"size_n": 64,
"size_k": 1024,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_4096x64x1024.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.00001933865507178452,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.5433544901717868,
"compute_intensity": 15.723608445297504,
"tile_compute_intensity": 1.5802469135802468,
"MxNxK": 33554432,
"size_m": 512,
"size_n": 4096,
"size_k": 16,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_512x4096x16.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0005482750429322058,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.156429830253815,
"compute_intensity": 124.12121212121212,
"tile_compute_intensity": 11.636363636363637,
"MxNxK": 2147483648,
"size_m": 4096,
"size_n": 4096,
"size_k": 128,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_4096x4096x128.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.00006853438036652573,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.502871948763211,
"compute_intensity": 117.02857142857142,
"tile_compute_intensity": 8,
"MxNxK": 268435456,
"size_m": 2048,
"size_n": 1024,
"size_k": 128,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_2048x1024x128.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0010965500858644117,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.154296591707928,
"compute_intensity": 225.98620689655172,
"tile_compute_intensity": 15.058823529411764,
"MxNxK": 4294967296,
"size_m": 16384,
"size_n": 1024,
"size_k": 256,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_16384x1024x256.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.000008566797545815716,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 2.7137328676347345,
"compute_intensity": 120.47058823529412,
"tile_compute_intensity": 1.7777777777777777,
"MxNxK": 33554432,
"size_m": 128,
"size_n": 1024,
"size_k": 256,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_128x1024x256.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0002741375214661029,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.3721696622154649,
"compute_intensity": 455.1111111111111,
"tile_compute_intensity": 6.7368421052631575,
"MxNxK": 1073741824,
"size_m": 512,
"size_n": 2048,
"size_k": 1024,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_512x2048x1024.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.017544801373830587,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.115484848614876,
"compute_intensity": 1489.4545454545455,
"tile_compute_intensity": 51.2,
"MxNxK": 68719476736,
"size_m": 8192,
"size_n": 4096,
"size_k": 2048,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_8192x4096x2048.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.000001331813037962519,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 3.74346840274397,
"compute_intensity": 14.94890510948905,
"tile_compute_intensity": 1.2307692307692308,
"MxNxK": 2097152,
"size_m": 1024,
"size_n": 128,
"size_k": 16,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_1024x128x16.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0011973911411993978,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.02112669012946,
"compute_intensity": 7.9941449133935105,
"tile_compute_intensity": 0.9922480620155039,
"MxNxK": 1073741824,
"size_m": 16384,
"size_n": 8192,
"size_k": 8,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_16384x8192x8.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.000004283398772907858,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 3.116777277938455,
"compute_intensity": 81.92,
"tile_compute_intensity": 1.6842105263157894,
"MxNxK": 16777216,
"size_m": 128,
"size_n": 1024,
"size_k": 128,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_128x1024x128.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.00006853438036652573,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.5631045765214402,
"compute_intensity": 101.7639751552795,
"tile_compute_intensity": 5.12,
"MxNxK": 268435456,
"size_m": 8192,
"size_n": 256,
"size_k": 128,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_8192x256x128.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.000034267190183262865,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.789793679767824,
"compute_intensity": 59.7956204379562,
"tile_compute_intensity": 4.923076923076923,
"MxNxK": 134217728,
"size_m": 4096,
"size_n": 512,
"size_k": 64,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_4096x512x64.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.00013706876073305146,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.4461414095585952,
"compute_intensity": 168.90721649484536,
"tile_compute_intensity": 6.095238095238095,
"MxNxK": 536870912,
"size_m": 8192,
"size_n": 256,
"size_k": 256,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_8192x256x256.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0002741375214661029,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.4478222150219509,
"compute_intensity": 61.94328922495274,
"tile_compute_intensity": 6.2439024390243905,
"MxNxK": 1073741824,
"size_m": 16384,
"size_n": 1024,
"size_k": 64,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_16384x1024x64.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.00013706876073305146,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.428818666642677,
"compute_intensity": 199.8048780487805,
"tile_compute_intensity": 6.095238095238095,
"MxNxK": 536870912,
"size_m": 512,
"size_n": 4096,
"size_k": 256,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_512x4096x256.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.000002490307940847724,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 2.106084998238796,
"compute_intensity": 7.742911153119093,
"tile_compute_intensity": 0.6597938144329897,
"MxNxK": 2097152,
"size_m": 128,
"size_n": 2048,
"size_k": 8,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_128x2048x8.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.000017133595091631432,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.8039880351932298,
"compute_intensity": 84.45360824742268,
"tile_compute_intensity": 3.0476190476190474,
"MxNxK": 67108864,
"size_m": 4096,
"size_n": 128,
"size_k": 128,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_4096x128x128.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.000008566797545815716,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 3.415605294677579,
"compute_intensity": 170.66666666666666,
"tile_compute_intensity": 2.56,
"MxNxK": 33554432,
"size_m": 256,
"size_n": 256,
"size_k": 512,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_256x256x512.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.000017133595091631432,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 5.6805357554730955,
"compute_intensity": 204.8,
"tile_compute_intensity": 2.6122448979591835,
"MxNxK": 67108864,
"size_m": 256,
"size_n": 256,
"size_k": 1024,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_256x256x1024.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0010965500858644117,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.312275658579641,
"compute_intensity": 337.8144329896907,
"tile_compute_intensity": 3.8496240601503757,
"MxNxK": 4294967296,
"size_m": 256,
"size_n": 16384,
"size_k": 1024,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_256x16384x1024.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.000019119726901160545,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.5616959467001847,
"compute_intensity": 15.814671814671815,
"tile_compute_intensity": 1.7777777777777777,
"MxNxK": 33554432,
"size_m": 2048,
"size_n": 1024,
"size_k": 16,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_2048x1024x16.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 6.750285260905918e-7,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 7.01599943137294,
"compute_intensity": 14.840579710144928,
"tile_compute_intensity": 1.1428571428571428,
"MxNxK": 1048576,
"size_m": 512,
"size_n": 128,
"size_k": 16,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_512x128x16.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.000034267190183262865,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.5467157465350345,
"compute_intensity": 126.03076923076924,
"tile_compute_intensity": 3.3684210526315788,
"MxNxK": 134217728,
"size_m": 4096,
"size_n": 128,
"size_k": 256,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_4096x128x256.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.00006853438036652573,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.500864154869642,
"compute_intensity": 112.21917808219177,
"tile_compute_intensity": 5.12,
"MxNxK": 268435456,
"size_m": 512,
"size_n": 4096,
"size_k": 128,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_512x4096x128.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0021931001717288233,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.295694202681619,
"compute_intensity": 910.2222222222222,
"tile_compute_intensity": 13.473684210526315,
"MxNxK": 8589934592,
"size_m": 1024,
"size_n": 4096,
"size_k": 2048,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_1024x4096x2048.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.008772400686915293,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.1777727472880999,
"compute_intensity": 250.13740458015266,
"tile_compute_intensity": 3.930902111324376,
"MxNxK": 34359738368,
"size_m": 16384,
"size_n": 128,
"size_k": 16384,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_16384x128x16384.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0002741375214661029,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.3460222234906005,
"compute_intensity": 372.3636363636364,
"tile_compute_intensity": 10.666666666666666,
"MxNxK": 1073741824,
"size_m": 1024,
"size_n": 2048,
"size_k": 512,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_1024x2048x512.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.000017133595091631432,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 2.52640501790828,
"compute_intensity": 204.8,
"tile_compute_intensity": 3.0476190476190474,
"MxNxK": 67108864,
"size_m": 256,
"size_n": 512,
"size_k": 512,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_256x512x512.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.00013706876073305146,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.5476027769675402,
"compute_intensity": 215.57894736842104,
"tile_compute_intensity": 1.8754578754578755,
"MxNxK": 536870912,
"size_m": 128,
"size_n": 1024,
"size_k": 4096,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_128x1024x4096.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.004386200343457647,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.1496288106336194,
"compute_intensity": 1170.2857142857142,
"tile_compute_intensity": 28.444444444444443,
"MxNxK": 17179869184,
"size_m": 4096,
"size_n": 2048,
"size_k": 2048,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_4096x2048x2048.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0021931001717288233,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.1358941924478876,
"compute_intensity": 244.53731343283582,
"tile_compute_intensity": 1.9806576402321083,
"MxNxK": 8589934592,
"size_m": 128,
"size_n": 8192,
"size_k": 8192,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_128x8192x8192.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 6.202964834345979e-7,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 5.984235173687275,
"compute_intensity": 7.757575757575758,
"tile_compute_intensity": 0.7272727272727273,
"MxNxK": 524288,
"size_m": 256,
"size_n": 256,
"size_k": 8,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_256x256x8.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.00003973546296825159,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.3847579620384758,
"compute_intensity": 15.50780880265026,
"tile_compute_intensity": 1.3298701298701299,
"MxNxK": 67108864,
"size_m": 256,
"size_n": 16384,
"size_k": 16,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_256x16384x16.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.000017133595091631432,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 5.696597766893407,
"compute_intensity": 170.66666666666666,
"tile_compute_intensity": 1.7534246575342465,
"MxNxK": 67108864,
"size_m": 128,
"size_n": 512,
"size_k": 1024,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_128x512x1024.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.00006853438036652573,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.733529927163845,
"compute_intensity": 195.04761904761904,
"tile_compute_intensity": 2.6528497409326426,
"MxNxK": 268435456,
"size_m": 512,
"size_n": 128,
"size_k": 4096,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_512x128x4096.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.000002700114104362367,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 3.0102431176191606,
"compute_intensity": 29.681159420289855,
"tile_compute_intensity": 1.8823529411764706,
"MxNxK": 8388608,
"size_m": 256,
"size_n": 1024,
"size_k": 32,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_256x1024x32.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0010965500858644117,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 2.057422923525738,
"compute_intensity": 125.06870229007633,
"tile_compute_intensity": 1.965451055662188,
"MxNxK": 4294967296,
"size_m": 8192,
"size_n": 64,
"size_k": 8192,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_8192x64x8192.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0010965500858644117,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 2.0183592719433325,
"compute_intensity": 630.1538461538462,
"tile_compute_intensity": 7.937984496124031,
"MxNxK": 4294967296,
"size_m": 1024,
"size_n": 512,
"size_k": 8192,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_1024x512x8192.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.00015018472504804733,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.0547224987668262,
"compute_intensity": 7.980516317584024,
"tile_compute_intensity": 0.9660377358490566,
"MxNxK": 134217728,
"size_m": 2048,
"size_n": 8192,
"size_k": 8,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_2048x8192x8.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.00015237400675428709,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.5581450650182387,
"compute_intensity": 31.690522243713733,
"tile_compute_intensity": 3.657142857142857,
"MxNxK": 536870912,
"size_m": 8192,
"size_n": 2048,
"size_k": 32,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_8192x2048x32.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 4.013683128106221e-7,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 10.053609429295157,
"compute_intensity": 26.94736842105263,
"tile_compute_intensity": 1.1428571428571428,
"MxNxK": 1048576,
"size_m": 128,
"size_n": 256,
"size_k": 32,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_128x256x32.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 6.294184905439302e-7,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 7.483733154823168,
"compute_intensity": 7.6992481203007515,
"tile_compute_intensity": 0.7272727272727273,
"MxNxK": 524288,
"size_m": 512,
"size_n": 128,
"size_k": 8,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_512x128x8.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 6.294184905439302e-7,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 6.502509968226466,
"compute_intensity": 7.6992481203007515,
"tile_compute_intensity": 0.64,
"MxNxK": 524288,
"size_m": 128,
"size_n": 512,
"size_k": 8,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_128x512x8.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.00013706876073305146,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.361045345955492,
"compute_intensity": 248.24242424242425,
"tile_compute_intensity": 3.657142857142857,
"MxNxK": 536870912,
"size_m": 256,
"size_n": 4096,
"size_k": 512,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_256x4096x512.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0021931001717288233,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.1384739188321202,
"compute_intensity": 744.7272727272727,
"tile_compute_intensity": 12.487804878048781,
"MxNxK": 8589934592,
"size_m": 4096,
"size_n": 512,
"size_k": 4096,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_4096x512x4096.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.017544801373830587,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.1197176076789115,
"compute_intensity": 1310.72,
"tile_compute_intensity": 42.666666666666664,
"MxNxK": 68719476736,
"size_m": 16384,
"size_n": 2048,
"size_k": 2048,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_16384x2048x2048.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0021931001717288233,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 2.0355242110921505,
"compute_intensity": 244.53731343283582,
"tile_compute_intensity": 3.757798165137615,
"MxNxK": 8589934592,
"size_m": 4096,
"size_n": 128,
"size_k": 16384,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_4096x128x16384.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.00013706876073305146,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.335388135079768,
"compute_intensity": 112.99310344827586,
"tile_compute_intensity": 7.529411764705882,
"MxNxK": 536870912,
"size_m": 8192,
"size_n": 512,
"size_k": 128,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_8192x512x128.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0002741375214661029,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.3845197179624082,
"compute_intensity": 455.1111111111111,
"tile_compute_intensity": 9.846153846153847,
"MxNxK": 1073741824,
"size_m": 2048,
"size_n": 512,
"size_k": 1024,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_2048x512x1024.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.000005272520109194081,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.9087646115563475,
"compute_intensity": 15.03119266055046,
"tile_compute_intensity": 0.9922480620155039,
"MxNxK": 8388608,
"size_m": 128,
"size_n": 4096,
"size_k": 16,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_128x4096x16.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.000034267190183262865,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.6172203973331873,
"compute_intensity": 84.89119170984456,
"tile_compute_intensity": 1.855072463768116,
"MxNxK": 134217728,
"size_m": 8192,
"size_n": 64,
"size_k": 256,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_8192x64x256.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.00003984492705356358,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.842824318676884,
"compute_intensity": 30.97164461247637,
"tile_compute_intensity": 3.1219512195121952,
"MxNxK": 134217728,
"size_m": 8192,
"size_n": 512,
"size_k": 32,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_8192x512x32.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0021931001717288233,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.1528271821414036,
"compute_intensity": 744.7272727272727,
"tile_compute_intensity": 21.333333333333332,
"MxNxK": 8589934592,
"size_m": 2048,
"size_n": 4096,
"size_k": 1024,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_2048x4096x1024.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0002741375214661029,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.3859904864418893,
"compute_intensity": 113.3840830449827,
"tile_compute_intensity": 1.9541984732824427,
"MxNxK": 1073741824,
"size_m": 16384,
"size_n": 64,
"size_k": 1024,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_16384x64x1024.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.00006853438036652573,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.5498439641614274,
"compute_intensity": 292.57142857142856,
"tile_compute_intensity": 3.9384615384615387,
"MxNxK": 268435456,
"size_m": 512,
"size_n": 256,
"size_k": 2048,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_512x256x2048.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.00013706876073305146,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.5563343023068905,
"compute_intensity": 120.47058823529412,
"tile_compute_intensity": 1.8754578754578755,
"MxNxK": 536870912,
"size_m": 2048,
"size_n": 64,
"size_k": 4096,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_2048x64x4096.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.00000970581556432959,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.7754303643442821,
"compute_intensity": 15.693486590038313,
"tile_compute_intensity": 1.6842105263157894,
"MxNxK": 16777216,
"size_m": 2048,
"size_n": 512,
"size_k": 16,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_2048x512x16.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.000017133595091631432,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.9104454460875933,
"compute_intensity": 63.750972762645915,
"tile_compute_intensity": 1.7534246575342465,
"MxNxK": 67108864,
"size_m": 8192,
"size_n": 64,
"size_k": 128,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_8192x64x128.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0021931001717288233,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.1612639083012033,
"compute_intensity": 504.12307692307695,
"tile_compute_intensity": 13.473684210526315,
"MxNxK": 8589934592,
"size_m": 16384,
"size_n": 512,
"size_k": 1024,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_16384x512x1024.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0002741375214661029,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.421079402557919,
"compute_intensity": 202.2716049382716,
"tile_compute_intensity": 9.846153846153847,
"MxNxK": 1073741824,
"size_m": 8192,
"size_n": 512,
"size_k": 256,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_8192x512x256.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.004386200343457647,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.3001787853029483,
"compute_intensity": 780.1904761904761,
"tile_compute_intensity": 7.641791044776119,
"MxNxK": 17179869184,
"size_m": 512,
"size_n": 8192,
"size_k": 4096,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_512x8192x4096.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0010965500858644117,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 2.013660830395438,
"compute_intensity": 630.1538461538462,
"tile_compute_intensity": 6.3602484472049685,
"MxNxK": 4294967296,
"size_m": 512,
"size_n": 1024,
"size_k": 8192,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_512x1024x8192.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.000010526796204169499,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.599878928028192,
"compute_intensity": 15.044995408631772,
"tile_compute_intensity": 0.9961089494163424,
"MxNxK": 16777216,
"size_m": 128,
"size_n": 8192,
"size_k": 16,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_128x8192x16.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.00006853438036652573,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.6552276639558547,
"compute_intensity": 60.014652014652015,
"tile_compute_intensity": 5.12,
"MxNxK": 268435456,
"size_m": 8192,
"size_n": 512,
"size_k": 64,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_8192x512x64.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.017544801373830587,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.2947195223634043,
"compute_intensity": 1489.4545454545455,
"tile_compute_intensity": 14.124137931034483,
"MxNxK": 68719476736,
"size_m": 1024,
"size_n": 4096,
"size_k": 16384,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_1024x4096x16384.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.017544801373830587,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.0889079084636253,
"compute_intensity": 2048,
"tile_compute_intensity": 39.38461538461539,
"MxNxK": 68719476736,
"size_m": 4096,
"size_n": 4096,
"size_k": 4096,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_4096x4096x4096.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.00007516533858089832,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.1218361942139115,
"compute_intensity": 7.976630963972736,
"tile_compute_intensity": 0.9624060150375939,
"MxNxK": 67108864,
"size_m": 2048,
"size_n": 4096,
"size_k": 8,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_2048x4096x8.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0005482750429322058,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.352920914979561,
"compute_intensity": 254.015503875969,
"tile_compute_intensity": 6.918918918918919,
"MxNxK": 2147483648,
"size_m": 16384,
"size_n": 256,
"size_k": 512,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_16384x256x512.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0005482750429322058,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.3416156934322234,
"compute_intensity": 334.3673469387755,
"tile_compute_intensity": 6.918918918918919,
"MxNxK": 2147483648,
"size_m": 512,
"size_n": 8192,
"size_k": 512,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_512x8192x512.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.008772400686915293,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.2906387067543437,
"compute_intensity": 1365.3333333333333,
"tile_compute_intensity": 14.027397260273972,
"MxNxK": 34359738368,
"size_m": 1024,
"size_n": 4096,
"size_k": 8192,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_1024x4096x8192.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.000009651083521673596,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.6870231147397998,
"compute_intensity": 7.869356388088376,
"tile_compute_intensity": 0.8767123287671232,
"MxNxK": 8388608,
"size_m": 4096,
"size_n": 256,
"size_k": 8,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_4096x256x8.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.000004283398772907858,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 2.920297828904901,
"compute_intensity": 55.351351351351354,
"tile_compute_intensity": 2.4615384615384617,
"MxNxK": 16777216,
"size_m": 256,
"size_n": 1024,
"size_k": 64,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_256x1024x64.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0002741375214661029,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.3791851352866442,
"compute_intensity": 62.77394636015325,
"tile_compute_intensity": 6.7368421052631575,
"MxNxK": 1073741824,
"size_m": 8192,
"size_n": 2048,
"size_k": 64,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_8192x2048x64.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.00013706876073305146,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.4348653197077674,
"compute_intensity": 127.50194552529183,
"tile_compute_intensity": 1.8754578754578755,
"MxNxK": 536870912,
"size_m": 128,
"size_n": 16384,
"size_k": 256,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_128x16384x256.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0002741375214661029,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.2915095151634282,
"compute_intensity": 119.5912408759124,
"tile_compute_intensity": 9.846153846153847,
"MxNxK": 1073741824,
"size_m": 8192,
"size_n": 1024,
"size_k": 128,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_8192x1024x128.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.004386200343457647,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.122845910237364,
"compute_intensity": 248.24242424242425,
"tile_compute_intensity": 1.9825750242013553,
"MxNxK": 17179869184,
"size_m": 128,
"size_n": 8192,
"size_k": 16384,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_128x8192x16384.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0005482750429322058,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.23970478063078,
"compute_intensity": 224.43835616438355,
"tile_compute_intensity": 14.222222222222221,
"MxNxK": 2147483648,
"size_m": 8192,
"size_n": 1024,
"size_k": 256,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_8192x1024x256.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.00006853438036652573,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.5336419847065725,
"compute_intensity": 85.11168831168831,
"tile_compute_intensity": 1.7716262975778547,
"MxNxK": 268435456,
"size_m": 128,
"size_n": 16384,
"size_k": 128,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_128x16384x128.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.017544801373830587,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.1477456120717708,
"compute_intensity": 910.2222222222222,
"tile_compute_intensity": 51.2,
"MxNxK": 68719476736,
"size_m": 8192,
"size_n": 8192,
"size_k": 1024,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_8192x8192x1024.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 5.354248466134823e-7,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 10.829530669551737,
"compute_intensity": 46.54545454545455,
"tile_compute_intensity": 1.3333333333333333,
"MxNxK": 2097152,
"size_m": 128,
"size_n": 256,
"size_k": 64,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_128x256x64.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0021931001717288233,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 2.0194216021472564,
"compute_intensity": 655.36,
"tile_compute_intensity": 7.968871595330739,
"MxNxK": 8589934592,
"size_m": 1024,
"size_n": 512,
"size_k": 16384,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_1024x512x16384.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.017544801373830587,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.2969953618376882,
"compute_intensity": 910.2222222222222,
"tile_compute_intensity": 7.728301886792453,
"MxNxK": 68719476736,
"size_m": 512,
"size_n": 8192,
"size_k": 16384,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_512x8192x16384.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.000005258837098530083,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.7786441868622151,
"compute_intensity": 7.525953146531925,
"tile_compute_intensity": 0.6632124352331606,
"MxNxK": 4194304,
"size_m": 8192,
"size_n": 64,
"size_k": 8,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_8192x64x8.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0000025176739621757206,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 2.6093928251788308,
"compute_intensity": 15.398496240601503,
"tile_compute_intensity": 1.4545454545454546,
"MxNxK": 4194304,
"size_m": 1024,
"size_n": 256,
"size_k": 16,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_1024x256x16.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0021931001717288233,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.1254044885387076,
"compute_intensity": 239.1824817518248,
"tile_compute_intensity": 19.692307692307693,
"MxNxK": 8589934592,
"size_m": 16384,
"size_n": 2048,
"size_k": 256,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_16384x2048x256.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.00013706876073305146,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.2818092547500488,
"compute_intensity": 120.47058823529412,
"tile_compute_intensity": 9.142857142857142,
"MxNxK": 536870912,
"size_m": 2048,
"size_n": 2048,
"size_k": 128,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_2048x2048x128.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.000004283398772907858,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 22.669100572452333,
"compute_intensity": 93.0909090909091,
"tile_compute_intensity": 1.3195876288659794,
"MxNxK": 16777216,
"size_m": 256,
"size_n": 64,
"size_k": 1024,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_256x64x1024.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.00013706876073305146,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.4037451088818684,
"compute_intensity": 315.0769230769231,
"tile_compute_intensity": 3.657142857142857,
"MxNxK": 536870912,
"size_m": 256,
"size_n": 2048,
"size_k": 1024,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_256x2048x1024.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0002741375214661029,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.3930293158295126,
"compute_intensity": 227.55555555555554,
"tile_compute_intensity": 12.8,
"MxNxK": 1073741824,
"size_m": 2048,
"size_n": 2048,
"size_k": 256,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_2048x2048x256.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0010965500858644117,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.172199930184609,
"compute_intensity": 123.65283018867925,
"tile_compute_intensity": 12.19047619047619,
"MxNxK": 4294967296,
"size_m": 16384,
"size_n": 2048,
"size_k": 128,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_16384x2048x128.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0002741375214661029,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.3886169254590097,
"compute_intensity": 119.5912408759124,
"tile_compute_intensity": 1.9541984732824427,
"MxNxK": 1073741824,
"size_m": 8192,
"size_n": 64,
"size_k": 2048,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_8192x64x2048.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0002741375214661029,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.3785547695388412,
"compute_intensity": 512,
"tile_compute_intensity": 7.757575757575758,
"MxNxK": 1073741824,
"size_m": 1024,
"size_n": 512,
"size_k": 2048,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_1024x512x2048.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0005482750429322058,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.3620900952230381,
"compute_intensity": 224.43835616438355,
"tile_compute_intensity": 1.9692307692307693,
"MxNxK": 2147483648,
"size_m": 128,
"size_n": 8192,
"size_k": 2048,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_128x8192x2048.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.017544801373830587,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.167284402291877,
"compute_intensity": 2048,
"tile_compute_intensity": 31.03030303030303,
"MxNxK": 68719476736,
"size_m": 4096,
"size_n": 2048,
"size_k": 8192,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_4096x2048x8192.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0002741375214661029,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.3704421441192216,
"compute_intensity": 512,
"tile_compute_intensity": 6.2439024390243905,
"MxNxK": 1073741824,
"size_m": 512,
"size_n": 1024,
"size_k": 2048,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_512x1024x2048.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.000008566797545815716,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 2.621469642301657,
"compute_intensity": 63.50387596899225,
"tile_compute_intensity": 1.7297297297297298,
"MxNxK": 33554432,
"size_m": 4096,
"size_n": 64,
"size_k": 128,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_4096x64x128.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.00013706876073305146,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.4336513008450487,
"compute_intensity": 168.90721649484536,
"tile_compute_intensity": 3.506849315068493,
"MxNxK": 536870912,
"size_m": 256,
"size_n": 8192,
"size_k": 256,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_256x8192x256.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.017544801373830587,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.15328997693523,
"compute_intensity": 489.07462686567163,
"tile_compute_intensity": 39.38461538461539,
"MxNxK": 68719476736,
"size_m": 8192,
"size_n": 16384,
"size_k": 512,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_8192x16384x512.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0002741375214661029,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.3842746040378382,
"compute_intensity": 202.2716049382716,
"tile_compute_intensity": 3.764705882352941,
"MxNxK": 1073741824,
"size_m": 8192,
"size_n": 128,
"size_k": 1024,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_8192x128x1024.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.000008566797545815716,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 2.513144405548268,
"compute_intensity": 83.59183673469387,
"tile_compute_intensity": 1.7297297297297298,
"MxNxK": 33554432,
"size_m": 128,
"size_n": 2048,
"size_k": 128,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_128x2048x128.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.008772400686915293,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.1652414465255525,
"compute_intensity": 481.88235294117646,
"tile_compute_intensity": 7.501831501831502,
"MxNxK": 34359738368,
"size_m": 8192,
"size_n": 256,
"size_k": 16384,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_8192x256x16384.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0002741375214661029,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 2.153336698652952,
"compute_intensity": 327.68,
"tile_compute_intensity": 3.9844357976653697,
"MxNxK": 1073741824,
"size_m": 512,
"size_n": 256,
"size_k": 8192,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_512x256x8192.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.000009933865742062898,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.5739291177577628,
"compute_intensity": 7.75390440132513,
"tile_compute_intensity": 0.7950310559006211,
"MxNxK": 8388608,
"size_m": 8192,
"size_n": 128,
"size_k": 8,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_8192x128x8.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.000017133595091631432,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 2.380913002195961,
"compute_intensity": 124.12121212121212,
"tile_compute_intensity": 1.8285714285714285,
"MxNxK": 67108864,
"size_m": 128,
"size_n": 2048,
"size_k": 256,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_128x2048x256.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.00000970581556432959,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.7328991625804369,
"compute_intensity": 15.693486590038313,
"tile_compute_intensity": 1.5609756097560976,
"MxNxK": 16777216,
"size_m": 512,
"size_n": 2048,
"size_k": 16,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_512x2048x16.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.00015441733634677753,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.8214950069375722,
"compute_intensity": 31.477425552353505,
"tile_compute_intensity": 3.506849315068493,
"MxNxK": 536870912,
"size_m": 16384,
"size_n": 1024,
"size_k": 32,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_16384x1024x32.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0021931001717288233,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.1202931922481465,
"compute_intensity": 1024,
"tile_compute_intensity": 12.487804878048781,
"MxNxK": 8589934592,
"size_m": 1024,
"size_n": 2048,
"size_k": 4096,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_1024x2048x4096.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.000019995439583656446,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 2.0006162026846575,
"compute_intensity": 30.91320754716981,
"tile_compute_intensity": 3.0476190476190474,
"MxNxK": 67108864,
"size_m": 4096,
"size_n": 512,
"size_k": 32,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_4096x512x32.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.00006853438036652573,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.5227628673032296,
"compute_intensity": 101.7639751552795,
"tile_compute_intensity": 3.1604938271604937,
"MxNxK": 268435456,
"size_m": 256,
"size_n": 8192,
"size_k": 128,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_256x8192x128.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0010965500858644117,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.1889798723063736,
"compute_intensity": 337.8144329896907,
"tile_compute_intensity": 12.19047619047619,
"MxNxK": 4294967296,
"size_m": 16384,
"size_n": 512,
"size_k": 512,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_16384x512x512.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.000017133595091631432,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 2.481580764438176,
"compute_intensity": 84.45360824742268,
"tile_compute_intensity": 1.8285714285714285,
"MxNxK": 67108864,
"size_m": 4096,
"size_n": 64,
"size_k": 256,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_4096x64x256.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.00013706876073305146,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.3650374924941262,
"compute_intensity": 168.90721649484536,
"tile_compute_intensity": 3.657142857142857,
"MxNxK": 536870912,
"size_m": 8192,
"size_n": 128,
"size_k": 512,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_8192x128x512.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0021931001717288233,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.1486614344725472,
"compute_intensity": 455.1111111111111,
"tile_compute_intensity": 7.062068965517241,
"MxNxK": 8589934592,
"size_m": 4096,
"size_n": 256,
"size_k": 8192,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_4096x256x8192.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.000001277080995306525,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 3.8262255891517793,
"compute_intensity": 15.283582089552239,
"tile_compute_intensity": 1.3333333333333333,
"MxNxK": 2097152,
"size_m": 512,
"size_n": 256,
"size_k": 16,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_512x256x16.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0021931001717288233,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 2.0168811143746495,
"compute_intensity": 244.53731343283582,
"tile_compute_intensity": 1.967339097022094,
"MxNxK": 8589934592,
"size_m": 128,
"size_n": 4096,
"size_k": 16384,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_128x4096x16384.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.00003772862140419848,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.2535841133721053,
"compute_intensity": 7.961127308066083,
"tile_compute_intensity": 0.9552238805970149,
"MxNxK": 33554432,
"size_m": 4096,
"size_n": 1024,
"size_k": 8,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_4096x1024x8.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0010965500858644117,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.318669598837236,
"compute_intensity": 496.4848484848485,
"tile_compute_intensity": 12.8,
"MxNxK": 4294967296,
"size_m": 8192,
"size_n": 512,
"size_k": 1024,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_8192x512x1024.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.017544801373830587,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.3170869748350043,
"compute_intensity": 910.2222222222222,
"tile_compute_intensity": 14.124137931034483,
"MxNxK": 68719476736,
"size_m": 8192,
"size_n": 512,
"size_k": 16384,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_8192x512x16384.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0002741375214661029,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.386597495873249,
"compute_intensity": 372.3636363636364,
"tile_compute_intensity": 5.278350515463917,
"MxNxK": 1073741824,
"size_m": 1024,
"size_n": 256,
"size_k": 4096,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_1024x256x4096.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.000019283923029128524,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.4251042367189735,
"compute_intensity": 7.8731379144642,
"tile_compute_intensity": 0.8827586206896552,
"MxNxK": 16777216,
"size_m": 8192,
"size_n": 256,
"size_k": 8,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_8192x256x8.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.017544801373830587,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.0923314348619277,
"compute_intensity": 1820.4444444444443,
"tile_compute_intensity": 39.38461538461539,
"MxNxK": 68719476736,
"size_m": 8192,
"size_n": 2048,
"size_k": 4096,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_8192x2048x4096.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 1.0034207820265552e-7,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 39.704181331700546,
"compute_intensity": 13.473684210526315,
"tile_compute_intensity": 0.6666666666666666,
"MxNxK": 131072,
"size_m": 128,
"size_n": 64,
"size_k": 16,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_128x64x16.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0000010708496932269645,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 7.563339213379601,
"compute_intensity": 73.14285714285714,
"tile_compute_intensity": 1.7777777777777777,
"MxNxK": 4194304,
"size_m": 256,
"size_n": 128,
"size_k": 128,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_256x128x128.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 8.939566967145676e-7,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 6.271444724037163,
"compute_intensity": 25.28395061728395,
"tile_compute_intensity": 1.2307692307692308,
"MxNxK": 2097152,
"size_m": 1024,
"size_n": 64,
"size_k": 32,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_1024x64x32.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.00013706876073305146,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.3672320519871857,
"compute_intensity": 248.24242424242425,
"tile_compute_intensity": 6.4,
"MxNxK": 536870912,
"size_m": 4096,
"size_n": 256,
"size_k": 512,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_4096x256x512.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.00006853438036652573,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.5607699640617776,
"compute_intensity": 204.8,
"tile_compute_intensity": 3.1604938271604937,
"MxNxK": 268435456,
"size_m": 1024,
"size_n": 128,
"size_k": 2048,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_1024x128x2048.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0010965500858644117,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 2.0274845423118957,
"compute_intensity": 399.609756097561,
"tile_compute_intensity": 5.3194805194805195,
"MxNxK": 4294967296,
"size_m": 1024,
"size_n": 256,
"size_k": 16384,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_1024x256x16384.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.00003860433408669438,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.3348966526792314,
"compute_intensity": 15.738712776176753,
"tile_compute_intensity": 1.5900621118012421,
"MxNxK": 67108864,
"size_m": 512,
"size_n": 8192,
"size_k": 16,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_512x8192x16.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.000002141699386453929,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 6.483076114622274,
"compute_intensity": 102.4,
"tile_compute_intensity": 1.8823529411764706,
"MxNxK": 8388608,
"size_m": 256,
"size_n": 128,
"size_k": 256,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_256x128x256.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0021931001717288233,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.0782646473903348,
"compute_intensity": 404.5432098765432,
"tile_compute_intensity": 12.487804878048781,
"MxNxK": 8589934592,
"size_m": 1024,
"size_n": 16384,
"size_k": 512,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_1024x16384x512.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.008772400686915293,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.0607453716880495,
"compute_intensity": 1260.3076923076924,
"tile_compute_intensity": 25.6,
"MxNxK": 34359738368,
"size_m": 2048,
"size_n": 8192,
"size_k": 2048,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_2048x8192x2048.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0005992793790546961,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.0243342875304067,
"compute_intensity": 7.990246281394782,
"tile_compute_intensity": 0.9884169884169884,
"MxNxK": 536870912,
"size_m": 16384,
"size_n": 4096,
"size_k": 8,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_16384x4096x8.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.00006853438036652573,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.773078031215325,
"compute_intensity": 117.02857142857142,
"tile_compute_intensity": 1.7716262975778547,
"MxNxK": 268435456,
"size_m": 1024,
"size_n": 64,
"size_k": 4096,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_1024x64x4096.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0002741375214661029,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.3665549736064948,
"compute_intensity": 327.68,
"tile_compute_intensity": 10.666666666666666,
"MxNxK": 1073741824,
"size_m": 4096,
"size_n": 512,
"size_k": 512,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_4096x512x512.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0010965500858644117,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.3226149478109228,
"compute_intensity": 630.1538461538462,
"tile_compute_intensity": 18.285714285714285,
"MxNxK": 4294967296,
"size_m": 4096,
"size_n": 1024,
"size_k": 1024,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_4096x1024x1024.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0021931001717288233,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.1069480724886374,
"compute_intensity": 239.1824817518248,
"tile_compute_intensity": 15.515151515151516,
"MxNxK": 8589934592,
"size_m": 2048,
"size_n": 16384,
"size_k": 256,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_2048x16384x256.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.00003882326225731836,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.7419660206480312,
"compute_intensity": 31.386973180076627,
"tile_compute_intensity": 3.3684210526315788,
"MxNxK": 134217728,
"size_m": 4096,
"size_n": 1024,
"size_k": 32,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_4096x1024x32.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.000004971493874586116,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.8692168158646534,
"compute_intensity": 7.750236518448439,
"tile_compute_intensity": 0.7901234567901234,
"MxNxK": 4194304,
"size_m": 4096,
"size_n": 128,
"size_k": 8,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_4096x128x8.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.000002645382061706373,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 2.2971351532150153,
"compute_intensity": 15.003663003663004,
"tile_compute_intensity": 0.9846153846153847,
"MxNxK": 4194304,
"size_m": 128,
"size_n": 2048,
"size_k": 16,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_128x2048x16.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.00013706876073305146,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.358383914929736,
"compute_intensity": 341.3333333333333,
"tile_compute_intensity": 9.142857142857142,
"MxNxK": 536870912,
"size_m": 1024,
"size_n": 1024,
"size_k": 512,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_1024x1024x512.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.000019283923029128524,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.3900906072605284,
"compute_intensity": 7.8731379144642,
"tile_compute_intensity": 0.7975077881619937,
"MxNxK": 16777216,
"size_m": 256,
"size_n": 8192,
"size_k": 8,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_256x8192x8.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0010965500858644117,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.1298241830663531,
"compute_intensity": 123.65283018867925,
"tile_compute_intensity": 10.448979591836734,
"MxNxK": 4294967296,
"size_m": 2048,
"size_n": 16384,
"size_k": 128,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_2048x16384x128.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0021931001717288233,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.120431801369733,
"compute_intensity": 442.81081081081084,
"tile_compute_intensity": 3.9083969465648853,
"MxNxK": 8589934592,
"size_m": 256,
"size_n": 8192,
"size_k": 4096,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_256x8192x4096.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0005482750429322058,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 2.0634023103994705,
"compute_intensity": 390.0952380952381,
"tile_compute_intensity": 3.5432525951557095,
"MxNxK": 2147483648,
"size_m": 256,
"size_n": 1024,
"size_k": 8192,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_256x1024x8192.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0002741375214661029,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.2425763335724704,
"compute_intensity": 61.94328922495274,
"tile_compute_intensity": 5.278350515463917,
"MxNxK": 1073741824,
"size_m": 1024,
"size_n": 16384,
"size_k": 64,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_1024x16384x64.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.004386200343457647,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.073750848140436,
"compute_intensity": 668.734693877551,
"tile_compute_intensity": 23.272727272727273,
"MxNxK": 17179869184,
"size_m": 16384,
"size_n": 1024,
"size_k": 1024,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_16384x1024x1024.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.00030241277968858513,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.5087933754084861,
"compute_intensity": 31.813592233009707,
"tile_compute_intensity": 3.764705882352941,
"MxNxK": 1073741824,
"size_m": 8192,
"size_n": 4096,
"size_k": 32,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_8192x4096x32.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0010965500858644117,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.323770618348562,
"compute_intensity": 63.50387596899225,
"tile_compute_intensity": 7.314285714285714,
"MxNxK": 4294967296,
"size_m": 8192,
"size_n": 8192,
"size_k": 64,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_8192x8192x64.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 2.189281706239757e-7,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 17.730016418728564,
"compute_intensity": 25.6,
"tile_compute_intensity": 1,
"MxNxK": 524288,
"size_m": 128,
"size_n": 128,
"size_k": 32,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_128x128x32.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.000001277080995306525,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 3.6934230008845703,
"compute_intensity": 15.283582089552239,
"tile_compute_intensity": 1.2307692307692308,
"MxNxK": 2097152,
"size_m": 256,
"size_n": 512,
"size_k": 16,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_256x512x16.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.00006853438036652573,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 2.9504606006207235,
"compute_intensity": 101.7639751552795,
"tile_compute_intensity": 1.332465842550423,
"MxNxK": 268435456,
"size_m": 256,
"size_n": 64,
"size_k": 16384,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_256x64x16384.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.0021931001717288233,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.1519677546283316,
"compute_intensity": 655.36,
"tile_compute_intensity": 13.473684210526315,
"MxNxK": 8589934592,
"size_m": 1024,
"size_n": 8192,
"size_k": 1024,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_1024x8192x1024.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.008772400686915293,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.1204164286669098,
"compute_intensity": 862.3157894736842,
"tile_compute_intensity": 7.501831501831502,
"MxNxK": 34359738368,
"size_m": 512,
"size_n": 4096,
"size_k": 16384,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_512x4096x16384.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.004386200343457647,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.0749962918652791,
"compute_intensity": 468.1142857142857,
"tile_compute_intensity": 28.444444444444443,
"MxNxK": 17179869184,
"size_m": 4096,
"size_n": 8192,
"size_k": 512,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_4096x8192x512.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "roofline",
"metric": "sol-sec",
"perf": 0.000009632839507454932,
"perf_norm_to_sol": 1,
"perf_norm_to_cublas": 1.7579863016908581,
"compute_intensity": 15.753846153846155,
"tile_compute_intensity": 1.6842105263157894,
"MxNxK": 16777216,
"size_m": 1024,
"size_n": 1024,
"size_k": 16,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_1024x1024x16.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.000006217599730007351,
"perf_norm_to_sol": 0.10636669199447026,
"perf_norm_to_cublas": 0.7076685900397454,
"compute_intensity": 7.501831501831502,
"tile_compute_intensity": 0.64,
"MxNxK": 524288,
"size_m": 1024,
"size_n": 64,
"size_k": 8,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_1024x64x8.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00001526400010334328,
"perf_norm_to_sol": 0.6215203963230908,
"perf_norm_to_cublas": 1.015094370561506,
"compute_intensity": 7.937984496124031,
"tile_compute_intensity": 0.9142857142857143,
"MxNxK": 8388608,
"size_m": 1024,
"size_n": 1024,
"size_k": 8,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_1024x1024x8.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00041714240796864033,
"perf_norm_to_sol": 0.7207642967487405,
"perf_norm_to_cublas": 0.7975712946936644,
"compute_intensity": 15.953261927945473,
"tile_compute_intensity": 1.9248120300751879,
"MxNxK": 536870912,
"size_m": 4096,
"size_n": 8192,
"size_k": 16,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_4096x8192x16.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.000022463999630417675,
"perf_norm_to_sol": 0.3813567346313401,
"perf_norm_to_cublas": 0.958119646880255,
"compute_intensity": 97.52380952380952,
"tile_compute_intensity": 2.909090909090909,
"MxNxK": 33554432,
"size_m": 256,
"size_n": 1024,
"size_k": 128,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_256x1024x128.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00013026880333200097,
"perf_norm_to_sol": 0.5260997154618832,
"perf_norm_to_cublas": 0.8910069298449822,
"compute_intensity": 56.79029462738301,
"tile_compute_intensity": 3.9384615384615387,
"MxNxK": 268435456,
"size_m": 16384,
"size_n": 256,
"size_k": 64,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_16384x256x64.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.001369574386626482,
"perf_norm_to_sol": 0.8006502578990395,
"perf_norm_to_cublas": 0.96894100808442,
"compute_intensity": 630.1538461538462,
"tile_compute_intensity": 12.19047619047619,
"MxNxK": 4294967296,
"size_m": 4096,
"size_n": 512,
"size_k": 2048,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_4096x512x2048.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.000006019200009177439,
"perf_norm_to_sol": 0.027278729352344648,
"perf_norm_to_cublas": 0.5922381324026766,
"compute_intensity": 7.529411764705882,
"tile_compute_intensity": 0.5714285714285714,
"MxNxK": 131072,
"size_m": 128,
"size_n": 128,
"size_k": 8,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_128x128x8.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00003185599925927818,
"perf_norm_to_sol": 0.537845162293605,
"perf_norm_to_cublas": 1.250828777637265,
"compute_intensity": 157.53846153846155,
"tile_compute_intensity": 3.2,
"MxNxK": 67108864,
"size_m": 256,
"size_n": 1024,
"size_k": 256,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_256x1024x256.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00009491519886069,
"perf_norm_to_sol": 0.7220590715625617,
"perf_norm_to_cublas": 1.1340480710929828,
"compute_intensity": 204.8,
"tile_compute_intensity": 1.8686131386861313,
"MxNxK": 268435456,
"size_m": 128,
"size_n": 1024,
"size_k": 2048,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_128x1024x2048.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.01085987538099289,
"perf_norm_to_sol": 0.8077809716185947,
"perf_norm_to_cublas": 0.9325543202268844,
"compute_intensity": 1638.4,
"tile_compute_intensity": 30.11764705882353,
"MxNxK": 34359738368,
"size_m": 4096,
"size_n": 2048,
"size_k": 4096,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_4096x2048x4096.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00005304319784045219,
"perf_norm_to_sol": 0.6460242138178511,
"perf_norm_to_cublas": 1.9693532754978957,
"compute_intensity": 186.1818181818182,
"tile_compute_intensity": 2.6391752577319587,
"MxNxK": 134217728,
"size_m": 512,
"size_n": 128,
"size_k": 2048,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_512x128x2048.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.000014291200204752386,
"perf_norm_to_sol": 0.1659567996001982,
"perf_norm_to_cublas": 0.6612180650979071,
"compute_intensity": 42.22680412371134,
"tile_compute_intensity": 1.5238095238095237,
"MxNxK": 8388608,
"size_m": 2048,
"size_n": 64,
"size_k": 64,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_2048x64x64.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.000031577597837895155,
"perf_norm_to_sol": 0.5425870320974832,
"perf_norm_to_cublas": 1.3878192402197977,
"compute_intensity": 204.8,
"tile_compute_intensity": 3.764705882352941,
"MxNxK": 67108864,
"size_m": 512,
"size_n": 256,
"size_k": 512,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_512x256x512.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.000006492800457635894,
"perf_norm_to_sol": 0.370904649323012,
"perf_norm_to_cublas": 0.8713651171601893,
"compute_intensity": 7.876923076923077,
"tile_compute_intensity": 0.8421052631578947,
"MxNxK": 2097152,
"size_m": 512,
"size_n": 512,
"size_k": 8,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_512x512x8.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.0001837695948779583,
"perf_norm_to_sol": 0.7458729003787544,
"perf_norm_to_cublas": 1.1538971250413665,
"compute_intensity": 315.0769230769231,
"tile_compute_intensity": 3.9689922480620154,
"MxNxK": 536870912,
"size_m": 512,
"size_n": 256,
"size_k": 4096,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_512x256x4096.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.0001869536004960537,
"perf_norm_to_sol": 0.8060585998147218,
"perf_norm_to_cublas": 0.8522499762130001,
"compute_intensity": 7.9669341113542425,
"tile_compute_intensity": 0.9394495412844037,
"MxNxK": 134217728,
"size_m": 1024,
"size_n": 16384,
"size_k": 8,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_1024x16384x8.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00009616320021450519,
"perf_norm_to_sol": 0.7126882239115421,
"perf_norm_to_cublas": 1.1278825028848636,
"compute_intensity": 117.02857142857142,
"tile_compute_intensity": 1.8686131386861313,
"MxNxK": 268435456,
"size_m": 2048,
"size_n": 64,
"size_k": 2048,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_2048x64x2048.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.005438639968633652,
"perf_norm_to_sol": 0.8064884546052403,
"perf_norm_to_cublas": 0.9048114702972854,
"compute_intensity": 468.1142857142857,
"tile_compute_intensity": 3.8714555765595464,
"MxNxK": 17179869184,
"size_m": 256,
"size_n": 4096,
"size_k": 16384,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_256x4096x16384.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.010810470581054688,
"perf_norm_to_sol": 0.8114726015987588,
"perf_norm_to_cublas": 0.9464015744645189,
"compute_intensity": 862.3157894736842,
"tile_compute_intensity": 12.720496894409937,
"MxNxK": 34359738368,
"size_m": 4096,
"size_n": 512,
"size_k": 16384,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_4096x512x16384.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.000013385599595494569,
"perf_norm_to_sol": 0.1962659968083889,
"perf_norm_to_cublas": 0.626344755250685,
"compute_intensity": 30.11764705882353,
"tile_compute_intensity": 2.2857142857142856,
"MxNxK": 8388608,
"size_m": 512,
"size_n": 512,
"size_k": 32,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_512x512x32.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00001457920006942004,
"perf_norm_to_sol": 0.14690102174715033,
"perf_norm_to_cublas": 0.9550043668122271,
"compute_intensity": 78.76923076923077,
"tile_compute_intensity": 1.5238095238095237,
"MxNxK": 8388608,
"size_m": 512,
"size_n": 64,
"size_k": 256,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_512x64x256.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.0027106527239084246,
"perf_norm_to_sol": 0.809067186063822,
"perf_norm_to_cublas": 1.049314196273875,
"compute_intensity": 655.36,
"tile_compute_intensity": 7.529411764705882,
"MxNxK": 8589934592,
"size_m": 512,
"size_n": 8192,
"size_k": 2048,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_512x8192x2048.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.000008665600034873933,
"perf_norm_to_sol": 0.12357478869523468,
"perf_norm_to_cublas": 0.9963072768811855,
"compute_intensity": 60.23529411764706,
"tile_compute_intensity": 1.4545454545454546,
"MxNxK": 4194304,
"size_m": 512,
"size_n": 64,
"size_k": 128,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_512x64x128.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00538102425634861,
"perf_norm_to_sol": 0.8151236891903515,
"perf_norm_to_cublas": 0.9103943992267824,
"compute_intensity": 1170.2857142857142,
"tile_compute_intensity": 12.641975308641975,
"MxNxK": 17179869184,
"size_m": 1024,
"size_n": 2048,
"size_k": 8192,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_1024x2048x8192.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.000007993599865585566,
"perf_norm_to_sol": 0.6048168346229787,
"perf_norm_to_cublas": 1.0496397337780077,
"compute_intensity": 7.861804222648752,
"tile_compute_intensity": 0.7901234567901234,
"MxNxK": 4194304,
"size_m": 256,
"size_n": 2048,
"size_k": 8,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_256x2048x8.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00011688319500535727,
"perf_norm_to_sol": 0.5863493067877251,
"perf_norm_to_cublas": 0.8570607727543005,
"compute_intensity": 61.59398496240601,
"tile_compute_intensity": 5.12,
"MxNxK": 268435456,
"size_m": 1024,
"size_n": 4096,
"size_k": 64,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_1024x4096x64.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00014087040908634664,
"perf_norm_to_sol": 0.5491190150496652,
"perf_norm_to_cublas": 0.8069373975470461,
"compute_intensity": 31.44721689059501,
"tile_compute_intensity": 3.1604938271604937,
"MxNxK": 268435456,
"size_m": 1024,
"size_n": 8192,
"size_k": 32,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_1024x8192x32.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.005527308583259583,
"perf_norm_to_sol": 0.7935508353454371,
"perf_norm_to_cublas": 0.8841661153517874,
"compute_intensity": 248.24242424242425,
"tile_compute_intensity": 23.272727272727273,
"MxNxK": 17179869184,
"size_m": 8192,
"size_n": 8192,
"size_k": 256,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_8192x8192x256.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.0003909856081008911,
"perf_norm_to_sol": 0.7011447884172853,
"perf_norm_to_cublas": 0.8396422041381781,
"compute_intensity": 113.3840830449827,
"tile_compute_intensity": 5.278350515463917,
"MxNxK": 1073741824,
"size_m": 512,
"size_n": 16384,
"size_k": 128,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_512x16384x128.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00002288320101797581,
"perf_norm_to_sol": 0.3743705934797366,
"perf_norm_to_cublas": 4.242902863782603,
"compute_intensity": 146.28571428571428,
"tile_compute_intensity": 1.9692307692307693,
"MxNxK": 33554432,
"size_m": 256,
"size_n": 128,
"size_k": 1024,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_256x128x1024.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.0000066912005422636865,
"perf_norm_to_sol": 0.10906272561062738,
"perf_norm_to_cublas": 0.739837343562357,
"compute_intensity": 28.444444444444443,
"tile_compute_intensity": 1.6,
"MxNxK": 2097152,
"size_m": 256,
"size_n": 256,
"size_k": 32,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_256x256x32.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.0003578144125640392,
"perf_norm_to_sol": 0.7661444364459177,
"perf_norm_to_cublas": 1.0523086525439986,
"compute_intensity": 327.68,
"tile_compute_intensity": 3.764705882352941,
"MxNxK": 1073741824,
"size_m": 256,
"size_n": 4096,
"size_k": 1024,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_256x4096x1024.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00011594239622354507,
"perf_norm_to_sol": 0.5911071583718749,
"perf_norm_to_cublas": 0.860068442038939,
"compute_intensity": 62.06060606060606,
"tile_compute_intensity": 5.818181818181818,
"MxNxK": 268435456,
"size_m": 2048,
"size_n": 2048,
"size_k": 64,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_2048x2048x64.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.0007077888119965791,
"perf_norm_to_sol": 0.774630841346014,
"perf_norm_to_cublas": 0.9467004797281833,
"compute_intensity": 203.527950310559,
"tile_compute_intensity": 6.320987654320987,
"MxNxK": 2147483648,
"size_m": 512,
"size_n": 16384,
"size_k": 256,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_512x16384x256.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.002887840010225773,
"perf_norm_to_sol": 0.7594257867344132,
"perf_norm_to_cublas": 0.8733115053799436,
"compute_intensity": 126.03076923076924,
"tile_compute_intensity": 13.473684210526315,
"MxNxK": 8589934592,
"size_m": 8192,
"size_n": 8192,
"size_k": 128,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_8192x8192x128.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.022187910974025726,
"perf_norm_to_sol": 0.7907369645736098,
"perf_norm_to_cublas": 1.035417599196242,
"compute_intensity": 1489.4545454545455,
"tile_compute_intensity": 21.11340206185567,
"MxNxK": 68719476736,
"size_m": 4096,
"size_n": 1024,
"size_k": 16384,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_4096x1024x16384.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00021119038574397565,
"perf_norm_to_sol": 0.6490293592210148,
"perf_norm_to_cublas": 0.871418494870988,
"compute_intensity": 60.12477064220184,
"tile_compute_intensity": 3.9689922480620154,
"MxNxK": 536870912,
"size_m": 512,
"size_n": 16384,
"size_k": 64,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_512x16384x64.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00009743040427565575,
"perf_norm_to_sol": 0.7034188236828443,
"perf_norm_to_cublas": 0.9772719234603799,
"compute_intensity": 292.57142857142856,
"tile_compute_intensity": 5.818181818181818,
"MxNxK": 268435456,
"size_m": 512,
"size_n": 1024,
"size_k": 512,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_512x1024x512.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.000013350399967748672,
"perf_norm_to_sol": 0.22138140509374374,
"perf_norm_to_cublas": 0.6347075670417169,
"compute_intensity": 28.248275862068965,
"tile_compute_intensity": 1.8823529411764706,
"MxNxK": 8388608,
"size_m": 2048,
"size_n": 128,
"size_k": 32,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_2048x128x32.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.005378531292080879,
"perf_norm_to_sol": 0.8155015012956607,
"perf_norm_to_cublas": 1.0605137802385876,
"compute_intensity": 1170.2857142857142,
"tile_compute_intensity": 13.837837837837839,
"MxNxK": 17179869184,
"size_m": 1024,
"size_n": 4096,
"size_k": 4096,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_1024x4096x4096.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.000018041599832940847,
"perf_norm_to_sol": 0.5834735445661191,
"perf_norm_to_cublas": 0.9563675639471327,
"compute_intensity": 15.044995408631772,
"tile_compute_intensity": 1.3195876288659794,
"MxNxK": 16777216,
"size_m": 8192,
"size_n": 128,
"size_k": 16,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_8192x128x16.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00005201920284889638,
"perf_norm_to_sol": 0.6587411630047665,
"perf_norm_to_cublas": 1.4985234698966559,
"compute_intensity": 256,
"tile_compute_intensity": 4.923076923076923,
"MxNxK": 134217728,
"size_m": 512,
"size_n": 512,
"size_k": 512,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_512x512x512.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00001606079895282164,
"perf_norm_to_sol": 0.6545822050563993,
"perf_norm_to_cublas": 0.9796772589252215,
"compute_intensity": 7.527682058350563,
"tile_compute_intensity": 0.6649350649350649,
"MxNxK": 8388608,
"size_m": 16384,
"size_n": 64,
"size_k": 8,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_16384x64x8.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.002741273678839207,
"perf_norm_to_sol": 0.8000296317212269,
"perf_norm_to_cublas": 0.9267808245659249,
"compute_intensity": 655.36,
"tile_compute_intensity": 21.333333333333332,
"MxNxK": 8589934592,
"size_m": 8192,
"size_n": 1024,
"size_k": 1024,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_8192x1024x1024.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.000016710400814190507,
"perf_norm_to_sol": 0.30569727429207444,
"perf_norm_to_cublas": 0.757755610321075,
"compute_intensity": 30.567164179104477,
"tile_compute_intensity": 2.4615384615384617,
"MxNxK": 16777216,
"size_m": 512,
"size_n": 1024,
"size_k": 32,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_512x1024x32.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00046759364195168016,
"perf_norm_to_sol": 0.6448699040207421,
"perf_norm_to_cublas": 0.911759172959968,
"compute_intensity": 15.929995138551288,
"tile_compute_intensity": 1.9248120300751879,
"MxNxK": 536870912,
"size_m": 16384,
"size_n": 2048,
"size_k": 16,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_16384x2048x16.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.000008611199882579968,
"perf_norm_to_sol": 0.34004137890460623,
"perf_norm_to_cublas": 0.7491639095530995,
"compute_intensity": 14.197573656845753,
"tile_compute_intensity": 0.9846153846153847,
"MxNxK": 4194304,
"size_m": 4096,
"size_n": 64,
"size_k": 16,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_4096x64x16.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.0015269824303686618,
"perf_norm_to_sol": 0.7181157189868058,
"perf_norm_to_cublas": 0.8583077338660535,
"compute_intensity": 63.38104448742747,
"tile_compute_intensity": 7.013698630136986,
"MxNxK": 4294967296,
"size_m": 4096,
"size_n": 16384,
"size_k": 64,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_4096x16384x64.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.0013737695291638375,
"perf_norm_to_sol": 0.7982052757654634,
"perf_norm_to_cublas": 0.9376547803913269,
"compute_intensity": 431.1578947368421,
"tile_compute_intensity": 18.285714285714285,
"MxNxK": 4294967296,
"size_m": 2048,
"size_n": 4096,
"size_k": 512,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_2048x4096x512.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.010842486470937728,
"perf_norm_to_sol": 0.8090764706443392,
"perf_norm_to_cublas": 0.8780713111299685,
"compute_intensity": 862.3157894736842,
"tile_compute_intensity": 36.57142857142857,
"MxNxK": 34359738368,
"size_m": 4096,
"size_n": 8192,
"size_k": 1024,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_4096x8192x1024.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.003716016188263893,
"perf_norm_to_sol": 0.6447630413655887,
"perf_norm_to_cublas": 0.9019835881426812,
"compute_intensity": 15.984390243902439,
"tile_compute_intensity": 1.9768339768339769,
"MxNxK": 4294967296,
"size_m": 16384,
"size_n": 16384,
"size_k": 16,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_16384x16384x16.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.000008521600102540106,
"perf_norm_to_sol": 0.5833991051873251,
"perf_norm_to_cublas": 1.0236575347775845,
"compute_intensity": 7.750236518448439,
"tile_compute_intensity": 0.6632124352331606,
"MxNxK": 4194304,
"size_m": 128,
"size_n": 4096,
"size_k": 8,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_128x4096x8.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.000695663969963789,
"perf_norm_to_sol": 0.7881320100000937,
"perf_norm_to_cublas": 1.0609398585071426,
"compute_intensity": 390.0952380952381,
"tile_compute_intensity": 16,
"MxNxK": 2147483648,
"size_m": 4096,
"size_n": 1024,
"size_k": 512,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_4096x1024x512.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.011061504483222961,
"perf_norm_to_sol": 0.7930567401767488,
"perf_norm_to_cublas": 0.8943619708348769,
"compute_intensity": 474.8985507246377,
"tile_compute_intensity": 36.57142857142857,
"MxNxK": 34359738368,
"size_m": 16384,
"size_n": 4096,
"size_k": 512,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_16384x4096x512.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.0004245087970048189,
"perf_norm_to_sol": 0.7103199489236174,
"perf_norm_to_cublas": 0.7844473888790103,
"compute_intensity": 15.929995138551288,
"tile_compute_intensity": 1.8754578754578755,
"MxNxK": 536870912,
"size_m": 2048,
"size_n": 16384,
"size_k": 16,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_2048x16384x16.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.005405321344733238,
"perf_norm_to_sol": 0.8114596827312426,
"perf_norm_to_cublas": 0.9127515136495,
"compute_intensity": 468.1142857142857,
"tile_compute_intensity": 3.9233716475095783,
"MxNxK": 17179869184,
"size_m": 256,
"size_n": 8192,
"size_k": 8192,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_256x8192x8192.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00035061121452599763,
"perf_norm_to_sol": 0.7818846349131133,
"perf_norm_to_cublas": 1.0826807233616165,
"compute_intensity": 372.3636363636364,
"tile_compute_intensity": 3.5310344827586206,
"MxNxK": 1073741824,
"size_m": 256,
"size_n": 1024,
"size_k": 4096,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_256x1024x4096.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.000006828799814684316,
"perf_norm_to_sol": 0.07840687399594448,
"perf_norm_to_cublas": 0.8172446209449519,
"compute_intensity": 46.54545454545455,
"tile_compute_intensity": 1.6,
"MxNxK": 2097152,
"size_m": 256,
"size_n": 128,
"size_k": 64,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_256x128x64.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.010853850096464158,
"perf_norm_to_sol": 0.8082293940813744,
"perf_norm_to_cublas": 0.9125603875014405,
"compute_intensity": 481.88235294117646,
"tile_compute_intensity": 3.930902111324376,
"MxNxK": 34359738368,
"size_m": 256,
"size_n": 8192,
"size_k": 16384,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_256x8192x16384.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00013688959879800677,
"perf_norm_to_sol": 0.5586904211582493,
"perf_norm_to_cublas": 0.7957829108542107,
"compute_intensity": 31.62934362934363,
"tile_compute_intensity": 3.4594594594594597,
"MxNxK": 268435456,
"size_m": 2048,
"size_n": 4096,
"size_k": 32,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_2048x4096x32.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00040315836668014526,
"perf_norm_to_sol": 0.679974779448385,
"perf_norm_to_cublas": 0.8411344851324131,
"compute_intensity": 62.77394636015325,
"tile_compute_intensity": 6.2439024390243905,
"MxNxK": 1073741824,
"size_m": 2048,
"size_n": 8192,
"size_k": 64,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_2048x8192x64.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.0003655072068795562,
"perf_norm_to_sol": 0.7500194696747474,
"perf_norm_to_cublas": 1.0321131783049435,
"compute_intensity": 512,
"tile_compute_intensity": 9.846153846153847,
"MxNxK": 1073741824,
"size_m": 1024,
"size_n": 1024,
"size_k": 1024,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_1024x1024x1024.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00002574399986770004,
"perf_norm_to_sol": 0.5449676431887939,
"perf_norm_to_cublas": 0.8960845489793705,
"compute_intensity": 25.580015612802498,
"tile_compute_intensity": 1.3264248704663213,
"MxNxK": 33554432,
"size_m": 16384,
"size_n": 64,
"size_k": 32,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_16384x64x32.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00001797440054360777,
"perf_norm_to_sol": 0.23830551469663125,
"perf_norm_to_cublas": 1.3505429199069878,
"compute_intensity": 128,
"tile_compute_intensity": 1.5609756097560976,
"MxNxK": 16777216,
"size_m": 128,
"size_n": 256,
"size_k": 512,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_128x256x512.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.0013724831864237786,
"perf_norm_to_sol": 0.7989533836998367,
"perf_norm_to_cublas": 0.936885209303487,
"compute_intensity": 399.609756097561,
"tile_compute_intensity": 12.19047619047619,
"MxNxK": 4294967296,
"size_m": 1024,
"size_n": 8192,
"size_k": 512,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_1024x8192x512.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.000031574402237311004,
"perf_norm_to_sol": 0.5426419465634386,
"perf_norm_to_cublas": 1.2798215521193799,
"compute_intensity": 157.53846153846155,
"tile_compute_intensity": 4.571428571428571,
"MxNxK": 67108864,
"size_m": 1024,
"size_n": 256,
"size_k": 256,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_1024x256x256.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00009775359649211168,
"perf_norm_to_sol": 0.7010931855796853,
"perf_norm_to_cublas": 0.9887063428832067,
"compute_intensity": 167.18367346938774,
"tile_compute_intensity": 1.9104477611940298,
"MxNxK": 268435456,
"size_m": 128,
"size_n": 4096,
"size_k": 512,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_128x4096x512.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.0000340319995302707,
"perf_norm_to_sol": 0.618104980149928,
"perf_norm_to_cublas": 0.948472057431833,
"compute_intensity": 15.05190629306385,
"tile_compute_intensity": 0.9980506822612085,
"MxNxK": 33554432,
"size_m": 128,
"size_n": 16384,
"size_k": 16,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_128x16384x16.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00005219840095378458,
"perf_norm_to_sol": 0.6564796920427188,
"perf_norm_to_cublas": 1.894127057388825,
"compute_intensity": 256,
"tile_compute_intensity": 3.1219512195121952,
"MxNxK": 134217728,
"size_m": 256,
"size_n": 512,
"size_k": 1024,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_256x512x1024.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.0018814494833350182,
"perf_norm_to_sol": 0.6373503924425409,
"perf_norm_to_cublas": 0.891021987524942,
"compute_intensity": 15.976596782057532,
"tile_compute_intensity": 1.9692307692307693,
"MxNxK": 2147483648,
"size_m": 16384,
"size_n": 8192,
"size_k": 16,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_16384x8192x16.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.000022281600104179232,
"perf_norm_to_sol": 0.38447856104413664,
"perf_norm_to_cublas": 0.997845713622547,
"compute_intensity": 97.52380952380952,
"tile_compute_intensity": 4,
"MxNxK": 33554432,
"size_m": 1024,
"size_n": 256,
"size_k": 128,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_1024x256x128.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.000022972800070419908,
"perf_norm_to_sol": 0.37291046453002663,
"perf_norm_to_cublas": 0.7996935792132003,
"compute_intensity": 56.10958904109589,
"tile_compute_intensity": 3.5555555555555554,
"MxNxK": 33554432,
"size_m": 2048,
"size_n": 256,
"size_k": 64,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_2048x256x64.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.000009014399984152987,
"perf_norm_to_sol": 0.1659577086173716,
"perf_norm_to_cublas": 0.6769612916697372,
"compute_intensity": 28.054794520547944,
"tile_compute_intensity": 1.28,
"MxNxK": 4194304,
"size_m": 128,
"size_n": 1024,
"size_k": 32,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_128x1024x32.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.000122857594396919,
"perf_norm_to_sol": 0.6153725806960961,
"perf_norm_to_cublas": 0.722553621884614,
"compute_intensity": 15.906796116504854,
"tile_compute_intensity": 1.8823529411764706,
"MxNxK": 134217728,
"size_m": 4096,
"size_n": 2048,
"size_k": 16,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_4096x2048x16.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.000013385599595494569,
"perf_norm_to_sol": 0.43682813872978227,
"perf_norm_to_cublas": 0.7857996954725512,
"compute_intensity": 14.209887250650477,
"tile_compute_intensity": 0.9922480620155039,
"MxNxK": 8388608,
"size_m": 8192,
"size_n": 64,
"size_k": 16,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_8192x64x16.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.0013684543780982494,
"perf_norm_to_sol": 0.801305548372241,
"perf_norm_to_cublas": 0.9684035305822569,
"compute_intensity": 431.1578947368421,
"tile_compute_intensity": 7.013698630136986,
"MxNxK": 4294967296,
"size_m": 4096,
"size_n": 256,
"size_k": 4096,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_4096x256x4096.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00005567040061578154,
"perf_norm_to_sol": 0.6155369784342587,
"perf_norm_to_cublas": 0.9176868879064652,
"compute_intensity": 186.1818181818182,
"tile_compute_intensity": 6.4,
"MxNxK": 134217728,
"size_m": 1024,
"size_n": 512,
"size_k": 256,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_1024x512x256.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.000008076799713307991,
"perf_norm_to_sol": 0.5918101098154425,
"perf_norm_to_cublas": 1.070523046854203,
"compute_intensity": 7.9073359073359075,
"tile_compute_intensity": 0.8648648648648649,
"MxNxK": 4194304,
"size_m": 512,
"size_n": 1024,
"size_k": 8,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_512x1024x8.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.000022684800205752255,
"perf_norm_to_sol": 0.3776448312576898,
"perf_norm_to_cublas": 1.2729581016617233,
"compute_intensity": 146.28571428571428,
"tile_compute_intensity": 1.7297297297297298,
"MxNxK": 33554432,
"size_m": 128,
"size_n": 512,
"size_k": 512,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_128x512x512.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.002711286395788193,
"perf_norm_to_sol": 0.8088780938582003,
"perf_norm_to_cublas": 1.6490119474244076,
"compute_intensity": 126.03076923076924,
"tile_compute_intensity": 1.967339097022094,
"MxNxK": 8589934592,
"size_m": 8192,
"size_n": 64,
"size_k": 16384,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_8192x64x16384.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.0006839584093540907,
"perf_norm_to_sol": 0.8016204427546697,
"perf_norm_to_cublas": 1.6594834045314184,
"compute_intensity": 390.0952380952381,
"tile_compute_intensity": 5.305699481865285,
"MxNxK": 2147483648,
"size_m": 1024,
"size_n": 256,
"size_k": 8192,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_1024x256x8192.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.010875286161899566,
"perf_norm_to_sol": 0.806636308812589,
"perf_norm_to_cublas": 0.8629721163106765,
"compute_intensity": 799.219512195122,
"tile_compute_intensity": 24.38095238095238,
"MxNxK": 34359738368,
"size_m": 2048,
"size_n": 16384,
"size_k": 1024,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_2048x16384x1024.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00003216640034224838,
"perf_norm_to_sol": 0.5326550347359701,
"perf_norm_to_cublas": 0.903601253351002,
"compute_intensity": 107.78947368421052,
"tile_compute_intensity": 5.333333333333333,
"MxNxK": 67108864,
"size_m": 1024,
"size_n": 512,
"size_k": 128,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_1024x512x128.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.000015913599054329097,
"perf_norm_to_sol": 0.13458296763303876,
"perf_norm_to_cublas": 1.4896441412023071,
"compute_intensity": 85.33333333333333,
"tile_compute_intensity": 1.3061224489795917,
"MxNxK": 8388608,
"size_m": 256,
"size_n": 64,
"size_k": 512,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_256x64x512.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.011085740476846694,
"perf_norm_to_sol": 0.7913229346507827,
"perf_norm_to_cublas": 0.8787022997266825,
"compute_intensity": 474.8985507246377,
"tile_compute_intensity": 30.11764705882353,
"MxNxK": 34359738368,
"size_m": 4096,
"size_n": 16384,
"size_k": 512,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_4096x16384x512.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.000023737600713502616,
"perf_norm_to_sol": 0.3608956797787352,
"perf_norm_to_cublas": 4.094769394118752,
"compute_intensity": 146.28571428571428,
"tile_compute_intensity": 1.5802469135802468,
"MxNxK": 33554432,
"size_m": 128,
"size_n": 256,
"size_k": 1024,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_128x256x1024.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.005543804913759232,
"perf_norm_to_sol": 0.7911895190560344,
"perf_norm_to_cublas": 0.8990371156529654,
"compute_intensity": 468.1142857142857,
"tile_compute_intensity": 7.086505190311419,
"MxNxK": 17179869184,
"size_m": 4096,
"size_n": 256,
"size_k": 16384,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_4096x256x16384.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00006739519885741174,
"perf_norm_to_sol": 0.34703994683377815,
"perf_norm_to_cublas": 0.6590855287778362,
"compute_intensity": 28.419774501300953,
"tile_compute_intensity": 1.9844961240310077,
"MxNxK": 67108864,
"size_m": 16384,
"size_n": 128,
"size_k": 32,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_16384x128x32.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.000017472000035922974,
"perf_norm_to_sol": 0.5701254431610745,
"perf_norm_to_cublas": 1.013003692032102,
"compute_intensity": 15.485822306238186,
"tile_compute_intensity": 1.5609756097560976,
"MxNxK": 16777216,
"size_m": 4096,
"size_n": 256,
"size_k": 16,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_4096x256x16.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.000018192001152783633,
"perf_norm_to_sol": 0.5475610780657127,
"perf_norm_to_cublas": 0.9236586833781175,
"compute_intensity": 15.485822306238186,
"tile_compute_intensity": 1.3195876288659794,
"MxNxK": 16777216,
"size_m": 256,
"size_n": 4096,
"size_k": 16,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_256x4096x16.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.001377071999013424,
"perf_norm_to_sol": 0.7962910339110906,
"perf_norm_to_cublas": 1.0509207978954427,
"compute_intensity": 337.8144329896907,
"tile_compute_intensity": 7.314285714285714,
"MxNxK": 4294967296,
"size_m": 16384,
"size_n": 256,
"size_k": 1024,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_16384x256x1024.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.000006326400034595281,
"perf_norm_to_sol": 0.1932139836188415,
"perf_norm_to_cublas": 0.7030854619089865,
"compute_intensity": 7.816793893129771,
"tile_compute_intensity": 0.7619047619047619,
"MxNxK": 1048576,
"size_m": 256,
"size_n": 512,
"size_k": 8,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_256x512x8.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00018319999799132347,
"perf_norm_to_sol": 0.7481919336022218,
"perf_norm_to_cublas": 1.0838078486295781,
"compute_intensity": 409.6,
"tile_compute_intensity": 5.224489795918367,
"MxNxK": 536870912,
"size_m": 512,
"size_n": 512,
"size_k": 2048,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_512x512x2048.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00001648320030653849,
"perf_norm_to_sol": 0.30991093272098075,
"perf_norm_to_cublas": 0.778101306690136,
"compute_intensity": 30.567164179104477,
"tile_compute_intensity": 2.6666666666666665,
"MxNxK": 16777216,
"size_m": 1024,
"size_n": 512,
"size_k": 32,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_1024x512x32.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.000016291199426632374,
"perf_norm_to_sol": 0.2629271584451593,
"perf_norm_to_cublas": 1.077195040350678,
"compute_intensity": 81.92,
"tile_compute_intensity": 1.6842105263157894,
"MxNxK": 16777216,
"size_m": 1024,
"size_n": 64,
"size_k": 256,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_1024x64x256.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.000013379199663177132,
"perf_norm_to_sol": 0.16007679385699097,
"perf_norm_to_cublas": 0.8096149211082392,
"compute_intensity": 62.06060606060606,
"tile_compute_intensity": 1.6,
"MxNxK": 8388608,
"size_m": 1024,
"size_n": 64,
"size_k": 128,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_1024x64x128.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00005987200420349836,
"perf_norm_to_sol": 0.32665656516081243,
"perf_norm_to_cublas": 0.6451095535144507,
"compute_intensity": 31.267175572519083,
"tile_compute_intensity": 3.0476190476190474,
"MxNxK": 67108864,
"size_m": 1024,
"size_n": 2048,
"size_k": 32,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_1024x2048x32.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.0013623840175569057,
"perf_norm_to_sol": 0.8048759173135336,
"perf_norm_to_cublas": 0.9629965833103589,
"compute_intensity": 819.2,
"tile_compute_intensity": 15.058823529411764,
"MxNxK": 4294967296,
"size_m": 2048,
"size_n": 1024,
"size_k": 2048,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_2048x1024x2048.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00018199679907411338,
"perf_norm_to_sol": 0.7531383047964147,
"perf_norm_to_cublas": 1.811759382440397,
"compute_intensity": 118.72463768115942,
"tile_compute_intensity": 1.7746967071057191,
"MxNxK": 536870912,
"size_m": 1024,
"size_n": 64,
"size_k": 8192,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_1024x64x8192.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.0003566400147974491,
"perf_norm_to_sol": 0.7686673118320645,
"perf_norm_to_cublas": 1.069277689701736,
"compute_intensity": 221.40540540540542,
"tile_compute_intensity": 3.710144927536232,
"MxNxK": 1073741824,
"size_m": 4096,
"size_n": 128,
"size_k": 2048,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_4096x128x2048.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.000054675195133313534,
"perf_norm_to_sol": 0.6267410678591965,
"perf_norm_to_cublas": 1.4187055066965455,
"compute_intensity": 163.84,
"tile_compute_intensity": 3.3684210526315788,
"MxNxK": 134217728,
"size_m": 2048,
"size_n": 128,
"size_k": 512,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_2048x128x512.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.022810059785842895,
"perf_norm_to_sol": 0.7691694602536641,
"perf_norm_to_cublas": 0.9827092761673766,
"compute_intensity": 489.07462686567163,
"tile_compute_intensity": 7.728301886792453,
"MxNxK": 68719476736,
"size_m": 16384,
"size_n": 256,
"size_k": 16384,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_16384x256x16384.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00018832000205293298,
"perf_norm_to_sol": 0.7278502508433713,
"perf_norm_to_cublas": 1.0278844118018875,
"compute_intensity": 315.0769230769231,
"tile_compute_intensity": 6.095238095238095,
"MxNxK": 536870912,
"size_m": 2048,
"size_n": 256,
"size_k": 1024,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_2048x256x1024.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.000006131199916126206,
"perf_norm_to_sol": 0.05207304494947075,
"perf_norm_to_cublas": 0.5897703561447915,
"compute_intensity": 7.641791044776119,
"tile_compute_intensity": 0.6153846153846154,
"MxNxK": 262144,
"size_m": 128,
"size_n": 256,
"size_k": 8,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_128x256x8.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.0000226431991904974,
"perf_norm_to_sol": 0.3783386558473114,
"perf_norm_to_cublas": 0.8094969064839931,
"compute_intensity": 58.51428571428571,
"tile_compute_intensity": 4,
"MxNxK": 33554432,
"size_m": 1024,
"size_n": 512,
"size_k": 64,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_1024x512x64.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.000057036796351894734,
"perf_norm_to_sol": 0.6007909345372012,
"perf_norm_to_cublas": 0.9076527064867939,
"compute_intensity": 163.84,
"tile_compute_intensity": 3.3684210526315788,
"MxNxK": 134217728,
"size_m": 256,
"size_n": 2048,
"size_k": 256,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_256x2048x256.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00002698239986784756,
"perf_norm_to_sol": 0.7045430691471345,
"perf_norm_to_cublas": 1.0079459981046441,
"compute_intensity": 7.930300096805421,
"tile_compute_intensity": 0.927536231884058,
"MxNxK": 16777216,
"size_m": 4096,
"size_n": 512,
"size_k": 8,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_4096x512x8.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.0001961759990081191,
"perf_norm_to_sol": 0.6987030086559091,
"perf_norm_to_cublas": 0.887007599966531,
"compute_intensity": 118.72463768115942,
"tile_compute_intensity": 7.529411764705882,
"MxNxK": 536870912,
"size_m": 1024,
"size_n": 4096,
"size_k": 128,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_1024x4096x128.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.0013576160185039044,
"perf_norm_to_sol": 0.8077026721243404,
"perf_norm_to_cublas": 0.9670033341585894,
"compute_intensity": 399.609756097561,
"tile_compute_intensity": 3.878787878787879,
"MxNxK": 4294967296,
"size_m": 256,
"size_n": 8192,
"size_k": 2048,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_256x8192x2048.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00006643839878961444,
"perf_norm_to_sol": 0.5157738718504375,
"perf_norm_to_cublas": 0.8020903221736758,
"compute_intensity": 84.89119170984456,
"tile_compute_intensity": 1.7655172413793103,
"MxNxK": 134217728,
"size_m": 128,
"size_n": 8192,
"size_k": 128,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_128x8192x128.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.000033603201154619454,
"perf_norm_to_sol": 0.5917881277692117,
"perf_norm_to_cublas": 0.886106080977851,
"compute_intensity": 15.500473036896878,
"tile_compute_intensity": 1.3264248704663213,
"MxNxK": 33554432,
"size_m": 256,
"size_n": 8192,
"size_k": 16,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_256x8192x16.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00001710399956209585,
"perf_norm_to_sol": 0.25043258200264995,
"perf_norm_to_cublas": 0.9586529574871198,
"compute_intensity": 113.77777777777777,
"tile_compute_intensity": 2.4615384615384617,
"MxNxK": 16777216,
"size_m": 512,
"size_n": 128,
"size_k": 256,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_512x128x256.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00010448959656059743,
"perf_norm_to_sol": 0.6558966884974053,
"perf_norm_to_cublas": 0.940434320291885,
"compute_intensity": 127.0077519379845,
"tile_compute_intensity": 1.8686131386861313,
"MxNxK": 268435456,
"size_m": 128,
"size_n": 8192,
"size_k": 256,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_128x8192x256.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.000014259199087973683,
"perf_norm_to_sol": 0.1501977336343003,
"perf_norm_to_cublas": 0.6932226822196531,
"compute_intensity": 78.76923076923077,
"tile_compute_intensity": 2.2857142857142856,
"MxNxK": 8388608,
"size_m": 512,
"size_n": 128,
"size_k": 128,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_512x128x128.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00009564160136505961,
"perf_norm_to_sol": 0.7165749986235921,
"perf_norm_to_cublas": 1.0535665836655388,
"compute_intensity": 195.04761904761904,
"tile_compute_intensity": 1.9104477611940298,
"MxNxK": 268435456,
"size_m": 128,
"size_n": 2048,
"size_k": 1024,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_128x2048x1024.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.0003663104027509689,
"perf_norm_to_sol": 0.7483749284960153,
"perf_norm_to_cublas": 1.0469633974943737,
"compute_intensity": 221.40540540540542,
"tile_compute_intensity": 9.846153846153847,
"MxNxK": 1073741824,
"size_m": 1024,
"size_n": 4096,
"size_k": 256,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_1024x4096x256.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.0006927231792360544,
"perf_norm_to_sol": 0.7914778361204137,
"perf_norm_to_cublas": 1.082859122444385,
"compute_intensity": 203.527950310559,
"tile_compute_intensity": 1.9616858237547892,
"MxNxK": 2147483648,
"size_m": 128,
"size_n": 16384,
"size_k": 1024,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_128x16384x1024.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00007431359845213592,
"perf_norm_to_sol": 0.46111601237199895,
"perf_norm_to_cublas": 0.7933084248043332,
"compute_intensity": 61.134328358208954,
"tile_compute_intensity": 4.923076923076923,
"MxNxK": 134217728,
"size_m": 1024,
"size_n": 2048,
"size_k": 64,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_1024x2048x64.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00003367039898876101,
"perf_norm_to_sol": 0.5088622530832061,
"perf_norm_to_cublas": 0.8754989709190745,
"compute_intensity": 99.90243902439025,
"tile_compute_intensity": 3.0476190476190474,
"MxNxK": 67108864,
"size_m": 256,
"size_n": 2048,
"size_k": 128,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_256x2048x128.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00005031359614804387,
"perf_norm_to_sol": 0.748418881379995,
"perf_norm_to_cublas": 0.9278764225497784,
"compute_intensity": 7.968871595330739,
"tile_compute_intensity": 0.9552238805970149,
"MxNxK": 33554432,
"size_m": 2048,
"size_n": 2048,
"size_k": 8,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_2048x2048x8.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.0007243648171424866,
"perf_norm_to_sol": 0.7569045734372781,
"perf_norm_to_cublas": 0.8626901692610565,
"compute_intensity": 123.18796992481202,
"tile_compute_intensity": 10.24,
"MxNxK": 2147483648,
"size_m": 2048,
"size_n": 8192,
"size_k": 128,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_2048x8192x128.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.000013104001118335873,
"perf_norm_to_sol": 0.16343858391900928,
"perf_norm_to_cublas": 0.6283271928565027,
"compute_intensity": 49.951219512195124,
"tile_compute_intensity": 1.5238095238095237,
"MxNxK": 8388608,
"size_m": 128,
"size_n": 1024,
"size_k": 64,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_128x1024x64.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.011094802618026733,
"perf_norm_to_sol": 0.7906765887535464,
"perf_norm_to_cublas": 0.8913637000986893,
"compute_intensity": 250.13740458015266,
"tile_compute_intensity": 25.6,
"MxNxK": 34359738368,
"size_m": 16384,
"size_n": 8192,
"size_k": 256,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_16384x8192x256.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.005756979435682296,
"perf_norm_to_sol": 0.7618926543790598,
"perf_norm_to_cublas": 0.8767781255245484,
"compute_intensity": 126.51737451737452,
"tile_compute_intensity": 14.222222222222221,
"MxNxK": 17179869184,
"size_m": 16384,
"size_n": 8192,
"size_k": 128,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_16384x8192x128.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00008321920176967978,
"perf_norm_to_sol": 0.4117702339671783,
"perf_norm_to_cublas": 0.7725140676857651,
"compute_intensity": 51.1201248049922,
"tile_compute_intensity": 2.6391752577319587,
"MxNxK": 134217728,
"size_m": 16384,
"size_n": 128,
"size_k": 64,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_16384x128x64.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.0013902432285249234,
"perf_norm_to_sol": 0.7887469353315059,
"perf_norm_to_cublas": 0.8828981680472504,
"compute_intensity": 237.44927536231884,
"tile_compute_intensity": 15.058823529411764,
"MxNxK": 4294967296,
"size_m": 2048,
"size_n": 8192,
"size_k": 256,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_2048x8192x256.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.000015116800204850733,
"perf_norm_to_sol": 0.14167676739993512,
"perf_norm_to_cublas": 6.417443045397917,
"compute_intensity": 78.76923076923077,
"tile_compute_intensity": 0.9922480620155039,
"MxNxK": 8388608,
"size_m": 128,
"size_n": 64,
"size_k": 1024,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_128x64x1024.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.0000331584014929831,
"perf_norm_to_sol": 0.5997266033030175,
"perf_norm_to_cublas": 0.9379463431503517,
"compute_intensity": 15.500473036896878,
"tile_compute_intensity": 1.5802469135802468,
"MxNxK": 33554432,
"size_m": 8192,
"size_n": 256,
"size_k": 16,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_8192x256x16.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.000015420799900311977,
"perf_norm_to_sol": 0.6441861515797197,
"perf_norm_to_cublas": 0.999169961044977,
"compute_intensity": 7.75390440132513,
"tile_compute_intensity": 0.6649350649350649,
"MxNxK": 8388608,
"size_m": 128,
"size_n": 8192,
"size_k": 8,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_128x8192x8.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00018517440184950829,
"perf_norm_to_sol": 0.7402144106529778,
"perf_norm_to_cublas": 1.0715952612665625,
"compute_intensity": 341.3333333333333,
"tile_compute_intensity": 5.224489795918367,
"MxNxK": 536870912,
"size_m": 1024,
"size_n": 256,
"size_k": 2048,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_1024x256x2048.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00005739200278185308,
"perf_norm_to_sol": 0.3261492486940798,
"perf_norm_to_cublas": 0.5750766414692444,
"compute_intensity": 42.6111833550065,
"tile_compute_intensity": 1.5900621118012421,
"MxNxK": 67108864,
"size_m": 16384,
"size_n": 64,
"size_k": 64,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_16384x64x64.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.005569206178188324,
"perf_norm_to_sol": 0.7875808873149832,
"perf_norm_to_cublas": 0.8976440635291997,
"compute_intensity": 248.24242424242425,
"tile_compute_intensity": 3.8714555765595464,
"MxNxK": 17179869184,
"size_m": 8192,
"size_n": 128,
"size_k": 16384,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_8192x128x16384.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.000016617600340396167,
"perf_norm_to_sol": 0.32057890686538637,
"perf_norm_to_cublas": 0.7718081693043705,
"compute_intensity": 29.8978102189781,
"tile_compute_intensity": 2.4615384615384617,
"MxNxK": 16777216,
"size_m": 2048,
"size_n": 256,
"size_k": 32,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_2048x256x32.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.0009627263993024826,
"perf_norm_to_sol": 0.6239975351537451,
"perf_norm_to_cublas": 0.874283703240221,
"compute_intensity": 15.961032635168047,
"tile_compute_intensity": 1.9541984732824427,
"MxNxK": 1073741824,
"size_m": 16384,
"size_n": 4096,
"size_k": 16,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_16384x4096x16.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.000006528000085381791,
"perf_norm_to_sol": 0.05729201694101259,
"perf_norm_to_cublas": 0.5671568204021059,
"compute_intensity": 14.027397260273972,
"tile_compute_intensity": 0.8888888888888888,
"MxNxK": 524288,
"size_m": 512,
"size_n": 64,
"size_k": 16,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_512x64x16.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.0010217344388365746,
"perf_norm_to_sol": 0.5908168693565297,
"perf_norm_to_cublas": 0.7200931746938917,
"compute_intensity": 31.844509232264333,
"tile_compute_intensity": 3.7372262773722627,
"MxNxK": 2147483648,
"size_m": 4096,
"size_n": 16384,
"size_k": 32,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_4096x16384x32.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00037446720525622366,
"perf_norm_to_sol": 0.8017350885958674,
"perf_norm_to_cublas": 0.8251424899106176,
"compute_intensity": 7.982460414129111,
"tile_compute_intensity": 0.9678638941398866,
"MxNxK": 268435456,
"size_m": 2048,
"size_n": 16384,
"size_k": 8,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_2048x16384x8.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00020080960821360349,
"perf_norm_to_sol": 0.6825806890039338,
"perf_norm_to_cublas": 0.880754673752975,
"compute_intensity": 102.0809968847352,
"tile_compute_intensity": 3.1801242236024843,
"MxNxK": 536870912,
"size_m": 256,
"size_n": 16384,
"size_k": 128,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_256x16384x128.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.0013605407439172268,
"perf_norm_to_sol": 0.8059663709204757,
"perf_norm_to_cublas": 1.6500710022353124,
"compute_intensity": 240.94117647058823,
"tile_compute_intensity": 3.750915750915751,
"MxNxK": 4294967296,
"size_m": 4096,
"size_n": 128,
"size_k": 8192,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_4096x128x8192.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.02252197712659836,
"perf_norm_to_sol": 0.7790080451289622,
"perf_norm_to_cublas": 0.8398359084209509,
"compute_intensity": 1820.4444444444443,
"tile_compute_intensity": 26.94736842105263,
"MxNxK": 68719476736,
"size_m": 2048,
"size_n": 8192,
"size_k": 4096,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_2048x8192x4096.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00005330240237526596,
"perf_norm_to_sol": 0.6428826592469676,
"perf_norm_to_cublas": 1.9753854937103066,
"compute_intensity": 113.77777777777777,
"tile_compute_intensity": 1.7655172413793103,
"MxNxK": 134217728,
"size_m": 1024,
"size_n": 64,
"size_k": 2048,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_1024x64x2048.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.005458499118685722,
"perf_norm_to_sol": 0.8035542826127158,
"perf_norm_to_cublas": 0.9035737609304375,
"compute_intensity": 1170.2857142857142,
"tile_compute_intensity": 15.753846153846155,
"MxNxK": 17179869184,
"size_m": 2048,
"size_n": 1024,
"size_k": 8192,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_2048x1024x8192.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.0002459231996908784,
"perf_norm_to_sol": 0.6195999683877682,
"perf_norm_to_cublas": 0.8015640691094315,
"compute_intensity": 31.690522243713733,
"tile_compute_intensity": 3.506849315068493,
"MxNxK": 536870912,
"size_m": 2048,
"size_n": 8192,
"size_k": 32,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_2048x8192x32.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.010850406438112258,
"perf_norm_to_sol": 0.8084859066755389,
"perf_norm_to_cublas": 0.8746268755104057,
"compute_intensity": 1260.3076923076924,
"tile_compute_intensity": 36.57142857142857,
"MxNxK": 34359738368,
"size_m": 8192,
"size_n": 2048,
"size_k": 2048,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_8192x2048x2048.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00020696001593023539,
"perf_norm_to_sol": 0.6622958551532788,
"perf_norm_to_cublas": 0.8723772382638278,
"compute_intensity": 62.534351145038165,
"tile_compute_intensity": 6.095238095238095,
"MxNxK": 536870912,
"size_m": 2048,
"size_n": 4096,
"size_k": 64,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_2048x4096x64.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.0003648672020062804,
"perf_norm_to_sol": 0.7513350609720855,
"perf_norm_to_cublas": 1.0548670796564874,
"compute_intensity": 169.78238341968913,
"tile_compute_intensity": 3.5310344827586206,
"MxNxK": 1073741824,
"size_m": 256,
"size_n": 16384,
"size_k": 256,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_256x16384x256.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.0000069567999162245545,
"perf_norm_to_sol": 0.05769438788580887,
"perf_norm_to_cublas": 0.8114075487015279,
"compute_intensity": 26.94736842105263,
"tile_compute_intensity": 1.3333333333333333,
"MxNxK": 1048576,
"size_m": 256,
"size_n": 128,
"size_k": 32,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_256x128x32.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00019230080069974065,
"perf_norm_to_sol": 0.7127830993645796,
"perf_norm_to_cublas": 0.9772523107860859,
"compute_intensity": 102.0809968847352,
"tile_compute_intensity": 1.9248120300751879,
"MxNxK": 536870912,
"size_m": 16384,
"size_n": 64,
"size_k": 512,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_16384x64x512.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.000006703999679302796,
"perf_norm_to_sol": 0.10069041741970869,
"perf_norm_to_cublas": 0.6248209907045947,
"compute_intensity": 14.840579710144928,
"tile_compute_intensity": 0.9411764705882353,
"MxNxK": 1048576,
"size_m": 128,
"size_n": 512,
"size_k": 16,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_128x512x16.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.0006763679906725883,
"perf_norm_to_sol": 0.8106164846550391,
"perf_norm_to_cublas": 1.6586285116612227,
"compute_intensity": 481.88235294117646,
"tile_compute_intensity": 5.305699481865285,
"MxNxK": 2147483648,
"size_m": 512,
"size_n": 512,
"size_k": 8192,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_512x512x8192.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.0001902112038806081,
"perf_norm_to_sol": 0.7206134966638815,
"perf_norm_to_cublas": 0.9762116670659154,
"compute_intensity": 315.0769230769231,
"tile_compute_intensity": 6.4,
"MxNxK": 536870912,
"size_m": 512,
"size_n": 2048,
"size_k": 512,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_512x2048x512.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.002713926322758198,
"perf_norm_to_sol": 0.8080912710629328,
"perf_norm_to_cublas": 0.9334219429261521,
"compute_intensity": 504.12307692307695,
"tile_compute_intensity": 7.420289855072464,
"MxNxK": 8589934592,
"size_m": 512,
"size_n": 16384,
"size_k": 1024,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_512x16384x1024.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.0006933055818080902,
"perf_norm_to_sol": 0.790812965189671,
"perf_norm_to_cublas": 1.072155224015329,
"compute_intensity": 481.88235294117646,
"tile_compute_intensity": 7.111111111111111,
"MxNxK": 2147483648,
"size_m": 512,
"size_n": 4096,
"size_k": 1024,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_512x4096x1024.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00018537599826231598,
"perf_norm_to_sol": 0.7394094274227052,
"perf_norm_to_cublas": 1.1469705144107105,
"compute_intensity": 215.57894736842104,
"tile_compute_intensity": 3.1801242236024843,
"MxNxK": 536870912,
"size_m": 1024,
"size_n": 128,
"size_k": 4096,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_1024x128x4096.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.0006876640021800995,
"perf_norm_to_sol": 0.7973007765333228,
"perf_norm_to_cublas": 1.0814537518676184,
"compute_intensity": 123.18796992481202,
"tile_compute_intensity": 1.9616858237547892,
"MxNxK": 2147483648,
"size_m": 8192,
"size_n": 64,
"size_k": 4096,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_8192x64x4096.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.005415113270282745,
"perf_norm_to_sol": 0.8099923537201698,
"perf_norm_to_cublas": 0.8692158768482626,
"compute_intensity": 668.734693877551,
"tile_compute_intensity": 13.837837837837839,
"MxNxK": 17179869184,
"size_m": 1024,
"size_n": 16384,
"size_k": 1024,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_1024x16384x1024.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.0006820864044129848,
"perf_norm_to_sol": 0.8038205121593952,
"perf_norm_to_cublas": 0.976270462777281,
"compute_intensity": 585.1428571428571,
"tile_compute_intensity": 7.876923076923077,
"MxNxK": 2147483648,
"size_m": 1024,
"size_n": 512,
"size_k": 4096,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_1024x512x4096.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00001646080054342747,
"perf_norm_to_sol": 0.2602181322595607,
"perf_norm_to_cublas": 0.7873249965345848,
"compute_intensity": 50.5679012345679,
"tile_compute_intensity": 2.4615384615384617,
"MxNxK": 16777216,
"size_m": 2048,
"size_n": 128,
"size_k": 64,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_2048x128x64.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.0006871424149721861,
"perf_norm_to_sol": 0.7979059813305204,
"perf_norm_to_cublas": 1.6583182837994774,
"compute_intensity": 224.43835616438355,
"tile_compute_intensity": 3.1950078003120126,
"MxNxK": 2147483648,
"size_m": 1024,
"size_n": 128,
"size_k": 16384,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_1024x128x16384.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00010650240583345293,
"perf_norm_to_sol": 0.6435007719327852,
"perf_norm_to_cublas": 0.9296014739923661,
"compute_intensity": 195.04761904761904,
"tile_compute_intensity": 8,
"MxNxK": 268435456,
"size_m": 2048,
"size_n": 512,
"size_k": 256,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_2048x512x256.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00018235520692542196,
"perf_norm_to_sol": 0.7516580581606789,
"perf_norm_to_cublas": 1.7973010164009087,
"compute_intensity": 199.8048780487805,
"tile_compute_intensity": 2.6597402597402597,
"MxNxK": 536870912,
"size_m": 512,
"size_n": 128,
"size_k": 8192,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_512x128x8192.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.0007007616106420756,
"perf_norm_to_sol": 0.7823987995430381,
"perf_norm_to_cublas": 1.076310104612738,
"compute_intensity": 334.3673469387755,
"tile_compute_intensity": 7.111111111111111,
"MxNxK": 2147483648,
"size_m": 8192,
"size_n": 256,
"size_k": 1024,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_8192x256x1024.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00035070718731731174,
"perf_norm_to_sol": 0.7816706682377446,
"perf_norm_to_cublas": 1.6842586407974796,
"compute_intensity": 221.40540540540542,
"tile_compute_intensity": 1.8788990825688074,
"MxNxK": 1073741824,
"size_m": 128,
"size_n": 1024,
"size_k": 8192,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_128x1024x8192.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.0013536607846617698,
"perf_norm_to_sol": 0.8100626820909194,
"perf_norm_to_cublas": 0.9748215010820221,
"compute_intensity": 237.44927536231884,
"tile_compute_intensity": 1.9768339768339769,
"MxNxK": 4294967296,
"size_m": 128,
"size_n": 8192,
"size_k": 4096,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_128x8192x4096.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00015536319697275757,
"perf_norm_to_sol": 0.5119867738517612,
"perf_norm_to_cublas": 0.7700768275859072,
"compute_intensity": 31.000946073793756,
"tile_compute_intensity": 2.6528497409326426,
"MxNxK": 268435456,
"size_m": 512,
"size_n": 16384,
"size_k": 32,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_512x16384x32.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.005477670207619667,
"perf_norm_to_sol": 0.8007419536423095,
"perf_norm_to_cublas": 0.9119544200496614,
"compute_intensity": 819.2,
"tile_compute_intensity": 12.641975308641975,
"MxNxK": 17179869184,
"size_m": 4096,
"size_n": 512,
"size_k": 8192,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_4096x512x8192.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.0000228320001042448,
"perf_norm_to_sol": 0.37521012205247073,
"perf_norm_to_cublas": 0.7929922509394659,
"compute_intensity": 58.51428571428571,
"tile_compute_intensity": 3.5555555555555554,
"MxNxK": 33554432,
"size_m": 512,
"size_n": 1024,
"size_k": 64,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_512x1024x64.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00006587520474568009,
"perf_norm_to_sol": 0.5201834334413968,
"perf_norm_to_cublas": 0.8036042955220489,
"compute_intensity": 110.70270270270271,
"tile_compute_intensity": 6.4,
"MxNxK": 134217728,
"size_m": 2048,
"size_n": 512,
"size_k": 128,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_2048x512x128.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.0004065375775098801,
"perf_norm_to_sol": 0.7384889235117484,
"perf_norm_to_cublas": 0.7575703341271942,
"compute_intensity": 7.982460414129111,
"tile_compute_intensity": 0.9808429118773946,
"MxNxK": 268435456,
"size_m": 16384,
"size_n": 2048,
"size_k": 8,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_16384x2048x8.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.0000066592001530807465,
"perf_norm_to_sol": 0.028766540258946312,
"perf_norm_to_cublas": 0.6934166526902698,
"compute_intensity": 13.837837837837839,
"tile_compute_intensity": 0.8,
"MxNxK": 262144,
"size_m": 256,
"size_n": 64,
"size_k": 16,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_256x64x16.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.0013664287514984607,
"perf_norm_to_sol": 0.8024934228454332,
"perf_norm_to_cublas": 0.9449380063968464,
"compute_intensity": 431.1578947368421,
"tile_compute_intensity": 21.333333333333332,
"MxNxK": 4294967296,
"size_m": 4096,
"size_n": 2048,
"size_k": 512,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_4096x2048x512.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.000053759996080771086,
"perf_norm_to_sol": 0.6374105781514292,
"perf_norm_to_cublas": 2.7256549093174165,
"compute_intensity": 84.89119170984456,
"tile_compute_intensity": 0.9995119570522206,
"MxNxK": 134217728,
"size_m": 128,
"size_n": 64,
"size_k": 16384,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_128x64x16384.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.005568825453519821,
"perf_norm_to_sol": 0.7876347319676384,
"perf_norm_to_cublas": 0.8999097045348821,
"compute_intensity": 126.51737451737452,
"tile_compute_intensity": 1.9825750242013553,
"MxNxK": 17179869184,
"size_m": 16384,
"size_n": 64,
"size_k": 16384,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_16384x64x16384.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.000008582399459555745,
"perf_norm_to_sol": 0.12477276294039982,
"perf_norm_to_cublas": 0.7576436387929018,
"compute_intensity": 51.2,
"tile_compute_intensity": 2,
"MxNxK": 4194304,
"size_m": 256,
"size_n": 256,
"size_k": 64,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_256x256x64.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.0013556735590100288,
"perf_norm_to_sol": 0.8088599785520342,
"perf_norm_to_cublas": 0.9641353782114495,
"compute_intensity": 819.2,
"tile_compute_intensity": 12.19047619047619,
"MxNxK": 4294967296,
"size_m": 1024,
"size_n": 2048,
"size_k": 2048,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_1024x2048x2048.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.000006672000017715618,
"perf_norm_to_sol": 0.08024952715704621,
"perf_norm_to_cublas": 1.1592325653192224,
"compute_intensity": 64,
"tile_compute_intensity": 1.2307692307692308,
"MxNxK": 2097152,
"size_m": 128,
"size_n": 128,
"size_k": 128,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_128x128x128.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.010837190598249436,
"perf_norm_to_sol": 0.8094718467286462,
"perf_norm_to_cublas": 1.048953722993839,
"compute_intensity": 862.3157894736842,
"tile_compute_intensity": 7.6992481203007515,
"MxNxK": 34359738368,
"size_m": 512,
"size_n": 8192,
"size_k": 8192,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_512x8192x8192.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.000008969599730335175,
"perf_norm_to_sol": 0.11938656410779984,
"perf_norm_to_cublas": 1.4398859027019102,
"compute_intensity": 85.33333333333333,
"tile_compute_intensity": 1.28,
"MxNxK": 4194304,
"size_m": 128,
"size_n": 128,
"size_k": 256,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_128x128x256.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.0006752895656973124,
"perf_norm_to_sol": 0.811911024222698,
"perf_norm_to_cublas": 1.0219213110598575,
"compute_intensity": 585.1428571428571,
"tile_compute_intensity": 6.320987654320987,
"MxNxK": 2147483648,
"size_m": 512,
"size_n": 1024,
"size_k": 4096,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_512x1024x4096.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.000053804804338142276,
"perf_norm_to_sol": 0.705959961577871,
"perf_norm_to_cublas": 0.8684429056011216,
"compute_intensity": 7.934140435835351,
"tile_compute_intensity": 0.9343065693430657,
"MxNxK": 33554432,
"size_m": 8192,
"size_n": 512,
"size_k": 8,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_8192x512x8.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.0006842432077974081,
"perf_norm_to_sol": 0.8012867890893848,
"perf_norm_to_cublas": 1.6766624170895204,
"compute_intensity": 123.18796992481202,
"tile_compute_intensity": 1.8806244260789715,
"MxNxK": 2147483648,
"size_m": 2048,
"size_n": 64,
"size_k": 16384,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_2048x64x16384.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.0003541023936122656,
"perf_norm_to_sol": 0.7741758497297184,
"perf_norm_to_cublas": 1.6798305062233942,
"compute_intensity": 122.26865671641791,
"tile_compute_intensity": 1.8788990825688074,
"MxNxK": 1073741824,
"size_m": 2048,
"size_n": 64,
"size_k": 8192,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_2048x64x8192.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00008681280305609108,
"perf_norm_to_sol": 0.44384418741696463,
"perf_norm_to_cublas": 0.726639339920557,
"compute_intensity": 31.50769230769231,
"tile_compute_intensity": 3.3684210526315788,
"MxNxK": 134217728,
"size_m": 2048,
"size_n": 2048,
"size_k": 32,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_2048x2048x32.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00003136320156045258,
"perf_norm_to_sol": 0.5462961126148561,
"perf_norm_to_cublas": 1.4001631726766686,
"compute_intensity": 157.53846153846155,
"tile_compute_intensity": 3.0476190476190474,
"MxNxK": 67108864,
"size_m": 1024,
"size_n": 128,
"size_k": 512,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_1024x128x512.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00001648640027269721,
"perf_norm_to_sol": 0.2598140711166346,
"perf_norm_to_cublas": 0.7862965524643564,
"compute_intensity": 56.888888888888886,
"tile_compute_intensity": 3.2,
"MxNxK": 16777216,
"size_m": 512,
"size_n": 512,
"size_k": 64,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_512x512x64.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.0001331328065134585,
"perf_norm_to_sol": 0.5711668915783518,
"perf_norm_to_cublas": 0.6722670848540764,
"compute_intensity": 15.860600193610843,
"tile_compute_intensity": 1.855072463768116,
"MxNxK": 134217728,
"size_m": 8192,
"size_n": 1024,
"size_k": 16,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_8192x1024x16.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00003339520189911127,
"perf_norm_to_sol": 0.5130555923390719,
"perf_norm_to_cublas": 1.5160981250257093,
"compute_intensity": 99.90243902439025,
"tile_compute_intensity": 1.8285714285714285,
"MxNxK": 67108864,
"size_m": 2048,
"size_n": 64,
"size_k": 512,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_2048x64x512.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.000013484800001606346,
"perf_norm_to_sol": 0.3707033026310093,
"perf_norm_to_cublas": 0.7342192741222211,
"compute_intensity": 15.456603773584906,
"tile_compute_intensity": 1.3061224489795917,
"MxNxK": 8388608,
"size_m": 256,
"size_n": 2048,
"size_k": 16,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_256x2048x16.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.0002081983955577016,
"perf_norm_to_sol": 0.6583564698752121,
"perf_norm_to_cublas": 0.8725831193351236,
"compute_intensity": 61.82641509433962,
"tile_compute_intensity": 5.224489795918367,
"MxNxK": 536870912,
"size_m": 1024,
"size_n": 8192,
"size_k": 64,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_1024x8192x64.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00008881599642336368,
"perf_norm_to_sol": 0.4371201565116441,
"perf_norm_to_cublas": 0.7184651910099518,
"compute_intensity": 31.386973180076627,
"tile_compute_intensity": 3.1219512195121952,
"MxNxK": 134217728,
"size_m": 1024,
"size_n": 4096,
"size_k": 32,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_1024x4096x32.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.010797932744026184,
"perf_norm_to_sol": 0.8124148292892924,
"perf_norm_to_cublas": 0.8795849508098538,
"compute_intensity": 1365.3333333333333,
"tile_compute_intensity": 36.57142857142857,
"MxNxK": 34359738368,
"size_m": 4096,
"size_n": 4096,
"size_k": 2048,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_4096x4096x2048.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00020264319609850646,
"perf_norm_to_sol": 0.7432896060131052,
"perf_norm_to_cublas": 0.8141521663972909,
"compute_intensity": 15.937743190661479,
"tile_compute_intensity": 1.9104477611940298,
"MxNxK": 268435456,
"size_m": 4096,
"size_n": 4096,
"size_k": 16,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_4096x4096x16.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.000107881601434201,
"perf_norm_to_sol": 0.39030923027556236,
"perf_norm_to_cublas": 0.7478124161334909,
"compute_intensity": 30.089990817263544,
"tile_compute_intensity": 2.6391752577319587,
"MxNxK": 134217728,
"size_m": 16384,
"size_n": 256,
"size_k": 32,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_16384x256x32.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00005757439648732543,
"perf_norm_to_sol": 0.5951810574481059,
"perf_norm_to_cublas": 0.9016786224045885,
"compute_intensity": 163.84,
"tile_compute_intensity": 5.333333333333333,
"MxNxK": 134217728,
"size_m": 2048,
"size_n": 256,
"size_k": 256,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_2048x256x256.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00001340160088147968,
"perf_norm_to_sol": 0.3934246479821998,
"perf_norm_to_cublas": 0.7999043923075169,
"compute_intensity": 15.03119266055046,
"tile_compute_intensity": 1.3061224489795917,
"MxNxK": 8388608,
"size_m": 4096,
"size_n": 128,
"size_k": 16,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_4096x128x16.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.0017687231302261353,
"perf_norm_to_sol": 0.6779707609811585,
"perf_norm_to_cublas": 0.7101384723287024,
"compute_intensity": 15.976596782057532,
"tile_compute_intensity": 1.9616858237547892,
"MxNxK": 2147483648,
"size_m": 8192,
"size_n": 16384,
"size_k": 16,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_8192x16384x16.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00000865600013639778,
"perf_norm_to_sol": 0.20444424114195123,
"perf_norm_to_cublas": 0.7866913020366293,
"compute_intensity": 25.440993788819874,
"tile_compute_intensity": 1.28,
"MxNxK": 4194304,
"size_m": 2048,
"size_n": 64,
"size_k": 32,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_2048x64x32.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.001363577600568533,
"perf_norm_to_sol": 0.8041713837241193,
"perf_norm_to_cublas": 1.0554963584866728,
"compute_intensity": 682.6666666666666,
"tile_compute_intensity": 18.285714285714285,
"MxNxK": 4294967296,
"size_m": 2048,
"size_n": 2048,
"size_k": 1024,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_2048x2048x1024.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.005447027087211609,
"perf_norm_to_sol": 0.8052466553279046,
"perf_norm_to_cublas": 0.8721804336443404,
"compute_intensity": 468.1142857142857,
"tile_compute_intensity": 32,
"MxNxK": 17179869184,
"size_m": 8192,
"size_n": 4096,
"size_k": 512,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_8192x4096x512.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.000033910400816239417,
"perf_norm_to_sol": 0.5052607659956143,
"perf_norm_to_cublas": 0.8709068555403692,
"compute_intensity": 99.90243902439025,
"tile_compute_intensity": 4.571428571428571,
"MxNxK": 67108864,
"size_m": 2048,
"size_n": 256,
"size_k": 128,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_2048x256x128.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00002386559935985133,
"perf_norm_to_sol": 0.3589600837860157,
"perf_norm_to_cublas": 4.082729874395705,
"compute_intensity": 102.4,
"tile_compute_intensity": 1.5802469135802468,
"MxNxK": 33554432,
"size_m": 512,
"size_n": 64,
"size_k": 1024,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_512x64x1024.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.0006925439927726984,
"perf_norm_to_sol": 0.7916826203879247,
"perf_norm_to_cublas": 1.076120530095307,
"compute_intensity": 334.3673469387755,
"tile_compute_intensity": 3.8208955223880596,
"MxNxK": 2147483648,
"size_m": 256,
"size_n": 8192,
"size_k": 1024,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_256x8192x1024.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.000006355199730023742,
"perf_norm_to_sol": 0.20740969336829893,
"perf_norm_to_cublas": 0.8298086786680894,
"compute_intensity": 7.51559633027523,
"tile_compute_intensity": 0.6530612244897959,
"MxNxK": 1048576,
"size_m": 2048,
"size_n": 64,
"size_k": 8,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_2048x64x8.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00004672640061471611,
"perf_norm_to_sol": 0.36667911215560955,
"perf_norm_to_cublas": 0.6549102965549386,
"compute_intensity": 60.23529411764706,
"tile_compute_intensity": 4.571428571428571,
"MxNxK": 67108864,
"size_m": 1024,
"size_n": 1024,
"size_k": 64,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_1024x1024x64.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.0224470779299736,
"perf_norm_to_sol": 0.781607362373122,
"perf_norm_to_cublas": 0.9071157759845524,
"compute_intensity": 489.07462686567163,
"tile_compute_intensity": 42.666666666666664,
"MxNxK": 68719476736,
"size_m": 16384,
"size_n": 8192,
"size_k": 512,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_16384x8192x512.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.0013458432629704475,
"perf_norm_to_sol": 0.8147680462018928,
"perf_norm_to_cublas": 1.6564234625955703,
"compute_intensity": 237.44927536231884,
"tile_compute_intensity": 1.9375591296121097,
"MxNxK": 4294967296,
"size_m": 128,
"size_n": 2048,
"size_k": 16384,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_128x2048x16384.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00002659840101841837,
"perf_norm_to_sol": 0.7105990588475034,
"perf_norm_to_cublas": 0.9933829667361517,
"compute_intensity": 7.953398058252427,
"tile_compute_intensity": 0.9411764705882353,
"MxNxK": 16777216,
"size_m": 2048,
"size_n": 1024,
"size_k": 8,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_2048x1024x8.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.0009052224457263946,
"perf_norm_to_sol": 0.66299183650445,
"perf_norm_to_cublas": 0.7006984848996148,
"compute_intensity": 15.968810916179336,
"tile_compute_intensity": 1.9541984732824427,
"MxNxK": 1073741824,
"size_m": 8192,
"size_n": 8192,
"size_k": 16,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_8192x8192x16.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00017987519968301057,
"perf_norm_to_sol": 0.7620214513985486,
"perf_norm_to_cublas": 1.1082706918741776,
"compute_intensity": 215.57894736842104,
"tile_compute_intensity": 1.9248120300751879,
"MxNxK": 536870912,
"size_m": 128,
"size_n": 2048,
"size_k": 2048,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_128x2048x2048.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.005393001437187195,
"perf_norm_to_sol": 0.8133134015527611,
"perf_norm_to_cublas": 1.0502862248983333,
"compute_intensity": 1170.2857142857142,
"tile_compute_intensity": 20.48,
"MxNxK": 17179869184,
"size_m": 4096,
"size_n": 1024,
"size_k": 4096,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_4096x1024x4096.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.0027450527995824814,
"perf_norm_to_sol": 0.7989282290170852,
"perf_norm_to_cublas": 0.8625296609508197,
"compute_intensity": 442.81081081081084,
"tile_compute_intensity": 19.692307692307693,
"MxNxK": 8589934592,
"size_m": 2048,
"size_n": 8192,
"size_k": 512,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_2048x8192x512.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00001293440000154078,
"perf_norm_to_sol": 0.1655816571467408,
"perf_norm_to_cublas": 0.6422563231482827,
"compute_intensity": 53.89473684210526,
"tile_compute_intensity": 2.2857142857142856,
"MxNxK": 8388608,
"size_m": 256,
"size_n": 512,
"size_k": 64,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_256x512x64.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.001354800071567297,
"perf_norm_to_sol": 0.8093814791402177,
"perf_norm_to_cublas": 0.9748237014296021,
"compute_intensity": 225.98620689655172,
"tile_compute_intensity": 1.9768339768339769,
"MxNxK": 4294967296,
"size_m": 128,
"size_n": 16384,
"size_k": 2048,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_128x16384x2048.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00009786239825189113,
"perf_norm_to_sol": 0.7003137220296085,
"perf_norm_to_cublas": 0.975704696325953,
"compute_intensity": 240.94117647058823,
"tile_compute_intensity": 3.5555555555555554,
"MxNxK": 268435456,
"size_m": 256,
"size_n": 2048,
"size_k": 512,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_256x2048x512.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00021330881863832474,
"perf_norm_to_sol": 0.7122825553248885,
"perf_norm_to_cublas": 0.8391213860895063,
"compute_intensity": 15.868280871670702,
"tile_compute_intensity": 1.7716262975778547,
"MxNxK": 268435456,
"size_m": 1024,
"size_n": 16384,
"size_k": 16,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_1024x16384x16.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.000023334400611929596,
"perf_norm_to_sol": 0.36713167345879816,
"perf_norm_to_cublas": 1.2859296857840568,
"compute_intensity": 146.28571428571428,
"tile_compute_intensity": 2.56,
"MxNxK": 33554432,
"size_m": 512,
"size_n": 128,
"size_k": 512,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_512x128x512.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.0003507263958454132,
"perf_norm_to_sol": 0.7816278578214919,
"perf_norm_to_cublas": 1.68634702073557,
"compute_intensity": 327.68,
"tile_compute_intensity": 3.190031152647975,
"MxNxK": 1073741824,
"size_m": 256,
"size_n": 512,
"size_k": 8192,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_256x512x8192.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.02232952564954758,
"perf_norm_to_sol": 0.7857220815698817,
"perf_norm_to_cublas": 0.8716155277268586,
"compute_intensity": 1310.72,
"tile_compute_intensity": 26.94736842105263,
"MxNxK": 68719476736,
"size_m": 16384,
"size_n": 1024,
"size_k": 4096,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_16384x1024x4096.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00009535359567962587,
"perf_norm_to_sol": 0.7187393393825569,
"perf_norm_to_cublas": 2.1179945357062606,
"compute_intensity": 127.0077519379845,
"tile_compute_intensity": 1.332465842550423,
"MxNxK": 268435456,
"size_m": 128,
"size_n": 128,
"size_k": 16384,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_128x128x16384.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00001645119918975979,
"perf_norm_to_sol": 0.2603700024235377,
"perf_norm_to_cublas": 0.7809765578215575,
"compute_intensity": 50.5679012345679,
"tile_compute_intensity": 1.5609756097560976,
"MxNxK": 16777216,
"size_m": 128,
"size_n": 2048,
"size_k": 64,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_128x2048x64.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.0001875519985333085,
"perf_norm_to_sol": 0.7308307125754706,
"perf_norm_to_cublas": 0.9950520756714495,
"compute_intensity": 168.90721649484536,
"tile_compute_intensity": 1.9248120300751879,
"MxNxK": 536870912,
"size_m": 128,
"size_n": 8192,
"size_k": 512,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_128x8192x512.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.010832707583904266,
"perf_norm_to_sol": 0.8098068390537679,
"perf_norm_to_cublas": 0.9307261626912887,
"compute_intensity": 1260.3076923076924,
"tile_compute_intensity": 14.628571428571428,
"MxNxK": 34359738368,
"size_m": 1024,
"size_n": 8192,
"size_k": 4096,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_1024x8192x4096.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.0027121376246213914,
"perf_norm_to_sol": 0.8086242201794518,
"perf_norm_to_cublas": 0.915454417713241,
"compute_intensity": 910.2222222222222,
"tile_compute_intensity": 10.556701030927835,
"MxNxK": 8589934592,
"size_m": 1024,
"size_n": 1024,
"size_k": 8192,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_1024x1024x8192.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.000695100799202919,
"perf_norm_to_sol": 0.7887705546604461,
"perf_norm_to_cublas": 1.052739379504871,
"compute_intensity": 390.0952380952381,
"tile_compute_intensity": 11.636363636363637,
"MxNxK": 2147483648,
"size_m": 1024,
"size_n": 4096,
"size_k": 512,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_1024x4096x512.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.0001231551985256374,
"perf_norm_to_sol": 0.5564879208266538,
"perf_norm_to_cublas": 0.8339395843689983,
"compute_intensity": 60.014652014652015,
"tile_compute_intensity": 3.9384615384615387,
"MxNxK": 268435456,
"size_m": 512,
"size_n": 8192,
"size_k": 64,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_512x8192x64.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00006312959594652056,
"perf_norm_to_sol": 0.3167363783002085,
"perf_norm_to_cublas": 0.6224655809158052,
"compute_intensity": 30.91320754716981,
"tile_compute_intensity": 2.6122448979591835,
"MxNxK": 67108864,
"size_m": 512,
"size_n": 4096,
"size_k": 32,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_512x4096x32.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.000018185601220466197,
"perf_norm_to_sol": 0.23553792481093738,
"perf_norm_to_cublas": 5.664965091761656,
"compute_intensity": 81.92,
"tile_compute_intensity": 0.9961089494163424,
"MxNxK": 16777216,
"size_m": 128,
"size_n": 64,
"size_k": 2048,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_128x64x2048.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.005413241684436798,
"perf_norm_to_sol": 0.8102724022221435,
"perf_norm_to_cublas": 1.0560651459700807,
"compute_intensity": 780.1904761904761,
"tile_compute_intensity": 13.837837837837839,
"MxNxK": 17179869184,
"size_m": 8192,
"size_n": 512,
"size_k": 4096,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_8192x512x4096.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00009485119953751564,
"perf_norm_to_sol": 0.7225462693217596,
"perf_norm_to_cublas": 1.9436253813366693,
"compute_intensity": 167.18367346938774,
"tile_compute_intensity": 1.5975039001560063,
"MxNxK": 268435456,
"size_m": 128,
"size_n": 256,
"size_k": 8192,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_128x256x8192.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00006492480169981719,
"perf_norm_to_sol": 0.5277981493374257,
"perf_norm_to_cublas": 0.818423729038454,
"compute_intensity": 110.70270270270271,
"tile_compute_intensity": 4.923076923076923,
"MxNxK": 134217728,
"size_m": 512,
"size_n": 2048,
"size_k": 128,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_512x2048x128.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00007206400041468442,
"perf_norm_to_sol": 0.47551051823484763,
"perf_norm_to_cublas": 0.7585701903339775,
"compute_intensity": 63.875243664717345,
"tile_compute_intensity": 1.7655172413793103,
"MxNxK": 134217728,
"size_m": 16384,
"size_n": 64,
"size_k": 128,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_16384x64x128.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00009691519662737846,
"perf_norm_to_sol": 0.7071582450585961,
"perf_norm_to_cublas": 1.03490062164923,
"compute_intensity": 292.57142857142856,
"tile_compute_intensity": 5.12,
"MxNxK": 268435456,
"size_m": 1024,
"size_n": 256,
"size_k": 1024,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_1024x256x1024.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.0006992415990680456,
"perf_norm_to_sol": 0.7840995782615778,
"perf_norm_to_cublas": 1.0544269853308772,
"compute_intensity": 254.015503875969,
"tile_compute_intensity": 3.7372262773722627,
"MxNxK": 2147483648,
"size_m": 256,
"size_n": 16384,
"size_k": 512,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_256x16384x512.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00016287039034068584,
"perf_norm_to_sol": 0.4883877408716898,
"perf_norm_to_cublas": 0.8957305907025216,
"compute_intensity": 31.000946073793756,
"tile_compute_intensity": 3.1604938271604937,
"MxNxK": 268435456,
"size_m": 16384,
"size_n": 512,
"size_k": 32,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_16384x512x32.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.000006323200068436563,
"perf_norm_to_sol": 0.3938366513624268,
"perf_norm_to_cublas": 0.8613360619708327,
"compute_intensity": 7.742911153119093,
"tile_compute_intensity": 0.7804878048780488,
"MxNxK": 2097152,
"size_m": 2048,
"size_n": 128,
"size_k": 8,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_2048x128x8.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.0003495712066069245,
"perf_norm_to_sol": 0.7842108168089398,
"perf_norm_to_cublas": 1.712726861231697,
"compute_intensity": 252.06153846153848,
"tile_compute_intensity": 2.6631989596879064,
"MxNxK": 1073741824,
"size_m": 256,
"size_n": 256,
"size_k": 16384,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_256x256x16384.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00010925439419224858,
"perf_norm_to_sol": 0.6272917521827981,
"perf_norm_to_cublas": 0.9037549863445259,
"compute_intensity": 85.11168831168831,
"tile_compute_intensity": 1.8686131386861313,
"MxNxK": 268435456,
"size_m": 16384,
"size_n": 64,
"size_k": 256,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_16384x64x256.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00010403840569779277,
"perf_norm_to_sol": 0.6587411630047665,
"perf_norm_to_cublas": 0.9502952444776007,
"compute_intensity": 195.04761904761904,
"tile_compute_intensity": 5.818181818181818,
"MxNxK": 268435456,
"size_m": 512,
"size_n": 2048,
"size_k": 256,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_512x2048x256.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.000006611199933104217,
"perf_norm_to_sol": 0.02759561713950218,
"perf_norm_to_cublas": 0.7821877926087927,
"compute_intensity": 36.57142857142857,
"tile_compute_intensity": 0.8888888888888888,
"MxNxK": 524288,
"size_m": 128,
"size_n": 64,
"size_k": 64,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_128x64x64.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.0013690208084881305,
"perf_norm_to_sol": 0.8009740093544523,
"perf_norm_to_cublas": 1.6356543230772171,
"compute_intensity": 431.1578947368421,
"tile_compute_intensity": 6.3602484472049685,
"MxNxK": 4294967296,
"size_m": 2048,
"size_n": 256,
"size_k": 8192,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_2048x256x8192.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.000008774399611866102,
"perf_norm_to_sol": 0.17049704049350173,
"perf_norm_to_cublas": 0.70423053535723,
"compute_intensity": 28.054794520547944,
"tile_compute_intensity": 1.7777777777777777,
"MxNxK": 4194304,
"size_m": 1024,
"size_n": 128,
"size_k": 32,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_1024x128x32.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.0000322912004776299,
"perf_norm_to_sol": 0.5305964113505451,
"perf_norm_to_cublas": 3.0394409388155497,
"compute_intensity": 107.78947368421052,
"tile_compute_intensity": 1.7534246575342465,
"MxNxK": 67108864,
"size_m": 1024,
"size_n": 64,
"size_k": 1024,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_1024x64x1024.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.000006246400153031572,
"perf_norm_to_sol": 0.013873441568836846,
"perf_norm_to_cublas": 0.9001024113373731,
"compute_intensity": 7.314285714285714,
"tile_compute_intensity": 0.5,
"MxNxK": 65536,
"size_m": 128,
"size_n": 64,
"size_k": 8,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_128x64x8.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.005380825325846672,
"perf_norm_to_sol": 0.815153824523653,
"perf_norm_to_cublas": 0.9141639913423132,
"compute_intensity": 780.1904761904761,
"tile_compute_intensity": 7.086505190311419,
"MxNxK": 17179869184,
"size_m": 512,
"size_n": 2048,
"size_k": 16384,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_512x2048x16384.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.000013411199324764311,
"perf_norm_to_sol": 0.15969484418139968,
"perf_norm_to_cublas": 0.6473395596795436,
"compute_intensity": 53.89473684210526,
"tile_compute_intensity": 2.6666666666666665,
"MxNxK": 8388608,
"size_m": 512,
"size_n": 256,
"size_k": 64,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_512x256x64.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.005439683049917221,
"perf_norm_to_sol": 0.8063338071736356,
"perf_norm_to_cublas": 0.8682099717861301,
"compute_intensity": 780.1904761904761,
"tile_compute_intensity": 32,
"MxNxK": 17179869184,
"size_m": 8192,
"size_n": 2048,
"size_k": 1024,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_8192x2048x1024.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.0013632000423967837,
"perf_norm_to_sol": 0.8043941107399417,
"perf_norm_to_cublas": 0.9656337744886352,
"compute_intensity": 630.1538461538462,
"tile_compute_intensity": 7.314285714285714,
"MxNxK": 4294967296,
"size_m": 512,
"size_n": 4096,
"size_k": 2048,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_512x4096x2048.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.005370166525244713,
"perf_norm_to_sol": 0.8167717561156582,
"perf_norm_to_cublas": 0.9369619957946453,
"compute_intensity": 668.734693877551,
"tile_compute_intensity": 7.641791044776119,
"MxNxK": 17179869184,
"size_m": 512,
"size_n": 16384,
"size_k": 2048,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_512x16384x2048.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.011002134531736374,
"perf_norm_to_sol": 0.7973362497623285,
"perf_norm_to_cublas": 1.0419348383520752,
"compute_intensity": 474.8985507246377,
"tile_compute_intensity": 7.6992481203007515,
"MxNxK": 34359738368,
"size_m": 16384,
"size_n": 256,
"size_k": 8192,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_16384x256x8192.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.000016457599122077228,
"perf_norm_to_sol": 0.2602687513005372,
"perf_norm_to_cublas": 0.8448376844543066,
"compute_intensity": 81.92,
"tile_compute_intensity": 2.6666666666666665,
"MxNxK": 16777216,
"size_m": 1024,
"size_n": 128,
"size_k": 128,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_1024x128x128.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.010827804356813431,
"perf_norm_to_sol": 0.8101735493027478,
"perf_norm_to_cublas": 0.8625597638120522,
"compute_intensity": 992.969696969697,
"tile_compute_intensity": 14.628571428571428,
"MxNxK": 34359738368,
"size_m": 1024,
"size_n": 16384,
"size_k": 2048,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_1024x16384x2048.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.0013721376657485963,
"perf_norm_to_sol": 0.7991545697174398,
"perf_norm_to_cublas": 0.9717718792692647,
"compute_intensity": 399.609756097561,
"tile_compute_intensity": 7.314285714285714,
"MxNxK": 4294967296,
"size_m": 8192,
"size_n": 256,
"size_k": 2048,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_8192x256x2048.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00004887999966740608,
"perf_norm_to_sol": 0.35052363355592187,
"perf_norm_to_cublas": 0.6321440139469747,
"compute_intensity": 59.36231884057971,
"tile_compute_intensity": 3.764705882352941,
"MxNxK": 67108864,
"size_m": 512,
"size_n": 2048,
"size_k": 64,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_512x2048x64.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.000006134399882284924,
"perf_norm_to_sol": 0.054276419190105396,
"perf_norm_to_cublas": 0.6035471835968333,
"compute_intensity": 7.474452554744525,
"tile_compute_intensity": 0.6153846153846154,
"MxNxK": 262144,
"size_m": 512,
"size_n": 64,
"size_k": 8,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_512x64x8.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.010841174423694611,
"perf_norm_to_sol": 0.8091743886845156,
"perf_norm_to_cublas": 0.8991713707679411,
"compute_intensity": 1260.3076923076924,
"tile_compute_intensity": 12.720496894409937,
"MxNxK": 34359738368,
"size_m": 1024,
"size_n": 2048,
"size_k": 16384,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_1024x2048x16384.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.000014915199426468461,
"perf_norm_to_sol": 0.1435917365377815,
"perf_norm_to_cublas": 1.5850676613545813,
"compute_intensity": 102.4,
"tile_compute_intensity": 1.3061224489795917,
"MxNxK": 8388608,
"size_m": 128,
"size_n": 128,
"size_k": 512,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_128x128x512.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00018047039629891514,
"perf_norm_to_sol": 0.8313760927625334,
"perf_norm_to_cublas": 0.8789474896255645,
"compute_intensity": 7.984405458089668,
"tile_compute_intensity": 0.9770992366412213,
"MxNxK": 134217728,
"size_m": 4096,
"size_n": 4096,
"size_k": 8,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_4096x4096x8.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.0006765215657651424,
"perf_norm_to_sol": 0.8104324690848688,
"perf_norm_to_cublas": 1.0088642150192144,
"compute_intensity": 409.6,
"tile_compute_intensity": 3.7372262773722627,
"MxNxK": 2147483648,
"size_m": 256,
"size_n": 2048,
"size_k": 4096,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_256x2048x4096.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.000033280000207014385,
"perf_norm_to_sol": 0.5148315800797443,
"perf_norm_to_cublas": 3.4803846917700607,
"compute_intensity": 99.90243902439025,
"tile_compute_intensity": 1.3298701298701299,
"MxNxK": 67108864,
"size_m": 256,
"size_n": 64,
"size_k": 4096,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_256x64x4096.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00002229759993497282,
"perf_norm_to_sol": 0.384202675211652,
"perf_norm_to_cublas": 1.0673076985829086,
"compute_intensity": 120.47058823529412,
"tile_compute_intensity": 2.909090909090909,
"MxNxK": 33554432,
"size_m": 1024,
"size_n": 128,
"size_k": 256,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_1024x128x256.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.0006922752130776644,
"perf_norm_to_sol": 0.7919899955607632,
"perf_norm_to_cublas": 1.0723919696751696,
"compute_intensity": 585.1428571428571,
"tile_compute_intensity": 11.636363636363637,
"MxNxK": 2147483648,
"size_m": 1024,
"size_n": 2048,
"size_k": 1024,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_1024x2048x1024.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00037797761615365744,
"perf_norm_to_sol": 0.7252744865046694,
"perf_norm_to_cublas": 0.8838025590095857,
"compute_intensity": 122.26865671641791,
"tile_compute_intensity": 10.666666666666666,
"MxNxK": 1073741824,
"size_m": 4096,
"size_n": 2048,
"size_k": 128,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_4096x2048x128.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00005942400312051177,
"perf_norm_to_sol": 0.32911924837419243,
"perf_norm_to_cublas": 0.6440495051114873,
"compute_intensity": 31.267175572519083,
"tile_compute_intensity": 3.2,
"MxNxK": 67108864,
"size_m": 2048,
"size_n": 1024,
"size_k": 32,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_2048x1024x32.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.000006796800153097138,
"perf_norm_to_sol": 0.11273666724403453,
"perf_norm_to_cublas": 0.7396421605166049,
"compute_intensity": 27.675675675675677,
"tile_compute_intensity": 1.6,
"MxNxK": 2097152,
"size_m": 512,
"size_n": 128,
"size_k": 32,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_512x128x32.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00019503680523484944,
"perf_norm_to_sol": 0.7027840748724479,
"perf_norm_to_cublas": 0.9517793678635207,
"compute_intensity": 315.0769230769231,
"tile_compute_intensity": 9.142857142857142,
"MxNxK": 536870912,
"size_m": 2048,
"size_n": 512,
"size_k": 512,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_2048x512x512.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.0006783423945307731,
"perf_norm_to_sol": 0.80825707983571,
"perf_norm_to_cublas": 1.6683538711244341,
"compute_intensity": 334.3673469387755,
"tile_compute_intensity": 3.1950078003120126,
"MxNxK": 2147483648,
"size_m": 256,
"size_n": 512,
"size_k": 16384,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_256x512x16384.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.000016521599900443106,
"perf_norm_to_sol": 0.25926053159010215,
"perf_norm_to_cublas": 0.8037962023707637,
"compute_intensity": 55.351351351351354,
"tile_compute_intensity": 3.2,
"MxNxK": 16777216,
"size_m": 1024,
"size_n": 256,
"size_k": 64,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_1024x256x64.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00000781439957791008,
"perf_norm_to_sol": 0.6186865311588194,
"perf_norm_to_cublas": 1.1281736762535746,
"compute_intensity": 7.861804222648752,
"tile_compute_intensity": 0.8648648648648649,
"MxNxK": 4194304,
"size_m": 2048,
"size_n": 256,
"size_k": 8,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_2048x256x8.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.0013698016293346882,
"perf_norm_to_sol": 0.8005174343360982,
"perf_norm_to_cublas": 0.9781877822290566,
"compute_intensity": 237.44927536231884,
"tile_compute_intensity": 3.8496240601503757,
"MxNxK": 4294967296,
"size_m": 8192,
"size_n": 128,
"size_k": 4096,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_8192x128x4096.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.0007000095676630736,
"perf_norm_to_sol": 0.7832393559456317,
"perf_norm_to_cublas": 0.9534543978479824,
"compute_intensity": 234.05714285714285,
"tile_compute_intensity": 16,
"MxNxK": 2147483648,
"size_m": 4096,
"size_n": 2048,
"size_k": 256,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_4096x2048x256.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.0002065952168777585,
"perf_norm_to_sol": 0.7294242321947649,
"perf_norm_to_cublas": 0.7665928075303637,
"compute_intensity": 7.9669341113542425,
"tile_compute_intensity": 0.9660377358490566,
"MxNxK": 134217728,
"size_m": 16384,
"size_n": 1024,
"size_k": 8,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_16384x1024x8.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.000006550399848492816,
"perf_norm_to_sol": 0.05291833753238528,
"perf_norm_to_cublas": 0.5686370275206571,
"compute_intensity": 14.628571428571428,
"tile_compute_intensity": 0.8888888888888888,
"MxNxK": 524288,
"size_m": 128,
"size_n": 256,
"size_k": 16,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_128x256x16.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.0007845696061849594,
"perf_norm_to_sol": 0.7634599532090316,
"perf_norm_to_cublas": 0.7756446507060805,
"compute_intensity": 7.9921951219512195,
"tile_compute_intensity": 0.9884169884169884,
"MxNxK": 536870912,
"size_m": 8192,
"size_n": 8192,
"size_k": 8,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_8192x8192x8.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.000007868800457799807,
"perf_norm_to_sol": 0.6074536711058819,
"perf_norm_to_cublas": 1.0915005616841444,
"compute_intensity": 7.9073359073359075,
"tile_compute_intensity": 0.8888888888888888,
"MxNxK": 4194304,
"size_m": 1024,
"size_n": 512,
"size_k": 8,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_1024x512x8.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.0014307583682239057,
"perf_norm_to_sol": 0.7664117926674309,
"perf_norm_to_cublas": 0.890130420849891,
"compute_intensity": 125.06870229007633,
"tile_compute_intensity": 12.8,
"MxNxK": 4294967296,
"size_m": 8192,
"size_n": 4096,
"size_k": 128,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_8192x4096x128.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.0001034496002830565,
"perf_norm_to_sol": 0.6624905285182686,
"perf_norm_to_cublas": 0.9216468966166054,
"compute_intensity": 240.94117647058823,
"tile_compute_intensity": 5.818181818181818,
"MxNxK": 268435456,
"size_m": 2048,
"size_n": 256,
"size_k": 512,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_2048x256x512.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.0026978399604558946,
"perf_norm_to_sol": 0.8129096624983723,
"perf_norm_to_cublas": 0.9157288103950879,
"compute_intensity": 455.1111111111111,
"tile_compute_intensity": 3.8641509433962264,
"MxNxK": 8589934592,
"size_m": 256,
"size_n": 4096,
"size_k": 8192,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_256x4096x8192.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.0013574080541729927,
"perf_norm_to_sol": 0.8078264177771436,
"perf_norm_to_cublas": 1.645085872042499,
"compute_intensity": 237.44927536231884,
"tile_compute_intensity": 3.5493934142114383,
"MxNxK": 4294967296,
"size_m": 2048,
"size_n": 128,
"size_k": 16384,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_2048x128x16384.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00009524159831926226,
"perf_norm_to_sol": 0.7195845258370145,
"perf_norm_to_cublas": 1.9774888521732348,
"compute_intensity": 167.18367346938774,
"tile_compute_intensity": 1.996101364522417,
"MxNxK": 268435456,
"size_m": 256,
"size_n": 128,
"size_k": 8192,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_256x128x8192.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00009566720109432936,
"perf_norm_to_sol": 0.440142330234577,
"perf_norm_to_cublas": 0.7393296826990357,
"compute_intensity": 30.089990817263544,
"tile_compute_intensity": 1.9922178988326849,
"MxNxK": 134217728,
"size_m": 256,
"size_n": 16384,
"size_k": 32,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_256x16384x32.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.002775651216506958,
"perf_norm_to_sol": 0.7901209484413352,
"perf_norm_to_cublas": 0.8841272357234736,
"compute_intensity": 244.53731343283582,
"tile_compute_intensity": 21.333333333333332,
"MxNxK": 8589934592,
"size_m": 8192,
"size_n": 4096,
"size_k": 256,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_8192x4096x256.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.000008815999899525196,
"perf_norm_to_sol": 0.28558008063399765,
"perf_norm_to_cublas": 0.6878402867862963,
"compute_intensity": 15.398496240601503,
"tile_compute_intensity": 1.28,
"MxNxK": 4194304,
"size_m": 256,
"size_n": 1024,
"size_k": 16,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_256x1024x16.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.005408287793397903,
"perf_norm_to_sol": 0.811014596673654,
"perf_norm_to_cublas": 0.8697708365793572,
"compute_intensity": 819.2,
"tile_compute_intensity": 32,
"MxNxK": 17179869184,
"size_m": 4096,
"size_n": 4096,
"size_k": 1024,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_4096x4096x1024.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.0002195103792473674,
"perf_norm_to_sol": 0.6875050106251587,
"perf_norm_to_cublas": 0.7728326852037766,
"compute_intensity": 15.922254616132166,
"tile_compute_intensity": 1.9104477611940298,
"MxNxK": 268435456,
"size_m": 8192,
"size_n": 2048,
"size_k": 16,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_8192x2048x16.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00002286079979967326,
"perf_norm_to_sol": 0.4405224636474363,
"perf_norm_to_cublas": 0.9224523995243231,
"compute_intensity": 30.796992481203006,
"tile_compute_intensity": 2.909090909090909,
"MxNxK": 33554432,
"size_m": 2048,
"size_n": 512,
"size_k": 32,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_2048x512x32.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.000022540800273418427,
"perf_norm_to_sol": 0.3800573822535591,
"perf_norm_to_cublas": 1.0694207587902327,
"compute_intensity": 83.59183673469387,
"tile_compute_intensity": 1.7777777777777777,
"MxNxK": 33554432,
"size_m": 2048,
"size_n": 64,
"size_k": 256,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_2048x64x256.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.022207337617874145,
"perf_norm_to_sol": 0.7900452398089001,
"perf_norm_to_cublas": 0.9211183155817599,
"compute_intensity": 1489.4545454545455,
"tile_compute_intensity": 24.975609756097562,
"MxNxK": 68719476736,
"size_m": 8192,
"size_n": 1024,
"size_k": 8192,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_8192x1024x8192.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.0026959840208292006,
"perf_norm_to_sol": 0.8134692768150362,
"perf_norm_to_cublas": 1.0523611585334596,
"compute_intensity": 1024,
"tile_compute_intensity": 19.692307692307693,
"MxNxK": 8589934592,
"size_m": 2048,
"size_n": 2048,
"size_k": 2048,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_2048x2048x2048.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00003385280142538249,
"perf_norm_to_sol": 0.506120450013476,
"perf_norm_to_cublas": 3.0747707951368217,
"compute_intensity": 107.78947368421052,
"tile_compute_intensity": 1.5900621118012421,
"MxNxK": 67108864,
"size_m": 512,
"size_n": 64,
"size_k": 2048,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_512x64x2048.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.000016659199900459498,
"perf_norm_to_sol": 0.2571191172746365,
"perf_norm_to_cublas": 0.8511333088460414,
"compute_intensity": 63.01538461538462,
"tile_compute_intensity": 1.6842105263157894,
"MxNxK": 16777216,
"size_m": 2048,
"size_n": 64,
"size_k": 128,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_2048x64x128.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.000016857600712683052,
"perf_norm_to_sol": 0.348482128538624,
"perf_norm_to_cublas": 0.7617691121420904,
"compute_intensity": 28.346020761245676,
"tile_compute_intensity": 1.3195876288659794,
"MxNxK": 16777216,
"size_m": 128,
"size_n": 4096,
"size_k": 32,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_128x4096x32.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00005271040135994554,
"perf_norm_to_sol": 0.325051501213785,
"perf_norm_to_cublas": 0.6085478475255803,
"compute_intensity": 51.0404984423676,
"tile_compute_intensity": 1.5900621118012421,
"MxNxK": 67108864,
"size_m": 128,
"size_n": 8192,
"size_k": 64,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_128x8192x64.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.0000769503996707499,
"perf_norm_to_sol": 0.5016781491957412,
"perf_norm_to_cublas": 0.6834531797254125,
"compute_intensity": 15.738712776176753,
"tile_compute_intensity": 1.7534246575342465,
"MxNxK": 67108864,
"size_m": 8192,
"size_n": 512,
"size_k": 16,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_8192x512x16.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.022324566543102265,
"perf_norm_to_sol": 0.7858966193120329,
"perf_norm_to_cublas": 0.907271327146211,
"compute_intensity": 885.6216216216217,
"tile_compute_intensity": 51.2,
"MxNxK": 68719476736,
"size_m": 16384,
"size_n": 4096,
"size_k": 1024,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_16384x4096x1024.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.000029475201154127716,
"perf_norm_to_sol": 0.673739574267009,
"perf_norm_to_cublas": 0.9070676355567686,
"compute_intensity": 7.755739644970414,
"tile_compute_intensity": 0.7975077881619937,
"MxNxK": 16777216,
"size_m": 16384,
"size_n": 128,
"size_k": 8,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_16384x128x8.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.0003875551978126168,
"perf_norm_to_sol": 0.7735301792702157,
"perf_norm_to_cublas": 0.7969961494995996,
"compute_intensity": 7.988298391028766,
"tile_compute_intensity": 0.9846153846153847,
"MxNxK": 268435456,
"size_m": 8192,
"size_n": 4096,
"size_k": 8,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_8192x4096x8.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00004643520223908126,
"perf_norm_to_sol": 0.36897858231381375,
"perf_norm_to_cublas": 0.6648748788311938,
"compute_intensity": 59.36231884057971,
"tile_compute_intensity": 4.571428571428571,
"MxNxK": 67108864,
"size_m": 2048,
"size_n": 512,
"size_k": 64,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_2048x512x64.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.0006953888107091189,
"perf_norm_to_sol": 0.7884438669254764,
"perf_norm_to_cublas": 1.0809077742110433,
"compute_intensity": 481.88235294117646,
"tile_compute_intensity": 11.636363636363637,
"MxNxK": 2147483648,
"size_m": 4096,
"size_n": 512,
"size_k": 1024,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_4096x512x1024.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.000029142401763238014,
"perf_norm_to_sol": 0.6814335221356844,
"perf_norm_to_cublas": 0.922696849864874,
"compute_intensity": 7.755739644970414,
"tile_compute_intensity": 0.6657997399219766,
"MxNxK": 16777216,
"size_m": 128,
"size_n": 16384,
"size_k": 8,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_128x16384x8.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00001855040027294308,
"perf_norm_to_sol": 0.23090600255971097,
"perf_norm_to_cublas": 1.3327583293600793,
"compute_intensity": 93.0909090909091,
"tile_compute_intensity": 1.5609756097560976,
"MxNxK": 16777216,
"size_m": 512,
"size_n": 64,
"size_k": 512,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_512x64x512.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00035262079909443856,
"perf_norm_to_sol": 0.7774286774067564,
"perf_norm_to_cublas": 1.7110369741777918,
"compute_intensity": 119.5912408759124,
"tile_compute_intensity": 1.7762359063313096,
"MxNxK": 1073741824,
"size_m": 1024,
"size_n": 64,
"size_k": 16384,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_1024x64x16384.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00002430399908917025,
"perf_norm_to_sol": 0.4819230401305359,
"perf_norm_to_cublas": 0.8967742035918634,
"compute_intensity": 28.395147313691506,
"tile_compute_intensity": 1.3264248704663213,
"MxNxK": 33554432,
"size_m": 128,
"size_n": 8192,
"size_k": 32,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_128x8192x32.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00002352640003664419,
"perf_norm_to_sol": 0.36413550447464405,
"perf_norm_to_cublas": 0.7920293527291243,
"compute_intensity": 50.88198757763975,
"tile_compute_intensity": 1.5802469135802468,
"MxNxK": 33554432,
"size_m": 128,
"size_n": 4096,
"size_k": 64,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_128x4096x64.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00009468479547649621,
"perf_norm_to_sol": 0.7238161103018715,
"perf_norm_to_cublas": 1.268376840966872,
"compute_intensity": 195.04761904761904,
"tile_compute_intensity": 1.7716262975778547,
"MxNxK": 268435456,
"size_m": 128,
"size_n": 512,
"size_k": 4096,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_128x512x4096.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00003163839865010232,
"perf_norm_to_sol": 0.541544320277285,
"perf_norm_to_cublas": 3.0750481723129637,
"compute_intensity": 170.66666666666666,
"tile_compute_intensity": 2.6122448979591835,
"MxNxK": 67108864,
"size_m": 512,
"size_n": 128,
"size_k": 1024,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_512x128x1024.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.000006924799527041614,
"perf_norm_to_sol": 0.3504005973901482,
"perf_norm_to_cublas": 0.9459334992895098,
"compute_intensity": 7.846743295019157,
"tile_compute_intensity": 0.7804878048780488,
"MxNxK": 2097152,
"size_m": 256,
"size_n": 1024,
"size_k": 8,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_256x1024x8.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.005450300872325897,
"perf_norm_to_sol": 0.8047629747797483,
"perf_norm_to_cublas": 0.9138724886436181,
"compute_intensity": 468.1142857142857,
"tile_compute_intensity": 7.474452554744525,
"MxNxK": 17179869184,
"size_m": 8192,
"size_n": 256,
"size_k": 8192,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_8192x256x8192.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00005279039614833891,
"perf_norm_to_sol": 0.6491178828621293,
"perf_norm_to_cublas": 1.8751895538817076,
"compute_intensity": 186.1818181818182,
"tile_compute_intensity": 1.855072463768116,
"MxNxK": 134217728,
"size_m": 128,
"size_n": 1024,
"size_k": 1024,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_128x1024x1024.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.000016697600949555634,
"perf_norm_to_sol": 0.25652779616953597,
"perf_norm_to_cublas": 0.8133384240399412,
"compute_intensity": 93.0909090909091,
"tile_compute_intensity": 3.2,
"MxNxK": 16777216,
"size_m": 512,
"size_n": 256,
"size_k": 128,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_512x256x128.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00005442240508273244,
"perf_norm_to_sol": 0.6296522568447719,
"perf_norm_to_cublas": 1.8165459461899198,
"compute_intensity": 110.70270270270271,
"tile_compute_intensity": 1.855072463768116,
"MxNxK": 134217728,
"size_m": 2048,
"size_n": 64,
"size_k": 1024,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_2048x64x1024.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.001369846425950527,
"perf_norm_to_sol": 0.8004912558745576,
"perf_norm_to_cublas": 0.9851942541968944,
"compute_intensity": 123.65283018867925,
"tile_compute_intensity": 1.9768339768339769,
"MxNxK": 4294967296,
"size_m": 16384,
"size_n": 64,
"size_k": 4096,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_16384x64x4096.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.0013666016049683094,
"perf_norm_to_sol": 0.8023919201308416,
"perf_norm_to_cublas": 1.0508753951693268,
"compute_intensity": 496.4848484848485,
"tile_compute_intensity": 7.314285714285714,
"MxNxK": 4294967296,
"size_m": 512,
"size_n": 8192,
"size_k": 1024,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_512x8192x1024.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.011011151969432831,
"perf_norm_to_sol": 0.7966832817554099,
"perf_norm_to_cublas": 0.9444746225670918,
"compute_intensity": 799.219512195122,
"tile_compute_intensity": 14.628571428571428,
"MxNxK": 34359738368,
"size_m": 16384,
"size_n": 512,
"size_k": 4096,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_16384x512x4096.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.000026963200070895255,
"perf_norm_to_sol": 0.7009849973608496,
"perf_norm_to_cublas": 0.9905055704661403,
"compute_intensity": 7.953398058252427,
"tile_compute_intensity": 0.927536231884058,
"MxNxK": 16777216,
"size_m": 1024,
"size_n": 2048,
"size_k": 8,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_1024x2048x8.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.001369119994342327,
"perf_norm_to_sol": 0.800915982817965,
"perf_norm_to_cublas": 0.9574196367629711,
"compute_intensity": 682.6666666666666,
"tile_compute_intensity": 10.448979591836734,
"MxNxK": 4294967296,
"size_m": 2048,
"size_n": 512,
"size_k": 4096,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_2048x512x4096.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.000052582402713596824,
"perf_norm_to_sol": 0.6516855148272259,
"perf_norm_to_cublas": 1.4959225503753326,
"compute_intensity": 227.55555555555554,
"tile_compute_intensity": 3.3684210526315788,
"MxNxK": 134217728,
"size_m": 256,
"size_n": 1024,
"size_k": 512,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_256x1024x512.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.01088053435087204,
"perf_norm_to_sol": 0.8062472305151275,
"perf_norm_to_cublas": 0.9314989736096686,
"compute_intensity": 1638.4,
"tile_compute_intensity": 24.38095238095238,
"MxNxK": 34359738368,
"size_m": 2048,
"size_n": 4096,
"size_k": 4096,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_2048x4096x4096.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00002245759969810024,
"perf_norm_to_sol": 0.3814654130886663,
"perf_norm_to_cublas": 1.0579937658530612,
"compute_intensity": 146.28571428571428,
"tile_compute_intensity": 3.5555555555555554,
"MxNxK": 33554432,
"size_m": 512,
"size_n": 256,
"size_k": 256,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_512x256x256.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00012289920123293997,
"perf_norm_to_sol": 0.5576470772712951,
"perf_norm_to_cublas": 0.8656199449822727,
"compute_intensity": 56.79029462738301,
"tile_compute_intensity": 2.6528497409326426,
"MxNxK": 268435456,
"size_m": 256,
"size_n": 16384,
"size_k": 64,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_256x16384x64.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00007366399513557554,
"perf_norm_to_sol": 0.465182347498198,
"perf_norm_to_cublas": 0.8079930919487351,
"compute_intensity": 61.134328358208954,
"tile_compute_intensity": 5.333333333333333,
"MxNxK": 134217728,
"size_m": 2048,
"size_n": 1024,
"size_k": 64,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_2048x1024x64.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00005574399838224053,
"perf_norm_to_sol": 0.6147242963859593,
"perf_norm_to_cublas": 0.9167050060636462,
"compute_intensity": 186.1818181818182,
"tile_compute_intensity": 5.333333333333333,
"MxNxK": 134217728,
"size_m": 512,
"size_n": 1024,
"size_k": 256,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_512x1024x256.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.000008476799848722294,
"perf_norm_to_sol": 0.12632711782010206,
"perf_norm_to_cublas": 0.7908644534426855,
"compute_intensity": 48.76190476190476,
"tile_compute_intensity": 2,
"MxNxK": 4194304,
"size_m": 512,
"size_n": 128,
"size_k": 64,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_512x128x64.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00003166079986840487,
"perf_norm_to_sol": 0.5411611571042301,
"perf_norm_to_cublas": 3.281685971903655,
"compute_intensity": 157.53846153846155,
"tile_compute_intensity": 1.9844961240310077,
"MxNxK": 67108864,
"size_m": 256,
"size_n": 128,
"size_k": 2048,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_256x128x2048.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.000022447999799624086,
"perf_norm_to_sol": 0.38162854696564885,
"perf_norm_to_cublas": 0.9974340969631563,
"compute_intensity": 102.4,
"tile_compute_intensity": 4,
"MxNxK": 33554432,
"size_m": 512,
"size_n": 512,
"size_k": 128,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_512x512x128.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.000687289610505104,
"perf_norm_to_sol": 0.7977350952959504,
"perf_norm_to_cublas": 1.082885574458421,
"compute_intensity": 390.0952380952381,
"tile_compute_intensity": 3.8208955223880596,
"MxNxK": 2147483648,
"size_m": 256,
"size_n": 4096,
"size_k": 2048,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_256x4096x2048.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00003191680007148534,
"perf_norm_to_sol": 0.5368205789194603,
"perf_norm_to_cublas": 1.2848405361441166,
"compute_intensity": 170.66666666666666,
"tile_compute_intensity": 4.571428571428571,
"MxNxK": 67108864,
"size_m": 512,
"size_n": 512,
"size_k": 256,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_512x512x256.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.0003679072018712759,
"perf_norm_to_sol": 0.8148403731057894,
"perf_norm_to_cublas": 0.8393681635420044,
"compute_intensity": 7.988298391028766,
"tile_compute_intensity": 0.9808429118773946,
"MxNxK": 268435456,
"size_m": 4096,
"size_n": 8192,
"size_k": 8,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_4096x8192x8.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.000032652801019139587,
"perf_norm_to_sol": 0.52472053106833,
"perf_norm_to_cublas": 0.8942570723594641,
"compute_intensity": 107.78947368421052,
"tile_compute_intensity": 4.571428571428571,
"MxNxK": 67108864,
"size_m": 512,
"size_n": 1024,
"size_k": 128,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_512x1024x128.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.000013049600238446146,
"perf_norm_to_sol": 0.20691163369184387,
"perf_norm_to_cublas": 0.6809710428088087,
"compute_intensity": 29.681159420289855,
"tile_compute_intensity": 2.2857142857142856,
"MxNxK": 8388608,
"size_m": 1024,
"size_n": 256,
"size_k": 32,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_1024x256x32.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.010958310961723328,
"perf_norm_to_sol": 0.8005248908848016,
"perf_norm_to_cublas": 0.8737311323718063,
"compute_intensity": 481.88235294117646,
"tile_compute_intensity": 36.57142857142857,
"MxNxK": 34359738368,
"size_m": 8192,
"size_n": 8192,
"size_k": 512,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_8192x8192x512.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00023690559901297094,
"perf_norm_to_sol": 0.6413362581807128,
"perf_norm_to_cublas": 0.9136871305780491,
"compute_intensity": 15.868280871670702,
"tile_compute_intensity": 1.8686131386861313,
"MxNxK": 268435456,
"size_m": 16384,
"size_n": 1024,
"size_k": 16,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_16384x1024x16.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.000023321600747294723,
"perf_norm_to_sol": 0.43181837978557586,
"perf_norm_to_cublas": 0.8848792267697927,
"compute_intensity": 30.796992481203006,
"tile_compute_intensity": 2.56,
"MxNxK": 33554432,
"size_m": 512,
"size_n": 2048,
"size_k": 32,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_512x2048x32.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.0000970080029219389,
"perf_norm_to_sol": 0.7064817159639342,
"perf_norm_to_cublas": 1.943262413049729,
"compute_intensity": 112.21917808219177,
"tile_compute_intensity": 1.5975039001560063,
"MxNxK": 268435456,
"size_m": 512,
"size_n": 64,
"size_k": 8192,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_512x64x8192.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.000031878400477580726,
"perf_norm_to_sol": 0.606638187050348,
"perf_norm_to_cublas": 0.946797847413596,
"compute_intensity": 15.723608445297504,
"tile_compute_intensity": 1.7297297297297298,
"MxNxK": 33554432,
"size_m": 4096,
"size_n": 512,
"size_k": 16,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_4096x512x16.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.010708656162023544,
"perf_norm_to_sol": 0.819187819105178,
"perf_norm_to_cublas": 1.06192369467154,
"compute_intensity": 1638.4,
"tile_compute_intensity": 20.897959183673468,
"MxNxK": 34359738368,
"size_m": 2048,
"size_n": 2048,
"size_k": 8192,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_2048x2048x8192.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.005415251106023788,
"perf_norm_to_sol": 0.8099717367821685,
"perf_norm_to_cublas": 0.932169083326649,
"compute_intensity": 963.7647058823529,
"tile_compute_intensity": 23.272727272727273,
"MxNxK": 17179869184,
"size_m": 8192,
"size_n": 1024,
"size_k": 2048,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_8192x1024x2048.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.0013679136522114278,
"perf_norm_to_sol": 0.8016222983751071,
"perf_norm_to_cublas": 0.9431893452695391,
"compute_intensity": 337.8144329896907,
"tile_compute_intensity": 7.013698630136986,
"MxNxK": 4294967296,
"size_m": 512,
"size_n": 16384,
"size_k": 512,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_512x16384x512.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.005370575934648514,
"perf_norm_to_sol": 0.8167094920229834,
"perf_norm_to_cublas": 1.0593652564105234,
"compute_intensity": 448.8767123287671,
"tile_compute_intensity": 3.9384615384615387,
"MxNxK": 17179869184,
"size_m": 256,
"size_n": 16384,
"size_k": 4096,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_256x16384x4096.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00009543040068820119,
"perf_norm_to_sol": 0.7181608782137197,
"perf_norm_to_cublas": 1.2402252955781516,
"compute_intensity": 240.94117647058823,
"tile_compute_intensity": 2.6528497409326426,
"MxNxK": 268435456,
"size_m": 256,
"size_n": 256,
"size_k": 4096,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_256x256x4096.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.0006851456128060818,
"perf_norm_to_sol": 0.800231414584545,
"perf_norm_to_cublas": 1.654800423287903,
"compute_intensity": 334.3673469387755,
"tile_compute_intensity": 3.992202729044834,
"MxNxK": 2147483648,
"size_m": 512,
"size_n": 256,
"size_k": 16384,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_512x256x16384.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.005456770956516266,
"perf_norm_to_sol": 0.8038087686674507,
"perf_norm_to_cublas": 0.9101218412895549,
"compute_intensity": 246.37593984962405,
"tile_compute_intensity": 1.9883495145631067,
"MxNxK": 17179869184,
"size_m": 128,
"size_n": 16384,
"size_k": 8192,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_128x16384x8192.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.0013561472296714783,
"perf_norm_to_sol": 0.8085774625886653,
"perf_norm_to_cublas": 1.6391070437701558,
"compute_intensity": 496.4848484848485,
"tile_compute_intensity": 5.3194805194805195,
"MxNxK": 4294967296,
"size_m": 512,
"size_n": 512,
"size_k": 16384,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_512x512x16384.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00001526400010334328,
"perf_norm_to_sol": 0.14031049344560945,
"perf_norm_to_cublas": 0.8918238047557039,
"compute_intensity": 102.4,
"tile_compute_intensity": 1.5238095238095237,
"MxNxK": 8388608,
"size_m": 128,
"size_n": 256,
"size_k": 256,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_128x256x256.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.0007082080002874136,
"perf_norm_to_sol": 0.7741723373778582,
"perf_norm_to_cublas": 0.9456972876818595,
"compute_intensity": 224.43835616438355,
"tile_compute_intensity": 10.24,
"MxNxK": 2147483648,
"size_m": 1024,
"size_n": 8192,
"size_k": 256,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_1024x8192x256.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.000057865603594109416,
"perf_norm_to_sol": 0.5921858246502623,
"perf_norm_to_cublas": 0.9114084920032571,
"compute_intensity": 126.03076923076924,
"tile_compute_intensity": 1.855072463768116,
"MxNxK": 134217728,
"size_m": 128,
"size_n": 4096,
"size_k": 256,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_128x4096x256.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.0000525951967574656,
"perf_norm_to_sol": 0.6515269890762188,
"perf_norm_to_cublas": 1.4948893211946366,
"compute_intensity": 227.55555555555554,
"tile_compute_intensity": 4.923076923076923,
"MxNxK": 134217728,
"size_m": 1024,
"size_n": 256,
"size_k": 512,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_1024x256x512.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.000013046400272287429,
"perf_norm_to_sol": 0.16416017765476884,
"perf_norm_to_cublas": 0.6651949747351433,
"compute_intensity": 49.951219512195124,
"tile_compute_intensity": 2.2857142857142856,
"MxNxK": 8388608,
"size_m": 1024,
"size_n": 128,
"size_k": 64,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_1024x128x64.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.0006756896153092384,
"perf_norm_to_sol": 0.8114303231984414,
"perf_norm_to_cublas": 1.6788773289969265,
"compute_intensity": 234.05714285714285,
"tile_compute_intensity": 1.9357277882797732,
"MxNxK": 2147483648,
"size_m": 128,
"size_n": 2048,
"size_k": 8192,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_128x2048x8192.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.02231219857931137,
"perf_norm_to_sol": 0.78633225280178,
"perf_norm_to_cublas": 0.8610694132679929,
"compute_intensity": 1489.4545454545455,
"tile_compute_intensity": 42.666666666666664,
"MxNxK": 68719476736,
"size_m": 4096,
"size_n": 8192,
"size_k": 2048,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_4096x8192x2048.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00003613120061345399,
"perf_norm_to_sol": 0.4742049752216505,
"perf_norm_to_cublas": 0.8304843995603236,
"compute_intensity": 84.45360824742268,
"tile_compute_intensity": 1.7534246575342465,
"MxNxK": 67108864,
"size_m": 128,
"size_n": 4096,
"size_k": 128,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_128x4096x128.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.000009308799781138077,
"perf_norm_to_sol": 0.11503627947791616,
"perf_norm_to_cublas": 0.9027157202911352,
"compute_intensity": 73.14285714285714,
"tile_compute_intensity": 1.4545454545454546,
"MxNxK": 4194304,
"size_m": 128,
"size_n": 256,
"size_k": 128,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_128x256x128.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00035050881560891865,
"perf_norm_to_sol": 0.7821130575271258,
"perf_norm_to_cublas": 1.0931491123530057,
"compute_intensity": 122.26865671641791,
"tile_compute_intensity": 1.9320754716981132,
"MxNxK": 1073741824,
"size_m": 4096,
"size_n": 64,
"size_k": 4096,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_4096x64x4096.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00037402561865746976,
"perf_norm_to_sol": 0.732937819741049,
"perf_norm_to_cublas": 0.8651129543450381,
"compute_intensity": 119.5912408759124,
"tile_compute_intensity": 7.757575757575758,
"MxNxK": 1073741824,
"size_m": 1024,
"size_n": 8192,
"size_k": 128,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_1024x8192x128.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.000008838400390231982,
"perf_norm_to_sol": 0.15687737819061626,
"perf_norm_to_cublas": 0.7013033647927274,
"compute_intensity": 29.257142857142856,
"tile_compute_intensity": 2,
"MxNxK": 4194304,
"size_m": 512,
"size_n": 256,
"size_k": 32,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_512x256x32.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.0006844704039394855,
"perf_norm_to_sol": 0.8010208181049113,
"perf_norm_to_cublas": 1.08799099795971,
"compute_intensity": 682.6666666666666,
"tile_compute_intensity": 10.24,
"MxNxK": 2147483648,
"size_m": 1024,
"size_n": 1024,
"size_k": 2048,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_1024x1024x2048.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.0028702815994620322,
"perf_norm_to_sol": 0.7640714319249613,
"perf_norm_to_cublas": 0.8809839797732689,
"compute_intensity": 125.5478927203065,
"tile_compute_intensity": 13.473684210526315,
"MxNxK": 8589934592,
"size_m": 16384,
"size_n": 4096,
"size_k": 128,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_16384x4096x128.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.0007064640056341886,
"perf_norm_to_sol": 0.7760834785064846,
"perf_norm_to_cublas": 0.9431308626299083,
"compute_intensity": 234.05714285714285,
"tile_compute_intensity": 14.222222222222221,
"MxNxK": 2147483648,
"size_m": 2048,
"size_n": 4096,
"size_k": 256,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_2048x4096x256.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00007852159906178713,
"perf_norm_to_sol": 0.43640464015892844,
"perf_norm_to_cublas": 0.7524248497469367,
"compute_intensity": 59.7956204379562,
"tile_compute_intensity": 3.878787878787879,
"MxNxK": 134217728,
"size_m": 512,
"size_n": 4096,
"size_k": 64,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_512x4096x64.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00009585279622115195,
"perf_norm_to_sol": 0.41568872922218364,
"perf_norm_to_cublas": 0.7002070456238194,
"compute_intensity": 30.97164461247637,
"tile_compute_intensity": 2.6391752577319587,
"MxNxK": 134217728,
"size_m": 512,
"size_n": 8192,
"size_k": 32,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_512x8192x32.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.0003721888177096844,
"perf_norm_to_sol": 0.7365549646360852,
"perf_norm_to_cublas": 1.0273581097937692,
"compute_intensity": 221.40540540540542,
"tile_compute_intensity": 12.8,
"MxNxK": 1073741824,
"size_m": 4096,
"size_n": 1024,
"size_k": 256,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_4096x1024x256.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.0013583552092313767,
"perf_norm_to_sol": 0.8072631358957227,
"perf_norm_to_cublas": 0.9662415287577266,
"compute_intensity": 682.6666666666666,
"tile_compute_intensity": 7.013698630136986,
"MxNxK": 4294967296,
"size_m": 512,
"size_n": 2048,
"size_k": 4096,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_512x2048x4096.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00019849599339067937,
"perf_norm_to_sol": 0.6905366621847778,
"perf_norm_to_cublas": 0.8892472063864213,
"compute_intensity": 118.72463768115942,
"tile_compute_intensity": 9.142857142857142,
"MxNxK": 536870912,
"size_m": 4096,
"size_n": 1024,
"size_k": 128,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_4096x1024x128.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00009453120292164385,
"perf_norm_to_sol": 0.7249921533668975,
"perf_norm_to_cublas": 1.1261297102393875,
"compute_intensity": 292.57142857142856,
"tile_compute_intensity": 3.1604938271604937,
"MxNxK": 268435456,
"size_m": 256,
"size_n": 512,
"size_k": 2048,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_256x512x2048.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00037679998204112055,
"perf_norm_to_sol": 0.7275412275263485,
"perf_norm_to_cublas": 0.8642803048746879,
"compute_intensity": 122.26865671641791,
"tile_compute_intensity": 9.846153846153847,
"MxNxK": 1073741824,
"size_m": 2048,
"size_n": 4096,
"size_k": 128,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_2048x4096x128.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.000006764799763914198,
"perf_norm_to_sol": 0.018878326630137977,
"perf_norm_to_cublas": 0.8027436496165086,
"compute_intensity": 23.272727272727273,
"tile_compute_intensity": 0.8,
"MxNxK": 262144,
"size_m": 128,
"size_n": 64,
"size_k": 32,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_128x64x32.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.005530016124248504,
"perf_norm_to_sol": 0.7931623063854456,
"perf_norm_to_cublas": 0.8835741718629713,
"compute_intensity": 246.37593984962405,
"tile_compute_intensity": 23.272727272727273,
"MxNxK": 17179869184,
"size_m": 16384,
"size_n": 4096,
"size_k": 256,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_16384x4096x256.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00010567678837105631,
"perf_norm_to_sol": 0.7185266353048919,
"perf_norm_to_cublas": 0.801144734046028,
"compute_intensity": 7.9360620004843785,
"tile_compute_intensity": 0.8873483535528596,
"MxNxK": 67108864,
"size_m": 512,
"size_n": 16384,
"size_k": 8,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_512x16384x8.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.0002187871839851141,
"perf_norm_to_sol": 0.626493555227519,
"perf_norm_to_cublas": 0.9714352849330806,
"compute_intensity": 60.12477064220184,
"tile_compute_intensity": 5.224489795918367,
"MxNxK": 536870912,
"size_m": 16384,
"size_n": 512,
"size_k": 64,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_16384x512x64.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.000006768000457668677,
"perf_norm_to_sol": 0.10917295004297849,
"perf_norm_to_cublas": 0.6643025718809844,
"compute_intensity": 14.124137931034483,
"tile_compute_intensity": 0.9411764705882353,
"MxNxK": 1048576,
"size_m": 1024,
"size_n": 64,
"size_k": 16,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_1024x64x16.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.011716591566801072,
"perf_norm_to_sol": 0.7487160951971618,
"perf_norm_to_cublas": 0.8604156891028933,
"compute_intensity": 127.0077519379845,
"tile_compute_intensity": 14.628571428571428,
"MxNxK": 34359738368,
"size_m": 16384,
"size_n": 16384,
"size_k": 128,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_16384x16384x128.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.022310467064380647,
"perf_norm_to_sol": 0.7863932800331825,
"perf_norm_to_cublas": 1.028081188055196,
"compute_intensity": 1820.4444444444443,
"tile_compute_intensity": 21.11340206185567,
"MxNxK": 68719476736,
"size_m": 2048,
"size_n": 2048,
"size_k": 16384,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_2048x2048x16384.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.0003516895929351449,
"perf_norm_to_sol": 0.7794871584859682,
"perf_norm_to_cublas": 1.0837010879808762,
"compute_intensity": 227.55555555555554,
"tile_compute_intensity": 3.5310344827586206,
"MxNxK": 1073741824,
"size_m": 2048,
"size_n": 128,
"size_k": 4096,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_2048x128x4096.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.000022575999901164322,
"perf_norm_to_sol": 0.37946481145111527,
"perf_norm_to_cublas": 1.3379163444062434,
"compute_intensity": 97.52380952380952,
"tile_compute_intensity": 1.7297297297297298,
"MxNxK": 33554432,
"size_m": 1024,
"size_n": 64,
"size_k": 512,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_1024x64x512.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.000022470399562735112,
"perf_norm_to_sol": 0.3812481180807699,
"perf_norm_to_cublas": 0.9710909034273197,
"compute_intensity": 83.59183673469387,
"tile_compute_intensity": 2.909090909090909,
"MxNxK": 33554432,
"size_m": 2048,
"size_n": 128,
"size_k": 128,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_2048x128x128.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.0027335872873663902,
"perf_norm_to_sol": 0.8022791815957389,
"perf_norm_to_cublas": 0.9227868124709769,
"compute_intensity": 239.1824817518248,
"tile_compute_intensity": 3.9083969465648853,
"MxNxK": 8589934592,
"size_m": 16384,
"size_n": 128,
"size_k": 4096,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_16384x128x4096.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00003221119986847043,
"perf_norm_to_sol": 0.5319142149809346,
"perf_norm_to_cublas": 1.2504470673358583,
"compute_intensity": 124.12121212121212,
"tile_compute_intensity": 3.2,
"MxNxK": 67108864,
"size_m": 2048,
"size_n": 128,
"size_k": 256,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_2048x128x256.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.0220623642206192,
"perf_norm_to_sol": 0.7952366844453344,
"perf_norm_to_cublas": 0.8581497307609545,
"compute_intensity": 1310.72,
"tile_compute_intensity": 26.94736842105263,
"MxNxK": 68719476736,
"size_m": 2048,
"size_n": 16384,
"size_k": 2048,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_2048x16384x2048.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.0006798783782869577,
"perf_norm_to_sol": 0.8064310624403975,
"perf_norm_to_cublas": 1.04623891724794,
"compute_intensity": 234.05714285714285,
"tile_compute_intensity": 1.9616858237547892,
"MxNxK": 2147483648,
"size_m": 128,
"size_n": 4096,
"size_k": 4096,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_128x4096x4096.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.023440156877040864,
"perf_norm_to_sol": 0.7484933426796024,
"perf_norm_to_cublas": 0.8725945894779815,
"compute_intensity": 252.06153846153848,
"tile_compute_intensity": 26.94736842105263,
"MxNxK": 68719476736,
"size_m": 16384,
"size_n": 16384,
"size_k": 256,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_16384x16384x256.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00007319999858736991,
"perf_norm_to_sol": 0.5204030385752563,
"perf_norm_to_cublas": 0.7077158709074924,
"compute_intensity": 15.845261121856867,
"tile_compute_intensity": 1.8285714285714285,
"MxNxK": 67108864,
"size_m": 4096,
"size_n": 1024,
"size_k": 16,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_4096x1024x16.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00003261759993620217,
"perf_norm_to_sol": 0.5252868121855561,
"perf_norm_to_cublas": 3.1870889406050797,
"compute_intensity": 157.53846153846155,
"tile_compute_intensity": 1.5900621118012421,
"MxNxK": 67108864,
"size_m": 128,
"size_n": 256,
"size_k": 2048,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_128x256x2048.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00003295679925940931,
"perf_norm_to_sol": 0.5198804336783317,
"perf_norm_to_cublas": 2.8257111747132386,
"compute_intensity": 84.45360824742268,
"tile_compute_intensity": 0.9990243902439024,
"MxNxK": 67108864,
"size_m": 128,
"size_n": 64,
"size_k": 8192,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_128x64x8192.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00001327359932474792,
"perf_norm_to_sol": 0.22266231118738403,
"perf_norm_to_cublas": 0.6345226709326093,
"compute_intensity": 28.248275862068965,
"tile_compute_intensity": 1.3061224489795917,
"MxNxK": 8388608,
"size_m": 128,
"size_n": 2048,
"size_k": 32,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_128x2048x32.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.022296051681041717,
"perf_norm_to_sol": 0.7869017180628843,
"perf_norm_to_cublas": 0.9084355975725387,
"compute_intensity": 885.6216216216217,
"tile_compute_intensity": 7.816793893129771,
"MxNxK": 68719476736,
"size_m": 512,
"size_n": 16384,
"size_k": 8192,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_512x16384x8192.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00009911040542647243,
"perf_norm_to_sol": 0.6914953084050262,
"perf_norm_to_cublas": 0.9598023449527745,
"compute_intensity": 292.57142857142856,
"tile_compute_intensity": 7.111111111111111,
"MxNxK": 268435456,
"size_m": 1024,
"size_n": 512,
"size_k": 512,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_1024x512x512.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.0027292095124721526,
"perf_norm_to_sol": 0.8035660735119912,
"perf_norm_to_cublas": 1.0500599495015537,
"compute_intensity": 404.5432098765432,
"tile_compute_intensity": 7.529411764705882,
"MxNxK": 8589934592,
"size_m": 16384,
"size_n": 256,
"size_k": 2048,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_16384x256x2048.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.000006787200254620984,
"perf_norm_to_sol": 0.07888743908048752,
"perf_norm_to_cublas": 1.1942479715127918,
"compute_intensity": 56.888888888888886,
"tile_compute_intensity": 1.2307692307692308,
"MxNxK": 2097152,
"size_m": 256,
"size_n": 64,
"size_k": 128,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_256x64x128.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.002867603115737438,
"perf_norm_to_sol": 0.7647851125886511,
"perf_norm_to_cublas": 0.8632090114097609,
"compute_intensity": 125.5478927203065,
"tile_compute_intensity": 12.487804878048781,
"MxNxK": 8589934592,
"size_m": 4096,
"size_n": 16384,
"size_k": 128,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_4096x16384x128.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.005422383919358254,
"perf_norm_to_sol": 0.808906268661397,
"perf_norm_to_cublas": 0.9044960081002494,
"compute_intensity": 819.2,
"tile_compute_intensity": 7.474452554744525,
"MxNxK": 17179869184,
"size_m": 512,
"size_n": 4096,
"size_k": 8192,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_512x4096x8192.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.0013446495868265629,
"perf_norm_to_sol": 0.8154913344020892,
"perf_norm_to_cublas": 1.6490029654250762,
"compute_intensity": 399.609756097561,
"tile_compute_intensity": 3.5493934142114383,
"MxNxK": 4294967296,
"size_m": 256,
"size_n": 1024,
"size_k": 16384,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_256x1024x16384.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.000017113600915763526,
"perf_norm_to_sol": 0.2502920801993444,
"perf_norm_to_cublas": 0.9474569565244381,
"compute_intensity": 113.77777777777777,
"tile_compute_intensity": 1.6842105263157894,
"MxNxK": 16777216,
"size_m": 128,
"size_n": 512,
"size_k": 256,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_128x512x256.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.000022758400882594286,
"perf_norm_to_sol": 0.4360914365711893,
"perf_norm_to_cublas": 0.9119797276810303,
"compute_intensity": 31.03030303030303,
"tile_compute_intensity": 2.909090909090909,
"MxNxK": 33554432,
"size_m": 1024,
"size_n": 1024,
"size_k": 32,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_1024x1024x32.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00005600320291705429,
"perf_norm_to_sol": 0.6883463808513554,
"perf_norm_to_cublas": 0.8216101667169545,
"compute_intensity": 7.875030040855563,
"tile_compute_intensity": 0.8858131487889274,
"MxNxK": 33554432,
"size_m": 16384,
"size_n": 256,
"size_k": 8,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_16384x256x8.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00009510079980827868,
"perf_norm_to_sol": 0.7206498841722643,
"perf_norm_to_cublas": 1.0594569423925888,
"compute_intensity": 341.3333333333333,
"tile_compute_intensity": 5.12,
"MxNxK": 268435456,
"size_m": 512,
"size_n": 512,
"size_k": 1024,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_512x512x1024.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.000032572800409980116,
"perf_norm_to_sol": 0.5260092738720064,
"perf_norm_to_cublas": 3.5727478256876006,
"compute_intensity": 124.12121212121212,
"tile_compute_intensity": 1.3298701298701299,
"MxNxK": 67108864,
"size_m": 128,
"size_n": 128,
"size_k": 4096,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_128x128x4096.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.000016003201017156245,
"perf_norm_to_sol": 0.5950919076710591,
"perf_norm_to_cublas": 0.9558087627926669,
"compute_intensity": 7.922630560928433,
"tile_compute_intensity": 0.8767123287671232,
"MxNxK": 8388608,
"size_m": 512,
"size_n": 2048,
"size_k": 8,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_512x2048x8.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00018526079365983606,
"perf_norm_to_sol": 0.739869229885349,
"perf_norm_to_cublas": 1.0442015005736223,
"compute_intensity": 199.8048780487805,
"tile_compute_intensity": 3.657142857142857,
"MxNxK": 536870912,
"size_m": 4096,
"size_n": 128,
"size_k": 1024,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_4096x128x1024.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.000013302400475367904,
"perf_norm_to_sol": 0.1610009704954922,
"perf_norm_to_cublas": 0.743324501659494,
"compute_intensity": 78.76923076923077,
"tile_compute_intensity": 1.6,
"MxNxK": 8388608,
"size_m": 128,
"size_n": 512,
"size_k": 128,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_128x512x128.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.0006926591973751784,
"perf_norm_to_sol": 0.7915509459917458,
"perf_norm_to_cublas": 1.0743384272252636,
"compute_intensity": 585.1428571428571,
"tile_compute_intensity": 14.222222222222221,
"MxNxK": 2147483648,
"size_m": 2048,
"size_n": 1024,
"size_k": 1024,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_2048x1024x1024.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.005527455732226372,
"perf_norm_to_sol": 0.7935297098600108,
"perf_norm_to_cublas": 0.8819166187424141,
"compute_intensity": 246.37593984962405,
"tile_compute_intensity": 20.48,
"MxNxK": 17179869184,
"size_m": 4096,
"size_n": 16384,
"size_k": 256,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_4096x16384x256.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.0003599104005843401,
"perf_norm_to_sol": 0.7616826883052593,
"perf_norm_to_cublas": 1.05520482100117,
"compute_intensity": 372.3636363636364,
"tile_compute_intensity": 6.2439024390243905,
"MxNxK": 1073741824,
"size_m": 2048,
"size_n": 256,
"size_k": 2048,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_2048x256x2048.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00043779839761555196,
"perf_norm_to_sol": 0.6867575485911575,
"perf_norm_to_cublas": 0.747339429497953,
"compute_intensity": 15.953261927945473,
"tile_compute_intensity": 1.9393939393939394,
"MxNxK": 536870912,
"size_m": 8192,
"size_n": 4096,
"size_k": 16,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_8192x4096x16.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.000006473599933087826,
"perf_norm_to_sol": 0.02818217747039934,
"perf_norm_to_cublas": 0.5378151199112354,
"compute_intensity": 14.222222222222221,
"tile_compute_intensity": 0.8,
"MxNxK": 262144,
"size_m": 128,
"size_n": 128,
"size_k": 16,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_128x128x16.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.000008729600085644052,
"perf_norm_to_sol": 0.12266881446127087,
"perf_norm_to_cublas": 1.5392229171325478,
"compute_intensity": 73.14285714285714,
"tile_compute_intensity": 1.28,
"MxNxK": 4194304,
"size_m": 256,
"size_n": 64,
"size_k": 256,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_256x64x256.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.000008524800068698824,
"perf_norm_to_sol": 0.162648398724282,
"perf_norm_to_cublas": 0.6940690635584525,
"compute_intensity": 29.257142857142856,
"tile_compute_intensity": 1.7777777777777777,
"MxNxK": 4194304,
"size_m": 256,
"size_n": 512,
"size_k": 32,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_256x512x32.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.0007379360031336546,
"perf_norm_to_sol": 0.81210210168612,
"perf_norm_to_cublas": 0.8308015590278446,
"compute_intensity": 7.990246281394782,
"tile_compute_intensity": 0.982725527831094,
"MxNxK": 536870912,
"size_m": 4096,
"size_n": 16384,
"size_k": 8,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_4096x16384x8.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.000054169603390619156,
"perf_norm_to_sol": 0.701205754255858,
"perf_norm_to_cublas": 0.8494210406815107,
"compute_intensity": 7.934140435835351,
"tile_compute_intensity": 0.8858131487889274,
"MxNxK": 33554432,
"size_m": 512,
"size_n": 8192,
"size_k": 8,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_512x8192x8.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.0003599296091124415,
"perf_norm_to_sol": 0.7616420392367963,
"perf_norm_to_cublas": 1.0333131313248927,
"compute_intensity": 169.78238341968913,
"tile_compute_intensity": 1.9320754716981132,
"MxNxK": 1073741824,
"size_m": 128,
"size_n": 16384,
"size_k": 512,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_128x16384x512.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.000006335999933071434,
"perf_norm_to_sol": 0.19724037045131676,
"perf_norm_to_cublas": 0.707575746440224,
"compute_intensity": 7.728301886792453,
"tile_compute_intensity": 0.7619047619047619,
"MxNxK": 1048576,
"size_m": 1024,
"size_n": 128,
"size_k": 8,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_1024x128x8.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00018119679298251868,
"perf_norm_to_sol": 0.7564635028958566,
"perf_norm_to_cublas": 1.8236967967101936,
"compute_intensity": 168.90721649484536,
"tile_compute_intensity": 1.5987509758001561,
"MxNxK": 536870912,
"size_m": 128,
"size_n": 256,
"size_k": 16384,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_128x256x16384.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.001378041598945856,
"perf_norm_to_sol": 0.7957307578401308,
"perf_norm_to_cublas": 0.9411547605265503,
"compute_intensity": 399.609756097561,
"tile_compute_intensity": 18.285714285714285,
"MxNxK": 4294967296,
"size_m": 8192,
"size_n": 1024,
"size_k": 512,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_8192x1024x512.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.005474310368299484,
"perf_norm_to_sol": 0.8012334062856865,
"perf_norm_to_cublas": 0.8977256194117859,
"compute_intensity": 780.1904761904761,
"tile_compute_intensity": 10.61139896373057,
"MxNxK": 17179869184,
"size_m": 2048,
"size_n": 512,
"size_k": 16384,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_2048x512x16384.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.0005105696152895689,
"perf_norm_to_sol": 0.5923046938801325,
"perf_norm_to_cublas": 0.7408823319910193,
"compute_intensity": 31.813592233009707,
"tile_compute_intensity": 3.710144927536232,
"MxNxK": 1073741824,
"size_m": 4096,
"size_n": 8192,
"size_k": 32,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_4096x8192x32.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00040339198894798753,
"perf_norm_to_sol": 0.6795809757675917,
"perf_norm_to_cublas": 0.8475567361366916,
"compute_intensity": 63.01538461538462,
"tile_compute_intensity": 6.7368421052631575,
"MxNxK": 1073741824,
"size_m": 4096,
"size_n": 4096,
"size_k": 64,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_4096x4096x64.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.0031511902809143065,
"perf_norm_to_sol": 0.6959592967177162,
"perf_norm_to_cublas": 0.9154726990894339,
"compute_intensity": 63.627184466019415,
"tile_compute_intensity": 7.420289855072464,
"MxNxK": 8589934592,
"size_m": 8192,
"size_n": 16384,
"size_k": 64,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_8192x16384x64.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.000025129600544460118,
"perf_norm_to_sol": 0.34090464472998905,
"perf_norm_to_cublas": 0.7538519757738319,
"compute_intensity": 50.88198757763975,
"tile_compute_intensity": 2.56,
"MxNxK": 33554432,
"size_m": 4096,
"size_n": 128,
"size_k": 64,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_4096x128x64.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.0002001215936616063,
"perf_norm_to_sol": 0.6849273895191269,
"perf_norm_to_cublas": 0.8875883826205851,
"compute_intensity": 112.99310344827586,
"tile_compute_intensity": 5.224489795918367,
"MxNxK": 536870912,
"size_m": 512,
"size_n": 8192,
"size_k": 128,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_512x8192x128.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.005387142673134804,
"perf_norm_to_sol": 0.8141979170760101,
"perf_norm_to_cublas": 1.0594517627099258,
"compute_intensity": 1365.3333333333333,
"tile_compute_intensity": 20.48,
"MxNxK": 17179869184,
"size_m": 2048,
"size_n": 2048,
"size_k": 4096,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_2048x2048x4096.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00005351679865270853,
"perf_norm_to_sol": 0.6403071754279639,
"perf_norm_to_cublas": 2.177409769465602,
"compute_intensity": 110.70270270270271,
"tile_compute_intensity": 1.5950155763239875,
"MxNxK": 134217728,
"size_m": 512,
"size_n": 64,
"size_k": 4096,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_512x64x4096.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00012826559832319618,
"perf_norm_to_sol": 0.592840576565124,
"perf_norm_to_cublas": 0.6981513589075843,
"compute_intensity": 15.860600193610843,
"tile_compute_intensity": 1.7655172413793103,
"MxNxK": 134217728,
"size_m": 1024,
"size_n": 8192,
"size_k": 16,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_1024x8192x16.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00020630080252885817,
"perf_norm_to_sol": 0.6644121547412678,
"perf_norm_to_cublas": 0.9482076285992115,
"compute_intensity": 102.0809968847352,
"tile_compute_intensity": 5.224489795918367,
"MxNxK": 536870912,
"size_m": 16384,
"size_n": 256,
"size_k": 128,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_16384x256x128.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.0003734431928023696,
"perf_norm_to_sol": 0.7340809171240661,
"perf_norm_to_cublas": 1.036152284553224,
"compute_intensity": 169.78238341968913,
"tile_compute_intensity": 6.2439024390243905,
"MxNxK": 1073741824,
"size_m": 16384,
"size_n": 256,
"size_k": 256,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_16384x256x256.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.0001820608042180538,
"perf_norm_to_sol": 0.7528735321243804,
"perf_norm_to_cublas": 1.8190846366215945,
"compute_intensity": 248.24242424242425,
"tile_compute_intensity": 2.6597402597402597,
"MxNxK": 536870912,
"size_m": 256,
"size_n": 256,
"size_k": 8192,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_256x256x8192.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.0027267711237072946,
"perf_norm_to_sol": 0.8042846547190595,
"perf_norm_to_cublas": 1.6157742057802038,
"compute_intensity": 655.36,
"tile_compute_intensity": 6.38006230529595,
"MxNxK": 8589934592,
"size_m": 512,
"size_n": 1024,
"size_k": 16384,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_512x1024x16384.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.010817008465528489,
"perf_norm_to_sol": 0.8109821412150202,
"perf_norm_to_cublas": 1.0519670185502574,
"compute_intensity": 474.8985507246377,
"tile_compute_intensity": 3.9536679536679538,
"MxNxK": 34359738368,
"size_m": 256,
"size_n": 16384,
"size_k": 8192,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_256x16384x8192.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00021225600503385066,
"perf_norm_to_sol": 0.6457709439654802,
"perf_norm_to_cublas": 0.9580430756864114,
"compute_intensity": 61.82641509433962,
"tile_compute_intensity": 6.095238095238095,
"MxNxK": 536870912,
"size_m": 8192,
"size_n": 1024,
"size_k": 64,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_8192x1024x64.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.000008627200440969317,
"perf_norm_to_sol": 0.2876003578119735,
"perf_norm_to_cublas": 0.7266320006166752,
"compute_intensity": 15.515151515151516,
"tile_compute_intensity": 1.4545454545454546,
"MxNxK": 4194304,
"size_m": 512,
"size_n": 512,
"size_k": 16,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_512x512x16.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.000022662398987449705,
"perf_norm_to_sol": 0.3780181237899816,
"perf_norm_to_cublas": 0.8000564677828632,
"compute_intensity": 56.10958904109589,
"tile_compute_intensity": 2.56,
"MxNxK": 33554432,
"size_m": 256,
"size_n": 2048,
"size_k": 64,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_256x2048x64.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.0003665632102638483,
"perf_norm_to_sol": 0.7478587970374376,
"perf_norm_to_cublas": 1.045900950026582,
"compute_intensity": 202.2716049382716,
"tile_compute_intensity": 6.2439024390243905,
"MxNxK": 1073741824,
"size_m": 512,
"size_n": 8192,
"size_k": 256,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_512x8192x256.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.0003891648026183248,
"perf_norm_to_sol": 0.7044252707893642,
"perf_norm_to_cublas": 0.9315785735990563,
"compute_intensity": 113.3840830449827,
"tile_compute_intensity": 7.757575757575758,
"MxNxK": 1073741824,
"size_m": 16384,
"size_n": 512,
"size_k": 128,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_16384x512x128.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00036002560518682,
"perf_norm_to_sol": 0.7614389574426266,
"perf_norm_to_cublas": 1.032379912231677,
"compute_intensity": 252.06153846153848,
"tile_compute_intensity": 3.710144927536232,
"MxNxK": 1073741824,
"size_m": 256,
"size_n": 8192,
"size_k": 512,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_256x8192x512.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00035406078677624464,
"perf_norm_to_sol": 0.7742668256548536,
"perf_norm_to_cublas": 1.0611872521461623,
"compute_intensity": 372.3636363636364,
"tile_compute_intensity": 3.710144927536232,
"MxNxK": 1073741824,
"size_m": 256,
"size_n": 2048,
"size_k": 2048,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_256x2048x2048.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.000024016000679694116,
"perf_norm_to_sol": 0.3567120795869676,
"perf_norm_to_cublas": 4.299133950206227,
"compute_intensity": 97.52380952380952,
"tile_compute_intensity": 1.3264248704663213,
"MxNxK": 33554432,
"size_m": 256,
"size_n": 64,
"size_k": 2048,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_256x64x2048.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.011065717786550522,
"perf_norm_to_sol": 0.7927547815811308,
"perf_norm_to_cublas": 0.9132794533857208,
"compute_intensity": 250.13740458015266,
"tile_compute_intensity": 24.38095238095238,
"MxNxK": 34359738368,
"size_m": 8192,
"size_n": 16384,
"size_k": 256,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_8192x16384x256.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.0000536640000063926,
"perf_norm_to_sol": 0.6385508008940979,
"perf_norm_to_cublas": 1.4733451887621816,
"compute_intensity": 163.84,
"tile_compute_intensity": 1.8823529411764706,
"MxNxK": 134217728,
"size_m": 128,
"size_n": 2048,
"size_k": 512,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_128x2048x512.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.0006791135761886835,
"perf_norm_to_sol": 0.8073392465649578,
"perf_norm_to_cublas": 1.672905370069213,
"compute_intensity": 224.43835616438355,
"tile_compute_intensity": 1.8806244260789715,
"MxNxK": 2147483648,
"size_m": 128,
"size_n": 1024,
"size_k": 16384,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_128x1024x16384.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.0013577568344771862,
"perf_norm_to_sol": 0.8076189034884482,
"perf_norm_to_cublas": 1.6639656904616926,
"compute_intensity": 125.06870229007633,
"tile_compute_intensity": 1.9375591296121097,
"MxNxK": 4294967296,
"size_m": 4096,
"size_n": 64,
"size_k": 16384,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_4096x64x16384.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.0001846815925091505,
"perf_norm_to_sol": 0.7421896187421063,
"perf_norm_to_cublas": 1.0508897866292217,
"compute_intensity": 112.99310344827586,
"tile_compute_intensity": 1.9393939393939394,
"MxNxK": 536870912,
"size_m": 8192,
"size_n": 64,
"size_k": 1024,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_8192x64x1024.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.02175392657518387,
"perf_norm_to_sol": 0.8065119330615509,
"perf_norm_to_cublas": 0.8621528339324573,
"compute_intensity": 1310.72,
"tile_compute_intensity": 15.058823529411764,
"MxNxK": 68719476736,
"size_m": 1024,
"size_n": 16384,
"size_k": 4096,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_1024x16384x4096.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00018798079108819365,
"perf_norm_to_sol": 0.7291636551776391,
"perf_norm_to_cublas": 1.0185210931747826,
"compute_intensity": 409.6,
"tile_compute_intensity": 7.529411764705882,
"MxNxK": 536870912,
"size_m": 1024,
"size_n": 512,
"size_k": 1024,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_1024x512x1024.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.005731024220585823,
"perf_norm_to_sol": 0.7653431872966837,
"perf_norm_to_cublas": 0.8816194719470445,
"compute_intensity": 126.51737451737452,
"tile_compute_intensity": 13.837837837837839,
"MxNxK": 17179869184,
"size_m": 8192,
"size_n": 16384,
"size_k": 128,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_8192x16384x128.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.000006588800169993192,
"perf_norm_to_sol": 0.09968195952626771,
"perf_norm_to_cublas": 0.5828071935330909,
"compute_intensity": 15.058823529411764,
"tile_compute_intensity": 1.1428571428571428,
"MxNxK": 1048576,
"size_m": 256,
"size_n": 256,
"size_k": 16,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_256x256x16.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00035569278988987205,
"perf_norm_to_sol": 0.7707143053166191,
"perf_norm_to_cublas": 1.0635964560585602,
"compute_intensity": 202.2716049382716,
"tile_compute_intensity": 1.9541984732824427,
"MxNxK": 1073741824,
"size_m": 128,
"size_n": 8192,
"size_k": 1024,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_128x8192x1024.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.0027326496317982675,
"perf_norm_to_sol": 0.8025544680916945,
"perf_norm_to_cublas": 0.9199288436943559,
"compute_intensity": 442.81081081081084,
"tile_compute_intensity": 7.420289855072464,
"MxNxK": 8589934592,
"size_m": 8192,
"size_n": 256,
"size_k": 4096,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_8192x256x4096.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00018012479413300752,
"perf_norm_to_sol": 0.7609655372143678,
"perf_norm_to_cublas": 1.8175133051312249,
"compute_intensity": 199.8048780487805,
"tile_compute_intensity": 1.7746967071057191,
"MxNxK": 536870912,
"size_m": 128,
"size_n": 512,
"size_k": 8192,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_128x512x8192.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.0007073279935866595,
"perf_norm_to_sol": 0.7751355070114767,
"perf_norm_to_cublas": 0.9611517711973329,
"compute_intensity": 203.527950310559,
"tile_compute_intensity": 10.24,
"MxNxK": 2147483648,
"size_m": 16384,
"size_n": 512,
"size_k": 256,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_16384x512x256.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00005495999939739704,
"perf_norm_to_sol": 0.6234932780018515,
"perf_norm_to_cublas": 1.5443959451204272,
"compute_intensity": 101.1358024691358,
"tile_compute_intensity": 1.8823529411764706,
"MxNxK": 134217728,
"size_m": 4096,
"size_n": 64,
"size_k": 512,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_4096x64x512.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00002380799996899441,
"perf_norm_to_sol": 0.4919630856702078,
"perf_norm_to_cublas": 0.9018817188691974,
"compute_intensity": 28.395147313691506,
"tile_compute_intensity": 1.9692307692307693,
"MxNxK": 33554432,
"size_m": 8192,
"size_n": 128,
"size_k": 32,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_8192x128x32.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.000006598400068469345,
"perf_norm_to_sol": 0.04976846698113925,
"perf_norm_to_cublas": 0.802133850095691,
"compute_intensity": 39.38461538461539,
"tile_compute_intensity": 1.1428571428571428,
"MxNxK": 1048576,
"size_m": 256,
"size_n": 64,
"size_k": 64,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_256x64x64.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.022355611622333526,
"perf_norm_to_sol": 0.7848052502532795,
"perf_norm_to_cublas": 0.9308569043079675,
"compute_intensity": 885.6216216216217,
"tile_compute_intensity": 14.840579710144928,
"MxNxK": 68719476736,
"size_m": 16384,
"size_n": 512,
"size_k": 8192,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_16384x512x8192.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00018532160902395844,
"perf_norm_to_sol": 0.7396264335009695,
"perf_norm_to_cublas": 1.0323415313959634,
"compute_intensity": 409.6,
"tile_compute_intensity": 6.095238095238095,
"MxNxK": 536870912,
"size_m": 512,
"size_n": 1024,
"size_k": 1024,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_512x1024x1024.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.0005866847932338715,
"perf_norm_to_sol": 0.518445694453729,
"perf_norm_to_cublas": 0.9334293465492389,
"compute_intensity": 31.721200387221685,
"tile_compute_intensity": 3.710144927536232,
"MxNxK": 1073741824,
"size_m": 16384,
"size_n": 2048,
"size_k": 32,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_16384x2048x32.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00001361279864795506,
"perf_norm_to_sol": 0.3591763851834111,
"perf_norm_to_cublas": 0.7618712694999403,
"compute_intensity": 15.633587786259541,
"tile_compute_intensity": 1.5238095238095237,
"MxNxK": 8388608,
"size_m": 512,
"size_n": 1024,
"size_k": 16,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_512x1024x16.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00018118079751729965,
"perf_norm_to_sol": 0.7565302869359748,
"perf_norm_to_cublas": 1.8143733159111632,
"compute_intensity": 168.90721649484536,
"tile_compute_intensity": 1.9980487804878049,
"MxNxK": 536870912,
"size_m": 256,
"size_n": 128,
"size_k": 16384,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_256x128x16384.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.000013251201016828417,
"perf_norm_to_sol": 0.2657189140614988,
"perf_norm_to_cublas": 0.6537067342136463,
"compute_intensity": 25.5202492211838,
"tile_compute_intensity": 1.3061224489795917,
"MxNxK": 8388608,
"size_m": 4096,
"size_n": 64,
"size_k": 32,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_4096x64x32.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.002728569693863392,
"perf_norm_to_sol": 0.8037545006312831,
"perf_norm_to_cublas": 0.9300099522851448,
"compute_intensity": 125.5478927203065,
"tile_compute_intensity": 1.9806576402321083,
"MxNxK": 8589934592,
"size_m": 16384,
"size_n": 64,
"size_k": 8192,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_16384x64x8192.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.005446160212159157,
"perf_norm_to_sol": 0.8053748278768899,
"perf_norm_to_cublas": 0.8784011683086032,
"compute_intensity": 448.8767123287671,
"tile_compute_intensity": 28.444444444444443,
"MxNxK": 17179869184,
"size_m": 16384,
"size_n": 2048,
"size_k": 512,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_16384x2048x512.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.0006955776363611222,
"perf_norm_to_sol": 0.7882298312528819,
"perf_norm_to_cublas": 1.0819485779869824,
"compute_intensity": 224.43835616438355,
"tile_compute_intensity": 3.8208955223880596,
"MxNxK": 2147483648,
"size_m": 8192,
"size_n": 128,
"size_k": 2048,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_8192x128x2048.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.0006951231975108385,
"perf_norm_to_sol": 0.7887451388408845,
"perf_norm_to_cublas": 1.0545008503343827,
"compute_intensity": 409.6,
"tile_compute_intensity": 16,
"MxNxK": 2147483648,
"size_m": 2048,
"size_n": 2048,
"size_k": 512,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_2048x2048x512.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.022547215223312378,
"perf_norm_to_sol": 0.7781360669183831,
"perf_norm_to_cublas": 0.9084337771409764,
"compute_intensity": 2048,
"tile_compute_intensity": 24.975609756097562,
"MxNxK": 68719476736,
"size_m": 2048,
"size_n": 4096,
"size_k": 8192,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_2048x4096x8192.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00000897919962881133,
"perf_norm_to_sol": 0.2946122339476898,
"perf_norm_to_cublas": 0.6931575916799706,
"compute_intensity": 15.003663003663004,
"tile_compute_intensity": 1.28,
"MxNxK": 4194304,
"size_m": 2048,
"size_n": 128,
"size_k": 16,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_2048x128x16.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.002730252780020237,
"perf_norm_to_sol": 0.803259019742695,
"perf_norm_to_cublas": 0.9101305074869697,
"compute_intensity": 744.7272727272727,
"tile_compute_intensity": 10.556701030927835,
"MxNxK": 8589934592,
"size_m": 2048,
"size_n": 512,
"size_k": 8192,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_2048x512x8192.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.0007272895891219378,
"perf_norm_to_sol": 0.7538607057391574,
"perf_norm_to_cublas": 0.8622215748386921,
"compute_intensity": 120.02930402930403,
"tile_compute_intensity": 7.876923076923077,
"MxNxK": 2147483648,
"size_m": 1024,
"size_n": 16384,
"size_k": 128,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_1024x16384x128.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.0006859360262751579,
"perf_norm_to_sol": 0.7993092969755602,
"perf_norm_to_cublas": 1.6649249129213644,
"compute_intensity": 234.05714285714285,
"tile_compute_intensity": 3.5432525951557095,
"MxNxK": 2147483648,
"size_m": 2048,
"size_n": 128,
"size_k": 8192,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_2048x128x8192.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.0005088543985038996,
"perf_norm_to_sol": 0.5977430989058179,
"perf_norm_to_cublas": 0.7425747181554171,
"compute_intensity": 31.721200387221685,
"tile_compute_intensity": 3.5310344827586206,
"MxNxK": 1073741824,
"size_m": 2048,
"size_n": 16384,
"size_k": 32,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_2048x16384x32.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.000006022399975336157,
"perf_norm_to_sol": 0.02802157482295558,
"perf_norm_to_cublas": 0.7035069006763038,
"compute_intensity": 7.420289855072464,
"tile_compute_intensity": 0.5714285714285714,
"MxNxK": 131072,
"size_m": 256,
"size_n": 64,
"size_k": 8,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_256x64x8.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.0001233567949384451,
"perf_norm_to_sol": 0.6128822896207071,
"perf_norm_to_cublas": 0.7208747928987829,
"compute_intensity": 15.906796116504854,
"tile_compute_intensity": 1.855072463768116,
"MxNxK": 134217728,
"size_m": 2048,
"size_n": 4096,
"size_k": 16,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_2048x4096x16.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.0001066112075932324,
"perf_norm_to_sol": 0.6428440490798478,
"perf_norm_to_cublas": 0.9205186423469395,
"compute_intensity": 167.18367346938774,
"tile_compute_intensity": 5.818181818181818,
"MxNxK": 268435456,
"size_m": 4096,
"size_n": 256,
"size_k": 256,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_4096x256x256.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00000852800003485754,
"perf_norm_to_sol": 0.12556867833606347,
"perf_norm_to_cublas": 0.7497185770861599,
"compute_intensity": 48.76190476190476,
"tile_compute_intensity": 1.4545454545454546,
"MxNxK": 4194304,
"size_m": 128,
"size_n": 512,
"size_k": 64,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_128x512x64.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.000024111999664455652,
"perf_norm_to_sol": 0.3552918739644947,
"perf_norm_to_cublas": 4.811015326826654,
"compute_intensity": 83.59183673469387,
"tile_compute_intensity": 0.9980506822612085,
"MxNxK": 33554432,
"size_m": 128,
"size_n": 64,
"size_k": 4096,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_128x64x4096.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.0027325952425599096,
"perf_norm_to_sol": 0.8025704420367488,
"perf_norm_to_cublas": 1.0456978009420124,
"compute_intensity": 655.36,
"tile_compute_intensity": 13.473684210526315,
"MxNxK": 8589934592,
"size_m": 8192,
"size_n": 512,
"size_k": 2048,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_8192x512x2048.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00006655679899267853,
"perf_norm_to_sol": 0.5148563437828849,
"perf_norm_to_cublas": 0.8005193289600275,
"compute_intensity": 101.1358024691358,
"tile_compute_intensity": 4.923076923076923,
"MxNxK": 134217728,
"size_m": 4096,
"size_n": 256,
"size_k": 128,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_4096x256x128.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.0013469087891280652,
"perf_norm_to_sol": 0.814123491297636,
"perf_norm_to_cublas": 1.6569425257615942,
"compute_intensity": 240.94117647058823,
"tile_compute_intensity": 1.965451055662188,
"MxNxK": 4294967296,
"size_m": 128,
"size_n": 4096,
"size_k": 8192,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_128x4096x8192.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.000017046398716047408,
"perf_norm_to_sol": 0.25127880934026753,
"perf_norm_to_cublas": 1.4501596182956131,
"compute_intensity": 128,
"tile_compute_intensity": 1.9393939393939394,
"MxNxK": 16777216,
"size_m": 256,
"size_n": 128,
"size_k": 512,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_256x128x512.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.000006672000017715618,
"perf_norm_to_sol": 0.09297009619118334,
"perf_norm_to_cublas": 0.8781774380537599,
"compute_intensity": 40.96,
"tile_compute_intensity": 1.3333333333333333,
"MxNxK": 2097152,
"size_m": 512,
"size_n": 64,
"size_k": 64,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_512x64x64.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.004637769609689713,
"perf_norm_to_sol": 0.5176238944541665,
"perf_norm_to_cublas": 0.9175190358296267,
"compute_intensity": 31.937621832358673,
"tile_compute_intensity": 3.9083969465648853,
"MxNxK": 8589934592,
"size_m": 16384,
"size_n": 16384,
"size_k": 32,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_16384x16384x32.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.0013892672024667263,
"perf_norm_to_sol": 0.7893010674385906,
"perf_norm_to_cublas": 0.8865335674532315,
"compute_intensity": 225.98620689655172,
"tile_compute_intensity": 10.448979591836734,
"MxNxK": 4294967296,
"size_m": 1024,
"size_n": 16384,
"size_k": 256,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_1024x16384x256.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.000015094398986548185,
"perf_norm_to_sol": 0.6309211403932,
"perf_norm_to_cublas": 1.022472059595912,
"compute_intensity": 7.922630560928433,
"tile_compute_intensity": 0.9142857142857143,
"MxNxK": 8388608,
"size_m": 2048,
"size_n": 512,
"size_k": 8,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_2048x512x8.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.005531059205532074,
"perf_norm_to_sol": 0.7930127269421817,
"perf_norm_to_cublas": 0.903205520402195,
"compute_intensity": 246.37593984962405,
"tile_compute_intensity": 3.9233716475095783,
"MxNxK": 17179869184,
"size_m": 16384,
"size_n": 128,
"size_k": 8192,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_16384x128x8192.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00010900800116360187,
"perf_norm_to_sol": 0.6287096326412559,
"perf_norm_to_cublas": 0.9019521736883093,
"compute_intensity": 127.0077519379845,
"tile_compute_intensity": 3.4594594594594597,
"MxNxK": 268435456,
"size_m": 8192,
"size_n": 128,
"size_k": 256,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_8192x128x256.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00005297600291669369,
"perf_norm_to_sol": 0.6468436329020334,
"perf_norm_to_cublas": 2.225369862989616,
"compute_intensity": 126.03076923076924,
"tile_compute_intensity": 1.3315994798439532,
"MxNxK": 134217728,
"size_m": 128,
"size_n": 128,
"size_k": 8192,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_128x128x8192.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00006488639628514648,
"perf_norm_to_sol": 0.36045808624576287,
"perf_norm_to_cublas": 0.6439315703987604,
"compute_intensity": 28.419774501300953,
"tile_compute_intensity": 1.3298701298701299,
"MxNxK": 67108864,
"size_m": 128,
"size_n": 16384,
"size_k": 32,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_128x16384x32.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.001541263982653618,
"perf_norm_to_sol": 0.7114615654460856,
"perf_norm_to_cublas": 0.9885227052722078,
"compute_intensity": 63.38104448742747,
"tile_compute_intensity": 7.314285714285714,
"MxNxK": 4294967296,
"size_m": 16384,
"size_n": 4096,
"size_k": 64,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_16384x4096x64.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00230992641299963,
"perf_norm_to_sol": 0.5206423072727407,
"perf_norm_to_cublas": 0.9345475478704974,
"compute_intensity": 31.906523855890946,
"tile_compute_intensity": 3.878787878787879,
"MxNxK": 4294967296,
"size_m": 16384,
"size_n": 8192,
"size_k": 32,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_16384x8192x32.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00005301439668983221,
"perf_norm_to_sol": 0.6463751796280476,
"perf_norm_to_cublas": 1.8605060012270793,
"compute_intensity": 186.1818181818182,
"tile_compute_intensity": 3.1219512195121952,
"MxNxK": 134217728,
"size_m": 1024,
"size_n": 128,
"size_k": 1024,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_1024x128x1024.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.0027002431452274323,
"perf_norm_to_sol": 0.8121861824202894,
"perf_norm_to_cublas": 0.9094991177375452,
"compute_intensity": 744.7272727272727,
"tile_compute_intensity": 7.420289855072464,
"MxNxK": 8589934592,
"size_m": 512,
"size_n": 4096,
"size_k": 4096,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_512x4096x4096.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.01086776927113533,
"perf_norm_to_sol": 0.8071942335226685,
"perf_norm_to_cublas": 0.9279855383820339,
"compute_intensity": 799.219512195122,
"tile_compute_intensity": 7.757575757575758,
"MxNxK": 34359738368,
"size_m": 512,
"size_n": 16384,
"size_k": 4096,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_512x16384x4096.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00008769279811531306,
"perf_norm_to_sol": 0.3907640184796324,
"perf_norm_to_cublas": 0.7183988087225469,
"compute_intensity": 56.69204152249135,
"tile_compute_intensity": 3.878787878787879,
"MxNxK": 134217728,
"size_m": 8192,
"size_n": 256,
"size_k": 64,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_8192x256x64.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.000019523200171533973,
"perf_norm_to_sol": 0.5985335910294347,
"perf_norm_to_cublas": 0.9111621504115796,
"compute_intensity": 14.216052060737526,
"tile_compute_intensity": 0.9961089494163424,
"MxNxK": 16777216,
"size_m": 16384,
"size_n": 64,
"size_k": 16,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_16384x64x16.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.000182956806384027,
"perf_norm_to_sol": 0.7491864524861875,
"perf_norm_to_cublas": 1.793629878587695,
"compute_intensity": 112.99310344827586,
"tile_compute_intensity": 1.5987509758001561,
"MxNxK": 536870912,
"size_m": 512,
"size_n": 64,
"size_k": 16384,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_512x64x16384.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.003228684514760971,
"perf_norm_to_sol": 0.7413591060215456,
"perf_norm_to_cublas": 0.7550705935916899,
"compute_intensity": 7.996095656417765,
"tile_compute_intensity": 0.9941747572815534,
"MxNxK": 2147483648,
"size_m": 16384,
"size_n": 16384,
"size_k": 8,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_16384x16384x8.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.005422703921794891,
"perf_norm_to_sol": 0.8088585338079519,
"perf_norm_to_cublas": 0.861618278070355,
"compute_intensity": 780.1904761904761,
"tile_compute_intensity": 23.272727272727273,
"MxNxK": 17179869184,
"size_m": 2048,
"size_n": 8192,
"size_k": 1024,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_2048x8192x1024.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00009215679601766169,
"perf_norm_to_sol": 0.8156244772929498,
"perf_norm_to_cublas": 0.9116983843970254,
"compute_intensity": 7.976630963972736,
"tile_compute_intensity": 0.9696969696969697,
"MxNxK": 67108864,
"size_m": 4096,
"size_n": 2048,
"size_k": 8,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_4096x2048x8.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00007106239791028201,
"perf_norm_to_sol": 0.4822126918166485,
"perf_norm_to_cublas": 0.7569234758908744,
"compute_intensity": 84.89119170984456,
"tile_compute_intensity": 3.1219512195121952,
"MxNxK": 134217728,
"size_m": 8192,
"size_n": 128,
"size_k": 128,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_8192x128x128.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.0003487008158117533,
"perf_norm_to_sol": 0.7861682824800057,
"perf_norm_to_cublas": 1.089952142631171,
"compute_intensity": 227.55555555555554,
"tile_compute_intensity": 1.9320754716981132,
"MxNxK": 1073741824,
"size_m": 128,
"size_n": 2048,
"size_k": 4096,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_128x2048x4096.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.002725190296769142,
"perf_norm_to_sol": 0.8047512037338678,
"perf_norm_to_cublas": 1.0478123683851155,
"compute_intensity": 910.2222222222222,
"tile_compute_intensity": 19.692307692307693,
"MxNxK": 8589934592,
"size_m": 4096,
"size_n": 1024,
"size_k": 2048,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_4096x1024x2048.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.0000954464019741863,
"perf_norm_to_sol": 0.7898073179532755,
"perf_norm_to_cublas": 0.8816173155444341,
"compute_intensity": 7.964997569275644,
"tile_compute_intensity": 0.9377289377289377,
"MxNxK": 67108864,
"size_m": 1024,
"size_n": 8192,
"size_k": 8,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_1024x8192x8.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.0014388063922524453,
"perf_norm_to_sol": 0.762124836092622,
"perf_norm_to_cublas": 0.8709130377202091,
"compute_intensity": 125.06870229007633,
"tile_compute_intensity": 12.19047619047619,
"MxNxK": 4294967296,
"size_m": 4096,
"size_n": 8192,
"size_k": 128,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_4096x8192x128.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.005440540611743927,
"perf_norm_to_sol": 0.8062067093092944,
"perf_norm_to_cublas": 0.9052547393446058,
"compute_intensity": 963.7647058823529,
"tile_compute_intensity": 10.61139896373057,
"MxNxK": 17179869184,
"size_m": 1024,
"size_n": 1024,
"size_k": 16384,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_1024x1024x16384.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.002700294554233551,
"perf_norm_to_sol": 0.8121707197796096,
"perf_norm_to_cublas": 0.9179181816827314,
"compute_intensity": 239.1824817518248,
"tile_compute_intensity": 1.9844961240310077,
"MxNxK": 8589934592,
"size_m": 128,
"size_n": 16384,
"size_k": 4096,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_128x16384x4096.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.000022694400104228408,
"perf_norm_to_sol": 0.4662616415603754,
"perf_norm_to_cublas": 0.9486745288199979,
"compute_intensity": 30.007326007326007,
"tile_compute_intensity": 2.56,
"MxNxK": 33554432,
"size_m": 4096,
"size_n": 256,
"size_k": 32,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_4096x256x32.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00003510720271151513,
"perf_norm_to_sol": 0.5991747211241286,
"perf_norm_to_cublas": 0.9342811995580108,
"compute_intensity": 15.05190629306385,
"tile_compute_intensity": 1.3264248704663213,
"MxNxK": 33554432,
"size_m": 16384,
"size_n": 128,
"size_k": 16,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_16384x128x16.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.000007052800356177613,
"perf_norm_to_sol": 0.1888346430784719,
"perf_norm_to_cublas": 0.7586206754256422,
"compute_intensity": 14.94890510948905,
"tile_compute_intensity": 0.9696969696969697,
"MxNxK": 2097152,
"size_m": 128,
"size_n": 1024,
"size_k": 16,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_128x1024x16.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.0006792960222810507,
"perf_norm_to_sol": 0.8071224104788935,
"perf_norm_to_cublas": 1.6883031684552765,
"compute_intensity": 124.12121212121212,
"tile_compute_intensity": 1.9357277882797732,
"MxNxK": 2147483648,
"size_m": 4096,
"size_n": 64,
"size_k": 8192,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_4096x64x8192.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00013603839324787258,
"perf_norm_to_sol": 0.5621862018415025,
"perf_norm_to_cublas": 0.8063605414146581,
"compute_intensity": 31.62934362934363,
"tile_compute_intensity": 3.5555555555555554,
"MxNxK": 268435456,
"size_m": 4096,
"size_n": 2048,
"size_k": 32,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_4096x2048x32.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.000019267199968453495,
"perf_norm_to_sol": 0.36455455311027596,
"perf_norm_to_cublas": 0.6973924872958122,
"compute_intensity": 25.5600624024961,
"tile_compute_intensity": 1.3195876288659794,
"MxNxK": 16777216,
"size_m": 8192,
"size_n": 64,
"size_k": 32,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_8192x64x32.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.002770681492984295,
"perf_norm_to_sol": 0.7915381747349963,
"perf_norm_to_cublas": 0.8829966393911173,
"compute_intensity": 244.53731343283582,
"tile_compute_intensity": 19.692307692307693,
"MxNxK": 8589934592,
"size_m": 4096,
"size_n": 8192,
"size_k": 256,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_4096x8192x256.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.010844102501869202,
"perf_norm_to_sol": 0.8089558988771262,
"perf_norm_to_cublas": 0.879656465797871,
"compute_intensity": 992.969696969697,
"tile_compute_intensity": 25.6,
"MxNxK": 34359738368,
"size_m": 16384,
"size_n": 1024,
"size_k": 2048,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_16384x1024x2048.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00003153280122205615,
"perf_norm_to_sol": 0.6063440658670987,
"perf_norm_to_cublas": 0.9549421722358062,
"compute_intensity": 15.814671814671815,
"tile_compute_intensity": 1.7297297297297298,
"MxNxK": 33554432,
"size_m": 1024,
"size_n": 2048,
"size_k": 16,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_1024x2048x16.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00000643519961158745,
"perf_norm_to_sol": 0.37705961548002925,
"perf_norm_to_cublas": 0.8582794833065766,
"compute_intensity": 7.846743295019157,
"tile_compute_intensity": 0.8421052631578947,
"MxNxK": 2097152,
"size_m": 1024,
"size_n": 256,
"size_k": 8,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_1024x256x8.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00019477440509945153,
"perf_norm_to_sol": 0.7037308657832344,
"perf_norm_to_cublas": 1.0126012835818363,
"compute_intensity": 199.8048780487805,
"tile_compute_intensity": 9.142857142857142,
"MxNxK": 536870912,
"size_m": 4096,
"size_n": 512,
"size_k": 256,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_4096x512x256.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.005453347042202949,
"perf_norm_to_sol": 0.8043134444797383,
"perf_norm_to_cublas": 0.8709173078487684,
"compute_intensity": 448.8767123287671,
"tile_compute_intensity": 20.48,
"MxNxK": 17179869184,
"size_m": 2048,
"size_n": 16384,
"size_k": 512,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_2048x16384x512.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.0053881313651800156,
"perf_norm_to_sol": 0.8140485163006239,
"perf_norm_to_cublas": 0.9376817270599997,
"compute_intensity": 963.7647058823529,
"tile_compute_intensity": 14.222222222222221,
"MxNxK": 17179869184,
"size_m": 1024,
"size_n": 8192,
"size_k": 2048,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_1024x8192x2048.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.0027593184262514115,
"perf_norm_to_sol": 0.7947977844326554,
"perf_norm_to_cublas": 1.6197229400502686,
"compute_intensity": 442.81081081081084,
"tile_compute_intensity": 6.38006230529595,
"MxNxK": 8589934592,
"size_m": 2048,
"size_n": 256,
"size_k": 16384,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_2048x256x16384.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.0000649407971650362,
"perf_norm_to_sol": 0.5276681482085679,
"perf_norm_to_cublas": 0.8102887365704547,
"compute_intensity": 113.77777777777777,
"tile_compute_intensity": 6.4,
"MxNxK": 134217728,
"size_m": 1024,
"size_n": 1024,
"size_k": 128,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_1024x1024x128.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.0000795360014308244,
"perf_norm_to_sol": 0.43083873424371716,
"perf_norm_to_cublas": 0.7566283701891828,
"compute_intensity": 56.69204152249135,
"tile_compute_intensity": 2.6391752577319587,
"MxNxK": 134217728,
"size_m": 256,
"size_n": 8192,
"size_k": 64,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_256x8192x64.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.000023168000916484742,
"perf_norm_to_sol": 0.4567303102658465,
"perf_norm_to_cublas": 0.9026242752542896,
"compute_intensity": 30.007326007326007,
"tile_compute_intensity": 1.9692307692307693,
"MxNxK": 33554432,
"size_m": 256,
"size_n": 4096,
"size_k": 32,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_256x4096x32.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00010435199365019799,
"perf_norm_to_sol": 0.6567615813480503,
"perf_norm_to_cublas": 0.9461515514457759,
"compute_intensity": 204.8,
"tile_compute_intensity": 8,
"MxNxK": 268435456,
"size_m": 1024,
"size_n": 1024,
"size_k": 256,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_1024x1024x256.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00035139839164912703,
"perf_norm_to_sol": 0.7801331138129697,
"perf_norm_to_cublas": 1.7059975566584766,
"compute_intensity": 202.2716049382716,
"tile_compute_intensity": 1.7762359063313096,
"MxNxK": 1073741824,
"size_m": 128,
"size_n": 512,
"size_k": 16384,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_128x512x16384.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.000013072000001557171,
"perf_norm_to_sol": 0.3740357871802086,
"perf_norm_to_cublas": 0.7605874868223458,
"compute_intensity": 15.633587786259541,
"tile_compute_intensity": 1.6,
"MxNxK": 8388608,
"size_m": 1024,
"size_n": 512,
"size_k": 16,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_1024x512x16.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00011193279642611742,
"perf_norm_to_sol": 0.6122814988524178,
"perf_norm_to_cublas": 0.9211241156223939,
"compute_intensity": 112.21917808219177,
"tile_compute_intensity": 7.111111111111111,
"MxNxK": 268435456,
"size_m": 4096,
"size_n": 512,
"size_k": 128,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_4096x512x128.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.0026962272822856903,
"perf_norm_to_sol": 0.8133958832541938,
"perf_norm_to_cublas": 0.9117344351673131,
"compute_intensity": 744.7272727272727,
"tile_compute_intensity": 7.062068965517241,
"MxNxK": 8589934592,
"size_m": 512,
"size_n": 2048,
"size_k": 8192,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_512x2048x8192.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.0027282431721687315,
"perf_norm_to_sol": 0.8038506956055119,
"perf_norm_to_cublas": 0.8770807469360581,
"compute_intensity": 442.81081081081084,
"tile_compute_intensity": 25.6,
"MxNxK": 8589934592,
"size_m": 8192,
"size_n": 2048,
"size_k": 512,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_8192x2048x512.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.001375507190823555,
"perf_norm_to_sol": 0.7971969126587234,
"perf_norm_to_cublas": 0.9747886778849995,
"compute_intensity": 225.98620689655172,
"tile_compute_intensity": 3.878787878787879,
"MxNxK": 4294967296,
"size_m": 16384,
"size_n": 128,
"size_k": 2048,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_16384x128x2048.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00005570239736698568,
"perf_norm_to_sol": 0.6920636070663342,
"perf_norm_to_cublas": 0.8260470475304138,
"compute_intensity": 7.875030040855563,
"tile_compute_intensity": 0.7987519500780031,
"MxNxK": 33554432,
"size_m": 256,
"size_n": 16384,
"size_k": 8,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_256x16384x8.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00005197759601287544,
"perf_norm_to_sol": 0.7258631467844852,
"perf_norm_to_cublas": 0.8945392574406056,
"compute_intensity": 7.961127308066083,
"tile_compute_intensity": 0.9343065693430657,
"MxNxK": 33554432,
"size_m": 1024,
"size_n": 4096,
"size_k": 8,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_1024x4096x8.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00005290560075081885,
"perf_norm_to_sol": 0.6477043960744079,
"perf_norm_to_cublas": 2.2108509378197847,
"compute_intensity": 163.84,
"tile_compute_intensity": 1.5950155763239875,
"MxNxK": 134217728,
"size_m": 128,
"size_n": 256,
"size_k": 4096,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_128x256x4096.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00034959681797772644,
"perf_norm_to_sol": 0.78415336573106,
"perf_norm_to_cublas": 1.0829846336370483,
"compute_intensity": 221.40540540540542,
"tile_compute_intensity": 1.9541984732824427,
"MxNxK": 1073741824,
"size_m": 128,
"size_n": 4096,
"size_k": 2048,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_128x4096x2048.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.010881190747022628,
"perf_norm_to_sol": 0.8061985945164729,
"perf_norm_to_cublas": 0.9079859497974835,
"compute_intensity": 250.13740458015266,
"tile_compute_intensity": 1.9902818270165208,
"MxNxK": 34359738368,
"size_m": 128,
"size_n": 16384,
"size_k": 16384,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_128x16384x16384.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.011035491526126862,
"perf_norm_to_sol": 0.7949261404574837,
"perf_norm_to_cublas": 1.0360181549767864,
"compute_intensity": 862.3157894736842,
"tile_compute_intensity": 14.027397260273972,
"MxNxK": 34359738368,
"size_m": 8192,
"size_n": 512,
"size_k": 8192,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_8192x512x8192.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00036258238833397627,
"perf_norm_to_sol": 0.7560696004175295,
"perf_norm_to_cublas": 1.0445956554362912,
"compute_intensity": 327.68,
"tile_compute_intensity": 6.7368421052631575,
"MxNxK": 1073741824,
"size_m": 4096,
"size_n": 256,
"size_k": 1024,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_4096x256x1024.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.02220543920993805,
"perf_norm_to_sol": 0.7901127830868757,
"perf_norm_to_cublas": 0.8738782744494664,
"compute_intensity": 885.6216216216217,
"tile_compute_intensity": 39.38461538461539,
"MxNxK": 68719476736,
"size_m": 4096,
"size_n": 16384,
"size_k": 1024,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_4096x16384x1024.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00005023360135965049,
"perf_norm_to_sol": 0.3410783743925192,
"perf_norm_to_cublas": 0.6248566784012222,
"compute_intensity": 56.49655172413793,
"tile_compute_intensity": 2.6122448979591835,
"MxNxK": 67108864,
"size_m": 256,
"size_n": 4096,
"size_k": 64,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_256x4096x64.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00006526079960167408,
"perf_norm_to_sol": 0.32316613595760046,
"perf_norm_to_cublas": 0.6263116631524869,
"compute_intensity": 30.06238532110092,
"tile_compute_intensity": 1.9844961240310077,
"MxNxK": 67108864,
"size_m": 256,
"size_n": 8192,
"size_k": 32,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_256x8192x32.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00018456639954820276,
"perf_norm_to_sol": 0.7426528396749352,
"perf_norm_to_cublas": 1.0864643307836512,
"compute_intensity": 215.57894736842104,
"tile_compute_intensity": 3.506849315068493,
"MxNxK": 536870912,
"size_m": 2048,
"size_n": 128,
"size_k": 2048,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_2048x128x2048.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.000006428799679270014,
"perf_norm_to_sol": 0.4093608733102106,
"perf_norm_to_cublas": 0.8695869302571677,
"compute_intensity": 7.522497704315886,
"tile_compute_intensity": 0.6597938144329897,
"MxNxK": 2097152,
"size_m": 4096,
"size_n": 64,
"size_k": 8,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_4096x64x8.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00005013759946450591,
"perf_norm_to_sol": 0.34173146051319986,
"perf_norm_to_cublas": 0.6284145814501623,
"compute_intensity": 56.49655172413793,
"tile_compute_intensity": 3.764705882352941,
"MxNxK": 67108864,
"size_m": 4096,
"size_n": 256,
"size_k": 64,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_4096x256x64.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.0003654784057289362,
"perf_norm_to_sol": 0.7500785741892014,
"perf_norm_to_cublas": 1.0254438727776418,
"compute_intensity": 169.78238341968913,
"tile_compute_intensity": 3.710144927536232,
"MxNxK": 1073741824,
"size_m": 16384,
"size_n": 128,
"size_k": 512,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_16384x128x512.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.0006895423866808414,
"perf_norm_to_sol": 0.7951288470769209,
"perf_norm_to_cublas": 0.992486599976634,
"compute_intensity": 409.6,
"tile_compute_intensity": 6.320987654320987,
"MxNxK": 2147483648,
"size_m": 2048,
"size_n": 256,
"size_k": 4096,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_2048x256x4096.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00019658240489661694,
"perf_norm_to_sol": 0.7639784706420178,
"perf_norm_to_cublas": 0.8095129187566252,
"compute_intensity": 7.980516317584024,
"tile_compute_intensity": 0.9770992366412213,
"MxNxK": 134217728,
"size_m": 8192,
"size_n": 2048,
"size_k": 8,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_8192x2048x8.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.0054210305213928224,
"perf_norm_to_sol": 0.8091082177361921,
"perf_norm_to_cublas": 1.054462805305784,
"compute_intensity": 448.8767123287671,
"tile_compute_intensity": 7.641791044776119,
"MxNxK": 17179869184,
"size_m": 16384,
"size_n": 256,
"size_k": 4096,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_16384x256x4096.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00002759360068012029,
"perf_norm_to_sol": 0.3398405092942277,
"perf_norm_to_cublas": 0.7280528639695293,
"compute_intensity": 42.555844155844156,
"tile_compute_intensity": 1.5802469135802468,
"MxNxK": 33554432,
"size_m": 8192,
"size_n": 64,
"size_k": 64,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_8192x64x64.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.000023500800307374447,
"perf_norm_to_sol": 0.3645321620441791,
"perf_norm_to_cublas": 4.402232695529129,
"compute_intensity": 120.47058823529412,
"tile_compute_intensity": 1.3264248704663213,
"MxNxK": 33554432,
"size_m": 128,
"size_n": 128,
"size_k": 2048,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_128x128x2048.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.0002059871796518564,
"perf_norm_to_sol": 0.665423746102619,
"perf_norm_to_cublas": 0.8950304179389318,
"compute_intensity": 62.534351145038165,
"tile_compute_intensity": 6.4,
"MxNxK": 536870912,
"size_m": 4096,
"size_n": 2048,
"size_k": 64,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_4096x2048x64.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00035108160227537155,
"perf_norm_to_sol": 0.7808370466848975,
"perf_norm_to_cublas": 1.7030434701873618,
"compute_intensity": 202.2716049382716,
"tile_compute_intensity": 2.6631989596879064,
"MxNxK": 1073741824,
"size_m": 512,
"size_n": 128,
"size_k": 16384,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_512x128x16384.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.0000319135986501351,
"perf_norm_to_sol": 0.5368744302222057,
"perf_norm_to_cublas": 1.359169805394755,
"compute_intensity": 157.53846153846155,
"tile_compute_intensity": 1.8285714285714285,
"MxNxK": 67108864,
"size_m": 128,
"size_n": 1024,
"size_k": 512,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_128x1024x512.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.000013891200069338083,
"perf_norm_to_sol": 0.35985803033303426,
"perf_norm_to_cublas": 0.7406127827897875,
"compute_intensity": 15.456603773584906,
"tile_compute_intensity": 1.5238095238095237,
"MxNxK": 8388608,
"size_m": 2048,
"size_n": 256,
"size_k": 16,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_2048x256x16.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.02223135679960251,
"perf_norm_to_sol": 0.7891916598695534,
"perf_norm_to_cublas": 1.0311082134303216,
"compute_intensity": 489.07462686567163,
"tile_compute_intensity": 3.9613152804642167,
"MxNxK": 68719476736,
"size_m": 256,
"size_n": 16384,
"size_k": 16384,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_256x16384x16384.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00018240000354126097,
"perf_norm_to_sol": 0.7514734543414904,
"perf_norm_to_cublas": 1.0959122458257544,
"compute_intensity": 118.72463768115942,
"tile_compute_intensity": 1.9248120300751879,
"MxNxK": 536870912,
"size_m": 4096,
"size_n": 64,
"size_k": 2048,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_4096x64x2048.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.0031851518899202345,
"perf_norm_to_sol": 0.688538646671492,
"perf_norm_to_cublas": 0.9533194262461028,
"compute_intensity": 63.627184466019415,
"tile_compute_intensity": 7.529411764705882,
"MxNxK": 8589934592,
"size_m": 16384,
"size_n": 8192,
"size_k": 64,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_16384x8192x64.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00009457599953748286,
"perf_norm_to_sol": 0.7246487555160739,
"perf_norm_to_cublas": 1.0586026013118017,
"compute_intensity": 292.57142857142856,
"tile_compute_intensity": 3.4594594594594597,
"MxNxK": 268435456,
"size_m": 256,
"size_n": 1024,
"size_k": 1024,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_256x1024x1024.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.0010409312322735786,
"perf_norm_to_sol": 0.5799210589048035,
"perf_norm_to_cublas": 1.0409171977166507,
"compute_intensity": 31.844509232264333,
"tile_compute_intensity": 3.8208955223880596,
"MxNxK": 2147483648,
"size_m": 16384,
"size_n": 4096,
"size_k": 32,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_16384x4096x32.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00007924159872345627,
"perf_norm_to_sol": 0.4324394097959996,
"perf_norm_to_cublas": 0.7829422526865224,
"compute_intensity": 51.1201248049922,
"tile_compute_intensity": 1.5950155763239875,
"MxNxK": 134217728,
"size_m": 128,
"size_n": 16384,
"size_k": 64,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_128x16384x64.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00006430079811252654,
"perf_norm_to_sol": 0.32799095899041003,
"perf_norm_to_cublas": 0.6773166203078124,
"compute_intensity": 30.06238532110092,
"tile_compute_intensity": 2.6122448979591835,
"MxNxK": 67108864,
"size_m": 8192,
"size_n": 256,
"size_k": 32,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_8192x256x32.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.010869033634662628,
"perf_norm_to_sol": 0.807100334931256,
"perf_norm_to_cublas": 1.04841402314341,
"compute_intensity": 1365.3333333333333,
"tile_compute_intensity": 20.897959183673468,
"MxNxK": 34359738368,
"size_m": 4096,
"size_n": 1024,
"size_k": 8192,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_4096x1024x8192.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00020801599603146315,
"perf_norm_to_sol": 0.7254946181829572,
"perf_norm_to_cublas": 0.8135220325358042,
"compute_intensity": 15.922254616132166,
"tile_compute_intensity": 1.8686131386861313,
"MxNxK": 268435456,
"size_m": 2048,
"size_n": 8192,
"size_k": 16,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_2048x8192x16.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00037083839997649194,
"perf_norm_to_sol": 0.7392371488051964,
"perf_norm_to_cublas": 0.9995685419354385,
"compute_intensity": 372.3636363636364,
"tile_compute_intensity": 12.8,
"MxNxK": 1073741824,
"size_m": 2048,
"size_n": 1024,
"size_k": 512,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_2048x1024x512.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.0006899103987962008,
"perf_norm_to_sol": 0.7947047093200375,
"perf_norm_to_cublas": 1.080182033763748,
"compute_intensity": 585.1428571428571,
"tile_compute_intensity": 6.918918918918919,
"MxNxK": 2147483648,
"size_m": 512,
"size_n": 2048,
"size_k": 2048,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_512x2048x2048.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.0001819776021875441,
"perf_norm_to_sol": 0.753217753643055,
"perf_norm_to_cublas": 1.166015950734137,
"compute_intensity": 315.0769230769231,
"tile_compute_intensity": 3.1801242236024843,
"MxNxK": 536870912,
"size_m": 256,
"size_n": 512,
"size_k": 4096,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_256x512x4096.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.000006656000186922028,
"perf_norm_to_sol": 0.05207876508713338,
"perf_norm_to_cublas": 0.558653825447202,
"compute_intensity": 14.628571428571428,
"tile_compute_intensity": 1,
"MxNxK": 524288,
"size_m": 256,
"size_n": 128,
"size_k": 16,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_256x128x16.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.000006880000000819564,
"perf_norm_to_sol": 0.07782337885896817,
"perf_norm_to_cublas": 1.8818604454408936,
"compute_intensity": 64,
"tile_compute_intensity": 0.9696969696969697,
"MxNxK": 2097152,
"size_m": 128,
"size_n": 64,
"size_k": 256,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_128x64x256.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.0000067519998992793265,
"perf_norm_to_sol": 0.11348468729475253,
"perf_norm_to_cublas": 0.6900474338132462,
"compute_intensity": 27.675675675675677,
"tile_compute_intensity": 1.2307692307692308,
"MxNxK": 2097152,
"size_m": 128,
"size_n": 512,
"size_k": 32,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_128x512x32.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.0027302879840135573,
"perf_norm_to_sol": 0.8032486626209074,
"perf_norm_to_cublas": 0.8669889635247954,
"compute_intensity": 455.1111111111111,
"tile_compute_intensity": 25.6,
"MxNxK": 8589934592,
"size_m": 4096,
"size_n": 4096,
"size_k": 512,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_4096x4096x512.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.0007775328122079372,
"perf_norm_to_sol": 0.7051471453343366,
"perf_norm_to_cublas": 0.8528555660495443,
"compute_intensity": 62.89443378119002,
"tile_compute_intensity": 6.320987654320987,
"MxNxK": 2147483648,
"size_m": 2048,
"size_n": 16384,
"size_k": 64,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_2048x16384x64.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.010815942287445068,
"perf_norm_to_sol": 0.8110620835225908,
"perf_norm_to_cublas": 0.8748345378135516,
"compute_intensity": 862.3157894736842,
"tile_compute_intensity": 42.666666666666664,
"MxNxK": 34359738368,
"size_m": 8192,
"size_n": 4096,
"size_k": 1024,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_8192x4096x1024.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00634743720293045,
"perf_norm_to_sol": 0.6910191000286934,
"perf_norm_to_cublas": 0.956381187564456,
"compute_intensity": 63.750972762645915,
"tile_compute_intensity": 7.641791044776119,
"MxNxK": 17179869184,
"size_m": 16384,
"size_n": 16384,
"size_k": 64,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_16384x16384x64.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.000006646400288445875,
"perf_norm_to_sol": 0.06862366629640144,
"perf_norm_to_cublas": 0.672604691779506,
"compute_intensity": 24.975609756097562,
"tile_compute_intensity": 1.1428571428571428,
"MxNxK": 1048576,
"size_m": 512,
"size_n": 64,
"size_k": 32,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_512x64x32.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.02169468253850937,
"perf_norm_to_sol": 0.808714363194184,
"perf_norm_to_cublas": 0.9424958556719512,
"compute_intensity": 1489.4545454545455,
"tile_compute_intensity": 14.840579710144928,
"MxNxK": 68719476736,
"size_m": 1024,
"size_n": 8192,
"size_k": 8192,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_1024x8192x8192.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00135635519400239,
"perf_norm_to_sol": 0.8084534867512584,
"perf_norm_to_cublas": 0.9656655808430697,
"compute_intensity": 819.2,
"tile_compute_intensity": 10.448979591836734,
"MxNxK": 4294967296,
"size_m": 1024,
"size_n": 1024,
"size_k": 4096,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_1024x1024x4096.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.0008375647477805615,
"perf_norm_to_sol": 0.7172447285825602,
"perf_norm_to_cublas": 0.7616252040550708,
"compute_intensity": 15.961032635168047,
"tile_compute_intensity": 1.9320754716981132,
"MxNxK": 1073741824,
"size_m": 4096,
"size_n": 16384,
"size_k": 16,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_4096x16384x16.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.000006758399831596762,
"perf_norm_to_sol": 0.04319132261662418,
"perf_norm_to_cublas": 1.143939382521122,
"compute_intensity": 51.2,
"tile_compute_intensity": 0.9411764705882353,
"MxNxK": 1048576,
"size_m": 128,
"size_n": 64,
"size_k": 128,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_128x64x128.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.0027578016743063927,
"perf_norm_to_sol": 0.7952349119812628,
"perf_norm_to_cublas": 0.8734098414577766,
"compute_intensity": 404.5432098765432,
"tile_compute_intensity": 19.692307692307693,
"MxNxK": 8589934592,
"size_m": 16384,
"size_n": 1024,
"size_k": 512,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_16384x1024x512.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.0013509440235793591,
"perf_norm_to_sol": 0.8116917257304825,
"perf_norm_to_cublas": 0.9719851939082224,
"compute_intensity": 431.1578947368421,
"tile_compute_intensity": 3.8496240601503757,
"MxNxK": 4294967296,
"size_m": 256,
"size_n": 4096,
"size_k": 4096,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_256x4096x4096.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.0003624799894168973,
"perf_norm_to_sol": 0.7562831865756057,
"perf_norm_to_cublas": 1.0167734031962314,
"compute_intensity": 327.68,
"tile_compute_intensity": 6.7368421052631575,
"MxNxK": 1073741824,
"size_m": 512,
"size_n": 4096,
"size_k": 512,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_512x4096x512.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.0007640895899385213,
"perf_norm_to_sol": 0.71755334734546,
"perf_norm_to_cublas": 1.0142014305948328,
"compute_intensity": 62.89443378119002,
"tile_compute_intensity": 6.918918918918919,
"MxNxK": 2147483648,
"size_m": 16384,
"size_n": 2048,
"size_k": 64,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_16384x2048x64.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00001637440000195056,
"perf_norm_to_sol": 0.2874582071926161,
"perf_norm_to_cublas": 0.7955833785533081,
"compute_intensity": 42.44559585492228,
"tile_compute_intensity": 1.5609756097560976,
"MxNxK": 16777216,
"size_m": 4096,
"size_n": 64,
"size_k": 64,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_4096x64x64.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00005603200406767428,
"perf_norm_to_sol": 0.3057823002535807,
"perf_norm_to_cublas": 0.5735579108113513,
"compute_intensity": 51.0404984423676,
"tile_compute_intensity": 2.6122448979591835,
"MxNxK": 67108864,
"size_m": 8192,
"size_n": 128,
"size_k": 64,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_8192x128x64.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.000006985600339248776,
"perf_norm_to_sol": 0.03395158230139252,
"perf_norm_to_cublas": 0.7540082029502123,
"compute_intensity": 24.38095238095238,
"tile_compute_intensity": 1,
"MxNxK": 524288,
"size_m": 256,
"size_n": 64,
"size_k": 32,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_256x64x32.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.0000062240003899205474,
"perf_norm_to_sol": 0.20078966833009487,
"perf_norm_to_cublas": 0.7089973686613162,
"compute_intensity": 7.728301886792453,
"tile_compute_intensity": 0.6530612244897959,
"MxNxK": 1048576,
"size_m": 128,
"size_n": 1024,
"size_k": 8,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_128x1024x8.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.000016572800814174115,
"perf_norm_to_sol": 0.2584595579791451,
"perf_norm_to_cublas": 0.972388434122048,
"compute_intensity": 128,
"tile_compute_intensity": 2.4615384615384617,
"MxNxK": 16777216,
"size_m": 256,
"size_n": 256,
"size_k": 256,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_256x256x256.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.000013555200712289662,
"perf_norm_to_sol": 0.15799835295040543,
"perf_norm_to_cublas": 0.7200188554817074,
"compute_intensity": 85.33333333333333,
"tile_compute_intensity": 2.2857142857142856,
"MxNxK": 8388608,
"size_m": 256,
"size_n": 256,
"size_k": 128,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_256x256x128.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00009973120177164674,
"perf_norm_to_sol": 0.6871909607932734,
"perf_norm_to_cublas": 0.9620098764480972,
"compute_intensity": 167.18367346938774,
"tile_compute_intensity": 3.5555555555555554,
"MxNxK": 268435456,
"size_m": 4096,
"size_n": 128,
"size_k": 512,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_4096x128x512.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00014289600076153875,
"perf_norm_to_sol": 0.5398030155178114,
"perf_norm_to_cublas": 0.6850520580059444,
"compute_intensity": 15.7462758289284,
"tile_compute_intensity": 1.5950155763239875,
"MxNxK": 134217728,
"size_m": 512,
"size_n": 16384,
"size_k": 16,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_512x16384x16.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.01082865297794342,
"perf_norm_to_sol": 0.8101100575282586,
"perf_norm_to_cublas": 0.9145860470902243,
"compute_intensity": 1260.3076923076924,
"tile_compute_intensity": 15.875968992248062,
"MxNxK": 34359738368,
"size_m": 2048,
"size_n": 1024,
"size_k": 16384,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_2048x1024x16384.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00005304639926180243,
"perf_norm_to_sol": 0.6459852253899904,
"perf_norm_to_cublas": 2.18851432773763,
"compute_intensity": 163.84,
"tile_compute_intensity": 1.9922178988326849,
"MxNxK": 134217728,
"size_m": 256,
"size_n": 128,
"size_k": 4096,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_256x128x4096.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.01090458184480667,
"perf_norm_to_sol": 0.8044692416237095,
"perf_norm_to_cublas": 0.9324952196253017,
"compute_intensity": 1260.3076923076924,
"tile_compute_intensity": 24.38095238095238,
"MxNxK": 34359738368,
"size_m": 8192,
"size_n": 1024,
"size_k": 4096,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_8192x1024x4096.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.0026996033266186714,
"perf_norm_to_sol": 0.8123786743424052,
"perf_norm_to_cublas": 1.0562001906386949,
"compute_intensity": 404.5432098765432,
"tile_compute_intensity": 3.9083969465648853,
"MxNxK": 8589934592,
"size_m": 256,
"size_n": 16384,
"size_k": 2048,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_256x16384x2048.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.000052182399667799476,
"perf_norm_to_sol": 0.6566809959184061,
"perf_norm_to_cublas": 1.9967499800331694,
"compute_intensity": 227.55555555555554,
"tile_compute_intensity": 2.6391752577319587,
"MxNxK": 134217728,
"size_m": 256,
"size_n": 256,
"size_k": 2048,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_256x256x2048.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00007336639682762325,
"perf_norm_to_sol": 0.5172333822523881,
"perf_norm_to_cublas": 0.6986522245550331,
"compute_intensity": 15.875968992248062,
"tile_compute_intensity": 1.8285714285714285,
"MxNxK": 67108864,
"size_m": 2048,
"size_n": 2048,
"size_k": 16,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_2048x2048x16.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.0003669440047815442,
"perf_norm_to_sol": 0.7470827098791475,
"perf_norm_to_cublas": 1.0223772971343819,
"compute_intensity": 252.06153846153848,
"tile_compute_intensity": 6.7368421052631575,
"MxNxK": 1073741824,
"size_m": 8192,
"size_n": 256,
"size_k": 512,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_8192x256x512.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.0013652799651026725,
"perf_norm_to_sol": 0.8031686642248121,
"perf_norm_to_cublas": 1.050835555884199,
"compute_intensity": 630.1538461538462,
"tile_compute_intensity": 12.8,
"MxNxK": 4294967296,
"size_m": 1024,
"size_n": 4096,
"size_k": 1024,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_1024x4096x1024.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.0027099167928099632,
"perf_norm_to_sol": 0.809286904139502,
"perf_norm_to_cublas": 1.6312045944824833,
"compute_intensity": 442.81081081081084,
"tile_compute_intensity": 3.757798165137615,
"MxNxK": 8589934592,
"size_m": 256,
"size_n": 2048,
"size_k": 16384,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_256x2048x16384.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00005237439763732255,
"perf_norm_to_sol": 0.6542736857911604,
"perf_norm_to_cublas": 2.003665985652355,
"compute_intensity": 186.1818181818182,
"tile_compute_intensity": 1.7655172413793103,
"MxNxK": 134217728,
"size_m": 128,
"size_n": 512,
"size_k": 2048,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_128x512x2048.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.002703343890607357,
"perf_norm_to_sol": 0.8112546018834852,
"perf_norm_to_cublas": 0.9315869874510343,
"compute_intensity": 744.7272727272727,
"tile_compute_intensity": 25.6,
"MxNxK": 8589934592,
"size_m": 4096,
"size_n": 2048,
"size_k": 1024,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_4096x2048x1024.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.000028604798717424274,
"perf_norm_to_sol": 0.6645829954492454,
"perf_norm_to_cublas": 0.9243764125962224,
"compute_intensity": 7.930300096805421,
"tile_compute_intensity": 0.8827586206896552,
"MxNxK": 16777216,
"size_m": 512,
"size_n": 4096,
"size_k": 8,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_512x4096x8.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.0000737663998734206,
"perf_norm_to_sol": 0.5164072227184502,
"perf_norm_to_cublas": 0.6994621071885985,
"compute_intensity": 15.845261121856867,
"tile_compute_intensity": 1.7534246575342465,
"MxNxK": 67108864,
"size_m": 1024,
"size_n": 4096,
"size_k": 16,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_1024x4096x16.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00009970240062102675,
"perf_norm_to_sol": 0.6873894704604752,
"perf_norm_to_cublas": 0.9920402794320797,
"compute_intensity": 101.7639751552795,
"tile_compute_intensity": 1.9104477611940298,
"MxNxK": 268435456,
"size_m": 8192,
"size_n": 64,
"size_k": 512,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_8192x64x512.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00010524480603635311,
"perf_norm_to_sol": 0.7214758622088613,
"perf_norm_to_cublas": 0.7996290011791436,
"compute_intensity": 7.9360620004843785,
"tile_compute_intensity": 0.9377289377289377,
"MxNxK": 67108864,
"size_m": 16384,
"size_n": 512,
"size_k": 8,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_16384x512x8.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00014819200150668622,
"perf_norm_to_sol": 0.5219891728343243,
"perf_norm_to_cublas": 0.8634420133993255,
"compute_intensity": 31.44721689059501,
"tile_compute_intensity": 3.4594594594594597,
"MxNxK": 268435456,
"size_m": 8192,
"size_n": 1024,
"size_k": 32,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_8192x1024x32.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.000016527999832760543,
"perf_norm_to_sol": 0.3223168081893855,
"perf_norm_to_cublas": 0.76902224035929,
"compute_intensity": 29.8978102189781,
"tile_compute_intensity": 1.9393939393939394,
"MxNxK": 16777216,
"size_m": 256,
"size_n": 2048,
"size_k": 32,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_256x2048x32.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.0013463328592479228,
"perf_norm_to_sol": 0.8144717543898894,
"perf_norm_to_cublas": 1.644386236186578,
"compute_intensity": 431.1578947368421,
"tile_compute_intensity": 3.750915750915751,
"MxNxK": 4294967296,
"size_m": 256,
"size_n": 2048,
"size_k": 8192,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_256x2048x8192.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.000053907197434455155,
"perf_norm_to_sol": 0.6356700369171994,
"perf_norm_to_cublas": 2.2501485230941265,
"compute_intensity": 101.1358024691358,
"tile_compute_intensity": 1.3315994798439532,
"MxNxK": 134217728,
"size_m": 256,
"size_n": 64,
"size_k": 8192,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_256x64x8192.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.0007005344144999981,
"perf_norm_to_sol": 0.7826525458046677,
"perf_norm_to_cublas": 1.0550436450427425,
"compute_intensity": 334.3673469387755,
"tile_compute_intensity": 11.636363636363637,
"MxNxK": 2147483648,
"size_m": 8192,
"size_n": 512,
"size_k": 512,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_8192x512x512.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.0006905375979840755,
"perf_norm_to_sol": 0.7939828975754766,
"perf_norm_to_cublas": 1.0683386986551247,
"compute_intensity": 585.1428571428571,
"tile_compute_intensity": 10.24,
"MxNxK": 2147483648,
"size_m": 2048,
"size_n": 512,
"size_k": 2048,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_2048x512x2048.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00002242560003651306,
"perf_norm_to_sol": 0.3820097358317,
"perf_norm_to_cublas": 1.0398116120851963,
"compute_intensity": 146.28571428571428,
"tile_compute_intensity": 2.909090909090909,
"MxNxK": 33554432,
"size_m": 256,
"size_n": 512,
"size_k": 256,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_256x512x256.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.0000068640001700259745,
"perf_norm_to_sol": 0.2139631567924256,
"perf_norm_to_cublas": 0.7864801707899565,
"compute_intensity": 14.173010380622838,
"tile_compute_intensity": 0.9696969696969697,
"MxNxK": 2097152,
"size_m": 2048,
"size_n": 64,
"size_k": 16,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_2048x64x16.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00019494720036163926,
"perf_norm_to_sol": 0.7031071001726639,
"perf_norm_to_cublas": 1.00013131621907,
"compute_intensity": 215.57894736842104,
"tile_compute_intensity": 9.142857142857142,
"MxNxK": 536870912,
"size_m": 1024,
"size_n": 2048,
"size_k": 256,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_1024x2048x256.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.010776422172784805,
"perf_norm_to_sol": 0.8140364720555822,
"perf_norm_to_cublas": 0.8881873169620107,
"compute_intensity": 799.219512195122,
"tile_compute_intensity": 36.57142857142857,
"MxNxK": 34359738368,
"size_m": 16384,
"size_n": 2048,
"size_k": 1024,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_16384x2048x1024.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00006780159892514349,
"perf_norm_to_sol": 0.5054038654913675,
"perf_norm_to_cublas": 0.7839815264573334,
"compute_intensity": 101.1358024691358,
"tile_compute_intensity": 3.1219512195121952,
"MxNxK": 134217728,
"size_m": 256,
"size_n": 4096,
"size_k": 128,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_256x4096x128.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.005389955267310143,
"perf_norm_to_sol": 0.8137730511530905,
"perf_norm_to_cublas": 0.9275861833019768,
"compute_intensity": 1170.2857142857142,
"tile_compute_intensity": 23.272727272727273,
"MxNxK": 17179869184,
"size_m": 2048,
"size_n": 4096,
"size_k": 2048,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_2048x4096x2048.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.002726246416568756,
"perf_norm_to_sol": 0.8044394514011142,
"perf_norm_to_cublas": 0.9232280371216357,
"compute_intensity": 244.53731343283582,
"tile_compute_intensity": 3.8641509433962264,
"MxNxK": 8589934592,
"size_m": 8192,
"size_n": 128,
"size_k": 8192,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_8192x128x8192.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00023793599102646114,
"perf_norm_to_sol": 0.6379455148607974,
"perf_norm_to_cublas": 0.8224463716743843,
"compute_intensity": 31.751937984496124,
"tile_compute_intensity": 3.657142857142857,
"MxNxK": 536870912,
"size_m": 4096,
"size_n": 4096,
"size_k": 32,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_4096x4096x32.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.0007775487843900919,
"perf_norm_to_sol": 0.705132660405703,
"perf_norm_to_cublas": 0.8561140006251148,
"compute_intensity": 63.25868725868726,
"tile_compute_intensity": 6.918918918918919,
"MxNxK": 2147483648,
"size_m": 4096,
"size_n": 8192,
"size_k": 64,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_4096x8192x64.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.000016617600340396167,
"perf_norm_to_sol": 0.35351509592689867,
"perf_norm_to_cublas": 0.8444059159012526,
"compute_intensity": 28.346020761245676,
"tile_compute_intensity": 1.9393939393939394,
"MxNxK": 16777216,
"size_m": 4096,
"size_n": 128,
"size_k": 32,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_4096x128x32.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00008088640170171857,
"perf_norm_to_sol": 0.49125022416972397,
"perf_norm_to_cublas": 0.7240969423391094,
"compute_intensity": 15.50780880265026,
"tile_compute_intensity": 1.5900621118012421,
"MxNxK": 67108864,
"size_m": 16384,
"size_n": 256,
"size_k": 16,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_16384x256x16.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.0022314880043268204,
"perf_norm_to_sol": 0.5389432589207123,
"perf_norm_to_cublas": 0.7965210960408874,
"compute_intensity": 31.906523855890946,
"tile_compute_intensity": 3.8496240601503757,
"MxNxK": 4294967296,
"size_m": 8192,
"size_n": 16384,
"size_k": 32,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_8192x16384x32.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.0006968608126044273,
"perf_norm_to_sol": 0.7867784111479862,
"perf_norm_to_cublas": 1.0851727621007243,
"compute_intensity": 120.02930402930403,
"tile_compute_intensity": 1.9692307692307693,
"MxNxK": 2147483648,
"size_m": 16384,
"size_n": 64,
"size_k": 2048,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_16384x64x2048.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.000052127998787909743,
"perf_norm_to_sol": 0.6573663094699617,
"perf_norm_to_cublas": 1.8912217512279017,
"compute_intensity": 256,
"tile_compute_intensity": 3.878787878787879,
"MxNxK": 134217728,
"size_m": 512,
"size_n": 256,
"size_k": 1024,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_512x256x1024.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00010528001002967358,
"perf_norm_to_sol": 0.6509723958727687,
"perf_norm_to_cublas": 0.9379331018886085,
"compute_intensity": 167.18367346938774,
"tile_compute_intensity": 3.4594594594594597,
"MxNxK": 268435456,
"size_m": 256,
"size_n": 4096,
"size_k": 256,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_256x4096x256.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.005445056036114693,
"perf_norm_to_sol": 0.8055381458640433,
"perf_norm_to_cublas": 0.9291334612393519,
"compute_intensity": 668.734693877551,
"tile_compute_intensity": 14.222222222222221,
"MxNxK": 17179869184,
"size_m": 16384,
"size_n": 512,
"size_k": 2048,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_16384x512x2048.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00011120958952233195,
"perf_norm_to_sol": 0.6162632256884949,
"perf_norm_to_cublas": 0.9236613732620205,
"compute_intensity": 117.02857142857142,
"tile_compute_intensity": 7.111111111111111,
"MxNxK": 268435456,
"size_m": 1024,
"size_n": 2048,
"size_k": 128,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_1024x2048x128.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.0000162240001372993,
"perf_norm_to_sol": 0.2640161943206743,
"perf_norm_to_cublas": 0.8254437508924528,
"compute_intensity": 93.0909090909091,
"tile_compute_intensity": 2.6666666666666665,
"MxNxK": 16777216,
"size_m": 256,
"size_n": 512,
"size_k": 128,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_256x512x128.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.0000066016000346280634,
"perf_norm_to_sol": 0.0442171937056893,
"perf_norm_to_cublas": 0.7794474257618892,
"compute_intensity": 42.666666666666664,
"tile_compute_intensity": 1.1428571428571428,
"MxNxK": 1048576,
"size_m": 128,
"size_n": 128,
"size_k": 64,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_128x128x64.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.001088822353631258,
"perf_norm_to_sol": 0.5533412530959309,
"perf_norm_to_cublas": 0.8229044538937644,
"compute_intensity": 31.875486381322958,
"tile_compute_intensity": 3.8208955223880596,
"MxNxK": 2147483648,
"size_m": 8192,
"size_n": 8192,
"size_k": 32,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_8192x8192x32.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.0007765184156596661,
"perf_norm_to_sol": 0.7060683067850188,
"perf_norm_to_cublas": 0.946748921748331,
"compute_intensity": 63.25868725868726,
"tile_compute_intensity": 7.111111111111111,
"MxNxK": 2147483648,
"size_m": 8192,
"size_n": 4096,
"size_k": 64,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_8192x4096x64.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00010125759290531277,
"perf_norm_to_sol": 0.7444801381168034,
"perf_norm_to_cublas": 0.8355403065155155,
"compute_intensity": 7.964997569275644,
"tile_compute_intensity": 0.9624060150375939,
"MxNxK": 67108864,
"size_m": 8192,
"size_n": 1024,
"size_k": 8,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_8192x1024x8.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00019776320550590754,
"perf_norm_to_sol": 0.6930953631258621,
"perf_norm_to_cublas": 1.0012944637099837,
"compute_intensity": 127.50194552529183,
"tile_compute_intensity": 3.506849315068493,
"MxNxK": 536870912,
"size_m": 16384,
"size_n": 128,
"size_k": 256,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_16384x128x256.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.0006998303811997175,
"perf_norm_to_sol": 0.7834398986684449,
"perf_norm_to_cublas": 1.0787482359635798,
"compute_intensity": 203.527950310559,
"tile_compute_intensity": 3.8208955223880596,
"MxNxK": 2147483648,
"size_m": 16384,
"size_n": 128,
"size_k": 1024,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_16384x128x1024.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00018233599839732051,
"perf_norm_to_sol": 0.7517372429901134,
"perf_norm_to_cublas": 1.058230965431348,
"compute_intensity": 199.8048780487805,
"tile_compute_intensity": 1.9393939393939394,
"MxNxK": 536870912,
"size_m": 128,
"size_n": 4096,
"size_k": 1024,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_128x4096x1024.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00001530239969724789,
"perf_norm_to_sol": 0.6306908532397912,
"perf_norm_to_cublas": 0.9997908846833159,
"compute_intensity": 7.869356388088376,
"tile_compute_intensity": 0.7950310559006211,
"MxNxK": 8388608,
"size_m": 256,
"size_n": 4096,
"size_k": 8,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_256x4096x8.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.0006949727889150381,
"perf_norm_to_sol": 0.7889158419974248,
"perf_norm_to_cublas": 1.0831111975019208,
"compute_intensity": 390.0952380952381,
"tile_compute_intensity": 6.918918918918919,
"MxNxK": 2147483648,
"size_m": 4096,
"size_n": 256,
"size_k": 2048,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_4096x256x2048.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.000120630394667387,
"perf_norm_to_sol": 0.5681352577473936,
"perf_norm_to_cublas": 0.889540335176208,
"compute_intensity": 85.11168831168831,
"tile_compute_intensity": 3.1604938271604937,
"MxNxK": 268435456,
"size_m": 16384,
"size_n": 128,
"size_k": 128,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_16384x128x128.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.0000973824004177004,
"perf_norm_to_sol": 0.7037655682398726,
"perf_norm_to_cublas": 1.032005833294332,
"compute_intensity": 195.04761904761904,
"tile_compute_intensity": 3.4594594594594597,
"MxNxK": 268435456,
"size_m": 2048,
"size_n": 128,
"size_k": 1024,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_2048x128x1024.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.0015641951933503151,
"perf_norm_to_sol": 0.7654998214351574,
"perf_norm_to_cublas": 0.781989349714309,
"compute_intensity": 7.9941449133935105,
"tile_compute_intensity": 0.9903288201160542,
"MxNxK": 1073741824,
"size_m": 8192,
"size_n": 16384,
"size_k": 8,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_8192x16384x8.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00011605439940467476,
"perf_norm_to_sol": 0.5905366855378781,
"perf_norm_to_cublas": 0.8892656904200091,
"compute_intensity": 61.59398496240601,
"tile_compute_intensity": 5.818181818181818,
"MxNxK": 268435456,
"size_m": 4096,
"size_n": 1024,
"size_k": 64,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_4096x1024x64.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.0027151072397828104,
"perf_norm_to_sol": 0.8077397973806206,
"perf_norm_to_cublas": 0.9032742224689796,
"compute_intensity": 1024,
"tile_compute_intensity": 15.515151515151516,
"MxNxK": 8589934592,
"size_m": 2048,
"size_n": 1024,
"size_k": 4096,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_2048x1024x4096.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.001384518388658762,
"perf_norm_to_sol": 0.7920083220611345,
"perf_norm_to_cublas": 0.9090444582401378,
"compute_intensity": 237.44927536231884,
"tile_compute_intensity": 18.285714285714285,
"MxNxK": 4294967296,
"size_m": 8192,
"size_n": 2048,
"size_k": 256,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_8192x2048x256.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.0006848768331110478,
"perf_norm_to_sol": 0.8005454651483401,
"perf_norm_to_cublas": 1.0643011669190756,
"compute_intensity": 234.05714285714285,
"tile_compute_intensity": 3.7372262773722627,
"MxNxK": 2147483648,
"size_m": 4096,
"size_n": 128,
"size_k": 4096,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_4096x128x4096.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.000006150399713078514,
"perf_norm_to_sol": 0.0519104877277682,
"perf_norm_to_cublas": 0.5915713213404165,
"compute_intensity": 7.641791044776119,
"tile_compute_intensity": 0.6666666666666666,
"MxNxK": 262144,
"size_m": 256,
"size_n": 128,
"size_k": 8,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_256x128x8.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00035368320532143117,
"perf_norm_to_sol": 0.775093409416949,
"perf_norm_to_cublas": 1.6715341575547684,
"compute_intensity": 221.40540540540542,
"tile_compute_intensity": 3.190031152647975,
"MxNxK": 1073741824,
"size_m": 1024,
"size_n": 128,
"size_k": 8192,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_1024x128x8192.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.0002501599956303835,
"perf_norm_to_sol": 0.6172743006237188,
"perf_norm_to_cublas": 0.8355740584068375,
"compute_intensity": 31.477425552353505,
"tile_compute_intensity": 3.1801242236024843,
"MxNxK": 536870912,
"size_m": 1024,
"size_n": 16384,
"size_k": 32,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_1024x16384x32.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.0003530911868438125,
"perf_norm_to_sol": 0.7763929876487283,
"perf_norm_to_cublas": 1.0761185958517656,
"compute_intensity": 455.1111111111111,
"tile_compute_intensity": 5.278350515463917,
"MxNxK": 1073741824,
"size_m": 512,
"size_n": 512,
"size_k": 4096,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_512x512x4096.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00001836159935919568,
"perf_norm_to_sol": 0.23328026546678177,
"perf_norm_to_cublas": 5.303764346078755,
"compute_intensity": 113.77777777777777,
"tile_compute_intensity": 1.3195876288659794,
"MxNxK": 16777216,
"size_m": 128,
"size_n": 128,
"size_k": 1024,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_128x128x1024.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.000006243200186872855,
"perf_norm_to_sol": 0.19578884483324427,
"perf_norm_to_cublas": 0.705791875126521,
"compute_intensity": 7.816793893129771,
"tile_compute_intensity": 0.8,
"MxNxK": 1048576,
"size_m": 512,
"size_n": 256,
"size_k": 8,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_512x256x8.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.0007224319968372583,
"perf_norm_to_sol": 0.7589296229022305,
"perf_norm_to_cublas": 0.9006733161408109,
"compute_intensity": 123.18796992481202,
"tile_compute_intensity": 11.636363636363637,
"MxNxK": 2147483648,
"size_m": 8192,
"size_n": 2048,
"size_k": 128,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_8192x2048x128.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.000009171199781121687,
"perf_norm_to_sol": 0.11676222509417343,
"perf_norm_to_cublas": 2.534194074146827,
"compute_intensity": 73.14285714285714,
"tile_compute_intensity": 0.9846153846153847,
"MxNxK": 4194304,
"size_m": 128,
"size_n": 64,
"size_k": 512,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_128x64x512.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.000008566399628762156,
"perf_norm_to_sol": 0.14056137824682124,
"perf_norm_to_cublas": 0.8502055026640178,
"compute_intensity": 41.795918367346935,
"tile_compute_intensity": 1.4545454545454546,
"MxNxK": 4194304,
"size_m": 1024,
"size_n": 64,
"size_k": 64,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_1024x64x64.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.0007259903941303492,
"perf_norm_to_sol": 0.7552097760039576,
"perf_norm_to_cublas": 0.9106588777478629,
"compute_intensity": 120.02930402930403,
"tile_compute_intensity": 10.24,
"MxNxK": 2147483648,
"size_m": 16384,
"size_n": 1024,
"size_k": 128,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_16384x1024x128.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00014120959676802158,
"perf_norm_to_sol": 0.5462496450806543,
"perf_norm_to_cublas": 0.7957533059869354,
"compute_intensity": 15.7462758289284,
"tile_compute_intensity": 1.7655172413793103,
"MxNxK": 134217728,
"size_m": 16384,
"size_n": 512,
"size_k": 16,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_16384x512x16.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00019565760158002377,
"perf_norm_to_sol": 0.7005542316074567,
"perf_norm_to_cublas": 1.0004251835262505,
"compute_intensity": 215.57894736842104,
"tile_compute_intensity": 10.666666666666666,
"MxNxK": 536870912,
"size_m": 2048,
"size_n": 1024,
"size_k": 256,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_2048x1024x256.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.0013891360722482204,
"perf_norm_to_sol": 0.7893755750577561,
"perf_norm_to_cublas": 0.8897063403177918,
"compute_intensity": 240.94117647058823,
"tile_compute_intensity": 18.285714285714285,
"MxNxK": 4294967296,
"size_m": 4096,
"size_n": 4096,
"size_k": 256,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_4096x4096x256.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00018051520455628634,
"perf_norm_to_sol": 0.759319754089258,
"perf_norm_to_cublas": 1.0948574042098684,
"compute_intensity": 341.3333333333333,
"tile_compute_intensity": 3.506849315068493,
"MxNxK": 536870912,
"size_m": 256,
"size_n": 1024,
"size_k": 2048,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_256x1024x2048.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.0000959615979809314,
"perf_norm_to_sol": 0.7141854846992466,
"perf_norm_to_cublas": 1.0694612186998762,
"compute_intensity": 112.21917808219177,
"tile_compute_intensity": 1.9104477611940298,
"MxNxK": 268435456,
"size_m": 4096,
"size_n": 64,
"size_k": 1024,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_4096x64x1024.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.000031830399530008434,
"perf_norm_to_sol": 0.6075530108741741,
"perf_norm_to_cublas": 0.9384739214482573,
"compute_intensity": 15.723608445297504,
"tile_compute_intensity": 1.5802469135802468,
"MxNxK": 33554432,
"size_m": 512,
"size_n": 4096,
"size_k": 16,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_512x4096x16.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.0007224287837743759,
"perf_norm_to_sol": 0.7589329983056702,
"perf_norm_to_cublas": 0.8810280236482566,
"compute_intensity": 124.12121212121212,
"tile_compute_intensity": 11.636363636363637,
"MxNxK": 2147483648,
"size_m": 4096,
"size_n": 4096,
"size_k": 128,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_4096x4096x128.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.0001114911981858313,
"perf_norm_to_sol": 0.6147066448446808,
"perf_norm_to_cublas": 0.9271260864939108,
"compute_intensity": 117.02857142857142,
"tile_compute_intensity": 8,
"MxNxK": 268435456,
"size_m": 2048,
"size_n": 1024,
"size_k": 128,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_2048x1024x128.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.0013980287127196788,
"perf_norm_to_sol": 0.7843544813405294,
"perf_norm_to_cublas": 0.8965973382974427,
"compute_intensity": 225.98620689655172,
"tile_compute_intensity": 15.058823529411764,
"MxNxK": 4294967296,
"size_m": 16384,
"size_n": 1024,
"size_k": 256,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_16384x1024x256.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.000022604799596592784,
"perf_norm_to_sol": 0.37898135346030615,
"perf_norm_to_cublas": 1.0273216642311331,
"compute_intensity": 120.47058823529412,
"tile_compute_intensity": 1.7777777777777777,
"MxNxK": 33554432,
"size_m": 128,
"size_n": 1024,
"size_k": 256,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_128x1024x256.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00036280960775911806,
"perf_norm_to_sol": 0.7555960911821067,
"perf_norm_to_cublas": 1.037811516648683,
"compute_intensity": 455.1111111111111,
"tile_compute_intensity": 6.7368421052631575,
"MxNxK": 1073741824,
"size_m": 512,
"size_n": 2048,
"size_k": 1024,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_512x2048x1024.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.021831241250038148,
"perf_norm_to_sol": 0.8036556956558725,
"perf_norm_to_cublas": 0.9034660328129076,
"compute_intensity": 1489.4545454545455,
"tile_compute_intensity": 51.2,
"MxNxK": 68719476736,
"size_m": 8192,
"size_n": 4096,
"size_k": 2048,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_8192x4096x2048.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00000668479988235049,
"perf_norm_to_sol": 0.19923005346485118,
"perf_norm_to_cublas": 0.7515557790955317,
"compute_intensity": 14.94890510948905,
"tile_compute_intensity": 1.2307692307692308,
"MxNxK": 2097152,
"size_m": 1024,
"size_n": 128,
"size_k": 16,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_1024x128x16.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.0016608929261565208,
"perf_norm_to_sol": 0.7209321698842355,
"perf_norm_to_cublas": 0.7364019540082032,
"compute_intensity": 7.9941449133935105,
"tile_compute_intensity": 0.9922480620155039,
"MxNxK": 1073741824,
"size_m": 16384,
"size_n": 8192,
"size_k": 8,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_16384x8192x8.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.000016246399900410326,
"perf_norm_to_sol": 0.26365218135493973,
"perf_norm_to_cublas": 0.8193815525560123,
"compute_intensity": 81.92,
"tile_compute_intensity": 1.6842105263157894,
"MxNxK": 16777216,
"size_m": 128,
"size_n": 1024,
"size_k": 128,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_128x1024x128.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00011861120583489537,
"perf_norm_to_sol": 0.5778069608526224,
"perf_norm_to_cublas": 0.8913290066127666,
"compute_intensity": 101.7639751552795,
"tile_compute_intensity": 5.12,
"MxNxK": 268435456,
"size_m": 8192,
"size_n": 256,
"size_k": 128,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_8192x256x128.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00007320960285142064,
"perf_norm_to_sol": 0.46806960902121475,
"perf_norm_to_cublas": 0.8415508271479905,
"compute_intensity": 59.7956204379562,
"tile_compute_intensity": 4.923076923076923,
"MxNxK": 134217728,
"size_m": 4096,
"size_n": 512,
"size_k": 64,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_4096x512x64.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.0001978783868253231,
"perf_norm_to_sol": 0.692691925238145,
"perf_norm_to_cublas": 1.0016818821265843,
"compute_intensity": 168.90721649484536,
"tile_compute_intensity": 6.095238095238095,
"MxNxK": 536870912,
"size_m": 8192,
"size_n": 256,
"size_k": 256,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_8192x256x256.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.0004277823958545923,
"perf_norm_to_sol": 0.6408340411448,
"perf_norm_to_cublas": 0.9266019902777504,
"compute_intensity": 61.94328922495274,
"tile_compute_intensity": 6.2439024390243905,
"MxNxK": 1073741824,
"size_m": 16384,
"size_n": 1024,
"size_k": 64,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_16384x1024x64.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00019497280009090902,
"perf_norm_to_sol": 0.7030147829294193,
"perf_norm_to_cublas": 1.0047267631858445,
"compute_intensity": 199.8048780487805,
"tile_compute_intensity": 6.095238095238095,
"MxNxK": 536870912,
"size_m": 512,
"size_n": 4096,
"size_k": 256,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_512x4096x256.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00000655999974696897,
"perf_norm_to_sol": 0.3796201275767372,
"perf_norm_to_cublas": 0.7980488010474718,
"compute_intensity": 7.742911153119093,
"tile_compute_intensity": 0.6597938144329897,
"MxNxK": 2097152,
"size_m": 128,
"size_n": 2048,
"size_k": 8,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_128x2048x8.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00003771199844777584,
"perf_norm_to_sol": 0.45432742354819255,
"perf_norm_to_cublas": 0.8107763974963544,
"compute_intensity": 84.45360824742268,
"tile_compute_intensity": 3.0476190476190474,
"MxNxK": 67108864,
"size_m": 4096,
"size_n": 128,
"size_k": 128,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_4096x128x128.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.000022681600239593535,
"perf_norm_to_sol": 0.37769811015632454,
"perf_norm_to_cublas": 1.283860055610314,
"compute_intensity": 170.66666666666666,
"tile_compute_intensity": 2.56,
"MxNxK": 33554432,
"size_m": 256,
"size_n": 256,
"size_k": 512,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_256x256x512.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00003160960040986538,
"perf_norm_to_sol": 0.5420376996060989,
"perf_norm_to_cublas": 3.0835188292470215,
"compute_intensity": 204.8,
"tile_compute_intensity": 2.6122448979591835,
"MxNxK": 67108864,
"size_m": 256,
"size_n": 256,
"size_k": 1024,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_256x256x1024.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.0013591552153229714,
"perf_norm_to_sol": 0.8067879764592172,
"perf_norm_to_cublas": 1.0582126623752606,
"compute_intensity": 337.8144329896907,
"tile_compute_intensity": 3.8496240601503757,
"MxNxK": 4294967296,
"size_m": 256,
"size_n": 16384,
"size_k": 1024,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_256x16384x1024.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00003174080047756434,
"perf_norm_to_sol": 0.6023706590095335,
"perf_norm_to_cublas": 0.9457606856227402,
"compute_intensity": 15.814671814671815,
"tile_compute_intensity": 1.7777777777777777,
"MxNxK": 33554432,
"size_m": 2048,
"size_n": 1024,
"size_k": 16,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_2048x1024x16.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.000006620799831580371,
"perf_norm_to_sol": 0.10195573695957273,
"perf_norm_to_cublas": 0.595456724381558,
"compute_intensity": 14.840579710144928,
"tile_compute_intensity": 1.1428571428571428,
"MxNxK": 1048576,
"size_m": 512,
"size_n": 128,
"size_k": 16,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_512x128x16.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.000058009603526443244,
"perf_norm_to_sol": 0.5907158142813788,
"perf_norm_to_cublas": 0.9062223120389438,
"compute_intensity": 126.03076923076924,
"tile_compute_intensity": 3.3684210526315788,
"MxNxK": 134217728,
"size_m": 4096,
"size_n": 128,
"size_k": 256,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_4096x128x256.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.000114592001773417,
"perf_norm_to_sol": 0.5980729833312355,
"perf_norm_to_cublas": 0.8974308413229686,
"compute_intensity": 112.21917808219177,
"tile_compute_intensity": 5.12,
"MxNxK": 268435456,
"size_m": 512,
"size_n": 4096,
"size_k": 128,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_512x4096x128.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.002698761597275734,
"perf_norm_to_sol": 0.812632050916483,
"perf_norm_to_cublas": 1.0478204436427254,
"compute_intensity": 910.2222222222222,
"tile_compute_intensity": 13.473684210526315,
"MxNxK": 8589934592,
"size_m": 1024,
"size_n": 4096,
"size_k": 2048,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_1024x4096x2048.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.011158080399036407,
"perf_norm_to_sol": 0.7861926400595628,
"perf_norm_to_cublas": 0.9044712643233008,
"compute_intensity": 250.13740458015266,
"tile_compute_intensity": 3.930902111324376,
"MxNxK": 34359738368,
"size_m": 16384,
"size_n": 128,
"size_k": 16384,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_16384x128x16384.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00036571838427335024,
"perf_norm_to_sol": 0.7495863846461799,
"perf_norm_to_cublas": 1.0064399870507597,
"compute_intensity": 372.3636363636364,
"tile_compute_intensity": 10.666666666666666,
"MxNxK": 1073741824,
"size_m": 1024,
"size_n": 2048,
"size_k": 512,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_1024x2048x512.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00003169920237269253,
"perf_norm_to_sol": 0.5405055587894311,
"perf_norm_to_cublas": 1.3654349621819717,
"compute_intensity": 204.8,
"tile_compute_intensity": 3.0476190476190474,
"MxNxK": 67108864,
"size_m": 256,
"size_n": 512,
"size_k": 512,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_256x512x512.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00018005119636654854,
"perf_norm_to_sol": 0.761276589654015,
"perf_norm_to_cublas": 1.1798422115725307,
"compute_intensity": 215.57894736842104,
"tile_compute_intensity": 1.8754578754578755,
"MxNxK": 536870912,
"size_m": 128,
"size_n": 1024,
"size_k": 4096,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_128x1024x4096.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00536707192659378,
"perf_norm_to_sol": 0.8172426983368853,
"perf_norm_to_cublas": 0.9340082725740405,
"compute_intensity": 1170.2857142857142,
"tile_compute_intensity": 28.444444444444443,
"MxNxK": 17179869184,
"size_m": 4096,
"size_n": 2048,
"size_k": 2048,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_4096x2048x2048.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.0027176415547728538,
"perf_norm_to_sol": 0.8069865460649859,
"perf_norm_to_cublas": 0.9131846954814342,
"compute_intensity": 244.53731343283582,
"tile_compute_intensity": 1.9806576402321083,
"MxNxK": 8589934592,
"size_m": 128,
"size_n": 8192,
"size_k": 8192,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_128x8192x8192.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.000006201599899213761,
"perf_norm_to_sol": 0.10002200940328947,
"perf_norm_to_cublas": 0.6119710951503705,
"compute_intensity": 7.757575757575758,
"tile_compute_intensity": 0.7272727272727273,
"MxNxK": 524288,
"size_m": 256,
"size_n": 256,
"size_k": 8,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_256x256x8.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00008783040102571249,
"perf_norm_to_sol": 0.4524112665342262,
"perf_norm_to_cublas": 0.6298319825612421,
"compute_intensity": 15.50780880265026,
"tile_compute_intensity": 1.3298701298701299,
"MxNxK": 67108864,
"size_m": 256,
"size_n": 16384,
"size_k": 16,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_256x16384x16.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00003214080061297864,
"perf_norm_to_sol": 0.5330792875368758,
"perf_norm_to_cublas": 3.0402230453312105,
"compute_intensity": 170.66666666666666,
"tile_compute_intensity": 1.7534246575342465,
"MxNxK": 67108864,
"size_m": 128,
"size_n": 512,
"size_k": 1024,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_128x512x1024.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.0000965824001468718,
"perf_norm_to_sol": 0.7095949185597609,
"perf_norm_to_cublas": 1.2388841378121622,
"compute_intensity": 195.04761904761904,
"tile_compute_intensity": 2.6528497409326426,
"MxNxK": 268435456,
"size_m": 512,
"size_n": 128,
"size_k": 4096,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_512x128x4096.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.000013123199460096658,
"perf_norm_to_sol": 0.2057512051518022,
"perf_norm_to_cublas": 0.6232625955928649,
"compute_intensity": 29.681159420289855,
"tile_compute_intensity": 1.8823529411764706,
"MxNxK": 8388608,
"size_m": 256,
"size_n": 1024,
"size_k": 32,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_256x1024x32.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.001373215951025486,
"perf_norm_to_sol": 0.7985270525335315,
"perf_norm_to_cublas": 1.6473098233858,
"compute_intensity": 125.06870229007633,
"tile_compute_intensity": 1.965451055662188,
"MxNxK": 4294967296,
"size_m": 8192,
"size_n": 64,
"size_k": 8192,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_8192x64x8192.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00136219197884202,
"perf_norm_to_sol": 0.8049893868825841,
"perf_norm_to_cublas": 1.6306847367008683,
"compute_intensity": 630.1538461538462,
"tile_compute_intensity": 7.937984496124031,
"MxNxK": 4294967296,
"size_m": 1024,
"size_n": 512,
"size_k": 8192,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_1024x512x8192.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00018352639162912964,
"perf_norm_to_sol": 0.8183276732838556,
"perf_norm_to_cublas": 0.865235816230301,
"compute_intensity": 7.980516317584024,
"tile_compute_intensity": 0.9660377358490566,
"MxNxK": 134217728,
"size_m": 2048,
"size_n": 8192,
"size_k": 8,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_2048x8192x8.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00026662400923669336,
"perf_norm_to_sol": 0.5714939445645282,
"perf_norm_to_cublas": 0.8836413564298699,
"compute_intensity": 31.690522243713733,
"tile_compute_intensity": 3.657142857142857,
"MxNxK": 536870912,
"size_m": 8192,
"size_n": 2048,
"size_k": 32,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_8192x2048x32.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.0000066016000346280634,
"perf_norm_to_sol": 0.060798641345322785,
"perf_norm_to_cublas": 0.6422685235700423,
"compute_intensity": 26.94736842105263,
"tile_compute_intensity": 1.1428571428571428,
"MxNxK": 1048576,
"size_m": 128,
"size_n": 256,
"size_k": 32,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_128x256x32.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.0000062719998823013155,
"perf_norm_to_sol": 0.10035371529901634,
"perf_norm_to_cublas": 0.7489796316105563,
"compute_intensity": 7.6992481203007515,
"tile_compute_intensity": 0.7272727272727273,
"MxNxK": 524288,
"size_m": 512,
"size_n": 128,
"size_k": 8,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_512x128x8.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.0000062719998823013155,
"perf_norm_to_sol": 0.10035371529901634,
"perf_norm_to_cublas": 0.6596938738222247,
"compute_intensity": 7.6992481203007515,
"tile_compute_intensity": 0.64,
"MxNxK": 524288,
"size_m": 128,
"size_n": 512,
"size_k": 8,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_128x512x8.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00018820799887180327,
"perf_norm_to_sol": 0.7282833968518788,
"perf_norm_to_cublas": 0.9875711822840483,
"compute_intensity": 248.24242424242425,
"tile_compute_intensity": 3.657142857142857,
"MxNxK": 536870912,
"size_m": 256,
"size_n": 4096,
"size_k": 512,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_256x4096x512.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.0027192672714591026,
"perf_norm_to_sol": 0.8065040883429053,
"perf_norm_to_cublas": 0.9253162983240925,
"compute_intensity": 744.7272727272727,
"tile_compute_intensity": 12.487804878048781,
"MxNxK": 8589934592,
"size_m": 4096,
"size_n": 512,
"size_k": 4096,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_4096x512x4096.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.02180553525686264,
"perf_norm_to_sol": 0.8046031049987129,
"perf_norm_to_cublas": 0.8878822604134214,
"compute_intensity": 1310.72,
"tile_compute_intensity": 42.666666666666664,
"MxNxK": 68719476736,
"size_m": 16384,
"size_n": 2048,
"size_k": 2048,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_16384x2048x2048.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.002738921530544758,
"perf_norm_to_sol": 0.8007166862106585,
"perf_norm_to_cublas": 1.637199102613198,
"compute_intensity": 244.53731343283582,
"tile_compute_intensity": 3.757798165137615,
"MxNxK": 8589934592,
"size_m": 4096,
"size_n": 128,
"size_k": 16384,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_4096x128x16384.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00020344960503280163,
"perf_norm_to_sol": 0.6737234054150768,
"perf_norm_to_cublas": 0.9012551249734781,
"compute_intensity": 112.99310344827586,
"tile_compute_intensity": 7.529411764705882,
"MxNxK": 536870912,
"size_m": 8192,
"size_n": 512,
"size_k": 128,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_8192x512x128.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.0003739519976079464,
"perf_norm_to_sol": 0.7330821154043156,
"perf_norm_to_cublas": 1.0145644300184369,
"compute_intensity": 455.1111111111111,
"tile_compute_intensity": 9.846153846153847,
"MxNxK": 1073741824,
"size_m": 2048,
"size_n": 512,
"size_k": 1024,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_2048x512x1024.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.000013510399730876088,
"perf_norm_to_sol": 0.3902564109294627,
"perf_norm_to_cublas": 0.7626717473715775,
"compute_intensity": 15.03119266055046,
"tile_compute_intensity": 0.9922480620155039,
"MxNxK": 8388608,
"size_m": 128,
"size_n": 4096,
"size_k": 16,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_128x4096x16.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00006027839845046401,
"perf_norm_to_sol": 0.568482094152239,
"perf_norm_to_cublas": 0.9184053303720155,
"compute_intensity": 84.89119170984456,
"tile_compute_intensity": 1.855072463768116,
"MxNxK": 134217728,
"size_m": 8192,
"size_n": 64,
"size_k": 256,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_8192x64x256.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00009722880204208196,
"perf_norm_to_sol": 0.4098058005108214,
"perf_norm_to_cublas": 0.7469720777589217,
"compute_intensity": 30.97164461247637,
"tile_compute_intensity": 3.1219512195121952,
"MxNxK": 134217728,
"size_m": 8192,
"size_n": 512,
"size_k": 32,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_8192x512x32.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.002710575982928276,
"perf_norm_to_sol": 0.8090900921211529,
"perf_norm_to_cublas": 0.9288381611861334,
"compute_intensity": 744.7272727272727,
"tile_compute_intensity": 21.333333333333332,
"MxNxK": 8589934592,
"size_m": 2048,
"size_n": 4096,
"size_k": 1024,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_2048x4096x1024.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00035883840173482894,
"perf_norm_to_sol": 0.7639581497988125,
"perf_norm_to_cublas": 1.0601942305705843,
"compute_intensity": 113.3840830449827,
"tile_compute_intensity": 1.9541984732824427,
"MxNxK": 1073741824,
"size_m": 16384,
"size_n": 64,
"size_k": 1024,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_16384x64x1024.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00009696639608591795,
"perf_norm_to_sol": 0.7067848567435694,
"perf_norm_to_cublas": 1.1071546999465744,
"compute_intensity": 292.57142857142856,
"tile_compute_intensity": 3.9384615384615387,
"MxNxK": 268435456,
"size_m": 512,
"size_n": 256,
"size_k": 2048,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_512x256x2048.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00018252161098644137,
"perf_norm_to_sol": 0.7509727751813106,
"perf_norm_to_cublas": 1.170062009172186,
"compute_intensity": 120.47058823529412,
"tile_compute_intensity": 1.8754578754578755,
"MxNxK": 536870912,
"size_m": 2048,
"size_n": 64,
"size_k": 4096,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_2048x64x4096.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.000017561599088367073,
"perf_norm_to_sol": 0.5526726532983428,
"perf_norm_to_cublas": 0.9803207410791253,
"compute_intensity": 15.693486590038313,
"tile_compute_intensity": 1.6842105263157894,
"MxNxK": 16777216,
"size_m": 2048,
"size_n": 512,
"size_k": 16,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_2048x512x16.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00003916159912478179,
"perf_norm_to_sol": 0.43751009852887107,
"perf_norm_to_cublas": 0.8520183417989372,
"compute_intensity": 63.750972762645915,
"tile_compute_intensity": 1.7534246575342465,
"MxNxK": 67108864,
"size_m": 8192,
"size_n": 64,
"size_k": 128,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_8192x64x128.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.0027463968843221666,
"perf_norm_to_sol": 0.7985372340932066,
"perf_norm_to_cublas": 0.9281723770713168,
"compute_intensity": 504.12307692307695,
"tile_compute_intensity": 13.473684210526315,
"MxNxK": 8589934592,
"size_m": 16384,
"size_n": 512,
"size_k": 1024,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_16384x512x1024.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.0003751136129721999,
"perf_norm_to_sol": 0.7308119779871054,
"perf_norm_to_cublas": 1.0357779622341423,
"compute_intensity": 202.2716049382716,
"tile_compute_intensity": 9.846153846153847,
"MxNxK": 1073741824,
"size_m": 8192,
"size_n": 512,
"size_k": 256,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_8192x512x256.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.005377955362200737,
"perf_norm_to_sol": 0.8155888340550953,
"perf_norm_to_cublas": 1.0601090761307281,
"compute_intensity": 780.1904761904761,
"tile_compute_intensity": 7.641791044776119,
"MxNxK": 17179869184,
"size_m": 512,
"size_n": 8192,
"size_k": 4096,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_512x8192x4096.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.001351667195558548,
"perf_norm_to_sol": 0.8112574526241169,
"perf_norm_to_cublas": 1.6365211332742065,
"compute_intensity": 630.1538461538462,
"tile_compute_intensity": 6.3602484472049685,
"MxNxK": 4294967296,
"size_m": 512,
"size_n": 1024,
"size_k": 8192,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_512x1024x8192.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.000019648000306915492,
"perf_norm_to_sol": 0.5357693424131508,
"perf_norm_to_cublas": 0.8819218156002484,
"compute_intensity": 15.044995408631772,
"tile_compute_intensity": 0.9961089494163424,
"MxNxK": 16777216,
"size_m": 128,
"size_n": 8192,
"size_k": 16,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_128x8192x16.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.0001248672022484243,
"perf_norm_to_sol": 0.5488581399475583,
"perf_norm_to_cublas": 0.8931600946521497,
"compute_intensity": 60.014652014652015,
"tile_compute_intensity": 5.12,
"MxNxK": 268435456,
"size_m": 8192,
"size_n": 512,
"size_k": 64,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_8192x512x64.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.021829110383987427,
"perf_norm_to_sol": 0.8037341451486927,
"perf_norm_to_cublas": 1.0397299714907258,
"compute_intensity": 1489.4545454545455,
"tile_compute_intensity": 14.124137931034483,
"MxNxK": 68719476736,
"size_m": 1024,
"size_n": 4096,
"size_k": 16384,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_1024x4096x16384.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.022148627042770385,
"perf_norm_to_sol": 0.7921394558656154,
"perf_norm_to_cublas": 0.8629006171548764,
"compute_intensity": 2048,
"tile_compute_intensity": 39.38461538461539,
"MxNxK": 68719476736,
"size_m": 4096,
"size_n": 4096,
"size_k": 4096,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_4096x4096x4096.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00009337280062027276,
"perf_norm_to_sol": 0.805002506956814,
"perf_norm_to_cublas": 0.898522908799616,
"compute_intensity": 7.976630963972736,
"tile_compute_intensity": 0.9624060150375939,
"MxNxK": 67108864,
"size_m": 2048,
"size_n": 4096,
"size_k": 8,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_2048x4096x8.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.0007061344105750323,
"perf_norm_to_sol": 0.7764457229689805,
"perf_norm_to_cublas": 1.0492189425288811,
"compute_intensity": 254.015503875969,
"tile_compute_intensity": 6.918918918918919,
"MxNxK": 2147483648,
"size_m": 16384,
"size_n": 256,
"size_k": 512,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_16384x256x512.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.000698339194059372,
"perf_norm_to_sol": 0.7851128042021255,
"perf_norm_to_cublas": 1.0491818210915453,
"compute_intensity": 334.3673469387755,
"tile_compute_intensity": 6.918918918918919,
"MxNxK": 2147483648,
"size_m": 512,
"size_n": 8192,
"size_k": 512,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_512x8192x512.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.010708486288785934,
"perf_norm_to_sol": 0.819200814227298,
"perf_norm_to_cublas": 1.0617588764021648,
"compute_intensity": 1365.3333333333333,
"tile_compute_intensity": 14.027397260273972,
"MxNxK": 34359738368,
"size_m": 1024,
"size_n": 4096,
"size_k": 8192,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_1024x4096x8192.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.000014815999020356684,
"perf_norm_to_sol": 0.6513960691016064,
"perf_norm_to_cublas": 1.069762569811826,
"compute_intensity": 7.869356388088376,
"tile_compute_intensity": 0.8767123287671232,
"MxNxK": 8388608,
"size_m": 4096,
"size_n": 256,
"size_k": 8,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_4096x256x8.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00001625279983272776,
"perf_norm_to_sol": 0.2635483619433072,
"perf_norm_to_cublas": 0.7702303704082404,
"compute_intensity": 55.351351351351354,
"tile_compute_intensity": 2.4615384615384617,
"MxNxK": 16777216,
"size_m": 256,
"size_n": 1024,
"size_k": 64,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_256x1024x64.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.0004183359909802675,
"perf_norm_to_sol": 0.6553046531419137,
"perf_norm_to_cublas": 0.9009944346965397,
"compute_intensity": 62.77394636015325,
"tile_compute_intensity": 6.7368421052631575,
"MxNxK": 1073741824,
"size_m": 8192,
"size_n": 2048,
"size_k": 64,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_8192x2048x64.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00019393919501453638,
"perf_norm_to_sol": 0.7067615224595405,
"perf_norm_to_cublas": 1.0116984858261926,
"compute_intensity": 127.50194552529183,
"tile_compute_intensity": 1.8754578754578755,
"MxNxK": 536870912,
"size_m": 128,
"size_n": 16384,
"size_k": 256,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_128x16384x256.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00038893758319318293,
"perf_norm_to_sol": 0.7048367998161301,
"perf_norm_to_cublas": 0.9089869810620242,
"compute_intensity": 119.5912408759124,
"tile_compute_intensity": 9.846153846153847,
"MxNxK": 1073741824,
"size_m": 8192,
"size_n": 1024,
"size_k": 128,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_8192x1024x128.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.005475347116589546,
"perf_norm_to_sol": 0.8010816940113377,
"perf_norm_to_cublas": 0.9019564392510349,
"compute_intensity": 248.24242424242425,
"tile_compute_intensity": 1.9825750242013553,
"MxNxK": 17179869184,
"size_m": 128,
"size_n": 8192,
"size_k": 16384,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_128x8192x16384.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.0007073376327753067,
"perf_norm_to_sol": 0.7751249439125646,
"perf_norm_to_cublas": 0.9559678234834061,
"compute_intensity": 224.43835616438355,
"tile_compute_intensity": 14.222222222222221,
"MxNxK": 2147483648,
"size_m": 8192,
"size_n": 1024,
"size_k": 256,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_8192x1024x256.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.0001143679954111576,
"perf_norm_to_sol": 0.5992443963028454,
"perf_norm_to_cublas": 0.9182708789862031,
"compute_intensity": 85.11168831168831,
"tile_compute_intensity": 1.7716262975778547,
"MxNxK": 268435456,
"size_m": 128,
"size_n": 16384,
"size_k": 128,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_128x16384x128.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.022190290689468383,
"perf_norm_to_sol": 0.790652164919923,
"perf_norm_to_cublas": 0.8952844184110668,
"compute_intensity": 910.2222222222222,
"tile_compute_intensity": 51.2,
"MxNxK": 68719476736,
"size_m": 8192,
"size_n": 8192,
"size_k": 1024,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_8192x8192x1024.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.000006992000271566212,
"perf_norm_to_sol": 0.07657677714785711,
"perf_norm_to_cublas": 0.7867276631648,
"compute_intensity": 46.54545454545455,
"tile_compute_intensity": 1.3333333333333333,
"MxNxK": 2097152,
"size_m": 128,
"size_n": 256,
"size_k": 64,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_128x256x64.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.0027484385296702384,
"perf_norm_to_sol": 0.7979440500682963,
"perf_norm_to_cublas": 1.6116414909708316,
"compute_intensity": 655.36,
"tile_compute_intensity": 7.968871595330739,
"MxNxK": 8589934592,
"size_m": 1024,
"size_n": 512,
"size_k": 16384,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_1024x512x16384.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.022173671424388884,
"perf_norm_to_sol": 0.7912447622243112,
"perf_norm_to_cublas": 1.0261112211357966,
"compute_intensity": 910.2222222222222,
"tile_compute_intensity": 7.728301886792453,
"MxNxK": 68719476736,
"size_m": 512,
"size_n": 8192,
"size_k": 16384,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_512x8192x16384.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.000008537599933333695,
"perf_norm_to_sol": 0.6159619962980221,
"perf_norm_to_cublas": 1.0843327946133914,
"compute_intensity": 7.525953146531925,
"tile_compute_intensity": 0.6632124352331606,
"MxNxK": 4194304,
"size_m": 8192,
"size_n": 64,
"size_k": 8,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_8192x64x8.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00000870079966261983,
"perf_norm_to_sol": 0.2893612150377502,
"perf_norm_to_cublas": 0.7613093111656116,
"compute_intensity": 15.398496240601503,
"tile_compute_intensity": 1.4545454545454546,
"MxNxK": 4194304,
"size_m": 1024,
"size_n": 256,
"size_k": 16,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_1024x256x16.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.0027596129104495047,
"perf_norm_to_sol": 0.7947129698605433,
"perf_norm_to_cublas": 0.8916798886899915,
"compute_intensity": 239.1824817518248,
"tile_compute_intensity": 19.692307692307693,
"MxNxK": 8589934592,
"size_m": 16384,
"size_n": 2048,
"size_k": 256,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_16384x2048x256.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00019899520557373763,
"perf_norm_to_sol": 0.688804337460586,
"perf_norm_to_cublas": 0.8911973738878705,
"compute_intensity": 120.47058823529412,
"tile_compute_intensity": 9.142857142857142,
"MxNxK": 536870912,
"size_m": 2048,
"size_n": 2048,
"size_k": 128,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_2048x2048x128.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00001854720030678436,
"perf_norm_to_sol": 0.23094584099256416,
"perf_norm_to_cublas": 5.228433258099168,
"compute_intensity": 93.0909090909091,
"tile_compute_intensity": 1.3195876288659794,
"MxNxK": 16777216,
"size_m": 256,
"size_n": 64,
"size_k": 1024,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_256x64x1024.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00018360000103712082,
"perf_norm_to_sol": 0.7465618734138159,
"perf_norm_to_cublas": 1.0434858779845693,
"compute_intensity": 315.0769230769231,
"tile_compute_intensity": 3.657142857142857,
"MxNxK": 536870912,
"size_m": 256,
"size_n": 2048,
"size_k": 1024,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_256x2048x1024.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.0003697087988257408,
"perf_norm_to_sol": 0.7414957997667656,
"perf_norm_to_cublas": 1.0293680180808813,
"compute_intensity": 227.55555555555554,
"tile_compute_intensity": 12.8,
"MxNxK": 1073741824,
"size_m": 2048,
"size_n": 2048,
"size_k": 256,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_2048x2048x256.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00142819844186306,
"perf_norm_to_sol": 0.7677855217612346,
"perf_norm_to_cublas": 0.9000497158034959,
"compute_intensity": 123.65283018867925,
"tile_compute_intensity": 12.19047619047619,
"MxNxK": 4294967296,
"size_m": 16384,
"size_n": 2048,
"size_k": 128,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_16384x2048x128.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.000355644803494215,
"perf_norm_to_sol": 0.7708182961558782,
"perf_norm_to_cublas": 1.0736824663426652,
"compute_intensity": 119.5912408759124,
"tile_compute_intensity": 1.9541984732824427,
"MxNxK": 1073741824,
"size_m": 8192,
"size_n": 64,
"size_k": 2048,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_8192x64x2048.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00036083520390093324,
"perf_norm_to_sol": 0.7597305321167248,
"perf_norm_to_cublas": 1.0475430181689553,
"compute_intensity": 512,
"tile_compute_intensity": 7.757575757575758,
"MxNxK": 1073741824,
"size_m": 1024,
"size_n": 512,
"size_k": 2048,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_1024x512x2048.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.0006880512461066246,
"perf_norm_to_sol": 0.7968520455920253,
"perf_norm_to_cublas": 1.085942353802044,
"compute_intensity": 224.43835616438355,
"tile_compute_intensity": 1.9692307692307693,
"MxNxK": 2147483648,
"size_m": 128,
"size_n": 8192,
"size_k": 2048,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_128x8192x2048.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.022379513084888458,
"perf_norm_to_sol": 0.783967073246001,
"perf_norm_to_cublas": 0.9176382650436256,
"compute_intensity": 2048,
"tile_compute_intensity": 31.03030303030303,
"MxNxK": 68719476736,
"size_m": 4096,
"size_n": 2048,
"size_k": 8192,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_4096x2048x8192.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00035793918650597336,
"perf_norm_to_sol": 0.7658773663261037,
"perf_norm_to_cublas": 1.0539086059914902,
"compute_intensity": 512,
"tile_compute_intensity": 6.2439024390243905,
"MxNxK": 1073741824,
"size_m": 512,
"size_n": 1024,
"size_k": 2048,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_512x1024x2048.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.000022803198953624813,
"perf_norm_to_sol": 0.37568402412477886,
"perf_norm_to_cublas": 0.9886332301632413,
"compute_intensity": 63.50387596899225,
"tile_compute_intensity": 1.7297297297297298,
"MxNxK": 33554432,
"size_m": 4096,
"size_n": 64,
"size_k": 128,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_4096x64x128.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00019541759975254535,
"perf_norm_to_sol": 0.7014146162199298,
"perf_norm_to_cublas": 1.0046013867068773,
"compute_intensity": 168.90721649484536,
"tile_compute_intensity": 3.506849315068493,
"MxNxK": 536870912,
"size_m": 256,
"size_n": 8192,
"size_k": 256,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_256x8192x256.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.0223203644156456,
"perf_norm_to_sol": 0.786044575577469,
"perf_norm_to_cublas": 0.904102512440297,
"compute_intensity": 489.07462686567163,
"tile_compute_intensity": 39.38461538461539,
"MxNxK": 68719476736,
"size_m": 8192,
"size_n": 16384,
"size_k": 512,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_8192x16384x512.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.0003619263879954815,
"perf_norm_to_sol": 0.7574399948685847,
"perf_norm_to_cublas": 1.0490619905793772,
"compute_intensity": 202.2716049382716,
"tile_compute_intensity": 3.764705882352941,
"MxNxK": 1073741824,
"size_m": 8192,
"size_n": 128,
"size_k": 1024,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_8192x128x1024.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.000022268800239544363,
"perf_norm_to_sol": 0.3846995551472511,
"perf_norm_to_cublas": 0.961488735250199,
"compute_intensity": 83.59183673469387,
"tile_compute_intensity": 1.7297297297297298,
"MxNxK": 33554432,
"size_m": 128,
"size_n": 2048,
"size_k": 128,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_128x2048x128.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.01103825941681862,
"perf_norm_to_sol": 0.7947268093327364,
"perf_norm_to_cublas": 0.9228820258191176,
"compute_intensity": 481.88235294117646,
"tile_compute_intensity": 7.501831501831502,
"MxNxK": 34359738368,
"size_m": 8192,
"size_n": 256,
"size_k": 16384,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_8192x256x16384.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.000352345616556704,
"perf_norm_to_sol": 0.7780358505523941,
"perf_norm_to_cublas": 1.6799959876110224,
"compute_intensity": 327.68,
"tile_compute_intensity": 3.9844357976653697,
"MxNxK": 1073741824,
"size_m": 512,
"size_n": 256,
"size_k": 8192,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_512x256x8192.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.000015404800069518388,
"perf_norm_to_sol": 0.6448552202711884,
"perf_norm_to_cublas": 1.0087245555444508,
"compute_intensity": 7.75390440132513,
"tile_compute_intensity": 0.7950310559006211,
"MxNxK": 8388608,
"size_m": 8192,
"size_n": 128,
"size_k": 8,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_8192x128x8.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00003245759871788323,
"perf_norm_to_sol": 0.5278762375662528,
"perf_norm_to_cublas": 1.255249971620321,
"compute_intensity": 124.12121212121212,
"tile_compute_intensity": 1.8285714285714285,
"MxNxK": 67108864,
"size_m": 128,
"size_n": 2048,
"size_k": 256,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_128x2048x256.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.000017862400272861124,
"perf_norm_to_sol": 0.5433656964386765,
"perf_norm_to_cublas": 0.9503762459930583,
"compute_intensity": 15.693486590038313,
"tile_compute_intensity": 1.5609756097560976,
"MxNxK": 16777216,
"size_m": 512,
"size_n": 2048,
"size_k": 16,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_512x2048x16.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00028533758595585825,
"perf_norm_to_sol": 0.541174187864146,
"perf_norm_to_cublas": 0.9875628674167835,
"compute_intensity": 31.477425552353505,
"tile_compute_intensity": 3.506849315068493,
"MxNxK": 536870912,
"size_m": 16384,
"size_n": 1024,
"size_k": 32,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_16384x1024x32.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.0027024161070585253,
"perf_norm_to_sol": 0.8115331188267404,
"perf_norm_to_cublas": 0.9065298060734938,
"compute_intensity": 1024,
"tile_compute_intensity": 12.487804878048781,
"MxNxK": 8589934592,
"size_m": 1024,
"size_n": 2048,
"size_k": 4096,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_1024x2048x4096.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00005883520352654159,
"perf_norm_to_sol": 0.339855025310419,
"perf_norm_to_cublas": 0.6691504223812653,
"compute_intensity": 30.91320754716981,
"tile_compute_intensity": 3.0476190476190474,
"MxNxK": 67108864,
"size_m": 4096,
"size_n": 512,
"size_k": 32,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_4096x512x32.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00011712000705301761,
"perf_norm_to_sol": 0.5851637315518752,
"perf_norm_to_cublas": 0.8835245491987899,
"compute_intensity": 101.7639751552795,
"tile_compute_intensity": 3.1604938271604937,
"MxNxK": 268435456,
"size_m": 256,
"size_n": 8192,
"size_k": 128,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_256x8192x128.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.0013866592198610306,
"perf_norm_to_sol": 0.7907855586712261,
"perf_norm_to_cublas": 0.9388458299136149,
"compute_intensity": 337.8144329896907,
"tile_compute_intensity": 12.19047619047619,
"MxNxK": 4294967296,
"size_m": 16384,
"size_n": 512,
"size_k": 512,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_16384x512x512.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00003311040054541081,
"perf_norm_to_sol": 0.5174686747788738,
"perf_norm_to_cublas": 1.2714796371901143,
"compute_intensity": 84.45360824742268,
"tile_compute_intensity": 1.8285714285714285,
"MxNxK": 67108864,
"size_m": 4096,
"size_n": 64,
"size_k": 256,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_4096x64x256.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00019331839866936206,
"perf_norm_to_sol": 0.7090311200409023,
"perf_norm_to_cublas": 0.9706018767760229,
"compute_intensity": 168.90721649484536,
"tile_compute_intensity": 3.657142857142857,
"MxNxK": 536870912,
"size_m": 8192,
"size_n": 128,
"size_k": 512,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_8192x128x512.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.002718716859817505,
"perf_norm_to_sol": 0.8066673672947451,
"perf_norm_to_cublas": 0.92701849821869,
"compute_intensity": 455.1111111111111,
"tile_compute_intensity": 7.062068965517241,
"MxNxK": 8589934592,
"size_m": 4096,
"size_n": 256,
"size_k": 8192,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_4096x256x8192.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.000006742400000803173,
"perf_norm_to_sol": 0.1894104465997858,
"perf_norm_to_cublas": 0.7755102040816327,
"compute_intensity": 15.283582089552239,
"tile_compute_intensity": 1.3333333333333333,
"MxNxK": 2097152,
"size_m": 512,
"size_n": 256,
"size_k": 16,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_512x256x16.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.002716095931828022,
"perf_norm_to_sol": 0.8074457702430249,
"perf_norm_to_cublas": 1.6329721534098816,
"compute_intensity": 244.53731343283582,
"tile_compute_intensity": 1.967339097022094,
"MxNxK": 8589934592,
"size_m": 128,
"size_n": 4096,
"size_k": 16384,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_128x4096x16384.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.000052537600276991724,
"perf_norm_to_sol": 0.7181260888446274,
"perf_norm_to_cublas": 0.8918869616307132,
"compute_intensity": 7.961127308066083,
"tile_compute_intensity": 0.9552238805970149,
"MxNxK": 33554432,
"size_m": 4096,
"size_n": 1024,
"size_k": 8,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_4096x1024x8.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.0013783935457468033,
"perf_norm_to_sol": 0.7955275829953985,
"perf_norm_to_cublas": 1.0467095186964017,
"compute_intensity": 496.4848484848485,
"tile_compute_intensity": 12.8,
"MxNxK": 4294967296,
"size_m": 8192,
"size_n": 512,
"size_k": 1024,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_8192x512x1024.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.022254575788974763,
"perf_norm_to_sol": 0.7883682681798201,
"perf_norm_to_cublas": 1.0321779484991858,
"compute_intensity": 910.2222222222222,
"tile_compute_intensity": 14.124137931034483,
"MxNxK": 68719476736,
"size_m": 8192,
"size_n": 512,
"size_k": 16384,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_8192x512x16384.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.0003536063944920897,
"perf_norm_to_sol": 0.7752617761900668,
"perf_norm_to_cublas": 1.076125354647136,
"compute_intensity": 372.3636363636364,
"tile_compute_intensity": 5.278350515463917,
"MxNxK": 1073741824,
"size_m": 1024,
"size_n": 256,
"size_k": 4096,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_1024x256x4096.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00002748480183072388,
"perf_norm_to_sol": 0.7016213232278792,
"perf_norm_to_cublas": 0.9908021278934351,
"compute_intensity": 7.8731379144642,
"tile_compute_intensity": 0.8827586206896552,
"MxNxK": 16777216,
"size_m": 8192,
"size_n": 256,
"size_k": 8,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_8192x256x8.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.022195692360401153,
"perf_norm_to_sol": 0.7904597472765428,
"perf_norm_to_cublas": 0.8608272273150744,
"compute_intensity": 1820.4444444444443,
"tile_compute_intensity": 39.38461538461539,
"MxNxK": 68719476736,
"size_m": 8192,
"size_n": 2048,
"size_k": 4096,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_8192x2048x4096.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.000006409599882317707,
"perf_norm_to_sol": 0.015654967555692713,
"perf_norm_to_cublas": 0.6425362234196706,
"compute_intensity": 13.473684210526315,
"tile_compute_intensity": 0.6666666666666666,
"MxNxK": 131072,
"size_m": 128,
"size_n": 64,
"size_k": 16,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_128x64x16.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.000008540799899492413,
"perf_norm_to_sol": 0.12538049197132062,
"perf_norm_to_cublas": 0.9524166269536543,
"compute_intensity": 73.14285714285714,
"tile_compute_intensity": 1.7777777777777777,
"MxNxK": 4194304,
"size_m": 256,
"size_n": 128,
"size_k": 128,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_256x128x128.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.000006736000068485737,
"perf_norm_to_sol": 0.13271328498004756,
"perf_norm_to_cublas": 0.839904954564151,
"compute_intensity": 25.28395061728395,
"tile_compute_intensity": 1.2307692307692308,
"MxNxK": 2097152,
"size_m": 1024,
"size_n": 64,
"size_k": 32,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_1024x64x32.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00019182399846613408,
"perf_norm_to_sol": 0.7145548097687606,
"perf_norm_to_cublas": 0.9767787164552801,
"compute_intensity": 248.24242424242425,
"tile_compute_intensity": 6.4,
"MxNxK": 536870912,
"size_m": 4096,
"size_n": 256,
"size_k": 512,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_4096x256x512.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00009615999879315495,
"perf_norm_to_sol": 0.7127119511923733,
"perf_norm_to_cublas": 1.1136106292420136,
"compute_intensity": 204.8,
"tile_compute_intensity": 3.1604938271604937,
"MxNxK": 268435456,
"size_m": 1024,
"size_n": 128,
"size_k": 2048,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_1024x128x2048.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.0013632736168801784,
"perf_norm_to_sol": 0.8043506984121371,
"perf_norm_to_cublas": 1.6340971628781698,
"compute_intensity": 399.609756097561,
"tile_compute_intensity": 5.3194805194805195,
"MxNxK": 4294967296,
"size_m": 1024,
"size_n": 256,
"size_k": 16384,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_1024x256x16384.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00008048639865592122,
"perf_norm_to_sol": 0.47963798519210227,
"perf_norm_to_cublas": 0.6569258789287228,
"compute_intensity": 15.738712776176753,
"tile_compute_intensity": 1.5900621118012421,
"MxNxK": 67108864,
"size_m": 512,
"size_n": 8192,
"size_k": 16,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_512x8192x16.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.000013952000881545245,
"perf_norm_to_sol": 0.15350482017864714,
"perf_norm_to_cublas": 0.9830274948992161,
"compute_intensity": 102.4,
"tile_compute_intensity": 1.8823529411764706,
"MxNxK": 8388608,
"size_m": 256,
"size_n": 128,
"size_k": 256,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_256x128x256.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.0027243455871939657,
"perf_norm_to_sol": 0.8050007245915093,
"perf_norm_to_cublas": 0.8680848413386946,
"compute_intensity": 404.5432098765432,
"tile_compute_intensity": 12.487804878048781,
"MxNxK": 8589934592,
"size_m": 1024,
"size_n": 16384,
"size_k": 512,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_1024x16384x512.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.010859663784503936,
"perf_norm_to_sol": 0.8077967109288376,
"perf_norm_to_cublas": 0.8615576833438143,
"compute_intensity": 1260.3076923076924,
"tile_compute_intensity": 25.6,
"MxNxK": 34359738368,
"size_m": 2048,
"size_n": 8192,
"size_k": 2048,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_2048x8192x2048.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.0008135167881846428,
"perf_norm_to_sol": 0.7366527498368952,
"perf_norm_to_cublas": 0.7544448985526161,
"compute_intensity": 7.990246281394782,
"tile_compute_intensity": 0.9884169884169884,
"MxNxK": 536870912,
"size_m": 16384,
"size_n": 4096,
"size_k": 8,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_16384x4096x8.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00009583359933458268,
"perf_norm_to_sol": 0.7151393753588705,
"perf_norm_to_cublas": 1.2684319513131823,
"compute_intensity": 117.02857142857142,
"tile_compute_intensity": 1.7716262975778547,
"MxNxK": 268435456,
"size_m": 1024,
"size_n": 64,
"size_k": 4096,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_1024x64x4096.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.0003705503884702921,
"perf_norm_to_sol": 0.7398117233065084,
"perf_norm_to_cublas": 1.0108206600834506,
"compute_intensity": 327.68,
"tile_compute_intensity": 10.666666666666666,
"MxNxK": 1073741824,
"size_m": 4096,
"size_n": 512,
"size_k": 512,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_4096x512x512.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.0013676895759999752,
"perf_norm_to_sol": 0.8017536326272561,
"perf_norm_to_cublas": 1.0558630290992004,
"compute_intensity": 630.1538461538462,
"tile_compute_intensity": 18.285714285714285,
"MxNxK": 4294967296,
"size_m": 4096,
"size_n": 1024,
"size_k": 1024,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_4096x1024x1024.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.0027657633647322653,
"perf_norm_to_sol": 0.7929457016078172,
"perf_norm_to_cublas": 0.8776849286386259,
"compute_intensity": 239.1824817518248,
"tile_compute_intensity": 15.515151515151516,
"MxNxK": 8589934592,
"size_m": 2048,
"size_n": 16384,
"size_k": 256,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_2048x16384x256.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00008695040014572442,
"perf_norm_to_sol": 0.4464989487368955,
"perf_norm_to_cublas": 0.7530546496795245,
"compute_intensity": 31.386973180076627,
"tile_compute_intensity": 3.3684210526315788,
"MxNxK": 134217728,
"size_m": 4096,
"size_n": 1024,
"size_k": 32,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_4096x1024x32.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.000008144000457832589,
"perf_norm_to_sol": 0.6104486241530994,
"perf_norm_to_cublas": 1.1473476929256596,
"compute_intensity": 7.750236518448439,
"tile_compute_intensity": 0.7901234567901234,
"MxNxK": 4194304,
"size_m": 4096,
"size_n": 128,
"size_k": 8,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_4096x128x8.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00000876159974723123,
"perf_norm_to_sol": 0.30192911546117424,
"perf_norm_to_cublas": 0.7016070037737553,
"compute_intensity": 15.003663003663004,
"tile_compute_intensity": 0.9846153846153847,
"MxNxK": 4194304,
"size_m": 128,
"size_n": 2048,
"size_k": 16,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_128x2048x16.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.0001919072004966438,
"perf_norm_to_sol": 0.7142450120596107,
"perf_norm_to_cublas": 0.9684681003958638,
"compute_intensity": 341.3333333333333,
"tile_compute_intensity": 9.142857142857142,
"MxNxK": 536870912,
"size_m": 1024,
"size_n": 1024,
"size_k": 512,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_1024x1024x512.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00002901760162785649,
"perf_norm_to_sol": 0.6645595068965392,
"perf_norm_to_cublas": 0.9019628354889868,
"compute_intensity": 7.8731379144642,
"tile_compute_intensity": 0.7975077881619937,
"MxNxK": 16777216,
"size_m": 256,
"size_n": 8192,
"size_k": 8,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_256x8192x8.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.0014356384053826332,
"perf_norm_to_sol": 0.7638065976454245,
"perf_norm_to_cublas": 0.8640169834025838,
"compute_intensity": 123.65283018867925,
"tile_compute_intensity": 10.448979591836734,
"MxNxK": 4294967296,
"size_m": 2048,
"size_n": 16384,
"size_k": 128,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_2048x16384x128.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.0027028609067201613,
"perf_norm_to_sol": 0.8113995678712461,
"perf_norm_to_cublas": 0.909489600695313,
"compute_intensity": 442.81081081081084,
"tile_compute_intensity": 3.9083969465648853,
"MxNxK": 8589934592,
"size_m": 256,
"size_n": 8192,
"size_k": 4096,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_256x8192x4096.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.0006741983816027641,
"perf_norm_to_sol": 0.813225095006603,
"perf_norm_to_cublas": 1.676676819792379,
"compute_intensity": 390.0952380952381,
"tile_compute_intensity": 3.5432525951557095,
"MxNxK": 2147483648,
"size_m": 256,
"size_n": 1024,
"size_k": 8192,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_256x1024x8192.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.000403024023398757,
"perf_norm_to_sol": 0.6802014409817646,
"perf_norm_to_cublas": 0.8461471136209362,
"compute_intensity": 61.94328922495274,
"tile_compute_intensity": 5.278350515463917,
"MxNxK": 1073741824,
"size_m": 1024,
"size_n": 16384,
"size_k": 64,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_1024x16384x64.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.005423814430832863,
"perf_norm_to_sol": 0.8086929225534207,
"perf_norm_to_cublas": 0.872902469263758,
"compute_intensity": 668.734693877551,
"tile_compute_intensity": 23.272727272727273,
"MxNxK": 17179869184,
"size_m": 16384,
"size_n": 1024,
"size_k": 1024,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_16384x1024x1024.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.0005555456038564444,
"perf_norm_to_sol": 0.5443527544621342,
"perf_norm_to_cublas": 0.8206937138204384,
"compute_intensity": 31.813592233009707,
"tile_compute_intensity": 3.764705882352941,
"MxNxK": 1073741824,
"size_m": 8192,
"size_n": 4096,
"size_k": 32,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_8192x4096x32.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.001564505510032177,
"perf_norm_to_sol": 0.7008924409872223,
"perf_norm_to_cublas": 0.9271949400997098,
"compute_intensity": 63.50387596899225,
"tile_compute_intensity": 7.314285714285714,
"MxNxK": 4294967296,
"size_m": 8192,
"size_n": 8192,
"size_k": 64,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_8192x8192x64.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.0000065664004068821665,
"perf_norm_to_sol": 0.03334066719332554,
"perf_norm_to_cublas": 0.6408381489567848,
"compute_intensity": 25.6,
"tile_compute_intensity": 1,
"MxNxK": 524288,
"size_m": 128,
"size_n": 128,
"size_k": 32,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_128x128x32.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.000006710399611620233,
"perf_norm_to_sol": 0.19031370249471216,
"perf_norm_to_cublas": 0.6862184546114084,
"compute_intensity": 15.283582089552239,
"tile_compute_intensity": 1.2307692307692308,
"MxNxK": 2097152,
"size_m": 256,
"size_n": 512,
"size_k": 16,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_256x512x16.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00009671999723650515,
"perf_norm_to_sol": 0.7085854251933199,
"perf_norm_to_cublas": 2.08724565517292,
"compute_intensity": 101.7639751552795,
"tile_compute_intensity": 1.332465842550423,
"MxNxK": 268435456,
"size_m": 256,
"size_n": 64,
"size_k": 16384,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_256x64x16384.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.0027265504002571108,
"perf_norm_to_sol": 0.8043497642743066,
"perf_norm_to_cublas": 0.9271367044913613,
"compute_intensity": 655.36,
"tile_compute_intensity": 13.473684210526315,
"MxNxK": 8589934592,
"size_m": 1024,
"size_n": 8192,
"size_k": 1024,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_1024x8192x1024.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.010897347331047058,
"perf_norm_to_sol": 0.8050033113950869,
"perf_norm_to_cublas": 0.8947308290918499,
"compute_intensity": 862.3157894736842,
"tile_compute_intensity": 7.501831501831502,
"MxNxK": 34359738368,
"size_m": 512,
"size_n": 4096,
"size_k": 16384,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_512x4096x16384.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.005470060557126999,
"perf_norm_to_sol": 0.8018559022610492,
"perf_norm_to_cublas": 0.8742338211480005,
"compute_intensity": 468.1142857142857,
"tile_compute_intensity": 28.444444444444443,
"MxNxK": 17179869184,
"size_m": 4096,
"size_n": 8192,
"size_k": 512,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_4096x8192x512.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "stream-kxmxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00001716800034046173,
"perf_norm_to_sol": 0.5610926908448476,
"perf_norm_to_cublas": 0.9955265236790714,
"compute_intensity": 15.753846153846155,
"tile_compute_intensity": 1.6842105263157894,
"MxNxK": 16777216,
"size_m": 1024,
"size_n": 1024,
"size_k": 16,
"details": "../evaluation/dgemm/STREAM-K/DGEMM_OUTPUT_1024x1024x16.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "output-mxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.000005708800017600879,
"perf_norm_to_sol": 0.11584667765337547,
"perf_norm_to_cublas": 0.7589686105798857,
"compute_intensity": 7.501831501831502,
"tile_compute_intensity": 0.64,
"MxNxK": 524288,
"size_m": 1024,
"size_n": 64,
"size_k": 8,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_1024x64x8.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "output-mxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00001482880034018308,
"perf_norm_to_sol": 0.6397609500478638,
"perf_norm_to_cublas": 1.032369394581539,
"compute_intensity": 7.937984496124031,
"tile_compute_intensity": 0.9142857142857143,
"MxNxK": 8388608,
"size_m": 1024,
"size_n": 1024,
"size_k": 8,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_1024x1024x8.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "output-mxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.0003310976084321737,
"perf_norm_to_sol": 0.9080746784831719,
"perf_norm_to_cublas": 1.003836916391466,
"compute_intensity": 15.953261927945473,
"tile_compute_intensity": 1.9248120300751879,
"MxNxK": 536870912,
"size_m": 4096,
"size_n": 8192,
"size_k": 16,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_4096x8192x16.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "output-mxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.000029600001289509238,
"perf_norm_to_sol": 0.289418823398901,
"perf_norm_to_cublas": 0.7244324089622882,
"compute_intensity": 97.52380952380952,
"tile_compute_intensity": 2.909090909090909,
"MxNxK": 33554432,
"size_m": 256,
"size_n": 1024,
"size_k": 128,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_256x1024x128.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "output-mxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00011566720204427838,
"perf_norm_to_sol": 0.592513514248319,
"perf_norm_to_cublas": 1.0230453966323787,
"compute_intensity": 56.79029462738301,
"tile_compute_intensity": 3.9384615384615387,
"MxNxK": 268435456,
"size_m": 16384,
"size_n": 256,
"size_k": 64,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_16384x256x64.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "output-mxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.0016135327517986298,
"perf_norm_to_sol": 0.6795958028382568,
"perf_norm_to_cublas": 0.8225648039592787,
"compute_intensity": 630.1538461538462,
"tile_compute_intensity": 12.19047619047619,
"MxNxK": 4294967296,
"size_m": 4096,
"size_n": 512,
"size_k": 2048,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_4096x512x2048.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "output-mxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.000005648000296787359,
"perf_norm_to_sol": 0.029071550874630488,
"perf_norm_to_cublas": 0.6266288698208482,
"compute_intensity": 7.529411764705882,
"tile_compute_intensity": 0.5714285714285714,
"MxNxK": 131072,
"size_m": 128,
"size_n": 128,
"size_k": 8,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_128x128x8.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "output-mxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.000054643204202875494,
"perf_norm_to_sol": 0.31355399709026965,
"perf_norm_to_cublas": 0.7293861997771323,
"compute_intensity": 157.53846153846155,
"tile_compute_intensity": 3.2,
"MxNxK": 67108864,
"size_m": 256,
"size_n": 1024,
"size_k": 256,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_256x1024x256.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "output-mxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00042353281751275065,
"perf_norm_to_sol": 0.1618159857576148,
"perf_norm_to_cublas": 0.2543330630838044,
"compute_intensity": 204.8,
"tile_compute_intensity": 1.8686131386861313,
"MxNxK": 268435456,
"size_m": 128,
"size_n": 1024,
"size_k": 2048,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_128x1024x2048.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "output-mxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.011351747065782547,
"perf_norm_to_sol": 0.7727797876467711,
"perf_norm_to_cublas": 0.9006979955509488,
"compute_intensity": 1638.4,
"tile_compute_intensity": 30.11764705882353,
"MxNxK": 34359738368,
"size_m": 4096,
"size_n": 2048,
"size_k": 4096,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_4096x2048x4096.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "output-mxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00041446080431342127,
"perf_norm_to_sol": 0.08267896463702637,
"perf_norm_to_cublas": 0.2534299843221903,
"compute_intensity": 186.1818181818182,
"tile_compute_intensity": 2.6391752577319587,
"MxNxK": 134217728,
"size_m": 512,
"size_n": 128,
"size_k": 2048,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_512x128x2048.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "output-mxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.000016633600171189756,
"perf_norm_to_sol": 0.14258620046274448,
"perf_norm_to_cublas": 0.5679107158963768,
"compute_intensity": 42.22680412371134,
"tile_compute_intensity": 1.5238095238095237,
"MxNxK": 8388608,
"size_m": 2048,
"size_n": 64,
"size_k": 64,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_2048x64x64.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "output-mxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00010367679642513395,
"perf_norm_to_sol": 0.16525968859390608,
"perf_norm_to_cublas": 0.42260564472310025,
"compute_intensity": 204.8,
"tile_compute_intensity": 3.764705882352941,
"MxNxK": 67108864,
"size_m": 512,
"size_n": 256,
"size_k": 512,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_512x256x512.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "output-mxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.000005849599983775988,
"perf_norm_to_sol": 0.4116879587566608,
"perf_norm_to_cublas": 0.9398249595845779,
"compute_intensity": 7.876923076923077,
"tile_compute_intensity": 0.8421052631578947,
"MxNxK": 2097152,
"size_m": 512,
"size_n": 512,
"size_k": 8,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_512x512x8.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "output-mxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.0008234239183366298,
"perf_norm_to_sol": 0.16646196167089647,
"perf_norm_to_cublas": 0.2576131203410483,
"compute_intensity": 315.0769230769231,
"tile_compute_intensity": 3.9689922480620154,
"MxNxK": 536870912,
"size_m": 512,
"size_n": 256,
"size_k": 4096,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_512x256x4096.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "output-mxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00016498879995197058,
"perf_norm_to_sol": 0.9133684073709154,
"perf_norm_to_cublas": 0.9653988765511631,
"compute_intensity": 7.9669341113542425,
"tile_compute_intensity": 0.9394495412844037,
"MxNxK": 134217728,
"size_m": 1024,
"size_n": 16384,
"size_k": 8,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_1024x16384x8.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "output-mxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00042679039761424066,
"perf_norm_to_sol": 0.16058088642488932,
"perf_norm_to_cublas": 0.25589329002775046,
"compute_intensity": 117.02857142857142,
"tile_compute_intensity": 1.8686131386861313,
"MxNxK": 268435456,
"size_m": 2048,
"size_n": 64,
"size_k": 2048,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_2048x64x2048.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "output-mxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.0065150074660778046,
"perf_norm_to_sol": 0.6732456357564618,
"perf_norm_to_cublas": 0.7540949842154228,
"compute_intensity": 468.1142857142857,
"tile_compute_intensity": 3.8714555765595464,
"MxNxK": 17179869184,
"size_m": 256,
"size_n": 4096,
"size_k": 16384,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_256x4096x16384.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "output-mxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.013405045866966248,
"perf_norm_to_sol": 0.6544103447294367,
"perf_norm_to_cublas": 0.7394194892070608,
"compute_intensity": 862.3157894736842,
"tile_compute_intensity": 12.720496894409937,
"MxNxK": 34359738368,
"size_m": 4096,
"size_n": 512,
"size_k": 16384,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_4096x512x16384.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "output-mxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00001087040000129491,
"perf_norm_to_sol": 0.2416781394589672,
"perf_norm_to_cublas": 0.772740647291422,
"compute_intensity": 30.11764705882353,
"tile_compute_intensity": 2.2857142857142856,
"MxNxK": 8388608,
"size_m": 512,
"size_n": 512,
"size_k": 32,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_512x512x32.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "output-mxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00005385599797591567,
"perf_norm_to_sol": 0.03976714696498047,
"perf_norm_to_cublas": 0.2613190753956545,
"compute_intensity": 78.76923076923077,
"tile_compute_intensity": 1.5238095238095237,
"MxNxK": 8388608,
"size_m": 512,
"size_n": 64,
"size_k": 256,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_512x64x256.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "output-mxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.002881222404539585,
"perf_norm_to_sol": 0.7611700395892477,
"perf_norm_to_cublas": 0.9859093140907181,
"compute_intensity": 655.36,
"tile_compute_intensity": 7.529411764705882,
"MxNxK": 8589934592,
"size_m": 512,
"size_n": 8192,
"size_k": 2048,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_512x8192x2048.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "output-mxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00002896319783758372,
"perf_norm_to_sol": 0.03697277141950776,
"perf_norm_to_cublas": 0.29554749002655734,
"compute_intensity": 60.23529411764706,
"tile_compute_intensity": 1.4545454545454546,
"MxNxK": 4194304,
"size_m": 512,
"size_n": 64,
"size_k": 128,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_512x64x128.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "output-mxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00637568011879921,
"perf_norm_to_sol": 0.6879580314144963,
"perf_norm_to_cublas": 0.7710278822987596,
"compute_intensity": 1170.2857142857142,
"tile_compute_intensity": 12.641975308641975,
"MxNxK": 17179869184,
"size_m": 1024,
"size_n": 2048,
"size_k": 8192,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_1024x2048x8192.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "output-mxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00000758399983169511,
"perf_norm_to_sol": 0.6374820510597938,
"perf_norm_to_cublas": 1.1438818731369989,
"compute_intensity": 7.861804222648752,
"tile_compute_intensity": 0.7901234567901234,
"MxNxK": 4194304,
"size_m": 256,
"size_n": 2048,
"size_k": 8,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_256x2048x8.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "output-mxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00011082240380346775,
"perf_norm_to_sol": 0.6184162950306012,
"perf_norm_to_cublas": 0.902027002894668,
"compute_intensity": 61.59398496240601,
"tile_compute_intensity": 5.12,
"MxNxK": 268435456,
"size_m": 1024,
"size_n": 4096,
"size_k": 64,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_1024x4096x64.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "output-mxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00011982719879597426,
"perf_norm_to_sol": 0.6455514362715529,
"perf_norm_to_cublas": 0.95900769429708,
"compute_intensity": 31.44721689059501,
"tile_compute_intensity": 3.1604938271604937,
"MxNxK": 268435456,
"size_m": 1024,
"size_n": 8192,
"size_k": 32,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_1024x8192x32.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "output-mxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.005586972832679749,
"perf_norm_to_sol": 0.7850763686913865,
"perf_norm_to_cublas": 0.8786817675791616,
"compute_intensity": 248.24242424242425,
"tile_compute_intensity": 23.272727272727273,
"MxNxK": 17179869184,
"size_m": 8192,
"size_n": 8192,
"size_k": 256,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_8192x8192x256.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "output-mxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00037006400525569917,
"perf_norm_to_sol": 0.7407840740325043,
"perf_norm_to_cublas": 0.8856760182125122,
"compute_intensity": 113.3840830449827,
"tile_compute_intensity": 5.278350515463917,
"MxNxK": 1073741824,
"size_m": 512,
"size_n": 16384,
"size_k": 128,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_512x16384x128.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "output-mxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00020231681410223245,
"perf_norm_to_sol": 0.04234347789544985,
"perf_norm_to_cublas": 0.4795014431883478,
"compute_intensity": 146.28571428571428,
"tile_compute_intensity": 1.9692307692307693,
"MxNxK": 33554432,
"size_m": 256,
"size_n": 128,
"size_k": 1024,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_256x128x1024.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "output-mxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.000010326399933546782,
"perf_norm_to_sol": 0.07066940787135838,
"perf_norm_to_cublas": 0.4679268537401737,
"compute_intensity": 28.444444444444443,
"tile_compute_intensity": 1.6,
"MxNxK": 2097152,
"size_m": 256,
"size_n": 256,
"size_k": 32,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_256x256x32.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "output-mxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.0004032704047858715,
"perf_norm_to_sol": 0.6797858662890585,
"perf_norm_to_cublas": 0.9345272942805023,
"compute_intensity": 327.68,
"tile_compute_intensity": 3.764705882352941,
"MxNxK": 1073741824,
"size_m": 256,
"size_n": 4096,
"size_k": 1024,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_256x4096x1024.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "output-mxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.0001097952015697956,
"perf_norm_to_sol": 0.6242019631701224,
"perf_norm_to_cublas": 0.9078429609548424,
"compute_intensity": 62.06060606060606,
"tile_compute_intensity": 5.818181818181818,
"MxNxK": 268435456,
"size_m": 2048,
"size_n": 2048,
"size_k": 64,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_2048x2048x64.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "output-mxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.0007407391909509897,
"perf_norm_to_sol": 0.7401728565600925,
"perf_norm_to_cublas": 0.9056121129147817,
"compute_intensity": 203.527950310559,
"tile_compute_intensity": 6.320987654320987,
"MxNxK": 2147483648,
"size_m": 512,
"size_n": 16384,
"size_k": 256,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_512x16384x256.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "output-mxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.0028715040534734726,
"perf_norm_to_sol": 0.7637461521518565,
"perf_norm_to_cublas": 0.8758515516399232,
"compute_intensity": 126.03076923076924,
"tile_compute_intensity": 13.473684210526315,
"MxNxK": 8589934592,
"size_m": 8192,
"size_n": 8192,
"size_k": 128,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_8192x8192x128.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "output-mxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.023811429738998413,
"perf_norm_to_sol": 0.7368226757545631,
"perf_norm_to_cublas": 0.9676833183350251,
"compute_intensity": 1489.4545454545455,
"tile_compute_intensity": 21.11340206185567,
"MxNxK": 68719476736,
"size_m": 4096,
"size_n": 1024,
"size_k": 16384,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_4096x1024x16384.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "output-mxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00020199359860271216,
"perf_norm_to_sol": 0.6785797257003324,
"perf_norm_to_cublas": 0.9087654178825826,
"compute_intensity": 60.12477064220184,
"tile_compute_intensity": 3.9689922480620154,
"MxNxK": 536870912,
"size_m": 512,
"size_n": 16384,
"size_k": 64,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_512x16384x64.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "output-mxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00011349120177328587,
"perf_norm_to_sol": 0.603873950541404,
"perf_norm_to_cublas": 0.842102274941993,
"compute_intensity": 292.57142857142856,
"tile_compute_intensity": 5.818181818181818,
"MxNxK": 268435456,
"size_m": 512,
"size_n": 1024,
"size_k": 512,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_512x1024x512.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "output-mxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00001053119995049201,
"perf_norm_to_sol": 0.28064516079058893,
"perf_norm_to_cublas": 0.8030993705873806,
"compute_intensity": 28.248275862068965,
"tile_compute_intensity": 1.8823529411764706,
"MxNxK": 8388608,
"size_m": 2048,
"size_n": 128,
"size_k": 32,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_2048x128x32.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "output-mxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.0058562207967042925,
"perf_norm_to_sol": 0.7489813816320023,
"perf_norm_to_cublas": 0.9609025295077859,
"compute_intensity": 1170.2857142857142,
"tile_compute_intensity": 13.837837837837839,
"MxNxK": 17179869184,
"size_m": 1024,
"size_n": 4096,
"size_k": 4096,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_1024x4096x4096.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "output-mxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00001786880020517856,
"perf_norm_to_sol": 0.589116005736005,
"perf_norm_to_cublas": 0.9666905713056705,
"compute_intensity": 15.044995408631772,
"tile_compute_intensity": 1.3195876288659794,
"MxNxK": 16777216,
"size_m": 8192,
"size_n": 128,
"size_k": 16,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_8192x128x16.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "output-mxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00010432959534227848,
"perf_norm_to_sol": 0.328451290075851,
"perf_norm_to_cublas": 0.75772173673186,
"compute_intensity": 256,
"tile_compute_intensity": 4.923076923076923,
"MxNxK": 134217728,
"size_m": 512,
"size_n": 512,
"size_k": 512,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_512x512x512.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "output-mxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.000015273600001819432,
"perf_norm_to_sol": 0.6883192693440412,
"perf_norm_to_cublas": 1.0064947982733892,
"compute_intensity": 7.527682058350563,
"tile_compute_intensity": 0.6649350649350649,
"MxNxK": 8388608,
"size_m": 16384,
"size_n": 64,
"size_k": 8,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_16384x64x8.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "output-mxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.0028304064646363257,
"perf_norm_to_sol": 0.7748357697489259,
"perf_norm_to_cublas": 0.8969770188519344,
"compute_intensity": 655.36,
"tile_compute_intensity": 21.333333333333332,
"MxNxK": 8589934592,
"size_m": 8192,
"size_n": 1024,
"size_k": 1024,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_8192x1024x1024.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "output-mxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.000012588800746016205,
"perf_norm_to_sol": 0.40578321035406467,
"perf_norm_to_cublas": 1.0129637665562587,
"compute_intensity": 30.567164179104477,
"tile_compute_intensity": 2.4615384615384617,
"MxNxK": 16777216,
"size_m": 512,
"size_n": 1024,
"size_k": 32,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_512x1024x32.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "output-mxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00046207043342292307,
"perf_norm_to_sol": 0.6525781465227377,
"perf_norm_to_cublas": 0.9212656558519714,
"compute_intensity": 15.929995138551288,
"tile_compute_intensity": 1.9248120300751879,
"MxNxK": 536870912,
"size_m": 16384,
"size_n": 2048,
"size_k": 16,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_16384x2048x16.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "output-mxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.000007625600119354203,
"perf_norm_to_sol": 0.3839913234715559,
"perf_norm_to_cublas": 0.8619387636807055,
"compute_intensity": 14.197573656845753,
"tile_compute_intensity": 0.9846153846153847,
"MxNxK": 4194304,
"size_m": 4096,
"size_n": 64,
"size_k": 16,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_4096x64x16.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "output-mxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.0015071647241711617,
"perf_norm_to_sol": 0.7275582212604131,
"perf_norm_to_cublas": 0.8699609183876126,
"compute_intensity": 63.38104448742747,
"tile_compute_intensity": 7.013698630136986,
"MxNxK": 4294967296,
"size_m": 4096,
"size_n": 16384,
"size_k": 64,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_4096x16384x64.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "output-mxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.0014242367818951608,
"perf_norm_to_sol": 0.7699211955509865,
"perf_norm_to_cublas": 0.9035509009374845,
"compute_intensity": 431.1578947368421,
"tile_compute_intensity": 18.285714285714285,
"MxNxK": 4294967296,
"size_m": 2048,
"size_n": 4096,
"size_k": 512,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_2048x4096x512.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "output-mxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.010891581326723099,
"perf_norm_to_sol": 0.8054294802345846,
"perf_norm_to_cublas": 0.8739899823736237,
"compute_intensity": 862.3157894736842,
"tile_compute_intensity": 36.57142857142857,
"MxNxK": 34359738368,
"size_m": 4096,
"size_n": 8192,
"size_k": 1024,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_4096x8192x1024.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "output-mxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.0036572255194187164,
"perf_norm_to_sol": 0.6551277427621157,
"perf_norm_to_cublas": 0.916848030362758,
"compute_intensity": 15.984390243902439,
"tile_compute_intensity": 1.9768339768339769,
"MxNxK": 4294967296,
"size_m": 16384,
"size_n": 16384,
"size_k": 16,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_16384x16384x16.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "output-mxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.000007827200170140713,
"perf_norm_to_sol": 0.6351560924111055,
"perf_norm_to_cublas": 1.1218314791131347,
"compute_intensity": 7.750236518448439,
"tile_compute_intensity": 0.6632124352331606,
"MxNxK": 4194304,
"size_m": 128,
"size_n": 4096,
"size_k": 8,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_128x4096x8.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "output-mxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.0007437407970428467,
"perf_norm_to_sol": 0.7371856500439089,
"perf_norm_to_cublas": 0.997693860244913,
"compute_intensity": 390.0952380952381,
"tile_compute_intensity": 16,
"MxNxK": 2147483648,
"size_m": 4096,
"size_n": 1024,
"size_k": 512,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_4096x1024x512.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "output-mxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.011275728046894074,
"perf_norm_to_sol": 0.7779897360447312,
"perf_norm_to_cublas": 0.8768021146521091,
"compute_intensity": 474.8985507246377,
"tile_compute_intensity": 36.57142857142857,
"MxNxK": 34359738368,
"size_m": 16384,
"size_n": 4096,
"size_k": 512,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_16384x4096x512.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "output-mxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.0003310207976028323,
"perf_norm_to_sol": 0.9109308816537912,
"perf_norm_to_cublas": 1.0053168566180415,
"compute_intensity": 15.929995138551288,
"tile_compute_intensity": 1.8754578754578755,
"MxNxK": 536870912,
"size_m": 2048,
"size_n": 16384,
"size_k": 16,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_2048x16384x16.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "output-mxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.006434805691242218,
"perf_norm_to_sol": 0.6816367974292173,
"perf_norm_to_cublas": 0.7593717471473315,
"compute_intensity": 468.1142857142857,
"tile_compute_intensity": 3.9233716475095783,
"MxNxK": 17179869184,
"size_m": 256,
"size_n": 8192,
"size_k": 8192,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_256x8192x8192.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "output-mxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.000840278435498476,
"perf_norm_to_sol": 0.3262460511716894,
"perf_norm_to_cublas": 0.4456275372208856,
"compute_intensity": 372.3636363636364,
"tile_compute_intensity": 3.5310344827586206,
"MxNxK": 1073741824,
"size_m": 256,
"size_n": 1024,
"size_k": 4096,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_256x1024x4096.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "output-mxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00001657599932514131,
"perf_norm_to_sol": 0.032301210690892555,
"perf_norm_to_cublas": 0.33725871174810856,
"compute_intensity": 46.54545454545455,
"tile_compute_intensity": 1.6,
"MxNxK": 2097152,
"size_m": 256,
"size_n": 128,
"size_k": 64,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_256x128x64.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "output-mxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.01284896731376648,
"perf_norm_to_sol": 0.682731963798871,
"perf_norm_to_cublas": 0.7593077117909319,
"compute_intensity": 481.88235294117646,
"tile_compute_intensity": 3.930902111324376,
"MxNxK": 34359738368,
"size_m": 256,
"size_n": 8192,
"size_k": 16384,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_256x8192x16384.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "output-mxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00011842240346595644,
"perf_norm_to_sol": 0.6458145196033621,
"perf_norm_to_cublas": 0.9242575528092811,
"compute_intensity": 31.62934362934363,
"tile_compute_intensity": 3.4594594594594597,
"MxNxK": 268435456,
"size_m": 2048,
"size_n": 4096,
"size_k": 32,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_2048x4096x32.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "output-mxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.0003887712024152279,
"perf_norm_to_sol": 0.7051384458597573,
"perf_norm_to_cublas": 0.8681713223867674,
"compute_intensity": 62.77394636015325,
"tile_compute_intensity": 6.2439024390243905,
"MxNxK": 1073741824,
"size_m": 2048,
"size_n": 8192,
"size_k": 64,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_2048x8192x64.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "output-mxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00040195840410888194,
"perf_norm_to_sol": 0.6820047016403342,
"perf_norm_to_cublas": 0.937291002736217,
"compute_intensity": 512,
"tile_compute_intensity": 9.846153846153847,
"MxNxK": 1073741824,
"size_m": 1024,
"size_n": 1024,
"size_k": 1024,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_1024x1024x1024.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "output-mxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.000025587200070731342,
"perf_norm_to_sol": 0.5483072354681483,
"perf_norm_to_cublas": 0.9152075908001813,
"compute_intensity": 25.580015612802498,
"tile_compute_intensity": 1.3264248704663213,
"MxNxK": 33554432,
"size_m": 16384,
"size_n": 64,
"size_k": 32,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_16384x64x32.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "output-mxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00010346239432692527,
"perf_norm_to_sol": 0.04140053785506815,
"perf_norm_to_cublas": 0.23373130433001865,
"compute_intensity": 128,
"tile_compute_intensity": 1.5609756097560976,
"MxNxK": 16777216,
"size_m": 128,
"size_n": 256,
"size_k": 512,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_128x256x512.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "output-mxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.0013990592211484908,
"perf_norm_to_sol": 0.7837767474662375,
"perf_norm_to_cublas": 0.9183542928625235,
"compute_intensity": 399.609756097561,
"tile_compute_intensity": 12.19047619047619,
"MxNxK": 4294967296,
"size_m": 1024,
"size_n": 8192,
"size_k": 512,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_1024x8192x512.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "output-mxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00005411839811131358,
"perf_norm_to_sol": 0.3165946459906324,
"perf_norm_to_cublas": 0.7518921814213975,
"compute_intensity": 157.53846153846155,
"tile_compute_intensity": 4.571428571428571,
"MxNxK": 67108864,
"size_m": 1024,
"size_n": 256,
"size_k": 256,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_1024x256x256.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "output-mxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00011446399148553609,
"perf_norm_to_sol": 0.5987418355508412,
"perf_norm_to_cublas": 0.8432205949687471,
"compute_intensity": 167.18367346938774,
"tile_compute_intensity": 1.9104477611940298,
"MxNxK": 268435456,
"size_m": 128,
"size_n": 4096,
"size_k": 512,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_128x4096x512.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "output-mxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00003103039925917983,
"perf_norm_to_sol": 0.677894867495054,
"perf_norm_to_cublas": 1.0358874781419258,
"compute_intensity": 15.05190629306385,
"tile_compute_intensity": 0.9980506822612085,
"MxNxK": 33554432,
"size_m": 128,
"size_n": 16384,
"size_k": 16,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_128x16384x16.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "output-mxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00021359040401875974,
"perf_norm_to_sol": 0.16043412783775232,
"perf_norm_to_cublas": 0.46261254259215373,
"compute_intensity": 256,
"tile_compute_intensity": 3.1219512195121952,
"MxNxK": 134217728,
"size_m": 256,
"size_n": 512,
"size_k": 1024,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_256x512x1024.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "output-mxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.0018336672335863113,
"perf_norm_to_sol": 0.653958659783155,
"perf_norm_to_cublas": 0.9152212632813738,
"compute_intensity": 15.976596782057532,
"tile_compute_intensity": 1.9692307692307693,
"MxNxK": 2147483648,
"size_m": 16384,
"size_n": 8192,
"size_k": 16,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_16384x8192x16.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "output-mxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00002935999946203083,
"perf_norm_to_sol": 0.29178466290146016,
"perf_norm_to_cublas": 0.7625068410358862,
"compute_intensity": 97.52380952380952,
"tile_compute_intensity": 4,
"MxNxK": 33554432,
"size_m": 1024,
"size_n": 256,
"size_k": 128,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_1024x256x128.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "output-mxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.000018819198885466903,
"perf_norm_to_sol": 0.4552158462192252,
"perf_norm_to_cublas": 0.9731338713337877,
"compute_intensity": 56.10958904109589,
"tile_compute_intensity": 3.5555555555555554,
"MxNxK": 33554432,
"size_m": 2048,
"size_n": 256,
"size_k": 64,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_2048x256x64.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "output-mxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.000010598399967420846,
"perf_norm_to_sol": 0.1411542469173825,
"perf_norm_to_cublas": 0.5455917859886464,
"compute_intensity": 28.054794520547944,
"tile_compute_intensity": 1.28,
"MxNxK": 4194304,
"size_m": 128,
"size_n": 1024,
"size_k": 32,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_128x1024x32.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "output-mxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.0000892351963557303,
"perf_norm_to_sol": 0.8472351494667987,
"perf_norm_to_cublas": 1.004375006930633,
"compute_intensity": 15.906796116504854,
"tile_compute_intensity": 1.8823529411764706,
"MxNxK": 134217728,
"size_m": 4096,
"size_n": 2048,
"size_k": 16,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_4096x2048x16.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "output-mxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.000009529600356472656,
"perf_norm_to_sol": 0.6135836066945349,
"perf_norm_to_cublas": 1.1195432705907737,
"compute_intensity": 14.209887250650477,
"tile_compute_intensity": 0.9922480620155039,
"MxNxK": 8388608,
"size_m": 8192,
"size_n": 64,
"size_k": 16,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_8192x64x16.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "output-mxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.0015968607738614082,
"perf_norm_to_sol": 0.6866911028272158,
"perf_norm_to_cublas": 0.8343810702360097,
"compute_intensity": 431.1578947368421,
"tile_compute_intensity": 7.013698630136986,
"MxNxK": 4294967296,
"size_m": 4096,
"size_n": 256,
"size_k": 4096,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_4096x256x4096.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "output-mxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.00005928640021011233,
"perf_norm_to_sol": 0.577994110990365,
"perf_norm_to_cublas": 0.8643601589188211,
"compute_intensity": 186.1818181818182,
"tile_compute_intensity": 6.4,
"MxNxK": 134217728,
"size_m": 1024,
"size_n": 512,
"size_k": 256,
"details": "../evaluation/dgemm/OUTPUT-MXN/DGEMM_OUTPUT_1024x512x256.json\">JSON output</a>"
},
{
"engine": "CuTe",
"schedule": "output-mxn",
"metric": "cute-statistics_cute-sec",
"perf": 0.000007843200000934302,
"perf_norm_to_sol": 0.6094364194105388,
"perf_norm_to_cublas": 1.0803753418954225,
"compute_intensity": 7.9073359073359075,
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment