Created
December 11, 2020 15:59
-
-
Save lenLRX/2e999f32a86637ffe0ca0828c8af60a4 to your computer and use it in GitHub Desktop.
Failed to run tune_network_cuda
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Extract tasks... | |
========== Task 0 (workload key: ["b32ed43fb351136894c322ee49097a1a"]) ========== | |
placeholder = PLACEHOLDER [1, 1000] | |
T_softmax_maxelem(i0) max= placeholder[i0, k] | |
T_softmax_exp(i0, i1) = tir.exp((placeholder[i0, i1] - T_softmax_maxelem[i0])) | |
T_softmax_expsum(i0) += T_softmax_exp[i0, k] | |
T_softmax_norm(i0, i1) = (T_softmax_exp[i0, i1]/T_softmax_expsum[i0]) | |
========== Task 1 (workload key: ["d09dc1a6bb90d59c91b68989ad3492ff"]) ========== | |
placeholder = PLACEHOLDER [1, 512] | |
placeholder = PLACEHOLDER [1000, 512] | |
T_dense(i, j) += (placeholder[i, k]*placeholder[j, k]) | |
placeholder = PLACEHOLDER [1000] | |
T_add(ax0, ax1) = (T_dense[ax0, ax1] + placeholder[ax1]) | |
========== Task 2 (workload key: ["7de313da0ca29a8c63f647791692430d"]) ========== | |
placeholder = PLACEHOLDER [1, 7, 7, 512] | |
tensor(ax0, ax1, ax2, ax3) += placeholder[ax0, ((ax1*7) + rv0), ((ax2*7) + rv1), ax3] | |
tensor(ax0, ax1, ax2, ax3) = (tensor[ax0, ax1, ax2, ax3]/(float32((select((bool)1, ((ax1 + 1)*7), (((ax1 + 1)*7) + 1)) - (ax1*7)))*float32((select((bool)1, ((ax2 + 1)*7), (((ax2 + 1)*7) + 1)) - (ax2*7))))) | |
========== Task 3 (workload key: ["8d5a93959138dc7b2ee1f1b3219dfa14"]) ========== | |
placeholder = PLACEHOLDER [1, 7, 7, 512] | |
data_pad(i0, i1, i2, i3) = tir.if_then_else(((((i1 >= 1) && (i1 < 8)) && (i2 >= 1)) && (i2 < 8)), placeholder[i0, (i1 - 1), (i2 - 1), i3], 0f) | |
input_tile(eps, nu, p, ci) = data_pad[floordiv(p, 16), ((floormod(floordiv(p, 4), 4)*2) + eps), ((floormod(p, 4)*2) + nu), ci] | |
B(i, j) = select(((floormod(i, 4) == 3) && (floormod(j, 4) == 3)), 1f, select(((floormod(i, 4) == 3) && (floormod(j, 4) == 2)), ..(OMITTED).. ormod(i, 4) == 0) && (floormod(j, 4) == 1)), 0f, select(((floormod(i, 4) == 0) && (floormod(j, 4) == 0)), 1f, 0f)))))))))))))))) | |
data_pack(eps, nu, p, ci) += ((input_tile[r_a, r_b, p, ci]*B[r_a, eps])*B[r_b, nu]) | |
placeholder = PLACEHOLDER [4, 4, 512, 512] | |
bgemm(eps, nu, p, co) += (data_pack[eps, nu, p, ci]*placeholder[eps, nu, co, ci]) | |
A(i, j) = select(((floormod(i, 4) == 3) && (floormod(j, 2) == 1)), 1f, select(((floormod(i, 4) == 3) && (floormod(j, 2) == 0)), ..(OMITTED).. ct(((floormod(i, 4) == 0) && (floormod(j, 2) == 1)), 0f, select(((floormod(i, 4) == 0) && (floormod(j, 2) == 0)), 1f, 0f)))))))) | |
inverse(vh, vw, p, co) += ((bgemm[r_a, r_b, p, co]*A[r_a, vh])*A[r_b, vw]) | |
conv2d_winograd(n, h, w, co) = inverse[floormod(h, 2), floormod(w, 2), ((((n*4)*4) + (floordiv(h, 2)*4)) + floordiv(w, 2)), co] | |
placeholder = PLACEHOLDER [1, 7, 7, 512] | |
T_add(ax0, ax1, ax2, ax3) = (conv2d_winograd[ax0, ax1, ax2, ax3] + placeholder[ax0, ax1, ax2, ax3]) | |
placeholder = PLACEHOLDER [1, 1, 1, 512] | |
T_multiply(ax0, ax1, ax2, ax3) = (T_add[ax0, ax1, ax2, ax3]*placeholder[ax0, 0, 0, ax3]) | |
placeholder = PLACEHOLDER [1, 1, 1, 512] | |
T_add(ax0, ax1, ax2, ax3) = (T_multiply[ax0, ax1, ax2, ax3] + placeholder[ax0, 0, 0, ax3]) | |
T_relu(ax0, ax1, ax2, ax3) = max(T_add[ax0, ax1, ax2, ax3], 0f) | |
========== Task 4 (workload key: ["ac6920940de3797cc3f9f9c260675e5d"]) ========== | |
placeholder = PLACEHOLDER [1, 7, 7, 512] | |
data_pad(i0, i1, i2, i3) = tir.if_then_else(((((i1 >= 1) && (i1 < 8)) && (i2 >= 1)) && (i2 < 8)), placeholder[i0, (i1 - 1), (i2 - 1), i3], 0f) | |
input_tile(eps, nu, p, ci) = data_pad[floordiv(p, 16), ((floormod(floordiv(p, 4), 4)*2) + eps), ((floormod(p, 4)*2) + nu), ci] | |
B(i, j) = select(((floormod(i, 4) == 3) && (floormod(j, 4) == 3)), 1f, select(((floormod(i, 4) == 3) && (floormod(j, 4) == 2)), ..(OMITTED).. ormod(i, 4) == 0) && (floormod(j, 4) == 1)), 0f, select(((floormod(i, 4) == 0) && (floormod(j, 4) == 0)), 1f, 0f)))))))))))))))) | |
data_pack(eps, nu, p, ci) += ((input_tile[r_a, r_b, p, ci]*B[r_a, eps])*B[r_b, nu]) | |
placeholder = PLACEHOLDER [4, 4, 512, 512] | |
bgemm(eps, nu, p, co) += (data_pack[eps, nu, p, ci]*placeholder[eps, nu, co, ci]) | |
A(i, j) = select(((floormod(i, 4) == 3) && (floormod(j, 2) == 1)), 1f, select(((floormod(i, 4) == 3) && (floormod(j, 2) == 0)), ..(OMITTED).. ct(((floormod(i, 4) == 0) && (floormod(j, 2) == 1)), 0f, select(((floormod(i, 4) == 0) && (floormod(j, 2) == 0)), 1f, 0f)))))))) | |
inverse(vh, vw, p, co) += ((bgemm[r_a, r_b, p, co]*A[r_a, vh])*A[r_b, vw]) | |
conv2d_winograd(n, h, w, co) = inverse[floormod(h, 2), floormod(w, 2), ((((n*4)*4) + (floordiv(h, 2)*4)) + floordiv(w, 2)), co] | |
placeholder = PLACEHOLDER [1, 1, 1, 512] | |
T_add(ax0, ax1, ax2, ax3) = (conv2d_winograd[ax0, ax1, ax2, ax3] + placeholder[ax0, 0, 0, ax3]) | |
T_relu(ax0, ax1, ax2, ax3) = max(T_add[ax0, ax1, ax2, ax3], 0f) | |
========== Task 5 (workload key: ["7e83a2ee5cd5d50282ed19310700046a"]) ========== | |
placeholder = PLACEHOLDER [1, 7, 7, 512] | |
data_pad(i0, i1, i2, i3) = tir.if_then_else(((((i1 >= 1) && (i1 < 8)) && (i2 >= 1)) && (i2 < 8)), placeholder[i0, (i1 - 1), (i2 - 1), i3], 0f) | |
input_tile(eps, nu, p, ci) = data_pad[floordiv(p, 16), ((floormod(floordiv(p, 4), 4)*2) + eps), ((floormod(p, 4)*2) + nu), ci] | |
B(i, j) = select(((floormod(i, 4) == 3) && (floormod(j, 4) == 3)), 1f, select(((floormod(i, 4) == 3) && (floormod(j, 4) == 2)), ..(OMITTED).. ormod(i, 4) == 0) && (floormod(j, 4) == 1)), 0f, select(((floormod(i, 4) == 0) && (floormod(j, 4) == 0)), 1f, 0f)))))))))))))))) | |
data_pack(eps, nu, p, ci) += ((input_tile[r_a, r_b, p, ci]*B[r_a, eps])*B[r_b, nu]) | |
placeholder = PLACEHOLDER [4, 4, 512, 512] | |
bgemm(eps, nu, p, co) += (data_pack[eps, nu, p, ci]*placeholder[eps, nu, co, ci]) | |
A(i, j) = select(((floormod(i, 4) == 3) && (floormod(j, 2) == 1)), 1f, select(((floormod(i, 4) == 3) && (floormod(j, 2) == 0)), ..(OMITTED).. ct(((floormod(i, 4) == 0) && (floormod(j, 2) == 1)), 0f, select(((floormod(i, 4) == 0) && (floormod(j, 2) == 0)), 1f, 0f)))))))) | |
inverse(vh, vw, p, co) += ((bgemm[r_a, r_b, p, co]*A[r_a, vh])*A[r_b, vw]) | |
conv2d_winograd(n, h, w, co) = inverse[floormod(h, 2), floormod(w, 2), ((((n*4)*4) + (floordiv(h, 2)*4)) + floordiv(w, 2)), co] | |
placeholder = PLACEHOLDER [1, 7, 7, 512] | |
T_add(ax0, ax1, ax2, ax3) = (conv2d_winograd[ax0, ax1, ax2, ax3] + placeholder[ax0, ax1, ax2, ax3]) | |
========== Task 6 (workload key: ["1f6cd3637ec856bf5cf5010a623eed05"]) ========== | |
placeholder = PLACEHOLDER [1, 14, 14, 256] | |
PaddedInput(i0, i1, i2, i3) = tir.if_then_else(((((i1 >= 1) && (i1 < 15)) && (i2 >= 1)) && (i2 < 15)), placeholder[i0, (i1 - 1), (i2 - 1), i3], 0f) | |
placeholder = PLACEHOLDER [3, 3, 256, 512] | |
Conv2dOutput(nn, yy, xx, ff) += (PaddedInput[nn, ((yy*2) + ry), ((xx*2) + rx), rc]*placeholder[ry, rx, rc, ff]) | |
placeholder = PLACEHOLDER [1, 1, 1, 512] | |
T_add(ax0, ax1, ax2, ax3) = (Conv2dOutput[ax0, ax1, ax2, ax3] + placeholder[ax0, 0, 0, ax3]) | |
T_relu(ax0, ax1, ax2, ax3) = max(T_add[ax0, ax1, ax2, ax3], 0f) | |
========== Task 7 (workload key: ["424ba83160af31badc0b098136e1a3b0"]) ========== | |
placeholder = PLACEHOLDER [1, 14, 14, 256] | |
data_pad(i0, i1, i2, i3) = tir.if_then_else(((((i1 >= 1) && (i1 < 15)) && (i2 >= 1)) && (i2 < 15)), placeholder[i0, (i1 - 1), (i2 - 1), i3], 0f) | |
input_tile(eps, nu, p, ci) = data_pad[floordiv(p, 49), ((floormod(floordiv(p, 7), 7)*2) + eps), ((floormod(p, 7)*2) + nu), ci] | |
B(i, j) = select(((floormod(i, 4) == 3) && (floormod(j, 4) == 3)), 1f, select(((floormod(i, 4) == 3) && (floormod(j, 4) == 2)), ..(OMITTED).. ormod(i, 4) == 0) && (floormod(j, 4) == 1)), 0f, select(((floormod(i, 4) == 0) && (floormod(j, 4) == 0)), 1f, 0f)))))))))))))))) | |
data_pack(eps, nu, p, ci) += ((input_tile[r_a, r_b, p, ci]*B[r_a, eps])*B[r_b, nu]) | |
placeholder = PLACEHOLDER [4, 4, 256, 256] | |
bgemm(eps, nu, p, co) += (data_pack[eps, nu, p, ci]*placeholder[eps, nu, co, ci]) | |
A(i, j) = select(((floormod(i, 4) == 3) && (floormod(j, 2) == 1)), 1f, select(((floormod(i, 4) == 3) && (floormod(j, 2) == 0)), ..(OMITTED).. ct(((floormod(i, 4) == 0) && (floormod(j, 2) == 1)), 0f, select(((floormod(i, 4) == 0) && (floormod(j, 2) == 0)), 1f, 0f)))))))) | |
inverse(vh, vw, p, co) += ((bgemm[r_a, r_b, p, co]*A[r_a, vh])*A[r_b, vw]) | |
conv2d_winograd(n, h, w, co) = inverse[floormod(h, 2), floormod(w, 2), ((((n*7)*7) + (floordiv(h, 2)*7)) + floordiv(w, 2)), co] | |
placeholder = PLACEHOLDER [1, 14, 14, 256] | |
T_add(ax0, ax1, ax2, ax3) = (conv2d_winograd[ax0, ax1, ax2, ax3] + placeholder[ax0, ax1, ax2, ax3]) | |
placeholder = PLACEHOLDER [1, 1, 1, 256] | |
T_add(ax0, ax1, ax2, ax3) = (T_add[ax0, ax1, ax2, ax3] + placeholder[ax0, 0, 0, ax3]) | |
T_relu(ax0, ax1, ax2, ax3) = max(T_add[ax0, ax1, ax2, ax3], 0f) | |
========== Task 8 (workload key: ["a169cd0053d3a7ca82998fcb62e42c58"]) ========== | |
placeholder = PLACEHOLDER [1, 14, 14, 256] | |
data_pad(i0, i1, i2, i3) = tir.if_then_else(((((i1 >= 1) && (i1 < 15)) && (i2 >= 1)) && (i2 < 15)), placeholder[i0, (i1 - 1), (i2 - 1), i3], 0f) | |
input_tile(eps, nu, p, ci) = data_pad[floordiv(p, 49), ((floormod(floordiv(p, 7), 7)*2) + eps), ((floormod(p, 7)*2) + nu), ci] | |
B(i, j) = select(((floormod(i, 4) == 3) && (floormod(j, 4) == 3)), 1f, select(((floormod(i, 4) == 3) && (floormod(j, 4) == 2)), ..(OMITTED).. ormod(i, 4) == 0) && (floormod(j, 4) == 1)), 0f, select(((floormod(i, 4) == 0) && (floormod(j, 4) == 0)), 1f, 0f)))))))))))))))) | |
data_pack(eps, nu, p, ci) += ((input_tile[r_a, r_b, p, ci]*B[r_a, eps])*B[r_b, nu]) | |
placeholder = PLACEHOLDER [4, 4, 256, 256] | |
bgemm(eps, nu, p, co) += (data_pack[eps, nu, p, ci]*placeholder[eps, nu, co, ci]) | |
A(i, j) = select(((floormod(i, 4) == 3) && (floormod(j, 2) == 1)), 1f, select(((floormod(i, 4) == 3) && (floormod(j, 2) == 0)), ..(OMITTED).. ct(((floormod(i, 4) == 0) && (floormod(j, 2) == 1)), 0f, select(((floormod(i, 4) == 0) && (floormod(j, 2) == 0)), 1f, 0f)))))))) | |
inverse(vh, vw, p, co) += ((bgemm[r_a, r_b, p, co]*A[r_a, vh])*A[r_b, vw]) | |
conv2d_winograd(n, h, w, co) = inverse[floormod(h, 2), floormod(w, 2), ((((n*7)*7) + (floordiv(h, 2)*7)) + floordiv(w, 2)), co] | |
placeholder = PLACEHOLDER [1, 1, 1, 256] | |
T_add(ax0, ax1, ax2, ax3) = (conv2d_winograd[ax0, ax1, ax2, ax3] + placeholder[ax0, 0, 0, ax3]) | |
T_relu(ax0, ax1, ax2, ax3) = max(T_add[ax0, ax1, ax2, ax3], 0f) | |
========== Task 9 (workload key: ["0141ffc4fbabc10cc5a94c954419055b"]) ========== | |
placeholder = PLACEHOLDER [1, 14, 14, 256] | |
data_pad(i0, i1, i2, i3) = tir.if_then_else(((((i1 >= 1) && (i1 < 15)) && (i2 >= 1)) && (i2 < 15)), placeholder[i0, (i1 - 1), (i2 - 1), i3], 0f) | |
input_tile(eps, nu, p, ci) = data_pad[floordiv(p, 49), ((floormod(floordiv(p, 7), 7)*2) + eps), ((floormod(p, 7)*2) + nu), ci] | |
B(i, j) = select(((floormod(i, 4) == 3) && (floormod(j, 4) == 3)), 1f, select(((floormod(i, 4) == 3) && (floormod(j, 4) == 2)), ..(OMITTED).. ormod(i, 4) == 0) && (floormod(j, 4) == 1)), 0f, select(((floormod(i, 4) == 0) && (floormod(j, 4) == 0)), 1f, 0f)))))))))))))))) | |
data_pack(eps, nu, p, ci) += ((input_tile[r_a, r_b, p, ci]*B[r_a, eps])*B[r_b, nu]) | |
placeholder = PLACEHOLDER [4, 4, 256, 256] | |
bgemm(eps, nu, p, co) += (data_pack[eps, nu, p, ci]*placeholder[eps, nu, co, ci]) | |
A(i, j) = select(((floormod(i, 4) == 3) && (floormod(j, 2) == 1)), 1f, select(((floormod(i, 4) == 3) && (floormod(j, 2) == 0)), ..(OMITTED).. ct(((floormod(i, 4) == 0) && (floormod(j, 2) == 1)), 0f, select(((floormod(i, 4) == 0) && (floormod(j, 2) == 0)), 1f, 0f)))))))) | |
inverse(vh, vw, p, co) += ((bgemm[r_a, r_b, p, co]*A[r_a, vh])*A[r_b, vw]) | |
conv2d_winograd(n, h, w, co) = inverse[floormod(h, 2), floormod(w, 2), ((((n*7)*7) + (floordiv(h, 2)*7)) + floordiv(w, 2)), co] | |
placeholder = PLACEHOLDER [1, 14, 14, 256] | |
T_add(ax0, ax1, ax2, ax3) = (conv2d_winograd[ax0, ax1, ax2, ax3] + placeholder[ax0, ax1, ax2, ax3]) | |
========== Task 10 (workload key: ["81aae4b8e2c076a4014d403e8a2c70a1"]) ========== | |
placeholder = PLACEHOLDER [1, 28, 28, 128] | |
PaddedInput(i0, i1, i2, i3) = tir.if_then_else(((((i1 >= 1) && (i1 < 29)) && (i2 >= 1)) && (i2 < 29)), placeholder[i0, (i1 - 1), (i2 - 1), i3], 0f) | |
placeholder = PLACEHOLDER [3, 3, 128, 256] | |
Conv2dOutput(nn, yy, xx, ff) += (PaddedInput[nn, ((yy*2) + ry), ((xx*2) + rx), rc]*placeholder[ry, rx, rc, ff]) | |
placeholder = PLACEHOLDER [1, 1, 1, 256] | |
T_add(ax0, ax1, ax2, ax3) = (Conv2dOutput[ax0, ax1, ax2, ax3] + placeholder[ax0, 0, 0, ax3]) | |
T_relu(ax0, ax1, ax2, ax3) = max(T_add[ax0, ax1, ax2, ax3], 0f) | |
========== Task 11 (workload key: ["c7a6b56bdc04b94c829fb2ef9874019e"]) ========== | |
placeholder = PLACEHOLDER [1, 28, 28, 128] | |
data_pad(i0, i1, i2, i3) = tir.if_then_else(((((i1 >= 1) && (i1 < 29)) && (i2 >= 1)) && (i2 < 29)), placeholder[i0, (i1 - 1), (i2 - 1), i3], 0f) | |
input_tile(eps, nu, p, ci) = data_pad[floordiv(p, 196), ((floormod(floordiv(p, 14), 14)*2) + eps), ((floormod(p, 14)*2) + nu), ci] | |
B(i, j) = select(((floormod(i, 4) == 3) && (floormod(j, 4) == 3)), 1f, select(((floormod(i, 4) == 3) && (floormod(j, 4) == 2)), ..(OMITTED).. ormod(i, 4) == 0) && (floormod(j, 4) == 1)), 0f, select(((floormod(i, 4) == 0) && (floormod(j, 4) == 0)), 1f, 0f)))))))))))))))) | |
data_pack(eps, nu, p, ci) += ((input_tile[r_a, r_b, p, ci]*B[r_a, eps])*B[r_b, nu]) | |
placeholder = PLACEHOLDER [4, 4, 128, 128] | |
bgemm(eps, nu, p, co) += (data_pack[eps, nu, p, ci]*placeholder[eps, nu, co, ci]) | |
A(i, j) = select(((floormod(i, 4) == 3) && (floormod(j, 2) == 1)), 1f, select(((floormod(i, 4) == 3) && (floormod(j, 2) == 0)), ..(OMITTED).. ct(((floormod(i, 4) == 0) && (floormod(j, 2) == 1)), 0f, select(((floormod(i, 4) == 0) && (floormod(j, 2) == 0)), 1f, 0f)))))))) | |
inverse(vh, vw, p, co) += ((bgemm[r_a, r_b, p, co]*A[r_a, vh])*A[r_b, vw]) | |
conv2d_winograd(n, h, w, co) = inverse[floormod(h, 2), floormod(w, 2), ((((n*14)*14) + (floordiv(h, 2)*14)) + floordiv(w, 2)), co] | |
placeholder = PLACEHOLDER [1, 28, 28, 128] | |
T_add(ax0, ax1, ax2, ax3) = (conv2d_winograd[ax0, ax1, ax2, ax3] + placeholder[ax0, ax1, ax2, ax3]) | |
placeholder = PLACEHOLDER [1, 1, 1, 128] | |
T_add(ax0, ax1, ax2, ax3) = (T_add[ax0, ax1, ax2, ax3] + placeholder[ax0, 0, 0, ax3]) | |
T_relu(ax0, ax1, ax2, ax3) = max(T_add[ax0, ax1, ax2, ax3], 0f) | |
========== Task 12 (workload key: ["c035cc8b0568a8e054d06bd7f4950550"]) ========== | |
placeholder = PLACEHOLDER [1, 28, 28, 128] | |
data_pad(i0, i1, i2, i3) = tir.if_then_else(((((i1 >= 1) && (i1 < 29)) && (i2 >= 1)) && (i2 < 29)), placeholder[i0, (i1 - 1), (i2 - 1), i3], 0f) | |
input_tile(eps, nu, p, ci) = data_pad[floordiv(p, 196), ((floormod(floordiv(p, 14), 14)*2) + eps), ((floormod(p, 14)*2) + nu), ci] | |
B(i, j) = select(((floormod(i, 4) == 3) && (floormod(j, 4) == 3)), 1f, select(((floormod(i, 4) == 3) && (floormod(j, 4) == 2)), ..(OMITTED).. ormod(i, 4) == 0) && (floormod(j, 4) == 1)), 0f, select(((floormod(i, 4) == 0) && (floormod(j, 4) == 0)), 1f, 0f)))))))))))))))) | |
data_pack(eps, nu, p, ci) += ((input_tile[r_a, r_b, p, ci]*B[r_a, eps])*B[r_b, nu]) | |
placeholder = PLACEHOLDER [4, 4, 128, 128] | |
bgemm(eps, nu, p, co) += (data_pack[eps, nu, p, ci]*placeholder[eps, nu, co, ci]) | |
A(i, j) = select(((floormod(i, 4) == 3) && (floormod(j, 2) == 1)), 1f, select(((floormod(i, 4) == 3) && (floormod(j, 2) == 0)), ..(OMITTED).. ct(((floormod(i, 4) == 0) && (floormod(j, 2) == 1)), 0f, select(((floormod(i, 4) == 0) && (floormod(j, 2) == 0)), 1f, 0f)))))))) | |
inverse(vh, vw, p, co) += ((bgemm[r_a, r_b, p, co]*A[r_a, vh])*A[r_b, vw]) | |
conv2d_winograd(n, h, w, co) = inverse[floormod(h, 2), floormod(w, 2), ((((n*14)*14) + (floordiv(h, 2)*14)) + floordiv(w, 2)), co] | |
placeholder = PLACEHOLDER [1, 1, 1, 128] | |
T_add(ax0, ax1, ax2, ax3) = (conv2d_winograd[ax0, ax1, ax2, ax3] + placeholder[ax0, 0, 0, ax3]) | |
T_relu(ax0, ax1, ax2, ax3) = max(T_add[ax0, ax1, ax2, ax3], 0f) | |
========== Task 13 (workload key: ["c5ee3e05edd9754492d0763aa41fd025"]) ========== | |
placeholder = PLACEHOLDER [1, 28, 28, 128] | |
data_pad(i0, i1, i2, i3) = tir.if_then_else(((((i1 >= 1) && (i1 < 29)) && (i2 >= 1)) && (i2 < 29)), placeholder[i0, (i1 - 1), (i2 - 1), i3], 0f) | |
input_tile(eps, nu, p, ci) = data_pad[floordiv(p, 196), ((floormod(floordiv(p, 14), 14)*2) + eps), ((floormod(p, 14)*2) + nu), ci] | |
B(i, j) = select(((floormod(i, 4) == 3) && (floormod(j, 4) == 3)), 1f, select(((floormod(i, 4) == 3) && (floormod(j, 4) == 2)), ..(OMITTED).. ormod(i, 4) == 0) && (floormod(j, 4) == 1)), 0f, select(((floormod(i, 4) == 0) && (floormod(j, 4) == 0)), 1f, 0f)))))))))))))))) | |
data_pack(eps, nu, p, ci) += ((input_tile[r_a, r_b, p, ci]*B[r_a, eps])*B[r_b, nu]) | |
placeholder = PLACEHOLDER [4, 4, 128, 128] | |
bgemm(eps, nu, p, co) += (data_pack[eps, nu, p, ci]*placeholder[eps, nu, co, ci]) | |
A(i, j) = select(((floormod(i, 4) == 3) && (floormod(j, 2) == 1)), 1f, select(((floormod(i, 4) == 3) && (floormod(j, 2) == 0)), ..(OMITTED).. ct(((floormod(i, 4) == 0) && (floormod(j, 2) == 1)), 0f, select(((floormod(i, 4) == 0) && (floormod(j, 2) == 0)), 1f, 0f)))))))) | |
inverse(vh, vw, p, co) += ((bgemm[r_a, r_b, p, co]*A[r_a, vh])*A[r_b, vw]) | |
conv2d_winograd(n, h, w, co) = inverse[floormod(h, 2), floormod(w, 2), ((((n*14)*14) + (floordiv(h, 2)*14)) + floordiv(w, 2)), co] | |
placeholder = PLACEHOLDER [1, 28, 28, 128] | |
T_add(ax0, ax1, ax2, ax3) = (conv2d_winograd[ax0, ax1, ax2, ax3] + placeholder[ax0, ax1, ax2, ax3]) | |
========== Task 14 (workload key: ["022ebb6b7c55c5ed030421380ec83a04"]) ========== | |
placeholder = PLACEHOLDER [1, 56, 56, 64] | |
PaddedInput(i0, i1, i2, i3) = tir.if_then_else(((((i1 >= 1) && (i1 < 57)) && (i2 >= 1)) && (i2 < 57)), placeholder[i0, (i1 - 1), (i2 - 1), i3], 0f) | |
placeholder = PLACEHOLDER [3, 3, 64, 128] | |
Conv2dOutput(nn, yy, xx, ff) += (PaddedInput[nn, ((yy*2) + ry), ((xx*2) + rx), rc]*placeholder[ry, rx, rc, ff]) | |
placeholder = PLACEHOLDER [1, 1, 1, 128] | |
T_add(ax0, ax1, ax2, ax3) = (Conv2dOutput[ax0, ax1, ax2, ax3] + placeholder[ax0, 0, 0, ax3]) | |
T_relu(ax0, ax1, ax2, ax3) = max(T_add[ax0, ax1, ax2, ax3], 0f) | |
========== Task 15 (workload key: ["de0df0893e01892cfe69f7bc2c24111f"]) ========== | |
placeholder = PLACEHOLDER [1, 56, 56, 64] | |
data_pad(i0, i1, i2, i3) = tir.if_then_else(((((i1 >= 1) && (i1 < 57)) && (i2 >= 1)) && (i2 < 57)), placeholder[i0, (i1 - 1), (i2 - 1), i3], 0f) | |
input_tile(eps, nu, p, ci) = data_pad[floordiv(p, 196), ((floormod(floordiv(p, 14), 14)*4) + eps), ((floormod(p, 14)*4) + nu), ci] | |
B(i, j) = select(((floormod(i, 6) == 5) && (floormod(j, 6) == 5)), 1f, select(((floormod(i, 6) == 5) && (floormod(j, 6) == 4)), ..(OMITTED).. (floormod(j, 6) == 1)), 0f, select(((floormod(i, 6) == 0) && (floormod(j, 6) == 0)), 1f, 0f)))))))))))))))))))))))))))))))))))) | |
data_pack(eps, nu, p, ci) += ((input_tile[r_a, r_b, p, ci]*B[r_a, eps])*B[r_b, nu]) | |
placeholder = PLACEHOLDER [6, 6, 64, 64] | |
bgemm(eps, nu, p, co) += (data_pack[eps, nu, p, ci]*placeholder[eps, nu, co, ci]) | |
A(i, j) = select(((floormod(i, 6) == 5) && (floormod(j, 4) == 3)), 1f, select(((floormod(i, 6) == 5) && (floormod(j, 4) == 2)), ..(OMITTED).. 6) == 0) && (floormod(j, 4) == 1)), 0f, select(((floormod(i, 6) == 0) && (floormod(j, 4) == 0)), 1f, 0f)))))))))))))))))))))))) | |
inverse(vh, vw, p, co) += ((bgemm[r_a, r_b, p, co]*A[r_a, vh])*A[r_b, vw]) | |
conv2d_winograd(n, h, w, co) = inverse[floormod(h, 4), floormod(w, 4), ((((n*14)*14) + (floordiv(h, 4)*14)) + floordiv(w, 4)), co] | |
placeholder = PLACEHOLDER [1, 56, 56, 64] | |
T_add(ax0, ax1, ax2, ax3) = (conv2d_winograd[ax0, ax1, ax2, ax3] + placeholder[ax0, ax1, ax2, ax3]) | |
placeholder = PLACEHOLDER [1, 1, 1, 64] | |
T_add(ax0, ax1, ax2, ax3) = (T_add[ax0, ax1, ax2, ax3] + placeholder[ax0, 0, 0, ax3]) | |
T_relu(ax0, ax1, ax2, ax3) = max(T_add[ax0, ax1, ax2, ax3], 0f) | |
========== Task 16 (workload key: ["f2e3c09a00e7d0a9897f70497e089f1e"]) ========== | |
placeholder = PLACEHOLDER [1, 56, 56, 64] | |
data_pad(i0, i1, i2, i3) = tir.if_then_else(((((i1 >= 1) && (i1 < 57)) && (i2 >= 1)) && (i2 < 57)), placeholder[i0, (i1 - 1), (i2 - 1), i3], 0f) | |
input_tile(eps, nu, p, ci) = data_pad[floordiv(p, 196), ((floormod(floordiv(p, 14), 14)*4) + eps), ((floormod(p, 14)*4) + nu), ci] | |
B(i, j) = select(((floormod(i, 6) == 5) && (floormod(j, 6) == 5)), 1f, select(((floormod(i, 6) == 5) && (floormod(j, 6) == 4)), ..(OMITTED).. (floormod(j, 6) == 1)), 0f, select(((floormod(i, 6) == 0) && (floormod(j, 6) == 0)), 1f, 0f)))))))))))))))))))))))))))))))))))) | |
data_pack(eps, nu, p, ci) += ((input_tile[r_a, r_b, p, ci]*B[r_a, eps])*B[r_b, nu]) | |
placeholder = PLACEHOLDER [6, 6, 64, 64] | |
bgemm(eps, nu, p, co) += (data_pack[eps, nu, p, ci]*placeholder[eps, nu, co, ci]) | |
A(i, j) = select(((floormod(i, 6) == 5) && (floormod(j, 4) == 3)), 1f, select(((floormod(i, 6) == 5) && (floormod(j, 4) == 2)), ..(OMITTED).. 6) == 0) && (floormod(j, 4) == 1)), 0f, select(((floormod(i, 6) == 0) && (floormod(j, 4) == 0)), 1f, 0f)))))))))))))))))))))))) | |
inverse(vh, vw, p, co) += ((bgemm[r_a, r_b, p, co]*A[r_a, vh])*A[r_b, vw]) | |
conv2d_winograd(n, h, w, co) = inverse[floormod(h, 4), floormod(w, 4), ((((n*14)*14) + (floordiv(h, 4)*14)) + floordiv(w, 4)), co] | |
placeholder = PLACEHOLDER [1, 1, 1, 64] | |
T_add(ax0, ax1, ax2, ax3) = (conv2d_winograd[ax0, ax1, ax2, ax3] + placeholder[ax0, 0, 0, ax3]) | |
T_relu(ax0, ax1, ax2, ax3) = max(T_add[ax0, ax1, ax2, ax3], 0f) | |
========== Task 17 (workload key: ["fa26946d7ac51126bfa859cb183f9ca1"]) ========== | |
placeholder = PLACEHOLDER [1, 56, 56, 64] | |
data_pad(i0, i1, i2, i3) = tir.if_then_else(((((i1 >= 1) && (i1 < 57)) && (i2 >= 1)) && (i2 < 57)), placeholder[i0, (i1 - 1), (i2 - 1), i3], 0f) | |
input_tile(eps, nu, p, ci) = data_pad[floordiv(p, 196), ((floormod(floordiv(p, 14), 14)*4) + eps), ((floormod(p, 14)*4) + nu), ci] | |
B(i, j) = select(((floormod(i, 6) == 5) && (floormod(j, 6) == 5)), 1f, select(((floormod(i, 6) == 5) && (floormod(j, 6) == 4)), ..(OMITTED).. (floormod(j, 6) == 1)), 0f, select(((floormod(i, 6) == 0) && (floormod(j, 6) == 0)), 1f, 0f)))))))))))))))))))))))))))))))))))) | |
data_pack(eps, nu, p, ci) += ((input_tile[r_a, r_b, p, ci]*B[r_a, eps])*B[r_b, nu]) | |
placeholder = PLACEHOLDER [6, 6, 64, 64] | |
bgemm(eps, nu, p, co) += (data_pack[eps, nu, p, ci]*placeholder[eps, nu, co, ci]) | |
A(i, j) = select(((floormod(i, 6) == 5) && (floormod(j, 4) == 3)), 1f, select(((floormod(i, 6) == 5) && (floormod(j, 4) == 2)), ..(OMITTED).. 6) == 0) && (floormod(j, 4) == 1)), 0f, select(((floormod(i, 6) == 0) && (floormod(j, 4) == 0)), 1f, 0f)))))))))))))))))))))))) | |
inverse(vh, vw, p, co) += ((bgemm[r_a, r_b, p, co]*A[r_a, vh])*A[r_b, vw]) | |
conv2d_winograd(n, h, w, co) = inverse[floormod(h, 4), floormod(w, 4), ((((n*14)*14) + (floordiv(h, 4)*14)) + floordiv(w, 4)), co] | |
placeholder = PLACEHOLDER [1, 56, 56, 64] | |
T_add(ax0, ax1, ax2, ax3) = (conv2d_winograd[ax0, ax1, ax2, ax3] + placeholder[ax0, ax1, ax2, ax3]) | |
========== Task 18 (workload key: ["ba2026d923536b75e9b4faed89287d5f"]) ========== | |
placeholder = PLACEHOLDER [1, 112, 112, 64] | |
pad_temp(ax0, ax1, ax2, ax3) = tir.if_then_else(((((ax1 >= 1) && (ax1 < 113)) && (ax2 >= 1)) && (ax2 < 113)), placeholder[ax0, (ax1 - 1), (ax2 - 1), ax3], -3.40282e+38f) | |
tensor(ax0, ax1, ax2, ax3) max= pad_temp[ax0, ((ax1*2) + dh), ((ax2*2) + dw), ax3] | |
placeholder = PLACEHOLDER [1, 1, 1, 64] | |
T_add(ax0, ax1, ax2, ax3) = (tensor[ax0, ax1, ax2, ax3] + placeholder[ax0, 0, 0, ax3]) | |
T_relu(ax0, ax1, ax2, ax3) = max(T_add[ax0, ax1, ax2, ax3], 0f) | |
========== Task 19 (workload key: ["a0eb8d6048282a4a0986cc2ccf14eaa2"]) ========== | |
placeholder = PLACEHOLDER [1, 224, 224, 3] | |
PaddedInput(i0, i1, i2, i3) = tir.if_then_else(((((i1 >= 3) && (i1 < 227)) && (i2 >= 3)) && (i2 < 227)), placeholder[i0, (i1 - 3), (i2 - 3), i3], 0f) | |
placeholder = PLACEHOLDER [7, 7, 3, 64] | |
Conv2dOutput(nn, yy, xx, ff) += (PaddedInput[nn, ((yy*2) + ry), ((xx*2) + rx), rc]*placeholder[ry, rx, rc, ff]) | |
placeholder = PLACEHOLDER [1, 1, 1, 64] | |
T_add(ax0, ax1, ax2, ax3) = (Conv2dOutput[ax0, ax1, ax2, ax3] + placeholder[ax0, 0, 0, ax3]) | |
T_relu(ax0, ax1, ax2, ax3) = max(T_add[ax0, ax1, ax2, ax3], 0f) | |
========== Task 20 (workload key: ["bf78a7bf0209980f72953637dfd14a6f"]) ========== | |
placeholder = PLACEHOLDER [1, 56, 56, 64] | |
PaddedInput(i0, i1, i2, i3) = placeholder[i0, i1, i2, i3] | |
placeholder = PLACEHOLDER [1, 1, 64, 64] | |
Conv2dOutput(nn, yy, xx, ff) += (PaddedInput[nn, (yy + ry), (xx + rx), rc]*placeholder[ry, rx, rc, ff]) | |
========== Task 21 (workload key: ["6630936c26852f2b89dbfa2ff37fbb9c"]) ========== | |
placeholder = PLACEHOLDER [1, 56, 56, 64] | |
PaddedInput(i0, i1, i2, i3) = placeholder[i0, i1, i2, i3] | |
placeholder = PLACEHOLDER [1, 1, 64, 128] | |
Conv2dOutput(nn, yy, xx, ff) += (PaddedInput[nn, ((yy*2) + ry), ((xx*2) + rx), rc]*placeholder[ry, rx, rc, ff]) | |
========== Task 22 (workload key: ["ba5f918733ccbbd4a1d7fd3724665a2f"]) ========== | |
placeholder = PLACEHOLDER [1, 28, 28, 128] | |
PaddedInput(i0, i1, i2, i3) = placeholder[i0, i1, i2, i3] | |
placeholder = PLACEHOLDER [1, 1, 128, 256] | |
Conv2dOutput(nn, yy, xx, ff) += (PaddedInput[nn, ((yy*2) + ry), ((xx*2) + rx), rc]*placeholder[ry, rx, rc, ff]) | |
========== Task 23 (workload key: ["21ad409d72953de188314010134e3acd"]) ========== | |
placeholder = PLACEHOLDER [1, 14, 14, 256] | |
PaddedInput(i0, i1, i2, i3) = placeholder[i0, i1, i2, i3] | |
placeholder = PLACEHOLDER [1, 1, 256, 512] | |
Conv2dOutput(nn, yy, xx, ff) += (PaddedInput[nn, ((yy*2) + ry), ((xx*2) + rx), rc]*placeholder[ry, rx, rc, ff]) | |
Compile... | |
----------------------------------- | |
Cannot find tuned schedules for target=cuda -keys=cuda,gpu -max_num_threads=1024 -thread_warp_size=32, workload_key=["b32ed43fb351136894c322ee49097a1a"]. A fallback TOPI schedule is used, which may bring great performance regression or even compilation failure. Compute DAG info: | |
placeholder = PLACEHOLDER [1, 1000] | |
T_softmax_maxelem(i0) max= placeholder[i0, k] | |
T_softmax_exp(i0, i1) = tir.exp((placeholder[i0, i1] - T_softmax_maxelem[i0])) | |
T_softmax_expsum(i0) += T_softmax_exp[i0, k] | |
T_softmax_norm(i0, i1) = (T_softmax_exp[i0, i1]/T_softmax_expsum[i0]) | |
----------------------------------- | |
Cannot find tuned schedules for target=cuda -keys=cuda,gpu -max_num_threads=1024 -thread_warp_size=32, workload_key=["d09dc1a6bb90d59c91b68989ad3492ff"]. A fallback TOPI schedule is used, which may bring great performance regression or even compilation failure. Compute DAG info: | |
placeholder = PLACEHOLDER [1, 512] | |
placeholder = PLACEHOLDER [1000, 512] | |
T_dense(i, j) += (placeholder[i, k]*placeholder[j, k]) | |
placeholder = PLACEHOLDER [1000] | |
T_add(ax0, ax1) = (T_dense[ax0, ax1] + placeholder[ax1]) | |
----------------------------------- | |
Cannot find tuned schedules for target=cuda -keys=cuda,gpu -max_num_threads=1024 -thread_warp_size=32, workload_key=["7de313da0ca29a8c63f647791692430d"]. A fallback TOPI schedule is used, which may bring great performance regression or even compilation failure. Compute DAG info: | |
placeholder = PLACEHOLDER [1, 7, 7, 512] | |
tensor(ax0, ax1, ax2, ax3) += placeholder[ax0, ((ax1*7) + rv0), ((ax2*7) + rv1), ax3] | |
tensor(ax0, ax1, ax2, ax3) = (tensor[ax0, ax1, ax2, ax3]/(float32((select((bool)1, ((ax1 + 1)*7), (((ax1 + 1)*7) + 1)) - (ax1*7)))*float32((select((bool)1, ((ax2 + 1)*7), (((ax2 + 1)*7) + 1)) - (ax2*7))))) | |
----------------------------------- | |
Cannot find tuned schedules for target=cuda -keys=cuda,gpu -max_num_threads=1024 -thread_warp_size=32, workload_key=["8d5a93959138dc7b2ee1f1b3219dfa14"]. A fallback TOPI schedule is used, which may bring great performance regression or even compilation failure. Compute DAG info: | |
placeholder = PLACEHOLDER [1, 7, 7, 512] | |
data_pad(i0, i1, i2, i3) = tir.if_then_else(((((i1 >= 1) && (i1 < 8)) && (i2 >= 1)) && (i2 < 8)), placeholder[i0, (i1 - 1), (i2 - 1), i3], 0f) | |
input_tile(eps, nu, p, ci) = data_pad[floordiv(p, 16), ((floormod(floordiv(p, 4), 4)*2) + eps), ((floormod(p, 4)*2) + nu), ci] | |
B(i, j) = select(((floormod(i, 4) == 3) && (floormod(j, 4) == 3)), 1f, select(((floormod(i, 4) == 3) && (floormod(j, 4) == 2)), ..(OMITTED).. ormod(i, 4) == 0) && (floormod(j, 4) == 1)), 0f, select(((floormod(i, 4) == 0) && (floormod(j, 4) == 0)), 1f, 0f)))))))))))))))) | |
data_pack(eps, nu, p, ci) += ((input_tile[r_a, r_b, p, ci]*B[r_a, eps])*B[r_b, nu]) | |
placeholder = PLACEHOLDER [4, 4, 512, 512] | |
bgemm(eps, nu, p, co) += (data_pack[eps, nu, p, ci]*placeholder[eps, nu, co, ci]) | |
A(i, j) = select(((floormod(i, 4) == 3) && (floormod(j, 2) == 1)), 1f, select(((floormod(i, 4) == 3) && (floormod(j, 2) == 0)), ..(OMITTED).. ct(((floormod(i, 4) == 0) && (floormod(j, 2) == 1)), 0f, select(((floormod(i, 4) == 0) && (floormod(j, 2) == 0)), 1f, 0f)))))))) | |
inverse(vh, vw, p, co) += ((bgemm[r_a, r_b, p, co]*A[r_a, vh])*A[r_b, vw]) | |
conv2d_winograd(n, h, w, co) = inverse[floormod(h, 2), floormod(w, 2), ((((n*4)*4) + (floordiv(h, 2)*4)) + floordiv(w, 2)), co] | |
placeholder = PLACEHOLDER [1, 7, 7, 512] | |
T_add(ax0, ax1, ax2, ax3) = (conv2d_winograd[ax0, ax1, ax2, ax3] + placeholder[ax0, ax1, ax2, ax3]) | |
placeholder = PLACEHOLDER [1, 1, 1, 512] | |
T_multiply(ax0, ax1, ax2, ax3) = (T_add[ax0, ax1, ax2, ax3]*placeholder[ax0, 0, 0, ax3]) | |
placeholder = PLACEHOLDER [1, 1, 1, 512] | |
T_add(ax0, ax1, ax2, ax3) = (T_multiply[ax0, ax1, ax2, ax3] + placeholder[ax0, 0, 0, ax3]) | |
T_relu(ax0, ax1, ax2, ax3) = max(T_add[ax0, ax1, ax2, ax3], 0f) | |
Traceback (most recent call last): | |
File "tune_network_cuda.py", line 281, in <module> | |
lib = relay.build(mod, target=target, params=params) | |
File "/home/len/tvm/python/tvm/relay/build_module.py", line 275, in build | |
graph_json, mod, params = bld_mod.build(mod, target, target_host, params) | |
File "/home/len/tvm/python/tvm/relay/build_module.py", line 138, in build | |
self._build(mod, target, target_host) | |
File "/home/len/tvm/python/tvm/_ffi/_ctypes/packed_func.py", line 237, in __call__ | |
raise get_last_ffi_error() | |
tvm._ffi.base.TVMError: Traceback (most recent call last): | |
[bt] (8) /home/len/tvm/build/libtvm.so(tvm::relay::backend::MemoizedExprTranslator<std::vector<tvm::relay::backend::GraphNodeRef, std::allocator<tvm::relay::backend::GraphNodeRef> > >::VisitExpr(tvm::RelayExpr const&)+0x192) [0x7f6f814a45a2] | |
[bt] (7) /home/len/tvm/build/libtvm.so(tvm::relay::ExprFunctor<std::vector<tvm::relay::backend::GraphNodeRef, std::allocator<tvm::relay::backend::GraphNodeRef> > (tvm::RelayExpr const&)>::InitVTable()::{lambda(tvm::runtime::ObjectRef const&, tvm::relay::ExprFunctor<std::vector<tvm::relay::backend::GraphNodeRef, std::allocator<tvm::relay::backend::GraphNodeRef> > (tvm::RelayExpr const&)>*)#6}::_FUN(tvm::runtime::ObjectRef const&, tvm::relay::ExprFunctor<std::vector<tvm::relay::backend::GraphNodeRef, std::allocator<tvm::relay::backend::GraphNodeRef> > (tvm::RelayExpr const&)>*)+0x2c) [0x7f6f8148ac4c] | |
[bt] (6) /home/len/tvm/build/libtvm.so(tvm::relay::backend::GraphRuntimeCodegen::VisitExpr_(tvm::relay::CallNode const*)+0xb9c) [0x7f6f8149f6dc] | |
[bt] (5) /home/len/tvm/build/libtvm.so(+0x121412d) [0x7f6f8146b12d] | |
[bt] (4) /home/len/tvm/build/libtvm.so(tvm::relay::CompileEngineImpl::LowerInternal(tvm::relay::CCacheKey const&)+0x75e) [0x7f6f8147e62e] | |
[bt] (3) /home/len/tvm/build/libtvm.so(tvm::relay::CreateSchedule(tvm::relay::Function const&, tvm::Target const&)+0x44a) [0x7f6f8146784a] | |
[bt] (2) /home/len/tvm/build/libtvm.so(tvm::relay::ScheduleGetter::Create(tvm::relay::Function const&)+0xd32) [0x7f6f81474f42] | |
[bt] (1) /home/len/tvm/build/libtvm.so(tvm::relay::OpImplementation::Schedule(tvm::Attrs const&, tvm::runtime::Array<tvm::te::Tensor, void> const&, tvm::Target const&)+0xb6) [0x7f6f81547b96] | |
[bt] (0) /home/len/tvm/build/libtvm.so(+0x52aec9) [0x7f6f80781ec9] | |
File "/home/len/tvm/python/tvm/_ffi/_ctypes/packed_func.py", line 81, in cfun | |
rv = local_pyfunc(*pyargs) | |
File "/home/len/tvm/python/tvm/relay/op/strategy/generic.py", line 35, in naive_schedule | |
raise RuntimeError( | |
RuntimeError: Cannot compile for GPU targets if no tuned schedule is found. Please see the warning messages above for more information about the failed workloads |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment