Skip to content

Instantly share code, notes, and snippets.

@yzhliu
Created January 4, 2018 21:39
Show Gist options
  • Save yzhliu/296ef231411fa5158d56ae8aa198916b to your computer and use it in GitHub Desktop.
Save yzhliu/296ef231411fa5158d56ae8aa198916b to your computer and use it in GitHub Desktop.
// Two scratch buffers are declared up front, each tagged with storage_scope "global".
// data_vec: 1*56*8*64*3*9 = 774144 float32 elements. It is written by the
// `produce data_vec` stage and read by the conv.global accumulation further down.
// attr [data_vec] storage_scope = "global"
allocate data_vec[float32 * 1 * 56 * 8 * 64 * 3 * 9]
// kernel_vec: 16*64*3*3*4 = 36864 float32 elements. It is written by the
// `produce kernel_vec` stage and read 4 elements at a time by the accumulation.
// attr [kernel_vec] storage_scope = "global"
allocate kernel_vec[float32 * 16 * 64 * 3 * 3 * 4]
// Stage 1: fill data_vec from the input buffer A, substituting 0 where the
// source coordinate falls outside the guarded range.
//
// Writing h = h.outer*2 + h.inner, the store index is
//   ((((h*8 + w)*64 + ci)*3 + vh)*9 + vw)
// which matches the [56][8][64][3][9] factors of the data_vec allocation.
//
// The load index into A expands to
//   ci*3136 + (h + vh - 1)*56 + (w*7 + vw - 1)
// and the predicate is equivalent to
//   1 <= h + vh < 57   and   1 <= w*7 + vw < 57,
// i.e. both (h + vh - 1) and (w*7 + vw - 1) lie in [0, 56).
// NOTE(review): this is consistent with A being 64 planes of 56x56 read with a
// one-element zero border, but A's declaration is not visible here -- confirm.
// NOTE(review): loop headers appear to be (variable, start, extent); the
// extents 28*2*8*64*3*9 multiply out to the allocation size -- confirm.
produce data_vec {
// The pragma attributes below are attached to the loop variables; their
// runtime meaning is not defined anywhere in this dump.
// attr [iter_var(h.outer, )] pragma_scope = "parallel_launch_point"
// attr [iter_var(h.outer, )] pragma_scope = "parallel_barrier_when_finish"
for (h.outer, 0, 28) {
// attr [iter_var(h.inner, )] pragma_scope = "parallel_stride_pattern"
parallel (h.inner, 0, 2) {
for (w, 0, 8) {
for (ci, 0, 64) {
for (vh, 0, 3) {
for (vw, 0, 9) {
// Guarded copy: take A[...] when all four bounds checks pass, else store 0.
data_vec[((((((((((h.outer*2) + h.inner)*8) + w)*64) + ci)*3) + vh)*9) + vw)] = tvm_if_then_else(((((((1 - vh) - h.inner) <= (h.outer*2)) && ((h.outer*2) < ((57 - vh) - h.inner))) && ((1 - vw) <= (w*7))) && ((w*7) < (57 - vw))), A[(((((((((h.outer*2) + h.inner)*8) + w) + (ci*448)) + (vh*8))*7) + vw) + -57)], 0.000000f)
}
}
}
}
}
}
}
// Stage 2: copy every element of the weight buffer W into kernel_vec in a
// different order, so that vc becomes the fastest-varying index.
//
// Writing co = co.outer*4 + co.inner, the store index is
//   ((((co*64 + ci)*3 + dh)*3 + dw)*4 + vc)
// which matches the [16][64][3][3][4] factors of the kernel_vec allocation.
//
// The load index into W expands to
//   (co*4 + vc)*576 + ci*9 + dh*3 + dw        (576 = 64*3*3)
// NOTE(review): this is consistent with W being laid out as [64][64][3][3]
// with (co*4 + vc) as the leading index, but W's declaration is not visible
// here -- confirm.
produce kernel_vec {
// The pragma attributes below are attached to the loop variables; their
// runtime meaning is not defined anywhere in this dump.
// attr [iter_var(co.outer, )] pragma_scope = "parallel_launch_point"
// attr [iter_var(co.outer, )] pragma_scope = "parallel_barrier_when_finish"
for (co.outer, 0, 4) {
// attr [iter_var(co.inner, )] pragma_scope = "parallel_stride_pattern"
parallel (co.inner, 0, 4) {
for (ci, 0, 64) {
for (dh, 0, 3) {
for (dw, 0, 3) {
for (vc, 0, 4) {
// Plain element copy; only the index order differs between the two sides.
kernel_vec[((((((((((co.outer*4) + co.inner)*64) + ci)*3) + dh)*3) + dw)*4) + vc)] = W[(((((((((co.outer*4) + co.inner)*256) + ci)*3) + dh)*3) + dw) + (vc*576))]
}
}
}
}
}
}
}
// Stage 3: for each (co, h.outer, w.outer) tile, accumulate 7 groups of 4
// values from data_vec and kernel_vec, then scatter the 28 results into
// output_unpack. Here co = co.outer.outer*4 + co.outer.inner.
//
// NOTE(review): ramp(b, 1, 4) is read below as "the 4 consecutive indices
// b..b+3" and x4(v) as "scalar v replicated to 4 lanes"; neither is defined
// in this dump -- confirm against the IR printer that produced it.
produce output_unpack {
// The pragma attributes below are attached to the loop variables; their
// runtime meaning is not defined anywhere in this dump.
// attr [iter_var(co.outer.outer, )] pragma_scope = "parallel_launch_point"
// attr [iter_var(co.outer.outer, )] pragma_scope = "parallel_barrier_when_finish"
for (co.outer.outer, 0, 4) {
// attr [iter_var(co.outer.inner, )] pragma_scope = "parallel_stride_pattern"
parallel (co.outer.inner, 0, 4) {
// Both temporaries are allocated inside the parallel loop body.
// conv: 7*4 = 28 float32 elements.
// attr [conv] storage_scope = "global"
allocate conv[float32 * 1 * 1 * 1 * 1 * 1 * 7 * 4]
// conv.global: 7 elements of type float32x4; it is indexed below with
// ramps starting at 0, 4, ..., 24.
// attr [conv.global] storage_scope = "global"
allocate conv.global[float32x4 * 1 * 1 * 1 * 1 * 1 * 7 * 1]
for (h.outer, 0, 56) {
for (w.outer, 0, 8) {
produce conv {
produce conv.global {
// Reset the 7 accumulator groups to zero for this tile.
conv.global[ramp(0, 1, 4)] = x4(0.000000f)
conv.global[ramp(4, 1, 4)] = x4(0.000000f)
conv.global[ramp(8, 1, 4)] = x4(0.000000f)
conv.global[ramp(12, 1, 4)] = x4(0.000000f)
conv.global[ramp(16, 1, 4)] = x4(0.000000f)
conv.global[ramp(20, 1, 4)] = x4(0.000000f)
conv.global[ramp(24, 1, 4)] = x4(0.000000f)
// Reduction over ci, dh and dw. The 7 statements in the body are identical
// except for the accumulator group (ramp base 0, 4, ..., 24) and the offset
// k = 0..6 added to the data_vec index, so the last data_vec coordinate read
// is dw + k (range 0..8). All 7 statements read the same kernel_vec group,
// whose base is ((((co*64 + ci)*3 + dh)*3 + dw)*4).
for (ci, 0, 64) {
for (dh, 0, 3) {
for (dw, 0, 3) {
conv.global[ramp(0, 1, 4)] = (conv.global[ramp(0, 1, 4)] + (x4(data_vec[((((((((h.outer*8) + w.outer)*64) + ci)*3) + dh)*9) + dw)])*kernel_vec[ramp((((((((((co.outer.outer*4) + co.outer.inner)*64) + ci)*3) + dh)*3) + dw)*4), 1, 4)]))
conv.global[ramp(4, 1, 4)] = (conv.global[ramp(4, 1, 4)] + (x4(data_vec[(((((((((h.outer*8) + w.outer)*64) + ci)*3) + dh)*9) + dw) + 1)])*kernel_vec[ramp((((((((((co.outer.outer*4) + co.outer.inner)*64) + ci)*3) + dh)*3) + dw)*4), 1, 4)]))
conv.global[ramp(8, 1, 4)] = (conv.global[ramp(8, 1, 4)] + (x4(data_vec[(((((((((h.outer*8) + w.outer)*64) + ci)*3) + dh)*9) + dw) + 2)])*kernel_vec[ramp((((((((((co.outer.outer*4) + co.outer.inner)*64) + ci)*3) + dh)*3) + dw)*4), 1, 4)]))
conv.global[ramp(12, 1, 4)] = (conv.global[ramp(12, 1, 4)] + (x4(data_vec[(((((((((h.outer*8) + w.outer)*64) + ci)*3) + dh)*9) + dw) + 3)])*kernel_vec[ramp((((((((((co.outer.outer*4) + co.outer.inner)*64) + ci)*3) + dh)*3) + dw)*4), 1, 4)]))
conv.global[ramp(16, 1, 4)] = (conv.global[ramp(16, 1, 4)] + (x4(data_vec[(((((((((h.outer*8) + w.outer)*64) + ci)*3) + dh)*9) + dw) + 4)])*kernel_vec[ramp((((((((((co.outer.outer*4) + co.outer.inner)*64) + ci)*3) + dh)*3) + dw)*4), 1, 4)]))
conv.global[ramp(20, 1, 4)] = (conv.global[ramp(20, 1, 4)] + (x4(data_vec[(((((((((h.outer*8) + w.outer)*64) + ci)*3) + dh)*9) + dw) + 5)])*kernel_vec[ramp((((((((((co.outer.outer*4) + co.outer.inner)*64) + ci)*3) + dh)*3) + dw)*4), 1, 4)]))
conv.global[ramp(24, 1, 4)] = (conv.global[ramp(24, 1, 4)] + (x4(data_vec[(((((((((h.outer*8) + w.outer)*64) + ci)*3) + dh)*9) + dw) + 6)])*kernel_vec[ramp((((((((((co.outer.outer*4) + co.outer.inner)*64) + ci)*3) + dh)*3) + dw)*4), 1, 4)]))
}
}
}
}
// Copy the 7 finished accumulator groups from conv.global into conv unchanged.
conv[ramp(0, 1, 4)] = conv.global[ramp(0, 1, 4)]
conv[ramp(4, 1, 4)] = conv.global[ramp(4, 1, 4)]
conv[ramp(8, 1, 4)] = conv.global[ramp(8, 1, 4)]
conv[ramp(12, 1, 4)] = conv.global[ramp(12, 1, 4)]
conv[ramp(16, 1, 4)] = conv.global[ramp(16, 1, 4)]
conv[ramp(20, 1, 4)] = conv.global[ramp(20, 1, 4)]
conv[ramp(24, 1, 4)] = conv.global[ramp(24, 1, 4)]
}
// Scatter the tile: conv is read at w.inner*4 + co.inner, and the
// output_unpack index expands to
//   (co*4 + co.inner)*3136 + h.outer*56 + w.outer*7 + w.inner   (3136 = 56*56)
// NOTE(review): consistent with output_unpack being laid out as [64][56][56],
// but its declaration is not visible here -- confirm.
for (w.inner, 0, 7) {
for (co.inner, 0, 4) {
output_unpack[(((((((((co.outer.outer*4) + co.outer.inner)*224) + h.outer)*8) + w.outer)*7) + w.inner) + (co.inner*3136))] = conv[((w.inner*4) + co.inner)]
}
}
}
}
}
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment