Skip to content

Instantly share code, notes, and snippets.

@comaniac
Last active October 15, 2020 21:49
Show Gist options
  • Save comaniac/d737b23d8968059ee696abcf0f85739e to your computer and use it in GitHub Desktop.
Save comaniac/d737b23d8968059ee696abcf0f85739e to your computer and use it in GitHub Desktop.
# Unpack the loop variables of each stage into individual names: spatial axes
# (op.axis) first, followed by reduction axes (op.reduce_axis).
# NOTE(review): the tensors (extracted_reduction, pad_temp_data_grad, pad_temp,
# compute_kernel_grad) and the schedule `s` used below are not defined in this
# snippet -- presumably supplied by the surrounding context; confirm.
# extracted_reduction: 4 spatial axes + 3 reduction axes.
extracted_reduction_ax0, extracted_reduction_ax1, extracted_reduction_ax2, extracted_reduction_ax3, extracted_reduction_n0_n0_k1_shifted_shifted, extracted_reduction_n1_n1_k2_shifted_shifted, extracted_reduction_n2_n2_k3_shifted_shifted = tuple(extracted_reduction.op.axis) + tuple(extracted_reduction.op.reduce_axis)
# pad_temp_data_grad and pad_temp: only 4 names are unpacked, so the
# reduce_axis tuple must be empty for the unpacking to succeed.
pad_temp_data_grad_ax0, pad_temp_data_grad_ax1, pad_temp_data_grad_ax2, pad_temp_data_grad_ax3 = tuple(pad_temp_data_grad.op.axis) + tuple(pad_temp_data_grad.op.reduce_axis)
pad_temp_i0, pad_temp_i1, pad_temp_i2, pad_temp_i3 = tuple(pad_temp.op.axis) + tuple(pad_temp.op.reduce_axis)
# compute_kernel_grad: 4 spatial axes + 3 reduction axes.
compute_kernel_grad_ax0, compute_kernel_grad_ax1, compute_kernel_grad_ax2, compute_kernel_grad_ax3, compute_kernel_grad_n0_n0_k0_shifted_shifted, compute_kernel_grad_n1_n1_k2_shifted_shifted, compute_kernel_grad_n2_n2_k3_shifted_shifted = tuple(compute_kernel_grad.op.axis) + tuple(compute_kernel_grad.op.reduce_axis)
# Add a "local"-scope cache-write stage for compute_kernel_grad and tile it.
compute_kernel_grad_local, = s.cache_write([compute_kernel_grad], "local")
# Axes of the new cache stage: 4 spatial (suffix _c) + 3 reduction.
compute_kernel_grad_local_ax0_c, compute_kernel_grad_local_ax1_c, compute_kernel_grad_local_ax2_c, compute_kernel_grad_local_ax3_c, compute_kernel_grad_local_n0_n0_k0_shifted_shifted, compute_kernel_grad_local_n1_n1_k2_shifted_shifted, compute_kernel_grad_local_n2_n2_k3_shifted_shifted = tuple(compute_kernel_grad_local.op.axis) + tuple(compute_kernel_grad_local.op.reduce_axis)
# Each spatial axis is split four times, innermost piece first, yielding five
# loops per axis: *_o_o_o_o, *_o_o_o_i, *_o_o_i, *_o_i and *_i.
# ax0: factors 1, 2, 8, 1.
compute_kernel_grad_local_ax0_c_o_i, compute_kernel_grad_local_ax0_c_i = s[compute_kernel_grad_local].split(compute_kernel_grad_local_ax0_c, factor=1)
compute_kernel_grad_local_ax0_c_o_o_i, compute_kernel_grad_local_ax0_c_o_i = s[compute_kernel_grad_local].split(compute_kernel_grad_local_ax0_c_o_i, factor=2)
compute_kernel_grad_local_ax0_c_o_o_o_i, compute_kernel_grad_local_ax0_c_o_o_i = s[compute_kernel_grad_local].split(compute_kernel_grad_local_ax0_c_o_o_i, factor=8)
compute_kernel_grad_local_ax0_c_o_o_o_o, compute_kernel_grad_local_ax0_c_o_o_o_i = s[compute_kernel_grad_local].split(compute_kernel_grad_local_ax0_c_o_o_o_i, factor=1)
# ax1: factors 1, 1, 1, 1.
compute_kernel_grad_local_ax1_c_o_i, compute_kernel_grad_local_ax1_c_i = s[compute_kernel_grad_local].split(compute_kernel_grad_local_ax1_c, factor=1)
compute_kernel_grad_local_ax1_c_o_o_i, compute_kernel_grad_local_ax1_c_o_i = s[compute_kernel_grad_local].split(compute_kernel_grad_local_ax1_c_o_i, factor=1)
compute_kernel_grad_local_ax1_c_o_o_o_i, compute_kernel_grad_local_ax1_c_o_o_i = s[compute_kernel_grad_local].split(compute_kernel_grad_local_ax1_c_o_o_i, factor=1)
compute_kernel_grad_local_ax1_c_o_o_o_o, compute_kernel_grad_local_ax1_c_o_o_o_i = s[compute_kernel_grad_local].split(compute_kernel_grad_local_ax1_c_o_o_o_i, factor=1)
# ax2: factors 1, 1, 7, 1.
compute_kernel_grad_local_ax2_c_o_i, compute_kernel_grad_local_ax2_c_i = s[compute_kernel_grad_local].split(compute_kernel_grad_local_ax2_c, factor=1)
compute_kernel_grad_local_ax2_c_o_o_i, compute_kernel_grad_local_ax2_c_o_i = s[compute_kernel_grad_local].split(compute_kernel_grad_local_ax2_c_o_i, factor=1)
compute_kernel_grad_local_ax2_c_o_o_o_i, compute_kernel_grad_local_ax2_c_o_o_i = s[compute_kernel_grad_local].split(compute_kernel_grad_local_ax2_c_o_o_i, factor=7)
compute_kernel_grad_local_ax2_c_o_o_o_o, compute_kernel_grad_local_ax2_c_o_o_o_i = s[compute_kernel_grad_local].split(compute_kernel_grad_local_ax2_c_o_o_o_i, factor=1)
# ax3: factors 7, 1, 1, 1.
compute_kernel_grad_local_ax3_c_o_i, compute_kernel_grad_local_ax3_c_i = s[compute_kernel_grad_local].split(compute_kernel_grad_local_ax3_c, factor=7)
compute_kernel_grad_local_ax3_c_o_o_i, compute_kernel_grad_local_ax3_c_o_i = s[compute_kernel_grad_local].split(compute_kernel_grad_local_ax3_c_o_i, factor=1)
compute_kernel_grad_local_ax3_c_o_o_o_i, compute_kernel_grad_local_ax3_c_o_o_i = s[compute_kernel_grad_local].split(compute_kernel_grad_local_ax3_c_o_o_i, factor=1)
compute_kernel_grad_local_ax3_c_o_o_o_o, compute_kernel_grad_local_ax3_c_o_o_o_i = s[compute_kernel_grad_local].split(compute_kernel_grad_local_ax3_c_o_o_o_i, factor=1)
# Each reduction axis is split twice, yielding three loops per axis:
# *_o_o, *_o_i and *_i (factors 1/4, 1/1 and 1/16 respectively).
compute_kernel_grad_local_n0_n0_k0_shifted_shifted_o_i, compute_kernel_grad_local_n0_n0_k0_shifted_shifted_i = s[compute_kernel_grad_local].split(compute_kernel_grad_local_n0_n0_k0_shifted_shifted, factor=1)
compute_kernel_grad_local_n0_n0_k0_shifted_shifted_o_o, compute_kernel_grad_local_n0_n0_k0_shifted_shifted_o_i = s[compute_kernel_grad_local].split(compute_kernel_grad_local_n0_n0_k0_shifted_shifted_o_i, factor=4)
compute_kernel_grad_local_n1_n1_k2_shifted_shifted_o_i, compute_kernel_grad_local_n1_n1_k2_shifted_shifted_i = s[compute_kernel_grad_local].split(compute_kernel_grad_local_n1_n1_k2_shifted_shifted, factor=1)
compute_kernel_grad_local_n1_n1_k2_shifted_shifted_o_o, compute_kernel_grad_local_n1_n1_k2_shifted_shifted_o_i = s[compute_kernel_grad_local].split(compute_kernel_grad_local_n1_n1_k2_shifted_shifted_o_i, factor=1)
compute_kernel_grad_local_n2_n2_k3_shifted_shifted_o_i, compute_kernel_grad_local_n2_n2_k3_shifted_shifted_i = s[compute_kernel_grad_local].split(compute_kernel_grad_local_n2_n2_k3_shifted_shifted, factor=1)
compute_kernel_grad_local_n2_n2_k3_shifted_shifted_o_o, compute_kernel_grad_local_n2_n2_k3_shifted_shifted_o_i = s[compute_kernel_grad_local].split(compute_kernel_grad_local_n2_n2_k3_shifted_shifted_o_i, factor=16)
# Loop order, outermost first: spatial o_o_o_o, spatial o_o_o_i, spatial o_o_i,
# reduce o_o, reduce o_i, spatial o_i, reduce i, spatial i.
s[compute_kernel_grad_local].reorder(compute_kernel_grad_local_ax0_c_o_o_o_o, compute_kernel_grad_local_ax1_c_o_o_o_o, compute_kernel_grad_local_ax2_c_o_o_o_o, compute_kernel_grad_local_ax3_c_o_o_o_o, compute_kernel_grad_local_ax0_c_o_o_o_i, compute_kernel_grad_local_ax1_c_o_o_o_i, compute_kernel_grad_local_ax2_c_o_o_o_i, compute_kernel_grad_local_ax3_c_o_o_o_i, compute_kernel_grad_local_ax0_c_o_o_i, compute_kernel_grad_local_ax1_c_o_o_i, compute_kernel_grad_local_ax2_c_o_o_i, compute_kernel_grad_local_ax3_c_o_o_i, compute_kernel_grad_local_n0_n0_k0_shifted_shifted_o_o, compute_kernel_grad_local_n1_n1_k2_shifted_shifted_o_o, compute_kernel_grad_local_n2_n2_k3_shifted_shifted_o_o, compute_kernel_grad_local_n0_n0_k0_shifted_shifted_o_i, compute_kernel_grad_local_n1_n1_k2_shifted_shifted_o_i, compute_kernel_grad_local_n2_n2_k3_shifted_shifted_o_i, compute_kernel_grad_local_ax0_c_o_i, compute_kernel_grad_local_ax1_c_o_i, compute_kernel_grad_local_ax2_c_o_i, compute_kernel_grad_local_ax3_c_o_i, compute_kernel_grad_local_n0_n0_k0_shifted_shifted_i, compute_kernel_grad_local_n1_n1_k2_shifted_shifted_i, compute_kernel_grad_local_n2_n2_k3_shifted_shifted_i, compute_kernel_grad_local_ax0_c_i, compute_kernel_grad_local_ax1_c_i, compute_kernel_grad_local_ax2_c_i, compute_kernel_grad_local_ax3_c_i)
# Tile the compute_kernel_grad stage itself: each spatial axis is split three
# times, yielding four loops per axis (*_o_o_o, *_o_o_i, *_o_i, *_i).
# ax0: factors 2, 8, 1.
compute_kernel_grad_ax0_o_i, compute_kernel_grad_ax0_i = s[compute_kernel_grad].split(compute_kernel_grad_ax0, factor=2)
compute_kernel_grad_ax0_o_o_i, compute_kernel_grad_ax0_o_i = s[compute_kernel_grad].split(compute_kernel_grad_ax0_o_i, factor=8)
compute_kernel_grad_ax0_o_o_o, compute_kernel_grad_ax0_o_o_i = s[compute_kernel_grad].split(compute_kernel_grad_ax0_o_o_i, factor=1)
# ax1: factors 1, 1, 1.
compute_kernel_grad_ax1_o_i, compute_kernel_grad_ax1_i = s[compute_kernel_grad].split(compute_kernel_grad_ax1, factor=1)
compute_kernel_grad_ax1_o_o_i, compute_kernel_grad_ax1_o_i = s[compute_kernel_grad].split(compute_kernel_grad_ax1_o_i, factor=1)
compute_kernel_grad_ax1_o_o_o, compute_kernel_grad_ax1_o_o_i = s[compute_kernel_grad].split(compute_kernel_grad_ax1_o_o_i, factor=1)
# ax2: factors 1, 7, 1.
compute_kernel_grad_ax2_o_i, compute_kernel_grad_ax2_i = s[compute_kernel_grad].split(compute_kernel_grad_ax2, factor=1)
compute_kernel_grad_ax2_o_o_i, compute_kernel_grad_ax2_o_i = s[compute_kernel_grad].split(compute_kernel_grad_ax2_o_i, factor=7)
compute_kernel_grad_ax2_o_o_o, compute_kernel_grad_ax2_o_o_i = s[compute_kernel_grad].split(compute_kernel_grad_ax2_o_o_i, factor=1)
# ax3: factors 7, 1, 1.
compute_kernel_grad_ax3_o_i, compute_kernel_grad_ax3_i = s[compute_kernel_grad].split(compute_kernel_grad_ax3, factor=7)
compute_kernel_grad_ax3_o_o_i, compute_kernel_grad_ax3_o_i = s[compute_kernel_grad].split(compute_kernel_grad_ax3_o_i, factor=1)
compute_kernel_grad_ax3_o_o_o, compute_kernel_grad_ax3_o_o_i = s[compute_kernel_grad].split(compute_kernel_grad_ax3_o_o_i, factor=1)
# Group the loops by tiling level: all o_o_o, then all o_o_i, all o_i, all i.
s[compute_kernel_grad].reorder(compute_kernel_grad_ax0_o_o_o, compute_kernel_grad_ax1_o_o_o, compute_kernel_grad_ax2_o_o_o, compute_kernel_grad_ax3_o_o_o, compute_kernel_grad_ax0_o_o_i, compute_kernel_grad_ax1_o_o_i, compute_kernel_grad_ax2_o_o_i, compute_kernel_grad_ax3_o_o_i, compute_kernel_grad_ax0_o_i, compute_kernel_grad_ax1_o_i, compute_kernel_grad_ax2_o_i, compute_kernel_grad_ax3_o_i, compute_kernel_grad_ax0_i, compute_kernel_grad_ax1_i, compute_kernel_grad_ax2_i, compute_kernel_grad_ax3_i)
# Nest the local cache stage under ax3_o_i, the last loop of the o_i group in
# the reorder above.
s[compute_kernel_grad_local].compute_at(s[compute_kernel_grad], compute_kernel_grad_ax3_o_i)
# Add a "shared"-scope cache-read stage of pad_temp for the local stage.
pad_temp_shared = s.cache_read(pad_temp, "shared", [compute_kernel_grad_local])
pad_temp_shared_ax0, pad_temp_shared_ax1, pad_temp_shared_ax2, pad_temp_shared_ax3 = tuple(pad_temp_shared.op.axis)
# Attach the shared copy at the last of the local stage's reduce *_o_o loops.
s[pad_temp_shared].compute_at(s[compute_kernel_grad_local], compute_kernel_grad_local_n2_n2_k3_shifted_shifted_o_o)
# Inline pad_temp into its consumers instead of keeping it as its own stage.
s[pad_temp].compute_inline()
# Add a "local"-scope cache-write stage for extracted_reduction and tile it.
extracted_reduction_local, = s.cache_write([extracted_reduction], "local")
# Axes of the new cache stage: 4 spatial (suffix _c) + 3 reduction.
extracted_reduction_local_ax0_c, extracted_reduction_local_ax1_c, extracted_reduction_local_ax2_c, extracted_reduction_local_ax3_c, extracted_reduction_local_n0_n0_k1_shifted_shifted, extracted_reduction_local_n1_n1_k2_shifted_shifted, extracted_reduction_local_n2_n2_k3_shifted_shifted = tuple(extracted_reduction_local.op.axis) + tuple(extracted_reduction_local.op.reduce_axis)
# Each spatial axis is split four times, innermost piece first, yielding five
# loops per axis: *_o_o_o_o, *_o_o_o_i, *_o_o_i, *_o_i and *_i.
# ax0: factors 1, 1, 1, 1.
extracted_reduction_local_ax0_c_o_i, extracted_reduction_local_ax0_c_i = s[extracted_reduction_local].split(extracted_reduction_local_ax0_c, factor=1)
extracted_reduction_local_ax0_c_o_o_i, extracted_reduction_local_ax0_c_o_i = s[extracted_reduction_local].split(extracted_reduction_local_ax0_c_o_i, factor=1)
extracted_reduction_local_ax0_c_o_o_o_i, extracted_reduction_local_ax0_c_o_o_i = s[extracted_reduction_local].split(extracted_reduction_local_ax0_c_o_o_i, factor=1)
extracted_reduction_local_ax0_c_o_o_o_o, extracted_reduction_local_ax0_c_o_o_o_i = s[extracted_reduction_local].split(extracted_reduction_local_ax0_c_o_o_o_i, factor=1)
# ax1: factors 1, 1, 1, 3.
extracted_reduction_local_ax1_c_o_i, extracted_reduction_local_ax1_c_i = s[extracted_reduction_local].split(extracted_reduction_local_ax1_c, factor=1)
extracted_reduction_local_ax1_c_o_o_i, extracted_reduction_local_ax1_c_o_i = s[extracted_reduction_local].split(extracted_reduction_local_ax1_c_o_i, factor=1)
extracted_reduction_local_ax1_c_o_o_o_i, extracted_reduction_local_ax1_c_o_o_i = s[extracted_reduction_local].split(extracted_reduction_local_ax1_c_o_o_i, factor=1)
extracted_reduction_local_ax1_c_o_o_o_o, extracted_reduction_local_ax1_c_o_o_o_i = s[extracted_reduction_local].split(extracted_reduction_local_ax1_c_o_o_o_i, factor=3)
# ax2: factors 1, 1, 230, 1.
extracted_reduction_local_ax2_c_o_i, extracted_reduction_local_ax2_c_i = s[extracted_reduction_local].split(extracted_reduction_local_ax2_c, factor=1)
extracted_reduction_local_ax2_c_o_o_i, extracted_reduction_local_ax2_c_o_i = s[extracted_reduction_local].split(extracted_reduction_local_ax2_c_o_i, factor=1)
extracted_reduction_local_ax2_c_o_o_o_i, extracted_reduction_local_ax2_c_o_o_i = s[extracted_reduction_local].split(extracted_reduction_local_ax2_c_o_o_i, factor=230)
extracted_reduction_local_ax2_c_o_o_o_o, extracted_reduction_local_ax2_c_o_o_o_i = s[extracted_reduction_local].split(extracted_reduction_local_ax2_c_o_o_o_i, factor=1)
# ax3: factors 23, 1, 1, 1.
extracted_reduction_local_ax3_c_o_i, extracted_reduction_local_ax3_c_i = s[extracted_reduction_local].split(extracted_reduction_local_ax3_c, factor=23)
extracted_reduction_local_ax3_c_o_o_i, extracted_reduction_local_ax3_c_o_i = s[extracted_reduction_local].split(extracted_reduction_local_ax3_c_o_i, factor=1)
extracted_reduction_local_ax3_c_o_o_o_i, extracted_reduction_local_ax3_c_o_o_i = s[extracted_reduction_local].split(extracted_reduction_local_ax3_c_o_o_i, factor=1)
extracted_reduction_local_ax3_c_o_o_o_o, extracted_reduction_local_ax3_c_o_o_o_i = s[extracted_reduction_local].split(extracted_reduction_local_ax3_c_o_o_o_i, factor=1)
# Each reduction axis is split twice, yielding three loops per axis:
# *_o_o, *_o_i and *_i (factors 2/2, 2/2 and 1/1 respectively).
extracted_reduction_local_n0_n0_k1_shifted_shifted_o_i, extracted_reduction_local_n0_n0_k1_shifted_shifted_i = s[extracted_reduction_local].split(extracted_reduction_local_n0_n0_k1_shifted_shifted, factor=2)
extracted_reduction_local_n0_n0_k1_shifted_shifted_o_o, extracted_reduction_local_n0_n0_k1_shifted_shifted_o_i = s[extracted_reduction_local].split(extracted_reduction_local_n0_n0_k1_shifted_shifted_o_i, factor=2)
extracted_reduction_local_n1_n1_k2_shifted_shifted_o_i, extracted_reduction_local_n1_n1_k2_shifted_shifted_i = s[extracted_reduction_local].split(extracted_reduction_local_n1_n1_k2_shifted_shifted, factor=2)
extracted_reduction_local_n1_n1_k2_shifted_shifted_o_o, extracted_reduction_local_n1_n1_k2_shifted_shifted_o_i = s[extracted_reduction_local].split(extracted_reduction_local_n1_n1_k2_shifted_shifted_o_i, factor=2)
extracted_reduction_local_n2_n2_k3_shifted_shifted_o_i, extracted_reduction_local_n2_n2_k3_shifted_shifted_i = s[extracted_reduction_local].split(extracted_reduction_local_n2_n2_k3_shifted_shifted, factor=1)
extracted_reduction_local_n2_n2_k3_shifted_shifted_o_o, extracted_reduction_local_n2_n2_k3_shifted_shifted_o_i = s[extracted_reduction_local].split(extracted_reduction_local_n2_n2_k3_shifted_shifted_o_i, factor=1)
# Loop order, outermost first: spatial o_o_o_o, spatial o_o_o_i, spatial o_o_i,
# reduce o_o, reduce o_i, spatial o_i, reduce i, spatial i.
s[extracted_reduction_local].reorder(extracted_reduction_local_ax0_c_o_o_o_o, extracted_reduction_local_ax1_c_o_o_o_o, extracted_reduction_local_ax2_c_o_o_o_o, extracted_reduction_local_ax3_c_o_o_o_o, extracted_reduction_local_ax0_c_o_o_o_i, extracted_reduction_local_ax1_c_o_o_o_i, extracted_reduction_local_ax2_c_o_o_o_i, extracted_reduction_local_ax3_c_o_o_o_i, extracted_reduction_local_ax0_c_o_o_i, extracted_reduction_local_ax1_c_o_o_i, extracted_reduction_local_ax2_c_o_o_i, extracted_reduction_local_ax3_c_o_o_i, extracted_reduction_local_n0_n0_k1_shifted_shifted_o_o, extracted_reduction_local_n1_n1_k2_shifted_shifted_o_o, extracted_reduction_local_n2_n2_k3_shifted_shifted_o_o, extracted_reduction_local_n0_n0_k1_shifted_shifted_o_i, extracted_reduction_local_n1_n1_k2_shifted_shifted_o_i, extracted_reduction_local_n2_n2_k3_shifted_shifted_o_i, extracted_reduction_local_ax0_c_o_i, extracted_reduction_local_ax1_c_o_i, extracted_reduction_local_ax2_c_o_i, extracted_reduction_local_ax3_c_o_i, extracted_reduction_local_n0_n0_k1_shifted_shifted_i, extracted_reduction_local_n1_n1_k2_shifted_shifted_i, extracted_reduction_local_n2_n2_k3_shifted_shifted_i, extracted_reduction_local_ax0_c_i, extracted_reduction_local_ax1_c_i, extracted_reduction_local_ax2_c_i, extracted_reduction_local_ax3_c_i)
# Tile the extracted_reduction stage itself: each spatial axis is split three
# times, yielding four loops per axis (*_o_o_o, *_o_o_i, *_o_i, *_i).
# ax0: factors 1, 1, 1.
extracted_reduction_ax0_o_i, extracted_reduction_ax0_i = s[extracted_reduction].split(extracted_reduction_ax0, factor=1)
extracted_reduction_ax0_o_o_i, extracted_reduction_ax0_o_i = s[extracted_reduction].split(extracted_reduction_ax0_o_i, factor=1)
extracted_reduction_ax0_o_o_o, extracted_reduction_ax0_o_o_i = s[extracted_reduction].split(extracted_reduction_ax0_o_o_i, factor=1)
# ax1: factors 1, 1, 3.
extracted_reduction_ax1_o_i, extracted_reduction_ax1_i = s[extracted_reduction].split(extracted_reduction_ax1, factor=1)
extracted_reduction_ax1_o_o_i, extracted_reduction_ax1_o_i = s[extracted_reduction].split(extracted_reduction_ax1_o_i, factor=1)
extracted_reduction_ax1_o_o_o, extracted_reduction_ax1_o_o_i = s[extracted_reduction].split(extracted_reduction_ax1_o_o_i, factor=3)
# ax2: factors 1, 230, 1.
extracted_reduction_ax2_o_i, extracted_reduction_ax2_i = s[extracted_reduction].split(extracted_reduction_ax2, factor=1)
extracted_reduction_ax2_o_o_i, extracted_reduction_ax2_o_i = s[extracted_reduction].split(extracted_reduction_ax2_o_i, factor=230)
extracted_reduction_ax2_o_o_o, extracted_reduction_ax2_o_o_i = s[extracted_reduction].split(extracted_reduction_ax2_o_o_i, factor=1)
# ax3: factors 23, 1, 1.
extracted_reduction_ax3_o_i, extracted_reduction_ax3_i = s[extracted_reduction].split(extracted_reduction_ax3, factor=23)
extracted_reduction_ax3_o_o_i, extracted_reduction_ax3_o_i = s[extracted_reduction].split(extracted_reduction_ax3_o_i, factor=1)
extracted_reduction_ax3_o_o_o, extracted_reduction_ax3_o_o_i = s[extracted_reduction].split(extracted_reduction_ax3_o_o_i, factor=1)
# Group the loops by tiling level: all o_o_o, then all o_o_i, all o_i, all i.
s[extracted_reduction].reorder(extracted_reduction_ax0_o_o_o, extracted_reduction_ax1_o_o_o, extracted_reduction_ax2_o_o_o, extracted_reduction_ax3_o_o_o, extracted_reduction_ax0_o_o_i, extracted_reduction_ax1_o_o_i, extracted_reduction_ax2_o_o_i, extracted_reduction_ax3_o_o_i, extracted_reduction_ax0_o_i, extracted_reduction_ax1_o_i, extracted_reduction_ax2_o_i, extracted_reduction_ax3_o_i, extracted_reduction_ax0_i, extracted_reduction_ax1_i, extracted_reduction_ax2_i, extracted_reduction_ax3_i)
# Nest the local cache stage under ax3_o_i, the last loop of the o_i group in
# the reorder above.
s[extracted_reduction_local].compute_at(s[extracted_reduction], extracted_reduction_ax3_o_i)
# "shared"-scope cache reads of kernel and dy for extracted_reduction_local,
# both attached at the last of that stage's reduce *_o_o loops.
# NOTE(review): `kernel` and `dy` are not defined in this snippet -- confirm
# they are the placeholders of the workload being scheduled.
kernel_shared = s.cache_read(kernel, "shared", [extracted_reduction_local])
kernel_shared_ax0, kernel_shared_ax1, kernel_shared_ax2, kernel_shared_ax3 = tuple(kernel_shared.op.axis)
s[kernel_shared].compute_at(s[extracted_reduction_local], extracted_reduction_local_n2_n2_k3_shifted_shifted_o_o)
dy_shared = s.cache_read(dy, "shared", [extracted_reduction_local])
dy_shared_ax0, dy_shared_ax1, dy_shared_ax2, dy_shared_ax3 = tuple(dy_shared.op.axis)
s[dy_shared].compute_at(s[extracted_reduction_local], extracted_reduction_local_n2_n2_k3_shifted_shifted_o_o)
# A second, separate "shared" cache read of dy, this one feeding
# compute_kernel_grad_local.
dy_d_shared = s.cache_read(dy, "shared", [compute_kernel_grad_local])
dy_d_shared_ax0, dy_d_shared_ax1, dy_d_shared_ax2, dy_d_shared_ax3 = tuple(dy_d_shared.op.axis)
s[dy_d_shared].compute_at(s[compute_kernel_grad_local], compute_kernel_grad_local_n2_n2_k3_shifted_shifted_o_o)
# compute_kernel_grad: fuse each tiling level across the four axes and bind
# the fused loops to blockIdx.x (o_o_o), vthread (o_o_i) and threadIdx.x (o_i).
compute_kernel_grad_ax0_o_o_o_ax1_o_o_o_fused_ax2_o_o_o_fused_ax3_o_o_o_fused = s[compute_kernel_grad].fuse(compute_kernel_grad_ax0_o_o_o, compute_kernel_grad_ax1_o_o_o, compute_kernel_grad_ax2_o_o_o, compute_kernel_grad_ax3_o_o_o)
s[compute_kernel_grad].bind(compute_kernel_grad_ax0_o_o_o_ax1_o_o_o_fused_ax2_o_o_o_fused_ax3_o_o_o_fused, te.thread_axis("blockIdx.x"))
compute_kernel_grad_ax0_o_o_i_ax1_o_o_i_fused_ax2_o_o_i_fused_ax3_o_o_i_fused = s[compute_kernel_grad].fuse(compute_kernel_grad_ax0_o_o_i, compute_kernel_grad_ax1_o_o_i, compute_kernel_grad_ax2_o_o_i, compute_kernel_grad_ax3_o_o_i)
s[compute_kernel_grad].bind(compute_kernel_grad_ax0_o_o_i_ax1_o_o_i_fused_ax2_o_o_i_fused_ax3_o_o_i_fused, te.thread_axis("vthread"))
compute_kernel_grad_ax0_o_i_ax1_o_i_fused_ax2_o_i_fused_ax3_o_i_fused = s[compute_kernel_grad].fuse(compute_kernel_grad_ax0_o_i, compute_kernel_grad_ax1_o_i, compute_kernel_grad_ax2_o_i, compute_kernel_grad_ax3_o_i)
s[compute_kernel_grad].bind(compute_kernel_grad_ax0_o_i_ax1_o_i_fused_ax2_o_i_fused_ax3_o_i_fused, te.thread_axis("threadIdx.x"))
# pad_temp_shared: fuse all axes, vectorize an inner loop of factor 1, then
# bind a loop of factor 56 to threadIdx.x. 56 equals the product of the *_o_i
# split factors of compute_kernel_grad (8 * 1 * 7 * 1) bound to threadIdx.x
# above.
pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused = s[pad_temp_shared].fuse(pad_temp_shared_ax0, pad_temp_shared_ax1, pad_temp_shared_ax2, pad_temp_shared_ax3)
pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o, pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused_i = s[pad_temp_shared].split(pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused, factor=1)
s[pad_temp_shared].vectorize(pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused_i)
pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o_o, pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o_i = s[pad_temp_shared].split(pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o, factor=56)
s[pad_temp_shared].bind(pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o_i, te.thread_axis("threadIdx.x"))
# pad_temp_data_grad: fuse all axes and split by 32; the outer loop is bound
# to blockIdx.x and the inner (factor-32) loop to threadIdx.x.
pad_temp_data_grad_ax0_ax1_fused_ax2_fused_ax3_fused = s[pad_temp_data_grad].fuse(pad_temp_data_grad_ax0, pad_temp_data_grad_ax1, pad_temp_data_grad_ax2, pad_temp_data_grad_ax3)
pad_temp_data_grad_ax0_ax1_fused_ax2_fused_ax3_fused_o, pad_temp_data_grad_ax0_ax1_fused_ax2_fused_ax3_fused_i = s[pad_temp_data_grad].split(pad_temp_data_grad_ax0_ax1_fused_ax2_fused_ax3_fused, factor=32)
s[pad_temp_data_grad].bind(pad_temp_data_grad_ax0_ax1_fused_ax2_fused_ax3_fused_o, te.thread_axis("blockIdx.x"))
s[pad_temp_data_grad].bind(pad_temp_data_grad_ax0_ax1_fused_ax2_fused_ax3_fused_i, te.thread_axis("threadIdx.x"))
# extracted_reduction: same fuse-and-bind pattern as compute_kernel_grad
# (o_o_o -> blockIdx.x, o_o_i -> vthread, o_i -> threadIdx.x).
extracted_reduction_ax0_o_o_o_ax1_o_o_o_fused_ax2_o_o_o_fused_ax3_o_o_o_fused = s[extracted_reduction].fuse(extracted_reduction_ax0_o_o_o, extracted_reduction_ax1_o_o_o, extracted_reduction_ax2_o_o_o, extracted_reduction_ax3_o_o_o)
s[extracted_reduction].bind(extracted_reduction_ax0_o_o_o_ax1_o_o_o_fused_ax2_o_o_o_fused_ax3_o_o_o_fused, te.thread_axis("blockIdx.x"))
extracted_reduction_ax0_o_o_i_ax1_o_o_i_fused_ax2_o_o_i_fused_ax3_o_o_i_fused = s[extracted_reduction].fuse(extracted_reduction_ax0_o_o_i, extracted_reduction_ax1_o_o_i, extracted_reduction_ax2_o_o_i, extracted_reduction_ax3_o_o_i)
s[extracted_reduction].bind(extracted_reduction_ax0_o_o_i_ax1_o_o_i_fused_ax2_o_o_i_fused_ax3_o_o_i_fused, te.thread_axis("vthread"))
extracted_reduction_ax0_o_i_ax1_o_i_fused_ax2_o_i_fused_ax3_o_i_fused = s[extracted_reduction].fuse(extracted_reduction_ax0_o_i, extracted_reduction_ax1_o_i, extracted_reduction_ax2_o_i, extracted_reduction_ax3_o_i)
s[extracted_reduction].bind(extracted_reduction_ax0_o_i_ax1_o_i_fused_ax2_o_i_fused_ax3_o_i_fused, te.thread_axis("threadIdx.x"))
# kernel_shared: fuse all axes, vectorize an inner loop of factor 28, then
# bind a loop of factor 230 to threadIdx.x. 230 equals the product of the
# *_o_i split factors of extracted_reduction (1 * 1 * 230 * 1).
kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused = s[kernel_shared].fuse(kernel_shared_ax0, kernel_shared_ax1, kernel_shared_ax2, kernel_shared_ax3)
kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o, kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused_i = s[kernel_shared].split(kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused, factor=28)
s[kernel_shared].vectorize(kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused_i)
kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o_o, kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o_i = s[kernel_shared].split(kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o, factor=230)
s[kernel_shared].bind(kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o_i, te.thread_axis("threadIdx.x"))
# dy_shared: same pattern with a vectorized loop of factor 1 and a
# threadIdx.x loop of factor 230.
dy_shared_ax0_ax1_fused_ax2_fused_ax3_fused = s[dy_shared].fuse(dy_shared_ax0, dy_shared_ax1, dy_shared_ax2, dy_shared_ax3)
dy_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o, dy_shared_ax0_ax1_fused_ax2_fused_ax3_fused_i = s[dy_shared].split(dy_shared_ax0_ax1_fused_ax2_fused_ax3_fused, factor=1)
s[dy_shared].vectorize(dy_shared_ax0_ax1_fused_ax2_fused_ax3_fused_i)
dy_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o_o, dy_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o_i = s[dy_shared].split(dy_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o, factor=230)
s[dy_shared].bind(dy_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o_i, te.thread_axis("threadIdx.x"))
# dy_d_shared: same pattern with a vectorized loop of factor 8 and a
# threadIdx.x loop of factor 56.
dy_d_shared_ax0_ax1_fused_ax2_fused_ax3_fused = s[dy_d_shared].fuse(dy_d_shared_ax0, dy_d_shared_ax1, dy_d_shared_ax2, dy_d_shared_ax3)
dy_d_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o, dy_d_shared_ax0_ax1_fused_ax2_fused_ax3_fused_i = s[dy_d_shared].split(dy_d_shared_ax0_ax1_fused_ax2_fused_ax3_fused, factor=8)
s[dy_d_shared].vectorize(dy_d_shared_ax0_ax1_fused_ax2_fused_ax3_fused_i)
dy_d_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o_o, dy_d_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o_i = s[dy_d_shared].split(dy_d_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o, factor=56)
s[dy_d_shared].bind(dy_d_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o_i, te.thread_axis("threadIdx.x"))
# Attach unrolling pragmas to the outermost loop (ax0_c_o_o_o_o) of each local
# cache stage: "auto_unroll_max_step" is 16 for extracted_reduction_local and
# 512 for compute_kernel_grad_local; "unroll_explicit" is True for both.
s[extracted_reduction_local].pragma(extracted_reduction_local_ax0_c_o_o_o_o, "auto_unroll_max_step", 16)
s[extracted_reduction_local].pragma(extracted_reduction_local_ax0_c_o_o_o_o, "unroll_explicit", True)
s[compute_kernel_grad_local].pragma(compute_kernel_grad_local_ax0_c_o_o_o_o, "auto_unroll_max_step", 512)
s[compute_kernel_grad_local].pragma(compute_kernel_grad_local_ax0_c_o_o_o_o, "unroll_explicit", True)
import os
import numpy as np
import logging
import tvm
from tvm import auto_scheduler, te, topi
from tvm.topi.nn.util import get_pad_tuple
from tvm.auto_scheduler.compute_dag import ComputeDAG
# Conv2d layer shapes, grouped under a string key ("18" / "50" -- presumably
# the ResNet depth; confirm against the model definitions).
# Tuple layout: (H, W, CI, CO, KH, KW, strides, padding, dilation)
resnet_conv2d_configs = {
    "18": [
        (224, 224, 3, 64, 7, 7, (2, 2), (3, 3), (1, 1)),
        (56, 56, 64, 128, 3, 3, (2, 2), (1, 1), (1, 1)),
        (56, 56, 64, 128, 1, 1, (2, 2), (0, 0), (1, 1)),
        (56, 56, 64, 64, 3, 3, (1, 1), (1, 1), (1, 1)),
        (56, 56, 64, 64, 1, 1, (1, 1), (0, 0), (1, 1)),
        (28, 28, 128, 256, 3, 3, (2, 2), (1, 1), (1, 1)),
        (28, 28, 128, 256, 1, 1, (2, 2), (0, 0), (1, 1)),
        (28, 28, 128, 128, 3, 3, (1, 1), (1, 1), (1, 1)),
        (14, 14, 256, 512, 3, 3, (2, 2), (1, 1), (1, 1)),
        (14, 14, 256, 512, 1, 1, (2, 2), (0, 0), (1, 1)),
        (14, 14, 256, 256, 3, 3, (1, 1), (1, 1), (1, 1)),
        (7, 7, 512, 512, 3, 3, (1, 1), (1, 1), (1, 1)),
    ],
    "50": [
        (224, 224, 3, 64, 7, 7, (2, 2), (3, 3), (1, 1)),
        (56, 56, 256, 512, 1, 1, (2, 2), (0, 0), (1, 1)),
        (56, 56, 256, 128, 1, 1, (2, 2), (0, 0), (1, 1)),
        (56, 56, 256, 64, 1, 1, (1, 1), (0, 0), (1, 1)),
        (56, 56, 64, 256, 1, 1, (1, 1), (0, 0), (1, 1)),
        (56, 56, 64, 64, 3, 3, (1, 1), (1, 1), (1, 1)),
        (56, 56, 64, 64, 1, 1, (1, 1), (0, 0), (1, 1)),
        (28, 28, 512, 1024, 1, 1, (2, 2), (0, 0), (1, 1)),
        (28, 28, 512, 256, 1, 1, (2, 2), (0, 0), (1, 1)),
        (28, 28, 512, 128, 1, 1, (1, 1), (0, 0), (1, 1)),
        (28, 28, 128, 512, 1, 1, (1, 1), (0, 0), (1, 1)),
        (28, 28, 128, 128, 3, 3, (1, 1), (1, 1), (1, 1)),
        (14, 14, 1024, 2048, 1, 1, (2, 2), (0, 0), (1, 1)),
        (14, 14, 1024, 512, 1, 1, (2, 2), (0, 0), (1, 1)),
        (14, 14, 1024, 256, 1, 1, (1, 1), (0, 0), (1, 1)),
        (14, 14, 256, 1024, 1, 1, (1, 1), (0, 0), (1, 1)),
        (14, 14, 256, 256, 3, 3, (1, 1), (1, 1), (1, 1)),
        (7, 7, 2048, 512, 1, 1, (1, 1), (0, 0), (1, 1)),
        (7, 7, 512, 2048, 1, 1, (1, 1), (0, 0), (1, 1)),
        (7, 7, 512, 512, 3, 3, (1, 1), (1, 1), (1, 1)),
    ],
}
def get_log_file_name_from_task(task):
    """Build a JSON log file name from a task's workload key.

    Square brackets, double quotes and spaces are removed from the key and
    every comma becomes an underscore; ".json" is then appended.

    Parameters
    ----------
    task : object
        Any object exposing a string ``workload_key`` attribute.

    Returns
    -------
    str
        The sanitized workload key followed by ".json".
    """
    # A single translation pass is equivalent to the chained str.replace
    # calls because no replacement output ("_" or "") is itself a target.
    table = str.maketrans({"[": None, "]": None, '"': None, " ": None, ",": "_"})
    return "{}.json".format(task.workload_key.translate(table))
@auto_scheduler.register_workload
def conv2d_nchw(N, H, W, CI, CO, KH, KW, stride, padding, dilation):
    """Define a float32 conv2d workload via ``topi.nn.conv2d_nchw``.

    Parameters
    ----------
    N, H, W, CI : int
        Dimensions of the ``data`` placeholder, created with shape
        (N, CI, H, W).
    CO, KH, KW : int
        Together with CI, dimensions of the ``kernel`` placeholder, created
        with shape (CO, CI, KH, KW).
    stride, padding, dilation
        Forwarded unchanged to ``topi.nn.conv2d_nchw``.

    Returns
    -------
    list
        ``[data, kernel, out]``: the two placeholders and the conv2d output.
    """
    data = te.placeholder((N, CI, H, W), name="data")
    kernel = te.placeholder((CO, CI, KH, KW), name="kernel")
    out = topi.nn.conv2d_nchw(data, kernel, stride, padding, dilation, out_dtype="float32")
    return [data, kernel, out]
@auto_scheduler.register_workload
def conv2d_nchw_gd(N, H, W, CI, CO, KH, KW, stride, padding, dilation):
    """Define the gradient workload of ``conv2d_nchw``.

    Builds the forward convolution, adds a ``dy`` placeholder with the same
    shape as the forward output, and calls ``te.gradient`` on the forward
    output with respect to ``data`` and ``kernel`` using ``dy`` as the head.

    Parameters
    ----------
    N, H, W, CI, CO, KH, KW, stride, padding, dilation
        Forwarded unchanged to ``conv2d_nchw``.

    Returns
    -------
    list
        ``[data, kernel, dy]`` followed by the tensors returned by
        ``te.gradient`` (requested for two inputs: ``data`` and ``kernel``).
    """
    data, kernel, f_out = conv2d_nchw(N, H, W, CI, CO, KH, KW, stride, padding, dilation)
    dy = te.placeholder(f_out.shape, name="dy")
    out = te.gradient(f_out, [data, kernel], head=dy)
    return [data, kernel, dy, *out]
# Create the search task for the gradient workload of one conv2d layer.
target = tvm.target.Target("cuda -model=t4")
batch = 32
target_layer = 0  # index into resnet_conv2d_configs["18"]
task = auto_scheduler.create_task(
    conv2d_nchw_gd, (batch, *resnet_conv2d_configs["18"][target_layer]), target
)

print('Getting device...')
ctx = tvm.gpu()

# Number of trailing entries of ``args`` treated as outputs below;
# conv2d_nchw_gd returns [data, kernel, dy, *gradients] with gradients
# requested for two tensors (data and kernel).
num_out = 2

inp, res = auto_scheduler.load_best("tune.log", task.workload_key)
if inp is None:
    # NOTE(review): load_best is expected to yield (None, None) when the log
    # holds no record for this workload -- confirm against the TVM version in
    # use. Failing here gives a clear message instead of an AttributeError on
    # ``inp.state`` further down.
    raise RuntimeError(
        "No tuning record for workload {} found in tune.log".format(task.workload_key)
    )

# Print the best schedule as Python code, then re-apply and compile it.
print(task.compute_dag.print_python_code_from_state(inp.state))
sch, args = task.compute_dag.apply_steps_from_state(inp.state, layout_rewrite=True)
func = tvm.build(sch, args, target)

# Random float32 inputs for every argument except the last ``num_out``;
# empty device buffers for the outputs.
in_nps = [
    np.random.uniform(size=[v.value for v in a.shape]).astype(np.float32)
    for a in args[:-num_out]
]
in_args = [tvm.nd.array(dnp, ctx=ctx) for dnp in in_nps]
out_args = [tvm.nd.empty([v.value for v in a.shape], ctx=ctx) for a in args[-num_out:]]

# Evaluate execution time
evaluator = func.time_evaluator(func.entry_name, ctx, min_repeat_ms=500)
print(
    "Median execution time: %.3f ms"
    % (np.median(evaluator(*in_args, *out_args).results) * 1000)
)
{"i": [["[\"conv2d_nchw_gd\", 32, 224, 224, 3, 64, 7, 7, [2, 2], [3, 3], [1, 1]]", "cuda -keys=cuda,gpu -max_num_threads=1024 -model=t4 -thread_warp_size=32"], [[], [["CHW", 6, "local"], ["SP", 6, 0, 64, [8, 2, 2, 1], 1], ["SP", 6, 5, 3, [1, 3, 1, 1], 1], ["SP", 6, 10, 7, [1, 7, 1, 1], 1], ["SP", 6, 15, 7, [1, 1, 1, 1], 1], ["SP", 6, 20, 32, [2, 8], 1], ["SP", 6, 23, 112, [1, 1], 1], ["SP", 6, 26, 112, [4, 1], 1], ["RE", 6, [0, 5, 10, 15, 1, 6, 11, 16, 2, 7, 12, 17, 20, 23, 26, 21, 24, 27, 3, 8, 13, 18, 22, 25, 28, 4, 9, 14, 19]], ["FSP", 7, 0, 1, 3], ["FSP", 7, 4, 2, 3], ["FSP", 7, 8, 3, 3], ["FSP", 7, 12, 4, 3], ["RE", 7, [0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15]], ["CA", 6, 7, 11], ["CHR", 5, "shared", [6]], ["CA", 6, 7, 14], ["CI", 5], ["CHW", 2, "local"], ["SP", 2, 0, 32, [1, 8, 4, 1], 1], ["SP", 2, 5, 3, [1, 1, 3, 1], 1], ["SP", 2, 10, 230, [1, 1, 5, 1], 1], ["SP", 2, 15, 230, [1, 5, 1, 23], 1], ["SP", 2, 20, 64, [2, 1], 1], ["SP", 2, 23, 4, [1, 1], 1], ["SP", 2, 26, 4, [2, 1], 1], ["RE", 2, [0, 5, 10, 15, 1, 6, 11, 16, 2, 7, 12, 17, 20, 23, 26, 21, 24, 27, 3, 8, 13, 18, 22, 25, 28, 4, 9, 14, 19]], ["FSP", 3, 0, 19, 3], ["FSP", 3, 4, 20, 3], ["FSP", 3, 8, 21, 3], ["FSP", 3, 12, 22, 3], ["RE", 3, [0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15]], ["CA", 2, 3, 11], ["CHR", 1, "shared", [2]], ["CA", 2, 3, 14], ["CHR", 0, "shared", [3]], ["CA", 1, 4, 14], ["CHR", 0, "shared", [10]], ["CA", 1, 11, 14], ["FU", 12, [0, 1, 2, 3]], ["AN", 12, 0, 5], ["FU", 12, [1, 2, 3, 4]], ["AN", 12, 1, 4], ["FU", 12, [2, 3, 4, 5]], ["AN", 12, 2, 6], ["FU", 10, [0, 1, 2, 3]], ["SP", 10, 0, 112, [1], 1], ["AN", 10, 1, 2], ["FFSP", 10, 0, [4, 3, 2, 1], 1, 1], ["AN", 10, 1, 6], ["FU", 7, [0, 1, 2, 3]], ["SP", 7, 0, 4816896, [32], 1], ["AN", 7, 0, 5], ["AN", 7, 1, 6], ["FU", 6, [0, 1, 2, 3]], ["AN", 6, 0, 5], ["FU", 6, [1, 2, 3, 4]], ["AN", 6, 1, 4], ["FU", 6, [2, 3, 4, 5]], ["AN", 6, 2, 6], ["FU", 4, [0, 1, 2, 3]], ["SP", 4, 0, 48, [1], 1], ["AN", 4, 1, 2], ["FFSP", 
4, 0, [22, 21, 20, 19], 1, 1], ["AN", 4, 1, 6], ["FU", 2, [0, 1, 2, 3]], ["SP", 2, 0, 312, [1], 1], ["AN", 2, 1, 2], ["FFSP", 2, 0, [22, 21, 20, 19], 1, 1], ["AN", 2, 1, 6], ["FU", 1, [0, 1, 2, 3]], ["SP", 1, 0, 128, [16], 1], ["AN", 1, 1, 2], ["FFSP", 1, 0, [4, 3, 2, 1], 1, 1], ["AN", 1, 1, 6], ["PR", 5, 0, "auto_unroll_max_step$64"], ["PR", 11, 0, "auto_unroll_max_step$64"]]]], "r": [[1.34032], 0, 9.02326, 1602798060], "v": "v0.2"}
{"i": [["[\"conv2d_nchw_gd\", 32, 224, 224, 3, 64, 7, 7, [2, 2], [3, 3], [1, 1]]", "cuda -keys=cuda,gpu -max_num_threads=1024 -model=t4 -thread_warp_size=32"], [[], [["CHW", 6, "local"], ["SP", 6, 0, 64, [1, 32, 1, 1], 1], ["SP", 6, 5, 3, [1, 1, 1, 1], 1], ["SP", 6, 10, 7, [1, 1, 7, 1], 1], ["SP", 6, 15, 7, [1, 1, 1, 1], 1], ["SP", 6, 20, 32, [16, 2], 1], ["SP", 6, 23, 112, [1, 2], 1], ["SP", 6, 26, 112, [1, 1], 1], ["RE", 6, [0, 5, 10, 15, 1, 6, 11, 16, 2, 7, 12, 17, 20, 23, 26, 21, 24, 27, 3, 8, 13, 18, 22, 25, 28, 4, 9, 14, 19]], ["FSP", 7, 0, 1, 3], ["FSP", 7, 4, 2, 3], ["FSP", 7, 8, 3, 3], ["FSP", 7, 12, 4, 3], ["RE", 7, [0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15]], ["CA", 6, 7, 11], ["CHR", 5, "shared", [6]], ["CA", 6, 7, 14], ["CI", 5], ["CHW", 2, "local"], ["SP", 2, 0, 32, [1, 4, 1, 2], 1], ["SP", 2, 5, 3, [3, 1, 1, 1], 1], ["SP", 2, 10, 230, [2, 23, 1, 1], 1], ["SP", 2, 15, 230, [1, 5, 1, 2], 1], ["SP", 2, 20, 64, [1, 2], 1], ["SP", 2, 23, 4, [2, 2], 1], ["SP", 2, 26, 4, [4, 1], 1], ["RE", 2, [0, 5, 10, 15, 1, 6, 11, 16, 2, 7, 12, 17, 20, 23, 26, 21, 24, 27, 3, 8, 13, 18, 22, 25, 28, 4, 9, 14, 19]], ["FSP", 3, 0, 19, 3], ["FSP", 3, 4, 20, 3], ["FSP", 3, 8, 21, 3], ["FSP", 3, 12, 22, 3], ["RE", 3, [0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15]], ["CA", 2, 3, 11], ["CHR", 1, "shared", [2]], ["CA", 2, 3, 14], ["CHR", 0, "shared", [3]], ["CA", 1, 4, 14], ["CHR", 0, "shared", [10]], ["CA", 1, 11, 14], ["FU", 12, [0, 1, 2, 3]], ["AN", 12, 0, 5], ["FU", 12, [1, 2, 3, 4]], ["AN", 12, 1, 4], ["FU", 12, [2, 3, 4, 5]], ["AN", 12, 2, 6], ["FU", 10, [0, 1, 2, 3]], ["SP", 10, 0, 288, [1], 1], ["AN", 10, 1, 2], ["FFSP", 10, 0, [4, 3, 2, 1], 1, 1], ["AN", 10, 1, 6], ["FU", 7, [0, 1, 2, 3]], ["SP", 7, 0, 4816896, [32], 1], ["AN", 7, 0, 5], ["AN", 7, 1, 6], ["FU", 6, [0, 1, 2, 3]], ["AN", 6, 0, 5], ["FU", 6, [1, 2, 3, 4]], ["AN", 6, 1, 4], ["FU", 6, [2, 3, 4, 5]], ["AN", 6, 2, 6], ["FU", 4, [0, 1, 2, 3]], ["SP", 4, 0, 56, [1], 1], ["AN", 4, 1, 2], 
["FFSP", 4, 0, [22, 21, 20, 19], 1, 1], ["AN", 4, 1, 6], ["FU", 2, [0, 1, 2, 3]], ["SP", 2, 0, 32, [1], 1], ["AN", 2, 1, 2], ["FFSP", 2, 0, [22, 21, 20, 19], 1, 1], ["AN", 2, 1, 6], ["FU", 1, [0, 1, 2, 3]], ["SP", 1, 0, 64, [1], 1], ["AN", 1, 1, 2], ["FFSP", 1, 0, [4, 3, 2, 1], 1, 1], ["AN", 1, 1, 6], ["PR", 5, 0, "auto_unroll_max_step$64"], ["PR", 11, 0, "auto_unroll_max_step$64"]]]], "r": [[0.125088], 0, 2.82023, 1602798061], "v": "v0.2"}
{"i": [["[\"conv2d_nchw_gd\", 32, 224, 224, 3, 64, 7, 7, [2, 2], [3, 3], [1, 1]]", "cuda -keys=cuda,gpu -max_num_threads=1024 -model=t4 -thread_warp_size=32"], [[], [["CHW", 6, "local"], ["SP", 6, 0, 64, [8, 1, 2, 2], 1], ["SP", 6, 5, 3, [1, 3, 1, 1], 1], ["SP", 6, 10, 7, [1, 7, 1, 1], 1], ["SP", 6, 15, 7, [1, 7, 1, 1], 1], ["SP", 6, 20, 32, [1, 1], 1], ["SP", 6, 23, 112, [4, 7], 1], ["SP", 6, 26, 112, [2, 2], 1], ["RE", 6, [0, 5, 10, 15, 1, 6, 11, 16, 2, 7, 12, 17, 20, 23, 26, 21, 24, 27, 3, 8, 13, 18, 22, 25, 28, 4, 9, 14, 19]], ["FSP", 7, 0, 1, 3], ["FSP", 7, 4, 2, 3], ["FSP", 7, 8, 3, 3], ["FSP", 7, 12, 4, 3], ["RE", 7, [0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15]], ["CA", 6, 7, 11], ["CHR", 5, "shared", [6]], ["CA", 6, 7, 14], ["CI", 5], ["CHW", 2, "local"], ["SP", 2, 0, 32, [1, 1, 1, 4], 1], ["SP", 2, 5, 3, [3, 1, 1, 1], 1], ["SP", 2, 10, 230, [1, 1, 10, 1], 1], ["SP", 2, 15, 230, [1, 115, 1, 1], 1], ["SP", 2, 20, 64, [1, 1], 1], ["SP", 2, 23, 4, [2, 1], 1], ["SP", 2, 26, 4, [1, 2], 1], ["RE", 2, [0, 5, 10, 15, 1, 6, 11, 16, 2, 7, 12, 17, 20, 23, 26, 21, 24, 27, 3, 8, 13, 18, 22, 25, 28, 4, 9, 14, 19]], ["FSP", 3, 0, 19, 3], ["FSP", 3, 4, 20, 3], ["FSP", 3, 8, 21, 3], ["FSP", 3, 12, 22, 3], ["RE", 3, [0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15]], ["CA", 2, 3, 11], ["CHR", 1, "shared", [2]], ["CA", 2, 3, 14], ["CHR", 0, "shared", [3]], ["CA", 1, 4, 14], ["CHR", 0, "shared", [10]], ["CA", 1, 11, 14], ["FU", 12, [0, 1, 2, 3]], ["AN", 12, 0, 5], ["FU", 12, [1, 2, 3, 4]], ["AN", 12, 1, 4], ["FU", 12, [2, 3, 4, 5]], ["AN", 12, 2, 6], ["FU", 10, [0, 1, 2, 3]], ["SP", 10, 0, 385, [1], 1], ["AN", 10, 1, 2], ["FFSP", 10, 0, [4, 3, 2, 1], 1, 1], ["AN", 10, 1, 6], ["FU", 7, [0, 1, 2, 3]], ["SP", 7, 0, 4816896, [32], 1], ["AN", 7, 0, 5], ["AN", 7, 1, 6], ["FU", 6, [0, 1, 2, 3]], ["AN", 6, 0, 5], ["FU", 6, [1, 2, 3, 4]], ["AN", 6, 1, 4], ["FU", 6, [2, 3, 4, 5]], ["AN", 6, 2, 6], ["FU", 4, [0, 1, 2, 3]], ["SP", 4, 0, 12, [1], 1], ["AN", 4, 1, 2], 
["FFSP", 4, 0, [22, 21, 20, 19], 1, 1], ["AN", 4, 1, 6], ["FU", 2, [0, 1, 2, 3]], ["SP", 2, 0, 48, [1], 1], ["AN", 2, 1, 2], ["FFSP", 2, 0, [22, 21, 20, 19], 1, 1], ["AN", 2, 1, 6], ["FU", 1, [0, 1, 2, 3]], ["SP", 1, 0, 448, [28], 1], ["AN", 1, 1, 2], ["FFSP", 1, 0, [4, 3, 2, 1], 1, 1], ["AN", 1, 1, 6], ["PR", 5, 0, "auto_unroll_max_step$512"], ["PR", 11, 0, "auto_unroll_max_step$512"]]]], "r": [[0.116859], 0, 6.49943, 1602798064], "v": "v0.2"}
{"i": [["[\"conv2d_nchw_gd\", 32, 224, 224, 3, 64, 7, 7, [2, 2], [3, 3], [1, 1]]", "cuda -keys=cuda,gpu -max_num_threads=1024 -model=t4 -thread_warp_size=32"], [[], [["CHW", 6, "local"], ["SP", 6, 0, 64, [1, 1, 16, 2], 1], ["SP", 6, 5, 3, [1, 1, 1, 3], 1], ["SP", 6, 10, 7, [1, 7, 1, 1], 1], ["SP", 6, 15, 7, [1, 1, 1, 1], 1], ["SP", 6, 20, 32, [2, 1], 1], ["SP", 6, 23, 112, [8, 1], 1], ["SP", 6, 26, 112, [1, 1], 1], ["RE", 6, [0, 5, 10, 15, 1, 6, 11, 16, 2, 7, 12, 17, 20, 23, 26, 21, 24, 27, 3, 8, 13, 18, 22, 25, 28, 4, 9, 14, 19]], ["FSP", 7, 0, 1, 3], ["FSP", 7, 4, 2, 3], ["FSP", 7, 8, 3, 3], ["FSP", 7, 12, 4, 3], ["RE", 7, [0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15]], ["CA", 6, 7, 11], ["CHR", 5, "shared", [6]], ["CA", 6, 7, 14], ["CI", 5], ["CHW", 2, "local"], ["SP", 2, 0, 32, [1, 4, 4, 1], 1], ["SP", 2, 5, 3, [3, 1, 1, 1], 1], ["SP", 2, 10, 230, [1, 1, 1, 1], 1], ["SP", 2, 15, 230, [1, 23, 2, 1], 1], ["SP", 2, 20, 64, [1, 2], 1], ["SP", 2, 23, 4, [4, 1], 1], ["SP", 2, 26, 4, [1, 2], 1], ["RE", 2, [0, 5, 10, 15, 1, 6, 11, 16, 2, 7, 12, 17, 20, 23, 26, 21, 24, 27, 3, 8, 13, 18, 22, 25, 28, 4, 9, 14, 19]], ["FSP", 3, 0, 19, 3], ["FSP", 3, 4, 20, 3], ["FSP", 3, 8, 21, 3], ["FSP", 3, 12, 22, 3], ["RE", 3, [0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15]], ["CA", 2, 3, 11], ["CHR", 1, "shared", [2]], ["CA", 2, 3, 14], ["CHR", 0, "shared", [3]], ["CA", 1, 4, 14], ["CHR", 0, "shared", [10]], ["CA", 1, 11, 14], ["FU", 12, [0, 1, 2, 3]], ["AN", 12, 0, 5], ["FU", 12, [1, 2, 3, 4]], ["AN", 12, 1, 4], ["FU", 12, [2, 3, 4, 5]], ["AN", 12, 2, 6], ["FU", 10, [0, 1, 2, 3]], ["SP", 10, 0, 999, [1], 1], ["AN", 10, 1, 2], ["FFSP", 10, 0, [4, 3, 2, 1], 1, 1], ["AN", 10, 1, 6], ["FU", 7, [0, 1, 2, 3]], ["SP", 7, 0, 4816896, [16], 1], ["AN", 7, 0, 5], ["AN", 7, 1, 6], ["FU", 6, [0, 1, 2, 3]], ["AN", 6, 0, 5], ["FU", 6, [1, 2, 3, 4]], ["AN", 6, 1, 4], ["FU", 6, [2, 3, 4, 5]], ["AN", 6, 2, 6], ["FU", 4, [0, 1, 2, 3]], ["SP", 4, 0, 56, [1], 1], ["AN", 4, 1, 2], ["FFSP", 
4, 0, [22, 21, 20, 19], 1, 1], ["AN", 4, 1, 6], ["FU", 2, [0, 1, 2, 3]], ["SP", 2, 0, 64, [1], 1], ["AN", 2, 1, 2], ["FFSP", 2, 0, [22, 21, 20, 19], 1, 1], ["AN", 2, 1, 6], ["FU", 1, [0, 1, 2, 3]], ["SP", 1, 0, 3584, [1], 1], ["AN", 1, 1, 2], ["FFSP", 1, 0, [4, 3, 2, 1], 1, 1], ["AN", 1, 1, 6], ["PR", 5, 0, "auto_unroll_max_step$16"], ["PR", 11, 0, "auto_unroll_max_step$16"]]]], "r": [[0.265191], 0, 3.0222, 1602798066], "v": "v0.2"}
{"i": [["[\"conv2d_nchw_gd\", 32, 224, 224, 3, 64, 7, 7, [2, 2], [3, 3], [1, 1]]", "cuda -keys=cuda,gpu -max_num_threads=1024 -model=t4 -thread_warp_size=32"], [[], [["CHW", 6, "local"], ["SP", 6, 0, 64, [1, 32, 1, 1], 1], ["SP", 6, 5, 3, [1, 1, 1, 1], 1], ["SP", 6, 10, 7, [1, 1, 7, 1], 1], ["SP", 6, 15, 7, [1, 1, 1, 1], 1], ["SP", 6, 20, 32, [16, 2], 1], ["SP", 6, 23, 112, [2, 1], 1], ["SP", 6, 26, 112, [1, 1], 1], ["RE", 6, [0, 5, 10, 15, 1, 6, 11, 16, 2, 7, 12, 17, 20, 23, 26, 21, 24, 27, 3, 8, 13, 18, 22, 25, 28, 4, 9, 14, 19]], ["FSP", 7, 0, 1, 3], ["FSP", 7, 4, 2, 3], ["FSP", 7, 8, 3, 3], ["FSP", 7, 12, 4, 3], ["RE", 7, [0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15]], ["CA", 6, 7, 11], ["CHR", 5, "shared", [6]], ["CA", 6, 7, 14], ["CI", 5], ["CHW", 2, "local"], ["SP", 2, 0, 32, [1, 4, 1, 2], 1], ["SP", 2, 5, 3, [3, 1, 1, 1], 1], ["SP", 2, 10, 230, [2, 23, 1, 1], 1], ["SP", 2, 15, 230, [1, 5, 1, 2], 1], ["SP", 2, 20, 64, [1, 2], 1], ["SP", 2, 23, 4, [2, 2], 1], ["SP", 2, 26, 4, [2, 1], 1], ["RE", 2, [0, 5, 10, 15, 1, 6, 11, 16, 2, 7, 12, 17, 20, 23, 26, 21, 24, 27, 3, 8, 13, 18, 22, 25, 28, 4, 9, 14, 19]], ["FSP", 3, 0, 19, 3], ["FSP", 3, 4, 20, 3], ["FSP", 3, 8, 21, 3], ["FSP", 3, 12, 22, 3], ["RE", 3, [0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15]], ["CA", 2, 3, 11], ["CHR", 1, "shared", [2]], ["CA", 2, 3, 14], ["CHR", 0, "shared", [3]], ["CA", 1, 4, 14], ["CHR", 0, "shared", [10]], ["CA", 1, 11, 14], ["FU", 12, [0, 1, 2, 3]], ["AN", 12, 0, 5], ["FU", 12, [1, 2, 3, 4]], ["AN", 12, 1, 4], ["FU", 12, [2, 3, 4, 5]], ["AN", 12, 2, 6], ["FU", 10, [0, 1, 2, 3]], ["SP", 10, 0, 288, [1], 1], ["AN", 10, 1, 2], ["FFSP", 10, 0, [4, 3, 2, 1], 1, 1], ["AN", 10, 1, 6], ["FU", 7, [0, 1, 2, 3]], ["SP", 7, 0, 4816896, [32], 1], ["AN", 7, 0, 5], ["AN", 7, 1, 6], ["FU", 6, [0, 1, 2, 3]], ["AN", 6, 0, 5], ["FU", 6, [1, 2, 3, 4]], ["AN", 6, 1, 4], ["FU", 6, [2, 3, 4, 5]], ["AN", 6, 2, 6], ["FU", 4, [0, 1, 2, 3]], ["SP", 4, 0, 56, [1], 1], ["AN", 4, 1, 2], 
["FFSP", 4, 0, [22, 21, 20, 19], 1, 1], ["AN", 4, 1, 6], ["FU", 2, [0, 1, 2, 3]], ["SP", 2, 0, 32, [8], 1], ["AN", 2, 1, 2], ["FFSP", 2, 0, [22, 21, 20, 19], 1, 1], ["AN", 2, 1, 6], ["FU", 1, [0, 1, 2, 3]], ["SP", 1, 0, 64, [1], 1], ["AN", 1, 1, 2], ["FFSP", 1, 0, [4, 3, 2, 1], 1, 1], ["AN", 1, 1, 6], ["PR", 5, 0, "auto_unroll_max_step$64"], ["PR", 11, 0, "auto_unroll_max_step$64"]]]], "r": [[0.120509], 0, 2.84417, 1602798067], "v": "v0.2"}
{"i": [["[\"conv2d_nchw_gd\", 32, 224, 224, 3, 64, 7, 7, [2, 2], [3, 3], [1, 1]]", "cuda -keys=cuda,gpu -max_num_threads=1024 -model=t4 -thread_warp_size=32"], [[], [["CHW", 6, "local"], ["SP", 6, 0, 64, [1, 16, 1, 4], 1], ["SP", 6, 5, 3, [1, 3, 1, 1], 1], ["SP", 6, 10, 7, [1, 1, 1, 7], 1], ["SP", 6, 15, 7, [1, 1, 1, 1], 1], ["SP", 6, 20, 32, [1, 8], 1], ["SP", 6, 23, 112, [7, 1], 1], ["SP", 6, 26, 112, [1, 1], 1], ["RE", 6, [0, 5, 10, 15, 1, 6, 11, 16, 2, 7, 12, 17, 20, 23, 26, 21, 24, 27, 3, 8, 13, 18, 22, 25, 28, 4, 9, 14, 19]], ["FSP", 7, 0, 1, 3], ["FSP", 7, 4, 2, 3], ["FSP", 7, 8, 3, 3], ["FSP", 7, 12, 4, 3], ["RE", 7, [0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15]], ["CA", 6, 7, 11], ["CHR", 5, "shared", [6]], ["CA", 6, 7, 14], ["CI", 5], ["CHW", 2, "local"], ["SP", 2, 0, 32, [1, 2, 8, 1], 1], ["SP", 2, 5, 3, [1, 1, 1, 3], 1], ["SP", 2, 10, 230, [2, 23, 1, 1], 1], ["SP", 2, 15, 230, [1, 1, 5, 2], 1], ["SP", 2, 20, 64, [4, 1], 1], ["SP", 2, 23, 4, [2, 2], 1], ["SP", 2, 26, 4, [1, 2], 1], ["RE", 2, [0, 5, 10, 15, 1, 6, 11, 16, 2, 7, 12, 17, 20, 23, 26, 21, 24, 27, 3, 8, 13, 18, 22, 25, 28, 4, 9, 14, 19]], ["FSP", 3, 0, 19, 3], ["FSP", 3, 4, 20, 3], ["FSP", 3, 8, 21, 3], ["FSP", 3, 12, 22, 3], ["RE", 3, [0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15]], ["CA", 2, 3, 11], ["CHR", 1, "shared", [2]], ["CA", 2, 3, 14], ["CHR", 0, "shared", [3]], ["CA", 1, 4, 14], ["CHR", 0, "shared", [10]], ["CA", 1, 11, 14], ["FU", 12, [0, 1, 2, 3]], ["AN", 12, 0, 5], ["FU", 12, [1, 2, 3, 4]], ["AN", 12, 1, 4], ["FU", 12, [2, 3, 4, 5]], ["AN", 12, 2, 6], ["FU", 10, [0, 1, 2, 3]], ["SP", 10, 0, 104, [1], 1], ["AN", 10, 1, 2], ["FFSP", 10, 0, [4, 3, 2, 1], 1, 1], ["AN", 10, 1, 6], ["FU", 7, [0, 1, 2, 3]], ["SP", 7, 0, 4816896, [32], 1], ["AN", 7, 0, 5], ["AN", 7, 1, 6], ["FU", 6, [0, 1, 2, 3]], ["AN", 6, 0, 5], ["FU", 6, [1, 2, 3, 4]], ["AN", 6, 1, 4], ["FU", 6, [2, 3, 4, 5]], ["AN", 6, 2, 6], ["FU", 4, [0, 1, 2, 3]], ["SP", 4, 0, 336, [1], 1], ["AN", 4, 1, 2], 
["FFSP", 4, 0, [22, 21, 20, 19], 1, 1], ["AN", 4, 1, 6], ["FU", 2, [0, 1, 2, 3]], ["SP", 2, 0, 768, [3], 1], ["AN", 2, 1, 2], ["FFSP", 2, 0, [22, 21, 20, 19], 1, 1], ["AN", 2, 1, 6], ["FU", 1, [0, 1, 2, 3]], ["SP", 1, 0, 224, [1], 1], ["AN", 1, 1, 2], ["FFSP", 1, 0, [4, 3, 2, 1], 1, 1], ["AN", 1, 1, 6], ["PR", 5, 0, "auto_unroll_max_step$16"], ["PR", 11, 0, "auto_unroll_max_step$64"]]]], "r": [[0.411443], 0, 4.09382, 1602798070], "v": "v0.2"}
{"i": [["[\"conv2d_nchw_gd\", 32, 224, 224, 3, 64, 7, 7, [2, 2], [3, 3], [1, 1]]", "cuda -keys=cuda,gpu -max_num_threads=1024 -model=t4 -thread_warp_size=32"], [[], [["CHW", 6, "local"], ["SP", 6, 0, 64, [4, 1, 4, 2], 1], ["SP", 6, 5, 3, [1, 3, 1, 1], 1], ["SP", 6, 10, 7, [1, 7, 1, 1], 1], ["SP", 6, 15, 7, [1, 7, 1, 1], 1], ["SP", 6, 20, 32, [1, 1], 1], ["SP", 6, 23, 112, [4, 7], 1], ["SP", 6, 26, 112, [2, 2], 1], ["RE", 6, [0, 5, 10, 15, 1, 6, 11, 16, 2, 7, 12, 17, 20, 23, 26, 21, 24, 27, 3, 8, 13, 18, 22, 25, 28, 4, 9, 14, 19]], ["FSP", 7, 0, 1, 3], ["FSP", 7, 4, 2, 3], ["FSP", 7, 8, 3, 3], ["FSP", 7, 12, 4, 3], ["RE", 7, [0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15]], ["CA", 6, 7, 11], ["CHR", 5, "shared", [6]], ["CA", 6, 7, 14], ["CI", 5], ["CHW", 2, "local"], ["SP", 2, 0, 32, [1, 8, 1, 4], 1], ["SP", 2, 5, 3, [1, 1, 1, 3], 1], ["SP", 2, 10, 230, [1, 1, 5, 2], 1], ["SP", 2, 15, 230, [1, 115, 1, 1], 1], ["SP", 2, 20, 64, [1, 1], 1], ["SP", 2, 23, 4, [2, 1], 1], ["SP", 2, 26, 4, [1, 2], 1], ["RE", 2, [0, 5, 10, 15, 1, 6, 11, 16, 2, 7, 12, 17, 20, 23, 26, 21, 24, 27, 3, 8, 13, 18, 22, 25, 28, 4, 9, 14, 19]], ["FSP", 3, 0, 19, 3], ["FSP", 3, 4, 20, 3], ["FSP", 3, 8, 21, 3], ["FSP", 3, 12, 22, 3], ["RE", 3, [0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15]], ["CA", 2, 3, 11], ["CHR", 1, "shared", [2]], ["CA", 2, 3, 14], ["CHR", 0, "shared", [3]], ["CA", 1, 4, 14], ["CHR", 0, "shared", [10]], ["CA", 1, 11, 14], ["FU", 12, [0, 1, 2, 3]], ["AN", 12, 0, 5], ["FU", 12, [1, 2, 3, 4]], ["AN", 12, 1, 4], ["FU", 12, [2, 3, 4, 5]], ["AN", 12, 2, 6], ["FU", 10, [0, 1, 2, 3]], ["SP", 10, 0, 385, [1], 1], ["AN", 10, 1, 2], ["FFSP", 10, 0, [4, 3, 2, 1], 1, 1], ["AN", 10, 1, 6], ["FU", 7, [0, 1, 2, 3]], ["SP", 7, 0, 4816896, [32], 1], ["AN", 7, 0, 5], ["AN", 7, 1, 6], ["FU", 6, [0, 1, 2, 3]], ["AN", 6, 0, 5], ["FU", 6, [1, 2, 3, 4]], ["AN", 6, 1, 4], ["FU", 6, [2, 3, 4, 5]], ["AN", 6, 2, 6], ["FU", 4, [0, 1, 2, 3]], ["SP", 4, 0, 12, [3], 1], ["AN", 4, 1, 2], ["FFSP", 
4, 0, [22, 21, 20, 19], 1, 1], ["AN", 4, 1, 6], ["FU", 2, [0, 1, 2, 3]], ["SP", 2, 0, 48, [1], 1], ["AN", 2, 1, 2], ["FFSP", 2, 0, [22, 21, 20, 19], 1, 1], ["AN", 2, 1, 6], ["FU", 1, [0, 1, 2, 3]], ["SP", 1, 0, 448, [1], 1], ["AN", 1, 1, 2], ["FFSP", 1, 0, [4, 3, 2, 1], 1, 1], ["AN", 1, 1, 6], ["PR", 5, 0, "auto_unroll_max_step$0"], ["PR", 11, 0, "auto_unroll_max_step$512"]]]], "r": [[1e+10], 4, 4.6668, 1602798072], "v": "v0.2"}
{"i": [["[\"conv2d_nchw_gd\", 32, 224, 224, 3, 64, 7, 7, [2, 2], [3, 3], [1, 1]]", "cuda -keys=cuda,gpu -max_num_threads=1024 -model=t4 -thread_warp_size=32"], [[], [["CHW", 6, "local"], ["SP", 6, 0, 64, [8, 1, 2, 2], 1], ["SP", 6, 5, 3, [1, 3, 1, 1], 1], ["SP", 6, 10, 7, [1, 7, 1, 1], 1], ["SP", 6, 15, 7, [1, 7, 1, 1], 1], ["SP", 6, 20, 32, [1, 1], 1], ["SP", 6, 23, 112, [4, 7], 1], ["SP", 6, 26, 112, [2, 2], 1], ["RE", 6, [0, 5, 10, 15, 1, 6, 11, 16, 2, 7, 12, 17, 20, 23, 26, 21, 24, 27, 3, 8, 13, 18, 22, 25, 28, 4, 9, 14, 19]], ["FSP", 7, 0, 1, 3], ["FSP", 7, 4, 2, 3], ["FSP", 7, 8, 3, 3], ["FSP", 7, 12, 4, 3], ["RE", 7, [0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15]], ["CA", 6, 7, 11], ["CHR", 5, "shared", [6]], ["CA", 6, 7, 14], ["CI", 5], ["CHW", 2, "local"], ["SP", 2, 0, 32, [1, 8, 1, 4], 1], ["SP", 2, 5, 3, [1, 1, 1, 1], 1], ["SP", 2, 10, 230, [1, 1, 5, 2], 1], ["SP", 2, 15, 230, [1, 115, 1, 1], 1], ["SP", 2, 20, 64, [1, 1], 1], ["SP", 2, 23, 4, [2, 1], 1], ["SP", 2, 26, 4, [1, 2], 1], ["RE", 2, [0, 5, 10, 15, 1, 6, 11, 16, 2, 7, 12, 17, 20, 23, 26, 21, 24, 27, 3, 8, 13, 18, 22, 25, 28, 4, 9, 14, 19]], ["FSP", 3, 0, 19, 3], ["FSP", 3, 4, 20, 3], ["FSP", 3, 8, 21, 3], ["FSP", 3, 12, 22, 3], ["RE", 3, [0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15]], ["CA", 2, 3, 11], ["CHR", 1, "shared", [2]], ["CA", 2, 3, 14], ["CHR", 0, "shared", [3]], ["CA", 1, 4, 14], ["CHR", 0, "shared", [10]], ["CA", 1, 11, 14], ["FU", 12, [0, 1, 2, 3]], ["AN", 12, 0, 5], ["FU", 12, [1, 2, 3, 4]], ["AN", 12, 1, 4], ["FU", 12, [2, 3, 4, 5]], ["AN", 12, 2, 6], ["FU", 10, [0, 1, 2, 3]], ["SP", 10, 0, 385, [1], 1], ["AN", 10, 1, 2], ["FFSP", 10, 0, [4, 3, 2, 1], 1, 1], ["AN", 10, 1, 6], ["FU", 7, [0, 1, 2, 3]], ["SP", 7, 0, 4816896, [32], 1], ["AN", 7, 0, 5], ["AN", 7, 1, 6], ["FU", 6, [0, 1, 2, 3]], ["AN", 6, 0, 5], ["FU", 6, [1, 2, 3, 4]], ["AN", 6, 1, 4], ["FU", 6, [2, 3, 4, 5]], ["AN", 6, 2, 6], ["FU", 4, [0, 1, 2, 3]], ["SP", 4, 0, 12, [3], 1], ["AN", 4, 1, 2], ["FFSP", 
4, 0, [22, 21, 20, 19], 1, 1], ["AN", 4, 1, 6], ["FU", 2, [0, 1, 2, 3]], ["SP", 2, 0, 48, [24], 1], ["AN", 2, 1, 2], ["FFSP", 2, 0, [22, 21, 20, 19], 1, 1], ["AN", 2, 1, 6], ["FU", 1, [0, 1, 2, 3]], ["SP", 1, 0, 448, [1], 1], ["AN", 1, 1, 2], ["FFSP", 1, 0, [4, 3, 2, 1], 1, 1], ["AN", 1, 1, 6], ["PR", 5, 0, "auto_unroll_max_step$0"], ["PR", 11, 0, "auto_unroll_max_step$512"]]]], "r": [[1e+10], 4, 5.49505, 1602798073], "v": "v0.2"}
{"i": [["[\"conv2d_nchw_gd\", 32, 224, 224, 3, 64, 7, 7, [2, 2], [3, 3], [1, 1]]", "cuda -keys=cuda,gpu -max_num_threads=1024 -model=t4 -thread_warp_size=32"], [[], [["CHW", 6, "local"], ["SP", 6, 0, 64, [1, 1, 16, 2], 1], ["SP", 6, 5, 3, [1, 1, 3, 1], 1], ["SP", 6, 10, 7, [1, 7, 1, 1], 1], ["SP", 6, 15, 7, [1, 7, 1, 1], 1], ["SP", 6, 20, 32, [1, 1], 1], ["SP", 6, 23, 112, [56, 1], 1], ["SP", 6, 26, 112, [2, 1], 1], ["RE", 6, [0, 5, 10, 15, 1, 6, 11, 16, 2, 7, 12, 17, 20, 23, 26, 21, 24, 27, 3, 8, 13, 18, 22, 25, 28, 4, 9, 14, 19]], ["FSP", 7, 0, 1, 3], ["FSP", 7, 4, 2, 3], ["FSP", 7, 8, 3, 3], ["FSP", 7, 12, 4, 3], ["RE", 7, [0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15]], ["CA", 6, 7, 11], ["CHR", 5, "shared", [6]], ["CA", 6, 7, 14], ["CI", 5], ["CHW", 2, "local"], ["SP", 2, 0, 32, [1, 4, 4, 1], 1], ["SP", 2, 5, 3, [1, 3, 1, 1], 1], ["SP", 2, 10, 230, [1, 1, 1, 1], 1], ["SP", 2, 15, 230, [1, 23, 2, 1], 1], ["SP", 2, 20, 64, [1, 2], 1], ["SP", 2, 23, 4, [4, 1], 1], ["SP", 2, 26, 4, [1, 2], 1], ["RE", 2, [0, 5, 10, 15, 1, 6, 11, 16, 2, 7, 12, 17, 20, 23, 26, 21, 24, 27, 3, 8, 13, 18, 22, 25, 28, 4, 9, 14, 19]], ["FSP", 3, 0, 19, 3], ["FSP", 3, 4, 20, 3], ["FSP", 3, 8, 21, 3], ["FSP", 3, 12, 22, 3], ["RE", 3, [0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15]], ["CA", 2, 3, 11], ["CHR", 1, "shared", [2]], ["CA", 2, 3, 14], ["CHR", 0, "shared", [3]], ["CA", 1, 4, 14], ["CHR", 0, "shared", [10]], ["CA", 1, 11, 14], ["FU", 12, [0, 1, 2, 3]], ["AN", 12, 0, 5], ["FU", 12, [1, 2, 3, 4]], ["AN", 12, 1, 4], ["FU", 12, [2, 3, 4, 5]], ["AN", 12, 2, 6], ["FU", 10, [0, 1, 2, 3]], ["SP", 10, 0, 999, [1], 1], ["AN", 10, 1, 2], ["FFSP", 10, 0, [4, 3, 2, 1], 1, 1], ["AN", 10, 1, 6], ["FU", 7, [0, 1, 2, 3]], ["SP", 7, 0, 4816896, [32], 1], ["AN", 7, 0, 5], ["AN", 7, 1, 6], ["FU", 6, [0, 1, 2, 3]], ["AN", 6, 0, 5], ["FU", 6, [1, 2, 3, 4]], ["AN", 6, 1, 4], ["FU", 6, [2, 3, 4, 5]], ["AN", 6, 2, 6], ["FU", 4, [0, 1, 2, 3]], ["SP", 4, 0, 56, [1], 1], ["AN", 4, 1, 2], 
["FFSP", 4, 0, [22, 21, 20, 19], 1, 1], ["AN", 4, 1, 6], ["FU", 2, [0, 1, 2, 3]], ["SP", 2, 0, 64, [2], 1], ["AN", 2, 1, 2], ["FFSP", 2, 0, [22, 21, 20, 19], 1, 1], ["AN", 2, 1, 6], ["FU", 1, [0, 1, 2, 3]], ["SP", 1, 0, 3584, [64], 1], ["AN", 1, 1, 2], ["FFSP", 1, 0, [4, 3, 2, 1], 1, 1], ["AN", 1, 1, 6], ["PR", 5, 0, "auto_unroll_max_step$16"], ["PR", 11, 0, "auto_unroll_max_step$16"]]]], "r": [[0.182775], 0, 2.86086, 1602798075], "v": "v0.2"}
{"i": [["[\"conv2d_nchw_gd\", 32, 224, 224, 3, 64, 7, 7, [2, 2], [3, 3], [1, 1]]", "cuda -keys=cuda,gpu -max_num_threads=1024 -model=t4 -thread_warp_size=32"], [[], [["CHW", 6, "local"], ["SP", 6, 0, 64, [1, 16, 1, 4], 1], ["SP", 6, 5, 3, [1, 3, 1, 1], 1], ["SP", 6, 10, 7, [7, 1, 1, 1], 1], ["SP", 6, 15, 7, [1, 1, 1, 1], 1], ["SP", 6, 20, 32, [1, 8], 1], ["SP", 6, 23, 112, [1, 7], 1], ["SP", 6, 26, 112, [1, 1], 1], ["RE", 6, [0, 5, 10, 15, 1, 6, 11, 16, 2, 7, 12, 17, 20, 23, 26, 21, 24, 27, 3, 8, 13, 18, 22, 25, 28, 4, 9, 14, 19]], ["FSP", 7, 0, 1, 3], ["FSP", 7, 4, 2, 3], ["FSP", 7, 8, 3, 3], ["FSP", 7, 12, 4, 3], ["RE", 7, [0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15]], ["CA", 6, 7, 11], ["CHR", 5, "shared", [6]], ["CA", 6, 7, 14], ["CI", 5], ["CHW", 2, "local"], ["SP", 2, 0, 32, [1, 2, 8, 1], 1], ["SP", 2, 5, 3, [1, 1, 1, 3], 1], ["SP", 2, 10, 230, [2, 23, 1, 1], 1], ["SP", 2, 15, 230, [1, 1, 5, 2], 1], ["SP", 2, 20, 64, [4, 1], 1], ["SP", 2, 23, 4, [1, 4], 1], ["SP", 2, 26, 4, [1, 2], 1], ["RE", 2, [0, 5, 10, 15, 1, 6, 11, 16, 2, 7, 12, 17, 20, 23, 26, 21, 24, 27, 3, 8, 13, 18, 22, 25, 28, 4, 9, 14, 19]], ["FSP", 3, 0, 19, 3], ["FSP", 3, 4, 20, 3], ["FSP", 3, 8, 21, 3], ["FSP", 3, 12, 22, 3], ["RE", 3, [0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15]], ["CA", 2, 3, 11], ["CHR", 1, "shared", [2]], ["CA", 2, 3, 14], ["CHR", 0, "shared", [3]], ["CA", 1, 4, 14], ["CHR", 0, "shared", [10]], ["CA", 1, 11, 14], ["FU", 12, [0, 1, 2, 3]], ["AN", 12, 0, 5], ["FU", 12, [1, 2, 3, 4]], ["AN", 12, 1, 4], ["FU", 12, [2, 3, 4, 5]], ["AN", 12, 2, 6], ["FU", 10, [0, 1, 2, 3]], ["SP", 10, 0, 104, [1], 1], ["AN", 10, 1, 2], ["FFSP", 10, 0, [4, 3, 2, 1], 1, 1], ["AN", 10, 1, 6], ["FU", 7, [0, 1, 2, 3]], ["SP", 7, 0, 4816896, [64], 1], ["AN", 7, 0, 5], ["AN", 7, 1, 6], ["FU", 6, [0, 1, 2, 3]], ["AN", 6, 0, 5], ["FU", 6, [1, 2, 3, 4]], ["AN", 6, 1, 4], ["FU", 6, [2, 3, 4, 5]], ["AN", 6, 2, 6], ["FU", 4, [0, 1, 2, 3]], ["SP", 4, 0, 336, [1], 1], ["AN", 4, 1, 2], 
["FFSP", 4, 0, [22, 21, 20, 19], 1, 1], ["AN", 4, 1, 6], ["FU", 2, [0, 1, 2, 3]], ["SP", 2, 0, 768, [1], 1], ["AN", 2, 1, 2], ["FFSP", 2, 0, [22, 21, 20, 19], 1, 1], ["AN", 2, 1, 6], ["FU", 1, [0, 1, 2, 3]], ["SP", 1, 0, 224, [1], 1], ["AN", 1, 1, 2], ["FFSP", 1, 0, [4, 3, 2, 1], 1, 1], ["AN", 1, 1, 6], ["PR", 5, 0, "auto_unroll_max_step$16"], ["PR", 11, 0, "auto_unroll_max_step$64"]]]], "r": [[0.284998], 0, 3.66251, 1602798078], "v": "v0.2"}
{"i": [["[\"conv2d_nchw_gd\", 32, 224, 224, 3, 64, 7, 7, [2, 2], [3, 3], [1, 1]]", "cuda -keys=cuda,gpu -max_num_threads=1024 -model=t4 -thread_warp_size=32"], [[], [["CHW", 6, "local"], ["SP", 6, 0, 64, [8, 1, 2, 2], 1], ["SP", 6, 5, 3, [1, 3, 1, 1], 1], ["SP", 6, 10, 7, [1, 7, 1, 1], 1], ["SP", 6, 15, 7, [1, 7, 1, 1], 1], ["SP", 6, 20, 32, [1, 1], 1], ["SP", 6, 23, 112, [4, 7], 1], ["SP", 6, 26, 112, [2, 2], 1], ["RE", 6, [0, 5, 10, 15, 1, 6, 11, 16, 2, 7, 12, 17, 20, 23, 26, 21, 24, 27, 3, 8, 13, 18, 22, 25, 28, 4, 9, 14, 19]], ["FSP", 7, 0, 1, 3], ["FSP", 7, 4, 2, 3], ["FSP", 7, 8, 3, 3], ["FSP", 7, 12, 4, 3], ["RE", 7, [0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15]], ["CA", 6, 7, 11], ["CHR", 5, "shared", [6]], ["CA", 6, 7, 14], ["CI", 5], ["CHW", 2, "local"], ["SP", 2, 0, 32, [1, 8, 1, 4], 1], ["SP", 2, 5, 3, [3, 1, 1, 1], 1], ["SP", 2, 10, 230, [1, 1, 5, 2], 1], ["SP", 2, 15, 230, [1, 115, 1, 1], 1], ["SP", 2, 20, 64, [1, 1], 1], ["SP", 2, 23, 4, [2, 1], 1], ["SP", 2, 26, 4, [1, 1], 1], ["RE", 2, [0, 5, 10, 15, 1, 6, 11, 16, 2, 7, 12, 17, 20, 23, 26, 21, 24, 27, 3, 8, 13, 18, 22, 25, 28, 4, 9, 14, 19]], ["FSP", 3, 0, 19, 3], ["FSP", 3, 4, 20, 3], ["FSP", 3, 8, 21, 3], ["FSP", 3, 12, 22, 3], ["RE", 3, [0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15]], ["CA", 2, 3, 11], ["CHR", 1, "shared", [2]], ["CA", 2, 3, 14], ["CHR", 0, "shared", [3]], ["CA", 1, 4, 14], ["CHR", 0, "shared", [10]], ["CA", 1, 11, 14], ["FU", 12, [0, 1, 2, 3]], ["AN", 12, 0, 5], ["FU", 12, [1, 2, 3, 4]], ["AN", 12, 1, 4], ["FU", 12, [2, 3, 4, 5]], ["AN", 12, 2, 6], ["FU", 10, [0, 1, 2, 3]], ["SP", 10, 0, 385, [1], 1], ["AN", 10, 1, 2], ["FFSP", 10, 0, [4, 3, 2, 1], 1, 1], ["AN", 10, 1, 6], ["FU", 7, [0, 1, 2, 3]], ["SP", 7, 0, 4816896, [32], 1], ["AN", 7, 0, 5], ["AN", 7, 1, 6], ["FU", 6, [0, 1, 2, 3]], ["AN", 6, 0, 5], ["FU", 6, [1, 2, 3, 4]], ["AN", 6, 1, 4], ["FU", 6, [2, 3, 4, 5]], ["AN", 6, 2, 6], ["FU", 4, [0, 1, 2, 3]], ["SP", 4, 0, 12, [1], 1], ["AN", 4, 1, 2], ["FFSP", 
4, 0, [22, 21, 20, 19], 1, 1], ["AN", 4, 1, 6], ["FU", 2, [0, 1, 2, 3]], ["SP", 2, 0, 48, [1], 1], ["AN", 2, 1, 2], ["FFSP", 2, 0, [22, 21, 20, 19], 1, 1], ["AN", 2, 1, 6], ["FU", 1, [0, 1, 2, 3]], ["SP", 1, 0, 448, [1], 1], ["AN", 1, 1, 2], ["FFSP", 1, 0, [4, 3, 2, 1], 1, 1], ["AN", 1, 1, 6], ["PR", 5, 0, "auto_unroll_max_step$0"], ["PR", 11, 0, "auto_unroll_max_step$1024"]]]], "r": [[1e+10], 4, 6.97145, 1602798079], "v": "v0.2"}
{"i": [["[\"conv2d_nchw_gd\", 32, 224, 224, 3, 64, 7, 7, [2, 2], [3, 3], [1, 1]]", "cuda -keys=cuda,gpu -max_num_threads=1024 -model=t4 -thread_warp_size=32"], [[], [["CHW", 6, "local"], ["SP", 6, 0, 64, [1, 1, 16, 2], 1], ["SP", 6, 5, 3, [1, 1, 1, 3], 1], ["SP", 6, 10, 7, [1, 7, 1, 1], 1], ["SP", 6, 15, 7, [1, 7, 1, 1], 1], ["SP", 6, 20, 32, [1, 1], 1], ["SP", 6, 23, 112, [56, 1], 1], ["SP", 6, 26, 112, [2, 1], 1], ["RE", 6, [0, 5, 10, 15, 1, 6, 11, 16, 2, 7, 12, 17, 20, 23, 26, 21, 24, 27, 3, 8, 13, 18, 22, 25, 28, 4, 9, 14, 19]], ["FSP", 7, 0, 1, 3], ["FSP", 7, 4, 2, 3], ["FSP", 7, 8, 3, 3], ["FSP", 7, 12, 4, 3], ["RE", 7, [0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15]], ["CA", 6, 7, 11], ["CHR", 5, "shared", [6]], ["CA", 6, 7, 14], ["CI", 5], ["CHW", 2, "local"], ["SP", 2, 0, 32, [4, 1, 1, 4], 1], ["SP", 2, 5, 3, [1, 3, 1, 1], 1], ["SP", 2, 10, 230, [1, 1, 1, 1], 1], ["SP", 2, 15, 230, [1, 23, 2, 1], 1], ["SP", 2, 20, 64, [1, 2], 1], ["SP", 2, 23, 4, [4, 1], 1], ["SP", 2, 26, 4, [1, 2], 1], ["RE", 2, [0, 5, 10, 15, 1, 6, 11, 16, 2, 7, 12, 17, 20, 23, 26, 21, 24, 27, 3, 8, 13, 18, 22, 25, 28, 4, 9, 14, 19]], ["FSP", 3, 0, 19, 3], ["FSP", 3, 4, 20, 3], ["FSP", 3, 8, 21, 3], ["FSP", 3, 12, 22, 3], ["RE", 3, [0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15]], ["CA", 2, 3, 11], ["CHR", 1, "shared", [2]], ["CA", 2, 3, 14], ["CHR", 0, "shared", [3]], ["CA", 1, 4, 14], ["CHR", 0, "shared", [10]], ["CA", 1, 11, 14], ["FU", 12, [0, 1, 2, 3]], ["AN", 12, 0, 5], ["FU", 12, [1, 2, 3, 4]], ["AN", 12, 1, 4], ["FU", 12, [2, 3, 4, 5]], ["AN", 12, 2, 6], ["FU", 10, [0, 1, 2, 3]], ["SP", 10, 0, 999, [1], 1], ["AN", 10, 1, 2], ["FFSP", 10, 0, [4, 3, 2, 1], 1, 1], ["AN", 10, 1, 6], ["FU", 7, [0, 1, 2, 3]], ["SP", 7, 0, 4816896, [32], 1], ["AN", 7, 0, 5], ["AN", 7, 1, 6], ["FU", 6, [0, 1, 2, 3]], ["AN", 6, 0, 5], ["FU", 6, [1, 2, 3, 4]], ["AN", 6, 1, 4], ["FU", 6, [2, 3, 4, 5]], ["AN", 6, 2, 6], ["FU", 4, [0, 1, 2, 3]], ["SP", 4, 0, 56, [7], 1], ["AN", 4, 1, 2], 
["FFSP", 4, 0, [22, 21, 20, 19], 1, 1], ["AN", 4, 1, 6], ["FU", 2, [0, 1, 2, 3]], ["SP", 2, 0, 64, [1], 1], ["AN", 2, 1, 2], ["FFSP", 2, 0, [22, 21, 20, 19], 1, 1], ["AN", 2, 1, 6], ["FU", 1, [0, 1, 2, 3]], ["SP", 1, 0, 3584, [56], 1], ["AN", 1, 1, 2], ["FFSP", 1, 0, [4, 3, 2, 1], 1, 1], ["AN", 1, 1, 6], ["PR", 5, 0, "auto_unroll_max_step$16"], ["PR", 11, 0, "auto_unroll_max_step$16"]]]], "r": [[0.180806], 0, 2.84316, 1602798081], "v": "v0.2"}
{"i": [["[\"conv2d_nchw_gd\", 32, 224, 224, 3, 64, 7, 7, [2, 2], [3, 3], [1, 1]]", "cuda -keys=cuda,gpu -max_num_threads=1024 -model=t4 -thread_warp_size=32"], [[], [["CHW", 6, "local"], ["SP", 6, 0, 64, [1, 64, 1, 1], 1], ["SP", 6, 5, 3, [1, 1, 1, 1], 1], ["SP", 6, 10, 7, [1, 1, 7, 1], 1], ["SP", 6, 15, 7, [1, 1, 1, 1], 1], ["SP", 6, 20, 32, [16, 2], 1], ["SP", 6, 23, 112, [2, 1], 1], ["SP", 6, 26, 112, [1, 1], 1], ["RE", 6, [0, 5, 10, 15, 1, 6, 11, 16, 2, 7, 12, 17, 20, 23, 26, 21, 24, 27, 3, 8, 13, 18, 22, 25, 28, 4, 9, 14, 19]], ["FSP", 7, 0, 1, 3], ["FSP", 7, 4, 2, 3], ["FSP", 7, 8, 3, 3], ["FSP", 7, 12, 4, 3], ["RE", 7, [0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15]], ["CA", 6, 7, 11], ["CHR", 5, "shared", [6]], ["CA", 6, 7, 14], ["CI", 5], ["CHW", 2, "local"], ["SP", 2, 0, 32, [1, 4, 1, 2], 1], ["SP", 2, 5, 3, [3, 1, 1, 1], 1], ["SP", 2, 10, 230, [2, 23, 1, 1], 1], ["SP", 2, 15, 230, [1, 5, 1, 2], 1], ["SP", 2, 20, 64, [1, 2], 1], ["SP", 2, 23, 4, [2, 2], 1], ["SP", 2, 26, 4, [2, 1], 1], ["RE", 2, [0, 5, 10, 15, 1, 6, 11, 16, 2, 7, 12, 17, 20, 23, 26, 21, 24, 27, 3, 8, 13, 18, 22, 25, 28, 4, 9, 14, 19]], ["FSP", 3, 0, 19, 3], ["FSP", 3, 4, 20, 3], ["FSP", 3, 8, 21, 3], ["FSP", 3, 12, 22, 3], ["RE", 3, [0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15]], ["CA", 2, 3, 11], ["CHR", 1, "shared", [2]], ["CA", 2, 3, 14], ["CHR", 0, "shared", [3]], ["CA", 1, 4, 14], ["CHR", 0, "shared", [10]], ["CA", 1, 11, 14], ["FU", 12, [0, 1, 2, 3]], ["AN", 12, 0, 5], ["FU", 12, [1, 2, 3, 4]], ["AN", 12, 1, 4], ["FU", 12, [2, 3, 4, 5]], ["AN", 12, 2, 6], ["FU", 10, [0, 1, 2, 3]], ["SP", 10, 0, 288, [1], 1], ["AN", 10, 1, 2], ["FFSP", 10, 0, [4, 3, 2, 1], 1, 1], ["AN", 10, 1, 6], ["FU", 7, [0, 1, 2, 3]], ["SP", 7, 0, 4816896, [64], 1], ["AN", 7, 0, 5], ["AN", 7, 1, 6], ["FU", 6, [0, 1, 2, 3]], ["AN", 6, 0, 5], ["FU", 6, [1, 2, 3, 4]], ["AN", 6, 1, 4], ["FU", 6, [2, 3, 4, 5]], ["AN", 6, 2, 6], ["FU", 4, [0, 1, 2, 3]], ["SP", 4, 0, 56, [1], 1], ["AN", 4, 1, 2], 
["FFSP", 4, 0, [22, 21, 20, 19], 1, 1], ["AN", 4, 1, 6], ["FU", 2, [0, 1, 2, 3]], ["SP", 2, 0, 32, [2], 1], ["AN", 2, 1, 2], ["FFSP", 2, 0, [22, 21, 20, 19], 1, 1], ["AN", 2, 1, 6], ["FU", 1, [0, 1, 2, 3]], ["SP", 1, 0, 64, [1], 1], ["AN", 1, 1, 2], ["FFSP", 1, 0, [4, 3, 2, 1], 1, 1], ["AN", 1, 1, 6], ["PR", 5, 0, "auto_unroll_max_step$64"], ["PR", 11, 0, "auto_unroll_max_step$64"]]]], "r": [[0.0557266], 0, 2.81324, 1602798083], "v": "v0.2"}
{"i": [["[\"conv2d_nchw_gd\", 32, 224, 224, 3, 64, 7, 7, [2, 2], [3, 3], [1, 1]]", "cuda -keys=cuda,gpu -max_num_threads=1024 -model=t4 -thread_warp_size=32"], [[], [["CHW", 6, "local"], ["SP", 6, 0, 64, [1, 1, 16, 2], 1], ["SP", 6, 5, 3, [1, 1, 1, 3], 1], ["SP", 6, 10, 7, [1, 7, 1, 1], 1], ["SP", 6, 15, 7, [1, 1, 1, 1], 1], ["SP", 6, 20, 32, [1, 2], 1], ["SP", 6, 23, 112, [2, 1], 1], ["SP", 6, 26, 112, [1, 2], 1], ["RE", 6, [0, 5, 10, 15, 1, 6, 11, 16, 2, 7, 12, 17, 20, 23, 26, 21, 24, 27, 3, 8, 13, 18, 22, 25, 28, 4, 9, 14, 19]], ["FSP", 7, 0, 1, 3], ["FSP", 7, 4, 2, 3], ["FSP", 7, 8, 3, 3], ["FSP", 7, 12, 4, 3], ["RE", 7, [0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15]], ["CA", 6, 7, 11], ["CHR", 5, "shared", [6]], ["CA", 6, 7, 14], ["CI", 5], ["CHW", 2, "local"], ["SP", 2, 0, 32, [1, 4, 8, 1], 1], ["SP", 2, 5, 3, [1, 3, 1, 1], 1], ["SP", 2, 10, 230, [1, 1, 1, 5], 1], ["SP", 2, 15, 230, [1, 23, 2, 1], 1], ["SP", 2, 20, 64, [1, 2], 1], ["SP", 2, 23, 4, [4, 1], 1], ["SP", 2, 26, 4, [1, 2], 1], ["RE", 2, [0, 5, 10, 15, 1, 6, 11, 16, 2, 7, 12, 17, 20, 23, 26, 21, 24, 27, 3, 8, 13, 18, 22, 25, 28, 4, 9, 14, 19]], ["FSP", 3, 0, 19, 3], ["FSP", 3, 4, 20, 3], ["FSP", 3, 8, 21, 3], ["FSP", 3, 12, 22, 3], ["RE", 3, [0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15]], ["CA", 2, 3, 11], ["CHR", 1, "shared", [2]], ["CA", 2, 3, 14], ["CHR", 0, "shared", [3]], ["CA", 1, 4, 14], ["CHR", 0, "shared", [10]], ["CA", 1, 11, 14], ["FU", 12, [0, 1, 2, 3]], ["AN", 12, 0, 5], ["FU", 12, [1, 2, 3, 4]], ["AN", 12, 1, 4], ["FU", 12, [2, 3, 4, 5]], ["AN", 12, 2, 6], ["FU", 10, [0, 1, 2, 3]], ["SP", 10, 0, 999, [1], 1], ["AN", 10, 1, 2], ["FFSP", 10, 0, [4, 3, 2, 1], 1, 1], ["AN", 10, 1, 6], ["FU", 7, [0, 1, 2, 3]], ["SP", 7, 0, 4816896, [32], 1], ["AN", 7, 0, 5], ["AN", 7, 1, 6], ["FU", 6, [0, 1, 2, 3]], ["AN", 6, 0, 5], ["FU", 6, [1, 2, 3, 4]], ["AN", 6, 1, 4], ["FU", 6, [2, 3, 4, 5]], ["AN", 6, 2, 6], ["FU", 4, [0, 1, 2, 3]], ["SP", 4, 0, 56, [1], 1], ["AN", 4, 1, 2], ["FFSP", 
4, 0, [22, 21, 20, 19], 1, 1], ["AN", 4, 1, 6], ["FU", 2, [0, 1, 2, 3]], ["SP", 2, 0, 64, [1], 1], ["AN", 2, 1, 2], ["FFSP", 2, 0, [22, 21, 20, 19], 1, 1], ["AN", 2, 1, 6], ["FU", 1, [0, 1, 2, 3]], ["SP", 1, 0, 3584, [1], 1], ["AN", 1, 1, 2], ["FFSP", 1, 0, [4, 3, 2, 1], 1, 1], ["AN", 1, 1, 6], ["PR", 5, 0, "auto_unroll_max_step$16"], ["PR", 11, 0, "auto_unroll_max_step$16"]]]], "r": [[0.533157], 0, 4.1963, 1602798086], "v": "v0.2"}
{"i": [["[\"conv2d_nchw_gd\", 32, 224, 224, 3, 64, 7, 7, [2, 2], [3, 3], [1, 1]]", "cuda -keys=cuda,gpu -max_num_threads=1024 -model=t4 -thread_warp_size=32"], [[], [["CHW", 6, "local"], ["SP", 6, 0, 64, [1, 32, 2, 1], 1], ["SP", 6, 5, 3, [1, 1, 1, 1], 1], ["SP", 6, 10, 7, [1, 1, 7, 1], 1], ["SP", 6, 15, 7, [1, 1, 1, 1], 1], ["SP", 6, 20, 32, [16, 2], 1], ["SP", 6, 23, 112, [2, 1], 1], ["SP", 6, 26, 112, [1, 1], 1], ["RE", 6, [0, 5, 10, 15, 1, 6, 11, 16, 2, 7, 12, 17, 20, 23, 26, 21, 24, 27, 3, 8, 13, 18, 22, 25, 28, 4, 9, 14, 19]], ["FSP", 7, 0, 1, 3], ["FSP", 7, 4, 2, 3], ["FSP", 7, 8, 3, 3], ["FSP", 7, 12, 4, 3], ["RE", 7, [0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15]], ["CA", 6, 7, 11], ["CHR", 5, "shared", [6]], ["CA", 6, 7, 14], ["CI", 5], ["CHW", 2, "local"], ["SP", 2, 0, 32, [1, 4, 1, 2], 1], ["SP", 2, 5, 3, [3, 1, 1, 1], 1], ["SP", 2, 10, 230, [2, 23, 1, 1], 1], ["SP", 2, 15, 230, [1, 5, 1, 2], 1], ["SP", 2, 20, 64, [1, 8], 1], ["SP", 2, 23, 4, [2, 2], 1], ["SP", 2, 26, 4, [2, 1], 1], ["RE", 2, [0, 5, 10, 15, 1, 6, 11, 16, 2, 7, 12, 17, 20, 23, 26, 21, 24, 27, 3, 8, 13, 18, 22, 25, 28, 4, 9, 14, 19]], ["FSP", 3, 0, 19, 3], ["FSP", 3, 4, 20, 3], ["FSP", 3, 8, 21, 3], ["FSP", 3, 12, 22, 3], ["RE", 3, [0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15]], ["CA", 2, 3, 11], ["CHR", 1, "shared", [2]], ["CA", 2, 3, 14], ["CHR", 0, "shared", [3]], ["CA", 1, 4, 14], ["CHR", 0, "shared", [10]], ["CA", 1, 11, 14], ["FU", 12, [0, 1, 2, 3]], ["AN", 12, 0, 5], ["FU", 12, [1, 2, 3, 4]], ["AN", 12, 1, 4], ["FU", 12, [2, 3, 4, 5]], ["AN", 12, 2, 6], ["FU", 10, [0, 1, 2, 3]], ["SP", 10, 0, 288, [1], 1], ["AN", 10, 1, 2], ["FFSP", 10, 0, [4, 3, 2, 1], 1, 1], ["AN", 10, 1, 6], ["FU", 7, [0, 1, 2, 3]], ["SP", 7, 0, 4816896, [32], 1], ["AN", 7, 0, 5], ["AN", 7, 1, 6], ["FU", 6, [0, 1, 2, 3]], ["AN", 6, 0, 5], ["FU", 6, [1, 2, 3, 4]], ["AN", 6, 1, 4], ["FU", 6, [2, 3, 4, 5]], ["AN", 6, 2, 6], ["FU", 4, [0, 1, 2, 3]], ["SP", 4, 0, 56, [1], 1], ["AN", 4, 1, 2], 
["FFSP", 4, 0, [22, 21, 20, 19], 1, 1], ["AN", 4, 1, 6], ["FU", 2, [0, 1, 2, 3]], ["SP", 2, 0, 32, [1], 1], ["AN", 2, 1, 2], ["FFSP", 2, 0, [22, 21, 20, 19], 1, 1], ["AN", 2, 1, 6], ["FU", 1, [0, 1, 2, 3]], ["SP", 1, 0, 64, [1], 1], ["AN", 1, 1, 2], ["FFSP", 1, 0, [4, 3, 2, 1], 1, 1], ["AN", 1, 1, 6], ["PR", 5, 0, "auto_unroll_max_step$64"], ["PR", 11, 0, "auto_unroll_max_step$64"]]]], "r": [[1e+10], 4, 2.66111, 1602798087], "v": "v0.2"}
{"i": [["[\"conv2d_nchw_gd\", 32, 224, 224, 3, 64, 7, 7, [2, 2], [3, 3], [1, 1]]", "cuda -keys=cuda,gpu -max_num_threads=1024 -model=t4 -thread_warp_size=32"], [[], [["CHW", 6, "local"], ["SP", 6, 0, 64, [8, 1, 2, 1], 1], ["SP", 6, 5, 3, [1, 3, 1, 1], 1], ["SP", 6, 10, 7, [1, 7, 1, 1], 1], ["SP", 6, 15, 7, [1, 7, 1, 1], 1], ["SP", 6, 20, 32, [2, 8], 1], ["SP", 6, 23, 112, [1, 1], 1], ["SP", 6, 26, 112, [4, 1], 1], ["RE", 6, [0, 5, 10, 15, 1, 6, 11, 16, 2, 7, 12, 17, 20, 23, 26, 21, 24, 27, 3, 8, 13, 18, 22, 25, 28, 4, 9, 14, 19]], ["FSP", 7, 0, 1, 3], ["FSP", 7, 4, 2, 3], ["FSP", 7, 8, 3, 3], ["FSP", 7, 12, 4, 3], ["RE", 7, [0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15]], ["CA", 6, 7, 11], ["CHR", 5, "shared", [6]], ["CA", 6, 7, 14], ["CI", 5], ["CHW", 2, "local"], ["SP", 2, 0, 32, [1, 32, 1, 1], 1], ["SP", 2, 5, 3, [1, 1, 3, 1], 1], ["SP", 2, 10, 230, [1, 1, 5, 1], 1], ["SP", 2, 15, 230, [1, 5, 1, 23], 1], ["SP", 2, 20, 64, [2, 1], 1], ["SP", 2, 23, 4, [1, 1], 1], ["SP", 2, 26, 4, [2, 1], 1], ["RE", 2, [0, 5, 10, 15, 1, 6, 11, 16, 2, 7, 12, 17, 20, 23, 26, 21, 24, 27, 3, 8, 13, 18, 22, 25, 28, 4, 9, 14, 19]], ["FSP", 3, 0, 19, 3], ["FSP", 3, 4, 20, 3], ["FSP", 3, 8, 21, 3], ["FSP", 3, 12, 22, 3], ["RE", 3, [0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15]], ["CA", 2, 3, 11], ["CHR", 1, "shared", [2]], ["CA", 2, 3, 14], ["CHR", 0, "shared", [3]], ["CA", 1, 4, 14], ["CHR", 0, "shared", [10]], ["CA", 1, 11, 14], ["FU", 12, [0, 1, 2, 3]], ["AN", 12, 0, 5], ["FU", 12, [1, 2, 3, 4]], ["AN", 12, 1, 4], ["FU", 12, [2, 3, 4, 5]], ["AN", 12, 2, 6], ["FU", 10, [0, 1, 2, 3]], ["SP", 10, 0, 112, [1], 1], ["AN", 10, 1, 2], ["FFSP", 10, 0, [4, 3, 2, 1], 1, 1], ["AN", 10, 1, 6], ["FU", 7, [0, 1, 2, 3]], ["SP", 7, 0, 4816896, [32], 1], ["AN", 7, 0, 5], ["AN", 7, 1, 6], ["FU", 6, [0, 1, 2, 3]], ["AN", 6, 0, 5], ["FU", 6, [1, 2, 3, 4]], ["AN", 6, 1, 4], ["FU", 6, [2, 3, 4, 5]], ["AN", 6, 2, 6], ["FU", 4, [0, 1, 2, 3]], ["SP", 4, 0, 48, [1], 1], ["AN", 4, 1, 2], ["FFSP", 
4, 0, [22, 21, 20, 19], 1, 1], ["AN", 4, 1, 6], ["FU", 2, [0, 1, 2, 3]], ["SP", 2, 0, 312, [4], 1], ["AN", 2, 1, 2], ["FFSP", 2, 0, [22, 21, 20, 19], 1, 1], ["AN", 2, 1, 6], ["FU", 1, [0, 1, 2, 3]], ["SP", 1, 0, 128, [1], 1], ["AN", 1, 1, 2], ["FFSP", 1, 0, [4, 3, 2, 1], 1, 1], ["AN", 1, 1, 6], ["PR", 5, 0, "auto_unroll_max_step$64"], ["PR", 11, 0, "auto_unroll_max_step$64"]]]], "r": [[0.475149], 0, 5.03817, 1602798090], "v": "v0.2"}
{"i": [["[\"conv2d_nchw_gd\", 32, 224, 224, 3, 64, 7, 7, [2, 2], [3, 3], [1, 1]]", "cuda -keys=cuda,gpu -max_num_threads=1024 -model=t4 -thread_warp_size=32"], [[], [["CHW", 6, "local"], ["SP", 6, 0, 64, [1, 16, 1, 4], 1], ["SP", 6, 5, 3, [1, 3, 1, 1], 1], ["SP", 6, 10, 7, [1, 1, 1, 1], 1], ["SP", 6, 15, 7, [1, 1, 1, 1], 1], ["SP", 6, 20, 32, [1, 8], 1], ["SP", 6, 23, 112, [7, 1], 1], ["SP", 6, 26, 112, [1, 1], 1], ["RE", 6, [0, 5, 10, 15, 1, 6, 11, 16, 2, 7, 12, 17, 20, 23, 26, 21, 24, 27, 3, 8, 13, 18, 22, 25, 28, 4, 9, 14, 19]], ["FSP", 7, 0, 1, 3], ["FSP", 7, 4, 2, 3], ["FSP", 7, 8, 3, 3], ["FSP", 7, 12, 4, 3], ["RE", 7, [0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15]], ["CA", 6, 7, 11], ["CHR", 5, "shared", [6]], ["CA", 6, 7, 14], ["CI", 5], ["CHW", 2, "local"], ["SP", 2, 0, 32, [1, 2, 8, 1], 1], ["SP", 2, 5, 3, [1, 1, 1, 3], 1], ["SP", 2, 10, 230, [2, 23, 1, 1], 1], ["SP", 2, 15, 230, [1, 1, 5, 2], 1], ["SP", 2, 20, 64, [4, 1], 1], ["SP", 2, 23, 4, [2, 2], 1], ["SP", 2, 26, 4, [1, 2], 1], ["RE", 2, [0, 5, 10, 15, 1, 6, 11, 16, 2, 7, 12, 17, 20, 23, 26, 21, 24, 27, 3, 8, 13, 18, 22, 25, 28, 4, 9, 14, 19]], ["FSP", 3, 0, 19, 3], ["FSP", 3, 4, 20, 3], ["FSP", 3, 8, 21, 3], ["FSP", 3, 12, 22, 3], ["RE", 3, [0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15]], ["CA", 2, 3, 11], ["CHR", 1, "shared", [2]], ["CA", 2, 3, 14], ["CHR", 0, "shared", [3]], ["CA", 1, 4, 14], ["CHR", 0, "shared", [10]], ["CA", 1, 11, 14], ["FU", 12, [0, 1, 2, 3]], ["AN", 12, 0, 5], ["FU", 12, [1, 2, 3, 4]], ["AN", 12, 1, 4], ["FU", 12, [2, 3, 4, 5]], ["AN", 12, 2, 6], ["FU", 10, [0, 1, 2, 3]], ["SP", 10, 0, 104, [1], 1], ["AN", 10, 1, 2], ["FFSP", 10, 0, [4, 3, 2, 1], 1, 1], ["AN", 10, 1, 6], ["FU", 7, [0, 1, 2, 3]], ["SP", 7, 0, 4816896, [32], 1], ["AN", 7, 0, 5], ["AN", 7, 1, 6], ["FU", 6, [0, 1, 2, 3]], ["AN", 6, 0, 5], ["FU", 6, [1, 2, 3, 4]], ["AN", 6, 1, 4], ["FU", 6, [2, 3, 4, 5]], ["AN", 6, 2, 6], ["FU", 4, [0, 1, 2, 3]], ["SP", 4, 0, 336, [1], 1], ["AN", 4, 1, 2], 
["FFSP", 4, 0, [22, 21, 20, 19], 1, 1], ["AN", 4, 1, 6], ["FU", 2, [0, 1, 2, 3]], ["SP", 2, 0, 768, [1], 1], ["AN", 2, 1, 2], ["FFSP", 2, 0, [22, 21, 20, 19], 1, 1], ["AN", 2, 1, 6], ["FU", 1, [0, 1, 2, 3]], ["SP", 1, 0, 224, [28], 1], ["AN", 1, 1, 2], ["FFSP", 1, 0, [4, 3, 2, 1], 1, 1], ["AN", 1, 1, 6], ["PR", 5, 0, "auto_unroll_max_step$16"], ["PR", 11, 0, "auto_unroll_max_step$64"]]]], "r": [[0.601378], 0, 4.85202, 1602798094], "v": "v0.2"}
{"i": [["[\"conv2d_nchw_gd\", 32, 224, 224, 3, 64, 7, 7, [2, 2], [3, 3], [1, 1]]", "cuda -keys=cuda,gpu -max_num_threads=1024 -model=t4 -thread_warp_size=32"], [[], [["CHW", 6, "local"], ["SP", 6, 0, 64, [1, 16, 1, 4], 1], ["SP", 6, 5, 3, [1, 3, 1, 1], 1], ["SP", 6, 10, 7, [7, 1, 1, 1], 1], ["SP", 6, 15, 7, [1, 7, 1, 1], 1], ["SP", 6, 20, 32, [1, 1], 1], ["SP", 6, 23, 112, [1, 7], 1], ["SP", 6, 26, 112, [1, 1], 1], ["RE", 6, [0, 5, 10, 15, 1, 6, 11, 16, 2, 7, 12, 17, 20, 23, 26, 21, 24, 27, 3, 8, 13, 18, 22, 25, 28, 4, 9, 14, 19]], ["FSP", 7, 0, 1, 3], ["FSP", 7, 4, 2, 3], ["FSP", 7, 8, 3, 3], ["FSP", 7, 12, 4, 3], ["RE", 7, [0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15]], ["CA", 6, 7, 11], ["CHR", 5, "shared", [6]], ["CA", 6, 7, 14], ["CI", 5], ["CHW", 2, "local"], ["SP", 2, 0, 32, [1, 2, 8, 1], 1], ["SP", 2, 5, 3, [1, 1, 1, 3], 1], ["SP", 2, 10, 230, [2, 23, 1, 1], 1], ["SP", 2, 15, 230, [1, 1, 5, 2], 1], ["SP", 2, 20, 64, [4, 1], 1], ["SP", 2, 23, 4, [2, 2], 1], ["SP", 2, 26, 4, [1, 2], 1], ["RE", 2, [0, 5, 10, 15, 1, 6, 11, 16, 2, 7, 12, 17, 20, 23, 26, 21, 24, 27, 3, 8, 13, 18, 22, 25, 28, 4, 9, 14, 19]], ["FSP", 3, 0, 19, 3], ["FSP", 3, 4, 20, 3], ["FSP", 3, 8, 21, 3], ["FSP", 3, 12, 22, 3], ["RE", 3, [0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15]], ["CA", 2, 3, 11], ["CHR", 1, "shared", [2]], ["CA", 2, 3, 14], ["CHR", 0, "shared", [3]], ["CA", 1, 4, 14], ["CHR", 0, "shared", [10]], ["CA", 1, 11, 14], ["FU", 12, [0, 1, 2, 3]], ["AN", 12, 0, 5], ["FU", 12, [1, 2, 3, 4]], ["AN", 12, 1, 4], ["FU", 12, [2, 3, 4, 5]], ["AN", 12, 2, 6], ["FU", 10, [0, 1, 2, 3]], ["SP", 10, 0, 104, [2], 1], ["AN", 10, 1, 2], ["FFSP", 10, 0, [4, 3, 2, 1], 1, 1], ["AN", 10, 1, 6], ["FU", 7, [0, 1, 2, 3]], ["SP", 7, 0, 4816896, [32], 1], ["AN", 7, 0, 5], ["AN", 7, 1, 6], ["FU", 6, [0, 1, 2, 3]], ["AN", 6, 0, 5], ["FU", 6, [1, 2, 3, 4]], ["AN", 6, 1, 4], ["FU", 6, [2, 3, 4, 5]], ["AN", 6, 2, 6], ["FU", 4, [0, 1, 2, 3]], ["SP", 4, 0, 336, [1], 1], ["AN", 4, 1, 2], 
["FFSP", 4, 0, [22, 21, 20, 19], 1, 1], ["AN", 4, 1, 6], ["FU", 2, [0, 1, 2, 3]], ["SP", 2, 0, 768, [1], 1], ["AN", 2, 1, 2], ["FFSP", 2, 0, [22, 21, 20, 19], 1, 1], ["AN", 2, 1, 6], ["FU", 1, [0, 1, 2, 3]], ["SP", 1, 0, 224, [1], 1], ["AN", 1, 1, 2], ["FFSP", 1, 0, [4, 3, 2, 1], 1, 1], ["AN", 1, 1, 6], ["PR", 5, 0, "auto_unroll_max_step$16"], ["PR", 11, 0, "auto_unroll_max_step$64"]]]], "r": [[0.436542], 0, 4.0016, 1602798097], "v": "v0.2"}
{"i": [["[\"conv2d_nchw_gd\", 32, 224, 224, 3, 64, 7, 7, [2, 2], [3, 3], [1, 1]]", "cuda -keys=cuda,gpu -max_num_threads=1024 -model=t4 -thread_warp_size=32"], [[], [["CHW", 6, "local"], ["SP", 6, 0, 64, [1, 16, 1, 4], 1], ["SP", 6, 5, 3, [1, 3, 1, 1], 1], ["SP", 6, 10, 7, [7, 1, 1, 1], 1], ["SP", 6, 15, 7, [1, 1, 1, 1], 1], ["SP", 6, 20, 32, [1, 8], 1], ["SP", 6, 23, 112, [7, 1], 1], ["SP", 6, 26, 112, [1, 1], 1], ["RE", 6, [0, 5, 10, 15, 1, 6, 11, 16, 2, 7, 12, 17, 20, 23, 26, 21, 24, 27, 3, 8, 13, 18, 22, 25, 28, 4, 9, 14, 19]], ["FSP", 7, 0, 1, 3], ["FSP", 7, 4, 2, 3], ["FSP", 7, 8, 3, 3], ["FSP", 7, 12, 4, 3], ["RE", 7, [0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15]], ["CA", 6, 7, 11], ["CHR", 5, "shared", [6]], ["CA", 6, 7, 14], ["CI", 5], ["CHW", 2, "local"], ["SP", 2, 0, 32, [1, 2, 8, 1], 1], ["SP", 2, 5, 3, [1, 1, 3, 1], 1], ["SP", 2, 10, 230, [2, 23, 1, 1], 1], ["SP", 2, 15, 230, [1, 1, 5, 2], 1], ["SP", 2, 20, 64, [4, 1], 1], ["SP", 2, 23, 4, [2, 2], 1], ["SP", 2, 26, 4, [1, 2], 1], ["RE", 2, [0, 5, 10, 15, 1, 6, 11, 16, 2, 7, 12, 17, 20, 23, 26, 21, 24, 27, 3, 8, 13, 18, 22, 25, 28, 4, 9, 14, 19]], ["FSP", 3, 0, 19, 3], ["FSP", 3, 4, 20, 3], ["FSP", 3, 8, 21, 3], ["FSP", 3, 12, 22, 3], ["RE", 3, [0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15]], ["CA", 2, 3, 11], ["CHR", 1, "shared", [2]], ["CA", 2, 3, 14], ["CHR", 0, "shared", [3]], ["CA", 1, 4, 14], ["CHR", 0, "shared", [10]], ["CA", 1, 11, 14], ["FU", 12, [0, 1, 2, 3]], ["AN", 12, 0, 5], ["FU", 12, [1, 2, 3, 4]], ["AN", 12, 1, 4], ["FU", 12, [2, 3, 4, 5]], ["AN", 12, 2, 6], ["FU", 10, [0, 1, 2, 3]], ["SP", 10, 0, 104, [26], 1], ["AN", 10, 1, 2], ["FFSP", 10, 0, [4, 3, 2, 1], 1, 1], ["AN", 10, 1, 6], ["FU", 7, [0, 1, 2, 3]], ["SP", 7, 0, 4816896, [32], 1], ["AN", 7, 0, 5], ["AN", 7, 1, 6], ["FU", 6, [0, 1, 2, 3]], ["AN", 6, 0, 5], ["FU", 6, [1, 2, 3, 4]], ["AN", 6, 1, 4], ["FU", 6, [2, 3, 4, 5]], ["AN", 6, 2, 6], ["FU", 4, [0, 1, 2, 3]], ["SP", 4, 0, 336, [1], 1], ["AN", 4, 1, 2], 
["FFSP", 4, 0, [22, 21, 20, 19], 1, 1], ["AN", 4, 1, 6], ["FU", 2, [0, 1, 2, 3]], ["SP", 2, 0, 768, [1], 1], ["AN", 2, 1, 2], ["FFSP", 2, 0, [22, 21, 20, 19], 1, 1], ["AN", 2, 1, 6], ["FU", 1, [0, 1, 2, 3]], ["SP", 1, 0, 224, [1], 1], ["AN", 1, 1, 2], ["FFSP", 1, 0, [4, 3, 2, 1], 1, 1], ["AN", 1, 1, 6], ["PR", 5, 0, "auto_unroll_max_step$16"], ["PR", 11, 0, "auto_unroll_max_step$64"]]]], "r": [[0.45132], 0, 4.63824, 1602798100], "v": "v0.2"}
{"i": [["[\"conv2d_nchw_gd\", 32, 224, 224, 3, 64, 7, 7, [2, 2], [3, 3], [1, 1]]", "cuda -keys=cuda,gpu -max_num_threads=1024 -model=t4 -thread_warp_size=32"], [[], [["CHW", 6, "local"], ["SP", 6, 0, 64, [1, 32, 1, 1], 1], ["SP", 6, 5, 3, [1, 1, 1, 1], 1], ["SP", 6, 10, 7, [1, 1, 7, 1], 1], ["SP", 6, 15, 7, [1, 7, 1, 1], 1], ["SP", 6, 20, 32, [16, 2], 1], ["SP", 6, 23, 112, [2, 1], 1], ["SP", 6, 26, 112, [1, 1], 1], ["RE", 6, [0, 5, 10, 15, 1, 6, 11, 16, 2, 7, 12, 17, 20, 23, 26, 21, 24, 27, 3, 8, 13, 18, 22, 25, 28, 4, 9, 14, 19]], ["FSP", 7, 0, 1, 3], ["FSP", 7, 4, 2, 3], ["FSP", 7, 8, 3, 3], ["FSP", 7, 12, 4, 3], ["RE", 7, [0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15]], ["CA", 6, 7, 11], ["CHR", 5, "shared", [6]], ["CA", 6, 7, 14], ["CI", 5], ["CHW", 2, "local"], ["SP", 2, 0, 32, [1, 4, 1, 2], 1], ["SP", 2, 5, 3, [3, 1, 1, 1], 1], ["SP", 2, 10, 230, [2, 23, 1, 1], 1], ["SP", 2, 15, 230, [1, 5, 1, 2], 1], ["SP", 2, 20, 64, [1, 2], 1], ["SP", 2, 23, 4, [2, 2], 1], ["SP", 2, 26, 4, [2, 1], 1], ["RE", 2, [0, 5, 10, 15, 1, 6, 11, 16, 2, 7, 12, 17, 20, 23, 26, 21, 24, 27, 3, 8, 13, 18, 22, 25, 28, 4, 9, 14, 19]], ["FSP", 3, 0, 19, 3], ["FSP", 3, 4, 20, 3], ["FSP", 3, 8, 21, 3], ["FSP", 3, 12, 22, 3], ["RE", 3, [0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15]], ["CA", 2, 3, 11], ["CHR", 1, "shared", [2]], ["CA", 2, 3, 14], ["CHR", 0, "shared", [3]], ["CA", 1, 4, 14], ["CHR", 0, "shared", [10]], ["CA", 1, 11, 14], ["FU", 12, [0, 1, 2, 3]], ["AN", 12, 0, 5], ["FU", 12, [1, 2, 3, 4]], ["AN", 12, 1, 4], ["FU", 12, [2, 3, 4, 5]], ["AN", 12, 2, 6], ["FU", 10, [0, 1, 2, 3]], ["SP", 10, 0, 288, [36], 1], ["AN", 10, 1, 2], ["FFSP", 10, 0, [4, 3, 2, 1], 1, 1], ["AN", 10, 1, 6], ["FU", 7, [0, 1, 2, 3]], ["SP", 7, 0, 4816896, [64], 1], ["AN", 7, 0, 5], ["AN", 7, 1, 6], ["FU", 6, [0, 1, 2, 3]], ["AN", 6, 0, 5], ["FU", 6, [1, 2, 3, 4]], ["AN", 6, 1, 4], ["FU", 6, [2, 3, 4, 5]], ["AN", 6, 2, 6], ["FU", 4, [0, 1, 2, 3]], ["SP", 4, 0, 56, [1], 1], ["AN", 4, 1, 2], 
["FFSP", 4, 0, [22, 21, 20, 19], 1, 1], ["AN", 4, 1, 6], ["FU", 2, [0, 1, 2, 3]], ["SP", 2, 0, 32, [1], 1], ["AN", 2, 1, 2], ["FFSP", 2, 0, [22, 21, 20, 19], 1, 1], ["AN", 2, 1, 6], ["FU", 1, [0, 1, 2, 3]], ["SP", 1, 0, 64, [1], 1], ["AN", 1, 1, 2], ["FFSP", 1, 0, [4, 3, 2, 1], 1, 1], ["AN", 1, 1, 6], ["PR", 5, 0, "auto_unroll_max_step$64"], ["PR", 11, 0, "auto_unroll_max_step$64"]]]], "r": [[0.0911261], 0, 3.40014, 1602798102], "v": "v0.2"}
{"i": [["[\"conv2d_nchw_gd\", 32, 224, 224, 3, 64, 7, 7, [2, 2], [3, 3], [1, 1]]", "cuda -keys=cuda,gpu -max_num_threads=1024 -model=t4 -thread_warp_size=32"], [[], [["CHW", 6, "local"], ["SP", 6, 0, 64, [2, 2, 2, 1], 1], ["SP", 6, 5, 3, [1, 3, 1, 1], 1], ["SP", 6, 10, 7, [1, 7, 1, 1], 1], ["SP", 6, 15, 7, [1, 7, 1, 1], 1], ["SP", 6, 20, 32, [2, 8], 1], ["SP", 6, 23, 112, [1, 1], 1], ["SP", 6, 26, 112, [4, 1], 1], ["RE", 6, [0, 5, 10, 15, 1, 6, 11, 16, 2, 7, 12, 17, 20, 23, 26, 21, 24, 27, 3, 8, 13, 18, 22, 25, 28, 4, 9, 14, 19]], ["FSP", 7, 0, 1, 3], ["FSP", 7, 4, 2, 3], ["FSP", 7, 8, 3, 3], ["FSP", 7, 12, 4, 3], ["RE", 7, [0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15]], ["CA", 6, 7, 11], ["CHR", 5, "shared", [6]], ["CA", 6, 7, 14], ["CI", 5], ["CHW", 2, "local"], ["SP", 2, 0, 32, [1, 32, 1, 1], 1], ["SP", 2, 5, 3, [1, 1, 3, 1], 1], ["SP", 2, 10, 230, [1, 1, 5, 1], 1], ["SP", 2, 15, 230, [1, 5, 1, 23], 1], ["SP", 2, 20, 64, [2, 1], 1], ["SP", 2, 23, 4, [1, 1], 1], ["SP", 2, 26, 4, [2, 1], 1], ["RE", 2, [0, 5, 10, 15, 1, 6, 11, 16, 2, 7, 12, 17, 20, 23, 26, 21, 24, 27, 3, 8, 13, 18, 22, 25, 28, 4, 9, 14, 19]], ["FSP", 3, 0, 19, 3], ["FSP", 3, 4, 20, 3], ["FSP", 3, 8, 21, 3], ["FSP", 3, 12, 22, 3], ["RE", 3, [0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15]], ["CA", 2, 3, 11], ["CHR", 1, "shared", [2]], ["CA", 2, 3, 14], ["CHR", 0, "shared", [3]], ["CA", 1, 4, 14], ["CHR", 0, "shared", [10]], ["CA", 1, 11, 14], ["FU", 12, [0, 1, 2, 3]], ["AN", 12, 0, 5], ["FU", 12, [1, 2, 3, 4]], ["AN", 12, 1, 4], ["FU", 12, [2, 3, 4, 5]], ["AN", 12, 2, 6], ["FU", 10, [0, 1, 2, 3]], ["SP", 10, 0, 112, [1], 1], ["AN", 10, 1, 2], ["FFSP", 10, 0, [4, 3, 2, 1], 1, 1], ["AN", 10, 1, 6], ["FU", 7, [0, 1, 2, 3]], ["SP", 7, 0, 4816896, [32], 1], ["AN", 7, 0, 5], ["AN", 7, 1, 6], ["FU", 6, [0, 1, 2, 3]], ["AN", 6, 0, 5], ["FU", 6, [1, 2, 3, 4]], ["AN", 6, 1, 4], ["FU", 6, [2, 3, 4, 5]], ["AN", 6, 2, 6], ["FU", 4, [0, 1, 2, 3]], ["SP", 4, 0, 48, [1], 1], ["AN", 4, 1, 2], ["FFSP", 
4, 0, [22, 21, 20, 19], 1, 1], ["AN", 4, 1, 6], ["FU", 2, [0, 1, 2, 3]], ["SP", 2, 0, 312, [4], 1], ["AN", 2, 1, 2], ["FFSP", 2, 0, [22, 21, 20, 19], 1, 1], ["AN", 2, 1, 6], ["FU", 1, [0, 1, 2, 3]], ["SP", 1, 0, 128, [1], 1], ["AN", 1, 1, 2], ["FFSP", 1, 0, [4, 3, 2, 1], 1, 1], ["AN", 1, 1, 6], ["PR", 5, 0, "auto_unroll_max_step$64"], ["PR", 11, 0, "auto_unroll_max_step$64"]]]], "r": [[1e+10], 4, 2.56777, 1602798103], "v": "v0.2"}
{"i": [["[\"conv2d_nchw_gd\", 32, 224, 224, 3, 64, 7, 7, [2, 2], [3, 3], [1, 1]]", "cuda -keys=cuda,gpu -max_num_threads=1024 -model=t4 -thread_warp_size=32"], [[], [["CHW", 6, "local"], ["SP", 6, 0, 64, [8, 2, 2, 1], 1], ["SP", 6, 5, 3, [1, 3, 1, 1], 1], ["SP", 6, 10, 7, [1, 7, 1, 1], 1], ["SP", 6, 15, 7, [1, 1, 1, 1], 1], ["SP", 6, 20, 32, [2, 8], 1], ["SP", 6, 23, 112, [1, 1], 1], ["SP", 6, 26, 112, [1, 1], 1], ["RE", 6, [0, 5, 10, 15, 1, 6, 11, 16, 2, 7, 12, 17, 20, 23, 26, 21, 24, 27, 3, 8, 13, 18, 22, 25, 28, 4, 9, 14, 19]], ["FSP", 7, 0, 1, 3], ["FSP", 7, 4, 2, 3], ["FSP", 7, 8, 3, 3], ["FSP", 7, 12, 4, 3], ["RE", 7, [0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15]], ["CA", 6, 7, 11], ["CHR", 5, "shared", [6]], ["CA", 6, 7, 14], ["CI", 5], ["CHW", 2, "local"], ["SP", 2, 0, 32, [1, 8, 4, 1], 1], ["SP", 2, 5, 3, [1, 1, 3, 1], 1], ["SP", 2, 10, 230, [1, 1, 5, 1], 1], ["SP", 2, 15, 230, [1, 5, 1, 23], 1], ["SP", 2, 20, 64, [2, 1], 1], ["SP", 2, 23, 4, [1, 1], 1], ["SP", 2, 26, 4, [2, 1], 1], ["RE", 2, [0, 5, 10, 15, 1, 6, 11, 16, 2, 7, 12, 17, 20, 23, 26, 21, 24, 27, 3, 8, 13, 18, 22, 25, 28, 4, 9, 14, 19]], ["FSP", 3, 0, 19, 3], ["FSP", 3, 4, 20, 3], ["FSP", 3, 8, 21, 3], ["FSP", 3, 12, 22, 3], ["RE", 3, [0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15]], ["CA", 2, 3, 11], ["CHR", 1, "shared", [2]], ["CA", 2, 3, 14], ["CHR", 0, "shared", [3]], ["CA", 1, 4, 14], ["CHR", 0, "shared", [10]], ["CA", 1, 11, 14], ["FU", 12, [0, 1, 2, 3]], ["AN", 12, 0, 5], ["FU", 12, [1, 2, 3, 4]], ["AN", 12, 1, 4], ["FU", 12, [2, 3, 4, 5]], ["AN", 12, 2, 6], ["FU", 10, [0, 1, 2, 3]], ["SP", 10, 0, 112, [14], 1], ["AN", 10, 1, 2], ["FFSP", 10, 0, [4, 3, 2, 1], 1, 1], ["AN", 10, 1, 6], ["FU", 7, [0, 1, 2, 3]], ["SP", 7, 0, 4816896, [1], 1], ["AN", 7, 0, 5], ["AN", 7, 1, 6], ["FU", 6, [0, 1, 2, 3]], ["AN", 6, 0, 5], ["FU", 6, [1, 2, 3, 4]], ["AN", 6, 1, 4], ["FU", 6, [2, 3, 4, 5]], ["AN", 6, 2, 6], ["FU", 4, [0, 1, 2, 3]], ["SP", 4, 0, 48, [1], 1], ["AN", 4, 1, 2], ["FFSP", 
4, 0, [22, 21, 20, 19], 1, 1], ["AN", 4, 1, 6], ["FU", 2, [0, 1, 2, 3]], ["SP", 2, 0, 312, [1], 1], ["AN", 2, 1, 2], ["FFSP", 2, 0, [22, 21, 20, 19], 1, 1], ["AN", 2, 1, 6], ["FU", 1, [0, 1, 2, 3]], ["SP", 1, 0, 128, [1], 1], ["AN", 1, 1, 2], ["FFSP", 1, 0, [4, 3, 2, 1], 1, 1], ["AN", 1, 1, 6], ["PR", 5, 0, "auto_unroll_max_step$64"], ["PR", 11, 0, "auto_unroll_max_step$1024"]]]], "r": [[1.37037], 0, 8.34994, 1602798110], "v": "v0.2"}
{"i": [["[\"conv2d_nchw_gd\", 32, 224, 224, 3, 64, 7, 7, [2, 2], [3, 3], [1, 1]]", "cuda -keys=cuda,gpu -max_num_threads=1024 -model=t4 -thread_warp_size=32"], [[], [["CHW", 6, "local"], ["SP", 6, 0, 64, [1, 4, 8, 1], 1], ["SP", 6, 5, 3, [1, 1, 1, 1], 1], ["SP", 6, 10, 7, [1, 1, 7, 1], 1], ["SP", 6, 15, 7, [1, 1, 1, 1], 1], ["SP", 6, 20, 32, [32, 1], 1], ["SP", 6, 23, 112, [2, 1], 1], ["SP", 6, 26, 112, [1, 1], 1], ["RE", 6, [0, 5, 10, 15, 1, 6, 11, 16, 2, 7, 12, 17, 20, 23, 26, 21, 24, 27, 3, 8, 13, 18, 22, 25, 28, 4, 9, 14, 19]], ["FSP", 7, 0, 1, 3], ["FSP", 7, 4, 2, 3], ["FSP", 7, 8, 3, 3], ["FSP", 7, 12, 4, 3], ["RE", 7, [0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15]], ["CA", 6, 7, 11], ["CHR", 5, "shared", [6]], ["CA", 6, 7, 14], ["CI", 5], ["CHW", 2, "local"], ["SP", 2, 0, 32, [1, 4, 1, 1], 1], ["SP", 2, 5, 3, [1, 1, 1, 1], 1], ["SP", 2, 10, 230, [2, 23, 1, 1], 1], ["SP", 2, 15, 230, [1, 5, 2, 1], 1], ["SP", 2, 20, 64, [1, 2], 1], ["SP", 2, 23, 4, [2, 2], 1], ["SP", 2, 26, 4, [2, 1], 1], ["RE", 2, [0, 5, 10, 15, 1, 6, 11, 16, 2, 7, 12, 17, 20, 23, 26, 21, 24, 27, 3, 8, 13, 18, 22, 25, 28, 4, 9, 14, 19]], ["FSP", 3, 0, 19, 3], ["FSP", 3, 4, 20, 3], ["FSP", 3, 8, 21, 3], ["FSP", 3, 12, 22, 3], ["RE", 3, [0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15]], ["CA", 2, 3, 11], ["CHR", 1, "shared", [2]], ["CA", 2, 3, 14], ["CHR", 0, "shared", [3]], ["CA", 1, 4, 14], ["CHR", 0, "shared", [10]], ["CA", 1, 11, 14], ["FU", 12, [0, 1, 2, 3]], ["AN", 12, 0, 5], ["FU", 12, [1, 2, 3, 4]], ["AN", 12, 1, 4], ["FU", 12, [2, 3, 4, 5]], ["AN", 12, 2, 6], ["FU", 10, [0, 1, 2, 3]], ["SP", 10, 0, 288, [8], 1], ["AN", 10, 1, 2], ["FFSP", 10, 0, [4, 3, 2, 1], 1, 1], ["AN", 10, 1, 6], ["FU", 7, [0, 1, 2, 3]], ["SP", 7, 0, 4816896, [32], 1], ["AN", 7, 0, 5], ["AN", 7, 1, 6], ["FU", 6, [0, 1, 2, 3]], ["AN", 6, 0, 5], ["FU", 6, [1, 2, 3, 4]], ["AN", 6, 1, 4], ["FU", 6, [2, 3, 4, 5]], ["AN", 6, 2, 6], ["FU", 4, [0, 1, 2, 3]], ["SP", 4, 0, 56, [2], 1], ["AN", 4, 1, 2], ["FFSP", 
4, 0, [22, 21, 20, 19], 1, 1], ["AN", 4, 1, 6], ["FU", 2, [0, 1, 2, 3]], ["SP", 2, 0, 32, [1], 1], ["AN", 2, 1, 2], ["FFSP", 2, 0, [22, 21, 20, 19], 1, 1], ["AN", 2, 1, 6], ["FU", 1, [0, 1, 2, 3]], ["SP", 1, 0, 64, [1], 1], ["AN", 1, 1, 2], ["FFSP", 1, 0, [4, 3, 2, 1], 1, 1], ["AN", 1, 1, 6], ["PR", 5, 0, "auto_unroll_max_step$64"], ["PR", 11, 0, "auto_unroll_max_step$64"]]]], "r": [[0.238265], 0, 3.19664, 1602798112], "v": "v0.2"}
{"i": [["[\"conv2d_nchw_gd\", 32, 224, 224, 3, 64, 7, 7, [2, 2], [3, 3], [1, 1]]", "cuda -keys=cuda,gpu -max_num_threads=1024 -model=t4 -thread_warp_size=32"], [[], [["CHW", 6, "local"], ["SP", 6, 0, 64, [1, 32, 1, 1], 1], ["SP", 6, 5, 3, [1, 1, 1, 1], 1], ["SP", 6, 10, 7, [1, 1, 7, 1], 1], ["SP", 6, 15, 7, [1, 1, 1, 1], 1], ["SP", 6, 20, 32, [16, 2], 1], ["SP", 6, 23, 112, [2, 1], 1], ["SP", 6, 26, 112, [1, 1], 1], ["RE", 6, [0, 5, 10, 15, 1, 6, 11, 16, 2, 7, 12, 17, 20, 23, 26, 21, 24, 27, 3, 8, 13, 18, 22, 25, 28, 4, 9, 14, 19]], ["FSP", 7, 0, 1, 3], ["FSP", 7, 4, 2, 3], ["FSP", 7, 8, 3, 3], ["FSP", 7, 12, 4, 3], ["RE", 7, [0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15]], ["CA", 6, 7, 11], ["CHR", 5, "shared", [6]], ["CA", 6, 7, 14], ["CI", 5], ["CHW", 2, "local"], ["SP", 2, 0, 32, [2, 4, 1, 1], 1], ["SP", 2, 5, 3, [3, 1, 1, 1], 1], ["SP", 2, 10, 230, [2, 23, 1, 1], 1], ["SP", 2, 15, 230, [1, 5, 1, 2], 1], ["SP", 2, 20, 64, [1, 2], 1], ["SP", 2, 23, 4, [2, 1], 1], ["SP", 2, 26, 4, [2, 1], 1], ["RE", 2, [0, 5, 10, 15, 1, 6, 11, 16, 2, 7, 12, 17, 20, 23, 26, 21, 24, 27, 3, 8, 13, 18, 22, 25, 28, 4, 9, 14, 19]], ["FSP", 3, 0, 19, 3], ["FSP", 3, 4, 20, 3], ["FSP", 3, 8, 21, 3], ["FSP", 3, 12, 22, 3], ["RE", 3, [0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15]], ["CA", 2, 3, 11], ["CHR", 1, "shared", [2]], ["CA", 2, 3, 14], ["CHR", 0, "shared", [3]], ["CA", 1, 4, 14], ["CHR", 0, "shared", [10]], ["CA", 1, 11, 14], ["FU", 12, [0, 1, 2, 3]], ["AN", 12, 0, 5], ["FU", 12, [1, 2, 3, 4]], ["AN", 12, 1, 4], ["FU", 12, [2, 3, 4, 5]], ["AN", 12, 2, 6], ["FU", 10, [0, 1, 2, 3]], ["SP", 10, 0, 288, [1], 1], ["AN", 10, 1, 2], ["FFSP", 10, 0, [4, 3, 2, 1], 1, 1], ["AN", 10, 1, 6], ["FU", 7, [0, 1, 2, 3]], ["SP", 7, 0, 4816896, [64], 1], ["AN", 7, 0, 5], ["AN", 7, 1, 6], ["FU", 6, [0, 1, 2, 3]], ["AN", 6, 0, 5], ["FU", 6, [1, 2, 3, 4]], ["AN", 6, 1, 4], ["FU", 6, [2, 3, 4, 5]], ["AN", 6, 2, 6], ["FU", 4, [0, 1, 2, 3]], ["SP", 4, 0, 56, [1], 1], ["AN", 4, 1, 2], 
["FFSP", 4, 0, [22, 21, 20, 19], 1, 1], ["AN", 4, 1, 6], ["FU", 2, [0, 1, 2, 3]], ["SP", 2, 0, 32, [1], 1], ["AN", 2, 1, 2], ["FFSP", 2, 0, [22, 21, 20, 19], 1, 1], ["AN", 2, 1, 6], ["FU", 1, [0, 1, 2, 3]], ["SP", 1, 0, 64, [1], 1], ["AN", 1, 1, 2], ["FFSP", 1, 0, [4, 3, 2, 1], 1, 1], ["AN", 1, 1, 6], ["PR", 5, 0, "auto_unroll_max_step$64"], ["PR", 11, 0, "auto_unroll_max_step$64"]]]], "r": [[0.125042], 0, 2.82976, 1602798113], "v": "v0.2"}
{"i": [["[\"conv2d_nchw_gd\", 32, 224, 224, 3, 64, 7, 7, [2, 2], [3, 3], [1, 1]]", "cuda -keys=cuda,gpu -max_num_threads=1024 -model=t4 -thread_warp_size=32"], [[], [["CHW", 6, "local"], ["SP", 6, 0, 64, [8, 2, 2, 1], 1], ["SP", 6, 5, 3, [1, 1, 1, 3], 1], ["SP", 6, 10, 7, [1, 7, 1, 1], 1], ["SP", 6, 15, 7, [1, 1, 1, 1], 1], ["SP", 6, 20, 32, [2, 8], 1], ["SP", 6, 23, 112, [1, 1], 1], ["SP", 6, 26, 112, [4, 1], 1], ["RE", 6, [0, 5, 10, 15, 1, 6, 11, 16, 2, 7, 12, 17, 20, 23, 26, 21, 24, 27, 3, 8, 13, 18, 22, 25, 28, 4, 9, 14, 19]], ["FSP", 7, 0, 1, 3], ["FSP", 7, 4, 2, 3], ["FSP", 7, 8, 3, 3], ["FSP", 7, 12, 4, 3], ["RE", 7, [0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15]], ["CA", 6, 7, 11], ["CHR", 5, "shared", [6]], ["CA", 6, 7, 14], ["CI", 5], ["CHW", 2, "local"], ["SP", 2, 0, 32, [1, 8, 4, 1], 1], ["SP", 2, 5, 3, [1, 1, 3, 1], 1], ["SP", 2, 10, 230, [1, 1, 5, 1], 1], ["SP", 2, 15, 230, [1, 5, 1, 23], 1], ["SP", 2, 20, 64, [2, 1], 1], ["SP", 2, 23, 4, [1, 1], 1], ["SP", 2, 26, 4, [2, 1], 1], ["RE", 2, [0, 5, 10, 15, 1, 6, 11, 16, 2, 7, 12, 17, 20, 23, 26, 21, 24, 27, 3, 8, 13, 18, 22, 25, 28, 4, 9, 14, 19]], ["FSP", 3, 0, 19, 3], ["FSP", 3, 4, 20, 3], ["FSP", 3, 8, 21, 3], ["FSP", 3, 12, 22, 3], ["RE", 3, [0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15]], ["CA", 2, 3, 11], ["CHR", 1, "shared", [2]], ["CA", 2, 3, 14], ["CHR", 0, "shared", [3]], ["CA", 1, 4, 14], ["CHR", 0, "shared", [10]], ["CA", 1, 11, 14], ["FU", 12, [0, 1, 2, 3]], ["AN", 12, 0, 5], ["FU", 12, [1, 2, 3, 4]], ["AN", 12, 1, 4], ["FU", 12, [2, 3, 4, 5]], ["AN", 12, 2, 6], ["FU", 10, [0, 1, 2, 3]], ["SP", 10, 0, 112, [1], 1], ["AN", 10, 1, 2], ["FFSP", 10, 0, [4, 3, 2, 1], 1, 1], ["AN", 10, 1, 6], ["FU", 7, [0, 1, 2, 3]], ["SP", 7, 0, 4816896, [32], 1], ["AN", 7, 0, 5], ["AN", 7, 1, 6], ["FU", 6, [0, 1, 2, 3]], ["AN", 6, 0, 5], ["FU", 6, [1, 2, 3, 4]], ["AN", 6, 1, 4], ["FU", 6, [2, 3, 4, 5]], ["AN", 6, 2, 6], ["FU", 4, [0, 1, 2, 3]], ["SP", 4, 0, 48, [1], 1], ["AN", 4, 1, 2], ["FFSP", 
4, 0, [22, 21, 20, 19], 1, 1], ["AN", 4, 1, 6], ["FU", 2, [0, 1, 2, 3]], ["SP", 2, 0, 312, [3], 1], ["AN", 2, 1, 2], ["FFSP", 2, 0, [22, 21, 20, 19], 1, 1], ["AN", 2, 1, 6], ["FU", 1, [0, 1, 2, 3]], ["SP", 1, 0, 128, [1], 1], ["AN", 1, 1, 2], ["FFSP", 1, 0, [4, 3, 2, 1], 1, 1], ["AN", 1, 1, 6], ["PR", 5, 0, "auto_unroll_max_step$64"], ["PR", 11, 0, "auto_unroll_max_step$64"]]]], "r": [[1.36137], 0, 8.18244, 1602798120], "v": "v0.2"}
{"i": [["[\"conv2d_nchw_gd\", 32, 224, 224, 3, 64, 7, 7, [2, 2], [3, 3], [1, 1]]", "cuda -keys=cuda,gpu -max_num_threads=1024 -model=t4 -thread_warp_size=32"], [[], [["CHW", 6, "local"], ["SP", 6, 0, 64, [1, 8, 2, 1], 1], ["SP", 6, 5, 3, [1, 1, 1, 1], 1], ["SP", 6, 10, 7, [1, 7, 1, 1], 1], ["SP", 6, 15, 7, [1, 1, 1, 7], 1], ["SP", 6, 20, 32, [4, 1], 1], ["SP", 6, 23, 112, [1, 1], 1], ["SP", 6, 26, 112, [16, 1], 1], ["RE", 6, [0, 5, 10, 15, 1, 6, 11, 16, 2, 7, 12, 17, 20, 23, 26, 21, 24, 27, 3, 8, 13, 18, 22, 25, 28, 4, 9, 14, 19]], ["FSP", 7, 0, 1, 3], ["FSP", 7, 4, 2, 3], ["FSP", 7, 8, 3, 3], ["FSP", 7, 12, 4, 3], ["RE", 7, [0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15]], ["CA", 6, 7, 11], ["CHR", 5, "shared", [6]], ["CA", 6, 7, 14], ["CI", 5], ["CHW", 2, "local"], ["SP", 2, 0, 32, [1, 1, 1, 1], 1], ["SP", 2, 5, 3, [3, 1, 1, 1], 1], ["SP", 2, 10, 230, [1, 230, 1, 1], 1], ["SP", 2, 15, 230, [1, 1, 1, 23], 1], ["SP", 2, 20, 64, [2, 2], 1], ["SP", 2, 23, 4, [2, 2], 1], ["SP", 2, 26, 4, [1, 1], 1], ["RE", 2, [0, 5, 10, 15, 1, 6, 11, 16, 2, 7, 12, 17, 20, 23, 26, 21, 24, 27, 3, 8, 13, 18, 22, 25, 28, 4, 9, 14, 19]], ["FSP", 3, 0, 19, 3], ["FSP", 3, 4, 20, 3], ["FSP", 3, 8, 21, 3], ["FSP", 3, 12, 22, 3], ["RE", 3, [0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15]], ["CA", 2, 3, 11], ["CHR", 1, "shared", [2]], ["CA", 2, 3, 14], ["CHR", 0, "shared", [3]], ["CA", 1, 4, 14], ["CHR", 0, "shared", [10]], ["CA", 1, 11, 14], ["FU", 12, [0, 1, 2, 3]], ["AN", 12, 0, 5], ["FU", 12, [1, 2, 3, 4]], ["AN", 12, 1, 4], ["FU", 12, [2, 3, 4, 5]], ["AN", 12, 2, 6], ["FU", 10, [0, 1, 2, 3]], ["SP", 10, 0, 148, [1], 1], ["AN", 10, 1, 2], ["FFSP", 10, 0, [4, 3, 2, 1], 1, 1], ["AN", 10, 1, 6], ["FU", 7, [0, 1, 2, 3]], ["SP", 7, 0, 4816896, [32], 1], ["AN", 7, 0, 5], ["AN", 7, 1, 6], ["FU", 6, [0, 1, 2, 3]], ["AN", 6, 0, 5], ["FU", 6, [1, 2, 3, 4]], ["AN", 6, 1, 4], ["FU", 6, [2, 3, 4, 5]], ["AN", 6, 2, 6], ["FU", 4, [0, 1, 2, 3]], ["SP", 4, 0, 112, [28], 1], ["AN", 4, 1, 2], 
["FFSP", 4, 0, [22, 21, 20, 19], 1, 1], ["AN", 4, 1, 6], ["FU", 2, [0, 1, 2, 3]], ["SP", 2, 0, 208, [1], 1], ["AN", 2, 1, 2], ["FFSP", 2, 0, [22, 21, 20, 19], 1, 1], ["AN", 2, 1, 6], ["FU", 1, [0, 1, 2, 3]], ["SP", 1, 0, 128, [8], 1], ["AN", 1, 1, 2], ["FFSP", 1, 0, [4, 3, 2, 1], 1, 1], ["AN", 1, 1, 6], ["PR", 5, 0, "auto_unroll_max_step$16"], ["PR", 11, 0, "auto_unroll_max_step$512"]]]], "r": [[0.0446075], 0, 2.79341, 1602798121], "v": "v0.2"}
{"i": [["[\"conv2d_nchw_gd\", 32, 224, 224, 3, 64, 7, 7, [2, 2], [3, 3], [1, 1]]", "cuda -keys=cuda,gpu -max_num_threads=1024 -model=t4 -thread_warp_size=32"], [[], [["CHW", 6, "local"], ["SP", 6, 0, 64, [8, 1, 4, 2], 1], ["SP", 6, 5, 3, [1, 3, 1, 1], 1], ["SP", 6, 10, 7, [1, 7, 1, 1], 1], ["SP", 6, 15, 7, [1, 7, 1, 1], 1], ["SP", 6, 20, 32, [1, 1], 1], ["SP", 6, 23, 112, [4, 7], 1], ["SP", 6, 26, 112, [2, 2], 1], ["RE", 6, [0, 5, 10, 15, 1, 6, 11, 16, 2, 7, 12, 17, 20, 23, 26, 21, 24, 27, 3, 8, 13, 18, 22, 25, 28, 4, 9, 14, 19]], ["FSP", 7, 0, 1, 3], ["FSP", 7, 4, 2, 3], ["FSP", 7, 8, 3, 3], ["FSP", 7, 12, 4, 3], ["RE", 7, [0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15]], ["CA", 6, 7, 11], ["CHR", 5, "shared", [6]], ["CA", 6, 7, 14], ["CI", 5], ["CHW", 2, "local"], ["SP", 2, 0, 32, [1, 8, 1, 4], 1], ["SP", 2, 5, 3, [3, 1, 1, 1], 1], ["SP", 2, 10, 230, [1, 1, 1, 2], 1], ["SP", 2, 15, 230, [1, 115, 1, 1], 1], ["SP", 2, 20, 64, [1, 1], 1], ["SP", 2, 23, 4, [2, 1], 1], ["SP", 2, 26, 4, [1, 2], 1], ["RE", 2, [0, 5, 10, 15, 1, 6, 11, 16, 2, 7, 12, 17, 20, 23, 26, 21, 24, 27, 3, 8, 13, 18, 22, 25, 28, 4, 9, 14, 19]], ["FSP", 3, 0, 19, 3], ["FSP", 3, 4, 20, 3], ["FSP", 3, 8, 21, 3], ["FSP", 3, 12, 22, 3], ["RE", 3, [0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15]], ["CA", 2, 3, 11], ["CHR", 1, "shared", [2]], ["CA", 2, 3, 14], ["CHR", 0, "shared", [3]], ["CA", 1, 4, 14], ["CHR", 0, "shared", [10]], ["CA", 1, 11, 14], ["FU", 12, [0, 1, 2, 3]], ["AN", 12, 0, 5], ["FU", 12, [1, 2, 3, 4]], ["AN", 12, 1, 4], ["FU", 12, [2, 3, 4, 5]], ["AN", 12, 2, 6], ["FU", 10, [0, 1, 2, 3]], ["SP", 10, 0, 385, [1], 1], ["AN", 10, 1, 2], ["FFSP", 10, 0, [4, 3, 2, 1], 1, 1], ["AN", 10, 1, 6], ["FU", 7, [0, 1, 2, 3]], ["SP", 7, 0, 4816896, [32], 1], ["AN", 7, 0, 5], ["AN", 7, 1, 6], ["FU", 6, [0, 1, 2, 3]], ["AN", 6, 0, 5], ["FU", 6, [1, 2, 3, 4]], ["AN", 6, 1, 4], ["FU", 6, [2, 3, 4, 5]], ["AN", 6, 2, 6], ["FU", 4, [0, 1, 2, 3]], ["SP", 4, 0, 12, [1], 1], ["AN", 4, 1, 2], ["FFSP", 
4, 0, [22, 21, 20, 19], 1, 1], ["AN", 4, 1, 6], ["FU", 2, [0, 1, 2, 3]], ["SP", 2, 0, 48, [1], 1], ["AN", 2, 1, 2], ["FFSP", 2, 0, [22, 21, 20, 19], 1, 1], ["AN", 2, 1, 6], ["FU", 1, [0, 1, 2, 3]], ["SP", 1, 0, 448, [1], 1], ["AN", 1, 1, 2], ["FFSP", 1, 0, [4, 3, 2, 1], 1, 1], ["AN", 1, 1, 6], ["PR", 5, 0, "auto_unroll_max_step$0"], ["PR", 11, 0, "auto_unroll_max_step$512"]]]], "r": [[1e+10], 4, 2.83277, 1602798123], "v": "v0.2"}
{"i": [["[\"conv2d_nchw_gd\", 32, 224, 224, 3, 64, 7, 7, [2, 2], [3, 3], [1, 1]]", "cuda -keys=cuda,gpu -max_num_threads=1024 -model=t4 -thread_warp_size=32"], [[], [["CHW", 6, "local"], ["SP", 6, 0, 64, [8, 2, 2, 1], 1], ["SP", 6, 5, 3, [1, 3, 1, 1], 1], ["SP", 6, 10, 7, [1, 7, 1, 1], 1], ["SP", 6, 15, 7, [1, 1, 1, 7], 1], ["SP", 6, 20, 32, [2, 8], 1], ["SP", 6, 23, 112, [1, 1], 1], ["SP", 6, 26, 112, [4, 1], 1], ["RE", 6, [0, 5, 10, 15, 1, 6, 11, 16, 2, 7, 12, 17, 20, 23, 26, 21, 24, 27, 3, 8, 13, 18, 22, 25, 28, 4, 9, 14, 19]], ["FSP", 7, 0, 1, 3], ["FSP", 7, 4, 2, 3], ["FSP", 7, 8, 3, 3], ["FSP", 7, 12, 4, 3], ["RE", 7, [0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15]], ["CA", 6, 7, 11], ["CHR", 5, "shared", [6]], ["CA", 6, 7, 14], ["CI", 5], ["CHW", 2, "local"], ["SP", 2, 0, 32, [1, 8, 4, 1], 1], ["SP", 2, 5, 3, [1, 1, 3, 1], 1], ["SP", 2, 10, 230, [1, 1, 5, 1], 1], ["SP", 2, 15, 230, [1, 5, 1, 23], 1], ["SP", 2, 20, 64, [2, 1], 1], ["SP", 2, 23, 4, [1, 1], 1], ["SP", 2, 26, 4, [2, 1], 1], ["RE", 2, [0, 5, 10, 15, 1, 6, 11, 16, 2, 7, 12, 17, 20, 23, 26, 21, 24, 27, 3, 8, 13, 18, 22, 25, 28, 4, 9, 14, 19]], ["FSP", 3, 0, 19, 3], ["FSP", 3, 4, 20, 3], ["FSP", 3, 8, 21, 3], ["FSP", 3, 12, 22, 3], ["RE", 3, [0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15]], ["CA", 2, 3, 11], ["CHR", 1, "shared", [2]], ["CA", 2, 3, 14], ["CHR", 0, "shared", [3]], ["CA", 1, 4, 14], ["CHR", 0, "shared", [10]], ["CA", 1, 11, 14], ["FU", 12, [0, 1, 2, 3]], ["AN", 12, 0, 5], ["FU", 12, [1, 2, 3, 4]], ["AN", 12, 1, 4], ["FU", 12, [2, 3, 4, 5]], ["AN", 12, 2, 6], ["FU", 10, [0, 1, 2, 3]], ["SP", 10, 0, 112, [56], 1], ["AN", 10, 1, 2], ["FFSP", 10, 0, [4, 3, 2, 1], 1, 1], ["AN", 10, 1, 6], ["FU", 7, [0, 1, 2, 3]], ["SP", 7, 0, 4816896, [32], 1], ["AN", 7, 0, 5], ["AN", 7, 1, 6], ["FU", 6, [0, 1, 2, 3]], ["AN", 6, 0, 5], ["FU", 6, [1, 2, 3, 4]], ["AN", 6, 1, 4], ["FU", 6, [2, 3, 4, 5]], ["AN", 6, 2, 6], ["FU", 4, [0, 1, 2, 3]], ["SP", 4, 0, 48, [2], 1], ["AN", 4, 1, 2], ["FFSP", 
4, 0, [22, 21, 20, 19], 1, 1], ["AN", 4, 1, 6], ["FU", 2, [0, 1, 2, 3]], ["SP", 2, 0, 312, [24], 1], ["AN", 2, 1, 2], ["FFSP", 2, 0, [22, 21, 20, 19], 1, 1], ["AN", 2, 1, 6], ["FU", 1, [0, 1, 2, 3]], ["SP", 1, 0, 128, [1], 1], ["AN", 1, 1, 2], ["FFSP", 1, 0, [4, 3, 2, 1], 1, 1], ["AN", 1, 1, 6], ["PR", 5, 0, "auto_unroll_max_step$64"], ["PR", 11, 0, "auto_unroll_max_step$64"]]]], "r": [[1.55063], 0, 9.56473, 1602798130], "v": "v0.2"}
{"i": [["[\"conv2d_nchw_gd\", 32, 224, 224, 3, 64, 7, 7, [2, 2], [3, 3], [1, 1]]", "cuda -keys=cuda,gpu -max_num_threads=1024 -model=t4 -thread_warp_size=32"], [[], [["CHW", 6, "local"], ["SP", 6, 0, 64, [1, 1, 2, 32], 1], ["SP", 6, 5, 3, [1, 1, 1, 3], 1], ["SP", 6, 10, 7, [1, 7, 1, 1], 1], ["SP", 6, 15, 7, [1, 7, 1, 1], 1], ["SP", 6, 20, 32, [2, 1], 1], ["SP", 6, 23, 112, [8, 1], 1], ["SP", 6, 26, 112, [2, 4], 1], ["RE", 6, [0, 5, 10, 15, 1, 6, 11, 16, 2, 7, 12, 17, 20, 23, 26, 21, 24, 27, 3, 8, 13, 18, 22, 25, 28, 4, 9, 14, 19]], ["FSP", 7, 0, 1, 3], ["FSP", 7, 4, 2, 3], ["FSP", 7, 8, 3, 3], ["FSP", 7, 12, 4, 3], ["RE", 7, [0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15]], ["CA", 6, 7, 11], ["CHR", 5, "shared", [6]], ["CA", 6, 7, 14], ["CI", 5], ["CHW", 2, "local"], ["SP", 2, 0, 32, [4, 8, 1, 1], 1], ["SP", 2, 5, 3, [1, 3, 1, 1], 1], ["SP", 2, 10, 230, [1, 5, 1, 23], 1], ["SP", 2, 15, 230, [1, 1, 1, 1], 1], ["SP", 2, 20, 64, [4, 1], 1], ["SP", 2, 23, 4, [1, 2], 1], ["SP", 2, 26, 4, [1, 1], 1], ["RE", 2, [0, 5, 10, 15, 1, 6, 11, 16, 2, 7, 12, 17, 20, 23, 26, 21, 24, 27, 3, 8, 13, 18, 22, 25, 28, 4, 9, 14, 19]], ["FSP", 3, 0, 19, 3], ["FSP", 3, 4, 20, 3], ["FSP", 3, 8, 21, 3], ["FSP", 3, 12, 22, 3], ["RE", 3, [0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15]], ["CA", 2, 3, 11], ["CHR", 1, "shared", [2]], ["CA", 2, 3, 14], ["CHR", 0, "shared", [3]], ["CA", 1, 4, 14], ["CHR", 0, "shared", [10]], ["CA", 1, 11, 14], ["FU", 12, [0, 1, 2, 3]], ["AN", 12, 0, 5], ["FU", 12, [1, 2, 3, 4]], ["AN", 12, 1, 4], ["FU", 12, [2, 3, 4, 5]], ["AN", 12, 2, 6], ["FU", 10, [0, 1, 2, 3]], ["SP", 10, 0, 450, [6], 1], ["AN", 10, 1, 2], ["FFSP", 10, 0, [4, 3, 2, 1], 1, 1], ["AN", 10, 1, 6], ["FU", 7, [0, 1, 2, 3]], ["SP", 7, 0, 4816896, [32], 1], ["AN", 7, 0, 5], ["AN", 7, 1, 6], ["FU", 6, [0, 1, 2, 3]], ["AN", 6, 0, 5], ["FU", 6, [1, 2, 3, 4]], ["AN", 6, 1, 4], ["FU", 6, [2, 3, 4, 5]], ["AN", 6, 2, 6], ["FU", 4, [0, 1, 2, 3]], ["SP", 4, 0, 16, [1], 1], ["AN", 4, 1, 2], ["FFSP", 
4, 0, [22, 21, 20, 19], 1, 1], ["AN", 4, 1, 6], ["FU", 2, [0, 1, 2, 3]], ["SP", 2, 0, 52, [1], 1], ["AN", 2, 1, 2], ["FFSP", 2, 0, [22, 21, 20, 19], 1, 1], ["AN", 2, 1, 6], ["FU", 1, [0, 1, 2, 3]], ["SP", 1, 0, 4096, [1], 1], ["AN", 1, 1, 2], ["FFSP", 1, 0, [4, 3, 2, 1], 1, 1], ["AN", 1, 1, 6], ["PR", 5, 0, "auto_unroll_max_step$1024"], ["PR", 11, 0, "auto_unroll_max_step$1024"]]]], "r": [[0.296273], 0, 8.50166, 1602798133], "v": "v0.2"}
{"i": [["[\"conv2d_nchw_gd\", 32, 224, 224, 3, 64, 7, 7, [2, 2], [3, 3], [1, 1]]", "cuda -keys=cuda,gpu -max_num_threads=1024 -model=t4 -thread_warp_size=32"], [[], [["CHW", 6, "local"], ["SP", 6, 0, 64, [1, 32, 1, 1], 1], ["SP", 6, 5, 3, [1, 1, 1, 1], 1], ["SP", 6, 10, 7, [1, 1, 7, 1], 1], ["SP", 6, 15, 7, [1, 7, 1, 1], 1], ["SP", 6, 20, 32, [16, 2], 1], ["SP", 6, 23, 112, [1, 1], 1], ["SP", 6, 26, 112, [1, 1], 1], ["RE", 6, [0, 5, 10, 15, 1, 6, 11, 16, 2, 7, 12, 17, 20, 23, 26, 21, 24, 27, 3, 8, 13, 18, 22, 25, 28, 4, 9, 14, 19]], ["FSP", 7, 0, 1, 3], ["FSP", 7, 4, 2, 3], ["FSP", 7, 8, 3, 3], ["FSP", 7, 12, 4, 3], ["RE", 7, [0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15]], ["CA", 6, 7, 11], ["CHR", 5, "shared", [6]], ["CA", 6, 7, 14], ["CI", 5], ["CHW", 2, "local"], ["SP", 2, 0, 32, [1, 4, 1, 2], 1], ["SP", 2, 5, 3, [1, 1, 1, 1], 1], ["SP", 2, 10, 230, [2, 23, 1, 1], 1], ["SP", 2, 15, 230, [1, 5, 1, 2], 1], ["SP", 2, 20, 64, [2, 1], 1], ["SP", 2, 23, 4, [2, 2], 1], ["SP", 2, 26, 4, [2, 1], 1], ["RE", 2, [0, 5, 10, 15, 1, 6, 11, 16, 2, 7, 12, 17, 20, 23, 26, 21, 24, 27, 3, 8, 13, 18, 22, 25, 28, 4, 9, 14, 19]], ["FSP", 3, 0, 19, 3], ["FSP", 3, 4, 20, 3], ["FSP", 3, 8, 21, 3], ["FSP", 3, 12, 22, 3], ["RE", 3, [0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15]], ["CA", 2, 3, 11], ["CHR", 1, "shared", [2]], ["CA", 2, 3, 14], ["CHR", 0, "shared", [3]], ["CA", 1, 4, 14], ["CHR", 0, "shared", [10]], ["CA", 1, 11, 14], ["FU", 12, [0, 1, 2, 3]], ["AN", 12, 0, 5], ["FU", 12, [1, 2, 3, 4]], ["AN", 12, 1, 4], ["FU", 12, [2, 3, 4, 5]], ["AN", 12, 2, 6], ["FU", 10, [0, 1, 2, 3]], ["SP", 10, 0, 288, [1], 1], ["AN", 10, 1, 2], ["FFSP", 10, 0, [4, 3, 2, 1], 1, 1], ["AN", 10, 1, 6], ["FU", 7, [0, 1, 2, 3]], ["SP", 7, 0, 4816896, [32], 1], ["AN", 7, 0, 5], ["AN", 7, 1, 6], ["FU", 6, [0, 1, 2, 3]], ["AN", 6, 0, 5], ["FU", 6, [1, 2, 3, 4]], ["AN", 6, 1, 4], ["FU", 6, [2, 3, 4, 5]], ["AN", 6, 2, 6], ["FU", 4, [0, 1, 2, 3]], ["SP", 4, 0, 56, [1], 1], ["AN", 4, 1, 2], 
["FFSP", 4, 0, [22, 21, 20, 19], 1, 1], ["AN", 4, 1, 6], ["FU", 2, [0, 1, 2, 3]], ["SP", 2, 0, 32, [1], 1], ["AN", 2, 1, 2], ["FFSP", 2, 0, [22, 21, 20, 19], 1, 1], ["AN", 2, 1, 6], ["FU", 1, [0, 1, 2, 3]], ["SP", 1, 0, 64, [4], 1], ["AN", 1, 1, 2], ["FFSP", 1, 0, [4, 3, 2, 1], 1, 1], ["AN", 1, 1, 6], ["PR", 5, 0, "auto_unroll_max_step$512"], ["PR", 11, 0, "auto_unroll_max_step$64"]]]], "r": [[1e+10], 4, 2.7506, 1602798134], "v": "v0.2"}
{"i": [["[\"conv2d_nchw_gd\", 32, 224, 224, 3, 64, 7, 7, [2, 2], [3, 3], [1, 1]]", "cuda -keys=cuda,gpu -max_num_threads=1024 -model=t4 -thread_warp_size=32"], [[], [["CHW", 6, "local"], ["SP", 6, 0, 64, [1, 16, 1, 4], 1], ["SP", 6, 5, 3, [1, 3, 1, 1], 1], ["SP", 6, 10, 7, [7, 1, 1, 1], 1], ["SP", 6, 15, 7, [1, 1, 1, 1], 1], ["SP", 6, 20, 32, [1, 8], 1], ["SP", 6, 23, 112, [7, 1], 1], ["SP", 6, 26, 112, [1, 1], 1], ["RE", 6, [0, 5, 10, 15, 1, 6, 11, 16, 2, 7, 12, 17, 20, 23, 26, 21, 24, 27, 3, 8, 13, 18, 22, 25, 28, 4, 9, 14, 19]], ["FSP", 7, 0, 1, 3], ["FSP", 7, 4, 2, 3], ["FSP", 7, 8, 3, 3], ["FSP", 7, 12, 4, 3], ["RE", 7, [0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15]], ["CA", 6, 7, 11], ["CHR", 5, "shared", [6]], ["CA", 6, 7, 14], ["CI", 5], ["CHW", 2, "local"], ["SP", 2, 0, 32, [1, 2, 8, 1], 1], ["SP", 2, 5, 3, [1, 3, 1, 1], 1], ["SP", 2, 10, 230, [2, 1, 1, 1], 1], ["SP", 2, 15, 230, [1, 1, 5, 2], 1], ["SP", 2, 20, 64, [4, 1], 1], ["SP", 2, 23, 4, [2, 2], 1], ["SP", 2, 26, 4, [1, 2], 1], ["RE", 2, [0, 5, 10, 15, 1, 6, 11, 16, 2, 7, 12, 17, 20, 23, 26, 21, 24, 27, 3, 8, 13, 18, 22, 25, 28, 4, 9, 14, 19]], ["FSP", 3, 0, 19, 3], ["FSP", 3, 4, 20, 3], ["FSP", 3, 8, 21, 3], ["FSP", 3, 12, 22, 3], ["RE", 3, [0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15]], ["CA", 2, 3, 11], ["CHR", 1, "shared", [2]], ["CA", 2, 3, 14], ["CHR", 0, "shared", [3]], ["CA", 1, 4, 14], ["CHR", 0, "shared", [10]], ["CA", 1, 11, 14], ["FU", 12, [0, 1, 2, 3]], ["AN", 12, 0, 5], ["FU", 12, [1, 2, 3, 4]], ["AN", 12, 1, 4], ["FU", 12, [2, 3, 4, 5]], ["AN", 12, 2, 6], ["FU", 10, [0, 1, 2, 3]], ["SP", 10, 0, 104, [1], 1], ["AN", 10, 1, 2], ["FFSP", 10, 0, [4, 3, 2, 1], 1, 1], ["AN", 10, 1, 6], ["FU", 7, [0, 1, 2, 3]], ["SP", 7, 0, 4816896, [32], 1], ["AN", 7, 0, 5], ["AN", 7, 1, 6], ["FU", 6, [0, 1, 2, 3]], ["AN", 6, 0, 5], ["FU", 6, [1, 2, 3, 4]], ["AN", 6, 1, 4], ["FU", 6, [2, 3, 4, 5]], ["AN", 6, 2, 6], ["FU", 4, [0, 1, 2, 3]], ["SP", 4, 0, 336, [1], 1], ["AN", 4, 1, 2], ["FFSP", 
4, 0, [22, 21, 20, 19], 1, 1], ["AN", 4, 1, 6], ["FU", 2, [0, 1, 2, 3]], ["SP", 2, 0, 768, [1], 1], ["AN", 2, 1, 2], ["FFSP", 2, 0, [22, 21, 20, 19], 1, 1], ["AN", 2, 1, 6], ["FU", 1, [0, 1, 2, 3]], ["SP", 1, 0, 224, [1], 1], ["AN", 1, 1, 2], ["FFSP", 1, 0, [4, 3, 2, 1], 1, 1], ["AN", 1, 1, 6], ["PR", 5, 0, "auto_unroll_max_step$16"], ["PR", 11, 0, "auto_unroll_max_step$64"]]]], "r": [[0.455146], 0, 3.99472, 1602798137], "v": "v0.2"}
{"i": [["[\"conv2d_nchw_gd\", 32, 224, 224, 3, 64, 7, 7, [2, 2], [3, 3], [1, 1]]", "cuda -keys=cuda,gpu -max_num_threads=1024 -model=t4 -thread_warp_size=32"], [[], [["CHW", 6, "local"], ["SP", 6, 0, 64, [8, 2, 2, 1], 1], ["SP", 6, 5, 3, [1, 1, 3, 1], 1], ["SP", 6, 10, 7, [1, 7, 1, 1], 1], ["SP", 6, 15, 7, [1, 1, 1, 7], 1], ["SP", 6, 20, 32, [2, 8], 1], ["SP", 6, 23, 112, [1, 1], 1], ["SP", 6, 26, 112, [4, 1], 1], ["RE", 6, [0, 5, 10, 15, 1, 6, 11, 16, 2, 7, 12, 17, 20, 23, 26, 21, 24, 27, 3, 8, 13, 18, 22, 25, 28, 4, 9, 14, 19]], ["FSP", 7, 0, 1, 3], ["FSP", 7, 4, 2, 3], ["FSP", 7, 8, 3, 3], ["FSP", 7, 12, 4, 3], ["RE", 7, [0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15]], ["CA", 6, 7, 11], ["CHR", 5, "shared", [6]], ["CA", 6, 7, 14], ["CI", 5], ["CHW", 2, "local"], ["SP", 2, 0, 32, [1, 8, 4, 1], 1], ["SP", 2, 5, 3, [1, 1, 1, 3], 1], ["SP", 2, 10, 230, [1, 1, 5, 1], 1], ["SP", 2, 15, 230, [1, 5, 1, 23], 1], ["SP", 2, 20, 64, [2, 1], 1], ["SP", 2, 23, 4, [1, 1], 1], ["SP", 2, 26, 4, [1, 1], 1], ["RE", 2, [0, 5, 10, 15, 1, 6, 11, 16, 2, 7, 12, 17, 20, 23, 26, 21, 24, 27, 3, 8, 13, 18, 22, 25, 28, 4, 9, 14, 19]], ["FSP", 3, 0, 19, 3], ["FSP", 3, 4, 20, 3], ["FSP", 3, 8, 21, 3], ["FSP", 3, 12, 22, 3], ["RE", 3, [0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15]], ["CA", 2, 3, 11], ["CHR", 1, "shared", [2]], ["CA", 2, 3, 14], ["CHR", 0, "shared", [3]], ["CA", 1, 4, 14], ["CHR", 0, "shared", [10]], ["CA", 1, 11, 14], ["FU", 12, [0, 1, 2, 3]], ["AN", 12, 0, 5], ["FU", 12, [1, 2, 3, 4]], ["AN", 12, 1, 4], ["FU", 12, [2, 3, 4, 5]], ["AN", 12, 2, 6], ["FU", 10, [0, 1, 2, 3]], ["SP", 10, 0, 112, [1], 1], ["AN", 10, 1, 2], ["FFSP", 10, 0, [4, 3, 2, 1], 1, 1], ["AN", 10, 1, 6], ["FU", 7, [0, 1, 2, 3]], ["SP", 7, 0, 4816896, [32], 1], ["AN", 7, 0, 5], ["AN", 7, 1, 6], ["FU", 6, [0, 1, 2, 3]], ["AN", 6, 0, 5], ["FU", 6, [1, 2, 3, 4]], ["AN", 6, 1, 4], ["FU", 6, [2, 3, 4, 5]], ["AN", 6, 2, 6], ["FU", 4, [0, 1, 2, 3]], ["SP", 4, 0, 48, [1], 1], ["AN", 4, 1, 2], ["FFSP", 
4, 0, [22, 21, 20, 19], 1, 1], ["AN", 4, 1, 6], ["FU", 2, [0, 1, 2, 3]], ["SP", 2, 0, 312, [1], 1], ["AN", 2, 1, 2], ["FFSP", 2, 0, [22, 21, 20, 19], 1, 1], ["AN", 2, 1, 6], ["FU", 1, [0, 1, 2, 3]], ["SP", 1, 0, 128, [8], 1], ["AN", 1, 1, 2], ["FFSP", 1, 0, [4, 3, 2, 1], 1, 1], ["AN", 1, 1, 6], ["PR", 5, 0, "auto_unroll_max_step$64"], ["PR", 11, 0, "auto_unroll_max_step$1024"]]]], "r": [[1e+10], 7, 13.9806, 1602798147], "v": "v0.2"}
{"i": [["[\"conv2d_nchw_gd\", 32, 224, 224, 3, 64, 7, 7, [2, 2], [3, 3], [1, 1]]", "cuda -keys=cuda,gpu -max_num_threads=1024 -model=t4 -thread_warp_size=32"], [[], [["CHW", 6, "local"], ["SP", 6, 0, 64, [1, 16, 1, 4], 1], ["SP", 6, 5, 3, [1, 3, 1, 1], 1], ["SP", 6, 10, 7, [7, 1, 1, 1], 1], ["SP", 6, 15, 7, [1, 1, 1, 1], 1], ["SP", 6, 20, 32, [1, 8], 1], ["SP", 6, 23, 112, [7, 1], 1], ["SP", 6, 26, 112, [1, 1], 1], ["RE", 6, [0, 5, 10, 15, 1, 6, 11, 16, 2, 7, 12, 17, 20, 23, 26, 21, 24, 27, 3, 8, 13, 18, 22, 25, 28, 4, 9, 14, 19]], ["FSP", 7, 0, 1, 3], ["FSP", 7, 4, 2, 3], ["FSP", 7, 8, 3, 3], ["FSP", 7, 12, 4, 3], ["RE", 7, [0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15]], ["CA", 6, 7, 11], ["CHR", 5, "shared", [6]], ["CA", 6, 7, 14], ["CI", 5], ["CHW", 2, "local"], ["SP", 2, 0, 32, [1, 16, 1, 1], 1], ["SP", 2, 5, 3, [1, 1, 1, 3], 1], ["SP", 2, 10, 230, [2, 23, 1, 1], 1], ["SP", 2, 15, 230, [1, 1, 5, 2], 1], ["SP", 2, 20, 64, [2, 2], 1], ["SP", 2, 23, 4, [1, 4], 1], ["SP", 2, 26, 4, [1, 2], 1], ["RE", 2, [0, 5, 10, 15, 1, 6, 11, 16, 2, 7, 12, 17, 20, 23, 26, 21, 24, 27, 3, 8, 13, 18, 22, 25, 28, 4, 9, 14, 19]], ["FSP", 3, 0, 19, 3], ["FSP", 3, 4, 20, 3], ["FSP", 3, 8, 21, 3], ["FSP", 3, 12, 22, 3], ["RE", 3, [0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15]], ["CA", 2, 3, 11], ["CHR", 1, "shared", [2]], ["CA", 2, 3, 14], ["CHR", 0, "shared", [3]], ["CA", 1, 4, 14], ["CHR", 0, "shared", [10]], ["CA", 1, 11, 14], ["FU", 12, [0, 1, 2, 3]], ["AN", 12, 0, 5], ["FU", 12, [1, 2, 3, 4]], ["AN", 12, 1, 4], ["FU", 12, [2, 3, 4, 5]], ["AN", 12, 2, 6], ["FU", 10, [0, 1, 2, 3]], ["SP", 10, 0, 104, [1], 1], ["AN", 10, 1, 2], ["FFSP", 10, 0, [4, 3, 2, 1], 1, 1], ["AN", 10, 1, 6], ["FU", 7, [0, 1, 2, 3]], ["SP", 7, 0, 4816896, [1], 1], ["AN", 7, 0, 5], ["AN", 7, 1, 6], ["FU", 6, [0, 1, 2, 3]], ["AN", 6, 0, 5], ["FU", 6, [1, 2, 3, 4]], ["AN", 6, 1, 4], ["FU", 6, [2, 3, 4, 5]], ["AN", 6, 2, 6], ["FU", 4, [0, 1, 2, 3]], ["SP", 4, 0, 336, [1], 1], ["AN", 4, 1, 2], 
["FFSP", 4, 0, [22, 21, 20, 19], 1, 1], ["AN", 4, 1, 6], ["FU", 2, [0, 1, 2, 3]], ["SP", 2, 0, 768, [32], 1], ["AN", 2, 1, 2], ["FFSP", 2, 0, [22, 21, 20, 19], 1, 1], ["AN", 2, 1, 6], ["FU", 1, [0, 1, 2, 3]], ["SP", 1, 0, 224, [1], 1], ["AN", 1, 1, 2], ["FFSP", 1, 0, [4, 3, 2, 1], 1, 1], ["AN", 1, 1, 6], ["PR", 5, 0, "auto_unroll_max_step$512"], ["PR", 11, 0, "auto_unroll_max_step$64"]]]], "r": [[1e+10], 4, 5.00449, 1602798162], "v": "v0.2"}
{"i": [["[\"conv2d_nchw_gd\", 32, 224, 224, 3, 64, 7, 7, [2, 2], [3, 3], [1, 1]]", "cuda -keys=cuda,gpu -max_num_threads=1024 -model=t4 -thread_warp_size=32"], [[], [["CHW", 6, "local"], ["SP", 6, 0, 64, [2, 16, 1, 2], 1], ["SP", 6, 5, 3, [1, 3, 1, 1], 1], ["SP", 6, 10, 7, [7, 1, 1, 1], 1], ["SP", 6, 15, 7, [1, 1, 1, 1], 1], ["SP", 6, 20, 32, [1, 1], 1], ["SP", 6, 23, 112, [7, 1], 1], ["SP", 6, 26, 112, [1, 1], 1], ["RE", 6, [0, 5, 10, 15, 1, 6, 11, 16, 2, 7, 12, 17, 20, 23, 26, 21, 24, 27, 3, 8, 13, 18, 22, 25, 28, 4, 9, 14, 19]], ["FSP", 7, 0, 1, 3], ["FSP", 7, 4, 2, 3], ["FSP", 7, 8, 3, 3], ["FSP", 7, 12, 4, 3], ["RE", 7, [0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15]], ["CA", 6, 7, 11], ["CHR", 5, "shared", [6]], ["CA", 6, 7, 14], ["CI", 5], ["CHW", 2, "local"], ["SP", 2, 0, 32, [1, 2, 8, 1], 1], ["SP", 2, 5, 3, [1, 1, 1, 3], 1], ["SP", 2, 10, 230, [1, 23, 1, 1], 1], ["SP", 2, 15, 230, [1, 1, 5, 2], 1], ["SP", 2, 20, 64, [4, 1], 1], ["SP", 2, 23, 4, [4, 1], 1], ["SP", 2, 26, 4, [1, 2], 1], ["RE", 2, [0, 5, 10, 15, 1, 6, 11, 16, 2, 7, 12, 17, 20, 23, 26, 21, 24, 27, 3, 8, 13, 18, 22, 25, 28, 4, 9, 14, 19]], ["FSP", 3, 0, 19, 3], ["FSP", 3, 4, 20, 3], ["FSP", 3, 8, 21, 3], ["FSP", 3, 12, 22, 3], ["RE", 3, [0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15]], ["CA", 2, 3, 11], ["CHR", 1, "shared", [2]], ["CA", 2, 3, 14], ["CHR", 0, "shared", [3]], ["CA", 1, 4, 14], ["CHR", 0, "shared", [10]], ["CA", 1, 11, 14], ["FU", 12, [0, 1, 2, 3]], ["AN", 12, 0, 5], ["FU", 12, [1, 2, 3, 4]], ["AN", 12, 1, 4], ["FU", 12, [2, 3, 4, 5]], ["AN", 12, 2, 6], ["FU", 10, [0, 1, 2, 3]], ["SP", 10, 0, 104, [2], 1], ["AN", 10, 1, 2], ["FFSP", 10, 0, [4, 3, 2, 1], 1, 1], ["AN", 10, 1, 6], ["FU", 7, [0, 1, 2, 3]], ["SP", 7, 0, 4816896, [32], 1], ["AN", 7, 0, 5], ["AN", 7, 1, 6], ["FU", 6, [0, 1, 2, 3]], ["AN", 6, 0, 5], ["FU", 6, [1, 2, 3, 4]], ["AN", 6, 1, 4], ["FU", 6, [2, 3, 4, 5]], ["AN", 6, 2, 6], ["FU", 4, [0, 1, 2, 3]], ["SP", 4, 0, 336, [1], 1], ["AN", 4, 1, 2], 
["FFSP", 4, 0, [22, 21, 20, 19], 1, 1], ["AN", 4, 1, 6], ["FU", 2, [0, 1, 2, 3]], ["SP", 2, 0, 768, [1], 1], ["AN", 2, 1, 2], ["FFSP", 2, 0, [22, 21, 20, 19], 1, 1], ["AN", 2, 1, 6], ["FU", 1, [0, 1, 2, 3]], ["SP", 1, 0, 224, [16], 1], ["AN", 1, 1, 2], ["FFSP", 1, 0, [4, 3, 2, 1], 1, 1], ["AN", 1, 1, 6], ["PR", 5, 0, "auto_unroll_max_step$0"], ["PR", 11, 0, "auto_unroll_max_step$64"]]]], "r": [[0.396076], 0, 3.62507, 1602798165], "v": "v0.2"}
{"i": [["[\"conv2d_nchw_gd\", 32, 224, 224, 3, 64, 7, 7, [2, 2], [3, 3], [1, 1]]", "cuda -keys=cuda,gpu -max_num_threads=1024 -model=t4 -thread_warp_size=32"], [[], [["CHW", 6, "local"], ["SP", 6, 0, 64, [8, 1, 2, 2], 1], ["SP", 6, 5, 3, [1, 3, 1, 1], 1], ["SP", 6, 10, 7, [1, 7, 1, 1], 1], ["SP", 6, 15, 7, [1, 1, 1, 1], 1], ["SP", 6, 20, 32, [8, 4], 1], ["SP", 6, 23, 112, [1, 1], 1], ["SP", 6, 26, 112, [4, 1], 1], ["RE", 6, [0, 5, 10, 15, 1, 6, 11, 16, 2, 7, 12, 17, 20, 23, 26, 21, 24, 27, 3, 8, 13, 18, 22, 25, 28, 4, 9, 14, 19]], ["FSP", 7, 0, 1, 3], ["FSP", 7, 4, 2, 3], ["FSP", 7, 8, 3, 3], ["FSP", 7, 12, 4, 3], ["RE", 7, [0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15]], ["CA", 6, 7, 11], ["CHR", 5, "shared", [6]], ["CA", 6, 7, 14], ["CI", 5], ["CHW", 2, "local"], ["SP", 2, 0, 32, [1, 8, 4, 1], 1], ["SP", 2, 5, 3, [1, 1, 3, 1], 1], ["SP", 2, 10, 230, [1, 5, 1, 1], 1], ["SP", 2, 15, 230, [1, 5, 1, 23], 1], ["SP", 2, 20, 64, [2, 1], 1], ["SP", 2, 23, 4, [1, 1], 1], ["SP", 2, 26, 4, [2, 1], 1], ["RE", 2, [0, 5, 10, 15, 1, 6, 11, 16, 2, 7, 12, 17, 20, 23, 26, 21, 24, 27, 3, 8, 13, 18, 22, 25, 28, 4, 9, 14, 19]], ["FSP", 3, 0, 19, 3], ["FSP", 3, 4, 20, 3], ["FSP", 3, 8, 21, 3], ["FSP", 3, 12, 22, 3], ["RE", 3, [0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15]], ["CA", 2, 3, 11], ["CHR", 1, "shared", [2]], ["CA", 2, 3, 14], ["CHR", 0, "shared", [3]], ["CA", 1, 4, 14], ["CHR", 0, "shared", [10]], ["CA", 1, 11, 14], ["FU", 12, [0, 1, 2, 3]], ["AN", 12, 0, 5], ["FU", 12, [1, 2, 3, 4]], ["AN", 12, 1, 4], ["FU", 12, [2, 3, 4, 5]], ["AN", 12, 2, 6], ["FU", 10, [0, 1, 2, 3]], ["SP", 10, 0, 112, [1], 1], ["AN", 10, 1, 2], ["FFSP", 10, 0, [4, 3, 2, 1], 1, 1], ["AN", 10, 1, 6], ["FU", 7, [0, 1, 2, 3]], ["SP", 7, 0, 4816896, [4], 1], ["AN", 7, 0, 5], ["AN", 7, 1, 6], ["FU", 6, [0, 1, 2, 3]], ["AN", 6, 0, 5], ["FU", 6, [1, 2, 3, 4]], ["AN", 6, 1, 4], ["FU", 6, [2, 3, 4, 5]], ["AN", 6, 2, 6], ["FU", 4, [0, 1, 2, 3]], ["SP", 4, 0, 48, [1], 1], ["AN", 4, 1, 2], ["FFSP", 
4, 0, [22, 21, 20, 19], 1, 1], ["AN", 4, 1, 6], ["FU", 2, [0, 1, 2, 3]], ["SP", 2, 0, 312, [1], 1], ["AN", 2, 1, 2], ["FFSP", 2, 0, [22, 21, 20, 19], 1, 1], ["AN", 2, 1, 6], ["FU", 1, [0, 1, 2, 3]], ["SP", 1, 0, 128, [1], 1], ["AN", 1, 1, 2], ["FFSP", 1, 0, [4, 3, 2, 1], 1, 1], ["AN", 1, 1, 6], ["PR", 5, 0, "auto_unroll_max_step$64"], ["PR", 11, 0, "auto_unroll_max_step$64"]]]], "r": [[0.386402], 0, 5.07873, 1602798168], "v": "v0.2"}
{"i": [["[\"conv2d_nchw_gd\", 32, 224, 224, 3, 64, 7, 7, [2, 2], [3, 3], [1, 1]]", "cuda -keys=cuda,gpu -max_num_threads=1024 -model=t4 -thread_warp_size=32"], [[], [["CHW", 6, "local"], ["SP", 6, 0, 64, [1, 32, 1, 1], 1], ["SP", 6, 5, 3, [1, 1, 1, 1], 1], ["SP", 6, 10, 7, [1, 1, 7, 1], 1], ["SP", 6, 15, 7, [1, 1, 1, 1], 1], ["SP", 6, 20, 32, [16, 2], 1], ["SP", 6, 23, 112, [2, 1], 1], ["SP", 6, 26, 112, [1, 1], 1], ["RE", 6, [0, 5, 10, 15, 1, 6, 11, 16, 2, 7, 12, 17, 20, 23, 26, 21, 24, 27, 3, 8, 13, 18, 22, 25, 28, 4, 9, 14, 19]], ["FSP", 7, 0, 1, 3], ["FSP", 7, 4, 2, 3], ["FSP", 7, 8, 3, 3], ["FSP", 7, 12, 4, 3], ["RE", 7, [0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15]], ["CA", 6, 7, 11], ["CHR", 5, "shared", [6]], ["CA", 6, 7, 14], ["CI", 5], ["CHW", 2, "local"], ["SP", 2, 0, 32, [2, 4, 1, 2], 1], ["SP", 2, 5, 3, [1, 1, 1, 3], 1], ["SP", 2, 10, 230, [2, 23, 1, 1], 1], ["SP", 2, 15, 230, [1, 5, 1, 2], 1], ["SP", 2, 20, 64, [2, 1], 1], ["SP", 2, 23, 4, [2, 2], 1], ["SP", 2, 26, 4, [2, 1], 1], ["RE", 2, [0, 5, 10, 15, 1, 6, 11, 16, 2, 7, 12, 17, 20, 23, 26, 21, 24, 27, 3, 8, 13, 18, 22, 25, 28, 4, 9, 14, 19]], ["FSP", 3, 0, 19, 3], ["FSP", 3, 4, 20, 3], ["FSP", 3, 8, 21, 3], ["FSP", 3, 12, 22, 3], ["RE", 3, [0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15]], ["CA", 2, 3, 11], ["CHR", 1, "shared", [2]], ["CA", 2, 3, 14], ["CHR", 0, "shared", [3]], ["CA", 1, 4, 14], ["CHR", 0, "shared", [10]], ["CA", 1, 11, 14], ["FU", 12, [0, 1, 2, 3]], ["AN", 12, 0, 5], ["FU", 12, [1, 2, 3, 4]], ["AN", 12, 1, 4], ["FU", 12, [2, 3, 4, 5]], ["AN", 12, 2, 6], ["FU", 10, [0, 1, 2, 3]], ["SP", 10, 0, 288, [1], 1], ["AN", 10, 1, 2], ["FFSP", 10, 0, [4, 3, 2, 1], 1, 1], ["AN", 10, 1, 6], ["FU", 7, [0, 1, 2, 3]], ["SP", 7, 0, 4816896, [1], 1], ["AN", 7, 0, 5], ["AN", 7, 1, 6], ["FU", 6, [0, 1, 2, 3]], ["AN", 6, 0, 5], ["FU", 6, [1, 2, 3, 4]], ["AN", 6, 1, 4], ["FU", 6, [2, 3, 4, 5]], ["AN", 6, 2, 6], ["FU", 4, [0, 1, 2, 3]], ["SP", 4, 0, 56, [14], 1], ["AN", 4, 1, 2], 
["FFSP", 4, 0, [22, 21, 20, 19], 1, 1], ["AN", 4, 1, 6], ["FU", 2, [0, 1, 2, 3]], ["SP", 2, 0, 32, [1], 1], ["AN", 2, 1, 2], ["FFSP", 2, 0, [22, 21, 20, 19], 1, 1], ["AN", 2, 1, 6], ["FU", 1, [0, 1, 2, 3]], ["SP", 1, 0, 64, [1], 1], ["AN", 1, 1, 2], ["FFSP", 1, 0, [4, 3, 2, 1], 1, 1], ["AN", 1, 1, 6], ["PR", 5, 0, "auto_unroll_max_step$64"], ["PR", 11, 0, "auto_unroll_max_step$64"]]]], "r": [[1e+10], 4, 2.85737, 1602798169], "v": "v0.2"}
{"i": [["[\"conv2d_nchw_gd\", 32, 224, 224, 3, 64, 7, 7, [2, 2], [3, 3], [1, 1]]", "cuda -keys=cuda,gpu -max_num_threads=1024 -model=t4 -thread_warp_size=32"], [[], [["CHW", 6, "local"], ["SP", 6, 0, 64, [1, 1, 16, 2], 1], ["SP", 6, 5, 3, [1, 1, 1, 1], 1], ["SP", 6, 10, 7, [1, 7, 1, 1], 1], ["SP", 6, 15, 7, [1, 7, 1, 1], 1], ["SP", 6, 20, 32, [1, 1], 1], ["SP", 6, 23, 112, [56, 1], 1], ["SP", 6, 26, 112, [2, 1], 1], ["RE", 6, [0, 5, 10, 15, 1, 6, 11, 16, 2, 7, 12, 17, 20, 23, 26, 21, 24, 27, 3, 8, 13, 18, 22, 25, 28, 4, 9, 14, 19]], ["FSP", 7, 0, 1, 3], ["FSP", 7, 4, 2, 3], ["FSP", 7, 8, 3, 3], ["FSP", 7, 12, 4, 3], ["RE", 7, [0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15]], ["CA", 6, 7, 11], ["CHR", 5, "shared", [6]], ["CA", 6, 7, 14], ["CI", 5], ["CHW", 2, "local"], ["SP", 2, 0, 32, [1, 4, 4, 1], 1], ["SP", 2, 5, 3, [1, 3, 1, 1], 1], ["SP", 2, 10, 230, [1, 1, 1, 1], 1], ["SP", 2, 15, 230, [1, 23, 2, 1], 1], ["SP", 2, 20, 64, [1, 2], 1], ["SP", 2, 23, 4, [4, 1], 1], ["SP", 2, 26, 4, [1, 2], 1], ["RE", 2, [0, 5, 10, 15, 1, 6, 11, 16, 2, 7, 12, 17, 20, 23, 26, 21, 24, 27, 3, 8, 13, 18, 22, 25, 28, 4, 9, 14, 19]], ["FSP", 3, 0, 19, 3], ["FSP", 3, 4, 20, 3], ["FSP", 3, 8, 21, 3], ["FSP", 3, 12, 22, 3], ["RE", 3, [0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15]], ["CA", 2, 3, 11], ["CHR", 1, "shared", [2]], ["CA", 2, 3, 14], ["CHR", 0, "shared", [3]], ["CA", 1, 4, 14], ["CHR", 0, "shared", [10]], ["CA", 1, 11, 14], ["FU", 12, [0, 1, 2, 3]], ["AN", 12, 0, 5], ["FU", 12, [1, 2, 3, 4]], ["AN", 12, 1, 4], ["FU", 12, [2, 3, 4, 5]], ["AN", 12, 2, 6], ["FU", 10, [0, 1, 2, 3]], ["SP", 10, 0, 999, [27], 1], ["AN", 10, 1, 2], ["FFSP", 10, 0, [4, 3, 2, 1], 1, 1], ["AN", 10, 1, 6], ["FU", 7, [0, 1, 2, 3]], ["SP", 7, 0, 4816896, [64], 1], ["AN", 7, 0, 5], ["AN", 7, 1, 6], ["FU", 6, [0, 1, 2, 3]], ["AN", 6, 0, 5], ["FU", 6, [1, 2, 3, 4]], ["AN", 6, 1, 4], ["FU", 6, [2, 3, 4, 5]], ["AN", 6, 2, 6], ["FU", 4, [0, 1, 2, 3]], ["SP", 4, 0, 56, [1], 1], ["AN", 4, 1, 2], 
["FFSP", 4, 0, [22, 21, 20, 19], 1, 1], ["AN", 4, 1, 6], ["FU", 2, [0, 1, 2, 3]], ["SP", 2, 0, 64, [1], 1], ["AN", 2, 1, 2], ["FFSP", 2, 0, [22, 21, 20, 19], 1, 1], ["AN", 2, 1, 6], ["FU", 1, [0, 1, 2, 3]], ["SP", 1, 0, 3584, [1], 1], ["AN", 1, 1, 2], ["FFSP", 1, 0, [4, 3, 2, 1], 1, 1], ["AN", 1, 1, 6], ["PR", 5, 0, "auto_unroll_max_step$16"], ["PR", 11, 0, "auto_unroll_max_step$16"]]]], "r": [[0.102003], 0, 2.31934, 1602798171], "v": "v0.2"}
{"i": [["[\"conv2d_nchw_gd\", 32, 224, 224, 3, 64, 7, 7, [2, 2], [3, 3], [1, 1]]", "cuda -keys=cuda,gpu -max_num_threads=1024 -model=t4 -thread_warp_size=32"], [[], [["CHW", 6, "local"], ["SP", 6, 0, 64, [1, 1, 16, 2], 1], ["SP", 6, 5, 3, [1, 1, 1, 3], 1], ["SP", 6, 10, 7, [1, 7, 1, 1], 1], ["SP", 6, 15, 7, [1, 7, 1, 1], 1], ["SP", 6, 20, 32, [1, 1], 1], ["SP", 6, 23, 112, [56, 1], 1], ["SP", 6, 26, 112, [2, 1], 1], ["RE", 6, [0, 5, 10, 15, 1, 6, 11, 16, 2, 7, 12, 17, 20, 23, 26, 21, 24, 27, 3, 8, 13, 18, 22, 25, 28, 4, 9, 14, 19]], ["FSP", 7, 0, 1, 3], ["FSP", 7, 4, 2, 3], ["FSP", 7, 8, 3, 3], ["FSP", 7, 12, 4, 3], ["RE", 7, [0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15]], ["CA", 6, 7, 11], ["CHR", 5, "shared", [6]], ["CA", 6, 7, 14], ["CI", 5], ["CHW", 2, "local"], ["SP", 2, 0, 32, [1, 4, 4, 1], 1], ["SP", 2, 5, 3, [1, 3, 1, 1], 1], ["SP", 2, 10, 230, [1, 1, 1, 1], 1], ["SP", 2, 15, 230, [1, 23, 2, 1], 1], ["SP", 2, 20, 64, [1, 2], 1], ["SP", 2, 23, 4, [4, 1], 1], ["SP", 2, 26, 4, [1, 2], 1], ["RE", 2, [0, 5, 10, 15, 1, 6, 11, 16, 2, 7, 12, 17, 20, 23, 26, 21, 24, 27, 3, 8, 13, 18, 22, 25, 28, 4, 9, 14, 19]], ["FSP", 3, 0, 19, 3], ["FSP", 3, 4, 20, 3], ["FSP", 3, 8, 21, 3], ["FSP", 3, 12, 22, 3], ["RE", 3, [0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15]], ["CA", 2, 3, 11], ["CHR", 1, "shared", [2]], ["CA", 2, 3, 14], ["CHR", 0, "shared", [3]], ["CA", 1, 4, 14], ["CHR", 0, "shared", [10]], ["CA", 1, 11, 14], ["FU", 12, [0, 1, 2, 3]], ["AN", 12, 0, 5], ["FU", 12, [1, 2, 3, 4]], ["AN", 12, 1, 4], ["FU", 12, [2, 3, 4, 5]], ["AN", 12, 2, 6], ["FU", 10, [0, 1, 2, 3]], ["SP", 10, 0, 999, [1], 1], ["AN", 10, 1, 2], ["FFSP", 10, 0, [4, 3, 2, 1], 1, 1], ["AN", 10, 1, 6], ["FU", 7, [0, 1, 2, 3]], ["SP", 7, 0, 4816896, [32], 1], ["AN", 7, 0, 5], ["AN", 7, 1, 6], ["FU", 6, [0, 1, 2, 3]], ["AN", 6, 0, 5], ["FU", 6, [1, 2, 3, 4]], ["AN", 6, 1, 4], ["FU", 6, [2, 3, 4, 5]], ["AN", 6, 2, 6], ["FU", 4, [0, 1, 2, 3]], ["SP", 4, 0, 56, [1], 1], ["AN", 4, 1, 2], 
["FFSP", 4, 0, [22, 21, 20, 19], 1, 1], ["AN", 4, 1, 6], ["FU", 2, [0, 1, 2, 3]], ["SP", 2, 0, 64, [32], 1], ["AN", 2, 1, 2], ["FFSP", 2, 0, [22, 21, 20, 19], 1, 1], ["AN", 2, 1, 6], ["FU", 1, [0, 1, 2, 3]], ["SP", 1, 0, 3584, [1], 1], ["AN", 1, 1, 2], ["FFSP", 1, 0, [4, 3, 2, 1], 1, 1], ["AN", 1, 1, 6], ["PR", 5, 0, "auto_unroll_max_step$16"], ["PR", 11, 0, "auto_unroll_max_step$16"]]]], "r": [[0.198117], 0, 2.59479, 1602798172], "v": "v0.2"}
{"i": [["[\"conv2d_nchw_gd\", 32, 224, 224, 3, 64, 7, 7, [2, 2], [3, 3], [1, 1]]", "cuda -keys=cuda,gpu -max_num_threads=1024 -model=t4 -thread_warp_size=32"], [[], [["CHW", 6, "local"], ["SP", 6, 0, 64, [1, 16, 1, 4], 1], ["SP", 6, 5, 3, [1, 1, 1, 3], 1], ["SP", 6, 10, 7, [7, 1, 1, 1], 1], ["SP", 6, 15, 7, [1, 1, 1, 1], 1], ["SP", 6, 20, 32, [1, 8], 1], ["SP", 6, 23, 112, [7, 1], 1], ["SP", 6, 26, 112, [1, 1], 1], ["RE", 6, [0, 5, 10, 15, 1, 6, 11, 16, 2, 7, 12, 17, 20, 23, 26, 21, 24, 27, 3, 8, 13, 18, 22, 25, 28, 4, 9, 14, 19]], ["FSP", 7, 0, 1, 3], ["FSP", 7, 4, 2, 3], ["FSP", 7, 8, 3, 3], ["FSP", 7, 12, 4, 3], ["RE", 7, [0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15]], ["CA", 6, 7, 11], ["CHR", 5, "shared", [6]], ["CA", 6, 7, 14], ["CI", 5], ["CHW", 2, "local"], ["SP", 2, 0, 32, [1, 2, 8, 1], 1], ["SP", 2, 5, 3, [1, 1, 1, 3], 1], ["SP", 2, 10, 230, [2, 23, 1, 1], 1], ["SP", 2, 15, 230, [1, 1, 5, 2], 1], ["SP", 2, 20, 64, [4, 1], 1], ["SP", 2, 23, 4, [2, 2], 1], ["SP", 2, 26, 4, [1, 2], 1], ["RE", 2, [0, 5, 10, 15, 1, 6, 11, 16, 2, 7, 12, 17, 20, 23, 26, 21, 24, 27, 3, 8, 13, 18, 22, 25, 28, 4, 9, 14, 19]], ["FSP", 3, 0, 19, 3], ["FSP", 3, 4, 20, 3], ["FSP", 3, 8, 21, 3], ["FSP", 3, 12, 22, 3], ["RE", 3, [0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15]], ["CA", 2, 3, 11], ["CHR", 1, "shared", [2]], ["CA", 2, 3, 14], ["CHR", 0, "shared", [3]], ["CA", 1, 4, 14], ["CHR", 0, "shared", [10]], ["CA", 1, 11, 14], ["FU", 12, [0, 1, 2, 3]], ["AN", 12, 0, 5], ["FU", 12, [1, 2, 3, 4]], ["AN", 12, 1, 4], ["FU", 12, [2, 3, 4, 5]], ["AN", 12, 2, 6], ["FU", 10, [0, 1, 2, 3]], ["SP", 10, 0, 104, [1], 1], ["AN", 10, 1, 2], ["FFSP", 10, 0, [4, 3, 2, 1], 1, 1], ["AN", 10, 1, 6], ["FU", 7, [0, 1, 2, 3]], ["SP", 7, 0, 4816896, [2], 1], ["AN", 7, 0, 5], ["AN", 7, 1, 6], ["FU", 6, [0, 1, 2, 3]], ["AN", 6, 0, 5], ["FU", 6, [1, 2, 3, 4]], ["AN", 6, 1, 4], ["FU", 6, [2, 3, 4, 5]], ["AN", 6, 2, 6], ["FU", 4, [0, 1, 2, 3]], ["SP", 4, 0, 336, [1], 1], ["AN", 4, 1, 2], ["FFSP", 
4, 0, [22, 21, 20, 19], 1, 1], ["AN", 4, 1, 6], ["FU", 2, [0, 1, 2, 3]], ["SP", 2, 0, 768, [24], 1], ["AN", 2, 1, 2], ["FFSP", 2, 0, [22, 21, 20, 19], 1, 1], ["AN", 2, 1, 6], ["FU", 1, [0, 1, 2, 3]], ["SP", 1, 0, 224, [1], 1], ["AN", 1, 1, 2], ["FFSP", 1, 0, [4, 3, 2, 1], 1, 1], ["AN", 1, 1, 6], ["PR", 5, 0, "auto_unroll_max_step$16"], ["PR", 11, 0, "auto_unroll_max_step$64"]]]], "r": [[0.474477], 0, 4.53066, 1602798176], "v": "v0.2"}
{"i": [["[\"conv2d_nchw_gd\", 32, 224, 224, 3, 64, 7, 7, [2, 2], [3, 3], [1, 1]]", "cuda -keys=cuda,gpu -max_num_threads=1024 -model=t4 -thread_warp_size=32"], [[], [["CHW", 6, "local"], ["SP", 6, 0, 64, [1, 32, 1, 1], 1], ["SP", 6, 5, 3, [1, 1, 1, 1], 1], ["SP", 6, 10, 7, [1, 1, 7, 1], 1], ["SP", 6, 15, 7, [1, 1, 1, 1], 1], ["SP", 6, 20, 32, [16, 2], 1], ["SP", 6, 23, 112, [2, 1], 1], ["SP", 6, 26, 112, [1, 1], 1], ["RE", 6, [0, 5, 10, 15, 1, 6, 11, 16, 2, 7, 12, 17, 20, 23, 26, 21, 24, 27, 3, 8, 13, 18, 22, 25, 28, 4, 9, 14, 19]], ["FSP", 7, 0, 1, 3], ["FSP", 7, 4, 2, 3], ["FSP", 7, 8, 3, 3], ["FSP", 7, 12, 4, 3], ["RE", 7, [0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15]], ["CA", 6, 7, 11], ["CHR", 5, "shared", [6]], ["CA", 6, 7, 14], ["CI", 5], ["CHW", 2, "local"], ["SP", 2, 0, 32, [1, 4, 1, 2], 1], ["SP", 2, 5, 3, [3, 1, 1, 1], 1], ["SP", 2, 10, 230, [2, 23, 1, 1], 1], ["SP", 2, 15, 230, [1, 5, 1, 2], 1], ["SP", 2, 20, 64, [1, 1], 1], ["SP", 2, 23, 4, [1, 1], 1], ["SP", 2, 26, 4, [2, 1], 1], ["RE", 2, [0, 5, 10, 15, 1, 6, 11, 16, 2, 7, 12, 17, 20, 23, 26, 21, 24, 27, 3, 8, 13, 18, 22, 25, 28, 4, 9, 14, 19]], ["FSP", 3, 0, 19, 3], ["FSP", 3, 4, 20, 3], ["FSP", 3, 8, 21, 3], ["FSP", 3, 12, 22, 3], ["RE", 3, [0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15]], ["CA", 2, 3, 11], ["CHR", 1, "shared", [2]], ["CA", 2, 3, 14], ["CHR", 0, "shared", [3]], ["CA", 1, 4, 14], ["CHR", 0, "shared", [10]], ["CA", 1, 11, 14], ["FU", 12, [0, 1, 2, 3]], ["AN", 12, 0, 5], ["FU", 12, [1, 2, 3, 4]], ["AN", 12, 1, 4], ["FU", 12, [2, 3, 4, 5]], ["AN", 12, 2, 6], ["FU", 10, [0, 1, 2, 3]], ["SP", 10, 0, 288, [1], 1], ["AN", 10, 1, 2], ["FFSP", 10, 0, [4, 3, 2, 1], 1, 1], ["AN", 10, 1, 6], ["FU", 7, [0, 1, 2, 3]], ["SP", 7, 0, 4816896, [32], 1], ["AN", 7, 0, 5], ["AN", 7, 1, 6], ["FU", 6, [0, 1, 2, 3]], ["AN", 6, 0, 5], ["FU", 6, [1, 2, 3, 4]], ["AN", 6, 1, 4], ["FU", 6, [2, 3, 4, 5]], ["AN", 6, 2, 6], ["FU", 4, [0, 1, 2, 3]], ["SP", 4, 0, 56, [1], 1], ["AN", 4, 1, 2], 
["FFSP", 4, 0, [22, 21, 20, 19], 1, 1], ["AN", 4, 1, 6], ["FU", 2, [0, 1, 2, 3]], ["SP", 2, 0, 32, [1], 1], ["AN", 2, 1, 2], ["FFSP", 2, 0, [22, 21, 20, 19], 1, 1], ["AN", 2, 1, 6], ["FU", 1, [0, 1, 2, 3]], ["SP", 1, 0, 64, [1], 1], ["AN", 1, 1, 2], ["FFSP", 1, 0, [4, 3, 2, 1], 1, 1], ["AN", 1, 1, 6], ["PR", 5, 0, "auto_unroll_max_step$64"], ["PR", 11, 0, "auto_unroll_max_step$1024"]]]], "r": [[0.110601], 0, 3.16915, 1602798177], "v": "v0.2"}
{"i": [["[\"conv2d_nchw_gd\", 32, 224, 224, 3, 64, 7, 7, [2, 2], [3, 3], [1, 1]]", "cuda -keys=cuda,gpu -max_num_threads=1024 -model=t4 -thread_warp_size=32"], [[], [["CHW", 6, "local"], ["SP", 6, 0, 64, [8, 2, 2, 1], 1], ["SP", 6, 5, 3, [1, 3, 1, 1], 1], ["SP", 6, 10, 7, [1, 7, 1, 1], 1], ["SP", 6, 15, 7, [1, 1, 1, 1], 1], ["SP", 6, 20, 32, [2, 8], 1], ["SP", 6, 23, 112, [1, 1], 1], ["SP", 6, 26, 112, [4, 1], 1], ["RE", 6, [0, 5, 10, 15, 1, 6, 11, 16, 2, 7, 12, 17, 20, 23, 26, 21, 24, 27, 3, 8, 13, 18, 22, 25, 28, 4, 9, 14, 19]], ["FSP", 7, 0, 1, 3], ["FSP", 7, 4, 2, 3], ["FSP", 7, 8, 3, 3], ["FSP", 7, 12, 4, 3], ["RE", 7, [0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15]], ["CA", 6, 7, 11], ["CHR", 5, "shared", [6]], ["CA", 6, 7, 14], ["CI", 5], ["CHW", 2, "local"], ["SP", 2, 0, 32, [1, 8, 1, 2], 1], ["SP", 2, 5, 3, [1, 3, 1, 1], 1], ["SP", 2, 10, 230, [1, 1, 1, 1], 1], ["SP", 2, 15, 230, [1, 5, 1, 23], 1], ["SP", 2, 20, 64, [1, 1], 1], ["SP", 2, 23, 4, [1, 1], 1], ["SP", 2, 26, 4, [2, 1], 1], ["RE", 2, [0, 5, 10, 15, 1, 6, 11, 16, 2, 7, 12, 17, 20, 23, 26, 21, 24, 27, 3, 8, 13, 18, 22, 25, 28, 4, 9, 14, 19]], ["FSP", 3, 0, 19, 3], ["FSP", 3, 4, 20, 3], ["FSP", 3, 8, 21, 3], ["FSP", 3, 12, 22, 3], ["RE", 3, [0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15]], ["CA", 2, 3, 11], ["CHR", 1, "shared", [2]], ["CA", 2, 3, 14], ["CHR", 0, "shared", [3]], ["CA", 1, 4, 14], ["CHR", 0, "shared", [10]], ["CA", 1, 11, 14], ["FU", 12, [0, 1, 2, 3]], ["AN", 12, 0, 5], ["FU", 12, [1, 2, 3, 4]], ["AN", 12, 1, 4], ["FU", 12, [2, 3, 4, 5]], ["AN", 12, 2, 6], ["FU", 10, [0, 1, 2, 3]], ["SP", 10, 0, 112, [1], 1], ["AN", 10, 1, 2], ["FFSP", 10, 0, [4, 3, 2, 1], 1, 1], ["AN", 10, 1, 6], ["FU", 7, [0, 1, 2, 3]], ["SP", 7, 0, 4816896, [32], 1], ["AN", 7, 0, 5], ["AN", 7, 1, 6], ["FU", 6, [0, 1, 2, 3]], ["AN", 6, 0, 5], ["FU", 6, [1, 2, 3, 4]], ["AN", 6, 1, 4], ["FU", 6, [2, 3, 4, 5]], ["AN", 6, 2, 6], ["FU", 4, [0, 1, 2, 3]], ["SP", 4, 0, 48, [1], 1], ["AN", 4, 1, 2], ["FFSP", 
4, 0, [22, 21, 20, 19], 1, 1], ["AN", 4, 1, 6], ["FU", 2, [0, 1, 2, 3]], ["SP", 2, 0, 312, [1], 1], ["AN", 2, 1, 2], ["FFSP", 2, 0, [22, 21, 20, 19], 1, 1], ["AN", 2, 1, 6], ["FU", 1, [0, 1, 2, 3]], ["SP", 1, 0, 128, [64], 1], ["AN", 1, 1, 2], ["FFSP", 1, 0, [4, 3, 2, 1], 1, 1], ["AN", 1, 1, 6], ["PR", 5, 0, "auto_unroll_max_step$512"], ["PR", 11, 0, "auto_unroll_max_step$64"]]]], "r": [[0.172827], 0, 4.71167, 1602798179], "v": "v0.2"}
{"i": [["[\"conv2d_nchw_gd\", 32, 224, 224, 3, 64, 7, 7, [2, 2], [3, 3], [1, 1]]", "cuda -keys=cuda,gpu -max_num_threads=1024 -model=t4 -thread_warp_size=32"], [[], [["CHW", 6, "local"], ["SP", 6, 0, 64, [1, 16, 1, 4], 1], ["SP", 6, 5, 3, [1, 3, 1, 1], 1], ["SP", 6, 10, 7, [7, 1, 1, 1], 1], ["SP", 6, 15, 7, [1, 7, 1, 1], 1], ["SP", 6, 20, 32, [1, 8], 1], ["SP", 6, 23, 112, [7, 1], 1], ["SP", 6, 26, 112, [1, 1], 1], ["RE", 6, [0, 5, 10, 15, 1, 6, 11, 16, 2, 7, 12, 17, 20, 23, 26, 21, 24, 27, 3, 8, 13, 18, 22, 25, 28, 4, 9, 14, 19]], ["FSP", 7, 0, 1, 3], ["FSP", 7, 4, 2, 3], ["FSP", 7, 8, 3, 3], ["FSP", 7, 12, 4, 3], ["RE", 7, [0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15]], ["CA", 6, 7, 11], ["CHR", 5, "shared", [6]], ["CA", 6, 7, 14], ["CI", 5], ["CHW", 2, "local"], ["SP", 2, 0, 32, [1, 2, 1, 8], 1], ["SP", 2, 5, 3, [1, 1, 1, 3], 1], ["SP", 2, 10, 230, [2, 23, 1, 1], 1], ["SP", 2, 15, 230, [1, 1, 5, 2], 1], ["SP", 2, 20, 64, [4, 1], 1], ["SP", 2, 23, 4, [2, 2], 1], ["SP", 2, 26, 4, [1, 2], 1], ["RE", 2, [0, 5, 10, 15, 1, 6, 11, 16, 2, 7, 12, 17, 20, 23, 26, 21, 24, 27, 3, 8, 13, 18, 22, 25, 28, 4, 9, 14, 19]], ["FSP", 3, 0, 19, 3], ["FSP", 3, 4, 20, 3], ["FSP", 3, 8, 21, 3], ["FSP", 3, 12, 22, 3], ["RE", 3, [0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15]], ["CA", 2, 3, 11], ["CHR", 1, "shared", [2]], ["CA", 2, 3, 14], ["CHR", 0, "shared", [3]], ["CA", 1, 4, 14], ["CHR", 0, "shared", [10]], ["CA", 1, 11, 14], ["FU", 12, [0, 1, 2, 3]], ["AN", 12, 0, 5], ["FU", 12, [1, 2, 3, 4]], ["AN", 12, 1, 4], ["FU", 12, [2, 3, 4, 5]], ["AN", 12, 2, 6], ["FU", 10, [0, 1, 2, 3]], ["SP", 10, 0, 104, [1], 1], ["AN", 10, 1, 2], ["FFSP", 10, 0, [4, 3, 2, 1], 1, 1], ["AN", 10, 1, 6], ["FU", 7, [0, 1, 2, 3]], ["SP", 7, 0, 4816896, [32], 1], ["AN", 7, 0, 5], ["AN", 7, 1, 6], ["FU", 6, [0, 1, 2, 3]], ["AN", 6, 0, 5], ["FU", 6, [1, 2, 3, 4]], ["AN", 6, 1, 4], ["FU", 6, [2, 3, 4, 5]], ["AN", 6, 2, 6], ["FU", 4, [0, 1, 2, 3]], ["SP", 4, 0, 336, [12], 1], ["AN", 4, 1, 2], 
["FFSP", 4, 0, [22, 21, 20, 19], 1, 1], ["AN", 4, 1, 6], ["FU", 2, [0, 1, 2, 3]], ["SP", 2, 0, 768, [8], 1], ["AN", 2, 1, 2], ["FFSP", 2, 0, [22, 21, 20, 19], 1, 1], ["AN", 2, 1, 6], ["FU", 1, [0, 1, 2, 3]], ["SP", 1, 0, 224, [1], 1], ["AN", 1, 1, 2], ["FFSP", 1, 0, [4, 3, 2, 1], 1, 1], ["AN", 1, 1, 6], ["PR", 5, 0, "auto_unroll_max_step$16"], ["PR", 11, 0, "auto_unroll_max_step$64"]]]], "r": [[0.518043], 0, 4.63943, 1602798182], "v": "v0.2"}
{"i": [["[\"conv2d_nchw_gd\", 32, 224, 224, 3, 64, 7, 7, [2, 2], [3, 3], [1, 1]]", "cuda -keys=cuda,gpu -max_num_threads=1024 -model=t4 -thread_warp_size=32"], [[], [["CHW", 6, "local"], ["SP", 6, 0, 64, [8, 1, 2, 2], 1], ["SP", 6, 5, 3, [1, 3, 1, 1], 1], ["SP", 6, 10, 7, [1, 7, 1, 1], 1], ["SP", 6, 15, 7, [1, 7, 1, 1], 1], ["SP", 6, 20, 32, [1, 1], 1], ["SP", 6, 23, 112, [4, 7], 1], ["SP", 6, 26, 112, [2, 2], 1], ["RE", 6, [0, 5, 10, 15, 1, 6, 11, 16, 2, 7, 12, 17, 20, 23, 26, 21, 24, 27, 3, 8, 13, 18, 22, 25, 28, 4, 9, 14, 19]], ["FSP", 7, 0, 1, 3], ["FSP", 7, 4, 2, 3], ["FSP", 7, 8, 3, 3], ["FSP", 7, 12, 4, 3], ["RE", 7, [0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15]], ["CA", 6, 7, 11], ["CHR", 5, "shared", [6]], ["CA", 6, 7, 14], ["CI", 5], ["CHW", 2, "local"], ["SP", 2, 0, 32, [1, 8, 1, 4], 1], ["SP", 2, 5, 3, [3, 1, 1, 1], 1], ["SP", 2, 10, 230, [1, 1, 5, 2], 1], ["SP", 2, 15, 230, [1, 115, 1, 1], 1], ["SP", 2, 20, 64, [1, 1], 1], ["SP", 2, 23, 4, [2, 1], 1], ["SP", 2, 26, 4, [1, 2], 1], ["RE", 2, [0, 5, 10, 15, 1, 6, 11, 16, 2, 7, 12, 17, 20, 23, 26, 21, 24, 27, 3, 8, 13, 18, 22, 25, 28, 4, 9, 14, 19]], ["FSP", 3, 0, 19, 3], ["FSP", 3, 4, 20, 3], ["FSP", 3, 8, 21, 3], ["FSP", 3, 12, 22, 3], ["RE", 3, [0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15]], ["CA", 2, 3, 11], ["CHR", 1, "shared", [2]], ["CA", 2, 3, 14], ["CHR", 0, "shared", [3]], ["CA", 1, 4, 14], ["CHR", 0, "shared", [10]], ["CA", 1, 11, 14], ["FU", 12, [0, 1, 2, 3]], ["AN", 12, 0, 5], ["FU", 12, [1, 2, 3, 4]], ["AN", 12, 1, 4], ["FU", 12, [2, 3, 4, 5]], ["AN", 12, 2, 6], ["FU", 10, [0, 1, 2, 3]], ["SP", 10, 0, 385, [1], 1], ["AN", 10, 1, 2], ["FFSP", 10, 0, [4, 3, 2, 1], 1, 1], ["AN", 10, 1, 6], ["FU", 7, [0, 1, 2, 3]], ["SP", 7, 0, 4816896, [32], 1], ["AN", 7, 0, 5], ["AN", 7, 1, 6], ["FU", 6, [0, 1, 2, 3]], ["AN", 6, 0, 5], ["FU", 6, [1, 2, 3, 4]], ["AN", 6, 1, 4], ["FU", 6, [2, 3, 4, 5]], ["AN", 6, 2, 6], ["FU", 4, [0, 1, 2, 3]], ["SP", 4, 0, 12, [1], 1], ["AN", 4, 1, 2], ["FFSP", 
4, 0, [22, 21, 20, 19], 1, 1], ["AN", 4, 1, 6], ["FU", 2, [0, 1, 2, 3]], ["SP", 2, 0, 48, [1], 1], ["AN", 2, 1, 2], ["FFSP", 2, 0, [22, 21, 20, 19], 1, 1], ["AN", 2, 1, 6], ["FU", 1, [0, 1, 2, 3]], ["SP", 1, 0, 448, [4], 1], ["AN", 1, 1, 2], ["FFSP", 1, 0, [4, 3, 2, 1], 1, 1], ["AN", 1, 1, 6], ["PR", 5, 0, "auto_unroll_max_step$0"], ["PR", 11, 0, "auto_unroll_max_step$512"]]]], "r": [[1e+10], 4, 4.8503, 1602798184], "v": "v0.2"}
{"i": [["[\"conv2d_nchw_gd\", 32, 224, 224, 3, 64, 7, 7, [2, 2], [3, 3], [1, 1]]", "cuda -keys=cuda,gpu -max_num_threads=1024 -model=t4 -thread_warp_size=32"], [[], [["CHW", 6, "local"], ["SP", 6, 0, 64, [8, 1, 2, 1], 1], ["SP", 6, 5, 3, [1, 3, 1, 1], 1], ["SP", 6, 10, 7, [1, 7, 1, 1], 1], ["SP", 6, 15, 7, [1, 7, 1, 1], 1], ["SP", 6, 20, 32, [1, 1], 1], ["SP", 6, 23, 112, [4, 7], 1], ["SP", 6, 26, 112, [2, 2], 1], ["RE", 6, [0, 5, 10, 15, 1, 6, 11, 16, 2, 7, 12, 17, 20, 23, 26, 21, 24, 27, 3, 8, 13, 18, 22, 25, 28, 4, 9, 14, 19]], ["FSP", 7, 0, 1, 3], ["FSP", 7, 4, 2, 3], ["FSP", 7, 8, 3, 3], ["FSP", 7, 12, 4, 3], ["RE", 7, [0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15]], ["CA", 6, 7, 11], ["CHR", 5, "shared", [6]], ["CA", 6, 7, 14], ["CI", 5], ["CHW", 2, "local"], ["SP", 2, 0, 32, [1, 8, 1, 4], 1], ["SP", 2, 5, 3, [3, 1, 1, 1], 1], ["SP", 2, 10, 230, [1, 1, 5, 2], 1], ["SP", 2, 15, 230, [1, 5, 1, 1], 1], ["SP", 2, 20, 64, [1, 1], 1], ["SP", 2, 23, 4, [4, 1], 1], ["SP", 2, 26, 4, [1, 4], 1], ["RE", 2, [0, 5, 10, 15, 1, 6, 11, 16, 2, 7, 12, 17, 20, 23, 26, 21, 24, 27, 3, 8, 13, 18, 22, 25, 28, 4, 9, 14, 19]], ["FSP", 3, 0, 19, 3], ["FSP", 3, 4, 20, 3], ["FSP", 3, 8, 21, 3], ["FSP", 3, 12, 22, 3], ["RE", 3, [0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15]], ["CA", 2, 3, 11], ["CHR", 1, "shared", [2]], ["CA", 2, 3, 14], ["CHR", 0, "shared", [3]], ["CA", 1, 4, 14], ["CHR", 0, "shared", [10]], ["CA", 1, 11, 14], ["FU", 12, [0, 1, 2, 3]], ["AN", 12, 0, 5], ["FU", 12, [1, 2, 3, 4]], ["AN", 12, 1, 4], ["FU", 12, [2, 3, 4, 5]], ["AN", 12, 2, 6], ["FU", 10, [0, 1, 2, 3]], ["SP", 10, 0, 385, [1], 1], ["AN", 10, 1, 2], ["FFSP", 10, 0, [4, 3, 2, 1], 1, 1], ["AN", 10, 1, 6], ["FU", 7, [0, 1, 2, 3]], ["SP", 7, 0, 4816896, [32], 1], ["AN", 7, 0, 5], ["AN", 7, 1, 6], ["FU", 6, [0, 1, 2, 3]], ["AN", 6, 0, 5], ["FU", 6, [1, 2, 3, 4]], ["AN", 6, 1, 4], ["FU", 6, [2, 3, 4, 5]], ["AN", 6, 2, 6], ["FU", 4, [0, 1, 2, 3]], ["SP", 4, 0, 12, [1], 1], ["AN", 4, 1, 2], ["FFSP", 
4, 0, [22, 21, 20, 19], 1, 1], ["AN", 4, 1, 6], ["FU", 2, [0, 1, 2, 3]], ["SP", 2, 0, 48, [1], 1], ["AN", 2, 1, 2], ["FFSP", 2, 0, [22, 21, 20, 19], 1, 1], ["AN", 2, 1, 6], ["FU", 1, [0, 1, 2, 3]], ["SP", 1, 0, 448, [64], 1], ["AN", 1, 1, 2], ["FFSP", 1, 0, [4, 3, 2, 1], 1, 1], ["AN", 1, 1, 6], ["PR", 5, 0, "auto_unroll_max_step$0"], ["PR", 11, 0, "auto_unroll_max_step$1024"]]]], "r": [[0.13228], 0, 4.86224, 1602798186], "v": "v0.2"}
{"i": [["[\"conv2d_nchw_gd\", 32, 224, 224, 3, 64, 7, 7, [2, 2], [3, 3], [1, 1]]", "cuda -keys=cuda,gpu -max_num_threads=1024 -model=t4 -thread_warp_size=32"], [[], [["CHW", 6, "local"], ["SP", 6, 0, 64, [1, 1, 16, 4], 1], ["SP", 6, 5, 3, [1, 1, 1, 3], 1], ["SP", 6, 10, 7, [1, 7, 1, 1], 1], ["SP", 6, 15, 7, [1, 7, 1, 1], 1], ["SP", 6, 20, 32, [1, 1], 1], ["SP", 6, 23, 112, [56, 1], 1], ["SP", 6, 26, 112, [2, 1], 1], ["RE", 6, [0, 5, 10, 15, 1, 6, 11, 16, 2, 7, 12, 17, 20, 23, 26, 21, 24, 27, 3, 8, 13, 18, 22, 25, 28, 4, 9, 14, 19]], ["FSP", 7, 0, 1, 3], ["FSP", 7, 4, 2, 3], ["FSP", 7, 8, 3, 3], ["FSP", 7, 12, 4, 3], ["RE", 7, [0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15]], ["CA", 6, 7, 11], ["CHR", 5, "shared", [6]], ["CA", 6, 7, 14], ["CI", 5], ["CHW", 2, "local"], ["SP", 2, 0, 32, [1, 4, 4, 1], 1], ["SP", 2, 5, 3, [1, 1, 1, 3], 1], ["SP", 2, 10, 230, [1, 1, 1, 1], 1], ["SP", 2, 15, 230, [1, 23, 2, 1], 1], ["SP", 2, 20, 64, [1, 1], 1], ["SP", 2, 23, 4, [4, 1], 1], ["SP", 2, 26, 4, [1, 1], 1], ["RE", 2, [0, 5, 10, 15, 1, 6, 11, 16, 2, 7, 12, 17, 20, 23, 26, 21, 24, 27, 3, 8, 13, 18, 22, 25, 28, 4, 9, 14, 19]], ["FSP", 3, 0, 19, 3], ["FSP", 3, 4, 20, 3], ["FSP", 3, 8, 21, 3], ["FSP", 3, 12, 22, 3], ["RE", 3, [0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15]], ["CA", 2, 3, 11], ["CHR", 1, "shared", [2]], ["CA", 2, 3, 14], ["CHR", 0, "shared", [3]], ["CA", 1, 4, 14], ["CHR", 0, "shared", [10]], ["CA", 1, 11, 14], ["FU", 12, [0, 1, 2, 3]], ["AN", 12, 0, 5], ["FU", 12, [1, 2, 3, 4]], ["AN", 12, 1, 4], ["FU", 12, [2, 3, 4, 5]], ["AN", 12, 2, 6], ["FU", 10, [0, 1, 2, 3]], ["SP", 10, 0, 999, [37], 1], ["AN", 10, 1, 2], ["FFSP", 10, 0, [4, 3, 2, 1], 1, 1], ["AN", 10, 1, 6], ["FU", 7, [0, 1, 2, 3]], ["SP", 7, 0, 4816896, [32], 1], ["AN", 7, 0, 5], ["AN", 7, 1, 6], ["FU", 6, [0, 1, 2, 3]], ["AN", 6, 0, 5], ["FU", 6, [1, 2, 3, 4]], ["AN", 6, 1, 4], ["FU", 6, [2, 3, 4, 5]], ["AN", 6, 2, 6], ["FU", 4, [0, 1, 2, 3]], ["SP", 4, 0, 56, [1], 1], ["AN", 4, 1, 2], 
["FFSP", 4, 0, [22, 21, 20, 19], 1, 1], ["AN", 4, 1, 6], ["FU", 2, [0, 1, 2, 3]], ["SP", 2, 0, 64, [32], 1], ["AN", 2, 1, 2], ["FFSP", 2, 0, [22, 21, 20, 19], 1, 1], ["AN", 2, 1, 6], ["FU", 1, [0, 1, 2, 3]], ["SP", 1, 0, 3584, [4], 1], ["AN", 1, 1, 2], ["FFSP", 1, 0, [4, 3, 2, 1], 1, 1], ["AN", 1, 1, 6], ["PR", 5, 0, "auto_unroll_max_step$16"], ["PR", 11, 0, "auto_unroll_max_step$16"]]]], "r": [[0.302763], 0, 3.35951, 1602798188], "v": "v0.2"}
{"i": [["[\"conv2d_nchw_gd\", 32, 224, 224, 3, 64, 7, 7, [2, 2], [3, 3], [1, 1]]", "cuda -keys=cuda,gpu -max_num_threads=1024 -model=t4 -thread_warp_size=32"], [[], [["CHW", 6, "local"], ["SP", 6, 0, 64, [1, 16, 1, 4], 1], ["SP", 6, 5, 3, [1, 3, 1, 1], 1], ["SP", 6, 10, 7, [7, 1, 1, 1], 1], ["SP", 6, 15, 7, [1, 1, 1, 1], 1], ["SP", 6, 20, 32, [1, 8], 1], ["SP", 6, 23, 112, [7, 1], 1], ["SP", 6, 26, 112, [1, 1], 1], ["RE", 6, [0, 5, 10, 15, 1, 6, 11, 16, 2, 7, 12, 17, 20, 23, 26, 21, 24, 27, 3, 8, 13, 18, 22, 25, 28, 4, 9, 14, 19]], ["FSP", 7, 0, 1, 3], ["FSP", 7, 4, 2, 3], ["FSP", 7, 8, 3, 3], ["FSP", 7, 12, 4, 3], ["RE", 7, [0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15]], ["CA", 6, 7, 11], ["CHR", 5, "shared", [6]], ["CA", 6, 7, 14], ["CI", 5], ["CHW", 2, "local"], ["SP", 2, 0, 32, [1, 2, 8, 1], 1], ["SP", 2, 5, 3, [1, 1, 3, 1], 1], ["SP", 2, 10, 230, [2, 23, 1, 1], 1], ["SP", 2, 15, 230, [1, 1, 5, 2], 1], ["SP", 2, 20, 64, [2, 1], 1], ["SP", 2, 23, 4, [2, 2], 1], ["SP", 2, 26, 4, [1, 1], 1], ["RE", 2, [0, 5, 10, 15, 1, 6, 11, 16, 2, 7, 12, 17, 20, 23, 26, 21, 24, 27, 3, 8, 13, 18, 22, 25, 28, 4, 9, 14, 19]], ["FSP", 3, 0, 19, 3], ["FSP", 3, 4, 20, 3], ["FSP", 3, 8, 21, 3], ["FSP", 3, 12, 22, 3], ["RE", 3, [0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15]], ["CA", 2, 3, 11], ["CHR", 1, "shared", [2]], ["CA", 2, 3, 14], ["CHR", 0, "shared", [3]], ["CA", 1, 4, 14], ["CHR", 0, "shared", [10]], ["CA", 1, 11, 14], ["FU", 12, [0, 1, 2, 3]], ["AN", 12, 0, 5], ["FU", 12, [1, 2, 3, 4]], ["AN", 12, 1, 4], ["FU", 12, [2, 3, 4, 5]], ["AN", 12, 2, 6], ["FU", 10, [0, 1, 2, 3]], ["SP", 10, 0, 104, [1], 1], ["AN", 10, 1, 2], ["FFSP", 10, 0, [4, 3, 2, 1], 1, 1], ["AN", 10, 1, 6], ["FU", 7, [0, 1, 2, 3]], ["SP", 7, 0, 4816896, [32], 1], ["AN", 7, 0, 5], ["AN", 7, 1, 6], ["FU", 6, [0, 1, 2, 3]], ["AN", 6, 0, 5], ["FU", 6, [1, 2, 3, 4]], ["AN", 6, 1, 4], ["FU", 6, [2, 3, 4, 5]], ["AN", 6, 2, 6], ["FU", 4, [0, 1, 2, 3]], ["SP", 4, 0, 336, [1], 1], ["AN", 4, 1, 2], 
["FFSP", 4, 0, [22, 21, 20, 19], 1, 1], ["AN", 4, 1, 6], ["FU", 2, [0, 1, 2, 3]], ["SP", 2, 0, 768, [2], 1], ["AN", 2, 1, 2], ["FFSP", 2, 0, [22, 21, 20, 19], 1, 1], ["AN", 2, 1, 6], ["FU", 1, [0, 1, 2, 3]], ["SP", 1, 0, 224, [1], 1], ["AN", 1, 1, 2], ["FFSP", 1, 0, [4, 3, 2, 1], 1, 1], ["AN", 1, 1, 6], ["PR", 5, 0, "auto_unroll_max_step$16"], ["PR", 11, 0, "auto_unroll_max_step$64"]]]], "r": [[0.309466], 0, 3.84374, 1602798190], "v": "v0.2"}
{"i": [["[\"conv2d_nchw_gd\", 32, 224, 224, 3, 64, 7, 7, [2, 2], [3, 3], [1, 1]]", "cuda -keys=cuda,gpu -max_num_threads=1024 -model=t4 -thread_warp_size=32"], [[], [["CHW", 6, "local"], ["SP", 6, 0, 64, [8, 1, 2, 2], 1], ["SP", 6, 5, 3, [1, 3, 1, 1], 1], ["SP", 6, 10, 7, [1, 7, 1, 1], 1], ["SP", 6, 15, 7, [1, 7, 1, 1], 1], ["SP", 6, 20, 32, [1, 1], 1], ["SP", 6, 23, 112, [4, 7], 1], ["SP", 6, 26, 112, [2, 2], 1], ["RE", 6, [0, 5, 10, 15, 1, 6, 11, 16, 2, 7, 12, 17, 20, 23, 26, 21, 24, 27, 3, 8, 13, 18, 22, 25, 28, 4, 9, 14, 19]], ["FSP", 7, 0, 1, 3], ["FSP", 7, 4, 2, 3], ["FSP", 7, 8, 3, 3], ["FSP", 7, 12, 4, 3], ["RE", 7, [0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15]], ["CA", 6, 7, 11], ["CHR", 5, "shared", [6]], ["CA", 6, 7, 14], ["CI", 5], ["CHW", 2, "local"], ["SP", 2, 0, 32, [1, 8, 1, 4], 1], ["SP", 2, 5, 3, [1, 1, 3, 1], 1], ["SP", 2, 10, 230, [1, 1, 5, 2], 1], ["SP", 2, 15, 230, [1, 115, 1, 1], 1], ["SP", 2, 20, 64, [1, 1], 1], ["SP", 2, 23, 4, [2, 1], 1], ["SP", 2, 26, 4, [1, 2], 1], ["RE", 2, [0, 5, 10, 15, 1, 6, 11, 16, 2, 7, 12, 17, 20, 23, 26, 21, 24, 27, 3, 8, 13, 18, 22, 25, 28, 4, 9, 14, 19]], ["FSP", 3, 0, 19, 3], ["FSP", 3, 4, 20, 3], ["FSP", 3, 8, 21, 3], ["FSP", 3, 12, 22, 3], ["RE", 3, [0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15]], ["CA", 2, 3, 11], ["CHR", 1, "shared", [2]], ["CA", 2, 3, 14], ["CHR", 0, "shared", [3]], ["CA", 1, 4, 14], ["CHR", 0, "shared", [10]], ["CA", 1, 11, 14], ["FU", 12, [0, 1, 2, 3]], ["AN", 12, 0, 5], ["FU", 12, [1, 2, 3, 4]], ["AN", 12, 1, 4], ["FU", 12, [2, 3, 4, 5]], ["AN", 12, 2, 6], ["FU", 10, [0, 1, 2, 3]], ["SP", 10, 0, 385, [11], 1], ["AN", 10, 1, 2], ["FFSP", 10, 0, [4, 3, 2, 1], 1, 1], ["AN", 10, 1, 6], ["FU", 7, [0, 1, 2, 3]], ["SP", 7, 0, 4816896, [32], 1], ["AN", 7, 0, 5], ["AN", 7, 1, 6], ["FU", 6, [0, 1, 2, 3]], ["AN", 6, 0, 5], ["FU", 6, [1, 2, 3, 4]], ["AN", 6, 1, 4], ["FU", 6, [2, 3, 4, 5]], ["AN", 6, 2, 6], ["FU", 4, [0, 1, 2, 3]], ["SP", 4, 0, 12, [1], 1], ["AN", 4, 1, 2], 
["FFSP", 4, 0, [22, 21, 20, 19], 1, 1], ["AN", 4, 1, 6], ["FU", 2, [0, 1, 2, 3]], ["SP", 2, 0, 48, [1], 1], ["AN", 2, 1, 2], ["FFSP", 2, 0, [22, 21, 20, 19], 1, 1], ["AN", 2, 1, 6], ["FU", 1, [0, 1, 2, 3]], ["SP", 1, 0, 448, [1], 1], ["AN", 1, 1, 2], ["FFSP", 1, 0, [4, 3, 2, 1], 1, 1], ["AN", 1, 1, 6], ["PR", 5, 0, "auto_unroll_max_step$16"], ["PR", 11, 0, "auto_unroll_max_step$512"]]]], "r": [[1e+10], 4, 5.38523, 1602798192], "v": "v0.2"}
{"i": [["[\"conv2d_nchw_gd\", 32, 224, 224, 3, 64, 7, 7, [2, 2], [3, 3], [1, 1]]", "cuda -keys=cuda,gpu -max_num_threads=1024 -model=t4 -thread_warp_size=32"], [[], [["CHW", 6, "local"], ["SP", 6, 0, 64, [1, 1, 1, 32], 1], ["SP", 6, 5, 3, [1, 1, 1, 1], 1], ["SP", 6, 10, 7, [1, 7, 1, 1], 1], ["SP", 6, 15, 7, [1, 7, 1, 1], 1], ["SP", 6, 20, 32, [2, 1], 1], ["SP", 6, 23, 112, [8, 1], 1], ["SP", 6, 26, 112, [2, 4], 1], ["RE", 6, [0, 5, 10, 15, 1, 6, 11, 16, 2, 7, 12, 17, 20, 23, 26, 21, 24, 27, 3, 8, 13, 18, 22, 25, 28, 4, 9, 14, 19]], ["FSP", 7, 0, 1, 3], ["FSP", 7, 4, 2, 3], ["FSP", 7, 8, 3, 3], ["FSP", 7, 12, 4, 3], ["RE", 7, [0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15]], ["CA", 6, 7, 11], ["CHR", 5, "shared", [6]], ["CA", 6, 7, 14], ["CI", 5], ["CHW", 2, "local"], ["SP", 2, 0, 32, [4, 8, 1, 1], 1], ["SP", 2, 5, 3, [1, 3, 1, 1], 1], ["SP", 2, 10, 230, [1, 5, 23, 1], 1], ["SP", 2, 15, 230, [1, 1, 1, 1], 1], ["SP", 2, 20, 64, [4, 1], 1], ["SP", 2, 23, 4, [2, 1], 1], ["SP", 2, 26, 4, [1, 1], 1], ["RE", 2, [0, 5, 10, 15, 1, 6, 11, 16, 2, 7, 12, 17, 20, 23, 26, 21, 24, 27, 3, 8, 13, 18, 22, 25, 28, 4, 9, 14, 19]], ["FSP", 3, 0, 19, 3], ["FSP", 3, 4, 20, 3], ["FSP", 3, 8, 21, 3], ["FSP", 3, 12, 22, 3], ["RE", 3, [0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15]], ["CA", 2, 3, 11], ["CHR", 1, "shared", [2]], ["CA", 2, 3, 14], ["CHR", 0, "shared", [3]], ["CA", 1, 4, 14], ["CHR", 0, "shared", [10]], ["CA", 1, 11, 14], ["FU", 12, [0, 1, 2, 3]], ["AN", 12, 0, 5], ["FU", 12, [1, 2, 3, 4]], ["AN", 12, 1, 4], ["FU", 12, [2, 3, 4, 5]], ["AN", 12, 2, 6], ["FU", 10, [0, 1, 2, 3]], ["SP", 10, 0, 450, [1], 1], ["AN", 10, 1, 2], ["FFSP", 10, 0, [4, 3, 2, 1], 1, 1], ["AN", 10, 1, 6], ["FU", 7, [0, 1, 2, 3]], ["SP", 7, 0, 4816896, [64], 1], ["AN", 7, 0, 5], ["AN", 7, 1, 6], ["FU", 6, [0, 1, 2, 3]], ["AN", 6, 0, 5], ["FU", 6, [1, 2, 3, 4]], ["AN", 6, 1, 4], ["FU", 6, [2, 3, 4, 5]], ["AN", 6, 2, 6], ["FU", 4, [0, 1, 2, 3]], ["SP", 4, 0, 16, [1], 1], ["AN", 4, 1, 2], ["FFSP", 
4, 0, [22, 21, 20, 19], 1, 1], ["AN", 4, 1, 6], ["FU", 2, [0, 1, 2, 3]], ["SP", 2, 0, 52, [2], 1], ["AN", 2, 1, 2], ["FFSP", 2, 0, [22, 21, 20, 19], 1, 1], ["AN", 2, 1, 6], ["FU", 1, [0, 1, 2, 3]], ["SP", 1, 0, 4096, [1], 1], ["AN", 1, 1, 2], ["FFSP", 1, 0, [4, 3, 2, 1], 1, 1], ["AN", 1, 1, 6], ["PR", 5, 0, "auto_unroll_max_step$512"], ["PR", 11, 0, "auto_unroll_max_step$1024"]]]], "r": [[0.166286], 0, 5.55289, 1602798195], "v": "v0.2"}
{"i": [["[\"conv2d_nchw_gd\", 32, 224, 224, 3, 64, 7, 7, [2, 2], [3, 3], [1, 1]]", "cuda -keys=cuda,gpu -max_num_threads=1024 -model=t4 -thread_warp_size=32"], [[], [["CHW", 6, "local"], ["SP", 6, 0, 64, [8, 2, 2, 1], 1], ["SP", 6, 5, 3, [1, 3, 1, 1], 1], ["SP", 6, 10, 7, [1, 7, 1, 1], 1], ["SP", 6, 15, 7, [1, 1, 1, 1], 1], ["SP", 6, 20, 32, [2, 8], 1], ["SP", 6, 23, 112, [1, 1], 1], ["SP", 6, 26, 112, [4, 1], 1], ["RE", 6, [0, 5, 10, 15, 1, 6, 11, 16, 2, 7, 12, 17, 20, 23, 26, 21, 24, 27, 3, 8, 13, 18, 22, 25, 28, 4, 9, 14, 19]], ["FSP", 7, 0, 1, 3], ["FSP", 7, 4, 2, 3], ["FSP", 7, 8, 3, 3], ["FSP", 7, 12, 4, 3], ["RE", 7, [0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15]], ["CA", 6, 7, 11], ["CHR", 5, "shared", [6]], ["CA", 6, 7, 14], ["CI", 5], ["CHW", 2, "local"], ["SP", 2, 0, 32, [1, 8, 4, 1], 1], ["SP", 2, 5, 3, [1, 1, 3, 1], 1], ["SP", 2, 10, 230, [1, 1, 5, 1], 1], ["SP", 2, 15, 230, [1, 5, 1, 23], 1], ["SP", 2, 20, 64, [2, 1], 1], ["SP", 2, 23, 4, [1, 1], 1], ["SP", 2, 26, 4, [2, 1], 1], ["RE", 2, [0, 5, 10, 15, 1, 6, 11, 16, 2, 7, 12, 17, 20, 23, 26, 21, 24, 27, 3, 8, 13, 18, 22, 25, 28, 4, 9, 14, 19]], ["FSP", 3, 0, 19, 3], ["FSP", 3, 4, 20, 3], ["FSP", 3, 8, 21, 3], ["FSP", 3, 12, 22, 3], ["RE", 3, [0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15]], ["CA", 2, 3, 11], ["CHR", 1, "shared", [2]], ["CA", 2, 3, 14], ["CHR", 0, "shared", [3]], ["CA", 1, 4, 14], ["CHR", 0, "shared", [10]], ["CA", 1, 11, 14], ["FU", 12, [0, 1, 2, 3]], ["AN", 12, 0, 5], ["FU", 12, [1, 2, 3, 4]], ["AN", 12, 1, 4], ["FU", 12, [2, 3, 4, 5]], ["AN", 12, 2, 6], ["FU", 10, [0, 1, 2, 3]], ["SP", 10, 0, 112, [1], 1], ["AN", 10, 1, 2], ["FFSP", 10, 0, [4, 3, 2, 1], 1, 1], ["AN", 10, 1, 6], ["FU", 7, [0, 1, 2, 3]], ["SP", 7, 0, 4816896, [64], 1], ["AN", 7, 0, 5], ["AN", 7, 1, 6], ["FU", 6, [0, 1, 2, 3]], ["AN", 6, 0, 5], ["FU", 6, [1, 2, 3, 4]], ["AN", 6, 1, 4], ["FU", 6, [2, 3, 4, 5]], ["AN", 6, 2, 6], ["FU", 4, [0, 1, 2, 3]], ["SP", 4, 0, 48, [1], 1], ["AN", 4, 1, 2], ["FFSP", 
4, 0, [22, 21, 20, 19], 1, 1], ["AN", 4, 1, 6], ["FU", 2, [0, 1, 2, 3]], ["SP", 2, 0, 312, [1], 1], ["AN", 2, 1, 2], ["FFSP", 2, 0, [22, 21, 20, 19], 1, 1], ["AN", 2, 1, 6], ["FU", 1, [0, 1, 2, 3]], ["SP", 1, 0, 128, [1], 1], ["AN", 1, 1, 2], ["FFSP", 1, 0, [4, 3, 2, 1], 1, 1], ["AN", 1, 1, 6], ["PR", 5, 0, "auto_unroll_max_step$64"], ["PR", 11, 0, "auto_unroll_max_step$64"]]]], "r": [[1.40547], 0, 9.21314, 1602798201], "v": "v0.2"}
{"i": [["[\"conv2d_nchw_gd\", 32, 224, 224, 3, 64, 7, 7, [2, 2], [3, 3], [1, 1]]", "cuda -keys=cuda,gpu -max_num_threads=1024 -model=t4 -thread_warp_size=32"], [[], [["CHW", 6, "local"], ["SP", 6, 0, 64, [1, 16, 1, 4], 1], ["SP", 6, 5, 3, [1, 3, 1, 1], 1], ["SP", 6, 10, 7, [7, 1, 1, 1], 1], ["SP", 6, 15, 7, [1, 1, 1, 1], 1], ["SP", 6, 20, 32, [1, 8], 1], ["SP", 6, 23, 112, [7, 1], 1], ["SP", 6, 26, 112, [1, 1], 1], ["RE", 6, [0, 5, 10, 15, 1, 6, 11, 16, 2, 7, 12, 17, 20, 23, 26, 21, 24, 27, 3, 8, 13, 18, 22, 25, 28, 4, 9, 14, 19]], ["FSP", 7, 0, 1, 3], ["FSP", 7, 4, 2, 3], ["FSP", 7, 8, 3, 3], ["FSP", 7, 12, 4, 3], ["RE", 7, [0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15]], ["CA", 6, 7, 11], ["CHR", 5, "shared", [6]], ["CA", 6, 7, 14], ["CI", 5], ["CHW", 2, "local"], ["SP", 2, 0, 32, [1, 2, 8, 1], 1], ["SP", 2, 5, 3, [1, 1, 3, 1], 1], ["SP", 2, 10, 230, [2, 23, 1, 1], 1], ["SP", 2, 15, 230, [1, 5, 1, 2], 1], ["SP", 2, 20, 64, [4, 1], 1], ["SP", 2, 23, 4, [2, 2], 1], ["SP", 2, 26, 4, [1, 2], 1], ["RE", 2, [0, 5, 10, 15, 1, 6, 11, 16, 2, 7, 12, 17, 20, 23, 26, 21, 24, 27, 3, 8, 13, 18, 22, 25, 28, 4, 9, 14, 19]], ["FSP", 3, 0, 19, 3], ["FSP", 3, 4, 20, 3], ["FSP", 3, 8, 21, 3], ["FSP", 3, 12, 22, 3], ["RE", 3, [0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15]], ["CA", 2, 3, 11], ["CHR", 1, "shared", [2]], ["CA", 2, 3, 14], ["CHR", 0, "shared", [3]], ["CA", 1, 4, 14], ["CHR", 0, "shared", [10]], ["CA", 1, 11, 14], ["FU", 12, [0, 1, 2, 3]], ["AN", 12, 0, 5], ["FU", 12, [1, 2, 3, 4]], ["AN", 12, 1, 4], ["FU", 12, [2, 3, 4, 5]], ["AN", 12, 2, 6], ["FU", 10, [0, 1, 2, 3]], ["SP", 10, 0, 104, [1], 1], ["AN", 10, 1, 2], ["FFSP", 10, 0, [4, 3, 2, 1], 1, 1], ["AN", 10, 1, 6], ["FU", 7, [0, 1, 2, 3]], ["SP", 7, 0, 4816896, [32], 1], ["AN", 7, 0, 5], ["AN", 7, 1, 6], ["FU", 6, [0, 1, 2, 3]], ["AN", 6, 0, 5], ["FU", 6, [1, 2, 3, 4]], ["AN", 6, 1, 4], ["FU", 6, [2, 3, 4, 5]], ["AN", 6, 2, 6], ["FU", 4, [0, 1, 2, 3]], ["SP", 4, 0, 336, [1], 1], ["AN", 4, 1, 2], 
["FFSP", 4, 0, [22, 21, 20, 19], 1, 1], ["AN", 4, 1, 6], ["FU", 2, [0, 1, 2, 3]], ["SP", 2, 0, 768, [16], 1], ["AN", 2, 1, 2], ["FFSP", 2, 0, [22, 21, 20, 19], 1, 1], ["AN", 2, 1, 6], ["FU", 1, [0, 1, 2, 3]], ["SP", 1, 0, 224, [1], 1], ["AN", 1, 1, 2], ["FFSP", 1, 0, [4, 3, 2, 1], 1, 1], ["AN", 1, 1, 6], ["PR", 5, 0, "auto_unroll_max_step$64"], ["PR", 11, 0, "auto_unroll_max_step$0"]]]], "r": [[0.170309], 0, 3.97569, 1602798204], "v": "v0.2"}
{"i": [["[\"conv2d_nchw_gd\", 32, 224, 224, 3, 64, 7, 7, [2, 2], [3, 3], [1, 1]]", "cuda -keys=cuda,gpu -max_num_threads=1024 -model=t4 -thread_warp_size=32"], [[], [["CHW", 6, "local"], ["SP", 6, 0, 64, [1, 1, 16, 2], 1], ["SP", 6, 5, 3, [1, 1, 1, 3], 1], ["SP", 6, 10, 7, [1, 7, 1, 1], 1], ["SP", 6, 15, 7, [1, 1, 1, 1], 1], ["SP", 6, 20, 32, [1, 1], 1], ["SP", 6, 23, 112, [56, 1], 1], ["SP", 6, 26, 112, [2, 1], 1], ["RE", 6, [0, 5, 10, 15, 1, 6, 11, 16, 2, 7, 12, 17, 20, 23, 26, 21, 24, 27, 3, 8, 13, 18, 22, 25, 28, 4, 9, 14, 19]], ["FSP", 7, 0, 1, 3], ["FSP", 7, 4, 2, 3], ["FSP", 7, 8, 3, 3], ["FSP", 7, 12, 4, 3], ["RE", 7, [0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15]], ["CA", 6, 7, 11], ["CHR", 5, "shared", [6]], ["CA", 6, 7, 14], ["CI", 5], ["CHW", 2, "local"], ["SP", 2, 0, 32, [8, 1, 4, 1], 1], ["SP", 2, 5, 3, [1, 1, 1, 3], 1], ["SP", 2, 10, 230, [1, 1, 1, 1], 1], ["SP", 2, 15, 230, [1, 23, 2, 1], 1], ["SP", 2, 20, 64, [1, 2], 1], ["SP", 2, 23, 4, [4, 1], 1], ["SP", 2, 26, 4, [2, 1], 1], ["RE", 2, [0, 5, 10, 15, 1, 6, 11, 16, 2, 7, 12, 17, 20, 23, 26, 21, 24, 27, 3, 8, 13, 18, 22, 25, 28, 4, 9, 14, 19]], ["FSP", 3, 0, 19, 3], ["FSP", 3, 4, 20, 3], ["FSP", 3, 8, 21, 3], ["FSP", 3, 12, 22, 3], ["RE", 3, [0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15]], ["CA", 2, 3, 11], ["CHR", 1, "shared", [2]], ["CA", 2, 3, 14], ["CHR", 0, "shared", [3]], ["CA", 1, 4, 14], ["CHR", 0, "shared", [10]], ["CA", 1, 11, 14], ["FU", 12, [0, 1, 2, 3]], ["AN", 12, 0, 5], ["FU", 12, [1, 2, 3, 4]], ["AN", 12, 1, 4], ["FU", 12, [2, 3, 4, 5]], ["AN", 12, 2, 6], ["FU", 10, [0, 1, 2, 3]], ["SP", 10, 0, 999, [1], 1], ["AN", 10, 1, 2], ["FFSP", 10, 0, [4, 3, 2, 1], 1, 1], ["AN", 10, 1, 6], ["FU", 7, [0, 1, 2, 3]], ["SP", 7, 0, 4816896, [64], 1], ["AN", 7, 0, 5], ["AN", 7, 1, 6], ["FU", 6, [0, 1, 2, 3]], ["AN", 6, 0, 5], ["FU", 6, [1, 2, 3, 4]], ["AN", 6, 1, 4], ["FU", 6, [2, 3, 4, 5]], ["AN", 6, 2, 6], ["FU", 4, [0, 1, 2, 3]], ["SP", 4, 0, 56, [1], 1], ["AN", 4, 1, 2], 
["FFSP", 4, 0, [22, 21, 20, 19], 1, 1], ["AN", 4, 1, 6], ["FU", 2, [0, 1, 2, 3]], ["SP", 2, 0, 64, [4], 1], ["AN", 2, 1, 2], ["FFSP", 2, 0, [22, 21, 20, 19], 1, 1], ["AN", 2, 1, 6], ["FU", 1, [0, 1, 2, 3]], ["SP", 1, 0, 3584, [1], 1], ["AN", 1, 1, 2], ["FFSP", 1, 0, [4, 3, 2, 1], 1, 1], ["AN", 1, 1, 6], ["PR", 5, 0, "auto_unroll_max_step$16"], ["PR", 11, 0, "auto_unroll_max_step$16"]]]], "r": [[0.361175], 0, 3.67203, 1602798206], "v": "v0.2"}
{"i": [["[\"conv2d_nchw_gd\", 32, 224, 224, 3, 64, 7, 7, [2, 2], [3, 3], [1, 1]]", "cuda -keys=cuda,gpu -max_num_threads=1024 -model=t4 -thread_warp_size=32"], [[], [["CHW", 6, "local"], ["SP", 6, 0, 64, [1, 2, 16, 1], 1], ["SP", 6, 5, 3, [1, 1, 1, 1], 1], ["SP", 6, 10, 7, [1, 1, 7, 1], 1], ["SP", 6, 15, 7, [1, 1, 1, 1], 1], ["SP", 6, 20, 32, [16, 2], 1], ["SP", 6, 23, 112, [2, 1], 1], ["SP", 6, 26, 112, [1, 1], 1], ["RE", 6, [0, 5, 10, 15, 1, 6, 11, 16, 2, 7, 12, 17, 20, 23, 26, 21, 24, 27, 3, 8, 13, 18, 22, 25, 28, 4, 9, 14, 19]], ["FSP", 7, 0, 1, 3], ["FSP", 7, 4, 2, 3], ["FSP", 7, 8, 3, 3], ["FSP", 7, 12, 4, 3], ["RE", 7, [0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15]], ["CA", 6, 7, 11], ["CHR", 5, "shared", [6]], ["CA", 6, 7, 14], ["CI", 5], ["CHW", 2, "local"], ["SP", 2, 0, 32, [1, 4, 1, 2], 1], ["SP", 2, 5, 3, [3, 1, 1, 1], 1], ["SP", 2, 10, 230, [2, 23, 1, 1], 1], ["SP", 2, 15, 230, [1, 5, 1, 2], 1], ["SP", 2, 20, 64, [1, 8], 1], ["SP", 2, 23, 4, [2, 2], 1], ["SP", 2, 26, 4, [2, 1], 1], ["RE", 2, [0, 5, 10, 15, 1, 6, 11, 16, 2, 7, 12, 17, 20, 23, 26, 21, 24, 27, 3, 8, 13, 18, 22, 25, 28, 4, 9, 14, 19]], ["FSP", 3, 0, 19, 3], ["FSP", 3, 4, 20, 3], ["FSP", 3, 8, 21, 3], ["FSP", 3, 12, 22, 3], ["RE", 3, [0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15]], ["CA", 2, 3, 11], ["CHR", 1, "shared", [2]], ["CA", 2, 3, 14], ["CHR", 0, "shared", [3]], ["CA", 1, 4, 14], ["CHR", 0, "shared", [10]], ["CA", 1, 11, 14], ["FU", 12, [0, 1, 2, 3]], ["AN", 12, 0, 5], ["FU", 12, [1, 2, 3, 4]], ["AN", 12, 1, 4], ["FU", 12, [2, 3, 4, 5]], ["AN", 12, 2, 6], ["FU", 10, [0, 1, 2, 3]], ["SP", 10, 0, 288, [1], 1], ["AN", 10, 1, 2], ["FFSP", 10, 0, [4, 3, 2, 1], 1, 1], ["AN", 10, 1, 6], ["FU", 7, [0, 1, 2, 3]], ["SP", 7, 0, 4816896, [32], 1], ["AN", 7, 0, 5], ["AN", 7, 1, 6], ["FU", 6, [0, 1, 2, 3]], ["AN", 6, 0, 5], ["FU", 6, [1, 2, 3, 4]], ["AN", 6, 1, 4], ["FU", 6, [2, 3, 4, 5]], ["AN", 6, 2, 6], ["FU", 4, [0, 1, 2, 3]], ["SP", 4, 0, 56, [1], 1], ["AN", 4, 1, 2], 
["FFSP", 4, 0, [22, 21, 20, 19], 1, 1], ["AN", 4, 1, 6], ["FU", 2, [0, 1, 2, 3]], ["SP", 2, 0, 32, [1], 1], ["AN", 2, 1, 2], ["FFSP", 2, 0, [22, 21, 20, 19], 1, 1], ["AN", 2, 1, 6], ["FU", 1, [0, 1, 2, 3]], ["SP", 1, 0, 64, [1], 1], ["AN", 1, 1, 2], ["FFSP", 1, 0, [4, 3, 2, 1], 1, 1], ["AN", 1, 1, 6], ["PR", 5, 0, "auto_unroll_max_step$64"], ["PR", 11, 0, "auto_unroll_max_step$16"]]]], "r": [[1e+10], 4, 2.64485, 1602798207], "v": "v0.2"}
{"i": [["[\"conv2d_nchw_gd\", 32, 224, 224, 3, 64, 7, 7, [2, 2], [3, 3], [1, 1]]", "cuda -keys=cuda,gpu -max_num_threads=1024 -model=t4 -thread_warp_size=32"], [[], [["CHW", 6, "local"], ["SP", 6, 0, 64, [1, 8, 2, 1], 1], ["SP", 6, 5, 3, [1, 1, 1, 1], 1], ["SP", 6, 10, 7, [7, 1, 1, 1], 1], ["SP", 6, 15, 7, [1, 1, 1, 7], 1], ["SP", 6, 20, 32, [4, 1], 1], ["SP", 6, 23, 112, [1, 1], 1], ["SP", 6, 26, 112, [16, 1], 1], ["RE", 6, [0, 5, 10, 15, 1, 6, 11, 16, 2, 7, 12, 17, 20, 23, 26, 21, 24, 27, 3, 8, 13, 18, 22, 25, 28, 4, 9, 14, 19]], ["FSP", 7, 0, 1, 3], ["FSP", 7, 4, 2, 3], ["FSP", 7, 8, 3, 3], ["FSP", 7, 12, 4, 3], ["RE", 7, [0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15]], ["CA", 6, 7, 11], ["CHR", 5, "shared", [6]], ["CA", 6, 7, 14], ["CI", 5], ["CHW", 2, "local"], ["SP", 2, 0, 32, [1, 1, 1, 1], 1], ["SP", 2, 5, 3, [3, 1, 1, 1], 1], ["SP", 2, 10, 230, [1, 23, 10, 1], 1], ["SP", 2, 15, 230, [1, 1, 1, 23], 1], ["SP", 2, 20, 64, [1, 2], 1], ["SP", 2, 23, 4, [2, 2], 1], ["SP", 2, 26, 4, [2, 1], 1], ["RE", 2, [0, 5, 10, 15, 1, 6, 11, 16, 2, 7, 12, 17, 20, 23, 26, 21, 24, 27, 3, 8, 13, 18, 22, 25, 28, 4, 9, 14, 19]], ["FSP", 3, 0, 19, 3], ["FSP", 3, 4, 20, 3], ["FSP", 3, 8, 21, 3], ["FSP", 3, 12, 22, 3], ["RE", 3, [0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15]], ["CA", 2, 3, 11], ["CHR", 1, "shared", [2]], ["CA", 2, 3, 14], ["CHR", 0, "shared", [3]], ["CA", 1, 4, 14], ["CHR", 0, "shared", [10]], ["CA", 1, 11, 14], ["FU", 12, [0, 1, 2, 3]], ["AN", 12, 0, 5], ["FU", 12, [1, 2, 3, 4]], ["AN", 12, 1, 4], ["FU", 12, [2, 3, 4, 5]], ["AN", 12, 2, 6], ["FU", 10, [0, 1, 2, 3]], ["SP", 10, 0, 148, [37], 1], ["AN", 10, 1, 2], ["FFSP", 10, 0, [4, 3, 2, 1], 1, 1], ["AN", 10, 1, 6], ["FU", 7, [0, 1, 2, 3]], ["SP", 7, 0, 4816896, [32], 1], ["AN", 7, 0, 5], ["AN", 7, 1, 6], ["FU", 6, [0, 1, 2, 3]], ["AN", 6, 0, 5], ["FU", 6, [1, 2, 3, 4]], ["AN", 6, 1, 4], ["FU", 6, [2, 3, 4, 5]], ["AN", 6, 2, 6], ["FU", 4, [0, 1, 2, 3]], ["SP", 4, 0, 112, [1], 1], ["AN", 4, 1, 2], 
["FFSP", 4, 0, [22, 21, 20, 19], 1, 1], ["AN", 4, 1, 6], ["FU", 2, [0, 1, 2, 3]], ["SP", 2, 0, 208, [13], 1], ["AN", 2, 1, 2], ["FFSP", 2, 0, [22, 21, 20, 19], 1, 1], ["AN", 2, 1, 6], ["FU", 1, [0, 1, 2, 3]], ["SP", 1, 0, 128, [1], 1], ["AN", 1, 1, 2], ["FFSP", 1, 0, [4, 3, 2, 1], 1, 1], ["AN", 1, 1, 6], ["PR", 5, 0, "auto_unroll_max_step$16"], ["PR", 11, 0, "auto_unroll_max_step$512"]]]], "r": [[0.248322], 0, 4.59149, 1602798210], "v": "v0.2"}
{"i": [["[\"conv2d_nchw_gd\", 32, 224, 224, 3, 64, 7, 7, [2, 2], [3, 3], [1, 1]]", "cuda -keys=cuda,gpu -max_num_threads=1024 -model=t4 -thread_warp_size=32"], [[], [["CHW", 6, "local"], ["SP", 6, 0, 64, [8, 1, 2, 2], 1], ["SP", 6, 5, 3, [1, 3, 1, 1], 1], ["SP", 6, 10, 7, [1, 7, 1, 1], 1], ["SP", 6, 15, 7, [1, 1, 1, 7], 1], ["SP", 6, 20, 32, [1, 1], 1], ["SP", 6, 23, 112, [4, 7], 1], ["SP", 6, 26, 112, [1, 2], 1], ["RE", 6, [0, 5, 10, 15, 1, 6, 11, 16, 2, 7, 12, 17, 20, 23, 26, 21, 24, 27, 3, 8, 13, 18, 22, 25, 28, 4, 9, 14, 19]], ["FSP", 7, 0, 1, 3], ["FSP", 7, 4, 2, 3], ["FSP", 7, 8, 3, 3], ["FSP", 7, 12, 4, 3], ["RE", 7, [0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15]], ["CA", 6, 7, 11], ["CHR", 5, "shared", [6]], ["CA", 6, 7, 14], ["CI", 5], ["CHW", 2, "local"], ["SP", 2, 0, 32, [1, 8, 1, 4], 1], ["SP", 2, 5, 3, [3, 1, 1, 1], 1], ["SP", 2, 10, 230, [1, 1, 5, 2], 1], ["SP", 2, 15, 230, [1, 115, 1, 1], 1], ["SP", 2, 20, 64, [1, 1], 1], ["SP", 2, 23, 4, [2, 1], 1], ["SP", 2, 26, 4, [1, 2], 1], ["RE", 2, [0, 5, 10, 15, 1, 6, 11, 16, 2, 7, 12, 17, 20, 23, 26, 21, 24, 27, 3, 8, 13, 18, 22, 25, 28, 4, 9, 14, 19]], ["FSP", 3, 0, 19, 3], ["FSP", 3, 4, 20, 3], ["FSP", 3, 8, 21, 3], ["FSP", 3, 12, 22, 3], ["RE", 3, [0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15]], ["CA", 2, 3, 11], ["CHR", 1, "shared", [2]], ["CA", 2, 3, 14], ["CHR", 0, "shared", [3]], ["CA", 1, 4, 14], ["CHR", 0, "shared", [10]], ["CA", 1, 11, 14], ["FU", 12, [0, 1, 2, 3]], ["AN", 12, 0, 5], ["FU", 12, [1, 2, 3, 4]], ["AN", 12, 1, 4], ["FU", 12, [2, 3, 4, 5]], ["AN", 12, 2, 6], ["FU", 10, [0, 1, 2, 3]], ["SP", 10, 0, 385, [1], 1], ["AN", 10, 1, 2], ["FFSP", 10, 0, [4, 3, 2, 1], 1, 1], ["AN", 10, 1, 6], ["FU", 7, [0, 1, 2, 3]], ["SP", 7, 0, 4816896, [32], 1], ["AN", 7, 0, 5], ["AN", 7, 1, 6], ["FU", 6, [0, 1, 2, 3]], ["AN", 6, 0, 5], ["FU", 6, [1, 2, 3, 4]], ["AN", 6, 1, 4], ["FU", 6, [2, 3, 4, 5]], ["AN", 6, 2, 6], ["FU", 4, [0, 1, 2, 3]], ["SP", 4, 0, 12, [1], 1], ["AN", 4, 1, 2], ["FFSP", 
4, 0, [22, 21, 20, 19], 1, 1], ["AN", 4, 1, 6], ["FU", 2, [0, 1, 2, 3]], ["SP", 2, 0, 48, [24], 1], ["AN", 2, 1, 2], ["FFSP", 2, 0, [22, 21, 20, 19], 1, 1], ["AN", 2, 1, 6], ["FU", 1, [0, 1, 2, 3]], ["SP", 1, 0, 448, [1], 1], ["AN", 1, 1, 2], ["FFSP", 1, 0, [4, 3, 2, 1], 1, 1], ["AN", 1, 1, 6], ["PR", 5, 0, "auto_unroll_max_step$0"], ["PR", 11, 0, "auto_unroll_max_step$512"]]]], "r": [[0.446619], 0, 5.63077, 1602798213], "v": "v0.2"}
{"i": [["[\"conv2d_nchw_gd\", 32, 224, 224, 3, 64, 7, 7, [2, 2], [3, 3], [1, 1]]", "cuda -keys=cuda,gpu -max_num_threads=1024 -model=t4 -thread_warp_size=32"], [[], [["CHW", 6, "local"], ["SP", 6, 0, 64, [1, 32, 1, 1], 1], ["SP", 6, 5, 3, [1, 1, 1, 1], 1], ["SP", 6, 10, 7, [1, 1, 7, 1], 1], ["SP", 6, 15, 7, [1, 1, 1, 1], 1], ["SP", 6, 20, 32, [16, 2], 1], ["SP", 6, 23, 112, [2, 1], 1], ["SP", 6, 26, 112, [1, 1], 1], ["RE", 6, [0, 5, 10, 15, 1, 6, 11, 16, 2, 7, 12, 17, 20, 23, 26, 21, 24, 27, 3, 8, 13, 18, 22, 25, 28, 4, 9, 14, 19]], ["FSP", 7, 0, 1, 3], ["FSP", 7, 4, 2, 3], ["FSP", 7, 8, 3, 3], ["FSP", 7, 12, 4, 3], ["RE", 7, [0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15]], ["CA", 6, 7, 11], ["CHR", 5, "shared", [6]], ["CA", 6, 7, 14], ["CI", 5], ["CHW", 2, "local"], ["SP", 2, 0, 32, [1, 4, 1, 2], 1], ["SP", 2, 5, 3, [3, 1, 1, 1], 1], ["SP", 2, 10, 230, [2, 23, 1, 1], 1], ["SP", 2, 15, 230, [1, 1, 1, 10], 1], ["SP", 2, 20, 64, [1, 2], 1], ["SP", 2, 23, 4, [4, 1], 1], ["SP", 2, 26, 4, [2, 1], 1], ["RE", 2, [0, 5, 10, 15, 1, 6, 11, 16, 2, 7, 12, 17, 20, 23, 26, 21, 24, 27, 3, 8, 13, 18, 22, 25, 28, 4, 9, 14, 19]], ["FSP", 3, 0, 19, 3], ["FSP", 3, 4, 20, 3], ["FSP", 3, 8, 21, 3], ["FSP", 3, 12, 22, 3], ["RE", 3, [0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15]], ["CA", 2, 3, 11], ["CHR", 1, "shared", [2]], ["CA", 2, 3, 14], ["CHR", 0, "shared", [3]], ["CA", 1, 4, 14], ["CHR", 0, "shared", [10]], ["CA", 1, 11, 14], ["FU", 12, [0, 1, 2, 3]], ["AN", 12, 0, 5], ["FU", 12, [1, 2, 3, 4]], ["AN", 12, 1, 4], ["FU", 12, [2, 3, 4, 5]], ["AN", 12, 2, 6], ["FU", 10, [0, 1, 2, 3]], ["SP", 10, 0, 288, [1], 1], ["AN", 10, 1, 2], ["FFSP", 10, 0, [4, 3, 2, 1], 1, 1], ["AN", 10, 1, 6], ["FU", 7, [0, 1, 2, 3]], ["SP", 7, 0, 4816896, [64], 1], ["AN", 7, 0, 5], ["AN", 7, 1, 6], ["FU", 6, [0, 1, 2, 3]], ["AN", 6, 0, 5], ["FU", 6, [1, 2, 3, 4]], ["AN", 6, 1, 4], ["FU", 6, [2, 3, 4, 5]], ["AN", 6, 2, 6], ["FU", 4, [0, 1, 2, 3]], ["SP", 4, 0, 56, [1], 1], ["AN", 4, 1, 2], 
["FFSP", 4, 0, [22, 21, 20, 19], 1, 1], ["AN", 4, 1, 6], ["FU", 2, [0, 1, 2, 3]], ["SP", 2, 0, 32, [1], 1], ["AN", 2, 1, 2], ["FFSP", 2, 0, [22, 21, 20, 19], 1, 1], ["AN", 2, 1, 6], ["FU", 1, [0, 1, 2, 3]], ["SP", 1, 0, 64, [1], 1], ["AN", 1, 1, 2], ["FFSP", 1, 0, [4, 3, 2, 1], 1, 1], ["AN", 1, 1, 6], ["PR", 5, 0, "auto_unroll_max_step$64"], ["PR", 11, 0, "auto_unroll_max_step$64"]]]], "r": [[0.133613], 0, 3.76193, 1602798215], "v": "v0.2"}
{"i": [["[\"conv2d_nchw_gd\", 32, 224, 224, 3, 64, 7, 7, [2, 2], [3, 3], [1, 1]]", "cuda -keys=cuda,gpu -max_num_threads=1024 -model=t4 -thread_warp_size=32"], [[], [["CHW", 6, "local"], ["SP", 6, 0, 64, [1, 64, 1, 1], 1], ["SP", 6, 5, 3, [1, 3, 1, 1], 1], ["SP", 6, 10, 7, [7, 1, 1, 1], 1], ["SP", 6, 15, 7, [1, 1, 1, 7], 1], ["SP", 6, 20, 32, [1, 8], 1], ["SP", 6, 23, 112, [7, 1], 1], ["SP", 6, 26, 112, [1, 1], 1], ["RE", 6, [0, 5, 10, 15, 1, 6, 11, 16, 2, 7, 12, 17, 20, 23, 26, 21, 24, 27, 3, 8, 13, 18, 22, 25, 28, 4, 9, 14, 19]], ["FSP", 7, 0, 1, 3], ["FSP", 7, 4, 2, 3], ["FSP", 7, 8, 3, 3], ["FSP", 7, 12, 4, 3], ["RE", 7, [0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15]], ["CA", 6, 7, 11], ["CHR", 5, "shared", [6]], ["CA", 6, 7, 14], ["CI", 5], ["CHW", 2, "local"], ["SP", 2, 0, 32, [1, 2, 8, 1], 1], ["SP", 2, 5, 3, [3, 1, 1, 1], 1], ["SP", 2, 10, 230, [2, 1, 1, 23], 1], ["SP", 2, 15, 230, [1, 1, 5, 2], 1], ["SP", 2, 20, 64, [4, 1], 1], ["SP", 2, 23, 4, [2, 2], 1], ["SP", 2, 26, 4, [1, 2], 1], ["RE", 2, [0, 5, 10, 15, 1, 6, 11, 16, 2, 7, 12, 17, 20, 23, 26, 21, 24, 27, 3, 8, 13, 18, 22, 25, 28, 4, 9, 14, 19]], ["FSP", 3, 0, 19, 3], ["FSP", 3, 4, 20, 3], ["FSP", 3, 8, 21, 3], ["FSP", 3, 12, 22, 3], ["RE", 3, [0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15]], ["CA", 2, 3, 11], ["CHR", 1, "shared", [2]], ["CA", 2, 3, 14], ["CHR", 0, "shared", [3]], ["CA", 1, 4, 14], ["CHR", 0, "shared", [10]], ["CA", 1, 11, 14], ["FU", 12, [0, 1, 2, 3]], ["AN", 12, 0, 5], ["FU", 12, [1, 2, 3, 4]], ["AN", 12, 1, 4], ["FU", 12, [2, 3, 4, 5]], ["AN", 12, 2, 6], ["FU", 10, [0, 1, 2, 3]], ["SP", 10, 0, 104, [1], 1], ["AN", 10, 1, 2], ["FFSP", 10, 0, [4, 3, 2, 1], 1, 1], ["AN", 10, 1, 6], ["FU", 7, [0, 1, 2, 3]], ["SP", 7, 0, 4816896, [32], 1], ["AN", 7, 0, 5], ["AN", 7, 1, 6], ["FU", 6, [0, 1, 2, 3]], ["AN", 6, 0, 5], ["FU", 6, [1, 2, 3, 4]], ["AN", 6, 1, 4], ["FU", 6, [2, 3, 4, 5]], ["AN", 6, 2, 6], ["FU", 4, [0, 1, 2, 3]], ["SP", 4, 0, 336, [1], 1], ["AN", 4, 1, 2], 
["FFSP", 4, 0, [22, 21, 20, 19], 1, 1], ["AN", 4, 1, 6], ["FU", 2, [0, 1, 2, 3]], ["SP", 2, 0, 768, [1], 1], ["AN", 2, 1, 2], ["FFSP", 2, 0, [22, 21, 20, 19], 1, 1], ["AN", 2, 1, 6], ["FU", 1, [0, 1, 2, 3]], ["SP", 1, 0, 224, [1], 1], ["AN", 1, 1, 2], ["FFSP", 1, 0, [4, 3, 2, 1], 1, 1], ["AN", 1, 1, 6], ["PR", 5, 0, "auto_unroll_max_step$64"], ["PR", 11, 0, "auto_unroll_max_step$64"]]]], "r": [[1e+10], 7, 11.5596, 1602798225], "v": "v0.2"}
{"i": [["[\"conv2d_nchw_gd\", 32, 224, 224, 3, 64, 7, 7, [2, 2], [3, 3], [1, 1]]", "cuda -keys=cuda,gpu -max_num_threads=1024 -model=t4 -thread_warp_size=32"], [[], [["CHW", 6, "local"], ["SP", 6, 0, 64, [1, 2, 1, 32], 1], ["SP", 6, 5, 3, [1, 1, 1, 1], 1], ["SP", 6, 10, 7, [1, 7, 1, 1], 1], ["SP", 6, 15, 7, [1, 7, 1, 1], 1], ["SP", 6, 20, 32, [1, 1], 1], ["SP", 6, 23, 112, [8, 1], 1], ["SP", 6, 26, 112, [2, 4], 1], ["RE", 6, [0, 5, 10, 15, 1, 6, 11, 16, 2, 7, 12, 17, 20, 23, 26, 21, 24, 27, 3, 8, 13, 18, 22, 25, 28, 4, 9, 14, 19]], ["FSP", 7, 0, 1, 3], ["FSP", 7, 4, 2, 3], ["FSP", 7, 8, 3, 3], ["FSP", 7, 12, 4, 3], ["RE", 7, [0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15]], ["CA", 6, 7, 11], ["CHR", 5, "shared", [6]], ["CA", 6, 7, 14], ["CI", 5], ["CHW", 2, "local"], ["SP", 2, 0, 32, [2, 8, 2, 1], 1], ["SP", 2, 5, 3, [1, 3, 1, 1], 1], ["SP", 2, 10, 230, [1, 1, 23, 1], 1], ["SP", 2, 15, 230, [1, 1, 1, 1], 1], ["SP", 2, 20, 64, [8, 1], 1], ["SP", 2, 23, 4, [2, 1], 1], ["SP", 2, 26, 4, [1, 1], 1], ["RE", 2, [0, 5, 10, 15, 1, 6, 11, 16, 2, 7, 12, 17, 20, 23, 26, 21, 24, 27, 3, 8, 13, 18, 22, 25, 28, 4, 9, 14, 19]], ["FSP", 3, 0, 19, 3], ["FSP", 3, 4, 20, 3], ["FSP", 3, 8, 21, 3], ["FSP", 3, 12, 22, 3], ["RE", 3, [0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15]], ["CA", 2, 3, 11], ["CHR", 1, "shared", [2]], ["CA", 2, 3, 14], ["CHR", 0, "shared", [3]], ["CA", 1, 4, 14], ["CHR", 0, "shared", [10]], ["CA", 1, 11, 14], ["FU", 12, [0, 1, 2, 3]], ["AN", 12, 0, 5], ["FU", 12, [1, 2, 3, 4]], ["AN", 12, 1, 4], ["FU", 12, [2, 3, 4, 5]], ["AN", 12, 2, 6], ["FU", 10, [0, 1, 2, 3]], ["SP", 10, 0, 450, [45], 1], ["AN", 10, 1, 2], ["FFSP", 10, 0, [4, 3, 2, 1], 1, 1], ["AN", 10, 1, 6], ["FU", 7, [0, 1, 2, 3]], ["SP", 7, 0, 4816896, [64], 1], ["AN", 7, 0, 5], ["AN", 7, 1, 6], ["FU", 6, [0, 1, 2, 3]], ["AN", 6, 0, 5], ["FU", 6, [1, 2, 3, 4]], ["AN", 6, 1, 4], ["FU", 6, [2, 3, 4, 5]], ["AN", 6, 2, 6], ["FU", 4, [0, 1, 2, 3]], ["SP", 4, 0, 16, [1], 1], ["AN", 4, 1, 2], 
["FFSP", 4, 0, [22, 21, 20, 19], 1, 1], ["AN", 4, 1, 6], ["FU", 2, [0, 1, 2, 3]], ["SP", 2, 0, 52, [1], 1], ["AN", 2, 1, 2], ["FFSP", 2, 0, [22, 21, 20, 19], 1, 1], ["AN", 2, 1, 6], ["FU", 1, [0, 1, 2, 3]], ["SP", 1, 0, 4096, [1], 1], ["AN", 1, 1, 2], ["FFSP", 1, 0, [4, 3, 2, 1], 1, 1], ["AN", 1, 1, 6], ["PR", 5, 0, "auto_unroll_max_step$1024"], ["PR", 11, 0, "auto_unroll_max_step$64"]]]], "r": [[0.20203], 0, 4.98409, 1602798227], "v": "v0.2"}
{"i": [["[\"conv2d_nchw_gd\", 32, 224, 224, 3, 64, 7, 7, [2, 2], [3, 3], [1, 1]]", "cuda -keys=cuda,gpu -max_num_threads=1024 -model=t4 -thread_warp_size=32"], [[], [["CHW", 6, "local"], ["SP", 6, 0, 64, [1, 32, 1, 1], 1], ["SP", 6, 5, 3, [1, 1, 1, 1], 1], ["SP", 6, 10, 7, [1, 1, 7, 1], 1], ["SP", 6, 15, 7, [1, 7, 1, 1], 1], ["SP", 6, 20, 32, [16, 2], 1], ["SP", 6, 23, 112, [2, 1], 1], ["SP", 6, 26, 112, [1, 1], 1], ["RE", 6, [0, 5, 10, 15, 1, 6, 11, 16, 2, 7, 12, 17, 20, 23, 26, 21, 24, 27, 3, 8, 13, 18, 22, 25, 28, 4, 9, 14, 19]], ["FSP", 7, 0, 1, 3], ["FSP", 7, 4, 2, 3], ["FSP", 7, 8, 3, 3], ["FSP", 7, 12, 4, 3], ["RE", 7, [0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15]], ["CA", 6, 7, 11], ["CHR", 5, "shared", [6]], ["CA", 6, 7, 14], ["CI", 5], ["CHW", 2, "local"], ["SP", 2, 0, 32, [1, 4, 1, 2], 1], ["SP", 2, 5, 3, [1, 1, 1, 1], 1], ["SP", 2, 10, 230, [2, 23, 1, 1], 1], ["SP", 2, 15, 230, [1, 5, 1, 2], 1], ["SP", 2, 20, 64, [2, 1], 1], ["SP", 2, 23, 4, [1, 4], 1], ["SP", 2, 26, 4, [2, 1], 1], ["RE", 2, [0, 5, 10, 15, 1, 6, 11, 16, 2, 7, 12, 17, 20, 23, 26, 21, 24, 27, 3, 8, 13, 18, 22, 25, 28, 4, 9, 14, 19]], ["FSP", 3, 0, 19, 3], ["FSP", 3, 4, 20, 3], ["FSP", 3, 8, 21, 3], ["FSP", 3, 12, 22, 3], ["RE", 3, [0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15]], ["CA", 2, 3, 11], ["CHR", 1, "shared", [2]], ["CA", 2, 3, 14], ["CHR", 0, "shared", [3]], ["CA", 1, 4, 14], ["CHR", 0, "shared", [10]], ["CA", 1, 11, 14], ["FU", 12, [0, 1, 2, 3]], ["AN", 12, 0, 5], ["FU", 12, [1, 2, 3, 4]], ["AN", 12, 1, 4], ["FU", 12, [2, 3, 4, 5]], ["AN", 12, 2, 6], ["FU", 10, [0, 1, 2, 3]], ["SP", 10, 0, 288, [1], 1], ["AN", 10, 1, 2], ["FFSP", 10, 0, [4, 3, 2, 1], 1, 1], ["AN", 10, 1, 6], ["FU", 7, [0, 1, 2, 3]], ["SP", 7, 0, 4816896, [32], 1], ["AN", 7, 0, 5], ["AN", 7, 1, 6], ["FU", 6, [0, 1, 2, 3]], ["AN", 6, 0, 5], ["FU", 6, [1, 2, 3, 4]], ["AN", 6, 1, 4], ["FU", 6, [2, 3, 4, 5]], ["AN", 6, 2, 6], ["FU", 4, [0, 1, 2, 3]], ["SP", 4, 0, 56, [1], 1], ["AN", 4, 1, 2], 
["FFSP", 4, 0, [22, 21, 20, 19], 1, 1], ["AN", 4, 1, 6], ["FU", 2, [0, 1, 2, 3]], ["SP", 2, 0, 32, [1], 1], ["AN", 2, 1, 2], ["FFSP", 2, 0, [22, 21, 20, 19], 1, 1], ["AN", 2, 1, 6], ["FU", 1, [0, 1, 2, 3]], ["SP", 1, 0, 64, [4], 1], ["AN", 1, 1, 2], ["FFSP", 1, 0, [4, 3, 2, 1], 1, 1], ["AN", 1, 1, 6], ["PR", 5, 0, "auto_unroll_max_step$64"], ["PR", 11, 0, "auto_unroll_max_step$64"]]]], "r": [[0.0545667], 0, 2.75968, 1602798228], "v": "v0.2"}
{"i": [["[\"conv2d_nchw_gd\", 32, 224, 224, 3, 64, 7, 7, [2, 2], [3, 3], [1, 1]]", "cuda -keys=cuda,gpu -max_num_threads=1024 -model=t4 -thread_warp_size=32"], [[], [["CHW", 6, "local"], ["SP", 6, 0, 64, [1, 8, 2, 1], 1], ["SP", 6, 5, 3, [1, 1, 1, 1], 1], ["SP", 6, 10, 7, [1, 1, 1, 7], 1], ["SP", 6, 15, 7, [1, 1, 1, 1], 1], ["SP", 6, 20, 32, [4, 1], 1], ["SP", 6, 23, 112, [1, 1], 1], ["SP", 6, 26, 112, [16, 1], 1], ["RE", 6, [0, 5, 10, 15, 1, 6, 11, 16, 2, 7, 12, 17, 20, 23, 26, 21, 24, 27, 3, 8, 13, 18, 22, 25, 28, 4, 9, 14, 19]], ["FSP", 7, 0, 1, 3], ["FSP", 7, 4, 2, 3], ["FSP", 7, 8, 3, 3], ["FSP", 7, 12, 4, 3], ["RE", 7, [0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15]], ["CA", 6, 7, 11], ["CHR", 5, "shared", [6]], ["CA", 6, 7, 14], ["CI", 5], ["CHW", 2, "local"], ["SP", 2, 0, 32, [1, 1, 1, 1], 1], ["SP", 2, 5, 3, [3, 1, 1, 1], 1], ["SP", 2, 10, 230, [1, 5, 2, 1], 1], ["SP", 2, 15, 230, [1, 1, 1, 1], 1], ["SP", 2, 20, 64, [2, 2], 1], ["SP", 2, 23, 4, [2, 2], 1], ["SP", 2, 26, 4, [2, 1], 1], ["RE", 2, [0, 5, 10, 15, 1, 6, 11, 16, 2, 7, 12, 17, 20, 23, 26, 21, 24, 27, 3, 8, 13, 18, 22, 25, 28, 4, 9, 14, 19]], ["FSP", 3, 0, 19, 3], ["FSP", 3, 4, 20, 3], ["FSP", 3, 8, 21, 3], ["FSP", 3, 12, 22, 3], ["RE", 3, [0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15]], ["CA", 2, 3, 11], ["CHR", 1, "shared", [2]], ["CA", 2, 3, 14], ["CHR", 0, "shared", [3]], ["CA", 1, 4, 14], ["CHR", 0, "shared", [10]], ["CA", 1, 11, 14], ["FU", 12, [0, 1, 2, 3]], ["AN", 12, 0, 5], ["FU", 12, [1, 2, 3, 4]], ["AN", 12, 1, 4], ["FU", 12, [2, 3, 4, 5]], ["AN", 12, 2, 6], ["FU", 10, [0, 1, 2, 3]], ["SP", 10, 0, 148, [37], 1], ["AN", 10, 1, 2], ["FFSP", 10, 0, [4, 3, 2, 1], 1, 1], ["AN", 10, 1, 6], ["FU", 7, [0, 1, 2, 3]], ["SP", 7, 0, 4816896, [64], 1], ["AN", 7, 0, 5], ["AN", 7, 1, 6], ["FU", 6, [0, 1, 2, 3]], ["AN", 6, 0, 5], ["FU", 6, [1, 2, 3, 4]], ["AN", 6, 1, 4], ["FU", 6, [2, 3, 4, 5]], ["AN", 6, 2, 6], ["FU", 4, [0, 1, 2, 3]], ["SP", 4, 0, 112, [28], 1], ["AN", 4, 1, 2], 
["FFSP", 4, 0, [22, 21, 20, 19], 1, 1], ["AN", 4, 1, 6], ["FU", 2, [0, 1, 2, 3]], ["SP", 2, 0, 208, [1], 1], ["AN", 2, 1, 2], ["FFSP", 2, 0, [22, 21, 20, 19], 1, 1], ["AN", 2, 1, 6], ["FU", 1, [0, 1, 2, 3]], ["SP", 1, 0, 128, [1], 1], ["AN", 1, 1, 2], ["FFSP", 1, 0, [4, 3, 2, 1], 1, 1], ["AN", 1, 1, 6], ["PR", 5, 0, "auto_unroll_max_step$16"], ["PR", 11, 0, "auto_unroll_max_step$512"]]]], "r": [[0.328327], 0, 5.06443, 1602798231], "v": "v0.2"}
{"i": [["[\"conv2d_nchw_gd\", 32, 224, 224, 3, 64, 7, 7, [2, 2], [3, 3], [1, 1]]", "cuda -keys=cuda,gpu -max_num_threads=1024 -model=t4 -thread_warp_size=32"], [[], [["CHW", 6, "local"], ["SP", 6, 0, 64, [1, 1, 16, 4], 1], ["SP", 6, 5, 3, [1, 1, 1, 3], 1], ["SP", 6, 10, 7, [1, 7, 1, 1], 1], ["SP", 6, 15, 7, [1, 7, 1, 1], 1], ["SP", 6, 20, 32, [1, 1], 1], ["SP", 6, 23, 112, [56, 1], 1], ["SP", 6, 26, 112, [2, 1], 1], ["RE", 6, [0, 5, 10, 15, 1, 6, 11, 16, 2, 7, 12, 17, 20, 23, 26, 21, 24, 27, 3, 8, 13, 18, 22, 25, 28, 4, 9, 14, 19]], ["FSP", 7, 0, 1, 3], ["FSP", 7, 4, 2, 3], ["FSP", 7, 8, 3, 3], ["FSP", 7, 12, 4, 3], ["RE", 7, [0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15]], ["CA", 6, 7, 11], ["CHR", 5, "shared", [6]], ["CA", 6, 7, 14], ["CI", 5], ["CHW", 2, "local"], ["SP", 2, 0, 32, [1, 4, 4, 1], 1], ["SP", 2, 5, 3, [1, 3, 1, 1], 1], ["SP", 2, 10, 230, [1, 1, 1, 1], 1], ["SP", 2, 15, 230, [1, 23, 2, 1], 1], ["SP", 2, 20, 64, [1, 2], 1], ["SP", 2, 23, 4, [4, 1], 1], ["SP", 2, 26, 4, [1, 2], 1], ["RE", 2, [0, 5, 10, 15, 1, 6, 11, 16, 2, 7, 12, 17, 20, 23, 26, 21, 24, 27, 3, 8, 13, 18, 22, 25, 28, 4, 9, 14, 19]], ["FSP", 3, 0, 19, 3], ["FSP", 3, 4, 20, 3], ["FSP", 3, 8, 21, 3], ["FSP", 3, 12, 22, 3], ["RE", 3, [0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15]], ["CA", 2, 3, 11], ["CHR", 1, "shared", [2]], ["CA", 2, 3, 14], ["CHR", 0, "shared", [3]], ["CA", 1, 4, 14], ["CHR", 0, "shared", [10]], ["CA", 1, 11, 14], ["FU", 12, [0, 1, 2, 3]], ["AN", 12, 0, 5], ["FU", 12, [1, 2, 3, 4]], ["AN", 12, 1, 4], ["FU", 12, [2, 3, 4, 5]], ["AN", 12, 2, 6], ["FU", 10, [0, 1, 2, 3]], ["SP", 10, 0, 999, [37], 1], ["AN", 10, 1, 2], ["FFSP", 10, 0, [4, 3, 2, 1], 1, 1], ["AN", 10, 1, 6], ["FU", 7, [0, 1, 2, 3]], ["SP", 7, 0, 4816896, [32], 1], ["AN", 7, 0, 5], ["AN", 7, 1, 6], ["FU", 6, [0, 1, 2, 3]], ["AN", 6, 0, 5], ["FU", 6, [1, 2, 3, 4]], ["AN", 6, 1, 4], ["FU", 6, [2, 3, 4, 5]], ["AN", 6, 2, 6], ["FU", 4, [0, 1, 2, 3]], ["SP", 4, 0, 56, [4], 1], ["AN", 4, 1, 2], 
["FFSP", 4, 0, [22, 21, 20, 19], 1, 1], ["AN", 4, 1, 6], ["FU", 2, [0, 1, 2, 3]], ["SP", 2, 0, 64, [4], 1], ["AN", 2, 1, 2], ["FFSP", 2, 0, [22, 21, 20, 19], 1, 1], ["AN", 2, 1, 6], ["FU", 1, [0, 1, 2, 3]], ["SP", 1, 0, 3584, [1], 1], ["AN", 1, 1, 2], ["FFSP", 1, 0, [4, 3, 2, 1], 1, 1], ["AN", 1, 1, 6], ["PR", 5, 0, "auto_unroll_max_step$16"], ["PR", 11, 0, "auto_unroll_max_step$16"]]]], "r": [[0.237937], 0, 3.18645, 1602798233], "v": "v0.2"}
{"i": [["[\"conv2d_nchw_gd\", 32, 224, 224, 3, 64, 7, 7, [2, 2], [3, 3], [1, 1]]", "cuda -keys=cuda,gpu -max_num_threads=1024 -model=t4 -thread_warp_size=32"], [[], [["CHW", 6, "local"], ["SP", 6, 0, 64, [1, 32, 1, 1], 1], ["SP", 6, 5, 3, [1, 1, 1, 1], 1], ["SP", 6, 10, 7, [1, 1, 7, 1], 1], ["SP", 6, 15, 7, [1, 1, 1, 1], 1], ["SP", 6, 20, 32, [2, 2], 1], ["SP", 6, 23, 112, [2, 1], 1], ["SP", 6, 26, 112, [1, 1], 1], ["RE", 6, [0, 5, 10, 15, 1, 6, 11, 16, 2, 7, 12, 17, 20, 23, 26, 21, 24, 27, 3, 8, 13, 18, 22, 25, 28, 4, 9, 14, 19]], ["FSP", 7, 0, 1, 3], ["FSP", 7, 4, 2, 3], ["FSP", 7, 8, 3, 3], ["FSP", 7, 12, 4, 3], ["RE", 7, [0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15]], ["CA", 6, 7, 11], ["CHR", 5, "shared", [6]], ["CA", 6, 7, 14], ["CI", 5], ["CHW", 2, "local"], ["SP", 2, 0, 32, [1, 4, 1, 2], 1], ["SP", 2, 5, 3, [3, 1, 1, 1], 1], ["SP", 2, 10, 230, [2, 23, 1, 1], 1], ["SP", 2, 15, 230, [1, 5, 1, 2], 1], ["SP", 2, 20, 64, [1, 1], 1], ["SP", 2, 23, 4, [1, 1], 1], ["SP", 2, 26, 4, [2, 1], 1], ["RE", 2, [0, 5, 10, 15, 1, 6, 11, 16, 2, 7, 12, 17, 20, 23, 26, 21, 24, 27, 3, 8, 13, 18, 22, 25, 28, 4, 9, 14, 19]], ["FSP", 3, 0, 19, 3], ["FSP", 3, 4, 20, 3], ["FSP", 3, 8, 21, 3], ["FSP", 3, 12, 22, 3], ["RE", 3, [0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15]], ["CA", 2, 3, 11], ["CHR", 1, "shared", [2]], ["CA", 2, 3, 14], ["CHR", 0, "shared", [3]], ["CA", 1, 4, 14], ["CHR", 0, "shared", [10]], ["CA", 1, 11, 14], ["FU", 12, [0, 1, 2, 3]], ["AN", 12, 0, 5], ["FU", 12, [1, 2, 3, 4]], ["AN", 12, 1, 4], ["FU", 12, [2, 3, 4, 5]], ["AN", 12, 2, 6], ["FU", 10, [0, 1, 2, 3]], ["SP", 10, 0, 288, [18], 1], ["AN", 10, 1, 2], ["FFSP", 10, 0, [4, 3, 2, 1], 1, 1], ["AN", 10, 1, 6], ["FU", 7, [0, 1, 2, 3]], ["SP", 7, 0, 4816896, [32], 1], ["AN", 7, 0, 5], ["AN", 7, 1, 6], ["FU", 6, [0, 1, 2, 3]], ["AN", 6, 0, 5], ["FU", 6, [1, 2, 3, 4]], ["AN", 6, 1, 4], ["FU", 6, [2, 3, 4, 5]], ["AN", 6, 2, 6], ["FU", 4, [0, 1, 2, 3]], ["SP", 4, 0, 56, [28], 1], ["AN", 4, 1, 2], 
["FFSP", 4, 0, [22, 21, 20, 19], 1, 1], ["AN", 4, 1, 6], ["FU", 2, [0, 1, 2, 3]], ["SP", 2, 0, 32, [8], 1], ["AN", 2, 1, 2], ["FFSP", 2, 0, [22, 21, 20, 19], 1, 1], ["AN", 2, 1, 6], ["FU", 1, [0, 1, 2, 3]], ["SP", 1, 0, 64, [1], 1], ["AN", 1, 1, 2], ["FFSP", 1, 0, [4, 3, 2, 1], 1, 1], ["AN", 1, 1, 6], ["PR", 5, 0, "auto_unroll_max_step$64"], ["PR", 11, 0, "auto_unroll_max_step$16"]]]], "r": [[0.235176], 0, 3.01538, 1602798235], "v": "v0.2"}
{"i": [["[\"conv2d_nchw_gd\", 32, 224, 224, 3, 64, 7, 7, [2, 2], [3, 3], [1, 1]]", "cuda -keys=cuda,gpu -max_num_threads=1024 -model=t4 -thread_warp_size=32"], [[], [["CHW", 6, "local"], ["SP", 6, 0, 64, [8, 2, 2, 1], 1], ["SP", 6, 5, 3, [1, 3, 1, 1], 1], ["SP", 6, 10, 7, [1, 7, 1, 1], 1], ["SP", 6, 15, 7, [1, 1, 1, 1], 1], ["SP", 6, 20, 32, [2, 8], 1], ["SP", 6, 23, 112, [1, 1], 1], ["SP", 6, 26, 112, [4, 1], 1], ["RE", 6, [0, 5, 10, 15, 1, 6, 11, 16, 2, 7, 12, 17, 20, 23, 26, 21, 24, 27, 3, 8, 13, 18, 22, 25, 28, 4, 9, 14, 19]], ["FSP", 7, 0, 1, 3], ["FSP", 7, 4, 2, 3], ["FSP", 7, 8, 3, 3], ["FSP", 7, 12, 4, 3], ["RE", 7, [0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15]], ["CA", 6, 7, 11], ["CHR", 5, "shared", [6]], ["CA", 6, 7, 14], ["CI", 5], ["CHW", 2, "local"], ["SP", 2, 0, 32, [1, 8, 4, 1], 1], ["SP", 2, 5, 3, [1, 1, 3, 1], 1], ["SP", 2, 10, 230, [1, 1, 5, 1], 1], ["SP", 2, 15, 230, [1, 5, 1, 23], 1], ["SP", 2, 20, 64, [2, 1], 1], ["SP", 2, 23, 4, [1, 1], 1], ["SP", 2, 26, 4, [2, 1], 1], ["RE", 2, [0, 5, 10, 15, 1, 6, 11, 16, 2, 7, 12, 17, 20, 23, 26, 21, 24, 27, 3, 8, 13, 18, 22, 25, 28, 4, 9, 14, 19]], ["FSP", 3, 0, 19, 3], ["FSP", 3, 4, 20, 3], ["FSP", 3, 8, 21, 3], ["FSP", 3, 12, 22, 3], ["RE", 3, [0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15]], ["CA", 2, 3, 11], ["CHR", 1, "shared", [2]], ["CA", 2, 3, 14], ["CHR", 0, "shared", [3]], ["CA", 1, 4, 14], ["CHR", 0, "shared", [10]], ["CA", 1, 11, 14], ["FU", 12, [0, 1, 2, 3]], ["AN", 12, 0, 5], ["FU", 12, [1, 2, 3, 4]], ["AN", 12, 1, 4], ["FU", 12, [2, 3, 4, 5]], ["AN", 12, 2, 6], ["FU", 10, [0, 1, 2, 3]], ["SP", 10, 0, 112, [1], 1], ["AN", 10, 1, 2], ["FFSP", 10, 0, [4, 3, 2, 1], 1, 1], ["AN", 10, 1, 6], ["FU", 7, [0, 1, 2, 3]], ["SP", 7, 0, 4816896, [32], 1], ["AN", 7, 0, 5], ["AN", 7, 1, 6], ["FU", 6, [0, 1, 2, 3]], ["AN", 6, 0, 5], ["FU", 6, [1, 2, 3, 4]], ["AN", 6, 1, 4], ["FU", 6, [2, 3, 4, 5]], ["AN", 6, 2, 6], ["FU", 4, [0, 1, 2, 3]], ["SP", 4, 0, 48, [1], 1], ["AN", 4, 1, 2], ["FFSP", 
4, 0, [22, 21, 20, 19], 1, 1], ["AN", 4, 1, 6], ["FU", 2, [0, 1, 2, 3]], ["SP", 2, 0, 312, [1], 1], ["AN", 2, 1, 2], ["FFSP", 2, 0, [22, 21, 20, 19], 1, 1], ["AN", 2, 1, 6], ["FU", 1, [0, 1, 2, 3]], ["SP", 1, 0, 128, [1], 1], ["AN", 1, 1, 2], ["FFSP", 1, 0, [4, 3, 2, 1], 1, 1], ["AN", 1, 1, 6], ["PR", 5, 0, "auto_unroll_max_step$64"], ["PR", 11, 0, "auto_unroll_max_step$64"]]]], "r": [[1.40743], 0, 8.56577, 1602798242], "v": "v0.2"}
{"i": [["[\"conv2d_nchw_gd\", 32, 224, 224, 3, 64, 7, 7, [2, 2], [3, 3], [1, 1]]", "cuda -keys=cuda,gpu -max_num_threads=1024 -model=t4 -thread_warp_size=32"], [[], [["CHW", 6, "local"], ["SP", 6, 0, 64, [2, 1, 2, 2], 1], ["SP", 6, 5, 3, [1, 1, 3, 1], 1], ["SP", 6, 10, 7, [1, 7, 1, 1], 1], ["SP", 6, 15, 7, [1, 7, 1, 1], 1], ["SP", 6, 20, 32, [2, 1], 1], ["SP", 6, 23, 112, [7, 1], 1], ["SP", 6, 26, 112, [2, 2], 1], ["RE", 6, [0, 5, 10, 15, 1, 6, 11, 16, 2, 7, 12, 17, 20, 23, 26, 21, 24, 27, 3, 8, 13, 18, 22, 25, 28, 4, 9, 14, 19]], ["FSP", 7, 0, 1, 3], ["FSP", 7, 4, 2, 3], ["FSP", 7, 8, 3, 3], ["FSP", 7, 12, 4, 3], ["RE", 7, [0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15]], ["CA", 6, 7, 11], ["CHR", 5, "shared", [6]], ["CA", 6, 7, 14], ["CI", 5], ["CHW", 2, "local"], ["SP", 2, 0, 32, [1, 4, 2, 1], 1], ["SP", 2, 5, 3, [1, 3, 1, 1], 1], ["SP", 2, 10, 230, [1, 5, 1, 2], 1], ["SP", 2, 15, 230, [1, 1, 46, 1], 1], ["SP", 2, 20, 64, [8, 1], 1], ["SP", 2, 23, 4, [1, 2], 1], ["SP", 2, 26, 4, [1, 4], 1], ["RE", 2, [0, 5, 10, 15, 1, 6, 11, 16, 2, 7, 12, 17, 20, 23, 26, 21, 24, 27, 3, 8, 13, 18, 22, 25, 28, 4, 9, 14, 19]], ["FSP", 3, 0, 19, 3], ["FSP", 3, 4, 20, 3], ["FSP", 3, 8, 21, 3], ["FSP", 3, 12, 22, 3], ["RE", 3, [0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15]], ["CA", 2, 3, 11], ["CHR", 1, "shared", [2]], ["CA", 2, 3, 14], ["CHR", 0, "shared", [3]], ["CA", 1, 4, 14], ["CHR", 0, "shared", [10]], ["CA", 1, 11, 14], ["FU", 12, [0, 1, 2, 3]], ["AN", 12, 0, 5], ["FU", 12, [1, 2, 3, 4]], ["AN", 12, 1, 4], ["FU", 12, [2, 3, 4, 5]], ["AN", 12, 2, 6], ["FU", 10, [0, 1, 2, 3]], ["SP", 10, 0, 546, [1], 1], ["AN", 10, 1, 2], ["FFSP", 10, 0, [4, 3, 2, 1], 1, 1], ["AN", 10, 1, 6], ["FU", 7, [0, 1, 2, 3]], ["SP", 7, 0, 4816896, [32], 1], ["AN", 7, 0, 5], ["AN", 7, 1, 6], ["FU", 6, [0, 1, 2, 3]], ["AN", 6, 0, 5], ["FU", 6, [1, 2, 3, 4]], ["AN", 6, 1, 4], ["FU", 6, [2, 3, 4, 5]], ["AN", 6, 2, 6], ["FU", 4, [0, 1, 2, 3]], ["SP", 4, 0, 224, [1], 1], ["AN", 4, 1, 2], ["FFSP", 
4, 0, [22, 21, 20, 19], 1, 1], ["AN", 4, 1, 6], ["FU", 2, [0, 1, 2, 3]], ["SP", 2, 0, 832, [1], 1], ["AN", 2, 1, 2], ["FFSP", 2, 0, [22, 21, 20, 19], 1, 1], ["AN", 2, 1, 6], ["FU", 1, [0, 1, 2, 3]], ["SP", 1, 0, 224, [1], 1], ["AN", 1, 1, 2], ["FFSP", 1, 0, [4, 3, 2, 1], 1, 1], ["AN", 1, 1, 6], ["PR", 5, 0, "auto_unroll_max_step$1024"], ["PR", 11, 0, "auto_unroll_max_step$1024"]]]], "r": [[0.139352], 0, 13.2241, 1602798245], "v": "v0.2"}
{"i": [["[\"conv2d_nchw_gd\", 32, 224, 224, 3, 64, 7, 7, [2, 2], [3, 3], [1, 1]]", "cuda -keys=cuda,gpu -max_num_threads=1024 -model=t4 -thread_warp_size=32"], [[], [["CHW", 6, "local"], ["SP", 6, 0, 64, [1, 8, 2, 1], 1], ["SP", 6, 5, 3, [1, 1, 1, 1], 1], ["SP", 6, 10, 7, [1, 7, 1, 1], 1], ["SP", 6, 15, 7, [1, 1, 1, 7], 1], ["SP", 6, 20, 32, [4, 1], 1], ["SP", 6, 23, 112, [1, 1], 1], ["SP", 6, 26, 112, [16, 1], 1], ["RE", 6, [0, 5, 10, 15, 1, 6, 11, 16, 2, 7, 12, 17, 20, 23, 26, 21, 24, 27, 3, 8, 13, 18, 22, 25, 28, 4, 9, 14, 19]], ["FSP", 7, 0, 1, 3], ["FSP", 7, 4, 2, 3], ["FSP", 7, 8, 3, 3], ["FSP", 7, 12, 4, 3], ["RE", 7, [0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15]], ["CA", 6, 7, 11], ["CHR", 5, "shared", [6]], ["CA", 6, 7, 14], ["CI", 5], ["CHW", 2, "local"], ["SP", 2, 0, 32, [1, 1, 1, 1], 1], ["SP", 2, 5, 3, [3, 1, 1, 1], 1], ["SP", 2, 10, 230, [1, 115, 2, 1], 1], ["SP", 2, 15, 230, [1, 1, 1, 23], 1], ["SP", 2, 20, 64, [2, 2], 1], ["SP", 2, 23, 4, [2, 2], 1], ["SP", 2, 26, 4, [2, 1], 1], ["RE", 2, [0, 5, 10, 15, 1, 6, 11, 16, 2, 7, 12, 17, 20, 23, 26, 21, 24, 27, 3, 8, 13, 18, 22, 25, 28, 4, 9, 14, 19]], ["FSP", 3, 0, 19, 3], ["FSP", 3, 4, 20, 3], ["FSP", 3, 8, 21, 3], ["FSP", 3, 12, 22, 3], ["RE", 3, [0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15]], ["CA", 2, 3, 11], ["CHR", 1, "shared", [2]], ["CA", 2, 3, 14], ["CHR", 0, "shared", [3]], ["CA", 1, 4, 14], ["CHR", 0, "shared", [10]], ["CA", 1, 11, 14], ["FU", 12, [0, 1, 2, 3]], ["AN", 12, 0, 5], ["FU", 12, [1, 2, 3, 4]], ["AN", 12, 1, 4], ["FU", 12, [2, 3, 4, 5]], ["AN", 12, 2, 6], ["FU", 10, [0, 1, 2, 3]], ["SP", 10, 0, 148, [1], 1], ["AN", 10, 1, 2], ["FFSP", 10, 0, [4, 3, 2, 1], 1, 1], ["AN", 10, 1, 6], ["FU", 7, [0, 1, 2, 3]], ["SP", 7, 0, 4816896, [32], 1], ["AN", 7, 0, 5], ["AN", 7, 1, 6], ["FU", 6, [0, 1, 2, 3]], ["AN", 6, 0, 5], ["FU", 6, [1, 2, 3, 4]], ["AN", 6, 1, 4], ["FU", 6, [2, 3, 4, 5]], ["AN", 6, 2, 6], ["FU", 4, [0, 1, 2, 3]], ["SP", 4, 0, 112, [1], 1], ["AN", 4, 1, 2], 
["FFSP", 4, 0, [22, 21, 20, 19], 1, 1], ["AN", 4, 1, 6], ["FU", 2, [0, 1, 2, 3]], ["SP", 2, 0, 208, [1], 1], ["AN", 2, 1, 2], ["FFSP", 2, 0, [22, 21, 20, 19], 1, 1], ["AN", 2, 1, 6], ["FU", 1, [0, 1, 2, 3]], ["SP", 1, 0, 128, [1], 1], ["AN", 1, 1, 2], ["FFSP", 1, 0, [4, 3, 2, 1], 1, 1], ["AN", 1, 1, 6], ["PR", 5, 0, "auto_unroll_max_step$16"], ["PR", 11, 0, "auto_unroll_max_step$512"]]]], "r": [[0.11101], 0, 2.54608, 1602798246], "v": "v0.2"}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment