@Laurawly · Created April 17, 2019 22:26
ir
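// Lowered TVM IR for a GPU get_valid_counts kernel: a five-phase stream
// compaction over 15360 candidate boxes of 5 float32 values each, keeping
// boxes whose score (element 1 of each record) is positive. Phases one
// through four build a global prefix sum of the validity flags; phase five
// scatters the surviving boxes into a dense output.
// Scratch buffers: phase_one.v0/.v1 hold the per-box flags (v1 is later
// overwritten with the global prefix sum); phase_two.v0 holds per-chunk
// local scans and phase_two.v1 the 496 per-chunk totals; phase_three holds
// the scanned chunk totals.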
// attr [get_valid_counts_phase_one.v0] storage_scope = "global"
allocate get_valid_counts_phase_one.v0[int32 * 15360]
// attr [get_valid_counts_phase_one.v1] storage_scope = "global"
allocate get_valid_counts_phase_one.v1[int32 * 15360]
// attr [get_valid_counts_phase_two.v1] storage_scope = "global"
allocate get_valid_counts_phase_two.v1[int32 * 496]
// attr [get_valid_counts_phase_two.v0] storage_scope = "global"
allocate get_valid_counts_phase_two.v0[int32 * 15360]
// attr [get_valid_counts_phase_three] storage_scope = "global"
allocate get_valid_counts_phase_three[int32 * 496]
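// Phase one: one thread per box (31 blocks x 512 threads, guarded to the
// 15360 valid indices). A box whose score data[idx*5 + 1] is positive gets
// flag 1 in both v0 (kept as the validity mask for phase five) and v1
// (progressively turned into the prefix sum by phases two and four).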
produce get_valid_counts_phase_one {
  // attr [0] extern_scope = 0
  // attr [iter_var(threadIdx.x, , threadIdx.x)] thread_extent = 512
  // attr [iter_var(blockIdx.x, , blockIdx.x)] thread_extent = 31
  if (((blockIdx.x*512) < (15360 - threadIdx.x))) {
    if ((0.000000f < data[((((blockIdx.x*512) + threadIdx.x)*5) + 1)])) {
      get_valid_counts_phase_one.v0[((blockIdx.x*512) + threadIdx.x)] = 1
      get_valid_counts_phase_one.v1[((blockIdx.x*512) + threadIdx.x)] = 1
    } else {
      get_valid_counts_phase_one.v0[((blockIdx.x*512) + threadIdx.x)] = 0
      get_valid_counts_phase_one.v1[((blockIdx.x*512) + threadIdx.x)] = 0
    }
  }
}
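// Phase two: a single block; the first 496 threads each own a 31-element
// chunk (496 * 31 = 15360). v1[t] accumulates the total flag count of
// chunk t, while v0 receives the inclusive prefix sum within the chunk.
// Note that v1 is accumulated without an explicit zero-initialization in
// this dump.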
produce get_valid_counts_phase_two {
  // attr [0] extern_scope = 0
  // attr [iter_var(threadIdx.x, , threadIdx.x)] thread_extent = 512
  // attr [iter_var(blockIdx.x, , blockIdx.x)] thread_extent = 1
  if ((threadIdx.x < 496)) {
    for (i, 0, 31) {
      get_valid_counts_phase_two.v1[((blockIdx.x*496) + threadIdx.x)] = (get_valid_counts_phase_two.v1[((blockIdx.x*496) + threadIdx.x)] + get_valid_counts_phase_one.v1[(((blockIdx.x*15360) + (threadIdx.x*31)) + i)])
      if ((i == 0)) {
        get_valid_counts_phase_two.v0[((blockIdx.x*15360) + (threadIdx.x*31))] = get_valid_counts_phase_one.v1[((blockIdx.x*15360) + (threadIdx.x*31))]
      } else {
        get_valid_counts_phase_two.v0[(((blockIdx.x*15360) + (threadIdx.x*31)) + i)] = (get_valid_counts_phase_two.v0[((((blockIdx.x*15360) + (threadIdx.x*31)) + i) + -1)] + get_valid_counts_phase_one.v1[(((blockIdx.x*15360) + (threadIdx.x*31)) + i)])
      }
      tvm_storage_sync("shared")
    }
  }
}
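// Phase three: a Hillis-Steele inclusive scan over the 496 chunk totals.
// Each pass i adds the element 2^i positions to the left; the float loop
// extent log(496.0f) * 1.442695f is log2(496) ~ 8.95, i.e. enough passes
// (presumably rounded up to 9) for the scan to cover all 496 entries.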
produce get_valid_counts_phase_three {
  // attr [0] extern_scope = 0
  // attr [iter_var(threadIdx.x, , threadIdx.x)] thread_extent = 512
  // attr [iter_var(blockIdx.x, , blockIdx.x)] thread_extent = 1
  if ((threadIdx.x < 496)) {
    for (i, 0, (log(496.000000f)*1.442695f)) {
      if ((i == 0)) {
        if ((0 < threadIdx.x)) {
          get_valid_counts_phase_three[((blockIdx.x*496) + threadIdx.x)] = (get_valid_counts_phase_two.v1[((blockIdx.x*496) + threadIdx.x)] + get_valid_counts_phase_two.v1[(((blockIdx.x*496) + threadIdx.x) + -1)])
        } else {
          get_valid_counts_phase_three[(blockIdx.x*496)] = get_valid_counts_phase_two.v1[(blockIdx.x*496)]
        }
      } else if ((int32(pow(2.000000f, i)) <= threadIdx.x)) {
        get_valid_counts_phase_three[((blockIdx.x*496) + threadIdx.x)] = (get_valid_counts_phase_three[((blockIdx.x*496) + threadIdx.x)] + get_valid_counts_phase_three[(((blockIdx.x*496) + threadIdx.x) - int32(pow(2.000000f, i)))])
      }
      tvm_storage_sync("shared")
    }
  }
}
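// Phase four: combine. Boxes in the first 31-element chunk of a batch keep
// their local scan from phase two; all other boxes add the scanned total of
// the preceding chunks from phase three. phase_one.v1 now holds the global
// inclusive prefix sum, i.e. each valid box's 1-based output slot.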
produce get_valid_counts_phase_four {
  // attr [0] extern_scope = 0
  // attr [iter_var(threadIdx.x, , threadIdx.x)] thread_extent = 512
  // attr [iter_var(blockIdx.x, , blockIdx.x)] thread_extent = 31
  if (((blockIdx.x*512) < (15360 - threadIdx.x))) {
    if (((((blockIdx.x*512) + threadIdx.x) % 15360) < 31)) {
      get_valid_counts_phase_one.v1[((blockIdx.x*512) + threadIdx.x)] = get_valid_counts_phase_two.v0[((blockIdx.x*512) + threadIdx.x)]
    } else {
      get_valid_counts_phase_one.v1[((blockIdx.x*512) + threadIdx.x)] = (get_valid_counts_phase_two.v0[((blockIdx.x*512) + threadIdx.x)] + get_valid_counts_phase_three[((((((blockIdx.x*512) + threadIdx.x)/15360)*496) + ((((blockIdx.x*512) + threadIdx.x) % 15360)/31)) + -1)])
    }
  }
}
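// Phase five: scatter into the external outputs. Each valid box copies its
// 5 values to output row prefix-1; the first thread of each batch writes
// the total valid count (the last prefix value) to v0; rows at or past the
// valid count are filled with -1.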
produce get_valid_counts_phase_five {
  // attr [0] extern_scope = 0
  // attr [iter_var(threadIdx.x, , threadIdx.x)] thread_extent = 512
  // attr [iter_var(blockIdx.x, , blockIdx.x)] thread_extent = 151
  if (((blockIdx.x*512) < (15360 - threadIdx.x))) {
    if ((0 < get_valid_counts_phase_one.v0[((blockIdx.x*512) + threadIdx.x)])) {
      for (i, 0, 5) {
        get_valid_counts_phase_five.v1[((((((((blockIdx.x*512) + threadIdx.x)/15360)*15360) + get_valid_counts_phase_one.v1[((blockIdx.x*512) + threadIdx.x)])*5) + i) + -5)] = data[((((blockIdx.x*512) + threadIdx.x)*5) + i)]
        tvm_storage_sync("shared")
      }
    }
    if (((((blockIdx.x*512) + threadIdx.x) % 15360) == 0)) {
      get_valid_counts_phase_five.v0[(((blockIdx.x*512) + threadIdx.x)/15360)] = get_valid_counts_phase_one.v1[(((blockIdx.x*512) + threadIdx.x) + 15359)]
    }
    if ((get_valid_counts_phase_one.v1[(((((blockIdx.x*512) + threadIdx.x)/15360)*15360) + 15359)] <= (((blockIdx.x*512) + threadIdx.x) % 15360))) {
      for (j, 0, 5) {
        get_valid_counts_phase_five.v1[((((blockIdx.x*512) + threadIdx.x)*5) + j)] = -1.000000f
        tvm_storage_sync("shared")
      }
    }
  }
}
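For reference, the five phases amount to a flag/scan/scatter stream compaction. Below is a minimal NumPy sketch of the same computation, assuming a single batch laid out as (num_boxes, 5) with the score at element 1, as read off the IR above. The function name and threshold parameter are illustrative, and the sketch deliberately ignores the GPU chunking and tvm_storage_sync calls.

import numpy as np

def get_valid_counts_ref(data, score_threshold=0.0):
    # Phase one: flag boxes whose score (element 1) exceeds the threshold.
    flags = data[:, 1] > score_threshold
    # Phases two-four: global inclusive prefix sum of the flags.
    prefix = np.cumsum(flags.astype(np.int32))
    valid_count = int(prefix[-1]) if len(prefix) else 0
    # Phase five: scatter valid boxes to rows [0, valid_count),
    # then fill the remaining rows with -1.
    out = np.full_like(data, -1.0)
    out[:valid_count] = data[flags]
    return valid_count, out

# Example: 15360 random boxes, matching the extent in the IR above.
data = np.random.uniform(-1.0, 1.0, size=(15360, 5)).astype(np.float32)
count, packed = get_valid_counts_ref(data)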