Skip to content

Instantly share code, notes, and snippets.

@apartridge
Created February 21, 2022 13:10
Failed to load binary:python
Realizing Pipeline for target(x86-64-linux-opencl-sse41-strict_float)
jit-compiling for: target(x86-64-linux-opencl-sse41-strict_float)
Inferred argument: float32 b0
Inferred argument: (void *) __user_context
Creating initial loop nests...
Injecting realization of { nan_or_one }
for (.__root, 0, 1) {
produce nan_or_one {
let nan_or_one.s0.row.loop_max = nan_or_one.s0.row.max
let nan_or_one.s0.row.loop_min = nan_or_one.s0.row.min
let nan_or_one.s0.row.loop_extent = (nan_or_one.s0.row.max + 1) - nan_or_one.s0.row.min
let nan_or_one.s0.col.loop_max = nan_or_one.s0.col.max
let nan_or_one.s0.col.loop_min = nan_or_one.s0.col.min
let nan_or_one.s0.col.loop_extent = (nan_or_one.s0.col.max + 1) - nan_or_one.s0.col.min
let nan_or_one.s0.__outermost.loop_extent = 1
let nan_or_one.s0.__outermost.loop_max = 0
let nan_or_one.s0.__outermost.loop_min = 0
let nan_or_one.s0.col.col_outer.loop_extent = ((nan_or_one.s0.col.loop_max - nan_or_one.s0.col.loop_min) + 1)/1
let nan_or_one.s0.col.col_outer.loop_max = (((nan_or_one.s0.col.loop_max - nan_or_one.s0.col.loop_min) + 1)/1) - 1
let nan_or_one.s0.col.col_outer.loop_min = 0
let nan_or_one.s0.col.col_inner.loop_extent = 1
let nan_or_one.s0.col.col_inner.loop_max = 1 - 1
let nan_or_one.s0.col.col_inner.loop_min = 0
let nan_or_one.s0.row.row_outer.loop_extent = ((nan_or_one.s0.row.loop_max - nan_or_one.s0.row.loop_min) + 1)/1
let nan_or_one.s0.row.row_outer.loop_max = (((nan_or_one.s0.row.loop_max - nan_or_one.s0.row.loop_min) + 1)/1) - 1
let nan_or_one.s0.row.row_outer.loop_min = 0
let nan_or_one.s0.row.row_inner.loop_extent = 1
let nan_or_one.s0.row.row_inner.loop_max = 1 - 1
let nan_or_one.s0.row.row_inner.loop_min = 0
for (nan_or_one.s0.__outermost, nan_or_one.s0.__outermost.loop_min, nan_or_one.s0.__outermost.loop_extent) {
gpu_block<Default_GPU> (nan_or_one.s0.row.row_outer, nan_or_one.s0.row.row_outer.loop_min, nan_or_one.s0.row.row_outer.loop_extent) {
let nan_or_one.s0.row.row_inner.base = (nan_or_one.s0.row.row_outer*1) + nan_or_one.s0.row.loop_min
gpu_block<Default_GPU> (nan_or_one.s0.col.col_outer, nan_or_one.s0.col.col_outer.loop_min, nan_or_one.s0.col.col_outer.loop_extent) {
let nan_or_one.s0.col.col_inner.base = (nan_or_one.s0.col.col_outer*1) + nan_or_one.s0.col.loop_min
gpu_thread<Default_GPU> (nan_or_one.s0.row.row_inner, nan_or_one.s0.row.row_inner.loop_min, nan_or_one.s0.row.row_inner.loop_extent) {
let nan_or_one.s0.row = nan_or_one.s0.row.row_inner.base + nan_or_one.s0.row.row_inner
gpu_thread<Default_GPU> (nan_or_one.s0.col.col_inner, nan_or_one.s0.col.col_inner.loop_min, nan_or_one.s0.col.col_inner.loop_extent) {
let nan_or_one.s0.col = nan_or_one.s0.col.col_inner.base + nan_or_one.s0.col.col_inner
nan_or_one(nan_or_one.s0.col, nan_or_one.s0.row) = let t1 = (float32)strict_float(nanf) in (float32)strict_float(select((uint1)is_nan_f32((float32)strict_float((float32)strict_float((float32)strict_float((float32)b0(nan_or_one.s0.col, nan_or_one.s0.row, 0))))), t1, t1))
}
}
}
}
}
}
}
Lowering after creating initial loop nests:
produce nan_or_one {
let nan_or_one.s0.row.loop_max = nan_or_one.s0.row.max
let nan_or_one.s0.row.loop_min = nan_or_one.s0.row.min
let nan_or_one.s0.row.loop_extent = (nan_or_one.s0.row.max + 1) - nan_or_one.s0.row.min
let nan_or_one.s0.col.loop_max = nan_or_one.s0.col.max
let nan_or_one.s0.col.loop_min = nan_or_one.s0.col.min
let nan_or_one.s0.col.loop_extent = (nan_or_one.s0.col.max + 1) - nan_or_one.s0.col.min
let nan_or_one.s0.col.col_outer.loop_extent = ((nan_or_one.s0.col.loop_max - nan_or_one.s0.col.loop_min) + 1)/1
let nan_or_one.s0.col.col_outer.loop_max = (((nan_or_one.s0.col.loop_max - nan_or_one.s0.col.loop_min) + 1)/1) - 1
let nan_or_one.s0.col.col_outer.loop_min = 0
let nan_or_one.s0.col.col_inner.loop_extent = 1
let nan_or_one.s0.col.col_inner.loop_max = 1 - 1
let nan_or_one.s0.col.col_inner.loop_min = 0
let nan_or_one.s0.row.row_outer.loop_extent = ((nan_or_one.s0.row.loop_max - nan_or_one.s0.row.loop_min) + 1)/1
let nan_or_one.s0.row.row_outer.loop_max = (((nan_or_one.s0.row.loop_max - nan_or_one.s0.row.loop_min) + 1)/1) - 1
let nan_or_one.s0.row.row_outer.loop_min = 0
let nan_or_one.s0.row.row_inner.loop_extent = 1
let nan_or_one.s0.row.row_inner.loop_max = 1 - 1
let nan_or_one.s0.row.row_inner.loop_min = 0
gpu_block<Default_GPU> (nan_or_one.s0.row.row_outer, nan_or_one.s0.row.row_outer.loop_min, nan_or_one.s0.row.row_outer.loop_extent) {
let nan_or_one.s0.row.row_inner.base = (nan_or_one.s0.row.row_outer*1) + nan_or_one.s0.row.loop_min
gpu_block<Default_GPU> (nan_or_one.s0.col.col_outer, nan_or_one.s0.col.col_outer.loop_min, nan_or_one.s0.col.col_outer.loop_extent) {
let nan_or_one.s0.col.col_inner.base = (nan_or_one.s0.col.col_outer*1) + nan_or_one.s0.col.loop_min
gpu_thread<Default_GPU> (nan_or_one.s0.row.row_inner, nan_or_one.s0.row.row_inner.loop_min, nan_or_one.s0.row.row_inner.loop_extent) {
let nan_or_one.s0.row = nan_or_one.s0.row.row_inner.base + nan_or_one.s0.row.row_inner
gpu_thread<Default_GPU> (nan_or_one.s0.col.col_inner, nan_or_one.s0.col.col_inner.loop_min, nan_or_one.s0.col.col_inner.loop_extent) {
let nan_or_one.s0.col = nan_or_one.s0.col.col_inner.base + nan_or_one.s0.col.col_inner
nan_or_one(nan_or_one.s0.col, nan_or_one.s0.row) = let t1 = (float32)strict_float(nanf) in (float32)strict_float(select((uint1)is_nan_f32((float32)strict_float((float32)strict_float((float32)strict_float((float32)b0(nan_or_one.s0.col, nan_or_one.s0.row, 0))))), t1, t1))
}
}
}
}
}
Skipping injecting memoization...
Injecting tracing...
Lowering after injecting tracing: (unchanged)
Adding checks for parameters
Lowering after injecting parameter checks: (unchanged)
Computing bounds of each function's value
Bounds on value 0 for func nan_or_one are: nanf, nanf
Clamping unsafe data-dependent accesses
Lowering after clamping unsafe data-dependent accesses
produce nan_or_one {
let nan_or_one.s0.row.loop_max = nan_or_one.s0.row.max
let nan_or_one.s0.row.loop_min = nan_or_one.s0.row.min
let nan_or_one.s0.row.loop_extent = (nan_or_one.s0.row.max + 1) - nan_or_one.s0.row.min
let nan_or_one.s0.col.loop_max = nan_or_one.s0.col.max
let nan_or_one.s0.col.loop_min = nan_or_one.s0.col.min
let nan_or_one.s0.col.loop_extent = (nan_or_one.s0.col.max + 1) - nan_or_one.s0.col.min
let nan_or_one.s0.col.col_outer.loop_extent = ((nan_or_one.s0.col.loop_max - nan_or_one.s0.col.loop_min) + 1)/1
let nan_or_one.s0.col.col_outer.loop_max = (((nan_or_one.s0.col.loop_max - nan_or_one.s0.col.loop_min) + 1)/1) - 1
let nan_or_one.s0.col.col_outer.loop_min = 0
let nan_or_one.s0.col.col_inner.loop_extent = 1
let nan_or_one.s0.col.col_inner.loop_max = 1 - 1
let nan_or_one.s0.col.col_inner.loop_min = 0
let nan_or_one.s0.row.row_outer.loop_extent = ((nan_or_one.s0.row.loop_max - nan_or_one.s0.row.loop_min) + 1)/1
let nan_or_one.s0.row.row_outer.loop_max = (((nan_or_one.s0.row.loop_max - nan_or_one.s0.row.loop_min) + 1)/1) - 1
let nan_or_one.s0.row.row_outer.loop_min = 0
let nan_or_one.s0.row.row_inner.loop_extent = 1
let nan_or_one.s0.row.row_inner.loop_max = 1 - 1
let nan_or_one.s0.row.row_inner.loop_min = 0
gpu_block<Default_GPU> (nan_or_one.s0.row.row_outer, nan_or_one.s0.row.row_outer.loop_min, nan_or_one.s0.row.row_outer.loop_extent) {
let nan_or_one.s0.row.row_inner.base = (nan_or_one.s0.row.row_outer*1) + nan_or_one.s0.row.loop_min
gpu_block<Default_GPU> (nan_or_one.s0.col.col_outer, nan_or_one.s0.col.col_outer.loop_min, nan_or_one.s0.col.col_outer.loop_extent) {
let nan_or_one.s0.col.col_inner.base = (nan_or_one.s0.col.col_outer*1) + nan_or_one.s0.col.loop_min
gpu_thread<Default_GPU> (nan_or_one.s0.row.row_inner, nan_or_one.s0.row.row_inner.loop_min, nan_or_one.s0.row.row_inner.loop_extent) {
let nan_or_one.s0.row = nan_or_one.s0.row.row_inner.base + nan_or_one.s0.row.row_inner
gpu_thread<Default_GPU> (nan_or_one.s0.col.col_inner, nan_or_one.s0.col.col_inner.loop_min, nan_or_one.s0.col.col_inner.loop_extent) {
let nan_or_one.s0.col = nan_or_one.s0.col.col_inner.base + nan_or_one.s0.col.col_inner
nan_or_one(nan_or_one.s0.col, nan_or_one.s0.row) = let t1 = (float32)strict_float(nanf) in (float32)strict_float(select((uint1)is_nan_f32((float32)strict_float((float32)strict_float((float32)strict_float((float32)b0(nan_or_one.s0.col, nan_or_one.s0.row, 0))))), t1, t1))
}
}
}
}
}
Performing computation bounds inference...
Lowering after computation bounds inference:
let nan_or_one.s0.row.max = (nan_or_one.min.1 + nan_or_one.extent.1) - 1
let nan_or_one.s0.row.min = nan_or_one.min.1
let nan_or_one.s0.col.max = (nan_or_one.min.0 + nan_or_one.extent.0) - 1
let nan_or_one.s0.col.min = nan_or_one.min.0
add_image_checks_marker()
produce nan_or_one {
let nan_or_one.s0.row.loop_max = nan_or_one.s0.row.max
let nan_or_one.s0.row.loop_min = nan_or_one.s0.row.min
let nan_or_one.s0.row.loop_extent = (nan_or_one.s0.row.max + 1) - nan_or_one.s0.row.min
let nan_or_one.s0.col.loop_max = nan_or_one.s0.col.max
let nan_or_one.s0.col.loop_min = nan_or_one.s0.col.min
let nan_or_one.s0.col.loop_extent = (nan_or_one.s0.col.max + 1) - nan_or_one.s0.col.min
let nan_or_one.s0.col.col_outer.loop_extent = ((nan_or_one.s0.col.loop_max - nan_or_one.s0.col.loop_min) + 1)/1
let nan_or_one.s0.col.col_outer.loop_max = (((nan_or_one.s0.col.loop_max - nan_or_one.s0.col.loop_min) + 1)/1) - 1
let nan_or_one.s0.col.col_outer.loop_min = 0
let nan_or_one.s0.col.col_inner.loop_extent = 1
let nan_or_one.s0.col.col_inner.loop_max = 1 - 1
let nan_or_one.s0.col.col_inner.loop_min = 0
let nan_or_one.s0.row.row_outer.loop_extent = ((nan_or_one.s0.row.loop_max - nan_or_one.s0.row.loop_min) + 1)/1
let nan_or_one.s0.row.row_outer.loop_max = (((nan_or_one.s0.row.loop_max - nan_or_one.s0.row.loop_min) + 1)/1) - 1
let nan_or_one.s0.row.row_outer.loop_min = 0
let nan_or_one.s0.row.row_inner.loop_extent = 1
let nan_or_one.s0.row.row_inner.loop_max = 1 - 1
let nan_or_one.s0.row.row_inner.loop_min = 0
gpu_block<Default_GPU> (nan_or_one.s0.row.row_outer, nan_or_one.s0.row.row_outer.loop_min, nan_or_one.s0.row.row_outer.loop_extent) {
let nan_or_one.s0.row.row_inner.base = (nan_or_one.s0.row.row_outer*1) + nan_or_one.s0.row.loop_min
gpu_block<Default_GPU> (nan_or_one.s0.col.col_outer, nan_or_one.s0.col.col_outer.loop_min, nan_or_one.s0.col.col_outer.loop_extent) {
let nan_or_one.s0.col.col_inner.base = (nan_or_one.s0.col.col_outer*1) + nan_or_one.s0.col.loop_min
gpu_thread<Default_GPU> (nan_or_one.s0.row.row_inner, nan_or_one.s0.row.row_inner.loop_min, nan_or_one.s0.row.row_inner.loop_extent) {
let nan_or_one.s0.row = nan_or_one.s0.row.row_inner.base + nan_or_one.s0.row.row_inner
gpu_thread<Default_GPU> (nan_or_one.s0.col.col_inner, nan_or_one.s0.col.col_inner.loop_min, nan_or_one.s0.col.col_inner.loop_extent) {
let nan_or_one.s0.col = nan_or_one.s0.col.col_inner.base + nan_or_one.s0.col.col_inner
nan_or_one(nan_or_one.s0.col, nan_or_one.s0.row) = let t1 = (float32)strict_float(nanf) in (float32)strict_float(select((uint1)is_nan_f32((float32)strict_float((float32)strict_float((float32)strict_float((float32)b0(nan_or_one.s0.col, nan_or_one.s0.row, 0))))), t1, t1))
}
}
}
}
}
Removing extern loops...
Lowering after removing extern loops: (unchanged)
Performing sliding window optimization...
Lowering after sliding window:
let nan_or_one.s0.row.max = (nan_or_one.min.1 + nan_or_one.extent.1) - 1
let nan_or_one.s0.row.min = nan_or_one.min.1
let nan_or_one.s0.col.max = (nan_or_one.min.0 + nan_or_one.extent.0) - 1
let nan_or_one.s0.col.min = nan_or_one.min.0
add_image_checks_marker()
produce nan_or_one {
let nan_or_one.s0.row.loop_max = nan_or_one.s0.row.max
let nan_or_one.s0.row.loop_min = nan_or_one.s0.row.min
let nan_or_one.s0.row.loop_extent = (nan_or_one.s0.row.max + 1) - nan_or_one.s0.row.min
let nan_or_one.s0.col.loop_max = nan_or_one.s0.col.max
let nan_or_one.s0.col.loop_min = nan_or_one.s0.col.min
let nan_or_one.s0.col.loop_extent = (nan_or_one.s0.col.max + 1) - nan_or_one.s0.col.min
let nan_or_one.s0.col.col_outer.loop_extent = ((nan_or_one.s0.col.loop_max - nan_or_one.s0.col.loop_min) + 1)/1
let nan_or_one.s0.col.col_outer.loop_max = (((nan_or_one.s0.col.loop_max - nan_or_one.s0.col.loop_min) + 1)/1) - 1
let nan_or_one.s0.col.col_outer.loop_min = 0
let nan_or_one.s0.col.col_inner.loop_extent = 1
let nan_or_one.s0.col.col_inner.loop_max = 1 - 1
let nan_or_one.s0.col.col_inner.loop_min = 0
let nan_or_one.s0.row.row_outer.loop_extent = ((nan_or_one.s0.row.loop_max - nan_or_one.s0.row.loop_min) + 1)/1
let nan_or_one.s0.row.row_outer.loop_max = (((nan_or_one.s0.row.loop_max - nan_or_one.s0.row.loop_min) + 1)/1) - 1
let nan_or_one.s0.row.row_outer.loop_min = 0
let nan_or_one.s0.row.row_inner.loop_extent = 1
let nan_or_one.s0.row.row_inner.loop_max = 1 - 1
let nan_or_one.s0.row.row_inner.loop_min = 0
let nan_or_one.s0.row.row_outer.loop_min.orig = nan_or_one.s0.row.row_outer.loop_min
gpu_block<Default_GPU> (nan_or_one.s0.row.row_outer, nan_or_one.s0.row.row_outer.loop_min, nan_or_one.s0.row.row_outer.loop_extent) {
let nan_or_one.s0.row.row_inner.base = (nan_or_one.s0.row.row_outer*1) + nan_or_one.s0.row.loop_min
let nan_or_one.s0.col.col_outer.loop_min.orig = nan_or_one.s0.col.col_outer.loop_min
gpu_block<Default_GPU> (nan_or_one.s0.col.col_outer, nan_or_one.s0.col.col_outer.loop_min, nan_or_one.s0.col.col_outer.loop_extent) {
let nan_or_one.s0.col.col_inner.base = (nan_or_one.s0.col.col_outer*1) + nan_or_one.s0.col.loop_min
let nan_or_one.s0.row.row_inner.loop_min.orig = nan_or_one.s0.row.row_inner.loop_min
gpu_thread<Default_GPU> (nan_or_one.s0.row.row_inner, nan_or_one.s0.row.row_inner.loop_min, nan_or_one.s0.row.row_inner.loop_extent) {
let nan_or_one.s0.row = nan_or_one.s0.row.row_inner.base + nan_or_one.s0.row.row_inner
let nan_or_one.s0.col.col_inner.loop_min.orig = nan_or_one.s0.col.col_inner.loop_min
gpu_thread<Default_GPU> (nan_or_one.s0.col.col_inner, nan_or_one.s0.col.col_inner.loop_min, nan_or_one.s0.col.col_inner.loop_extent) {
let nan_or_one.s0.col = nan_or_one.s0.col.col_inner.base + nan_or_one.s0.col.col_inner
nan_or_one(nan_or_one.s0.col, nan_or_one.s0.row) = let t1 = (float32)strict_float(nanf) in (float32)strict_float(select((uint1)is_nan_f32((float32)strict_float((float32)strict_float((float32)strict_float((float32)b0(nan_or_one.s0.col, nan_or_one.s0.row, 0))))), t1, t1))
}
}
}
}
}
Uniquifying variable names...
Lowering after uniquifying variable names: (unchanged)
Simplifying...
Lowering after first simplification:
let nan_or_one.s0.row.max = (nan_or_one.extent.1 + nan_or_one.min.1) + -1
let nan_or_one.s0.row.min = nan_or_one.min.1
let nan_or_one.s0.col.max = (nan_or_one.extent.0 + nan_or_one.min.0) + -1
let nan_or_one.s0.col.min = nan_or_one.min.0
add_image_checks_marker()
produce nan_or_one {
let nan_or_one.s0.row.loop_max = (nan_or_one.extent.1 + nan_or_one.min.1) + -1
let nan_or_one.s0.row.loop_min = nan_or_one.min.1
let nan_or_one.s0.row.loop_extent = nan_or_one.extent.1
let nan_or_one.s0.col.loop_max = (nan_or_one.extent.0 + nan_or_one.min.0) + -1
let nan_or_one.s0.col.loop_min = nan_or_one.min.0
let nan_or_one.s0.col.loop_extent = nan_or_one.extent.0
let nan_or_one.s0.col.col_outer.loop_extent = nan_or_one.extent.0
let nan_or_one.s0.col.col_outer.loop_max = nan_or_one.extent.0 + -1
let nan_or_one.s0.col.col_outer.loop_min = 0
let nan_or_one.s0.col.col_inner.loop_extent = 1
let nan_or_one.s0.col.col_inner.loop_max = 0
let nan_or_one.s0.col.col_inner.loop_min = 0
let nan_or_one.s0.row.row_outer.loop_extent = nan_or_one.extent.1
let nan_or_one.s0.row.row_outer.loop_max = nan_or_one.extent.1 + -1
let nan_or_one.s0.row.row_outer.loop_min = 0
let nan_or_one.s0.row.row_inner.loop_extent = 1
let nan_or_one.s0.row.row_inner.loop_max = 0
let nan_or_one.s0.row.row_inner.loop_min = 0
let nan_or_one.s0.row.row_outer.loop_min.orig = 0
gpu_block<Default_GPU> (nan_or_one.s0.row.row_outer, 0, nan_or_one.extent.1) {
let nan_or_one.s0.row.row_inner.base = nan_or_one.min.1 + nan_or_one.s0.row.row_outer
let nan_or_one.s0.col.col_outer.loop_min.orig = 0
gpu_block<Default_GPU> (nan_or_one.s0.col.col_outer, 0, nan_or_one.extent.0) {
let nan_or_one.s0.col.col_inner.base = nan_or_one.min.0 + nan_or_one.s0.col.col_outer
let nan_or_one.s0.row.row_inner.loop_min.orig = 0
gpu_thread<Default_GPU> (nan_or_one.s0.row.row_inner, 0, 1) {
let nan_or_one.s0.row = nan_or_one.min.1 + nan_or_one.s0.row.row_outer
let nan_or_one.s0.col.col_inner.loop_min.orig = 0
gpu_thread<Default_GPU> (nan_or_one.s0.col.col_inner, 0, 1) {
let nan_or_one.s0.col = nan_or_one.min.0 + nan_or_one.s0.col.col_outer
nan_or_one(nan_or_one.min.0 + nan_or_one.s0.col.col_outer, nan_or_one.min.1 + nan_or_one.s0.row.row_outer) = let t1 = (float32)strict_float(nanf) in (float32)strict_float(select((uint1)is_nan_f32((float32)strict_float((float32)b0(nan_or_one.min.0 + nan_or_one.s0.col.col_outer, nan_or_one.min.1 + nan_or_one.s0.row.row_outer, 0))), t1, t1))
}
}
}
}
}
Simplifying correlated differences...
Lowering after simplifying correlated differences:
let nan_or_one.s0.row.max = (nan_or_one.extent.1 + nan_or_one.min.1) + -1
let nan_or_one.s0.row.min = nan_or_one.min.1
let nan_or_one.s0.col.max = (nan_or_one.extent.0 + nan_or_one.min.0) + -1
let nan_or_one.s0.col.min = nan_or_one.min.0
add_image_checks_marker()
produce nan_or_one {
let nan_or_one.s0.row.loop_max = (nan_or_one.extent.1 + nan_or_one.min.1) + -1
let nan_or_one.s0.row.loop_min = nan_or_one.min.1
let nan_or_one.s0.row.loop_extent = nan_or_one.extent.1
let nan_or_one.s0.col.loop_max = (nan_or_one.extent.0 + nan_or_one.min.0) + -1
let nan_or_one.s0.col.loop_min = nan_or_one.min.0
let nan_or_one.s0.col.loop_extent = nan_or_one.extent.0
let nan_or_one.s0.col.col_outer.loop_extent = nan_or_one.extent.0
let nan_or_one.s0.col.col_outer.loop_max = nan_or_one.extent.0 + -1
let nan_or_one.s0.col.col_outer.loop_min = 0
let nan_or_one.s0.col.col_inner.loop_extent = 1
let nan_or_one.s0.col.col_inner.loop_max = 0
let nan_or_one.s0.col.col_inner.loop_min = 0
let nan_or_one.s0.row.row_outer.loop_extent = nan_or_one.extent.1
let nan_or_one.s0.row.row_outer.loop_max = nan_or_one.extent.1 + -1
let nan_or_one.s0.row.row_outer.loop_min = 0
let nan_or_one.s0.row.row_inner.loop_extent = 1
let nan_or_one.s0.row.row_inner.loop_max = 0
let nan_or_one.s0.row.row_inner.loop_min = 0
let nan_or_one.s0.row.row_outer.loop_min.orig = 0
gpu_block<Default_GPU> (nan_or_one.s0.row.row_outer, 0, nan_or_one.extent.1) {
let nan_or_one.s0.row.row_inner.base = nan_or_one.min.1 + nan_or_one.s0.row.row_outer
let nan_or_one.s0.col.col_outer.loop_min.orig = 0
gpu_block<Default_GPU> (nan_or_one.s0.col.col_outer, 0, nan_or_one.extent.0) {
let nan_or_one.s0.col.col_inner.base = nan_or_one.min.0 + nan_or_one.s0.col.col_outer
let nan_or_one.s0.row.row_inner.loop_min.orig = 0
gpu_thread<Default_GPU> (nan_or_one.s0.row.row_inner, 0, 1) {
let nan_or_one.s0.row = nan_or_one.min.1 + nan_or_one.s0.row.row_outer
let nan_or_one.s0.col.col_inner.loop_min.orig = 0
gpu_thread<Default_GPU> (nan_or_one.s0.col.col_inner, 0, 1) {
let nan_or_one.s0.col = nan_or_one.min.0 + nan_or_one.s0.col.col_outer
nan_or_one(nan_or_one.min.0 + nan_or_one.s0.col.col_outer, nan_or_one.min.1 + nan_or_one.s0.row.row_outer) = let t1 = (float32)strict_float(nanf) in (float32)strict_float(select((uint1)is_nan_f32((float32)strict_float((float32)b0(nan_or_one.min.0 + nan_or_one.s0.col.col_outer, nan_or_one.min.1 + nan_or_one.s0.row.row_outer, 0))), t1, t1))
}
}
}
}
}
Performing allocation bounds inference...
Lowering after allocation bounds inference: (unchanged)
Adding checks for images
Injecting constraints for b0.0
Injecting constraints for b0.1
Injecting constraints for b0.2
Injecting constraints for nan_or_one.0
Injecting constraints for nan_or_one.1
Lowering after injecting image checks:
let nan_or_one.s0.row.max = (nan_or_one.extent.1 + nan_or_one.min.1) + -1
let nan_or_one.s0.row.min = nan_or_one.min.1
let nan_or_one.s0.col.max = (nan_or_one.extent.0 + nan_or_one.min.0) + -1
let nan_or_one.s0.col.min = nan_or_one.min.0
let b0.extent.0.required = ((nan_or_one.min.0 + (nan_or_one.extent.0 + -1)) + 1) - (nan_or_one.min.0 + 0)
let b0.min.0.required = nan_or_one.min.0 + 0
let b0.stride.0.required = 1
let b0.extent.1.required = ((nan_or_one.min.1 + (nan_or_one.extent.1 + -1)) + 1) - (nan_or_one.min.1 + 0)
let b0.min.1.required = nan_or_one.min.1 + 0
let b0.stride.1.required = b0.stride.0.required*b0.extent.0.required
let b0.extent.2.required = (0 + 1) - 0
let b0.min.2.required = 0
let b0.stride.2.required = b0.stride.1.required*b0.extent.1.required
let nan_or_one.extent.0.required = ((nan_or_one.min.0 + (nan_or_one.extent.0 + -1)) + 1) - (nan_or_one.min.0 + 0)
let nan_or_one.min.0.required = nan_or_one.min.0 + 0
let nan_or_one.stride.0.required = 1
let nan_or_one.extent.1.required = ((nan_or_one.min.1 + (nan_or_one.extent.1 + -1)) + 1) - (nan_or_one.min.1 + 0)
let nan_or_one.min.1.required = nan_or_one.min.1 + 0
let nan_or_one.stride.1.required = nan_or_one.stride.0.required*nan_or_one.extent.0.required
let b0.stride.0.constrained = 3
let b0.min.0.constrained = 0
let b0.extent.0.constrained = 1
let b0.stride.1.constrained = 3
let b0.min.1.constrained = 0
let b0.extent.1.constrained = 1
let b0.stride.2.constrained = 1
let b0.min.2.constrained = 0
let b0.extent.2.constrained = 3
let nan_or_one.stride.0.constrained = 1
let b0.stride.0.proposed = 3
let b0.min.0.proposed = 0
let b0.extent.0.proposed = 1
let b0.stride.1.proposed = 3
let b0.min.1.proposed = 0
let b0.extent.1.proposed = 1
let b0.stride.2.proposed = 1
let b0.min.2.proposed = 0
let b0.extent.2.proposed = 3
let nan_or_one.stride.0.proposed = 1
let nan_or_one.min.0.proposed = nan_or_one.min.0.required
let nan_or_one.extent.0.proposed = nan_or_one.extent.0.required
let nan_or_one.stride.1.proposed = nan_or_one.stride.1.required
let nan_or_one.min.1.proposed = nan_or_one.min.1.required
let nan_or_one.extent.1.proposed = nan_or_one.extent.1.required
assert(!(uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)b0.buffer) || ((b0.min.0.proposed <= b0.min.0.required) && (((b0.min.0.proposed + b0.extent.0.proposed) - 1) >= ((b0.min.0.required + b0.extent.0.required) - 1))), halide_error_constraints_make_required_region_smaller("Input buffer b0", 0, b0.min.0.proposed, (b0.min.0.proposed + b0.extent.0.proposed) - 1, b0.min.0.required, (b0.min.0.required + b0.extent.0.required) - 1))
assert(!(uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)b0.buffer) || ((b0.min.1.proposed <= b0.min.1.required) && (((b0.min.1.proposed + b0.extent.1.proposed) - 1) >= ((b0.min.1.required + b0.extent.1.required) - 1))), halide_error_constraints_make_required_region_smaller("Input buffer b0", 1, b0.min.1.proposed, (b0.min.1.proposed + b0.extent.1.proposed) - 1, b0.min.1.required, (b0.min.1.required + b0.extent.1.required) - 1))
assert(!(uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)b0.buffer) || ((b0.min.2.proposed <= b0.min.2.required) && (((b0.min.2.proposed + b0.extent.2.proposed) - 1) >= ((b0.min.2.required + b0.extent.2.required) - 1))), halide_error_constraints_make_required_region_smaller("Input buffer b0", 2, b0.min.2.proposed, (b0.min.2.proposed + b0.extent.2.proposed) - 1, b0.min.2.required, (b0.min.2.required + b0.extent.2.required) - 1))
assert(!(uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)nan_or_one.buffer) || ((nan_or_one.min.0.proposed <= nan_or_one.min.0.required) && (((nan_or_one.min.0.proposed + nan_or_one.extent.0.proposed) - 1) >= ((nan_or_one.min.0.required + nan_or_one.extent.0.required) - 1))), halide_error_constraints_make_required_region_smaller("Output buffer nan_or_one", 0, nan_or_one.min.0.proposed, (nan_or_one.min.0.proposed + nan_or_one.extent.0.proposed) - 1, nan_or_one.min.0.required, (nan_or_one.min.0.required + nan_or_one.extent.0.required) - 1))
assert(!(uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)nan_or_one.buffer) || ((nan_or_one.min.1.proposed <= nan_or_one.min.1.required) && (((nan_or_one.min.1.proposed + nan_or_one.extent.1.proposed) - 1) >= ((nan_or_one.min.1.required + nan_or_one.extent.1.required) - 1))), halide_error_constraints_make_required_region_smaller("Output buffer nan_or_one", 1, nan_or_one.min.1.proposed, (nan_or_one.min.1.proposed + nan_or_one.extent.1.proposed) - 1, nan_or_one.min.1.required, (nan_or_one.min.1.required + nan_or_one.extent.1.required) - 1))
if ((uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)b0.buffer)) {
(halide_buffer_t *)_halide_buffer_init((halide_buffer_t *)b0.buffer, (halide_dimension_t *)_halide_buffer_get_shape((halide_buffer_t *)b0.buffer), (void *)reinterpret((uint64)0), (uint64)0, (halide_device_interface_t *)reinterpret((uint64)0), 2, 32, 3, (halide_dimension_t *)make_struct(b0.min.0.proposed, b0.extent.0.proposed, b0.stride.0.proposed, 0, b0.min.1.proposed, b0.extent.1.proposed, b0.stride.1.proposed, 0, b0.min.2.proposed, b0.extent.2.proposed, b0.stride.2.proposed, 0), (uint64)0)
}
if ((uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)nan_or_one.buffer)) {
(halide_buffer_t *)_halide_buffer_init((halide_buffer_t *)nan_or_one.buffer, (halide_dimension_t *)_halide_buffer_get_shape((halide_buffer_t *)nan_or_one.buffer), (void *)reinterpret((uint64)0), (uint64)0, (halide_device_interface_t *)reinterpret((uint64)0), 2, 32, 2, (halide_dimension_t *)make_struct(nan_or_one.min.0.proposed, nan_or_one.extent.0.proposed, nan_or_one.stride.0.proposed, 0, nan_or_one.min.1.proposed, nan_or_one.extent.1.proposed, nan_or_one.stride.1.proposed, 0), (uint64)0)
}
if (!(((uint1)0 || (uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)b0.buffer)) || (uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)nan_or_one.buffer))) {
assert((uint32)b0.type == (uint32)73730, halide_error_bad_type("Input buffer b0", (uint32)b0.type, (uint32)73730))
assert(b0.dimensions == 3, halide_error_bad_dimensions("Input buffer b0", b0.dimensions, 3))
assert((uint32)nan_or_one.type == (uint32)73730, halide_error_bad_type("Output buffer nan_or_one", (uint32)nan_or_one.type, (uint32)73730))
assert(nan_or_one.dimensions == 2, halide_error_bad_dimensions("Output buffer nan_or_one", nan_or_one.dimensions, 2))
assert((b0.min.0 <= b0.min.0.required) && (((b0.min.0 + b0.extent.0) - 1) >= ((b0.min.0.required + b0.extent.0.required) - 1)), halide_error_access_out_of_bounds("Input buffer b0", 0, b0.min.0.required, (b0.min.0.required + b0.extent.0.required) - 1, b0.min.0, (b0.min.0 + b0.extent.0) - 1))
assert(b0.extent.0 >= 0, halide_error_buffer_extents_negative("Input buffer b0", 0, b0.extent.0))
assert((b0.min.1 <= b0.min.1.required) && (((b0.min.1 + b0.extent.1) - 1) >= ((b0.min.1.required + b0.extent.1.required) - 1)), halide_error_access_out_of_bounds("Input buffer b0", 1, b0.min.1.required, (b0.min.1.required + b0.extent.1.required) - 1, b0.min.1, (b0.min.1 + b0.extent.1) - 1))
assert(b0.extent.1 >= 0, halide_error_buffer_extents_negative("Input buffer b0", 1, b0.extent.1))
assert((b0.min.2 <= b0.min.2.required) && (((b0.min.2 + b0.extent.2) - 1) >= ((b0.min.2.required + b0.extent.2.required) - 1)), halide_error_access_out_of_bounds("Input buffer b0", 2, b0.min.2.required, (b0.min.2.required + b0.extent.2.required) - 1, b0.min.2, (b0.min.2 + b0.extent.2) - 1))
assert(b0.extent.2 >= 0, halide_error_buffer_extents_negative("Input buffer b0", 2, b0.extent.2))
assert((nan_or_one.min.0 <= nan_or_one.min.0.required) && (((nan_or_one.min.0 + nan_or_one.extent.0) - 1) >= ((nan_or_one.min.0.required + nan_or_one.extent.0.required) - 1)), halide_error_access_out_of_bounds("Output buffer nan_or_one", 0, nan_or_one.min.0.required, (nan_or_one.min.0.required + nan_or_one.extent.0.required) - 1, nan_or_one.min.0, (nan_or_one.min.0 + nan_or_one.extent.0) - 1))
assert(nan_or_one.extent.0 >= 0, halide_error_buffer_extents_negative("Output buffer nan_or_one", 0, nan_or_one.extent.0))
assert((nan_or_one.min.1 <= nan_or_one.min.1.required) && (((nan_or_one.min.1 + nan_or_one.extent.1) - 1) >= ((nan_or_one.min.1.required + nan_or_one.extent.1.required) - 1)), halide_error_access_out_of_bounds("Output buffer nan_or_one", 1, nan_or_one.min.1.required, (nan_or_one.min.1.required + nan_or_one.extent.1.required) - 1, nan_or_one.min.1, (nan_or_one.min.1 + nan_or_one.extent.1) - 1))
assert(nan_or_one.extent.1 >= 0, halide_error_buffer_extents_negative("Output buffer nan_or_one", 1, nan_or_one.extent.1))
assert(b0.stride.0 == b0.stride.0.constrained, halide_error_constraint_violated("b0.stride.0", b0.stride.0, "3", b0.stride.0.constrained))
assert(b0.min.0 == b0.min.0.constrained, halide_error_constraint_violated("b0.min.0", b0.min.0, "0", b0.min.0.constrained))
assert(b0.extent.0 == b0.extent.0.constrained, halide_error_constraint_violated("b0.extent.0", b0.extent.0, "1", b0.extent.0.constrained))
assert(b0.stride.1 == b0.stride.1.constrained, halide_error_constraint_violated("b0.stride.1", b0.stride.1, "3", b0.stride.1.constrained))
assert(b0.min.1 == b0.min.1.constrained, halide_error_constraint_violated("b0.min.1", b0.min.1, "0", b0.min.1.constrained))
assert(b0.extent.1 == b0.extent.1.constrained, halide_error_constraint_violated("b0.extent.1", b0.extent.1, "1", b0.extent.1.constrained))
assert(b0.stride.2 == b0.stride.2.constrained, halide_error_constraint_violated("b0.stride.2", b0.stride.2, "1", b0.stride.2.constrained))
assert(b0.min.2 == b0.min.2.constrained, halide_error_constraint_violated("b0.min.2", b0.min.2, "0", b0.min.2.constrained))
assert(b0.extent.2 == b0.extent.2.constrained, halide_error_constraint_violated("b0.extent.2", b0.extent.2, "3", b0.extent.2.constrained))
assert(nan_or_one.stride.0 == nan_or_one.stride.0.constrained, halide_error_constraint_violated("nan_or_one.stride.0", nan_or_one.stride.0, "1", nan_or_one.stride.0.constrained))
let b0.total_extent.0 = int64(b0.extent.0.constrained)
let b0.total_extent.1 = int64(b0.extent.1.constrained)*b0.total_extent.0
let b0.total_extent.2 = int64(b0.extent.2.constrained)*b0.total_extent.1
let nan_or_one.total_extent.0 = int64(nan_or_one.extent.0)
let nan_or_one.total_extent.1 = int64(nan_or_one.extent.1)*nan_or_one.total_extent.0
assert((uint64)abs(int64(b0.extent.0.constrained)*int64(b0.stride.0.constrained)) <= (uint64)2147483647, halide_error_buffer_allocation_too_large("b0", (uint64)abs(int64(b0.extent.0.constrained)*int64(b0.stride.0.constrained)), (uint64)2147483647))
assert((uint64)abs(int64(b0.extent.1.constrained)*int64(b0.stride.1.constrained)) <= (uint64)2147483647, halide_error_buffer_allocation_too_large("b0", (uint64)abs(int64(b0.extent.1.constrained)*int64(b0.stride.1.constrained)), (uint64)2147483647))
assert(b0.total_extent.1 <= (int64)2147483647, halide_error_buffer_extents_too_large("b0", b0.total_extent.1, (int64)2147483647))
assert((uint64)abs(int64(b0.extent.2.constrained)*int64(b0.stride.2.constrained)) <= (uint64)2147483647, halide_error_buffer_allocation_too_large("b0", (uint64)abs(int64(b0.extent.2.constrained)*int64(b0.stride.2.constrained)), (uint64)2147483647))
assert(b0.total_extent.2 <= (int64)2147483647, halide_error_buffer_extents_too_large("b0", b0.total_extent.2, (int64)2147483647))
assert((uint64)abs(int64(nan_or_one.extent.0)*int64(nan_or_one.stride.0.constrained)) <= (uint64)2147483647, halide_error_buffer_allocation_too_large("nan_or_one", (uint64)abs(int64(nan_or_one.extent.0)*int64(nan_or_one.stride.0.constrained)), (uint64)2147483647))
assert((uint64)abs(int64(nan_or_one.extent.1)*int64(nan_or_one.stride.1)) <= (uint64)2147483647, halide_error_buffer_allocation_too_large("nan_or_one", (uint64)abs(int64(nan_or_one.extent.1)*int64(nan_or_one.stride.1)), (uint64)2147483647))
assert(nan_or_one.total_extent.1 <= (int64)2147483647, halide_error_buffer_extents_too_large("nan_or_one", nan_or_one.total_extent.1, (int64)2147483647))
produce nan_or_one {
let nan_or_one.s0.row.loop_max = (nan_or_one.extent.1 + nan_or_one.min.1) + -1
let nan_or_one.s0.row.loop_min = nan_or_one.min.1
let nan_or_one.s0.row.loop_extent = nan_or_one.extent.1
let nan_or_one.s0.col.loop_max = (nan_or_one.extent.0 + nan_or_one.min.0) + -1
let nan_or_one.s0.col.loop_min = nan_or_one.min.0
let nan_or_one.s0.col.loop_extent = nan_or_one.extent.0
let nan_or_one.s0.col.col_outer.loop_extent = nan_or_one.extent.0
let nan_or_one.s0.col.col_outer.loop_max = nan_or_one.extent.0 + -1
let nan_or_one.s0.col.col_outer.loop_min = 0
let nan_or_one.s0.col.col_inner.loop_extent = 1
let nan_or_one.s0.col.col_inner.loop_max = 0
let nan_or_one.s0.col.col_inner.loop_min = 0
let nan_or_one.s0.row.row_outer.loop_extent = nan_or_one.extent.1
let nan_or_one.s0.row.row_outer.loop_max = nan_or_one.extent.1 + -1
let nan_or_one.s0.row.row_outer.loop_min = 0
let nan_or_one.s0.row.row_inner.loop_extent = 1
let nan_or_one.s0.row.row_inner.loop_max = 0
let nan_or_one.s0.row.row_inner.loop_min = 0
let nan_or_one.s0.row.row_outer.loop_min.orig = 0
gpu_block<Default_GPU> (nan_or_one.s0.row.row_outer, 0, nan_or_one.extent.1) {
let nan_or_one.s0.row.row_inner.base = nan_or_one.min.1 + nan_or_one.s0.row.row_outer
let nan_or_one.s0.col.col_outer.loop_min.orig = 0
gpu_block<Default_GPU> (nan_or_one.s0.col.col_outer, 0, nan_or_one.extent.0) {
let nan_or_one.s0.col.col_inner.base = nan_or_one.min.0 + nan_or_one.s0.col.col_outer
let nan_or_one.s0.row.row_inner.loop_min.orig = 0
gpu_thread<Default_GPU> (nan_or_one.s0.row.row_inner, 0, 1) {
let nan_or_one.s0.row = nan_or_one.min.1 + nan_or_one.s0.row.row_outer
let nan_or_one.s0.col.col_inner.loop_min.orig = 0
gpu_thread<Default_GPU> (nan_or_one.s0.col.col_inner, 0, 1) {
let nan_or_one.s0.col = nan_or_one.min.0 + nan_or_one.s0.col.col_outer
nan_or_one(nan_or_one.min.0 + nan_or_one.s0.col.col_outer, nan_or_one.min.1 + nan_or_one.s0.row.row_outer) = let t1 = (float32)strict_float(nanf) in (float32)strict_float(select((uint1)is_nan_f32((float32)strict_float((float32)b0(nan_or_one.min.0 + nan_or_one.s0.col.col_outer, nan_or_one.min.1 + nan_or_one.s0.row.row_outer, 0))), t1, t1))
}
}
}
}
}
}
Removing code that depends on undef values...
Lowering after removing code that depends on undef values: (unchanged)
Performing storage folding optimization...
Lowering after storage folding: (unchanged)
Injecting debug_to_file calls...
Lowering after injecting debug_to_file calls:
let nan_or_one.s0.row.max = (nan_or_one.extent.1 + nan_or_one.min.1) + -1
let nan_or_one.s0.row.min = nan_or_one.min.1
let nan_or_one.s0.col.max = (nan_or_one.extent.0 + nan_or_one.min.0) + -1
let nan_or_one.s0.col.min = nan_or_one.min.0
let b0.extent.0.required = ((nan_or_one.min.0 + (nan_or_one.extent.0 + -1)) + 1) - (nan_or_one.min.0 + 0)
let b0.min.0.required = nan_or_one.min.0 + 0
let b0.stride.0.required = 1
let b0.extent.1.required = ((nan_or_one.min.1 + (nan_or_one.extent.1 + -1)) + 1) - (nan_or_one.min.1 + 0)
let b0.min.1.required = nan_or_one.min.1 + 0
let b0.stride.1.required = b0.stride.0.required*b0.extent.0.required
let b0.extent.2.required = (0 + 1) - 0
let b0.min.2.required = 0
let b0.stride.2.required = b0.stride.1.required*b0.extent.1.required
let nan_or_one.extent.0.required = ((nan_or_one.min.0 + (nan_or_one.extent.0 + -1)) + 1) - (nan_or_one.min.0 + 0)
let nan_or_one.min.0.required = nan_or_one.min.0 + 0
let nan_or_one.stride.0.required = 1
let nan_or_one.extent.1.required = ((nan_or_one.min.1 + (nan_or_one.extent.1 + -1)) + 1) - (nan_or_one.min.1 + 0)
let nan_or_one.min.1.required = nan_or_one.min.1 + 0
let nan_or_one.stride.1.required = nan_or_one.stride.0.required*nan_or_one.extent.0.required
let b0.stride.0.constrained = 3
let b0.min.0.constrained = 0
let b0.extent.0.constrained = 1
let b0.stride.1.constrained = 3
let b0.min.1.constrained = 0
let b0.extent.1.constrained = 1
let b0.stride.2.constrained = 1
let b0.min.2.constrained = 0
let b0.extent.2.constrained = 3
let nan_or_one.stride.0.constrained = 1
let b0.stride.0.proposed = 3
let b0.min.0.proposed = 0
let b0.extent.0.proposed = 1
let b0.stride.1.proposed = 3
let b0.min.1.proposed = 0
let b0.extent.1.proposed = 1
let b0.stride.2.proposed = 1
let b0.min.2.proposed = 0
let b0.extent.2.proposed = 3
let nan_or_one.stride.0.proposed = 1
let nan_or_one.min.0.proposed = nan_or_one.min.0.required
let nan_or_one.extent.0.proposed = nan_or_one.extent.0.required
let nan_or_one.stride.1.proposed = nan_or_one.stride.1.required
let nan_or_one.min.1.proposed = nan_or_one.min.1.required
let nan_or_one.extent.1.proposed = nan_or_one.extent.1.required
assert(!(uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)b0.buffer) || ((b0.min.0.proposed <= b0.min.0.required) && (((b0.min.0.proposed + b0.extent.0.proposed) - 1) >= ((b0.min.0.required + b0.extent.0.required) - 1))), halide_error_constraints_make_required_region_smaller("Input buffer b0", 0, b0.min.0.proposed, (b0.min.0.proposed + b0.extent.0.proposed) - 1, b0.min.0.required, (b0.min.0.required + b0.extent.0.required) - 1))
assert(!(uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)b0.buffer) || ((b0.min.1.proposed <= b0.min.1.required) && (((b0.min.1.proposed + b0.extent.1.proposed) - 1) >= ((b0.min.1.required + b0.extent.1.required) - 1))), halide_error_constraints_make_required_region_smaller("Input buffer b0", 1, b0.min.1.proposed, (b0.min.1.proposed + b0.extent.1.proposed) - 1, b0.min.1.required, (b0.min.1.required + b0.extent.1.required) - 1))
assert(!(uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)b0.buffer) || ((b0.min.2.proposed <= b0.min.2.required) && (((b0.min.2.proposed + b0.extent.2.proposed) - 1) >= ((b0.min.2.required + b0.extent.2.required) - 1))), halide_error_constraints_make_required_region_smaller("Input buffer b0", 2, b0.min.2.proposed, (b0.min.2.proposed + b0.extent.2.proposed) - 1, b0.min.2.required, (b0.min.2.required + b0.extent.2.required) - 1))
assert(!(uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)nan_or_one.buffer) || ((nan_or_one.min.0.proposed <= nan_or_one.min.0.required) && (((nan_or_one.min.0.proposed + nan_or_one.extent.0.proposed) - 1) >= ((nan_or_one.min.0.required + nan_or_one.extent.0.required) - 1))), halide_error_constraints_make_required_region_smaller("Output buffer nan_or_one", 0, nan_or_one.min.0.proposed, (nan_or_one.min.0.proposed + nan_or_one.extent.0.proposed) - 1, nan_or_one.min.0.required, (nan_or_one.min.0.required + nan_or_one.extent.0.required) - 1))
assert(!(uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)nan_or_one.buffer) || ((nan_or_one.min.1.proposed <= nan_or_one.min.1.required) && (((nan_or_one.min.1.proposed + nan_or_one.extent.1.proposed) - 1) >= ((nan_or_one.min.1.required + nan_or_one.extent.1.required) - 1))), halide_error_constraints_make_required_region_smaller("Output buffer nan_or_one", 1, nan_or_one.min.1.proposed, (nan_or_one.min.1.proposed + nan_or_one.extent.1.proposed) - 1, nan_or_one.min.1.required, (nan_or_one.min.1.required + nan_or_one.extent.1.required) - 1))
if ((uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)b0.buffer)) {
(halide_buffer_t *)_halide_buffer_init((halide_buffer_t *)b0.buffer, (halide_dimension_t *)_halide_buffer_get_shape((halide_buffer_t *)b0.buffer), (void *)reinterpret((uint64)0), (uint64)0, (halide_device_interface_t *)reinterpret((uint64)0), 2, 32, 3, (halide_dimension_t *)make_struct(b0.min.0.proposed, b0.extent.0.proposed, b0.stride.0.proposed, 0, b0.min.1.proposed, b0.extent.1.proposed, b0.stride.1.proposed, 0, b0.min.2.proposed, b0.extent.2.proposed, b0.stride.2.proposed, 0), (uint64)0)
}
if ((uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)nan_or_one.buffer)) {
(halide_buffer_t *)_halide_buffer_init((halide_buffer_t *)nan_or_one.buffer, (halide_dimension_t *)_halide_buffer_get_shape((halide_buffer_t *)nan_or_one.buffer), (void *)reinterpret((uint64)0), (uint64)0, (halide_device_interface_t *)reinterpret((uint64)0), 2, 32, 2, (halide_dimension_t *)make_struct(nan_or_one.min.0.proposed, nan_or_one.extent.0.proposed, nan_or_one.stride.0.proposed, 0, nan_or_one.min.1.proposed, nan_or_one.extent.1.proposed, nan_or_one.stride.1.proposed, 0), (uint64)0)
}
if (!(((uint1)0 || (uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)b0.buffer)) || (uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)nan_or_one.buffer))) {
assert((uint32)b0.type == (uint32)73730, halide_error_bad_type("Input buffer b0", (uint32)b0.type, (uint32)73730))
assert(b0.dimensions == 3, halide_error_bad_dimensions("Input buffer b0", b0.dimensions, 3))
assert((uint32)nan_or_one.type == (uint32)73730, halide_error_bad_type("Output buffer nan_or_one", (uint32)nan_or_one.type, (uint32)73730))
assert(nan_or_one.dimensions == 2, halide_error_bad_dimensions("Output buffer nan_or_one", nan_or_one.dimensions, 2))
assert((b0.min.0 <= b0.min.0.required) && (((b0.min.0 + b0.extent.0) - 1) >= ((b0.min.0.required + b0.extent.0.required) - 1)), halide_error_access_out_of_bounds("Input buffer b0", 0, b0.min.0.required, (b0.min.0.required + b0.extent.0.required) - 1, b0.min.0, (b0.min.0 + b0.extent.0) - 1))
assert(b0.extent.0 >= 0, halide_error_buffer_extents_negative("Input buffer b0", 0, b0.extent.0))
assert((b0.min.1 <= b0.min.1.required) && (((b0.min.1 + b0.extent.1) - 1) >= ((b0.min.1.required + b0.extent.1.required) - 1)), halide_error_access_out_of_bounds("Input buffer b0", 1, b0.min.1.required, (b0.min.1.required + b0.extent.1.required) - 1, b0.min.1, (b0.min.1 + b0.extent.1) - 1))
assert(b0.extent.1 >= 0, halide_error_buffer_extents_negative("Input buffer b0", 1, b0.extent.1))
assert((b0.min.2 <= b0.min.2.required) && (((b0.min.2 + b0.extent.2) - 1) >= ((b0.min.2.required + b0.extent.2.required) - 1)), halide_error_access_out_of_bounds("Input buffer b0", 2, b0.min.2.required, (b0.min.2.required + b0.extent.2.required) - 1, b0.min.2, (b0.min.2 + b0.extent.2) - 1))
assert(b0.extent.2 >= 0, halide_error_buffer_extents_negative("Input buffer b0", 2, b0.extent.2))
assert((nan_or_one.min.0 <= nan_or_one.min.0.required) && (((nan_or_one.min.0 + nan_or_one.extent.0) - 1) >= ((nan_or_one.min.0.required + nan_or_one.extent.0.required) - 1)), halide_error_access_out_of_bounds("Output buffer nan_or_one", 0, nan_or_one.min.0.required, (nan_or_one.min.0.required + nan_or_one.extent.0.required) - 1, nan_or_one.min.0, (nan_or_one.min.0 + nan_or_one.extent.0) - 1))
assert(nan_or_one.extent.0 >= 0, halide_error_buffer_extents_negative("Output buffer nan_or_one", 0, nan_or_one.extent.0))
assert((nan_or_one.min.1 <= nan_or_one.min.1.required) && (((nan_or_one.min.1 + nan_or_one.extent.1) - 1) >= ((nan_or_one.min.1.required + nan_or_one.extent.1.required) - 1)), halide_error_access_out_of_bounds("Output buffer nan_or_one", 1, nan_or_one.min.1.required, (nan_or_one.min.1.required + nan_or_one.extent.1.required) - 1, nan_or_one.min.1, (nan_or_one.min.1 + nan_or_one.extent.1) - 1))
assert(nan_or_one.extent.1 >= 0, halide_error_buffer_extents_negative("Output buffer nan_or_one", 1, nan_or_one.extent.1))
assert(b0.stride.0 == b0.stride.0.constrained, halide_error_constraint_violated("b0.stride.0", b0.stride.0, "3", b0.stride.0.constrained))
assert(b0.min.0 == b0.min.0.constrained, halide_error_constraint_violated("b0.min.0", b0.min.0, "0", b0.min.0.constrained))
assert(b0.extent.0 == b0.extent.0.constrained, halide_error_constraint_violated("b0.extent.0", b0.extent.0, "1", b0.extent.0.constrained))
assert(b0.stride.1 == b0.stride.1.constrained, halide_error_constraint_violated("b0.stride.1", b0.stride.1, "3", b0.stride.1.constrained))
assert(b0.min.1 == b0.min.1.constrained, halide_error_constraint_violated("b0.min.1", b0.min.1, "0", b0.min.1.constrained))
assert(b0.extent.1 == b0.extent.1.constrained, halide_error_constraint_violated("b0.extent.1", b0.extent.1, "1", b0.extent.1.constrained))
assert(b0.stride.2 == b0.stride.2.constrained, halide_error_constraint_violated("b0.stride.2", b0.stride.2, "1", b0.stride.2.constrained))
assert(b0.min.2 == b0.min.2.constrained, halide_error_constraint_violated("b0.min.2", b0.min.2, "0", b0.min.2.constrained))
assert(b0.extent.2 == b0.extent.2.constrained, halide_error_constraint_violated("b0.extent.2", b0.extent.2, "3", b0.extent.2.constrained))
assert(nan_or_one.stride.0 == nan_or_one.stride.0.constrained, halide_error_constraint_violated("nan_or_one.stride.0", nan_or_one.stride.0, "1", nan_or_one.stride.0.constrained))
let b0.total_extent.0 = int64(b0.extent.0.constrained)
let b0.total_extent.1 = int64(b0.extent.1.constrained)*b0.total_extent.0
let b0.total_extent.2 = int64(b0.extent.2.constrained)*b0.total_extent.1
let nan_or_one.total_extent.0 = int64(nan_or_one.extent.0)
let nan_or_one.total_extent.1 = int64(nan_or_one.extent.1)*nan_or_one.total_extent.0
assert((uint64)abs(int64(b0.extent.0.constrained)*int64(b0.stride.0.constrained)) <= (uint64)2147483647, halide_error_buffer_allocation_too_large("b0", (uint64)abs(int64(b0.extent.0.constrained)*int64(b0.stride.0.constrained)), (uint64)2147483647))
assert((uint64)abs(int64(b0.extent.1.constrained)*int64(b0.stride.1.constrained)) <= (uint64)2147483647, halide_error_buffer_allocation_too_large("b0", (uint64)abs(int64(b0.extent.1.constrained)*int64(b0.stride.1.constrained)), (uint64)2147483647))
assert(b0.total_extent.1 <= (int64)2147483647, halide_error_buffer_extents_too_large("b0", b0.total_extent.1, (int64)2147483647))
assert((uint64)abs(int64(b0.extent.2.constrained)*int64(b0.stride.2.constrained)) <= (uint64)2147483647, halide_error_buffer_allocation_too_large("b0", (uint64)abs(int64(b0.extent.2.constrained)*int64(b0.stride.2.constrained)), (uint64)2147483647))
assert(b0.total_extent.2 <= (int64)2147483647, halide_error_buffer_extents_too_large("b0", b0.total_extent.2, (int64)2147483647))
assert((uint64)abs(int64(nan_or_one.extent.0)*int64(nan_or_one.stride.0.constrained)) <= (uint64)2147483647, halide_error_buffer_allocation_too_large("nan_or_one", (uint64)abs(int64(nan_or_one.extent.0)*int64(nan_or_one.stride.0.constrained)), (uint64)2147483647))
assert((uint64)abs(int64(nan_or_one.extent.1)*int64(nan_or_one.stride.1)) <= (uint64)2147483647, halide_error_buffer_allocation_too_large("nan_or_one", (uint64)abs(int64(nan_or_one.extent.1)*int64(nan_or_one.stride.1)), (uint64)2147483647))
assert(nan_or_one.total_extent.1 <= (int64)2147483647, halide_error_buffer_extents_too_large("nan_or_one", nan_or_one.total_extent.1, (int64)2147483647))
produce nan_or_one {
let nan_or_one.s0.row.loop_max = (nan_or_one.extent.1 + nan_or_one.min.1) + -1
let nan_or_one.s0.row.loop_min = nan_or_one.min.1
let nan_or_one.s0.row.loop_extent = nan_or_one.extent.1
let nan_or_one.s0.col.loop_max = (nan_or_one.extent.0 + nan_or_one.min.0) + -1
let nan_or_one.s0.col.loop_min = nan_or_one.min.0
let nan_or_one.s0.col.loop_extent = nan_or_one.extent.0
let nan_or_one.s0.col.col_outer.loop_extent = nan_or_one.extent.0
let nan_or_one.s0.col.col_outer.loop_max = nan_or_one.extent.0 + -1
let nan_or_one.s0.col.col_outer.loop_min = 0
let nan_or_one.s0.col.col_inner.loop_extent = 1
let nan_or_one.s0.col.col_inner.loop_max = 0
let nan_or_one.s0.col.col_inner.loop_min = 0
let nan_or_one.s0.row.row_outer.loop_extent = nan_or_one.extent.1
let nan_or_one.s0.row.row_outer.loop_max = nan_or_one.extent.1 + -1
let nan_or_one.s0.row.row_outer.loop_min = 0
let nan_or_one.s0.row.row_inner.loop_extent = 1
let nan_or_one.s0.row.row_inner.loop_max = 0
let nan_or_one.s0.row.row_inner.loop_min = 0
let nan_or_one.s0.row.row_outer.loop_min.orig = 0
gpu_block<Default_GPU> (nan_or_one.s0.row.row_outer, 0, nan_or_one.extent.1) {
let nan_or_one.s0.row.row_inner.base = nan_or_one.min.1 + nan_or_one.s0.row.row_outer
let nan_or_one.s0.col.col_outer.loop_min.orig = 0
gpu_block<Default_GPU> (nan_or_one.s0.col.col_outer, 0, nan_or_one.extent.0) {
let nan_or_one.s0.col.col_inner.base = nan_or_one.min.0 + nan_or_one.s0.col.col_outer
let nan_or_one.s0.row.row_inner.loop_min.orig = 0
gpu_thread<Default_GPU> (nan_or_one.s0.row.row_inner, 0, 1) {
let nan_or_one.s0.row = nan_or_one.min.1 + nan_or_one.s0.row.row_outer
let nan_or_one.s0.col.col_inner.loop_min.orig = 0
gpu_thread<Default_GPU> (nan_or_one.s0.col.col_inner, 0, 1) {
let nan_or_one.s0.col = nan_or_one.min.0 + nan_or_one.s0.col.col_outer
nan_or_one(nan_or_one.min.0 + nan_or_one.s0.col.col_outer, nan_or_one.min.1 + nan_or_one.s0.row.row_outer) = let t1 = (float32)strict_float(nanf) in (float32)strict_float(select((uint1)is_nan_f32((float32)strict_float((float32)b0(nan_or_one.min.0 + nan_or_one.s0.col.col_outer, nan_or_one.min.1 + nan_or_one.s0.row.row_outer, 0))), t1, t1))
}
}
}
}
}
}
Injecting prefetches...
Lowering after injecting prefetches: (unchanged)
Discarding safe promises...
Lowering after discarding safe promises: (unchanged)
Dynamically skipping stages...
Lowering after dynamically skipping stages: (unchanged)
Forking asynchronous producers...
Lowering after forking asynchronous producers:
let nan_or_one.s0.row.max = (nan_or_one.extent.1 + nan_or_one.min.1) + -1
let nan_or_one.s0.row.min = nan_or_one.min.1
let nan_or_one.s0.col.max = (nan_or_one.extent.0 + nan_or_one.min.0) + -1
let nan_or_one.s0.col.min = nan_or_one.min.0
let b0.extent.0.required = ((nan_or_one.min.0 + (nan_or_one.extent.0 + -1)) + 1) - (nan_or_one.min.0 + 0)
let b0.min.0.required = nan_or_one.min.0 + 0
let b0.stride.0.required = 1
let b0.extent.1.required = ((nan_or_one.min.1 + (nan_or_one.extent.1 + -1)) + 1) - (nan_or_one.min.1 + 0)
let b0.min.1.required = nan_or_one.min.1 + 0
let b0.stride.1.required = b0.stride.0.required*b0.extent.0.required
let b0.extent.2.required = (0 + 1) - 0
let b0.min.2.required = 0
let b0.stride.2.required = b0.stride.1.required*b0.extent.1.required
let nan_or_one.extent.0.required = ((nan_or_one.min.0 + (nan_or_one.extent.0 + -1)) + 1) - (nan_or_one.min.0 + 0)
let nan_or_one.min.0.required = nan_or_one.min.0 + 0
let nan_or_one.stride.0.required = 1
let nan_or_one.extent.1.required = ((nan_or_one.min.1 + (nan_or_one.extent.1 + -1)) + 1) - (nan_or_one.min.1 + 0)
let nan_or_one.min.1.required = nan_or_one.min.1 + 0
let nan_or_one.stride.1.required = nan_or_one.stride.0.required*nan_or_one.extent.0.required
let b0.stride.0.constrained = 3
let b0.min.0.constrained = 0
let b0.extent.0.constrained = 1
let b0.stride.1.constrained = 3
let b0.min.1.constrained = 0
let b0.extent.1.constrained = 1
let b0.stride.2.constrained = 1
let b0.min.2.constrained = 0
let b0.extent.2.constrained = 3
let nan_or_one.stride.0.constrained = 1
let b0.stride.0.proposed = 3
let b0.min.0.proposed = 0
let b0.extent.0.proposed = 1
let b0.stride.1.proposed = 3
let b0.min.1.proposed = 0
let b0.extent.1.proposed = 1
let b0.stride.2.proposed = 1
let b0.min.2.proposed = 0
let b0.extent.2.proposed = 3
let nan_or_one.stride.0.proposed = 1
let nan_or_one.min.0.proposed = nan_or_one.min.0.required
let nan_or_one.extent.0.proposed = nan_or_one.extent.0.required
let nan_or_one.stride.1.proposed = nan_or_one.stride.1.required
let nan_or_one.min.1.proposed = nan_or_one.min.1.required
let nan_or_one.extent.1.proposed = nan_or_one.extent.1.required
assert(!(uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)b0.buffer) || ((b0.min.0.proposed <= b0.min.0.required) && (((b0.min.0.proposed + b0.extent.0.proposed) - 1) >= ((b0.min.0.required + b0.extent.0.required) - 1))), halide_error_constraints_make_required_region_smaller("Input buffer b0", 0, b0.min.0.proposed, (b0.min.0.proposed + b0.extent.0.proposed) - 1, b0.min.0.required, (b0.min.0.required + b0.extent.0.required) - 1))
assert(!(uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)b0.buffer) || ((b0.min.1.proposed <= b0.min.1.required) && (((b0.min.1.proposed + b0.extent.1.proposed) - 1) >= ((b0.min.1.required + b0.extent.1.required) - 1))), halide_error_constraints_make_required_region_smaller("Input buffer b0", 1, b0.min.1.proposed, (b0.min.1.proposed + b0.extent.1.proposed) - 1, b0.min.1.required, (b0.min.1.required + b0.extent.1.required) - 1))
assert(!(uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)b0.buffer) || ((b0.min.2.proposed <= b0.min.2.required) && (((b0.min.2.proposed + b0.extent.2.proposed) - 1) >= ((b0.min.2.required + b0.extent.2.required) - 1))), halide_error_constraints_make_required_region_smaller("Input buffer b0", 2, b0.min.2.proposed, (b0.min.2.proposed + b0.extent.2.proposed) - 1, b0.min.2.required, (b0.min.2.required + b0.extent.2.required) - 1))
assert(!(uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)nan_or_one.buffer) || ((nan_or_one.min.0.proposed <= nan_or_one.min.0.required) && (((nan_or_one.min.0.proposed + nan_or_one.extent.0.proposed) - 1) >= ((nan_or_one.min.0.required + nan_or_one.extent.0.required) - 1))), halide_error_constraints_make_required_region_smaller("Output buffer nan_or_one", 0, nan_or_one.min.0.proposed, (nan_or_one.min.0.proposed + nan_or_one.extent.0.proposed) - 1, nan_or_one.min.0.required, (nan_or_one.min.0.required + nan_or_one.extent.0.required) - 1))
assert(!(uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)nan_or_one.buffer) || ((nan_or_one.min.1.proposed <= nan_or_one.min.1.required) && (((nan_or_one.min.1.proposed + nan_or_one.extent.1.proposed) - 1) >= ((nan_or_one.min.1.required + nan_or_one.extent.1.required) - 1))), halide_error_constraints_make_required_region_smaller("Output buffer nan_or_one", 1, nan_or_one.min.1.proposed, (nan_or_one.min.1.proposed + nan_or_one.extent.1.proposed) - 1, nan_or_one.min.1.required, (nan_or_one.min.1.required + nan_or_one.extent.1.required) - 1))
if ((uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)b0.buffer)) {
(halide_buffer_t *)_halide_buffer_init((halide_buffer_t *)b0.buffer, (halide_dimension_t *)_halide_buffer_get_shape((halide_buffer_t *)b0.buffer), (void *)reinterpret((uint64)0), (uint64)0, (halide_device_interface_t *)reinterpret((uint64)0), 2, 32, 3, (halide_dimension_t *)make_struct(b0.min.0.proposed, b0.extent.0.proposed, b0.stride.0.proposed, 0, b0.min.1.proposed, b0.extent.1.proposed, b0.stride.1.proposed, 0, b0.min.2.proposed, b0.extent.2.proposed, b0.stride.2.proposed, 0), (uint64)0)
}
if ((uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)nan_or_one.buffer)) {
(halide_buffer_t *)_halide_buffer_init((halide_buffer_t *)nan_or_one.buffer, (halide_dimension_t *)_halide_buffer_get_shape((halide_buffer_t *)nan_or_one.buffer), (void *)reinterpret((uint64)0), (uint64)0, (halide_device_interface_t *)reinterpret((uint64)0), 2, 32, 2, (halide_dimension_t *)make_struct(nan_or_one.min.0.proposed, nan_or_one.extent.0.proposed, nan_or_one.stride.0.proposed, 0, nan_or_one.min.1.proposed, nan_or_one.extent.1.proposed, nan_or_one.stride.1.proposed, 0), (uint64)0)
}
if (!(((uint1)0 || (uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)b0.buffer)) || (uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)nan_or_one.buffer))) {
assert((uint32)b0.type == (uint32)73730, halide_error_bad_type("Input buffer b0", (uint32)b0.type, (uint32)73730))
assert(b0.dimensions == 3, halide_error_bad_dimensions("Input buffer b0", b0.dimensions, 3))
assert((uint32)nan_or_one.type == (uint32)73730, halide_error_bad_type("Output buffer nan_or_one", (uint32)nan_or_one.type, (uint32)73730))
assert(nan_or_one.dimensions == 2, halide_error_bad_dimensions("Output buffer nan_or_one", nan_or_one.dimensions, 2))
assert((b0.min.0 <= b0.min.0.required) && (((b0.min.0 + b0.extent.0) - 1) >= ((b0.min.0.required + b0.extent.0.required) - 1)), halide_error_access_out_of_bounds("Input buffer b0", 0, b0.min.0.required, (b0.min.0.required + b0.extent.0.required) - 1, b0.min.0, (b0.min.0 + b0.extent.0) - 1))
assert(b0.extent.0 >= 0, halide_error_buffer_extents_negative("Input buffer b0", 0, b0.extent.0))
assert((b0.min.1 <= b0.min.1.required) && (((b0.min.1 + b0.extent.1) - 1) >= ((b0.min.1.required + b0.extent.1.required) - 1)), halide_error_access_out_of_bounds("Input buffer b0", 1, b0.min.1.required, (b0.min.1.required + b0.extent.1.required) - 1, b0.min.1, (b0.min.1 + b0.extent.1) - 1))
assert(b0.extent.1 >= 0, halide_error_buffer_extents_negative("Input buffer b0", 1, b0.extent.1))
assert((b0.min.2 <= b0.min.2.required) && (((b0.min.2 + b0.extent.2) - 1) >= ((b0.min.2.required + b0.extent.2.required) - 1)), halide_error_access_out_of_bounds("Input buffer b0", 2, b0.min.2.required, (b0.min.2.required + b0.extent.2.required) - 1, b0.min.2, (b0.min.2 + b0.extent.2) - 1))
assert(b0.extent.2 >= 0, halide_error_buffer_extents_negative("Input buffer b0", 2, b0.extent.2))
assert((nan_or_one.min.0 <= nan_or_one.min.0.required) && (((nan_or_one.min.0 + nan_or_one.extent.0) - 1) >= ((nan_or_one.min.0.required + nan_or_one.extent.0.required) - 1)), halide_error_access_out_of_bounds("Output buffer nan_or_one", 0, nan_or_one.min.0.required, (nan_or_one.min.0.required + nan_or_one.extent.0.required) - 1, nan_or_one.min.0, (nan_or_one.min.0 + nan_or_one.extent.0) - 1))
assert(nan_or_one.extent.0 >= 0, halide_error_buffer_extents_negative("Output buffer nan_or_one", 0, nan_or_one.extent.0))
assert((nan_or_one.min.1 <= nan_or_one.min.1.required) && (((nan_or_one.min.1 + nan_or_one.extent.1) - 1) >= ((nan_or_one.min.1.required + nan_or_one.extent.1.required) - 1)), halide_error_access_out_of_bounds("Output buffer nan_or_one", 1, nan_or_one.min.1.required, (nan_or_one.min.1.required + nan_or_one.extent.1.required) - 1, nan_or_one.min.1, (nan_or_one.min.1 + nan_or_one.extent.1) - 1))
assert(nan_or_one.extent.1 >= 0, halide_error_buffer_extents_negative("Output buffer nan_or_one", 1, nan_or_one.extent.1))
assert(b0.stride.0 == b0.stride.0.constrained, halide_error_constraint_violated("b0.stride.0", b0.stride.0, "3", b0.stride.0.constrained))
assert(b0.min.0 == b0.min.0.constrained, halide_error_constraint_violated("b0.min.0", b0.min.0, "0", b0.min.0.constrained))
assert(b0.extent.0 == b0.extent.0.constrained, halide_error_constraint_violated("b0.extent.0", b0.extent.0, "1", b0.extent.0.constrained))
assert(b0.stride.1 == b0.stride.1.constrained, halide_error_constraint_violated("b0.stride.1", b0.stride.1, "3", b0.stride.1.constrained))
assert(b0.min.1 == b0.min.1.constrained, halide_error_constraint_violated("b0.min.1", b0.min.1, "0", b0.min.1.constrained))
assert(b0.extent.1 == b0.extent.1.constrained, halide_error_constraint_violated("b0.extent.1", b0.extent.1, "1", b0.extent.1.constrained))
assert(b0.stride.2 == b0.stride.2.constrained, halide_error_constraint_violated("b0.stride.2", b0.stride.2, "1", b0.stride.2.constrained))
assert(b0.min.2 == b0.min.2.constrained, halide_error_constraint_violated("b0.min.2", b0.min.2, "0", b0.min.2.constrained))
assert(b0.extent.2 == b0.extent.2.constrained, halide_error_constraint_violated("b0.extent.2", b0.extent.2, "3", b0.extent.2.constrained))
assert(nan_or_one.stride.0 == nan_or_one.stride.0.constrained, halide_error_constraint_violated("nan_or_one.stride.0", nan_or_one.stride.0, "1", nan_or_one.stride.0.constrained))
let b0.total_extent.0 = int64(b0.extent.0.constrained)
let b0.total_extent.1 = int64(b0.extent.1.constrained)*b0.total_extent.0
let b0.total_extent.2 = int64(b0.extent.2.constrained)*b0.total_extent.1
let nan_or_one.total_extent.0 = int64(nan_or_one.extent.0)
let nan_or_one.total_extent.1 = int64(nan_or_one.extent.1)*nan_or_one.total_extent.0
assert((uint64)abs(int64(b0.extent.0.constrained)*int64(b0.stride.0.constrained)) <= (uint64)2147483647, halide_error_buffer_allocation_too_large("b0", (uint64)abs(int64(b0.extent.0.constrained)*int64(b0.stride.0.constrained)), (uint64)2147483647))
assert((uint64)abs(int64(b0.extent.1.constrained)*int64(b0.stride.1.constrained)) <= (uint64)2147483647, halide_error_buffer_allocation_too_large("b0", (uint64)abs(int64(b0.extent.1.constrained)*int64(b0.stride.1.constrained)), (uint64)2147483647))
assert(b0.total_extent.1 <= (int64)2147483647, halide_error_buffer_extents_too_large("b0", b0.total_extent.1, (int64)2147483647))
assert((uint64)abs(int64(b0.extent.2.constrained)*int64(b0.stride.2.constrained)) <= (uint64)2147483647, halide_error_buffer_allocation_too_large("b0", (uint64)abs(int64(b0.extent.2.constrained)*int64(b0.stride.2.constrained)), (uint64)2147483647))
assert(b0.total_extent.2 <= (int64)2147483647, halide_error_buffer_extents_too_large("b0", b0.total_extent.2, (int64)2147483647))
assert((uint64)abs(int64(nan_or_one.extent.0)*int64(nan_or_one.stride.0.constrained)) <= (uint64)2147483647, halide_error_buffer_allocation_too_large("nan_or_one", (uint64)abs(int64(nan_or_one.extent.0)*int64(nan_or_one.stride.0.constrained)), (uint64)2147483647))
assert((uint64)abs(int64(nan_or_one.extent.1)*int64(nan_or_one.stride.1)) <= (uint64)2147483647, halide_error_buffer_allocation_too_large("nan_or_one", (uint64)abs(int64(nan_or_one.extent.1)*int64(nan_or_one.stride.1)), (uint64)2147483647))
assert(nan_or_one.total_extent.1 <= (int64)2147483647, halide_error_buffer_extents_too_large("nan_or_one", nan_or_one.total_extent.1, (int64)2147483647))
let nan_or_one.s0.row.loop_max = (nan_or_one.extent.1 + nan_or_one.min.1) + -1
let nan_or_one.s0.row.loop_min = nan_or_one.min.1
let nan_or_one.s0.row.loop_extent = nan_or_one.extent.1
let nan_or_one.s0.col.loop_max = (nan_or_one.extent.0 + nan_or_one.min.0) + -1
let nan_or_one.s0.col.loop_min = nan_or_one.min.0
let nan_or_one.s0.col.loop_extent = nan_or_one.extent.0
let nan_or_one.s0.col.col_outer.loop_extent = nan_or_one.extent.0
let nan_or_one.s0.col.col_outer.loop_max = nan_or_one.extent.0 + -1
let nan_or_one.s0.col.col_outer.loop_min = 0
let nan_or_one.s0.col.col_inner.loop_extent = 1
let nan_or_one.s0.col.col_inner.loop_max = 0
let nan_or_one.s0.col.col_inner.loop_min = 0
let nan_or_one.s0.row.row_outer.loop_extent = nan_or_one.extent.1
let nan_or_one.s0.row.row_outer.loop_max = nan_or_one.extent.1 + -1
let nan_or_one.s0.row.row_outer.loop_min = 0
let nan_or_one.s0.row.row_inner.loop_extent = 1
let nan_or_one.s0.row.row_inner.loop_max = 0
let nan_or_one.s0.row.row_inner.loop_min = 0
let nan_or_one.s0.row.row_outer.loop_min.orig = 0
produce nan_or_one {
gpu_block<Default_GPU> (nan_or_one.s0.row.row_outer, 0, nan_or_one.extent.1) {
let nan_or_one.s0.row.row_inner.base = nan_or_one.min.1 + nan_or_one.s0.row.row_outer
let nan_or_one.s0.col.col_outer.loop_min.orig = 0
gpu_block<Default_GPU> (nan_or_one.s0.col.col_outer, 0, nan_or_one.extent.0) {
let nan_or_one.s0.col.col_inner.base = nan_or_one.min.0 + nan_or_one.s0.col.col_outer
let nan_or_one.s0.row.row_inner.loop_min.orig = 0
gpu_thread<Default_GPU> (nan_or_one.s0.row.row_inner, 0, 1) {
let nan_or_one.s0.row = nan_or_one.min.1 + nan_or_one.s0.row.row_outer
let nan_or_one.s0.col.col_inner.loop_min.orig = 0
gpu_thread<Default_GPU> (nan_or_one.s0.col.col_inner, 0, 1) {
let nan_or_one.s0.col = nan_or_one.min.0 + nan_or_one.s0.col.col_outer
nan_or_one(nan_or_one.min.0 + nan_or_one.s0.col.col_outer, nan_or_one.min.1 + nan_or_one.s0.row.row_outer) = let t1 = (float32)strict_float(nanf) in (float32)strict_float(select((uint1)is_nan_f32((float32)strict_float((float32)b0(nan_or_one.min.0 + nan_or_one.s0.col.col_outer, nan_or_one.min.1 + nan_or_one.s0.row.row_outer, 0))), t1, t1))
}
}
}
}
}
}
Destructuring tuple-valued realizations...
Lowering after destructuring tuple-valued realizations:
let nan_or_one.s0.row.max = (nan_or_one.extent.1 + nan_or_one.min.1) + -1
let nan_or_one.s0.row.min = nan_or_one.min.1
let nan_or_one.s0.col.max = (nan_or_one.extent.0 + nan_or_one.min.0) + -1
let nan_or_one.s0.col.min = nan_or_one.min.0
let b0.extent.0.required = ((nan_or_one.min.0 + (nan_or_one.extent.0 + -1)) + 1) - (nan_or_one.min.0 + 0)
let b0.min.0.required = nan_or_one.min.0 + 0
let b0.stride.0.required = 1
let b0.extent.1.required = ((nan_or_one.min.1 + (nan_or_one.extent.1 + -1)) + 1) - (nan_or_one.min.1 + 0)
let b0.min.1.required = nan_or_one.min.1 + 0
let b0.stride.1.required = b0.stride.0.required*b0.extent.0.required
let b0.extent.2.required = (0 + 1) - 0
let b0.min.2.required = 0
let b0.stride.2.required = b0.stride.1.required*b0.extent.1.required
let nan_or_one.extent.0.required = ((nan_or_one.min.0 + (nan_or_one.extent.0 + -1)) + 1) - (nan_or_one.min.0 + 0)
let nan_or_one.min.0.required = nan_or_one.min.0 + 0
let nan_or_one.stride.0.required = 1
let nan_or_one.extent.1.required = ((nan_or_one.min.1 + (nan_or_one.extent.1 + -1)) + 1) - (nan_or_one.min.1 + 0)
let nan_or_one.min.1.required = nan_or_one.min.1 + 0
let nan_or_one.stride.1.required = nan_or_one.stride.0.required*nan_or_one.extent.0.required
let b0.stride.0.constrained = 3
let b0.min.0.constrained = 0
let b0.extent.0.constrained = 1
let b0.stride.1.constrained = 3
let b0.min.1.constrained = 0
let b0.extent.1.constrained = 1
let b0.stride.2.constrained = 1
let b0.min.2.constrained = 0
let b0.extent.2.constrained = 3
let nan_or_one.stride.0.constrained = 1
let b0.stride.0.proposed = 3
let b0.min.0.proposed = 0
let b0.extent.0.proposed = 1
let b0.stride.1.proposed = 3
let b0.min.1.proposed = 0
let b0.extent.1.proposed = 1
let b0.stride.2.proposed = 1
let b0.min.2.proposed = 0
let b0.extent.2.proposed = 3
let nan_or_one.stride.0.proposed = 1
let nan_or_one.min.0.proposed = nan_or_one.min.0.required
let nan_or_one.extent.0.proposed = nan_or_one.extent.0.required
let nan_or_one.stride.1.proposed = nan_or_one.stride.1.required
let nan_or_one.min.1.proposed = nan_or_one.min.1.required
let nan_or_one.extent.1.proposed = nan_or_one.extent.1.required
assert(!(uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)b0.buffer) || ((b0.min.0.proposed <= b0.min.0.required) && (((b0.min.0.proposed + b0.extent.0.proposed) - 1) >= ((b0.min.0.required + b0.extent.0.required) - 1))), halide_error_constraints_make_required_region_smaller("Input buffer b0", 0, b0.min.0.proposed, (b0.min.0.proposed + b0.extent.0.proposed) - 1, b0.min.0.required, (b0.min.0.required + b0.extent.0.required) - 1))
assert(!(uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)b0.buffer) || ((b0.min.1.proposed <= b0.min.1.required) && (((b0.min.1.proposed + b0.extent.1.proposed) - 1) >= ((b0.min.1.required + b0.extent.1.required) - 1))), halide_error_constraints_make_required_region_smaller("Input buffer b0", 1, b0.min.1.proposed, (b0.min.1.proposed + b0.extent.1.proposed) - 1, b0.min.1.required, (b0.min.1.required + b0.extent.1.required) - 1))
assert(!(uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)b0.buffer) || ((b0.min.2.proposed <= b0.min.2.required) && (((b0.min.2.proposed + b0.extent.2.proposed) - 1) >= ((b0.min.2.required + b0.extent.2.required) - 1))), halide_error_constraints_make_required_region_smaller("Input buffer b0", 2, b0.min.2.proposed, (b0.min.2.proposed + b0.extent.2.proposed) - 1, b0.min.2.required, (b0.min.2.required + b0.extent.2.required) - 1))
assert(!(uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)nan_or_one.buffer) || ((nan_or_one.min.0.proposed <= nan_or_one.min.0.required) && (((nan_or_one.min.0.proposed + nan_or_one.extent.0.proposed) - 1) >= ((nan_or_one.min.0.required + nan_or_one.extent.0.required) - 1))), halide_error_constraints_make_required_region_smaller("Output buffer nan_or_one", 0, nan_or_one.min.0.proposed, (nan_or_one.min.0.proposed + nan_or_one.extent.0.proposed) - 1, nan_or_one.min.0.required, (nan_or_one.min.0.required + nan_or_one.extent.0.required) - 1))
assert(!(uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)nan_or_one.buffer) || ((nan_or_one.min.1.proposed <= nan_or_one.min.1.required) && (((nan_or_one.min.1.proposed + nan_or_one.extent.1.proposed) - 1) >= ((nan_or_one.min.1.required + nan_or_one.extent.1.required) - 1))), halide_error_constraints_make_required_region_smaller("Output buffer nan_or_one", 1, nan_or_one.min.1.proposed, (nan_or_one.min.1.proposed + nan_or_one.extent.1.proposed) - 1, nan_or_one.min.1.required, (nan_or_one.min.1.required + nan_or_one.extent.1.required) - 1))
if ((uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)b0.buffer)) {
(halide_buffer_t *)_halide_buffer_init((halide_buffer_t *)b0.buffer, (halide_dimension_t *)_halide_buffer_get_shape((halide_buffer_t *)b0.buffer), (void *)reinterpret((uint64)0), (uint64)0, (halide_device_interface_t *)reinterpret((uint64)0), 2, 32, 3, (halide_dimension_t *)make_struct(b0.min.0.proposed, b0.extent.0.proposed, b0.stride.0.proposed, 0, b0.min.1.proposed, b0.extent.1.proposed, b0.stride.1.proposed, 0, b0.min.2.proposed, b0.extent.2.proposed, b0.stride.2.proposed, 0), (uint64)0)
}
if ((uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)nan_or_one.buffer)) {
(halide_buffer_t *)_halide_buffer_init((halide_buffer_t *)nan_or_one.buffer, (halide_dimension_t *)_halide_buffer_get_shape((halide_buffer_t *)nan_or_one.buffer), (void *)reinterpret((uint64)0), (uint64)0, (halide_device_interface_t *)reinterpret((uint64)0), 2, 32, 2, (halide_dimension_t *)make_struct(nan_or_one.min.0.proposed, nan_or_one.extent.0.proposed, nan_or_one.stride.0.proposed, 0, nan_or_one.min.1.proposed, nan_or_one.extent.1.proposed, nan_or_one.stride.1.proposed, 0), (uint64)0)
}
if (!(((uint1)0 || (uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)b0.buffer)) || (uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)nan_or_one.buffer))) {
assert((uint32)b0.type == (uint32)73730, halide_error_bad_type("Input buffer b0", (uint32)b0.type, (uint32)73730))
assert(b0.dimensions == 3, halide_error_bad_dimensions("Input buffer b0", b0.dimensions, 3))
assert((uint32)nan_or_one.type == (uint32)73730, halide_error_bad_type("Output buffer nan_or_one", (uint32)nan_or_one.type, (uint32)73730))
assert(nan_or_one.dimensions == 2, halide_error_bad_dimensions("Output buffer nan_or_one", nan_or_one.dimensions, 2))
assert((b0.min.0 <= b0.min.0.required) && (((b0.min.0 + b0.extent.0) - 1) >= ((b0.min.0.required + b0.extent.0.required) - 1)), halide_error_access_out_of_bounds("Input buffer b0", 0, b0.min.0.required, (b0.min.0.required + b0.extent.0.required) - 1, b0.min.0, (b0.min.0 + b0.extent.0) - 1))
assert(b0.extent.0 >= 0, halide_error_buffer_extents_negative("Input buffer b0", 0, b0.extent.0))
assert((b0.min.1 <= b0.min.1.required) && (((b0.min.1 + b0.extent.1) - 1) >= ((b0.min.1.required + b0.extent.1.required) - 1)), halide_error_access_out_of_bounds("Input buffer b0", 1, b0.min.1.required, (b0.min.1.required + b0.extent.1.required) - 1, b0.min.1, (b0.min.1 + b0.extent.1) - 1))
assert(b0.extent.1 >= 0, halide_error_buffer_extents_negative("Input buffer b0", 1, b0.extent.1))
assert((b0.min.2 <= b0.min.2.required) && (((b0.min.2 + b0.extent.2) - 1) >= ((b0.min.2.required + b0.extent.2.required) - 1)), halide_error_access_out_of_bounds("Input buffer b0", 2, b0.min.2.required, (b0.min.2.required + b0.extent.2.required) - 1, b0.min.2, (b0.min.2 + b0.extent.2) - 1))
assert(b0.extent.2 >= 0, halide_error_buffer_extents_negative("Input buffer b0", 2, b0.extent.2))
assert((nan_or_one.min.0 <= nan_or_one.min.0.required) && (((nan_or_one.min.0 + nan_or_one.extent.0) - 1) >= ((nan_or_one.min.0.required + nan_or_one.extent.0.required) - 1)), halide_error_access_out_of_bounds("Output buffer nan_or_one", 0, nan_or_one.min.0.required, (nan_or_one.min.0.required + nan_or_one.extent.0.required) - 1, nan_or_one.min.0, (nan_or_one.min.0 + nan_or_one.extent.0) - 1))
assert(nan_or_one.extent.0 >= 0, halide_error_buffer_extents_negative("Output buffer nan_or_one", 0, nan_or_one.extent.0))
assert((nan_or_one.min.1 <= nan_or_one.min.1.required) && (((nan_or_one.min.1 + nan_or_one.extent.1) - 1) >= ((nan_or_one.min.1.required + nan_or_one.extent.1.required) - 1)), halide_error_access_out_of_bounds("Output buffer nan_or_one", 1, nan_or_one.min.1.required, (nan_or_one.min.1.required + nan_or_one.extent.1.required) - 1, nan_or_one.min.1, (nan_or_one.min.1 + nan_or_one.extent.1) - 1))
assert(nan_or_one.extent.1 >= 0, halide_error_buffer_extents_negative("Output buffer nan_or_one", 1, nan_or_one.extent.1))
assert(b0.stride.0 == b0.stride.0.constrained, halide_error_constraint_violated("b0.stride.0", b0.stride.0, "3", b0.stride.0.constrained))
assert(b0.min.0 == b0.min.0.constrained, halide_error_constraint_violated("b0.min.0", b0.min.0, "0", b0.min.0.constrained))
assert(b0.extent.0 == b0.extent.0.constrained, halide_error_constraint_violated("b0.extent.0", b0.extent.0, "1", b0.extent.0.constrained))
assert(b0.stride.1 == b0.stride.1.constrained, halide_error_constraint_violated("b0.stride.1", b0.stride.1, "3", b0.stride.1.constrained))
assert(b0.min.1 == b0.min.1.constrained, halide_error_constraint_violated("b0.min.1", b0.min.1, "0", b0.min.1.constrained))
assert(b0.extent.1 == b0.extent.1.constrained, halide_error_constraint_violated("b0.extent.1", b0.extent.1, "1", b0.extent.1.constrained))
assert(b0.stride.2 == b0.stride.2.constrained, halide_error_constraint_violated("b0.stride.2", b0.stride.2, "1", b0.stride.2.constrained))
assert(b0.min.2 == b0.min.2.constrained, halide_error_constraint_violated("b0.min.2", b0.min.2, "0", b0.min.2.constrained))
assert(b0.extent.2 == b0.extent.2.constrained, halide_error_constraint_violated("b0.extent.2", b0.extent.2, "3", b0.extent.2.constrained))
assert(nan_or_one.stride.0 == nan_or_one.stride.0.constrained, halide_error_constraint_violated("nan_or_one.stride.0", nan_or_one.stride.0, "1", nan_or_one.stride.0.constrained))
let b0.total_extent.0 = int64(b0.extent.0.constrained)
let b0.total_extent.1 = int64(b0.extent.1.constrained)*b0.total_extent.0
let b0.total_extent.2 = int64(b0.extent.2.constrained)*b0.total_extent.1
let nan_or_one.total_extent.0 = int64(nan_or_one.extent.0)
let nan_or_one.total_extent.1 = int64(nan_or_one.extent.1)*nan_or_one.total_extent.0
assert((uint64)abs(int64(b0.extent.0.constrained)*int64(b0.stride.0.constrained)) <= (uint64)2147483647, halide_error_buffer_allocation_too_large("b0", (uint64)abs(int64(b0.extent.0.constrained)*int64(b0.stride.0.constrained)), (uint64)2147483647))
assert((uint64)abs(int64(b0.extent.1.constrained)*int64(b0.stride.1.constrained)) <= (uint64)2147483647, halide_error_buffer_allocation_too_large("b0", (uint64)abs(int64(b0.extent.1.constrained)*int64(b0.stride.1.constrained)), (uint64)2147483647))
assert(b0.total_extent.1 <= (int64)2147483647, halide_error_buffer_extents_too_large("b0", b0.total_extent.1, (int64)2147483647))
assert((uint64)abs(int64(b0.extent.2.constrained)*int64(b0.stride.2.constrained)) <= (uint64)2147483647, halide_error_buffer_allocation_too_large("b0", (uint64)abs(int64(b0.extent.2.constrained)*int64(b0.stride.2.constrained)), (uint64)2147483647))
assert(b0.total_extent.2 <= (int64)2147483647, halide_error_buffer_extents_too_large("b0", b0.total_extent.2, (int64)2147483647))
assert((uint64)abs(int64(nan_or_one.extent.0)*int64(nan_or_one.stride.0.constrained)) <= (uint64)2147483647, halide_error_buffer_allocation_too_large("nan_or_one", (uint64)abs(int64(nan_or_one.extent.0)*int64(nan_or_one.stride.0.constrained)), (uint64)2147483647))
assert((uint64)abs(int64(nan_or_one.extent.1)*int64(nan_or_one.stride.1)) <= (uint64)2147483647, halide_error_buffer_allocation_too_large("nan_or_one", (uint64)abs(int64(nan_or_one.extent.1)*int64(nan_or_one.stride.1)), (uint64)2147483647))
assert(nan_or_one.total_extent.1 <= (int64)2147483647, halide_error_buffer_extents_too_large("nan_or_one", nan_or_one.total_extent.1, (int64)2147483647))
let nan_or_one.s0.row.loop_max = (nan_or_one.extent.1 + nan_or_one.min.1) + -1
let nan_or_one.s0.row.loop_min = nan_or_one.min.1
let nan_or_one.s0.row.loop_extent = nan_or_one.extent.1
let nan_or_one.s0.col.loop_max = (nan_or_one.extent.0 + nan_or_one.min.0) + -1
let nan_or_one.s0.col.loop_min = nan_or_one.min.0
let nan_or_one.s0.col.loop_extent = nan_or_one.extent.0
let nan_or_one.s0.col.col_outer.loop_extent = nan_or_one.extent.0
let nan_or_one.s0.col.col_outer.loop_max = nan_or_one.extent.0 + -1
let nan_or_one.s0.col.col_outer.loop_min = 0
let nan_or_one.s0.col.col_inner.loop_extent = 1
let nan_or_one.s0.col.col_inner.loop_max = 0
let nan_or_one.s0.col.col_inner.loop_min = 0
let nan_or_one.s0.row.row_outer.loop_extent = nan_or_one.extent.1
let nan_or_one.s0.row.row_outer.loop_max = nan_or_one.extent.1 + -1
let nan_or_one.s0.row.row_outer.loop_min = 0
let nan_or_one.s0.row.row_inner.loop_extent = 1
let nan_or_one.s0.row.row_inner.loop_max = 0
let nan_or_one.s0.row.row_inner.loop_min = 0
let nan_or_one.s0.row.row_outer.loop_min.orig = 0
produce nan_or_one {
gpu_block<Default_GPU> (nan_or_one.s0.row.row_outer, 0, nan_or_one.extent.1) {
let nan_or_one.s0.row.row_inner.base = nan_or_one.min.1 + nan_or_one.s0.row.row_outer
let nan_or_one.s0.col.col_outer.loop_min.orig = 0
gpu_block<Default_GPU> (nan_or_one.s0.col.col_outer, 0, nan_or_one.extent.0) {
let nan_or_one.s0.col.col_inner.base = nan_or_one.min.0 + nan_or_one.s0.col.col_outer
let nan_or_one.s0.row.row_inner.loop_min.orig = 0
gpu_thread<Default_GPU> (nan_or_one.s0.row.row_inner, 0, 1) {
let nan_or_one.s0.row = nan_or_one.min.1 + nan_or_one.s0.row.row_outer
let nan_or_one.s0.col.col_inner.loop_min.orig = 0
gpu_thread<Default_GPU> (nan_or_one.s0.col.col_inner, 0, 1) {
let nan_or_one.s0.col = nan_or_one.min.0 + nan_or_one.s0.col.col_outer
nan_or_one(nan_or_one.min.0 + nan_or_one.s0.col.col_outer, nan_or_one.min.1 + nan_or_one.s0.row.row_outer) = let t1 = (float32)strict_float(nanf) in (float32)strict_float(select((uint1)is_nan_f32((float32)strict_float((float32)b0(nan_or_one.min.0 + nan_or_one.s0.col.col_outer, nan_or_one.min.1 + nan_or_one.s0.row.row_outer, 0))), t1, t1))
}
}
}
}
}
}
Canonicalizing GPU var names...
Lowering after canonicalizing GPU var names:
let nan_or_one.s0.row.max = (nan_or_one.extent.1 + nan_or_one.min.1) + -1
let nan_or_one.s0.row.min = nan_or_one.min.1
let nan_or_one.s0.col.max = (nan_or_one.extent.0 + nan_or_one.min.0) + -1
let nan_or_one.s0.col.min = nan_or_one.min.0
let b0.extent.0.required = ((nan_or_one.min.0 + (nan_or_one.extent.0 + -1)) + 1) - (nan_or_one.min.0 + 0)
let b0.min.0.required = nan_or_one.min.0 + 0
let b0.stride.0.required = 1
let b0.extent.1.required = ((nan_or_one.min.1 + (nan_or_one.extent.1 + -1)) + 1) - (nan_or_one.min.1 + 0)
let b0.min.1.required = nan_or_one.min.1 + 0
let b0.stride.1.required = b0.stride.0.required*b0.extent.0.required
let b0.extent.2.required = (0 + 1) - 0
let b0.min.2.required = 0
let b0.stride.2.required = b0.stride.1.required*b0.extent.1.required
let nan_or_one.extent.0.required = ((nan_or_one.min.0 + (nan_or_one.extent.0 + -1)) + 1) - (nan_or_one.min.0 + 0)
let nan_or_one.min.0.required = nan_or_one.min.0 + 0
let nan_or_one.stride.0.required = 1
let nan_or_one.extent.1.required = ((nan_or_one.min.1 + (nan_or_one.extent.1 + -1)) + 1) - (nan_or_one.min.1 + 0)
let nan_or_one.min.1.required = nan_or_one.min.1 + 0
let nan_or_one.stride.1.required = nan_or_one.stride.0.required*nan_or_one.extent.0.required
let b0.stride.0.constrained = 3
let b0.min.0.constrained = 0
let b0.extent.0.constrained = 1
let b0.stride.1.constrained = 3
let b0.min.1.constrained = 0
let b0.extent.1.constrained = 1
let b0.stride.2.constrained = 1
let b0.min.2.constrained = 0
let b0.extent.2.constrained = 3
let nan_or_one.stride.0.constrained = 1
let b0.stride.0.proposed = 3
let b0.min.0.proposed = 0
let b0.extent.0.proposed = 1
let b0.stride.1.proposed = 3
let b0.min.1.proposed = 0
let b0.extent.1.proposed = 1
let b0.stride.2.proposed = 1
let b0.min.2.proposed = 0
let b0.extent.2.proposed = 3
let nan_or_one.stride.0.proposed = 1
let nan_or_one.min.0.proposed = nan_or_one.min.0.required
let nan_or_one.extent.0.proposed = nan_or_one.extent.0.required
let nan_or_one.stride.1.proposed = nan_or_one.stride.1.required
let nan_or_one.min.1.proposed = nan_or_one.min.1.required
let nan_or_one.extent.1.proposed = nan_or_one.extent.1.required
assert(!(uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)b0.buffer) || ((b0.min.0.proposed <= b0.min.0.required) && (((b0.min.0.proposed + b0.extent.0.proposed) - 1) >= ((b0.min.0.required + b0.extent.0.required) - 1))), halide_error_constraints_make_required_region_smaller("Input buffer b0", 0, b0.min.0.proposed, (b0.min.0.proposed + b0.extent.0.proposed) - 1, b0.min.0.required, (b0.min.0.required + b0.extent.0.required) - 1))
assert(!(uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)b0.buffer) || ((b0.min.1.proposed <= b0.min.1.required) && (((b0.min.1.proposed + b0.extent.1.proposed) - 1) >= ((b0.min.1.required + b0.extent.1.required) - 1))), halide_error_constraints_make_required_region_smaller("Input buffer b0", 1, b0.min.1.proposed, (b0.min.1.proposed + b0.extent.1.proposed) - 1, b0.min.1.required, (b0.min.1.required + b0.extent.1.required) - 1))
assert(!(uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)b0.buffer) || ((b0.min.2.proposed <= b0.min.2.required) && (((b0.min.2.proposed + b0.extent.2.proposed) - 1) >= ((b0.min.2.required + b0.extent.2.required) - 1))), halide_error_constraints_make_required_region_smaller("Input buffer b0", 2, b0.min.2.proposed, (b0.min.2.proposed + b0.extent.2.proposed) - 1, b0.min.2.required, (b0.min.2.required + b0.extent.2.required) - 1))
assert(!(uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)nan_or_one.buffer) || ((nan_or_one.min.0.proposed <= nan_or_one.min.0.required) && (((nan_or_one.min.0.proposed + nan_or_one.extent.0.proposed) - 1) >= ((nan_or_one.min.0.required + nan_or_one.extent.0.required) - 1))), halide_error_constraints_make_required_region_smaller("Output buffer nan_or_one", 0, nan_or_one.min.0.proposed, (nan_or_one.min.0.proposed + nan_or_one.extent.0.proposed) - 1, nan_or_one.min.0.required, (nan_or_one.min.0.required + nan_or_one.extent.0.required) - 1))
assert(!(uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)nan_or_one.buffer) || ((nan_or_one.min.1.proposed <= nan_or_one.min.1.required) && (((nan_or_one.min.1.proposed + nan_or_one.extent.1.proposed) - 1) >= ((nan_or_one.min.1.required + nan_or_one.extent.1.required) - 1))), halide_error_constraints_make_required_region_smaller("Output buffer nan_or_one", 1, nan_or_one.min.1.proposed, (nan_or_one.min.1.proposed + nan_or_one.extent.1.proposed) - 1, nan_or_one.min.1.required, (nan_or_one.min.1.required + nan_or_one.extent.1.required) - 1))
if ((uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)b0.buffer)) {
(halide_buffer_t *)_halide_buffer_init((halide_buffer_t *)b0.buffer, (halide_dimension_t *)_halide_buffer_get_shape((halide_buffer_t *)b0.buffer), (void *)reinterpret((uint64)0), (uint64)0, (halide_device_interface_t *)reinterpret((uint64)0), 2, 32, 3, (halide_dimension_t *)make_struct(b0.min.0.proposed, b0.extent.0.proposed, b0.stride.0.proposed, 0, b0.min.1.proposed, b0.extent.1.proposed, b0.stride.1.proposed, 0, b0.min.2.proposed, b0.extent.2.proposed, b0.stride.2.proposed, 0), (uint64)0)
}
if ((uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)nan_or_one.buffer)) {
(halide_buffer_t *)_halide_buffer_init((halide_buffer_t *)nan_or_one.buffer, (halide_dimension_t *)_halide_buffer_get_shape((halide_buffer_t *)nan_or_one.buffer), (void *)reinterpret((uint64)0), (uint64)0, (halide_device_interface_t *)reinterpret((uint64)0), 2, 32, 2, (halide_dimension_t *)make_struct(nan_or_one.min.0.proposed, nan_or_one.extent.0.proposed, nan_or_one.stride.0.proposed, 0, nan_or_one.min.1.proposed, nan_or_one.extent.1.proposed, nan_or_one.stride.1.proposed, 0), (uint64)0)
}
if (!(((uint1)0 || (uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)b0.buffer)) || (uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)nan_or_one.buffer))) {
assert((uint32)b0.type == (uint32)73730, halide_error_bad_type("Input buffer b0", (uint32)b0.type, (uint32)73730))
assert(b0.dimensions == 3, halide_error_bad_dimensions("Input buffer b0", b0.dimensions, 3))
assert((uint32)nan_or_one.type == (uint32)73730, halide_error_bad_type("Output buffer nan_or_one", (uint32)nan_or_one.type, (uint32)73730))
assert(nan_or_one.dimensions == 2, halide_error_bad_dimensions("Output buffer nan_or_one", nan_or_one.dimensions, 2))
assert((b0.min.0 <= b0.min.0.required) && (((b0.min.0 + b0.extent.0) - 1) >= ((b0.min.0.required + b0.extent.0.required) - 1)), halide_error_access_out_of_bounds("Input buffer b0", 0, b0.min.0.required, (b0.min.0.required + b0.extent.0.required) - 1, b0.min.0, (b0.min.0 + b0.extent.0) - 1))
assert(b0.extent.0 >= 0, halide_error_buffer_extents_negative("Input buffer b0", 0, b0.extent.0))
assert((b0.min.1 <= b0.min.1.required) && (((b0.min.1 + b0.extent.1) - 1) >= ((b0.min.1.required + b0.extent.1.required) - 1)), halide_error_access_out_of_bounds("Input buffer b0", 1, b0.min.1.required, (b0.min.1.required + b0.extent.1.required) - 1, b0.min.1, (b0.min.1 + b0.extent.1) - 1))
assert(b0.extent.1 >= 0, halide_error_buffer_extents_negative("Input buffer b0", 1, b0.extent.1))
assert((b0.min.2 <= b0.min.2.required) && (((b0.min.2 + b0.extent.2) - 1) >= ((b0.min.2.required + b0.extent.2.required) - 1)), halide_error_access_out_of_bounds("Input buffer b0", 2, b0.min.2.required, (b0.min.2.required + b0.extent.2.required) - 1, b0.min.2, (b0.min.2 + b0.extent.2) - 1))
assert(b0.extent.2 >= 0, halide_error_buffer_extents_negative("Input buffer b0", 2, b0.extent.2))
assert((nan_or_one.min.0 <= nan_or_one.min.0.required) && (((nan_or_one.min.0 + nan_or_one.extent.0) - 1) >= ((nan_or_one.min.0.required + nan_or_one.extent.0.required) - 1)), halide_error_access_out_of_bounds("Output buffer nan_or_one", 0, nan_or_one.min.0.required, (nan_or_one.min.0.required + nan_or_one.extent.0.required) - 1, nan_or_one.min.0, (nan_or_one.min.0 + nan_or_one.extent.0) - 1))
assert(nan_or_one.extent.0 >= 0, halide_error_buffer_extents_negative("Output buffer nan_or_one", 0, nan_or_one.extent.0))
assert((nan_or_one.min.1 <= nan_or_one.min.1.required) && (((nan_or_one.min.1 + nan_or_one.extent.1) - 1) >= ((nan_or_one.min.1.required + nan_or_one.extent.1.required) - 1)), halide_error_access_out_of_bounds("Output buffer nan_or_one", 1, nan_or_one.min.1.required, (nan_or_one.min.1.required + nan_or_one.extent.1.required) - 1, nan_or_one.min.1, (nan_or_one.min.1 + nan_or_one.extent.1) - 1))
assert(nan_or_one.extent.1 >= 0, halide_error_buffer_extents_negative("Output buffer nan_or_one", 1, nan_or_one.extent.1))
assert(b0.stride.0 == b0.stride.0.constrained, halide_error_constraint_violated("b0.stride.0", b0.stride.0, "3", b0.stride.0.constrained))
assert(b0.min.0 == b0.min.0.constrained, halide_error_constraint_violated("b0.min.0", b0.min.0, "0", b0.min.0.constrained))
assert(b0.extent.0 == b0.extent.0.constrained, halide_error_constraint_violated("b0.extent.0", b0.extent.0, "1", b0.extent.0.constrained))
assert(b0.stride.1 == b0.stride.1.constrained, halide_error_constraint_violated("b0.stride.1", b0.stride.1, "3", b0.stride.1.constrained))
assert(b0.min.1 == b0.min.1.constrained, halide_error_constraint_violated("b0.min.1", b0.min.1, "0", b0.min.1.constrained))
assert(b0.extent.1 == b0.extent.1.constrained, halide_error_constraint_violated("b0.extent.1", b0.extent.1, "1", b0.extent.1.constrained))
assert(b0.stride.2 == b0.stride.2.constrained, halide_error_constraint_violated("b0.stride.2", b0.stride.2, "1", b0.stride.2.constrained))
assert(b0.min.2 == b0.min.2.constrained, halide_error_constraint_violated("b0.min.2", b0.min.2, "0", b0.min.2.constrained))
assert(b0.extent.2 == b0.extent.2.constrained, halide_error_constraint_violated("b0.extent.2", b0.extent.2, "3", b0.extent.2.constrained))
assert(nan_or_one.stride.0 == nan_or_one.stride.0.constrained, halide_error_constraint_violated("nan_or_one.stride.0", nan_or_one.stride.0, "1", nan_or_one.stride.0.constrained))
let b0.total_extent.0 = int64(b0.extent.0.constrained)
let b0.total_extent.1 = int64(b0.extent.1.constrained)*b0.total_extent.0
let b0.total_extent.2 = int64(b0.extent.2.constrained)*b0.total_extent.1
let nan_or_one.total_extent.0 = int64(nan_or_one.extent.0)
let nan_or_one.total_extent.1 = int64(nan_or_one.extent.1)*nan_or_one.total_extent.0
assert((uint64)abs(int64(b0.extent.0.constrained)*int64(b0.stride.0.constrained)) <= (uint64)2147483647, halide_error_buffer_allocation_too_large("b0", (uint64)abs(int64(b0.extent.0.constrained)*int64(b0.stride.0.constrained)), (uint64)2147483647))
assert((uint64)abs(int64(b0.extent.1.constrained)*int64(b0.stride.1.constrained)) <= (uint64)2147483647, halide_error_buffer_allocation_too_large("b0", (uint64)abs(int64(b0.extent.1.constrained)*int64(b0.stride.1.constrained)), (uint64)2147483647))
assert(b0.total_extent.1 <= (int64)2147483647, halide_error_buffer_extents_too_large("b0", b0.total_extent.1, (int64)2147483647))
assert((uint64)abs(int64(b0.extent.2.constrained)*int64(b0.stride.2.constrained)) <= (uint64)2147483647, halide_error_buffer_allocation_too_large("b0", (uint64)abs(int64(b0.extent.2.constrained)*int64(b0.stride.2.constrained)), (uint64)2147483647))
assert(b0.total_extent.2 <= (int64)2147483647, halide_error_buffer_extents_too_large("b0", b0.total_extent.2, (int64)2147483647))
assert((uint64)abs(int64(nan_or_one.extent.0)*int64(nan_or_one.stride.0.constrained)) <= (uint64)2147483647, halide_error_buffer_allocation_too_large("nan_or_one", (uint64)abs(int64(nan_or_one.extent.0)*int64(nan_or_one.stride.0.constrained)), (uint64)2147483647))
assert((uint64)abs(int64(nan_or_one.extent.1)*int64(nan_or_one.stride.1)) <= (uint64)2147483647, halide_error_buffer_allocation_too_large("nan_or_one", (uint64)abs(int64(nan_or_one.extent.1)*int64(nan_or_one.stride.1)), (uint64)2147483647))
assert(nan_or_one.total_extent.1 <= (int64)2147483647, halide_error_buffer_extents_too_large("nan_or_one", nan_or_one.total_extent.1, (int64)2147483647))
let nan_or_one.s0.row.loop_max = (nan_or_one.extent.1 + nan_or_one.min.1) + -1
let nan_or_one.s0.row.loop_min = nan_or_one.min.1
let nan_or_one.s0.row.loop_extent = nan_or_one.extent.1
let nan_or_one.s0.col.loop_max = (nan_or_one.extent.0 + nan_or_one.min.0) + -1
let nan_or_one.s0.col.loop_min = nan_or_one.min.0
let nan_or_one.s0.col.loop_extent = nan_or_one.extent.0
let nan_or_one.s0.col.col_outer.__block_id_x.loop_extent = nan_or_one.extent.0
let nan_or_one.s0.col.col_outer.__block_id_x.loop_max = nan_or_one.extent.0 + -1
let nan_or_one.s0.col.col_outer.__block_id_x.loop_min = 0
let nan_or_one.s0.col.col_inner.__thread_id_x.loop_extent = 1
let nan_or_one.s0.col.col_inner.__thread_id_x.loop_max = 0
let nan_or_one.s0.col.col_inner.__thread_id_x.loop_min = 0
let nan_or_one.s0.row.row_outer.__block_id_y.loop_extent = nan_or_one.extent.1
let nan_or_one.s0.row.row_outer.__block_id_y.loop_max = nan_or_one.extent.1 + -1
let nan_or_one.s0.row.row_outer.__block_id_y.loop_min = 0
let nan_or_one.s0.row.row_inner.__thread_id_y.loop_extent = 1
let nan_or_one.s0.row.row_inner.__thread_id_y.loop_max = 0
let nan_or_one.s0.row.row_inner.__thread_id_y.loop_min = 0
let nan_or_one.s0.row.row_outer.loop_min.orig = 0
produce nan_or_one {
gpu_block<Default_GPU> (nan_or_one.s0.row.row_outer.__block_id_y, 0, nan_or_one.extent.1) {
let nan_or_one.s0.row.row_inner.base = nan_or_one.min.1 + nan_or_one.s0.row.row_outer.__block_id_y
let nan_or_one.s0.col.col_outer.loop_min.orig = 0
gpu_block<Default_GPU> (nan_or_one.s0.col.col_outer.__block_id_x, 0, nan_or_one.extent.0) {
let nan_or_one.s0.col.col_inner.base = nan_or_one.min.0 + nan_or_one.s0.col.col_outer.__block_id_x
let nan_or_one.s0.row.row_inner.loop_min.orig = 0
gpu_thread<Default_GPU> (nan_or_one.s0.row.row_inner.__thread_id_y, 0, 1) {
let nan_or_one.s0.row = nan_or_one.min.1 + nan_or_one.s0.row.row_outer.__block_id_y
let nan_or_one.s0.col.col_inner.loop_min.orig = 0
gpu_thread<Default_GPU> (nan_or_one.s0.col.col_inner.__thread_id_x, 0, 1) {
let nan_or_one.s0.col = nan_or_one.min.0 + nan_or_one.s0.col.col_outer.__block_id_x
nan_or_one(nan_or_one.min.0 + nan_or_one.s0.col.col_outer.__block_id_x, nan_or_one.min.1 + nan_or_one.s0.row.row_outer.__block_id_y) = let t1 = (float32)strict_float(nanf) in (float32)strict_float(select((uint1)is_nan_f32((float32)strict_float((float32)b0(nan_or_one.min.0 + nan_or_one.s0.col.col_outer.__block_id_x, nan_or_one.min.1 + nan_or_one.s0.row.row_outer.__block_id_y, 0))), t1, t1))
}
}
}
}
}
}
Bounding small realizations...
Lowering after bounding small realizations:
let nan_or_one.s0.row.max = (nan_or_one.extent.1 + nan_or_one.min.1) + -1
let nan_or_one.s0.row.min = nan_or_one.min.1
let nan_or_one.s0.col.max = (nan_or_one.extent.0 + nan_or_one.min.0) + -1
let nan_or_one.s0.col.min = nan_or_one.min.0
let b0.extent.0.required = ((nan_or_one.min.0 + (nan_or_one.extent.0 + -1)) + 1) - (nan_or_one.min.0 + 0)
let b0.min.0.required = nan_or_one.min.0 + 0
let b0.stride.0.required = 1
let b0.extent.1.required = ((nan_or_one.min.1 + (nan_or_one.extent.1 + -1)) + 1) - (nan_or_one.min.1 + 0)
let b0.min.1.required = nan_or_one.min.1 + 0
let b0.stride.1.required = b0.stride.0.required*b0.extent.0.required
let b0.extent.2.required = (0 + 1) - 0
let b0.min.2.required = 0
let b0.stride.2.required = b0.stride.1.required*b0.extent.1.required
let nan_or_one.extent.0.required = ((nan_or_one.min.0 + (nan_or_one.extent.0 + -1)) + 1) - (nan_or_one.min.0 + 0)
let nan_or_one.min.0.required = nan_or_one.min.0 + 0
let nan_or_one.stride.0.required = 1
let nan_or_one.extent.1.required = ((nan_or_one.min.1 + (nan_or_one.extent.1 + -1)) + 1) - (nan_or_one.min.1 + 0)
let nan_or_one.min.1.required = nan_or_one.min.1 + 0
let nan_or_one.stride.1.required = nan_or_one.stride.0.required*nan_or_one.extent.0.required
let b0.stride.0.constrained = 3
let b0.min.0.constrained = 0
let b0.extent.0.constrained = 1
let b0.stride.1.constrained = 3
let b0.min.1.constrained = 0
let b0.extent.1.constrained = 1
let b0.stride.2.constrained = 1
let b0.min.2.constrained = 0
let b0.extent.2.constrained = 3
let nan_or_one.stride.0.constrained = 1
let b0.stride.0.proposed = 3
let b0.min.0.proposed = 0
let b0.extent.0.proposed = 1
let b0.stride.1.proposed = 3
let b0.min.1.proposed = 0
let b0.extent.1.proposed = 1
let b0.stride.2.proposed = 1
let b0.min.2.proposed = 0
let b0.extent.2.proposed = 3
let nan_or_one.stride.0.proposed = 1
let nan_or_one.min.0.proposed = nan_or_one.min.0.required
let nan_or_one.extent.0.proposed = nan_or_one.extent.0.required
let nan_or_one.stride.1.proposed = nan_or_one.stride.1.required
let nan_or_one.min.1.proposed = nan_or_one.min.1.required
let nan_or_one.extent.1.proposed = nan_or_one.extent.1.required
assert(!(uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)b0.buffer) || ((b0.min.0.proposed <= b0.min.0.required) && (((b0.min.0.proposed + b0.extent.0.proposed) - 1) >= ((b0.min.0.required + b0.extent.0.required) - 1))), halide_error_constraints_make_required_region_smaller("Input buffer b0", 0, b0.min.0.proposed, (b0.min.0.proposed + b0.extent.0.proposed) - 1, b0.min.0.required, (b0.min.0.required + b0.extent.0.required) - 1))
assert(!(uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)b0.buffer) || ((b0.min.1.proposed <= b0.min.1.required) && (((b0.min.1.proposed + b0.extent.1.proposed) - 1) >= ((b0.min.1.required + b0.extent.1.required) - 1))), halide_error_constraints_make_required_region_smaller("Input buffer b0", 1, b0.min.1.proposed, (b0.min.1.proposed + b0.extent.1.proposed) - 1, b0.min.1.required, (b0.min.1.required + b0.extent.1.required) - 1))
assert(!(uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)b0.buffer) || ((b0.min.2.proposed <= b0.min.2.required) && (((b0.min.2.proposed + b0.extent.2.proposed) - 1) >= ((b0.min.2.required + b0.extent.2.required) - 1))), halide_error_constraints_make_required_region_smaller("Input buffer b0", 2, b0.min.2.proposed, (b0.min.2.proposed + b0.extent.2.proposed) - 1, b0.min.2.required, (b0.min.2.required + b0.extent.2.required) - 1))
assert(!(uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)nan_or_one.buffer) || ((nan_or_one.min.0.proposed <= nan_or_one.min.0.required) && (((nan_or_one.min.0.proposed + nan_or_one.extent.0.proposed) - 1) >= ((nan_or_one.min.0.required + nan_or_one.extent.0.required) - 1))), halide_error_constraints_make_required_region_smaller("Output buffer nan_or_one", 0, nan_or_one.min.0.proposed, (nan_or_one.min.0.proposed + nan_or_one.extent.0.proposed) - 1, nan_or_one.min.0.required, (nan_or_one.min.0.required + nan_or_one.extent.0.required) - 1))
assert(!(uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)nan_or_one.buffer) || ((nan_or_one.min.1.proposed <= nan_or_one.min.1.required) && (((nan_or_one.min.1.proposed + nan_or_one.extent.1.proposed) - 1) >= ((nan_or_one.min.1.required + nan_or_one.extent.1.required) - 1))), halide_error_constraints_make_required_region_smaller("Output buffer nan_or_one", 1, nan_or_one.min.1.proposed, (nan_or_one.min.1.proposed + nan_or_one.extent.1.proposed) - 1, nan_or_one.min.1.required, (nan_or_one.min.1.required + nan_or_one.extent.1.required) - 1))
if ((uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)b0.buffer)) {
(halide_buffer_t *)_halide_buffer_init((halide_buffer_t *)b0.buffer, (halide_dimension_t *)_halide_buffer_get_shape((halide_buffer_t *)b0.buffer), (void *)reinterpret((uint64)0), (uint64)0, (halide_device_interface_t *)reinterpret((uint64)0), 2, 32, 3, (halide_dimension_t *)make_struct(b0.min.0.proposed, b0.extent.0.proposed, b0.stride.0.proposed, 0, b0.min.1.proposed, b0.extent.1.proposed, b0.stride.1.proposed, 0, b0.min.2.proposed, b0.extent.2.proposed, b0.stride.2.proposed, 0), (uint64)0)
}
if ((uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)nan_or_one.buffer)) {
(halide_buffer_t *)_halide_buffer_init((halide_buffer_t *)nan_or_one.buffer, (halide_dimension_t *)_halide_buffer_get_shape((halide_buffer_t *)nan_or_one.buffer), (void *)reinterpret((uint64)0), (uint64)0, (halide_device_interface_t *)reinterpret((uint64)0), 2, 32, 2, (halide_dimension_t *)make_struct(nan_or_one.min.0.proposed, nan_or_one.extent.0.proposed, nan_or_one.stride.0.proposed, 0, nan_or_one.min.1.proposed, nan_or_one.extent.1.proposed, nan_or_one.stride.1.proposed, 0), (uint64)0)
}
if (!(((uint1)0 || (uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)b0.buffer)) || (uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)nan_or_one.buffer))) {
assert((uint32)b0.type == (uint32)73730, halide_error_bad_type("Input buffer b0", (uint32)b0.type, (uint32)73730))
assert(b0.dimensions == 3, halide_error_bad_dimensions("Input buffer b0", b0.dimensions, 3))
assert((uint32)nan_or_one.type == (uint32)73730, halide_error_bad_type("Output buffer nan_or_one", (uint32)nan_or_one.type, (uint32)73730))
assert(nan_or_one.dimensions == 2, halide_error_bad_dimensions("Output buffer nan_or_one", nan_or_one.dimensions, 2))
assert((b0.min.0 <= b0.min.0.required) && (((b0.min.0 + b0.extent.0) - 1) >= ((b0.min.0.required + b0.extent.0.required) - 1)), halide_error_access_out_of_bounds("Input buffer b0", 0, b0.min.0.required, (b0.min.0.required + b0.extent.0.required) - 1, b0.min.0, (b0.min.0 + b0.extent.0) - 1))
assert(b0.extent.0 >= 0, halide_error_buffer_extents_negative("Input buffer b0", 0, b0.extent.0))
assert((b0.min.1 <= b0.min.1.required) && (((b0.min.1 + b0.extent.1) - 1) >= ((b0.min.1.required + b0.extent.1.required) - 1)), halide_error_access_out_of_bounds("Input buffer b0", 1, b0.min.1.required, (b0.min.1.required + b0.extent.1.required) - 1, b0.min.1, (b0.min.1 + b0.extent.1) - 1))
assert(b0.extent.1 >= 0, halide_error_buffer_extents_negative("Input buffer b0", 1, b0.extent.1))
assert((b0.min.2 <= b0.min.2.required) && (((b0.min.2 + b0.extent.2) - 1) >= ((b0.min.2.required + b0.extent.2.required) - 1)), halide_error_access_out_of_bounds("Input buffer b0", 2, b0.min.2.required, (b0.min.2.required + b0.extent.2.required) - 1, b0.min.2, (b0.min.2 + b0.extent.2) - 1))
assert(b0.extent.2 >= 0, halide_error_buffer_extents_negative("Input buffer b0", 2, b0.extent.2))
assert((nan_or_one.min.0 <= nan_or_one.min.0.required) && (((nan_or_one.min.0 + nan_or_one.extent.0) - 1) >= ((nan_or_one.min.0.required + nan_or_one.extent.0.required) - 1)), halide_error_access_out_of_bounds("Output buffer nan_or_one", 0, nan_or_one.min.0.required, (nan_or_one.min.0.required + nan_or_one.extent.0.required) - 1, nan_or_one.min.0, (nan_or_one.min.0 + nan_or_one.extent.0) - 1))
assert(nan_or_one.extent.0 >= 0, halide_error_buffer_extents_negative("Output buffer nan_or_one", 0, nan_or_one.extent.0))
assert((nan_or_one.min.1 <= nan_or_one.min.1.required) && (((nan_or_one.min.1 + nan_or_one.extent.1) - 1) >= ((nan_or_one.min.1.required + nan_or_one.extent.1.required) - 1)), halide_error_access_out_of_bounds("Output buffer nan_or_one", 1, nan_or_one.min.1.required, (nan_or_one.min.1.required + nan_or_one.extent.1.required) - 1, nan_or_one.min.1, (nan_or_one.min.1 + nan_or_one.extent.1) - 1))
assert(nan_or_one.extent.1 >= 0, halide_error_buffer_extents_negative("Output buffer nan_or_one", 1, nan_or_one.extent.1))
assert(b0.stride.0 == b0.stride.0.constrained, halide_error_constraint_violated("b0.stride.0", b0.stride.0, "3", b0.stride.0.constrained))
assert(b0.min.0 == b0.min.0.constrained, halide_error_constraint_violated("b0.min.0", b0.min.0, "0", b0.min.0.constrained))
assert(b0.extent.0 == b0.extent.0.constrained, halide_error_constraint_violated("b0.extent.0", b0.extent.0, "1", b0.extent.0.constrained))
assert(b0.stride.1 == b0.stride.1.constrained, halide_error_constraint_violated("b0.stride.1", b0.stride.1, "3", b0.stride.1.constrained))
assert(b0.min.1 == b0.min.1.constrained, halide_error_constraint_violated("b0.min.1", b0.min.1, "0", b0.min.1.constrained))
assert(b0.extent.1 == b0.extent.1.constrained, halide_error_constraint_violated("b0.extent.1", b0.extent.1, "1", b0.extent.1.constrained))
assert(b0.stride.2 == b0.stride.2.constrained, halide_error_constraint_violated("b0.stride.2", b0.stride.2, "1", b0.stride.2.constrained))
assert(b0.min.2 == b0.min.2.constrained, halide_error_constraint_violated("b0.min.2", b0.min.2, "0", b0.min.2.constrained))
assert(b0.extent.2 == b0.extent.2.constrained, halide_error_constraint_violated("b0.extent.2", b0.extent.2, "3", b0.extent.2.constrained))
assert(nan_or_one.stride.0 == nan_or_one.stride.0.constrained, halide_error_constraint_violated("nan_or_one.stride.0", nan_or_one.stride.0, "1", nan_or_one.stride.0.constrained))
let b0.total_extent.0 = int64(b0.extent.0.constrained)
let b0.total_extent.1 = int64(b0.extent.1.constrained)*b0.total_extent.0
let b0.total_extent.2 = int64(b0.extent.2.constrained)*b0.total_extent.1
let nan_or_one.total_extent.0 = int64(nan_or_one.extent.0)
let nan_or_one.total_extent.1 = int64(nan_or_one.extent.1)*nan_or_one.total_extent.0
assert((uint64)abs(int64(b0.extent.0.constrained)*int64(b0.stride.0.constrained)) <= (uint64)2147483647, halide_error_buffer_allocation_too_large("b0", (uint64)abs(int64(b0.extent.0.constrained)*int64(b0.stride.0.constrained)), (uint64)2147483647))
assert((uint64)abs(int64(b0.extent.1.constrained)*int64(b0.stride.1.constrained)) <= (uint64)2147483647, halide_error_buffer_allocation_too_large("b0", (uint64)abs(int64(b0.extent.1.constrained)*int64(b0.stride.1.constrained)), (uint64)2147483647))
assert(b0.total_extent.1 <= (int64)2147483647, halide_error_buffer_extents_too_large("b0", b0.total_extent.1, (int64)2147483647))
assert((uint64)abs(int64(b0.extent.2.constrained)*int64(b0.stride.2.constrained)) <= (uint64)2147483647, halide_error_buffer_allocation_too_large("b0", (uint64)abs(int64(b0.extent.2.constrained)*int64(b0.stride.2.constrained)), (uint64)2147483647))
assert(b0.total_extent.2 <= (int64)2147483647, halide_error_buffer_extents_too_large("b0", b0.total_extent.2, (int64)2147483647))
assert((uint64)abs(int64(nan_or_one.extent.0)*int64(nan_or_one.stride.0.constrained)) <= (uint64)2147483647, halide_error_buffer_allocation_too_large("nan_or_one", (uint64)abs(int64(nan_or_one.extent.0)*int64(nan_or_one.stride.0.constrained)), (uint64)2147483647))
assert((uint64)abs(int64(nan_or_one.extent.1)*int64(nan_or_one.stride.1)) <= (uint64)2147483647, halide_error_buffer_allocation_too_large("nan_or_one", (uint64)abs(int64(nan_or_one.extent.1)*int64(nan_or_one.stride.1)), (uint64)2147483647))
assert(nan_or_one.total_extent.1 <= (int64)2147483647, halide_error_buffer_extents_too_large("nan_or_one", nan_or_one.total_extent.1, (int64)2147483647))
let nan_or_one.s0.row.loop_max = (nan_or_one.extent.1 + nan_or_one.min.1) + -1
let nan_or_one.s0.row.loop_min = nan_or_one.min.1
let nan_or_one.s0.row.loop_extent = nan_or_one.extent.1
let nan_or_one.s0.col.loop_max = (nan_or_one.extent.0 + nan_or_one.min.0) + -1
let nan_or_one.s0.col.loop_min = nan_or_one.min.0
let nan_or_one.s0.col.loop_extent = nan_or_one.extent.0
let nan_or_one.s0.col.col_outer.__block_id_x.loop_extent = nan_or_one.extent.0
let nan_or_one.s0.col.col_outer.__block_id_x.loop_max = nan_or_one.extent.0 + -1
let nan_or_one.s0.col.col_outer.__block_id_x.loop_min = 0
let nan_or_one.s0.col.col_inner.__thread_id_x.loop_extent = 1
let nan_or_one.s0.col.col_inner.__thread_id_x.loop_max = 0
let nan_or_one.s0.col.col_inner.__thread_id_x.loop_min = 0
let nan_or_one.s0.row.row_outer.__block_id_y.loop_extent = nan_or_one.extent.1
let nan_or_one.s0.row.row_outer.__block_id_y.loop_max = nan_or_one.extent.1 + -1
let nan_or_one.s0.row.row_outer.__block_id_y.loop_min = 0
let nan_or_one.s0.row.row_inner.__thread_id_y.loop_extent = 1
let nan_or_one.s0.row.row_inner.__thread_id_y.loop_max = 0
let nan_or_one.s0.row.row_inner.__thread_id_y.loop_min = 0
let nan_or_one.s0.row.row_outer.loop_min.orig = 0
produce nan_or_one {
gpu_block<Default_GPU> (nan_or_one.s0.row.row_outer.__block_id_y, 0, nan_or_one.extent.1) {
let nan_or_one.s0.row.row_inner.base = nan_or_one.min.1 + nan_or_one.s0.row.row_outer.__block_id_y
let nan_or_one.s0.col.col_outer.loop_min.orig = 0
gpu_block<Default_GPU> (nan_or_one.s0.col.col_outer.__block_id_x, 0, nan_or_one.extent.0) {
let nan_or_one.s0.col.col_inner.base = nan_or_one.min.0 + nan_or_one.s0.col.col_outer.__block_id_x
let nan_or_one.s0.row.row_inner.loop_min.orig = 0
gpu_thread<Default_GPU> (nan_or_one.s0.row.row_inner.__thread_id_y, 0, 1) {
let nan_or_one.s0.row = nan_or_one.min.1 + nan_or_one.s0.row.row_outer.__block_id_y
let nan_or_one.s0.col.col_inner.loop_min.orig = 0
gpu_thread<Default_GPU> (nan_or_one.s0.col.col_inner.__thread_id_x, 0, 1) {
let nan_or_one.s0.col = nan_or_one.min.0 + nan_or_one.s0.col.col_outer.__block_id_x
nan_or_one(nan_or_one.min.0 + nan_or_one.s0.col.col_outer.__block_id_x, nan_or_one.min.1 + nan_or_one.s0.row.row_outer.__block_id_y) = let t1 = (float32)strict_float(nanf) in (float32)strict_float(select((uint1)is_nan_f32((float32)strict_float((float32)b0(nan_or_one.min.0 + nan_or_one.s0.col.col_outer.__block_id_x, nan_or_one.min.1 + nan_or_one.s0.row.row_outer.__block_id_y, 0))), t1, t1))
}
}
}
}
}
}
Performing storage flattening...
load call to b0 0
Lowering after storage flattening:
let nan_or_one.s0.row.max = (nan_or_one.extent.1 + nan_or_one.min.1) + -1
let nan_or_one.s0.row.min = nan_or_one.min.1
let nan_or_one.s0.col.max = (nan_or_one.extent.0 + nan_or_one.min.0) + -1
let nan_or_one.s0.col.min = nan_or_one.min.0
let b0.extent.0.required = ((nan_or_one.min.0 + (nan_or_one.extent.0 + -1)) + 1) - (nan_or_one.min.0 + 0)
let b0.min.0.required = nan_or_one.min.0 + 0
let b0.stride.0.required = 1
let b0.extent.1.required = ((nan_or_one.min.1 + (nan_or_one.extent.1 + -1)) + 1) - (nan_or_one.min.1 + 0)
let b0.min.1.required = nan_or_one.min.1 + 0
let b0.stride.1.required = b0.stride.0.required*b0.extent.0.required
let b0.extent.2.required = (0 + 1) - 0
let b0.min.2.required = 0
let b0.stride.2.required = b0.stride.1.required*b0.extent.1.required
let nan_or_one.extent.0.required = ((nan_or_one.min.0 + (nan_or_one.extent.0 + -1)) + 1) - (nan_or_one.min.0 + 0)
let nan_or_one.min.0.required = nan_or_one.min.0 + 0
let nan_or_one.stride.0.required = 1
let nan_or_one.extent.1.required = ((nan_or_one.min.1 + (nan_or_one.extent.1 + -1)) + 1) - (nan_or_one.min.1 + 0)
let nan_or_one.min.1.required = nan_or_one.min.1 + 0
let nan_or_one.stride.1.required = nan_or_one.stride.0.required*nan_or_one.extent.0.required
let b0.stride.0.constrained = 3
let b0.min.0.constrained = 0
let b0.extent.0.constrained = 1
let b0.stride.1.constrained = 3
let b0.min.1.constrained = 0
let b0.extent.1.constrained = 1
let b0.stride.2.constrained = 1
let b0.min.2.constrained = 0
let b0.extent.2.constrained = 3
let nan_or_one.stride.0.constrained = 1
let b0.stride.0.proposed = 3
let b0.min.0.proposed = 0
let b0.extent.0.proposed = 1
let b0.stride.1.proposed = 3
let b0.min.1.proposed = 0
let b0.extent.1.proposed = 1
let b0.stride.2.proposed = 1
let b0.min.2.proposed = 0
let b0.extent.2.proposed = 3
let nan_or_one.stride.0.proposed = 1
let nan_or_one.min.0.proposed = nan_or_one.min.0.required
let nan_or_one.extent.0.proposed = nan_or_one.extent.0.required
let nan_or_one.stride.1.proposed = nan_or_one.stride.1.required
let nan_or_one.min.1.proposed = nan_or_one.min.1.required
let nan_or_one.extent.1.proposed = nan_or_one.extent.1.required
assert(!(uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)b0.buffer) || ((b0.min.0.proposed <= b0.min.0.required) && (((b0.min.0.proposed + b0.extent.0.proposed) - 1) >= ((b0.min.0.required + b0.extent.0.required) - 1))), halide_error_constraints_make_required_region_smaller("Input buffer b0", 0, b0.min.0.proposed, (b0.min.0.proposed + b0.extent.0.proposed) - 1, b0.min.0.required, (b0.min.0.required + b0.extent.0.required) - 1))
assert(!(uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)b0.buffer) || ((b0.min.1.proposed <= b0.min.1.required) && (((b0.min.1.proposed + b0.extent.1.proposed) - 1) >= ((b0.min.1.required + b0.extent.1.required) - 1))), halide_error_constraints_make_required_region_smaller("Input buffer b0", 1, b0.min.1.proposed, (b0.min.1.proposed + b0.extent.1.proposed) - 1, b0.min.1.required, (b0.min.1.required + b0.extent.1.required) - 1))
assert(!(uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)b0.buffer) || ((b0.min.2.proposed <= b0.min.2.required) && (((b0.min.2.proposed + b0.extent.2.proposed) - 1) >= ((b0.min.2.required + b0.extent.2.required) - 1))), halide_error_constraints_make_required_region_smaller("Input buffer b0", 2, b0.min.2.proposed, (b0.min.2.proposed + b0.extent.2.proposed) - 1, b0.min.2.required, (b0.min.2.required + b0.extent.2.required) - 1))
assert(!(uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)nan_or_one.buffer) || ((nan_or_one.min.0.proposed <= nan_or_one.min.0.required) && (((nan_or_one.min.0.proposed + nan_or_one.extent.0.proposed) - 1) >= ((nan_or_one.min.0.required + nan_or_one.extent.0.required) - 1))), halide_error_constraints_make_required_region_smaller("Output buffer nan_or_one", 0, nan_or_one.min.0.proposed, (nan_or_one.min.0.proposed + nan_or_one.extent.0.proposed) - 1, nan_or_one.min.0.required, (nan_or_one.min.0.required + nan_or_one.extent.0.required) - 1))
assert(!(uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)nan_or_one.buffer) || ((nan_or_one.min.1.proposed <= nan_or_one.min.1.required) && (((nan_or_one.min.1.proposed + nan_or_one.extent.1.proposed) - 1) >= ((nan_or_one.min.1.required + nan_or_one.extent.1.required) - 1))), halide_error_constraints_make_required_region_smaller("Output buffer nan_or_one", 1, nan_or_one.min.1.proposed, (nan_or_one.min.1.proposed + nan_or_one.extent.1.proposed) - 1, nan_or_one.min.1.required, (nan_or_one.min.1.required + nan_or_one.extent.1.required) - 1))
if ((uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)b0.buffer)) {
(halide_buffer_t *)_halide_buffer_init((halide_buffer_t *)b0.buffer, (halide_dimension_t *)_halide_buffer_get_shape((halide_buffer_t *)b0.buffer), (void *)reinterpret((uint64)0), (uint64)0, (halide_device_interface_t *)reinterpret((uint64)0), 2, 32, 3, (halide_dimension_t *)make_struct(b0.min.0.proposed, b0.extent.0.proposed, b0.stride.0.proposed, 0, b0.min.1.proposed, b0.extent.1.proposed, b0.stride.1.proposed, 0, b0.min.2.proposed, b0.extent.2.proposed, b0.stride.2.proposed, 0), (uint64)0)
}
if ((uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)nan_or_one.buffer)) {
(halide_buffer_t *)_halide_buffer_init((halide_buffer_t *)nan_or_one.buffer, (halide_dimension_t *)_halide_buffer_get_shape((halide_buffer_t *)nan_or_one.buffer), (void *)reinterpret((uint64)0), (uint64)0, (halide_device_interface_t *)reinterpret((uint64)0), 2, 32, 2, (halide_dimension_t *)make_struct(nan_or_one.min.0.proposed, nan_or_one.extent.0.proposed, nan_or_one.stride.0.proposed, 0, nan_or_one.min.1.proposed, nan_or_one.extent.1.proposed, nan_or_one.stride.1.proposed, 0), (uint64)0)
}
if (!(((uint1)0 || (uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)b0.buffer)) || (uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)nan_or_one.buffer))) {
assert((uint32)b0.type == (uint32)73730, halide_error_bad_type("Input buffer b0", (uint32)b0.type, (uint32)73730))
assert(b0.dimensions == 3, halide_error_bad_dimensions("Input buffer b0", b0.dimensions, 3))
assert((uint32)nan_or_one.type == (uint32)73730, halide_error_bad_type("Output buffer nan_or_one", (uint32)nan_or_one.type, (uint32)73730))
assert(nan_or_one.dimensions == 2, halide_error_bad_dimensions("Output buffer nan_or_one", nan_or_one.dimensions, 2))
assert((b0.min.0 <= b0.min.0.required) && (((b0.min.0 + b0.extent.0) - 1) >= ((b0.min.0.required + b0.extent.0.required) - 1)), halide_error_access_out_of_bounds("Input buffer b0", 0, b0.min.0.required, (b0.min.0.required + b0.extent.0.required) - 1, b0.min.0, (b0.min.0 + b0.extent.0) - 1))
assert(b0.extent.0 >= 0, halide_error_buffer_extents_negative("Input buffer b0", 0, b0.extent.0))
assert((b0.min.1 <= b0.min.1.required) && (((b0.min.1 + b0.extent.1) - 1) >= ((b0.min.1.required + b0.extent.1.required) - 1)), halide_error_access_out_of_bounds("Input buffer b0", 1, b0.min.1.required, (b0.min.1.required + b0.extent.1.required) - 1, b0.min.1, (b0.min.1 + b0.extent.1) - 1))
assert(b0.extent.1 >= 0, halide_error_buffer_extents_negative("Input buffer b0", 1, b0.extent.1))
assert((b0.min.2 <= b0.min.2.required) && (((b0.min.2 + b0.extent.2) - 1) >= ((b0.min.2.required + b0.extent.2.required) - 1)), halide_error_access_out_of_bounds("Input buffer b0", 2, b0.min.2.required, (b0.min.2.required + b0.extent.2.required) - 1, b0.min.2, (b0.min.2 + b0.extent.2) - 1))
assert(b0.extent.2 >= 0, halide_error_buffer_extents_negative("Input buffer b0", 2, b0.extent.2))
assert((nan_or_one.min.0 <= nan_or_one.min.0.required) && (((nan_or_one.min.0 + nan_or_one.extent.0) - 1) >= ((nan_or_one.min.0.required + nan_or_one.extent.0.required) - 1)), halide_error_access_out_of_bounds("Output buffer nan_or_one", 0, nan_or_one.min.0.required, (nan_or_one.min.0.required + nan_or_one.extent.0.required) - 1, nan_or_one.min.0, (nan_or_one.min.0 + nan_or_one.extent.0) - 1))
assert(nan_or_one.extent.0 >= 0, halide_error_buffer_extents_negative("Output buffer nan_or_one", 0, nan_or_one.extent.0))
assert((nan_or_one.min.1 <= nan_or_one.min.1.required) && (((nan_or_one.min.1 + nan_or_one.extent.1) - 1) >= ((nan_or_one.min.1.required + nan_or_one.extent.1.required) - 1)), halide_error_access_out_of_bounds("Output buffer nan_or_one", 1, nan_or_one.min.1.required, (nan_or_one.min.1.required + nan_or_one.extent.1.required) - 1, nan_or_one.min.1, (nan_or_one.min.1 + nan_or_one.extent.1) - 1))
assert(nan_or_one.extent.1 >= 0, halide_error_buffer_extents_negative("Output buffer nan_or_one", 1, nan_or_one.extent.1))
assert(b0.stride.0 == b0.stride.0.constrained, halide_error_constraint_violated("b0.stride.0", b0.stride.0, "3", b0.stride.0.constrained))
assert(b0.min.0 == b0.min.0.constrained, halide_error_constraint_violated("b0.min.0", b0.min.0, "0", b0.min.0.constrained))
assert(b0.extent.0 == b0.extent.0.constrained, halide_error_constraint_violated("b0.extent.0", b0.extent.0, "1", b0.extent.0.constrained))
assert(b0.stride.1 == b0.stride.1.constrained, halide_error_constraint_violated("b0.stride.1", b0.stride.1, "3", b0.stride.1.constrained))
assert(b0.min.1 == b0.min.1.constrained, halide_error_constraint_violated("b0.min.1", b0.min.1, "0", b0.min.1.constrained))
assert(b0.extent.1 == b0.extent.1.constrained, halide_error_constraint_violated("b0.extent.1", b0.extent.1, "1", b0.extent.1.constrained))
assert(b0.stride.2 == b0.stride.2.constrained, halide_error_constraint_violated("b0.stride.2", b0.stride.2, "1", b0.stride.2.constrained))
assert(b0.min.2 == b0.min.2.constrained, halide_error_constraint_violated("b0.min.2", b0.min.2, "0", b0.min.2.constrained))
assert(b0.extent.2 == b0.extent.2.constrained, halide_error_constraint_violated("b0.extent.2", b0.extent.2, "3", b0.extent.2.constrained))
assert(nan_or_one.stride.0 == nan_or_one.stride.0.constrained, halide_error_constraint_violated("nan_or_one.stride.0", nan_or_one.stride.0, "1", nan_or_one.stride.0.constrained))
let b0.total_extent.0 = int64(b0.extent.0.constrained)
let b0.total_extent.1 = int64(b0.extent.1.constrained)*b0.total_extent.0
let b0.total_extent.2 = int64(b0.extent.2.constrained)*b0.total_extent.1
let nan_or_one.total_extent.0 = int64(nan_or_one.extent.0)
let nan_or_one.total_extent.1 = int64(nan_or_one.extent.1)*nan_or_one.total_extent.0
assert((uint64)abs(int64(b0.extent.0.constrained)*int64(b0.stride.0.constrained)) <= (uint64)2147483647, halide_error_buffer_allocation_too_large("b0", (uint64)abs(int64(b0.extent.0.constrained)*int64(b0.stride.0.constrained)), (uint64)2147483647))
assert((uint64)abs(int64(b0.extent.1.constrained)*int64(b0.stride.1.constrained)) <= (uint64)2147483647, halide_error_buffer_allocation_too_large("b0", (uint64)abs(int64(b0.extent.1.constrained)*int64(b0.stride.1.constrained)), (uint64)2147483647))
assert(b0.total_extent.1 <= (int64)2147483647, halide_error_buffer_extents_too_large("b0", b0.total_extent.1, (int64)2147483647))
assert((uint64)abs(int64(b0.extent.2.constrained)*int64(b0.stride.2.constrained)) <= (uint64)2147483647, halide_error_buffer_allocation_too_large("b0", (uint64)abs(int64(b0.extent.2.constrained)*int64(b0.stride.2.constrained)), (uint64)2147483647))
assert(b0.total_extent.2 <= (int64)2147483647, halide_error_buffer_extents_too_large("b0", b0.total_extent.2, (int64)2147483647))
assert((uint64)abs(int64(nan_or_one.extent.0)*int64(nan_or_one.stride.0.constrained)) <= (uint64)2147483647, halide_error_buffer_allocation_too_large("nan_or_one", (uint64)abs(int64(nan_or_one.extent.0)*int64(nan_or_one.stride.0.constrained)), (uint64)2147483647))
assert((uint64)abs(int64(nan_or_one.extent.1)*int64(nan_or_one.stride.1)) <= (uint64)2147483647, halide_error_buffer_allocation_too_large("nan_or_one", (uint64)abs(int64(nan_or_one.extent.1)*int64(nan_or_one.stride.1)), (uint64)2147483647))
assert(nan_or_one.total_extent.1 <= (int64)2147483647, halide_error_buffer_extents_too_large("nan_or_one", nan_or_one.total_extent.1, (int64)2147483647))
let nan_or_one.s0.row.loop_max = (nan_or_one.extent.1 + nan_or_one.min.1) + -1
let nan_or_one.s0.row.loop_min = nan_or_one.min.1
let nan_or_one.s0.row.loop_extent = nan_or_one.extent.1
let nan_or_one.s0.col.loop_max = (nan_or_one.extent.0 + nan_or_one.min.0) + -1
let nan_or_one.s0.col.loop_min = nan_or_one.min.0
let nan_or_one.s0.col.loop_extent = nan_or_one.extent.0
let nan_or_one.s0.col.col_outer.__block_id_x.loop_extent = nan_or_one.extent.0
let nan_or_one.s0.col.col_outer.__block_id_x.loop_max = nan_or_one.extent.0 + -1
let nan_or_one.s0.col.col_outer.__block_id_x.loop_min = 0
let nan_or_one.s0.col.col_inner.__thread_id_x.loop_extent = 1
let nan_or_one.s0.col.col_inner.__thread_id_x.loop_max = 0
let nan_or_one.s0.col.col_inner.__thread_id_x.loop_min = 0
let nan_or_one.s0.row.row_outer.__block_id_y.loop_extent = nan_or_one.extent.1
let nan_or_one.s0.row.row_outer.__block_id_y.loop_max = nan_or_one.extent.1 + -1
let nan_or_one.s0.row.row_outer.__block_id_y.loop_min = 0
let nan_or_one.s0.row.row_inner.__thread_id_y.loop_extent = 1
let nan_or_one.s0.row.row_inner.__thread_id_y.loop_max = 0
let nan_or_one.s0.row.row_inner.__thread_id_y.loop_min = 0
let nan_or_one.s0.row.row_outer.loop_min.orig = 0
produce nan_or_one {
gpu_block<Default_GPU> (nan_or_one.s0.row.row_outer.__block_id_y, 0, nan_or_one.extent.1) {
let nan_or_one.s0.row.row_inner.base = nan_or_one.min.1 + nan_or_one.s0.row.row_outer.__block_id_y
let nan_or_one.s0.col.col_outer.loop_min.orig = 0
gpu_block<Default_GPU> (nan_or_one.s0.col.col_outer.__block_id_x, 0, nan_or_one.extent.0) {
let nan_or_one.s0.col.col_inner.base = nan_or_one.min.0 + nan_or_one.s0.col.col_outer.__block_id_x
let nan_or_one.s0.row.row_inner.loop_min.orig = 0
gpu_thread<Default_GPU> (nan_or_one.s0.row.row_inner.__thread_id_y, 0, 1) {
let nan_or_one.s0.row = nan_or_one.min.1 + nan_or_one.s0.row.row_outer.__block_id_y
let nan_or_one.s0.col.col_inner.loop_min.orig = 0
gpu_thread<Default_GPU> (nan_or_one.s0.col.col_inner.__thread_id_x, 0, 1) {
let nan_or_one.s0.col = nan_or_one.min.0 + nan_or_one.s0.col.col_outer.__block_id_x
nan_or_one[((0 + ((nan_or_one.min.0 + nan_or_one.s0.col.col_outer.__block_id_x)*nan_or_one.stride.0)) + ((nan_or_one.min.1 + nan_or_one.s0.row.row_outer.__block_id_y)*nan_or_one.stride.1)) - ((0 + (nan_or_one.min.0*nan_or_one.stride.0)) + (nan_or_one.min.1*nan_or_one.stride.1))] =
let t1 = (float32)strict_float(nanf) in
(float32)strict_float(select((uint1)is_nan_f32((float32)strict_float((float32)b0[(((0 + ((nan_or_one.min.0 + nan_or_one.s0.col.col_outer.__block_id_x)*b0.stride.0)) + ((nan_or_one.min.1 + nan_or_one.s0.row.row_outer.__block_id_y)*b0.stride.1)) + (0*b0.stride.2)) - (((0 + (b0.min.0*b0.stride.0)) + (b0.min.1*b0.stride.1)) + (b0.min.2*b0.stride.2))])), t1, t1))
}
}
}
}
}
}
Adding atomic mutex allocation...
Lowering after adding atomic mutex allocation: (unchanged)
Unpacking buffer arguments...
Lowering after unpacking buffer arguments:
assert((uint64)reinterpret((halide_buffer_t *)nan_or_one.buffer) != (uint64)0, halide_error_buffer_argument_is_null("nan_or_one"))
assert((uint64)reinterpret((halide_buffer_t *)b0.buffer) != (uint64)0, halide_error_buffer_argument_is_null("b0"))
let b0 = (void *)_halide_buffer_get_host((halide_buffer_t *)b0.buffer)
let b0.type = (uint32)_halide_buffer_get_type((halide_buffer_t *)b0.buffer)
let b0.dimensions = _halide_buffer_get_dimensions((halide_buffer_t *)b0.buffer)
let b0.min.0 = _halide_buffer_get_min((halide_buffer_t *)b0.buffer, 0)
let b0.extent.0 = _halide_buffer_get_extent((halide_buffer_t *)b0.buffer, 0)
let b0.stride.0 = _halide_buffer_get_stride((halide_buffer_t *)b0.buffer, 0)
let b0.min.1 = _halide_buffer_get_min((halide_buffer_t *)b0.buffer, 1)
let b0.extent.1 = _halide_buffer_get_extent((halide_buffer_t *)b0.buffer, 1)
let b0.stride.1 = _halide_buffer_get_stride((halide_buffer_t *)b0.buffer, 1)
let b0.min.2 = _halide_buffer_get_min((halide_buffer_t *)b0.buffer, 2)
let b0.extent.2 = _halide_buffer_get_extent((halide_buffer_t *)b0.buffer, 2)
let b0.stride.2 = _halide_buffer_get_stride((halide_buffer_t *)b0.buffer, 2)
let nan_or_one = (void *)_halide_buffer_get_host((halide_buffer_t *)nan_or_one.buffer)
let nan_or_one.type = (uint32)_halide_buffer_get_type((halide_buffer_t *)nan_or_one.buffer)
let nan_or_one.dimensions = _halide_buffer_get_dimensions((halide_buffer_t *)nan_or_one.buffer)
let nan_or_one.min.0 = _halide_buffer_get_min((halide_buffer_t *)nan_or_one.buffer, 0)
let nan_or_one.extent.0 = _halide_buffer_get_extent((halide_buffer_t *)nan_or_one.buffer, 0)
let nan_or_one.stride.0 = _halide_buffer_get_stride((halide_buffer_t *)nan_or_one.buffer, 0)
let nan_or_one.min.1 = _halide_buffer_get_min((halide_buffer_t *)nan_or_one.buffer, 1)
let nan_or_one.extent.1 = _halide_buffer_get_extent((halide_buffer_t *)nan_or_one.buffer, 1)
let nan_or_one.stride.1 = _halide_buffer_get_stride((halide_buffer_t *)nan_or_one.buffer, 1)
let nan_or_one.s0.row.max = (nan_or_one.extent.1 + nan_or_one.min.1) + -1
let nan_or_one.s0.row.min = nan_or_one.min.1
let nan_or_one.s0.col.max = (nan_or_one.extent.0 + nan_or_one.min.0) + -1
let nan_or_one.s0.col.min = nan_or_one.min.0
let b0.extent.0.required = ((nan_or_one.min.0 + (nan_or_one.extent.0 + -1)) + 1) - (nan_or_one.min.0 + 0)
let b0.min.0.required = nan_or_one.min.0 + 0
let b0.stride.0.required = 1
let b0.extent.1.required = ((nan_or_one.min.1 + (nan_or_one.extent.1 + -1)) + 1) - (nan_or_one.min.1 + 0)
let b0.min.1.required = nan_or_one.min.1 + 0
let b0.stride.1.required = b0.stride.0.required*b0.extent.0.required
let b0.extent.2.required = (0 + 1) - 0
let b0.min.2.required = 0
let b0.stride.2.required = b0.stride.1.required*b0.extent.1.required
let nan_or_one.extent.0.required = ((nan_or_one.min.0 + (nan_or_one.extent.0 + -1)) + 1) - (nan_or_one.min.0 + 0)
let nan_or_one.min.0.required = nan_or_one.min.0 + 0
let nan_or_one.stride.0.required = 1
let nan_or_one.extent.1.required = ((nan_or_one.min.1 + (nan_or_one.extent.1 + -1)) + 1) - (nan_or_one.min.1 + 0)
let nan_or_one.min.1.required = nan_or_one.min.1 + 0
let nan_or_one.stride.1.required = nan_or_one.stride.0.required*nan_or_one.extent.0.required
let b0.stride.0.constrained = 3
let b0.min.0.constrained = 0
let b0.extent.0.constrained = 1
let b0.stride.1.constrained = 3
let b0.min.1.constrained = 0
let b0.extent.1.constrained = 1
let b0.stride.2.constrained = 1
let b0.min.2.constrained = 0
let b0.extent.2.constrained = 3
let nan_or_one.stride.0.constrained = 1
let b0.stride.0.proposed = 3
let b0.min.0.proposed = 0
let b0.extent.0.proposed = 1
let b0.stride.1.proposed = 3
let b0.min.1.proposed = 0
let b0.extent.1.proposed = 1
let b0.stride.2.proposed = 1
let b0.min.2.proposed = 0
let b0.extent.2.proposed = 3
let nan_or_one.stride.0.proposed = 1
let nan_or_one.min.0.proposed = nan_or_one.min.0.required
let nan_or_one.extent.0.proposed = nan_or_one.extent.0.required
let nan_or_one.stride.1.proposed = nan_or_one.stride.1.required
let nan_or_one.min.1.proposed = nan_or_one.min.1.required
let nan_or_one.extent.1.proposed = nan_or_one.extent.1.required
assert(!(uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)b0.buffer) || ((b0.min.0.proposed <= b0.min.0.required) && (((b0.min.0.proposed + b0.extent.0.proposed) - 1) >= ((b0.min.0.required + b0.extent.0.required) - 1))), halide_error_constraints_make_required_region_smaller("Input buffer b0", 0, b0.min.0.proposed, (b0.min.0.proposed + b0.extent.0.proposed) - 1, b0.min.0.required, (b0.min.0.required + b0.extent.0.required) - 1))
assert(!(uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)b0.buffer) || ((b0.min.1.proposed <= b0.min.1.required) && (((b0.min.1.proposed + b0.extent.1.proposed) - 1) >= ((b0.min.1.required + b0.extent.1.required) - 1))), halide_error_constraints_make_required_region_smaller("Input buffer b0", 1, b0.min.1.proposed, (b0.min.1.proposed + b0.extent.1.proposed) - 1, b0.min.1.required, (b0.min.1.required + b0.extent.1.required) - 1))
assert(!(uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)b0.buffer) || ((b0.min.2.proposed <= b0.min.2.required) && (((b0.min.2.proposed + b0.extent.2.proposed) - 1) >= ((b0.min.2.required + b0.extent.2.required) - 1))), halide_error_constraints_make_required_region_smaller("Input buffer b0", 2, b0.min.2.proposed, (b0.min.2.proposed + b0.extent.2.proposed) - 1, b0.min.2.required, (b0.min.2.required + b0.extent.2.required) - 1))
assert(!(uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)nan_or_one.buffer) || ((nan_or_one.min.0.proposed <= nan_or_one.min.0.required) && (((nan_or_one.min.0.proposed + nan_or_one.extent.0.proposed) - 1) >= ((nan_or_one.min.0.required + nan_or_one.extent.0.required) - 1))), halide_error_constraints_make_required_region_smaller("Output buffer nan_or_one", 0, nan_or_one.min.0.proposed, (nan_or_one.min.0.proposed + nan_or_one.extent.0.proposed) - 1, nan_or_one.min.0.required, (nan_or_one.min.0.required + nan_or_one.extent.0.required) - 1))
assert(!(uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)nan_or_one.buffer) || ((nan_or_one.min.1.proposed <= nan_or_one.min.1.required) && (((nan_or_one.min.1.proposed + nan_or_one.extent.1.proposed) - 1) >= ((nan_or_one.min.1.required + nan_or_one.extent.1.required) - 1))), halide_error_constraints_make_required_region_smaller("Output buffer nan_or_one", 1, nan_or_one.min.1.proposed, (nan_or_one.min.1.proposed + nan_or_one.extent.1.proposed) - 1, nan_or_one.min.1.required, (nan_or_one.min.1.required + nan_or_one.extent.1.required) - 1))
if ((uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)b0.buffer)) {
(halide_buffer_t *)_halide_buffer_init((halide_buffer_t *)b0.buffer, (halide_dimension_t *)_halide_buffer_get_shape((halide_buffer_t *)b0.buffer), (void *)reinterpret((uint64)0), (uint64)0, (halide_device_interface_t *)reinterpret((uint64)0), 2, 32, 3, (halide_dimension_t *)make_struct(b0.min.0.proposed, b0.extent.0.proposed, b0.stride.0.proposed, 0, b0.min.1.proposed, b0.extent.1.proposed, b0.stride.1.proposed, 0, b0.min.2.proposed, b0.extent.2.proposed, b0.stride.2.proposed, 0), (uint64)0)
}
if ((uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)nan_or_one.buffer)) {
(halide_buffer_t *)_halide_buffer_init((halide_buffer_t *)nan_or_one.buffer, (halide_dimension_t *)_halide_buffer_get_shape((halide_buffer_t *)nan_or_one.buffer), (void *)reinterpret((uint64)0), (uint64)0, (halide_device_interface_t *)reinterpret((uint64)0), 2, 32, 2, (halide_dimension_t *)make_struct(nan_or_one.min.0.proposed, nan_or_one.extent.0.proposed, nan_or_one.stride.0.proposed, 0, nan_or_one.min.1.proposed, nan_or_one.extent.1.proposed, nan_or_one.stride.1.proposed, 0), (uint64)0)
}
if (!(((uint1)0 || (uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)b0.buffer)) || (uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)nan_or_one.buffer))) {
assert(b0.type == (uint32)73730, halide_error_bad_type("Input buffer b0", b0.type, (uint32)73730))
assert(b0.dimensions == 3, halide_error_bad_dimensions("Input buffer b0", b0.dimensions, 3))
assert(nan_or_one.type == (uint32)73730, halide_error_bad_type("Output buffer nan_or_one", nan_or_one.type, (uint32)73730))
assert(nan_or_one.dimensions == 2, halide_error_bad_dimensions("Output buffer nan_or_one", nan_or_one.dimensions, 2))
assert((b0.min.0 <= b0.min.0.required) && (((b0.min.0 + b0.extent.0) - 1) >= ((b0.min.0.required + b0.extent.0.required) - 1)), halide_error_access_out_of_bounds("Input buffer b0", 0, b0.min.0.required, (b0.min.0.required + b0.extent.0.required) - 1, b0.min.0, (b0.min.0 + b0.extent.0) - 1))
assert(b0.extent.0 >= 0, halide_error_buffer_extents_negative("Input buffer b0", 0, b0.extent.0))
assert((b0.min.1 <= b0.min.1.required) && (((b0.min.1 + b0.extent.1) - 1) >= ((b0.min.1.required + b0.extent.1.required) - 1)), halide_error_access_out_of_bounds("Input buffer b0", 1, b0.min.1.required, (b0.min.1.required + b0.extent.1.required) - 1, b0.min.1, (b0.min.1 + b0.extent.1) - 1))
assert(b0.extent.1 >= 0, halide_error_buffer_extents_negative("Input buffer b0", 1, b0.extent.1))
assert((b0.min.2 <= b0.min.2.required) && (((b0.min.2 + b0.extent.2) - 1) >= ((b0.min.2.required + b0.extent.2.required) - 1)), halide_error_access_out_of_bounds("Input buffer b0", 2, b0.min.2.required, (b0.min.2.required + b0.extent.2.required) - 1, b0.min.2, (b0.min.2 + b0.extent.2) - 1))
assert(b0.extent.2 >= 0, halide_error_buffer_extents_negative("Input buffer b0", 2, b0.extent.2))
assert((nan_or_one.min.0 <= nan_or_one.min.0.required) && (((nan_or_one.min.0 + nan_or_one.extent.0) - 1) >= ((nan_or_one.min.0.required + nan_or_one.extent.0.required) - 1)), halide_error_access_out_of_bounds("Output buffer nan_or_one", 0, nan_or_one.min.0.required, (nan_or_one.min.0.required + nan_or_one.extent.0.required) - 1, nan_or_one.min.0, (nan_or_one.min.0 + nan_or_one.extent.0) - 1))
assert(nan_or_one.extent.0 >= 0, halide_error_buffer_extents_negative("Output buffer nan_or_one", 0, nan_or_one.extent.0))
assert((nan_or_one.min.1 <= nan_or_one.min.1.required) && (((nan_or_one.min.1 + nan_or_one.extent.1) - 1) >= ((nan_or_one.min.1.required + nan_or_one.extent.1.required) - 1)), halide_error_access_out_of_bounds("Output buffer nan_or_one", 1, nan_or_one.min.1.required, (nan_or_one.min.1.required + nan_or_one.extent.1.required) - 1, nan_or_one.min.1, (nan_or_one.min.1 + nan_or_one.extent.1) - 1))
assert(nan_or_one.extent.1 >= 0, halide_error_buffer_extents_negative("Output buffer nan_or_one", 1, nan_or_one.extent.1))
assert(b0.stride.0 == b0.stride.0.constrained, halide_error_constraint_violated("b0.stride.0", b0.stride.0, "3", b0.stride.0.constrained))
assert(b0.min.0 == b0.min.0.constrained, halide_error_constraint_violated("b0.min.0", b0.min.0, "0", b0.min.0.constrained))
assert(b0.extent.0 == b0.extent.0.constrained, halide_error_constraint_violated("b0.extent.0", b0.extent.0, "1", b0.extent.0.constrained))
assert(b0.stride.1 == b0.stride.1.constrained, halide_error_constraint_violated("b0.stride.1", b0.stride.1, "3", b0.stride.1.constrained))
assert(b0.min.1 == b0.min.1.constrained, halide_error_constraint_violated("b0.min.1", b0.min.1, "0", b0.min.1.constrained))
assert(b0.extent.1 == b0.extent.1.constrained, halide_error_constraint_violated("b0.extent.1", b0.extent.1, "1", b0.extent.1.constrained))
assert(b0.stride.2 == b0.stride.2.constrained, halide_error_constraint_violated("b0.stride.2", b0.stride.2, "1", b0.stride.2.constrained))
assert(b0.min.2 == b0.min.2.constrained, halide_error_constraint_violated("b0.min.2", b0.min.2, "0", b0.min.2.constrained))
assert(b0.extent.2 == b0.extent.2.constrained, halide_error_constraint_violated("b0.extent.2", b0.extent.2, "3", b0.extent.2.constrained))
assert(nan_or_one.stride.0 == nan_or_one.stride.0.constrained, halide_error_constraint_violated("nan_or_one.stride.0", nan_or_one.stride.0, "1", nan_or_one.stride.0.constrained))
let b0.total_extent.0 = int64(b0.extent.0.constrained)
let b0.total_extent.1 = int64(b0.extent.1.constrained)*b0.total_extent.0
let b0.total_extent.2 = int64(b0.extent.2.constrained)*b0.total_extent.1
let nan_or_one.total_extent.0 = int64(nan_or_one.extent.0)
let nan_or_one.total_extent.1 = int64(nan_or_one.extent.1)*nan_or_one.total_extent.0
assert((uint64)abs(int64(b0.extent.0.constrained)*int64(b0.stride.0.constrained)) <= (uint64)2147483647, halide_error_buffer_allocation_too_large("b0", (uint64)abs(int64(b0.extent.0.constrained)*int64(b0.stride.0.constrained)), (uint64)2147483647))
assert((uint64)abs(int64(b0.extent.1.constrained)*int64(b0.stride.1.constrained)) <= (uint64)2147483647, halide_error_buffer_allocation_too_large("b0", (uint64)abs(int64(b0.extent.1.constrained)*int64(b0.stride.1.constrained)), (uint64)2147483647))
assert(b0.total_extent.1 <= (int64)2147483647, halide_error_buffer_extents_too_large("b0", b0.total_extent.1, (int64)2147483647))
assert((uint64)abs(int64(b0.extent.2.constrained)*int64(b0.stride.2.constrained)) <= (uint64)2147483647, halide_error_buffer_allocation_too_large("b0", (uint64)abs(int64(b0.extent.2.constrained)*int64(b0.stride.2.constrained)), (uint64)2147483647))
assert(b0.total_extent.2 <= (int64)2147483647, halide_error_buffer_extents_too_large("b0", b0.total_extent.2, (int64)2147483647))
assert((uint64)abs(int64(nan_or_one.extent.0)*int64(nan_or_one.stride.0.constrained)) <= (uint64)2147483647, halide_error_buffer_allocation_too_large("nan_or_one", (uint64)abs(int64(nan_or_one.extent.0)*int64(nan_or_one.stride.0.constrained)), (uint64)2147483647))
assert((uint64)abs(int64(nan_or_one.extent.1)*int64(nan_or_one.stride.1)) <= (uint64)2147483647, halide_error_buffer_allocation_too_large("nan_or_one", (uint64)abs(int64(nan_or_one.extent.1)*int64(nan_or_one.stride.1)), (uint64)2147483647))
assert(nan_or_one.total_extent.1 <= (int64)2147483647, halide_error_buffer_extents_too_large("nan_or_one", nan_or_one.total_extent.1, (int64)2147483647))
let nan_or_one.s0.row.loop_max = (nan_or_one.extent.1 + nan_or_one.min.1) + -1
let nan_or_one.s0.row.loop_min = nan_or_one.min.1
let nan_or_one.s0.row.loop_extent = nan_or_one.extent.1
let nan_or_one.s0.col.loop_max = (nan_or_one.extent.0 + nan_or_one.min.0) + -1
let nan_or_one.s0.col.loop_min = nan_or_one.min.0
let nan_or_one.s0.col.loop_extent = nan_or_one.extent.0
let nan_or_one.s0.col.col_outer.__block_id_x.loop_extent = nan_or_one.extent.0
let nan_or_one.s0.col.col_outer.__block_id_x.loop_max = nan_or_one.extent.0 + -1
let nan_or_one.s0.col.col_outer.__block_id_x.loop_min = 0
let nan_or_one.s0.col.col_inner.__thread_id_x.loop_extent = 1
let nan_or_one.s0.col.col_inner.__thread_id_x.loop_max = 0
let nan_or_one.s0.col.col_inner.__thread_id_x.loop_min = 0
let nan_or_one.s0.row.row_outer.__block_id_y.loop_extent = nan_or_one.extent.1
let nan_or_one.s0.row.row_outer.__block_id_y.loop_max = nan_or_one.extent.1 + -1
let nan_or_one.s0.row.row_outer.__block_id_y.loop_min = 0
let nan_or_one.s0.row.row_inner.__thread_id_y.loop_extent = 1
let nan_or_one.s0.row.row_inner.__thread_id_y.loop_max = 0
let nan_or_one.s0.row.row_inner.__thread_id_y.loop_min = 0
let nan_or_one.s0.row.row_outer.loop_min.orig = 0
produce nan_or_one {
gpu_block<Default_GPU> (nan_or_one.s0.row.row_outer.__block_id_y, 0, nan_or_one.extent.1) {
let nan_or_one.s0.row.row_inner.base = nan_or_one.min.1 + nan_or_one.s0.row.row_outer.__block_id_y
let nan_or_one.s0.col.col_outer.loop_min.orig = 0
gpu_block<Default_GPU> (nan_or_one.s0.col.col_outer.__block_id_x, 0, nan_or_one.extent.0) {
let nan_or_one.s0.col.col_inner.base = nan_or_one.min.0 + nan_or_one.s0.col.col_outer.__block_id_x
let nan_or_one.s0.row.row_inner.loop_min.orig = 0
gpu_thread<Default_GPU> (nan_or_one.s0.row.row_inner.__thread_id_y, 0, 1) {
let nan_or_one.s0.row = nan_or_one.min.1 + nan_or_one.s0.row.row_outer.__block_id_y
let nan_or_one.s0.col.col_inner.loop_min.orig = 0
gpu_thread<Default_GPU> (nan_or_one.s0.col.col_inner.__thread_id_x, 0, 1) {
let nan_or_one.s0.col = nan_or_one.min.0 + nan_or_one.s0.col.col_outer.__block_id_x
nan_or_one[((0 + ((nan_or_one.min.0 + nan_or_one.s0.col.col_outer.__block_id_x)*nan_or_one.stride.0)) + ((nan_or_one.min.1 + nan_or_one.s0.row.row_outer.__block_id_y)*nan_or_one.stride.1)) - ((0 + (nan_or_one.min.0*nan_or_one.stride.0)) + (nan_or_one.min.1*nan_or_one.stride.1))] =
let t1 = (float32)strict_float(nanf) in
(float32)strict_float(select((uint1)is_nan_f32((float32)strict_float(b0[(((0 + ((nan_or_one.min.0 + nan_or_one.s0.col.col_outer.__block_id_x)*b0.stride.0)) + ((nan_or_one.min.1 + nan_or_one.s0.row.row_outer.__block_id_y)*b0.stride.1)) + (0*b0.stride.2)) - (((0 + (b0.min.0*b0.stride.0)) + (b0.min.1*b0.stride.1)) + (b0.min.2*b0.stride.2))])), t1, t1))
}
}
}
}
}
}
Skipping rewriting memoized allocations...
Selecting a GPU API for GPU loops...
Lowering after selecting a GPU API:
assert((uint64)reinterpret((halide_buffer_t *)nan_or_one.buffer) != (uint64)0, halide_error_buffer_argument_is_null("nan_or_one"))
assert((uint64)reinterpret((halide_buffer_t *)b0.buffer) != (uint64)0, halide_error_buffer_argument_is_null("b0"))
let b0 = (void *)_halide_buffer_get_host((halide_buffer_t *)b0.buffer)
let b0.type = (uint32)_halide_buffer_get_type((halide_buffer_t *)b0.buffer)
let b0.dimensions = _halide_buffer_get_dimensions((halide_buffer_t *)b0.buffer)
let b0.min.0 = _halide_buffer_get_min((halide_buffer_t *)b0.buffer, 0)
let b0.extent.0 = _halide_buffer_get_extent((halide_buffer_t *)b0.buffer, 0)
let b0.stride.0 = _halide_buffer_get_stride((halide_buffer_t *)b0.buffer, 0)
let b0.min.1 = _halide_buffer_get_min((halide_buffer_t *)b0.buffer, 1)
let b0.extent.1 = _halide_buffer_get_extent((halide_buffer_t *)b0.buffer, 1)
let b0.stride.1 = _halide_buffer_get_stride((halide_buffer_t *)b0.buffer, 1)
let b0.min.2 = _halide_buffer_get_min((halide_buffer_t *)b0.buffer, 2)
let b0.extent.2 = _halide_buffer_get_extent((halide_buffer_t *)b0.buffer, 2)
let b0.stride.2 = _halide_buffer_get_stride((halide_buffer_t *)b0.buffer, 2)
let nan_or_one = (void *)_halide_buffer_get_host((halide_buffer_t *)nan_or_one.buffer)
let nan_or_one.type = (uint32)_halide_buffer_get_type((halide_buffer_t *)nan_or_one.buffer)
let nan_or_one.dimensions = _halide_buffer_get_dimensions((halide_buffer_t *)nan_or_one.buffer)
let nan_or_one.min.0 = _halide_buffer_get_min((halide_buffer_t *)nan_or_one.buffer, 0)
let nan_or_one.extent.0 = _halide_buffer_get_extent((halide_buffer_t *)nan_or_one.buffer, 0)
let nan_or_one.stride.0 = _halide_buffer_get_stride((halide_buffer_t *)nan_or_one.buffer, 0)
let nan_or_one.min.1 = _halide_buffer_get_min((halide_buffer_t *)nan_or_one.buffer, 1)
let nan_or_one.extent.1 = _halide_buffer_get_extent((halide_buffer_t *)nan_or_one.buffer, 1)
let nan_or_one.stride.1 = _halide_buffer_get_stride((halide_buffer_t *)nan_or_one.buffer, 1)
let nan_or_one.s0.row.max = (nan_or_one.extent.1 + nan_or_one.min.1) + -1
let nan_or_one.s0.row.min = nan_or_one.min.1
let nan_or_one.s0.col.max = (nan_or_one.extent.0 + nan_or_one.min.0) + -1
let nan_or_one.s0.col.min = nan_or_one.min.0
let b0.extent.0.required = ((nan_or_one.min.0 + (nan_or_one.extent.0 + -1)) + 1) - (nan_or_one.min.0 + 0)
let b0.min.0.required = nan_or_one.min.0 + 0
let b0.stride.0.required = 1
let b0.extent.1.required = ((nan_or_one.min.1 + (nan_or_one.extent.1 + -1)) + 1) - (nan_or_one.min.1 + 0)
let b0.min.1.required = nan_or_one.min.1 + 0
let b0.stride.1.required = b0.stride.0.required*b0.extent.0.required
let b0.extent.2.required = (0 + 1) - 0
let b0.min.2.required = 0
let b0.stride.2.required = b0.stride.1.required*b0.extent.1.required
let nan_or_one.extent.0.required = ((nan_or_one.min.0 + (nan_or_one.extent.0 + -1)) + 1) - (nan_or_one.min.0 + 0)
let nan_or_one.min.0.required = nan_or_one.min.0 + 0
let nan_or_one.stride.0.required = 1
let nan_or_one.extent.1.required = ((nan_or_one.min.1 + (nan_or_one.extent.1 + -1)) + 1) - (nan_or_one.min.1 + 0)
let nan_or_one.min.1.required = nan_or_one.min.1 + 0
let nan_or_one.stride.1.required = nan_or_one.stride.0.required*nan_or_one.extent.0.required
let b0.stride.0.constrained = 3
let b0.min.0.constrained = 0
let b0.extent.0.constrained = 1
let b0.stride.1.constrained = 3
let b0.min.1.constrained = 0
let b0.extent.1.constrained = 1
let b0.stride.2.constrained = 1
let b0.min.2.constrained = 0
let b0.extent.2.constrained = 3
let nan_or_one.stride.0.constrained = 1
let b0.stride.0.proposed = 3
let b0.min.0.proposed = 0
let b0.extent.0.proposed = 1
let b0.stride.1.proposed = 3
let b0.min.1.proposed = 0
let b0.extent.1.proposed = 1
let b0.stride.2.proposed = 1
let b0.min.2.proposed = 0
let b0.extent.2.proposed = 3
let nan_or_one.stride.0.proposed = 1
let nan_or_one.min.0.proposed = nan_or_one.min.0.required
let nan_or_one.extent.0.proposed = nan_or_one.extent.0.required
let nan_or_one.stride.1.proposed = nan_or_one.stride.1.required
let nan_or_one.min.1.proposed = nan_or_one.min.1.required
let nan_or_one.extent.1.proposed = nan_or_one.extent.1.required
assert(!(uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)b0.buffer) || ((b0.min.0.proposed <= b0.min.0.required) && (((b0.min.0.proposed + b0.extent.0.proposed) - 1) >= ((b0.min.0.required + b0.extent.0.required) - 1))), halide_error_constraints_make_required_region_smaller("Input buffer b0", 0, b0.min.0.proposed, (b0.min.0.proposed + b0.extent.0.proposed) - 1, b0.min.0.required, (b0.min.0.required + b0.extent.0.required) - 1))
assert(!(uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)b0.buffer) || ((b0.min.1.proposed <= b0.min.1.required) && (((b0.min.1.proposed + b0.extent.1.proposed) - 1) >= ((b0.min.1.required + b0.extent.1.required) - 1))), halide_error_constraints_make_required_region_smaller("Input buffer b0", 1, b0.min.1.proposed, (b0.min.1.proposed + b0.extent.1.proposed) - 1, b0.min.1.required, (b0.min.1.required + b0.extent.1.required) - 1))
assert(!(uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)b0.buffer) || ((b0.min.2.proposed <= b0.min.2.required) && (((b0.min.2.proposed + b0.extent.2.proposed) - 1) >= ((b0.min.2.required + b0.extent.2.required) - 1))), halide_error_constraints_make_required_region_smaller("Input buffer b0", 2, b0.min.2.proposed, (b0.min.2.proposed + b0.extent.2.proposed) - 1, b0.min.2.required, (b0.min.2.required + b0.extent.2.required) - 1))
assert(!(uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)nan_or_one.buffer) || ((nan_or_one.min.0.proposed <= nan_or_one.min.0.required) && (((nan_or_one.min.0.proposed + nan_or_one.extent.0.proposed) - 1) >= ((nan_or_one.min.0.required + nan_or_one.extent.0.required) - 1))), halide_error_constraints_make_required_region_smaller("Output buffer nan_or_one", 0, nan_or_one.min.0.proposed, (nan_or_one.min.0.proposed + nan_or_one.extent.0.proposed) - 1, nan_or_one.min.0.required, (nan_or_one.min.0.required + nan_or_one.extent.0.required) - 1))
assert(!(uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)nan_or_one.buffer) || ((nan_or_one.min.1.proposed <= nan_or_one.min.1.required) && (((nan_or_one.min.1.proposed + nan_or_one.extent.1.proposed) - 1) >= ((nan_or_one.min.1.required + nan_or_one.extent.1.required) - 1))), halide_error_constraints_make_required_region_smaller("Output buffer nan_or_one", 1, nan_or_one.min.1.proposed, (nan_or_one.min.1.proposed + nan_or_one.extent.1.proposed) - 1, nan_or_one.min.1.required, (nan_or_one.min.1.required + nan_or_one.extent.1.required) - 1))
if ((uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)b0.buffer)) {
(halide_buffer_t *)_halide_buffer_init((halide_buffer_t *)b0.buffer, (halide_dimension_t *)_halide_buffer_get_shape((halide_buffer_t *)b0.buffer), (void *)reinterpret((uint64)0), (uint64)0, (halide_device_interface_t *)reinterpret((uint64)0), 2, 32, 3, (halide_dimension_t *)make_struct(b0.min.0.proposed, b0.extent.0.proposed, b0.stride.0.proposed, 0, b0.min.1.proposed, b0.extent.1.proposed, b0.stride.1.proposed, 0, b0.min.2.proposed, b0.extent.2.proposed, b0.stride.2.proposed, 0), (uint64)0)
}
if ((uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)nan_or_one.buffer)) {
(halide_buffer_t *)_halide_buffer_init((halide_buffer_t *)nan_or_one.buffer, (halide_dimension_t *)_halide_buffer_get_shape((halide_buffer_t *)nan_or_one.buffer), (void *)reinterpret((uint64)0), (uint64)0, (halide_device_interface_t *)reinterpret((uint64)0), 2, 32, 2, (halide_dimension_t *)make_struct(nan_or_one.min.0.proposed, nan_or_one.extent.0.proposed, nan_or_one.stride.0.proposed, 0, nan_or_one.min.1.proposed, nan_or_one.extent.1.proposed, nan_or_one.stride.1.proposed, 0), (uint64)0)
}
if (!(((uint1)0 || (uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)b0.buffer)) || (uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)nan_or_one.buffer))) {
assert(b0.type == (uint32)73730, halide_error_bad_type("Input buffer b0", b0.type, (uint32)73730))
assert(b0.dimensions == 3, halide_error_bad_dimensions("Input buffer b0", b0.dimensions, 3))
assert(nan_or_one.type == (uint32)73730, halide_error_bad_type("Output buffer nan_or_one", nan_or_one.type, (uint32)73730))
assert(nan_or_one.dimensions == 2, halide_error_bad_dimensions("Output buffer nan_or_one", nan_or_one.dimensions, 2))
assert((b0.min.0 <= b0.min.0.required) && (((b0.min.0 + b0.extent.0) - 1) >= ((b0.min.0.required + b0.extent.0.required) - 1)), halide_error_access_out_of_bounds("Input buffer b0", 0, b0.min.0.required, (b0.min.0.required + b0.extent.0.required) - 1, b0.min.0, (b0.min.0 + b0.extent.0) - 1))
assert(b0.extent.0 >= 0, halide_error_buffer_extents_negative("Input buffer b0", 0, b0.extent.0))
assert((b0.min.1 <= b0.min.1.required) && (((b0.min.1 + b0.extent.1) - 1) >= ((b0.min.1.required + b0.extent.1.required) - 1)), halide_error_access_out_of_bounds("Input buffer b0", 1, b0.min.1.required, (b0.min.1.required + b0.extent.1.required) - 1, b0.min.1, (b0.min.1 + b0.extent.1) - 1))
assert(b0.extent.1 >= 0, halide_error_buffer_extents_negative("Input buffer b0", 1, b0.extent.1))
assert((b0.min.2 <= b0.min.2.required) && (((b0.min.2 + b0.extent.2) - 1) >= ((b0.min.2.required + b0.extent.2.required) - 1)), halide_error_access_out_of_bounds("Input buffer b0", 2, b0.min.2.required, (b0.min.2.required + b0.extent.2.required) - 1, b0.min.2, (b0.min.2 + b0.extent.2) - 1))
assert(b0.extent.2 >= 0, halide_error_buffer_extents_negative("Input buffer b0", 2, b0.extent.2))
assert((nan_or_one.min.0 <= nan_or_one.min.0.required) && (((nan_or_one.min.0 + nan_or_one.extent.0) - 1) >= ((nan_or_one.min.0.required + nan_or_one.extent.0.required) - 1)), halide_error_access_out_of_bounds("Output buffer nan_or_one", 0, nan_or_one.min.0.required, (nan_or_one.min.0.required + nan_or_one.extent.0.required) - 1, nan_or_one.min.0, (nan_or_one.min.0 + nan_or_one.extent.0) - 1))
assert(nan_or_one.extent.0 >= 0, halide_error_buffer_extents_negative("Output buffer nan_or_one", 0, nan_or_one.extent.0))
assert((nan_or_one.min.1 <= nan_or_one.min.1.required) && (((nan_or_one.min.1 + nan_or_one.extent.1) - 1) >= ((nan_or_one.min.1.required + nan_or_one.extent.1.required) - 1)), halide_error_access_out_of_bounds("Output buffer nan_or_one", 1, nan_or_one.min.1.required, (nan_or_one.min.1.required + nan_or_one.extent.1.required) - 1, nan_or_one.min.1, (nan_or_one.min.1 + nan_or_one.extent.1) - 1))
assert(nan_or_one.extent.1 >= 0, halide_error_buffer_extents_negative("Output buffer nan_or_one", 1, nan_or_one.extent.1))
assert(b0.stride.0 == b0.stride.0.constrained, halide_error_constraint_violated("b0.stride.0", b0.stride.0, "3", b0.stride.0.constrained))
assert(b0.min.0 == b0.min.0.constrained, halide_error_constraint_violated("b0.min.0", b0.min.0, "0", b0.min.0.constrained))
assert(b0.extent.0 == b0.extent.0.constrained, halide_error_constraint_violated("b0.extent.0", b0.extent.0, "1", b0.extent.0.constrained))
assert(b0.stride.1 == b0.stride.1.constrained, halide_error_constraint_violated("b0.stride.1", b0.stride.1, "3", b0.stride.1.constrained))
assert(b0.min.1 == b0.min.1.constrained, halide_error_constraint_violated("b0.min.1", b0.min.1, "0", b0.min.1.constrained))
assert(b0.extent.1 == b0.extent.1.constrained, halide_error_constraint_violated("b0.extent.1", b0.extent.1, "1", b0.extent.1.constrained))
assert(b0.stride.2 == b0.stride.2.constrained, halide_error_constraint_violated("b0.stride.2", b0.stride.2, "1", b0.stride.2.constrained))
assert(b0.min.2 == b0.min.2.constrained, halide_error_constraint_violated("b0.min.2", b0.min.2, "0", b0.min.2.constrained))
assert(b0.extent.2 == b0.extent.2.constrained, halide_error_constraint_violated("b0.extent.2", b0.extent.2, "3", b0.extent.2.constrained))
assert(nan_or_one.stride.0 == nan_or_one.stride.0.constrained, halide_error_constraint_violated("nan_or_one.stride.0", nan_or_one.stride.0, "1", nan_or_one.stride.0.constrained))
let b0.total_extent.0 = int64(b0.extent.0.constrained)
let b0.total_extent.1 = int64(b0.extent.1.constrained)*b0.total_extent.0
let b0.total_extent.2 = int64(b0.extent.2.constrained)*b0.total_extent.1
let nan_or_one.total_extent.0 = int64(nan_or_one.extent.0)
let nan_or_one.total_extent.1 = int64(nan_or_one.extent.1)*nan_or_one.total_extent.0
assert((uint64)abs(int64(b0.extent.0.constrained)*int64(b0.stride.0.constrained)) <= (uint64)2147483647, halide_error_buffer_allocation_too_large("b0", (uint64)abs(int64(b0.extent.0.constrained)*int64(b0.stride.0.constrained)), (uint64)2147483647))
assert((uint64)abs(int64(b0.extent.1.constrained)*int64(b0.stride.1.constrained)) <= (uint64)2147483647, halide_error_buffer_allocation_too_large("b0", (uint64)abs(int64(b0.extent.1.constrained)*int64(b0.stride.1.constrained)), (uint64)2147483647))
assert(b0.total_extent.1 <= (int64)2147483647, halide_error_buffer_extents_too_large("b0", b0.total_extent.1, (int64)2147483647))
assert((uint64)abs(int64(b0.extent.2.constrained)*int64(b0.stride.2.constrained)) <= (uint64)2147483647, halide_error_buffer_allocation_too_large("b0", (uint64)abs(int64(b0.extent.2.constrained)*int64(b0.stride.2.constrained)), (uint64)2147483647))
assert(b0.total_extent.2 <= (int64)2147483647, halide_error_buffer_extents_too_large("b0", b0.total_extent.2, (int64)2147483647))
assert((uint64)abs(int64(nan_or_one.extent.0)*int64(nan_or_one.stride.0.constrained)) <= (uint64)2147483647, halide_error_buffer_allocation_too_large("nan_or_one", (uint64)abs(int64(nan_or_one.extent.0)*int64(nan_or_one.stride.0.constrained)), (uint64)2147483647))
assert((uint64)abs(int64(nan_or_one.extent.1)*int64(nan_or_one.stride.1)) <= (uint64)2147483647, halide_error_buffer_allocation_too_large("nan_or_one", (uint64)abs(int64(nan_or_one.extent.1)*int64(nan_or_one.stride.1)), (uint64)2147483647))
assert(nan_or_one.total_extent.1 <= (int64)2147483647, halide_error_buffer_extents_too_large("nan_or_one", nan_or_one.total_extent.1, (int64)2147483647))
let nan_or_one.s0.row.loop_max = (nan_or_one.extent.1 + nan_or_one.min.1) + -1
let nan_or_one.s0.row.loop_min = nan_or_one.min.1
let nan_or_one.s0.row.loop_extent = nan_or_one.extent.1
let nan_or_one.s0.col.loop_max = (nan_or_one.extent.0 + nan_or_one.min.0) + -1
let nan_or_one.s0.col.loop_min = nan_or_one.min.0
let nan_or_one.s0.col.loop_extent = nan_or_one.extent.0
let nan_or_one.s0.col.col_outer.__block_id_x.loop_extent = nan_or_one.extent.0
let nan_or_one.s0.col.col_outer.__block_id_x.loop_max = nan_or_one.extent.0 + -1
let nan_or_one.s0.col.col_outer.__block_id_x.loop_min = 0
let nan_or_one.s0.col.col_inner.__thread_id_x.loop_extent = 1
let nan_or_one.s0.col.col_inner.__thread_id_x.loop_max = 0
let nan_or_one.s0.col.col_inner.__thread_id_x.loop_min = 0
let nan_or_one.s0.row.row_outer.__block_id_y.loop_extent = nan_or_one.extent.1
let nan_or_one.s0.row.row_outer.__block_id_y.loop_max = nan_or_one.extent.1 + -1
let nan_or_one.s0.row.row_outer.__block_id_y.loop_min = 0
let nan_or_one.s0.row.row_inner.__thread_id_y.loop_extent = 1
let nan_or_one.s0.row.row_inner.__thread_id_y.loop_max = 0
let nan_or_one.s0.row.row_inner.__thread_id_y.loop_min = 0
let nan_or_one.s0.row.row_outer.loop_min.orig = 0
produce nan_or_one {
gpu_block<OpenCL> (nan_or_one.s0.row.row_outer.__block_id_y, 0, nan_or_one.extent.1) {
let nan_or_one.s0.row.row_inner.base = nan_or_one.min.1 + nan_or_one.s0.row.row_outer.__block_id_y
let nan_or_one.s0.col.col_outer.loop_min.orig = 0
gpu_block<OpenCL> (nan_or_one.s0.col.col_outer.__block_id_x, 0, nan_or_one.extent.0) {
let nan_or_one.s0.col.col_inner.base = nan_or_one.min.0 + nan_or_one.s0.col.col_outer.__block_id_x
let nan_or_one.s0.row.row_inner.loop_min.orig = 0
gpu_thread<OpenCL> (nan_or_one.s0.row.row_inner.__thread_id_y, 0, 1) {
let nan_or_one.s0.row = nan_or_one.min.1 + nan_or_one.s0.row.row_outer.__block_id_y
let nan_or_one.s0.col.col_inner.loop_min.orig = 0
gpu_thread<OpenCL> (nan_or_one.s0.col.col_inner.__thread_id_x, 0, 1) {
let nan_or_one.s0.col = nan_or_one.min.0 + nan_or_one.s0.col.col_outer.__block_id_x
nan_or_one[((0 + ((nan_or_one.min.0 + nan_or_one.s0.col.col_outer.__block_id_x)*nan_or_one.stride.0)) + ((nan_or_one.min.1 + nan_or_one.s0.row.row_outer.__block_id_y)*nan_or_one.stride.1)) - ((0 + (nan_or_one.min.0*nan_or_one.stride.0)) + (nan_or_one.min.1*nan_or_one.stride.1))] =
let t1 = (float32)strict_float(nanf) in
(float32)strict_float(select((uint1)is_nan_f32((float32)strict_float(b0[(((0 + ((nan_or_one.min.0 + nan_or_one.s0.col.col_outer.__block_id_x)*b0.stride.0)) + ((nan_or_one.min.1 + nan_or_one.s0.row.row_outer.__block_id_y)*b0.stride.1)) + (0*b0.stride.2)) - (((0 + (b0.min.0*b0.stride.0)) + (b0.min.1*b0.stride.1)) + (b0.min.2*b0.stride.2))])), t1, t1))
}
}
}
}
}
}
Injecting host <-> dev buffer copies...
Lowering after injecting host <-> dev buffer copies:
assert((uint64)reinterpret((halide_buffer_t *)nan_or_one.buffer) != (uint64)0, halide_error_buffer_argument_is_null("nan_or_one"))
assert((uint64)reinterpret((halide_buffer_t *)b0.buffer) != (uint64)0, halide_error_buffer_argument_is_null("b0"))
let b0 = (void *)_halide_buffer_get_host((halide_buffer_t *)b0.buffer)
let b0.type = (uint32)_halide_buffer_get_type((halide_buffer_t *)b0.buffer)
let b0.dimensions = _halide_buffer_get_dimensions((halide_buffer_t *)b0.buffer)
let b0.min.0 = _halide_buffer_get_min((halide_buffer_t *)b0.buffer, 0)
let b0.extent.0 = _halide_buffer_get_extent((halide_buffer_t *)b0.buffer, 0)
let b0.stride.0 = _halide_buffer_get_stride((halide_buffer_t *)b0.buffer, 0)
let b0.min.1 = _halide_buffer_get_min((halide_buffer_t *)b0.buffer, 1)
let b0.extent.1 = _halide_buffer_get_extent((halide_buffer_t *)b0.buffer, 1)
let b0.stride.1 = _halide_buffer_get_stride((halide_buffer_t *)b0.buffer, 1)
let b0.min.2 = _halide_buffer_get_min((halide_buffer_t *)b0.buffer, 2)
let b0.extent.2 = _halide_buffer_get_extent((halide_buffer_t *)b0.buffer, 2)
let b0.stride.2 = _halide_buffer_get_stride((halide_buffer_t *)b0.buffer, 2)
let nan_or_one = (void *)_halide_buffer_get_host((halide_buffer_t *)nan_or_one.buffer)
let nan_or_one.type = (uint32)_halide_buffer_get_type((halide_buffer_t *)nan_or_one.buffer)
let nan_or_one.dimensions = _halide_buffer_get_dimensions((halide_buffer_t *)nan_or_one.buffer)
let nan_or_one.min.0 = _halide_buffer_get_min((halide_buffer_t *)nan_or_one.buffer, 0)
let nan_or_one.extent.0 = _halide_buffer_get_extent((halide_buffer_t *)nan_or_one.buffer, 0)
let nan_or_one.stride.0 = _halide_buffer_get_stride((halide_buffer_t *)nan_or_one.buffer, 0)
let nan_or_one.min.1 = _halide_buffer_get_min((halide_buffer_t *)nan_or_one.buffer, 1)
let nan_or_one.extent.1 = _halide_buffer_get_extent((halide_buffer_t *)nan_or_one.buffer, 1)
let nan_or_one.stride.1 = _halide_buffer_get_stride((halide_buffer_t *)nan_or_one.buffer, 1)
let nan_or_one.s0.row.max = (nan_or_one.extent.1 + nan_or_one.min.1) + -1
let nan_or_one.s0.row.min = nan_or_one.min.1
let nan_or_one.s0.col.max = (nan_or_one.extent.0 + nan_or_one.min.0) + -1
let nan_or_one.s0.col.min = nan_or_one.min.0
let b0.extent.0.required = ((nan_or_one.min.0 + (nan_or_one.extent.0 + -1)) + 1) - (nan_or_one.min.0 + 0)
let b0.min.0.required = nan_or_one.min.0 + 0
let b0.stride.0.required = 1
let b0.extent.1.required = ((nan_or_one.min.1 + (nan_or_one.extent.1 + -1)) + 1) - (nan_or_one.min.1 + 0)
let b0.min.1.required = nan_or_one.min.1 + 0
let b0.stride.1.required = b0.stride.0.required*b0.extent.0.required
let b0.extent.2.required = (0 + 1) - 0
let b0.min.2.required = 0
let b0.stride.2.required = b0.stride.1.required*b0.extent.1.required
let nan_or_one.extent.0.required = ((nan_or_one.min.0 + (nan_or_one.extent.0 + -1)) + 1) - (nan_or_one.min.0 + 0)
let nan_or_one.min.0.required = nan_or_one.min.0 + 0
let nan_or_one.stride.0.required = 1
let nan_or_one.extent.1.required = ((nan_or_one.min.1 + (nan_or_one.extent.1 + -1)) + 1) - (nan_or_one.min.1 + 0)
let nan_or_one.min.1.required = nan_or_one.min.1 + 0
let nan_or_one.stride.1.required = nan_or_one.stride.0.required*nan_or_one.extent.0.required
let b0.stride.0.constrained = 3
let b0.min.0.constrained = 0
let b0.extent.0.constrained = 1
let b0.stride.1.constrained = 3
let b0.min.1.constrained = 0
let b0.extent.1.constrained = 1
let b0.stride.2.constrained = 1
let b0.min.2.constrained = 0
let b0.extent.2.constrained = 3
let nan_or_one.stride.0.constrained = 1
let b0.stride.0.proposed = 3
let b0.min.0.proposed = 0
let b0.extent.0.proposed = 1
let b0.stride.1.proposed = 3
let b0.min.1.proposed = 0
let b0.extent.1.proposed = 1
let b0.stride.2.proposed = 1
let b0.min.2.proposed = 0
let b0.extent.2.proposed = 3
let nan_or_one.stride.0.proposed = 1
let nan_or_one.min.0.proposed = nan_or_one.min.0.required
let nan_or_one.extent.0.proposed = nan_or_one.extent.0.required
let nan_or_one.stride.1.proposed = nan_or_one.stride.1.required
let nan_or_one.min.1.proposed = nan_or_one.min.1.required
let nan_or_one.extent.1.proposed = nan_or_one.extent.1.required
assert(!(uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)b0.buffer) || ((b0.min.0.proposed <= b0.min.0.required) && (((b0.min.0.proposed + b0.extent.0.proposed) - 1) >= ((b0.min.0.required + b0.extent.0.required) - 1))), halide_error_constraints_make_required_region_smaller("Input buffer b0", 0, b0.min.0.proposed, (b0.min.0.proposed + b0.extent.0.proposed) - 1, b0.min.0.required, (b0.min.0.required + b0.extent.0.required) - 1))
assert(!(uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)b0.buffer) || ((b0.min.1.proposed <= b0.min.1.required) && (((b0.min.1.proposed + b0.extent.1.proposed) - 1) >= ((b0.min.1.required + b0.extent.1.required) - 1))), halide_error_constraints_make_required_region_smaller("Input buffer b0", 1, b0.min.1.proposed, (b0.min.1.proposed + b0.extent.1.proposed) - 1, b0.min.1.required, (b0.min.1.required + b0.extent.1.required) - 1))
assert(!(uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)b0.buffer) || ((b0.min.2.proposed <= b0.min.2.required) && (((b0.min.2.proposed + b0.extent.2.proposed) - 1) >= ((b0.min.2.required + b0.extent.2.required) - 1))), halide_error_constraints_make_required_region_smaller("Input buffer b0", 2, b0.min.2.proposed, (b0.min.2.proposed + b0.extent.2.proposed) - 1, b0.min.2.required, (b0.min.2.required + b0.extent.2.required) - 1))
assert(!(uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)nan_or_one.buffer) || ((nan_or_one.min.0.proposed <= nan_or_one.min.0.required) && (((nan_or_one.min.0.proposed + nan_or_one.extent.0.proposed) - 1) >= ((nan_or_one.min.0.required + nan_or_one.extent.0.required) - 1))), halide_error_constraints_make_required_region_smaller("Output buffer nan_or_one", 0, nan_or_one.min.0.proposed, (nan_or_one.min.0.proposed + nan_or_one.extent.0.proposed) - 1, nan_or_one.min.0.required, (nan_or_one.min.0.required + nan_or_one.extent.0.required) - 1))
assert(!(uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)nan_or_one.buffer) || ((nan_or_one.min.1.proposed <= nan_or_one.min.1.required) && (((nan_or_one.min.1.proposed + nan_or_one.extent.1.proposed) - 1) >= ((nan_or_one.min.1.required + nan_or_one.extent.1.required) - 1))), halide_error_constraints_make_required_region_smaller("Output buffer nan_or_one", 1, nan_or_one.min.1.proposed, (nan_or_one.min.1.proposed + nan_or_one.extent.1.proposed) - 1, nan_or_one.min.1.required, (nan_or_one.min.1.required + nan_or_one.extent.1.required) - 1))
if ((uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)b0.buffer)) {
(halide_buffer_t *)_halide_buffer_init((halide_buffer_t *)b0.buffer, (halide_dimension_t *)_halide_buffer_get_shape((halide_buffer_t *)b0.buffer), (void *)reinterpret((uint64)0), (uint64)0, (halide_device_interface_t *)reinterpret((uint64)0), 2, 32, 3, (halide_dimension_t *)make_struct(b0.min.0.proposed, b0.extent.0.proposed, b0.stride.0.proposed, 0, b0.min.1.proposed, b0.extent.1.proposed, b0.stride.1.proposed, 0, b0.min.2.proposed, b0.extent.2.proposed, b0.stride.2.proposed, 0), (uint64)0)
}
if ((uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)nan_or_one.buffer)) {
(halide_buffer_t *)_halide_buffer_init((halide_buffer_t *)nan_or_one.buffer, (halide_dimension_t *)_halide_buffer_get_shape((halide_buffer_t *)nan_or_one.buffer), (void *)reinterpret((uint64)0), (uint64)0, (halide_device_interface_t *)reinterpret((uint64)0), 2, 32, 2, (halide_dimension_t *)make_struct(nan_or_one.min.0.proposed, nan_or_one.extent.0.proposed, nan_or_one.stride.0.proposed, 0, nan_or_one.min.1.proposed, nan_or_one.extent.1.proposed, nan_or_one.stride.1.proposed, 0), (uint64)0)
}
if (!(((uint1)0 || (uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)b0.buffer)) || (uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)nan_or_one.buffer))) {
assert(b0.type == (uint32)73730, halide_error_bad_type("Input buffer b0", b0.type, (uint32)73730))
assert(b0.dimensions == 3, halide_error_bad_dimensions("Input buffer b0", b0.dimensions, 3))
assert(nan_or_one.type == (uint32)73730, halide_error_bad_type("Output buffer nan_or_one", nan_or_one.type, (uint32)73730))
assert(nan_or_one.dimensions == 2, halide_error_bad_dimensions("Output buffer nan_or_one", nan_or_one.dimensions, 2))
assert((b0.min.0 <= b0.min.0.required) && (((b0.min.0 + b0.extent.0) - 1) >= ((b0.min.0.required + b0.extent.0.required) - 1)), halide_error_access_out_of_bounds("Input buffer b0", 0, b0.min.0.required, (b0.min.0.required + b0.extent.0.required) - 1, b0.min.0, (b0.min.0 + b0.extent.0) - 1))
assert(b0.extent.0 >= 0, halide_error_buffer_extents_negative("Input buffer b0", 0, b0.extent.0))
assert((b0.min.1 <= b0.min.1.required) && (((b0.min.1 + b0.extent.1) - 1) >= ((b0.min.1.required + b0.extent.1.required) - 1)), halide_error_access_out_of_bounds("Input buffer b0", 1, b0.min.1.required, (b0.min.1.required + b0.extent.1.required) - 1, b0.min.1, (b0.min.1 + b0.extent.1) - 1))
assert(b0.extent.1 >= 0, halide_error_buffer_extents_negative("Input buffer b0", 1, b0.extent.1))
assert((b0.min.2 <= b0.min.2.required) && (((b0.min.2 + b0.extent.2) - 1) >= ((b0.min.2.required + b0.extent.2.required) - 1)), halide_error_access_out_of_bounds("Input buffer b0", 2, b0.min.2.required, (b0.min.2.required + b0.extent.2.required) - 1, b0.min.2, (b0.min.2 + b0.extent.2) - 1))
assert(b0.extent.2 >= 0, halide_error_buffer_extents_negative("Input buffer b0", 2, b0.extent.2))
assert((nan_or_one.min.0 <= nan_or_one.min.0.required) && (((nan_or_one.min.0 + nan_or_one.extent.0) - 1) >= ((nan_or_one.min.0.required + nan_or_one.extent.0.required) - 1)), halide_error_access_out_of_bounds("Output buffer nan_or_one", 0, nan_or_one.min.0.required, (nan_or_one.min.0.required + nan_or_one.extent.0.required) - 1, nan_or_one.min.0, (nan_or_one.min.0 + nan_or_one.extent.0) - 1))
assert(nan_or_one.extent.0 >= 0, halide_error_buffer_extents_negative("Output buffer nan_or_one", 0, nan_or_one.extent.0))
assert((nan_or_one.min.1 <= nan_or_one.min.1.required) && (((nan_or_one.min.1 + nan_or_one.extent.1) - 1) >= ((nan_or_one.min.1.required + nan_or_one.extent.1.required) - 1)), halide_error_access_out_of_bounds("Output buffer nan_or_one", 1, nan_or_one.min.1.required, (nan_or_one.min.1.required + nan_or_one.extent.1.required) - 1, nan_or_one.min.1, (nan_or_one.min.1 + nan_or_one.extent.1) - 1))
assert(nan_or_one.extent.1 >= 0, halide_error_buffer_extents_negative("Output buffer nan_or_one", 1, nan_or_one.extent.1))
assert(b0.stride.0 == b0.stride.0.constrained, halide_error_constraint_violated("b0.stride.0", b0.stride.0, "3", b0.stride.0.constrained))
assert(b0.min.0 == b0.min.0.constrained, halide_error_constraint_violated("b0.min.0", b0.min.0, "0", b0.min.0.constrained))
assert(b0.extent.0 == b0.extent.0.constrained, halide_error_constraint_violated("b0.extent.0", b0.extent.0, "1", b0.extent.0.constrained))
assert(b0.stride.1 == b0.stride.1.constrained, halide_error_constraint_violated("b0.stride.1", b0.stride.1, "3", b0.stride.1.constrained))
assert(b0.min.1 == b0.min.1.constrained, halide_error_constraint_violated("b0.min.1", b0.min.1, "0", b0.min.1.constrained))
assert(b0.extent.1 == b0.extent.1.constrained, halide_error_constraint_violated("b0.extent.1", b0.extent.1, "1", b0.extent.1.constrained))
assert(b0.stride.2 == b0.stride.2.constrained, halide_error_constraint_violated("b0.stride.2", b0.stride.2, "1", b0.stride.2.constrained))
assert(b0.min.2 == b0.min.2.constrained, halide_error_constraint_violated("b0.min.2", b0.min.2, "0", b0.min.2.constrained))
assert(b0.extent.2 == b0.extent.2.constrained, halide_error_constraint_violated("b0.extent.2", b0.extent.2, "3", b0.extent.2.constrained))
assert(nan_or_one.stride.0 == nan_or_one.stride.0.constrained, halide_error_constraint_violated("nan_or_one.stride.0", nan_or_one.stride.0, "1", nan_or_one.stride.0.constrained))
let b0.total_extent.0 = int64(b0.extent.0.constrained)
let b0.total_extent.1 = int64(b0.extent.1.constrained)*b0.total_extent.0
let b0.total_extent.2 = int64(b0.extent.2.constrained)*b0.total_extent.1
let nan_or_one.total_extent.0 = int64(nan_or_one.extent.0)
let nan_or_one.total_extent.1 = int64(nan_or_one.extent.1)*nan_or_one.total_extent.0
assert((uint64)abs(int64(b0.extent.0.constrained)*int64(b0.stride.0.constrained)) <= (uint64)2147483647, halide_error_buffer_allocation_too_large("b0", (uint64)abs(int64(b0.extent.0.constrained)*int64(b0.stride.0.constrained)), (uint64)2147483647))
assert((uint64)abs(int64(b0.extent.1.constrained)*int64(b0.stride.1.constrained)) <= (uint64)2147483647, halide_error_buffer_allocation_too_large("b0", (uint64)abs(int64(b0.extent.1.constrained)*int64(b0.stride.1.constrained)), (uint64)2147483647))
assert(b0.total_extent.1 <= (int64)2147483647, halide_error_buffer_extents_too_large("b0", b0.total_extent.1, (int64)2147483647))
assert((uint64)abs(int64(b0.extent.2.constrained)*int64(b0.stride.2.constrained)) <= (uint64)2147483647, halide_error_buffer_allocation_too_large("b0", (uint64)abs(int64(b0.extent.2.constrained)*int64(b0.stride.2.constrained)), (uint64)2147483647))
assert(b0.total_extent.2 <= (int64)2147483647, halide_error_buffer_extents_too_large("b0", b0.total_extent.2, (int64)2147483647))
assert((uint64)abs(int64(nan_or_one.extent.0)*int64(nan_or_one.stride.0.constrained)) <= (uint64)2147483647, halide_error_buffer_allocation_too_large("nan_or_one", (uint64)abs(int64(nan_or_one.extent.0)*int64(nan_or_one.stride.0.constrained)), (uint64)2147483647))
assert((uint64)abs(int64(nan_or_one.extent.1)*int64(nan_or_one.stride.1)) <= (uint64)2147483647, halide_error_buffer_allocation_too_large("nan_or_one", (uint64)abs(int64(nan_or_one.extent.1)*int64(nan_or_one.stride.1)), (uint64)2147483647))
assert(nan_or_one.total_extent.1 <= (int64)2147483647, halide_error_buffer_extents_too_large("nan_or_one", nan_or_one.total_extent.1, (int64)2147483647))
let nan_or_one.s0.row.loop_max = (nan_or_one.extent.1 + nan_or_one.min.1) + -1
let nan_or_one.s0.row.loop_min = nan_or_one.min.1
let nan_or_one.s0.row.loop_extent = nan_or_one.extent.1
let nan_or_one.s0.col.loop_max = (nan_or_one.extent.0 + nan_or_one.min.0) + -1
let nan_or_one.s0.col.loop_min = nan_or_one.min.0
let nan_or_one.s0.col.loop_extent = nan_or_one.extent.0
let nan_or_one.s0.col.col_outer.__block_id_x.loop_extent = nan_or_one.extent.0
let nan_or_one.s0.col.col_outer.__block_id_x.loop_max = nan_or_one.extent.0 + -1
let nan_or_one.s0.col.col_outer.__block_id_x.loop_min = 0
let nan_or_one.s0.col.col_inner.__thread_id_x.loop_extent = 1
let nan_or_one.s0.col.col_inner.__thread_id_x.loop_max = 0
let nan_or_one.s0.col.col_inner.__thread_id_x.loop_min = 0
let nan_or_one.s0.row.row_outer.__block_id_y.loop_extent = nan_or_one.extent.1
let nan_or_one.s0.row.row_outer.__block_id_y.loop_max = nan_or_one.extent.1 + -1
let nan_or_one.s0.row.row_outer.__block_id_y.loop_min = 0
let nan_or_one.s0.row.row_inner.__thread_id_y.loop_extent = 1
let nan_or_one.s0.row.row_inner.__thread_id_y.loop_max = 0
let nan_or_one.s0.row.row_inner.__thread_id_y.loop_min = 0
let nan_or_one.s0.row.row_outer.loop_min.orig = 0
produce nan_or_one {
let halide_copy_to_device_result = halide_copy_to_device((halide_buffer_t *)b0.buffer, (halide_device_interface_t *)halide_opencl_device_interface())
assert(halide_copy_to_device_result == 0, halide_copy_to_device_result)
let halide_copy_to_device_result$1 = halide_copy_to_device((halide_buffer_t *)nan_or_one.buffer, (halide_device_interface_t *)halide_opencl_device_interface())
assert(halide_copy_to_device_result$1 == 0, halide_copy_to_device_result$1)
gpu_block<OpenCL> (nan_or_one.s0.row.row_outer.__block_id_y, 0, nan_or_one.extent.1) {
let nan_or_one.s0.row.row_inner.base = nan_or_one.min.1 + nan_or_one.s0.row.row_outer.__block_id_y
let nan_or_one.s0.col.col_outer.loop_min.orig = 0
gpu_block<OpenCL> (nan_or_one.s0.col.col_outer.__block_id_x, 0, nan_or_one.extent.0) {
let nan_or_one.s0.col.col_inner.base = nan_or_one.min.0 + nan_or_one.s0.col.col_outer.__block_id_x
let nan_or_one.s0.row.row_inner.loop_min.orig = 0
gpu_thread<OpenCL> (nan_or_one.s0.row.row_inner.__thread_id_y, 0, 1) {
let nan_or_one.s0.row = nan_or_one.min.1 + nan_or_one.s0.row.row_outer.__block_id_y
let nan_or_one.s0.col.col_inner.loop_min.orig = 0
gpu_thread<OpenCL> (nan_or_one.s0.col.col_inner.__thread_id_x, 0, 1) {
let nan_or_one.s0.col = nan_or_one.min.0 + nan_or_one.s0.col.col_outer.__block_id_x
nan_or_one[((0 + ((nan_or_one.min.0 + nan_or_one.s0.col.col_outer.__block_id_x)*nan_or_one.stride.0)) + ((nan_or_one.min.1 + nan_or_one.s0.row.row_outer.__block_id_y)*nan_or_one.stride.1)) - ((0 + (nan_or_one.min.0*nan_or_one.stride.0)) + (nan_or_one.min.1*nan_or_one.stride.1))] =
let t1 = (float32)strict_float(nanf) in
(float32)strict_float(select((uint1)is_nan_f32((float32)strict_float(b0[(((0 + ((nan_or_one.min.0 + nan_or_one.s0.col.col_outer.__block_id_x)*b0.stride.0)) + ((nan_or_one.min.1 + nan_or_one.s0.row.row_outer.__block_id_y)*b0.stride.1)) + (0*b0.stride.2)) - (((0 + (b0.min.0*b0.stride.0)) + (b0.min.1*b0.stride.1)) + (b0.min.2*b0.stride.2))])), t1, t1))
}
}
}
}
_halide_buffer_set_device_dirty((halide_buffer_t *)nan_or_one.buffer, (uint1)1)
}
}
Selecting a GPU API for extern stages...
Lowering after selecting a GPU API for extern stages: (unchanged)
Simplifying...
Lowering after second simplifcation:
assert((uint64)reinterpret((halide_buffer_t *)nan_or_one.buffer) != (uint64)0, halide_error_buffer_argument_is_null("nan_or_one"))
assert((uint64)reinterpret((halide_buffer_t *)b0.buffer) != (uint64)0, halide_error_buffer_argument_is_null("b0"))
let b0 = (void *)_halide_buffer_get_host((halide_buffer_t *)b0.buffer)
let b0.type = (uint32)_halide_buffer_get_type((halide_buffer_t *)b0.buffer)
let b0.dimensions = _halide_buffer_get_dimensions((halide_buffer_t *)b0.buffer)
let b0.min.0 = _halide_buffer_get_min((halide_buffer_t *)b0.buffer, 0)
let b0.extent.0 = _halide_buffer_get_extent((halide_buffer_t *)b0.buffer, 0)
let b0.stride.0 = _halide_buffer_get_stride((halide_buffer_t *)b0.buffer, 0)
let b0.min.1 = _halide_buffer_get_min((halide_buffer_t *)b0.buffer, 1)
let b0.extent.1 = _halide_buffer_get_extent((halide_buffer_t *)b0.buffer, 1)
let b0.stride.1 = _halide_buffer_get_stride((halide_buffer_t *)b0.buffer, 1)
let b0.min.2 = _halide_buffer_get_min((halide_buffer_t *)b0.buffer, 2)
let b0.extent.2 = _halide_buffer_get_extent((halide_buffer_t *)b0.buffer, 2)
let b0.stride.2 = _halide_buffer_get_stride((halide_buffer_t *)b0.buffer, 2)
let nan_or_one = (void *)_halide_buffer_get_host((halide_buffer_t *)nan_or_one.buffer)
let nan_or_one.type = (uint32)_halide_buffer_get_type((halide_buffer_t *)nan_or_one.buffer)
let nan_or_one.dimensions = _halide_buffer_get_dimensions((halide_buffer_t *)nan_or_one.buffer)
let nan_or_one.min.0 = _halide_buffer_get_min((halide_buffer_t *)nan_or_one.buffer, 0)
let nan_or_one.extent.0 = _halide_buffer_get_extent((halide_buffer_t *)nan_or_one.buffer, 0)
let nan_or_one.stride.0 = _halide_buffer_get_stride((halide_buffer_t *)nan_or_one.buffer, 0)
let nan_or_one.min.1 = _halide_buffer_get_min((halide_buffer_t *)nan_or_one.buffer, 1)
let nan_or_one.extent.1 = _halide_buffer_get_extent((halide_buffer_t *)nan_or_one.buffer, 1)
let nan_or_one.stride.1 = _halide_buffer_get_stride((halide_buffer_t *)nan_or_one.buffer, 1)
assert(!(uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)b0.buffer) || ((0 <= nan_or_one.min.0) && ((nan_or_one.extent.0 + nan_or_one.min.0) <= 1)), halide_error_constraints_make_required_region_smaller("Input buffer b0", 0, 0, 0, nan_or_one.min.0, (nan_or_one.extent.0 + nan_or_one.min.0) + -1))
assert(!(uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)b0.buffer) || ((0 <= nan_or_one.min.1) && ((nan_or_one.extent.1 + nan_or_one.min.1) <= 1)), halide_error_constraints_make_required_region_smaller("Input buffer b0", 1, 0, 0, nan_or_one.min.1, (nan_or_one.extent.1 + nan_or_one.min.1) + -1))
0
0
0
if ((uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)b0.buffer)) {
(halide_buffer_t *)_halide_buffer_init((halide_buffer_t *)b0.buffer, (halide_dimension_t *)_halide_buffer_get_shape((halide_buffer_t *)b0.buffer), (void *)reinterpret((uint64)0), (uint64)0, (halide_device_interface_t *)reinterpret((uint64)0), 2, 32, 3, (halide_dimension_t *)make_struct(0, 1, 3, 0, 0, 1, 3, 0, 0, 3, 1, 0), (uint64)0)
}
if ((uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)nan_or_one.buffer)) {
(halide_buffer_t *)_halide_buffer_init((halide_buffer_t *)nan_or_one.buffer, (halide_dimension_t *)_halide_buffer_get_shape((halide_buffer_t *)nan_or_one.buffer), (void *)reinterpret((uint64)0), (uint64)0, (halide_device_interface_t *)reinterpret((uint64)0), 2, 32, 2, (halide_dimension_t *)make_struct(nan_or_one.min.0, nan_or_one.extent.0, 1, 0, nan_or_one.min.1, nan_or_one.extent.1, nan_or_one.extent.0, 0), (uint64)0)
}
if (!((uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)b0.buffer) || (uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)nan_or_one.buffer))) {
assert(b0.type == (uint32)73730, halide_error_bad_type("Input buffer b0", b0.type, (uint32)73730))
assert(b0.dimensions == 3, halide_error_bad_dimensions("Input buffer b0", b0.dimensions, 3))
assert(nan_or_one.type == (uint32)73730, halide_error_bad_type("Output buffer nan_or_one", nan_or_one.type, (uint32)73730))
assert(nan_or_one.dimensions == 2, halide_error_bad_dimensions("Output buffer nan_or_one", nan_or_one.dimensions, 2))
assert((b0.min.0 <= nan_or_one.min.0) && ((nan_or_one.extent.0 + nan_or_one.min.0) <= (b0.extent.0 + b0.min.0)), halide_error_access_out_of_bounds("Input buffer b0", 0, nan_or_one.min.0, (nan_or_one.extent.0 + nan_or_one.min.0) + -1, b0.min.0, (b0.extent.0 + b0.min.0) + -1))
assert(0 <= b0.extent.0, halide_error_buffer_extents_negative("Input buffer b0", 0, b0.extent.0))
assert((b0.min.1 <= nan_or_one.min.1) && ((nan_or_one.extent.1 + nan_or_one.min.1) <= (b0.extent.1 + b0.min.1)), halide_error_access_out_of_bounds("Input buffer b0", 1, nan_or_one.min.1, (nan_or_one.extent.1 + nan_or_one.min.1) + -1, b0.min.1, (b0.extent.1 + b0.min.1) + -1))
assert(0 <= b0.extent.1, halide_error_buffer_extents_negative("Input buffer b0", 1, b0.extent.1))
assert((b0.min.2 <= 0) && (1 <= (b0.extent.2 + b0.min.2)), halide_error_access_out_of_bounds("Input buffer b0", 2, 0, 0, b0.min.2, (b0.extent.2 + b0.min.2) + -1))
assert(0 <= b0.extent.2, halide_error_buffer_extents_negative("Input buffer b0", 2, b0.extent.2))
0
assert(0 <= nan_or_one.extent.0, halide_error_buffer_extents_negative("Output buffer nan_or_one", 0, nan_or_one.extent.0))
0
assert(0 <= nan_or_one.extent.1, halide_error_buffer_extents_negative("Output buffer nan_or_one", 1, nan_or_one.extent.1))
assert(b0.stride.0 == 3, halide_error_constraint_violated("b0.stride.0", b0.stride.0, "3", 3))
assert(b0.min.0 == 0, halide_error_constraint_violated("b0.min.0", b0.min.0, "0", 0))
assert(b0.extent.0 == 1, halide_error_constraint_violated("b0.extent.0", b0.extent.0, "1", 1))
assert(b0.stride.1 == 3, halide_error_constraint_violated("b0.stride.1", b0.stride.1, "3", 3))
assert(b0.min.1 == 0, halide_error_constraint_violated("b0.min.1", b0.min.1, "0", 0))
assert(b0.extent.1 == 1, halide_error_constraint_violated("b0.extent.1", b0.extent.1, "1", 1))
assert(b0.stride.2 == 1, halide_error_constraint_violated("b0.stride.2", b0.stride.2, "1", 1))
assert(b0.min.2 == 0, halide_error_constraint_violated("b0.min.2", b0.min.2, "0", 0))
assert(b0.extent.2 == 3, halide_error_constraint_violated("b0.extent.2", b0.extent.2, "3", 3))
assert(nan_or_one.stride.0 == 1, halide_error_constraint_violated("nan_or_one.stride.0", nan_or_one.stride.0, "1", 1))
let nan_or_one.total_extent.1 = int64(nan_or_one.extent.1)*int64(nan_or_one.extent.0)
assert(uint64(int64(nan_or_one.extent.0)) <= (uint64)2147483647, halide_error_buffer_allocation_too_large("nan_or_one", uint64(int64(nan_or_one.extent.0)), (uint64)2147483647))
assert((uint64)abs(int64(nan_or_one.extent.1)*int64(nan_or_one.stride.1)) <= (uint64)2147483647, halide_error_buffer_allocation_too_large("nan_or_one", (uint64)abs(int64(nan_or_one.extent.1)*int64(nan_or_one.stride.1)), (uint64)2147483647))
assert(nan_or_one.total_extent.1 <= (int64)2147483647, halide_error_buffer_extents_too_large("nan_or_one", nan_or_one.total_extent.1, (int64)2147483647))
produce nan_or_one {
let halide_copy_to_device_result = halide_copy_to_device((halide_buffer_t *)b0.buffer, (halide_device_interface_t *)halide_opencl_device_interface())
assert(halide_copy_to_device_result == 0, halide_copy_to_device_result)
let halide_copy_to_device_result$1 = halide_copy_to_device((halide_buffer_t *)nan_or_one.buffer, (halide_device_interface_t *)halide_opencl_device_interface())
assert(halide_copy_to_device_result$1 == 0, halide_copy_to_device_result$1)
gpu_block<OpenCL> (nan_or_one.s0.row.row_outer.__block_id_y, 0, nan_or_one.extent.1) {
gpu_block<OpenCL> (nan_or_one.s0.col.col_outer.__block_id_x, 0, nan_or_one.extent.0) {
gpu_thread<OpenCL> (nan_or_one.s0.row.row_inner.__thread_id_y, 0, 1) {
gpu_thread<OpenCL> (nan_or_one.s0.col.col_inner.__thread_id_x, 0, 1) {
nan_or_one[(((nan_or_one.min.1 + nan_or_one.s0.row.row_outer.__block_id_y)*nan_or_one.stride.1) + (nan_or_one.min.0 + nan_or_one.s0.col.col_outer.__block_id_x)) - ((nan_or_one.min.1*nan_or_one.stride.1) + nan_or_one.min.0)] =
let t1 = (float32)strict_float(nanf) in
(float32)strict_float(select((uint1)is_nan_f32((float32)strict_float(b0[((nan_or_one.min.0 + nan_or_one.s0.col.col_outer.__block_id_x) + (nan_or_one.min.1 + nan_or_one.s0.row.row_outer.__block_id_y))*3])), t1, t1))
}
}
}
}
_halide_buffer_set_device_dirty((halide_buffer_t *)nan_or_one.buffer, (uint1)1)
}
}
Reduce prefetch dimension...
Lowering after reduce prefetch dimension: (unchanged)
Simplifying correlated differences...
Lowering after simplifying correlated differences:
assert((uint64)reinterpret((halide_buffer_t *)nan_or_one.buffer) != (uint64)0, halide_error_buffer_argument_is_null("nan_or_one"))
assert((uint64)reinterpret((halide_buffer_t *)b0.buffer) != (uint64)0, halide_error_buffer_argument_is_null("b0"))
let b0 = (void *)_halide_buffer_get_host((halide_buffer_t *)b0.buffer)
let b0.type = (uint32)_halide_buffer_get_type((halide_buffer_t *)b0.buffer)
let b0.dimensions = _halide_buffer_get_dimensions((halide_buffer_t *)b0.buffer)
let b0.min.0 = _halide_buffer_get_min((halide_buffer_t *)b0.buffer, 0)
let b0.extent.0 = _halide_buffer_get_extent((halide_buffer_t *)b0.buffer, 0)
let b0.stride.0 = _halide_buffer_get_stride((halide_buffer_t *)b0.buffer, 0)
let b0.min.1 = _halide_buffer_get_min((halide_buffer_t *)b0.buffer, 1)
let b0.extent.1 = _halide_buffer_get_extent((halide_buffer_t *)b0.buffer, 1)
let b0.stride.1 = _halide_buffer_get_stride((halide_buffer_t *)b0.buffer, 1)
let b0.min.2 = _halide_buffer_get_min((halide_buffer_t *)b0.buffer, 2)
let b0.extent.2 = _halide_buffer_get_extent((halide_buffer_t *)b0.buffer, 2)
let b0.stride.2 = _halide_buffer_get_stride((halide_buffer_t *)b0.buffer, 2)
let nan_or_one = (void *)_halide_buffer_get_host((halide_buffer_t *)nan_or_one.buffer)
let nan_or_one.type = (uint32)_halide_buffer_get_type((halide_buffer_t *)nan_or_one.buffer)
let nan_or_one.dimensions = _halide_buffer_get_dimensions((halide_buffer_t *)nan_or_one.buffer)
let nan_or_one.min.0 = _halide_buffer_get_min((halide_buffer_t *)nan_or_one.buffer, 0)
let nan_or_one.extent.0 = _halide_buffer_get_extent((halide_buffer_t *)nan_or_one.buffer, 0)
let nan_or_one.stride.0 = _halide_buffer_get_stride((halide_buffer_t *)nan_or_one.buffer, 0)
let nan_or_one.min.1 = _halide_buffer_get_min((halide_buffer_t *)nan_or_one.buffer, 1)
let nan_or_one.extent.1 = _halide_buffer_get_extent((halide_buffer_t *)nan_or_one.buffer, 1)
let nan_or_one.stride.1 = _halide_buffer_get_stride((halide_buffer_t *)nan_or_one.buffer, 1)
assert(!(uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)b0.buffer) || ((0 <= nan_or_one.min.0) && ((nan_or_one.extent.0 + nan_or_one.min.0) <= 1)), halide_error_constraints_make_required_region_smaller("Input buffer b0", 0, 0, 0, nan_or_one.min.0, (nan_or_one.extent.0 + nan_or_one.min.0) + -1))
assert(!(uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)b0.buffer) || ((0 <= nan_or_one.min.1) && ((nan_or_one.extent.1 + nan_or_one.min.1) <= 1)), halide_error_constraints_make_required_region_smaller("Input buffer b0", 1, 0, 0, nan_or_one.min.1, (nan_or_one.extent.1 + nan_or_one.min.1) + -1))
0
0
0
if ((uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)b0.buffer)) {
(halide_buffer_t *)_halide_buffer_init((halide_buffer_t *)b0.buffer, (halide_dimension_t *)_halide_buffer_get_shape((halide_buffer_t *)b0.buffer), (void *)reinterpret((uint64)0), (uint64)0, (halide_device_interface_t *)reinterpret((uint64)0), 2, 32, 3, (halide_dimension_t *)make_struct(0, 1, 3, 0, 0, 1, 3, 0, 0, 3, 1, 0), (uint64)0)
}
if ((uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)nan_or_one.buffer)) {
(halide_buffer_t *)_halide_buffer_init((halide_buffer_t *)nan_or_one.buffer, (halide_dimension_t *)_halide_buffer_get_shape((halide_buffer_t *)nan_or_one.buffer), (void *)reinterpret((uint64)0), (uint64)0, (halide_device_interface_t *)reinterpret((uint64)0), 2, 32, 2, (halide_dimension_t *)make_struct(nan_or_one.min.0, nan_or_one.extent.0, 1, 0, nan_or_one.min.1, nan_or_one.extent.1, nan_or_one.extent.0, 0), (uint64)0)
}
if (!((uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)b0.buffer) || (uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)nan_or_one.buffer))) {
assert(b0.type == (uint32)73730, halide_error_bad_type("Input buffer b0", b0.type, (uint32)73730))
assert(b0.dimensions == 3, halide_error_bad_dimensions("Input buffer b0", b0.dimensions, 3))
assert(nan_or_one.type == (uint32)73730, halide_error_bad_type("Output buffer nan_or_one", nan_or_one.type, (uint32)73730))
assert(nan_or_one.dimensions == 2, halide_error_bad_dimensions("Output buffer nan_or_one", nan_or_one.dimensions, 2))
assert((b0.min.0 <= nan_or_one.min.0) && ((nan_or_one.extent.0 + nan_or_one.min.0) <= (b0.extent.0 + b0.min.0)), halide_error_access_out_of_bounds("Input buffer b0", 0, nan_or_one.min.0, (nan_or_one.extent.0 + nan_or_one.min.0) + -1, b0.min.0, (b0.extent.0 + b0.min.0) + -1))
assert(0 <= b0.extent.0, halide_error_buffer_extents_negative("Input buffer b0", 0, b0.extent.0))
assert((b0.min.1 <= nan_or_one.min.1) && ((nan_or_one.extent.1 + nan_or_one.min.1) <= (b0.extent.1 + b0.min.1)), halide_error_access_out_of_bounds("Input buffer b0", 1, nan_or_one.min.1, (nan_or_one.extent.1 + nan_or_one.min.1) + -1, b0.min.1, (b0.extent.1 + b0.min.1) + -1))
assert(0 <= b0.extent.1, halide_error_buffer_extents_negative("Input buffer b0", 1, b0.extent.1))
assert((b0.min.2 <= 0) && (1 <= (b0.extent.2 + b0.min.2)), halide_error_access_out_of_bounds("Input buffer b0", 2, 0, 0, b0.min.2, (b0.extent.2 + b0.min.2) + -1))
assert(0 <= b0.extent.2, halide_error_buffer_extents_negative("Input buffer b0", 2, b0.extent.2))
0
assert(0 <= nan_or_one.extent.0, halide_error_buffer_extents_negative("Output buffer nan_or_one", 0, nan_or_one.extent.0))
0
assert(0 <= nan_or_one.extent.1, halide_error_buffer_extents_negative("Output buffer nan_or_one", 1, nan_or_one.extent.1))
assert(b0.stride.0 == 3, halide_error_constraint_violated("b0.stride.0", b0.stride.0, "3", 3))
assert(b0.min.0 == 0, halide_error_constraint_violated("b0.min.0", b0.min.0, "0", 0))
assert(b0.extent.0 == 1, halide_error_constraint_violated("b0.extent.0", b0.extent.0, "1", 1))
assert(b0.stride.1 == 3, halide_error_constraint_violated("b0.stride.1", b0.stride.1, "3", 3))
assert(b0.min.1 == 0, halide_error_constraint_violated("b0.min.1", b0.min.1, "0", 0))
assert(b0.extent.1 == 1, halide_error_constraint_violated("b0.extent.1", b0.extent.1, "1", 1))
assert(b0.stride.2 == 1, halide_error_constraint_violated("b0.stride.2", b0.stride.2, "1", 1))
assert(b0.min.2 == 0, halide_error_constraint_violated("b0.min.2", b0.min.2, "0", 0))
assert(b0.extent.2 == 3, halide_error_constraint_violated("b0.extent.2", b0.extent.2, "3", 3))
assert(nan_or_one.stride.0 == 1, halide_error_constraint_violated("nan_or_one.stride.0", nan_or_one.stride.0, "1", 1))
let nan_or_one.total_extent.1 = int64(nan_or_one.extent.1)*int64(nan_or_one.extent.0)
assert(uint64(int64(nan_or_one.extent.0)) <= (uint64)2147483647, halide_error_buffer_allocation_too_large("nan_or_one", uint64(int64(nan_or_one.extent.0)), (uint64)2147483647))
assert((uint64)abs(int64(nan_or_one.extent.1)*int64(nan_or_one.stride.1)) <= (uint64)2147483647, halide_error_buffer_allocation_too_large("nan_or_one", (uint64)abs(int64(nan_or_one.extent.1)*int64(nan_or_one.stride.1)), (uint64)2147483647))
assert(nan_or_one.total_extent.1 <= (int64)2147483647, halide_error_buffer_extents_too_large("nan_or_one", nan_or_one.total_extent.1, (int64)2147483647))
produce nan_or_one {
let halide_copy_to_device_result = halide_copy_to_device((halide_buffer_t *)b0.buffer, (halide_device_interface_t *)halide_opencl_device_interface())
assert(halide_copy_to_device_result == 0, halide_copy_to_device_result)
let halide_copy_to_device_result$1 = halide_copy_to_device((halide_buffer_t *)nan_or_one.buffer, (halide_device_interface_t *)halide_opencl_device_interface())
assert(halide_copy_to_device_result$1 == 0, halide_copy_to_device_result$1)
gpu_block<OpenCL> (nan_or_one.s0.row.row_outer.__block_id_y, 0, nan_or_one.extent.1) {
gpu_block<OpenCL> (nan_or_one.s0.col.col_outer.__block_id_x, 0, nan_or_one.extent.0) {
gpu_thread<OpenCL> (nan_or_one.s0.row.row_inner.__thread_id_y, 0, 1) {
gpu_thread<OpenCL> (nan_or_one.s0.col.col_inner.__thread_id_x, 0, 1) {
nan_or_one[(((nan_or_one.min.1 + nan_or_one.s0.row.row_outer.__block_id_y)*nan_or_one.stride.1) + (nan_or_one.min.0 + nan_or_one.s0.col.col_outer.__block_id_x)) - ((nan_or_one.min.1*nan_or_one.stride.1) + nan_or_one.min.0)] =
let t1 = (float32)strict_float(nanf) in
(float32)strict_float(select((uint1)is_nan_f32((float32)strict_float(b0[((nan_or_one.min.0 + nan_or_one.s0.col.col_outer.__block_id_x) + (nan_or_one.min.1 + nan_or_one.s0.row.row_outer.__block_id_y))*3])), t1, t1))
}
}
}
}
_halide_buffer_set_device_dirty((halide_buffer_t *)nan_or_one.buffer, (uint1)1)
}
}
Unrolling...
Lowering after unrolling: (unchanged)
Vectorizing...
Lowering after vectorizing:
assert((uint64)reinterpret((halide_buffer_t *)nan_or_one.buffer) != (uint64)0, halide_error_buffer_argument_is_null("nan_or_one"))
assert((uint64)reinterpret((halide_buffer_t *)b0.buffer) != (uint64)0, halide_error_buffer_argument_is_null("b0"))
let b0 = (void *)_halide_buffer_get_host((halide_buffer_t *)b0.buffer)
let b0.type = (uint32)_halide_buffer_get_type((halide_buffer_t *)b0.buffer)
let b0.dimensions = _halide_buffer_get_dimensions((halide_buffer_t *)b0.buffer)
let b0.min.0 = _halide_buffer_get_min((halide_buffer_t *)b0.buffer, 0)
let b0.extent.0 = _halide_buffer_get_extent((halide_buffer_t *)b0.buffer, 0)
let b0.stride.0 = _halide_buffer_get_stride((halide_buffer_t *)b0.buffer, 0)
let b0.min.1 = _halide_buffer_get_min((halide_buffer_t *)b0.buffer, 1)
let b0.extent.1 = _halide_buffer_get_extent((halide_buffer_t *)b0.buffer, 1)
let b0.stride.1 = _halide_buffer_get_stride((halide_buffer_t *)b0.buffer, 1)
let b0.min.2 = _halide_buffer_get_min((halide_buffer_t *)b0.buffer, 2)
let b0.extent.2 = _halide_buffer_get_extent((halide_buffer_t *)b0.buffer, 2)
let b0.stride.2 = _halide_buffer_get_stride((halide_buffer_t *)b0.buffer, 2)
let nan_or_one = (void *)_halide_buffer_get_host((halide_buffer_t *)nan_or_one.buffer)
let nan_or_one.type = (uint32)_halide_buffer_get_type((halide_buffer_t *)nan_or_one.buffer)
let nan_or_one.dimensions = _halide_buffer_get_dimensions((halide_buffer_t *)nan_or_one.buffer)
let nan_or_one.min.0 = _halide_buffer_get_min((halide_buffer_t *)nan_or_one.buffer, 0)
let nan_or_one.extent.0 = _halide_buffer_get_extent((halide_buffer_t *)nan_or_one.buffer, 0)
let nan_or_one.stride.0 = _halide_buffer_get_stride((halide_buffer_t *)nan_or_one.buffer, 0)
let nan_or_one.min.1 = _halide_buffer_get_min((halide_buffer_t *)nan_or_one.buffer, 1)
let nan_or_one.extent.1 = _halide_buffer_get_extent((halide_buffer_t *)nan_or_one.buffer, 1)
let nan_or_one.stride.1 = _halide_buffer_get_stride((halide_buffer_t *)nan_or_one.buffer, 1)
assert(!(uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)b0.buffer) || ((0 <= nan_or_one.min.0) && ((nan_or_one.extent.0 + nan_or_one.min.0) <= 1)), halide_error_constraints_make_required_region_smaller("Input buffer b0", 0, 0, 0, nan_or_one.min.0, (nan_or_one.extent.0 + nan_or_one.min.0) + -1))
assert(!(uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)b0.buffer) || ((0 <= nan_or_one.min.1) && ((nan_or_one.extent.1 + nan_or_one.min.1) <= 1)), halide_error_constraints_make_required_region_smaller("Input buffer b0", 1, 0, 0, nan_or_one.min.1, (nan_or_one.extent.1 + nan_or_one.min.1) + -1))
if ((uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)b0.buffer)) {
(halide_buffer_t *)_halide_buffer_init((halide_buffer_t *)b0.buffer, (halide_dimension_t *)_halide_buffer_get_shape((halide_buffer_t *)b0.buffer), (void *)reinterpret((uint64)0), (uint64)0, (halide_device_interface_t *)reinterpret((uint64)0), 2, 32, 3, (halide_dimension_t *)make_struct(0, 1, 3, 0, 0, 1, 3, 0, 0, 3, 1, 0), (uint64)0)
}
if ((uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)nan_or_one.buffer)) {
(halide_buffer_t *)_halide_buffer_init((halide_buffer_t *)nan_or_one.buffer, (halide_dimension_t *)_halide_buffer_get_shape((halide_buffer_t *)nan_or_one.buffer), (void *)reinterpret((uint64)0), (uint64)0, (halide_device_interface_t *)reinterpret((uint64)0), 2, 32, 2, (halide_dimension_t *)make_struct(nan_or_one.min.0, nan_or_one.extent.0, 1, 0, nan_or_one.min.1, nan_or_one.extent.1, nan_or_one.extent.0, 0), (uint64)0)
}
if (!((uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)b0.buffer) || (uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)nan_or_one.buffer))) {
assert(b0.type == (uint32)73730, halide_error_bad_type("Input buffer b0", b0.type, (uint32)73730))
assert(b0.dimensions == 3, halide_error_bad_dimensions("Input buffer b0", b0.dimensions, 3))
assert(nan_or_one.type == (uint32)73730, halide_error_bad_type("Output buffer nan_or_one", nan_or_one.type, (uint32)73730))
assert(nan_or_one.dimensions == 2, halide_error_bad_dimensions("Output buffer nan_or_one", nan_or_one.dimensions, 2))
assert((b0.min.0 <= nan_or_one.min.0) && ((nan_or_one.extent.0 + nan_or_one.min.0) <= (b0.extent.0 + b0.min.0)), halide_error_access_out_of_bounds("Input buffer b0", 0, nan_or_one.min.0, (nan_or_one.extent.0 + nan_or_one.min.0) + -1, b0.min.0, (b0.extent.0 + b0.min.0) + -1))
assert(0 <= b0.extent.0, halide_error_buffer_extents_negative("Input buffer b0", 0, b0.extent.0))
assert((b0.min.1 <= nan_or_one.min.1) && ((nan_or_one.extent.1 + nan_or_one.min.1) <= (b0.extent.1 + b0.min.1)), halide_error_access_out_of_bounds("Input buffer b0", 1, nan_or_one.min.1, (nan_or_one.extent.1 + nan_or_one.min.1) + -1, b0.min.1, (b0.extent.1 + b0.min.1) + -1))
assert(0 <= b0.extent.1, halide_error_buffer_extents_negative("Input buffer b0", 1, b0.extent.1))
assert((b0.min.2 <= 0) && (1 <= (b0.extent.2 + b0.min.2)), halide_error_access_out_of_bounds("Input buffer b0", 2, 0, 0, b0.min.2, (b0.extent.2 + b0.min.2) + -1))
assert(0 <= b0.extent.2, halide_error_buffer_extents_negative("Input buffer b0", 2, b0.extent.2))
assert(0 <= nan_or_one.extent.0, halide_error_buffer_extents_negative("Output buffer nan_or_one", 0, nan_or_one.extent.0))
assert(0 <= nan_or_one.extent.1, halide_error_buffer_extents_negative("Output buffer nan_or_one", 1, nan_or_one.extent.1))
assert(b0.stride.0 == 3, halide_error_constraint_violated("b0.stride.0", b0.stride.0, "3", 3))
assert(b0.min.0 == 0, halide_error_constraint_violated("b0.min.0", b0.min.0, "0", 0))
assert(b0.extent.0 == 1, halide_error_constraint_violated("b0.extent.0", b0.extent.0, "1", 1))
assert(b0.stride.1 == 3, halide_error_constraint_violated("b0.stride.1", b0.stride.1, "3", 3))
assert(b0.min.1 == 0, halide_error_constraint_violated("b0.min.1", b0.min.1, "0", 0))
assert(b0.extent.1 == 1, halide_error_constraint_violated("b0.extent.1", b0.extent.1, "1", 1))
assert(b0.stride.2 == 1, halide_error_constraint_violated("b0.stride.2", b0.stride.2, "1", 1))
assert(b0.min.2 == 0, halide_error_constraint_violated("b0.min.2", b0.min.2, "0", 0))
assert(b0.extent.2 == 3, halide_error_constraint_violated("b0.extent.2", b0.extent.2, "3", 3))
assert(nan_or_one.stride.0 == 1, halide_error_constraint_violated("nan_or_one.stride.0", nan_or_one.stride.0, "1", 1))
let nan_or_one.total_extent.1 = int64(nan_or_one.extent.1)*int64(nan_or_one.extent.0)
assert(uint64(nan_or_one.extent.0) <= (uint64)2147483647, halide_error_buffer_allocation_too_large("nan_or_one", uint64(nan_or_one.extent.0), (uint64)2147483647))
assert((uint64)abs(int64(nan_or_one.extent.1)*int64(nan_or_one.stride.1)) <= (uint64)2147483647, halide_error_buffer_allocation_too_large("nan_or_one", (uint64)abs(int64(nan_or_one.extent.1)*int64(nan_or_one.stride.1)), (uint64)2147483647))
assert(nan_or_one.total_extent.1 <= (int64)2147483647, halide_error_buffer_extents_too_large("nan_or_one", nan_or_one.total_extent.1, (int64)2147483647))
produce nan_or_one {
let halide_copy_to_device_result = halide_copy_to_device((halide_buffer_t *)b0.buffer, (halide_device_interface_t *)halide_opencl_device_interface())
assert(halide_copy_to_device_result == 0, halide_copy_to_device_result)
let halide_copy_to_device_result$1 = halide_copy_to_device((halide_buffer_t *)nan_or_one.buffer, (halide_device_interface_t *)halide_opencl_device_interface())
assert(halide_copy_to_device_result$1 == 0, halide_copy_to_device_result$1)
gpu_block<OpenCL> (nan_or_one.s0.row.row_outer.__block_id_y, 0, nan_or_one.extent.1) {
gpu_block<OpenCL> (nan_or_one.s0.col.col_outer.__block_id_x, 0, nan_or_one.extent.0) {
gpu_thread<OpenCL> (nan_or_one.s0.row.row_inner.__thread_id_y, 0, 1) {
gpu_thread<OpenCL> (nan_or_one.s0.col.col_inner.__thread_id_x, 0, 1) {
nan_or_one[(((nan_or_one.min.1 + nan_or_one.s0.row.row_outer.__block_id_y)*nan_or_one.stride.1) + (nan_or_one.min.0 + nan_or_one.s0.col.col_outer.__block_id_x)) - ((nan_or_one.min.1*nan_or_one.stride.1) + nan_or_one.min.0)] =
let t1 = (float32)strict_float(nanf) in
(float32)strict_float(select((uint1)is_nan_f32((float32)strict_float(b0[((nan_or_one.min.0 + nan_or_one.s0.col.col_outer.__block_id_x) + (nan_or_one.min.1 + nan_or_one.s0.row.row_outer.__block_id_y))*3])), t1, t1))
}
}
}
}
_halide_buffer_set_device_dirty((halide_buffer_t *)nan_or_one.buffer, (uint1)1)
}
}
Injecting per-block gpu synchronization...
Lowering after injecting per-block gpu synchronization:
assert((uint64)reinterpret((halide_buffer_t *)nan_or_one.buffer) != (uint64)0, halide_error_buffer_argument_is_null("nan_or_one"))
assert((uint64)reinterpret((halide_buffer_t *)b0.buffer) != (uint64)0, halide_error_buffer_argument_is_null("b0"))
let b0 = (void *)_halide_buffer_get_host((halide_buffer_t *)b0.buffer)
let b0.type = (uint32)_halide_buffer_get_type((halide_buffer_t *)b0.buffer)
let b0.dimensions = _halide_buffer_get_dimensions((halide_buffer_t *)b0.buffer)
let b0.min.0 = _halide_buffer_get_min((halide_buffer_t *)b0.buffer, 0)
let b0.extent.0 = _halide_buffer_get_extent((halide_buffer_t *)b0.buffer, 0)
let b0.stride.0 = _halide_buffer_get_stride((halide_buffer_t *)b0.buffer, 0)
let b0.min.1 = _halide_buffer_get_min((halide_buffer_t *)b0.buffer, 1)
let b0.extent.1 = _halide_buffer_get_extent((halide_buffer_t *)b0.buffer, 1)
let b0.stride.1 = _halide_buffer_get_stride((halide_buffer_t *)b0.buffer, 1)
let b0.min.2 = _halide_buffer_get_min((halide_buffer_t *)b0.buffer, 2)
let b0.extent.2 = _halide_buffer_get_extent((halide_buffer_t *)b0.buffer, 2)
let b0.stride.2 = _halide_buffer_get_stride((halide_buffer_t *)b0.buffer, 2)
let nan_or_one = (void *)_halide_buffer_get_host((halide_buffer_t *)nan_or_one.buffer)
let nan_or_one.type = (uint32)_halide_buffer_get_type((halide_buffer_t *)nan_or_one.buffer)
let nan_or_one.dimensions = _halide_buffer_get_dimensions((halide_buffer_t *)nan_or_one.buffer)
let nan_or_one.min.0 = _halide_buffer_get_min((halide_buffer_t *)nan_or_one.buffer, 0)
let nan_or_one.extent.0 = _halide_buffer_get_extent((halide_buffer_t *)nan_or_one.buffer, 0)
let nan_or_one.stride.0 = _halide_buffer_get_stride((halide_buffer_t *)nan_or_one.buffer, 0)
let nan_or_one.min.1 = _halide_buffer_get_min((halide_buffer_t *)nan_or_one.buffer, 1)
let nan_or_one.extent.1 = _halide_buffer_get_extent((halide_buffer_t *)nan_or_one.buffer, 1)
let nan_or_one.stride.1 = _halide_buffer_get_stride((halide_buffer_t *)nan_or_one.buffer, 1)
assert(!(uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)b0.buffer) || ((0 <= nan_or_one.min.0) && ((nan_or_one.extent.0 + nan_or_one.min.0) <= 1)), halide_error_constraints_make_required_region_smaller("Input buffer b0", 0, 0, 0, nan_or_one.min.0, (nan_or_one.extent.0 + nan_or_one.min.0) + -1))
assert(!(uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)b0.buffer) || ((0 <= nan_or_one.min.1) && ((nan_or_one.extent.1 + nan_or_one.min.1) <= 1)), halide_error_constraints_make_required_region_smaller("Input buffer b0", 1, 0, 0, nan_or_one.min.1, (nan_or_one.extent.1 + nan_or_one.min.1) + -1))
if ((uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)b0.buffer)) {
(halide_buffer_t *)_halide_buffer_init((halide_buffer_t *)b0.buffer, (halide_dimension_t *)_halide_buffer_get_shape((halide_buffer_t *)b0.buffer), (void *)reinterpret((uint64)0), (uint64)0, (halide_device_interface_t *)reinterpret((uint64)0), 2, 32, 3, (halide_dimension_t *)make_struct(0, 1, 3, 0, 0, 1, 3, 0, 0, 3, 1, 0), (uint64)0)
}
if ((uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)nan_or_one.buffer)) {
(halide_buffer_t *)_halide_buffer_init((halide_buffer_t *)nan_or_one.buffer, (halide_dimension_t *)_halide_buffer_get_shape((halide_buffer_t *)nan_or_one.buffer), (void *)reinterpret((uint64)0), (uint64)0, (halide_device_interface_t *)reinterpret((uint64)0), 2, 32, 2, (halide_dimension_t *)make_struct(nan_or_one.min.0, nan_or_one.extent.0, 1, 0, nan_or_one.min.1, nan_or_one.extent.1, nan_or_one.extent.0, 0), (uint64)0)
}
if (!((uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)b0.buffer) || (uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)nan_or_one.buffer))) {
assert(b0.type == (uint32)73730, halide_error_bad_type("Input buffer b0", b0.type, (uint32)73730))
assert(b0.dimensions == 3, halide_error_bad_dimensions("Input buffer b0", b0.dimensions, 3))
assert(nan_or_one.type == (uint32)73730, halide_error_bad_type("Output buffer nan_or_one", nan_or_one.type, (uint32)73730))
assert(nan_or_one.dimensions == 2, halide_error_bad_dimensions("Output buffer nan_or_one", nan_or_one.dimensions, 2))
assert((b0.min.0 <= nan_or_one.min.0) && ((nan_or_one.extent.0 + nan_or_one.min.0) <= (b0.extent.0 + b0.min.0)), halide_error_access_out_of_bounds("Input buffer b0", 0, nan_or_one.min.0, (nan_or_one.extent.0 + nan_or_one.min.0) + -1, b0.min.0, (b0.extent.0 + b0.min.0) + -1))
assert(0 <= b0.extent.0, halide_error_buffer_extents_negative("Input buffer b0", 0, b0.extent.0))
assert((b0.min.1 <= nan_or_one.min.1) && ((nan_or_one.extent.1 + nan_or_one.min.1) <= (b0.extent.1 + b0.min.1)), halide_error_access_out_of_bounds("Input buffer b0", 1, nan_or_one.min.1, (nan_or_one.extent.1 + nan_or_one.min.1) + -1, b0.min.1, (b0.extent.1 + b0.min.1) + -1))
assert(0 <= b0.extent.1, halide_error_buffer_extents_negative("Input buffer b0", 1, b0.extent.1))
assert((b0.min.2 <= 0) && (1 <= (b0.extent.2 + b0.min.2)), halide_error_access_out_of_bounds("Input buffer b0", 2, 0, 0, b0.min.2, (b0.extent.2 + b0.min.2) + -1))
assert(0 <= b0.extent.2, halide_error_buffer_extents_negative("Input buffer b0", 2, b0.extent.2))
assert(0 <= nan_or_one.extent.0, halide_error_buffer_extents_negative("Output buffer nan_or_one", 0, nan_or_one.extent.0))
assert(0 <= nan_or_one.extent.1, halide_error_buffer_extents_negative("Output buffer nan_or_one", 1, nan_or_one.extent.1))
assert(b0.stride.0 == 3, halide_error_constraint_violated("b0.stride.0", b0.stride.0, "3", 3))
assert(b0.min.0 == 0, halide_error_constraint_violated("b0.min.0", b0.min.0, "0", 0))
assert(b0.extent.0 == 1, halide_error_constraint_violated("b0.extent.0", b0.extent.0, "1", 1))
assert(b0.stride.1 == 3, halide_error_constraint_violated("b0.stride.1", b0.stride.1, "3", 3))
assert(b0.min.1 == 0, halide_error_constraint_violated("b0.min.1", b0.min.1, "0", 0))
assert(b0.extent.1 == 1, halide_error_constraint_violated("b0.extent.1", b0.extent.1, "1", 1))
assert(b0.stride.2 == 1, halide_error_constraint_violated("b0.stride.2", b0.stride.2, "1", 1))
assert(b0.min.2 == 0, halide_error_constraint_violated("b0.min.2", b0.min.2, "0", 0))
assert(b0.extent.2 == 3, halide_error_constraint_violated("b0.extent.2", b0.extent.2, "3", 3))
assert(nan_or_one.stride.0 == 1, halide_error_constraint_violated("nan_or_one.stride.0", nan_or_one.stride.0, "1", 1))
let nan_or_one.total_extent.1 = int64(nan_or_one.extent.1)*int64(nan_or_one.extent.0)
assert(uint64(nan_or_one.extent.0) <= (uint64)2147483647, halide_error_buffer_allocation_too_large("nan_or_one", uint64(nan_or_one.extent.0), (uint64)2147483647))
assert((uint64)abs(int64(nan_or_one.extent.1)*int64(nan_or_one.stride.1)) <= (uint64)2147483647, halide_error_buffer_allocation_too_large("nan_or_one", (uint64)abs(int64(nan_or_one.extent.1)*int64(nan_or_one.stride.1)), (uint64)2147483647))
assert(nan_or_one.total_extent.1 <= (int64)2147483647, halide_error_buffer_extents_too_large("nan_or_one", nan_or_one.total_extent.1, (int64)2147483647))
produce nan_or_one {
let halide_copy_to_device_result = halide_copy_to_device((halide_buffer_t *)b0.buffer, (halide_device_interface_t *)halide_opencl_device_interface())
assert(halide_copy_to_device_result == 0, halide_copy_to_device_result)
let halide_copy_to_device_result$1 = halide_copy_to_device((halide_buffer_t *)nan_or_one.buffer, (halide_device_interface_t *)halide_opencl_device_interface())
assert(halide_copy_to_device_result$1 == 0, halide_copy_to_device_result$1)
gpu_block<OpenCL> (nan_or_one.s0.row.row_outer.__block_id_y, 0, nan_or_one.extent.1) {
gpu_block<OpenCL> (nan_or_one.s0.col.col_outer.__block_id_x, 0, nan_or_one.extent.0) {
gpu_thread<OpenCL> (.__thread_id_y, 0, 1) {
gpu_thread<OpenCL> (.__thread_id_x, 0, 1) {
nan_or_one[(((nan_or_one.min.1 + nan_or_one.s0.row.row_outer.__block_id_y)*nan_or_one.stride.1) + (nan_or_one.min.0 + nan_or_one.s0.col.col_outer.__block_id_x)) - ((nan_or_one.min.1*nan_or_one.stride.1) + nan_or_one.min.0)] =
let t1 = (float32)strict_float(nanf) in
(float32)strict_float(select((uint1)is_nan_f32((float32)strict_float(b0[((nan_or_one.min.0 + nan_or_one.s0.col.col_outer.__block_id_x) + (nan_or_one.min.1 + nan_or_one.s0.row.row_outer.__block_id_y))*3])), t1, t1))
}
}
}
}
_halide_buffer_set_device_dirty((halide_buffer_t *)nan_or_one.buffer, (uint1)1)
}
}
Detecting vector interleavings...
Lowering after rewriting vector interleavings:
assert((uint64)reinterpret((halide_buffer_t *)nan_or_one.buffer) != (uint64)0, halide_error_buffer_argument_is_null("nan_or_one"))
assert((uint64)reinterpret((halide_buffer_t *)b0.buffer) != (uint64)0, halide_error_buffer_argument_is_null("b0"))
let b0 = (void *)_halide_buffer_get_host((halide_buffer_t *)b0.buffer)
let b0.type = (uint32)_halide_buffer_get_type((halide_buffer_t *)b0.buffer)
let b0.dimensions = _halide_buffer_get_dimensions((halide_buffer_t *)b0.buffer)
let b0.min.0 = _halide_buffer_get_min((halide_buffer_t *)b0.buffer, 0)
let b0.extent.0 = _halide_buffer_get_extent((halide_buffer_t *)b0.buffer, 0)
let b0.stride.0 = _halide_buffer_get_stride((halide_buffer_t *)b0.buffer, 0)
let b0.min.1 = _halide_buffer_get_min((halide_buffer_t *)b0.buffer, 1)
let b0.extent.1 = _halide_buffer_get_extent((halide_buffer_t *)b0.buffer, 1)
let b0.stride.1 = _halide_buffer_get_stride((halide_buffer_t *)b0.buffer, 1)
let b0.min.2 = _halide_buffer_get_min((halide_buffer_t *)b0.buffer, 2)
let b0.extent.2 = _halide_buffer_get_extent((halide_buffer_t *)b0.buffer, 2)
let b0.stride.2 = _halide_buffer_get_stride((halide_buffer_t *)b0.buffer, 2)
let nan_or_one = (void *)_halide_buffer_get_host((halide_buffer_t *)nan_or_one.buffer)
let nan_or_one.type = (uint32)_halide_buffer_get_type((halide_buffer_t *)nan_or_one.buffer)
let nan_or_one.dimensions = _halide_buffer_get_dimensions((halide_buffer_t *)nan_or_one.buffer)
let nan_or_one.min.0 = _halide_buffer_get_min((halide_buffer_t *)nan_or_one.buffer, 0)
let nan_or_one.extent.0 = _halide_buffer_get_extent((halide_buffer_t *)nan_or_one.buffer, 0)
let nan_or_one.stride.0 = _halide_buffer_get_stride((halide_buffer_t *)nan_or_one.buffer, 0)
let nan_or_one.min.1 = _halide_buffer_get_min((halide_buffer_t *)nan_or_one.buffer, 1)
let nan_or_one.extent.1 = _halide_buffer_get_extent((halide_buffer_t *)nan_or_one.buffer, 1)
let nan_or_one.stride.1 = _halide_buffer_get_stride((halide_buffer_t *)nan_or_one.buffer, 1)
assert(!(uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)b0.buffer) || ((0 <= nan_or_one.min.0) && ((nan_or_one.extent.0 + nan_or_one.min.0) <= 1)), halide_error_constraints_make_required_region_smaller("Input buffer b0", 0, 0, 0, nan_or_one.min.0, (nan_or_one.extent.0 + nan_or_one.min.0) + -1))
assert(!(uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)b0.buffer) || ((0 <= nan_or_one.min.1) && ((nan_or_one.extent.1 + nan_or_one.min.1) <= 1)), halide_error_constraints_make_required_region_smaller("Input buffer b0", 1, 0, 0, nan_or_one.min.1, (nan_or_one.extent.1 + nan_or_one.min.1) + -1))
if ((uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)b0.buffer)) {
(halide_buffer_t *)_halide_buffer_init((halide_buffer_t *)b0.buffer, (halide_dimension_t *)_halide_buffer_get_shape((halide_buffer_t *)b0.buffer), (void *)reinterpret((uint64)0), (uint64)0, (halide_device_interface_t *)reinterpret((uint64)0), 2, 32, 3, (halide_dimension_t *)make_struct(0, 1, 3, 0, 0, 1, 3, 0, 0, 3, 1, 0), (uint64)0)
}
if ((uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)nan_or_one.buffer)) {
(halide_buffer_t *)_halide_buffer_init((halide_buffer_t *)nan_or_one.buffer, (halide_dimension_t *)_halide_buffer_get_shape((halide_buffer_t *)nan_or_one.buffer), (void *)reinterpret((uint64)0), (uint64)0, (halide_device_interface_t *)reinterpret((uint64)0), 2, 32, 2, (halide_dimension_t *)make_struct(nan_or_one.min.0, nan_or_one.extent.0, 1, 0, nan_or_one.min.1, nan_or_one.extent.1, nan_or_one.extent.0, 0), (uint64)0)
}
if (!((uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)b0.buffer) || (uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)nan_or_one.buffer))) {
assert(b0.type == (uint32)73730, halide_error_bad_type("Input buffer b0", b0.type, (uint32)73730))
assert(b0.dimensions == 3, halide_error_bad_dimensions("Input buffer b0", b0.dimensions, 3))
assert(nan_or_one.type == (uint32)73730, halide_error_bad_type("Output buffer nan_or_one", nan_or_one.type, (uint32)73730))
assert(nan_or_one.dimensions == 2, halide_error_bad_dimensions("Output buffer nan_or_one", nan_or_one.dimensions, 2))
assert((b0.min.0 <= nan_or_one.min.0) && ((nan_or_one.extent.0 + nan_or_one.min.0) <= (b0.extent.0 + b0.min.0)), halide_error_access_out_of_bounds("Input buffer b0", 0, nan_or_one.min.0, (nan_or_one.extent.0 + nan_or_one.min.0) + -1, b0.min.0, (b0.extent.0 + b0.min.0) + -1))
assert(0 <= b0.extent.0, halide_error_buffer_extents_negative("Input buffer b0", 0, b0.extent.0))
assert((b0.min.1 <= nan_or_one.min.1) && ((nan_or_one.extent.1 + nan_or_one.min.1) <= (b0.extent.1 + b0.min.1)), halide_error_access_out_of_bounds("Input buffer b0", 1, nan_or_one.min.1, (nan_or_one.extent.1 + nan_or_one.min.1) + -1, b0.min.1, (b0.extent.1 + b0.min.1) + -1))
assert(0 <= b0.extent.1, halide_error_buffer_extents_negative("Input buffer b0", 1, b0.extent.1))
assert((b0.min.2 <= 0) && (1 <= (b0.extent.2 + b0.min.2)), halide_error_access_out_of_bounds("Input buffer b0", 2, 0, 0, b0.min.2, (b0.extent.2 + b0.min.2) + -1))
assert(0 <= b0.extent.2, halide_error_buffer_extents_negative("Input buffer b0", 2, b0.extent.2))
assert(0 <= nan_or_one.extent.0, halide_error_buffer_extents_negative("Output buffer nan_or_one", 0, nan_or_one.extent.0))
assert(0 <= nan_or_one.extent.1, halide_error_buffer_extents_negative("Output buffer nan_or_one", 1, nan_or_one.extent.1))
assert(b0.stride.0 == 3, halide_error_constraint_violated("b0.stride.0", b0.stride.0, "3", 3))
assert(b0.min.0 == 0, halide_error_constraint_violated("b0.min.0", b0.min.0, "0", 0))
assert(b0.extent.0 == 1, halide_error_constraint_violated("b0.extent.0", b0.extent.0, "1", 1))
assert(b0.stride.1 == 3, halide_error_constraint_violated("b0.stride.1", b0.stride.1, "3", 3))
assert(b0.min.1 == 0, halide_error_constraint_violated("b0.min.1", b0.min.1, "0", 0))
assert(b0.extent.1 == 1, halide_error_constraint_violated("b0.extent.1", b0.extent.1, "1", 1))
assert(b0.stride.2 == 1, halide_error_constraint_violated("b0.stride.2", b0.stride.2, "1", 1))
assert(b0.min.2 == 0, halide_error_constraint_violated("b0.min.2", b0.min.2, "0", 0))
assert(b0.extent.2 == 3, halide_error_constraint_violated("b0.extent.2", b0.extent.2, "3", 3))
assert(nan_or_one.stride.0 == 1, halide_error_constraint_violated("nan_or_one.stride.0", nan_or_one.stride.0, "1", 1))
let nan_or_one.total_extent.1 = int64(nan_or_one.extent.1)*int64(nan_or_one.extent.0)
assert(uint64(nan_or_one.extent.0) <= (uint64)2147483647, halide_error_buffer_allocation_too_large("nan_or_one", uint64(nan_or_one.extent.0), (uint64)2147483647))
assert((uint64)abs(int64(nan_or_one.extent.1)*int64(nan_or_one.stride.1)) <= (uint64)2147483647, halide_error_buffer_allocation_too_large("nan_or_one", (uint64)abs(int64(nan_or_one.extent.1)*int64(nan_or_one.stride.1)), (uint64)2147483647))
assert(nan_or_one.total_extent.1 <= (int64)2147483647, halide_error_buffer_extents_too_large("nan_or_one", nan_or_one.total_extent.1, (int64)2147483647))
produce nan_or_one {
let halide_copy_to_device_result = halide_copy_to_device((halide_buffer_t *)b0.buffer, (halide_device_interface_t *)halide_opencl_device_interface())
assert(halide_copy_to_device_result == 0, halide_copy_to_device_result)
let halide_copy_to_device_result$1 = halide_copy_to_device((halide_buffer_t *)nan_or_one.buffer, (halide_device_interface_t *)halide_opencl_device_interface())
assert(halide_copy_to_device_result$1 == 0, halide_copy_to_device_result$1)
gpu_block<OpenCL> (nan_or_one.s0.row.row_outer.__block_id_y, 0, nan_or_one.extent.1) {
gpu_block<OpenCL> (nan_or_one.s0.col.col_outer.__block_id_x, 0, nan_or_one.extent.0) {
gpu_thread<OpenCL> (.__thread_id_y, 0, 1) {
gpu_thread<OpenCL> (.__thread_id_x, 0, 1) {
nan_or_one[(((nan_or_one.min.1 + nan_or_one.s0.row.row_outer.__block_id_y)*nan_or_one.stride.1) + (nan_or_one.min.0 + nan_or_one.s0.col.col_outer.__block_id_x)) - ((nan_or_one.min.1*nan_or_one.stride.1) + nan_or_one.min.0)] =
let t1 = (float32)strict_float(nanf) in
(float32)strict_float(select((uint1)is_nan_f32((float32)strict_float(b0[((nan_or_one.min.0 + nan_or_one.s0.col.col_outer.__block_id_x) + (nan_or_one.min.1 + nan_or_one.s0.row.row_outer.__block_id_y))*3])), t1, t1))
}
}
}
}
_halide_buffer_set_device_dirty((halide_buffer_t *)nan_or_one.buffer, (uint1)1)
}
}
Partitioning loops to simplify boundary conditions...
Lowering after partitioning loops:
assert((uint64)reinterpret((halide_buffer_t *)nan_or_one.buffer) != (uint64)0, halide_error_buffer_argument_is_null("nan_or_one"))
assert((uint64)reinterpret((halide_buffer_t *)b0.buffer) != (uint64)0, halide_error_buffer_argument_is_null("b0"))
let b0 = (void *)_halide_buffer_get_host((halide_buffer_t *)b0.buffer)
let b0.type = (uint32)_halide_buffer_get_type((halide_buffer_t *)b0.buffer)
let b0.dimensions = _halide_buffer_get_dimensions((halide_buffer_t *)b0.buffer)
let b0.min.0 = _halide_buffer_get_min((halide_buffer_t *)b0.buffer, 0)
let b0.extent.0 = _halide_buffer_get_extent((halide_buffer_t *)b0.buffer, 0)
let b0.stride.0 = _halide_buffer_get_stride((halide_buffer_t *)b0.buffer, 0)
let b0.min.1 = _halide_buffer_get_min((halide_buffer_t *)b0.buffer, 1)
let b0.extent.1 = _halide_buffer_get_extent((halide_buffer_t *)b0.buffer, 1)
let b0.stride.1 = _halide_buffer_get_stride((halide_buffer_t *)b0.buffer, 1)
let b0.min.2 = _halide_buffer_get_min((halide_buffer_t *)b0.buffer, 2)
let b0.extent.2 = _halide_buffer_get_extent((halide_buffer_t *)b0.buffer, 2)
let b0.stride.2 = _halide_buffer_get_stride((halide_buffer_t *)b0.buffer, 2)
let nan_or_one = (void *)_halide_buffer_get_host((halide_buffer_t *)nan_or_one.buffer)
let nan_or_one.type = (uint32)_halide_buffer_get_type((halide_buffer_t *)nan_or_one.buffer)
let nan_or_one.dimensions = _halide_buffer_get_dimensions((halide_buffer_t *)nan_or_one.buffer)
let nan_or_one.min.0 = _halide_buffer_get_min((halide_buffer_t *)nan_or_one.buffer, 0)
let nan_or_one.extent.0 = _halide_buffer_get_extent((halide_buffer_t *)nan_or_one.buffer, 0)
let nan_or_one.stride.0 = _halide_buffer_get_stride((halide_buffer_t *)nan_or_one.buffer, 0)
let nan_or_one.min.1 = _halide_buffer_get_min((halide_buffer_t *)nan_or_one.buffer, 1)
let nan_or_one.extent.1 = _halide_buffer_get_extent((halide_buffer_t *)nan_or_one.buffer, 1)
let nan_or_one.stride.1 = _halide_buffer_get_stride((halide_buffer_t *)nan_or_one.buffer, 1)
assert(!(uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)b0.buffer) || ((0 <= nan_or_one.min.0) && ((nan_or_one.extent.0 + nan_or_one.min.0) <= 1)), halide_error_constraints_make_required_region_smaller("Input buffer b0", 0, 0, 0, nan_or_one.min.0, (nan_or_one.extent.0 + nan_or_one.min.0) + -1))
assert(!(uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)b0.buffer) || ((0 <= nan_or_one.min.1) && ((nan_or_one.extent.1 + nan_or_one.min.1) <= 1)), halide_error_constraints_make_required_region_smaller("Input buffer b0", 1, 0, 0, nan_or_one.min.1, (nan_or_one.extent.1 + nan_or_one.min.1) + -1))
if ((uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)b0.buffer)) {
(halide_buffer_t *)_halide_buffer_init((halide_buffer_t *)b0.buffer, (halide_dimension_t *)_halide_buffer_get_shape((halide_buffer_t *)b0.buffer), (void *)reinterpret((uint64)0), (uint64)0, (halide_device_interface_t *)reinterpret((uint64)0), 2, 32, 3, (halide_dimension_t *)make_struct(0, 1, 3, 0, 0, 1, 3, 0, 0, 3, 1, 0), (uint64)0)
}
if ((uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)nan_or_one.buffer)) {
(halide_buffer_t *)_halide_buffer_init((halide_buffer_t *)nan_or_one.buffer, (halide_dimension_t *)_halide_buffer_get_shape((halide_buffer_t *)nan_or_one.buffer), (void *)reinterpret((uint64)0), (uint64)0, (halide_device_interface_t *)reinterpret((uint64)0), 2, 32, 2, (halide_dimension_t *)make_struct(nan_or_one.min.0, nan_or_one.extent.0, 1, 0, nan_or_one.min.1, nan_or_one.extent.1, nan_or_one.extent.0, 0), (uint64)0)
}
if (!((uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)b0.buffer) || (uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)nan_or_one.buffer))) {
assert(b0.type == (uint32)73730, halide_error_bad_type("Input buffer b0", b0.type, (uint32)73730))
assert(b0.dimensions == 3, halide_error_bad_dimensions("Input buffer b0", b0.dimensions, 3))
assert(nan_or_one.type == (uint32)73730, halide_error_bad_type("Output buffer nan_or_one", nan_or_one.type, (uint32)73730))
assert(nan_or_one.dimensions == 2, halide_error_bad_dimensions("Output buffer nan_or_one", nan_or_one.dimensions, 2))
assert((b0.min.0 <= nan_or_one.min.0) && ((nan_or_one.extent.0 + nan_or_one.min.0) <= (b0.extent.0 + b0.min.0)), halide_error_access_out_of_bounds("Input buffer b0", 0, nan_or_one.min.0, (nan_or_one.extent.0 + nan_or_one.min.0) + -1, b0.min.0, (b0.extent.0 + b0.min.0) + -1))
assert(0 <= b0.extent.0, halide_error_buffer_extents_negative("Input buffer b0", 0, b0.extent.0))
assert((b0.min.1 <= nan_or_one.min.1) && ((nan_or_one.extent.1 + nan_or_one.min.1) <= (b0.extent.1 + b0.min.1)), halide_error_access_out_of_bounds("Input buffer b0", 1, nan_or_one.min.1, (nan_or_one.extent.1 + nan_or_one.min.1) + -1, b0.min.1, (b0.extent.1 + b0.min.1) + -1))
assert(0 <= b0.extent.1, halide_error_buffer_extents_negative("Input buffer b0", 1, b0.extent.1))
assert((b0.min.2 <= 0) && (1 <= (b0.extent.2 + b0.min.2)), halide_error_access_out_of_bounds("Input buffer b0", 2, 0, 0, b0.min.2, (b0.extent.2 + b0.min.2) + -1))
assert(0 <= b0.extent.2, halide_error_buffer_extents_negative("Input buffer b0", 2, b0.extent.2))
assert(0 <= nan_or_one.extent.0, halide_error_buffer_extents_negative("Output buffer nan_or_one", 0, nan_or_one.extent.0))
assert(0 <= nan_or_one.extent.1, halide_error_buffer_extents_negative("Output buffer nan_or_one", 1, nan_or_one.extent.1))
assert(b0.stride.0 == 3, halide_error_constraint_violated("b0.stride.0", b0.stride.0, "3", 3))
assert(b0.min.0 == 0, halide_error_constraint_violated("b0.min.0", b0.min.0, "0", 0))
assert(b0.extent.0 == 1, halide_error_constraint_violated("b0.extent.0", b0.extent.0, "1", 1))
assert(b0.stride.1 == 3, halide_error_constraint_violated("b0.stride.1", b0.stride.1, "3", 3))
assert(b0.min.1 == 0, halide_error_constraint_violated("b0.min.1", b0.min.1, "0", 0))
assert(b0.extent.1 == 1, halide_error_constraint_violated("b0.extent.1", b0.extent.1, "1", 1))
assert(b0.stride.2 == 1, halide_error_constraint_violated("b0.stride.2", b0.stride.2, "1", 1))
assert(b0.min.2 == 0, halide_error_constraint_violated("b0.min.2", b0.min.2, "0", 0))
assert(b0.extent.2 == 3, halide_error_constraint_violated("b0.extent.2", b0.extent.2, "3", 3))
assert(nan_or_one.stride.0 == 1, halide_error_constraint_violated("nan_or_one.stride.0", nan_or_one.stride.0, "1", 1))
let nan_or_one.total_extent.1 = int64(nan_or_one.extent.1)*int64(nan_or_one.extent.0)
assert(uint64(nan_or_one.extent.0) <= (uint64)2147483647, halide_error_buffer_allocation_too_large("nan_or_one", uint64(nan_or_one.extent.0), (uint64)2147483647))
assert((uint64)abs(int64(nan_or_one.extent.1)*int64(nan_or_one.stride.1)) <= (uint64)2147483647, halide_error_buffer_allocation_too_large("nan_or_one", (uint64)abs(int64(nan_or_one.extent.1)*int64(nan_or_one.stride.1)), (uint64)2147483647))
assert(nan_or_one.total_extent.1 <= (int64)2147483647, halide_error_buffer_extents_too_large("nan_or_one", nan_or_one.total_extent.1, (int64)2147483647))
produce nan_or_one {
let halide_copy_to_device_result = halide_copy_to_device((halide_buffer_t *)b0.buffer, (halide_device_interface_t *)halide_opencl_device_interface())
assert(halide_copy_to_device_result == 0, halide_copy_to_device_result)
let halide_copy_to_device_result$1 = halide_copy_to_device((halide_buffer_t *)nan_or_one.buffer, (halide_device_interface_t *)halide_opencl_device_interface())
assert(halide_copy_to_device_result$1 == 0, halide_copy_to_device_result$1)
gpu_block<OpenCL> (nan_or_one.s0.row.row_outer.__block_id_y, 0, nan_or_one.extent.1) {
gpu_block<OpenCL> (nan_or_one.s0.col.col_outer.__block_id_x, 0, nan_or_one.extent.0) {
gpu_thread<OpenCL> (.__thread_id_y, 0, 1) {
gpu_thread<OpenCL> (.__thread_id_x, 0, 1) {
nan_or_one[(((nan_or_one.min.1 + nan_or_one.s0.row.row_outer.__block_id_y)*nan_or_one.stride.1) + (nan_or_one.min.0 + nan_or_one.s0.col.col_outer.__block_id_x)) - ((nan_or_one.min.1*nan_or_one.stride.1) + nan_or_one.min.0)] =
let t1 = (float32)strict_float(nanf) in
(float32)strict_float(select((uint1)is_nan_f32((float32)strict_float(b0[((nan_or_one.min.0 + nan_or_one.s0.col.col_outer.__block_id_x) + (nan_or_one.min.1 + nan_or_one.s0.row.row_outer.__block_id_y))*3])), t1, t1))
}
}
}
}
_halide_buffer_set_device_dirty((halide_buffer_t *)nan_or_one.buffer, (uint1)1)
}
}
Trimming loops to the region over which they do something...
Lowering after loop trimming: (unchanged)
Rebasing loops to zero...
Lowering after rebasing loops to zero:
assert((uint64)reinterpret((halide_buffer_t *)nan_or_one.buffer) != (uint64)0, halide_error_buffer_argument_is_null("nan_or_one"))
assert((uint64)reinterpret((halide_buffer_t *)b0.buffer) != (uint64)0, halide_error_buffer_argument_is_null("b0"))
let b0 = (void *)_halide_buffer_get_host((halide_buffer_t *)b0.buffer)
let b0.type = (uint32)_halide_buffer_get_type((halide_buffer_t *)b0.buffer)
let b0.dimensions = _halide_buffer_get_dimensions((halide_buffer_t *)b0.buffer)
let b0.min.0 = _halide_buffer_get_min((halide_buffer_t *)b0.buffer, 0)
let b0.extent.0 = _halide_buffer_get_extent((halide_buffer_t *)b0.buffer, 0)
let b0.stride.0 = _halide_buffer_get_stride((halide_buffer_t *)b0.buffer, 0)
let b0.min.1 = _halide_buffer_get_min((halide_buffer_t *)b0.buffer, 1)
let b0.extent.1 = _halide_buffer_get_extent((halide_buffer_t *)b0.buffer, 1)
let b0.stride.1 = _halide_buffer_get_stride((halide_buffer_t *)b0.buffer, 1)
let b0.min.2 = _halide_buffer_get_min((halide_buffer_t *)b0.buffer, 2)
let b0.extent.2 = _halide_buffer_get_extent((halide_buffer_t *)b0.buffer, 2)
let b0.stride.2 = _halide_buffer_get_stride((halide_buffer_t *)b0.buffer, 2)
let nan_or_one = (void *)_halide_buffer_get_host((halide_buffer_t *)nan_or_one.buffer)
let nan_or_one.type = (uint32)_halide_buffer_get_type((halide_buffer_t *)nan_or_one.buffer)
let nan_or_one.dimensions = _halide_buffer_get_dimensions((halide_buffer_t *)nan_or_one.buffer)
let nan_or_one.min.0 = _halide_buffer_get_min((halide_buffer_t *)nan_or_one.buffer, 0)
let nan_or_one.extent.0 = _halide_buffer_get_extent((halide_buffer_t *)nan_or_one.buffer, 0)
let nan_or_one.stride.0 = _halide_buffer_get_stride((halide_buffer_t *)nan_or_one.buffer, 0)
let nan_or_one.min.1 = _halide_buffer_get_min((halide_buffer_t *)nan_or_one.buffer, 1)
let nan_or_one.extent.1 = _halide_buffer_get_extent((halide_buffer_t *)nan_or_one.buffer, 1)
let nan_or_one.stride.1 = _halide_buffer_get_stride((halide_buffer_t *)nan_or_one.buffer, 1)
assert(!(uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)b0.buffer) || ((0 <= nan_or_one.min.0) && ((nan_or_one.extent.0 + nan_or_one.min.0) <= 1)), halide_error_constraints_make_required_region_smaller("Input buffer b0", 0, 0, 0, nan_or_one.min.0, (nan_or_one.extent.0 + nan_or_one.min.0) + -1))
assert(!(uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)b0.buffer) || ((0 <= nan_or_one.min.1) && ((nan_or_one.extent.1 + nan_or_one.min.1) <= 1)), halide_error_constraints_make_required_region_smaller("Input buffer b0", 1, 0, 0, nan_or_one.min.1, (nan_or_one.extent.1 + nan_or_one.min.1) + -1))
if ((uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)b0.buffer)) {
(halide_buffer_t *)_halide_buffer_init((halide_buffer_t *)b0.buffer, (halide_dimension_t *)_halide_buffer_get_shape((halide_buffer_t *)b0.buffer), (void *)reinterpret((uint64)0), (uint64)0, (halide_device_interface_t *)reinterpret((uint64)0), 2, 32, 3, (halide_dimension_t *)make_struct(0, 1, 3, 0, 0, 1, 3, 0, 0, 3, 1, 0), (uint64)0)
}
if ((uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)nan_or_one.buffer)) {
(halide_buffer_t *)_halide_buffer_init((halide_buffer_t *)nan_or_one.buffer, (halide_dimension_t *)_halide_buffer_get_shape((halide_buffer_t *)nan_or_one.buffer), (void *)reinterpret((uint64)0), (uint64)0, (halide_device_interface_t *)reinterpret((uint64)0), 2, 32, 2, (halide_dimension_t *)make_struct(nan_or_one.min.0, nan_or_one.extent.0, 1, 0, nan_or_one.min.1, nan_or_one.extent.1, nan_or_one.extent.0, 0), (uint64)0)
}
if (!((uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)b0.buffer) || (uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)nan_or_one.buffer))) {
assert(b0.type == (uint32)73730, halide_error_bad_type("Input buffer b0", b0.type, (uint32)73730))
assert(b0.dimensions == 3, halide_error_bad_dimensions("Input buffer b0", b0.dimensions, 3))
assert(nan_or_one.type == (uint32)73730, halide_error_bad_type("Output buffer nan_or_one", nan_or_one.type, (uint32)73730))
assert(nan_or_one.dimensions == 2, halide_error_bad_dimensions("Output buffer nan_or_one", nan_or_one.dimensions, 2))
assert((b0.min.0 <= nan_or_one.min.0) && ((nan_or_one.extent.0 + nan_or_one.min.0) <= (b0.extent.0 + b0.min.0)), halide_error_access_out_of_bounds("Input buffer b0", 0, nan_or_one.min.0, (nan_or_one.extent.0 + nan_or_one.min.0) + -1, b0.min.0, (b0.extent.0 + b0.min.0) + -1))
assert(0 <= b0.extent.0, halide_error_buffer_extents_negative("Input buffer b0", 0, b0.extent.0))
assert((b0.min.1 <= nan_or_one.min.1) && ((nan_or_one.extent.1 + nan_or_one.min.1) <= (b0.extent.1 + b0.min.1)), halide_error_access_out_of_bounds("Input buffer b0", 1, nan_or_one.min.1, (nan_or_one.extent.1 + nan_or_one.min.1) + -1, b0.min.1, (b0.extent.1 + b0.min.1) + -1))
assert(0 <= b0.extent.1, halide_error_buffer_extents_negative("Input buffer b0", 1, b0.extent.1))
assert((b0.min.2 <= 0) && (1 <= (b0.extent.2 + b0.min.2)), halide_error_access_out_of_bounds("Input buffer b0", 2, 0, 0, b0.min.2, (b0.extent.2 + b0.min.2) + -1))
assert(0 <= b0.extent.2, halide_error_buffer_extents_negative("Input buffer b0", 2, b0.extent.2))
assert(0 <= nan_or_one.extent.0, halide_error_buffer_extents_negative("Output buffer nan_or_one", 0, nan_or_one.extent.0))
assert(0 <= nan_or_one.extent.1, halide_error_buffer_extents_negative("Output buffer nan_or_one", 1, nan_or_one.extent.1))
assert(b0.stride.0 == 3, halide_error_constraint_violated("b0.stride.0", b0.stride.0, "3", 3))
assert(b0.min.0 == 0, halide_error_constraint_violated("b0.min.0", b0.min.0, "0", 0))
assert(b0.extent.0 == 1, halide_error_constraint_violated("b0.extent.0", b0.extent.0, "1", 1))
assert(b0.stride.1 == 3, halide_error_constraint_violated("b0.stride.1", b0.stride.1, "3", 3))
assert(b0.min.1 == 0, halide_error_constraint_violated("b0.min.1", b0.min.1, "0", 0))
assert(b0.extent.1 == 1, halide_error_constraint_violated("b0.extent.1", b0.extent.1, "1", 1))
assert(b0.stride.2 == 1, halide_error_constraint_violated("b0.stride.2", b0.stride.2, "1", 1))
assert(b0.min.2 == 0, halide_error_constraint_violated("b0.min.2", b0.min.2, "0", 0))
assert(b0.extent.2 == 3, halide_error_constraint_violated("b0.extent.2", b0.extent.2, "3", 3))
assert(nan_or_one.stride.0 == 1, halide_error_constraint_violated("nan_or_one.stride.0", nan_or_one.stride.0, "1", 1))
let nan_or_one.total_extent.1 = int64(nan_or_one.extent.1)*int64(nan_or_one.extent.0)
assert(uint64(nan_or_one.extent.0) <= (uint64)2147483647, halide_error_buffer_allocation_too_large("nan_or_one", uint64(nan_or_one.extent.0), (uint64)2147483647))
assert((uint64)abs(int64(nan_or_one.extent.1)*int64(nan_or_one.stride.1)) <= (uint64)2147483647, halide_error_buffer_allocation_too_large("nan_or_one", (uint64)abs(int64(nan_or_one.extent.1)*int64(nan_or_one.stride.1)), (uint64)2147483647))
assert(nan_or_one.total_extent.1 <= (int64)2147483647, halide_error_buffer_extents_too_large("nan_or_one", nan_or_one.total_extent.1, (int64)2147483647))
produce nan_or_one {
let halide_copy_to_device_result = halide_copy_to_device((halide_buffer_t *)b0.buffer, (halide_device_interface_t *)halide_opencl_device_interface())
assert(halide_copy_to_device_result == 0, halide_copy_to_device_result)
let halide_copy_to_device_result$1 = halide_copy_to_device((halide_buffer_t *)nan_or_one.buffer, (halide_device_interface_t *)halide_opencl_device_interface())
assert(halide_copy_to_device_result$1 == 0, halide_copy_to_device_result$1)
gpu_block<OpenCL> (nan_or_one.s0.row.row_outer.__block_id_y, 0, nan_or_one.extent.1) {
gpu_block<OpenCL> (nan_or_one.s0.col.col_outer.__block_id_x, 0, nan_or_one.extent.0) {
gpu_thread<OpenCL> (.__thread_id_y, 0, 1) {
gpu_thread<OpenCL> (.__thread_id_x, 0, 1) {
nan_or_one[(((nan_or_one.min.1 + nan_or_one.s0.row.row_outer.__block_id_y)*nan_or_one.stride.1) + (nan_or_one.min.0 + nan_or_one.s0.col.col_outer.__block_id_x)) - ((nan_or_one.min.1*nan_or_one.stride.1) + nan_or_one.min.0)] =
let t1 = (float32)strict_float(nanf) in
(float32)strict_float(select((uint1)is_nan_f32((float32)strict_float(b0[((nan_or_one.min.0 + nan_or_one.s0.col.col_outer.__block_id_x) + (nan_or_one.min.1 + nan_or_one.s0.row.row_outer.__block_id_y))*3])), t1, t1))
}
}
}
}
_halide_buffer_set_device_dirty((halide_buffer_t *)nan_or_one.buffer, (uint1)1)
}
}
Hoisting loop invariant if statements...
Lowering after hoisting loop invariant if statements: (unchanged)
Injecting early frees...
Lowering after injecting early frees: (unchanged)
Simplifying correlated differences...
Lowering after simplifying correlated differences:
assert((uint64)reinterpret((halide_buffer_t *)nan_or_one.buffer) != (uint64)0, halide_error_buffer_argument_is_null("nan_or_one"))
assert((uint64)reinterpret((halide_buffer_t *)b0.buffer) != (uint64)0, halide_error_buffer_argument_is_null("b0"))
let b0 = (void *)_halide_buffer_get_host((halide_buffer_t *)b0.buffer)
let b0.type = (uint32)_halide_buffer_get_type((halide_buffer_t *)b0.buffer)
let b0.dimensions = _halide_buffer_get_dimensions((halide_buffer_t *)b0.buffer)
let b0.min.0 = _halide_buffer_get_min((halide_buffer_t *)b0.buffer, 0)
let b0.extent.0 = _halide_buffer_get_extent((halide_buffer_t *)b0.buffer, 0)
let b0.stride.0 = _halide_buffer_get_stride((halide_buffer_t *)b0.buffer, 0)
let b0.min.1 = _halide_buffer_get_min((halide_buffer_t *)b0.buffer, 1)
let b0.extent.1 = _halide_buffer_get_extent((halide_buffer_t *)b0.buffer, 1)
let b0.stride.1 = _halide_buffer_get_stride((halide_buffer_t *)b0.buffer, 1)
let b0.min.2 = _halide_buffer_get_min((halide_buffer_t *)b0.buffer, 2)
let b0.extent.2 = _halide_buffer_get_extent((halide_buffer_t *)b0.buffer, 2)
let b0.stride.2 = _halide_buffer_get_stride((halide_buffer_t *)b0.buffer, 2)
let nan_or_one = (void *)_halide_buffer_get_host((halide_buffer_t *)nan_or_one.buffer)
let nan_or_one.type = (uint32)_halide_buffer_get_type((halide_buffer_t *)nan_or_one.buffer)
let nan_or_one.dimensions = _halide_buffer_get_dimensions((halide_buffer_t *)nan_or_one.buffer)
let nan_or_one.min.0 = _halide_buffer_get_min((halide_buffer_t *)nan_or_one.buffer, 0)
let nan_or_one.extent.0 = _halide_buffer_get_extent((halide_buffer_t *)nan_or_one.buffer, 0)
let nan_or_one.stride.0 = _halide_buffer_get_stride((halide_buffer_t *)nan_or_one.buffer, 0)
let nan_or_one.min.1 = _halide_buffer_get_min((halide_buffer_t *)nan_or_one.buffer, 1)
let nan_or_one.extent.1 = _halide_buffer_get_extent((halide_buffer_t *)nan_or_one.buffer, 1)
let nan_or_one.stride.1 = _halide_buffer_get_stride((halide_buffer_t *)nan_or_one.buffer, 1)
assert(!(uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)b0.buffer) || ((0 <= nan_or_one.min.0) && ((nan_or_one.extent.0 + nan_or_one.min.0) <= 1)), halide_error_constraints_make_required_region_smaller("Input buffer b0", 0, 0, 0, nan_or_one.min.0, (nan_or_one.extent.0 + nan_or_one.min.0) + -1))
assert(!(uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)b0.buffer) || ((0 <= nan_or_one.min.1) && ((nan_or_one.extent.1 + nan_or_one.min.1) <= 1)), halide_error_constraints_make_required_region_smaller("Input buffer b0", 1, 0, 0, nan_or_one.min.1, (nan_or_one.extent.1 + nan_or_one.min.1) + -1))
if ((uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)b0.buffer)) {
(halide_buffer_t *)_halide_buffer_init((halide_buffer_t *)b0.buffer, (halide_dimension_t *)_halide_buffer_get_shape((halide_buffer_t *)b0.buffer), (void *)reinterpret((uint64)0), (uint64)0, (halide_device_interface_t *)reinterpret((uint64)0), 2, 32, 3, (halide_dimension_t *)make_struct(0, 1, 3, 0, 0, 1, 3, 0, 0, 3, 1, 0), (uint64)0)
}
if ((uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)nan_or_one.buffer)) {
(halide_buffer_t *)_halide_buffer_init((halide_buffer_t *)nan_or_one.buffer, (halide_dimension_t *)_halide_buffer_get_shape((halide_buffer_t *)nan_or_one.buffer), (void *)reinterpret((uint64)0), (uint64)0, (halide_device_interface_t *)reinterpret((uint64)0), 2, 32, 2, (halide_dimension_t *)make_struct(nan_or_one.min.0, nan_or_one.extent.0, 1, 0, nan_or_one.min.1, nan_or_one.extent.1, nan_or_one.extent.0, 0), (uint64)0)
}
if (!((uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)b0.buffer) || (uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)nan_or_one.buffer))) {
assert(b0.type == (uint32)73730, halide_error_bad_type("Input buffer b0", b0.type, (uint32)73730))
assert(b0.dimensions == 3, halide_error_bad_dimensions("Input buffer b0", b0.dimensions, 3))
assert(nan_or_one.type == (uint32)73730, halide_error_bad_type("Output buffer nan_or_one", nan_or_one.type, (uint32)73730))
assert(nan_or_one.dimensions == 2, halide_error_bad_dimensions("Output buffer nan_or_one", nan_or_one.dimensions, 2))
assert((b0.min.0 <= nan_or_one.min.0) && ((nan_or_one.extent.0 + nan_or_one.min.0) <= (b0.extent.0 + b0.min.0)), halide_error_access_out_of_bounds("Input buffer b0", 0, nan_or_one.min.0, (nan_or_one.extent.0 + nan_or_one.min.0) + -1, b0.min.0, (b0.extent.0 + b0.min.0) + -1))
assert(0 <= b0.extent.0, halide_error_buffer_extents_negative("Input buffer b0", 0, b0.extent.0))
assert((b0.min.1 <= nan_or_one.min.1) && ((nan_or_one.extent.1 + nan_or_one.min.1) <= (b0.extent.1 + b0.min.1)), halide_error_access_out_of_bounds("Input buffer b0", 1, nan_or_one.min.1, (nan_or_one.extent.1 + nan_or_one.min.1) + -1, b0.min.1, (b0.extent.1 + b0.min.1) + -1))
assert(0 <= b0.extent.1, halide_error_buffer_extents_negative("Input buffer b0", 1, b0.extent.1))
assert((b0.min.2 <= 0) && (1 <= (b0.extent.2 + b0.min.2)), halide_error_access_out_of_bounds("Input buffer b0", 2, 0, 0, b0.min.2, (b0.extent.2 + b0.min.2) + -1))
assert(0 <= b0.extent.2, halide_error_buffer_extents_negative("Input buffer b0", 2, b0.extent.2))
assert(0 <= nan_or_one.extent.0, halide_error_buffer_extents_negative("Output buffer nan_or_one", 0, nan_or_one.extent.0))
assert(0 <= nan_or_one.extent.1, halide_error_buffer_extents_negative("Output buffer nan_or_one", 1, nan_or_one.extent.1))
assert(b0.stride.0 == 3, halide_error_constraint_violated("b0.stride.0", b0.stride.0, "3", 3))
assert(b0.min.0 == 0, halide_error_constraint_violated("b0.min.0", b0.min.0, "0", 0))
assert(b0.extent.0 == 1, halide_error_constraint_violated("b0.extent.0", b0.extent.0, "1", 1))
assert(b0.stride.1 == 3, halide_error_constraint_violated("b0.stride.1", b0.stride.1, "3", 3))
assert(b0.min.1 == 0, halide_error_constraint_violated("b0.min.1", b0.min.1, "0", 0))
assert(b0.extent.1 == 1, halide_error_constraint_violated("b0.extent.1", b0.extent.1, "1", 1))
assert(b0.stride.2 == 1, halide_error_constraint_violated("b0.stride.2", b0.stride.2, "1", 1))
assert(b0.min.2 == 0, halide_error_constraint_violated("b0.min.2", b0.min.2, "0", 0))
assert(b0.extent.2 == 3, halide_error_constraint_violated("b0.extent.2", b0.extent.2, "3", 3))
assert(nan_or_one.stride.0 == 1, halide_error_constraint_violated("nan_or_one.stride.0", nan_or_one.stride.0, "1", 1))
let nan_or_one.total_extent.1 = int64(nan_or_one.extent.1)*int64(nan_or_one.extent.0)
assert(uint64(nan_or_one.extent.0) <= (uint64)2147483647, halide_error_buffer_allocation_too_large("nan_or_one", uint64(nan_or_one.extent.0), (uint64)2147483647))
assert((uint64)abs(int64(nan_or_one.extent.1)*int64(nan_or_one.stride.1)) <= (uint64)2147483647, halide_error_buffer_allocation_too_large("nan_or_one", (uint64)abs(int64(nan_or_one.extent.1)*int64(nan_or_one.stride.1)), (uint64)2147483647))
assert(nan_or_one.total_extent.1 <= (int64)2147483647, halide_error_buffer_extents_too_large("nan_or_one", nan_or_one.total_extent.1, (int64)2147483647))
produce nan_or_one {
let halide_copy_to_device_result = halide_copy_to_device((halide_buffer_t *)b0.buffer, (halide_device_interface_t *)halide_opencl_device_interface())
assert(halide_copy_to_device_result == 0, halide_copy_to_device_result)
let halide_copy_to_device_result$1 = halide_copy_to_device((halide_buffer_t *)nan_or_one.buffer, (halide_device_interface_t *)halide_opencl_device_interface())
assert(halide_copy_to_device_result$1 == 0, halide_copy_to_device_result$1)
gpu_block<OpenCL> (nan_or_one.s0.row.row_outer.__block_id_y, 0, nan_or_one.extent.1) {
gpu_block<OpenCL> (nan_or_one.s0.col.col_outer.__block_id_x, 0, nan_or_one.extent.0) {
gpu_thread<OpenCL> (.__thread_id_y, 0, 1) {
gpu_thread<OpenCL> (.__thread_id_x, 0, 1) {
nan_or_one[(((nan_or_one.min.1 + nan_or_one.s0.row.row_outer.__block_id_y)*nan_or_one.stride.1) + (nan_or_one.min.0 + nan_or_one.s0.col.col_outer.__block_id_x)) - ((nan_or_one.min.1*nan_or_one.stride.1) + nan_or_one.min.0)] =
let t1 = (float32)strict_float(nanf) in
(float32)strict_float(select((uint1)is_nan_f32((float32)strict_float(b0[((nan_or_one.min.0 + nan_or_one.s0.col.col_outer.__block_id_x) + (nan_or_one.min.1 + nan_or_one.s0.row.row_outer.__block_id_y))*3])), t1, t1))
}
}
}
}
_halide_buffer_set_device_dirty((halide_buffer_t *)nan_or_one.buffer, (uint1)1)
}
}
Bounding small allocations...
Lowering after bounding small allocations:
assert((uint64)reinterpret((halide_buffer_t *)nan_or_one.buffer) != (uint64)0, halide_error_buffer_argument_is_null("nan_or_one"))
assert((uint64)reinterpret((halide_buffer_t *)b0.buffer) != (uint64)0, halide_error_buffer_argument_is_null("b0"))
let b0 = (void *)_halide_buffer_get_host((halide_buffer_t *)b0.buffer)
let b0.type = (uint32)_halide_buffer_get_type((halide_buffer_t *)b0.buffer)
let b0.dimensions = _halide_buffer_get_dimensions((halide_buffer_t *)b0.buffer)
let b0.min.0 = _halide_buffer_get_min((halide_buffer_t *)b0.buffer, 0)
let b0.extent.0 = _halide_buffer_get_extent((halide_buffer_t *)b0.buffer, 0)
let b0.stride.0 = _halide_buffer_get_stride((halide_buffer_t *)b0.buffer, 0)
let b0.min.1 = _halide_buffer_get_min((halide_buffer_t *)b0.buffer, 1)
let b0.extent.1 = _halide_buffer_get_extent((halide_buffer_t *)b0.buffer, 1)
let b0.stride.1 = _halide_buffer_get_stride((halide_buffer_t *)b0.buffer, 1)
let b0.min.2 = _halide_buffer_get_min((halide_buffer_t *)b0.buffer, 2)
let b0.extent.2 = _halide_buffer_get_extent((halide_buffer_t *)b0.buffer, 2)
let b0.stride.2 = _halide_buffer_get_stride((halide_buffer_t *)b0.buffer, 2)
let nan_or_one = (void *)_halide_buffer_get_host((halide_buffer_t *)nan_or_one.buffer)
let nan_or_one.type = (uint32)_halide_buffer_get_type((halide_buffer_t *)nan_or_one.buffer)
let nan_or_one.dimensions = _halide_buffer_get_dimensions((halide_buffer_t *)nan_or_one.buffer)
let nan_or_one.min.0 = _halide_buffer_get_min((halide_buffer_t *)nan_or_one.buffer, 0)
let nan_or_one.extent.0 = _halide_buffer_get_extent((halide_buffer_t *)nan_or_one.buffer, 0)
let nan_or_one.stride.0 = _halide_buffer_get_stride((halide_buffer_t *)nan_or_one.buffer, 0)
let nan_or_one.min.1 = _halide_buffer_get_min((halide_buffer_t *)nan_or_one.buffer, 1)
let nan_or_one.extent.1 = _halide_buffer_get_extent((halide_buffer_t *)nan_or_one.buffer, 1)
let nan_or_one.stride.1 = _halide_buffer_get_stride((halide_buffer_t *)nan_or_one.buffer, 1)
assert(!(uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)b0.buffer) || ((0 <= nan_or_one.min.0) && ((nan_or_one.extent.0 + nan_or_one.min.0) <= 1)), halide_error_constraints_make_required_region_smaller("Input buffer b0", 0, 0, 0, nan_or_one.min.0, (nan_or_one.extent.0 + nan_or_one.min.0) + -1))
assert(!(uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)b0.buffer) || ((0 <= nan_or_one.min.1) && ((nan_or_one.extent.1 + nan_or_one.min.1) <= 1)), halide_error_constraints_make_required_region_smaller("Input buffer b0", 1, 0, 0, nan_or_one.min.1, (nan_or_one.extent.1 + nan_or_one.min.1) + -1))
if ((uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)b0.buffer)) {
(halide_buffer_t *)_halide_buffer_init((halide_buffer_t *)b0.buffer, (halide_dimension_t *)_halide_buffer_get_shape((halide_buffer_t *)b0.buffer), (void *)reinterpret((uint64)0), (uint64)0, (halide_device_interface_t *)reinterpret((uint64)0), 2, 32, 3, (halide_dimension_t *)make_struct(0, 1, 3, 0, 0, 1, 3, 0, 0, 3, 1, 0), (uint64)0)
}
if ((uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)nan_or_one.buffer)) {
(halide_buffer_t *)_halide_buffer_init((halide_buffer_t *)nan_or_one.buffer, (halide_dimension_t *)_halide_buffer_get_shape((halide_buffer_t *)nan_or_one.buffer), (void *)reinterpret((uint64)0), (uint64)0, (halide_device_interface_t *)reinterpret((uint64)0), 2, 32, 2, (halide_dimension_t *)make_struct(nan_or_one.min.0, nan_or_one.extent.0, 1, 0, nan_or_one.min.1, nan_or_one.extent.1, nan_or_one.extent.0, 0), (uint64)0)
}
if (!((uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)b0.buffer) || (uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)nan_or_one.buffer))) {
assert(b0.type == (uint32)73730, halide_error_bad_type("Input buffer b0", b0.type, (uint32)73730))
assert(b0.dimensions == 3, halide_error_bad_dimensions("Input buffer b0", b0.dimensions, 3))
assert(nan_or_one.type == (uint32)73730, halide_error_bad_type("Output buffer nan_or_one", nan_or_one.type, (uint32)73730))
assert(nan_or_one.dimensions == 2, halide_error_bad_dimensions("Output buffer nan_or_one", nan_or_one.dimensions, 2))
assert((b0.min.0 <= nan_or_one.min.0) && ((nan_or_one.extent.0 + nan_or_one.min.0) <= (b0.extent.0 + b0.min.0)), halide_error_access_out_of_bounds("Input buffer b0", 0, nan_or_one.min.0, (nan_or_one.extent.0 + nan_or_one.min.0) + -1, b0.min.0, (b0.extent.0 + b0.min.0) + -1))
assert(0 <= b0.extent.0, halide_error_buffer_extents_negative("Input buffer b0", 0, b0.extent.0))
assert((b0.min.1 <= nan_or_one.min.1) && ((nan_or_one.extent.1 + nan_or_one.min.1) <= (b0.extent.1 + b0.min.1)), halide_error_access_out_of_bounds("Input buffer b0", 1, nan_or_one.min.1, (nan_or_one.extent.1 + nan_or_one.min.1) + -1, b0.min.1, (b0.extent.1 + b0.min.1) + -1))
assert(0 <= b0.extent.1, halide_error_buffer_extents_negative("Input buffer b0", 1, b0.extent.1))
assert((b0.min.2 <= 0) && (1 <= (b0.extent.2 + b0.min.2)), halide_error_access_out_of_bounds("Input buffer b0", 2, 0, 0, b0.min.2, (b0.extent.2 + b0.min.2) + -1))
assert(0 <= b0.extent.2, halide_error_buffer_extents_negative("Input buffer b0", 2, b0.extent.2))
assert(0 <= nan_or_one.extent.0, halide_error_buffer_extents_negative("Output buffer nan_or_one", 0, nan_or_one.extent.0))
assert(0 <= nan_or_one.extent.1, halide_error_buffer_extents_negative("Output buffer nan_or_one", 1, nan_or_one.extent.1))
assert(b0.stride.0 == 3, halide_error_constraint_violated("b0.stride.0", b0.stride.0, "3", 3))
assert(b0.min.0 == 0, halide_error_constraint_violated("b0.min.0", b0.min.0, "0", 0))
assert(b0.extent.0 == 1, halide_error_constraint_violated("b0.extent.0", b0.extent.0, "1", 1))
assert(b0.stride.1 == 3, halide_error_constraint_violated("b0.stride.1", b0.stride.1, "3", 3))
assert(b0.min.1 == 0, halide_error_constraint_violated("b0.min.1", b0.min.1, "0", 0))
assert(b0.extent.1 == 1, halide_error_constraint_violated("b0.extent.1", b0.extent.1, "1", 1))
assert(b0.stride.2 == 1, halide_error_constraint_violated("b0.stride.2", b0.stride.2, "1", 1))
assert(b0.min.2 == 0, halide_error_constraint_violated("b0.min.2", b0.min.2, "0", 0))
assert(b0.extent.2 == 3, halide_error_constraint_violated("b0.extent.2", b0.extent.2, "3", 3))
assert(nan_or_one.stride.0 == 1, halide_error_constraint_violated("nan_or_one.stride.0", nan_or_one.stride.0, "1", 1))
let nan_or_one.total_extent.1 = int64(nan_or_one.extent.1)*int64(nan_or_one.extent.0)
assert(uint64(nan_or_one.extent.0) <= (uint64)2147483647, halide_error_buffer_allocation_too_large("nan_or_one", uint64(nan_or_one.extent.0), (uint64)2147483647))
assert((uint64)abs(int64(nan_or_one.extent.1)*int64(nan_or_one.stride.1)) <= (uint64)2147483647, halide_error_buffer_allocation_too_large("nan_or_one", (uint64)abs(int64(nan_or_one.extent.1)*int64(nan_or_one.stride.1)), (uint64)2147483647))
assert(nan_or_one.total_extent.1 <= (int64)2147483647, halide_error_buffer_extents_too_large("nan_or_one", nan_or_one.total_extent.1, (int64)2147483647))
produce nan_or_one {
let halide_copy_to_device_result = halide_copy_to_device((halide_buffer_t *)b0.buffer, (halide_device_interface_t *)halide_opencl_device_interface())
assert(halide_copy_to_device_result == 0, halide_copy_to_device_result)
let halide_copy_to_device_result$1 = halide_copy_to_device((halide_buffer_t *)nan_or_one.buffer, (halide_device_interface_t *)halide_opencl_device_interface())
assert(halide_copy_to_device_result$1 == 0, halide_copy_to_device_result$1)
gpu_block<OpenCL> (nan_or_one.s0.row.row_outer.__block_id_y, 0, nan_or_one.extent.1) {
gpu_block<OpenCL> (nan_or_one.s0.col.col_outer.__block_id_x, 0, nan_or_one.extent.0) {
gpu_thread<OpenCL> (.__thread_id_y, 0, 1) {
gpu_thread<OpenCL> (.__thread_id_x, 0, 1) {
nan_or_one[(((nan_or_one.min.1 + nan_or_one.s0.row.row_outer.__block_id_y)*nan_or_one.stride.1) + (nan_or_one.min.0 + nan_or_one.s0.col.col_outer.__block_id_x)) - ((nan_or_one.min.1*nan_or_one.stride.1) + nan_or_one.min.0)] =
let t1 = (float32)strict_float(nanf) in
(float32)strict_float(select((uint1)is_nan_f32((float32)strict_float(b0[((nan_or_one.min.0 + nan_or_one.s0.col.col_outer.__block_id_x) + (nan_or_one.min.1 + nan_or_one.s0.row.row_outer.__block_id_y))*3])), t1, t1))
}
}
}
}
_halide_buffer_set_device_dirty((halide_buffer_t *)nan_or_one.buffer, (uint1)1)
}
}
Simplifying...
Lowering unsafe promises...
Lowering after lowering unsafe promises:
assert((uint64)reinterpret((halide_buffer_t *)nan_or_one.buffer) != (uint64)0, halide_error_buffer_argument_is_null("nan_or_one"))
assert((uint64)reinterpret((halide_buffer_t *)b0.buffer) != (uint64)0, halide_error_buffer_argument_is_null("b0"))
let b0 = (void *)_halide_buffer_get_host((halide_buffer_t *)b0.buffer)
let b0.type = (uint32)_halide_buffer_get_type((halide_buffer_t *)b0.buffer)
let b0.dimensions = _halide_buffer_get_dimensions((halide_buffer_t *)b0.buffer)
let b0.min.0 = _halide_buffer_get_min((halide_buffer_t *)b0.buffer, 0)
let b0.extent.0 = _halide_buffer_get_extent((halide_buffer_t *)b0.buffer, 0)
let b0.stride.0 = _halide_buffer_get_stride((halide_buffer_t *)b0.buffer, 0)
let b0.min.1 = _halide_buffer_get_min((halide_buffer_t *)b0.buffer, 1)
let b0.extent.1 = _halide_buffer_get_extent((halide_buffer_t *)b0.buffer, 1)
let b0.stride.1 = _halide_buffer_get_stride((halide_buffer_t *)b0.buffer, 1)
let b0.min.2 = _halide_buffer_get_min((halide_buffer_t *)b0.buffer, 2)
let b0.extent.2 = _halide_buffer_get_extent((halide_buffer_t *)b0.buffer, 2)
let b0.stride.2 = _halide_buffer_get_stride((halide_buffer_t *)b0.buffer, 2)
let nan_or_one = (void *)_halide_buffer_get_host((halide_buffer_t *)nan_or_one.buffer)
let nan_or_one.type = (uint32)_halide_buffer_get_type((halide_buffer_t *)nan_or_one.buffer)
let nan_or_one.dimensions = _halide_buffer_get_dimensions((halide_buffer_t *)nan_or_one.buffer)
let nan_or_one.min.0 = _halide_buffer_get_min((halide_buffer_t *)nan_or_one.buffer, 0)
let nan_or_one.extent.0 = _halide_buffer_get_extent((halide_buffer_t *)nan_or_one.buffer, 0)
let nan_or_one.stride.0 = _halide_buffer_get_stride((halide_buffer_t *)nan_or_one.buffer, 0)
let nan_or_one.min.1 = _halide_buffer_get_min((halide_buffer_t *)nan_or_one.buffer, 1)
let nan_or_one.extent.1 = _halide_buffer_get_extent((halide_buffer_t *)nan_or_one.buffer, 1)
let nan_or_one.stride.1 = _halide_buffer_get_stride((halide_buffer_t *)nan_or_one.buffer, 1)
assert(!(uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)b0.buffer) || ((0 <= nan_or_one.min.0) && ((nan_or_one.extent.0 + nan_or_one.min.0) <= 1)), halide_error_constraints_make_required_region_smaller("Input buffer b0", 0, 0, 0, nan_or_one.min.0, (nan_or_one.extent.0 + nan_or_one.min.0) + -1))
assert(!(uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)b0.buffer) || ((0 <= nan_or_one.min.1) && ((nan_or_one.extent.1 + nan_or_one.min.1) <= 1)), halide_error_constraints_make_required_region_smaller("Input buffer b0", 1, 0, 0, nan_or_one.min.1, (nan_or_one.extent.1 + nan_or_one.min.1) + -1))
if ((uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)b0.buffer)) {
(halide_buffer_t *)_halide_buffer_init((halide_buffer_t *)b0.buffer, (halide_dimension_t *)_halide_buffer_get_shape((halide_buffer_t *)b0.buffer), (void *)reinterpret((uint64)0), (uint64)0, (halide_device_interface_t *)reinterpret((uint64)0), 2, 32, 3, (halide_dimension_t *)make_struct(0, 1, 3, 0, 0, 1, 3, 0, 0, 3, 1, 0), (uint64)0)
}
if ((uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)nan_or_one.buffer)) {
(halide_buffer_t *)_halide_buffer_init((halide_buffer_t *)nan_or_one.buffer, (halide_dimension_t *)_halide_buffer_get_shape((halide_buffer_t *)nan_or_one.buffer), (void *)reinterpret((uint64)0), (uint64)0, (halide_device_interface_t *)reinterpret((uint64)0), 2, 32, 2, (halide_dimension_t *)make_struct(nan_or_one.min.0, nan_or_one.extent.0, 1, 0, nan_or_one.min.1, nan_or_one.extent.1, nan_or_one.extent.0, 0), (uint64)0)
}
if (!((uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)b0.buffer) || (uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)nan_or_one.buffer))) {
assert(b0.type == (uint32)73730, halide_error_bad_type("Input buffer b0", b0.type, (uint32)73730))
assert(b0.dimensions == 3, halide_error_bad_dimensions("Input buffer b0", b0.dimensions, 3))
assert(nan_or_one.type == (uint32)73730, halide_error_bad_type("Output buffer nan_or_one", nan_or_one.type, (uint32)73730))
assert(nan_or_one.dimensions == 2, halide_error_bad_dimensions("Output buffer nan_or_one", nan_or_one.dimensions, 2))
assert((b0.min.0 <= nan_or_one.min.0) && ((nan_or_one.extent.0 + nan_or_one.min.0) <= (b0.extent.0 + b0.min.0)), halide_error_access_out_of_bounds("Input buffer b0", 0, nan_or_one.min.0, (nan_or_one.extent.0 + nan_or_one.min.0) + -1, b0.min.0, (b0.extent.0 + b0.min.0) + -1))
assert(0 <= b0.extent.0, halide_error_buffer_extents_negative("Input buffer b0", 0, b0.extent.0))
assert((b0.min.1 <= nan_or_one.min.1) && ((nan_or_one.extent.1 + nan_or_one.min.1) <= (b0.extent.1 + b0.min.1)), halide_error_access_out_of_bounds("Input buffer b0", 1, nan_or_one.min.1, (nan_or_one.extent.1 + nan_or_one.min.1) + -1, b0.min.1, (b0.extent.1 + b0.min.1) + -1))
assert(0 <= b0.extent.1, halide_error_buffer_extents_negative("Input buffer b0", 1, b0.extent.1))
assert((b0.min.2 <= 0) && (1 <= (b0.extent.2 + b0.min.2)), halide_error_access_out_of_bounds("Input buffer b0", 2, 0, 0, b0.min.2, (b0.extent.2 + b0.min.2) + -1))
assert(0 <= b0.extent.2, halide_error_buffer_extents_negative("Input buffer b0", 2, b0.extent.2))
assert(0 <= nan_or_one.extent.0, halide_error_buffer_extents_negative("Output buffer nan_or_one", 0, nan_or_one.extent.0))
assert(0 <= nan_or_one.extent.1, halide_error_buffer_extents_negative("Output buffer nan_or_one", 1, nan_or_one.extent.1))
assert(b0.stride.0 == 3, halide_error_constraint_violated("b0.stride.0", b0.stride.0, "3", 3))
assert(b0.min.0 == 0, halide_error_constraint_violated("b0.min.0", b0.min.0, "0", 0))
assert(b0.extent.0 == 1, halide_error_constraint_violated("b0.extent.0", b0.extent.0, "1", 1))
assert(b0.stride.1 == 3, halide_error_constraint_violated("b0.stride.1", b0.stride.1, "3", 3))
assert(b0.min.1 == 0, halide_error_constraint_violated("b0.min.1", b0.min.1, "0", 0))
assert(b0.extent.1 == 1, halide_error_constraint_violated("b0.extent.1", b0.extent.1, "1", 1))
assert(b0.stride.2 == 1, halide_error_constraint_violated("b0.stride.2", b0.stride.2, "1", 1))
assert(b0.min.2 == 0, halide_error_constraint_violated("b0.min.2", b0.min.2, "0", 0))
assert(b0.extent.2 == 3, halide_error_constraint_violated("b0.extent.2", b0.extent.2, "3", 3))
assert(nan_or_one.stride.0 == 1, halide_error_constraint_violated("nan_or_one.stride.0", nan_or_one.stride.0, "1", 1))
let nan_or_one.total_extent.1 = int64(nan_or_one.extent.1)*int64(nan_or_one.extent.0)
assert(uint64(nan_or_one.extent.0) <= (uint64)2147483647, halide_error_buffer_allocation_too_large("nan_or_one", uint64(nan_or_one.extent.0), (uint64)2147483647))
assert((uint64)abs(int64(nan_or_one.extent.1)*int64(nan_or_one.stride.1)) <= (uint64)2147483647, halide_error_buffer_allocation_too_large("nan_or_one", (uint64)abs(int64(nan_or_one.extent.1)*int64(nan_or_one.stride.1)), (uint64)2147483647))
assert(nan_or_one.total_extent.1 <= (int64)2147483647, halide_error_buffer_extents_too_large("nan_or_one", nan_or_one.total_extent.1, (int64)2147483647))
produce nan_or_one {
let halide_copy_to_device_result = halide_copy_to_device((halide_buffer_t *)b0.buffer, (halide_device_interface_t *)halide_opencl_device_interface())
assert(halide_copy_to_device_result == 0, halide_copy_to_device_result)
let halide_copy_to_device_result$1 = halide_copy_to_device((halide_buffer_t *)nan_or_one.buffer, (halide_device_interface_t *)halide_opencl_device_interface())
assert(halide_copy_to_device_result$1 == 0, halide_copy_to_device_result$1)
gpu_block<OpenCL> (nan_or_one.s0.row.row_outer.__block_id_y, 0, nan_or_one.extent.1) {
gpu_block<OpenCL> (nan_or_one.s0.col.col_outer.__block_id_x, 0, nan_or_one.extent.0) {
gpu_thread<OpenCL> (.__thread_id_y, 0, 1) {
gpu_thread<OpenCL> (.__thread_id_x, 0, 1) {
let t6 = nan_or_one.min.0 + nan_or_one.s0.col.col_outer.__block_id_x
let t7 = nan_or_one.min.1 + nan_or_one.s0.row.row_outer.__block_id_y
let t8 = (float32)strict_float(nanf)
nan_or_one[((t7*nan_or_one.stride.1) + t6) - ((nan_or_one.min.1*nan_or_one.stride.1) + nan_or_one.min.0)] = (float32)strict_float(select((uint1)is_nan_f32((float32)strict_float(b0[(t6 + t7)*3])), t8, t8))
}
}
}
}
_halide_buffer_set_device_dirty((halide_buffer_t *)nan_or_one.buffer, (uint1)1)
}
}
Flattening nested ramps...
Lowering after flattening nested ramps: (unchanged)
Removing dead allocations and moving loop invariant code...
Lowering after removing dead allocations and hoisting loop invariants:
assert((uint64)reinterpret((halide_buffer_t *)nan_or_one.buffer) != (uint64)0, halide_error_buffer_argument_is_null("nan_or_one"))
assert((uint64)reinterpret((halide_buffer_t *)b0.buffer) != (uint64)0, halide_error_buffer_argument_is_null("b0"))
let b0 = (void *)_halide_buffer_get_host((halide_buffer_t *)b0.buffer)
let b0.type = (uint32)_halide_buffer_get_type((halide_buffer_t *)b0.buffer)
let b0.dimensions = _halide_buffer_get_dimensions((halide_buffer_t *)b0.buffer)
let b0.min.0 = _halide_buffer_get_min((halide_buffer_t *)b0.buffer, 0)
let b0.extent.0 = _halide_buffer_get_extent((halide_buffer_t *)b0.buffer, 0)
let b0.stride.0 = _halide_buffer_get_stride((halide_buffer_t *)b0.buffer, 0)
let b0.min.1 = _halide_buffer_get_min((halide_buffer_t *)b0.buffer, 1)
let b0.extent.1 = _halide_buffer_get_extent((halide_buffer_t *)b0.buffer, 1)
let b0.stride.1 = _halide_buffer_get_stride((halide_buffer_t *)b0.buffer, 1)
let b0.min.2 = _halide_buffer_get_min((halide_buffer_t *)b0.buffer, 2)
let b0.extent.2 = _halide_buffer_get_extent((halide_buffer_t *)b0.buffer, 2)
let b0.stride.2 = _halide_buffer_get_stride((halide_buffer_t *)b0.buffer, 2)
let nan_or_one = (void *)_halide_buffer_get_host((halide_buffer_t *)nan_or_one.buffer)
let nan_or_one.type = (uint32)_halide_buffer_get_type((halide_buffer_t *)nan_or_one.buffer)
let nan_or_one.dimensions = _halide_buffer_get_dimensions((halide_buffer_t *)nan_or_one.buffer)
let nan_or_one.min.0 = _halide_buffer_get_min((halide_buffer_t *)nan_or_one.buffer, 0)
let nan_or_one.extent.0 = _halide_buffer_get_extent((halide_buffer_t *)nan_or_one.buffer, 0)
let nan_or_one.stride.0 = _halide_buffer_get_stride((halide_buffer_t *)nan_or_one.buffer, 0)
let nan_or_one.min.1 = _halide_buffer_get_min((halide_buffer_t *)nan_or_one.buffer, 1)
let nan_or_one.extent.1 = _halide_buffer_get_extent((halide_buffer_t *)nan_or_one.buffer, 1)
let nan_or_one.stride.1 = _halide_buffer_get_stride((halide_buffer_t *)nan_or_one.buffer, 1)
assert(!(uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)b0.buffer) || ((0 <= nan_or_one.min.0) && ((nan_or_one.extent.0 + nan_or_one.min.0) <= 1)), halide_error_constraints_make_required_region_smaller("Input buffer b0", 0, 0, 0, nan_or_one.min.0, (nan_or_one.extent.0 + nan_or_one.min.0) + -1))
assert(!(uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)b0.buffer) || ((0 <= nan_or_one.min.1) && ((nan_or_one.extent.1 + nan_or_one.min.1) <= 1)), halide_error_constraints_make_required_region_smaller("Input buffer b0", 1, 0, 0, nan_or_one.min.1, (nan_or_one.extent.1 + nan_or_one.min.1) + -1))
if ((uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)b0.buffer)) {
(halide_buffer_t *)_halide_buffer_init((halide_buffer_t *)b0.buffer, (halide_dimension_t *)_halide_buffer_get_shape((halide_buffer_t *)b0.buffer), (void *)reinterpret((uint64)0), (uint64)0, (halide_device_interface_t *)reinterpret((uint64)0), 2, 32, 3, (halide_dimension_t *)make_struct(0, 1, 3, 0, 0, 1, 3, 0, 0, 3, 1, 0), (uint64)0)
}
if ((uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)nan_or_one.buffer)) {
(halide_buffer_t *)_halide_buffer_init((halide_buffer_t *)nan_or_one.buffer, (halide_dimension_t *)_halide_buffer_get_shape((halide_buffer_t *)nan_or_one.buffer), (void *)reinterpret((uint64)0), (uint64)0, (halide_device_interface_t *)reinterpret((uint64)0), 2, 32, 2, (halide_dimension_t *)make_struct(nan_or_one.min.0, nan_or_one.extent.0, 1, 0, nan_or_one.min.1, nan_or_one.extent.1, nan_or_one.extent.0, 0), (uint64)0)
}
if (!((uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)b0.buffer) || (uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)nan_or_one.buffer))) {
assert(b0.type == (uint32)73730, halide_error_bad_type("Input buffer b0", b0.type, (uint32)73730))
assert(b0.dimensions == 3, halide_error_bad_dimensions("Input buffer b0", b0.dimensions, 3))
assert(nan_or_one.type == (uint32)73730, halide_error_bad_type("Output buffer nan_or_one", nan_or_one.type, (uint32)73730))
assert(nan_or_one.dimensions == 2, halide_error_bad_dimensions("Output buffer nan_or_one", nan_or_one.dimensions, 2))
assert((b0.min.0 <= nan_or_one.min.0) && ((nan_or_one.extent.0 + nan_or_one.min.0) <= (b0.extent.0 + b0.min.0)), halide_error_access_out_of_bounds("Input buffer b0", 0, nan_or_one.min.0, (nan_or_one.extent.0 + nan_or_one.min.0) + -1, b0.min.0, (b0.extent.0 + b0.min.0) + -1))
assert(0 <= b0.extent.0, halide_error_buffer_extents_negative("Input buffer b0", 0, b0.extent.0))
assert((b0.min.1 <= nan_or_one.min.1) && ((nan_or_one.extent.1 + nan_or_one.min.1) <= (b0.extent.1 + b0.min.1)), halide_error_access_out_of_bounds("Input buffer b0", 1, nan_or_one.min.1, (nan_or_one.extent.1 + nan_or_one.min.1) + -1, b0.min.1, (b0.extent.1 + b0.min.1) + -1))
assert(0 <= b0.extent.1, halide_error_buffer_extents_negative("Input buffer b0", 1, b0.extent.1))
assert((b0.min.2 <= 0) && (1 <= (b0.extent.2 + b0.min.2)), halide_error_access_out_of_bounds("Input buffer b0", 2, 0, 0, b0.min.2, (b0.extent.2 + b0.min.2) + -1))
assert(0 <= b0.extent.2, halide_error_buffer_extents_negative("Input buffer b0", 2, b0.extent.2))
assert(0 <= nan_or_one.extent.0, halide_error_buffer_extents_negative("Output buffer nan_or_one", 0, nan_or_one.extent.0))
assert(0 <= nan_or_one.extent.1, halide_error_buffer_extents_negative("Output buffer nan_or_one", 1, nan_or_one.extent.1))
assert(b0.stride.0 == 3, halide_error_constraint_violated("b0.stride.0", b0.stride.0, "3", 3))
assert(b0.min.0 == 0, halide_error_constraint_violated("b0.min.0", b0.min.0, "0", 0))
assert(b0.extent.0 == 1, halide_error_constraint_violated("b0.extent.0", b0.extent.0, "1", 1))
assert(b0.stride.1 == 3, halide_error_constraint_violated("b0.stride.1", b0.stride.1, "3", 3))
assert(b0.min.1 == 0, halide_error_constraint_violated("b0.min.1", b0.min.1, "0", 0))
assert(b0.extent.1 == 1, halide_error_constraint_violated("b0.extent.1", b0.extent.1, "1", 1))
assert(b0.stride.2 == 1, halide_error_constraint_violated("b0.stride.2", b0.stride.2, "1", 1))
assert(b0.min.2 == 0, halide_error_constraint_violated("b0.min.2", b0.min.2, "0", 0))
assert(b0.extent.2 == 3, halide_error_constraint_violated("b0.extent.2", b0.extent.2, "3", 3))
assert(nan_or_one.stride.0 == 1, halide_error_constraint_violated("nan_or_one.stride.0", nan_or_one.stride.0, "1", 1))
let nan_or_one.total_extent.1 = int64(nan_or_one.extent.1)*int64(nan_or_one.extent.0)
assert(uint64(nan_or_one.extent.0) <= (uint64)2147483647, halide_error_buffer_allocation_too_large("nan_or_one", uint64(nan_or_one.extent.0), (uint64)2147483647))
assert((uint64)abs(int64(nan_or_one.extent.1)*int64(nan_or_one.stride.1)) <= (uint64)2147483647, halide_error_buffer_allocation_too_large("nan_or_one", (uint64)abs(int64(nan_or_one.extent.1)*int64(nan_or_one.stride.1)), (uint64)2147483647))
assert(nan_or_one.total_extent.1 <= (int64)2147483647, halide_error_buffer_extents_too_large("nan_or_one", nan_or_one.total_extent.1, (int64)2147483647))
produce nan_or_one {
let halide_copy_to_device_result = halide_copy_to_device((halide_buffer_t *)b0.buffer, (halide_device_interface_t *)halide_opencl_device_interface())
assert(halide_copy_to_device_result == 0, halide_copy_to_device_result)
let halide_copy_to_device_result$1 = halide_copy_to_device((halide_buffer_t *)nan_or_one.buffer, (halide_device_interface_t *)halide_opencl_device_interface())
assert(halide_copy_to_device_result$1 == 0, halide_copy_to_device_result$1)
let t10 = 0 - (nan_or_one.min.1*nan_or_one.stride.1)
let t9 = nan_or_one.min.0 + nan_or_one.min.1
gpu_block<OpenCL> (nan_or_one.s0.row.row_outer.__block_id_y, 0, nan_or_one.extent.1) {
gpu_block<OpenCL> (nan_or_one.s0.col.col_outer.__block_id_x, 0, nan_or_one.extent.0) {
gpu_thread<OpenCL> (.__thread_id_y, 0, 1) {
gpu_thread<OpenCL> (.__thread_id_x, 0, 1) {
let t8 = (float32)strict_float(nanf)
nan_or_one[(((nan_or_one.min.1 + nan_or_one.s0.row.row_outer.__block_id_y)*nan_or_one.stride.1) + t10) + nan_or_one.s0.col.col_outer.__block_id_x] = (float32)strict_float(select((uint1)is_nan_f32((float32)strict_float(b0[((nan_or_one.s0.row.row_outer.__block_id_y + t9) + nan_or_one.s0.col.col_outer.__block_id_x)*3])), t8, t8))
}
}
}
}
_halide_buffer_set_device_dirty((halide_buffer_t *)nan_or_one.buffer, (uint1)1)
}
}
Finding intrinsics...
Lowering after finding intrinsics:
assert((uint64)reinterpret((halide_buffer_t *)nan_or_one.buffer) != (uint64)0, halide_error_buffer_argument_is_null("nan_or_one"))
assert((uint64)reinterpret((halide_buffer_t *)b0.buffer) != (uint64)0, halide_error_buffer_argument_is_null("b0"))
let b0 = (void *)_halide_buffer_get_host((halide_buffer_t *)b0.buffer)
let b0.type = (uint32)_halide_buffer_get_type((halide_buffer_t *)b0.buffer)
let b0.dimensions = _halide_buffer_get_dimensions((halide_buffer_t *)b0.buffer)
let b0.min.0 = _halide_buffer_get_min((halide_buffer_t *)b0.buffer, 0)
let b0.extent.0 = _halide_buffer_get_extent((halide_buffer_t *)b0.buffer, 0)
let b0.stride.0 = _halide_buffer_get_stride((halide_buffer_t *)b0.buffer, 0)
let b0.min.1 = _halide_buffer_get_min((halide_buffer_t *)b0.buffer, 1)
let b0.extent.1 = _halide_buffer_get_extent((halide_buffer_t *)b0.buffer, 1)
let b0.stride.1 = _halide_buffer_get_stride((halide_buffer_t *)b0.buffer, 1)
let b0.min.2 = _halide_buffer_get_min((halide_buffer_t *)b0.buffer, 2)
let b0.extent.2 = _halide_buffer_get_extent((halide_buffer_t *)b0.buffer, 2)
let b0.stride.2 = _halide_buffer_get_stride((halide_buffer_t *)b0.buffer, 2)
let nan_or_one = (void *)_halide_buffer_get_host((halide_buffer_t *)nan_or_one.buffer)
let nan_or_one.type = (uint32)_halide_buffer_get_type((halide_buffer_t *)nan_or_one.buffer)
let nan_or_one.dimensions = _halide_buffer_get_dimensions((halide_buffer_t *)nan_or_one.buffer)
let nan_or_one.min.0 = _halide_buffer_get_min((halide_buffer_t *)nan_or_one.buffer, 0)
let nan_or_one.extent.0 = _halide_buffer_get_extent((halide_buffer_t *)nan_or_one.buffer, 0)
let nan_or_one.stride.0 = _halide_buffer_get_stride((halide_buffer_t *)nan_or_one.buffer, 0)
let nan_or_one.min.1 = _halide_buffer_get_min((halide_buffer_t *)nan_or_one.buffer, 1)
let nan_or_one.extent.1 = _halide_buffer_get_extent((halide_buffer_t *)nan_or_one.buffer, 1)
let nan_or_one.stride.1 = _halide_buffer_get_stride((halide_buffer_t *)nan_or_one.buffer, 1)
assert(!(uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)b0.buffer) || ((0 <= nan_or_one.min.0) && ((nan_or_one.extent.0 + nan_or_one.min.0) <= 1)), halide_error_constraints_make_required_region_smaller("Input buffer b0", 0, 0, 0, nan_or_one.min.0, (nan_or_one.extent.0 + nan_or_one.min.0) + -1))
assert(!(uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)b0.buffer) || ((0 <= nan_or_one.min.1) && ((nan_or_one.extent.1 + nan_or_one.min.1) <= 1)), halide_error_constraints_make_required_region_smaller("Input buffer b0", 1, 0, 0, nan_or_one.min.1, (nan_or_one.extent.1 + nan_or_one.min.1) + -1))
if ((uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)b0.buffer)) {
(halide_buffer_t *)_halide_buffer_init((halide_buffer_t *)b0.buffer, (halide_dimension_t *)_halide_buffer_get_shape((halide_buffer_t *)b0.buffer), (void *)reinterpret((uint64)0), (uint64)0, (halide_device_interface_t *)reinterpret((uint64)0), 2, 32, 3, (halide_dimension_t *)make_struct(0, 1, 3, 0, 0, 1, 3, 0, 0, 3, 1, 0), (uint64)0)
}
if ((uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)nan_or_one.buffer)) {
(halide_buffer_t *)_halide_buffer_init((halide_buffer_t *)nan_or_one.buffer, (halide_dimension_t *)_halide_buffer_get_shape((halide_buffer_t *)nan_or_one.buffer), (void *)reinterpret((uint64)0), (uint64)0, (halide_device_interface_t *)reinterpret((uint64)0), 2, 32, 2, (halide_dimension_t *)make_struct(nan_or_one.min.0, nan_or_one.extent.0, 1, 0, nan_or_one.min.1, nan_or_one.extent.1, nan_or_one.extent.0, 0), (uint64)0)
}
if (!((uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)b0.buffer) || (uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)nan_or_one.buffer))) {
assert(b0.type == (uint32)73730, halide_error_bad_type("Input buffer b0", b0.type, (uint32)73730))
assert(b0.dimensions == 3, halide_error_bad_dimensions("Input buffer b0", b0.dimensions, 3))
assert(nan_or_one.type == (uint32)73730, halide_error_bad_type("Output buffer nan_or_one", nan_or_one.type, (uint32)73730))
assert(nan_or_one.dimensions == 2, halide_error_bad_dimensions("Output buffer nan_or_one", nan_or_one.dimensions, 2))
assert((b0.min.0 <= nan_or_one.min.0) && ((nan_or_one.extent.0 + nan_or_one.min.0) <= (b0.extent.0 + b0.min.0)), halide_error_access_out_of_bounds("Input buffer b0", 0, nan_or_one.min.0, (nan_or_one.extent.0 + nan_or_one.min.0) + -1, b0.min.0, (b0.extent.0 + b0.min.0) + -1))
assert(0 <= b0.extent.0, halide_error_buffer_extents_negative("Input buffer b0", 0, b0.extent.0))
assert((b0.min.1 <= nan_or_one.min.1) && ((nan_or_one.extent.1 + nan_or_one.min.1) <= (b0.extent.1 + b0.min.1)), halide_error_access_out_of_bounds("Input buffer b0", 1, nan_or_one.min.1, (nan_or_one.extent.1 + nan_or_one.min.1) + -1, b0.min.1, (b0.extent.1 + b0.min.1) + -1))
assert(0 <= b0.extent.1, halide_error_buffer_extents_negative("Input buffer b0", 1, b0.extent.1))
assert((b0.min.2 <= 0) && (1 <= (b0.extent.2 + b0.min.2)), halide_error_access_out_of_bounds("Input buffer b0", 2, 0, 0, b0.min.2, (b0.extent.2 + b0.min.2) + -1))
assert(0 <= b0.extent.2, halide_error_buffer_extents_negative("Input buffer b0", 2, b0.extent.2))
assert(0 <= nan_or_one.extent.0, halide_error_buffer_extents_negative("Output buffer nan_or_one", 0, nan_or_one.extent.0))
assert(0 <= nan_or_one.extent.1, halide_error_buffer_extents_negative("Output buffer nan_or_one", 1, nan_or_one.extent.1))
assert(b0.stride.0 == 3, halide_error_constraint_violated("b0.stride.0", b0.stride.0, "3", 3))
assert(b0.min.0 == 0, halide_error_constraint_violated("b0.min.0", b0.min.0, "0", 0))
assert(b0.extent.0 == 1, halide_error_constraint_violated("b0.extent.0", b0.extent.0, "1", 1))
assert(b0.stride.1 == 3, halide_error_constraint_violated("b0.stride.1", b0.stride.1, "3", 3))
assert(b0.min.1 == 0, halide_error_constraint_violated("b0.min.1", b0.min.1, "0", 0))
assert(b0.extent.1 == 1, halide_error_constraint_violated("b0.extent.1", b0.extent.1, "1", 1))
assert(b0.stride.2 == 1, halide_error_constraint_violated("b0.stride.2", b0.stride.2, "1", 1))
assert(b0.min.2 == 0, halide_error_constraint_violated("b0.min.2", b0.min.2, "0", 0))
assert(b0.extent.2 == 3, halide_error_constraint_violated("b0.extent.2", b0.extent.2, "3", 3))
assert(nan_or_one.stride.0 == 1, halide_error_constraint_violated("nan_or_one.stride.0", nan_or_one.stride.0, "1", 1))
let nan_or_one.total_extent.1 = int64(nan_or_one.extent.1)*int64(nan_or_one.extent.0)
assert(uint64(nan_or_one.extent.0) <= (uint64)2147483647, halide_error_buffer_allocation_too_large("nan_or_one", uint64(nan_or_one.extent.0), (uint64)2147483647))
assert((uint64)abs(int64(nan_or_one.extent.1)*int64(nan_or_one.stride.1)) <= (uint64)2147483647, halide_error_buffer_allocation_too_large("nan_or_one", (uint64)abs(int64(nan_or_one.extent.1)*int64(nan_or_one.stride.1)), (uint64)2147483647))
assert(nan_or_one.total_extent.1 <= (int64)2147483647, halide_error_buffer_extents_too_large("nan_or_one", nan_or_one.total_extent.1, (int64)2147483647))
produce nan_or_one {
let halide_copy_to_device_result = halide_copy_to_device((halide_buffer_t *)b0.buffer, (halide_device_interface_t *)halide_opencl_device_interface())
assert(halide_copy_to_device_result == 0, halide_copy_to_device_result)
let halide_copy_to_device_result$1 = halide_copy_to_device((halide_buffer_t *)nan_or_one.buffer, (halide_device_interface_t *)halide_opencl_device_interface())
assert(halide_copy_to_device_result$1 == 0, halide_copy_to_device_result$1)
let t10 = 0 - (nan_or_one.min.1*nan_or_one.stride.1)
let t9 = nan_or_one.min.0 + nan_or_one.min.1
gpu_block<OpenCL> (nan_or_one.s0.row.row_outer.__block_id_y, 0, nan_or_one.extent.1) {
gpu_block<OpenCL> (nan_or_one.s0.col.col_outer.__block_id_x, 0, nan_or_one.extent.0) {
gpu_thread<OpenCL> (.__thread_id_y, 0, 1) {
gpu_thread<OpenCL> (.__thread_id_x, 0, 1) {
let t8 = (float32)strict_float(nanf)
nan_or_one[(((nan_or_one.min.1 + nan_or_one.s0.row.row_outer.__block_id_y)*nan_or_one.stride.1) + t10) + nan_or_one.s0.col.col_outer.__block_id_x] = (float32)strict_float(select((uint1)is_nan_f32((float32)strict_float(b0[((nan_or_one.s0.row.row_outer.__block_id_y + t9) + nan_or_one.s0.col.col_outer.__block_id_x)*3])), t8, t8))
}
}
}
}
_halide_buffer_set_device_dirty((halide_buffer_t *)nan_or_one.buffer, (uint1)1)
}
}
Hoisting prefetches...
Lowering after hoisting prefetches:
assert((uint64)reinterpret((halide_buffer_t *)nan_or_one.buffer) != (uint64)0, halide_error_buffer_argument_is_null("nan_or_one"))
assert((uint64)reinterpret((halide_buffer_t *)b0.buffer) != (uint64)0, halide_error_buffer_argument_is_null("b0"))
let b0 = (void *)_halide_buffer_get_host((halide_buffer_t *)b0.buffer)
let b0.type = (uint32)_halide_buffer_get_type((halide_buffer_t *)b0.buffer)
let b0.dimensions = _halide_buffer_get_dimensions((halide_buffer_t *)b0.buffer)
let b0.min.0 = _halide_buffer_get_min((halide_buffer_t *)b0.buffer, 0)
let b0.extent.0 = _halide_buffer_get_extent((halide_buffer_t *)b0.buffer, 0)
let b0.stride.0 = _halide_buffer_get_stride((halide_buffer_t *)b0.buffer, 0)
let b0.min.1 = _halide_buffer_get_min((halide_buffer_t *)b0.buffer, 1)
let b0.extent.1 = _halide_buffer_get_extent((halide_buffer_t *)b0.buffer, 1)
let b0.stride.1 = _halide_buffer_get_stride((halide_buffer_t *)b0.buffer, 1)
let b0.min.2 = _halide_buffer_get_min((halide_buffer_t *)b0.buffer, 2)
let b0.extent.2 = _halide_buffer_get_extent((halide_buffer_t *)b0.buffer, 2)
let b0.stride.2 = _halide_buffer_get_stride((halide_buffer_t *)b0.buffer, 2)
let nan_or_one = (void *)_halide_buffer_get_host((halide_buffer_t *)nan_or_one.buffer)
let nan_or_one.type = (uint32)_halide_buffer_get_type((halide_buffer_t *)nan_or_one.buffer)
let nan_or_one.dimensions = _halide_buffer_get_dimensions((halide_buffer_t *)nan_or_one.buffer)
let nan_or_one.min.0 = _halide_buffer_get_min((halide_buffer_t *)nan_or_one.buffer, 0)
let nan_or_one.extent.0 = _halide_buffer_get_extent((halide_buffer_t *)nan_or_one.buffer, 0)
let nan_or_one.stride.0 = _halide_buffer_get_stride((halide_buffer_t *)nan_or_one.buffer, 0)
let nan_or_one.min.1 = _halide_buffer_get_min((halide_buffer_t *)nan_or_one.buffer, 1)
let nan_or_one.extent.1 = _halide_buffer_get_extent((halide_buffer_t *)nan_or_one.buffer, 1)
let nan_or_one.stride.1 = _halide_buffer_get_stride((halide_buffer_t *)nan_or_one.buffer, 1)
assert(!(uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)b0.buffer) || ((0 <= nan_or_one.min.0) && ((nan_or_one.extent.0 + nan_or_one.min.0) <= 1)), halide_error_constraints_make_required_region_smaller("Input buffer b0", 0, 0, 0, nan_or_one.min.0, (nan_or_one.extent.0 + nan_or_one.min.0) + -1))
assert(!(uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)b0.buffer) || ((0 <= nan_or_one.min.1) && ((nan_or_one.extent.1 + nan_or_one.min.1) <= 1)), halide_error_constraints_make_required_region_smaller("Input buffer b0", 1, 0, 0, nan_or_one.min.1, (nan_or_one.extent.1 + nan_or_one.min.1) + -1))
if ((uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)b0.buffer)) {
(halide_buffer_t *)_halide_buffer_init((halide_buffer_t *)b0.buffer, (halide_dimension_t *)_halide_buffer_get_shape((halide_buffer_t *)b0.buffer), (void *)reinterpret((uint64)0), (uint64)0, (halide_device_interface_t *)reinterpret((uint64)0), 2, 32, 3, (halide_dimension_t *)make_struct(0, 1, 3, 0, 0, 1, 3, 0, 0, 3, 1, 0), (uint64)0)
}
if ((uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)nan_or_one.buffer)) {
(halide_buffer_t *)_halide_buffer_init((halide_buffer_t *)nan_or_one.buffer, (halide_dimension_t *)_halide_buffer_get_shape((halide_buffer_t *)nan_or_one.buffer), (void *)reinterpret((uint64)0), (uint64)0, (halide_device_interface_t *)reinterpret((uint64)0), 2, 32, 2, (halide_dimension_t *)make_struct(nan_or_one.min.0, nan_or_one.extent.0, 1, 0, nan_or_one.min.1, nan_or_one.extent.1, nan_or_one.extent.0, 0), (uint64)0)
}
if (!((uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)b0.buffer) || (uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)nan_or_one.buffer))) {
assert(b0.type == (uint32)73730, halide_error_bad_type("Input buffer b0", b0.type, (uint32)73730))
assert(b0.dimensions == 3, halide_error_bad_dimensions("Input buffer b0", b0.dimensions, 3))
assert(nan_or_one.type == (uint32)73730, halide_error_bad_type("Output buffer nan_or_one", nan_or_one.type, (uint32)73730))
assert(nan_or_one.dimensions == 2, halide_error_bad_dimensions("Output buffer nan_or_one", nan_or_one.dimensions, 2))
assert((b0.min.0 <= nan_or_one.min.0) && ((nan_or_one.extent.0 + nan_or_one.min.0) <= (b0.extent.0 + b0.min.0)), halide_error_access_out_of_bounds("Input buffer b0", 0, nan_or_one.min.0, (nan_or_one.extent.0 + nan_or_one.min.0) + -1, b0.min.0, (b0.extent.0 + b0.min.0) + -1))
assert(0 <= b0.extent.0, halide_error_buffer_extents_negative("Input buffer b0", 0, b0.extent.0))
assert((b0.min.1 <= nan_or_one.min.1) && ((nan_or_one.extent.1 + nan_or_one.min.1) <= (b0.extent.1 + b0.min.1)), halide_error_access_out_of_bounds("Input buffer b0", 1, nan_or_one.min.1, (nan_or_one.extent.1 + nan_or_one.min.1) + -1, b0.min.1, (b0.extent.1 + b0.min.1) + -1))
assert(0 <= b0.extent.1, halide_error_buffer_extents_negative("Input buffer b0", 1, b0.extent.1))
assert((b0.min.2 <= 0) && (1 <= (b0.extent.2 + b0.min.2)), halide_error_access_out_of_bounds("Input buffer b0", 2, 0, 0, b0.min.2, (b0.extent.2 + b0.min.2) + -1))
assert(0 <= b0.extent.2, halide_error_buffer_extents_negative("Input buffer b0", 2, b0.extent.2))
assert(0 <= nan_or_one.extent.0, halide_error_buffer_extents_negative("Output buffer nan_or_one", 0, nan_or_one.extent.0))
assert(0 <= nan_or_one.extent.1, halide_error_buffer_extents_negative("Output buffer nan_or_one", 1, nan_or_one.extent.1))
assert(b0.stride.0 == 3, halide_error_constraint_violated("b0.stride.0", b0.stride.0, "3", 3))
assert(b0.min.0 == 0, halide_error_constraint_violated("b0.min.0", b0.min.0, "0", 0))
assert(b0.extent.0 == 1, halide_error_constraint_violated("b0.extent.0", b0.extent.0, "1", 1))
assert(b0.stride.1 == 3, halide_error_constraint_violated("b0.stride.1", b0.stride.1, "3", 3))
assert(b0.min.1 == 0, halide_error_constraint_violated("b0.min.1", b0.min.1, "0", 0))
assert(b0.extent.1 == 1, halide_error_constraint_violated("b0.extent.1", b0.extent.1, "1", 1))
assert(b0.stride.2 == 1, halide_error_constraint_violated("b0.stride.2", b0.stride.2, "1", 1))
assert(b0.min.2 == 0, halide_error_constraint_violated("b0.min.2", b0.min.2, "0", 0))
assert(b0.extent.2 == 3, halide_error_constraint_violated("b0.extent.2", b0.extent.2, "3", 3))
assert(nan_or_one.stride.0 == 1, halide_error_constraint_violated("nan_or_one.stride.0", nan_or_one.stride.0, "1", 1))
let nan_or_one.total_extent.1 = int64(nan_or_one.extent.1)*int64(nan_or_one.extent.0)
assert(uint64(nan_or_one.extent.0) <= (uint64)2147483647, halide_error_buffer_allocation_too_large("nan_or_one", uint64(nan_or_one.extent.0), (uint64)2147483647))
assert((uint64)abs(int64(nan_or_one.extent.1)*int64(nan_or_one.stride.1)) <= (uint64)2147483647, halide_error_buffer_allocation_too_large("nan_or_one", (uint64)abs(int64(nan_or_one.extent.1)*int64(nan_or_one.stride.1)), (uint64)2147483647))
assert(nan_or_one.total_extent.1 <= (int64)2147483647, halide_error_buffer_extents_too_large("nan_or_one", nan_or_one.total_extent.1, (int64)2147483647))
produce nan_or_one {
let halide_copy_to_device_result = halide_copy_to_device((halide_buffer_t *)b0.buffer, (halide_device_interface_t *)halide_opencl_device_interface())
assert(halide_copy_to_device_result == 0, halide_copy_to_device_result)
let halide_copy_to_device_result$1 = halide_copy_to_device((halide_buffer_t *)nan_or_one.buffer, (halide_device_interface_t *)halide_opencl_device_interface())
assert(halide_copy_to_device_result$1 == 0, halide_copy_to_device_result$1)
let t10 = 0 - (nan_or_one.min.1*nan_or_one.stride.1)
let t9 = nan_or_one.min.0 + nan_or_one.min.1
gpu_block<OpenCL> (nan_or_one.s0.row.row_outer.__block_id_y, 0, nan_or_one.extent.1) {
gpu_block<OpenCL> (nan_or_one.s0.col.col_outer.__block_id_x, 0, nan_or_one.extent.0) {
gpu_thread<OpenCL> (.__thread_id_y, 0, 1) {
gpu_thread<OpenCL> (.__thread_id_x, 0, 1) {
let t8 = (float32)strict_float(nanf)
nan_or_one[(((nan_or_one.min.1 + nan_or_one.s0.row.row_outer.__block_id_y)*nan_or_one.stride.1) + t10) + nan_or_one.s0.col.col_outer.__block_id_x] = (float32)strict_float(select((uint1)is_nan_f32((float32)strict_float(b0[((nan_or_one.s0.row.row_outer.__block_id_y + t9) + nan_or_one.s0.col.col_outer.__block_id_x)*3])), t8, t8))
}
}
}
}
_halide_buffer_set_device_dirty((halide_buffer_t *)nan_or_one.buffer, (uint1)1)
}
}
Lowering after final simplification:
assert((uint64)reinterpret((halide_buffer_t *)nan_or_one.buffer) != (uint64)0, halide_error_buffer_argument_is_null("nan_or_one"))
assert((uint64)reinterpret((halide_buffer_t *)b0.buffer) != (uint64)0, halide_error_buffer_argument_is_null("b0"))
let b0 = (void *)_halide_buffer_get_host((halide_buffer_t *)b0.buffer)
let b0.type = (uint32)_halide_buffer_get_type((halide_buffer_t *)b0.buffer)
let b0.dimensions = _halide_buffer_get_dimensions((halide_buffer_t *)b0.buffer)
let b0.min.0 = _halide_buffer_get_min((halide_buffer_t *)b0.buffer, 0)
let b0.extent.0 = _halide_buffer_get_extent((halide_buffer_t *)b0.buffer, 0)
let b0.stride.0 = _halide_buffer_get_stride((halide_buffer_t *)b0.buffer, 0)
let b0.min.1 = _halide_buffer_get_min((halide_buffer_t *)b0.buffer, 1)
let b0.extent.1 = _halide_buffer_get_extent((halide_buffer_t *)b0.buffer, 1)
let b0.stride.1 = _halide_buffer_get_stride((halide_buffer_t *)b0.buffer, 1)
let b0.min.2 = _halide_buffer_get_min((halide_buffer_t *)b0.buffer, 2)
let b0.extent.2 = _halide_buffer_get_extent((halide_buffer_t *)b0.buffer, 2)
let b0.stride.2 = _halide_buffer_get_stride((halide_buffer_t *)b0.buffer, 2)
let nan_or_one = (void *)_halide_buffer_get_host((halide_buffer_t *)nan_or_one.buffer)
let nan_or_one.type = (uint32)_halide_buffer_get_type((halide_buffer_t *)nan_or_one.buffer)
let nan_or_one.dimensions = _halide_buffer_get_dimensions((halide_buffer_t *)nan_or_one.buffer)
let nan_or_one.min.0 = _halide_buffer_get_min((halide_buffer_t *)nan_or_one.buffer, 0)
let nan_or_one.extent.0 = _halide_buffer_get_extent((halide_buffer_t *)nan_or_one.buffer, 0)
let nan_or_one.stride.0 = _halide_buffer_get_stride((halide_buffer_t *)nan_or_one.buffer, 0)
let nan_or_one.min.1 = _halide_buffer_get_min((halide_buffer_t *)nan_or_one.buffer, 1)
let nan_or_one.extent.1 = _halide_buffer_get_extent((halide_buffer_t *)nan_or_one.buffer, 1)
let nan_or_one.stride.1 = _halide_buffer_get_stride((halide_buffer_t *)nan_or_one.buffer, 1)
assert(!(uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)b0.buffer) || ((0 <= nan_or_one.min.0) && ((nan_or_one.extent.0 + nan_or_one.min.0) <= 1)), halide_error_constraints_make_required_region_smaller("Input buffer b0", 0, 0, 0, nan_or_one.min.0, (nan_or_one.extent.0 + nan_or_one.min.0) + -1))
assert(!(uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)b0.buffer) || ((0 <= nan_or_one.min.1) && ((nan_or_one.extent.1 + nan_or_one.min.1) <= 1)), halide_error_constraints_make_required_region_smaller("Input buffer b0", 1, 0, 0, nan_or_one.min.1, (nan_or_one.extent.1 + nan_or_one.min.1) + -1))
if ((uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)b0.buffer)) {
(halide_buffer_t *)_halide_buffer_init((halide_buffer_t *)b0.buffer, (halide_dimension_t *)_halide_buffer_get_shape((halide_buffer_t *)b0.buffer), (void *)reinterpret((uint64)0), (uint64)0, (halide_device_interface_t *)reinterpret((uint64)0), 2, 32, 3, (halide_dimension_t *)make_struct(0, 1, 3, 0, 0, 1, 3, 0, 0, 3, 1, 0), (uint64)0)
}
if ((uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)nan_or_one.buffer)) {
(halide_buffer_t *)_halide_buffer_init((halide_buffer_t *)nan_or_one.buffer, (halide_dimension_t *)_halide_buffer_get_shape((halide_buffer_t *)nan_or_one.buffer), (void *)reinterpret((uint64)0), (uint64)0, (halide_device_interface_t *)reinterpret((uint64)0), 2, 32, 2, (halide_dimension_t *)make_struct(nan_or_one.min.0, nan_or_one.extent.0, 1, 0, nan_or_one.min.1, nan_or_one.extent.1, nan_or_one.extent.0, 0), (uint64)0)
}
if (!((uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)b0.buffer) || (uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)nan_or_one.buffer))) {
assert(b0.type == (uint32)73730, halide_error_bad_type("Input buffer b0", b0.type, (uint32)73730))
assert(b0.dimensions == 3, halide_error_bad_dimensions("Input buffer b0", b0.dimensions, 3))
assert(nan_or_one.type == (uint32)73730, halide_error_bad_type("Output buffer nan_or_one", nan_or_one.type, (uint32)73730))
assert(nan_or_one.dimensions == 2, halide_error_bad_dimensions("Output buffer nan_or_one", nan_or_one.dimensions, 2))
assert((b0.min.0 <= nan_or_one.min.0) && ((nan_or_one.extent.0 + nan_or_one.min.0) <= (b0.extent.0 + b0.min.0)), halide_error_access_out_of_bounds("Input buffer b0", 0, nan_or_one.min.0, (nan_or_one.extent.0 + nan_or_one.min.0) + -1, b0.min.0, (b0.extent.0 + b0.min.0) + -1))
assert(0 <= b0.extent.0, halide_error_buffer_extents_negative("Input buffer b0", 0, b0.extent.0))
assert((b0.min.1 <= nan_or_one.min.1) && ((nan_or_one.extent.1 + nan_or_one.min.1) <= (b0.extent.1 + b0.min.1)), halide_error_access_out_of_bounds("Input buffer b0", 1, nan_or_one.min.1, (nan_or_one.extent.1 + nan_or_one.min.1) + -1, b0.min.1, (b0.extent.1 + b0.min.1) + -1))
assert(0 <= b0.extent.1, halide_error_buffer_extents_negative("Input buffer b0", 1, b0.extent.1))
assert((b0.min.2 <= 0) && (1 <= (b0.extent.2 + b0.min.2)), halide_error_access_out_of_bounds("Input buffer b0", 2, 0, 0, b0.min.2, (b0.extent.2 + b0.min.2) + -1))
assert(0 <= b0.extent.2, halide_error_buffer_extents_negative("Input buffer b0", 2, b0.extent.2))
assert(0 <= nan_or_one.extent.0, halide_error_buffer_extents_negative("Output buffer nan_or_one", 0, nan_or_one.extent.0))
assert(0 <= nan_or_one.extent.1, halide_error_buffer_extents_negative("Output buffer nan_or_one", 1, nan_or_one.extent.1))
assert(b0.stride.0 == 3, halide_error_constraint_violated("b0.stride.0", b0.stride.0, "3", 3))
assert(b0.min.0 == 0, halide_error_constraint_violated("b0.min.0", b0.min.0, "0", 0))
assert(b0.extent.0 == 1, halide_error_constraint_violated("b0.extent.0", b0.extent.0, "1", 1))
assert(b0.stride.1 == 3, halide_error_constraint_violated("b0.stride.1", b0.stride.1, "3", 3))
assert(b0.min.1 == 0, halide_error_constraint_violated("b0.min.1", b0.min.1, "0", 0))
assert(b0.extent.1 == 1, halide_error_constraint_violated("b0.extent.1", b0.extent.1, "1", 1))
assert(b0.stride.2 == 1, halide_error_constraint_violated("b0.stride.2", b0.stride.2, "1", 1))
assert(b0.min.2 == 0, halide_error_constraint_violated("b0.min.2", b0.min.2, "0", 0))
assert(b0.extent.2 == 3, halide_error_constraint_violated("b0.extent.2", b0.extent.2, "3", 3))
assert(nan_or_one.stride.0 == 1, halide_error_constraint_violated("nan_or_one.stride.0", nan_or_one.stride.0, "1", 1))
let nan_or_one.total_extent.1 = int64(nan_or_one.extent.1)*int64(nan_or_one.extent.0)
assert(uint64(nan_or_one.extent.0) <= (uint64)2147483647, halide_error_buffer_allocation_too_large("nan_or_one", uint64(nan_or_one.extent.0), (uint64)2147483647))
assert((uint64)abs(int64(nan_or_one.extent.1)*int64(nan_or_one.stride.1)) <= (uint64)2147483647, halide_error_buffer_allocation_too_large("nan_or_one", (uint64)abs(int64(nan_or_one.extent.1)*int64(nan_or_one.stride.1)), (uint64)2147483647))
assert(nan_or_one.total_extent.1 <= (int64)2147483647, halide_error_buffer_extents_too_large("nan_or_one", nan_or_one.total_extent.1, (int64)2147483647))
produce nan_or_one {
let halide_copy_to_device_result = halide_copy_to_device((halide_buffer_t *)b0.buffer, (halide_device_interface_t *)halide_opencl_device_interface())
assert(halide_copy_to_device_result == 0, halide_copy_to_device_result)
let halide_copy_to_device_result$1 = halide_copy_to_device((halide_buffer_t *)nan_or_one.buffer, (halide_device_interface_t *)halide_opencl_device_interface())
assert(halide_copy_to_device_result$1 == 0, halide_copy_to_device_result$1)
let t10 = 0 - (nan_or_one.min.1*nan_or_one.stride.1)
let t9 = nan_or_one.min.0 + nan_or_one.min.1
gpu_block<OpenCL> (nan_or_one.s0.row.row_outer.__block_id_y, 0, nan_or_one.extent.1) {
gpu_block<OpenCL> (nan_or_one.s0.col.col_outer.__block_id_x, 0, nan_or_one.extent.0) {
gpu_thread<OpenCL> (.__thread_id_y, 0, 1) {
gpu_thread<OpenCL> (.__thread_id_x, 0, 1) {
let t8 = (float32)strict_float(nanf)
nan_or_one[(((nan_or_one.min.1 + nan_or_one.s0.row.row_outer.__block_id_y)*nan_or_one.stride.1) + t10) + nan_or_one.s0.col.col_outer.__block_id_x] = (float32)strict_float(select((uint1)is_nan_f32((float32)strict_float(b0[((nan_or_one.s0.row.row_outer.__block_id_y + t9) + nan_or_one.s0.col.col_outer.__block_id_x)*3])), t8, t8))
}
}
}
}
_halide_buffer_set_device_dirty((halide_buffer_t *)nan_or_one.buffer, (uint1)1)
}
}
Skipping Hexagon offload...
Offloading GPU loops...
OpenCL device codegen init_module
Kernel launch: nan_or_one.s0.row.row_outer.__block_id_y
Kernel bounds: (1, 1, 1, 1) threads, (nan_or_one.extent.0, nan_or_one.extent.1, 1, 1) blocks
var: nan_or_one.min.1
var: nan_or_one.stride.1
var: t10
var: t9
buffer: b0 12 (read) dims=3
buffer: nan_or_one 0 (write) dims=0
CodeGen_OpenCL_Dev::compile _kernel_nan_or_one_s0_row_row_outer___block_id_y
CodeGen_OpenCL_Dev: after removing predication:
gpu_block<OpenCL> (nan_or_one.s0.row.row_outer.__block_id_y, 0, nan_or_one.extent.1) {
gpu_block<OpenCL> (nan_or_one.s0.col.col_outer.__block_id_x, 0, nan_or_one.extent.0) {
gpu_thread<OpenCL> (.__thread_id_y, 0, 1) {
gpu_thread<OpenCL> (.__thread_id_x, 0, 1) {
let t8 = (float32)strict_float(nanf)
nan_or_one[(((nan_or_one.min.1 + nan_or_one.s0.row.row_outer.__block_id_y)*nan_or_one.stride.1) + t10) + nan_or_one.s0.col.col_outer.__block_id_x] = (float32)strict_float(select((uint1)is_nan_f32((float32)strict_float((float32)b0[((nan_or_one.s0.row.row_outer.__block_id_y + t9) + nan_or_one.s0.col.col_outer.__block_id_x)*3])), t8, t8))
}
}
}
}
Adding OpenCL kernel _kernel_nan_or_one_s0_row_row_outer___block_id_y
Eliminating bool vectors
After eliminating bool vectors:
gpu_block<OpenCL> (nan_or_one.s0.row.row_outer.__block_id_y, 0, nan_or_one.extent.1) {
gpu_block<OpenCL> (nan_or_one.s0.col.col_outer.__block_id_x, 0, nan_or_one.extent.0) {
gpu_thread<OpenCL> (.__thread_id_y, 0, 1) {
gpu_thread<OpenCL> (.__thread_id_x, 0, 1) {
let t8 = (float32)strict_float(nanf)
nan_or_one[(((nan_or_one.min.1 + nan_or_one.s0.row.row_outer.__block_id_y)*nan_or_one.stride.1) + t10) + nan_or_one.s0.col.col_outer.__block_id_x] = (float32)strict_float(select((uint1)is_nan_f32((float32)strict_float((float32)b0[((nan_or_one.s0.row.row_outer.__block_id_y + t9) + nan_or_one.s0.col.col_outer.__block_id_x)*3])), t8, t8))
}
}
}
}
Compiled launch to kernel "_kernel_nan_or_one_s0_row_row_outer___block_id_y"
Generating init_kernels for opencl
OpenCL kernel:
/*OpenCL C x86-64-linux-jit-opencl-sse41-strict_float-user_context*/
#pragma OPENCL FP_CONTRACT ON
inline float float_from_bits(unsigned int x) {return as_float(x);}
inline float nan_f32() { return NAN; }
inline float neg_inf_f32() { return -INFINITY; }
inline float inf_f32() { return INFINITY; }
inline bool is_nan_f32(float x) {return isnan(x); }
inline bool is_inf_f32(float x) {return isinf(x); }
inline bool is_finite_f32(float x) {return isfinite(x); }
#define sqrt_f32 sqrt
#define sin_f32 sin
#define cos_f32 cos
#define exp_f32 exp
#define log_f32 log
#define abs_f32 fabs
#define floor_f32 floor
#define ceil_f32 ceil
#define round_f32 round
#define trunc_f32 trunc
#define pow_f32 pow
#define asin_f32 asin
#define acos_f32 acos
#define tan_f32 tan
#define atan_f32 atan
#define atan2_f32 atan2
#define sinh_f32 sinh
#define asinh_f32 asinh
#define cosh_f32 cosh
#define acosh_f32 acosh
#define tanh_f32 tanh
#define atanh_f32 atanh
#define fast_inverse_f32 native_recip
#define fast_inverse_sqrt_f32 native_rsqrt
#define halide_unused(x)
__kernel void _at_least_one_kernel(int x) { }
// Address spaces for _kernel_nan_or_one_s0_row_row_outer___block_id_y
#if 12 <= MAX_CONSTANT_BUFFER_SIZE && 0 < MAX_CONSTANT_ARGS
#define __address_space__b0 __constant
#else
#define __address_space__b0 __global
#endif
#define __address_space__nan_or_one __global
__kernel void _kernel_nan_or_one_s0_row_row_outer___block_id_y(
__address_space__b0 const float *restrict _b0,
__address_space__nan_or_one float *restrict _nan_or_one,
const int _nan_or_one_min_1,
const int _nan_or_one_stride_1,
const int _t10,
const int _t9,
__local int16* __shared)
{
int _nan_or_one_s0_row_row_outer___block_id_y = get_group_id(1);
int _nan_or_one_s0_col_col_outer___block_id_x = get_group_id(0);
int ___thread_id_y = get_local_id(1);
int ___thread_id_x = get_local_id(0);
float _0 = (nan_f32());
int _1 = _nan_or_one_s0_row_row_outer___block_id_y + _t9;
int _2 = _1 + _nan_or_one_s0_col_col_outer___block_id_x;
int _3 = _2 * 3;
float _4 = _b0[_3];
float _5 = (_4);
bool _6 = is_nan_f32(_5);
float _7 = (float)(_6 ? _0 : _0);
float _8 = (_7);
int _9 = _nan_or_one_min_1 + _nan_or_one_s0_row_row_outer___block_id_y;
int _10 = _9 * _nan_or_one_stride_1;
int _11 = _10 + _t10;
int _12 = _11 + _nan_or_one_s0_col_col_outer___block_id_x;
_nan_or_one[_12] = _8;
} // kernel _kernel_nan_or_one_s0_row_row_outer___block_id_y
#undef __address_space__b0
#undef __address_space__nan_or_one
Lowering after splitting off GPU loops:
let opencl = (void *)_halide_buffer_get_host((halide_buffer_t *)opencl_buf.buffer)
let halide_opencl_initialize_kernels_result = halide_opencl_initialize_kernels(opencl, (void *)_halide_buffer_get_host((halide_buffer_t *)opencl_gpu_source_kernels.buffer), 2510)
assert(halide_opencl_initialize_kernels_result == 0, halide_opencl_initialize_kernels_result)
(void *)register_destructor("halide_opencl_finalize_kernels", opencl[0])
assert((uint64)reinterpret((halide_buffer_t *)nan_or_one.buffer) != (uint64)0, halide_error_buffer_argument_is_null("nan_or_one"))
assert((uint64)reinterpret((halide_buffer_t *)b0.buffer) != (uint64)0, halide_error_buffer_argument_is_null("b0"))
let b0 = (void *)_halide_buffer_get_host((halide_buffer_t *)b0.buffer)
let b0.type = (uint32)_halide_buffer_get_type((halide_buffer_t *)b0.buffer)
let b0.dimensions = _halide_buffer_get_dimensions((halide_buffer_t *)b0.buffer)
let b0.min.0 = _halide_buffer_get_min((halide_buffer_t *)b0.buffer, 0)
let b0.extent.0 = _halide_buffer_get_extent((halide_buffer_t *)b0.buffer, 0)
let b0.stride.0 = _halide_buffer_get_stride((halide_buffer_t *)b0.buffer, 0)
let b0.min.1 = _halide_buffer_get_min((halide_buffer_t *)b0.buffer, 1)
let b0.extent.1 = _halide_buffer_get_extent((halide_buffer_t *)b0.buffer, 1)
let b0.stride.1 = _halide_buffer_get_stride((halide_buffer_t *)b0.buffer, 1)
let b0.min.2 = _halide_buffer_get_min((halide_buffer_t *)b0.buffer, 2)
let b0.extent.2 = _halide_buffer_get_extent((halide_buffer_t *)b0.buffer, 2)
let b0.stride.2 = _halide_buffer_get_stride((halide_buffer_t *)b0.buffer, 2)
let nan_or_one = (void *)_halide_buffer_get_host((halide_buffer_t *)nan_or_one.buffer)
let nan_or_one.type = (uint32)_halide_buffer_get_type((halide_buffer_t *)nan_or_one.buffer)
let nan_or_one.dimensions = _halide_buffer_get_dimensions((halide_buffer_t *)nan_or_one.buffer)
let nan_or_one.min.0 = _halide_buffer_get_min((halide_buffer_t *)nan_or_one.buffer, 0)
let nan_or_one.extent.0 = _halide_buffer_get_extent((halide_buffer_t *)nan_or_one.buffer, 0)
let nan_or_one.stride.0 = _halide_buffer_get_stride((halide_buffer_t *)nan_or_one.buffer, 0)
let nan_or_one.min.1 = _halide_buffer_get_min((halide_buffer_t *)nan_or_one.buffer, 1)
let nan_or_one.extent.1 = _halide_buffer_get_extent((halide_buffer_t *)nan_or_one.buffer, 1)
let nan_or_one.stride.1 = _halide_buffer_get_stride((halide_buffer_t *)nan_or_one.buffer, 1)
assert(!(uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)b0.buffer) || ((0 <= nan_or_one.min.0) && ((nan_or_one.extent.0 + nan_or_one.min.0) <= 1)), halide_error_constraints_make_required_region_smaller("Input buffer b0", 0, 0, 0, nan_or_one.min.0, (nan_or_one.extent.0 + nan_or_one.min.0) + -1))
assert(!(uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)b0.buffer) || ((0 <= nan_or_one.min.1) && ((nan_or_one.extent.1 + nan_or_one.min.1) <= 1)), halide_error_constraints_make_required_region_smaller("Input buffer b0", 1, 0, 0, nan_or_one.min.1, (nan_or_one.extent.1 + nan_or_one.min.1) + -1))
if ((uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)b0.buffer)) {
(halide_buffer_t *)_halide_buffer_init((halide_buffer_t *)b0.buffer, (halide_dimension_t *)_halide_buffer_get_shape((halide_buffer_t *)b0.buffer), (void *)reinterpret((uint64)0), (uint64)0, (halide_device_interface_t *)reinterpret((uint64)0), 2, 32, 3, (halide_dimension_t *)make_struct(0, 1, 3, 0, 0, 1, 3, 0, 0, 3, 1, 0), (uint64)0)
}
if ((uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)nan_or_one.buffer)) {
(halide_buffer_t *)_halide_buffer_init((halide_buffer_t *)nan_or_one.buffer, (halide_dimension_t *)_halide_buffer_get_shape((halide_buffer_t *)nan_or_one.buffer), (void *)reinterpret((uint64)0), (uint64)0, (halide_device_interface_t *)reinterpret((uint64)0), 2, 32, 2, (halide_dimension_t *)make_struct(nan_or_one.min.0, nan_or_one.extent.0, 1, 0, nan_or_one.min.1, nan_or_one.extent.1, nan_or_one.extent.0, 0), (uint64)0)
}
if (!((uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)b0.buffer) || (uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)nan_or_one.buffer))) {
assert(b0.type == (uint32)73730, halide_error_bad_type("Input buffer b0", b0.type, (uint32)73730))
assert(b0.dimensions == 3, halide_error_bad_dimensions("Input buffer b0", b0.dimensions, 3))
assert(nan_or_one.type == (uint32)73730, halide_error_bad_type("Output buffer nan_or_one", nan_or_one.type, (uint32)73730))
assert(nan_or_one.dimensions == 2, halide_error_bad_dimensions("Output buffer nan_or_one", nan_or_one.dimensions, 2))
assert((b0.min.0 <= nan_or_one.min.0) && ((nan_or_one.extent.0 + nan_or_one.min.0) <= (b0.extent.0 + b0.min.0)), halide_error_access_out_of_bounds("Input buffer b0", 0, nan_or_one.min.0, (nan_or_one.extent.0 + nan_or_one.min.0) + -1, b0.min.0, (b0.extent.0 + b0.min.0) + -1))
assert(0 <= b0.extent.0, halide_error_buffer_extents_negative("Input buffer b0", 0, b0.extent.0))
assert((b0.min.1 <= nan_or_one.min.1) && ((nan_or_one.extent.1 + nan_or_one.min.1) <= (b0.extent.1 + b0.min.1)), halide_error_access_out_of_bounds("Input buffer b0", 1, nan_or_one.min.1, (nan_or_one.extent.1 + nan_or_one.min.1) + -1, b0.min.1, (b0.extent.1 + b0.min.1) + -1))
assert(0 <= b0.extent.1, halide_error_buffer_extents_negative("Input buffer b0", 1, b0.extent.1))
assert((b0.min.2 <= 0) && (1 <= (b0.extent.2 + b0.min.2)), halide_error_access_out_of_bounds("Input buffer b0", 2, 0, 0, b0.min.2, (b0.extent.2 + b0.min.2) + -1))
assert(0 <= b0.extent.2, halide_error_buffer_extents_negative("Input buffer b0", 2, b0.extent.2))
assert(0 <= nan_or_one.extent.0, halide_error_buffer_extents_negative("Output buffer nan_or_one", 0, nan_or_one.extent.0))
assert(0 <= nan_or_one.extent.1, halide_error_buffer_extents_negative("Output buffer nan_or_one", 1, nan_or_one.extent.1))
assert(b0.stride.0 == 3, halide_error_constraint_violated("b0.stride.0", b0.stride.0, "3", 3))
assert(b0.min.0 == 0, halide_error_constraint_violated("b0.min.0", b0.min.0, "0", 0))
assert(b0.extent.0 == 1, halide_error_constraint_violated("b0.extent.0", b0.extent.0, "1", 1))
assert(b0.stride.1 == 3, halide_error_constraint_violated("b0.stride.1", b0.stride.1, "3", 3))
assert(b0.min.1 == 0, halide_error_constraint_violated("b0.min.1", b0.min.1, "0", 0))
assert(b0.extent.1 == 1, halide_error_constraint_violated("b0.extent.1", b0.extent.1, "1", 1))
assert(b0.stride.2 == 1, halide_error_constraint_violated("b0.stride.2", b0.stride.2, "1", 1))
assert(b0.min.2 == 0, halide_error_constraint_violated("b0.min.2", b0.min.2, "0", 0))
assert(b0.extent.2 == 3, halide_error_constraint_violated("b0.extent.2", b0.extent.2, "3", 3))
assert(nan_or_one.stride.0 == 1, halide_error_constraint_violated("nan_or_one.stride.0", nan_or_one.stride.0, "1", 1))
let nan_or_one.total_extent.1 = int64(nan_or_one.extent.1)*int64(nan_or_one.extent.0)
assert(uint64(nan_or_one.extent.0) <= (uint64)2147483647, halide_error_buffer_allocation_too_large("nan_or_one", uint64(nan_or_one.extent.0), (uint64)2147483647))
assert((uint64)abs(int64(nan_or_one.extent.1)*int64(nan_or_one.stride.1)) <= (uint64)2147483647, halide_error_buffer_allocation_too_large("nan_or_one", (uint64)abs(int64(nan_or_one.extent.1)*int64(nan_or_one.stride.1)), (uint64)2147483647))
assert(nan_or_one.total_extent.1 <= (int64)2147483647, halide_error_buffer_extents_too_large("nan_or_one", nan_or_one.total_extent.1, (int64)2147483647))
produce nan_or_one {
let halide_copy_to_device_result = halide_copy_to_device((halide_buffer_t *)b0.buffer, (halide_device_interface_t *)halide_opencl_device_interface())
assert(halide_copy_to_device_result == 0, halide_copy_to_device_result)
let halide_copy_to_device_result$1 = halide_copy_to_device((halide_buffer_t *)nan_or_one.buffer, (halide_device_interface_t *)halide_opencl_device_interface())
assert(halide_copy_to_device_result$1 == 0, halide_copy_to_device_result$1)
let t10 = 0 - (nan_or_one.min.1*nan_or_one.stride.1)
let t9 = nan_or_one.min.0 + nan_or_one.min.1
let halide_opencl_run_result = halide_opencl_run(opencl[0], "_kernel_nan_or_one_s0_row_row_outer___block_id_y", nan_or_one.extent.0, nan_or_one.extent.1, 1, 1, 1, 1, 0, (void *)make_struct((int64)8, (int64)8, (int64)4, (int64)4, (int64)4, (int64)4, (int64)0), (void *)make_struct((void *)b0.buffer, (void *)nan_or_one.buffer, (void *)make_struct(nan_or_one.min.1), (void *)make_struct(nan_or_one.stride.1), (void *)make_struct(t10), (void *)make_struct(t9), (void *)reinterpret((uint64)0)), (void *)make_struct((uint8)1, (uint8)1, (uint8)0, (uint8)0, (uint8)0, (uint8)0, (uint8)0))
assert(halide_opencl_run_result == 0, halide_opencl_run_result)
_halide_buffer_set_device_dirty((halide_buffer_t *)nan_or_one.buffer, (uint1)1)
}
}
Embedding image opencl_buf
Embedding image opencl_gpu_source_kernels
Target triple of initial module: x86_64--linux-gnu
Generating llvm bitcode...
Generating llvm bitcode prolog for function nan_or_one...
Generating llvm bitcode for function nan_or_one...
0x2aa5050
Done generating llvm bitcode
; ModuleID = 'nan_or_one'
source_filename = "/host/buildtrees/halide/src/v13.0.4-7253dad238.clean/src/runtime/halide_buffer_t.cpp"
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64--linux-gnu"
%struct.halide_filter_argument_t = type { i8*, i32, i32, %struct.halide_type_t, %struct.halide_scalar_value_t*, %struct.halide_scalar_value_t*, %struct.halide_scalar_value_t*, %struct.halide_scalar_value_t*, i64** }
%struct.halide_type_t = type { i8, i8, i16 }
%struct.halide_scalar_value_t = type { %union.anon }
%union.anon = type { double }
%struct.halide_filter_metadata_t = type { i32, i32, %struct.halide_filter_argument_t*, i8*, i8* }
%struct.halide_buffer_t.4 = type { i64, %struct.halide_device_interface_t.1*, i8*, i64, %struct.halide_type_t, i32, %struct.halide_dimension_t*, i8* }
%struct.halide_dimension_t = type { i32, i32, i32, i32 }
%struct.halide_device_interface_t.1 = type { i32 (i8*, %struct.halide_buffer_t.4*, %struct.halide_device_interface_t.1*)*, i32 (i8*, %struct.halide_buffer_t.4*)*, i32 (i8*, %struct.halide_buffer_t.4*)*, void (i8*, %struct.halide_device_interface_t.1*)*, i32 (i8*, %struct.halide_buffer_t.4*)*, i32 (i8*, %struct.halide_buffer_t.4*, %struct.halide_device_interface_t.1*)*, i32 (i8*, %struct.halide_buffer_t.4*, %struct.halide_device_interface_t.1*)*, i32 (i8*, %struct.halide_buffer_t.4*)*, {}*, i32 (i8*, %struct.halide_buffer_t.4*, %struct.halide_buffer_t.4*)*, i32 (i8*, %struct.halide_buffer_t.4*, i32, i32, %struct.halide_buffer_t.4*)*, i32 (i8*, %struct.halide_buffer_t.4*)*, i32 (i8*, %struct.halide_buffer_t.4*, i64, %struct.halide_device_interface_t.1*)*, i32 (i8*, %struct.halide_buffer_t.4*)*, i32 (i8*, i32*, i32*)*, %struct.halide_device_interface_impl_t* }
%struct.halide_device_interface_impl_t = type opaque
%struct.halide_buffer_t = type { i64, %struct.halide_device_interface_t*, i8*, i64, %struct.halide_type_t, i32, %struct.halide_dimension_t*, i8* }
%struct.halide_device_interface_t = type { i32 (i8*, %struct.halide_buffer_t*, %struct.halide_device_interface_t*)*, i32 (i8*, %struct.halide_buffer_t*)*, i32 (i8*, %struct.halide_buffer_t*)*, void (i8*, %struct.halide_device_interface_t*)*, i32 (i8*, %struct.halide_buffer_t*)*, i32 (i8*, %struct.halide_buffer_t*, %struct.halide_device_interface_t*)*, i32 (i8*, %struct.halide_buffer_t*, %struct.halide_device_interface_t*)*, i32 (i8*, %struct.halide_buffer_t*)*, i32 (i8*, %struct.halide_buffer_t*, %struct.halide_device_interface_t*, %struct.halide_buffer_t*)*, i32 (i8*, %struct.halide_buffer_t*, %struct.halide_buffer_t*)*, i32 (i8*, %struct.halide_buffer_t*, i32, i32, %struct.halide_buffer_t*)*, i32 (i8*, %struct.halide_buffer_t*)*, i32 (i8*, %struct.halide_buffer_t*, i64, %struct.halide_device_interface_t*)*, i32 (i8*, %struct.halide_buffer_t*)*, i32 (i8*, i32*, i32*)*, %struct.halide_device_interface_impl_t* }
@opencl_buf.data = private global [8 x i8] zeroinitializer, align 32
@opencl_gpu_source_kernels.data = private constant [2510 x i8] c"/*OpenCL C x86-64-linux-jit-opencl-sse41-strict_float-user_context*/\0A#pragma OPENCL FP_CONTRACT ON\0Ainline float float_from_bits(unsigned int x) {return as_float(x);}\0Ainline float nan_f32() { return NAN; }\0Ainline float neg_inf_f32() { return -INFINITY; }\0Ainline float inf_f32() { return INFINITY; }\0Ainline bool is_nan_f32(float x) {return isnan(x); }\0Ainline bool is_inf_f32(float x) {return isinf(x); }\0Ainline bool is_finite_f32(float x) {return isfinite(x); }\0A#define sqrt_f32 sqrt \0A#define sin_f32 sin \0A#define cos_f32 cos \0A#define exp_f32 exp \0A#define log_f32 log \0A#define abs_f32 fabs \0A#define floor_f32 floor \0A#define ceil_f32 ceil \0A#define round_f32 round \0A#define trunc_f32 trunc \0A#define pow_f32 pow\0A#define asin_f32 asin \0A#define acos_f32 acos \0A#define tan_f32 tan \0A#define atan_f32 atan \0A#define atan2_f32 atan2\0A#define sinh_f32 sinh \0A#define asinh_f32 asinh \0A#define cosh_f32 cosh \0A#define acosh_f32 acosh \0A#define tanh_f32 tanh \0A#define atanh_f32 atanh \0A#define fast_inverse_f32 native_recip \0A#define fast_inverse_sqrt_f32 native_rsqrt \0A#define halide_unused(x)\0A\0A__kernel void _at_least_one_kernel(int x) { }\0A// Address spaces for _kernel_nan_or_one_s0_row_row_outer___block_id_y\0A#if 12 <= MAX_CONSTANT_BUFFER_SIZE && 0 < MAX_CONSTANT_ARGS\0A#define __address_space__b0 __constant\0A#else\0A#define __address_space__b0 __global\0A#endif\0A#define __address_space__nan_or_one __global\0A__kernel void _kernel_nan_or_one_s0_row_row_outer___block_id_y(\0A __address_space__b0 const float *restrict _b0,\0A __address_space__nan_or_one float *restrict _nan_or_one,\0A const int _nan_or_one_min_1,\0A const int _nan_or_one_stride_1,\0A const int _t10,\0A const int _t9,\0A __local int16* __shared)\0A{\0A int _nan_or_one_s0_row_row_outer___block_id_y = get_group_id(1);\0A int _nan_or_one_s0_col_col_outer___block_id_x = get_group_id(0);\0A int 
___thread_id_y = get_local_id(1);\0A int ___thread_id_x = get_local_id(0);\0A float _0 = (nan_f32());\0A int _1 = _nan_or_one_s0_row_row_outer___block_id_y + _t9;\0A int _2 = _1 + _nan_or_one_s0_col_col_outer___block_id_x;\0A int _3 = _2 * 3;\0A float _4 = _b0[_3];\0A float _5 = (_4);\0A bool _6 = is_nan_f32(_5);\0A float _7 = (float)(_6 ? _0 : _0);\0A float _8 = (_7);\0A int _9 = _nan_or_one_min_1 + _nan_or_one_s0_row_row_outer___block_id_y;\0A int _10 = _9 * _nan_or_one_stride_1;\0A int _11 = _10 + _t10;\0A int _12 = _11 + _nan_or_one_s0_col_col_outer___block_id_x;\0A _nan_or_one[_12] = _8;\0A} // kernel _kernel_nan_or_one_s0_row_row_outer___block_id_y\0A#undef __address_space__b0\0A#undef __address_space__nan_or_one\0A\00", align 32
@str = private constant [11 x i8] c"nan_or_one\00", align 32
@str.3 = private constant [3 x i8] c"b0\00", align 32
@str.4 = private constant [16 x i8] c"Input buffer b0\00", align 32
@str.5 = private constant [25 x i8] c"Output buffer nan_or_one\00", align 32
@str.6 = private constant [12 x i8] c"b0.stride.0\00", align 32
@str.7 = private constant [2 x i8] c"3\00", align 32
@str.8 = private constant [9 x i8] c"b0.min.0\00", align 32
@str.9 = private constant [2 x i8] c"0\00", align 32
@str.10 = private constant [12 x i8] c"b0.extent.0\00", align 32
@str.11 = private constant [2 x i8] c"1\00", align 32
@str.12 = private constant [12 x i8] c"b0.stride.1\00", align 32
@str.13 = private constant [9 x i8] c"b0.min.1\00", align 32
@str.14 = private constant [12 x i8] c"b0.extent.1\00", align 32
@str.15 = private constant [12 x i8] c"b0.stride.2\00", align 32
@str.16 = private constant [9 x i8] c"b0.min.2\00", align 32
@str.17 = private constant [12 x i8] c"b0.extent.2\00", align 32
@str.18 = private constant [20 x i8] c"nan_or_one.stride.0\00", align 32
@str.19 = private constant [49 x i8] c"_kernel_nan_or_one_s0_row_row_outer___block_id_y\00", align 32
@str.20 = private constant [15 x i8] c"__user_context\00", align 32
@0 = private constant [4 x i64*] zeroinitializer
@1 = private constant [3 x %struct.halide_filter_argument_t] [%struct.halide_filter_argument_t { i8* getelementptr inbounds ([3 x i8], [3 x i8]* @str.3, i32 0, i32 0), i32 1, i32 3, %struct.halide_type_t { i8 2, i8 32, i16 1 }, %struct.halide_scalar_value_t* null, %struct.halide_scalar_value_t* null, %struct.halide_scalar_value_t* null, %struct.halide_scalar_value_t* null, i64** null }, %struct.halide_filter_argument_t { i8* getelementptr inbounds ([15 x i8], [15 x i8]* @str.20, i32 0, i32 0), i32 0, i32 0, %struct.halide_type_t { i8 3, i8 64, i16 1 }, %struct.halide_scalar_value_t* null, %struct.halide_scalar_value_t* null, %struct.halide_scalar_value_t* null, %struct.halide_scalar_value_t* null, i64** null }, %struct.halide_filter_argument_t { i8* getelementptr inbounds ([11 x i8], [11 x i8]* @str, i32 0, i32 0), i32 2, i32 2, %struct.halide_type_t { i8 2, i8 32, i16 1 }, %struct.halide_scalar_value_t* null, %struct.halide_scalar_value_t* null, %struct.halide_scalar_value_t* null, %struct.halide_scalar_value_t* null, i64** getelementptr inbounds ([4 x i64*], [4 x i64*]* @0, i32 0, i32 0) }]
@str.21 = private constant [56 x i8] c"x86-64-linux-jit-opencl-sse41-strict_float-user_context\00", align 32
@nan_or_one_metadata_storage = private constant %struct.halide_filter_metadata_t { i32 1, i32 3, %struct.halide_filter_argument_t* getelementptr inbounds ([3 x %struct.halide_filter_argument_t], [3 x %struct.halide_filter_argument_t]* @1, i32 0, i32 0), i8* getelementptr inbounds ([56 x i8], [56 x i8]* @str.21, i32 0, i32 0), i8* getelementptr inbounds ([11 x i8], [11 x i8]* @str, i32 0, i32 0) }
; Function Attrs: argmemonly mustprogress nofree nosync nounwind willreturn
declare void @llvm.lifetime.start.p0i8(i64 immarg %0, i8* nocapture %1) #0
; Function Attrs: argmemonly mustprogress nofree nounwind willreturn
declare void @llvm.memcpy.p0i8.p0i8.i64(i8* noalias nocapture writeonly %0, i8* noalias nocapture readonly %1, i64 %2, i1 immarg %3) #1
; Function Attrs: argmemonly mustprogress nofree nosync nounwind willreturn
declare void @llvm.lifetime.end.p0i8(i64 immarg %0, i8* nocapture %1) #0
declare i8* @memcpy(i8* %0, i8* %1, i64 %2) local_unnamed_addr #2
declare i32 @halide_copy_to_device(i8* %0, %struct.halide_buffer_t.4* %1, %struct.halide_device_interface_t.1* %2) local_unnamed_addr #2
declare i32 @halide_error_access_out_of_bounds(i8* %0, i8* %1, i32 %2, i32 %3, i32 %4, i32 %5, i32 %6) local_unnamed_addr #2
declare i32 @halide_error_bad_dimensions(i8* %0, i8* %1, i32 %2, i32 %3) local_unnamed_addr #2
declare i32 @halide_error_bad_type(i8* %0, i8* %1, i32 %2, i32 %3) local_unnamed_addr #2
declare i32 @halide_error_buffer_allocation_too_large(i8* %0, i8* %1, i64 %2, i64 %3) local_unnamed_addr #2
declare i32 @halide_error_buffer_argument_is_null(i8* %0, i8* %1) local_unnamed_addr #2
declare i32 @halide_error_buffer_extents_negative(i8* %0, i8* %1, i32 %2, i32 %3) local_unnamed_addr #2
declare i32 @halide_error_buffer_extents_too_large(i8* %0, i8* %1, i64 %2, i64 %3) local_unnamed_addr #2
declare i32 @halide_error_constraint_violated(i8* %0, i8* %1, i32 %2, i8* %3, i32 %4) local_unnamed_addr #2
declare i32 @halide_error_constraints_make_required_region_smaller(i8* %0, i8* %1, i32 %2, i32 %3, i32 %4, i32 %5, i32 %6) local_unnamed_addr #2
declare %struct.halide_device_interface_t.1* @halide_opencl_device_interface() local_unnamed_addr #2
declare i32 @halide_opencl_initialize_kernels(i8* %0, i8** %1, i8* %2, i32 %3) local_unnamed_addr #2
declare void @halide_opencl_finalize_kernels(i8* %0, i8* %1) local_unnamed_addr #2
declare i32 @halide_opencl_run(i8* %0, i8* %1, i8* %2, i32 %3, i32 %4, i32 %5, i32 %6, i32 %7, i32 %8, i32 %9, i64* %10, i8** %11, i8* %12) local_unnamed_addr #2
; Function Attrs: nounwind
define i32 @nan_or_one(%struct.halide_buffer_t* noalias %b0.buffer, i8* %__user_context, %struct.halide_buffer_t* noalias %nan_or_one.buffer) local_unnamed_addr #3 {
entry:
%0 = alloca i32, align 4
%1 = alloca i32, align 4
%2 = alloca [7 x i8], align 1
%3 = alloca { %struct.halide_buffer_t*, %struct.halide_buffer_t*, i32*, i32*, i32*, i32*, i8* }, align 8
%4 = alloca i32, align 4
%5 = alloca i32, align 4
%6 = alloca i32, align 4
%7 = alloca i32, align 4
%8 = alloca [7 x i64], align 16
%9 = alloca [8 x i32], align 4
%halide_opencl_initialize_kernels_result = tail call i32 @halide_opencl_initialize_kernels(i8* %__user_context, i8** bitcast ([8 x i8]* @opencl_buf.data to i8**), i8* getelementptr inbounds ([2510 x i8], [2510 x i8]* @opencl_gpu_source_kernels.data, i64 0, i64 0), i32 2510) #5
%10 = icmp eq i32 %halide_opencl_initialize_kernels_result, 0
br i1 %10, label %"assert succeeded", label %common.ret, !prof !11
"assert succeeded": ; preds = %entry
%11 = load i64, i64* bitcast ([8 x i8]* @opencl_buf.data to i64*), align 32, !tbaa !12
%12 = inttoptr i64 %11 to i8*
%.not = icmp eq %struct.halide_buffer_t* %nan_or_one.buffer, null
br i1 %.not, label %"assert failed1", label %"assert succeeded2", !prof !26
common.ret: ; preds = %entry, %15, %destructor_block, %assert_failed35, %assert_failed34, %assert_failed33, %assert_failed32, %assert_failed31, %assert_failed30, %assert_failed29, %assert_failed28, %assert_failed27, %assert_failed26, %assert_failed25, %assert_failed24, %assert_failed23, %assert_failed22, %assert_failed21, %assert_failed20, %assert_failed19, %assert_failed18, %assert_failed17, %assert_failed16, %assert_failed15, %assert_failed
%common.ret.op = phi i32 [ %203, %assert_failed ], [ %204, %assert_failed15 ], [ %205, %assert_failed16 ], [ %206, %assert_failed17 ], [ %209, %assert_failed18 ], [ %210, %assert_failed19 ], [ %213, %assert_failed20 ], [ %214, %assert_failed21 ], [ %216, %assert_failed22 ], [ %217, %assert_failed23 ], [ %218, %assert_failed24 ], [ %219, %assert_failed25 ], [ %221, %assert_failed26 ], [ %222, %assert_failed27 ], [ %223, %assert_failed28 ], [ %224, %assert_failed29 ], [ %225, %assert_failed30 ], [ %226, %assert_failed31 ], [ %227, %assert_failed32 ], [ %228, %assert_failed33 ], [ %229, %assert_failed34 ], [ %230, %assert_failed35 ], [ %13, %destructor_block ], [ %13, %15 ], [ %halide_opencl_initialize_kernels_result, %entry ]
ret i32 %common.ret.op
destructor_block: ; preds = %"assert succeeded45", %"assert succeeded43", %"produce nan_or_one", %_halide_buffer_is_bounds_query.exit66, %"assert succeeded47", %"assert failed40", %"assert failed38", %"assert failed36", %"assert failed7", %"assert failed5", %"assert failed3", %"assert failed1"
%13 = phi i32 [ %16, %"assert failed1" ], [ %17, %"assert failed3" ], [ %75, %"assert failed5" ], [ %82, %"assert failed7" ], [ %231, %"assert failed36" ], [ %235, %"assert failed38" ], [ %237, %"assert failed40" ], [ 0, %"assert succeeded47" ], [ 0, %_halide_buffer_is_bounds_query.exit66 ], [ %halide_copy_to_device_result, %"produce nan_or_one" ], [ %"halide_copy_to_device_result$1", %"assert succeeded43" ], [ %halide_opencl_run_result, %"assert succeeded45" ]
%14 = icmp eq i64 %11, 0
br i1 %14, label %common.ret, label %15
15: ; preds = %destructor_block
call void @halide_opencl_finalize_kernels(i8* %__user_context, i8* nonnull %12) #10
br label %common.ret
"assert failed1": ; preds = %"assert succeeded"
%16 = tail call i32 @halide_error_buffer_argument_is_null(i8* %__user_context, i8* getelementptr inbounds ([11 x i8], [11 x i8]* @str, i64 0, i64 0)) #5
br label %destructor_block
"assert succeeded2": ; preds = %"assert succeeded"
%.not48 = icmp eq %struct.halide_buffer_t* %b0.buffer, null
br i1 %.not48, label %"assert failed3", label %"assert succeeded4", !prof !26
"assert failed3": ; preds = %"assert succeeded2"
%17 = tail call i32 @halide_error_buffer_argument_is_null(i8* %__user_context, i8* getelementptr inbounds ([3 x i8], [3 x i8]* @str.3, i64 0, i64 0)) #5
br label %destructor_block
"assert succeeded4": ; preds = %"assert succeeded2"
%18 = getelementptr inbounds %struct.halide_buffer_t, %struct.halide_buffer_t* %b0.buffer, i64 0, i32 2
%19 = bitcast i32* %1 to i8*
call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %19) #5
%20 = getelementptr inbounds %struct.halide_buffer_t, %struct.halide_buffer_t* %b0.buffer, i64 0, i32 4, i32 0
%21 = call i8* @memcpy(i8* nonnull %19, i8* nonnull %20, i64 4) #10
%22 = load i32, i32* %1, align 4, !tbaa !27
call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %19) #5
%23 = getelementptr inbounds %struct.halide_buffer_t, %struct.halide_buffer_t* %b0.buffer, i64 0, i32 5
%24 = load i32, i32* %23, align 4, !tbaa !31
%25 = getelementptr inbounds %struct.halide_buffer_t, %struct.halide_buffer_t* %b0.buffer, i64 0, i32 6
%26 = load %struct.halide_dimension_t*, %struct.halide_dimension_t** %25, align 8, !tbaa !38
%27 = getelementptr inbounds %struct.halide_dimension_t, %struct.halide_dimension_t* %26, i64 0, i32 0
%28 = load i32, i32* %27, align 4, !tbaa !39
%29 = getelementptr inbounds %struct.halide_dimension_t, %struct.halide_dimension_t* %26, i64 0, i32 1
%30 = bitcast i32* %29 to <2 x i32>*
%31 = load <2 x i32>, <2 x i32>* %30, align 4, !tbaa !27
%32 = getelementptr inbounds %struct.halide_dimension_t, %struct.halide_dimension_t* %26, i64 1, i32 0
%33 = bitcast i32* %32 to <2 x i32>*
%34 = load <2 x i32>, <2 x i32>* %33, align 4, !tbaa !27
%35 = getelementptr inbounds %struct.halide_dimension_t, %struct.halide_dimension_t* %26, i64 1, i32 2
%36 = load i32, i32* %35, align 4, !tbaa !41
%37 = getelementptr inbounds %struct.halide_dimension_t, %struct.halide_dimension_t* %26, i64 2, i32 0
%38 = load i32, i32* %37, align 4, !tbaa !39
%39 = getelementptr inbounds %struct.halide_dimension_t, %struct.halide_dimension_t* %26, i64 2, i32 1
%40 = load i32, i32* %39, align 4, !tbaa !42
%41 = getelementptr inbounds %struct.halide_dimension_t, %struct.halide_dimension_t* %26, i64 2, i32 2
%42 = load i32, i32* %41, align 4, !tbaa !41
%43 = getelementptr inbounds %struct.halide_buffer_t, %struct.halide_buffer_t* %nan_or_one.buffer, i64 0, i32 2
%44 = bitcast i32* %0 to i8*
call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %44) #5
%45 = getelementptr inbounds %struct.halide_buffer_t, %struct.halide_buffer_t* %nan_or_one.buffer, i64 0, i32 4, i32 0
%46 = call i8* @memcpy(i8* nonnull %44, i8* nonnull %45, i64 4) #10
%47 = load i32, i32* %0, align 4, !tbaa !27
call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %44) #5
%48 = getelementptr inbounds %struct.halide_buffer_t, %struct.halide_buffer_t* %nan_or_one.buffer, i64 0, i32 5
%49 = load i32, i32* %48, align 4, !tbaa !31
%50 = getelementptr inbounds %struct.halide_buffer_t, %struct.halide_buffer_t* %nan_or_one.buffer, i64 0, i32 6
%51 = load %struct.halide_dimension_t*, %struct.halide_dimension_t** %50, align 8, !tbaa !38
%52 = getelementptr inbounds %struct.halide_dimension_t, %struct.halide_dimension_t* %51, i64 0, i32 0
%53 = load i32, i32* %52, align 4, !tbaa !39
%54 = getelementptr inbounds %struct.halide_dimension_t, %struct.halide_dimension_t* %51, i64 0, i32 1
%55 = load i32, i32* %54, align 4, !tbaa !42
%56 = getelementptr inbounds %struct.halide_dimension_t, %struct.halide_dimension_t* %51, i64 0, i32 2
%57 = load i32, i32* %56, align 4, !tbaa !41
%58 = getelementptr inbounds %struct.halide_dimension_t, %struct.halide_dimension_t* %51, i64 1, i32 0
%59 = load i32, i32* %58, align 4, !tbaa !39
%60 = getelementptr inbounds %struct.halide_dimension_t, %struct.halide_dimension_t* %51, i64 1, i32 1
%61 = load i32, i32* %60, align 4, !tbaa !42
%62 = getelementptr inbounds %struct.halide_dimension_t, %struct.halide_dimension_t* %51, i64 1, i32 2
%63 = load i32, i32* %62, align 4, !tbaa !41
%64 = load i8*, i8** %18, align 8, !tbaa !43
%65 = icmp eq i8* %64, null
br i1 %65, label %_halide_buffer_is_bounds_query.exit, label %_halide_buffer_is_bounds_query.exit63.thread
_halide_buffer_is_bounds_query.exit: ; preds = %"assert succeeded4"
%66 = getelementptr inbounds %struct.halide_buffer_t, %struct.halide_buffer_t* %b0.buffer, i64 0, i32 0
%67 = load i64, i64* %66, align 8, !tbaa !44
%68 = icmp ne i64 %67, 0
%69 = icmp sgt i32 %53, -1
%70 = add nsw i32 %55, %53
%71 = icmp slt i32 %70, 2
%72 = and i1 %69, %71
%73 = or i1 %72, %68
br i1 %73, label %_halide_buffer_is_bounds_query.exit62, label %"assert failed5", !prof !11
"assert failed5": ; preds = %_halide_buffer_is_bounds_query.exit
%74 = add nsw i32 %70, -1
%75 = call i32 @halide_error_constraints_make_required_region_smaller(i8* %__user_context, i8* getelementptr inbounds ([16 x i8], [16 x i8]* @str.4, i64 0, i64 0), i32 0, i32 0, i32 0, i32 %53, i32 %74) #5
br label %destructor_block
_halide_buffer_is_bounds_query.exit62: ; preds = %_halide_buffer_is_bounds_query.exit
%76 = icmp sgt i32 %59, -1
%77 = add nsw i32 %61, %59
%78 = icmp slt i32 %77, 2
%79 = and i1 %76, %78
%80 = or i1 %79, %68
br i1 %80, label %_halide_buffer_is_bounds_query.exit63, label %"assert failed7", !prof !11
"assert failed7": ; preds = %_halide_buffer_is_bounds_query.exit62
%81 = add nsw i32 %77, -1
%82 = call i32 @halide_error_constraints_make_required_region_smaller(i8* %__user_context, i8* getelementptr inbounds ([16 x i8], [16 x i8]* @str.4, i64 0, i64 0), i32 1, i32 0, i32 0, i32 %59, i32 %81) #5
br label %destructor_block
_halide_buffer_is_bounds_query.exit63.thread: ; preds = %"assert succeeded4"
%83 = add nsw i32 %55, %53
%84 = add nsw i32 %61, %59
br label %after_bb
_halide_buffer_is_bounds_query.exit63: ; preds = %_halide_buffer_is_bounds_query.exit62
%85 = icmp eq i64 %67, 0
br i1 %85, label %then_bb, label %after_bb
after_bb: ; preds = %_halide_buffer_is_bounds_query.exit63.thread, %_halide_buffer_is_bounds_query.exit63, %then_bb
%86 = phi i32 [ %84, %_halide_buffer_is_bounds_query.exit63.thread ], [ %77, %_halide_buffer_is_bounds_query.exit63 ], [ %77, %then_bb ]
%87 = phi i32 [ %83, %_halide_buffer_is_bounds_query.exit63.thread ], [ %70, %_halide_buffer_is_bounds_query.exit63 ], [ %70, %then_bb ]
%88 = load i8*, i8** %43, align 8, !tbaa !43
%89 = icmp eq i8* %88, null
br i1 %89, label %_halide_buffer_is_bounds_query.exit64, label %after_bb9
_halide_buffer_is_bounds_query.exit64: ; preds = %after_bb
%90 = getelementptr inbounds %struct.halide_buffer_t, %struct.halide_buffer_t* %nan_or_one.buffer, i64 0, i32 0
%91 = load i64, i64* %90, align 8, !tbaa !44
%92 = icmp eq i64 %91, 0
br i1 %92, label %_halide_buffer_init.exit68, label %after_bb9
then_bb: ; preds = %_halide_buffer_is_bounds_query.exit63
%93 = bitcast %struct.halide_dimension_t** %25 to <4 x i32>**
%94 = load <4 x i32>*, <4 x i32>** %93, align 8, !tbaa !38
%95 = bitcast %struct.halide_buffer_t* %b0.buffer to i8*
call void @llvm.memset.p0i8.i64(i8* noundef nonnull align 8 dereferenceable(24) %95, i8 0, i64 24, i1 false)
store i8 2, i8* %20, align 8, !tbaa !45
%96 = getelementptr inbounds %struct.halide_buffer_t, %struct.halide_buffer_t* %b0.buffer, i64 0, i32 4, i32 1
store i8 32, i8* %96, align 1, !tbaa !46
%97 = getelementptr inbounds %struct.halide_buffer_t, %struct.halide_buffer_t* %b0.buffer, i64 0, i32 4, i32 2
store i16 1, i16* %97, align 2, !tbaa !47
store i32 3, i32* %23, align 4, !tbaa !31
store <4 x i32> <i32 0, i32 1, i32 3, i32 0>, <4 x i32>* %94, align 4
%98 = load %struct.halide_dimension_t*, %struct.halide_dimension_t** %25, align 8, !tbaa !38
%.sroa.5.16..sroa_idx = getelementptr inbounds %struct.halide_dimension_t, %struct.halide_dimension_t* %98, i64 1, i32 0
%99 = bitcast i32* %.sroa.5.16..sroa_idx to <4 x i32>*
store <4 x i32> <i32 0, i32 1, i32 3, i32 0>, <4 x i32>* %99, align 4
%100 = load %struct.halide_dimension_t*, %struct.halide_dimension_t** %25, align 8, !tbaa !38
%.sroa.10.32..sroa_idx = getelementptr inbounds %struct.halide_dimension_t, %struct.halide_dimension_t* %100, i64 2, i32 0
%101 = bitcast i32* %.sroa.10.32..sroa_idx to <4 x i32>*
store <4 x i32> <i32 0, i32 3, i32 1, i32 0>, <4 x i32>* %101, align 4
%102 = getelementptr inbounds %struct.halide_buffer_t, %struct.halide_buffer_t* %b0.buffer, i64 0, i32 3
store i64 0, i64* %102, align 8, !tbaa !48
br label %after_bb
after_bb9: ; preds = %after_bb, %_halide_buffer_is_bounds_query.exit64, %_halide_buffer_init.exit68
%103 = load i8*, i8** %18, align 8, !tbaa !43
%104 = icmp eq i8* %103, null
br i1 %104, label %105, label %_halide_buffer_is_bounds_query.exit65
105: ; preds = %after_bb9
%106 = getelementptr inbounds %struct.halide_buffer_t, %struct.halide_buffer_t* %b0.buffer, i64 0, i32 0
%107 = load i64, i64* %106, align 8, !tbaa !44
%108 = icmp eq i64 %107, 0
br label %_halide_buffer_is_bounds_query.exit65
_halide_buffer_is_bounds_query.exit65: ; preds = %after_bb9, %105
%109 = phi i1 [ false, %after_bb9 ], [ %108, %105 ]
%110 = load i8*, i8** %43, align 8, !tbaa !43
%111 = icmp eq i8* %110, null
br i1 %111, label %112, label %_halide_buffer_is_bounds_query.exit66
112: ; preds = %_halide_buffer_is_bounds_query.exit65
%113 = getelementptr inbounds %struct.halide_buffer_t, %struct.halide_buffer_t* %nan_or_one.buffer, i64 0, i32 0
%114 = load i64, i64* %113, align 8, !tbaa !44
%115 = icmp eq i64 %114, 0
br label %_halide_buffer_is_bounds_query.exit66
_halide_buffer_is_bounds_query.exit66: ; preds = %_halide_buffer_is_bounds_query.exit65, %112
%116 = phi i1 [ false, %_halide_buffer_is_bounds_query.exit65 ], [ %115, %112 ]
%117 = or i1 %109, %116
br i1 %117, label %destructor_block, label %then_bb13
_halide_buffer_init.exit68: ; preds = %_halide_buffer_is_bounds_query.exit64
%118 = bitcast %struct.halide_dimension_t** %50 to i8**
%119 = load i8*, i8** %118, align 8, !tbaa !38
%120 = getelementptr inbounds [8 x i32], [8 x i32]* %9, i64 0, i64 0
store i32 %53, i32* %120, align 4
%121 = getelementptr inbounds [8 x i32], [8 x i32]* %9, i64 0, i64 1
store i32 %55, i32* %121, align 4
%122 = getelementptr inbounds [8 x i32], [8 x i32]* %9, i64 0, i64 2
store i32 1, i32* %122, align 4
%123 = getelementptr inbounds [8 x i32], [8 x i32]* %9, i64 0, i64 3
store i32 0, i32* %123, align 4
%124 = getelementptr inbounds [8 x i32], [8 x i32]* %9, i64 0, i64 4
store i32 %59, i32* %124, align 4
%125 = getelementptr inbounds [8 x i32], [8 x i32]* %9, i64 0, i64 5
store i32 %61, i32* %125, align 4
%126 = getelementptr inbounds [8 x i32], [8 x i32]* %9, i64 0, i64 6
store i32 %55, i32* %126, align 4
%127 = getelementptr inbounds [8 x i32], [8 x i32]* %9, i64 0, i64 7
store i32 0, i32* %127, align 4
%128 = bitcast %struct.halide_buffer_t* %nan_or_one.buffer to i8*
call void @llvm.memset.p0i8.i64(i8* noundef nonnull align 8 dereferenceable(24) %128, i8 0, i64 24, i1 false)
store i8 2, i8* %45, align 8, !tbaa !45
%129 = getelementptr inbounds %struct.halide_buffer_t, %struct.halide_buffer_t* %nan_or_one.buffer, i64 0, i32 4, i32 1
store i8 32, i8* %129, align 1, !tbaa !46
%130 = getelementptr inbounds %struct.halide_buffer_t, %struct.halide_buffer_t* %nan_or_one.buffer, i64 0, i32 4, i32 2
store i16 1, i16* %130, align 2, !tbaa !47
store i32 2, i32* %48, align 4, !tbaa !31
%131 = bitcast [8 x i32]* %9 to i8*
call void @llvm.memcpy.p0i8.p0i8.i64(i8* noundef nonnull align 4 dereferenceable(16) %119, i8* noundef nonnull align 4 dereferenceable(16) %131, i64 16, i1 false) #5, !tbaa.struct !49
%132 = load %struct.halide_dimension_t*, %struct.halide_dimension_t** %50, align 8, !tbaa !38
%133 = getelementptr inbounds [8 x i32], [8 x i32]* %9, i64 0, i64 4
%134 = getelementptr inbounds %struct.halide_dimension_t, %struct.halide_dimension_t* %132, i64 1
%135 = bitcast %struct.halide_dimension_t* %134 to i8*
%136 = bitcast i32* %133 to i8*
call void @llvm.memcpy.p0i8.p0i8.i64(i8* noundef nonnull align 4 dereferenceable(16) %135, i8* noundef nonnull align 4 dereferenceable(16) %136, i64 16, i1 false) #5, !tbaa.struct !49
%137 = getelementptr inbounds %struct.halide_buffer_t, %struct.halide_buffer_t* %nan_or_one.buffer, i64 0, i32 3
store i64 0, i64* %137, align 8, !tbaa !48
br label %after_bb9
then_bb13: ; preds = %_halide_buffer_is_bounds_query.exit66
%138 = icmp ne i32 %22, 73730
%139 = zext i1 %138 to i64
%.not50 = icmp eq i32 %47, 73730
%140 = select i1 %.not50, i64 0, i64 4
%.not51 = icmp eq i32 %49, 2
%141 = select i1 %.not51, i64 0, i64 8
%142 = icmp sgt i32 %28, %53
%143 = extractelement <2 x i32> %31, i32 0
%144 = add nsw i32 %143, %28
%145 = icmp sgt i32 %87, %144
%146 = or i1 %142, %145
%147 = select i1 %146, i64 16, i64 0
%148 = lshr i32 %143, 26
%149 = and i32 %148, 32
%150 = zext i32 %149 to i64
%151 = extractelement <2 x i32> %34, i32 0
%152 = icmp sgt i32 %151, %59
%153 = extractelement <2 x i32> %34, i32 1
%154 = add nsw i32 %153, %151
%155 = icmp sgt i32 %86, %154
%156 = or i1 %152, %155
%157 = select i1 %156, i64 64, i64 0
%158 = lshr i32 %153, 24
%159 = and i32 %158, 128
%160 = zext i32 %159 to i64
%161 = icmp sgt i32 %38, 0
%162 = add nsw i32 %40, %38
%163 = icmp slt i32 %162, 1
%164 = or i1 %161, %163
%165 = select i1 %164, i64 256, i64 0
%166 = lshr i32 %40, 22
%167 = and i32 %166, 512
%168 = zext i32 %167 to i64
%169 = lshr i32 %55, 21
%170 = and i32 %169, 1024
%171 = zext i32 %170 to i64
%172 = lshr i32 %61, 20
%173 = and i32 %172, 2048
%174 = zext i32 %173 to i64
%.not58 = icmp eq i32 %42, 1
%175 = select i1 %.not58, i64 0, i64 262144
%176 = insertelement <8 x i32> poison, i32 %24, i32 0
%177 = insertelement <8 x i32> %176, i32 %28, i32 1
%178 = shufflevector <2 x i32> %31, <2 x i32> poison, <8 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
%179 = shufflevector <8 x i32> %177, <8 x i32> %178, <8 x i32> <i32 0, i32 1, i32 8, i32 9, i32 undef, i32 undef, i32 undef, i32 undef>
%180 = shufflevector <2 x i32> %34, <2 x i32> poison, <8 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
%181 = shufflevector <8 x i32> %179, <8 x i32> %180, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 undef, i32 undef>
%182 = insertelement <8 x i32> %181, i32 %36, i32 6
%183 = insertelement <8 x i32> %182, i32 %38, i32 7
%184 = icmp eq <8 x i32> %183, <i32 3, i32 0, i32 1, i32 3, i32 0, i32 1, i32 3, i32 0>
%185 = select <8 x i1> %184, <8 x i64> zeroinitializer, <8 x i64> <i64 2, i64 8192, i64 16384, i64 4096, i64 65536, i64 131072, i64 32768, i64 524288>
%.not60 = icmp eq i32 %40, 3
%186 = select i1 %.not60, i64 0, i64 1048576
%.not61 = icmp eq i32 %57, 1
%187 = select i1 %.not61, i64 0, i64 2097152
%188 = call i64 @llvm.vector.reduce.or.v8i64(<8 x i64> %185)
%189 = or i64 %188, %186
%190 = or i64 %189, %175
%191 = or i64 %190, %165
%192 = or i64 %191, %140
%193 = or i64 %192, %141
%194 = or i64 %193, %187
%195 = or i64 %194, %157
%op.extra = or i64 %195, %139
%op.extra78 = or i64 %op.extra, %150
%op.extra79 = or i64 %op.extra78, %160
%op.extra80 = or i64 %op.extra79, %168
%op.extra81 = or i64 %op.extra80, %171
%op.extra82 = or i64 %op.extra81, %174
%196 = or i64 %op.extra82, %147
%197 = or i64 %196, -9223372036854775808
%198 = call i64 @llvm.cttz.i64(i64 %197, i1 true), !range !50
%199 = trunc i64 %198 to i32
switch i32 %199, label %no_errors_bb [
i32 0, label %assert_failed
i32 1, label %assert_failed15
i32 2, label %assert_failed16
i32 3, label %assert_failed17
i32 4, label %assert_failed18
i32 5, label %assert_failed19
i32 6, label %assert_failed20
i32 7, label %assert_failed21
i32 8, label %assert_failed22
i32 9, label %assert_failed23
i32 10, label %assert_failed24
i32 11, label %assert_failed25
i32 12, label %assert_failed26
i32 13, label %assert_failed27
i32 14, label %assert_failed28
i32 15, label %assert_failed29
i32 16, label %assert_failed30
i32 17, label %assert_failed31
i32 18, label %assert_failed32
i32 19, label %assert_failed33
i32 20, label %assert_failed34
i32 21, label %assert_failed35
], !prof !51
no_errors_bb: ; preds = %then_bb13
%200 = sext i32 %61 to i64
%201 = sext i32 %55 to i64
%nan_or_one.total_extent.1 = mul nsw i64 %200, %201
%202 = icmp sgt i32 %55, -1
br i1 %202, label %"assert succeeded37", label %"assert failed36", !prof !11
assert_failed: ; preds = %then_bb13
%203 = call i32 @halide_error_bad_type(i8* %__user_context, i8* getelementptr inbounds ([16 x i8], [16 x i8]* @str.4, i64 0, i64 0), i32 %22, i32 73730) #5
br label %common.ret
assert_failed15: ; preds = %then_bb13
%204 = call i32 @halide_error_bad_dimensions(i8* %__user_context, i8* getelementptr inbounds ([16 x i8], [16 x i8]* @str.4, i64 0, i64 0), i32 %24, i32 3) #5
br label %common.ret
assert_failed16: ; preds = %then_bb13
%205 = call i32 @halide_error_bad_type(i8* %__user_context, i8* getelementptr inbounds ([25 x i8], [25 x i8]* @str.5, i64 0, i64 0), i32 %47, i32 73730) #5
br label %common.ret
assert_failed17: ; preds = %then_bb13
%206 = call i32 @halide_error_bad_dimensions(i8* %__user_context, i8* getelementptr inbounds ([25 x i8], [25 x i8]* @str.5, i64 0, i64 0), i32 %49, i32 2) #5
br label %common.ret
assert_failed18: ; preds = %then_bb13
%207 = add nsw i32 %87, -1
%208 = add nsw i32 %144, -1
%209 = call i32 @halide_error_access_out_of_bounds(i8* %__user_context, i8* getelementptr inbounds ([16 x i8], [16 x i8]* @str.4, i64 0, i64 0), i32 0, i32 %53, i32 %207, i32 %28, i32 %208) #5
br label %common.ret
assert_failed19: ; preds = %then_bb13
%210 = call i32 @halide_error_buffer_extents_negative(i8* %__user_context, i8* getelementptr inbounds ([16 x i8], [16 x i8]* @str.4, i64 0, i64 0), i32 0, i32 %143) #5
br label %common.ret
assert_failed20: ; preds = %then_bb13
%211 = add nsw i32 %86, -1
%212 = add nsw i32 %154, -1
%213 = call i32 @halide_error_access_out_of_bounds(i8* %__user_context, i8* getelementptr inbounds ([16 x i8], [16 x i8]* @str.4, i64 0, i64 0), i32 1, i32 %59, i32 %211, i32 %151, i32 %212) #5
br label %common.ret
assert_failed21: ; preds = %then_bb13
%214 = call i32 @halide_error_buffer_extents_negative(i8* %__user_context, i8* getelementptr inbounds ([16 x i8], [16 x i8]* @str.4, i64 0, i64 0), i32 1, i32 %153) #5
br label %common.ret
assert_failed22: ; preds = %then_bb13
%215 = add nsw i32 %162, -1
%216 = call i32 @halide_error_access_out_of_bounds(i8* %__user_context, i8* getelementptr inbounds ([16 x i8], [16 x i8]* @str.4, i64 0, i64 0), i32 2, i32 0, i32 0, i32 %38, i32 %215) #5
br label %common.ret
assert_failed23: ; preds = %then_bb13
%217 = call i32 @halide_error_buffer_extents_negative(i8* %__user_context, i8* getelementptr inbounds ([16 x i8], [16 x i8]* @str.4, i64 0, i64 0), i32 2, i32 %40) #5
br label %common.ret
assert_failed24: ; preds = %then_bb13
%218 = call i32 @halide_error_buffer_extents_negative(i8* %__user_context, i8* getelementptr inbounds ([25 x i8], [25 x i8]* @str.5, i64 0, i64 0), i32 0, i32 %55) #5
br label %common.ret
assert_failed25: ; preds = %then_bb13
%219 = call i32 @halide_error_buffer_extents_negative(i8* %__user_context, i8* getelementptr inbounds ([25 x i8], [25 x i8]* @str.5, i64 0, i64 0), i32 1, i32 %61) #5
br label %common.ret
assert_failed26: ; preds = %then_bb13
%220 = extractelement <2 x i32> %31, i32 1
%221 = call i32 @halide_error_constraint_violated(i8* %__user_context, i8* getelementptr inbounds ([12 x i8], [12 x i8]* @str.6, i64 0, i64 0), i32 %220, i8* getelementptr inbounds ([2 x i8], [2 x i8]* @str.7, i64 0, i64 0), i32 3) #5
br label %common.ret
assert_failed27: ; preds = %then_bb13
%222 = call i32 @halide_error_constraint_violated(i8* %__user_context, i8* getelementptr inbounds ([9 x i8], [9 x i8]* @str.8, i64 0, i64 0), i32 %28, i8* getelementptr inbounds ([2 x i8], [2 x i8]* @str.9, i64 0, i64 0), i32 0) #5
br label %common.ret
assert_failed28: ; preds = %then_bb13
%223 = call i32 @halide_error_constraint_violated(i8* %__user_context, i8* getelementptr inbounds ([12 x i8], [12 x i8]* @str.10, i64 0, i64 0), i32 %143, i8* getelementptr inbounds ([2 x i8], [2 x i8]* @str.11, i64 0, i64 0), i32 1) #5
br label %common.ret
assert_failed29: ; preds = %then_bb13
%224 = call i32 @halide_error_constraint_violated(i8* %__user_context, i8* getelementptr inbounds ([12 x i8], [12 x i8]* @str.12, i64 0, i64 0), i32 %36, i8* getelementptr inbounds ([2 x i8], [2 x i8]* @str.7, i64 0, i64 0), i32 3) #5
br label %common.ret
assert_failed30: ; preds = %then_bb13
%225 = call i32 @halide_error_constraint_violated(i8* %__user_context, i8* getelementptr inbounds ([9 x i8], [9 x i8]* @str.13, i64 0, i64 0), i32 %151, i8* getelementptr inbounds ([2 x i8], [2 x i8]* @str.9, i64 0, i64 0), i32 0) #5
br label %common.ret
assert_failed31: ; preds = %then_bb13
%226 = call i32 @halide_error_constraint_violated(i8* %__user_context, i8* getelementptr inbounds ([12 x i8], [12 x i8]* @str.14, i64 0, i64 0), i32 %153, i8* getelementptr inbounds ([2 x i8], [2 x i8]* @str.11, i64 0, i64 0), i32 1) #5
br label %common.ret
assert_failed32: ; preds = %then_bb13
%227 = call i32 @halide_error_constraint_violated(i8* %__user_context, i8* getelementptr inbounds ([12 x i8], [12 x i8]* @str.15, i64 0, i64 0), i32 %42, i8* getelementptr inbounds ([2 x i8], [2 x i8]* @str.11, i64 0, i64 0), i32 1) #5
br label %common.ret
assert_failed33: ; preds = %then_bb13
%228 = call i32 @halide_error_constraint_violated(i8* %__user_context, i8* getelementptr inbounds ([9 x i8], [9 x i8]* @str.16, i64 0, i64 0), i32 %38, i8* getelementptr inbounds ([2 x i8], [2 x i8]* @str.9, i64 0, i64 0), i32 0) #5
br label %common.ret
assert_failed34: ; preds = %then_bb13
%229 = call i32 @halide_error_constraint_violated(i8* %__user_context, i8* getelementptr inbounds ([12 x i8], [12 x i8]* @str.17, i64 0, i64 0), i32 %40, i8* getelementptr inbounds ([2 x i8], [2 x i8]* @str.7, i64 0, i64 0), i32 3) #5
br label %common.ret
assert_failed35: ; preds = %then_bb13
%230 = call i32 @halide_error_constraint_violated(i8* %__user_context, i8* getelementptr inbounds ([20 x i8], [20 x i8]* @str.18, i64 0, i64 0), i32 %57, i8* getelementptr inbounds ([2 x i8], [2 x i8]* @str.11, i64 0, i64 0), i32 1) #5
br label %common.ret
"assert failed36": ; preds = %no_errors_bb
%231 = call i32 @halide_error_buffer_allocation_too_large(i8* %__user_context, i8* getelementptr inbounds ([11 x i8], [11 x i8]* @str, i64 0, i64 0), i64 %201, i64 2147483647) #5
br label %destructor_block
"assert succeeded37": ; preds = %no_errors_bb
%232 = sext i32 %63 to i64
%x0 = mul nsw i64 %232, %200
%233 = call i64 @llvm.abs.i64(i64 %x0, i1 true)
%234 = icmp ult i64 %233, 2147483648
br i1 %234, label %"assert succeeded39", label %"assert failed38", !prof !11
"assert failed38": ; preds = %"assert succeeded37"
%235 = call i32 @halide_error_buffer_allocation_too_large(i8* %__user_context, i8* getelementptr inbounds ([11 x i8], [11 x i8]* @str, i64 0, i64 0), i64 %233, i64 2147483647) #5
br label %destructor_block
"assert succeeded39": ; preds = %"assert succeeded37"
%236 = icmp slt i64 %nan_or_one.total_extent.1, 2147483648
br i1 %236, label %"produce nan_or_one", label %"assert failed40", !prof !11
"assert failed40": ; preds = %"assert succeeded39"
%237 = call i32 @halide_error_buffer_extents_too_large(i8* %__user_context, i8* getelementptr inbounds ([11 x i8], [11 x i8]* @str, i64 0, i64 0), i64 %nan_or_one.total_extent.1, i64 2147483647) #5
br label %destructor_block
"produce nan_or_one": ; preds = %"assert succeeded39"
%238 = call %struct.halide_device_interface_t.1* @halide_opencl_device_interface() #5
%239 = bitcast %struct.halide_buffer_t* %b0.buffer to %struct.halide_buffer_t.4*
%halide_copy_to_device_result = call i32 @halide_copy_to_device(i8* %__user_context, %struct.halide_buffer_t.4* nonnull %239, %struct.halide_device_interface_t.1* %238) #5
%240 = icmp eq i32 %halide_copy_to_device_result, 0
br i1 %240, label %"assert succeeded43", label %destructor_block, !prof !11
"assert succeeded43": ; preds = %"produce nan_or_one"
%241 = call %struct.halide_device_interface_t.1* @halide_opencl_device_interface() #5
%242 = bitcast %struct.halide_buffer_t* %nan_or_one.buffer to %struct.halide_buffer_t.4*
%"halide_copy_to_device_result$1" = call i32 @halide_copy_to_device(i8* %__user_context, %struct.halide_buffer_t.4* nonnull %242, %struct.halide_device_interface_t.1* %241) #5
%243 = icmp eq i32 %"halide_copy_to_device_result$1", 0
br i1 %243, label %"assert succeeded45", label %destructor_block, !prof !11
"assert succeeded45": ; preds = %"assert succeeded43"
%244 = mul i32 %59, %63
%t10 = sub i32 0, %244
%t9 = add nsw i32 %59, %53
%245 = load i64, i64* bitcast ([8 x i8]* @opencl_buf.data to i64*), align 32, !tbaa !12
%246 = inttoptr i64 %245 to i8*
%247 = getelementptr inbounds [7 x i64], [7 x i64]* %8, i64 0, i64 0
%248 = bitcast [7 x i64]* %8 to <2 x i64>*
store <2 x i64> <i64 8, i64 8>, <2 x i64>* %248, align 16
%249 = getelementptr inbounds [7 x i64], [7 x i64]* %8, i64 0, i64 2
%250 = bitcast i64* %249 to <2 x i64>*
store <2 x i64> <i64 4, i64 4>, <2 x i64>* %250, align 16
%251 = getelementptr inbounds [7 x i64], [7 x i64]* %8, i64 0, i64 4
%252 = bitcast i64* %251 to <2 x i64>*
store <2 x i64> <i64 4, i64 4>, <2 x i64>* %252, align 16
%253 = getelementptr inbounds [7 x i64], [7 x i64]* %8, i64 0, i64 6
store i64 0, i64* %253, align 16
store i32 %59, i32* %7, align 4
store i32 %63, i32* %6, align 4
store i32 %t10, i32* %5, align 4
store i32 %t9, i32* %4, align 4
%254 = getelementptr inbounds { %struct.halide_buffer_t*, %struct.halide_buffer_t*, i32*, i32*, i32*, i32*, i8* }, { %struct.halide_buffer_t*, %struct.halide_buffer_t*, i32*, i32*, i32*, i32*, i8* }* %3, i64 0, i32 0
store %struct.halide_buffer_t* %b0.buffer, %struct.halide_buffer_t** %254, align 8
%255 = getelementptr inbounds { %struct.halide_buffer_t*, %struct.halide_buffer_t*, i32*, i32*, i32*, i32*, i8* }, { %struct.halide_buffer_t*, %struct.halide_buffer_t*, i32*, i32*, i32*, i32*, i8* }* %3, i64 0, i32 1
store %struct.halide_buffer_t* %nan_or_one.buffer, %struct.halide_buffer_t** %255, align 8
%256 = getelementptr inbounds { %struct.halide_buffer_t*, %struct.halide_buffer_t*, i32*, i32*, i32*, i32*, i8* }, { %struct.halide_buffer_t*, %struct.halide_buffer_t*, i32*, i32*, i32*, i32*, i8* }* %3, i64 0, i32 2
store i32* %7, i32** %256, align 8
%257 = getelementptr inbounds { %struct.halide_buffer_t*, %struct.halide_buffer_t*, i32*, i32*, i32*, i32*, i8* }, { %struct.halide_buffer_t*, %struct.halide_buffer_t*, i32*, i32*, i32*, i32*, i8* }* %3, i64 0, i32 3
store i32* %6, i32** %257, align 8
%258 = getelementptr inbounds { %struct.halide_buffer_t*, %struct.halide_buffer_t*, i32*, i32*, i32*, i32*, i8* }, { %struct.halide_buffer_t*, %struct.halide_buffer_t*, i32*, i32*, i32*, i32*, i8* }* %3, i64 0, i32 4
store i32* %5, i32** %258, align 8
%259 = getelementptr inbounds { %struct.halide_buffer_t*, %struct.halide_buffer_t*, i32*, i32*, i32*, i32*, i8* }, { %struct.halide_buffer_t*, %struct.halide_buffer_t*, i32*, i32*, i32*, i32*, i8* }* %3, i64 0, i32 5
store i32* %4, i32** %259, align 8
%260 = getelementptr inbounds { %struct.halide_buffer_t*, %struct.halide_buffer_t*, i32*, i32*, i32*, i32*, i8* }, { %struct.halide_buffer_t*, %struct.halide_buffer_t*, i32*, i32*, i32*, i32*, i8* }* %3, i64 0, i32 6
store i8* null, i8** %260, align 8
%261 = getelementptr inbounds [7 x i8], [7 x i8]* %2, i64 0, i64 0
store i8 1, i8* %261, align 1
%262 = getelementptr inbounds [7 x i8], [7 x i8]* %2, i64 0, i64 1
store i8 1, i8* %262, align 1
%263 = getelementptr inbounds [7 x i8], [7 x i8]* %2, i64 0, i64 2
%264 = bitcast { %struct.halide_buffer_t*, %struct.halide_buffer_t*, i32*, i32*, i32*, i32*, i8* }* %3 to i8**
call void @llvm.memset.p0i8.i64(i8* noundef nonnull align 1 dereferenceable(5) %263, i8 0, i64 5, i1 false)
%halide_opencl_run_result = call i32 @halide_opencl_run(i8* %__user_context, i8* %246, i8* getelementptr inbounds ([49 x i8], [49 x i8]* @str.19, i64 0, i64 0), i32 %55, i32 %61, i32 1, i32 1, i32 1, i32 1, i32 0, i64* nonnull %247, i8** nonnull %264, i8* nonnull %261) #5
%265 = icmp eq i32 %halide_opencl_run_result, 0
br i1 %265, label %"assert succeeded47", label %destructor_block, !prof !11
"assert succeeded47": ; preds = %"assert succeeded45"
%266 = getelementptr inbounds %struct.halide_buffer_t, %struct.halide_buffer_t* %nan_or_one.buffer, i64 0, i32 3
%267 = load i64, i64* %266, align 8, !tbaa !48
%268 = or i64 %267, 2
store i64 %268, i64* %266, align 8, !tbaa !48
br label %destructor_block
}
; Declaration only (no body) of the intrinsic @llvm.cttz.i64.
; In this dump it is called once, in @nan_or_one's block %then_bb13, on the
; OR-combined i64 error mask; the result is truncated to i32 and drives the
; switch that selects which assert_failed* block to branch to.
; Function Attrs: mustprogress nofree nosync nounwind readnone speculatable willreturn
declare i64 @llvm.cttz.i64(i64 %0, i1 immarg %1) #4
; Array-of-pointers entry point for @nan_or_one.
;
; Takes a single i8** (%0), reads three consecutive pointer slots from it,
; forwards the unpacked values to @nan_or_one, and returns that call's i32
; result unchanged.
;
; Slot layout as read by this function:
;   slot 0 -> loaded directly as a %struct.halide_buffer_t*
;   slot 1 -> loaded as an i8**, then dereferenced once more to get an i8*
;   slot 2 -> loaded directly as a %struct.halide_buffer_t*
;
; NOTE(review): the i8* from slot 1 is presumably the %__user_context value
; used throughout @nan_or_one's body, and slots 0/2 presumably the b0 and
; nan_or_one buffers -- confirm against @nan_or_one's define line, which is
; not part of this excerpt.
; Function Attrs: nounwind
define i32 @nan_or_one_argv(i8** nocapture readonly %0) local_unnamed_addr #5 {
entry:
; Slot 0: reinterpret the array base as an array of buffer pointers and load it.
%1 = bitcast i8** %0 to %struct.halide_buffer_t**
%2 = load %struct.halide_buffer_t*, %struct.halide_buffer_t** %1, align 8
; Slot 1: holds a pointer to an i8*; two loads are needed to reach the value
; that is actually passed on.
%3 = getelementptr i8*, i8** %0, i64 1
%4 = bitcast i8** %3 to i8***
%5 = load i8**, i8*** %4, align 8
%6 = load i8*, i8** %5, align 8
; Slot 2: second buffer pointer, read the same way as slot 0.
%7 = getelementptr i8*, i8** %0, i64 2
%8 = bitcast i8** %7 to %struct.halide_buffer_t**
%9 = load %struct.halide_buffer_t*, %struct.halide_buffer_t** %8, align 8
; Forward in slot order; attribute group #11 (noinline) is applied at this
; call site only.
%10 = tail call i32 @nan_or_one(%struct.halide_buffer_t* %2, i8* %6, %struct.halide_buffer_t* %9) #11
ret i32 %10
}
; Accessor that returns the address of the global @nan_or_one_metadata_storage
; as a %struct.halide_filter_metadata_t*. The body is a single ret: it takes
; no arguments and performs no loads, stores or calls. The return value is
; declared nonnull.
; Function Attrs: mustprogress nofree norecurse nosync nounwind readnone willreturn
define nonnull %struct.halide_filter_metadata_t* @nan_or_one_metadata() local_unnamed_addr #6 {
entry:
ret %struct.halide_filter_metadata_t* @nan_or_one_metadata_storage
}
; Declarations only (no bodies) for the remaining intrinsics called from
; @nan_or_one in this dump.

; @llvm.abs.i64: applied to the i64 product %x0 in block "assert succeeded37"
; before it is compared against 2147483648.
; Function Attrs: nofree nosync nounwind readnone speculatable willreturn
declare i64 @llvm.abs.i64(i64 %0, i1 immarg %1) #7
; @llvm.memset.p0i8.i64: used to zero the first 24 bytes of a halide_buffer_t
; (blocks %then_bb and %_halide_buffer_init.exit68) and 5 bytes of the [7 x i8]
; array handed to @halide_opencl_run.
; Function Attrs: argmemonly nofree nounwind willreturn writeonly
declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly %0, i8 %1, i64 %2, i1 immarg %3) #8
; @llvm.vector.reduce.or.v8i64: ORs together the eight i64 lanes of the
; vectorised error-bit select in block %then_bb13.
; Function Attrs: nofree nosync nounwind readnone willreturn
declare i64 @llvm.vector.reduce.or.v8i64(<8 x i64> %0) #9
; Attribute groups. Each "#N" written after a define, declare or call refers
; to one of these sets.
; NOTE(review): no use of #0-#3 appears in the later part of this dump; their
; users are presumably earlier in the module -- confirm before relying on this.
attributes #0 = { argmemonly mustprogress nofree nosync nounwind willreturn }
attributes #1 = { argmemonly mustprogress nofree nounwind willreturn }
attributes #2 = { "frame-pointer"="all" "no-builtins" "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
attributes #3 = { nounwind "reciprocal-estimates"="none" }
; #4: on the declaration of @llvm.cttz.i64.
attributes #4 = { mustprogress nofree nosync nounwind readnone speculatable willreturn }
; #5: on the define of @nan_or_one_argv and on most call sites inside
; @nan_or_one (halide_error_*, halide_copy_to_device, halide_opencl_run, ...).
attributes #5 = { nounwind }
; #6: on the define of @nan_or_one_metadata.
attributes #6 = { mustprogress nofree norecurse nosync nounwind readnone willreturn }
; #7: on the declaration of @llvm.abs.i64.
attributes #7 = { nofree nosync nounwind readnone speculatable willreturn }
; #8: on the declaration of @llvm.memset.p0i8.i64.
attributes #8 = { argmemonly nofree nounwind willreturn writeonly }
; #9: on the declaration of @llvm.vector.reduce.or.v8i64.
attributes #9 = { nofree nosync nounwind readnone willreturn }
; #10: on the two 4-byte @memcpy calls in @nan_or_one's block
; "assert succeeded4".
attributes #10 = { nobuiltin nounwind "no-builtins" }
; #11: on the tail call to @nan_or_one inside @nan_or_one_argv.
attributes #11 = { noinline }
; ---- Named metadata ---------------------------------------------------------
; !llvm.module.flags lists nodes !0-!9; each is a triple of
; (i32 merge behaviour, key string, value). See the LLVM Language Reference,
; "Module Flags Metadata", for the meaning of the leading i32.
!llvm.module.flags = !{!0, !1, !2, !3, !4, !5, !6, !7, !8, !9}
; !llvm.ident lists the same producer string (!10) five times.
!llvm.ident = !{!10, !10, !10, !10, !10}
!0 = !{i32 1, !"wchar_size", i32 4}
!1 = !{i32 7, !"PIC Level", i32 1}
!2 = !{i32 7, !"frame-pointer", i32 2}
; !3-!9: flags whose keys carry a "halide_" prefix.
!3 = !{i32 2, !"halide_use_soft_float_abi", i32 0}
!4 = !{i32 2, !"halide_mcpu", !"penryn"}
!5 = !{i32 2, !"halide_mattrs", !""}
!6 = !{i32 2, !"halide_mabi", !""}
!7 = !{i32 2, !"halide_use_pic", i32 1}
!8 = !{i32 2, !"halide_use_large_code_model", i32 0}
!9 = !{i32 2, !"halide_per_instruction_fast_math_flags", i32 1}
!10 = !{!"clang version 13.0.0 (https://github.com/microsoft/vcpkg.git 6f1d72a4bd98b635c98691054864229aed61fa82)"}
; !11: attached as !prof to the two-way conditional branches that follow
; assertion checks in @nan_or_one. The first-listed successor (the non-failing
; path) gets weight 1073741824, the second gets 0.
!11 = !{!"branch_weights", i32 1073741824, i32 0}
; !12-!25: a chain of nodes in which each names the next as its parent, from
; "opencl.width1.base0" up through "opencl" to the root "Halide buffer".
; !12 is attached as !tbaa to the i64 load from @opencl_buf.data in
; @nan_or_one.
!12 = !{!13, !13, i64 0}
!13 = !{!"opencl.width1.base0", !14, i64 0}
!14 = !{!"opencl.width2.base0", !15, i64 0}
!15 = !{!"opencl.width4.base0", !16, i64 0}
!16 = !{!"opencl.width8.base0", !17, i64 0}
!17 = !{!"opencl.width16.base0", !18, i64 0}
!18 = !{!"opencl.width32.base0", !19, i64 0}
!19 = !{!"opencl.width64.base0", !20, i64 0}
!20 = !{!"opencl.width128.base0", !21, i64 0}
!21 = !{!"opencl.width256.base0", !22, i64 0}
!22 = !{!"opencl.width512.base0", !23, i64 0}
!23 = !{!"opencl.width1024.base0", !24, i64 0}
!24 = !{!"opencl", !25, i64 0}
!25 = !{!"Halide buffer"}
; !26: same shape as !11 with the weights swapped. It is not referenced in the
; later part of this dump.
!26 = !{!"branch_weights", i32 0, i32 1073741824}
; !27-!30: the "int" access tag and its parents ("omnipotent char", then the
; root "Simple C++ TBAA"). !27 is the !tbaa tag on plain i32 and <2 x i32>
; loads in @nan_or_one.
!27 = !{!28, !28, i64 0}
!28 = !{!"int", !29, i64 0}
!29 = !{!"omnipotent char", !30, i64 0}
!30 = !{!"Simple C++ TBAA"}
; !31-!48: access tags written as (struct node, scalar node, byte offset).
; !32 describes "_ZTS15halide_buffer_t" with members at byte offsets
; 0, 8, 16, 24, 32, 36, 40 and 48; !40 describes "_ZTS18halide_dimension_t"
; with four "int" members at offsets 0, 4, 8 and 12.
!31 = !{!32, !28, i64 36}
!32 = !{!"_ZTS15halide_buffer_t", !33, i64 0, !34, i64 8, !34, i64 16, !33, i64 24, !35, i64 32, !28, i64 36, !34, i64 40, !34, i64 48}
!33 = !{!"long long", !29, i64 0}
!34 = !{!"any pointer", !29, i64 0}
!35 = !{!"_ZTS13halide_type_t", !36, i64 0, !29, i64 1, !37, i64 2}
!36 = !{!"_ZTS18halide_type_code_t", !29, i64 0}
!37 = !{!"short", !29, i64 0}
!38 = !{!32, !34, i64 40}
!39 = !{!40, !28, i64 0}
!40 = !{!"_ZTS18halide_dimension_t", !28, i64 0, !28, i64 4, !28, i64 8, !28, i64 12}
!41 = !{!40, !28, i64 8}
!42 = !{!40, !28, i64 4}
!43 = !{!32, !34, i64 16}
!44 = !{!32, !33, i64 0}
!45 = !{!32, !36, i64 32}
!46 = !{!32, !29, i64 33}
!47 = !{!32, !37, i64 34}
!48 = !{!32, !33, i64 24}
; !49: attached as !tbaa.struct to the two 16-byte @llvm.memcpy calls in
; @nan_or_one's block %_halide_buffer_init.exit68. It lists four 4-byte fields
; at offsets 0, 4, 8 and 12, each tagged !27.
!49 = !{i64 0, i64 4, !27, i64 4, i64 4, !27, i64 8, i64 4, !27, i64 12, i64 4, !27}
; !50: attached as !range to the @llvm.cttz.i64 call result (bounds 0 and 64).
!50 = !{i64 0, i64 64}
; !51: attached as !prof to the 22-case error-dispatch switch in @nan_or_one.
; The first weight belongs to the default target %no_errors_bb; the 22 case
; targets each get weight 0.
!51 = !{!"branch_weights", i32 1073741824, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0}
Creating new execution engine
Target triple: x86_64--linux-gnu
JIT compiling shared runtime for x86-64-linux-jit-opencl-sse41-strict_float-user_context
JIT Compiling halide_buffer_copy
Function halide_buffer_copy is at 0x7f93e7843ed0
JIT Compiling halide_buffer_copy_already_locked
Function halide_buffer_copy_already_locked is at 0x7f93e7843c10
JIT Compiling halide_buffer_to_string
Function halide_buffer_to_string is at 0x7f93e7844a10
JIT Compiling halide_cache_cleanup
Function halide_cache_cleanup is at 0x7f93ea435f40
JIT Compiling halide_can_reuse_device_allocations
Function halide_can_reuse_device_allocations is at 0x7f93ea434fd0
JIT Compiling halide_cond_broadcast
Function halide_cond_broadcast is at 0x7f93eb155c60
JIT Compiling halide_cond_signal
Function halide_cond_signal is at 0x7f93ea435340
JIT Compiling halide_cond_wait
Function halide_cond_wait is at 0x7f93eb155d20
JIT Compiling halide_copy_to_device
Function halide_copy_to_device is at 0x7f93e7843300
JIT Compiling halide_copy_to_host
Function halide_copy_to_host is at 0x7f93ea434b50
JIT Compiling halide_current_time_ns
Function halide_current_time_ns is at 0x7f93eb1552b0
JIT Compiling halide_debug_to_file
Function halide_debug_to_file is at 0x7f93ea432000
JIT Compiling halide_default_buffer_copy
Function halide_default_buffer_copy is at 0x7f93e7844fd0
JIT Compiling halide_default_device_and_host_free
Function halide_default_device_and_host_free is at 0x7f93e78437f0
JIT Compiling halide_default_device_and_host_malloc
Function halide_default_device_and_host_malloc is at 0x7f93e7843660
JIT Compiling halide_default_device_crop
Function halide_default_device_crop is at 0x7f93e7843f70
JIT Compiling halide_default_device_detach_native
Function halide_default_device_detach_native is at 0x7f93e7843b60
JIT Compiling halide_default_device_release_crop
Function halide_default_device_release_crop is at 0x7f93e7842190
JIT Compiling halide_default_device_slice
Function halide_default_device_slice is at 0x7f93e7843fa0
JIT Compiling halide_default_device_wrap_native
Function halide_default_device_wrap_native is at 0x7f93e7843ac0
JIT Compiling halide_default_do_loop_task
Function halide_default_do_loop_task is at 0x7f93eb1553c0
JIT Compiling halide_default_do_par_for
Function halide_default_do_par_for is at 0x7f93eb1553e0
JIT Compiling halide_default_do_parallel_tasks
Function halide_default_do_parallel_tasks is at 0x7f93ea435040
JIT Compiling halide_default_do_task
Function halide_default_do_task is at 0x7f93eb1553b0
JIT Compiling halide_default_error
Function halide_default_error is at 0x7f93eb155100
JIT Compiling halide_default_free
Function halide_default_free is at 0x7f93eb155060
JIT Compiling halide_default_get_library_symbol
Function halide_default_get_library_symbol is at 0x7f93ea435760
JIT Compiling halide_default_get_symbol
Function halide_default_get_symbol is at 0x7f93ea435700
JIT Compiling halide_default_load_library
Function halide_default_load_library is at 0x7f93ea435720
JIT Compiling halide_default_malloc
Function halide_default_malloc is at 0x7f93eb155000
JIT Compiling halide_default_print
Function halide_default_print is at 0x7f93eb155330
JIT Compiling halide_default_semaphore_init
Function halide_default_semaphore_init is at 0x7f93ea435190
JIT Compiling halide_default_semaphore_release
Function halide_default_semaphore_release is at 0x7f93ea4351a0
JIT Compiling halide_default_semaphore_try_acquire
Function halide_default_semaphore_try_acquire is at 0x7f93eb155cb0
JIT Compiling halide_default_trace
Function halide_default_trace is at 0x7f93ea434000
JIT Compiling halide_device_and_host_free
Function halide_device_and_host_free is at 0x7f93e7843530
JIT Compiling halide_device_and_host_free_as_destructor
Function halide_device_and_host_free_as_destructor is at 0x7f93e7844fb0
JIT Compiling halide_device_and_host_malloc
Function halide_device_and_host_malloc is at 0x7f93e7843420
JIT Compiling halide_device_crop
Function halide_device_crop is at 0x7f93e7842000
JIT Compiling halide_device_detach_native
Function halide_device_detach_native is at 0x7f93e78439c0
JIT Compiling halide_device_free
Function halide_device_free is at 0x7f93ea434cf0
JIT Compiling halide_device_free_as_destructor
Function halide_device_free_as_destructor is at 0x7f93ea432fd0
JIT Compiling halide_device_host_nop_free
Function halide_device_host_nop_free is at 0x7f93ea433fe0
JIT Compiling halide_device_malloc
Function halide_device_malloc is at 0x7f93e7843210
JIT Compiling halide_device_release
Function halide_device_release is at 0x7f93ea433fd0
JIT Compiling halide_device_release_crop
Function halide_device_release_crop is at 0x7f93e78421c0
JIT Compiling halide_device_slice
Function halide_device_slice is at 0x7f93e78420c0
JIT Compiling halide_device_sync
Function halide_device_sync is at 0x7f93e7843360
JIT Compiling halide_device_wrap_native
Function halide_device_wrap_native is at 0x7f93e78438c0
JIT Compiling halide_do_loop_task
Function halide_do_loop_task is at 0x7f93eb155ce0
JIT Compiling halide_do_par_for
Function halide_do_par_for is at 0x7f93ea435660
JIT Compiling halide_do_parallel_tasks
Function halide_do_parallel_tasks is at 0x7f93ea435680
JIT Compiling halide_do_task
Function halide_do_task is at 0x7f93eb155d00
JIT Compiling halide_double_to_string
Function halide_double_to_string is at 0x7f93ea433000
JIT Compiling halide_error
Function halide_error is at 0x7f93eb155210
JIT Compiling halide_error_access_out_of_bounds
Function halide_error_access_out_of_bounds is at 0x7f93e78429a0
JIT Compiling halide_error_bad_dimensions
Function halide_error_bad_dimensions is at 0x7f93e7842840
JIT Compiling halide_error_bad_extern_fold
Function halide_error_bad_extern_fold is at 0x7f93e78404b0
JIT Compiling halide_error_bad_fold
Function halide_error_bad_fold is at 0x7f93e7840340
JIT Compiling halide_error_bad_type
Function halide_error_bad_type is at 0x7f93e78426d0
JIT Compiling halide_error_bounds_inference_call_failed
Function halide_error_bounds_inference_call_failed is at 0x7f93e78422b0
JIT Compiling halide_error_buffer_allocation_too_large
Function halide_error_buffer_allocation_too_large is at 0x7f93e7842c40
JIT Compiling halide_error_buffer_argument_is_null
Function halide_error_buffer_argument_is_null is at 0x7f93e7841ce0
JIT Compiling halide_error_buffer_extents_negative
Function halide_error_buffer_extents_negative is at 0x7f93e7842d90
JIT Compiling halide_error_buffer_extents_too_large
Function halide_error_buffer_extents_too_large is at 0x7f93e7841000
JIT Compiling halide_error_buffer_is_null
Function halide_error_buffer_is_null is at 0x7f93e7844df0
JIT Compiling halide_error_constraint_violated
Function halide_error_constraint_violated is at 0x7f93e7841360
JIT Compiling halide_error_constraints_make_required_region_smaller
Function halide_error_constraints_make_required_region_smaller is at 0x7f93e7841150
JIT Compiling halide_error_debug_to_file_failed
Function halide_error_debug_to_file_failed is at 0x7f93e7841de0
JIT Compiling halide_error_device_dirty_with_no_device_support
Function halide_error_device_dirty_with_no_device_support is at 0x7f93e7840130
JIT Compiling halide_error_device_interface_no_device
Function halide_error_device_interface_no_device is at 0x7f93e7844ef0
JIT Compiling halide_error_explicit_bounds_too_small
Function halide_error_explicit_bounds_too_small is at 0x7f93e78424f0
JIT Compiling halide_error_extern_stage_failed
Function halide_error_extern_stage_failed is at 0x7f93e78423d0
JIT Compiling halide_error_fold_factor_too_small
Function halide_error_fold_factor_too_small is at 0x7f93e7840820
JIT Compiling halide_error_host_and_device_dirty
Function halide_error_host_and_device_dirty is at 0x7f93e7843000
JIT Compiling halide_error_host_is_null
Function halide_error_host_is_null is at 0x7f93e7840240
JIT Compiling halide_error_no_device_interface
Function halide_error_no_device_interface is at 0x7f93e7844d30
JIT Compiling halide_error_out_of_memory
Function halide_error_out_of_memory is at 0x7f93e7842ef0
JIT Compiling halide_error_param_too_large_f64
Function halide_error_param_too_large_f64 is at 0x7f93e7841b90
JIT Compiling halide_error_param_too_large_i64
Function halide_error_param_too_large_i64 is at 0x7f93e78418f0
JIT Compiling halide_error_param_too_large_u64
Function halide_error_param_too_large_u64 is at 0x7f93e7841a40
JIT Compiling halide_error_param_too_small_f64
Function halide_error_param_too_small_f64 is at 0x7f93e78417a0
JIT Compiling halide_error_param_too_small_i64
Function halide_error_param_too_small_i64 is at 0x7f93e7841500
JIT Compiling halide_error_param_too_small_u64
Function halide_error_param_too_small_u64 is at 0x7f93e7841650
JIT Compiling halide_error_requirement_failed
Function halide_error_requirement_failed is at 0x7f93e78409f0
JIT Compiling halide_error_specialize_fail
Function halide_error_specialize_fail is at 0x7f93e7840b20
JIT Compiling halide_error_unaligned_host_ptr
Function halide_error_unaligned_host_ptr is at 0x7f93e7840000
JIT Compiling halide_float16_bits_to_double
Function halide_float16_bits_to_double is at 0x7f93ea435c50
JIT Compiling halide_float16_bits_to_float
Function halide_float16_bits_to_float is at 0x7f93e7842240
JIT Compiling halide_free
Function halide_free is at 0x7f93eb1550e0
JIT Compiling halide_get_gpu_device
Function halide_get_gpu_device is at 0x7f93ea435860
JIT Compiling halide_get_library_symbol
Function halide_get_library_symbol is at 0x7f93ea435820
JIT Compiling halide_get_symbol
Function halide_get_symbol is at 0x7f93ea4357e0
JIT Compiling halide_get_trace_file
Function halide_get_trace_file is at 0x7f93ea4358f0
JIT Compiling halide_host_cpu_count
Function halide_host_cpu_count is at 0x7f93eb155370
JIT Compiling halide_int64_to_string
Function halide_int64_to_string is at 0x7f93ea435a20
JIT Compiling halide_join_thread
Function halide_join_thread is at 0x7f93ea435300
JIT Compiling halide_load_library
Function halide_load_library is at 0x7f93ea435800
JIT Compiling halide_malloc
Function halide_malloc is at 0x7f93eb1550c0
JIT Compiling halide_malloc_alignment
Function halide_malloc_alignment is at 0x7f93eb155050
JIT Compiling halide_memoization_cache_cleanup
Function halide_memoization_cache_cleanup is at 0x7f93ea434c30
JIT Compiling halide_memoization_cache_evict
Function halide_memoization_cache_evict is at 0x7f93ea432e70
JIT Compiling halide_memoization_cache_lookup
Function halide_memoization_cache_lookup is at 0x7f93e7844000
JIT Compiling halide_memoization_cache_release
Function halide_memoization_cache_release is at 0x7f93ea433f20
JIT Compiling halide_memoization_cache_set_size
Function halide_memoization_cache_set_size is at 0x7f93ea433ec0
JIT Compiling halide_memoization_cache_store
Function halide_memoization_cache_store is at 0x7f93e78444e0
JIT Compiling halide_msan_annotate_buffer_is_initialized
Function halide_msan_annotate_buffer_is_initialized is at 0x7f93ea433fc0
JIT Compiling halide_msan_annotate_buffer_is_initialized_as_destructor
Function halide_msan_annotate_buffer_is_initialized_as_destructor is at 0x7f93d5f95de0
JIT Compiling halide_msan_annotate_memory_is_initialized
Function halide_msan_annotate_memory_is_initialized is at 0x7f93eb1551e0
JIT Compiling halide_msan_check_buffer_is_initialized
Function halide_msan_check_buffer_is_initialized is at 0x7f93e7841fe0
JIT Compiling halide_msan_check_memory_is_initialized
Function halide_msan_check_memory_is_initialized is at 0x7f93e7841fd0
JIT Compiling halide_mutex_array_create
Function halide_mutex_array_create is at 0x7f93ea4353c0
JIT Compiling halide_mutex_array_destroy
Function halide_mutex_array_destroy is at 0x7f93ea435440
JIT Compiling halide_mutex_array_lock
Function halide_mutex_array_lock is at 0x7f93ea435480
JIT Compiling halide_mutex_array_unlock
Function halide_mutex_array_unlock is at 0x7f93ea4354b0
JIT Compiling halide_mutex_lock
Function halide_mutex_lock is at 0x7f93eb1554d0
JIT Compiling halide_mutex_unlock
Function halide_mutex_unlock is at 0x7f93eb155920
JIT Compiling halide_pointer_to_string
Function halide_pointer_to_string is at 0x7f93ea435a50
JIT Compiling halide_print
Function halide_print is at 0x7f93eb1551f0
JIT Compiling halide_profiler_get_pipeline_state
Function halide_profiler_get_pipeline_state is at 0x7f93e7840ec0
JIT Compiling halide_profiler_get_state
Function halide_profiler_get_state is at 0x7f93e7843fd0
JIT Compiling halide_profiler_memory_allocate
Function halide_profiler_memory_allocate is at 0x7f93d5f95b30
JIT Compiling halide_profiler_memory_free
Function halide_profiler_memory_free is at 0x7f93d5f95c70
JIT Compiling halide_profiler_pipeline_end
Function halide_profiler_pipeline_end is at 0x7f93e7843fe0
JIT Compiling halide_profiler_pipeline_start
Function halide_profiler_pipeline_start is at 0x7f93e7840f30
JIT Compiling halide_profiler_report
Function halide_profiler_report is at 0x7f93d5f95d40
JIT Compiling halide_profiler_report_unlocked
Function halide_profiler_report_unlocked is at 0x7f93d5f95000
JIT Compiling halide_profiler_reset
Function halide_profiler_reset is at 0x7f93d5f95d90
JIT Compiling halide_profiler_reset_unlocked
Function halide_profiler_reset_unlocked is at 0x7f93e7842f90
JIT Compiling halide_profiler_shutdown
Function halide_profiler_shutdown is at 0x7f93e7842f20
JIT Compiling halide_profiler_stack_peak_update
Function halide_profiler_stack_peak_update is at 0x7f93d5f95a90
JIT Compiling halide_register_device_allocation_pool
Function halide_register_device_allocation_pool is at 0x7f93ea432f80
JIT Compiling halide_release_jit_module
Function halide_release_jit_module is at 0x7f93d5f95e20
JIT Compiling halide_reuse_device_allocations
Function halide_reuse_device_allocations is at 0x7f93e7844c30
JIT Compiling halide_semaphore_init
Function halide_semaphore_init is at 0x7f93ea4356a0
JIT Compiling halide_semaphore_release
Function halide_semaphore_release is at 0x7f93ea4356c0
JIT Compiling halide_semaphore_try_acquire
Function halide_semaphore_try_acquire is at 0x7f93ea4356e0
JIT Compiling halide_set_custom_do_loop_task
Function halide_set_custom_do_loop_task is at 0x7f93ea4355b0
JIT Compiling halide_set_custom_do_par_for
Function halide_set_custom_do_par_for is at 0x7f93ea4355d0
JIT Compiling halide_set_custom_do_task
Function halide_set_custom_do_task is at 0x7f93ea435590
JIT Compiling halide_set_custom_free
Function halide_set_custom_free is at 0x7f93eb1550a0
JIT Compiling halide_set_custom_get_library_symbol
Function halide_set_custom_get_library_symbol is at 0x7f93ea4357c0
JIT Compiling halide_set_custom_get_symbol
Function halide_set_custom_get_symbol is at 0x7f93ea435780
JIT Compiling halide_set_custom_load_library
Function halide_set_custom_load_library is at 0x7f93ea4357a0
JIT Compiling halide_set_custom_malloc
Function halide_set_custom_malloc is at 0x7f93eb155080
JIT Compiling halide_set_custom_parallel_runtime
Function halide_set_custom_parallel_runtime is at 0x7f93ea4355f0
JIT Compiling halide_set_custom_print
Function halide_set_custom_print is at 0x7f93eb155250
JIT Compiling halide_set_custom_trace
Function halide_set_custom_trace is at 0x7f93ea435db0
JIT Compiling halide_set_error_handler
Function halide_set_error_handler is at 0x7f93eb155230
JIT Compiling halide_set_gpu_device
Function halide_set_gpu_device is at 0x7f93ea435840
JIT Compiling halide_set_num_threads
Function halide_set_num_threads is at 0x7f93ea4354e0
JIT Compiling halide_set_trace_file
Function halide_set_trace_file is at 0x7f93ea435cf0
JIT Compiling halide_shutdown_thread_pool
Function halide_shutdown_thread_pool is at 0x7f93ea435230
JIT Compiling halide_shutdown_trace
Function halide_shutdown_trace is at 0x7f93ea435d30
JIT Compiling halide_sleep_ms
Function halide_sleep_ms is at 0x7f93eb155310
JIT Compiling halide_spawn_thread
Function halide_spawn_thread is at 0x7f93eb155c00
JIT Compiling halide_start_clock
Function halide_start_clock is at 0x7f93eb155270
JIT Compiling halide_string_to_string
Function halide_string_to_string is at 0x7f93eb1551b0
JIT Compiling halide_thread_pool_cleanup
Function halide_thread_pool_cleanup is at 0x7f93ea435210
JIT Compiling halide_thread_yield
Function halide_thread_yield is at 0x7f93eb155390
JIT Compiling halide_trace
Function halide_trace is at 0x7f93ea435dd0
JIT Compiling halide_trace_cleanup
Function halide_trace_cleanup is at 0x7f93ea435d10
JIT Compiling halide_trace_helper
Function halide_trace_helper is at 0x7f93ea435df0
JIT Compiling halide_type_to_string
Function halide_type_to_string is at 0x7f93e7844940
JIT Compiling halide_uint64_to_string
Function halide_uint64_to_string is at 0x7f93ea435c70
JIT Compiling halide_use_jit_module
Function halide_use_jit_module is at 0x7f93d5f95df0
Finalizing object
Creating new execution engine
Target triple: x86_64--linux-gnu
JIT compiling opencl for x86-64-linux-jit-opencl-sse41-strict_float-user_context
JIT Compiling halide_acquire_cl_context
Function halide_acquire_cl_context is at 0x7f93d5d29750
JIT Compiling halide_opencl_buffer_copy
Function halide_opencl_buffer_copy is at 0x7f93d5d241f0
JIT Compiling halide_opencl_cleanup
Function halide_opencl_cleanup is at 0x7f93d5d23f70
JIT Compiling halide_opencl_compute_capability
Function halide_opencl_compute_capability is at 0x7f93d5d29230
JIT Compiling halide_opencl_copy_to_device
Function halide_opencl_copy_to_device is at 0x7f93d5d27f60
JIT Compiling halide_opencl_copy_to_host
Function halide_opencl_copy_to_host is at 0x7f93d5d27f40
JIT Compiling halide_opencl_detach_cl_mem
Function halide_opencl_detach_cl_mem is at 0x7f93d5d234c0
JIT Compiling halide_opencl_device_and_host_free
Function halide_opencl_device_and_host_free is at 0x7f93d5d27fa0
JIT Compiling halide_opencl_device_and_host_malloc
Function halide_opencl_device_and_host_malloc is at 0x7f93d5d27f80
JIT Compiling halide_opencl_device_crop
Function halide_opencl_device_crop is at 0x7f93d5d24ef0
JIT Compiling halide_opencl_device_free
Function halide_opencl_device_free is at 0x7f93d5d27aa0
JIT Compiling halide_opencl_device_interface
Function halide_opencl_device_interface is at 0x7f93d5d29fe0
JIT Compiling halide_opencl_device_malloc
Function halide_opencl_device_malloc is at 0x7f93d5d29b80
JIT Compiling halide_opencl_device_release
Function halide_opencl_device_release is at 0x7f93d5d24000
JIT Compiling halide_opencl_device_release_crop
Function halide_opencl_device_release_crop is at 0x7f93d5d23260
JIT Compiling halide_opencl_device_slice
Function halide_opencl_device_slice is at 0x7f93d5d24f70
JIT Compiling halide_opencl_device_sync
Function halide_opencl_device_sync is at 0x7f93d5d27cd0
JIT Compiling halide_opencl_finalize_kernels
Function halide_opencl_finalize_kernels is at 0x7f93d59c5590
JIT Compiling halide_opencl_get_build_options
Function halide_opencl_get_build_options is at 0x7f93d5d22f40
JIT Compiling halide_opencl_get_cl_mem
Function halide_opencl_get_cl_mem is at 0x7f93d59c57a0
JIT Compiling halide_opencl_get_crop_offset
Function halide_opencl_get_crop_offset is at 0x7f93d59c5810
JIT Compiling halide_opencl_get_device_type
Function halide_opencl_get_device_type is at 0x7f93d5d22b40
JIT Compiling halide_opencl_get_platform_name
Function halide_opencl_get_platform_name is at 0x7f93d5d22aa0
JIT Compiling halide_opencl_get_symbol
Function halide_opencl_get_symbol is at 0x7f93d5d296a0
JIT Compiling halide_opencl_image_buffer_copy
Function halide_opencl_image_buffer_copy is at 0x7f93d5d22000
JIT Compiling halide_opencl_image_copy_to_device
Function halide_opencl_image_copy_to_device is at 0x7f93d5d24fc0
JIT Compiling halide_opencl_image_copy_to_host
Function halide_opencl_image_copy_to_host is at 0x7f93d5d27fc0
JIT Compiling halide_opencl_image_device_and_host_free
Function halide_opencl_image_device_and_host_free is at 0x7f93d5d23cc0
JIT Compiling halide_opencl_image_device_and_host_malloc
Function halide_opencl_image_device_and_host_malloc is at 0x7f93d5d23ca0
JIT Compiling halide_opencl_image_device_crop
Function halide_opencl_image_device_crop is at 0x7f93d5d23ce0
JIT Compiling halide_opencl_image_device_interface
Function halide_opencl_image_device_interface is at 0x7f93d5d27fe0
JIT Compiling halide_opencl_image_device_malloc
Function halide_opencl_image_device_malloc is at 0x7f93d5d23550
JIT Compiling halide_opencl_image_device_release_crop
Function halide_opencl_image_device_release_crop is at 0x7f93d5d23eb0
JIT Compiling halide_opencl_image_device_slice
Function halide_opencl_image_device_slice is at 0x7f93d5d23df0
JIT Compiling halide_opencl_image_wrap_cl_mem
Function halide_opencl_image_wrap_cl_mem is at 0x7f93d5d22870
JIT Compiling halide_opencl_initialize_kernels
Function halide_opencl_initialize_kernels is at 0x7f93d59c5000
JIT Compiling halide_opencl_run
Function halide_opencl_run is at 0x7f93d59c4000
JIT Compiling halide_opencl_set_build_options
Function halide_opencl_set_build_options is at 0x7f93d5d22ed0
JIT Compiling halide_opencl_set_device_type
Function halide_opencl_set_device_type is at 0x7f93d5d22e60
JIT Compiling halide_opencl_set_platform_name
Function halide_opencl_set_platform_name is at 0x7f93d5d22df0
JIT Compiling halide_opencl_wrap_cl_mem
Function halide_opencl_wrap_cl_mem is at 0x7f93d5d23430
JIT Compiling halide_release_cl_context
Function halide_release_cl_context is at 0x7f93d5d29b60
JIT Compiling halide_release_jit_module
Function halide_release_jit_module is at 0x7f93d5d29200
JIT Compiling halide_use_jit_module
Function halide_use_jit_module is at 0x7f93d5d291d0
Finalizing object
Creating new execution engine
Target triple: x86_64--linux-gnu
JIT compiling nan_or_one for x86-64-linux-jit-opencl-sse41-strict_float-user_context
JIT Compiling nan_or_one
Function nan_or_one is at 0x7f93d59c0000
JIT Compiling nan_or_one_argv
Function nan_or_one_argv is at 0x7f93d59c0cf0
Finalizing object
custom_print: 0x7f93d1511460
custom_malloc: 0x7f93eb155000
custom_free: 0x7f93eb155060
custom_do_task: 0x7f93eb1553b0
custom_do_par_for: 0x7f93eb1553e0
custom_error: 0x7f93d17ae020
custom_trace: 0x7f93ea434000
JIT input Image argument b0 @ 0x2a7a788
__user_context @ 0x7ffddfa04658
JIT output buffer @ 0x2a7a5d8, 0
Calling jitted function
Back from jitted function. Exit status was 0
Realizing Pipeline for target(x86-64-linux-opencl-sse41-strict_float)
Reusing old jit module compiled for :
target(x86-64-linux-jit-opencl-sse41-strict_float-user_context)
custom_print: 0x7f93d1511460
custom_malloc: 0x7f93eb155000
custom_free: 0x7f93eb155060
custom_do_task: 0x7f93eb1553b0
custom_do_par_for: 0x7f93eb1553e0
custom_error: 0x7f93d17ae020
custom_trace: 0x7f93ea434000
JIT input Image argument b0 @ 0x2a7a788
__user_context @ 0x7ffddfa04658
JIT output buffer @ 0x2a7a5d8, 0x38dee80
Calling jitted function
Adjusting refcount for module GPU by 1
Adjusting refcount for module GPU by 1
Adjusting refcount for module GPU by -1
Adjusting refcount for module GPU by 1
Adjusting refcount for module GPU by 1
Adjusting refcount for module GPU by -1
Back from jitted function. Exit status was 0
Adjusting refcount for module GPU by 1
Adjusting refcount for module GPU by -1
Adjusting refcount for module GPU by -1
xyz= [[[2. 3. 4.]]] output= [[nan]]
Traceback (most recent call last):
File "(... removed ...)xyz_to_xyzw.py", line 69, in <module>
_tests()
File "(... removed ...)xyz_to_xyzw.py", line 59, in _tests
test_case(
File "(... removed ...)xyz_to_xyzw.py", line 57, in test_case
np.testing.assert_allclose(output, expected_output)
File "/home/stian/venv/lib/python3.9/site-packages/numpy/testing/_private/utils.py", line 1530, in assert_allclose
assert_array_compare(compare, actual, desired, err_msg=str(err_msg),
File "/home/stian/venv/lib/python3.9/site-packages/numpy/testing/_private/utils.py", line 768, in assert_array_compare
flagged = func_assert_same_pos(x, y, func=isnan, hasval='nan')
File "/home/stian/venv/lib/python3.9/site-packages/numpy/testing/_private/utils.py", line 745, in func_assert_same_pos
raise AssertionError(msg)
AssertionError:
Not equal to tolerance rtol=1e-07, atol=0
x and y nan location mismatch:
x: array([[nan]], dtype=float32)
y: array([[1.]], dtype=float32)
Adjusting refcount for module GPU by 1
Adjusting refcount for module GPU by -1
Adjusting refcount for module GPU by -1
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment