Created
February 21, 2022 13:10
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Failed to load binary:python | |
Realizing Pipeline for target(x86-64-linux-opencl-sse41-strict_float) | |
jit-compiling for: target(x86-64-linux-opencl-sse41-strict_float) | |
Inferred argument: float32 b0 | |
Inferred argument: (void *) __user_context | |
Creating initial loop nests... | |
Injecting realization of { nan_or_one } | |
for (.__root, 0, 1) { | |
produce nan_or_one { | |
let nan_or_one.s0.row.loop_max = nan_or_one.s0.row.max | |
let nan_or_one.s0.row.loop_min = nan_or_one.s0.row.min | |
let nan_or_one.s0.row.loop_extent = (nan_or_one.s0.row.max + 1) - nan_or_one.s0.row.min | |
let nan_or_one.s0.col.loop_max = nan_or_one.s0.col.max | |
let nan_or_one.s0.col.loop_min = nan_or_one.s0.col.min | |
let nan_or_one.s0.col.loop_extent = (nan_or_one.s0.col.max + 1) - nan_or_one.s0.col.min | |
let nan_or_one.s0.__outermost.loop_extent = 1 | |
let nan_or_one.s0.__outermost.loop_max = 0 | |
let nan_or_one.s0.__outermost.loop_min = 0 | |
let nan_or_one.s0.col.col_outer.loop_extent = ((nan_or_one.s0.col.loop_max - nan_or_one.s0.col.loop_min) + 1)/1 | |
let nan_or_one.s0.col.col_outer.loop_max = (((nan_or_one.s0.col.loop_max - nan_or_one.s0.col.loop_min) + 1)/1) - 1 | |
let nan_or_one.s0.col.col_outer.loop_min = 0 | |
let nan_or_one.s0.col.col_inner.loop_extent = 1 | |
let nan_or_one.s0.col.col_inner.loop_max = 1 - 1 | |
let nan_or_one.s0.col.col_inner.loop_min = 0 | |
let nan_or_one.s0.row.row_outer.loop_extent = ((nan_or_one.s0.row.loop_max - nan_or_one.s0.row.loop_min) + 1)/1 | |
let nan_or_one.s0.row.row_outer.loop_max = (((nan_or_one.s0.row.loop_max - nan_or_one.s0.row.loop_min) + 1)/1) - 1 | |
let nan_or_one.s0.row.row_outer.loop_min = 0 | |
let nan_or_one.s0.row.row_inner.loop_extent = 1 | |
let nan_or_one.s0.row.row_inner.loop_max = 1 - 1 | |
let nan_or_one.s0.row.row_inner.loop_min = 0 | |
for (nan_or_one.s0.__outermost, nan_or_one.s0.__outermost.loop_min, nan_or_one.s0.__outermost.loop_extent) { | |
gpu_block<Default_GPU> (nan_or_one.s0.row.row_outer, nan_or_one.s0.row.row_outer.loop_min, nan_or_one.s0.row.row_outer.loop_extent) { | |
let nan_or_one.s0.row.row_inner.base = (nan_or_one.s0.row.row_outer*1) + nan_or_one.s0.row.loop_min | |
gpu_block<Default_GPU> (nan_or_one.s0.col.col_outer, nan_or_one.s0.col.col_outer.loop_min, nan_or_one.s0.col.col_outer.loop_extent) { | |
let nan_or_one.s0.col.col_inner.base = (nan_or_one.s0.col.col_outer*1) + nan_or_one.s0.col.loop_min | |
gpu_thread<Default_GPU> (nan_or_one.s0.row.row_inner, nan_or_one.s0.row.row_inner.loop_min, nan_or_one.s0.row.row_inner.loop_extent) { | |
let nan_or_one.s0.row = nan_or_one.s0.row.row_inner.base + nan_or_one.s0.row.row_inner | |
gpu_thread<Default_GPU> (nan_or_one.s0.col.col_inner, nan_or_one.s0.col.col_inner.loop_min, nan_or_one.s0.col.col_inner.loop_extent) { | |
let nan_or_one.s0.col = nan_or_one.s0.col.col_inner.base + nan_or_one.s0.col.col_inner | |
nan_or_one(nan_or_one.s0.col, nan_or_one.s0.row) = let t1 = (float32)strict_float(nanf) in (float32)strict_float(select((uint1)is_nan_f32((float32)strict_float((float32)strict_float((float32)strict_float((float32)b0(nan_or_one.s0.col, nan_or_one.s0.row, 0))))), t1, t1)) | |
} | |
} | |
} | |
} | |
} | |
} | |
} | |
Lowering after creating initial loop nests: | |
produce nan_or_one { | |
let nan_or_one.s0.row.loop_max = nan_or_one.s0.row.max | |
let nan_or_one.s0.row.loop_min = nan_or_one.s0.row.min | |
let nan_or_one.s0.row.loop_extent = (nan_or_one.s0.row.max + 1) - nan_or_one.s0.row.min | |
let nan_or_one.s0.col.loop_max = nan_or_one.s0.col.max | |
let nan_or_one.s0.col.loop_min = nan_or_one.s0.col.min | |
let nan_or_one.s0.col.loop_extent = (nan_or_one.s0.col.max + 1) - nan_or_one.s0.col.min | |
let nan_or_one.s0.col.col_outer.loop_extent = ((nan_or_one.s0.col.loop_max - nan_or_one.s0.col.loop_min) + 1)/1 | |
let nan_or_one.s0.col.col_outer.loop_max = (((nan_or_one.s0.col.loop_max - nan_or_one.s0.col.loop_min) + 1)/1) - 1 | |
let nan_or_one.s0.col.col_outer.loop_min = 0 | |
let nan_or_one.s0.col.col_inner.loop_extent = 1 | |
let nan_or_one.s0.col.col_inner.loop_max = 1 - 1 | |
let nan_or_one.s0.col.col_inner.loop_min = 0 | |
let nan_or_one.s0.row.row_outer.loop_extent = ((nan_or_one.s0.row.loop_max - nan_or_one.s0.row.loop_min) + 1)/1 | |
let nan_or_one.s0.row.row_outer.loop_max = (((nan_or_one.s0.row.loop_max - nan_or_one.s0.row.loop_min) + 1)/1) - 1 | |
let nan_or_one.s0.row.row_outer.loop_min = 0 | |
let nan_or_one.s0.row.row_inner.loop_extent = 1 | |
let nan_or_one.s0.row.row_inner.loop_max = 1 - 1 | |
let nan_or_one.s0.row.row_inner.loop_min = 0 | |
gpu_block<Default_GPU> (nan_or_one.s0.row.row_outer, nan_or_one.s0.row.row_outer.loop_min, nan_or_one.s0.row.row_outer.loop_extent) { | |
let nan_or_one.s0.row.row_inner.base = (nan_or_one.s0.row.row_outer*1) + nan_or_one.s0.row.loop_min | |
gpu_block<Default_GPU> (nan_or_one.s0.col.col_outer, nan_or_one.s0.col.col_outer.loop_min, nan_or_one.s0.col.col_outer.loop_extent) { | |
let nan_or_one.s0.col.col_inner.base = (nan_or_one.s0.col.col_outer*1) + nan_or_one.s0.col.loop_min | |
gpu_thread<Default_GPU> (nan_or_one.s0.row.row_inner, nan_or_one.s0.row.row_inner.loop_min, nan_or_one.s0.row.row_inner.loop_extent) { | |
let nan_or_one.s0.row = nan_or_one.s0.row.row_inner.base + nan_or_one.s0.row.row_inner | |
gpu_thread<Default_GPU> (nan_or_one.s0.col.col_inner, nan_or_one.s0.col.col_inner.loop_min, nan_or_one.s0.col.col_inner.loop_extent) { | |
let nan_or_one.s0.col = nan_or_one.s0.col.col_inner.base + nan_or_one.s0.col.col_inner | |
nan_or_one(nan_or_one.s0.col, nan_or_one.s0.row) = let t1 = (float32)strict_float(nanf) in (float32)strict_float(select((uint1)is_nan_f32((float32)strict_float((float32)strict_float((float32)strict_float((float32)b0(nan_or_one.s0.col, nan_or_one.s0.row, 0))))), t1, t1)) | |
} | |
} | |
} | |
} | |
} | |
Skipping injecting memoization... | |
Injecting tracing... | |
Lowering after injecting tracing: (unchanged) | |
Adding checks for parameters | |
Lowering after injecting parameter checks: (unchanged) | |
Computing bounds of each function's value | |
Bounds on value 0 for func nan_or_one are: nanf, nanf | |
Clamping unsafe data-dependent accesses | |
Lowering after clamping unsafe data-dependent accesses | |
produce nan_or_one { | |
let nan_or_one.s0.row.loop_max = nan_or_one.s0.row.max | |
let nan_or_one.s0.row.loop_min = nan_or_one.s0.row.min | |
let nan_or_one.s0.row.loop_extent = (nan_or_one.s0.row.max + 1) - nan_or_one.s0.row.min | |
let nan_or_one.s0.col.loop_max = nan_or_one.s0.col.max | |
let nan_or_one.s0.col.loop_min = nan_or_one.s0.col.min | |
let nan_or_one.s0.col.loop_extent = (nan_or_one.s0.col.max + 1) - nan_or_one.s0.col.min | |
let nan_or_one.s0.col.col_outer.loop_extent = ((nan_or_one.s0.col.loop_max - nan_or_one.s0.col.loop_min) + 1)/1 | |
let nan_or_one.s0.col.col_outer.loop_max = (((nan_or_one.s0.col.loop_max - nan_or_one.s0.col.loop_min) + 1)/1) - 1 | |
let nan_or_one.s0.col.col_outer.loop_min = 0 | |
let nan_or_one.s0.col.col_inner.loop_extent = 1 | |
let nan_or_one.s0.col.col_inner.loop_max = 1 - 1 | |
let nan_or_one.s0.col.col_inner.loop_min = 0 | |
let nan_or_one.s0.row.row_outer.loop_extent = ((nan_or_one.s0.row.loop_max - nan_or_one.s0.row.loop_min) + 1)/1 | |
let nan_or_one.s0.row.row_outer.loop_max = (((nan_or_one.s0.row.loop_max - nan_or_one.s0.row.loop_min) + 1)/1) - 1 | |
let nan_or_one.s0.row.row_outer.loop_min = 0 | |
let nan_or_one.s0.row.row_inner.loop_extent = 1 | |
let nan_or_one.s0.row.row_inner.loop_max = 1 - 1 | |
let nan_or_one.s0.row.row_inner.loop_min = 0 | |
gpu_block<Default_GPU> (nan_or_one.s0.row.row_outer, nan_or_one.s0.row.row_outer.loop_min, nan_or_one.s0.row.row_outer.loop_extent) { | |
let nan_or_one.s0.row.row_inner.base = (nan_or_one.s0.row.row_outer*1) + nan_or_one.s0.row.loop_min | |
gpu_block<Default_GPU> (nan_or_one.s0.col.col_outer, nan_or_one.s0.col.col_outer.loop_min, nan_or_one.s0.col.col_outer.loop_extent) { | |
let nan_or_one.s0.col.col_inner.base = (nan_or_one.s0.col.col_outer*1) + nan_or_one.s0.col.loop_min | |
gpu_thread<Default_GPU> (nan_or_one.s0.row.row_inner, nan_or_one.s0.row.row_inner.loop_min, nan_or_one.s0.row.row_inner.loop_extent) { | |
let nan_or_one.s0.row = nan_or_one.s0.row.row_inner.base + nan_or_one.s0.row.row_inner | |
gpu_thread<Default_GPU> (nan_or_one.s0.col.col_inner, nan_or_one.s0.col.col_inner.loop_min, nan_or_one.s0.col.col_inner.loop_extent) { | |
let nan_or_one.s0.col = nan_or_one.s0.col.col_inner.base + nan_or_one.s0.col.col_inner | |
nan_or_one(nan_or_one.s0.col, nan_or_one.s0.row) = let t1 = (float32)strict_float(nanf) in (float32)strict_float(select((uint1)is_nan_f32((float32)strict_float((float32)strict_float((float32)strict_float((float32)b0(nan_or_one.s0.col, nan_or_one.s0.row, 0))))), t1, t1)) | |
} | |
} | |
} | |
} | |
} | |
Performing computation bounds inference... | |
Lowering after computation bounds inference: | |
let nan_or_one.s0.row.max = (nan_or_one.min.1 + nan_or_one.extent.1) - 1 | |
let nan_or_one.s0.row.min = nan_or_one.min.1 | |
let nan_or_one.s0.col.max = (nan_or_one.min.0 + nan_or_one.extent.0) - 1 | |
let nan_or_one.s0.col.min = nan_or_one.min.0 | |
add_image_checks_marker() | |
produce nan_or_one { | |
let nan_or_one.s0.row.loop_max = nan_or_one.s0.row.max | |
let nan_or_one.s0.row.loop_min = nan_or_one.s0.row.min | |
let nan_or_one.s0.row.loop_extent = (nan_or_one.s0.row.max + 1) - nan_or_one.s0.row.min | |
let nan_or_one.s0.col.loop_max = nan_or_one.s0.col.max | |
let nan_or_one.s0.col.loop_min = nan_or_one.s0.col.min | |
let nan_or_one.s0.col.loop_extent = (nan_or_one.s0.col.max + 1) - nan_or_one.s0.col.min | |
let nan_or_one.s0.col.col_outer.loop_extent = ((nan_or_one.s0.col.loop_max - nan_or_one.s0.col.loop_min) + 1)/1 | |
let nan_or_one.s0.col.col_outer.loop_max = (((nan_or_one.s0.col.loop_max - nan_or_one.s0.col.loop_min) + 1)/1) - 1 | |
let nan_or_one.s0.col.col_outer.loop_min = 0 | |
let nan_or_one.s0.col.col_inner.loop_extent = 1 | |
let nan_or_one.s0.col.col_inner.loop_max = 1 - 1 | |
let nan_or_one.s0.col.col_inner.loop_min = 0 | |
let nan_or_one.s0.row.row_outer.loop_extent = ((nan_or_one.s0.row.loop_max - nan_or_one.s0.row.loop_min) + 1)/1 | |
let nan_or_one.s0.row.row_outer.loop_max = (((nan_or_one.s0.row.loop_max - nan_or_one.s0.row.loop_min) + 1)/1) - 1 | |
let nan_or_one.s0.row.row_outer.loop_min = 0 | |
let nan_or_one.s0.row.row_inner.loop_extent = 1 | |
let nan_or_one.s0.row.row_inner.loop_max = 1 - 1 | |
let nan_or_one.s0.row.row_inner.loop_min = 0 | |
gpu_block<Default_GPU> (nan_or_one.s0.row.row_outer, nan_or_one.s0.row.row_outer.loop_min, nan_or_one.s0.row.row_outer.loop_extent) { | |
let nan_or_one.s0.row.row_inner.base = (nan_or_one.s0.row.row_outer*1) + nan_or_one.s0.row.loop_min | |
gpu_block<Default_GPU> (nan_or_one.s0.col.col_outer, nan_or_one.s0.col.col_outer.loop_min, nan_or_one.s0.col.col_outer.loop_extent) { | |
let nan_or_one.s0.col.col_inner.base = (nan_or_one.s0.col.col_outer*1) + nan_or_one.s0.col.loop_min | |
gpu_thread<Default_GPU> (nan_or_one.s0.row.row_inner, nan_or_one.s0.row.row_inner.loop_min, nan_or_one.s0.row.row_inner.loop_extent) { | |
let nan_or_one.s0.row = nan_or_one.s0.row.row_inner.base + nan_or_one.s0.row.row_inner | |
gpu_thread<Default_GPU> (nan_or_one.s0.col.col_inner, nan_or_one.s0.col.col_inner.loop_min, nan_or_one.s0.col.col_inner.loop_extent) { | |
let nan_or_one.s0.col = nan_or_one.s0.col.col_inner.base + nan_or_one.s0.col.col_inner | |
nan_or_one(nan_or_one.s0.col, nan_or_one.s0.row) = let t1 = (float32)strict_float(nanf) in (float32)strict_float(select((uint1)is_nan_f32((float32)strict_float((float32)strict_float((float32)strict_float((float32)b0(nan_or_one.s0.col, nan_or_one.s0.row, 0))))), t1, t1)) | |
} | |
} | |
} | |
} | |
} | |
Removing extern loops... | |
Lowering after removing extern loops: (unchanged) | |
Performing sliding window optimization... | |
Lowering after sliding window: | |
let nan_or_one.s0.row.max = (nan_or_one.min.1 + nan_or_one.extent.1) - 1 | |
let nan_or_one.s0.row.min = nan_or_one.min.1 | |
let nan_or_one.s0.col.max = (nan_or_one.min.0 + nan_or_one.extent.0) - 1 | |
let nan_or_one.s0.col.min = nan_or_one.min.0 | |
add_image_checks_marker() | |
produce nan_or_one { | |
let nan_or_one.s0.row.loop_max = nan_or_one.s0.row.max | |
let nan_or_one.s0.row.loop_min = nan_or_one.s0.row.min | |
let nan_or_one.s0.row.loop_extent = (nan_or_one.s0.row.max + 1) - nan_or_one.s0.row.min | |
let nan_or_one.s0.col.loop_max = nan_or_one.s0.col.max | |
let nan_or_one.s0.col.loop_min = nan_or_one.s0.col.min | |
let nan_or_one.s0.col.loop_extent = (nan_or_one.s0.col.max + 1) - nan_or_one.s0.col.min | |
let nan_or_one.s0.col.col_outer.loop_extent = ((nan_or_one.s0.col.loop_max - nan_or_one.s0.col.loop_min) + 1)/1 | |
let nan_or_one.s0.col.col_outer.loop_max = (((nan_or_one.s0.col.loop_max - nan_or_one.s0.col.loop_min) + 1)/1) - 1 | |
let nan_or_one.s0.col.col_outer.loop_min = 0 | |
let nan_or_one.s0.col.col_inner.loop_extent = 1 | |
let nan_or_one.s0.col.col_inner.loop_max = 1 - 1 | |
let nan_or_one.s0.col.col_inner.loop_min = 0 | |
let nan_or_one.s0.row.row_outer.loop_extent = ((nan_or_one.s0.row.loop_max - nan_or_one.s0.row.loop_min) + 1)/1 | |
let nan_or_one.s0.row.row_outer.loop_max = (((nan_or_one.s0.row.loop_max - nan_or_one.s0.row.loop_min) + 1)/1) - 1 | |
let nan_or_one.s0.row.row_outer.loop_min = 0 | |
let nan_or_one.s0.row.row_inner.loop_extent = 1 | |
let nan_or_one.s0.row.row_inner.loop_max = 1 - 1 | |
let nan_or_one.s0.row.row_inner.loop_min = 0 | |
let nan_or_one.s0.row.row_outer.loop_min.orig = nan_or_one.s0.row.row_outer.loop_min | |
gpu_block<Default_GPU> (nan_or_one.s0.row.row_outer, nan_or_one.s0.row.row_outer.loop_min, nan_or_one.s0.row.row_outer.loop_extent) { | |
let nan_or_one.s0.row.row_inner.base = (nan_or_one.s0.row.row_outer*1) + nan_or_one.s0.row.loop_min | |
let nan_or_one.s0.col.col_outer.loop_min.orig = nan_or_one.s0.col.col_outer.loop_min | |
gpu_block<Default_GPU> (nan_or_one.s0.col.col_outer, nan_or_one.s0.col.col_outer.loop_min, nan_or_one.s0.col.col_outer.loop_extent) { | |
let nan_or_one.s0.col.col_inner.base = (nan_or_one.s0.col.col_outer*1) + nan_or_one.s0.col.loop_min | |
let nan_or_one.s0.row.row_inner.loop_min.orig = nan_or_one.s0.row.row_inner.loop_min | |
gpu_thread<Default_GPU> (nan_or_one.s0.row.row_inner, nan_or_one.s0.row.row_inner.loop_min, nan_or_one.s0.row.row_inner.loop_extent) { | |
let nan_or_one.s0.row = nan_or_one.s0.row.row_inner.base + nan_or_one.s0.row.row_inner | |
let nan_or_one.s0.col.col_inner.loop_min.orig = nan_or_one.s0.col.col_inner.loop_min | |
gpu_thread<Default_GPU> (nan_or_one.s0.col.col_inner, nan_or_one.s0.col.col_inner.loop_min, nan_or_one.s0.col.col_inner.loop_extent) { | |
let nan_or_one.s0.col = nan_or_one.s0.col.col_inner.base + nan_or_one.s0.col.col_inner | |
nan_or_one(nan_or_one.s0.col, nan_or_one.s0.row) = let t1 = (float32)strict_float(nanf) in (float32)strict_float(select((uint1)is_nan_f32((float32)strict_float((float32)strict_float((float32)strict_float((float32)b0(nan_or_one.s0.col, nan_or_one.s0.row, 0))))), t1, t1)) | |
} | |
} | |
} | |
} | |
} | |
Uniquifying variable names... | |
Lowering after uniquifying variable names: (unchanged) | |
Simplifying... | |
Lowering after first simplification: | |
let nan_or_one.s0.row.max = (nan_or_one.extent.1 + nan_or_one.min.1) + -1 | |
let nan_or_one.s0.row.min = nan_or_one.min.1 | |
let nan_or_one.s0.col.max = (nan_or_one.extent.0 + nan_or_one.min.0) + -1 | |
let nan_or_one.s0.col.min = nan_or_one.min.0 | |
add_image_checks_marker() | |
produce nan_or_one { | |
let nan_or_one.s0.row.loop_max = (nan_or_one.extent.1 + nan_or_one.min.1) + -1 | |
let nan_or_one.s0.row.loop_min = nan_or_one.min.1 | |
let nan_or_one.s0.row.loop_extent = nan_or_one.extent.1 | |
let nan_or_one.s0.col.loop_max = (nan_or_one.extent.0 + nan_or_one.min.0) + -1 | |
let nan_or_one.s0.col.loop_min = nan_or_one.min.0 | |
let nan_or_one.s0.col.loop_extent = nan_or_one.extent.0 | |
let nan_or_one.s0.col.col_outer.loop_extent = nan_or_one.extent.0 | |
let nan_or_one.s0.col.col_outer.loop_max = nan_or_one.extent.0 + -1 | |
let nan_or_one.s0.col.col_outer.loop_min = 0 | |
let nan_or_one.s0.col.col_inner.loop_extent = 1 | |
let nan_or_one.s0.col.col_inner.loop_max = 0 | |
let nan_or_one.s0.col.col_inner.loop_min = 0 | |
let nan_or_one.s0.row.row_outer.loop_extent = nan_or_one.extent.1 | |
let nan_or_one.s0.row.row_outer.loop_max = nan_or_one.extent.1 + -1 | |
let nan_or_one.s0.row.row_outer.loop_min = 0 | |
let nan_or_one.s0.row.row_inner.loop_extent = 1 | |
let nan_or_one.s0.row.row_inner.loop_max = 0 | |
let nan_or_one.s0.row.row_inner.loop_min = 0 | |
let nan_or_one.s0.row.row_outer.loop_min.orig = 0 | |
gpu_block<Default_GPU> (nan_or_one.s0.row.row_outer, 0, nan_or_one.extent.1) { | |
let nan_or_one.s0.row.row_inner.base = nan_or_one.min.1 + nan_or_one.s0.row.row_outer | |
let nan_or_one.s0.col.col_outer.loop_min.orig = 0 | |
gpu_block<Default_GPU> (nan_or_one.s0.col.col_outer, 0, nan_or_one.extent.0) { | |
let nan_or_one.s0.col.col_inner.base = nan_or_one.min.0 + nan_or_one.s0.col.col_outer | |
let nan_or_one.s0.row.row_inner.loop_min.orig = 0 | |
gpu_thread<Default_GPU> (nan_or_one.s0.row.row_inner, 0, 1) { | |
let nan_or_one.s0.row = nan_or_one.min.1 + nan_or_one.s0.row.row_outer | |
let nan_or_one.s0.col.col_inner.loop_min.orig = 0 | |
gpu_thread<Default_GPU> (nan_or_one.s0.col.col_inner, 0, 1) { | |
let nan_or_one.s0.col = nan_or_one.min.0 + nan_or_one.s0.col.col_outer | |
nan_or_one(nan_or_one.min.0 + nan_or_one.s0.col.col_outer, nan_or_one.min.1 + nan_or_one.s0.row.row_outer) = let t1 = (float32)strict_float(nanf) in (float32)strict_float(select((uint1)is_nan_f32((float32)strict_float((float32)b0(nan_or_one.min.0 + nan_or_one.s0.col.col_outer, nan_or_one.min.1 + nan_or_one.s0.row.row_outer, 0))), t1, t1)) | |
} | |
} | |
} | |
} | |
} | |
Simplifying correlated differences... | |
Lowering after simplifying correlated differences: | |
let nan_or_one.s0.row.max = (nan_or_one.extent.1 + nan_or_one.min.1) + -1 | |
let nan_or_one.s0.row.min = nan_or_one.min.1 | |
let nan_or_one.s0.col.max = (nan_or_one.extent.0 + nan_or_one.min.0) + -1 | |
let nan_or_one.s0.col.min = nan_or_one.min.0 | |
add_image_checks_marker() | |
produce nan_or_one { | |
let nan_or_one.s0.row.loop_max = (nan_or_one.extent.1 + nan_or_one.min.1) + -1 | |
let nan_or_one.s0.row.loop_min = nan_or_one.min.1 | |
let nan_or_one.s0.row.loop_extent = nan_or_one.extent.1 | |
let nan_or_one.s0.col.loop_max = (nan_or_one.extent.0 + nan_or_one.min.0) + -1 | |
let nan_or_one.s0.col.loop_min = nan_or_one.min.0 | |
let nan_or_one.s0.col.loop_extent = nan_or_one.extent.0 | |
let nan_or_one.s0.col.col_outer.loop_extent = nan_or_one.extent.0 | |
let nan_or_one.s0.col.col_outer.loop_max = nan_or_one.extent.0 + -1 | |
let nan_or_one.s0.col.col_outer.loop_min = 0 | |
let nan_or_one.s0.col.col_inner.loop_extent = 1 | |
let nan_or_one.s0.col.col_inner.loop_max = 0 | |
let nan_or_one.s0.col.col_inner.loop_min = 0 | |
let nan_or_one.s0.row.row_outer.loop_extent = nan_or_one.extent.1 | |
let nan_or_one.s0.row.row_outer.loop_max = nan_or_one.extent.1 + -1 | |
let nan_or_one.s0.row.row_outer.loop_min = 0 | |
let nan_or_one.s0.row.row_inner.loop_extent = 1 | |
let nan_or_one.s0.row.row_inner.loop_max = 0 | |
let nan_or_one.s0.row.row_inner.loop_min = 0 | |
let nan_or_one.s0.row.row_outer.loop_min.orig = 0 | |
gpu_block<Default_GPU> (nan_or_one.s0.row.row_outer, 0, nan_or_one.extent.1) { | |
let nan_or_one.s0.row.row_inner.base = nan_or_one.min.1 + nan_or_one.s0.row.row_outer | |
let nan_or_one.s0.col.col_outer.loop_min.orig = 0 | |
gpu_block<Default_GPU> (nan_or_one.s0.col.col_outer, 0, nan_or_one.extent.0) { | |
let nan_or_one.s0.col.col_inner.base = nan_or_one.min.0 + nan_or_one.s0.col.col_outer | |
let nan_or_one.s0.row.row_inner.loop_min.orig = 0 | |
gpu_thread<Default_GPU> (nan_or_one.s0.row.row_inner, 0, 1) { | |
let nan_or_one.s0.row = nan_or_one.min.1 + nan_or_one.s0.row.row_outer | |
let nan_or_one.s0.col.col_inner.loop_min.orig = 0 | |
gpu_thread<Default_GPU> (nan_or_one.s0.col.col_inner, 0, 1) { | |
let nan_or_one.s0.col = nan_or_one.min.0 + nan_or_one.s0.col.col_outer | |
nan_or_one(nan_or_one.min.0 + nan_or_one.s0.col.col_outer, nan_or_one.min.1 + nan_or_one.s0.row.row_outer) = let t1 = (float32)strict_float(nanf) in (float32)strict_float(select((uint1)is_nan_f32((float32)strict_float((float32)b0(nan_or_one.min.0 + nan_or_one.s0.col.col_outer, nan_or_one.min.1 + nan_or_one.s0.row.row_outer, 0))), t1, t1)) | |
} | |
} | |
} | |
} | |
} | |
Performing allocation bounds inference... | |
Lowering after allocation bounds inference: (unchanged) | |
Adding checks for images | |
Injecting constraints for b0.0 | |
Injecting constraints for b0.1 | |
Injecting constraints for b0.2 | |
Injecting constraints for nan_or_one.0 | |
Injecting constraints for nan_or_one.1 | |
Lowering after injecting image checks: | |
let nan_or_one.s0.row.max = (nan_or_one.extent.1 + nan_or_one.min.1) + -1 | |
let nan_or_one.s0.row.min = nan_or_one.min.1 | |
let nan_or_one.s0.col.max = (nan_or_one.extent.0 + nan_or_one.min.0) + -1 | |
let nan_or_one.s0.col.min = nan_or_one.min.0 | |
let b0.extent.0.required = ((nan_or_one.min.0 + (nan_or_one.extent.0 + -1)) + 1) - (nan_or_one.min.0 + 0) | |
let b0.min.0.required = nan_or_one.min.0 + 0 | |
let b0.stride.0.required = 1 | |
let b0.extent.1.required = ((nan_or_one.min.1 + (nan_or_one.extent.1 + -1)) + 1) - (nan_or_one.min.1 + 0) | |
let b0.min.1.required = nan_or_one.min.1 + 0 | |
let b0.stride.1.required = b0.stride.0.required*b0.extent.0.required | |
let b0.extent.2.required = (0 + 1) - 0 | |
let b0.min.2.required = 0 | |
let b0.stride.2.required = b0.stride.1.required*b0.extent.1.required | |
let nan_or_one.extent.0.required = ((nan_or_one.min.0 + (nan_or_one.extent.0 + -1)) + 1) - (nan_or_one.min.0 + 0) | |
let nan_or_one.min.0.required = nan_or_one.min.0 + 0 | |
let nan_or_one.stride.0.required = 1 | |
let nan_or_one.extent.1.required = ((nan_or_one.min.1 + (nan_or_one.extent.1 + -1)) + 1) - (nan_or_one.min.1 + 0) | |
let nan_or_one.min.1.required = nan_or_one.min.1 + 0 | |
let nan_or_one.stride.1.required = nan_or_one.stride.0.required*nan_or_one.extent.0.required | |
let b0.stride.0.constrained = 3 | |
let b0.min.0.constrained = 0 | |
let b0.extent.0.constrained = 1 | |
let b0.stride.1.constrained = 3 | |
let b0.min.1.constrained = 0 | |
let b0.extent.1.constrained = 1 | |
let b0.stride.2.constrained = 1 | |
let b0.min.2.constrained = 0 | |
let b0.extent.2.constrained = 3 | |
let nan_or_one.stride.0.constrained = 1 | |
let b0.stride.0.proposed = 3 | |
let b0.min.0.proposed = 0 | |
let b0.extent.0.proposed = 1 | |
let b0.stride.1.proposed = 3 | |
let b0.min.1.proposed = 0 | |
let b0.extent.1.proposed = 1 | |
let b0.stride.2.proposed = 1 | |
let b0.min.2.proposed = 0 | |
let b0.extent.2.proposed = 3 | |
let nan_or_one.stride.0.proposed = 1 | |
let nan_or_one.min.0.proposed = nan_or_one.min.0.required | |
let nan_or_one.extent.0.proposed = nan_or_one.extent.0.required | |
let nan_or_one.stride.1.proposed = nan_or_one.stride.1.required | |
let nan_or_one.min.1.proposed = nan_or_one.min.1.required | |
let nan_or_one.extent.1.proposed = nan_or_one.extent.1.required | |
assert(!(uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)b0.buffer) || ((b0.min.0.proposed <= b0.min.0.required) && (((b0.min.0.proposed + b0.extent.0.proposed) - 1) >= ((b0.min.0.required + b0.extent.0.required) - 1))), halide_error_constraints_make_required_region_smaller("Input buffer b0", 0, b0.min.0.proposed, (b0.min.0.proposed + b0.extent.0.proposed) - 1, b0.min.0.required, (b0.min.0.required + b0.extent.0.required) - 1)) | |
assert(!(uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)b0.buffer) || ((b0.min.1.proposed <= b0.min.1.required) && (((b0.min.1.proposed + b0.extent.1.proposed) - 1) >= ((b0.min.1.required + b0.extent.1.required) - 1))), halide_error_constraints_make_required_region_smaller("Input buffer b0", 1, b0.min.1.proposed, (b0.min.1.proposed + b0.extent.1.proposed) - 1, b0.min.1.required, (b0.min.1.required + b0.extent.1.required) - 1)) | |
assert(!(uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)b0.buffer) || ((b0.min.2.proposed <= b0.min.2.required) && (((b0.min.2.proposed + b0.extent.2.proposed) - 1) >= ((b0.min.2.required + b0.extent.2.required) - 1))), halide_error_constraints_make_required_region_smaller("Input buffer b0", 2, b0.min.2.proposed, (b0.min.2.proposed + b0.extent.2.proposed) - 1, b0.min.2.required, (b0.min.2.required + b0.extent.2.required) - 1)) | |
assert(!(uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)nan_or_one.buffer) || ((nan_or_one.min.0.proposed <= nan_or_one.min.0.required) && (((nan_or_one.min.0.proposed + nan_or_one.extent.0.proposed) - 1) >= ((nan_or_one.min.0.required + nan_or_one.extent.0.required) - 1))), halide_error_constraints_make_required_region_smaller("Output buffer nan_or_one", 0, nan_or_one.min.0.proposed, (nan_or_one.min.0.proposed + nan_or_one.extent.0.proposed) - 1, nan_or_one.min.0.required, (nan_or_one.min.0.required + nan_or_one.extent.0.required) - 1)) | |
assert(!(uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)nan_or_one.buffer) || ((nan_or_one.min.1.proposed <= nan_or_one.min.1.required) && (((nan_or_one.min.1.proposed + nan_or_one.extent.1.proposed) - 1) >= ((nan_or_one.min.1.required + nan_or_one.extent.1.required) - 1))), halide_error_constraints_make_required_region_smaller("Output buffer nan_or_one", 1, nan_or_one.min.1.proposed, (nan_or_one.min.1.proposed + nan_or_one.extent.1.proposed) - 1, nan_or_one.min.1.required, (nan_or_one.min.1.required + nan_or_one.extent.1.required) - 1)) | |
if ((uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)b0.buffer)) { | |
(halide_buffer_t *)_halide_buffer_init((halide_buffer_t *)b0.buffer, (halide_dimension_t *)_halide_buffer_get_shape((halide_buffer_t *)b0.buffer), (void *)reinterpret((uint64)0), (uint64)0, (halide_device_interface_t *)reinterpret((uint64)0), 2, 32, 3, (halide_dimension_t *)make_struct(b0.min.0.proposed, b0.extent.0.proposed, b0.stride.0.proposed, 0, b0.min.1.proposed, b0.extent.1.proposed, b0.stride.1.proposed, 0, b0.min.2.proposed, b0.extent.2.proposed, b0.stride.2.proposed, 0), (uint64)0) | |
} | |
if ((uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)nan_or_one.buffer)) { | |
(halide_buffer_t *)_halide_buffer_init((halide_buffer_t *)nan_or_one.buffer, (halide_dimension_t *)_halide_buffer_get_shape((halide_buffer_t *)nan_or_one.buffer), (void *)reinterpret((uint64)0), (uint64)0, (halide_device_interface_t *)reinterpret((uint64)0), 2, 32, 2, (halide_dimension_t *)make_struct(nan_or_one.min.0.proposed, nan_or_one.extent.0.proposed, nan_or_one.stride.0.proposed, 0, nan_or_one.min.1.proposed, nan_or_one.extent.1.proposed, nan_or_one.stride.1.proposed, 0), (uint64)0) | |
} | |
if (!(((uint1)0 || (uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)b0.buffer)) || (uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)nan_or_one.buffer))) { | |
assert((uint32)b0.type == (uint32)73730, halide_error_bad_type("Input buffer b0", (uint32)b0.type, (uint32)73730)) | |
assert(b0.dimensions == 3, halide_error_bad_dimensions("Input buffer b0", b0.dimensions, 3)) | |
assert((uint32)nan_or_one.type == (uint32)73730, halide_error_bad_type("Output buffer nan_or_one", (uint32)nan_or_one.type, (uint32)73730)) | |
assert(nan_or_one.dimensions == 2, halide_error_bad_dimensions("Output buffer nan_or_one", nan_or_one.dimensions, 2)) | |
assert((b0.min.0 <= b0.min.0.required) && (((b0.min.0 + b0.extent.0) - 1) >= ((b0.min.0.required + b0.extent.0.required) - 1)), halide_error_access_out_of_bounds("Input buffer b0", 0, b0.min.0.required, (b0.min.0.required + b0.extent.0.required) - 1, b0.min.0, (b0.min.0 + b0.extent.0) - 1)) | |
assert(b0.extent.0 >= 0, halide_error_buffer_extents_negative("Input buffer b0", 0, b0.extent.0)) | |
assert((b0.min.1 <= b0.min.1.required) && (((b0.min.1 + b0.extent.1) - 1) >= ((b0.min.1.required + b0.extent.1.required) - 1)), halide_error_access_out_of_bounds("Input buffer b0", 1, b0.min.1.required, (b0.min.1.required + b0.extent.1.required) - 1, b0.min.1, (b0.min.1 + b0.extent.1) - 1)) | |
assert(b0.extent.1 >= 0, halide_error_buffer_extents_negative("Input buffer b0", 1, b0.extent.1)) | |
assert((b0.min.2 <= b0.min.2.required) && (((b0.min.2 + b0.extent.2) - 1) >= ((b0.min.2.required + b0.extent.2.required) - 1)), halide_error_access_out_of_bounds("Input buffer b0", 2, b0.min.2.required, (b0.min.2.required + b0.extent.2.required) - 1, b0.min.2, (b0.min.2 + b0.extent.2) - 1)) | |
assert(b0.extent.2 >= 0, halide_error_buffer_extents_negative("Input buffer b0", 2, b0.extent.2)) | |
assert((nan_or_one.min.0 <= nan_or_one.min.0.required) && (((nan_or_one.min.0 + nan_or_one.extent.0) - 1) >= ((nan_or_one.min.0.required + nan_or_one.extent.0.required) - 1)), halide_error_access_out_of_bounds("Output buffer nan_or_one", 0, nan_or_one.min.0.required, (nan_or_one.min.0.required + nan_or_one.extent.0.required) - 1, nan_or_one.min.0, (nan_or_one.min.0 + nan_or_one.extent.0) - 1)) | |
assert(nan_or_one.extent.0 >= 0, halide_error_buffer_extents_negative("Output buffer nan_or_one", 0, nan_or_one.extent.0)) | |
assert((nan_or_one.min.1 <= nan_or_one.min.1.required) && (((nan_or_one.min.1 + nan_or_one.extent.1) - 1) >= ((nan_or_one.min.1.required + nan_or_one.extent.1.required) - 1)), halide_error_access_out_of_bounds("Output buffer nan_or_one", 1, nan_or_one.min.1.required, (nan_or_one.min.1.required + nan_or_one.extent.1.required) - 1, nan_or_one.min.1, (nan_or_one.min.1 + nan_or_one.extent.1) - 1)) | |
assert(nan_or_one.extent.1 >= 0, halide_error_buffer_extents_negative("Output buffer nan_or_one", 1, nan_or_one.extent.1)) | |
assert(b0.stride.0 == b0.stride.0.constrained, halide_error_constraint_violated("b0.stride.0", b0.stride.0, "3", b0.stride.0.constrained)) | |
assert(b0.min.0 == b0.min.0.constrained, halide_error_constraint_violated("b0.min.0", b0.min.0, "0", b0.min.0.constrained)) | |
assert(b0.extent.0 == b0.extent.0.constrained, halide_error_constraint_violated("b0.extent.0", b0.extent.0, "1", b0.extent.0.constrained)) | |
assert(b0.stride.1 == b0.stride.1.constrained, halide_error_constraint_violated("b0.stride.1", b0.stride.1, "3", b0.stride.1.constrained)) | |
assert(b0.min.1 == b0.min.1.constrained, halide_error_constraint_violated("b0.min.1", b0.min.1, "0", b0.min.1.constrained)) | |
assert(b0.extent.1 == b0.extent.1.constrained, halide_error_constraint_violated("b0.extent.1", b0.extent.1, "1", b0.extent.1.constrained)) | |
assert(b0.stride.2 == b0.stride.2.constrained, halide_error_constraint_violated("b0.stride.2", b0.stride.2, "1", b0.stride.2.constrained)) | |
assert(b0.min.2 == b0.min.2.constrained, halide_error_constraint_violated("b0.min.2", b0.min.2, "0", b0.min.2.constrained)) | |
assert(b0.extent.2 == b0.extent.2.constrained, halide_error_constraint_violated("b0.extent.2", b0.extent.2, "3", b0.extent.2.constrained)) | |
assert(nan_or_one.stride.0 == nan_or_one.stride.0.constrained, halide_error_constraint_violated("nan_or_one.stride.0", nan_or_one.stride.0, "1", nan_or_one.stride.0.constrained)) | |
let b0.total_extent.0 = int64(b0.extent.0.constrained) | |
let b0.total_extent.1 = int64(b0.extent.1.constrained)*b0.total_extent.0 | |
let b0.total_extent.2 = int64(b0.extent.2.constrained)*b0.total_extent.1 | |
let nan_or_one.total_extent.0 = int64(nan_or_one.extent.0) | |
let nan_or_one.total_extent.1 = int64(nan_or_one.extent.1)*nan_or_one.total_extent.0 | |
assert((uint64)abs(int64(b0.extent.0.constrained)*int64(b0.stride.0.constrained)) <= (uint64)2147483647, halide_error_buffer_allocation_too_large("b0", (uint64)abs(int64(b0.extent.0.constrained)*int64(b0.stride.0.constrained)), (uint64)2147483647)) | |
assert((uint64)abs(int64(b0.extent.1.constrained)*int64(b0.stride.1.constrained)) <= (uint64)2147483647, halide_error_buffer_allocation_too_large("b0", (uint64)abs(int64(b0.extent.1.constrained)*int64(b0.stride.1.constrained)), (uint64)2147483647)) | |
assert(b0.total_extent.1 <= (int64)2147483647, halide_error_buffer_extents_too_large("b0", b0.total_extent.1, (int64)2147483647)) | |
assert((uint64)abs(int64(b0.extent.2.constrained)*int64(b0.stride.2.constrained)) <= (uint64)2147483647, halide_error_buffer_allocation_too_large("b0", (uint64)abs(int64(b0.extent.2.constrained)*int64(b0.stride.2.constrained)), (uint64)2147483647)) | |
assert(b0.total_extent.2 <= (int64)2147483647, halide_error_buffer_extents_too_large("b0", b0.total_extent.2, (int64)2147483647)) | |
assert((uint64)abs(int64(nan_or_one.extent.0)*int64(nan_or_one.stride.0.constrained)) <= (uint64)2147483647, halide_error_buffer_allocation_too_large("nan_or_one", (uint64)abs(int64(nan_or_one.extent.0)*int64(nan_or_one.stride.0.constrained)), (uint64)2147483647)) | |
assert((uint64)abs(int64(nan_or_one.extent.1)*int64(nan_or_one.stride.1)) <= (uint64)2147483647, halide_error_buffer_allocation_too_large("nan_or_one", (uint64)abs(int64(nan_or_one.extent.1)*int64(nan_or_one.stride.1)), (uint64)2147483647)) | |
assert(nan_or_one.total_extent.1 <= (int64)2147483647, halide_error_buffer_extents_too_large("nan_or_one", nan_or_one.total_extent.1, (int64)2147483647)) | |
produce nan_or_one { | |
let nan_or_one.s0.row.loop_max = (nan_or_one.extent.1 + nan_or_one.min.1) + -1 | |
let nan_or_one.s0.row.loop_min = nan_or_one.min.1 | |
let nan_or_one.s0.row.loop_extent = nan_or_one.extent.1 | |
let nan_or_one.s0.col.loop_max = (nan_or_one.extent.0 + nan_or_one.min.0) + -1 | |
let nan_or_one.s0.col.loop_min = nan_or_one.min.0 | |
let nan_or_one.s0.col.loop_extent = nan_or_one.extent.0 | |
let nan_or_one.s0.col.col_outer.loop_extent = nan_or_one.extent.0 | |
let nan_or_one.s0.col.col_outer.loop_max = nan_or_one.extent.0 + -1 | |
let nan_or_one.s0.col.col_outer.loop_min = 0 | |
let nan_or_one.s0.col.col_inner.loop_extent = 1 | |
let nan_or_one.s0.col.col_inner.loop_max = 0 | |
let nan_or_one.s0.col.col_inner.loop_min = 0 | |
let nan_or_one.s0.row.row_outer.loop_extent = nan_or_one.extent.1 | |
let nan_or_one.s0.row.row_outer.loop_max = nan_or_one.extent.1 + -1 | |
let nan_or_one.s0.row.row_outer.loop_min = 0 | |
let nan_or_one.s0.row.row_inner.loop_extent = 1 | |
let nan_or_one.s0.row.row_inner.loop_max = 0 | |
let nan_or_one.s0.row.row_inner.loop_min = 0 | |
let nan_or_one.s0.row.row_outer.loop_min.orig = 0 | |
gpu_block<Default_GPU> (nan_or_one.s0.row.row_outer, 0, nan_or_one.extent.1) { | |
let nan_or_one.s0.row.row_inner.base = nan_or_one.min.1 + nan_or_one.s0.row.row_outer | |
let nan_or_one.s0.col.col_outer.loop_min.orig = 0 | |
gpu_block<Default_GPU> (nan_or_one.s0.col.col_outer, 0, nan_or_one.extent.0) { | |
let nan_or_one.s0.col.col_inner.base = nan_or_one.min.0 + nan_or_one.s0.col.col_outer | |
let nan_or_one.s0.row.row_inner.loop_min.orig = 0 | |
gpu_thread<Default_GPU> (nan_or_one.s0.row.row_inner, 0, 1) { | |
let nan_or_one.s0.row = nan_or_one.min.1 + nan_or_one.s0.row.row_outer | |
let nan_or_one.s0.col.col_inner.loop_min.orig = 0 | |
gpu_thread<Default_GPU> (nan_or_one.s0.col.col_inner, 0, 1) { | |
let nan_or_one.s0.col = nan_or_one.min.0 + nan_or_one.s0.col.col_outer | |
nan_or_one(nan_or_one.min.0 + nan_or_one.s0.col.col_outer, nan_or_one.min.1 + nan_or_one.s0.row.row_outer) = let t1 = (float32)strict_float(nanf) in (float32)strict_float(select((uint1)is_nan_f32((float32)strict_float((float32)b0(nan_or_one.min.0 + nan_or_one.s0.col.col_outer, nan_or_one.min.1 + nan_or_one.s0.row.row_outer, 0))), t1, t1)) | |
} | |
} | |
} | |
} | |
} | |
} | |
Removing code that depends on undef values... | |
Lowering after removing code that depends on undef values: (unchanged) | |
Performing storage folding optimization... | |
Lowering after storage folding: (unchanged) | |
Injecting debug_to_file calls... | |
Lowering after injecting debug_to_file calls: | |
let nan_or_one.s0.row.max = (nan_or_one.extent.1 + nan_or_one.min.1) + -1 | |
let nan_or_one.s0.row.min = nan_or_one.min.1 | |
let nan_or_one.s0.col.max = (nan_or_one.extent.0 + nan_or_one.min.0) + -1 | |
let nan_or_one.s0.col.min = nan_or_one.min.0 | |
let b0.extent.0.required = ((nan_or_one.min.0 + (nan_or_one.extent.0 + -1)) + 1) - (nan_or_one.min.0 + 0) | |
let b0.min.0.required = nan_or_one.min.0 + 0 | |
let b0.stride.0.required = 1 | |
let b0.extent.1.required = ((nan_or_one.min.1 + (nan_or_one.extent.1 + -1)) + 1) - (nan_or_one.min.1 + 0) | |
let b0.min.1.required = nan_or_one.min.1 + 0 | |
let b0.stride.1.required = b0.stride.0.required*b0.extent.0.required | |
let b0.extent.2.required = (0 + 1) - 0 | |
let b0.min.2.required = 0 | |
let b0.stride.2.required = b0.stride.1.required*b0.extent.1.required | |
let nan_or_one.extent.0.required = ((nan_or_one.min.0 + (nan_or_one.extent.0 + -1)) + 1) - (nan_or_one.min.0 + 0) | |
let nan_or_one.min.0.required = nan_or_one.min.0 + 0 | |
let nan_or_one.stride.0.required = 1 | |
let nan_or_one.extent.1.required = ((nan_or_one.min.1 + (nan_or_one.extent.1 + -1)) + 1) - (nan_or_one.min.1 + 0) | |
let nan_or_one.min.1.required = nan_or_one.min.1 + 0 | |
let nan_or_one.stride.1.required = nan_or_one.stride.0.required*nan_or_one.extent.0.required | |
let b0.stride.0.constrained = 3 | |
let b0.min.0.constrained = 0 | |
let b0.extent.0.constrained = 1 | |
let b0.stride.1.constrained = 3 | |
let b0.min.1.constrained = 0 | |
let b0.extent.1.constrained = 1 | |
let b0.stride.2.constrained = 1 | |
let b0.min.2.constrained = 0 | |
let b0.extent.2.constrained = 3 | |
let nan_or_one.stride.0.constrained = 1 | |
let b0.stride.0.proposed = 3 | |
let b0.min.0.proposed = 0 | |
let b0.extent.0.proposed = 1 | |
let b0.stride.1.proposed = 3 | |
let b0.min.1.proposed = 0 | |
let b0.extent.1.proposed = 1 | |
let b0.stride.2.proposed = 1 | |
let b0.min.2.proposed = 0 | |
let b0.extent.2.proposed = 3 | |
let nan_or_one.stride.0.proposed = 1 | |
let nan_or_one.min.0.proposed = nan_or_one.min.0.required | |
let nan_or_one.extent.0.proposed = nan_or_one.extent.0.required | |
let nan_or_one.stride.1.proposed = nan_or_one.stride.1.required | |
let nan_or_one.min.1.proposed = nan_or_one.min.1.required | |
let nan_or_one.extent.1.proposed = nan_or_one.extent.1.required | |
assert(!(uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)b0.buffer) || ((b0.min.0.proposed <= b0.min.0.required) && (((b0.min.0.proposed + b0.extent.0.proposed) - 1) >= ((b0.min.0.required + b0.extent.0.required) - 1))), halide_error_constraints_make_required_region_smaller("Input buffer b0", 0, b0.min.0.proposed, (b0.min.0.proposed + b0.extent.0.proposed) - 1, b0.min.0.required, (b0.min.0.required + b0.extent.0.required) - 1)) | |
assert(!(uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)b0.buffer) || ((b0.min.1.proposed <= b0.min.1.required) && (((b0.min.1.proposed + b0.extent.1.proposed) - 1) >= ((b0.min.1.required + b0.extent.1.required) - 1))), halide_error_constraints_make_required_region_smaller("Input buffer b0", 1, b0.min.1.proposed, (b0.min.1.proposed + b0.extent.1.proposed) - 1, b0.min.1.required, (b0.min.1.required + b0.extent.1.required) - 1)) | |
assert(!(uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)b0.buffer) || ((b0.min.2.proposed <= b0.min.2.required) && (((b0.min.2.proposed + b0.extent.2.proposed) - 1) >= ((b0.min.2.required + b0.extent.2.required) - 1))), halide_error_constraints_make_required_region_smaller("Input buffer b0", 2, b0.min.2.proposed, (b0.min.2.proposed + b0.extent.2.proposed) - 1, b0.min.2.required, (b0.min.2.required + b0.extent.2.required) - 1)) | |
assert(!(uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)nan_or_one.buffer) || ((nan_or_one.min.0.proposed <= nan_or_one.min.0.required) && (((nan_or_one.min.0.proposed + nan_or_one.extent.0.proposed) - 1) >= ((nan_or_one.min.0.required + nan_or_one.extent.0.required) - 1))), halide_error_constraints_make_required_region_smaller("Output buffer nan_or_one", 0, nan_or_one.min.0.proposed, (nan_or_one.min.0.proposed + nan_or_one.extent.0.proposed) - 1, nan_or_one.min.0.required, (nan_or_one.min.0.required + nan_or_one.extent.0.required) - 1)) | |
assert(!(uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)nan_or_one.buffer) || ((nan_or_one.min.1.proposed <= nan_or_one.min.1.required) && (((nan_or_one.min.1.proposed + nan_or_one.extent.1.proposed) - 1) >= ((nan_or_one.min.1.required + nan_or_one.extent.1.required) - 1))), halide_error_constraints_make_required_region_smaller("Output buffer nan_or_one", 1, nan_or_one.min.1.proposed, (nan_or_one.min.1.proposed + nan_or_one.extent.1.proposed) - 1, nan_or_one.min.1.required, (nan_or_one.min.1.required + nan_or_one.extent.1.required) - 1)) | |
if ((uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)b0.buffer)) { | |
(halide_buffer_t *)_halide_buffer_init((halide_buffer_t *)b0.buffer, (halide_dimension_t *)_halide_buffer_get_shape((halide_buffer_t *)b0.buffer), (void *)reinterpret((uint64)0), (uint64)0, (halide_device_interface_t *)reinterpret((uint64)0), 2, 32, 3, (halide_dimension_t *)make_struct(b0.min.0.proposed, b0.extent.0.proposed, b0.stride.0.proposed, 0, b0.min.1.proposed, b0.extent.1.proposed, b0.stride.1.proposed, 0, b0.min.2.proposed, b0.extent.2.proposed, b0.stride.2.proposed, 0), (uint64)0) | |
} | |
if ((uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)nan_or_one.buffer)) { | |
(halide_buffer_t *)_halide_buffer_init((halide_buffer_t *)nan_or_one.buffer, (halide_dimension_t *)_halide_buffer_get_shape((halide_buffer_t *)nan_or_one.buffer), (void *)reinterpret((uint64)0), (uint64)0, (halide_device_interface_t *)reinterpret((uint64)0), 2, 32, 2, (halide_dimension_t *)make_struct(nan_or_one.min.0.proposed, nan_or_one.extent.0.proposed, nan_or_one.stride.0.proposed, 0, nan_or_one.min.1.proposed, nan_or_one.extent.1.proposed, nan_or_one.stride.1.proposed, 0), (uint64)0) | |
} | |
if (!(((uint1)0 || (uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)b0.buffer)) || (uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)nan_or_one.buffer))) { | |
assert((uint32)b0.type == (uint32)73730, halide_error_bad_type("Input buffer b0", (uint32)b0.type, (uint32)73730)) | |
assert(b0.dimensions == 3, halide_error_bad_dimensions("Input buffer b0", b0.dimensions, 3)) | |
assert((uint32)nan_or_one.type == (uint32)73730, halide_error_bad_type("Output buffer nan_or_one", (uint32)nan_or_one.type, (uint32)73730)) | |
assert(nan_or_one.dimensions == 2, halide_error_bad_dimensions("Output buffer nan_or_one", nan_or_one.dimensions, 2)) | |
assert((b0.min.0 <= b0.min.0.required) && (((b0.min.0 + b0.extent.0) - 1) >= ((b0.min.0.required + b0.extent.0.required) - 1)), halide_error_access_out_of_bounds("Input buffer b0", 0, b0.min.0.required, (b0.min.0.required + b0.extent.0.required) - 1, b0.min.0, (b0.min.0 + b0.extent.0) - 1)) | |
assert(b0.extent.0 >= 0, halide_error_buffer_extents_negative("Input buffer b0", 0, b0.extent.0)) | |
assert((b0.min.1 <= b0.min.1.required) && (((b0.min.1 + b0.extent.1) - 1) >= ((b0.min.1.required + b0.extent.1.required) - 1)), halide_error_access_out_of_bounds("Input buffer b0", 1, b0.min.1.required, (b0.min.1.required + b0.extent.1.required) - 1, b0.min.1, (b0.min.1 + b0.extent.1) - 1)) | |
assert(b0.extent.1 >= 0, halide_error_buffer_extents_negative("Input buffer b0", 1, b0.extent.1)) | |
assert((b0.min.2 <= b0.min.2.required) && (((b0.min.2 + b0.extent.2) - 1) >= ((b0.min.2.required + b0.extent.2.required) - 1)), halide_error_access_out_of_bounds("Input buffer b0", 2, b0.min.2.required, (b0.min.2.required + b0.extent.2.required) - 1, b0.min.2, (b0.min.2 + b0.extent.2) - 1)) | |
assert(b0.extent.2 >= 0, halide_error_buffer_extents_negative("Input buffer b0", 2, b0.extent.2)) | |
assert((nan_or_one.min.0 <= nan_or_one.min.0.required) && (((nan_or_one.min.0 + nan_or_one.extent.0) - 1) >= ((nan_or_one.min.0.required + nan_or_one.extent.0.required) - 1)), halide_error_access_out_of_bounds("Output buffer nan_or_one", 0, nan_or_one.min.0.required, (nan_or_one.min.0.required + nan_or_one.extent.0.required) - 1, nan_or_one.min.0, (nan_or_one.min.0 + nan_or_one.extent.0) - 1)) | |
assert(nan_or_one.extent.0 >= 0, halide_error_buffer_extents_negative("Output buffer nan_or_one", 0, nan_or_one.extent.0)) | |
assert((nan_or_one.min.1 <= nan_or_one.min.1.required) && (((nan_or_one.min.1 + nan_or_one.extent.1) - 1) >= ((nan_or_one.min.1.required + nan_or_one.extent.1.required) - 1)), halide_error_access_out_of_bounds("Output buffer nan_or_one", 1, nan_or_one.min.1.required, (nan_or_one.min.1.required + nan_or_one.extent.1.required) - 1, nan_or_one.min.1, (nan_or_one.min.1 + nan_or_one.extent.1) - 1)) | |
assert(nan_or_one.extent.1 >= 0, halide_error_buffer_extents_negative("Output buffer nan_or_one", 1, nan_or_one.extent.1)) | |
assert(b0.stride.0 == b0.stride.0.constrained, halide_error_constraint_violated("b0.stride.0", b0.stride.0, "3", b0.stride.0.constrained)) | |
assert(b0.min.0 == b0.min.0.constrained, halide_error_constraint_violated("b0.min.0", b0.min.0, "0", b0.min.0.constrained)) | |
assert(b0.extent.0 == b0.extent.0.constrained, halide_error_constraint_violated("b0.extent.0", b0.extent.0, "1", b0.extent.0.constrained)) | |
assert(b0.stride.1 == b0.stride.1.constrained, halide_error_constraint_violated("b0.stride.1", b0.stride.1, "3", b0.stride.1.constrained)) | |
assert(b0.min.1 == b0.min.1.constrained, halide_error_constraint_violated("b0.min.1", b0.min.1, "0", b0.min.1.constrained)) | |
assert(b0.extent.1 == b0.extent.1.constrained, halide_error_constraint_violated("b0.extent.1", b0.extent.1, "1", b0.extent.1.constrained)) | |
assert(b0.stride.2 == b0.stride.2.constrained, halide_error_constraint_violated("b0.stride.2", b0.stride.2, "1", b0.stride.2.constrained)) | |
assert(b0.min.2 == b0.min.2.constrained, halide_error_constraint_violated("b0.min.2", b0.min.2, "0", b0.min.2.constrained)) | |
assert(b0.extent.2 == b0.extent.2.constrained, halide_error_constraint_violated("b0.extent.2", b0.extent.2, "3", b0.extent.2.constrained)) | |
assert(nan_or_one.stride.0 == nan_or_one.stride.0.constrained, halide_error_constraint_violated("nan_or_one.stride.0", nan_or_one.stride.0, "1", nan_or_one.stride.0.constrained)) | |
let b0.total_extent.0 = int64(b0.extent.0.constrained) | |
let b0.total_extent.1 = int64(b0.extent.1.constrained)*b0.total_extent.0 | |
let b0.total_extent.2 = int64(b0.extent.2.constrained)*b0.total_extent.1 | |
let nan_or_one.total_extent.0 = int64(nan_or_one.extent.0) | |
let nan_or_one.total_extent.1 = int64(nan_or_one.extent.1)*nan_or_one.total_extent.0 | |
assert((uint64)abs(int64(b0.extent.0.constrained)*int64(b0.stride.0.constrained)) <= (uint64)2147483647, halide_error_buffer_allocation_too_large("b0", (uint64)abs(int64(b0.extent.0.constrained)*int64(b0.stride.0.constrained)), (uint64)2147483647)) | |
assert((uint64)abs(int64(b0.extent.1.constrained)*int64(b0.stride.1.constrained)) <= (uint64)2147483647, halide_error_buffer_allocation_too_large("b0", (uint64)abs(int64(b0.extent.1.constrained)*int64(b0.stride.1.constrained)), (uint64)2147483647)) | |
assert(b0.total_extent.1 <= (int64)2147483647, halide_error_buffer_extents_too_large("b0", b0.total_extent.1, (int64)2147483647)) | |
assert((uint64)abs(int64(b0.extent.2.constrained)*int64(b0.stride.2.constrained)) <= (uint64)2147483647, halide_error_buffer_allocation_too_large("b0", (uint64)abs(int64(b0.extent.2.constrained)*int64(b0.stride.2.constrained)), (uint64)2147483647)) | |
assert(b0.total_extent.2 <= (int64)2147483647, halide_error_buffer_extents_too_large("b0", b0.total_extent.2, (int64)2147483647)) | |
assert((uint64)abs(int64(nan_or_one.extent.0)*int64(nan_or_one.stride.0.constrained)) <= (uint64)2147483647, halide_error_buffer_allocation_too_large("nan_or_one", (uint64)abs(int64(nan_or_one.extent.0)*int64(nan_or_one.stride.0.constrained)), (uint64)2147483647)) | |
assert((uint64)abs(int64(nan_or_one.extent.1)*int64(nan_or_one.stride.1)) <= (uint64)2147483647, halide_error_buffer_allocation_too_large("nan_or_one", (uint64)abs(int64(nan_or_one.extent.1)*int64(nan_or_one.stride.1)), (uint64)2147483647)) | |
assert(nan_or_one.total_extent.1 <= (int64)2147483647, halide_error_buffer_extents_too_large("nan_or_one", nan_or_one.total_extent.1, (int64)2147483647)) | |
produce nan_or_one { | |
let nan_or_one.s0.row.loop_max = (nan_or_one.extent.1 + nan_or_one.min.1) + -1 | |
let nan_or_one.s0.row.loop_min = nan_or_one.min.1 | |
let nan_or_one.s0.row.loop_extent = nan_or_one.extent.1 | |
let nan_or_one.s0.col.loop_max = (nan_or_one.extent.0 + nan_or_one.min.0) + -1 | |
let nan_or_one.s0.col.loop_min = nan_or_one.min.0 | |
let nan_or_one.s0.col.loop_extent = nan_or_one.extent.0 | |
let nan_or_one.s0.col.col_outer.loop_extent = nan_or_one.extent.0 | |
let nan_or_one.s0.col.col_outer.loop_max = nan_or_one.extent.0 + -1 | |
let nan_or_one.s0.col.col_outer.loop_min = 0 | |
let nan_or_one.s0.col.col_inner.loop_extent = 1 | |
let nan_or_one.s0.col.col_inner.loop_max = 0 | |
let nan_or_one.s0.col.col_inner.loop_min = 0 | |
let nan_or_one.s0.row.row_outer.loop_extent = nan_or_one.extent.1 | |
let nan_or_one.s0.row.row_outer.loop_max = nan_or_one.extent.1 + -1 | |
let nan_or_one.s0.row.row_outer.loop_min = 0 | |
let nan_or_one.s0.row.row_inner.loop_extent = 1 | |
let nan_or_one.s0.row.row_inner.loop_max = 0 | |
let nan_or_one.s0.row.row_inner.loop_min = 0 | |
let nan_or_one.s0.row.row_outer.loop_min.orig = 0 | |
gpu_block<Default_GPU> (nan_or_one.s0.row.row_outer, 0, nan_or_one.extent.1) { | |
let nan_or_one.s0.row.row_inner.base = nan_or_one.min.1 + nan_or_one.s0.row.row_outer | |
let nan_or_one.s0.col.col_outer.loop_min.orig = 0 | |
gpu_block<Default_GPU> (nan_or_one.s0.col.col_outer, 0, nan_or_one.extent.0) { | |
let nan_or_one.s0.col.col_inner.base = nan_or_one.min.0 + nan_or_one.s0.col.col_outer | |
let nan_or_one.s0.row.row_inner.loop_min.orig = 0 | |
gpu_thread<Default_GPU> (nan_or_one.s0.row.row_inner, 0, 1) { | |
let nan_or_one.s0.row = nan_or_one.min.1 + nan_or_one.s0.row.row_outer | |
let nan_or_one.s0.col.col_inner.loop_min.orig = 0 | |
gpu_thread<Default_GPU> (nan_or_one.s0.col.col_inner, 0, 1) { | |
let nan_or_one.s0.col = nan_or_one.min.0 + nan_or_one.s0.col.col_outer | |
nan_or_one(nan_or_one.min.0 + nan_or_one.s0.col.col_outer, nan_or_one.min.1 + nan_or_one.s0.row.row_outer) = let t1 = (float32)strict_float(nanf) in (float32)strict_float(select((uint1)is_nan_f32((float32)strict_float((float32)b0(nan_or_one.min.0 + nan_or_one.s0.col.col_outer, nan_or_one.min.1 + nan_or_one.s0.row.row_outer, 0))), t1, t1)) | |
} | |
} | |
} | |
} | |
} | |
} | |
Injecting prefetches... | |
Lowering after injecting prefetches: (unchanged) | |
Discarding safe promises... | |
Lowering after discarding safe promises: (unchanged) | |
Dynamically skipping stages... | |
Lowering after dynamically skipping stages: (unchanged) | |
Forking asynchronous producers... | |
Lowering after forking asynchronous producers: | |
let nan_or_one.s0.row.max = (nan_or_one.extent.1 + nan_or_one.min.1) + -1 | |
let nan_or_one.s0.row.min = nan_or_one.min.1 | |
let nan_or_one.s0.col.max = (nan_or_one.extent.0 + nan_or_one.min.0) + -1 | |
let nan_or_one.s0.col.min = nan_or_one.min.0 | |
let b0.extent.0.required = ((nan_or_one.min.0 + (nan_or_one.extent.0 + -1)) + 1) - (nan_or_one.min.0 + 0) | |
let b0.min.0.required = nan_or_one.min.0 + 0 | |
let b0.stride.0.required = 1 | |
let b0.extent.1.required = ((nan_or_one.min.1 + (nan_or_one.extent.1 + -1)) + 1) - (nan_or_one.min.1 + 0) | |
let b0.min.1.required = nan_or_one.min.1 + 0 | |
let b0.stride.1.required = b0.stride.0.required*b0.extent.0.required | |
let b0.extent.2.required = (0 + 1) - 0 | |
let b0.min.2.required = 0 | |
let b0.stride.2.required = b0.stride.1.required*b0.extent.1.required | |
let nan_or_one.extent.0.required = ((nan_or_one.min.0 + (nan_or_one.extent.0 + -1)) + 1) - (nan_or_one.min.0 + 0) | |
let nan_or_one.min.0.required = nan_or_one.min.0 + 0 | |
let nan_or_one.stride.0.required = 1 | |
let nan_or_one.extent.1.required = ((nan_or_one.min.1 + (nan_or_one.extent.1 + -1)) + 1) - (nan_or_one.min.1 + 0) | |
let nan_or_one.min.1.required = nan_or_one.min.1 + 0 | |
let nan_or_one.stride.1.required = nan_or_one.stride.0.required*nan_or_one.extent.0.required | |
let b0.stride.0.constrained = 3 | |
let b0.min.0.constrained = 0 | |
let b0.extent.0.constrained = 1 | |
let b0.stride.1.constrained = 3 | |
let b0.min.1.constrained = 0 | |
let b0.extent.1.constrained = 1 | |
let b0.stride.2.constrained = 1 | |
let b0.min.2.constrained = 0 | |
let b0.extent.2.constrained = 3 | |
let nan_or_one.stride.0.constrained = 1 | |
let b0.stride.0.proposed = 3 | |
let b0.min.0.proposed = 0 | |
let b0.extent.0.proposed = 1 | |
let b0.stride.1.proposed = 3 | |
let b0.min.1.proposed = 0 | |
let b0.extent.1.proposed = 1 | |
let b0.stride.2.proposed = 1 | |
let b0.min.2.proposed = 0 | |
let b0.extent.2.proposed = 3 | |
let nan_or_one.stride.0.proposed = 1 | |
let nan_or_one.min.0.proposed = nan_or_one.min.0.required | |
let nan_or_one.extent.0.proposed = nan_or_one.extent.0.required | |
let nan_or_one.stride.1.proposed = nan_or_one.stride.1.required | |
let nan_or_one.min.1.proposed = nan_or_one.min.1.required | |
let nan_or_one.extent.1.proposed = nan_or_one.extent.1.required | |
assert(!(uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)b0.buffer) || ((b0.min.0.proposed <= b0.min.0.required) && (((b0.min.0.proposed + b0.extent.0.proposed) - 1) >= ((b0.min.0.required + b0.extent.0.required) - 1))), halide_error_constraints_make_required_region_smaller("Input buffer b0", 0, b0.min.0.proposed, (b0.min.0.proposed + b0.extent.0.proposed) - 1, b0.min.0.required, (b0.min.0.required + b0.extent.0.required) - 1)) | |
assert(!(uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)b0.buffer) || ((b0.min.1.proposed <= b0.min.1.required) && (((b0.min.1.proposed + b0.extent.1.proposed) - 1) >= ((b0.min.1.required + b0.extent.1.required) - 1))), halide_error_constraints_make_required_region_smaller("Input buffer b0", 1, b0.min.1.proposed, (b0.min.1.proposed + b0.extent.1.proposed) - 1, b0.min.1.required, (b0.min.1.required + b0.extent.1.required) - 1)) | |
assert(!(uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)b0.buffer) || ((b0.min.2.proposed <= b0.min.2.required) && (((b0.min.2.proposed + b0.extent.2.proposed) - 1) >= ((b0.min.2.required + b0.extent.2.required) - 1))), halide_error_constraints_make_required_region_smaller("Input buffer b0", 2, b0.min.2.proposed, (b0.min.2.proposed + b0.extent.2.proposed) - 1, b0.min.2.required, (b0.min.2.required + b0.extent.2.required) - 1)) | |
assert(!(uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)nan_or_one.buffer) || ((nan_or_one.min.0.proposed <= nan_or_one.min.0.required) && (((nan_or_one.min.0.proposed + nan_or_one.extent.0.proposed) - 1) >= ((nan_or_one.min.0.required + nan_or_one.extent.0.required) - 1))), halide_error_constraints_make_required_region_smaller("Output buffer nan_or_one", 0, nan_or_one.min.0.proposed, (nan_or_one.min.0.proposed + nan_or_one.extent.0.proposed) - 1, nan_or_one.min.0.required, (nan_or_one.min.0.required + nan_or_one.extent.0.required) - 1)) | |
assert(!(uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)nan_or_one.buffer) || ((nan_or_one.min.1.proposed <= nan_or_one.min.1.required) && (((nan_or_one.min.1.proposed + nan_or_one.extent.1.proposed) - 1) >= ((nan_or_one.min.1.required + nan_or_one.extent.1.required) - 1))), halide_error_constraints_make_required_region_smaller("Output buffer nan_or_one", 1, nan_or_one.min.1.proposed, (nan_or_one.min.1.proposed + nan_or_one.extent.1.proposed) - 1, nan_or_one.min.1.required, (nan_or_one.min.1.required + nan_or_one.extent.1.required) - 1)) | |
if ((uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)b0.buffer)) { | |
(halide_buffer_t *)_halide_buffer_init((halide_buffer_t *)b0.buffer, (halide_dimension_t *)_halide_buffer_get_shape((halide_buffer_t *)b0.buffer), (void *)reinterpret((uint64)0), (uint64)0, (halide_device_interface_t *)reinterpret((uint64)0), 2, 32, 3, (halide_dimension_t *)make_struct(b0.min.0.proposed, b0.extent.0.proposed, b0.stride.0.proposed, 0, b0.min.1.proposed, b0.extent.1.proposed, b0.stride.1.proposed, 0, b0.min.2.proposed, b0.extent.2.proposed, b0.stride.2.proposed, 0), (uint64)0) | |
} | |
if ((uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)nan_or_one.buffer)) { | |
(halide_buffer_t *)_halide_buffer_init((halide_buffer_t *)nan_or_one.buffer, (halide_dimension_t *)_halide_buffer_get_shape((halide_buffer_t *)nan_or_one.buffer), (void *)reinterpret((uint64)0), (uint64)0, (halide_device_interface_t *)reinterpret((uint64)0), 2, 32, 2, (halide_dimension_t *)make_struct(nan_or_one.min.0.proposed, nan_or_one.extent.0.proposed, nan_or_one.stride.0.proposed, 0, nan_or_one.min.1.proposed, nan_or_one.extent.1.proposed, nan_or_one.stride.1.proposed, 0), (uint64)0) | |
} | |
if (!(((uint1)0 || (uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)b0.buffer)) || (uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)nan_or_one.buffer))) { | |
assert((uint32)b0.type == (uint32)73730, halide_error_bad_type("Input buffer b0", (uint32)b0.type, (uint32)73730)) | |
assert(b0.dimensions == 3, halide_error_bad_dimensions("Input buffer b0", b0.dimensions, 3)) | |
assert((uint32)nan_or_one.type == (uint32)73730, halide_error_bad_type("Output buffer nan_or_one", (uint32)nan_or_one.type, (uint32)73730)) | |
assert(nan_or_one.dimensions == 2, halide_error_bad_dimensions("Output buffer nan_or_one", nan_or_one.dimensions, 2)) | |
assert((b0.min.0 <= b0.min.0.required) && (((b0.min.0 + b0.extent.0) - 1) >= ((b0.min.0.required + b0.extent.0.required) - 1)), halide_error_access_out_of_bounds("Input buffer b0", 0, b0.min.0.required, (b0.min.0.required + b0.extent.0.required) - 1, b0.min.0, (b0.min.0 + b0.extent.0) - 1)) | |
assert(b0.extent.0 >= 0, halide_error_buffer_extents_negative("Input buffer b0", 0, b0.extent.0)) | |
assert((b0.min.1 <= b0.min.1.required) && (((b0.min.1 + b0.extent.1) - 1) >= ((b0.min.1.required + b0.extent.1.required) - 1)), halide_error_access_out_of_bounds("Input buffer b0", 1, b0.min.1.required, (b0.min.1.required + b0.extent.1.required) - 1, b0.min.1, (b0.min.1 + b0.extent.1) - 1)) | |
assert(b0.extent.1 >= 0, halide_error_buffer_extents_negative("Input buffer b0", 1, b0.extent.1)) | |
assert((b0.min.2 <= b0.min.2.required) && (((b0.min.2 + b0.extent.2) - 1) >= ((b0.min.2.required + b0.extent.2.required) - 1)), halide_error_access_out_of_bounds("Input buffer b0", 2, b0.min.2.required, (b0.min.2.required + b0.extent.2.required) - 1, b0.min.2, (b0.min.2 + b0.extent.2) - 1)) | |
assert(b0.extent.2 >= 0, halide_error_buffer_extents_negative("Input buffer b0", 2, b0.extent.2)) | |
assert((nan_or_one.min.0 <= nan_or_one.min.0.required) && (((nan_or_one.min.0 + nan_or_one.extent.0) - 1) >= ((nan_or_one.min.0.required + nan_or_one.extent.0.required) - 1)), halide_error_access_out_of_bounds("Output buffer nan_or_one", 0, nan_or_one.min.0.required, (nan_or_one.min.0.required + nan_or_one.extent.0.required) - 1, nan_or_one.min.0, (nan_or_one.min.0 + nan_or_one.extent.0) - 1)) | |
assert(nan_or_one.extent.0 >= 0, halide_error_buffer_extents_negative("Output buffer nan_or_one", 0, nan_or_one.extent.0)) | |
assert((nan_or_one.min.1 <= nan_or_one.min.1.required) && (((nan_or_one.min.1 + nan_or_one.extent.1) - 1) >= ((nan_or_one.min.1.required + nan_or_one.extent.1.required) - 1)), halide_error_access_out_of_bounds("Output buffer nan_or_one", 1, nan_or_one.min.1.required, (nan_or_one.min.1.required + nan_or_one.extent.1.required) - 1, nan_or_one.min.1, (nan_or_one.min.1 + nan_or_one.extent.1) - 1)) | |
assert(nan_or_one.extent.1 >= 0, halide_error_buffer_extents_negative("Output buffer nan_or_one", 1, nan_or_one.extent.1)) | |
assert(b0.stride.0 == b0.stride.0.constrained, halide_error_constraint_violated("b0.stride.0", b0.stride.0, "3", b0.stride.0.constrained)) | |
assert(b0.min.0 == b0.min.0.constrained, halide_error_constraint_violated("b0.min.0", b0.min.0, "0", b0.min.0.constrained)) | |
assert(b0.extent.0 == b0.extent.0.constrained, halide_error_constraint_violated("b0.extent.0", b0.extent.0, "1", b0.extent.0.constrained)) | |
assert(b0.stride.1 == b0.stride.1.constrained, halide_error_constraint_violated("b0.stride.1", b0.stride.1, "3", b0.stride.1.constrained)) | |
assert(b0.min.1 == b0.min.1.constrained, halide_error_constraint_violated("b0.min.1", b0.min.1, "0", b0.min.1.constrained)) | |
assert(b0.extent.1 == b0.extent.1.constrained, halide_error_constraint_violated("b0.extent.1", b0.extent.1, "1", b0.extent.1.constrained)) | |
assert(b0.stride.2 == b0.stride.2.constrained, halide_error_constraint_violated("b0.stride.2", b0.stride.2, "1", b0.stride.2.constrained)) | |
assert(b0.min.2 == b0.min.2.constrained, halide_error_constraint_violated("b0.min.2", b0.min.2, "0", b0.min.2.constrained)) | |
assert(b0.extent.2 == b0.extent.2.constrained, halide_error_constraint_violated("b0.extent.2", b0.extent.2, "3", b0.extent.2.constrained)) | |
assert(nan_or_one.stride.0 == nan_or_one.stride.0.constrained, halide_error_constraint_violated("nan_or_one.stride.0", nan_or_one.stride.0, "1", nan_or_one.stride.0.constrained)) | |
let b0.total_extent.0 = int64(b0.extent.0.constrained) | |
let b0.total_extent.1 = int64(b0.extent.1.constrained)*b0.total_extent.0 | |
let b0.total_extent.2 = int64(b0.extent.2.constrained)*b0.total_extent.1 | |
let nan_or_one.total_extent.0 = int64(nan_or_one.extent.0) | |
let nan_or_one.total_extent.1 = int64(nan_or_one.extent.1)*nan_or_one.total_extent.0 | |
assert((uint64)abs(int64(b0.extent.0.constrained)*int64(b0.stride.0.constrained)) <= (uint64)2147483647, halide_error_buffer_allocation_too_large("b0", (uint64)abs(int64(b0.extent.0.constrained)*int64(b0.stride.0.constrained)), (uint64)2147483647)) | |
assert((uint64)abs(int64(b0.extent.1.constrained)*int64(b0.stride.1.constrained)) <= (uint64)2147483647, halide_error_buffer_allocation_too_large("b0", (uint64)abs(int64(b0.extent.1.constrained)*int64(b0.stride.1.constrained)), (uint64)2147483647)) | |
assert(b0.total_extent.1 <= (int64)2147483647, halide_error_buffer_extents_too_large("b0", b0.total_extent.1, (int64)2147483647)) | |
assert((uint64)abs(int64(b0.extent.2.constrained)*int64(b0.stride.2.constrained)) <= (uint64)2147483647, halide_error_buffer_allocation_too_large("b0", (uint64)abs(int64(b0.extent.2.constrained)*int64(b0.stride.2.constrained)), (uint64)2147483647)) | |
assert(b0.total_extent.2 <= (int64)2147483647, halide_error_buffer_extents_too_large("b0", b0.total_extent.2, (int64)2147483647)) | |
assert((uint64)abs(int64(nan_or_one.extent.0)*int64(nan_or_one.stride.0.constrained)) <= (uint64)2147483647, halide_error_buffer_allocation_too_large("nan_or_one", (uint64)abs(int64(nan_or_one.extent.0)*int64(nan_or_one.stride.0.constrained)), (uint64)2147483647)) | |
assert((uint64)abs(int64(nan_or_one.extent.1)*int64(nan_or_one.stride.1)) <= (uint64)2147483647, halide_error_buffer_allocation_too_large("nan_or_one", (uint64)abs(int64(nan_or_one.extent.1)*int64(nan_or_one.stride.1)), (uint64)2147483647)) | |
assert(nan_or_one.total_extent.1 <= (int64)2147483647, halide_error_buffer_extents_too_large("nan_or_one", nan_or_one.total_extent.1, (int64)2147483647)) | |
let nan_or_one.s0.row.loop_max = (nan_or_one.extent.1 + nan_or_one.min.1) + -1 | |
let nan_or_one.s0.row.loop_min = nan_or_one.min.1 | |
let nan_or_one.s0.row.loop_extent = nan_or_one.extent.1 | |
let nan_or_one.s0.col.loop_max = (nan_or_one.extent.0 + nan_or_one.min.0) + -1 | |
let nan_or_one.s0.col.loop_min = nan_or_one.min.0 | |
let nan_or_one.s0.col.loop_extent = nan_or_one.extent.0 | |
let nan_or_one.s0.col.col_outer.loop_extent = nan_or_one.extent.0 | |
let nan_or_one.s0.col.col_outer.loop_max = nan_or_one.extent.0 + -1 | |
let nan_or_one.s0.col.col_outer.loop_min = 0 | |
let nan_or_one.s0.col.col_inner.loop_extent = 1 | |
let nan_or_one.s0.col.col_inner.loop_max = 0 | |
let nan_or_one.s0.col.col_inner.loop_min = 0 | |
let nan_or_one.s0.row.row_outer.loop_extent = nan_or_one.extent.1 | |
let nan_or_one.s0.row.row_outer.loop_max = nan_or_one.extent.1 + -1 | |
let nan_or_one.s0.row.row_outer.loop_min = 0 | |
let nan_or_one.s0.row.row_inner.loop_extent = 1 | |
let nan_or_one.s0.row.row_inner.loop_max = 0 | |
let nan_or_one.s0.row.row_inner.loop_min = 0 | |
let nan_or_one.s0.row.row_outer.loop_min.orig = 0 | |
produce nan_or_one { | |
gpu_block<Default_GPU> (nan_or_one.s0.row.row_outer, 0, nan_or_one.extent.1) { | |
let nan_or_one.s0.row.row_inner.base = nan_or_one.min.1 + nan_or_one.s0.row.row_outer | |
let nan_or_one.s0.col.col_outer.loop_min.orig = 0 | |
gpu_block<Default_GPU> (nan_or_one.s0.col.col_outer, 0, nan_or_one.extent.0) { | |
let nan_or_one.s0.col.col_inner.base = nan_or_one.min.0 + nan_or_one.s0.col.col_outer | |
let nan_or_one.s0.row.row_inner.loop_min.orig = 0 | |
gpu_thread<Default_GPU> (nan_or_one.s0.row.row_inner, 0, 1) { | |
let nan_or_one.s0.row = nan_or_one.min.1 + nan_or_one.s0.row.row_outer | |
let nan_or_one.s0.col.col_inner.loop_min.orig = 0 | |
gpu_thread<Default_GPU> (nan_or_one.s0.col.col_inner, 0, 1) { | |
let nan_or_one.s0.col = nan_or_one.min.0 + nan_or_one.s0.col.col_outer | |
nan_or_one(nan_or_one.min.0 + nan_or_one.s0.col.col_outer, nan_or_one.min.1 + nan_or_one.s0.row.row_outer) = let t1 = (float32)strict_float(nanf) in (float32)strict_float(select((uint1)is_nan_f32((float32)strict_float((float32)b0(nan_or_one.min.0 + nan_or_one.s0.col.col_outer, nan_or_one.min.1 + nan_or_one.s0.row.row_outer, 0))), t1, t1)) | |
} | |
} | |
} | |
} | |
} | |
} | |
Destructuring tuple-valued realizations... | |
Lowering after destructuring tuple-valued realizations: | |
let nan_or_one.s0.row.max = (nan_or_one.extent.1 + nan_or_one.min.1) + -1 | |
let nan_or_one.s0.row.min = nan_or_one.min.1 | |
let nan_or_one.s0.col.max = (nan_or_one.extent.0 + nan_or_one.min.0) + -1 | |
let nan_or_one.s0.col.min = nan_or_one.min.0 | |
let b0.extent.0.required = ((nan_or_one.min.0 + (nan_or_one.extent.0 + -1)) + 1) - (nan_or_one.min.0 + 0) | |
let b0.min.0.required = nan_or_one.min.0 + 0 | |
let b0.stride.0.required = 1 | |
let b0.extent.1.required = ((nan_or_one.min.1 + (nan_or_one.extent.1 + -1)) + 1) - (nan_or_one.min.1 + 0) | |
let b0.min.1.required = nan_or_one.min.1 + 0 | |
let b0.stride.1.required = b0.stride.0.required*b0.extent.0.required | |
let b0.extent.2.required = (0 + 1) - 0 | |
let b0.min.2.required = 0 | |
let b0.stride.2.required = b0.stride.1.required*b0.extent.1.required | |
let nan_or_one.extent.0.required = ((nan_or_one.min.0 + (nan_or_one.extent.0 + -1)) + 1) - (nan_or_one.min.0 + 0) | |
let nan_or_one.min.0.required = nan_or_one.min.0 + 0 | |
let nan_or_one.stride.0.required = 1 | |
let nan_or_one.extent.1.required = ((nan_or_one.min.1 + (nan_or_one.extent.1 + -1)) + 1) - (nan_or_one.min.1 + 0) | |
let nan_or_one.min.1.required = nan_or_one.min.1 + 0 | |
let nan_or_one.stride.1.required = nan_or_one.stride.0.required*nan_or_one.extent.0.required | |
let b0.stride.0.constrained = 3 | |
let b0.min.0.constrained = 0 | |
let b0.extent.0.constrained = 1 | |
let b0.stride.1.constrained = 3 | |
let b0.min.1.constrained = 0 | |
let b0.extent.1.constrained = 1 | |
let b0.stride.2.constrained = 1 | |
let b0.min.2.constrained = 0 | |
let b0.extent.2.constrained = 3 | |
let nan_or_one.stride.0.constrained = 1 | |
let b0.stride.0.proposed = 3 | |
let b0.min.0.proposed = 0 | |
let b0.extent.0.proposed = 1 | |
let b0.stride.1.proposed = 3 | |
let b0.min.1.proposed = 0 | |
let b0.extent.1.proposed = 1 | |
let b0.stride.2.proposed = 1 | |
let b0.min.2.proposed = 0 | |
let b0.extent.2.proposed = 3 | |
let nan_or_one.stride.0.proposed = 1 | |
let nan_or_one.min.0.proposed = nan_or_one.min.0.required | |
let nan_or_one.extent.0.proposed = nan_or_one.extent.0.required | |
let nan_or_one.stride.1.proposed = nan_or_one.stride.1.required | |
let nan_or_one.min.1.proposed = nan_or_one.min.1.required | |
let nan_or_one.extent.1.proposed = nan_or_one.extent.1.required | |
assert(!(uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)b0.buffer) || ((b0.min.0.proposed <= b0.min.0.required) && (((b0.min.0.proposed + b0.extent.0.proposed) - 1) >= ((b0.min.0.required + b0.extent.0.required) - 1))), halide_error_constraints_make_required_region_smaller("Input buffer b0", 0, b0.min.0.proposed, (b0.min.0.proposed + b0.extent.0.proposed) - 1, b0.min.0.required, (b0.min.0.required + b0.extent.0.required) - 1)) | |
assert(!(uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)b0.buffer) || ((b0.min.1.proposed <= b0.min.1.required) && (((b0.min.1.proposed + b0.extent.1.proposed) - 1) >= ((b0.min.1.required + b0.extent.1.required) - 1))), halide_error_constraints_make_required_region_smaller("Input buffer b0", 1, b0.min.1.proposed, (b0.min.1.proposed + b0.extent.1.proposed) - 1, b0.min.1.required, (b0.min.1.required + b0.extent.1.required) - 1)) | |
assert(!(uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)b0.buffer) || ((b0.min.2.proposed <= b0.min.2.required) && (((b0.min.2.proposed + b0.extent.2.proposed) - 1) >= ((b0.min.2.required + b0.extent.2.required) - 1))), halide_error_constraints_make_required_region_smaller("Input buffer b0", 2, b0.min.2.proposed, (b0.min.2.proposed + b0.extent.2.proposed) - 1, b0.min.2.required, (b0.min.2.required + b0.extent.2.required) - 1)) | |
assert(!(uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)nan_or_one.buffer) || ((nan_or_one.min.0.proposed <= nan_or_one.min.0.required) && (((nan_or_one.min.0.proposed + nan_or_one.extent.0.proposed) - 1) >= ((nan_or_one.min.0.required + nan_or_one.extent.0.required) - 1))), halide_error_constraints_make_required_region_smaller("Output buffer nan_or_one", 0, nan_or_one.min.0.proposed, (nan_or_one.min.0.proposed + nan_or_one.extent.0.proposed) - 1, nan_or_one.min.0.required, (nan_or_one.min.0.required + nan_or_one.extent.0.required) - 1)) | |
assert(!(uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)nan_or_one.buffer) || ((nan_or_one.min.1.proposed <= nan_or_one.min.1.required) && (((nan_or_one.min.1.proposed + nan_or_one.extent.1.proposed) - 1) >= ((nan_or_one.min.1.required + nan_or_one.extent.1.required) - 1))), halide_error_constraints_make_required_region_smaller("Output buffer nan_or_one", 1, nan_or_one.min.1.proposed, (nan_or_one.min.1.proposed + nan_or_one.extent.1.proposed) - 1, nan_or_one.min.1.required, (nan_or_one.min.1.required + nan_or_one.extent.1.required) - 1)) | |
if ((uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)b0.buffer)) { | |
(halide_buffer_t *)_halide_buffer_init((halide_buffer_t *)b0.buffer, (halide_dimension_t *)_halide_buffer_get_shape((halide_buffer_t *)b0.buffer), (void *)reinterpret((uint64)0), (uint64)0, (halide_device_interface_t *)reinterpret((uint64)0), 2, 32, 3, (halide_dimension_t *)make_struct(b0.min.0.proposed, b0.extent.0.proposed, b0.stride.0.proposed, 0, b0.min.1.proposed, b0.extent.1.proposed, b0.stride.1.proposed, 0, b0.min.2.proposed, b0.extent.2.proposed, b0.stride.2.proposed, 0), (uint64)0) | |
} | |
if ((uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)nan_or_one.buffer)) { | |
(halide_buffer_t *)_halide_buffer_init((halide_buffer_t *)nan_or_one.buffer, (halide_dimension_t *)_halide_buffer_get_shape((halide_buffer_t *)nan_or_one.buffer), (void *)reinterpret((uint64)0), (uint64)0, (halide_device_interface_t *)reinterpret((uint64)0), 2, 32, 2, (halide_dimension_t *)make_struct(nan_or_one.min.0.proposed, nan_or_one.extent.0.proposed, nan_or_one.stride.0.proposed, 0, nan_or_one.min.1.proposed, nan_or_one.extent.1.proposed, nan_or_one.stride.1.proposed, 0), (uint64)0) | |
} | |
if (!(((uint1)0 || (uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)b0.buffer)) || (uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)nan_or_one.buffer))) { | |
assert((uint32)b0.type == (uint32)73730, halide_error_bad_type("Input buffer b0", (uint32)b0.type, (uint32)73730)) | |
assert(b0.dimensions == 3, halide_error_bad_dimensions("Input buffer b0", b0.dimensions, 3)) | |
assert((uint32)nan_or_one.type == (uint32)73730, halide_error_bad_type("Output buffer nan_or_one", (uint32)nan_or_one.type, (uint32)73730)) | |
assert(nan_or_one.dimensions == 2, halide_error_bad_dimensions("Output buffer nan_or_one", nan_or_one.dimensions, 2)) | |
assert((b0.min.0 <= b0.min.0.required) && (((b0.min.0 + b0.extent.0) - 1) >= ((b0.min.0.required + b0.extent.0.required) - 1)), halide_error_access_out_of_bounds("Input buffer b0", 0, b0.min.0.required, (b0.min.0.required + b0.extent.0.required) - 1, b0.min.0, (b0.min.0 + b0.extent.0) - 1)) | |
assert(b0.extent.0 >= 0, halide_error_buffer_extents_negative("Input buffer b0", 0, b0.extent.0)) | |
assert((b0.min.1 <= b0.min.1.required) && (((b0.min.1 + b0.extent.1) - 1) >= ((b0.min.1.required + b0.extent.1.required) - 1)), halide_error_access_out_of_bounds("Input buffer b0", 1, b0.min.1.required, (b0.min.1.required + b0.extent.1.required) - 1, b0.min.1, (b0.min.1 + b0.extent.1) - 1)) | |
assert(b0.extent.1 >= 0, halide_error_buffer_extents_negative("Input buffer b0", 1, b0.extent.1)) | |
assert((b0.min.2 <= b0.min.2.required) && (((b0.min.2 + b0.extent.2) - 1) >= ((b0.min.2.required + b0.extent.2.required) - 1)), halide_error_access_out_of_bounds("Input buffer b0", 2, b0.min.2.required, (b0.min.2.required + b0.extent.2.required) - 1, b0.min.2, (b0.min.2 + b0.extent.2) - 1)) | |
assert(b0.extent.2 >= 0, halide_error_buffer_extents_negative("Input buffer b0", 2, b0.extent.2)) | |
assert((nan_or_one.min.0 <= nan_or_one.min.0.required) && (((nan_or_one.min.0 + nan_or_one.extent.0) - 1) >= ((nan_or_one.min.0.required + nan_or_one.extent.0.required) - 1)), halide_error_access_out_of_bounds("Output buffer nan_or_one", 0, nan_or_one.min.0.required, (nan_or_one.min.0.required + nan_or_one.extent.0.required) - 1, nan_or_one.min.0, (nan_or_one.min.0 + nan_or_one.extent.0) - 1)) | |
assert(nan_or_one.extent.0 >= 0, halide_error_buffer_extents_negative("Output buffer nan_or_one", 0, nan_or_one.extent.0)) | |
assert((nan_or_one.min.1 <= nan_or_one.min.1.required) && (((nan_or_one.min.1 + nan_or_one.extent.1) - 1) >= ((nan_or_one.min.1.required + nan_or_one.extent.1.required) - 1)), halide_error_access_out_of_bounds("Output buffer nan_or_one", 1, nan_or_one.min.1.required, (nan_or_one.min.1.required + nan_or_one.extent.1.required) - 1, nan_or_one.min.1, (nan_or_one.min.1 + nan_or_one.extent.1) - 1)) | |
assert(nan_or_one.extent.1 >= 0, halide_error_buffer_extents_negative("Output buffer nan_or_one", 1, nan_or_one.extent.1)) | |
assert(b0.stride.0 == b0.stride.0.constrained, halide_error_constraint_violated("b0.stride.0", b0.stride.0, "3", b0.stride.0.constrained)) | |
assert(b0.min.0 == b0.min.0.constrained, halide_error_constraint_violated("b0.min.0", b0.min.0, "0", b0.min.0.constrained)) | |
assert(b0.extent.0 == b0.extent.0.constrained, halide_error_constraint_violated("b0.extent.0", b0.extent.0, "1", b0.extent.0.constrained)) | |
assert(b0.stride.1 == b0.stride.1.constrained, halide_error_constraint_violated("b0.stride.1", b0.stride.1, "3", b0.stride.1.constrained)) | |
assert(b0.min.1 == b0.min.1.constrained, halide_error_constraint_violated("b0.min.1", b0.min.1, "0", b0.min.1.constrained)) | |
assert(b0.extent.1 == b0.extent.1.constrained, halide_error_constraint_violated("b0.extent.1", b0.extent.1, "1", b0.extent.1.constrained)) | |
assert(b0.stride.2 == b0.stride.2.constrained, halide_error_constraint_violated("b0.stride.2", b0.stride.2, "1", b0.stride.2.constrained)) | |
assert(b0.min.2 == b0.min.2.constrained, halide_error_constraint_violated("b0.min.2", b0.min.2, "0", b0.min.2.constrained)) | |
assert(b0.extent.2 == b0.extent.2.constrained, halide_error_constraint_violated("b0.extent.2", b0.extent.2, "3", b0.extent.2.constrained)) | |
assert(nan_or_one.stride.0 == nan_or_one.stride.0.constrained, halide_error_constraint_violated("nan_or_one.stride.0", nan_or_one.stride.0, "1", nan_or_one.stride.0.constrained)) | |
let b0.total_extent.0 = int64(b0.extent.0.constrained) | |
let b0.total_extent.1 = int64(b0.extent.1.constrained)*b0.total_extent.0 | |
let b0.total_extent.2 = int64(b0.extent.2.constrained)*b0.total_extent.1 | |
let nan_or_one.total_extent.0 = int64(nan_or_one.extent.0) | |
let nan_or_one.total_extent.1 = int64(nan_or_one.extent.1)*nan_or_one.total_extent.0 | |
assert((uint64)abs(int64(b0.extent.0.constrained)*int64(b0.stride.0.constrained)) <= (uint64)2147483647, halide_error_buffer_allocation_too_large("b0", (uint64)abs(int64(b0.extent.0.constrained)*int64(b0.stride.0.constrained)), (uint64)2147483647)) | |
assert((uint64)abs(int64(b0.extent.1.constrained)*int64(b0.stride.1.constrained)) <= (uint64)2147483647, halide_error_buffer_allocation_too_large("b0", (uint64)abs(int64(b0.extent.1.constrained)*int64(b0.stride.1.constrained)), (uint64)2147483647)) | |
assert(b0.total_extent.1 <= (int64)2147483647, halide_error_buffer_extents_too_large("b0", b0.total_extent.1, (int64)2147483647)) | |
assert((uint64)abs(int64(b0.extent.2.constrained)*int64(b0.stride.2.constrained)) <= (uint64)2147483647, halide_error_buffer_allocation_too_large("b0", (uint64)abs(int64(b0.extent.2.constrained)*int64(b0.stride.2.constrained)), (uint64)2147483647)) | |
assert(b0.total_extent.2 <= (int64)2147483647, halide_error_buffer_extents_too_large("b0", b0.total_extent.2, (int64)2147483647)) | |
assert((uint64)abs(int64(nan_or_one.extent.0)*int64(nan_or_one.stride.0.constrained)) <= (uint64)2147483647, halide_error_buffer_allocation_too_large("nan_or_one", (uint64)abs(int64(nan_or_one.extent.0)*int64(nan_or_one.stride.0.constrained)), (uint64)2147483647)) | |
assert((uint64)abs(int64(nan_or_one.extent.1)*int64(nan_or_one.stride.1)) <= (uint64)2147483647, halide_error_buffer_allocation_too_large("nan_or_one", (uint64)abs(int64(nan_or_one.extent.1)*int64(nan_or_one.stride.1)), (uint64)2147483647)) | |
assert(nan_or_one.total_extent.1 <= (int64)2147483647, halide_error_buffer_extents_too_large("nan_or_one", nan_or_one.total_extent.1, (int64)2147483647)) | |
let nan_or_one.s0.row.loop_max = (nan_or_one.extent.1 + nan_or_one.min.1) + -1 | |
let nan_or_one.s0.row.loop_min = nan_or_one.min.1 | |
let nan_or_one.s0.row.loop_extent = nan_or_one.extent.1 | |
let nan_or_one.s0.col.loop_max = (nan_or_one.extent.0 + nan_or_one.min.0) + -1 | |
let nan_or_one.s0.col.loop_min = nan_or_one.min.0 | |
let nan_or_one.s0.col.loop_extent = nan_or_one.extent.0 | |
let nan_or_one.s0.col.col_outer.loop_extent = nan_or_one.extent.0 | |
let nan_or_one.s0.col.col_outer.loop_max = nan_or_one.extent.0 + -1 | |
let nan_or_one.s0.col.col_outer.loop_min = 0 | |
let nan_or_one.s0.col.col_inner.loop_extent = 1 | |
let nan_or_one.s0.col.col_inner.loop_max = 0 | |
let nan_or_one.s0.col.col_inner.loop_min = 0 | |
let nan_or_one.s0.row.row_outer.loop_extent = nan_or_one.extent.1 | |
let nan_or_one.s0.row.row_outer.loop_max = nan_or_one.extent.1 + -1 | |
let nan_or_one.s0.row.row_outer.loop_min = 0 | |
let nan_or_one.s0.row.row_inner.loop_extent = 1 | |
let nan_or_one.s0.row.row_inner.loop_max = 0 | |
let nan_or_one.s0.row.row_inner.loop_min = 0 | |
let nan_or_one.s0.row.row_outer.loop_min.orig = 0 | |
produce nan_or_one { | |
gpu_block<Default_GPU> (nan_or_one.s0.row.row_outer, 0, nan_or_one.extent.1) { | |
let nan_or_one.s0.row.row_inner.base = nan_or_one.min.1 + nan_or_one.s0.row.row_outer | |
let nan_or_one.s0.col.col_outer.loop_min.orig = 0 | |
gpu_block<Default_GPU> (nan_or_one.s0.col.col_outer, 0, nan_or_one.extent.0) { | |
let nan_or_one.s0.col.col_inner.base = nan_or_one.min.0 + nan_or_one.s0.col.col_outer | |
let nan_or_one.s0.row.row_inner.loop_min.orig = 0 | |
gpu_thread<Default_GPU> (nan_or_one.s0.row.row_inner, 0, 1) { | |
let nan_or_one.s0.row = nan_or_one.min.1 + nan_or_one.s0.row.row_outer | |
let nan_or_one.s0.col.col_inner.loop_min.orig = 0 | |
gpu_thread<Default_GPU> (nan_or_one.s0.col.col_inner, 0, 1) { | |
let nan_or_one.s0.col = nan_or_one.min.0 + nan_or_one.s0.col.col_outer | |
nan_or_one(nan_or_one.min.0 + nan_or_one.s0.col.col_outer, nan_or_one.min.1 + nan_or_one.s0.row.row_outer) = let t1 = (float32)strict_float(nanf) in (float32)strict_float(select((uint1)is_nan_f32((float32)strict_float((float32)b0(nan_or_one.min.0 + nan_or_one.s0.col.col_outer, nan_or_one.min.1 + nan_or_one.s0.row.row_outer, 0))), t1, t1)) | |
} | |
} | |
} | |
} | |
} | |
} | |
Canonicalizing GPU var names... | |
Lowering after canonicalizing GPU var names: | |
let nan_or_one.s0.row.max = (nan_or_one.extent.1 + nan_or_one.min.1) + -1 | |
let nan_or_one.s0.row.min = nan_or_one.min.1 | |
let nan_or_one.s0.col.max = (nan_or_one.extent.0 + nan_or_one.min.0) + -1 | |
let nan_or_one.s0.col.min = nan_or_one.min.0 | |
let b0.extent.0.required = ((nan_or_one.min.0 + (nan_or_one.extent.0 + -1)) + 1) - (nan_or_one.min.0 + 0) | |
let b0.min.0.required = nan_or_one.min.0 + 0 | |
let b0.stride.0.required = 1 | |
let b0.extent.1.required = ((nan_or_one.min.1 + (nan_or_one.extent.1 + -1)) + 1) - (nan_or_one.min.1 + 0) | |
let b0.min.1.required = nan_or_one.min.1 + 0 | |
let b0.stride.1.required = b0.stride.0.required*b0.extent.0.required | |
let b0.extent.2.required = (0 + 1) - 0 | |
let b0.min.2.required = 0 | |
let b0.stride.2.required = b0.stride.1.required*b0.extent.1.required | |
let nan_or_one.extent.0.required = ((nan_or_one.min.0 + (nan_or_one.extent.0 + -1)) + 1) - (nan_or_one.min.0 + 0) | |
let nan_or_one.min.0.required = nan_or_one.min.0 + 0 | |
let nan_or_one.stride.0.required = 1 | |
let nan_or_one.extent.1.required = ((nan_or_one.min.1 + (nan_or_one.extent.1 + -1)) + 1) - (nan_or_one.min.1 + 0) | |
let nan_or_one.min.1.required = nan_or_one.min.1 + 0 | |
let nan_or_one.stride.1.required = nan_or_one.stride.0.required*nan_or_one.extent.0.required | |
let b0.stride.0.constrained = 3 | |
let b0.min.0.constrained = 0 | |
let b0.extent.0.constrained = 1 | |
let b0.stride.1.constrained = 3 | |
let b0.min.1.constrained = 0 | |
let b0.extent.1.constrained = 1 | |
let b0.stride.2.constrained = 1 | |
let b0.min.2.constrained = 0 | |
let b0.extent.2.constrained = 3 | |
let nan_or_one.stride.0.constrained = 1 | |
let b0.stride.0.proposed = 3 | |
let b0.min.0.proposed = 0 | |
let b0.extent.0.proposed = 1 | |
let b0.stride.1.proposed = 3 | |
let b0.min.1.proposed = 0 | |
let b0.extent.1.proposed = 1 | |
let b0.stride.2.proposed = 1 | |
let b0.min.2.proposed = 0 | |
let b0.extent.2.proposed = 3 | |
let nan_or_one.stride.0.proposed = 1 | |
let nan_or_one.min.0.proposed = nan_or_one.min.0.required | |
let nan_or_one.extent.0.proposed = nan_or_one.extent.0.required | |
let nan_or_one.stride.1.proposed = nan_or_one.stride.1.required | |
let nan_or_one.min.1.proposed = nan_or_one.min.1.required | |
let nan_or_one.extent.1.proposed = nan_or_one.extent.1.required | |
assert(!(uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)b0.buffer) || ((b0.min.0.proposed <= b0.min.0.required) && (((b0.min.0.proposed + b0.extent.0.proposed) - 1) >= ((b0.min.0.required + b0.extent.0.required) - 1))), halide_error_constraints_make_required_region_smaller("Input buffer b0", 0, b0.min.0.proposed, (b0.min.0.proposed + b0.extent.0.proposed) - 1, b0.min.0.required, (b0.min.0.required + b0.extent.0.required) - 1)) | |
assert(!(uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)b0.buffer) || ((b0.min.1.proposed <= b0.min.1.required) && (((b0.min.1.proposed + b0.extent.1.proposed) - 1) >= ((b0.min.1.required + b0.extent.1.required) - 1))), halide_error_constraints_make_required_region_smaller("Input buffer b0", 1, b0.min.1.proposed, (b0.min.1.proposed + b0.extent.1.proposed) - 1, b0.min.1.required, (b0.min.1.required + b0.extent.1.required) - 1)) | |
assert(!(uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)b0.buffer) || ((b0.min.2.proposed <= b0.min.2.required) && (((b0.min.2.proposed + b0.extent.2.proposed) - 1) >= ((b0.min.2.required + b0.extent.2.required) - 1))), halide_error_constraints_make_required_region_smaller("Input buffer b0", 2, b0.min.2.proposed, (b0.min.2.proposed + b0.extent.2.proposed) - 1, b0.min.2.required, (b0.min.2.required + b0.extent.2.required) - 1)) | |
assert(!(uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)nan_or_one.buffer) || ((nan_or_one.min.0.proposed <= nan_or_one.min.0.required) && (((nan_or_one.min.0.proposed + nan_or_one.extent.0.proposed) - 1) >= ((nan_or_one.min.0.required + nan_or_one.extent.0.required) - 1))), halide_error_constraints_make_required_region_smaller("Output buffer nan_or_one", 0, nan_or_one.min.0.proposed, (nan_or_one.min.0.proposed + nan_or_one.extent.0.proposed) - 1, nan_or_one.min.0.required, (nan_or_one.min.0.required + nan_or_one.extent.0.required) - 1)) | |
assert(!(uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)nan_or_one.buffer) || ((nan_or_one.min.1.proposed <= nan_or_one.min.1.required) && (((nan_or_one.min.1.proposed + nan_or_one.extent.1.proposed) - 1) >= ((nan_or_one.min.1.required + nan_or_one.extent.1.required) - 1))), halide_error_constraints_make_required_region_smaller("Output buffer nan_or_one", 1, nan_or_one.min.1.proposed, (nan_or_one.min.1.proposed + nan_or_one.extent.1.proposed) - 1, nan_or_one.min.1.required, (nan_or_one.min.1.required + nan_or_one.extent.1.required) - 1)) | |
if ((uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)b0.buffer)) { | |
(halide_buffer_t *)_halide_buffer_init((halide_buffer_t *)b0.buffer, (halide_dimension_t *)_halide_buffer_get_shape((halide_buffer_t *)b0.buffer), (void *)reinterpret((uint64)0), (uint64)0, (halide_device_interface_t *)reinterpret((uint64)0), 2, 32, 3, (halide_dimension_t *)make_struct(b0.min.0.proposed, b0.extent.0.proposed, b0.stride.0.proposed, 0, b0.min.1.proposed, b0.extent.1.proposed, b0.stride.1.proposed, 0, b0.min.2.proposed, b0.extent.2.proposed, b0.stride.2.proposed, 0), (uint64)0) | |
} | |
if ((uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)nan_or_one.buffer)) { | |
(halide_buffer_t *)_halide_buffer_init((halide_buffer_t *)nan_or_one.buffer, (halide_dimension_t *)_halide_buffer_get_shape((halide_buffer_t *)nan_or_one.buffer), (void *)reinterpret((uint64)0), (uint64)0, (halide_device_interface_t *)reinterpret((uint64)0), 2, 32, 2, (halide_dimension_t *)make_struct(nan_or_one.min.0.proposed, nan_or_one.extent.0.proposed, nan_or_one.stride.0.proposed, 0, nan_or_one.min.1.proposed, nan_or_one.extent.1.proposed, nan_or_one.stride.1.proposed, 0), (uint64)0) | |
} | |
if (!(((uint1)0 || (uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)b0.buffer)) || (uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)nan_or_one.buffer))) { | |
assert((uint32)b0.type == (uint32)73730, halide_error_bad_type("Input buffer b0", (uint32)b0.type, (uint32)73730)) | |
assert(b0.dimensions == 3, halide_error_bad_dimensions("Input buffer b0", b0.dimensions, 3)) | |
assert((uint32)nan_or_one.type == (uint32)73730, halide_error_bad_type("Output buffer nan_or_one", (uint32)nan_or_one.type, (uint32)73730)) | |
assert(nan_or_one.dimensions == 2, halide_error_bad_dimensions("Output buffer nan_or_one", nan_or_one.dimensions, 2)) | |
assert((b0.min.0 <= b0.min.0.required) && (((b0.min.0 + b0.extent.0) - 1) >= ((b0.min.0.required + b0.extent.0.required) - 1)), halide_error_access_out_of_bounds("Input buffer b0", 0, b0.min.0.required, (b0.min.0.required + b0.extent.0.required) - 1, b0.min.0, (b0.min.0 + b0.extent.0) - 1)) | |
assert(b0.extent.0 >= 0, halide_error_buffer_extents_negative("Input buffer b0", 0, b0.extent.0)) | |
assert((b0.min.1 <= b0.min.1.required) && (((b0.min.1 + b0.extent.1) - 1) >= ((b0.min.1.required + b0.extent.1.required) - 1)), halide_error_access_out_of_bounds("Input buffer b0", 1, b0.min.1.required, (b0.min.1.required + b0.extent.1.required) - 1, b0.min.1, (b0.min.1 + b0.extent.1) - 1)) | |
assert(b0.extent.1 >= 0, halide_error_buffer_extents_negative("Input buffer b0", 1, b0.extent.1)) | |
assert((b0.min.2 <= b0.min.2.required) && (((b0.min.2 + b0.extent.2) - 1) >= ((b0.min.2.required + b0.extent.2.required) - 1)), halide_error_access_out_of_bounds("Input buffer b0", 2, b0.min.2.required, (b0.min.2.required + b0.extent.2.required) - 1, b0.min.2, (b0.min.2 + b0.extent.2) - 1)) | |
assert(b0.extent.2 >= 0, halide_error_buffer_extents_negative("Input buffer b0", 2, b0.extent.2)) | |
assert((nan_or_one.min.0 <= nan_or_one.min.0.required) && (((nan_or_one.min.0 + nan_or_one.extent.0) - 1) >= ((nan_or_one.min.0.required + nan_or_one.extent.0.required) - 1)), halide_error_access_out_of_bounds("Output buffer nan_or_one", 0, nan_or_one.min.0.required, (nan_or_one.min.0.required + nan_or_one.extent.0.required) - 1, nan_or_one.min.0, (nan_or_one.min.0 + nan_or_one.extent.0) - 1)) | |
assert(nan_or_one.extent.0 >= 0, halide_error_buffer_extents_negative("Output buffer nan_or_one", 0, nan_or_one.extent.0)) | |
assert((nan_or_one.min.1 <= nan_or_one.min.1.required) && (((nan_or_one.min.1 + nan_or_one.extent.1) - 1) >= ((nan_or_one.min.1.required + nan_or_one.extent.1.required) - 1)), halide_error_access_out_of_bounds("Output buffer nan_or_one", 1, nan_or_one.min.1.required, (nan_or_one.min.1.required + nan_or_one.extent.1.required) - 1, nan_or_one.min.1, (nan_or_one.min.1 + nan_or_one.extent.1) - 1)) | |
assert(nan_or_one.extent.1 >= 0, halide_error_buffer_extents_negative("Output buffer nan_or_one", 1, nan_or_one.extent.1)) | |
assert(b0.stride.0 == b0.stride.0.constrained, halide_error_constraint_violated("b0.stride.0", b0.stride.0, "3", b0.stride.0.constrained)) | |
assert(b0.min.0 == b0.min.0.constrained, halide_error_constraint_violated("b0.min.0", b0.min.0, "0", b0.min.0.constrained)) | |
assert(b0.extent.0 == b0.extent.0.constrained, halide_error_constraint_violated("b0.extent.0", b0.extent.0, "1", b0.extent.0.constrained)) | |
assert(b0.stride.1 == b0.stride.1.constrained, halide_error_constraint_violated("b0.stride.1", b0.stride.1, "3", b0.stride.1.constrained)) | |
assert(b0.min.1 == b0.min.1.constrained, halide_error_constraint_violated("b0.min.1", b0.min.1, "0", b0.min.1.constrained)) | |
assert(b0.extent.1 == b0.extent.1.constrained, halide_error_constraint_violated("b0.extent.1", b0.extent.1, "1", b0.extent.1.constrained)) | |
assert(b0.stride.2 == b0.stride.2.constrained, halide_error_constraint_violated("b0.stride.2", b0.stride.2, "1", b0.stride.2.constrained)) | |
assert(b0.min.2 == b0.min.2.constrained, halide_error_constraint_violated("b0.min.2", b0.min.2, "0", b0.min.2.constrained)) | |
assert(b0.extent.2 == b0.extent.2.constrained, halide_error_constraint_violated("b0.extent.2", b0.extent.2, "3", b0.extent.2.constrained)) | |
assert(nan_or_one.stride.0 == nan_or_one.stride.0.constrained, halide_error_constraint_violated("nan_or_one.stride.0", nan_or_one.stride.0, "1", nan_or_one.stride.0.constrained)) | |
let b0.total_extent.0 = int64(b0.extent.0.constrained) | |
let b0.total_extent.1 = int64(b0.extent.1.constrained)*b0.total_extent.0 | |
let b0.total_extent.2 = int64(b0.extent.2.constrained)*b0.total_extent.1 | |
let nan_or_one.total_extent.0 = int64(nan_or_one.extent.0) | |
let nan_or_one.total_extent.1 = int64(nan_or_one.extent.1)*nan_or_one.total_extent.0 | |
assert((uint64)abs(int64(b0.extent.0.constrained)*int64(b0.stride.0.constrained)) <= (uint64)2147483647, halide_error_buffer_allocation_too_large("b0", (uint64)abs(int64(b0.extent.0.constrained)*int64(b0.stride.0.constrained)), (uint64)2147483647)) | |
assert((uint64)abs(int64(b0.extent.1.constrained)*int64(b0.stride.1.constrained)) <= (uint64)2147483647, halide_error_buffer_allocation_too_large("b0", (uint64)abs(int64(b0.extent.1.constrained)*int64(b0.stride.1.constrained)), (uint64)2147483647)) | |
assert(b0.total_extent.1 <= (int64)2147483647, halide_error_buffer_extents_too_large("b0", b0.total_extent.1, (int64)2147483647)) | |
assert((uint64)abs(int64(b0.extent.2.constrained)*int64(b0.stride.2.constrained)) <= (uint64)2147483647, halide_error_buffer_allocation_too_large("b0", (uint64)abs(int64(b0.extent.2.constrained)*int64(b0.stride.2.constrained)), (uint64)2147483647)) | |
assert(b0.total_extent.2 <= (int64)2147483647, halide_error_buffer_extents_too_large("b0", b0.total_extent.2, (int64)2147483647)) | |
assert((uint64)abs(int64(nan_or_one.extent.0)*int64(nan_or_one.stride.0.constrained)) <= (uint64)2147483647, halide_error_buffer_allocation_too_large("nan_or_one", (uint64)abs(int64(nan_or_one.extent.0)*int64(nan_or_one.stride.0.constrained)), (uint64)2147483647)) | |
assert((uint64)abs(int64(nan_or_one.extent.1)*int64(nan_or_one.stride.1)) <= (uint64)2147483647, halide_error_buffer_allocation_too_large("nan_or_one", (uint64)abs(int64(nan_or_one.extent.1)*int64(nan_or_one.stride.1)), (uint64)2147483647)) | |
assert(nan_or_one.total_extent.1 <= (int64)2147483647, halide_error_buffer_extents_too_large("nan_or_one", nan_or_one.total_extent.1, (int64)2147483647)) | |
let nan_or_one.s0.row.loop_max = (nan_or_one.extent.1 + nan_or_one.min.1) + -1 | |
let nan_or_one.s0.row.loop_min = nan_or_one.min.1 | |
let nan_or_one.s0.row.loop_extent = nan_or_one.extent.1 | |
let nan_or_one.s0.col.loop_max = (nan_or_one.extent.0 + nan_or_one.min.0) + -1 | |
let nan_or_one.s0.col.loop_min = nan_or_one.min.0 | |
let nan_or_one.s0.col.loop_extent = nan_or_one.extent.0 | |
let nan_or_one.s0.col.col_outer.__block_id_x.loop_extent = nan_or_one.extent.0 | |
let nan_or_one.s0.col.col_outer.__block_id_x.loop_max = nan_or_one.extent.0 + -1 | |
let nan_or_one.s0.col.col_outer.__block_id_x.loop_min = 0 | |
let nan_or_one.s0.col.col_inner.__thread_id_x.loop_extent = 1 | |
let nan_or_one.s0.col.col_inner.__thread_id_x.loop_max = 0 | |
let nan_or_one.s0.col.col_inner.__thread_id_x.loop_min = 0 | |
let nan_or_one.s0.row.row_outer.__block_id_y.loop_extent = nan_or_one.extent.1 | |
let nan_or_one.s0.row.row_outer.__block_id_y.loop_max = nan_or_one.extent.1 + -1 | |
let nan_or_one.s0.row.row_outer.__block_id_y.loop_min = 0 | |
let nan_or_one.s0.row.row_inner.__thread_id_y.loop_extent = 1 | |
let nan_or_one.s0.row.row_inner.__thread_id_y.loop_max = 0 | |
let nan_or_one.s0.row.row_inner.__thread_id_y.loop_min = 0 | |
let nan_or_one.s0.row.row_outer.loop_min.orig = 0 | |
produce nan_or_one { | |
gpu_block<Default_GPU> (nan_or_one.s0.row.row_outer.__block_id_y, 0, nan_or_one.extent.1) { | |
let nan_or_one.s0.row.row_inner.base = nan_or_one.min.1 + nan_or_one.s0.row.row_outer.__block_id_y | |
let nan_or_one.s0.col.col_outer.loop_min.orig = 0 | |
gpu_block<Default_GPU> (nan_or_one.s0.col.col_outer.__block_id_x, 0, nan_or_one.extent.0) { | |
let nan_or_one.s0.col.col_inner.base = nan_or_one.min.0 + nan_or_one.s0.col.col_outer.__block_id_x | |
let nan_or_one.s0.row.row_inner.loop_min.orig = 0 | |
gpu_thread<Default_GPU> (nan_or_one.s0.row.row_inner.__thread_id_y, 0, 1) { | |
let nan_or_one.s0.row = nan_or_one.min.1 + nan_or_one.s0.row.row_outer.__block_id_y | |
let nan_or_one.s0.col.col_inner.loop_min.orig = 0 | |
gpu_thread<Default_GPU> (nan_or_one.s0.col.col_inner.__thread_id_x, 0, 1) { | |
let nan_or_one.s0.col = nan_or_one.min.0 + nan_or_one.s0.col.col_outer.__block_id_x | |
nan_or_one(nan_or_one.min.0 + nan_or_one.s0.col.col_outer.__block_id_x, nan_or_one.min.1 + nan_or_one.s0.row.row_outer.__block_id_y) = let t1 = (float32)strict_float(nanf) in (float32)strict_float(select((uint1)is_nan_f32((float32)strict_float((float32)b0(nan_or_one.min.0 + nan_or_one.s0.col.col_outer.__block_id_x, nan_or_one.min.1 + nan_or_one.s0.row.row_outer.__block_id_y, 0))), t1, t1)) | |
} | |
} | |
} | |
} | |
} | |
} | |
Bounding small realizations... | |
Lowering after bounding small realizations: | |
let nan_or_one.s0.row.max = (nan_or_one.extent.1 + nan_or_one.min.1) + -1 | |
let nan_or_one.s0.row.min = nan_or_one.min.1 | |
let nan_or_one.s0.col.max = (nan_or_one.extent.0 + nan_or_one.min.0) + -1 | |
let nan_or_one.s0.col.min = nan_or_one.min.0 | |
let b0.extent.0.required = ((nan_or_one.min.0 + (nan_or_one.extent.0 + -1)) + 1) - (nan_or_one.min.0 + 0) | |
let b0.min.0.required = nan_or_one.min.0 + 0 | |
let b0.stride.0.required = 1 | |
let b0.extent.1.required = ((nan_or_one.min.1 + (nan_or_one.extent.1 + -1)) + 1) - (nan_or_one.min.1 + 0) | |
let b0.min.1.required = nan_or_one.min.1 + 0 | |
let b0.stride.1.required = b0.stride.0.required*b0.extent.0.required | |
let b0.extent.2.required = (0 + 1) - 0 | |
let b0.min.2.required = 0 | |
let b0.stride.2.required = b0.stride.1.required*b0.extent.1.required | |
let nan_or_one.extent.0.required = ((nan_or_one.min.0 + (nan_or_one.extent.0 + -1)) + 1) - (nan_or_one.min.0 + 0) | |
let nan_or_one.min.0.required = nan_or_one.min.0 + 0 | |
let nan_or_one.stride.0.required = 1 | |
let nan_or_one.extent.1.required = ((nan_or_one.min.1 + (nan_or_one.extent.1 + -1)) + 1) - (nan_or_one.min.1 + 0) | |
let nan_or_one.min.1.required = nan_or_one.min.1 + 0 | |
let nan_or_one.stride.1.required = nan_or_one.stride.0.required*nan_or_one.extent.0.required | |
let b0.stride.0.constrained = 3 | |
let b0.min.0.constrained = 0 | |
let b0.extent.0.constrained = 1 | |
let b0.stride.1.constrained = 3 | |
let b0.min.1.constrained = 0 | |
let b0.extent.1.constrained = 1 | |
let b0.stride.2.constrained = 1 | |
let b0.min.2.constrained = 0 | |
let b0.extent.2.constrained = 3 | |
let nan_or_one.stride.0.constrained = 1 | |
let b0.stride.0.proposed = 3 | |
let b0.min.0.proposed = 0 | |
let b0.extent.0.proposed = 1 | |
let b0.stride.1.proposed = 3 | |
let b0.min.1.proposed = 0 | |
let b0.extent.1.proposed = 1 | |
let b0.stride.2.proposed = 1 | |
let b0.min.2.proposed = 0 | |
let b0.extent.2.proposed = 3 | |
let nan_or_one.stride.0.proposed = 1 | |
let nan_or_one.min.0.proposed = nan_or_one.min.0.required | |
let nan_or_one.extent.0.proposed = nan_or_one.extent.0.required | |
let nan_or_one.stride.1.proposed = nan_or_one.stride.1.required | |
let nan_or_one.min.1.proposed = nan_or_one.min.1.required | |
let nan_or_one.extent.1.proposed = nan_or_one.extent.1.required | |
assert(!(uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)b0.buffer) || ((b0.min.0.proposed <= b0.min.0.required) && (((b0.min.0.proposed + b0.extent.0.proposed) - 1) >= ((b0.min.0.required + b0.extent.0.required) - 1))), halide_error_constraints_make_required_region_smaller("Input buffer b0", 0, b0.min.0.proposed, (b0.min.0.proposed + b0.extent.0.proposed) - 1, b0.min.0.required, (b0.min.0.required + b0.extent.0.required) - 1)) | |
assert(!(uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)b0.buffer) || ((b0.min.1.proposed <= b0.min.1.required) && (((b0.min.1.proposed + b0.extent.1.proposed) - 1) >= ((b0.min.1.required + b0.extent.1.required) - 1))), halide_error_constraints_make_required_region_smaller("Input buffer b0", 1, b0.min.1.proposed, (b0.min.1.proposed + b0.extent.1.proposed) - 1, b0.min.1.required, (b0.min.1.required + b0.extent.1.required) - 1)) | |
assert(!(uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)b0.buffer) || ((b0.min.2.proposed <= b0.min.2.required) && (((b0.min.2.proposed + b0.extent.2.proposed) - 1) >= ((b0.min.2.required + b0.extent.2.required) - 1))), halide_error_constraints_make_required_region_smaller("Input buffer b0", 2, b0.min.2.proposed, (b0.min.2.proposed + b0.extent.2.proposed) - 1, b0.min.2.required, (b0.min.2.required + b0.extent.2.required) - 1)) | |
assert(!(uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)nan_or_one.buffer) || ((nan_or_one.min.0.proposed <= nan_or_one.min.0.required) && (((nan_or_one.min.0.proposed + nan_or_one.extent.0.proposed) - 1) >= ((nan_or_one.min.0.required + nan_or_one.extent.0.required) - 1))), halide_error_constraints_make_required_region_smaller("Output buffer nan_or_one", 0, nan_or_one.min.0.proposed, (nan_or_one.min.0.proposed + nan_or_one.extent.0.proposed) - 1, nan_or_one.min.0.required, (nan_or_one.min.0.required + nan_or_one.extent.0.required) - 1)) | |
assert(!(uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)nan_or_one.buffer) || ((nan_or_one.min.1.proposed <= nan_or_one.min.1.required) && (((nan_or_one.min.1.proposed + nan_or_one.extent.1.proposed) - 1) >= ((nan_or_one.min.1.required + nan_or_one.extent.1.required) - 1))), halide_error_constraints_make_required_region_smaller("Output buffer nan_or_one", 1, nan_or_one.min.1.proposed, (nan_or_one.min.1.proposed + nan_or_one.extent.1.proposed) - 1, nan_or_one.min.1.required, (nan_or_one.min.1.required + nan_or_one.extent.1.required) - 1)) | |
if ((uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)b0.buffer)) { | |
(halide_buffer_t *)_halide_buffer_init((halide_buffer_t *)b0.buffer, (halide_dimension_t *)_halide_buffer_get_shape((halide_buffer_t *)b0.buffer), (void *)reinterpret((uint64)0), (uint64)0, (halide_device_interface_t *)reinterpret((uint64)0), 2, 32, 3, (halide_dimension_t *)make_struct(b0.min.0.proposed, b0.extent.0.proposed, b0.stride.0.proposed, 0, b0.min.1.proposed, b0.extent.1.proposed, b0.stride.1.proposed, 0, b0.min.2.proposed, b0.extent.2.proposed, b0.stride.2.proposed, 0), (uint64)0) | |
} | |
if ((uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)nan_or_one.buffer)) { | |
(halide_buffer_t *)_halide_buffer_init((halide_buffer_t *)nan_or_one.buffer, (halide_dimension_t *)_halide_buffer_get_shape((halide_buffer_t *)nan_or_one.buffer), (void *)reinterpret((uint64)0), (uint64)0, (halide_device_interface_t *)reinterpret((uint64)0), 2, 32, 2, (halide_dimension_t *)make_struct(nan_or_one.min.0.proposed, nan_or_one.extent.0.proposed, nan_or_one.stride.0.proposed, 0, nan_or_one.min.1.proposed, nan_or_one.extent.1.proposed, nan_or_one.stride.1.proposed, 0), (uint64)0) | |
} | |
if (!(((uint1)0 || (uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)b0.buffer)) || (uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)nan_or_one.buffer))) { | |
assert((uint32)b0.type == (uint32)73730, halide_error_bad_type("Input buffer b0", (uint32)b0.type, (uint32)73730)) | |
assert(b0.dimensions == 3, halide_error_bad_dimensions("Input buffer b0", b0.dimensions, 3)) | |
assert((uint32)nan_or_one.type == (uint32)73730, halide_error_bad_type("Output buffer nan_or_one", (uint32)nan_or_one.type, (uint32)73730)) | |
assert(nan_or_one.dimensions == 2, halide_error_bad_dimensions("Output buffer nan_or_one", nan_or_one.dimensions, 2)) | |
assert((b0.min.0 <= b0.min.0.required) && (((b0.min.0 + b0.extent.0) - 1) >= ((b0.min.0.required + b0.extent.0.required) - 1)), halide_error_access_out_of_bounds("Input buffer b0", 0, b0.min.0.required, (b0.min.0.required + b0.extent.0.required) - 1, b0.min.0, (b0.min.0 + b0.extent.0) - 1)) | |
assert(b0.extent.0 >= 0, halide_error_buffer_extents_negative("Input buffer b0", 0, b0.extent.0)) | |
assert((b0.min.1 <= b0.min.1.required) && (((b0.min.1 + b0.extent.1) - 1) >= ((b0.min.1.required + b0.extent.1.required) - 1)), halide_error_access_out_of_bounds("Input buffer b0", 1, b0.min.1.required, (b0.min.1.required + b0.extent.1.required) - 1, b0.min.1, (b0.min.1 + b0.extent.1) - 1)) | |
assert(b0.extent.1 >= 0, halide_error_buffer_extents_negative("Input buffer b0", 1, b0.extent.1)) | |
assert((b0.min.2 <= b0.min.2.required) && (((b0.min.2 + b0.extent.2) - 1) >= ((b0.min.2.required + b0.extent.2.required) - 1)), halide_error_access_out_of_bounds("Input buffer b0", 2, b0.min.2.required, (b0.min.2.required + b0.extent.2.required) - 1, b0.min.2, (b0.min.2 + b0.extent.2) - 1)) | |
assert(b0.extent.2 >= 0, halide_error_buffer_extents_negative("Input buffer b0", 2, b0.extent.2)) | |
assert((nan_or_one.min.0 <= nan_or_one.min.0.required) && (((nan_or_one.min.0 + nan_or_one.extent.0) - 1) >= ((nan_or_one.min.0.required + nan_or_one.extent.0.required) - 1)), halide_error_access_out_of_bounds("Output buffer nan_or_one", 0, nan_or_one.min.0.required, (nan_or_one.min.0.required + nan_or_one.extent.0.required) - 1, nan_or_one.min.0, (nan_or_one.min.0 + nan_or_one.extent.0) - 1)) | |
assert(nan_or_one.extent.0 >= 0, halide_error_buffer_extents_negative("Output buffer nan_or_one", 0, nan_or_one.extent.0)) | |
assert((nan_or_one.min.1 <= nan_or_one.min.1.required) && (((nan_or_one.min.1 + nan_or_one.extent.1) - 1) >= ((nan_or_one.min.1.required + nan_or_one.extent.1.required) - 1)), halide_error_access_out_of_bounds("Output buffer nan_or_one", 1, nan_or_one.min.1.required, (nan_or_one.min.1.required + nan_or_one.extent.1.required) - 1, nan_or_one.min.1, (nan_or_one.min.1 + nan_or_one.extent.1) - 1)) | |
assert(nan_or_one.extent.1 >= 0, halide_error_buffer_extents_negative("Output buffer nan_or_one", 1, nan_or_one.extent.1)) | |
assert(b0.stride.0 == b0.stride.0.constrained, halide_error_constraint_violated("b0.stride.0", b0.stride.0, "3", b0.stride.0.constrained)) | |
assert(b0.min.0 == b0.min.0.constrained, halide_error_constraint_violated("b0.min.0", b0.min.0, "0", b0.min.0.constrained)) | |
assert(b0.extent.0 == b0.extent.0.constrained, halide_error_constraint_violated("b0.extent.0", b0.extent.0, "1", b0.extent.0.constrained)) | |
assert(b0.stride.1 == b0.stride.1.constrained, halide_error_constraint_violated("b0.stride.1", b0.stride.1, "3", b0.stride.1.constrained)) | |
assert(b0.min.1 == b0.min.1.constrained, halide_error_constraint_violated("b0.min.1", b0.min.1, "0", b0.min.1.constrained)) | |
assert(b0.extent.1 == b0.extent.1.constrained, halide_error_constraint_violated("b0.extent.1", b0.extent.1, "1", b0.extent.1.constrained)) | |
assert(b0.stride.2 == b0.stride.2.constrained, halide_error_constraint_violated("b0.stride.2", b0.stride.2, "1", b0.stride.2.constrained)) | |
assert(b0.min.2 == b0.min.2.constrained, halide_error_constraint_violated("b0.min.2", b0.min.2, "0", b0.min.2.constrained)) | |
assert(b0.extent.2 == b0.extent.2.constrained, halide_error_constraint_violated("b0.extent.2", b0.extent.2, "3", b0.extent.2.constrained)) | |
assert(nan_or_one.stride.0 == nan_or_one.stride.0.constrained, halide_error_constraint_violated("nan_or_one.stride.0", nan_or_one.stride.0, "1", nan_or_one.stride.0.constrained)) | |
let b0.total_extent.0 = int64(b0.extent.0.constrained) | |
let b0.total_extent.1 = int64(b0.extent.1.constrained)*b0.total_extent.0 | |
let b0.total_extent.2 = int64(b0.extent.2.constrained)*b0.total_extent.1 | |
let nan_or_one.total_extent.0 = int64(nan_or_one.extent.0) | |
let nan_or_one.total_extent.1 = int64(nan_or_one.extent.1)*nan_or_one.total_extent.0 | |
assert((uint64)abs(int64(b0.extent.0.constrained)*int64(b0.stride.0.constrained)) <= (uint64)2147483647, halide_error_buffer_allocation_too_large("b0", (uint64)abs(int64(b0.extent.0.constrained)*int64(b0.stride.0.constrained)), (uint64)2147483647)) | |
assert((uint64)abs(int64(b0.extent.1.constrained)*int64(b0.stride.1.constrained)) <= (uint64)2147483647, halide_error_buffer_allocation_too_large("b0", (uint64)abs(int64(b0.extent.1.constrained)*int64(b0.stride.1.constrained)), (uint64)2147483647)) | |
assert(b0.total_extent.1 <= (int64)2147483647, halide_error_buffer_extents_too_large("b0", b0.total_extent.1, (int64)2147483647)) | |
assert((uint64)abs(int64(b0.extent.2.constrained)*int64(b0.stride.2.constrained)) <= (uint64)2147483647, halide_error_buffer_allocation_too_large("b0", (uint64)abs(int64(b0.extent.2.constrained)*int64(b0.stride.2.constrained)), (uint64)2147483647)) | |
assert(b0.total_extent.2 <= (int64)2147483647, halide_error_buffer_extents_too_large("b0", b0.total_extent.2, (int64)2147483647)) | |
assert((uint64)abs(int64(nan_or_one.extent.0)*int64(nan_or_one.stride.0.constrained)) <= (uint64)2147483647, halide_error_buffer_allocation_too_large("nan_or_one", (uint64)abs(int64(nan_or_one.extent.0)*int64(nan_or_one.stride.0.constrained)), (uint64)2147483647)) | |
assert((uint64)abs(int64(nan_or_one.extent.1)*int64(nan_or_one.stride.1)) <= (uint64)2147483647, halide_error_buffer_allocation_too_large("nan_or_one", (uint64)abs(int64(nan_or_one.extent.1)*int64(nan_or_one.stride.1)), (uint64)2147483647)) | |
assert(nan_or_one.total_extent.1 <= (int64)2147483647, halide_error_buffer_extents_too_large("nan_or_one", nan_or_one.total_extent.1, (int64)2147483647)) | |
let nan_or_one.s0.row.loop_max = (nan_or_one.extent.1 + nan_or_one.min.1) + -1 | |
let nan_or_one.s0.row.loop_min = nan_or_one.min.1 | |
let nan_or_one.s0.row.loop_extent = nan_or_one.extent.1 | |
let nan_or_one.s0.col.loop_max = (nan_or_one.extent.0 + nan_or_one.min.0) + -1 | |
let nan_or_one.s0.col.loop_min = nan_or_one.min.0 | |
let nan_or_one.s0.col.loop_extent = nan_or_one.extent.0 | |
let nan_or_one.s0.col.col_outer.__block_id_x.loop_extent = nan_or_one.extent.0 | |
let nan_or_one.s0.col.col_outer.__block_id_x.loop_max = nan_or_one.extent.0 + -1 | |
let nan_or_one.s0.col.col_outer.__block_id_x.loop_min = 0 | |
let nan_or_one.s0.col.col_inner.__thread_id_x.loop_extent = 1 | |
let nan_or_one.s0.col.col_inner.__thread_id_x.loop_max = 0 | |
let nan_or_one.s0.col.col_inner.__thread_id_x.loop_min = 0 | |
let nan_or_one.s0.row.row_outer.__block_id_y.loop_extent = nan_or_one.extent.1 | |
let nan_or_one.s0.row.row_outer.__block_id_y.loop_max = nan_or_one.extent.1 + -1 | |
let nan_or_one.s0.row.row_outer.__block_id_y.loop_min = 0 | |
let nan_or_one.s0.row.row_inner.__thread_id_y.loop_extent = 1 | |
let nan_or_one.s0.row.row_inner.__thread_id_y.loop_max = 0 | |
let nan_or_one.s0.row.row_inner.__thread_id_y.loop_min = 0 | |
let nan_or_one.s0.row.row_outer.loop_min.orig = 0 | |
produce nan_or_one { | |
gpu_block<Default_GPU> (nan_or_one.s0.row.row_outer.__block_id_y, 0, nan_or_one.extent.1) { | |
let nan_or_one.s0.row.row_inner.base = nan_or_one.min.1 + nan_or_one.s0.row.row_outer.__block_id_y | |
let nan_or_one.s0.col.col_outer.loop_min.orig = 0 | |
gpu_block<Default_GPU> (nan_or_one.s0.col.col_outer.__block_id_x, 0, nan_or_one.extent.0) { | |
let nan_or_one.s0.col.col_inner.base = nan_or_one.min.0 + nan_or_one.s0.col.col_outer.__block_id_x | |
let nan_or_one.s0.row.row_inner.loop_min.orig = 0 | |
gpu_thread<Default_GPU> (nan_or_one.s0.row.row_inner.__thread_id_y, 0, 1) { | |
let nan_or_one.s0.row = nan_or_one.min.1 + nan_or_one.s0.row.row_outer.__block_id_y | |
let nan_or_one.s0.col.col_inner.loop_min.orig = 0 | |
gpu_thread<Default_GPU> (nan_or_one.s0.col.col_inner.__thread_id_x, 0, 1) { | |
let nan_or_one.s0.col = nan_or_one.min.0 + nan_or_one.s0.col.col_outer.__block_id_x | |
nan_or_one(nan_or_one.min.0 + nan_or_one.s0.col.col_outer.__block_id_x, nan_or_one.min.1 + nan_or_one.s0.row.row_outer.__block_id_y) = let t1 = (float32)strict_float(nanf) in (float32)strict_float(select((uint1)is_nan_f32((float32)strict_float((float32)b0(nan_or_one.min.0 + nan_or_one.s0.col.col_outer.__block_id_x, nan_or_one.min.1 + nan_or_one.s0.row.row_outer.__block_id_y, 0))), t1, t1)) | |
} | |
} | |
} | |
} | |
} | |
} | |
Performing storage flattening... | |
load call to b0 0 | |
Lowering after storage flattening: | |
let nan_or_one.s0.row.max = (nan_or_one.extent.1 + nan_or_one.min.1) + -1 | |
let nan_or_one.s0.row.min = nan_or_one.min.1 | |
let nan_or_one.s0.col.max = (nan_or_one.extent.0 + nan_or_one.min.0) + -1 | |
let nan_or_one.s0.col.min = nan_or_one.min.0 | |
let b0.extent.0.required = ((nan_or_one.min.0 + (nan_or_one.extent.0 + -1)) + 1) - (nan_or_one.min.0 + 0) | |
let b0.min.0.required = nan_or_one.min.0 + 0 | |
let b0.stride.0.required = 1 | |
let b0.extent.1.required = ((nan_or_one.min.1 + (nan_or_one.extent.1 + -1)) + 1) - (nan_or_one.min.1 + 0) | |
let b0.min.1.required = nan_or_one.min.1 + 0 | |
let b0.stride.1.required = b0.stride.0.required*b0.extent.0.required | |
let b0.extent.2.required = (0 + 1) - 0 | |
let b0.min.2.required = 0 | |
let b0.stride.2.required = b0.stride.1.required*b0.extent.1.required | |
let nan_or_one.extent.0.required = ((nan_or_one.min.0 + (nan_or_one.extent.0 + -1)) + 1) - (nan_or_one.min.0 + 0) | |
let nan_or_one.min.0.required = nan_or_one.min.0 + 0 | |
let nan_or_one.stride.0.required = 1 | |
let nan_or_one.extent.1.required = ((nan_or_one.min.1 + (nan_or_one.extent.1 + -1)) + 1) - (nan_or_one.min.1 + 0) | |
let nan_or_one.min.1.required = nan_or_one.min.1 + 0 | |
let nan_or_one.stride.1.required = nan_or_one.stride.0.required*nan_or_one.extent.0.required | |
let b0.stride.0.constrained = 3 | |
let b0.min.0.constrained = 0 | |
let b0.extent.0.constrained = 1 | |
let b0.stride.1.constrained = 3 | |
let b0.min.1.constrained = 0 | |
let b0.extent.1.constrained = 1 | |
let b0.stride.2.constrained = 1 | |
let b0.min.2.constrained = 0 | |
let b0.extent.2.constrained = 3 | |
let nan_or_one.stride.0.constrained = 1 | |
let b0.stride.0.proposed = 3 | |
let b0.min.0.proposed = 0 | |
let b0.extent.0.proposed = 1 | |
let b0.stride.1.proposed = 3 | |
let b0.min.1.proposed = 0 | |
let b0.extent.1.proposed = 1 | |
let b0.stride.2.proposed = 1 | |
let b0.min.2.proposed = 0 | |
let b0.extent.2.proposed = 3 | |
let nan_or_one.stride.0.proposed = 1 | |
let nan_or_one.min.0.proposed = nan_or_one.min.0.required | |
let nan_or_one.extent.0.proposed = nan_or_one.extent.0.required | |
let nan_or_one.stride.1.proposed = nan_or_one.stride.1.required | |
let nan_or_one.min.1.proposed = nan_or_one.min.1.required | |
let nan_or_one.extent.1.proposed = nan_or_one.extent.1.required | |
assert(!(uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)b0.buffer) || ((b0.min.0.proposed <= b0.min.0.required) && (((b0.min.0.proposed + b0.extent.0.proposed) - 1) >= ((b0.min.0.required + b0.extent.0.required) - 1))), halide_error_constraints_make_required_region_smaller("Input buffer b0", 0, b0.min.0.proposed, (b0.min.0.proposed + b0.extent.0.proposed) - 1, b0.min.0.required, (b0.min.0.required + b0.extent.0.required) - 1)) | |
assert(!(uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)b0.buffer) || ((b0.min.1.proposed <= b0.min.1.required) && (((b0.min.1.proposed + b0.extent.1.proposed) - 1) >= ((b0.min.1.required + b0.extent.1.required) - 1))), halide_error_constraints_make_required_region_smaller("Input buffer b0", 1, b0.min.1.proposed, (b0.min.1.proposed + b0.extent.1.proposed) - 1, b0.min.1.required, (b0.min.1.required + b0.extent.1.required) - 1)) | |
assert(!(uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)b0.buffer) || ((b0.min.2.proposed <= b0.min.2.required) && (((b0.min.2.proposed + b0.extent.2.proposed) - 1) >= ((b0.min.2.required + b0.extent.2.required) - 1))), halide_error_constraints_make_required_region_smaller("Input buffer b0", 2, b0.min.2.proposed, (b0.min.2.proposed + b0.extent.2.proposed) - 1, b0.min.2.required, (b0.min.2.required + b0.extent.2.required) - 1)) | |
assert(!(uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)nan_or_one.buffer) || ((nan_or_one.min.0.proposed <= nan_or_one.min.0.required) && (((nan_or_one.min.0.proposed + nan_or_one.extent.0.proposed) - 1) >= ((nan_or_one.min.0.required + nan_or_one.extent.0.required) - 1))), halide_error_constraints_make_required_region_smaller("Output buffer nan_or_one", 0, nan_or_one.min.0.proposed, (nan_or_one.min.0.proposed + nan_or_one.extent.0.proposed) - 1, nan_or_one.min.0.required, (nan_or_one.min.0.required + nan_or_one.extent.0.required) - 1)) | |
assert(!(uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)nan_or_one.buffer) || ((nan_or_one.min.1.proposed <= nan_or_one.min.1.required) && (((nan_or_one.min.1.proposed + nan_or_one.extent.1.proposed) - 1) >= ((nan_or_one.min.1.required + nan_or_one.extent.1.required) - 1))), halide_error_constraints_make_required_region_smaller("Output buffer nan_or_one", 1, nan_or_one.min.1.proposed, (nan_or_one.min.1.proposed + nan_or_one.extent.1.proposed) - 1, nan_or_one.min.1.required, (nan_or_one.min.1.required + nan_or_one.extent.1.required) - 1)) | |
if ((uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)b0.buffer)) { | |
(halide_buffer_t *)_halide_buffer_init((halide_buffer_t *)b0.buffer, (halide_dimension_t *)_halide_buffer_get_shape((halide_buffer_t *)b0.buffer), (void *)reinterpret((uint64)0), (uint64)0, (halide_device_interface_t *)reinterpret((uint64)0), 2, 32, 3, (halide_dimension_t *)make_struct(b0.min.0.proposed, b0.extent.0.proposed, b0.stride.0.proposed, 0, b0.min.1.proposed, b0.extent.1.proposed, b0.stride.1.proposed, 0, b0.min.2.proposed, b0.extent.2.proposed, b0.stride.2.proposed, 0), (uint64)0) | |
} | |
if ((uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)nan_or_one.buffer)) { | |
(halide_buffer_t *)_halide_buffer_init((halide_buffer_t *)nan_or_one.buffer, (halide_dimension_t *)_halide_buffer_get_shape((halide_buffer_t *)nan_or_one.buffer), (void *)reinterpret((uint64)0), (uint64)0, (halide_device_interface_t *)reinterpret((uint64)0), 2, 32, 2, (halide_dimension_t *)make_struct(nan_or_one.min.0.proposed, nan_or_one.extent.0.proposed, nan_or_one.stride.0.proposed, 0, nan_or_one.min.1.proposed, nan_or_one.extent.1.proposed, nan_or_one.stride.1.proposed, 0), (uint64)0) | |
} | |
if (!(((uint1)0 || (uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)b0.buffer)) || (uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)nan_or_one.buffer))) { | |
assert((uint32)b0.type == (uint32)73730, halide_error_bad_type("Input buffer b0", (uint32)b0.type, (uint32)73730)) | |
assert(b0.dimensions == 3, halide_error_bad_dimensions("Input buffer b0", b0.dimensions, 3)) | |
assert((uint32)nan_or_one.type == (uint32)73730, halide_error_bad_type("Output buffer nan_or_one", (uint32)nan_or_one.type, (uint32)73730)) | |
assert(nan_or_one.dimensions == 2, halide_error_bad_dimensions("Output buffer nan_or_one", nan_or_one.dimensions, 2)) | |
assert((b0.min.0 <= b0.min.0.required) && (((b0.min.0 + b0.extent.0) - 1) >= ((b0.min.0.required + b0.extent.0.required) - 1)), halide_error_access_out_of_bounds("Input buffer b0", 0, b0.min.0.required, (b0.min.0.required + b0.extent.0.required) - 1, b0.min.0, (b0.min.0 + b0.extent.0) - 1)) | |
assert(b0.extent.0 >= 0, halide_error_buffer_extents_negative("Input buffer b0", 0, b0.extent.0)) | |
assert((b0.min.1 <= b0.min.1.required) && (((b0.min.1 + b0.extent.1) - 1) >= ((b0.min.1.required + b0.extent.1.required) - 1)), halide_error_access_out_of_bounds("Input buffer b0", 1, b0.min.1.required, (b0.min.1.required + b0.extent.1.required) - 1, b0.min.1, (b0.min.1 + b0.extent.1) - 1)) | |
assert(b0.extent.1 >= 0, halide_error_buffer_extents_negative("Input buffer b0", 1, b0.extent.1)) | |
assert((b0.min.2 <= b0.min.2.required) && (((b0.min.2 + b0.extent.2) - 1) >= ((b0.min.2.required + b0.extent.2.required) - 1)), halide_error_access_out_of_bounds("Input buffer b0", 2, b0.min.2.required, (b0.min.2.required + b0.extent.2.required) - 1, b0.min.2, (b0.min.2 + b0.extent.2) - 1)) | |
assert(b0.extent.2 >= 0, halide_error_buffer_extents_negative("Input buffer b0", 2, b0.extent.2)) | |
assert((nan_or_one.min.0 <= nan_or_one.min.0.required) && (((nan_or_one.min.0 + nan_or_one.extent.0) - 1) >= ((nan_or_one.min.0.required + nan_or_one.extent.0.required) - 1)), halide_error_access_out_of_bounds("Output buffer nan_or_one", 0, nan_or_one.min.0.required, (nan_or_one.min.0.required + nan_or_one.extent.0.required) - 1, nan_or_one.min.0, (nan_or_one.min.0 + nan_or_one.extent.0) - 1)) | |
assert(nan_or_one.extent.0 >= 0, halide_error_buffer_extents_negative("Output buffer nan_or_one", 0, nan_or_one.extent.0)) | |
assert((nan_or_one.min.1 <= nan_or_one.min.1.required) && (((nan_or_one.min.1 + nan_or_one.extent.1) - 1) >= ((nan_or_one.min.1.required + nan_or_one.extent.1.required) - 1)), halide_error_access_out_of_bounds("Output buffer nan_or_one", 1, nan_or_one.min.1.required, (nan_or_one.min.1.required + nan_or_one.extent.1.required) - 1, nan_or_one.min.1, (nan_or_one.min.1 + nan_or_one.extent.1) - 1)) | |
assert(nan_or_one.extent.1 >= 0, halide_error_buffer_extents_negative("Output buffer nan_or_one", 1, nan_or_one.extent.1)) | |
assert(b0.stride.0 == b0.stride.0.constrained, halide_error_constraint_violated("b0.stride.0", b0.stride.0, "3", b0.stride.0.constrained)) | |
assert(b0.min.0 == b0.min.0.constrained, halide_error_constraint_violated("b0.min.0", b0.min.0, "0", b0.min.0.constrained)) | |
assert(b0.extent.0 == b0.extent.0.constrained, halide_error_constraint_violated("b0.extent.0", b0.extent.0, "1", b0.extent.0.constrained)) | |
assert(b0.stride.1 == b0.stride.1.constrained, halide_error_constraint_violated("b0.stride.1", b0.stride.1, "3", b0.stride.1.constrained)) | |
assert(b0.min.1 == b0.min.1.constrained, halide_error_constraint_violated("b0.min.1", b0.min.1, "0", b0.min.1.constrained)) | |
assert(b0.extent.1 == b0.extent.1.constrained, halide_error_constraint_violated("b0.extent.1", b0.extent.1, "1", b0.extent.1.constrained)) | |
assert(b0.stride.2 == b0.stride.2.constrained, halide_error_constraint_violated("b0.stride.2", b0.stride.2, "1", b0.stride.2.constrained)) | |
assert(b0.min.2 == b0.min.2.constrained, halide_error_constraint_violated("b0.min.2", b0.min.2, "0", b0.min.2.constrained)) | |
assert(b0.extent.2 == b0.extent.2.constrained, halide_error_constraint_violated("b0.extent.2", b0.extent.2, "3", b0.extent.2.constrained)) | |
assert(nan_or_one.stride.0 == nan_or_one.stride.0.constrained, halide_error_constraint_violated("nan_or_one.stride.0", nan_or_one.stride.0, "1", nan_or_one.stride.0.constrained)) | |
let b0.total_extent.0 = int64(b0.extent.0.constrained) | |
let b0.total_extent.1 = int64(b0.extent.1.constrained)*b0.total_extent.0 | |
let b0.total_extent.2 = int64(b0.extent.2.constrained)*b0.total_extent.1 | |
let nan_or_one.total_extent.0 = int64(nan_or_one.extent.0) | |
let nan_or_one.total_extent.1 = int64(nan_or_one.extent.1)*nan_or_one.total_extent.0 | |
assert((uint64)abs(int64(b0.extent.0.constrained)*int64(b0.stride.0.constrained)) <= (uint64)2147483647, halide_error_buffer_allocation_too_large("b0", (uint64)abs(int64(b0.extent.0.constrained)*int64(b0.stride.0.constrained)), (uint64)2147483647)) | |
assert((uint64)abs(int64(b0.extent.1.constrained)*int64(b0.stride.1.constrained)) <= (uint64)2147483647, halide_error_buffer_allocation_too_large("b0", (uint64)abs(int64(b0.extent.1.constrained)*int64(b0.stride.1.constrained)), (uint64)2147483647)) | |
assert(b0.total_extent.1 <= (int64)2147483647, halide_error_buffer_extents_too_large("b0", b0.total_extent.1, (int64)2147483647)) | |
assert((uint64)abs(int64(b0.extent.2.constrained)*int64(b0.stride.2.constrained)) <= (uint64)2147483647, halide_error_buffer_allocation_too_large("b0", (uint64)abs(int64(b0.extent.2.constrained)*int64(b0.stride.2.constrained)), (uint64)2147483647)) | |
assert(b0.total_extent.2 <= (int64)2147483647, halide_error_buffer_extents_too_large("b0", b0.total_extent.2, (int64)2147483647)) | |
assert((uint64)abs(int64(nan_or_one.extent.0)*int64(nan_or_one.stride.0.constrained)) <= (uint64)2147483647, halide_error_buffer_allocation_too_large("nan_or_one", (uint64)abs(int64(nan_or_one.extent.0)*int64(nan_or_one.stride.0.constrained)), (uint64)2147483647)) | |
assert((uint64)abs(int64(nan_or_one.extent.1)*int64(nan_or_one.stride.1)) <= (uint64)2147483647, halide_error_buffer_allocation_too_large("nan_or_one", (uint64)abs(int64(nan_or_one.extent.1)*int64(nan_or_one.stride.1)), (uint64)2147483647)) | |
assert(nan_or_one.total_extent.1 <= (int64)2147483647, halide_error_buffer_extents_too_large("nan_or_one", nan_or_one.total_extent.1, (int64)2147483647)) | |
let nan_or_one.s0.row.loop_max = (nan_or_one.extent.1 + nan_or_one.min.1) + -1 | |
let nan_or_one.s0.row.loop_min = nan_or_one.min.1 | |
let nan_or_one.s0.row.loop_extent = nan_or_one.extent.1 | |
let nan_or_one.s0.col.loop_max = (nan_or_one.extent.0 + nan_or_one.min.0) + -1 | |
let nan_or_one.s0.col.loop_min = nan_or_one.min.0 | |
let nan_or_one.s0.col.loop_extent = nan_or_one.extent.0 | |
let nan_or_one.s0.col.col_outer.__block_id_x.loop_extent = nan_or_one.extent.0 | |
let nan_or_one.s0.col.col_outer.__block_id_x.loop_max = nan_or_one.extent.0 + -1 | |
let nan_or_one.s0.col.col_outer.__block_id_x.loop_min = 0 | |
let nan_or_one.s0.col.col_inner.__thread_id_x.loop_extent = 1 | |
let nan_or_one.s0.col.col_inner.__thread_id_x.loop_max = 0 | |
let nan_or_one.s0.col.col_inner.__thread_id_x.loop_min = 0 | |
let nan_or_one.s0.row.row_outer.__block_id_y.loop_extent = nan_or_one.extent.1 | |
let nan_or_one.s0.row.row_outer.__block_id_y.loop_max = nan_or_one.extent.1 + -1 | |
let nan_or_one.s0.row.row_outer.__block_id_y.loop_min = 0 | |
let nan_or_one.s0.row.row_inner.__thread_id_y.loop_extent = 1 | |
let nan_or_one.s0.row.row_inner.__thread_id_y.loop_max = 0 | |
let nan_or_one.s0.row.row_inner.__thread_id_y.loop_min = 0 | |
let nan_or_one.s0.row.row_outer.loop_min.orig = 0 | |
produce nan_or_one { | |
gpu_block<Default_GPU> (nan_or_one.s0.row.row_outer.__block_id_y, 0, nan_or_one.extent.1) { | |
let nan_or_one.s0.row.row_inner.base = nan_or_one.min.1 + nan_or_one.s0.row.row_outer.__block_id_y | |
let nan_or_one.s0.col.col_outer.loop_min.orig = 0 | |
gpu_block<Default_GPU> (nan_or_one.s0.col.col_outer.__block_id_x, 0, nan_or_one.extent.0) { | |
let nan_or_one.s0.col.col_inner.base = nan_or_one.min.0 + nan_or_one.s0.col.col_outer.__block_id_x | |
let nan_or_one.s0.row.row_inner.loop_min.orig = 0 | |
gpu_thread<Default_GPU> (nan_or_one.s0.row.row_inner.__thread_id_y, 0, 1) { | |
let nan_or_one.s0.row = nan_or_one.min.1 + nan_or_one.s0.row.row_outer.__block_id_y | |
let nan_or_one.s0.col.col_inner.loop_min.orig = 0 | |
gpu_thread<Default_GPU> (nan_or_one.s0.col.col_inner.__thread_id_x, 0, 1) { | |
let nan_or_one.s0.col = nan_or_one.min.0 + nan_or_one.s0.col.col_outer.__block_id_x | |
nan_or_one[((0 + ((nan_or_one.min.0 + nan_or_one.s0.col.col_outer.__block_id_x)*nan_or_one.stride.0)) + ((nan_or_one.min.1 + nan_or_one.s0.row.row_outer.__block_id_y)*nan_or_one.stride.1)) - ((0 + (nan_or_one.min.0*nan_or_one.stride.0)) + (nan_or_one.min.1*nan_or_one.stride.1))] = | |
let t1 = (float32)strict_float(nanf) in | |
(float32)strict_float(select((uint1)is_nan_f32((float32)strict_float((float32)b0[(((0 + ((nan_or_one.min.0 + nan_or_one.s0.col.col_outer.__block_id_x)*b0.stride.0)) + ((nan_or_one.min.1 + nan_or_one.s0.row.row_outer.__block_id_y)*b0.stride.1)) + (0*b0.stride.2)) - (((0 + (b0.min.0*b0.stride.0)) + (b0.min.1*b0.stride.1)) + (b0.min.2*b0.stride.2))])), t1, t1)) | |
} | |
} | |
} | |
} | |
} | |
} | |
Adding atomic mutex allocation... | |
Lowering after adding atomic mutex allocation: (unchanged) | |
Unpacking buffer arguments... | |
Lowering after unpacking buffer arguments: | |
assert((uint64)reinterpret((halide_buffer_t *)nan_or_one.buffer) != (uint64)0, halide_error_buffer_argument_is_null("nan_or_one")) | |
assert((uint64)reinterpret((halide_buffer_t *)b0.buffer) != (uint64)0, halide_error_buffer_argument_is_null("b0")) | |
let b0 = (void *)_halide_buffer_get_host((halide_buffer_t *)b0.buffer) | |
let b0.type = (uint32)_halide_buffer_get_type((halide_buffer_t *)b0.buffer) | |
let b0.dimensions = _halide_buffer_get_dimensions((halide_buffer_t *)b0.buffer) | |
let b0.min.0 = _halide_buffer_get_min((halide_buffer_t *)b0.buffer, 0) | |
let b0.extent.0 = _halide_buffer_get_extent((halide_buffer_t *)b0.buffer, 0) | |
let b0.stride.0 = _halide_buffer_get_stride((halide_buffer_t *)b0.buffer, 0) | |
let b0.min.1 = _halide_buffer_get_min((halide_buffer_t *)b0.buffer, 1) | |
let b0.extent.1 = _halide_buffer_get_extent((halide_buffer_t *)b0.buffer, 1) | |
let b0.stride.1 = _halide_buffer_get_stride((halide_buffer_t *)b0.buffer, 1) | |
let b0.min.2 = _halide_buffer_get_min((halide_buffer_t *)b0.buffer, 2) | |
let b0.extent.2 = _halide_buffer_get_extent((halide_buffer_t *)b0.buffer, 2) | |
let b0.stride.2 = _halide_buffer_get_stride((halide_buffer_t *)b0.buffer, 2) | |
let nan_or_one = (void *)_halide_buffer_get_host((halide_buffer_t *)nan_or_one.buffer) | |
let nan_or_one.type = (uint32)_halide_buffer_get_type((halide_buffer_t *)nan_or_one.buffer) | |
let nan_or_one.dimensions = _halide_buffer_get_dimensions((halide_buffer_t *)nan_or_one.buffer) | |
let nan_or_one.min.0 = _halide_buffer_get_min((halide_buffer_t *)nan_or_one.buffer, 0) | |
let nan_or_one.extent.0 = _halide_buffer_get_extent((halide_buffer_t *)nan_or_one.buffer, 0) | |
let nan_or_one.stride.0 = _halide_buffer_get_stride((halide_buffer_t *)nan_or_one.buffer, 0) | |
let nan_or_one.min.1 = _halide_buffer_get_min((halide_buffer_t *)nan_or_one.buffer, 1) | |
let nan_or_one.extent.1 = _halide_buffer_get_extent((halide_buffer_t *)nan_or_one.buffer, 1) | |
let nan_or_one.stride.1 = _halide_buffer_get_stride((halide_buffer_t *)nan_or_one.buffer, 1) | |
let nan_or_one.s0.row.max = (nan_or_one.extent.1 + nan_or_one.min.1) + -1 | |
let nan_or_one.s0.row.min = nan_or_one.min.1 | |
let nan_or_one.s0.col.max = (nan_or_one.extent.0 + nan_or_one.min.0) + -1 | |
let nan_or_one.s0.col.min = nan_or_one.min.0 | |
let b0.extent.0.required = ((nan_or_one.min.0 + (nan_or_one.extent.0 + -1)) + 1) - (nan_or_one.min.0 + 0) | |
let b0.min.0.required = nan_or_one.min.0 + 0 | |
let b0.stride.0.required = 1 | |
let b0.extent.1.required = ((nan_or_one.min.1 + (nan_or_one.extent.1 + -1)) + 1) - (nan_or_one.min.1 + 0) | |
let b0.min.1.required = nan_or_one.min.1 + 0 | |
let b0.stride.1.required = b0.stride.0.required*b0.extent.0.required | |
let b0.extent.2.required = (0 + 1) - 0 | |
let b0.min.2.required = 0 | |
let b0.stride.2.required = b0.stride.1.required*b0.extent.1.required | |
let nan_or_one.extent.0.required = ((nan_or_one.min.0 + (nan_or_one.extent.0 + -1)) + 1) - (nan_or_one.min.0 + 0) | |
let nan_or_one.min.0.required = nan_or_one.min.0 + 0 | |
let nan_or_one.stride.0.required = 1 | |
let nan_or_one.extent.1.required = ((nan_or_one.min.1 + (nan_or_one.extent.1 + -1)) + 1) - (nan_or_one.min.1 + 0) | |
let nan_or_one.min.1.required = nan_or_one.min.1 + 0 | |
let nan_or_one.stride.1.required = nan_or_one.stride.0.required*nan_or_one.extent.0.required | |
let b0.stride.0.constrained = 3 | |
let b0.min.0.constrained = 0 | |
let b0.extent.0.constrained = 1 | |
let b0.stride.1.constrained = 3 | |
let b0.min.1.constrained = 0 | |
let b0.extent.1.constrained = 1 | |
let b0.stride.2.constrained = 1 | |
let b0.min.2.constrained = 0 | |
let b0.extent.2.constrained = 3 | |
let nan_or_one.stride.0.constrained = 1 | |
let b0.stride.0.proposed = 3 | |
let b0.min.0.proposed = 0 | |
let b0.extent.0.proposed = 1 | |
let b0.stride.1.proposed = 3 | |
let b0.min.1.proposed = 0 | |
let b0.extent.1.proposed = 1 | |
let b0.stride.2.proposed = 1 | |
let b0.min.2.proposed = 0 | |
let b0.extent.2.proposed = 3 | |
let nan_or_one.stride.0.proposed = 1 | |
let nan_or_one.min.0.proposed = nan_or_one.min.0.required | |
let nan_or_one.extent.0.proposed = nan_or_one.extent.0.required | |
let nan_or_one.stride.1.proposed = nan_or_one.stride.1.required | |
let nan_or_one.min.1.proposed = nan_or_one.min.1.required | |
let nan_or_one.extent.1.proposed = nan_or_one.extent.1.required | |
assert(!(uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)b0.buffer) || ((b0.min.0.proposed <= b0.min.0.required) && (((b0.min.0.proposed + b0.extent.0.proposed) - 1) >= ((b0.min.0.required + b0.extent.0.required) - 1))), halide_error_constraints_make_required_region_smaller("Input buffer b0", 0, b0.min.0.proposed, (b0.min.0.proposed + b0.extent.0.proposed) - 1, b0.min.0.required, (b0.min.0.required + b0.extent.0.required) - 1)) | |
assert(!(uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)b0.buffer) || ((b0.min.1.proposed <= b0.min.1.required) && (((b0.min.1.proposed + b0.extent.1.proposed) - 1) >= ((b0.min.1.required + b0.extent.1.required) - 1))), halide_error_constraints_make_required_region_smaller("Input buffer b0", 1, b0.min.1.proposed, (b0.min.1.proposed + b0.extent.1.proposed) - 1, b0.min.1.required, (b0.min.1.required + b0.extent.1.required) - 1)) | |
assert(!(uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)b0.buffer) || ((b0.min.2.proposed <= b0.min.2.required) && (((b0.min.2.proposed + b0.extent.2.proposed) - 1) >= ((b0.min.2.required + b0.extent.2.required) - 1))), halide_error_constraints_make_required_region_smaller("Input buffer b0", 2, b0.min.2.proposed, (b0.min.2.proposed + b0.extent.2.proposed) - 1, b0.min.2.required, (b0.min.2.required + b0.extent.2.required) - 1)) | |
assert(!(uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)nan_or_one.buffer) || ((nan_or_one.min.0.proposed <= nan_or_one.min.0.required) && (((nan_or_one.min.0.proposed + nan_or_one.extent.0.proposed) - 1) >= ((nan_or_one.min.0.required + nan_or_one.extent.0.required) - 1))), halide_error_constraints_make_required_region_smaller("Output buffer nan_or_one", 0, nan_or_one.min.0.proposed, (nan_or_one.min.0.proposed + nan_or_one.extent.0.proposed) - 1, nan_or_one.min.0.required, (nan_or_one.min.0.required + nan_or_one.extent.0.required) - 1)) | |
assert(!(uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)nan_or_one.buffer) || ((nan_or_one.min.1.proposed <= nan_or_one.min.1.required) && (((nan_or_one.min.1.proposed + nan_or_one.extent.1.proposed) - 1) >= ((nan_or_one.min.1.required + nan_or_one.extent.1.required) - 1))), halide_error_constraints_make_required_region_smaller("Output buffer nan_or_one", 1, nan_or_one.min.1.proposed, (nan_or_one.min.1.proposed + nan_or_one.extent.1.proposed) - 1, nan_or_one.min.1.required, (nan_or_one.min.1.required + nan_or_one.extent.1.required) - 1)) | |
if ((uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)b0.buffer)) { | |
(halide_buffer_t *)_halide_buffer_init((halide_buffer_t *)b0.buffer, (halide_dimension_t *)_halide_buffer_get_shape((halide_buffer_t *)b0.buffer), (void *)reinterpret((uint64)0), (uint64)0, (halide_device_interface_t *)reinterpret((uint64)0), 2, 32, 3, (halide_dimension_t *)make_struct(b0.min.0.proposed, b0.extent.0.proposed, b0.stride.0.proposed, 0, b0.min.1.proposed, b0.extent.1.proposed, b0.stride.1.proposed, 0, b0.min.2.proposed, b0.extent.2.proposed, b0.stride.2.proposed, 0), (uint64)0) | |
} | |
if ((uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)nan_or_one.buffer)) { | |
(halide_buffer_t *)_halide_buffer_init((halide_buffer_t *)nan_or_one.buffer, (halide_dimension_t *)_halide_buffer_get_shape((halide_buffer_t *)nan_or_one.buffer), (void *)reinterpret((uint64)0), (uint64)0, (halide_device_interface_t *)reinterpret((uint64)0), 2, 32, 2, (halide_dimension_t *)make_struct(nan_or_one.min.0.proposed, nan_or_one.extent.0.proposed, nan_or_one.stride.0.proposed, 0, nan_or_one.min.1.proposed, nan_or_one.extent.1.proposed, nan_or_one.stride.1.proposed, 0), (uint64)0) | |
} | |
if (!(((uint1)0 || (uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)b0.buffer)) || (uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)nan_or_one.buffer))) { | |
assert(b0.type == (uint32)73730, halide_error_bad_type("Input buffer b0", b0.type, (uint32)73730)) | |
assert(b0.dimensions == 3, halide_error_bad_dimensions("Input buffer b0", b0.dimensions, 3)) | |
assert(nan_or_one.type == (uint32)73730, halide_error_bad_type("Output buffer nan_or_one", nan_or_one.type, (uint32)73730)) | |
assert(nan_or_one.dimensions == 2, halide_error_bad_dimensions("Output buffer nan_or_one", nan_or_one.dimensions, 2)) | |
assert((b0.min.0 <= b0.min.0.required) && (((b0.min.0 + b0.extent.0) - 1) >= ((b0.min.0.required + b0.extent.0.required) - 1)), halide_error_access_out_of_bounds("Input buffer b0", 0, b0.min.0.required, (b0.min.0.required + b0.extent.0.required) - 1, b0.min.0, (b0.min.0 + b0.extent.0) - 1)) | |
assert(b0.extent.0 >= 0, halide_error_buffer_extents_negative("Input buffer b0", 0, b0.extent.0)) | |
assert((b0.min.1 <= b0.min.1.required) && (((b0.min.1 + b0.extent.1) - 1) >= ((b0.min.1.required + b0.extent.1.required) - 1)), halide_error_access_out_of_bounds("Input buffer b0", 1, b0.min.1.required, (b0.min.1.required + b0.extent.1.required) - 1, b0.min.1, (b0.min.1 + b0.extent.1) - 1)) | |
assert(b0.extent.1 >= 0, halide_error_buffer_extents_negative("Input buffer b0", 1, b0.extent.1)) | |
assert((b0.min.2 <= b0.min.2.required) && (((b0.min.2 + b0.extent.2) - 1) >= ((b0.min.2.required + b0.extent.2.required) - 1)), halide_error_access_out_of_bounds("Input buffer b0", 2, b0.min.2.required, (b0.min.2.required + b0.extent.2.required) - 1, b0.min.2, (b0.min.2 + b0.extent.2) - 1)) | |
assert(b0.extent.2 >= 0, halide_error_buffer_extents_negative("Input buffer b0", 2, b0.extent.2)) | |
assert((nan_or_one.min.0 <= nan_or_one.min.0.required) && (((nan_or_one.min.0 + nan_or_one.extent.0) - 1) >= ((nan_or_one.min.0.required + nan_or_one.extent.0.required) - 1)), halide_error_access_out_of_bounds("Output buffer nan_or_one", 0, nan_or_one.min.0.required, (nan_or_one.min.0.required + nan_or_one.extent.0.required) - 1, nan_or_one.min.0, (nan_or_one.min.0 + nan_or_one.extent.0) - 1)) | |
assert(nan_or_one.extent.0 >= 0, halide_error_buffer_extents_negative("Output buffer nan_or_one", 0, nan_or_one.extent.0)) | |
assert((nan_or_one.min.1 <= nan_or_one.min.1.required) && (((nan_or_one.min.1 + nan_or_one.extent.1) - 1) >= ((nan_or_one.min.1.required + nan_or_one.extent.1.required) - 1)), halide_error_access_out_of_bounds("Output buffer nan_or_one", 1, nan_or_one.min.1.required, (nan_or_one.min.1.required + nan_or_one.extent.1.required) - 1, nan_or_one.min.1, (nan_or_one.min.1 + nan_or_one.extent.1) - 1)) | |
assert(nan_or_one.extent.1 >= 0, halide_error_buffer_extents_negative("Output buffer nan_or_one", 1, nan_or_one.extent.1)) | |
assert(b0.stride.0 == b0.stride.0.constrained, halide_error_constraint_violated("b0.stride.0", b0.stride.0, "3", b0.stride.0.constrained)) | |
assert(b0.min.0 == b0.min.0.constrained, halide_error_constraint_violated("b0.min.0", b0.min.0, "0", b0.min.0.constrained)) | |
assert(b0.extent.0 == b0.extent.0.constrained, halide_error_constraint_violated("b0.extent.0", b0.extent.0, "1", b0.extent.0.constrained)) | |
assert(b0.stride.1 == b0.stride.1.constrained, halide_error_constraint_violated("b0.stride.1", b0.stride.1, "3", b0.stride.1.constrained)) | |
assert(b0.min.1 == b0.min.1.constrained, halide_error_constraint_violated("b0.min.1", b0.min.1, "0", b0.min.1.constrained)) | |
assert(b0.extent.1 == b0.extent.1.constrained, halide_error_constraint_violated("b0.extent.1", b0.extent.1, "1", b0.extent.1.constrained)) | |
assert(b0.stride.2 == b0.stride.2.constrained, halide_error_constraint_violated("b0.stride.2", b0.stride.2, "1", b0.stride.2.constrained)) | |
assert(b0.min.2 == b0.min.2.constrained, halide_error_constraint_violated("b0.min.2", b0.min.2, "0", b0.min.2.constrained)) | |
assert(b0.extent.2 == b0.extent.2.constrained, halide_error_constraint_violated("b0.extent.2", b0.extent.2, "3", b0.extent.2.constrained)) | |
assert(nan_or_one.stride.0 == nan_or_one.stride.0.constrained, halide_error_constraint_violated("nan_or_one.stride.0", nan_or_one.stride.0, "1", nan_or_one.stride.0.constrained)) | |
let b0.total_extent.0 = int64(b0.extent.0.constrained) | |
let b0.total_extent.1 = int64(b0.extent.1.constrained)*b0.total_extent.0 | |
let b0.total_extent.2 = int64(b0.extent.2.constrained)*b0.total_extent.1 | |
let nan_or_one.total_extent.0 = int64(nan_or_one.extent.0) | |
let nan_or_one.total_extent.1 = int64(nan_or_one.extent.1)*nan_or_one.total_extent.0 | |
assert((uint64)abs(int64(b0.extent.0.constrained)*int64(b0.stride.0.constrained)) <= (uint64)2147483647, halide_error_buffer_allocation_too_large("b0", (uint64)abs(int64(b0.extent.0.constrained)*int64(b0.stride.0.constrained)), (uint64)2147483647)) | |
assert((uint64)abs(int64(b0.extent.1.constrained)*int64(b0.stride.1.constrained)) <= (uint64)2147483647, halide_error_buffer_allocation_too_large("b0", (uint64)abs(int64(b0.extent.1.constrained)*int64(b0.stride.1.constrained)), (uint64)2147483647)) | |
assert(b0.total_extent.1 <= (int64)2147483647, halide_error_buffer_extents_too_large("b0", b0.total_extent.1, (int64)2147483647)) | |
assert((uint64)abs(int64(b0.extent.2.constrained)*int64(b0.stride.2.constrained)) <= (uint64)2147483647, halide_error_buffer_allocation_too_large("b0", (uint64)abs(int64(b0.extent.2.constrained)*int64(b0.stride.2.constrained)), (uint64)2147483647)) | |
assert(b0.total_extent.2 <= (int64)2147483647, halide_error_buffer_extents_too_large("b0", b0.total_extent.2, (int64)2147483647)) | |
assert((uint64)abs(int64(nan_or_one.extent.0)*int64(nan_or_one.stride.0.constrained)) <= (uint64)2147483647, halide_error_buffer_allocation_too_large("nan_or_one", (uint64)abs(int64(nan_or_one.extent.0)*int64(nan_or_one.stride.0.constrained)), (uint64)2147483647)) | |
assert((uint64)abs(int64(nan_or_one.extent.1)*int64(nan_or_one.stride.1)) <= (uint64)2147483647, halide_error_buffer_allocation_too_large("nan_or_one", (uint64)abs(int64(nan_or_one.extent.1)*int64(nan_or_one.stride.1)), (uint64)2147483647)) | |
assert(nan_or_one.total_extent.1 <= (int64)2147483647, halide_error_buffer_extents_too_large("nan_or_one", nan_or_one.total_extent.1, (int64)2147483647)) | |
let nan_or_one.s0.row.loop_max = (nan_or_one.extent.1 + nan_or_one.min.1) + -1 | |
let nan_or_one.s0.row.loop_min = nan_or_one.min.1 | |
let nan_or_one.s0.row.loop_extent = nan_or_one.extent.1 | |
let nan_or_one.s0.col.loop_max = (nan_or_one.extent.0 + nan_or_one.min.0) + -1 | |
let nan_or_one.s0.col.loop_min = nan_or_one.min.0 | |
let nan_or_one.s0.col.loop_extent = nan_or_one.extent.0 | |
let nan_or_one.s0.col.col_outer.__block_id_x.loop_extent = nan_or_one.extent.0 | |
let nan_or_one.s0.col.col_outer.__block_id_x.loop_max = nan_or_one.extent.0 + -1 | |
let nan_or_one.s0.col.col_outer.__block_id_x.loop_min = 0 | |
let nan_or_one.s0.col.col_inner.__thread_id_x.loop_extent = 1 | |
let nan_or_one.s0.col.col_inner.__thread_id_x.loop_max = 0 | |
let nan_or_one.s0.col.col_inner.__thread_id_x.loop_min = 0 | |
let nan_or_one.s0.row.row_outer.__block_id_y.loop_extent = nan_or_one.extent.1 | |
let nan_or_one.s0.row.row_outer.__block_id_y.loop_max = nan_or_one.extent.1 + -1 | |
let nan_or_one.s0.row.row_outer.__block_id_y.loop_min = 0 | |
let nan_or_one.s0.row.row_inner.__thread_id_y.loop_extent = 1 | |
let nan_or_one.s0.row.row_inner.__thread_id_y.loop_max = 0 | |
let nan_or_one.s0.row.row_inner.__thread_id_y.loop_min = 0 | |
let nan_or_one.s0.row.row_outer.loop_min.orig = 0 | |
produce nan_or_one { | |
gpu_block<Default_GPU> (nan_or_one.s0.row.row_outer.__block_id_y, 0, nan_or_one.extent.1) { | |
let nan_or_one.s0.row.row_inner.base = nan_or_one.min.1 + nan_or_one.s0.row.row_outer.__block_id_y | |
let nan_or_one.s0.col.col_outer.loop_min.orig = 0 | |
gpu_block<Default_GPU> (nan_or_one.s0.col.col_outer.__block_id_x, 0, nan_or_one.extent.0) { | |
let nan_or_one.s0.col.col_inner.base = nan_or_one.min.0 + nan_or_one.s0.col.col_outer.__block_id_x | |
let nan_or_one.s0.row.row_inner.loop_min.orig = 0 | |
gpu_thread<Default_GPU> (nan_or_one.s0.row.row_inner.__thread_id_y, 0, 1) { | |
let nan_or_one.s0.row = nan_or_one.min.1 + nan_or_one.s0.row.row_outer.__block_id_y | |
let nan_or_one.s0.col.col_inner.loop_min.orig = 0 | |
gpu_thread<Default_GPU> (nan_or_one.s0.col.col_inner.__thread_id_x, 0, 1) { | |
let nan_or_one.s0.col = nan_or_one.min.0 + nan_or_one.s0.col.col_outer.__block_id_x | |
nan_or_one[((0 + ((nan_or_one.min.0 + nan_or_one.s0.col.col_outer.__block_id_x)*nan_or_one.stride.0)) + ((nan_or_one.min.1 + nan_or_one.s0.row.row_outer.__block_id_y)*nan_or_one.stride.1)) - ((0 + (nan_or_one.min.0*nan_or_one.stride.0)) + (nan_or_one.min.1*nan_or_one.stride.1))] = | |
let t1 = (float32)strict_float(nanf) in | |
(float32)strict_float(select((uint1)is_nan_f32((float32)strict_float(b0[(((0 + ((nan_or_one.min.0 + nan_or_one.s0.col.col_outer.__block_id_x)*b0.stride.0)) + ((nan_or_one.min.1 + nan_or_one.s0.row.row_outer.__block_id_y)*b0.stride.1)) + (0*b0.stride.2)) - (((0 + (b0.min.0*b0.stride.0)) + (b0.min.1*b0.stride.1)) + (b0.min.2*b0.stride.2))])), t1, t1)) | |
} | |
} | |
} | |
} | |
} | |
} | |
Skipping rewriting memoized allocations... | |
Selecting a GPU API for GPU loops... | |
Lowering after selecting a GPU API: | |
assert((uint64)reinterpret((halide_buffer_t *)nan_or_one.buffer) != (uint64)0, halide_error_buffer_argument_is_null("nan_or_one")) | |
assert((uint64)reinterpret((halide_buffer_t *)b0.buffer) != (uint64)0, halide_error_buffer_argument_is_null("b0")) | |
let b0 = (void *)_halide_buffer_get_host((halide_buffer_t *)b0.buffer) | |
let b0.type = (uint32)_halide_buffer_get_type((halide_buffer_t *)b0.buffer) | |
let b0.dimensions = _halide_buffer_get_dimensions((halide_buffer_t *)b0.buffer) | |
let b0.min.0 = _halide_buffer_get_min((halide_buffer_t *)b0.buffer, 0) | |
let b0.extent.0 = _halide_buffer_get_extent((halide_buffer_t *)b0.buffer, 0) | |
let b0.stride.0 = _halide_buffer_get_stride((halide_buffer_t *)b0.buffer, 0) | |
let b0.min.1 = _halide_buffer_get_min((halide_buffer_t *)b0.buffer, 1) | |
let b0.extent.1 = _halide_buffer_get_extent((halide_buffer_t *)b0.buffer, 1) | |
let b0.stride.1 = _halide_buffer_get_stride((halide_buffer_t *)b0.buffer, 1) | |
let b0.min.2 = _halide_buffer_get_min((halide_buffer_t *)b0.buffer, 2) | |
let b0.extent.2 = _halide_buffer_get_extent((halide_buffer_t *)b0.buffer, 2) | |
let b0.stride.2 = _halide_buffer_get_stride((halide_buffer_t *)b0.buffer, 2) | |
let nan_or_one = (void *)_halide_buffer_get_host((halide_buffer_t *)nan_or_one.buffer) | |
let nan_or_one.type = (uint32)_halide_buffer_get_type((halide_buffer_t *)nan_or_one.buffer) | |
let nan_or_one.dimensions = _halide_buffer_get_dimensions((halide_buffer_t *)nan_or_one.buffer) | |
let nan_or_one.min.0 = _halide_buffer_get_min((halide_buffer_t *)nan_or_one.buffer, 0) | |
let nan_or_one.extent.0 = _halide_buffer_get_extent((halide_buffer_t *)nan_or_one.buffer, 0) | |
let nan_or_one.stride.0 = _halide_buffer_get_stride((halide_buffer_t *)nan_or_one.buffer, 0) | |
let nan_or_one.min.1 = _halide_buffer_get_min((halide_buffer_t *)nan_or_one.buffer, 1) | |
let nan_or_one.extent.1 = _halide_buffer_get_extent((halide_buffer_t *)nan_or_one.buffer, 1) | |
let nan_or_one.stride.1 = _halide_buffer_get_stride((halide_buffer_t *)nan_or_one.buffer, 1) | |
let nan_or_one.s0.row.max = (nan_or_one.extent.1 + nan_or_one.min.1) + -1 | |
let nan_or_one.s0.row.min = nan_or_one.min.1 | |
let nan_or_one.s0.col.max = (nan_or_one.extent.0 + nan_or_one.min.0) + -1 | |
let nan_or_one.s0.col.min = nan_or_one.min.0 | |
let b0.extent.0.required = ((nan_or_one.min.0 + (nan_or_one.extent.0 + -1)) + 1) - (nan_or_one.min.0 + 0) | |
let b0.min.0.required = nan_or_one.min.0 + 0 | |
let b0.stride.0.required = 1 | |
let b0.extent.1.required = ((nan_or_one.min.1 + (nan_or_one.extent.1 + -1)) + 1) - (nan_or_one.min.1 + 0) | |
let b0.min.1.required = nan_or_one.min.1 + 0 | |
let b0.stride.1.required = b0.stride.0.required*b0.extent.0.required | |
let b0.extent.2.required = (0 + 1) - 0 | |
let b0.min.2.required = 0 | |
let b0.stride.2.required = b0.stride.1.required*b0.extent.1.required | |
let nan_or_one.extent.0.required = ((nan_or_one.min.0 + (nan_or_one.extent.0 + -1)) + 1) - (nan_or_one.min.0 + 0) | |
let nan_or_one.min.0.required = nan_or_one.min.0 + 0 | |
let nan_or_one.stride.0.required = 1 | |
let nan_or_one.extent.1.required = ((nan_or_one.min.1 + (nan_or_one.extent.1 + -1)) + 1) - (nan_or_one.min.1 + 0) | |
let nan_or_one.min.1.required = nan_or_one.min.1 + 0 | |
let nan_or_one.stride.1.required = nan_or_one.stride.0.required*nan_or_one.extent.0.required | |
let b0.stride.0.constrained = 3 | |
let b0.min.0.constrained = 0 | |
let b0.extent.0.constrained = 1 | |
let b0.stride.1.constrained = 3 | |
let b0.min.1.constrained = 0 | |
let b0.extent.1.constrained = 1 | |
let b0.stride.2.constrained = 1 | |
let b0.min.2.constrained = 0 | |
let b0.extent.2.constrained = 3 | |
let nan_or_one.stride.0.constrained = 1 | |
let b0.stride.0.proposed = 3 | |
let b0.min.0.proposed = 0 | |
let b0.extent.0.proposed = 1 | |
let b0.stride.1.proposed = 3 | |
let b0.min.1.proposed = 0 | |
let b0.extent.1.proposed = 1 | |
let b0.stride.2.proposed = 1 | |
let b0.min.2.proposed = 0 | |
let b0.extent.2.proposed = 3 | |
let nan_or_one.stride.0.proposed = 1 | |
let nan_or_one.min.0.proposed = nan_or_one.min.0.required | |
let nan_or_one.extent.0.proposed = nan_or_one.extent.0.required | |
let nan_or_one.stride.1.proposed = nan_or_one.stride.1.required | |
let nan_or_one.min.1.proposed = nan_or_one.min.1.required | |
let nan_or_one.extent.1.proposed = nan_or_one.extent.1.required | |
assert(!(uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)b0.buffer) || ((b0.min.0.proposed <= b0.min.0.required) && (((b0.min.0.proposed + b0.extent.0.proposed) - 1) >= ((b0.min.0.required + b0.extent.0.required) - 1))), halide_error_constraints_make_required_region_smaller("Input buffer b0", 0, b0.min.0.proposed, (b0.min.0.proposed + b0.extent.0.proposed) - 1, b0.min.0.required, (b0.min.0.required + b0.extent.0.required) - 1)) | |
assert(!(uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)b0.buffer) || ((b0.min.1.proposed <= b0.min.1.required) && (((b0.min.1.proposed + b0.extent.1.proposed) - 1) >= ((b0.min.1.required + b0.extent.1.required) - 1))), halide_error_constraints_make_required_region_smaller("Input buffer b0", 1, b0.min.1.proposed, (b0.min.1.proposed + b0.extent.1.proposed) - 1, b0.min.1.required, (b0.min.1.required + b0.extent.1.required) - 1)) | |
assert(!(uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)b0.buffer) || ((b0.min.2.proposed <= b0.min.2.required) && (((b0.min.2.proposed + b0.extent.2.proposed) - 1) >= ((b0.min.2.required + b0.extent.2.required) - 1))), halide_error_constraints_make_required_region_smaller("Input buffer b0", 2, b0.min.2.proposed, (b0.min.2.proposed + b0.extent.2.proposed) - 1, b0.min.2.required, (b0.min.2.required + b0.extent.2.required) - 1)) | |
assert(!(uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)nan_or_one.buffer) || ((nan_or_one.min.0.proposed <= nan_or_one.min.0.required) && (((nan_or_one.min.0.proposed + nan_or_one.extent.0.proposed) - 1) >= ((nan_or_one.min.0.required + nan_or_one.extent.0.required) - 1))), halide_error_constraints_make_required_region_smaller("Output buffer nan_or_one", 0, nan_or_one.min.0.proposed, (nan_or_one.min.0.proposed + nan_or_one.extent.0.proposed) - 1, nan_or_one.min.0.required, (nan_or_one.min.0.required + nan_or_one.extent.0.required) - 1)) | |
assert(!(uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)nan_or_one.buffer) || ((nan_or_one.min.1.proposed <= nan_or_one.min.1.required) && (((nan_or_one.min.1.proposed + nan_or_one.extent.1.proposed) - 1) >= ((nan_or_one.min.1.required + nan_or_one.extent.1.required) - 1))), halide_error_constraints_make_required_region_smaller("Output buffer nan_or_one", 1, nan_or_one.min.1.proposed, (nan_or_one.min.1.proposed + nan_or_one.extent.1.proposed) - 1, nan_or_one.min.1.required, (nan_or_one.min.1.required + nan_or_one.extent.1.required) - 1)) | |
if ((uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)b0.buffer)) { | |
(halide_buffer_t *)_halide_buffer_init((halide_buffer_t *)b0.buffer, (halide_dimension_t *)_halide_buffer_get_shape((halide_buffer_t *)b0.buffer), (void *)reinterpret((uint64)0), (uint64)0, (halide_device_interface_t *)reinterpret((uint64)0), 2, 32, 3, (halide_dimension_t *)make_struct(b0.min.0.proposed, b0.extent.0.proposed, b0.stride.0.proposed, 0, b0.min.1.proposed, b0.extent.1.proposed, b0.stride.1.proposed, 0, b0.min.2.proposed, b0.extent.2.proposed, b0.stride.2.proposed, 0), (uint64)0) | |
} | |
if ((uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)nan_or_one.buffer)) { | |
(halide_buffer_t *)_halide_buffer_init((halide_buffer_t *)nan_or_one.buffer, (halide_dimension_t *)_halide_buffer_get_shape((halide_buffer_t *)nan_or_one.buffer), (void *)reinterpret((uint64)0), (uint64)0, (halide_device_interface_t *)reinterpret((uint64)0), 2, 32, 2, (halide_dimension_t *)make_struct(nan_or_one.min.0.proposed, nan_or_one.extent.0.proposed, nan_or_one.stride.0.proposed, 0, nan_or_one.min.1.proposed, nan_or_one.extent.1.proposed, nan_or_one.stride.1.proposed, 0), (uint64)0) | |
} | |
if (!(((uint1)0 || (uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)b0.buffer)) || (uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)nan_or_one.buffer))) { | |
assert(b0.type == (uint32)73730, halide_error_bad_type("Input buffer b0", b0.type, (uint32)73730)) | |
assert(b0.dimensions == 3, halide_error_bad_dimensions("Input buffer b0", b0.dimensions, 3)) | |
assert(nan_or_one.type == (uint32)73730, halide_error_bad_type("Output buffer nan_or_one", nan_or_one.type, (uint32)73730)) | |
assert(nan_or_one.dimensions == 2, halide_error_bad_dimensions("Output buffer nan_or_one", nan_or_one.dimensions, 2)) | |
assert((b0.min.0 <= b0.min.0.required) && (((b0.min.0 + b0.extent.0) - 1) >= ((b0.min.0.required + b0.extent.0.required) - 1)), halide_error_access_out_of_bounds("Input buffer b0", 0, b0.min.0.required, (b0.min.0.required + b0.extent.0.required) - 1, b0.min.0, (b0.min.0 + b0.extent.0) - 1)) | |
assert(b0.extent.0 >= 0, halide_error_buffer_extents_negative("Input buffer b0", 0, b0.extent.0)) | |
assert((b0.min.1 <= b0.min.1.required) && (((b0.min.1 + b0.extent.1) - 1) >= ((b0.min.1.required + b0.extent.1.required) - 1)), halide_error_access_out_of_bounds("Input buffer b0", 1, b0.min.1.required, (b0.min.1.required + b0.extent.1.required) - 1, b0.min.1, (b0.min.1 + b0.extent.1) - 1)) | |
assert(b0.extent.1 >= 0, halide_error_buffer_extents_negative("Input buffer b0", 1, b0.extent.1)) | |
assert((b0.min.2 <= b0.min.2.required) && (((b0.min.2 + b0.extent.2) - 1) >= ((b0.min.2.required + b0.extent.2.required) - 1)), halide_error_access_out_of_bounds("Input buffer b0", 2, b0.min.2.required, (b0.min.2.required + b0.extent.2.required) - 1, b0.min.2, (b0.min.2 + b0.extent.2) - 1)) | |
assert(b0.extent.2 >= 0, halide_error_buffer_extents_negative("Input buffer b0", 2, b0.extent.2)) | |
assert((nan_or_one.min.0 <= nan_or_one.min.0.required) && (((nan_or_one.min.0 + nan_or_one.extent.0) - 1) >= ((nan_or_one.min.0.required + nan_or_one.extent.0.required) - 1)), halide_error_access_out_of_bounds("Output buffer nan_or_one", 0, nan_or_one.min.0.required, (nan_or_one.min.0.required + nan_or_one.extent.0.required) - 1, nan_or_one.min.0, (nan_or_one.min.0 + nan_or_one.extent.0) - 1)) | |
assert(nan_or_one.extent.0 >= 0, halide_error_buffer_extents_negative("Output buffer nan_or_one", 0, nan_or_one.extent.0)) | |
assert((nan_or_one.min.1 <= nan_or_one.min.1.required) && (((nan_or_one.min.1 + nan_or_one.extent.1) - 1) >= ((nan_or_one.min.1.required + nan_or_one.extent.1.required) - 1)), halide_error_access_out_of_bounds("Output buffer nan_or_one", 1, nan_or_one.min.1.required, (nan_or_one.min.1.required + nan_or_one.extent.1.required) - 1, nan_or_one.min.1, (nan_or_one.min.1 + nan_or_one.extent.1) - 1)) | |
assert(nan_or_one.extent.1 >= 0, halide_error_buffer_extents_negative("Output buffer nan_or_one", 1, nan_or_one.extent.1)) | |
assert(b0.stride.0 == b0.stride.0.constrained, halide_error_constraint_violated("b0.stride.0", b0.stride.0, "3", b0.stride.0.constrained)) | |
assert(b0.min.0 == b0.min.0.constrained, halide_error_constraint_violated("b0.min.0", b0.min.0, "0", b0.min.0.constrained)) | |
assert(b0.extent.0 == b0.extent.0.constrained, halide_error_constraint_violated("b0.extent.0", b0.extent.0, "1", b0.extent.0.constrained)) | |
assert(b0.stride.1 == b0.stride.1.constrained, halide_error_constraint_violated("b0.stride.1", b0.stride.1, "3", b0.stride.1.constrained)) | |
assert(b0.min.1 == b0.min.1.constrained, halide_error_constraint_violated("b0.min.1", b0.min.1, "0", b0.min.1.constrained)) | |
assert(b0.extent.1 == b0.extent.1.constrained, halide_error_constraint_violated("b0.extent.1", b0.extent.1, "1", b0.extent.1.constrained)) | |
assert(b0.stride.2 == b0.stride.2.constrained, halide_error_constraint_violated("b0.stride.2", b0.stride.2, "1", b0.stride.2.constrained)) | |
assert(b0.min.2 == b0.min.2.constrained, halide_error_constraint_violated("b0.min.2", b0.min.2, "0", b0.min.2.constrained)) | |
assert(b0.extent.2 == b0.extent.2.constrained, halide_error_constraint_violated("b0.extent.2", b0.extent.2, "3", b0.extent.2.constrained)) | |
assert(nan_or_one.stride.0 == nan_or_one.stride.0.constrained, halide_error_constraint_violated("nan_or_one.stride.0", nan_or_one.stride.0, "1", nan_or_one.stride.0.constrained)) | |
let b0.total_extent.0 = int64(b0.extent.0.constrained) | |
let b0.total_extent.1 = int64(b0.extent.1.constrained)*b0.total_extent.0 | |
let b0.total_extent.2 = int64(b0.extent.2.constrained)*b0.total_extent.1 | |
let nan_or_one.total_extent.0 = int64(nan_or_one.extent.0) | |
let nan_or_one.total_extent.1 = int64(nan_or_one.extent.1)*nan_or_one.total_extent.0 | |
assert((uint64)abs(int64(b0.extent.0.constrained)*int64(b0.stride.0.constrained)) <= (uint64)2147483647, halide_error_buffer_allocation_too_large("b0", (uint64)abs(int64(b0.extent.0.constrained)*int64(b0.stride.0.constrained)), (uint64)2147483647)) | |
assert((uint64)abs(int64(b0.extent.1.constrained)*int64(b0.stride.1.constrained)) <= (uint64)2147483647, halide_error_buffer_allocation_too_large("b0", (uint64)abs(int64(b0.extent.1.constrained)*int64(b0.stride.1.constrained)), (uint64)2147483647)) | |
assert(b0.total_extent.1 <= (int64)2147483647, halide_error_buffer_extents_too_large("b0", b0.total_extent.1, (int64)2147483647)) | |
assert((uint64)abs(int64(b0.extent.2.constrained)*int64(b0.stride.2.constrained)) <= (uint64)2147483647, halide_error_buffer_allocation_too_large("b0", (uint64)abs(int64(b0.extent.2.constrained)*int64(b0.stride.2.constrained)), (uint64)2147483647)) | |
assert(b0.total_extent.2 <= (int64)2147483647, halide_error_buffer_extents_too_large("b0", b0.total_extent.2, (int64)2147483647)) | |
assert((uint64)abs(int64(nan_or_one.extent.0)*int64(nan_or_one.stride.0.constrained)) <= (uint64)2147483647, halide_error_buffer_allocation_too_large("nan_or_one", (uint64)abs(int64(nan_or_one.extent.0)*int64(nan_or_one.stride.0.constrained)), (uint64)2147483647)) | |
assert((uint64)abs(int64(nan_or_one.extent.1)*int64(nan_or_one.stride.1)) <= (uint64)2147483647, halide_error_buffer_allocation_too_large("nan_or_one", (uint64)abs(int64(nan_or_one.extent.1)*int64(nan_or_one.stride.1)), (uint64)2147483647)) | |
assert(nan_or_one.total_extent.1 <= (int64)2147483647, halide_error_buffer_extents_too_large("nan_or_one", nan_or_one.total_extent.1, (int64)2147483647)) | |
let nan_or_one.s0.row.loop_max = (nan_or_one.extent.1 + nan_or_one.min.1) + -1 | |
let nan_or_one.s0.row.loop_min = nan_or_one.min.1 | |
let nan_or_one.s0.row.loop_extent = nan_or_one.extent.1 | |
let nan_or_one.s0.col.loop_max = (nan_or_one.extent.0 + nan_or_one.min.0) + -1 | |
let nan_or_one.s0.col.loop_min = nan_or_one.min.0 | |
let nan_or_one.s0.col.loop_extent = nan_or_one.extent.0 | |
let nan_or_one.s0.col.col_outer.__block_id_x.loop_extent = nan_or_one.extent.0 | |
let nan_or_one.s0.col.col_outer.__block_id_x.loop_max = nan_or_one.extent.0 + -1 | |
let nan_or_one.s0.col.col_outer.__block_id_x.loop_min = 0 | |
let nan_or_one.s0.col.col_inner.__thread_id_x.loop_extent = 1 | |
let nan_or_one.s0.col.col_inner.__thread_id_x.loop_max = 0 | |
let nan_or_one.s0.col.col_inner.__thread_id_x.loop_min = 0 | |
let nan_or_one.s0.row.row_outer.__block_id_y.loop_extent = nan_or_one.extent.1 | |
let nan_or_one.s0.row.row_outer.__block_id_y.loop_max = nan_or_one.extent.1 + -1 | |
let nan_or_one.s0.row.row_outer.__block_id_y.loop_min = 0 | |
let nan_or_one.s0.row.row_inner.__thread_id_y.loop_extent = 1 | |
let nan_or_one.s0.row.row_inner.__thread_id_y.loop_max = 0 | |
let nan_or_one.s0.row.row_inner.__thread_id_y.loop_min = 0 | |
let nan_or_one.s0.row.row_outer.loop_min.orig = 0 | |
produce nan_or_one { | |
gpu_block<OpenCL> (nan_or_one.s0.row.row_outer.__block_id_y, 0, nan_or_one.extent.1) { | |
let nan_or_one.s0.row.row_inner.base = nan_or_one.min.1 + nan_or_one.s0.row.row_outer.__block_id_y | |
let nan_or_one.s0.col.col_outer.loop_min.orig = 0 | |
gpu_block<OpenCL> (nan_or_one.s0.col.col_outer.__block_id_x, 0, nan_or_one.extent.0) { | |
let nan_or_one.s0.col.col_inner.base = nan_or_one.min.0 + nan_or_one.s0.col.col_outer.__block_id_x | |
let nan_or_one.s0.row.row_inner.loop_min.orig = 0 | |
gpu_thread<OpenCL> (nan_or_one.s0.row.row_inner.__thread_id_y, 0, 1) { | |
let nan_or_one.s0.row = nan_or_one.min.1 + nan_or_one.s0.row.row_outer.__block_id_y | |
let nan_or_one.s0.col.col_inner.loop_min.orig = 0 | |
gpu_thread<OpenCL> (nan_or_one.s0.col.col_inner.__thread_id_x, 0, 1) { | |
let nan_or_one.s0.col = nan_or_one.min.0 + nan_or_one.s0.col.col_outer.__block_id_x | |
nan_or_one[((0 + ((nan_or_one.min.0 + nan_or_one.s0.col.col_outer.__block_id_x)*nan_or_one.stride.0)) + ((nan_or_one.min.1 + nan_or_one.s0.row.row_outer.__block_id_y)*nan_or_one.stride.1)) - ((0 + (nan_or_one.min.0*nan_or_one.stride.0)) + (nan_or_one.min.1*nan_or_one.stride.1))] = | |
let t1 = (float32)strict_float(nanf) in | |
(float32)strict_float(select((uint1)is_nan_f32((float32)strict_float(b0[(((0 + ((nan_or_one.min.0 + nan_or_one.s0.col.col_outer.__block_id_x)*b0.stride.0)) + ((nan_or_one.min.1 + nan_or_one.s0.row.row_outer.__block_id_y)*b0.stride.1)) + (0*b0.stride.2)) - (((0 + (b0.min.0*b0.stride.0)) + (b0.min.1*b0.stride.1)) + (b0.min.2*b0.stride.2))])), t1, t1)) | |
} | |
} | |
} | |
} | |
} | |
} | |
Injecting host <-> dev buffer copies... | |
Lowering after injecting host <-> dev buffer copies: | |
assert((uint64)reinterpret((halide_buffer_t *)nan_or_one.buffer) != (uint64)0, halide_error_buffer_argument_is_null("nan_or_one")) | |
assert((uint64)reinterpret((halide_buffer_t *)b0.buffer) != (uint64)0, halide_error_buffer_argument_is_null("b0")) | |
let b0 = (void *)_halide_buffer_get_host((halide_buffer_t *)b0.buffer) | |
let b0.type = (uint32)_halide_buffer_get_type((halide_buffer_t *)b0.buffer) | |
let b0.dimensions = _halide_buffer_get_dimensions((halide_buffer_t *)b0.buffer) | |
let b0.min.0 = _halide_buffer_get_min((halide_buffer_t *)b0.buffer, 0) | |
let b0.extent.0 = _halide_buffer_get_extent((halide_buffer_t *)b0.buffer, 0) | |
let b0.stride.0 = _halide_buffer_get_stride((halide_buffer_t *)b0.buffer, 0) | |
let b0.min.1 = _halide_buffer_get_min((halide_buffer_t *)b0.buffer, 1) | |
let b0.extent.1 = _halide_buffer_get_extent((halide_buffer_t *)b0.buffer, 1) | |
let b0.stride.1 = _halide_buffer_get_stride((halide_buffer_t *)b0.buffer, 1) | |
let b0.min.2 = _halide_buffer_get_min((halide_buffer_t *)b0.buffer, 2) | |
let b0.extent.2 = _halide_buffer_get_extent((halide_buffer_t *)b0.buffer, 2) | |
let b0.stride.2 = _halide_buffer_get_stride((halide_buffer_t *)b0.buffer, 2) | |
let nan_or_one = (void *)_halide_buffer_get_host((halide_buffer_t *)nan_or_one.buffer) | |
let nan_or_one.type = (uint32)_halide_buffer_get_type((halide_buffer_t *)nan_or_one.buffer) | |
let nan_or_one.dimensions = _halide_buffer_get_dimensions((halide_buffer_t *)nan_or_one.buffer) | |
let nan_or_one.min.0 = _halide_buffer_get_min((halide_buffer_t *)nan_or_one.buffer, 0) | |
let nan_or_one.extent.0 = _halide_buffer_get_extent((halide_buffer_t *)nan_or_one.buffer, 0) | |
let nan_or_one.stride.0 = _halide_buffer_get_stride((halide_buffer_t *)nan_or_one.buffer, 0) | |
let nan_or_one.min.1 = _halide_buffer_get_min((halide_buffer_t *)nan_or_one.buffer, 1) | |
let nan_or_one.extent.1 = _halide_buffer_get_extent((halide_buffer_t *)nan_or_one.buffer, 1) | |
let nan_or_one.stride.1 = _halide_buffer_get_stride((halide_buffer_t *)nan_or_one.buffer, 1) | |
let nan_or_one.s0.row.max = (nan_or_one.extent.1 + nan_or_one.min.1) + -1 | |
let nan_or_one.s0.row.min = nan_or_one.min.1 | |
let nan_or_one.s0.col.max = (nan_or_one.extent.0 + nan_or_one.min.0) + -1 | |
let nan_or_one.s0.col.min = nan_or_one.min.0 | |
let b0.extent.0.required = ((nan_or_one.min.0 + (nan_or_one.extent.0 + -1)) + 1) - (nan_or_one.min.0 + 0) | |
let b0.min.0.required = nan_or_one.min.0 + 0 | |
let b0.stride.0.required = 1 | |
let b0.extent.1.required = ((nan_or_one.min.1 + (nan_or_one.extent.1 + -1)) + 1) - (nan_or_one.min.1 + 0) | |
let b0.min.1.required = nan_or_one.min.1 + 0 | |
let b0.stride.1.required = b0.stride.0.required*b0.extent.0.required | |
let b0.extent.2.required = (0 + 1) - 0 | |
let b0.min.2.required = 0 | |
let b0.stride.2.required = b0.stride.1.required*b0.extent.1.required | |
let nan_or_one.extent.0.required = ((nan_or_one.min.0 + (nan_or_one.extent.0 + -1)) + 1) - (nan_or_one.min.0 + 0) | |
let nan_or_one.min.0.required = nan_or_one.min.0 + 0 | |
let nan_or_one.stride.0.required = 1 | |
let nan_or_one.extent.1.required = ((nan_or_one.min.1 + (nan_or_one.extent.1 + -1)) + 1) - (nan_or_one.min.1 + 0) | |
let nan_or_one.min.1.required = nan_or_one.min.1 + 0 | |
let nan_or_one.stride.1.required = nan_or_one.stride.0.required*nan_or_one.extent.0.required | |
let b0.stride.0.constrained = 3 | |
let b0.min.0.constrained = 0 | |
let b0.extent.0.constrained = 1 | |
let b0.stride.1.constrained = 3 | |
let b0.min.1.constrained = 0 | |
let b0.extent.1.constrained = 1 | |
let b0.stride.2.constrained = 1 | |
let b0.min.2.constrained = 0 | |
let b0.extent.2.constrained = 3 | |
let nan_or_one.stride.0.constrained = 1 | |
let b0.stride.0.proposed = 3 | |
let b0.min.0.proposed = 0 | |
let b0.extent.0.proposed = 1 | |
let b0.stride.1.proposed = 3 | |
let b0.min.1.proposed = 0 | |
let b0.extent.1.proposed = 1 | |
let b0.stride.2.proposed = 1 | |
let b0.min.2.proposed = 0 | |
let b0.extent.2.proposed = 3 | |
let nan_or_one.stride.0.proposed = 1 | |
let nan_or_one.min.0.proposed = nan_or_one.min.0.required | |
let nan_or_one.extent.0.proposed = nan_or_one.extent.0.required | |
let nan_or_one.stride.1.proposed = nan_or_one.stride.1.required | |
let nan_or_one.min.1.proposed = nan_or_one.min.1.required | |
let nan_or_one.extent.1.proposed = nan_or_one.extent.1.required | |
assert(!(uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)b0.buffer) || ((b0.min.0.proposed <= b0.min.0.required) && (((b0.min.0.proposed + b0.extent.0.proposed) - 1) >= ((b0.min.0.required + b0.extent.0.required) - 1))), halide_error_constraints_make_required_region_smaller("Input buffer b0", 0, b0.min.0.proposed, (b0.min.0.proposed + b0.extent.0.proposed) - 1, b0.min.0.required, (b0.min.0.required + b0.extent.0.required) - 1)) | |
assert(!(uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)b0.buffer) || ((b0.min.1.proposed <= b0.min.1.required) && (((b0.min.1.proposed + b0.extent.1.proposed) - 1) >= ((b0.min.1.required + b0.extent.1.required) - 1))), halide_error_constraints_make_required_region_smaller("Input buffer b0", 1, b0.min.1.proposed, (b0.min.1.proposed + b0.extent.1.proposed) - 1, b0.min.1.required, (b0.min.1.required + b0.extent.1.required) - 1)) | |
assert(!(uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)b0.buffer) || ((b0.min.2.proposed <= b0.min.2.required) && (((b0.min.2.proposed + b0.extent.2.proposed) - 1) >= ((b0.min.2.required + b0.extent.2.required) - 1))), halide_error_constraints_make_required_region_smaller("Input buffer b0", 2, b0.min.2.proposed, (b0.min.2.proposed + b0.extent.2.proposed) - 1, b0.min.2.required, (b0.min.2.required + b0.extent.2.required) - 1)) | |
assert(!(uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)nan_or_one.buffer) || ((nan_or_one.min.0.proposed <= nan_or_one.min.0.required) && (((nan_or_one.min.0.proposed + nan_or_one.extent.0.proposed) - 1) >= ((nan_or_one.min.0.required + nan_or_one.extent.0.required) - 1))), halide_error_constraints_make_required_region_smaller("Output buffer nan_or_one", 0, nan_or_one.min.0.proposed, (nan_or_one.min.0.proposed + nan_or_one.extent.0.proposed) - 1, nan_or_one.min.0.required, (nan_or_one.min.0.required + nan_or_one.extent.0.required) - 1)) | |
assert(!(uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)nan_or_one.buffer) || ((nan_or_one.min.1.proposed <= nan_or_one.min.1.required) && (((nan_or_one.min.1.proposed + nan_or_one.extent.1.proposed) - 1) >= ((nan_or_one.min.1.required + nan_or_one.extent.1.required) - 1))), halide_error_constraints_make_required_region_smaller("Output buffer nan_or_one", 1, nan_or_one.min.1.proposed, (nan_or_one.min.1.proposed + nan_or_one.extent.1.proposed) - 1, nan_or_one.min.1.required, (nan_or_one.min.1.required + nan_or_one.extent.1.required) - 1)) | |
if ((uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)b0.buffer)) { | |
(halide_buffer_t *)_halide_buffer_init((halide_buffer_t *)b0.buffer, (halide_dimension_t *)_halide_buffer_get_shape((halide_buffer_t *)b0.buffer), (void *)reinterpret((uint64)0), (uint64)0, (halide_device_interface_t *)reinterpret((uint64)0), 2, 32, 3, (halide_dimension_t *)make_struct(b0.min.0.proposed, b0.extent.0.proposed, b0.stride.0.proposed, 0, b0.min.1.proposed, b0.extent.1.proposed, b0.stride.1.proposed, 0, b0.min.2.proposed, b0.extent.2.proposed, b0.stride.2.proposed, 0), (uint64)0) | |
} | |
if ((uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)nan_or_one.buffer)) { | |
(halide_buffer_t *)_halide_buffer_init((halide_buffer_t *)nan_or_one.buffer, (halide_dimension_t *)_halide_buffer_get_shape((halide_buffer_t *)nan_or_one.buffer), (void *)reinterpret((uint64)0), (uint64)0, (halide_device_interface_t *)reinterpret((uint64)0), 2, 32, 2, (halide_dimension_t *)make_struct(nan_or_one.min.0.proposed, nan_or_one.extent.0.proposed, nan_or_one.stride.0.proposed, 0, nan_or_one.min.1.proposed, nan_or_one.extent.1.proposed, nan_or_one.stride.1.proposed, 0), (uint64)0) | |
} | |
if (!(((uint1)0 || (uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)b0.buffer)) || (uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)nan_or_one.buffer))) { | |
assert(b0.type == (uint32)73730, halide_error_bad_type("Input buffer b0", b0.type, (uint32)73730)) | |
assert(b0.dimensions == 3, halide_error_bad_dimensions("Input buffer b0", b0.dimensions, 3)) | |
assert(nan_or_one.type == (uint32)73730, halide_error_bad_type("Output buffer nan_or_one", nan_or_one.type, (uint32)73730)) | |
assert(nan_or_one.dimensions == 2, halide_error_bad_dimensions("Output buffer nan_or_one", nan_or_one.dimensions, 2)) | |
assert((b0.min.0 <= b0.min.0.required) && (((b0.min.0 + b0.extent.0) - 1) >= ((b0.min.0.required + b0.extent.0.required) - 1)), halide_error_access_out_of_bounds("Input buffer b0", 0, b0.min.0.required, (b0.min.0.required + b0.extent.0.required) - 1, b0.min.0, (b0.min.0 + b0.extent.0) - 1)) | |
assert(b0.extent.0 >= 0, halide_error_buffer_extents_negative("Input buffer b0", 0, b0.extent.0)) | |
assert((b0.min.1 <= b0.min.1.required) && (((b0.min.1 + b0.extent.1) - 1) >= ((b0.min.1.required + b0.extent.1.required) - 1)), halide_error_access_out_of_bounds("Input buffer b0", 1, b0.min.1.required, (b0.min.1.required + b0.extent.1.required) - 1, b0.min.1, (b0.min.1 + b0.extent.1) - 1)) | |
assert(b0.extent.1 >= 0, halide_error_buffer_extents_negative("Input buffer b0", 1, b0.extent.1)) | |
assert((b0.min.2 <= b0.min.2.required) && (((b0.min.2 + b0.extent.2) - 1) >= ((b0.min.2.required + b0.extent.2.required) - 1)), halide_error_access_out_of_bounds("Input buffer b0", 2, b0.min.2.required, (b0.min.2.required + b0.extent.2.required) - 1, b0.min.2, (b0.min.2 + b0.extent.2) - 1)) | |
assert(b0.extent.2 >= 0, halide_error_buffer_extents_negative("Input buffer b0", 2, b0.extent.2)) | |
assert((nan_or_one.min.0 <= nan_or_one.min.0.required) && (((nan_or_one.min.0 + nan_or_one.extent.0) - 1) >= ((nan_or_one.min.0.required + nan_or_one.extent.0.required) - 1)), halide_error_access_out_of_bounds("Output buffer nan_or_one", 0, nan_or_one.min.0.required, (nan_or_one.min.0.required + nan_or_one.extent.0.required) - 1, nan_or_one.min.0, (nan_or_one.min.0 + nan_or_one.extent.0) - 1)) | |
assert(nan_or_one.extent.0 >= 0, halide_error_buffer_extents_negative("Output buffer nan_or_one", 0, nan_or_one.extent.0)) | |
assert((nan_or_one.min.1 <= nan_or_one.min.1.required) && (((nan_or_one.min.1 + nan_or_one.extent.1) - 1) >= ((nan_or_one.min.1.required + nan_or_one.extent.1.required) - 1)), halide_error_access_out_of_bounds("Output buffer nan_or_one", 1, nan_or_one.min.1.required, (nan_or_one.min.1.required + nan_or_one.extent.1.required) - 1, nan_or_one.min.1, (nan_or_one.min.1 + nan_or_one.extent.1) - 1)) | |
assert(nan_or_one.extent.1 >= 0, halide_error_buffer_extents_negative("Output buffer nan_or_one", 1, nan_or_one.extent.1)) | |
assert(b0.stride.0 == b0.stride.0.constrained, halide_error_constraint_violated("b0.stride.0", b0.stride.0, "3", b0.stride.0.constrained)) | |
assert(b0.min.0 == b0.min.0.constrained, halide_error_constraint_violated("b0.min.0", b0.min.0, "0", b0.min.0.constrained)) | |
assert(b0.extent.0 == b0.extent.0.constrained, halide_error_constraint_violated("b0.extent.0", b0.extent.0, "1", b0.extent.0.constrained)) | |
assert(b0.stride.1 == b0.stride.1.constrained, halide_error_constraint_violated("b0.stride.1", b0.stride.1, "3", b0.stride.1.constrained)) | |
assert(b0.min.1 == b0.min.1.constrained, halide_error_constraint_violated("b0.min.1", b0.min.1, "0", b0.min.1.constrained)) | |
assert(b0.extent.1 == b0.extent.1.constrained, halide_error_constraint_violated("b0.extent.1", b0.extent.1, "1", b0.extent.1.constrained)) | |
assert(b0.stride.2 == b0.stride.2.constrained, halide_error_constraint_violated("b0.stride.2", b0.stride.2, "1", b0.stride.2.constrained)) | |
assert(b0.min.2 == b0.min.2.constrained, halide_error_constraint_violated("b0.min.2", b0.min.2, "0", b0.min.2.constrained)) | |
assert(b0.extent.2 == b0.extent.2.constrained, halide_error_constraint_violated("b0.extent.2", b0.extent.2, "3", b0.extent.2.constrained)) | |
assert(nan_or_one.stride.0 == nan_or_one.stride.0.constrained, halide_error_constraint_violated("nan_or_one.stride.0", nan_or_one.stride.0, "1", nan_or_one.stride.0.constrained)) | |
let b0.total_extent.0 = int64(b0.extent.0.constrained) | |
let b0.total_extent.1 = int64(b0.extent.1.constrained)*b0.total_extent.0 | |
let b0.total_extent.2 = int64(b0.extent.2.constrained)*b0.total_extent.1 | |
let nan_or_one.total_extent.0 = int64(nan_or_one.extent.0) | |
let nan_or_one.total_extent.1 = int64(nan_or_one.extent.1)*nan_or_one.total_extent.0 | |
assert((uint64)abs(int64(b0.extent.0.constrained)*int64(b0.stride.0.constrained)) <= (uint64)2147483647, halide_error_buffer_allocation_too_large("b0", (uint64)abs(int64(b0.extent.0.constrained)*int64(b0.stride.0.constrained)), (uint64)2147483647)) | |
assert((uint64)abs(int64(b0.extent.1.constrained)*int64(b0.stride.1.constrained)) <= (uint64)2147483647, halide_error_buffer_allocation_too_large("b0", (uint64)abs(int64(b0.extent.1.constrained)*int64(b0.stride.1.constrained)), (uint64)2147483647)) | |
assert(b0.total_extent.1 <= (int64)2147483647, halide_error_buffer_extents_too_large("b0", b0.total_extent.1, (int64)2147483647)) | |
assert((uint64)abs(int64(b0.extent.2.constrained)*int64(b0.stride.2.constrained)) <= (uint64)2147483647, halide_error_buffer_allocation_too_large("b0", (uint64)abs(int64(b0.extent.2.constrained)*int64(b0.stride.2.constrained)), (uint64)2147483647)) | |
assert(b0.total_extent.2 <= (int64)2147483647, halide_error_buffer_extents_too_large("b0", b0.total_extent.2, (int64)2147483647)) | |
assert((uint64)abs(int64(nan_or_one.extent.0)*int64(nan_or_one.stride.0.constrained)) <= (uint64)2147483647, halide_error_buffer_allocation_too_large("nan_or_one", (uint64)abs(int64(nan_or_one.extent.0)*int64(nan_or_one.stride.0.constrained)), (uint64)2147483647)) | |
assert((uint64)abs(int64(nan_or_one.extent.1)*int64(nan_or_one.stride.1)) <= (uint64)2147483647, halide_error_buffer_allocation_too_large("nan_or_one", (uint64)abs(int64(nan_or_one.extent.1)*int64(nan_or_one.stride.1)), (uint64)2147483647)) | |
assert(nan_or_one.total_extent.1 <= (int64)2147483647, halide_error_buffer_extents_too_large("nan_or_one", nan_or_one.total_extent.1, (int64)2147483647)) | |
let nan_or_one.s0.row.loop_max = (nan_or_one.extent.1 + nan_or_one.min.1) + -1 | |
let nan_or_one.s0.row.loop_min = nan_or_one.min.1 | |
let nan_or_one.s0.row.loop_extent = nan_or_one.extent.1 | |
let nan_or_one.s0.col.loop_max = (nan_or_one.extent.0 + nan_or_one.min.0) + -1 | |
let nan_or_one.s0.col.loop_min = nan_or_one.min.0 | |
let nan_or_one.s0.col.loop_extent = nan_or_one.extent.0 | |
let nan_or_one.s0.col.col_outer.__block_id_x.loop_extent = nan_or_one.extent.0 | |
let nan_or_one.s0.col.col_outer.__block_id_x.loop_max = nan_or_one.extent.0 + -1 | |
let nan_or_one.s0.col.col_outer.__block_id_x.loop_min = 0 | |
let nan_or_one.s0.col.col_inner.__thread_id_x.loop_extent = 1 | |
let nan_or_one.s0.col.col_inner.__thread_id_x.loop_max = 0 | |
let nan_or_one.s0.col.col_inner.__thread_id_x.loop_min = 0 | |
let nan_or_one.s0.row.row_outer.__block_id_y.loop_extent = nan_or_one.extent.1 | |
let nan_or_one.s0.row.row_outer.__block_id_y.loop_max = nan_or_one.extent.1 + -1 | |
let nan_or_one.s0.row.row_outer.__block_id_y.loop_min = 0 | |
let nan_or_one.s0.row.row_inner.__thread_id_y.loop_extent = 1 | |
let nan_or_one.s0.row.row_inner.__thread_id_y.loop_max = 0 | |
let nan_or_one.s0.row.row_inner.__thread_id_y.loop_min = 0 | |
let nan_or_one.s0.row.row_outer.loop_min.orig = 0 | |
produce nan_or_one { | |
let halide_copy_to_device_result = halide_copy_to_device((halide_buffer_t *)b0.buffer, (halide_device_interface_t *)halide_opencl_device_interface()) | |
assert(halide_copy_to_device_result == 0, halide_copy_to_device_result) | |
let halide_copy_to_device_result$1 = halide_copy_to_device((halide_buffer_t *)nan_or_one.buffer, (halide_device_interface_t *)halide_opencl_device_interface()) | |
assert(halide_copy_to_device_result$1 == 0, halide_copy_to_device_result$1) | |
gpu_block<OpenCL> (nan_or_one.s0.row.row_outer.__block_id_y, 0, nan_or_one.extent.1) { | |
let nan_or_one.s0.row.row_inner.base = nan_or_one.min.1 + nan_or_one.s0.row.row_outer.__block_id_y | |
let nan_or_one.s0.col.col_outer.loop_min.orig = 0 | |
gpu_block<OpenCL> (nan_or_one.s0.col.col_outer.__block_id_x, 0, nan_or_one.extent.0) { | |
let nan_or_one.s0.col.col_inner.base = nan_or_one.min.0 + nan_or_one.s0.col.col_outer.__block_id_x | |
let nan_or_one.s0.row.row_inner.loop_min.orig = 0 | |
gpu_thread<OpenCL> (nan_or_one.s0.row.row_inner.__thread_id_y, 0, 1) { | |
let nan_or_one.s0.row = nan_or_one.min.1 + nan_or_one.s0.row.row_outer.__block_id_y | |
let nan_or_one.s0.col.col_inner.loop_min.orig = 0 | |
gpu_thread<OpenCL> (nan_or_one.s0.col.col_inner.__thread_id_x, 0, 1) { | |
let nan_or_one.s0.col = nan_or_one.min.0 + nan_or_one.s0.col.col_outer.__block_id_x | |
nan_or_one[((0 + ((nan_or_one.min.0 + nan_or_one.s0.col.col_outer.__block_id_x)*nan_or_one.stride.0)) + ((nan_or_one.min.1 + nan_or_one.s0.row.row_outer.__block_id_y)*nan_or_one.stride.1)) - ((0 + (nan_or_one.min.0*nan_or_one.stride.0)) + (nan_or_one.min.1*nan_or_one.stride.1))] = | |
let t1 = (float32)strict_float(nanf) in | |
(float32)strict_float(select((uint1)is_nan_f32((float32)strict_float(b0[(((0 + ((nan_or_one.min.0 + nan_or_one.s0.col.col_outer.__block_id_x)*b0.stride.0)) + ((nan_or_one.min.1 + nan_or_one.s0.row.row_outer.__block_id_y)*b0.stride.1)) + (0*b0.stride.2)) - (((0 + (b0.min.0*b0.stride.0)) + (b0.min.1*b0.stride.1)) + (b0.min.2*b0.stride.2))])), t1, t1)) | |
} | |
} | |
} | |
} | |
_halide_buffer_set_device_dirty((halide_buffer_t *)nan_or_one.buffer, (uint1)1) | |
} | |
} | |
Selecting a GPU API for extern stages... | |
Lowering after selecting a GPU API for extern stages: (unchanged) | |
Simplifying... | |
Lowering after second simplifcation: | |
assert((uint64)reinterpret((halide_buffer_t *)nan_or_one.buffer) != (uint64)0, halide_error_buffer_argument_is_null("nan_or_one")) | |
assert((uint64)reinterpret((halide_buffer_t *)b0.buffer) != (uint64)0, halide_error_buffer_argument_is_null("b0")) | |
let b0 = (void *)_halide_buffer_get_host((halide_buffer_t *)b0.buffer) | |
let b0.type = (uint32)_halide_buffer_get_type((halide_buffer_t *)b0.buffer) | |
let b0.dimensions = _halide_buffer_get_dimensions((halide_buffer_t *)b0.buffer) | |
let b0.min.0 = _halide_buffer_get_min((halide_buffer_t *)b0.buffer, 0) | |
let b0.extent.0 = _halide_buffer_get_extent((halide_buffer_t *)b0.buffer, 0) | |
let b0.stride.0 = _halide_buffer_get_stride((halide_buffer_t *)b0.buffer, 0) | |
let b0.min.1 = _halide_buffer_get_min((halide_buffer_t *)b0.buffer, 1) | |
let b0.extent.1 = _halide_buffer_get_extent((halide_buffer_t *)b0.buffer, 1) | |
let b0.stride.1 = _halide_buffer_get_stride((halide_buffer_t *)b0.buffer, 1) | |
let b0.min.2 = _halide_buffer_get_min((halide_buffer_t *)b0.buffer, 2) | |
let b0.extent.2 = _halide_buffer_get_extent((halide_buffer_t *)b0.buffer, 2) | |
let b0.stride.2 = _halide_buffer_get_stride((halide_buffer_t *)b0.buffer, 2) | |
let nan_or_one = (void *)_halide_buffer_get_host((halide_buffer_t *)nan_or_one.buffer) | |
let nan_or_one.type = (uint32)_halide_buffer_get_type((halide_buffer_t *)nan_or_one.buffer) | |
let nan_or_one.dimensions = _halide_buffer_get_dimensions((halide_buffer_t *)nan_or_one.buffer) | |
let nan_or_one.min.0 = _halide_buffer_get_min((halide_buffer_t *)nan_or_one.buffer, 0) | |
let nan_or_one.extent.0 = _halide_buffer_get_extent((halide_buffer_t *)nan_or_one.buffer, 0) | |
let nan_or_one.stride.0 = _halide_buffer_get_stride((halide_buffer_t *)nan_or_one.buffer, 0) | |
let nan_or_one.min.1 = _halide_buffer_get_min((halide_buffer_t *)nan_or_one.buffer, 1) | |
let nan_or_one.extent.1 = _halide_buffer_get_extent((halide_buffer_t *)nan_or_one.buffer, 1) | |
let nan_or_one.stride.1 = _halide_buffer_get_stride((halide_buffer_t *)nan_or_one.buffer, 1) | |
assert(!(uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)b0.buffer) || ((0 <= nan_or_one.min.0) && ((nan_or_one.extent.0 + nan_or_one.min.0) <= 1)), halide_error_constraints_make_required_region_smaller("Input buffer b0", 0, 0, 0, nan_or_one.min.0, (nan_or_one.extent.0 + nan_or_one.min.0) + -1)) | |
assert(!(uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)b0.buffer) || ((0 <= nan_or_one.min.1) && ((nan_or_one.extent.1 + nan_or_one.min.1) <= 1)), halide_error_constraints_make_required_region_smaller("Input buffer b0", 1, 0, 0, nan_or_one.min.1, (nan_or_one.extent.1 + nan_or_one.min.1) + -1)) | |
0 | |
0 | |
0 | |
if ((uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)b0.buffer)) { | |
(halide_buffer_t *)_halide_buffer_init((halide_buffer_t *)b0.buffer, (halide_dimension_t *)_halide_buffer_get_shape((halide_buffer_t *)b0.buffer), (void *)reinterpret((uint64)0), (uint64)0, (halide_device_interface_t *)reinterpret((uint64)0), 2, 32, 3, (halide_dimension_t *)make_struct(0, 1, 3, 0, 0, 1, 3, 0, 0, 3, 1, 0), (uint64)0) | |
} | |
if ((uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)nan_or_one.buffer)) { | |
(halide_buffer_t *)_halide_buffer_init((halide_buffer_t *)nan_or_one.buffer, (halide_dimension_t *)_halide_buffer_get_shape((halide_buffer_t *)nan_or_one.buffer), (void *)reinterpret((uint64)0), (uint64)0, (halide_device_interface_t *)reinterpret((uint64)0), 2, 32, 2, (halide_dimension_t *)make_struct(nan_or_one.min.0, nan_or_one.extent.0, 1, 0, nan_or_one.min.1, nan_or_one.extent.1, nan_or_one.extent.0, 0), (uint64)0) | |
} | |
if (!((uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)b0.buffer) || (uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)nan_or_one.buffer))) { | |
assert(b0.type == (uint32)73730, halide_error_bad_type("Input buffer b0", b0.type, (uint32)73730)) | |
assert(b0.dimensions == 3, halide_error_bad_dimensions("Input buffer b0", b0.dimensions, 3)) | |
assert(nan_or_one.type == (uint32)73730, halide_error_bad_type("Output buffer nan_or_one", nan_or_one.type, (uint32)73730)) | |
assert(nan_or_one.dimensions == 2, halide_error_bad_dimensions("Output buffer nan_or_one", nan_or_one.dimensions, 2)) | |
assert((b0.min.0 <= nan_or_one.min.0) && ((nan_or_one.extent.0 + nan_or_one.min.0) <= (b0.extent.0 + b0.min.0)), halide_error_access_out_of_bounds("Input buffer b0", 0, nan_or_one.min.0, (nan_or_one.extent.0 + nan_or_one.min.0) + -1, b0.min.0, (b0.extent.0 + b0.min.0) + -1)) | |
assert(0 <= b0.extent.0, halide_error_buffer_extents_negative("Input buffer b0", 0, b0.extent.0)) | |
assert((b0.min.1 <= nan_or_one.min.1) && ((nan_or_one.extent.1 + nan_or_one.min.1) <= (b0.extent.1 + b0.min.1)), halide_error_access_out_of_bounds("Input buffer b0", 1, nan_or_one.min.1, (nan_or_one.extent.1 + nan_or_one.min.1) + -1, b0.min.1, (b0.extent.1 + b0.min.1) + -1)) | |
assert(0 <= b0.extent.1, halide_error_buffer_extents_negative("Input buffer b0", 1, b0.extent.1)) | |
assert((b0.min.2 <= 0) && (1 <= (b0.extent.2 + b0.min.2)), halide_error_access_out_of_bounds("Input buffer b0", 2, 0, 0, b0.min.2, (b0.extent.2 + b0.min.2) + -1)) | |
assert(0 <= b0.extent.2, halide_error_buffer_extents_negative("Input buffer b0", 2, b0.extent.2)) | |
0 | |
assert(0 <= nan_or_one.extent.0, halide_error_buffer_extents_negative("Output buffer nan_or_one", 0, nan_or_one.extent.0)) | |
0 | |
assert(0 <= nan_or_one.extent.1, halide_error_buffer_extents_negative("Output buffer nan_or_one", 1, nan_or_one.extent.1)) | |
assert(b0.stride.0 == 3, halide_error_constraint_violated("b0.stride.0", b0.stride.0, "3", 3)) | |
assert(b0.min.0 == 0, halide_error_constraint_violated("b0.min.0", b0.min.0, "0", 0)) | |
assert(b0.extent.0 == 1, halide_error_constraint_violated("b0.extent.0", b0.extent.0, "1", 1)) | |
assert(b0.stride.1 == 3, halide_error_constraint_violated("b0.stride.1", b0.stride.1, "3", 3)) | |
assert(b0.min.1 == 0, halide_error_constraint_violated("b0.min.1", b0.min.1, "0", 0)) | |
assert(b0.extent.1 == 1, halide_error_constraint_violated("b0.extent.1", b0.extent.1, "1", 1)) | |
assert(b0.stride.2 == 1, halide_error_constraint_violated("b0.stride.2", b0.stride.2, "1", 1)) | |
assert(b0.min.2 == 0, halide_error_constraint_violated("b0.min.2", b0.min.2, "0", 0)) | |
assert(b0.extent.2 == 3, halide_error_constraint_violated("b0.extent.2", b0.extent.2, "3", 3)) | |
assert(nan_or_one.stride.0 == 1, halide_error_constraint_violated("nan_or_one.stride.0", nan_or_one.stride.0, "1", 1)) | |
let nan_or_one.total_extent.1 = int64(nan_or_one.extent.1)*int64(nan_or_one.extent.0) | |
assert(uint64(int64(nan_or_one.extent.0)) <= (uint64)2147483647, halide_error_buffer_allocation_too_large("nan_or_one", uint64(int64(nan_or_one.extent.0)), (uint64)2147483647)) | |
assert((uint64)abs(int64(nan_or_one.extent.1)*int64(nan_or_one.stride.1)) <= (uint64)2147483647, halide_error_buffer_allocation_too_large("nan_or_one", (uint64)abs(int64(nan_or_one.extent.1)*int64(nan_or_one.stride.1)), (uint64)2147483647)) | |
assert(nan_or_one.total_extent.1 <= (int64)2147483647, halide_error_buffer_extents_too_large("nan_or_one", nan_or_one.total_extent.1, (int64)2147483647)) | |
produce nan_or_one { | |
let halide_copy_to_device_result = halide_copy_to_device((halide_buffer_t *)b0.buffer, (halide_device_interface_t *)halide_opencl_device_interface()) | |
assert(halide_copy_to_device_result == 0, halide_copy_to_device_result) | |
let halide_copy_to_device_result$1 = halide_copy_to_device((halide_buffer_t *)nan_or_one.buffer, (halide_device_interface_t *)halide_opencl_device_interface()) | |
assert(halide_copy_to_device_result$1 == 0, halide_copy_to_device_result$1) | |
gpu_block<OpenCL> (nan_or_one.s0.row.row_outer.__block_id_y, 0, nan_or_one.extent.1) { | |
gpu_block<OpenCL> (nan_or_one.s0.col.col_outer.__block_id_x, 0, nan_or_one.extent.0) { | |
gpu_thread<OpenCL> (nan_or_one.s0.row.row_inner.__thread_id_y, 0, 1) { | |
gpu_thread<OpenCL> (nan_or_one.s0.col.col_inner.__thread_id_x, 0, 1) { | |
nan_or_one[(((nan_or_one.min.1 + nan_or_one.s0.row.row_outer.__block_id_y)*nan_or_one.stride.1) + (nan_or_one.min.0 + nan_or_one.s0.col.col_outer.__block_id_x)) - ((nan_or_one.min.1*nan_or_one.stride.1) + nan_or_one.min.0)] = | |
let t1 = (float32)strict_float(nanf) in | |
(float32)strict_float(select((uint1)is_nan_f32((float32)strict_float(b0[((nan_or_one.min.0 + nan_or_one.s0.col.col_outer.__block_id_x) + (nan_or_one.min.1 + nan_or_one.s0.row.row_outer.__block_id_y))*3])), t1, t1)) | |
} | |
} | |
} | |
} | |
_halide_buffer_set_device_dirty((halide_buffer_t *)nan_or_one.buffer, (uint1)1) | |
} | |
} | |
Reduce prefetch dimension... | |
Lowering after reduce prefetch dimension: (unchanged) | |
Simplifying correlated differences... | |
Lowering after simplifying correlated differences: | |
assert((uint64)reinterpret((halide_buffer_t *)nan_or_one.buffer) != (uint64)0, halide_error_buffer_argument_is_null("nan_or_one")) | |
assert((uint64)reinterpret((halide_buffer_t *)b0.buffer) != (uint64)0, halide_error_buffer_argument_is_null("b0")) | |
let b0 = (void *)_halide_buffer_get_host((halide_buffer_t *)b0.buffer) | |
let b0.type = (uint32)_halide_buffer_get_type((halide_buffer_t *)b0.buffer) | |
let b0.dimensions = _halide_buffer_get_dimensions((halide_buffer_t *)b0.buffer) | |
let b0.min.0 = _halide_buffer_get_min((halide_buffer_t *)b0.buffer, 0) | |
let b0.extent.0 = _halide_buffer_get_extent((halide_buffer_t *)b0.buffer, 0) | |
let b0.stride.0 = _halide_buffer_get_stride((halide_buffer_t *)b0.buffer, 0) | |
let b0.min.1 = _halide_buffer_get_min((halide_buffer_t *)b0.buffer, 1) | |
let b0.extent.1 = _halide_buffer_get_extent((halide_buffer_t *)b0.buffer, 1) | |
let b0.stride.1 = _halide_buffer_get_stride((halide_buffer_t *)b0.buffer, 1) | |
let b0.min.2 = _halide_buffer_get_min((halide_buffer_t *)b0.buffer, 2) | |
let b0.extent.2 = _halide_buffer_get_extent((halide_buffer_t *)b0.buffer, 2) | |
let b0.stride.2 = _halide_buffer_get_stride((halide_buffer_t *)b0.buffer, 2) | |
let nan_or_one = (void *)_halide_buffer_get_host((halide_buffer_t *)nan_or_one.buffer) | |
let nan_or_one.type = (uint32)_halide_buffer_get_type((halide_buffer_t *)nan_or_one.buffer) | |
let nan_or_one.dimensions = _halide_buffer_get_dimensions((halide_buffer_t *)nan_or_one.buffer) | |
let nan_or_one.min.0 = _halide_buffer_get_min((halide_buffer_t *)nan_or_one.buffer, 0) | |
let nan_or_one.extent.0 = _halide_buffer_get_extent((halide_buffer_t *)nan_or_one.buffer, 0) | |
let nan_or_one.stride.0 = _halide_buffer_get_stride((halide_buffer_t *)nan_or_one.buffer, 0) | |
let nan_or_one.min.1 = _halide_buffer_get_min((halide_buffer_t *)nan_or_one.buffer, 1) | |
let nan_or_one.extent.1 = _halide_buffer_get_extent((halide_buffer_t *)nan_or_one.buffer, 1) | |
let nan_or_one.stride.1 = _halide_buffer_get_stride((halide_buffer_t *)nan_or_one.buffer, 1) | |
assert(!(uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)b0.buffer) || ((0 <= nan_or_one.min.0) && ((nan_or_one.extent.0 + nan_or_one.min.0) <= 1)), halide_error_constraints_make_required_region_smaller("Input buffer b0", 0, 0, 0, nan_or_one.min.0, (nan_or_one.extent.0 + nan_or_one.min.0) + -1)) | |
assert(!(uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)b0.buffer) || ((0 <= nan_or_one.min.1) && ((nan_or_one.extent.1 + nan_or_one.min.1) <= 1)), halide_error_constraints_make_required_region_smaller("Input buffer b0", 1, 0, 0, nan_or_one.min.1, (nan_or_one.extent.1 + nan_or_one.min.1) + -1)) | |
0 | |
0 | |
0 | |
if ((uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)b0.buffer)) { | |
(halide_buffer_t *)_halide_buffer_init((halide_buffer_t *)b0.buffer, (halide_dimension_t *)_halide_buffer_get_shape((halide_buffer_t *)b0.buffer), (void *)reinterpret((uint64)0), (uint64)0, (halide_device_interface_t *)reinterpret((uint64)0), 2, 32, 3, (halide_dimension_t *)make_struct(0, 1, 3, 0, 0, 1, 3, 0, 0, 3, 1, 0), (uint64)0) | |
} | |
if ((uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)nan_or_one.buffer)) { | |
(halide_buffer_t *)_halide_buffer_init((halide_buffer_t *)nan_or_one.buffer, (halide_dimension_t *)_halide_buffer_get_shape((halide_buffer_t *)nan_or_one.buffer), (void *)reinterpret((uint64)0), (uint64)0, (halide_device_interface_t *)reinterpret((uint64)0), 2, 32, 2, (halide_dimension_t *)make_struct(nan_or_one.min.0, nan_or_one.extent.0, 1, 0, nan_or_one.min.1, nan_or_one.extent.1, nan_or_one.extent.0, 0), (uint64)0) | |
} | |
if (!((uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)b0.buffer) || (uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)nan_or_one.buffer))) { | |
assert(b0.type == (uint32)73730, halide_error_bad_type("Input buffer b0", b0.type, (uint32)73730)) | |
assert(b0.dimensions == 3, halide_error_bad_dimensions("Input buffer b0", b0.dimensions, 3)) | |
assert(nan_or_one.type == (uint32)73730, halide_error_bad_type("Output buffer nan_or_one", nan_or_one.type, (uint32)73730)) | |
assert(nan_or_one.dimensions == 2, halide_error_bad_dimensions("Output buffer nan_or_one", nan_or_one.dimensions, 2)) | |
assert((b0.min.0 <= nan_or_one.min.0) && ((nan_or_one.extent.0 + nan_or_one.min.0) <= (b0.extent.0 + b0.min.0)), halide_error_access_out_of_bounds("Input buffer b0", 0, nan_or_one.min.0, (nan_or_one.extent.0 + nan_or_one.min.0) + -1, b0.min.0, (b0.extent.0 + b0.min.0) + -1)) | |
assert(0 <= b0.extent.0, halide_error_buffer_extents_negative("Input buffer b0", 0, b0.extent.0)) | |
assert((b0.min.1 <= nan_or_one.min.1) && ((nan_or_one.extent.1 + nan_or_one.min.1) <= (b0.extent.1 + b0.min.1)), halide_error_access_out_of_bounds("Input buffer b0", 1, nan_or_one.min.1, (nan_or_one.extent.1 + nan_or_one.min.1) + -1, b0.min.1, (b0.extent.1 + b0.min.1) + -1)) | |
assert(0 <= b0.extent.1, halide_error_buffer_extents_negative("Input buffer b0", 1, b0.extent.1)) | |
assert((b0.min.2 <= 0) && (1 <= (b0.extent.2 + b0.min.2)), halide_error_access_out_of_bounds("Input buffer b0", 2, 0, 0, b0.min.2, (b0.extent.2 + b0.min.2) + -1)) | |
assert(0 <= b0.extent.2, halide_error_buffer_extents_negative("Input buffer b0", 2, b0.extent.2)) | |
0 | |
assert(0 <= nan_or_one.extent.0, halide_error_buffer_extents_negative("Output buffer nan_or_one", 0, nan_or_one.extent.0)) | |
0 | |
assert(0 <= nan_or_one.extent.1, halide_error_buffer_extents_negative("Output buffer nan_or_one", 1, nan_or_one.extent.1)) | |
assert(b0.stride.0 == 3, halide_error_constraint_violated("b0.stride.0", b0.stride.0, "3", 3)) | |
assert(b0.min.0 == 0, halide_error_constraint_violated("b0.min.0", b0.min.0, "0", 0)) | |
assert(b0.extent.0 == 1, halide_error_constraint_violated("b0.extent.0", b0.extent.0, "1", 1)) | |
assert(b0.stride.1 == 3, halide_error_constraint_violated("b0.stride.1", b0.stride.1, "3", 3)) | |
assert(b0.min.1 == 0, halide_error_constraint_violated("b0.min.1", b0.min.1, "0", 0)) | |
assert(b0.extent.1 == 1, halide_error_constraint_violated("b0.extent.1", b0.extent.1, "1", 1)) | |
assert(b0.stride.2 == 1, halide_error_constraint_violated("b0.stride.2", b0.stride.2, "1", 1)) | |
assert(b0.min.2 == 0, halide_error_constraint_violated("b0.min.2", b0.min.2, "0", 0)) | |
assert(b0.extent.2 == 3, halide_error_constraint_violated("b0.extent.2", b0.extent.2, "3", 3)) | |
assert(nan_or_one.stride.0 == 1, halide_error_constraint_violated("nan_or_one.stride.0", nan_or_one.stride.0, "1", 1)) | |
let nan_or_one.total_extent.1 = int64(nan_or_one.extent.1)*int64(nan_or_one.extent.0) | |
assert(uint64(int64(nan_or_one.extent.0)) <= (uint64)2147483647, halide_error_buffer_allocation_too_large("nan_or_one", uint64(int64(nan_or_one.extent.0)), (uint64)2147483647)) | |
assert((uint64)abs(int64(nan_or_one.extent.1)*int64(nan_or_one.stride.1)) <= (uint64)2147483647, halide_error_buffer_allocation_too_large("nan_or_one", (uint64)abs(int64(nan_or_one.extent.1)*int64(nan_or_one.stride.1)), (uint64)2147483647)) | |
assert(nan_or_one.total_extent.1 <= (int64)2147483647, halide_error_buffer_extents_too_large("nan_or_one", nan_or_one.total_extent.1, (int64)2147483647)) | |
produce nan_or_one { | |
let halide_copy_to_device_result = halide_copy_to_device((halide_buffer_t *)b0.buffer, (halide_device_interface_t *)halide_opencl_device_interface()) | |
assert(halide_copy_to_device_result == 0, halide_copy_to_device_result) | |
let halide_copy_to_device_result$1 = halide_copy_to_device((halide_buffer_t *)nan_or_one.buffer, (halide_device_interface_t *)halide_opencl_device_interface()) | |
assert(halide_copy_to_device_result$1 == 0, halide_copy_to_device_result$1) | |
gpu_block<OpenCL> (nan_or_one.s0.row.row_outer.__block_id_y, 0, nan_or_one.extent.1) { | |
gpu_block<OpenCL> (nan_or_one.s0.col.col_outer.__block_id_x, 0, nan_or_one.extent.0) { | |
gpu_thread<OpenCL> (nan_or_one.s0.row.row_inner.__thread_id_y, 0, 1) { | |
gpu_thread<OpenCL> (nan_or_one.s0.col.col_inner.__thread_id_x, 0, 1) { | |
nan_or_one[(((nan_or_one.min.1 + nan_or_one.s0.row.row_outer.__block_id_y)*nan_or_one.stride.1) + (nan_or_one.min.0 + nan_or_one.s0.col.col_outer.__block_id_x)) - ((nan_or_one.min.1*nan_or_one.stride.1) + nan_or_one.min.0)] = | |
let t1 = (float32)strict_float(nanf) in | |
(float32)strict_float(select((uint1)is_nan_f32((float32)strict_float(b0[((nan_or_one.min.0 + nan_or_one.s0.col.col_outer.__block_id_x) + (nan_or_one.min.1 + nan_or_one.s0.row.row_outer.__block_id_y))*3])), t1, t1)) | |
} | |
} | |
} | |
} | |
_halide_buffer_set_device_dirty((halide_buffer_t *)nan_or_one.buffer, (uint1)1) | |
} | |
} | |
Unrolling... | |
Lowering after unrolling: (unchanged) | |
Vectorizing... | |
Lowering after vectorizing: | |
assert((uint64)reinterpret((halide_buffer_t *)nan_or_one.buffer) != (uint64)0, halide_error_buffer_argument_is_null("nan_or_one")) | |
assert((uint64)reinterpret((halide_buffer_t *)b0.buffer) != (uint64)0, halide_error_buffer_argument_is_null("b0")) | |
let b0 = (void *)_halide_buffer_get_host((halide_buffer_t *)b0.buffer) | |
let b0.type = (uint32)_halide_buffer_get_type((halide_buffer_t *)b0.buffer) | |
let b0.dimensions = _halide_buffer_get_dimensions((halide_buffer_t *)b0.buffer) | |
let b0.min.0 = _halide_buffer_get_min((halide_buffer_t *)b0.buffer, 0) | |
let b0.extent.0 = _halide_buffer_get_extent((halide_buffer_t *)b0.buffer, 0) | |
let b0.stride.0 = _halide_buffer_get_stride((halide_buffer_t *)b0.buffer, 0) | |
let b0.min.1 = _halide_buffer_get_min((halide_buffer_t *)b0.buffer, 1) | |
let b0.extent.1 = _halide_buffer_get_extent((halide_buffer_t *)b0.buffer, 1) | |
let b0.stride.1 = _halide_buffer_get_stride((halide_buffer_t *)b0.buffer, 1) | |
let b0.min.2 = _halide_buffer_get_min((halide_buffer_t *)b0.buffer, 2) | |
let b0.extent.2 = _halide_buffer_get_extent((halide_buffer_t *)b0.buffer, 2) | |
let b0.stride.2 = _halide_buffer_get_stride((halide_buffer_t *)b0.buffer, 2) | |
let nan_or_one = (void *)_halide_buffer_get_host((halide_buffer_t *)nan_or_one.buffer) | |
let nan_or_one.type = (uint32)_halide_buffer_get_type((halide_buffer_t *)nan_or_one.buffer) | |
let nan_or_one.dimensions = _halide_buffer_get_dimensions((halide_buffer_t *)nan_or_one.buffer) | |
let nan_or_one.min.0 = _halide_buffer_get_min((halide_buffer_t *)nan_or_one.buffer, 0) | |
let nan_or_one.extent.0 = _halide_buffer_get_extent((halide_buffer_t *)nan_or_one.buffer, 0) | |
let nan_or_one.stride.0 = _halide_buffer_get_stride((halide_buffer_t *)nan_or_one.buffer, 0) | |
let nan_or_one.min.1 = _halide_buffer_get_min((halide_buffer_t *)nan_or_one.buffer, 1) | |
let nan_or_one.extent.1 = _halide_buffer_get_extent((halide_buffer_t *)nan_or_one.buffer, 1) | |
let nan_or_one.stride.1 = _halide_buffer_get_stride((halide_buffer_t *)nan_or_one.buffer, 1) | |
assert(!(uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)b0.buffer) || ((0 <= nan_or_one.min.0) && ((nan_or_one.extent.0 + nan_or_one.min.0) <= 1)), halide_error_constraints_make_required_region_smaller("Input buffer b0", 0, 0, 0, nan_or_one.min.0, (nan_or_one.extent.0 + nan_or_one.min.0) + -1)) | |
assert(!(uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)b0.buffer) || ((0 <= nan_or_one.min.1) && ((nan_or_one.extent.1 + nan_or_one.min.1) <= 1)), halide_error_constraints_make_required_region_smaller("Input buffer b0", 1, 0, 0, nan_or_one.min.1, (nan_or_one.extent.1 + nan_or_one.min.1) + -1)) | |
if ((uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)b0.buffer)) { | |
(halide_buffer_t *)_halide_buffer_init((halide_buffer_t *)b0.buffer, (halide_dimension_t *)_halide_buffer_get_shape((halide_buffer_t *)b0.buffer), (void *)reinterpret((uint64)0), (uint64)0, (halide_device_interface_t *)reinterpret((uint64)0), 2, 32, 3, (halide_dimension_t *)make_struct(0, 1, 3, 0, 0, 1, 3, 0, 0, 3, 1, 0), (uint64)0) | |
} | |
if ((uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)nan_or_one.buffer)) { | |
(halide_buffer_t *)_halide_buffer_init((halide_buffer_t *)nan_or_one.buffer, (halide_dimension_t *)_halide_buffer_get_shape((halide_buffer_t *)nan_or_one.buffer), (void *)reinterpret((uint64)0), (uint64)0, (halide_device_interface_t *)reinterpret((uint64)0), 2, 32, 2, (halide_dimension_t *)make_struct(nan_or_one.min.0, nan_or_one.extent.0, 1, 0, nan_or_one.min.1, nan_or_one.extent.1, nan_or_one.extent.0, 0), (uint64)0) | |
} | |
if (!((uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)b0.buffer) || (uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)nan_or_one.buffer))) { | |
assert(b0.type == (uint32)73730, halide_error_bad_type("Input buffer b0", b0.type, (uint32)73730)) | |
assert(b0.dimensions == 3, halide_error_bad_dimensions("Input buffer b0", b0.dimensions, 3)) | |
assert(nan_or_one.type == (uint32)73730, halide_error_bad_type("Output buffer nan_or_one", nan_or_one.type, (uint32)73730)) | |
assert(nan_or_one.dimensions == 2, halide_error_bad_dimensions("Output buffer nan_or_one", nan_or_one.dimensions, 2)) | |
assert((b0.min.0 <= nan_or_one.min.0) && ((nan_or_one.extent.0 + nan_or_one.min.0) <= (b0.extent.0 + b0.min.0)), halide_error_access_out_of_bounds("Input buffer b0", 0, nan_or_one.min.0, (nan_or_one.extent.0 + nan_or_one.min.0) + -1, b0.min.0, (b0.extent.0 + b0.min.0) + -1)) | |
assert(0 <= b0.extent.0, halide_error_buffer_extents_negative("Input buffer b0", 0, b0.extent.0)) | |
assert((b0.min.1 <= nan_or_one.min.1) && ((nan_or_one.extent.1 + nan_or_one.min.1) <= (b0.extent.1 + b0.min.1)), halide_error_access_out_of_bounds("Input buffer b0", 1, nan_or_one.min.1, (nan_or_one.extent.1 + nan_or_one.min.1) + -1, b0.min.1, (b0.extent.1 + b0.min.1) + -1)) | |
assert(0 <= b0.extent.1, halide_error_buffer_extents_negative("Input buffer b0", 1, b0.extent.1)) | |
assert((b0.min.2 <= 0) && (1 <= (b0.extent.2 + b0.min.2)), halide_error_access_out_of_bounds("Input buffer b0", 2, 0, 0, b0.min.2, (b0.extent.2 + b0.min.2) + -1)) | |
assert(0 <= b0.extent.2, halide_error_buffer_extents_negative("Input buffer b0", 2, b0.extent.2)) | |
assert(0 <= nan_or_one.extent.0, halide_error_buffer_extents_negative("Output buffer nan_or_one", 0, nan_or_one.extent.0)) | |
assert(0 <= nan_or_one.extent.1, halide_error_buffer_extents_negative("Output buffer nan_or_one", 1, nan_or_one.extent.1)) | |
assert(b0.stride.0 == 3, halide_error_constraint_violated("b0.stride.0", b0.stride.0, "3", 3)) | |
assert(b0.min.0 == 0, halide_error_constraint_violated("b0.min.0", b0.min.0, "0", 0)) | |
assert(b0.extent.0 == 1, halide_error_constraint_violated("b0.extent.0", b0.extent.0, "1", 1)) | |
assert(b0.stride.1 == 3, halide_error_constraint_violated("b0.stride.1", b0.stride.1, "3", 3)) | |
assert(b0.min.1 == 0, halide_error_constraint_violated("b0.min.1", b0.min.1, "0", 0)) | |
assert(b0.extent.1 == 1, halide_error_constraint_violated("b0.extent.1", b0.extent.1, "1", 1)) | |
assert(b0.stride.2 == 1, halide_error_constraint_violated("b0.stride.2", b0.stride.2, "1", 1)) | |
assert(b0.min.2 == 0, halide_error_constraint_violated("b0.min.2", b0.min.2, "0", 0)) | |
assert(b0.extent.2 == 3, halide_error_constraint_violated("b0.extent.2", b0.extent.2, "3", 3)) | |
assert(nan_or_one.stride.0 == 1, halide_error_constraint_violated("nan_or_one.stride.0", nan_or_one.stride.0, "1", 1)) | |
let nan_or_one.total_extent.1 = int64(nan_or_one.extent.1)*int64(nan_or_one.extent.0) | |
assert(uint64(nan_or_one.extent.0) <= (uint64)2147483647, halide_error_buffer_allocation_too_large("nan_or_one", uint64(nan_or_one.extent.0), (uint64)2147483647)) | |
assert((uint64)abs(int64(nan_or_one.extent.1)*int64(nan_or_one.stride.1)) <= (uint64)2147483647, halide_error_buffer_allocation_too_large("nan_or_one", (uint64)abs(int64(nan_or_one.extent.1)*int64(nan_or_one.stride.1)), (uint64)2147483647)) | |
assert(nan_or_one.total_extent.1 <= (int64)2147483647, halide_error_buffer_extents_too_large("nan_or_one", nan_or_one.total_extent.1, (int64)2147483647)) | |
produce nan_or_one { | |
let halide_copy_to_device_result = halide_copy_to_device((halide_buffer_t *)b0.buffer, (halide_device_interface_t *)halide_opencl_device_interface()) | |
assert(halide_copy_to_device_result == 0, halide_copy_to_device_result) | |
let halide_copy_to_device_result$1 = halide_copy_to_device((halide_buffer_t *)nan_or_one.buffer, (halide_device_interface_t *)halide_opencl_device_interface()) | |
assert(halide_copy_to_device_result$1 == 0, halide_copy_to_device_result$1) | |
gpu_block<OpenCL> (nan_or_one.s0.row.row_outer.__block_id_y, 0, nan_or_one.extent.1) { | |
gpu_block<OpenCL> (nan_or_one.s0.col.col_outer.__block_id_x, 0, nan_or_one.extent.0) { | |
gpu_thread<OpenCL> (nan_or_one.s0.row.row_inner.__thread_id_y, 0, 1) { | |
gpu_thread<OpenCL> (nan_or_one.s0.col.col_inner.__thread_id_x, 0, 1) { | |
nan_or_one[(((nan_or_one.min.1 + nan_or_one.s0.row.row_outer.__block_id_y)*nan_or_one.stride.1) + (nan_or_one.min.0 + nan_or_one.s0.col.col_outer.__block_id_x)) - ((nan_or_one.min.1*nan_or_one.stride.1) + nan_or_one.min.0)] = | |
let t1 = (float32)strict_float(nanf) in | |
(float32)strict_float(select((uint1)is_nan_f32((float32)strict_float(b0[((nan_or_one.min.0 + nan_or_one.s0.col.col_outer.__block_id_x) + (nan_or_one.min.1 + nan_or_one.s0.row.row_outer.__block_id_y))*3])), t1, t1)) | |
} | |
} | |
} | |
} | |
_halide_buffer_set_device_dirty((halide_buffer_t *)nan_or_one.buffer, (uint1)1) | |
} | |
} | |
Injecting per-block gpu synchronization... | |
Lowering after injecting per-block gpu synchronization: | |
assert((uint64)reinterpret((halide_buffer_t *)nan_or_one.buffer) != (uint64)0, halide_error_buffer_argument_is_null("nan_or_one")) | |
assert((uint64)reinterpret((halide_buffer_t *)b0.buffer) != (uint64)0, halide_error_buffer_argument_is_null("b0")) | |
let b0 = (void *)_halide_buffer_get_host((halide_buffer_t *)b0.buffer) | |
let b0.type = (uint32)_halide_buffer_get_type((halide_buffer_t *)b0.buffer) | |
let b0.dimensions = _halide_buffer_get_dimensions((halide_buffer_t *)b0.buffer) | |
let b0.min.0 = _halide_buffer_get_min((halide_buffer_t *)b0.buffer, 0) | |
let b0.extent.0 = _halide_buffer_get_extent((halide_buffer_t *)b0.buffer, 0) | |
let b0.stride.0 = _halide_buffer_get_stride((halide_buffer_t *)b0.buffer, 0) | |
let b0.min.1 = _halide_buffer_get_min((halide_buffer_t *)b0.buffer, 1) | |
let b0.extent.1 = _halide_buffer_get_extent((halide_buffer_t *)b0.buffer, 1) | |
let b0.stride.1 = _halide_buffer_get_stride((halide_buffer_t *)b0.buffer, 1) | |
let b0.min.2 = _halide_buffer_get_min((halide_buffer_t *)b0.buffer, 2) | |
let b0.extent.2 = _halide_buffer_get_extent((halide_buffer_t *)b0.buffer, 2) | |
let b0.stride.2 = _halide_buffer_get_stride((halide_buffer_t *)b0.buffer, 2) | |
let nan_or_one = (void *)_halide_buffer_get_host((halide_buffer_t *)nan_or_one.buffer) | |
let nan_or_one.type = (uint32)_halide_buffer_get_type((halide_buffer_t *)nan_or_one.buffer) | |
let nan_or_one.dimensions = _halide_buffer_get_dimensions((halide_buffer_t *)nan_or_one.buffer) | |
let nan_or_one.min.0 = _halide_buffer_get_min((halide_buffer_t *)nan_or_one.buffer, 0) | |
let nan_or_one.extent.0 = _halide_buffer_get_extent((halide_buffer_t *)nan_or_one.buffer, 0) | |
let nan_or_one.stride.0 = _halide_buffer_get_stride((halide_buffer_t *)nan_or_one.buffer, 0) | |
let nan_or_one.min.1 = _halide_buffer_get_min((halide_buffer_t *)nan_or_one.buffer, 1) | |
let nan_or_one.extent.1 = _halide_buffer_get_extent((halide_buffer_t *)nan_or_one.buffer, 1) | |
let nan_or_one.stride.1 = _halide_buffer_get_stride((halide_buffer_t *)nan_or_one.buffer, 1) | |
assert(!(uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)b0.buffer) || ((0 <= nan_or_one.min.0) && ((nan_or_one.extent.0 + nan_or_one.min.0) <= 1)), halide_error_constraints_make_required_region_smaller("Input buffer b0", 0, 0, 0, nan_or_one.min.0, (nan_or_one.extent.0 + nan_or_one.min.0) + -1)) | |
assert(!(uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)b0.buffer) || ((0 <= nan_or_one.min.1) && ((nan_or_one.extent.1 + nan_or_one.min.1) <= 1)), halide_error_constraints_make_required_region_smaller("Input buffer b0", 1, 0, 0, nan_or_one.min.1, (nan_or_one.extent.1 + nan_or_one.min.1) + -1)) | |
if ((uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)b0.buffer)) { | |
(halide_buffer_t *)_halide_buffer_init((halide_buffer_t *)b0.buffer, (halide_dimension_t *)_halide_buffer_get_shape((halide_buffer_t *)b0.buffer), (void *)reinterpret((uint64)0), (uint64)0, (halide_device_interface_t *)reinterpret((uint64)0), 2, 32, 3, (halide_dimension_t *)make_struct(0, 1, 3, 0, 0, 1, 3, 0, 0, 3, 1, 0), (uint64)0) | |
} | |
if ((uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)nan_or_one.buffer)) { | |
(halide_buffer_t *)_halide_buffer_init((halide_buffer_t *)nan_or_one.buffer, (halide_dimension_t *)_halide_buffer_get_shape((halide_buffer_t *)nan_or_one.buffer), (void *)reinterpret((uint64)0), (uint64)0, (halide_device_interface_t *)reinterpret((uint64)0), 2, 32, 2, (halide_dimension_t *)make_struct(nan_or_one.min.0, nan_or_one.extent.0, 1, 0, nan_or_one.min.1, nan_or_one.extent.1, nan_or_one.extent.0, 0), (uint64)0) | |
} | |
if (!((uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)b0.buffer) || (uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)nan_or_one.buffer))) { | |
assert(b0.type == (uint32)73730, halide_error_bad_type("Input buffer b0", b0.type, (uint32)73730)) | |
assert(b0.dimensions == 3, halide_error_bad_dimensions("Input buffer b0", b0.dimensions, 3)) | |
assert(nan_or_one.type == (uint32)73730, halide_error_bad_type("Output buffer nan_or_one", nan_or_one.type, (uint32)73730)) | |
assert(nan_or_one.dimensions == 2, halide_error_bad_dimensions("Output buffer nan_or_one", nan_or_one.dimensions, 2)) | |
assert((b0.min.0 <= nan_or_one.min.0) && ((nan_or_one.extent.0 + nan_or_one.min.0) <= (b0.extent.0 + b0.min.0)), halide_error_access_out_of_bounds("Input buffer b0", 0, nan_or_one.min.0, (nan_or_one.extent.0 + nan_or_one.min.0) + -1, b0.min.0, (b0.extent.0 + b0.min.0) + -1)) | |
assert(0 <= b0.extent.0, halide_error_buffer_extents_negative("Input buffer b0", 0, b0.extent.0)) | |
assert((b0.min.1 <= nan_or_one.min.1) && ((nan_or_one.extent.1 + nan_or_one.min.1) <= (b0.extent.1 + b0.min.1)), halide_error_access_out_of_bounds("Input buffer b0", 1, nan_or_one.min.1, (nan_or_one.extent.1 + nan_or_one.min.1) + -1, b0.min.1, (b0.extent.1 + b0.min.1) + -1)) | |
assert(0 <= b0.extent.1, halide_error_buffer_extents_negative("Input buffer b0", 1, b0.extent.1)) | |
assert((b0.min.2 <= 0) && (1 <= (b0.extent.2 + b0.min.2)), halide_error_access_out_of_bounds("Input buffer b0", 2, 0, 0, b0.min.2, (b0.extent.2 + b0.min.2) + -1)) | |
assert(0 <= b0.extent.2, halide_error_buffer_extents_negative("Input buffer b0", 2, b0.extent.2)) | |
assert(0 <= nan_or_one.extent.0, halide_error_buffer_extents_negative("Output buffer nan_or_one", 0, nan_or_one.extent.0)) | |
assert(0 <= nan_or_one.extent.1, halide_error_buffer_extents_negative("Output buffer nan_or_one", 1, nan_or_one.extent.1)) | |
assert(b0.stride.0 == 3, halide_error_constraint_violated("b0.stride.0", b0.stride.0, "3", 3)) | |
assert(b0.min.0 == 0, halide_error_constraint_violated("b0.min.0", b0.min.0, "0", 0)) | |
assert(b0.extent.0 == 1, halide_error_constraint_violated("b0.extent.0", b0.extent.0, "1", 1)) | |
assert(b0.stride.1 == 3, halide_error_constraint_violated("b0.stride.1", b0.stride.1, "3", 3)) | |
assert(b0.min.1 == 0, halide_error_constraint_violated("b0.min.1", b0.min.1, "0", 0)) | |
assert(b0.extent.1 == 1, halide_error_constraint_violated("b0.extent.1", b0.extent.1, "1", 1)) | |
assert(b0.stride.2 == 1, halide_error_constraint_violated("b0.stride.2", b0.stride.2, "1", 1)) | |
assert(b0.min.2 == 0, halide_error_constraint_violated("b0.min.2", b0.min.2, "0", 0)) | |
assert(b0.extent.2 == 3, halide_error_constraint_violated("b0.extent.2", b0.extent.2, "3", 3)) | |
assert(nan_or_one.stride.0 == 1, halide_error_constraint_violated("nan_or_one.stride.0", nan_or_one.stride.0, "1", 1)) | |
let nan_or_one.total_extent.1 = int64(nan_or_one.extent.1)*int64(nan_or_one.extent.0) | |
assert(uint64(nan_or_one.extent.0) <= (uint64)2147483647, halide_error_buffer_allocation_too_large("nan_or_one", uint64(nan_or_one.extent.0), (uint64)2147483647)) | |
assert((uint64)abs(int64(nan_or_one.extent.1)*int64(nan_or_one.stride.1)) <= (uint64)2147483647, halide_error_buffer_allocation_too_large("nan_or_one", (uint64)abs(int64(nan_or_one.extent.1)*int64(nan_or_one.stride.1)), (uint64)2147483647)) | |
assert(nan_or_one.total_extent.1 <= (int64)2147483647, halide_error_buffer_extents_too_large("nan_or_one", nan_or_one.total_extent.1, (int64)2147483647)) | |
produce nan_or_one { | |
let halide_copy_to_device_result = halide_copy_to_device((halide_buffer_t *)b0.buffer, (halide_device_interface_t *)halide_opencl_device_interface()) | |
assert(halide_copy_to_device_result == 0, halide_copy_to_device_result) | |
let halide_copy_to_device_result$1 = halide_copy_to_device((halide_buffer_t *)nan_or_one.buffer, (halide_device_interface_t *)halide_opencl_device_interface()) | |
assert(halide_copy_to_device_result$1 == 0, halide_copy_to_device_result$1) | |
gpu_block<OpenCL> (nan_or_one.s0.row.row_outer.__block_id_y, 0, nan_or_one.extent.1) { | |
gpu_block<OpenCL> (nan_or_one.s0.col.col_outer.__block_id_x, 0, nan_or_one.extent.0) { | |
gpu_thread<OpenCL> (.__thread_id_y, 0, 1) { | |
gpu_thread<OpenCL> (.__thread_id_x, 0, 1) { | |
nan_or_one[(((nan_or_one.min.1 + nan_or_one.s0.row.row_outer.__block_id_y)*nan_or_one.stride.1) + (nan_or_one.min.0 + nan_or_one.s0.col.col_outer.__block_id_x)) - ((nan_or_one.min.1*nan_or_one.stride.1) + nan_or_one.min.0)] = | |
let t1 = (float32)strict_float(nanf) in | |
(float32)strict_float(select((uint1)is_nan_f32((float32)strict_float(b0[((nan_or_one.min.0 + nan_or_one.s0.col.col_outer.__block_id_x) + (nan_or_one.min.1 + nan_or_one.s0.row.row_outer.__block_id_y))*3])), t1, t1)) | |
} | |
} | |
} | |
} | |
_halide_buffer_set_device_dirty((halide_buffer_t *)nan_or_one.buffer, (uint1)1) | |
} | |
} | |
Detecting vector interleavings... | |
Lowering after rewriting vector interleavings: | |
assert((uint64)reinterpret((halide_buffer_t *)nan_or_one.buffer) != (uint64)0, halide_error_buffer_argument_is_null("nan_or_one")) | |
assert((uint64)reinterpret((halide_buffer_t *)b0.buffer) != (uint64)0, halide_error_buffer_argument_is_null("b0")) | |
let b0 = (void *)_halide_buffer_get_host((halide_buffer_t *)b0.buffer) | |
let b0.type = (uint32)_halide_buffer_get_type((halide_buffer_t *)b0.buffer) | |
let b0.dimensions = _halide_buffer_get_dimensions((halide_buffer_t *)b0.buffer) | |
let b0.min.0 = _halide_buffer_get_min((halide_buffer_t *)b0.buffer, 0) | |
let b0.extent.0 = _halide_buffer_get_extent((halide_buffer_t *)b0.buffer, 0) | |
let b0.stride.0 = _halide_buffer_get_stride((halide_buffer_t *)b0.buffer, 0) | |
let b0.min.1 = _halide_buffer_get_min((halide_buffer_t *)b0.buffer, 1) | |
let b0.extent.1 = _halide_buffer_get_extent((halide_buffer_t *)b0.buffer, 1) | |
let b0.stride.1 = _halide_buffer_get_stride((halide_buffer_t *)b0.buffer, 1) | |
let b0.min.2 = _halide_buffer_get_min((halide_buffer_t *)b0.buffer, 2) | |
let b0.extent.2 = _halide_buffer_get_extent((halide_buffer_t *)b0.buffer, 2) | |
let b0.stride.2 = _halide_buffer_get_stride((halide_buffer_t *)b0.buffer, 2) | |
let nan_or_one = (void *)_halide_buffer_get_host((halide_buffer_t *)nan_or_one.buffer) | |
let nan_or_one.type = (uint32)_halide_buffer_get_type((halide_buffer_t *)nan_or_one.buffer) | |
let nan_or_one.dimensions = _halide_buffer_get_dimensions((halide_buffer_t *)nan_or_one.buffer) | |
let nan_or_one.min.0 = _halide_buffer_get_min((halide_buffer_t *)nan_or_one.buffer, 0) | |
let nan_or_one.extent.0 = _halide_buffer_get_extent((halide_buffer_t *)nan_or_one.buffer, 0) | |
let nan_or_one.stride.0 = _halide_buffer_get_stride((halide_buffer_t *)nan_or_one.buffer, 0) | |
let nan_or_one.min.1 = _halide_buffer_get_min((halide_buffer_t *)nan_or_one.buffer, 1) | |
let nan_or_one.extent.1 = _halide_buffer_get_extent((halide_buffer_t *)nan_or_one.buffer, 1) | |
let nan_or_one.stride.1 = _halide_buffer_get_stride((halide_buffer_t *)nan_or_one.buffer, 1) | |
assert(!(uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)b0.buffer) || ((0 <= nan_or_one.min.0) && ((nan_or_one.extent.0 + nan_or_one.min.0) <= 1)), halide_error_constraints_make_required_region_smaller("Input buffer b0", 0, 0, 0, nan_or_one.min.0, (nan_or_one.extent.0 + nan_or_one.min.0) + -1)) | |
assert(!(uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)b0.buffer) || ((0 <= nan_or_one.min.1) && ((nan_or_one.extent.1 + nan_or_one.min.1) <= 1)), halide_error_constraints_make_required_region_smaller("Input buffer b0", 1, 0, 0, nan_or_one.min.1, (nan_or_one.extent.1 + nan_or_one.min.1) + -1)) | |
if ((uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)b0.buffer)) { | |
(halide_buffer_t *)_halide_buffer_init((halide_buffer_t *)b0.buffer, (halide_dimension_t *)_halide_buffer_get_shape((halide_buffer_t *)b0.buffer), (void *)reinterpret((uint64)0), (uint64)0, (halide_device_interface_t *)reinterpret((uint64)0), 2, 32, 3, (halide_dimension_t *)make_struct(0, 1, 3, 0, 0, 1, 3, 0, 0, 3, 1, 0), (uint64)0) | |
} | |
if ((uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)nan_or_one.buffer)) { | |
(halide_buffer_t *)_halide_buffer_init((halide_buffer_t *)nan_or_one.buffer, (halide_dimension_t *)_halide_buffer_get_shape((halide_buffer_t *)nan_or_one.buffer), (void *)reinterpret((uint64)0), (uint64)0, (halide_device_interface_t *)reinterpret((uint64)0), 2, 32, 2, (halide_dimension_t *)make_struct(nan_or_one.min.0, nan_or_one.extent.0, 1, 0, nan_or_one.min.1, nan_or_one.extent.1, nan_or_one.extent.0, 0), (uint64)0) | |
} | |
if (!((uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)b0.buffer) || (uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)nan_or_one.buffer))) { | |
assert(b0.type == (uint32)73730, halide_error_bad_type("Input buffer b0", b0.type, (uint32)73730)) | |
assert(b0.dimensions == 3, halide_error_bad_dimensions("Input buffer b0", b0.dimensions, 3)) | |
assert(nan_or_one.type == (uint32)73730, halide_error_bad_type("Output buffer nan_or_one", nan_or_one.type, (uint32)73730)) | |
assert(nan_or_one.dimensions == 2, halide_error_bad_dimensions("Output buffer nan_or_one", nan_or_one.dimensions, 2)) | |
assert((b0.min.0 <= nan_or_one.min.0) && ((nan_or_one.extent.0 + nan_or_one.min.0) <= (b0.extent.0 + b0.min.0)), halide_error_access_out_of_bounds("Input buffer b0", 0, nan_or_one.min.0, (nan_or_one.extent.0 + nan_or_one.min.0) + -1, b0.min.0, (b0.extent.0 + b0.min.0) + -1)) | |
assert(0 <= b0.extent.0, halide_error_buffer_extents_negative("Input buffer b0", 0, b0.extent.0)) | |
assert((b0.min.1 <= nan_or_one.min.1) && ((nan_or_one.extent.1 + nan_or_one.min.1) <= (b0.extent.1 + b0.min.1)), halide_error_access_out_of_bounds("Input buffer b0", 1, nan_or_one.min.1, (nan_or_one.extent.1 + nan_or_one.min.1) + -1, b0.min.1, (b0.extent.1 + b0.min.1) + -1)) | |
assert(0 <= b0.extent.1, halide_error_buffer_extents_negative("Input buffer b0", 1, b0.extent.1)) | |
assert((b0.min.2 <= 0) && (1 <= (b0.extent.2 + b0.min.2)), halide_error_access_out_of_bounds("Input buffer b0", 2, 0, 0, b0.min.2, (b0.extent.2 + b0.min.2) + -1)) | |
assert(0 <= b0.extent.2, halide_error_buffer_extents_negative("Input buffer b0", 2, b0.extent.2)) | |
assert(0 <= nan_or_one.extent.0, halide_error_buffer_extents_negative("Output buffer nan_or_one", 0, nan_or_one.extent.0)) | |
assert(0 <= nan_or_one.extent.1, halide_error_buffer_extents_negative("Output buffer nan_or_one", 1, nan_or_one.extent.1)) | |
assert(b0.stride.0 == 3, halide_error_constraint_violated("b0.stride.0", b0.stride.0, "3", 3)) | |
assert(b0.min.0 == 0, halide_error_constraint_violated("b0.min.0", b0.min.0, "0", 0)) | |
assert(b0.extent.0 == 1, halide_error_constraint_violated("b0.extent.0", b0.extent.0, "1", 1)) | |
assert(b0.stride.1 == 3, halide_error_constraint_violated("b0.stride.1", b0.stride.1, "3", 3)) | |
assert(b0.min.1 == 0, halide_error_constraint_violated("b0.min.1", b0.min.1, "0", 0)) | |
assert(b0.extent.1 == 1, halide_error_constraint_violated("b0.extent.1", b0.extent.1, "1", 1)) | |
assert(b0.stride.2 == 1, halide_error_constraint_violated("b0.stride.2", b0.stride.2, "1", 1)) | |
assert(b0.min.2 == 0, halide_error_constraint_violated("b0.min.2", b0.min.2, "0", 0)) | |
assert(b0.extent.2 == 3, halide_error_constraint_violated("b0.extent.2", b0.extent.2, "3", 3)) | |
assert(nan_or_one.stride.0 == 1, halide_error_constraint_violated("nan_or_one.stride.0", nan_or_one.stride.0, "1", 1)) | |
let nan_or_one.total_extent.1 = int64(nan_or_one.extent.1)*int64(nan_or_one.extent.0) | |
assert(uint64(nan_or_one.extent.0) <= (uint64)2147483647, halide_error_buffer_allocation_too_large("nan_or_one", uint64(nan_or_one.extent.0), (uint64)2147483647)) | |
assert((uint64)abs(int64(nan_or_one.extent.1)*int64(nan_or_one.stride.1)) <= (uint64)2147483647, halide_error_buffer_allocation_too_large("nan_or_one", (uint64)abs(int64(nan_or_one.extent.1)*int64(nan_or_one.stride.1)), (uint64)2147483647)) | |
assert(nan_or_one.total_extent.1 <= (int64)2147483647, halide_error_buffer_extents_too_large("nan_or_one", nan_or_one.total_extent.1, (int64)2147483647)) | |
produce nan_or_one { | |
let halide_copy_to_device_result = halide_copy_to_device((halide_buffer_t *)b0.buffer, (halide_device_interface_t *)halide_opencl_device_interface()) | |
assert(halide_copy_to_device_result == 0, halide_copy_to_device_result) | |
let halide_copy_to_device_result$1 = halide_copy_to_device((halide_buffer_t *)nan_or_one.buffer, (halide_device_interface_t *)halide_opencl_device_interface()) | |
assert(halide_copy_to_device_result$1 == 0, halide_copy_to_device_result$1) | |
gpu_block<OpenCL> (nan_or_one.s0.row.row_outer.__block_id_y, 0, nan_or_one.extent.1) { | |
gpu_block<OpenCL> (nan_or_one.s0.col.col_outer.__block_id_x, 0, nan_or_one.extent.0) { | |
gpu_thread<OpenCL> (.__thread_id_y, 0, 1) { | |
gpu_thread<OpenCL> (.__thread_id_x, 0, 1) { | |
nan_or_one[(((nan_or_one.min.1 + nan_or_one.s0.row.row_outer.__block_id_y)*nan_or_one.stride.1) + (nan_or_one.min.0 + nan_or_one.s0.col.col_outer.__block_id_x)) - ((nan_or_one.min.1*nan_or_one.stride.1) + nan_or_one.min.0)] = | |
let t1 = (float32)strict_float(nanf) in | |
(float32)strict_float(select((uint1)is_nan_f32((float32)strict_float(b0[((nan_or_one.min.0 + nan_or_one.s0.col.col_outer.__block_id_x) + (nan_or_one.min.1 + nan_or_one.s0.row.row_outer.__block_id_y))*3])), t1, t1)) | |
} | |
} | |
} | |
} | |
_halide_buffer_set_device_dirty((halide_buffer_t *)nan_or_one.buffer, (uint1)1) | |
} | |
} | |
Partitioning loops to simplify boundary conditions... | |
Lowering after partitioning loops: | |
assert((uint64)reinterpret((halide_buffer_t *)nan_or_one.buffer) != (uint64)0, halide_error_buffer_argument_is_null("nan_or_one")) | |
assert((uint64)reinterpret((halide_buffer_t *)b0.buffer) != (uint64)0, halide_error_buffer_argument_is_null("b0")) | |
let b0 = (void *)_halide_buffer_get_host((halide_buffer_t *)b0.buffer) | |
let b0.type = (uint32)_halide_buffer_get_type((halide_buffer_t *)b0.buffer) | |
let b0.dimensions = _halide_buffer_get_dimensions((halide_buffer_t *)b0.buffer) | |
let b0.min.0 = _halide_buffer_get_min((halide_buffer_t *)b0.buffer, 0) | |
let b0.extent.0 = _halide_buffer_get_extent((halide_buffer_t *)b0.buffer, 0) | |
let b0.stride.0 = _halide_buffer_get_stride((halide_buffer_t *)b0.buffer, 0) | |
let b0.min.1 = _halide_buffer_get_min((halide_buffer_t *)b0.buffer, 1) | |
let b0.extent.1 = _halide_buffer_get_extent((halide_buffer_t *)b0.buffer, 1) | |
let b0.stride.1 = _halide_buffer_get_stride((halide_buffer_t *)b0.buffer, 1) | |
let b0.min.2 = _halide_buffer_get_min((halide_buffer_t *)b0.buffer, 2) | |
let b0.extent.2 = _halide_buffer_get_extent((halide_buffer_t *)b0.buffer, 2) | |
let b0.stride.2 = _halide_buffer_get_stride((halide_buffer_t *)b0.buffer, 2) | |
let nan_or_one = (void *)_halide_buffer_get_host((halide_buffer_t *)nan_or_one.buffer) | |
let nan_or_one.type = (uint32)_halide_buffer_get_type((halide_buffer_t *)nan_or_one.buffer) | |
let nan_or_one.dimensions = _halide_buffer_get_dimensions((halide_buffer_t *)nan_or_one.buffer) | |
let nan_or_one.min.0 = _halide_buffer_get_min((halide_buffer_t *)nan_or_one.buffer, 0) | |
let nan_or_one.extent.0 = _halide_buffer_get_extent((halide_buffer_t *)nan_or_one.buffer, 0) | |
let nan_or_one.stride.0 = _halide_buffer_get_stride((halide_buffer_t *)nan_or_one.buffer, 0) | |
let nan_or_one.min.1 = _halide_buffer_get_min((halide_buffer_t *)nan_or_one.buffer, 1) | |
let nan_or_one.extent.1 = _halide_buffer_get_extent((halide_buffer_t *)nan_or_one.buffer, 1) | |
let nan_or_one.stride.1 = _halide_buffer_get_stride((halide_buffer_t *)nan_or_one.buffer, 1) | |
assert(!(uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)b0.buffer) || ((0 <= nan_or_one.min.0) && ((nan_or_one.extent.0 + nan_or_one.min.0) <= 1)), halide_error_constraints_make_required_region_smaller("Input buffer b0", 0, 0, 0, nan_or_one.min.0, (nan_or_one.extent.0 + nan_or_one.min.0) + -1)) | |
assert(!(uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)b0.buffer) || ((0 <= nan_or_one.min.1) && ((nan_or_one.extent.1 + nan_or_one.min.1) <= 1)), halide_error_constraints_make_required_region_smaller("Input buffer b0", 1, 0, 0, nan_or_one.min.1, (nan_or_one.extent.1 + nan_or_one.min.1) + -1)) | |
if ((uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)b0.buffer)) { | |
(halide_buffer_t *)_halide_buffer_init((halide_buffer_t *)b0.buffer, (halide_dimension_t *)_halide_buffer_get_shape((halide_buffer_t *)b0.buffer), (void *)reinterpret((uint64)0), (uint64)0, (halide_device_interface_t *)reinterpret((uint64)0), 2, 32, 3, (halide_dimension_t *)make_struct(0, 1, 3, 0, 0, 1, 3, 0, 0, 3, 1, 0), (uint64)0) | |
} | |
if ((uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)nan_or_one.buffer)) { | |
(halide_buffer_t *)_halide_buffer_init((halide_buffer_t *)nan_or_one.buffer, (halide_dimension_t *)_halide_buffer_get_shape((halide_buffer_t *)nan_or_one.buffer), (void *)reinterpret((uint64)0), (uint64)0, (halide_device_interface_t *)reinterpret((uint64)0), 2, 32, 2, (halide_dimension_t *)make_struct(nan_or_one.min.0, nan_or_one.extent.0, 1, 0, nan_or_one.min.1, nan_or_one.extent.1, nan_or_one.extent.0, 0), (uint64)0) | |
} | |
if (!((uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)b0.buffer) || (uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)nan_or_one.buffer))) { | |
assert(b0.type == (uint32)73730, halide_error_bad_type("Input buffer b0", b0.type, (uint32)73730)) | |
assert(b0.dimensions == 3, halide_error_bad_dimensions("Input buffer b0", b0.dimensions, 3)) | |
assert(nan_or_one.type == (uint32)73730, halide_error_bad_type("Output buffer nan_or_one", nan_or_one.type, (uint32)73730)) | |
assert(nan_or_one.dimensions == 2, halide_error_bad_dimensions("Output buffer nan_or_one", nan_or_one.dimensions, 2)) | |
assert((b0.min.0 <= nan_or_one.min.0) && ((nan_or_one.extent.0 + nan_or_one.min.0) <= (b0.extent.0 + b0.min.0)), halide_error_access_out_of_bounds("Input buffer b0", 0, nan_or_one.min.0, (nan_or_one.extent.0 + nan_or_one.min.0) + -1, b0.min.0, (b0.extent.0 + b0.min.0) + -1)) | |
assert(0 <= b0.extent.0, halide_error_buffer_extents_negative("Input buffer b0", 0, b0.extent.0)) | |
assert((b0.min.1 <= nan_or_one.min.1) && ((nan_or_one.extent.1 + nan_or_one.min.1) <= (b0.extent.1 + b0.min.1)), halide_error_access_out_of_bounds("Input buffer b0", 1, nan_or_one.min.1, (nan_or_one.extent.1 + nan_or_one.min.1) + -1, b0.min.1, (b0.extent.1 + b0.min.1) + -1)) | |
assert(0 <= b0.extent.1, halide_error_buffer_extents_negative("Input buffer b0", 1, b0.extent.1)) | |
assert((b0.min.2 <= 0) && (1 <= (b0.extent.2 + b0.min.2)), halide_error_access_out_of_bounds("Input buffer b0", 2, 0, 0, b0.min.2, (b0.extent.2 + b0.min.2) + -1)) | |
assert(0 <= b0.extent.2, halide_error_buffer_extents_negative("Input buffer b0", 2, b0.extent.2)) | |
assert(0 <= nan_or_one.extent.0, halide_error_buffer_extents_negative("Output buffer nan_or_one", 0, nan_or_one.extent.0)) | |
assert(0 <= nan_or_one.extent.1, halide_error_buffer_extents_negative("Output buffer nan_or_one", 1, nan_or_one.extent.1)) | |
assert(b0.stride.0 == 3, halide_error_constraint_violated("b0.stride.0", b0.stride.0, "3", 3)) | |
assert(b0.min.0 == 0, halide_error_constraint_violated("b0.min.0", b0.min.0, "0", 0)) | |
assert(b0.extent.0 == 1, halide_error_constraint_violated("b0.extent.0", b0.extent.0, "1", 1)) | |
assert(b0.stride.1 == 3, halide_error_constraint_violated("b0.stride.1", b0.stride.1, "3", 3)) | |
assert(b0.min.1 == 0, halide_error_constraint_violated("b0.min.1", b0.min.1, "0", 0)) | |
assert(b0.extent.1 == 1, halide_error_constraint_violated("b0.extent.1", b0.extent.1, "1", 1)) | |
assert(b0.stride.2 == 1, halide_error_constraint_violated("b0.stride.2", b0.stride.2, "1", 1)) | |
assert(b0.min.2 == 0, halide_error_constraint_violated("b0.min.2", b0.min.2, "0", 0)) | |
assert(b0.extent.2 == 3, halide_error_constraint_violated("b0.extent.2", b0.extent.2, "3", 3)) | |
assert(nan_or_one.stride.0 == 1, halide_error_constraint_violated("nan_or_one.stride.0", nan_or_one.stride.0, "1", 1)) | |
let nan_or_one.total_extent.1 = int64(nan_or_one.extent.1)*int64(nan_or_one.extent.0) | |
assert(uint64(nan_or_one.extent.0) <= (uint64)2147483647, halide_error_buffer_allocation_too_large("nan_or_one", uint64(nan_or_one.extent.0), (uint64)2147483647)) | |
assert((uint64)abs(int64(nan_or_one.extent.1)*int64(nan_or_one.stride.1)) <= (uint64)2147483647, halide_error_buffer_allocation_too_large("nan_or_one", (uint64)abs(int64(nan_or_one.extent.1)*int64(nan_or_one.stride.1)), (uint64)2147483647)) | |
assert(nan_or_one.total_extent.1 <= (int64)2147483647, halide_error_buffer_extents_too_large("nan_or_one", nan_or_one.total_extent.1, (int64)2147483647)) | |
produce nan_or_one { | |
let halide_copy_to_device_result = halide_copy_to_device((halide_buffer_t *)b0.buffer, (halide_device_interface_t *)halide_opencl_device_interface()) | |
assert(halide_copy_to_device_result == 0, halide_copy_to_device_result) | |
let halide_copy_to_device_result$1 = halide_copy_to_device((halide_buffer_t *)nan_or_one.buffer, (halide_device_interface_t *)halide_opencl_device_interface()) | |
assert(halide_copy_to_device_result$1 == 0, halide_copy_to_device_result$1) | |
gpu_block<OpenCL> (nan_or_one.s0.row.row_outer.__block_id_y, 0, nan_or_one.extent.1) { | |
gpu_block<OpenCL> (nan_or_one.s0.col.col_outer.__block_id_x, 0, nan_or_one.extent.0) { | |
gpu_thread<OpenCL> (.__thread_id_y, 0, 1) { | |
gpu_thread<OpenCL> (.__thread_id_x, 0, 1) { | |
nan_or_one[(((nan_or_one.min.1 + nan_or_one.s0.row.row_outer.__block_id_y)*nan_or_one.stride.1) + (nan_or_one.min.0 + nan_or_one.s0.col.col_outer.__block_id_x)) - ((nan_or_one.min.1*nan_or_one.stride.1) + nan_or_one.min.0)] = | |
let t1 = (float32)strict_float(nanf) in | |
(float32)strict_float(select((uint1)is_nan_f32((float32)strict_float(b0[((nan_or_one.min.0 + nan_or_one.s0.col.col_outer.__block_id_x) + (nan_or_one.min.1 + nan_or_one.s0.row.row_outer.__block_id_y))*3])), t1, t1)) | |
} | |
} | |
} | |
} | |
_halide_buffer_set_device_dirty((halide_buffer_t *)nan_or_one.buffer, (uint1)1) | |
} | |
} | |
Trimming loops to the region over which they do something... | |
Lowering after loop trimming: (unchanged) | |
Rebasing loops to zero... | |
Lowering after rebasing loops to zero: | |
assert((uint64)reinterpret((halide_buffer_t *)nan_or_one.buffer) != (uint64)0, halide_error_buffer_argument_is_null("nan_or_one")) | |
assert((uint64)reinterpret((halide_buffer_t *)b0.buffer) != (uint64)0, halide_error_buffer_argument_is_null("b0")) | |
let b0 = (void *)_halide_buffer_get_host((halide_buffer_t *)b0.buffer) | |
let b0.type = (uint32)_halide_buffer_get_type((halide_buffer_t *)b0.buffer) | |
let b0.dimensions = _halide_buffer_get_dimensions((halide_buffer_t *)b0.buffer) | |
let b0.min.0 = _halide_buffer_get_min((halide_buffer_t *)b0.buffer, 0) | |
let b0.extent.0 = _halide_buffer_get_extent((halide_buffer_t *)b0.buffer, 0) | |
let b0.stride.0 = _halide_buffer_get_stride((halide_buffer_t *)b0.buffer, 0) | |
let b0.min.1 = _halide_buffer_get_min((halide_buffer_t *)b0.buffer, 1) | |
let b0.extent.1 = _halide_buffer_get_extent((halide_buffer_t *)b0.buffer, 1) | |
let b0.stride.1 = _halide_buffer_get_stride((halide_buffer_t *)b0.buffer, 1) | |
let b0.min.2 = _halide_buffer_get_min((halide_buffer_t *)b0.buffer, 2) | |
let b0.extent.2 = _halide_buffer_get_extent((halide_buffer_t *)b0.buffer, 2) | |
let b0.stride.2 = _halide_buffer_get_stride((halide_buffer_t *)b0.buffer, 2) | |
let nan_or_one = (void *)_halide_buffer_get_host((halide_buffer_t *)nan_or_one.buffer) | |
let nan_or_one.type = (uint32)_halide_buffer_get_type((halide_buffer_t *)nan_or_one.buffer) | |
let nan_or_one.dimensions = _halide_buffer_get_dimensions((halide_buffer_t *)nan_or_one.buffer) | |
let nan_or_one.min.0 = _halide_buffer_get_min((halide_buffer_t *)nan_or_one.buffer, 0) | |
let nan_or_one.extent.0 = _halide_buffer_get_extent((halide_buffer_t *)nan_or_one.buffer, 0) | |
let nan_or_one.stride.0 = _halide_buffer_get_stride((halide_buffer_t *)nan_or_one.buffer, 0) | |
let nan_or_one.min.1 = _halide_buffer_get_min((halide_buffer_t *)nan_or_one.buffer, 1) | |
let nan_or_one.extent.1 = _halide_buffer_get_extent((halide_buffer_t *)nan_or_one.buffer, 1) | |
let nan_or_one.stride.1 = _halide_buffer_get_stride((halide_buffer_t *)nan_or_one.buffer, 1) | |
assert(!(uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)b0.buffer) || ((0 <= nan_or_one.min.0) && ((nan_or_one.extent.0 + nan_or_one.min.0) <= 1)), halide_error_constraints_make_required_region_smaller("Input buffer b0", 0, 0, 0, nan_or_one.min.0, (nan_or_one.extent.0 + nan_or_one.min.0) + -1)) | |
assert(!(uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)b0.buffer) || ((0 <= nan_or_one.min.1) && ((nan_or_one.extent.1 + nan_or_one.min.1) <= 1)), halide_error_constraints_make_required_region_smaller("Input buffer b0", 1, 0, 0, nan_or_one.min.1, (nan_or_one.extent.1 + nan_or_one.min.1) + -1)) | |
if ((uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)b0.buffer)) { | |
(halide_buffer_t *)_halide_buffer_init((halide_buffer_t *)b0.buffer, (halide_dimension_t *)_halide_buffer_get_shape((halide_buffer_t *)b0.buffer), (void *)reinterpret((uint64)0), (uint64)0, (halide_device_interface_t *)reinterpret((uint64)0), 2, 32, 3, (halide_dimension_t *)make_struct(0, 1, 3, 0, 0, 1, 3, 0, 0, 3, 1, 0), (uint64)0) | |
} | |
if ((uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)nan_or_one.buffer)) { | |
(halide_buffer_t *)_halide_buffer_init((halide_buffer_t *)nan_or_one.buffer, (halide_dimension_t *)_halide_buffer_get_shape((halide_buffer_t *)nan_or_one.buffer), (void *)reinterpret((uint64)0), (uint64)0, (halide_device_interface_t *)reinterpret((uint64)0), 2, 32, 2, (halide_dimension_t *)make_struct(nan_or_one.min.0, nan_or_one.extent.0, 1, 0, nan_or_one.min.1, nan_or_one.extent.1, nan_or_one.extent.0, 0), (uint64)0) | |
} | |
if (!((uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)b0.buffer) || (uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)nan_or_one.buffer))) { | |
assert(b0.type == (uint32)73730, halide_error_bad_type("Input buffer b0", b0.type, (uint32)73730)) | |
assert(b0.dimensions == 3, halide_error_bad_dimensions("Input buffer b0", b0.dimensions, 3)) | |
assert(nan_or_one.type == (uint32)73730, halide_error_bad_type("Output buffer nan_or_one", nan_or_one.type, (uint32)73730)) | |
assert(nan_or_one.dimensions == 2, halide_error_bad_dimensions("Output buffer nan_or_one", nan_or_one.dimensions, 2)) | |
assert((b0.min.0 <= nan_or_one.min.0) && ((nan_or_one.extent.0 + nan_or_one.min.0) <= (b0.extent.0 + b0.min.0)), halide_error_access_out_of_bounds("Input buffer b0", 0, nan_or_one.min.0, (nan_or_one.extent.0 + nan_or_one.min.0) + -1, b0.min.0, (b0.extent.0 + b0.min.0) + -1)) | |
assert(0 <= b0.extent.0, halide_error_buffer_extents_negative("Input buffer b0", 0, b0.extent.0)) | |
assert((b0.min.1 <= nan_or_one.min.1) && ((nan_or_one.extent.1 + nan_or_one.min.1) <= (b0.extent.1 + b0.min.1)), halide_error_access_out_of_bounds("Input buffer b0", 1, nan_or_one.min.1, (nan_or_one.extent.1 + nan_or_one.min.1) + -1, b0.min.1, (b0.extent.1 + b0.min.1) + -1)) | |
assert(0 <= b0.extent.1, halide_error_buffer_extents_negative("Input buffer b0", 1, b0.extent.1)) | |
assert((b0.min.2 <= 0) && (1 <= (b0.extent.2 + b0.min.2)), halide_error_access_out_of_bounds("Input buffer b0", 2, 0, 0, b0.min.2, (b0.extent.2 + b0.min.2) + -1)) | |
assert(0 <= b0.extent.2, halide_error_buffer_extents_negative("Input buffer b0", 2, b0.extent.2)) | |
assert(0 <= nan_or_one.extent.0, halide_error_buffer_extents_negative("Output buffer nan_or_one", 0, nan_or_one.extent.0)) | |
assert(0 <= nan_or_one.extent.1, halide_error_buffer_extents_negative("Output buffer nan_or_one", 1, nan_or_one.extent.1)) | |
assert(b0.stride.0 == 3, halide_error_constraint_violated("b0.stride.0", b0.stride.0, "3", 3)) | |
assert(b0.min.0 == 0, halide_error_constraint_violated("b0.min.0", b0.min.0, "0", 0)) | |
assert(b0.extent.0 == 1, halide_error_constraint_violated("b0.extent.0", b0.extent.0, "1", 1)) | |
assert(b0.stride.1 == 3, halide_error_constraint_violated("b0.stride.1", b0.stride.1, "3", 3)) | |
assert(b0.min.1 == 0, halide_error_constraint_violated("b0.min.1", b0.min.1, "0", 0)) | |
assert(b0.extent.1 == 1, halide_error_constraint_violated("b0.extent.1", b0.extent.1, "1", 1)) | |
assert(b0.stride.2 == 1, halide_error_constraint_violated("b0.stride.2", b0.stride.2, "1", 1)) | |
assert(b0.min.2 == 0, halide_error_constraint_violated("b0.min.2", b0.min.2, "0", 0)) | |
assert(b0.extent.2 == 3, halide_error_constraint_violated("b0.extent.2", b0.extent.2, "3", 3)) | |
assert(nan_or_one.stride.0 == 1, halide_error_constraint_violated("nan_or_one.stride.0", nan_or_one.stride.0, "1", 1)) | |
let nan_or_one.total_extent.1 = int64(nan_or_one.extent.1)*int64(nan_or_one.extent.0) | |
assert(uint64(nan_or_one.extent.0) <= (uint64)2147483647, halide_error_buffer_allocation_too_large("nan_or_one", uint64(nan_or_one.extent.0), (uint64)2147483647)) | |
assert((uint64)abs(int64(nan_or_one.extent.1)*int64(nan_or_one.stride.1)) <= (uint64)2147483647, halide_error_buffer_allocation_too_large("nan_or_one", (uint64)abs(int64(nan_or_one.extent.1)*int64(nan_or_one.stride.1)), (uint64)2147483647)) | |
assert(nan_or_one.total_extent.1 <= (int64)2147483647, halide_error_buffer_extents_too_large("nan_or_one", nan_or_one.total_extent.1, (int64)2147483647)) | |
produce nan_or_one { | |
let halide_copy_to_device_result = halide_copy_to_device((halide_buffer_t *)b0.buffer, (halide_device_interface_t *)halide_opencl_device_interface()) | |
assert(halide_copy_to_device_result == 0, halide_copy_to_device_result) | |
let halide_copy_to_device_result$1 = halide_copy_to_device((halide_buffer_t *)nan_or_one.buffer, (halide_device_interface_t *)halide_opencl_device_interface()) | |
assert(halide_copy_to_device_result$1 == 0, halide_copy_to_device_result$1) | |
gpu_block<OpenCL> (nan_or_one.s0.row.row_outer.__block_id_y, 0, nan_or_one.extent.1) { | |
gpu_block<OpenCL> (nan_or_one.s0.col.col_outer.__block_id_x, 0, nan_or_one.extent.0) { | |
gpu_thread<OpenCL> (.__thread_id_y, 0, 1) { | |
gpu_thread<OpenCL> (.__thread_id_x, 0, 1) { | |
nan_or_one[(((nan_or_one.min.1 + nan_or_one.s0.row.row_outer.__block_id_y)*nan_or_one.stride.1) + (nan_or_one.min.0 + nan_or_one.s0.col.col_outer.__block_id_x)) - ((nan_or_one.min.1*nan_or_one.stride.1) + nan_or_one.min.0)] = | |
let t1 = (float32)strict_float(nanf) in | |
(float32)strict_float(select((uint1)is_nan_f32((float32)strict_float(b0[((nan_or_one.min.0 + nan_or_one.s0.col.col_outer.__block_id_x) + (nan_or_one.min.1 + nan_or_one.s0.row.row_outer.__block_id_y))*3])), t1, t1)) | |
} | |
} | |
} | |
} | |
_halide_buffer_set_device_dirty((halide_buffer_t *)nan_or_one.buffer, (uint1)1) | |
} | |
} | |
Hoisting loop invariant if statements... | |
Lowering after hoisting loop invariant if statements: (unchanged) | |
Injecting early frees... | |
Lowering after injecting early frees: (unchanged) | |
Simplifying correlated differences... | |
Lowering after simplifying correlated differences: | |
assert((uint64)reinterpret((halide_buffer_t *)nan_or_one.buffer) != (uint64)0, halide_error_buffer_argument_is_null("nan_or_one")) | |
assert((uint64)reinterpret((halide_buffer_t *)b0.buffer) != (uint64)0, halide_error_buffer_argument_is_null("b0")) | |
let b0 = (void *)_halide_buffer_get_host((halide_buffer_t *)b0.buffer) | |
let b0.type = (uint32)_halide_buffer_get_type((halide_buffer_t *)b0.buffer) | |
let b0.dimensions = _halide_buffer_get_dimensions((halide_buffer_t *)b0.buffer) | |
let b0.min.0 = _halide_buffer_get_min((halide_buffer_t *)b0.buffer, 0) | |
let b0.extent.0 = _halide_buffer_get_extent((halide_buffer_t *)b0.buffer, 0) | |
let b0.stride.0 = _halide_buffer_get_stride((halide_buffer_t *)b0.buffer, 0) | |
let b0.min.1 = _halide_buffer_get_min((halide_buffer_t *)b0.buffer, 1) | |
let b0.extent.1 = _halide_buffer_get_extent((halide_buffer_t *)b0.buffer, 1) | |
let b0.stride.1 = _halide_buffer_get_stride((halide_buffer_t *)b0.buffer, 1) | |
let b0.min.2 = _halide_buffer_get_min((halide_buffer_t *)b0.buffer, 2) | |
let b0.extent.2 = _halide_buffer_get_extent((halide_buffer_t *)b0.buffer, 2) | |
let b0.stride.2 = _halide_buffer_get_stride((halide_buffer_t *)b0.buffer, 2) | |
let nan_or_one = (void *)_halide_buffer_get_host((halide_buffer_t *)nan_or_one.buffer) | |
let nan_or_one.type = (uint32)_halide_buffer_get_type((halide_buffer_t *)nan_or_one.buffer) | |
let nan_or_one.dimensions = _halide_buffer_get_dimensions((halide_buffer_t *)nan_or_one.buffer) | |
let nan_or_one.min.0 = _halide_buffer_get_min((halide_buffer_t *)nan_or_one.buffer, 0) | |
let nan_or_one.extent.0 = _halide_buffer_get_extent((halide_buffer_t *)nan_or_one.buffer, 0) | |
let nan_or_one.stride.0 = _halide_buffer_get_stride((halide_buffer_t *)nan_or_one.buffer, 0) | |
let nan_or_one.min.1 = _halide_buffer_get_min((halide_buffer_t *)nan_or_one.buffer, 1) | |
let nan_or_one.extent.1 = _halide_buffer_get_extent((halide_buffer_t *)nan_or_one.buffer, 1) | |
let nan_or_one.stride.1 = _halide_buffer_get_stride((halide_buffer_t *)nan_or_one.buffer, 1) | |
assert(!(uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)b0.buffer) || ((0 <= nan_or_one.min.0) && ((nan_or_one.extent.0 + nan_or_one.min.0) <= 1)), halide_error_constraints_make_required_region_smaller("Input buffer b0", 0, 0, 0, nan_or_one.min.0, (nan_or_one.extent.0 + nan_or_one.min.0) + -1)) | |
assert(!(uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)b0.buffer) || ((0 <= nan_or_one.min.1) && ((nan_or_one.extent.1 + nan_or_one.min.1) <= 1)), halide_error_constraints_make_required_region_smaller("Input buffer b0", 1, 0, 0, nan_or_one.min.1, (nan_or_one.extent.1 + nan_or_one.min.1) + -1)) | |
if ((uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)b0.buffer)) { | |
(halide_buffer_t *)_halide_buffer_init((halide_buffer_t *)b0.buffer, (halide_dimension_t *)_halide_buffer_get_shape((halide_buffer_t *)b0.buffer), (void *)reinterpret((uint64)0), (uint64)0, (halide_device_interface_t *)reinterpret((uint64)0), 2, 32, 3, (halide_dimension_t *)make_struct(0, 1, 3, 0, 0, 1, 3, 0, 0, 3, 1, 0), (uint64)0) | |
} | |
if ((uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)nan_or_one.buffer)) { | |
(halide_buffer_t *)_halide_buffer_init((halide_buffer_t *)nan_or_one.buffer, (halide_dimension_t *)_halide_buffer_get_shape((halide_buffer_t *)nan_or_one.buffer), (void *)reinterpret((uint64)0), (uint64)0, (halide_device_interface_t *)reinterpret((uint64)0), 2, 32, 2, (halide_dimension_t *)make_struct(nan_or_one.min.0, nan_or_one.extent.0, 1, 0, nan_or_one.min.1, nan_or_one.extent.1, nan_or_one.extent.0, 0), (uint64)0) | |
} | |
if (!((uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)b0.buffer) || (uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)nan_or_one.buffer))) { | |
assert(b0.type == (uint32)73730, halide_error_bad_type("Input buffer b0", b0.type, (uint32)73730)) | |
assert(b0.dimensions == 3, halide_error_bad_dimensions("Input buffer b0", b0.dimensions, 3)) | |
assert(nan_or_one.type == (uint32)73730, halide_error_bad_type("Output buffer nan_or_one", nan_or_one.type, (uint32)73730)) | |
assert(nan_or_one.dimensions == 2, halide_error_bad_dimensions("Output buffer nan_or_one", nan_or_one.dimensions, 2)) | |
assert((b0.min.0 <= nan_or_one.min.0) && ((nan_or_one.extent.0 + nan_or_one.min.0) <= (b0.extent.0 + b0.min.0)), halide_error_access_out_of_bounds("Input buffer b0", 0, nan_or_one.min.0, (nan_or_one.extent.0 + nan_or_one.min.0) + -1, b0.min.0, (b0.extent.0 + b0.min.0) + -1)) | |
assert(0 <= b0.extent.0, halide_error_buffer_extents_negative("Input buffer b0", 0, b0.extent.0)) | |
assert((b0.min.1 <= nan_or_one.min.1) && ((nan_or_one.extent.1 + nan_or_one.min.1) <= (b0.extent.1 + b0.min.1)), halide_error_access_out_of_bounds("Input buffer b0", 1, nan_or_one.min.1, (nan_or_one.extent.1 + nan_or_one.min.1) + -1, b0.min.1, (b0.extent.1 + b0.min.1) + -1)) | |
assert(0 <= b0.extent.1, halide_error_buffer_extents_negative("Input buffer b0", 1, b0.extent.1)) | |
assert((b0.min.2 <= 0) && (1 <= (b0.extent.2 + b0.min.2)), halide_error_access_out_of_bounds("Input buffer b0", 2, 0, 0, b0.min.2, (b0.extent.2 + b0.min.2) + -1)) | |
assert(0 <= b0.extent.2, halide_error_buffer_extents_negative("Input buffer b0", 2, b0.extent.2)) | |
assert(0 <= nan_or_one.extent.0, halide_error_buffer_extents_negative("Output buffer nan_or_one", 0, nan_or_one.extent.0)) | |
assert(0 <= nan_or_one.extent.1, halide_error_buffer_extents_negative("Output buffer nan_or_one", 1, nan_or_one.extent.1)) | |
assert(b0.stride.0 == 3, halide_error_constraint_violated("b0.stride.0", b0.stride.0, "3", 3)) | |
assert(b0.min.0 == 0, halide_error_constraint_violated("b0.min.0", b0.min.0, "0", 0)) | |
assert(b0.extent.0 == 1, halide_error_constraint_violated("b0.extent.0", b0.extent.0, "1", 1)) | |
assert(b0.stride.1 == 3, halide_error_constraint_violated("b0.stride.1", b0.stride.1, "3", 3)) | |
assert(b0.min.1 == 0, halide_error_constraint_violated("b0.min.1", b0.min.1, "0", 0)) | |
assert(b0.extent.1 == 1, halide_error_constraint_violated("b0.extent.1", b0.extent.1, "1", 1)) | |
assert(b0.stride.2 == 1, halide_error_constraint_violated("b0.stride.2", b0.stride.2, "1", 1)) | |
assert(b0.min.2 == 0, halide_error_constraint_violated("b0.min.2", b0.min.2, "0", 0)) | |
assert(b0.extent.2 == 3, halide_error_constraint_violated("b0.extent.2", b0.extent.2, "3", 3)) | |
assert(nan_or_one.stride.0 == 1, halide_error_constraint_violated("nan_or_one.stride.0", nan_or_one.stride.0, "1", 1)) | |
let nan_or_one.total_extent.1 = int64(nan_or_one.extent.1)*int64(nan_or_one.extent.0) | |
assert(uint64(nan_or_one.extent.0) <= (uint64)2147483647, halide_error_buffer_allocation_too_large("nan_or_one", uint64(nan_or_one.extent.0), (uint64)2147483647)) | |
assert((uint64)abs(int64(nan_or_one.extent.1)*int64(nan_or_one.stride.1)) <= (uint64)2147483647, halide_error_buffer_allocation_too_large("nan_or_one", (uint64)abs(int64(nan_or_one.extent.1)*int64(nan_or_one.stride.1)), (uint64)2147483647)) | |
assert(nan_or_one.total_extent.1 <= (int64)2147483647, halide_error_buffer_extents_too_large("nan_or_one", nan_or_one.total_extent.1, (int64)2147483647)) | |
produce nan_or_one { | |
let halide_copy_to_device_result = halide_copy_to_device((halide_buffer_t *)b0.buffer, (halide_device_interface_t *)halide_opencl_device_interface()) | |
assert(halide_copy_to_device_result == 0, halide_copy_to_device_result) | |
let halide_copy_to_device_result$1 = halide_copy_to_device((halide_buffer_t *)nan_or_one.buffer, (halide_device_interface_t *)halide_opencl_device_interface()) | |
assert(halide_copy_to_device_result$1 == 0, halide_copy_to_device_result$1) | |
gpu_block<OpenCL> (nan_or_one.s0.row.row_outer.__block_id_y, 0, nan_or_one.extent.1) { | |
gpu_block<OpenCL> (nan_or_one.s0.col.col_outer.__block_id_x, 0, nan_or_one.extent.0) { | |
gpu_thread<OpenCL> (.__thread_id_y, 0, 1) { | |
gpu_thread<OpenCL> (.__thread_id_x, 0, 1) { | |
nan_or_one[(((nan_or_one.min.1 + nan_or_one.s0.row.row_outer.__block_id_y)*nan_or_one.stride.1) + (nan_or_one.min.0 + nan_or_one.s0.col.col_outer.__block_id_x)) - ((nan_or_one.min.1*nan_or_one.stride.1) + nan_or_one.min.0)] = | |
let t1 = (float32)strict_float(nanf) in | |
(float32)strict_float(select((uint1)is_nan_f32((float32)strict_float(b0[((nan_or_one.min.0 + nan_or_one.s0.col.col_outer.__block_id_x) + (nan_or_one.min.1 + nan_or_one.s0.row.row_outer.__block_id_y))*3])), t1, t1)) | |
} | |
} | |
} | |
} | |
_halide_buffer_set_device_dirty((halide_buffer_t *)nan_or_one.buffer, (uint1)1) | |
} | |
} | |
Bounding small allocations... | |
Lowering after bounding small allocations: | |
assert((uint64)reinterpret((halide_buffer_t *)nan_or_one.buffer) != (uint64)0, halide_error_buffer_argument_is_null("nan_or_one")) | |
assert((uint64)reinterpret((halide_buffer_t *)b0.buffer) != (uint64)0, halide_error_buffer_argument_is_null("b0")) | |
let b0 = (void *)_halide_buffer_get_host((halide_buffer_t *)b0.buffer) | |
let b0.type = (uint32)_halide_buffer_get_type((halide_buffer_t *)b0.buffer) | |
let b0.dimensions = _halide_buffer_get_dimensions((halide_buffer_t *)b0.buffer) | |
let b0.min.0 = _halide_buffer_get_min((halide_buffer_t *)b0.buffer, 0) | |
let b0.extent.0 = _halide_buffer_get_extent((halide_buffer_t *)b0.buffer, 0) | |
let b0.stride.0 = _halide_buffer_get_stride((halide_buffer_t *)b0.buffer, 0) | |
let b0.min.1 = _halide_buffer_get_min((halide_buffer_t *)b0.buffer, 1) | |
let b0.extent.1 = _halide_buffer_get_extent((halide_buffer_t *)b0.buffer, 1) | |
let b0.stride.1 = _halide_buffer_get_stride((halide_buffer_t *)b0.buffer, 1) | |
let b0.min.2 = _halide_buffer_get_min((halide_buffer_t *)b0.buffer, 2) | |
let b0.extent.2 = _halide_buffer_get_extent((halide_buffer_t *)b0.buffer, 2) | |
let b0.stride.2 = _halide_buffer_get_stride((halide_buffer_t *)b0.buffer, 2) | |
let nan_or_one = (void *)_halide_buffer_get_host((halide_buffer_t *)nan_or_one.buffer) | |
let nan_or_one.type = (uint32)_halide_buffer_get_type((halide_buffer_t *)nan_or_one.buffer) | |
let nan_or_one.dimensions = _halide_buffer_get_dimensions((halide_buffer_t *)nan_or_one.buffer) | |
let nan_or_one.min.0 = _halide_buffer_get_min((halide_buffer_t *)nan_or_one.buffer, 0) | |
let nan_or_one.extent.0 = _halide_buffer_get_extent((halide_buffer_t *)nan_or_one.buffer, 0) | |
let nan_or_one.stride.0 = _halide_buffer_get_stride((halide_buffer_t *)nan_or_one.buffer, 0) | |
let nan_or_one.min.1 = _halide_buffer_get_min((halide_buffer_t *)nan_or_one.buffer, 1) | |
let nan_or_one.extent.1 = _halide_buffer_get_extent((halide_buffer_t *)nan_or_one.buffer, 1) | |
let nan_or_one.stride.1 = _halide_buffer_get_stride((halide_buffer_t *)nan_or_one.buffer, 1) | |
assert(!(uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)b0.buffer) || ((0 <= nan_or_one.min.0) && ((nan_or_one.extent.0 + nan_or_one.min.0) <= 1)), halide_error_constraints_make_required_region_smaller("Input buffer b0", 0, 0, 0, nan_or_one.min.0, (nan_or_one.extent.0 + nan_or_one.min.0) + -1)) | |
assert(!(uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)b0.buffer) || ((0 <= nan_or_one.min.1) && ((nan_or_one.extent.1 + nan_or_one.min.1) <= 1)), halide_error_constraints_make_required_region_smaller("Input buffer b0", 1, 0, 0, nan_or_one.min.1, (nan_or_one.extent.1 + nan_or_one.min.1) + -1)) | |
if ((uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)b0.buffer)) { | |
(halide_buffer_t *)_halide_buffer_init((halide_buffer_t *)b0.buffer, (halide_dimension_t *)_halide_buffer_get_shape((halide_buffer_t *)b0.buffer), (void *)reinterpret((uint64)0), (uint64)0, (halide_device_interface_t *)reinterpret((uint64)0), 2, 32, 3, (halide_dimension_t *)make_struct(0, 1, 3, 0, 0, 1, 3, 0, 0, 3, 1, 0), (uint64)0) | |
} | |
if ((uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)nan_or_one.buffer)) { | |
(halide_buffer_t *)_halide_buffer_init((halide_buffer_t *)nan_or_one.buffer, (halide_dimension_t *)_halide_buffer_get_shape((halide_buffer_t *)nan_or_one.buffer), (void *)reinterpret((uint64)0), (uint64)0, (halide_device_interface_t *)reinterpret((uint64)0), 2, 32, 2, (halide_dimension_t *)make_struct(nan_or_one.min.0, nan_or_one.extent.0, 1, 0, nan_or_one.min.1, nan_or_one.extent.1, nan_or_one.extent.0, 0), (uint64)0) | |
} | |
if (!((uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)b0.buffer) || (uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)nan_or_one.buffer))) { | |
assert(b0.type == (uint32)73730, halide_error_bad_type("Input buffer b0", b0.type, (uint32)73730)) | |
assert(b0.dimensions == 3, halide_error_bad_dimensions("Input buffer b0", b0.dimensions, 3)) | |
assert(nan_or_one.type == (uint32)73730, halide_error_bad_type("Output buffer nan_or_one", nan_or_one.type, (uint32)73730)) | |
assert(nan_or_one.dimensions == 2, halide_error_bad_dimensions("Output buffer nan_or_one", nan_or_one.dimensions, 2)) | |
assert((b0.min.0 <= nan_or_one.min.0) && ((nan_or_one.extent.0 + nan_or_one.min.0) <= (b0.extent.0 + b0.min.0)), halide_error_access_out_of_bounds("Input buffer b0", 0, nan_or_one.min.0, (nan_or_one.extent.0 + nan_or_one.min.0) + -1, b0.min.0, (b0.extent.0 + b0.min.0) + -1)) | |
assert(0 <= b0.extent.0, halide_error_buffer_extents_negative("Input buffer b0", 0, b0.extent.0)) | |
assert((b0.min.1 <= nan_or_one.min.1) && ((nan_or_one.extent.1 + nan_or_one.min.1) <= (b0.extent.1 + b0.min.1)), halide_error_access_out_of_bounds("Input buffer b0", 1, nan_or_one.min.1, (nan_or_one.extent.1 + nan_or_one.min.1) + -1, b0.min.1, (b0.extent.1 + b0.min.1) + -1)) | |
assert(0 <= b0.extent.1, halide_error_buffer_extents_negative("Input buffer b0", 1, b0.extent.1)) | |
assert((b0.min.2 <= 0) && (1 <= (b0.extent.2 + b0.min.2)), halide_error_access_out_of_bounds("Input buffer b0", 2, 0, 0, b0.min.2, (b0.extent.2 + b0.min.2) + -1)) | |
assert(0 <= b0.extent.2, halide_error_buffer_extents_negative("Input buffer b0", 2, b0.extent.2)) | |
assert(0 <= nan_or_one.extent.0, halide_error_buffer_extents_negative("Output buffer nan_or_one", 0, nan_or_one.extent.0)) | |
assert(0 <= nan_or_one.extent.1, halide_error_buffer_extents_negative("Output buffer nan_or_one", 1, nan_or_one.extent.1)) | |
assert(b0.stride.0 == 3, halide_error_constraint_violated("b0.stride.0", b0.stride.0, "3", 3)) | |
assert(b0.min.0 == 0, halide_error_constraint_violated("b0.min.0", b0.min.0, "0", 0)) | |
assert(b0.extent.0 == 1, halide_error_constraint_violated("b0.extent.0", b0.extent.0, "1", 1)) | |
assert(b0.stride.1 == 3, halide_error_constraint_violated("b0.stride.1", b0.stride.1, "3", 3)) | |
assert(b0.min.1 == 0, halide_error_constraint_violated("b0.min.1", b0.min.1, "0", 0)) | |
assert(b0.extent.1 == 1, halide_error_constraint_violated("b0.extent.1", b0.extent.1, "1", 1)) | |
assert(b0.stride.2 == 1, halide_error_constraint_violated("b0.stride.2", b0.stride.2, "1", 1)) | |
assert(b0.min.2 == 0, halide_error_constraint_violated("b0.min.2", b0.min.2, "0", 0)) | |
assert(b0.extent.2 == 3, halide_error_constraint_violated("b0.extent.2", b0.extent.2, "3", 3)) | |
assert(nan_or_one.stride.0 == 1, halide_error_constraint_violated("nan_or_one.stride.0", nan_or_one.stride.0, "1", 1)) | |
let nan_or_one.total_extent.1 = int64(nan_or_one.extent.1)*int64(nan_or_one.extent.0) | |
assert(uint64(nan_or_one.extent.0) <= (uint64)2147483647, halide_error_buffer_allocation_too_large("nan_or_one", uint64(nan_or_one.extent.0), (uint64)2147483647)) | |
assert((uint64)abs(int64(nan_or_one.extent.1)*int64(nan_or_one.stride.1)) <= (uint64)2147483647, halide_error_buffer_allocation_too_large("nan_or_one", (uint64)abs(int64(nan_or_one.extent.1)*int64(nan_or_one.stride.1)), (uint64)2147483647)) | |
assert(nan_or_one.total_extent.1 <= (int64)2147483647, halide_error_buffer_extents_too_large("nan_or_one", nan_or_one.total_extent.1, (int64)2147483647)) | |
produce nan_or_one { | |
let halide_copy_to_device_result = halide_copy_to_device((halide_buffer_t *)b0.buffer, (halide_device_interface_t *)halide_opencl_device_interface()) | |
assert(halide_copy_to_device_result == 0, halide_copy_to_device_result) | |
let halide_copy_to_device_result$1 = halide_copy_to_device((halide_buffer_t *)nan_or_one.buffer, (halide_device_interface_t *)halide_opencl_device_interface()) | |
assert(halide_copy_to_device_result$1 == 0, halide_copy_to_device_result$1) | |
gpu_block<OpenCL> (nan_or_one.s0.row.row_outer.__block_id_y, 0, nan_or_one.extent.1) { | |
gpu_block<OpenCL> (nan_or_one.s0.col.col_outer.__block_id_x, 0, nan_or_one.extent.0) { | |
gpu_thread<OpenCL> (.__thread_id_y, 0, 1) { | |
gpu_thread<OpenCL> (.__thread_id_x, 0, 1) { | |
nan_or_one[(((nan_or_one.min.1 + nan_or_one.s0.row.row_outer.__block_id_y)*nan_or_one.stride.1) + (nan_or_one.min.0 + nan_or_one.s0.col.col_outer.__block_id_x)) - ((nan_or_one.min.1*nan_or_one.stride.1) + nan_or_one.min.0)] = | |
let t1 = (float32)strict_float(nanf) in | |
(float32)strict_float(select((uint1)is_nan_f32((float32)strict_float(b0[((nan_or_one.min.0 + nan_or_one.s0.col.col_outer.__block_id_x) + (nan_or_one.min.1 + nan_or_one.s0.row.row_outer.__block_id_y))*3])), t1, t1)) | |
} | |
} | |
} | |
} | |
_halide_buffer_set_device_dirty((halide_buffer_t *)nan_or_one.buffer, (uint1)1) | |
} | |
} | |
Simplifying... | |
Lowering unsafe promises... | |
Lowering after lowering unsafe promises: | |
assert((uint64)reinterpret((halide_buffer_t *)nan_or_one.buffer) != (uint64)0, halide_error_buffer_argument_is_null("nan_or_one")) | |
assert((uint64)reinterpret((halide_buffer_t *)b0.buffer) != (uint64)0, halide_error_buffer_argument_is_null("b0")) | |
let b0 = (void *)_halide_buffer_get_host((halide_buffer_t *)b0.buffer) | |
let b0.type = (uint32)_halide_buffer_get_type((halide_buffer_t *)b0.buffer) | |
let b0.dimensions = _halide_buffer_get_dimensions((halide_buffer_t *)b0.buffer) | |
let b0.min.0 = _halide_buffer_get_min((halide_buffer_t *)b0.buffer, 0) | |
let b0.extent.0 = _halide_buffer_get_extent((halide_buffer_t *)b0.buffer, 0) | |
let b0.stride.0 = _halide_buffer_get_stride((halide_buffer_t *)b0.buffer, 0) | |
let b0.min.1 = _halide_buffer_get_min((halide_buffer_t *)b0.buffer, 1) | |
let b0.extent.1 = _halide_buffer_get_extent((halide_buffer_t *)b0.buffer, 1) | |
let b0.stride.1 = _halide_buffer_get_stride((halide_buffer_t *)b0.buffer, 1) | |
let b0.min.2 = _halide_buffer_get_min((halide_buffer_t *)b0.buffer, 2) | |
let b0.extent.2 = _halide_buffer_get_extent((halide_buffer_t *)b0.buffer, 2) | |
let b0.stride.2 = _halide_buffer_get_stride((halide_buffer_t *)b0.buffer, 2) | |
let nan_or_one = (void *)_halide_buffer_get_host((halide_buffer_t *)nan_or_one.buffer) | |
let nan_or_one.type = (uint32)_halide_buffer_get_type((halide_buffer_t *)nan_or_one.buffer) | |
let nan_or_one.dimensions = _halide_buffer_get_dimensions((halide_buffer_t *)nan_or_one.buffer) | |
let nan_or_one.min.0 = _halide_buffer_get_min((halide_buffer_t *)nan_or_one.buffer, 0) | |
let nan_or_one.extent.0 = _halide_buffer_get_extent((halide_buffer_t *)nan_or_one.buffer, 0) | |
let nan_or_one.stride.0 = _halide_buffer_get_stride((halide_buffer_t *)nan_or_one.buffer, 0) | |
let nan_or_one.min.1 = _halide_buffer_get_min((halide_buffer_t *)nan_or_one.buffer, 1) | |
let nan_or_one.extent.1 = _halide_buffer_get_extent((halide_buffer_t *)nan_or_one.buffer, 1) | |
let nan_or_one.stride.1 = _halide_buffer_get_stride((halide_buffer_t *)nan_or_one.buffer, 1) | |
assert(!(uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)b0.buffer) || ((0 <= nan_or_one.min.0) && ((nan_or_one.extent.0 + nan_or_one.min.0) <= 1)), halide_error_constraints_make_required_region_smaller("Input buffer b0", 0, 0, 0, nan_or_one.min.0, (nan_or_one.extent.0 + nan_or_one.min.0) + -1)) | |
assert(!(uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)b0.buffer) || ((0 <= nan_or_one.min.1) && ((nan_or_one.extent.1 + nan_or_one.min.1) <= 1)), halide_error_constraints_make_required_region_smaller("Input buffer b0", 1, 0, 0, nan_or_one.min.1, (nan_or_one.extent.1 + nan_or_one.min.1) + -1)) | |
if ((uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)b0.buffer)) { | |
(halide_buffer_t *)_halide_buffer_init((halide_buffer_t *)b0.buffer, (halide_dimension_t *)_halide_buffer_get_shape((halide_buffer_t *)b0.buffer), (void *)reinterpret((uint64)0), (uint64)0, (halide_device_interface_t *)reinterpret((uint64)0), 2, 32, 3, (halide_dimension_t *)make_struct(0, 1, 3, 0, 0, 1, 3, 0, 0, 3, 1, 0), (uint64)0) | |
} | |
if ((uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)nan_or_one.buffer)) { | |
(halide_buffer_t *)_halide_buffer_init((halide_buffer_t *)nan_or_one.buffer, (halide_dimension_t *)_halide_buffer_get_shape((halide_buffer_t *)nan_or_one.buffer), (void *)reinterpret((uint64)0), (uint64)0, (halide_device_interface_t *)reinterpret((uint64)0), 2, 32, 2, (halide_dimension_t *)make_struct(nan_or_one.min.0, nan_or_one.extent.0, 1, 0, nan_or_one.min.1, nan_or_one.extent.1, nan_or_one.extent.0, 0), (uint64)0) | |
} | |
if (!((uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)b0.buffer) || (uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)nan_or_one.buffer))) { | |
assert(b0.type == (uint32)73730, halide_error_bad_type("Input buffer b0", b0.type, (uint32)73730)) | |
assert(b0.dimensions == 3, halide_error_bad_dimensions("Input buffer b0", b0.dimensions, 3)) | |
assert(nan_or_one.type == (uint32)73730, halide_error_bad_type("Output buffer nan_or_one", nan_or_one.type, (uint32)73730)) | |
assert(nan_or_one.dimensions == 2, halide_error_bad_dimensions("Output buffer nan_or_one", nan_or_one.dimensions, 2)) | |
assert((b0.min.0 <= nan_or_one.min.0) && ((nan_or_one.extent.0 + nan_or_one.min.0) <= (b0.extent.0 + b0.min.0)), halide_error_access_out_of_bounds("Input buffer b0", 0, nan_or_one.min.0, (nan_or_one.extent.0 + nan_or_one.min.0) + -1, b0.min.0, (b0.extent.0 + b0.min.0) + -1)) | |
assert(0 <= b0.extent.0, halide_error_buffer_extents_negative("Input buffer b0", 0, b0.extent.0)) | |
assert((b0.min.1 <= nan_or_one.min.1) && ((nan_or_one.extent.1 + nan_or_one.min.1) <= (b0.extent.1 + b0.min.1)), halide_error_access_out_of_bounds("Input buffer b0", 1, nan_or_one.min.1, (nan_or_one.extent.1 + nan_or_one.min.1) + -1, b0.min.1, (b0.extent.1 + b0.min.1) + -1)) | |
assert(0 <= b0.extent.1, halide_error_buffer_extents_negative("Input buffer b0", 1, b0.extent.1)) | |
assert((b0.min.2 <= 0) && (1 <= (b0.extent.2 + b0.min.2)), halide_error_access_out_of_bounds("Input buffer b0", 2, 0, 0, b0.min.2, (b0.extent.2 + b0.min.2) + -1)) | |
assert(0 <= b0.extent.2, halide_error_buffer_extents_negative("Input buffer b0", 2, b0.extent.2)) | |
assert(0 <= nan_or_one.extent.0, halide_error_buffer_extents_negative("Output buffer nan_or_one", 0, nan_or_one.extent.0)) | |
assert(0 <= nan_or_one.extent.1, halide_error_buffer_extents_negative("Output buffer nan_or_one", 1, nan_or_one.extent.1)) | |
assert(b0.stride.0 == 3, halide_error_constraint_violated("b0.stride.0", b0.stride.0, "3", 3)) | |
assert(b0.min.0 == 0, halide_error_constraint_violated("b0.min.0", b0.min.0, "0", 0)) | |
assert(b0.extent.0 == 1, halide_error_constraint_violated("b0.extent.0", b0.extent.0, "1", 1)) | |
assert(b0.stride.1 == 3, halide_error_constraint_violated("b0.stride.1", b0.stride.1, "3", 3)) | |
assert(b0.min.1 == 0, halide_error_constraint_violated("b0.min.1", b0.min.1, "0", 0)) | |
assert(b0.extent.1 == 1, halide_error_constraint_violated("b0.extent.1", b0.extent.1, "1", 1)) | |
assert(b0.stride.2 == 1, halide_error_constraint_violated("b0.stride.2", b0.stride.2, "1", 1)) | |
assert(b0.min.2 == 0, halide_error_constraint_violated("b0.min.2", b0.min.2, "0", 0)) | |
assert(b0.extent.2 == 3, halide_error_constraint_violated("b0.extent.2", b0.extent.2, "3", 3)) | |
assert(nan_or_one.stride.0 == 1, halide_error_constraint_violated("nan_or_one.stride.0", nan_or_one.stride.0, "1", 1)) | |
let nan_or_one.total_extent.1 = int64(nan_or_one.extent.1)*int64(nan_or_one.extent.0) | |
assert(uint64(nan_or_one.extent.0) <= (uint64)2147483647, halide_error_buffer_allocation_too_large("nan_or_one", uint64(nan_or_one.extent.0), (uint64)2147483647)) | |
assert((uint64)abs(int64(nan_or_one.extent.1)*int64(nan_or_one.stride.1)) <= (uint64)2147483647, halide_error_buffer_allocation_too_large("nan_or_one", (uint64)abs(int64(nan_or_one.extent.1)*int64(nan_or_one.stride.1)), (uint64)2147483647)) | |
assert(nan_or_one.total_extent.1 <= (int64)2147483647, halide_error_buffer_extents_too_large("nan_or_one", nan_or_one.total_extent.1, (int64)2147483647)) | |
produce nan_or_one { | |
let halide_copy_to_device_result = halide_copy_to_device((halide_buffer_t *)b0.buffer, (halide_device_interface_t *)halide_opencl_device_interface()) | |
assert(halide_copy_to_device_result == 0, halide_copy_to_device_result) | |
let halide_copy_to_device_result$1 = halide_copy_to_device((halide_buffer_t *)nan_or_one.buffer, (halide_device_interface_t *)halide_opencl_device_interface()) | |
assert(halide_copy_to_device_result$1 == 0, halide_copy_to_device_result$1) | |
gpu_block<OpenCL> (nan_or_one.s0.row.row_outer.__block_id_y, 0, nan_or_one.extent.1) { | |
gpu_block<OpenCL> (nan_or_one.s0.col.col_outer.__block_id_x, 0, nan_or_one.extent.0) { | |
gpu_thread<OpenCL> (.__thread_id_y, 0, 1) { | |
gpu_thread<OpenCL> (.__thread_id_x, 0, 1) { | |
let t6 = nan_or_one.min.0 + nan_or_one.s0.col.col_outer.__block_id_x | |
let t7 = nan_or_one.min.1 + nan_or_one.s0.row.row_outer.__block_id_y | |
let t8 = (float32)strict_float(nanf) | |
nan_or_one[((t7*nan_or_one.stride.1) + t6) - ((nan_or_one.min.1*nan_or_one.stride.1) + nan_or_one.min.0)] = (float32)strict_float(select((uint1)is_nan_f32((float32)strict_float(b0[(t6 + t7)*3])), t8, t8)) | |
} | |
} | |
} | |
} | |
_halide_buffer_set_device_dirty((halide_buffer_t *)nan_or_one.buffer, (uint1)1) | |
} | |
} | |
Flattening nested ramps... | |
Lowering after flattening nested ramps: (unchanged) | |
Removing dead allocations and moving loop invariant code... | |
Lowering after removing dead allocations and hoisting loop invariants: | |
assert((uint64)reinterpret((halide_buffer_t *)nan_or_one.buffer) != (uint64)0, halide_error_buffer_argument_is_null("nan_or_one")) | |
assert((uint64)reinterpret((halide_buffer_t *)b0.buffer) != (uint64)0, halide_error_buffer_argument_is_null("b0")) | |
let b0 = (void *)_halide_buffer_get_host((halide_buffer_t *)b0.buffer) | |
let b0.type = (uint32)_halide_buffer_get_type((halide_buffer_t *)b0.buffer) | |
let b0.dimensions = _halide_buffer_get_dimensions((halide_buffer_t *)b0.buffer) | |
let b0.min.0 = _halide_buffer_get_min((halide_buffer_t *)b0.buffer, 0) | |
let b0.extent.0 = _halide_buffer_get_extent((halide_buffer_t *)b0.buffer, 0) | |
let b0.stride.0 = _halide_buffer_get_stride((halide_buffer_t *)b0.buffer, 0) | |
let b0.min.1 = _halide_buffer_get_min((halide_buffer_t *)b0.buffer, 1) | |
let b0.extent.1 = _halide_buffer_get_extent((halide_buffer_t *)b0.buffer, 1) | |
let b0.stride.1 = _halide_buffer_get_stride((halide_buffer_t *)b0.buffer, 1) | |
let b0.min.2 = _halide_buffer_get_min((halide_buffer_t *)b0.buffer, 2) | |
let b0.extent.2 = _halide_buffer_get_extent((halide_buffer_t *)b0.buffer, 2) | |
let b0.stride.2 = _halide_buffer_get_stride((halide_buffer_t *)b0.buffer, 2) | |
let nan_or_one = (void *)_halide_buffer_get_host((halide_buffer_t *)nan_or_one.buffer) | |
let nan_or_one.type = (uint32)_halide_buffer_get_type((halide_buffer_t *)nan_or_one.buffer) | |
let nan_or_one.dimensions = _halide_buffer_get_dimensions((halide_buffer_t *)nan_or_one.buffer) | |
let nan_or_one.min.0 = _halide_buffer_get_min((halide_buffer_t *)nan_or_one.buffer, 0) | |
let nan_or_one.extent.0 = _halide_buffer_get_extent((halide_buffer_t *)nan_or_one.buffer, 0) | |
let nan_or_one.stride.0 = _halide_buffer_get_stride((halide_buffer_t *)nan_or_one.buffer, 0) | |
let nan_or_one.min.1 = _halide_buffer_get_min((halide_buffer_t *)nan_or_one.buffer, 1) | |
let nan_or_one.extent.1 = _halide_buffer_get_extent((halide_buffer_t *)nan_or_one.buffer, 1) | |
let nan_or_one.stride.1 = _halide_buffer_get_stride((halide_buffer_t *)nan_or_one.buffer, 1) | |
assert(!(uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)b0.buffer) || ((0 <= nan_or_one.min.0) && ((nan_or_one.extent.0 + nan_or_one.min.0) <= 1)), halide_error_constraints_make_required_region_smaller("Input buffer b0", 0, 0, 0, nan_or_one.min.0, (nan_or_one.extent.0 + nan_or_one.min.0) + -1)) | |
assert(!(uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)b0.buffer) || ((0 <= nan_or_one.min.1) && ((nan_or_one.extent.1 + nan_or_one.min.1) <= 1)), halide_error_constraints_make_required_region_smaller("Input buffer b0", 1, 0, 0, nan_or_one.min.1, (nan_or_one.extent.1 + nan_or_one.min.1) + -1)) | |
if ((uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)b0.buffer)) { | |
(halide_buffer_t *)_halide_buffer_init((halide_buffer_t *)b0.buffer, (halide_dimension_t *)_halide_buffer_get_shape((halide_buffer_t *)b0.buffer), (void *)reinterpret((uint64)0), (uint64)0, (halide_device_interface_t *)reinterpret((uint64)0), 2, 32, 3, (halide_dimension_t *)make_struct(0, 1, 3, 0, 0, 1, 3, 0, 0, 3, 1, 0), (uint64)0) | |
} | |
if ((uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)nan_or_one.buffer)) { | |
(halide_buffer_t *)_halide_buffer_init((halide_buffer_t *)nan_or_one.buffer, (halide_dimension_t *)_halide_buffer_get_shape((halide_buffer_t *)nan_or_one.buffer), (void *)reinterpret((uint64)0), (uint64)0, (halide_device_interface_t *)reinterpret((uint64)0), 2, 32, 2, (halide_dimension_t *)make_struct(nan_or_one.min.0, nan_or_one.extent.0, 1, 0, nan_or_one.min.1, nan_or_one.extent.1, nan_or_one.extent.0, 0), (uint64)0) | |
} | |
if (!((uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)b0.buffer) || (uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)nan_or_one.buffer))) { | |
assert(b0.type == (uint32)73730, halide_error_bad_type("Input buffer b0", b0.type, (uint32)73730)) | |
assert(b0.dimensions == 3, halide_error_bad_dimensions("Input buffer b0", b0.dimensions, 3)) | |
assert(nan_or_one.type == (uint32)73730, halide_error_bad_type("Output buffer nan_or_one", nan_or_one.type, (uint32)73730)) | |
assert(nan_or_one.dimensions == 2, halide_error_bad_dimensions("Output buffer nan_or_one", nan_or_one.dimensions, 2)) | |
assert((b0.min.0 <= nan_or_one.min.0) && ((nan_or_one.extent.0 + nan_or_one.min.0) <= (b0.extent.0 + b0.min.0)), halide_error_access_out_of_bounds("Input buffer b0", 0, nan_or_one.min.0, (nan_or_one.extent.0 + nan_or_one.min.0) + -1, b0.min.0, (b0.extent.0 + b0.min.0) + -1)) | |
assert(0 <= b0.extent.0, halide_error_buffer_extents_negative("Input buffer b0", 0, b0.extent.0)) | |
assert((b0.min.1 <= nan_or_one.min.1) && ((nan_or_one.extent.1 + nan_or_one.min.1) <= (b0.extent.1 + b0.min.1)), halide_error_access_out_of_bounds("Input buffer b0", 1, nan_or_one.min.1, (nan_or_one.extent.1 + nan_or_one.min.1) + -1, b0.min.1, (b0.extent.1 + b0.min.1) + -1)) | |
assert(0 <= b0.extent.1, halide_error_buffer_extents_negative("Input buffer b0", 1, b0.extent.1)) | |
assert((b0.min.2 <= 0) && (1 <= (b0.extent.2 + b0.min.2)), halide_error_access_out_of_bounds("Input buffer b0", 2, 0, 0, b0.min.2, (b0.extent.2 + b0.min.2) + -1)) | |
assert(0 <= b0.extent.2, halide_error_buffer_extents_negative("Input buffer b0", 2, b0.extent.2)) | |
assert(0 <= nan_or_one.extent.0, halide_error_buffer_extents_negative("Output buffer nan_or_one", 0, nan_or_one.extent.0)) | |
assert(0 <= nan_or_one.extent.1, halide_error_buffer_extents_negative("Output buffer nan_or_one", 1, nan_or_one.extent.1)) | |
assert(b0.stride.0 == 3, halide_error_constraint_violated("b0.stride.0", b0.stride.0, "3", 3)) | |
assert(b0.min.0 == 0, halide_error_constraint_violated("b0.min.0", b0.min.0, "0", 0)) | |
assert(b0.extent.0 == 1, halide_error_constraint_violated("b0.extent.0", b0.extent.0, "1", 1)) | |
assert(b0.stride.1 == 3, halide_error_constraint_violated("b0.stride.1", b0.stride.1, "3", 3)) | |
assert(b0.min.1 == 0, halide_error_constraint_violated("b0.min.1", b0.min.1, "0", 0)) | |
assert(b0.extent.1 == 1, halide_error_constraint_violated("b0.extent.1", b0.extent.1, "1", 1)) | |
assert(b0.stride.2 == 1, halide_error_constraint_violated("b0.stride.2", b0.stride.2, "1", 1)) | |
assert(b0.min.2 == 0, halide_error_constraint_violated("b0.min.2", b0.min.2, "0", 0)) | |
assert(b0.extent.2 == 3, halide_error_constraint_violated("b0.extent.2", b0.extent.2, "3", 3)) | |
assert(nan_or_one.stride.0 == 1, halide_error_constraint_violated("nan_or_one.stride.0", nan_or_one.stride.0, "1", 1)) | |
let nan_or_one.total_extent.1 = int64(nan_or_one.extent.1)*int64(nan_or_one.extent.0) | |
assert(uint64(nan_or_one.extent.0) <= (uint64)2147483647, halide_error_buffer_allocation_too_large("nan_or_one", uint64(nan_or_one.extent.0), (uint64)2147483647)) | |
assert((uint64)abs(int64(nan_or_one.extent.1)*int64(nan_or_one.stride.1)) <= (uint64)2147483647, halide_error_buffer_allocation_too_large("nan_or_one", (uint64)abs(int64(nan_or_one.extent.1)*int64(nan_or_one.stride.1)), (uint64)2147483647)) | |
assert(nan_or_one.total_extent.1 <= (int64)2147483647, halide_error_buffer_extents_too_large("nan_or_one", nan_or_one.total_extent.1, (int64)2147483647)) | |
produce nan_or_one { | |
let halide_copy_to_device_result = halide_copy_to_device((halide_buffer_t *)b0.buffer, (halide_device_interface_t *)halide_opencl_device_interface()) | |
assert(halide_copy_to_device_result == 0, halide_copy_to_device_result) | |
let halide_copy_to_device_result$1 = halide_copy_to_device((halide_buffer_t *)nan_or_one.buffer, (halide_device_interface_t *)halide_opencl_device_interface()) | |
assert(halide_copy_to_device_result$1 == 0, halide_copy_to_device_result$1) | |
let t10 = 0 - (nan_or_one.min.1*nan_or_one.stride.1) | |
let t9 = nan_or_one.min.0 + nan_or_one.min.1 | |
gpu_block<OpenCL> (nan_or_one.s0.row.row_outer.__block_id_y, 0, nan_or_one.extent.1) { | |
gpu_block<OpenCL> (nan_or_one.s0.col.col_outer.__block_id_x, 0, nan_or_one.extent.0) { | |
gpu_thread<OpenCL> (.__thread_id_y, 0, 1) { | |
gpu_thread<OpenCL> (.__thread_id_x, 0, 1) { | |
let t8 = (float32)strict_float(nanf) | |
nan_or_one[(((nan_or_one.min.1 + nan_or_one.s0.row.row_outer.__block_id_y)*nan_or_one.stride.1) + t10) + nan_or_one.s0.col.col_outer.__block_id_x] = (float32)strict_float(select((uint1)is_nan_f32((float32)strict_float(b0[((nan_or_one.s0.row.row_outer.__block_id_y + t9) + nan_or_one.s0.col.col_outer.__block_id_x)*3])), t8, t8)) | |
} | |
} | |
} | |
} | |
_halide_buffer_set_device_dirty((halide_buffer_t *)nan_or_one.buffer, (uint1)1) | |
} | |
} | |
Finding intrinsics... | |
Lowering after finding intrinsics: | |
assert((uint64)reinterpret((halide_buffer_t *)nan_or_one.buffer) != (uint64)0, halide_error_buffer_argument_is_null("nan_or_one")) | |
assert((uint64)reinterpret((halide_buffer_t *)b0.buffer) != (uint64)0, halide_error_buffer_argument_is_null("b0")) | |
let b0 = (void *)_halide_buffer_get_host((halide_buffer_t *)b0.buffer) | |
let b0.type = (uint32)_halide_buffer_get_type((halide_buffer_t *)b0.buffer) | |
let b0.dimensions = _halide_buffer_get_dimensions((halide_buffer_t *)b0.buffer) | |
let b0.min.0 = _halide_buffer_get_min((halide_buffer_t *)b0.buffer, 0) | |
let b0.extent.0 = _halide_buffer_get_extent((halide_buffer_t *)b0.buffer, 0) | |
let b0.stride.0 = _halide_buffer_get_stride((halide_buffer_t *)b0.buffer, 0) | |
let b0.min.1 = _halide_buffer_get_min((halide_buffer_t *)b0.buffer, 1) | |
let b0.extent.1 = _halide_buffer_get_extent((halide_buffer_t *)b0.buffer, 1) | |
let b0.stride.1 = _halide_buffer_get_stride((halide_buffer_t *)b0.buffer, 1) | |
let b0.min.2 = _halide_buffer_get_min((halide_buffer_t *)b0.buffer, 2) | |
let b0.extent.2 = _halide_buffer_get_extent((halide_buffer_t *)b0.buffer, 2) | |
let b0.stride.2 = _halide_buffer_get_stride((halide_buffer_t *)b0.buffer, 2) | |
let nan_or_one = (void *)_halide_buffer_get_host((halide_buffer_t *)nan_or_one.buffer) | |
let nan_or_one.type = (uint32)_halide_buffer_get_type((halide_buffer_t *)nan_or_one.buffer) | |
let nan_or_one.dimensions = _halide_buffer_get_dimensions((halide_buffer_t *)nan_or_one.buffer) | |
let nan_or_one.min.0 = _halide_buffer_get_min((halide_buffer_t *)nan_or_one.buffer, 0) | |
let nan_or_one.extent.0 = _halide_buffer_get_extent((halide_buffer_t *)nan_or_one.buffer, 0) | |
let nan_or_one.stride.0 = _halide_buffer_get_stride((halide_buffer_t *)nan_or_one.buffer, 0) | |
let nan_or_one.min.1 = _halide_buffer_get_min((halide_buffer_t *)nan_or_one.buffer, 1) | |
let nan_or_one.extent.1 = _halide_buffer_get_extent((halide_buffer_t *)nan_or_one.buffer, 1) | |
let nan_or_one.stride.1 = _halide_buffer_get_stride((halide_buffer_t *)nan_or_one.buffer, 1) | |
assert(!(uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)b0.buffer) || ((0 <= nan_or_one.min.0) && ((nan_or_one.extent.0 + nan_or_one.min.0) <= 1)), halide_error_constraints_make_required_region_smaller("Input buffer b0", 0, 0, 0, nan_or_one.min.0, (nan_or_one.extent.0 + nan_or_one.min.0) + -1)) | |
assert(!(uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)b0.buffer) || ((0 <= nan_or_one.min.1) && ((nan_or_one.extent.1 + nan_or_one.min.1) <= 1)), halide_error_constraints_make_required_region_smaller("Input buffer b0", 1, 0, 0, nan_or_one.min.1, (nan_or_one.extent.1 + nan_or_one.min.1) + -1)) | |
if ((uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)b0.buffer)) { | |
(halide_buffer_t *)_halide_buffer_init((halide_buffer_t *)b0.buffer, (halide_dimension_t *)_halide_buffer_get_shape((halide_buffer_t *)b0.buffer), (void *)reinterpret((uint64)0), (uint64)0, (halide_device_interface_t *)reinterpret((uint64)0), 2, 32, 3, (halide_dimension_t *)make_struct(0, 1, 3, 0, 0, 1, 3, 0, 0, 3, 1, 0), (uint64)0) | |
} | |
if ((uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)nan_or_one.buffer)) { | |
(halide_buffer_t *)_halide_buffer_init((halide_buffer_t *)nan_or_one.buffer, (halide_dimension_t *)_halide_buffer_get_shape((halide_buffer_t *)nan_or_one.buffer), (void *)reinterpret((uint64)0), (uint64)0, (halide_device_interface_t *)reinterpret((uint64)0), 2, 32, 2, (halide_dimension_t *)make_struct(nan_or_one.min.0, nan_or_one.extent.0, 1, 0, nan_or_one.min.1, nan_or_one.extent.1, nan_or_one.extent.0, 0), (uint64)0) | |
} | |
if (!((uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)b0.buffer) || (uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)nan_or_one.buffer))) { | |
assert(b0.type == (uint32)73730, halide_error_bad_type("Input buffer b0", b0.type, (uint32)73730)) | |
assert(b0.dimensions == 3, halide_error_bad_dimensions("Input buffer b0", b0.dimensions, 3)) | |
assert(nan_or_one.type == (uint32)73730, halide_error_bad_type("Output buffer nan_or_one", nan_or_one.type, (uint32)73730)) | |
assert(nan_or_one.dimensions == 2, halide_error_bad_dimensions("Output buffer nan_or_one", nan_or_one.dimensions, 2)) | |
assert((b0.min.0 <= nan_or_one.min.0) && ((nan_or_one.extent.0 + nan_or_one.min.0) <= (b0.extent.0 + b0.min.0)), halide_error_access_out_of_bounds("Input buffer b0", 0, nan_or_one.min.0, (nan_or_one.extent.0 + nan_or_one.min.0) + -1, b0.min.0, (b0.extent.0 + b0.min.0) + -1)) | |
assert(0 <= b0.extent.0, halide_error_buffer_extents_negative("Input buffer b0", 0, b0.extent.0)) | |
assert((b0.min.1 <= nan_or_one.min.1) && ((nan_or_one.extent.1 + nan_or_one.min.1) <= (b0.extent.1 + b0.min.1)), halide_error_access_out_of_bounds("Input buffer b0", 1, nan_or_one.min.1, (nan_or_one.extent.1 + nan_or_one.min.1) + -1, b0.min.1, (b0.extent.1 + b0.min.1) + -1)) | |
assert(0 <= b0.extent.1, halide_error_buffer_extents_negative("Input buffer b0", 1, b0.extent.1)) | |
assert((b0.min.2 <= 0) && (1 <= (b0.extent.2 + b0.min.2)), halide_error_access_out_of_bounds("Input buffer b0", 2, 0, 0, b0.min.2, (b0.extent.2 + b0.min.2) + -1)) | |
assert(0 <= b0.extent.2, halide_error_buffer_extents_negative("Input buffer b0", 2, b0.extent.2)) | |
assert(0 <= nan_or_one.extent.0, halide_error_buffer_extents_negative("Output buffer nan_or_one", 0, nan_or_one.extent.0)) | |
assert(0 <= nan_or_one.extent.1, halide_error_buffer_extents_negative("Output buffer nan_or_one", 1, nan_or_one.extent.1)) | |
assert(b0.stride.0 == 3, halide_error_constraint_violated("b0.stride.0", b0.stride.0, "3", 3)) | |
assert(b0.min.0 == 0, halide_error_constraint_violated("b0.min.0", b0.min.0, "0", 0)) | |
assert(b0.extent.0 == 1, halide_error_constraint_violated("b0.extent.0", b0.extent.0, "1", 1)) | |
assert(b0.stride.1 == 3, halide_error_constraint_violated("b0.stride.1", b0.stride.1, "3", 3)) | |
assert(b0.min.1 == 0, halide_error_constraint_violated("b0.min.1", b0.min.1, "0", 0)) | |
assert(b0.extent.1 == 1, halide_error_constraint_violated("b0.extent.1", b0.extent.1, "1", 1)) | |
assert(b0.stride.2 == 1, halide_error_constraint_violated("b0.stride.2", b0.stride.2, "1", 1)) | |
assert(b0.min.2 == 0, halide_error_constraint_violated("b0.min.2", b0.min.2, "0", 0)) | |
assert(b0.extent.2 == 3, halide_error_constraint_violated("b0.extent.2", b0.extent.2, "3", 3)) | |
assert(nan_or_one.stride.0 == 1, halide_error_constraint_violated("nan_or_one.stride.0", nan_or_one.stride.0, "1", 1)) | |
let nan_or_one.total_extent.1 = int64(nan_or_one.extent.1)*int64(nan_or_one.extent.0) | |
assert(uint64(nan_or_one.extent.0) <= (uint64)2147483647, halide_error_buffer_allocation_too_large("nan_or_one", uint64(nan_or_one.extent.0), (uint64)2147483647)) | |
assert((uint64)abs(int64(nan_or_one.extent.1)*int64(nan_or_one.stride.1)) <= (uint64)2147483647, halide_error_buffer_allocation_too_large("nan_or_one", (uint64)abs(int64(nan_or_one.extent.1)*int64(nan_or_one.stride.1)), (uint64)2147483647)) | |
assert(nan_or_one.total_extent.1 <= (int64)2147483647, halide_error_buffer_extents_too_large("nan_or_one", nan_or_one.total_extent.1, (int64)2147483647)) | |
produce nan_or_one { | |
let halide_copy_to_device_result = halide_copy_to_device((halide_buffer_t *)b0.buffer, (halide_device_interface_t *)halide_opencl_device_interface()) | |
assert(halide_copy_to_device_result == 0, halide_copy_to_device_result) | |
let halide_copy_to_device_result$1 = halide_copy_to_device((halide_buffer_t *)nan_or_one.buffer, (halide_device_interface_t *)halide_opencl_device_interface()) | |
assert(halide_copy_to_device_result$1 == 0, halide_copy_to_device_result$1) | |
let t10 = 0 - (nan_or_one.min.1*nan_or_one.stride.1) | |
let t9 = nan_or_one.min.0 + nan_or_one.min.1 | |
gpu_block<OpenCL> (nan_or_one.s0.row.row_outer.__block_id_y, 0, nan_or_one.extent.1) { | |
gpu_block<OpenCL> (nan_or_one.s0.col.col_outer.__block_id_x, 0, nan_or_one.extent.0) { | |
gpu_thread<OpenCL> (.__thread_id_y, 0, 1) { | |
gpu_thread<OpenCL> (.__thread_id_x, 0, 1) { | |
let t8 = (float32)strict_float(nanf) | |
nan_or_one[(((nan_or_one.min.1 + nan_or_one.s0.row.row_outer.__block_id_y)*nan_or_one.stride.1) + t10) + nan_or_one.s0.col.col_outer.__block_id_x] = (float32)strict_float(select((uint1)is_nan_f32((float32)strict_float(b0[((nan_or_one.s0.row.row_outer.__block_id_y + t9) + nan_or_one.s0.col.col_outer.__block_id_x)*3])), t8, t8)) | |
} | |
} | |
} | |
} | |
_halide_buffer_set_device_dirty((halide_buffer_t *)nan_or_one.buffer, (uint1)1) | |
} | |
} | |
Hoisting prefetches... | |
Lowering after hoisting prefetches: | |
assert((uint64)reinterpret((halide_buffer_t *)nan_or_one.buffer) != (uint64)0, halide_error_buffer_argument_is_null("nan_or_one")) | |
assert((uint64)reinterpret((halide_buffer_t *)b0.buffer) != (uint64)0, halide_error_buffer_argument_is_null("b0")) | |
let b0 = (void *)_halide_buffer_get_host((halide_buffer_t *)b0.buffer) | |
let b0.type = (uint32)_halide_buffer_get_type((halide_buffer_t *)b0.buffer) | |
let b0.dimensions = _halide_buffer_get_dimensions((halide_buffer_t *)b0.buffer) | |
let b0.min.0 = _halide_buffer_get_min((halide_buffer_t *)b0.buffer, 0) | |
let b0.extent.0 = _halide_buffer_get_extent((halide_buffer_t *)b0.buffer, 0) | |
let b0.stride.0 = _halide_buffer_get_stride((halide_buffer_t *)b0.buffer, 0) | |
let b0.min.1 = _halide_buffer_get_min((halide_buffer_t *)b0.buffer, 1) | |
let b0.extent.1 = _halide_buffer_get_extent((halide_buffer_t *)b0.buffer, 1) | |
let b0.stride.1 = _halide_buffer_get_stride((halide_buffer_t *)b0.buffer, 1) | |
let b0.min.2 = _halide_buffer_get_min((halide_buffer_t *)b0.buffer, 2) | |
let b0.extent.2 = _halide_buffer_get_extent((halide_buffer_t *)b0.buffer, 2) | |
let b0.stride.2 = _halide_buffer_get_stride((halide_buffer_t *)b0.buffer, 2) | |
let nan_or_one = (void *)_halide_buffer_get_host((halide_buffer_t *)nan_or_one.buffer) | |
let nan_or_one.type = (uint32)_halide_buffer_get_type((halide_buffer_t *)nan_or_one.buffer) | |
let nan_or_one.dimensions = _halide_buffer_get_dimensions((halide_buffer_t *)nan_or_one.buffer) | |
let nan_or_one.min.0 = _halide_buffer_get_min((halide_buffer_t *)nan_or_one.buffer, 0) | |
let nan_or_one.extent.0 = _halide_buffer_get_extent((halide_buffer_t *)nan_or_one.buffer, 0) | |
let nan_or_one.stride.0 = _halide_buffer_get_stride((halide_buffer_t *)nan_or_one.buffer, 0) | |
let nan_or_one.min.1 = _halide_buffer_get_min((halide_buffer_t *)nan_or_one.buffer, 1) | |
let nan_or_one.extent.1 = _halide_buffer_get_extent((halide_buffer_t *)nan_or_one.buffer, 1) | |
let nan_or_one.stride.1 = _halide_buffer_get_stride((halide_buffer_t *)nan_or_one.buffer, 1) | |
assert(!(uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)b0.buffer) || ((0 <= nan_or_one.min.0) && ((nan_or_one.extent.0 + nan_or_one.min.0) <= 1)), halide_error_constraints_make_required_region_smaller("Input buffer b0", 0, 0, 0, nan_or_one.min.0, (nan_or_one.extent.0 + nan_or_one.min.0) + -1)) | |
assert(!(uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)b0.buffer) || ((0 <= nan_or_one.min.1) && ((nan_or_one.extent.1 + nan_or_one.min.1) <= 1)), halide_error_constraints_make_required_region_smaller("Input buffer b0", 1, 0, 0, nan_or_one.min.1, (nan_or_one.extent.1 + nan_or_one.min.1) + -1)) | |
if ((uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)b0.buffer)) { | |
(halide_buffer_t *)_halide_buffer_init((halide_buffer_t *)b0.buffer, (halide_dimension_t *)_halide_buffer_get_shape((halide_buffer_t *)b0.buffer), (void *)reinterpret((uint64)0), (uint64)0, (halide_device_interface_t *)reinterpret((uint64)0), 2, 32, 3, (halide_dimension_t *)make_struct(0, 1, 3, 0, 0, 1, 3, 0, 0, 3, 1, 0), (uint64)0) | |
} | |
if ((uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)nan_or_one.buffer)) { | |
(halide_buffer_t *)_halide_buffer_init((halide_buffer_t *)nan_or_one.buffer, (halide_dimension_t *)_halide_buffer_get_shape((halide_buffer_t *)nan_or_one.buffer), (void *)reinterpret((uint64)0), (uint64)0, (halide_device_interface_t *)reinterpret((uint64)0), 2, 32, 2, (halide_dimension_t *)make_struct(nan_or_one.min.0, nan_or_one.extent.0, 1, 0, nan_or_one.min.1, nan_or_one.extent.1, nan_or_one.extent.0, 0), (uint64)0) | |
} | |
if (!((uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)b0.buffer) || (uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)nan_or_one.buffer))) { | |
assert(b0.type == (uint32)73730, halide_error_bad_type("Input buffer b0", b0.type, (uint32)73730)) | |
assert(b0.dimensions == 3, halide_error_bad_dimensions("Input buffer b0", b0.dimensions, 3)) | |
assert(nan_or_one.type == (uint32)73730, halide_error_bad_type("Output buffer nan_or_one", nan_or_one.type, (uint32)73730)) | |
assert(nan_or_one.dimensions == 2, halide_error_bad_dimensions("Output buffer nan_or_one", nan_or_one.dimensions, 2)) | |
assert((b0.min.0 <= nan_or_one.min.0) && ((nan_or_one.extent.0 + nan_or_one.min.0) <= (b0.extent.0 + b0.min.0)), halide_error_access_out_of_bounds("Input buffer b0", 0, nan_or_one.min.0, (nan_or_one.extent.0 + nan_or_one.min.0) + -1, b0.min.0, (b0.extent.0 + b0.min.0) + -1)) | |
assert(0 <= b0.extent.0, halide_error_buffer_extents_negative("Input buffer b0", 0, b0.extent.0)) | |
assert((b0.min.1 <= nan_or_one.min.1) && ((nan_or_one.extent.1 + nan_or_one.min.1) <= (b0.extent.1 + b0.min.1)), halide_error_access_out_of_bounds("Input buffer b0", 1, nan_or_one.min.1, (nan_or_one.extent.1 + nan_or_one.min.1) + -1, b0.min.1, (b0.extent.1 + b0.min.1) + -1)) | |
assert(0 <= b0.extent.1, halide_error_buffer_extents_negative("Input buffer b0", 1, b0.extent.1)) | |
assert((b0.min.2 <= 0) && (1 <= (b0.extent.2 + b0.min.2)), halide_error_access_out_of_bounds("Input buffer b0", 2, 0, 0, b0.min.2, (b0.extent.2 + b0.min.2) + -1)) | |
assert(0 <= b0.extent.2, halide_error_buffer_extents_negative("Input buffer b0", 2, b0.extent.2)) | |
assert(0 <= nan_or_one.extent.0, halide_error_buffer_extents_negative("Output buffer nan_or_one", 0, nan_or_one.extent.0)) | |
assert(0 <= nan_or_one.extent.1, halide_error_buffer_extents_negative("Output buffer nan_or_one", 1, nan_or_one.extent.1)) | |
assert(b0.stride.0 == 3, halide_error_constraint_violated("b0.stride.0", b0.stride.0, "3", 3)) | |
assert(b0.min.0 == 0, halide_error_constraint_violated("b0.min.0", b0.min.0, "0", 0)) | |
assert(b0.extent.0 == 1, halide_error_constraint_violated("b0.extent.0", b0.extent.0, "1", 1)) | |
assert(b0.stride.1 == 3, halide_error_constraint_violated("b0.stride.1", b0.stride.1, "3", 3)) | |
assert(b0.min.1 == 0, halide_error_constraint_violated("b0.min.1", b0.min.1, "0", 0)) | |
assert(b0.extent.1 == 1, halide_error_constraint_violated("b0.extent.1", b0.extent.1, "1", 1)) | |
assert(b0.stride.2 == 1, halide_error_constraint_violated("b0.stride.2", b0.stride.2, "1", 1)) | |
assert(b0.min.2 == 0, halide_error_constraint_violated("b0.min.2", b0.min.2, "0", 0)) | |
assert(b0.extent.2 == 3, halide_error_constraint_violated("b0.extent.2", b0.extent.2, "3", 3)) | |
assert(nan_or_one.stride.0 == 1, halide_error_constraint_violated("nan_or_one.stride.0", nan_or_one.stride.0, "1", 1)) | |
let nan_or_one.total_extent.1 = int64(nan_or_one.extent.1)*int64(nan_or_one.extent.0) | |
assert(uint64(nan_or_one.extent.0) <= (uint64)2147483647, halide_error_buffer_allocation_too_large("nan_or_one", uint64(nan_or_one.extent.0), (uint64)2147483647)) | |
assert((uint64)abs(int64(nan_or_one.extent.1)*int64(nan_or_one.stride.1)) <= (uint64)2147483647, halide_error_buffer_allocation_too_large("nan_or_one", (uint64)abs(int64(nan_or_one.extent.1)*int64(nan_or_one.stride.1)), (uint64)2147483647)) | |
assert(nan_or_one.total_extent.1 <= (int64)2147483647, halide_error_buffer_extents_too_large("nan_or_one", nan_or_one.total_extent.1, (int64)2147483647)) | |
produce nan_or_one { | |
let halide_copy_to_device_result = halide_copy_to_device((halide_buffer_t *)b0.buffer, (halide_device_interface_t *)halide_opencl_device_interface()) | |
assert(halide_copy_to_device_result == 0, halide_copy_to_device_result) | |
let halide_copy_to_device_result$1 = halide_copy_to_device((halide_buffer_t *)nan_or_one.buffer, (halide_device_interface_t *)halide_opencl_device_interface()) | |
assert(halide_copy_to_device_result$1 == 0, halide_copy_to_device_result$1) | |
let t10 = 0 - (nan_or_one.min.1*nan_or_one.stride.1) | |
let t9 = nan_or_one.min.0 + nan_or_one.min.1 | |
gpu_block<OpenCL> (nan_or_one.s0.row.row_outer.__block_id_y, 0, nan_or_one.extent.1) { | |
gpu_block<OpenCL> (nan_or_one.s0.col.col_outer.__block_id_x, 0, nan_or_one.extent.0) { | |
gpu_thread<OpenCL> (.__thread_id_y, 0, 1) { | |
gpu_thread<OpenCL> (.__thread_id_x, 0, 1) { | |
let t8 = (float32)strict_float(nanf) | |
nan_or_one[(((nan_or_one.min.1 + nan_or_one.s0.row.row_outer.__block_id_y)*nan_or_one.stride.1) + t10) + nan_or_one.s0.col.col_outer.__block_id_x] = (float32)strict_float(select((uint1)is_nan_f32((float32)strict_float(b0[((nan_or_one.s0.row.row_outer.__block_id_y + t9) + nan_or_one.s0.col.col_outer.__block_id_x)*3])), t8, t8)) | |
} | |
} | |
} | |
} | |
_halide_buffer_set_device_dirty((halide_buffer_t *)nan_or_one.buffer, (uint1)1) | |
} | |
} | |
Lowering after final simplification: | |
assert((uint64)reinterpret((halide_buffer_t *)nan_or_one.buffer) != (uint64)0, halide_error_buffer_argument_is_null("nan_or_one")) | |
assert((uint64)reinterpret((halide_buffer_t *)b0.buffer) != (uint64)0, halide_error_buffer_argument_is_null("b0")) | |
let b0 = (void *)_halide_buffer_get_host((halide_buffer_t *)b0.buffer) | |
let b0.type = (uint32)_halide_buffer_get_type((halide_buffer_t *)b0.buffer) | |
let b0.dimensions = _halide_buffer_get_dimensions((halide_buffer_t *)b0.buffer) | |
let b0.min.0 = _halide_buffer_get_min((halide_buffer_t *)b0.buffer, 0) | |
let b0.extent.0 = _halide_buffer_get_extent((halide_buffer_t *)b0.buffer, 0) | |
let b0.stride.0 = _halide_buffer_get_stride((halide_buffer_t *)b0.buffer, 0) | |
let b0.min.1 = _halide_buffer_get_min((halide_buffer_t *)b0.buffer, 1) | |
let b0.extent.1 = _halide_buffer_get_extent((halide_buffer_t *)b0.buffer, 1) | |
let b0.stride.1 = _halide_buffer_get_stride((halide_buffer_t *)b0.buffer, 1) | |
let b0.min.2 = _halide_buffer_get_min((halide_buffer_t *)b0.buffer, 2) | |
let b0.extent.2 = _halide_buffer_get_extent((halide_buffer_t *)b0.buffer, 2) | |
let b0.stride.2 = _halide_buffer_get_stride((halide_buffer_t *)b0.buffer, 2) | |
let nan_or_one = (void *)_halide_buffer_get_host((halide_buffer_t *)nan_or_one.buffer) | |
let nan_or_one.type = (uint32)_halide_buffer_get_type((halide_buffer_t *)nan_or_one.buffer) | |
let nan_or_one.dimensions = _halide_buffer_get_dimensions((halide_buffer_t *)nan_or_one.buffer) | |
let nan_or_one.min.0 = _halide_buffer_get_min((halide_buffer_t *)nan_or_one.buffer, 0) | |
let nan_or_one.extent.0 = _halide_buffer_get_extent((halide_buffer_t *)nan_or_one.buffer, 0) | |
let nan_or_one.stride.0 = _halide_buffer_get_stride((halide_buffer_t *)nan_or_one.buffer, 0) | |
let nan_or_one.min.1 = _halide_buffer_get_min((halide_buffer_t *)nan_or_one.buffer, 1) | |
let nan_or_one.extent.1 = _halide_buffer_get_extent((halide_buffer_t *)nan_or_one.buffer, 1) | |
let nan_or_one.stride.1 = _halide_buffer_get_stride((halide_buffer_t *)nan_or_one.buffer, 1) | |
assert(!(uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)b0.buffer) || ((0 <= nan_or_one.min.0) && ((nan_or_one.extent.0 + nan_or_one.min.0) <= 1)), halide_error_constraints_make_required_region_smaller("Input buffer b0", 0, 0, 0, nan_or_one.min.0, (nan_or_one.extent.0 + nan_or_one.min.0) + -1)) | |
assert(!(uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)b0.buffer) || ((0 <= nan_or_one.min.1) && ((nan_or_one.extent.1 + nan_or_one.min.1) <= 1)), halide_error_constraints_make_required_region_smaller("Input buffer b0", 1, 0, 0, nan_or_one.min.1, (nan_or_one.extent.1 + nan_or_one.min.1) + -1)) | |
if ((uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)b0.buffer)) { | |
(halide_buffer_t *)_halide_buffer_init((halide_buffer_t *)b0.buffer, (halide_dimension_t *)_halide_buffer_get_shape((halide_buffer_t *)b0.buffer), (void *)reinterpret((uint64)0), (uint64)0, (halide_device_interface_t *)reinterpret((uint64)0), 2, 32, 3, (halide_dimension_t *)make_struct(0, 1, 3, 0, 0, 1, 3, 0, 0, 3, 1, 0), (uint64)0) | |
} | |
if ((uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)nan_or_one.buffer)) { | |
(halide_buffer_t *)_halide_buffer_init((halide_buffer_t *)nan_or_one.buffer, (halide_dimension_t *)_halide_buffer_get_shape((halide_buffer_t *)nan_or_one.buffer), (void *)reinterpret((uint64)0), (uint64)0, (halide_device_interface_t *)reinterpret((uint64)0), 2, 32, 2, (halide_dimension_t *)make_struct(nan_or_one.min.0, nan_or_one.extent.0, 1, 0, nan_or_one.min.1, nan_or_one.extent.1, nan_or_one.extent.0, 0), (uint64)0) | |
} | |
if (!((uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)b0.buffer) || (uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)nan_or_one.buffer))) { | |
assert(b0.type == (uint32)73730, halide_error_bad_type("Input buffer b0", b0.type, (uint32)73730)) | |
assert(b0.dimensions == 3, halide_error_bad_dimensions("Input buffer b0", b0.dimensions, 3)) | |
assert(nan_or_one.type == (uint32)73730, halide_error_bad_type("Output buffer nan_or_one", nan_or_one.type, (uint32)73730)) | |
assert(nan_or_one.dimensions == 2, halide_error_bad_dimensions("Output buffer nan_or_one", nan_or_one.dimensions, 2)) | |
assert((b0.min.0 <= nan_or_one.min.0) && ((nan_or_one.extent.0 + nan_or_one.min.0) <= (b0.extent.0 + b0.min.0)), halide_error_access_out_of_bounds("Input buffer b0", 0, nan_or_one.min.0, (nan_or_one.extent.0 + nan_or_one.min.0) + -1, b0.min.0, (b0.extent.0 + b0.min.0) + -1)) | |
assert(0 <= b0.extent.0, halide_error_buffer_extents_negative("Input buffer b0", 0, b0.extent.0)) | |
assert((b0.min.1 <= nan_or_one.min.1) && ((nan_or_one.extent.1 + nan_or_one.min.1) <= (b0.extent.1 + b0.min.1)), halide_error_access_out_of_bounds("Input buffer b0", 1, nan_or_one.min.1, (nan_or_one.extent.1 + nan_or_one.min.1) + -1, b0.min.1, (b0.extent.1 + b0.min.1) + -1)) | |
assert(0 <= b0.extent.1, halide_error_buffer_extents_negative("Input buffer b0", 1, b0.extent.1)) | |
assert((b0.min.2 <= 0) && (1 <= (b0.extent.2 + b0.min.2)), halide_error_access_out_of_bounds("Input buffer b0", 2, 0, 0, b0.min.2, (b0.extent.2 + b0.min.2) + -1)) | |
assert(0 <= b0.extent.2, halide_error_buffer_extents_negative("Input buffer b0", 2, b0.extent.2)) | |
assert(0 <= nan_or_one.extent.0, halide_error_buffer_extents_negative("Output buffer nan_or_one", 0, nan_or_one.extent.0)) | |
assert(0 <= nan_or_one.extent.1, halide_error_buffer_extents_negative("Output buffer nan_or_one", 1, nan_or_one.extent.1)) | |
assert(b0.stride.0 == 3, halide_error_constraint_violated("b0.stride.0", b0.stride.0, "3", 3)) | |
assert(b0.min.0 == 0, halide_error_constraint_violated("b0.min.0", b0.min.0, "0", 0)) | |
assert(b0.extent.0 == 1, halide_error_constraint_violated("b0.extent.0", b0.extent.0, "1", 1)) | |
assert(b0.stride.1 == 3, halide_error_constraint_violated("b0.stride.1", b0.stride.1, "3", 3)) | |
assert(b0.min.1 == 0, halide_error_constraint_violated("b0.min.1", b0.min.1, "0", 0)) | |
assert(b0.extent.1 == 1, halide_error_constraint_violated("b0.extent.1", b0.extent.1, "1", 1)) | |
assert(b0.stride.2 == 1, halide_error_constraint_violated("b0.stride.2", b0.stride.2, "1", 1)) | |
assert(b0.min.2 == 0, halide_error_constraint_violated("b0.min.2", b0.min.2, "0", 0)) | |
assert(b0.extent.2 == 3, halide_error_constraint_violated("b0.extent.2", b0.extent.2, "3", 3)) | |
assert(nan_or_one.stride.0 == 1, halide_error_constraint_violated("nan_or_one.stride.0", nan_or_one.stride.0, "1", 1)) | |
let nan_or_one.total_extent.1 = int64(nan_or_one.extent.1)*int64(nan_or_one.extent.0) | |
assert(uint64(nan_or_one.extent.0) <= (uint64)2147483647, halide_error_buffer_allocation_too_large("nan_or_one", uint64(nan_or_one.extent.0), (uint64)2147483647)) | |
assert((uint64)abs(int64(nan_or_one.extent.1)*int64(nan_or_one.stride.1)) <= (uint64)2147483647, halide_error_buffer_allocation_too_large("nan_or_one", (uint64)abs(int64(nan_or_one.extent.1)*int64(nan_or_one.stride.1)), (uint64)2147483647)) | |
assert(nan_or_one.total_extent.1 <= (int64)2147483647, halide_error_buffer_extents_too_large("nan_or_one", nan_or_one.total_extent.1, (int64)2147483647)) | |
produce nan_or_one { | |
let halide_copy_to_device_result = halide_copy_to_device((halide_buffer_t *)b0.buffer, (halide_device_interface_t *)halide_opencl_device_interface()) | |
assert(halide_copy_to_device_result == 0, halide_copy_to_device_result) | |
let halide_copy_to_device_result$1 = halide_copy_to_device((halide_buffer_t *)nan_or_one.buffer, (halide_device_interface_t *)halide_opencl_device_interface()) | |
assert(halide_copy_to_device_result$1 == 0, halide_copy_to_device_result$1) | |
let t10 = 0 - (nan_or_one.min.1*nan_or_one.stride.1) | |
let t9 = nan_or_one.min.0 + nan_or_one.min.1 | |
gpu_block<OpenCL> (nan_or_one.s0.row.row_outer.__block_id_y, 0, nan_or_one.extent.1) { | |
gpu_block<OpenCL> (nan_or_one.s0.col.col_outer.__block_id_x, 0, nan_or_one.extent.0) { | |
gpu_thread<OpenCL> (.__thread_id_y, 0, 1) { | |
gpu_thread<OpenCL> (.__thread_id_x, 0, 1) { | |
let t8 = (float32)strict_float(nanf) | |
nan_or_one[(((nan_or_one.min.1 + nan_or_one.s0.row.row_outer.__block_id_y)*nan_or_one.stride.1) + t10) + nan_or_one.s0.col.col_outer.__block_id_x] = (float32)strict_float(select((uint1)is_nan_f32((float32)strict_float(b0[((nan_or_one.s0.row.row_outer.__block_id_y + t9) + nan_or_one.s0.col.col_outer.__block_id_x)*3])), t8, t8)) | |
} | |
} | |
} | |
} | |
_halide_buffer_set_device_dirty((halide_buffer_t *)nan_or_one.buffer, (uint1)1) | |
} | |
} | |
Skipping Hexagon offload... | |
Offloading GPU loops... | |
OpenCL device codegen init_module | |
Kernel launch: nan_or_one.s0.row.row_outer.__block_id_y | |
Kernel bounds: (1, 1, 1, 1) threads, (nan_or_one.extent.0, nan_or_one.extent.1, 1, 1) blocks | |
var: nan_or_one.min.1 | |
var: nan_or_one.stride.1 | |
var: t10 | |
var: t9 | |
buffer: b0 12 (read) dims=3 | |
buffer: nan_or_one 0 (write) dims=0 | |
CodeGen_OpenCL_Dev::compile _kernel_nan_or_one_s0_row_row_outer___block_id_y | |
CodeGen_OpenCL_Dev: after removing predication: | |
gpu_block<OpenCL> (nan_or_one.s0.row.row_outer.__block_id_y, 0, nan_or_one.extent.1) { | |
gpu_block<OpenCL> (nan_or_one.s0.col.col_outer.__block_id_x, 0, nan_or_one.extent.0) { | |
gpu_thread<OpenCL> (.__thread_id_y, 0, 1) { | |
gpu_thread<OpenCL> (.__thread_id_x, 0, 1) { | |
let t8 = (float32)strict_float(nanf) | |
nan_or_one[(((nan_or_one.min.1 + nan_or_one.s0.row.row_outer.__block_id_y)*nan_or_one.stride.1) + t10) + nan_or_one.s0.col.col_outer.__block_id_x] = (float32)strict_float(select((uint1)is_nan_f32((float32)strict_float((float32)b0[((nan_or_one.s0.row.row_outer.__block_id_y + t9) + nan_or_one.s0.col.col_outer.__block_id_x)*3])), t8, t8)) | |
} | |
} | |
} | |
} | |
Adding OpenCL kernel _kernel_nan_or_one_s0_row_row_outer___block_id_y | |
Eliminating bool vectors | |
After eliminating bool vectors: | |
gpu_block<OpenCL> (nan_or_one.s0.row.row_outer.__block_id_y, 0, nan_or_one.extent.1) { | |
gpu_block<OpenCL> (nan_or_one.s0.col.col_outer.__block_id_x, 0, nan_or_one.extent.0) { | |
gpu_thread<OpenCL> (.__thread_id_y, 0, 1) { | |
gpu_thread<OpenCL> (.__thread_id_x, 0, 1) { | |
let t8 = (float32)strict_float(nanf) | |
nan_or_one[(((nan_or_one.min.1 + nan_or_one.s0.row.row_outer.__block_id_y)*nan_or_one.stride.1) + t10) + nan_or_one.s0.col.col_outer.__block_id_x] = (float32)strict_float(select((uint1)is_nan_f32((float32)strict_float((float32)b0[((nan_or_one.s0.row.row_outer.__block_id_y + t9) + nan_or_one.s0.col.col_outer.__block_id_x)*3])), t8, t8)) | |
} | |
} | |
} | |
} | |
Compiled launch to kernel "_kernel_nan_or_one_s0_row_row_outer___block_id_y" | |
Generating init_kernels for opencl | |
OpenCL kernel: | |
/*OpenCL C x86-64-linux-jit-opencl-sse41-strict_float-user_context*/ | |
#pragma OPENCL FP_CONTRACT ON | |
inline float float_from_bits(unsigned int x) {return as_float(x);} | |
inline float nan_f32() { return NAN; } | |
inline float neg_inf_f32() { return -INFINITY; } | |
inline float inf_f32() { return INFINITY; } | |
inline bool is_nan_f32(float x) {return isnan(x); } | |
inline bool is_inf_f32(float x) {return isinf(x); } | |
inline bool is_finite_f32(float x) {return isfinite(x); } | |
#define sqrt_f32 sqrt | |
#define sin_f32 sin | |
#define cos_f32 cos | |
#define exp_f32 exp | |
#define log_f32 log | |
#define abs_f32 fabs | |
#define floor_f32 floor | |
#define ceil_f32 ceil | |
#define round_f32 round | |
#define trunc_f32 trunc | |
#define pow_f32 pow | |
#define asin_f32 asin | |
#define acos_f32 acos | |
#define tan_f32 tan | |
#define atan_f32 atan | |
#define atan2_f32 atan2 | |
#define sinh_f32 sinh | |
#define asinh_f32 asinh | |
#define cosh_f32 cosh | |
#define acosh_f32 acosh | |
#define tanh_f32 tanh | |
#define atanh_f32 atanh | |
#define fast_inverse_f32 native_recip | |
#define fast_inverse_sqrt_f32 native_rsqrt | |
#define halide_unused(x) | |
__kernel void _at_least_one_kernel(int x) { } | |
// Address spaces for _kernel_nan_or_one_s0_row_row_outer___block_id_y | |
#if 12 <= MAX_CONSTANT_BUFFER_SIZE && 0 < MAX_CONSTANT_ARGS | |
#define __address_space__b0 __constant | |
#else | |
#define __address_space__b0 __global | |
#endif | |
#define __address_space__nan_or_one __global | |
__kernel void _kernel_nan_or_one_s0_row_row_outer___block_id_y( | |
__address_space__b0 const float *restrict _b0, | |
__address_space__nan_or_one float *restrict _nan_or_one, | |
const int _nan_or_one_min_1, | |
const int _nan_or_one_stride_1, | |
const int _t10, | |
const int _t9, | |
__local int16* __shared) | |
{ | |
int _nan_or_one_s0_row_row_outer___block_id_y = get_group_id(1); | |
int _nan_or_one_s0_col_col_outer___block_id_x = get_group_id(0); | |
int ___thread_id_y = get_local_id(1); | |
int ___thread_id_x = get_local_id(0); | |
float _0 = (nan_f32()); | |
int _1 = _nan_or_one_s0_row_row_outer___block_id_y + _t9; | |
int _2 = _1 + _nan_or_one_s0_col_col_outer___block_id_x; | |
int _3 = _2 * 3; | |
float _4 = _b0[_3]; | |
float _5 = (_4); | |
bool _6 = is_nan_f32(_5); | |
float _7 = (float)(_6 ? _0 : _0); | |
float _8 = (_7); | |
int _9 = _nan_or_one_min_1 + _nan_or_one_s0_row_row_outer___block_id_y; | |
int _10 = _9 * _nan_or_one_stride_1; | |
int _11 = _10 + _t10; | |
int _12 = _11 + _nan_or_one_s0_col_col_outer___block_id_x; | |
_nan_or_one[_12] = _8; | |
} // kernel _kernel_nan_or_one_s0_row_row_outer___block_id_y | |
#undef __address_space__b0 | |
#undef __address_space__nan_or_one | |
Lowering after splitting off GPU loops: | |
let opencl = (void *)_halide_buffer_get_host((halide_buffer_t *)opencl_buf.buffer) | |
let halide_opencl_initialize_kernels_result = halide_opencl_initialize_kernels(opencl, (void *)_halide_buffer_get_host((halide_buffer_t *)opencl_gpu_source_kernels.buffer), 2510) | |
assert(halide_opencl_initialize_kernels_result == 0, halide_opencl_initialize_kernels_result) | |
(void *)register_destructor("halide_opencl_finalize_kernels", opencl[0]) | |
assert((uint64)reinterpret((halide_buffer_t *)nan_or_one.buffer) != (uint64)0, halide_error_buffer_argument_is_null("nan_or_one")) | |
assert((uint64)reinterpret((halide_buffer_t *)b0.buffer) != (uint64)0, halide_error_buffer_argument_is_null("b0")) | |
let b0 = (void *)_halide_buffer_get_host((halide_buffer_t *)b0.buffer) | |
let b0.type = (uint32)_halide_buffer_get_type((halide_buffer_t *)b0.buffer) | |
let b0.dimensions = _halide_buffer_get_dimensions((halide_buffer_t *)b0.buffer) | |
let b0.min.0 = _halide_buffer_get_min((halide_buffer_t *)b0.buffer, 0) | |
let b0.extent.0 = _halide_buffer_get_extent((halide_buffer_t *)b0.buffer, 0) | |
let b0.stride.0 = _halide_buffer_get_stride((halide_buffer_t *)b0.buffer, 0) | |
let b0.min.1 = _halide_buffer_get_min((halide_buffer_t *)b0.buffer, 1) | |
let b0.extent.1 = _halide_buffer_get_extent((halide_buffer_t *)b0.buffer, 1) | |
let b0.stride.1 = _halide_buffer_get_stride((halide_buffer_t *)b0.buffer, 1) | |
let b0.min.2 = _halide_buffer_get_min((halide_buffer_t *)b0.buffer, 2) | |
let b0.extent.2 = _halide_buffer_get_extent((halide_buffer_t *)b0.buffer, 2) | |
let b0.stride.2 = _halide_buffer_get_stride((halide_buffer_t *)b0.buffer, 2) | |
let nan_or_one = (void *)_halide_buffer_get_host((halide_buffer_t *)nan_or_one.buffer) | |
let nan_or_one.type = (uint32)_halide_buffer_get_type((halide_buffer_t *)nan_or_one.buffer) | |
let nan_or_one.dimensions = _halide_buffer_get_dimensions((halide_buffer_t *)nan_or_one.buffer) | |
let nan_or_one.min.0 = _halide_buffer_get_min((halide_buffer_t *)nan_or_one.buffer, 0) | |
let nan_or_one.extent.0 = _halide_buffer_get_extent((halide_buffer_t *)nan_or_one.buffer, 0) | |
let nan_or_one.stride.0 = _halide_buffer_get_stride((halide_buffer_t *)nan_or_one.buffer, 0) | |
let nan_or_one.min.1 = _halide_buffer_get_min((halide_buffer_t *)nan_or_one.buffer, 1) | |
let nan_or_one.extent.1 = _halide_buffer_get_extent((halide_buffer_t *)nan_or_one.buffer, 1) | |
let nan_or_one.stride.1 = _halide_buffer_get_stride((halide_buffer_t *)nan_or_one.buffer, 1) | |
assert(!(uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)b0.buffer) || ((0 <= nan_or_one.min.0) && ((nan_or_one.extent.0 + nan_or_one.min.0) <= 1)), halide_error_constraints_make_required_region_smaller("Input buffer b0", 0, 0, 0, nan_or_one.min.0, (nan_or_one.extent.0 + nan_or_one.min.0) + -1)) | |
assert(!(uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)b0.buffer) || ((0 <= nan_or_one.min.1) && ((nan_or_one.extent.1 + nan_or_one.min.1) <= 1)), halide_error_constraints_make_required_region_smaller("Input buffer b0", 1, 0, 0, nan_or_one.min.1, (nan_or_one.extent.1 + nan_or_one.min.1) + -1)) | |
if ((uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)b0.buffer)) { | |
(halide_buffer_t *)_halide_buffer_init((halide_buffer_t *)b0.buffer, (halide_dimension_t *)_halide_buffer_get_shape((halide_buffer_t *)b0.buffer), (void *)reinterpret((uint64)0), (uint64)0, (halide_device_interface_t *)reinterpret((uint64)0), 2, 32, 3, (halide_dimension_t *)make_struct(0, 1, 3, 0, 0, 1, 3, 0, 0, 3, 1, 0), (uint64)0) | |
} | |
if ((uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)nan_or_one.buffer)) { | |
(halide_buffer_t *)_halide_buffer_init((halide_buffer_t *)nan_or_one.buffer, (halide_dimension_t *)_halide_buffer_get_shape((halide_buffer_t *)nan_or_one.buffer), (void *)reinterpret((uint64)0), (uint64)0, (halide_device_interface_t *)reinterpret((uint64)0), 2, 32, 2, (halide_dimension_t *)make_struct(nan_or_one.min.0, nan_or_one.extent.0, 1, 0, nan_or_one.min.1, nan_or_one.extent.1, nan_or_one.extent.0, 0), (uint64)0) | |
} | |
if (!((uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)b0.buffer) || (uint1)_halide_buffer_is_bounds_query((halide_buffer_t *)nan_or_one.buffer))) { | |
assert(b0.type == (uint32)73730, halide_error_bad_type("Input buffer b0", b0.type, (uint32)73730)) | |
assert(b0.dimensions == 3, halide_error_bad_dimensions("Input buffer b0", b0.dimensions, 3)) | |
assert(nan_or_one.type == (uint32)73730, halide_error_bad_type("Output buffer nan_or_one", nan_or_one.type, (uint32)73730)) | |
assert(nan_or_one.dimensions == 2, halide_error_bad_dimensions("Output buffer nan_or_one", nan_or_one.dimensions, 2)) | |
assert((b0.min.0 <= nan_or_one.min.0) && ((nan_or_one.extent.0 + nan_or_one.min.0) <= (b0.extent.0 + b0.min.0)), halide_error_access_out_of_bounds("Input buffer b0", 0, nan_or_one.min.0, (nan_or_one.extent.0 + nan_or_one.min.0) + -1, b0.min.0, (b0.extent.0 + b0.min.0) + -1)) | |
assert(0 <= b0.extent.0, halide_error_buffer_extents_negative("Input buffer b0", 0, b0.extent.0)) | |
assert((b0.min.1 <= nan_or_one.min.1) && ((nan_or_one.extent.1 + nan_or_one.min.1) <= (b0.extent.1 + b0.min.1)), halide_error_access_out_of_bounds("Input buffer b0", 1, nan_or_one.min.1, (nan_or_one.extent.1 + nan_or_one.min.1) + -1, b0.min.1, (b0.extent.1 + b0.min.1) + -1)) | |
assert(0 <= b0.extent.1, halide_error_buffer_extents_negative("Input buffer b0", 1, b0.extent.1)) | |
assert((b0.min.2 <= 0) && (1 <= (b0.extent.2 + b0.min.2)), halide_error_access_out_of_bounds("Input buffer b0", 2, 0, 0, b0.min.2, (b0.extent.2 + b0.min.2) + -1)) | |
assert(0 <= b0.extent.2, halide_error_buffer_extents_negative("Input buffer b0", 2, b0.extent.2)) | |
assert(0 <= nan_or_one.extent.0, halide_error_buffer_extents_negative("Output buffer nan_or_one", 0, nan_or_one.extent.0)) | |
assert(0 <= nan_or_one.extent.1, halide_error_buffer_extents_negative("Output buffer nan_or_one", 1, nan_or_one.extent.1)) | |
assert(b0.stride.0 == 3, halide_error_constraint_violated("b0.stride.0", b0.stride.0, "3", 3)) | |
assert(b0.min.0 == 0, halide_error_constraint_violated("b0.min.0", b0.min.0, "0", 0)) | |
assert(b0.extent.0 == 1, halide_error_constraint_violated("b0.extent.0", b0.extent.0, "1", 1)) | |
assert(b0.stride.1 == 3, halide_error_constraint_violated("b0.stride.1", b0.stride.1, "3", 3)) | |
assert(b0.min.1 == 0, halide_error_constraint_violated("b0.min.1", b0.min.1, "0", 0)) | |
assert(b0.extent.1 == 1, halide_error_constraint_violated("b0.extent.1", b0.extent.1, "1", 1)) | |
assert(b0.stride.2 == 1, halide_error_constraint_violated("b0.stride.2", b0.stride.2, "1", 1)) | |
assert(b0.min.2 == 0, halide_error_constraint_violated("b0.min.2", b0.min.2, "0", 0)) | |
assert(b0.extent.2 == 3, halide_error_constraint_violated("b0.extent.2", b0.extent.2, "3", 3)) | |
assert(nan_or_one.stride.0 == 1, halide_error_constraint_violated("nan_or_one.stride.0", nan_or_one.stride.0, "1", 1)) | |
let nan_or_one.total_extent.1 = int64(nan_or_one.extent.1)*int64(nan_or_one.extent.0) | |
assert(uint64(nan_or_one.extent.0) <= (uint64)2147483647, halide_error_buffer_allocation_too_large("nan_or_one", uint64(nan_or_one.extent.0), (uint64)2147483647)) | |
assert((uint64)abs(int64(nan_or_one.extent.1)*int64(nan_or_one.stride.1)) <= (uint64)2147483647, halide_error_buffer_allocation_too_large("nan_or_one", (uint64)abs(int64(nan_or_one.extent.1)*int64(nan_or_one.stride.1)), (uint64)2147483647)) | |
assert(nan_or_one.total_extent.1 <= (int64)2147483647, halide_error_buffer_extents_too_large("nan_or_one", nan_or_one.total_extent.1, (int64)2147483647)) | |
produce nan_or_one { | |
let halide_copy_to_device_result = halide_copy_to_device((halide_buffer_t *)b0.buffer, (halide_device_interface_t *)halide_opencl_device_interface()) | |
assert(halide_copy_to_device_result == 0, halide_copy_to_device_result) | |
let halide_copy_to_device_result$1 = halide_copy_to_device((halide_buffer_t *)nan_or_one.buffer, (halide_device_interface_t *)halide_opencl_device_interface()) | |
assert(halide_copy_to_device_result$1 == 0, halide_copy_to_device_result$1) | |
let t10 = 0 - (nan_or_one.min.1*nan_or_one.stride.1) | |
let t9 = nan_or_one.min.0 + nan_or_one.min.1 | |
let halide_opencl_run_result = halide_opencl_run(opencl[0], "_kernel_nan_or_one_s0_row_row_outer___block_id_y", nan_or_one.extent.0, nan_or_one.extent.1, 1, 1, 1, 1, 0, (void *)make_struct((int64)8, (int64)8, (int64)4, (int64)4, (int64)4, (int64)4, (int64)0), (void *)make_struct((void *)b0.buffer, (void *)nan_or_one.buffer, (void *)make_struct(nan_or_one.min.1), (void *)make_struct(nan_or_one.stride.1), (void *)make_struct(t10), (void *)make_struct(t9), (void *)reinterpret((uint64)0)), (void *)make_struct((uint8)1, (uint8)1, (uint8)0, (uint8)0, (uint8)0, (uint8)0, (uint8)0)) | |
assert(halide_opencl_run_result == 0, halide_opencl_run_result) | |
_halide_buffer_set_device_dirty((halide_buffer_t *)nan_or_one.buffer, (uint1)1) | |
} | |
} | |
Embedding image opencl_buf | |
Embedding image opencl_gpu_source_kernels | |
Target triple of initial module: x86_64--linux-gnu | |
Generating llvm bitcode... | |
Generating llvm bitcode prolog for function nan_or_one... | |
Generating llvm bitcode for function nan_or_one... | |
0x2aa5050 | |
Done generating llvm bitcode | |
; ModuleID = 'nan_or_one' | |
source_filename = "/host/buildtrees/halide/src/v13.0.4-7253dad238.clean/src/runtime/halide_buffer_t.cpp" | |
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" | |
target triple = "x86_64--linux-gnu" | |
%struct.halide_filter_argument_t = type { i8*, i32, i32, %struct.halide_type_t, %struct.halide_scalar_value_t*, %struct.halide_scalar_value_t*, %struct.halide_scalar_value_t*, %struct.halide_scalar_value_t*, i64** } | |
%struct.halide_type_t = type { i8, i8, i16 } | |
%struct.halide_scalar_value_t = type { %union.anon } | |
%union.anon = type { double } | |
%struct.halide_filter_metadata_t = type { i32, i32, %struct.halide_filter_argument_t*, i8*, i8* } | |
%struct.halide_buffer_t.4 = type { i64, %struct.halide_device_interface_t.1*, i8*, i64, %struct.halide_type_t, i32, %struct.halide_dimension_t*, i8* } | |
%struct.halide_dimension_t = type { i32, i32, i32, i32 } | |
%struct.halide_device_interface_t.1 = type { i32 (i8*, %struct.halide_buffer_t.4*, %struct.halide_device_interface_t.1*)*, i32 (i8*, %struct.halide_buffer_t.4*)*, i32 (i8*, %struct.halide_buffer_t.4*)*, void (i8*, %struct.halide_device_interface_t.1*)*, i32 (i8*, %struct.halide_buffer_t.4*)*, i32 (i8*, %struct.halide_buffer_t.4*, %struct.halide_device_interface_t.1*)*, i32 (i8*, %struct.halide_buffer_t.4*, %struct.halide_device_interface_t.1*)*, i32 (i8*, %struct.halide_buffer_t.4*)*, {}*, i32 (i8*, %struct.halide_buffer_t.4*, %struct.halide_buffer_t.4*)*, i32 (i8*, %struct.halide_buffer_t.4*, i32, i32, %struct.halide_buffer_t.4*)*, i32 (i8*, %struct.halide_buffer_t.4*)*, i32 (i8*, %struct.halide_buffer_t.4*, i64, %struct.halide_device_interface_t.1*)*, i32 (i8*, %struct.halide_buffer_t.4*)*, i32 (i8*, i32*, i32*)*, %struct.halide_device_interface_impl_t* } | |
%struct.halide_device_interface_impl_t = type opaque | |
%struct.halide_buffer_t = type { i64, %struct.halide_device_interface_t*, i8*, i64, %struct.halide_type_t, i32, %struct.halide_dimension_t*, i8* } | |
%struct.halide_device_interface_t = type { i32 (i8*, %struct.halide_buffer_t*, %struct.halide_device_interface_t*)*, i32 (i8*, %struct.halide_buffer_t*)*, i32 (i8*, %struct.halide_buffer_t*)*, void (i8*, %struct.halide_device_interface_t*)*, i32 (i8*, %struct.halide_buffer_t*)*, i32 (i8*, %struct.halide_buffer_t*, %struct.halide_device_interface_t*)*, i32 (i8*, %struct.halide_buffer_t*, %struct.halide_device_interface_t*)*, i32 (i8*, %struct.halide_buffer_t*)*, i32 (i8*, %struct.halide_buffer_t*, %struct.halide_device_interface_t*, %struct.halide_buffer_t*)*, i32 (i8*, %struct.halide_buffer_t*, %struct.halide_buffer_t*)*, i32 (i8*, %struct.halide_buffer_t*, i32, i32, %struct.halide_buffer_t*)*, i32 (i8*, %struct.halide_buffer_t*)*, i32 (i8*, %struct.halide_buffer_t*, i64, %struct.halide_device_interface_t*)*, i32 (i8*, %struct.halide_buffer_t*)*, i32 (i8*, i32*, i32*)*, %struct.halide_device_interface_impl_t* } | |
; Zero-initialised 8-byte slot. @nan_or_one passes its address (cast to i8**) to
; halide_opencl_initialize_kernels, then reloads it as an i64; a non-zero value is what makes
; the destructor path call halide_opencl_finalize_kernels.
@opencl_buf.data = private global [8 x i8] zeroinitializer, align 32 | |
; NUL-terminated OpenCL C source (2510 bytes including the terminator) containing the helper
; macros, a dummy _at_least_one_kernel, and _kernel_nan_or_one_s0_row_row_outer___block_id_y.
; The array length must stay in sync with the literal and with the i32 2510 passed to
; halide_opencl_initialize_kernels. In this dump the literal spans two physical lines.
; NOTE(review): in the kernel, `_0` is nan_f32() and the select is written `_6 ? _0 : _0`, so
; the value stored to _nan_or_one is the same whichever way the is_nan_f32 test goes. Given
; the name nan_or_one this looks unintended - confirm against the generating pipeline.
@opencl_gpu_source_kernels.data = private constant [2510 x i8] c"/*OpenCL C x86-64-linux-jit-opencl-sse41-strict_float-user_context*/\0A#pragma OPENCL FP_CONTRACT ON\0Ainline float float_from_bits(unsigned int x) {return as_float(x);}\0Ainline float nan_f32() { return NAN; }\0Ainline float neg_inf_f32() { return -INFINITY; }\0Ainline float inf_f32() { return INFINITY; }\0Ainline bool is_nan_f32(float x) {return isnan(x); }\0Ainline bool is_inf_f32(float x) {return isinf(x); }\0Ainline bool is_finite_f32(float x) {return isfinite(x); }\0A#define sqrt_f32 sqrt \0A#define sin_f32 sin \0A#define cos_f32 cos \0A#define exp_f32 exp \0A#define log_f32 log \0A#define abs_f32 fabs \0A#define floor_f32 floor \0A#define ceil_f32 ceil \0A#define round_f32 round \0A#define trunc_f32 trunc \0A#define pow_f32 pow\0A#define asin_f32 asin \0A#define acos_f32 acos \0A#define tan_f32 tan \0A#define atan_f32 atan \0A#define atan2_f32 atan2\0A#define sinh_f32 sinh \0A#define asinh_f32 asinh \0A#define cosh_f32 cosh \0A#define acosh_f32 acosh \0A#define tanh_f32 tanh \0A#define atanh_f32 atanh \0A#define fast_inverse_f32 native_recip \0A#define fast_inverse_sqrt_f32 native_rsqrt \0A#define halide_unused(x)\0A\0A__kernel void _at_least_one_kernel(int x) { }\0A// Address spaces for _kernel_nan_or_one_s0_row_row_outer___block_id_y\0A#if 12 <= MAX_CONSTANT_BUFFER_SIZE && 0 < MAX_CONSTANT_ARGS\0A#define __address_space__b0 __constant\0A#else\0A#define __address_space__b0 __global\0A#endif\0A#define __address_space__nan_or_one __global\0A__kernel void _kernel_nan_or_one_s0_row_row_outer___block_id_y(\0A __address_space__b0 const float *restrict _b0,\0A __address_space__nan_or_one float *restrict _nan_or_one,\0A const int _nan_or_one_min_1,\0A const int _nan_or_one_stride_1,\0A const int _t10,\0A const int _t9,\0A __local int16* __shared)\0A{\0A int _nan_or_one_s0_row_row_outer___block_id_y = get_group_id(1);\0A int _nan_or_one_s0_col_col_outer___block_id_x = get_group_id(0);\0A int 
___thread_id_y = get_local_id(1);\0A int ___thread_id_x = get_local_id(0);\0A float _0 = (nan_f32());\0A int _1 = _nan_or_one_s0_row_row_outer___block_id_y + _t9;\0A int _2 = _1 + _nan_or_one_s0_col_col_outer___block_id_x;\0A int _3 = _2 * 3;\0A float _4 = _b0[_3];\0A float _5 = (_4);\0A bool _6 = is_nan_f32(_5);\0A float _7 = (float)(_6 ? _0 : _0);\0A float _8 = (_7);\0A int _9 = _nan_or_one_min_1 + _nan_or_one_s0_row_row_outer___block_id_y;\0A int _10 = _9 * _nan_or_one_stride_1;\0A int _11 = _10 + _t10;\0A int _12 = _11 + _nan_or_one_s0_col_col_outer___block_id_x;\0A _nan_or_one[_12] = _8;\0A} // kernel _kernel_nan_or_one_s0_row_row_outer___block_id_y\0A#undef __address_space__b0\0A#undef __address_space__nan_or_one\0A\00", align 32 | |
; NUL-terminated string constants. Each array length counts the trailing \00.
; Several are passed as i8* to the halide_error_* calls in @nan_or_one or referenced from the
; argument metadata; not every use is visible in this excerpt.
; Buffer / argument names.
@str = private constant [11 x i8] c"nan_or_one\00", align 32 | |
@str.3 = private constant [3 x i8] c"b0\00", align 32 | |
; Human-readable buffer descriptions used in the type, dimension and bounds error calls.
@str.4 = private constant [16 x i8] c"Input buffer b0\00", align 32 | |
@str.5 = private constant [25 x i8] c"Output buffer nan_or_one\00", align 32 | |
; Name / expected-value pairs handed to halide_error_constraint_violated
; (e.g. @str.6 with @str.7, @str.8 with @str.9, @str.10 with @str.11).
@str.6 = private constant [12 x i8] c"b0.stride.0\00", align 32 | |
@str.7 = private constant [2 x i8] c"3\00", align 32 | |
@str.8 = private constant [9 x i8] c"b0.min.0\00", align 32 | |
@str.9 = private constant [2 x i8] c"0\00", align 32 | |
@str.10 = private constant [12 x i8] c"b0.extent.0\00", align 32 | |
@str.11 = private constant [2 x i8] c"1\00", align 32 | |
@str.12 = private constant [12 x i8] c"b0.stride.1\00", align 32 | |
@str.13 = private constant [9 x i8] c"b0.min.1\00", align 32 | |
@str.14 = private constant [12 x i8] c"b0.extent.1\00", align 32 | |
@str.15 = private constant [12 x i8] c"b0.stride.2\00", align 32 | |
@str.16 = private constant [9 x i8] c"b0.min.2\00", align 32 | |
@str.17 = private constant [12 x i8] c"b0.extent.2\00", align 32 | |
@str.18 = private constant [20 x i8] c"nan_or_one.stride.0\00", align 32 | |
; Same identifier as the __kernel function defined inside @opencl_gpu_source_kernels.data.
@str.19 = private constant [49 x i8] c"_kernel_nan_or_one_s0_row_row_outer___block_id_y\00", align 32 | |
@str.20 = private constant [15 x i8] c"__user_context\00", align 32 | |
; Four null i64* entries; referenced only as the last field of the "nan_or_one" entry in @1.
@0 = private constant [4 x i64*] zeroinitializer | |
; One halide_filter_argument_t per argument of @nan_or_one, in signature order:
; "b0" (@str.3), "__user_context" (@str.20), "nan_or_one" (@str).
; The second i32 of the buffer entries (3 for b0, 2 for nan_or_one) equals the dimension count
; that @nan_or_one checks via halide_error_bad_dimensions.
; NOTE(review): the first i32 (1 / 0 / 2) is presumably the argument kind and the
; halide_type_t { 2, 32, 1 } presumably float32 x1 - confirm against the Halide runtime header.
@1 = private constant [3 x %struct.halide_filter_argument_t] [%struct.halide_filter_argument_t { i8* getelementptr inbounds ([3 x i8], [3 x i8]* @str.3, i32 0, i32 0), i32 1, i32 3, %struct.halide_type_t { i8 2, i8 32, i16 1 }, %struct.halide_scalar_value_t* null, %struct.halide_scalar_value_t* null, %struct.halide_scalar_value_t* null, %struct.halide_scalar_value_t* null, i64** null }, %struct.halide_filter_argument_t { i8* getelementptr inbounds ([15 x i8], [15 x i8]* @str.20, i32 0, i32 0), i32 0, i32 0, %struct.halide_type_t { i8 3, i8 64, i16 1 }, %struct.halide_scalar_value_t* null, %struct.halide_scalar_value_t* null, %struct.halide_scalar_value_t* null, %struct.halide_scalar_value_t* null, i64** null }, %struct.halide_filter_argument_t { i8* getelementptr inbounds ([11 x i8], [11 x i8]* @str, i32 0, i32 0), i32 2, i32 2, %struct.halide_type_t { i8 2, i8 32, i16 1 }, %struct.halide_scalar_value_t* null, %struct.halide_scalar_value_t* null, %struct.halide_scalar_value_t* null, %struct.halide_scalar_value_t* null, i64** getelementptr inbounds ([4 x i64*], [4 x i64*]* @0, i32 0, i32 0) }] | |
; Target string; identical to the text in the header comment of the embedded OpenCL source.
@str.21 = private constant [56 x i8] c"x86-64-linux-jit-opencl-sse41-strict_float-user_context\00", align 32 | |
; Filter metadata record: { i32 1, i32 3, pointer to @1, pointer to @str.21, pointer to @str }.
; The i32 3 matches the number of entries in @1.
; NOTE(review): the leading i32 1 is presumably a metadata version - confirm.
@nan_or_one_metadata_storage = private constant %struct.halide_filter_metadata_t { i32 1, i32 3, %struct.halide_filter_argument_t* getelementptr inbounds ([3 x %struct.halide_filter_argument_t], [3 x %struct.halide_filter_argument_t]* @1, i32 0, i32 0), i8* getelementptr inbounds ([56 x i8], [56 x i8]* @str.21, i32 0, i32 0), i8* getelementptr inbounds ([11 x i8], [11 x i8]* @str, i32 0, i32 0) } | |
; External declarations only - none of these has a body in the visible part of the module.
; The #0/#1/#2 attribute groups they reference are defined outside this excerpt.
; LLVM intrinsics used by @nan_or_one around its stack temporaries and dimension copies.
; Function Attrs: argmemonly mustprogress nofree nosync nounwind willreturn | |
declare void @llvm.lifetime.start.p0i8(i64 immarg %0, i8* nocapture %1) #0 | |
; Function Attrs: argmemonly mustprogress nofree nounwind willreturn | |
declare void @llvm.memcpy.p0i8.p0i8.i64(i8* noalias nocapture writeonly %0, i8* noalias nocapture readonly %1, i64 %2, i1 immarg %3) #1 | |
; Function Attrs: argmemonly mustprogress nofree nosync nounwind willreturn | |
declare void @llvm.lifetime.end.p0i8(i64 immarg %0, i8* nocapture %1) #0 | |
; Plain memcpy symbol (distinct from the intrinsic above); @nan_or_one calls it to copy the
; 4-byte type field out of each buffer.
declare i8* @memcpy(i8* %0, i8* %1, i64 %2) local_unnamed_addr #2 | |
; Note: typed against the `.4` / `.1` struct variants, not the %struct.halide_buffer_t that
; @nan_or_one receives.
declare i32 @halide_copy_to_device(i8* %0, %struct.halide_buffer_t.4* %1, %struct.halide_device_interface_t.1* %2) local_unnamed_addr #2 | |
; Error reporters. Each takes the user context as its first argument and returns an i32 that
; @nan_or_one forwards as its own return value on the corresponding failure path.
declare i32 @halide_error_access_out_of_bounds(i8* %0, i8* %1, i32 %2, i32 %3, i32 %4, i32 %5, i32 %6) local_unnamed_addr #2 | |
declare i32 @halide_error_bad_dimensions(i8* %0, i8* %1, i32 %2, i32 %3) local_unnamed_addr #2 | |
declare i32 @halide_error_bad_type(i8* %0, i8* %1, i32 %2, i32 %3) local_unnamed_addr #2 | |
declare i32 @halide_error_buffer_allocation_too_large(i8* %0, i8* %1, i64 %2, i64 %3) local_unnamed_addr #2 | |
declare i32 @halide_error_buffer_argument_is_null(i8* %0, i8* %1) local_unnamed_addr #2 | |
declare i32 @halide_error_buffer_extents_negative(i8* %0, i8* %1, i32 %2, i32 %3) local_unnamed_addr #2 | |
declare i32 @halide_error_buffer_extents_too_large(i8* %0, i8* %1, i64 %2, i64 %3) local_unnamed_addr #2 | |
declare i32 @halide_error_constraint_violated(i8* %0, i8* %1, i32 %2, i8* %3, i32 %4) local_unnamed_addr #2 | |
declare i32 @halide_error_constraints_make_required_region_smaller(i8* %0, i8* %1, i32 %2, i32 %3, i32 %4, i32 %5, i32 %6) local_unnamed_addr #2 | |
; OpenCL runtime entry points. @nan_or_one calls initialize_kernels first with the kernel
; source and its length, and calls finalize_kernels on exit when the state slot is non-zero.
declare %struct.halide_device_interface_t.1* @halide_opencl_device_interface() local_unnamed_addr #2 | |
declare i32 @halide_opencl_initialize_kernels(i8* %0, i8** %1, i8* %2, i32 %3) local_unnamed_addr #2 | |
declare void @halide_opencl_finalize_kernels(i8* %0, i8* %1) local_unnamed_addr #2 | |
declare i32 @halide_opencl_run(i8* %0, i8* %1, i8* %2, i32 %3, i32 %4, i32 %5, i32 %6, i32 %7, i32 %8, i32 %9, i64* %10, i8** %11, i8* %12) local_unnamed_addr #2 | |
; Function Attrs: nounwind | |
define i32 @nan_or_one(%struct.halide_buffer_t* noalias %b0.buffer, i8* %__user_context, %struct.halide_buffer_t* noalias %nan_or_one.buffer) local_unnamed_addr #3 { | |
entry: | |
%0 = alloca i32, align 4 | |
%1 = alloca i32, align 4 | |
%2 = alloca [7 x i8], align 1 | |
%3 = alloca { %struct.halide_buffer_t*, %struct.halide_buffer_t*, i32*, i32*, i32*, i32*, i8* }, align 8 | |
%4 = alloca i32, align 4 | |
%5 = alloca i32, align 4 | |
%6 = alloca i32, align 4 | |
%7 = alloca i32, align 4 | |
%8 = alloca [7 x i64], align 16 | |
%9 = alloca [8 x i32], align 4 | |
%halide_opencl_initialize_kernels_result = tail call i32 @halide_opencl_initialize_kernels(i8* %__user_context, i8** bitcast ([8 x i8]* @opencl_buf.data to i8**), i8* getelementptr inbounds ([2510 x i8], [2510 x i8]* @opencl_gpu_source_kernels.data, i64 0, i64 0), i32 2510) #5 | |
%10 = icmp eq i32 %halide_opencl_initialize_kernels_result, 0 | |
br i1 %10, label %"assert succeeded", label %common.ret, !prof !11 | |
"assert succeeded": ; preds = %entry | |
%11 = load i64, i64* bitcast ([8 x i8]* @opencl_buf.data to i64*), align 32, !tbaa !12 | |
%12 = inttoptr i64 %11 to i8* | |
%.not = icmp eq %struct.halide_buffer_t* %nan_or_one.buffer, null | |
br i1 %.not, label %"assert failed1", label %"assert succeeded2", !prof !26 | |
common.ret: ; preds = %entry, %15, %destructor_block, %assert_failed35, %assert_failed34, %assert_failed33, %assert_failed32, %assert_failed31, %assert_failed30, %assert_failed29, %assert_failed28, %assert_failed27, %assert_failed26, %assert_failed25, %assert_failed24, %assert_failed23, %assert_failed22, %assert_failed21, %assert_failed20, %assert_failed19, %assert_failed18, %assert_failed17, %assert_failed16, %assert_failed15, %assert_failed | |
%common.ret.op = phi i32 [ %203, %assert_failed ], [ %204, %assert_failed15 ], [ %205, %assert_failed16 ], [ %206, %assert_failed17 ], [ %209, %assert_failed18 ], [ %210, %assert_failed19 ], [ %213, %assert_failed20 ], [ %214, %assert_failed21 ], [ %216, %assert_failed22 ], [ %217, %assert_failed23 ], [ %218, %assert_failed24 ], [ %219, %assert_failed25 ], [ %221, %assert_failed26 ], [ %222, %assert_failed27 ], [ %223, %assert_failed28 ], [ %224, %assert_failed29 ], [ %225, %assert_failed30 ], [ %226, %assert_failed31 ], [ %227, %assert_failed32 ], [ %228, %assert_failed33 ], [ %229, %assert_failed34 ], [ %230, %assert_failed35 ], [ %13, %destructor_block ], [ %13, %15 ], [ %halide_opencl_initialize_kernels_result, %entry ] | |
ret i32 %common.ret.op | |
destructor_block: ; preds = %"assert succeeded45", %"assert succeeded43", %"produce nan_or_one", %_halide_buffer_is_bounds_query.exit66, %"assert succeeded47", %"assert failed40", %"assert failed38", %"assert failed36", %"assert failed7", %"assert failed5", %"assert failed3", %"assert failed1" | |
%13 = phi i32 [ %16, %"assert failed1" ], [ %17, %"assert failed3" ], [ %75, %"assert failed5" ], [ %82, %"assert failed7" ], [ %231, %"assert failed36" ], [ %235, %"assert failed38" ], [ %237, %"assert failed40" ], [ 0, %"assert succeeded47" ], [ 0, %_halide_buffer_is_bounds_query.exit66 ], [ %halide_copy_to_device_result, %"produce nan_or_one" ], [ %"halide_copy_to_device_result$1", %"assert succeeded43" ], [ %halide_opencl_run_result, %"assert succeeded45" ] | |
%14 = icmp eq i64 %11, 0 | |
br i1 %14, label %common.ret, label %15 | |
15: ; preds = %destructor_block | |
call void @halide_opencl_finalize_kernels(i8* %__user_context, i8* nonnull %12) #10 | |
br label %common.ret | |
"assert failed1": ; preds = %"assert succeeded" | |
%16 = tail call i32 @halide_error_buffer_argument_is_null(i8* %__user_context, i8* getelementptr inbounds ([11 x i8], [11 x i8]* @str, i64 0, i64 0)) #5 | |
br label %destructor_block | |
"assert succeeded2": ; preds = %"assert succeeded" | |
%.not48 = icmp eq %struct.halide_buffer_t* %b0.buffer, null | |
br i1 %.not48, label %"assert failed3", label %"assert succeeded4", !prof !26 | |
"assert failed3": ; preds = %"assert succeeded2" | |
%17 = tail call i32 @halide_error_buffer_argument_is_null(i8* %__user_context, i8* getelementptr inbounds ([3 x i8], [3 x i8]* @str.3, i64 0, i64 0)) #5 | |
br label %destructor_block | |
"assert succeeded4": ; preds = %"assert succeeded2" | |
%18 = getelementptr inbounds %struct.halide_buffer_t, %struct.halide_buffer_t* %b0.buffer, i64 0, i32 2 | |
%19 = bitcast i32* %1 to i8* | |
call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %19) #5 | |
%20 = getelementptr inbounds %struct.halide_buffer_t, %struct.halide_buffer_t* %b0.buffer, i64 0, i32 4, i32 0 | |
%21 = call i8* @memcpy(i8* nonnull %19, i8* nonnull %20, i64 4) #10 | |
%22 = load i32, i32* %1, align 4, !tbaa !27 | |
call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %19) #5 | |
%23 = getelementptr inbounds %struct.halide_buffer_t, %struct.halide_buffer_t* %b0.buffer, i64 0, i32 5 | |
%24 = load i32, i32* %23, align 4, !tbaa !31 | |
%25 = getelementptr inbounds %struct.halide_buffer_t, %struct.halide_buffer_t* %b0.buffer, i64 0, i32 6 | |
%26 = load %struct.halide_dimension_t*, %struct.halide_dimension_t** %25, align 8, !tbaa !38 | |
%27 = getelementptr inbounds %struct.halide_dimension_t, %struct.halide_dimension_t* %26, i64 0, i32 0 | |
%28 = load i32, i32* %27, align 4, !tbaa !39 | |
%29 = getelementptr inbounds %struct.halide_dimension_t, %struct.halide_dimension_t* %26, i64 0, i32 1 | |
%30 = bitcast i32* %29 to <2 x i32>* | |
%31 = load <2 x i32>, <2 x i32>* %30, align 4, !tbaa !27 | |
%32 = getelementptr inbounds %struct.halide_dimension_t, %struct.halide_dimension_t* %26, i64 1, i32 0 | |
%33 = bitcast i32* %32 to <2 x i32>* | |
%34 = load <2 x i32>, <2 x i32>* %33, align 4, !tbaa !27 | |
%35 = getelementptr inbounds %struct.halide_dimension_t, %struct.halide_dimension_t* %26, i64 1, i32 2 | |
%36 = load i32, i32* %35, align 4, !tbaa !41 | |
%37 = getelementptr inbounds %struct.halide_dimension_t, %struct.halide_dimension_t* %26, i64 2, i32 0 | |
%38 = load i32, i32* %37, align 4, !tbaa !39 | |
%39 = getelementptr inbounds %struct.halide_dimension_t, %struct.halide_dimension_t* %26, i64 2, i32 1 | |
%40 = load i32, i32* %39, align 4, !tbaa !42 | |
%41 = getelementptr inbounds %struct.halide_dimension_t, %struct.halide_dimension_t* %26, i64 2, i32 2 | |
%42 = load i32, i32* %41, align 4, !tbaa !41 | |
%43 = getelementptr inbounds %struct.halide_buffer_t, %struct.halide_buffer_t* %nan_or_one.buffer, i64 0, i32 2 | |
%44 = bitcast i32* %0 to i8* | |
call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %44) #5 | |
%45 = getelementptr inbounds %struct.halide_buffer_t, %struct.halide_buffer_t* %nan_or_one.buffer, i64 0, i32 4, i32 0 | |
%46 = call i8* @memcpy(i8* nonnull %44, i8* nonnull %45, i64 4) #10 | |
%47 = load i32, i32* %0, align 4, !tbaa !27 | |
call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %44) #5 | |
%48 = getelementptr inbounds %struct.halide_buffer_t, %struct.halide_buffer_t* %nan_or_one.buffer, i64 0, i32 5 | |
%49 = load i32, i32* %48, align 4, !tbaa !31 | |
%50 = getelementptr inbounds %struct.halide_buffer_t, %struct.halide_buffer_t* %nan_or_one.buffer, i64 0, i32 6 | |
%51 = load %struct.halide_dimension_t*, %struct.halide_dimension_t** %50, align 8, !tbaa !38 | |
%52 = getelementptr inbounds %struct.halide_dimension_t, %struct.halide_dimension_t* %51, i64 0, i32 0 | |
%53 = load i32, i32* %52, align 4, !tbaa !39 | |
%54 = getelementptr inbounds %struct.halide_dimension_t, %struct.halide_dimension_t* %51, i64 0, i32 1 | |
%55 = load i32, i32* %54, align 4, !tbaa !42 | |
%56 = getelementptr inbounds %struct.halide_dimension_t, %struct.halide_dimension_t* %51, i64 0, i32 2 | |
%57 = load i32, i32* %56, align 4, !tbaa !41 | |
%58 = getelementptr inbounds %struct.halide_dimension_t, %struct.halide_dimension_t* %51, i64 1, i32 0 | |
%59 = load i32, i32* %58, align 4, !tbaa !39 | |
%60 = getelementptr inbounds %struct.halide_dimension_t, %struct.halide_dimension_t* %51, i64 1, i32 1 | |
%61 = load i32, i32* %60, align 4, !tbaa !42 | |
%62 = getelementptr inbounds %struct.halide_dimension_t, %struct.halide_dimension_t* %51, i64 1, i32 2 | |
%63 = load i32, i32* %62, align 4, !tbaa !41 | |
%64 = load i8*, i8** %18, align 8, !tbaa !43 | |
%65 = icmp eq i8* %64, null | |
br i1 %65, label %_halide_buffer_is_bounds_query.exit, label %_halide_buffer_is_bounds_query.exit63.thread | |
_halide_buffer_is_bounds_query.exit: ; preds = %"assert succeeded4" | |
%66 = getelementptr inbounds %struct.halide_buffer_t, %struct.halide_buffer_t* %b0.buffer, i64 0, i32 0 | |
%67 = load i64, i64* %66, align 8, !tbaa !44 | |
%68 = icmp ne i64 %67, 0 | |
%69 = icmp sgt i32 %53, -1 | |
%70 = add nsw i32 %55, %53 | |
%71 = icmp slt i32 %70, 2 | |
%72 = and i1 %69, %71 | |
%73 = or i1 %72, %68 | |
br i1 %73, label %_halide_buffer_is_bounds_query.exit62, label %"assert failed5", !prof !11 | |
"assert failed5": ; preds = %_halide_buffer_is_bounds_query.exit | |
%74 = add nsw i32 %70, -1 | |
%75 = call i32 @halide_error_constraints_make_required_region_smaller(i8* %__user_context, i8* getelementptr inbounds ([16 x i8], [16 x i8]* @str.4, i64 0, i64 0), i32 0, i32 0, i32 0, i32 %53, i32 %74) #5 | |
br label %destructor_block | |
_halide_buffer_is_bounds_query.exit62: ; preds = %_halide_buffer_is_bounds_query.exit | |
%76 = icmp sgt i32 %59, -1 | |
%77 = add nsw i32 %61, %59 | |
%78 = icmp slt i32 %77, 2 | |
%79 = and i1 %76, %78 | |
%80 = or i1 %79, %68 | |
br i1 %80, label %_halide_buffer_is_bounds_query.exit63, label %"assert failed7", !prof !11 | |
"assert failed7": ; preds = %_halide_buffer_is_bounds_query.exit62 | |
%81 = add nsw i32 %77, -1 | |
%82 = call i32 @halide_error_constraints_make_required_region_smaller(i8* %__user_context, i8* getelementptr inbounds ([16 x i8], [16 x i8]* @str.4, i64 0, i64 0), i32 1, i32 0, i32 0, i32 %59, i32 %81) #5 | |
br label %destructor_block | |
_halide_buffer_is_bounds_query.exit63.thread: ; preds = %"assert succeeded4" | |
%83 = add nsw i32 %55, %53 | |
%84 = add nsw i32 %61, %59 | |
br label %after_bb | |
_halide_buffer_is_bounds_query.exit63: ; preds = %_halide_buffer_is_bounds_query.exit62 | |
%85 = icmp eq i64 %67, 0 | |
br i1 %85, label %then_bb, label %after_bb | |
after_bb: ; preds = %_halide_buffer_is_bounds_query.exit63.thread, %_halide_buffer_is_bounds_query.exit63, %then_bb | |
%86 = phi i32 [ %84, %_halide_buffer_is_bounds_query.exit63.thread ], [ %77, %_halide_buffer_is_bounds_query.exit63 ], [ %77, %then_bb ] | |
%87 = phi i32 [ %83, %_halide_buffer_is_bounds_query.exit63.thread ], [ %70, %_halide_buffer_is_bounds_query.exit63 ], [ %70, %then_bb ] | |
%88 = load i8*, i8** %43, align 8, !tbaa !43 | |
%89 = icmp eq i8* %88, null | |
br i1 %89, label %_halide_buffer_is_bounds_query.exit64, label %after_bb9 | |
_halide_buffer_is_bounds_query.exit64: ; preds = %after_bb | |
%90 = getelementptr inbounds %struct.halide_buffer_t, %struct.halide_buffer_t* %nan_or_one.buffer, i64 0, i32 0 | |
%91 = load i64, i64* %90, align 8, !tbaa !44 | |
%92 = icmp eq i64 %91, 0 | |
br i1 %92, label %_halide_buffer_init.exit68, label %after_bb9 | |
then_bb: ; preds = %_halide_buffer_is_bounds_query.exit63 | |
%93 = bitcast %struct.halide_dimension_t** %25 to <4 x i32>** | |
%94 = load <4 x i32>*, <4 x i32>** %93, align 8, !tbaa !38 | |
%95 = bitcast %struct.halide_buffer_t* %b0.buffer to i8* | |
call void @llvm.memset.p0i8.i64(i8* noundef nonnull align 8 dereferenceable(24) %95, i8 0, i64 24, i1 false) | |
store i8 2, i8* %20, align 8, !tbaa !45 | |
%96 = getelementptr inbounds %struct.halide_buffer_t, %struct.halide_buffer_t* %b0.buffer, i64 0, i32 4, i32 1 | |
store i8 32, i8* %96, align 1, !tbaa !46 | |
%97 = getelementptr inbounds %struct.halide_buffer_t, %struct.halide_buffer_t* %b0.buffer, i64 0, i32 4, i32 2 | |
store i16 1, i16* %97, align 2, !tbaa !47 | |
store i32 3, i32* %23, align 4, !tbaa !31 | |
store <4 x i32> <i32 0, i32 1, i32 3, i32 0>, <4 x i32>* %94, align 4 | |
%98 = load %struct.halide_dimension_t*, %struct.halide_dimension_t** %25, align 8, !tbaa !38 | |
%.sroa.5.16..sroa_idx = getelementptr inbounds %struct.halide_dimension_t, %struct.halide_dimension_t* %98, i64 1, i32 0 | |
%99 = bitcast i32* %.sroa.5.16..sroa_idx to <4 x i32>* | |
store <4 x i32> <i32 0, i32 1, i32 3, i32 0>, <4 x i32>* %99, align 4 | |
%100 = load %struct.halide_dimension_t*, %struct.halide_dimension_t** %25, align 8, !tbaa !38 | |
%.sroa.10.32..sroa_idx = getelementptr inbounds %struct.halide_dimension_t, %struct.halide_dimension_t* %100, i64 2, i32 0 | |
%101 = bitcast i32* %.sroa.10.32..sroa_idx to <4 x i32>* | |
store <4 x i32> <i32 0, i32 3, i32 1, i32 0>, <4 x i32>* %101, align 4 | |
%102 = getelementptr inbounds %struct.halide_buffer_t, %struct.halide_buffer_t* %b0.buffer, i64 0, i32 3 | |
store i64 0, i64* %102, align 8, !tbaa !48 | |
br label %after_bb | |
after_bb9: ; preds = %after_bb, %_halide_buffer_is_bounds_query.exit64, %_halide_buffer_init.exit68 | |
%103 = load i8*, i8** %18, align 8, !tbaa !43 | |
%104 = icmp eq i8* %103, null | |
br i1 %104, label %105, label %_halide_buffer_is_bounds_query.exit65 | |
105: ; preds = %after_bb9 | |
%106 = getelementptr inbounds %struct.halide_buffer_t, %struct.halide_buffer_t* %b0.buffer, i64 0, i32 0 | |
%107 = load i64, i64* %106, align 8, !tbaa !44 | |
%108 = icmp eq i64 %107, 0 | |
br label %_halide_buffer_is_bounds_query.exit65 | |
_halide_buffer_is_bounds_query.exit65: ; preds = %after_bb9, %105 | |
%109 = phi i1 [ false, %after_bb9 ], [ %108, %105 ] | |
%110 = load i8*, i8** %43, align 8, !tbaa !43 | |
%111 = icmp eq i8* %110, null | |
br i1 %111, label %112, label %_halide_buffer_is_bounds_query.exit66 | |
112: ; preds = %_halide_buffer_is_bounds_query.exit65 | |
%113 = getelementptr inbounds %struct.halide_buffer_t, %struct.halide_buffer_t* %nan_or_one.buffer, i64 0, i32 0 | |
%114 = load i64, i64* %113, align 8, !tbaa !44 | |
%115 = icmp eq i64 %114, 0 | |
br label %_halide_buffer_is_bounds_query.exit66 | |
_halide_buffer_is_bounds_query.exit66: ; preds = %_halide_buffer_is_bounds_query.exit65, %112 | |
%116 = phi i1 [ false, %_halide_buffer_is_bounds_query.exit65 ], [ %115, %112 ] | |
%117 = or i1 %109, %116 | |
br i1 %117, label %destructor_block, label %then_bb13 | |
_halide_buffer_init.exit68: ; preds = %_halide_buffer_is_bounds_query.exit64 | |
%118 = bitcast %struct.halide_dimension_t** %50 to i8** | |
%119 = load i8*, i8** %118, align 8, !tbaa !38 | |
%120 = getelementptr inbounds [8 x i32], [8 x i32]* %9, i64 0, i64 0 | |
store i32 %53, i32* %120, align 4 | |
%121 = getelementptr inbounds [8 x i32], [8 x i32]* %9, i64 0, i64 1 | |
store i32 %55, i32* %121, align 4 | |
%122 = getelementptr inbounds [8 x i32], [8 x i32]* %9, i64 0, i64 2 | |
store i32 1, i32* %122, align 4 | |
%123 = getelementptr inbounds [8 x i32], [8 x i32]* %9, i64 0, i64 3 | |
store i32 0, i32* %123, align 4 | |
%124 = getelementptr inbounds [8 x i32], [8 x i32]* %9, i64 0, i64 4 | |
store i32 %59, i32* %124, align 4 | |
%125 = getelementptr inbounds [8 x i32], [8 x i32]* %9, i64 0, i64 5 | |
store i32 %61, i32* %125, align 4 | |
%126 = getelementptr inbounds [8 x i32], [8 x i32]* %9, i64 0, i64 6 | |
store i32 %55, i32* %126, align 4 | |
%127 = getelementptr inbounds [8 x i32], [8 x i32]* %9, i64 0, i64 7 | |
store i32 0, i32* %127, align 4 | |
%128 = bitcast %struct.halide_buffer_t* %nan_or_one.buffer to i8* | |
call void @llvm.memset.p0i8.i64(i8* noundef nonnull align 8 dereferenceable(24) %128, i8 0, i64 24, i1 false) | |
store i8 2, i8* %45, align 8, !tbaa !45 | |
%129 = getelementptr inbounds %struct.halide_buffer_t, %struct.halide_buffer_t* %nan_or_one.buffer, i64 0, i32 4, i32 1 | |
store i8 32, i8* %129, align 1, !tbaa !46 | |
%130 = getelementptr inbounds %struct.halide_buffer_t, %struct.halide_buffer_t* %nan_or_one.buffer, i64 0, i32 4, i32 2 | |
store i16 1, i16* %130, align 2, !tbaa !47 | |
store i32 2, i32* %48, align 4, !tbaa !31 | |
%131 = bitcast [8 x i32]* %9 to i8* | |
call void @llvm.memcpy.p0i8.p0i8.i64(i8* noundef nonnull align 4 dereferenceable(16) %119, i8* noundef nonnull align 4 dereferenceable(16) %131, i64 16, i1 false) #5, !tbaa.struct !49 | |
%132 = load %struct.halide_dimension_t*, %struct.halide_dimension_t** %50, align 8, !tbaa !38 | |
%133 = getelementptr inbounds [8 x i32], [8 x i32]* %9, i64 0, i64 4 | |
%134 = getelementptr inbounds %struct.halide_dimension_t, %struct.halide_dimension_t* %132, i64 1 | |
%135 = bitcast %struct.halide_dimension_t* %134 to i8* | |
%136 = bitcast i32* %133 to i8* | |
call void @llvm.memcpy.p0i8.p0i8.i64(i8* noundef nonnull align 4 dereferenceable(16) %135, i8* noundef nonnull align 4 dereferenceable(16) %136, i64 16, i1 false) #5, !tbaa.struct !49 | |
%137 = getelementptr inbounds %struct.halide_buffer_t, %struct.halide_buffer_t* %nan_or_one.buffer, i64 0, i32 3 | |
store i64 0, i64* %137, align 8, !tbaa !48 | |
br label %after_bb9 | |
then_bb13: ; preds = %_halide_buffer_is_bounds_query.exit66 | |
%138 = icmp ne i32 %22, 73730 | |
%139 = zext i1 %138 to i64 | |
%.not50 = icmp eq i32 %47, 73730 | |
%140 = select i1 %.not50, i64 0, i64 4 | |
%.not51 = icmp eq i32 %49, 2 | |
%141 = select i1 %.not51, i64 0, i64 8 | |
%142 = icmp sgt i32 %28, %53 | |
%143 = extractelement <2 x i32> %31, i32 0 | |
%144 = add nsw i32 %143, %28 | |
%145 = icmp sgt i32 %87, %144 | |
%146 = or i1 %142, %145 | |
%147 = select i1 %146, i64 16, i64 0 | |
%148 = lshr i32 %143, 26 | |
%149 = and i32 %148, 32 | |
%150 = zext i32 %149 to i64 | |
%151 = extractelement <2 x i32> %34, i32 0 | |
%152 = icmp sgt i32 %151, %59 | |
%153 = extractelement <2 x i32> %34, i32 1 | |
%154 = add nsw i32 %153, %151 | |
%155 = icmp sgt i32 %86, %154 | |
%156 = or i1 %152, %155 | |
%157 = select i1 %156, i64 64, i64 0 | |
%158 = lshr i32 %153, 24 | |
%159 = and i32 %158, 128 | |
%160 = zext i32 %159 to i64 | |
%161 = icmp sgt i32 %38, 0 | |
%162 = add nsw i32 %40, %38 | |
%163 = icmp slt i32 %162, 1 | |
%164 = or i1 %161, %163 | |
%165 = select i1 %164, i64 256, i64 0 | |
%166 = lshr i32 %40, 22 | |
%167 = and i32 %166, 512 | |
%168 = zext i32 %167 to i64 | |
%169 = lshr i32 %55, 21 | |
%170 = and i32 %169, 1024 | |
%171 = zext i32 %170 to i64 | |
%172 = lshr i32 %61, 20 | |
%173 = and i32 %172, 2048 | |
%174 = zext i32 %173 to i64 | |
%.not58 = icmp eq i32 %42, 1 | |
%175 = select i1 %.not58, i64 0, i64 262144 | |
%176 = insertelement <8 x i32> poison, i32 %24, i32 0 | |
%177 = insertelement <8 x i32> %176, i32 %28, i32 1 | |
%178 = shufflevector <2 x i32> %31, <2 x i32> poison, <8 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> | |
%179 = shufflevector <8 x i32> %177, <8 x i32> %178, <8 x i32> <i32 0, i32 1, i32 8, i32 9, i32 undef, i32 undef, i32 undef, i32 undef> | |
%180 = shufflevector <2 x i32> %34, <2 x i32> poison, <8 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> | |
%181 = shufflevector <8 x i32> %179, <8 x i32> %180, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 undef, i32 undef> | |
%182 = insertelement <8 x i32> %181, i32 %36, i32 6 | |
%183 = insertelement <8 x i32> %182, i32 %38, i32 7 | |
%184 = icmp eq <8 x i32> %183, <i32 3, i32 0, i32 1, i32 3, i32 0, i32 1, i32 3, i32 0> | |
%185 = select <8 x i1> %184, <8 x i64> zeroinitializer, <8 x i64> <i64 2, i64 8192, i64 16384, i64 4096, i64 65536, i64 131072, i64 32768, i64 524288> | |
%.not60 = icmp eq i32 %40, 3 | |
%186 = select i1 %.not60, i64 0, i64 1048576 | |
%.not61 = icmp eq i32 %57, 1 | |
%187 = select i1 %.not61, i64 0, i64 2097152 | |
%188 = call i64 @llvm.vector.reduce.or.v8i64(<8 x i64> %185) | |
%189 = or i64 %188, %186 | |
%190 = or i64 %189, %175 | |
%191 = or i64 %190, %165 | |
%192 = or i64 %191, %140 | |
%193 = or i64 %192, %141 | |
%194 = or i64 %193, %187 | |
%195 = or i64 %194, %157 | |
%op.extra = or i64 %195, %139 | |
%op.extra78 = or i64 %op.extra, %150 | |
%op.extra79 = or i64 %op.extra78, %160 | |
%op.extra80 = or i64 %op.extra79, %168 | |
%op.extra81 = or i64 %op.extra80, %171 | |
%op.extra82 = or i64 %op.extra81, %174 | |
%196 = or i64 %op.extra82, %147 | |
%197 = or i64 %196, -9223372036854775808 | |
%198 = call i64 @llvm.cttz.i64(i64 %197, i1 true), !range !50 | |
%199 = trunc i64 %198 to i32 | |
switch i32 %199, label %no_errors_bb [ | |
i32 0, label %assert_failed | |
i32 1, label %assert_failed15 | |
i32 2, label %assert_failed16 | |
i32 3, label %assert_failed17 | |
i32 4, label %assert_failed18 | |
i32 5, label %assert_failed19 | |
i32 6, label %assert_failed20 | |
i32 7, label %assert_failed21 | |
i32 8, label %assert_failed22 | |
i32 9, label %assert_failed23 | |
i32 10, label %assert_failed24 | |
i32 11, label %assert_failed25 | |
i32 12, label %assert_failed26 | |
i32 13, label %assert_failed27 | |
i32 14, label %assert_failed28 | |
i32 15, label %assert_failed29 | |
i32 16, label %assert_failed30 | |
i32 17, label %assert_failed31 | |
i32 18, label %assert_failed32 | |
i32 19, label %assert_failed33 | |
i32 20, label %assert_failed34 | |
i32 21, label %assert_failed35 | |
], !prof !51 | |
no_errors_bb: ; preds = %then_bb13 | |
%200 = sext i32 %61 to i64 | |
%201 = sext i32 %55 to i64 | |
%nan_or_one.total_extent.1 = mul nsw i64 %200, %201 | |
%202 = icmp sgt i32 %55, -1 | |
br i1 %202, label %"assert succeeded37", label %"assert failed36", !prof !11 | |
assert_failed: ; preds = %then_bb13 | |
%203 = call i32 @halide_error_bad_type(i8* %__user_context, i8* getelementptr inbounds ([16 x i8], [16 x i8]* @str.4, i64 0, i64 0), i32 %22, i32 73730) #5 | |
br label %common.ret | |
assert_failed15: ; preds = %then_bb13 | |
%204 = call i32 @halide_error_bad_dimensions(i8* %__user_context, i8* getelementptr inbounds ([16 x i8], [16 x i8]* @str.4, i64 0, i64 0), i32 %24, i32 3) #5 | |
br label %common.ret | |
assert_failed16: ; preds = %then_bb13 | |
%205 = call i32 @halide_error_bad_type(i8* %__user_context, i8* getelementptr inbounds ([25 x i8], [25 x i8]* @str.5, i64 0, i64 0), i32 %47, i32 73730) #5 | |
br label %common.ret | |
assert_failed17: ; preds = %then_bb13 | |
%206 = call i32 @halide_error_bad_dimensions(i8* %__user_context, i8* getelementptr inbounds ([25 x i8], [25 x i8]* @str.5, i64 0, i64 0), i32 %49, i32 2) #5 | |
br label %common.ret | |
assert_failed18: ; preds = %then_bb13 | |
%207 = add nsw i32 %87, -1 | |
%208 = add nsw i32 %144, -1 | |
%209 = call i32 @halide_error_access_out_of_bounds(i8* %__user_context, i8* getelementptr inbounds ([16 x i8], [16 x i8]* @str.4, i64 0, i64 0), i32 0, i32 %53, i32 %207, i32 %28, i32 %208) #5 | |
br label %common.ret | |
assert_failed19: ; preds = %then_bb13 | |
%210 = call i32 @halide_error_buffer_extents_negative(i8* %__user_context, i8* getelementptr inbounds ([16 x i8], [16 x i8]* @str.4, i64 0, i64 0), i32 0, i32 %143) #5 | |
br label %common.ret | |
assert_failed20: ; preds = %then_bb13 | |
%211 = add nsw i32 %86, -1 | |
%212 = add nsw i32 %154, -1 | |
%213 = call i32 @halide_error_access_out_of_bounds(i8* %__user_context, i8* getelementptr inbounds ([16 x i8], [16 x i8]* @str.4, i64 0, i64 0), i32 1, i32 %59, i32 %211, i32 %151, i32 %212) #5 | |
br label %common.ret | |
assert_failed21: ; preds = %then_bb13 | |
%214 = call i32 @halide_error_buffer_extents_negative(i8* %__user_context, i8* getelementptr inbounds ([16 x i8], [16 x i8]* @str.4, i64 0, i64 0), i32 1, i32 %153) #5 | |
br label %common.ret | |
assert_failed22: ; preds = %then_bb13 | |
%215 = add nsw i32 %162, -1 | |
%216 = call i32 @halide_error_access_out_of_bounds(i8* %__user_context, i8* getelementptr inbounds ([16 x i8], [16 x i8]* @str.4, i64 0, i64 0), i32 2, i32 0, i32 0, i32 %38, i32 %215) #5 | |
br label %common.ret | |
assert_failed23: ; preds = %then_bb13 | |
%217 = call i32 @halide_error_buffer_extents_negative(i8* %__user_context, i8* getelementptr inbounds ([16 x i8], [16 x i8]* @str.4, i64 0, i64 0), i32 2, i32 %40) #5 | |
br label %common.ret | |
assert_failed24: ; preds = %then_bb13 | |
%218 = call i32 @halide_error_buffer_extents_negative(i8* %__user_context, i8* getelementptr inbounds ([25 x i8], [25 x i8]* @str.5, i64 0, i64 0), i32 0, i32 %55) #5 | |
br label %common.ret | |
assert_failed25: ; preds = %then_bb13 | |
%219 = call i32 @halide_error_buffer_extents_negative(i8* %__user_context, i8* getelementptr inbounds ([25 x i8], [25 x i8]* @str.5, i64 0, i64 0), i32 1, i32 %61) #5 | |
br label %common.ret | |
assert_failed26: ; preds = %then_bb13 | |
%220 = extractelement <2 x i32> %31, i32 1 | |
%221 = call i32 @halide_error_constraint_violated(i8* %__user_context, i8* getelementptr inbounds ([12 x i8], [12 x i8]* @str.6, i64 0, i64 0), i32 %220, i8* getelementptr inbounds ([2 x i8], [2 x i8]* @str.7, i64 0, i64 0), i32 3) #5 | |
br label %common.ret | |
assert_failed27: ; preds = %then_bb13 | |
%222 = call i32 @halide_error_constraint_violated(i8* %__user_context, i8* getelementptr inbounds ([9 x i8], [9 x i8]* @str.8, i64 0, i64 0), i32 %28, i8* getelementptr inbounds ([2 x i8], [2 x i8]* @str.9, i64 0, i64 0), i32 0) #5 | |
br label %common.ret | |
assert_failed28: ; preds = %then_bb13 | |
%223 = call i32 @halide_error_constraint_violated(i8* %__user_context, i8* getelementptr inbounds ([12 x i8], [12 x i8]* @str.10, i64 0, i64 0), i32 %143, i8* getelementptr inbounds ([2 x i8], [2 x i8]* @str.11, i64 0, i64 0), i32 1) #5 | |
br label %common.ret | |
assert_failed29: ; preds = %then_bb13 | |
%224 = call i32 @halide_error_constraint_violated(i8* %__user_context, i8* getelementptr inbounds ([12 x i8], [12 x i8]* @str.12, i64 0, i64 0), i32 %36, i8* getelementptr inbounds ([2 x i8], [2 x i8]* @str.7, i64 0, i64 0), i32 3) #5 | |
br label %common.ret | |
assert_failed30: ; preds = %then_bb13 | |
%225 = call i32 @halide_error_constraint_violated(i8* %__user_context, i8* getelementptr inbounds ([9 x i8], [9 x i8]* @str.13, i64 0, i64 0), i32 %151, i8* getelementptr inbounds ([2 x i8], [2 x i8]* @str.9, i64 0, i64 0), i32 0) #5 | |
br label %common.ret | |
assert_failed31: ; preds = %then_bb13 | |
%226 = call i32 @halide_error_constraint_violated(i8* %__user_context, i8* getelementptr inbounds ([12 x i8], [12 x i8]* @str.14, i64 0, i64 0), i32 %153, i8* getelementptr inbounds ([2 x i8], [2 x i8]* @str.11, i64 0, i64 0), i32 1) #5 | |
br label %common.ret | |
assert_failed32: ; preds = %then_bb13 | |
%227 = call i32 @halide_error_constraint_violated(i8* %__user_context, i8* getelementptr inbounds ([12 x i8], [12 x i8]* @str.15, i64 0, i64 0), i32 %42, i8* getelementptr inbounds ([2 x i8], [2 x i8]* @str.11, i64 0, i64 0), i32 1) #5 | |
br label %common.ret | |
assert_failed33: ; preds = %then_bb13 | |
%228 = call i32 @halide_error_constraint_violated(i8* %__user_context, i8* getelementptr inbounds ([9 x i8], [9 x i8]* @str.16, i64 0, i64 0), i32 %38, i8* getelementptr inbounds ([2 x i8], [2 x i8]* @str.9, i64 0, i64 0), i32 0) #5 | |
br label %common.ret | |
assert_failed34: ; preds = %then_bb13 | |
%229 = call i32 @halide_error_constraint_violated(i8* %__user_context, i8* getelementptr inbounds ([12 x i8], [12 x i8]* @str.17, i64 0, i64 0), i32 %40, i8* getelementptr inbounds ([2 x i8], [2 x i8]* @str.7, i64 0, i64 0), i32 3) #5 | |
br label %common.ret | |
assert_failed35: ; preds = %then_bb13 | |
%230 = call i32 @halide_error_constraint_violated(i8* %__user_context, i8* getelementptr inbounds ([20 x i8], [20 x i8]* @str.18, i64 0, i64 0), i32 %57, i8* getelementptr inbounds ([2 x i8], [2 x i8]* @str.11, i64 0, i64 0), i32 1) #5 | |
br label %common.ret | |
"assert failed36": ; preds = %no_errors_bb | |
%231 = call i32 @halide_error_buffer_allocation_too_large(i8* %__user_context, i8* getelementptr inbounds ([11 x i8], [11 x i8]* @str, i64 0, i64 0), i64 %201, i64 2147483647) #5 | |
br label %destructor_block | |
"assert succeeded37": ; preds = %no_errors_bb | |
%232 = sext i32 %63 to i64 | |
%x0 = mul nsw i64 %232, %200 | |
%233 = call i64 @llvm.abs.i64(i64 %x0, i1 true) | |
%234 = icmp ult i64 %233, 2147483648 | |
br i1 %234, label %"assert succeeded39", label %"assert failed38", !prof !11 | |
"assert failed38": ; preds = %"assert succeeded37" | |
%235 = call i32 @halide_error_buffer_allocation_too_large(i8* %__user_context, i8* getelementptr inbounds ([11 x i8], [11 x i8]* @str, i64 0, i64 0), i64 %233, i64 2147483647) #5 | |
br label %destructor_block | |
"assert succeeded39": ; preds = %"assert succeeded37" | |
%236 = icmp slt i64 %nan_or_one.total_extent.1, 2147483648 | |
br i1 %236, label %"produce nan_or_one", label %"assert failed40", !prof !11 | |
"assert failed40": ; preds = %"assert succeeded39" | |
%237 = call i32 @halide_error_buffer_extents_too_large(i8* %__user_context, i8* getelementptr inbounds ([11 x i8], [11 x i8]* @str, i64 0, i64 0), i64 %nan_or_one.total_extent.1, i64 2147483647) #5 | |
br label %destructor_block | |
"produce nan_or_one": ; preds = %"assert succeeded39" | |
%238 = call %struct.halide_device_interface_t.1* @halide_opencl_device_interface() #5 | |
%239 = bitcast %struct.halide_buffer_t* %b0.buffer to %struct.halide_buffer_t.4* | |
%halide_copy_to_device_result = call i32 @halide_copy_to_device(i8* %__user_context, %struct.halide_buffer_t.4* nonnull %239, %struct.halide_device_interface_t.1* %238) #5 | |
%240 = icmp eq i32 %halide_copy_to_device_result, 0 | |
br i1 %240, label %"assert succeeded43", label %destructor_block, !prof !11 | |
"assert succeeded43": ; preds = %"produce nan_or_one" | |
%241 = call %struct.halide_device_interface_t.1* @halide_opencl_device_interface() #5 | |
%242 = bitcast %struct.halide_buffer_t* %nan_or_one.buffer to %struct.halide_buffer_t.4* | |
%"halide_copy_to_device_result$1" = call i32 @halide_copy_to_device(i8* %__user_context, %struct.halide_buffer_t.4* nonnull %242, %struct.halide_device_interface_t.1* %241) #5 | |
%243 = icmp eq i32 %"halide_copy_to_device_result$1", 0 | |
br i1 %243, label %"assert succeeded45", label %destructor_block, !prof !11 | |
"assert succeeded45": ; preds = %"assert succeeded43" | |
%244 = mul i32 %59, %63 | |
%t10 = sub i32 0, %244 | |
%t9 = add nsw i32 %59, %53 | |
%245 = load i64, i64* bitcast ([8 x i8]* @opencl_buf.data to i64*), align 32, !tbaa !12 | |
%246 = inttoptr i64 %245 to i8* | |
%247 = getelementptr inbounds [7 x i64], [7 x i64]* %8, i64 0, i64 0 | |
%248 = bitcast [7 x i64]* %8 to <2 x i64>* | |
store <2 x i64> <i64 8, i64 8>, <2 x i64>* %248, align 16 | |
%249 = getelementptr inbounds [7 x i64], [7 x i64]* %8, i64 0, i64 2 | |
%250 = bitcast i64* %249 to <2 x i64>* | |
store <2 x i64> <i64 4, i64 4>, <2 x i64>* %250, align 16 | |
%251 = getelementptr inbounds [7 x i64], [7 x i64]* %8, i64 0, i64 4 | |
%252 = bitcast i64* %251 to <2 x i64>* | |
store <2 x i64> <i64 4, i64 4>, <2 x i64>* %252, align 16 | |
%253 = getelementptr inbounds [7 x i64], [7 x i64]* %8, i64 0, i64 6 | |
store i64 0, i64* %253, align 16 | |
store i32 %59, i32* %7, align 4 | |
store i32 %63, i32* %6, align 4 | |
store i32 %t10, i32* %5, align 4 | |
store i32 %t9, i32* %4, align 4 | |
%254 = getelementptr inbounds { %struct.halide_buffer_t*, %struct.halide_buffer_t*, i32*, i32*, i32*, i32*, i8* }, { %struct.halide_buffer_t*, %struct.halide_buffer_t*, i32*, i32*, i32*, i32*, i8* }* %3, i64 0, i32 0 | |
store %struct.halide_buffer_t* %b0.buffer, %struct.halide_buffer_t** %254, align 8 | |
%255 = getelementptr inbounds { %struct.halide_buffer_t*, %struct.halide_buffer_t*, i32*, i32*, i32*, i32*, i8* }, { %struct.halide_buffer_t*, %struct.halide_buffer_t*, i32*, i32*, i32*, i32*, i8* }* %3, i64 0, i32 1 | |
store %struct.halide_buffer_t* %nan_or_one.buffer, %struct.halide_buffer_t** %255, align 8 | |
%256 = getelementptr inbounds { %struct.halide_buffer_t*, %struct.halide_buffer_t*, i32*, i32*, i32*, i32*, i8* }, { %struct.halide_buffer_t*, %struct.halide_buffer_t*, i32*, i32*, i32*, i32*, i8* }* %3, i64 0, i32 2 | |
store i32* %7, i32** %256, align 8 | |
%257 = getelementptr inbounds { %struct.halide_buffer_t*, %struct.halide_buffer_t*, i32*, i32*, i32*, i32*, i8* }, { %struct.halide_buffer_t*, %struct.halide_buffer_t*, i32*, i32*, i32*, i32*, i8* }* %3, i64 0, i32 3 | |
store i32* %6, i32** %257, align 8 | |
%258 = getelementptr inbounds { %struct.halide_buffer_t*, %struct.halide_buffer_t*, i32*, i32*, i32*, i32*, i8* }, { %struct.halide_buffer_t*, %struct.halide_buffer_t*, i32*, i32*, i32*, i32*, i8* }* %3, i64 0, i32 4 | |
store i32* %5, i32** %258, align 8 | |
%259 = getelementptr inbounds { %struct.halide_buffer_t*, %struct.halide_buffer_t*, i32*, i32*, i32*, i32*, i8* }, { %struct.halide_buffer_t*, %struct.halide_buffer_t*, i32*, i32*, i32*, i32*, i8* }* %3, i64 0, i32 5 | |
store i32* %4, i32** %259, align 8 | |
%260 = getelementptr inbounds { %struct.halide_buffer_t*, %struct.halide_buffer_t*, i32*, i32*, i32*, i32*, i8* }, { %struct.halide_buffer_t*, %struct.halide_buffer_t*, i32*, i32*, i32*, i32*, i8* }* %3, i64 0, i32 6 | |
store i8* null, i8** %260, align 8 | |
%261 = getelementptr inbounds [7 x i8], [7 x i8]* %2, i64 0, i64 0 | |
store i8 1, i8* %261, align 1 | |
%262 = getelementptr inbounds [7 x i8], [7 x i8]* %2, i64 0, i64 1 | |
store i8 1, i8* %262, align 1 | |
%263 = getelementptr inbounds [7 x i8], [7 x i8]* %2, i64 0, i64 2 | |
%264 = bitcast { %struct.halide_buffer_t*, %struct.halide_buffer_t*, i32*, i32*, i32*, i32*, i8* }* %3 to i8** | |
call void @llvm.memset.p0i8.i64(i8* noundef nonnull align 1 dereferenceable(5) %263, i8 0, i64 5, i1 false) | |
%halide_opencl_run_result = call i32 @halide_opencl_run(i8* %__user_context, i8* %246, i8* getelementptr inbounds ([49 x i8], [49 x i8]* @str.19, i64 0, i64 0), i32 %55, i32 %61, i32 1, i32 1, i32 1, i32 1, i32 0, i64* nonnull %247, i8** nonnull %264, i8* nonnull %261) #5 | |
%265 = icmp eq i32 %halide_opencl_run_result, 0 | |
br i1 %265, label %"assert succeeded47", label %destructor_block, !prof !11 | |
"assert succeeded47": ; preds = %"assert succeeded45" | |
%266 = getelementptr inbounds %struct.halide_buffer_t, %struct.halide_buffer_t* %nan_or_one.buffer, i64 0, i32 3 | |
%267 = load i64, i64* %266, align 8, !tbaa !48 | |
%268 = or i64 %267, 2 | |
store i64 %268, i64* %266, align 8, !tbaa !48 | |
br label %destructor_block | |
} | |
; Function Attrs: mustprogress nofree nosync nounwind readnone speculatable willreturn | |
declare i64 @llvm.cttz.i64(i64 %0, i1 immarg %1) #4 | |
; Function Attrs: nounwind | |
define i32 @nan_or_one_argv(i8** nocapture readonly %0) local_unnamed_addr #5 { | |
entry: | |
%1 = bitcast i8** %0 to %struct.halide_buffer_t** | |
%2 = load %struct.halide_buffer_t*, %struct.halide_buffer_t** %1, align 8 | |
%3 = getelementptr i8*, i8** %0, i64 1 | |
%4 = bitcast i8** %3 to i8*** | |
%5 = load i8**, i8*** %4, align 8 | |
%6 = load i8*, i8** %5, align 8 | |
%7 = getelementptr i8*, i8** %0, i64 2 | |
%8 = bitcast i8** %7 to %struct.halide_buffer_t** | |
%9 = load %struct.halide_buffer_t*, %struct.halide_buffer_t** %8, align 8 | |
%10 = tail call i32 @nan_or_one(%struct.halide_buffer_t* %2, i8* %6, %struct.halide_buffer_t* %9) #11 | |
ret i32 %10 | |
} | |
; Function Attrs: mustprogress nofree norecurse nosync nounwind readnone willreturn | |
define nonnull %struct.halide_filter_metadata_t* @nan_or_one_metadata() local_unnamed_addr #6 { | |
entry: | |
ret %struct.halide_filter_metadata_t* @nan_or_one_metadata_storage | |
} | |
; Function Attrs: nofree nosync nounwind readnone speculatable willreturn | |
declare i64 @llvm.abs.i64(i64 %0, i1 immarg %1) #7 | |
; Function Attrs: argmemonly nofree nounwind willreturn writeonly | |
declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly %0, i8 %1, i64 %2, i1 immarg %3) #8 | |
; Function Attrs: nofree nosync nounwind readnone willreturn | |
declare i64 @llvm.vector.reduce.or.v8i64(<8 x i64> %0) #9 | |
attributes #0 = { argmemonly mustprogress nofree nosync nounwind willreturn } | |
attributes #1 = { argmemonly mustprogress nofree nounwind willreturn } | |
attributes #2 = { "frame-pointer"="all" "no-builtins" "no-trapping-math"="true" "stack-protector-buffer-size"="8" } | |
attributes #3 = { nounwind "reciprocal-estimates"="none" } | |
attributes #4 = { mustprogress nofree nosync nounwind readnone speculatable willreturn } | |
attributes #5 = { nounwind } | |
attributes #6 = { mustprogress nofree norecurse nosync nounwind readnone willreturn } | |
attributes #7 = { nofree nosync nounwind readnone speculatable willreturn } | |
attributes #8 = { argmemonly nofree nounwind willreturn writeonly } | |
attributes #9 = { nofree nosync nounwind readnone willreturn } | |
attributes #10 = { nobuiltin nounwind "no-builtins" } | |
attributes #11 = { noinline } | |
!llvm.module.flags = !{!0, !1, !2, !3, !4, !5, !6, !7, !8, !9} | |
!llvm.ident = !{!10, !10, !10, !10, !10} | |
!0 = !{i32 1, !"wchar_size", i32 4} | |
!1 = !{i32 7, !"PIC Level", i32 1} | |
!2 = !{i32 7, !"frame-pointer", i32 2} | |
!3 = !{i32 2, !"halide_use_soft_float_abi", i32 0} | |
!4 = !{i32 2, !"halide_mcpu", !"penryn"} | |
!5 = !{i32 2, !"halide_mattrs", !""} | |
!6 = !{i32 2, !"halide_mabi", !""} | |
!7 = !{i32 2, !"halide_use_pic", i32 1} | |
!8 = !{i32 2, !"halide_use_large_code_model", i32 0} | |
!9 = !{i32 2, !"halide_per_instruction_fast_math_flags", i32 1} | |
!10 = !{!"clang version 13.0.0 (https://github.com/microsoft/vcpkg.git 6f1d72a4bd98b635c98691054864229aed61fa82)"} | |
!11 = !{!"branch_weights", i32 1073741824, i32 0} | |
!12 = !{!13, !13, i64 0} | |
!13 = !{!"opencl.width1.base0", !14, i64 0} | |
!14 = !{!"opencl.width2.base0", !15, i64 0} | |
!15 = !{!"opencl.width4.base0", !16, i64 0} | |
!16 = !{!"opencl.width8.base0", !17, i64 0} | |
!17 = !{!"opencl.width16.base0", !18, i64 0} | |
!18 = !{!"opencl.width32.base0", !19, i64 0} | |
!19 = !{!"opencl.width64.base0", !20, i64 0} | |
!20 = !{!"opencl.width128.base0", !21, i64 0} | |
!21 = !{!"opencl.width256.base0", !22, i64 0} | |
!22 = !{!"opencl.width512.base0", !23, i64 0} | |
!23 = !{!"opencl.width1024.base0", !24, i64 0} | |
!24 = !{!"opencl", !25, i64 0} | |
!25 = !{!"Halide buffer"} | |
!26 = !{!"branch_weights", i32 0, i32 1073741824} | |
!27 = !{!28, !28, i64 0} | |
!28 = !{!"int", !29, i64 0} | |
!29 = !{!"omnipotent char", !30, i64 0} | |
!30 = !{!"Simple C++ TBAA"} | |
!31 = !{!32, !28, i64 36} | |
!32 = !{!"_ZTS15halide_buffer_t", !33, i64 0, !34, i64 8, !34, i64 16, !33, i64 24, !35, i64 32, !28, i64 36, !34, i64 40, !34, i64 48} | |
!33 = !{!"long long", !29, i64 0} | |
!34 = !{!"any pointer", !29, i64 0} | |
!35 = !{!"_ZTS13halide_type_t", !36, i64 0, !29, i64 1, !37, i64 2} | |
!36 = !{!"_ZTS18halide_type_code_t", !29, i64 0} | |
!37 = !{!"short", !29, i64 0} | |
!38 = !{!32, !34, i64 40} | |
!39 = !{!40, !28, i64 0} | |
!40 = !{!"_ZTS18halide_dimension_t", !28, i64 0, !28, i64 4, !28, i64 8, !28, i64 12} | |
!41 = !{!40, !28, i64 8} | |
!42 = !{!40, !28, i64 4} | |
!43 = !{!32, !34, i64 16} | |
!44 = !{!32, !33, i64 0} | |
!45 = !{!32, !36, i64 32} | |
!46 = !{!32, !29, i64 33} | |
!47 = !{!32, !37, i64 34} | |
!48 = !{!32, !33, i64 24} | |
!49 = !{i64 0, i64 4, !27, i64 4, i64 4, !27, i64 8, i64 4, !27, i64 12, i64 4, !27} | |
!50 = !{i64 0, i64 64} | |
!51 = !{!"branch_weights", i32 1073741824, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0} | |
Creating new execution engine | |
Target triple: x86_64--linux-gnu | |
JIT compiling shared runtime for x86-64-linux-jit-opencl-sse41-strict_float-user_context | |
JIT Compiling halide_buffer_copy | |
Function halide_buffer_copy is at 0x7f93e7843ed0 | |
JIT Compiling halide_buffer_copy_already_locked | |
Function halide_buffer_copy_already_locked is at 0x7f93e7843c10 | |
JIT Compiling halide_buffer_to_string | |
Function halide_buffer_to_string is at 0x7f93e7844a10 | |
JIT Compiling halide_cache_cleanup | |
Function halide_cache_cleanup is at 0x7f93ea435f40 | |
JIT Compiling halide_can_reuse_device_allocations | |
Function halide_can_reuse_device_allocations is at 0x7f93ea434fd0 | |
JIT Compiling halide_cond_broadcast | |
Function halide_cond_broadcast is at 0x7f93eb155c60 | |
JIT Compiling halide_cond_signal | |
Function halide_cond_signal is at 0x7f93ea435340 | |
JIT Compiling halide_cond_wait | |
Function halide_cond_wait is at 0x7f93eb155d20 | |
JIT Compiling halide_copy_to_device | |
Function halide_copy_to_device is at 0x7f93e7843300 | |
JIT Compiling halide_copy_to_host | |
Function halide_copy_to_host is at 0x7f93ea434b50 | |
JIT Compiling halide_current_time_ns | |
Function halide_current_time_ns is at 0x7f93eb1552b0 | |
JIT Compiling halide_debug_to_file | |
Function halide_debug_to_file is at 0x7f93ea432000 | |
JIT Compiling halide_default_buffer_copy | |
Function halide_default_buffer_copy is at 0x7f93e7844fd0 | |
JIT Compiling halide_default_device_and_host_free | |
Function halide_default_device_and_host_free is at 0x7f93e78437f0 | |
JIT Compiling halide_default_device_and_host_malloc | |
Function halide_default_device_and_host_malloc is at 0x7f93e7843660 | |
JIT Compiling halide_default_device_crop | |
Function halide_default_device_crop is at 0x7f93e7843f70 | |
JIT Compiling halide_default_device_detach_native | |
Function halide_default_device_detach_native is at 0x7f93e7843b60 | |
JIT Compiling halide_default_device_release_crop | |
Function halide_default_device_release_crop is at 0x7f93e7842190 | |
JIT Compiling halide_default_device_slice | |
Function halide_default_device_slice is at 0x7f93e7843fa0 | |
JIT Compiling halide_default_device_wrap_native | |
Function halide_default_device_wrap_native is at 0x7f93e7843ac0 | |
JIT Compiling halide_default_do_loop_task | |
Function halide_default_do_loop_task is at 0x7f93eb1553c0 | |
JIT Compiling halide_default_do_par_for | |
Function halide_default_do_par_for is at 0x7f93eb1553e0 | |
JIT Compiling halide_default_do_parallel_tasks | |
Function halide_default_do_parallel_tasks is at 0x7f93ea435040 | |
JIT Compiling halide_default_do_task | |
Function halide_default_do_task is at 0x7f93eb1553b0 | |
JIT Compiling halide_default_error | |
Function halide_default_error is at 0x7f93eb155100 | |
JIT Compiling halide_default_free | |
Function halide_default_free is at 0x7f93eb155060 | |
JIT Compiling halide_default_get_library_symbol | |
Function halide_default_get_library_symbol is at 0x7f93ea435760 | |
JIT Compiling halide_default_get_symbol | |
Function halide_default_get_symbol is at 0x7f93ea435700 | |
JIT Compiling halide_default_load_library | |
Function halide_default_load_library is at 0x7f93ea435720 | |
JIT Compiling halide_default_malloc | |
Function halide_default_malloc is at 0x7f93eb155000 | |
JIT Compiling halide_default_print | |
Function halide_default_print is at 0x7f93eb155330 | |
JIT Compiling halide_default_semaphore_init | |
Function halide_default_semaphore_init is at 0x7f93ea435190 | |
JIT Compiling halide_default_semaphore_release | |
Function halide_default_semaphore_release is at 0x7f93ea4351a0 | |
JIT Compiling halide_default_semaphore_try_acquire | |
Function halide_default_semaphore_try_acquire is at 0x7f93eb155cb0 | |
JIT Compiling halide_default_trace | |
Function halide_default_trace is at 0x7f93ea434000 | |
JIT Compiling halide_device_and_host_free | |
Function halide_device_and_host_free is at 0x7f93e7843530 | |
JIT Compiling halide_device_and_host_free_as_destructor | |
Function halide_device_and_host_free_as_destructor is at 0x7f93e7844fb0 | |
JIT Compiling halide_device_and_host_malloc | |
Function halide_device_and_host_malloc is at 0x7f93e7843420 | |
JIT Compiling halide_device_crop | |
Function halide_device_crop is at 0x7f93e7842000 | |
JIT Compiling halide_device_detach_native | |
Function halide_device_detach_native is at 0x7f93e78439c0 | |
JIT Compiling halide_device_free | |
Function halide_device_free is at 0x7f93ea434cf0 | |
JIT Compiling halide_device_free_as_destructor | |
Function halide_device_free_as_destructor is at 0x7f93ea432fd0 | |
JIT Compiling halide_device_host_nop_free | |
Function halide_device_host_nop_free is at 0x7f93ea433fe0 | |
JIT Compiling halide_device_malloc | |
Function halide_device_malloc is at 0x7f93e7843210 | |
JIT Compiling halide_device_release | |
Function halide_device_release is at 0x7f93ea433fd0 | |
JIT Compiling halide_device_release_crop | |
Function halide_device_release_crop is at 0x7f93e78421c0 | |
JIT Compiling halide_device_slice | |
Function halide_device_slice is at 0x7f93e78420c0 | |
JIT Compiling halide_device_sync | |
Function halide_device_sync is at 0x7f93e7843360 | |
JIT Compiling halide_device_wrap_native | |
Function halide_device_wrap_native is at 0x7f93e78438c0 | |
JIT Compiling halide_do_loop_task | |
Function halide_do_loop_task is at 0x7f93eb155ce0 | |
JIT Compiling halide_do_par_for | |
Function halide_do_par_for is at 0x7f93ea435660 | |
JIT Compiling halide_do_parallel_tasks | |
Function halide_do_parallel_tasks is at 0x7f93ea435680 | |
JIT Compiling halide_do_task | |
Function halide_do_task is at 0x7f93eb155d00 | |
JIT Compiling halide_double_to_string | |
Function halide_double_to_string is at 0x7f93ea433000 | |
JIT Compiling halide_error | |
Function halide_error is at 0x7f93eb155210 | |
JIT Compiling halide_error_access_out_of_bounds | |
Function halide_error_access_out_of_bounds is at 0x7f93e78429a0 | |
JIT Compiling halide_error_bad_dimensions | |
Function halide_error_bad_dimensions is at 0x7f93e7842840 | |
JIT Compiling halide_error_bad_extern_fold | |
Function halide_error_bad_extern_fold is at 0x7f93e78404b0 | |
JIT Compiling halide_error_bad_fold | |
Function halide_error_bad_fold is at 0x7f93e7840340 | |
JIT Compiling halide_error_bad_type | |
Function halide_error_bad_type is at 0x7f93e78426d0 | |
JIT Compiling halide_error_bounds_inference_call_failed | |
Function halide_error_bounds_inference_call_failed is at 0x7f93e78422b0 | |
JIT Compiling halide_error_buffer_allocation_too_large | |
Function halide_error_buffer_allocation_too_large is at 0x7f93e7842c40 | |
JIT Compiling halide_error_buffer_argument_is_null | |
Function halide_error_buffer_argument_is_null is at 0x7f93e7841ce0 | |
JIT Compiling halide_error_buffer_extents_negative | |
Function halide_error_buffer_extents_negative is at 0x7f93e7842d90 | |
JIT Compiling halide_error_buffer_extents_too_large | |
Function halide_error_buffer_extents_too_large is at 0x7f93e7841000 | |
JIT Compiling halide_error_buffer_is_null | |
Function halide_error_buffer_is_null is at 0x7f93e7844df0 | |
JIT Compiling halide_error_constraint_violated | |
Function halide_error_constraint_violated is at 0x7f93e7841360 | |
JIT Compiling halide_error_constraints_make_required_region_smaller | |
Function halide_error_constraints_make_required_region_smaller is at 0x7f93e7841150 | |
JIT Compiling halide_error_debug_to_file_failed | |
Function halide_error_debug_to_file_failed is at 0x7f93e7841de0 | |
JIT Compiling halide_error_device_dirty_with_no_device_support | |
Function halide_error_device_dirty_with_no_device_support is at 0x7f93e7840130 | |
JIT Compiling halide_error_device_interface_no_device | |
Function halide_error_device_interface_no_device is at 0x7f93e7844ef0 | |
JIT Compiling halide_error_explicit_bounds_too_small | |
Function halide_error_explicit_bounds_too_small is at 0x7f93e78424f0 | |
JIT Compiling halide_error_extern_stage_failed | |
Function halide_error_extern_stage_failed is at 0x7f93e78423d0 | |
JIT Compiling halide_error_fold_factor_too_small | |
Function halide_error_fold_factor_too_small is at 0x7f93e7840820 | |
JIT Compiling halide_error_host_and_device_dirty | |
Function halide_error_host_and_device_dirty is at 0x7f93e7843000 | |
JIT Compiling halide_error_host_is_null | |
Function halide_error_host_is_null is at 0x7f93e7840240 | |
JIT Compiling halide_error_no_device_interface | |
Function halide_error_no_device_interface is at 0x7f93e7844d30 | |
JIT Compiling halide_error_out_of_memory | |
Function halide_error_out_of_memory is at 0x7f93e7842ef0 | |
JIT Compiling halide_error_param_too_large_f64 | |
Function halide_error_param_too_large_f64 is at 0x7f93e7841b90 | |
JIT Compiling halide_error_param_too_large_i64 | |
Function halide_error_param_too_large_i64 is at 0x7f93e78418f0 | |
JIT Compiling halide_error_param_too_large_u64 | |
Function halide_error_param_too_large_u64 is at 0x7f93e7841a40 | |
JIT Compiling halide_error_param_too_small_f64 | |
Function halide_error_param_too_small_f64 is at 0x7f93e78417a0 | |
JIT Compiling halide_error_param_too_small_i64 | |
Function halide_error_param_too_small_i64 is at 0x7f93e7841500 | |
JIT Compiling halide_error_param_too_small_u64 | |
Function halide_error_param_too_small_u64 is at 0x7f93e7841650 | |
JIT Compiling halide_error_requirement_failed | |
Function halide_error_requirement_failed is at 0x7f93e78409f0 | |
JIT Compiling halide_error_specialize_fail | |
Function halide_error_specialize_fail is at 0x7f93e7840b20 | |
JIT Compiling halide_error_unaligned_host_ptr | |
Function halide_error_unaligned_host_ptr is at 0x7f93e7840000 | |
JIT Compiling halide_float16_bits_to_double | |
Function halide_float16_bits_to_double is at 0x7f93ea435c50 | |
JIT Compiling halide_float16_bits_to_float | |
Function halide_float16_bits_to_float is at 0x7f93e7842240 | |
JIT Compiling halide_free | |
Function halide_free is at 0x7f93eb1550e0 | |
JIT Compiling halide_get_gpu_device | |
Function halide_get_gpu_device is at 0x7f93ea435860 | |
JIT Compiling halide_get_library_symbol | |
Function halide_get_library_symbol is at 0x7f93ea435820 | |
JIT Compiling halide_get_symbol | |
Function halide_get_symbol is at 0x7f93ea4357e0 | |
JIT Compiling halide_get_trace_file | |
Function halide_get_trace_file is at 0x7f93ea4358f0 | |
JIT Compiling halide_host_cpu_count | |
Function halide_host_cpu_count is at 0x7f93eb155370 | |
JIT Compiling halide_int64_to_string | |
Function halide_int64_to_string is at 0x7f93ea435a20 | |
JIT Compiling halide_join_thread | |
Function halide_join_thread is at 0x7f93ea435300 | |
JIT Compiling halide_load_library | |
Function halide_load_library is at 0x7f93ea435800 | |
JIT Compiling halide_malloc | |
Function halide_malloc is at 0x7f93eb1550c0 | |
JIT Compiling halide_malloc_alignment | |
Function halide_malloc_alignment is at 0x7f93eb155050 | |
JIT Compiling halide_memoization_cache_cleanup | |
Function halide_memoization_cache_cleanup is at 0x7f93ea434c30 | |
JIT Compiling halide_memoization_cache_evict | |
Function halide_memoization_cache_evict is at 0x7f93ea432e70 | |
JIT Compiling halide_memoization_cache_lookup | |
Function halide_memoization_cache_lookup is at 0x7f93e7844000 | |
JIT Compiling halide_memoization_cache_release | |
Function halide_memoization_cache_release is at 0x7f93ea433f20 | |
JIT Compiling halide_memoization_cache_set_size | |
Function halide_memoization_cache_set_size is at 0x7f93ea433ec0 | |
JIT Compiling halide_memoization_cache_store | |
Function halide_memoization_cache_store is at 0x7f93e78444e0 | |
JIT Compiling halide_msan_annotate_buffer_is_initialized | |
Function halide_msan_annotate_buffer_is_initialized is at 0x7f93ea433fc0 | |
JIT Compiling halide_msan_annotate_buffer_is_initialized_as_destructor | |
Function halide_msan_annotate_buffer_is_initialized_as_destructor is at 0x7f93d5f95de0 | |
JIT Compiling halide_msan_annotate_memory_is_initialized | |
Function halide_msan_annotate_memory_is_initialized is at 0x7f93eb1551e0 | |
JIT Compiling halide_msan_check_buffer_is_initialized | |
Function halide_msan_check_buffer_is_initialized is at 0x7f93e7841fe0 | |
JIT Compiling halide_msan_check_memory_is_initialized | |
Function halide_msan_check_memory_is_initialized is at 0x7f93e7841fd0 | |
JIT Compiling halide_mutex_array_create | |
Function halide_mutex_array_create is at 0x7f93ea4353c0 | |
JIT Compiling halide_mutex_array_destroy | |
Function halide_mutex_array_destroy is at 0x7f93ea435440 | |
JIT Compiling halide_mutex_array_lock | |
Function halide_mutex_array_lock is at 0x7f93ea435480 | |
JIT Compiling halide_mutex_array_unlock | |
Function halide_mutex_array_unlock is at 0x7f93ea4354b0 | |
JIT Compiling halide_mutex_lock | |
Function halide_mutex_lock is at 0x7f93eb1554d0 | |
JIT Compiling halide_mutex_unlock | |
Function halide_mutex_unlock is at 0x7f93eb155920 | |
JIT Compiling halide_pointer_to_string | |
Function halide_pointer_to_string is at 0x7f93ea435a50 | |
JIT Compiling halide_print | |
Function halide_print is at 0x7f93eb1551f0 | |
JIT Compiling halide_profiler_get_pipeline_state | |
Function halide_profiler_get_pipeline_state is at 0x7f93e7840ec0 | |
JIT Compiling halide_profiler_get_state | |
Function halide_profiler_get_state is at 0x7f93e7843fd0 | |
JIT Compiling halide_profiler_memory_allocate | |
Function halide_profiler_memory_allocate is at 0x7f93d5f95b30 | |
JIT Compiling halide_profiler_memory_free | |
Function halide_profiler_memory_free is at 0x7f93d5f95c70 | |
JIT Compiling halide_profiler_pipeline_end | |
Function halide_profiler_pipeline_end is at 0x7f93e7843fe0 | |
JIT Compiling halide_profiler_pipeline_start | |
Function halide_profiler_pipeline_start is at 0x7f93e7840f30 | |
JIT Compiling halide_profiler_report | |
Function halide_profiler_report is at 0x7f93d5f95d40 | |
JIT Compiling halide_profiler_report_unlocked | |
Function halide_profiler_report_unlocked is at 0x7f93d5f95000 | |
JIT Compiling halide_profiler_reset | |
Function halide_profiler_reset is at 0x7f93d5f95d90 | |
JIT Compiling halide_profiler_reset_unlocked | |
Function halide_profiler_reset_unlocked is at 0x7f93e7842f90 | |
JIT Compiling halide_profiler_shutdown | |
Function halide_profiler_shutdown is at 0x7f93e7842f20 | |
JIT Compiling halide_profiler_stack_peak_update | |
Function halide_profiler_stack_peak_update is at 0x7f93d5f95a90 | |
JIT Compiling halide_register_device_allocation_pool | |
Function halide_register_device_allocation_pool is at 0x7f93ea432f80 | |
JIT Compiling halide_release_jit_module | |
Function halide_release_jit_module is at 0x7f93d5f95e20 | |
JIT Compiling halide_reuse_device_allocations | |
Function halide_reuse_device_allocations is at 0x7f93e7844c30 | |
JIT Compiling halide_semaphore_init | |
Function halide_semaphore_init is at 0x7f93ea4356a0 | |
JIT Compiling halide_semaphore_release | |
Function halide_semaphore_release is at 0x7f93ea4356c0 | |
JIT Compiling halide_semaphore_try_acquire | |
Function halide_semaphore_try_acquire is at 0x7f93ea4356e0 | |
JIT Compiling halide_set_custom_do_loop_task | |
Function halide_set_custom_do_loop_task is at 0x7f93ea4355b0 | |
JIT Compiling halide_set_custom_do_par_for | |
Function halide_set_custom_do_par_for is at 0x7f93ea4355d0 | |
JIT Compiling halide_set_custom_do_task | |
Function halide_set_custom_do_task is at 0x7f93ea435590 | |
JIT Compiling halide_set_custom_free | |
Function halide_set_custom_free is at 0x7f93eb1550a0 | |
JIT Compiling halide_set_custom_get_library_symbol | |
Function halide_set_custom_get_library_symbol is at 0x7f93ea4357c0 | |
JIT Compiling halide_set_custom_get_symbol | |
Function halide_set_custom_get_symbol is at 0x7f93ea435780 | |
JIT Compiling halide_set_custom_load_library | |
Function halide_set_custom_load_library is at 0x7f93ea4357a0 | |
JIT Compiling halide_set_custom_malloc | |
Function halide_set_custom_malloc is at 0x7f93eb155080 | |
JIT Compiling halide_set_custom_parallel_runtime | |
Function halide_set_custom_parallel_runtime is at 0x7f93ea4355f0 | |
JIT Compiling halide_set_custom_print | |
Function halide_set_custom_print is at 0x7f93eb155250 | |
JIT Compiling halide_set_custom_trace | |
Function halide_set_custom_trace is at 0x7f93ea435db0 | |
JIT Compiling halide_set_error_handler | |
Function halide_set_error_handler is at 0x7f93eb155230 | |
JIT Compiling halide_set_gpu_device | |
Function halide_set_gpu_device is at 0x7f93ea435840 | |
JIT Compiling halide_set_num_threads | |
Function halide_set_num_threads is at 0x7f93ea4354e0 | |
JIT Compiling halide_set_trace_file | |
Function halide_set_trace_file is at 0x7f93ea435cf0 | |
JIT Compiling halide_shutdown_thread_pool | |
Function halide_shutdown_thread_pool is at 0x7f93ea435230 | |
JIT Compiling halide_shutdown_trace | |
Function halide_shutdown_trace is at 0x7f93ea435d30 | |
JIT Compiling halide_sleep_ms | |
Function halide_sleep_ms is at 0x7f93eb155310 | |
JIT Compiling halide_spawn_thread | |
Function halide_spawn_thread is at 0x7f93eb155c00 | |
JIT Compiling halide_start_clock | |
Function halide_start_clock is at 0x7f93eb155270 | |
JIT Compiling halide_string_to_string | |
Function halide_string_to_string is at 0x7f93eb1551b0 | |
JIT Compiling halide_thread_pool_cleanup | |
Function halide_thread_pool_cleanup is at 0x7f93ea435210 | |
JIT Compiling halide_thread_yield | |
Function halide_thread_yield is at 0x7f93eb155390 | |
JIT Compiling halide_trace | |
Function halide_trace is at 0x7f93ea435dd0 | |
JIT Compiling halide_trace_cleanup | |
Function halide_trace_cleanup is at 0x7f93ea435d10 | |
JIT Compiling halide_trace_helper | |
Function halide_trace_helper is at 0x7f93ea435df0 | |
JIT Compiling halide_type_to_string | |
Function halide_type_to_string is at 0x7f93e7844940 | |
JIT Compiling halide_uint64_to_string | |
Function halide_uint64_to_string is at 0x7f93ea435c70 | |
JIT Compiling halide_use_jit_module | |
Function halide_use_jit_module is at 0x7f93d5f95df0 | |
Finalizing object | |
Creating new execution engine | |
Target triple: x86_64--linux-gnu | |
JIT compiling opencl for x86-64-linux-jit-opencl-sse41-strict_float-user_context | |
JIT Compiling halide_acquire_cl_context | |
Function halide_acquire_cl_context is at 0x7f93d5d29750 | |
JIT Compiling halide_opencl_buffer_copy | |
Function halide_opencl_buffer_copy is at 0x7f93d5d241f0 | |
JIT Compiling halide_opencl_cleanup | |
Function halide_opencl_cleanup is at 0x7f93d5d23f70 | |
JIT Compiling halide_opencl_compute_capability | |
Function halide_opencl_compute_capability is at 0x7f93d5d29230 | |
JIT Compiling halide_opencl_copy_to_device | |
Function halide_opencl_copy_to_device is at 0x7f93d5d27f60 | |
JIT Compiling halide_opencl_copy_to_host | |
Function halide_opencl_copy_to_host is at 0x7f93d5d27f40 | |
JIT Compiling halide_opencl_detach_cl_mem | |
Function halide_opencl_detach_cl_mem is at 0x7f93d5d234c0 | |
JIT Compiling halide_opencl_device_and_host_free | |
Function halide_opencl_device_and_host_free is at 0x7f93d5d27fa0 | |
JIT Compiling halide_opencl_device_and_host_malloc | |
Function halide_opencl_device_and_host_malloc is at 0x7f93d5d27f80 | |
JIT Compiling halide_opencl_device_crop | |
Function halide_opencl_device_crop is at 0x7f93d5d24ef0 | |
JIT Compiling halide_opencl_device_free | |
Function halide_opencl_device_free is at 0x7f93d5d27aa0 | |
JIT Compiling halide_opencl_device_interface | |
Function halide_opencl_device_interface is at 0x7f93d5d29fe0 | |
JIT Compiling halide_opencl_device_malloc | |
Function halide_opencl_device_malloc is at 0x7f93d5d29b80 | |
JIT Compiling halide_opencl_device_release | |
Function halide_opencl_device_release is at 0x7f93d5d24000 | |
JIT Compiling halide_opencl_device_release_crop | |
Function halide_opencl_device_release_crop is at 0x7f93d5d23260 | |
JIT Compiling halide_opencl_device_slice | |
Function halide_opencl_device_slice is at 0x7f93d5d24f70 | |
JIT Compiling halide_opencl_device_sync | |
Function halide_opencl_device_sync is at 0x7f93d5d27cd0 | |
JIT Compiling halide_opencl_finalize_kernels | |
Function halide_opencl_finalize_kernels is at 0x7f93d59c5590 | |
JIT Compiling halide_opencl_get_build_options | |
Function halide_opencl_get_build_options is at 0x7f93d5d22f40 | |
JIT Compiling halide_opencl_get_cl_mem | |
Function halide_opencl_get_cl_mem is at 0x7f93d59c57a0 | |
JIT Compiling halide_opencl_get_crop_offset | |
Function halide_opencl_get_crop_offset is at 0x7f93d59c5810 | |
JIT Compiling halide_opencl_get_device_type | |
Function halide_opencl_get_device_type is at 0x7f93d5d22b40 | |
JIT Compiling halide_opencl_get_platform_name | |
Function halide_opencl_get_platform_name is at 0x7f93d5d22aa0 | |
JIT Compiling halide_opencl_get_symbol | |
Function halide_opencl_get_symbol is at 0x7f93d5d296a0 | |
JIT Compiling halide_opencl_image_buffer_copy | |
Function halide_opencl_image_buffer_copy is at 0x7f93d5d22000 | |
JIT Compiling halide_opencl_image_copy_to_device | |
Function halide_opencl_image_copy_to_device is at 0x7f93d5d24fc0 | |
JIT Compiling halide_opencl_image_copy_to_host | |
Function halide_opencl_image_copy_to_host is at 0x7f93d5d27fc0 | |
JIT Compiling halide_opencl_image_device_and_host_free | |
Function halide_opencl_image_device_and_host_free is at 0x7f93d5d23cc0 | |
JIT Compiling halide_opencl_image_device_and_host_malloc | |
Function halide_opencl_image_device_and_host_malloc is at 0x7f93d5d23ca0 | |
JIT Compiling halide_opencl_image_device_crop | |
Function halide_opencl_image_device_crop is at 0x7f93d5d23ce0 | |
JIT Compiling halide_opencl_image_device_interface | |
Function halide_opencl_image_device_interface is at 0x7f93d5d27fe0 | |
JIT Compiling halide_opencl_image_device_malloc | |
Function halide_opencl_image_device_malloc is at 0x7f93d5d23550 | |
JIT Compiling halide_opencl_image_device_release_crop | |
Function halide_opencl_image_device_release_crop is at 0x7f93d5d23eb0 | |
JIT Compiling halide_opencl_image_device_slice | |
Function halide_opencl_image_device_slice is at 0x7f93d5d23df0 | |
JIT Compiling halide_opencl_image_wrap_cl_mem | |
Function halide_opencl_image_wrap_cl_mem is at 0x7f93d5d22870 | |
JIT Compiling halide_opencl_initialize_kernels | |
Function halide_opencl_initialize_kernels is at 0x7f93d59c5000 | |
JIT Compiling halide_opencl_run | |
Function halide_opencl_run is at 0x7f93d59c4000 | |
JIT Compiling halide_opencl_set_build_options | |
Function halide_opencl_set_build_options is at 0x7f93d5d22ed0 | |
JIT Compiling halide_opencl_set_device_type | |
Function halide_opencl_set_device_type is at 0x7f93d5d22e60 | |
JIT Compiling halide_opencl_set_platform_name | |
Function halide_opencl_set_platform_name is at 0x7f93d5d22df0 | |
JIT Compiling halide_opencl_wrap_cl_mem | |
Function halide_opencl_wrap_cl_mem is at 0x7f93d5d23430 | |
JIT Compiling halide_release_cl_context | |
Function halide_release_cl_context is at 0x7f93d5d29b60 | |
JIT Compiling halide_release_jit_module | |
Function halide_release_jit_module is at 0x7f93d5d29200 | |
JIT Compiling halide_use_jit_module | |
Function halide_use_jit_module is at 0x7f93d5d291d0 | |
Finalizing object | |
Creating new execution engine | |
Target triple: x86_64--linux-gnu | |
JIT compiling nan_or_one for x86-64-linux-jit-opencl-sse41-strict_float-user_context | |
JIT Compiling nan_or_one | |
Function nan_or_one is at 0x7f93d59c0000 | |
JIT Compiling nan_or_one_argv | |
Function nan_or_one_argv is at 0x7f93d59c0cf0 | |
Finalizing object | |
custom_print: 0x7f93d1511460 | |
custom_malloc: 0x7f93eb155000 | |
custom_free: 0x7f93eb155060 | |
custom_do_task: 0x7f93eb1553b0 | |
custom_do_par_for: 0x7f93eb1553e0 | |
custom_error: 0x7f93d17ae020 | |
custom_trace: 0x7f93ea434000 | |
JIT input Image argument b0 @ 0x2a7a788 | |
__user_context @ 0x7ffddfa04658 | |
JIT output buffer @ 0x2a7a5d8, 0 | |
Calling jitted function | |
Back from jitted function. Exit status was 0 | |
Realizing Pipeline for target(x86-64-linux-opencl-sse41-strict_float) | |
Reusing old jit module compiled for : | |
target(x86-64-linux-jit-opencl-sse41-strict_float-user_context) | |
custom_print: 0x7f93d1511460 | |
custom_malloc: 0x7f93eb155000 | |
custom_free: 0x7f93eb155060 | |
custom_do_task: 0x7f93eb1553b0 | |
custom_do_par_for: 0x7f93eb1553e0 | |
custom_error: 0x7f93d17ae020 | |
custom_trace: 0x7f93ea434000 | |
JIT input Image argument b0 @ 0x2a7a788 | |
__user_context @ 0x7ffddfa04658 | |
JIT output buffer @ 0x2a7a5d8, 0x38dee80 | |
Calling jitted function | |
Adjusting refcount for module GPU by 1 | |
Adjusting refcount for module GPU by 1 | |
Adjusting refcount for module GPU by -1 | |
Adjusting refcount for module GPU by 1 | |
Adjusting refcount for module GPU by 1 | |
Adjusting refcount for module GPU by -1 | |
Back from jitted function. Exit status was 0 | |
Adjusting refcount for module GPU by 1 | |
Adjusting refcount for module GPU by -1 | |
Adjusting refcount for module GPU by -1 | |
xyz= [[[2. 3. 4.]]] output= [[nan]] | |
Traceback (most recent call last): | |
File "(... removed ...)xyz_to_xyzw.py", line 69, in <module> | |
_tests() | |
File "(... removed ...)xyz_to_xyzw.py", line 59, in _tests | |
test_case( | |
File "(... removed ...)xyz_to_xyzw.py", line 57, in test_case | |
np.testing.assert_allclose(output, expected_output) | |
File "/home/stian/venv/lib/python3.9/site-packages/numpy/testing/_private/utils.py", line 1530, in assert_allclose | |
assert_array_compare(compare, actual, desired, err_msg=str(err_msg), | |
File "/home/stian/venv/lib/python3.9/site-packages/numpy/testing/_private/utils.py", line 768, in assert_array_compare | |
flagged = func_assert_same_pos(x, y, func=isnan, hasval='nan') | |
File "/home/stian/venv/lib/python3.9/site-packages/numpy/testing/_private/utils.py", line 745, in func_assert_same_pos | |
raise AssertionError(msg) | |
AssertionError: | |
Not equal to tolerance rtol=1e-07, atol=0 | |
x and y nan location mismatch: | |
x: array([[nan]], dtype=float32) | |
y: array([[1.]], dtype=float32) | |
Adjusting refcount for module GPU by 1 | |
Adjusting refcount for module GPU by -1 | |
Adjusting refcount for module GPU by -1 |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment