Skip to content

Instantly share code, notes, and snippets.

@abadams
Created November 13, 2018 18:29
Show Gist options
  • Save abadams/cfd82f98306dd40eeb875f25282c172f to your computer and use it in GitHub Desktop.
Autoscheduled camera pipe: lowered Halide IR for the `processed` pipeline
module name=processed, target=x86-64-linux-avx-avx2-f16c-fma-no_asserts-no_bounds_query-no_runtime-sse41
external_plus_metadata func processed (input, matrix_3200, matrix_7000, black_level, color_temp, contrast, gamma, sharpen_strength, white_level, processed) {
assert((reinterpret(uint64, processed.buffer) != (uint64)0), halide_error_buffer_argument_is_null("processed"))
assert((reinterpret(uint64, matrix_7000.buffer) != (uint64)0), halide_error_buffer_argument_is_null("matrix_7000"))
assert((reinterpret(uint64, matrix_3200.buffer) != (uint64)0), halide_error_buffer_argument_is_null("matrix_3200"))
assert((reinterpret(uint64, input.buffer) != (uint64)0), halide_error_buffer_argument_is_null("input"))
let input = _halide_buffer_get_host(input.buffer)
let input.min.0 = _halide_buffer_get_min(input.buffer, 0)
let input.stride.0 = _halide_buffer_get_stride(input.buffer, 0)
let input.min.1 = _halide_buffer_get_min(input.buffer, 1)
let input.stride.1 = _halide_buffer_get_stride(input.buffer, 1)
let matrix_3200 = _halide_buffer_get_host(matrix_3200.buffer)
let matrix_3200.min.0 = _halide_buffer_get_min(matrix_3200.buffer, 0)
let matrix_3200.stride.0 = _halide_buffer_get_stride(matrix_3200.buffer, 0)
let matrix_3200.min.1 = _halide_buffer_get_min(matrix_3200.buffer, 1)
let matrix_3200.stride.1 = _halide_buffer_get_stride(matrix_3200.buffer, 1)
let matrix_7000 = _halide_buffer_get_host(matrix_7000.buffer)
let matrix_7000.min.0 = _halide_buffer_get_min(matrix_7000.buffer, 0)
let matrix_7000.stride.0 = _halide_buffer_get_stride(matrix_7000.buffer, 0)
let matrix_7000.min.1 = _halide_buffer_get_min(matrix_7000.buffer, 1)
let matrix_7000.stride.1 = _halide_buffer_get_stride(matrix_7000.buffer, 1)
let processed = _halide_buffer_get_host(processed.buffer)
let processed.min.0 = _halide_buffer_get_min(processed.buffer, 0)
let processed.extent.0 = _halide_buffer_get_extent(processed.buffer, 0)
let processed.stride.0 = _halide_buffer_get_stride(processed.buffer, 0)
let processed.min.1 = _halide_buffer_get_min(processed.buffer, 1)
let processed.extent.1 = _halide_buffer_get_extent(processed.buffer, 1)
let processed.stride.1 = _halide_buffer_get_stride(processed.buffer, 1)
let processed.min.2 = _halide_buffer_get_min(processed.buffer, 2)
let processed.stride.2 = _halide_buffer_get_stride(processed.buffer, 2)
assert((input.stride.0 == 1), 0)
assert((matrix_3200.stride.0 == 1), 0)
assert((matrix_7000.stride.0 == 1), 0)
assert((processed.stride.0 == 1), 0)
produce processed {
let t283409108 = (1.000000f/color_temp)
let t283409109 = ((input.min.1*input.stride.1) + input.min.0)
let t283409100 = pow_f32(2.000000f, (contrast*0.010000f))
let t283409099 = (1.000000f/gamma)
let t283409098 = (1.000000f/float32((white_level - black_level)))
let t283409091 = ((processed.extent.1 + 46)/48)
let t283409093 = ((processed.extent.0 + 1278)/1280)
let t283409106 = (((2 - matrix_7000.min.1)*matrix_7000.stride.1) - matrix_7000.min.0)
let t283409107 = (((2 - matrix_3200.min.1)*matrix_3200.stride.1) - matrix_3200.min.0)
let t283409096 = (1286 - t283409109)
let t283409095 = (14 - t283409109)
let t283409104 = ((t283409108*5894.736816f) + -0.842105f)
let t283409103 = ((t283409108*-5894.736816f) + 1.842105f)
let t283409092 = (((processed.extent.1/2)*2) + -48)
let t283409094 = (((processed.extent.0/2)*2) + -1280)
let t283409101 = ((matrix_7000.min.1*matrix_7000.stride.1) + matrix_7000.min.0)
let t283409102 = ((matrix_3200.min.1*matrix_3200.stride.1) + matrix_3200.min.0)
let t283409105 = (((processed.min.2*processed.stride.2) + (processed.min.1*processed.stride.1)) + processed.min.0)
let t283409097 = uint8(max(min((sharpen_strength*32.000000f), 255.000000f), 0.000000f))
parallel (processed.s0._::y._::yo, 0, t283409091) {
let processed.s0._::y._::yi.base = min((processed.s0._::y._::yo*48), t283409092)
let t283409123 = (processed.s0._::y._::yi.base + -1)
let t283409124 = (processed.s0._::y._::yi.base/2)
let t283409125 = (processed.s0._::y._::yi.base % 2)
let t283409126 = (0 - t283409125)
let t283409127 = ((processed.s0._::y._::yi.base + 81)/2)
let t283409128 = max((t283409124 + 21), (((t283409126/10)*5) + t283409127))
let t283409129 = ((processed.s0._::y._::yi.base + -3)/2)
let t283409130 = (t283409123/2)
let t283409115 = ((processed.s0._::y._::yi.base + 89)/2)
let t283409112 = (t283409128 - t283409130)
let t283409111 = (t283409128 - t283409129)
let t283409110 = (54 - t283409125)
let t283409121 = ((t283409100*2.000000f) + -2.000000f)
parallel (processed.s0.v0.v0o, 0, t283409093) {
let processed.s0.v0.v0i.base = min((processed.s0.v0.v0o*1280), t283409094)
allocate deinterleaved[int16 * 657 * ((t283409110/2) + 1) * 4] in Stack
let g_gb.v0.extent_realized.s = (max(max(((processed.s0.v0.v0i.base + 1309)/2), (((processed.s0.v0.v0i.base + 1293)/2) + (((1296 - (processed.s0.v0.v0i.base % 2))/16)*8))), ((processed.s0.v0.v0i.base + 1307)/2)) - ((processed.s0.v0.v0i.base + -3)/2))
allocate g_gb[int16 * (g_gb.v0.extent_realized.s + 1) * 43] in Stack
allocate g_gr[int16 * 657 * (t283409111 + 5)] in Stack
allocate g_r[int16 * 657 * (t283409112 + 5)] in Stack
allocate r_r[int16 * 657 * ((t283409126/2) + 27)] in Stack
allocate r_b[int16 * 977 * ((t283409126/2) + 26)] in Stack
allocate f1[int16 * 1297 * ((t283409126/2) + 26)] in Stack
allocate f3[int16 * 2548 * ((t283409126/2) + 26)] in Stack
let b_b._::y.extent_realized.s = (max(max((((t283409126/10)*5) + t283409115), t283409127), (t283409124 + 24)) - t283409129)
let b_b.v0.extent_realized.s = (max(max(max(((processed.s0.v0.v0i.base + 1311)/2), (((processed.s0.v0.v0i.base + 1295)/2) + (((1296 - (processed.s0.v0.v0i.base % 2))/16)*8))), ((processed.s0.v0.v0i.base/2) + 1287)), ((processed.s0.v0.v0i.base + 1309)/2)) - ((processed.s0.v0.v0i.base + -1)/2))
allocate b_b[int16 * (b_b.v0.extent_realized.s + 1) * (b_b._::y.extent_realized.s + 1)] in Stack
let t283409152 = (t283409110/2)
let t283409153 = (t283409152*657)
let t283409154 = ((processed.s0.v0.v0i.base + -3)/2)
let t283409155 = (processed.s0.v0.v0i.base/2)
let t283409156 = (processed.s0.v0.v0i.base % 2)
let t283409157 = (4 - t283409156)
let t283409158 = ((processed.s0.v0.v0i.base + -1)/2)
let t283409144 = (t283409156 == 0)
let t283409135 = ((1256 - t283409156)/2)
let t283409145 = (t283409157/2)
let t283409138 = (max(max(((processed.s0.v0.v0i.base + 1311)/2), (((processed.s0.v0.v0i.base + 1295)/2) + (((((processed.s0.v0.v0i.base + 1) % 2) + 647)/8)*8))), ((processed.s0.v0.v0i.base + 1309)/2)) - t283409158)
let t283409151 = (processed.s0.v0.v0i.base - t283409105)
let t283409140 = (-1 - g_gb.v0.extent_realized.s)
let t283409149 = (((0 - t283409156)/2) + 1)
let t283409134 = (t283409153 + 657)
let t283409133 = ((t283409155*2) + t283409096)
let t283409136 = (((t283409152*1971) - t283409154) + 1971)
let t283409146 = (((t283409152*1314) - t283409154) + 1314)
let t283409141 = ((t283409153 - t283409154) + 657)
let t283409137 = (g_gb.v0.extent_realized.s + 1)
for (processed.s0._::y._::yi._::yio, 0, 3) {
let f7.s0._::y.max_2.s = ((processed.s0._::y._::yi._::yio*8) + t283409124)
let f7.s0._::y.min_2 = ((processed.s0._::y._::yi._::yio*8) + t283409130)
let b_b.s0._::y.min_2 = select((0 < processed.s0._::y._::yi._::yio), (f7.s0._::y.max_2.s + 1), ((processed.s0._::y._::yi._::yio*8) + t283409129))
let f3.s0._::y.min_2 = select((0 < processed.s0._::y._::yi._::yio), (f7.s0._::y.max_2.s + 1), f7.s0._::y.min_2)
let r_r.s0._::y.min_2 = select((0 < processed.s0._::y._::yi._::yio), (f7.s0._::y.max_2.s + 2), f7.s0._::y.min_2)
let g_b.s0._::y.min_2 = min((f7.s0._::y.min_2 + -1), f3.s0._::y.min_2)
let g_gr.s0._::y.min_2 = select((0 < processed.s0._::y._::yi._::yio), (f7.s0._::y.max_2.s + 2), ((processed.s0._::y._::yi._::yio*8) + t283409129))
produce deinterleaved {
let t283409163 = (t283409157/2)
let t283409159 = (((f7.s0._::y.max_2.s - g_gr.s0._::y.min_2) + 15)/6)
let t283409161 = ((t283409163*2) + 1272)
let t283409160 = (f7.s0._::y.max_2.s + 4)
for (deinterleaved.s0._::y._::yo, 0, t283409159) {
let deinterleaved.s0._::y._::yi.base = min(((deinterleaved.s0._::y._::yo*6) + g_gr.s0._::y.min_2), t283409160)
allocate shifted[int16 * 1300 * 16] in Stack
produce shifted {
for (shifted.s0._::y._::yo, 0, 2) {
allocate input_im[uint16 * 1292] in Stack
let t283409166 = (shifted.s0._::y._::yo*8)
let t283409165 = ((((shifted.s0._::y._::yo*4) + deinterleaved.s0._::y._::yi.base)*2) + 10)
for (shifted.s0._::y._::yi, 0, 8) {
produce input_im {
let t283409167 = (((shifted.s0._::y._::yi + t283409165)*input.stride.1) + t283409095)
for (input_im.s0._0._0, 0, 80) {
let input_im.s0._0._0_vec.base.s = ((input_im.s0._0._0*8) + t283409154)
input_im[ramp(((input_im.s0._0._0_vec.base.s - t283409154)*2), 1, 16)] = input[ramp(((input_im.s0._0._0_vec.base.s*2) + t283409167), 1, 16)]
}
input_im[ramp(t283409161, 1, 16)] = input[ramp((((shifted.s0._::y._::yi + t283409165)*input.stride.1) + t283409133), 1, 16)]
}
consume input_im {
let t283409168 = (((shifted.s0._::y._::yi + t283409166)*650) - t283409154)
for (shifted.s0.v0.v0, 0, 80) {
let shifted.s0.v0.v0_vec.base.s = ((shifted.s0.v0.v0*8) + t283409154)
shifted[ramp(((shifted.s0.v0.v0_vec.base.s + t283409168)*2), 1, 16)] = int16x16(input_im[ramp(((shifted.s0.v0.v0_vec.base.s - t283409154)*2), 1, 16)])
}
shifted[ramp((((((shifted.s0._::y._::yi + t283409166)*650) + t283409163)*2) + 1272), 1, 16)] = int16x16(input_im[ramp(t283409161, 1, 16)])
}
}
free input_im
}
}
consume shifted {
let t283409170 = (deinterleaved.s0._::y._::yi.base - t283409129)
for (deinterleaved.s0._::y._::yi._::yio, 0, 3) {
allocate denoised[int16 * 5184] in Stack
produce denoised {
let t283409171 = (((deinterleaved.s0._::y._::yi._::yio*2) + deinterleaved.s0._::y._::yi.base)*2)
let t283409172 = (deinterleaved.s0._::y._::yi.base*2)
for (denoised.s0._::y, t283409171, 4) {
let t283409173 = (((denoised.s0._::y - t283409172)*650) - t283409154)
let t283409174 = (((denoised.s0._::y - t283409171)*648) - t283409154)
for (denoised.s0.v0.v0, 0, 81) {
let denoised.s0.v0.v0_vec.base.s = ((denoised.s0.v0.v0*8) + t283409154)
denoised[ramp(((denoised.s0.v0.v0_vec.base.s + t283409174)*2), 1, 16)] = max(min(shifted[ramp((((denoised.s0.v0.v0_vec.base.s + t283409173)*2) + 2602), 1, 16)], max(shifted[ramp((((denoised.s0.v0.v0_vec.base.s + t283409173)*2) + 2600), 1, 16)], max(shifted[ramp((((denoised.s0.v0.v0_vec.base.s + t283409173)*2) + 2604), 1, 16)], max(shifted[ramp((((denoised.s0.v0.v0_vec.base.s + t283409173)*2) + 2), 1, 16)], shifted[ramp((((denoised.s0.v0.v0_vec.base.s + t283409173)*2) + 5202), 1, 16)])))), x16((int16)0))
}
}
}
consume denoised {
let t283409175 = ((deinterleaved.s0._::y._::yi._::yio*2) + t283409170)
for (deinterleaved.s0.v1, 0, 4) {
let t283409181 = (deinterleaved.s0.v1*t283409134)
let t283409178 = (deinterleaved.s0.v1 == 2)
let t283409177 = (deinterleaved.s0.v1 == 1)
let t283409176 = (deinterleaved.s0.v1 == 0)
let t283409179 = (t283409181 - t283409154)
let t283409180 = (t283409135 + t283409181)
for (deinterleaved.s0._::y._::yi._::yii, 0, 2) {
let t283409182 = ((deinterleaved.s0._::y._::yi._::yii*1296) - t283409154)
let t283409183 = (((deinterleaved.s0._::y._::yi._::yii + t283409175)*657) + t283409179)
for (deinterleaved.s0.v0.v0, 0, 40) {
let deinterleaved.s0.v0.v0_vec.base = ((deinterleaved.s0.v0.v0*16) + t283409154)
deinterleaved[ramp((deinterleaved.s0.v0.v0_vec.base + t283409183), 1, 16)] = select(t283409176, denoised[ramp(((deinterleaved.s0.v0.v0_vec.base + t283409182)*2), 2, 16)], select(t283409177, denoised[ramp((((deinterleaved.s0.v0.v0_vec.base + t283409182)*2) + 1), 2, 16)], select(t283409178, denoised[ramp((((deinterleaved.s0.v0.v0_vec.base + t283409182)*2) + 1296), 2, 16)], denoised[ramp((((deinterleaved.s0.v0.v0_vec.base + t283409182)*2) + 1297), 2, 16)])))
}
deinterleaved[ramp((((deinterleaved.s0._::y._::yi._::yii + t283409175)*657) + t283409180), 1, 16)] = select(t283409176, denoised[ramp((((deinterleaved.s0._::y._::yi._::yii*1296) + t283409135)*2), 2, 16)], select(t283409177, denoised[ramp(((((deinterleaved.s0._::y._::yi._::yii*1296) + t283409135)*2) + 1), 2, 16)], select(t283409178, denoised[ramp(((((deinterleaved.s0._::y._::yi._::yii*1296) + t283409135)*2) + 1296), 2, 16)], denoised[ramp(((((deinterleaved.s0._::y._::yi._::yii*1296) + t283409135)*2) + 1297), 2, 16)])))
}
}
}
free denoised
}
}
free shifted
}
}
produce g_gb {
consume deinterleaved {
let t283409184 = ((f7.s0._::y.max_2.s - g_gr.s0._::y.min_2) + 10)
for (g_gb.s0._::y, g_gr.s0._::y.min_2, t283409184) {
let t283409187 = (g_gb.s0._::y - t283409129)
let t283409186 = ((t283409137*t283409187) - t283409154)
let t283409185 = ((t283409187*657) + t283409136)
for (g_gb.s0.v0.v0, 0, 41) {
let g_gb.s0.v0.v0_vec.base = ((g_gb.s0.v0.v0*16) + t283409154)
g_gb[ramp((g_gb.s0.v0.v0_vec.base + t283409186), 1, 16)] = deinterleaved[ramp((g_gb.s0.v0.v0_vec.base + t283409185), 1, 16)]
}
}
}
}
produce g_gr {
consume deinterleaved {
let t283409188 = ((f7.s0._::y.max_2.s - g_gr.s0._::y.min_2) + 10)
for (g_gr.s0._::y, g_gr.s0._::y.min_2, t283409188) {
let t283409189 = (((g_gr.s0._::y - t283409129)*657) - t283409154)
for (g_gr.s0.v0.v0, 0, 41) {
let g_gr.s0.v0.v0_vec.base = ((g_gr.s0.v0.v0*16) + t283409154)
g_gr[ramp((g_gr.s0.v0.v0_vec.base + t283409189), 1, 16)] = deinterleaved[ramp((g_gr.s0.v0.v0_vec.base + t283409189), 1, 16)]
}
}
}
}
let g_b._::y.min_realized = min(min(min(f3.s0._::y.min_2, f7.s0._::y.min_2), (f7.s0._::y.min_2 + -1)), g_b.s0._::y.min_2)
let g_b._::y.extent_realized.s.s = max((f7.s0._::y.max_2.s + 4), (((((f7.s0._::y.max_2.s - f7.s0._::y.min_2) + 8)/5)*5) + f7.s0._::y.min_2))
allocate g_b[int16 * (t283409138 + 1) * ((g_b._::y.extent_realized.s.s - g_b._::y.min_realized) + 5)] in Stack
produce g_b {
consume g_gr {
consume g_gb {
let t283409190 = ((f7.s0._::y.max_2.s - g_b.s0._::y.min_2) + 9)
let t283409191 = (t283409138 + 1)
for (g_b.s0._::y, g_b.s0._::y.min_2, t283409190) {
let t283409193 = (((g_b.s0._::y - g_b._::y.min_realized)*t283409191) - t283409158)
let t283409192 = (g_b.s0._::y - t283409129)
for (g_b.s0.v0.v0, 0, 41) {
let g_b.s0.v0.v0_vec.base = ((g_b.s0.v0.v0*16) + t283409158)
g_b[ramp((g_b.s0.v0.v0_vec.base + t283409193), 1, 16)] = (let t283409016 = g_gb[ramp((((t283409137*t283409192) + (g_b.s0.v0.v0_vec.base - t283409154)) + -1), 1, 16)] in (let t283409017 = g_gb[ramp(((t283409137*t283409192) + (g_b.s0.v0.v0_vec.base - t283409154)), 1, 16)] in (let t283409018 = g_gr[ramp((((t283409192*657) + (g_b.s0.v0.v0_vec.base - t283409154)) + 657), 1, 16)] in (let t283409019 = g_gr[ramp(((t283409192*657) + (g_b.s0.v0.v0_vec.base - t283409154)), 1, 16)] in select((absd(t283409016, t283409017) < absd(t283409018, t283409019)), int16x16((((int32x16(t283409016) + int32x16(t283409017)) + x16(1))/x16(2))), int16x16((((int32x16(t283409018) + int32x16(t283409019)) + x16(1))/x16(2))))))))
}
}
}
}
}
produce g_r {
consume g_gr {
consume g_gb {
let t283409194 = ((f7.s0._::y.max_2.s - r_r.s0._::y.min_2) + 10)
for (g_r.s0._::y, r_r.s0._::y.min_2, t283409194) {
let t283409196 = (((g_r.s0._::y - t283409130)*657) - t283409154)
let t283409195 = (g_r.s0._::y - t283409129)
for (g_r.s0.v0.v0, 0, 41) {
let g_r.s0.v0.v0_vec.base = ((g_r.s0.v0.v0*16) + t283409154)
g_r[ramp((g_r.s0.v0.v0_vec.base + t283409196), 1, 16)] = (let t283409022 = g_gr[ramp((((t283409195*657) + (g_r.s0.v0.v0_vec.base - t283409154)) + 1), 1, 16)] in (let t283409023 = g_gr[ramp(((t283409195*657) + (g_r.s0.v0.v0_vec.base - t283409154)), 1, 16)] in (let t283409025 = g_gb[ramp(((g_r.s0.v0.v0_vec.base - t283409154) + ((t283409137*t283409195) + t283409140)), 1, 16)] in (let t283409026 = g_gb[ramp(((t283409137*t283409195) + (g_r.s0.v0.v0_vec.base - t283409154)), 1, 16)] in select((absd(t283409022, t283409023) < absd(t283409025, t283409026)), int16x16((((int32x16(t283409022) + int32x16(t283409023)) + x16(1))/x16(2))), int16x16((((int32x16(t283409025) + int32x16(t283409026)) + x16(1))/x16(2))))))))
}
}
}
}
}
produce r_r {
consume deinterleaved {
let t283409197 = ((f7.s0._::y.max_2.s - r_r.s0._::y.min_2) + 10)
for (r_r.s0._::y, r_r.s0._::y.min_2, t283409197) {
let t283409199 = (((r_r.s0._::y - t283409130)*657) - t283409154)
let t283409198 = (((r_r.s0._::y - t283409129)*657) + t283409141)
for (r_r.s0.v0.v0, 0, 41) {
let r_r.s0.v0.v0_vec.base = ((r_r.s0.v0.v0*16) + t283409154)
r_r[ramp((r_r.s0.v0.v0_vec.base + t283409199), 1, 16)] = deinterleaved[ramp((r_r.s0.v0.v0_vec.base + t283409198), 1, 16)]
}
}
}
}
produce r_b {
consume r_r {
consume g_r {
consume g_b {
let t283409200 = ((f7.s0._::y.max_2.s - f3.s0._::y.min_2) + 9)
let t283409201 = (t283409138 + 1)
for (r_b.s0._::y, f3.s0._::y.min_2, t283409200) {
let t283409205 = (r_b.s0._::y - t283409130)
let t283409204 = ((t283409205*977) - t283409158)
let t283409202 = ((t283409205*657) - t283409154)
let t283409203 = (((r_b.s0._::y - g_b._::y.min_realized)*t283409201) - t283409158)
for (r_b.s0.v0.v0, 0, 41) {
let r_b.s0.v0.v0_vec.base = ((r_b.s0.v0.v0*16) + t283409158)
r_b[ramp((r_b.s0.v0.v0_vec.base + t283409204), 1, 16)] = (let t283409028 = r_r[ramp((r_b.s0.v0.v0_vec.base + t283409202), 1, 16)] in (let t283409029 = r_r[ramp(((r_b.s0.v0.v0_vec.base + t283409202) + 656), 1, 16)] in (let t283409030 = r_r[ramp(((r_b.s0.v0.v0_vec.base + t283409202) + -1), 1, 16)] in (let t283409031 = r_r[ramp(((r_b.s0.v0.v0_vec.base + t283409202) + 657), 1, 16)] in (let t283409032 = g_b[ramp((r_b.s0.v0.v0_vec.base + t283409203), 1, 16)] in select((absd(t283409028, t283409029) < absd(t283409030, t283409031)), ((t283409032 - int16x16((((int32x16(g_r[ramp((r_b.s0.v0.v0_vec.base + t283409202), 1, 16)]) + int32x16(g_r[ramp(((r_b.s0.v0.v0_vec.base + t283409202) + 656), 1, 16)])) + x16(1))/x16(2)))) + int16x16((((int32x16(t283409028) + int32x16(t283409029)) + x16(1))/x16(2)))), ((t283409032 - int16x16((((int32x16(g_r[ramp(((r_b.s0.v0.v0_vec.base + t283409202) + -1), 1, 16)]) + int32x16(g_r[ramp(((r_b.s0.v0.v0_vec.base + t283409202) + 657), 1, 16)])) + x16(1))/x16(2)))) + int16x16((((int32x16(t283409030) + int32x16(t283409031)) + x16(1))/x16(2))))))))))
}
}
}
}
}
}
allocate r_gb[int16 * 977 * ((f7.s0._::y.max_2.s - f7.s0._::y.min_2) + 9)] in Stack
allocate r_gr[int16 * 656 * ((f7.s0._::y.max_2.s - f3.s0._::y.min_2) + 9)] in Stack
produce r_gr {
consume r_r {
consume g_r {
consume g_gr {
let t283409206 = ((f7.s0._::y.max_2.s - f3.s0._::y.min_2) + 9)
for (r_gr.s0._::y, f3.s0._::y.min_2, t283409206) {
let t283409208 = ((r_gr.s0._::y - t283409129)*657)
let t283409209 = (((r_gr.s0._::y - f3.s0._::y.min_2)*656) - t283409158)
let t283409207 = (r_gr.s0._::y - t283409130)
for (r_gr.s0.v0.v0, 0, 41) {
let r_gr.s0.v0.v0_vec.base = ((r_gr.s0.v0.v0*16) + t283409158)
r_gr[ramp((r_gr.s0.v0.v0_vec.base + t283409209), 1, 16)] = (let t283409035 = ((t283409207*657) + (r_gr.s0.v0.v0_vec.base - t283409154)) in (let t283409036.s = ((t283409207*657) + (r_gr.s0.v0.v0_vec.base - t283409154)) in ((g_gr[ramp(((r_gr.s0.v0.v0_vec.base - t283409154) + t283409208), 1, 16)] - int16x16((((int32x16(g_r[ramp(t283409035, 1, 16)]) + int32x16(g_r[ramp((t283409036.s + -1), 1, 16)])) + x16(1))/x16(2)))) + int16x16((((int32x16(r_r[ramp((t283409036.s + -1), 1, 16)]) + int32x16(r_r[ramp(t283409035, 1, 16)])) + x16(1))/x16(2))))))
}
}
}
}
}
}
produce f1 {
consume r_gr {
consume r_r {
let t283409210 = ((f7.s0._::y.max_2.s - f3.s0._::y.min_2) + 9)
for (f1.s0._::y, f3.s0._::y.min_2, t283409210) {
let t283409214 = (f1.s0._::y - t283409130)
let t283409212 = (((f1.s0._::y - f3.s0._::y.min_2)*656) - t283409158)
for (f1.s0.v0.v0, 0, 81) {
f1[ramp(((t283409214*1297) + (f1.s0.v0.v0*16)), 1, 16)] = interleave_vectors(r_r[ramp((((f1.s0.v0.v0*8) + (t283409214*657)) + 1), 1, 8)], select(t283409144, r_gr[ramp((((f1.s0.v0.v0*8) + t283409155) + t283409212), 1, 8)], r_r[ramp((((t283409214*657) - t283409154) + ((f1.s0.v0.v0*8) + t283409155)), 1, 8)]))
}
}
}
}
}
free r_gr
produce f3 {
consume g_b {
consume g_gb {
let t283409215 = ((f7.s0._::y.max_2.s - f3.s0._::y.min_2) + 9)
let t283409216 = (t283409138 + 1)
for (f3.s0._::y, f3.s0._::y.min_2, t283409215) {
let t283409219 = ((f3.s0._::y - t283409130)*2548)
let t283409217 = ((f3.s0._::y - t283409129)*t283409137)
let t283409218 = (((f3.s0._::y - g_b._::y.min_realized)*t283409216) - t283409158)
for (f3.s0.v0.v0, 0, 81) {
f3[ramp(((f3.s0.v0.v0*16) + t283409219), 1, 16)] = interleave_vectors(g_gb[ramp((((f3.s0.v0.v0*8) + t283409217) + 1), 1, 8)], select(t283409144, g_b[ramp((((f3.s0.v0.v0*8) + t283409155) + t283409218), 1, 8)], g_gb[ramp(((t283409217 - t283409154) + ((f3.s0.v0.v0*8) + t283409155)), 1, 8)]))
}
}
}
}
}
allocate f4[int16 * 2548 * ((f7.s0._::y.max_2.s - f7.s0._::y.min_2) + 9)] in Stack
produce f4 {
consume g_r {
consume g_gr {
let t283409220 = ((f7.s0._::y.max_2.s - f7.s0._::y.min_2) + 9)
for (f4.s0._::y, f7.s0._::y.min_2, t283409220) {
let t283409222 = ((f4.s0._::y - t283409129)*657)
let t283409223 = ((f4.s0._::y - f7.s0._::y.min_2)*2548)
let t283409221 = (f4.s0._::y - t283409130)
for (f4.s0.v0.v0, 0, 81) {
f4[ramp(((f4.s0.v0.v0*16) + t283409223), 1, 16)] = interleave_vectors(g_r[ramp((((f4.s0.v0.v0*8) + (t283409221*657)) + 1), 1, 8)], select(t283409144, g_gr[ramp((((f4.s0.v0.v0*8) + t283409145) + t283409222), 1, 8)], g_r[ramp(((t283409221*657) + ((f4.s0.v0.v0*8) + t283409145)), 1, 8)]))
}
}
}
}
}
produce b_b {
consume deinterleaved {
let t283409224 = ((f7.s0._::y.max_2.s - b_b.s0._::y.min_2) + 9)
let t283409225 = (b_b.v0.extent_realized.s + 1)
for (b_b.s0._::y, b_b.s0._::y.min_2, t283409224) {
let t283409228 = (b_b.s0._::y - t283409129)
let t283409227 = ((t283409225*t283409228) - t283409158)
let t283409226 = ((t283409228*657) + t283409146)
for (b_b.s0.v0.v0, 0, 41) {
let b_b.s0.v0.v0_vec.base = ((b_b.s0.v0.v0*16) + t283409158)
b_b[ramp((b_b.s0.v0.v0_vec.base + t283409227), 1, 16)] = deinterleaved[ramp((b_b.s0.v0.v0_vec.base + t283409226), 1, 16)]
}
}
}
}
allocate b_gr[int16 * 5248] in Stack
let f7._::y.extent_realized.s = max(((((f7.s0._::y.max_2.s - f7.s0._::y.min_2) + 8)/5)*5), ((f7.s0._::y.max_2.s - f7.s0._::y.min_2) + 4))
allocate f7[int16 * 1296 * (f7._::y.extent_realized.s + 5)] in Stack
produce f7 {
consume b_b {
consume g_r {
consume g_b {
consume g_gr {
let t283409229 = (((f7.s0._::y.max_2.s - f7.s0._::y.min_2) + 13)/5)
for (f7.s0._::y._::yo, 0, t283409229) {
produce b_gr {
let t283409237 = (-1 - t283409138)
let t283409238 = (-1 - b_b.v0.extent_realized.s)
let t283409234 = ((f7.s0._::y._::yo*5) + f7.s0._::y.min_2)
let t283409235 = (t283409138 + 1)
let t283409236 = (b_b.v0.extent_realized.s + 1)
for (b_gr.s0._::y, t283409234, 5) {
let t283409240 = ((b_gr.s0._::y - g_b._::y.min_realized)*t283409235)
let t283409241 = (((b_gr.s0._::y % 8)*656) - t283409158)
let t283409239 = (b_gr.s0._::y - t283409129)
for (b_gr.s0.v0.v0, 0, 41) {
let b_gr.s0.v0.v0_vec.base = ((b_gr.s0.v0.v0*16) + t283409158)
b_gr[ramp((b_gr.s0.v0.v0_vec.base + t283409241), 1, 16)] = ((g_gr[ramp((((t283409239*657) - t283409154) + b_gr.s0.v0.v0_vec.base), 1, 16)] - int16x16((((int32x16(g_b[ramp(((b_gr.s0.v0.v0_vec.base - t283409158) + t283409240), 1, 16)]) + int32x16(g_b[ramp(((b_gr.s0.v0.v0_vec.base - t283409158) + (t283409237 + t283409240)), 1, 16)])) + x16(1))/x16(2)))) + int16x16((((int32x16(b_b[ramp(((t283409236*t283409239) + (b_gr.s0.v0.v0_vec.base - t283409158)), 1, 16)]) + int32x16(b_b[ramp(((b_gr.s0.v0.v0_vec.base - t283409158) + ((t283409236*t283409239) + t283409238)), 1, 16)])) + x16(1))/x16(2))))
}
}
}
allocate b_r[int16 * 3280] in Stack
produce b_r {
let t283409242 = ((f7.s0._::y._::yo*5) + f7.s0._::y.min_2)
for (b_r.s0._::y, t283409242, 5) {
let t283409247 = ((b_r.s0._::y - t283409129)*(b_b.v0.extent_realized.s + 1))
let t283409250 = ((b_r.s0._::y - g_b._::y.min_realized)*(t283409138 + 1))
let t283409252 = (((b_r.s0._::y - t283409242)*656) - t283409158)
let t283409249 = (((b_r.s0._::y - t283409130)*657) - t283409154)
for (b_r.s0.v0.v0, 0, 41) {
let b_r.s0.v0.v0_vec.base = ((b_r.s0.v0.v0*16) + t283409158)
b_r[ramp((b_r.s0.v0.v0_vec.base + t283409252), 1, 16)] = (let t283409049 = b_b[ramp(((b_r.s0.v0.v0_vec.base - t283409158) + t283409247), 1, 16)] in (let t283409050 = b_b[ramp(((t283409247 - b_b.v0.extent_realized.s) + (b_r.s0.v0.v0_vec.base - t283409158)), 1, 16)] in (let t283409051 = b_b[ramp((((b_r.s0.v0.v0_vec.base - t283409158) + t283409247) + 1), 1, 16)] in (let t283409052 = b_b[ramp((((b_r.s0.v0.v0_vec.base - t283409158) + (t283409247 - b_b.v0.extent_realized.s)) + -1), 1, 16)] in (let t283409053 = g_r[ramp((b_r.s0.v0.v0_vec.base + t283409249), 1, 16)] in select((absd(t283409049, t283409050) < absd(t283409051, t283409052)), ((t283409053 - int16x16((((int32x16(g_b[ramp(((b_r.s0.v0.v0_vec.base - t283409158) + t283409250), 1, 16)]) + int32x16(g_b[ramp(((t283409250 - t283409138) + (b_r.s0.v0.v0_vec.base - t283409158)), 1, 16)])) + x16(1))/x16(2)))) + int16x16((((int32x16(t283409049) + int32x16(t283409050)) + x16(1))/x16(2)))), ((t283409053 - int16x16((((int32x16(g_b[ramp((((b_r.s0.v0.v0_vec.base - t283409158) + t283409250) + 1), 1, 16)]) + int32x16(g_b[ramp((((b_r.s0.v0.v0_vec.base - t283409158) + (t283409250 - t283409138)) + -1), 1, 16)])) + x16(1))/x16(2)))) + int16x16((((int32x16(t283409051) + int32x16(t283409052)) + x16(1))/x16(2))))))))))
}
}
}
consume b_r {
consume b_gr {
let t283409255 = (f7.s0._::y._::yo*5)
let t283409253 = (f7.s0._::y.min_2 + t283409255)
for (f7.s0._::y._::yi, 0, 5) {
let t283409257 = (((f7.s0._::y._::yi + t283409253) % 8)*656)
let t283409259 = ((f7.s0._::y._::yi + t283409255)*81)
let t283409258 = (f7.s0._::y._::yi*656)
let t283409256 = (f7.s0._::y._::yi*82)
for (f7.s0.v0.v0, 0, 81) {
f7[ramp(((f7.s0.v0.v0 + t283409259)*16), 1, 16)] = interleave_vectors(b_r[ramp(((f7.s0.v0.v0 + t283409256)*8), 1, 8)], select(t283409144, b_gr[ramp((((f7.s0.v0.v0*8) + t283409149) + t283409257), 1, 8)], b_r[ramp((((f7.s0.v0.v0*8) + t283409149) + t283409258), 1, 8)]))
}
}
}
}
free b_r
}
}
}
}
}
}
free b_gr
allocate f8[int16 * 1296 * 18] in Stack
produce f8 {
consume f7 {
consume b_b {
consume g_b {
consume g_gb {
let t283409260 = ((processed.s0._::y._::yi._::yio*16) + processed.s0._::y._::yi.base)
let t283409261 = (t283409138 + 1)
let t283409262 = (b_b.v0.extent_realized.s + 1)
for (f8.s0._::y._::yo, 0, 6) {
let f8.s0._::y._::yi.base.s = ((f8.s0._::y._::yo*3) + t283409260)
let t283409265 = (f8.s0._::y._::yi.base.s - t283409260)
for (f8.s0.v0.v0o, 0, 81) {
allocate f6[int16 * 16] in Stack
let t283409270 = (f8.s0.v0.v0o*8)
let t283409268 = (t283409158 + t283409270)
let t283409267 = (t283409155 + t283409270)
for (f8.s0._::y._::yi, 0, 3) {
let f6.s0._::y.min_4 = select((0 < f8.s0._::y._::yi), ((f8.s0._::y._::yi + f8.s0._::y._::yi.base.s)/2), (((f8.s0._::y._::yi + f8.s0._::y._::yi.base.s) + -1)/2))
allocate b_gb[int16 * (((((t283409267 - t283409268) + 7)/8)*8) + 8) * ((((f8.s0._::y._::yi + f8.s0._::y._::yi.base.s) + 1)/2) - f6.s0._::y.min_4)] in Stack
produce b_gb {
let t283409276 = (t283409267 - t283409268)
let t283409272 = ((t283409276 + 15)/8)
let t283409271 = ((((f8.s0._::y._::yi + f8.s0._::y._::yi.base.s) + 1)/2) - f6.s0._::y.min_4)
let t283409273 = (t283409268 - t283409158)
let t283409274 = (t283409268 - t283409154)
let t283409275 = ((((t283409276 + 7)/8)*8) + 8)
for (b_gb.s0._::y, f6.s0._::y.min_4, t283409271) {
let t283409278 = ((b_gb.s0._::y - g_b._::y.min_realized)*t283409261)
let t283409279 = ((b_gb.s0._::y - f6.s0._::y.min_4)*t283409275)
let t283409277 = (b_gb.s0._::y - t283409129)
for (b_gb.s0.v0.v0, 0, t283409272) {
b_gb[ramp(((b_gb.s0.v0.v0*8) + t283409279), 1, 8)] = ((g_gb[ramp(((b_gb.s0.v0.v0*8) + ((t283409137*t283409277) + t283409274)), 1, 8)] - int16x8((((int32x8(g_b[ramp((((b_gb.s0.v0.v0*8) + t283409273) + t283409278), 1, 8)]) + int32x8(g_b[ramp(((((b_gb.s0.v0.v0*8) + t283409273) + t283409278) + 1), 1, 8)])) + x8(1))/x8(2)))) + int16x8((((int32x8(b_b[ramp(((t283409262*t283409277) + ((b_gb.s0.v0.v0*8) + t283409273)), 1, 8)]) + int32x8(b_b[ramp((((t283409262*t283409277) + ((b_gb.s0.v0.v0*8) + t283409273)) + 1), 1, 8)])) + x8(1))/x8(2))))
}
}
}
produce f6 {
consume b_gb {
let t283409284 = (t283409267 - t283409268)
let t283409280 = ((((f8.s0._::y._::yi + f8.s0._::y._::yi.base.s) + 1)/2) - f6.s0._::y.min_4)
let t283409282 = (t283409267 - t283409158)
let t283409281 = ((((t283409284 + 7)/8)*8) + 8)
for (f6.s0._::y, f6.s0._::y.min_4, t283409280) {
f6[ramp(0, 1, 16)] = interleave_vectors(b_gb[ramp(((f6.s0._::y - f6.s0._::y.min_4)*t283409281), 1, 8)], select(t283409144, b_b[ramp((((f6.s0._::y - t283409129)*t283409262) + t283409282), 1, 8)], b_gb[ramp((((f6.s0._::y - f6.s0._::y.min_4)*t283409281) + t283409284), 1, 8)]))
}
}
}
free b_gb
consume f6 {
f8[ramp(((((f8.s0._::y._::yi + t283409265)*81) + f8.s0.v0.v0o)*16), 1, 16)] = select(((((f8.s0._::y._::yi + f8.s0._::y._::yi.base.s) + 1) % 2) == 0), f7[ramp((((((((f8.s0._::y._::yi + f8.s0._::y._::yi.base.s) + -1)/2) - f7.s0._::y.min_2)*81) + f8.s0.v0.v0o)*16), 1, 16)], f6[ramp(0, 1, 16)])
}
}
free f6
}
}
}
}
}
}
}
free g_b
free f7
allocate sharpen_strength_x32[uint8 * 1] in Stack
produce sharpen_strength_x32 {
sharpen_strength_x32[0] = t283409097
}
consume sharpen_strength_x32 {
consume f8 {
consume f4 {
consume f3 {
consume f1 {
consume r_b {
consume r_r {
consume g_r {
consume g_gb {
let t283409290 = (processed.s0._::y._::yi._::yio*16)
let t283409286 = (processed.s0._::y._::yi._::yio*4)
let t283409285 = (processed.s0._::y._::yi._::yio*2)
for (processed.s0._::y._::yi._::yii._::yiio, 0, 2) {
let f0.s0._::y.max_3.s = (((processed.s0._::y._::yi._::yii._::yiio + t283409285)*4) + t283409124)
let r_gb.s0._::y.min_3 = select((0 < processed.s0._::y._::yi._::yii._::yiio), (f0.s0._::y.max_3.s + 1), (((processed.s0._::y._::yi._::yii._::yiio + t283409285)*4) + t283409130))
produce r_gb {
let t283409291 = ((f0.s0._::y.max_3.s - r_gb.s0._::y.min_3) + 5)
for (r_gb.s0._::y, r_gb.s0._::y.min_3, t283409291) {
let t283409293 = ((r_gb.s0._::y - t283409129)*t283409137)
let t283409294 = (((r_gb.s0._::y - f7.s0._::y.min_2)*977) - t283409158)
let t283409292 = (r_gb.s0._::y - t283409130)
for (r_gb.s0.v0.v0, 0, 41) {
let r_gb.s0.v0.v0_vec.base = ((r_gb.s0.v0.v0*16) + t283409158)
r_gb[ramp((r_gb.s0.v0.v0_vec.base + t283409294), 1, 16)] = (let t283409063 = ((t283409292*657) + (r_gb.s0.v0.v0_vec.base - t283409154)) in (let t283409064.s = ((t283409292*657) + (r_gb.s0.v0.v0_vec.base - t283409154)) in ((g_gb[ramp(((r_gb.s0.v0.v0_vec.base - t283409154) + t283409293), 1, 16)] - int16x16((((int32x16(g_r[ramp(t283409063, 1, 16)]) + int32x16(g_r[ramp((t283409064.s + 657), 1, 16)])) + x16(1))/x16(2)))) + int16x16((((int32x16(r_r[ramp(t283409063, 1, 16)]) + int32x16(r_r[ramp((t283409064.s + 657), 1, 16)])) + x16(1))/x16(2))))))
}
}
}
allocate f2[int16 * 10376] in Stack
allocate curve[uint8 * 2048] in Stack
consume r_gb {
let t283409300 = (processed.s0._::y._::yi.base + t283409290)
let t283409301 = (processed.s0._::y._::yi._::yii._::yiio*8)
let t283409299 = (t283409300 + t283409301)
let t283409298 = (t283409290 + t283409301)
let t283409295 = ((processed.s0._::y._::yi._::yii._::yiio*2) + t283409286)
for (processed.s0._::y._::yi._::yii._::yiii._::yiiio, 0, 2) {
let processed.s0._::y.min_5.s = (processed.s0._::y._::yi._::yii._::yiii._::yiiio + t283409295)
let curve.s0.v0.min_4 = select((0 < processed.s0._::y._::yi._::yii._::yiii._::yiiio), 1024, 0)
let f2.s0._::y.min_4 = ((processed.s0._::y.min_5.s*4) + (select((0 < processed.s0._::y._::yi._::yii._::yiii._::yiiio), 1, -1) + processed.s0._::y._::yi.base))
produce f2 {
let t283409302 = ((((processed.s0._::y.min_5.s*4) + processed.s0._::y._::yi.base) - f2.s0._::y.min_4) + 5)
for (f2.s0.v0.v0o, 0, 2) {
allocate f0[int16 * 656] in Stack
let t283409305 = (f2.s0.v0.v0o*641)
let t283409303 = (processed.s0.v0.v0i.base + t283409305)
for (f2.s0._::y, f2.s0._::y.min_4, t283409302) {
let f0.s0._::y.min_6 = select((f2.s0._::y.min_4 < f2.s0._::y), ((f2.s0._::y + 1)/2), (f2.s0._::y/2))
produce f0 {
let t283409306 = (((f2.s0._::y/2) - f0.s0._::y.min_6) + 1)
for (f0.s0._::y, f0.s0._::y.min_6, t283409306) {
let t283409307 = (f0.s0._::y - t283409130)
let t283409308 = (f0.s0._::y - f7.s0._::y.min_2)
for (f0.s0.v0.v0, 0, 41) {
let f0.s0.v0.v0_vec.base.s = ((f0.s0.v0.v0*16) + t283409303)
f0[ramp((f0.s0.v0.v0_vec.base.s - t283409303), 1, 16)] = interleave_vectors(select((((f0.s0.v0.v0_vec.base.s + 1) % 2) == 0), r_b[ramp(((t283409307*977) + (((f0.s0.v0.v0_vec.base.s + -1)/2) - t283409158)), 1, 8)], r_gb[ramp(((t283409308*977) + (((f0.s0.v0.v0_vec.base.s + -1)/2) - t283409158)), 1, 8)]), select(((f0.s0.v0.v0_vec.base.s % 2) == 0), r_b[ramp(((t283409307*977) + ((f0.s0.v0.v0_vec.base.s/2) - t283409158)), 1, 8)], r_gb[ramp(((t283409308*977) + ((f0.s0.v0.v0_vec.base.s/2) - t283409158)), 1, 8)]))
}
}
}
consume f0 {
let t283409309 = ((f2.s0._::y % 2) == 0)
let t283409311 = (((f2.s0._::y % 8)*1297) + t283409305)
let t283409310 = ((((f2.s0._::y/2) - t283409130)*1297) + t283409305)
for (f2.s0.v0.v0i.v0i, 0, 41) {
f2[ramp(((f2.s0.v0.v0i.v0i*16) + t283409311), 1, 16)] = select(t283409309, f1[ramp(((f2.s0.v0.v0i.v0i*16) + t283409310), 1, 16)], f0[ramp((f2.s0.v0.v0i.v0i*16), 1, 16)])
}
}
}
free f0
}
}
produce curve {
let t283409312 = ((1055 - curve.s0.v0.min_4)/32)
for (curve.s0.v0.v0, 0, t283409312) {
curve[ramp(((curve.s0.v0.v0*32) + curve.s0.v0.min_4), 1, 32)] = (let t283409069 = pow_f32(max(min((float32x32(ramp(((curve.s0.v0.v0*32) + (curve.s0.v0.min_4 - black_level)), 1, 32))*x32(t283409098)), x32(1.000000f)), x32(0.000000f)), x32(t283409099)) in select((x32(black_level) < ramp(((curve.s0.v0.v0*32) + curve.s0.v0.min_4), 1, 32)), select((x32(white_level) < ramp(((curve.s0.v0.v0*32) + curve.s0.v0.min_4), 1, 32)), x32((uint8)255), uint8x32((max(min((select((x32(0.500000f) < t283409069), (x32(1.000000f) - ((x32(1.000000f) - t283409069)*((((x32(1.000000f) - t283409069)*x32(t283409121)) - x32(t283409100)) + x32(2.000000f)))), ((((t283409069*x32(t283409121)) - x32(t283409100)) + x32(2.000000f))*t283409069))*x32(255.000000f)), x32(254.500000f)), x32(-0.500000f)) + x32(0.500000f)))), x32((uint8)0)))
}
}
allocate curved[uint8 * 2560 * 6 * 5] in Stack
allocate mask[int16 * 15360] in Stack
consume curve {
consume f2 {
produce curved {
let t283409316 = (processed.s0._::y.min_5.s*4)
let t283409314 = (t283409123 + t283409316)
let t283409315 = (processed.s0._::y._::yi.base + t283409316)
for (curved.s0.v0.v0o, 0, 2) {
allocate matrix_7000_im[float32 * 8] in Stack
produce matrix_7000_im {
for (matrix_7000_im.s0._1, 0, 2) {
matrix_7000_im[ramp((matrix_7000_im.s0._1*4), 1, 4)] = matrix_7000[ramp(((matrix_7000.stride.1*matrix_7000_im.s0._1) - t283409101), 1, 4)]
}
}
consume matrix_7000_im {
let t283409322 = (curved.s0.v0.v0o*641)
for (curved.s0.v0.v0i.v0io, 0, 10) {
allocate matrix_3200_im[float32 * 8] in Stack
produce matrix_3200_im {
for (matrix_3200_im.s0._1, 0, 2) {
matrix_3200_im[ramp((matrix_3200_im.s0._1*4), 1, 4)] = matrix_3200[ramp(((matrix_3200.stride.1*matrix_3200_im.s0._1) - t283409102), 1, 4)]
}
}
consume matrix_3200_im {
let t283409328 = ((curved.s0.v0.v0i.v0io*59) + t283409322)
for (curved.s0._::y, t283409314, 6) {
allocate f5[int16 * 32] in Stack
produce f5 {
f5[ramp(0, 1, 16)] = select(((curved.s0._::y % 2) == 0), f4[ramp(((((curved.s0._::y/2) - f7.s0._::y.min_2)*2548) + t283409328), 1, 16)], f3[ramp(((((curved.s0._::y/2) - t283409130)*2548) + t283409328), 1, 16)])
f5[ramp(16, 1, 16)] = select(((curved.s0._::y % 2) == 0), f4[ramp((((((curved.s0._::y/2) - f7.s0._::y.min_2)*2548) + t283409328) + 16), 1, 16)], f3[ramp((((((curved.s0._::y/2) - t283409130)*2548) + t283409328) + 16), 1, 16)])
}
consume f5 {
let t283409335 = (((curved.s0._::y - t283409300)*1296) + t283409328)
let t283409336 = (((curved.s0._::y - t283409315)*2560) + t283409328)
let t283409337 = (((curved.s0._::y % 8)*1297) + t283409328)
let t283409334 = (t283409336 + 2574)
let t283409331 = (t283409336 + 2560)
let t283409332 = (t283409335 + 1310)
let t283409329 = (t283409335 + 1296)
for (curved.s0.v1, 0, 2) {
allocate matrix[int16 * 4] in Stack
produce matrix {
matrix[ramp(0, 1, 4)] = int16x4((((matrix_3200_im[ramp((curved.s0.v1*4), 1, 4)]*x4(t283409103)) + (matrix_7000_im[ramp((curved.s0.v1*4), 1, 4)]*x4(t283409104)))*x4(256.000000f)))
}
consume matrix {
curved[ramp(((curved.s0.v1*15360) + t283409331), 1, 16)] = curve[int32x16(max(min(int16x16((((int32x16(f2[ramp(t283409337, 1, 16)])*x16(int32(matrix[0]))) + ((int32x16(f8[ramp(t283409329, 1, 16)])*x16(int32(matrix[2]))) + ((int32x16(f5[ramp(0, 1, 16)])*x16(int32(matrix[1]))) + x16(int32(matrix[3])))))/x16(256))), x16((int16)1023)), x16((int16)0)))]
curved[ramp(((curved.s0.v1*15360) + t283409334), 1, 16)] = curve[int32x16(max(min(int16x16((((int32x16(f2[ramp((t283409337 + 14), 1, 16)])*x16(int32(matrix[0]))) + ((int32x16(f8[ramp(t283409332, 1, 16)])*x16(int32(matrix[2]))) + ((int32x16(f5[ramp(14, 1, 16)])*x16(int32(matrix[1]))) + x16(int32(matrix[3])))))/x16(256))), x16((int16)1023)), x16((int16)0)))]
free matrix
}
}
}
free f5
allocate f5[int16 * 32] in Stack
produce f5 {
f5[ramp(0, 1, 16)] = select(((curved.s0._::y % 2) == 0), f4[ramp((((((curved.s0._::y/2) - f7.s0._::y.min_2)*2548) + t283409328) + 29), 1, 16)], f3[ramp((((((curved.s0._::y/2) - t283409130)*2548) + t283409328) + 29), 1, 16)])
f5[ramp(16, 1, 16)] = select(((curved.s0._::y % 2) == 0), f4[ramp((((((curved.s0._::y/2) - f7.s0._::y.min_2)*2548) + t283409328) + 45), 1, 16)], f3[ramp((((((curved.s0._::y/2) - t283409130)*2548) + t283409328) + 45), 1, 16)])
}
consume f5 {
let t283409344 = (((curved.s0._::y - t283409300)*1296) + t283409328)
let t283409345 = (((curved.s0._::y - t283409315)*2560) + t283409328)
let t283409346 = (((curved.s0._::y % 8)*1297) + t283409328)
let t283409342 = (t283409346 + 43)
let t283409339 = (t283409346 + 29)
let t283409343 = (t283409345 + 2603)
let t283409340 = (t283409345 + 2589)
let t283409341 = (t283409344 + 1339)
let t283409338 = (t283409344 + 1325)
for (curved.s0.v1, 0, 2) {
allocate matrix[int16 * 4] in Stack
produce matrix {
matrix[ramp(0, 1, 4)] = int16x4((((matrix_3200_im[ramp((curved.s0.v1*4), 1, 4)]*x4(t283409103)) + (matrix_7000_im[ramp((curved.s0.v1*4), 1, 4)]*x4(t283409104)))*x4(256.000000f)))
}
consume matrix {
curved[ramp(((curved.s0.v1*15360) + t283409340), 1, 16)] = curve[int32x16(max(min(int16x16((((int32x16(f2[ramp(t283409339, 1, 16)])*x16(int32(matrix[0]))) + ((int32x16(f8[ramp(t283409338, 1, 16)])*x16(int32(matrix[2]))) + ((int32x16(f5[ramp(0, 1, 16)])*x16(int32(matrix[1]))) + x16(int32(matrix[3])))))/x16(256))), x16((int16)1023)), x16((int16)0)))]
curved[ramp(((curved.s0.v1*15360) + t283409343), 1, 16)] = curve[int32x16(max(min(int16x16((((int32x16(f2[ramp(t283409342, 1, 16)])*x16(int32(matrix[0]))) + ((int32x16(f8[ramp(t283409341, 1, 16)])*x16(int32(matrix[2]))) + ((int32x16(f5[ramp(14, 1, 16)])*x16(int32(matrix[1]))) + x16(int32(matrix[3])))))/x16(256))), x16((int16)1023)), x16((int16)0)))]
free matrix
}
}
}
free f5
}
}
free matrix_3200_im
}
allocate matrix_3200_im[float32 * 8] in Stack
produce matrix_3200_im {
for (matrix_3200_im.s0._1, 0, 2) {
matrix_3200_im[ramp((matrix_3200_im.s0._1*4), 1, 4)] = matrix_3200[ramp(((matrix_3200.stride.1*matrix_3200_im.s0._1) - t283409102), 1, 4)]
}
}
consume matrix_3200_im {
let t283409350 = (curved.s0.v0.v0o*641)
for (curved.s0._::y, t283409314, 6) {
let t283409362 = (((curved.s0._::y - t283409315)*2560) + t283409350)
let t283409363 = ((curved.s0._::y - t283409300)*1296)
let t283409364 = (curved.s0._::y/2)
let t283409365 = ((t283409364 - f7.s0._::y.min_2)*2548)
let t283409366 = ((t283409364 - t283409130)*2548)
let t283409367 = ((curved.s0._::y % 8)*1297)
let t283409351 = ((curved.s0._::y % 2) == 0)
let t283409360 = (t283409367 + 596)
let t283409357 = (t283409367 + 582)
let t283409355 = (t283409366 + 598)
let t283409353 = (t283409366 + 582)
let t283409354 = (t283409365 + 598)
let t283409352 = (t283409365 + 582)
let t283409359 = (t283409363 + 1892)
let t283409356 = (t283409363 + 1878)
let t283409361 = (t283409362 + 3156)
let t283409358 = (t283409362 + 3142)
for (curved.s0.v0.v0i.v0ii.v0iio, 0, 2) {
allocate f5[int16 * 32] in Stack
produce f5 {
f5[ramp(0, 1, 16)] = select(t283409351, f4[ramp(((min((curved.s0.v0.v0i.v0ii.v0iio*30), 29) + t283409350) + t283409352), 1, 16)], f3[ramp(((min((curved.s0.v0.v0i.v0ii.v0iio*30), 29) + t283409350) + t283409353), 1, 16)])
f5[ramp(16, 1, 16)] = select(t283409351, f4[ramp(((min((curved.s0.v0.v0i.v0ii.v0iio*30), 29) + t283409350) + t283409354), 1, 16)], f3[ramp(((min((curved.s0.v0.v0i.v0ii.v0iio*30), 29) + t283409350) + t283409355), 1, 16)])
}
consume f5 {
let t283409371 = min((curved.s0.v0.v0i.v0ii.v0iio*30), 29)
let t283409370 = (t283409361 + t283409371)
let t283409369 = (t283409358 + t283409371)
let t283409368 = (t283409350 + t283409371)
for (curved.s0.v1, 0, 2) {
allocate matrix[int16 * 4] in Stack
produce matrix {
matrix[ramp(0, 1, 4)] = int16x4((((matrix_3200_im[ramp((curved.s0.v1*4), 1, 4)]*x4(t283409103)) + (matrix_7000_im[ramp((curved.s0.v1*4), 1, 4)]*x4(t283409104)))*x4(256.000000f)))
}
consume matrix {
curved[ramp(((curved.s0.v1*15360) + t283409369), 1, 16)] = curve[int32x16(max(min(int16x16((((int32x16(f2[ramp((t283409357 + t283409368), 1, 16)])*x16(int32(matrix[0]))) + ((int32x16(f8[ramp((t283409356 + t283409368), 1, 16)])*x16(int32(matrix[2]))) + ((int32x16(f5[ramp(0, 1, 16)])*x16(int32(matrix[1]))) + x16(int32(matrix[3])))))/x16(256))), x16((int16)1023)), x16((int16)0)))]
curved[ramp(((curved.s0.v1*15360) + t283409370), 1, 16)] = curve[int32x16(max(min(int16x16((((int32x16(f2[ramp((t283409360 + t283409368), 1, 16)])*x16(int32(matrix[0]))) + ((int32x16(f8[ramp((t283409359 + t283409368), 1, 16)])*x16(int32(matrix[2]))) + ((int32x16(f5[ramp(14, 1, 16)])*x16(int32(matrix[1]))) + x16(int32(matrix[3])))))/x16(256))), x16((int16)1023)), x16((int16)0)))]
free matrix
}
}
}
free f5
}
}
}
free matrix_3200_im
free matrix_7000_im
}
}
}
produce mask {
consume curved {
let t283409372 = ((processed.s0._::y.min_5.s*4) + processed.s0._::y._::yi.base)
for (mask.s0._::y, t283409372, 4) {
let t283409375 = (mask.s0._::y - t283409372)
let t283409373 = (t283409375*80)
let t283409374 = (t283409375*40)
for (mask.s0.v0.v0o, 0, 40) {
allocate unsharp_y[uint8 * 128] in Stack
produce unsharp_y {
let t283409376 = (mask.s0.v0.v0o + t283409373)
for (unsharp_y.s0.v1, 0, 2) {
unsharp_y[ramp((unsharp_y.s0.v1*64), 1, 32)] = uint8x32((((uint16x32(uint8x32((((uint16x32(curved[ramp((((unsharp_y.s0.v1*480) + t283409376)*32), 1, 32)]) + uint16x32(curved[ramp(((((unsharp_y.s0.v1*480) + t283409376)*32) + 5120), 1, 32)])) + x32((uint16)1))/x32((uint16)2)))) + uint16x32(curved[ramp(((((unsharp_y.s0.v1*480) + t283409376)*32) + 2560), 1, 32)])) + x32((uint16)1))/x32((uint16)2)))
unsharp_y[ramp(((unsharp_y.s0.v1*64) + 32), 1, 32)] = uint8x32((((uint16x32(uint8x32((((uint16x32(curved[ramp(((((unsharp_y.s0.v1*480) + t283409376)*32) + 32), 1, 32)]) + uint16x32(curved[ramp(((((unsharp_y.s0.v1*480) + t283409376)*32) + 5152), 1, 32)])) + x32((uint16)1))/x32((uint16)2)))) + uint16x32(curved[ramp(((((unsharp_y.s0.v1*480) + t283409376)*32) + 2592), 1, 32)])) + x32((uint16)1))/x32((uint16)2)))
}
}
allocate unsharp[uint8 * 32] in Stack
consume unsharp_y {
let t283409378 = (mask.s0.v0.v0o + t283409374)
let t283409377 = (mask.s0.v0.v0o + t283409373)
for (mask.s0.v1, 0, 2) {
produce unsharp {
unsharp[ramp(0, 1, 32)] = uint8x32((((uint16x32(uint8x32((((uint16x32(unsharp_y[ramp((mask.s0.v1*64), 1, 32)]) + uint16x32(unsharp_y[ramp(((mask.s0.v1*64) + 2), 1, 32)])) + x32((uint16)1))/x32((uint16)2)))) + uint16x32(unsharp_y[ramp(((mask.s0.v1*64) + 1), 1, 32)])) + x32((uint16)1))/x32((uint16)2)))
}
consume unsharp {
mask[ramp((((mask.s0.v1*160) + t283409378)*32), 1, 32)] = (int16x32(curved[ramp(((((mask.s0.v1*480) + t283409377)*32) + 2561), 1, 32)]) - int16x32(unsharp[ramp(0, 1, 32)]))
}
}
}
free unsharp_y
free unsharp
}
}
}
}
allocate sharpened[uint8 * 5120] in Stack
consume mask {
consume curved {
let t283409382 = (processed.s0._::y._::yi._::yii._::yiii._::yiiio*4)
let t283409383 = (processed.s0._::y.min_5.s*4)
let t283409380 = ((t283409298 + t283409382) - t283409383)
let t283409379 = (processed.s0._::y._::yi.base + t283409383)
let t283409381 = (t283409299 + t283409382)
for (processed.s0.v1.v1i, 0, 2) {
produce sharpened {
let t283409385 = (processed.s0.v1.v1i*480)
let t283409384 = (processed.s0.v1.v1i*160)
for (sharpened.s0._::y, t283409379, 4) {
let t283409388 = (sharpened.s0._::y - t283409379)
for (sharpened.s0.v0.v0, 0, 40) {
sharpened[ramp((((t283409388*40) + sharpened.s0.v0.v0)*32), 1, 32)] = uint8x32(max(min((((mask[ramp(((((t283409388*40) + t283409384) + sharpened.s0.v0.v0)*32), 1, 32)]*x32(int16(sharpen_strength_x32[0])))/x32((int16)32)) + int16x32(curved[ramp((((((t283409388*80) + t283409385) + sharpened.s0.v0.v0)*32) + 2561), 1, 32)])), x32((int16)255)), x32((int16)0)))
}
}
}
consume sharpened {
let t283409389 = ((processed.s0.v1.v1i*processed.stride.2) + t283409151)
for (processed.s0._::y._::yi._::yii._::yiii._::yiiii, 0, 4) {
let t283409390 = ((processed.s0._::y._::yi._::yii._::yiii._::yiiii + t283409380)*40)
let t283409391 = (((processed.s0._::y._::yi._::yii._::yiii._::yiiii + t283409381)*processed.stride.1) + t283409389)
for (processed.s0.v0.v0i.v0i, 0, 40) {
processed[ramp(((processed.s0.v0.v0i.v0i*32) + t283409391), 1, 32)] = sharpened[ramp(((processed.s0.v0.v0i.v0i + t283409390)*32), 1, 32)]
}
}
}
}
}
}
free sharpened
produce curved {
let t283409394 = (processed.s0._::y.min_5.s*4)
let t283409392 = (t283409123 + t283409394)
let t283409393 = (processed.s0._::y._::yi.base + t283409394)
for (curved.s0.v0.v0o, 0, 2) {
allocate matrix_7000_im[float32 * 4] in Stack
produce matrix_7000_im {
matrix_7000_im[ramp(0, 1, 4)] = matrix_7000[ramp(t283409106, 1, 4)]
}
consume matrix_7000_im {
let t283409402 = (curved.s0.v0.v0o*641)
for (curved.s0.v0.v0i.v0io, 0, 11) {
allocate matrix_3200_im[float32 * 4] in Stack
produce matrix_3200_im {
matrix_3200_im[ramp(0, 1, 4)] = matrix_3200[ramp(t283409107, 1, 4)]
}
consume matrix_3200_im {
let t283409403 = min((curved.s0.v0.v0i.v0io*59), 582)
for (curved.s0._::y, t283409392, 6) {
let t283409421 = (((curved.s0._::y - t283409300)*1296) + t283409402)
let t283409422 = (((curved.s0._::y - t283409393)*2560) + t283409402)
let t283409423 = (curved.s0._::y/2)
let t283409424 = (((t283409423 - f7.s0._::y.min_2)*2548) + t283409402)
let t283409425 = (((t283409423 - t283409130)*2548) + t283409402)
let t283409426 = (((curved.s0._::y % 8)*1297) + t283409402)
let t283409410 = ((curved.s0._::y % 2) == 0)
let t283409420 = (t283409422 + 33294)
let t283409417 = (t283409422 + 33280)
let t283409418 = (t283409421 + 1310)
let t283409415 = (t283409421 + 1296)
for (curved.s0.v0.v0i.v0ii.v0iio, 0, 2) {
let curved.s0.v0.min_8.s = (min((curved.s0.v0.v0i.v0ii.v0iio*30), 29) + t283409403)
let f5.v0.min_realized.s = min((min((curved.s0.v0.v0i.v0ii.v0iio*30), 29) + t283409403), curved.s0.v0.min_8.s)
allocate f5[int16 * 643] in Stack
produce f5 {
f5[ramp((curved.s0.v0.min_8.s - f5.v0.min_realized.s), 1, 16)] = select(t283409410, f4[ramp((curved.s0.v0.min_8.s + t283409424), 1, 16)], f3[ramp((curved.s0.v0.min_8.s + t283409425), 1, 16)])
f5[ramp(((curved.s0.v0.min_8.s - f5.v0.min_realized.s) + 16), 1, 16)] = select(t283409410, f4[ramp(((curved.s0.v0.min_8.s + t283409424) + 16), 1, 16)], f3[ramp(((curved.s0.v0.min_8.s + t283409425) + 16), 1, 16)])
}
consume f5 {
allocate matrix[int16 * 4] in Stack
produce matrix {
matrix[ramp(0, 1, 4)] = int16x4((((matrix_3200_im[ramp(0, 1, 4)]*x4(t283409103)) + (matrix_7000_im[ramp(0, 1, 4)]*x4(t283409104)))*x4(256.000000f)))
}
consume matrix {
curved[ramp((curved.s0.v0.min_8.s + t283409417), 1, 16)] = curve[int32x16(max(min(int16x16((((int32x16(f2[ramp((curved.s0.v0.min_8.s + t283409426), 1, 16)])*x16(int32(matrix[0]))) + ((int32x16(f5[ramp((curved.s0.v0.min_8.s - f5.v0.min_realized.s), 1, 16)])*x16(int32(matrix[1]))) + ((int32x16(f8[ramp((curved.s0.v0.min_8.s + t283409415), 1, 16)])*x16(int32(matrix[2]))) + x16(int32(matrix[3])))))/x16(256))), x16((int16)1023)), x16((int16)0)))]
curved[ramp((curved.s0.v0.min_8.s + t283409420), 1, 16)] = curve[int32x16(max(min(int16x16((((int32x16(f2[ramp(((curved.s0.v0.min_8.s + t283409426) + 14), 1, 16)])*x16(int32(matrix[0]))) + ((int32x16(f5[ramp(((curved.s0.v0.min_8.s - f5.v0.min_realized.s) + 14), 1, 16)])*x16(int32(matrix[1]))) + ((int32x16(f8[ramp((curved.s0.v0.min_8.s + t283409418), 1, 16)])*x16(int32(matrix[2]))) + x16(int32(matrix[3])))))/x16(256))), x16((int16)1023)), x16((int16)0)))]
free matrix
}
}
free f5
}
}
}
free matrix_3200_im
}
}
free matrix_7000_im
}
}
produce mask {
consume curved {
let t283409427 = ((processed.s0._::y.min_5.s*4) + processed.s0._::y._::yi.base)
for (mask.s0._::y, t283409427, 4) {
let t283409430 = (mask.s0._::y - t283409427)
let t283409428 = (t283409430*80)
let t283409429 = (t283409430*40)
for (mask.s0.v0.v0o, 0, 40) {
allocate unsharp_y[uint8 * 64] in Stack
produce unsharp_y {
unsharp_y[ramp(0, 1, 32)] = uint8x32((((uint16x32(uint8x32((((uint16x32(curved[ramp((((mask.s0.v0.v0o + t283409428)*32) + 30720), 1, 32)]) + uint16x32(curved[ramp((((mask.s0.v0.v0o + t283409428)*32) + 35840), 1, 32)])) + x32((uint16)1))/x32((uint16)2)))) + uint16x32(curved[ramp((((mask.s0.v0.v0o + t283409428)*32) + 33280), 1, 32)])) + x32((uint16)1))/x32((uint16)2)))
unsharp_y[ramp(32, 1, 32)] = uint8x32((((uint16x32(uint8x32((((uint16x32(curved[ramp((((mask.s0.v0.v0o + t283409428)*32) + 30752), 1, 32)]) + uint16x32(curved[ramp((((mask.s0.v0.v0o + t283409428)*32) + 35872), 1, 32)])) + x32((uint16)1))/x32((uint16)2)))) + uint16x32(curved[ramp((((mask.s0.v0.v0o + t283409428)*32) + 33312), 1, 32)])) + x32((uint16)1))/x32((uint16)2)))
}
allocate unsharp[uint8 * 32] in Stack
consume unsharp_y {
produce unsharp {
unsharp[ramp(0, 1, 32)] = uint8x32((((uint16x32(uint8x32((((uint16x32(unsharp_y[ramp(0, 1, 32)]) + uint16x32(unsharp_y[ramp(2, 1, 32)])) + x32((uint16)1))/x32((uint16)2)))) + uint16x32(unsharp_y[ramp(1, 1, 32)])) + x32((uint16)1))/x32((uint16)2)))
}
consume unsharp {
mask[ramp((((mask.s0.v0.v0o + t283409429)*32) + 10240), 1, 32)] = (int16x32(curved[ramp((((mask.s0.v0.v0o + t283409428)*32) + 33281), 1, 32)]) - int16x32(unsharp[ramp(0, 1, 32)]))
}
}
free unsharp_y
free unsharp
}
}
}
}
allocate sharpened[uint8 * 5120] in Stack
consume mask {
consume curved {
let t283409434 = (processed.s0._::y._::yi._::yii._::yiii._::yiiio*4)
let t283409435 = (processed.s0._::y.min_5.s*4)
let t283409432 = ((t283409298 + t283409434) - t283409435)
let t283409431 = (processed.s0._::y._::yi.base + t283409435)
let t283409433 = (t283409299 + t283409434)
for (processed.s0.v1.v1i, 0, 2) {
produce sharpened {
let t283409437 = (processed.s0.v1.v1i*480)
let t283409436 = (processed.s0.v1.v1i*160)
for (sharpened.s0._::y, t283409431, 4) {
let t283409440 = (sharpened.s0._::y - t283409431)
for (sharpened.s0.v0.v0, 0, 40) {
sharpened[ramp((((t283409440*40) + sharpened.s0.v0.v0)*32), 1, 32)] = uint8x32(max(min((((mask[ramp((((((t283409440*40) + t283409436) + sharpened.s0.v0.v0)*32) + 5120), 1, 32)]*x32(int16(sharpen_strength_x32[0])))/x32((int16)32)) + int16x32(curved[ramp((((((t283409440*80) + t283409437) + sharpened.s0.v0.v0)*32) + 17921), 1, 32)])), x32((int16)255)), x32((int16)0)))
}
}
}
consume sharpened {
let t283409441 = (((processed.s0.v1.v1i + 1)*processed.stride.2) + t283409151)
for (processed.s0._::y._::yi._::yii._::yiii._::yiiii, 0, 4) {
let t283409442 = ((processed.s0._::y._::yi._::yii._::yiii._::yiiii + t283409432)*40)
let t283409443 = (((processed.s0._::y._::yi._::yii._::yiii._::yiiii + t283409433)*processed.stride.1) + t283409441)
for (processed.s0.v0.v0i.v0i, 0, 40) {
processed[ramp(((processed.s0.v0.v0i.v0i*32) + t283409443), 1, 32)] = sharpened[ramp(((processed.s0.v0.v0i.v0i + t283409442)*32), 1, 32)]
}
}
}
}
}
}
free sharpened
free curved
free mask
}
}
}
}
free f2
free curve
}
}
}
}
}
}
}
}
}
}
free r_gb
free f4
free f8
free sharpen_strength_x32
}
free deinterleaved
free g_gb
free g_gr
free g_r
free r_r
free r_b
free f1
free f3
free b_b
}
}
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment