Created
December 15, 2018 13:36
-
-
Save omo/9a5930384bd1fd062c509fb07f814711 to your computer and use it in GitHub Desktop.
A Halide IR
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
module name=lesson02_x86_64_linux_c_plus_plus_name_mangling_no_runtime_sse41, target=x86-64-linux-c_plus_plus_name_mangling-no_runtime-sse41 | |
external_plus_metadata func lesson02_x86_64_linux_c_plus_plus_name_mangling_no_runtime_sse41 (input, output$2) { | |
assert((reinterpret(uint64, output$2.buffer) != (uint64)0), halide_error_buffer_argument_is_null("output$2")) | |
assert((reinterpret(uint64, input.buffer) != (uint64)0), halide_error_buffer_argument_is_null("input")) | |
let input = _halide_buffer_get_host(input.buffer) | |
let input.type.code = _halide_buffer_get_type_code(input.buffer) | |
let input.type.bits = _halide_buffer_get_type_bits(input.buffer) | |
let input.type.lanes = _halide_buffer_get_type_lanes(input.buffer) | |
let input.dimensions = _halide_buffer_get_dimensions(input.buffer) | |
let input.min.0 = _halide_buffer_get_min(input.buffer, 0) | |
let input.extent.0 = _halide_buffer_get_extent(input.buffer, 0) | |
let input.stride.0 = _halide_buffer_get_stride(input.buffer, 0) | |
let input.min.1 = _halide_buffer_get_min(input.buffer, 1) | |
let input.extent.1 = _halide_buffer_get_extent(input.buffer, 1) | |
let input.stride.1 = _halide_buffer_get_stride(input.buffer, 1) | |
let input.min.2 = _halide_buffer_get_min(input.buffer, 2) | |
let input.extent.2 = _halide_buffer_get_extent(input.buffer, 2) | |
let input.stride.2 = _halide_buffer_get_stride(input.buffer, 2) | |
let output$2 = _halide_buffer_get_host(output$2.buffer) | |
let output$2.type.code = _halide_buffer_get_type_code(output$2.buffer) | |
let output$2.type.bits = _halide_buffer_get_type_bits(output$2.buffer) | |
let output$2.type.lanes = _halide_buffer_get_type_lanes(output$2.buffer) | |
let output$2.dimensions = _halide_buffer_get_dimensions(output$2.buffer) | |
let output$2.min.0 = _halide_buffer_get_min(output$2.buffer, 0) | |
let output$2.extent.0 = _halide_buffer_get_extent(output$2.buffer, 0) | |
let output$2.stride.0 = _halide_buffer_get_stride(output$2.buffer, 0) | |
let output$2.min.1 = _halide_buffer_get_min(output$2.buffer, 1) | |
let output$2.extent.1 = _halide_buffer_get_extent(output$2.buffer, 1) | |
let output$2.stride.1 = _halide_buffer_get_stride(output$2.buffer, 1) | |
let output$2.min.2 = _halide_buffer_get_min(output$2.buffer, 2) | |
let output$2.extent.2 = _halide_buffer_get_extent(output$2.buffer, 2) | |
let output$2.stride.2 = _halide_buffer_get_stride(output$2.buffer, 2) | |
let input.extent.0.required.s = (min((((output$2.extent.0 + -1)/4)*4), (output$2.extent.0 + -4)) - min(output$2.extent.0, 4)) | |
if (_halide_buffer_is_bounds_query(input.buffer)) { | |
_halide_buffer_init(input.buffer, _halide_buffer_get_shape(input.buffer), reinterpret((void *), (uint64)0), (uint64)0, reinterpret((halide_device_interface_t *), (uint64)0), 1, 8, 3, make_struct((halide_dimension_t *), ((min(output$2.extent.0, 4) + output$2.min.0) + -4), (input.extent.0.required.s + 8), 1, 0, output$2.min.1, output$2.extent.1, (input.extent.0.required.s + 8), 0, output$2.min.2, output$2.extent.2, ((input.extent.0.required.s + 8)*output$2.extent.1), 0), (uint64)0) | |
} | |
if (_halide_buffer_is_bounds_query(output$2.buffer)) { | |
_halide_buffer_init(output$2.buffer, _halide_buffer_get_shape(output$2.buffer), reinterpret((void *), (uint64)0), (uint64)0, reinterpret((halide_device_interface_t *), (uint64)0), 1, 8, 3, make_struct((halide_dimension_t *), ((min(output$2.extent.0, 4) + output$2.min.0) + -4), (input.extent.0.required.s + 8), 1, 0, output$2.min.1, output$2.extent.1, (input.extent.0.required.s + 8), 0, output$2.min.2, output$2.extent.2, ((input.extent.0.required.s + 8)*output$2.extent.1), 0), (uint64)0) | |
} | |
if (!(_halide_buffer_is_bounds_query(input.buffer) || _halide_buffer_is_bounds_query(output$2.buffer))) { | |
assert((((input.type.code == (uint8)1) && (input.type.bits == (uint8)8)) && (input.type.lanes == (uint16)1)), halide_error_bad_type("Input buffer input", input.type.code, (uint8)1, input.type.bits, (uint8)8, input.type.lanes, (uint16)1)) | |
assert((input.dimensions == 3), halide_error_bad_dimensions("Input buffer input", input.dimensions, 3)) | |
assert((((output$2.type.code == (uint8)1) && (output$2.type.bits == (uint8)8)) && (output$2.type.lanes == (uint16)1)), halide_error_bad_type("Output buffer output$2", output$2.type.code, (uint8)1, output$2.type.bits, (uint8)8, output$2.type.lanes, (uint16)1)) | |
assert((output$2.dimensions == 3), halide_error_bad_dimensions("Output buffer output$2", output$2.dimensions, 3)) | |
assert((((input.min.0 + 4) <= (min(output$2.extent.0, 4) + output$2.min.0)) && ((((min(output$2.extent.0, 4) + output$2.min.0) + input.extent.0.required.s) + 4) <= (input.extent.0 + input.min.0))), halide_error_access_out_of_bounds("Input buffer input", 0, ((min(output$2.extent.0, 4) + output$2.min.0) + -4), (((min(output$2.extent.0, 4) + output$2.min.0) + input.extent.0.required.s) + 3), input.min.0, ((input.extent.0 + input.min.0) + -1))) | |
assert((0 <= input.extent.0), halide_error_buffer_extents_negative("Input buffer input", 0, input.extent.0)) | |
assert(((input.min.1 <= output$2.min.1) && ((output$2.extent.1 + output$2.min.1) <= (input.extent.1 + input.min.1))), halide_error_access_out_of_bounds("Input buffer input", 1, output$2.min.1, ((output$2.extent.1 + output$2.min.1) + -1), input.min.1, ((input.extent.1 + input.min.1) + -1))) | |
assert((0 <= input.extent.1), halide_error_buffer_extents_negative("Input buffer input", 1, input.extent.1)) | |
assert(((input.min.2 <= output$2.min.2) && ((output$2.extent.2 + output$2.min.2) <= (input.extent.2 + input.min.2))), halide_error_access_out_of_bounds("Input buffer input", 2, output$2.min.2, ((output$2.extent.2 + output$2.min.2) + -1), input.min.2, ((input.extent.2 + input.min.2) + -1))) | |
assert((0 <= input.extent.2), halide_error_buffer_extents_negative("Input buffer input", 2, input.extent.2)) | |
assert(((4 <= output$2.extent.0) && ((((min(output$2.extent.0, 4) + output$2.min.0) + input.extent.0.required.s) + 4) <= (output$2.extent.0 + output$2.min.0))), halide_error_access_out_of_bounds("Output buffer output$2", 0, ((min(output$2.extent.0, 4) + output$2.min.0) + -4), (((min(output$2.extent.0, 4) + output$2.min.0) + input.extent.0.required.s) + 3), output$2.min.0, ((output$2.extent.0 + output$2.min.0) + -1))) | |
assert((0 <= output$2.extent.1), halide_error_buffer_extents_negative("Output buffer output$2", 1, output$2.extent.1)) | |
assert((0 <= output$2.extent.2), halide_error_buffer_extents_negative("Output buffer output$2", 2, output$2.extent.2)) | |
assert((input.stride.0 == 1), halide_error_constraint_violated("input.stride.0", input.stride.0, "1", 1)) | |
assert((output$2.stride.0 == 1), halide_error_constraint_violated("output$2.stride.0", output$2.stride.0, "1", 1)) | |
let input.total_extent.1 = (int64(input.extent.1)*int64(input.extent.0)) | |
let input.total_extent.2 = (input.total_extent.1*int64(input.extent.2)) | |
let output$2.total_extent.1 = (int64(output$2.extent.1)*int64(output$2.extent.0)) | |
let output$2.total_extent.2 = (output$2.total_extent.1*int64(output$2.extent.2)) | |
assert((abs(int64(input.extent.0)) <= (uint64)2147483647), halide_error_buffer_allocation_too_large("input", abs(int64(input.extent.0)), (uint64)2147483647)) | |
assert((abs((int64(input.extent.1)*int64(input.stride.1))) <= (uint64)2147483647), halide_error_buffer_allocation_too_large("input", abs((int64(input.extent.1)*int64(input.stride.1))), (uint64)2147483647)) | |
assert((input.total_extent.1 <= (int64)2147483647), halide_error_buffer_extents_too_large("input", input.total_extent.1, (int64)2147483647)) | |
assert((abs((int64(input.extent.2)*int64(input.stride.2))) <= (uint64)2147483647), halide_error_buffer_allocation_too_large("input", abs((int64(input.extent.2)*int64(input.stride.2))), (uint64)2147483647)) | |
assert((input.total_extent.2 <= (int64)2147483647), halide_error_buffer_extents_too_large("input", input.total_extent.2, (int64)2147483647)) | |
assert((abs(int64(output$2.extent.0)) <= (uint64)2147483647), halide_error_buffer_allocation_too_large("output$2", abs(int64(output$2.extent.0)), (uint64)2147483647)) | |
assert((abs((int64(output$2.extent.1)*int64(output$2.stride.1))) <= (uint64)2147483647), halide_error_buffer_allocation_too_large("output$2", abs((int64(output$2.extent.1)*int64(output$2.stride.1))), (uint64)2147483647)) | |
assert((output$2.total_extent.1 <= (int64)2147483647), halide_error_buffer_extents_too_large("output$2", output$2.total_extent.1, (int64)2147483647)) | |
assert((abs((int64(output$2.extent.2)*int64(output$2.stride.2))) <= (uint64)2147483647), halide_error_buffer_allocation_too_large("output$2", abs((int64(output$2.extent.2)*int64(output$2.stride.2))), (uint64)2147483647)) | |
assert((output$2.total_extent.2 <= (int64)2147483647), halide_error_buffer_extents_too_large("output$2", output$2.total_extent.2, (int64)2147483647)) | |
assert((input != reinterpret((void *), (uint64)0)), halide_error_host_is_null("Input buffer input")) | |
assert((output$2 != reinterpret((void *), (uint64)0)), halide_error_host_is_null("Output buffer output$2")) | |
produce output$2 { | |
let t70 = (output$2.extent.0 + output$2.min.0) | |
let t71 = (input.min.2*input.stride.2) | |
let t72 = (input.min.1*input.stride.1) | |
let t73 = (output$2.min.2*output$2.stride.2) | |
let t74 = (output$2.min.1*output$2.stride.1) | |
let t75 = max((output$2.extent.0/4), 0) | |
let t67 = (t75 < ((output$2.extent.0 + 3)/4)) | |
let t66 = (((output$2.min.0 - t73) - t74) - output$2.min.0) | |
let t65 = (((output$2.min.0 - t71) - t72) - input.min.0) | |
let t69 = ((((t70 - t73) - t74) - output$2.min.0) + -4) | |
let t68 = ((((t70 - t71) - t72) - input.min.0) + -4) | |
for (output$2.s0.v34, output$2.min.2, output$2.extent.2) { | |
let t80 = (input.stride.2*output$2.s0.v34) | |
let t81 = (output$2.s0.v34*output$2.stride.2) | |
let t79 = (t69 + t81) | |
let t77 = (t66 + t81) | |
let t78 = (t68 + t80) | |
let t76 = (t65 + t80) | |
parallel (output$2.s0.v33, output$2.min.1, output$2.extent.1) { | |
let t83 = ((output$2.s0.v33*output$2.stride.1) + t77) | |
let t82 = ((input.stride.1*output$2.s0.v33) + t76) | |
for (output$2.s0.v32.v32, 0, t75) { | |
output$2[ramp(((output$2.s0.v32.v32*4) + t83), 1, 4)] = uint8x4((min(float32x4(input[ramp(((output$2.s0.v32.v32*4) + t82), 1, 4)]), x4(170.000000f))*x4(1.500000f))) | |
} | |
if (t67) { | |
output$2[ramp(((output$2.s0.v33*output$2.stride.1) + t79), 1, 4)] = uint8x4((min(float32x4(input[ramp(((input.stride.1*output$2.s0.v33) + t78), 1, 4)]), x4(170.000000f))*x4(1.500000f))) | |
} | |
} | |
} | |
} | |
} | |
} | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment