Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save omo/9a5930384bd1fd062c509fb07f814711 to your computer and use it in GitHub Desktop.
A Halide IR dump for the lesson02 pipeline (target: x86-64-linux-c_plus_plus_name_mangling-no_runtime-sse41)
module name=lesson02_x86_64_linux_c_plus_plus_name_mangling_no_runtime_sse41, target=x86-64-linux-c_plus_plus_name_mangling-no_runtime-sse41
external_plus_metadata func lesson02_x86_64_linux_c_plus_plus_name_mangling_no_runtime_sse41 (input, output$2) {
assert((reinterpret(uint64, output$2.buffer) != (uint64)0), halide_error_buffer_argument_is_null("output$2"))
assert((reinterpret(uint64, input.buffer) != (uint64)0), halide_error_buffer_argument_is_null("input"))
let input = _halide_buffer_get_host(input.buffer)
let input.type.code = _halide_buffer_get_type_code(input.buffer)
let input.type.bits = _halide_buffer_get_type_bits(input.buffer)
let input.type.lanes = _halide_buffer_get_type_lanes(input.buffer)
let input.dimensions = _halide_buffer_get_dimensions(input.buffer)
let input.min.0 = _halide_buffer_get_min(input.buffer, 0)
let input.extent.0 = _halide_buffer_get_extent(input.buffer, 0)
let input.stride.0 = _halide_buffer_get_stride(input.buffer, 0)
let input.min.1 = _halide_buffer_get_min(input.buffer, 1)
let input.extent.1 = _halide_buffer_get_extent(input.buffer, 1)
let input.stride.1 = _halide_buffer_get_stride(input.buffer, 1)
let input.min.2 = _halide_buffer_get_min(input.buffer, 2)
let input.extent.2 = _halide_buffer_get_extent(input.buffer, 2)
let input.stride.2 = _halide_buffer_get_stride(input.buffer, 2)
let output$2 = _halide_buffer_get_host(output$2.buffer)
let output$2.type.code = _halide_buffer_get_type_code(output$2.buffer)
let output$2.type.bits = _halide_buffer_get_type_bits(output$2.buffer)
let output$2.type.lanes = _halide_buffer_get_type_lanes(output$2.buffer)
let output$2.dimensions = _halide_buffer_get_dimensions(output$2.buffer)
let output$2.min.0 = _halide_buffer_get_min(output$2.buffer, 0)
let output$2.extent.0 = _halide_buffer_get_extent(output$2.buffer, 0)
let output$2.stride.0 = _halide_buffer_get_stride(output$2.buffer, 0)
let output$2.min.1 = _halide_buffer_get_min(output$2.buffer, 1)
let output$2.extent.1 = _halide_buffer_get_extent(output$2.buffer, 1)
let output$2.stride.1 = _halide_buffer_get_stride(output$2.buffer, 1)
let output$2.min.2 = _halide_buffer_get_min(output$2.buffer, 2)
let output$2.extent.2 = _halide_buffer_get_extent(output$2.buffer, 2)
let output$2.stride.2 = _halide_buffer_get_stride(output$2.buffer, 2)
let input.extent.0.required.s = (min((((output$2.extent.0 + -1)/4)*4), (output$2.extent.0 + -4)) - min(output$2.extent.0, 4))
if (_halide_buffer_is_bounds_query(input.buffer)) {
_halide_buffer_init(input.buffer, _halide_buffer_get_shape(input.buffer), reinterpret((void *), (uint64)0), (uint64)0, reinterpret((halide_device_interface_t *), (uint64)0), 1, 8, 3, make_struct((halide_dimension_t *), ((min(output$2.extent.0, 4) + output$2.min.0) + -4), (input.extent.0.required.s + 8), 1, 0, output$2.min.1, output$2.extent.1, (input.extent.0.required.s + 8), 0, output$2.min.2, output$2.extent.2, ((input.extent.0.required.s + 8)*output$2.extent.1), 0), (uint64)0)
}
if (_halide_buffer_is_bounds_query(output$2.buffer)) {
_halide_buffer_init(output$2.buffer, _halide_buffer_get_shape(output$2.buffer), reinterpret((void *), (uint64)0), (uint64)0, reinterpret((halide_device_interface_t *), (uint64)0), 1, 8, 3, make_struct((halide_dimension_t *), ((min(output$2.extent.0, 4) + output$2.min.0) + -4), (input.extent.0.required.s + 8), 1, 0, output$2.min.1, output$2.extent.1, (input.extent.0.required.s + 8), 0, output$2.min.2, output$2.extent.2, ((input.extent.0.required.s + 8)*output$2.extent.1), 0), (uint64)0)
}
if (!(_halide_buffer_is_bounds_query(input.buffer) || _halide_buffer_is_bounds_query(output$2.buffer))) {
assert((((input.type.code == (uint8)1) && (input.type.bits == (uint8)8)) && (input.type.lanes == (uint16)1)), halide_error_bad_type("Input buffer input", input.type.code, (uint8)1, input.type.bits, (uint8)8, input.type.lanes, (uint16)1))
assert((input.dimensions == 3), halide_error_bad_dimensions("Input buffer input", input.dimensions, 3))
assert((((output$2.type.code == (uint8)1) && (output$2.type.bits == (uint8)8)) && (output$2.type.lanes == (uint16)1)), halide_error_bad_type("Output buffer output$2", output$2.type.code, (uint8)1, output$2.type.bits, (uint8)8, output$2.type.lanes, (uint16)1))
assert((output$2.dimensions == 3), halide_error_bad_dimensions("Output buffer output$2", output$2.dimensions, 3))
assert((((input.min.0 + 4) <= (min(output$2.extent.0, 4) + output$2.min.0)) && ((((min(output$2.extent.0, 4) + output$2.min.0) + input.extent.0.required.s) + 4) <= (input.extent.0 + input.min.0))), halide_error_access_out_of_bounds("Input buffer input", 0, ((min(output$2.extent.0, 4) + output$2.min.0) + -4), (((min(output$2.extent.0, 4) + output$2.min.0) + input.extent.0.required.s) + 3), input.min.0, ((input.extent.0 + input.min.0) + -1)))
assert((0 <= input.extent.0), halide_error_buffer_extents_negative("Input buffer input", 0, input.extent.0))
assert(((input.min.1 <= output$2.min.1) && ((output$2.extent.1 + output$2.min.1) <= (input.extent.1 + input.min.1))), halide_error_access_out_of_bounds("Input buffer input", 1, output$2.min.1, ((output$2.extent.1 + output$2.min.1) + -1), input.min.1, ((input.extent.1 + input.min.1) + -1)))
assert((0 <= input.extent.1), halide_error_buffer_extents_negative("Input buffer input", 1, input.extent.1))
assert(((input.min.2 <= output$2.min.2) && ((output$2.extent.2 + output$2.min.2) <= (input.extent.2 + input.min.2))), halide_error_access_out_of_bounds("Input buffer input", 2, output$2.min.2, ((output$2.extent.2 + output$2.min.2) + -1), input.min.2, ((input.extent.2 + input.min.2) + -1)))
assert((0 <= input.extent.2), halide_error_buffer_extents_negative("Input buffer input", 2, input.extent.2))
assert(((4 <= output$2.extent.0) && ((((min(output$2.extent.0, 4) + output$2.min.0) + input.extent.0.required.s) + 4) <= (output$2.extent.0 + output$2.min.0))), halide_error_access_out_of_bounds("Output buffer output$2", 0, ((min(output$2.extent.0, 4) + output$2.min.0) + -4), (((min(output$2.extent.0, 4) + output$2.min.0) + input.extent.0.required.s) + 3), output$2.min.0, ((output$2.extent.0 + output$2.min.0) + -1)))
assert((0 <= output$2.extent.1), halide_error_buffer_extents_negative("Output buffer output$2", 1, output$2.extent.1))
assert((0 <= output$2.extent.2), halide_error_buffer_extents_negative("Output buffer output$2", 2, output$2.extent.2))
assert((input.stride.0 == 1), halide_error_constraint_violated("input.stride.0", input.stride.0, "1", 1))
assert((output$2.stride.0 == 1), halide_error_constraint_violated("output$2.stride.0", output$2.stride.0, "1", 1))
let input.total_extent.1 = (int64(input.extent.1)*int64(input.extent.0))
let input.total_extent.2 = (input.total_extent.1*int64(input.extent.2))
let output$2.total_extent.1 = (int64(output$2.extent.1)*int64(output$2.extent.0))
let output$2.total_extent.2 = (output$2.total_extent.1*int64(output$2.extent.2))
assert((abs(int64(input.extent.0)) <= (uint64)2147483647), halide_error_buffer_allocation_too_large("input", abs(int64(input.extent.0)), (uint64)2147483647))
assert((abs((int64(input.extent.1)*int64(input.stride.1))) <= (uint64)2147483647), halide_error_buffer_allocation_too_large("input", abs((int64(input.extent.1)*int64(input.stride.1))), (uint64)2147483647))
assert((input.total_extent.1 <= (int64)2147483647), halide_error_buffer_extents_too_large("input", input.total_extent.1, (int64)2147483647))
assert((abs((int64(input.extent.2)*int64(input.stride.2))) <= (uint64)2147483647), halide_error_buffer_allocation_too_large("input", abs((int64(input.extent.2)*int64(input.stride.2))), (uint64)2147483647))
assert((input.total_extent.2 <= (int64)2147483647), halide_error_buffer_extents_too_large("input", input.total_extent.2, (int64)2147483647))
assert((abs(int64(output$2.extent.0)) <= (uint64)2147483647), halide_error_buffer_allocation_too_large("output$2", abs(int64(output$2.extent.0)), (uint64)2147483647))
assert((abs((int64(output$2.extent.1)*int64(output$2.stride.1))) <= (uint64)2147483647), halide_error_buffer_allocation_too_large("output$2", abs((int64(output$2.extent.1)*int64(output$2.stride.1))), (uint64)2147483647))
assert((output$2.total_extent.1 <= (int64)2147483647), halide_error_buffer_extents_too_large("output$2", output$2.total_extent.1, (int64)2147483647))
assert((abs((int64(output$2.extent.2)*int64(output$2.stride.2))) <= (uint64)2147483647), halide_error_buffer_allocation_too_large("output$2", abs((int64(output$2.extent.2)*int64(output$2.stride.2))), (uint64)2147483647))
assert((output$2.total_extent.2 <= (int64)2147483647), halide_error_buffer_extents_too_large("output$2", output$2.total_extent.2, (int64)2147483647))
assert((input != reinterpret((void *), (uint64)0)), halide_error_host_is_null("Input buffer input"))
assert((output$2 != reinterpret((void *), (uint64)0)), halide_error_host_is_null("Output buffer output$2"))
produce output$2 {
let t70 = (output$2.extent.0 + output$2.min.0)
let t71 = (input.min.2*input.stride.2)
let t72 = (input.min.1*input.stride.1)
let t73 = (output$2.min.2*output$2.stride.2)
let t74 = (output$2.min.1*output$2.stride.1)
let t75 = max((output$2.extent.0/4), 0)
let t67 = (t75 < ((output$2.extent.0 + 3)/4))
let t66 = (((output$2.min.0 - t73) - t74) - output$2.min.0)
let t65 = (((output$2.min.0 - t71) - t72) - input.min.0)
let t69 = ((((t70 - t73) - t74) - output$2.min.0) + -4)
let t68 = ((((t70 - t71) - t72) - input.min.0) + -4)
for (output$2.s0.v34, output$2.min.2, output$2.extent.2) {
let t80 = (input.stride.2*output$2.s0.v34)
let t81 = (output$2.s0.v34*output$2.stride.2)
let t79 = (t69 + t81)
let t77 = (t66 + t81)
let t78 = (t68 + t80)
let t76 = (t65 + t80)
parallel (output$2.s0.v33, output$2.min.1, output$2.extent.1) {
let t83 = ((output$2.s0.v33*output$2.stride.1) + t77)
let t82 = ((input.stride.1*output$2.s0.v33) + t76)
for (output$2.s0.v32.v32, 0, t75) {
output$2[ramp(((output$2.s0.v32.v32*4) + t83), 1, 4)] = uint8x4((min(float32x4(input[ramp(((output$2.s0.v32.v32*4) + t82), 1, 4)]), x4(170.000000f))*x4(1.500000f)))
}
if (t67) {
output$2[ramp(((output$2.s0.v33*output$2.stride.1) + t79), 1, 4)] = uint8x4((min(float32x4(input[ramp(((input.stride.1*output$2.s0.v33) + t78), 1, 4)]), x4(170.000000f))*x4(1.500000f)))
}
}
}
}
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment