This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import z3 | |
#kernel = [1, 2, 1] | |
#ops = 2 | |
kernel = [1, 4, 6, 4, 1] | |
ops = 10 | |
kernel_sum = sum(kernel) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
add x9, x8, x17 | |
ldr d26, [x8] | |
add x13, x29, x15 | |
ldr d28, [x9] | |
add x9, x9, x17 | |
ld1r { v29.4s }, [x13] | |
sub x13, x13, | |
ldr d27, [x9] | |
add x9, x9, x17 | |
ld1r { v30.4s }, [x13] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
assert((reinterpret(uint64, tent.buffer) != (uint64)0), halide_error_buffer_argument_is_null("tent")) | |
assert((reinterpret(uint64, in.buffer) != (uint64)0), halide_error_buffer_argument_is_null("in")) | |
assert((reinterpret(uint64, blur2.buffer) != (uint64)0), halide_error_buffer_argument_is_null("blur2")) | |
let blur2 = _halide_buffer_get_host(blur2.buffer) | |
let blur2.min.0 = _halide_buffer_get_min(blur2.buffer, 0) | |
let blur2.extent.0 = _halide_buffer_get_extent(blur2.buffer, 0) | |
let blur2.stride.0 = _halide_buffer_get_stride(blur2.buffer, 0) | |
let blur2.min.1 = _halide_buffer_get_min(blur2.buffer, 1) | |
let blur2.extent.1 = _halide_buffer_get_extent(blur2.buffer, 1) | |
let blur2.stride.1 = _halide_buffer_get_stride(blur2.buffer, 1) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Attempting to disprove: (((1 == 1) && ((7 == 7) && ((4 == 4) && (g.s0.y.y > g.s0.y.y$8)))) && (((((uint1)1 && (f.s0.x == g.s0.x$1)) && (f.s0.y == (((g.s0.y.y$8*5) + g.min.1) + g.s0.y.v10$1))) && (let f.s0.x.loop_extent = max((100 - g.min.0), g.extent.0) in (((((((uint1)1 && (g.s0.y.y >= 0)) && (g.s0.y.y < (0 + ((g.extent.1 + 4)/5)))) && (f.s0.y >= ((g.s0.y.y*5) + g.min.1))) && (f.s0.y < (((g.s0.y.y*5) + g.min.1) + 5))) && (f.s0.x >= g.min.0)) && (f.s0.x < (g.min.0 + f.s0.x.loop_extent))))) && (((((((((((((uint1)1 && (g.s0.y.y$8 >= 0)) && (g.s0.y.y$8 < (0 + ((g.extent.1 + 4)/5)))) && (f.s0.y >= ((g.s0.y.y$8*5) + g.min.1))) && (f.s0.y < (((g.s0.y.y$8*5) + g.min.1) + 5))) && (f.s1.y >= ((g.s0.y.y$8*5) + g.min.1))) && (f.s1.y < (((g.s0.y.y$8*5) + g.min.1) + 5))) && (f.s2.y >= ((g.s0.y.y$8*5) + g.min.1))) && (f.s2.y < (((g.s0.y.y$8*5) + g.min.1) + 5))) && (g.s0.y.v10$1 >= 0)) && (g.s0.y.v10$1 < (0 + 5))) && (g.s0.x$1 >= g.min.0)) && (g.s0.x$1 < (g.min.0 + g.extent.0))))) | |
Expr: (((((((((0 <= g.s0.y.y) && (g.s0.y.y |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
set -x | |
# Build the generator to autotune. This script will be autotuning the | |
# autoscheduler's cost model training pipeline, which is large enough | |
# to be interesting. | |
GENERATOR=./cost_model.generator | |
PIPELINE=halide_autoscheduler_train_cost_model | |
c++ ../../src/CostModelGenerator.cpp -std=c++11 -DCOMPILING_SEPARATELY -I ../../include/ ../../lib/libHalide.a -o cost_model.generator -ldl -lpthread -lz -fno-rtti -fno-exceptions ../../tools/GenGen.cpp | |
# Make sure Halide is built |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
realize: processed | |
processed 2 40 1 t | |
realize: b_b | |
realize: f3 | |
realize: f1 | |
realize: r_b | |
realize: r_r | |
realize: g_r | |
realize: g_gr | |
realize: g_gb |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
module name=processed, target=x86-64-linux-avx-avx2-f16c-fma-no_asserts-no_bounds_query-no_runtime-sse41 | |
external_plus_metadata func processed (input, matrix_3200, matrix_7000, black_level, color_temp, contrast, gamma, sharpen_strength, white_level, processed) { | |
assert((reinterpret(uint64, processed.buffer) != (uint64)0), halide_error_buffer_argument_is_null("processed")) | |
assert((reinterpret(uint64, matrix_7000.buffer) != (uint64)0), halide_error_buffer_argument_is_null("matrix_7000")) | |
assert((reinterpret(uint64, matrix_3200.buffer) != (uint64)0), halide_error_buffer_argument_is_null("matrix_3200")) | |
assert((reinterpret(uint64, input.buffer) != (uint64)0), halide_error_buffer_argument_is_null("input")) | |
let input = _halide_buffer_get_host(input.buffer) | |
let input.min.0 = _halide_buffer_get_min(input.buffer, 0) | |
let input.stride.0 = _halide_buffer_get_stride(input.buffer, 0) | |
let input.min.1 = _halide_buffer_get_min(input.buffer, 1) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
halide_sgemm_notrans | |
total time: 3508.201904 ms samples: 3198 runs: 155 time/run: 22.633560 ms | |
average threads used: 19.534710 | |
heap allocations: 155 peak heap usage: 17305600 bytes | |
As: 0.700ms (3%) threads: 14.474 peak: 17305600 num: 155 avg: 17305600 | |
IPC: 1.96 L1: 231707996 (77%) LLC: 1912567 (74%) | |
Atmp: 0.505ms (2%) threads: 17.577 stack: 1024 | |
IPC: 2.11 L1: 194752315 (80%) LLC: 1802731 (81%) | |
result: 7.521ms (33%) threads: 19.817 | |
IPC: 3.14 L1: 2345267500 (79%) LLC: 10048920 (94%) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Type 1: | |
def fun(float(N) X, float(N) Y) -> (Z, W) { | |
tmp(i) = X(i) + 4 | |
Z(i) = tmp(i) | |
W(i) = Y(i) + 4 | |
} | |
Type 2: |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
(max(((17 - (v0 % 4))/4), ((max(((min((v1*2), ((v0 % 2) + 5)) + (v0/2))/2), ((v0/4) + v1)) - (((v0 + -6)/4) + v1)) + 2)) <= 1024) | |
(max(((17 - (v0 % 4))/4), ((((min((v1*2), ((v0 % 2) + 20)) + (v0/2))/2) - (((v0 + -6)/4) + v1)) + 2)) <= 1024) | |
(max(((8 - (((v0*161) + v1) % 2))/2), ((((min((v2*2), 159) + ((v0*161) + v1))/2) - (((((v0*161) + v1) + -3)/2) + v2)) + 2)) <= 1024) | |
(max(max(min(v0, 12), (min(min(min(min(min(min(min(min(min(min(min(min(min(min(min(((v1 + -15)/16), (v2 + -1)), ((v1 + -14)/16)), ((v1 + -13)/16)), ((v1 + -2)/16)), ((v1 + -1)/16)), ((v1 + 1)/16)), ((v3 + -2)/16)), ((v3 + -1)/16)), ((v4 + -2)/16)), ((v4 + -1)/16)), ((v5 + -2)/16)), ((v5 + -1)/16)), ((v6 + -2)/16)), ((v6 + -1)/16)), 11) + 1)), 0) <= max(min(v0, 12), 0)) | |
(max(max(min(((v0 + 131)/144), 2), (min(((min((min(min(min(min(min(min((min((((v1 + v2) + 25)/2), v3) - ((v4*288) + v5)), v6), (min((((v1 + v2) + 29)/2), v3) - ((v4*288) + v5))), (v7 + 13)), v8), v9), (v7 + 15)) + 128), v8) + -140)/144), 1) + 1)), 0) <= max(min(((v0 + 131) |
NewerOlder