Skip to content

Instantly share code, notes, and snippets.

@catid
Last active May 18, 2021 19:40
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save catid/f84ab44cff9c2be97ebb0d811418a8b6 to your computer and use it in GitHub Desktop.
Save catid/f84ab44cff9c2be97ebb0d811418a8b6 to your computer and use it in GitHub Desktop.
Weird compile error on ARM:
```
Unhandled exception: Error: Output "output" requires type float32 but was defined as type float32x12793.
```
```
// Nominal full-resolution image size; the generator's schedule() feeds these
// values into its bounds estimates.
const int expected_width = 640;
const int expected_height = 512;

// Pipeline stages declared at file scope.
static Func blur_x("blur_x");
static Func blur_y("blur_y");
static Func blur2_x("blur2_x");
static Func input_float("input_float");

//------------------------------------------------------------------------------
// Constants

// High-quality taps: [1 4 6 4 1] / 16. The kernel is symmetric, so only the
// outer tap, the inner tap and the centre tap are stored.
const Expr HQT[3] = {1.0f / 16.0f, 4.0f / 16.0f, 6.0f / 16.0f};

// Low-quality taps: [1 2 1] / 4, stored as the outer tap and the centre tap.
const Expr LQT[2] = {1.0f / 4.0f, 2.0f / 4.0f};
//------------------------------------------------------------------------------
// Gaussian Downsample 2x2 float -> float
// Halide generator that halves a 2-D float image in both dimensions using a
// separable blur followed by 2x decimation.
//
// The intermediate blur_x stage is stored transposed: blur_x(x, y) reads
// input(2*y + j, x). The second pass transposes back, so overall
//   output(x, y) = sum_k sum_j w[k] * w[j] * input(2*x + j, 2*y + k).
class pyrdown_float_generator : public Halide::Generator<pyrdown_float_generator>
{
public:
    Input<Buffer<float>> input{"input", 2};
    Output<Buffer<float>> output{"output", 2};

    // Low quality separable filter taps: [1 2 1] / 4
    // High quality separable filter taps: [1 4 6 4 1] / 16
    GeneratorParam<bool> high_quality{"high_quality", false};

    // Defines the algorithm: mirror the input at its edges, then run the two
    // blur-and-decimate passes with either the 5-tap or the 3-tap kernel.
    void generate()
    {
        Func input_repeat = BoundaryConditions::mirror_image(input);

        // Result of the second pass. Deliberately not named blur_y so that it
        // does not shadow the file-scope Func of that name.
        Expr downsampled;

        if (high_quality) {
            blur_x(x, y) =
                input_repeat(y*2-2, x) * HQT[0] +
                input_repeat(y*2-1, x) * HQT[1] +
                input_repeat(y*2, x) * HQT[2] +
                input_repeat(y*2+1, x) * HQT[1] +
                input_repeat(y*2+2, x) * HQT[0];
            downsampled =
                blur_x(y*2-2, x) * HQT[0] +
                blur_x(y*2-1, x) * HQT[1] +
                blur_x(y*2, x) * HQT[2] +
                blur_x(y*2+1, x) * HQT[1] +
                blur_x(y*2+2, x) * HQT[0];
        } else {
            // The kernel is symmetric, so the last tap reuses LQT[0]. LQT has
            // only two entries; indexing LQT[2] would read past the end of
            // the array.
            blur_x(x, y) =
                input_repeat(y*2-1, x) * LQT[0] +
                input_repeat(y*2, x) * LQT[1] +
                input_repeat(y*2+1, x) * LQT[0];
            downsampled =
                blur_x(y*2-1, x) * LQT[0] +
                blur_x(y*2, x) * LQT[1] +
                blur_x(y*2+1, x) * LQT[0];
        }

        output(x, y) = downsampled;
    }

    // CPU schedule: vectorize the output along x by the target's natural
    // float vector width and parallelize over y.
    // `pipeline` is currently unused; it is kept so the signature is unchanged.
    inline void apply_schedule(
        ::Halide::Pipeline pipeline,
        ::Halide::Target target)
    {
        output.vectorize(x, target.natural_vector_size<float>()).parallel(y);
    }

    // Provides bounds estimates, then applies a manual schedule unless
    // auto-scheduling was requested.
    void schedule()
    {
        // The output is estimated at half the expected input size per axis.
        // NOTE(review): every estimate uses a min of 1 - confirm that 0 was
        // not intended.
        input.dim(0).set_estimate(1, expected_width);
        input.dim(1).set_estimate(1, expected_height);
        output.set_estimate(x, 1, expected_width/2).set_estimate(y, 1, expected_height/2);

        // No manual schedule is applied when auto-scheduling is requested.
        if (auto_schedule) {
            return;
        }

        if (get_target().has_gpu_feature())
        {
            // FIXME
            output.compute_root().gpu_tile(x, y, xo, yo, xi, yi, 8, 8);
        }
        else
        {
            apply_schedule(get_pipeline(), get_target());
        }
    }
};
// Registers pyrdown_float_generator with Halide under the name "pyrdown_float".
HALIDE_REGISTER_GENERATOR(pyrdown_float_generator, pyrdown_float)
```
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment