Skip to content

Instantly share code, notes, and snippets.

@jrk
Created July 2, 2015 02:32
Show Gist options
  • Save jrk/d64823d754a732106a60 to your computer and use it in GitHub Desktop.
Save jrk/d64823d754a732106a60 to your computer and use it in GitHub Desktop.
// This algorithm is meant to be equivalent to CV_TM_CCOEFF_NORMED in OpenCV, and normxcorr2 in MATLAB:
// via http://stackoverflow.com/q/31060974/3815
#include <Halide.h>
typedef uint8_t pixel_t;
void normxcorr( Halide::ImageParam input,
Halide::ImageParam kernel,
Halide::Param<pixel_t> kernel_mean,
Halide::Param<pixel_t> kernel_var,
Halide::Func& output )
{
Halide::Var x, y;
Halide::RDom rk( kernel );
// reduction domain for cumulative sums
Halide::RDom ri( 1, input.width() - kernel.width() - 1,
1, input.height() - kernel.height() - 1 );
Halide::Func input_32,
bounded_input,
kernel_32,
knorm,
conv,
normxcorr_internal,
sq_sum_x,
sq_sum_x_local,
sq_sum_y,
sq_sum_y_local,
sum_x,
sum_x_local,
sum_y,
sum_y_local,
win_var,
win_mean;
Halide::Expr ksize = kernel.width() * kernel.height();
// accessing outside the input image always returns 0
bounded_input( x, y ) = Halide::BoundaryConditions::constant_exterior( input, 0 )( x, y );
// cast to 32-bit to make room for multiplication
input_32( x, y ) = Halide::cast<int32_t>( bounded_input( x, y ) );
kernel_32( x, y ) = Halide::cast<int32_t>( kernel( x, y ) );
// cumulative sum along each row
sum_x( x, y ) = input_32( x, y );
sum_x( ri.x, ri.y ) += sum_x( ri.x - 1, ri.y );
// sum of 1 x W strips
// (W is the width of the kernel)
sum_x_local( x, y ) = sum_x( x + kernel.width() - 1, y ) - sum_x( x - 1, y );
// cumulative sums of the 1 x W strips along each column
sum_y( x, y ) = sum_x_local( x, y );
sum_y( ri.x, ri.y ) += sum_y( ri.x, ri.y - 1);
// sums up H strips (as above) to get the sum of an H x W rectangle
// (H is the height of the kernel)
sum_y_local( x, y ) = sum_y( x, y + kernel.height() - 1 ) - sum_y( x, y - 1 );
// same as above, just with squared image values
sq_sum_x( x, y ) = input_32( x, y ) * input_32( x, y );
sq_sum_x( ri.x, ri.y ) += sq_sum_x( ri.x - 1, ri.y );
sq_sum_x_local( x, y ) = sq_sum_x( x + kernel.width() - 1, y ) - sq_sum_x( x - 1, y );
sq_sum_y( x, y ) = sq_sum_x_local( x, y );
sq_sum_y( ri.x, ri.y ) += sq_sum_y( ri.x, ri.y - 1);
sq_sum_y_local( x, y ) = sq_sum_y( x, y + kernel.height() - 1 ) - sq_sum_y( x, y - 1 );
// the mean value of each window
win_mean( x, y ) = sum_y_local( x, y ) / ksize;
// the variance of each window
win_var( x, y ) = sq_sum_y_local( x, y ) / ksize;
win_var( x, y) -= win_mean( x, y ) * win_mean( x, y );
// partially normalize the kernel
// (we'll divide by std. dev. at the end)
knorm( x, y ) = kernel_32( x, y ) - kernel_mean;
// convolve kernel and the input
conv( x, y ) = Halide::sum( knorm( rk.x, rk.y ) * input_32( x + rk.x, y + rk.y ) );
// calculate normxcorr, except scaled to 0 to 254 (for an 8-bit image)
normxcorr_internal( x, y ) = conv( x, y ) * 127 / Halide::sqrt( kernel_var * win_var( x, y ) ) + 127;
// after scaling pixel values, it's safe to cast down to 8-bit
output( x, y ) = Halide::cast<pixel_t>( normxcorr_internal( x, y ) );
knorm.compute_root();
input_32.compute_root();
sum_x.compute_root();
sum_y.compute_root();
sq_sum_x.compute_root();
sq_sum_y.compute_root();
#if 0
sum_x.trace_realizations();
sum_y.trace_realizations();
sq_sum_x.trace_realizations();
sq_sum_y.trace_realizations();
#endif
#if 0
sum_x.vectorize(x, 32).parallel(y);
sum_y.vectorize(x, 32).parallel(y);
sq_sum_x.vectorize(x, 32).parallel(y);
sq_sum_y.vectorize(x, 32).parallel(y);
#endif
}
int main (int argc, char const *argv[])
{
Halide::Func gen;
Halide::ImageParam in(Halide::UInt(8), 2), k(Halide::UInt(8), 2);
Halide::Param<pixel_t> kMean, kVar;
normxcorr(in, k, kMean, kVar, gen);
gen.compile_to_file("normxcorr", {in, k, kMean, kVar});
return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment