Skip to content

Instantly share code, notes, and snippets.

@joa
Last active February 20, 2018 12:28
Show Gist options
  • Save joa/6792379 to your computer and use it in GitHub Desktop.
Save joa/6792379 to your computer and use it in GitHub Desktop.
Stackblur algorithm by Mario Klingemann (http://www.quasimondo.com/StackBlurForCanvas/StackBlurDemo.html) Based on C++ implementation by Victor Laskin (http://vitiy.info/stackblur-algorithm-multi-threaded-blur-for-cpp/)
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
/**
*
*/
public final class StackBlur {
private static final int PARALLEL_THRESHOLD = 2048 * 2048;
private static final int PARALLEL_PADDING = 16;
private static final int PARALLEL_AMOUNT = Runtime.getRuntime().availableProcessors();
private static final ExecutorService PARALLEL_POOL = Executors.newFixedThreadPool(PARALLEL_AMOUNT);
private static final int[] TABLE_MUL = {
512, 512, 456, 512, 328, 456, 335, 512, 405, 328, 271, 456, 388, 335, 292, 512,
454, 405, 364, 328, 298, 271, 496, 456, 420, 388, 360, 335, 312, 292, 273, 512,
482, 454, 428, 405, 383, 364, 345, 328, 312, 298, 284, 271, 259, 496, 475, 456,
437, 420, 404, 388, 374, 360, 347, 335, 323, 312, 302, 292, 282, 273, 265, 512,
497, 482, 468, 454, 441, 428, 417, 405, 394, 383, 373, 364, 354, 345, 337, 328,
320, 312, 305, 298, 291, 284, 278, 271, 265, 259, 507, 496, 485, 475, 465, 456,
446, 437, 428, 420, 412, 404, 396, 388, 381, 374, 367, 360, 354, 347, 341, 335,
329, 323, 318, 312, 307, 302, 297, 292, 287, 282, 278, 273, 269, 265, 261, 512,
505, 497, 489, 482, 475, 468, 461, 454, 447, 441, 435, 428, 422, 417, 411, 405,
399, 394, 389, 383, 378, 373, 368, 364, 359, 354, 350, 345, 341, 337, 332, 328,
324, 320, 316, 312, 309, 305, 301, 298, 294, 291, 287, 284, 281, 278, 274, 271,
268, 265, 262, 259, 257, 507, 501, 496, 491, 485, 480, 475, 470, 465, 460, 456,
451, 446, 442, 437, 433, 428, 424, 420, 416, 412, 408, 404, 400, 396, 392, 388,
385, 381, 377, 374, 370, 367, 363, 360, 357, 354, 350, 347, 344, 341, 338, 335,
332, 329, 326, 323, 320, 318, 315, 312, 310, 307, 304, 302, 299, 297, 294, 292,
289, 287, 285, 282, 280, 278, 275, 273, 271, 269, 267, 265, 263, 261, 259};
private static final int[] TABLE_SHR = {
9, 11, 12, 13, 13, 14, 14, 15, 15, 15, 15, 16, 16, 16, 16, 17,
17, 17, 17, 17, 17, 17, 18, 18, 18, 18, 18, 18, 18, 18, 18, 19,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 20, 20, 20,
20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 21,
21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 22, 22, 22, 22, 22, 22,
22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22,
22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 23,
23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23,
23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23,
23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23,
23, 23, 23, 23, 23, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24};
private final int[] stack;
private final int div;
private final int radius;
public StackBlur(final int radius) {
if(radius > 254 || radius < 0) {
throw new IllegalArgumentException("radius must be in [0, 254]");
}
if (radius < 2) {
stack = null;
div = 0;
} else {
div = (radius << 1) + 1;
stack = new int[((div << 2) + PARALLEL_PADDING) * PARALLEL_AMOUNT];
}
this.radius = radius;
}
public void apply(final byte[] src, final int srcOffset, final byte[] dst, final int dstOffset, final int width, final int height) {
if (src.length < (srcOffset + width * height * 4)) {
throw new IllegalArgumentException("src has insufficient size");
}
if (dst.length < (dstOffset + width * height * 4)) {
throw new IllegalArgumentException("dst has insufficient size");
}
if(radius < 2) {
System.arraycopy(src, srcOffset, dst, dstOffset, width * height * 4);
} else if (width * height * radius < PARALLEL_THRESHOLD) {
blur(src, srcOffset, dst, dstOffset, stack, 0, radius, div, width, height, 0, 1);
blur(dst, dstOffset, dst, dstOffset, stack, 0, radius, div, width, height, 0, 2);
} else {
final Future<?>[] futures = new Future<?>[PARALLEL_AMOUNT];
final StackBlurTask[] tasks = new StackBlurTask[PARALLEL_AMOUNT];
for(int i = 0; i < PARALLEL_AMOUNT; ++i) {
tasks[i] =
new StackBlurTask(src, srcOffset, dst, dstOffset, stack, ((div << 2) + PARALLEL_PADDING) * i, radius, div, width, height, i);
futures[i] = PARALLEL_POOL.submit(tasks[i]);
}
join(futures);
for(int i = 0; i < PARALLEL_AMOUNT; ++i) {
tasks[i].step = 2;
futures[i] = PARALLEL_POOL.submit(tasks[i]);
}
join(futures);
}
}
private static void blur(
final byte[] src, final int srcOffset,
final byte[] dst, final int dstOffset,
final int[] stack,
final int stackOffset,
final int radius,
final int div,
final int w,
final int h,
final int core,
final int step) {
int x, y, xp, yp, i;
int sp;
int stackStart;
int stackIndex;
int srcIndex;
int dstIndex;
int sumR, sumG, sumB, sumA;
int sumInR, sumInG, sumInB, sumInA;
int sumOutR, sumOutG, sumOutB, sumOutA;
int r, g, b, a;
final int wm = w - 1;
final int hm = h - 1;
final int w4 = w * 4;
final int mul_sum = TABLE_MUL[radius];
final int shr_sum = TABLE_SHR[radius];
if (step == 1) {
final int minY = core * h / PARALLEL_AMOUNT;
final int maxY = (core + 1) * h / PARALLEL_AMOUNT;
for (y = minY; y < maxY; y++) {
sumR = sumG = sumB = sumA =
sumInR = sumInG = sumInB = sumInA =
sumOutR = sumOutG = sumOutB = sumOutA = 0;
srcIndex = srcOffset + w4 * y; // start of line (0,y)
for(i = 0; i <= radius; i++) {
r = src[srcIndex] & 0xff;
g = src[srcIndex + 1] & 0xff;
b = src[srcIndex + 2] & 0xff;
a = src[srcIndex + 3] & 0xff;
stackIndex = stackOffset + 4 * i;
stack[stackIndex] = r;
stack[stackIndex + 1] = g;
stack[stackIndex + 2] = b;
stack[stackIndex + 3] = a;
sumR += r * (i + 1);
sumG += g * (i + 1);
sumB += b * (i + 1);
sumA += a * (i + 1);
sumOutR += r;
sumOutG += g;
sumOutB += b;
sumOutA += a;
}
for(i = 1; i <= radius; i++) {
if(i <= wm) srcIndex += 4;
r = src[srcIndex] & 0xff;
g = src[srcIndex + 1] & 0xff;
b = src[srcIndex + 2] & 0xff;
a = src[srcIndex + 3] & 0xff;
stackIndex = stackOffset + 4 * (i + radius);
stack[stackIndex] = r;
stack[stackIndex + 1] = g;
stack[stackIndex + 2] = b;
stack[stackIndex + 3] = a;
sumR += r * (radius + 1 - i);
sumG += g * (radius + 1 - i);
sumB += b * (radius + 1 - i);
sumA += a * (radius + 1 - i);
sumInR += r;
sumInG += g;
sumInB += b;
sumInA += a;
}
sp = radius;
xp = radius;
if (xp > wm) xp = wm;
srcIndex = srcOffset + 4 * (xp + y * w);
dstIndex = dstOffset + y * w4;
for (x = 0; x < w; x++) {
dst[dstIndex] = (byte)((sumR * mul_sum) >> shr_sum);
dst[dstIndex + 1] = (byte)((sumG * mul_sum) >> shr_sum);
dst[dstIndex + 2] = (byte)((sumB * mul_sum) >> shr_sum);
dst[dstIndex + 3] = (byte)((sumA * mul_sum) >> shr_sum);
dstIndex += 4;
sumR -= sumOutR;
sumG -= sumOutG;
sumB -= sumOutB;
sumA -= sumOutA;
stackStart = sp + div - radius;
if(stackStart >= div) stackStart -= div;
stackIndex = stackOffset + 4 * stackStart;
sumOutR -= stack[stackIndex];
sumOutG -= stack[stackIndex + 1];
sumOutB -= stack[stackIndex + 2];
sumOutA -= stack[stackIndex + 3];
if(xp < wm) {
srcIndex += 4;
++xp;
}
r = src[srcIndex] & 0xff;
g = src[srcIndex + 1] & 0xff;
b = src[srcIndex + 2] & 0xff;
a = src[srcIndex + 3] & 0xff;
stack[stackIndex] = r;
stack[stackIndex + 1] = g;
stack[stackIndex + 2] = b;
stack[stackIndex + 3] = a;
sumInR += r;
sumInG += g;
sumInB += b;
sumInA += a;
sumR += sumInR;
sumG += sumInG;
sumB += sumInB;
sumA += sumInA;
++sp;
if(sp >= div) sp = 0;
stackIndex = stackOffset + sp * 4;
r = stack[stackIndex];
g = stack[stackIndex + 1];
b = stack[stackIndex + 2];
a = stack[stackIndex + 3];
sumOutR += r;
sumOutG += g;
sumOutB += b;
sumOutA += a;
sumInR -= r;
sumInG -= g;
sumInB -= b;
sumInA -= a;
}
}
}
// step 2
if(step == 2) {
final int minX = core * w / PARALLEL_AMOUNT;
final int maxX = (core + 1) * w / PARALLEL_AMOUNT;
for(x = minX; x < maxX; x++) {
sumR = sumG = sumB = sumA =
sumInR = sumInG = sumInB = sumInA =
sumOutR = sumOutG = sumOutB = sumOutA = 0;
srcIndex = srcOffset + 4 * x; // x,0
for(i = 0; i <= radius; i++) {
r = src[srcIndex] & 0xff;
g = src[srcIndex + 1] & 0xff;
b = src[srcIndex + 2] & 0xff;
a = src[srcIndex + 3] & 0xff;
stackIndex = stackOffset + 4 * i;
stack[stackIndex] = r;
stack[stackIndex + 1] = g;
stack[stackIndex + 2] = b;
stack[stackIndex + 3] = a;
sumR += r * (i + 1);
sumG += g * (i + 1);
sumB += b * (i + 1);
sumA += a * (i + 1);
sumOutR += r;
sumOutG += g;
sumOutB += b;
sumOutA += a;
}
for(i = 1; i <= radius; i++) {
if(i <= hm) srcIndex += w4; // +stride
r = src[srcIndex] & 0xff;
g = src[srcIndex + 1] & 0xff;
b = src[srcIndex + 2] & 0xff;
a = src[srcIndex + 3] & 0xff;
stackIndex = stackOffset + 4 * (i + radius);
stack[stackIndex] = r;
stack[stackIndex + 1] = g;
stack[stackIndex + 2] = b;
stack[stackIndex + 3] = a;
sumR += r * (radius + 1 - i);
sumG += g * (radius + 1 - i);
sumB += b * (radius + 1 - i);
sumA += a * (radius + 1 - i);
sumInR += r;
sumInG += g;
sumInB += b;
sumInA += a;
}
sp = radius;
yp = radius;
if(yp > hm) yp = hm;
srcIndex = srcOffset + 4 * (x + yp * w);
dstIndex = dstOffset + 4 * x;
for(y = 0; y < h; y++) {
dst[dstIndex] = (byte)((sumR * mul_sum) >> shr_sum);
dst[dstIndex + 1] = (byte)((sumG * mul_sum) >> shr_sum);
dst[dstIndex + 2] = (byte)((sumB * mul_sum) >> shr_sum);
dst[dstIndex + 3] = (byte)((sumA * mul_sum) >> shr_sum);
dstIndex += w4;
sumR -= sumOutR;
sumG -= sumOutG;
sumB -= sumOutB;
sumA -= sumOutA;
stackStart = sp + div - radius;
if(stackStart >= div) stackStart -= div;
stackIndex = stackOffset + 4 * stackStart;
sumOutR -= stack[stackIndex];
sumOutG -= stack[stackIndex + 1];
sumOutB -= stack[stackIndex + 2];
sumOutA -= stack[stackIndex + 3];
if(yp < hm) {
srcIndex += w4; // stride
++yp;
}
r = src[srcIndex] & 0xff;
g = src[srcIndex + 1] & 0xff;
b = src[srcIndex + 2] & 0xff;
a = src[srcIndex + 3] & 0xff;
stack[stackIndex] = r;
stack[stackIndex + 1] = g;
stack[stackIndex + 2] = b;
stack[stackIndex + 3] = a;
sumInR += r;
sumInG += g;
sumInB += b;
sumInA += a;
sumR += sumInR;
sumG += sumInG;
sumB += sumInB;
sumA += sumInA;
++sp;
if(sp >= div) sp = 0;
stackIndex = stackOffset + sp * 4;
r = stack[stackIndex];
g = stack[stackIndex + 1];
b = stack[stackIndex + 2];
a = stack[stackIndex + 3];
sumOutR += r;
sumOutG += g;
sumOutB += b;
sumOutA += a;
sumInR -= r;
sumInG -= g;
sumInB -= b;
sumInA -= a;
}
}
}
}
private static void sneakyThrow(Throwable t) {
StackBlur.<RuntimeException>sneakyThrow2(t);
}
@SuppressWarnings("unchecked")
private static <T extends Throwable> T sneakyThrow2(final Throwable t) throws T {
throw (T)t;
}
private static void join(Future<?>[] futures) {
for(int i = 0; i < PARALLEL_AMOUNT; ++i) {
try {
futures[i].get();
} catch (InterruptedException interrupt) {
Thread.currentThread().interrupt();
} catch (ExecutionException executionException) {
sneakyThrow(executionException.getCause());
}
}
}
private static class StackBlurTask implements Runnable {
private final byte[] src;
private final int srcOffset;
private final byte[] dst;
private final int dstOffset;
private final int[] stack;
private final int stackOffset;
private final int radius;
private final int div;
private final int width;
private final int height;
private final int core;
private volatile int step;
public StackBlurTask(byte[] src, int srcOffset, byte[] dst, int dstOffset, int[] stack, int stackOffset, int radius, int div, int width, int height, int core) {
this.src = src;
this.srcOffset = srcOffset;
this.dst = dst;
this.dstOffset = dstOffset;
this.stack = stack;
this.stackOffset = stackOffset;
this.radius = radius;
this.div = div;
this.width = width;
this.height = height;
this.core = core;
this.step = 1;
}
@Override
public void run() {
if(step == 1) {
blur(src, srcOffset, dst, dstOffset, stack, stackOffset, radius, div, width, height, core, 1);
} else {
blur(dst, dstOffset, dst, dstOffset, stack, stackOffset, radius, div, width, height, core, 2);
}
}
}
}
import javax.imageio.ImageIO;
import javax.swing.*;
import java.awt.*;
import java.awt.image.BufferedImage;
import java.io.InputStream;
import java.nio.file.*;
/**
*
*/
public final class StackBlurDemo extends JPanel {
private static final boolean BENCHMARK = true;
public static void main(final String[] args) throws Throwable {
if(args.length < 1) {
System.out.println("Usage: path");
return;
}
final Path p = Paths.get(args[0]);
if(!Files.isReadable(p)) {
System.err.println("Cannot read "+p);
return;
}
final byte[] colors;
final int width;
final int height;
try(final InputStream in = Files.newInputStream(p)) {
final BufferedImage image = ImageIO.read(in);
width = image.getWidth();
height = image.getHeight();
final int size = width * height;
image.coerceData(false);
final int[] packed = image.getRGB(0, 0, width, height, null, 0, width);
colors = new byte[size << 2];
for(int i = 0, j = 0; i < size; i++) {
final int color = packed[i];
colors[j++] = (byte)((color >> 0x18) & 0xff);
colors[j++] = (byte)((color >> 0x10) & 0xff);
colors[j++] = (byte)((color >> 0x08) & 0xff);
colors[j++] = (byte)(color & 0xff);
}
}
if(BENCHMARK) {
final StackBlur blur = new StackBlur(100);
final byte[] src = colors;
final byte[] dst = new byte[colors.length];
System.out.println("warmup ...");
for(int i = 0; i < 100; ++i) {
blur.apply(src, 0, dst, 0, width, height);
}
System.out.println("measure ...");
final long[] timings = new long[100];
for(int i = 0; i < timings.length; ++i) {
final long t0 = System.nanoTime();
blur.apply(src, 0, dst, 0, width, height);
final long t1 = System.nanoTime();
timings[i] = t1 - t0;
}
final long total = sum(timings);
System.out.println("avg[ms]: "+((double)total / (double)timings.length * 1e-6));
}
final JFrame frame = new JFrame("StackBlur");
frame.setSize(width, height);
frame.setDefaultCloseOperation(JFrame.EXIT_ON_CLOSE);
frame.add(new StackBlurDemo(colors, width, height));
frame.setVisible(true);
}
private final byte[] src;
private final byte[] dst;
private final int[] packed;
private final int width, height;
private final BufferedImage image;
private final StackBlur blur;
private StackBlurDemo(final byte[] src, final int width, final int height) {
this.src = src;
this.dst = new byte[src.length];
this.width = width;
this.height = height;
this.packed = new int[width * height];
this.image = new BufferedImage(width, height, BufferedImage.TYPE_INT_ARGB);
this.blur = new StackBlur(100);
applyFilter();
}
private void applyFilter() {
blur.apply(src, 0, dst, 0, width, height);
}
private void updateImage() {
for(int i = 0, j = 0; i < packed.length; ++i) {
packed[i] = ((dst[j++] & 0xff) << 0x18) | ((dst[j++] & 0xff) << 0x10) | ((dst[j++] & 0xff) << 0x08) | (dst[j++] & 0xff);
}
image.setRGB(0, 0, width, height, packed, 0, width);
}
@Override
protected void paintComponent(Graphics g) {
super.paintComponent(g);
updateImage();
g.drawImage(image, 0, 0, null);
}
private static long sum(final long[] values) {
long x = 0L;
for(final long value : values) {
x += value;
}
return x;
}
}
@akkie
Copy link

akkie commented Aug 28, 2015

Hi,

there must be an error in your algorithm. If I use an image that is smaller than 281px then the filter doesn't work as expected.

src
src_error

If the image is greater as 280 pixels then it works as expected.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment