Skip to content

Instantly share code, notes, and snippets.

@bellbind bellbind/convolve.js
Last active Jan 3, 2019

Embed
What would you like to do?
[javascript][webworker]waifu2x converter in JavaScript
/**
 * Generic "valid" 2D convolution (no padding) of a single plane.
 *
 * The kernel is applied without flipping: output pixel (x, y) is the sum of
 * mat[j][i] * src(x + i, y + j) over the whole kernel, so each axis of the
 * result shrinks by (kernel size - 1).
 *
 * @param {{a: ArrayLike<number>, w: number, h: number}} src2d
 *     row-major source plane
 * @param {number[][]} mat  kernel as an array of rows (any rectangular size)
 * @returns {{a: Float32Array, w: number, h: number}} convolved plane
 */
var convolve = function (src2d, mat) {
    "use strict";
    var src = src2d.a, w = src2d.w, h = src2d.h;
    var mh = mat.length, mw = mat[0].length;
    // Derive the output size from the kernel's own height and width so that
    // kernels other than 3x3 (and non-square ones) stay inside the source.
    var rw = w - (mw - 1), rh = h - (mh - 1);
    var r = new Float32Array(rw * rh);
    // [hand-optimized] flatten the kernel so the inner loop indexes one array
    var mat2 = new Float32Array(mh * mw);
    for (var j = 0; j < mh; j++) {
        for (var i = 0; i < mw; i++) mat2[j * mw + i] = mat[j][i];
    }
    for (var y = 0; y < rh; y++) {
        var yrw = y * rw;
        for (var x = 0; x < rw; x++) {
            var s = 0;
            for (var kj = 0; kj < mh; kj++) {
                // The window for output (x, y) starts at source (x, y).
                // The former "n - 1" offset was only right for 3x3 kernels
                // and read outside the source for every other size.
                var jmw = kj * mw, base = (y + kj) * w + x;
                for (var ki = 0; ki < mw; ki++) {
                    s += mat2[jmw + ki] * src[base + ki];
                }
            }
            r[yrw + x] = s;
        }
    }
    return {a: r, w: rw, h: rh};
};
/**
 * "Valid" convolution of one plane with a 3x3 kernel.
 *
 * Specialised version of the generic convolution: the nine weights are
 * pulled into locals and the window is addressed with fixed row offsets.
 *
 * @param {{a: ArrayLike<number>, w: number, h: number}} src2d  source plane
 * @param {number[][]} mat  3x3 kernel as three rows of three weights
 * @returns {{a: Float32Array, w: number, h: number}} plane of size (w-2, h-2)
 */
var convolve3x3 = function (src2d, mat) {
    "use strict";
    var pixels = src2d.a, srcW = src2d.w, srcH = src2d.h;
    var outW = srcW - 2, outH = srcH - 2;
    var out = new Float32Array(outW * outH);
    // Kernel weights, row by row.
    var k00 = mat[0][0], k01 = mat[0][1], k02 = mat[0][2];
    var k10 = mat[1][0], k11 = mat[1][1], k12 = mat[1][2];
    var k20 = mat[2][0], k21 = mat[2][1], k22 = mat[2][2];
    // Distance from the window's top-left sample to its middle/bottom row.
    var midRow = srcW, botRow = 2 * srcW;
    for (var oy = 0; oy < outH; oy++) {
        var dstBase = oy * outW;
        var srcBase = oy * srcW;
        for (var ox = 0; ox < outW; ox++) {
            var p = srcBase + ox;
            out[dstBase + ox] =
                k00 * pixels[p] + k01 * pixels[p + 1] + k02 * pixels[p + 2] +
                k10 * pixels[midRow + p] + k11 * pixels[midRow + 1 + p] +
                k12 * pixels[midRow + 2 + p] +
                k20 * pixels[botRow + p] + k21 * pixels[botRow + 1 + p] +
                k22 * pixels[botRow + 2 + p];
        }
    }
    return {a: out, w: outW, h: outH};
};
/**
 * Convolves every input plane with its own 3x3 kernel and accumulates all
 * results into one output plane.
 *
 * @param {Array<{a: ArrayLike<number>, w: number, h: number}>} src2ds
 *     input planes; the size is read from the first plane
 * @param {number[][][]} mats  one 3x3 kernel per input plane
 * @returns {{a: Float32Array, w: number, h: number}} summed plane (w-2, h-2)
 */
var sumConvolve3x3s = function (src2ds, mats) {
    "use strict";
    var srcW = src2ds[0].w|0, srcH = src2ds[0].h|0;
    var outW = srcW - 2|0, outH = srcH - 2|0;
    var acc = new Float32Array(outW * outH|0);
    // Offsets from a window's top-left sample to its middle and bottom rows.
    var mid = srcW|0, bot = 2 * srcW|0;
    var planeCount = src2ds.length|0;
    for (var p = 0; p < planeCount; p++) {
        var px = src2ds[p].a;
        var kern = mats[p];
        var kr0 = kern[0], kr1 = kern[1], kr2 = kern[2];
        var k00 = kr0[0], k01 = kr0[1], k02 = kr0[2];
        var k10 = kr1[0], k11 = kr1[1], k12 = kr1[2];
        var k20 = kr2[0], k21 = kr2[1], k22 = kr2[2];
        for (var oy = 0; oy < outH; oy++) {
            var dstRow = oy * outW|0, srcRow = oy * srcW|0;
            for (var ox = 0; ox < outW; ox++) {
                var at = srcRow + ox|0;
                acc[dstRow + ox|0] +=
                    k00 * px[at] + k01 * px[at + 1|0] + k02 * px[at + 2|0] +
                    k10 * px[mid + at|0] + k11 * px[mid + 1 + at|0] +
                    k12 * px[mid + 2 + at|0] +
                    k20 * px[bot + at|0] + k21 * px[bot + 1 + at|0] +
                    k22 * px[bot + 2 + at|0];
            }
        }
    }
    return {a: acc, w: outW, h: outH};
};
/**
 * Same result as the plain per-plane 3x3 accumulation, but with the x loop
 * unrolled by four: each step loads a 3x6 block of samples once and reuses
 * it for four neighbouring output pixels, cutting array reads per pixel.
 *
 * @param {Array<{a: ArrayLike<number>, w: number, h: number}>} src2ds
 *     input planes; the size is read from the first plane
 * @param {number[][][]} mats  one 3x3 kernel per input plane
 * @returns {{a: Float32Array, w: number, h: number}} summed plane (w-2, h-2)
 */
var sumConvolve3x3sEx = function (src2ds, mats) {
    "use strict";
    var srcW = src2ds[0].w|0, srcH = src2ds[0].h|0;
    var outW = srcW - 2|0, outH = srcH - 2|0;
    var acc = new Float32Array(outW * outH|0);
    // Offsets from a window's top row to its middle and bottom rows.
    var mid = srcW|0, bot = srcW << 1 |0;
    // Columns handled by the unrolled loop (a multiple of 4) and the rest.
    var tail = outW % 4 |0;
    var bulk = outW - tail |0;
    var planeCount = src2ds.length|0;
    for (var p = 0; p < planeCount; p++) {
        var px = src2ds[p].a;
        var kern = mats[p];
        var kr0 = kern[0], kr1 = kern[1], kr2 = kern[2];
        var k00 = kr0[0], k01 = kr0[1], k02 = kr0[2];
        var k10 = kr1[0], k11 = kr1[1], k12 = kr1[2];
        var k20 = kr2[0], k21 = kr2[1], k22 = kr2[2];
        for (var oy = 0; oy < outH; oy++) {
            var dstRow = oy * outW |0, srcRow = oy * srcW |0;
            var ox, dst, at;
            // Unrolled part: four output pixels per iteration.
            for (ox = 0; ox < bulk; ox += 4) {
                dst = dstRow + ox|0;
                at = srcRow + ox|0;
                var atMid = at + mid|0, atBot = at + bot|0;
                // t* = top row, c* = centre row, b* = bottom row, columns 0..5
                var t0 = px[at], t1 = px[at + 1|0], t2 = px[at + 2|0];
                var t3 = px[at + 3|0], t4 = px[at + 4|0], t5 = px[at + 5|0];
                var c0 = px[atMid], c1 = px[atMid + 1|0], c2 = px[atMid + 2|0];
                var c3 = px[atMid + 3|0], c4 = px[atMid + 4|0], c5 = px[atMid + 5|0];
                var b0 = px[atBot], b1 = px[atBot + 1|0], b2 = px[atBot + 2|0];
                var b3 = px[atBot + 3|0], b4 = px[atBot + 4|0], b5 = px[atBot + 5|0];
                acc[dst] +=
                    k00 * t0 + k01 * t1 + k02 * t2 +
                    k10 * c0 + k11 * c1 + k12 * c2 +
                    k20 * b0 + k21 * b1 + k22 * b2;
                acc[dst + 1|0] +=
                    k00 * t1 + k01 * t2 + k02 * t3 +
                    k10 * c1 + k11 * c2 + k12 * c3 +
                    k20 * b1 + k21 * b2 + k22 * b3;
                acc[dst + 2|0] +=
                    k00 * t2 + k01 * t3 + k02 * t4 +
                    k10 * c2 + k11 * c3 + k12 * c4 +
                    k20 * b2 + k21 * b3 + k22 * b4;
                acc[dst + 3|0] +=
                    k00 * t3 + k01 * t4 + k02 * t5 +
                    k10 * c3 + k11 * c4 + k12 * c5 +
                    k20 * b3 + k21 * b4 + k22 * b5;
            }
            // Remaining 0..3 columns, one pixel at a time. A switch-based
            // fall-through variant of this tail ran slower on Chrome.
            for (ox = bulk; ox < outW; ox++) {
                dst = dstRow + ox |0;
                at = srcRow + ox |0;
                acc[dst] +=
                    k00 * px[at] + k01 * px[at + 1|0] + k02 * px[at + 2|0] +
                    k10 * px[mid + at|0] + k11 * px[mid + 1 + at|0] +
                    k12 * px[mid + 2 + at|0] +
                    k20 * px[bot + at|0] + k21 * px[bot + 1 + at|0] +
                    k22 * px[bot + 2 + at|0];
            }
        }
    }
    return {a: acc, w: outW, h: outH};
};
// Sum of per-plane 3x3 convolutions with the x loop unrolled by eight.
//
// For every input plane src2ds[si] the 3x3 kernel mats[si] is applied without
// padding and the result is added into one shared Float32Array, so the
// returned plane is {a, w: w - 2, h: h - 2}. The width and height are taken
// from the first plane only.
//
// This is the kernel waifu2x selects when the SIMD global is not available.
// Each unrolled step loads a 3x10 block of samples once and reuses it for
// eight neighbouring output pixels.
var sumConvolve3x3sEx8 = function (src2ds, mats) {
"use strict";
var w = src2ds[0].w|0, h = src2ds[0].h|0;
var rw = w - 2|0, rh = h - 2|0;
var r = new Float32Array(rw * rh|0);
// ww: offset of the bottom window row (two source rows down)
var ww = w << 1 |0;
// oRC: offset of window row R (0..2), column C (0..9) from the top-left sample
var o00 = 0, o10 = w |0, o20 = ww |0,
o01 = 1, o11 = w + 1|0, o21 = ww + 1|0,
o02 = 2, o12 = w + 2|0, o22 = ww + 2|0,
o03 = 3, o13 = w + 3|0, o23 = ww + 3|0,
o04 = 4, o14 = w + 4|0, o24 = ww + 4|0,
o05 = 5, o15 = w + 5|0, o25 = ww + 5|0,
o06 = 6, o16 = w + 6|0, o26 = ww + 6|0,
o07 = 7, o17 = w + 7|0, o27 = ww + 7|0,
o08 = 8, o18 = w + 8|0, o28 = ww + 8|0,
o09 = 9, o19 = w + 9|0, o29 = ww + 9|0;
// rr8: columns left over after the unrolled part; rw8: unrolled column count
var rr8 = rw % 8 |0;
var rw8 = rw - rr8 |0;
for (var si = 0, sl = src2ds.length|0; si < sl; si++) {
var s = src2ds[si].a, m = mats[si];
var m0 = m[0], m1 = m[1], m2 = m[2];
var m00 = m0[0], m01 = m0[1], m02 = m0[2],
m10 = m1[0], m11 = m1[1], m12 = m1[2],
m20 = m2[0], m21 = m2[1], m22 = m2[2];
for (var y = 0; y < rh; y++) {
// yrw: start of output row y; yw: start of source row y
var yrw = y * rw |0, yw = y * w |0;
// unrolled loop: eight output pixels per iteration
for (var x = 0; x < rw8; x += 8) {
var ri = yrw + x|0, i = yw + x|0;
// sRC: source sample at window row R, column C
var s00 = s[o00+i|0], s10 = s[o10+i|0], s20 = s[o20+i|0],
s01 = s[o01+i|0], s11 = s[o11+i|0], s21 = s[o21+i|0],
s02 = s[o02+i|0], s12 = s[o12+i|0], s22 = s[o22+i|0],
s03 = s[o03+i|0], s13 = s[o13+i|0], s23 = s[o23+i|0],
s04 = s[o04+i|0], s14 = s[o14+i|0], s24 = s[o24+i|0],
s05 = s[o05+i|0], s15 = s[o15+i|0], s25 = s[o25+i|0],
s06 = s[o06+i|0], s16 = s[o16+i|0], s26 = s[o26+i|0],
s07 = s[o07+i|0], s17 = s[o17+i|0], s27 = s[o27+i|0],
s08 = s[o08+i|0], s18 = s[o18+i|0], s28 = s[o28+i|0],
s09 = s[o09+i|0], s19 = s[o19+i|0], s29 = s[o29+i|0];
// output pixel k uses sample columns k, k+1 and k+2
r[ri] +=
m00 * s00 + m01 * s01 + m02 * s02 +
m10 * s10 + m11 * s11 + m12 * s12 +
m20 * s20 + m21 * s21 + m22 * s22;
r[ri+1|0] +=
m00 * s01 + m01 * s02 + m02 * s03 +
m10 * s11 + m11 * s12 + m12 * s13 +
m20 * s21 + m21 * s22 + m22 * s23;
r[ri+2|0] +=
m00 * s02 + m01 * s03 + m02 * s04 +
m10 * s12 + m11 * s13 + m12 * s14 +
m20 * s22 + m21 * s23 + m22 * s24;
r[ri+3|0] +=
m00 * s03 + m01 * s04 + m02 * s05 +
m10 * s13 + m11 * s14 + m12 * s15 +
m20 * s23 + m21 * s24 + m22 * s25;
r[ri+4|0] +=
m00 * s04 + m01 * s05 + m02 * s06 +
m10 * s14 + m11 * s15 + m12 * s16 +
m20 * s24 + m21 * s25 + m22 * s26;
r[ri+5|0] +=
m00 * s05 + m01 * s06 + m02 * s07 +
m10 * s15 + m11 * s16 + m12 * s17 +
m20 * s25 + m21 * s26 + m22 * s27;
r[ri+6|0] +=
m00 * s06 + m01 * s07 + m02 * s08 +
m10 * s16 + m11 * s17 + m12 * s18 +
m20 * s26 + m21 * s27 + m22 * s28;
r[ri+7|0] +=
m00 * s07 + m01 * s08 + m02 * s09 +
m10 * s17 + m11 * s18 + m12 * s19 +
m20 * s27 + m21 * s28 + m22 * s29;
}
// NOTE: a switch-based fall-through version of the tail handling below
// (load only the needed columns, then accumulate rr8 pixels) was tried
// here and ran slowly on Chrome, so the tail uses a plain scalar loop.
// remaining 0..7 columns of the row, one output pixel at a time
for (var x = rw8; x < rw; x++) {
var ri = yrw + x |0, i = yw + x |0;
r[ri] +=
m00 * s[o00+i|0] + m01 * s[o01+i|0] + m02 * s[o02+i|0] +
m10 * s[o10+i|0] + m11 * s[o11+i|0] + m12 * s[o12+i|0] +
m20 * s[o20+i|0] + m21 * s[o21+i|0] + m22 * s[o22+i|0];
}
}
}
return {a: r, w: rw, h: rh};
};
/**
 * SIMD.js variant of the per-plane 3x3 accumulation that vectorises a single
 * output pixel: eight of the nine products are computed in two Float32x4
 * multiplies and reduced with swizzles; the centre product is added as a
 * scalar. Noted by the author as very, very slow.
 *
 * Requires the global `SIMD.Float32x4` API.
 *
 * @param {Array<{a: Float32Array, w: number, h: number}>} src2ds  input planes
 * @param {number[][][]} mats  one 3x3 kernel per input plane
 * @returns {{a: Float32Array, w: number, h: number}} summed plane (w-2, h-2)
 */
var sumConvolve3x3sSIMD = function (src2ds, mats) {
    "use strict";
    var F4 = SIMD.Float32x4;
    var vAdd = F4.add, vMul = F4.mul, vLoad3 = F4.load3;
    var vLane = F4.extractLane, vSwizzle = F4.swizzle, vSetLane = F4.replaceLane;
    var srcW = src2ds[0].w|0, srcH = src2ds[0].h|0;
    var outW = srcW - 2|0, outH = srcH - 2|0;
    var acc = new Float32Array(outW * outH|0);
    // Offsets from a window's top-left sample to the samples used below.
    var offTop = 0 |0;
    var offMidL = srcW + 0 |0, offMidC = srcW + 1 |0, offMidR = srcW + 2 |0;
    var offBot = 2 * srcW + 0 |0;
    var planeCount = src2ds.length|0;
    for (var p = 0; p < planeCount; p++) {
        var px = src2ds[p].a;
        var kern = mats[p];
        var kr0 = kern[0], kr1 = kern[1], kr2 = kern[2];
        var kCentre = kr1[1];
        // Lanes: top row + middle-left, and bottom row + middle-right.
        var wFront = F4(kr0[0], kr0[1], kr0[2], kr1[0]);
        var wRear = F4(kr2[0], kr2[1], kr2[2], kr1[2]);
        for (var oy = 0; oy < outH; oy++) {
            var dstRow = oy * outW |0, srcRow = oy * srcW |0;
            for (var ox = 0; ox < outW; ox++) {
                var dst = dstRow + ox |0, at = srcRow + ox |0;
                var sFront = vSetLane(vLoad3(px, offTop + at), 3, px[offMidL + at|0]);
                var sRear = vSetLane(vLoad3(px, offBot + at), 3, px[offMidR + at|0]);
                var sum = vAdd(vMul(wFront, sFront), vMul(wRear, sRear));
                // Horizontal add: afterwards every lane holds the total.
                sum = vAdd(sum, vSwizzle(sum, 1, 0, 3, 2));
                sum = vAdd(sum, vSwizzle(sum, 2, 3, 0, 1));
                acc[dst] += vLane(sum, 0) + kCentre * px[offMidC + at|0];
            }
        }
    }
    return {a: acc, w: outW, h: outH};
};
/**
 * SIMD.js variant of the per-plane 3x3 accumulation that computes four
 * horizontally adjacent output pixels per step: each kernel weight is
 * splatted across a Float32x4 and multiplied with four consecutive samples.
 * Columns that do not fill a group of four are handled by a scalar loop.
 *
 * Modified from http://inside.pixiv.net/entry/2015/07/28/230317
 * Requires the global `SIMD.Float32x4` API.
 *
 * @param {Array<{a: Float32Array, w: number, h: number}>} src2ds  input planes
 * @param {number[][][]} mats  one 3x3 kernel per input plane
 * @returns {{a: Float32Array, w: number, h: number}} summed plane (w-2, h-2)
 */
var sumConvolve3x3sExSIMD = function (src2ds, mats) {
    "use strict";
    var F4 = SIMD.Float32x4;
    var vAdd = F4.add, vMul = F4.mul, vLoad = F4.load, vStore = F4.store;
    var vSplat = F4.splat;
    var srcW = src2ds[0].w|0, srcH = src2ds[0].h|0;
    var outW = srcW - 2|0, outH = srcH - 2|0;
    var acc = new Float32Array(outW * outH|0);
    // Offsets from a window's top-left sample to each of its nine samples.
    var d00 = 0 |0, d01 = 1 |0, d02 = 2 |0;
    var d10 = srcW + 0 |0, d11 = srcW + 1 |0, d12 = srcW + 2 |0;
    var d20 = 2 * srcW + 0 |0, d21 = 2 * srcW + 1 |0, d22 = 2 * srcW + 2 |0;
    // Number of columns covered by whole groups of four.
    var bulk = outW - outW % 4 |0;
    var planeCount = src2ds.length|0;
    for (var p = 0; p < planeCount; p++) {
        var px = src2ds[p].a;
        var kern = mats[p];
        var kr0 = kern[0], kr1 = kern[1], kr2 = kern[2];
        var k00 = kr0[0], k01 = kr0[1], k02 = kr0[2];
        var k10 = kr1[0], k11 = kr1[1], k12 = kr1[2];
        var k20 = kr2[0], k21 = kr2[1], k22 = kr2[2];
        var v00 = vSplat(k00), v01 = vSplat(k01), v02 = vSplat(k02);
        var v10 = vSplat(k10), v11 = vSplat(k11), v12 = vSplat(k12);
        var v20 = vSplat(k20), v21 = vSplat(k21), v22 = vSplat(k22);
        for (var oy = 0; oy < outH; oy++) {
            var dstRow = oy * outW |0, srcRow = oy * srcW |0;
            var ox, dst, at;
            // Vector part: four output pixels per iteration.
            for (ox = 0; ox < bulk; ox += 4) {
                dst = dstRow + ox|0;
                at = srcRow + ox|0;
                var sum = vMul(v00, vLoad(px, d00 + at));
                sum = vAdd(sum, vMul(v01, vLoad(px, d01 + at)));
                sum = vAdd(sum, vMul(v02, vLoad(px, d02 + at)));
                sum = vAdd(sum, vMul(v10, vLoad(px, d10 + at)));
                sum = vAdd(sum, vMul(v11, vLoad(px, d11 + at)));
                sum = vAdd(sum, vMul(v12, vLoad(px, d12 + at)));
                sum = vAdd(sum, vMul(v20, vLoad(px, d20 + at)));
                sum = vAdd(sum, vMul(v21, vLoad(px, d21 + at)));
                sum = vAdd(sum, vMul(v22, vLoad(px, d22 + at)));
                // Add to what earlier planes already accumulated.
                vStore(acc, dst, vAdd(sum, vLoad(acc, dst)));
            }
            // Scalar part: the remaining 0..3 columns.
            for (ox = bulk; ox < outW; ox++) {
                dst = dstRow + ox |0;
                at = srcRow + ox |0;
                acc[dst] +=
                    k00 * px[d00 + at|0] + k01 * px[d01 + at|0] + k02 * px[d02 + at|0] +
                    k10 * px[d10 + at|0] + k11 * px[d11 + at|0] + k12 * px[d12 + at|0] +
                    k20 * px[d20 + at|0] + k21 * px[d21 + at|0] + k22 * px[d22 + at|0];
            }
        }
    }
    return {a: acc, w: outW, h: outH};
};
/**
 * Pads a plane by `len` pixels on every side, repeating the nearest edge
 * pixel (replicate padding).
 *
 * The source is copied into the centre of the enlarged plane, then the
 * border is grown one ring at a time: top/bottom rows, left/right columns,
 * and finally the four corners of the ring.
 *
 * @param {{a: ArrayLike<number>, w: number, h: number}} src2d  source plane
 * @param {number} len  padding width in pixels
 * @returns {{a: Float32Array, w: number, h: number}} plane of size
 *     (w + 2*len, h + 2*len)
 */
var edgePad = function (src2d, len) {
    "use strict";
    var input = src2d.a, srcW = src2d.w|0, srcH = src2d.h|0;
    var outW = (srcW + 2 * len)|0, outH = (srcH + 2 * len)|0;
    var out = new Float32Array(outW * outH);
    var row, col, k;
    // Place the source in the middle of the output.
    for (row = 0; row < srcH; row++) {
        for (col = 0; col < srcW; col++) {
            out[(row + len) * outW + col + len|0] = input[row * srcW + col|0];
        }
    }
    for (var ring = 0; ring < len; ring++) {
        // Bounds of the region filled so far (inclusive).
        var lo = len - ring|0;
        var hiX = len + srcW - 1 + ring|0, hiY = len + srcH - 1 + ring|0;
        // Row start offsets: new row above, first filled row, last filled
        // row, new row below.
        var above = (lo - 1) * outW, first = lo * outW;
        var last = hiY * outW, below = (hiY + 1) * outW;
        var spanW = srcW + 2 * ring, spanH = srcH + 2 * ring;
        for (k = 0; k < spanW; k++) {
            out[above + lo + k|0] = out[first + lo + k|0];
            out[below + lo + k|0] = out[last + lo + k|0];
        }
        for (k = 0; k < spanH; k++) {
            var line = (lo + k) * outW;
            out[line + lo - 1|0] = out[line + lo|0];
            out[line + hiX + 1|0] = out[line + hiX|0];
        }
        // Corners copy the nearest already-filled corner pixel.
        out[above + lo - 1|0] = out[first + lo|0];
        out[above + hiX + 1|0] = out[first + hiX|0];
        out[below + hiX + 1|0] = out[last + hiX|0];
        out[below + lo - 1|0] = out[last + lo|0];
    }
    return {a: out, w: outW, h: outH};
};
/*
var w = 3, h = 3;
var src = {a: new Float32Array(w * h), w: w, h: h};
for (var y = 0; y < h; y++) {
for (var x = 0; x < w; x++) {
src.a[y * w + x] = y * w + x + 1;
}
}
//console.log(src.a);
var pad = edgePad(src, 2);
//console.log(pad.a);
var mat = [
[1/8, 1/8, 1/8],
[1/8, 0/8, 1/8],
[1/8, 1/8, 1/8],
];
var r = convolve(pad, mat);
console.log(r.a);
*/
<!doctype html>
<!-- Demo page for the JavaScript waifu2x converter. -->
<html>
<head>
<meta charset="utf-8" />
<title>waifu2xjs</title>
<!-- Scripts loaded by the page itself; the convolution code is loaded by the worker. -->
<script src="rgb2yuv.js"></script>
<script src="loadjson.js"></script>
<script src="script.js"></script>
</head>
<body>
<h1>(Slow) <a href="https://github.com/nagadomi/waifu2x">waifu2x</a>
converter in JavaScript</h1>
<!-- Inputs: script.js looks up #models and #file by id; each option value is a model JSON URL. -->
<div>
Choose a Model: <select id="models">
<option value="z-scale2.0x_model.json">scale2.0x</option>
<option value="z-noise1_model.json">noise1</option>
<option value="z-noise2_model.json">noise2</option>
</select>
then Set an Image File: <input id="file" type="file" />
</div>
<hr />
<!-- Output placeholders: script.js appends images and writes status text into these spans. -->
<div>
Source: <span id="source"></span>
</div>
<div>
Image 2x: <span id="image2x"></span>
</div>
<div>Progress: <span id="progress"></span></div>
<div>Computed Time: <span id="time"></span></div>
<div>Finish Estimated: <span id="expected"></span></div>
<hr />
<div>
Result: <span id="result"></span>
</div>
<hr />
<!-- Credits and license. -->
<div>
The original algorithm comes from
<a href="https://github.com/marcan/cl-waifu2x/tree/master/tools/"
>waifu2x.py</a>.
</div>
<div>
[<a href="https://gist.github.com/bellbind/d9dc9ccdd4a8735a9990"
>sources</a>]
The license is same as the original:
<a href="http://opensource.org/licenses/mit-license.php">
MIT License</a>
</div>
</body>
</html>
/**
 * Loads and parses a JSON document.
 *
 * Uses XMLHttpRequest when it is available; otherwise falls back to reading
 * `url` as a file path through Node's `fs` module.
 *
 * @param {string} url  URL (or file path in the fallback) of the JSON file
 * @returns {Promise<*>} resolves with the parsed JSON value; rejects with an
 *     Error on network failure, a non-success HTTP status, an unreadable
 *     file, or a body that is not valid JSON
 */
var loadJson = function (url) {
    "use strict";
    return new Promise(function (f, r) {
        if (typeof XMLHttpRequest === "function") {
            var req = new XMLHttpRequest();
            req.addEventListener("error", function (ev) {
                r(new Error("failed to load " + url + ": " +
                            (req.statusText || "network error")));
            }, false);
            req.addEventListener("load", function (ev) {
                // "load" also fires for HTTP error responses such as 404, so
                // the status has to be checked explicitly. Status 0 is what
                // non-HTTP schemes (e.g. file://) report on success.
                var ok = req.status === 0 ||
                    (req.status >= 200 && req.status < 300);
                if (!ok) {
                    r(new Error("failed to load " + url + ": " +
                                req.status + " " + req.statusText));
                } else if (req.response === null) {
                    // With responseType "json" a body that cannot be parsed
                    // is delivered as null.
                    r(new Error("failed to load " + url +
                                ": missing or invalid JSON"));
                } else {
                    f(req.response);
                }
            }, false);
            req.open("GET", url, true);
            req.responseType = "json";
            req.send();
        } else {
            require("fs").readFile(url, function (err, data) {
                if (err) {
                    r(err);
                    return;
                }
                // A throw inside this callback would escape the promise, so
                // parse errors are turned into a rejection.
                var parsed;
                try {
                    parsed = JSON.parse(data);
                } catch (parseError) {
                    r(parseError);
                    return;
                }
                f(parsed);
            });
        }
    });
};
/*
loadJson("scale2.0x_model.json").then(function (json) {
console.log(json);
});
*/
// from http://stackoverflow.com/questions/7041172/
/**
 * Converts an interleaved RGB image to interleaved YCbCr.
 *
 * Results are stored in a Uint8ClampedArray, so each channel is rounded and
 * clamped to 0..255. The alpha plane is passed through by reference.
 *
 * @param {{a: ArrayLike<number>, alpha: *, w: number, h: number}} rgb2d
 *     image with 3 values (R, G, B) per pixel
 * @returns {{a: Uint8ClampedArray, alpha: *, w: number, h: number}}
 *     image with 3 values (Y, Cb, Cr) per pixel
 */
var rgb2ycbcr = function (rgb2d) {
    "use strict";
    var srcPx = rgb2d.a, w = rgb2d.w|0, h = rgb2d.h|0;
    var out = new Uint8ClampedArray(w * h * 3);
    for (var row = 0; row < h; row++) {
        for (var col = 0; col < w; col++) {
            var base = 3 * (row * w + col) |0;
            var red = srcPx[base];
            var green = srcPx[base + 1 |0];
            var blue = srcPx[base + 2 |0];
            // luma
            out[base] = 0.29900 * red + 0.58700 * green + 0.11400 * blue;
            // chroma, centred on 128
            out[base + 1 |0] =
                -0.16874 * red - 0.33126 * green + 0.50000 * blue + 128;
            out[base + 2 |0] =
                0.50000 * red - 0.41869 * green - 0.08131 * blue + 128;
        }
    }
    return {a: out, alpha: rgb2d.alpha, w: w, h: h};
};
/**
 * Converts an interleaved YCbCr image back to interleaved RGB.
 *
 * Results are stored in a Uint8ClampedArray, so each channel is rounded and
 * clamped to 0..255. The alpha plane is passed through by reference.
 *
 * @param {{a: ArrayLike<number>, alpha: *, w: number, h: number}} ycbcr2d
 *     image with 3 values (Y, Cb, Cr) per pixel
 * @returns {{a: Uint8ClampedArray, alpha: *, w: number, h: number}}
 *     image with 3 values (R, G, B) per pixel
 */
var ycbcr2rgb = function (ycbcr2d) {
    "use strict";
    var srcPx = ycbcr2d.a, w = ycbcr2d.w|0, h = ycbcr2d.h|0;
    var out = new Uint8ClampedArray(w * h * 3);
    for (var row = 0; row < h; row++) {
        for (var col = 0; col < w; col++) {
            var base = 3 * (row * w + col) |0;
            var luma = srcPx[base];
            // Chroma is stored with a +128 bias; remove it (as integers).
            var cb = srcPx[base + 1|0] - 128 |0;
            var cr = srcPx[base + 2|0] - 128 |0;
            out[base] = luma + 1.40200 * cr;
            out[base + 1|0] = luma - 0.34414 * cb - 0.71414 * cr;
            out[base + 2|0] = luma + 1.77200 * cb;
        }
    }
    return {a: out, alpha: ycbcr2d.alpha, w: w, h: h};
};
/**
 * Doubles the size of a 3-channel image with nearest-neighbour sampling.
 *
 * The alpha plane is scaled the same way; when the input has no alpha plane
 * the output alpha is fully opaque (255).
 *
 * @param {{a: ArrayLike<number>, alpha: ?ArrayLike<number>, w: number, h: number}}
 *     color3bmp2d  image with 3 values per pixel
 * @returns {{a: Uint8ClampedArray, alpha: Uint8ClampedArray, w: number, h: number}}
 *     image of size (2*w, 2*h)
 */
var resize2x = function (color3bmp2d) {
    "use strict";
    var srcPx = color3bmp2d.a, w = color3bmp2d.w|0, h = color3bmp2d.h|0;
    var outW = 2 * w|0, outH = 2 * h|0;
    var outPx = new Uint8ClampedArray(outW * outH * 3);
    var outAlpha = new Uint8ClampedArray(outW * outH);
    for (var oy = 0; oy < outH; oy++) {
        for (var ox = 0; ox < outW; ox++) {
            var dst = oy * outW + ox |0;
            // Each source pixel covers a 2x2 block of the output.
            var from = (oy >> 1) * w + (ox >> 1) |0;
            for (var ch = 0; ch < 3; ch++) {
                outPx[3 * dst + ch |0] = srcPx[3 * from + ch |0];
            }
            if (color3bmp2d.alpha) {
                outAlpha[dst] = color3bmp2d.alpha[from];
            } else {
                outAlpha[dst] = 255;
            }
        }
    }
    return {a: outPx, alpha: outAlpha, w: outW, h: outH};
};
/**
 * Extracts one channel of an interleaved 3-channel image as its own plane.
 *
 * @param {{a: ArrayLike<number>, w: number, h: number}} color3bmp2d
 *     image with 3 values per pixel
 * @param {number} index  channel to extract (0, 1 or 2)
 * @returns {Uint8ClampedArray} row-major plane holding one value per pixel
 */
var getColors = function (color3bmp2d, index) {
    "use strict";
    var color3bmp = color3bmp2d.a, w = color3bmp2d.w|0, h = color3bmp2d.h|0;
    // One value per pixel. The buffer used to be allocated four times too
    // large (4 * w * h); only the first w * h entries were ever written, so
    // every later copy and scaling pass did four times the necessary work.
    var r = new Uint8ClampedArray(w * h);
    for (var y = 0; y < h; y++) {
        for (var x = 0; x < w; x++) {
            var i = y * w + x |0;
            r[i] = color3bmp[3 * i + index |0];
        }
    }
    return r;
};
/**
 * Writes a single-channel plane back into one channel of an interleaved
 * 3-channel image, modifying the image in place.
 *
 * @param {{a: ArrayLike<number>, w: number, h: number}} color3bmp2d
 *     image with 3 values per pixel (mutated)
 * @param {number} index  channel to overwrite (0, 1 or 2)
 * @param {ArrayLike<number>} colors  one value per pixel, row-major
 * @returns {Object} the same `color3bmp2d` object
 */
var setColors = function (color3bmp2d, index, colors) {
    "use strict";
    var target = color3bmp2d.a;
    var w = color3bmp2d.w|0, h = color3bmp2d.h|0;
    for (var row = 0; row < h; row++) {
        var rowStart = row * w;
        for (var col = 0; col < w; col++) {
            var pixel = rowStart + col |0;
            target[3 * pixel + index|0] = colors[pixel];
        }
    }
    return color3bmp2d;
};
// Page controller: wires the model selector and file input to the converter.
// When an image file is chosen it is shown as the source, a nearest-neighbour
// 2x preview is rendered, and the conversion is run in a Web Worker while
// progress, elapsed time and an estimated finish time are displayed.
window.addEventListener("load", function () {
    "use strict";
    var models = document.getElementById("models");
    var file = document.getElementById("file");
    var source = document.getElementById("source");
    var image2x = document.getElementById("image2x");
    var result = document.getElementById("result");
    var progress = document.getElementById("progress");
    var time = document.getElementById("time");
    var expected = document.getElementById("expected");

    // Re-enables the inputs that are locked while a conversion is running.
    var unlockInputs = function () {
        file.disabled = false;
        models.disabled = false;
    };

    file.addEventListener("change", function (ev) {
        var imageFile = ev.target.files[0];
        if (!imageFile || !imageFile.type.match("image.*")) return;
        var reader = new FileReader();
        reader.addEventListener("load", function (ev) {
            var url = ev.target.result;
            file.disabled = true;
            models.disabled = true;
            progress.textContent = "loading model: " + models.value + " ...";
            Promise.all([
                loadImage(url), loadJson(models.value)
            ]).then(function (data) {
                runWorker(data[1], data[0]);
            }).catch(function (err) {
                console.log(err);
                // Without this the inputs would stay disabled forever after
                // a failed image/model load or a failed worker start.
                progress.textContent = "failed: " + err;
                unlockInputs();
            });
        }, false);
        reader.readAsDataURL(imageFile);
    }, false);

    // Shows the image in the "source" area and resolves with the <img>
    // element once it has loaded; rejects if the image cannot be decoded.
    var loadImage = function (imageUrl) {
        return new Promise(function (f, r) {
            var src = document.createElement("img");
            src.addEventListener("load", function (ev) {f(src);}, false);
            src.addEventListener("error", function (ev) {
                r(new Error("failed to load the image"));
            }, false);
            src.src = imageUrl;
            source.appendChild(src);
        });
    };

    // Renders the 2x preview and runs the conversion of `src` with `model`
    // in a dedicated worker, updating the status fields from its messages.
    var runWorker = function (model, src) {
        //alert([src.width, src.height]);
        var original = img2rgb(src);
        console.log("image converted", original);
        var scale2x = resize2x(original);
        console.log("scale 2x", scale2x);
        image2x.appendChild(rgb2img(scale2x, "nearest2x"));
        progress.textContent = "initialize worker...";
        var start = Date.now();
        var worker = new Worker("worker.js");
        //var worker = new Worker("y-parallel-worker.js");
        worker.addEventListener("message", function (ev) {
            //console.log(ev.data);
            if (ev.data.msg) {
                // plain log line from the worker
                console.log(ev.data.msg);
            } else if (ev.data.boot) {
                // worker is ready: hand over the job
                worker.postMessage({model: model, original: original});
            } else if (ev.data.count) {
                // progress report: extrapolate the remaining time linearly
                var now = Date.now();
                var spend = now - start;
                time.textContent = spend/60000 + "m";
                var remain = spend / ev.data.progress *
                    (ev.data.count - ev.data.progress);
                expected.textContent = new Date(now + (0|remain));
                progress.textContent = ev.data.progress + "/" + ev.data.count;
            } else if (ev.data.rgb2x) {
                // final result
                console.log("finished:", ev.data.rgb2x);
                time.textContent = (Date.now() - start)/60000 + "m";
                var img = rgb2img(ev.data.rgb2x, "waifu2x");
                result.appendChild(img);
                worker.terminate();
                unlockInputs();
            } else {
                console.log("unknown message", ev.data);
            }
        }, false);
        worker.addEventListener("error", function (ev) {
            console.log(ev);
            worker.terminate();
            unlockInputs();
        }, false);
    };

    // Draws an image element onto a canvas and splits its RGBA pixels into
    // an interleaved RGB array plus a separate alpha plane.
    var img2rgb = function (src) {
        var canvas = document.createElement("canvas");
        canvas.width = src.width, canvas.height = src.height;
        var c2d = canvas.getContext("2d");
        c2d.drawImage(src, 0, 0);
        var image = c2d.getImageData(0, 0, canvas.width, canvas.height);
        var rgb = new Uint8ClampedArray(3 * image.width * image.height);
        var alpha = new Uint8ClampedArray(image.width * image.height);
        for (var y = 0; y < image.height; y++) {
            for (var x = 0; x < image.width; x++) {
                var index = y * image.width + x;
                rgb[3 * index + 0] = image.data[4 * index + 0];
                rgb[3 * index + 1] = image.data[4 * index + 1];
                rgb[3 * index + 2] = image.data[4 * index + 2];
                alpha[index] = image.data[4 * index + 3];
            }
        }
        return {a: rgb, alpha: alpha, w: image.width, h: image.height};
    };

    // Renders an {a, alpha, w, h} image to a PNG data URL and returns a
    // download link (<a>) wrapping an <img> of it; `name` is the file name
    // without extension. A missing alpha plane means fully opaque.
    var rgb2img = function (rgb, name) {
        var canvas = document.createElement("canvas");
        canvas.width = rgb.w, canvas.height = rgb.h;
        var c2d = canvas.getContext("2d");
        var image = c2d.createImageData(canvas.width, canvas.height);
        for (var y = 0; y < image.height; y++) {
            for (var x = 0; x < image.width; x++) {
                var index = y * image.width + x;
                image.data[4 * index + 0] = rgb.a[3 * index + 0];
                image.data[4 * index + 1] = rgb.a[3 * index + 1];
                image.data[4 * index + 2] = rgb.a[3 * index + 2];
                image.data[4 * index + 3] = rgb.alpha ? rgb.alpha[index] : 255;
            }
        }
        c2d.putImageData(image, 0, 0);
        var img = document.createElement("img");
        var a = document.createElement("a");
        a.href = img.src = canvas.toDataURL("image/png");
        a.download = name + ".png";
        a.appendChild(img);
        return a;
    };
});
// translated from https://marcan.st/transf/waifu2x.py
/**
 * Runs the waifu2x network on an RGB image and returns the 2x result.
 *
 * The image is converted to YCbCr and doubled with nearest-neighbour
 * sampling; only the Y channel is then refined by the model. Every model
 * step produces one output plane per bias entry by summing 3x3 convolutions
 * of all current planes, adding the bias and applying a leaky ReLU
 * (negative values are scaled by 0.1).
 *
 * @param {{a: ArrayLike<number>, alpha: *, w: number, h: number}} rgb2d
 *     source image with 3 values per pixel
 * @param {Array<{nInputPlane: number, nOutputPlane: number,
 *     bias: number[], weight: number[][][][]}>} model  network steps
 * @param {function(number, number)=} callback  called as
 *     callback(progress, count) after each output plane is finished
 * @returns {{a: Uint8ClampedArray, alpha: *, w: number, h: number}}
 *     converted RGB image of size (2*w, 2*h)
 */
var waifu2x = function (rgb2d, model, callback) {
    "use strict";
    var report = callback || function (progress, count) {};
    var ycc2x = resize2x(rgb2ycbcr(rgb2d));
    // Luma plane scaled to 0..1.
    var luma = mulTo(new Float32Array(getColors(ycc2x, 0)), 1/255);
    // Each 3x3 step trims one pixel per side, so pad by the number of steps.
    var padded = edgePad({a: luma, w: ycc2x.w, h: ycc2x.h}, model.length);
    // Total number of single-plane convolutions, used for progress reports.
    var total = 0;
    model.forEach(function (step) {
        total += step.nInputPlane * step.nOutputPlane;
    });
    var done = 0;
    // Convolution kernel: prefer the SIMD.js version when it is available.
    var sumConvolve = sumConvolve3x3sEx8;
    if (typeof SIMD === "object") {
        sumConvolve = sumConvolve3x3sExSIMD;
    }
    var planes = [padded];
    model.forEach(function (step) {
        planes = step.bias.map(function (bias, index) {
            // [NOTE] specialized sum of 3x3 convolutions over all planes
            var out = sumConvolve(planes, step.weight[index|0]);
            var values = out.a, n = values.length|0;
            for (var k = 0; k < n; k++) {
                var biased = values[k] + bias;
                if (biased < 0) {
                    values[k] = 0.1 * biased;
                } else {
                    values[k] = biased;
                }
            }
            done += planes.length;
            report(done, total);
            return out;
        });
    });
    // Put the refined luma (back in 0..255) into the doubled image.
    setColors(ycc2x, 0, new Uint8ClampedArray(mulTo(planes[0].a, 255)));
    return ycbcr2rgb(ycc2x);
};
// TypedArray utility
/**
 * Multiplies every element of an array by a scalar, in place.
 *
 * @param {Object} a  array or typed array to scale (mutated)
 * @param {number} v  factor
 * @returns {Object} the same array `a`
 */
var mulTo = function (a, v) {
    "use strict";
    var size = a.length|0;
    var k = 0;
    while (k < size) {
        a[k] = a[k] * v;
        k++;
    }
    return a;
};
"use strict";
// Worker entry point for the single-worker converter.
// Protocol (all via postMessage):
//   out: {msg}             log line
//   out: {boot: true}      the worker is ready to receive a job
//   in:  {model, original} the job
//   out: {progress, count} progress report
//   out: {rgb2x}           the converted image
self.importScripts("convolve.js", "rgb2yuv.js", "waifu2x.js");
self.postMessage({msg: "script loaded: " + typeof waifu2x});
self.postMessage({msg: "Promise available: " + typeof Promise});
self.addEventListener("message", function (ev) {
    self.postMessage({msg: "data accpeted: " + typeof ev.data});
    try {
        var job = ev.data;
        var net = job.model, image = job.original;
        var onProgress = function (progress, count) {
            self.postMessage({progress: progress, count: count});
        };
        var converted = waifu2x(image, net, onProgress);
        self.postMessage({rgb2x: converted});
    } catch (err) {
        // Failures are reported to the page as a log message.
        self.postMessage({msg: "error: " + err.toString() + "\n" + err.stack});
    }
}, false);
self.postMessage({boot: true});
"use strict";
// Sub-worker: computes one output plane of one model step.
// in:  {planes, weight, bias, i}
// out: {next, i}  where `next` is the activated output plane and `i` is
//                 echoed back so the caller can order the results.
self.importScripts("convolve.js");
var sumConvolve = sumConvolve3x3sEx8;
if (typeof SIMD === "object") {
    sumConvolve = sumConvolve3x3sExSIMD;
}
self.addEventListener("message", function (ev) {
    var job = ev.data;
    var inputs = job.planes, kernels = job.weight, offset = job.bias;
    var out = sumConvolve(inputs, kernels);
    var values = out.a, n = values.length|0;
    // Add the bias, then apply a leaky ReLU (negative values scaled by 0.1).
    for (var k = 0; k < n; k++) {
        var biased = values[k] + offset;
        if (biased < 0) {
            values[k] = 0.1 * biased;
        } else {
            values[k] = biased;
        }
    }
    self.postMessage({next: out, i: ev.data.i});
}, false);
// [FYI] Processing convolutions in sub workers
// NOTE1: It is slower than the single worker version on firefox
// NOTE2: Sub Workers (using "Worker" inside a Worker thread) are not yet supported on Chrome
/**
 * Asynchronous waifu2x conversion that spreads the convolutions of each
 * model step over a small pool of sub-workers ("y-parallel-sub.js").
 *
 * The image is converted to YCbCr and doubled with nearest-neighbour
 * sampling; only the Y channel is refined by the model. Steps run one after
 * another; within a step every output plane is computed by a sub-worker.
 *
 * @param {{a: ArrayLike<number>, alpha: *, w: number, h: number}} rgb2d
 *     source image with 3 values per pixel
 * @param {Array<{nInputPlane: number, nOutputPlane: number,
 *     bias: number[], weight: number[][][][]}>} model  network steps
 * @param {function(number, number)=} callback  called as
 *     callback(progress, count) whenever an output plane is finished
 * @returns {Promise<{a: Uint8ClampedArray, alpha: *, w: number, h: number}>}
 *     resolves with the converted RGB image; rejects with the sub-worker's
 *     error event (or a thrown error) if a step fails
 */
var waifu2x = function (rgb2d, model, callback) {
    "use strict";
    callback = callback || function (progress, count) {};
    var ycbcr2d = rgb2ycbcr(rgb2d);
    var ycbcr2d2x = resize2x(ycbcr2d);
    var ysU8 = getColors(ycbcr2d2x, 0);
    // luma plane scaled to 0..1
    var ys = mulTo(new Float32Array(ysU8), 1/255);
    // each 3x3 step trims one pixel per side, so pad by the number of steps
    var padYs = edgePad({a: ys, w: ycbcr2d2x.w, h: ycbcr2d2x.h}, model.length);
    var count = model.reduce(function (sum, step) {
        return sum + step.nInputPlane * step.nOutputPlane;
    }, 0);
    var progress = 0;
    var workerSize = 3;
    var workers = [];
    for (var i = 0; i < workerSize; i++) {
        workers.push(new Worker("y-parallel-sub.js"));
    }
    var terminateWorkers = function () {
        workers.forEach(function (w) {w.terminate();});
    };
    var steps = model.slice();
    return Promise.all([padYs]).then(function doStep(planes) {
        var step = steps.shift();
        if (!step) return planes;
        return new Promise(function (f, r) {
            var nexts = new Array(step.bias.length);
            var accepts = 0;
            var handler = function (ev) {
                // results may arrive in any order; `i` restores the order
                nexts[ev.data.i] = ev.data.next;
                accepts++;
                progress += planes.length;
                callback(progress, count);
                if (accepts === step.bias.length) {
                    workers.forEach(function (w) {
                        w.removeEventListener("message", handler, false);
                        w.removeEventListener("error", r, false);
                    });
                    f(nexts);
                }
            };
            workers.forEach(function (w) {
                w.addEventListener("message", handler, false);
                w.addEventListener("error", r, false);
            });
            // round-robin the output planes of this step over the workers
            for (var i = 0; i < step.bias.length; i++) {
                var bias = step.bias[i], weight = step.weight[i];
                workers[i % workerSize].postMessage({
                    planes: planes, bias: bias, weight: weight, i: i});
            }
        }).then(doStep);
    }).then(function (finished) {
        // All steps are done: the workers are no longer needed. Terminate
        // them first so they are released even if the conversion below throws.
        terminateWorkers();
        setColors(ycbcr2d2x, 0,
                  new Uint8ClampedArray(mulTo(finished[0].a, 255)));
        return ycbcr2rgb(ycbcr2d2x);
    }, function (err) {
        // A failed step used to leave all sub-workers running; release them
        // and pass the failure on to the caller.
        terminateWorkers();
        throw err;
    });
};
// TypedArray utility
/**
 * Scales an array in place by multiplying each element with `v`.
 *
 * @param {Object} a  array or typed array to scale (mutated)
 * @param {number} v  factor
 * @returns {Object} the same array `a`
 */
var mulTo = function (a, v) {
    "use strict";
    var size = a.length|0;
    for (var k = 0; k < size; k += 1) {
        a[k] = a[k] * v;
    }
    return a;
};
"use strict";
// FYI: waifu2x with convolving on sub workers (but slower than single worker)
// Worker entry point for the sub-worker based converter. It speaks the same
// protocol as the single-worker version ({msg}, {boot}, {progress, count},
// {rgb2x}), but the conversion itself is asynchronous.
self.importScripts("convolve.js", "rgb2yuv.js", "y-parallel-waifu2x.js");
self.postMessage({msg: "script loaded: " + typeof waifu2x});
self.postMessage({msg: "Promise available: " + typeof Promise});
self.addEventListener("message", function (ev) {
    self.postMessage({msg: "data accpeted: " + typeof ev.data});
    var job = ev.data;
    var net = job.model, image = job.original;
    var onProgress = function (progress, count) {
        self.postMessage({progress: progress, count: count});
    };
    var onDone = function (rgb2x) {
        self.postMessage({rgb2x: rgb2x});
    };
    // Failures are reported to the page as a log message.
    var onFail = function (err) {
        self.postMessage({msg: "error: " + err.toString() + "\n" + err.stack});
    };
    waifu2x(image, net, onProgress).then(onDone).catch(onFail);
}, false);
self.postMessage({boot: true});
This file has been truncated, but you can view the full file.
View raw

(Sorry about that, but we can’t show files that are this big right now.)

View raw

(Sorry about that, but we can’t show files that are this big right now.)

@bellbind

This comment has been minimized.

Copy link
Owner Author

commented Jun 6, 2015

demo: https://rawgit.com/bellbind/d9dc9ccdd4a8735a9990/raw/index.html

Conversion time for a 48x96 image (with the initial version of convolve()):

  • 13 minutes on firefox
  • 20 minutes on chrome

(flattening the matrix into a linear array made it more than 10x faster)


(limiting the code to 3x3 convolution matrices made it 3x faster than the generic flattened-matrix version)

  • 0.31 minutes on firefox
  • 0.26 minutes on chrome

(accumulating directly into the 3x3 convolution result, instead of adding up separate results, made it 2x faster)

@bellbind

This comment has been minimized.

Copy link
Owner Author

commented Jun 8, 2015

Conversion time for a 440x335 image (ref):

  • 5.13 minutes on firefox
  • 3.58 minutes on chrome
@bellbind

This comment has been minimized.

Copy link
Owner Author

commented Jul 30, 2015

Before applying SIMD.js, I tried applying only loop unrolling (extraction).

In browser runtimes, the number of array accesses (including reads) directly affects the execution time
(more than the number of index comparisons does).

In this case, the inner loop accesses the array s (3 * ) 3 * w times. Unrolling the loop by 4 reduces the array accesses to (3 * ) 6/4 * w, and unrolling by 8 reduces them to (3 * ) 10/8 * w (the ratio becomes 3 : 1.5 : 1.25).

Loop unrolling is especially effective on Firefox: unrolling by 8 makes it almost twice as fast. It also helps on Chrome.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.