Skip to content

Instantly share code, notes, and snippets.

@bellbind
Last active April 10, 2022 10:16
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save bellbind/b7803cd78249c95bc95f1084adb36eeb to your computer and use it in GitHub Desktop.
Save bellbind/b7803cd78249c95bc95f1084adb36eeb to your computer and use it in GitHub Desktop.
[browser] Display MNIST images with Web API DecompressionStream
// BYOB emulation as TransformStream for `u8readable.pipeThrough(new BYOBTransform())`
/**
 * Creates an unbuffered async hand-off queue.
 *
 * `push(value)` and `close()` each wait until a consumer has called `next()`,
 * and `next()` waits until a producer has called `push()`/`close()`; whichever
 * side arrives first is parked until its counterpart shows up.
 *
 * @returns {{next: Function, push: Function, close: Function}} an object that
 *   is also its own async iterator (usable with `for await`).
 */
const newQueue = () => {
  // Resolvers of `next()` promises that are still waiting for a producer.
  const parkedConsumers = [];
  // Resolvers of `poll()` promises that are still waiting for a consumer.
  const parkedProducers = [];

  // Consumer side: resolves with the `{value, done}` record of one producer.
  const next = () => new Promise(deliver => {
    if (parkedProducers.length > 0) {
      parkedProducers.shift()(deliver);
    } else {
      parkedConsumers.push(deliver);
    }
  });

  // Producer side: resolves with the `deliver` callback of one consumer.
  const poll = () => new Promise(ready => {
    if (parkedConsumers.length > 0) {
      ready(parkedConsumers.shift());
    } else {
      parkedProducers.push(ready);
    }
  });

  const push = async value => {
    const deliver = await poll();
    return deliver({value, done: false});
  };

  const close = async () => {
    const deliver = await poll();
    return deliver({done: true});
  };

  return {
    next,
    push,
    close,
    [Symbol.asyncIterator]() {
      return this;
    },
  };
};
/**
 * TransformStream-shaped adapter (`{readable, writable}`) that re-exposes the
 * Uint8Array chunks written to `writable` as a byte stream (`type: "bytes"`),
 * so that `u8readable.pipeThrough(new BYOBTransform())` can be read with
 * `getReader({mode: "byob"})`.
 *
 * A BYOB read is answered only once the caller's view is completely filled,
 * or with the bytes gathered so far when the input ends. Reads issued through
 * a default reader receive the buffered chunks as they are.
 *
 * NOTE: no `cancel`/`abort` handlers are installed, so cancelling `readable`
 * does not unblock a producer that is waiting inside `writable`.
 */
export const BYOBTransform = class {
  /**
   * @param {{autoAllocateChunkSize?: number}} [transform] options; only
   *   `autoAllocateChunkSize` is read and it is forwarded to the ReadableStream.
   */
  constructor(transform = {}) {
    const queue = newQueue();
    // Bytes received from the writable side that no read has consumed yet.
    let chunk = null;
    this.readable = new ReadableStream({
      type: "bytes",
      autoAllocateChunkSize: transform.autoAllocateChunkSize,
      async pull(controller) {
        if (!controller.byobRequest) {
          // Default reader without auto-allocation: there is no caller buffer
          // to fill, so hand over the next non-empty chunk instead of failing.
          while (!chunk || chunk.byteLength === 0) {
            const {done, value} = await queue.next();
            if (done) {
              chunk = null;
              controller.close();
              return;
            }
            chunk = value;
          }
          const whole = chunk;
          chunk = null;
          // enqueue() takes ownership of the buffer it is given; pass a copy so
          // the producer's buffer is left untouched.
          controller.enqueue(whole.slice());
          return;
        }
        // `view` always covers the part of the request that is still unfilled.
        let view = controller.byobRequest.view;
        while (!chunk || chunk.byteLength < view.byteLength) {
          if (chunk) {
            // The leftover is shorter than the request: copy all of it.
            view.set(chunk);
            view = view.subarray(chunk.byteLength);
          }
          const {done, value} = await queue.next();
          chunk = value;
          if (done) {
            const size = view.byteOffset - controller.byobRequest.view.byteOffset;
            // An empty response is only accepted on a closed stream, so close
            // first when nothing was gathered and afterwards otherwise.
            if (size === 0) controller.close();
            controller.byobRequest.respond(size);
            if (size > 0) controller.close();
            return;
          }
        }
        // Enough bytes are buffered: fill the rest and keep the remainder.
        view.set(chunk.subarray(0, view.byteLength));
        chunk = chunk.subarray(view.byteLength);
        const size = view.byteOffset + view.byteLength - controller.byobRequest.view.byteOffset;
        controller.byobRequest.respond(size);
      },
    });
    this.writable = new WritableStream({
      // Each write completes only after a pull has taken the chunk.
      async write(data) {
        await queue.push(data);
      },
      async close() {
        await queue.close();
      },
    });
  }
};
<!doctype html>
<html>
<head>
<link rel="icon" href="data:," />
<meta charset="utf-8" />
<script type="module" src="./main.js"></script>
</head>
<body></body>
</html>
import {MimicBYOBReader} from "./mimic-byob-reader.js";
import {BYOBTransform} from "./byob-transform.js";
// MNIST data from: http://yann.lecun.com/exdb/mnist/
// Relative URLs of the gzip-compressed MNIST files of the training set.
const trainUrls = {
  images: "./train-images-idx3-ubyte.gz",
  labels: "./train-labels-idx1-ubyte.gz",
};
// Relative URLs of the gzip-compressed MNIST files of the 10k test set.
const t10kUrls = {
  images: "./t10k-images-idx3-ubyte.gz",
  labels: "./t10k-labels-idx1-ubyte.gz",
};
// Lookup table: data set name -> {images, labels} URL pair.
const mnistUrl = {train: trainUrls, t10k: t10kUrls};
//[MNIST gzip decompressed file format]
// images:
// 0-3: magic = 2051 (Big Endian)
// 4-7: image count = train 60000 | t10k 10000 (Big Endian)
// 8-11: image width = 28 (Big Endian)
// 12-15: image height = 28 (Big Endian)
// 16-799: 28x28 pixel bytes(white 0-255 black) of image[0]
// 800-1583: 28x28 pixel bytes(white 0-255 black) of image[1]
// ...
// labels:
// 0-3: magic = 2049 (Big Endian)
// 4-7: label count = train 60000 | t10k 10000 (Big Endian)
// 8: a number value(0-9) of image[0]
// 9: a number value(0-9) of image[1]
// ...
//
// load mnist images and labels with urls
/**
 * Streams the samples of one MNIST data set while it is being downloaded.
 *
 * @param {{images: string, labels: string}} urls URLs of the gzip-compressed
 *   images file and labels file.
 * @yields {{image: Uint8Array, label: Uint8Array}} one sample at a time:
 *   `image` holds the width*height pixel bytes, `label` holds a single byte.
 * @returns {{images: Uint8Array[], labels: Uint8Array[]}} every sample that was
 *   yielded (the return value of the generator once it is exhausted).
 * @throws {Error} when a file cannot be fetched (non-2xx HTTP status).
 * @throws {TypeError} when a header is invalid, the counts differ, the image
 *   size is not 28x28, or a file ends before all announced samples were read.
 */
const loadMnist = async function* (urls) {
  // Fetches one file and exposes its gunzipped bytes as a ReadableStream.
  const openGunzipped = async url => {
    const response = await fetch(url);
    // Without this check an error page would be handed to the gzip decoder.
    if (!response.ok) throw new Error(`failed to fetch ${url}: HTTP ${response.status}`);
    return response.body.pipeThrough(new DecompressionStream("gzip"));
  };
  // The two downloads are independent, so start both at once.
  const [imageReadable, labelReadable] = await Promise.all([
    openGunzipped(urls.images),
    openGunzipped(urls.labels),
  ]);
  // The two BYOB emulations are interchangeable here; the images use the
  // BYOBTransform byte stream and the labels use the MimicBYOBReader wrapper.
  const imageReader = imageReadable.pipeThrough(new BYOBTransform()).getReader({mode: "byob"});
  const labelReader = new MimicBYOBReader(labelReadable.getReader());

  // Reads exactly `view.byteLength` bytes or fails when the file ends early.
  const readExact = async (reader, view, what) => {
    // Remember the size first: a real BYOB reader detaches the passed buffer.
    const wanted = view.byteLength;
    const {done, value} = await reader.read(view);
    if (done || value.byteLength < wanted) throw new TypeError(`unexpected end of ${what} file`);
    return value;
  };
  // Reads one big-endian 32-bit header field.
  const readUint32 = async (reader, what) => {
    const field = await readExact(reader, new DataView(new ArrayBuffer(4)), what);
    return field.getUint32(0, false);
  };

  try {
    const imageMagic = await readUint32(imageReader, "images");
    if (imageMagic !== 2051) throw new TypeError("invalid magic of images file");
    const labelMagic = await readUint32(labelReader, "labels");
    if (labelMagic !== 2049) throw new TypeError("invalid magic of labels file");
    const count = await readUint32(imageReader, "images");
    const labelCount = await readUint32(labelReader, "labels");
    if (count !== labelCount) throw new TypeError(`mismatched counts: images, labels: ${count}, ${labelCount}`);
    const width = await readUint32(imageReader, "images");
    const height = await readUint32(imageReader, "images");
    if (width !== 28 || height !== 28) throw new TypeError(`invalid image size(28x28): ${width}x${height}`);
    const bytes = width * height;
    const images = [];
    const labels = [];
    for (let i = 0; i < count; i++) {
      const image = await readExact(imageReader, new Uint8Array(bytes), "images");
      const label = await readExact(labelReader, new Uint8Array(1), "labels");
      yield {image, label};
      images.push(image);
      labels.push(label);
    }
    return {images, labels};
  } finally {
    // Best-effort cancel so that an early exit (break/throw in the consumer)
    // does not leave the streams open; a cancel failure must not mask the
    // original outcome, hence allSettled.
    await Promise.allSettled([imageReader.cancel(), labelReader.cancel()]);
    imageReader.releaseLock();
    labelReader.releaseLock();
  }
};
// mnist image view for HTML canvas
/**
 * Converts one MNIST image into an opaque 28x28 grayscale ImageData.
 *
 * @param {ArrayLike<number>} image 784 row-major pixel bytes where 0 is white
 *   and 255 is black; the values are inverted for display.
 * @returns {ImageData} RGBA pixels with R = G = B = 255 - pixel and A = 255.
 */
const toImageData = image => {
  const side = 28;
  const result = new ImageData(side, side);
  const rgba = result.data;
  for (let pixel = 0; pixel < side * side; pixel++) {
    const gray = 255 - image[pixel];
    const base = pixel * 4;
    rgba[base] = gray;
    rgba[base + 1] = gray;
    rgba[base + 2] = gray;
    rgba[base + 3] = 255;
  }
  return result;
};
/**
 * Builds a 28x28 `<canvas>` element with a solid border that shows one image.
 *
 * @param {ArrayLike<number>} image pixel bytes accepted by `toImageData`.
 * @returns {HTMLCanvasElement} a detached canvas with the image painted at (0, 0).
 */
const toCanvas = image => {
  const side = 28;
  const element = document.createElement("canvas");
  element.height = side;
  element.width = side;
  element.style.borderStyle = "solid";
  const context = element.getContext("2d");
  context.putImageData(toImageData(image), 0, 0);
  return element;
};
//[example] show MNIST images
// Append one canvas per t10k sample to the page as the samples arrive; the
// label is attached as the canvas title.
for await (const sample of loadMnist(mnistUrl.t10k)) {
  const tile = Object.assign(toCanvas(sample.image), {title: sample.label});
  document.body.append(tile);
  // Optional pacing, disabled: wait for an animation frame between samples.
  //await new Promise(f => requestAnimationFrame(f));
}
// Emulate BYOB reader for Uint8Array reader of DecompressionStream readable
/**
 * Wraps a default reader that yields Uint8Array chunks and offers a
 * BYOB-reader-like `read(view)` on top of it: every read fills the passed view
 * completely unless the underlying stream ends first.
 */
export const MimicBYOBReader = class {
  #reader; #chunk; #closed;
  /**
   * @param {ReadableStreamDefaultReader} u8reader reader whose `read()` must
   *   resolve to Uint8Array values.
   */
  constructor(u8reader) {
    this.#reader = u8reader;
    // Bytes received from the underlying reader that no read has consumed yet.
    this.#chunk = null;
    // Set once the underlying reader has reported `done`.
    this.#closed = false;
  }
  /** The `closed` promise of the underlying reader. */
  get closed() {return this.#reader.closed;}
  /** Cancels the underlying reader; `reason` is passed through. */
  cancel(reason) {return this.#reader.cancel(reason);}
  /** Releases the lock held by the underlying reader. */
  releaseLock() {return this.#reader.releaseLock();}
  //NOTE: Web API BYOBReader's passed view `buffer` is "detached" (it cannot access view's array values)
  /**
   * Fills `view` with the next bytes of the stream.
   *
   * @param {ArrayBufferView} view non-empty typed array or DataView to fill;
   *   unlike a native BYOB reader, its buffer is written in place.
   * @returns {Promise<{done: boolean, value: ArrayBufferView}>} `value` is a
   *   view of the same type over the same buffer and offset. It covers the
   *   whole request, or only the complete elements received when the stream
   *   ended early; `done` is true when no byte at all was available.
   * @throws {TypeError} when `view` is empty or the underlying reader yields
   *   something that is not a Uint8Array.
   */
  async read(view) {
    if (view.byteLength === 0) throw new TypeError("it must be view.byteLength > 0");
    // DataView has no BYTES_PER_ELEMENT; its constructor length is in bytes.
    const bpe = view.BYTES_PER_ELEMENT ?? 1;
    // The constructor's third argument is an element count, not a byte count.
    const result = byteLength => new view.constructor(view.buffer, view.byteOffset, byteLength / bpe);
    if (this.#closed) return {done: true, value: result(0)};
    // `rest` always covers the part of the request that is still unfilled.
    let rest = new Uint8Array(view.buffer, view.byteOffset, view.byteLength);
    while (!this.#chunk || this.#chunk.byteLength < rest.byteLength) {//[chunk shorter than view]
      if (this.#chunk) {
        rest.set(this.#chunk);
        rest = rest.subarray(this.#chunk.byteLength);
        // Fully consumed: drop it so a failing read below cannot replay it.
        this.#chunk = null;
      }
      const {done, value} = await this.#reader.read();
      if (done) {//[just after the last chunk]
        this.#closed = true;
        const filled = rest.byteOffset - view.byteOffset;
        // Only complete elements can be exposed through the caller's view type.
        const usable = filled - filled % bpe;
        return {done: filled === 0, value: result(usable)};
      }
      if (!(value instanceof Uint8Array)) throw new TypeError(`Must be Uint8Array reader but: ${value}`);
      this.#chunk = value;
    }
    // Enough bytes are buffered: fill the rest and keep the remainder.
    rest.set(this.#chunk.subarray(0, rest.byteLength));
    this.#chunk = this.#chunk.subarray(rest.byteLength);
    return {done: false, value: result(view.byteLength)};
  }
};
This file has been truncated, but you can view the full file.
View raw

(Sorry about that, but we can’t show files that are this big right now.)

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment