Last active
April 10, 2022 10:16
-
-
Save bellbind/b7803cd78249c95bc95f1084adb36eeb to your computer and use it in GitHub Desktop.
[browser] Display MNIST images with Web API DecompressionStream
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// BYOB emulation as TransformStream for `u8readable.pipeThrough(new BYOBTransform())` | |
/**
 * Create an unbuffered async hand-off queue.
 *
 * Producers (`push`/`close`) and the consumer (`next`) rendezvous one item at a
 * time: a `push()` promise settles only once some `next()` call has received
 * the pushed value, and a `next()` promise settles only once a producer has
 * supplied a record.
 *
 * @returns {{next: Function, push: Function, close: Function}} an object that
 *   is also its own async iterator (`for await (const v of queue)`).
 */
const newQueue = () => {
  // Resolvers of `next()` promises still waiting for a producer.
  const waitingConsumers = [];
  // Resolvers of `poll()` promises still waiting for a consumer.
  const waitingProducers = [];

  // Consumer side: resolves with the next `{value, done}` record.
  const next = () => new Promise((deliver) => {
    if (waitingProducers.length > 0) {
      // A producer is already parked: hand it our resolver so it can deliver.
      const handOver = waitingProducers.shift();
      handOver(deliver);
      return;
    }
    waitingConsumers.push(deliver);
  });

  // Producer side: resolves with the `deliver` callback of exactly one consumer.
  const poll = () => new Promise((handOver) => {
    if (waitingConsumers.length > 0) {
      handOver(waitingConsumers.shift());
      return;
    }
    waitingProducers.push(handOver);
  });

  // Deliver one value to the next consumer.
  const push = async (value) => {
    const deliver = await poll();
    deliver({value, done: false});
  };

  // Deliver the end-of-sequence record to the next consumer.
  const close = async () => {
    const deliver = await poll();
    deliver({done: true});
  };

  return {next, push, close, [Symbol.asyncIterator]() {return this;}};
};
/**
 * TransformStream-shaped adapter (`{readable, writable}`) that re-chunks the
 * Uint8Array chunks written to `writable` into a byte stream readable with a
 * BYOB reader.
 *
 * Each BYOB read is answered only when the caller's view is completely filled,
 * except for the final read, which may be shorter once the input has ended.
 *
 * @param {{autoAllocateChunkSize?: number}} [transform] - forwarded to the
 *   underlying byte source of `readable`.
 */
export const BYOBTransform = class {
  constructor(transform = {}) {
    const queue = newQueue();
    // Bytes received from the writable side that no read has consumed yet.
    let pending = null;

    const byteSource = {
      type: "bytes",
      autoAllocateChunkSize: transform.autoAllocateChunkSize,
      async pull(controller) {
        // `target` always covers the still-unfilled tail of the request's view.
        let target = controller.byobRequest.view;
        for (;;) {
          if (pending && pending.byteLength >= target.byteLength) break;
          if (pending) {
            // The leftover is too short: copy all of it and shrink the target.
            target.set(pending);
            target = target.subarray(pending.byteLength);
          }
          const {done, value} = await queue.next();
          pending = value;
          if (!done) continue;

          // Input ended: answer with whatever was copied so far.
          const filled = target.byteOffset - controller.byobRequest.view.byteOffset;
          if (filled === 0) {
            // Nothing copied: close first, then respond(0) to settle the read.
            controller.close();
            controller.byobRequest.respond(filled);
          } else {
            controller.byobRequest.respond(filled);
            controller.close();
          }
          return;
        }
        // `pending` can satisfy the rest of the view; keep the remainder for later.
        const wanted = target.byteLength;
        target.set(pending.subarray(0, wanted));
        pending = pending.subarray(wanted);
        const filled = target.byteOffset + wanted - controller.byobRequest.view.byteOffset;
        controller.byobRequest.respond(filled);
      },
    };
    this.readable = new ReadableStream(byteSource);

    this.writable = new WritableStream({
      // Resolves once a `pull` has taken the chunk out of the queue.
      async write(chunk) {
        await queue.push(chunk);
      },
      async close() {
        await queue.close();
      },
    });
  }
};
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<!doctype html>
<html lang="en">
<head>
<!-- Declare the encoding first so it is seen before any other content. -->
<meta charset="utf-8" />
<title>MNIST images via DecompressionStream</title>
<!-- Empty data: URL as the icon; presumably to avoid a favicon.ico request. -->
<link rel="icon" href="data:," />
<!-- main.js appends one canvas per MNIST image to <body>. -->
<script type="module" src="./main.js"></script>
</head>
<body></body>
</html>
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import {MimicBYOBReader} from "./mimic-byob-reader.js"; | |
import {BYOBTransform} from "./byob-transform.js"; | |
// MNIST data from: http://yann.lecun.com/exdb/mnist/ | |
// Relative URLs of the gzip-compressed MNIST files, keyed by data set name
// (`train` / `t10k`) and then by file kind (`images` / `labels`).
const mnistUrl = {
  train: {images: "./train-images-idx3-ubyte.gz", labels: "./train-labels-idx1-ubyte.gz"},
  t10k: {images: "./t10k-images-idx3-ubyte.gz", labels: "./t10k-labels-idx1-ubyte.gz"},
};
//[MNIST gzip decompressed file format] | |
// images: | |
// 0-3: magic = 2051 (Big Endian) | |
// 4-7: image count = train 60000 | t10k 10000 (Big Endian) | |
// 8-11: image width = 28 (Big Endian) | |
// 12-15: image height = 28 (Big Endian) | |
// 16-799: 28x28 pixel bytes(white 0-255 black) of image[0] | |
// 800-1583: 28x28 pixel bytes(white 0-255 black) of image[1] | |
// ... | |
// labels: | |
// 0-3: magic = 2049 (Big Endian) | |
// 4-7: label count = train 60000 | t10k 10000 (Big Endian)
// 8: label (digit 0-9) of image[0]
// 9: label (digit 0-9) of image[1]
// ... | |
// | |
// load mnist images and labels with urls | |
/**
 * Load an MNIST data set, yielding samples as they are decoded.
 *
 * @param {{images: string, labels: string}} urls - URLs of the gzip-compressed
 *   image and label files.
 * @yields {{image: Uint8Array, label: Uint8Array}} `image` holds width*height
 *   pixel bytes; `label` is a one-byte array holding the label value.
 * @returns {{images: Uint8Array[], labels: Uint8Array[]}} every yielded sample,
 *   available as the generator's return value after full iteration.
 * @throws {Error} when a file cannot be fetched (non-2xx HTTP status).
 * @throws {TypeError} when a header field is invalid, the two files disagree
 *   on the sample count, or a file ends before all announced data was read.
 */
const loadMnist = async function* (urls) {
  // Fetch one file and expose its gunzipped bytes as a ReadableStream.
  const openGunzipped = async (url) => {
    const response = await fetch(url);
    // Without this check an HTTP error page would be fed to the gzip decoder.
    if (!response.ok) throw new Error(`failed to fetch ${url}: HTTP ${response.status}`);
    return response.body.pipeThrough(new DecompressionStream("gzip"));
  };
  // Read exactly `view.byteLength` bytes or fail with a clear message.
  const readFully = async (reader, view, what) => {
    // Captured before reading: a native BYOB reader detaches the passed buffer.
    const wanted = view.byteLength;
    const {done, value} = await reader.read(view);
    if (done || !value || value.byteLength !== wanted) {
      throw new TypeError(`unexpected end of ${what} file`);
    }
    return value;
  };
  // All header fields are 32-bit big-endian integers.
  const readUint32 = async (reader, what) =>
    (await readFully(reader, new DataView(new ArrayBuffer(4)), what)).getUint32(0, false);

  // The two downloads are independent, so start them together.
  const [imageReadable, labelReadable] = await Promise.all([
    openGunzipped(urls.images),
    openGunzipped(urls.labels),
  ]);
  // Two BYOB adapters are used here, one per stream: BYOBTransform for the
  // images and MimicBYOBReader for the labels.
  const imageReader = imageReadable.pipeThrough(new BYOBTransform()).getReader({mode: "byob"});
  const labelReader = new MimicBYOBReader(labelReadable.getReader());
  try {
    const imageMagic = await readUint32(imageReader, "images");
    if (imageMagic !== 2051) throw new TypeError("invalid magic of images file");
    const labelMagic = await readUint32(labelReader, "labels");
    if (labelMagic !== 2049) throw new TypeError("invalid magic of labels file");

    const count = await readUint32(imageReader, "images");
    const labelCount = await readUint32(labelReader, "labels");
    if (count !== labelCount) throw new TypeError(`mismatched counts: images, labels: ${count}, ${labelCount}`);

    const width = await readUint32(imageReader, "images");
    const height = await readUint32(imageReader, "images");
    if (width !== 28 || height !== 28) throw new TypeError(`invalid image size(28x28): ${width}x${height}`);

    const bytes = width * height;
    const images = [];
    const labels = [];
    for (let i = 0; i < count; i++) {
      const image = await readFully(imageReader, new Uint8Array(bytes), "images");
      const label = await readFully(labelReader, new Uint8Array(1), "labels");
      yield {image, label};
      images.push(image);
      labels.push(label);
    }
    return {images, labels};
  } finally {
    // Release both locks even if releasing the first one throws.
    try {
      imageReader.releaseLock();
    } finally {
      labelReader.releaseLock();
    }
  }
};
// mnist image view for HTML canvas | |
/**
 * Convert one MNIST image into a 28x28 opaque grayscale ImageData.
 *
 * @param {ArrayLike<number>} image - 784 pixel bytes, indexed as y * 28 + x.
 * @returns {ImageData} RGBA pixels with every channel set to `255 - pixel`
 *   and alpha set to 255.
 */
const toImageData = image => {
  const side = 28;
  const frame = new ImageData(side, side);
  const rgba = frame.data;
  // One pass over the pixels covers every (x, y) pair exactly once.
  for (let pixel = 0; pixel < side * side; pixel++) {
    const base = pixel * 4;
    const shade = 255 - image[pixel];
    rgba[base] = shade;
    rgba[base + 1] = shade;
    rgba[base + 2] = shade;
    rgba[base + 3] = 255;
  }
  return frame;
};
/**
 * Render one MNIST image onto a new 28x28 canvas with a solid border.
 *
 * @param {ArrayLike<number>} image - pixel bytes accepted by `toImageData`.
 * @returns {HTMLCanvasElement} a detached canvas showing the image.
 */
const toCanvas = image => {
  const element = document.createElement("canvas");
  Object.assign(element, {height: 28, width: 28});
  element.style.borderStyle = "solid";
  const context = element.getContext("2d");
  context.putImageData(toImageData(image), 0, 0);
  return element;
};
//[example] show MNIST images | |
// Append one canvas per t10k sample to the page as samples are decoded.
for await (const sample of loadMnist(mnistUrl.t10k)) {
  const canvas = toCanvas(sample.image);
  // The label becomes the canvas title (assignment coerces it to a string).
  canvas.title = sample.label;
  document.body.append(canvas);
  // Optional, disabled: wait one animation frame per image
  // (presumably to let the page repaint while loading):
  //await new Promise(f => requestAnimationFrame(f));
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Emulate BYOB reader for Uint8Array reader of DecompressionStream readable | |
/**
 * Wraps a default reader that produces Uint8Array chunks (for example the
 * reader of a DecompressionStream readable) and exposes a BYOB-like
 * `read(view)` that fills the caller's view completely, except for the last
 * read before the end of the stream, which may be shorter.
 *
 * NOTE: unlike a native BYOB reader, the buffer of the passed view is not
 * detached; the returned view shares the same buffer.
 */
export const MimicBYOBReader = class {
  // True once the wrapped reader has reported `done`.
  #closed = false;
  #reader;
  // Bytes received from the wrapped reader that no read has consumed yet.
  #chunk = null;

  /**
   * @param {ReadableStreamDefaultReader} u8reader - reader whose chunks are
   *   Uint8Array instances.
   */
  constructor(u8reader) {
    this.#reader = u8reader;
  }

  get closed() {return this.#reader.closed;}
  cancel() {return this.#reader.cancel();}
  releaseLock() {return this.#reader.releaseLock();}

  /**
   * Fill `view` with the next bytes of the stream.
   *
   * @param {ArrayBufferView} view - non-empty typed array or DataView to fill.
   * @returns {Promise<{done: boolean, value: ArrayBufferView}>} `value` has the
   *   same type, buffer and byte offset as `view` and covers only the bytes
   *   actually read (whole elements only); `done` is true when no byte was
   *   available because the stream has ended.
   * @throws {TypeError} when `view` is empty or the wrapped reader produces a
   *   chunk that is not a Uint8Array.
   */
  async read(view) {
    if (view.byteLength === 0) throw new TypeError("it must be view.byteLength > 0");
    // DataView has no BYTES_PER_ELEMENT; its length argument is in bytes.
    const bytesPerElement = view.BYTES_PER_ELEMENT ?? 1;
    // The constructors take an ELEMENT count, so convert from bytes and drop
    // any trailing partial element.
    const viewOf = byteLength =>
      new view.constructor(view.buffer, view.byteOffset, Math.trunc(byteLength / bytesPerElement));
    if (this.#closed) return {done: true, value: viewOf(0)};

    // `target` always covers the still-unfilled tail of `view`.
    let target = new Uint8Array(view.buffer, view.byteOffset, view.byteLength);
    while (!this.#chunk || this.#chunk.byteLength < target.byteLength) {
      if (this.#chunk) {
        // The leftover is too short: copy all of it and shrink the target.
        target.set(this.#chunk);
        target = target.subarray(this.#chunk.byteLength);
      }
      const {done, value} = await this.#reader.read();
      if (done) {
        // Stream ended: hand back whatever was copied so far.
        this.#closed = true;
        this.#chunk = null;
        const filled = target.byteOffset - view.byteOffset;
        return {done: filled === 0, value: viewOf(filled)};
      }
      if (!(value instanceof Uint8Array)) throw new TypeError(`Must be Uint8Array reader but: ${value}`);
      this.#chunk = value;
    }
    // The leftover can satisfy the rest of the view; keep the remainder.
    target.set(this.#chunk.subarray(0, target.byteLength));
    this.#chunk = this.#chunk.subarray(target.byteLength);
    return {done: false, value: viewOf(view.byteLength)};
  }
};
This file has been truncated, but you can view the full file.
View raw
(Sorry about that, but we can’t show files that are this big right now.)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
demo: https://gist.githack.com/bellbind/b7803cd78249c95bc95f1084adb36eeb/raw/index.html