Last active
July 22, 2022 11:44
-
-
Save postspectacular/5a22b579af5beab34d9b1bfc65c98753 to your computer and use it in GitHub Desktop.
Interval-based compression (using thi.ng/bitstream). Finds and compresses sub-ranges of successive values in an array of monotonically increasing integer values in [0..n] interval.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import { equiv } from "@thi.ng/equiv"; | |
import { encode, decode } from "./pack-intervals.js"; | |
// example source array (will be compressed into just 7 bytes) | |
const src = [ | |
1, 2, 3, 4, | |
6, 7, | |
9, 10, 11, 12, 13, 14, | |
17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29 | |
]; | |
const enc = encode(src); | |
const dec = decode(enc); | |
console.log("valid", equiv(dec, src)); | |
console.log("encoded", enc); | |
console.log("decoded", dec); | |
console.log("ratio", enc.length / src.length); | |
///////////////////////////////////////////////// | |
// output: | |
///////////////////////////////////////////////// | |
// valid true | |
// encoded Uint8Array(7) [ | |
// 236, 36, 152, 84, | |
// 155, 22, 128 | |
// ] | |
// decoded [ | |
// 1, 2, 3, 4, 6, 7, 9, 10, | |
// 11, 12, 13, 14, 17, 18, 19, 20, | |
// 21, 22, 23, 24, 25, 26, 27, 28, | |
// 29 | |
// ] | |
// ratio 0.28 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import { bitWriter, bitReader } from "@thi.ng/bitstream"; | |
/** | |
* Takes an array of monotonically increasing integer values in [0..n] interval | |
* and a word `size` in bits. Finds and compresses sub-ranges of successive | |
* values. All values are written to a bit stream and encoded using given word | |
* size. Returns bit stream encoded as byte array. | |
* | |
* @remarks | |
* Max word size is 8 bits. Max value in `buf` must be < 2^size. | |
* To support larger word sizes, update impl to use `BitOutputStream` | |
* (from same @thi.ng/bitstream package)... | |
* | |
* The format of the result stream is as follows: | |
* | |
* - `size` bits: max value (i.e. last item of `buf`), followed by repeats of: | |
* - 1 bit: 1 for encoded interval, 0 for single value | |
* - 1x or 2x `size` bits: value (and number of successive values, iff interval) | |
* | |
* @param buf - source buffer | |
* @param size - word size in bits | |
*/ | |
export const encode = (buf, size = 5) => { | |
const { write, bytes } = bitWriter(); | |
const n = buf.length; | |
write(buf[n - 1], size); | |
for (let i = 0, j, k; i < n; ) { | |
j = i; | |
k = i; | |
while (++k < n && buf[k] === buf[j++] + 1); | |
const l = k - i; | |
write(~~(l > 1)); | |
write(buf[i], size); | |
l > 1 && write(l, size); | |
i = k; | |
} | |
return bytes(); | |
}; | |
/** | |
* Reverse op of {@link encode}. Decodes & decompresses encoded | |
* intervals & values. Returns result array. | |
* | |
* @param buf - packed/encoded byte array | |
* @param size - word size in bits | |
*/ | |
export const decode = (buf, size = 5) => { | |
const read = bitReader(buf); | |
const max = read(size); | |
const out = []; | |
do { | |
if (read()) { | |
for (let a = read(size), b = read(size); b-- > 0; a++) out.push(a); | |
} else { | |
out.push(read(size)); | |
} | |
} while (out[out.length - 1] < max); | |
return out; | |
}; |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment