Lunr Test
import { readFileSync, writeFileSync } from 'fs' | |
import { brotliCompressSync, gzipSync } from 'zlib' | |
import hf from 'human-format' | |
import lunr from 'lunr' | |
// Benchmark: how large does a serialized lunr index get, raw and compressed,
// as the number of indexed documents grows? Results are printed to the console
// and written to ./data.csv.
//
// Source text: https://gutenberg.org/ebooks/6130
const file = readFileSync('./6130-0.txt', 'utf8')

// human-format options shared by every human-readable column, so all sizes are
// reported on the same binary (KiB/MiB) scale with a byte unit.
const opts = {
  scale: 'binary',
  unit: 'B',
}

// Every indexed document is a fixed-length slice of the source text (length is
// in string code units, not bytes); only the number of pieces varies per run.
const size = 10000

// UTF-8 byte size of one piece, measured on the first slice. It does not depend
// on the piece count, so it is computed once. Formatting it with `opts` keeps it
// consistent with the HIndex/HGZip/HBr columns (without `opts` it was rendered
// on human-format's default scale, e.g. "10.04 k").
const hSize = hf(Buffer.byteLength(file.slice(0, size), 'utf8'), opts)

// First row is the CSV header; one row per piece count is appended below.
const data = [['Pieces', 'Size', 'Index', 'GZip', 'BR', 'HSize', 'HIndex', 'HGZip', 'HBr']]

for (const pieces of [1, 10, 20, 50, 100, 200, 500, 1000, 2000, 5000]) {
  console.log('Pieces', pieces)
  console.log('Piece Size', hSize)

  const idx = lunr(function () {
    this.ref('id')
    this.field('text')
    for (let i = 0; i < pieces; i++) {
      // Wrap around to the start of the text once the requested pieces run
      // past the end of the file. A slice that starts near the end of the
      // file is shorter than `size`, because slice() does not wrap.
      const position = (i * size) % file.length
      this.add({
        // Random ref per document; NOTE(review): presumably only uniqueness
        // matters here, but it means output is not identical between runs.
        id: Math.random().toString(36),
        text: file.slice(position, position + size),
      })
    }
  })

  // Serialize the index the way it would be shipped, then measure the raw
  // size and the gzip (max level) and brotli (zlib defaults) compressed sizes.
  const prebuilt = JSON.stringify(idx)
  const bytes = Buffer.from(prebuilt)
  const gz = gzipSync(bytes, { level: 9 })
  const br = brotliCompressSync(bytes)

  const hIndex = hf(bytes.length, opts)
  const hGz = hf(gz.length, opts)
  const hBr = hf(br.length, opts)

  console.log(`Normal:\t${hIndex}`)
  console.log(`GZip:\t${hGz}`)
  console.log(`Brotli:\t${hBr}`)

  data.push([pieces, size, bytes.length, gz.length, br.length, hSize, hIndex, hGz, hBr])
}

// Emit the collected rows as a simple comma-separated file (no quoting; none
// of the values produced above contain commas).
writeFileSync('./data.csv', data.map((row) => row.join(',')).join('\n'))
| Pieces | Size | Index | GZip | BR | HSize | HIndex | HGZip | HBr |
|---|---|---|---|---|---|---|---|---|
| 1 | 10000 | 37811 | 6063 | 4248 | 10.04 k | 36.92 KiB | 5.92 KiB | 4.15 KiB |
| 10 | 10000 | 257608 | 43311 | 34045 | 10.04 k | 251.57 KiB | 42.3 KiB | 33.25 KiB |
| 20 | 10000 | 502006 | 86938 | 68083 | 10.04 k | 490.24 KiB | 84.9 KiB | 66.49 KiB |
| 50 | 10000 | 1161161 | 202147 | 157147 | 10.04 k | 1.11 MiB | 197.41 KiB | 153.46 KiB |
| 100 | 10000 | 2178638 | 392040 | 292482 | 10.04 k | 2.08 MiB | 382.85 KiB | 285.63 KiB |
| 200 | 10000 | 4145798 | 765040 | 540214 | 10.04 k | 3.95 MiB | 747.11 KiB | 527.55 KiB |
| 500 | 10000 | 9776558 | 1952672 | 1137839 | 10.04 k | 9.32 MiB | 1.86 MiB | 1.09 MiB |
| 1000 | 10000 | 19225463 | 4548799 | 2109189 | 10.04 k | 18.33 MiB | 4.34 MiB | 2.01 MiB |
| 2000 | 10000 | 38005044 | 11370691 | 4089003 | 10.04 k | 36.24 MiB | 10.84 MiB | 3.9 MiB |
| 5000 | 10000 | 94455961 | 35521544 | 10314160 | 10.04 k | 90.08 MiB | 33.88 MiB | 9.84 MiB |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment