Skip to content

Instantly share code, notes, and snippets.

@cupcakearmy
Last active December 7, 2020 13:55
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save cupcakearmy/242b54ee6b1a914896390c91846aa4d4 to your computer and use it in GitHub Desktop.
Save cupcakearmy/242b54ee6b1a914896390c91846aa4d4 to your computer and use it in GitHub Desktop.
Lunr Test
import { readFileSync, writeFileSync } from 'fs'
import { brotliCompressSync, gzipSync } from 'zlib'
import hf from 'human-format'
import lunr from 'lunr'
// Benchmark: how large does a serialized lunr index get (raw, gzip, brotli)
// as the number of indexed documents grows? Results are logged to the console
// and written to ./data.csv.
//
// Source text: https://gutenberg.org/ebooks/6130
const file = readFileSync('./6130-0.txt', 'utf8')

// human-format options shared by every human-readable column: binary
// (1024-based) prefixes with a byte unit, e.g. "36.92 KiB".
const opts = {
  scale: 'binary',
  unit: 'B',
}

// Length, in characters, of the slice of `file` used as each document's text.
// It is fixed, so only the number of documents varies between runs.
const size = 10000

// UTF-8 byte size of the first slice, formatted with the same options as the
// other human-readable columns so the CSV is consistent. Loop-invariant, so it
// is computed once. Only the first slice is measured; other slices can differ
// in byte size when they contain different multi-byte characters.
const hSize = hf(Buffer.byteLength(file.slice(0, size), 'utf8'), opts)

// First row is the CSV header; one row is appended per document count.
const data = [['Pieces', 'Size', 'Index', 'GZip', 'BR', 'HSize', 'HIndex', 'HGZip', 'HBr']]

for (const pieces of [1, 10, 20, 50, 100, 200, 500, 1000, 2000, 5000]) {
  console.log('Pieces', pieces)
  console.log('Piece Size', hSize)

  // A regular function (not an arrow) is needed: the configuration callback
  // reaches the index builder through `this`.
  const idx = lunr(function () {
    this.ref('id')
    this.field('text')
    for (let i = 0; i < pieces; i++) {
      // Wrap around once the offset passes the end of the text, so large
      // document counts re-index earlier passages. A slice that starts near
      // the end of the file is shorter than `size`.
      const position = (i * size) % file.length
      this.add({
        // Random document refs: the refs are part of the serialized index, so
        // exact byte counts may vary slightly from run to run.
        id: Math.random().toString(36),
        text: file.slice(position, position + size),
      })
    }
  })

  // Serialize the index the way it would be shipped, then compress it.
  const prebuilt = JSON.stringify(idx)
  const bytes = Buffer.from(prebuilt)
  const gz = gzipSync(bytes, { level: 9 })
  const br = brotliCompressSync(bytes)

  const hIndex = hf(bytes.length, opts)
  const hGz = hf(gz.length, opts)
  const hBr = hf(br.length, opts)
  console.log(`Normal:\t${hIndex}`)
  console.log(`GZip:\t${hGz}`)
  console.log(`Brotli:\t${hBr}`)

  data.push([pieces, size, bytes.length, gz.length, br.length, hSize, hIndex, hGz, hBr])
}

writeFileSync('./data.csv', data.map((row) => row.join(',')).join('\n'))
Pieces Size Index GZip BR HSize HIndex HGZip HBr
1 10000 37811 6063 4248 10.04 k 36.92 KiB 5.92 KiB 4.15 KiB
10 10000 257608 43311 34045 10.04 k 251.57 KiB 42.3 KiB 33.25 KiB
20 10000 502006 86938 68083 10.04 k 490.24 KiB 84.9 KiB 66.49 KiB
50 10000 1161161 202147 157147 10.04 k 1.11 MiB 197.41 KiB 153.46 KiB
100 10000 2178638 392040 292482 10.04 k 2.08 MiB 382.85 KiB 285.63 KiB
200 10000 4145798 765040 540214 10.04 k 3.95 MiB 747.11 KiB 527.55 KiB
500 10000 9776558 1952672 1137839 10.04 k 9.32 MiB 1.86 MiB 1.09 MiB
1000 10000 19225463 4548799 2109189 10.04 k 18.33 MiB 4.34 MiB 2.01 MiB
2000 10000 38005044 11370691 4089003 10.04 k 36.24 MiB 10.84 MiB 3.9 MiB
5000 10000 94455961 35521544 10314160 10.04 k 90.08 MiB 33.88 MiB 9.84 MiB
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment