Skip to content

Instantly share code, notes, and snippets.

@rsms
Last active May 6, 2021 00:47
Show Gist options
  • Star 2 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save rsms/0401b43477395f3619bab75d064ed716 to your computer and use it in GitHub Desktop.
Save rsms/0401b43477395f3619bab75d064ed716 to your computer and use it in GitHub Desktop.
Compile-time constant binary tree for fast byte-array key lookups in JavaScript
export interface BTreeNode<T> {
k :ArrayLike<byte>
v :T
L? :BTreeNode<T>
R? :BTreeNode<T>
}
export class BTree<T> {
readonly root :BTreeNode<T>
constructor(root :BTreeNode<T>) {
this.root = root
}
get(key :ArrayLike<byte>) :T|null {
return lookup(key, this.root)
}
}
// These two lookup functions are about as fast. Use whichever style you prefer.
// function lookup<T>(key :ArrayLike<byte>, n :BTreeNode<T>) :T|null {
// let c = bufcmp(key, n.k)
// return (
// (c == -1) ? n.L ? lookup(key, n.L) : null :
// (c == 1) ? n.R ? lookup(key, n.R) : null :
// (key.length == n.k.length) ? n.v :
// null
// )
// }
function lookup<T>(key :ArrayLike<byte>, n :BTreeNode<T>) :T|null {
while (n) {
const c = bufcmp(key, n.k)
if (c == -1) {
n = n.L as BTreeNode<T>
} else if (c == 1) {
n = n.R as BTreeNode<T>
} else if (key.length == n.k.length) {
return n.v
} else {
break
}
}
return null
}
function bufcmp(a :ArrayLike<byte>, b :ArrayLike<byte>) :int {
const aL = a.length, bL = b.length, L = (aL < bL ? aL : bL)
for (let i = 0; i != L; ++i) {
if (a[i] < b[i]) { return -1 }
if (b[i] < a[i]) { return 1 }
}
return (
aL < bL ? -1 :
bL < aL ? 1 :
0
)
}
import { BTree } from './btree'
// generated by gen-btree.js
const cdat = new Uint8Array([
103,111,99,111,110,116,105,110,117,101,99,97,115,101,98,114,101,97,107,99,
104,97,110,99,111,110,115,116,101,108,115,101,100,101,102,97,117,108,116,
100,101,102,101,114,102,97,108,108,116,104,114,111,117,103,104,102,111,114,
102,117,110,99,112,97,99,107,97,103,101,105,102,103,111,116,111,105,109,112,
111,114,116,105,110,116,101,114,102,97,99,101,109,97,112,115,101,108,101,99,
116,114,97,110,103,101,114,101,116,117,114,110,115,119,105,116,99,104,115,
116,114,117,99,116,116,121,112,101,118,97,114]);
const keywords = new BTree<token>(
{ k: cdat.subarray(0,2) /*go*/, v: token.GO,
L:{ k: cdat.subarray(2,10) /*continue*/, v: token.CONTINUE,
L:{ k: cdat.subarray(10,14) /*case*/, v: token.CASE,
L:{ k: cdat.subarray(14,19) /*break*/, v: token.BREAK},
R:{ k: cdat.subarray(19,23) /*chan*/, v: token.CHAN,
R:{ k: cdat.subarray(23,28) /*const*/, v: token.CONST}}},
R:{ k: cdat.subarray(28,32) /*else*/, v: token.ELSE,
L:{ k: cdat.subarray(32,39) /*default*/, v: token.DEFAULT,
R:{ k: cdat.subarray(39,44) /*defer*/, v: token.DEFER}},
R:{ k: cdat.subarray(44,55) /*fallthrough*/, v: token.FALLTHROUGH,
R:{ k: cdat.subarray(55,58) /*for*/, v: token.FOR,
R:{ k: cdat.subarray(58,62) /*func*/, v: token.FUNC}}}}},
R:{ k: cdat.subarray(62,69) /*package*/, v: token.PACKAGE,
L:{ k: cdat.subarray(69,71) /*if*/, v: token.IF,
L:{ k: cdat.subarray(71,75) /*goto*/, v: token.GOTO},
R:{ k: cdat.subarray(75,81) /*import*/, v: token.IMPORT,
R:{ k: cdat.subarray(81,90) /*interface*/, v: token.INTERFACE,
R:{ k: cdat.subarray(90,93) /*map*/, v: token.MAP}}}},
R:{ k: cdat.subarray(93,99) /*select*/, v: token.SELECT,
L:{ k: cdat.subarray(99,104) /*range*/, v: token.RANGE,
R:{ k: cdat.subarray(104,110) /*return*/, v: token.RETURN}},
R:{ k: cdat.subarray(110,116) /*switch*/, v: token.SWITCH,
L:{ k: cdat.subarray(116,122) /*struct*/, v: token.STRUCT},
R:{ k: cdat.subarray(122,126) /*type*/, v: token.TYPE,
R:{ k: cdat.subarray(126,129) /*var*/, v: token.VAR}}}}}}
)
// lookupKeyword maps an identifier to its keyword token or IDENT
// (if not a keyword).
//
function lookupKeyword(ident :ArrayLike<byte>) :token {
return keywords.get(ident) || token.IDENT
}
class ConstData {
constructor(name /*string*/) {
this.name = name
this.bytes = [] // byte[]
}
alloc(bytes /*ArrayLike<byte>*/) {
const start = this.bytes.length
const end = start + bytes.length
this.bytes = this.bytes.concat(bytes)
return `${this.name}.subarray(${start},${end})`
}
getInitJS() {
return `const ${this.name} = new Uint8Array([${this.bytes.join(',')}])`
}
}
function genBTree(cdat /*ConstData*/, m /*Map*/) {
const jsIdRe = /^[a-zA-Z0-9_]+$/
const indentation = ' '
const pairs = []
const sortedKeys = Array.from(m.keys())
sortedKeys.sort()
for (let i = 0; i < sortedKeys.length; ++i) {
const k = sortedKeys[i]
pairs.push({ key: k, value: m.get(k)})
}
const genBranch = (pairs, indent) => {
let pair, leftPairs, rightPairs
if (pairs.length == 1) {
pair = pairs[0]
} else {
const midIndex = Math.floor(pairs.length / 2)-1
pair = pairs[midIndex]
leftPairs = pairs.slice(0, midIndex)
rightPairs = pairs.slice(midIndex + 1)
}
let s = `{ k: ${cdat.alloc(strbytes(pair.key))} /*${pair.key.replace('*/','*-/')}*/, v: ${pair.value}`
if (leftPairs && leftPairs.length) {
s += `,\n${indent} L:${genBranch(leftPairs, indent + indentation)}`
}
if (rightPairs && rightPairs.length) {
s += `,\n${indent} R:${genBranch(rightPairs, indent + indentation)}`
}
return s + `}`
}
return genBranch(pairs, indentation)
}
function strbytes(s) {
return Array.from(s).map(s => s.charCodeAt(0))
}
const cdat = new ConstData('cdat')
const keywordsJS = genBTree(cdat, new Map([
["break", "token.BREAK"],
["case", "token.CASE"],
["chan", "token.CHAN"],
["const", "token.CONST"],
["continue", "token.CONTINUE"],
["default", "token.DEFAULT"],
["defer", "token.DEFER"],
["else", "token.ELSE"],
["fallthrough", "token.FALLTHROUGH"],
["for", "token.FOR"],
["func", "token.FUNC"],
["go", "token.GO"],
["goto", "token.GOTO"],
["if", "token.IF"],
["import", "token.IMPORT"],
["interface", "token.INTERFACE"],
["map", "token.MAP"],
["package", "token.PACKAGE"],
["range", "token.RANGE"],
["return", "token.RETURN"],
["select", "token.SELECT"],
["struct", "token.STRUCT"],
["switch", "token.SWITCH"],
["type", "token.TYPE"],
["var", "token.VAR"],
]))
console.log(
cdat.getInitJS() + ';\n' +
'const keywords =\n ' + keywordsJS
)
function strbytes(s) {
return Array.from(s).map(s => s.charCodeAt(0))
}
function strbuf(s) {
return new Uint8Array(strbytes(s))
}
function bufstr(b) {
return new Buffer(b).toString('utf8')
// const dec = new TextDecoder('utf-8')
// return dec.decode(b)
}
const cdat = new Uint8Array([
103,111,99,111,110,116,105,110,117,101,99,97,115,101,98,114,101,97,107,99,
104,97,110,99,111,110,115,116,101,108,115,101,100,101,102,97,117,108,116,
100,101,102,101,114,102,97,108,108,116,104,114,111,117,103,104,102,111,114,
102,117,110,99,112,97,99,107,97,103,101,105,102,103,111,116,111,105,109,112,
111,114,116,105,110,116,101,114,102,97,99,101,109,97,112,115,101,108,101,99,
116,114,97,110,103,101,114,101,116,117,114,110,115,119,105,116,99,104,115,
116,114,117,99,116,116,121,112,101,118,97,114
]);
const keywords =
{ k: cdat.subarray(0,2) /*go*/, v: "GO",
L:{ k: cdat.subarray(2,10) /*continue*/, v: "CONTINUE",
L:{ k: cdat.subarray(10,14) /*case*/, v: "CASE",
L:{ k: cdat.subarray(14,19) /*break*/, v: "BREAK"},
R:{ k: cdat.subarray(19,23) /*chan*/, v: "CHAN",
R:{ k: cdat.subarray(23,28) /*const*/, v: "CONST"}}},
R:{ k: cdat.subarray(28,32) /*else*/, v: "ELSE",
L:{ k: cdat.subarray(32,39) /*default*/, v: "DEFAULT",
R:{ k: cdat.subarray(39,44) /*defer*/, v: "DEFER"}},
R:{ k: cdat.subarray(44,55) /*fallthrough*/, v: "FALLTHROUGH",
R:{ k: cdat.subarray(55,58) /*for*/, v: "FOR",
R:{ k: cdat.subarray(58,62) /*func*/, v: "FUNC"}}}}},
R:{ k: cdat.subarray(62,69) /*package*/, v: "PACKAGE",
L:{ k: cdat.subarray(69,71) /*if*/, v: "IF",
L:{ k: cdat.subarray(71,75) /*goto*/, v: "GOTO"},
R:{ k: cdat.subarray(75,81) /*import*/, v: "IMPORT",
R:{ k: cdat.subarray(81,90) /*interface*/, v: "INTERFACE",
R:{ k: cdat.subarray(90,93) /*map*/, v: "MAP"}}}},
R:{ k: cdat.subarray(93,99) /*select*/, v: "SELECT",
L:{ k: cdat.subarray(99,104) /*range*/, v: "RANGE",
R:{ k: cdat.subarray(104,110) /*return*/, v: "RETURN"}},
R:{ k: cdat.subarray(110,116) /*switch*/, v: "SWITCH",
L:{ k: cdat.subarray(116,122) /*struct*/, v: "STRUCT"},
R:{ k: cdat.subarray(122,126) /*type*/, v: "TYPE",
R:{ k: cdat.subarray(126,129) /*var*/, v: "VAR"}}}}}}
function bufcmp(a, b /*ArrayLike<byte>*/) {
const aL = a.length, bL = b.length, L = (aL < bL ? aL : bL)
for (let i = 0; i != L; ++i) {
if (a[i] < b[i]) { return -1 }
if (b[i] < a[i]) { return 1 }
}
return (
aL < bL ? -1 :
bL < aL ? 1 :
0
)
}
function lookupRecurse(key /*Uint8Array*/, n /*Node*/) {
let c = bufcmp(key, n.k)
return (
(c == -1) ? n.L ? lookupRecurse(key, n.L) : null :
(c == 1) ? n.R ? lookupRecurse(key, n.R) : null :
(key.length == n.k.length) ? n.v :
null
)
}
function lookupLoop(key /*:Uint8Array*/, n /*Node*/) {
while (n) {
const c = bufcmp(key, n.k)
if (c == -1) {
n = n.L
} else if (c == 1) {
n = n.R
} else if (key.length == n.k.length) {
return n.v
} else {
break
}
}
return null
}
const assert = require('assert')
function bench(label, f, iterations=100000) {
if (typeof gc != 'undefined') {
gc()
}
let timeStart = process.hrtime()
for (let i = 0; i != iterations; ++i) {
f()
}
let timeEnd = process.hrtime()
let start = (timeStart[0] * 1000) + (timeStart[1] / 1000000)
let end = (timeEnd[0] * 1000) + (timeEnd[1] / 1000000)
let d = end - start
let avgd = d / iterations
let duration = (
avgd < 0.0001 ? `${avgd * 1000000} ns` :
avgd < 0.1 ? `${(avgd * 1000).toFixed(2)} µs` :
`${avgd.toFixed(2)} ms`
)
console.log(`${label} ${duration} (avg)`)
}
let wordsMap = (function() {
let m = new Map()
let n = keywords
const f = (n) => {
m.set(n.k, n.v)
m.set(n.k.subarray(0, n.k.lenght - 1), null) // miss
m.set(strbuf(bufstr(n.k + 'blabla')), null) // miss
if (n.L) { f(n.L) }
if (n.R) { f(n.R) }
}
f(keywords)
return m
})()
bench('lookupRecurse', () => {
for (const [k, v] of wordsMap) {
assert.equal(v, lookupRecurse(k, keywords))
}
})
bench('lookupLoop', () => {
for (const [k, v] of wordsMap) {
assert.equal(v, lookupLoop(k, keywords))
}
})
// MacBook 15 (2015), nodejs v8.1.3
// lookupRecurse 7.82 µs (avg)
// lookupLoop 6.79 µs (avg)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment