Skip to content

Instantly share code, notes, and snippets.

@telamon
Created October 9, 2022 20:19
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save telamon/541dbcee29c95f09ded40e6d4af1c98b to your computer and use it in GitHub Desktop.
Save telamon/541dbcee29c95f09ded40e6d4af1c98b to your computer and use it in GitHub Desktop.
Emoji search-n-type picker
import { readFileSync } from 'node:fs'
import { pack, unpack } from 'msgpackr'
// Load tag->emo list
let a
try {
a = JSON.parse(readFileSync('./emojis.json'))
} catch (err) {
console.error(err.message)
console.log('do you have the input file?')
console.log('run:')
console.log('wget https://api.github.com/emojis')
process.exit(1)
}
// Rewrite all values to emoji/unicode characters
const b = {}
for (const key in a) {
const v = a[key]
const m = v.match(/unicode\/(.+)\.png/)
if (!m) {
console.log('Failed match', key, a[key])
continue
}
const u = m[1]
.split(/-/)
.map(c => `\\u{${c}}`)
.join('')
b[key] = eval(`'${u}'`) // eslint-disable-line no-eval
}
// b now contains simple keyword => emoji pairs.
// Compress list into a Trie
// Attempt #3, after I read the wikipage https://en.wikipedia.org/wiki/Radix_tree
const root = {}
function isLeaf (n) { return typeof n === 'string' }
function lookup (key) {
let n = root
let depth = 0
while (!isLeaf(n) && depth < key.length) {
const suffix = key.slice(depth)
const edge = Object.keys(n).find(edge => suffix.startsWith(edge))
if (edge) {
depth += edge.length
n = n[edge]
} else break
}
const exactHit = isLeaf(n) && key.length === depth
return [exactHit, n, depth]
}
// Not needed during production. use prebuilt tree
function put (key, value) {
key += '.' // Let '.' be the terminator, avoid leaf-to-branch-rewriting
if (typeof value !== 'string') throw new Error('Expected value:string')
const [hit, n, depth] = lookup(key)
if (hit) throw new Error('Key exists')
const suffix = key.slice(depth)
// Rewrite Leaf to branch (not needed)
// if isLeaf(n); then n is terminator of key: key.slice(0, depth)
if (isLeaf(n)) throw new Error('MentalError')
// Find overlapping-candidates
let overlap = 0
let e
for (const edge of Object.keys(n)) {
if (e) break
for (let i = 0; i < Math.min(suffix.length, edge.length); i++) {
if (suffix[i] === edge[i]) {
e = edge
overlap++
} else break
}
}
if (!e) { // Insert remaining depth
n[suffix] = value
} else { // split/branch
n[e.slice(0, overlap)] = {
[e.slice(overlap)]: n[e], // insert left
[suffix.slice(overlap)]: value // insert right
}
delete n[e] // delete old edge
}
}
// Store all emojis in a Radix-Trie
for (const name of Object.keys(b)) {
put(name, b[name])
}
function * traverse (n, path = '', d = 0) {
if (isLeaf(n)) return yield [n, path, d] // .slice(0, path.length - 1)]
for (const e in n) {
for (const leaf of traverse(n[e], path + e, d + 1)) yield leaf
}
}
export function search (keyword) {
const [hit, n, depth] = lookup(keyword + '.')
const partial = []
const p = keyword.slice(0, depth)
for (const leaf of traverse(n, p, depth)) partial.push(leaf)
console.log(`Found ${hit}`, hit ? n : partial)
}
if (true) { // Print stats
console.log('\n\n========= DONE | stats ===========')
const rootJsonSize = JSON.stringify(root).length
console.log('Input file size', JSON.stringify(a).length)
console.log('Plainlist', JSON.stringify(b).length)
console.log('RadixTree', rootJsonSize)
const rtpRatio = rootJsonSize / JSON.stringify(b).length
const mpjsonRatio = pack(root).length / rootJsonSize
console.log('RT vs Plain', rtpRatio)
console.log('MsgPack vs JSON', mpjsonRatio, `(${pack(root).length}B)`)
}
console.log('\nUsage: node emosearch.js [searchterm]\n')
const term = process.argv[2] || 'comp'
console.log('\n\nSearching for:', term)
search(term)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment