Last active
February 20, 2022 15:40
-
-
Save intrnl/6cdc95084ada8246190e02021232205b to your computer and use it in GitHub Desktop.
Trie-based Twitch emote matching
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import { Trie } from './trie.js'; | |
import BTTVDB from './bttv.json' assert { type: 'json' }; | |
import FFZDB from './frankerfacez.json' assert { type: 'json' }; | |
import TwitchGlobalDB from './twitchglobal.json' assert { type: 'json' }; | |
// import TwitchSubscriberDB from './twitchsubscriber.json' assert { type: 'json' }; | |
const kws = []; | |
const db = { | |
bttv: BTTVDB, | |
ffz: FFZDB, | |
twitchglobal: TwitchGlobalDB, | |
// twitchsubscriber: TwitchSubscriberDB, | |
}; | |
for (const source in db) { | |
const emotes = db[source]; | |
for (const name in emotes) { | |
const id = emotes[name]; | |
kws.push({ | |
source, | |
name, | |
id, | |
}); | |
} | |
} | |
const string = 'foKappa a quick brown fox jumps over the lazy PixelBob:spin dog Kappa'; | |
const build_start = performance.now(); | |
const trie = new Trie(kws); | |
const build_end = performance.now(); | |
console.log(`build took ${build_end - build_start} ms`); | |
const match_start = performance.now(); | |
const result = trie.search(string); | |
const match_end = performance.now(); | |
console.log(`match took ${match_end - match_start} ms`); | |
console.log(string); | |
console.dir(result, { depth: Infinity }); |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/** | |
* @typedef {object} Emote | |
* @property {string} name | |
* @property {string} source | |
* @property {string} id | |
*/ | |
const modifiers = ['flip', 'spin', 'pulse', 'spin2', 'spin3', '1spin', '2spin', '3spin', 'tr', 'bl', 'br', 'shake', 'shake2', 'shake3', 'flap']; | |
export class Trie { | |
#dict; | |
#out; | |
#fail; | |
constructor (emotes) { | |
this.#build(emotes); | |
} | |
#build (emotes) { | |
const dict = { 0: {} }; | |
const out = {}; | |
let state = 0; | |
for (const emote of emotes) { | |
const name = emote.name; | |
let curr = 0; | |
for (let idx = 0, len = name.length; idx < len; idx++) { | |
const char = name[idx]; | |
if (dict[curr] && dict[curr][char]) { | |
curr = dict[curr][char]; | |
} | |
else { | |
state++; | |
dict[curr][char] = state; | |
dict[state] = {}; | |
curr = state; | |
out[state] = []; | |
} | |
} | |
out[curr].push(emote); | |
} | |
const fail = {}; | |
const xs = []; | |
for (const char in dict[0]) { | |
const state = dict[0][char]; | |
fail[state] = 0; | |
xs.push(state); | |
} | |
while (xs.length) { | |
const x = xs.shift(); | |
for (const char in dict[x]) { | |
const s = dict[x][char]; | |
xs.push(s); | |
let state = fail[x]; | |
while (state > 0 && !dict[state][char]) { | |
state = fail[state]; | |
} | |
if (dict[state][char]) { | |
const fs = dict[state][char]; | |
fail[s] = fs; | |
out[s].push(...out[fs]); | |
} | |
else { | |
fail[s] = 0; | |
} | |
} | |
} | |
this.#dict = dict; | |
this.#out = out; | |
this.#fail = fail; | |
} | |
search (string) { | |
const dict = this.#dict; | |
const out = this.#out; | |
const fail = this.#fail; | |
const results = []; | |
let state = 0; | |
for (let idx = 0, len = string.length; idx < len; idx++) { | |
const char = string[idx]; | |
while (state > 0 && !dict[state][char]) { | |
state = fail[state]; | |
} | |
if (!dict[state][char]) { | |
while (idx < len) { | |
if (isWhitespace(string[idx])) { | |
break; | |
} | |
idx++; | |
} | |
continue; | |
} | |
state = dict[state][char]; | |
// we're trying to match whole words only, skip if the next character is | |
// not whitespace, or colon (this is for modifiers) | |
const next_char = string[idx + 1]; | |
if (next_char && !isWhitespace(next_char) && next_char !== ':') { | |
continue; | |
} | |
if (out[state].length) { | |
const kws = out[state]; | |
let modifier = null; | |
// we support modifiers, an example would be Kappa:spin, where it would | |
// make the emote spin endlessly | |
if (next_char === ':') { | |
idx += 2; | |
let str = ''; | |
// match until we encounter whitespace | |
while (idx < len) { | |
const char = string[idx]; | |
if (isWhitespace(char)) { | |
break; | |
} | |
str += char; | |
idx++; | |
} | |
if (str && modifiers.includes(str)) { | |
modifier = str; | |
} | |
} | |
results.push([idx, kws, modifier]); | |
} | |
} | |
return results; | |
} | |
} | |
function isWhitespace (char) { | |
return char === ' ' || char === '\n'; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment