-
-
Save imdong/8cc26b4da43c5498c32fe4706e678552 to your computer and use it in GitHub Desktop.
对 ChatGPT 流式输出的文本进行实时关键词检查(未做性能优化,仅展示原理,大量关键词时应该对关键词逐字逐层做索引)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
(function (strList, bList) { | |
// Simulate a streamed response: hand one character to filter() every 200 ms.
const iId = setInterval(() => {
  if (strList.length === 0) {
    // Input exhausted: stop ticking. As in the original code, filter() is
    // still called once more with `undefined`, which it can treat as an
    // end-of-stream signal.
    clearInterval(iId);
    filter(undefined);
    return;
  }
  // Pass the next character to the processing routine.
  filter(strList.shift());
}, 200);
// Characters received but not yet printed because, joined together, they may
// still grow into a blacklisted word.
let buff = [];

/**
 * Receives the stream one character at a time and prints it with
 * blacklisted words masked.
 *
 * A character is held back while the buffered text is the beginning of a
 * blacklisted word. When the buffered text equals a blacklisted word it is
 * printed as one string of `*` of the same length. When the buffered text
 * can no longer become a blacklisted word, its first character is printed
 * and the remaining characters are examined again from scratch, so a word
 * that starts inside a failed attempt is still caught.
 *
 * @param {string|undefined|null} sChar - Next character of the stream.
 *   `undefined` or `null` means "no more input": everything still held back
 *   is released (and masked where it completes a blacklisted word).
 * @returns {void} Output is written with `console.log`.
 * @throws {Error} Re-throws any error from `search` other than `full_hit`.
 */
function filter(sChar) {
  // Translate search()'s "boolean or throw" protocol into a single state.
  const classify = (text) => {
    try {
      return search(text) ? 'partial' : 'none';
    } catch (error) {
      if (error instanceof Error && error.message === 'full_hit') {
        return 'full';
      }
      // Anything else is a genuine failure and must not be masked as a hit.
      throw error;
    }
  };

  const isEnd = sChar === undefined || sChar === null;
  // Characters waiting to be (re-)examined, oldest first.
  const queue = isEnd ? [] : [sChar];

  for (;;) {
    if (queue.length === 0) {
      if (!isEnd || buff.length === 0) {
        return;
      }
      // End of stream: the held text can never be completed, so its first
      // character is safe to print; the rest is examined again.
      console.log(buff.shift());
      queue.push(...buff);
      buff = [];
      continue;
    }

    buff.push(queue.shift());
    const state = classify(buff.join(''));

    if (state === 'partial') {
      // Still a possible blacklisted word: keep holding.
      continue;
    }

    if (state === 'full') {
      // Replace the whole word with one `*` per buffered character.
      console.log('*'.repeat(buff.length));
      buff = [];
      continue;
    }

    // No blacklisted word starts with the buffered text: print its first
    // character and replay the others so that a word beginning at any later
    // position is matched from its own start.
    console.log(buff.shift());
    queue.unshift(...buff);
    buff = [];
  }
}
/**
 * Tests a buffered fragment against the blacklist `bList`.
 *
 * @param {string|string[]} buffStr - Fragment to test. An array of
 *   characters is joined without separators before matching.
 * @returns {boolean} `true` when at least one blacklisted word begins with
 *   the fragment (but none equals it), `false` when no word does.
 * @throws {Error} With message `full_hit` when the fragment equals a
 *   blacklisted word.
 */
function search(buffStr) {
  // Callers also pass character arrays (e.g. a slice of the buffer); default
  // string coercion would insert commas and never match.
  const fragment = Array.isArray(buffStr) ? buffStr.join('') : String(buffStr);

  let hit = false;
  for (const bWord of bList) {
    // Only the beginning of a keyword counts as a partial hit; a fragment
    // found in the middle of a keyword cannot grow into that keyword.
    if (!bWord.startsWith(fragment)) {
      continue;
    }
    // An exact match is reported by throwing.
    if (bWord === fragment) {
      throw new Error('full_hit');
    }
    hit = true;
  }
  return hit;
}
})("锄禾日当午,汗滴禾下土;谁知盘中餐,粒粒皆辛苦。".split(''), [ | |
"禾日苗", | |
"日当午", | |
"盘他", | |
"下贱", | |
"土葬" | |
]) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment