Skip to content

Instantly share code, notes, and snippets.

@zyf0330
Created December 13, 2020 09:25
Show Gist options
  • Star 4 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save zyf0330/5874455087bc69708f07ff13d12890a5 to your computer and use it in GitHub Desktop.
Save zyf0330/5874455087bc69708f07ff13d12890a5 to your computer and use it in GitHub Desktop.
转换搜狗拼音txt词库为 Gboard 微软双拼词库
/**
* dic.txt 内每行类似 'a'ba 阿巴
* 先组合声母韵母,然后再完整替换
* 对于零声母音节,声母当作空字符
**/
const fs = require('fs')
// Initials (shengmu): maps each full-pinyin initial to its double-pinyin key.
const shengmu = {
  // Single-letter initials are typed with the very same key.
  ...Object.fromEntries([...'qwrtypsdfghjklzxcbnm'].map((key) => [key, key])),
  // Two-letter initials each collapse to a single key.
  zh: 'v',
  ch: 'i',
  sh: 'u',
  // Zero-initial syllables use the empty string as their initial and are led by "o".
  '': 'o',
}
// Finals (yunmu): maps each full-pinyin final to its double-pinyin key.
// Several finals share one key (e.g. "iang"/"uang" -> "d", "ong"/"iong" -> "s").
const yunmu = Object.fromEntries([
  // One-letter finals ("v" is presumably the ASCII spelling of ü -- confirm against dic.txt).
  ['a', 'a'], ['o', 'o'], ['u', 'u'], ['e', 'e'], ['i', 'i'], ['v', 'y'],
  // Four-letter finals.
  ['iang', 'd'], ['uang', 'd'], ['iong', 's'],
  // Three-letter finals.
  ['iao', 'c'], ['ian', 'm'], ['ing', ';'], ['ang', 'h'], ['eng', 'g'],
  ['ong', 's'], ['uan', 'r'], ['uai', 'y'],
  // Two-letter finals.
  ['ia', 'w'], ['ai', 'l'], ['ei', 'z'], ['ui', 'v'], ['ao', 'k'],
  ['ou', 'b'], ['iu', 'q'], ['ie', 'x'], ['ue', 't'], ['er', 'r'],
  ['an', 'j'], ['en', 'f'], ['in', 'n'], ['ua', 'w'], ['un', 'p'],
  ['uo', 'o'],
])
// Parallel tables: quanpins[i] is a full-pinyin syllable and shuangpins[i] is
// its two-key double-pinyin spelling. Every initial is paired with every final.
const quanpins = []
const shuangpins = []
Object.keys(shengmu).forEach((initial) => {
  Object.keys(yunmu).forEach((final) => {
    const syllable = initial + final
    // Two different initial/final pairs must never spell the same syllable,
    // otherwise the reverse lookup would be ambiguous.
    if (quanpins.indexOf(syllable) !== -1) {
      throw new Error(`repeat: ${initial} ${final}`)
    }
    quanpins.push(syllable)
    shuangpins.push(shengmu[initial] + yunmu[final])
  })
})
console.log('音节数', quanpins.length, shuangpins.length)
// Read dic.txt (one entry per line: apostrophe-separated full pinyin, whitespace,
// then the word, e.g. "'a'ba 阿巴") and write dictionary.txt as tab-separated
// Gboard records: "<double pinyin>\t<word>\tzh-CN".
const quanpinDic = fs.readFileSync('dic.txt', 'utf8')
console.log('read from dic.txt')
// Index the syllable table once so every lookup is a Map hit instead of a
// linear indexOf scan over the whole table for each syllable of each word.
const shuangpinByQuanpin = new Map(
  quanpins.map((quanpin, i) => [quanpin, shuangpins[i]])
)
const shuangpinRecords = []
quanpinDic.split('\n').forEach((rawLine, index) => {
  // trim() also strips the "\r" left behind by CRLF line endings.
  const line = rawLine.trim()
  // Blank lines carry no entry; skip them rather than emitting empty records.
  if (line === '') {
    return
  }
  // Split on any run of whitespace so tabs or repeated spaces are tolerated.
  const [wordQuanpin, wordHanzi] = line.split(/\s+/)
  if (wordHanzi === undefined) {
    // Previously this case surfaced as an opaque TypeError on wordHanzi.trim().
    console.error(`dic.txt:${index + 1}: malformed line, expected "<quanpin> <hanzi>":`, line)
    process.exit(1)
  }
  const wordShuangpin = wordQuanpin
    .split("'")
    // The leading apostrophe produces an empty first piece; drop it.
    .filter((piece) => piece !== '')
    .map((quanpin) => {
      const shuangpin = shuangpinByQuanpin.get(quanpin)
      if (shuangpin === undefined) {
        // An unknown syllable means the tables are incomplete; abort instead
        // of writing a partially converted dictionary.
        console.error(wordQuanpin, '有不存在的全拼音节', quanpin, 'length:', quanpin.length)
        process.exit(1)
      }
      return shuangpin
    })
    .join('')
  shuangpinRecords.push(`${wordShuangpin}\t${wordHanzi}\tzh-CN\n`)
})
const shuangpinDic = shuangpinRecords.join('')
// Header and records go out in a single write; every record is newline-terminated.
fs.writeFileSync('dictionary.txt', `# Gboard Dictionary version:1\n${shuangpinDic}`, {flag: 'w'})
console.log('output to dictionary.txt')
@zyf0330
Copy link
Author

zyf0330 commented Dec 13, 2020

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment