Created
January 12, 2021 00:53
-
-
Save ayaka14732/e714574fcbc2d3d7bd4c10bf5078baf1 to your computer and use it in GitHub Desktop.
統計《廣韻》中是多音字,而普通話中不是多音字的字的個數
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Install these libraries for Node.js:
//   qieyun~0.7.6
//   rime-utils~0.0.1
// Prepare files:
//   terra_pinyin.dict.yaml <https://raw.githubusercontent.com/rime/rime-terra-pinyin/5a445e2953c4b8149daac9d4625782fd5f80d9f2/terra_pinyin.dict.yaml>
//   常用字頻序表.txt <https://cdn.jsdelivr.net/gh/ayaka14732/syyon-vencie@481d58e/texts/%E5%B8%B8%E7%94%A8%E5%AD%97%E9%A0%BB%E5%BA%8F%E8%A1%A8.txt>
// Run:
//   node heteronym.js
const fs = require('fs');
const Qieyun = require('qieyun');
const RimeUtils = require('rime-utils');

// Dictionary read from terra_pinyin.dict.yaml; it is queried per character
// through `m.get(c)`.
// NOTE(review): presumably maps each character to a collection of its
// Mandarin readings — confirm against the rime-utils documentation.
const m = RimeUtils.readRimeDict('terra_pinyin.dict.yaml');
/**
 * Count the distinct 音韻描述 values recorded for a character in the Qieyun data.
 *
 * Every entry returned by `Qieyun.query漢字(c)` is resolved through
 * `Qieyun.get音韻地位(小韻號)`, and the resulting `音韻描述` values are
 * de-duplicated with a Set, so entries that share a description count once.
 *
 * @param {string} c - The character to look up.
 * @returns {number} Number of distinct 音韻描述 values; 0 when the lookup
 *   returns no entries.
 */
function 古代有幾個音(c) {
  const descriptions = Qieyun.query漢字(c).map(({ 小韻號 }) => Qieyun.get音韻地位(小韻號).音韻描述);
  return new Set(descriptions).size;
}
/**
 * Count the entries the loaded dictionary `m` holds for a character.
 *
 * @param {string} c - The character to look up.
 * @returns {number} Number of items obtained by spreading `m.get(c)`;
 *   0 when `m.get(c)` is falsy (for example, the character is absent).
 */
function 現代有幾個音(c) {
  const readings = m.get(c);
  // Spread rather than reading `.length`/`.size` directly, so any iterable works.
  return readings ? [...readings].length : 0;
}
// Read the character list; the loop below walks it one code point at a time.
const s = fs.readFileSync('常用字頻序表.txt', { encoding: 'utf8' });

// Tallies over the characters for which both lookups return a non-zero count.
let 古代是多音字 = 0; // more than one result from 古代有幾個音
let 現代是多音字 = 0; // more than one result from 現代有幾個音
let 古今都是多音字 = 0; // more than one result from both
let 總字數 = 0; // characters counted at all

for (const c of s) {
  const 古代音數量 = 古代有幾個音(c);
  const 現代音數量 = 現代有幾個音(c);

  // Skip anything missing from either source (this also drops line breaks
  // and other non-dictionary code points in the input file).
  if (古代音數量 === 0 || 現代音數量 === 0) continue;

  總字數++;
  const 古代多音 = 古代音數量 > 1;
  const 現代多音 = 現代音數量 > 1;
  if (古代多音) 古代是多音字++;
  if (現代多音) 現代是多音字++;
  if (古代多音 && 現代多音) 古今都是多音字++;
}

// The last figure is derived: counted in 古代是多音字 but not in 古今都是多音字.
console.log(`統計了 ${總字數} 字,古代是多音字的有 ${古代是多音字} 個,現代是多音字的有 ${現代是多音字} 個。古代是多音字,而現代不是多音字的有 ${古代是多音字 - 古今都是多音字} 個。`);
// Recorded output:
// 統計了 6219 字,古代是多音字的有 1709 個,現代是多音字的有 643 個。古代是多音字,而現代不是多音字的有 1346 個。
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment