Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save euberdeveloper/a423bc5498123dcd608e75d819b7cc40 to your computer and use it in GitHub Desktop.
Save euberdeveloper/a423bc5498123dcd608e75d819b7cc40 to your computer and use it in GitHub Desktop.
A script that ranks Chinese character sounds (pinyin syllables with the tones ignored) by how many characters share each sound.
const https = require('https');
const fs = require('fs');
// Maps each tone-marked pinyin vowel to its toneless form. Toned variants of
// "ü" lose the tone mark but keep the diaeresis, so "lü" and "lu" stay distinct.
const TONELESS_VOWELS = {
    '\u0101': 'a', '\u00e1': 'a', '\u01ce': 'a', '\u00e0': 'a',
    '\u0113': 'e', '\u00e9': 'e', '\u011b': 'e', '\u00e8': 'e',
    '\u012b': 'i', '\u00ed': 'i', '\u01d0': 'i', '\u00ec': 'i',
    '\u014d': 'o', '\u00f3': 'o', '\u01d2': 'o', '\u00f2': 'o',
    '\u016b': 'u', '\u00fa': 'u', '\u01d4': 'u', '\u00f9': 'u',
    '\u01d6': '\u00fc', '\u01d8': '\u00fc', '\u01da': '\u00fc', '\u01dc': '\u00fc'
};

// Matches any key of TONELESS_VOWELS. The `g` flag matters: every marked vowel
// in the string must be replaced, not only the first one.
const TONED_VOWEL_PATTERN = /[\u0101\u00e1\u01ce\u00e0\u0113\u00e9\u011b\u00e8\u012b\u00ed\u01d0\u00ec\u014d\u00f3\u01d2\u00f2\u016b\u00fa\u01d4\u00f9\u01d6\u01d8\u01da\u01dc]/g;

// Hard-coded pinyin replacements keyed by the record's `frequency_rank` string.
// NOTE(review): presumably the dataset's pinyin for these two entries is wrong
// or malformed; confirm against the upstream data.
const PINYIN_CORRECTIONS = new Map([
    ['8220', 'y\u01cen'],
    ['9019', 'ni\u00e1n']
]);

/**
 * Removes the tone marks from a pinyin string.
 *
 * @param {string} pinyin - Pinyin with tone diacritics.
 * @returns {string} The same pinyin with every tone-marked vowel replaced by its toneless form.
 */
function stripTones(pinyin) {
    return pinyin.replace(TONED_VOWEL_PATTERN, vowel => TONELESS_VOWELS[vowel]);
}

/**
 * Groups the characters of a line-delimited JSON dump by their toneless pinyin,
 * sorts the groups by descending size and writes them to `ranking.json` in the
 * current working directory.
 *
 * Each non-blank line must be a JSON object with the fields `frequency_rank`,
 * `charcter` (spelled this way in the input) and `pinyin`.
 *
 * @param {string} charactersText - The raw dump, one JSON object per line.
 * @returns {{pinyin: string, length: number, characters: object[]}[]} The ranking that was written to disk.
 * @throws {SyntaxError} If a non-blank line is not valid JSON.
 */
function analyzeInputData(charactersText) {
    // A Map keeps insertion order and cannot collide with Object.prototype keys.
    const groups = new Map();

    for (const rawLine of charactersText.split('\n')) {
        const line = rawLine.trim();
        if (!line) {
            continue;
        }

        const record = JSON.parse(line);
        const pinyinWithTones = PINYIN_CORRECTIONS.has(record.frequency_rank)
            ? PINYIN_CORRECTIONS.get(record.frequency_rank)
            : record.pinyin;

        const entry = {
            frequencyRank: record.frequency_rank,
            character: record.charcter,
            pinyinWithTones,
            pinyin: stripTones(pinyinWithTones)
        };

        const group = groups.get(entry.pinyin);
        if (!group) {
            groups.set(entry.pinyin, [entry]);
        } else if (!group.some(existing => existing.character === entry.character)) {
            // A character is listed at most once per sound, even if it appears
            // with several tones.
            group.push(entry);
        }
    }

    // Array.prototype.sort is stable, so equally sized groups keep first-seen order.
    const result = [...groups.entries()]
        .sort(([, a], [, b]) => b.length - a.length)
        .map(([pinyin, characters]) => ({
            pinyin,
            length: characters.length,
            characters
        }));

    fs.writeFileSync('ranking.json', JSON.stringify(result, null, 3));
    return result;
}
/**
 * Downloads `hanziDB.json` from raw.githubusercontent.com over HTTPS and passes
 * the complete response body, decoded as a string, to `cb`.
 *
 * `cb` is only invoked when the server answers with status 200 and the body
 * was received in full. Request errors, response errors and unexpected status
 * codes are logged with `console.error` and `cb` is not called.
 *
 * @param {(body: string) => void} cb - Receives the downloaded text.
 * @returns {void}
 */
function requestDataAndExecute(cb) {
    const options = {
        hostname: 'raw.githubusercontent.com',
        port: 443,
        path: '/ruddfawcett/hanziDB.csv/master/data/hanziDB.json',
        method: 'GET',
    };

    const req = https.request(options, res => {
        if (res.statusCode !== 200) {
            console.error(new Error(`Request failed with status code ${res.statusCode}`));
            // Drain the body so the socket is released.
            res.resume();
            return;
        }

        const chunks = [];
        res.on('data', chunk => {
            chunks.push(chunk);
        });
        // 'end' fires only after the whole body has been consumed, whereas
        // 'close' also fires when the connection is cut off early, which would
        // hand a truncated body to the callback.
        res.on('end', () => {
            cb(Buffer.concat(chunks).toString());
        });
        res.on('error', error => {
            console.error(error);
        });
    });

    req.on('error', error => {
        console.error(error);
    });
    req.end();
}
/**
 * Script entry point: fetches the character data and passes it to the
 * analysis step once the download has finished.
 */
function main() {
    const onDataDownloaded = analyzeInputData;
    requestDataAndExecute(onDataDownloaded);
}

main();
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment