Skip to content

Instantly share code, notes, and snippets.

@mandel59
Created March 24, 2021 03:38
Show Gist options
  • Save mandel59/a05f26c821d8870915e194000dfc4b82 to your computer and use it in GitHub Desktop.
Save mandel59/a05f26c821d8870915e194000dfc4b82 to your computer and use it in GitHub Desktop.
// SPDX-License-Identifier: MIT-0
/*
* Copyright 2021 Ryusei Yamaguchi
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
const fs = require("fs")
/**
 * Split a string into consecutive runs of ASCII digits and non-digits.
 *
 * Digit runs are converted to numbers (base 10, so leading zeros are
 * dropped: "007" becomes 7); every other run is returned unchanged as a
 * string. An empty input yields an empty array.
 *
 * @param {string} str - Text to split.
 * @returns {Array<string | number>} The runs, in their original order.
 */
function splitIntoParts(/** @type {string} */ str) {
    return Array.from(str.matchAll(/\d+|\D+/g), (match) => {
        const run = match[0]
        // Each run is homogeneous, so its first character tells us its kind.
        const isDigitRun = run[0] >= '0' && run[0] <= '9'
        // Always pass the radix explicitly so the parse never depends on
        // how the digit run happens to start.
        return isDigitRun ? Number.parseInt(run, 10) : run
    })
}
/**
 * Comparator that orders values "naturally": both arguments are converted
 * to strings, split with splitIntoParts, and compared part by part.
 *
 * At the first position where the parts differ:
 *   - parts of different kinds are ordered by their typeof name, which
 *     places numbers ("number") before strings ("string");
 *   - parts of the same kind are ordered with the `<` / `>` operators.
 * If one part list is a prefix of the other, the shorter one sorts first.
 *
 * @param {*} x - Left operand.
 * @param {*} y - Right operand.
 * @returns {number} -1, 0 or 1, suitable for Array.prototype.sort.
 */
function sorter(x, y) {
    const left = splitIntoParts(String(x))
    const right = splitIntoParts(String(y))
    const shared = Math.min(left.length, right.length)
    for (let i = 0; i < shared; i += 1) {
        const a = left[i]
        const b = right[i]
        const kindA = typeof a
        const kindB = typeof b
        if (kindA !== kindB) {
            return kindA < kindB ? -1 : 1
        }
        if (a < b) return -1
        if (a > b) return 1
    }
    // Every shared position compared equal: the list with fewer parts wins.
    if (left.length === right.length) return 0
    return left.length < right.length ? -1 : 1
}
/**
 * Adapt a comparator so that it compares two indexable values by the
 * element found at position `i` of each.
 *
 * @param {number|string} i - Index (or key) to read from both operands.
 * @param {(a: *, b: *) => number} sorter - Comparator applied to the
 *   selected elements.
 * @returns {(x: *, y: *) => number} Comparator returning whatever `sorter`
 *   returns for `x[i]` and `y[i]`.
 */
function liftSorter(i, sorter) {
    const compareAtIndex = (x, y) => sorter(x[i], y[i])
    return compareAtIndex
}
/**
 * Group consecutive [key, value] pairs that share the same key.
 *
 * Only adjacent pairs are merged (keys are compared with `!==`), so the
 * input should already be ordered by key if each key is meant to appear
 * once in the output.
 *
 * @param {Iterable<[*, *]>} xs - Pairs whose element 0 is the key and
 *   element 1 is the value.
 * @yields {[*, Array]} `[key, values]` for each run of equal keys, in
 *   input order.
 */
function* group(xs) {
    let key
    let arr = []
    for (const x of xs) {
        if (key !== x[0]) {
            // The key changed: emit the finished run before starting a new one.
            if (arr.length > 0) {
                yield [key, arr]
                arr = []
            }
            key = x[0]
        }
        arr.push(x[1])
    }
    // Flush the run that was still open when the input ended; without this
    // the final group (or the only group) would be silently dropped.
    if (arr.length > 0) {
        yield [key, arr]
    }
}
// Load the data file from the current working directory.
const unihan_dld = fs.readFileSync("Unihan_DictionaryLikeData.txt")
// Build a sorted list of [phonetic class, character] pairs from the
// kPhonetic records in the file.
const kphonetic = unihan_dld
    .toString()
    // Remove everything from a '#' to the end of its line (comments).
    .replace(/#.*\n/g, "")
    .split(/\n/g)
    // Each record is tab-separated: [code point, field name, value].
    .map(line => line.split(/\t/g))
    .filter(fields => fields[1] === "kPhonetic")
    .flatMap(fields => {
        // The value holds one or more space-separated class identifiers.
        const classes = fields[2].split(/ /g)
        // Skip the first two characters (presumably the "U+" prefix) and
        // read the remainder as a hexadecimal code point.
        const character = String.fromCodePoint(parseInt(fields[0].slice(2), 16))
        return classes.map(cls => [cls, character])
    })
    // Order the pairs by class identifier using the natural-order comparator.
    .sort(liftSorter(0, sorter))
// Print one line per class: the identifier, a tab, then its characters
// separated by spaces.
console.log(
    Array.from(group(kphonetic), entry => [entry[0], entry[1].join(" ")].join("\t"))
        .join("\n")
)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment