Created
July 6, 2021 02:24
-
-
Save lancejpollard/8ce25d1bee10c4bcaaeddaa8f17d048a to your computer and use it in GitHub Desktop.
Parse ORACC Global Cuneiform Sign List Hack
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
const fs = require('fs')
// Sign list; the code below reads `signs.signs[<name>].values` and `.utf8` from it.
const signs = require('./ogsl/ogsl-sl.json')

// Glossary entries from every loaded volume, concatenated into one flat array.
// Only the `cf`, `gw` and `pos` fields of each entry are used further down.
// NOTE(review): the directory and volume numbers do not always line up
// (e.g. './saao 11/saa13'); the paths are kept exactly as written — presumably
// they mirror the local download layout, confirm before renaming anything.
const stuff = [
  ...require('./saao/saa02/gloss-akk-x-neoass.json').entries,
  ...require('./saao 2/saa01/gloss-akk-x-neoass.json').entries,
  // NOTE(review): the volumes below were already disabled; reason not recorded.
  // ...require('./saao 3/saa03/gloss-akk-x-neoass.json').entries,
  // ...require('./saao 4/saa04/gloss-akk-x-neoass.json').entries,
  ...require('./saao 5/saa07/gloss-akk-x-neoass.json').entries,
  ...require('./saao 6/saa05/gloss-akk-x-neoass.json').entries,
  ...require('./saao 7/saa06/gloss-akk-x-neoass.json').entries,
  ...require('./saao 8/saa08/gloss-akk-x-neoass.json').entries,
  ...require('./saao 9/saa09/gloss-akk-x-neoass.json').entries,
  ...require('./saao 10/saa10/gloss-akk-x-neoass.json').entries,
  ...require('./saao 11/saa13/gloss-akk-x-neoass.json').entries,
  ...require('./saao 13/saa11/gloss-akk-x-neoass.json').entries,
  ...require('./saao 14/saa16/gloss-akk-x-neoass.json').entries,
  // ...require('./saao 15/saa17/gloss-akk-x-neoass.json').entries,
  ...require('./saao 16/saa15/gloss-akk-x-neoass.json').entries,
  ...require('./saao 17/saa14/gloss-akk-x-neoass.json').entries
]
// Flatten the sign list into { roman, utf8 } pairs: one pair per reading
// listed in a sign's `values`, all sharing that sign's `utf8` glyph.
const array = []
for (const block of Object.values(signs.signs)) {
  if (!block.values) continue
  for (const value of block.values) {
    // An empty (or non-string) reading is a zero-length prefix of every key:
    // the prefix scan further down would "match" it without consuming any
    // input and never terminate, so such readings are left out.
    if (typeof value !== 'string' || value === '') continue
    array.push({ roman: value, utf8: block.utf8 })
  }
}

// Descending string order places a longer reading ahead of any reading that
// is a prefix of it, so a first-hit prefix scan picks the longest match.
array.sort((a, b) => {
  if (a.roman < b.roman) return 1
  if (a.roman > b.roman) return -1
  return 0
})
// Group the glossary entries by part of speech and cuneiform spelling.
// Resulting shape:
//   defs['<pos>:<cuneiform>'] = { def: { '<gw>': true, ... }, key: '<cf>' }
// where `key` is the `cf` of the first entry that produced that spelling.
let defs = {}
stuff.forEach(entry => {
  const def = entry.gw
  const pos = entry.pos

  // Spell entry.cf in cuneiform by repeatedly stripping a sign reading off
  // the front of what is left. `array` is scanned in its sorted order and the
  // first reading that prefixes the remainder wins.
  const glyphs = []
  let rest = entry.cf
  while (rest) {
    const hit = array.find(sign =>
      // A zero-length reading would match without consuming anything and
      // keep this loop spinning forever, so it is never accepted.
      typeof sign.roman === 'string' &&
      sign.roman !== '' &&
      rest.startsWith(sign.roman)
    )
    // Some part of the citation form matches no reading: skip the entry.
    if (!hit) return
    glyphs.push(hit.utf8)
    rest = rest.slice(hit.roman.length)
  }
  // Nothing to spell (missing or empty citation form).
  if (!glyphs.length) return

  const id = `${pos}:${glyphs.join('')}`
  let item = defs[id]
  if (!item) {
    item = defs[id] = { def: {}, key: entry.cf }
  }
  // Object keys act as a set, so repeated definitions are stored once.
  item.def[def] = true
})
// Replace the grouped map with an array of CSV rows:
//   <pos>,<cuneiform>,<cleaned citation form>,"<definitions joined by '; '>"
defs = Object.keys(defs).map(x => {
  const item = defs[x]
  // Keys were built as `${pos}:${cun}`.
  const [pos, cun] = x.split(':')
  // The definitions field is wrapped in double quotes, so any double quote
  // inside it has to be doubled to keep the row well-formed.
  const glosses = Object.keys(item.def).join('; ').replace(/"/g, '""')
  return `${pos},${cun},${clean(item.key)},"${glosses}"`
})
/**
 * Rewrite a transliterated citation form using only ASCII letters and `+`.
 *
 * Steps, applied in this order:
 *  1. Normalise to NFC so letters typed as base + combining mark are handled
 *     the same way as their precomposed forms.
 *  2. š -> `x`, ṭ -> `t+`, ṣ -> `x+` (a run of the same letter counts once).
 *  3. Every run of one ASCII letter, compared case-insensitively, collapses
 *     to a single lower-case letter (`nn` -> `n`, `A` -> `a`).
 *  4. Long vowels ā ē ū ī ō become doubled plain vowels (`aa`, `ee`, ...).
 *  5. `q` -> `k+`, and only afterwards `ng` -> `q`.
 *
 * Characters not mentioned above are passed through unchanged.
 *
 * @param {string} s - citation form to convert
 * @returns {string} the converted spelling
 */
function clean(s) {
  return s
    .normalize('NFC')
    .replace(/\u0161+/gi, 'x') // š
    .replace(/\u1e6d+/gi, 't+') // ṭ
    .replace(/\u1e63+/gi, 'x+') // ṣ
    // With the `i` flag the back-reference also matches the other case, so
    // "Aa" is one run. Runs of length one are matched too, which is what
    // lower-cases isolated capitals.
    .replace(/([a-z])\1*/gi, (run, letter) => letter.toLowerCase())
    .replace(/\u0101+/gi, 'aa') // ā
    .replace(/\u0113+/gi, 'ee') // ē
    .replace(/\u016b+/gi, 'uu') // ū
    .replace(/\u012b+/gi, 'ii') // ī
    // ō: precomposed U+014D (which is also what NFC yields for o + macron),
    // or Cyrillic о (U+043E) + combining macron, which has no precomposed
    // form. Any further stray macrons are swallowed with the match.
    .replace(/(?:\u014d+|\u043e\u0304)\u0304*/gi, 'oo')
    // Order matters: existing q must become k+ before q is reused for ng.
    .replace(/q/gi, 'k+')
    .replace(/ng/gi, 'q')
}
// Write all rows to stuff.csv (relative path), one row per line.
fs.writeFileSync('stuff.csv', defs.join('\n'))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment