Skip to content

Instantly share code, notes, and snippets.

@jhmaster2000
Created September 18, 2021 16:29
Show Gist options
  • Save jhmaster2000/8ebfd9e1a212b3f6dcdae37c11c82032 to your computer and use it in GitHub Desktop.
Save jhmaster2000/8ebfd9e1a212b3f6dcdae37c11c82032 to your computer and use it in GitHub Desktop.
UnicodeData.txt to JSON converter
// Paste this script into the browser console while viewing
// http://unicode.org/Public/UNIDATA/UnicodeData.txt
//
// The page text is read from the <pre> element directly under <body> and
// split into one string per line. The final element is discarded
// (presumably the empty string left by the file's trailing newline).
const entries = document
  .querySelector("body > pre")
  .innerText
  .split('\n')
  .slice(0, -1);
// Formatting tags that may prefix a decomposition mapping.
let decompTags = [
  '<font>', '<noBreak>', '<initial>', '<medial>',
  '<final>', '<isolated>', '<circle>', '<super>',
  '<sub>', '<vertical>', '<wide>', '<narrow>',
  '<small>', '<square>', '<fraction>', '<compat>'
];

/**
 * Converts a space-separated decomposition mapping string into an object.
 *
 * @param {string} mapping - Optional leading tag (one of `decompTags`)
 *   followed by hexadecimal code points, all separated by single spaces.
 * @returns {{tag: (string|null), characters: number[]}|null} The leading tag
 *   (or null when absent) and the code points as numbers; null when
 *   `mapping` is empty or otherwise falsy.
 */
function parseDecompMapping(mapping) {
  if (!mapping) return null;

  const tokens = mapping.split(' ');
  // Only consume the first token when it is a recognised tag.
  const tag = decompTags.includes(tokens[0]) ? tokens.shift() : null;

  return {
    tag,
    characters: tokens.map(hex => Number('0x' + hex))
  };
}
/**
 * Parses a hexadecimal field (code point or simple case mapping).
 *
 * @param {string} field - Hex digits without a prefix, possibly empty.
 * @returns {number|null} The numeric value, or null when the field is empty
 *   or missing.
 */
function parseHexField(field) {
  return field ? Number('0x' + field) : null;
}

/**
 * Parses a decimal integer field (digit values).
 *
 * The emptiness check is explicit so that a legitimate value of "0" is kept
 * as 0 rather than being collapsed to null.
 *
 * @param {string} field - Decimal digits, possibly empty.
 * @returns {number|null} The numeric value, or null when the field is empty
 *   or missing.
 */
function parseDecimalField(field) {
  return field ? Number(field) : null;
}

/**
 * Parses the numeric-value field, which is a decimal integer or a rational
 * written as "numerator/denominator" (for example "1/4" or "-1/2").
 *
 * @param {string} field - The raw field text, possibly empty.
 * @returns {number|null} The value as a JS number (rationals are divided
 *   out, so they become floating-point), or null when the field is empty
 *   or missing.
 */
function parseNumericValue(field) {
  if (!field) return null;
  const [numerator, denominator = '1'] = field.split('/');
  return Number(numerator) / Number(denominator);
}

/**
 * Converts one semicolon-separated UnicodeData.txt line into a record.
 *
 * Field encodings follow UAX #44: code points and case mappings are
 * hexadecimal, while the combining class and the digit/numeric values are
 * decimal.
 *
 * @param {string} entry - A single line of UnicodeData.txt.
 * @returns {object} The parsed character record.
 */
function parseUnicodeDataEntry(entry) {
  const fields = entry.split(';');
  return {
    code: parseHexField(fields[0]),
    name: fields[1],
    category: fields[2],
    // Decimal, not hex: a combining class of "230" must stay 230.
    canonicalCombiningClass: Number(fields[3]),
    bidirectionalCategory: fields[4],
    decompositionMapping: parseDecompMapping(fields[5]),
    decimalDigitValue: parseDecimalField(fields[6]),
    digitValue: parseDecimalField(fields[7]),
    numericValue: parseNumericValue(fields[8]),
    mirrored: fields[9] === 'Y',
    oldName: fields[10] || null,
    comment: fields[11] || null,
    uppercaseMapping: parseHexField(fields[12]),
    lowercaseMapping: parseHexField(fields[13]),
    titlecaseMapping: parseHexField(fields[14])
  };
}

// One record per input line, in file order.
let chars = entries.map(entry => parseUnicodeDataEntry(entry));
// At this point the parsed records can be inspected directly through the
// `chars` array. The same data is also serialised as indented JSON and
// printed to the console so it can be copied out.
const unicodejson = JSON.stringify(
  chars,
  null, // no replacer: keep every property
  2     // two-space indentation
);
console.info(unicodejson);
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment