Created
June 3, 2020 08:20
-
-
Save dsenkus/17ddbd29d53bd6dd2cf4051bd05ad44d to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * Generates the contents of an unaccent.rules file for PostgreSQL.
 * The rule set converts bold/italic Unicode (UTF-8) characters to their plain (normal) counterparts.
 * @type {number[]}
 */
/**
 * !IMPORTANT!
 * The italic letter ℎ (U+210E, produced by the 3rd style) is not handled correctly; its rule needs to be updated manually.
 */
// characters from https://yaytext.com/bold-italic/ | |
// and https://www.obliquity.com/computer/html/unicode1D400.html | |
/**
 * Code-point offsets from ASCII 'A' to the capital 'A' of each styled
 * alphabet. The styled letters are written as \u{...} escapes so the values
 * do not depend on how this file is encoded or decoded.
 * NOTE(review): the literals in this copy of the file were mis-decoded; the
 * code points below are a reconstruction - confirm against the original.
 * @type {number[]}
 */
const diffsUpper = [
  '\u{1D400}', // bold (serif) A
  '\u{1D5D4}', // sans-serif bold A
  '\u{1D434}', // italic (serif) A
  '\u{1D608}', // sans-serif italic A
  '\u{1D468}', // bold italic (serif) A
  '\u{1D63C}', // sans-serif bold italic A
  '\u{1D49C}', // script A
  '\u{1D670}', // monospace A
].map((ch) => ch.codePointAt(0) - 'A'.codePointAt(0));

/**
 * Code-point offsets from ASCII 'a' to the small 'a' of each styled alphabet,
 * in the same style order as diffsUpper.
 * @type {number[]}
 */
const diffsLower = [
  '\u{1D41A}', // bold (serif) a
  '\u{1D5EE}', // sans-serif bold a
  '\u{1D44E}', // italic (serif) a
  '\u{1D622}', // sans-serif italic a
  '\u{1D482}', // bold italic (serif) a
  '\u{1D656}', // sans-serif bold italic a
  '\u{1D4B6}', // script a
  '\u{1D68A}', // monospace a
].map((ch) => ch.codePointAt(0) - 'a'.codePointAt(0));
/**
 * Formats a single rule line: the source character, a tab, then the
 * replacement.
 * @param {string} ch1 - Character to be replaced.
 * @param {string} ch2 - Replacement character.
 * @returns {string} `ch1` and `ch2` separated by one tab character.
 */
const generateRule = (ch1, ch2) => `${ch1}\t${ch2}`;
/**
 * Lists every character from `from` to `to`, inclusive, in code-point order.
 * Only the first character of each argument is used. Code points (rather
 * than UTF-16 code units) are used so that ranges outside the Basic
 * Multilingual Plane are enumerated correctly as well.
 * @param {string} from - First character of the range.
 * @param {string} to - Last character of the range.
 * @returns {string[]} One single-character string per code point; empty when
 *   `from` sorts after `to` or either argument is an empty string.
 */
const generateAlphabet = (from, to) => {
  const start = from.codePointAt(0);
  const last = to.codePointAt(0);
  const alphabet = [];
  // An empty argument yields `undefined` here, which makes the comparison
  // false and leaves the result empty.
  for (let codePoint = start; codePoint <= last; ++codePoint) {
    alphabet.push(String.fromCodePoint(codePoint));
  }
  return alphabet;
};
/**
 * Builds one rule per (letter, style) pair that maps the styled form of the
 * letter back to the plain letter. Rules are emitted letter by letter, and
 * for each letter in the order of `diffs`.
 * @param {string[]} alphabet - Plain letters to generate rules for.
 * @param {number[]} diffs - Code-point offsets, one per style, added to each
 *   plain letter to reach its styled form.
 * @returns {string[]} Rule lines as produced by generateRule.
 */
const generateRuleSet = (alphabet, diffs) => {
  // The Mathematical Alphanumeric Symbols block leaves these code points
  // unassigned because the letters already existed in Letterlike Symbols, so
  // plain "base + offset" arithmetic would emit a rule for a non-character.
  const reservedToLetterlike = new Map([
    [0x1d455, 0x210e], // italic small h
    [0x1d49d, 0x212c], // script capital B
    [0x1d4a0, 0x2130], // script capital E
    [0x1d4a1, 0x2131], // script capital F
    [0x1d4a3, 0x210b], // script capital H
    [0x1d4a4, 0x2110], // script capital I
    [0x1d4a7, 0x2112], // script capital L
    [0x1d4a8, 0x2133], // script capital M
    [0x1d4ad, 0x211b], // script capital R
    [0x1d4ba, 0x212f], // script small e
    [0x1d4bc, 0x210a], // script small g
    [0x1d4c4, 0x2134], // script small o
    [0x1d506, 0x212d], // fraktur capital C
    [0x1d50b, 0x210c], // fraktur capital H
    [0x1d50c, 0x2111], // fraktur capital I
    [0x1d515, 0x211c], // fraktur capital R
    [0x1d51d, 0x2128], // fraktur capital Z
    [0x1d53a, 0x2102], // double-struck capital C
    [0x1d53f, 0x210d], // double-struck capital H
    [0x1d545, 0x2115], // double-struck capital N
    [0x1d547, 0x2119], // double-struck capital P
    [0x1d548, 0x211a], // double-struck capital Q
    [0x1d549, 0x211d], // double-struck capital R
    [0x1d551, 0x2124], // double-struck capital Z
  ]);

  const result = [];
  for (const ch of alphabet) {
    for (const diff of diffs) {
      const shifted = ch.codePointAt(0) + diff;
      const styled = reservedToLetterlike.has(shifted)
        ? reservedToLetterlike.get(shifted)
        : shifted;
      result.push(generateRule(String.fromCodePoint(styled), ch));
    }
  }
  return result;
};
// Build the rules for the capital letters first, then for the small letters,
// and print them to stdout one rule per line.
const rules = [
  ...generateRuleSet(generateAlphabet('A', 'Z'), diffsUpper),
  ...generateRuleSet(generateAlphabet('a', 'z'), diffsLower),
];
console.log(rules.join("\n"));
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment