Skip to content

Instantly share code, notes, and snippets.

@dsenkus
Created June 3, 2020 08:20
Show Gist options
  • Save dsenkus/17ddbd29d53bd6dd2cf4051bd05ad44d to your computer and use it in GitHub Desktop.
Save dsenkus/17ddbd29d53bd6dd2cf4051bd05ad44d to your computer and use it in GitHub Desktop.
/**
* Generates unaccent.rules for Postgres.
* Rule set will convert bold/italic UTF8 characters to normal characters.
* @type {number[]}
*/
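/**
* !IMPORTANT!
* The italic small h "ℎ" (3rd style in the lists below) is not generated correctly by a plain offset:
* its slot in the Mathematical Alphanumeric Symbols block (U+1D455) is reserved and the real character
* is U+210E. Check that rule in the generated output and correct it manually if needed.
*/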
// characters from https://yaytext.com/bold-italic/
// and https://www.obliquity.com/computer/html/unicode1D400.html
// Offsets from the plain ASCII letter to its styled counterpart, one entry per supported style.
// The styled "A"/"a" are written as \u{...} escapes instead of literal glyphs so the values cannot be
// corrupted when the file passes through a tool that mishandles non-BMP characters.
// Style order (identical in both lists): bold, sans-serif bold, italic, sans-serif italic,
// bold italic, sans-serif bold italic, script, monospace.
const diffsUpper = [
  '\u{1D400}', // bold A
  '\u{1D5D4}', // sans-serif bold A
  '\u{1D434}', // italic A
  '\u{1D608}', // sans-serif italic A
  '\u{1D468}', // bold italic A
  '\u{1D63C}', // sans-serif bold italic A
  '\u{1D49C}', // script A
  '\u{1D670}', // monospace A
].map((ch) => ch.codePointAt(0) - 'A'.codePointAt(0));
const diffsLower = [
  '\u{1D41A}', // bold a
  '\u{1D5EE}', // sans-serif bold a
  '\u{1D44E}', // italic a
  '\u{1D622}', // sans-serif italic a
  '\u{1D482}', // bold italic a
  '\u{1D656}', // sans-serif bold italic a
  '\u{1D4B6}', // script a
  '\u{1D68A}', // monospace a
].map((ch) => ch.codePointAt(0) - 'a'.codePointAt(0));
/**
 * Formats a single rule line: the source character, a TAB, then its replacement.
 * @param {string} ch1 - Character to be replaced (left column).
 * @param {string} ch2 - Replacement character (right column).
 * @returns {string} The two values separated by one tab character.
 */
function generateRule(ch1, ch2) {
  return `${ch1}\t${ch2}`;
}
/**
 * Lists every character from `from` to `to`, inclusive, in code point order.
 *
 * Works on whole code points (not UTF-16 code units), so endpoints outside the Basic
 * Multilingual Plane are handled the same way as ASCII ones.
 *
 * @param {string} from - String whose first code point starts the range.
 * @param {string} to - String whose first code point ends the range.
 * @returns {string[]} One single-character string per code point; empty when `from` sorts after `to`.
 */
const generateAlphabet = (from, to) => {
  const alphabet = [];
  const start = from.codePointAt(0);
  const last = to.codePointAt(0);
  for (let codePoint = start; codePoint <= last; codePoint += 1) {
    alphabet.push(String.fromCodePoint(codePoint));
  }
  return alphabet;
};
// Slots in the Mathematical Alphanumeric Symbols block that Unicode leaves reserved because the
// letter already existed in the Letterlike Symbols block. A plain "base letter + offset" lands on
// these unassigned code points, so they are redirected to the character that actually exists.
// Covers the italic and script styles only.
const RESERVED_CODE_POINT_FIXES = new Map([
  [0x1D455, 0x210E], // italic h
  [0x1D49D, 0x212C], // script B
  [0x1D4A0, 0x2130], // script E
  [0x1D4A1, 0x2131], // script F
  [0x1D4A3, 0x210B], // script H
  [0x1D4A4, 0x2110], // script I
  [0x1D4A7, 0x2112], // script L
  [0x1D4A8, 0x2133], // script M
  [0x1D4AD, 0x211B], // script R
  [0x1D4BA, 0x212F], // script e
  [0x1D4BC, 0x210A], // script g
  [0x1D4C4, 0x2134], // script o
]);
/**
 * Builds one rule per (letter, style offset) pair.
 *
 * For every letter, each offset is added to the letter's code point to find the styled
 * character, and a rule mapping that styled character back to the plain letter is emitted.
 * Rules are grouped by letter, with offsets in the order given.
 *
 * @param {string[]} alphabet - Plain letters to generate rules for.
 * @param {number[]} diffs - Code point offsets from a plain letter to each styled variant.
 * @returns {string[]} Rule lines as produced by `generateRule`.
 */
const generateRuleSet = (alphabet, diffs) => {
  const result = [];
  for (const ch of alphabet) {
    const base = ch.codePointAt(0);
    for (const diff of diffs) {
      const shifted = base + diff;
      // Swap a reserved slot for its real Letterlike Symbols character when needed.
      const source = RESERVED_CODE_POINT_FIXES.has(shifted)
        ? RESERVED_CODE_POINT_FIXES.get(shifted)
        : shifted;
      result.push(generateRule(String.fromCodePoint(source), ch));
    }
  }
  return result;
};
// Generate the rules for uppercase letters first, then lowercase, and print one rule per line.
const upperCaseRules = generateRuleSet(generateAlphabet('A', 'Z'), diffsUpper);
const lowerCaseRules = generateRuleSet(generateAlphabet('a', 'z'), diffsLower);
const rules = [...upperCaseRules, ...lowerCaseRules];
console.log(rules.join("\n"));
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment