-
-
Save alisterlf/3490957 to your computer and use it in GitHub Desktop.
function RemoveAccents(strAccents) { | |
var strAccents = strAccents.split(''); | |
var strAccentsOut = new Array(); | |
var strAccentsLen = strAccents.length; | |
var accents = 'ÀÁÂÃÄÅàáâãäåÒÓÔÕÕÖØòóôõöøÈÉÊËèéêëðÇçÐÌÍÎÏìíîïÙÚÛÜùúûüÑñŠšŸÿýŽž'; | |
var accentsOut = "AAAAAAaaaaaaOOOOOOOooooooEEEEeeeeeCcDIIIIiiiiUUUUuuuuNnSsYyyZz"; | |
for (var y = 0; y < strAccentsLen; y++) { | |
if (accents.indexOf(strAccents[y]) != -1) { | |
strAccentsOut[y] = accentsOut.substr(accents.indexOf(strAccents[y]), 1); | |
} else | |
strAccentsOut[y] = strAccents[y]; | |
} | |
strAccentsOut = strAccentsOut.join(''); | |
return strAccentsOut; | |
} |
with ES6:
function removeAccents(str) {
let accents = 'ÀÁÂÃÄÅàáâãäåßÒÓÔÕÕÖØòóôõöøÈÉÊËèéêëðÇçÐÌÍÎÏìíîïÙÚÛÜùúûüÑñŠšŸÿýŽž';
let accentsOut = "AAAAAAaaaaaaBOOOOOOOooooooEEEEeeeeeCcDIIIIiiiiUUUUuuuuNnSsYyyZz";
str = str.split('');
str.forEach((letter, index) => {
let i = accents.indexOf(letter);
if (i != -1) {
str[index] = accentsOut[i];
}
})
return str.join('');
}
if someone needs slovak letters:
ÝŤŔŇĽĹDŽĎČ were missing
'ÀÁÂÃÄÅàáâãäåßÒÓÔÕÕÖØòóôõöøĎďDŽdžÈÉÊËèéêëðÇçČčÐÌÍÎÏìíîïÙÚÛÜùúûüĽĹľĺÑŇňñŔ੹ŤťŸÝÿýŽž';
"AAAAAAaaaaaasOOOOOOOooooooDdDZdzEEEEeeeeeCcCcDIIIIiiiiUUUUuuuuLLllNNnnRrSsTtYYyyZz";
Thanks!!
What about this ? It's shorter :)
str.normalize('NFD').replace(/[\u0300-\u036f]/g, "")
It really is, Arthur, thank you! It's good to also point to the original, mainly for its complete explanation.
...with special hungarian characters:
let accents = 'ÀÁÂÃÄÅàáâãäåßÒÓÔÕÕÖØŐòóôőõöøĎďDŽdžÈÉÊËèéêëðÇçČčÐÌÍÎÏìíîïÙÚÛÜŰùűúûüĽĹľĺÑŇňñŔ੹ŤťŸÝÿýŽž';
let accentsOut = "AAAAAAaaaaaasOOOOOOOOoooooooDdDZdzEEEEeeeeeCcCcDIIIIiiiiUUUUUuuuuuLLllNNnnRrSsTtYYyyZz";
and with special polish characters:
let accents = 'ÀÁÂÃÄÅĄàáâãäåąßÒÓÔÕÕÖØŐòóôőõöøĎďDŽdžÈÉÊËĘèéêëęðÇçČčĆćÐÌÍÎÏìíîïÙÚÛÜŰùűúûüĽĹŁľĺłÑŇŃňñńŔ੦šśŤťŸÝÿýŽŻŹžżź'; let accentsOut = "AAAAAAAaaaaaaasOOOOOOOOoooooooDdDZdzEEEEEeeeeeeCcCcCcDIIIIiiiiUUUUUuuuuuLLLlllNNNnnnRrSSssTtYYyyZZZzzz";
with map:
function removeAccents(string) {
const accents =
"ÀÁÂÃÄÅàáâãäåßÒÓÔÕÕÖØòóôõöøÈÉÊËèéêëðÇçÐÌÍÎÏìíîïÙÚÛÜùúûüÑñŠšŸÿýŽž";
const accentsOut =
"AAAAAAaaaaaaBOOOOOOOooooooEEEEeeeeeCcDIIIIiiiiUUUUuuuuNnSsYyyZz";
return string
.split("")
.map((letter, index) => {
const accentIndex = accents.indexOf(letter);
return accentIndex !== -1 ? accentsOut[accentIndex] : letter;
})
.join("");
}
@up with polish special characters
function removeAccents(string) {
const accents =
"ÀÁÂÃÄÅĄàáâãäåąßÒÓÔÕÕÖØÓòóôõöøóÈÉÊËĘèéêëęðÇĆçćÐÌÍÎÏìíîïÙÚÛÜùúûüÑŃñńŠŚšśŸÿýŽŻŹžżź";
const accentsOut =
"AAAAAAAaaaaaaaBOOOOOOOOoooooooEEEEEeeeeeeCCccDIIIIiiiiUUUUuuuuNNnnSSssYyyZZZzzz";
return string
.split("")
.map((letter, index) => {
const accentIndex = accents.indexOf(letter);
return accentIndex !== -1 ? accentsOut[accentIndex] : letter;
})
.join("");
}
If you use ES6 consider this Stack answer: https://stackoverflow.com/a/37511463/2649707
@brunoocasali doesn't work with "œ" char, like in "Cœur"
Other variant without constants
function normalizeString (string) {
return string.split('').map(function (letter) {
let i = this.accents.indexOf(letter)
return (i !== -1) ? this.out[i] : letter
}.bind({
accents: 'ÀÁÂÃÄÅĄàáâãäåąßÒÓÔÕÕÖØÓòóôõöøóÈÉÊËĘèéêëęðÇĆçćÐÌÍÎÏìíîïÙÚÛÜùúûüÑŃñńŠŚšśŸÿýŽŻŹžżź',
out: 'AAAAAAAaaaaaaaBOOOOOOOOoooooooEEEEEeeeeeeCCccDIIIIiiiiUUUUuuuuNNnnSSssYyyZZZzzz'
})
).join('')
}
I think the German eszett ß should be an "s" https://en.wikipedia.org/wiki/%C3%9F
β is the greek beta https://en.wikipedia.org/wiki/Beta and that should probably be a "B"
Added a few
var accents = 'ÀÁÂÃÄÅĄĀāàáâãäåąßÒÓÔÕÕÖØŐòóôőõöøĎďDŽdžÈÉÊËĘèéêëęðÇçČčĆćÐÌÍÎÏĪìíîïīÙÚÛÜŰùűúûüĽĹŁľĺłÑŇŃňñńŔŕŠŚŞšśşŤťŸÝÿýŽŻŹžżźđĢĞģğ';
var accentsOut = "AAAAAAAAaaaaaaaasOOOOOOOOoooooooDdDZdzEEEEEeeeeeeCcCcCcDIIIIIiiiiiUUUUUuuuuuLLLlllNNNnnnRrSSSsssTtYYyyZZZzzzdGGgg";
How about this?
var nIC = new Intl.Collator(undefined, {sensitivity: 'base'})
var cmp = nIC.compare.bind(nIC)
Or this?
'être'.localeCompare('etre', undefined, {sensitivity: 'base'})
Just... thank you !
Though I prefer the map
method, here's the original but TypeScript-friendly:
private replaceAccents(str: string): string
{
const ACCENTS = 'ÀÁÂÃÄÅàáâãäåÒÓÔÕÕÖØòóôõöøÈÉÊËèéêëðÇçÐÌÍÎÏìíîïÙÚÛÜùúûüÑñŠšŸÿýŽž';
const NON_ACCENTS = "AAAAAAaaaaaaOOOOOOOooooooEEEEeeeeeCcDIIIIiiiiUUUUuuuuNnSsYyyZz";
const strAccents: string[] = str.split('');
const strAccentsOut: string[] = new Array();
const strAccentsLen: number = strAccents.length;
for (let y = 0; y < strAccentsLen; y++)
{
if (ACCENTS.indexOf(strAccents[y]) != -1)
{
strAccentsOut[y] = NON_ACCENTS.substr(ACCENTS.indexOf(strAccents[y]), 1);
}
else
{
strAccentsOut[y] = strAccents[y];
}
}
const newString: string = strAccentsOut.join('');
return newString;
}
A slight alternative to above using the string normalisation AND correct substitution for the German ß
import { includes, some } from 'lodash'
export default function convertForeignCharacters(str: string): string {
const strAccents: string[] = str.split('')
const strAccentsOut: string[] = new Array()
const strAccentsLen: number = strAccents.length
for (let y: number = 0; y < strAccentsLen; y++) {
if (!some(['ä', 'ü', 'ö'], (el: string) => includes(strAccents[y], el))) {
strAccentsOut[y] = strAccents[y].normalize('NFD').replace(/[\u0300-\u036f]/g, '')
} else {
strAccentsOut[y] = strAccents[y]
}
}
const newString: string = strAccentsOut.join('').replace('ß', 'ss')
return newString
}
O(n) version, ~5x faster, especially if you want add more accents.
const accents = 'ÀÁÂÃÄÅĄĀāàáâãäåąßÒÓÔÕÕÖØŐòóôőõöøĎďDŽdžÈÉÊËĘèéêëęðÇçČčĆćÐÌÍÎÏĪìíîïīÙÚÛÜŰùűúûüĽĹŁľĺłÑŇŃňñńŔŕŠŚŞšśşŤťŸÝÿýŽŻŹžżźđĢĞģğ',
accents_out = "AAAAAAAAaaaaaaaasOOOOOOOOoooooooDdDZdzEEEEEeeeeeeCcCcCcDIIIIIiiiiiUUUUUuuuuuLLLlllNNNnnnRrSSSsssTtYYyyZZZzzzdGGgg",
accents_map = new Map();
for (const i in accents)
accents_map.set(accents.charCodeAt(i), accents_out.charCodeAt(i))
function removeAccents(str) {
const nstr = new Array(str.length);
let x, i;
for (i = 0; i < nstr.length; i++)
nstr[i] = accents_map.get(x = str.charCodeAt(i)) || x;
return String.fromCharCode.apply(null, nstr);
}
@ArthurMaroulier
FYI: It doesn't cover all letters ('Ø' for example).
"ÀÁÂÃÄÅàáâãäåÒÓÔÕÕÖØòóôõöøÈÉÊËèéêëðÇçÐÌÍÎÏìíîïÙÚÛÜùúûüÑñŠšŸÿýŽž".normalize('NFD').replace(/[\u0300-\u036f]/g, "");
"AAAAAAaaaaaaOOOOOOØoooooøEEEEeeeeðCcÐIIIIiiiiUUUUuuuuNnSsYyyZz"
did you find a way to convert Ø please ?
If your goal is to sort a list of strings, disregarding accents
['André', 'Álister', 'alan'].sort(new Intl.Collator('pt-BR').compare)
Or just to be safe
['André', 'Álister', 'alan'].sort(new Intl.Collator('pt-BR', {sensitivity:'base'}).compare)
Other variant could be..
const removeAccents = (str) => { return str .toLowerCase() .normalize("NFD") .replace(/[\u0300-\u036f]/g, ""); };
Hello! All the above examples are good! However, I put the function at onChange event on an input field and the next character duplicates all the string from the input. Is there any solution to prevent this? Thank you!
with ß: