-
-
Save alisterlf/3490957 to your computer and use it in GitHub Desktop.
/**
 * Replaces accented Latin characters with unaccented ASCII look-alikes.
 *
 * Each UTF-16 code unit of the input is looked up in `accents`; on a hit it
 * is replaced by the character at the same index of `accentsOut`, otherwise
 * it is copied through unchanged.
 *
 * @param {string} strAccents Text that may contain accented characters.
 * @returns {string} The text with every listed accented character replaced.
 */
function RemoveAccents(strAccents) {
  // Parallel lookup tables: accents[i] is replaced by accentsOut[i].
  var accents = 'ÀÁÂÃÄÅàáâãäåÒÓÔÕÕÖØòóôõöøÈÉÊËèéêëðÇçÐÌÍÎÏìíîïÙÚÛÜùúûüÑñŠšŸÿýŽž';
  var accentsOut = "AAAAAAaaaaaaOOOOOOOooooooEEEEeeeeeCcDIIIIiiiiUUUUuuuuNnSsYyyZz";
  var chars = strAccents.split('');
  for (var y = 0; y < chars.length; y++) {
    // Look the character up once and reuse the position for the replacement.
    var pos = accents.indexOf(chars[y]);
    if (pos !== -1) {
      chars[y] = accentsOut.charAt(pos);
    }
  }
  return chars.join('');
}
Doesn't cover ß
with ß:
/**
 * Strips accents from a string using two parallel lookup tables.
 *
 * This variant also maps the German eszett (ß) to "B".
 *
 * @param {string} str Text that may contain accented characters.
 * @returns {string} The text with every listed character replaced.
 */
function RemoveAccents(str) {
  var accents = 'ÀÁÂÃÄÅàáâãäåßÒÓÔÕÕÖØòóôõöøÈÉÊËèéêëðÇçÐÌÍÎÏìíîïÙÚÛÜùúûüÑñŠšŸÿýŽž';
  var accentsOut = "AAAAAAaaaaaaBOOOOOOOooooooEEEEeeeeeCcDIIIIiiiiUUUUuuuuNnSsYyyZz";
  return str
    .split('')
    .map(function (ch) {
      // A character missing from the table passes through untouched.
      var pos = accents.indexOf(ch);
      return pos === -1 ? ch : accentsOut[pos];
    })
    .join('');
}
with ES6:
/**
 * Returns `str` with each listed accented character swapped for its
 * counterpart at the same position in `accentsOut`.
 *
 * @param {string} str Text that may contain accented characters.
 * @returns {string} The converted text.
 */
function removeAccents(str) {
  const accents = 'ÀÁÂÃÄÅàáâãäåßÒÓÔÕÕÖØòóôõöøÈÉÊËèéêëðÇçÐÌÍÎÏìíîïÙÚÛÜùúûüÑñŠšŸÿýŽž';
  const accentsOut = "AAAAAAaaaaaaBOOOOOOOooooooEEEEeeeeeCcDIIIIiiiiUUUUuuuuNnSsYyyZz";
  // Fold the code units into the result string instead of patching an array in place.
  return str.split('').reduce((converted, letter) => {
    const hit = accents.indexOf(letter);
    return converted + (hit === -1 ? letter : accentsOut[hit]);
  }, '');
}
If someone needs Slovak letters:
ÝŤŔŇĽĹDŽĎČ were missing
'ÀÁÂÃÄÅàáâãäåßÒÓÔÕÕÖØòóôõöøĎďDŽdžÈÉÊËèéêëðÇçČčÐÌÍÎÏìíîïÙÚÛÜùúûüĽĹľĺÑŇňñŔŕŠšŤťŸÝÿýŽž';
"AAAAAAaaaaaasOOOOOOOooooooDdDZdzEEEEeeeeeCcCcDIIIIiiiiUUUUuuuuLLllNNnnRrSsTtYYyyZz";
Thanks!!
What about this ? It's shorter :)
str.normalize('NFD').replace(/[\u0300-\u036f]/g, "")
It really is, Arthur, thank you! It's good to also point to the original, mainly for its complete explanation.
...with special Hungarian characters:
// Parallel tables extended with the Hungarian letters Ő, ő, Ű and ű.
// The two strings must stay the same length: the character at index i of
// `accents` lines up with the replacement at index i of `accentsOut`.
// The run "ŔŕŠš" (-> "RrSs") was restored from mis-encoded characters that
// had shortened `accents` and shifted every later mapping by one.
let accents = 'ÀÁÂÃÄÅàáâãäåßÒÓÔÕÕÖØŐòóôőõöøĎďDŽdžÈÉÊËèéêëðÇçČčÐÌÍÎÏìíîïÙÚÛÜŰùűúûüĽĹľĺÑŇňñŔŕŠšŤťŸÝÿýŽž';
let accentsOut = "AAAAAAaaaaaasOOOOOOOOoooooooDdDZdzEEEEeeeeeCcCcDIIIIiiiiUUUUUuuuuuLLllNNnnRrSsTtYYyyZz";
and with special Polish characters:
// Parallel tables extended with the Polish letters (Ą, Ę, Ć, Ł, Ń, Ś, Ż, Ź
// and their lowercase forms). The character at index i of `accents` lines up
// with the replacement at index i of `accentsOut`, so both strings must have
// the same length.
// The run "ŔŕŠŚšś" (-> "RrSSss") was restored from a mis-encoded character
// that had shortened `accents` and shifted every later mapping.
let accents = 'ÀÁÂÃÄÅĄàáâãäåąßÒÓÔÕÕÖØŐòóôőõöøĎďDŽdžÈÉÊËĘèéêëęðÇçČčĆćÐÌÍÎÏìíîïÙÚÛÜŰùűúûüĽĹŁľĺłÑŇŃňñńŔŕŠŚšśŤťŸÝÿýŽŻŹžżź';
let accentsOut = "AAAAAAAaaaaaaasOOOOOOOOoooooooDdDZdzEEEEEeeeeeeCcCcCcDIIIIiiiiUUUUUuuuuuLLLlllNNNnnnRrSSssTtYYyyZZZzzz";
with map:
/**
 * Replaces every character of `string` that appears in `accents` with the
 * character at the same index of `accentsOut`.
 *
 * @param {string} string Text that may contain accented characters.
 * @returns {string} The converted text.
 */
function removeAccents(string) {
  const accents =
    "ÀÁÂÃÄÅàáâãäåßÒÓÔÕÕÖØòóôõöøÈÉÊËèéêëðÇçÐÌÍÎÏìíîïÙÚÛÜùúûüÑñŠšŸÿýŽž";
  const accentsOut =
    "AAAAAAaaaaaaBOOOOOOOooooooEEEEeeeeeCcDIIIIiiiiUUUUuuuuNnSsYyyZz";
  const letters = string.split("");
  for (let position = 0; position < letters.length; position += 1) {
    const tableIndex = accents.indexOf(letters[position]);
    if (tableIndex !== -1) {
      letters[position] = accentsOut[tableIndex];
    }
  }
  return letters.join("");
}
@up with polish special characters
/**
 * Replaces accented characters, including the Polish ones, with unaccented
 * ASCII look-alikes.
 *
 * Each UTF-16 code unit of `string` is looked up in `accents`; on a hit it is
 * replaced by the character at the same index of `accentsOut`, otherwise it
 * is kept as is.
 *
 * @param {string} string Text that may contain accented characters.
 * @returns {string} The converted text.
 */
function removeAccents(string) {
  // Ł/ł are appended at the end so that all existing positions stay aligned.
  const accents =
    "ÀÁÂÃÄÅĄàáâãäåąßÒÓÔÕÕÖØÓòóôõöøóÈÉÊËĘèéêëęðÇĆçćÐÌÍÎÏìíîïÙÚÛÜùúûüÑŃñńŠŚšśŸÿýŽŻŹžżźŁł";
  const accentsOut =
    "AAAAAAAaaaaaaaBOOOOOOOOoooooooEEEEEeeeeeeCCccDIIIIiiiiUUUUuuuuNNnnSSssYyyZZZzzzLl";
  return string
    .split("")
    .map((letter) => {
      const accentIndex = accents.indexOf(letter);
      return accentIndex !== -1 ? accentsOut[accentIndex] : letter;
    })
    .join("");
}
If you use ES6 consider this Stack answer: https://stackoverflow.com/a/37511463/2649707
@brunoocasali doesn't work with "œ" char, like in "Cœur"
Other variant without constants
/**
 * Replaces accented characters in `string` with their unaccented
 * counterparts, without declaring the lookup tables as local constants.
 *
 * The tables are handed to the mapping callback as its `this` value.
 *
 * @param {string} string Text that may contain accented characters.
 * @returns {string} The converted text.
 */
function normalizeString (string) {
  return string
    .split('')
    .map(function (letter) {
      const pos = this.accents.indexOf(letter)
      if (pos === -1) {
        return letter
      }
      return this.out[pos]
    }, {
      // Passed as map's thisArg, so the callback sees the tables on `this`.
      accents: 'ÀÁÂÃÄÅĄàáâãäåąßÒÓÔÕÕÖØÓòóôõöøóÈÉÊËĘèéêëęðÇĆçćÐÌÍÎÏìíîïÙÚÛÜùúûüÑŃñńŠŚšśŸÿýŽŻŹžżź',
      out: 'AAAAAAAaaaaaaaBOOOOOOOOoooooooEEEEEeeeeeeCCccDIIIIiiiiUUUUuuuuNNnnSSssYyyZZZzzz'
    })
    .join('')
}
I think the German eszett ß should be an "s" https://en.wikipedia.org/wiki/%C3%9F
β is the Greek beta (https://en.wikipedia.org/wiki/Beta), and that should probably be a "B".
Added a few
// Accented characters and their unaccented replacements, declared as two strings.
// NOTE(review): presumably parallel lookup tables (accents[i] -> accentsOut[i]) for a
// removeAccents-style helper; these two lines only declare the strings, so confirm
// against whatever code consumes them.
var accents = 'ÀÁÂÃÄÅĄĀāàáâãäåąßÒÓÔÕÕÖØŐòóôőõöøĎďDŽdžÈÉÊËĘèéêëęðÇçČčĆćÐÌÍÎÏĪìíîïīÙÚÛÜŰùűúûüĽĹŁľĺłÑŇŃňñńŔŕŠŚŞšśşŤťŸÝÿýŽŻŹžżźđĢĞģğ';
var accentsOut = "AAAAAAAAaaaaaaaasOOOOOOOOoooooooDdDZdzEEEEEeeeeeeCcCcCcDIIIIIiiiiiUUUUUuuuuuLLLlllNNNnnnRrSSSsssTtYYyyZZZzzzdGGgg";
How about this?
// Collator for the default locale that ignores accent and case differences
// (sensitivity: 'base').
var nIC = new Intl.Collator(undefined, { sensitivity: 'base' })
// Stand-alone comparison function that delegates to the collator above.
var cmp = function (a, b) {
  return nIC.compare(a, b)
}
Or this?
'être'.localeCompare('etre', undefined, {sensitivity: 'base'})
Just... thank you !
Though I prefer the `map` method, here's the original, but TypeScript-friendly:
/**
 * Replaces accented Latin characters in `str` with unaccented ASCII
 * look-alikes.
 *
 * Each UTF-16 code unit is looked up in `ACCENTS`; on a hit it is replaced by
 * the character at the same index of `NON_ACCENTS`, otherwise it is kept.
 *
 * @param str Text that may contain accented characters.
 * @returns The text with every listed accented character replaced.
 */
private replaceAccents(str: string): string
{
    const ACCENTS = 'ÀÁÂÃÄÅàáâãäåÒÓÔÕÕÖØòóôõöøÈÉÊËèéêëðÇçÐÌÍÎÏìíîïÙÚÛÜùúûüÑñŠšŸÿýŽž';
    const NON_ACCENTS = "AAAAAAaaaaaaOOOOOOOooooooEEEEeeeeeCcDIIIIiiiiUUUUuuuuNnSsYyyZz";

    return str
        .split('')
        .map((char: string): string =>
        {
            // Single table scan per character; charAt replaces the deprecated substr.
            const position = ACCENTS.indexOf(char);
            return position === -1 ? char : NON_ACCENTS.charAt(position);
        })
        .join('');
}
A slight alternative to above using the string normalisation AND correct substitution for the German ß
import { includes, some } from 'lodash'
/**
 * Strips diacritics from `str` while keeping the lowercase German umlauts
 * and spelling out the eszett.
 *
 * Each UTF-16 code unit is processed on its own: 'ä', 'ü' and 'ö' are kept
 * verbatim; any other unit is NFD-normalised and has its combining marks
 * (U+0300 to U+036F) removed. Finally every 'ß' is replaced by 'ss'.
 *
 * NOTE: only the lowercase umlauts are in the keep-list, so 'Ä', 'Ö' and 'Ü'
 * lose their diaeresis like any other accented letter.
 *
 * @param str Text that may contain accented characters.
 * @returns The converted text.
 */
export default function convertForeignCharacters(str: string): string {
  const preserved = ['ä', 'ü', 'ö']
  const combiningMarks = /[\u0300-\u036f]/g
  const converted = str
    .split('')
    .map((char: string): string =>
      preserved.includes(char) ? char : char.normalize('NFD').replace(combiningMarks, '')
    )
    .join('')
  // A string pattern would replace only the first 'ß'; the global regex replaces all of them.
  return converted.replace(/ß/g, 'ss')
}
O(n) version, ~5x faster, especially if you want to add more accents.
// Parallel lookup tables: the character at index i of `accents` is replaced
// by the character at index i of `accents_out`. `accents_map` holds the same
// mapping keyed by UTF-16 code unit for constant-time lookups.
const accents = 'ÀÁÂÃÄÅĄĀāàáâãäåąßÒÓÔÕÕÖØŐòóôőõöøĎďDŽdžÈÉÊËĘèéêëęðÇçČčĆćÐÌÍÎÏĪìíîïīÙÚÛÜŰùűúûüĽĹŁľĺłÑŇŃňñńŔŕŠŚŞšśşŤťŸÝÿýŽŻŹžżźđĢĞģğ',
  accents_out = "AAAAAAAAaaaaaaaasOOOOOOOOoooooooDdDZdzEEEEEeeeeeeCcCcCcDIIIIIiiiiiUUUUUuuuuuLLLlllNNNnnnRrSSSsssTtYYyyZZZzzzdGGgg",
  accents_map = new Map();

// Fill the map once, using numeric indices rather than for...in string keys.
for (let i = 0; i < accents.length; i++)
  accents_map.set(accents.charCodeAt(i), accents_out.charCodeAt(i));

/**
 * Replaces every accented character listed in `accents` with its unaccented
 * counterpart, using one map lookup per UTF-16 code unit.
 *
 * @param {string} str Text that may contain accented characters.
 * @returns {string} The converted text.
 */
function removeAccents(str) {
  // String.fromCharCode.apply spreads the array into call arguments, and the
  // engine caps how many arguments a call may take. Converting in bounded
  // chunks keeps long inputs from throwing a RangeError.
  const CHUNK_SIZE = 8192;
  let result = '';
  for (let start = 0; start < str.length; start += CHUNK_SIZE) {
    const end = Math.min(start + CHUNK_SIZE, str.length);
    const codes = new Array(end - start);
    for (let i = start; i < end; i++) {
      const code = str.charCodeAt(i);
      const mapped = accents_map.get(code);
      codes[i - start] = mapped === undefined ? code : mapped;
    }
    result += String.fromCharCode.apply(null, codes);
  }
  return result;
}
@ArthurMaroulier
FYI: It doesn't cover all letters ('Ø' for example).
"ÀÁÂÃÄÅàáâãäåÒÓÔÕÕÖØòóôõöøÈÉÊËèéêëðÇçÐÌÍÎÏìíîïÙÚÛÜùúûüÑñŠšŸÿýŽž".normalize('NFD').replace(/[\u0300-\u036f]/g, "");
"AAAAAAaaaaaaOOOOOOØoooooøEEEEeeeeðCcÐIIIIiiiiUUUUuuuuNnSsYyyZz"
Did you find a way to convert Ø, please?
If your goal is to sort a list of strings, disregarding accents
['André', 'Álister', 'alan'].sort(new Intl.Collator('pt-BR').compare)
Or just to be safe
['André', 'Álister', 'alan'].sort(new Intl.Collator('pt-BR', {sensitivity:'base'}).compare)
Other variant could be..
/**
 * Lowercases `str`, decomposes it with NFD normalisation and removes the
 * combining marks in the range U+0300 to U+036F.
 *
 * @param {string} str Text that may contain accented characters.
 * @returns {string} The lowercased text without those combining marks.
 */
const removeAccents = (str) => {
  const lowered = str.toLowerCase();
  const decomposed = lowered.normalize("NFD");
  return decomposed.replace(/[\u0300-\u036f]/g, "");
};
Hello! All the above examples are good! However, I put the function at onChange event on an input field and the next character duplicates all the string from the input. Is there any solution to prevent this? Thank you!
I like it!.
I've modified it a little to use less code:
Thank you!