Skip to content

Instantly share code, notes, and snippets.

@rodneyrehm
Last active October 4, 2015 07:37
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
Star You must be signed in to star a gist
Save rodneyrehm/2602175 to your computer and use it in GitHub Desktop.
Reduce (UTF-8) strings to alphanumeric
// port of https://gist.github.com/909692
// TODO: check char-map of https://github.com/jbroadway/urlify/blob/master/URLify.php for characters we've missed
// TODO: check performance against http://stackoverflow.com/questions/1946040/using-javascript-to-normalize-url-rewriting-strings-entered-by-users
/**
 * Reduces strings to URL-friendly characters.
 *
 * Characters in [a-zA-Z0-9_-] and explicitly allowed characters are kept,
 * characters with a configured replacement are substituted, a handful of
 * accented Latin letters are folded to their (lower-case) base letter, and
 * everything else becomes "-".
 *
 * Can be called with or without `new`.
 *
 * @constructor
 * @param {Object}  [options]
 * @param {boolean} [options.trim]    Collapse runs of "-" into one and strip a
 *                                    leading/trailing "-". Defaults to Urlifyer.trim.
 * @param {boolean} [options.lower]   Lower-case the result. Defaults to Urlifyer.lower.
 * @param {string}  [options.allow]   Characters to pass through untouched, in
 *                                    addition to those in Urlifyer.allow.
 * @param {Object}  [options.replace] Map of character => replacement string,
 *                                    merged over Urlifyer.replace. A key made only
 *                                    of digits is read as a decimal UTF-16 code
 *                                    unit; any other key is read as a literal
 *                                    character (only its first code unit is used).
 */
var Urlifyer = function(options) {
  var sources, source, key, code, i, index;

  // Allow instantiation without the 'new' keyword
  if (!(this instanceof Urlifyer)) {
    return new Urlifyer(options);
  }

  if (!options) {
    options = {};
  }

  // An explicitly passed flag (including `false`) wins over the class default;
  // `||` would make it impossible to switch a default of `true` off.
  this._trim = (options.trim !== undefined && options.trim !== null) ? options.trim : Urlifyer.trim;
  this._lower = (options.lower !== undefined && options.lower !== null) ? options.lower : Urlifyer.lower;
  this._allow = {};
  this._replace = {};

  // allowed characters: class defaults first, then per-instance additions.
  // Iterate by length so a missing default never hides the instance option.
  sources = [Urlifyer.allow, options.allow];
  for (i = 0; i < sources.length; i++) {
    source = sources[i];
    if (!source) {
      continue;
    }

    index = source.length;
    while (index--) {
      this._allow[source.charCodeAt(index)] = true;
    }
  }

  // replacements: class defaults first so per-instance entries override them
  sources = [Urlifyer.replace, options.replace];
  for (i = 0; i < sources.length; i++) {
    source = sources[i];
    if (!source) {
      continue;
    }

    for (key in source) {
      if (Object.prototype.hasOwnProperty.call(source, key)) {
        // allow passing either unicode-points or characters
        code = /^[0-9]+$/.test(key) ? key : key.charCodeAt(0);
        this._replace[code] = source[key];
      }
    }
  }

  return this;
};

/**
 * Convert a string according to this instance's configuration.
 *
 * Works on UTF-16 code units: each code unit is inspected on its own.
 * Precedence per code unit: [a-zA-Z0-9_-] > allowed > configured replacement
 * > built-in accent folding > "-".
 *
 * @param {string} string Text to convert; null/undefined are treated as "",
 *                        other values are converted with String().
 * @returns {string} The converted text.
 */
Urlifyer.prototype.urlify = function(string) {
  var allowed = this._allow;
  var replacements = this._replace;
  var parts = [];
  var index, code, replacement, result;

  string = (string === undefined || string === null) ? "" : String(string);

  // Collect the output piecewise and join once instead of re-slicing the
  // whole string for every substituted character.
  for (index = 0; index < string.length; index++) {
    code = string.charCodeAt(index);

    if ((code >= 97 && code <= 122) || (code >= 65 && code <= 90) || (code >= 48 && code <= 57) || code === 95 || code === 45) {
      // skip normalization for alphanumeric characters [a-zA-Z0-9_-]
      parts.push(string.charAt(index));
      continue;
    }

    if (allowed && allowed[code]) {
      // skip normalization for allowed characters
      parts.push(string.charAt(index));
      continue;
    }

    replacement = replacements ? replacements[code] : undefined;
    if (typeof replacement === "string" || replacement) {
      // replace as defined; an empty string is a valid replacement and
      // removes the character. Other falsy values (false, null, 0) are
      // treated as "no replacement" and fall through to the built-in rules.
      replacement = String(replacement);
    } else if ((code >= 0xC0 && code <= 0xC6) || (code >= 0xE0 && code <= 0xE6) || (code >= 0x100 && code <= 0x105)) {
      replacement = "a";
    } else if (code === 0xC7 || code === 0xE7 || (code >= 0x106 && code <= 0x10D)) {
      replacement = "c";
    } else if (code === 0xD0 || code === 0xF0 || (code >= 0x10E && code <= 0x111)) {
      // 0xF0 (ð) is the lower-case partner of 0xD0 (Ð)
      replacement = "d";
    } else if ((code >= 0xC8 && code <= 0xCB) || (code >= 0xE8 && code <= 0xEB) || (code >= 0x112 && code <= 0x11B)) {
      replacement = "e";
    } else if ((code >= 0xCC && code <= 0xCF) || (code >= 0xEC && code <= 0xEF)) {
      replacement = "i";
    } else if (code === 0xD1 || code === 0xF1) {
      replacement = "n";
    } else if ((code >= 0xD2 && code <= 0xD6) || (code >= 0xF2 && code <= 0xF6) || code === 0xD8 || code === 0xF8) {
      // 0xD7 (×) and 0xF7 (÷) sit inside these ranges but are not letters
      replacement = "o";
    } else if ((code >= 0xD9 && code <= 0xDC) || (code >= 0xF9 && code <= 0xFC)) {
      replacement = "u";
    } else if (code === 0xDD || code === 0xFD || code === 0xFF) {
      replacement = "y";
    } else {
      replacement = "-";
    }

    parts.push(replacement);
  }

  result = parts.join("");

  if (this._trim) {
    // reduce to single dash
    result = result.replace(/-{2,}/g, '-');
    // trim dash from beginning and end
    result = result.replace(/^-|-$/g, '');
  }

  if (this._lower) {
    result = result.toLowerCase();
  }

  return result;
};

// Class-wide defaults, read by each instance when it is constructed.
Urlifyer.lower = false;
Urlifyer.trim = false;
Urlifyer.allow = "";
Urlifyer.replace = {
  0xE4: "ae", // ä
  0xC4: "Ae", // Ä
  0xF6: "oe", // ö
  0xD6: "Oe", // Ö
  0xFC: "ue", // ü
  0xDC: "Ue", // Ü
  0xDF: "ss", // ß
  0xE6: "ae", // æ
  0xC6: "AE" // Æ
};
// Usage example: an instance that trims dashes, lower-cases the result,
// lets "õ" pass through untouched and maps "ñ" to "#".
var u = new Urlifyer({trim: true, lower: true, allow: "õ", replace: {"ñ":"#"}});
// Dump the configured instance for inspection.
console.log(u);
// Logs "haelloe-w-orl-d-#õ-so".
console.log(u.urlify("häLLö w(orl)d ñõ so?"));
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment