Skip to content

Instantly share code, notes, and snippets.

@demoive
Last active February 24, 2021 08:59
Show Gist options
  • Star 20 You must be signed in to star a gist
  • Fork 3 You must be signed in to fork a gist
  • Save demoive/4249710 to your computer and use it in GitHub Desktop.
Save demoive/4249710 to your computer and use it in GitHub Desktop.
Converts a string to a "URL-safe" slug
/**
 * Converts a string to a "URL-safe" slug.
 *
 * The string is lowercased, then every character that is not a-z, 0-9 or part
 * of the delimiter is looked up in a transliteration table: mapped characters
 * are replaced by their ASCII equivalent (or by the delimiter, for
 * separators), unmapped characters are removed. Finally, runs of the
 * delimiter are collapsed into one and leading/trailing delimiters are
 * stripped.
 *
 * Allows for some customization with two optional parameters:
 *
 * @param {string} [delimiter] Delimiter used. If not specified (or falsy),
 *   defaults to a dash "-". Characters that are special in a RegExp are
 *   escaped, and a delimiter longer than one character is treated as a unit
 *   when collapsing and trimming.
 * @param {array} [separators] Adds to the list of non-alphanumeric characters
 *   which will be converted to the delimiter. The default list includes:
 *   ['–', '—', '―', '~', '\\', '/', '|', '+', '\'', '‘', '’', ' ']
 *   Passing '-' or '_' here overrides their default "permitted" status.
 * @returns {string} The slug.
 */
if (!String.prototype.slugify) {
  String.prototype.slugify = function (delimiter, separators) {
    var i,
      slug = this,
      // "-" is escaped as well so that it can never form a range inside the
      // character class built for `prohibited`.
      regexEscape = /[[\/\\^$*+?.()|{}\]\-]/g,
      regexDelimiter,
      prohibited,
      consecutive,
      trim,
      sanitizer;

    delimiter = delimiter || '-';
    regexDelimiter = delimiter.replace(regexEscape, "\\$&");

    // Anything outside a-z, 0-9 and the delimiter's own characters gets looked up.
    prohibited = new RegExp("[^a-z0-9" + regexDelimiter + "]", "g");
    // Non-capturing groups keep a multi-character delimiter together.
    consecutive = new RegExp("(?:" + regexDelimiter + ")+", "g");
    trim = new RegExp("^(?:" + regexDelimiter + ")*(.*?)(?:" + regexDelimiter + ")*$");

    // Lookup is done one UTF-16 code unit at a time, so every key must be a
    // single character. A base ASCII letter followed by a combining mark needs
    // no entry: the letter is kept and the stray mark is removed as unmapped.
    sanitizer = {
      // common latin
      'á': 'a',
      'à': 'a',
      'â': 'a',
      'ä': 'a',
      'ã': 'a',
      'æ': 'ae',
      'ç': 'c',
      'é': 'e',
      'è': 'e',
      'ê': 'e',
      'ë': 'e',
      'ẽ': 'e',
      'í': 'i',
      'ì': 'i',
      'î': 'i',
      'ï': 'i',
      'ĩ': 'i',
      'ó': 'o',
      'ò': 'o',
      'ô': 'o',
      'ö': 'o',
      'õ': 'o',
      'œ': 'oe',
      'ß': 'ss',
      'ú': 'u',
      'ù': 'u',
      'û': 'u',
      'ü': 'u',
      'ũ': 'u',
      // other diacritics
      'ă': 'a',
      'ắ': 'a',
      'ằ': 'a',
      'ẵ': 'a',
      'ẳ': 'a',
      'ấ': 'a',
      'ầ': 'a',
      'ẫ': 'a',
      'ẩ': 'a',
      'ǎ': 'a',
      'å': 'a',
      'ǻ': 'a',
      'ǟ': 'a',
      'ȧ': 'a',
      'ǡ': 'a',
      'ą': 'a',
      'ā': 'a',
      'ả': 'a',
      'ȁ': 'a',
      'ȃ': 'a',
      'ạ': 'a',
      'ặ': 'a',
      'ậ': 'a',
      'ḁ': 'a',
      'ⱥ': 'a',
      'ᶏ': 'a',
      'ɐ': 'a',
      'ɑ': 'a',
      'ḃ': 'b',
      'ḅ': 'b',
      'ḇ': 'b',
      'ƀ': 'b',
      'ɓ': 'b',
      'ƃ': 'b',
      'ᵬ': 'b',
      'ᶀ': 'b',
      'þ': 'b',
      'ć': 'c',
      'ĉ': 'c',
      'č': 'c',
      'ċ': 'c',
      'ḉ': 'c',
      'ȼ': 'c',
      'ƈ': 'c',
      'ɕ': 'c',
      'ď': 'd',
      'ḋ': 'd',
      'ḑ': 'd',
      'ḍ': 'd',
      'ḓ': 'd',
      'ḏ': 'd',
      'đ': 'd',
      'ɖ': 'd',
      'ɗ': 'd',
      'ƌ': 'd',
      'ᵭ': 'd',
      'ᶁ': 'd',
      'ᶑ': 'd',
      'ȡ': 'd',
      '∂': 'd',
      'ĕ': 'e',
      'ế': 'e',
      'ề': 'e',
      'ễ': 'e',
      'ể': 'e',
      'ě': 'e',
      'ė': 'e',
      'ȩ': 'e',
      'ḝ': 'e',
      'ę': 'e',
      'ē': 'e',
      'ḗ': 'e',
      'ḕ': 'e',
      'ẻ': 'e',
      'ȅ': 'e',
      'ȇ': 'e',
      'ẹ': 'e',
      'ệ': 'e',
      'ḙ': 'e',
      'ḛ': 'e',
      'ɇ': 'e',
      'ᶒ': 'e',
      'ḟ': 'f',
      'ƒ': 'f',
      'ᵮ': 'f',
      'ᶂ': 'f',
      'ǵ': 'g',
      'ğ': 'g',
      'ĝ': 'g',
      'ǧ': 'g',
      'ġ': 'g',
      'ģ': 'g',
      'ḡ': 'g',
      'ǥ': 'g',
      'ɠ': 'g',
      'ᶃ': 'g',
      'ĥ': 'h',
      'ȟ': 'h',
      'ḧ': 'h',
      'ḣ': 'h',
      'ḩ': 'h',
      'ḥ': 'h',
      'ḫ': 'h',
      'ẖ': 'h',
      'ħ': 'h',
      'ⱨ': 'h',
      'ĭ': 'i',
      'ǐ': 'i',
      'ḯ': 'i',
      'į': 'i',
      'ī': 'i',
      'ỉ': 'i',
      'ȉ': 'i',
      'ȋ': 'i',
      'ị': 'i',
      'ḭ': 'i',
      'ɨ': 'i',
      'ᵻ': 'i',
      'ᶖ': 'i',
      'i': 'i',
      'ı': 'i',
      'ĵ': 'j',
      'ɉ': 'j',
      'ǰ': 'j',
      'ȷ': 'j',
      'ʝ': 'j',
      'ɟ': 'j',
      'ʄ': 'j',
      'ḱ': 'k',
      'ǩ': 'k',
      'ķ': 'k',
      'ḳ': 'k',
      'ḵ': 'k',
      'ƙ': 'k',
      'ⱪ': 'k',
      'ᶄ': 'k',
      'ĺ': 'l',
      'ľ': 'l',
      'ļ': 'l',
      'ḷ': 'l',
      'ḹ': 'l',
      'ḽ': 'l',
      'ḻ': 'l',
      'ł': 'l',
      'ŀ': 'l',
      'ƚ': 'l',
      'ⱡ': 'l',
      'ɫ': 'l',
      'ɬ': 'l',
      'ᶅ': 'l',
      'ɭ': 'l',
      'ȴ': 'l',
      'ḿ': 'm',
      'ṁ': 'm',
      'ṃ': 'm',
      'ᵯ': 'm',
      'ᶆ': 'm',
      'ɱ': 'm',
      'ń': 'n',
      'ǹ': 'n',
      'ň': 'n',
      'ñ': 'n',
      'ṅ': 'n',
      'ņ': 'n',
      'ṇ': 'n',
      'ṋ': 'n',
      'ṉ': 'n',
      'ɲ': 'n',
      'ƞ': 'n',
      'ŋ': 'n',
      'ᵰ': 'n',
      'ᶇ': 'n',
      'ɳ': 'n',
      'ȵ': 'n',
      'ŏ': 'o',
      'ố': 'o',
      'ồ': 'o',
      'ỗ': 'o',
      'ổ': 'o',
      'ǒ': 'o',
      'ȫ': 'o',
      'ő': 'o',
      'ṍ': 'o',
      'ṏ': 'o',
      'ȭ': 'o',
      'ȯ': 'o',
      'ȱ': 'o',
      'ø': 'o',
      'ǿ': 'o',
      'ǫ': 'o',
      'ǭ': 'o',
      'ō': 'o',
      'ṓ': 'o',
      'ṑ': 'o',
      'ỏ': 'o',
      'ȍ': 'o',
      'ȏ': 'o',
      'ơ': 'o',
      'ớ': 'o',
      'ờ': 'o',
      'ỡ': 'o',
      'ở': 'o',
      'ợ': 'o',
      'ọ': 'o',
      'ộ': 'o',
      'ɵ': 'o',
      'ɔ': 'o',
      'ṕ': 'p',
      'ṗ': 'p',
      'ᵽ': 'p',
      'ƥ': 'p',
      'ᵱ': 'p',
      'ᶈ': 'p',
      'ɋ': 'q',
      'ƣ': 'q',
      'ʠ': 'q',
      'ŕ': 'r',
      'ř': 'r',
      'ṙ': 'r',
      'ŗ': 'r',
      'ȑ': 'r',
      'ȓ': 'r',
      'ṛ': 'r',
      'ṝ': 'r',
      'ṟ': 'r',
      'ɍ': 'r',
      'ɽ': 'r',
      'ᵲ': 'r',
      'ᶉ': 'r',
      'ɼ': 'r',
      'ɾ': 'r',
      'ᵳ': 'r',
      'ś': 's',
      'ṥ': 's',
      'ŝ': 's',
      'š': 's',
      'ṧ': 's',
      '\u1e61': 's', // ṡ (s with dot above)
      '\u1e9b': 's', // ẛ (long s with dot above)
      'ş': 's',
      'ṣ': 's',
      'ṩ': 's',
      'ș': 's',
      'ᵴ': 's',
      'ᶊ': 's',
      'ʂ': 's',
      'ȿ': 's',
      'ť': 't',
      'ṫ': 't',
      'ţ': 't',
      'ṭ': 't',
      'ț': 't',
      'ṱ': 't',
      'ṯ': 't',
      'ŧ': 't',
      'ⱦ': 't',
      'ƭ': 't',
      'ʈ': 't',
      '\u1e97': 't', // ẗ (t with diaeresis)
      'ᵵ': 't',
      'ƫ': 't',
      'ȶ': 't',
      'ŭ': 'u',
      'ǔ': 'u',
      'ů': 'u',
      'ǘ': 'u',
      'ǜ': 'u',
      'ǚ': 'u',
      'ǖ': 'u',
      'ű': 'u',
      'ṹ': 'u',
      'ų': 'u',
      'ū': 'u',
      'ṻ': 'u',
      'ủ': 'u',
      'ȕ': 'u',
      'ȗ': 'u',
      'ư': 'u',
      'ứ': 'u',
      'ừ': 'u',
      'ữ': 'u',
      'ử': 'u',
      'ự': 'u',
      'ụ': 'u',
      'ṳ': 'u',
      'ṷ': 'u',
      'ṵ': 'u',
      'ʉ': 'u',
      'ᵾ': 'u',
      'ᶙ': 'u',
      'ṽ': 'v',
      'ṿ': 'v',
      'ʋ': 'v',
      'ᶌ': 'v',
      'ⱴ': 'v',
      'ẃ': 'w',
      'ẁ': 'w',
      'ŵ': 'w',
      'ẅ': 'w',
      'ẇ': 'w',
      'ẉ': 'w',
      'ẘ': 'w',
      'ẍ': 'x',
      'ẋ': 'x',
      'ᶍ': 'x',
      'ý': 'y',
      'ỳ': 'y',
      'ŷ': 'y',
      'ẙ': 'y',
      'ÿ': 'y',
      'ỹ': 'y',
      'ẏ': 'y',
      'ȳ': 'y',
      'ỷ': 'y',
      'ỵ': 'y',
      'ɏ': 'y',
      'ƴ': 'y',
      'ʏ': 'y',
      'ź': 'z',
      'ẑ': 'z',
      'ž': 'z',
      'ż': 'z',
      'ẓ': 'z',
      'ẕ': 'z',
      'ƶ': 'z',
      'ȥ': 'z',
      'ⱬ': 'z',
      'ᵶ': 'z',
      'ᶎ': 'z',
      'ʐ': 'z',
      'ʑ': 'z',
      'ɀ': 'z',
      // greek
      'α': 'a',
      'β': 'b',
      'γ': 'g',
      'ɣ': 'g',
      'δ': 'd',
      'ð': 'd',
      'ε': 'e',
      'ζ': 'z',
      'η': 'i',
      'θ': 'th',
      'ι': 'i',
      'κ': 'k',
      'λ': 'l',
      'μ': 'm',
      'µ': 'm',
      'ν': 'n',
      'ξ': 'x',
      'ο': 'o',
      'π': 'p',
      'ρ': 'r',
      'σ': 's',
      'ς': 's',
      'τ': 't',
      'υ': 'u', // official rule: if preceded by 'α' OR 'ε' => 'v', by 'ο' => 'u', else => 'i'
      'φ': 'f',
      'χ': 'ch',
      'ψ': 'ps',
      'ω': 'o',
      // greek diacritics
      'ᾳ': 'a',
      'ά': 'a',
      'ὰ': 'a',
      'ᾴ': 'a',
      'ᾲ': 'a',
      'ᾶ': 'a',
      'ᾷ': 'a',
      'ἀ': 'a',
      'ᾀ': 'a',
      'ἄ': 'a',
      'ᾄ': 'a',
      'ἂ': 'a',
      'ᾂ': 'a',
      'ἆ': 'a',
      'ᾆ': 'a',
      'ἁ': 'a',
      'ᾁ': 'a',
      'ἅ': 'a',
      'ᾅ': 'a',
      'ἃ': 'a',
      'ᾃ': 'a',
      'ἇ': 'a',
      'ᾇ': 'a',
      'ᾱ': 'a',
      'ᾰ': 'a',
      'έ': 'e',
      'ὲ': 'e',
      'ἐ': 'e',
      'ἔ': 'e',
      'ἒ': 'e',
      'ἑ': 'e',
      'ἕ': 'e',
      'ἓ': 'e',
      'ῃ': 'i',
      'ή': 'i',
      'ὴ': 'i',
      'ῄ': 'i',
      'ῂ': 'i',
      'ῆ': 'i',
      'ῇ': 'i',
      'ἠ': 'i',
      'ᾐ': 'i',
      'ἤ': 'i',
      'ᾔ': 'i',
      'ἢ': 'i',
      'ᾒ': 'i',
      'ἦ': 'i',
      'ᾖ': 'i',
      'ἡ': 'i',
      'ᾑ': 'i',
      'ἥ': 'i',
      'ᾕ': 'i',
      'ἣ': 'i',
      'ᾓ': 'i',
      'ἧ': 'i',
      'ᾗ': 'i',
      'ί': 'i',
      'ὶ': 'i',
      'ῖ': 'i',
      'ἰ': 'i',
      'ἴ': 'i',
      'ἲ': 'i',
      'ἶ': 'i',
      'ἱ': 'i',
      'ἵ': 'i',
      'ἳ': 'i',
      'ἷ': 'i',
      'ϊ': 'i',
      'ΐ': 'i',
      'ῒ': 'i',
      'ῗ': 'i',
      'ῑ': 'i',
      'ῐ': 'i',
      'ό': 'o',
      'ὸ': 'o',
      'ὀ': 'o',
      'ὄ': 'o',
      'ὂ': 'o',
      'ὁ': 'o',
      'ὅ': 'o',
      'ὃ': 'o',
      'ύ': 'u',
      'ὺ': 'u',
      'ῦ': 'u',
      'ὐ': 'u',
      'ὔ': 'u',
      'ὒ': 'u',
      'ὖ': 'u',
      'ὑ': 'u',
      'ὕ': 'u',
      'ὓ': 'u',
      'ὗ': 'u',
      'ϋ': 'u',
      'ΰ': 'u',
      'ῢ': 'u',
      'ῧ': 'u',
      'ῡ': 'u',
      'ῠ': 'u',
      'ῳ': 'o',
      'ώ': 'o',
      'ῴ': 'o',
      'ὼ': 'o',
      'ῲ': 'o',
      'ῶ': 'o',
      'ῷ': 'o',
      'ὠ': 'o',
      'ᾠ': 'o',
      'ὤ': 'o',
      'ᾤ': 'o',
      'ὢ': 'o',
      'ᾢ': 'o',
      'ὦ': 'o',
      'ᾦ': 'o',
      'ὡ': 'o',
      'ᾡ': 'o',
      'ὥ': 'o',
      'ᾥ': 'o',
      'ὣ': 'o',
      'ᾣ': 'o',
      'ὧ': 'o',
      'ᾧ': 'o',
      'ῤ': 'r',
      'ῥ': 'r',
      // cyrillic (russian)
      'а': 'a',
      'б': 'b',
      'в': 'v',
      'г': 'g',
      'д': 'd',
      'е': 'e',
      'ё': 'e',
      'ж': 'zh',
      'з': 'z',
      'и': 'i',
      'й': 'j',
      'к': 'k',
      'л': 'l',
      'м': 'm',
      'н': 'n',
      'о': 'o',
      'п': 'p',
      'р': 'r',
      'с': 's',
      'т': 't',
      'у': 'u',
      'ф': 'f',
      'х': 'h',
      'ц': 'ts',
      'ч': 'ch',
      'ш': 'sh',
      'щ': 'sh',
      'ъ': '',
      'ы': 'i',
      'ь': '',
      'э': 'e',
      'ю': 'yu',
      'я': 'ya',
      // ---
      'і': 'j',
      'ѳ': 'f',
      'ѣ': 'e',
      'ѵ': 'i',
      'ѕ': 'z',
      'ѯ': 'ks',
      'ѱ': 'ps',
      'ѡ': 'o',
      'ѫ': 'yu',
      'ѧ': 'ya',
      'ѭ': 'yu',
      'ѩ': 'ya',
      // arabic (provided by @Omranic)
      '١': '1',
      '٢': '2',
      '٣': '3',
      '٤': '4',
      '٥': '5',
      '٦': '6',
      '٧': '7',
      '٨': '8',
      '٩': '9',
      '٠': '0',
      'ا': 'a',
      'أ': 'a',
      'إ': 'a',
      'آ': 'a',
      'ب': 'b',
      'ت': 't',
      'ث': 'th',
      'ج': 'j',
      'ح': 'h',
      'خ': 'kh',
      'د': 'd',
      'ذ': 'dh',
      'ر': 'r',
      'ز': 'z',
      'س': 's',
      'ش': 'sh',
      'ص': 's',
      'ض': 'd',
      'ط': 't',
      'ظ': 'z',
      'ع': 'a',
      'غ': 'gh',
      'ف': 'f',
      'ق': 'q',
      'ك': 'k',
      'ل': 'l',
      'م': 'm',
      'ن': 'n',
      'ه': 'h',
      'و': 'w',
      'ى': 'y',
      'ي': 'y',
      'ة': 't',
      // currency
      /*
      '₳': 'ARA',
      '฿': 'THB',
      '₵': 'GHS',
      '¢': 'c',
      '₡': 'CRC',
      '₢': 'Cr',
      '₠': 'XEU',
      '$': 'USD',
      '₫': 'VND',
      '৳': 'BDT',
      '₯': 'GRD',
      '€': 'EUR',
      '₣': 'FRF',
      '₲': 'PYG',
      '₴': 'HRN',
      '₭': 'LAK',
      '₦': 'NGN',
      '₧': 'ESP',
      '₱': 'PhP',
      '£': 'GBP',
      '₤': 'GBP',
      '₨': 'Rs',
      '₪': 'NS',
      '₮': 'MNT',
      '₩': 'WON',
      '¥': 'YEN',
      '៛': 'KHR',
      //*/
      // fractions
      /*
      '⅛': '',
      '⅙': '',
      '⅕': '',
      '¼': '',
      '⅓': '',
      '⅜': '',
      '⅖': '',
      '½': '',
      '⅗': '',
      '⅝': '',
      '⅔': '',
      '¾': '',
      '⅘': '',
      '⅚': '',
      '⅞': '',
      //*/
      // separators
      '–': delimiter,
      '—': delimiter,
      '―': delimiter,
      '~': delimiter,
      '/': delimiter,
      '\\': delimiter,
      '|': delimiter,
      '+': delimiter,
      '‘': delimiter,
      '’': delimiter,
      '\'': delimiter,
      ' ': delimiter,
      // permitted by default but can be overridden
      '-': '-',
      '_': '_'
    };

    // add any user-defined separator elements (these override the defaults above)
    if (separators) {
      for (i = 0; i < separators.length; i += 1) {
        sanitizer[separators[i]] = delimiter;
      }
    }

    // do all the replacements
    // Lowercasing first keeps the table small: otherwise the uppercase versions
    // would have to be added to the sanitizer and A-Z to the prohibited filter.
    slug = slug.toLowerCase();
    // Mapped characters are transliterated; anything unmapped is dropped.
    slug = slug.replace(prohibited, function (match) { return sanitizer[match] || ''; });
    slug = slug.replace(consecutive, delimiter);
    slug = slug.replace(trim, "$1");

    return slug;
  };
}
@demoive
Copy link
Author

demoive commented Dec 10, 2012

In addition to occasionally needing it myself, I often see a need within the community for strings to be "slugified" in JavaScript. With no standard function for doing this, people often resort to some rather incomplete solutions. Search around the web for "javascript url slug generator" and you'll see what I'm talking about - there are too many simple solutions which might suffice for "most" cases, but omit a lot of characters and expected/desired behaviour.

Other languages have built-in utilities for this (iconv in PHP, for example), but I wasn't really able to find any JavaScript solution that I was comfortable using. Taking cues from several other solutions ("The Perfect PHP Clean URL Generator" being the most extensive), I propose this solution as a de facto standard.

This Gist will hold the "stable" version of my own slugify function, which I play around with and test on JSFiddle.

@demoive
Copy link
Author

demoive commented Dec 10, 2012

Some initial examples of what this does:

"  Mess'd up --text-- jus\"t (to) 'stress' /test/ \"this & that\" Of ?oůr! `little` \\clean\\ url fun.ction!?-->  --  ".slugify();
"mess-d-up-text-just-to-stress-test-this-that-of-our-little-clean-url-function"
"Perché l'erba è verde?".slugify();
"perche-l-erba-e-verde"
"Tänk efter nu – förr'n vi föser dig bort ".slugify();
"tank-efter-nu-forr-n-vi-foser-dig-bort"
"You+can|use/spaces\\for/the delimiter".slugify(" ");
"you can use spaces for the delimiter"
"Custom separator`and delimiter*example".slugify('_', ['*', '`']);
"custom_separator_and_delimiter_example"

@demoive
Copy link
Author

demoive commented Dec 10, 2012

Made a small update to allow for the dash and underscore (which are both allowed characters by default) to be overridden as characters which will be replaced by the delimiter. For example:

"A_MiXeD-separator + delimiter/example".slugify(' ');
"a_mixed-separator delimiter example"
"A_MiXeD-separator + delimiter/example".slugify(' ', ['_', '-']);
"a mixed separator delimiter example"

@demoive
Copy link
Author

demoive commented Dec 12, 2012

Added the tilde character to the default list of delimiter conversions.

Changed the substitutions for some Greek characters (θ, ξ, χ, ν, υ) to be consistent with "Greek-lish" and phonetic replacements. Thanks to Philippos for the audit :)

Also added the Russian character set (thanks to Alexander and Yana for their contribution and the Translit resource):

"аз буки веди".slugify();
"az-buki-vedi"
"рцы слово твердо".slugify();
"rtsi-slovo-tverdo"
"цы червь ша ер ять ю".slugify();
"tsi-cherv-sha-er-yat-yu"

@demoive
Copy link
Author

demoive commented Dec 13, 2012

Special RegExp characters are now properly escaped if used as the delimiter: \ ^ $ * + ? . ( ) | { } [ ]

"Now any RegEx-sensative.char (\\ ^ $ * + ? . ( ) | { } [ ]) can be used safely as the delimiter... like let's say, a period:".slugify(".");
"now.any.regex-sensative.char.can.be.used.safely.as.the.delimiter.like.let.s.say.a.period"

@demoive
Copy link
Author

demoive commented Dec 14, 2012

Completed the Greek set with all the possible accented characters.

"ΧΕΙΜΕΡΙΝΌΣ, θαλασσινή".slugify();
"cheimerinos-thalassini"
"Δωματίου, ΎΨΟΣ, φιλοξενία".slugify();
"domatiou-upsos-filoxenia"
"ευήλιος, αύρα, ζέστη".slugify();
"euilios-aura-zesti"

Cheers,
Δεμοιυε

@demoive
Copy link
Author

demoive commented Jan 26, 2013

Updated the loop that iterates through the separators given these performance results.

@IbnSaeed
Copy link

IbnSaeed commented Aug 7, 2014

Can you add support for arabic words and letters

for e.g.

ب شبسي شسيب ش

التتبي

شبس ثصقب صث

@demoive
Copy link
Author

demoive commented Mar 30, 2015

@IbnSaeed, I'd be happy to. I didn't get a notification of your comment so sorry it's been so long to respond!

Could you help me in this effort? As you can see from the code, I'll set up a mapping of the characters. Can you recommend a good resource to map them? For example, should I just use what's on Wikipedia?
http://en.wikipedia.org/wiki/Arabic_alphabet

Also, can you advise how to handle the right-to-left nature of the language? For example, if the order of characters is:

غ
ظ
ض

Does that equate to the following?

غظض

And then what would be a logical replacement?

gh z d
d z gh

@demoive
Copy link
Author

demoive commented Mar 30, 2015

@Omranic
Copy link

Omranic commented May 14, 2015

@demoive
Thank you for this awesome plugin.
Here's Arabic Support implemented, you could kindly merge it.
https://gist.github.com/Omranic/c53be563645974884ca8#file-slugify-js-L625-L658

@maherelgamil
Copy link

@demoive Thanks for this very useful plugin.

@Omranic Thanks for Arabic support :)

@demoive
Copy link
Author

demoive commented Feb 26, 2019

Merged Arabic support from @Omranic.

@IbnSaeed @maherelgamil.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment