-
-
Save demoive/4249710 to your computer and use it in GitHub Desktop.
/**
 * Converts a string to a "URL-safe" slug.
 *
 * The string is lower-cased; every character other than a-z, 0-9 and the
 * characters of the delimiter is then looked up, one character at a time, in
 * a transliteration table and replaced by its mapping (or removed when it has
 * none). Finally, runs of the delimiter are collapsed into a single delimiter
 * and leading/trailing delimiters are trimmed.
 *
 * Allows for some customization with two optional parameters:
 *
 * @param {string} [delimiter="-"] Delimiter used. If not specified (or
 *   falsy), defaults to a dash "-". May contain RegExp-special characters and
 *   may be longer than one character.
 * @param {Array.<string>} [separators] Adds to the list of non-alphanumeric
 *   characters which will be converted to the delimiter. Each entry must be a
 *   single character because lookups happen one character at a time. The
 *   default list includes:
 *   ['–', '—', '―', '~', '\\', '/', '|', '+', '\'', '‘', '’', ' ']
 * @returns {string} The slug.
 */
if (!String.prototype.slugify) {
  String.prototype.slugify = function (delimiter, separators) {
    delimiter = delimiter || '-';

    var i,
      slug,
      // RegExp-special characters, plus "-" so that the delimiter can be
      // embedded safely inside a character class as well.
      regexEscape = /[[\/\\^$*+?.()|{}\]-]/g,
      regexDelimiter = delimiter.replace(regexEscape, "\\$&"),
      // Non-capturing group so quantifiers apply to the WHOLE delimiter,
      // not just to its last character (matters for multi-char delimiters).
      delimiterGroup = "(?:" + regexDelimiter + ")",
      prohibited = new RegExp("[^a-z0-9" + regexDelimiter + "]", "g"),
      consecutive = new RegExp(delimiterGroup + "+", "g"),
      // [\s\S] instead of "." so the pattern still matches when the slug
      // contains line terminators (only possible when the delimiter does).
      trim = new RegExp("^" + delimiterGroup + "*([\\s\\S]*?)" + delimiterGroup + "*$"),
      sanitizer = {
        // common latin
        'á': 'a',
        'à': 'a',
        'â': 'a',
        'ä': 'a',
        'ã': 'a',
        'æ': 'ae',
        'ç': 'c',
        'é': 'e',
        'è': 'e',
        'ê': 'e',
        'ë': 'e',
        'ẽ': 'e',
        'í': 'i',
        'ì': 'i',
        'î': 'i',
        'ï': 'i',
        'ĩ': 'i',
        'ó': 'o',
        'ò': 'o',
        'ô': 'o',
        'ö': 'o',
        'õ': 'o',
        'œ': 'oe',
        'ß': 'ss',
        'ú': 'u',
        'ù': 'u',
        'û': 'u',
        'ü': 'u',
        'ũ': 'u',
        // other diacritics
        // NOTE: letters written as an ASCII base letter followed by a
        // combining mark need no entry here: the base letter is kept as-is
        // and the combining mark, having no mapping, is removed.
        'ă': 'a',
        'ắ': 'a',
        'ằ': 'a',
        'ẵ': 'a',
        'ẳ': 'a',
        'ấ': 'a',
        'ầ': 'a',
        'ẫ': 'a',
        'ẩ': 'a',
        'ǎ': 'a',
        'å': 'a',
        'ǻ': 'a',
        'ǟ': 'a',
        'ȧ': 'a',
        'ǡ': 'a',
        'ą': 'a',
        'ā': 'a',
        'ả': 'a',
        'ȁ': 'a',
        'ȃ': 'a',
        'ạ': 'a',
        'ặ': 'a',
        'ậ': 'a',
        'ḁ': 'a',
        'ⱥ': 'a',
        'ᶏ': 'a',
        'ɐ': 'a',
        'ɑ': 'a',
        'ḃ': 'b',
        'ḅ': 'b',
        'ḇ': 'b',
        'ƀ': 'b',
        'ɓ': 'b',
        'ƃ': 'b',
        'ᵬ': 'b',
        'ᶀ': 'b',
        'þ': 'b',
        'ć': 'c',
        'ĉ': 'c',
        'č': 'c',
        'ċ': 'c',
        'ḉ': 'c',
        'ȼ': 'c',
        'ƈ': 'c',
        'ɕ': 'c',
        'ď': 'd',
        'ḋ': 'd',
        'ḑ': 'd',
        'ḍ': 'd',
        'ḓ': 'd',
        'ḏ': 'd',
        'đ': 'd',
        'ɖ': 'd',
        'ɗ': 'd',
        'ƌ': 'd',
        'ᵭ': 'd',
        'ᶁ': 'd',
        'ᶑ': 'd',
        'ȡ': 'd',
        '∂': 'd',
        'ĕ': 'e',
        'ế': 'e',
        'ề': 'e',
        'ễ': 'e',
        'ể': 'e',
        'ě': 'e',
        'ė': 'e',
        'ȩ': 'e',
        'ḝ': 'e',
        'ę': 'e',
        'ē': 'e',
        'ḗ': 'e',
        'ḕ': 'e',
        'ẻ': 'e',
        'ȅ': 'e',
        'ȇ': 'e',
        'ẹ': 'e',
        'ệ': 'e',
        'ḙ': 'e',
        'ḛ': 'e',
        'ɇ': 'e',
        'ᶒ': 'e',
        'ḟ': 'f',
        'ƒ': 'f',
        'ᵮ': 'f',
        'ᶂ': 'f',
        'ǵ': 'g',
        'ğ': 'g',
        'ĝ': 'g',
        'ǧ': 'g',
        'ġ': 'g',
        'ģ': 'g',
        'ḡ': 'g',
        'ǥ': 'g',
        'ɠ': 'g',
        'ᶃ': 'g',
        'ĥ': 'h',
        'ȟ': 'h',
        'ḧ': 'h',
        'ḣ': 'h',
        'ḩ': 'h',
        'ḥ': 'h',
        'ḫ': 'h',
        'ẖ': 'h',
        'ħ': 'h',
        'ⱨ': 'h',
        'ĭ': 'i',
        'ǐ': 'i',
        'ḯ': 'i',
        'į': 'i',
        'ī': 'i',
        'ỉ': 'i',
        'ȉ': 'i',
        'ȋ': 'i',
        'ị': 'i',
        'ḭ': 'i',
        'ɨ': 'i',
        'ᵻ': 'i',
        'ᶖ': 'i',
        'i': 'i',
        'ı': 'i',
        'ĵ': 'j',
        'ɉ': 'j',
        'ǰ': 'j',
        'ȷ': 'j',
        'ʝ': 'j',
        'ɟ': 'j',
        'ʄ': 'j',
        'ḱ': 'k',
        'ǩ': 'k',
        'ķ': 'k',
        'ḳ': 'k',
        'ḵ': 'k',
        'ƙ': 'k',
        'ⱪ': 'k',
        'ᶄ': 'k',
        'ĺ': 'l',
        'ľ': 'l',
        'ļ': 'l',
        'ḷ': 'l',
        'ḹ': 'l',
        'ḽ': 'l',
        'ḻ': 'l',
        'ł': 'l',
        'ŀ': 'l',
        'ƚ': 'l',
        'ⱡ': 'l',
        'ɫ': 'l',
        'ɬ': 'l',
        'ᶅ': 'l',
        'ɭ': 'l',
        'ȴ': 'l',
        'ḿ': 'm',
        'ṁ': 'm',
        'ṃ': 'm',
        'ᵯ': 'm',
        'ᶆ': 'm',
        'ɱ': 'm',
        'ń': 'n',
        'ǹ': 'n',
        'ň': 'n',
        'ñ': 'n',
        'ṅ': 'n',
        'ņ': 'n',
        'ṇ': 'n',
        'ṋ': 'n',
        'ṉ': 'n',
        'ɲ': 'n',
        'ƞ': 'n',
        'ŋ': 'n',
        'ᵰ': 'n',
        'ᶇ': 'n',
        'ɳ': 'n',
        'ȵ': 'n',
        'ŏ': 'o',
        'ố': 'o',
        'ồ': 'o',
        'ỗ': 'o',
        'ổ': 'o',
        'ǒ': 'o',
        'ȫ': 'o',
        'ő': 'o',
        'ṍ': 'o',
        'ṏ': 'o',
        'ȭ': 'o',
        'ȯ': 'o',
        'ȱ': 'o',
        'ø': 'o',
        'ǿ': 'o',
        'ǫ': 'o',
        'ǭ': 'o',
        'ō': 'o',
        'ṓ': 'o',
        'ṑ': 'o',
        'ỏ': 'o',
        'ȍ': 'o',
        'ȏ': 'o',
        'ơ': 'o',
        'ớ': 'o',
        'ờ': 'o',
        'ỡ': 'o',
        'ở': 'o',
        'ợ': 'o',
        'ọ': 'o',
        'ộ': 'o',
        'ɵ': 'o',
        'ɔ': 'o',
        'ṕ': 'p',
        'ṗ': 'p',
        'ᵽ': 'p',
        'ƥ': 'p',
        'ᵱ': 'p',
        'ᶈ': 'p',
        'ɋ': 'q',
        'ƣ': 'q',
        'ʠ': 'q',
        'ŕ': 'r',
        'ř': 'r',
        'ṙ': 'r',
        'ŗ': 'r',
        'ȑ': 'r',
        'ȓ': 'r',
        'ṛ': 'r',
        'ṝ': 'r',
        'ṟ': 'r',
        'ɍ': 'r',
        'ɽ': 'r',
        'ᵲ': 'r',
        'ᶉ': 'r',
        'ɼ': 'r',
        'ɾ': 'r',
        'ᵳ': 'r',
        'ś': 's',
        'ṥ': 's',
        'ŝ': 's',
        'š': 's',
        'ṧ': 's',
        // written as escapes so the exact precomposed code points are unambiguous
        '\u1e61': 's', // ṡ
        '\u1e9b': 's', // ẛ
        'ş': 's',
        'ṣ': 's',
        'ṩ': 's',
        'ș': 's',
        'ᵴ': 's',
        'ᶊ': 's',
        'ʂ': 's',
        'ȿ': 's',
        'ť': 't',
        'ṫ': 't',
        'ţ': 't',
        'ṭ': 't',
        'ț': 't',
        'ṱ': 't',
        'ṯ': 't',
        'ŧ': 't',
        'ⱦ': 't',
        'ƭ': 't',
        'ʈ': 't',
        '\u1e97': 't', // ẗ (escape keeps the precomposed code point unambiguous)
        'ᵵ': 't',
        'ƫ': 't',
        'ȶ': 't',
        'ŭ': 'u',
        'ǔ': 'u',
        'ů': 'u',
        'ǘ': 'u',
        'ǜ': 'u',
        'ǚ': 'u',
        'ǖ': 'u',
        'ű': 'u',
        'ṹ': 'u',
        'ų': 'u',
        'ū': 'u',
        'ṻ': 'u',
        'ủ': 'u',
        'ȕ': 'u',
        'ȗ': 'u',
        'ư': 'u',
        'ứ': 'u',
        'ừ': 'u',
        'ữ': 'u',
        'ử': 'u',
        'ự': 'u',
        'ụ': 'u',
        'ṳ': 'u',
        'ṷ': 'u',
        'ṵ': 'u',
        'ʉ': 'u',
        'ᵾ': 'u',
        'ᶙ': 'u',
        'ṽ': 'v',
        'ṿ': 'v',
        'ʋ': 'v',
        'ᶌ': 'v',
        'ⱴ': 'v',
        'ẃ': 'w',
        'ẁ': 'w',
        'ŵ': 'w',
        'ẅ': 'w',
        'ẇ': 'w',
        'ẉ': 'w',
        'ẘ': 'w',
        'ẍ': 'x',
        'ẋ': 'x',
        'ᶍ': 'x',
        'ý': 'y',
        'ỳ': 'y',
        'ŷ': 'y',
        'ẙ': 'y',
        'ÿ': 'y',
        'ỹ': 'y',
        'ẏ': 'y',
        'ȳ': 'y',
        'ỷ': 'y',
        'ỵ': 'y',
        'ɏ': 'y',
        'ƴ': 'y',
        'ʏ': 'y',
        'ź': 'z',
        'ẑ': 'z',
        'ž': 'z',
        'ż': 'z',
        'ẓ': 'z',
        'ẕ': 'z',
        'ƶ': 'z',
        'ȥ': 'z',
        'ⱬ': 'z',
        'ᵶ': 'z',
        'ᶎ': 'z',
        'ʐ': 'z',
        'ʑ': 'z',
        'ɀ': 'z',
        // greek
        'α': 'a',
        'β': 'b',
        'γ': 'g',
        'ɣ': 'g',
        'δ': 'd',
        'ð': 'd',
        'ε': 'e',
        'ζ': 'z',
        'η': 'i',
        'θ': 'th',
        'ι': 'i',
        'κ': 'k',
        'λ': 'l',
        'μ': 'm',
        'µ': 'm',
        'ν': 'n',
        'ξ': 'x',
        'ο': 'o',
        'π': 'p',
        'ρ': 'r',
        'σ': 's',
        'ς': 's',
        'τ': 't',
        'υ': 'u', // official rule: if preceded by 'α' OR 'ε' => 'v', by 'ο' => 'u', else => 'i'
        'φ': 'f',
        'χ': 'ch',
        'ψ': 'ps',
        'ω': 'o',
        // greek diacritics
        'ᾳ': 'a',
        'ά': 'a',
        'ὰ': 'a',
        'ᾴ': 'a',
        'ᾲ': 'a',
        'ᾶ': 'a',
        'ᾷ': 'a',
        'ἀ': 'a',
        'ᾀ': 'a',
        'ἄ': 'a',
        'ᾄ': 'a',
        'ἂ': 'a',
        'ᾂ': 'a',
        'ἆ': 'a',
        'ᾆ': 'a',
        'ἁ': 'a',
        'ᾁ': 'a',
        'ἅ': 'a',
        'ᾅ': 'a',
        'ἃ': 'a',
        'ᾃ': 'a',
        'ἇ': 'a',
        'ᾇ': 'a',
        'ᾱ': 'a',
        'ᾰ': 'a',
        'έ': 'e',
        'ὲ': 'e',
        'ἐ': 'e',
        'ἔ': 'e',
        'ἒ': 'e',
        'ἑ': 'e',
        'ἕ': 'e',
        'ἓ': 'e',
        'ῃ': 'i',
        'ή': 'i',
        'ὴ': 'i',
        'ῄ': 'i',
        'ῂ': 'i',
        'ῆ': 'i',
        'ῇ': 'i',
        'ἠ': 'i',
        'ᾐ': 'i',
        'ἤ': 'i',
        'ᾔ': 'i',
        'ἢ': 'i',
        'ᾒ': 'i',
        'ἦ': 'i',
        'ᾖ': 'i',
        'ἡ': 'i',
        'ᾑ': 'i',
        'ἥ': 'i',
        'ᾕ': 'i',
        'ἣ': 'i',
        'ᾓ': 'i',
        'ἧ': 'i',
        'ᾗ': 'i',
        'ί': 'i',
        'ὶ': 'i',
        'ῖ': 'i',
        'ἰ': 'i',
        'ἴ': 'i',
        'ἲ': 'i',
        'ἶ': 'i',
        'ἱ': 'i',
        'ἵ': 'i',
        'ἳ': 'i',
        'ἷ': 'i',
        'ϊ': 'i',
        'ΐ': 'i',
        'ῒ': 'i',
        'ῗ': 'i',
        'ῑ': 'i',
        'ῐ': 'i',
        'ό': 'o',
        'ὸ': 'o',
        'ὀ': 'o',
        'ὄ': 'o',
        'ὂ': 'o',
        'ὁ': 'o',
        'ὅ': 'o',
        'ὃ': 'o',
        'ύ': 'u',
        'ὺ': 'u',
        'ῦ': 'u',
        'ὐ': 'u',
        'ὔ': 'u',
        'ὒ': 'u',
        'ὖ': 'u',
        'ὑ': 'u',
        'ὕ': 'u',
        'ὓ': 'u',
        'ὗ': 'u',
        'ϋ': 'u',
        'ΰ': 'u',
        'ῢ': 'u',
        'ῧ': 'u',
        'ῡ': 'u',
        'ῠ': 'u',
        'ῳ': 'o',
        'ώ': 'o',
        'ῴ': 'o',
        'ὼ': 'o',
        'ῲ': 'o',
        'ῶ': 'o',
        'ῷ': 'o',
        'ὠ': 'o',
        'ᾠ': 'o',
        'ὤ': 'o',
        'ᾤ': 'o',
        'ὢ': 'o',
        'ᾢ': 'o',
        'ὦ': 'o',
        'ᾦ': 'o',
        'ὡ': 'o',
        'ᾡ': 'o',
        'ὥ': 'o',
        'ᾥ': 'o',
        'ὣ': 'o',
        'ᾣ': 'o',
        'ὧ': 'o',
        'ᾧ': 'o',
        'ῤ': 'r',
        'ῥ': 'r',
        // cyrillic (russian)
        'а': 'a',
        'б': 'b',
        'в': 'v',
        'г': 'g',
        'д': 'd',
        'е': 'e',
        'ё': 'e',
        'ж': 'zh',
        'з': 'z',
        'и': 'i',
        'й': 'j',
        'к': 'k',
        'л': 'l',
        'м': 'm',
        'н': 'n',
        'о': 'o',
        'п': 'p',
        'р': 'r',
        'с': 's',
        'т': 't',
        'у': 'u',
        'ф': 'f',
        'х': 'h',
        'ц': 'ts',
        'ч': 'ch',
        'ш': 'sh',
        'щ': 'sh',
        'ъ': '',
        'ы': 'i',
        'ь': '',
        'э': 'e',
        'ю': 'yu',
        'я': 'ya',
        // ---
        'і': 'j',
        'ѳ': 'f',
        'ѣ': 'e',
        'ѵ': 'i',
        'ѕ': 'z',
        'ѯ': 'ks',
        'ѱ': 'ps',
        'ѡ': 'o',
        'ѫ': 'yu',
        'ѧ': 'ya',
        'ѭ': 'yu',
        'ѩ': 'ya',
        // arabic (provided by @Omranic)
        '١': '1',
        '٢': '2',
        '٣': '3',
        '٤': '4',
        '٥': '5',
        '٦': '6',
        '٧': '7',
        '٨': '8',
        '٩': '9',
        '٠': '0',
        'ا': 'a',
        'أ': 'a',
        'إ': 'a',
        'آ': 'a',
        'ب': 'b',
        'ت': 't',
        'ث': 'th',
        'ج': 'j',
        'ح': 'h',
        'خ': 'kh',
        'د': 'd',
        'ذ': 'dh',
        'ر': 'r',
        'ز': 'z',
        'س': 's',
        'ش': 'sh',
        'ص': 's',
        'ض': 'd',
        'ط': 't',
        'ظ': 'z',
        'ع': 'a',
        'غ': 'gh',
        'ف': 'f',
        'ق': 'q',
        'ك': 'k',
        'ل': 'l',
        'م': 'm',
        'ن': 'n',
        'ه': 'h',
        'و': 'w',
        'ى': 'y',
        'ي': 'y',
        'ة': 't',
        // currency
        /*
        '₳': 'ARA',
        '฿': 'THB',
        '₵': 'GHS',
        '¢': 'c',
        '₡': 'CRC',
        '₢': 'Cr',
        '₠': 'XEU',
        '$': 'USD',
        '₫': 'VND',
        '৳': 'BDT',
        '₯': 'GRD',
        '€': 'EUR',
        '₣': 'FRF',
        '₲': 'PYG',
        '₴': 'HRN',
        '₭': 'LAK',
        '₦': 'NGN',
        '₧': 'ESP',
        '₱': 'PhP',
        '£': 'GBP',
        '₤': 'GBP',
        '₨': 'Rs',
        '₪': 'NS',
        '₮': 'MNT',
        '₩': 'WON',
        '¥': 'YEN',
        '៛': 'KHR',
        //*/
        // fractions
        /*
        '⅛': '',
        '⅙': '',
        '⅕': '',
        '¼': '',
        '⅓': '',
        '⅜': '',
        '⅖': '',
        '½': '',
        '⅗': '',
        '⅝': '',
        '⅔': '',
        '¾': '',
        '⅘': '',
        '⅚': '',
        '⅞': '',
        //*/
        // separators
        '–': delimiter,
        '—': delimiter,
        '―': delimiter,
        '~': delimiter,
        '/': delimiter,
        '\\': delimiter,
        '|': delimiter,
        '+': delimiter,
        '‘': delimiter,
        '’': delimiter,
        '\'': delimiter,
        ' ': delimiter,
        // permitted by default but can be overridden
        '-': '-',
        '_': '_'
      };

    // add any user-defined separator elements (these override the defaults above)
    if (separators) {
      for (i = 0; i < separators.length; i += 1) {
        sanitizer[separators[i]] = delimiter;
      }
    }

    // do all the replacements
    // if we don't lower-case first, add the uppercase versions to the sanitizer plus include A-Z in the prohibited filter
    slug = this.toLowerCase();
    slug = slug.replace(prohibited, function (match) { return sanitizer[match] || ''; });
    // replacer function (not a replacement string) so "$" sequences in the delimiter are inserted literally
    slug = slug.replace(consecutive, function () { return delimiter; });
    slug = slug.replace(trim, "$1");

    return slug;
  };
}
Made a small update to allow for the dash and underscore (which are both allowed characters by default) to be overridden as characters which will be replaced by the delimiter. For example:
"A_MiXeD-separator + delimiter/example".slugify(' ');
"a_mixed-separator delimiter example"
"A_MiXeD-separator + delimiter/example".slugify(' ', ['_', '-']);
"a mixed separator delimiter example"
Added the tilde character to the default list of delimiter conversions.
Changed the substitutions for some Greek characters (θ, ξ, χ, ν, υ) to be consistent with "Greek-lish" and phonetic replacements. Thanks to Philippos for the audit :)
Also added the Russian character set (thanks to Alexander and Yana for their contribution and the Translit resource):
"аз буки веди".slugify();
"az-buki-vedi"
"рцы слово твердо".slugify();
"rtsi-slovo-tverdo"
"цы червь ша ер ять ю".slugify();
"tsi-cherv-sha-er-yat-yu"
Special RegExp characters are now properly escaped if used as the delimiter: \ ^ $ * + ? . ( ) | { } [ ]
"Now any RegEx-sensative.char (\\ ^ $ * + ? . ( ) | { } [ ]) can be used safely as the delimiter... like let's say, a period:".slugify(".");
"now.any.regex-sensative.char.can.be.used.safely.as.the.delimiter.like.let.s.say.a.period"
Completed the Greek set with all the possible accented characters.
"ΧΕΙΜΕΡΙΝΌΣ, θαλασσινή".slugify();
"cheimerinos-thalassini"
"Δωματίου, ΎΨΟΣ, φιλοξενία".slugify();
"domatiou-upsos-filoxenia"
"ευήλιος, αύρα, ζέστη".slugify();
"euilios-aura-zesti"
Cheers,
Δεμοιυε
Updated the loop that iterates through the separators given these performance results.
Can you add support for Arabic words and letters?
For example:
ب شبسي شسيب ش
التتبي
شبس ثصقب صث
@IbnSaeed, I'd be happy to. I didn't get a notification of your comment so sorry it's been so long to respond!
Could you help me in this effort? As you can see from the code, I'll setup a mapping of the characters. Can you recommend a good resource to map them? For example, should I just use what's on Wikipedia?
http://en.wikipedia.org/wiki/Arabic_alphabet
Also, can you advise how to handle the right-to-left nature of the language? For example, if the order of characters is:
غ
ظ
ض
Does that equate to the following?
غظض
And then what would be a logical replacement?
gh z d
d z gh
Note to self: https://github.com/dodo/node-slug/blob/master/slug.js
@demoive
Thank you for this awesome plugin.
Here's Arabic Support implemented, you could kindly merge it.
https://gist.github.com/Omranic/c53be563645974884ca8#file-slugify-js-L625-L658
Merged Arabic support from @Omranic.
Some initial examples of what this does: