Skip to content

Instantly share code, notes, and snippets.

@anhtran
Created May 7, 2013 03:34
Show Gist options
  • Save anhtran/5530083 to your computer and use it in GitHub Desktop.
Save anhtran/5530083 to your computer and use it in GitHub Desktop.
Add Vietnamese character map for urlify.js of Django Admin (generator of slug fields)
var LATIN_MAP = {
'À': 'A', 'Á': 'A', 'Â': 'A', 'Ã': 'A', 'Ä': 'A', 'Å': 'A', 'Æ': 'AE', 'Ç':
'C', 'È': 'E', 'É': 'E', 'Ê': 'E', 'Ë': 'E', 'Ì': 'I', 'Í': 'I', 'Î': 'I',
'Ï': 'I', 'Ð': 'D', 'Ñ': 'N', 'Ò': 'O', 'Ó': 'O', 'Ô': 'O', 'Õ': 'O', 'Ö':
'O', 'Ő': 'O', 'Ø': 'O', 'Ù': 'U', 'Ú': 'U', 'Û': 'U', 'Ü': 'U', 'Ű': 'U',
'Ý': 'Y', 'Þ': 'TH', 'ß': 'ss', 'à':'a', 'á':'a', 'â': 'a', 'ã': 'a', 'ä':
'a', 'å': 'a', 'æ': 'ae', 'ç': 'c', 'è': 'e', 'é': 'e', 'ê': 'e', 'ë': 'e',
'ì': 'i', 'í': 'i', 'î': 'i', 'ï': 'i', 'ð': 'd', 'ñ': 'n', 'ò': 'o', 'ó':
'o', 'ô': 'o', 'õ': 'o', 'ö': 'o', 'ő': 'o', 'ø': 'o', 'ù': 'u', 'ú': 'u',
'û': 'u', 'ü': 'u', 'ű': 'u', 'ý': 'y', 'þ': 'th', 'ÿ': 'y'
}
var LATIN_SYMBOLS_MAP = {
'©':'(c)'
}
var GREEK_MAP = {
'α':'a', 'β':'b', 'γ':'g', 'δ':'d', 'ε':'e', 'ζ':'z', 'η':'h', 'θ':'8',
'ι':'i', 'κ':'k', 'λ':'l', 'μ':'m', 'ν':'n', 'ξ':'3', 'ο':'o', 'π':'p',
'ρ':'r', 'σ':'s', 'τ':'t', 'υ':'y', 'φ':'f', 'χ':'x', 'ψ':'ps', 'ω':'w',
'ά':'a', 'έ':'e', 'ί':'i', 'ό':'o', 'ύ':'y', 'ή':'h', 'ώ':'w', 'ς':'s',
'ϊ':'i', 'ΰ':'y', 'ϋ':'y', 'ΐ':'i',
'Α':'A', 'Β':'B', 'Γ':'G', 'Δ':'D', 'Ε':'E', 'Ζ':'Z', 'Η':'H', 'Θ':'8',
'Ι':'I', 'Κ':'K', 'Λ':'L', 'Μ':'M', 'Ν':'N', 'Ξ':'3', 'Ο':'O', 'Π':'P',
'Ρ':'R', 'Σ':'S', 'Τ':'T', 'Υ':'Y', 'Φ':'F', 'Χ':'X', 'Ψ':'PS', 'Ω':'W',
'Ά':'A', 'Έ':'E', 'Ί':'I', 'Ό':'O', 'Ύ':'Y', 'Ή':'H', 'Ώ':'W', 'Ϊ':'I',
'Ϋ':'Y'
}
var TURKISH_MAP = {
'ş':'s', 'Ş':'S', 'ı':'i', 'İ':'I', 'ç':'c', 'Ç':'C', 'ü':'u', 'Ü':'U',
'ö':'o', 'Ö':'O', 'ğ':'g', 'Ğ':'G'
}
var RUSSIAN_MAP = {
'а':'a', 'б':'b', 'в':'v', 'г':'g', 'д':'d', 'е':'e', 'ё':'yo', 'ж':'zh',
'з':'z', 'и':'i', 'й':'j', 'к':'k', 'л':'l', 'м':'m', 'н':'n', 'о':'o',
'п':'p', 'р':'r', 'с':'s', 'т':'t', 'у':'u', 'ф':'f', 'х':'h', 'ц':'c',
'ч':'ch', 'ш':'sh', 'щ':'sh', 'ъ':'', 'ы':'y', 'ь':'', 'э':'e', 'ю':'yu',
'я':'ya',
'А':'A', 'Б':'B', 'В':'V', 'Г':'G', 'Д':'D', 'Е':'E', 'Ё':'Yo', 'Ж':'Zh',
'З':'Z', 'И':'I', 'Й':'J', 'К':'K', 'Л':'L', 'М':'M', 'Н':'N', 'О':'O',
'П':'P', 'Р':'R', 'С':'S', 'Т':'T', 'У':'U', 'Ф':'F', 'Х':'H', 'Ц':'C',
'Ч':'Ch', 'Ш':'Sh', 'Щ':'Sh', 'Ъ':'', 'Ы':'Y', 'Ь':'', 'Э':'E', 'Ю':'Yu',
'Я':'Ya'
}
var UKRAINIAN_MAP = {
'Є':'Ye', 'І':'I', 'Ї':'Yi', 'Ґ':'G', 'є':'ye', 'і':'i', 'ї':'yi', 'ґ':'g'
}
var CZECH_MAP = {
'č':'c', 'ď':'d', 'ě':'e', 'ň': 'n', 'ř':'r', 'š':'s', 'ť':'t', 'ů':'u',
'ž':'z', 'Č':'C', 'Ď':'D', 'Ě':'E', 'Ň': 'N', 'Ř':'R', 'Š':'S', 'Ť':'T',
'Ů':'U', 'Ž':'Z'
}
var POLISH_MAP = {
'ą':'a', 'ć':'c', 'ę':'e', 'ł':'l', 'ń':'n', 'ó':'o', 'ś':'s', 'ź':'z',
'ż':'z', 'Ą':'A', 'Ć':'C', 'Ę':'e', 'Ł':'L', 'Ń':'N', 'Ó':'o', 'Ś':'S',
'Ź':'Z', 'Ż':'Z'
}
var VIETNAMESE_MAP = {
'á':'a', 'à':'a', 'ả':'a', 'ã':'a', 'ạ':'a',
'ă':'a', 'ắ':'a', 'ằ':'a', 'ẵ':'a', 'ặ':'a', 'ẳ':'a',
'â':'a', 'ấ':'a', 'ầ':'a', 'ẫ':'a', 'ẩ':'a', 'ậ':'a',
'đ':'d',
'é':'e', 'è':'e', 'ẻ':'e', 'ẽ':'e', 'ẹ':'e',
'ê':'e', 'ế':'e', 'ề':'e', 'ể':'e', 'ễ':'e', 'ệ':'e',
'í':'i', 'ì':'i', 'ỉ':'i', 'ĩ':'i', 'ị':'i',
'ô':'o', 'ố':'o', 'ồ':'o', 'ổ':'o', 'ỗ':'o', 'ộ':'o',
'ơ':'o', 'ớ':'o', 'ờ':'o', 'ở':'o', 'ỡ':'o', 'ợ':'o',
'ù':'u', 'ú':'u', 'ủ':'u', 'ũ':'u', 'ụ':'u',
'ư':'u', 'ứ':'u', 'ừ':'u', 'ữ':'u', 'ử':'u', 'ự':'u',
'ỳ':'y', 'ý':'y', 'ỷ':'y', 'ỹ':'y', 'ỵ':'y',
'Á':'A', 'À':'A', 'Ả':'A', 'Ã':'A', 'Ạ':'A',
'Ă':'A', 'Ắ':'A', 'Ằ':'A', 'Ẵ':'A', 'Ặ':'A', 'Ẳ':'A',
'Â':'A', 'Ấ':'A', 'Ầ':'A', 'Ẫ':'A', 'Ẩ':'A', 'Ậ':'A',
'Đ':'D',
'É':'E', 'È':'E', 'Ẻ':'E', 'Ẽ':'E', 'Ẹ':'E',
'Ê':'E', 'Ế':'E', 'Ề':'E', 'Ể':'E', 'Ễ':'E', 'Ệ':'E',
'Í':'I', 'Ì':'I', 'Ỉ':'I', 'Ĩ':'I', 'Ị':'I',
'Ô':'O', 'Ố':'O', 'Ồ':'O', 'Ổ':'O', 'Ỗ':'O', 'Ộ':'O',
'Ơ':'O', 'Ớ':'O', 'Ờ':'O', 'Ở':'O', 'Ỡ':'O', 'Ợ':'O',
'Ù':'U', 'Ú':'U', 'Ủ':'U', 'Ũ':'U', 'Ụ':'U',
'Ư':'U', 'Ứ':'U', 'Ừ':'U', 'Ữ':'U', 'Ử':'U', 'Ự':'U',
'Ỳ':'Y', 'Ý':'Y', 'Ỷ':'Y', 'Ỹ':'Y', 'Ỵ':'Y'
}
var ALL_DOWNCODE_MAPS=new Array()
ALL_DOWNCODE_MAPS[0]=LATIN_MAP
ALL_DOWNCODE_MAPS[1]=LATIN_SYMBOLS_MAP
ALL_DOWNCODE_MAPS[2]=GREEK_MAP
ALL_DOWNCODE_MAPS[3]=TURKISH_MAP
ALL_DOWNCODE_MAPS[4]=RUSSIAN_MAP
ALL_DOWNCODE_MAPS[5]=UKRAINIAN_MAP
ALL_DOWNCODE_MAPS[6]=CZECH_MAP
ALL_DOWNCODE_MAPS[7]=POLISH_MAP
ALL_DOWNCODE_MAPS[8]=VIETNAMESE_MAP
var Downcoder = new Object();
Downcoder.Initialize = function()
{
if (Downcoder.map) // already made
return ;
Downcoder.map ={}
Downcoder.chars = '' ;
for(var i in ALL_DOWNCODE_MAPS)
{
var lookup = ALL_DOWNCODE_MAPS[i]
for (var c in lookup)
{
Downcoder.map[c] = lookup[c] ;
Downcoder.chars += c ;
}
}
Downcoder.regex = new RegExp('[' + Downcoder.chars + ']|[^' + Downcoder.chars + ']+','g') ;
}
downcode= function( slug )
{
Downcoder.Initialize() ;
var downcoded =""
var pieces = slug.match(Downcoder.regex);
if(pieces)
{
for (var i = 0 ; i < pieces.length ; i++)
{
if (pieces[i].length == 1)
{
var mapped = Downcoder.map[pieces[i]] ;
if (mapped != null)
{
downcoded+=mapped;
continue ;
}
}
downcoded+=pieces[i];
}
}
else
{
downcoded = slug;
}
return downcoded;
}
function URLify(s, num_chars) {
// changes, e.g., "Petty theft" to "petty_theft"
// remove all these words from the string before urlifying
s = downcode(s);
removelist = ["a", "an", "as", "at", "before", "but", "by", "for", "from",
"is", "in", "into", "like", "of", "off", "on", "onto", "per",
"since", "than", "the", "this", "that", "to", "up", "via",
"with"];
r = new RegExp('\\b(' + removelist.join('|') + ')\\b', 'gi');
s = s.replace(r, '');
// if downcode doesn't hit, the char will be stripped here
s = s.replace(/[^-\w\s]/g, ''); // remove unneeded chars
s = s.replace(/^\s+|\s+$/g, ''); // trim leading/trailing spaces
s = s.replace(/[-\s]+/g, '-'); // convert spaces to hyphens
s = s.toLowerCase(); // convert to lowercase
return s.substring(0, num_chars);// trim to first num_chars chars
}
@dieudv
Copy link

dieudv commented Nov 26, 2018

Amen. Đội ơn bác. Đội ơn thánh Ala đã giúp con tìm ra bác! Mò mãi mới ra

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment