Skip to content

Instantly share code, notes, and snippets.

@coagulant
Created May 21, 2016 22:01
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save coagulant/989b3846d07c1218716286d5c7919881 to your computer and use it in GitHub Desktop.
Save coagulant/989b3846d07c1218716286d5c7919881 to your computer and use it in GitHub Desktop.
"""Django 1.8.X"""
def slugify(value):
    """
    Convert ``value`` to an ASCII slug.

    The value is coerced to text with ``force_text``, decomposed with NFKD
    normalization, and every character that cannot be encoded as ASCII is
    dropped.  Characters that are not alphanumerics, underscores, whitespace
    or hyphens are then removed, leading and trailing whitespace is stripped,
    the text is lowercased, and each run of whitespace and/or hyphens is
    collapsed into a single hyphen.

    Returns the resulting slug passed through ``mark_safe``.
    """
    value = force_text(value)
    # NFKD separates base letters from their combining marks, so the ASCII
    # round-trip with errors='ignore' keeps the base letter and discards the
    # mark (along with any other non-ASCII character).
    value = unicodedata.normalize('NFKD', value).encode('ascii', 'ignore').decode('ascii')
    # Raw string literals: \w and \s are regex escapes, not string escapes.
    value = re.sub(r'[^\w\s-]', '', value).strip().lower()
    return mark_safe(re.sub(r'[-\s]+', '-', value))
# Rebind ``slugify`` to the wrapper returned by ``allow_lazy``, which is
# given ``six.text_type`` and ``SafeText`` as additional arguments.
# NOTE(review): presumably these are the result classes for lazily
# evaluated calls (Django's ``allow_lazy``) -- confirm against the imports.
slugify = allow_lazy(slugify, six.text_type, SafeText)
"""django-missing"""
@register.filter(is_safe=True)
@defaultfilters.stringfilter
def slugify2(value):
    """
    Normalizes string, converts to lowercase, removes non-alpha characters,
    and converts spaces to hyphens.

    It is similar to built-in :filter:`slugify` but it also handles special
    characters in variety of languages so that they are not simply removed
    but properly transliterated/downcoded.

    Returns the slug passed through ``safestring.mark_safe``.  If any step
    raises, the exception is re-raised when ``settings.TEMPLATE_DEBUG`` is
    true; otherwise an empty string is returned.
    """
    try:
        # Compose first so that ``downcode`` receives precomposed characters.
        value = unicodedata.normalize('NFC', value)
        value = downcode(value)
        # Decompose whatever is left and drop everything that is not ASCII.
        # Decoding straight back to text keeps the regex calls below working
        # on text on both Python 2 and Python 3.
        value = unicodedata.normalize('NFD', value).encode('ascii', 'ignore').decode('ascii')
        value = re.sub(r'[^\w\s-]', '', value).strip().lower()
        return safestring.mark_safe(re.sub(r'[-\s]+', '-', value))
    except Exception:
        # Only ordinary errors are handled here; KeyboardInterrupt and
        # SystemExit are allowed to propagate.
        if settings.TEMPLATE_DEBUG:
            raise
        return u''
"""Unused, but existing solutions"""
# https://pypi.python.org/pypi/Unidecode
#>>> from unidecode import unidecode
#>>> unidecode(u"\u5317\u4EB0")
#"Bei Jing "
"""Unused, popular pytils"""
def slugify(in_string):
    """
    Prepare string for slug (i.e. URL or file/dir name)

    The input is converted to lowercase text, ampersands (``&`` and
    ``&amp;``) are replaced with `` and ``, runs of whitespace/hyphens are
    collapsed into a single hyphen, characters not present in ``ALPHABET``
    are dropped, and the remainder is passed through ``translify``.  Finally
    any character that is not a word character, whitespace or a hyphen is
    removed, and the result is stripped and lowercased.

    @param in_string: input string
    @type in_string: C{basestring}

    @return: slug-string
    @rtype: C{str}

    @raise ValueError: if in_string is C{str}, but it isn't ascii
    """
    try:
        u_in_string = six.text_type(in_string).lower()
    except UnicodeDecodeError:
        raise ValueError("We expects when in_string is str type,"
                         "it is an ascii, but now it isn't. Use unicode "
                         "in this case.")
    # convert & to "and"
    u_in_string = re.sub(r'&amp;|&', ' and ', u_in_string)
    # replace spaces by hyphen
    u_in_string = re.sub(r'[-\s]+', '-', u_in_string)
    # remove symbols that are not in the alphabet
    # NOTE(review): the hyphens inserted above survive only if ALPHABET
    # contains '-' -- confirm against its definition.
    u_in_string = u''.join(symb for symb in u_in_string if symb in ALPHABET)
    # translify it
    out_string = translify(u_in_string)
    # remove non-alpha
    return re.sub(r'[^\w\s-]', '', out_string).strip().lower()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment