Created
May 21, 2016 22:01
-
-
Save coagulant/989b3846d07c1218716286d5c7919881 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""Django 1.8.X""" | |
def slugify(value):
    """
    Build an ASCII slug from ``value``.

    Converts to ASCII. Converts runs of whitespace and hyphens to a single
    hyphen. Removes characters that aren't alphanumerics, underscores,
    whitespace or hyphens. Converts to lowercase. Also strips leading and
    trailing whitespace.

    The input is first passed through ``force_text`` and the result is
    wrapped with ``mark_safe`` before being returned.
    """
    value = force_text(value)
    # NFKD decomposes characters (e.g. an accented letter into base letter
    # plus combining mark); encoding to ASCII with 'ignore' then drops every
    # code point that has no ASCII representation.
    value = unicodedata.normalize('NFKD', value).encode('ascii', 'ignore').decode('ascii')
    # Raw strings keep ``\w`` / ``\s`` as regex escapes instead of (invalid)
    # string-literal escapes.
    value = re.sub(r'[^\w\s-]', '', value).strip().lower()
    return mark_safe(re.sub(r'[-\s]+', '-', value))
# Rebind the name to the allow_lazy-wrapped version of the function.
slugify = allow_lazy(slugify, six.text_type, SafeText)
"""django-missing""" | |
@register.filter(is_safe=True)
@defaultfilters.stringfilter
def slugify2(value):
    """
    Normalizes string, converts to lowercase, removes non-alpha characters,
    and converts spaces to hyphens.

    It is similar to built-in :filter:`slugify` but it also handles special
    characters in variety of languages so that they are not simply removed
    but properly transliterated/downcoded (via ``downcode``).

    Returns the slug wrapped with ``safestring.mark_safe``. If anything goes
    wrong, the error is re-raised when ``settings.TEMPLATE_DEBUG`` is true;
    otherwise an empty string is returned.
    """
    try:
        # Compose first so downcode() sees whole characters.
        value = unicodedata.normalize('NFC', value)
        value = downcode(value)
        # Decompose whatever is left and drop everything that is not ASCII.
        # Decoding right away yields text on both Python 2 and Python 3, so
        # the regex substitutions below never operate on bytes and the
        # Python 2-only ``unicode`` builtin is not needed.
        value = unicodedata.normalize('NFD', value).encode('ascii', 'ignore').decode('ascii')
        value = re.sub(r'[^\w\s-]', '', value).strip().lower()
        return safestring.mark_safe(re.sub(r'[-\s]+', '-', value))
    except Exception:
        # Best-effort filter: only surface errors while debugging templates.
        # ``Exception`` (not a bare except) lets KeyboardInterrupt and
        # SystemExit propagate.
        if settings.TEMPLATE_DEBUG:
            raise
        return u''
"""Unused, but existing solutions""" | |
# https://pypi.python.org/pypi/Unidecode
# >>> from unidecode import unidecode
# >>> unidecode(u"\u5317\u4EB0")
# "Bei Jing "
"""Unused, popular pytils""" | |
def slugify(in_string):
    """
    Prepare string for slug (i.e. URL or file/dir name)

    Steps: lowercase the text, replace ampersands (``&``, ``&;`` and the
    HTML entity ``&amp;``) with " and ", collapse whitespace/hyphen runs into
    a single hyphen, keep only symbols present in ``ALPHABET``, run the
    result through ``translify`` and finally strip any remaining characters
    that are not word characters, whitespace or hyphens.

    @param in_string: input string
    @type in_string: C{basestring}

    @return: slug-string
    @rtype: C{str}

    @raise ValueError: if in_string is C{str}, but it isn't ascii
    """
    try:
        u_in_string = six.text_type(in_string).lower()
    except UnicodeDecodeError:
        raise ValueError(
            "We expects when in_string is str type,"
            "it is an ascii, but now it isn't. Use unicode "
            "in this case."
        )
    # convert "&" (also in its "&amp;" entity form) to "and"; "&;" is still
    # matched so every input the previous pattern handled behaves the same
    u_in_string = re.sub(r'&(?:amp)?;|&', ' and ', u_in_string)
    # replace spaces by hyphen
    u_in_string = re.sub(r'[-\s]+', '-', u_in_string)
    # remove symbols that not in alphabet
    u_in_string = u''.join(symb for symb in u_in_string if symb in ALPHABET)
    # translify it
    out_string = translify(u_in_string)
    # remove non-alpha
    return re.sub(r'[^\w\s-]', '', out_string).strip().lower()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment