Skip to content

Instantly share code, notes, and snippets.

@namongk
Created February 17, 2013 14:21
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save namongk/4971666 to your computer and use it in GitHub Desktop.
Save namongk/4971666 to your computer and use it in GitHub Desktop.
#-*-coding:utf-8-*-
import re
import unicodedata
from django.utils.encoding import smart_unicode
# Extra characters outside of alphanumerics that we'll allow.
SLUG_OK = '-_~'


def slugify(s, ok=SLUG_OK, lower=True, spaces=False):
    """Build a Unicode-aware slug from ``s``.

    ``s`` is passed through ``smart_unicode`` and NFKC-normalized, then
    filtered character by character: characters whose Unicode general
    category starts with ``L`` (letter) or ``N`` (number), and any character
    listed in ``ok``, are kept as-is; characters whose category starts with
    ``Z`` (separator) become a single plain space; everything else is
    dropped. Leading and trailing whitespace is stripped from the result.

    Args:
        s: The value to slugify.
        ok: Extra characters to keep verbatim (defaults to ``SLUG_OK``).
        lower: When true, lowercase the result.
        spaces: When false (the default), every run of hyphens and
            whitespace in the result is collapsed into a single ``'-'``.
            When true, spaces are left in place.

    Returns:
        The slug string.
    """
    # L and N signify letter/number.
    # http://www.unicode.org/reports/tr44/tr44-4.html#GC_Values_Table
    rv = []
    for c in unicodedata.normalize('NFKC', smart_unicode(s)):
        cat = unicodedata.category(c)[0]
        if cat in 'LN' or c in ok:
            rv.append(c)
        elif cat == 'Z':  # space
            # ``elif`` (not ``if``): a separator explicitly allowed through
            # ``ok`` has already been kept above and must not be followed by
            # an extra space.
            rv.append(' ')
    new = ''.join(rv).strip()
    if not spaces:
        # Raw string so ``\s`` reaches the regex engine instead of being
        # treated as an (invalid) string escape.
        new = re.sub(r'[-\s]+', '-', new)
    return new.lower() if lower else new
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment