Skip to content

Instantly share code, notes, and snippets.

@kidsil
Created May 13, 2013 17:06
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save kidsil/5569820 to your computer and use it in GitHub Desktop.
Save kidsil/5569820 to your computer and use it in GitHub Desktop.
Special slugifying of German text in Python (works pretty well, actually)
# -*- coding: utf-8 -*-
def replace_all(text, dic):
    """Replace every listed substring of *text* with its replacement.

    Args:
        text: The string to transform.
        dic: Mapping of ``replacement -> target``. Each value is either a
            single substring or a list/tuple of substrings; every
            occurrence of a target is replaced by the mapping's key.

    Returns:
        The transformed string. Replacements are applied one after another
        in the mapping's iteration order, so the output of an earlier
        replacement can be rewritten by a later one.
    """
    # ``items()`` exists on both Python 2 and 3; ``iteritems()`` is 2-only.
    for replace_with, replace_what in dic.items():
        # Normalise a lone substring to a one-element list so both shapes
        # share a single replacement loop.
        if isinstance(replace_what, (list, tuple)):
            targets = replace_what
        else:
            targets = [replace_what]
        for target in targets:
            text = text.replace(target, replace_with)
    return text
#example input: ssdfSERFS - ^#$%43 GSDG ____ :VKbm sdF öüoäßßEF_____-___ d a s d _e <>DFGDFG????
#example output: ssdfserfs-43-gsdg-vkbm-sdf-oeueoaessssef-d-a-s-d-e-dfgdfg
def slugify(string, params=None):
    """Turn text into a lowercase, dash-separated ASCII slug.

    HTML entities are unescaped, the text is lowercased, German umlauts and
    the sharp s are transliterated (ä -> ae, ö -> oe, ü -> ue, ß -> ss),
    every remaining character other than ASCII letters, digits, underscore,
    dash and whitespace is dropped, and each run of whitespace, underscores
    and dashes becomes a single dash.

    Args:
        string: Text to slugify; may contain HTML entities such as ``&amp;``.
        params: Unused; accepted only so existing callers keep working.

    Returns:
        The slug, without leading or trailing dashes. May be empty.
    """
    import re
    try:
        from html import unescape  # Python 3.4+
    except ImportError:  # Python 2 fallback
        import HTMLParser
        unescape = HTMLParser.HTMLParser().unescape

    # (replacement, variants). The upper-case variants matter when
    # ``lower()`` leaves non-ASCII letters untouched (Python 2 byte
    # strings); the entity spellings catch double-escaped input such as
    # ``&amp;auml;``, which is still an entity after one unescape pass.
    transliterations = (
        ('ae', ('Ä', 'ä', '&Auml;', '&auml;')),
        ('oe', ('Ö', 'ö', '&Ouml;', '&ouml;')),
        ('ue', ('Ü', 'ü', '&Uuml;', '&uuml;')),
        ('ss', ('ß', '&szlig;')),
    )

    string = unescape(string).lower()
    for replacement, variants in transliterations:
        for variant in variants:
            string = string.replace(variant, replacement)

    # Drop everything that is neither slug-safe nor a separator.
    string = re.sub(r'[^a-zA-Z0-9_\-\s]', '', string)
    # Collapse each run of separators into one dash. Matching single
    # whitespace characters too means a lone tab or newline also becomes a
    # dash instead of leaking into the slug.
    string = re.sub(r'[\s_\-]+', '-', string)
    return string.strip('-')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment