Created
May 13, 2013 17:06
-
-
Save kidsil/5569820 to your computer and use it in GitHub Desktop.
Special slugifying of German text in Python (works pretty well, actually)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding: utf-8 -*- | |
def replace_all(text, dic):
    """Replace every listed substring of *text* with its mapped replacement.

    Args:
        text: The string to process.
        dic: Mapping of ``replacement -> target(s)``. Each value is either a
            single substring or a list/tuple of substrings; every occurrence
            of a target is replaced by the mapping's key.

    Returns:
        The text with all replacements applied.
    """
    # ``items()`` exists on both Python 2 and 3; ``iteritems()`` is Python 2 only.
    for replace_with, replace_what in dic.items():
        if isinstance(replace_what, (list, tuple)):
            targets = replace_what
        else:
            targets = [replace_what]
        for target in targets:
            text = text.replace(target, replace_with)
    return text
#example input: ssdfSERFS - ^#$%43 GSDG ____ :VKbm sdF öüoäßßEF_____-___ d a s d _e <>DFGDFG???? | |
#example output: ssdfserfs-43-gsdg-vkbm-sdf-oeueoaessssef-d-a-s-d-e-dfgdfg | |
def slugify(string, params=None):
    """Turn *string* into a lowercase, hyphen-separated ASCII slug.

    HTML entities are decoded first, German umlauts and the sharp s are
    transliterated (ae, oe, ue, ss), every remaining character other than
    ASCII letters, digits, underscores, hyphens and whitespace is dropped,
    and each run of whitespace, underscores and hyphens becomes one hyphen.

    Args:
        string: Text to slugify; may contain HTML entities.
        params: Unused; kept so existing callers that pass it keep working.

    Returns:
        The slug, without leading or trailing hyphens.
    """
    import re
    try:
        from html import unescape  # Python 3
    except ImportError:
        # Python 2 fallback: no ``html.unescape`` there.
        from HTMLParser import HTMLParser
        unescape = HTMLParser().unescape

    # Unescape HTML entities (e.g. "&amp;" -> "&") and lowercase the string.
    string = unescape(string).lower()
    # Upper-case variants stay listed because lower() does not fold
    # non-ASCII characters in Python 2 byte strings.
    translation_dic = {
        'ae': ['Ä', 'ä'],
        'oe': ['Ö', 'ö'],
        'ue': ['Ü', 'ü'],
        'ss': ['ß'],
    }
    string = replace_all(string, translation_dic)
    # Drop everything that is neither slug material nor a separator.
    string = re.sub(r'[^a-zA-Z0-9_\-\s]', '', string)
    # Fold every run of separators (including a lone tab or newline) into a
    # single hyphen, then trim hyphens from both ends.
    string = re.sub(r'[\s_\-]+', '-', string)
    return string.strip('-')
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment