Created
August 4, 2010 13:33
-
-
Save ViktorStiskala/508138 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Author: Viktor Stískala
# Contact: viktor (at) stiskala.cz
# Copyright: This module has been placed in the public domain.
"""
This module removes language-specific characters from file names
and makes them more usable on the web. It can also be used as a Nautilus
user script when placed inside ~/.gnome2/nautilus-scripts
"""
import re | |
import os, sys | |
# Transliteration table: maps a single accented Latin letter (unicode key) to
# its plain ASCII base letter, preserving case.  Entries are grouped by the
# ASCII letter they map to so gaps are easy to spot.
conversion = {
    # a
    u'ä': 'a', u'Ä': 'A', u'á': 'a', u'Á': 'A', u'à': 'a', u'À': 'A',
    u'ã': 'a', u'Ã': 'A', u'â': 'a', u'Â': 'A', u'å': 'a', u'Å': 'A',
    u'ą': 'a', u'Ą': 'A',
    # c
    u'č': 'c', u'Č': 'C', u'ć': 'c', u'Ć': 'C', u'ç': 'c', u'Ç': 'C',
    # d
    u'ď': 'd', u'Ď': 'D',
    # e
    u'ě': 'e', u'Ě': 'E', u'é': 'e', u'É': 'E', u'ë': 'e', u'Ë': 'E',
    u'è': 'e', u'È': 'E', u'ê': 'e', u'Ê': 'E', u'ę': 'e', u'Ę': 'E',
    # i
    u'í': 'i', u'Í': 'I', u'ï': 'i', u'Ï': 'I', u'ì': 'i', u'Ì': 'I',
    u'î': 'i', u'Î': 'I',
    # l
    u'ľ': 'l', u'Ľ': 'L', u'ĺ': 'l', u'Ĺ': 'L', u'ł': 'l', u'Ł': 'L',
    # n
    u'ń': 'n', u'Ń': 'N', u'ň': 'n', u'Ň': 'N', u'ñ': 'n', u'Ñ': 'N',
    # o
    u'ó': 'o', u'Ó': 'O', u'ö': 'o', u'Ö': 'O', u'ô': 'o', u'Ô': 'O',
    u'ò': 'o', u'Ò': 'O', u'õ': 'o', u'Õ': 'O', u'ő': 'o', u'Ő': 'O',
    # r
    u'ř': 'r', u'Ř': 'R', u'ŕ': 'r', u'Ŕ': 'R',
    # s
    u'š': 's', u'Š': 'S', u'ś': 's', u'Ś': 'S',
    # t
    u'ť': 't', u'Ť': 'T',
    # u
    u'ú': 'u', u'Ú': 'U', u'ů': 'u', u'Ů': 'U', u'ü': 'u', u'Ü': 'U',
    u'ù': 'u', u'Ù': 'U', u'ũ': 'u', u'Ũ': 'U', u'û': 'u', u'Û': 'U',
    u'ű': 'u', u'Ű': 'U',
    # y
    u'ý': 'y', u'Ý': 'Y',
    # z
    u'ž': 'z', u'Ž': 'Z', u'ź': 'z', u'Ź': 'Z', u'ż': 'z', u'Ż': 'Z',
}
# The patterns never change, so they are compiled once at import time rather
# than on every call.
_SUFFIX_RE = re.compile(r'\.[a-z]+$', re.IGNORECASE)
_WHITESPACE_RE = re.compile(r'\s+')
# Whitelist of characters that may stay in a name: ASCII letters, digits,
# "_", ".", "-", "(" and ")".  The backslash is deliberately NOT part of it.
_DISALLOWED_RE = re.compile(r'[^a-z0-9_.\-()]+', re.IGNORECASE)
# A separator character immediately repeated, such as "__" or "--".
_REPEATED_SEPARATOR_RE = re.compile(r'([-_.])\1+')
_SEPARATORS = '-_.'


def convert(text):
    """Return a lower-case, web-friendly version of the file name *text*.

    A trailing extension made only of ASCII letters (for example ``.TXT``) is
    split off first and re-attached at the end, so it is not affected by the
    clean-up of the stem.  The stem is then processed in this order:

    1. characters found in ``conversion`` are replaced by their ASCII letter;
    2. every run of whitespace becomes a single ``_``;
    3. anything that is not an ASCII letter, digit, ``_``, ``.``, ``-``,
       ``(`` or ``)`` is deleted;
    4. an immediately repeated ``-``, ``_`` or ``.`` is collapsed to one;
    5. leading and trailing ``-``, ``_`` and ``.`` are removed.

    Finally the whole result, extension included, is lower-cased.  For
    example ``u'Příliš žluťoučký kůň.TXT'`` becomes
    ``'prilis_zlutoucky_kun.txt'``.

    :param text: a bare file name, not a path (``/`` is not whitelisted and
        would simply be deleted).
    :returns: the sanitised name; it is empty when the name has no extension
        and no character of the stem survives the clean-up.
    """
    # Split off the extension so that its leading dot is not stripped or
    # merged with separators at the end of the stem.
    suffix = ''
    found = _SUFFIX_RE.search(text)
    if found:
        suffix = found.group(0)
        text = text[:found.start()]

    # Transliterate; characters without a table entry pass through for now
    # and are filtered by the whitelist below.
    stem = ''.join(conversion.get(char, char) for char in text)

    stem = _WHITESPACE_RE.sub('_', stem)
    stem = _DISALLOWED_RE.sub('', stem)
    stem = _REPEATED_SEPARATOR_RE.sub(r'\1', stem)
    stem = stem.strip(_SEPARATORS)

    return (stem + suffix).lower()
def main():
    """Rename every file named on the command line to its converted name.

    Each argument after the program name is treated as a path.  Only the last
    path component is passed to ``convert``; the directory part is kept, so
    the file stays in the directory it was in.  Files whose name is already
    in converted form are left alone.  Failures are reported on standard
    output and do not stop the processing of the remaining arguments.
    """
    for raw_path in sys.argv[1:]:
        # Python 2 hands arguments over as byte strings; decode once so the
        # unicode-keyed transliteration table can match.  On Python 3 the
        # arguments are already text.
        if isinstance(raw_path, bytes):
            path = raw_path.decode('utf-8')
        else:
            path = raw_path

        directory, name = os.path.split(path)
        new_name = convert(name)

        # rename only if the name changed
        if new_name == name:
            continue

        if not new_name:
            # Every character was filtered out; there is no target name.
            print("Cannot rename file " + path + ". No usable characters left in its name.")
            continue

        # os.path.join supplies the separator between directory and name
        # (and yields just the name when there is no directory part).
        new_path = os.path.join(directory, new_name)
        try:
            os.rename(raw_path, new_path)
        except OSError:
            print("Cannot rename file " + path + ". Does it exists?")
# Run the renaming only when the file is executed as a script, not when it is
# imported as a module.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment