Skip to content

Instantly share code, notes, and snippets.

@ViktorStiskala
Created August 4, 2010 13:33
Show Gist options
  • Save ViktorStiskala/508138 to your computer and use it in GitHub Desktop.
Save ViktorStiskala/508138 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Author: Viktor Stískala
# Contact: viktor (at) stiskala.cz
# Copyright: This module has been placed in the public domain.
"""
This module removes language-specific characters from file names
and makes them more usable on the web. It can also be used as a Nautilus
user script when placed inside ~/.gnome2/nautilus-scripts
"""
import re
import os, sys
# Transliteration table: maps each accented letter to its plain ASCII
# counterpart. Entries are grouped by base letter, lowercase row first and
# the matching uppercase row second.
conversion = {
    # a
    u'ä': 'a', u'á': 'a', u'à': 'a', u'ã': 'a', u'â': 'a',
    u'Ä': 'A', u'Á': 'A', u'À': 'A', u'Ã': 'A', u'Â': 'A',
    # c
    u'č': 'c', u'ć': 'c',
    u'Č': 'C', u'Ć': 'C',
    # d
    u'ď': 'd',
    u'Ď': 'D',
    # e
    u'ě': 'e', u'é': 'e', u'ë': 'e', u'è': 'e', u'ê': 'e',
    u'Ě': 'E', u'É': 'E', u'Ë': 'E', u'È': 'E', u'Ê': 'E',
    # i
    u'í': 'i', u'ï': 'i', u'ì': 'i', u'î': 'i',
    u'Í': 'I', u'Ï': 'I', u'Ì': 'I', u'Î': 'I',
    # l
    u'ľ': 'l', u'ĺ': 'l',
    u'Ľ': 'L', u'Ĺ': 'L',
    # n
    u'ń': 'n', u'ň': 'n', u'ñ': 'n',
    u'Ń': 'N', u'Ň': 'N', u'Ñ': 'N',
    # o
    u'ó': 'o', u'ö': 'o', u'ô': 'o', u'ò': 'o', u'õ': 'o', u'ő': 'o',
    u'Ó': 'O', u'Ö': 'O', u'Ô': 'O', u'Ò': 'O', u'Õ': 'O', u'Ő': 'O',
    # r
    u'ř': 'r', u'ŕ': 'r',
    u'Ř': 'R', u'Ŕ': 'R',
    # s
    u'š': 's', u'ś': 's',
    u'Š': 'S', u'Ś': 'S',
    # t
    u'ť': 't',
    u'Ť': 'T',
    # u
    u'ú': 'u', u'ů': 'u', u'ü': 'u', u'ù': 'u', u'ũ': 'u', u'û': 'u',
    u'Ú': 'U', u'Ů': 'U', u'Ü': 'U', u'Ù': 'U', u'Ũ': 'U', u'Û': 'U',
    # y
    u'ý': 'y',
    u'Ý': 'Y',
    # z
    u'ž': 'z', u'ź': 'z',
    u'Ž': 'Z', u'Ź': 'Z',
}
# Patterns are compiled once at import time rather than on every call.
# The character classes spell out A-Z/a-z instead of using re.IGNORECASE:
# with Unicode matching, [a-z] + IGNORECASE also accepts a handful of
# non-ASCII letters (e.g. the long s, U+017F), which would then leak into
# the supposedly ASCII-only result.
_SUFFIX_RE = re.compile(r'\.[A-Za-z]+$')
# re.UNICODE makes \s cover non-ASCII whitespace on Python 2 as it does on 3.
_WHITESPACE_RE = re.compile(r'\s+', re.UNICODE)
# Every character outside this whitelist is deleted. Parentheses are kept;
# backslashes are not (the earlier pattern whitelisted them by accident
# through double escaping inside a raw string).
_DISALLOWED_RE = re.compile(r'[^A-Za-z0-9_.()-]+')
# A run of the same separator, such as "__" or "--".
_REPEATED_RE = re.compile(r'([-_.])\1+')


def convert(text):
    """Return a lowercase, ASCII-only, web-friendly version of a file name.

    A trailing extension made only of letters (e.g. ".txt") is split off
    first and re-attached unchanged apart from lowercasing. The remaining
    stem is processed in this order:

    1. characters found in the ``conversion`` table are transliterated;
    2. each run of whitespace becomes a single "_";
    3. every character other than letters, digits, "_", ".", "-", "(" and
       ")" is removed;
    4. repeated separators ("__", "--", "..") collapse to one;
    5. leading and trailing "_", "." and "-" are stripped.

    :param text: file name (no directory part) as a text string
    :return: the normalised name; it may consist of the extension only, or
        be empty, when no character of the stem survives the filtering
    """
    match = _SUFFIX_RE.search(text)
    suffix = match.group(0) if match else ''
    stem = text[:-len(suffix)] if suffix else text

    # Characters missing from the table pass through here and are dealt
    # with by the whitelist below.
    stem = ''.join(conversion.get(char, char) for char in stem)

    stem = _WHITESPACE_RE.sub('_', stem)
    stem = _DISALLOWED_RE.sub('', stem)
    stem = _REPEATED_RE.sub(r'\1', stem)
    stem = stem.strip('-_.')

    return (stem + suffix).lower()
def _to_text(value):
    """Return *value* as text, decoding it as UTF-8 if it is a byte string."""
    # On Python 2 command-line arguments are byte strings; on Python 3 they
    # are already text and have no .decode() method.
    if isinstance(value, bytes):
        return value.decode('utf-8')
    return value


def main():
    """Rename every file given on the command line to its converted name.

    Each argument is split into directory and file name, the file name is
    passed through ``convert()``, and the file is renamed inside the same
    directory when the result differs. A failed rename is reported on
    standard output and processing continues with the next argument.
    """
    for path in sys.argv[1:]:
        text_path = _to_text(path)
        directory, name = os.path.split(text_path)
        # os.path.join inserts the separator between directory and name;
        # plain concatenation turned "dir/Name" into "dirname".
        new_path = os.path.join(directory, convert(name))

        # rename only if the name changed
        if new_path == text_path:
            continue

        # NOTE: on POSIX os.rename silently replaces an existing file, so
        # two inputs that convert to the same name overwrite each other.
        try:
            os.rename(path, new_path)
        except OSError:
            print("Cannot rename file " + text_path + ". Does it exists?")


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment