Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
Rename files in a directory by removing non-alphanumeric characters that shouldn't be in a filename.
"""
Recursively rename all files and directories under a directory by replacing
whitespace with underscores and erasing all non-('a-zA-Z0-9' or '_' or '-' or '.')
characters.
"""
import re, os, shutil, argparse, sys
# Command-line interface: zero or more directories to process; with no
# arguments the current working directory is used.
parser = argparse.ArgumentParser(
    description="Rename all files in directory by replacing whitespace with underscores.",
)
parser.add_argument(
    'directories',
    metavar="DIR",
    nargs='*',
    default=[os.getcwd()],
    help="directories to walk. Default: CWD",
)

# Progress messages go to stderr so stdout stays clean.
err = sys.stderr.write
# Patterns are compiled once and written as raw strings: a plain '\s' is an
# invalid string escape that newer Python versions warn about.
_WHITESPACE_RUN = re.compile(r'\s+')
_DISALLOWED_CHAR = re.compile(r'[^a-zA-Z0-9._-]')
_UNDERSCORE_RUN = re.compile(r'_+')


def new_names(filenames):
    """
    Generate new filenames for the given filenames.

    Each name is transformed in three steps:
      1. every run of whitespace becomes a single underscore,
      2. every character other than a-z, A-Z, 0-9, '.', '_' and '-' is erased,
      3. runs of consecutive underscores are merged into one.

    `filenames` is any iterable of strings. Returns a list of the sanitized
    names in the same order. A name made up entirely of disallowed characters
    becomes the empty string; callers must handle that case themselves.
    """
    sanitized = []
    for name in filenames:
        # Replace white space with underscores:
        name = _WHITESPACE_RUN.sub('_', name)
        # Erase non-alphanumeric-period-underscore-hyphen characters:
        name = _DISALLOWED_CHAR.sub('', name)
        # Merge consecutive underscores for aesthetics
        name = _UNDERSCORE_RUN.sub('_', name)
        sanitized.append(name)
    return sanitized
def _rename_entry(parent, old_name, new_name, message):
    """
    Rename parent/old_name to parent/new_name when it is safe to do so.

    `message` is a format string with two %s slots (old and new absolute
    path) that is written via `err` just before the rename happens.

    The rename is skipped, with a note written via `err`, when the sanitized
    name is empty, '.' or '..', or when something already exists at the
    target path.

    Returns the name the entry has afterwards: `new_name` if it was renamed,
    `old_name` if it was left alone.
    """
    if new_name == old_name:
        return old_name
    src = os.path.join(parent, old_name)
    if new_name in ('', '.', '..'):
        # Joining such a name onto `parent` would point at a directory
        # rather than at a new entry inside it.
        err("Skipping %s: sanitized name %r is not usable\n"
            % (os.path.abspath(src), new_name))
        return old_name
    dst = os.path.join(parent, new_name)
    if os.path.lexists(dst):
        # shutil.move may overwrite an existing file; never lose data when
        # two names sanitize to the same target.
        err("Skipping %s: %s already exists\n"
            % (os.path.abspath(src), os.path.abspath(dst)))
        return old_name
    err(message % (os.path.abspath(src), os.path.abspath(dst)))
    shutil.move(src, dst)
    return new_name


def rename_files_in_dir(dir):
    """
    Walk a directory and rename all files and sub-directories in the path.

    Every file and directory name found under `dir` is passed through
    `new_names`; entries whose sanitized name differs are renamed in place.
    `dir` itself is not renamed. Entries that cannot be renamed safely (see
    `_rename_entry`) are reported on stderr and left untouched.
    """
    for (dirpath, dirnames, filenames) in os.walk(dir):
        # Rename the filenames
        for f_old, f_new in zip(filenames, new_names(filenames)):
            _rename_entry(dirpath, f_old, f_new, "Renaming %s to %s\n")
        # Rename the directories, and modify dirnames in place to aid os.walk:
        # it must descend into each directory under the name it has *now*,
        # which is the old name whenever a rename was skipped.
        current_dirnames = []
        for d_old, d_new in zip(dirnames, new_names(dirnames)):
            current_dirnames.append(
                _rename_entry(dirpath, d_old, d_new, "Renaming dir %s to %s\n"))
        dirnames[:] = current_dirnames
if __name__ == '__main__':
    # Script entry point: sanitize names under every directory given on the
    # command line.
    cli_args = parser.parse_args()
    # Resolve every path up front, before any rename happens.
    targets = list(map(os.path.abspath, cli_args.directories))
    for target in targets:
        rename_files_in_dir(target)
@Kuzirashi
Copy link

Kuzirashi commented Dec 12, 2014

Wow, works great! Exactly what I was looking for to remove weird UTF characters from my directory names, so I can work with them using web manager.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment