Skip to content

Instantly share code, notes, and snippets.

@genzj
Created August 28, 2015 05:31
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save genzj/10f0d0f04e8e90c9387b to your computer and use it in GitHub Desktop.
Save genzj/10f0d0f04e8e90c9387b to your computer and use it in GitHub Desktop.
Clean / unquote URL-encoded sequences in the names of downloaded files.
#!/usr/bin/env python
from __future__ import generators
from functools import partial
import os, os.path
try:
from urllib import unquote
except ImportError:
from urllib.parse import unquote
def legalize_win_file_name(filename, substituent='.'):
    """Replace characters that Windows does not allow in a file name.

    Every occurrence of one of ``\\ / : * ? " < > |`` and every control
    character (code points 0-31) is replaced by *substituent*; all other
    characters are kept as they are.

    :param filename: the bare file name (not a path) to sanitise.
    :param substituent: replacement text for each forbidden character.
    :return: the sanitised file name.
    """
    invalid_chars = r'\/:*?"<>|'
    # Control characters are forbidden as well; they can show up after
    # unquoting sequences such as %0A or %09.
    return ''.join(
        substituent if (c in invalid_chars or ord(c) < 32) else c
        for c in filename
    )
def clean_name(filename, windows_name=True):
    """Return *filename* with its URL percent-escapes decoded.

    :param filename: the file name to decode.
    :param windows_name: when true (the default), the decoded name is also
        passed through ``legalize_win_file_name``.
    :return: the decoded, and optionally legalized, file name.
    """
    decoded = unquote(filename)
    if not windows_name:
        return decoded
    return legalize_win_file_name(decoded)
def rename_file(dirname, filename):
    """Rename one file inside *dirname* to its cleaned-up name.

    The new name is ``clean_name(filename)``. Nothing is done when the name
    is already clean. When another entry already occupies the target path,
    the file is left untouched and a message is printed.

    :param dirname: directory that contains the file.
    :param filename: current name of the file within *dirname*.
    """
    original = os.path.join(dirname, filename)
    target = os.path.join(dirname, clean_name(filename))
    if original == target:
        return
    # os.rename() silently replaces an existing file on POSIX and raises on
    # Windows; skip instead of losing data or aborting the whole walk.
    # lexists() also catches a dangling symlink sitting at the target path.
    if os.path.lexists(target):
        print("skip %s: %s already exists" % (original, target))
        return
    print("rename %s to %s" % (original, target))
    os.rename(original, target)
def walk_folder(topdir):
    """Clean the name of every file found under *topdir*.

    The tree is traversed with ``os.walk`` and each directory path / file
    name pair is handed to ``rename_file``. Only entries from the file-name
    lists are processed; directory names are not renamed.

    :param topdir: root directory of the tree to process.
    """
    for current_dir, _subdirs, names in os.walk(topdir):
        for name in names:
            rename_file(current_dir, name)
if __name__ == "__main__":
    # Run as a script: process the tree rooted at the current directory.
    start_dir = '.'
    walk_folder(start_dir)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment