Skip to content

Instantly share code, notes, and snippets.

@andreberg
Last active May 26, 2021 16:52
Show Gist options
  • Save andreberg/c563db74f697ccafa7fb18f0b395b4f2 to your computer and use it in GitHub Desktop.
Save andreberg/c563db74f697ccafa7fb18f0b395b4f2 to your computer and use it in GitHub Desktop.
[Rename Files] Recursive regex file renamer #python #cli #files #renamer #script #batch #bulk
#!/usr/bin/python
# encoding: utf-8
import os
import re
import sys
import time
import argparse
from datetime import date
#
# -------------- DEFAULT SETTINGS ---------------
#
# Search and replacement patterns have no built-in default value.
searchstr = replacestr = None

# Directory the walk starts from.
basedir = os.curdir

# Regex flags as a comma-separated string, and the per-name substitution
# limit (0 means "replace every occurrence").
flags = 'ignorecase, unicode'
count = 0

# Boolean switches.
recursive, dryrun, listpatterns = True, False, False

# Directory names that are never descended into.
excludedirs = ['CVS', '.svn']
# DEBUG variable is set at the end
# just before main() call
def parse_args():
    """Build the command-line parser and return the parsed arguments.

    Defaults for the options come from the module-level settings
    (``basedir``, ``flags``, ``count``, ``recursive``, ``dryrun``,
    ``excludedirs``).

    Returns:
        argparse.Namespace with the attributes ``basedir``, ``searchstr``,
        ``replacestr``, ``recursive``, ``flags``, ``count``, ``dryrun`` and
        ``excludedirs``.
    """
    parser = argparse.ArgumentParser(description='Recursive regex file renamer -- (c) André Berg {}'.format(date.today().year))
    # NOTE: DIR is a required positional, so argparse never applies this
    # default; it only shows up in the help text.
    parser.add_argument('basedir', metavar='DIR', default=basedir, help='root directory to start from (default: %(default)s)')
    parser.add_argument('searchstr', metavar='<searchstr>', help='search pattern')
    parser.add_argument('replacestr', metavar='<replacestr>', help='replacement pattern')
    parser.add_argument('-l', '--list-patterns', action=ListPatternsAction, help='show a list of common regex patterns and exit')
    parser.add_argument('-r', '--recursive', action='store_true', default=recursive, help='process sub directories? (default: %(default)s)')
    # '--recursive' is a store_true flag whose default is already True, so on
    # its own it can never turn recursion off. This companion flag writes
    # False into the same destination.
    parser.add_argument('--no-recursive', action='store_false', dest='recursive', default=recursive, help='only process DIR itself, not its sub directories')
    parser.add_argument('-f', '--re-flags', default=flags, dest='flags', help='set regex flags with a CSV string (default: %(default)s)')
    parser.add_argument('-c', '--count', default=count, dest='count', type=int, help='set the maximum number of pattern occurrences to be replaced within a match (0 == all) (default: %(default)s)')
    parser.add_argument('-d', '--dryrun', action='store_true', default=dryrun, help='do a dryrun? (show what would happen) (default: %(default)s)')
    parser.add_argument('-e', '--excludedirs', nargs='+', metavar='DIR', default=excludedirs, help='dir names to exclude (default: %(default)s)')
    # ListPatternsAction falls back to ``parser.patterns`` when it was not
    # given its own pattern list, so attach the list before parsing.
    parser.patterns = patterns
    return parser.parse_args()
def main():
    """Rename directory entries whose names match the search pattern.

    Parses the command line, walks the base directory (only the top level
    when recursion is disabled), applies the regex substitution to every
    entry name and renames the entries whose name changes. In dry-run mode
    the renames are only printed. A summary is printed at the end and the
    process exits with status 0, or with status 1 if the base directory is
    missing or the search pattern is invalid.
    """
    args = parse_args()
    basedir = args.basedir
    searchstr = args.searchstr
    replacestr = args.replacestr
    recursive = args.recursive
    dryrun = args.dryrun
    excludedirs = args.excludedirs
    flags = re_flags_list_to_bitfield(args.flags)
    count = args.count
    if DEBUG:
        # Diagnostic only: show the combined regex flag value.
        print(flags)
    if not os.path.isdir(basedir):
        print("Directory at '{!s}' doesn't exist. Exiting...".format(basedir))
        sys.exit(1)
    # Compile once up front so a bad pattern (or an incompatible flag
    # combination, which raises ValueError) is reported before anything is
    # touched, instead of as a traceback on the first file.
    try:
        pattern = re.compile(searchstr, flags)
    except (re.error, ValueError) as err:
        print("Invalid search pattern {!r}: {}".format(searchstr, err))
        sys.exit(1)
    dirs_processed = 0
    files_processed = 0
    files_renamed = 0
    if dryrun:
        print("---- DRY RUN ----")
    t0 = time.perf_counter()
    for path, dirs, files in os.walk(basedir):
        print("Processing {}".format(path))
        # Prune first, so excluded directories are neither renamed nor visited.
        for dirname in excludedirs:
            if dirname in dirs:
                print("Skipping {} (config: excludedirs)".format(dirname))
                dirs.remove(dirname)
        subdirs = set(dirs)
        # Sub-directories are renamed as well as files; iterate over a copy
        # because ``dirs`` is edited in place below.
        for filename in list(dirs) + files:
            files_processed += 1
            newfilename, number_of_subs_made = pattern.subn(replacestr, filename, count=count)
            if newfilename == filename:  # no sense in renaming the file to its current name
                continue
            if not newfilename or os.sep in newfilename or (os.altsep and os.altsep in newfilename):
                print("Skipping {!r}: {!r} is not a valid file name".format(filename, newfilename))
                continue
            oldfilepath = os.path.join(path, filename)
            newfilepath = os.path.join(path, newfilename)
            if _rename_would_clobber(oldfilepath, newfilepath):
                # os.rename may silently replace an existing target.
                print("Skipping {!r}: target {!r} already exists".format(filename, newfilename))
                continue
            print("Renaming {!r} to {!r}".format(filename, newfilename))
            if not dryrun:
                try:
                    os.rename(oldfilepath, newfilepath)
                except OSError as err:
                    print("Could not rename {!r}: {}".format(filename, err))
                    continue
                if filename in subdirs:
                    # Tell os.walk the new name, otherwise it would try to
                    # descend into the old, now missing, directory.
                    dirs[dirs.index(filename)] = newfilename
            if DEBUG:
                print("Number of subs made: {}".format(number_of_subs_made))
            files_renamed += 1
        dirs_processed += 1
        if not recursive:
            break
    t1 = time.perf_counter()
    tt = t1 - t0
    dirsstr = "dir" if dirs_processed == 1 else "dirs"
    filespstr = "file" if files_processed == 1 else "files"
    filesrstr = "file" if files_renamed == 1 else "files"
    print("Done in {:1.3f}s".format(tt))
    if dryrun:
        print("{} {} will be renamed".format(files_renamed, filesrstr))
    else:
        print("{} {} renamed".format(files_renamed, filesrstr))
    print("Processed {} {} in {} {}".format(files_processed, filespstr, dirs_processed, dirsstr))
    if dryrun:
        print("---- DRY RUN END ----")
    sys.exit(0)


def _rename_would_clobber(oldpath, newpath):
    """Return True if *newpath* already exists as a different entry than *oldpath*."""
    if not os.path.lexists(newpath):
        return False
    try:
        # Same entry under another spelling (e.g. a case-only change on a
        # case-insensitive file system) is not a collision.
        return not os.path.samestat(os.lstat(oldpath), os.lstat(newpath))
    except OSError:
        # If the entries cannot be inspected, err on the side of not overwriting.
        return True
# Example patterns shown by --list-patterns.
# format: [searchstr, replacestr, description]
patterns = [
    # Anchored with '^' so that only a single digit at the very start of the
    # name is padded; unanchored, the pattern also rewrote digits elsewhere
    # (e.g. 'file10.txt' became 'file100.txt').
    [r'^([0-9])([^0-9]+)', r'0\1\2', 'Add leading zero for filenames beginning with single digit number'],
    [r'(.+?)\.jpg$', r'\1.png', 'Replace file extension'],
    [r'([^\w\.])', r'', 'Remove non-word filename characters'],
]
# argparse action that prints the known patterns and exits right away,
# so the positional arguments do not have to be supplied.
class ListPatternsAction(argparse.Action):
    """Print a list of [search, replace, description] patterns, then exit."""

    def __init__(self,
                 option_strings,
                 patterns=None,
                 dest=argparse.SUPPRESS,
                 default=argparse.SUPPRESS,
                 help="show a list of common regex patterns and exit"):
        # nargs=0: the option is a bare flag and consumes no values.
        super().__init__(option_strings=option_strings,
                         dest=dest,
                         default=default,
                         nargs=0,
                         help=help)
        self.patterns = patterns

    def __call__(self, parser, namespace, values, option_string=None):
        # Prefer the list handed to the action; otherwise use the one
        # attached to the parser.
        entries = self.patterns if self.patterns is not None else parser.patterns
        print("Listing common regex patterns: ")
        for search, replace, text in entries:
            print("",
                  " description: {}".format(text),
                  " search: r'{}'".format(search),
                  " replace: r'{}'".format(replace),
                  sep="\n")
        print()
        parser.exit()
def re_flags_list_to_bitfield(csv_flag_str):
    """Convert a comma-separated list of flag names into an ``re`` flag value.

    Recognized names (case-insensitive, surrounding whitespace ignored):
    ascii, unicode, ignorecase, locale, verbose, multiline, dotall,
    template, debug. Unrecognized or empty entries are ignored.

    Args:
        csv_flag_str: e.g. ``'ignorecase, unicode'``; ``None`` or an empty
            string yields no flags.

    Returns:
        The bitwise OR of the matching ``re`` flags, or 0 if none matched.
    """
    known_flags = ('ascii', 'unicode', 'ignorecase', 'locale', 'verbose',
                   'multiline', 'dotall', 'template', 'debug')
    re_flags = 0
    for entry in (csv_flag_str or '').split(','):
        name = entry.strip().lower()
        if name in known_flags:
            # getattr with a default: re.TEMPLATE no longer exists on newer
            # Python versions, where the name then contributes nothing
            # instead of raising AttributeError.
            re_flags |= getattr(re, name.upper(), 0)
    return re_flags
# A true value enables the extra diagnostic output in main().
DEBUG = 1
if __name__ == '__main__':
    # Debug convenience: when the script is started without any arguments
    # and the module-level searchstr/replacestr defaults have been filled in,
    # run a dry run with those defaults. Arguments are never appended to a
    # real command line, and the None placeholders are never injected.
    # Other useful debug invocations: ['--list-patterns'] or ['--help'].
    if DEBUG and len(sys.argv) == 1 and searchstr is not None and replacestr is not None:
        sys.argv += ['--dryrun', '--re-flags=ascii', basedir, searchstr, replacestr]
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment