Skip to content

Instantly share code, notes, and snippets.

@louiskounios
Last active January 3, 2019 10:12
Show Gist options
  • Save louiskounios/ac2ae58dacb6f414c5d4dd4ed39e7420 to your computer and use it in GitHub Desktop.
Save louiskounios/ac2ae58dacb6f414c5d4dd4ed39e7420 to your computer and use it in GitHub Desktop.
Python script that flattens a directory without overwriting files that share a name. Takes absolute path to directory as input.
#!/usr/local/bin/python3
# Script that flattens a directory without overwriting files that share a name.
# Takes absolute path to directory as input.
#
# All files in any subdirectory of the user-provided directory, regardless of
# depth, are moved to the user-provided directory. When filenames clash,
# special care is taken by adding a counter to the filename (e.g., '_001').
#
# Linux only. Tested only on Python 3.7. Minimum version is 3.6.
#
# This has NOT been tested extensively. Use at your own risk.
import pathlib
import sys
def flatten_directory(path):
    """Flatten the directory at ``path`` and report the result as an exit code.

    Args:
        path: Directory to flatten. It is checked with ``_is_valid_path``
            before any file is touched.

    Returns:
        ``0`` after the files have been processed, or ``1`` when ``path``
        fails validation (in which case nothing is moved).
    """
    if _is_valid_path(path):
        _rename_files(path)
        return 0
    return 1
def _is_valid_path(path):
    """Check that ``path`` is an absolute ``pathlib.Path`` naming a directory.

    The checks run in a fixed order (type, absoluteness, directory) and the
    message for the first failing check is printed.

    Returns:
        ``True`` when every check passes, ``False`` otherwise.
    """
    problem = None
    if not isinstance(path, pathlib.Path):
        problem = '"path" argument must be instance of "pathlib.Path"'
    elif not path.is_absolute():
        problem = 'Provided path must be an absolute path'
    elif not path.is_dir():
        problem = 'Provided path must be a directory'

    if problem is None:
        return True
    print(problem)
    return False
def _top_dir_files(top_dir):
    """Collect the files located directly inside ``top_dir``.

    Returns:
        A set holding every ``top_dir.glob('*')`` match for which
        ``is_file()`` is true.
    """
    direct_files = set()
    for entry in top_dir.glob('*'):
        if entry.is_file():
            direct_files.add(entry)
    return direct_files
def _nested_dir_files(top_dir):
    """Collect the files that live in any subdirectory of ``top_dir``.

    Files whose parent is ``top_dir`` itself are left out; only matches of
    ``top_dir.glob('**/*')`` that are files at a deeper level are returned.

    Returns:
        A set of the nested file paths.
    """
    nested_files = set()
    for entry in top_dir.glob('**/*'):
        if entry.parent == top_dir:
            # Direct child of the top directory: not a nested file.
            continue
        if entry.is_file():
            nested_files.add(entry)
    return nested_files
def _filename_paths_map(path):
    """Group every file found under ``path`` by its filename.

    Args:
        path: The top directory whose files (direct and nested) are indexed.

    Returns:
        A dict mapping each filename to the list of paths carrying that
        name. A file located directly in ``path`` is always the first entry
        of its list; nested files follow in sorted path order.
    """
    filename_paths_map = {}

    # The helpers return sets, whose iteration order changes between runs.
    # Sorting keeps the grouping (and therefore any numbering derived from
    # list positions) reproducible.
    for entry in sorted(_top_dir_files(path)):
        # Names are unique within a single directory, so plain assignment
        # never discards an earlier entry.
        filename_paths_map[entry.name] = [entry]

    for entry in sorted(_nested_dir_files(path)):
        filename_paths_map.setdefault(entry.name, []).append(entry)

    return filename_paths_map
# Returns a suffixless filename.
# 'myfile.tar.gz' => 'myfile'
def _suffixless_name(path):
    """Return the filename of ``path`` with all of its suffixes removed.

    Exactly the suffixes reported by ``path.suffixes`` are stripped, so the
    result followed by those suffixes always reproduces ``path.name``. In
    particular a leading dot is kept: ``'.bashrc'`` stays ``'.bashrc'``
    instead of collapsing to an empty string.

    Args:
        path: A ``pathlib`` path object.

    Returns:
        The suffixless filename, e.g. ``'myfile'`` for ``'myfile.tar.gz'``.
    """
    name = path.name
    suffix_length = sum(len(suffix) for suffix in path.suffixes)
    if suffix_length == 0:
        # Nothing to strip (also avoids the ``name[:-0]`` empty-slice trap).
        return name
    return name[:-suffix_length]
# Returns the suffixes combined into a string.
# 'myfile.tar.gz' => '.tar.gz'
def _suffix(path):
    """Return every suffix of ``path`` concatenated into one string.

    Example: ``'myfile.tar.gz'`` yields ``'.tar.gz'``; a name without any
    suffix yields an empty string.
    """
    combined = ''
    for part in path.suffixes:
        combined += part
    return combined
# Returns a zero-padded counter suffix.
def _counter_suffix(current, max):
    """Build the ``'_<counter>'`` suffix for ``current``.

    The counter is zero-padded on the left to the number of characters in
    ``str(max)``; longer counters are left untouched.

    Example: ``_counter_suffix(3, 120)`` gives ``'_003'``.
    """
    width = len(str(max))
    digits = str(current).zfill(width)
    return f'_{digits}'
def _free_counter_name(path, start, total, reserved):
    """Return the first counter-suffixed filename for ``path`` not in ``reserved``.

    The counter starts at ``start`` and is padded for ``total`` files; it is
    incremented until the resulting name is not reserved.
    """
    base_name = _suffixless_name(path)
    suffix = _suffix(path)
    counter = start
    while True:
        candidate = base_name + _counter_suffix(counter, total) + suffix
        if candidate not in reserved:
            return candidate
        counter += 1


def _rename_files(top_dir):
    """Move every nested file into ``top_dir`` without overwriting anything.

    Files are grouped by filename:

    * a name used only by a file already in ``top_dir`` is left alone;
    * a name used only by one nested file is moved to ``top_dir`` unchanged,
      unless a non-file entry (e.g. a directory) in ``top_dir`` already
      holds that name;
    * every other file gets a counter added to its name (e.g. ``'_001'``)
      and is moved to ``top_dir``.

    A counter-suffixed name is only used if it is not already present in
    ``top_dir`` and is not the name of another file being processed; the
    counter is bumped until such a name is found. This matters because
    ``Path.rename`` silently replaces an existing destination file on Unix.

    Each action is reported on standard output.

    Args:
        top_dir: The directory being flattened.
    """
    filename_paths_map = _filename_paths_map(top_dir)

    # Snapshot of what currently occupies the top directory (files,
    # directories, anything else), taken before any rename happens.
    existing = {entry.name for entry in top_dir.iterdir()}
    # Names a generated destination must never use: everything already in
    # the top directory plus every filename that is still to be handled.
    reserved = existing | set(filename_paths_map)

    for paths in filename_paths_map.values():
        if len(paths) == 1:
            path = paths[0]

            # Filename in top dir does not clash with any other filename.
            # Do nothing.
            if path.parent == top_dir:
                print(f'{path} => No action necessary')
                continue

            # Filename in nested dir does not clash with any other filename.
            # Move the file to the top directory. If the name is taken by a
            # non-file entry, fall through to the counter-based naming.
            if path.name not in existing:
                new_name = top_dir / path.name
                print(f'{path} => {new_name}')
                path.rename(new_name)
                continue

        # Since we are renaming the top directory file _and_ any files with
        # the same name in any subdirectory, we can just loop over all files
        # and move them to the top directory with a running counter added to
        # their filename to ensure uniqueness.
        for idx, path in enumerate(paths, start=1):
            new_name = top_dir / _free_counter_name(
                path, idx, len(paths), reserved)
            reserved.add(new_name.name)
            print(f'{path} => {new_name}')
            path.rename(new_name)
# Command-line entry point: expects exactly one argument (the directory) and
# exits with the status returned by flatten_directory.
if __name__ == '__main__':
    cli_args = sys.argv[1:]
    if len(cli_args) != 1:
        print('Need exactly one argument: the absolute path to the directory')
        sys.exit(1)
    target_dir = pathlib.Path(cli_args[0])
    exit_code = flatten_directory(target_dir)
    sys.exit(exit_code)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment