Created
January 23, 2022 07:18
-
-
Save nh2/a4e8b6a764ac4d258dbb8a4daec5868b to your computer and use it in GitHub Desktop.
Tool to move contents of one directory tree into another, safely.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3
# Moves contents of one directory tree into another, safely.
import argparse | |
import filecmp | |
import shlex | |
import sys | |
from dataclasses import dataclass | |
from pathlib import Path | |
# pathlib follows symlinks by default in various functions, which is
# dangerous for our use case; so we define wrapper functions.
def really_exists(path: Path) -> bool:
    """Return True if `path` exists and is not itself a symlink.

    `Path.exists()` follows symlinks, so a symlink is explicitly rejected
    here even when the thing it points to exists.
    """
    if not path.exists():
        return False
    return not path.is_symlink()
def really_does_not_exist(path: Path) -> bool:
    """Return True if nothing at all occupies `path`.

    A symlink counts as "something", even a dangling one for which
    `Path.exists()` reports False.
    """
    return not (path.exists() or path.is_symlink())
def is_real_dir(path: Path) -> bool:
    """Return True if `path` is a directory and not a symlink to one."""
    if not path.is_dir():
        return False
    return not path.is_symlink()
def is_real_file(path: Path) -> bool:
    """Return True if `path` is a regular file and not a symlink to one."""
    points_at_file = path.is_file()
    # Only consult is_symlink() when is_file() succeeded.
    return False if not points_at_file else not path.is_symlink()
def quote_path(path: Path) -> str:
    """Return `path` as a single shell-escaped token for log output."""
    text = str(path)
    return shlex.quote(text)
@dataclass
class Settings:
    """Options controlling how the tree move is carried out."""

    # When set, actions are only printed; nothing is renamed or deleted.
    dry_run: bool = False
    # When set, file comparison is done with `filecmp.cmp(..., shallow=True)`.
    skip_file_contents_comparison: bool = False
def move_trees_rec(
    source_dir: Path,
    target_dir: Path,
    settings: Settings,
) -> bool:
    """Move all dirents from `source_dir` to `target_dir` that can be moved
    conflict-free.

    Recurses into subdirectories that exist on both sides.
    Does not follow symlinks.
    Removes `source_dir` contents if they match and are equal to the
    corresponding `target_dir` contents.
    With `settings.dry_run` set, every action is printed but nothing is
    renamed or deleted.

    Args:
        source_dir: Existing real directory whose entries are moved away.
        target_dir: Existing real directory that receives the entries.
        settings: Dry-run and comparison options.

    Returns:
        Whether the contents of `source_dir` could be moved entirely, such
        that it would now be empty. Returns False without touching anything
        if `source_dir` and `target_dir` are the same directory.
    """
    # Safety guard: if both paths are the same directory, every entry would
    # compare equal to itself and be unlinked, destroying the only copy.
    if source_dir.samefile(target_dir):
        print(f"conflict-same-dir {quote_path(source_dir)} <-> {quote_path(target_dir)}")
        return False

    all_moved = True
    for source in source_dir.iterdir():
        target = target_dir / source.name

        if really_does_not_exist(target):
            print(f"mv {quote_path(source)} -> {quote_path(target)}")
            if not settings.dry_run:
                source.rename(target)
            continue

        # Target exists; handle it depending on file type.

        # If both are directories, recurse.
        if is_real_dir(source) and is_real_dir(target):
            print(f"recurse-dir {quote_path(source)}")
            all_moved_rec = move_trees_rec(source_dir=source, target_dir=target, settings=settings)
            all_moved &= all_moved_rec

        # If both are symlinks, count as moved if they point to the same place.
        elif source.is_symlink() and target.is_symlink():
            print(f"compare-symlink {quote_path(source)} <-> {quote_path(target)}")
            has_equal_link_contents = source.readlink() == target.readlink()
            if has_equal_link_contents:
                print(f"rm-equal-symlink {quote_path(source)}")
                if not settings.dry_run:
                    source.unlink()
            else:
                print(f"conflict-symlink {quote_path(source)} <-> {quote_path(target)}")
                all_moved = False

        # If both are real files, count as moved if they compare equal.
        elif is_real_file(source) and is_real_file(target):
            compare_label = 'filemetadata' if settings.skip_file_contents_comparison else 'filecontents'
            print(f"compare-{compare_label} {quote_path(source)} <-> {quote_path(target)}")
            has_equal_contents = filecmp.cmp(str(source), str(target), shallow=settings.skip_file_contents_comparison)
            # Each pair is compared only once, so filecmp's internal result
            # cache is of no use to us; drop it instead of letting it grow.
            filecmp.clear_cache()
            if has_equal_contents:
                print(f"rm-equal-{compare_label} {quote_path(source)}")
                if not settings.dry_run:
                    source.unlink()
            else:
                print(f"conflict-{compare_label} {quote_path(source)} <-> {quote_path(target)}")
                all_moved = False

        # Any other combination of file types is a conflict (not moved).
        else:
            print(f"conflict {quote_path(source)} <-> {quote_path(target)}")
            all_moved = False

    # Delete source dir if it's now empty.
    if all_moved:
        print(f"rmdir {source_dir}")
        if not settings.dry_run:
            # Re-check emptiness: entries may have been added since we listed
            # the directory.
            is_source_dir_still_empty = not any(True for _ in source_dir.iterdir())
            if is_source_dir_still_empty:
                source_dir.rmdir()
            else:
                print(f"Contents of source dir '{source_dir}' were modified, not performing rmdir", file=sys.stderr)

    return all_moved
def move_trees(
    source_dir: Path,
    target_dir: Path,
    settings: Settings,
) -> None:
    """Validate both directories, then move `source_dir` into `target_dir`.

    Args:
        source_dir: Directory tree whose contents are moved away.
        target_dir: Directory tree that receives the contents.
        settings: Dry-run and comparison options passed on unchanged.

    Raises:
        SystemExit: If either path is not an existing real (non-symlink)
            directory, or if both paths refer to the same directory.
    """
    # `sys.exit` instead of the `exit` builtin: the latter is injected by the
    # `site` module and is not guaranteed to be available.
    if not is_real_dir(source_dir):
        sys.exit(f"Source dir '{source_dir}' must be an existing directory")
    if not is_real_dir(target_dir):
        sys.exit(f"Target dir '{target_dir}' must be an existing directory")
    # Moving a directory onto itself would make every file compare equal to
    # itself and be deleted, so refuse it up front.
    if source_dir.samefile(target_dir):
        sys.exit(f"Source dir '{source_dir}' and target dir '{target_dir}' must not be the same directory")
    move_trees_rec(source_dir=source_dir, target_dir=target_dir, settings=settings)
def main():
    """Parse the command line and run the tree move with the chosen options."""
    cli = argparse.ArgumentParser(
        description='Moves contents of one directory tree into another, safely. Only moves those contents can be moved conflict-free.',
    )
    cli.add_argument('source', metavar='SOURCE', help='source directory tree')
    cli.add_argument('target', metavar='TARGET', help='target directory tree')
    # argparse derives the attribute names `dry_run` and
    # `skip_file_contents_comparison` from the long option names.
    cli.add_argument(
        '--dry-run',
        action='store_true',
        help='Perform a trial run with no changes made.',
    )
    cli.add_argument(
        '--skip-file-contents-comparison',
        action='store_true',
        help='Compare only file size and modification time instead of contents when files are to be compared.',
    )
    parsed = cli.parse_args()

    options = Settings(
        dry_run=parsed.dry_run,
        skip_file_contents_comparison=parsed.skip_file_contents_comparison,
    )
    move_trees(
        source_dir=Path(parsed.source),
        target_dir=Path(parsed.target),
        settings=options,
    )
# Run the command-line interface only when executed as a script.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment