Skip to content

Instantly share code, notes, and snippets.

@nh2
Created January 23, 2022 07:18
Show Gist options
  • Save nh2/a4e8b6a764ac4d258dbb8a4daec5868b to your computer and use it in GitHub Desktop.
Save nh2/a4e8b6a764ac4d258dbb8a4daec5868b to your computer and use it in GitHub Desktop.
Tool to move contents of one directory tree into another, safely.
#! /usr/bin/env python3
# Moves contents of one directory tree into another, safely.
import argparse
import filecmp
import shlex
import sys
from dataclasses import dataclass
from pathlib import Path
# pathlib follows symlinks by default in various functions,
# which is dangerous for our use case; so we define wrapper functions.
def really_exists(path: Path) -> bool:
    """Return True if `path` exists and is not itself a symlink."""
    if not path.exists():
        return False
    return not path.is_symlink()
def really_does_not_exist(path: Path) -> bool:
    """Return True if nothing at all is at `path`, not even a dangling symlink."""
    return not (path.exists() or path.is_symlink())
def is_real_dir(path: Path) -> bool:
    """Return True if `path` is a directory and not a symlink to one."""
    if not path.is_dir():
        return False
    return not path.is_symlink()
def is_real_file(path: Path) -> bool:
    """Return True if `path` is a regular file and not a symlink to one."""
    if not path.is_file():
        return False
    return not path.is_symlink()
def quote_path(path: Path) -> str:
    """Return `path` as a shell-escaped string, for unambiguous log output."""
    raw = str(path)
    return shlex.quote(raw)
@dataclass
class Settings:
    """Options controlling how directory trees are moved."""

    # Only print the actions that would be taken; do not touch the filesystem.
    dry_run: bool = False
    # Passed as `shallow=` to `filecmp.cmp` when comparing regular files.
    skip_file_contents_comparison: bool = False
def move_trees_rec(
    source_dir: Path,
    target_dir: Path,
    settings: Settings,
) -> bool:
    """Move all dirents from `source_dir` to `target_dir` that can be moved
    conflict-free.

    Recurses into subdirectories that exist as real directories on both sides.
    Does not follow symlinks.
    Removes `source_dir` contents if they match and are equal to corresponding
    `target_dir` contents, and finally removes `source_dir` itself if nothing
    is left in it.

    With `settings.dry_run` set, only prints what would be done.

    Args:
        source_dir: Existing directory whose entries are moved away.
        target_dir: Existing directory that receives the entries.
        settings: Dry-run and comparison options.

    Returns:
        Whether the `source_dir`'s contents could be entirely moved such
        that it would now be empty.

    Raises:
        OSError: If a directory cannot be inspected or a filesystem
            operation (rename, unlink, rmdir) fails.
    """
    # Refuse to merge a directory into itself (same path, or the same
    # directory reached through different paths): every entry would be
    # compared against itself, found equal, and deleted.
    if source_dir.samefile(target_dir):
        print(f"conflict-same-dir {quote_path(source_dir)} <-> {quote_path(target_dir)}")
        return False

    all_moved = True

    # Snapshot the listing first: entries are renamed/unlinked while looping,
    # and what a live `iterdir()` iterator yields is unspecified when the
    # directory changes underneath it.
    for source in list(source_dir.iterdir()):
        target = target_dir / source.relative_to(source_dir)

        if really_does_not_exist(target):
            print(f"mv {quote_path(source)} -> {quote_path(target)}")
            if not settings.dry_run:
                source.rename(target)
            continue

        # target exists; handle it depending on file type.

        # if dir, recurse
        if is_real_dir(source) and is_real_dir(target):
            print(f"recurse-dir {quote_path(source)}")
            if not move_trees_rec(source_dir=source, target_dir=target, settings=settings):
                all_moved = False

        # if symlink, count as moved if link targets are equal
        elif source.is_symlink() and target.is_symlink():
            print(f"compare-symlink {quote_path(source)} <-> {quote_path(target)}")
            if source.readlink() == target.readlink():
                print(f"rm-equal-symlink {quote_path(source)}")
                if not settings.dry_run:
                    source.unlink()
            else:
                print(f"conflict-symlink {quote_path(source)} <-> {quote_path(target)}")
                all_moved = False

        # if real file, count as moved if the files compare equal
        elif is_real_file(source) and is_real_file(target):
            compare_label = 'filemetadata' if settings.skip_file_contents_comparison else 'filecontents'
            print(f"compare-{compare_label} {quote_path(source)} <-> {quote_path(target)}")
            has_equal_contents = filecmp.cmp(
                str(source),
                str(target),
                shallow=settings.skip_file_contents_comparison,
            )
            # We don't need filecmp's result cache (it could grow without
            # bound over a large tree), so drop it after every comparison.
            filecmp.clear_cache()
            if has_equal_contents:
                print(f"rm-equal-{compare_label} {quote_path(source)}")
                if not settings.dry_run:
                    source.unlink()
            else:
                print(f"conflict-{compare_label} {quote_path(source)} <-> {quote_path(target)}")
                all_moved = False

        # if any other file type (or mismatching types), count as conflict
        else:
            print(f"conflict {quote_path(source)} <-> {quote_path(target)}")
            all_moved = False

    # Delete source dir if it's now empty.
    if all_moved:
        print(f"rmdir {source_dir}")
        if not settings.dry_run:
            # Re-check emptiness: something else may have added entries
            # to the directory while we were working on it.
            is_source_dir_still_empty = next(source_dir.iterdir(), None) is None
            if is_source_dir_still_empty:
                source_dir.rmdir()
            else:
                print(f"Contents of source dir '{source_dir}' were modified, not performing rmdir", file=sys.stderr)

    return all_moved
def move_trees(
    source_dir: Path,
    target_dir: Path,
    settings: Settings,
) -> None:
    """Validate the two directories, then move `source_dir`'s contents into
    `target_dir`.

    Args:
        source_dir: Directory tree to move contents out of.
        target_dir: Directory tree to move contents into.
        settings: Dry-run and comparison options.

    Raises:
        SystemExit: With an error message if either path is not an existing
            real (non-symlink) directory, or if both refer to the same
            directory.
    """
    # `sys.exit` rather than the `exit` builtin: the latter is injected by
    # the `site` module for interactive use and is not always available.
    if not is_real_dir(source_dir):
        sys.exit(f"Source dir '{source_dir}' must be an existing directory")
    if not is_real_dir(target_dir):
        sys.exit(f"Target dir '{target_dir}' must be an existing directory")
    # Merging a directory into itself would compare every entry with itself,
    # find it equal, and delete it. `samefile` also catches the same
    # directory being reached through two different paths.
    if source_dir.samefile(target_dir):
        sys.exit(f"Source dir '{source_dir}' and target dir '{target_dir}' must not be the same directory")
    move_trees_rec(source_dir=source_dir, target_dir=target_dir, settings=settings)
def main(argv=None) -> None:
    """Command-line entry point: parse arguments and move the trees.

    Args:
        argv: Argument strings to parse (without the program name).
            Defaults to `None`, in which case argparse reads `sys.argv[1:]`.

    Raises:
        SystemExit: Raised by argparse on `--help` or invalid arguments.
    """
    parser = argparse.ArgumentParser(
        description=(
            'Moves contents of one directory tree into another, safely. '
            'Only moves those contents that can be moved conflict-free.'
        ),
    )
    parser.add_argument('source', metavar='SOURCE', help='source directory tree')
    parser.add_argument('target', metavar='TARGET', help='target directory tree')
    parser.add_argument(
        '--dry-run',
        dest='dry_run',
        action='store_true',
        help='Perform a trial run with no changes made.',
    )
    parser.add_argument(
        '--skip-file-contents-comparison',
        dest='skip_file_contents_comparison',
        action='store_true',
        help='Compare only file size and modification time instead of contents when files are to be compared.',
    )
    args = parser.parse_args(argv)

    move_trees(
        source_dir=Path(args.source),
        target_dir=Path(args.target),
        settings=Settings(
            dry_run=args.dry_run,
            skip_file_contents_comparison=args.skip_file_contents_comparison,
        ),
    )
# Run the command-line interface only when executed as a script,
# not when this module is imported.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment