Skip to content

Instantly share code, notes, and snippets.

@sr105
Created May 17, 2024 18:20
Show Gist options
  • Save sr105/c99cc6e85a45e9e0fdaee19d6c70af08 to your computer and use it in GitHub Desktop.
Save sr105/c99cc6e85a45e9e0fdaee19d6c70af08 to your computer and use it in GitHub Desktop.
Python method to keep two folders in sync (in one direction)
import os
from pathlib import Path
import shutil
# I needed a utility on Windows to sync log files from a very slow network drive to a local drive
# for faster analysis. I'm not sure why Windows doesn't support this. This is pretty quick. I run
# it just before analysis, and it grabs all the newer files and any that have changed.
def sync_dirs(src: Path, dst: Path) -> None:
    """Sync files from src to dst, skipping files with same name, size, and modified time.

    Only regular files directly inside ``src`` are considered; subdirectories
    are not descended into. Files are copied with ``shutil.copy2`` so the copy
    keeps the source's modification time, which is what lets a later run
    recognise it as unchanged. Nothing is ever deleted from ``dst``.

    Args:
        src: Directory to copy from. Must exist.
        dst: Directory to copy into. Created (including parents) if missing.

    Raises:
        FileNotFoundError: If ``src`` does not exist.
        OSError: If ``dst`` cannot be created or a file cannot be copied.
    """
    print("Sync Dirs:")
    print(f"\tSrc: {src.as_posix()}")
    print(f"\tDst: {dst.as_posix()}")

    def metadata_match(stat1: os.stat_result, stat2: os.stat_result) -> bool:
        # I'm on Windows, and it can't sync atime and ctime.
        return (stat1.st_size, stat1.st_mtime) == (stat2.st_size, stat2.st_mtime)

    # It is *so* much faster to use os.scandir(): https://stackoverflow.com/a/2485843/47078
    # The context manager closes the directory handle even if a copy fails part-way.
    with os.scandir(src) as entries:
        # Create dst only after src opened successfully, so a bad src path
        # does not leave an empty destination directory behind.
        dst.mkdir(parents=True, exist_ok=True)
        for entry in entries:
            if not entry.is_file():
                continue
            dest = dst.joinpath(entry.name)
            # One stat() call answers both "does it exist?" and "does it match?".
            try:
                dest_stat = dest.stat()
            except FileNotFoundError:
                dest_stat = None
            if dest_stat is not None and metadata_match(entry.stat(), dest_stat):
                continue
            print(f"\t{entry.name}")
            shutil.copy2(src.joinpath(entry.name), dest)
if __name__ == "__main__":
    # Example invocation: mirror the slow network share onto the fast local disk.
    sync_dirs(
        src=Path('I:/slow/network/path'),
        dst=Path('C:/fast/local/path'),
    )
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment