Skip to content

Instantly share code, notes, and snippets.

@MatteoLacki
Last active May 19, 2020 11:43
Show Gist options
  • Save MatteoLacki/28a75544e31b8185f4bc42ebe0fe4b45 to your computer and use it in GitHub Desktop.
Save MatteoLacki/28a75544e31b8185f4bc42ebe0fe4b45 to your computer and use it in GitHub Desktop.
"""All this is meant to work only on Windows."""
import os
from pathlib import Path
import subprocess
import sys
import time
import hashlib
import argparse
import logging
def age(file_path, unit='h'):
    """Return the time elapsed since the ctime of a file.

    The age is the current time minus `os.path.getctime(file_path)`.
    On Windows that timestamp is the creation time of the file.

    Arguments:
        file_path: Path to an existing file.
        unit (str): 's' to get seconds, 'h' to get hours.

    Returns:
        float: Age of the file in the requested unit.

    Raises:
        ValueError: If `unit` is neither 's' nor 'h'.
        OSError: If the file does not exist or cannot be accessed.
    """
    # Validate explicitly: an `assert` is removed under `python -O`, and any
    # unknown unit would then silently be reported in hours.
    if unit not in ('s', 'h'):
        raise ValueError(f"unit must be 's' or 'h', got {unit!r}")
    age_in_s = time.time() - os.path.getctime(file_path)
    return age_in_s if unit == 's' else age_in_s / 3600
def get_size_in_kilobytes(file_path):
    """Return the size of a file as reported by the operating system.

    NOTE: despite the name of the function, the value is the plain byte
    count; no conversion to kilobytes is performed.
    """
    size = os.stat(file_path).st_size
    return size
def copy(source, target, *file_names):
    """Copy files with Robocopy.exe.

    The /is switch makes Robocopy include files that are the same in the
    source and in the target, so they are copied again.

    Arguments:
        source: Folder to copy the files from.
        target: Folder to copy the files to.
        *file_names: Names of the files inside `source` to copy.

    Returns:
        int: Exit code of Robocopy.

    Raises:
        ValueError: If no file names are given.
    """
    # Validate explicitly: an `assert` is removed under `python -O`, and
    # Robocopy would then be started without any file filter.
    if not file_names:
        raise ValueError("Specify file names to copy.")
    # Pass the arguments as a list: splitting a formatted command string on
    # whitespace breaks every path or file name that contains a space.
    cmd = ["robocopy", str(source), str(target), *map(str, file_names), "/is"]
    return subprocess.run(cmd).returncode
def check_sum(file_path, algo=hashlib.blake2b, chunksize=8192):
    """Hash the contents of a file, reading it chunk by chunk.

    Arguments:
        file_path: Path to the file to hash.
        algo (hashlib function): E.g. hashlib.blake2b, hashlib.md5.
        chunksize (int): Number of bytes requested per read.

    Returns:
        str: Hexadecimal digest of the file contents.
    """
    with open(file_path, "rb") as stream:
        digest = algo()
        # Keep reading until read() hands back empty bytes (end of file).
        for block in iter(lambda: stream.read(chunksize), b""):
            digest.update(block)
    return digest.hexdigest()
def check_sums_aggree(file_name_0, file_name_1, **kwds):
    """Tell whether two files have equal check sums.

    Keyword arguments are forwarded unchanged to `check_sum` for both files.
    """
    first_digest = check_sum(file_name_0, **kwds)
    second_digest = check_sum(file_name_1, **kwds)
    return first_digest == second_digest
def sizes_aggree(file_name_0, file_name_1):
    """Tell whether two files have the same size.

    Both sizes come from `get_size_in_kilobytes`.
    """
    first_size = get_size_in_kilobytes(file_name_0)
    second_size = get_size_in_kilobytes(file_name_1)
    return first_size == second_size
####################################################################################
# Command line interface.
# NOTE: `ap` is first the parser and is rebound to the parsed arguments at the
# end of this section; the code below reads the options from it.
ap = argparse.ArgumentParser(description='Sync files between folders.',
                             formatter_class=argparse.ArgumentDefaultsHelpFormatter,
                             epilog=r"Example: python syncFiles.py C:\test\V*.raw V:\RAW_test")
ap.add_argument('source_pattern',
                type=Path,
                help='Pattern of the files to sync.')
ap.add_argument('target_folder',
                type=Path,
                help='Path to the folder that the files are copied to.')
ap.add_argument('--min_age_hours',
                type=float,
                help='Minimal age in hours for the files to be copied.',
                default=24)
# A string default is converted by argparse with `type`, so `logs_path` is a
# Path even when the option is not given.
ap.add_argument('--logs_path',
                type=Path,
                help='Where to save logs.',
                default=r"C:\Logs\sync.log")
ap = ap.parse_args()
####################################################################################
# Create the folder for the log file first, then point the logging at the file.
ap.logs_path.parent.mkdir(parents=True, exist_ok=True)
logging.basicConfig(filename=ap.logs_path,
                    level=logging.INFO,
                    format='%(asctime)s:%(name)s:%(levelname)s:%(message)s:')
log = logging.getLogger('syncFiles.py')
log.info("copying files")
log.info("FROM: " + str(ap.source_pattern))
log.info("TO: " + str(ap.target_folder))
# Report the age threshold here (the target folder is already logged above).
log.info("How old are files in hours?: " + str(ap.min_age_hours))
####################################################################################
# Split the source pattern into the folder to search and the pattern of the
# file names, then keep only the matches that are old enough.
target_folder = ap.target_folder
source_folder = ap.source_pattern.parent
pattern = ap.source_pattern.name
old_files = [f for f in source_folder.glob(pattern) if age(f, 'h') >= ap.min_age_hours]
file_names = [f.name for f in old_files]
if not file_names:
    err = f"no files matching pattern {ap.source_pattern}"
    log.error(err)
    print(err)
    # `break` is a syntax error outside of a loop: end the script instead.
    sys.exit(1)

# Build the list outside of the f-string: reusing the same quote character
# inside a replacement field is a syntax error before Python 3.12.
old_files_str = " ".join(str(f) for f in old_files)
log.info(f"files older than {ap.min_age_hours} hours: {old_files_str}")

return_code = copy(source_folder, target_folder, *file_names)
log.info(f"robocopy exit code: {return_code}")
# Robocopy documents exit codes of 8 and above as at least one failure.
# The verification below still runs: it only deletes verified files.
if return_code >= 8:
    log.error(f"robocopy reported a failure (exit code {return_code})")

log.info("checking files and deleting wann alles stimmt.")
for sf in old_files:
    tf = target_folder/sf.name
    try:
        # Cheap check first: compare sizes before hashing both files.
        if not sizes_aggree(sf, tf):
            log.error(f"Files sizes differ: {sf} {tf}")
            continue
        log.info(f"File sizes aggree: {sf} {tf}")
        if not check_sums_aggree(sf, tf):
            log.error(f"Check sums differ: {sf} {tf}")
            continue
        log.info(f"Check sums aggree: {sf} {tf}")
        # The source is removed only after size and check sum both match.
        log.info(f"Deleting {sf}")
        sf.unlink()
    except FileNotFoundError:
        log.error(f"Target file missing: {tf}")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment