Skip to content

Instantly share code, notes, and snippets.

@humpydonkey
Last active February 2, 2019 07:12
Show Gist options
  • Save humpydonkey/a0d98092a6f6d7146b2996cd0e16b5dd to your computer and use it in GitHub Desktop.
Save humpydonkey/a0d98092a6f6d7146b2996cd0e16b5dd to your computer and use it in GitHub Desktop.
[check missing files] Check for missing files; if any are missing, copy them to another directory. #python #os #tool #file
import os
from typing import Set
import shutil
def check_existence_or_copy(source_dir: str, check_against_dir: str, copy_dst_dir: str) -> None:
    """Find files of ``source_dir`` absent from ``check_against_dir`` and copy them.

    Files are compared by base name only (directory structure is ignored).
    Every file found under ``source_dir`` whose name does not occur anywhere
    under ``check_against_dir`` is copied into ``copy_dst_dir``.

    Args:
        source_dir: Root of the tree whose files should be present elsewhere.
        check_against_dir: Root of the tree that is searched for those names.
        copy_dst_dir: Directory that receives copies of the missing files.
    """
    # The missing-name set is computed first, then handed to the copy step.
    copy_missing_files(
        source_dir,
        copy_dst_dir,
        check_missing(source_dir, check_against_dir),
    )
def check_missing(source_dir: str, check_against_dir: str) -> Set[str]:
    """Return the file names found under ``source_dir`` but not under ``check_against_dir``.

    Both trees are walked recursively and compared by base name only, so a
    file counts as present if any file with the same name exists anywhere
    under ``check_against_dir``. A summary is printed to stdout.

    Args:
        source_dir: Root of the tree that holds the reference files.
        check_against_dir: Root of the tree that is checked for those files.

    Returns:
        The set of file names that occur only under ``source_dir``.
    """
    names_in_source: Set = _collect_files(source_dir)
    names_in_target: Set = _collect_files(check_against_dir)
    missing = names_in_source.difference(names_in_target)
    print(f'Found {len(missing)} missing files.')
    # Only list the names when there is something to report.
    if len(missing) > 0:
        print(f'Missing file names: {missing}.')
    return missing
def copy_missing_files(source_dir: str, destination_dir: str, missing_file_names: Set[str]) -> None:
    """Copy every file under ``source_dir`` whose name is in ``missing_file_names``.

    The source tree is walked recursively; matching files from all
    sub-directories are copied flat into ``destination_dir`` (the directory
    structure is not reproduced).

    Args:
        source_dir: Root of the tree to search for the missing files.
        destination_dir: Directory that receives the copies. It is created,
            together with any missing parent directories, if necessary.
        missing_file_names: Base names of the files that should be copied.

    Raises:
        FileExistsError: If ``destination_dir`` exists but is not a directory.
    """
    # makedirs(exist_ok=True) creates intermediate directories and avoids the
    # check-then-create race that os.path.exists() + os.mkdir() has.
    os.makedirs(destination_dir, exist_ok=True)
    for curr_dir, _subdirs, file_names in os.walk(source_dir):
        # intersection() accepts any iterable, so callers are not forced to
        # pass a real set.
        curr_missing = set(file_names).intersection(missing_file_names)
        if not curr_missing:
            continue
        print(f'Start copying {len(curr_missing)} missing files under {curr_dir}.')
        _batch_copy(curr_dir, destination_dir, curr_missing)
def _batch_copy(src_dir, dst_dir, file_names) -> None:
    """Copy the named files from ``src_dir`` into ``dst_dir``.

    Files that already exist in ``dst_dir`` are left untouched, so when two
    source directories contain a file with the same name only the first one
    copied is kept.

    Args:
        src_dir: Directory that contains the files to copy.
        dst_dir: Existing directory that receives the copies.
        file_names: Iterable of base names (relative to ``src_dir``).

    Raises:
        FileNotFoundError: If a named entry is not a regular file in
            ``src_dir``. Files processed before the failing one stay copied.
    """
    for name in file_names:
        src_path = os.path.join(src_dir, name)
        # Explicit check instead of ``assert``: assertions are stripped when
        # Python runs with -O, which would silently disable the validation.
        if not os.path.isfile(src_path):
            raise FileNotFoundError(
                f'Source file does not exist or is not a regular file: {src_path}'
            )
        dst_path = os.path.join(dst_dir, name)
        if os.path.exists(dst_path):
            # Never overwrite something that is already at the destination.
            continue
        # copy2() also attempts to preserve file metadata.
        shutil.copy2(src_path, dst_path)
def _collect_files(source: str) -> Set[str]:
    """Return the base names of all files located anywhere under ``source``.

    The tree is walked recursively with ``os.walk``; names are de-duplicated,
    so files with the same name in different sub-directories count once.
    The number of distinct names is printed to stdout.

    Args:
        source: Root directory of the tree to scan.

    Returns:
        The set of distinct file names found.
    """
    names: Set = {
        file_name
        for _dir, _subdirs, dir_files in os.walk(source)
        for file_name in dir_files
    }
    print(f'Found {len(names)} files under {source}.')
    return names
if __name__ == '__main__':
    # Run 1: report which 2018 pictures are not present on the photo volume.
    source_dir, check_against_dir, copy_dst_dir = (
        '/Users/yacao/Pictures/2018/',
        '/Volumes/private_admin/photos/',
        '/Users/yacao/Downloads/missing_photos',
    )
    # To also copy the missing files, use instead:
    # check_existence_or_copy(source_dir, check_against_dir, copy_dst_dir)
    check_missing(source_dir, check_against_dir)

    # Run 2 (disabled): compare the Lightroom 2015 originals with the archive.
    source_dir, check_against_dir, copy_dst_dir = (
        '/Volumes/private_admin/photos/Lightroom CC/12b6e686c06f48658af72171d6e84674/originals/2015',
        '/Volumes/private_admin/photos/past/2015',
        '/Users/yacao/Downloads/missing_photos_2015',
    )
    # check_missing(source_dir, check_against_dir)
    # check_existence_or_copy(source_dir, check_against_dir, copy_dst_dir)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment