Last active
May 14, 2020 06:37
-
-
Save mlisovyi/72797cd5ea7695a440d498f446918d27 to your computer and use it in GitHub Desktop.
Compare contents of 2 folders by filename and copy unique files from the first one
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Compare the contents of ./INPUTS and ./RESULTS by file name (recursively)
# and copy every input file that has no same-named counterpart in the results
# into ./DIFF. All three folders are resolved against the current working
# directory.
#
# Raises:
#   IOError    - INPUTS or RESULTS is not an existing directory.
#   ValueError - two or more input files share the same name; since the
#                comparison is by name only, such files cannot be told apart.
from pathlib import Path
import shutil

# inputs
input_dir = Path.cwd().joinpath("INPUTS")
result_dir = Path.cwd().joinpath("RESULTS")
for required_dir in (input_dir, result_dir):
    if not required_dir.is_dir():
        raise IOError(f"Make sure there is {required_dir} folder in the current folder")

# outputs
diff_dir = Path.cwd().joinpath("DIFF")
diff_dir.mkdir(parents=True, exist_ok=True)

# folder contents to compare ("Thumbs.db" entries among the inputs are ignored)
files_input = [f for f in input_dir.rglob("*") if f.is_file() and f.name != "Thumbs.db"]
files_result = [f for f in result_dir.rglob("*") if f.is_file()]

# sanity check: no duplicates in inputs
# Group the input paths by file name in a single pass, so the check stays
# linear in the number of files.
paths_by_name = {}
for f in files_input:
    paths_by_name.setdefault(f.name, []).append(str(f))
dupl_dict = {name: paths for name, paths in paths_by_name.items() if len(paths) > 1}
if dupl_dict:
    # one "(name, [paths...])" line per duplicated name
    print("\n".join(str(item) for item in dupl_dict.items()))
    # count every occurrence beyond the first one of each name
    n_dupl = sum(len(paths) - 1 for paths in dupl_dict.values())
    raise ValueError(
        f"Found {n_dupl} duplicates among inputs! Do not know how to handle those..."
    )

# find missing files
files_diff = {f.name for f in files_input} - {f.name for f in files_result}
print(f"Found {len(files_diff)} files missing in the results. Copying them into {str(diff_dir)}...")

# get full path
files_diff_full_path = [f for f in files_input if f.name in files_diff]

# copy files to the output directory (flat: sub-folder structure is not kept)
for f in files_diff_full_path:
    shutil.copy(str(f), str(diff_dir))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment