Skip to content

Instantly share code, notes, and snippets.

@mlisovyi
Last active May 14, 2020 06:37
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save mlisovyi/72797cd5ea7695a440d498f446918d27 to your computer and use it in GitHub Desktop.
Save mlisovyi/72797cd5ea7695a440d498f446918d27 to your computer and use it in GitHub Desktop.
Compare contents of 2 folders by filename and copy unique files from the first one
from pathlib import Path
import shutil
# Folders to compare. Both are looked up in the current working directory
# and must already exist; otherwise the script stops with an IOError.
input_dir = Path.cwd() / "INPUTS"
result_dir = Path.cwd() / "RESULTS"
for required_dir in (input_dir, result_dir):
    if not required_dir.is_dir():
        raise IOError(f"Make sure there is {required_dir} folder in the current folder")

# Output folder that receives the files missing from the results.
# It is created on demand and reused if it already exists.
diff_dir = Path.cwd() / "DIFF"
diff_dir.mkdir(parents=True, exist_ok=True)

# Recursively collect the regular files of both folders.
# Entries named "Thumbs.db" are left out of the inputs.
files_input = []
for entry in input_dir.rglob("*"):
    if not entry.is_file():
        continue
    if entry.name == "Thumbs.db":
        continue
    files_input.append(entry)
files_result = [entry for entry in result_dir.rglob("*") if entry.is_file()]
# Sanity check: no duplicate file names among the inputs. The comparison with
# the results is done by file name only, so two inputs sharing a name cannot
# be told apart.
seen = set()  # set membership keeps this scan linear in the number of inputs
dupl = []  # one entry per repeated occurrence of an already-seen name
for f in files_input:
    if f.name in seen:
        dupl.append(f.name)
    else:
        seen.add(f.name)
if dupl:
    # Group the full paths of every duplicated name in a single pass over the
    # inputs. Keys keep the order in which the duplicates were detected.
    dupl_dict = {name: [] for name in dupl}
    for f in files_input:
        if f.name in dupl_dict:
            dupl_dict[f.name].append(str(f))
    # Show each offending name with all of its locations before aborting.
    print("\n".join(str(item) for item in dupl_dict.items()))
    raise ValueError(
        f"Found {len(dupl)} duplicates among inputs! Do not know how to handle those..."
    )
# File names present among the inputs but absent from the results.
input_names = {path.name for path in files_input}
result_names = {path.name for path in files_result}
files_diff = input_names.difference(result_names)
print(f"Found {len(files_diff)} files missing in the results. Copying them into {str(diff_dir)}...")

# Map the missing names back to their full input paths (input order is kept).
files_diff_full_path = list(filter(lambda path: path.name in files_diff, files_input))

# Copy every missing file into the output directory.
destination = str(diff_dir)
for missing in files_diff_full_path:
    shutil.copy(str(missing), destination)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment