Skip to content

Instantly share code, notes, and snippets.

@1oglop1
Last active November 28, 2016 14:00
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save 1oglop1/2bf40b0110270e57db890c91f4e52a6d to your computer and use it in GitHub Desktop.
Save 1oglop1/2bf40b0110270e57db890c91f4e52a6d to your computer and use it in GitHub Desktop.
"""
Find duplicated files module
"""
import filecmp as fcmp
from pprint import pprint
import sys
import os
def args():
    """
    Simple arguments parsing

    Expects exactly one command line argument (the folder to scan).

    :return: working directory (``sys.argv[1]``)
    :raises SystemExit: when the number of arguments is wrong; the usage
                        text is written to stderr and the exit status is 1
    """
    if len(sys.argv) != 2:
        # sys.exit() instead of the site-provided exit(): the latter is an
        # interactive helper that is not guaranteed to exist, and it would
        # report success (status 0) for what is a usage error.
        sys.exit("Usage: dup.py [FOLDER]")
    return sys.argv[1]
def duplicates(work_dir):
    """
    Finds duplicated files in a work directory

    Only regular files located directly in ``work_dir`` are examined; there
    is no recursion into sub-directories. Files are compared by content, so
    files that merely share size and modification time are not reported.

    :param work_dir: work directory (string)
    :return: dictionary with duplicated files; each key is the path
             (``work_dir`` joined with the file name) of the first file of a
             group in sorted order, each value is the list of base names of
             the other files whose content is identical to it
    :raises OSError: if ``work_dir`` cannot be listed or a file cannot be
                     read during comparison
    """
    # os.listdir() gives names in arbitrary order; sorting makes the choice
    # of each group's key deterministic.
    paths = sorted(os.path.join(work_dir, name) for name in os.listdir(work_dir))
    # filecmp.cmp() never reports non-regular files as equal, so dropping
    # them up front changes no result and avoids an OSError on broken
    # symlinks.
    remaining = [path for path in paths if os.path.isfile(path)]

    duplicates_dict = {}
    while remaining:
        reference, candidates = remaining[0], remaining[1:]
        # Build the next work list instead of removing items from the list
        # being iterated: removing in place skips the element that follows
        # every match, so some duplicates were never reported.
        remaining = []
        for candidate in candidates:
            # shallow=False forces a content comparison; the shallow mode
            # accepts equal size + mtime as "identical" without reading.
            if fcmp.cmp(reference, candidate, shallow=False):
                duplicates_dict.setdefault(reference, []).append(
                    os.path.basename(candidate)
                )
            else:
                remaining.append(candidate)
    return duplicates_dict
if __name__ == "__main__":
    # Script entry point: take the folder from the command line, look for
    # duplicated files in it and pretty-print the resulting dictionary.
    WORK_DIR = args()
    FOUND = duplicates(WORK_DIR)
    pprint(FOUND)
@1oglop1
Copy link
Author

1oglop1 commented Nov 28, 2016

Your code has been rated at 10.00/10

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment