Last active
November 28, 2016 14:00
-
-
Save 1oglop1/2bf40b0110270e57db890c91f4e52a6d to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
Module for finding duplicated files in a folder.
""" | |
import filecmp as fcmp | |
from pprint import pprint | |
import sys | |
import os | |
def args():
    """
    Simple arguments parsing

    Expects exactly one command-line argument: the folder to scan. With
    any other argument count a usage message is written to stderr and the
    program terminates with a non-zero exit status.

    :return: working directory (the single command-line argument)
    :raises SystemExit: when the argument count is not exactly one
    """
    if len(sys.argv) != 2:
        # Use sys.exit() instead of the bare exit(): the latter is a helper
        # injected by the site module and is not available under
        # `python -S` or in frozen builds. Passing a string makes sys.exit
        # print it to stderr and exit with status 1, so a usage error is
        # no longer reported to the shell as success.
        sys.exit("Usage: dup.py [FOLDER]")
    return sys.argv[1]
def duplicates(work_dir):
    """
    Finds duplicated files in a work directory

    Only regular files directly inside ``work_dir`` are considered (no
    recursion). Files are compared by content, and every group of
    identical files is reported once, keyed by the group's first member
    in sorted name order.

    :param work_dir: work directory (string)
    :return: dictionary mapping the full path of the first file of each
             group to the list of base names of its duplicates; files
             without any duplicate do not appear
    """
    # Sort the listing so the file chosen as the key of each group is
    # deterministic (os.listdir returns entries in arbitrary order), and
    # keep regular files only: directories can never compare equal, and
    # entries such as broken symlinks would make filecmp.cmp raise.
    remaining = [
        path
        for path in (
            os.path.join(work_dir, name)
            for name in sorted(os.listdir(work_dir))
        )
        if os.path.isfile(path)
    ]

    duplicates_dict = dict()
    while remaining:
        reference = remaining[0]
        others = remaining[1:]
        # Build the next round's list from scratch instead of removing
        # items from the list being iterated, which silently skips the
        # element following every match.
        remaining = []
        for candidate in others:
            # shallow=False forces a content comparison; a shallow check
            # reports files with merely equal size and mtime as identical.
            if fcmp.cmp(reference, candidate, shallow=False):
                duplicates_dict.setdefault(reference, []).append(
                    os.path.basename(candidate)
                )
            else:
                remaining.append(candidate)
    return duplicates_dict
if __name__ == "__main__":
    # Script entry point: take the folder from the command line, look for
    # duplicated files in it and pretty-print the resulting dictionary.
    WORK_DIR = args()
    FOUND = duplicates(WORK_DIR)
    pprint(FOUND)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Your code has been rated at 10.00/10