Created
September 29, 2021 15:41
-
-
Save ddurst/608d5c6380821b37ef22e7689beaee27 to your computer and use it in GitHub Desktop.
Old script for finding duplicate files by name in a directory tree
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python | |
import errno, sys, os | |
# Debug switch: when set to a truthy value the script prints trace output
# (directory entries, needles, matches) to stdout.
DEBUG = 0

# Disabled helper, kept for reference only; no caller is visible in this file.
# It used to be parked inside a bare triple-quoted string, which Python
# evaluates as an expression statement rather than treating as a comment.
# NOTE: the snippet is Python 2 only (print statement, 0755 octal literal).
#
# def make_sure_path_exists(path):
#     if DEBUG:
#         print path
#     try:
#         os.makedirs(path, 0755)
#     except OSError as exception:
#         if exception.errno != errno.EEXIST:
#             raise
def process_file(needle, tree):
    """Look for a file with the same name as *needle* in another directory.

    Args:
        needle: Path of the file to look for, e.g. "./sub/name.txt" or
            just "name.txt".
        tree: Iterable of [directory, filename] entries (the caller builds
            it from os.walk).

    Returns:
        A two-item list [needle, duplicate_path] when at least one entry in
        *tree* has the same filename but a different directory, otherwise
        ["", ""].  When several entries qualify, the last one in *tree* is
        the one reported.
    """
    needle_dir, filename = os.path.split(needle)
    # os.walk(".") yields roots such as "." and "./sub", while a needle may be
    # written as "sub/name" or just "name" (empty directory part).  Normalise
    # both sides so the needle's own entry is never reported as its duplicate.
    needle_dir = os.path.normpath(needle_dir)

    if DEBUG:
        print("looking for needle...")

    found_needle = ""
    found_dupe = ""
    for entry in tree:
        entry_dir = entry[0]
        entry_name = entry[1]
        if entry_name != filename:
            continue
        if os.path.normpath(entry_dir) == needle_dir:
            # Same directory and same name: this is the needle itself.
            continue
        if DEBUG:
            print("found a {%s} in path: %s instead of %s"
                  % (filename, entry_dir, needle_dir))
        # No early exit: a later match overwrites an earlier one.
        found_needle = needle
        found_dupe = os.path.join(entry_dir, entry_name)
    return [found_needle, found_dupe]
# --- command line -----------------------------------------------------------
# The single argument is a text file listing one path per line.
try:
    input_file = sys.argv[1]
except IndexError:
    # Only a missing argument is expected here; anything else should surface.
    print('usage: finddupefiles.py <file with list of files to find dupes of> (in directory to search for)')
    sys.exit(2)

# --- get file names from input file ------------------------------------------
# Surrounding whitespace is stripped; blank lines are dropped because an empty
# name can never match a directory entry.
with open(input_file) as filenames:
    list_filenames = [line.strip() for line in filenames if line.strip()]

# --- create list of directory [path, filename] -------------------------------
starting_directory = '.'
dir_tree = []
for root, dirs, files in os.walk(starting_directory):
    for name in files:
        if DEBUG:
            print("%s\t%s\t(%s)" % (root, name, os.path.join(root, name)))
        dir_tree.append([root, name])

# --- go through needles ------------------------------------------------------
# The report file is opened only after the walk has finished, and the "with"
# block guarantees it is flushed and closed even if a lookup raises.
with open(os.path.join(starting_directory, "dupes_found"), 'w') as file_dupes:
    for needle in list_filenames:
        if DEBUG:
            print('needle: %s' % needle)
        x, y = process_file(needle, dir_tree)
        if x:
            # x is the needle as listed, y is where the same name was found.
            file_dupes.write(
                "{target} found at {found}\n".format(target=x, found=y)
            )
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment