Skip to content

Instantly share code, notes, and snippets.

@ddurst
Created September 29, 2021 15:41
Show Gist options
  • Save ddurst/608d5c6380821b37ef22e7689beaee27 to your computer and use it in GitHub Desktop.
Save ddurst/608d5c6380821b37ef22e7689beaee27 to your computer and use it in GitHub Desktop.
Old script for finding duplicate files by name in a directory tree
#!/usr/bin/python
import errno, sys, os
DEBUG = 0
"""
def make_sure_path_exists(path):
if DEBUG:
print path
try:
os.makedirs(path, 0755)
except OSError as exception:
if exception.errno != errno.EEXIST:
raise
"""
def process_file(needle, tree):
    """Find a file elsewhere in *tree* with the same name as *needle*.

    needle -- "/"-separated path (or bare file name) of the file to look for.
    tree   -- iterable of [directory, filename] pairs.

    Returns [needle, found_path] for the last entry in *tree* whose file name
    equals the needle's file name and whose directory is not the needle's own
    directory; returns ["", ""] when no such entry exists. found_path is the
    entry's directory and file name joined with "/".
    """
    # split needle into path & filename
    head, sep, filename = needle.rpartition("/")
    # "/name" has an empty head but lives in the root directory, not in the
    # current one; keep the separator so it is not mistaken for "./name".
    path = head or sep
    if DEBUG:
        print("looking for needle...")
    # Directories may be spelled differently on the two sides ("" vs ".",
    # "sub" vs "./sub"). Compare normalised forms so the needle's own
    # location is never reported as a duplicate of itself.
    own_dir = os.path.normpath(path)
    # find that file elsewhere, if found return the found full path
    n = ""
    d = ""
    for target in tree:
        if target[1] == filename and os.path.normpath(target[0]) != own_dir:
            if DEBUG:
                print("found a {%s} in path: %s instead of %s" % (filename, target[0], path))
            n = needle
            d = target[0] + "/" + target[1]
    return [n, d]
# The file listing the needles is the single required command-line argument.
try:
    input_file = sys.argv[1]
except IndexError:
    # Only a missing argument should trigger the usage message; anything
    # else (e.g. KeyboardInterrupt) must propagate instead of being swallowed.
    print('usage: finddupefiles.py <file with list of files to find dupes of> (in directory to search for)')
    sys.exit(2)
# Load the needles: one entry per line of the input file, with surrounding
# whitespace (including the trailing newline) removed.
with open(input_file) as handle:
    list_filenames = [raw.strip() for raw in handle]
# Flatten the tree under the current directory into [directory, filename]
# pairs, one pair per file found by os.walk().
starting_directory = '.'
dir_tree = []
for dirpath, _subdirs, entries in os.walk(starting_directory):
    if DEBUG:
        for entry in entries:
            print("%s\t%s\t(%s)" % (dirpath, entry, os.path.join(dirpath, entry)))
    dir_tree.extend([dirpath, entry] for entry in entries)
# Go through the needles and write one "<needle> found at <path>" line to the
# "dupes_found" report for every needle that has a match. The report is
# opened in a with-block so it is flushed and closed even if a lookup raises.
with open(os.path.join(starting_directory, "dupes_found"), 'w+') as file_dupes:
    for needle in list_filenames:
        if DEBUG:
            print('needle: %s' % needle)
        x, y = process_file(needle, dir_tree)
        # process_file returns an empty first element when nothing was found.
        if x:
            file_dupes.write("{target} found at {found}\n".format(target=x, found=y))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment