Skip to content

Instantly share code, notes, and snippets.

@bkreider
Created June 25, 2018 23:23
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save bkreider/13d84378e0094b10a431c2e4eb245853 to your computer and use it in GitHub Desktop.
Save bkreider/13d84378e0094b10a431c2e4eb245853 to your computer and use it in GitHub Desktop.
compare_projects.py
#!/usr/bin/env python
import os
import sys
import stat
import os.path
import getpass
def walktree(top, callback):
    """
    Recursively descend the directory tree rooted at top,
    calling the callback function for each regular file.

    Adapted from: https://docs.python.org/2/library/stat.html

    top: directory whose entries are listed and visited
    callback: callable invoked with the full pathname of each regular file

    Entries that cannot be stat'ed (for example a dangling symlink, or a
    file removed while the walk is in progress) are reported and skipped
    instead of aborting the whole walk. Errors raised by os.listdir or by
    the callback itself still propagate to the caller.
    """
    for f in os.listdir(top):
        pathname = os.path.join(top, f)
        try:
            # os.stat follows symlinks, so it raises OSError when the link
            # target is missing.
            mode = os.stat(pathname).st_mode
        except OSError:
            print("Skipping %s" % (pathname,))
            continue
        if stat.S_ISDIR(mode):
            # It's a directory, recurse into it
            walktree(pathname, callback)
        elif stat.S_ISREG(mode):
            # It's a file, call the callback function
            callback(pathname)
        else:
            # Unknown file type, print a message
            print("Skipping %s" % (pathname,))
def _create_new_path(file_, orig_base_path, new_base_path):
"""
Return file path re-located in new directory
orig: /projects/user1/project1/blah
new: /projects/user1/project2/blah
"""
relative_path = file_[len(orig_base_path):]
new_path = os.path.join(new_base_path, relative_path)
return new_path
def _update_results(results, name):
"""Dictionary counter"""
results[name] = results.get(name, 0) + 1
return results
# closure for callback
def callback(path, compare, results):
    """
    Build the per-file visitor function handed to walktree.

    path: base path to look at
    compare: base path to compare against
    results: dictionary for holding results; the visitor increments its
        "count", "matches" and "misses" entries

    Returns a function that takes a single file path.
    """
    def visitfile(file_):
        """
        Check file_ to see if it is hardlinked to the same inode in compare
        """
        # update count
        _update_results(results, "count")
        s1 = os.stat(file_)
        new_path = _create_new_path(file_, path, compare)
        try:
            # stat once instead of exists() followed by stat(): one less
            # system call, and no crash if the file vanishes in between
            s2 = os.stat(new_path)
        except OSError:
            # no counterpart in the compared tree
            _update_results(results, "misses")
            return
        # same inode on the same device means the two names are hardlinks
        if (s1.st_ino, s1.st_dev) == (s2.st_ino, s2.st_dev):
            _update_results(results, "matches")
        else:
            _update_results(results, "misses")

    # return function closure
    return visitfile
def main():
    """
    Command-line entry point.

    Expects exactly two arguments, project1 and project2, resolves them to
    /projects/<current user>/<project>/, walks the first project and prints
    how many of its regular files are hardlinked to the file at the same
    relative path in the second project.

    Exits with status 1 when the arguments are missing or either project
    directory does not exist.

    Every print call passes a single pre-formatted string so the output is
    the same under Python 2 and Python 3.
    """
    # I prefer a dictionary to collections.Counter
    results = {}
    if len(sys.argv) != 3:
        # Report the name the script was actually invoked with
        prog = os.path.basename(sys.argv[0]) if sys.argv else ""
        prog = prog or "compare_projects.py"
        print("Error: missing args!")
        print("Usage: %s project1 project2" % (prog,))
        sys.exit(1)

    # fails without a controlling terminal
    # user = os.getlogin()
    user = getpass.getuser()
    print("User: %s" % (user,))

    path = "/projects/%s/%s/" % (user, sys.argv[1])
    compare = "/projects/%s/%s/" % (user, sys.argv[2])
    if not os.path.exists(path):
        print("Error: project %s doesn't exist" % (sys.argv[1],))
        sys.exit(1)
    if not os.path.exists(compare):
        print("Error: project %s doesn't exist" % (sys.argv[2],))
        sys.exit(1)

    print("Comparing:\n %s\n %s" % (path, compare))
    walktree(path, callback(path, compare, results))

    matches = results.get("matches", 0)
    misses = results.get("misses", 0)
    # "count" is absent when no file was visited; dividing by 1 then keeps
    # both percentages at zero instead of raising ZeroDivisionError
    divisor = results.get("count", 1)
    p_match = float(matches) / divisor * 100
    p_miss = float(misses) / divisor * 100

    print("")
    print(" Links: %-10d (%7.4f%%)" % (matches, p_match))
    print(" Unique: %-10d (%7.4f%%)" % (misses, p_miss))
    print("--------------------------------")
    print(" Total: %-10d" % (results.get("count", 0),))
    print("")
# Run the comparison only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment