Skip to content

Instantly share code, notes, and snippets.

@panzi
Last active January 24, 2016 22:17
Show Gist options
  • Save panzi/87de1289953988cb6d5e to your computer and use it in GitHub Desktop.
Save panzi/87de1289953988cb6d5e to your computer and use it in GitHub Desktop.
A very simple script to compare two folders, only based on file names (and not file content).
#!/usr/bin/env python
#
# A very simple script to compare two folders, only based on file names (and
# not file content).
# I'm sure there are lots of programs that do just this, but given how short it
# is, it was faster for me to write this than to search for a tool that does
# exactly what I want.
from __future__ import print_function
import sys
import os
from os.path import abspath, split as splitpath, join as joinpath
from stat import S_ISDIR
class Node(object):
    """Base class for a directory-tree entry identified only by its name.

    Equality and hashing are both derived from ``name`` so that instances
    behave consistently when used as set members or dict keys.
    """
    __slots__ = 'name',

    def __init__(self, name):
        self.name = name

    def hash(self):
        """Return the hash of the node's name (kept for existing callers)."""
        return hash(self.name)

    def __hash__(self):
        # A class that defines __eq__ without __hash__ is unhashable on
        # Python 3, so hash explicitly on the same key that __eq__ compares.
        return hash(self.name)

    def __eq__(self, other):
        # Defer to the other operand instead of raising AttributeError when
        # compared against something that is not a Node.
        if not isinstance(other, Node):
            return NotImplemented
        return self.name == other.name

    def __ne__(self, other):
        # Python 2 does not derive != from ==, so define it explicitly.
        result = self.__eq__(other)
        if result is NotImplemented:
            return result
        return not result
class File(Node):
    """Leaf node: a named entry that carries no children of its own."""
    __slots__ = ()
class Dir(Node):
    """Directory node whose entries are stored in ``children`` keyed by name."""
    __slots__ = 'children',

    def __init__(self, name):
        Node.__init__(self, name)
        self.children = {}

    def build(self, pardir):
        """Recursively populate ``children`` from the file system.

        ``pardir`` is the path of the directory that contains this node; the
        node's own path is ``pardir`` joined with ``self.name``. Errors from
        ``os.listdir`` / ``os.lstat`` are not caught and propagate to the
        caller.
        """
        path = joinpath(pardir, self.name)
        children = self.children
        for child in os.listdir(path):
            cpath = joinpath(path, child)
            # lstat does not follow symlinks, so a symlink pointing at a
            # directory is recorded as a File and is never descended into.
            st = os.lstat(cpath)
            if S_ISDIR(st.st_mode):
                node = Dir(child)
                node.build(path)
            else:
                node = File(child)
            children[child] = node

    def diff(self, other):
        """Compare this tree with ``other`` by entry names only.

        Returns a tuple ``(in_a, in_b, type_mismatch)`` of sorted lists of
        paths relative to the two roots: entries only present in this tree,
        entries only present in ``other``, and entries present in both but
        with a different node type (file on one side, directory on the other).
        """
        a_children = self.children
        b_children = other.children
        a_keys = set(a_children)
        b_keys = set(b_children)
        in_a = list(a_keys - b_keys)
        in_b = list(b_keys - a_keys)
        type_mismatch = []
        for name in a_keys & b_keys:
            a = a_children[name]
            b = b_children[name]
            a_type = type(a)
            if a_type is not type(b):
                type_mismatch.append(name)
            elif a_type is Dir:
                # Both sides are directories: recurse and prefix the
                # sub-results with this entry's name.
                sub_a, sub_b, sub_mismatch = a.diff(b)
                in_a.extend(joinpath(name, child) for child in sub_a)
                in_b.extend(joinpath(name, child) for child in sub_b)
                type_mismatch.extend(
                    joinpath(name, child) for child in sub_mismatch)
        # Set iteration order is arbitrary (and varies between runs with
        # string hash randomisation); sort so the report is reproducible.
        in_a.sort()
        in_b.sort()
        type_mismatch.sort()
        return in_a, in_b, type_mismatch
def buildTree(path):
    """Return a fully built ``Dir`` tree for the directory at ``path``.

    The path is made absolute and split into its parent directory and final
    component; the final component names the root node and the parent is
    handed to ``Dir.build`` to locate it on disk.
    """
    parent, base = splitpath(abspath(path))
    root = Dir(base)
    root.build(parent)
    return root
def print_paths(hdr, paths):
    """Print ``paths`` one per line under an underlined, tab-indented header.

    Nothing at all is printed when ``paths`` is empty. Otherwise the output
    is the header, a row of ``=`` as long as the header, a blank line, the
    paths, and a trailing blank line.
    """
    if not paths:
        return
    underline = "=" * len(hdr)
    print("\t" + hdr)
    print("\t" + underline)
    print()
    for entry in paths:
        print(entry)
    print()
def dirdiff(dir1, dir2):
    """Compare two directories by entry names and print a three-part report.

    The report lists paths found only under ``dir1``, paths found only under
    ``dir2``, and paths that exist in both but as different node types.
    Sections with no entries are omitted by ``print_paths``.
    """
    only_first, only_second, mismatched = buildTree(dir1).diff(buildTree(dir2))
    sections = (
        ("Only in: %s" % dir1, only_first),
        ("Only in: %s" % dir2, only_second),
        ("Type missmatch:", mismatched),
    )
    for header, paths in sections:
        print_paths(header, paths)
if __name__ == '__main__':
    # Script entry point: compare the two directories named on the command
    # line. `sys` is already imported at the top of the file.
    if len(sys.argv) < 3:
        # Report missing arguments as a usage error instead of letting the
        # argv lookup fail with an IndexError traceback.
        sys.stderr.write("Usage: %s DIR1 DIR2\n" % sys.argv[0])
        sys.exit(2)
    dirdiff(sys.argv[1], sys.argv[2])
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment