Last active
January 24, 2016 22:17
-
-
Save panzi/87de1289953988cb6d5e to your computer and use it in GitHub Desktop.
A very simple script to compare two folders, only based on file names (and not file content).
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python
#
# A very simple script to compare two folders, based only on file names (and
# not on file content).
# I'm sure there are lots of programs that do just this, but given how short
# it is, it was faster for me to write this than to search for a tool that
# does exactly what I want.
from __future__ import print_function | |
import sys | |
import os | |
from os.path import abspath, split as splitpath, join as joinpath | |
from stat import S_ISDIR | |
class Node(object):
    """Base class for a directory-tree entry identified only by its name.

    Two nodes compare equal when their names are equal; the concrete node
    type is deliberately not part of the comparison.
    """

    __slots__ = ('name',)

    def __init__(self, name):
        """Store *name*, the entry's file name without any directory part."""
        self.name = name

    def __hash__(self):
        """Hash by name so that equal nodes hash equally.

        Defining ``__eq__`` without ``__hash__`` makes instances unhashable
        on Python 3, so the hook must use the dunder spelling.
        """
        return hash(self.name)

    # Keep the historical public spelling ``node.hash()`` working.
    hash = __hash__

    def __eq__(self, other):
        """Return True when *other* carries the same name.

        Objects without a ``name`` attribute yield ``NotImplemented`` so the
        interpreter falls back to its default (unequal) instead of raising.
        """
        try:
            other_name = other.name
        except AttributeError:
            return NotImplemented
        return self.name == other_name

    def __ne__(self, other):
        """Inverse of ``__eq__``; Python 2 does not derive it automatically."""
        result = self.__eq__(other)
        if result is NotImplemented:
            return result
        return not result
class File(Node):
    """Leaf entry of the tree: anything that is not a real directory."""

    # No per-instance state beyond the inherited ``name`` slot.
    __slots__ = tuple()
class Dir(Node):
    """Directory entry holding its children in a ``name -> node`` mapping."""

    __slots__ = ('children',)

    def __init__(self, name):
        """Create an empty directory node called *name*."""
        Node.__init__(self, name)
        self.children = {}

    def build(self, pardir):
        """Recursively populate ``children`` from the file system.

        *pardir* is the path of the directory that contains this node; the
        node's own path is ``pardir`` joined with ``self.name``.
        """
        path = joinpath(pardir, self.name)
        children = self.children
        for child in os.listdir(path):
            # lstat does not follow symlinks, so a link to a directory is
            # recorded as a File and never descended into.
            st = os.lstat(joinpath(path, child))
            if S_ISDIR(st.st_mode):
                node = Dir(child)
                node.build(path)
            else:
                node = File(child)
            children[child] = node

    def diff(self, other):
        """Compare this directory with *other* by entry names only.

        Returns a tuple ``(in_a, in_b, type_missmatch)`` of lists of paths
        relative to the compared directories:

        * ``in_a`` -- entries present only in this directory,
        * ``in_b`` -- entries present only in *other*,
        * ``type_missmatch`` -- entries present in both whose node types
          differ (one is a ``Dir``, the other is not).

        Names are processed in sorted order at every level so the result is
        deterministic instead of following set iteration order.
        """
        a_children = self.children
        b_children = other.children
        a_keys = set(a_children)
        b_keys = set(b_children)

        in_a = sorted(a_keys - b_keys)
        in_b = sorted(b_keys - a_keys)
        type_missmatch = []

        for name in sorted(a_keys & b_keys):
            a = a_children[name]
            b = b_children[name]
            a_type = type(a)
            if a_type is not type(b):
                type_missmatch.append(name)
            elif a_type is Dir:
                # Same-named sub-directories: recurse and prefix the results
                # with this level's name to keep paths relative to the root.
                child_in_a, child_in_b, child_type_missmatch = a.diff(b)
                in_a.extend(joinpath(name, child) for child in child_in_a)
                in_b.extend(joinpath(name, child) for child in child_in_b)
                type_missmatch.extend(
                    joinpath(name, child) for child in child_type_missmatch)

        return in_a, in_b, type_missmatch
def buildTree(path):
    """Scan *path* and return the ``Dir`` node that is the root of its tree.

    The path is made absolute first, then split into the containing
    directory and the final component; the final component becomes the
    root node's name.
    """
    parent, base = splitpath(abspath(path))
    root = Dir(base)
    root.build(parent)
    return root
def print_paths(hdr, paths):
    """Print *paths* under an underlined, tab-indented heading *hdr*.

    Nothing at all is printed when *paths* is empty. Otherwise the output
    is the heading, an ``=`` underline of the same length, a blank line,
    one path per line and a closing blank line.
    """
    if not paths:
        return

    underline = "=" * len(hdr)
    print("\t" + hdr)
    print("\t" + underline)
    print()

    for entry in paths:
        print(entry)
    print()
def dirdiff(dir1, dir2):
    """Compare two directory trees by entry names and print a report.

    Up to three sections are printed: entries found only under *dir1*,
    entries found only under *dir2*, and entries that exist in both trees
    but with different node types. Empty sections are omitted by
    ``print_paths``.
    """
    # The first tree is scanned completely before the second one is touched.
    only_first, only_second, mismatched = buildTree(dir1).diff(buildTree(dir2))

    print_paths("Only in: %s" % dir1, only_first)
    print_paths("Only in: %s" % dir2, only_second)
    print_paths("Type missmatch:", mismatched)
if __name__ == '__main__':
    # ``sys`` is already imported at module level; no local re-import needed.
    # Report a usage hint instead of a bare IndexError traceback when fewer
    # than two directories are given. Extra arguments are still ignored.
    if len(sys.argv) < 3:
        sys.stderr.write("Usage: %s DIR1 DIR2\n" % sys.argv[0])
        sys.exit(2)
    dirdiff(sys.argv[1], sys.argv[2])
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment