Last active
May 16, 2017 02:27
-
-
Save brianhill11/8cc764bae65139b34a7d9070ca83bf83 to your computer and use it in GitHub Desktop.
Get the lines that appear in one file but not the other. Useful for finding missing files when copying or transferring big batches of files.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python | |
import sys | |
import os | |
from sets import Set | |
def create_file_set(filename, file_extension):
    """Return the set of distinct entries listed in *filename*, one per line.

    Each line is stripped of surrounding whitespace and then truncated at
    the first occurrence of *file_extension*, so with the extension ".txt"
    both "sample.txt" and "sample.txt.gz" yield "sample".

    Args:
        filename: Path of the text file to read.
        file_extension: Text at which each line is cut. An empty string
            leaves the stripped line unchanged.

    Returns:
        A builtin ``set`` of the resulting strings. Blank lines are skipped.
    """
    # The builtin set replaces the deprecated ``sets.Set`` and works on
    # both Python 2 and Python 3.
    result_set = set()
    with open(filename, "r") as in_f:
        for line in in_f:
            entry = line.strip()
            if not entry:
                # A blank line is not a file name; do not count it.
                continue
            if file_extension:
                # str.split raises ValueError on an empty separator, so
                # only split when an extension was actually given.
                entry = entry.split(file_extension)[0]
            # set.add is already idempotent; no membership test needed.
            result_set.add(entry)
    return result_set
def main():
    """Compare two listing files and print the entries one of them lacks.

    Reads three command-line arguments: <file1> <file2> <file extension>.
    Both files are loaded with ``create_file_set`` and the smaller set is
    subtracted from the larger one. The resulting entries are printed one
    per line in sorted order, followed by the size of each set and of the
    difference.

    If fewer than three arguments are given, a usage message is written
    to stderr and the process exits with status 1.
    """
    if len(sys.argv) < 4:
        sys.stderr.write(
            "Usage: python " + sys.argv[0]
            + " <file1> <file2> <file extension>\n"
        )
        # sys.exit is the supported way to terminate a script; a usage
        # error is reported with a non-zero status.
        sys.exit(1)

    file1 = sys.argv[1]
    file2 = sys.argv[2]
    file_extension = sys.argv[3]

    file1_set = create_file_set(file1, file_extension)
    file2_set = create_file_set(file2, file_extension)

    # Get the difference between the sets: larger minus smaller.
    # NOTE: when both sets have the same size this is file2 - file1, and
    # entries that exist only in the smaller set are never reported.
    if len(file1_set) > len(file2_set):
        file_set_diff = file1_set - file2_set
    else:
        file_set_diff = file2_set - file1_set

    # Sort so the output is stable from run to run.
    for item in sorted(file_set_diff):
        print(item)

    # Single-argument print(...) behaves the same on Python 2 and 3.
    print("Number of items in {}: {}".format(file1, len(file1_set)))
    print("Number of items in {}: {}".format(file2, len(file2_set)))
    print("Number of items in {}: {}".format("difference set", len(file_set_diff)))
# Run the comparison only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment