Skip to content

Instantly share code, notes, and snippets.

@brianhill11
Last active May 16, 2017 02:27
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save brianhill11/8cc764bae65139b34a7d9070ca83bf83 to your computer and use it in GitHub Desktop.
Save brianhill11/8cc764bae65139b34a7d9070ca83bf83 to your computer and use it in GitHub Desktop.
Get the lines that appear in one file but not the other. Useful for finding missing files when copying or transferring large batches of files.
#!/usr/bin/python
import sys
import os
from sets import Set
def create_file_set(filename, file_extension):
    """Build the set of unique entry names listed in a text file.

    Each line of ``filename`` is stripped of surrounding whitespace and cut
    at the first occurrence of ``file_extension``; the text before that
    point is used as the entry name.

    Args:
        filename: Path of the listing file to read, one entry per line.
        file_extension: Substring marking where each line is cut, e.g.
            ``".txt"``. An empty string keeps every line whole.

    Returns:
        A builtin ``set`` of the distinct entry names. Blank lines are
        ignored.

    Raises:
        IOError/OSError: If ``filename`` cannot be opened for reading.
    """
    names = set()
    with open(filename, "r") as in_f:
        for line in in_f:
            entry = line.strip()
            if not entry:
                # A blank line names no file; skipping it avoids a phantom
                # "" entry that would inflate the counts.
                continue
            if file_extension:
                # Cut at the first occurrence of the extension, not only at
                # a trailing suffix. str.split("") raises ValueError, so an
                # empty extension leaves the entry untouched instead.
                entry = entry.split(file_extension)[0]
            names.add(entry)
    return names
def main():
    """Compare two listing files and print the entries one of them lacks.

    Expects three command-line arguments: the two listing files and the
    file extension at which each line is cut before comparison. Prints the
    entries of the larger set that are absent from the smaller one, in
    sorted order, followed by the size of each set and of the difference.

    Note that only one direction is reported: entries present solely in the
    smaller set are not printed, and when both sets have the same size the
    entries of the second file missing from the first are shown.

    Raises:
        SystemExit: With status 1 and a usage message on stderr when fewer
            than three arguments are supplied.
    """
    if len(sys.argv) < 4:
        # sys.exit with a string writes it to stderr and exits with status 1,
        # so callers and shell scripts can detect the usage error.
        sys.exit("Usage: python " + sys.argv[0] + " <file1> <file2> <file extension>")

    file1 = sys.argv[1]
    file2 = sys.argv[2]
    file_extension = sys.argv[3]

    file1_set = create_file_set(file1, file_extension)
    file2_set = create_file_set(file2, file_extension)

    # Get the difference between the sets: the larger listing is treated as
    # the reference, and the entries it has that the other lacks are reported.
    if len(file1_set) > len(file2_set):
        file_set_diff = file1_set - file2_set
    else:
        file_set_diff = file2_set - file1_set

    # Sort so the output is stable from run to run; set order is arbitrary.
    for item in sorted(file_set_diff):
        print(item)

    # Single-argument print(...) behaves the same as the print statement.
    print("Number of items in {}: {}".format(file1, len(file1_set)))
    print("Number of items in {}: {}".format(file2, len(file2_set)))
    print("Number of items in {}: {}".format("difference set", len(file_set_diff)))


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment