Skip to content

Instantly share code, notes, and snippets.

@rakrup
Created August 6, 2018 18:38
Show Gist options
  • Save rakrup/5a48d3e15b69a656b54d0cee2daac337 to your computer and use it in GitHub Desktop.
Save rakrup/5a48d3e15b69a656b54d0cee2daac337 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python
# Usage
# compare_files.py <file_to_compare_1> <file_to_compare_2>
# ALGO
# 1. Check whether both files have the same MD5 checksum.
# 2. If the checksums match, the files are exactly the same; no need to compare their text.
# 3. If the checksums don't match, load the contents of each file into a string.
# 4. Remove all blank spaces and newline characters, basically getting all the text in a single string.
# 5. Compare these strings for equality.
import hashlib # For using MD5 hashing
import sys # For reading arguments passed to file
def checksum(file_name, chunk_size=65536):
    """Return the hexadecimal MD5 digest of the file at *file_name*.

    The file is opened in binary mode so the digest is computed over the
    raw bytes on disk (hashlib only accepts bytes on Python 3), and it is
    read in pieces of *chunk_size* bytes so the whole file never has to be
    held in memory at once.

    Args:
        file_name: Path of the file to hash.
        chunk_size: Number of bytes to read per iteration.

    Returns:
        The MD5 digest as a string of hexadecimal digits.
    """
    md5 = hashlib.md5()
    # The context manager guarantees the handle is closed, even on error.
    with open(file_name, 'rb') as handle:
        # iter(callable, sentinel) keeps reading until read() returns b''.
        for chunk in iter(lambda: handle.read(chunk_size), b''):
            md5.update(chunk)
    return md5.hexdigest()
def is_contents_same(file1, file2):
    """Return True when checksum() yields the same value for both files."""
    first_digest = checksum(file1)
    second_digest = checksum(file2)
    return first_digest == second_digest
def get_contents_of_file(file_name):
    """Return the text of *file_name* with newlines and spaces removed.

    Leading and trailing whitespace is stripped from the text first; after
    that every newline character and every space character is dropped, so
    the result is the remaining text as one continuous string.
    """
    with open(file_name, 'r') as source:
        raw_text = source.read()
    trimmed = raw_text.strip()
    # Keep every character except the newline and the plain space.
    return ''.join(ch for ch in trimmed if ch not in ('\n', ' '))
def deep_compare(file1, file2):
    """Return True when both files give equal text via get_contents_of_file()."""
    first_text = get_contents_of_file(file1)
    second_text = get_contents_of_file(file2)
    return first_text == second_text
def compare_files(file1, file2):
    """Compare two files and report whether their text contents match.

    The checksums are compared first via is_contents_same(); only when they
    differ is deep_compare() used to compare the contents themselves. A
    message describing the outcome is printed at each step.

    Args:
        file1: Path of the first file to compare.
        file2: Path of the second file to compare.

    Returns:
        True when the checksums match or the deep comparison succeeds,
        False otherwise.
    """
    # Compare the files the caller passed in; the paths were previously
    # hard-coded to 'foo.txt' and 'bar.txt', ignoring both arguments.
    # print(...) with one parenthesised string prints the same text under
    # both Python 2 and Python 3.
    if is_contents_same(file1, file2):
        print("files are equal")
        return True
    print('The hash of the files are not the same!, lets check the content in depth')
    if deep_compare(file1, file2):
        print('The file contents are really equal')
        return True
    print('the file contents are different')
    return False
if __name__ == "__main__":
    # Runs only when the file is executed as a script, not when it is
    # imported as a library.
    if len(sys.argv) < 3:
        # Exit with a usage message instead of an IndexError traceback when
        # one or both file paths are missing; sys.exit() with a string
        # writes it to stderr and exits with a non-zero status.
        sys.exit("Usage: compare_files.py <file_to_compare_1> <file_to_compare_2>")
    file1 = sys.argv[1]
    file2 = sys.argv[2]
    compare_files(file1, file2)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment