Created
August 6, 2018 18:38
-
-
Save rakrup/5a48d3e15b69a656b54d0cee2daac337 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
# Usage
# compare_files.py <file_to_compare_1> <file_to_compare_2>
# ALGO
# 1. Check whether both files have the same MD5 checksum.
# 2. If the checksums match, the files are exactly the same; no need to check line by line.
# 3. If the checksums don't match, load the contents of each file into a string.
# 4. Remove all blank spaces and newline characters, basically getting all the text in one string.
# 5. Compare these strings for equality.
import hashlib # For using MD5 hashing | |
import sys # For reading arguments passed to file | |
def checksum(file_name):
    """Return the hexadecimal MD5 digest of the raw bytes of a file.

    Args:
        file_name: Path of the file to hash.

    Returns:
        The MD5 digest as a string of hexadecimal digits.

    Raises:
        IOError/OSError: If the file cannot be opened or read.
    """
    md5 = hashlib.md5()
    # Binary mode: the digest must cover the exact bytes on disk, and
    # hashlib only accepts bytes on Python 3.  The ``with`` block closes
    # the handle even if reading fails.
    with open(file_name, 'rb') as handle:
        # Feed the hash in fixed-size chunks so large files are never
        # held in memory all at once.
        for chunk in iter(lambda: handle.read(65536), b''):
            md5.update(chunk)
    return md5.hexdigest()
def is_contents_same(file1, file2):
    """Return True when both files produce the same checksum.

    Args:
        file1: Path of the first file.
        file2: Path of the second file.

    Returns:
        True if ``checksum(file1)`` equals ``checksum(file2)``, else False.
    """
    first_digest = checksum(file1)
    second_digest = checksum(file2)
    return first_digest == second_digest
def get_contents_of_file(file_name):
    """Read a text file and return its content with spaces and newlines removed.

    Leading and trailing whitespace is stripped first; after that every
    newline character and every space character is dropped.  Other interior
    characters (tabs, for instance) are left in place.

    Args:
        file_name: Path of the file to read.

    Returns:
        The condensed file content as a single string.
    """
    with open(file_name, 'r') as handle:
        raw_text = handle.read()
    condensed = raw_text.strip()
    # Drop the separators one at a time: newlines first, then spaces.
    for unwanted in ('\n', ' '):
        condensed = condensed.replace(unwanted, '')
    return condensed
def deep_compare(file1, file2):
    """Return True when both files hold the same text once condensed.

    Each file is read through ``get_contents_of_file`` and the two resulting
    strings are compared for equality.

    Args:
        file1: Path of the first file.
        file2: Path of the second file.

    Returns:
        True if the condensed contents are equal, else False.
    """
    first_text = get_contents_of_file(file1)
    second_text = get_contents_of_file(file2)
    return first_text == second_text
def compare_files(file1, file2):
    """Report whether two files hold the same text content.

    The checksums are compared first.  If they differ, the files are compared
    again through ``deep_compare``.  A status message is printed for each
    step.

    Args:
        file1: Path of the first file.
        file2: Path of the second file.

    Returns:
        True if the checksums match or the deep comparison succeeds,
        False otherwise.
    """
    # Compare the files the caller asked for (the paths used to be
    # hard-coded to 'foo.txt' and 'bar.txt', ignoring both parameters).
    if is_contents_same(file1, file2):
        print("files are equal")
        return True

    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print('The hash of the files are not the same!, lets check the content in depth')
    if deep_compare(file1, file2):
        print('The file contents are really equal')
        return True

    print('the file contents are different')
    return False
if __name__ == "__main__":
    # Runs only when the file is executed as a script, not when it is
    # imported as a library.
    if len(sys.argv) < 3:
        # Without both paths there is nothing to compare; exit with a usage
        # message instead of an IndexError traceback.
        sys.exit("Usage: %s <file_to_compare_1> <file_to_compare_2>" % sys.argv[0])
    file1 = sys.argv[1]
    file2 = sys.argv[2]
    compare_files(file1, file2)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment