ratozumbi/hash_compare_files.py

## hash_compare_files.py

import hashlib
import os

def getHashes(path, fileHashes):
    """Recursively write the MD5 digest of every regular file under *path*.

    For each regular file one line of the form ``MD5HEX##PATH`` followed by
    a newline is written to *fileHashes*, where PATH is *path* joined with
    the names of the entries found below it.

    Args:
        path: Directory to scan; sub-directories are scanned recursively.
        fileHashes: Writable text stream (anything exposing ``write``).

    Raises:
        OSError: If *path* cannot be listed or a file cannot be read.
    """
    # Files are hashed block by block so large files never have to fit in
    # memory at once.
    blockSize = 65536

    print("executing in "+ path)
    # Sorted so the listing (and therefore which duplicate is later treated
    # as the "original") does not depend on the file system's entry order.
    for filename in sorted(os.listdir(path)):
        fullpath = os.path.join(path, filename)

        if os.path.isfile(fullpath):
            hasher = hashlib.md5()
            with open(fullpath, 'rb') as afile:
                # iter() with a sentinel stops at the empty read (EOF).
                for block in iter(lambda: afile.read(blockSize), b""):
                    hasher.update(block)

            fileHashes.write(hasher.hexdigest() + "##" + fullpath + "\n")

        elif os.path.isdir(fullpath):
            getHashes(fullpath, fileHashes)
        # Anything else (dangling symlink, FIFO, socket, ...) is neither
        # hashable as a file nor listable as a directory, so it is skipped.

# Script body.
# Pass 1: record "MD5##path" for every file under the current directory.
# Pass 2: re-read that listing and report every file whose digest was already
# seen, both on the console and in ./result.txt.

# Both output files live inside the scanned tree, so pass 1 hashes them too
# (hashes.txt while it is still being written, result.txt left over from an
# earlier run). Their entries are ignored in pass 2; otherwise a still-empty
# hashes.txt would be reported as a copy of every empty file.
outputPaths = {os.path.join(".", "hashes.txt"), os.path.join(".", "result.txt")}

with open("./hashes.txt", "w+") as fileHashes:
    getHashes('.', fileHashes)

# Maps an MD5 digest to the first path seen with that digest.
dicHashPath = {}
with open("./hashes.txt", "r") as fileHashes, open("./result.txt", "w+") as searchResult:
    for line in fileHashes:
        # Split on the first "##" only: the digest is plain hex, so any
        # further "##" belongs to the path itself.
        digest, separator, filePath = line.rstrip("\n").partition("##")

        # Lines without a separator are not valid entries (e.g. the tail of
        # a file name that contains a newline).
        if not separator or filePath in outputPaths:
            continue

        if digest in dicHashPath:
            report = ("===================\nCopia encontrada em " + filePath
                      + "\nOriginal: " + dicHashPath[digest]
                      + "\nMD5: " + digest)
            print(report)
            searchResult.write(report + "\n")
        else:
            dicHashPath[digest] = filePath

	import hashlib
	import os

	def getHashes(path, fileHashes):
	    """Recursively write the MD5 digest of every regular file under *path*.

	    For each regular file one line of the form ``MD5HEX##PATH`` followed by
	    a newline is written to *fileHashes*, where PATH is *path* joined with
	    the names of the entries found below it.

	    Args:
	        path: Directory to scan; sub-directories are scanned recursively.
	        fileHashes: Writable text stream (anything exposing ``write``).

	    Raises:
	        OSError: If *path* cannot be listed or a file cannot be read.
	    """
	    # Files are hashed block by block so large files never have to fit in
	    # memory at once.
	    blockSize = 65536

	    print("executing in "+ path)
	    # Sorted so the listing (and therefore which duplicate is later treated
	    # as the "original") does not depend on the file system's entry order.
	    for filename in sorted(os.listdir(path)):
	        fullpath = os.path.join(path, filename)

	        if os.path.isfile(fullpath):
	            hasher = hashlib.md5()
	            with open(fullpath, 'rb') as afile:
	                # iter() with a sentinel stops at the empty read (EOF).
	                for block in iter(lambda: afile.read(blockSize), b""):
	                    hasher.update(block)

	            fileHashes.write(hasher.hexdigest() + "##" + fullpath + "\n")

	        elif os.path.isdir(fullpath):
	            getHashes(fullpath, fileHashes)
	        # Anything else (dangling symlink, FIFO, socket, ...) is neither
	        # hashable as a file nor listable as a directory, so it is skipped.

	# Script body.
	# Pass 1: record "MD5##path" for every file under the current directory.
	# Pass 2: re-read that listing and report every file whose digest was
	# already seen, both on the console and in ./result.txt.

	# Both output files live inside the scanned tree, so pass 1 hashes them too
	# (hashes.txt while it is still being written, result.txt left over from an
	# earlier run). Their entries are ignored in pass 2; otherwise a still-empty
	# hashes.txt would be reported as a copy of every empty file.
	outputPaths = {os.path.join(".", "hashes.txt"), os.path.join(".", "result.txt")}

	with open("./hashes.txt", "w+") as fileHashes:
	    getHashes('.', fileHashes)

	# Maps an MD5 digest to the first path seen with that digest.
	dicHashPath = {}
	with open("./hashes.txt", "r") as fileHashes, open("./result.txt", "w+") as searchResult:
	    for line in fileHashes:
	        # Split on the first "##" only: the digest is plain hex, so any
	        # further "##" belongs to the path itself.
	        digest, separator, filePath = line.rstrip("\n").partition("##")

	        # Lines without a separator are not valid entries (e.g. the tail
	        # of a file name that contains a newline).
	        if not separator or filePath in outputPaths:
	            continue

	        if digest in dicHashPath:
	            report = ("===================\nCopia encontrada em " + filePath
	                      + "\nOriginal: " + dicHashPath[digest]
	                      + "\nMD5: " + digest)
	            print(report)
	            searchResult.write(report + "\n")
	        else:
	            dicHashPath[digest] = filePath