Skip to content

Instantly share code, notes, and snippets.

@codekiln
Created May 2, 2017 18:44
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save codekiln/638c0d64c72821c75741cd9f4c0ff35d to your computer and use it in GitHub Desktop.
Save codekiln/638c0d64c72821c75741cd9f4c0ff35d to your computer and use it in GitHub Desktop.
Get Git Object Hash
import os
import hashlib
def get_git_object_hash(filepath):
    """
    Return the git blob hash of the file at `filepath` as a hex string.

    If you use git hash-object <file>, it will give you git's
    internal hash for that object: http://stackoverflow.com/a/552725/78202
    Git computes it as the SHA-1 of the header "blob <size>\\0" followed by
    the raw file bytes, and this function reproduces that computation
    without shelling out to git.

    The result should match the output of `git hash-object <file>` for the
    same bytes. NOTE(review): git may apply configured input filters (for
    example end-of-line conversion) before hashing unless `--no-filters`
    is passed; this function always hashes the bytes exactly as stored on
    disk.

    :param filepath: path of the file to hash.
    :return: 40-character lowercase hexadecimal SHA-1 digest.
    :raises OSError: if the file cannot be opened or read.
    """
    # 64kb chunks, so large files are never loaded into memory at once
    BUF_SIZE = 65536
    sha1 = hashlib.sha1()
    with open(filepath, 'rb') as f:
        # Take the size from the descriptor we are about to read, not from
        # the path, so the header and the hashed content always refer to
        # the same file even if the path is replaced in the meantime.
        filesize_bytes = os.fstat(f.fileno()).st_size
        sha1.update(("blob %d\0" % filesize_bytes).encode('utf-8'))
        # read() returns b'' at end of file, which stops the iteration
        for data in iter(lambda: f.read(BUF_SIZE), b''):
            sha1.update(data)
    return sha1.hexdigest()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment