Skip to content

Instantly share code, notes, and snippets.

@xyb
Last active August 20, 2023 12:09
Show Gist options
  • Save xyb/d71ad443d76fe9b5a51a312b689dbe5d to your computer and use it in GitHub Desktop.
Save xyb/d71ad443d76fe9b5a51a312b689dbe5d to your computer and use it in GitHub Desktop.
Compute the OSDB (opensubtitles.org) hash code using a supplied file size value, so that the entire file does not have to be downloaded.
import struct
import os
BLOCK_SIZE = 65536
MIN_SIZE = BLOCK_SIZE * 2


# based on https://trac.opensubtitles.org/projects/opensubtitles/wiki/HashSourceCodes#Python
def osdb_hash(filename, filesize=0):
    """Return the OSDB hash of *filename* as a 16-digit lowercase hex string.

    The hash is the file size plus the sum of every little-endian signed
    64-bit integer found in the first BLOCK_SIZE bytes and in the BLOCK_SIZE
    bytes ending at offset *filesize*, truncated to 64 bits.

    Args:
        filename: Path of the file to read.
        filesize: Size to use for the hash and for locating the trailing
            block. When falsy (the default), the on-disk size reported by
            ``os.path.getsize`` is used instead.

    Returns:
        The hash as ``"%016x"`` text on success. For compatibility with
        existing callers, failures are reported as strings too:
        ``"FileSizeError, at least <MIN_SIZE>, but <size>"`` when the size is
        below MIN_SIZE, and ``"IOError"`` when the file cannot be opened or
        read.

    Raises:
        struct.error: If fewer than BLOCK_SIZE bytes can be read at either
            position (for example when *filesize* points past the data that
            is actually present in the file).
    """
    # One format string that decodes a whole block in a single unpack call.
    block_format = "<%dq" % (BLOCK_SIZE // struct.calcsize("<q"))
    try:
        # The context manager closes the file on every exit path, including
        # the early size-error return and any exception.
        with open(filename, "rb") as f:
            if not filesize:
                filesize = os.path.getsize(filename)
            if filesize < MIN_SIZE:
                return "FileSizeError, at least %d, but %d" % (MIN_SIZE, filesize)
            total = filesize
            total += sum(struct.unpack(block_format, f.read(BLOCK_SIZE)))
            f.seek(max(0, filesize - BLOCK_SIZE), 0)
            total += sum(struct.unpack(block_format, f.read(BLOCK_SIZE)))
    except IOError:
        return "IOError"
    # Python integers are unbounded, so masking once at the end is equivalent
    # to wrapping to 64 bits after every addition.
    return "%016x" % (total & 0xFFFFFFFFFFFFFFFF)
if __name__ == '__main__':
    import sys

    # Command line: PATH [FILESIZE]
    if len(sys.argv) < 2:
        # Exit with a usage message instead of an IndexError traceback.
        sys.exit("usage: %s PATH [FILESIZE]" % sys.argv[0])
    path = sys.argv[1]
    if len(sys.argv) > 2:
        # An explicit size overrides the on-disk size used by osdb_hash.
        print(osdb_hash(path, int(sys.argv[2])))
    else:
        print(osdb_hash(path))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment