Last active
August 20, 2023 12:09
-
-
Save xyb/d71ad443d76fe9b5a51a312b689dbe5d to your computer and use it in GitHub Desktop.
Compute the OSDb (opensubtitles.org) hash code using a supplied file-size value, so that the entire file does not have to be downloaded.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import struct | |
import os | |
BLOCK_SIZE = 65536
MIN_SIZE = BLOCK_SIZE * 2


# based on https://trac.opensubtitles.org/projects/opensubtitles/wiki/HashSourceCodes#Python
def osdb_hash(filename, filesize=0):
    """Return the OSDb hash of *filename* as a 16-digit lowercase hex string.

    The hash is the file size plus the sum of the little-endian signed
    64-bit words found in the first ``BLOCK_SIZE`` bytes and in the
    ``BLOCK_SIZE`` bytes that end at offset ``filesize``, truncated to
    64 bits.

    Args:
        filename: Path of the file to read.
        filesize: Size in bytes to use for the hash and for locating the
            trailing block. When falsy (the default), the size reported by
            ``os.path.getsize(filename)`` is used instead.

    Returns:
        The hash as a zero-padded 16-character hex string on success.
        ``"FileSizeError, at least <MIN_SIZE>, but <filesize>"`` when the
        size is smaller than ``MIN_SIZE``. ``"IOError"`` when opening,
        sizing, seeking or reading the file raises ``IOError``.

    Raises:
        struct.error: If either block read returns fewer than
            ``BLOCK_SIZE`` bytes (for example when *filesize* is larger
            than the data actually present in the file).
    """
    # One format string that decodes a whole block of little-endian
    # signed 64-bit integers in a single unpack call.
    words_per_block = BLOCK_SIZE // struct.calcsize("<q")
    block_format = "<%dq" % words_per_block

    try:
        # The context manager closes the handle on every exit path,
        # including the early size-error return and any exception.
        with open(filename, "rb") as stream:
            if not filesize:
                filesize = os.path.getsize(filename)
            if filesize < MIN_SIZE:
                return "FileSizeError, at least %d, but %d" % (MIN_SIZE, filesize)
            head = stream.read(BLOCK_SIZE)
            stream.seek(max(0, filesize - BLOCK_SIZE), 0)
            tail = stream.read(BLOCK_SIZE)
    except IOError:
        return "IOError"

    total = filesize
    total += sum(struct.unpack(block_format, head))
    total += sum(struct.unpack(block_format, tail))
    # Masking once at the end is equivalent to masking after every
    # addition: both reduce the sum modulo 2**64.
    return "%016x" % (total & 0xFFFFFFFFFFFFFFFF)
if __name__ == '__main__':
    import sys

    # Command line: <script> PATH [FILESIZE]
    # PATH is required; FILESIZE, when given, is parsed as an integer and
    # passed through as the size to hash against.
    cli_args = sys.argv[1:]
    target = cli_args[0]
    if len(cli_args) > 1:
        result = osdb_hash(target, int(cli_args[1]))
    else:
        result = osdb_hash(target)
    print(result)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment