Skip to content

Instantly share code, notes, and snippets.

@henryjfry
Last active January 10, 2024 15:45
Show Gist options
  • Save henryjfry/c265230c7aefbf7c80b31509a9d4cd99 to your computer and use it in GitHub Desktop.
Save henryjfry/c265230c7aefbf7c80b31509a9d4cd99 to your computer and use it in GitHub Desktop.
opensubtitles.org HashFile: filehash = (filesize + 64-bit sum of the first 64 KiB + 64-bit sum of the last 64 KiB of the file) truncated to 64 bits and written as 16 hex digits
import struct, os
# Number of bytes in 64 KiB; the hash functions below use it to size the
# chunks taken from the start and the end of the file.
__64k = 65536
# struct format character for a signed 8-byte "long long".
__longlong_format_char = 'q'
# Size in bytes of one such value as reported by struct.calcsize (no
# byte-order prefix, so native size/alignment rules apply).
__byte_size = struct.calcsize(__longlong_format_char)
# Module-level result dict with the two keys the hash functions fill in
# ('filesize' and 'filehash'); both start out as empty strings.
meta = {'filesize': '', 'filehash': ''}
def temp_file():
    """Create an empty temporary file and return its path.

    The file is created with ``delete=False`` so that it still exists after
    the handle is closed; previously the file vanished as soon as the
    temporary-file object was garbage collected, leaving the caller with a
    dangling (and racy) name. The caller is responsible for removing the
    file when it is no longer needed.

    Returns:
        str: absolute path of the newly created, empty file.
    """
    import tempfile

    # Closing the handle (via the context manager) keeps the file on disk
    # because delete=False; only the name is handed back.
    with tempfile.NamedTemporaryFile(delete=False) as handle:
        return handle.name
def size_hashFile_url(meta, filepath):
    """Compute the opensubtitles.org size/hash pair for a file behind a URL.

    The hash is the file size plus the sums of the little-endian 64-bit
    words in the first and in the last 64 KiB of the file, truncated to
    64 bits and rendered as 16 lowercase hex digits.

    Only the two 64 KiB chunks are requested, using a per-request ``Range``
    header. Nothing is installed on urllib's process-wide opener and no
    temporary files are written. If the server ignores ``Range`` (no
    ``Content-Range`` in the reply) the body is read and discarded up to
    the wanted offset, which is slower but still yields the correct hash.

    Args:
        meta: dict that is updated in place with ``'filesize'`` (int) and
            ``'filehash'`` (str).
        filepath: URL of the file to hash.

    Returns:
        ``meta`` on success, or the string ``"SizeError"`` when the file is
        smaller than 128 KiB (``meta['filesize']`` is still set then).

    Raises:
        urllib.error.URLError: network/HTTP failures propagate to the caller.
        TypeError: the server sent no ``Content-Length`` header.
        struct.error: fewer than 64 KiB could be read for one of the chunks.
    """
    from urllib import request

    url = filepath
    chunk_size = 65536  # the hash covers the first and last 64 KiB
    longlongformat = '<q'  # little-endian long long
    bytesize = struct.calcsize(longlongformat)
    # One format string that unpacks a whole chunk in a single call.
    chunk_format = '<%dq' % (chunk_size // bytesize)

    def read_exact(response, count):
        """Read up to ``count`` bytes, looping over short reads until EOF."""
        pieces = []
        while count > 0:
            piece = response.read(count)
            if not piece:
                break
            pieces.append(piece)
            count -= len(piece)
        return b''.join(pieces)

    def read_chunk(start):
        """Return the 64 KiB of the resource that begin at byte ``start``."""
        # HTTP byte ranges are inclusive on both ends.
        byte_range = 'bytes=%d-%d' % (start, start + chunk_size - 1)
        ranged = request.Request(url, headers={'Range': byte_range})
        with request.urlopen(ranged) as response:
            if response.headers.get('Content-Range') is None:
                # Range was not honoured: the body starts at byte 0, so
                # skip forward instead of hashing the wrong bytes.
                read_exact(response, start)
            return read_exact(response, chunk_size)

    # Probe the size first so that nothing is downloaded for small files.
    with request.urlopen(url) as probe:
        filesize = int(probe.headers['Content-Length'])
        if filesize < chunk_size * 2:
            # A partial reply reports the full size after the '/'.
            try:
                filesize = int(str(probe.headers['Content-Range']).split('/')[1])
            except (IndexError, ValueError):
                pass

    meta['filesize'] = filesize
    if filesize < chunk_size * 2:
        return "SizeError"

    head = read_chunk(0)
    tail = read_chunk(filesize - chunk_size)

    checksum = filesize
    checksum += sum(struct.unpack(chunk_format, head))
    checksum += sum(struct.unpack(chunk_format, tail))
    # Mask once at the end: equivalent to masking after every addition.
    meta['filehash'] = "%016x" % (checksum & 0xFFFFFFFFFFFFFFFF)
    return meta
def size_hashFile(meta, filepath):
    """Compute the opensubtitles.org size/hash pair for a file or URL.

    The hash is the file size plus the sums of the little-endian 64-bit
    words in the first and in the last 64 KiB of the file, truncated to
    64 bits and rendered as 16 lowercase hex digits.

    Args:
        meta: dict that is updated in place with ``'filesize'`` (int) and
            ``'filehash'`` (str).
        filepath: local path, or an ``http://`` / ``https://`` URL, in which
            case the work is delegated to ``size_hashFile_url``.

    Returns:
        ``meta`` on success; ``meta`` without a new hash if the file cannot
        be opened or read (``IOError``); or the string ``"SizeError"`` when
        the file is smaller than 128 KiB (``meta['filesize']`` is still set).
    """
    name = filepath
    # Dispatch on the URL scheme only: a plain substring test would also
    # send local paths that merely contain "http" down the network path.
    if str(filepath).lower().startswith(('http://', 'https://')):
        return size_hashFile_url(meta=meta, filepath=filepath)

    chunk_size = 65536  # the hash covers the first and last 64 KiB
    longlongformat = '<q'  # little-endian long long
    bytesize = struct.calcsize(longlongformat)
    # One format string that unpacks a whole chunk in a single call.
    chunk_format = '<%dq' % (chunk_size // bytesize)

    try:
        # The context manager closes the file on every exit path,
        # including the early "SizeError" return.
        with open(name, "rb") as f:
            filesize = os.path.getsize(name)
            meta['filesize'] = filesize
            if filesize < chunk_size * 2:
                return "SizeError"
            head = f.read(chunk_size)
            f.seek(filesize - chunk_size, 0)
            tail = f.read(chunk_size)
    except IOError:
        return meta

    checksum = filesize
    checksum += sum(struct.unpack(chunk_format, head))
    checksum += sum(struct.unpack(chunk_format, tail))
    # Mask once at the end: equivalent to masking after every addition.
    meta['filehash'] = "%016x" % (checksum & 0xFFFFFFFFFFFFFFFF)
    return meta
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment