Skip to content

Instantly share code, notes, and snippets.

@NyaMisty
Created August 5, 2023 12:47
Show Gist options
  • Save NyaMisty/d515964c40ed941b3aa7f8019657c532 to your computer and use it in GitHub Desktop.
Save NyaMisty/d515964c40ed941b3aa7f8019657c532 to your computer and use it in GitHub Desktop.
A Python file-like object over HTTP, using ranged requests!
import io
import requests
class HTTPFileObj:
    """Read-only, seekable, file-like view of a remote resource over HTTP.

    Data is streamed with ``requests`` and every (re)positioning that cannot
    be served from already-fetched data issues a new ``Range: bytes=N-``
    request.

    Invariants kept by every method:

    * ``start_byte`` is the absolute offset of the next byte ``read()``
      returns.
    * ``buffer`` holds bytes already pulled from the stream that start
      exactly at ``start_byte``.
    * ``remaining_bytes`` counts the bytes not yet pulled from the current
      stream, so the stream itself is positioned at
      ``start_byte + len(buffer)``.
    * ``max_size`` is the ``Content-Length`` of the very first response
      (requested from offset 0); a response without that header is treated
      as an empty resource.

    Args:
        url: Address of the resource to read.
        chunk_size: Number of bytes requested from the stream per iteration.
    """

    def __init__(self, url, chunk_size=1024):
        self.url = url
        self.chunk_size = chunk_size
        self.start_byte = 0
        self.buffer = b""
        self.max_size = -1  # unknown until the first response arrives
        self.eof = False
        self.response = None
        self._download()

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.close()

    def _download(self):
        """(Re)open the HTTP stream so that it starts at ``start_byte``."""
        previous = getattr(self, "response", None)
        if previous is not None:
            # Release the old connection instead of leaking it.
            previous.close()
        self.response = self._get_response()
        self.remaining_bytes = int(self.response.headers.get("Content-Length", 0))
        if self.max_size < 0:
            self.max_size = self.remaining_bytes
        self.content = self.response.iter_content(chunk_size=self.chunk_size)

    def _get_response(self):
        """Issue the ranged GET request and return the streaming response.

        Raises:
            OSError: The server answered with an error status, or ignored the
                ``Range`` header for a non-zero offset (``requests`` HTTP
                errors are ``OSError`` subclasses).
        """
        headers = {
            "Range": f"bytes={self.start_byte}-",
            # Byte offsets only make sense on the unencoded body; a
            # compressed transfer would make Content-Length disagree with
            # the decoded bytes that iter_content() yields.
            "Accept-Encoding": "identity",
        }
        response = requests.get(self.url, headers=headers, stream=True)
        try:
            response.raise_for_status()
            if self.start_byte > 0 and response.status_code != 206:
                # A plain 200 means the body starts at offset 0 again, so
                # reading it would silently return the wrong bytes.
                raise OSError(
                    f"server ignored the Range request for {self.url!r} "
                    f"(status {response.status_code})"
                )
        except Exception:
            response.close()
            raise
        return response

    def _next_chunk(self):
        """Return the next chunk of the stream, or ``None`` if none is left.

        If the stream ends while bytes are still expected, one new ranged
        request is made from the current position before giving up. Callers
        only invoke this with an empty ``buffer``, so ``start_byte`` is the
        stream position at that point.
        """
        chunk = next(self.content, None)
        if chunk is None:
            self._download()
            chunk = next(self.content, None)
        return chunk

    def tell(self):
        """Return the absolute offset of the next byte ``read()`` returns."""
        return self.start_byte

    def seek(self, offset, whence=io.SEEK_SET):
        """Move the read position and return the new absolute offset.

        Args:
            offset: Byte offset, interpreted relative to ``whence``.
            whence: ``io.SEEK_SET``, ``io.SEEK_CUR`` or ``io.SEEK_END``.

        Raises:
            ValueError: ``whence`` is not one of the three constants, or the
                resulting position is negative.
        """
        if whence == io.SEEK_SET:
            target = offset
        elif whence == io.SEEK_CUR:
            target = self.start_byte + offset
        elif whence == io.SEEK_END:
            target = self.max_size + offset
        else:
            raise ValueError(f"invalid whence ({whence!r}, should be 0, 1 or 2)")
        if target < 0:
            raise ValueError(f"negative seek position {target}")

        delta = target - self.start_byte
        if delta == 0:
            # Already there; keep the open stream and the buffer.
            return self.start_byte

        if target >= self.max_size:
            # At or beyond the end: nothing is left to read from here.
            self.start_byte = target
            self.buffer = b""
            self.remaining_bytes = 0
            self.eof = True
        elif 0 < delta <= len(self.buffer):
            # Short forward hop inside data that was already fetched. The
            # stream position does not move, so remaining_bytes is untouched.
            self.buffer = self.buffer[delta:]
            self.start_byte = target
            self.eof = self.remaining_bytes <= 0 and not self.buffer
        else:
            # Backward seek or a jump past the buffer: start a new stream.
            self.start_byte = target
            self.buffer = b""
            self.eof = False
            self._download()
        return self.start_byte

    def read(self, size=-1):
        """Read and return up to ``size`` bytes from the current position.

        Args:
            size: Maximum number of bytes to return. A negative value or
                ``None`` reads everything up to the end of the resource.

        Returns:
            The bytes read; ``b""`` once the end has been reached.
        """
        if size is None:
            size = -1
        if self.eof and not self.buffer:
            return b""

        read_all = size < 0
        parts = []
        got = 0

        # Serve previously fetched bytes first.
        if self.buffer:
            head = self.buffer if read_all else self.buffer[:size]
            self.buffer = self.buffer[len(head):]
            self.start_byte += len(head)
            got = len(head)
            parts.append(head)

        # The buffer is empty whenever this loop runs: either everything was
        # taken above, or the request was already satisfied and we skip it.
        while (read_all or got < size) and self.remaining_bytes > 0:
            chunk = self._next_chunk()
            if chunk is None:
                break
            self.remaining_bytes -= len(chunk)
            if not read_all and len(chunk) > size - got:
                keep = size - got
                self.buffer = chunk[keep:]  # surplus is kept for the next read
                chunk = chunk[:keep]
            self.start_byte += len(chunk)
            got += len(chunk)
            parts.append(chunk)

        # End of file only once the stream AND the buffer are exhausted.
        self.eof = self.remaining_bytes <= 0 and not self.buffer
        return b"".join(parts)

    def close(self):
        """Close the underlying HTTP response, releasing its connection."""
        response = getattr(self, "response", None)
        if response is not None:
            response.close()
# Example: print the first 100 bytes, jump to offset 10485660 (100 bytes
# short of 10 MiB) and print up to 100 bytes from there.
url = "http://127.0.0.1:12343/chfs/shared/test10m"
file_obj = HTTPFileObj(url)
try:
    print(file_obj.read(100).hex())
    print(file_obj.seek(10485660))
    print(file_obj.read(100).hex())
finally:
    # Release the HTTP connection even if one of the requests above fails.
    file_obj.close()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment