Skip to content

Instantly share code, notes, and snippets.

@habibutsu
Created May 31, 2019 15:30
Show Gist options
  • Save habibutsu/a9649eeea5ad6e2c78310049b10c6342 to your computer and use it in GitHub Desktop.
Save habibutsu/a9649eeea5ad6e2c78310049b10c6342 to your computer and use it in GitHub Desktop.
Reading a remote file over HTTP
import io
from http import HTTPStatus
from urllib.request import Request, urlopen
from urllib.error import HTTPError
class RemoteFile(io.IOBase):
    '''
    Read-only, seekable file-like object backed by HTTP range requests.

    The constructor issues a single HEAD request to learn the size of the
    remote resource and to confirm that the server advertises byte-range
    support. Every non-empty ``read()`` then issues one GET request with a
    ``Range`` header, so nothing is downloaded until it is asked for.

    Example of usage::

        import zipfile
        from remote_file import RemoteFile
        zf = zipfile.ZipFile(RemoteFile('https://site.domain/remote.zip'))
        print(zf.filelist)
    '''

    def __init__(self, url):
        '''
        Probe *url* with a HEAD request and prepare for ranged reads.

        Raises:
            IOError: if the HEAD request fails with an HTTP error, the
                server does not answer ``Accept-Ranges: bytes``, or no
                usable ``Content-Length`` header is returned.
        '''
        self._url = url
        self._offset = 0
        # Last path segment of the URL, exposed like a file's ``name``.
        self.name = self._url.split('/')[-1]
        head_request = Request(url, method='HEAD')
        try:
            # The context manager closes the connection once the headers
            # have been captured; the headers object stays usable.
            with urlopen(head_request) as response:
                response_info = response.info()
        except HTTPError as e:
            raise IOError('could not open remote url') from e
        if response_info.get('Accept-Ranges') != 'bytes':
            raise IOError('remote server does not support range')
        try:
            self._filesize = int(response_info.get('Content-Length'))
        except (TypeError, ValueError) as e:
            # Header absent (None) or not an integer: without a size,
            # SEEK_END and read-to-end cannot work.
            raise IOError(
                'remote server did not report a valid Content-Length'
            ) from e

    def readable(self):
        '''Return True: the object supports ``read()``.'''
        return True

    def seekable(self):
        '''Return True: the object supports ``seek()`` and ``tell()``.'''
        return True

    def seek(self, offset, from_what=io.SEEK_SET):
        '''
        Move the read position and return the new absolute position.

        *from_what* is one of ``io.SEEK_SET``, ``io.SEEK_CUR`` or
        ``io.SEEK_END``. No network traffic is generated.

        Raises:
            ValueError: if *from_what* is not a supported whence value.
            OSError: if the resulting position would be negative.
        '''
        if from_what == io.SEEK_SET:
            new_offset = offset
        elif from_what == io.SEEK_CUR:
            new_offset = self._offset + offset
        elif from_what == io.SEEK_END:
            new_offset = self._filesize + offset
        else:
            raise ValueError('invalid whence value: {!r}'.format(from_what))
        if new_offset < 0:
            # A negative position would produce a malformed Range header
            # on the next read; leave the current position untouched.
            raise OSError('negative seek position')
        self._offset = new_offset
        return self._offset

    def tell(self):
        '''Return the current absolute read position.'''
        return self._offset

    def read(self, size=-1):
        '''
        Read up to *size* bytes from the current position.

        A *size* of ``None`` or a negative value reads through to the end
        of the remote file. Returns ``b''`` without contacting the server
        when *size* is 0 or the position is at or past the end of the file.

        Raises:
            IOError: if the server does not answer with
                ``206 Partial Content``.
        '''
        start = self._offset
        if size is None or size < 0:
            end = self._filesize
        else:
            # Never request bytes beyond the known end of the file.
            end = min(start + size, self._filesize)
        if end <= start:
            # Covers read(0) and reads at/after EOF; an empty or inverted
            # byte range is not a valid request.
            return b''
        get_request = Request(self._url, headers={
            # HTTP byte ranges are inclusive on both ends.
            'Range': 'bytes={}-{}'.format(start, end - 1)
        })
        with urlopen(get_request) as response:
            if response.status != HTTPStatus.PARTIAL_CONTENT:
                raise IOError('could not read data from remote')
            content = response.read()
        # Advance by what was actually received, which may be fewer bytes
        # than requested.
        self._offset += len(content)
        return content
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment