Skip to content

Instantly share code, notes, and snippets.

@reclosedev
Last active December 19, 2015 07:49
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save reclosedev/5921016 to your computer and use it in GitHub Desktop.
Save reclosedev/5921016 to your computer and use it in GitHub Desktop.
Seekable remote stream. Can be used to list the contents of (or download selected files from) huge .zip archives or other structured files on the internet without downloading the whole file. Requires the Requests library.
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import requests
class RemoteFile(object):
    """Read-only, seekable, file-like view of a resource served over HTTP.

    The constructor issues a streaming GET for ``url`` and requires the
    response to carry a ``Content-Length`` header and an ``Accept-Ranges``
    header containing ``bytes``.  Sequential reads consume the response that
    is currently open; a read at any other position opens a new request with
    a ``Range`` header covering exactly the bytes asked for.

    ``total_bytes_downloaded`` counts the body bytes actually read so far.
    """

    def __init__(self, url, session=None):
        """Open ``url`` and verify that it supports byte-range requests.

        :param url: address of the remote resource.
        :param session: optional session object providing
            ``get(url, stream=..., headers=...)``.  When omitted, a
            ``requests.Session`` is created and closed again by ``close()``.
        :raises IOError: if the request fails, the size is unknown, or the
            server does not advertise byte-range support.
        """
        # Every attribute close() touches is set before anything can fail,
        # so close()/__del__ are safe on a half-constructed instance.
        self._response = None
        self._size = 0
        self._position = 0
        self._stream_position = 0
        self._bytes_in_stream_left = 0
        self.total_bytes_downloaded = 0
        self._url = url
        self._need_to_close_session = False
        if session is None:
            session = requests.Session()
            self._need_to_close_session = True
        self._session = session
        try:
            self._ensure_ranges_supported()
        except Exception:
            # Do not leave an owned session open when construction fails.
            self.close()
            raise

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.close()

    def _ensure_ranges_supported(self):
        """Issue the initial request and record the resource size.

        The response is kept open so that reads starting at offset 0 can be
        served from it without a second request.
        """
        try:
            r = self._session.get(self._url, stream=True)
        except Exception as e:
            raise IOError(e)
        # .get() so that a missing header yields the IOError below rather
        # than a KeyError.
        size = r.headers.get('Content-Length')
        if not size:
            r.close()
            raise IOError("Unknown Content-Length")
        if 'bytes' not in r.headers.get('Accept-Ranges', ''):
            r.close()
            raise IOError("Doesn't accept ranges")
        self._bytes_in_stream_left = self._size = int(size)
        self._response = r

    def readable(self):
        """Return True: the object supports ``read()``."""
        return True

    def seekable(self):
        """Return True: the object supports ``seek()`` and ``tell()``."""
        return True

    def seek(self, offset, whence=0):
        """Move the read position and return the new absolute position.

        :param offset: byte offset, interpreted according to ``whence``.
        :param whence: 0 = from the start, 1 = from the current position,
            2 = from the end of the resource.
        :raises ValueError: if ``whence`` is not 0, 1 or 2.
        :raises IOError: if the target lies outside ``[0, size]``; the
            current position is left unchanged in that case.
        """
        if whence == 0:
            new_position = offset
        elif whence == 1:
            new_position = self._position + offset
        elif whence == 2:
            new_position = self._size + offset
        else:
            raise ValueError(
                "invalid whence (%r, should be 0, 1 or 2)" % (whence,))
        # Validate before committing so a failed seek does not corrupt state.
        if not (0 <= new_position <= self._size):
            raise IOError("Out of bounds")
        self._position = new_position
        return new_position

    def tell(self):
        """Return the current absolute read position."""
        return self._position

    def read(self, size=None):
        """Read up to ``size`` bytes from the current position.

        :param size: number of bytes wanted; ``None`` or a negative value
            means "everything up to the end".  Requests running past the end
            are clamped to the bytes that remain.
        :returns: the bytes read; empty at end of file or when ``size`` is 0.
        :raises IOError: if the server does not honour the range request.
        """
        remaining = self._size - self._position
        if size is None or size < 0 or size > remaining:
            size = remaining
        if size <= 0:
            # Nothing to fetch; avoid an unsatisfiable range request.
            return b''
        can_reuse = (self._response is not None
                     and self._stream_position == self._position
                     and self._bytes_in_stream_left >= size)
        if not can_reuse:
            self._open_range(self._position, self._position + size)
        return self._read_stream(size)

    def _open_range(self, start, end):
        """Replace the open response with one covering bytes ``[start, end)``."""
        if self._response is not None:
            self._response.close()
            self._response = None
        # Until the new response is installed there is no stream to reuse.
        self._bytes_in_stream_left = 0
        # HTTP byte ranges are inclusive on both ends, hence ``end - 1``.
        headers = {'Range': 'bytes=%d-%d' % (start, end - 1)}
        r = self._session.get(self._url, stream=True, headers=headers)
        # 206 is the normal success status for a range request.  A plain 200
        # means the server sent the whole body, which is only usable when the
        # requested range starts at offset 0.
        status = r.status_code
        if status != 206 and not (status == 200 and start == 0):
            r.close()
            raise IOError(
                "Range request failed with HTTP status %s" % (status,))
        self._response = r
        self._stream_position = start
        self._bytes_in_stream_left = end - start

    def _read_stream(self, size=None):
        """Read ``size`` bytes from the open response and update counters."""
        data = self._response.raw.read(size)
        received = len(data)
        self._position += received
        self._stream_position += received
        self._bytes_in_stream_left -= received
        self.total_bytes_downloaded += received
        return data

    def close(self):
        """Close the open response and, if this object created it, the session.

        Safe to call more than once.
        """
        # Compare with None explicitly: a requests Response is falsy for
        # 4xx/5xx statuses, and those responses must be closed too.
        if self._response is not None:
            self._response.close()
            self._response = None
        self._bytes_in_stream_left = 0
        if self._need_to_close_session:
            self._session.close()
            self._need_to_close_session = False

    def __del__(self):
        self.close()
# TODO tests
# TODO docstrings
if __name__ == '__main__':
    # Demo: list the contents of a remote .zip archive, then report how many
    # bytes were read compared with the archive's full size.
    import zipfile

    # Alternative, larger archive to try:
    #url = 'http://mirror.yandex.ru/mirrors/ftp.mysql.com/Downloads/MySQL-6.0/mysql-6.0.11-alpha.zip'
    url = 'http://mirror.yandex.ru/mirrors/ftp.mysql.com/Downloads/Connector-Python/mysql-connector-python-1.0.10.zip'
    rf = RemoteFile(url)
    try:
        archive = zipfile.ZipFile(rf)
        archive.printdir()
        # One pre-formatted string keeps this line valid on Python 2 and 3
        # and prints the same text on both.
        print('Size: %s Downloaded: %s' % (rf._size, rf.total_bytes_downloaded))
    finally:
        # Release the HTTP response and session even if listing fails.
        rf.close()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment