Skip to content

Instantly share code, notes, and snippets.

@migurski
Created August 1, 2012 18:44
Show Gist options
  • Save migurski/3229707 to your computer and use it in GitHub Desktop.
Save migurski/3229707 to your computer and use it in GitHub Desktop.
RemoteFileObject is a simple mapping from HTTP Range headers to a local file-like object
from time import time
from urlparse import urlparse
from httplib import HTTPConnection
from os.path import basename
from cStringIO import StringIO
from datetime import timedelta
from os import SEEK_SET, SEEK_CUR, SEEK_END
class RemoteFileObject:
""" Implement enough of this to be useful:
http://docs.python.org/release/2.5.2/lib/bltin-file-objects.html
Pull data from a remote URL with HTTP range headers.
"""
def __init__(self, url, verbose=False, block_size=(16 * 1024)):
self.verbose = verbose
# scheme://host/path;parameters?query#fragment
(scheme, host, path, parameters, query, fragment) = urlparse(url)
self.host = host
self.rest = path + (query and ('?' + query) or '')
self.offset = 0
self.length = self.get_length()
self.chunks = {}
self.block_size = block_size
self.start_time = time()
def get_length(self):
"""
"""
conn = HTTPConnection(self.host)
conn.request('GET', self.rest, headers={'Range': '0-1'})
length = int(conn.getresponse().getheader('content-length'))
if self.verbose:
print >> stderr, length, 'bytes in', basename(self.rest)
return length
def get_range(self, start, end):
"""
"""
headers = {'Range': 'bytes=%(start)d-%(end)d' % locals()}
conn = HTTPConnection(self.host)
conn.request('GET', self.rest, headers=headers)
return conn.getresponse().read()
def read(self, count=None):
""" Read /count/ bytes from the resource at the current offset.
"""
if count is None:
# to the end
count = self.length - self.offset
out = StringIO()
while count:
chunk_offset = self.block_size * (self.offset / self.block_size)
if chunk_offset not in self.chunks:
range = chunk_offset, min(self.length, self.offset + self.block_size) - 1
self.chunks[chunk_offset] = StringIO(self.get_range(*range))
if self.verbose:
loaded = float(self.block_size) * len(self.chunks) / self.length
expect = (time() - self.start_time) / loaded
remain = max(0, int(expect * (1 - loaded)))
print >> stderr, '%.1f%%' % min(100, 100 * loaded),
print >> stderr, 'of', basename(self.rest),
print >> stderr, 'with', timedelta(seconds=remain), 'to go'
chunk = self.chunks[chunk_offset]
in_chunk_offset = self.offset % self.block_size
in_chunk_count = min(count, self.block_size - in_chunk_offset)
chunk.seek(in_chunk_offset, SEEK_SET)
out.write(chunk.read(in_chunk_count))
count -= in_chunk_count
self.offset += in_chunk_count
out.seek(0)
return out.read()
def seek(self, offset, whence=SEEK_SET):
""" Seek to the specified offset.
/whence/ behaves as with other file-like objects:
http://docs.python.org/lib/bltin-file-objects.html
"""
if whence == SEEK_SET:
self.offset = offset
elif whence == SEEK_CUR:
self.offset += offset
elif whence == SEEK_END:
self.offset = self.length + offset
def tell(self):
return self.offset
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment