Navigation Menu

Skip to content

Instantly share code, notes, and snippets.

@panzi
Created November 2, 2012 21:11
Show Gist options
  • Star 2 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save panzi/4004353 to your computer and use it in GitHub Desktop.
Save panzi/4004353 to your computer and use it in GitHub Desktop.
Python 3 data url handler
import binascii
import urllib.request
import urllib.parse
import email.message
import io
__all__ = ['parse_data_url','DataResponse','DataHandler']
def parse_data_url(url):
    """Decode a ``data:`` URL into its payload and media type.

    Args:
        url: The complete URL as a string, e.g.
            ``"data:text/plain;base64,SGVsbG8%3D"``.

    Returns:
        A ``(data, mediatype)`` tuple. ``data`` is the decoded payload as
        ``bytes``. ``mediatype`` is the text between the scheme and the first
        comma with any trailing ``;base64`` marker removed, or ``None`` when
        that text is empty.

    Raises:
        ValueError: If the scheme is not ``data`` (compared
            case-insensitively), or if the URL lacks the ``:`` or ``,``
            separator.
        binascii.Error: If a ``;base64`` payload cannot be decoded.
    """
    scheme, data = url.split(":", 1)
    # URL schemes are case-insensitive, and an explicit raise (unlike an
    # assert) still runs when Python is started with -O.
    if scheme.lower() != "data":
        raise ValueError("unsupported scheme: " + scheme)
    mediatype, data = data.split(",", 1)
    # base64 urls might have padding ("=") which might (should) be quoted,
    # so percent-decoding has to happen before the base64 decoding:
    data = urllib.parse.unquote_to_bytes(data)
    if mediatype.endswith(";base64"):
        # Strip the 7-character ";base64" suffix from the media type.
        return binascii.a2b_base64(data), mediatype[:-7] or None
    return data, mediatype or None
# DataResponse exposes the mediatype and emulates some methods/properties of
# HTTPResponse: msg, headers, length, info, geturl, getheader and getheaders
class DataResponse(io.BytesIO):
    """Readable in-memory response holding the decoded payload of a data URL.

    The stream content is the payload returned by ``parse_data_url``.
    ``headers`` and ``msg`` refer to the same ``email.message.Message``; it
    carries a ``Content-Type`` header only when the URL specified a media
    type.
    """

    __slots__ = ('url', 'mediatype', 'msg', 'headers', 'length')

    def __init__(self, url):
        """Parse *url* and initialise the stream with its decoded bytes."""
        payload, mediatype = parse_data_url(url)
        super().__init__(payload)

        message = email.message.Message()
        if mediatype is not None:
            message.add_header("Content-Type", mediatype)

        self.url = url
        self.mediatype = mediatype
        self.length = len(payload)
        # Both attribute names point at the very same Message object.
        self.msg = message
        self.headers = message

    def getheader(self, name, default=None):
        """Return all values of header *name* joined by ``', '``.

        Falls back to *default* when the header is absent; a non-string
        iterable default is joined the same way, anything else is returned
        unchanged.
        """
        values = self.headers.get_all(name) or default
        if not isinstance(values, str) and hasattr(values, '__iter__'):
            return ', '.join(values)
        return values

    def getheaders(self):
        """Return the headers as a list of ``(name, value)`` pairs."""
        pairs = self.headers.items()
        return list(pairs)

    def geturl(self):
        """Return the URL this response was created from."""
        return self.url

    def info(self):
        """Return the header ``Message`` object."""
        return self.headers
class DataHandler(urllib.request.BaseHandler):
    """urllib handler that answers ``data:`` requests with a DataResponse."""

    def data_open(self, req):
        """Build a DataResponse from the full URL of *req*."""
        url = req.full_url
        return DataResponse(url)
import binascii
import urllib.request
import urllib.parse
import io
__all__ = ['parse_data_url','DataHandler']
def parse_data_url(url):
    """Split a ``data:`` URL into decoded payload bytes and media type.

    Args:
        url: The complete URL as a string, e.g. ``"data:,Hello%20World"``.

    Returns:
        A ``(data, mediatype)`` tuple. ``data`` is the payload as ``bytes``
        after percent-decoding and, for ``;base64`` URLs, base64-decoding.
        ``mediatype`` is the text before the first comma without a trailing
        ``;base64`` marker, or ``None`` when that text is empty.

    Raises:
        ValueError: If the scheme is not ``data`` (compared
            case-insensitively), or if the ``:`` or ``,`` separator is
            missing.
        binascii.Error: If a ``;base64`` payload cannot be decoded.
    """
    scheme, data = url.split(":", 1)
    # Schemes are case-insensitive; raising explicitly (instead of using
    # assert) keeps the check active under -O.
    if scheme.lower() != "data":
        raise ValueError("unsupported scheme: " + scheme)
    mediatype, data = data.split(",", 1)
    # base64 urls might have padding ("=") which might (should) be quoted,
    # so undo the percent-encoding first:
    data = urllib.parse.unquote_to_bytes(data)
    if mediatype.endswith(";base64"):
        # Drop the 7-character ";base64" suffix from the media type.
        return binascii.a2b_base64(data), mediatype[:-7] or None
    return data, mediatype or None
class DataHandler(urllib.request.BaseHandler):
    """Minimal urllib handler returning a data URL's payload as a byte stream."""

    def data_open(self, req):
        """Return an ``io.BytesIO`` over the decoded payload of *req*'s URL."""
        # Only the payload is used; the media type is discarded.
        payload, _mediatype = parse_data_url(req.full_url)
        return io.BytesIO(payload)
#!/usr/bin/env python3
# Writes the raw decoded payload of every URL given on the command line to
# standard output.
import sys
from dataurl import DataHandler
from urllib.request import urlopen, build_opener, install_opener

# Install the handler globally so that plain urlopen() accepts data: URLs.
install_opener(build_opener(DataHandler))

# Write bytes through the binary buffer underneath sys.stdout instead of
# rebinding sys.stdout to a second file object that owns the same descriptor.
out = sys.stdout.buffer
for url in sys.argv[1:]:
    with urlopen(url) as f:
        out.write(f.read())
#!/usr/bin/env python3
# Prints the payload of every URL given on the command line as text, decoded
# with the charset named in the response's Content-Type header.
import sys
from dataurl import DataHandler
from urllib.request import urlopen, build_opener, install_opener

# Install the handler globally so that plain urlopen() accepts data: URLs.
install_opener(build_opener(DataHandler))

for url in sys.argv[1:]:
    with urlopen(url) as f:
        # assumes utf-8 encoded text when no charset given; the Message
        # accessor replaces hand-rolled parsing of the header parameters:
        charset = f.headers.get_content_charset('UTF-8')
        sys.stdout.write(f.read().decode(charset))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment