Created
November 2, 2012 21:11
-
-
Save panzi/4004353 to your computer and use it in GitHub Desktop.
Python 3 data url handler
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import binascii | |
import urllib.request | |
import urllib.parse | |
import email.message | |
import io | |
__all__ = ['parse_data_url','DataResponse','DataHandler'] | |
def parse_data_url(url):
    """Decode a ``data:`` URL into its payload and media type.

    Args:
        url: A string of the form ``data:[<mediatype>][;base64],<data>``.

    Returns:
        A ``(data, mediatype)`` tuple. ``data`` is the decoded payload as
        ``bytes``. ``mediatype`` is the text between the scheme and the
        first comma with a trailing ``;base64`` marker removed, or ``None``
        when that text is empty.

    Raises:
        ValueError: If ``url`` lacks the ``:`` or ``,`` separator, or if
            its scheme is not ``data``. Errors raised by
            ``binascii.a2b_base64`` for a malformed base64 payload
            propagate unchanged.
    """
    scheme, data = url.split(":", 1)
    # URL schemes are case-insensitive, and the check must survive
    # ``python -O``, so compare lowercased and raise explicitly rather
    # than using ``assert``.
    if scheme.lower() != "data":
        raise ValueError("unsupported scheme: " + scheme)
    mediatype, data = data.split(",", 1)
    # The payload may be percent-encoded even when it is base64 (the "="
    # padding should be quoted), so always unquote before decoding.
    data = urllib.parse.unquote_to_bytes(data)
    # Match the marker case-insensitively; its length is fixed at 7
    # characters, so the slice below is valid for any casing.
    if mediatype.lower().endswith(";base64"):
        return binascii.a2b_base64(data), mediatype[:-7] or None
    return data, mediatype or None
# DataResponse exposes the mediatype and emulates some methods/properties of | |
# HTTPResponse: msg, headers, length, info, geturl, getheader and getheaders | |
class DataResponse(io.BytesIO):
    """In-memory response object for a ``data:`` URL.

    The decoded payload is readable through the inherited ``BytesIO``
    interface. The attributes ``url``, ``mediatype``, ``msg``, ``headers``
    and ``length`` plus the methods below mirror a subset of the
    ``HTTPResponse`` API.
    """

    __slots__ = 'url', 'mediatype', 'msg', 'headers', 'length'

    def __init__(self, url):
        """Parse *url* and expose its payload and media type."""
        payload, media = parse_data_url(url)
        super().__init__(payload)
        self.url = url
        self.mediatype = media
        self.length = len(payload)
        # ``msg`` and ``headers`` are two names for the same Message object.
        message = email.message.Message()
        self.headers = message
        self.msg = message
        if media is not None:
            message.add_header("Content-Type", media)

    def getheader(self, name, default=None):
        """Return header *name* as a single string, or *default*.

        Multiple values (or an iterable, non-string *default*) are joined
        with ``', '``; a string or non-iterable *default* is returned as is.
        """
        found = self.headers.get_all(name)
        value = found if found else default
        if isinstance(value, str):
            return value
        if hasattr(value, '__iter__'):
            return ', '.join(value)
        return value

    def getheaders(self):
        """Return all headers as a list of ``(name, value)`` tuples."""
        return list(self.headers.items())

    def geturl(self):
        """Return the URL this response was created from."""
        return self.url

    def info(self):
        """Return the header container (the same object as ``headers``)."""
        return self.headers
class DataHandler(urllib.request.BaseHandler):
    """urllib handler that answers ``data:`` requests from the URL itself."""

    def data_open(self, req):
        """Return a ``DataResponse`` built from the request's full URL."""
        data_url = req.full_url
        return DataResponse(data_url)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import binascii | |
import urllib.request | |
import urllib.parse | |
import io | |
__all__ = ['parse_data_url','DataHandler'] | |
def parse_data_url(url):
    """Split a ``data:`` URL into decoded payload bytes and media type.

    Args:
        url: A string of the form ``data:[<mediatype>][;base64],<data>``.

    Returns:
        A ``(data, mediatype)`` tuple: the payload as ``bytes`` and the
        media-type text preceding the first comma (without a trailing
        ``;base64`` marker), or ``None`` if that text is empty.

    Raises:
        ValueError: If the ``:`` or ``,`` separator is missing, or the
            scheme is not ``data``. Errors from ``binascii.a2b_base64``
            on a malformed base64 payload propagate unchanged.
    """
    scheme, data = url.split(":", 1)
    # Schemes compare case-insensitively; raise explicitly because an
    # ``assert`` would be stripped when Python runs with ``-O``.
    if scheme.lower() != "data":
        raise ValueError("unsupported scheme: " + scheme)
    mediatype, data = data.split(",", 1)
    # base64 payloads may still be percent-encoded (the "=" padding should
    # be quoted), so unquote unconditionally.
    data = urllib.parse.unquote_to_bytes(data)
    # Accept any casing of the marker; it is always 7 characters long.
    if mediatype.lower().endswith(";base64"):
        return binascii.a2b_base64(data), mediatype[:-7] or None
    return data, mediatype or None
class DataHandler(urllib.request.BaseHandler):
    """Minimal urllib handler for ``data:`` URLs (payload only)."""

    def data_open(self, req):
        """Return a ``BytesIO`` holding the decoded payload of the request URL.

        The media type parsed from the URL is discarded.
        """
        payload, _mediatype = parse_data_url(req.full_url)
        return io.BytesIO(payload)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3
"""Write the decoded bytes of every data: URL given as an argument to stdout."""
import sys
from dataurl import DataHandler
from urllib.request import urlopen, build_opener, install_opener

# Install an opener that includes the data: handler, so the plain
# ``urlopen`` call below can open data: URLs.
opener = build_opener(DataHandler)
install_opener(opener)

# Rebind stdout to a binary stream on the same file descriptor so raw
# bytes can be written to it.
sys.stdout = open(sys.stdout.fileno(), "wb")

for url in sys.argv[1:]:
    with urlopen(url) as response:
        payload = response.read()
        sys.stdout.write(payload)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3
"""Print the decoded text of every data: URL given as an argument."""
import sys
from dataurl import DataHandler
from urllib.request import urlopen, build_opener, install_opener

# Install an opener that includes the data: handler, so the plain
# ``urlopen`` call below can open data: URLs.
opener = build_opener(DataHandler)
install_opener(opener)

for url in sys.argv[1:]:
    with urlopen(url) as response:
        raw = response.read()
        # Use the charset parameter of the Content-Type header when there
        # is one; otherwise assume the text is UTF-8 encoded.
        params = response.headers.get_params() or []
        charset = dict(params).get('charset', 'UTF-8')
        sys.stdout.write(raw.decode(charset))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment