Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
A li’l class for data URI manipulation in Python.

DataURI.py

Data URI manipulation made easy.

This isn't very robust, and will reject a number of valid data URIs. However, it meets the most useful case: a mimetype, a charset, and the base64 flag.

Parsing

>>> uri = DataURI('data:text/plain;charset=utf-8;base64,VGhlIHF1aWNrIGJyb3duIGZveCBqdW1wZWQgb3ZlciB0aGUgbGF6eSBkb2cu')
>>> uri.mimetype
'text/plain'
>>> uri.charset
'utf-8'
>>> uri.is_base64
True
>>> uri.data
'The quick brown fox jumped over the lazy dog.'

Note that DataURI.data won't decode the data bytestring into a unicode string based on the charset.

Creating from a string

>>> made = DataURI.make('text/plain', charset='us-ascii', base64=True, data='This is a message.')
>>> made
DataURI('data:text/plain;charset=us-ascii;base64,VGhpcyBpcyBhIG1lc3NhZ2Uu')
>>> made.data
'This is a message.'

Creating from a file

This is really just a convenience method.

>>> png_uri = DataURI.from_file('somefile.png')
>>> png_uri.mimetype
'image/png'
>>> png_uri.data
'\x89PNG\r\n...'

License

This code is released under the Unlicense (c.f. http://unlicense.org/).

import mimetypes
import re
import urllib
MIMETYPE_REGEX = r'[\w]+\/[\w\-\+\.]+'
_MIMETYPE_RE = re.compile('^{}$'.format(MIMETYPE_REGEX))
CHARSET_REGEX = r'[\w\-\+\.]+'
_CHARSET_RE = re.compile('^{}$'.format(CHARSET_REGEX))
DATA_URI_REGEX = (
r'data:' +
r'(?P<mimetype>{})?'.format(MIMETYPE_REGEX) +
r'(?:\;charset\=(?P<charset>{}))?'.format(CHARSET_REGEX) +
r'(?P<base64>\;base64)?' +
r',(?P<data>.*)')
_DATA_URI_RE = re.compile(r'^{}$'.format(DATA_URI_REGEX), re.DOTALL)
class DataURI(str):
@classmethod
def make(cls, mimetype, charset, base64, data):
parts = ['data:']
if mimetype is not None:
if not _MIMETYPE_RE.match(mimetype):
raise ValueError("Invalid mimetype: %r" % mimetype)
parts.append(mimetype)
if charset is not None:
if not _CHARSET_RE.match(charset):
raise ValueError("Invalid charset: %r" % charset)
parts.extend([';charset=', charset])
if base64:
parts.append(';base64')
encoded_data = data.encode('base64').replace('\n', '')
else:
encoded_data = urllib.quote(data)
parts.extend([',', encoded_data])
return cls(''.join(parts))
@classmethod
def from_file(cls, filename, charset=None, base64=True):
mimetype, _ = mimetypes.guess_type(filename, strict=False)
with open(filename) as fp:
data = fp.read()
return cls.make(mimetype, charset, base64, data)
def __new__(cls, *args, **kwargs):
uri = super(DataURI, cls).__new__(cls, *args, **kwargs)
uri._parse # Trigger any ValueErrors on instantiation.
return uri
def __repr__(self):
return 'DataURI(%s)' % (super(DataURI, self).__repr__(),)
def wrap(self, width=76):
return type(self)('\n'.join(textwrap.wrap(self, width)))
@property
def mimetype(self):
return self._parse[0]
@property
def charset(self):
return self._parse[1]
@property
def is_base64(self):
return self._parse[2]
@property
def data(self):
return self._parse[3]
@property
def _parse(self):
match = _DATA_URI_RE.match(self)
if not match:
raise ValueError("Not a valid data URI: %r" % self)
mimetype = match.group('mimetype') or None
charset = match.group('charset') or None
if match.group('base64'):
data = match.group('data').decode('base64')
else:
data = urllib.unquote(match.group('data'))
return mimetype, charset, bool(match.group('base64')), data
@reedstrm

This comment has been minimized.

Copy link

reedstrm commented Nov 12, 2014

Thanks for the code! Exactly what we needed. Using this to help with free/open textbook writing and delivery over at OpenStax (cnx.org)

@jdp

This comment has been minimized.

Copy link

jdp commented Jan 28, 2015

Super helpful, thanks! Having this available through PyPI would be great.

@rskumar

This comment has been minimized.

Copy link

rskumar commented Apr 21, 2016

Nice work.

@izoomi

This comment has been minimized.

Copy link

izoomi commented Mar 1, 2017

You may need to import textwrap

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.