Skip to content

Instantly share code, notes, and snippets.

What would you like to do?
A li’l class for data URI manipulation in Python.

Data URI manipulation made easy.

This isn't very robust, and will reject a number of valid data URIs. However, it meets the most useful case: a mimetype, a charset, and the base64 flag.


>>> uri = DataURI('data:text/plain;charset=utf-8;base64,VGhlIHF1aWNrIGJyb3duIGZveCBqdW1wZWQgb3ZlciB0aGUgbGF6eSBkb2cu')
>>> uri.mimetype
'text/plain'
>>> uri.charset
'utf-8'
>>> uri.is_base64
True
>>> uri.data
'The quick brown fox jumped over the lazy dog.'

Note that this won't decode the data bytestring into a unicode string based on the charset.

Creating from a string

>>> made = DataURI.make('text/plain', charset='us-ascii', base64=True, data='This is a message.')
>>> made
DataURI('data:text/plain;charset=us-ascii;base64,VGhpcyBpcyBhIG1lc3NhZ2Uu')
>>> made.data
'This is a message.'

Creating from a file

This is really just a convenience method.

>>> png_uri = DataURI.from_file('somefile.png')
>>> png_uri.mimetype
'image/png'


This code is released under the Unlicense (c.f. http://unlicense.org/).

import binascii
import mimetypes
import re
import textwrap
import urllib
import urllib.parse
# Pattern fragments for the pieces of a data URI header. The public
# ``*_REGEX`` strings are un-anchored so they can be embedded in larger
# patterns; the private ``_*_RE`` objects are anchored for validation.
MIMETYPE_REGEX = r'[\w]+\/[\w\-\+\.]+'
_MIMETYPE_RE = re.compile('^{}$'.format(MIMETYPE_REGEX))

CHARSET_REGEX = r'[\w\-\+\.]+'
_CHARSET_RE = re.compile('^{}$'.format(CHARSET_REGEX))

# data:[<mimetype>][;charset=<charset>][;base64],<data>
DATA_URI_REGEX = (
    r'data:' +
    r'(?P<mimetype>{})?'.format(MIMETYPE_REGEX) +
    r'(?:\;charset\=(?P<charset>{}))?'.format(CHARSET_REGEX) +
    r'(?P<base64>\;base64)?' +
    r',(?P<data>.*)')
# DOTALL lets the payload contain the newlines that ``DataURI.wrap`` inserts.
_DATA_URI_RE = re.compile(r'^{}$'.format(DATA_URI_REGEX), re.DOTALL)


class DataURI(str):
    """A ``str`` subclass holding a data URI, validated on construction.

    Only the common ``data:[mimetype][;charset=...][;base64],payload`` shape
    is recognised; other (valid) parameter combinations are rejected with
    ``ValueError``.

    The parsed pieces are exposed as read-only properties: ``mimetype``,
    ``charset``, ``is_base64`` and ``data``.  ``data`` is always ``bytes``;
    it is never decoded to text using the declared charset.
    """

    @classmethod
    def make(cls, mimetype, charset, base64, data):
        """Build a data URI from its components.

        Args:
            mimetype: Media type such as ``'text/plain'``, or ``None`` to
                omit it.
            charset: Charset name such as ``'utf-8'``, or ``None`` to omit it.
            base64: If true, base64-encode the payload; otherwise
                percent-encode it.
            data: The payload as ``bytes``.  A ``str`` is accepted and is
                encoded with ``charset`` (UTF-8 when ``charset`` is ``None``).

        Returns:
            A new instance of ``cls``.

        Raises:
            ValueError: If ``mimetype`` or ``charset`` is malformed.
            LookupError: If ``data`` is a ``str`` and ``charset`` is not a
                codec known to Python.
        """
        parts = ['data:']
        if mimetype is not None:
            if not _MIMETYPE_RE.match(mimetype):
                raise ValueError("Invalid mimetype: %r" % mimetype)
            parts.append(mimetype)
        if charset is not None:
            if not _CHARSET_RE.match(charset):
                raise ValueError("Invalid charset: %r" % charset)
            parts.extend([';charset=', charset])
        if isinstance(data, str):
            # Encode text with the charset the URI will advertise.
            data = data.encode(charset or 'utf-8')
        if base64:
            parts.append(';base64')
            # binascii is used instead of the ``base64`` module because the
            # ``base64`` parameter shadows that module name in this scope.
            encoded_data = binascii.b2a_base64(data, newline=False).decode('ascii')
        else:
            encoded_data = urllib.parse.quote(data)
        parts.extend([',', encoded_data])
        return cls(''.join(parts))

    @classmethod
    def from_file(cls, filename, charset=None, base64=True):
        """Build a data URI from a file, guessing the mimetype from its name.

        Args:
            filename: Path of the file to embed.
            charset: Optional charset to declare in the URI.
            base64: Whether to base64-encode the contents (default ``True``).

        Returns:
            A new instance of ``cls``.
        """
        mimetype, _ = mimetypes.guess_type(filename, strict=False)
        # Binary mode: the contents are embedded byte-for-byte, with no
        # newline translation or text decoding.
        with open(filename, 'rb') as fp:
            data = fp.read()
        return cls.make(mimetype, charset, base64, data)

    def __new__(cls, *args, **kwargs):
        """Create the string, then parse it once so bad URIs fail early."""
        uri = super().__new__(cls, *args, **kwargs)
        uri._parse  # Trigger any ValueErrors on instantiation.
        return uri

    def __repr__(self):
        """Return ``DataURI('<uri>')``."""
        return 'DataURI(%s)' % (super().__repr__(),)

    def wrap(self, width=76):
        """Return a copy of this URI hard-wrapped to ``width`` columns."""
        return type(self)('\n'.join(textwrap.wrap(self, width)))

    @property
    def mimetype(self):
        """The media type, or ``None`` if the URI does not specify one."""
        return self._parse[0]

    @property
    def charset(self):
        """The declared charset, or ``None`` if the URI does not specify one."""
        return self._parse[1]

    @property
    def is_base64(self):
        """``True`` if the payload is base64-encoded."""
        return self._parse[2]

    @property
    def data(self):
        """The decoded payload as ``bytes``."""
        return self._parse[3]

    @property
    def _parse(self):
        """Parse the URI into ``(mimetype, charset, is_base64, data)``.

        Raises:
            ValueError: If the string is not a recognised data URI or its
                base64 payload is malformed (``binascii.Error`` is a
                ``ValueError`` subclass).
        """
        match = _DATA_URI_RE.match(self)
        if not match:
            raise ValueError("Not a valid data URI: %r" % self)
        mimetype = match.group('mimetype') or None
        charset = match.group('charset') or None
        is_base64 = bool(match.group('base64'))
        payload = match.group('data')
        if is_base64:
            data = binascii.a2b_base64(payload)
        else:
            data = urllib.parse.unquote_to_bytes(payload)
        return mimetype, charset, is_base64, data

This comment has been minimized.

Copy link

@reedstrm reedstrm commented Nov 12, 2014

Thanks for the code! Exactly what we needed. Using this to help with free/open textbook writing and delivery over at OpenStax (


This comment has been minimized.

Copy link

@jdp jdp commented Jan 28, 2015

Super helpful, thanks! Having this available through PyPI would be great.


This comment has been minimized.

Copy link

@rskumar rskumar commented Apr 21, 2016

Nice work.


This comment has been minimized.

Copy link

@izoomi izoomi commented Mar 1, 2017

You may need to import textwrap

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment