Skip to content

Instantly share code, notes, and snippets.

What would you like to do?
A li’l class for data URI manipulation in Python.

Data URI manipulation made easy.

This isn't very robust, and will reject a number of valid data URIs. However, it meets the most useful case: a mimetype, a charset, and the base64 flag.


>>> uri = DataURI('data:text/plain;charset=utf-8;base64,VGhlIHF1aWNrIGJyb3duIGZveCBqdW1wZWQgb3ZlciB0aGUgbGF6eSBkb2cu')
>>> uri.mimetype
'text/plain'
>>> uri.charset
'utf-8'
>>> uri.is_base64
True
>>> uri.data
'The quick brown fox jumped over the lazy dog.'

Note that `DataURI.data` won't decode the data bytestring into a unicode string based on the charset.

Creating from a string

>>> made = DataURI.make('text/plain', charset='us-ascii', base64=True, data='This is a message.')
>>> made
DataURI('data:text/plain;charset=us-ascii;base64,VGhpcyBpcyBhIG1lc3NhZ2Uu')
>>> made.data
'This is a message.'

Creating from a file

This is really just a convenience method.

>>> png_uri = DataURI.from_file('somefile.png')
>>> png_uri.mimetype
'image/png'


This code is released under the Unlicense (cf. https://unlicense.org/).

import binascii
import mimetypes
import re
import textwrap
import urllib

try:  # Python 3
    from urllib.parse import quote as _quote
    from urllib.parse import unquote_to_bytes as _unquote
except ImportError:  # Python 2
    from urllib import quote as _quote
    from urllib import unquote as _unquote
# Pattern fragment for a MIME type such as ``text/plain`` or ``image/svg+xml``.
MIMETYPE_REGEX = r'[\w]+\/[\w\-\+\.]+'
# ``\Z`` rather than ``$``: ``$`` would also accept a trailing newline, which
# must never end up inside the header of a generated URI.
_MIMETYPE_RE = re.compile(r'^{}\Z'.format(MIMETYPE_REGEX))

# Pattern fragment for a charset name such as ``utf-8`` or ``us-ascii``.
CHARSET_REGEX = r'[\w\-\+\.]+'
_CHARSET_RE = re.compile(r'^{}\Z'.format(CHARSET_REGEX))

# Full data-URI pattern:
#     data:[<mimetype>][;charset=<charset>][;base64],<data>
# Each header component is optional; everything after the first comma that
# follows the header is the payload.
DATA_URI_REGEX = (
    r'data:' +
    r'(?P<mimetype>{})?'.format(MIMETYPE_REGEX) +
    r'(?:\;charset\=(?P<charset>{}))?'.format(CHARSET_REGEX) +
    r'(?P<base64>\;base64)?' +
    r',(?P<data>.*)')
# DOTALL lets the payload span several lines, which is what ``DataURI.wrap``
# produces.
_DATA_URI_RE = re.compile(r'^{}$'.format(DATA_URI_REGEX), re.DOTALL)


class DataURI(str):
    """A ``data:`` URI held as a string, with parsed access to its parts.

    Instances are ordinary strings and can be used wherever a ``str`` is
    expected. The text is validated when the instance is created and is
    re-parsed on every property access; nothing is cached.

    Only the form ``data:[<mimetype>][;charset=<charset>][;base64],<data>``
    is recognised. Any other media-type parameter makes the URI invalid.
    """

    @classmethod
    def make(cls, mimetype, charset, base64, data):
        """Build a data URI from its components.

        Args:
            mimetype: MIME type string, or ``None`` to omit it.
            charset: Charset name, or ``None`` to omit it.
            base64: If true, base64-encode the payload and add the
                ``;base64`` flag; otherwise percent-encode the payload.
            data: The payload. Bytes are used as-is; text is first encoded
                with ``charset`` (UTF-8 when ``charset`` is ``None``).

        Returns:
            A new instance of ``cls``.

        Raises:
            ValueError: If ``mimetype`` or ``charset`` is malformed, or text
                ``data`` cannot be encoded with the chosen charset.
            LookupError: If text ``data`` is given and ``charset`` is not a
                codec name known to Python.
        """
        parts = ['data:']
        if mimetype is not None:
            if not _MIMETYPE_RE.match(mimetype):
                raise ValueError("Invalid mimetype: %r" % mimetype)
            parts.append(mimetype)
        if charset is not None:
            if not _CHARSET_RE.match(charset):
                raise ValueError("Invalid charset: %r" % charset)
            parts.extend([';charset=', charset])
        if not isinstance(data, bytes):
            # Both encoders below work on bytes; encode text with the charset
            # the URI declares so the payload and the header agree.
            data = data.encode(charset or 'utf-8')
        if base64:
            parts.append(';base64')
            # b2a_base64 terminates its output with a newline; strip it.
            encoded_data = (
                binascii.b2a_base64(data).decode('ascii').replace('\n', ''))
        else:
            encoded_data = _quote(data)
        parts.extend([',', encoded_data])
        return cls(''.join(parts))

    @classmethod
    def from_file(cls, filename, charset=None, base64=True):
        """Build a data URI from the contents of a file.

        The MIME type is guessed from ``filename``; when it cannot be
        guessed the URI is built without one.

        Args:
            filename: Path of the file to read.
            charset: Optional charset name to record in the URI.
            base64: Whether to base64-encode the contents (default true).

        Returns:
            A new instance of ``cls``.
        """
        mimetype, _ = mimetypes.guess_type(filename, strict=False)
        # Binary mode: the payload must be the file's exact bytes, with no
        # newline translation or text decoding applied.
        with open(filename, 'rb') as fp:
            data = fp.read()
        return cls.make(mimetype, charset, base64, data)

    def __new__(cls, *args, **kwargs):
        """Create the string and validate it as a data URI.

        Raises:
            ValueError: If the resulting text is not a recognised data URI.
        """
        uri = super(DataURI, cls).__new__(cls, *args, **kwargs)
        uri._parse  # Trigger any ValueErrors on instantiation.
        return uri

    def __repr__(self):
        """Return ``DataURI(<repr of the underlying string>)``."""
        return 'DataURI(%s)' % (super(DataURI, self).__repr__(),)

    def wrap(self, width=76):
        """Return a copy of this URI broken into lines of at most ``width``.

        The result is re-validated, so ``width`` must be large enough to
        keep the ``data:...,`` header on the first line. Line breaks become
        part of a percent-encoded payload, so this is mainly useful for
        base64 URIs, whose decoding skips them.

        Raises:
            ValueError: If the wrapped text is no longer a valid data URI.
        """
        # break_on_hyphens=False: otherwise textwrap may prefer to split
        # after a hyphen in the header (e.g. inside ``charset=utf-8``)
        # instead of filling the line, corrupting the URI.
        lines = textwrap.wrap(self, width, break_on_hyphens=False)
        return type(self)('\n'.join(lines))

    @property
    def mimetype(self):
        """The MIME type string, or ``None`` if the URI has none."""
        return self._parse[0]

    @property
    def charset(self):
        """The charset name, or ``None`` if the URI has none."""
        return self._parse[1]

    @property
    def is_base64(self):
        """``True`` if the URI carries the ``;base64`` flag."""
        return self._parse[2]

    @property
    def data(self):
        """The decoded payload as a byte string.

        The bytes are not decoded to text using ``charset``.
        """
        return self._parse[3]

    @property
    def _parse(self):
        """Parse the URI into ``(mimetype, charset, is_base64, data)``.

        Raises:
            ValueError: If the text does not match the data-URI pattern.
        """
        match = _DATA_URI_RE.match(self)
        if not match:
            raise ValueError("Not a valid data URI: %r" % self)
        mimetype = match.group('mimetype') or None
        charset = match.group('charset') or None
        is_base64 = bool(match.group('base64'))
        if is_base64:
            data = binascii.a2b_base64(match.group('data'))
        else:
            data = _unquote(match.group('data'))
        return mimetype, charset, is_base64, data

This comment has been minimized.

Copy link

reedstrm commented Nov 12, 2014

Thanks for the code! Exactly what we needed. Using this to help with free/open textbook writing and delivery over at OpenStax.


This comment has been minimized.

Copy link

jdp commented Jan 28, 2015

Super helpful, thanks! Having this available through PyPI would be great.


This comment has been minimized.

Copy link

rskumar commented Apr 21, 2016

Nice work.


This comment has been minimized.

Copy link

izoomi commented Mar 1, 2017

You may need to import textwrap

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.