Skip to content

Instantly share code, notes, and snippets.

@zacharyvoase
Last active May 21, 2020 06:19
Show Gist options
  • Star 24 You must be signed in to star a gist
  • Fork 6 You must be signed in to fork a gist
  • Save zacharyvoase/5538178 to your computer and use it in GitHub Desktop.
Save zacharyvoase/5538178 to your computer and use it in GitHub Desktop.
A li’l class for data URI manipulation in Python.

DataURI.py

Data URI manipulation made easy.

This isn't very robust, and will reject a number of valid data URIs. However, it meets the most useful case: a mimetype, a charset, and the base64 flag.

Parsing

>>> uri = DataURI('data:text/plain;charset=utf-8;base64,VGhlIHF1aWNrIGJyb3duIGZveCBqdW1wZWQgb3ZlciB0aGUgbGF6eSBkb2cu')
>>> uri.mimetype
'text/plain'
>>> uri.charset
'utf-8'
>>> uri.is_base64
True
>>> uri.data
'The quick brown fox jumped over the lazy dog.'

Note that DataURI.data won't decode the data bytestring into a unicode string based on the charset.

Creating from a string

>>> made = DataURI.make('text/plain', charset='us-ascii', base64=True, data='This is a message.')
>>> made
DataURI('data:text/plain;charset=us-ascii;base64,VGhpcyBpcyBhIG1lc3NhZ2Uu')
>>> made.data
'This is a message.'

Creating from a file

This is really just a convenience method.

>>> png_uri = DataURI.from_file('somefile.png')
>>> png_uri.mimetype
'image/png'
>>> png_uri.data
'\x89PNG\r\n...'

License

This code is released under the Unlicense (c.f. http://unlicense.org/).

import mimetypes
import re
import urllib
MIMETYPE_REGEX = r'[\w]+\/[\w\-\+\.]+'
_MIMETYPE_RE = re.compile('^{}$'.format(MIMETYPE_REGEX))
CHARSET_REGEX = r'[\w\-\+\.]+'
_CHARSET_RE = re.compile('^{}$'.format(CHARSET_REGEX))
DATA_URI_REGEX = (
r'data:' +
r'(?P<mimetype>{})?'.format(MIMETYPE_REGEX) +
r'(?:\;charset\=(?P<charset>{}))?'.format(CHARSET_REGEX) +
r'(?P<base64>\;base64)?' +
r',(?P<data>.*)')
_DATA_URI_RE = re.compile(r'^{}$'.format(DATA_URI_REGEX), re.DOTALL)
class DataURI(str):
@classmethod
def make(cls, mimetype, charset, base64, data):
parts = ['data:']
if mimetype is not None:
if not _MIMETYPE_RE.match(mimetype):
raise ValueError("Invalid mimetype: %r" % mimetype)
parts.append(mimetype)
if charset is not None:
if not _CHARSET_RE.match(charset):
raise ValueError("Invalid charset: %r" % charset)
parts.extend([';charset=', charset])
if base64:
parts.append(';base64')
encoded_data = data.encode('base64').replace('\n', '')
else:
encoded_data = urllib.quote(data)
parts.extend([',', encoded_data])
return cls(''.join(parts))
@classmethod
def from_file(cls, filename, charset=None, base64=True):
mimetype, _ = mimetypes.guess_type(filename, strict=False)
with open(filename) as fp:
data = fp.read()
return cls.make(mimetype, charset, base64, data)
def __new__(cls, *args, **kwargs):
uri = super(DataURI, cls).__new__(cls, *args, **kwargs)
uri._parse # Trigger any ValueErrors on instantiation.
return uri
def __repr__(self):
return 'DataURI(%s)' % (super(DataURI, self).__repr__(),)
def wrap(self, width=76):
return type(self)('\n'.join(textwrap.wrap(self, width)))
@property
def mimetype(self):
return self._parse[0]
@property
def charset(self):
return self._parse[1]
@property
def is_base64(self):
return self._parse[2]
@property
def data(self):
return self._parse[3]
@property
def _parse(self):
match = _DATA_URI_RE.match(self)
if not match:
raise ValueError("Not a valid data URI: %r" % self)
mimetype = match.group('mimetype') or None
charset = match.group('charset') or None
if match.group('base64'):
data = match.group('data').decode('base64')
else:
data = urllib.unquote(match.group('data'))
return mimetype, charset, bool(match.group('base64')), data
@reedstrm
Copy link

Thanks for the code! Exactly what we needed. Using this to help with free/open textbook writing and delivery over at OpenStax (cnx.org)

@jdp
Copy link

jdp commented Jan 28, 2015

Super helpful, thanks! Having this available through PyPI would be great.

@rskumar
Copy link

rskumar commented Apr 21, 2016

Nice work.

@izoomi
Copy link

izoomi commented Mar 1, 2017

You may need to import textwrap

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment