# soamaven/embedimages.py

## embedimages.py
"""HTML Image handling for embedded images in markdown cells."""

#-----------------------------------------------------------------------------
# Copyright (c) 2013, the IPython Development Team.
#
# Distributed under the terms of the Modified BSD License.
#
# The full license is in the file COPYING.txt, distributed with this software.
#-----------------------------------------------------------------------------

#-----------------------------------------------------------------------------
# Imports
#-----------------------------------------------------------------------------
import base64
import os.path
import re

from ipython_genutils.py3compat import PY3

if PY3:
    from html.parser import HTMLParser
else:
    from HTMLParser import HTMLParser

#-----------------------------------------------------------------------------
# Functions
#-----------------------------------------------------------------------------

# Names exported by `from <module> import *`: only the conversion function.
__all__ = ['img2base64']


def img2base64(s):
    """Embed images referenced from HTML tags in Markdown cells.

    The text is scanned with `Img2Base64Parser`; every image reference it
    reports is replaced by a base64 data URI so that the resulting HTML is
    self-contained.  The transformation looks like this:

    `<img src="./Images/My_image.png" width="800" height="800" alt="Alt_name" title="Mytitle" align="center" />`

    Becomes

    `<img src="data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAADIA..." width="800" height="800" alt="Alt_name" title="Mytitle" align="center" />`

    Parameters
    ----------
    s : str
        Markdown/HTML source text.

    Returns
    -------
    str
        `s` with each reported ``src`` value replaced by a data URI.  All
        other text, including everything after the last image, is copied
        through unchanged.

    Errors raised by the parser while reading an image file propagate to
    the caller.
    """
    # Extensions whose registered ``image/*`` subtype differs from the
    # extension itself; anything else is used as-is.
    subtypes = {'jpg': 'jpeg', 'jpe': 'jpeg', 'svg': 'svg+xml', 'tif': 'tiff'}

    parser = Img2Base64Parser()
    parser.feed(s)
    parser.close()

    pieces = []
    cursor = 0
    for img in parser.imglist:
        encoded, extension = img[0]
        start, end = img[1]
        if isinstance(encoded, bytes):
            # b64encode() returns bytes.  Decode them instead of trimming
            # the repr: lstrip("b'") removes *characters*, so a payload
            # that starts with 'b' would lose data.
            encoded = encoded.decode('ascii')
        pieces.append(s[cursor:start])
        pieces.append(u'data:image/%s;base64,%s'
                      % (subtypes.get(extension, extension), encoded))
        # Always continue right after the replaced value, whether or not
        # the parser saw an end tag for this image.
        cursor = end
    pieces.append(s[cursor:])
    return u''.join(pieces)

#-----------------------------------------------------------------------------
# Classes
#-----------------------------------------------------------------------------

class Img2Base64Parser(HTMLParser):
    """Image Parser
    Collects the local image files referenced by ``img`` tags.

    For every ``<img>`` start tag whose ``src`` names a local file, an
    entry is appended to `imglist`::

        [[encoded_data, extension], [start, end], tag_end]

    * ``encoded_data`` -- file content as returned by ``base64.b64encode``
    * ``extension`` -- lower-cased file extension without the dot
    * ``start``, ``end`` -- slice of the fed text holding the raw ``src``
      value (quotes excluded)
    * ``tag_end`` -- offset just past the start tag

    ``src`` values that are not file paths (``data:`` URIs, URLs containing
    ``://``, protocol-relative ``//host/...`` URLs) are skipped.  A local
    file that cannot be opened still raises from ``open``.

    Offsets refer to the text of the most recent `feed` call, so a document
    has to be fed in one piece.

    Inherits from HTMLParser, overrides:
     - handle_starttag
     - feed
    """
    # Finds a ``src`` attribute inside the raw text of a start tag.  Exactly
    # one of the three groups (double-quoted, single-quoted, unquoted)
    # captures the value.
    _SRC_ATTR = re.compile(
        r'''(?<=[\s"'/])src\s*=\s*(?:"([^"]*)"|'([^']*)'|([^\s>"']+))''',
        re.IGNORECASE)

    # not used while parsing; kept so code reading the attribute still works
    opentags = None
    # list of found imgs
    imglist = None
    # tag name of the most recently recorded image
    imgtag = None
    # text handed to the latest feed() call
    data = None

    def __init__(self):
        self.imglist = []
        self.opentags = 0
        self.data = u''
        # index in `data` at which each line starts (line 1 -> element 0)
        self._line_starts = [0]
        HTMLParser.__init__(self)

    def get_offset(self):
        """Return the index in `data` of the construct being parsed."""
        # getpos() reports a 1-based line and a 0-based column.
        lineno, column = self.getpos()
        return self._line_starts[lineno - 1] + column

    @staticmethod
    def _is_local_file(src):
        """Tell whether `src` looks like a file path rather than a URL."""
        lowered = src.lower()
        return not (lowered.startswith('data:')
                    or lowered.startswith('//')
                    or '://' in lowered)

    def _find_src_span(self, starttag_text, value):
        """Return (start, end) of the raw ``src`` value inside the tag text.

        A match whose raw text equals the parsed `value` wins, so that
        ``src=`` occurring inside another attribute's value is not picked
        up.  Otherwise the first match is used (the raw text differs from
        `value` when it contains character references).  Returns None if
        nothing matches.
        """
        fallback = None
        for match in self._SRC_ATTR.finditer(starttag_text):
            group = match.lastindex
            if match.group(group) == value:
                return match.span(group)
            if fallback is None:
                fallback = match.span(group)
        return fallback

    def handle_starttag(self, tag, attrs):
        """Record the image file referenced by an ``img`` start tag."""
        if tag != 'img':
            return
        # The first src attribute wins; its value is None when the
        # attribute was written without one.
        src = next((value for name, value in attrs if name.lower() == 'src'),
                   None)
        if not src or not self._is_local_file(src):
            return
        raw_tag = self.get_starttag_text()
        span = self._find_src_span(raw_tag, src)
        if span is None:
            return

        with open(src, "rb") as image_file:
            encoded_data = base64.b64encode(image_file.read())
        extension = os.path.splitext(src)[1][1:].strip().lower()

        tag_start = self.get_offset()
        self.imgtag = tag
        self.imglist.append([[encoded_data, extension],
                             [tag_start + span[0], tag_start + span[1]],
                             tag_start + len(raw_tag)])

    def feed(self, data):
        """Remember `data` and its line starts, then parse it."""
        self.data = data
        # HTMLParser counts lines by '\n' only, so do the same here.
        self._line_starts = [0]
        self._line_starts.extend(m.end() for m in re.finditer(r'\n', data))
        HTMLParser.feed(self, data)
	"""HTML Image handling for embedded images in markdown cells."""

	#-----------------------------------------------------------------------------
	# Copyright (c) 2013, the IPython Development Team.
	#
	# Distributed under the terms of the Modified BSD License.
	#
	# The full license is in the file COPYING.txt, distributed with this software.
	#-----------------------------------------------------------------------------

	#-----------------------------------------------------------------------------
	# Imports
	#-----------------------------------------------------------------------------
	from ipython_genutils.py3compat import PY3
	if PY3:
	from html.parser import HTMLParser
	else:
	from HTMLParser import HTMLParser
	import base64
	import os.path

	#-----------------------------------------------------------------------------
	# Functions
	#-----------------------------------------------------------------------------

	# Names exported by `from <module> import *`: only the conversion function.
	__all__ = ['img2base64']


	def img2base64(s):
		"""Embed images referenced from HTML tags in Markdown cells.

		The text is scanned with `Img2Base64Parser`; every image reference it
		reports is replaced by a base64 data URI so that the resulting HTML is
		self-contained.  The transformation looks like this:

		`<img src="./Images/My_image.png" width="800" height="800" alt="Alt_name" title="Mytitle" align="center" />`

		Becomes

		`<img src="data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAADIA..." width="800" height="800" alt="Alt_name" title="Mytitle" align="center" />`

		Parameters
		----------
		s : str
			Markdown/HTML source text.

		Returns
		-------
		str
			`s` with each reported ``src`` value replaced by a data URI.  All
			other text, including everything after the last image, is copied
			through unchanged.

		Errors raised by the parser while reading an image file propagate to
		the caller.
		"""
		# Extensions whose registered ``image/*`` subtype differs from the
		# extension itself; anything else is used as-is.
		subtypes = {'jpg': 'jpeg', 'jpe': 'jpeg', 'svg': 'svg+xml', 'tif': 'tiff'}

		parser = Img2Base64Parser()
		parser.feed(s)
		parser.close()

		pieces = []
		cursor = 0
		for img in parser.imglist:
			encoded, extension = img[0]
			start, end = img[1]
			if isinstance(encoded, bytes):
				# b64encode() returns bytes.  Decode them instead of trimming
				# the repr: lstrip("b'") removes *characters*, so a payload
				# that starts with 'b' would lose data.
				encoded = encoded.decode('ascii')
			pieces.append(s[cursor:start])
			pieces.append(
				u'data:image/%s;base64,%s'
				% (subtypes.get(extension, extension), encoded))
			# Always continue right after the replaced value, whether or not
			# the parser saw an end tag for this image.
			cursor = end
		pieces.append(s[cursor:])
		return u''.join(pieces)

	#-----------------------------------------------------------------------------
	# Classes
	#-----------------------------------------------------------------------------

	class Img2Base64Parser(HTMLParser):
		"""Image Parser
		Collects the local image files referenced by ``img`` tags.

		For every ``<img>`` start tag whose ``src`` names a local file, an
		entry is appended to `imglist`::

			[[encoded_data, extension], [start, end], tag_end]

		* ``encoded_data`` -- file content as returned by ``base64.b64encode``
		* ``extension`` -- lower-cased file extension without the dot
		* ``start``, ``end`` -- slice of the fed text holding the raw ``src``
		  value (quotes excluded)
		* ``tag_end`` -- offset just past the start tag

		``src`` values that are not file paths (``data:`` URIs, URLs containing
		``://``, protocol-relative ``//host/...`` URLs) are skipped.  A local
		file that cannot be opened still raises from ``open``.

		Offsets refer to the text of the most recent `feed` call, so a document
		has to be fed in one piece.

		Inherits from HTMLParser, overrides:
		- handle_starttag
		- feed
		"""
		# Finds a ``src`` attribute inside the raw text of a start tag.  Exactly
		# one of the three groups (double-quoted, single-quoted, unquoted)
		# captures the value.
		_SRC_ATTR = re.compile(
			r'''(?<=[\s"'/])src\s*=\s*(?:"([^"]*)"|'([^']*)'|([^\s>"']+))''',
			re.IGNORECASE)

		# not used while parsing; kept so code reading the attribute still works
		opentags = None
		# list of found imgs
		imglist = None
		# tag name of the most recently recorded image
		imgtag = None
		# text handed to the latest feed() call
		data = None

		def __init__(self):
			self.imglist = []
			self.opentags = 0
			self.data = u''
			# index in `data` at which each line starts (line 1 -> element 0)
			self._line_starts = [0]
			HTMLParser.__init__(self)

		def get_offset(self):
			"""Return the index in `data` of the construct being parsed."""
			# getpos() reports a 1-based line and a 0-based column.
			lineno, column = self.getpos()
			return self._line_starts[lineno - 1] + column

		@staticmethod
		def _is_local_file(src):
			"""Tell whether `src` looks like a file path rather than a URL."""
			lowered = src.lower()
			return not (
				lowered.startswith('data:')
				or lowered.startswith('//')
				or '://' in lowered)

		def _find_src_span(self, starttag_text, value):
			"""Return (start, end) of the raw ``src`` value inside the tag text.

			A match whose raw text equals the parsed `value` wins, so that
			``src=`` occurring inside another attribute's value is not picked
			up.  Otherwise the first match is used (the raw text differs from
			`value` when it contains character references).  Returns None if
			nothing matches.
			"""
			fallback = None
			for match in self._SRC_ATTR.finditer(starttag_text):
				group = match.lastindex
				if match.group(group) == value:
					return match.span(group)
				if fallback is None:
					fallback = match.span(group)
			return fallback

		def handle_starttag(self, tag, attrs):
			"""Record the image file referenced by an ``img`` start tag."""
			if tag != 'img':
				return
			# The first src attribute wins; its value is None when the
			# attribute was written without one.
			src = next(
				(value for name, value in attrs if name.lower() == 'src'),
				None)
			if not src or not self._is_local_file(src):
				return
			raw_tag = self.get_starttag_text()
			span = self._find_src_span(raw_tag, src)
			if span is None:
				return

			with open(src, "rb") as image_file:
				encoded_data = base64.b64encode(image_file.read())
			extension = os.path.splitext(src)[1][1:].strip().lower()

			tag_start = self.get_offset()
			self.imgtag = tag
			self.imglist.append([
				[encoded_data, extension],
				[tag_start + span[0], tag_start + span[1]],
				tag_start + len(raw_tag)])

		def feed(self, data):
			"""Remember `data` and its line starts, then parse it."""
			self.data = data
			# HTMLParser counts lines by '\n' only, so do the same here.
			self._line_starts = [0]
			self._line_starts.extend(m.end() for m in re.finditer(r'\n', data))
			HTMLParser.feed(self, data)