Skip to content

Instantly share code, notes, and snippets.

@myersjustinc
Created May 25, 2012 20:54
Show Gist options
  • Save myersjustinc/2790494 to your computer and use it in GitHub Desktop.
Save myersjustinc/2790494 to your computer and use it in GitHub Desktop.
Convert HTML image map to SVG

This takes an HTML document that contains a client-side image map (`<map>` and `<area>` elements) and creates an SVG image based on the shapes described in the image map (since SVG's a more general-purpose format than the HTML image map).

The HTML document doesn't necessarily have to be well-formed XML (there's a fallback to the BeautifulSoup parser for documents with some weirdness in them), which might be useful.

Dependencies

  • lxml
  • Python (of course)

Usage

convert.py input_filename.html image_map_id

#!/usr/bin/env python
from itertools import islice, izip
from lxml import etree, html
import sys
"""
Takes a specified HTML, reads the client-side image map at the specified ID and
generates an SVG of all of those areas.
"""
def parse_html(input_filename):
    """
    Read an HTML file and return the root element of its parsed tree.

    The contents are parsed with lxml's regular HTML parser first. If the
    resulting tree cannot be serialized to a Unicode string, the contents are
    parsed again with lxml's BeautifulSoup-based parser as a fallback.

    input_filename -- path of the HTML file to read.

    Returns the root element produced by whichever parser was used.
    """
    # Read raw bytes; the ``with`` block closes the file even if the read
    # raises.
    with open(input_filename, 'rb') as input_file:
        input_contents = input_file.read()

    # Thanks to http://lxml.de/elementsoup.html#using-soupparser-as-a-fallback
    # for this technique.
    html_root = html.fromstring(input_contents)
    try:
        # The result is discarded; only whether serialization succeeds matters.
        # The string 'unicode' is accepted by lxml on both Python 2 and 3.
        etree.tostring(html_root, encoding='unicode')
    except UnicodeDecodeError:
        # soupparser is a submodule that has to be imported explicitly.
        from lxml.html import soupparser
        html_root = soupparser.fromstring(input_contents)
    return html_root
def group_by(input_list, n):
    """
    Split a sequence into consecutive, non-overlapping n-tuples.

    input_list -- sequence to split, e.g. [x0, y0, x1, y1, ...].
    n -- number of items in each group.

    Returns an iterator of n-tuples. Trailing items that do not fill a
    complete group are dropped.
    """
    # Thanks to http://code.activestate.com/recipes/
    # 303060-group-a-list-into-sequential-n-tuples/#c5 for this technique.
    try:
        from itertools import izip as lazy_zip  # Python 2
    except ImportError:
        lazy_zip = zip  # Python 3: the built-in zip is already lazy
    # Slice i picks items i, i+n, i+2n, ...; zipping the n slices together
    # yields one tuple per group.
    return lazy_zip(*[islice(input_list, i, None, n) for i in range(n)])
def area_to_path_string(area):
    """
    Convert one image-map <area> element into SVG path data.

    area -- any object whose ``get(name)`` returns the value of the named
        attribute (or None when absent), such as an lxml element. The
        ``shape`` and ``coords`` attributes are read.

    Returns a string suitable for a <path> element's ``d`` attribute, or ''
    when the area cannot be converted: a missing/empty ``shape`` or
    ``coords``, non-integer coordinates, or the wrong number of coordinates
    for the shape.
    """
    area_type = area.get('shape')
    coords_string = area.get('coords')
    if not area_type or not coords_string:
        return ''  # This isn't good.

    # Compare shape keywords case-insensitively and ignore stray whitespace.
    area_type = area_type.strip().lower()

    # Make sure we have a comma-delimited set of integral coordinates.
    try:
        coords = [int(x.strip()) for x in coords_string.split(',')]
    except ValueError:
        return ''  # Non-integral coordinates

    if area_type in ('poly', 'polygon'):
        # Draw a polygon as line segments through x,y pairs; the final 'z'
        # closes the outline back to the first point.
        if len(coords) % 2 != 0:
            return ''  # Not an even number of coordinates
        points = zip(coords[0::2], coords[1::2])
        return 'M %s z' % ' L '.join(['%s,%s' % point for point in points])

    if area_type in ('circle', 'circ'):
        # Draw a circle as two arcs: coords are centre x, centre y, radius.
        if len(coords) != 3:
            return ''  # Not enough coordinates
        return 'M %(x)s,%(y)s m -%(r)s,0 a %(r)s,%(r)s 0 1,0 %(d)s,0 a %(r)s,%(r)s 0 1,0 -%(d)s,0' % {
            'x': coords[0],
            'y': coords[1],
            'r': coords[2],
            'd': 2 * coords[2]
        }

    # Anything else ('rect', 'default', ...) is drawn as a rectangle: four
    # line segments (three specified plus one close-path command).
    if len(coords) != 4:
        return ''  # Not enough coordinates
    # HTML rect coords are ordered x1,y1,x2,y2 (two opposite corners).
    return 'M %(xa)s,%(ya)s L %(xb)s,%(ya)s L %(xb)s,%(yb)s L %(xa)s,%(yb)s z' % {
        'xa': coords[0],
        'ya': coords[1],
        'xb': coords[2],
        'yb': coords[3]
    }
def main(input_filename='', map_id=''):
    """
    Convert the image map with the given ID into an SVG file.

    input_filename -- path of the HTML file containing the image map.
    map_id -- value of the ``id`` attribute of the element to convert.

    Writes the result to '<input_filename>.svg'. Child elements that share a
    ``class`` attribute are merged into a single <path> whose ``id`` is that
    class; children without a class each get a generated 'area_N' ID. If no
    element has the requested ID, the SVG is written without any paths.
    """
    svg_root = etree.Element('svg', xmlns='http://www.w3.org/2000/svg')

    input_html = parse_html(input_filename)
    # Pass the ID as an XPath variable so quote characters in it cannot
    # break the expression.
    matches = input_html.xpath('//*[@id=$map_id]', map_id=map_id)
    input_map = matches[0] if matches else []

    # Build a path string for each class of <area> element in the specified map.
    input_map_classes = {}
    class_order = []  # classes in first-seen order, for stable output
    placeholder_counter = 0
    for child in input_map:
        path_string = area_to_path_string(child)
        if 'class' in child.attrib:
            area_class = child.get('class')
        else:
            area_class = 'area_%s' % placeholder_counter
            placeholder_counter += 1
        if area_class in input_map_classes:
            input_map_classes[area_class] += ' %s' % path_string
        else:
            input_map_classes[area_class] = path_string
            class_order.append(area_class)

    # Build a <path> element for each class and append it to the SVG tree.
    for area_class in class_order:
        area_path = etree.Element('path')
        area_path.set('d', input_map_classes[area_class])
        area_path.set('id', area_class)
        area_path.set('fill', 'none')
        area_path.set('stroke', 'black')
        area_path.set('stroke-width', '1')
        svg_root.append(area_path)

    output_svg = etree.ElementTree(svg_root)
    # The tree is serialized as UTF-8 bytes, so open the file in binary mode;
    # the ``with`` block closes it even if writing fails.
    with open('%s.svg' % input_filename, 'wb') as output_file:
        output_svg.write(output_file, encoding='utf-8', pretty_print=True, xml_declaration=True)
if __name__ == '__main__':
    # Exactly two arguments are expected after the script name: the HTML
    # file and the ID of the image map inside it.
    if len(sys.argv) == 3:
        main(*sys.argv[1:])
    else:
        sys.stderr.write("Usage: %s input_html map_id\n" % sys.argv[0])
        sys.exit(1)
@photogaff
Copy link

photogaff commented Apr 7, 2017

This is the version I needed to troubleshoot and process a map I had, it somehow had different attribute values to get it going

Exec: python convert.py file.html map1

from itertools import islice, izip
from lxml import etree, html
import sys

"""
Takes a specified HTML, reads the client-side image map at the specified ID and
generates an SVG of all of those areas.
"""

def parse_html(input_filename):
    """
    Read an HTML file and return the root element of its parsed tree.

    The contents are parsed with lxml's regular HTML parser first. If the
    resulting tree cannot be serialized to a Unicode string, the contents are
    parsed again with lxml's BeautifulSoup-based parser as a fallback.

    input_filename -- path of the HTML file to read.

    Returns the root element produced by whichever parser was used.
    """
    # The ``with`` block closes the file even if reading raises.
    with open(input_filename, 'rb') as input_file:
        input_contents = input_file.read()

    # Thanks to http://lxml.de/elementsoup.html#using-soupparser-as-a-fallback
    # for this technique.
    html_root = html.fromstring(input_contents)
    try:
        # Only success or failure matters here, so the result is discarded.
        # lxml accepts the encoding name 'unicode' on both Python 2 and 3.
        etree.tostring(html_root, encoding='unicode')
    except UnicodeDecodeError:
        # soupparser is a submodule that has to be imported explicitly.
        from lxml.html import soupparser
        html_root = soupparser.fromstring(input_contents)

    return html_root

def group_by(input_list, n):
    """
    Split a sequence into consecutive, non-overlapping n-tuples.

    input_list -- sequence to split, e.g. [x0, y0, x1, y1, ...].
    n -- number of items in each group.

    Returns an iterator of n-tuples. Trailing items that do not fill a
    complete group are dropped.
    """
    # Thanks to http://code.activestate.com/recipes/
    # 303060-group-a-list-into-sequential-n-tuples/#c5 for this technique.
    try:
        from itertools import izip as lazy_zip  # Python 2
    except ImportError:
        lazy_zip = zip  # Python 3: the built-in zip is already lazy
    # Slice i picks items i, i+n, i+2n, ...; zipping the n slices together
    # yields one tuple per group.
    return lazy_zip(*[islice(input_list, i, None, n) for i in range(n)])

def area_to_path_string(area):
    """
    Convert one image-map <area> element into SVG path data.

    area -- any object whose ``get(name)`` returns the value of the named
        attribute (or None when absent), such as an lxml element. The
        ``shape`` and ``coords`` attributes are read.

    Returns a string suitable for a <path> element's ``d`` attribute, or ''
    when the area cannot be converted: a missing/empty ``shape`` or
    ``coords``, non-integer coordinates, or the wrong number of coordinates
    for the shape.

    Troubleshooting output (the raw coords, the raw shape and the reason for
    any rejection) is printed to standard output.
    """
    area_type = area.get('shape')
    coords_string = area.get('coords')
    if not area_type or not coords_string:
        print("# This isn't good.")
        return ''  # This isn't good.

    print(coords_string)
    # Make sure we have a comma-delimited set of integral coordinates.
    try:
        coords = [int(x.strip()) for x in coords_string.split(',')]
    except ValueError:
        print("# Non-integral coordinates")
        return ''  # Non-integral coordinates

    print(area_type)
    # Compare shape keywords case-insensitively and ignore stray whitespace.
    area_type = area_type.strip().lower()

    if area_type in ('poly', 'polygon'):
        # Draw a polygon as line segments through x,y pairs; the final 'z'
        # closes the outline back to the first point.
        if len(coords) % 2 != 0:
            print("# Not an even number of coordinates")
            return ''  # Not an even number of coordinates
        points = zip(coords[0::2], coords[1::2])
        return 'M %s z' % ' L '.join(['%s,%s' % point for point in points])

    if area_type in ('circle', 'circ'):
        # Draw a circle as two arcs: coords are centre x, centre y, radius.
        if len(coords) != 3:
            print("# Not enough coordinates 1")
            return ''  # Not enough coordinates
        return 'M %(x)s,%(y)s m -%(r)s,0 a %(r)s,%(r)s 0 1,0 %(d)s,0 a %(r)s,%(r)s 0 1,0 -%(d)s,0' % {
            'x': coords[0],
            'y': coords[1],
            'r': coords[2],
            'd': 2 * coords[2]
        }

    # Anything else ('rect', 'default', ...) is drawn as a rectangle: four
    # line segments (three specified plus one close-path command).
    if len(coords) != 4:
        print("# Not enough coordinates 2")
        return ''  # Not enough coordinates
    # HTML rect coords are ordered x1,y1,x2,y2 (two opposite corners).
    return 'M %(xa)s,%(ya)s L %(xb)s,%(ya)s L %(xb)s,%(yb)s L %(xa)s,%(yb)s z' % {
        'xa': coords[0],
        'ya': coords[1],
        'xb': coords[2],
        'yb': coords[3]
    }

def main(input_filename='', map_id=''):
    """
    Convert the image map with the given ID into an SVG file.

    input_filename -- path of the HTML file containing the image map.
    map_id -- value of the ``id`` attribute of the element to convert.

    Writes the result to '<input_filename>.svg'. Child elements that share a
    ``class`` attribute are merged into a single <path> whose ``id`` is that
    class; children without a class each get a generated 'area_N' ID. If no
    element has the requested ID, the SVG is written without any paths.
    """
    svg_root = etree.Element('svg', xmlns='http://www.w3.org/2000/svg')

    input_html = parse_html(input_filename)
    # Pass the ID as an XPath variable so quote characters in it cannot
    # break the expression.
    matches = input_html.xpath('//*[@id=$map_id]', map_id=map_id)
    input_map = matches[0] if matches else []

    # Build a path string for each class of <area> element in the specified map.
    input_map_classes = {}
    class_order = []  # classes in first-seen order, for stable output
    placeholder_counter = 0
    for child in input_map:
        path_string = area_to_path_string(child)

        if 'class' in child.attrib:
            area_class = child.get('class')
        else:
            area_class = 'area_%s' % placeholder_counter
            placeholder_counter += 1

        if area_class in input_map_classes:
            input_map_classes[area_class] += ' %s' % path_string
        else:
            input_map_classes[area_class] = path_string
            class_order.append(area_class)

    # Build a <path> element for each class and append it to the SVG tree.
    for area_class in class_order:
        area_path = etree.Element('path')
        area_path.set('d', input_map_classes[area_class])
        area_path.set('id', area_class)

        area_path.set('fill', 'none')
        area_path.set('stroke', 'black')
        area_path.set('stroke-width', '1')

        svg_root.append(area_path)

    output_svg = etree.ElementTree(svg_root)
    # The tree is serialized as UTF-8 bytes, so open the file in binary mode;
    # the ``with`` block closes it even if writing fails.
    with open('%s.svg' % input_filename, 'wb') as output_file:
        output_svg.write(output_file, encoding='utf-8', pretty_print=True, xml_declaration=True)

# Run the conversion only when executed as a script. The dunder names are
# required: a bare ``name`` is undefined and raises NameError.
if __name__ == '__main__':
    # Exactly two arguments are expected after the script name: the HTML
    # file and the ID of the image map inside it.
    if len(sys.argv) != 3:
        sys.stderr.write("Usage: %s input_html map_id\n" % sys.argv[0])
        sys.exit(1)
    else:
        main(*sys.argv[1:])

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment