stecman/Bake_codepoints_python.md

## Bake_codepoints_python.md

      
    Raw
  

              Bake_codepoints_python.md
            
          
    Bake glyphs from fonts to image files (Python)

This is an appendix item for Unicode Input Terminal.
Prototype code to pre-render 240x240 screens, each with a glyph and codepoint metadata.
# Download Unicode metadata
wget https://unicode.org/Public/UNIDATA/UnicodeData.txt
wget https://unicode.org/Public/UNIDATA/Blocks.txt

# Install dependencies
python3 -m pip install -r requirements.txt

# Render screens
python3 render-codepoints.py -f fontsdir -o outputdir --metadata-font NotoSans-Regular.ttf --code-data UnicodeData.txt --block-data Blocks.txt

# Convert an image to uncompressed pixel data for a ST7789
# Use a tool like `find` or `xargs` to run this in bulk
python3 convert-to-6bpp.py <image-file, ...>
This was part of the early work on my Unicode Binary Input Terminal project. The code has been removed from the master branch, but it can still be accessed at this commit:
stecman/unicode-input-panel@6e75f8ad6faacb83f8b661fd8d8278a463f2b1e4

  
## convert-to-6bpp.py
"""
Convert PNG images to raw bitmap data for dumb loading on a device.
This increases the storage requirement ~10x for small PNGs, but simplifies use on the device.
"""

from PIL import Image

import argparse
import os
import struct


def truncate_channels(im):
    """
    Preview an image with every colour channel reduced to its upper six bits.

    The two least-significant bits of the red, green and blue values are
    cleared for each pixel, which approximates the colours a ST7789 display
    will show.

    The input image is not modified; a new RGB image of the same size is returned.
    """
    upper_six_bits = 0xFC

    # Only the first three channels of each pixel are carried over
    masked = [
        (px[0] & upper_six_bits, px[1] & upper_six_bits, px[2] & upper_six_bits)
        for px in im.getdata()
    ]

    preview = Image.new(mode='RGB', size=im.size)
    preview.putdata(masked)
    return preview


if __name__ == '__main__':
    # Command line entry point: convert each named image to a raw .rgb file
    # (two size bytes followed by three bytes per pixel) next to the input.

    parser = argparse.ArgumentParser(
        'to-rgb6bpp',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=(
            "Convert input images to RGB 666 for ST7789 display\n"
            "\n"
            "Writes raw pixel data to a .rgb file next to the input file for reading into\n"
            "memory of a ST7789 screen directly from an SD card. This is packed the same\n"
            "as an 8-bpp image, but the lower two bits are effectively zeroed.\n"
        ))

    parser.add_argument('images', metavar='IMAGE', nargs='+', help="Image file to convert")

    args = parser.parse_args()

    for path in args.images:
        print(path)
        if not os.path.exists(path):
            print("File not found: %s" % path)
            continue

        # Release the source file as soon as its pixels have been decoded
        with Image.open(path) as source:
            glyph = source.convert('RGBA')

        # Ensure the glyph is on a black background as the device does no alpha blending
        im = Image.new(mode='RGB', size=glyph.size, color=(0, 0, 0))
        im.paste(glyph, (0, 0), glyph)

        # The header stores each dimension in a single byte
        if im.width > 255 or im.height > 255:
            print("Image too large for a one-byte size header: %s" % path)
            continue

        # Write raw rgb format with width and height data
        # This could be BMP with a little more work, but this is ok for now.
        dest = os.path.splitext(path)[0] + '.rgb'
        with open(dest, 'wb') as handle:
            handle.write(struct.pack('BB', im.width, im.height))
            # For an RGB image tobytes() yields the same R, G, B byte stream
            # as packing every pixel individually, in a single write
            handle.write(im.tobytes())

## namedb.py
import re
import difflib

class UnicodeNameDb:
    """
    Look up block names and shortened character names for Unicode codepoints.

    The two data files are only read on the first call to get() (or an
    explicit call to load()); constructing the object performs no I/O.
    """

    # Block name reported when no block or name entry covers a codepoint
    UNKNOWN_BLOCK = "NOT REGISTERED"

    # Range lines look like "0000..007F; Basic Latin"
    _BLOCK_LINE = re.compile(r'^([0-9A-Fa-f]+)\.\.([0-9A-Fa-f]+);\W*(.*)$')

    # Private use ranges are answered without consulting the loaded names
    _PRIVATE_USE = (
        (0xE000, 0xF8FF, "Private Use Area"),
        (0xF0000, 0xFFFFF, "Supl. Private Use Area A"),
        (0x100000, 0x10FFFF, "Supl. Private Use Area B"),
    )

    def __init__(self, unicode_data_path, unicode_blocks_path):
        """
        Args:
            unicode_data_path: Path to a UnicodeData.txt file
            unicode_blocks_path: Path to a Blocks.txt file
        """
        self.unicode_data_path = unicode_data_path
        self.unicode_blocks_path = unicode_blocks_path
        self.has_loaded = False

        # List of (first, last, name) tuples once loaded
        self.blocks = None
        # Last codepoint of the final block once loaded
        self.blocks_last_codepoint = None

        # Maps codepoint -> (block_name, short_name)
        self.codepoints = {}

    def get(self, codepoint):
        """
        Get the block name and shortened character name for a codepoint
        Returns (block_name: str, codepoint_name: str)

        Loads the data files on first use; errors opening them propagate.
        """
        if not self.has_loaded:
            self.load()

        for first, last, block_name in self._PRIVATE_USE:
            if first <= codepoint <= last:
                return block_name, "[Not assigned by Unicode]"

        try:
            return self.codepoints[codepoint]
        except KeyError:
            return self.UNKNOWN_BLOCK, "Codepoint %X" % codepoint

    def load(self):
        """
        Read both data files and populate blocks and codepoints.

        has_loaded is only set once everything was read, so a failed load
        (for example a missing file) is retried by the next get() instead of
        silently reporting every codepoint as unknown.
        """
        blocks = self._read_blocks()
        names = self._read_names(blocks)

        self.blocks = blocks
        # Store the max codepoint defined by the blocks list
        self.blocks_last_codepoint = blocks[-1][1] if blocks else None
        self.codepoints.update(names)
        self.has_loaded = True

    def _read_blocks(self):
        """Parse the blocks file into a list of (first, last, name) in file order."""
        blocks = []

        with open(self.unicode_blocks_path, 'r', encoding='utf-8') as handle:
            for line in handle:
                match = self._BLOCK_LINE.match(line)
                if match is None:
                    # Comments, blank lines and anything else that is not a range line
                    continue

                start, end, name = match.group(1, 2, 3)
                blocks.append((int(start, 16), int(end, 16), name.strip()))

        return blocks

    def _read_names(self, blocks):
        """
        Parse the character data file into {codepoint: (block_name, short_name)}.

        Relies on both the data file and the blocks list being in ascending
        codepoint order, so the current block only ever moves forward.
        """
        names = {}
        block_index = 0

        with open(self.unicode_data_path, 'r', encoding='utf-8') as handle:
            for line in handle:
                if not line.strip():
                    continue

                fields = line.split(';')
                codepoint = int(fields[0], 16)
                name = fields[1]

                if name == '<control>':
                    # Control characters carry their usable name in field 10
                    name = fields[10]
                elif name.endswith('>'):
                    # Skip markers that aren't actually codepoint names
                    continue

                # Move to the next block if our codepoint is past the end of this block
                while block_index < len(blocks) and blocks[block_index][1] < codepoint:
                    block_index += 1

                in_block = block_index < len(blocks) and blocks[block_index][0] <= codepoint
                if in_block:
                    group_name = blocks[block_index][2]
                    short_name = self._shorten_name(group_name, name)
                else:
                    # Outside every listed block: nothing to shorten against
                    group_name = self.UNKNOWN_BLOCK
                    short_name = name

                # Shorten specific words
                group_name = group_name.replace('Miscellaneous', 'Misc.')

                names[codepoint] = (group_name, short_name)

        return names

    @staticmethod
    def _shorten_name(group_name, name):
        """Drop leading words of a character name that just repeat its block name."""
        matcher = difflib.SequenceMatcher(None, group_name.lower(), name.lower())
        pos_a, pos_b, size = matcher.find_longest_match(0, len(group_name), 0, len(name))

        # Only trim when the start of the character name appears in the block name
        if size < 3 or pos_b != 0:
            return name

        words_a = group_name[pos_a:].lower().split(" ")
        words_b = name.lower().split(" ")
        trim_chars = 0

        for a, b in zip(words_a, words_b):
            if not (a == b or a == (b + 's') or a == (b + "-1")):
                # Stop at the first differing word so the cut never lands mid-word
                break
            # This assumes there are single spaces, but should be ok...
            trim_chars += len(b) + 1

        short_name = name[trim_chars:]

        # Fix cases like "alchemical symbols" where the naming scheme is "[block name] FOR XYZ"
        # These names become "FOR XYZ", which is a bit awkward, so just drop the leading 'for'.
        if short_name.startswith('FOR '):
            short_name = short_name[4:]

        # Never shorten a name down to nothing
        return short_name or name

## render-codepoints.py
import argparse
import difflib
import glob
import os
import re

from concurrent.futures import ProcessPoolExecutor
from multiprocessing import cpu_count

import uharfbuzz as hb

from fontTools.misc.transform import Offset
from fontTools.pens.freetypePen import FreeTypePen
from fontTools.pens.transformPen import TransformPen
from fontTools.pens.ttGlyphPen import TTGlyphPen
from fontTools.ttLib import TTFont
from PIL import Image, ImageOps


def build_font_map(fontfiles: list[str]):
    """
    Build a map of all available glyphs in a list of fonts

    Args:
        fontfiles: Paths of the font files to index, in priority order

    Returns a dictionary mapping each available codepoint to the first font file that contains it
    """
    fontmap = {}

    for path in fontfiles:
        font = TTFont(path)
        try:
            cmap = font.get('cmap')
            if cmap is None:
                # A font without a character map contributes nothing
                continue

            for table in cmap.tables:
                if not table.isUnicode():
                    continue

                for code in table.cmap:
                    # setdefault keeps the entry made by an earlier font
                    fontmap.setdefault(code, path)
        finally:
            # Release the font file once its character map has been read
            font.close()

    return fontmap


def make_output_path(codepoint):
    """
    Split a codepoint into a relative directory and a file stem.

    The codepoint is written as upper-case hex, left-padded to an even number
    of digits, and cut into two-digit pairs. The final pair is the file stem;
    the pairs before it become nested directories. Codepoints that fit in a
    single pair are placed under '00/'.

    Returns (directory: str with a trailing '/', stem: str)
    """
    digits = hex(codepoint)[2:].upper()
    if len(digits) % 2:
        digits = '0' + digits

    pairs = [digits[start:start + 2] for start in range(0, len(digits), 2)]
    *parents, stem = pairs

    folder = ''.join(pair + '/' for pair in parents) or '00/'
    return folder, stem


def glyph_to_image(output_path, font_path, codepoint, with_metadata=True):
    """
    Render the glyph for one codepoint from a font and save it as an image.

    Args:
        output_path: File to write the rendered image to
        font_path: Font file to take the glyph from
        codepoint: Unicode codepoint to render
        with_metadata: If the glyph should be placed on a full 240x240 screen
            with its name and number (see render_metadata_text)

    Raises:
        ValueError: If the font reports no nominal glyph for the codepoint
    """
    print("Rendering %d -> %s" % (codepoint, output_path))

    target_size = (240, 150)
    display_size = (240, 240)

    size = target_size[1]

    # Load font with uharfbuzz to allow rendering specific font size
    # (The fonttools TTFont class doesn't seem to support this and draws at 1000px)
    blob = hb.Blob.from_file_path(font_path)
    face = hb.Face(blob)
    font = hb.Font(face)
    font.scale = (size, size)

    glyph_id = font.get_nominal_glyph(codepoint)
    if glyph_id is None:
        # Fail with a readable message instead of handing None to the draw call
        raise ValueError("Font %s has no glyph for U+%04X" % (font_path, codepoint))

    # Draw glyph to pixel buffer
    pen = FreeTypePen(None)
    font.draw_glyph_with_pen(glyph_id, pen)
    im = pen.image(width=target_size[0], height=target_size[1], contain=True)

    # Centre all glyphs horizontally, even if it's not technically correct
    bbox = im.getbbox()
    if bbox:
        im = im.crop((bbox[0], 0, bbox[2], im.size[1]))

    # Place the glyph on an opaque white canvas, using its own alpha as the mask
    padded = Image.new(mode='RGBA', size=target_size, color=(255, 255, 255, 255))
    padded.paste(im, ((target_size[0] - im.size[0]) // 2, 0), im.convert('RGBA'))
    im = padded

    # Change to white text on black background
    im = ImageOps.invert(im.convert('RGB'))

    # Add metadata text to image if enabled
    if with_metadata:
        im = render_metadata_text(im, codepoint, display_size)

    im.save(output_path)


def render_metadata_text(glyph_im, codepoint, size):
    """
    Compose a full screen image: the glyph plus its block, name and number.

    Uses the module-level name_db for the names and text_pen to draw text.

    Args:
        glyph_im: Already rendered glyph image
        codepoint: Codepoint the glyph belongs to
        size: (width, height) of the screen image to create

    Returns the new RGB screen image
    """
    width, height = size
    screen = Image.new(mode='RGB', size=size, color=(0, 0, 0))

    # Add the rendered glyph in the centre, nudged 7px down
    glyph_w, glyph_h = glyph_im.size
    screen.paste(glyph_im, (abs(width - glyph_w) // 2, abs(height - glyph_h) // 2 + 7))

    block_name, char_name = name_db.get(codepoint)

    # Block name and character name go on the top two lines
    for text, top in ((block_name, 0), (char_name, 17)):
        screen.paste(text_pen.render_line(text, width=width, size=14, color='white'), (0, top))

    # Hex form in the bottom-left corner
    hex_label = text_pen.render_line('U+%04X' % codepoint, width=width, size=18, color='green', trim=True)
    screen.paste(hex_label, (0, height - hex_label.size[1]))

    # Decimal form in the bottom-right corner
    # TODO: Fix text not masked, so paste kills other text on same line
    dec_label = text_pen.render_line('%d' % codepoint, width=width, size=18, color='blue', trim=True)
    screen.paste(dec_label, (width - dec_label.size[0], height - dec_label.size[1]))

    return screen


class TextLineRenderer:
    """Draws single lines of text from one font file, shaped with HarfBuzz."""

    def __init__(self, font_path):
        """
        Args:
            font_path (str): Font file used for all lines rendered by this object
        """
        self.blob = hb.Blob.from_file_path(font_path)
        self.face = hb.Face(self.blob)

    def render_line(self, text, width, size=18, color='white', trim=False) -> Image.Image:
        """
        Render one line of text. Does not support line breaks or layout

        Args:
            text (str): Line of text to render
            width (int): Width in pixels to truncate output to
            size (int): Font height in pixels. This is also used as the canvas height.
            color (str|tuple): PIL compatible color value to color text after rendering
            trim (bool): If the resulting canvas should be cropped to only rendered pixels

        Returns an RGBA image that is transparent wherever no text was drawn
        """
        canvas = (width, size)

        buf = hb.Buffer()
        buf.direction = 'ltr'
        buf.add_str(text)
        buf.guess_segment_properties()

        font = hb.Font(self.face)  # The font has to be loaded here for multiprocessing
        font.scale = (size, size)

        hb.shape(font, buf, {"kern": True, "liga": True})

        # Draw every shaped glyph into one pen, advancing the cursor as we go
        x, y = 0, 0
        pen = FreeTypePen(None)
        for info, pos in zip(buf.glyph_infos, buf.glyph_positions):
            # After shaping, info.codepoint is handed to the font as the glyph to draw
            gid = info.codepoint
            transformed = TransformPen(pen, Offset(x + pos.x_offset, y + pos.y_offset))
            font.draw_glyph_with_pen(gid, transformed)
            x += pos.x_advance
            y += pos.y_advance

        # Use the rendered text as a mask to paint the colour onto a transparent canvas
        rendered = pen.image(width=0, height=0, contain=True).convert('RGBA')
        im = Image.new(mode='RGBA', size=canvas, color=(0, 0, 0, 0))
        im.paste(color, (0, 0), mask=rendered)

        if trim:
            # Crop horizontally to the drawn pixels; the full line height is kept
            bbox = im.getbbox()
            if bbox:
                im = im.crop((bbox[0], 0, bbox[2], im.size[1]))

        return im


if __name__ == '__main__':
    # Command line entry point: index every font in a directory and render
    # one image per codepoint into a directory tree under the output folder.

    parser = argparse.ArgumentParser(
        'render-codepoints',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=(
            "Given a set of fonts, render every codepoint to an image file.\n"
            "\n"
            "Images are stored in hierarchy of directories named using the bytes in a codepoint.\n"
            "If a codepoint is in multiple fonts, the first font with that codepoint is used.\n"
        )
    )

    parser.add_argument('-f', '--fonts', required=True, type=str, help="Directory of fonts to read from")
    parser.add_argument('-o', '--outdir', required=True, type=str, help='Directory to write rendered images to')
    parser.add_argument('--metadata-font', required=True, help='Path to font file for metadata rendering')
    # Optional: a required option would never fall back to its default
    parser.add_argument('--code-data', help='Path to UnicodeData.txt', default="UnicodeData.txt")
    parser.add_argument('--block-data', help='Path to Blocks.txt', default="Blocks.txt")

    parser.add_argument('--overwrite', action='store_true', help='Overwrite existing files instead of skipping them')
    parser.add_argument('--serial', action='store_true', help='Run jobs in a single process instead of multi-processing')

    args = parser.parse_args()

    # NOTE(review): render_metadata_text reads these two module-level names.
    # Worker processes only see them if they inherit this module's state
    # (fork start method) - confirm behaviour on platforms that spawn workers.
    text_pen = TextLineRenderer(args.metadata_font)

    from namedb import UnicodeNameDb
    name_db = UnicodeNameDb(args.code_data, args.block_data)

    os.makedirs(args.outdir, exist_ok=True)

    # Sorted so that "the first font with that codepoint" is the same on every run
    fontfiles = []
    fontfiles += sorted(glob.glob(os.path.join(args.fonts, '*.otf')))
    fontfiles += sorted(glob.glob(os.path.join(args.fonts, '*.ttf')))

    table = build_font_map(fontfiles)

    print('Found %d glyphs in %d fonts' % (len(table), len(fontfiles)))

    pending = []
    with ProcessPoolExecutor(max_workers=cpu_count()) as executor:
        for codepoint, font_path in table.items():

            dirname, filename = make_output_path(codepoint)

            # Ensure the folder hierarchy exists before submitting
            abs_path = os.path.join(args.outdir, dirname)
            output_path = os.path.join(abs_path, filename + '.png')
            os.makedirs(abs_path, exist_ok=True)

            # Skip existing
            if not args.overwrite and os.path.exists(output_path):
                continue

            fn_args = (output_path, font_path, codepoint)

            if args.serial:
                glyph_to_image(*fn_args)
            else:
                pending.append((codepoint, executor.submit(glyph_to_image, *fn_args)))

        # Report failures from worker processes instead of dropping them silently
        for codepoint, future in pending:
            error = future.exception()
            if error is not None:
                print("Failed to render U+%04X: %r" % (codepoint, error))


## requirements.txt
uharfbuzz
fontTools
Pillow
freetype-py
	"""
	Convert PNG images to raw bitmap data for dumb loading on a device.
	This increases the storage requirement ~10x for small PNGs, but simplifies use on the device.
	"""

	from PIL import Image

	import argparse
	import os
	import struct


	def truncate_channels(im):
	    """
	    Zero the lower two bits of all channels in the image

	    This can be used to preview colours similar to what a ST7789 display will show.

	    Returns a new image
	    """
	    # Only the first three channels of each pixel are carried over
	    masked = [
	        (px[0] & 0xFC, px[1] & 0xFC, px[2] & 0xFC)
	        for px in im.getdata()
	    ]

	    trunc = Image.new(mode='RGB', size=im.size)
	    trunc.putdata(masked)

	    return trunc


	if __name__ == '__main__':
	    # Command line entry point: convert each named image to a raw .rgb file
	    # (two size bytes followed by three bytes per pixel) next to the input.

	    parser = argparse.ArgumentParser(
	        'to-rgb6bpp',
	        formatter_class=argparse.RawDescriptionHelpFormatter,
	        description=(
	            "Convert input images to RGB 666 for ST7789 display\n"
	            "\n"
	            "Writes raw pixel data to a .rgb file next to the input file for reading into\n"
	            "memory of a ST7789 screen directly from an SD card. This is packed the same\n"
	            "as an 8-bpp image, but the lower two bits are effectively zeroed.\n"
	        ))

	    parser.add_argument('images', metavar='IMAGE', nargs='+', help="Image file to convert")

	    args = parser.parse_args()

	    for path in args.images:
	        print(path)
	        if not os.path.exists(path):
	            print("File not found: %s" % path)
	            continue

	        # Release the source file as soon as its pixels have been decoded
	        with Image.open(path) as source:
	            glyph = source.convert('RGBA')

	        # Ensure the glyph is on a black background as the device does no alpha blending
	        im = Image.new(mode='RGB', size=glyph.size, color=(0, 0, 0))
	        im.paste(glyph, (0, 0), glyph)

	        # The header stores each dimension in a single byte
	        if im.width > 255 or im.height > 255:
	            print("Image too large for a one-byte size header: %s" % path)
	            continue

	        # Write raw rgb format with width and height data
	        # This could be BMP with a little more work, but this is ok for now.
	        dest = os.path.splitext(path)[0] + '.rgb'
	        with open(dest, 'wb') as handle:
	            handle.write(struct.pack('BB', im.width, im.height))
	            # For an RGB image tobytes() yields the same R, G, B byte stream
	            # as packing every pixel individually, in a single write
	            handle.write(im.tobytes())
	import re
	import difflib

	class UnicodeNameDb:
	    """
	    Look up block names and shortened character names for Unicode codepoints.

	    The two data files are only read on the first call to get() (or an
	    explicit call to load()); constructing the object performs no I/O.
	    """

	    # Block name reported when no block or name entry covers a codepoint
	    UNKNOWN_BLOCK = "NOT REGISTERED"

	    # Range lines look like "0000..007F; Basic Latin"
	    _BLOCK_LINE = re.compile(r'^([0-9A-Fa-f]+)\.\.([0-9A-Fa-f]+);\W*(.*)$')

	    # Private use ranges are answered without consulting the loaded names
	    _PRIVATE_USE = (
	        (0xE000, 0xF8FF, "Private Use Area"),
	        (0xF0000, 0xFFFFF, "Supl. Private Use Area A"),
	        (0x100000, 0x10FFFF, "Supl. Private Use Area B"),
	    )

	    def __init__(self, unicode_data_path, unicode_blocks_path):
	        """
	        Args:
	            unicode_data_path: Path to a UnicodeData.txt file
	            unicode_blocks_path: Path to a Blocks.txt file
	        """
	        self.unicode_data_path = unicode_data_path
	        self.unicode_blocks_path = unicode_blocks_path
	        self.has_loaded = False

	        # List of (first, last, name) tuples once loaded
	        self.blocks = None
	        # Last codepoint of the final block once loaded
	        self.blocks_last_codepoint = None

	        # Maps codepoint -> (block_name, short_name)
	        self.codepoints = {}

	    def get(self, codepoint):
	        """
	        Get the block name and shortened character name for a codepoint
	        Returns (block_name: str, codepoint_name: str)

	        Loads the data files on first use; errors opening them propagate.
	        """
	        if not self.has_loaded:
	            self.load()

	        for first, last, block_name in self._PRIVATE_USE:
	            if first <= codepoint <= last:
	                return block_name, "[Not assigned by Unicode]"

	        try:
	            return self.codepoints[codepoint]
	        except KeyError:
	            return self.UNKNOWN_BLOCK, "Codepoint %X" % codepoint

	    def load(self):
	        """
	        Read both data files and populate blocks and codepoints.

	        has_loaded is only set once everything was read, so a failed load
	        (for example a missing file) is retried by the next get() instead of
	        silently reporting every codepoint as unknown.
	        """
	        blocks = self._read_blocks()
	        names = self._read_names(blocks)

	        self.blocks = blocks
	        # Store the max codepoint defined by the blocks list
	        self.blocks_last_codepoint = blocks[-1][1] if blocks else None
	        self.codepoints.update(names)
	        self.has_loaded = True

	    def _read_blocks(self):
	        """Parse the blocks file into a list of (first, last, name) in file order."""
	        blocks = []

	        with open(self.unicode_blocks_path, 'r', encoding='utf-8') as handle:
	            for line in handle:
	                match = self._BLOCK_LINE.match(line)
	                if match is None:
	                    # Comments, blank lines and anything else that is not a range line
	                    continue

	                start, end, name = match.group(1, 2, 3)
	                blocks.append((int(start, 16), int(end, 16), name.strip()))

	        return blocks

	    def _read_names(self, blocks):
	        """
	        Parse the character data file into {codepoint: (block_name, short_name)}.

	        Relies on both the data file and the blocks list being in ascending
	        codepoint order, so the current block only ever moves forward.
	        """
	        names = {}
	        block_index = 0

	        with open(self.unicode_data_path, 'r', encoding='utf-8') as handle:
	            for line in handle:
	                if not line.strip():
	                    continue

	                fields = line.split(';')
	                codepoint = int(fields[0], 16)
	                name = fields[1]

	                if name == '<control>':
	                    # Control characters carry their usable name in field 10
	                    name = fields[10]
	                elif name.endswith('>'):
	                    # Skip markers that aren't actually codepoint names
	                    continue

	                # Move to the next block if our codepoint is past the end of this block
	                while block_index < len(blocks) and blocks[block_index][1] < codepoint:
	                    block_index += 1

	                in_block = block_index < len(blocks) and blocks[block_index][0] <= codepoint
	                if in_block:
	                    group_name = blocks[block_index][2]
	                    short_name = self._shorten_name(group_name, name)
	                else:
	                    # Outside every listed block: nothing to shorten against
	                    group_name = self.UNKNOWN_BLOCK
	                    short_name = name

	                # Shorten specific words
	                group_name = group_name.replace('Miscellaneous', 'Misc.')

	                names[codepoint] = (group_name, short_name)

	        return names

	    @staticmethod
	    def _shorten_name(group_name, name):
	        """Drop leading words of a character name that just repeat its block name."""
	        matcher = difflib.SequenceMatcher(None, group_name.lower(), name.lower())
	        pos_a, pos_b, size = matcher.find_longest_match(0, len(group_name), 0, len(name))

	        # Only trim when the start of the character name appears in the block name
	        if size < 3 or pos_b != 0:
	            return name

	        words_a = group_name[pos_a:].lower().split(" ")
	        words_b = name.lower().split(" ")
	        trim_chars = 0

	        for a, b in zip(words_a, words_b):
	            if not (a == b or a == (b + 's') or a == (b + "-1")):
	                # Stop at the first differing word so the cut never lands mid-word
	                break
	            # This assumes there are single spaces, but should be ok...
	            trim_chars += len(b) + 1

	        short_name = name[trim_chars:]

	        # Fix cases like "alchemical symbols" where the naming scheme is "[block name] FOR XYZ"
	        # These names become "FOR XYZ", which is a bit awkward, so just drop the leading 'for'.
	        if short_name.startswith('FOR '):
	            short_name = short_name[4:]

	        # Never shorten a name down to nothing
	        return short_name or name
	import argparse
	import difflib
	import glob
	import os
	import re

	from concurrent.futures import ProcessPoolExecutor
	from multiprocessing import cpu_count

	import uharfbuzz as hb

	from fontTools.misc.transform import Offset
	from fontTools.pens.freetypePen import FreeTypePen
	from fontTools.pens.transformPen import TransformPen
	from fontTools.pens.ttGlyphPen import TTGlyphPen
	from fontTools.ttLib import TTFont
	from PIL import Image, ImageOps


	def build_font_map(fontfiles: list[str]):
	    """
	    Build a map of all available glyphs in a list of fonts

	    Args:
	        fontfiles: Paths of the font files to index, in priority order

	    Returns a dictionary mapping each available codepoint to the first font file that contains it
	    """
	    fontmap = {}

	    for path in fontfiles:
	        font = TTFont(path)
	        try:
	            cmap = font.get('cmap')
	            if cmap is None:
	                # A font without a character map contributes nothing
	                continue

	            for table in cmap.tables:
	                if not table.isUnicode():
	                    continue

	                for code in table.cmap:
	                    # setdefault keeps the entry made by an earlier font
	                    fontmap.setdefault(code, path)
	        finally:
	            # Release the font file once its character map has been read
	            font.close()

	    return fontmap


	def make_output_path(codepoint):
	    """
	    Split a codepoint into a relative directory and a file stem.

	    The codepoint is written as upper-case hex, left-padded to an even number
	    of digits, and cut into two-digit pairs. The final pair is the file stem;
	    the pairs before it become nested directories. Codepoints that fit in a
	    single pair are placed under '00/'.

	    Returns (directory: str with a trailing '/', stem: str)
	    """
	    digits = hex(codepoint)[2:].upper()
	    if len(digits) % 2:
	        digits = '0' + digits

	    pairs = [digits[start:start + 2] for start in range(0, len(digits), 2)]
	    *parents, stem = pairs

	    folder = ''.join(pair + '/' for pair in parents) or '00/'
	    return folder, stem


	def glyph_to_image(output_path, font_path, codepoint, with_metadata=True):
	    """
	    Render the glyph for one codepoint from a font and save it as an image.

	    Args:
	        output_path: File to write the rendered image to
	        font_path: Font file to take the glyph from
	        codepoint: Unicode codepoint to render
	        with_metadata: If the glyph should be placed on a full 240x240 screen
	            with its name and number (see render_metadata_text)

	    Raises:
	        ValueError: If the font reports no nominal glyph for the codepoint
	    """
	    print("Rendering %d -> %s" % (codepoint, output_path))

	    target_size = (240, 150)
	    display_size = (240, 240)

	    size = target_size[1]

	    # Load font with uharfbuzz to allow rendering specific font size
	    # (The fonttools TTFont class doesn't seem to support this and draws at 1000px)
	    blob = hb.Blob.from_file_path(font_path)
	    face = hb.Face(blob)
	    font = hb.Font(face)
	    font.scale = (size, size)

	    glyph_id = font.get_nominal_glyph(codepoint)
	    if glyph_id is None:
	        # Fail with a readable message instead of handing None to the draw call
	        raise ValueError("Font %s has no glyph for U+%04X" % (font_path, codepoint))

	    # Draw glyph to pixel buffer
	    pen = FreeTypePen(None)
	    font.draw_glyph_with_pen(glyph_id, pen)
	    im = pen.image(width=target_size[0], height=target_size[1], contain=True)

	    # Centre all glyphs horizontally, even if it's not technically correct
	    bbox = im.getbbox()
	    if bbox:
	        im = im.crop((bbox[0], 0, bbox[2], im.size[1]))

	    # Place the glyph on an opaque white canvas, using its own alpha as the mask
	    padded = Image.new(mode='RGBA', size=target_size, color=(255, 255, 255, 255))
	    padded.paste(im, ((target_size[0] - im.size[0]) // 2, 0), im.convert('RGBA'))
	    im = padded

	    # Change to white text on black background
	    im = ImageOps.invert(im.convert('RGB'))

	    # Add metadata text to image if enabled
	    if with_metadata:
	        im = render_metadata_text(im, codepoint, display_size)

	    im.save(output_path)


	def render_metadata_text(glyph_im, codepoint, size):
	    """
	    Compose a full screen image: the glyph plus its block, name and number.

	    Uses the module-level name_db for the names and text_pen to draw text.

	    Args:
	        glyph_im: Already rendered glyph image
	        codepoint: Codepoint the glyph belongs to
	        size: (width, height) of the screen image to create

	    Returns the new RGB screen image
	    """
	    im = Image.new(mode='RGB', size=size, color=(0, 0, 0))

	    # Add the rendered glyph in the centre, nudged 7px down
	    paste_offset = (abs(size[0] - glyph_im.size[0]) // 2, abs(size[1] - glyph_im.size[1]) // 2 + 7)
	    im.paste(glyph_im, paste_offset)

	    block_name, char_name = name_db.get(codepoint)
	    code_display = 'U+%04X' % codepoint
	    dec_display = '%d' % codepoint

	    # Block name and character name go on the top two lines
	    im.paste(text_pen.render_line(block_name, width=size[0], size=14, color='white'), (0, 0))
	    im.paste(text_pen.render_line(char_name, width=size[0], size=14, color='white'), (0, 17))

	    # Hex form in the bottom-left corner
	    line = text_pen.render_line(code_display, width=size[0], size=18, color='green', trim=True)
	    offset = (0, size[1] - line.size[1])
	    im.paste(line, offset)

	    # Decimal form in the bottom-right corner
	    # TODO: Fix text not masked, so paste kills other text on same line
	    line = text_pen.render_line(dec_display, width=size[0], size=18, color='blue', trim=True)
	    offset = (size[0] - line.size[0], size[1] - line.size[1])
	    im.paste(line, offset)

	    return im


	class TextLineRenderer:
	    """Draws single lines of text from one font file, shaped with HarfBuzz."""

	    def __init__(self, font_path):
	        """
	        Args:
	            font_path (str): Font file used for all lines rendered by this object
	        """
	        self.blob = hb.Blob.from_file_path(font_path)
	        self.face = hb.Face(self.blob)

	    def render_line(self, text, width, size=18, color='white', trim=False) -> Image.Image:
	        """
	        Render one line of text. Does not support line breaks or layout

	        Args:
	            text (str): Line of text to render
	            width (int): Width in pixels to truncate output to
	            size (int): Font height in pixels. This is also used as the canvas height.
	            color (str|tuple): PIL compatible color value to color text after rendering
	            trim (bool): If the resulting canvas should be cropped to only rendered pixels

	        Returns an RGBA image that is transparent wherever no text was drawn
	        """
	        canvas = (width, size)

	        buf = hb.Buffer()
	        buf.direction = 'ltr'
	        buf.add_str(text)
	        buf.guess_segment_properties()

	        font = hb.Font(self.face)  # The font has to be loaded here for multiprocessing
	        font.scale = (size, size)

	        hb.shape(font, buf, {"kern": True, "liga": True})

	        # Draw every shaped glyph into one pen, advancing the cursor as we go
	        x, y = 0, 0
	        pen = FreeTypePen(None)
	        for info, pos in zip(buf.glyph_infos, buf.glyph_positions):
	            # After shaping, info.codepoint is handed to the font as the glyph to draw
	            gid = info.codepoint
	            transformed = TransformPen(pen, Offset(x + pos.x_offset, y + pos.y_offset))
	            font.draw_glyph_with_pen(gid, transformed)
	            x += pos.x_advance
	            y += pos.y_advance

	        # Use the rendered text as a mask to paint the colour onto a transparent canvas
	        rendered = pen.image(width=0, height=0, contain=True).convert('RGBA')
	        im = Image.new(mode='RGBA', size=canvas, color=(0, 0, 0, 0))
	        im.paste(color, (0, 0), mask=rendered)

	        if trim:
	            # Crop horizontally to the drawn pixels; the full line height is kept
	            bbox = im.getbbox()
	            if bbox:
	                im = im.crop((bbox[0], 0, bbox[2], im.size[1]))

	        return im


	if __name__ == '__main__':
	    # Command line entry point: index every font in a directory and render
	    # one image per codepoint into a directory tree under the output folder.

	    parser = argparse.ArgumentParser(
	        'render-codepoints',
	        formatter_class=argparse.RawDescriptionHelpFormatter,
	        description=(
	            "Given a set of fonts, render every codepoint to an image file.\n"
	            "\n"
	            "Images are stored in hierarchy of directories named using the bytes in a codepoint.\n"
	            "If a codepoint is in multiple fonts, the first font with that codepoint is used.\n"
	        )
	    )

	    parser.add_argument('-f', '--fonts', required=True, type=str, help="Directory of fonts to read from")
	    parser.add_argument('-o', '--outdir', required=True, type=str, help='Directory to write rendered images to')
	    parser.add_argument('--metadata-font', required=True, help='Path to font file for metadata rendering')
	    # Optional: a required option would never fall back to its default
	    parser.add_argument('--code-data', help='Path to UnicodeData.txt', default="UnicodeData.txt")
	    parser.add_argument('--block-data', help='Path to Blocks.txt', default="Blocks.txt")

	    parser.add_argument('--overwrite', action='store_true', help='Overwrite existing files instead of skipping them')
	    parser.add_argument('--serial', action='store_true', help='Run jobs in a single process instead of multi-processing')

	    args = parser.parse_args()

	    # NOTE(review): render_metadata_text reads these two module-level names.
	    # Worker processes only see them if they inherit this module's state
	    # (fork start method) - confirm behaviour on platforms that spawn workers.
	    text_pen = TextLineRenderer(args.metadata_font)

	    from namedb import UnicodeNameDb
	    name_db = UnicodeNameDb(args.code_data, args.block_data)

	    os.makedirs(args.outdir, exist_ok=True)

	    # Sorted so that "the first font with that codepoint" is the same on every run
	    fontfiles = []
	    fontfiles += sorted(glob.glob(os.path.join(args.fonts, '*.otf')))
	    fontfiles += sorted(glob.glob(os.path.join(args.fonts, '*.ttf')))

	    table = build_font_map(fontfiles)

	    print('Found %d glyphs in %d fonts' % (len(table), len(fontfiles)))

	    pending = []
	    with ProcessPoolExecutor(max_workers=cpu_count()) as executor:
	        for codepoint, font_path in table.items():

	            dirname, filename = make_output_path(codepoint)

	            # Ensure the folder hierarchy exists before submitting
	            abs_path = os.path.join(args.outdir, dirname)
	            output_path = os.path.join(abs_path, filename + '.png')
	            os.makedirs(abs_path, exist_ok=True)

	            # Skip existing
	            if not args.overwrite and os.path.exists(output_path):
	                continue

	            fn_args = (output_path, font_path, codepoint)

	            if args.serial:
	                glyph_to_image(*fn_args)
	            else:
	                pending.append((codepoint, executor.submit(glyph_to_image, *fn_args)))

	        # Report failures from worker processes instead of dropping them silently
	        for codepoint, future in pending:
	            error = future.exception()
	            if error is not None:
	                print("Failed to render U+%04X: %r" % (codepoint, error))