Skip to content

Instantly share code, notes, and snippets.

@stecman
Last active September 5, 2023 00:07
Show Gist options
  • Save stecman/8b8627a2708a8adea0089faa139678a8 to your computer and use it in GitHub Desktop.
Save stecman/8b8627a2708a8adea0089faa139678a8 to your computer and use it in GitHub Desktop.
Render glyphs to file with Python

Bake glyphs from fonts to image files (Python)

This is an appendix item for Unicode Input Terminal.

Prototype code to pre-render 240x240 screens, each with a glyph and codepoint metadata.

# Download Unicode metadata
wget https://unicode.org/Public/UNIDATA/UnicodeData.txt
wget https://unicode.org/Public/UNIDATA/Blocks.txt

# Install dependencies
python3 -m pip install -r requirements.txt

# Render screens
python3 render-codepoints.py -f fontsdir -o outputdir --metadata-font NotoSans-Regular.ttf

# Convert an image to uncompressed pixel data for a ST7789
# Use a tool like `find` or `xargs` to run this in bulk
python3 convert-to-6bpp.py <image-file, ...>

This was part of the early work on my Unicode Binary Input Terminal project. The code has been removed from the master branch, but it can still be accessed at this commit:

stecman/unicode-input-panel@6e75f8ad6faacb83f8b661fd8d8278a463f2b1e4

"""
Convert PNG images to raw bitmap data for dumb loading on a device.
This increases the storage requirement ~10x for small PNGs, but simplifies use on the device.
"""
from PIL import Image
import argparse
import os
import struct
def truncate_channels(im):
    """
    Zero the lower two bits of every colour channel in the image.

    This can be used to preview colours similar to what a ST7789 display will show.

    Args:
        im: Source PIL image. It is converted to RGB first, so inputs whose
            pixels are not 3-tuples (greyscale, palette, ...) are accepted too.

    Returns a new RGB image of the same size; the input image is left untouched.
    """
    # point() evaluates the function once per possible channel value to build a
    # lookup table and applies that table to every band, which replaces the
    # per-pixel Python loop.
    return im.convert('RGB').point(lambda value: value & 0xFC)
if __name__ == '__main__':
    # Command-line entry point: convert each input image to a headerless-ish raw
    # dump (<width byte><height byte><RGB bytes...>) written next to the input.
    parser = argparse.ArgumentParser(
        'to-rgb6bpp',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description="""Convert input images to RGB 666 for ST7789 display
Writes raw pixel data to a .rgb file next to the input file for reading into
memory of a ST7789 screen directly from an SD card. This is packed the same
as an 8-bpp image, but the lower two bits are effectively zeroed.
""")
    parser.add_argument('images', metavar='IMAGE', nargs='+', help="Image file to convert")
    args = parser.parse_args()

    # Width and height are each stored in a single unsigned byte of the header
    max_dimension = 0xFF

    for path in args.images:
        print(path)

        if not os.path.exists(path):
            print("File not found: %s" % path)
            continue

        # Close the source file as soon as the pixel data has been decoded
        with Image.open(path) as source:
            glyph = source.convert('RGBA')

        # Ensure the glyph is on a black background as the device does no alpha blending
        im = Image.new(mode='RGB', size=glyph.size, color=(0, 0, 0))
        im.paste(glyph, (0, 0), glyph)

        # Check the size before opening the destination so that an oversized
        # image does not leave an empty .rgb file behind.
        if im.width > max_dimension or im.height > max_dimension:
            print("Image too large for the one-byte size header (%dx%d): %s" % (im.width, im.height, path))
            continue

        # Write raw rgb format with width and height data
        # This could be BMP with a little more work, but this is ok for now.
        dest = os.path.splitext(path)[0] + '.rgb'
        with open(dest, 'wb') as handle:
            handle.write(struct.pack('BB', im.width, im.height))  # Images are known to be <= 240x240
            # For an RGB image tobytes() yields the packed R, G, B bytes of
            # every pixel in row order, so one write replaces the pixel loop.
            handle.write(im.tobytes())
import re
import difflib
class UnicodeNameDb:
    """
    Look up the block name and a shortened character name for a codepoint.

    The data comes from two Unicode Character Database text files
    (UnicodeData.txt and Blocks.txt) which are parsed lazily on the first
    call to get().
    """

    # Block name recorded for named codepoints that no Blocks.txt range covers
    NO_BLOCK = "No_Block"

    # Matches Blocks.txt entries such as "0000..007F; Basic Latin"
    _BLOCK_LINE = re.compile(r'^([0-9A-Fa-f]+)\.\.([0-9A-Fa-f]+);\W*(.*)$')

    def __init__(self, unicode_data_path, unicode_blocks_path):
        """
        Args:
            unicode_data_path: Path to UnicodeData.txt
            unicode_blocks_path: Path to Blocks.txt
        """
        self.unicode_data_path = unicode_data_path
        self.unicode_blocks_path = unicode_blocks_path
        self.has_loaded = False

        # List of (first_codepoint, last_codepoint, block_name), set by load()
        self.blocks = None
        # Last codepoint covered by the final block, set by load()
        self.blocks_last_codepoint = None
        # Maps codepoint -> (block_name, short_character_name)
        self.codepoints = {}

    def get(self, codepoint):
        """
        Get the block name and shortened character name for a codepoint

        Private use ranges get fixed placeholder text, and codepoints that are
        absent from the loaded data are reported as "NOT REGISTERED".

        Returns (block_name: str, codepoint_name: str)
        """
        if not self.has_loaded:
            self.load()

        if 0xE000 <= codepoint <= 0xF8FF:
            return "Private Use Area", "[Not assigned by Unicode]"
        if 0xF0000 <= codepoint <= 0xFFFFF:
            return "Supl. Private Use Area A", "[Not assigned by Unicode]"
        if 0x100000 <= codepoint <= 0x10FFFF:
            return "Supl. Private Use Area B", "[Not assigned by Unicode]"

        try:
            return self.codepoints[codepoint]
        except KeyError:
            return "NOT REGISTERED", "Codepoint %X" % codepoint

    def load(self):
        """
        Parse both data files and fill in blocks and codepoints.

        Comment, blank and unparseable lines are skipped. The codepoint file is
        walked with a single forward-moving block index, so it is expected to
        be sorted by codepoint (entries out of order fall back to NO_BLOCK).

        Raises OSError if either file cannot be opened.
        """
        blocks = []

        # Read block range names
        with open(self.unicode_blocks_path, 'r', encoding='utf-8') as handle:
            for line in handle:
                line = line.strip()
                if not line or line.startswith('#'):
                    continue

                match = self._BLOCK_LINE.match(line)
                if match is None:
                    # Not a "start..end; name" entry, nothing to record
                    continue

                start, end, name = match.group(1, 2, 3)
                blocks.append((int(start, 16), int(end, 16), name))

        codepoints = {}
        block_index = 0

        # Read codepoint names
        with open(self.unicode_data_path, 'r', encoding='utf-8') as handle:
            for line in handle:
                fields = line.strip().split(';')
                if len(fields) < 2:
                    continue

                try:
                    codepoint = int(fields[0], 16)
                except ValueError:
                    continue

                name = fields[1]
                if name == '<control>':
                    # Control characters carry their usable name in field 10
                    name = fields[10] if len(fields) > 10 else ''
                elif name.endswith('>'):
                    # Skip markers that aren't actually codepoint names
                    continue

                # Move to the next block if our codepoint is past the end of this block
                while block_index < len(blocks) and blocks[block_index][1] < codepoint:
                    block_index += 1

                # Only use the block when it really contains the codepoint;
                # a codepoint in a gap or past the last block has no block.
                if block_index < len(blocks) and blocks[block_index][0] <= codepoint:
                    group_name = blocks[block_index][2]
                    short_name = self._shorten_name(group_name, name)
                else:
                    group_name = self.NO_BLOCK
                    short_name = name

                # Shorten specific words
                group_name = group_name.replace('Miscellaneous', 'Misc.')

                codepoints[codepoint] = (group_name, short_name or "")

        # Publish results only once both files parsed, so a failed load is
        # retried by the next get() instead of leaving partial state behind.
        self.blocks = blocks
        # Store the max codepoint defined by the blocks list
        self.blocks_last_codepoint = blocks[-1][1] if blocks else None
        self.codepoints.update(codepoints)
        self.has_loaded = True

    @staticmethod
    def _shorten_name(group_name, name):
        """
        Drop leading words of a character name that repeat its block name.

        Returns the name unchanged unless the longest common substring of the
        two (case-insensitive) is at least 3 characters and starts at the
        beginning of the character name.
        """
        matcher = difflib.SequenceMatcher(None, group_name.lower(), name.lower())
        pos_a, pos_b, size = matcher.find_longest_match(0, len(group_name), 0, len(name))
        if size < 3 or pos_b != 0:
            return name

        words_a = group_name[pos_a:].lower().split(" ")
        words_b = name.lower().split(" ")

        trim_chars = 0
        for a, b in zip(words_a, words_b):
            if a == b or a == (b + 's') or a == (b + "-1"):
                # This assumes there are single spaces, but should be ok...
                trim_chars += len(b) + 1
            else:
                # trim_chars counts from the start of the name, so only a
                # contiguous run of matching leading words may be removed.
                break

        short_name = name[trim_chars:]

        # Fix cases like "alchemical symbols" where the naming scheme is "[block name] FOR XYZ"
        # These names become "FOR XYZ", which is a bit awkward, so just drop the leading 'for'.
        if short_name.startswith('FOR '):
            short_name = short_name[4:]

        return short_name
import argparse
import difflib
import glob
import os
import re
from concurrent.futures import ProcessPoolExecutor
from multiprocessing import cpu_count
import uharfbuzz as hb
from fontTools.misc.transform import Offset
from fontTools.pens.freetypePen import FreeTypePen
from fontTools.pens.transformPen import TransformPen
from fontTools.pens.ttGlyphPen import TTGlyphPen
from fontTools.ttLib import TTFont
from PIL import Image, ImageOps
def build_font_map(fontfiles: list[str]) -> dict[int, str]:
    """
    Build a map of all available glyphs in a list of fonts

    Only Unicode cmap subtables are considered. Fonts are visited in the order
    given, so earlier files take priority for shared codepoints.

    Returns a dictionary mapping each available codepoint to the first font file that contains it
    """
    fontmap: dict[int, str] = {}

    for path in fontfiles:
        font = TTFont(path)
        try:
            cmap = font.get('cmap')
            if cmap is None:
                # A font without a cmap table contributes no codepoints
                continue

            for table in cmap.tables:
                if not table.isUnicode():
                    continue

                for code in table.cmap:
                    # Keep the first font seen for each codepoint
                    fontmap.setdefault(code, path)
        finally:
            # Release the font file; many fonts may be scanned in one run
            font.close()

    return fontmap
def make_output_path(codepoint):
    """
    Split a codepoint into a nested directory prefix and a file stem.

    The codepoint is written as upper-case hex, left-padded to an even number
    of digits and cut into two-digit pairs. The last pair is the file stem and
    each earlier pair becomes one directory level ('01/F6/'). A codepoint that
    fits in a single pair is placed in the '00/' directory.

    Returns (directory: str, stem: str)
    """
    digits = hex(codepoint)[2:].upper()
    if len(digits) % 2:
        digits = '0' + digits

    pairs = [digits[start:start + 2] for start in range(0, len(digits), 2)]
    *parents, stem = pairs

    folder = ''.join(pair + '/' for pair in parents) or '00/'
    return folder, stem
def glyph_to_image(output_path, font_path, codepoint, with_metadata=True):
    """
    Render the glyph for one codepoint from a font file and save it as an image.

    The glyph is drawn into a 240x150 area, centred horizontally and inverted
    to white-on-black. With metadata enabled the result is placed on a 240x240
    screen image by render_metadata_text() before saving.

    Args:
        output_path: File path the image is saved to
        font_path: Font file to take the glyph from
        codepoint: Unicode codepoint (int) to render
        with_metadata: Whether to add the block/name/code text around the glyph
    """
    print("Rendering %d -> %s" % (codepoint, output_path))

    target_size = (240, 150)
    display_size = (240, 240)
    size = target_size[1]

    # Load font with uharfbuzz to allow rendering specific font size
    # (The fonttools TTFont class doesn't seem to support this and draws at 1000px)
    blob = hb.Blob.from_file_path(font_path)
    face = hb.Face(blob)
    font = hb.Font(face)
    font.scale = (size, size)

    # NOTE(review): get_nominal_glyph() is expected to return None when the
    # font has no glyph for the codepoint - confirm against the uharfbuzz API.
    # Skip in that case instead of failing inside the drawing call.
    glyph_id = font.get_nominal_glyph(codepoint)
    if glyph_id is None:
        print("No glyph for %d in %s" % (codepoint, font_path))
        return

    # Draw glyph to pixel buffer
    pen = FreeTypePen(None)
    font.draw_glyph_with_pen(glyph_id, pen)
    im = pen.image(width=target_size[0], height=target_size[1], contain=True)

    # Centre all glyphs horizontally, even if it's not technically correct
    bbox = im.getbbox()
    if bbox:
        im = im.crop((bbox[0], 0, bbox[2], im.size[1]))

    padded = Image.new(mode='RGBA', size=target_size, color=(255, 255, 255, 255))  # type: ignore
    padded.paste(im, ((target_size[0] - im.size[0]) // 2, 0), im.convert('RGBA'))
    im = padded

    # Change to white text on black background
    im = ImageOps.invert(im.convert('RGB'))

    # Add metadata text to image if enabled
    if with_metadata:
        im = render_metadata_text(im, codepoint, display_size)

    im.save(output_path)
def render_metadata_text(glyph_im, codepoint, size):
    """
    Compose a screen image: the glyph in the middle with text around it.

    The block name and character name go on the top two lines, the hex code
    ('U+XXXX') in the bottom-left corner and the decimal value in the
    bottom-right corner. Reads the module-level `name_db` and `text_pen`
    objects, which the script assigns in its `__main__` section.

    Args:
        glyph_im: Already rendered glyph image
        codepoint: Codepoint (int) the glyph belongs to
        size: (width, height) of the screen image to create

    Returns a new RGB image of the given size.
    """
    width, height = size
    screen = Image.new(mode='RGB', size=size, color=(0, 0, 0))

    # Add the rendered glyph in the centre, nudged 7px down
    glyph_x = abs(width - glyph_im.size[0]) // 2
    glyph_y = abs(height - glyph_im.size[1]) // 2 + 7
    screen.paste(glyph_im, (glyph_x, glyph_y))

    block_name, char_name = name_db.get(codepoint)

    # Two header lines: block name, then character name
    block_line = text_pen.render_line(block_name, width=width, size=14, color='white')
    screen.paste(block_line, (0, 0))  # type: ignore
    name_line = text_pen.render_line(char_name, width=width, size=14, color='white')
    screen.paste(name_line, (0, 17))  # type: ignore

    # Hex code, bottom-left
    hex_line = text_pen.render_line('U+%04X' % codepoint, width=width, size=18, color='green', trim=True)
    screen.paste(hex_line, (0, height - hex_line.size[1]))

    # Decimal value, bottom-right
    # TODO: Fix text not masked, so paste kills other text on same line
    dec_line = text_pen.render_line('%d' % codepoint, width=width, size=18, color='blue', trim=True)
    screen.paste(dec_line, (width - dec_line.size[0], height - dec_line.size[1]))

    return screen
class TextLineRenderer:
    """
    Render single lines of text to images using one font file.

    Text is shaped with HarfBuzz and the glyph outlines are rasterised through
    a FreeTypePen.
    """

    def __init__(self, font_path):
        """
        Args:
            font_path: Path of the font file used for all rendered text
        """
        self.blob = hb.Blob.from_file_path(font_path)
        self.face = hb.Face(self.blob)

    def render_line(self, text, width, size=18, color='white', trim=False) -> Image.Image:
        """
        Render one line of text. Does not support line breaks or layout

        Args:
            text (str): Line of text to render
            width (int): Width in pixels to truncate output to
            size (int): Font height in pixels. This is also used as the canvas height.
            color (str|tuple): PIL compatible color value to color text after rendering
            trim (bool): If the resulting canvas should be cropped to only rendered pixels

        Returns an RGBA image with the text painted in `color` on a transparent
        background. With trim enabled only the width is cropped; the height
        stays at `size`.
        """
        canvas = (width, size)

        buf = hb.Buffer()
        buf.direction = 'ltr'
        buf.add_str(text)
        buf.guess_segment_properties()

        font = hb.Font(self.face)  # The font has to be loaded here for multiprocessing
        font.scale = (size, size)
        hb.shape(font, buf, {"kern": True, "liga": True})

        # Draw every shaped glyph into one pen, advancing the pen position by
        # the advances reported by the shaper
        x, y = 0, 0
        pen = FreeTypePen(None)
        for info, pos in zip(buf.glyph_infos, buf.glyph_positions):
            gid = info.codepoint
            transformed = TransformPen(pen, Offset(x + pos.x_offset, y + pos.y_offset))
            font.draw_glyph_with_pen(gid, transformed)
            x += pos.x_advance
            y += pos.y_advance

        # Rasterise the outlines and use the result as a mask to paint the
        # requested color onto a fully transparent canvas
        rendered = pen.image(width=0, height=0, contain=True).convert('RGBA')
        im = Image.new(mode='RGBA', size=canvas, color=(0, 0, 0, 0))
        im.paste(color, (0, 0), mask=rendered)

        if trim:
            bbox = im.getbbox()
            if bbox:
                im = im.crop((bbox[0], 0, bbox[2], im.size[1]))

        return im
if __name__ == '__main__':
    # Command-line entry point: map every codepoint to a font, then render
    # one PNG per codepoint into a directory tree below the output directory.
    parser = argparse.ArgumentParser(
        'render-codepoints',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description="""Given a set of fonts, render every codepoint to an image file.
Images are stored in hierarchy of directories named using the bytes in a codepoint.
If a codepoint is in multiple fonts, the first font with that codepoint is used.
"""
    )
    parser.add_argument('-f', '--fonts', required=True, type=str, help="Directory of fonts to read from")
    parser.add_argument('-o', '--outdir', required=True, type=str, help='Directory to write rendered images to')
    parser.add_argument('--metadata-font', required=True, help='Path to font file for metadata rendering')
    # These were marked required, which made their defaults unreachable;
    # they are optional so the files in the working directory are used by default.
    parser.add_argument('--code-data', help='Path to UnicodeData.txt', default="UnicodeData.txt")
    parser.add_argument('--block-data', help='Path to Blocks.txt', default="Blocks.txt")
    parser.add_argument('--overwrite', action='store_true', help='Overwrite existing files instead of skipping them')
    parser.add_argument('--serial', action='store_true', help='Run jobs in a single process instead of multi-processing')
    args = parser.parse_args()

    # Module-level objects read by render_metadata_text().
    # NOTE(review): worker processes only see these if they inherit the parent's
    # globals (fork start method) - confirm on platforms that spawn workers.
    text_pen = TextLineRenderer(args.metadata_font)

    from namedb import UnicodeNameDb
    name_db = UnicodeNameDb(args.code_data, args.block_data)

    os.makedirs(args.outdir, exist_ok=True)

    # Sort each group so that "first font wins" does not depend on the
    # arbitrary order glob returns files in
    fontfiles = []
    fontfiles += sorted(glob.glob(os.path.join(args.fonts, '*.otf')))
    fontfiles += sorted(glob.glob(os.path.join(args.fonts, '*.ttf')))

    table = build_font_map(fontfiles)
    print('Found %d glyphs in %d fonts' % (len(table), len(fontfiles)))

    count = 0
    failed = 0
    pending = []

    with ProcessPoolExecutor(max_workers=cpu_count()) as executor:
        for codepoint, font_path in table.items():
            dirname, filename = make_output_path(codepoint)

            # Ensure the folder hierarchy exists before submitting
            target_dir = os.path.join(args.outdir, dirname)
            output_path = os.path.join(target_dir, filename + '.png')
            os.makedirs(target_dir, exist_ok=True)

            # Skip existing
            if not args.overwrite and os.path.exists(output_path):
                continue

            fn_args = (output_path, font_path, codepoint)

            if args.serial:
                glyph_to_image(*fn_args)
            else:
                pending.append((codepoint, executor.submit(glyph_to_image, *fn_args)))

            count += 1

        # Exceptions raised in workers are stored on the future; report them
        # here so failed renders are not lost silently.
        for codepoint, future in pending:
            error = future.exception()
            if error is not None:
                failed += 1
                print("Failed to render U+%04X: %r" % (codepoint, error))

    print('Rendered %d glyphs (%d failed)' % (count - failed, failed))
uharfbuzz
fontTools
Pillow
freetype-py
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment