import argparse |
import difflib |
import glob |
import os |
import re |
from concurrent.futures import ProcessPoolExecutor |
from multiprocessing import cpu_count |
import uharfbuzz as hb |
from fontTools.misc.transform import Offset |
from fontTools.pens.freetypePen import FreeTypePen |
from fontTools.pens.transformPen import TransformPen |
from fontTools.pens.ttGlyphPen import TTGlyphPen |
from fontTools.ttLib import TTFont |
from PIL import Image, ImageOps |
def build_font_map(fontfiles: list[str]):
    """
    Build a map of all available glyphs in a list of fonts

    Fonts are scanned in the order given, so earlier files take priority
    when several fonts cover the same codepoint. Only Unicode cmap subtables
    are considered. Fonts without a 'cmap' table are skipped.

    Args:
        fontfiles (list[str]): Paths of the font files to scan

    Returns a dictionary mapping each available codepoint to the first font file that contains it
    """
    fontmap = {}
    for path in fontfiles:
        # Close each font as soon as it has been scanned so open file
        # handles don't accumulate when the font directory is large
        with TTFont(path) as font:
            cmap = font.get('cmap')
            if cmap is None:
                continue
            for table in cmap.tables:
                if not table.isUnicode():
                    continue
                for code in table.cmap:
                    # setdefault keeps the first font seen for a codepoint
                    fontmap.setdefault(code, path)
    return fontmap
def make_output_path(codepoint):
    """
    Split a codepoint into a nested directory path and a file name

    The codepoint is written as uppercase hex, left-padded with a zero to an
    even number of digits. The last two digits become the file name and each
    earlier pair of digits becomes one directory level. Codepoints that fit
    in two digits are placed in the '00/' directory.

    Returns a (directory, filename) tuple, where directory ends with '/'
    """
    digits = hex(codepoint)[2:].upper()
    if len(digits) % 2:
        digits = '0' + digits
    pairs = [digits[i:i + 2] for i in range(0, len(digits), 2)]
    *parents, leaf = pairs
    folder = ''.join(pair + '/' for pair in parents) or '00/'
    return folder, leaf
def glyph_to_image(output_path, font_path, codepoint, with_metadata=True):
    """
    Render one codepoint from a font file and save it as an image

    The glyph is drawn on a white canvas, centred horizontally, and then
    inverted to white on black. With metadata enabled the result is placed
    on a larger canvas with text labels by render_metadata_text.

    Args:
        output_path (str): File path to save the image to
        font_path (str): Path of the font file to take the glyph from
        codepoint (int): Codepoint to render
        with_metadata (bool): If the labelled canvas should be saved instead of the bare glyph

    If the font reports no nominal glyph for the codepoint, a message is
    printed and no file is written.
    """
    print("Rendering %d -> %s" % (codepoint, output_path))
    target_size = (240, 150)
    display_size = (240, 240)
    size = target_size[1]

    # Load font with uharfbuzz to allow rendering specific font size
    # (The fonttools TTFont class doesn't seem to support this and draws at 1000px)
    blob = hb.Blob.from_file_path(font_path)
    face = hb.Face(blob)
    font = hb.Font(face)
    font.scale = (size, size)

    # The lookup yields None when the font has no mapping for the codepoint;
    # bail out instead of passing None on as a glyph id
    gid = font.get_nominal_glyph(codepoint)
    if gid is None:
        print("Skipping %d: no nominal glyph in %s" % (codepoint, font_path))
        return

    # Draw glyph to pixel buffer
    pen = FreeTypePen(None)
    font.draw_glyph_with_pen(gid, pen)
    im = pen.image(width=target_size[0], height=target_size[1], contain=True)

    # Centre all glyphs horizontally, even if it's not technically correct
    bbox = im.getbbox()
    if bbox:
        # Crop only the horizontal extent; the full height is kept
        im = im.crop((bbox[0], 0, bbox[2], im.size[1]))

    padded = Image.new(mode='RGBA', size=target_size, color=(255, 255, 255, 255))  # type: ignore
    padded.paste(im, ((target_size[0] - im.size[0]) // 2, 0), im.convert('RGBA'))
    im = padded

    # Change to white text on black background
    im = ImageOps.invert(im.convert('RGB'))

    # Add metadata text to image if enabled
    if with_metadata:
        im = render_metadata_text(im, codepoint, display_size)

    im.save(output_path)
def render_metadata_text(glyph_im, codepoint, size):
    """
    Place a rendered glyph on a black canvas and label it with text

    The block name and character name are looked up in the module-level
    name_db and drawn on the top two lines. The codepoint is drawn in hex at
    the bottom left and in decimal at the bottom right. All text is rendered
    with the module-level text_pen.

    Args:
        glyph_im: Rendered glyph image to place on the canvas
        codepoint (int): Codepoint being labelled
        size (tuple): (width, height) of the canvas in pixels

    Returns the composed canvas image
    """
    canvas_w, canvas_h = size[0], size[1]
    canvas = Image.new(mode='RGB', size=size, color=(0, 0, 0))

    # Centre the glyph horizontally; vertically it sits 7px below centre
    glyph_w, glyph_h = glyph_im.size[0], glyph_im.size[1]
    canvas.paste(glyph_im, (abs(canvas_w - glyph_w) // 2, abs(canvas_h - glyph_h) // 2 + 7))

    block_name, char_name = name_db.get(codepoint)

    # Two full-width header lines at the top of the canvas
    for row, label in ((0, block_name), (17, char_name)):
        header = text_pen.render_line(label, width=canvas_w, size=14, color='white')  # type: ignore
        canvas.paste(header, (0, row))

    # Hex form, flush with the bottom-left corner
    hex_label = text_pen.render_line('U+%04X' % codepoint, width=canvas_w, size=18, color='green', trim=True)
    canvas.paste(hex_label, (0, canvas_h - hex_label.size[1]))

    # TODO: Fix text not masked, so paste kills other text on same line
    # Decimal form, flush with the bottom-right corner
    dec_label = text_pen.render_line('%d' % codepoint, width=canvas_w, size=18, color='blue', trim=True)
    canvas.paste(dec_label, (canvas_w - dec_label.size[0], canvas_h - dec_label.size[1]))

    return canvas
class TextLineRenderer:
    """
    Renders single lines of text to images using one font file
    """

    def __init__(self, font_path):
        """
        Load the font face used by every later render_line call

        Args:
            font_path (str): Path of the font file to render text with
        """
        self.blob = hb.Blob.from_file_path(font_path)
        self.face = hb.Face(self.blob)

    def render_line(self, text, width, size=18, color='white', trim=False) -> Image.Image:
        """
        Render one line of text. Does not support line breaks or layout

        Args:
            text (str): Line of text to render
            width (int): Width in pixels to truncate output to
            size (int): Font height in pixels. This is also used as the canvas height.
            color (str|tuple): PIL compatible color value to color text after rendering
            trim (bool): If the resulting canvas should be cropped to only rendered pixels

        Returns an RGBA image with the text in the given color on a
        transparent background. Empty text gives a blank canvas.
        """
        canvas = (width, size)
        im = Image.new(mode='RGBA', size=canvas, color=(0, 0, 0, 0))

        # Nothing to shape or draw
        if not text:
            return im

        buf = hb.Buffer()
        buf.direction = 'ltr'
        buf.add_str(text)
        buf.guess_segment_properties()

        font = hb.Font(self.face)  # The font has to be loaded here for multiprocessing
        font.scale = (size, size)
        hb.shape(font, buf, {"kern": True, "liga": True})

        # Draw every shaped glyph into one pen, advancing the origin as we go
        x, y = 0, 0
        pen = FreeTypePen(None)
        for info, pos in zip(buf.glyph_infos, buf.glyph_positions):
            gid = info.codepoint
            transformed = TransformPen(pen, Offset(x + pos.x_offset, y + pos.y_offset))
            font.draw_glyph_with_pen(gid, transformed)
            x += pos.x_advance
            y += pos.y_advance

        # Rasterise the outlines and use the result as a mask to stamp
        # the requested color onto the transparent canvas
        rendered = pen.image(width=0, height=0, contain=True).convert('RGBA')
        im.paste(color, (0, 0), mask=rendered)

        if trim:
            bbox = im.getbbox()
            if bbox:
                # Crop only the horizontal extent; the full height is kept
                im = im.crop((bbox[0], 0, bbox[2], im.size[1]))

        return im
if __name__ == '__main__':
    # Command-line entry point: map every codepoint to a font, then render
    # each one to <outdir>/<hex byte dirs>/<hex byte>.png
    parser = argparse.ArgumentParser(
        'render-codepoints',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description="""Given a set of fonts, render every codepoint to an image file.
Images are stored in hierarchy of directories named using the bytes in a codepoint.
If a codepoint is in multiple fonts, the first font with that codepoint is used.
"""
    )
    parser.add_argument('-f', '--fonts', required=True, type=str, help="Directory of fonts to read from")
    parser.add_argument('-o', '--outdir', required=True, type=str, help='Directory to write rendered images to')
    parser.add_argument('--metadata-font', required=True, help='Path to font file for metadata rendering')
    # Optional: these fall back to files in the working directory
    parser.add_argument('--code-data', help='Path to UnicodeData.txt', default="UnicodeData.txt")
    parser.add_argument('--block-data', help='Path to Blocks.txt', default="Blocks.txt")
    parser.add_argument('--overwrite', action='store_true', help='Overwrite existing files instead of skipping them')
    parser.add_argument('--serial', action='store_true', help='Run jobs in a single process instead of multi-processing')
    args = parser.parse_args()

    # render_metadata_text reads text_pen and name_db as module globals.
    # NOTE(review): worker processes only see them if they inherit this
    # module's state (e.g. the 'fork' start method) - confirm behaviour on
    # platforms that start workers with 'spawn', or pass --serial there.
    text_pen = TextLineRenderer(args.metadata_font)

    from namedb import UnicodeNameDb
    name_db = UnicodeNameDb(args.code_data, args.block_data)

    os.makedirs(args.outdir, exist_ok=True)

    # glob returns matches in arbitrary order; sort each pattern so that
    # "first font wins" gives the same result on every run
    fontfiles = []
    fontfiles += sorted(glob.glob(os.path.join(args.fonts, '*.otf')))
    fontfiles += sorted(glob.glob(os.path.join(args.fonts, '*.ttf')))

    table = build_font_map(fontfiles)
    print('Found %d glyphs in %d fonts' % (len(table), len(fontfiles)))

    pending = []
    with ProcessPoolExecutor(max_workers=cpu_count()) as executor:
        for codepoint, font_path in table.items():
            dirname, filename = make_output_path(codepoint)
            abs_path = os.path.join(args.outdir, dirname)
            output_path = os.path.join(abs_path, filename + '.png')

            # Skip existing
            if not args.overwrite and os.path.exists(output_path):
                continue

            # Ensure the folder hierarchy exists before submitting
            os.makedirs(abs_path, exist_ok=True)

            fn_args = (output_path, font_path, codepoint)
            if args.serial:
                glyph_to_image(*fn_args)
            else:
                pending.append((codepoint, executor.submit(glyph_to_image, *fn_args)))

        # An exception raised in a worker is only stored on its future;
        # report each one here so failed renders are not lost silently
        for codepoint, future in pending:
            error = future.exception()
            if error is not None:
                print('Failed to render U+%04X: %r' % (codepoint, error))