Skip to content

Instantly share code, notes, and snippets.

@m4rc1e
Created April 14, 2023 10:08
Show Gist options
  • Save m4rc1e/bc8c5582abd9e99f1052b4574d31f587 to your computer and use it in GitHub Desktop.
Save m4rc1e/bc8c5582abd9e99f1052b4574d31f587 to your computer and use it in GitHub Desktop.
Check whether two fonts are metrics-compatible by shaping random strings with both and comparing the total advance widths
import random
import string
import uharfbuzz as hb
import sys
from fontTools.ttLib import TTFont
def generate_random_string(fp):
    """Return a random string built from the code points a font maps.

    Args:
        fp: Path of the font file to open with fontTools' ``TTFont``.

    Returns:
        A string of 3 to 100 characters, each picked independently with
        ``random.choice`` from the characters in the font's best cmap.
    """
    # Close the font explicitly: this function is called over and over, so
    # the underlying file handle must not be left for the garbage collector.
    with TTFont(fp) as font:
        cmap = font.getBestCmap()
        letters = [chr(c) for c in cmap]
    length = random.randint(3, 100)
    return "".join(random.choice(letters) for _ in range(length))
def string_length(fp, text):
    """Shape ``text`` with the font at ``fp`` and return its total x-advance.

    Args:
        fp: Path of the font file, loaded through ``hb.Blob.from_file_path``.
        text: The string to shape.

    Returns:
        The sum of ``x_advance`` over all glyph positions of the shaped text.
    """
    # Load the font file into a HarfBuzz font object.
    hb_font = hb.Font(hb.Face(hb.Blob.from_file_path(fp)))

    # Put the text in a buffer, let HarfBuzz guess the segment properties,
    # then shape it.
    buffer = hb.Buffer()
    buffer.add_str(text)
    buffer.guess_segment_properties()
    hb.shape(hb_font, buffer)

    total = 0
    for position in buffer.glyph_positions:
        total += position.x_advance
    return total
# Both font paths are required; fail with a usage message instead of an
# IndexError traceback when they are missing.
if len(sys.argv) < 3:
    sys.exit("usage: python <script> ORIGINAL_FONT NEW_FONT")
original_fp, new_fp = sys.argv[1], sys.argv[2]

# Runs until interrupted: each iteration draws a random string from the
# original font's cmap, shapes it with both fonts, and reports the string
# whenever the two total advances differ.
while True:
    rand_string = generate_random_string(original_fp)
    original_length = string_length(original_fp, rand_string)
    new_length = string_length(new_fp, rand_string)
    if original_length != new_length:
        print(f"{rand_string}, {original_length}, {new_length}")
@twardoch
Copy link

twardoch commented Apr 14, 2023

#!/usr/bin/env python3

import random
import string
import uharfbuzz as hb
import sys
from fontTools.ttLib import TTFont
from fire import Fire
from pathlib import Path
from functools import cache
import asyncio
from concurrent.futures import ProcessPoolExecutor


@cache
def invalid_set():
    """Return the code points that must never appear in a test string.

    The set contains:
      * U+D800..U+DFFF (surrogates),
      * U+FDD0..U+FDEF and the last two code points of every plane
        (U+xxFFFE / U+xxFFFF), i.e. the Unicode noncharacters,
      * U+0000..U+001F and U+007F..U+009F (control characters),
      * U+00A0 (no-break space).

    Returns:
        A set of integer code points. The result is cached, so every call
        returns the same set object; callers should treat it as read-only.
    """
    # Build the set straight from its ranges rather than filtering all
    # 0x110000 code points.
    excluded = set(range(0xD800, 0xE000))
    excluded.update(range(0xFDD0, 0xFDF0))
    for plane_start in range(0, 0x110000, 0x10000):
        excluded.update((plane_start | 0xFFFE, plane_start | 0xFFFF))
    excluded.update(range(0x0000, 0x0020))
    excluded.update(range(0x007F, 0x00A0))
    excluded.add(0x00A0)
    return excluded


def generate_random_string(chars):
    """Return a random string of 3 to 100 characters drawn from ``chars``.

    Args:
        chars: Non-empty sequence of characters to pick from.

    Returns:
        A string whose length is chosen with ``random.randint(3, 100)`` and
        whose characters are each picked independently with ``random.choice``.
    """
    count = random.randint(3, 100)
    picks = [random.choice(chars) for _ in range(count)]
    return "".join(picks)


def string_length(blob, text):
    """Shape ``text`` with the font data in ``blob`` and return its x-advance.

    Args:
        blob: An ``hb.Blob`` holding the font data.
        text: The string to shape.

    Returns:
        The sum of ``x_advance`` over all glyph positions of the shaped text.
    """
    # Wrap the font data in a HarfBuzz font object.
    hb_font = hb.Font(hb.Face(blob))

    # Load the text into a buffer, let HarfBuzz guess the segment
    # properties, then shape it.
    buffer = hb.Buffer()
    buffer.add_str(text)
    buffer.guess_segment_properties()
    hb.shape(hb_font, buffer)

    advance = 0
    for glyph_pos in buffer.glyph_positions:
        advance += glyph_pos.x_advance
    return advance


async def compare_string(chars, hb1, hb2):
    """Shape one random string with both fonts and report a width mismatch.

    Args:
        chars: Sequence of characters the random string is drawn from.
        hb1: ``hb.Blob`` of the first font.
        hb2: ``hb.Blob`` of the second font.

    Returns:
        None. When the two total advances differ, the string and both
        values are printed.
    """
    rand_string = generate_random_string(chars)
    # get_running_loop() is the documented way to obtain the loop from
    # inside a coroutine; it never creates a loop implicitly.
    loop = asyncio.get_running_loop()
    # Shape with both fonts concurrently on the loop's default executor.
    len1, len2 = await asyncio.gather(
        loop.run_in_executor(None, string_length, hb1, rand_string),
        loop.run_in_executor(None, string_length, hb2, rand_string),
    )

    if len1 != len2:
        print(f"{rand_string}\n  {len1} != {len2}")


def _font_codepoints(fp):
    """Return the set of code points in the best cmap of the font at ``fp``."""
    # The context manager closes the font file once the cmap has been read.
    with TTFont(fp) as font:
        # getBestCmap() may return None when no suitable cmap is found.
        return set(font.getBestCmap() or ())


async def compare_font_strings(fp1, fp2, n=2000):
    """Compare the shaped width of ``n`` random strings in two fonts.

    Test strings are drawn only from code points mapped by both fonts,
    excluding everything in ``invalid_set()``.

    Args:
        fp1: Path of the first font file.
        fp2: Path of the second font file.
        n: Number of random strings to compare.

    Raises:
        ValueError: If the two fonts share no usable code points.
    """
    # Explicit parentheses: intersect first, then drop the excluded set.
    shared = (_font_codepoints(fp1) & _font_codepoints(fp2)) - invalid_set()
    if not shared:
        raise ValueError(
            f"{fp1} and {fp2} have no testable code points in common"
        )
    # Sort so the alphabet order does not depend on set iteration order.
    chars = [chr(c) for c in sorted(shared)]
    hb1 = hb.Blob.from_file_path(fp1)
    hb2 = hb.Blob.from_file_path(fp2)

    tasks = [
        asyncio.create_task(compare_string(chars, hb1, hb2))
        for _ in range(n)
    ]
    await asyncio.gather(*tasks)


def compare_fonts_metrics(fp1, fp2, n=1000):
    """Run the random-string width comparison of two fonts to completion.

    Args:
        fp1: Path of the first font file.
        fp2: Path of the second font file.
        n: Number of random strings to compare.
    """
    comparison = compare_font_strings(fp1, fp2, n)
    asyncio.run(comparison)


# Script entry point: hand compare_fonts_metrics to Fire so it can be
# invoked from the command line.
if __name__ == "__main__":
    Fire(compare_fonts_metrics)

@twardoch
Copy link

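The version above is optimized a bit for execution speed. It also draws test characters only from the intersection of the two fonts’ cmaps, minus a disallowed set of code points (surrogates, noncharacters and control characters, plus the no-break space, which we don’t test for).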

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment