Skip to content

Instantly share code, notes, and snippets.

@black7375
Created April 6, 2021 00:19
Show Gist options
  • Save black7375/243f0179328d45d78e43fb56653c6302 to your computer and use it in GitHub Desktop.
Save black7375/243f0179328d45d78e43fb56653c6302 to your computer and use it in GitHub Desktop.
Font File to hdf5
import h5py
import PIL, PIL.ImageFont, PIL.Image, PIL.ImageDraw, PIL.ImageChops, PIL.ImageOps
import os
import random
import string
import numpy
import sys
# -------------------- Convert to Numpy Array --------------------
# Edge lengths, in pixels, of the final per-glyph bitmap.
w = h = 64
# Edge lengths, in pixels, of the cell used for the full-size rendering;
# the font size and the drawing canvas are both derived from these.
w0 = h0 = 256
# Character sets to render: A-Z, a-z and 0-9 ...
s_ascii = ''.join((string.ascii_uppercase,
                   string.ascii_lowercase,
                   string.digits))
# ... and a sample of Hangul syllables (a shorter alternative is kept below).
# s_hangul = "가나다라마바사아자차카타파하"
s_hangul = "독창적인스물네자로만들어진표음문자로서고유하고특별해지켜져야한다"
# All-white reference canvas, the same size as the per-glyph drawing canvas;
# diffing a rendered glyph against it isolates the drawn pixels.
blank = PIL.Image.new(mode='L', size=(w0 * 5, h0 * 3), color=255)
def read_font(fn, chars=s_ascii):
    """Render each character of *chars* with font *fn* into an h x w bitmap.

    Every character is drawn at full size on a white canvas.  All glyphs are
    then cropped to one shared vertical extent and scaled by one common
    factor, so their relative size and vertical alignment are preserved,
    and each is pasted into the middle of a white ``w`` x ``h`` square.

    Args:
        fn: Path of a font file accepted by ``PIL.ImageFont.truetype``.
        chars: Iterable of characters to render, in output order.

    Returns:
        Integer numpy array of shape ``(len(chars), h, w)``.  Pixel values
        are inverted (``255 - value``), so the white background becomes 0.
        An empty *chars* yields an array of shape ``(0, h, w)``.

    Raises:
        ValueError: If a character produces no visible pixels (for example
            whitespace), because it has no bounding box to crop to.
        Exception: Whatever PIL raises when the font cannot be loaded.
    """
    font = PIL.ImageFont.truetype(fn, min(w0, h0))
    canvas_size = (w0 * 5, h0 * 3)

    # We need to make sure we scale down the fonts but preserve the vertical
    # alignment, so first pass: render everything and collect the extents.
    min_ly = float('inf')
    max_hy = float('-inf')
    max_width = 0
    imgs = []
    for char in chars:
        print('...', char)
        # Draw character
        img = PIL.Image.new("L", canvas_size, 255)
        PIL.ImageDraw.Draw(img).text((w0, h0), char, font=font)
        # Get bounding box of everything that differs from the blank canvas
        bbox = PIL.ImageChops.difference(img, blank).getbbox()
        if bbox is None:
            # getbbox() returns None for an image with no non-zero pixels;
            # fail with a clear message instead of an unpacking TypeError.
            raise ValueError(
                'font %r draws nothing for character %r' % (fn, char))
        lx, ly, hx, hy = bbox
        min_ly = min(min_ly, ly)
        max_hy = max(max_hy, hy)
        max_width = max(max_width, hx - lx)
        imgs.append((lx, hx, img))

    if not imgs:
        # Nothing to render; avoid dividing by a zero max_width below.
        return numpy.zeros((0, h, w), dtype=int)

    crop_height = max_hy - min_ly
    print('crop dims:', crop_height, max_width)
    scale_factor = min(1.0 * h / crop_height, 1.0 * w / max_width)

    data = []
    for lx, hx, img in imgs:
        img = img.crop((lx, min_ly, hx, max_hy))
        # Resize to smaller
        new_width = (hx - lx) * scale_factor
        new_height = crop_height * scale_factor
        # Clamp to one pixel: a very thin glyph could otherwise truncate to a
        # zero-sized target, which resize() rejects.  LANCZOS is the filter
        # formerly exposed as ANTIALIAS (removed in Pillow 10).
        target = (max(1, int(new_width)), max(1, int(new_height)))
        img = img.resize(target, PIL.Image.LANCZOS)
        # Expand to square
        img_sq = PIL.Image.new('L', (w, h), 255)
        offset_x = (w - new_width) / 2
        offset_y = (h - new_height) / 2
        print(offset_x, offset_y)
        img_sq.paste(img, (int(offset_x), int(offset_y)))
        # Convert to numpy array (already (h, w)) and invert so that the
        # background is 0.
        matrix = 255 - numpy.array(img_sq, dtype=int)
        data.append(matrix)
    return numpy.array(data)
# -------------------- Get Files --------------------
def get_ttfs(d='../dataset/fonts'):
    """Yield the path of every TrueType/OpenType font file found under *d*.

    The directory tree is walked recursively.  Extensions are matched
    case-insensitively, so ``Foo.TTF`` is found as well as ``foo.ttf``, and
    directories and files are visited in sorted order so that repeated runs
    enumerate the fonts in the same sequence.

    Args:
        d: Root directory to search.

    Yields:
        ``os.path.join(dirpath, filename)`` for each ``.ttf`` / ``.otf`` file.
    """
    for dirpath, dirnames, filenames in os.walk(d):
        # Sorting in place steers os.walk's (top-down) descent order.
        dirnames.sort()
        for filename in sorted(filenames):
            if filename.lower().endswith(('.ttf', '.otf')):
                yield os.path.join(dirpath, filename)
def new_dataset(f, dshape, label):
    """Add a dataset named *label* with initial shape *dshape* to *f*.

    The dataset stores unsigned 1-byte values, uses *dshape* as its chunk
    shape, and has no upper bound on its first axis so that it can be grown
    later.  The same object *f* is returned.
    """
    layout = {
        'chunks': dshape,
        # Only the first axis is unbounded; the others keep their size.
        'maxshape': (None,) + dshape[1:],
        'dtype': 'u1',
    }
    f.create_dataset(label, dshape, **layout)
    return f
def new_file(path, dshape, label):
    """Open *path* as an HDF5 file in ``'w'`` mode and add one dataset to it.

    The dataset is created by ``new_dataset`` with the given *dshape* and
    *label*; the open file object is returned.
    """
    return new_dataset(h5py.File(path, 'w'), dshape, label)
def get_h5py(path='fonts.hdf5', dshape=(1,), label=None):
    """Return an open, writable HDF5 file for *path*.

    An existing file is opened in ``'r+'`` mode so its contents are kept.
    Otherwise a new file is created through ``new_file``, which also adds a
    dataset with the given *dshape* and *label*.

    Args:
        path: Location of the HDF5 file.
        dshape: Initial dataset shape, used only when the file is created.
        label: Dataset name, used only when the file is created.

    Returns:
        The open ``h5py.File``; the caller is responsible for closing it.
    """
    # The decision must depend on `path` itself: testing a fixed file name
    # here would send an existing file at any other path down the 'w'
    # (truncating) branch.
    if os.path.exists(path):
        return h5py.File(path, 'r+')
    return new_file(path, dshape, label)
# -------------------- Create Dataset --------------------
def create_dataset(path='../dataset/fonts', chars=s_ascii, label='ascii',
                   h5dfP='fonts.hdf5',):
    """Render every font under *path* and append it to an HDF5 dataset.

    Each font contributes one row of shape ``(len(chars), h, w)`` to the
    dataset *label* in the file *h5dfP*.  The file and the dataset are
    created when missing; otherwise new rows are appended after the existing
    ones.  Fonts that cannot be rendered are reported and skipped.

    Args:
        path: Directory searched recursively for font files.
        chars: Characters to render for every font.
        label: Name of the dataset inside the HDF5 file.
        h5dfP: Path of the HDF5 file.
    """
    dshape = (1, len(chars), h, w)
    f = get_h5py(h5dfP, dshape, label)
    try:
        try:
            dset = f[label]
        except KeyError:
            # Existing file that does not hold this character set yet.
            f = new_dataset(f, dshape, label)
            dset = f[label]

        # A freshly created dataset already has one (all-zero) row that the
        # first font should fill.  A single row holding real glyph data has
        # non-zero pixels and must be appended to rather than overwritten.
        if dset.shape[0] == 1 and not numpy.any(dset[0]):
            i = 0
        else:
            i = dset.shape[0]

        for fn in get_ttfs(path):
            print(fn)
            try:
                data = read_font(fn, chars=chars)
            except Exception as exc:
                # Best effort: one bad font must not abort the whole run, but
                # Ctrl-C / SystemExit are no longer swallowed.
                print('was not able to read', fn, exc)
                continue
            print(data.shape)
            dset.resize((i + 1,) + dshape[1:])
            dset[i] = data
            i += 1
            # Flush per font so an interrupted run keeps what it has so far.
            f.flush()
    finally:
        f.close()
def file_dataset(path, h5dfP='fonts.hdf5', chars=s_ascii, label='fonts'):
    """Render the single font file *path* into row 0 of an HDF5 dataset.

    Args:
        path: Font file to render.
        h5dfP: Path of the HDF5 file; created when it does not exist.
        chars: Characters to render (defaults to the ASCII set, which is
            also what ``read_font`` renders by default).
        label: Name of the dataset to write to; created when missing.

    If the font cannot be rendered, a message is printed and nothing is
    written.
    """
    # Render first so that a bad font leaves the HDF5 file untouched.
    try:
        data = read_font(path, chars=chars)
    except Exception as exc:
        print("Can't able to read", path, exc)
        return

    dshape = (1, len(chars), h, w)
    f = get_h5py(h5dfP, dshape, label)
    try:
        try:
            dset = f[label]
        except KeyError:
            # Existing file without this dataset: add it.
            f = new_dataset(f, dshape, label)
            dset = f[label]
        dset[0] = data
        f.flush()
    finally:
        f.close()
if __name__ == "__main__":
    # Build one dataset per character set from the same font directory.
    for charset, name in ((s_ascii, 'ascii'), (s_hangul, 'hangul')):
        create_dataset(path='../font/fonts', chars=charset, label=name)
@black7375
Copy link
Author

black7375 commented Apr 6, 2021

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment