JoshCheek/parse_otf.rb

## parse_otf.rb
# WOFF spec                                https://www.w3.org/TR/2012/REC-WOFF-20121213/
# OTF spec                                 https://www.microsoft.com/en-us/Typography/OpenTypeSpecification.aspx
# Really nice inspector                    https://opentype.js.org/font-inspector.html
# unpack instructions                      http://www.rubydoc.info/stdlib/core/String#unpack-instance_method
# Font programming instruction definitions https://developer.apple.com/fonts/TrueType-Reference-Manual/RM05/Chap5.html#WS
# FontForge has some useful info, too; install it with Homebrew Cask
# Path of the WOFF font file this script reads and inspects. It is a
# hard-coded absolute path; point it at a local .woff file before running.
FONT_FILE = '/Users/xjxc322/gamut/bots/pxgamut_regular.woff'

require 'zlib'

# Byte width consumed by each String#unpack directive that read_and_unpack
# accepts, keyed by directive. The ">" suffix selects big-endian byte order.
# Frozen so the lookup table cannot be altered by accident at runtime.
SIZES = {
  ''   => 0, # consumes nothing; unpack yields no value, so the field stays nil
  'a4' => 4, # 4 chars, 8 bits each
  'L>' => 4, # 32 bits unsigned
  'l>' => 4, # 32 bits signed
  'S>' => 2, # 16 bits unsigned
  's>' => 2, # 16 bits signed
}.freeze

# Decodes a run of fixed-width fields from the front of a binary string.
#
# binary      - String holding the raw bytes to decode.
# format_hash - Hash of field name => String#unpack directive, listed in the
#               order the fields appear. Every directive must be a key of
#               SIZES (KeyError otherwise).
#
# Returns a two-element Array: an anonymous Struct with one member per field,
# and the bytes left over after the last field. Fields that lie past the end
# of the input are nil, and the leftover is then an empty string.
def read_and_unpack(binary, format_hash)
  record = Struct.new(*format_hash.keys).new
  format_hash.each do |name, format|
    record[name] = binary.unpack(format).first
    # Advance by bytes, not characters, so a multibyte encoding cannot skew
    # the cursor. byteslice returns nil once the start is past the end; fall
    # back to an empty string so later fields decode to nil instead of the
    # next unpack being sent to nil.
    binary = binary.byteslice(SIZES.fetch(format)..-1) || binary.byteslice(0, 0)
  end
  [record, binary]
end


# Load the whole font as raw bytes. binread opens the file in binary mode with
# ASCII-8BIT encoding, so no text-mode newline translation can alter the data
# on platforms that distinguish text from binary files.
# `font` keeps the complete file for the offset-based table reads further
# down; `rest` is the cursor that read_and_unpack advances.
rest = font = File.binread(FONT_FILE)

# WOFF header: 44 bytes of big-endian fields at the very start of the file.
header, rest = read_and_unpack(
  rest,
  signature:        "a4",
  flavor:           "a4",
  length:           "L>",
  num_tables:       "S>",
  reserved:         "S>",
  total_sfnt_size:  "L>",
  major_version:    "S>",
  minor_version:    "S>",
  meta_offset:      "L>",
  meta_length:      "L>",
  meta_orig_length: "L>",
  priv_offset:      "L>",
  priv_length:      "L>",
)
header
# => #<struct
#     signature="wOFF",
#     flavor="OTTO",
#     length=69456,
#     num_tables=12,
#     reserved=0,
#     total_sfnt_size=133632,
#     major_version=0,
#     minor_version=0,
#     meta_offset=0,
#     meta_length=0,
#     meta_orig_length=0,
#     priv_offset=0,
#     priv_length=0>

# Table directory: header.num_tables fixed-size entries stored back to back
# after the header. The block assigns to the outer `rest`, so every pass
# resumes where the previous entry ended.
directory_entry_layout = {
  tag:           "a4",
  offset:        "L>",
  comp_length:   "L>",
  orig_length:   "L>",
  orig_checksum: "L>",
}
entries = header.num_tables.times.map {
  parsed_entry, rest = read_and_unpack(rest, directory_entry_layout)
  parsed_entry
}.sort_by(&:offset)

# Render the directory as fixed-width text rows for inspection.
# Note: the COMPUTED_LEN column shows each entry's comp_length.
column_layout = "%-6s%-8s%-14s%-14s%-10s"
heading_rows = [
  %w[TAG    OFFSET   COMPUTED_LEN   ORIGINAL_LEN   CHECKSUM],
  %w[------ -------- -------------- -------------- ----------],
]
(heading_rows + entries.map(&:to_a)).map { |cells| column_layout % cells }
# => ["TAG   OFFSET  COMPUTED_LEN  ORIGINAL_LEN  CHECKSUM  ",
#     "----------------------------------------------------",
#     "head  284     54            54            208499658 ",
#     "hhea  340     33            36            247336158 ",
#     "maxp  376     6             6             42094592  ",
#     "OS/2  384     89            96            2363395350",
#     "name  476     499           1190          1917503162",
#     "cmap  976     1103          1712          3922624268",
#     "hmtx  2080    1326          2568          293570700 ",
#     "post  3408    19            32            4290248754",
#     "kern  3428    30983         65538         1282694526",
#     "GPOS  34412   4679          17102         3357198945",
#     "GSUB  39092   3235          8266          1306928084",
#     "CFF   42328   27126         36814         3912817438"]
# Build a tag => table bytes lookup. An entry whose original length does not
# exceed its stored length is copied straight out of the file; every other
# entry is zlib-inflated from its stored bytes.
tables = entries.each_with_object({}) do |entry, by_tag|
  by_tag[entry.tag] =
    if entry.orig_length <= entry.comp_length
      font[entry.offset, entry.orig_length]
    else
      Zlib::Inflate.inflate(font[entry.offset, entry.comp_length])
    end
end

# GSUB table header (version 1.0): a major/minor version pair followed by
# three Offset16 values measured from the start of the GSUB table. Offset16 is
# an unsigned 16-bit integer, so the offsets are read with "S>", not "s>".
#
# Only these five fields belong to the header. Because script_list_offset is
# 10, the ScriptList begins immediately after them: the next bytes are its
# script count (2) and the first script record, whose tag is "DFLT". Reading
# them as a substitution format / coverage offset / glyph count produced the
# meaningless values 2, 17478 (0x4446, "DF") and 19540 (0x4C54, "LT").
# Substitution subtables have to be reached through lookup_list_offset.
glyph_substitution, offsets_binary = read_and_unpack(
  tables['GSUB'],
  major_version:       "S>",
  minor_version:       "S>",
  script_list_offset:  "S>",
  feature_list_offset: "S>",
  lookup_list_offset:  "S>",
)
offsets_binary.length # => 8256
glyph_substitution
# => #<struct
#     major_version=1,
#     minor_version=0,
#     script_list_offset=10,
#     feature_list_offset=296,
#     lookup_list_offset=1780>
	# WOFF spec https://www.w3.org/TR/2012/REC-WOFF-20121213/
	# OTF spec https://www.microsoft.com/en-us/Typography/OpenTypeSpecification.aspx
	# Really nice inspector https://opentype.js.org/font-inspector.html
	# unpack instructions http://www.rubydoc.info/stdlib/core/String#unpack-instance_method
	# Font programming instruction definitions https://developer.apple.com/fonts/TrueType-Reference-Manual/RM05/Chap5.html#WS
	# FontForge has some useful info, too; install it with Homebrew Cask
	# Path of the WOFF font file this script reads and inspects. It is a
	# hard-coded absolute path; point it at a local .woff file before running.
	FONT_FILE = '/Users/xjxc322/gamut/bots/pxgamut_regular.woff'

	require 'zlib'

	# Byte width consumed by each String#unpack directive that read_and_unpack
	# accepts, keyed by directive. The ">" suffix selects big-endian byte order.
	# Frozen so the lookup table cannot be altered by accident at runtime.
	SIZES = {
	  ''   => 0, # consumes nothing; unpack yields no value, so the field stays nil
	  'a4' => 4, # 4 chars, 8 bits each
	  'L>' => 4, # 32 bits unsigned
	  'l>' => 4, # 32 bits signed
	  'S>' => 2, # 16 bits unsigned
	  's>' => 2, # 16 bits signed
	}.freeze

	# Decodes a run of fixed-width fields from the front of a binary string.
	#
	# binary      - String holding the raw bytes to decode.
	# format_hash - Hash of field name => String#unpack directive, listed in the
	#               order the fields appear. Every directive must be a key of
	#               SIZES (KeyError otherwise).
	#
	# Returns a two-element Array: an anonymous Struct with one member per field,
	# and the bytes left over after the last field. Fields that lie past the end
	# of the input are nil, and the leftover is then an empty string.
	def read_and_unpack(binary, format_hash)
	  record = Struct.new(*format_hash.keys).new
	  format_hash.each do |name, format|
	    record[name] = binary.unpack(format).first
	    # Advance by bytes, not characters, so a multibyte encoding cannot skew
	    # the cursor. byteslice returns nil once the start is past the end; fall
	    # back to an empty string so later fields decode to nil instead of the
	    # next unpack being sent to nil.
	    binary = binary.byteslice(SIZES.fetch(format)..-1) || binary.byteslice(0, 0)
	  end
	  [record, binary]
	end



	# Load the whole font as raw bytes. binread opens the file in binary mode with
	# ASCII-8BIT encoding, so no text-mode newline translation can alter the data
	# on platforms that distinguish text from binary files.
	# `font` keeps the complete file for the offset-based table reads further
	# down; `rest` is the cursor that read_and_unpack advances.
	rest = font = File.binread(FONT_FILE)

	# WOFF header: 44 bytes of big-endian fields at the very start of the file.
	header, rest = read_and_unpack(
	  rest,
	  signature:        "a4",
	  flavor:           "a4",
	  length:           "L>",
	  num_tables:       "S>",
	  reserved:         "S>",
	  total_sfnt_size:  "L>",
	  major_version:    "S>",
	  minor_version:    "S>",
	  meta_offset:      "L>",
	  meta_length:      "L>",
	  meta_orig_length: "L>",
	  priv_offset:      "L>",
	  priv_length:      "L>",
	)
	header
	# => #<struct
	#     signature="wOFF",
	#     flavor="OTTO",
	#     length=69456,
	#     num_tables=12,
	#     reserved=0,
	#     total_sfnt_size=133632,
	#     major_version=0,
	#     minor_version=0,
	#     meta_offset=0,
	#     meta_length=0,
	#     meta_orig_length=0,
	#     priv_offset=0,
	#     priv_length=0>

	# Table directory: header.num_tables fixed-size entries stored back to back
	# after the header. The block assigns to the outer `rest`, so every pass
	# resumes where the previous entry ended.
	entries = header.num_tables.times.map do
	  entry, rest = read_and_unpack(
	    rest,
	    tag:           "a4",
	    offset:        "L>",
	    comp_length:   "L>",
	    orig_length:   "L>",
	    orig_checksum: "L>",
	  )
	  entry
	end
	entries.sort_by!(&:offset)

	# Render the directory as fixed-width text rows for inspection.
	# Note: the COMPUTED_LEN column shows each entry's comp_length.
	[%w[TAG    OFFSET   COMPUTED_LEN   ORIGINAL_LEN   CHECKSUM],
	 %w[------ -------- -------------- -------------- ----------],
	 *entries.map(&:to_a)
	].map { |row| "%-6s%-8s%-14s%-14s%-10s" % row }
	# => ["TAG   OFFSET  COMPUTED_LEN  ORIGINAL_LEN  CHECKSUM  ",
	#     "----------------------------------------------------",
	#     "head  284     54            54            208499658 ",
	#     "hhea  340     33            36            247336158 ",
	#     "maxp  376     6             6             42094592  ",
	#     "OS/2  384     89            96            2363395350",
	#     "name  476     499           1190          1917503162",
	#     "cmap  976     1103          1712          3922624268",
	#     "hmtx  2080    1326          2568          293570700 ",
	#     "post  3408    19            32            4290248754",
	#     "kern  3428    30983         65538         1282694526",
	#     "GPOS  34412   4679          17102         3357198945",
	#     "GSUB  39092   3235          8266          1306928084",
	#     "CFF   42328   27126         36814         3912817438"]

	# Build a tag => table bytes lookup. An entry whose original length does not
	# exceed its stored length is copied straight out of the file; every other
	# entry is zlib-inflated from its stored bytes.
	tables = entries.map do |e|
	  table = e.orig_length <= e.comp_length ?
	            font[e.offset, e.orig_length] :
	            Zlib::Inflate.inflate(font[e.offset, e.comp_length])
	  [e.tag, table]
	end.to_h

	# GSUB table header (version 1.0): a major/minor version pair followed by
	# three Offset16 values measured from the start of the GSUB table. Offset16 is
	# an unsigned 16-bit integer, so the offsets are read with "S>", not "s>".
	#
	# Only these five fields belong to the header. Because script_list_offset is
	# 10, the ScriptList begins immediately after them: the next bytes are its
	# script count (2) and the first script record, whose tag is "DFLT". Reading
	# them as a substitution format / coverage offset / glyph count produced the
	# meaningless values 2, 17478 (0x4446, "DF") and 19540 (0x4C54, "LT").
	# Substitution subtables have to be reached through lookup_list_offset.
	glyph_substitution, offsets_binary = read_and_unpack(
	  tables['GSUB'],
	  major_version:       "S>",
	  minor_version:       "S>",
	  script_list_offset:  "S>",
	  feature_list_offset: "S>",
	  lookup_list_offset:  "S>",
	)
	offsets_binary.length # => 8256
	glyph_substitution
	# => #<struct
	#     major_version=1,
	#     minor_version=0,
	#     script_list_offset=10,
	#     feature_list_offset=296,
	#     lookup_list_offset=1780>