Skip to content

Instantly share code, notes, and snippets.

@zr-tex8r
Last active October 6, 2015 11:47
Show Gist options
  • Save zr-tex8r/2988601 to your computer and use it in GitHub Desktop.
Save zr-tex8r/2988601 to your computer and use it in GitHub Desktop.
A program to show the mapping between GIDs and glyph names for a given TrueType font
use strict;
my $prog_name = "zrotfdump";
my ($mode, $smode, $font_file);
my @mglist = qw(
.notdef .null nonmarkingreturn space exclam quotedbl numbersign
dollar percent ampersand quotesingle parenleft parenright
asterisk plus comma hyphen period slash zero one two three four
five six seven eight nine colon semicolon less equal greater
question at A B C D E F G H I J K L M N O P Q R S T U V W X Y Z
bracketleft backslash bracketright asciicircum underscore grave
a b c d e f g h i j k l m n o p q r s t u v w x y z braceleft
bar braceright asciitilde Adieresis Aring Ccedilla Eacute
Ntilde Odieresis Udieresis aacute agrave acircumflex adieresis
atilde aring ccedilla eacute egrave ecircumflex edieresis
iacute igrave icircumflex idieresis ntilde oacute ograve
ocircumflex odieresis otilde uacute ugrave ucircumflex
udieresis dagger degree cent sterling section bullet paragraph
germandbls registered copyright trademark acute dieresis
notequal AE Oslash infinity plusminus lessequal greaterequal
yen mu partialdiff summation product pi integral ordfeminine
ordmasculine Omega ae oslash questiondown exclamdown logicalnot
radical florin approxequal Delta guillemotleft guillemotright
ellipsis nonbreakingspace Agrave Atilde Otilde OE oe endash
emdash quotedblleft quotedblright quoteleft quoteright divide
lozenge ydieresis Ydieresis fraction currency guilsinglleft
guilsinglright fi fl daggerdbl periodcentered quotesinglbase
quotedblbase perthousand Acircumflex Ecircumflex Aacute
Edieresis Egrave Iacute Icircumflex Idieresis Igrave Oacute
Ocircumflex apple Ograve Uacute Ucircumflex Ugrave dotlessi
circumflex tilde macron breve dotaccent ring cedilla
hungarumlaut ogonek caron Lslash lslash Scaron scaron Zcaron
zcaron brokenbar Eth eth Yacute yacute Thorn thorn minus
multiply onesuperior twosuperior threesuperior onehalf
onequarter threequarters franc Gbreve gbreve Idotaccent
Scedilla scedilla Cacute cacute Ccaron ccaron dcroat
);
#---------------------------------------
use Data::Dump 'dump';
my %proc = (
name => \&main_name,
cmap => \&main_cmap,
);
sub main {
read_option();
my ($hfont);
open($hfont, '<', $font_file) && binmode($hfont)
or error("cannot open for input", $font_file);
my $data = $proc{$mode}->($hfont);
dump_data($data);
close($hfont);
}
#---------------------------------------
sub main_cmap {
my ($hfont) = @_;
my $head = read_head($hfont)
or error("cannot read header", $font_file);
(defined $head->{post})
or error("cannot find 'post' table", $font_file);
(defined $head->{cmap})
or error("cannot find 'post' table", $font_file);
my @post2 = parse_post($hfont, @{$head->{post}});
my $post = read_post(@post2) if (@post2);
my $cmaphead = read_cmap_head($hfont, @{$head->{cmap}});
my $ofs12 = find_cmap($cmaphead, 12)
or error("cnnot find cmap-12 table", $font_file);
my $cmap12 = parse_cmap_12($hfont, $ofs12);
if ($smode eq '12') {
return read_cmap_12($cmap12, $post);
} elsif ($smode eq '14') {
my $ofs14 = find_cmap($cmaphead, 14)
or error("cnnot find cmap-14 table", $font_file);
my $cmap14 = parse_cmap_14($hfont, $ofs14, $cmap12);
return read_cmap_14($cmap14, $cmap12, $post);
}
}
sub find_cmap {
my ($chead, $fmt) = @_;
foreach my $e (@$chead) {
my ($pid, $eid, $ofs, $fmt1) = @$e;
($fmt1 == $fmt &&
(($pid == 3 && $eid == 1) ||
($pid == 3 && $eid == 10) ||
($pid == 0 && $eid == 5))) or next;
return $ofs;
}
return;
}
sub read_cmap_head {
my ($hin, $ofs, $len) = @_;
my $buf = read_file($hin, $ofs, $len) or return;
my (@fh) = unpack("nn", substr($buf, 0, 4));
my $nctbl = $fh[1];
($fh[0] == 0x0 && $nctbl > 0) or return;
my (@fe) = unpack("(nnN)*", substr($buf, 4, $nctbl * 8));
my @ary;
while (@fe) {
my (@f1) = splice(@fe, 0, 3);
$f1[2] += $ofs;
my (@fc) = unpack("nn", read_file($hin, $f1[2], 4));
push(@ary, [ @f1, @fc ]);
}
return \@ary;
}
sub parse_cmap_12 {
my ($hin, $ofs) = @_;
my @fh = unpack("nnNNN", read_file($hin, $ofs, 16));
($fh[0] == 12 && $fh[2] > 0 && $fh[4] > 0) or return;
my $buf = read_file($hin, $ofs, $fh[2]);
my @vs = unpack("N*", substr($buf, 16));
(scalar(@vs) == $fh[4] * 3) or return;
my (@cmap);
while (@vs) {
my ($suc, $euc, $gid) = splice(@vs, 0, 3);
foreach my $uc ($suc .. $euc) {
push(@cmap, [ $uc, $gid++ ]);
}
}
return \@cmap;
}
sub read_cmap_12 {
my ($cmap12, $post) = @_;
(defined $post) or return $cmap12;
my @data = map {
my $n = $post->[$_->[1]];
[ @$_, (defined $n) ? $n : '' ]
} (@$cmap12);
return \@data;
}
sub parse_cmap_14 {
my ($hin, $ofs) = @_;
my @fh = unpack("nNN", read_file($hin, $ofs, 10));
($fh[0] == 14 && $fh[1] > 0) or return;
my $buf = read_file($hin, $ofs, $fh[1]);
my @vs = unpack("(a3NN)*", substr($buf, 10, $fh[2] * 11));
(scalar(@vs) == $fh[2] * 3) or return;
my (%cmap);
while (@vs) {
my ($vs, $dofs, $nofs) = splice(@vs, 0, 3);
my (@d, @n);
if ($dofs > 0) {
my @x = unpack("N/(a3c)", substr($buf, $dofs));
while (@x) {
my ($suc, $n) = splice(@x, 0, 2);
$suc = int24($suc);
push(@d, $suc .. ($suc + $n));
}
}
if ($nofs > 0) {
my @x = unpack("N/(a3n)", substr($buf, $nofs));
while (@x) {
my ($uc, $gid) = splice(@x, 0, 2);
$uc = int24($uc);
push(@n, [$uc, $gid]);
}
}
$cmap{int24($vs)} = [\@d, \@n];
}
return \%cmap;
}
sub read_cmap_14 {
my ($cmap14, $cmap12, $post) = @_;
my (%dgid, @ent);
foreach my $e (@$cmap12) {
$dgid{$e->[0]} = $e->[1];
}
foreach my $vs (keys %$cmap14) {
my ($d, $n) = @{$cmap14->{$vs}};
foreach my $uc (@$d) {
my $gid = $dgid{$uc};
push(@ent, [$uc, $vs, $gid, ss($post->[$gid])]);
}
foreach my $e (@$n) {
my ($uc, $gid) = @$e;
push(@ent, [$uc, $vs, $gid, ss($post->[$gid])]);
}
}
@ent = sort {
($a->[0] <=> $b->[0]) || ($a->[1] <=> $b->[1])
} (@ent);
return \@ent;
}
#---------------------------------------
sub ss {
return (defined $_[0]) ? $_[0] : '';
}
sub int24 {
return unpack("N", "\0".$_[0]);
}
sub read_file {
my ($hin, $ofs, $len) = @_;
seek($hin, $ofs, 0);
my ($buf); read($hin, $buf, $len);
(length($buf) == $len) or return;
return $buf;
}
sub data_from_array {
my ($array) = @_;
my @data = map {
my $v = $array->[$_];
[ $_, (ref $v) ? @$v : $v ]
} (0..$#$array);
return \@data;
}
sub data_from_hash {
my ($hash) = @_;
my @data = map {
my $v = $hash->{$_};
[ $_, (ref $v) ? @$v : $v ]
} (keys %$hash);
return \@data;
}
sub dump_data {
my ($data) = @_;
foreach (@$data) {
print(join("\t", @$_), "\n");
}
}
sub read_head {
my ($hin) = @_;
seek($hin, 0, 0);
my ($buf); read($hin, $buf, 12);
my ($ver, $ntbl) = unpack("Nnnnn", $buf);
($ver == 0x10000)
or info("Unexpected sfnt version");
my %res;
foreach (1 .. $ntbl) {
read($hin, $buf, 16);
my ($tag, $csum, $ofs, $len) = unpack("a4NNN", $buf);
$res{$tag} = [ $ofs, $len ];
}
return \%res;
}
#---------------------------------------
sub main_name {
my ($hfont) = @_;
my $head = read_head($hfont)
or error("cannot read header", $font_file);
(defined $head->{post})
or error("cannot find 'post' table", $font_file);
my ($name, $idx) = parse_post($hfont, @{$head->{post}})
or error("cannot read glyph name table", $font_file);
if ($smode eq '') {
return data_from_array(read_post($name, $idx));
} elsif ($smode eq 'r') {
return data_from_array($idx);
} elsif ($smode eq 'x') {
return data_from_array(read_post_x($name, $idx));
}
}
sub parse_post {
my ($hin, $ofs, $len) = @_;
seek($hin, $ofs, 0);
my ($buf); read($hin, $buf, $len);
(length($buf) == $len) or return;
my (@f) = unpack("NNnnNNNNNn", $buf);
my $nglf = $f[9];
($f[0] == 0x20000 && $nglf > 0) or return;
my @idx = unpack("n*", substr($buf, 34, $nglf * 2));
(scalar(@idx) == $nglf) or return;
$buf = substr($buf, 34 + $nglf * 2);
my @nams = (@mglist, unpack("(c/a)*", $buf));
return ( \@nams, \@idx );
}
sub read_post {
my ($nams, $idx) = @_;
my @res = map {
my $n = $nams->[$_];
(defined $n) ? $n : return;
} (@$idx);
return \@res;
}
sub read_post_x {
my ($nams, $idx) = @_;
my @res = map {
my $n = $nams->[$_];
(defined $n) ? [$_, $n] : return;
} (@$idx);
return \@res;
}
#---------------------------------------
my %command = (
'name' => ['name', ''],
'name-x' => ['name', 'x'],
'name-r' => ['name', 'r'],
'cmap-12' => ['cmap', '12'],
'cmap-14' => ['cmap', '14'],
);
sub read_option {
($mode, $smode) = ('', '');
while ($ARGV[0] =~ /^-/) {
my $opt = shift(@ARGV);
if ($opt =~ /^--?h(elp)?$/) {
show_usage();
} else {
error("invalid option", $opt);
}
}
(@ARGV) or show_usage();
($#ARGV == 1) or error("wrong number of arguments");
my $cmd = shift(@ARGV);
(defined $command{$cmd}) or error("unknown command", $cmd);
($mode, $smode) = @{$command{$cmd}};
$font_file = shift(@ARGV);
}
sub show_usage {
print <<"EOT"; exit;
Usage: $prog_name <command> <font_file>
Here <command> is one of the following:
name glyph name list (gid/name)
cmap-12 Unicode map (code/gid/name)
cmap-14 Unicode VS map (base-code/VS-code/gid/name)
EOT
}
sub info {
print STDERR (join(": ", $prog_name, @_), "\n");
}
sub error {
info(@_); exit(-1);
}
#---------------------------------------
main();
# EOF
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment