Skip to content

Instantly share code, notes, and snippets.

@zr-tex8r
Last active February 18, 2019 12:18
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save zr-tex8r/9f94493ecab4ae4e1ce6 to your computer and use it in GitHub Desktop.
Save zr-tex8r/9f94493ecab4ae4e1ce6 to your computer and use it in GitHub Desktop.
To make CMap files from OpenType font files using texlua
-- zrmakecmap.lua
-- Program identity and default settings.  These are deliberately global:
-- the option parser and the later sections read and overwrite them.
prog_name, version, mod_date = "zrmakecmap", "0.3", "2015/10/09"
verbose, sort = false, false
inencoding = "utf16"
fwid = true
---------------------------------------- preparations
-- Run-time parameters; they stay nil until option parsing / JIS map
-- creation assigns them.
filename, fontname, outname, jis_map = nil, nil, nil, nil
-- Short aliases for the table library.
tinsert = table.insert
tconcat = table.concat
tunpack = unpack or table.unpack
-- `texlua` ends up true only if both libraries below could be loaded
-- and the kpathsea program name could be set.
local function load_tex_libraries()
  fontloader = require "fontloader"
  kpse = require "kpse"
  kpse.set_program_name("dvipdfmx")
end
texlua = pcall(load_tex_libraries)
---------------------------------------- create CMap text
do
-- Maximum number of entries written into one "begin.../end..." section
-- by add_partition().
local blocksize = 100
-- string.format template for the head of the CMap file ("%%" yields a
-- literal "%").  Its eleven %s slots are filled in by cmap_text().
local prologue = [[
%%!PS-Adobe-3.0 Resource-CMap
%%%%DocumentNeededResources: ProcSet (CIDInit)
%%%%IncludeResource: ProcSet (CIDInit)
%%%%BeginResource: CMap (%s)
%%%%Title: (%s %s %s %s)
%%%%Version: %s
%%%%EndComments
%% NOTICE:
%% It is intended that this file is used only with dvipdfmx.
/CIDInit /ProcSet findresource begin
12 dict begin
begincmap
/CIDSystemInfo 3 dict dup begin
/Registry (%s) def
/Ordering (%s) def
/Supplement %s def
end def
/CMapName /%s def
/CMapVersion %s def
/CMapType 1 def
/WMode 0 def]]
-- Tail of the CMap file.  It is also passed through string.format (with
-- no arguments), which is why "%" is doubled here as well.
local epilogue = [[
endcmap
CMapName currentdict /CMap defineresource pop
end
end
%%%%EndResource
%%%%EOF
]]
--- Hexadecimal UTF-16 form of a code point.
-- Code points below 0x10000 give four hex digits; anything above is
-- written as a surrogate pair (eight hex digits).
local function hex_utf16(uc)
  if uc < 0x10000 then
    return string.format("%04x", uc)
  end
  -- 0xD7C0 is 0xD800 - (0x10000 / 0x400): the high-surrogate base with the
  -- 0x10000 offset already folded in.
  local high = 0xD7C0 + math.floor(uc / 0x400)
  local low = 0xDC00 + uc % 0x400
  return string.format("%04x%04x", high, low)
end
--- Format a code as (at least) four lowercase hex digits, zero padded.
local function hex_2(uc)
  return string.format("%04x", uc)
end
--- Format a code as (at least) eight lowercase hex digits, zero padded.
local function hex_4(uc)
  return string.format("%08x", uc)
end
--- Collapse a code -> CID map into runs of consecutive codes and CIDs.
-- A run is broken whenever the CID sequence is interrupted and also at
-- every code that is a multiple of 256.
-- @param map   table indexed by code
-- @param minuc first code to examine
-- @param maxuc last code to examine
-- @return array of { first_code, last_code, first_cid }
local function rearrange(map, minuc, maxuc)
  local ranges = {}
  local run_code, run_cid   -- where the currently open run started
  local prev_cid            -- value found for the previous code
  for code = minuc, maxuc do
    local cid = map[code]
    local continues = prev_cid and prev_cid + 1 == cid and code % 256 > 0
    if not continues then
      -- close the open run (if any) just before this code ...
      if prev_cid then
        ranges[#ranges + 1] = { run_code, code - 1, run_cid }
      end
      -- ... and open a new one when this code is mapped
      if cid then
        run_code, run_cid = code, cid
      end
    end
    prev_cid = cid
  end
  -- a run still open at the end reaches up to maxuc
  if prev_cid then
    ranges[#ranges + 1] = { run_code, maxuc, run_cid }
  end
  return ranges
end
--- Build the range list for a Unicode-keyed map.
-- When codes 0..31 all map to the same value, that value is returned
-- separately as `notdef` and the ranges start at code 32 instead of 0.
-- @param map   table indexed by code point
-- @param maxuc last code point to examine (defaults to 0x10FFFF)
-- @return ranges, notdef
local function rearrange_unicode(map, maxuc)
  local last = maxuc or 0x10FFFF
  local notdef = map[0]
  local uc = 1
  -- once a mismatch cleared `notdef` nothing can set it again, so stop
  while notdef ~= nil and uc <= 31 do
    if map[uc] ~= notdef then notdef = nil end
    uc = uc + 1
  end
  local first = 0
  if notdef then first = 32 end
  return rearrange(map, first, last), notdef
end
--- Build the range list for JIS input.
-- Each JIS code in the global `jis_map` has a list of candidate code
-- points; the first candidate found in `map` supplies the value.
-- @param map table indexed by code point
-- @return ranges over 0x2100..0x7FFF, and nil (no notdef value)
local function rearrange_jis(map)
  local jismap = {}
  for jc, candidates in pairs(jis_map) do
    local cid, i = nil, 1
    while not cid and i <= #candidates do
      cid = map[candidates[i]]
      i = i + 1
    end
    jismap[jc] = cid
  end
  return rearrange(jismap, 0x2100, 0x7FFF), nil
end
--- Append `lines` to `flines`, wrapped in "N begin<name>" / "end<name>"
-- sections of at most `blocksize` entries each, every section preceded
-- by an empty line.  Nothing is appended when `lines` is empty.
-- With the global `sort` option set, `lines` is sorted in place first.
local function add_partition (flines, name, lines)
  if sort then table.sort(lines) end
  local total = #lines
  for first = 1, total, blocksize do
    local last = math.min(first + blocksize - 1, total)
    flines[#flines + 1] = ''
    flines[#flines + 1] = ("%s begin%s"):format(last - first + 1, name)
    for l = first, last do
      flines[#flines + 1] = lines[l]
    end
    flines[#flines + 1] = ("end%s"):format(name)
  end
end
--- Build the complete text of a CMap resource.
-- @param name    CMap name (also used as the resource name)
-- @param version CMap version string
-- @param ros     array { registry, ordering, supplement }
-- @param map     table indexed by code point; the values are written as
--                the targets of the cidchar/cidrange entries
-- @param maxuc   last code point to examine (ignored for "jis")
-- @param inenc   input encoding: "utf32", "utf16" or "jis"
-- @return the CMap file contents as a single string
function cmap_text(name, version, ros, map, maxuc, inenc)
  local out = {}
  local rr, ro, rs = tunpack(ros)
  tinsert(out, prologue:format(
    name, name, rr, ro, rs, version, rr, ro, rs,
    name, version))
  --
  -- hex:   formats a code for this encoding
  -- rearr: turns the map into ranges (kept local; it used to be assigned
  --        without a declaration and leaked into the global table)
  -- csrls / ndrls / ccls / crls: lines for the codespacerange,
  --        notdefrange, cidchar and cidrange sections
  local hex, rearr, csrls
  local ndrls, ccls, crls = {}, {}, {}
  if inenc == "utf32" then
    hex, rearr = hex_4, rearrange_unicode
    csrls = { " <00000000> <0010FFFF>" }
  elseif inenc == "utf16" then
    hex, rearr = hex_utf16, rearrange_unicode
    csrls = { " <0000> <D7FF>",
              " <D800DC00> <DBFFDFFF>",
              " <E000> <FFFF>" }
  elseif inenc == "jis" then
    hex, rearr = hex_2, rearrange_jis
    csrls = { " <2121> <7E7E>" }
  else sure(nil, 9)
  end
  local ranges, notdef = rearr(map, maxuc)
  if notdef then
    ndrls[1] = ("<%s> <%s> %s"):format(hex(0), hex(31), notdef)
  end
  for i = 1, #ranges do
    local suc, euc, scc = tunpack(ranges[i])
    if suc == euc then
      -- a single code goes into the cidchar section
      tinsert(ccls, ("<%s> %s"):format(hex(suc), scc))
    else
      tinsert(crls,
        ("<%s> <%s> %s"):format(hex(suc), hex(euc), scc))
    end
  end
  add_partition(out, "codespacerange", csrls)
  add_partition(out, "notdefrange", ndrls)
  add_partition(out, "cidchar", ccls)
  add_partition(out, "cidrange", crls)
  tinsert(out, '')
  --
  tinsert(out, epilogue:format())
  return tconcat(out, "\n")
end
end
---------------------------------------- process one font
do
--- Reduce a font version string to a plain "N" or "N.M" number string.
-- An underscore between two digits is dropped first; only a number at
-- the very start of the string is recognised, otherwise "0" is used.
-- @param version version string from the font (may be nil)
-- @return the resolved version string
local function ver_number(version)
  local cleaned = (version or ""):gsub("(%d)_(%d)", "%1%2")
  local ver = cleaned:match("^%d+%.%d+") or cleaned:match("^%d+") or "0"
  if ver ~= version then
    info("version is not a number", version)
  end
  info("resolved version", ver)
  return ver
end
--- Resolve the Registry/Ordering/Supplement triple of a font.
-- Missing fields fall back to "Adobe", "Identity" and 0.
-- @param cidinfo table with registry/ordering/supplement fields (may be nil)
-- @return array { registry, ordering, supplement }
local function ros_info(cidinfo)
  local ci = cidinfo or {}
  if not ci.registry then
    info("cidinfo is missing")
  end
  local registry = ci.registry or "Adobe"
  local ordering = ci.ordering or "Identity"
  local supplement = ci.supplement or 0
  local res = { registry, ordering, supplement }
  info("resolved ROS", tconcat(res, "-"))
  return res
end
--- Pull the code map and its upper bound out of a font's map object.
-- Aborts (through sure) when the object carries no `map` field.
-- @param mapobj table with `map` and optionally `encmax` (may be nil)
-- @return the map table, and encmax or 0x10FFFF when encmax is absent
local function map_info(mapobj)
  local obj = mapobj or {}
  sure(obj.map, "cannot find map data")
  info("obtained map data")
  local encmax = obj.encmax
  info("*max codepoint", encmax or "(unknown)")
  return obj.map, encmax or 0x10FFFF
end
--- Open a font and extract everything needed to write its CMap.
-- @param file path of the font file
-- @param name font name to select; when nil/false the file is opened
--             without a name
-- @return version string, ROS array, code map, last code point
function extract_map(file, name)
  info("open font file", file)
  local font, diag
  if name then
    font, diag = fontloader.open(file, name)
  else
    font, diag = fontloader.open(file)
  end
  if diag then
    -- show at most 5 diagnoses when the font opened, 50 when it did not
    local max = math.min(#diag, font and 5 or 50)
    info(("-------- diagnoses from fontloader (%s of %s)")
      :format(max, #diag))
    for i = 1, max do info(diag[i]) end
    info("-------- end")
  end
  -- Report the path that was actually passed in; this used to reference
  -- the undefined global `fontfile`, so the message never named the file.
  sure(font, "failure in extracting map data", file)
  local version = ver_number(font.version)
  local ros = ros_info(font.cidinfo)
  local map, maxuc = map_info(font.map)
  return version, ros, map, maxuc
end
end
---------------------------------------- resolve parameters
do
--- Compose the CMap name for a font name core, according to the global
-- `inencoding` setting.  An unknown encoding is an internal error (8).
local function out_name(core)
  local unicode_suffix = ({
    utf32 = "-UTF32-H",
    utf16 = "-UTF16-H",
  })[inencoding]
  if unicode_suffix then
    return "Uni" .. core .. unicode_suffix
  end
  if inencoding == "jis" then
    return "Jis" .. core .. "-H"
  end
  sure(nil, 8)
end
--- Strip every non-alphanumeric character from a name.
-- @param name string to clean (nil is treated as "")
-- @return exactly one value, the cleaned string
local function trim(name)
  -- The outer parentheses drop gsub's second result (the substitution
  -- count), which would otherwise leak to callers as an extra value.
  return ((name or ""):gsub("%W", ""))
end
--- Increment the tally kept for `name` in the table `sum`.
local function count(sum, name)
  local seen = sum[name]
  if not seen then seen = 0 end
  sum[name] = seen + 1
end
--- Decide which font(s) of the input file to process and how to name
-- the output(s).  Reads the globals `filename`, `fontname` (a name, an
-- index, or nil) and `outname`.
-- @return array of { name = <font name or false>, out = <CMap name> }
function resolve_param()
  local finfo, msg = fontloader.info(filename)
  local ttc, sel = false, fontname
  if not finfo then
    info("-------- diagnosis from fontloader")
    info(msg)
    abort("not a valid font file", filename)
  elseif finfo[1] then
    ttc = true; info("TTC file", filename)
  elseif finfo.fontname then
    info("not TTC file", filename)
    -- treat a single font as a one-element collection, index 0
    sel = sel or 0; finfo = { finfo }
  else sure(nil, 5)
  end
  if type(sel) == "string" then
    -- Turn a font name into a 0-origin index.  Both names are tested in
    -- one pass so that a font whose fontname and fullname are identical
    -- is collected once and not mistaken for an ambiguous match.
    local t = {}
    for i = 1, #finfo do
      local fi = finfo[i]
      if fi.fontname == sel or fi.fullname == sel then
        tinsert(t, i - 1)
      end
    end
    sure(#t <= 1, "font name is ambiguous", sel)
    sure(#t >= 1, "no such font in font file", sel)
    sel = t[1]
  end
  info("font count", #finfo)
  info("font index", sel or "(all)")
  -- a fractional index would otherwise index finfo with a non-integer
  sure(not sel or (sel % 1 == 0 and 0 <= sel and sel < #finfo),
    "bad font index number", sel)
  sure(not outname or sel,
    "cannot specify output file name in processing all fonts")
  if sel then
    local fi = finfo[sel + 1]
    return { {
      name = ttc and fi.fullname,
      out = outname or out_name(trim(fi.familyname))
    } }
  else
    -- All fonts: derive one output name per font and make them unique,
    -- first by appending the weight, then by appending the position.
    local res, sum = {}, {}
    for i = 1, #finfo do
      local fi = finfo[i]
      res[i] = { name = fi.fullname, out = trim(fi.familyname) }
      info("font name", i, res[i].name)
      info("out-core", i, res[i].out)
      count(sum, res[i].out)
    end
    for i = 1, #finfo do
      if sum[res[i].out] > 1 then
        res[i].out = res[i].out.."_"..trim(finfo[i].weight)
        info("new out-core", i, res[i].out)
        count(sum, res[i].out)
      end
    end
    for i = 1, #finfo do
      if sum[res[i].out] > 1 then
        res[i].out = res[i].out.."_"..tostring(i)
        info("new out-core", i, res[i].out)
      end
      res[i].out = out_name(res[i].out)
      info("output", i, res[i].out)
    end
    return res
  end
end
end
---------------------------------------- create JIS-UCS map
do
-- Column names searched for in the header row of cid2code.txt:
-- cnjis selects the JIS code column, cnucs the Unicode column.
local cnjis, cnucs = "H", "UniJIS-UCS2"
-- NOTE(review): ccname is not referenced anywhere in the visible source.
local ccname = "Adobe-Japan1"
--- Parse one table cell into a list of numbers.
-- A cell of exactly four hex digits gives a one-element list; otherwise
-- the cell is split at commas and every field that parses as a hex
-- number is kept (the others are silently dropped).
-- @param expr cell text
-- @return array of numbers, possibly empty
local function cvalue(expr)
  if #expr == 4 then
    local single = tonumber(expr, 16)
    if single then return { single } end
  end
  local set = {}
  for _, field in ipairs(expr:explode(",")) do
    local cv = tonumber(field, 16)
    if cv then set[#set + 1] = cv end
  end
  return set
end
--- Read one cid2code.txt file and build a JIS -> Unicode table from it.
-- The file is accepted only when its leading comment block mentions the
-- Unicode column name and its header row starts with "CID" and contains
-- both the JIS and the Unicode columns.
-- @param path file to read (an error is raised when it cannot be opened)
-- @return map from JIS code to an array of Unicode values, plus a flag
--         telling whether something unexpected was seen; returns nil
--         when the file is not of the expected kind
local function one_file(path)
  local file = assert(io.open(path, "rb"))
  local function is_skippable(line)
    return line == "" or line:sub(1, 1) == "#"
  end
  -- All parsing happens in this helper so that the handle can be closed
  -- on every exit path, including errors.
  local function parse()
    local ok, line, cjis, cucs
    -- skip the leading blank/comment lines, watching for the column name
    while true do
      line = file:read("*l")
      if not (line and is_skippable(line)) then
        break
      elseif line:find(cnucs, 1, true) then
        ok = true
      end
    end
    if not ok or not line then return end
    local fs = line:explode("\t")
    -- was `not fs[1] == "CID"`, which is always false
    if fs[1] ~= "CID" then return end
    for k, cn in ipairs(fs) do
      if cn == cnjis then cjis = k end
      if cn == cnucs then cucs = k end
    end
    if not (cjis and cucs) then return end
    local map, alrt = {}, false
    for row in file:lines() do
      -- blank and comment lines may also follow the data rows
      if not is_skippable(row) then
        local cols = row:explode("\t")
        local jis, ucs = cols[cjis], cols[cucs]
        -- rows too short to hold both columns are ignored
        if jis and ucs and jis ~= "*" then
          if ucs == "*" then
            alrt = true
          else
            local cvjis, cvucs = cvalue(jis), cvalue(ucs)
            if #cvjis > 0 and #cvucs > 0 then
              if #cvjis > 1 then alrt = true end
              map[cvjis[1]] = cvucs
            end
          end
        end
      end
    end
    return map, alrt
  end
  local ok, map, alrt = pcall(parse)
  file:close()
  if not ok then error(map, 0) end
  return map, alrt
end
--- Locate a usable cid2code.txt through kpathsea and store the JIS ->
-- Unicode table built from it in the global `jis_map`.  Aborts when no
-- candidate file is suitable.
function make_jis_map()
  info("make JIS-UCS map from cid2code data...")
  local c2cs = { kpse.lookup("cid2code.txt", {
    format = "cmap files", all = true, mustexist = true
  }) }
  -- `alrt` is declared here; it used to be assigned as a global
  local res, alrt
  -- ipairs keeps the candidates in the order the lookup returned them
  for _, path in ipairs(c2cs) do
    info("try", path)
    res, alrt = one_file(path)
    if res then
      info("success")
      if alrt then info("(but strange cid2code data)") end
      break
    else info("failure (not AJ1 data)")
    end
  end
  sure(res, "suitable cid2code.txt is not found")
  jis_map = res
end
end
---------------------------------------- logging
do
-- Metatable that makes tostring() render an array as "{a,b,...}".
local stt_meta = {}
function stt_meta.__tostring(self)
  local inner = concat(self, ",")
  return "{" .. inner .. "}"
end
--- Attach the pretty-printing metatable to `tbl` and return `tbl`.
function stt(tbl)
  setmetatable(tbl, stt_meta)
  return tbl
end
--- Join the elements of an array after converting each with tostring(),
-- then drop one trailing newline from the result.
-- @param tbl array of arbitrary values
-- @param ... passed on to table.concat (separator, start, end)
-- @return exactly one value, the joined string
function concat(tbl, ...)
  local t = {}
  for i = 1, #tbl do t[i] = tostring(tbl[i]) end
  -- The outer parentheses drop gsub's second result (the substitution
  -- count), which used to be returned as an unintended extra value.
  return (table.concat(t, ...):gsub("\n$", ""))
end
--- Write a message to stderr when the global `verbose` flag is set.
-- The arguments are joined with ": " behind the program name.
function info(...)
  if verbose then
    local parts = { prog_name, ... }
    io.stderr:write(concat(parts, ": "), "\n")
  end
end
--- Print a message unconditionally and terminate with exit status -1.
function abort(...)
  -- switch verbosity on so that info() cannot suppress the message
  verbose = true
  info(...)
  os.exit(-1)
end
--- Assertion helper: return `val` when it is truthy, otherwise abort.
-- @param val value to check
-- @param a1  message, or a number that is reported as "error(<number>)"
-- @param ... further message parts handed to abort
function sure(val, a1, ...)
  if not val then
    local head = a1
    if type(head) == "number" then
      head = "error(" .. head .. ")"
    end
    abort(head, ...)
    return
  end
  return val
end
end
---------------------------------------- main
do
--- Print the usage text to stdout and exit with status 0.
local function show_usage()
  local template = [[
This is %s v%s <%s> by 'ZR'
Usage: %s[.lua] [<option>...] <font_file>
-h/--help show help
-v/--verbose be verbose
-o/--out <name> specify CMap (file) name to output
-i/--index <num> choose from TTC by font index (0-origin)
-n/--name <name> choose from TTC by font name
-j/--jis use JIS as CMap input codespace
--utf32 use UTF32 as CMap input codespace
--utf16 use UTF16 as CMap input codespace (default)
<font_file> name of the input font file, which can be
of TTF, OTF or TTC format.
]]
  local text = template:format(prog_name, version, mod_date, prog_name)
  io.stdout:write(text)
  os.exit(0)
end
--- Resolve a font file name through kpathsea, trying the OpenType
-- format first and the TrueType format second.
-- @return the path that was found, or `name` unchanged when neither
--         lookup succeeds
function find_file(name)
  for _, format in ipairs({ "opentype fonts", "truetype fonts" }) do
    local found = kpse.find_file(name, format, true)
    if found then return found end
  end
  return name
end
--- Parse the command line in the global `arg` and store the results in
-- the global settings.  Exactly one non-option argument (the font file)
-- must follow the options; it is resolved and stored in `filename`.
function read_option()
  if #arg == 0 then show_usage() end
  -- options that take no value, keyed by their spelling
  local function use_encoding(enc)
    return function() inencoding = enc end
  end
  local function be_verbose() verbose = true end
  local function fwid_on() fwid = true end
  local function fwid_off() fwid = false end
  local switches = {
    ["-h"] = show_usage, ["--help"] = show_usage,
    ["-v"] = be_verbose, ["--verbose"] = be_verbose,
    ["-f"] = fwid_on, ["--fwid"] = fwid_on,
    ["-F"] = fwid_off, ["--no-fwid"] = fwid_off,
    ["-j"] = use_encoding("jis"), ["--jis"] = use_encoding("jis"),
    ["--utf32"] = use_encoding("utf32"),
    ["--utf16"] = use_encoding("utf16"),
    ["--sort"] = function() sort = true end,
  }
  local pos = 1
  while pos <= #arg do
    local opt = arg[pos]
    -- the first argument without a leading "-" ends option parsing
    if opt:sub(1, 1) ~= "-" then break end
    local switch = switches[opt]
    if switch then
      switch()
    elseif opt == "-o" or opt == "--out" then
      pos = pos + 1
      outname = sure(arg[pos], "output name is missing")
    elseif opt == "-i" or opt == "--index" then
      pos = pos + 1
      fontname = tonumber(arg[pos])
      sure(fontname, "index is not a number")
    elseif opt == "-n" or opt == "--name" then
      pos = pos + 1
      fontname = sure(arg[pos], "font name is missing")
    else
      abort("invalid option", opt)
    end
    pos = pos + 1
  end
  sure(#arg == pos, "wrong number of arguments")
  filename = find_file(arg[pos])
end
--- Write `data` to the file `file` (binary mode), aborting on failure.
local function write_whole(file, data)
  local ofile = sure(io.open(file, "wb"), "cannot open for output", file)
  local written = ofile:write(data)
  sure(written, "output failed", file)
  ofile:close()
end
--- Program driver: parse the options, resolve which fonts to process,
-- then create and write one CMap file per font.
function main()
  -- message spelling corrected (was "requrires")
  sure(texlua, "this script requires TeXlua")
  read_option()
  if inencoding == "jis" then
    make_jis_map()
  end
  local param = resolve_param()
  for i = 1, #param do
    local name, out = param[i].name, param[i].out
    -- `name or nil` turns a false name into nil so that it is left out
    -- of the log line
    info("******** PROCESS", filename, name or nil)
    local version, ros, map, maxuc = extract_map(filename, name)
    info("create cmap file", out)
    local text = cmap_text(out, version, ros, map, maxuc,
      inencoding)
    -- the CMap name doubles as the output file name
    write_whole(out, text)
    info("DONE", filename, name or nil)
  end
  info("all done")
end
end
---------------------------------------- done
-- Script entry point: run the whole conversion as soon as the file loads.
main()
-- EOF
@zr-tex8r
Copy link
Author

zr-tex8r commented Oct 4, 2015

This script requires the LuaTeX engine, which is run as an extended Lua interpreter.

texlua zrmakecmap.lua [option...] <font_file_name>

@chaitanya-lakkundi
Copy link

Can I use this for TrueType fonts? If so, how?

@trueroad
Copy link

私の環境では cid2code.txt の最後にコメント行があって、そのままだとエラーになってしまうためパッチを作ってみました。
https://gist.github.com/trueroad/830f59df102e27392ea4d0ad54208121/revisions#diff-b817b49d8db36cca5a0788e014edd713

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment