Skip to content

Instantly share code, notes, and snippets.

@trueroad
Last active January 22, 2023 06:10
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save trueroad/e14ff8ca64f6765391071c0ca6f60a34 to your computer and use it in GitHub Desktop.
Save trueroad/e14ff8ca64f6765391071c0ca6f60a34 to your computer and use it in GitHub Desktop.
Experimental PDF Font Embedder without Ghostscript
//
// Experimental PDF Font Embedder without Ghostscript
// https://gist.github.com/trueroad/e14ff8ca64f6765391071c0ca6f60a34
//
// Copyright (C) 2023 Masamichi Hosoda. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
//
// * Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistributions in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
// ARE DISCLAIMED.
// IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
// OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
// HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
// OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
// SUCH DAMAGE.
//
//
// Required:
// libqpdf (https://qpdf.sourceforge.io/)
// cmdlineparse.hh (https://github.com/trueroad/cmdlineparse)
//
// Build:
// g++ -o pdf-font-embedder pdf-font-embedder.cc -lqpdf
//
// Usage:
// $ ./pdf-font-embedder TABLE.txt INPUT.pdf OUTPUT.pdf
//
// ```TABLE.txt
// # original_fontname new_fontname font_filename
// /Ryumin-Light /HaranoAjiMincho-Light /dir/HaranoAjiMincho-Light.otf
// /GothicBBB-Medium /HaranoAjiGothic-Regular /dir/HaranoAjiGothic-Regular.otf
// ```
//
#include <exception>
#include <fstream>
#include <iostream>
#include <iterator>
#include <map>
#include <set>
#include <sstream>
#include <string>
#include <utility>
#include <vector>
#include <qpdf/QPDF.hh>
#include <qpdf/QPDFObjectHandle.hh>
#include <qpdf/QPDFWriter.hh>
#define PACKAGE_STRING "Experimental PDF Font Embedder without Ghostscript"
#define PACKAGE_COPYRIGHT "Copyright (C) 2023 Masamichi Hosoda"
#define PACKAGE_LICENSE "License: BSD-2-Clause"
#define PACKAGE_URL \
"https://gist.github.com/trueroad/e14ff8ca64f6765391071c0ca6f60a34"
#include "cmdlineparse.hh"
// Rewrites the font objects of a PDF so that fonts listed in a
// replacement table are renamed and get an embedded font file attached.
//
// Call order used by main (): the set_* functions, load_table (),
// load_pdf (), process_pdf () and finally save_pdf ().
class pdf_font_embedder
{
public:
  // Output options; they are only stored here and are handed to
  // QPDFWriter inside save_pdf ().
  void set_linearize (bool l) { linearize_ = l; }
  void set_object_streams (qpdf_object_stream_e os) { object_streams_ = os; }
  void set_newline_before_endstream (bool n) { newline_before_endstream_ = n; }
  void set_qdf (bool q) { qdf_ = q; }

  // Read the input PDF FILENAME into qpdf_.
  void load_pdf (const std::string &filename);

  // Visit every object of the loaded PDF and process the font objects.
  void process_pdf ();

  // Write the (possibly modified) PDF to FILENAME.
  void save_pdf (const std::string &filename);

  // Read the font replacement table FILENAME into table_.
  void load_table (const std::string &filename);

private:
  // Dispatch: only dictionaries whose /Type is /Font are processed.
  void process_obj (QPDFObjectHandle oh);

  // Handle one font dictionary (Type0 or any other subtype).
  void process_font (QPDFObjectHandle oh);

  // Handle a Type0 font through its single descendant font.
  void process_font_type0 (QPDFObjectHandle oh);

  // Handle the descendant of a Type0 font; returns the descendant's
  // resulting /BaseFont, or an empty string when it was skipped.
  std::string process_font_type0_descendant (QPDFObjectHandle oh);

  // Handle a non-Type0 font; returns true when it was replaced.
  bool process_font_other (QPDFObjectHandle oh);

  // Handle a font descriptor; returns true when a font file is attached.
  bool process_fontdescriptor (QPDFObjectHandle oh);

  // The PDF being edited.
  QPDF qpdf_;

  // original font name -> (new font name, font file name).
  std::map<std::string, std::pair<std::string, std::string>> table_;

  // new font name -> stream object holding the embedded font file, so
  // that each font file is embedded only once.
  std::map<std::string, QPDFObjectHandle> font_obj_map_;

  // Object IDs of fonts and font descriptors that were already handled.
  std::set<int> processed_obj_id_;

  // Writer options (see the set_* functions above).
  bool linearize_ = false;
  qpdf_object_stream_e object_streams_ = qpdf_o_preserve;
  bool newline_before_endstream_ = false;
  bool qdf_ = false;
};
// Load the input PDF: hands FILENAME to QPDF::processFile () on the
// member qpdf_.  Nothing is caught here, so any exception raised by
// that call reaches the caller.
void
pdf_font_embedder::load_pdf (const std::string &filename)
{
  const char *const input_path = filename.c_str ();
  qpdf_.processFile (input_path);
}
// Run process_obj () on every object of the loaded PDF.
//
// The object list is taken once, before the loop starts, so objects
// created while processing are not part of this pass.
void
pdf_font_embedder::process_pdf ()
{
  const auto all_objects = qpdf_.getAllObjects ();
  for (const auto &object : all_objects)
    process_obj (object);
}
void pdf_font_embedder::save_pdf (const std::string &filename)
{
QPDFWriter w (qpdf_, filename.c_str ());
w.setLinearization (linearize_);
w.setObjectStreamMode (object_streams_);
w.setNewlineBeforeEndstream (newline_before_endstream_);
w.setQDFMode (qdf_);
w.setMinimumPDFVersion ("1.6");
w.write ();
}
// Load the font replacement table FILENAME into table_.
//
// Every data line holds three whitespace-separated fields:
//
//   original_fontname new_fontname font_filename
//
// Blank lines and lines whose first field starts with '#' are ignored.
// Fields are split with operator>>, so none of them (including the font
// file name) can contain whitespace; anything after the third field is
// ignored.  When an original_fontname appears more than once, the last
// line wins.
//
// Problems are reported on std::cerr and do not throw: a file that
// cannot be opened leaves table_ unchanged, and a line with fewer than
// three fields is skipped instead of being stored with empty values.
void
pdf_font_embedder::load_table (const std::string &filename)
{
  std::ifstream ifs {filename};
  if (!ifs)
    {
      std::cerr << "Error: cannot open table file: " << filename
                << std::endl;
      return;
    }

  std::string line;
  for (unsigned long line_no = 1; std::getline (ifs, line); ++line_no)
    {
      std::istringstream iss {line};

      // Extracting the first field skips leading whitespace, so blank
      // lines, whitespace-only lines and indented comments are all
      // recognized here.
      std::string org_fontname;
      if (!(iss >> org_fontname) || org_fontname[0] == '#')
        continue;

      std::string new_fontname;
      std::string font_filename;
      if (!(iss >> new_fontname >> font_filename))
        {
          std::cerr << "Warning: " << filename << ":" << line_no
                    << ": expected 3 fields. Ignoring line."
                    << std::endl;
          continue;
        }

      table_[org_fontname] = std::make_pair (std::move (new_fontname),
                                             std::move (font_filename));
    }
}
// Forward OH to process_font () when it is a dictionary whose /Type
// entry is the name /Font; every other object is left untouched.
void
pdf_font_embedder::process_obj (QPDFObjectHandle oh)
{
  if (!oh.isDictionary () || !oh.hasKey ("/Type"))
    return;

  auto type_entry = oh.getKey ("/Type");
  if (type_entry.isName () && type_entry.getName () == "/Font")
    process_font (oh);
}
// Process one font dictionary OH.
//
// Logs the object, returns early when its object ID is already in
// processed_obj_id_, and otherwise dispatches on /Subtype: /Type0 fonts
// go to process_font_type0 (), everything else to process_font_other ().
//
// /Subtype and /BaseFont are verified to be names before getName () is
// called on them.  A font without a /BaseFont name (Type3 fonts have
// none) cannot be looked up in the table, so it is skipped here instead
// of calling a name accessor on a non-name object.
void
pdf_font_embedder::process_font (QPDFObjectHandle oh)
{
  std::cout << "Font: Object ID " << oh.getObjectID ()
            << ", Generation " << oh.getGeneration ()
            << std::endl;

  if (processed_obj_id_.count (oh.getObjectID ()) != 0)
    {
      std::cout << " Already processed. Skipping..."
                << std::endl;
      return;
    }

  auto subtype {oh.getKey ("/Subtype")};
  if (!subtype.isName ())
    {
      std::cout << " Error: /Subtype is not a name. Skipping..."
                << std::endl;
      return;
    }
  const std::string subtype_name {subtype.getName ()};
  std::cout << " Subtype is "
            << subtype_name
            << "."
            << std::endl;

  auto fontname {oh.getKey ("/BaseFont")};
  if (!fontname.isName ())
    {
      std::cout << " It does not have a /BaseFont name. Skipping..."
                << std::endl;
      return;
    }
  std::cout << " BaseFont is "
            << fontname.getName ()
            << "."
            << std::endl;

  if (subtype_name == "/Type0")
    process_font_type0 (oh);
  else
    process_font_other (oh);
}
// Process the Type0 font dictionary OH.
//
// The single element of /DescendantFonts is processed first.  When it
// was replaced (or had already been replaced), the /BaseFont of OH is
// set to the descendant's new name; if the original /BaseFont ended in
// "-<Encoding name>", the same suffix is appended to the new name.
//
// Type checks are done before type-specific accessors are used:
// /DescendantFonts must be an array, /BaseFont must be a name, and the
// suffix handling is applied only when /Encoding is a name (it can also
// be a stream, in which case there is no name to build a suffix from).
void
pdf_font_embedder::process_font_type0 (QPDFObjectHandle oh)
{
  auto descendantfonts {oh.getKey ("/DescendantFonts")};
  if (!descendantfonts.isArray ()
      || descendantfonts.getArrayNItems () != 1)
    {
      std::cout << " Error: DescendantFonts is not a one-element array."
                << std::endl
                << " It is not PDF32000-1:2008 compliant."
                << std::endl;
      return;
    }

  auto new_fontname
    {process_font_type0_descendant (descendantfonts.getArrayItem (0))};
  if (new_fontname.empty ())
    {
      std::cout << " Skipping..."
                << std::endl;
      return;
    }

  auto fontname {oh.getKey ("/BaseFont")};
  if (!fontname.isName ())
    {
      std::cout << " Error: /BaseFont of Type0 is not a name. Skipping..."
                << std::endl;
      return;
    }
  const std::string org_fontname {fontname.getName ()};

  auto encoding {oh.getKey ("/Encoding")};
  if (encoding.isName ())
    {
      // A name always starts with '/', so substr (1) drops only that.
      const std::string encoding_hyphen
        {"-" + encoding.getName ().substr (1)};
      // Keep the "-<Encoding>" suffix only if the original name had it.
      if (org_fontname.size () > encoding_hyphen.size ()
          && org_fontname.compare (org_fontname.size ()
                                   - encoding_hyphen.size (),
                                   std::string::npos,
                                   encoding_hyphen) == 0)
        new_fontname += encoding_hyphen;
    }

  if (org_fontname != new_fontname)
    {
      std::cout << " Replacing /BaseFont of Type0: "
                << org_fontname
                << " -> "
                << new_fontname
                << std::endl;
      oh.replaceKey ("/BaseFont", QPDFObjectHandle::newName (new_fontname));
    }

  processed_obj_id_.insert (oh.getObjectID ());
}
// Process OH, the descendant font of a Type0 font.
//
// Returns the /BaseFont name the descendant has after processing, or an
// empty string when it was skipped (the caller then leaves the Type0
// font alone).  A descendant whose object ID is already in
// processed_obj_id_ is not processed again; its current /BaseFont is
// returned.
//
// OH must be a dictionary with a /BaseFont name; otherwise it is
// skipped rather than calling name accessors on objects of another
// type.
std::string
pdf_font_embedder::process_font_type0_descendant (QPDFObjectHandle oh)
{
  std::cout << " --- Font (descendant): Object ID " << oh.getObjectID ()
            << ", Generation " << oh.getGeneration ()
            << " ---"
            << std::endl;

  if (!oh.isDictionary ())
    {
      std::cout << " Error: It is not a dictionary."
                << std::endl
                << " --- Font (descendant): Skipping... ---"
                << std::endl;
      return "";
    }

  auto fontname {oh.getKey ("/BaseFont")};

  if (processed_obj_id_.count (oh.getObjectID ()) != 0)
    {
      std::cout << " Already processed."
                << std::endl
                << " --- Font (descendant): Skipping... ---"
                << std::endl;
      return fontname.isName () ? fontname.getName () : std::string {};
    }

  // /Subtype is only logged here, so a missing one is not fatal.
  auto subtype {oh.getKey ("/Subtype")};
  if (subtype.isName ())
    std::cout << " Subtype is "
              << subtype.getName ()
              << "."
              << std::endl;
  else
    std::cout << " Subtype is missing or not a name."
              << std::endl;

  if (!fontname.isName ())
    {
      std::cout << " Error: /BaseFont is not a name."
                << std::endl
                << " --- Font (descendant): Skipping... ---"
                << std::endl;
      return "";
    }
  std::cout << " BaseFont is "
            << fontname.getName ()
            << "."
            << std::endl;

  if (!process_font_other (oh))
    {
      std::cout << " --- Font (descendant): Skipping... ---"
                << std::endl;
      return "";
    }

  std::cout << " --- Font (descendant): Complete. ---"
            << std::endl;
  // Read /BaseFont again: process_font_other () may have replaced it.
  return oh.getKey ("/BaseFont").getName ();
}
// Process the font dictionary OH (any subtype handled as a plain font,
// including the descendant of a Type0 font).
//
// Returns true when the font was replaced: its /FontDescriptor was
// accepted by process_fontdescriptor () and /BaseFont was set to the
// new name from table_.  Returns false, leaving /BaseFont unchanged,
// when the font has no /FontDescriptor, its /BaseFont is not a name or
// is not listed in table_, or the descriptor was rejected.
bool
pdf_font_embedder::process_font_other (QPDFObjectHandle oh)
{
  if (!oh.hasKey ("/FontDescriptor"))
    {
      std::cout << " Error: It does not have /FontDescriptor."
                << std::endl;
      return false;
    }

  auto fontname {oh.getKey ("/BaseFont")};
  if (!fontname.isName ())
    {
      std::cout << " Error: /BaseFont is not a name."
                << std::endl;
      return false;
    }
  const std::string org_fontname {fontname.getName ()};

  // One lookup serves both the membership test and the new name.
  const auto entry = table_.find (org_fontname);
  if (entry == table_.end ())
    {
      std::cout << " It is not in the table. Skipping..."
                << std::endl;
      return false;
    }
  const std::string new_fontname {entry->second.first};

  if (!process_fontdescriptor (oh.getKey ("/FontDescriptor")))
    {
      std::cout << " Skipping..."
                << std::endl;
      return false;
    }

  if (org_fontname != new_fontname)
    {
      std::cout << " Replacing /BaseFont: "
                << org_fontname
                << " -> "
                << new_fontname
                << std::endl;
      oh.replaceKey ("/BaseFont", QPDFObjectHandle::newName (new_fontname));
    }

  processed_obj_id_.insert (oh.getObjectID ());
  return true;
}
namespace
{
// Read the whole file FILENAME into DATA as raw bytes.
//
// The file is opened in binary mode so that no newline translation can
// alter the font data.  Returns false when the file cannot be opened,
// a read error occurs, or the file is empty.
bool
read_font_file (const std::string &filename, std::string &data)
{
  std::ifstream ifs (filename, std::ios::in | std::ios::binary);
  if (!ifs)
    return false;

  data.assign (std::istreambuf_iterator<char> (ifs),
               std::istreambuf_iterator<char> ());
  return !ifs.bad () && !data.empty ();
}
}

// Process the font descriptor OH.
//
// Returns true when OH ends up with an embedded font attached: either
// it was already handled (its object ID is in processed_obj_id_), or it
// is a /FontDescriptor dictionary without /FontFile, /FontFile2 or
// /FontFile3 whose /FontName is listed in table_.  In the latter case
// the font file from the table is attached as /FontFile3 and /FontName
// is set to the new name.
//
// The font stream is created once per new font name and cached in
// font_obj_map_, so descriptors mapped to the same new name share one
// stream.  The stream is always tagged /Subtype /OpenType; the file
// content is not inspected, so the table is expected to name OpenType
// files.
//
// The descriptor is modified only after the font file has been read
// successfully; on any failure it is left untouched and false is
// returned.
bool
pdf_font_embedder::process_fontdescriptor (QPDFObjectHandle oh)
{
  if (processed_obj_id_.count (oh.getObjectID ()) != 0)
    return true;

  std::cout << " FontDescriptor: Object ID " << oh.getObjectID ()
            << ", Generation " << oh.getGeneration ()
            << std::endl;

  if (!(oh.isDictionary () && oh.hasKey ("/Type")))
    return false;
  auto type {oh.getKey ("/Type")};
  if (!type.isName () || type.getName () != "/FontDescriptor")
    return false;

  auto fontname {oh.getKey ("/FontName")};
  if (!fontname.isName ())
    {
      std::cout << " Error: /FontName is not a name."
                << std::endl;
      return false;
    }
  const std::string org_fontname {fontname.getName ()};
  std::cout << " FontName is "
            << org_fontname
            << "."
            << std::endl;

  // A descriptor that already carries a font program is left alone.
  const char *const fontfile_keys[] {"/FontFile", "/FontFile2", "/FontFile3"};
  for (const char *key: fontfile_keys)
    if (oh.hasKey (key))
      {
        std::cout << " It has " << key << ". Skipping..."
                  << std::endl;
        return false;
      }

  const auto entry = table_.find (org_fontname);
  if (entry == table_.end ())
    return false;
  const std::string new_fontname {entry->second.first};
  const std::string font_filename {entry->second.second};

  auto cached = font_obj_map_.find (new_fontname);
  if (cached == font_obj_map_.end ())
    {
      std::cout << " Embedding "
                << font_filename
                << std::endl;

      std::string font_data;
      if (!read_font_file (font_filename, font_data))
        {
          std::cout << " Error: cannot read font file "
                    << font_filename
                    << std::endl;
          return false;
        }

      auto fontfile {QPDFObjectHandle::newStream (&qpdf_, font_data)};
      auto fontfile_dict {fontfile.getDict ()};
      fontfile_dict.replaceKey ("/Subtype",
                                QPDFObjectHandle::newName ("/OpenType"));
      cached = font_obj_map_.emplace (new_fontname, fontfile).first;
    }

  if (org_fontname != new_fontname)
    {
      std::cout << " Replacing /FontName: "
                << org_fontname
                << " -> "
                << new_fontname
                << std::endl;
      oh.replaceKey ("/FontName", QPDFObjectHandle::newName (new_fontname));
    }

  std::cout << " Adding /FontFile3" << std::endl;
  oh.replaceKey ("/FontFile3", cached->second);

  processed_obj_id_.insert (oh.getObjectID ());
  return true;
}
// Entry point.
//
// Parses the command line (three positional arguments TABLE.txt,
// INPUT.pdf and OUTPUT.pdf plus the QPDF output options), then loads
// the table and the PDF, processes the fonts and writes the result.
//
// Returns 0 on success and 1 on a usage error or when an exception
// derived from std::exception escapes the load/process/save steps; in
// the latter case the message is printed to std::cerr instead of
// letting the exception terminate the program.
int main(int argc, char *argv[])
{
  cmdlineparse::parser cmd;
  cmd.set_usage_unamed_opts ("TABLE.txt INPUT.pdf OUTPUT.pdf");
  cmd.add_default ();

  // Initialized so that the values are well defined even if the parser
  // does not write to them when an option is absent.
  bool linearize = false;
  std::string object_streams;
  bool newline_before_endstream = false;
  bool qdf = false;

  cmd.add_flag (0, "linearize", &linearize,
                " Output linearized (web-optimized) PDF",
                "Output PDF settings (QPDF)");
  cmd.add_string (0, "object-streams", &object_streams, "preserve",
                  " Settings for object streams",
                  "[preserve|disable|generate]",
                  "Output PDF settings (QPDF)");
  cmd.add_flag (0, "newline-before-endstream", &newline_before_endstream,
                " Output newline before endstream",
                "Output PDF settings (QPDF)");
  cmd.add_flag (0, "qdf", &qdf,
                " Output QDF",
                "Output PDF settings (QPDF)");

  if (!cmd.parse (argc, argv))
    return 1;

  auto uargs {cmd.get_unamed_args ()};
  if (uargs.size () != 3)
    {
      std::cout << cmd.build_help ();
      return 1;
    }

  std::cout << cmd.get_version_string () << std::endl;

  try
    {
      pdf_font_embedder pfe;
      pfe.set_linearize (linearize);

      if (object_streams == "preserve")
        pfe.set_object_streams (qpdf_o_preserve);
      else if (object_streams == "generate")
        pfe.set_object_streams (qpdf_o_generate);
      else if (object_streams == "disable")
        pfe.set_object_streams (qpdf_o_disable);
      else
        {
          std::cerr << "unknown --object-streams mode" << std::endl;
          return 1;
        }

      pfe.set_newline_before_endstream (newline_before_endstream);
      pfe.set_qdf (qdf);

      pfe.load_table (uargs[0]);
      pfe.load_pdf (uargs[1]);
      pfe.process_pdf ();
      pfe.save_pdf (uargs[2]);
    }
  catch (const std::exception &e)
    {
      std::cerr << "Error: " << e.what () << std::endl;
      return 1;
    }

  return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment