|
From 9c59afcfb7e08b26a2d49f20acec0e5d1343c81e Mon Sep 17 00:00:00 2001 |
|
From: Jiang Jiang <gzjjgod@gmail.com> |
|
Date: Sat, 26 Jul 2014 22:16:41 +0200 |
|
Subject: [PATCH 2/3] Fix reverse CMap lookup for ToUnicode map generation |
|
|
|
--- |
|
texk/dvipdfm-x/pdfdev.c | 9 ++++++ |
|
texk/dvipdfm-x/pdffont.c | 18 +++++++++++ |
|
texk/dvipdfm-x/pdffont.h | 1 + |
|
texk/dvipdfm-x/tt_cmap.c | 43 ++++++++++++++++++++++--- |
|
texk/dvipdfm-x/type0.c | 84 ++++++++++++++++++++++++++++++++++-------------- |
|
texk/dvipdfm-x/type0.h | 2 ++ |
|
6 files changed, 128 insertions(+), 29 deletions(-) |
|
|
|
diff --git a/texk/dvipdfm-x/pdfdev.c b/texk/dvipdfm-x/pdfdev.c |
|
index 6fa4223..b7d2d9a 100644 |
|
--- a/texk/dvipdfm-x/pdfdev.c |
|
+++ b/texk/dvipdfm-x/pdfdev.c |
|
@@ -463,6 +463,7 @@ struct dev_font { |
|
|
|
pdf_obj *resource; |
|
char *used_chars; |
|
+ char *used_glyphs; |
|
|
|
/* Font format: |
|
* simple, composite or bitmap. |
|
@@ -876,6 +877,8 @@ dev_set_font (int font_id) |
|
if (!real_font->resource) { |
|
real_font->resource = pdf_get_font_reference(real_font->font_id); |
|
real_font->used_chars = pdf_get_font_usedchars(real_font->font_id); |
|
+ if (real_font->cff_charsets) |
|
+ real_font->used_glyphs = pdf_get_font_usedglyphs(real_font->font_id); |
|
} |
|
|
|
if (!real_font->used_on_this_page) { |
|
@@ -1155,6 +1158,11 @@ pdf_dev_set_string (spt_t xpos, spt_t ypos, |
|
length = instr_len; |
|
|
|
if (font->format == PDF_FONTTYPE_COMPOSITE) { |
|
+ if (real_font->used_glyphs != NULL) { |
|
+ for (i = 0; i < length; i += 2) |
|
+ add_to_used_chars2(real_font->used_glyphs, |
|
+ (unsigned short) (str_ptr[i] << 8)|str_ptr[i+1]); |
|
+ } |
|
if (handle_multibyte_string(font, &str_ptr, &length, ctype) < 0) { |
|
ERROR("Error in converting input string..."); |
|
return; |
|
@@ -1536,6 +1544,7 @@ pdf_dev_locate_font (const char *font_name, spt_t ptsize) |
|
|
|
font->resource = NULL; /* Don't ref obj until font is actually used. */ |
|
font->used_chars = NULL; |
|
+ font->used_glyphs = NULL; |
|
|
|
font->extend = 1.0; |
|
font->slant = 0.0; |
|
diff --git a/texk/dvipdfm-x/pdffont.c b/texk/dvipdfm-x/pdffont.c |
|
index e25a1cc..f0189d7 100644 |
|
--- a/texk/dvipdfm-x/pdffont.c |
|
+++ b/texk/dvipdfm-x/pdffont.c |
|
@@ -334,6 +334,24 @@ pdf_get_font_usedchars (int font_id) |
|
} |
|
} |
|
|
|
+/* |
|
+ * Used-glyph table of the Type0 font behind font_id; NULL for other subtypes. |
|
+ */ |
|
+char * |
|
+pdf_get_font_usedglyphs (int font_id) |
|
+{ |
|
+  pdf_font *font; |
|
+ |
|
+  CHECK_ID(font_id); |
|
+  font = GET_FONT(font_id); |
|
+ |
|
+  /* Anything but a Type0 font yields NULL. */ |
|
+  if (font->subtype != PDF_FONT_FONTTYPE_TYPE0) |
|
+    return NULL; |
|
+ |
|
+  return Type0Font_get_usedglyphs(Type0Font_cache_get(font->font_id)); |
|
+} |
|
+ |
|
int |
|
pdf_get_font_wmode (int font_id) |
|
{ |
|
diff --git a/texk/dvipdfm-x/pdffont.h b/texk/dvipdfm-x/pdffont.h |
|
index a833fe8..38ca3a5 100644 |
|
--- a/texk/dvipdfm-x/pdffont.h |
|
+++ b/texk/dvipdfm-x/pdffont.h |
|
@@ -63,6 +63,7 @@ extern int pdf_font_findresource (const char *font_name, |
|
extern int pdf_get_font_subtype (int font_id); |
|
extern pdf_obj *pdf_get_font_reference (int font_id); |
|
extern char *pdf_get_font_usedchars (int font_id); |
|
+extern char *pdf_get_font_usedglyphs (int font_id); |
|
|
|
#if 0 |
|
extern char *pdf_get_font_fontname (int font_id); /* without unique tag */ |
|
diff --git a/texk/dvipdfm-x/tt_cmap.c b/texk/dvipdfm-x/tt_cmap.c |
|
index 718e858..f49421e 100644 |
|
--- a/texk/dvipdfm-x/tt_cmap.c |
|
+++ b/texk/dvipdfm-x/tt_cmap.c |
|
@@ -941,6 +941,31 @@ handle_subst_glyphs (CMap *cmap, |
|
return count; |
|
} |
|
|
|
+/* Open sfont's "CFF " table and read its charsets. Returns NULL when any step |
|
+ * fails or the CFF font is not a CIDFont; callers cff_close() a non-NULL result. */ |
|
+static cff_font * |
|
+prepare_CIDFont_from_sfnt(sfnt* sfont) |
|
+{ |
|
+  cff_font *cid_cff; |
|
+  unsigned long cff_pos = 0; |
|
+ |
|
+  if (sfont->type != SFNT_TYPE_POSTSCRIPT) |
|
+    return NULL; |
|
+  if (sfnt_read_table_directory(sfont, 0) < 0) |
|
+    return NULL; |
|
+  if ((cff_pos = sfnt_find_table_pos(sfont, "CFF ")) == 0) |
|
+    return NULL; |
|
+ |
|
+  cid_cff = cff_open(sfont->stream, cff_pos, 0); |
|
+  if (cid_cff && !(cid_cff->flag & FONTTYPE_CIDFONT)) { |
|
+    cff_close(cid_cff); |
|
+    cid_cff = NULL; |
|
+  } |
|
+  if (cid_cff) |
|
+    cff_read_charsets(cid_cff); |
|
+  return cid_cff; |
|
+} |
|
+ |
|
static pdf_obj * |
|
create_ToUnicode_cmap4 (struct cmap4 *map, |
|
const char *cmap_name, CMap *cmap_add, |
|
@@ -952,6 +977,7 @@ create_ToUnicode_cmap4 (struct cmap4 *map, |
|
USHORT c0, c1, gid, count, ch; |
|
USHORT i, j, d, segCount; |
|
char used_glyphs_copy[8192]; |
|
+ cff_font *cffont = prepare_CIDFont_from_sfnt(sfont); |
|
|
|
cmap = CMap_new(); |
|
CMap_set_name (cmap, cmap_name); |
|
@@ -979,10 +1005,11 @@ create_ToUnicode_cmap4 (struct cmap4 *map, |
|
map->idDelta[i]) & 0xffff; |
|
} |
|
if (is_used_char2(used_glyphs_copy, gid)) { |
|
+ unsigned int cid = cffont ? cff_charsets_lookup_inverse(cffont, gid) : gid; |
|
count++; |
|
|
|
- wbuf[0] = (gid >> 8) & 0xff; |
|
- wbuf[1] = (gid & 0xff); |
|
+ wbuf[0] = (cid >> 8) & 0xff; |
|
+ wbuf[1] = (cid & 0xff); |
|
|
|
wbuf[2] = (ch >> 8) & 0xff; |
|
wbuf[3] = ch & 0xff; |
|
@@ -1014,6 +1041,9 @@ create_ToUnicode_cmap4 (struct cmap4 *map, |
|
} |
|
CMap_release(cmap); |
|
|
|
+ if (cffont) |
|
+ cff_close(cffont); |
|
+ |
|
return stream; |
|
} |
|
|
|
@@ -1029,6 +1059,7 @@ create_ToUnicode_cmap12 (struct cmap12 *map, |
|
ULONG i, ch; |
|
USHORT gid, count; |
|
char used_glyphs_copy[8192]; |
|
+ cff_font *cffont = prepare_CIDFont_from_sfnt(sfont); |
|
|
|
cmap = CMap_new(); |
|
CMap_set_name (cmap, cmap_name); |
|
@@ -1049,9 +1080,10 @@ create_ToUnicode_cmap12 (struct cmap12 *map, |
|
d = ch - map->groups[i].startCharCode; |
|
gid = (USHORT) ((map->groups[i].startGlyphID + d) & 0xffff); |
|
if (is_used_char2(used_glyphs_copy, gid)) { |
|
+ unsigned int cid = cffont ? cff_charsets_lookup_inverse(cffont, gid) : gid; |
|
count++; |
|
- wbuf[0] = (gid >> 8) & 0xff; |
|
- wbuf[1] = (gid & 0xff); |
|
+ wbuf[0] = (cid >> 8) & 0xff; |
|
+ wbuf[1] = (cid & 0xff); |
|
len = UC_sput_UTF16BE((long)ch, &p, wbuf+WBUF_SIZE); |
|
|
|
CMap_add_bfchar(cmap, wbuf, 2, wbuf+2, len); |
|
@@ -1081,6 +1113,9 @@ create_ToUnicode_cmap12 (struct cmap12 *map, |
|
} |
|
CMap_release(cmap); |
|
|
|
+ if (cffont) |
|
+ cff_close(cffont); |
|
+ |
|
return stream; |
|
} |
|
|
|
diff --git a/texk/dvipdfm-x/type0.c b/texk/dvipdfm-x/type0.c |
|
index ec19577..e1f1fee 100644 |
|
--- a/texk/dvipdfm-x/type0.c |
|
+++ b/texk/dvipdfm-x/type0.c |
|
@@ -85,6 +85,7 @@ struct Type0Font { |
|
char *fontname; /* BaseFont */ |
|
char *encoding; /* "Identity-H" or "Identity-V" (not ID) */ |
|
char *used_chars; /* Used chars (CIDs) */ |
|
+ char *used_glyphs; /* Used glyphs (GIDs) */ |
|
|
|
/* |
|
* Type0 only |
|
@@ -112,6 +113,7 @@ Type0Font_init_font_struct (Type0Font *font) |
|
font->descriptor = NULL; |
|
font->encoding = NULL; |
|
font->used_chars = NULL; |
|
+ font->used_glyphs = NULL; |
|
font->descendant = NULL; |
|
font->wmode = -1; |
|
font->flags = FLAG_NONE; |
|
@@ -131,6 +133,8 @@ Type0Font_clean (Type0Font *font) |
|
ERROR("%s: FontDescriptor unexpected for Type0 font.", TYPE0FONT_DEBUG_STR); |
|
if (!(font->flags & FLAG_USED_CHARS_SHARED) && font->used_chars) |
|
RELEASE(font->used_chars); |
|
+    if (!(font->flags & FLAG_USED_CHARS_SHARED) && font->used_glyphs) /* a shared table belongs to the parent font */ |
|
+      RELEASE(font->used_glyphs); |
|
if (font->encoding) |
|
RELEASE(font->encoding); |
|
if (font->fontname) |
|
@@ -139,6 +143,7 @@ Type0Font_clean (Type0Font *font) |
|
font->indirect = NULL; |
|
font->descriptor = NULL; |
|
font->used_chars = NULL; |
|
+ font->used_glyphs = NULL; |
|
font->encoding = NULL; |
|
font->fontname = NULL; |
|
} |
|
@@ -147,13 +152,45 @@ Type0Font_clean (Type0Font *font) |
|
/* PLEASE FIX THIS */ |
|
#include "tt_cmap.h" |
|
|
|
+/* ToUnicode stream built from used glyphs when tracked, else from used chars. */ |
|
+pdf_obj *Type0Font_create_ToUnicode_stream(Type0Font *font) { |
|
+  CIDFont *descendant = font->descendant; |
|
+  char *usage = Type0Font_get_usedglyphs(font); |
|
+ |
|
+  if (usage == NULL) /* no used-glyph table: fall back to used_chars */ |
|
+    usage = Type0Font_get_usedchars(font); |
|
+  return otf_create_ToUnicode_stream(CIDFont_get_ident(descendant), |
|
+                                     CIDFont_get_opt_index(descendant), usage); |
|
+} |
|
+ |
|
+/* Try to load a ToUnicode CMap named "<cmap_base>-UTF16", then |
|
+ * "<cmap_base>-UCS2", from the file system; if neither is found, fall back |
|
+ * to font CMap reverse lookup. */ |
|
+pdf_obj *Type0Font_try_load_ToUnicode_stream(Type0Font *font, char *cmap_base) { |
|
+  /* sizeof counts the NUL; "-UTF16" is the longer of the two suffixes. */ |
|
+  char *cmap_name = NEW(strlen(cmap_base) + sizeof("-UTF16"), char); |
|
+  pdf_obj *tounicode; |
|
+ |
|
+  sprintf(cmap_name, "%s-UTF16", cmap_base); |
|
+  tounicode = pdf_read_ToUnicode_file(cmap_name); |
|
+  if (!tounicode) { |
|
+    sprintf(cmap_name, "%s-UCS2", cmap_base); |
|
+    tounicode = pdf_read_ToUnicode_file(cmap_name); |
|
+  } |
|
+  RELEASE(cmap_name); |
|
+ |
|
+  if (!tounicode) |
|
+    tounicode = Type0Font_create_ToUnicode_stream(font); |
|
+  return tounicode; |
|
+} |
|
+ |
|
static void |
|
add_ToUnicode (Type0Font *font) |
|
{ |
|
pdf_obj *tounicode; |
|
CIDFont *cidfont; |
|
CIDSysInfo *csi; |
|
- char *cmap_name, *fontname; |
|
+ char *fontname; |
|
|
|
/* |
|
* ToUnicode CMap: |
|
@@ -197,39 +234,24 @@ add_ToUnicode (Type0Font *font) |
|
switch (CIDFont_get_subtype(cidfont)) { |
|
case CIDFONT_TYPE2: |
|
/* PLEASE FIX THIS */ |
|
- tounicode = otf_create_ToUnicode_stream(CIDFont_get_ident(cidfont), |
|
- CIDFont_get_opt_index(cidfont), |
|
- Type0Font_get_usedchars(font)); |
|
+ tounicode = Type0Font_create_ToUnicode_stream(font); |
|
break; |
|
default: |
|
if (CIDFont_get_flag(cidfont, CIDFONT_FLAG_TYPE1C)) { /* FIXME */ |
|
- tounicode = otf_create_ToUnicode_stream(CIDFont_get_ident(cidfont), |
|
- CIDFont_get_opt_index(cidfont), |
|
- Type0Font_get_usedchars(font)); |
|
+ tounicode = Type0Font_create_ToUnicode_stream(font); |
|
} else if (CIDFont_get_flag(cidfont, CIDFONT_FLAG_TYPE1)) { /* FIXME */ |
|
/* Font loader will create ToUnicode and set. */ |
|
return; |
|
} else { |
|
- cmap_name = NEW(strlen(fontname) + 7, char); |
|
- sprintf(cmap_name, "%s-UTF16", fontname); |
|
- tounicode = pdf_read_ToUnicode_file(cmap_name); |
|
- if (!tounicode) { |
|
- sprintf(cmap_name, "%s-UCS2", fontname); |
|
- tounicode = pdf_read_ToUnicode_file(cmap_name); |
|
- } |
|
- RELEASE(cmap_name); |
|
+ tounicode = Type0Font_try_load_ToUnicode_stream(font, fontname); |
|
} |
|
break; |
|
} |
|
} else { |
|
- cmap_name = NEW(strlen(csi->registry)+strlen(csi->ordering)+8, char); |
|
- sprintf(cmap_name, "%s-%s-UTF16", csi->registry, csi->ordering); |
|
- tounicode = pdf_read_ToUnicode_file(cmap_name); |
|
- if (!tounicode) { |
|
- sprintf(cmap_name, "%s-%s-UCS2", csi->registry, csi->ordering); |
|
- tounicode = pdf_read_ToUnicode_file(cmap_name); |
|
- } |
|
- RELEASE(cmap_name); |
|
+ char *cmap_base = NEW(strlen(csi->registry) + strlen(csi->ordering) + 2, char); |
|
+ sprintf(cmap_base, "%s-%s", csi->registry, csi->ordering); |
|
+ tounicode = Type0Font_try_load_ToUnicode_stream(font, cmap_base); |
|
+ RELEASE(cmap_base); |
|
} |
|
|
|
if (tounicode) { |
|
@@ -304,6 +326,14 @@ Type0Font_get_usedchars (Type0Font *font) |
|
return font->used_chars; |
|
} |
|
|
|
+/* Getter for font->used_glyphs; asserts that font is non-NULL. */ |
|
+char * |
|
+Type0Font_get_usedglyphs (Type0Font *font) |
|
+{ |
|
+ ASSERT(font); |
|
+ return font->used_glyphs; |
|
+} |
|
+ |
|
pdf_obj * |
|
Type0Font_get_resource (Type0Font *font) |
|
{ |
|
@@ -481,6 +511,7 @@ Type0Font_cache_find (const char *map_name, int cmap_id, fontmap_opt *fmap_opt) |
|
*/ |
|
|
|
font->used_chars = NULL; |
|
+ font->used_glyphs = NULL; |
|
font->flags = FLAG_NONE; |
|
|
|
switch (CIDFont_get_subtype(cidfont)) { |
|
@@ -492,11 +523,14 @@ Type0Font_cache_find (const char *map_name, int cmap_id, fontmap_opt *fmap_opt) |
|
/* |
|
* Need used_chars to write W, W2. |
|
*/ |
|
- if ((parent_id = CIDFont_get_parent_id(cidfont, wmode ? 0 : 1)) < 0) |
|
+ if ((parent_id = CIDFont_get_parent_id(cidfont, wmode ? 0 : 1)) < 0) { |
|
font->used_chars = new_used_chars2(); |
|
- else { |
|
+ if (fmap_opt->cff_charsets) |
|
+ font->used_glyphs = new_used_chars2(); |
|
+ } else { |
|
/* Don't allocate new one. */ |
|
font->used_chars = Type0Font_get_usedchars(Type0Font_cache_get(parent_id)); |
|
+ font->used_glyphs = Type0Font_get_usedglyphs(Type0Font_cache_get(parent_id)); |
|
font->flags |= FLAG_USED_CHARS_SHARED; |
|
} |
|
break; |
|
diff --git a/texk/dvipdfm-x/type0.h b/texk/dvipdfm-x/type0.h |
|
index 817f65d..da75a2e 100644 |
|
--- a/texk/dvipdfm-x/type0.h |
|
+++ b/texk/dvipdfm-x/type0.h |
|
@@ -36,6 +36,8 @@ extern char *Type0Font_get_encoding (Type0Font *font); |
|
#endif |
|
extern char *Type0Font_get_usedchars (Type0Font *font); |
|
|
|
+extern char *Type0Font_get_usedglyphs (Type0Font *font); |
|
+ |
|
extern pdf_obj *Type0Font_get_resource (Type0Font *font); |
|
|
|
extern void Type0Font_set_ToUnicode (Type0Font *font, pdf_obj *cmap_ref); |
|
-- |
|
2.0.0 |
|
|