Skip to content

Instantly share code, notes, and snippets.

@datalogics-pgallot
Created February 21, 2017 15:01
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save datalogics-pgallot/0dd35155bfe84319d3dc7964284f8492 to your computer and use it in GitHub Desktop.
Save datalogics-pgallot/0dd35155bfe84319d3dc7964284f8492 to your computer and use it in GitHub Desktop.
Using bookmarks to create a Table of Contents
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using Datalogics.PDFL;
/*
*
* A sample which uses a document's bookmark tree to create a Table Of Contents in front of
* the original content.
*
* Copyright (c) 2007-2017, Datalogics, Inc. All rights reserved.
*
* The information and code in this sample is for the exclusive use of Datalogics
* customers and evaluation users only. Datalogics permits you to use, modify and
* distribute this file in accordance with the terms of your license agreement.
* Sample code is for demonstrative purposes only and is not intended for production use.
*
*/
namespace BookmarksToTOC
{
class BookmarksToTOC
{
    /*
     * Extra indent contributed by each nesting level of the outline tree, in the same units as
     * the layout rect. calcIndent sums the entries from index 0 up to the bookmark's level;
     * levels deeper than the table re-use the last entry.
     */
    static readonly int[] indentlevels = new int[] { 0, 0, 36, 18, 9 };

    /*
     * Flattens the bookmark tree below node into bkmarkList, depth first: every bookmark is
     * appended before its own children. node itself is not added. A null node or a node
     * without children adds nothing.
     */
    static void getBookmarks(Bookmark node, ref List<Bookmark> bkmarkList)
    {
        if (node == null || !node.HasChildren())
            return;
        for (var entry = node.FirstChild; entry != null; entry = entry.Next)
        {
            bkmarkList.Add(entry);
            getBookmarks(entry, ref bkmarkList);
        }
    }

    /*
     * We use the number of ancestors a bookmark has in the outline tree to determine how much
     * to indent the TOC entry. Returns the sum of indentlevels[0..level], where level is the
     * number of ancestors between the bookmark and the document's bookmark root.
     */
    static int calcIndent(Document doc, Bookmark bkmk)
    {
        var rootID = doc.BookmarkRoot.PDFDict.ID;
        int level = 0;
        // The null test ends the walk if the parent chain stops before the root is reached,
        // instead of dereferencing a missing parent.
        for (var parent = bkmk.Parent; parent != null && parent.PDFDict.ID != rootID; parent = parent.Parent)
            level++;

        int lastlevel = indentlevels.Length - 1;
        int indent = 0;
        for (int i = 0; i <= level; i++)
            indent += indentlevels[Math.Min(i, lastlevel)];
        return indent;
    }

    /*
     * Returns the destination a bookmark jumps to, or null when it has no usable landing page.
     * A bookmark without an Action must carry a ViewDestination; a bookmark with an Action is
     * only usable when that Action is a GoToAction. A failure while reading the GoToAction's
     * destination is reported on the console and treated as "no destination".
     * The caller owns the returned object and must Dispose it.
     */
    static ViewDestination findDestination(Bookmark bkmk)
    {
        var action = bkmk.Action;
        if (action == null)
            return bkmk.ViewDestination;

        var goTo = action as GoToAction;
        if (goTo == null)
            return null;

        try
        {
            return goTo.Destination;
        }
        catch (Exception ex)
        {
            Console.WriteLine("Error processing object {0},{1}", bkmk.PDFDict.ID, ex.Message);
            return null;
        }
    }

    /*
     * Measures the advance of text when set with the given font and states. The temporary
     * run and matrix are always released.
     */
    static double measureAdvance(string text, Font f, GraphicState gs, TextState ts)
    {
        Matrix identity = new Matrix();
        try
        {
            TextRun run = new TextRun(text, f, gs, ts, identity);
            try
            {
                return run.Advance;
            }
            finally
            {
                run.Dispose();
            }
        }
        finally
        {
            identity.Dispose();
        }
    }

    /*
     * Appends text to target as a run positioned at (x, y) and returns the run's advance.
     */
    static double addRunAt(Text target, string text, double x, double y, Font f, GraphicState gs, TextState ts)
    {
        Matrix m = new Matrix(1, 0, 0, 1, x, y);
        try
        {
            TextRun run = new TextRun(text, f, gs, ts, m);
            try
            {
                target.AddRun(run);
                return run.Advance;
            }
            finally
            {
                run.Dispose();
            }
        }
        finally
        {
            m.Dispose();
        }
    }

    /*
     * Returns the index of the first space located after position 'after', or title.Length
     * when there is none.
     */
    static int nextWordEnd(string title, int after)
    {
        int searchFrom = after + 1;
        if (searchFrom >= title.Length)
            return title.Length;
        int space = title.IndexOf(' ', searchFrom);
        return space > 0 ? space : title.Length;
    }

    /*
     * The heart of the line-wrapping algorithm, shared by practiceLayout and LayoutTOC so that
     * both passes always break a title the same way. We divide the title at spaces and grow
     * the line word by word until it exceeds the allocated width, in which case we back off
     * a word and emit the line. A first word that is wider than lineWidth is emitted on its
     * own anyway, so the loop always makes progress.
     * Returns the lines in order; a null or empty title yields no lines.
     */
    static List<string> wrapTitle(string title, double lineWidth, Font f, GraphicState gs, TextState ts)
    {
        var lines = new List<string>();
        if (String.IsNullOrEmpty(title))
            return lines;

        int lineStart = 0;  // index of the first character of the line being built
        int lineEnd = 0;    // end (exclusive) of the text accepted for the line so far
        int consumed = 0;   // characters accounted for, including the space that ended the line
        while (consumed < title.Length)
        {
            int candidate = nextWordEnd(title, lineEnd);
            lineEnd = candidate;
            if (lineEnd <= lineStart)
                continue;   // the line would start with a second space: step over it

            double width = measureAdvance(title.Substring(lineStart, lineEnd - lineStart), f, gs, ts);
            consumed = lineEnd + 1;
            while (width < lineWidth && consumed < title.Length)
            {
                // The text measured last still fits: accept it, then try one more word.
                lineEnd = candidate;
                consumed = lineEnd + 1;
                if (consumed >= title.Length)
                    break;
                candidate = nextWordEnd(title, lineEnd);
                width = measureAdvance(title.Substring(lineStart, candidate - lineStart), f, gs, ts);
            }

            lines.Add(title.Substring(lineStart, lineEnd - lineStart));
            lineStart = lineEnd + 1;
        }
        return lines;
    }

    /*
     * Builds the longest run of dots whose advance is below 'available'. We add 16 dots at a
     * time until we exceed, back off, add 4 at a time until we exceed again, back off, then
     * add 1 at a time. Returns an empty string when not even a single dot fits, so the caller
     * does not draw a dot on top of the page number.
     */
    static string buildDotLeader(double available, Font f, GraphicState gs, TextState ts)
    {
        if (measureAdvance(".", f, gs, ts) >= available)
            return String.Empty;

        string[] steps = { "................", "....", "." };
        string fitted = ".";
        foreach (string step in steps)
        {
            string trial = fitted;
            while (measureAdvance(trial, f, gs, ts) < available)
            {
                fitted = trial;
                trial += step;
            }
        }
        return fitted;
    }

    /*
     * Adds the text of one TOC entry to the page content and covers its bounding box with a
     * zero-width link annotation that jumps to dest. The caller still has to call
     * UpdateContent on the page once all entries for it have been added.
     */
    static void commitEntry(Page page, Text entryText, ViewDestination dest)
    {
        page.Content.AddElement(entryText);
        LinkAnnotation link = new LinkAnnotation(page, entryText.BoundingBox);
        try
        {
            link.Destination = dest;
            link.Width = 0;
        }
        finally
        {
            link.Dispose();
        }
    }

    /*
     * practiceLayout does most of the layout to determine how many boxes of size rect it takes
     * to create the TOC. Nothing is added to the document. Bookmarks without a usable
     * destination are skipped, exactly as in LayoutTOC. A box that was opened but received no
     * line is not counted, so the result is 0 when there is nothing to lay out.
     * NOTE: both layout passes use ts.FontSize * ts.TextRise as the distance between lines.
     */
    static int practiceLayout(Document doc, ref List<Bookmark> bkmrkList, Rect rect, Font f, TextState ts)
    {
        int numRects = 1;
        bool rectUsed = false;
        double downPos = rect.Top - ts.FontSize;
        var gs = new GraphicState();

        foreach (Bookmark bkmk in bkmrkList)
        {
            ViewDestination viewDest = findDestination(bkmk);
            if (viewDest == null) // then we don't have a landing page, so skip over this bookmark.
                continue;

            string bkmrkPageLabel;
            try
            {
                bkmrkPageLabel = doc.FindLabelForPageNum(viewDest.PageNumber); // this is the page number
            }
            finally
            {
                viewDest.Dispose();
            }

            // Room for the right-justified page label is reserved on every line of the entry.
            double rsrvd = measureAdvance(bkmrkPageLabel, f, gs, ts);
            double indent = calcIndent(doc, bkmk);
            double lineWidth = rect.Width - indent - rsrvd;

            foreach (string line in wrapTitle(bkmk.Title, lineWidth, f, gs, ts))
            {
                Console.WriteLine("adding line: {0}", line);
                rectUsed = true;
                downPos -= (ts.FontSize * ts.TextRise);
                // if we reach the bottom of the box, start a new box.
                if (downPos < rect.Bottom)
                {
                    numRects++;
                    downPos = rect.Top - ts.FontSize;
                    rectUsed = false;
                }
            }
        }
        return numRects - (rectUsed ? 0 : 1);
    }

    /*
     * Like practiceLayout, but the lines are really added to the document, starting on page
     * pageNum, and on the last line of a TOC entry we add dots up to a right-justified page
     * number. Each entry is covered by a link to the bookmark's destination.
     * If we run out of room in the rect for the current page, we commit what we have and
     * continue on the next page, assuming we can fill the same rect area there. The move to
     * the next page is made only when another line actually has to be placed, which matches
     * the page count returned by practiceLayout and never touches a page the TOC does not use.
     */
    static void LayoutTOC(Document doc, int pageNum, ref List<Bookmark> bkmrkList, Rect rect, Font f, TextState ts)
    {
        int curPage = pageNum;
        var gs = new GraphicState();
        double downPos = rect.Top - ts.FontSize;
        bool pageHasLines = false;   // true once at least one line was placed on the current page
        Page tocPage = doc.GetPage(curPage);
        try
        {
            foreach (Bookmark bkmk in bkmrkList)
            {
                ViewDestination viewDest = findDestination(bkmk);
                if (viewDest == null) // then we don't have a landing page, so skip over this bookmark.
                    continue;

                Text entryText = null;   // created lazily so skipped or empty entries allocate nothing
                try
                {
                    string bkmrkPageLabel = doc.FindLabelForPageNum(viewDest.PageNumber); // this is the page number
                    double rsrvd = measureAdvance(bkmrkPageLabel, f, gs, ts);
                    double indent = calcIndent(doc, bkmk);
                    double horizPos = rect.Left + indent;
                    List<string> lines = wrapTitle(bkmk.Title, rect.Width - indent - rsrvd, f, gs, ts);

                    for (int i = 0; i < lines.Count; i++)
                    {
                        if (pageHasLines && downPos < rect.Bottom)
                        {
                            // The current page is full: commit the part of this entry that is
                            // already on it, then continue on the next page.
                            if (entryText != null)
                            {
                                commitEntry(tocPage, entryText, viewDest);
                                entryText.Dispose();
                                entryText = null;
                            }
                            tocPage.UpdateContent();
                            tocPage.Dispose();
                            tocPage = null;
                            curPage++;
                            tocPage = doc.GetPage(curPage);
                            pageHasLines = false;
                            // Same starting baseline as practiceLayout uses for a new box.
                            downPos = rect.Top - ts.FontSize;
                        }

                        if (entryText == null)
                            entryText = new Text();

                        string line = lines[i];
                        Console.WriteLine("adding line: {0}", line);
                        double lineAdvance = addRunAt(entryText, line, horizPos, downPos, f, gs, ts);
                        pageHasLines = true;

                        if (i == lines.Count - 1)
                        {
                            // we have added the last line of the bookmark title; time to add dots and page numbers.
                            double pageNumLeft = rect.Right - rsrvd;
                            double leaderLeft = horizPos + lineAdvance;
                            string leader = buildDotLeader(pageNumLeft - leaderLeft, f, gs, ts);
                            if (leader.Length > 0)
                                addRunAt(entryText, leader, leaderLeft, downPos, f, gs, ts);
                            // add the right-justified page number.
                            addRunAt(entryText, bkmrkPageLabel, pageNumLeft, downPos, f, gs, ts);
                        }

                        downPos -= (ts.FontSize * ts.TextRise);
                    }

                    if (entryText != null)
                        commitEntry(tocPage, entryText, viewDest);
                }
                finally
                {
                    if (entryText != null)
                        entryText.Dispose();
                    viewDest.Dispose();
                }
            }

            // Only rewrite the page's content when something was actually added to it.
            if (pageHasLines)
                tocPage.UpdateContent();
        }
        finally
        {
            if (tocPage != null)
                tocPage.Dispose();
        }
    }

    /*
     * Derives the output path from the input path by inserting "_wTOC" in front of the
     * extension of the file name. Unlike a plain Replace(".pdf", ...), this cannot return the
     * input path itself (for example for "X.PDF") and does not alter directory names.
     */
    static string buildOutputName(string inputPath)
    {
        string dir = System.IO.Path.GetDirectoryName(inputPath);
        string ext = System.IO.Path.GetExtension(inputPath);
        if (String.IsNullOrEmpty(ext))
            ext = ".pdf";
        string name = System.IO.Path.GetFileNameWithoutExtension(inputPath) + "_wTOC" + ext;
        return String.IsNullOrEmpty(dir) ? name : System.IO.Path.Combine(dir, name);
    }

    /*
     * Opens the PDF named by the first argument (or the sample document), inserts as many
     * pages in front of it as the Table Of Contents needs, lays the TOC out on them and saves
     * the result next to the input with "_wTOC" added to the file name.
     */
    static void Main(string[] args)
    {
        Console.WriteLine("Bookmarks To TOC sample:");
        string filename = "Samples\\Data\\constitution.pdf";
        if (args.Length > 0)
            filename = args[0];

        using (Library lib = new Library())
        {
            Console.WriteLine("Initialized the library.");
            Document doc = new Document(filename);
            try
            {
                // The TOC pages get the size of the first page, with a 72 unit margin all around.
                Rect pageRect;
                Page firstPage = doc.GetPage(0);
                try
                {
                    pageRect = firstPage.MediaBox;
                }
                finally
                {
                    firstPage.Dispose();
                }
                Rect tocRect = new Rect(pageRect.Left + 72, pageRect.Bottom + 72, pageRect.Right - 72, pageRect.Top - 72);
                var tocFont = new Font("Arial", FontCreateFlags.Embedded | FontCreateFlags.Subset);

                List<Bookmark> bkmrkList = new List<Bookmark>();
                getBookmarks(doc.BookmarkRoot, ref bkmrkList);

                var ts = new TextState();
                // NOTE: the layout routines multiply FontSize by TextRise to get the line spacing.
                ts.TextRise = 1.25;
                ts.FontSize = 12.0;

                int numPagesReqrd = practiceLayout(doc, ref bkmrkList, tocRect, tocFont, ts);

                // insert the pages we need for the TOC.
                for (int i = 0; i < numPagesReqrd; i++)
                {
                    doc.CreatePage(Document.BeforeFirstPage, pageRect).Dispose();
                }

                // With no TOC page inserted, page 0 is original content and must not be written to.
                if (numPagesReqrd > 0)
                    LayoutTOC(doc, 0, ref bkmrkList, tocRect, tocFont, ts);

                doc.EmbedFonts(EmbedFlags.None);
                doc.Save(SaveFlags.Full, buildOutputName(filename));
            }
            finally
            {
                // Close the document even when layout or saving fails.
                doc.Close();
            }
            Console.WriteLine("document saved");
        }
    }
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment