Skip to content

Instantly share code, notes, and snippets.

@datalogics-pgallot
Created April 4, 2017 20:18
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save datalogics-pgallot/0ada961031425ff563b24a2f5fe6066c to your computer and use it in GitHub Desktop.
Save datalogics-pgallot/0ada961031425ff563b24a2f5fe6066c to your computer and use it in GitHub Desktop.
A sample app that compares PDF documents, pages, or indirect objects
using System;
using System.Collections.Generic;
using System.IO;
using System.Text;
using Datalogics.PDFL;
/*
*
* A sample which compares PDF Documents, pages, or indirect objects.
*
* Copyright (c) 2007-2017, Datalogics, Inc. All rights reserved.
*
* The information and code in this sample is for the exclusive use of Datalogics
* customers and evaluation users only. Datalogics permits you to use, modify and
* distribute this file in accordance with the terms of your license agreement.
* Sample code is for demonstrative purposes only and is not intended for production use.
*
*/
namespace comparePDFs
{
class ComparePDFs
{
// What is being compared. Main starts with doc and switches to page when it
// sees "-p" or to obj when it sees "-o" on the command line.
enum compare { doc, page, obj };
// Classification of a PDFObject as produced by GetObjType. cosNull is the
// fallback returned when the object matches none of the tested PDFObject types.
enum cosType { cosNull, Array, Boolean, Dict, Integer, Name, Real, Stream, String };
/// <summary>
/// Classifies a PDF object by its runtime type.
/// </summary>
/// <param name="obj">The object to classify.</param>
/// <returns>
/// The cosType matching the object's runtime type, or cosType.cosNull when it
/// matches none of the tested types (which includes a null reference).
/// </returns>
static cosType GetObjType(PDFObject obj)
{
    // The tests run in a fixed order and the first match wins.
    if (obj is PDFBoolean) return cosType.Boolean;
    if (obj is PDFInteger) return cosType.Integer;
    if (obj is PDFReal) return cosType.Real;
    if (obj is PDFName) return cosType.Name;
    if (obj is PDFString) return cosType.String;
    if (obj is PDFArray) return cosType.Array;
    if (obj is PDFDict) return cosType.Dict;
    if (obj is PDFStream) return cosType.Stream;
    return cosType.cosNull;
}
/// <summary>
/// Collects the names of all keys of a dictionary into a set of strings.
/// </summary>
/// <param name="dict">The dictionary whose keys are read.</param>
/// <returns>A new set holding the Value of every key in the dictionary.</returns>
static HashSet<string> getKeySet(PDFDict dict)
{
    var names = new HashSet<string>();
    foreach (PDFName name in dict.Keys)
        names.Add(name.Value);
    return names;
}
/// <summary>
/// Produces a short, single-line description of a PDF object for the comparison report.
/// </summary>
/// <param name="obj">The object to describe; a null reference is described as "null".</param>
/// <returns>
/// The value for booleans, integers, reals (at most three decimals), names and
/// strings; "-dict-" or "-stream-" for containers that are not expanded; for
/// arrays of up to six elements a bracketed list of the element descriptions,
/// and "-array-" for longer arrays.
/// </returns>
static string describeObject(PDFObject obj)
{
    // Arrays longer than this are summarized instead of listed element by element.
    const int maxListedElements = 6;

    switch (GetObjType(obj))
    {
        case cosType.cosNull:
            return "null";
        case cosType.Boolean:
            return ((PDFBoolean)obj).Value.ToString();
        case cosType.Integer:
            return ((PDFInteger)obj).Value.ToString();
        case cosType.Real:
            // Invariant culture: the report should always show '.' as the
            // decimal separator, regardless of the machine's regional settings.
            return ((PDFReal)obj).Value.ToString("0.###", System.Globalization.CultureInfo.InvariantCulture);
        case cosType.Name:
            return ((PDFName)obj).Value;
        case cosType.String:
            return ((PDFString)obj).Value;
        case cosType.Dict:
            return "-dict-";
        case cosType.Stream:
            return "-stream-";
        case cosType.Array:
            {
                var arrayObj = (PDFArray)obj;
                var count = arrayObj.Length;
                if (count > maxListedElements)
                {
                    return "-array-";
                }

                // Every element is followed by one space, so all listed arrays
                // share the same shape: "[ ]", "[ a ]", "[ a b ]", ...
                var text = new StringBuilder("[ ");
                for (int i = 0; i < count; i++)
                {
                    text.Append(describeObject(arrayObj.Get(i))).Append(' ');
                }
                text.Append(']');
                return text.ToString();
            }
        default:
            // Not reachable with the current cosType members; kept so that every
            // code path returns a value.
            return "None";
    }
}
/// <summary>
/// Recursively compares two PDF objects and writes every difference found to the console.
/// </summary>
/// <param name="path">Label of the current position; it prefixes every reported line.</param>
/// <param name="left">Object from the left document; may be null.</param>
/// <param name="right">Object from the right document; may be null.</param>
/// <param name="visited">Pairs of indirect object IDs already compared; updated by this call.</param>
/// <param name="skipSet">Dictionary keys that are ignored on both sides.</param>
static void compareObjs(string path, PDFObject left, PDFObject right, ref HashSet<string> visited, ref HashSet<string> skipSet)
{
    // Report format: "vs" lines show two differing values; lines with the
    // left-pointing marker show content found only on the left side, lines with
    // the right-pointing marker content found only on the right side.

    // A pair of indirect objects is compared only once, which also keeps the
    // recursion from running through the same pair again and again.
    if (left != null && right != null && left.Indirect && right.Indirect)
    {
        var visitedTag = String.Format("{0}:{1}", left.ID, right.ID);
        if (!visited.Add(visitedTag))
        {
            return;
        }
    }

    var leftType = GetObjType(left);
    var rightType = GetObjType(right);
    if (leftType != rightType)
    {
        // An integer on one side and a real on the other are not reported when
        // they hold the same number; every other type mismatch is reported.
        bool sameNumber =
            (leftType == cosType.Integer && rightType == cosType.Real
                && (double)((PDFInteger)left).Value == ((PDFReal)right).Value)
            || (leftType == cosType.Real && rightType == cosType.Integer
                && ((PDFReal)left).Value == (double)((PDFInteger)right).Value);
        if (!sameNumber)
        {
            Console.WriteLine("{0}: {1} vs. {2}", path, describeObject(left), describeObject(right));
        }
        return;
    }

    switch (leftType)
    {
        case cosType.Boolean:
            if (((PDFBoolean)left).Value != ((PDFBoolean)right).Value)
            {
                Console.WriteLine("{0}: {1} vs {2}", path, describeObject(left), describeObject(right));
            }
            return;
        case cosType.Integer:
            if (((PDFInteger)left).Value != ((PDFInteger)right).Value)
            {
                Console.WriteLine("{0}: {1} vs {2}", path, describeObject(left), describeObject(right));
            }
            return;
        case cosType.Real:
            if (((PDFReal)left).Value != ((PDFReal)right).Value)
            {
                Console.WriteLine("{0}: {1} vs {2}", path, describeObject(left), describeObject(right));
            }
            return;
        case cosType.Name:
            if (!String.Equals(((PDFName)left).Value, ((PDFName)right).Value, StringComparison.Ordinal))
            {
                Console.WriteLine("{0}: {1} vs {2}", path, describeObject(left), describeObject(right));
            }
            return;
        case cosType.String:
            if (!String.Equals(((PDFString)left).Value, ((PDFString)right).Value, StringComparison.Ordinal))
            {
                Console.WriteLine("{0}: {1} vs {2}", path, describeObject(left), describeObject(right));
            }
            return;
        case cosType.Array:
            compareArrayElements(path, (PDFArray)left, (PDFArray)right, visited, skipSet);
            return;
        case cosType.Dict:
            compareDictEntries(path, (PDFDict)left, (PDFDict)right, visited, skipSet);
            return;
        case cosType.Stream:
            {
                // Only the stream lengths and the stream dictionaries are
                // compared; the stream data itself is not read.
                var leftStream = (PDFStream)left;
                var rightStream = (PDFStream)right;
                var leftLen = leftStream.Length;
                var rightLen = rightStream.Length;
                if (leftLen != rightLen)
                {
                    Console.WriteLine("{0}: Stream length is {1} vs {2}", path, leftLen, rightLen);
                }
                compareDictEntries(path, leftStream.Dict, rightStream.Dict, visited, skipSet);
                return;
            }
        default:
            // Both sides classified as cosNull: nothing to compare.
            return;
    }
}

// Compares two arrays index by index, then lists the surplus elements of the longer one.
static void compareArrayElements(string path, PDFArray left, PDFArray right, HashSet<string> visited, HashSet<string> skipSet)
{
    var leftLen = left.Length;
    var rightLen = right.Length;
    var compareLen = Math.Min(leftLen, rightLen);
    if (leftLen != rightLen)
    {
        Console.WriteLine("{0}[]: Length is {1} vs {2}", path, leftLen, rightLen);
    }
    for (int i = 0; i < compareLen; i++)
    {
        var newPath = path + "[" + i.ToString() + "]";
        compareObjs(newPath, left.Get(i), right.Get(i), ref visited, ref skipSet);
    }
    // At most one of the two loops below runs, because compareLen equals the shorter length.
    for (int i = compareLen; i < leftLen; i++)
    {
        Console.WriteLine("{0}[{1}] << {2}", path, i, describeObject(left.Get(i)));
    }
    for (int i = compareLen; i < rightLen; i++)
    {
        Console.WriteLine("{0}[{1}] >> {2}", path, i, describeObject(right.Get(i)));
    }
}

// Reports keys present on one side only, then recurses into the keys both sides share; keys in skipSet are ignored.
static void compareDictEntries(string path, PDFDict left, PDFDict right, HashSet<string> visited, HashSet<string> skipSet)
{
    var leftOnly = getKeySet(left);
    var rightOnly = getKeySet(right);
    leftOnly.ExceptWith(skipSet);
    rightOnly.ExceptWith(skipSet);

    var shared = new HashSet<string>(leftOnly);
    shared.IntersectWith(rightOnly);
    leftOnly.ExceptWith(shared);
    rightOnly.ExceptWith(shared);

    foreach (var key in leftOnly)
    {
        Console.WriteLine("{0}:{1} << {2}", path, key, describeObject(left.Get(key)));
    }
    foreach (var key in rightOnly)
    {
        Console.WriteLine("{0}:{1} >> {2}", path, key, describeObject(right.Get(key)));
    }
    foreach (var key in shared)
    {
        var newPath = path + ":" + key;
        compareObjs(newPath, left.Get(key), right.Get(key), ref visited, ref skipSet);
    }
}
/// <summary>
/// Entry point. Parses the command line, then compares two documents, two
/// pages, or two indirect objects and writes the differences to the console or,
/// with "-out", to a file.
/// </summary>
/// <param name="args">
/// file1 [-o|-p id1] [file2 | [-o|-p][ id2]] [-out outfile]. Numeric arguments
/// are taken as IDs (first the left one, then the right one); any other
/// argument naming an existing file is taken as an input file.
/// </param>
static void Main(string[] args)
{
    const string usage = "Usage: file1 [-o|-p id1] [file2 | [-o|-p][ id2]] [-out outfile]";

    Console.WriteLine("comparePDFs:");
    string[] compareFiles = new string[2];
    compare comparisonLeft = compare.doc;
    compare comparisonRight = compare.doc;
    int numCompareFiles = 0;
    int leftID = -1, rightID = -1;
    string outFile = null;
    bool badArgs = false;
    int n = 0;
    while (n < args.Length && !badArgs)
    {
        var arg = args[n++];
        switch (arg)
        {
            case "-p":
                // The switch applies to the left side only while no left ID has
                // been read yet; it always applies to the right side.
                if (leftID == -1)
                {
                    comparisonLeft = compare.page;
                }
                comparisonRight = compare.page;
                break;
            case "-o":
                if (leftID == -1)
                {
                    comparisonLeft = compare.obj;
                }
                comparisonRight = compare.obj;
                break;
            case "-out":
                // "-out" needs a following file name.
                if (n < args.Length)
                {
                    outFile = args[n++];
                }
                else
                {
                    badArgs = true;
                }
                break;
            default:
                {
                    int num;
                    if (Int32.TryParse(arg, out num))
                    {
                        if (leftID == -1)
                            leftID = num;
                        else
                            rightID = num;
                    }
                    else if (System.IO.File.Exists(arg))
                    {
                        // Only two input files can be compared.
                        if (numCompareFiles < compareFiles.Length)
                        {
                            compareFiles[numCompareFiles++] = arg;
                        }
                        else
                        {
                            badArgs = true;
                        }
                    }
                    break;
                }
        }
    }

    // At least one input file is always needed (a single file is compared with
    // itself in page/object mode); document mode needs two files, the other
    // modes need both IDs.
    bool docMode = comparisonLeft == compare.doc;
    if (badArgs
        || numCompareFiles < 1
        || (docMode && numCompareFiles < 2)
        || (!docMode && rightID == -1))
    {
        Console.WriteLine(usage);
        return;
    }

    using (Library lib = new Library())
    {
        Console.WriteLine("Initialized the library.");
        var visited = new HashSet<string>();
        var rootSkipSet = new HashSet<string> { "Pages", "Metadata" };
        var pageSkipSet = new HashSet<string> { "Parent" };

        TextWriter oldOut = Console.Out;
        StreamWriter writer = null;
        if (outFile != null)
        {
            try
            {
                // FileMode.Create truncates an existing file, so a shorter report
                // never keeps the tail of an older, longer one.
                writer = new StreamWriter(new FileStream(outFile, FileMode.Create, FileAccess.Write));
                Console.SetOut(writer);
            }
            catch (Exception ex)
            {
                Console.WriteLine("Error: Cannot open {0} for writing: {1}", outFile, ex.Message);
                return;
            }
        }

        Document docLeft = null;
        Document docRight = null;
        try
        {
            docLeft = new Document(compareFiles[0]);
            docRight = new Document(compareFiles[numCompareFiles - 1]);
            Console.WriteLine("{0} << vs. >> {1}", compareFiles[0], compareFiles[numCompareFiles - 1]);
            if (docMode)
            {
                compareObjs("Root", docLeft.Root, docRight.Root, ref visited, ref rootSkipSet);
                //TODO: Enumeration of NameTrees, NumberTrees.
                int pagesLeft = docLeft.NumPages;
                int pagesRight = docRight.NumPages;
                // The page tree itself is skipped above, so a differing page
                // count has to be reported here.
                if (pagesLeft != pagesRight)
                {
                    Console.WriteLine("Pages: Count is {0} vs {1}", pagesLeft, pagesRight);
                }
                var comparePages = Math.Min(pagesLeft, pagesRight);
                for (int i = 0; i < comparePages; i++)
                {
                    var path = String.Format("Page[{0}]", i);
                    compareObjs(path, docLeft.GetPage(i).PDFDict, docRight.GetPage(i).PDFDict, ref visited, ref pageSkipSet);
                }
            }
            else
            {
                var label = String.Format("{0} {1} vs. {2} {3}",
                    comparisonLeft == compare.obj ? "Obj" : "Page",
                    leftID,
                    comparisonRight == compare.obj ? "Obj" : "Page",
                    rightID);
                PDFObject leftObj = (comparisonLeft == compare.obj ? docLeft.FindPDFObjectByID(leftID) : docLeft.GetPage(leftID).PDFDict);
                PDFObject rightObj = (comparisonRight == compare.obj ? docRight.FindPDFObjectByID(rightID) : docRight.GetPage(rightID).PDFDict);
                compareObjs(label, leftObj, rightObj, ref visited, ref pageSkipSet);
            }
        }
        catch (LibraryException ex)
        {
            // Written to the original console so the user sees the failure even
            // when the report is redirected to a file.
            oldOut.WriteLine("** Error: {0}", ex.Message);
        }
        finally
        {
            // Runs on every exit path: close the documents, then restore the
            // console and flush/close the report file.
            closeDocument(docLeft);
            closeDocument(docRight);
            if (writer != null)
            {
                Console.SetOut(oldOut);
                writer.Close();
            }
        }
    }
    Console.WriteLine("Done.");
}

// Closes a document on a best-effort basis: a failure is reported on stderr instead of aborting the run.
static void closeDocument(Document doc)
{
    if (doc == null)
    {
        return;
    }
    try
    {
        doc.Close();
    }
    catch (ApplicationException ex)
    {
        Console.Error.WriteLine("Warning: could not close document: {0}", ex.Message);
    }
}
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment