-
-
Save datalogics-pgallot/0ada961031425ff563b24a2f5fe6066c to your computer and use it in GitHub Desktop.
A sample App which compares PDF Documents, pages, or indirect objects
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
using System; | |
using System.Collections.Generic; | |
using System.IO; | |
using System.Text; | |
using Datalogics.PDFL; | |
/*
 *
 * A sample which compares PDF Documents, pages, or indirect objects.
 *
 * Copyright (c) 2007-2017, Datalogics, Inc. All rights reserved.
 *
 * The information and code in this sample is for the exclusive use of Datalogics
 * customers and evaluation users only. Datalogics permits you to use, modify and
 * distribute this file in accordance with the terms of your license agreement.
 * Sample code is for demonstrative purposes only and is not intended for production use.
 *
 */
namespace comparePDFs | |
{ | |
class ComparePDFs | |
{ | |
/// <summary>
/// Kind of comparison requested for one side: the default is <c>doc</c>;
/// the "-p" switch selects <c>page</c> and the "-o" switch selects <c>obj</c>.
/// </summary>
enum compare
{
    doc,
    page,
    obj
};
/// <summary>
/// Classification of a PDF object as produced by GetObjType.
/// <c>cosNull</c> is the fallback for anything that is none of the other kinds.
/// </summary>
enum cosType
{
    cosNull,
    Array,
    Boolean,
    Dict,
    Integer,
    Name,
    Real,
    Stream,
    String
};
/// <summary>
/// Classifies a PDF object by testing its runtime type.
/// </summary>
/// <param name="obj">Object to classify; a null reference is accepted.</param>
/// <returns>
/// The cosType matching the first successful type test, or
/// <c>cosType.cosNull</c> when none of the tests succeeds (which includes null).
/// </returns>
static cosType GetObjType(PDFObject obj)
{
    // The tests run in the same order as the original if/else chain.
    if (obj is PDFBoolean) return cosType.Boolean;
    if (obj is PDFInteger) return cosType.Integer;
    if (obj is PDFReal) return cosType.Real;
    if (obj is PDFName) return cosType.Name;
    if (obj is PDFString) return cosType.String;
    if (obj is PDFArray) return cosType.Array;
    if (obj is PDFDict) return cosType.Dict;
    if (obj is PDFStream) return cosType.Stream;

    return cosType.cosNull;
}
/// <summary>
/// Collects the key names of a dictionary into a set of strings.
/// </summary>
/// <param name="dict">Dictionary whose keys are read.</param>
/// <returns>A new set holding the <c>Value</c> of every key in <paramref name="dict"/>.</returns>
static HashSet<string> getKeySet(PDFDict dict)
{
    HashSet<string> names = new HashSet<string>();

    // Each key is treated as a PDFName; its Value is the key text.
    foreach (PDFName entryName in dict.Keys)
        names.Add(entryName.Value);

    return names;
}
/// <summary>
/// Produces a short, human-readable description of a PDF object for the
/// difference report.
/// </summary>
/// <param name="obj">Object to describe; a null reference is accepted.</param>
/// <returns>
/// "null" for null/unrecognised objects, the value text for scalars,
/// "-dict-" / "-stream-" placeholders for containers, and for arrays either
/// a bracketed list of element descriptions (up to six elements) or the
/// placeholder "-array-" for longer arrays.
/// </returns>
static string describeObject(PDFObject obj)
{
    // Arrays longer than this are summarised instead of being listed.
    const int maxInlineElements = 6;

    // Numbers are formatted culture-independently so that reports produced
    // on machines with different regional settings can be compared.
    var invariant = System.Globalization.CultureInfo.InvariantCulture;

    switch (GetObjType(obj))
    {
        case cosType.cosNull:
            return "null";
        case cosType.Boolean:
            return ((PDFBoolean)obj).Value.ToString();
        case cosType.Integer:
            return ((PDFInteger)obj).Value.ToString(invariant);
        case cosType.Real:
            // At most three decimals are shown.
            return ((PDFReal)obj).Value.ToString("0.###", invariant);
        case cosType.Name:
            return ((PDFName)obj).Value;
        case cosType.String:
            return ((PDFString)obj).Value;
        case cosType.Dict:
            return "-dict-";
        case cosType.Stream:
            return "-stream-";
        case cosType.Array:
            {
                var arrayObj = (PDFArray)obj;
                int length = arrayObj.Length;
                if (length > maxInlineElements)
                    return "-array-";

                // Yields "[ ]" when empty and "[ a b c ]" otherwise; every
                // element is followed by one space so all lengths look alike.
                var text = new StringBuilder("[ ");
                for (int i = 0; i < length; i++)
                    text.Append(describeObject(arrayObj.Get(i))).Append(' ');
                return text.Append(']').ToString();
            }
        default:
            return "None";
    }
}
/// <summary>
/// Recursively compares two PDF objects and writes every difference found
/// to the console. Lines containing "&lt;&lt;" describe content present only
/// on the left side, lines containing "&gt;&gt;" content present only on the right.
/// </summary>
/// <param name="path">Label printed in front of every difference found at this level.</param>
/// <param name="left">Left-hand object; may be null.</param>
/// <param name="right">Right-hand object; may be null.</param>
/// <param name="visited">
/// Set of "leftID:rightID" tags for pairs of indirect objects that have already
/// been compared; such a pair is not compared again.
/// </param>
/// <param name="skipSet">Dictionary key names that are excluded from the comparison.</param>
static void compareObjs(string path, PDFObject left, PDFObject right, ref HashSet<string> visited, ref HashSet<string> skipSet)
{
    if (left != null && right != null && left.Indirect && right.Indirect)
    {
        // HashSet.Add reports false when the tag is already present.
        var pairTag = String.Format("{0}:{1}", left.ID, right.ID);
        if (!visited.Add(pairTag))
            return;
    }

    var kindLeft = GetObjType(left);
    var kindRight = GetObjType(right);

    if (kindLeft != kindRight)
    {
        // An integer and a real holding the same number are not a difference.
        bool mismatch;
        if (kindLeft == cosType.Integer && kindRight == cosType.Real)
            mismatch = (double)((PDFInteger)left).Value != ((PDFReal)right).Value;
        else if (kindLeft == cosType.Real && kindRight == cosType.Integer)
            mismatch = ((PDFReal)left).Value != (double)(((PDFInteger)right).Value);
        else
            mismatch = true;

        if (mismatch)
            Console.WriteLine("{0}: {1} vs. {2}", path, describeObject(left), describeObject(right));
        return;
    }

    bool differs;
    switch (kindLeft)
    {
        case cosType.Boolean:
            differs = ((PDFBoolean)left).Value != ((PDFBoolean)right).Value;
            break;

        case cosType.Integer:
            differs = ((PDFInteger)left).Value != ((PDFInteger)right).Value;
            break;

        case cosType.Real:
            differs = ((PDFReal)left).Value != ((PDFReal)right).Value;
            break;

        case cosType.Name:
            differs = !((PDFName)left).Value.Equals(((PDFName)right).Value);
            break;

        case cosType.String:
            differs = !((PDFString)left).Value.Equals(((PDFString)right).Value);
            break;

        case cosType.Array:
            {
                var arrLeft = (PDFArray)left;
                var arrRight = (PDFArray)right;
                var countLeft = arrLeft.Length;
                var countRight = arrRight.Length;
                var common = Math.Min(countLeft, countRight);

                if (countLeft != countRight)
                    Console.WriteLine("{0}[]: Length is {1} vs {2}", path, countLeft, countRight);

                // Elements present on both sides are compared pairwise.
                for (int i = 0; i < common; i++)
                    compareObjs(path + "[" + i + "]", arrLeft.Get(i), arrRight.Get(i), ref visited, ref skipSet);

                // At most one of these loops runs, because common is the smaller length.
                for (int i = common; i < countLeft; i++)
                    Console.WriteLine("{0}[{1}] << {2}", path, i, describeObject(arrLeft.Get(i)));
                for (int i = common; i < countRight; i++)
                    Console.WriteLine("{0}[{1}] >> {2}", path, i, describeObject(arrRight.Get(i)));
                return;
            }

        case cosType.Dict:
            compareDictEntries(path, (PDFDict)left, (PDFDict)right, ref visited, ref skipSet);
            return;

        case cosType.Stream:
            {
                var streamLeft = (PDFStream)left;
                var streamRight = (PDFStream)right;
                var sizeLeft = streamLeft.Length;
                var sizeRight = streamRight.Length;
                if (sizeLeft != sizeRight)
                    Console.WriteLine("{0}: Stream length is {1} vs {2}", path, sizeLeft, sizeRight);

                // Only the stream dictionaries are compared entry by entry.
                compareDictEntries(path, streamLeft.Dict, streamRight.Dict, ref visited, ref skipSet);
                return;
            }

        default:
            // Both sides classified as cosNull: nothing to compare.
            return;
    }

    if (differs)
        Console.WriteLine("{0}: {1} vs {2}", path, describeObject(left), describeObject(right));
}

/// <summary>
/// Compares two dictionaries key by key: reports keys found on one side only,
/// then recurses into the values of the keys both sides share. Keys listed in
/// <paramref name="skipSet"/> are ignored.
/// </summary>
static void compareDictEntries(string path, PDFDict leftDict, PDFDict rightDict, ref HashSet<string> visited, ref HashSet<string> skipSet)
{
    var onlyLeft = getKeySet(leftDict);
    var onlyRight = getKeySet(rightDict);
    onlyLeft.ExceptWith(skipSet);
    onlyRight.ExceptWith(skipSet);

    // Intersecting with the already-filtered right set also drops skipped keys.
    var shared = getKeySet(leftDict);
    shared.IntersectWith(onlyRight);
    onlyLeft.ExceptWith(shared);
    onlyRight.ExceptWith(shared);

    foreach (var key in onlyLeft)
        Console.WriteLine("{0}:{1} << {2}", path, key, describeObject(leftDict.Get(key)));

    foreach (var key in onlyRight)
        Console.WriteLine("{0}:{1} >> {2}", path, key, describeObject(rightDict.Get(key)));

    foreach (var key in shared)
        compareObjs(path + ":" + key, leftDict.Get(key), rightDict.Get(key), ref visited, ref skipSet);
}
/// <summary>
/// Entry point. Parses the command line, opens the input document(s) and
/// writes the differences to the console or, with "-out", to a file.
/// </summary>
/// <param name="args">
/// Arguments as described by the usage text: one or two existing PDF files,
/// optional "-p" (page) or "-o" (object) switches each followed by a numeric
/// id, and an optional "-out outfile" pair.
/// </param>
static void Main(string[] args)
{
    Console.WriteLine("comparePDFs:");

    string[] compareFiles = new string[2];
    compare comparisonLeft = compare.doc;
    compare comparisonRight = compare.doc;
    int numCompareFiles = 0;
    int leftID = -1, rightID = -1;
    string outFile = null;
    bool badArgs = false;

    for (int n = 0; n < args.Length; n++)
    {
        var arg = args[n];
        switch (arg)
        {
            case "-p":
                // Until the first id has been read the switch applies to both
                // sides; afterwards it only changes the right-hand side.
                if (leftID == -1)
                {
                    comparisonLeft = compare.page;
                }
                comparisonRight = compare.page;
                break;

            case "-o":
                if (leftID == -1)
                {
                    comparisonLeft = compare.obj;
                }
                comparisonRight = compare.obj;
                break;

            case "-out":
                // "-out" needs a following file name.
                if (n + 1 < args.Length)
                    outFile = args[++n];
                else
                    badArgs = true;
                break;

            default:
                {
                    int num;
                    if (Int32.TryParse(arg, out num))
                    {
                        if (leftID == -1)
                            leftID = num;
                        else
                            rightID = num;
                    }
                    else if (System.IO.File.Exists(arg))
                    {
                        // Only two input files are supported.
                        if (numCompareFiles < compareFiles.Length)
                            compareFiles[numCompareFiles++] = arg;
                        else
                            badArgs = true;
                    }
                    break;
                }
        }
    }

    // Every mode needs at least one input file; document mode needs two,
    // page/object mode needs both ids.
    if (badArgs || numCompareFiles < 1 ||
        (comparisonLeft == compare.doc && numCompareFiles < 2) ||
        (comparisonLeft != compare.doc && rightID == -1))
    {
        Console.WriteLine("Usage: file1 [-o|-p id1] [file2 | [-o|-p][ id2]] [-out outfile]");
        return;
    }

    using (Library lib = new Library())
    {
        Console.WriteLine("Initialized the library.");

        var visited = new HashSet<string>();
        var rootSkipSet = new HashSet<string> { "Pages", "Metadata" };
        var pageSkipSet = new HashSet<string> { "Parent" };

        TextWriter oldOut = Console.Out;
        StreamWriter writer = null;
        Document docLeft = null;
        Document docRight = null;

        try
        {
            if (outFile != null)
            {
                try
                {
                    // FileMode.Create truncates an existing file so that no
                    // stale bytes of an older, longer report are left behind.
                    writer = new StreamWriter(new FileStream(outFile, FileMode.Create, FileAccess.Write));
                    Console.SetOut(writer);
                }
                catch (Exception ex)
                {
                    Console.WriteLine("Error: Cannot open {0} for writing: {1}", outFile, ex.Message);
                    return;
                }
            }

            // With a single input file both sides refer to the same file.
            string leftFile = compareFiles[0];
            string rightFile = compareFiles[numCompareFiles - 1];
            docLeft = new Document(leftFile);
            docRight = new Document(rightFile);
            Console.WriteLine("{0} << vs. >> {1}", leftFile, rightFile);

            if (comparisonLeft == compare.doc)
            {
                compareObjs("Root", docLeft.Root, docRight.Root, ref visited, ref rootSkipSet);
                //TODO: Enumeration of NameTrees, NumberTrees.

                int pagesLeft = docLeft.NumPages;
                int pagesRight = docRight.NumPages;
                if (pagesLeft != pagesRight)
                    Console.WriteLine("Pages: Count is {0} vs {1}", pagesLeft, pagesRight);

                // Only pages that exist in both documents can be compared.
                int comparePages = Math.Min(pagesLeft, pagesRight);
                for (int i = 0; i < comparePages; i++)
                {
                    var path = String.Format("Page[{0}]", i);
                    compareObjs(path, docLeft.GetPage(i).PDFDict, docRight.GetPage(i).PDFDict, ref visited, ref pageSkipSet);
                }
            }
            else
            {
                var label = String.Format("{0} {1} vs. {2} {3}",
                    comparisonLeft == compare.obj ? "Obj" : "Page",
                    leftID,
                    comparisonRight == compare.obj ? "Obj" : "Page",
                    rightID);

                PDFObject leftObj = comparisonLeft == compare.obj
                    ? docLeft.FindPDFObjectByID(leftID)
                    : docLeft.GetPage(leftID).PDFDict;
                PDFObject rightObj = comparisonRight == compare.obj
                    ? docRight.FindPDFObjectByID(rightID)
                    : docRight.GetPage(rightID).PDFDict;

                compareObjs(label, leftObj, rightObj, ref visited, ref pageSkipSet);
            }
        }
        catch (LibraryException ex)
        {
            Console.WriteLine("** Error: {0}", ex.Message);
        }
        finally
        {
            // Runs on success, on error and on the early return above, so the
            // documents are closed and console output is restored and flushed.
            closeDocument(docLeft);
            closeDocument(docRight);
            if (writer != null)
            {
                Console.SetOut(oldOut);
                writer.Close(); // also closes the underlying FileStream
            }
        }
    }

    Console.WriteLine("Done.");
}

/// <summary>
/// Closes a document on a best-effort basis: a failure is reported on the
/// error stream instead of aborting the program.
/// </summary>
static void closeDocument(Document doc)
{
    if (doc == null)
        return;

    try
    {
        doc.Close();
    }
    catch (ApplicationException ex)
    {
        Console.Error.WriteLine("Warning: could not close document: {0}", ex.Message);
    }
}
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment