using System; | |
using System.Collections.Generic; | |
using System.IO; | |
using System.Text; | |
using Datalogics.PDFL; | |
/*
 *
 * A sample which compares PDF Documents, pages, or indirect objects.
 *
 * Copyright (c) 2007-2017, Datalogics, Inc. All rights reserved.
 *
 * The information and code in this sample is for the exclusive use of Datalogics
 * customers and evaluation users only. Datalogics permits you to use, modify and
 * distribute this file in accordance with the terms of your license agreement.
 * Sample code is for demonstrative purposes only and is not intended for production use.
 *
 */
namespace comparePDFs | |
{ | |
class ComparePDFs | |
{ | |
/// <summary>
/// Selects what one side of a comparison refers to: the whole document,
/// a page, or an object looked up by ID.
/// </summary>
enum compare
{
    doc,
    page,
    obj
}
/// <summary>
/// Categories used by this sample to classify a PDF object before comparing
/// or describing it.
/// </summary>
enum cosType
{
    cosNull,
    Array,
    Boolean,
    Dict,
    Integer,
    Name,
    Real,
    Stream,
    String
}
/// <summary>
/// Classifies a PDF object into one of the <see cref="cosType"/> categories.
/// </summary>
/// <param name="obj">The object to classify; may be null.</param>
/// <returns>
/// The category of the first matching wrapper type, or <c>cosType.cosNull</c>
/// when <paramref name="obj"/> is null or matches none of the checked types.
/// </returns>
static cosType GetObjType(PDFObject obj)
{
    // The checks run in a fixed order and the first match wins.
    if (obj is PDFBoolean)
    {
        return cosType.Boolean;
    }
    if (obj is PDFInteger)
    {
        return cosType.Integer;
    }
    if (obj is PDFReal)
    {
        return cosType.Real;
    }
    if (obj is PDFName)
    {
        return cosType.Name;
    }
    if (obj is PDFString)
    {
        return cosType.String;
    }
    if (obj is PDFArray)
    {
        return cosType.Array;
    }
    if (obj is PDFDict)
    {
        return cosType.Dict;
    }
    if (obj is PDFStream)
    {
        return cosType.Stream;
    }

    return cosType.cosNull;
}
/// <summary>
/// Collects the key names of a PDF dictionary into a set of strings.
/// </summary>
/// <param name="dict">The dictionary whose keys are read.</param>
/// <returns>A new set holding the <c>Value</c> of every key in <paramref name="dict"/>.</returns>
static HashSet<string> getKeySet(PDFDict dict)
{
    var names = new HashSet<string>();

    foreach (var entry in dict.Keys)
    {
        // Each key is expected to be a PDFName; the cast throws otherwise.
        var keyName = (PDFName)entry;
        names.Add(keyName.Value);
    }

    return names;
}
/// <summary>
/// Produces a short, human-readable description of a PDF object for use in
/// difference reports.
/// </summary>
/// <param name="obj">The object to describe; may be null.</param>
/// <returns>
/// The scalar value as text, <c>"null"</c> for a null/unrecognized object,
/// <c>"-dict-"</c> or <c>"-stream-"</c> for containers, and for arrays either
/// a bracketed list of element descriptions (up to six elements) or
/// <c>"-array-"</c>.
/// </returns>
static string describeObject(PDFObject obj)
{
    switch (GetObjType(obj))
    {
        case cosType.Boolean:
            return ((PDFBoolean)obj).Value.ToString();

        case cosType.Integer:
            return ((PDFInteger)obj).Value.ToString();

        case cosType.Real:
            // At most three fractional digits are shown.
            return ((PDFReal)obj).Value.ToString("0.###");

        case cosType.Name:
            return ((PDFName)obj).Value;

        case cosType.String:
            return ((PDFString)obj).Value;

        case cosType.Array:
            return describeArray((PDFArray)obj);

        case cosType.Dict:
            return "-dict-";

        case cosType.Stream:
            return "-stream-";

        case cosType.cosNull:
            return "null";

        default:
            return "None";
    }
}

/// <summary>
/// Describes an array: short arrays are listed element by element, longer
/// ones are summarized as <c>"-array-"</c>.
/// </summary>
static string describeArray(PDFArray arrayObj)
{
    var count = arrayObj.Length;

    // Only arrays of zero to six elements are expanded.
    if (count < 0 || count > 6)
    {
        return "-array-";
    }
    if (count == 0)
    {
        return "[ ]";
    }

    var text = new StringBuilder("[");
    for (int index = 0; index < count; index++)
    {
        text.Append(' ');
        text.Append(describeObject(arrayObj.Get(index)));
    }

    // A single-element array is closed with a padding space; longer ones are not.
    text.Append(count == 1 ? " ]" : "]");
    return text.ToString();
}
/// <summary>
/// Recursively compares two PDF objects and writes every difference found to
/// the console, prefixed with the path that leads to it.
/// </summary>
/// <param name="path">Label describing where in the document the two objects sit.</param>
/// <param name="left">Object from the left document; may be null.</param>
/// <param name="right">Object from the right document; may be null.</param>
/// <param name="visited">
/// Tags of indirect object pairs already compared; a pair found here is not
/// compared again, and new pairs are added.
/// </param>
/// <param name="skipSet">Dictionary keys that are ignored on both sides.</param>
static void compareObjs(string path, PDFObject left, PDFObject right, ref HashSet<string> visited, ref HashSet<string> skipSet)
{
    // Remember each pair of indirect objects so that it is only walked once.
    bool bothIndirect = left != null && right != null && left.Indirect && right.Indirect;
    if (bothIndirect)
    {
        var pairTag = String.Format("{0}:{1}", left.ID, right.ID);
        if (!visited.Add(pairTag))
        {
            return;
        }
    }

    var leftKind = GetObjType(left);
    var rightKind = GetObjType(right);

    if (leftKind != rightKind)
    {
        // An integer and a real holding the same number are not a difference.
        bool differs = true;
        if (leftKind == cosType.Integer && rightKind == cosType.Real)
        {
            differs = (double)((PDFInteger)left).Value != ((PDFReal)right).Value;
        }
        else if (leftKind == cosType.Real && rightKind == cosType.Integer)
        {
            differs = ((PDFReal)left).Value != (double)((PDFInteger)right).Value;
        }

        if (differs)
        {
            Console.WriteLine("{0}: {1} vs. {2}", path, describeObject(left), describeObject(right));
        }
        return;
    }

    bool scalarMismatch;
    switch (leftKind)
    {
        case cosType.Boolean:
            scalarMismatch = ((PDFBoolean)left).Value != ((PDFBoolean)right).Value;
            break;

        case cosType.Integer:
            scalarMismatch = ((PDFInteger)left).Value != ((PDFInteger)right).Value;
            break;

        case cosType.Real:
            scalarMismatch = ((PDFReal)left).Value != ((PDFReal)right).Value;
            break;

        case cosType.Name:
            scalarMismatch = !((PDFName)left).Value.Equals(((PDFName)right).Value);
            break;

        case cosType.String:
            scalarMismatch = !((PDFString)left).Value.Equals(((PDFString)right).Value);
            break;

        case cosType.Array:
            compareArrays(path, (PDFArray)left, (PDFArray)right, ref visited, ref skipSet);
            return;

        case cosType.Dict:
            compareDictEntries(path, (PDFDict)left, (PDFDict)right, ref visited, ref skipSet);
            return;

        case cosType.Stream:
            compareStreams(path, (PDFStream)left, (PDFStream)right, ref visited, ref skipSet);
            return;

        default:
            // Both sides are null/unrecognized: nothing to report.
            return;
    }

    if (scalarMismatch)
    {
        Console.WriteLine("{0}: {1} vs {2}", path, describeObject(left), describeObject(right));
    }
}

/// <summary>
/// Compares two arrays: reports a length difference, compares the elements
/// both sides have, then lists the surplus elements of the longer side.
/// </summary>
static void compareArrays(string path, PDFArray leftArray, PDFArray rightArray, ref HashSet<string> visited, ref HashSet<string> skipSet)
{
    var leftCount = leftArray.Length;
    var rightCount = rightArray.Length;
    var common = Math.Min(leftCount, rightCount);

    if (leftCount != rightCount)
    {
        Console.WriteLine("{0}[]: Length is {1} vs {2}", path, leftCount, rightCount);
    }

    for (int index = 0; index < common; index++)
    {
        var elementPath = path + "[" + index.ToString() + "]";
        compareObjs(elementPath, leftArray.Get(index), rightArray.Get(index), ref visited, ref skipSet);
    }

    // "<<" marks elements only the left array has, ">>" those only the right has.
    for (int index = common; index < leftCount; index++)
    {
        Console.WriteLine("{0}[{1}] << {2}", path, index, describeObject(leftArray.Get(index)));
    }
    for (int index = common; index < rightCount; index++)
    {
        Console.WriteLine("{0}[{1}] >> {2}", path, index, describeObject(rightArray.Get(index)));
    }
}

/// <summary>
/// Compares two dictionaries: lists keys present on one side only (ignoring
/// the skip set), then recurses into the values of the keys both sides share.
/// </summary>
static void compareDictEntries(string path, PDFDict leftDict, PDFDict rightDict, ref HashSet<string> visited, ref HashSet<string> skipSet)
{
    var leftOnly = getKeySet(leftDict);
    var rightOnly = getKeySet(rightDict);
    leftOnly.ExceptWith(skipSet);
    rightOnly.ExceptWith(skipSet);

    // Shared keys: everything on the left that also survives on the right.
    var shared = getKeySet(leftDict);
    shared.IntersectWith(rightOnly);

    leftOnly.ExceptWith(shared);
    rightOnly.ExceptWith(shared);

    foreach (var key in leftOnly)
    {
        Console.WriteLine("{0}:{1} << {2}", path, key, describeObject(leftDict.Get(key)));
    }
    foreach (var key in rightOnly)
    {
        Console.WriteLine("{0}:{1} >> {2}", path, key, describeObject(rightDict.Get(key)));
    }
    foreach (var key in shared)
    {
        var entryPath = path + ":" + key;
        compareObjs(entryPath, leftDict.Get(key), rightDict.Get(key), ref visited, ref skipSet);
    }
}

/// <summary>
/// Compares two streams by their reported length and by their dictionaries.
/// The stream data itself is not read here.
/// </summary>
static void compareStreams(string path, PDFStream leftStream, PDFStream rightStream, ref HashSet<string> visited, ref HashSet<string> skipSet)
{
    var leftSize = leftStream.Length;
    var rightSize = rightStream.Length;
    if (leftSize != rightSize)
    {
        Console.WriteLine("{0}: Stream length is {1} vs {2}", path, leftSize, rightSize);
    }

    compareDictEntries(path, leftStream.Dict, rightStream.Dict, ref visited, ref skipSet);
}
/// <summary>
/// Entry point: parses the command line, opens the document(s) and prints the
/// differences between two documents, two pages, or two objects.
/// </summary>
/// <param name="args">
/// One or two existing PDF file names, optional <c>-p</c> (page) or <c>-o</c>
/// (object) switches each followed by a numeric ID, and an optional
/// <c>-out file</c> to redirect the report into a file.
/// </param>
static void Main(string[] args)
{
    Console.WriteLine("comparePDFs:");

    string[] compareFiles = new string[2];
    compare comparisonLeft = compare.doc;
    compare comparisonRight = compare.doc;
    int numCompareFiles = 0;
    int leftID = -1, rightID = -1;
    string outFile = null;
    bool badArgs = false;

    int n = 0;
    while (n < args.Length)
    {
        var arg = args[n++];
        switch (arg)
        {
            case "-p":
                // Before the first ID the switch applies to both sides;
                // afterwards it only changes the right side.
                if (leftID == -1)
                {
                    comparisonLeft = compare.page;
                }
                comparisonRight = compare.page;
                break;

            case "-o":
                if (leftID == -1)
                {
                    comparisonLeft = compare.obj;
                }
                comparisonRight = compare.obj;
                break;

            case "-out":
                // "-out" must be followed by a file name; do not read past the end.
                if (n < args.Length)
                {
                    outFile = args[n++];
                }
                else
                {
                    badArgs = true;
                }
                break;

            default:
                {
                    int num;
                    if (Int32.TryParse(arg, out num))
                    {
                        if (leftID == -1)
                        {
                            leftID = num;
                        }
                        else
                        {
                            rightID = num;
                        }
                    }
                    else if (File.Exists(arg))
                    {
                        // Only two input files are supported; a third is a usage error
                        // rather than an out-of-range write.
                        if (numCompareFiles < compareFiles.Length)
                        {
                            compareFiles[numCompareFiles++] = arg;
                        }
                        else
                        {
                            badArgs = true;
                        }
                    }
                    break;
                }
        }
    }

    // At least one file is always needed; document mode needs two files and
    // page/object mode needs both IDs.
    if (badArgs ||
        numCompareFiles < 1 ||
        (comparisonLeft == compare.doc && numCompareFiles < 2) ||
        (comparisonLeft != compare.doc && rightID == -1))
    {
        Console.WriteLine("Usage: file1 [-o|-p id1] [file2 | [-o|-p][ id2]] [-out outfile]");
        return;
    }

    using (Library lib = new Library())
    {
        Console.WriteLine("Initialized the library.");

        var visited = new HashSet<string>();
        var rootSkipSet = new HashSet<string>();
        var pageSkipSet = new HashSet<string>();
        rootSkipSet.Add("Pages");
        rootSkipSet.Add("Metadata");
        pageSkipSet.Add("Parent");

        TextWriter oldOut = Console.Out;
        StreamWriter writer = null;
        Document docLeft = null;
        Document docRight = null;

        try
        {
            if (outFile != null)
            {
                try
                {
                    // Create/truncate the report file so an earlier, longer report
                    // cannot leave stale text behind the new one.
                    writer = new StreamWriter(outFile, false);
                    Console.SetOut(writer);
                }
                catch (Exception ex)
                {
                    Console.WriteLine("Error: Cannot open {0} for writing: {1}", outFile, ex.Message);
                    return;
                }
            }

            // With a single input file both sides refer to the same file.
            docLeft = new Document(compareFiles[0]);
            docRight = new Document(compareFiles[numCompareFiles - 1]);
            Console.WriteLine("{0} << vs. >> {1}", compareFiles[0], compareFiles[numCompareFiles - 1]);

            if (comparisonLeft == compare.doc)
            {
                compareObjs("Root", docLeft.Root, docRight.Root, ref visited, ref rootSkipSet);
                //TODO: Enumeration of NameTrees, NumberTrees.

                int pagesLeft = docLeft.NumPages;
                int pagesRight = docRight.NumPages;

                // "Pages" is skipped in the root comparison and only the common
                // pages are walked below, so report a differing page count here.
                if (pagesLeft != pagesRight)
                {
                    Console.WriteLine("Pages: Count is {0} vs {1}", pagesLeft, pagesRight);
                }

                var comparePages = Math.Min(pagesLeft, pagesRight);
                for (int i = 0; i < comparePages; i++)
                {
                    var path = String.Format("Page[{0}]", i);
                    compareObjs(path, docLeft.GetPage(i).PDFDict, docRight.GetPage(i).PDFDict, ref visited, ref pageSkipSet);
                }
            }
            else
            {
                var label = String.Format("{0} {1} vs. {2} {3}",
                    comparisonLeft == compare.obj ? "Obj" : "Page",
                    leftID,
                    comparisonRight == compare.obj ? "Obj" : "Page",
                    rightID);

                PDFObject leftObj = (comparisonLeft == compare.obj
                    ? docLeft.FindPDFObjectByID(leftID)
                    : docLeft.GetPage(leftID).PDFDict);
                PDFObject rightObj = (comparisonRight == compare.obj
                    ? docRight.FindPDFObjectByID(rightID)
                    : docRight.GetPage(rightID).PDFDict);

                compareObjs(label, leftObj, rightObj, ref visited, ref pageSkipSet);
            }
        }
        catch (LibraryException ex)
        {
            Console.WriteLine("** Error: {0}", ex.Message);
        }
        finally
        {
            // Close each document on its own so a failure on one does not
            // prevent closing the other. Failures while closing are ignored
            // on purpose (best effort, as in the original sample).
            if (docLeft != null)
            {
                try { docLeft.Close(); }
                catch (ApplicationException) { }
            }
            if (docRight != null)
            {
                try { docRight.Close(); }
                catch (ApplicationException) { }
            }

            // Always restore the console and flush/close the report file,
            // including when the comparison failed part-way through.
            if (writer != null)
            {
                Console.SetOut(oldOut);
                writer.Close();
            }
        }
    }

    Console.WriteLine("Done.");
}
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment