Skip to content

Instantly share code, notes, and snippets.

@ivanistheone
Created March 25, 2014 18:30
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ivanistheone/219dad11a30efef82b7e to your computer and use it in GitHub Desktop.
Save ivanistheone/219dad11a30efef82b7e to your computer and use it in GitHub Desktop.
I'm using latexdiff to show changes that I make in a large text (200+ pages). Most of my changes affect only a few pages though, so I don't want my readers to sift through 200 pages looking for the rare pages where isolated typo corrections were made: only about 30pp out of the 200 will contain changes. I would like to: Insert hidden PDF markers…
% Preamble that defines the \DIFadd / \DIFdel change-markup macros and, in
% addition to colouring the changed text, drops a PDF annotation whose text is
% DIFFCHANGE next to every change so that changed pages can be found later.
% ulem provides the \uwave and \sout decorations used below.
\RequirePackage[normalem]{ulem}
\RequirePackage{color}
% Named colours RED and BLUE. Note that \DIFadd / \DIFdel below refer to the
% lowercase names `blue` and `red`, not to these two definitions.
\definecolor{RED}{rgb}{1,0,0}
\definecolor{BLUE}{rgb}{0,0,1}
%% modif start
% pdfcomment is loaded for \pdfmargincomment.
\usepackage{pdfcomment}
% Added text: emit a margin comment with contents DIFFCHANGE (opacity=0.0, so
% the marker is meant to be hidden), then typeset the text in blue with a wavy
% underline. \providecommand only takes effect if the macro is not yet defined.
\providecommand{\DIFadd}[1]{\pdfmargincomment[opacity=0.0]{DIFFCHANGE}{\protect\color{blue}\uwave{#1}}}
% Deleted text: same DIFFCHANGE marker, then the text in red, struck out.
\providecommand{\DIFdel}[1]{\pdfmargincomment[opacity=0.0]{DIFFCHANGE}{\protect\color{red}\sout{#1}}}
%% modifs end
% Begin/end hooks around added and deleted passages: all defined as empty.
\providecommand{\DIFaddbegin}{}
\providecommand{\DIFaddend}{}
\providecommand{\DIFdelbegin}{}
\providecommand{\DIFdelend}{}
% "FL" variants simply delegate to \DIFadd / \DIFdel, so they emit the same
% DIFFCHANGE marker (presumably the forms used inside floats -- confirm
% against the latexdiff documentation).
\providecommand{\DIFaddFL}[1]{\DIFadd{#1}}
\providecommand{\DIFdelFL}[1]{\DIFdel{#1}}
% Begin/end hooks for the FL variants: also empty.
\providecommand{\DIFaddbeginFL}{}
\providecommand{\DIFaddendFL}{}
\providecommand{\DIFdelbeginFL}{}
\providecommand{\DIFdelendFL}{}
% End of preamble.
\begin{document}
# Setup and params
############################################
# PDF that is scanned for DIFFCHANGE annotations.
INFILENAME = "diff.pdf"
# PDF that receives only the pages on which a change was found.
OUTFILENAME = "diff_subset.pdf"
# Zero-based indices of the pages to select; a set, so a page that carries
# several markers is recorded only once.
pages_with_changes = set()
# Find pages with DIFFCHANGE annotations on them
################################################
from pdfminer.pdfparser import PDFParser
from pdfminer.pdfdocument import PDFDocument
from pdfminer.pdfpage import PDFPage
from pdfminer.pdfinterp import PDFResourceManager, PDFPageInterpreter
from pdfminer.pdfdevice import PDFDevice
from pdfminer.layout import LAParams
from pdfminer.converter import PDFPageAggregator
from pdfminer.pdftypes import resolve1

# Text that the LaTeX side stores in every marker annotation.
_MARKER_TEXT = 'DIFFCHANGE'


def _literal_name(obj):
    """Return the name of a PDF name object as text (None stays None)."""
    name = getattr(obj, 'name', obj)
    if isinstance(name, bytes):
        name = name.decode('latin-1')
    return name


def _pdf_text(value):
    """Return a PDF string value as text.

    The parser may hand strings back as raw bytes; a leading FE FF byte-order
    mark means the string is UTF-16BE, anything else is decoded bytewise.
    """
    if isinstance(value, bytes):
        if value.startswith(b'\xfe\xff'):
            return value[2:].decode('utf-16-be', 'replace')
        return value.decode('latin-1')
    return value


def _is_diff_marker(annot):
    """Tell whether *annot* is a Text annotation whose contents is DIFFCHANGE.

    *annot* may be an indirect reference or an already resolved dictionary.
    Missing keys make the annotation a non-match instead of raising KeyError.
    """
    annot = resolve1(annot)
    if not isinstance(annot, dict):
        return False
    # /Type is optional in annotation dictionaries, so only reject an
    # annotation whose /Type is present and is something other than Annot.
    if _literal_name(resolve1(annot.get('Type'))) not in (None, 'Annot'):
        return False
    if _literal_name(resolve1(annot.get('Subtype'))) != 'Text':
        return False
    return _pdf_text(resolve1(annot.get('Contents'))) == _MARKER_TEXT


# Open file; the with-block closes it even if parsing fails part-way. Pages
# are produced lazily from the open file, so the scan stays inside the block.
with open(INFILENAME, 'rb') as infp:
    print("Opened " + INFILENAME + " for reading")
    # Create a PDF parser object associated with the file object.
    parser = PDFParser(infp)
    # Create a PDF document object that stores the document structure.
    document = PDFDocument(parser)
    # Layout-analysis objects. None of them is used by the annotation scan
    # below; they are kept so the module-level names stay available.
    rsrcmgr = PDFResourceManager()
    laparams = LAParams()
    device = PDFPageAggregator(rsrcmgr, laparams=laparams)
    interpreter = PDFPageInterpreter(rsrcmgr, device)
    pages_gen = PDFPage.create_pages(document)
    for pgnum, page in enumerate(pages_gen):
        # page.annots can be None, a list, or a reference to a list.
        annots = resolve1(page.annots) or []
        if any(_is_diff_marker(a) for a in annots):
            # pgnum is zero-based: the first page of the PDF is index 0.
            pages_with_changes.add(pgnum)
print("The following pages have changes in them:")
print(sorted(pages_with_changes))
# Write out subset
############################################
import sys
import os
from pdfrw import PdfReader, PdfWriter

# Copy the selected pages, in ascending page order, from the input PDF into a
# new PDF. pages_with_changes holds zero-based indices, which is also how the
# reader's page list is indexed here.
pages = PdfReader(INFILENAME).pages
outdata = PdfWriter()
for pagenum in sorted(pages_with_changes):
    outdata.addpage(pages[pagenum])
outdata.write(OUTFILENAME)
# Single-argument print(...) runs unchanged on both Python 2 and Python 3.
print("Wrote output to " + OUTFILENAME)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment