-
-
Save ivanistheone/219dad11a30efef82b7e to your computer and use it in GitHub Desktop.
I'm using latexdiff to show changes that I make in a large text (200+ pages). Most of my changes affect only a few pages though, so I don't want my readers to sift through 200 pages looking for the rare pages where isolated typo corrections were made: only about 30 of the 200 pages will contain changes. I would like to: Insert hidden PDF markers…
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
%% Preamble for a latexdiff-generated document.
%% Besides the usual visual markup, every added or deleted passage also emits
%% an invisible PDF margin comment whose text is DIFFCHANGE, so that a
%% post-processing tool can find the pages that contain changes.
\RequirePackage[normalem]{ulem}
\RequirePackage{color}
\definecolor{RED}{rgb}{1,0,0}
\definecolor{BLUE}{rgb}{0,0,1}
%% modif start
% pdfcomment provides \pdfmargincomment; opacity=0.0 makes the comment icon
% fully transparent, so the marker does not show on the page.
\usepackage{pdfcomment}
% Added text: marker comment, then the text in blue with a wavy underline.
\providecommand{\DIFadd}[1]{\pdfmargincomment[opacity=0.0]{DIFFCHANGE}{\protect\color{blue}\uwave{#1}}}
% Deleted text: marker comment, then the text in red, struck out.
\providecommand{\DIFdel}[1]{\pdfmargincomment[opacity=0.0]{DIFFCHANGE}{\protect\color{red}\sout{#1}}}
%% modifs end
% The begin/end hooks are intentionally empty.
\providecommand{\DIFaddbegin}{}
\providecommand{\DIFaddend}{}
\providecommand{\DIFdelbegin}{}
\providecommand{\DIFdelend}{}
% The FL variants reuse the plain add/delete markup, markers included.
\providecommand{\DIFaddFL}[1]{\DIFadd{#1}}
\providecommand{\DIFdelFL}[1]{\DIFdel{#1}}
\providecommand{\DIFaddbeginFL}{}
\providecommand{\DIFaddendFL}{}
\providecommand{\DIFdelbeginFL}{}
\providecommand{\DIFdelendFL}{}
\begin{document}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Script configuration
############################################
# PDF that is scanned for change markers (input)
INFILENAME = "diff.pdf"
# PDF that receives the selected pages (output)
OUTFILENAME = "diff_subset.pdf"
# Zero-based indices of the pages to keep; a set, so each page appears once
pages_with_changes = set()
# Find pages with DIFFCHANGE annotations on them
################################################
# NOTE: the layout/interpreter imports are kept for compatibility, but the
# annotation scan below only needs the parser, document and page classes.
from pdfminer.pdfparser import PDFParser
from pdfminer.pdfdocument import PDFDocument
from pdfminer.pdfpage import PDFPage
from pdfminer.pdfinterp import PDFResourceManager, PDFPageInterpreter
from pdfminer.pdfdevice import PDFDevice
from pdfminer.layout import LAParams
from pdfminer.converter import PDFPageAggregator
# resolve1() follows an indirect PDF reference and returns any other object
# unchanged, so it copes with both referenced and inline annotation entries.
from pdfminer.pdftypes import resolve1

# Marker text to look for in the /Contents of a Text annotation.  PDF string
# objects may be returned as bytes (e.g. on Python 3), so the marker is
# accepted in both its text and its bytes form.
DIFF_MARKERS = ('DIFFCHANGE', 'DIFFCHANGE'.encode('ascii'))

# Open file; the with-block closes it even if parsing raises.
with open(INFILENAME, 'rb') as infp:
    print("Opened " + INFILENAME + " for reading")
    # Create a PDF parser object associated with the file object.
    parser = PDFParser(infp)
    # Create a PDF document object that stores the document structure.
    document = PDFDocument(parser)
    for pgnum, page in enumerate(PDFPage.create_pages(document)):
        # Pages without annotations have a falsy page.annots.
        for entry in resolve1(page.annots) or []:
            annot = resolve1(entry)
            if not isinstance(annot, dict):
                continue
            # /Type is optional in an annotation dictionary: reject the entry
            # only when a different type is stated explicitly.
            type_name = getattr(resolve1(annot.get('Type')), 'name', None)
            if type_name not in (None, 'Annot'):
                continue
            subtype_name = getattr(
                resolve1(annot.get('Subtype')), 'name', None)
            if subtype_name != 'Text':
                continue
            if resolve1(annot.get('Contents')) in DIFF_MARKERS:
                # pgnum is the zero-based page index.
                pages_with_changes.add(pgnum)
                # One marker is enough to select the page.
                break

print("The following pages have changes in them:")
print(sorted(pages_with_changes))
# Write out subset
############################################
import sys
import os
from pdfrw import PdfReader, PdfWriter

# Copy the selected pages, in ascending page order, into a new PDF.
# pages_with_changes holds zero-based indices, which is how `pages` is indexed.
pages = PdfReader(INFILENAME).pages
outdata = PdfWriter()
for pagenum in sorted(pages_with_changes):
    outdata.addpage(pages[pagenum])
outdata.write(OUTFILENAME)
# Single-argument print(...) behaves the same on Python 2 and Python 3.
print("Wrote output to " + OUTFILENAME)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment