# raphiz/pdf_remove_watermark.py

## pdf_remove_watermark.py
from PyPDF2 import PdfFileReader, PdfFileWriter
from PyPDF2.pdf import ContentStream
from PyPDF2.generic import TextStringObject, NameObject
from PyPDF2.utils import b_

# Strip a text watermark from every page of input.pdf and write the result to
# output.pdf.  A watermark is any TJ (show text) operation whose first array
# element is a text string starting with ``wm_text``; its text is replaced by
# ``replace_with``.
wm_text = 'Persönliches Exemplar von'
replace_with = ''

# The reader is handed the stream itself (not its contents), so the input
# file is kept open until the output has been written.
with open('input.pdf', "rb") as input_stream:
    # Load PDF into pyPDF
    source = PdfFileReader(input_stream)
    output = PdfFileWriter()

    # For each page (the index gets its own name so it is not shadowed by
    # the page object)
    for page_number in range(source.getNumPages()):
        # Get the current page and its contents
        page = source.getPage(page_number)

        # A page without a /Contents entry has nothing to rewrite; it is
        # still copied to the output unchanged.
        if "/Contents" in page:
            content_object = page["/Contents"].getObject()
            content = ContentStream(content_object, source)

            # Loop over all pdf elements
            for operands, operator in content.operations:
                # You might adapt this part depending on your PDF file
                if operator != b_("TJ"):
                    continue
                # Skip TJ operations with no operand or an empty array
                # instead of failing on the index below.
                if not operands or not operands[0]:
                    continue
                text = operands[0][0]
                if isinstance(text, TextStringObject) and text.startswith(wm_text):
                    # TJ takes an array operand, so keep the array and only
                    # replace its contents with the substitute string.
                    operands[0][:] = [TextStringObject(replace_with)]

            # Set the modified content as content object on the page
            page[NameObject('/Contents')] = content

        # Add the page to the output
        output.addPage(page)

    # Write the stream; the context manager flushes and closes output.pdf
    with open("output.pdf", "wb") as outputStream:
        output.write(outputStream)
	from PyPDF2 import PdfFileReader, PdfFileWriter
	from PyPDF2.pdf import ContentStream
	from PyPDF2.generic import TextStringObject, NameObject
	from PyPDF2.utils import b_

	# Strip a text watermark from every page of input.pdf and write the
	# result to output.pdf.  A watermark is any TJ (show text) operation
	# whose first array element is a text string starting with ``wm_text``;
	# its text is replaced by ``replace_with``.
	wm_text = 'Persönliches Exemplar von'
	replace_with = ''

	# The reader is handed the stream itself (not its contents), so the
	# input file is kept open until the output has been written.
	with open('input.pdf', "rb") as input_stream:
		# Load PDF into pyPDF
		source = PdfFileReader(input_stream)
		output = PdfFileWriter()

		# For each page (the index gets its own name so it is not
		# shadowed by the page object)
		for page_number in range(source.getNumPages()):
			# Get the current page and its contents
			page = source.getPage(page_number)

			# A page without a /Contents entry has nothing to rewrite;
			# it is still copied to the output unchanged.
			if "/Contents" in page:
				content_object = page["/Contents"].getObject()
				content = ContentStream(content_object, source)

				# Loop over all pdf elements
				for operands, operator in content.operations:
					# You might adapt this part depending on your PDF file
					if operator != b_("TJ"):
						continue
					# Skip TJ operations with no operand or an empty
					# array instead of failing on the index below.
					if not operands or not operands[0]:
						continue
					text = operands[0][0]
					if isinstance(text, TextStringObject) and text.startswith(wm_text):
						# TJ takes an array operand, so keep the array
						# and only replace its contents.
						operands[0][:] = [TextStringObject(replace_with)]

				# Set the modified content as content object on the page
				page[NameObject('/Contents')] = content

			# Add the page to the output
			output.addPage(page)

		# Write the stream; the context manager flushes and closes output.pdf
		with open("output.pdf", "wb") as outputStream:
			output.write(outputStream)