Skip to content

Instantly share code, notes, and snippets.

@lyubenov

lyubenov/itebooks.py

Last active Jun 21, 2020
Embed
What would you like to do?
Removes it-ebooks.info links from the footers of books. Run `python itebooks.py -f /sourcepath/books/thebook.pdf` to clean up your thebook.pdf file.
#!/usr/bin/env python3
import sys
import re
import shutil
import argparse
import binascii
# Literal byte runs that bracket an it-ebooks link annotation.  Two
# variable-length gaps sit between them: the /Rect coordinates and the
# /Border value.
_ANNOTATION_PARTS = (
    b'\n/Type /Annot\n/Subtype /Link\n/Rect [ ',
    b' ]\n/Border ',
    b'\n/A <<\n/Type /Action\n/S /URI\n/URI (http://www.it-ebooks.info/)\n>>',
)

# Hex-encoded form of the annotation regex: every literal run hexlified, the
# gaps written as ``.*?``.  Kept under its original name with its original
# value; remove_evil_links() itself no longer searches hex text.
pattern = b'.*?'.join(binascii.hexlify(part) for part in _ANNOTATION_PARTS)

# The same expression over raw bytes.  DOTALL lets the gaps span any byte,
# exactly as ``.*?`` did over hex text (which contains no newlines).
# NOTE(review): because the gaps may cross line breaks, a match could in
# principle run from an unrelated link annotation to a later it-ebooks one;
# consider ``[^\n]*?`` if over-matching is ever observed.
_ANNOTATION_RE = re.compile(
    b'.*?'.join(re.escape(part) for part in _ANNOTATION_PARTS),
    re.DOTALL,
)

# Host name stripped from whatever is left after the annotations are gone.
_LINK_TEXT = b'www.it-ebooks.info'


def remove_evil_links(pdf_data):
    """Return ``pdf_data`` with the it-ebooks.info links removed.

    Two passes are made over the data:

    1. every link annotation matching ``_ANNOTATION_RE`` is deleted
       (this removes the "clickable" it-ebooks.info links);
    2. every remaining occurrence of ``www.it-ebooks.info`` is deleted
       (the link text assigned to those annotations).

    Args:
        pdf_data: The complete contents of a PDF file as a bytes-like
            object.

    Returns:
        A ``bytes`` object with the matches removed.  Input that contains
        neither the annotation nor the host name comes back unchanged.
    """
    # Work on the raw bytes rather than on their hex encoding: a search over
    # hex text can match at an odd nibble offset, i.e. straddling byte
    # boundaries, which deletes bytes that never contained the link (or
    # leaves an odd-length hex string that cannot be decoded again).
    data = bytes(pdf_data)
    without_annotations = _ANNOTATION_RE.sub(b'', data)
    return without_annotations.replace(_LINK_TEXT, b'')
def _report_io_error(file_path, error):
    """Write a one-line ``path: reason`` message for ``error`` to stderr."""
    # strerror is None for some OSError subclasses (e.g. shutil.Error), so
    # fall back to the exception's own text.
    reason = error.strerror or error
    sys.stderr.write('{0}: {1}\n'.format(file_path, reason))
    sys.stderr.flush()


def main(args):
    """Strip the it-ebooks links from every file named in ``args.files``.

    ``args`` is the namespace produced by the argument parser; the
    attributes read here are ``files`` (iterable of paths), ``verbose`` and
    ``no_backup`` (booleans).

    Each file is read completely into memory, cleaned with
    ``remove_evil_links`` and written back to the same path.  Unless
    ``args.no_backup`` is set, the original is first moved to
    ``<path>.old``.  An I/O failure on one file is reported on stderr and
    the remaining files are still processed.  A keyboard interrupt ends the
    run quietly.
    """
    try:
        # dict.fromkeys drops duplicate paths like set() did, but keeps the
        # order given on the command line and leaves args.files untouched.
        for file_path in dict.fromkeys(args.files):
            if not file_path:
                continue

            if args.verbose:
                print('Processing: {0}'.format(file_path))

            try:
                with open(file_path, 'rb') as input_file:
                    pdf_data = input_file.read()
            except IOError as e:
                _report_io_error(file_path, e)
                continue

            # Clean the data before touching anything on disk, so a failure
            # here leaves the original file where it was.
            new_pdf_data = remove_evil_links(pdf_data)

            try:
                # Backup the file with a different name
                if not args.no_backup:
                    if args.verbose:
                        print('Creating backup: {0}.old'.format(file_path))
                    shutil.move(file_path, '{0}.old'.format(file_path))

                # Save the new file
                with open(file_path, 'wb') as out_file:
                    out_file.write(new_pdf_data)
            except (IOError, OSError) as e:
                # Same treatment as a read error: report it and carry on
                # with the next file instead of aborting the whole batch.
                _report_io_error(file_path, e)
                continue

            if args.verbose:
                print('Saving modified file: {0}'.format(file_path))
    except KeyboardInterrupt:
        # Deliberate: Ctrl-C stops the batch without a traceback.
        pass
if __name__ == '__main__':
    # Command-line entry point: build the parser, parse sys.argv and hand
    # the resulting namespace to main().
    parser = argparse.ArgumentParser(
        description='Remove it-ebooks.info links from PDF files.'
    )
    parser.add_argument(
        '-f', '--files',
        help='One or more PDF files to remove it-ebook watermarks.',
        # '+' enforces the "one or more" promised by the help text; with '*'
        # a bare "-f" was accepted and the script silently did nothing.
        nargs='+', required=True
    )
    parser.add_argument(
        '-n', '--no-backup',
        help='Disables the creating of backups for the files ' +
             'which are being processed.',
        action='store_true'
    )
    parser.add_argument(
        '-v', '--verbose',
        help='Print each processing step as it happens.',
        action='store_true'
    )
    args = parser.parse_args()
    main(args)
@aoxiangwu

This comment has been minimized.

Copy link

@aoxiangwu aoxiangwu commented Apr 26, 2018

If I have an example.com footer/header in a PDF, can I just replace line 24 with example.com?

@jehuamanna

This comment has been minimized.

Copy link

@jehuamanna jehuamanna commented Jun 21, 2020

Thank you so much

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.