# lyubenov/itebooks.py

## itebooks.py
#!/usr/bin/env python3

import sys
import re
import shutil
import argparse
import binascii

# Wildcard used inside the annotation regex: any run of bytes (newlines
# included) that does NOT step onto the header of another annotation.
# Without this guard a lazy ``.*?`` could start at an unrelated link
# annotation and run on until the next it-ebooks URI, deleting both.
_ANNOTATION_GAP = rb'(?:(?!\n/Type /Annot\n).)*?'

# Matches one complete it-ebooks link annotation, directly on the raw PDF
# bytes:
#   /Type /Annot, /Subtype /Link, /Rect [ ... ], /Border ...,
#   /A << /Type /Action /S /URI /URI (http://www.it-ebooks.info/) >>
pattern = re.compile(
    rb'\n/Type /Annot\n/Subtype /Link\n/Rect \[ ' + _ANNOTATION_GAP +
    rb' \]\n/Border ' + _ANNOTATION_GAP +
    rb'\n/A <<\n/Type /Action\n/S /URI'
    rb'\n/URI \(http://www\.it-ebooks\.info/\)\n>>',
    re.DOTALL,  # the gaps may span several lines of the dictionary
)


def remove_evil_links(pdf_data):
    """Remove all it-ebooks links and metadata from the passed PDF data.

    Works on the raw bytes, so a match can never straddle a byte boundary
    the way a regex over a hex dump could.

    Args:
        pdf_data: Content of the PDF file as ``bytes``.

    Returns:
        A new ``bytes`` object with every it-ebooks link annotation and
        every remaining ``www.it-ebooks.info`` occurrence removed.
    """
    # Remove each annotation element inside the PDF file
    # (this removes the "clickable" it-ebooks.info links).
    without_annotations = pattern.sub(b'', pdf_data)

    # Remove the remaining textual references to the site
    # (link elements which are assigned to the annotations).
    return without_annotations.replace(b'www.it-ebooks.info', b'')

def main(args):
    """Strip it-ebooks watermarks from every file listed in ``args.files``.

    Each file is read, cleaned with ``remove_evil_links`` and written back
    under its original name. Unless ``args.no_backup`` is set, the original
    is first moved to ``<path>.old``. A file that cannot be read, backed up
    or written is reported on stderr and skipped; the remaining files are
    still processed.

    Args:
        args: Namespace with ``files`` (iterable of paths), ``verbose``
            (bool) and ``no_backup`` (bool). ``args.files`` is replaced by
            its de-duplicated list, in first-seen order.

    Returns:
        None. A ``KeyboardInterrupt`` ends the run quietly.
    """
    def report(path, error):
        # Some OSError instances carry no strerror; fall back to str(error)
        # so the message never reads "path: None".
        sys.stderr.write('{0}: {1}\n'.format(path, error.strerror or error))
        sys.stderr.flush()

    try:
        # dict.fromkeys drops duplicates like set() would, but keeps the
        # command-line order so the processing order is reproducible.
        args.files = list(dict.fromkeys(args.files))
        for file_path in args.files:
            if not file_path:
                continue
            if args.verbose:
                print('Processing: {0}'.format(file_path))
            try:
                with open(file_path, 'rb') as input_file:
                    pdf_data = input_file.read()
            except OSError as e:
                report(file_path, e)
                continue

            # Modify the PDF data before touching anything on disk, so a
            # processing failure leaves the original file where it was.
            new_pdf_data = remove_evil_links(pdf_data)

            try:
                # Backup the file with a different name
                if not args.no_backup:
                    if args.verbose:
                        print('Creating backup: {0}.old'.format(file_path))
                    shutil.move(file_path, '{0}.old'.format(file_path))

                # Save the new file
                with open(file_path, 'wb') as out_file:
                    out_file.write(new_pdf_data)
            except OSError as e:
                # Same policy as for read errors: report and move on
                # instead of aborting the whole batch with a traceback.
                report(file_path, e)
                continue
            if args.verbose:
                print('Saving modified file: {0}'.format(file_path))
    except KeyboardInterrupt:
        # Deliberate: Ctrl-C stops the batch without a traceback.
        pass

if __name__ == '__main__':
    # Command-line entry point: declare the three options, parse argv and
    # hand the resulting namespace straight to main().
    cli = argparse.ArgumentParser()

    # -f/--files: mandatory flag taking the PDF paths to clean.
    cli.add_argument(
        '-f', '--files',
        nargs='*',
        required=True,
        help='One or more PDF files to remove it-ebook watermarks.',
    )

    # -n/--no-backup: skip the "<file>.old" copy of the original.
    cli.add_argument(
        '-n', '--no-backup',
        action='store_true',
        help=('Disables the creating of backups for the files '
              'which are being processed.'),
    )

    # -v/--verbose: print progress messages for every file.
    cli.add_argument('-v', '--verbose', action='store_true')

    main(cli.parse_args())
	#!/usr/bin/env python3

	import sys
	import re
	import shutil
	import argparse
	import binascii

	# NOTE: this copy of the script had lost its indentation and the regex
	# quantifiers (".*?" had become ".?"); both are restored here.

	# Wildcard used inside the annotation regex: any run of bytes (newlines
	# included) that does NOT step onto the header of another annotation.
	# Without this guard a lazy ``.*?`` could start at an unrelated link
	# annotation and run on until the next it-ebooks URI, deleting both.
	_ANNOTATION_GAP = rb'(?:(?!\n/Type /Annot\n).)*?'

	# Matches one complete it-ebooks link annotation, directly on the raw
	# PDF bytes:
	#   /Type /Annot, /Subtype /Link, /Rect [ ... ], /Border ...,
	#   /A << /Type /Action /S /URI /URI (http://www.it-ebooks.info/) >>
	pattern = re.compile(
	    rb'\n/Type /Annot\n/Subtype /Link\n/Rect \[ ' + _ANNOTATION_GAP +
	    rb' \]\n/Border ' + _ANNOTATION_GAP +
	    rb'\n/A <<\n/Type /Action\n/S /URI'
	    rb'\n/URI \(http://www\.it-ebooks\.info/\)\n>>',
	    re.DOTALL,  # the gaps may span several lines of the dictionary
	)


	def remove_evil_links(pdf_data):
	    """Remove all it-ebooks links and metadata from the passed PDF data.

	    Works on the raw bytes, so a match can never straddle a byte
	    boundary the way a regex over a hex dump could.

	    Args:
	        pdf_data: Content of the PDF file as ``bytes``.

	    Returns:
	        A new ``bytes`` object with every it-ebooks link annotation and
	        every remaining ``www.it-ebooks.info`` occurrence removed.
	    """
	    # Remove each annotation element inside the PDF file
	    # (this removes the "clickable" it-ebooks.info links).
	    without_annotations = pattern.sub(b'', pdf_data)

	    # Remove the remaining textual references to the site
	    # (link elements which are assigned to the annotations).
	    return without_annotations.replace(b'www.it-ebooks.info', b'')

	def main(args):
	    """Strip it-ebooks watermarks from every file in ``args.files``.

	    (Indentation of this copy had been flattened; it is restored here.)

	    Each file is read, cleaned with ``remove_evil_links`` and written
	    back under its original name. Unless ``args.no_backup`` is set, the
	    original is first moved to ``<path>.old``. A file that cannot be
	    read, backed up or written is reported on stderr and skipped; the
	    remaining files are still processed.

	    Args:
	        args: Namespace with ``files`` (iterable of paths), ``verbose``
	            (bool) and ``no_backup`` (bool). ``args.files`` is replaced
	            by its de-duplicated list, in first-seen order.

	    Returns:
	        None. A ``KeyboardInterrupt`` ends the run quietly.
	    """
	    def report(path, error):
	        # Some OSError instances carry no strerror; fall back to
	        # str(error) so the message never reads "path: None".
	        sys.stderr.write('{0}: {1}\n'.format(path, error.strerror or error))
	        sys.stderr.flush()

	    try:
	        # dict.fromkeys drops duplicates like set() would, but keeps the
	        # command-line order so the processing order is reproducible.
	        args.files = list(dict.fromkeys(args.files))
	        for file_path in args.files:
	            if not file_path:
	                continue
	            if args.verbose:
	                print('Processing: {0}'.format(file_path))
	            try:
	                with open(file_path, 'rb') as input_file:
	                    pdf_data = input_file.read()
	            except OSError as e:
	                report(file_path, e)
	                continue

	            # Modify the PDF data before touching anything on disk, so
	            # a processing failure leaves the original file in place.
	            new_pdf_data = remove_evil_links(pdf_data)

	            try:
	                # Backup the file with a different name
	                if not args.no_backup:
	                    if args.verbose:
	                        print('Creating backup: {0}.old'.format(file_path))
	                    shutil.move(file_path, '{0}.old'.format(file_path))

	                # Save the new file
	                with open(file_path, 'wb') as out_file:
	                    out_file.write(new_pdf_data)
	            except OSError as e:
	                # Same policy as for read errors: report and move on
	                # instead of aborting the whole batch.
	                report(file_path, e)
	                continue
	            if args.verbose:
	                print('Saving modified file: {0}'.format(file_path))
	    except KeyboardInterrupt:
	        # Deliberate: Ctrl-C stops the batch without a traceback.
	        pass

	if __name__ == '__main__':
	    # Command-line entry point (indentation of this copy had been
	    # flattened; it is restored here): declare the three options,
	    # parse argv and pass the resulting namespace to main().
	    parser = argparse.ArgumentParser()

	    # -f/--files: mandatory flag taking the PDF paths to clean.
	    parser.add_argument(
	        '-f', '--files',
	        help='One or more PDF files to remove it-ebook watermarks.',
	        nargs='*', required=True
	    )

	    # -n/--no-backup: skip the "<file>.old" copy of the original.
	    parser.add_argument(
	        '-n', '--no-backup',
	        help='Disables the creating of backups for the files ' +
	             'which are being processed.',
	        action='store_true'
	    )

	    # -v/--verbose: print progress messages for every file.
	    parser.add_argument(
	        '-v', '--verbose',
	        action='store_true'
	    )

	    args = parser.parse_args()
	    main(args)