Skip to content

Instantly share code, notes, and snippets.

@yejun
Created January 25, 2017 17:23
Show Gist options
  • Save yejun/d7e324356157ee0c127228b793675f05 to your computer and use it in GitHub Desktop.
Save yejun/d7e324356157ee0c127228b793675f05 to your computer and use it in GitHub Desktop.
Scan a double-sided document to PDF using a single-sided ADF
import subprocess
import tempfile
import argparse
import shutil
import os.path
import glob
from PyPDF2 import PdfFileMerger, PdfFileReader
# SANE device name handed to `scanimage -d`. Empty as shipped; set it to the
# scanner reported by `scanimage -L` before use.
DEVICE = ""


def interleave_sides(fronts, backs):
    """Return page files in reading order for a flipped double-sided stack.

    The stack is scanned front sides first, then turned over and scanned
    again, so the back sides arrive in reverse sheet order.  Each front is
    therefore paired with the backs taken from the end.

    Args:
        fronts: Front-side files in scan order.
        backs: Back-side files in scan order (last sheet's back first).

    Returns:
        A list ``[front1, back1, front2, back2, ...]`` in reading order.

    Raises:
        ValueError: If the two passes produced a different number of pages;
            zipping them would silently drop and mispair pages.
    """
    if len(fronts) != len(backs):
        raise ValueError(
            'Front/back page count mismatch: %d front vs %d back'
            % (len(fronts), len(backs)))
    return [page for pair in zip(fronts, reversed(backs)) for page in pair]


if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='Batch Scan')
    parser.add_argument('-c', '--color', action='store_true')
    parser.add_argument('-d', '--double', action='store_true')
    parser.add_argument('-o', '--output', type=str, default='output.pdf')
    parser.add_argument('--no-ocr', action='store_true')
    args = parser.parse_args()

    # `raw_input` only exists on Python 2; fall back to `input` on Python 3.
    try:
        prompt = raw_input
    except NameError:
        prompt = input

    tmp = tempfile.mkdtemp()

    def scan_page(side='f'):
        """Scan every sheet in the ADF into ``tmp`` as ``<side>NNNN.pnm``.

        Args:
            side: Filename prefix, 'f' for front sides and 'b' for back sides.

        Raises:
            subprocess.CalledProcessError: If `scanimage` exits non-zero.
        """
        cmd = ['scanimage', '-d', DEVICE, '--resolution', '300',
               '--source', 'ADF', '--batch=%s/%s%%d.pnm' % (tmp, side),
               # Numbering from 1000 keeps every page number four digits wide
               # (up to 9999), so sorting by filename equals scan order.
               '--batch-start=1000',
               # Scan-area height; presumably US Letter length in mm.
               '-y', '279.4',
               '--mode', 'Color' if args.color else 'Gray']
        subprocess.check_call(cmd)

    # Everything after the temp directory exists runs under try/finally so
    # the directory is removed even when a scan or conversion step fails.
    try:
        scan_page()
        if args.double:
            prompt('Turn your pages')
            scan_page('b')

        # Convert each raw scan to a single-page PDF next to it.
        for fn in sorted(glob.glob(os.path.join(tmp, '*.pnm'))):
            base = os.path.splitext(fn)[0]
            if args.no_ocr:
                subprocess.check_call(['convert', fn,
                                       '-density', '300x300',
                                       '-quality', '75',
                                       '-compress', 'jpeg',
                                       base + '.pdf'])
            else:
                subprocess.check_call([
                    'tesseract',
                    fn, base,
                    '/usr/share/tessdata/configs/pdf'])

        if args.double:
            # Select each side by its filename prefix rather than splitting
            # one combined list in half, so unequal passes are detected.
            fronts = sorted(glob.glob(os.path.join(tmp, 'f*.pdf')))
            backs = sorted(glob.glob(os.path.join(tmp, 'b*.pdf')))
            try:
                sorted_pdfs = interleave_sides(fronts, backs)
            except ValueError as err:
                raise SystemExit(str(err))
        else:
            sorted_pdfs = sorted(glob.glob(os.path.join(tmp, '*.pdf')))

        merger = PdfFileMerger()
        try:
            for fn in sorted_pdfs:
                # Passing the path lets the merger open the file itself and
                # release it again in close().
                merger.append(fn)
            merger.write(args.output)
        finally:
            merger.close()
    finally:
        shutil.rmtree(tmp)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment