Created
November 29, 2016 14:56
-
-
Save mritzmann/bfb7298fd14427c282b8a702547c74d5 to your computer and use it in GitHub Desktop.
error with ocrmypdf
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
DEBUG - os.symlink(/home/ocr/scan0_161129-144058.pdf, /tmp/com.github.ocrmypdf.1eo3vclv/origin) | |
DEBUG - os.symlink(/tmp/com.github.ocrmypdf.1eo3vclv/origin, /tmp/com.github.ocrmypdf.1eo3vclv/origin.pdf) | |
DEBUG - [{'width_inches': Decimal('8.05'), 'images': [{'enc': 'jpeg', 'type': 'image', 'comp': 3, 'name': '/Im0', 'width': 2416, 'height': 3461, 'dpi_h': Decimal('300.1734605377276920989970676600933074951171875'), 'dpi_w': Decimal('300.1242236024844487474183551967144012451171875'), 'color': 'rgb', 'dpi': Decimal('300.149'), 'bpc': 8}], 'height_pixels': 3461, 'xres': Decimal('300.1242236024844487474183551967144012451171875'), 'height_inches': Decimal('11.53'), 'has_text': False, 'yres': Decimal('300.1734605377276920989970676600933074951171875'), 'width_pixels': 2416, 'pageno': 0}, {'width_inches': Decimal('8.13'), 'images': [{'enc': 'jpeg', 'type': 'image', 'comp': 3, 'name': '/Im1', 'width': 2440, 'height': 3465, 'dpi_h': Decimal('300'), 'dpi_w': Decimal('300.123001230012278028880245983600616455078125'), 'color': 'rgb', 'dpi': Decimal('300.061'), 'bpc': 8}], 'height_pixels': 3465, 'xres': Decimal('300.123001230012278028880245983600616455078125'), 'height_inches': Decimal('11.55'), 'has_text': False, 'yres': Decimal('300'), 'width_pixels': 2440, 'pageno': 1}] | |
DEBUG - os.symlink(/tmp/com.github.ocrmypdf.1eo3vclv/000001.page.pdf, /tmp/com.github.ocrmypdf.1eo3vclv/000001.ocr.page.pdf) | |
DEBUG - os.symlink(/tmp/com.github.ocrmypdf.1eo3vclv/000002.page.pdf, /tmp/com.github.ocrmypdf.1eo3vclv/000002.ocr.page.pdf) | |
DEBUG - os.symlink(/tmp/com.github.ocrmypdf.1eo3vclv/000001.ocr.page.pdf, /tmp/com.github.ocrmypdf.1eo3vclv/000001.ocr.oriented.pdf) | |
DEBUG - os.symlink(/tmp/com.github.ocrmypdf.1eo3vclv/000002.ocr.page.pdf, /tmp/com.github.ocrmypdf.1eo3vclv/000002.ocr.oriented.pdf) | |
DEBUG - Rasterize 000001.ocr.oriented.pdf with png16m | |
DEBUG - Rasterize 000002.ocr.oriented.pdf with png16m | |
DEBUG - os.symlink(/tmp/com.github.ocrmypdf.1eo3vclv/000001.page.png, /tmp/com.github.ocrmypdf.1eo3vclv/000001.pp-deskew.png) | |
DEBUG - os.symlink(/tmp/com.github.ocrmypdf.1eo3vclv/000002.page.png, /tmp/com.github.ocrmypdf.1eo3vclv/000002.pp-deskew.png) | |
DEBUG - os.symlink(/tmp/com.github.ocrmypdf.1eo3vclv/000001.pp-deskew.png, /tmp/com.github.ocrmypdf.1eo3vclv/000001.pp-clean.png) | |
DEBUG - os.symlink(/tmp/com.github.ocrmypdf.1eo3vclv/000002.pp-deskew.png, /tmp/com.github.ocrmypdf.1eo3vclv/000002.pp-clean.png) | |
DEBUG - | |
Original exception: | |
Exception #1 | |
'subprocess.CalledProcessError(Command '['tesseract', '-l', 'eng', '/tmp/com.github.ocrmypdf.1eo3vclv/000001.pp-clean.png', '/tmp/com.github.ocrmypdf.1eo3vclv/000001.badxml', 'hocr']' returned non-zero exit status -9)' raised in ... | |
Task = def ocrmypdf.main.ocr_tesseract_hocr(...): | |
Job = [tmp/com.github.ocrmypdf.1eo3vclv/000001.pp-clean.png -> tmp/com.github.ocrmypdf.1eo3vclv/000001.hocr, <ocrmypdf.main.WrappedLogger>, [{'width_inches': Decimal('8.05'), 'images': [{'enc': 'jpeg', 'type': 'image', 'comp': 3, 'name': '/Im0', 'width': 2416, 'height': 3461, 'dpi_h': Decimal('300.1734605377276920989970676600933074951171875'), 'dpi_w': Decimal('300.1242236024844487474183551967144012451171875'), 'color': 'rgb', 'dpi': Decimal('300.149'), 'bpc': 8}], 'height_pixels': 3461, 'xres': Decimal('300.1242236024844487474183551967144012451171875'), 'height_inches': Decimal('11.53'), 'has_text': False, 'yres': Decimal('300.1734605377276920989970676600933074951171875'), 'width_pixels': 2416, 'pageno': 0}, {'width_inches': Decimal('8.13'), 'images': [{'enc': 'jpeg', 'type': 'image', 'comp': 3, 'name': '/Im1', 'width': 2440, 'height': 3465, 'dpi_h': Decimal('300'), 'dpi_w': Decimal('300.123001230012278028880245983600616455078125'), 'color': 'rgb', 'dpi': Decimal('300.061'), 'bpc': 8}], 'height_pixels': 3465, 'xres': Decimal('300.123001230012278028880245983600616455078125'), 'height_inches': Decimal('11.55'), 'has_text': False, 'yres': Decimal('300'), 'width_pixels': 2440, 'pageno': 1}], <unlocked _thread.lock>] | |
Traceback (most recent call last): | |
File "/usr/lib/python3/dist-packages/ruffus/task.py", line 751, in run_pooled_job_without_exceptions | |
register_cleanup, touch_files_only) | |
File "/usr/lib/python3/dist-packages/ruffus/task.py", line 567, in job_wrapper_io_files | |
ret_val = user_defined_work_func(*params) | |
File "/usr/lib/python3/dist-packages/ocrmypdf/main.py", line 888, in ocr_tesseract_hocr | |
log=log | |
File "/usr/lib/python3/dist-packages/ocrmypdf/tesseract.py", line 195, in generate_hocr | |
raise e from e | |
File "/usr/lib/python3/dist-packages/ocrmypdf/tesseract.py", line 182, in generate_hocr | |
universal_newlines=True, timeout=timeout) | |
File "/usr/lib/python3.5/subprocess.py", line 626, in check_output | |
**kwargs).stdout | |
File "/usr/lib/python3.5/subprocess.py", line 708, in run | |
output=stdout, stderr=stderr) | |
subprocess.CalledProcessError: Command '['tesseract', '-l', 'eng', '/tmp/com.github.ocrmypdf.1eo3vclv/000001.pp-clean.png', '/tmp/com.github.ocrmypdf.1eo3vclv/000001.badxml', 'hocr']' returned non-zero exit status -9 | |
ERROR - Error occurred while running this command: | |
(Command '['tesseract', '-l', 'eng', '/tmp/com.github.ocrmypdf.1eo3vclv/000001.pp-clean.png', '/tmp/com.github.ocrmypdf.1eo3vclv/000001.badxml', 'hocr']' returned non-zero exit status -9) | |
________________________________________ | |
Tasks which will be run: | |
Task enters queue = 'ocrmypdf.main.triage' | |
Completed Task = 'ocrmypdf.main.triage' | |
Task enters queue = 'ocrmypdf.main.repair_pdf' | |
Completed Task = 'ocrmypdf.main.repair_pdf' | |
Task enters queue = 'ocrmypdf.main.split_pages' | |
Task enters queue = 'ocrmypdf.main.generate_postscript_stub' | |
Completed Task = 'ocrmypdf.main.split_pages' | |
Task enters queue = 'ocrmypdf.main.orient_page' | |
Completed Task = 'ocrmypdf.main.generate_postscript_stub' | |
Completed Task = 'ocrmypdf.main.orient_page' | |
Task enters queue = 'ocrmypdf.main.rasterize_with_ghostscript' | |
Task enters queue = 'ocrmypdf.main.skip_page' | |
Uptodate Task = 'ocrmypdf.main.skip_page' | |
WARNING: | |
In Task 'ocrmypdf.main.skip_page': | |
No jobs were run because no file names matched. | |
Please make sure that the regular expression is correctly specified. | |
Completed Task = 'ocrmypdf.main.rasterize_with_ghostscript' | |
Task enters queue = 'ocrmypdf.main.preprocess_deskew' | |
Completed Task = 'ocrmypdf.main.preprocess_deskew' | |
Task enters queue = 'ocrmypdf.main.preprocess_clean' | |
Completed Task = 'ocrmypdf.main.preprocess_clean' | |
Task enters queue = 'ocrmypdf.main.select_image_for_pdf' | |
Task enters queue = 'ocrmypdf.main.ocr_tesseract_hocr' |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
log for ocrmypdf/OCRmyPDF#109