Skip to content

Instantly share code, notes, and snippets.

@x1001000
Created December 4, 2022 21:53
Show Gist options
  • Save x1001000/7fbd9ce0e84620da8b17047ecbd91c8a to your computer and use it in GitHub Desktop.
Save x1001000/7fbd9ce0e84620da8b17047ecbd91c8a to your computer and use it in GitHub Desktop.
OCRmyPDF
Display the source blob
Display the rendered blob
Raw
{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"provenance": [],
"name": "OCRmyPDF",
"authorship_tag": "ABX9TyOe/XpKuIZYE2waAJaMGxQM",
"include_colab_link": true
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
},
"language_info": {
"name": "python"
}
},
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "view-in-github",
"colab_type": "text"
},
"source": [
"<a href=\"https://colab.research.google.com/gist/x1001000/7fbd9ce0e84620da8b17047ecbd91c8a/ocrmypdf.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
]
},
{
"cell_type": "code",
"source": [
"from google.colab import files\n",
"assert files.upload()"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 107
},
"id": "R4PXpI3gs2H0",
"outputId": "7a9c0461-1ed7-4e35-8597-9e6dd3b73fd5"
},
"execution_count": 1,
"outputs": [
{
"output_type": "display_data",
"data": {
"text/plain": [
"<IPython.core.display.HTML object>"
],
"text/html": [
"\n",
" <input type=\"file\" id=\"files-f324a257-b0cb-40a8-aa3b-2fe5324dabc5\" name=\"files[]\" multiple disabled\n",
" style=\"border:none\" />\n",
" <output id=\"result-f324a257-b0cb-40a8-aa3b-2fe5324dabc5\">\n",
" Upload widget is only available when the cell has been executed in the\n",
" current browser session. Please rerun this cell to enable.\n",
" </output>\n",
" <script>// Copyright 2017 Google LLC\n",
"//\n",
"// Licensed under the Apache License, Version 2.0 (the \"License\");\n",
"// you may not use this file except in compliance with the License.\n",
"// You may obtain a copy of the License at\n",
"//\n",
"// http://www.apache.org/licenses/LICENSE-2.0\n",
"//\n",
"// Unless required by applicable law or agreed to in writing, software\n",
"// distributed under the License is distributed on an \"AS IS\" BASIS,\n",
"// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n",
"// See the License for the specific language governing permissions and\n",
"// limitations under the License.\n",
"\n",
"/**\n",
" * @fileoverview Helpers for google.colab Python module.\n",
" */\n",
"(function(scope) {\n",
"function span(text, styleAttributes = {}) {\n",
" const element = document.createElement('span');\n",
" element.textContent = text;\n",
" for (const key of Object.keys(styleAttributes)) {\n",
" element.style[key] = styleAttributes[key];\n",
" }\n",
" return element;\n",
"}\n",
"\n",
"// Max number of bytes which will be uploaded at a time.\n",
"const MAX_PAYLOAD_SIZE = 100 * 1024;\n",
"\n",
"function _uploadFiles(inputId, outputId) {\n",
" const steps = uploadFilesStep(inputId, outputId);\n",
" const outputElement = document.getElementById(outputId);\n",
" // Cache steps on the outputElement to make it available for the next call\n",
" // to uploadFilesContinue from Python.\n",
" outputElement.steps = steps;\n",
"\n",
" return _uploadFilesContinue(outputId);\n",
"}\n",
"\n",
"// This is roughly an async generator (not supported in the browser yet),\n",
"// where there are multiple asynchronous steps and the Python side is going\n",
"// to poll for completion of each step.\n",
"// This uses a Promise to block the python side on completion of each step,\n",
"// then passes the result of the previous step as the input to the next step.\n",
"function _uploadFilesContinue(outputId) {\n",
" const outputElement = document.getElementById(outputId);\n",
" const steps = outputElement.steps;\n",
"\n",
" const next = steps.next(outputElement.lastPromiseValue);\n",
" return Promise.resolve(next.value.promise).then((value) => {\n",
" // Cache the last promise value to make it available to the next\n",
" // step of the generator.\n",
" outputElement.lastPromiseValue = value;\n",
" return next.value.response;\n",
" });\n",
"}\n",
"\n",
"/**\n",
" * Generator function which is called between each async step of the upload\n",
" * process.\n",
" * @param {string} inputId Element ID of the input file picker element.\n",
" * @param {string} outputId Element ID of the output display.\n",
" * @return {!Iterable<!Object>} Iterable of next steps.\n",
" */\n",
"function* uploadFilesStep(inputId, outputId) {\n",
" const inputElement = document.getElementById(inputId);\n",
" inputElement.disabled = false;\n",
"\n",
" const outputElement = document.getElementById(outputId);\n",
" outputElement.innerHTML = '';\n",
"\n",
" const pickedPromise = new Promise((resolve) => {\n",
" inputElement.addEventListener('change', (e) => {\n",
" resolve(e.target.files);\n",
" });\n",
" });\n",
"\n",
" const cancel = document.createElement('button');\n",
" inputElement.parentElement.appendChild(cancel);\n",
" cancel.textContent = 'Cancel upload';\n",
" const cancelPromise = new Promise((resolve) => {\n",
" cancel.onclick = () => {\n",
" resolve(null);\n",
" };\n",
" });\n",
"\n",
" // Wait for the user to pick the files.\n",
" const files = yield {\n",
" promise: Promise.race([pickedPromise, cancelPromise]),\n",
" response: {\n",
" action: 'starting',\n",
" }\n",
" };\n",
"\n",
" cancel.remove();\n",
"\n",
" // Disable the input element since further picks are not allowed.\n",
" inputElement.disabled = true;\n",
"\n",
" if (!files) {\n",
" return {\n",
" response: {\n",
" action: 'complete',\n",
" }\n",
" };\n",
" }\n",
"\n",
" for (const file of files) {\n",
" const li = document.createElement('li');\n",
" li.append(span(file.name, {fontWeight: 'bold'}));\n",
" li.append(span(\n",
" `(${file.type || 'n/a'}) - ${file.size} bytes, ` +\n",
" `last modified: ${\n",
" file.lastModifiedDate ? file.lastModifiedDate.toLocaleDateString() :\n",
" 'n/a'} - `));\n",
" const percent = span('0% done');\n",
" li.appendChild(percent);\n",
"\n",
" outputElement.appendChild(li);\n",
"\n",
" const fileDataPromise = new Promise((resolve) => {\n",
" const reader = new FileReader();\n",
" reader.onload = (e) => {\n",
" resolve(e.target.result);\n",
" };\n",
" reader.readAsArrayBuffer(file);\n",
" });\n",
" // Wait for the data to be ready.\n",
" let fileData = yield {\n",
" promise: fileDataPromise,\n",
" response: {\n",
" action: 'continue',\n",
" }\n",
" };\n",
"\n",
" // Use a chunked sending to avoid message size limits. See b/62115660.\n",
" let position = 0;\n",
" do {\n",
" const length = Math.min(fileData.byteLength - position, MAX_PAYLOAD_SIZE);\n",
" const chunk = new Uint8Array(fileData, position, length);\n",
" position += length;\n",
"\n",
" const base64 = btoa(String.fromCharCode.apply(null, chunk));\n",
" yield {\n",
" response: {\n",
" action: 'append',\n",
" file: file.name,\n",
" data: base64,\n",
" },\n",
" };\n",
"\n",
" let percentDone = fileData.byteLength === 0 ?\n",
" 100 :\n",
" Math.round((position / fileData.byteLength) * 100);\n",
" percent.textContent = `${percentDone}% done`;\n",
"\n",
" } while (position < fileData.byteLength);\n",
" }\n",
"\n",
" // All done.\n",
" yield {\n",
" response: {\n",
" action: 'complete',\n",
" }\n",
" };\n",
"}\n",
"\n",
"scope.google = scope.google || {};\n",
"scope.google.colab = scope.google.colab || {};\n",
"scope.google.colab._files = {\n",
" _uploadFiles,\n",
" _uploadFilesContinue,\n",
"};\n",
"})(self);\n",
"</script> "
]
},
"metadata": {}
},
{
"output_type": "stream",
"name": "stdout",
"text": [
"Saving 131_vHIT.pdf to 131_vHIT.pdf\n",
"Saving 131.pdf to 131.pdf\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"!apt update\n",
"!apt install tesseract-ocr\n",
"!apt install ghostscript\n",
"!pip install ocrmypdf --quiet\n",
"%cd /usr/local/lib/python3.8/dist-packages/ocrmypdf/subprocess\n",
"!sed -i \"346a \\ \\ \\ \\ \\ \\ \\ \\ \\ \\ \\ \\ pass\" __init__.py\n",
"!sed -i \"346d\" __init__.py"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "etffOoGx5kZf",
"outputId": "67133c07-ac84-41a4-aa8a-3e73e2392a25"
},
"execution_count": 2,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"\u001b[33m\r0% [Working]\u001b[0m\r \rIgn:1 https://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1804/x86_64 InRelease\n",
"\u001b[33m\r0% [Connecting to archive.ubuntu.com (91.189.91.38)] [Waiting for headers] [Con\u001b[0m\r \rHit:2 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64 InRelease\n",
"\u001b[33m\r0% [Waiting for headers] [Waiting for headers] [Connecting to cloud.r-project.o\u001b[0m\r \rGet:3 http://security.ubuntu.com/ubuntu bionic-security InRelease [88.7 kB]\n",
"\u001b[33m\r0% [Waiting for headers] [3 InRelease 14.2 kB/88.7 kB 16%] [Connected to cloud.\u001b[0m\u001b[33m\r0% [2 InRelease gpgv 1,581 B] [Waiting for headers] [3 InRelease 14.2 kB/88.7 k\u001b[0m\r \rHit:4 https://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1804/x86_64 Release\n",
"\u001b[33m\r0% [2 InRelease gpgv 1,581 B] [Waiting for headers] [3 InRelease 14.2 kB/88.7 k\u001b[0m\r \rGet:5 https://cloud.r-project.org/bin/linux/ubuntu bionic-cran40/ InRelease [3,626 B]\n",
"Hit:6 http://archive.ubuntu.com/ubuntu bionic InRelease\n",
"Get:7 http://archive.ubuntu.com/ubuntu bionic-updates InRelease [88.7 kB]\n",
"Get:8 http://ppa.launchpad.net/c2d4u.team/c2d4u4.0+/ubuntu bionic InRelease [15.9 kB]\n",
"Get:9 http://archive.ubuntu.com/ubuntu bionic-backports InRelease [83.3 kB]\n",
"Hit:11 http://ppa.launchpad.net/cran/libgit2/ubuntu bionic InRelease\n",
"Hit:12 http://ppa.launchpad.net/deadsnakes/ppa/ubuntu bionic InRelease\n",
"Get:13 http://ppa.launchpad.net/graphics-drivers/ppa/ubuntu bionic InRelease [21.3 kB]\n",
"Get:14 http://security.ubuntu.com/ubuntu bionic-security/restricted amd64 Packages [1,307 kB]\n",
"Get:15 http://archive.ubuntu.com/ubuntu bionic-updates/multiverse amd64 Packages [30.0 kB]\n",
"Get:16 http://archive.ubuntu.com/ubuntu bionic-updates/main amd64 Packages [3,519 kB]\n",
"Get:17 http://security.ubuntu.com/ubuntu bionic-security/multiverse amd64 Packages [22.9 kB]\n",
"Get:18 http://archive.ubuntu.com/ubuntu bionic-updates/restricted amd64 Packages [1,348 kB]\n",
"Get:19 http://ppa.launchpad.net/c2d4u.team/c2d4u4.0+/ubuntu bionic/main Sources [2,228 kB]\n",
"Get:20 http://ppa.launchpad.net/c2d4u.team/c2d4u4.0+/ubuntu bionic/main amd64 Packages [1,140 kB]\n",
"Get:21 http://ppa.launchpad.net/graphics-drivers/ppa/ubuntu bionic/main amd64 Packages [40.8 kB]\n",
"Fetched 9,937 kB in 5s (2,053 kB/s)\n",
"Reading package lists... Done\n",
"Building dependency tree \n",
"Reading state information... Done\n",
"7 packages can be upgraded. Run 'apt list --upgradable' to see them.\n",
"Reading package lists... Done\n",
"Building dependency tree \n",
"Reading state information... Done\n",
"The following package was automatically installed and is no longer required:\n",
" libnvidia-common-460\n",
"Use 'apt autoremove' to remove it.\n",
"The following additional packages will be installed:\n",
" tesseract-ocr-eng tesseract-ocr-osd\n",
"The following NEW packages will be installed:\n",
" tesseract-ocr tesseract-ocr-eng tesseract-ocr-osd\n",
"0 upgraded, 3 newly installed, 0 to remove and 7 not upgraded.\n",
"Need to get 4,795 kB of archives.\n",
"After this operation, 15.8 MB of additional disk space will be used.\n",
"Get:1 http://archive.ubuntu.com/ubuntu bionic/universe amd64 tesseract-ocr-eng all 4.00~git24-0e00fe6-1.2 [1,588 kB]\n",
"Get:2 http://archive.ubuntu.com/ubuntu bionic/universe amd64 tesseract-ocr-osd all 4.00~git24-0e00fe6-1.2 [2,989 kB]\n",
"Get:3 http://archive.ubuntu.com/ubuntu bionic/universe amd64 tesseract-ocr amd64 4.00~git2288-10f4998a-2 [218 kB]\n",
"Fetched 4,795 kB in 1s (5,720 kB/s)\n",
"Selecting previously unselected package tesseract-ocr-eng.\n",
"(Reading database ... 124015 files and directories currently installed.)\n",
"Preparing to unpack .../tesseract-ocr-eng_4.00~git24-0e00fe6-1.2_all.deb ...\n",
"Unpacking tesseract-ocr-eng (4.00~git24-0e00fe6-1.2) ...\n",
"Selecting previously unselected package tesseract-ocr-osd.\n",
"Preparing to unpack .../tesseract-ocr-osd_4.00~git24-0e00fe6-1.2_all.deb ...\n",
"Unpacking tesseract-ocr-osd (4.00~git24-0e00fe6-1.2) ...\n",
"Selecting previously unselected package tesseract-ocr.\n",
"Preparing to unpack .../tesseract-ocr_4.00~git2288-10f4998a-2_amd64.deb ...\n",
"Unpacking tesseract-ocr (4.00~git2288-10f4998a-2) ...\n",
"Setting up tesseract-ocr-osd (4.00~git24-0e00fe6-1.2) ...\n",
"Setting up tesseract-ocr-eng (4.00~git24-0e00fe6-1.2) ...\n",
"Setting up tesseract-ocr (4.00~git2288-10f4998a-2) ...\n",
"Processing triggers for man-db (2.8.3-2ubuntu0.1) ...\n",
"Reading package lists... Done\n",
"Building dependency tree \n",
"Reading state information... Done\n",
"The following package was automatically installed and is no longer required:\n",
" libnvidia-common-460\n",
"Use 'apt autoremove' to remove it.\n",
"The following additional packages will be installed:\n",
" fonts-droid-fallback fonts-noto-mono gsfonts libcupsfilters1 libcupsimage2\n",
" libgs9 libgs9-common libijs-0.35 libjbig2dec0 poppler-data\n",
"Suggested packages:\n",
" fonts-noto ghostscript-x poppler-utils fonts-japanese-mincho\n",
" | fonts-ipafont-mincho fonts-japanese-gothic | fonts-ipafont-gothic\n",
" fonts-arphic-ukai fonts-arphic-uming fonts-nanum\n",
"The following NEW packages will be installed:\n",
" fonts-droid-fallback fonts-noto-mono ghostscript gsfonts libcupsfilters1\n",
" libcupsimage2 libgs9 libgs9-common libijs-0.35 libjbig2dec0 poppler-data\n",
"0 upgraded, 11 newly installed, 0 to remove and 7 not upgraded.\n",
"Need to get 14.1 MB of archives.\n",
"After this operation, 49.9 MB of additional disk space will be used.\n",
"Get:1 http://archive.ubuntu.com/ubuntu bionic/main amd64 fonts-droid-fallback all 1:6.0.1r16-1.1 [1,805 kB]\n",
"Get:2 http://archive.ubuntu.com/ubuntu bionic/main amd64 poppler-data all 0.4.8-2 [1,479 kB]\n",
"Get:3 http://archive.ubuntu.com/ubuntu bionic/main amd64 fonts-noto-mono all 20171026-2 [75.5 kB]\n",
"Get:4 http://archive.ubuntu.com/ubuntu bionic-updates/main amd64 libcupsimage2 amd64 2.2.7-1ubuntu2.9 [18.6 kB]\n",
"Get:5 http://archive.ubuntu.com/ubuntu bionic/main amd64 libijs-0.35 amd64 0.35-13 [15.5 kB]\n",
"Get:6 http://archive.ubuntu.com/ubuntu bionic/main amd64 libjbig2dec0 amd64 0.13-6 [55.9 kB]\n",
"Get:7 http://archive.ubuntu.com/ubuntu bionic-updates/main amd64 libgs9-common all 9.26~dfsg+0-0ubuntu0.18.04.17 [5,092 kB]\n",
"Get:8 http://archive.ubuntu.com/ubuntu bionic-updates/main amd64 libgs9 amd64 9.26~dfsg+0-0ubuntu0.18.04.17 [2,267 kB]\n",
"Get:9 http://archive.ubuntu.com/ubuntu bionic-updates/main amd64 ghostscript amd64 9.26~dfsg+0-0ubuntu0.18.04.17 [51.3 kB]\n",
"Get:10 http://archive.ubuntu.com/ubuntu bionic/main amd64 gsfonts all 1:8.11+urwcyr1.0.7~pre44-4.4 [3,120 kB]\n",
"Get:11 http://archive.ubuntu.com/ubuntu bionic-updates/main amd64 libcupsfilters1 amd64 1.20.2-0ubuntu3.1 [108 kB]\n",
"Fetched 14.1 MB in 1s (12.9 MB/s)\n",
"Selecting previously unselected package fonts-droid-fallback.\n",
"(Reading database ... 124062 files and directories currently installed.)\n",
"Preparing to unpack .../00-fonts-droid-fallback_1%3a6.0.1r16-1.1_all.deb ...\n",
"Unpacking fonts-droid-fallback (1:6.0.1r16-1.1) ...\n",
"Selecting previously unselected package poppler-data.\n",
"Preparing to unpack .../01-poppler-data_0.4.8-2_all.deb ...\n",
"Unpacking poppler-data (0.4.8-2) ...\n",
"Selecting previously unselected package fonts-noto-mono.\n",
"Preparing to unpack .../02-fonts-noto-mono_20171026-2_all.deb ...\n",
"Unpacking fonts-noto-mono (20171026-2) ...\n",
"Selecting previously unselected package libcupsimage2:amd64.\n",
"Preparing to unpack .../03-libcupsimage2_2.2.7-1ubuntu2.9_amd64.deb ...\n",
"Unpacking libcupsimage2:amd64 (2.2.7-1ubuntu2.9) ...\n",
"Selecting previously unselected package libijs-0.35:amd64.\n",
"Preparing to unpack .../04-libijs-0.35_0.35-13_amd64.deb ...\n",
"Unpacking libijs-0.35:amd64 (0.35-13) ...\n",
"Selecting previously unselected package libjbig2dec0:amd64.\n",
"Preparing to unpack .../05-libjbig2dec0_0.13-6_amd64.deb ...\n",
"Unpacking libjbig2dec0:amd64 (0.13-6) ...\n",
"Selecting previously unselected package libgs9-common.\n",
"Preparing to unpack .../06-libgs9-common_9.26~dfsg+0-0ubuntu0.18.04.17_all.deb ...\n",
"Unpacking libgs9-common (9.26~dfsg+0-0ubuntu0.18.04.17) ...\n",
"Selecting previously unselected package libgs9:amd64.\n",
"Preparing to unpack .../07-libgs9_9.26~dfsg+0-0ubuntu0.18.04.17_amd64.deb ...\n",
"Unpacking libgs9:amd64 (9.26~dfsg+0-0ubuntu0.18.04.17) ...\n",
"Selecting previously unselected package ghostscript.\n",
"Preparing to unpack .../08-ghostscript_9.26~dfsg+0-0ubuntu0.18.04.17_amd64.deb ...\n",
"Unpacking ghostscript (9.26~dfsg+0-0ubuntu0.18.04.17) ...\n",
"Selecting previously unselected package gsfonts.\n",
"Preparing to unpack .../09-gsfonts_1%3a8.11+urwcyr1.0.7~pre44-4.4_all.deb ...\n",
"Unpacking gsfonts (1:8.11+urwcyr1.0.7~pre44-4.4) ...\n",
"Selecting previously unselected package libcupsfilters1:amd64.\n",
"Preparing to unpack .../10-libcupsfilters1_1.20.2-0ubuntu3.1_amd64.deb ...\n",
"Unpacking libcupsfilters1:amd64 (1.20.2-0ubuntu3.1) ...\n",
"Setting up libgs9-common (9.26~dfsg+0-0ubuntu0.18.04.17) ...\n",
"Setting up fonts-droid-fallback (1:6.0.1r16-1.1) ...\n",
"Setting up gsfonts (1:8.11+urwcyr1.0.7~pre44-4.4) ...\n",
"Setting up poppler-data (0.4.8-2) ...\n",
"Setting up fonts-noto-mono (20171026-2) ...\n",
"Setting up libcupsfilters1:amd64 (1.20.2-0ubuntu3.1) ...\n",
"Setting up libcupsimage2:amd64 (2.2.7-1ubuntu2.9) ...\n",
"Setting up libjbig2dec0:amd64 (0.13-6) ...\n",
"Setting up libijs-0.35:amd64 (0.35-13) ...\n",
"Setting up libgs9:amd64 (9.26~dfsg+0-0ubuntu0.18.04.17) ...\n",
"Setting up ghostscript (9.26~dfsg+0-0ubuntu0.18.04.17) ...\n",
"Processing triggers for man-db (2.8.3-2ubuntu0.1) ...\n",
"Processing triggers for fontconfig (2.12.6-0ubuntu2) ...\n",
"Processing triggers for libc-bin (2.27-3ubuntu1.6) ...\n",
"\u001b[K |████████████████████████████████| 122 kB 6.3 MB/s \n",
"\u001b[K |████████████████████████████████| 46 kB 3.5 MB/s \n",
"\u001b[K |████████████████████████████████| 2.8 MB 64.5 MB/s \n",
"\u001b[K |████████████████████████████████| 97 kB 7.5 MB/s \n",
"\u001b[K |████████████████████████████████| 2.6 MB 38.6 MB/s \n",
"\u001b[K |████████████████████████████████| 5.6 MB 9.6 MB/s \n",
"\u001b[K |████████████████████████████████| 3.2 MB 70.6 MB/s \n",
"\u001b[K |████████████████████████████████| 86 kB 5.9 MB/s \n",
"\u001b[K |████████████████████████████████| 4.0 MB 47.8 MB/s \n",
"\u001b[?25h Building wheel for img2pdf (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
"\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\n",
"pytest 3.6.4 requires pluggy<0.8,>=0.5, but you have pluggy 1.0.0 which is incompatible.\u001b[0m\n",
"/usr/local/lib/python3.8/dist-packages/ocrmypdf/subprocess\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"exit()"
],
"metadata": {
"id": "pWcw33PATcqe"
},
"execution_count": 3,
"outputs": []
},
{
"cell_type": "code",
"source": [
"import ocrmypdf\n",
"ocrmypdf.ocr('131.pdf', '131.ocr.pdf')\n",
"ocrmypdf.ocr('131_vHIT.pdf', '131_vHIT.ocr.pdf')"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "JcDY2hNjmJJq",
"outputId": "a14dedff-129f-42ee-a997-972e34598eee"
},
"execution_count": 1,
"outputs": [
{
"output_type": "stream",
"name": "stderr",
"text": [
"ERROR:ocrmypdf.subprocess:\n",
"OCRmyPDF requires 'tesseract' 4.1.1 or higher. Your system appears\n",
"to have 4.0.0-beta.1. Please update this program.\n",
"\n",
"ERROR:ocrmypdf.subprocess:\n",
"OCRmyPDF requires 'gs' 9.50 or higher. Your system appears\n",
"to have 9.26. Please update this program.\n",
"\n",
"Scanning contents: 100%|██████████| 3/3 [00:05<00:00, 1.94s/page]\n",
"OCR: 100%|██████████| 3.0/3.0 [00:14<00:00, 4.98s/page]\n",
"PDF/A conversion: 100%|██████████| 3/3 [00:01<00:00, 1.97page/s]\n",
"Recompressing JPEGs: 0image [00:00, ?image/s]\n",
"Deflating JPEGs: 100%|██████████| 1/1 [00:00<00:00, 331.07image/s]\n",
"JBIG2: 0item [00:00, ?item/s]\n",
"ERROR:ocrmypdf.subprocess:\n",
"OCRmyPDF requires 'tesseract' 4.1.1 or higher. Your system appears\n",
"to have 4.0.0-beta.1. Please update this program.\n",
"\n",
"ERROR:ocrmypdf.subprocess:\n",
"OCRmyPDF requires 'gs' 9.50 or higher. Your system appears\n",
"to have 9.26. Please update this program.\n",
"\n",
"Scanning contents: 100%|██████████| 4/4 [00:00<00:00, 4.02page/s]\n",
"OCR: 100%|██████████| 4.0/4.0 [00:17<00:00, 4.42s/page]\n",
"PDF/A conversion: 100%|██████████| 4/4 [00:01<00:00, 3.06page/s]\n",
"Recompressing JPEGs: 0image [00:00, ?image/s]\n",
"Deflating JPEGs: 0image [00:00, ?image/s]\n",
"JBIG2: 0item [00:00, ?item/s]\n"
]
},
{
"output_type": "execute_result",
"data": {
"text/plain": [
"<ExitCode.ok: 0>"
]
},
"metadata": {},
"execution_count": 1
}
]
}
]
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment