Created
November 25, 2022 06:13
-
-
Save x1001000/6f97aa2da2487212f4358ea38924d064 to your computer and use it in GitHub Desktop.
PyPDF2-extract_text
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"nbformat": 4, | |
"nbformat_minor": 0, | |
"metadata": { | |
"colab": { | |
"provenance": [], | |
"authorship_tag": "ABX9TyOxy8LFoUzuc+PmLx9nJT6N", | |
"include_colab_link": true | |
}, | |
"kernelspec": { | |
"name": "python3", | |
"display_name": "Python 3" | |
}, | |
"language_info": { | |
"name": "python" | |
} | |
}, | |
"cells": [ | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "view-in-github", | |
"colab_type": "text" | |
}, | |
"source": [ | |
"<a href=\"https://colab.research.google.com/gist/x1001000/6f97aa2da2487212f4358ea38924d064/pypdf2-extract_text.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"!pip install PyPDF2 --quiet\n", | |
"from PyPDF2 import PdfReader\n", | |
"import re" | |
], | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"id": "Cf6fhMaFQUCD", | |
"outputId": "64654c80-3dfd-453c-fb2e-e9b81f127563" | |
}, | |
"execution_count": 1, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"name": "stdout", | |
"text": [ | |
"\u001b[?25l\r\u001b[K |█▌ | 10 kB 22.7 MB/s eta 0:00:01\r\u001b[K |███ | 20 kB 11.7 MB/s eta 0:00:01\r\u001b[K |████▌ | 30 kB 14.9 MB/s eta 0:00:01\r\u001b[K |██████ | 40 kB 6.0 MB/s eta 0:00:01\r\u001b[K |███████▍ | 51 kB 5.4 MB/s eta 0:00:01\r\u001b[K |█████████ | 61 kB 6.3 MB/s eta 0:00:01\r\u001b[K |██████████▍ | 71 kB 7.1 MB/s eta 0:00:01\r\u001b[K |███████████▉ | 81 kB 7.9 MB/s eta 0:00:01\r\u001b[K |█████████████▍ | 92 kB 6.6 MB/s eta 0:00:01\r\u001b[K |██████████████▉ | 102 kB 7.0 MB/s eta 0:00:01\r\u001b[K |████████████████▍ | 112 kB 7.0 MB/s eta 0:00:01\r\u001b[K |█████████████████▉ | 122 kB 7.0 MB/s eta 0:00:01\r\u001b[K |███████████████████▎ | 133 kB 7.0 MB/s eta 0:00:01\r\u001b[K |████████████████████▉ | 143 kB 7.0 MB/s eta 0:00:01\r\u001b[K |██████████████████████▎ | 153 kB 7.0 MB/s eta 0:00:01\r\u001b[K |███████████████████████▊ | 163 kB 7.0 MB/s eta 0:00:01\r\u001b[K |█████████████████████████▎ | 174 kB 7.0 MB/s eta 0:00:01\r\u001b[K |██████████████████████████▊ | 184 kB 7.0 MB/s eta 0:00:01\r\u001b[K |████████████████████████████▏ | 194 kB 7.0 MB/s eta 0:00:01\r\u001b[K |█████████████████████████████▊ | 204 kB 7.0 MB/s eta 0:00:01\r\u001b[K |███████████████████████████████▏| 215 kB 7.0 MB/s eta 0:00:01\r\u001b[K |████████████████████████████████| 220 kB 7.0 MB/s \n", | |
"\u001b[?25h" | |
] | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 35 | |
}, | |
"id": "fg9jQmrjQJuV", | |
"outputId": "0593de53-2f1a-4c41-ac96-0cc0428b3e4d" | |
}, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/plain": [ | |
"'5.16'" | |
], | |
"application/vnd.google.colaboratory.intrinsic+json": { | |
"type": "string" | |
} | |
}, | |
"metadata": {}, | |
"execution_count": 2 | |
} | |
], | |
"source": [ | |
"reader = PdfReader('VNG-EyeSeeCam.pdf')\n", | |
"text = reader.pages[0].extract_text()\n", | |
"Mean = re.search('Mean ± \\(SD\\) (.*) ± \\(.*\\), Median', text).group(1)\n", | |
"Mean" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"reader = PdfReader('vHIT-EyeSeeCam.pdf')\n", | |
"text = reader.pages[0].extract_text()\n", | |
"Gain_Asymmetry = re.search(' ....(.*%)\\n', text).group(1)\n", | |
"left, right = re.findall('60 ms : (.*) ±', text)\n", | |
"Gain_Asymmetry" | |
], | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 35 | |
}, | |
"id": "QoHMqQpMRvvr", | |
"outputId": "73ad1daf-a3b6-420b-b580-31fea680de35" | |
}, | |
"execution_count": 3, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/plain": [ | |
"'1%'" | |
], | |
"application/vnd.google.colaboratory.intrinsic+json": { | |
"type": "string" | |
} | |
}, | |
"metadata": {}, | |
"execution_count": 3 | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"right" | |
], | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 35 | |
}, | |
"id": "KpQ-ajInYliJ", | |
"outputId": "05aa6003-e9bd-4f80-8cf4-86e098011cb4" | |
}, | |
"execution_count": 4, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/plain": [ | |
"'1.05'" | |
], | |
"application/vnd.google.colaboratory.intrinsic+json": { | |
"type": "string" | |
} | |
}, | |
"metadata": {}, | |
"execution_count": 4 | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"left" | |
], | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 35 | |
}, | |
"id": "ebZTOWj_acJX", | |
"outputId": "37a897f7-db59-4ff8-fe29-dfc656499069" | |
}, | |
"execution_count": 5, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/plain": [ | |
"'1.17'" | |
], | |
"application/vnd.google.colaboratory.intrinsic+json": { | |
"type": "string" | |
} | |
}, | |
"metadata": {}, | |
"execution_count": 5 | |
} | |
] | |
} | |
] | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment