Skip to content

Instantly share code, notes, and snippets.

@x1001000
Created November 25, 2022 06:13
Show Gist options
  • Save x1001000/6f97aa2da2487212f4358ea38924d064 to your computer and use it in GitHub Desktop.
Save x1001000/6f97aa2da2487212f4358ea38924d064 to your computer and use it in GitHub Desktop.
PyPDF2-extract_text
Display the source blob
Display the rendered blob
Raw
{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"provenance": [],
"authorship_tag": "ABX9TyOxy8LFoUzuc+PmLx9nJT6N",
"include_colab_link": true
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
},
"language_info": {
"name": "python"
}
},
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "view-in-github",
"colab_type": "text"
},
"source": [
"<a href=\"https://colab.research.google.com/gist/x1001000/6f97aa2da2487212f4358ea38924d064/pypdf2-extract_text.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
]
},
{
"cell_type": "code",
"source": [
"!pip install PyPDF2 --quiet\n",
"from PyPDF2 import PdfReader\n",
"import re"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "Cf6fhMaFQUCD",
"outputId": "64654c80-3dfd-453c-fb2e-e9b81f127563"
},
"execution_count": 1,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"\u001b[?25l\r\u001b[K |█▌ | 10 kB 22.7 MB/s eta 0:00:01\r\u001b[K |███ | 20 kB 11.7 MB/s eta 0:00:01\r\u001b[K |████▌ | 30 kB 14.9 MB/s eta 0:00:01\r\u001b[K |██████ | 40 kB 6.0 MB/s eta 0:00:01\r\u001b[K |███████▍ | 51 kB 5.4 MB/s eta 0:00:01\r\u001b[K |█████████ | 61 kB 6.3 MB/s eta 0:00:01\r\u001b[K |██████████▍ | 71 kB 7.1 MB/s eta 0:00:01\r\u001b[K |███████████▉ | 81 kB 7.9 MB/s eta 0:00:01\r\u001b[K |█████████████▍ | 92 kB 6.6 MB/s eta 0:00:01\r\u001b[K |██████████████▉ | 102 kB 7.0 MB/s eta 0:00:01\r\u001b[K |████████████████▍ | 112 kB 7.0 MB/s eta 0:00:01\r\u001b[K |█████████████████▉ | 122 kB 7.0 MB/s eta 0:00:01\r\u001b[K |███████████████████▎ | 133 kB 7.0 MB/s eta 0:00:01\r\u001b[K |████████████████████▉ | 143 kB 7.0 MB/s eta 0:00:01\r\u001b[K |██████████████████████▎ | 153 kB 7.0 MB/s eta 0:00:01\r\u001b[K |███████████████████████▊ | 163 kB 7.0 MB/s eta 0:00:01\r\u001b[K |█████████████████████████▎ | 174 kB 7.0 MB/s eta 0:00:01\r\u001b[K |██████████████████████████▊ | 184 kB 7.0 MB/s eta 0:00:01\r\u001b[K |████████████████████████████▏ | 194 kB 7.0 MB/s eta 0:00:01\r\u001b[K |█████████████████████████████▊ | 204 kB 7.0 MB/s eta 0:00:01\r\u001b[K |███████████████████████████████▏| 215 kB 7.0 MB/s eta 0:00:01\r\u001b[K |████████████████████████████████| 220 kB 7.0 MB/s \n",
"\u001b[?25h"
]
}
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 35
},
"id": "fg9jQmrjQJuV",
"outputId": "0593de53-2f1a-4c41-ac96-0cc0428b3e4d"
},
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"'5.16'"
],
"application/vnd.google.colaboratory.intrinsic+json": {
"type": "string"
}
},
"metadata": {},
"execution_count": 2
}
],
"source": [
"reader = PdfReader('VNG-EyeSeeCam.pdf')\n",
"text = reader.pages[0].extract_text()\n",
"Mean = re.search('Mean ± \\(SD\\) (.*) ± \\(.*\\), Median', text).group(1)\n",
"Mean"
]
},
{
"cell_type": "code",
"source": [
"reader = PdfReader('vHIT-EyeSeeCam.pdf')\n",
"text = reader.pages[0].extract_text()\n",
"Gain_Asymmetry = re.search(' ....(.*%)\\n', text).group(1)\n",
"left, right = re.findall('60 ms : (.*) ±', text)\n",
"Gain_Asymmetry"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 35
},
"id": "QoHMqQpMRvvr",
"outputId": "73ad1daf-a3b6-420b-b580-31fea680de35"
},
"execution_count": 3,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"'1%'"
],
"application/vnd.google.colaboratory.intrinsic+json": {
"type": "string"
}
},
"metadata": {},
"execution_count": 3
}
]
},
{
"cell_type": "code",
"source": [
"right"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 35
},
"id": "KpQ-ajInYliJ",
"outputId": "05aa6003-e9bd-4f80-8cf4-86e098011cb4"
},
"execution_count": 4,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"'1.05'"
],
"application/vnd.google.colaboratory.intrinsic+json": {
"type": "string"
}
},
"metadata": {},
"execution_count": 4
}
]
},
{
"cell_type": "code",
"source": [
"left"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 35
},
"id": "ebZTOWj_acJX",
"outputId": "37a897f7-db59-4ff8-fe29-dfc656499069"
},
"execution_count": 5,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"'1.17'"
],
"application/vnd.google.colaboratory.intrinsic+json": {
"type": "string"
}
},
"metadata": {},
"execution_count": 5
}
]
}
]
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment