This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# SPDX-FileCopyrightText: 2024 geisserml <geisserml@gmail.com> | |
# SPDX-FileCopyrightText: 2024 James R. Barlow <james@purplerock.ca> | |
# SPDX-License-Identifier: MPL-2.0 | |
# Initial code derived from ocrmypdf/_exec/ghostscript.py | |
# Note that Ghostscript is AGPL-licensed. However, we only call it via subprocess here, so it is unclear whether copyleft applies to this code. | |
# See also https://www.gnu.org/licenses/gpl-faq.en.html#MereAggregation | |
import io | |
import os |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# SPDX-FileCopyrightText: 2024 geisserml <geisserml@gmail.com> | |
# SPDX-License-Identifier: MPL-2.0 | |
# Note that (py)mupdf is AGPL-licensed, so this code is altogether affected by copyleft | |
import PIL.Image | |
import fitz as pymupdf | |
def invoke_pymupdf(filepath, index, scale=4, rotation=0, password=None): |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# SPDX-FileCopyrightText: 2024 geisserml <geisserml@gmail.com> | |
# SPDX-License-Identifier: MPL-2.0 | |
# Note that Poppler is GPL-licensed, so this code is altogether affected by copyleft | |
import io | |
import PIL.Image | |
from popplerqt5 import Poppler | |
from PyQt5.QtCore import QByteArray, QBuffer | |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# SPDX-FileCopyrightText: 2024 geisserml <geisserml@gmail.com> | |
# SPDX-License-Identifier: MPL-2.0 | |
# Note that Poppler is GPL-licensed, so this code is altogether affected by copyleft | |
import PIL.Image | |
import poppler # python-poppler | |
from poppler.cpp.page_renderer import render_hint | |
def _translate_rotation(rotation): |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# SPDX-FileCopyrightText: 2024 geisserml <geisserml@gmail.com> | |
# SPDX-License-Identifier: MPL-2.0 | |
# Note that Poppler is GPL-licensed, so this code is altogether affected by copyleft | |
import math | |
from pathlib import Path | |
import PIL.Image | |
import cairo | |
import gi |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# SPDX-FileCopyrightText: 2024 geisserml <geisserml@gmail.com> | |
# SPDX-License-Identifier: MPL-2.0 | |
# Sophisticated parser for a page number specification mini-language | |
# Technically, this might be a use case for a parser generator like pyparsing or PLY, but this is a manual implementation based on common string operations. | |
__all__ = ["parse_pagenums"] | |
import logging | |
from collections import namedtuple |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# SPDX-FileCopyrightText: 2024 geisserml <geisserml@gmail.com> | |
# SPDX-License-Identifier: Apache-2.0 OR BSD-3-Clause | |
import sys | |
import argparse | |
if sys.version_info >= (3, 9): | |
from argparse import BooleanOptionalAction | |
else: |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# SPDX-FileCopyrightText: 2024 geisserml <geisserml@gmail.com> | |
# SPDX-License-Identifier: Apache-2.0 OR BSD-3-Clause | |
import math | |
import argparse | |
import itertools | |
import pypdfium2 as pdfium | |
from pathlib import Path | |
parser = argparse.ArgumentParser() |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# SPDX-FileCopyrightText: 2023 geisserml <geisserml@gmail.com> | |
# SPDX-License-Identifier: CC-BY-4.0 OR Apache-2.0 OR BSD-3-Clause | |
# Unlike repository files, there is no "raw view" for GitHub releases, but we can extract the plain markdown content using the GitHub web API | |
# See also https://stackoverflow.com/q/76995969/15547292 | |
# The following code snippet shows how to get a release title from pdfium-binaries to extract the full version | |
import re | |
import json |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# SPDX-FileCopyrightText: 2023 geisserml <geisserml@gmail.com> | |
# SPDX-License-Identifier: Apache-2.0 OR BSD-3-Clause OR MPL-2.0 | |
# Safer tar extraction (hopefully) preventing CVE-2007-4559 etc. | |
# Tries to use the most elegant strategy available in the caller's Python version (>= 3.6) | |
__all__ = ["safer_tar_unpack"] | |
import sys | |
if sys.version_info >= (3, 11, 4): # PEP 706 |
NewerOlder