Skip to content

Instantly share code, notes, and snippets.

View mara004's full-sized avatar
💭
Might stop working on software soon

mara004

💭
Might stop working on software soon
View GitHub Profile
@mara004
mara004 / ghostscript_shell.py
Last active July 11, 2024 20:43
PDF rendering with Ghostscript (via subprocess)
# SPDX-FileCopyrightText: 2024 geisserml <geisserml@gmail.com>
# SPDX-FileCopyrightText: 2024 James R. Barlow <james@purplerock.ca>
# SPDX-License-Identifier: MPL-2.0
# Initial code derived from ocrmypdf/_exec/ghostscript.py
# Note that Ghostscript is AGPL-licensed. However, we only invoke it via subprocess here, so it is unclear whether copyleft applies to this code.
# See also https://www.gnu.org/licenses/gpl-faq.en.html#MereAggregation
import io
import os
@mara004
mara004 / pymupdf.py
Created July 11, 2024 20:10
PDF rendering with pymupdf
# SPDX-FileCopyrightText: 2024 geisserml <geisserml@gmail.com>
# SPDX-License-Identifier: MPL-2.0
# Note that (py)mupdf is AGPL-licensed, so this code is altogether affected by copyleft
import PIL.Image
import fitz as pymupdf
def invoke_pymupdf(filepath, index, scale=4, rotation=0, password=None):
@mara004
mara004 / poppler_qt5.py
Last active July 13, 2024 14:31
PDF rendering with poppler-qt5
# SPDX-FileCopyrightText: 2024 geisserml <geisserml@gmail.com>
# SPDX-License-Identifier: MPL-2.0
# Note that Poppler is GPL-licensed, so this code is altogether affected by copyleft
import io
import PIL.Image
from popplerqt5 import Poppler
from PyQt5.QtCore import QByteArray, QBuffer
@mara004
mara004 / poppler.py
Last active July 11, 2024 20:35
PDF rendering with python-poppler
# SPDX-FileCopyrightText: 2024 geisserml <geisserml@gmail.com>
# SPDX-License-Identifier: MPL-2.0
# Note that Poppler is GPL-licensed, so this code is altogether affected by copyleft
import PIL.Image
import poppler # python-poppler
from poppler.cpp.page_renderer import render_hint
def _translate_rotation(rotation):
@mara004
mara004 / poppler_gtk.py
Last active July 11, 2024 20:29
PDF rendering with poppler-gtk
# SPDX-FileCopyrightText: 2024 geisserml <geisserml@gmail.com>
# SPDX-License-Identifier: MPL-2.0
# Note that Poppler is GPL-licensed, so this code is altogether affected by copyleft
import math
from pathlib import Path
import PIL.Image
import cairo
import gi
@mara004
mara004 / pnp.py
Last active July 8, 2024 13:11
Page number spec parser [Draft]
# SPDX-FileCopyrightText: 2024 geisserml <geisserml@gmail.com>
# SPDX-License-Identifier: MPL-2.0
# Sophisticated parser for a page number specification mini-language
# Technically, this might be a use case for a parser generator like pyparsing or PLY, but this is a manual implementation based on common string operations.
__all__ = ["parse_pagenums"]
import logging
from collections import namedtuple
@mara004
mara004 / argparse_compat.py
Last active July 21, 2024 22:34
Argparse compat extensions
# SPDX-FileCopyrightText: 2024 geisserml <geisserml@gmail.com>
# SPDX-License-Identifier: Apache-2.0 OR BSD-3-Clause
import sys
import argparse
if sys.version_info >= (3, 9):
from argparse import BooleanOptionalAction
else:
@mara004
mara004 / tile.py
Last active July 11, 2024 20:36
JPEG to PDF N-up with pypdfium2
# SPDX-FileCopyrightText: 2024 geisserml <geisserml@gmail.com>
# SPDX-License-Identifier: Apache-2.0 OR BSD-3-Clause
import math
import argparse
import itertools
import pypdfium2 as pdfium
from pathlib import Path
parser = argparse.ArgumentParser()
@mara004
mara004 / parse_gh_release.py
Last active September 26, 2023 00:28
Extract information from GitHub release notes
# SPDX-FileCopyrightText: 2023 geisserml <geisserml@gmail.com>
# SPDX-License-Identifier: CC-BY-4.0 OR Apache-2.0 OR BSD-3-Clause
# Unlike repository files, there is no "raw view" for GitHub releases, but we can extract the plain markdown content using the GitHub web API
# See also https://stackoverflow.com/q/76995969/15547292
# The following code snippet shows how to fetch a release title from pdfium-binaries and extract the full version number from it
import re
import json
@mara004
mara004 / safer_tar_extract.py
Last active July 21, 2024 22:23
Safer tar extraction
# SPDX-FileCopyrightText: 2023 geisserml <geisserml@gmail.com>
# SPDX-License-Identifier: Apache-2.0 OR BSD-3-Clause OR MPL-2.0
# Safer tar extraction (hopefully) preventing CVE-2007-4559 etc.
# Tries to use the most elegant strategy available in the caller's python version (>= 3.6)
__all__ = ["safer_tar_unpack"]
import sys
if sys.version_info >= (3, 11, 4): # PEP 706