Skip to content

Instantly share code, notes, and snippets.

View mara004's full-sized avatar
💭
Might stop working on software soon

mara004

💭
Might stop working on software soon
View GitHub Profile
@jrsmith3
jrsmith3 / sla2pdf.py
Created March 12, 2014 01:31
Converts every Scribus document in a specified directory to a PDF.
"""
Convert every .sla to a pdf in a specified directory.
This script can only be run from within [scribus](http://scribus.net).
"""
import os
work_dir = #you have to explicitly tell scribus where your working directory is.
filenames = os.listdir(work_dir)
#!/usr/bin/python
# Copyright 2006 Google Inc.
# Author: agl@imperialviolet.org (Adam Langley)
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
@bittner
bittner / keyboard-keys.md
Created February 28, 2019 22:50
Keyboard keys markup in Markdown

Ctrl + Alt + Space

@lebedov
lebedov / jpype_api_demo.py
Last active July 10, 2023 14:04
How to call pdfbox's API with JPype.
#!/usr/bin/env python3
"""
How to call pdfbox's API with JPype.
"""
import pathlib
import pkg_resources
import re
import urllib.request
@lebedov
lebedov / jpype_pdf_text_stripper.py
Created April 28, 2021 12:29
How to use pdfbox's PDFTextStripper class in Python.
#!/usr/bin/env python3
"""
How to use pdfbox's PDFTextStripper class in Python.
"""
import pathlib
import pkg_resources
import re
import urllib.request
@mara004
mara004 / pdfbox.py
Last active June 20, 2024 16:44
PDF rendering with PDFBox, from Python
# SPDX-FileCopyrightText: 2023 geisserml <geisserml@gmail.com>
# SPDX-License-Identifier: Apache-2.0
# Assuming you have an Apache PDFBox 3 jar in the same directory
from pathlib import Path
import jpype
import jpype.imports
import PIL.Image
@mara004
mara004 / pdfbox_version_parsing.py
Last active July 14, 2023 11:50
Parse pdfbox versions and build a nice, robust representation
# SPDX-FileCopyrightText: 2023 geisserml <geisserml@gmail.com>
# SPDX-License-Identifier: CC-BY-4.0 OR Apache-2.0
import re
from datetime import datetime
from urllib.request import urlopen
from packaging.version import Version
PB_RELEASE_URL = "https://archive.apache.org/dist/pdfbox/"
PB_DISTS_RE = r'<a href="([\d\.]+.+?)/">.+</a>\s+([\d\-]+ [\d:]+)'
@mara004
mara004 / pypdfjs.py
Last active June 20, 2024 16:45
PDF rendering with pdf.js, from Python
# SPDX-FileCopyrightText: 2023 geisserml <geisserml@gmail.com>
# SPDX-License-Identifier: Apache-2.0
# See also https://github.com/extremeheat/JSPyBridge/blob/master/examples/python/pdfjs.py
# Py-Depends: pillow, javascript >= 1.1.0 (jspybridge)
# Js-Depends: pdfjs-dist, canvas
# Use `python -m pip install` and `python -m javascript --install`
import argparse
@mara004
mara004 / safer_tar_extract.py
Last active June 20, 2024 10:21
Safer tar extraction
# SPDX-FileCopyrightText: 2023 geisserml <geisserml@gmail.com>
# SPDX-License-Identifier: CC-BY-4.0 OR Apache-2.0 OR BSD-3-Clause
# Safer tar extraction (hopefully) preventing CVE-2007-4559 etc.
# Tries to use the most elegant strategy available in the caller's python version (>= 3.6)
__all__ = ["safer_tar_unpack"]
import sys
if sys.version_info >= (3, 11, 4): # PEP 706
@mara004
mara004 / parse_gh_release.py
Last active September 26, 2023 00:28
Extract information from GitHub release notes
# SPDX-FileCopyrightText: 2023 geisserml <geisserml@gmail.com>
# SPDX-License-Identifier: CC-BY-4.0 OR Apache-2.0 OR BSD-3-Clause
# Unlike repository files, there is no "raw view" for GH releases, but we can extract the plain markdown content using GH web API
# See also https://stackoverflow.com/q/76995969/15547292
# The following code snippet shows how to get a release title from pdfium-binaries to extract the full version
import re
import json