Skip to content

Instantly share code, notes, and snippets.

@michalc
michalc / block_or_not.py
Created April 24, 2024 13:34
Script for checking if any PostgreSQL session blocks another
# docker run --rm -it -p 5432:5432 -e POSTGRES_PASSWORD=password postgres:14
import pprint
import threading
import time
from concurrent.futures import ThreadPoolExecutor
from contextlib import contextmanager
import psycopg2
import psycopg2.extras
@michalc
michalc / sqlite.py
Last active April 3, 2024 17:26
Use libsqlite3 directly from Python with ctypes: without using the built-in sqlite3 Python package, and without compiling anything
# From https://stackoverflow.com/a/68876046/1319998, which is itself inspired by https://stackoverflow.com/a/68814418/1319998
from contextlib import contextmanager
from collections import namedtuple
from ctypes import cdll, byref, string_at, c_char_p, c_int, c_double, c_int64, c_void_p
from ctypes.util import find_library
from sys import platform
def query(db_file, sql, params=()):
@michalc
michalc / s3_bulk_delete.py
Last active December 7, 2023 19:17
Bulk delete files from an AWS S3 bucket in Python using multiple threads via a parallel (eventually) depth-first search
# Deletes objects in bulk using boto3's delete_objects, but using multiple threads to achieve some
# parallelism - in spite of the GIL multiple HTTP requests to S3 should happen at the same time.
# Instead of looping over all keys under the root prefix, it walks the tree of keys of delimiter-
# defined "folders" in a depth-first way, which allows each page to be processed by a separate
# thread as it's discovered. Depth-first is done because the depth is limited by the maximum key
# size of 1024 in S3, and so means that there is a limit to the memory used by the algorithm to
# store the next requests to make. This would not be the case with breadth-first because there is
# no limit to how many keys are in any folder.
#
# To do the search in parallel, each bit of work (i.e. an HTTP request to fetch a page of keys
@michalc
michalc / cloudfoundry-check-stacks.py
Created November 14, 2023 07:38
Check the stack of apps in CloudFoundry
import json
import os
from urllib.parse import urlparse, parse_qsl
import requests
from rich import box
from rich.console import Console
from rich.table import Table
with open(f'{os.environ["HOME"]}/.cf/config.json') as f:
@michalc
michalc / aws_sig_v4_headers.py
Created October 23, 2018 05:33
Function that calculates AWS signature version 4 headers
import datetime
import hashlib
import hmac
import urllib.parse
def aws_sig_v4_headers(access_key_id, secret_access_key, pre_auth_headers,
service, region, host, method, path, query, payload):
algorithm = 'AWS4-HMAC-SHA256'
@michalc
michalc / normalise_environment.py
Last active October 8, 2023 07:05
Structured data in environment variables: nested dictionaries and lists
def normalise_environment(key_values):
''' Converts denormalised dict of (string -> string) pairs, where the first string
is treated as a path into a nested list/dictionary structure
{
"FOO__1__BAR": "setting-1",
"FOO__1__BAZ": "setting-2",
"FOO__2__FOO": "setting-3",
"FOO__2__BAR": "setting-4",
"FIZZ": "setting-5",
@michalc
michalc / decrypt-ses-emails-in-s3.py
Last active November 16, 2022 03:49
Decrypt KMS-encrypted SES emails in an S3 bucket
import base64
import json
import boto3
from cryptography.hazmat.primitives.ciphers.aead import AESGCM
s3 = boto3.resource('s3')
bucket = s3.Bucket('my-bucket')
kms_client = boto3.client('kms')
@michalc
michalc / make_8gb_legacy_zip.py
Created January 3, 2022 16:37
Generating a Zip 2.0 file that's (just under) 8GiB
# Often it's claimed that a Zip 2.0 file cannot be bigger than 4GiB
# Here's how to make one that's just under 8GiB
from datetime import datetime
from stream_zip import stream_zip, ZIP_32
now = datetime.now()
perms = 0o600
def files():
for i in range(0, 0xffff):
@michalc
michalc / asyncio_read_write_lock.py
Last active January 3, 2022 09:46
Python asyncio read-write lock, using a generic first-in-first-out lock
import asyncio
import collections
import contextlib
class Read(asyncio.Future):
    """Marker type for the read mode of the lock; a subclass of asyncio.Future.

    NOTE(review): presumably each instance is a pending or held read request
    that is resolved once the lock is granted -- confirm against the lock
    implementation, which is not visible here.
    """

    @staticmethod
    def is_compatible(holds):
        """Return True when the ``Write`` entry of ``holds`` is falsy.

        ``holds`` is indexed by lock-mode class. It presumably maps each mode
        to the number of current holders in that mode -- verify against the
        caller.
        """
        write_holders = holds[Write]
        return not write_holders
class Write(asyncio.Future):
class MyPipeline(_PipelineV2):
# Everything is a method so nothing happens at import time for flexibility (although possibly
# does a bit of discovery magic... need to think about that...)
# Everything is a _static_ method: nothing on self since things are run on different bits of hardware,
# and gets any run-time dependencies injected in
#
# _PipelineV2 won't actually have any code: other parts of the system will interrogate its
# subclasses as needed. For example
# - Code in Data Flow would construct a DAG
# - The test harness would then run this and upstream pipelines synchronously