Skip to content

Instantly share code, notes, and snippets.

View jonashaag's full-sized avatar

Jonas Haag jonashaag

View GitHub Profile
@jonashaag
jonashaag / boto3_copytree.py
Created August 28, 2022 19:18
boto3 copytree prefix
import shutil
from pathlib import Path
def s3_copytree(bucket, src_prefix: str, dst_folder: Path):
    """Download every object under an S3 key prefix into a local folder.

    The part of each object key that follows ``src_prefix`` is used as the
    path relative to ``dst_folder``; intermediate directories are created
    as needed.

    Args:
        bucket: Object exposing ``objects.filter(Prefix=...)`` whose items
            have a ``key`` attribute and a ``get()`` method returning a
            mapping with a readable ``"Body"`` stream (e.g. a boto3
            ``Bucket`` resource).
        src_prefix: Key prefix selecting the objects to copy. It may be
            given with or without a trailing ``/``.
        dst_folder: Local directory that receives the files.
    """
    for s3_f in bucket.objects.filter(Prefix=src_prefix):
        # Keys ending in "/" are folder placeholders, not files; trying to
        # open() them would hit a directory.
        if s3_f.key.endswith("/"):
            continue
        # Without the lstrip, a prefix given without trailing slash leaves
        # a leading "/" here, and ``dst_folder / "/x"`` silently resolves
        # to the absolute path "/x" outside of ``dst_folder``.
        relative_key = s3_f.key.removeprefix(src_prefix).lstrip("/")
        target_path = dst_folder / relative_key
        target_path.parent.mkdir(parents=True, exist_ok=True)
        body = s3_f.get()["Body"]
        try:
            with open(target_path, "wb") as target_f:
                shutil.copyfileobj(body, target_f)
        finally:
            # Release the underlying stream even if the copy fails midway.
            body.close()
@jonashaag
jonashaag / index.ts
Created August 23, 2022 11:12
Cloudflare Workers simple key value database
declare const KV: KVNamespace
addEventListener('fetch', event => event.respondWith(handleRequest(event.request)))
const handleRequest = async (request: Request): Promise<Response> => {
if (!auth(request)) {
return new Response('Forbidden', { status: 403 })
}
const key = new URL(request.url).pathname.substr(1)
@jonashaag
jonashaag / byteswap.pyx
Created July 22, 2022 10:00
Fast byteswaps in Cython, multiple orders of magnitude speedup over struct.unpack.
from cython cimport Py_ssize_t
from libc.stdint cimport (
uint8_t,
uint16_t,
uint32_t,
uint64_t,
)
def read_float_with_byteswap(bytes data, Py_ssize_t offset, bint byteswap):
@jonashaag
jonashaag / conda_proxy.py
Created April 14, 2022 20:45
Simple caching Conda proxy
import atexit
import base64
import logging
import os
import pickle
import diskcache
import proxy2
@jonashaag
jonashaag / python_int12.py
Created April 11, 2022 21:10
Python int12
def pack(arr):
"""Pack integers < 2**12 into 12 bit integers, encoded as bytes"""
out = []
for i in range(0, len(arr), 2):
e1, e2 = arr[i:i+2]
assert 0 <= e1 < 2**12
assert 0 <= e2 < 2**12
e1 = (e1 << 4) | (e2 >> 8)
e2 &= 2**8-1
assert e1 <= 2**16-1
@jonashaag
jonashaag / distutils_compiler_launcher.py
Last active March 12, 2022 21:26
Python distutils/setup.py/pip compiler launcher (ccache/sccache)
class CompilerLauncherMixin:
"""Add "compiler launchers" to distutils.
We use this to be able to run the Pandas build using "ccache".
A compiler launcher is a program that is invoked instead of invoking the
compiler directly. It is passed the full compiler invocation command line.
A similar feature exists in CMake, see
https://cmake.org/cmake/help/latest/prop_tgt/LANG_COMPILER_LAUNCHER.html.
@jonashaag
jonashaag / snowflake_fix_fetch.py
Created February 22, 2022 16:24
Snowflake fix fetch
def snowflake_fix_fetch(
df,
*,
parse_variant_columns=(),
lower_column_names=True,
convert_int64=True,
):
"""Apply Snowflake-specific fixes to a dataframe fetched from Snowflake.
- Lower all column names
- Parse VARIANT columns
@jonashaag
jonashaag / conftest.py
Created February 16, 2022 15:07
PySpark Continuous Integration setup
from pyspark.sql import SparkSession
def local_pyspark_cluster(n_cpus=1, memory_mb=512) -> SparkSession:
"""Start a local PySpark cluster with default settings.
Returns a client to that session.
"""
return (
SparkSession.builder.master(f"local[{n_cpus}]")
@jonashaag
jonashaag / pytest_mark_fixture.py
Last active October 18, 2021 16:59
pytest mark a fixture
import pytest
def mark_fixture(mark, *args, **kwargs):
"""Decorator to mark a fixture.
Usage:
@mark_fixture(pytest.mark.slow, scope="session", ...)
def my_fixture():
...
@jonashaag
jonashaag / trunet.py
Created June 26, 2021 14:57
REAL-TIME DENOISING AND DEREVERBERATION WITH TINY RECURRENT U-NET
import torch
from torch.nn import *
def pointwise(in_channels, out_channels):
    """Build a pointwise block: 1x1 Conv2d, then BatchNorm2d, then ReLU.

    The convolution uses kernel size 1 and stride 1 and maps
    ``in_channels`` to ``out_channels``; the batch norm operates on
    ``out_channels`` features.
    """
    # Layers are created in application order (conv, norm, activation).
    conv = Conv2d(in_channels, out_channels, kernel_size=1, stride=1)
    norm = BatchNorm2d(out_channels)
    activation = ReLU()
    return Sequential(conv, norm, activation)