Skip to content

Instantly share code, notes, and snippets.

Avatar
🥅
goal_net

Jacob Chapman chapmanjacobd

🥅
goal_net
View GitHub Profile
@chapmanjacobd
chapmanjacobd / gdal_block_reading.py
Last active March 9, 2023 04:27
Actual speed impact of reading mismatched blocks
View gdal_block_reading.py
import math
import osgeo.gdal as gdal
import timeit
# Raster file that the timing measurements are run against.
file_path = "1677269839.tif"
ds = gdal.Open(file_path)
# Block size reported by band 1 of the opened dataset.
# NOTE(review): presumably the baseline that mismatched read windows are
# compared against -- the rest of the script is not visible here; confirm.
file_block_size = ds.GetRasterBand(1).GetBlockSize()
# NOTE(review): looks like the x offset of the first read window -- confirm.
xoff = 0
View btrfs_disk_extent_stats.py
import argparse
from multiprocessing import Pool
import btrfs
# Command-line interface: a single required positional argument named
# "btrfs_fs_mountpoint".
parser = argparse.ArgumentParser()
parser.add_argument("btrfs_fs_mountpoint")
# Parse the command line; the value is available as args.btrfs_fs_mountpoint.
args = parser.parse_args()
@chapmanjacobd
chapmanjacobd / filter_file.py
Created January 26, 2023 20:30
writelines() is faster than write() if your data can fit in RAM
View filter_file.py
def filter_file(path, sieve):
    """Rewrite the text file at *path* in place, dropping the lines in *sieve*.

    A line is dropped when its content, with trailing whitespace (including
    the newline) stripped, is found in *sieve*.  Surviving lines are written
    back unchanged.

    The whole file is read into memory so the output can be produced with a
    single ``writelines()`` call.  The filtered content goes to a temporary
    file that is flushed, fsynced and then moved over *path* with
    ``os.replace()``, so readers never observe a half-written file.

    Args:
        path: Path of the text file to filter.
        sieve: Container of stripped line contents to remove.  It is only
            ever tested with ``in``, so passing a ``set`` gives the fastest
            lookups for large sieves.

    Raises:
        OSError: If the file cannot be read, written or replaced.
    """
    with open(path, 'r') as fr:
        lines = fr.readlines()

    # Create the temp file next to the target: os.replace() cannot move a
    # file across filesystems, which is what happens when the default temp
    # directory lives on a different mount than *path*.
    target_dir = os.path.dirname(os.path.abspath(path))
    temp = tempfile.NamedTemporaryFile(mode='w', dir=target_dir, delete=False)
    replaced = False
    try:
        with temp:
            temp.writelines(line for line in lines if line.rstrip() not in sieve)
            # Force the data to disk before the rename makes it visible.
            temp.flush()
            os.fsync(temp.fileno())
        os.replace(temp.name, path)
        replaced = True
    finally:
        if not replaced:
            # Something failed before the rename; don't leave a stray temp
            # file behind next to the original.
            try:
                os.unlink(temp.name)
            except OSError:
                pass
@>>> timeit.timeit("filter_file('/tmp/t', ['abcnewsvideo 9758031'])", number=100, setup="from __main__ import filter_file")
@chapmanjacobd
chapmanjacobd / btrfs_fun_single_vs_myself.md
Created January 22, 2023 05:49
BTRFS single mode evaluation
View btrfs_fun_single_vs_myself.md

The experiment

Preparation

truncate -s20G d1.img
truncate -s20G d2.img
truncate -s20G d3.img
truncate -s20G d4.img
set ld1 (sudo losetup --show --find d1.img)
@chapmanjacobd
chapmanjacobd / aaronsw1.md
Last active November 22, 2022 01:59
aaronsw.com but as one markdown file but it's actually two because GitHub only shows so much on one page
View aaronsw1.md

Aaron Swartz

Aaron Swartz is the founder of Demand Progress, which launched the campaign against the Internet censorship bills (SOPA/PIPA) and now has over a million members. He is also a Contributing Editor to [The

@chapmanjacobd
chapmanjacobd / de9im.py
Last active November 21, 2022 19:48
Dimensionally Extended 9-Intersections Matrix utilities
View de9im.py
# Author: Sean Gillies
# License: BSD
# https://pypi.org/project/de9im/
"""
>>> from de9im import pattern
>>> side_hug = pattern('FF*F0****')
>>> im = p.relate(q)
>>> print im
FF2F01212
View read_jsonl.zstd.py
# https://old.reddit.com/r/pushshift/comments/ajmcc0/information_and_code_examples_on_how_to_use_the/
with open("filename.zst", 'rb') as fh:
dctx = zstd.ZstdDecompressor(max_window_size=2147483648)
with dctx.stream_reader(fh) as reader:
previous_line = ""
while True:
chunk = reader.read(2**24) # 16mb chunks
if not chunk:
break
View maybe I won't actually do it this way.py
import itertools
# Abbreviations handed out so far; consequtive_combos() appends to this list
# so that later calls never return an abbreviation that is already taken.
subcommand_abbrevs = []


def consequtive_combos(s):
    """Return the not-yet-taken abbreviations of *s* and register them.

    A candidate abbreviation is any combination of 1 to ``len(s) - 1``
    characters of *s*, kept in their original order and joined into a string,
    whose first character equals ``s[0]``.  Despite the function's name the
    characters do not have to be adjacent in *s*.

    Candidates already present in the module-level ``subcommand_abbrevs`` are
    skipped; the remaining ones are appended to that list (in sorted order)
    and returned.

    The number of candidates grows as ``2 ** len(s)``, so this is only
    practical for short names.

    Args:
        s: The full subcommand name (a string).

    Returns:
        A set of the newly registered abbreviation strings; empty when *s*
        has fewer than two characters or every candidate is already taken.
    """
    if len(s) < 2:
        # range(1, len(s)) is empty: there is nothing shorter than s itself.
        return set()

    first = s[0]
    # Snapshot as a set so each membership test is O(1) instead of a scan of
    # the ever-growing list.
    taken = set(subcommand_abbrevs)
    # Stream the combinations instead of concatenating per-size lists with
    # sum(..., []), which copies the accumulated list on every step.
    candidates = itertools.chain.from_iterable(
        itertools.combinations(s, size) for size in range(1, len(s))
    )
    combos = {''.join(chars) for chars in candidates if chars[0] == first}
    combos -= taken
    # Sorted so the registry's order does not depend on set iteration order.
    subcommand_abbrevs.extend(sorted(combos))
    return combos


q = consequtive_combos('merge-online-local')
View get_subtitles.py
def youtube_dl_id(file) -> str:
if len(file) < 15:
return ""
# rename old youtube_dl format to new one: cargo install renamer; fd -tf . -x renamer '\-([\w\-_]{11})\.= [$1].' {}
yt_id_regex = re.compile(r"-([\w\-_]{11})\..*$|\[([\w\-_]{11})\]\..*$", flags=re.M)
file = str(file).strip()
yt_ids = yt_id_regex.findall(file)
if not yt_ids:
return ""