Skip to content

Instantly share code, notes, and snippets.

View jxmorris12's full-sized avatar
🐳
just chilling

Jack Morris jxmorris12

🐳
just chilling
View GitHub Profile
@jxmorris12
jxmorris12 / torch_ddp_verify.py
Last active April 19, 2024 15:54
verify parameter weights & gradients in pytorch
def verify_ddp_weights_equal(model: torch.nn.Module, atol: float = 1e-5) -> None:
    """Assert that every parameter of ``model`` is the same on all ranks.

    Each parameter is gathered, reshaped to ``(world_size, -1)`` and every
    row is compared against row 0.

    Args:
        model: Module whose parameters are checked. If it exposes a
            ``.module`` attribute (as a DDP-style wrapper does), the wrapped
            module is checked instead.
        atol: Absolute tolerance; every element-wise difference must be
            strictly below this value.

    Raises:
        AssertionError: If any parameter differs from row 0 by ``atol`` or
            more. The message names the parameter and the largest difference.
    """
    # Unwrap so parameter names come from the inner module, not the wrapper.
    if hasattr(model, "module"):
        model = model.module
    world_size = get_world_size()
    for name, param in model.named_parameters():
        # NOTE(review): `gather` is defined outside this snippet; presumably
        # it collects the tensor from every rank -- confirm.
        gathered_param = gather(param).reshape((world_size, -1))
        # Row 0 is broadcast as shape (1, N) against the (world_size, N) stack.
        absolute_diffs = (gathered_param[None, 0, :] - gathered_param).abs()
        rank_params_eq = (absolute_diffs < atol).all()
        assert rank_params_eq, f"❌ param [{name}] not equal - got max_absolute_diff={absolute_diffs.max()}"
@jxmorris12
jxmorris12 / slice._sparse_tensor.py
Created March 4, 2024 21:34
pytorch sparse tensor slice
import torch
def slice_sparse_tensor_rows(t: torch.sparse.Tensor, min_row: int, max_row: int) -> torch.sparse.Tensor:
row_idxs = t.indices()[0]
index_mask = (min_row <= row_idxs) & (row_idxs < max_row)
num_rows = (max_row - min_row)
num_cols = t.shape[1]
idxs = t.indices()[:, index_mask]
@jxmorris12
jxmorris12 / datasets_fast_load_from_disk.py
Created January 19, 2024 23:24
datasets_fast_load_from_disk.py
from typing import Iterable
import concurrent
import datasets
import glob
import json
import multiprocessing
import os
def load_dataset_tables(
@jxmorris12
jxmorris12 / upload_dataset.py
Created October 25, 2023 17:49
load a dataset from JSON and upload it to huggingface
import argparse
import glob
import datasets
import pandas as pd
def load_datasets(data_folder):
train_file = glob.glob(f"{data_folder}/train*.jsonl")[0]
test_file = f"{data_folder}/test.jsonl"
dev_file = glob.glob(f"{data_folder}/dev*.jsonl")[0]
@jxmorris12
jxmorris12 / msmarco_corpus.py
Last active October 25, 2023 13:52
load msmarco corpus
from typing import Dict, Tuple
import logging
import os
import pathlib
import requests
import zipfile
import beir
import beir.datasets
Python
map a function over a list: map(f, list) — NOT the other way around
set a breakpoint: import pdb; pdb.set_trace()
—> ACTUALLY, starting in Python 3.7 you can just call breakpoint()!
best way to profile any python code: pip install pyinstrument; python -m pyinstrument ./myprog.py
run a pytest test by pattern: pytest -k <pattern>
@jxmorris12
jxmorris12 / airpods.py
Created November 2, 2020 14:49
automatically connect Mac to Bluetooth headphones
#!/usr/bin/env python
# jm8wx 11/2/20
import subprocess
import re
airpods_name = "Jack’s AirPods Pro"
def _color(s: str) -> str:
    """Return ``s`` wrapped in the ANSI escape codes ``\\033[94m`` and ``\\033[0m``.

    Args:
        s: Text to wrap. Must be a ``str``; concatenation raises ``TypeError``
            for any other type.

    Returns:
        ``s`` prefixed with ``"\\033[94m"`` and suffixed with ``"\\033[0m"``.
    """
    return "\033[94m" + s + "\033[0m"
@jxmorris12
jxmorris12 / git-fatfiles
Created October 4, 2019 19:58
print large stuff in your git repo
git rev-list --all --objects | \
sed -n $(git rev-list --objects --all | \
cut -f1 -d' ' | \
git cat-file --batch-check | \
grep blob | \
sort -n -k 3 | \
tail -n40 | \
while read hash type size; do
echo -n "-e s/$hash/$size/p ";
done) | \
@jxmorris12
jxmorris12 / stdc++.h
Created October 2, 2019 21:22
stdc++.h for Mac OSX (/usr/local/include/bits/stdc++.h)
// C++ includes used for precompiling -*- C++ -*-
// Copyright (C) 2003-2013 Free Software Foundation, Inc.
//
// This file is part of the GNU ISO C++ Library. This library is free
// software; you can redistribute it and/or modify it under the
// terms of the GNU General Public License as published by the
// Free Software Foundation; either version 3, or (at your option)
// any later version.
@jxmorris12
jxmorris12 / Makefile
Created September 5, 2019 14:51
compiles xv6 with Mac OS X qemu and cross-compiler i386-elf-gcc-gdb
OBJS = \
bio.o\
console.o\
exec.o\
file.o\
fs.o\
ide.o\
ioapic.o\
kalloc.o\
kbd.o\