Skip to content

Instantly share code, notes, and snippets.

View dtch1997's full-sized avatar

Daniel Tan dtch1997

View GitHub Profile
@dtch1997
dtch1997 / anonymize.py
Created September 24, 2025 13:46
Anonymize codebases for submission
#!/usr/bin/env python3
"""
Script to anonymize specific terms in files by replacing them with 'XXXX'.
Usage:
python anonymize.py <input_dir> <output_dir> <terms_to_anonymize>
Arguments:
input_dir: Directory containing files to anonymize
output_dir: Directory where anonymized files will be written
@dtch1997
dtch1997 / device_manager.py
Created July 11, 2024 08:32
Device manager for PyTorch
""" Utilities for managing torch device """
import torch
from contextlib import contextmanager
from typing import Generator
def get_default_device() -> str:
if torch.cuda.is_available():
return "cuda"
elif torch.backends.mps.is_available() and torch.backends.mps.is_built():
@dtch1997
dtch1997 / patterns.py
Created July 11, 2024 08:31
Design patterns in Python
"""Useful design patterns and abstractions"""
from typing import Generic, TypeVar
T = TypeVar("T")
# Reference: https://stackoverflow.com/a/7346105
class Singleton(Generic[T]):
"""
@dtch1997
dtch1997 / data_utils.py
Last active July 11, 2024 08:30
Utilities for working with JSON-formatted machine learning data
# Utilities for working with dataframes
import pandas as pd
from IPython.display import display, HTML
def flatten_dict(nested_dict, prefix="", delimiter="."):
""" Flattens a nested dictionary """
flattened = {}
for key, value in nested_dict.items():
new_key = f"{prefix}{key}"
if isinstance(value, dict):
@dtch1997
dtch1997 / 1_1_1_.fasta
Created June 27, 2024 21:05
FASTA file from the Enzyme Commission for dehydrogenases
>sp|P49383|ADH2_KLULA Alcohol dehydrogenase 2 OS=Kluyveromyces lactis (strain ATCC 8585 / CBS 2359 / DSM 70799 / NBRC 1267 / NRRL Y-1140 / WM37) OX=284590 GN=ADH2 PE=3 SV=2
MSIPETQKGVIFYENGGELQYKDIPVPKPKANELLINVKYSGVCHTDLHAWKGDWPLPTK
LPLVGGHEGAGVVVAMGENVKGWNIGDFAGIKWLNGSCMSCEYCELSNESNCPDADLSGY
THDGSFQQYATADAVQAARIPKGTDLAEVAPILCAGVTVYKALKSADLKAGDWVAISGAC
GGLGSLAIQYAKAMGYRVLGIDTGAEKAKLFKELGGEYFVDYAVSKDLIKEIVDATNGGA
HGVINVSVSEFAIEQSTNYVRSNGTVVLVGLPRDAKCKSDVFTQVVKSVSIVGSYVGNRA
DTREALDFFARGLVHAPIKIVGLSELADVYDKMVKGEIVGRYVVDTSK
>sp|N4WW42|RED3_COCH4 Dehydrogenase RED3 OS=Cochliobolus heterostrophus (strain C4 / ATCC 48331 / race T) OX=665024 GN=RED3 PE=3 SV=1
MGLVKGNCGCYWGIKGHWSRNCSPVCEIANKNYYSRRGIAPRRTFWSVSNKSLVHLDANS
LMIDYENVFYYTTDITSNKAIIESSERIRQDHGNPSVLINNAGVANGKTILEESEDERRR
@dtch1997
dtch1997 / sl_sae_to_tl_sae.py
Last active May 17, 2024 18:50
Convert an SAELens SAE to a TransformerLens SAE
from transformer_lens import HookedSAEConfig, HookedSAE
from sae_lens import SparseAutoencoder
from sae_lens.training.train_sae_on_language_model import LanguageModelSAERunnerConfig
def sl_sae_cfg_to_hooked_sae_cfg(
resid_sae_cfg: LanguageModelSAERunnerConfig,
) -> HookedSAEConfig:
new_cfg = {
"d_sae": resid_sae_cfg.d_sae,
"d_in": resid_sae_cfg.d_in,
@dtch1997
dtch1997 / test_script.sh
Last active April 25, 2024 09:26
SGE script with Slack Notifications
#!/bin/bash
#$ -N test_script
#$ -o /home/$USER/logs/
#$ -e /home/$USER/logs/
# Set up a web hook here, SAVE SETTINGS, then paste URL
# https://api.slack.com/messaging/webhooks
SLACK_WEBHOOK_URL="Replace Me"
# Find Your Slack User ID
@dtch1997
dtch1997 / open_neuronpedia.py
Created April 11, 2024 12:35
Open Neuronpedia Quick List
""" A script to open a list of SAE features in Neuronpedia in your web browser.
Usage:
1. Clone this gist.
2. Change LAYER, FEATURES to whatever you want.
3. Run the script.
"""
import json
import urllib.parse
@dtch1997
dtch1997 / obsidian-web-clipper.js
Last active January 31, 2024 09:17 — forked from kepano/obsidian-web-clipper.js
Obsidian Web Clipper Bookmarklet to save articles and pages from the web (for Safari, Chrome, Firefox, and mobile browsers)
javascript: Promise.all([import('https://unpkg.com/turndown@6.0.0?module'), import('https://unpkg.com/@tehshrike/readability@0.2.0'), ]).then(async ([{
default: Turndown
}, {
default: Readability
}]) => {
/* Optional vault name */
const vault = "";
/* Optional folder name such as "Clippings/" */
@dtch1997
dtch1997 / print_pstats.py
Created March 1, 2023 18:39
Simple script to print Python Profiler stats
"""
Usage:
1. python -m cProfile -o [path/to/output.profile] [path/to/script.py]
2. python print_pstats.py -i [path/to/output.profile] [optional args]
By default prints the top 20 functions, sorted by total time spent in function
"""
import pstats
import argparse
from pstats import SortKey