Mitchell Wortsman (mitchellnw)
mitchellnw / relu-attn-bf16.ipynb
Last active December 26, 2023 04:09
triton-a100.ipynb (notebook preview unavailable)
mitchellnw / relu-attention-fp32.ipynb
Last active December 25, 2023 20:50
relu-attention-fp32.ipynb (notebook preview unavailable)
mitchellnw / stableadamwunfused.py
Last active October 17, 2023 21:40
This is the unfused version of StableAdamW. It is slower than the fused version, which is forthcoming. Beta version.
import numpy as np
import torch

# This is the unfused version of StableAdamW. It is slower than the fused version (coming).
class StableAdamWUnfused(torch.optim.Optimizer):
    def __init__(self, params, lr=0.002, weight_decay=0.2, betas=(0.9, 0.99),
                 eps=1e-6, clip_thresh=1.0, precision="amp_bfloat16", custom_scalar=65536):
        beta1, beta2 = betas
        defaults = dict(lr=lr, weight_decay=weight_decay, beta1=beta1, beta2=beta2)
        super().__init__(params, defaults)  # register params; required for any torch Optimizer
        # Remaining hyperparameters are kept on the instance for step() to use
        # (attribute names are a plausible completion; the gist is truncated here).
        self.eps, self.d = eps, clip_thresh
        self.precision, self.custom_scalar = precision, custom_scalar
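
A minimal usage sketch, assuming the complete class from the full gist (the truncated portion above omits step()). The model, data, and loss below are illustrative assumptions, not part of the gist:

import torch
import torch.nn as nn

model = nn.Linear(16, 4)  # toy model, purely for illustration
optimizer = StableAdamWUnfused(model.parameters(), lr=0.002, weight_decay=0.2)

x, y = torch.randn(8, 16), torch.randn(8, 4)
loss = nn.functional.mse_loss(model(x), y)
loss.backward()
optimizer.step()       # assumes step() is defined later in the full gist
optimizer.zero_grad()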