fxmarty
from functools import partial
import math
import torch
import torch.nn as nn
import torch.nn.functional as F
from einops import rearrange, repeat
from flash_attn.utils.benchmark import benchmark_forward
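The header of this first gist is cut off in the capture; the imports point to a flash-attention benchmark. A minimal sketch of how benchmark_forward might be used to time torch.nn.functional.scaled_dot_product_attention, where the shapes, dtype and repeat count are assumptions rather than values from the gist:

# Hypothetical shapes; the original gist's parameters are not shown.
batch, nheads, seqlen, headdim = 8, 16, 2048, 64
q, k, v = (
    torch.randn(batch, nheads, seqlen, headdim, device="cuda", dtype=torch.float16)
    for _ in range(3)
)

# benchmark_forward times fn(*inputs) over several repeats and returns a
# (Timer, Measurement) pair; m.mean is the mean latency in seconds.
_, m = benchmark_forward(
    F.scaled_dot_product_attention, q, k, v,
    is_causal=True, repeats=30, desc="sdpa", verbose=False,
)
print(f"SDPA mean latency: {m.mean * 1e3:.3f} ms")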
fxmarty / gist:7e75cc3942d6974e4849093ebea0a331
Created December 11, 2023 12:58
benchmark_sdpa_training
import argparse
import random
from typing import Dict
import numpy as np
import torch
from tqdm.auto import tqdm
from transformers import AutoModelForCausalLM
import gc
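The preview stops after the imports. A minimal sketch, reusing those imports, of what the training benchmark body might look like; the model name, shapes and timing loop are assumptions rather than the gist's actual code (attn_implementation is a real transformers kwarg that selects between eager and SDPA attention):

def bench_train_step(attn_implementation: str, num_steps: int = 10) -> float:
    # Placeholder model; the original gist's model choice is not shown.
    model = AutoModelForCausalLM.from_pretrained(
        "NousResearch/Llama-2-7b-chat-hf",
        attn_implementation=attn_implementation,
        torch_dtype=torch.float16,
    ).to("cuda")
    input_ids = torch.randint(0, model.config.vocab_size, (1, 512), device="cuda")

    times = []
    for _ in tqdm(range(num_steps)):
        torch.cuda.synchronize()
        start = torch.cuda.Event(enable_timing=True)
        end = torch.cuda.Event(enable_timing=True)
        start.record()
        loss = model(input_ids=input_ids, labels=input_ids).loss
        loss.backward()
        end.record()
        torch.cuda.synchronize()
        times.append(start.elapsed_time(end))  # milliseconds
        model.zero_grad(set_to_none=True)

    del model
    gc.collect()
    torch.cuda.empty_cache()
    return float(np.mean(times))

print("eager:", bench_train_step("eager"), "ms")
print("sdpa:", bench_train_step("sdpa"), "ms")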
fxmarty / gist:5113e4304fbdd38c9c3702ce44683f6a
Created December 11, 2023 12:59
benchmark_sdpa_inference
import argparse
import numpy as np
import pandas as pd
import torch
import gc
from tqdm import tqdm
from transformers import AutoModel, AutoModelForCausalLM, AutoModelForSeq2SeqLM, AutoTokenizer, GenerationConfig
from optimum.exporters import TasksManager
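This preview also ends at the imports. A minimal sketch of an SDPA inference benchmark built on them; the model, prompt and generation length are placeholders, not values from the gist:

# Placeholder model and prompt; the gist's actual choices are hidden by the preview.
tokenizer = AutoTokenizer.from_pretrained("NousResearch/Llama-2-7b-chat-hf")
model = AutoModelForCausalLM.from_pretrained(
    "NousResearch/Llama-2-7b-chat-hf",
    attn_implementation="sdpa",  # compare against "eager" in a second run
    torch_dtype=torch.float16,
).to("cuda")
inputs = tokenizer(["Hello, my name is"] * 4, return_tensors="pt").to("cuda")
gen_config = GenerationConfig(max_new_tokens=128, do_sample=False)

model.generate(**inputs, generation_config=gen_config)  # warmup
latencies = []
for _ in tqdm(range(5)):
    start = torch.cuda.Event(enable_timing=True)
    end = torch.cuda.Event(enable_timing=True)
    start.record()
    model.generate(**inputs, generation_config=gen_config)
    end.record()
    torch.cuda.synchronize()
    latencies.append(start.elapsed_time(end))  # milliseconds
print(pd.DataFrame({"sdpa latency (ms)": latencies}))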
fxmarty / opt.py
Created February 6, 2024 15:15
Repro opt vits
# 1. conda create -n ryzen101 python=3.9
# 2. install Ryzen AI Software following https://ryzenai.docs.amd.com/en/latest/manual_installation.html
# 3. Run .\transformers\setup.bat
# 4. The .\transformers\opt-onnx\setup.bat recommended in the README cannot be run: the file does not exist.
# 5. Run .\set_opt_onnx_env.bat opt-125m
# 6. Run .\prepare_model.bat opt-125m
# 7. And then run:
import onnxruntime
import numpy as np
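The preview ends at the imports. A minimal sketch of running the prepared OPT model through onnxruntime; the model path, input names and the VitisAIExecutionProvider wiring are assumptions based on the Ryzen AI docs, not the gist's hidden body:

# Hypothetical session setup; the path and provider list are assumptions.
session = onnxruntime.InferenceSession(
    "opt-125m/model.onnx",  # placeholder path to the prepared model
    providers=["VitisAIExecutionProvider", "CPUExecutionProvider"],
)

# Dummy batch just to exercise the session; the exported graph's actual
# input names can be checked with session.get_inputs().
input_ids = np.random.randint(0, 50272, size=(1, 16), dtype=np.int64)
attention_mask = np.ones_like(input_ids)
outputs = session.run(
    None, {"input_ids": input_ids, "attention_mask": attention_mask}
)
print([o.shape for o in outputs])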
fxmarty / gist:1f2ae05aeb0b65535d6c153d671f19db
Last active February 15, 2024 10:53
Free disk space ubuntu github actions for Python/C++/Rust development
- name: Free disk space
  run: |
    # Go from 19G to 54G free disk space in 3min
    df -h
    sudo apt-get update
    sudo apt-get purge -y '^apache.*'
    sudo apt-get purge -y '^imagemagick.*'
    sudo apt-get purge -y '^dotnet.*'
    sudo apt-get purge -y '^aspnetcore.*'
    sudo apt-get purge -y 'php.*'
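The purged packages are preinstalled toolchains (Apache, ImageMagick, .NET, PHP) that Python/C++/Rust jobs rarely need; the df -h call makes the reclaimed space visible in the job log.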
fxmarty / test_static_cache_train.py
Created February 28, 2024 14:29
torch.compile + static cache train benchmark
from transformers import AutoTokenizer, AutoModelForCausalLM, GenerationConfig
import torch
from transformers.cache_utils import StaticCache
import time
from torch.profiler import ProfilerActivity, profile, tensorboard_trace_handler
import contextlib
import numpy as np
tokenizer = AutoTokenizer.from_pretrained(
    "NousResearch/Llama-2-7b-chat-hf", padding_side="left", pad_token="<s>"
)
fxmarty / test_static_cache_forward.py
Created February 28, 2024 14:29
torch.compile + static cache decoding benchmark
from transformers import AutoTokenizer, AutoModelForCausalLM, GenerationConfig
import torch
from transformers.cache_utils import StaticCache
import time
import numpy as np
tokenizer = AutoTokenizer.from_pretrained(
"NousResearch/Llama-2-7b-chat-hf", padding_side="left", pad_token="<s>"
)
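Again the preview stops early. A minimal sketch of a static-cache decoding benchmark along the lines of the title; the cache_implementation switch and the compile mode are assumptions based on recent transformers releases, not the hidden gist body:

model = AutoModelForCausalLM.from_pretrained(
    "NousResearch/Llama-2-7b-chat-hf", torch_dtype=torch.float16
).to("cuda").eval()

# In recent transformers releases this makes generate() allocate a StaticCache,
# so torch.compile sees fixed shapes during decoding.
model.generation_config.cache_implementation = "static"
model.forward = torch.compile(model.forward, mode="reduce-overhead")

inputs = tokenizer(["Hello, my name is"], return_tensors="pt").to("cuda")
for i in range(5):
    torch.cuda.synchronize()
    start = time.perf_counter()
    model.generate(**inputs, max_new_tokens=64, do_sample=False)
    torch.cuda.synchronize()
    # the first runs include compilation time
    print(f"run {i}: {time.perf_counter() - start:.2f} s")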

We use:

        attn_output = torch.nn.functional.scaled_dot_product_attention(
            query_states,
            key_states,
            value_states,
            attn_mask=causal_mask,
            dropout_p=self.attention_dropout if self.training else 0.0,
            is_causal=causal_mask is None and q_len > 1,
        )
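Passing is_causal=causal_mask is None and q_len > 1 lets SDPA dispatch to its fused causal kernels when no mask tensor has been materialized, while q_len == 1 (single-token decoding) needs no causal masking at all; when a causal_mask tensor is present it is applied through attn_mask instead, since SDPA does not accept an explicit mask together with is_causal=True.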