Skip to content

Instantly share code, notes, and snippets.

View fxmarty's full-sized avatar

fxmarty

View GitHub Profile
@fxmarty
fxmarty / transformers_compile.py
Created July 25, 2024 14:47
transformers_compile.py
from transformers import AutoTokenizer, AutoModelForCausalLM, GenerationConfig
import torch
from transformers.cache_utils import StaticCache
import logging
import time
#model_id = "fxmarty/tiny-llama-fast-tokenizer"
model_id = "meta-llama/Meta-Llama-3-8B-Instruct"
tokenizer = AutoTokenizer.from_pretrained(
@fxmarty
fxmarty / benchmark_quanto.py
Created July 17, 2024 14:53
benchmark quanto
import torch
import torch.nn as nn
import time
import numpy as np
from optimum.quanto import Calibration, freeze, qint4, qint8, quantize, qfloat8, qfloat8_e4m3fn
from torch.profiler import ProfilerActivity, profile
M_SHAPES = [1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096, 8192]
N_SHAPE = 4096
@fxmarty
fxmarty / profile_quanto.py
Created July 17, 2024 14:49
profile quanto
import torch
import torch.nn as nn
from optimum.quanto import Calibration, freeze, qint4, qint8, quantize, qfloat8, qfloat8_e4m3fn
from torch.profiler import ProfilerActivity, profile
M_SHAPE = 4096
class MyModel(nn.Module):
def __init__(self):
@fxmarty
fxmarty / torch_library.py
Last active July 17, 2024 10:10
torch_library
import torch
import time
from torch.profiler import ProfilerActivity, profile
# We somehow need this import; otherwise we get AttributeError: '_OpNamespace' 'mycppops' object has no attribute 'sin'
import mycppops
torch.library.define("mylib::sin", "(Tensor x) -> Tensor")
@torch.library.impl("mylib::sin", "default")

We use

        attn_output = torch.nn.functional.scaled_dot_product_attention(
            query_states,
            key_states,
            value_states,
            attn_mask=causal_mask,
            dropout_p=self.attention_dropout if self.training else 0.0,
            is_causal=causal_mask is None and q_len > 1,
@fxmarty
fxmarty / test_static_cache_forward.py
Created February 28, 2024 14:29
torch.compile + static cache decoding benchmark
from transformers import AutoTokenizer, AutoModelForCausalLM, GenerationConfig
import torch
from transformers.cache_utils import StaticCache
import time
import numpy as np
tokenizer = AutoTokenizer.from_pretrained(
"NousResearch/Llama-2-7b-chat-hf", padding_side="left", pad_token="<s>"
)
@fxmarty
fxmarty / test_static_cache_train.py
Created February 28, 2024 14:29
torch.compile + static cache train benchmark
from transformers import AutoTokenizer, AutoModelForCausalLM, GenerationConfig
import torch
from transformers.cache_utils import StaticCache
import time
from torch.profiler import ProfilerActivity, profile, tensorboard_trace_handler
import contextlib
import numpy as np
tokenizer = AutoTokenizer.from_pretrained(
"NousResearch/Llama-2-7b-chat-hf", padding_side="left", pad_token="<s>"
@fxmarty
fxmarty / gist:1f2ae05aeb0b65535d6c153d671f19db
Last active February 15, 2024 10:53
Free disk space ubuntu github actions for Python/C++/Rust development
- name: Free disk space
run: |
# Go from 19G to 54G free disk space in 3min
df -h
sudo apt-get update
sudo apt-get purge -y '^apache.*'
sudo apt-get purge -y '^imagemagick.*'
sudo apt-get purge -y '^dotnet.*'
sudo apt-get purge -y '^aspnetcore.*'
sudo apt-get purge -y 'php.*'
@fxmarty
fxmarty / opt.py
Created February 6, 2024 15:15
Repro opt vits
# 1. conda create -n ryzen101 python=3.9
# 2. install Ryzen AI Software following https://ryzenai.docs.amd.com/en/latest/manual_installation.html
# 3. Run .\transformers\setup.bat
# 4. Running .\transformers\opt-onnx\setup.bat, as recommended in the README, is not possible - the file does not exist.
# 5. Run .\set_opt_onnx_env.bat opt-125m
# 6. Run .\prepare_model.bat opt-125m
# 7. And then run:
import onnxruntime
import numpy as np
@fxmarty
fxmarty / gist:5113e4304fbdd38c9c3702ce44683f6a
Created December 11, 2023 12:59
benchmark_sdpa_inference
import argparse
import numpy as np
import pandas as pd
import torch
import gc
from tqdm import tqdm
from transformers import AutoModel, AutoModelForCausalLM, AutoModelForSeq2SeqLM, AutoTokenizer, GenerationConfig
from optimum.exporters import TasksManager