fxmarty

## transformers_compile.py
from transformers import AutoTokenizer, AutoModelForCausalLM, GenerationConfig
import torch
from transformers.cache_utils import StaticCache
import logging
import time

#model_id = "fxmarty/tiny-llama-fast-tokenizer"
model_id = "meta-llama/Meta-Llama-3-8B-Instruct"

tokenizer = AutoTokenizer.from_pretrained(

## benchmark_quanto.py
import torch
import torch.nn as nn
import time
import numpy as np

from optimum.quanto import Calibration, freeze, qint4, qint8, quantize, qfloat8, qfloat8_e4m3fn
from torch.profiler import ProfilerActivity, profile

M_SHAPES = [1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096, 8192]
N_SHAPE = 4096

## profile_quanto.py
import torch
import torch.nn as nn

from optimum.quanto import Calibration, freeze, qint4, qint8, quantize, qfloat8, qfloat8_e4m3fn
from torch.profiler import ProfilerActivity, profile

M_SHAPE = 4096

class MyModel(nn.Module):
    def __init__(self):

## torch_library.py
import torch
import time
from torch.profiler import ProfilerActivity, profile

# We somehow need this import; otherwise we get AttributeError: '_OpNamespace' 'mycppops' object has no attribute 'sin'
import mycppops

torch.library.define("mylib::sin", "(Tensor x) -> Tensor")

@torch.library.impl("mylib::sin", "default")

## gist:1313f39037fc1c112508989628c57363

      
              1 file
            
          
              0 forks
            
          
              0 comments
            
          
              0 stars
            
          
                fxmarty
                / gist:1313f39037fc1c112508989628c57363
            
            
              Created
              April 8, 2024 10:45
            
              
                dynamo_log_sdpa.md
              
          
    We use
        attn_output = torch.nn.functional.scaled_dot_product_attention(
            query_states,
            key_states,
            value_states,
            attn_mask=causal_mask,
            dropout_p=self.attention_dropout if self.training else 0.0,
            is_causal=causal_mask is None and q_len > 1,

  
## test_static_cache_forward.py
from transformers import AutoTokenizer, AutoModelForCausalLM, GenerationConfig
import torch
from transformers.cache_utils import StaticCache
import time
import numpy as np

tokenizer = AutoTokenizer.from_pretrained(
    "NousResearch/Llama-2-7b-chat-hf", padding_side="left", pad_token="<s>"
)

## test_static_cache_train.py
from transformers import AutoTokenizer, AutoModelForCausalLM, GenerationConfig
import torch
from transformers.cache_utils import StaticCache
import time
from torch.profiler import ProfilerActivity, profile, tensorboard_trace_handler
import contextlib
import numpy as np

tokenizer = AutoTokenizer.from_pretrained(
    "NousResearch/Llama-2-7b-chat-hf", padding_side="left", pad_token="<s>"

## gist:1f2ae05aeb0b65535d6c153d671f19db
      - name: Free disk space
        run: |
          # Go from 19G to 54G free disk space in 3min
          df -h
          sudo apt-get update
          sudo apt-get purge -y '^apache.*'
          sudo apt-get purge -y '^imagemagick.*'
          sudo apt-get purge -y '^dotnet.*'
          sudo apt-get purge -y '^aspnetcore.*'
          sudo apt-get purge -y 'php.*'

## opt.py
# 1. conda create -n ryzen101 python=3.9
# 2. install Ryzen AI Software following https://ryzenai.docs.amd.com/en/latest/manual_installation.html
# 3. Run .\transformers\setup.bat
# 4. Note: .\transformers\opt-onnx\setup.bat, which the README recommends running, cannot be run - the file does not exist.
# 5. Run .\set_opt_onnx_env.bat opt-125m
# 6. Run .\prepare_model.bat opt-125m
# 7. And then run:

import onnxruntime
import numpy as np

## gist:5113e4304fbdd38c9c3702ce44683f6a
import argparse

import numpy as np
import pandas as pd
import torch
import gc
from tqdm import tqdm
from transformers import AutoModel, AutoModelForCausalLM, AutoModelForSeq2SeqLM, AutoTokenizer, GenerationConfig
from optimum.exporters import TasksManager
	from transformers import AutoTokenizer, AutoModelForCausalLM, GenerationConfig
	import torch
	from transformers.cache_utils import StaticCache
	import logging
	import time

	#model_id = "fxmarty/tiny-llama-fast-tokenizer"
	model_id = "meta-llama/Meta-Llama-3-8B-Instruct"

	tokenizer = AutoTokenizer.from_pretrained(
	import torch
	import torch.nn as nn
	import time
	import numpy as np

	from optimum.quanto import Calibration, freeze, qint4, qint8, quantize, qfloat8, qfloat8_e4m3fn
	from torch.profiler import ProfilerActivity, profile

	M_SHAPES = [1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096, 8192]
	N_SHAPE = 4096
	- name: Free disk space
	run: |
	# Go from 19G to 54G free disk space in 3min
	df -h
	sudo apt-get update
	sudo apt-get purge -y '^apache.*'
	sudo apt-get purge -y '^imagemagick.*'
	sudo apt-get purge -y '^dotnet.*'
	sudo apt-get purge -y '^aspnetcore.*'
	sudo apt-get purge -y 'php.*'
	# 1. conda create -n ryzen101 python=3.9
	# 2. install Ryzen AI Software following https://ryzenai.docs.amd.com/en/latest/manual_installation.html
	# 3. Run .\transformers\setup.bat
	# 4. Note: .\transformers\opt-onnx\setup.bat, which the README recommends running, cannot be run - the file does not exist.
	# 5. Run .\set_opt_onnx_env.bat opt-125m
	# 6. Run .\prepare_model.bat opt-125m
	# 7. And then run:

	import onnxruntime
	import numpy as np
	import argparse

	import numpy as np
	import pandas as pd
	import torch
	import gc
	from tqdm import tqdm
	from transformers import AutoModel, AutoModelForCausalLM, AutoModelForSeq2SeqLM, AutoTokenizer, GenerationConfig
	from optimum.exporters import TasksManager