fxmarty

## gist:39e7c7566f660cdd6033e2d259f45948
from functools import partial
import math
import torch
import torch.nn as nn
import torch.nn.functional as F

from einops import rearrange, repeat

from flash_attn.utils.benchmark import benchmark_forward

## gist:7e75cc3942d6974e4849093ebea0a331
import argparse
import random
from typing import Dict

import numpy as np
import torch
from tqdm.auto import tqdm
from transformers import AutoModelForCausalLM
import gc

## gist:5113e4304fbdd38c9c3702ce44683f6a
import argparse

import numpy as np
import pandas as pd
import torch
import gc
from tqdm import tqdm
from transformers import AutoModel, AutoModelForCausalLM, AutoModelForSeq2SeqLM, AutoTokenizer, GenerationConfig
from optimum.exporters import TasksManager

## opt.py
# 1. conda create -n ryzen101 python=3.9
# 2. install Ryzen AI Software following https://ryzenai.docs.amd.com/en/latest/manual_installation.html
# 3. Run .\transformers\setup.bat
# 4. Note: .\transformers\opt-onnx\setup.bat, which the README recommends running, cannot be run - the file does not exist.
# 5. Run .\set_opt_onnx_env.bat opt-125m
# 6. Run .\prepare_model.bat opt-125m
# 7. And then run:

import onnxruntime
import numpy as np

## gist:1f2ae05aeb0b65535d6c153d671f19db
      - name: Free disk space
        run: |
          # Go from 19G to 54G free disk space in 3min
          df -h
          sudo apt-get update
          sudo apt-get purge -y '^apache.*'
          sudo apt-get purge -y '^imagemagick.*'
          sudo apt-get purge -y '^dotnet.*'
          sudo apt-get purge -y '^aspnetcore.*'
          sudo apt-get purge -y 'php.*'

## test_static_cache_train.py
from transformers import AutoTokenizer, AutoModelForCausalLM, GenerationConfig
import torch
from transformers.cache_utils import StaticCache
import time
from torch.profiler import ProfilerActivity, profile, tensorboard_trace_handler
import contextlib
import numpy as np

tokenizer = AutoTokenizer.from_pretrained(
    "NousResearch/Llama-2-7b-chat-hf", padding_side="left", pad_token="<s>"

## test_static_cache_forward.py
from transformers import AutoTokenizer, AutoModelForCausalLM, GenerationConfig
import torch
from transformers.cache_utils import StaticCache
import time
import numpy as np

tokenizer = AutoTokenizer.from_pretrained(
    "NousResearch/Llama-2-7b-chat-hf", padding_side="left", pad_token="<s>"
)

## gist:1313f39037fc1c112508989628c57363

      
        
          
            
              
              1 file
            
          
          
            
              
              0 forks
            
          
          
            
              
              0 comments
            
          
          
            
              
              0 stars
            
          
        
        
          
              
          
          
            
                fxmarty
                / gist:1313f39037fc1c112508989628c57363
            
            
              Created
              April 8, 2024 10:45
            
              
                dynamo_log_sdpa.md
              
          
        
      
        
  
      
    We use
        attn_output = torch.nn.functional.scaled_dot_product_attention(
            query_states,
            key_states,
            value_states,
            attn_mask=causal_mask,
            dropout_p=self.attention_dropout if self.training else 0.0,
            is_causal=causal_mask is None and q_len > 1,
	from functools import partial
	import math
	import torch
	import torch.nn as nn
	import torch.nn.functional as F

	from einops import rearrange, repeat

	from flash_attn.utils.benchmark import benchmark_forward
	import argparse
	import random
	from typing import Dict

	import numpy as np
	import torch
	from tqdm.auto import tqdm
	from transformers import AutoModelForCausalLM
	import gc
	import argparse

	import numpy as np
	import pandas as pd
	import torch
	import gc
	from tqdm import tqdm
	from transformers import AutoModel, AutoModelForCausalLM, AutoModelForSeq2SeqLM, AutoTokenizer, GenerationConfig
	from optimum.exporters import TasksManager
	# 1. conda create -n ryzen101 python=3.9
	# 2. install Ryzen AI Software following https://ryzenai.docs.amd.com/en/latest/manual_installation.html
	# 3. Run .\transformers\setup.bat
	# 4. Note: .\transformers\opt-onnx\setup.bat, which the README recommends running, cannot be run - the file does not exist.
	# 5. Run .\set_opt_onnx_env.bat opt-125m
	# 6. Run .\prepare_model.bat opt-125m
	# 7. And then run:

	import onnxruntime
	import numpy as np
	- name: Free disk space
	run: |
	# Go from 19G to 54G free disk space in 3min
	df -h
	sudo apt-get update
	sudo apt-get purge -y '^apache.*'
	sudo apt-get purge -y '^imagemagick.*'
	sudo apt-get purge -y '^dotnet.*'
	sudo apt-get purge -y '^aspnetcore.*'
	sudo apt-get purge -y 'php.*'
	from transformers import AutoTokenizer, AutoModelForCausalLM, GenerationConfig
	import torch
	from transformers.cache_utils import StaticCache
	import time
	from torch.profiler import ProfilerActivity, profile, tensorboard_trace_handler
	import contextlib
	import numpy as np

	tokenizer = AutoTokenizer.from_pretrained(
	"NousResearch/Llama-2-7b-chat-hf", padding_side="left", pad_token="<s>"