Yi Liu (yiliu30)
🌍 Working on site

  • AI Frameworks Engineer @intel
  • SH (UTC +08:00)
import torch
# User scripts
class CustomModel(torch.nn.Module):
    def __init__(self) -> None:
        super().__init__()
        self.fc = torch.nn.Linear(10, 10)
@yiliu30
yiliu30 / forgot_to_check_out_with_recurse_submodules.md
Created March 9, 2024 11:59 — forked from cnlohr/forgot_to_check_out_with_recurse_submodules.md
Git forgot to clone recursively (forgot to check out with recurse submodules)
# https://pytorch.org/tutorials/prototype/pt2e_quant_ptq_x86_inductor.html
import torch._inductor.config as config
import torch
import copy
from torch.ao.quantization.quantize_pt2e import prepare_pt2e, convert_pt2e
import torch.ao.quantization.quantizer.x86_inductor_quantizer as xiq
from torch.ao.quantization.quantizer.x86_inductor_quantizer import X86InductorQuantizer
from torch._export import capture_pre_autograd_graph
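A minimal sketch of how these imports fit together for PT2E post-training quantization, following the linked tutorial; the toy model and example inputs below are assumptions, not part of the original snippet:

model = torch.nn.Sequential(torch.nn.Linear(16, 16), torch.nn.ReLU()).eval()
example_inputs = (torch.randn(1, 16),)

# 1. Export the eager model to a pre-autograd ATen graph
exported_model = capture_pre_autograd_graph(model, example_inputs)

# 2. Prepare with the X86 Inductor quantizer (inserts observers)
quantizer = X86InductorQuantizer()
quantizer.set_global(xiq.get_default_x86_inductor_quantization_config())
prepared_model = prepare_pt2e(exported_model, quantizer)

# 3. Calibrate on representative data, then convert to a quantized graph
prepared_model(*example_inputs)
quantized_model = convert_pt2e(prepared_model)

# 4. Lower the quantized graph with Inductor
optimized_model = torch.compile(quantized_model)
optimized_model(*example_inputs)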

1. Setting up the environment

To install the Intel Gaudi Software Stack and launch the docker image, please follow this guide.

docker run -it --runtime=habana -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --net=host --ipc=host vault.habana.ai/gaudi-docker/1.14.0/ubuntu22.04/habanalabs/pytorch-installer-2.1.1:latest

# Check the container ID
docker ps

# Log into the container
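# One typical way to get a shell inside the running container (not from the
# original guide; the container ID comes from `docker ps` above):
docker exec -it <container-id> /bin/bash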
# =============================================================================
# Mem tracker
# Refactored from https://github.com/pytorch/pytorch/pull/124688
# =============================================================================
import math

import torch
from torch import nn
from torch.utils.flop_counter import FlopCounterMode


def test_mt_loop():
    class DummyModel(nn.Module):
        def __init__(self, layers: int, dim: int):
            super(DummyModel, self).__init__()
            self._module_list = []
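A minimal usage sketch for FlopCounterMode, separate from the snippet above; the model and input shape here are assumptions:

model = nn.Linear(128, 128)
x = torch.randn(64, 128)
with FlopCounterMode(display=True) as flop_counter:
    model(x)
# Total FLOPs recorded for the forward pass
print(flop_counter.get_total_flops())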
@yiliu30
yiliu30 / markdonw_emoji.md
Last active December 3, 2024 05:47 — forked from rxaviers/gist:7360908
Complete list of github markdown emoji markup

People

:bowtie: :bowtie: 😄 :smile: 😆 :laughing:
😊 :blush: 😃 :smiley: ☺️ :relaxed:
😏 :smirk: 😍 :heart_eyes: 😘 :kissing_heart:
😚 :kissing_closed_eyes: 😳 :flushed: 😌 :relieved:
😆 :satisfied: 😁 :grin: 😉 :wink:
😜 :stuck_out_tongue_winking_eye: 😝 :stuck_out_tongue_closed_eyes: 😀 :grinning:
😗 :kissing: 😙 :kissing_smiling_eyes: 😛 :stuck_out_tongue:
2024-06-17T01:47:06.5681923Z ==================================== ERRORS ====================================
2024-06-17T01:47:06.5682582Z _____________ ERROR at setup of TestAutoRound.test_autoround[True] _____________
2024-06-17T01:47:06.5682735Z
2024-06-17T01:47:06.5683220Z self = <class 'test_autoround.TestAutoRound'>
2024-06-17T01:47:06.5683303Z
2024-06-17T01:47:06.5683401Z def setup_class(self):
2024-06-17T01:47:06.5683608Z self.gptj = transformers.AutoModelForCausalLM.from_pretrained(
2024-06-17T01:47:06.5684018Z "hf-internal-testing/tiny-random-GPTJForCausalLM",
2024-06-17T01:47:06.5684147Z torchscript=True,
2024-06-17T01:47:06.5684250Z )

If I understand correctly, TGI currently selects a single kernel for all layers based on the algorithm name. Would you consider extending this to allow a mapping between layer names and kernels? This would decouple the quantization process (calculating the scale and zero point for a given tensor) from the inference process (selecting the right kernel). Here are some thoughts (a rough sketch follows the list):

  • Support for mixed data types and bitwidths: This would enable models to use different precisions for different layers, maintaining higher precision for critical layers. For instance, GPTQ does not quantize the lm_head. Similarly, llama.cpp uses different bitwidths for feedforward layers and other layers. TGI currently lacks this capability.
  • Eliminate redundant unpack-repack processes: If we require all quantized models to have the GPTQ format and want to use a new kernel like marlin, the flow involves multiple packing and unpacking steps. Ideally, we should only need one packing step.
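To make the idea concrete, here is a hypothetical sketch of such a layer-name-to-kernel mapping; none of these names are existing TGI APIs, and the patterns, bit widths, and kernel names are purely illustrative:

import fnmatch

# Hypothetical per-layer quantization metadata stored with the checkpoint
LAYER_KERNEL_MAP = {
    "lm_head": {"bits": 16, "kernel": "fp16_gemm"},                   # keep critical layers in higher precision
    "model.layers.*.mlp.*": {"bits": 4, "kernel": "marlin"},          # low-bit feedforward layers
    "model.layers.*.self_attn.*": {"bits": 8, "kernel": "gptq_cuda"},
}

def select_kernel(layer_name: str) -> str:
    """Pick a kernel for a layer by matching its name against the map."""
    for pattern, cfg in LAYER_KERNEL_MAP.items():
        if fnmatch.fnmatch(layer_name, pattern):
            return cfg["kernel"]
    return "fp16_gemm"  # fall back to the unquantized kernel

print(select_kernel("model.layers.0.mlp.down_proj"))  # -> "marlin"

With a map like this shipped alongside the model config, the serving side only has to look up the kernel per layer instead of re-deriving it from a single global algorithm name.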
@yiliu30
yiliu30 / 1-pw_op_fusion.py
Created July 7, 2024 04:12 — forked from Chillee/1-pw_op_fusion.py
PT 2.0 Benchmarks
import torch
import torch._inductor.config
import time
torch._inductor.config.triton.cudagraphs = False
torch.set_float32_matmul_precision('high')
def bench(f, name=None, iters=100, warmup=5, display=True, profile=False):
    for _ in range(warmup):
        f()
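A minimal usage sketch for the bench helper above; the benchmarked function and the CUDA device are assumptions, and the rest of bench's body is truncated in this preview:

def matmul():
    a = torch.randn(1024, 1024, device="cuda")
    b = torch.randn(1024, 1024, device="cuda")
    return a @ b

bench(matmul, name="matmul", iters=50)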