🌐 Working on site

Yi Liu yiliu30

  • AI Frameworks Engineer @intel
  • SH
  • 02:13 (UTC +08:00)
Warning, examples/language-modeling/main.py is deprecated, please use auto-round cmd line instead. The file will be deleted in the V0.4.1 release
/models/Llama-2-7b-chat-hf
2024-11-13 00:59:25 INFO utils.py L494: Using GPU device
Loading checkpoint shards: 100%|██████████| 2/2 [00:02<00:00, 1.08s/it]
2024-11-13 00:59:29 INFO autoround.py L218: using torch.float16 for quantization tuning
2024-11-13 00:59:29 INFO autoround.py L286: start calibration
# Copied from https://huggingface.co/TheBloke/Llama-2-7B-Chat-GPTQ#you-can-then-use-the-following-code
# ==------------------------------------------------------------------------------------------==
# Set the model name or path
# CUDA_VISIBLE_DEVICES=None OMP_NUM_THREADS=56 numactl -l -C 0-55 python test_load.py
# ==------------------------------------------------------------------------------------------==
# NOTE: the second assignment takes effect; keep only the model you want to load.
model_name_or_path = "./Llama-3.2-3B-Instruct-w4g128-auto_round-gptq-hf"
model_name_or_path = "./tmp_autoround/opt-125m-w4g128-auto-gptq"
@yiliu30
yiliu30 / nsight.sh
Created October 26, 2024 06:43 — forked from mcarilli/nsight.sh
Favorite Nsight Systems profiling commands for PyTorch scripts
# This isn't supposed to run as a bash script; I named it with ".sh" for syntax highlighting.
# https://developer.nvidia.com/nsight-systems
# https://docs.nvidia.com/nsight-systems/profiling/index.html
# My preferred nsys (command line executable used to create profiles) commands
#
# In your script, write
# torch.cuda.nvtx.range_push("region name")
# ...
@yiliu30
yiliu30 / static_kv_cache.py
Created October 21, 2024 02:08 — forked from ArthurZucker/static_kv_cache.py
simple static kv cache script
from transformers import AutoModelForCausalLM, AutoTokenizer, StaticCache
import torch
from typing import Optional
device = "cuda"
# Copied from the gpt-fast repo
def multinomial_sample_one_no_sync(probs_sort):  # Does multinomial sampling without a CUDA synchronization
    q = torch.empty_like(probs_sort).exponential_(1)
    return torch.argmax(probs_sort / q, dim=-1, keepdim=True).to(dtype=torch.int)
FRANCE_ARTICLE = """<s>Marseille, France (CNN)The French prosecutor leading an investigation into the crash of Germanwings Flight 9525 insisted Wednesday that he was not aware of any video footage from on board the plane. Marseille prosecutor Brice Robin told CNN that "so far no videos were used in the crash investigation." He added, "A person who has such a video needs to immediately give it to the investigators." Robin\'s comments follow claims by two magazines, German daily Bild and French Paris Match, of a cell phone video showing the harrowing final seconds from on board Germanwings Flight 9525 as it crashed into the French Alps. All 150 on board were killed. Paris Match and Bild reported that the video was recovered from a phone at the wreckage site. The two publications described the supposed video, but did not post it on their websites. The publications said that they watched the video, which was found by a source close to the investigation. \"One can hear cries of 'My God' in several languages,\" Par
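The no-sync sampler above relies on the exponential-race (Gumbel-max style) trick: if q_i ~ Exp(1), then argmax(p_i / q_i) selects index i with probability p_i, so no cumulative sum or host-side synchronization is needed. A minimal torch-free sketch of the same idea (names and the empirical check are illustrative, not from the gist):

```python
import random

def multinomial_sample_one(probs, rng):
    # Exponential-race trick: with q_i ~ Exp(1), argmax(p_i / q_i)
    # returns index i with probability p_i (for normalized probs).
    return max(range(len(probs)), key=lambda i: probs[i] / rng.expovariate(1.0))

# Empirical sanity check: sampled frequencies should approach the target probs.
rng = random.Random(0)
probs = [0.7, 0.2, 0.1]
counts = [0, 0, 0]
for _ in range(20000):
    counts[multinomial_sample_one(probs, rng)] += 1
```

The torch version vectorizes this by drawing all the exponentials in one `exponential_(1)` kernel and taking `argmax` on-device.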
@yiliu30
yiliu30 / bench_pt2e_resnet.py
Last active October 15, 2024 08:29
Bench PT2E; RESNET18; Compile
# Adapted from https://pytorch.org/tutorials/prototype/pt2e_quant_x86_inductor.html
import torch
import torchvision.models as models
import copy
from torch.ao.quantization.quantize_pt2e import prepare_pt2e, convert_pt2e
import torch.ao.quantization.quantizer.x86_inductor_quantizer as xiq
from torch.ao.quantization.quantizer.x86_inductor_quantizer import X86InductorQuantizer
from torch._export import capture_pre_autograd_graph
@yiliu30
yiliu30 / test_pt2e_two_convs_not_fuse.py
Last active October 17, 2024 05:46
pt2e; fuse two conv2ds
from torch.ao.quantization.quantizer.x86_inductor_quantizer import (
X86InductorQuantizer,
get_default_x86_inductor_quantization_config,
)
import torch.ao.quantization.quantizer.x86_inductor_quantizer as xiq
import torch
from torch._export import capture_pre_autograd_graph
from torch.ao.quantization.quantize_pt2e import (
@yiliu30
yiliu30 / inspect_pt2e_qdq_graph.py
Created October 9, 2024 08:41
pt2e prepare/convert qdq model
from torch.ao.quantization.quantizer.x86_inductor_quantizer import (
X86InductorQuantizer,
get_default_x86_inductor_quantization_config,
)
import torch
from loguru import logger
from torch._export import capture_pre_autograd_graph
from torch.ao.quantization.quantize_pt2e import (
    convert_pt2e,

import torch
from torch import nn
def silu_and_mul(x):
    act_out = torch.nn.functional.silu(x)
    return act_out * x
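Since silu(x) = x * sigmoid(x), the snippet above computes x² * sigmoid(x) elementwise. A scalar pure-Python sketch of the same math, as a sanity check alongside the torch version (function names mirror the gist but the stdlib implementation is mine):

```python
import math

def silu(x: float) -> float:
    # SiLU (a.k.a. swish): x * sigmoid(x) = x / (1 + e^{-x})
    return x / (1.0 + math.exp(-x))

def silu_and_mul(x: float) -> float:
    # Mirrors the torch snippet above: silu(x) * x
    return silu(x) * x
```

Note this differs from the fused SiLU-and-mul used in some inference kernels, which splits the input into two halves and computes silu(x1) * x2; here both operands are the same tensor.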
@yiliu30
yiliu30 / work-with-multiple-github-accounts.md
Created September 23, 2024 12:56 — forked from rahularity/work-with-multiple-github-accounts.md
How To Work With Multiple GitHub Accounts on your PC

How To Work With Multiple GitHub Accounts on a Single Machine

Suppose I have two GitHub accounts, https://github.com/rahul-office and https://github.com/rahul-personal, and I want to set up my Mac to talk to both accounts easily.

NOTE: The same approach extends to more than two accounts. :)

The setup can be done in 5 easy steps:

Steps:

  • Step 1 : Create SSH keys for all accounts
  • Step 2 : Add SSH keys to SSH Agent
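Once the keys exist and are loaded into the agent, the per-account routing is typically expressed in `~/.ssh/config` via distinct `Host` aliases. A hedged sketch (the alias names and key filenames below are illustrative, not from the original guide):

```
# ~/.ssh/config
Host github-office
    HostName github.com
    User git
    IdentityFile ~/.ssh/id_ed25519_office
    IdentitiesOnly yes

Host github-personal
    HostName github.com
    User git
    IdentityFile ~/.ssh/id_ed25519_personal
    IdentitiesOnly yes
```

You then clone using the alias in place of `github.com`, e.g. `git clone git@github-office:rahul-office/repo.git`, and SSH picks the matching key automatically.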