Younes Belkada (younesbelkada)
@younesbelkada
younesbelkada / bnb-serialization.py
Created December 25, 2023 18:10
Push bnb 4-bit models to the Hub
# pip install -U bitsandbytes
# pip install -U git+https://github.com/huggingface/transformers.git
from transformers import AutoModelForCausalLM, AutoTokenizer
model_id = "mistralai/Mixtral-8x7B-Instruct-v0.1"
path_to_hub = XXX  # placeholder left in the gist: set to your Hub repo id
tokenizer = AutoTokenizer.from_pretrained(model_id)
# load_in_4bit=True quantizes the weights with bitsandbytes at load time,
# and push_to_hub then serializes the quantized checkpoint
model = AutoModelForCausalLM.from_pretrained(model_id, load_in_4bit=True)
model.push_to_hub(path_to_hub)
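# --- hedged addition (not in the gist preview): pushing the tokenizer as well
# --- makes the Hub repo self-contained.
tokenizer.push_to_hub(path_to_hub)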
@younesbelkada
younesbelkada / awq-fused.py
Created December 4, 2023 13:46
Run autoawq + fused modules using HF transformers on a custom model
from transformers import AutoModelForCausalLM, AwqConfig
model_name = "TheBloke/Mistral-7B-OpenOrca-AWQ"
code_revision = "f1b2cd1b7459ceecfdc1fac5bb8725f13707c589"
quantization_config = AwqConfig(
    bits=4,
    fuse_max_seq_len=512,
    modules_to_fuse={
        "attention": ["q_proj", "k_proj", "v_proj", "o_proj"],
@younesbelkada
younesbelkada / bechmark-fa-2-mistral-7b.py
Created October 2, 2023 16:21
Benchmark transformers + FA2 + Mistral 7B
import argparse
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
device = torch.device("cuda:0")
def get_parser():
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--max-new-tokens",
@younesbelkada
younesbelkada / benchmark-mistral-7b.py
Last active February 14, 2024 13:11
Benchmark the Mistral 7B model
import argparse
from mistral.cache import RotatingBufferCache
import torch
import inspect
from typing import List
from pathlib import Path
from mistral.model import Transformer
from mistral.tokenizer import Tokenizer
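# --- hedged sketch: the preview shows only imports; the loading calls below
# --- follow the mistral-src reference repo, and the checkpoint path, the
# --- from_folder signature, and the forward call are assumptions.
model_path = Path("mistral-7B-v0.1")  # assumed local checkpoint folder
tokenizer = Tokenizer(str(model_path / "tokenizer.model"))
transformer = Transformer.from_folder(model_path, max_batch_size=1)

# time a single prefill forward pass; seqlens marks where each sequence in the
# flattened batch ends (the RotatingBufferCache import suggests the full gist
# also benchmarks cached decoding, which is left out of this sketch)
prompt_ids = torch.tensor(tokenizer.encode("Hello, my name is"), device="cuda")
start = torch.cuda.Event(enable_timing=True)
end = torch.cuda.Event(enable_timing=True)
start.record()
logits = transformer(prompt_ids, seqlens=[prompt_ids.shape[0]])
end.record()
torch.cuda.synchronize()
print(f"prefill latency: {start.elapsed_time(end):.1f} ms")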
@younesbelkada
younesbelkada / llama-guanaco-fa2.py
Created September 22, 2023 15:00
Fine tune Llama 2 on the Guanaco dataset using Flash Attention 2
# import torch
# from transformers import AutoTokenizer, AutoModelForCausalLM
# model_id = "andrewrreed/falcon-7b-guanaco-qlora-arr"
# tokenizer = AutoTokenizer.from_pretrained(model_id)
# model = AutoModelForCausalLM.from_pretrained(model_id,
#     torch_dtype=torch.bfloat16,
#     load_in_4bit=True,
# )
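# --- hedged sketch of the fine-tuning setup the description refers to (the
# --- preview only shows a commented-out model-loading snippet); the model id,
# --- LoRA hyperparameters, and trainer arguments below are assumptions.
import torch
from datasets import load_dataset
from peft import LoraConfig
from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments
from trl import SFTTrainer

dataset = load_dataset("timdettmers/openassistant-guanaco", split="train")
model_id = "meta-llama/Llama-2-7b-hf"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    torch_dtype=torch.bfloat16,
    use_flash_attention_2=True,  # the FA2 switch in transformers at the time of this gist
)
peft_config = LoraConfig(r=16, lora_alpha=32, task_type="CAUSAL_LM")
trainer = SFTTrainer(
    model=model,
    train_dataset=dataset,
    peft_config=peft_config,
    dataset_text_field="text",
    max_seq_length=512,
    tokenizer=tokenizer,
    args=TrainingArguments(output_dir="llama-guanaco-fa2", per_device_train_batch_size=4),
)
trainer.train()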
@younesbelkada
younesbelkada / bench-fa-2.py
Last active March 19, 2024 15:46
Benchmark FA2 + transformers integration
import torch
import os
import argparse
import matplotlib.pyplot as plt
from tqdm import tqdm
from transformers import AutoModelForCausalLM, AutoTokenizer
import seaborn as sns
def get_parser():
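    # --- hedged completion: the preview ends at get_parser; the arguments,
    # --- benchmark loop, and plotting below are assumptions about how such a
    # --- comparison script typically proceeds.
    parser = argparse.ArgumentParser()
    parser.add_argument("--model-id", type=str, default="mistralai/Mistral-7B-v0.1")
    parser.add_argument("--max-new-tokens", type=int, default=64)
    return parser


args = get_parser().parse_args()
tokenizer = AutoTokenizer.from_pretrained(args.model_id)
inputs = tokenizer("Hello, my name is", return_tensors="pt").to("cuda")

latencies = {}
for use_fa2 in (False, True):
    model = AutoModelForCausalLM.from_pretrained(
        args.model_id, torch_dtype=torch.float16, use_flash_attention_2=use_fa2
    ).to("cuda")
    start = torch.cuda.Event(enable_timing=True)
    end = torch.cuda.Event(enable_timing=True)
    start.record()
    for _ in tqdm(range(5)):  # a few repeats to smooth out noise
        _ = model.generate(**inputs, max_new_tokens=args.max_new_tokens)
    end.record()
    torch.cuda.synchronize()
    latencies["FA2" if use_fa2 else "eager"] = start.elapsed_time(end) / 5
    del model
    torch.cuda.empty_cache()

sns.barplot(x=list(latencies.keys()), y=list(latencies.values()))
plt.ylabel("latency per generate call (ms)")
plt.savefig("fa2-benchmark.png")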
@younesbelkada
younesbelkada / benchmark-bnb-gptq.py
Last active November 17, 2023 05:31
Benchmark bnb 4-bit vs GPTQ
# You need the following libraries
# transformers == 4.32.0
# bitsandbytes == 0.41.0
# auto-gptq == 0.4.2
# optimum == 1.12.0
import torch
import matplotlib.pyplot as plt
from tqdm import tqdm
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
import seaborn as sns
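# --- hedged sketch of the comparison the imports above set up; the model ids
# --- and generation settings below are assumptions, not taken from the gist.
model_id_bnb = "meta-llama/Llama-2-7b-hf"   # assumed fp16 base model, quantized on the fly
model_id_gptq = "TheBloke/Llama-2-7B-GPTQ"  # assumed pre-quantized GPTQ checkpoint

tokenizer = AutoTokenizer.from_pretrained(model_id_bnb)
inputs = tokenizer("Hello, my name is", return_tensors="pt").to("cuda")

configs = {
    "bnb-4bit": dict(
        pretrained_model_name_or_path=model_id_bnb,
        quantization_config=BitsAndBytesConfig(
            load_in_4bit=True, bnb_4bit_compute_dtype=torch.float16
        ),
    ),
    "gptq": dict(pretrained_model_name_or_path=model_id_gptq),
}

results = {}
for label, kwargs in configs.items():
    model = AutoModelForCausalLM.from_pretrained(device_map="auto", **kwargs)
    start = torch.cuda.Event(enable_timing=True)
    end = torch.cuda.Event(enable_timing=True)
    start.record()
    for _ in tqdm(range(5)):
        _ = model.generate(**inputs, max_new_tokens=64)
    end.record()
    torch.cuda.synchronize()
    results[label] = start.elapsed_time(end) / 5
    del model
    torch.cuda.empty_cache()

sns.barplot(x=list(results.keys()), y=list(results.values()))
plt.ylabel("latency per generate call (ms)")
plt.savefig("bnb-vs-gptq.png")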
@younesbelkada
younesbelkada / train_adapters_transformers.py
Created August 3, 2023 09:43
Train adapters using transformers integration of PEFT
from datasets import load_dataset
import torch
from peft import LoraConfig, prepare_model_for_int8_training
from trl import SFTTrainer
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, TrainingArguments
dataset_name = "timdettmers/openassistant-guanaco"
dataset = load_dataset(dataset_name, split="train")
model_name = "facebook/opt-350m"
@younesbelkada
younesbelkada / finetune_mpt30b_guanaco.py
Last active August 30, 2023 06:04
Fine tune MPT-30B on the Guanaco dataset and turn it into a chatbot; read the docstrings to install the correct versions of the required libraries.
# coding=utf-8
# Copyright 2023 The HuggingFace Inc. team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
@younesbelkada
younesbelkada / finetune_llama_v2.py
Last active April 24, 2024 02:14
Fine tune Llama v2 models on the Guanaco dataset
# coding=utf-8
# Copyright 2023 The HuggingFace Inc. team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software