Younes Belkada (younesbelkada)

:octocat:
Working from home
View GitHub Profile
younesbelkada / bnb-serialization.py
Created December 25, 2023 18:10
Push bnb 4-bit models to the Hub
# pip install -U bitsandbytes
# pip install -U git+https://github.com/huggingface/transformers.git
from transformers import AutoModelForCausalLM, AutoTokenizer

model_id = "mistralai/Mixtral-8x7B-Instruct-v0.1"
path_to_hub = XXX  # placeholder: the target repo id on the Hub
tokenizer = AutoTokenizer.from_pretrained(model_id)
# Quantize to 4-bit with bitsandbytes at load time, then push the quantized checkpoint.
model = AutoModelForCausalLM.from_pretrained(model_id, load_in_4bit=True)
model.push_to_hub(path_to_hub)
younesbelkada / awq-fused.py
Created December 4, 2023 13:46
Run autoawq + fused modules using HF transformers on a custom model
from transformers import AutoModelForCausalLM, AwqConfig

model_name = "TheBloke/Mistral-7B-OpenOrca-AWQ"
code_revision = "f1b2cd1b7459ceecfdc1fac5bb8725f13707c589"

quantization_config = AwqConfig(
    bits=4,
    fuse_max_seq_len=512,
    modules_to_fuse={
        "attention": ["q_proj", "k_proj", "v_proj", "o_proj"],
younesbelkada / bechmark-fa-2-mistral-7b.py
Created October 2, 2023 16:21
Benchmark transformers + FA2 + Mistral 7B
import argparse

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

device = torch.device("cuda:0")

def get_parser():
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--max-new-tokens",
younesbelkada / benchmark-mistral-7b.py
Last active February 14, 2024 13:11
Benchmark the Mistral 7B model
import argparse
from mistral.cache import RotatingBufferCache
import torch
import inspect
from typing import List
from pathlib import Path
from mistral.model import Transformer
from mistral.tokenizer import Tokenizer
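The preview shows only the imports from the mistral-src reference implementation. Below is a generic CUDA-event latency helper of the kind such a benchmark relies on; the wrapped generate call in the usage comment is hypothetical, since the reference repo's exact entry points are not shown here.

import torch

def time_cuda(fn, warmup: int = 3, iters: int = 10) -> float:
    """Average GPU latency in ms of fn(), measured with CUDA events. Generic helper, not from the gist."""
    for _ in range(warmup):
        fn()
    start = torch.cuda.Event(enable_timing=True)
    end = torch.cuda.Event(enable_timing=True)
    torch.cuda.synchronize()
    start.record()
    for _ in range(iters):
        fn()
    end.record()
    torch.cuda.synchronize()
    return start.elapsed_time(end) / iters

# Hypothetical usage, wrapping the reference implementation's generation call:
# latency_ms = time_cuda(lambda: generate(prompts, model, tokenizer, max_tokens=128))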
younesbelkada / llama-guanaco-fa2.py
Created September 22, 2023 15:00
Fine-tune Llama 2 on the Guanaco dataset using Flash Attention 2
# import torch
# from transformers import AutoTokenizer, AutoModelForCausalLM
# model_id = "andrewrreed/falcon-7b-guanaco-qlora-arr"
# tokenizer = AutoTokenizer.from_pretrained(model_id)
# model = AutoModelForCausalLM.from_pretrained(model_id,
#     torch_dtype=torch.bfloat16,
#     load_in_4bit=True,
# )
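The visible lines are only a commented-out loading snippet for an unrelated checkpoint. A condensed sketch of the combination named in the title (Llama 2 + Guanaco + Flash Attention 2); the model id, hyperparameters, and flag spellings are assumptions.

import torch
from datasets import load_dataset
from peft import LoraConfig
from transformers import AutoModelForCausalLM, AutoTokenizer

dataset = load_dataset("timdettmers/openassistant-guanaco", split="train")

model_id = "meta-llama/Llama-2-7b-hf"  # assumption: any Llama 2 checkpoint works
tokenizer = AutoTokenizer.from_pretrained(model_id)
tokenizer.pad_token = tokenizer.eos_token

# The key piece: request the Flash Attention 2 kernels at load time
# (needs the flash-attn package and fp16/bf16 weights).
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    torch_dtype=torch.bfloat16,
    attn_implementation="flash_attention_2",
    device_map="auto",
)

# A LoRA config like this is then passed to trl's SFTTrainer for the Guanaco SFT run,
# as in the train_adapters_transformers sketch further down.
peft_config = LoraConfig(r=16, lora_alpha=32, lora_dropout=0.05, task_type="CAUSAL_LM")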
younesbelkada / bench-fa-2.py
Last active November 25, 2024 16:54
Benchmark FA2 + transformers integration
import torch
import os
import argparse
import matplotlib.pyplot as plt
from tqdm import tqdm
from transformers import AutoModelForCausalLM, AutoTokenizer
import seaborn as sns
def get_parser():
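Given the matplotlib/seaborn/tqdm imports, the script presumably sweeps a dimension such as batch size and plots eager vs Flash Attention 2 latency. A compact sketch of that loop; the model id, swept values, and plot styling are assumptions.

import matplotlib.pyplot as plt
import seaborn as sns
import torch
from tqdm import tqdm
from transformers import AutoModelForCausalLM, AutoTokenizer

model_id = "mistralai/Mistral-7B-v0.1"  # assumption
tokenizer = AutoTokenizer.from_pretrained(model_id)
tokenizer.pad_token = tokenizer.eos_token

results = {"eager": [], "flash_attention_2": []}
batch_sizes = [1, 2, 4, 8]  # assumption

for attn in results:
    model = AutoModelForCausalLM.from_pretrained(
        model_id, torch_dtype=torch.float16, attn_implementation=attn
    ).to("cuda:0")
    for bs in tqdm(batch_sizes, desc=attn):
        inputs = tokenizer(["Hello"] * bs, return_tensors="pt", padding=True).to("cuda:0")
        start = torch.cuda.Event(enable_timing=True)
        end = torch.cuda.Event(enable_timing=True)
        start.record()
        model.generate(**inputs, max_new_tokens=64)
        end.record()
        torch.cuda.synchronize()
        results[attn].append(start.elapsed_time(end))
    del model
    torch.cuda.empty_cache()

sns.set_theme()
for attn, latencies in results.items():
    plt.plot(batch_sizes, latencies, marker="o", label=attn)
plt.xlabel("batch size")
plt.ylabel("generation latency (ms)")
plt.legend()
plt.savefig("fa2_benchmark.png")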
younesbelkada / benchmark-bnb-gptq.py
Last active November 26, 2024 16:24
Benchmark bnb 4bit vs GPTQ
# You need the following libraries
# transformers == 4.32.0
# bitsandbytes == 0.41.0
# auto-gptq == 0.4.2
# optimum == 1.12.0
import torch
import matplotlib.pyplot as plt
from tqdm import tqdm
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
import seaborn as sns
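The preview ends at the imports. A sketch of the comparison those version pins set up: generation latency for a bitsandbytes 4-bit load of an fp16 checkpoint versus a pre-quantized GPTQ checkpoint loaded through auto-gptq/optimum. The checkpoints and token counts are assumptions.

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

prompt = "Hello, my name is"
max_new_tokens = 128  # assumption

def time_generate(model, tokenizer):
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    model.generate(**inputs, max_new_tokens=max_new_tokens)  # warmup
    start = torch.cuda.Event(enable_timing=True)
    end = torch.cuda.Event(enable_timing=True)
    start.record()
    model.generate(**inputs, max_new_tokens=max_new_tokens)
    end.record()
    torch.cuda.synchronize()
    return start.elapsed_time(end)

# bitsandbytes: 4-bit quantization applied at load time to fp16 weights.
bnb_model_id = "meta-llama/Llama-2-7b-hf"  # assumption
bnb_config = BitsAndBytesConfig(load_in_4bit=True, bnb_4bit_compute_dtype=torch.float16)
tokenizer = AutoTokenizer.from_pretrained(bnb_model_id)
bnb_model = AutoModelForCausalLM.from_pretrained(
    bnb_model_id, quantization_config=bnb_config, device_map="auto"
)
print("bnb 4-bit:", time_generate(bnb_model, tokenizer), "ms")

# GPTQ: a checkpoint quantized ahead of time, loaded via auto-gptq/optimum.
gptq_model_id = "TheBloke/Llama-2-7B-GPTQ"  # assumption
gptq_model = AutoModelForCausalLM.from_pretrained(gptq_model_id, device_map="auto")
print("GPTQ 4-bit:", time_generate(gptq_model, tokenizer), "ms")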
younesbelkada / train_adapters_transformers.py
Created August 3, 2023 09:43
Train adapters using the transformers integration of PEFT
from datasets import load_dataset
import torch
from peft import LoraConfig, prepare_model_for_int8_training
from trl import SFTTrainer
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, TrainingArguments
dataset_name = "timdettmers/openassistant-guanaco"
dataset = load_dataset(dataset_name, split="train")
model_name = "facebook/opt-350m"
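The preview stops after the dataset and model name. A sketch of how such a run typically continues, mirroring the preview's imports (8-bit load, prepare_model_for_int8_training, LoRA config, SFTTrainer); hyperparameters and target modules are assumptions.

from datasets import load_dataset
from peft import LoraConfig, prepare_model_for_int8_training
from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments
from trl import SFTTrainer

dataset = load_dataset("timdettmers/openassistant-guanaco", split="train")
model_name = "facebook/opt-350m"

tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name, load_in_8bit=True, device_map="auto")
model = prepare_model_for_int8_training(model)

# LoRA on the attention projections; OPT names them q_proj/v_proj (assumption).
peft_config = LoraConfig(
    r=16, lora_alpha=32, lora_dropout=0.05,
    target_modules=["q_proj", "v_proj"],
    task_type="CAUSAL_LM",
)

trainer = SFTTrainer(
    model=model,
    train_dataset=dataset,
    dataset_text_field="text",  # assumption: Guanaco rows expose a "text" column
    max_seq_length=512,
    peft_config=peft_config,
    tokenizer=tokenizer,
    args=TrainingArguments(output_dir="opt-350m-guanaco-lora", per_device_train_batch_size=4),
)
trainer.train()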
younesbelkada / finetune_mpt30b_guanaco.py
Last active August 30, 2023 06:04
Fine-tune MPT-30B on the Guanaco dataset and turn it into a chatbot; read the docstrings to install the correct versions of the required libraries.
# coding=utf-8
# Copyright 2023 The HuggingFace Inc. team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
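Only the license header is visible here. A small sketch of the MPT-specific part of such a script: a 4-bit load with trust_remote_code, since MPT-30B ships custom modeling code on the Hub. The LoRA target modules for MPT are an assumption.

import torch
from peft import LoraConfig
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

model_id = "mosaicml/mpt-30b"

bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
)

# MPT ships its own modeling code on the Hub, hence trust_remote_code=True.
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    quantization_config=bnb_config,
    trust_remote_code=True,
    device_map="auto",
)
tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)

# MPT fuses the q/k/v projections into a single "Wqkv" module (assumption for target_modules).
peft_config = LoraConfig(r=16, lora_alpha=32, target_modules=["Wqkv"], task_type="CAUSAL_LM")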
younesbelkada / finetune_llama_v2.py
Last active December 15, 2024 04:59
Fine-tune Llama v2 models on the Guanaco dataset
# coding=utf-8
# Copyright 2023 The HuggingFace Inc. team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
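Again only the license header survives in the preview. The script is a QLoRA fine-tuning recipe for Llama 2 on Guanaco; a condensed sketch of the quantization and LoRA settings such a recipe uses, with the exact hyperparameters here being assumptions.

import torch
from peft import LoraConfig
from transformers import AutoModelForCausalLM, BitsAndBytesConfig

model_name = "meta-llama/Llama-2-7b-hf"

# QLoRA: 4-bit NF4 base weights with bf16 compute, LoRA adapters trained on top.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
    bnb_4bit_use_double_quant=True,
)
model = AutoModelForCausalLM.from_pretrained(
    model_name, quantization_config=bnb_config, device_map="auto"
)

peft_config = LoraConfig(
    r=64,            # assumption: a common setting in this family of scripts
    lora_alpha=16,
    lora_dropout=0.1,
    task_type="CAUSAL_LM",
)
# Training then proceeds with trl's SFTTrainer on the Guanaco dataset, as in the sketches above.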