## Convert the safetensors checkpoint for llama.cpp (safetensors2llama.cpp)
# Pull the fine-tuned 70B checkpoint from the Hugging Face Hub into a local
# directory of real files (no symlink cache) so llama.cpp tooling can read it.
from huggingface_hub import snapshot_download

model_id = "vincentoh/llama3_70b_no_robot_fsdp_qlora"
snapshot_download(
    repo_id=model_id,
    revision="main",
    local_dir="llama70b-hf",
    local_dir_use_symlinks=False,
)
# Sample image URL used by the vision/transform examples.
url = 'https://upload.wikimedia.org/wikipedia/commons/thumb/4/43/Cute_dog.jpg/1600px-Cute_dog.jpg?20140729055059'

# One import per line (PEP 8); the trailing "| |" extraction artifacts that
# made these lines invalid Python have been removed.
import torch
import os
import math
import gzip
import pickle
import matplotlib.pyplot as plt
from urllib.request import urlretrieve
from pathlib import Path
from torch import tensor
import torchvision as tv
import torchvision.transforms.functional as tvf
from torchvision import io
# Evaluation-harness settings for the DPO-instruct model.
# (Trailing "| |" extraction artifacts removed — they made these lines
# invalid Python.)
MODEL_ID = "vincentoh/llama3-alpaca-dpo-instruct"
TRUST_REMOTE_CODE = "yes"      # passed through as a string flag, not a bool
DTYPE = "bfloat16"
BATCH_SIZE = "auto"            # let the harness pick the batch size
CUDA_DEVICES = 0               # single-GPU: device index 0
# Provision the machine: terminal multiplexer, editor, git-lfs for model
# weights, and the Python deps the eval scripts need. huggingface_hub is
# pinned to 0.21.4 to match the API used in the snippets below.
# (Trailing "|" extraction artifacts removed.)
sudo apt update
sudo apt install -y screen vim git-lfs
pip install -q requests accelerate sentencepiece pytablewriter einops protobuf huggingface_hub==0.21.4
## Convert the safetensors checkpoint for llama.cpp (safetensors2llama.cpp) — repeated snippet
# Download the full model snapshot again for the conversion step: materialize
# real files (no symlinks) under llama70b-hf/ from the main branch.
from huggingface_hub import snapshot_download

model_id = "vincentoh/llama3_70b_no_robot_fsdp_qlora"
snapshot_download(
    repo_id=model_id,
    local_dir="llama70b-hf",
    local_dir_use_symlinks=False,
    revision="main",
)
# Load the fine-tuned model and tokenizer and set up a chat prompt.
# (Trailing "| |" extraction artifacts removed — they made these lines
# invalid Python.)
from transformers import AutoModelForCausalLM, AutoTokenizer

# NOTE(review): `device` is assigned but never used — the model is not moved
# with .to(device). Confirm whether accelerate/device_map handles placement,
# otherwise generation will run on CPU.
device = "cuda"
model_id = "vincentoh/llama3_70b_no_robot_fsdp_qlora"
model = AutoModelForCausalLM.from_pretrained(model_id)
tokenizer = AutoTokenizer.from_pretrained(model_id)
messages = [{"role": "user", "content": "Why is the sky blue?"}]
from datasets import load_dataset

# Convert dataset rows to the OpenAI-style chat-messages format.
# (Trailing "| |" extraction artifacts removed.)
# System prompt prepended to any sample that does not already carry one.
system_message = """You are Llama, an AI assistant created by BigSnarfDude to be helpful and honest. Your knowledge spans a wide range of topics, allowing you to engage in substantive conversations and provide analysis on complex subjects."""

def create_conversation(sample):
    """Ensure ``sample["messages"]`` begins with a system message.

    Mutates and returns *sample*. Samples that already start with a system
    message are returned unchanged.
    """
    if sample["messages"][0]["role"] == "system":
        return sample
    sample["messages"] = [{"role": "system", "content": system_message}] + sample["messages"]
    # Bug fix: the original mutated the sample but fell off the end and
    # returned None — dataset .map() callers need the dict back.
    return sample
| Method | Bits | 7B    | 13B   | 30B   | 65B    | 8x7B   |
| ------ | ---- | ----- | ----- | ----- | ------ | ------ |
| Full   | 16   | 160GB | 320GB | 600GB | 1200GB | 1000GB |
| Freeze | 16   | 20GB  | 40GB  | 120GB | 240GB  | 200GB  |
| LoRA   | 16   | 16GB  | 32GB  | 80GB  | 160GB  | 120GB  |
| QLoRA  | 8    | 10GB  | 16GB  | 40GB  | 80GB   | 80GB   |
| QLoRA  | 4    | 6GB   | 12GB  | 24GB  | 48GB   | 32GB   |
import os

import transformers
import torch
from huggingface_hub import login

# Security fix: never hard-code credentials. Read the Hub token from the
# environment; the "" fallback preserves the original placeholder behavior
# when HF_TOKEN is unset. (Trailing "| |" extraction artifacts removed.)
login(token=os.environ.get("HF_TOKEN", ""))

model_id = "meta-llama/Meta-Llama-3-8B-Instruct"