# safetensors2llama.cpp — notes: convert a safetensors HF checkpoint to a llama.cpp GGUF and upload it to the Hub
# Step 1: download the fine-tuned safetensors checkpoint from the Hugging Face Hub.
import os

from huggingface_hub import snapshot_download

model_id = "vincentoh/llama3_70b_no_robot_fsdp_qlora"
# Download into ~/Downloads/llama70b-hf so the path matches the convert/quantize
# steps below (the original downloaded into ./llama70b-hf, a directory none of
# the later commands reference). expanduser() is required: the Hub client does
# not expand "~" itself.
snapshot_download(
    repo_id=model_id,
    local_dir=os.path.expanduser("~/Downloads/llama70b-hf"),
    local_dir_use_symlinks=False,  # materialize real files, not symlinks into the HF cache
    revision="main",
)
# Step 2: convert the HF checkpoint to an f16 GGUF (run from a llama.cpp
# checkout; Llama 3 uses a BPE tokenizer, hence --vocab-type bpe).
python convert.py --outtype f16 ~/Downloads/llama70b-hf --vocab-type bpe
# Step 3: quantize the f16 GGUF down to Q4_K_M. No explicit output path is
# given — presumably quantize writes ggml-model-Q4_K_M.gguf next to the
# input; TODO confirm against the quantize usage in your llama.cpp checkout.
./quantize ~/Downloads/llama70b-hf/ggml-model-f16.gguf Q4_K_M
# Step 4: create the target repo and upload the quantized GGUF to the Hub.
import os

from huggingface_hub import HfApi

api = HfApi()
model_id = "vincentoh/llama3-70b-GGUF"
api.create_repo(model_id, exist_ok=True, repo_type="model")
api.upload_file(
    # expanduser(): the Hub client does not expand "~" in local paths, so the
    # original literal "~/..." path would fail with file-not-found.
    path_or_fileobj=os.path.expanduser(
        "~/Downloads/llama70b-hf/ggml-model-Q4_K_M.gguf"
    ),
    # Upload under a plain filename at the repo root; the original passed the
    # repo id here, which would create a nested "vincentoh/llama3-70b-GGUF"
    # directory inside the repo.
    path_in_repo="ggml-model-Q4_K_M.gguf",
    repo_id=model_id,
)