# safetensors2llama.cpp — notes: convert a safetensors HF checkpoint to a llama.cpp GGUF and upload it to the Hub
# Step 1: download the fine-tuned safetensors checkpoint from the Hugging Face Hub.
import os

from huggingface_hub import snapshot_download

model_id = "vincentoh/llama3_70b_no_robot_fsdp_qlora"
# Download into ~/Downloads/llama70b-hf so the path matches the convert/quantize
# steps below (the original downloaded into ./llama70b-hf, a directory none of
# the later commands reference). expanduser() is required: the Hub client does
# not expand "~" itself.
snapshot_download(
    repo_id=model_id,
    local_dir=os.path.expanduser("~/Downloads/llama70b-hf"),
    local_dir_use_symlinks=False,  # materialize real files, not symlinks into the HF cache
    revision="main",
)
# Step 2: convert the HF checkpoint to an f16 GGUF (run from a llama.cpp
# checkout; Llama 3 uses a BPE tokenizer, hence --vocab-type bpe).
python convert.py --outtype f16 ~/Downloads/llama70b-hf --vocab-type bpe
# Step 3: quantize the f16 GGUF down to Q4_K_M. No explicit output path is
# given — presumably quantize writes ggml-model-Q4_K_M.gguf next to the
# input; TODO confirm against the quantize usage in your llama.cpp checkout.
./quantize ~/Downloads/llama70b-hf/ggml-model-f16.gguf Q4_K_M
# Step 4: create the target repo and upload the quantized GGUF to the Hub.
import os

from huggingface_hub import HfApi

api = HfApi()
model_id = "vincentoh/llama3-70b-GGUF"
api.create_repo(model_id, exist_ok=True, repo_type="model")
api.upload_file(
    # expanduser(): the Hub client does not expand "~" in local paths, so the
    # original literal "~/..." path would fail with file-not-found.
    path_or_fileobj=os.path.expanduser(
        "~/Downloads/llama70b-hf/ggml-model-Q4_K_M.gguf"
    ),
    # Upload under a plain filename at the repo root; the original passed the
    # repo id here, which would create a nested "vincentoh/llama3-70b-GGUF"
    # directory inside the repo.
    path_in_repo="ggml-model-Q4_K_M.gguf",
    repo_id=model_id,
)