Skip to content

Instantly share code, notes, and snippets.

@maziyarpanahi
Forked from 152334H/miqu-upload-hf.py
Created February 9, 2024 22:54
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save maziyarpanahi/20bfb07e0d70d987853dc0fca217ec82 to your computer and use it in GitHub Desktop.
Upload the miqu checkpoint to Hugging Face (convert a raw .pt checkpoint to safetensors and push it to the Hub).
"""Convert a raw miqu checkpoint to HF safetensors format and push it to the Hub.

Builds a LLaMA-2-70B model skeleton (config patched for miqu's extended
context), streams the real weights in from a local ``.pt`` checkpoint, then
saves the model — and its tokenizer — with ``safe_serialization`` and pushes
both to the Hugging Face Hub.
"""
from transformers import LlamaConfig as LC, LlamaForCausalLM as LLM, LlamaTokenizer as LT
from accelerate import init_empty_weights, load_checkpoint_and_dispatch
import torch

# Tokenizer for the pushed repo. NOTE(review): loaded from the 7b repo while the
# config comes from the 70b repo — presumably both ship the same LLaMA-2
# tokenizer; confirm before relying on it.
lt = LT.from_pretrained("NousResearch/Llama-2-7b-hf")

# Start from the stock 70B architecture config, then apply miqu's differences.
c = LC.from_pretrained("NousResearch/Llama-2-70b-hf")
c.max_position_embeddings = 32764  # miqu's extended context length
c.rope_theta = 1000000             # RoPE base raised to match the longer context

# Instantiate the model on the "meta" device so no weight memory is allocated;
# the real tensors are loaded by load_checkpoint_and_dispatch below.
with init_empty_weights():
    m = LLM(c)
m = m.half().eval()       # fp16, inference mode
m.requires_grad_(False)   # no autograd bookkeeping needed for a pure conversion

ckpt = 'hf_pm2.pt'
print(f'loading {ckpt=}')
# device_map='auto' shards the load across available devices; decoder layers
# are kept whole so a single layer is never split across devices.
model = load_checkpoint_and_dispatch(
    m,
    checkpoint=ckpt,
    device_map='auto',
    no_split_module_classes=["LlamaDecoderLayer"],
)

out_dir = "./miqu-1-70b-sf"
model.save_pretrained(out_dir, push_to_hub=True, safe_serialization=True)
# BUGFIX: the tokenizer was loaded but never saved or pushed, so the resulting
# Hub repo had no tokenizer files and could not be loaded with AutoTokenizer.
lt.save_pretrained(out_dir, push_to_hub=True)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment