@152334H
Created January 30, 2024 16:33
Upload the miqu checkpoint to Hugging Face
from transformers import LlamaConfig as LC, LlamaForCausalLM as LLM, LlamaTokenizer as LT
from accelerate import init_empty_weights, load_checkpoint_and_dispatch
import torch

# Llama-2 tokenizer (miqu reuses the Llama vocabulary); loaded here but not pushed by this script.
lt = LT.from_pretrained("NousResearch/Llama-2-7b-hf")

# Start from the Llama-2-70B config and adjust it for miqu's 32k context window and RoPE base.
c = LC.from_pretrained("NousResearch/Llama-2-70b-hf")
c.max_position_embeddings = 32764
c.rope_theta = 1000000

# Instantiate the model skeleton on the meta device (no memory allocated), then set it up for fp16 inference.
with init_empty_weights():
    m = LLM(c)
m = m.half().eval()
m.requires_grad_(False)

# Load the checkpoint from disk and dispatch it across available devices, keeping each decoder layer on a single device.
ckpt = 'hf_pm2.pt'
print(f'loading {ckpt=}')
model = load_checkpoint_and_dispatch(m, checkpoint=ckpt, device_map='auto', no_split_module_classes=["LlamaDecoderLayer"])

# Write safetensors shards to ./miqu-1-70b-sf and push them to the Hub.
model.save_pretrained("./miqu-1-70b-sf", push_to_hub=True, safe_serialization=True)
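
One gap worth noting: the tokenizer lt is loaded but never saved or pushed, so the Hub repo created above holds only the weights and config. A minimal follow-up sketch, assuming the repo was created under your namespace with the same name as the output directory ("miqu-1-70b-sf" is an assumption) and that you are already authenticated with the Hub:

from transformers import LlamaTokenizer

# Save the Llama-2 tokenizer files next to the converted model and push them to the same repo.
# "miqu-1-70b-sf" is an assumed repo id; substitute whatever save_pretrained() actually created.
lt = LlamaTokenizer.from_pretrained("NousResearch/Llama-2-7b-hf")
lt.save_pretrained("./miqu-1-70b-sf")
lt.push_to_hub("miqu-1-70b-sf")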