Skip to content

Instantly share code, notes, and snippets.

Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
# Base checkpoint and its config; both point at the same Hugging Face repo id.
base_model: Qwen/Qwen1.5-1.8B
base_model_config: Qwen/Qwen1.5-1.8B
# Model and tokenizer classes to use for loading.
model_type: Qwen2ForCausalLM
tokenizer_type: AutoTokenizer
# Hub repo id for the resulting model -- presumably the push target; confirm against the consuming tool.
hub_model_id: Qwen-Prometheus-1.8B
# Quantized-loading flags: 8-bit disabled, 4-bit enabled.
load_in_8bit: false
load_in_4bit: true
# NOTE(review): the meaning of `strict` depends on the tool that reads this config -- confirm.
strict: false
# Interleaved-DUS (iDUS) is a modification of Depth Up-Scaling (DUS), which was introduced by SOLAR-10.7B.
# iDUS does not simply stack the model layers; it builds the new model by interleaving them.
#
# This code is based on silphendio's gist.
from transformers import AutoTokenizer, AutoModelForCausalLM, AutoTokenizer
import torch
# Source checkpoint: a Hugging Face repo name or a local folder.
model_path = "Cartinoe5930/Llama2_init_Mistral"
# Destination repo id; the original note describes it as a "same size model".
new_model_path = "Cartinoe5930/SOLAR-DUS-implement"
from transformers import AutoConfig, AutoModelForCausalLM, AutoTokenizer
import torch
# Placeholder: replace with a real Hugging Face access token before running.
hf_token = "your huggingface access token"

# Load the Llama-2 configuration, overriding intermediate_size and
# num_key_value_heads with the values given here (the original note says they
# are chosen to equal Mistral's) so the Mistral weights fit this architecture.
model_config = AutoConfig.from_pretrained(
    "meta-llama/Llama-2-7b-hf",
    token=hf_token,
    intermediate_size=14336,
    num_key_value_heads=8,
    torch_dtype="float16",
)

# Load the Mistral weights into the Llama-2 architecture described by
# model_config. The keyword must be spelled `torch_dtype`; the previous
# spelling (`tordh_dtype`) was not the dtype option, so the float16 request
# was never applied.
model = AutoModelForCausalLM.from_pretrained(
    "mistralai/Mistral-7B-v0.1",
    torch_dtype=torch.float16,
    config=model_config,
)