@aurotripathy
Last active February 9, 2025 19:32
"""
MistralForCausalLM(
(model): MistralModel(
(embed_tokens): Embedding(131072, 5120)
(layers): ModuleList(
(0-39): 40 x MistralDecoderLayer(
(self_attn): MistralAttention(
(q_proj): Linear(in_features=5120, out_features=4096, bias=False)
(k_proj): Linear(in_features=5120, out_features=1024, bias=False)
(v_proj): Linear(in_features=5120, out_features=1024, bias=False)
(o_proj): Linear(in_features=4096, out_features=5120, bias=False)
)
(mlp): MistralMLP(
(gate_proj): Linear(in_features=5120, out_features=14336, bias=False)
(up_proj): Linear(in_features=5120, out_features=14336, bias=False)
(down_proj): Linear(in_features=14336, out_features=5120, bias=False)
(act_fn): SiLU()
)
(input_layernorm): MistralRMSNorm((5120,), eps=1e-05)
(post_attention_layernorm): MistralRMSNorm((5120,), eps=1e-05)
)
)
(norm): MistralRMSNorm((5120,), eps=1e-05)
(rotary_emb): MistralRotaryEmbedding()
)
(lm_head): Linear(in_features=5120, out_features=131072, bias=False)
)
"""
## Better indenting
"""
MistralForCausalLM(
  (model): MistralModel(
    (embed_tokens): Embedding(131072, 5120)
    (layers): ModuleList(
      (0-39): 40 x MistralDecoderLayer(
        (self_attn): MistralAttention(
          (q_proj): Linear(in_features=5120, out_features=4096, bias=False)
          (k_proj): Linear(in_features=5120, out_features=1024, bias=False)
          (v_proj): Linear(in_features=5120, out_features=1024, bias=False)
          (o_proj): Linear(in_features=4096, out_features=5120, bias=False)
        )
        (mlp): MistralMLP(
          (gate_proj): Linear(in_features=5120, out_features=14336, bias=False)
          (up_proj): Linear(in_features=5120, out_features=14336, bias=False)
          (down_proj): Linear(in_features=14336, out_features=5120, bias=False)
          (act_fn): SiLU()
        )
        (input_layernorm): MistralRMSNorm((5120,), eps=1e-05)
        (post_attention_layernorm): MistralRMSNorm((5120,), eps=1e-05)
      )
    )
    (norm): MistralRMSNorm((5120,), eps=1e-05)
    (rotary_emb): MistralRotaryEmbedding()
  )
  (lm_head): Linear(in_features=5120, out_features=131072, bias=False)
)
"""
"""
LlamaForCausalLM(
  (model): LlamaModel(
    (embed_tokens): Embedding(128256, 4096)
    (layers): ModuleList(
      (0-31): 32 x LlamaDecoderLayer(
        (self_attn): LlamaAttention(
          (q_proj): Linear(in_features=4096, out_features=4096, bias=False)
          (k_proj): Linear(in_features=4096, out_features=1024, bias=False)
          (v_proj): Linear(in_features=4096, out_features=1024, bias=False)
          (o_proj): Linear(in_features=4096, out_features=4096, bias=False)
        )
        (mlp): LlamaMLP(
          (gate_proj): Linear(in_features=4096, out_features=14336, bias=False)
          (up_proj): Linear(in_features=4096, out_features=14336, bias=False)
          (down_proj): Linear(in_features=14336, out_features=4096, bias=False)
          (act_fn): SiLU()
        )
        (input_layernorm): LlamaRMSNorm((4096,), eps=1e-05)
        (post_attention_layernorm): LlamaRMSNorm((4096,), eps=1e-05)
      )
    )
    (norm): LlamaRMSNorm((4096,), eps=1e-05)
    (rotary_emb): LlamaRotaryEmbedding()
  )
  (lm_head): Linear(in_features=4096, out_features=128256, bias=False)
)
"""
# code
import os
from pathlib import Path

import torch
from huggingface_hub import snapshot_download
from transformers import AutoTokenizer, AutoModelForCausalLM

token = os.environ['HF_TOKEN']  # Hugging Face access token (both repos are gated)

# Download Mistral-Nemo-Instruct, load it, and print its architecture
model_str = "mistralai/Mistral-Nemo-Instruct-2407"
model_path = Path.home().joinpath('mistral_models', 'Nemo-Instruct')
model_path.mkdir(parents=True, exist_ok=True)
snapshot_download(
    repo_id=model_str,
    local_dir=model_path,
)
tokenizer = AutoTokenizer.from_pretrained(model_path, token=token)
model = AutoModelForCausalLM.from_pretrained(
    model_path,
    token=token,
    torch_dtype=torch.bfloat16,
    device_map="auto",
)
print(model)

# Same steps for Meta-Llama-3-8B-Instruct
model_str = "meta-llama/Meta-Llama-3-8B-Instruct"
model_path = Path.home().joinpath('llama_models', 'Llama-Instruct')
model_path.mkdir(parents=True, exist_ok=True)
snapshot_download(
    repo_id=model_str,
    local_dir=model_path,
)
tokenizer = AutoTokenizer.from_pretrained(model_path, token=token)
model = AutoModelForCausalLM.from_pretrained(
    model_path,
    token=token,
    torch_dtype=torch.bfloat16,
    device_map="auto",
)
print(model)
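
# Optional quick generation check on the most recently loaded model
# (a minimal sketch; the prompt and token budget are arbitrary choices):
inputs = tokenizer("The capital of France is", return_tensors="pt").to(model.device)
outputs = model.generate(**inputs, max_new_tokens=20,
                         pad_token_id=tokenizer.eos_token_id)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))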
@aurotripathy (Author)

added better indenting