""" | |
MistralForCausalLM( | |
(model): MistralModel( | |
(embed_tokens): Embedding(131072, 5120) | |
(layers): ModuleList( | |
(0-39): 40 x MistralDecoderLayer( | |
(self_attn): MistralAttention( | |
(q_proj): Linear(in_features=5120, out_features=4096, bias=False) | |
(k_proj): Linear(in_features=5120, out_features=1024, bias=False) | |
(v_proj): Linear(in_features=5120, out_features=1024, bias=False) | |
(o_proj): Linear(in_features=4096, out_features=5120, bias=False) | |
) | |
(mlp): MistralMLP( | |
(gate_proj): Linear(in_features=5120, out_features=14336, bias=False) | |
(up_proj): Linear(in_features=5120, out_features=14336, bias=False) | |
(down_proj): Linear(in_features=14336, out_features=5120, bias=False) | |
(act_fn): SiLU() | |
) | |
(input_layernorm): MistralRMSNorm((5120,), eps=1e-05) | |
(post_attention_layernorm): MistralRMSNorm((5120,), eps=1e-05) | |
) | |
) | |
(norm): MistralRMSNorm((5120,), eps=1e-05) | |
(rotary_emb): MistralRotaryEmbedding() | |
) | |
(lm_head): Linear(in_features=5120, out_features=131072, bias=False) | |
) | |
""" | |
# Mistral-Nemo-Instruct-2407 architecture, printed by the code below.
""" | |
MistralForCausalLM( | |
(model): MistralModel( | |
(embed_tokens): Embedding(131072, 5120) | |
(layers): ModuleList( | |
(0-39): 40 x MistralDecoderLayer( | |
(self_attn): MistralAttention( | |
(q_proj): Linear(in_features=5120, out_features=4096, bias=False) | |
(k_proj): Linear(in_features=5120, out_features=1024, bias=False) | |
(v_proj): Linear(in_features=5120, out_features=1024, bias=False) | |
(o_proj): Linear(in_features=4096, out_features=5120, bias=False) | |
) | |
(mlp): MistralMLP( | |
(gate_proj): Linear(in_features=5120, out_features=14336, bias=False) | |
(up_proj): Linear(in_features=5120, out_features=14336, bias=False) | |
(down_proj): Linear(in_features=14336, out_features=5120, bias=False) | |
(act_fn): SiLU() | |
) | |
(input_layernorm): MistralRMSNorm((5120,), eps=1e-05) | |
(post_attention_layernorm): MistralRMSNorm((5120,), eps=1e-05) | |
) | |
) | |
(norm): MistralRMSNorm((5120,), eps=1e-05) | |
(rotary_emb): MistralRotaryEmbedding() | |
) | |
(lm_head): Linear(in_features=5120, out_features=131072, bias=False) | |
) | |
""" | |
""" | |
LlamaForCausalLM( | |
(model): LlamaModel( | |
(embed_tokens): Embedding(128256, 4096) | |
(layers): ModuleList( | |
(0-31): 32 x LlamaDecoderLayer( | |
(self_attn): LlamaAttention( | |
(q_proj): Linear(in_features=4096, out_features=4096, bias=False) | |
(k_proj): Linear(in_features=4096, out_features=1024, bias=False) | |
(v_proj): Linear(in_features=4096, out_features=1024, bias=False) | |
(o_proj): Linear(in_features=4096, out_features=4096, bias=False) | |
) | |
(mlp): LlamaMLP( | |
(gate_proj): Linear(in_features=4096, out_features=14336, bias=False) | |
(up_proj): Linear(in_features=4096, out_features=14336, bias=False) | |
(down_proj): Linear(in_features=14336, out_features=4096, bias=False) | |
(act_fn): SiLU() | |
) | |
(input_layernorm): LlamaRMSNorm((4096,), eps=1e-05) | |
(post_attention_layernorm): LlamaRMSNorm((4096,), eps=1e-05) | |
) | |
) | |
(norm): LlamaRMSNorm((4096,), eps=1e-05) | |
(rotary_emb): LlamaRotaryEmbedding() | |
) | |
(lm_head): Linear(in_features=4096, out_features=128256, bias=False) | |
) | |
""" | |
# Code: download each checkpoint from the Hugging Face Hub and print its architecture.
import os
from pathlib import Path

import torch
from huggingface_hub import snapshot_download
from transformers import AutoTokenizer, AutoModelForCausalLM

token = os.environ['HF_TOKEN']  # needed for gated repos (e.g. Meta-Llama-3 requires accepting its license)

# --- Mistral-Nemo-Instruct-2407 ---
model_str = "mistralai/Mistral-Nemo-Instruct-2407"
model_path = Path.home().joinpath('mistral_models', 'Nemo-Instruct')
model_path.mkdir(parents=True, exist_ok=True)
snapshot_download(
    repo_id=model_str,
    local_dir=model_path,
)
tokenizer = AutoTokenizer.from_pretrained(model_path, token=token)
model = AutoModelForCausalLM.from_pretrained(
    model_path,
    token=token,
    torch_dtype=torch.bfloat16,
    device_map="auto",
)
print(model)

# --- Meta-Llama-3-8B-Instruct ---
model_str = "meta-llama/Meta-Llama-3-8B-Instruct"
model_path = Path.home().joinpath('llama_models', 'Llama-Instruct')
model_path.mkdir(parents=True, exist_ok=True)
snapshot_download(
    repo_id=model_str,
    local_dir=model_path,
)
tokenizer = AutoTokenizer.from_pretrained(model_path, token=token)
model = AutoModelForCausalLM.from_pretrained(
    model_path,
    token=token,
    torch_dtype=torch.bfloat16,
    device_map="auto",
)
print(model)
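# Optional usage sketch (not in the original gist): run a short chat completion with
# whichever model was loaded last. apply_chat_template works here because both
# instruct checkpoints ship a chat template with their tokenizer.
messages = [{"role": "user", "content": "Summarize grouped-query attention in one sentence."}]
input_ids = tokenizer.apply_chat_template(
    messages, add_generation_prompt=True, return_tensors="pt"
).to(model.device)
with torch.no_grad():
    output_ids = model.generate(input_ids, max_new_tokens=64)
print(tokenizer.decode(output_ids[0][input_ids.shape[-1]:], skip_special_tokens=True))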