@sparverius
Created September 27, 2023 21:30
{
"model": "Model",
"model.embed_tokens": "Embedding",
"model.embed_tokens.weight": {"shape": [32000, 4096], "dtype": "float16"},
"model.layers": "ModuleList",
"model.layers.0": "DecoderLayer",
"model.layers.0.self_attn": "Attention",
"model.layers.0.self_attn.rotary_emb": "RotaryEmbedding",
"model.layers.0.self_attn.rotary_emb.inv_freq": {"shape": [64], "dtype": "float32"},
"model.layers.0.self_attn.rotary_emb.cos_cached": {"shape": [1, 1, 4096, 128], "dtype": "float16"},
"model.layers.0.self_attn.rotary_emb.sin_cached": {"shape": [1, 1, 4096, 128], "dtype": "float16"},
"model.layers.0.self_attn.k_proj": "QuantLinear",
"model.layers.0.self_attn.k_proj.qweight": {"shape": [512, 4096], "dtype": "int32"},
"model.layers.0.self_attn.k_proj.qzeros": {"shape": [32, 512], "dtype": "int32"},
"model.layers.0.self_attn.k_proj.scales": {"shape": [32, 4096], "dtype": "float16"},
"model.layers.0.self_attn.k_proj.g_idx": {"shape": [4096], "dtype": "int32"},
"model.layers.0.self_attn.k_proj.bias": {"shape": [4096], "dtype": "float16"},
"model.layers.0.self_attn.o_proj": "QuantLinear",
"model.layers.0.self_attn.o_proj.qweight": {"shape": [512, 4096], "dtype": "int32"},
"model.layers.0.self_attn.o_proj.qzeros": {"shape": [32, 512], "dtype": "int32"},
"model.layers.0.self_attn.o_proj.scales": {"shape": [32, 4096], "dtype": "float16"},
"model.layers.0.self_attn.o_proj.g_idx": {"shape": [4096], "dtype": "int32"},
"model.layers.0.self_attn.o_proj.bias": {"shape": [4096], "dtype": "float16"},
"model.layers.0.self_attn.q_proj": "QuantLinear",
"model.layers.0.self_attn.q_proj.qweight": {"shape": [512, 4096], "dtype": "int32"},
"model.layers.0.self_attn.q_proj.qzeros": {"shape": [32, 512],"dtype": "int32"},
"model.layers.0.self_attn.q_proj.scales": {"shape": [32, 4096], "dtype": "float16"},
"model.layers.0.self_attn.q_proj.g_idx": {"shape": [4096], "dtype": "int32"},
"model.layers.0.self_attn.q_proj.bias": {"shape": [4096], "dtype": "float16"},
"model.layers.0.self_attn.v_proj": "QuantLinear",
"model.layers.0.self_attn.v_proj.qweight": {"shape": [512, 4096], "dtype": "int32"},
"model.layers.0.self_attn.v_proj.qzeros": {"shape": [32, 512], "dtype": "int32"},
"model.layers.0.self_attn.v_proj.scales": {"shape": [32, 4096], "dtype": "float16"},
"model.layers.0.self_attn.v_proj.g_idx": {"shape": [4096], "dtype": "int32"},
"model.layers.0.self_attn.v_proj.bias": {"shape": [4096], "dtype": "float16"},
"model.layers.0.mlp": "MLP",
"model.layers.0.mlp.act_fn": "SiLUActivation",
"model.layers.0.mlp.down_proj": "QuantLinear",
"model.layers.0.mlp.down_proj.qweight": {"shape": [1376, 4096], "dtype": "int32"},
"model.layers.0.mlp.down_proj.qzeros": {"shape": [86, 512], "dtype": "int32"},
"model.layers.0.mlp.down_proj.scales": {"shape": [86, 4096], "dtype": "float16"},
"model.layers.0.mlp.down_proj.g_idx": {"shape": [11008], "dtype": "int32"},
"model.layers.0.mlp.down_proj.bias": {"shape": [4096], "dtype": "float16"},
"model.layers.0.mlp.gate_proj": "QuantLinear",
"model.layers.0.mlp.gate_proj.qweight": {"shape": [512, 11008], "dtype": "int32"},
"model.layers.0.mlp.gate_proj.qzeros": {"shape": [32, 1376], "dtype": "int32"},
"model.layers.0.mlp.gate_proj.scales": {"shape": [32, 11008], "dtype": "float16"},
"model.layers.0.mlp.gate_proj.g_idx": {"shape": [4096], "dtype": "int32"},
"model.layers.0.mlp.gate_proj.bias": {"shape": [11008], "dtype": "float16"},
"model.layers.0.mlp.up_proj": "QuantLinear",
"model.layers.0.mlp.up_proj.qweight": {"shape": [512, 11008], "dtype": "int32"},
"model.layers.0.mlp.up_proj.qzeros": {"shape": [32, 1376], "dtype": "int32"},
"model.layers.0.mlp.up_proj.scales": {"shape": [32, 11008], "dtype": "float16"},
"model.layers.0.mlp.up_proj.g_idx": {"shape": [4096], "dtype": "int32"},
"model.layers.0.mlp.up_proj.bias": {"shape": [11008], "dtype": "float16"},
"model.layers.0.input_layernorm": "RMSNorm",
"model.layers.0.input_layernorm.weight": {"shape": [4096], "dtype": "float16"},
"model.layers.0.post_attention_layernorm": "RMSNorm",
"model.layers.0.post_attention_layernorm.weight": {"shape": [4096], "dtype": "float16"},
...
}
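The per-projection shapes above are consistent with 4-bit GPTQ weights packed into int32 with a group size of 128, although neither hyperparameter is stated in the dump itself. A minimal sketch of that arithmetic, under those assumed settings:

```python
# Assumed quantization hyperparameters (not recorded in the dump above):
bits, group_size = 4, 128
pack = 32 // bits  # 8 int4 values packed into each int32

def expected_shapes(in_features: int, out_features: int) -> dict:
    """Expected GPTQ tensor shapes for a Linear(in_features, out_features)."""
    return {
        "qweight": [in_features // pack, out_features],          # packed along the input dim
        "qzeros":  [in_features // group_size, out_features // pack],
        "scales":  [in_features // group_size, out_features],
        "g_idx":   [in_features],
        "bias":    [out_features],
    }

# q/k/v/o_proj: 4096 -> 4096   => qweight [512, 4096],  qzeros [32, 512]
# gate/up_proj: 4096 -> 11008  => qweight [512, 11008], qzeros [32, 1376]
# down_proj:    11008 -> 4096  => qweight [1376, 4096], qzeros [86, 512]
print(expected_shapes(4096, 4096))
print(expected_shapes(4096, 11008))
print(expected_shapes(11008, 4096))
```

These match the values listed for layer 0, and the remaining decoder layers (elided above) repeat the same pattern.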
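A dump in this format can be produced by walking a loaded PyTorch model with `named_modules()`, `named_parameters()`, and `named_buffers()`. The sketch below assumes `model` is an already-loaded quantized checkpoint (loading code omitted); note that `type(module).__name__` yields the full class names (e.g. `LlamaDecoderLayer`), whereas the listing above apparently strips the model prefix.

```python
import json
import torch

def dump_structure(model: torch.nn.Module) -> dict:
    """Map each submodule to its class name and each parameter/buffer
    to its shape and dtype, mirroring the JSON layout above."""
    out = {}
    for name, module in model.named_modules():
        if name:  # skip the anonymous root entry
            out[name] = type(module).__name__
        tensors = list(module.named_parameters(recurse=False)) + \
                  list(module.named_buffers(recurse=False))
        for pname, tensor in tensors:
            key = f"{name}.{pname}" if name else pname
            out[key] = {"shape": list(tensor.shape),
                        "dtype": str(tensor.dtype).replace("torch.", "")}
    return out

# 'model' stands for an already-loaded GPTQ-quantized checkpoint.
# print(json.dumps(dump_structure(model), indent=1))
```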