@abetlen
Created May 27, 2024 02:46
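"""Convert a PaliGemma safetensors checkpoint into two GGUF files: a CLIP/SigLIP
mmproj file (image encoder + multimodal projector) and a Gemma text model file.

Example invocation (script and checkpoint directory names are illustrative):

    python convert_paligemma_to_gguf.py -d ./paligemma-3b-pt-224
"""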
import os
import argparse
import numpy as np
import numpy.typing as npt
import gguf
from safetensors import safe_open
import json
import typing
class SafetensorsIndexFile(typing.TypedDict):
    weight_map: typing.Dict[str, str]


class SafetensorsIndex:
    """Opens every safetensors shard referenced by model.safetensors.index.json and reads tensors on demand."""

    def __init__(self, index_file_path: str):
        directory = os.path.dirname(index_file_path)
        self.index = typing.cast(SafetensorsIndexFile, json.load(open(index_file_path)))
        self.weight_map = self.index["weight_map"]
        files = set(self.weight_map.values())
        self.tensors = {file: safe_open(os.path.join(directory, file), framework="np") for file in files}

    def get_tensor(self, key: str) -> npt.NDArray[np.float32]:
        return typing.cast(npt.NDArray[np.float32], self.tensors[self.weight_map[key]].get_tensor(key))  # type: ignore


def k(raw_key: str, arch: str) -> str:
    return raw_key.format(arch=arch)
if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "-d",
        "--dir-model",
        required=True,
        help="path to the directory containing the model (safetensors shards + index, config.json, tokenizer.model)",
    )
    args = parser.parse_args()

    import pathlib

    dir_model = pathlib.Path(args.dir_model)

    # set model name to folder name
    name = dir_model.name

    tensors = SafetensorsIndex((dir_model / "model.safetensors.index.json").as_posix())

    config = json.load(open(dir_model / "config.json"))
    text_config = config["text_config"]
    vision_config = config["vision_config"]
    ### Vision model
    ftype = 1  # fp16
    fname_middle = "mmproj-"
    has_text_encoder = False
    has_llava_projector = True
    n_layers_clip = 27

    fname_out = f"{name}-mmproj-f16.gguf"
    fout = gguf.GGUFWriter(fname_out, arch="clip")

    fout.add_bool("clip.has_text_encoder", has_text_encoder)
    fout.add_bool("clip.has_vision_encoder", True)
    fout.add_bool("clip.has_llava_projector", has_llava_projector)
    fout.add_file_type(ftype)  # fp16

    model_name = f"google/{name}"
    fout.add_name(model_name)
    fout.add_description("image encoder for " + model_name)
    fout.add_string("clip.projector_type", "mlp")

    image_size = vision_config.get("image_size", 224)

    # vision model hparams
    VISION = "clip.vision"
    fout.add_uint32("clip.vision.image_size", image_size)
    fout.add_uint32("clip.vision.patch_size", vision_config["patch_size"])
    fout.add_uint32(k(gguf.KEY_EMBEDDING_LENGTH, VISION), vision_config["hidden_size"])
    fout.add_uint32(k(gguf.KEY_FEED_FORWARD_LENGTH, VISION), vision_config["intermediate_size"])
    fout.add_uint32("clip.vision.projection_dim", vision_config["projection_dim"])
    fout.add_uint32(k(gguf.KEY_ATTENTION_HEAD_COUNT, VISION), vision_config["num_attention_heads"])
    fout.add_float32(k(gguf.KEY_ATTENTION_LAYERNORM_EPS, VISION), 1e-6)
    # +1 because the last encoder block is duplicated below (llava-cli skips over it)
    fout.add_uint32(k(gguf.KEY_BLOCK_COUNT, VISION), n_layers_clip + 1)
    fout.add_array("clip.vision.image_mean", [0.5, 0.5, 0.5])
    fout.add_array("clip.vision.image_std", [0.5, 0.5, 0.5])
    fout.add_bool("clip.use_gelu", True)  # using regular GELU instead of quick
    # vision projection
    fout.add_tensor(
        "mm.0.weight",
        tensors.get_tensor("multi_modal_projector.linear.weight").astype(np.float16),
    )
    fout.add_tensor(
        "mm.0.bias",
        tensors.get_tensor("multi_modal_projector.linear.bias").astype(np.float32),
    )

    # encoder (siglip)
    fout.add_tensor(
        "v.position_embd.weight",
        tensors.get_tensor("vision_tower.vision_model.embeddings.position_embedding.weight").astype(np.float16),
    )
    fout.add_tensor(
        "v.patch_embd.weight",
        tensors.get_tensor("vision_tower.vision_model.embeddings.patch_embedding.weight")
        .reshape(vision_config["hidden_size"], 3, vision_config["patch_size"], vision_config["patch_size"])
        .astype(np.float16),
    )
    fout.add_tensor(
        "v.patch_embd.bias",
        tensors.get_tensor("vision_tower.vision_model.embeddings.patch_embedding.bias").astype(np.float32),
    )
    fout.add_tensor(
        "v.post_ln.weight",
        tensors.get_tensor("vision_tower.vision_model.post_layernorm.weight").astype(np.float32),
    )
    fout.add_tensor(
        "v.post_ln.bias",
        tensors.get_tensor("vision_tower.vision_model.post_layernorm.bias").astype(np.float32),
    )
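    # The helpers below pull one SigLIP encoder layer's tensors from the safetensors index
    # and re-emit them under the v.blk.* names that llama.cpp's clip loader expects.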
    def blk_tensor(i: int, name: str):
        return tensors.get_tensor(
            rf"vision_tower.vision_model.encoder.layers.{i}.{name}"
        )

    def add_tensor(blk_id: int, gguf_id: typing.Optional[int] = None):
        if gguf_id is None:
            gguf_id = blk_id

        q_w = blk_tensor(blk_id, "self_attn.q_proj.weight")
        k_w = blk_tensor(blk_id, "self_attn.k_proj.weight")
        v_w = blk_tensor(blk_id, "self_attn.v_proj.weight")
        q_b = blk_tensor(blk_id, "self_attn.q_proj.bias")
        k_b = blk_tensor(blk_id, "self_attn.k_proj.bias")
        v_b = blk_tensor(blk_id, "self_attn.v_proj.bias")

        fout.add_tensor(f"v.blk.{gguf_id}.attn_q.weight", q_w.astype(np.float16))
        fout.add_tensor(f"v.blk.{gguf_id}.attn_q.bias", q_b.astype(np.float32))
        fout.add_tensor(f"v.blk.{gguf_id}.attn_k.weight", k_w.astype(np.float16))
        fout.add_tensor(f"v.blk.{gguf_id}.attn_k.bias", k_b.astype(np.float32))
        fout.add_tensor(f"v.blk.{gguf_id}.attn_v.weight", v_w.astype(np.float16))
        fout.add_tensor(f"v.blk.{gguf_id}.attn_v.bias", v_b.astype(np.float32))

        fout.add_tensor(
            f"v.blk.{gguf_id}.attn_out.weight",
            blk_tensor(blk_id, "self_attn.out_proj.weight").astype(np.float16),
        )
        fout.add_tensor(
            f"v.blk.{gguf_id}.attn_out.bias",
            blk_tensor(blk_id, "self_attn.out_proj.bias").astype(np.float32),
        )

        fout.add_tensor(
            f"v.blk.{gguf_id}.ln1.weight",
            blk_tensor(blk_id, "layer_norm1.weight").astype(np.float32),
        )
        fout.add_tensor(
            f"v.blk.{gguf_id}.ln1.bias",
            blk_tensor(blk_id, "layer_norm1.bias").astype(np.float32),
        )

        fout.add_tensor(
            f"v.blk.{gguf_id}.ffn_down.weight",
            blk_tensor(blk_id, "mlp.fc1.weight").astype(np.float16),
        )
        fout.add_tensor(
            f"v.blk.{gguf_id}.ffn_down.bias",
            blk_tensor(blk_id, "mlp.fc1.bias").astype(np.float32),
        )
        fout.add_tensor(
            f"v.blk.{gguf_id}.ffn_up.weight",
            blk_tensor(blk_id, "mlp.fc2.weight").astype(np.float16),
        )
        fout.add_tensor(
            f"v.blk.{gguf_id}.ffn_up.bias",
            blk_tensor(blk_id, "mlp.fc2.bias").astype(np.float32),
        )

        fout.add_tensor(
            f"v.blk.{gguf_id}.ln2.weight",
            blk_tensor(blk_id, "layer_norm2.weight").astype(np.float32),
        )
        fout.add_tensor(
            f"v.blk.{gguf_id}.ln2.bias",
            blk_tensor(blk_id, "layer_norm2.bias").astype(np.float32),
        )
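    # Export all SigLIP encoder blocks, then duplicate the final one so the tensor count
    # matches the block_count of n_layers_clip + 1 written above.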
    for i in range(n_layers_clip):
        add_tensor(i)

    # Duplicate the last block (llava-cli skips over this)
    add_tensor(n_layers_clip - 1, n_layers_clip)

    fout.write_header_to_file()
    fout.write_kv_data_to_file()
    fout.write_tensors_to_file()
    fout.close()
    print(f"GGUF written to {fname_out}")
    ### Text model

    # general GGUF init
    fname_out = f"{name}-text-model-f16.gguf"
    fout = gguf.GGUFWriter(fname_out, arch="gemma")
    ftype = 1

    fout.add_name(name)
    fout.add_context_length(2048)
    fout.add_block_count(text_config["num_hidden_layers"])
    fout.add_embedding_length(text_config["hidden_size"])
    fout.add_feed_forward_length(text_config["intermediate_size"])
    fout.add_head_count(text_config["num_attention_heads"])
    fout.add_head_count_kv(text_config["num_key_value_heads"])
    fout.add_key_length(256)
    fout.add_value_length(256)
    fout.add_layer_norm_rms_eps(1e-6)
    fout.add_file_type(ftype)
    fout.add_add_bos_token(True)
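    # Note: the context length (2048) and key/value head dimension (256) are hard-coded
    # above rather than read from text_config.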
    ### Tokenizer
    # Taken from _set_vocab_sentencepiece
    from enum import IntEnum

    class SentencePieceTokenTypes(IntEnum):
        NORMAL = 1
        UNKNOWN = 2
        CONTROL = 3
        USER_DEFINED = 4
        UNUSED = 5
        BYTE = 6

    from sentencepiece import SentencePieceProcessor

    tokenizer_path = dir_model / 'tokenizer.model'
    if not tokenizer_path.is_file():
        raise FileNotFoundError(f"File not found: {tokenizer_path}")

    tokenizer = SentencePieceProcessor()
    tokenizer.LoadFromFile(str(tokenizer_path))

    vocab_size = config["vocab_size"]

    tokens: typing.List[bytes] = [f"[PAD{i}]".encode("utf-8") for i in range(vocab_size)]
    scores: typing.List[float] = [-10000.0] * vocab_size
    toktypes: typing.List[int] = [SentencePieceTokenTypes.UNKNOWN] * vocab_size
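    # Tokens present in the SentencePiece model overwrite the [PAD{i}] placeholders below;
    # ids beyond tokenizer.vocab_size() keep their placeholder entries.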
    for token_id in range(tokenizer.vocab_size()):
        piece = tokenizer.IdToPiece(token_id)
        text = piece.encode("utf-8")
        score = tokenizer.GetScore(token_id)

        toktype = SentencePieceTokenTypes.NORMAL
        if tokenizer.IsUnknown(token_id):
            toktype = SentencePieceTokenTypes.UNKNOWN
        elif tokenizer.IsControl(token_id):
            toktype = SentencePieceTokenTypes.CONTROL
        elif tokenizer.IsUnused(token_id):
            toktype = SentencePieceTokenTypes.UNUSED
        elif tokenizer.IsByte(token_id):
            toktype = SentencePieceTokenTypes.BYTE

        tokens[token_id] = text
        scores[token_id] = score
        toktypes[token_id] = toktype
    added_tokens_file = dir_model / 'added_tokens.json'
    if added_tokens_file.is_file():
        with open(added_tokens_file, "r", encoding="utf-8") as f:
            added_tokens_json = json.load(f)
            for key in added_tokens_json:
                token_id = added_tokens_json[key]
                if token_id >= vocab_size:
                    print(f'ignore token {token_id}: id is out of range, max={vocab_size - 1}')
                    continue
                tokens[token_id] = key.encode("utf-8")
                scores[token_id] = -1000.0
                toktypes[token_id] = SentencePieceTokenTypes.USER_DEFINED

    if vocab_size > len(tokens):
        pad_count = vocab_size - len(tokens)
        print(f"Padding vocab with {pad_count} token(s) - [PAD1] through [PAD{pad_count}]")
        for i in range(1, pad_count + 1):
            tokens.append(bytes(f"[PAD{i}]", encoding="utf-8"))
            scores.append(-1000.0)
            toktypes.append(SentencePieceTokenTypes.UNUSED)
    fout.add_tokenizer_model("llama")
    fout.add_tokenizer_pre("default")
    fout.add_token_list(tokens)
    fout.add_token_scores(scores)
    fout.add_token_types(toktypes)

    special_vocab = gguf.SpecialVocab(dir_model, n_vocab=len(tokens))
    special_vocab.add_to_gguf(fout)

    ### Text model tensors
    fout.add_tensor(
        "token_embd.weight",
        tensors.get_tensor("language_model.model.embed_tokens.weight").astype(np.float16),
    )
    for i in range(text_config["num_hidden_layers"]):
        # Gemma's RMSNorm stores (weight - 1), so add 1 back when exporting norm weights, see:
        # https://github.com/huggingface/transformers/blob/fc37f38915372c15992b540dfcbbe00a916d4fc6/src/transformers/models/gemma/modeling_gemma.py#L89
        fout.add_tensor(
            f"blk.{i}.attn_norm.weight",
            tensors.get_tensor(f"language_model.model.layers.{i}.input_layernorm.weight").astype(np.float32) + 1,
        )
        fout.add_tensor(
            f"blk.{i}.ffn_down.weight",
            tensors.get_tensor(f"language_model.model.layers.{i}.mlp.down_proj.weight").astype(np.float16),
        )
        fout.add_tensor(
            f"blk.{i}.ffn_gate.weight",
            tensors.get_tensor(f"language_model.model.layers.{i}.mlp.gate_proj.weight").astype(np.float16),
        )
        fout.add_tensor(
            f"blk.{i}.ffn_up.weight",
            tensors.get_tensor(f"language_model.model.layers.{i}.mlp.up_proj.weight").astype(np.float16),
        )
        fout.add_tensor(
            f"blk.{i}.ffn_norm.weight",
            tensors.get_tensor(f"language_model.model.layers.{i}.post_attention_layernorm.weight").astype(np.float32) + 1,
        )
        fout.add_tensor(
            f"blk.{i}.attn_k.weight",
            tensors.get_tensor(f"language_model.model.layers.{i}.self_attn.k_proj.weight").astype(np.float16),
        )
        fout.add_tensor(
            f"blk.{i}.attn_output.weight",
            tensors.get_tensor(f"language_model.model.layers.{i}.self_attn.o_proj.weight").astype(np.float16),
        )
        fout.add_tensor(
            f"blk.{i}.attn_q.weight",
            tensors.get_tensor(f"language_model.model.layers.{i}.self_attn.q_proj.weight").astype(np.float16),
        )
        fout.add_tensor(
            f"blk.{i}.attn_v.weight",
            tensors.get_tensor(f"language_model.model.layers.{i}.self_attn.v_proj.weight").astype(np.float16),
        )
    fout.add_tensor(
        "output_norm.weight",
        tensors.get_tensor("language_model.model.norm.weight").astype(np.float32) + 1,
    )

    # save gguf
    fout.write_header_to_file()
    fout.write_kv_data_to_file()
    fout.write_tensors_to_file()
    fout.close()
    print(f"GGUF written to {fname_out}")