@152334H
Created January 30, 2024 16:31
llama.cpp (fbe7dfa53caff0a7e830b676e6e949917a5c71b4) patch for miqu
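The patch below adds a --to-hf flag to gguf-py/scripts/gguf-dump.py: instead of dumping metadata, it remaps the tensors of a miqu GGUF to Hugging Face llama naming and saves the resulting state dict as hf_pm2.pt. A hedged sketch of how it would be used (the patch file and model file names are placeholders; note that the conversion asserts every tensor is fp32/fp16, so a quantized GGUF would have to be dequantized first):

    git checkout fbe7dfa53caff0a7e830b676e6e949917a5c71b4
    git apply miqu.patch    # this gist, saved locally
    python gguf-py/scripts/gguf-dump.py miqu-fp16.gguf --to-hf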
diff --git a/gguf-py/scripts/gguf-dump.py b/gguf-py/scripts/gguf-dump.py
index dbf89150..eeea947f 100755
--- a/gguf-py/scripts/gguf-dump.py
+++ b/gguf-py/scripts/gguf-dump.py
@@ -24,6 +24,56 @@ def get_file_host_endian(reader: GGUFReader) -> tuple[str, str]:
         file_endian = host_endian
     return (host_endian, file_endian)
 
+from tqdm import tqdm
+import gguf.tensor_mapping as tm
+import re
+import torch
+LAYERS = 80  # miqu is an 80-block (70B-class) llama
+INT_REGEX = re.compile(r'\.[0-9]+\.')
+def get_layer_idx(k: str): return int(INT_REGEX.search(k).group()[1:-1])  # e.g. 'h.79.ln_2' -> 79
+def possibly_fix_key(k: str):
+    '''
+    [ 8192 32000] output.weight -> lm_head.linear
+    [ 8192 32000] token_embd.weight -> transformer.embd.wte
+    [8192] output_norm.weight -> lm_head.ln
+    [8192] blk.79.attn_norm.weight -> model.layers.layers.79.norm
+    [8192] blk.79.ffn_norm.weight -> h.79.ln_2
+    [28672 8192] blk.79.ffn_down.weight -> model.layers.layers.79.mlp.down_proj
+    [ 8192 28672] blk.79.ffn_gate.weight -> model.layers.layers.79.mlp.gate_proj
+    [ 8192 28672] blk.79.ffn_up.weight -> model.layers.layers.79.mlp.up_proj
+    [8192 1024] blk.79.attn_k.weight -> model.layers.layers.79.self_attn.k_proj
+    [8192 8192] blk.79.attn_output.weight -> model.layers.layers.79.self_attn.o_proj
+    [8192 8192] blk.79.attn_q.weight -> model.layers.layers.79.self_attn.q_proj
+    [8192 1024] blk.79.attn_v.weight -> model.layers.layers.79.self_attn.v_proj
+    '''
+    if k[0] == 'h' and k[-4:] == 'ln_2':  # blk.79.ffn_norm.weight -> h.79.ln_2
+        k = f'model.layers.{get_layer_idx(k)}.post_attention_layernorm'
+    elif k[-4:] == 'norm':  # blk.79.attn_norm.weight -> model.layers.layers.79.norm
+        k = f'model.layers.{get_layer_idx(k)}.input_layernorm'
+
+    D = {'lm_head.linear': 'lm_head', 'transformer.embd.wte': 'model.embed_tokens', 'lm_head.ln': 'model.norm'}
+    if k in D: k = D[k]
+
+    k = k.replace('layers.layers', 'layers')
+    k += '.weight'
+
+    return k
+# see https://github.com/ggerganov/llama.cpp/blob/master/convert.py#L1182
+def possibly_permute(t: torch.Tensor, k: str):
+    if 'q_proj' in k or 'k_proj' in k:  # undo convert.py's rotary permutation (64 attn heads, 8 KV heads)
+        H = 64 if 'q_proj' in k else 8
+        return t.reshape(H, t.shape[0] // H // 2, 2, *t.shape[1:]).swapaxes(1, 2).reshape(t.shape)
+    return t
+def convert_to_hf(r: GGUFReader):
+    d = {v[1]: k for k, v in tm.get_tensor_name_map(tm.MODEL_ARCH.LLAMA, LAYERS).mapping.items()}  # GGUF name -> HF name
+    for rt in r.tensors: print(rt.shape, '\t', rt.name, ' -> ', d[rt.name[:-7]])  # [:-7] strips '.weight'
+    assert all(rt.data.dtype == np.float32 or rt.data.dtype == np.float16 for rt in r.tensors)  # quantized tensors are not handled
+    sd = {
+        possibly_fix_key(d[rt.name[:-7]]): torch.from_numpy(rt.data).half().view(*reversed(rt.shape.tolist()))  # GGUF shapes are reversed vs torch
+        for rt in tqdm(r.tensors)
+    }
+    sd = {k: possibly_permute(v, k) for k, v in tqdm(sd.items())}
+    return sd
 
 # For more information about what field.parts and field.data represent,
 # please see the comments in the modify_gguf.py example.
@@ -103,10 +153,13 @@ def main() -> None:
parser.add_argument("--no-tensors", action="store_true", help="Don't dump tensor metadata")
parser.add_argument("--json", action="store_true", help="Produce JSON output")
parser.add_argument("--json-array", action="store_true", help="Include full array values in JSON output (long)")
+ parser.add_argument("--to-hf", action="store_true")
args = parser.parse_args(None if len(sys.argv) > 1 else ["--help"])
if not args.json:
print(f'* Loading: {args.model}')
reader = GGUFReader(args.model, 'r')
+ if args.to_hf:
+ return convert_to_hf(reader)
if args.json:
dump_metadata_json(reader, args)
else:
@@ -114,4 +167,5 @@ def main() -> None:
 
 
 if __name__ == '__main__':
-    main()
+    c = main()
+    torch.save(c, "hf_pm2.pt")  # with --to-hf this is the converted state dict; a plain dump run returns None
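possibly_permute is meant to invert the permute() helper linked above from llama.cpp's convert.py, which interleaves q/k weight rows into the rotary layout llama.cpp expects. A quick self-contained round-trip check of that claim (the forward permutation is reimplemented here from convert.py; the head count H and the toy shape are arbitrary):

    import torch
    H = 8
    t = torch.arange(8192 * 4, dtype=torch.float32).reshape(8192, 4)
    # convert.py's forward permutation (HF layout -> GGUF layout)
    fwd = t.reshape(H, 2, t.shape[0] // H // 2, *t.shape[1:]).swapaxes(1, 2).reshape(t.shape)
    # the patch's inverse (GGUF layout -> HF layout)
    inv = fwd.reshape(H, fwd.shape[0] // H // 2, 2, *fwd.shape[1:]).swapaxes(1, 2).reshape(fwd.shape)
    assert torch.equal(inv, t)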
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
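To run the model, the saved dict still has to be loaded into a transformers LlamaForCausalLM. A minimal sketch, with the config read off the tensor shapes in the docstring above (8192 hidden, 28672 intermediate, 80 layers, 64 heads, 8 KV heads, 32000 vocab); rope_theta=1e6 is an assumption about miqu, not something this patch establishes, and instantiating an 80-layer model this way needs a lot of RAM:

    import torch
    from transformers import LlamaConfig, LlamaForCausalLM

    config = LlamaConfig(
        vocab_size=32000, hidden_size=8192, intermediate_size=28672,
        num_hidden_layers=80, num_attention_heads=64, num_key_value_heads=8,
        rope_theta=1e6,  # assumption, not taken from the patch
    )
    model = LlamaForCausalLM(config).half()
    model.load_state_dict(torch.load("hf_pm2.pt"))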