Skip to content

Instantly share code, notes, and snippets.

@alex4o
Created February 4, 2024 08:35
Show Gist options
  • Save alex4o/7809ed6597cb88c4f44fcbab03475d9e to your computer and use it in GitHub Desktop.
Save alex4o/7809ed6597cb88c4f44fcbab03475d9e to your computer and use it in GitHub Desktop.
llama.cpp pixel6pro vulkan logs
./main -m ../phi-2-orange.Q4_K_M.gguf
Log start
main: build = 22 (277fad3)
main: built with clang version 17.0.6 for aarch64-unknown-linux-android24
main: seed = 1707035493
ggml_vk_init()
ggml_vk_find_queue_family_index()
ggml_vk_find_queue_family_index()
ggml_vulkan: Using Mali-G78 | uma: 1 | fp16: 1 | warp size: 16
ggml_vk_load_shaders()
ggml_vk_create_pipeline(matmul_f32_l, main, 3, 56, (128,128,1), specialization_constants, 1)
ggml_vk_create_pipeline(matmul_f32_m, main, 3, 56, (64,64,1), specialization_constants, 1)
ggml_vk_create_pipeline(matmul_f32_s, main, 3, 56, (32,32,1), specialization_constants, 1)
ggml_vk_create_pipeline(matmul_f32_aligned_l, main, 3, 56, (128,128,1), specialization_constants, 128)
ggml_vk_create_pipeline(matmul_f32_aligned_m, main, 3, 56, (64,64,1), specialization_constants, 64)
ggml_vk_create_pipeline(matmul_f32_aligned_s, main, 3, 56, (32,32,1), specialization_constants, 32)
ggml_vk_create_pipeline(matmul_f16_l, main, 3, 56, (128,128,1), specialization_constants, 1)
ggml_vk_create_pipeline(matmul_f16_m, main, 3, 56, (64,64,1), specialization_constants, 1)
ggml_vk_create_pipeline(matmul_f16_s, main, 3, 56, (32,32,1), specialization_constants, 1)
ggml_vk_create_pipeline(matmul_f16_aligned_l, main, 3, 56, (128,128,1), specialization_constants, 128)
ggml_vk_create_pipeline(matmul_f16_aligned_m, main, 3, 56, (64,64,1), specialization_constants, 64)
ggml_vk_create_pipeline(matmul_f16_aligned_s, main, 3, 56, (32,32,1), specialization_constants, 32)
ggml_vk_create_pipeline(matmul_f16_f32_l, main, 3, 56, (128,128,1), specialization_constants, 1)
ggml_vk_create_pipeline(matmul_f16_f32_m, main, 3, 56, (64,64,1), specialization_constants, 1)
ggml_vk_create_pipeline(matmul_f16_f32_s, main, 3, 56, (32,32,1), specialization_constants, 1)
ggml_vk_create_pipeline(matmul_f16_f32_aligned_l, main, 3, 56, (128,128,1), specialization_constants, 128)
ggml_vk_create_pipeline(matmul_f16_f32_aligned_m, main, 3, 56, (64,64,1), specialization_constants, 64)
ggml_vk_create_pipeline(matmul_f16_f32_aligned_s, main, 3, 56, (32,32,1), specialization_constants, 32)
ggml_vk_create_pipeline(mul_mat_vec_f16_f32, main, 3, 12, (1,1,1), specialization_constants, 1)
ggml_vk_create_pipeline(mul_mat_vec_q4_0_f32, main, 3, 12, (1,1,1), specialization_constants, 1)
ggml_vk_create_pipeline(mul_mat_vec_q4_1_f32, main, 3, 12, (1,1,1), specialization_constants, 1)
ggml_vk_create_pipeline(mul_mat_vec_q5_0_f32, main, 3, 12, (1,1,1), specialization_constants, 1)
ggml_vk_create_pipeline(mul_mat_vec_q5_1_f32, main, 3, 12, (1,1,1), specialization_constants, 1)
ggml_vk_create_pipeline(mul_mat_vec_q8_0_f32, main, 3, 12, (1,1,1), specialization_constants, 1)
ggml_vk_create_pipeline(mul_mat_vec_q2_K_f32, main, 3, 12, (1,1,1), specialization_constants, 1)
ggml_vk_create_pipeline(mul_mat_vec_q3_K_f32, main, 3, 12, (1,1,1), specialization_constants, 1)
ggml_vk_create_pipeline(mul_mat_vec_q4_K_f32, main, 3, 12, (1,1,1), specialization_constants, 1)
ggml_vk_create_pipeline(mul_mat_vec_q5_K_f32, main, 3, 12, (1,1,1), specialization_constants, 1)
ggml_vk_create_pipeline(mul_mat_vec_q6_K_f32, main, 3, 12, (1,1,1), specialization_constants, 1)
ggml_vk_create_pipeline(f32_to_f16, main, 2, 16, (64,1,1), specialization_constants, 1)
ggml_vk_create_pipeline(dequant_f16, main, 2, 16, (8192,1,1), specialization_constants, 1)
ggml_vk_create_pipeline(dequant_q4_0, main, 2, 16, (8192,1,1), specialization_constants, 1)
ggml_vk_create_pipeline(dequant_q4_1, main, 2, 16, (8192,1,1), specialization_constants, 1)
ggml_vk_create_pipeline(dequant_q5_0, main, 2, 16, (8192,1,1), specialization_constants, 1)
ggml_vk_create_pipeline(dequant_q5_1, main, 2, 16, (8192,1,1), specialization_constants, 1)
ggml_vk_create_pipeline(dequant_q8_0, main, 2, 16, (8192,1,1), specialization_constants, 1)
ggml_vk_create_pipeline(dequant_q2_K, main, 2, 16, (16384,1,1), specialization_constants, 1)
ggml_vk_create_pipeline(dequant_q3_K, main, 2, 16, (16384,1,1), specialization_constants, 1)
ggml_vk_create_pipeline(dequant_q4_K, main, 2, 16, (8192,1,1), specialization_constants, 1)
ggml_vk_create_pipeline(dequant_q5_K, main, 2, 16, (16384,1,1), specialization_constants, 1)
ggml_vk_create_pipeline(dequant_q6_K, main, 2, 16, (16384,1,1), specialization_constants, 1)
ggml_vk_create_pipeline(get_rows_f16, main, 3, 16, (512,1,1), specialization_constants, 1)
ggml_vk_create_pipeline(get_rows_q4_0, main, 3, 16, (512,1,1), specialization_constants, 1)
ggml_vk_create_pipeline(get_rows_q4_1, main, 3, 16, (512,1,1), specialization_constants, 1)
ggml_vk_create_pipeline(get_rows_q5_0, main, 3, 16, (512,1,1), specialization_constants, 1)
ggml_vk_create_pipeline(get_rows_q5_1, main, 3, 16, (512,1,1), specialization_constants, 1)
ggml_vk_create_pipeline(get_rows_q8_0, main, 3, 16, (512,1,1), specialization_constants, 1)
ggml_vk_create_pipeline(get_rows_f16_f32, main, 3, 16, (512,1,1), specialization_constants, 1)
ggml_vk_create_pipeline(get_rows_q4_0_f32, main, 3, 16, (512,1,1), specialization_constants, 1)
ggml_vk_create_pipeline(get_rows_q4_1_f32, main, 3, 16, (512,1,1), specialization_constants, 1)
ggml_vk_create_pipeline(get_rows_q5_0_f32, main, 3, 16, (512,1,1), specialization_constants, 1)
ggml_vk_create_pipeline(get_rows_q5_1_f32, main, 3, 16, (512,1,1), specialization_constants, 1)
ggml_vk_create_pipeline(get_rows_q8_0_f32, main, 3, 16, (512,1,1), specialization_constants, 1)
ggml_vk_create_pipeline(split_k_reduce, main, 2, 8, (256,1,1), specialization_constants, 1)
ggml_vk_create_pipeline(mul_mat_vec_p021_f16_f32, main, 3, 24, (1,1,1), specialization_constants, 1)
ggml_vk_create_pipeline(mul_mat_vec_nc_f16_f32, main, 3, 28, (1,1,1), specialization_constants, 1)
ggml_vk_create_pipeline(norm_f32, main, 2, 16, (1,1,1), specialization_constants, 1)
ggml_vk_create_pipeline(rms_norm_f32, main, 2, 16, (1,1,1), specialization_constants, 1)
ggml_vk_create_pipeline(cpy_f32_f32, main, 2, 48, (512,1,1), specialization_constants, 1)
ggml_vk_create_pipeline(cpy_f32_f16, main, 2, 48, (512,1,1), specialization_constants, 1)
ggml_vk_create_pipeline(cpy_f16_f16, main, 2, 48, (512,1,1), specialization_constants, 1)
ggml_vk_create_pipeline(add_f32, main, 3, 16, (512,1,1), specialization_constants, 1)
ggml_vk_create_pipeline(mul_f32, main, 3, 16, (512,1,1), specialization_constants, 1)
ggml_vk_create_pipeline(scale_f32, main, 2, 16, (512,1,1), specialization_constants, 1)
ggml_vk_create_pipeline(sqr_f32, main, 2, 16, (512,1,1), specialization_constants, 1)
ggml_vk_create_pipeline(clamp_f32, main, 2, 16, (512,1,1), specialization_constants, 1)
ggml_vk_create_pipeline(gelu_f32, main, 2, 16, (512,1,1), specialization_constants, 1)
ggml_vk_create_pipeline(silu_f32, main, 2, 16, (512,1,1), specialization_constants, 1)
ggml_vk_create_pipeline(relu_f32, main, 2, 16, (512,1,1), specialization_constants, 1)
ggml_vk_create_pipeline(diag_mask_inf_f32, main, 2, 12, (512,1,1), specialization_constants, 1)
ggml_vk_create_pipeline(soft_max_f32, main, 3, 16, (1,1,1), specialization_constants, 1)
ggml_vk_create_pipeline(rope_f32, main, 3, 40, (1,512,1), specialization_constants, 1)
ggml_vk_create_pipeline(rope_f16, main, 3, 40, (1,512,1), specialization_constants, 1)
ggml_vk_create_pipeline(rope_neox_f32, main, 3, 52, (1,512,1), specialization_constants, 1)
ggml_vk_create_pipeline(rope_neox_f16, main, 3, 52, (1,512,1), specialization_constants, 1)
ggml_vk_create_queue()
ggml_vk_create_queue()
llama_model_loader: loaded meta data with 21 key-value pairs and 325 tensors from ../phi-2-orange.Q4_K_M.gguf (version GGUF V3 (latest))
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv 0: general.architecture str = phi2
llama_model_loader: - kv 1: general.name str = Phi2
llama_model_loader: - kv 2: phi2.context_length u32 = 2048
llama_model_loader: - kv 3: phi2.embedding_length u32 = 2560
llama_model_loader: - kv 4: phi2.feed_forward_length u32 = 10240
llama_model_loader: - kv 5: phi2.block_count u32 = 32
llama_model_loader: - kv 6: phi2.attention.head_count u32 = 32
llama_model_loader: - kv 7: phi2.attention.head_count_kv u32 = 32
llama_model_loader: - kv 8: phi2.attention.layer_norm_epsilon f32 = 0.000010
llama_model_loader: - kv 9: phi2.rope.dimension_count u32 = 32
llama_model_loader: - kv 10: general.file_type u32 = 15
llama_model_loader: - kv 11: tokenizer.ggml.add_bos_token bool = false
llama_model_loader: - kv 12: tokenizer.ggml.model str = gpt2
llama_model_loader: - kv 13: tokenizer.ggml.tokens arr[str,51200] = ["!", "\"", "#", "$", "%", "&", "'", ...
llama_model_loader: - kv 14: tokenizer.ggml.token_type arr[i32,51200] = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...
llama_model_loader: - kv 15: tokenizer.ggml.merges arr[str,50000] = ["Ġ t", "Ġ a", "h e", "i n", "r e",...
llama_model_loader: - kv 16: tokenizer.ggml.bos_token_id u32 = 50256
llama_model_loader: - kv 17: tokenizer.ggml.eos_token_id u32 = 50295
llama_model_loader: - kv 18: tokenizer.ggml.unknown_token_id u32 = 50256
llama_model_loader: - kv 19: tokenizer.ggml.padding_token_id u32 = 50256
llama_model_loader: - kv 20: general.quantization_version u32 = 2
llama_model_loader: - type f32: 195 tensors
llama_model_loader: - type q4_K: 81 tensors
llama_model_loader: - type q5_K: 32 tensors
llama_model_loader: - type q6_K: 17 tensors
llm_load_vocab: mismatch in special tokens definition ( 910/51200 vs 944/51200 ).
llm_load_print_meta: format = GGUF V3 (latest)
llm_load_print_meta: arch = phi2
llm_load_print_meta: vocab type = BPE
llm_load_print_meta: n_vocab = 51200
llm_load_print_meta: n_merges = 50000
llm_load_print_meta: n_ctx_train = 2048
llm_load_print_meta: n_embd = 2560
llm_load_print_meta: n_head = 32
llm_load_print_meta: n_head_kv = 32
llm_load_print_meta: n_layer = 32
llm_load_print_meta: n_rot = 32
llm_load_print_meta: n_embd_head_k = 80
llm_load_print_meta: n_embd_head_v = 80
llm_load_print_meta: n_gqa = 1
llm_load_print_meta: n_embd_k_gqa = 2560
llm_load_print_meta: n_embd_v_gqa = 2560
llm_load_print_meta: f_norm_eps = 1.0e-05
llm_load_print_meta: f_norm_rms_eps = 0.0e+00
llm_load_print_meta: f_clamp_kqv = 0.0e+00
llm_load_print_meta: f_max_alibi_bias = 0.0e+00
llm_load_print_meta: n_ff = 10240
llm_load_print_meta: n_expert = 0
llm_load_print_meta: n_expert_used = 0
llm_load_print_meta: rope scaling = linear
llm_load_print_meta: freq_base_train = 10000.0
llm_load_print_meta: freq_scale_train = 1
llm_load_print_meta: n_yarn_orig_ctx = 2048
llm_load_print_meta: rope_finetuned = unknown
llm_load_print_meta: model type = 3B
llm_load_print_meta: model ftype = Q4_K - Medium
llm_load_print_meta: model params = 2.78 B
llm_load_print_meta: model size = 1.66 GiB (5.14 BPW)
llm_load_print_meta: general.name = Phi2
llm_load_print_meta: BOS token = 50256 '<|endoftext|>'
llm_load_print_meta: EOS token = 50295 '<|im_end|>'
llm_load_print_meta: UNK token = 50256 '<|endoftext|>'
llm_load_print_meta: PAD token = 50256 '<|endoftext|>'
llm_load_print_meta: LF token = 128 'Ä'
llm_load_tensors: ggml ctx size = 0.12 MiB
llm_load_tensors: offloading 0 repeating layers to GPU
llm_load_tensors: offloaded 0/33 layers to GPU
llm_load_tensors: CPU buffer size = 1704.63 MiB
..................................................................................
llama_new_context_with_model: n_ctx = 512
llama_new_context_with_model: freq_base = 10000.0
llama_new_context_with_model: freq_scale = 1
ggml_vk_host_malloc(167772160)
ggml_vk_create_buffer(167772160, { HostVisible | HostCoherent | HostCached })
ggml_vulkan: Failed to allocate pinned memory.
ggml_vulkan: No suitable memory type found: ErrorOutOfDeviceMemory
llama_kv_cache_init: CPU KV buffer size = 160.00 MiB
llama_new_context_with_model: KV self size = 160.00 MiB, K (f16): 80.00 MiB, V (f16): 80.00 MiB
ggml_vk_host_malloc(6297600)
ggml_vk_create_buffer(6297600, { HostVisible | HostCoherent | HostCached })
ggml_vulkan: Failed to allocate pinned memory.
ggml_vulkan: No suitable memory type found: ErrorOutOfDeviceMemory
llama_new_context_with_model: CPU input buffer size = 6.01 MiB
ggml_vk_host_malloc(1)
ggml_vk_create_buffer(1, { HostVisible | HostCoherent | HostCached })
ggml_vulkan: Failed to allocate pinned memory.
ggml_vulkan: No suitable memory type found: ErrorOutOfDeviceMemory
ggml_vk_host_malloc(121110580)
ggml_vk_create_buffer(121110580, { HostVisible | HostCoherent | HostCached })
ggml_vulkan: Failed to allocate pinned memory.
ggml_vulkan: No suitable memory type found: ErrorOutOfDeviceMemory
llama_new_context_with_model: CPU compute buffer size = 115.50 MiB
llama_new_context_with_model: graph splits (measure): 1
ggml_vk_preallocate_buffers_graph(0xb400007091fd10e0)
ggml_vk_preallocate_buffers_graph(0xb400007091fd1270)
ggml_vk_preallocate_buffers_graph(0xb400007091fd1720)
ggml_vk_preallocate_buffers_graph(0xb400007091fd18b0)
ggml_vk_preallocate_buffers_graph(0xb400007091fd1a40)
ggml_vk_preallocate_buffers_graph(0xb400007091fd1bd0)
ggml_vk_create_extra(0xb400007091fd1bd0 (wqkv-0, MUL_MAT))
ggml_vk_guess_split_k(7680, 2, 2560) = 1
ggml_vk_pool_malloc(61440)
ggml_vk_create_buffer(61440, { DeviceLocal })
ggml_vk_preallocate_buffers_graph(0xb400007091fd1d60)
ggml_vk_preallocate_buffers_graph(0xb400007091fd1ef0)
ggml_vk_preallocate_buffers_graph(0xb400007091fd2080)
ggml_vk_preallocate_buffers_graph(0xb400007091fd2850)
ggml_vk_preallocate_buffers_graph(0xb400007091fd1400)
ggml_vk_preallocate_buffers_graph(0xb400007091fd2b70)
ggml_vk_preallocate_buffers_graph(0xb400007091fd2d00)
ggml_vk_preallocate_buffers_graph(0xb400007091fd2210)
ggml_vk_preallocate_buffers_graph(0xb400007091fd23a0)
ggml_vk_preallocate_buffers_graph(0xb400007091fd29e0)
ggml_vk_preallocate_buffers_graph(0xb400007091fd2e90)
ggml_vk_preallocate_buffers_graph(0xb400007091fd2530)
ggml_vk_preallocate_buffers_graph(0xb400007091fd26c0)
ggml_vk_preallocate_buffers_graph(0xb400007091fd3340)
ggml_vk_preallocate_buffers_graph(0xb400007091fd3660)
ggml_vk_preallocate_buffers_graph(0xb400007091fd3020)
ggml_vk_preallocate_buffers_graph(0xb400007091fd31b0)
ggml_vk_preallocate_buffers_graph(0xb400007091fd34d0)
ggml_vk_preallocate_buffers_graph(0xb400007091fd37f0)
ggml_vk_preallocate_buffers_graph(0xb400007091fd3fc0)
ggml_vk_preallocate_buffers_graph(0xb400007091fd3b10)
ggml_vk_preallocate_buffers_graph(0xb400007091fd3980)
ggml_vk_preallocate_buffers_graph(0xb400007091fd3ca0)
ggml_vk_create_extra(0xb400007091fd3ca0 (kq-0, MUL_MAT))
ggml_vk_guess_split_k(32, 2, 80) = 1
ggml_vk_preallocate_buffers_graph(0xb400007091fd1590)
ggml_vk_preallocate_buffers_graph(0xb400007091fd3e30)
ggml_vk_preallocate_buffers_graph(0xb400007091fd4150)
ggml_vk_create_extra(0xb400007091fd4150 (kqv-0, MUL_MAT))
ggml_vk_guess_split_k(80, 2, 32) = 1
ggml_vk_preallocate_buffers_graph(0xb400007091fd42e0)
ggml_vk_preallocate_buffers_graph(0xb400007091fd4470)
ggml_vk_preallocate_buffers_graph(0xb400007091fd4600)
ggml_vk_create_extra(0xb400007091fd4600 (kqv_wo-0, MUL_MAT))
ggml_vk_guess_split_k(2560, 2, 2560) = 1
ggml_vk_preallocate_buffers_graph(0xb400007091fd4790)
ggml_vk_preallocate_buffers_graph(0xb400007091fd4920)
ggml_vk_create_extra(0xb400007091fd4920 (ffn_up-0, MUL_MAT))
ggml_vk_guess_split_k(10240, 2, 2560) = 1
ggml_vk_pool_malloc(81920)
ggml_vk_create_buffer(81920, { DeviceLocal })
ggml_vk_preallocate_buffers_graph(0xb400007091fd4ab0)
ggml_vk_preallocate_buffers_graph(0xb400007091fd4c40)
ggml_vk_preallocate_buffers_graph(0xb400007091fd4dd0)
ggml_vk_create_extra(0xb400007091fd4dd0 (ffn_down-0, MUL_MAT))
ggml_vk_guess_split_k(2560, 2, 10240) = 1
ggml_vk_preallocate_buffers_graph(0xb400007091fd4f60)
ggml_vk_preallocate_buffers_graph(0xb400007091fd50f0)
ggml_vk_preallocate_buffers_graph(0xb400007091fd5280)
ggml_vk_preallocate_buffers_graph(0xb400007091fd5410)
ggml_vk_preallocate_buffers_graph(0xb400007091fd55a0)
ggml_vk_preallocate_buffers_graph(0xb400007091fd5730)
ggml_vk_preallocate_buffers_graph(0xb400007091fd58c0)
ggml_vk_create_extra(0xb400007091fd58c0 (wqkv-1, MUL_MAT))
ggml_vk_guess_split_k(7680, 2, 2560) = 1
ggml_vk_preallocate_buffers_graph(0xb400007091fd5a50)
ggml_vk_preallocate_buffers_graph(0xb400007091fd5be0)
ggml_vk_preallocate_buffers_graph(0xb400007091fd5d70)
ggml_vk_preallocate_buffers_graph(0xb400007091fd6540)
ggml_vk_preallocate_buffers_graph(0xb400007091fd6860)
ggml_vk_preallocate_buffers_graph(0xb400007091fd69f0)
ggml_vk_preallocate_buffers_graph(0xb400007091fd5f00)
ggml_vk_preallocate_buffers_graph(0xb400007091fd6090)
ggml_vk_preallocate_buffers_graph(0xb400007091fd66d0)
ggml_vk_preallocate_buffers_graph(0xb400007091fd6b80)
ggml_vk_preallocate_buffers_graph(0xb400007091fd6220)
ggml_vk_preallocate_buffers_graph(0xb400007091fd63b0)
ggml_vk_preallocate_buffers_graph(0xb400007091fd7030)
ggml_vk_preallocate_buffers_graph(0xb400007091fd7350)
ggml_vk_preallocate_buffers_graph(0xb400007091fd6d10)
ggml_vk_preallocate_buffers_graph(0xb400007091fd6ea0)
ggml_vk_preallocate_buffers_graph(0xb400007091fd71c0)
ggml_vk_preallocate_buffers_graph(0xb400007091fd74e0)
ggml_vk_preallocate_buffers_graph(0xb400007091fd7cb0)
ggml_vk_preallocate_buffers_graph(0xb400007091fd7800)
ggml_vk_preallocate_buffers_graph(0xb400007091fd7670)
ggml_vk_preallocate_buffers_graph(0xb400007091fd7990)
ggml_vk_create_extra(0xb400007091fd7990 (kq-1, MUL_MAT))
ggml_vk_guess_split_k(32, 2, 80) = 1
ggml_vk_preallocate_buffers_graph(0xb400007091fd7b20)
ggml_vk_preallocate_buffers_graph(0xb400007091fd7e40)
ggml_vk_create_extra(0xb400007091fd7e40 (kqv-1, MUL_MAT))
ggml_vk_guess_split_k(80, 2, 32) = 1
ggml_vk_preallocate_buffers_graph(0xb400007091fd7fd0)
ggml_vk_preallocate_buffers_graph(0xb400007091fd8160)
ggml_vk_preallocate_buffers_graph(0xb400007091fd82f0)
ggml_vk_create_extra(0xb400007091fd82f0 (kqv_wo-1, MUL_MAT))
ggml_vk_guess_split_k(2560, 2, 2560) = 1
ggml_vk_preallocate_buffers_graph(0xb400007091fd8480)
ggml_vk_preallocate_buffers_graph(0xb400007091fd8610)
ggml_vk_create_extra(0xb400007091fd8610 (ffn_up-1, MUL_MAT))
ggml_vk_guess_split_k(10240, 2, 2560) = 1
ggml_vk_preallocate_buffers_graph(0xb400007091fd87a0)
ggml_vk_preallocate_buffers_graph(0xb400007091fd8930)
ggml_vk_preallocate_buffers_graph(0xb400007091fd8ac0)
ggml_vk_create_extra(0xb400007091fd8ac0 (ffn_down-1, MUL_MAT))
ggml_vk_guess_split_k(2560, 2, 10240) = 1
ggml_vk_preallocate_buffers_graph(0xb400007091fd8c50)
ggml_vk_preallocate_buffers_graph(0xb400007091fd8de0)
ggml_vk_preallocate_buffers_graph(0xb400007091fd8f70)
ggml_vk_preallocate_buffers_graph(0xb400007091fd9100)
ggml_vk_preallocate_buffers_graph(0xb400007091fd9290)
ggml_vk_preallocate_buffers_graph(0xb400007091fd9420)
ggml_vk_preallocate_buffers_graph(0xb400007091fd95b0)
ggml_vk_create_extra(0xb400007091fd95b0 (wqkv-2, MUL_MAT))
ggml_vk_guess_split_k(7680, 2, 2560) = 1
ggml_vk_preallocate_buffers_graph(0xb400007091fd9740)
ggml_vk_preallocate_buffers_graph(0xb400007091fd98d0)
ggml_vk_preallocate_buffers_graph(0xb400007091fd9a60)
ggml_vk_preallocate_buffers_graph(0xb400007091fda230)
ggml_vk_preallocate_buffers_graph(0xb400007091fda550)
ggml_vk_preallocate_buffers_graph(0xb400007091fda6e0)
ggml_vk_preallocate_buffers_graph(0xb400007091fd9bf0)
ggml_vk_preallocate_buffers_graph(0xb400007091fd9d80)
ggml_vk_preallocate_buffers_graph(0xb400007091fda3c0)
ggml_vk_preallocate_buffers_graph(0xb400007091fda870)
ggml_vk_preallocate_buffers_graph(0xb400007091fd9f10)
ggml_vk_preallocate_buffers_graph(0xb400007091fda0a0)
ggml_vk_preallocate_buffers_graph(0xb400007091fdad20)
ggml_vk_preallocate_buffers_graph(0xb400007091fdb040)
ggml_vk_preallocate_buffers_graph(0xb400007091fdaa00)
ggml_vk_preallocate_buffers_graph(0xb400007091fdab90)
ggml_vk_preallocate_buffers_graph(0xb400007091fdaeb0)
ggml_vk_preallocate_buffers_graph(0xb400007091fdb1d0)
ggml_vk_preallocate_buffers_graph(0xb400007091fdb9a0)
ggml_vk_preallocate_buffers_graph(0xb400007091fdb4f0)
ggml_vk_preallocate_buffers_graph(0xb400007091fdb360)
ggml_vk_preallocate_buffers_graph(0xb400007091fdb680)
ggml_vk_create_extra(0xb400007091fdb680 (kq-2, MUL_MAT))
ggml_vk_guess_split_k(32, 2, 80) = 1
ggml_vk_preallocate_buffers_graph(0xb400007091fdb810)
ggml_vk_preallocate_buffers_graph(0xb400007091fdbb30)
ggml_vk_create_extra(0xb400007091fdbb30 (kqv-2, MUL_MAT))
ggml_vk_guess_split_k(80, 2, 32) = 1
ggml_vk_preallocate_buffers_graph(0xb400007091fdbcc0)
ggml_vk_preallocate_buffers_graph(0xb400007091fdbe50)
ggml_vk_preallocate_buffers_graph(0xb400007091fdbfe0)
ggml_vk_create_extra(0xb400007091fdbfe0 (kqv_wo-2, MUL_MAT))
ggml_vk_guess_split_k(2560, 2, 2560) = 1
ggml_vk_preallocate_buffers_graph(0xb400007091fdc170)
ggml_vk_preallocate_buffers_graph(0xb400007091fdc300)
ggml_vk_create_extra(0xb400007091fdc300 (ffn_up-2, MUL_MAT))
ggml_vk_guess_split_k(10240, 2, 2560) = 1
ggml_vk_preallocate_buffers_graph(0xb400007091fdc490)
ggml_vk_preallocate_buffers_graph(0xb400007091fdc620)
ggml_vk_preallocate_buffers_graph(0xb400007091fdc7b0)
ggml_vk_create_extra(0xb400007091fdc7b0 (ffn_down-2, MUL_MAT))
ggml_vk_guess_split_k(2560, 2, 10240) = 1
ggml_vk_preallocate_buffers_graph(0xb400007091fdc940)
ggml_vk_preallocate_buffers_graph(0xb400007091fdcad0)
ggml_vk_preallocate_buffers_graph(0xb400007091fdcc60)
ggml_vk_preallocate_buffers_graph(0xb400007091fdcdf0)
ggml_vk_preallocate_buffers_graph(0xb400007091fdcf80)
ggml_vk_preallocate_buffers_graph(0xb400007091fdd110)
ggml_vk_preallocate_buffers_graph(0xb400007091fdd2a0)
ggml_vk_create_extra(0xb400007091fdd2a0 (wqkv-3, MUL_MAT))
ggml_vk_guess_split_k(7680, 2, 2560) = 1
ggml_vk_preallocate_buffers_graph(0xb400007091fdd430)
ggml_vk_preallocate_buffers_graph(0xb400007091fdd5c0)
ggml_vk_preallocate_buffers_graph(0xb400007091fdd750)
ggml_vk_preallocate_buffers_graph(0xb400007091fddf20)
ggml_vk_preallocate_buffers_graph(0xb400007091fde240)
ggml_vk_preallocate_buffers_graph(0xb400007091fde3d0)
ggml_vk_preallocate_buffers_graph(0xb400007091fdd8e0)
ggml_vk_preallocate_buffers_graph(0xb400007091fdda70)
ggml_vk_preallocate_buffers_graph(0xb400007091fde0b0)
ggml_vk_preallocate_buffers_graph(0xb400007091fde560)
ggml_vk_preallocate_buffers_graph(0xb400007091fddc00)
ggml_vk_preallocate_buffers_graph(0xb400007091fddd90)
ggml_vk_preallocate_buffers_graph(0xb400007091fdea10)
ggml_vk_preallocate_buffers_graph(0xb400007091fded30)
ggml_vk_preallocate_buffers_graph(0xb400007091fde6f0)
ggml_vk_preallocate_buffers_graph(0xb400007091fde880)
ggml_vk_preallocate_buffers_graph(0xb400007091fdeba0)
ggml_vk_preallocate_buffers_graph(0xb400007091fdeec0)
ggml_vk_preallocate_buffers_graph(0xb400007091fdf690)
ggml_vk_preallocate_buffers_graph(0xb400007091fdf1e0)
ggml_vk_preallocate_buffers_graph(0xb400007091fdf050)
ggml_vk_preallocate_buffers_graph(0xb400007091fdf370)
ggml_vk_create_extra(0xb400007091fdf370 (kq-3, MUL_MAT))
ggml_vk_guess_split_k(32, 2, 80) = 1
ggml_vk_preallocate_buffers_graph(0xb400007091fdf500)
ggml_vk_preallocate_buffers_graph(0xb400007091fdf820)
ggml_vk_create_extra(0xb400007091fdf820 (kqv-3, MUL_MAT))
ggml_vk_guess_split_k(80, 2, 32) = 1
ggml_vk_preallocate_buffers_graph(0xb400007091fdf9b0)
ggml_vk_preallocate_buffers_graph(0xb400007091fdfb40)
ggml_vk_preallocate_buffers_graph(0xb400007091fdfcd0)
ggml_vk_create_extra(0xb400007091fdfcd0 (kqv_wo-3, MUL_MAT))
ggml_vk_guess_split_k(2560, 2, 2560) = 1
ggml_vk_preallocate_buffers_graph(0xb400007091fdfe60)
ggml_vk_preallocate_buffers_graph(0xb400007091fdfff0)
ggml_vk_create_extra(0xb400007091fdfff0 (ffn_up-3, MUL_MAT))
ggml_vk_guess_split_k(10240, 2, 2560) = 1
ggml_vk_preallocate_buffers_graph(0xb400007091fe0180)
ggml_vk_preallocate_buffers_graph(0xb400007091fe0310)
ggml_vk_preallocate_buffers_graph(0xb400007091fe04a0)
ggml_vk_create_extra(0xb400007091fe04a0 (ffn_down-3, MUL_MAT))
ggml_vk_guess_split_k(2560, 2, 10240) = 1
ggml_vk_preallocate_buffers_graph(0xb400007091fe0630)
ggml_vk_preallocate_buffers_graph(0xb400007091fe07c0)
ggml_vk_preallocate_buffers_graph(0xb400007091fe0950)
ggml_vk_preallocate_buffers_graph(0xb400007091fe0ae0)
ggml_vk_preallocate_buffers_graph(0xb400007091fe0c70)
ggml_vk_preallocate_buffers_graph(0xb400007091fe0e00)
ggml_vk_preallocate_buffers_graph(0xb400007091fe0f90)
ggml_vk_create_extra(0xb400007091fe0f90 (wqkv-4, MUL_MAT))
ggml_vk_guess_split_k(7680, 2, 2560) = 1
ggml_vk_preallocate_buffers_graph(0xb400007091fe1120)
ggml_vk_preallocate_buffers_graph(0xb400007091fe12b0)
ggml_vk_preallocate_buffers_graph(0xb400007091fe1440)
ggml_vk_preallocate_buffers_graph(0xb400007091fe1c10)
ggml_vk_preallocate_buffers_graph(0xb400007091fe1f30)
ggml_vk_preallocate_buffers_graph(0xb400007091fe20c0)
ggml_vk_preallocate_buffers_graph(0xb400007091fe15d0)
ggml_vk_preallocate_buffers_graph(0xb400007091fe1760)
ggml_vk_preallocate_buffers_graph(0xb400007091fe1da0)
ggml_vk_preallocate_buffers_graph(0xb400007091fe2250)
ggml_vk_preallocate_buffers_graph(0xb400007091fe18f0)
ggml_vk_preallocate_buffers_graph(0xb400007091fe1a80)
ggml_vk_preallocate_buffers_graph(0xb400007091fe2700)
ggml_vk_preallocate_buffers_graph(0xb400007091fe2a20)
ggml_vk_preallocate_buffers_graph(0xb400007091fe23e0)
ggml_vk_preallocate_buffers_graph(0xb400007091fe2570)
ggml_vk_preallocate_buffers_graph(0xb400007091fe2890)
ggml_vk_preallocate_buffers_graph(0xb400007091fe2bb0)
ggml_vk_preallocate_buffers_graph(0xb400007091fe3380)
ggml_vk_preallocate_buffers_graph(0xb400007091fe2ed0)
ggml_vk_preallocate_buffers_graph(0xb400007091fe2d40)
ggml_vk_preallocate_buffers_graph(0xb400007091fe3060)
ggml_vk_create_extra(0xb400007091fe3060 (kq-4, MUL_MAT))
ggml_vk_guess_split_k(32, 2, 80) = 1
ggml_vk_preallocate_buffers_graph(0xb400007091fe31f0)
ggml_vk_preallocate_buffers_graph(0xb400007091fe3510)
ggml_vk_create_extra(0xb400007091fe3510 (kqv-4, MUL_MAT))
ggml_vk_guess_split_k(80, 2, 32) = 1
ggml_vk_preallocate_buffers_graph(0xb400007091fe36a0)
ggml_vk_preallocate_buffers_graph(0xb400007091fe3830)
ggml_vk_preallocate_buffers_graph(0xb400007091fe39c0)
ggml_vk_create_extra(0xb400007091fe39c0 (kqv_wo-4, MUL_MAT))
ggml_vk_guess_split_k(2560, 2, 2560) = 1
ggml_vk_preallocate_buffers_graph(0xb400007091fe3b50)
ggml_vk_preallocate_buffers_graph(0xb400007091fe3ce0)
ggml_vk_create_extra(0xb400007091fe3ce0 (ffn_up-4, MUL_MAT))
ggml_vk_guess_split_k(10240, 2, 2560) = 1
ggml_vk_preallocate_buffers_graph(0xb400007091fe3e70)
ggml_vk_preallocate_buffers_graph(0xb400007091fe4000)
ggml_vk_preallocate_buffers_graph(0xb400007091fe4190)
ggml_vk_create_extra(0xb400007091fe4190 (ffn_down-4, MUL_MAT))
ggml_vk_guess_split_k(2560, 2, 10240) = 1
ggml_vk_preallocate_buffers_graph(0xb400007091fe4320)
ggml_vk_preallocate_buffers_graph(0xb400007091fe44b0)
ggml_vk_preallocate_buffers_graph(0xb400007091fe4640)
ggml_vk_preallocate_buffers_graph(0xb400007091fe47d0)
ggml_vk_preallocate_buffers_graph(0xb400007091fe4960)
ggml_vk_preallocate_buffers_graph(0xb400007091fe4af0)
ggml_vk_preallocate_buffers_graph(0xb400007091fe4c80)
ggml_vk_create_extra(0xb400007091fe4c80 (wqkv-5, MUL_MAT))
ggml_vk_guess_split_k(7680, 2, 2560) = 1
ggml_vk_preallocate_buffers_graph(0xb400007091fe4e10)
ggml_vk_preallocate_buffers_graph(0xb400007091fe4fa0)
ggml_vk_preallocate_buffers_graph(0xb400007091fe5130)
ggml_vk_preallocate_buffers_graph(0xb400007091fe5900)
ggml_vk_preallocate_buffers_graph(0xb400007091fe5c20)
ggml_vk_preallocate_buffers_graph(0xb400007091fe5db0)
ggml_vk_preallocate_buffers_graph(0xb400007091fe52c0)
ggml_vk_preallocate_buffers_graph(0xb400007091fe5450)
ggml_vk_preallocate_buffers_graph(0xb400007091fe5a90)
ggml_vk_preallocate_buffers_graph(0xb400007091fe5f40)
ggml_vk_preallocate_buffers_graph(0xb400007091fe55e0)
ggml_vk_preallocate_buffers_graph(0xb400007091fe5770)
ggml_vk_preallocate_buffers_graph(0xb400007091fe63f0)
ggml_vk_preallocate_buffers_graph(0xb400007091fe6710)
ggml_vk_preallocate_buffers_graph(0xb400007091fe60d0)
ggml_vk_preallocate_buffers_graph(0xb400007091fe6260)
ggml_vk_preallocate_buffers_graph(0xb400007091fe6580)
ggml_vk_preallocate_buffers_graph(0xb400007091fe68a0)
ggml_vk_preallocate_buffers_graph(0xb400007091fe7070)
ggml_vk_preallocate_buffers_graph(0xb400007091fe6bc0)
ggml_vk_preallocate_buffers_graph(0xb400007091fe6a30)
ggml_vk_preallocate_buffers_graph(0xb400007091fe6d50)
ggml_vk_create_extra(0xb400007091fe6d50 (kq-5, MUL_MAT))
ggml_vk_guess_split_k(32, 2, 80) = 1
ggml_vk_preallocate_buffers_graph(0xb400007091fe6ee0)
ggml_vk_preallocate_buffers_graph(0xb400007091fe7200)
ggml_vk_create_extra(0xb400007091fe7200 (kqv-5, MUL_MAT))
ggml_vk_guess_split_k(80, 2, 32) = 1
ggml_vk_preallocate_buffers_graph(0xb400007091fe7390)
ggml_vk_preallocate_buffers_graph(0xb400007091fe7520)
ggml_vk_preallocate_buffers_graph(0xb400007091fe76b0)
ggml_vk_create_extra(0xb400007091fe76b0 (kqv_wo-5, MUL_MAT))
ggml_vk_guess_split_k(2560, 2, 2560) = 1
ggml_vk_preallocate_buffers_graph(0xb400007091fe7840)
ggml_vk_preallocate_buffers_graph(0xb400007091fe79d0)
ggml_vk_create_extra(0xb400007091fe79d0 (ffn_up-5, MUL_MAT))
ggml_vk_guess_split_k(10240, 2, 2560) = 1
ggml_vk_preallocate_buffers_graph(0xb400007091fe7b60)
ggml_vk_preallocate_buffers_graph(0xb400007091fe7cf0)
ggml_vk_preallocate_buffers_graph(0xb400007091fe7e80)
ggml_vk_create_extra(0xb400007091fe7e80 (ffn_down-5, MUL_MAT))
ggml_vk_guess_split_k(2560, 2, 10240) = 1
ggml_vk_preallocate_buffers_graph(0xb400007091fe8010)
ggml_vk_preallocate_buffers_graph(0xb400007091fe81a0)
ggml_vk_preallocate_buffers_graph(0xb400007091fe8330)
ggml_vk_preallocate_buffers_graph(0xb400007091fe84c0)
ggml_vk_preallocate_buffers_graph(0xb400007091fe8650)
ggml_vk_preallocate_buffers_graph(0xb400007091fe87e0)
ggml_vk_preallocate_buffers_graph(0xb400007091fe8970)
ggml_vk_create_extra(0xb400007091fe8970 (wqkv-6, MUL_MAT))
ggml_vk_guess_split_k(7680, 2, 2560) = 1
ggml_vk_preallocate_buffers_graph(0xb400007091fe8b00)
ggml_vk_preallocate_buffers_graph(0xb400007091fe8c90)
ggml_vk_preallocate_buffers_graph(0xb400007091fe8e20)
ggml_vk_preallocate_buffers_graph(0xb400007091fe95f0)
ggml_vk_preallocate_buffers_graph(0xb400007091fe9910)
ggml_vk_preallocate_buffers_graph(0xb400007091fe9aa0)
ggml_vk_preallocate_buffers_graph(0xb400007091fe8fb0)
ggml_vk_preallocate_buffers_graph(0xb400007091fe9140)
ggml_vk_preallocate_buffers_graph(0xb400007091fe9780)
ggml_vk_preallocate_buffers_graph(0xb400007091fe9c30)
ggml_vk_preallocate_buffers_graph(0xb400007091fe92d0)
ggml_vk_preallocate_buffers_graph(0xb400007091fe9460)
ggml_vk_preallocate_buffers_graph(0xb400007091fea0e0)
ggml_vk_preallocate_buffers_graph(0xb400007091fea400)
ggml_vk_preallocate_buffers_graph(0xb400007091fe9dc0)
ggml_vk_preallocate_buffers_graph(0xb400007091fe9f50)
ggml_vk_preallocate_buffers_graph(0xb400007091fea270)
ggml_vk_preallocate_buffers_graph(0xb400007091fea590)
ggml_vk_preallocate_buffers_graph(0xb400007091fead60)
ggml_vk_preallocate_buffers_graph(0xb400007091fea8b0)
ggml_vk_preallocate_buffers_graph(0xb400007091fea720)
ggml_vk_preallocate_buffers_graph(0xb400007091feaa40)
ggml_vk_create_extra(0xb400007091feaa40 (kq-6, MUL_MAT))
ggml_vk_guess_split_k(32, 2, 80) = 1
ggml_vk_preallocate_buffers_graph(0xb400007091feabd0)
ggml_vk_preallocate_buffers_graph(0xb400007091feaef0)
ggml_vk_create_extra(0xb400007091feaef0 (kqv-6, MUL_MAT))
ggml_vk_guess_split_k(80, 2, 32) = 1
ggml_vk_preallocate_buffers_graph(0xb400007091feb080)
ggml_vk_preallocate_buffers_graph(0xb400007091feb210)
ggml_vk_preallocate_buffers_graph(0xb400007091feb3a0)
ggml_vk_create_extra(0xb400007091feb3a0 (kqv_wo-6, MUL_MAT))
ggml_vk_guess_split_k(2560, 2, 2560) = 1
ggml_vk_preallocate_buffers_graph(0xb400007091feb530)
ggml_vk_preallocate_buffers_graph(0xb400007091feb6c0)
ggml_vk_create_extra(0xb400007091feb6c0 (ffn_up-6, MUL_MAT))
ggml_vk_guess_split_k(10240, 2, 2560) = 1
ggml_vk_preallocate_buffers_graph(0xb400007091feb850)
ggml_vk_preallocate_buffers_graph(0xb400007091feb9e0)
ggml_vk_preallocate_buffers_graph(0xb400007091febb70)
ggml_vk_create_extra(0xb400007091febb70 (ffn_down-6, MUL_MAT))
ggml_vk_guess_split_k(2560, 2, 10240) = 1
ggml_vk_preallocate_buffers_graph(0xb400007091febd00)
ggml_vk_preallocate_buffers_graph(0xb400007091febe90)
ggml_vk_preallocate_buffers_graph(0xb400007091fec020)
ggml_vk_preallocate_buffers_graph(0xb400007091fec1b0)
ggml_vk_preallocate_buffers_graph(0xb400007091fec340)
ggml_vk_preallocate_buffers_graph(0xb400007091fec4d0)
ggml_vk_preallocate_buffers_graph(0xb400007091fec660)
ggml_vk_create_extra(0xb400007091fec660 (wqkv-7, MUL_MAT))
ggml_vk_guess_split_k(7680, 2, 2560) = 1
ggml_vk_preallocate_buffers_graph(0xb400007091fec7f0)
ggml_vk_preallocate_buffers_graph(0xb400007091fec980)
ggml_vk_preallocate_buffers_graph(0xb400007091fecb10)
ggml_vk_preallocate_buffers_graph(0xb400007091fed2e0)
ggml_vk_preallocate_buffers_graph(0xb400007091fed600)
ggml_vk_preallocate_buffers_graph(0xb400007091fed790)
ggml_vk_preallocate_buffers_graph(0xb400007091fecca0)
ggml_vk_preallocate_buffers_graph(0xb400007091fece30)
ggml_vk_preallocate_buffers_graph(0xb400007091fed470)
ggml_vk_preallocate_buffers_graph(0xb400007091fed920)
ggml_vk_preallocate_buffers_graph(0xb400007091fecfc0)
ggml_vk_preallocate_buffers_graph(0xb400007091fed150)
ggml_vk_preallocate_buffers_graph(0xb400007091feddd0)
ggml_vk_preallocate_buffers_graph(0xb400007091fee0f0)
ggml_vk_preallocate_buffers_graph(0xb400007091fedab0)
ggml_vk_preallocate_buffers_graph(0xb400007091fedc40)
ggml_vk_preallocate_buffers_graph(0xb400007091fedf60)
ggml_vk_preallocate_buffers_graph(0xb400007091fee280)
ggml_vk_preallocate_buffers_graph(0xb400007091feea50)
ggml_vk_preallocate_buffers_graph(0xb400007091fee5a0)
ggml_vk_preallocate_buffers_graph(0xb400007091fee410)
ggml_vk_preallocate_buffers_graph(0xb400007091fee730)
ggml_vk_create_extra(0xb400007091fee730 (kq-7, MUL_MAT))
ggml_vk_guess_split_k(32, 2, 80) = 1
ggml_vk_preallocate_buffers_graph(0xb400007091fee8c0)
ggml_vk_preallocate_buffers_graph(0xb400007091feebe0)
ggml_vk_create_extra(0xb400007091feebe0 (kqv-7, MUL_MAT))
ggml_vk_guess_split_k(80, 2, 32) = 1
ggml_vk_preallocate_buffers_graph(0xb400007091feed70)
ggml_vk_preallocate_buffers_graph(0xb400007091feef00)
ggml_vk_preallocate_buffers_graph(0xb400007091fef090)
ggml_vk_create_extra(0xb400007091fef090 (kqv_wo-7, MUL_MAT))
ggml_vk_guess_split_k(2560, 2, 2560) = 1
ggml_vk_preallocate_buffers_graph(0xb400007091fef220)
ggml_vk_preallocate_buffers_graph(0xb400007091fef3b0)
ggml_vk_create_extra(0xb400007091fef3b0 (ffn_up-7, MUL_MAT))
ggml_vk_guess_split_k(10240, 2, 2560) = 1
ggml_vk_preallocate_buffers_graph(0xb400007091fef540)
ggml_vk_preallocate_buffers_graph(0xb400007091fef6d0)
ggml_vk_preallocate_buffers_graph(0xb400007091fef860)
ggml_vk_create_extra(0xb400007091fef860 (ffn_down-7, MUL_MAT))
ggml_vk_guess_split_k(2560, 2, 10240) = 1
ggml_vk_preallocate_buffers_graph(0xb400007091fef9f0)
ggml_vk_preallocate_buffers_graph(0xb400007091fefb80)
ggml_vk_preallocate_buffers_graph(0xb400007091fefd10)
ggml_vk_preallocate_buffers_graph(0xb400007091fefea0)
ggml_vk_preallocate_buffers_graph(0xb400007091ff0030)
ggml_vk_preallocate_buffers_graph(0xb400007091ff01c0)
ggml_vk_preallocate_buffers_graph(0xb400007091ff0350)
ggml_vk_create_extra(0xb400007091ff0350 (wqkv-8, MUL_MAT))
ggml_vk_guess_split_k(7680, 2, 2560) = 1
ggml_vk_preallocate_buffers_graph(0xb400007091ff04e0)
ggml_vk_preallocate_buffers_graph(0xb400007091ff0670)
ggml_vk_preallocate_buffers_graph(0xb400007091ff0800)
ggml_vk_preallocate_buffers_graph(0xb400007091ff0fd0)
ggml_vk_preallocate_buffers_graph(0xb400007091ff12f0)
ggml_vk_preallocate_buffers_graph(0xb400007091ff1480)
ggml_vk_preallocate_buffers_graph(0xb400007091ff0990)
ggml_vk_preallocate_buffers_graph(0xb400007091ff0b20)
ggml_vk_preallocate_buffers_graph(0xb400007091ff1160)
ggml_vk_preallocate_buffers_graph(0xb400007091ff1610)
ggml_vk_preallocate_buffers_graph(0xb400007091ff0cb0)
ggml_vk_preallocate_buffers_graph(0xb400007091ff0e40)
ggml_vk_preallocate_buffers_graph(0xb400007091ff1ac0)
ggml_vk_preallocate_buffers_graph(0xb400007091ff1de0)
ggml_vk_preallocate_buffers_graph(0xb400007091ff17a0)
ggml_vk_preallocate_buffers_graph(0xb400007091ff1930)
ggml_vk_preallocate_buffers_graph(0xb400007091ff1c50)
ggml_vk_preallocate_buffers_graph(0xb400007091ff1f70)
ggml_vk_preallocate_buffers_graph(0xb400007091ff2740)
ggml_vk_preallocate_buffers_graph(0xb400007091ff2290)
ggml_vk_preallocate_buffers_graph(0xb400007091ff2100)
ggml_vk_preallocate_buffers_graph(0xb400007091ff2420)
ggml_vk_create_extra(0xb400007091ff2420 (kq-8, MUL_MAT))
ggml_vk_guess_split_k(32, 2, 80) = 1
ggml_vk_preallocate_buffers_graph(0xb400007091ff25b0)
ggml_vk_preallocate_buffers_graph(0xb400007091ff28d0)
ggml_vk_create_extra(0xb400007091ff28d0 (kqv-8, MUL_MAT))
ggml_vk_guess_split_k(80, 2, 32) = 1
ggml_vk_preallocate_buffers_graph(0xb400007091ff2a60)
ggml_vk_preallocate_buffers_graph(0xb400007091ff2bf0)
ggml_vk_preallocate_buffers_graph(0xb400007091ff2d80)
ggml_vk_create_extra(0xb400007091ff2d80 (kqv_wo-8, MUL_MAT))
ggml_vk_guess_split_k(2560, 2, 2560) = 1
ggml_vk_preallocate_buffers_graph(0xb400007091ff2f10)
ggml_vk_preallocate_buffers_graph(0xb400007091ff30a0)
ggml_vk_create_extra(0xb400007091ff30a0 (ffn_up-8, MUL_MAT))
ggml_vk_guess_split_k(10240, 2, 2560) = 1
ggml_vk_preallocate_buffers_graph(0xb400007091ff3230)
ggml_vk_preallocate_buffers_graph(0xb400007091ff33c0)
ggml_vk_preallocate_buffers_graph(0xb400007091ff3550)
ggml_vk_create_extra(0xb400007091ff3550 (ffn_down-8, MUL_MAT))
ggml_vk_guess_split_k(2560, 2, 10240) = 1
ggml_vk_preallocate_buffers_graph(0xb400007091ff36e0)
ggml_vk_preallocate_buffers_graph(0xb400007091ff3870)
ggml_vk_preallocate_buffers_graph(0xb400007091ff3a00)
ggml_vk_preallocate_buffers_graph(0xb400007091ff3b90)
ggml_vk_preallocate_buffers_graph(0xb400007091ff3d20)
ggml_vk_preallocate_buffers_graph(0xb400007091ff3eb0)
ggml_vk_preallocate_buffers_graph(0xb400007091ff4040)
ggml_vk_create_extra(0xb400007091ff4040 (wqkv-9, MUL_MAT))
ggml_vk_guess_split_k(7680, 2, 2560) = 1
ggml_vk_preallocate_buffers_graph(0xb400007091ff41d0)
ggml_vk_preallocate_buffers_graph(0xb400007091ff4360)
ggml_vk_preallocate_buffers_graph(0xb400007091ff44f0)
ggml_vk_preallocate_buffers_graph(0xb400007091ff4cc0)
ggml_vk_preallocate_buffers_graph(0xb400007091ff4fe0)
ggml_vk_preallocate_buffers_graph(0xb400007091ff5170)
ggml_vk_preallocate_buffers_graph(0xb400007091ff4680)
ggml_vk_preallocate_buffers_graph(0xb400007091ff4810)
ggml_vk_preallocate_buffers_graph(0xb400007091ff4e50)
ggml_vk_preallocate_buffers_graph(0xb400007091ff5300)
ggml_vk_preallocate_buffers_graph(0xb400007091ff49a0)
ggml_vk_preallocate_buffers_graph(0xb400007091ff4b30)
ggml_vk_preallocate_buffers_graph(0xb400007091ff57b0)
ggml_vk_preallocate_buffers_graph(0xb400007091ff5ad0)
ggml_vk_preallocate_buffers_graph(0xb400007091ff5490)
ggml_vk_preallocate_buffers_graph(0xb400007091ff5620)
ggml_vk_preallocate_buffers_graph(0xb400007091ff5940)
ggml_vk_preallocate_buffers_graph(0xb400007091ff5c60)
ggml_vk_preallocate_buffers_graph(0xb400007091ff6430)
ggml_vk_preallocate_buffers_graph(0xb400007091ff5f80)
ggml_vk_preallocate_buffers_graph(0xb400007091ff5df0)
ggml_vk_preallocate_buffers_graph(0xb400007091ff6110)
ggml_vk_create_extra(0xb400007091ff6110 (kq-9, MUL_MAT))
ggml_vk_guess_split_k(32, 2, 80) = 1
ggml_vk_preallocate_buffers_graph(0xb400007091ff62a0)
ggml_vk_preallocate_buffers_graph(0xb400007091ff65c0)
ggml_vk_create_extra(0xb400007091ff65c0 (kqv-9, MUL_MAT))
ggml_vk_guess_split_k(80, 2, 32) = 1
ggml_vk_preallocate_buffers_graph(0xb400007091ff6750)
ggml_vk_preallocate_buffers_graph(0xb400007091ff68e0)
ggml_vk_preallocate_buffers_graph(0xb400007091ff6a70)
ggml_vk_create_extra(0xb400007091ff6a70 (kqv_wo-9, MUL_MAT))
ggml_vk_guess_split_k(2560, 2, 2560) = 1
ggml_vk_preallocate_buffers_graph(0xb400007091ff6c00)
ggml_vk_preallocate_buffers_graph(0xb400007091ff6d90)
ggml_vk_create_extra(0xb400007091ff6d90 (ffn_up-9, MUL_MAT))
ggml_vk_guess_split_k(10240, 2, 2560) = 1
ggml_vk_preallocate_buffers_graph(0xb400007091ff6f20)
ggml_vk_preallocate_buffers_graph(0xb400007091ff70b0)
ggml_vk_preallocate_buffers_graph(0xb400007091ff7240)
ggml_vk_create_extra(0xb400007091ff7240 (ffn_down-9, MUL_MAT))
ggml_vk_guess_split_k(2560, 2, 10240) = 1
ggml_vk_preallocate_buffers_graph(0xb400007091ff73d0)
ggml_vk_preallocate_buffers_graph(0xb400007091ff7560)
ggml_vk_preallocate_buffers_graph(0xb400007091ff76f0)
ggml_vk_preallocate_buffers_graph(0xb400007091ff7880)
ggml_vk_preallocate_buffers_graph(0xb400007091ff7a10)
ggml_vk_preallocate_buffers_graph(0xb400007091ff7ba0)
ggml_vk_preallocate_buffers_graph(0xb400007091ff7d30)
ggml_vk_create_extra(0xb400007091ff7d30 (wqkv-10, MUL_MAT))
ggml_vk_guess_split_k(7680, 2, 2560) = 1
ggml_vk_preallocate_buffers_graph(0xb400007091ff7ec0)
ggml_vk_preallocate_buffers_graph(0xb400007091ff8050)
ggml_vk_preallocate_buffers_graph(0xb400007091ff81e0)
ggml_vk_preallocate_buffers_graph(0xb400007091ff89b0)
ggml_vk_preallocate_buffers_graph(0xb400007091ff8cd0)
ggml_vk_preallocate_buffers_graph(0xb400007091ff8e60)
ggml_vk_preallocate_buffers_graph(0xb400007091ff8370)
ggml_vk_preallocate_buffers_graph(0xb400007091ff8500)
ggml_vk_preallocate_buffers_graph(0xb400007091ff8b40)
ggml_vk_preallocate_buffers_graph(0xb400007091ff8ff0)
ggml_vk_preallocate_buffers_graph(0xb400007091ff8690)
ggml_vk_preallocate_buffers_graph(0xb400007091ff8820)
ggml_vk_preallocate_buffers_graph(0xb400007091ff94a0)
ggml_vk_preallocate_buffers_graph(0xb400007091ff97c0)
ggml_vk_preallocate_buffers_graph(0xb400007091ff9180)
ggml_vk_preallocate_buffers_graph(0xb400007091ff9310)
ggml_vk_preallocate_buffers_graph(0xb400007091ff9630)
ggml_vk_preallocate_buffers_graph(0xb400007091ff9950)
ggml_vk_preallocate_buffers_graph(0xb400007091ffa120)
ggml_vk_preallocate_buffers_graph(0xb400007091ff9c70)
ggml_vk_preallocate_buffers_graph(0xb400007091ff9ae0)
ggml_vk_preallocate_buffers_graph(0xb400007091ff9e00)
ggml_vk_create_extra(0xb400007091ff9e00 (kq-10, MUL_MAT))
ggml_vk_guess_split_k(32, 2, 80) = 1
ggml_vk_preallocate_buffers_graph(0xb400007091ff9f90)
ggml_vk_preallocate_buffers_graph(0xb400007091ffa2b0)
ggml_vk_create_extra(0xb400007091ffa2b0 (kqv-10, MUL_MAT))
ggml_vk_guess_split_k(80, 2, 32) = 1
ggml_vk_preallocate_buffers_graph(0xb400007091ffa440)
ggml_vk_preallocate_buffers_graph(0xb400007091ffa5d0)
ggml_vk_preallocate_buffers_graph(0xb400007091ffa760)
ggml_vk_create_extra(0xb400007091ffa760 (kqv_wo-10, MUL_MAT))
ggml_vk_guess_split_k(2560, 2, 2560) = 1
ggml_vk_preallocate_buffers_graph(0xb400007091ffa8f0)
ggml_vk_preallocate_buffers_graph(0xb400007091ffaa80)
ggml_vk_create_extra(0xb400007091ffaa80 (ffn_up-10, MUL_MAT))
ggml_vk_guess_split_k(10240, 2, 2560) = 1
ggml_vk_preallocate_buffers_graph(0xb400007091ffac10)
ggml_vk_preallocate_buffers_graph(0xb400007091ffada0)
ggml_vk_preallocate_buffers_graph(0xb400007091ffaf30)
ggml_vk_create_extra(0xb400007091ffaf30 (ffn_down-10, MUL_MAT))
ggml_vk_guess_split_k(2560, 2, 10240) = 1
ggml_vk_preallocate_buffers_graph(0xb400007091ffb0c0)
ggml_vk_preallocate_buffers_graph(0xb400007091ffb250)
ggml_vk_preallocate_buffers_graph(0xb400007091ffb3e0)
ggml_vk_preallocate_buffers_graph(0xb400007091ffb570)
ggml_vk_preallocate_buffers_graph(0xb400007091ffb700)
ggml_vk_preallocate_buffers_graph(0xb400007091ffb890)
ggml_vk_preallocate_buffers_graph(0xb400007091ffba20)
ggml_vk_create_extra(0xb400007091ffba20 (wqkv-11, MUL_MAT))
ggml_vk_guess_split_k(7680, 2, 2560) = 1
ggml_vk_preallocate_buffers_graph(0xb400007091ffbbb0)
ggml_vk_preallocate_buffers_graph(0xb400007091ffbd40)
ggml_vk_preallocate_buffers_graph(0xb400007091ffbed0)
ggml_vk_preallocate_buffers_graph(0xb400007091ffc6a0)
ggml_vk_preallocate_buffers_graph(0xb400007091ffc9c0)
ggml_vk_preallocate_buffers_graph(0xb400007091ffcb50)
ggml_vk_preallocate_buffers_graph(0xb400007091ffc060)
ggml_vk_preallocate_buffers_graph(0xb400007091ffc1f0)
ggml_vk_preallocate_buffers_graph(0xb400007091ffc830)
ggml_vk_preallocate_buffers_graph(0xb400007091ffcce0)
ggml_vk_preallocate_buffers_graph(0xb400007091ffc380)
ggml_vk_preallocate_buffers_graph(0xb400007091ffc510)
ggml_vk_preallocate_buffers_graph(0xb400007091ffd190)
ggml_vk_preallocate_buffers_graph(0xb400007091ffd4b0)
ggml_vk_preallocate_buffers_graph(0xb400007091ffce70)
ggml_vk_preallocate_buffers_graph(0xb400007091ffd000)
ggml_vk_preallocate_buffers_graph(0xb400007091ffd320)
ggml_vk_preallocate_buffers_graph(0xb400007091ffd640)
ggml_vk_preallocate_buffers_graph(0xb400007091ffde10)
ggml_vk_preallocate_buffers_graph(0xb400007091ffd960)
ggml_vk_preallocate_buffers_graph(0xb400007091ffd7d0)
ggml_vk_preallocate_buffers_graph(0xb400007091ffdaf0)
ggml_vk_create_extra(0xb400007091ffdaf0 (kq-11, MUL_MAT))
ggml_vk_guess_split_k(32, 2, 80) = 1
ggml_vk_preallocate_buffers_graph(0xb400007091ffdc80)
ggml_vk_preallocate_buffers_graph(0xb400007091ffdfa0)
ggml_vk_create_extra(0xb400007091ffdfa0 (kqv-11, MUL_MAT))
ggml_vk_guess_split_k(80, 2, 32) = 1
ggml_vk_preallocate_buffers_graph(0xb400007091ffe130)
ggml_vk_preallocate_buffers_graph(0xb400007091ffe2c0)
ggml_vk_preallocate_buffers_graph(0xb400007091ffe450)
ggml_vk_create_extra(0xb400007091ffe450 (kqv_wo-11, MUL_MAT))
ggml_vk_guess_split_k(2560, 2, 2560) = 1
ggml_vk_preallocate_buffers_graph(0xb400007091ffe5e0)
ggml_vk_preallocate_buffers_graph(0xb400007091ffe770)
ggml_vk_create_extra(0xb400007091ffe770 (ffn_up-11, MUL_MAT))
ggml_vk_guess_split_k(10240, 2, 2560) = 1
ggml_vk_preallocate_buffers_graph(0xb400007091ffe900)
ggml_vk_preallocate_buffers_graph(0xb400007091ffea90)
ggml_vk_preallocate_buffers_graph(0xb400007091ffec20)
ggml_vk_create_extra(0xb400007091ffec20 (ffn_down-11, MUL_MAT))
ggml_vk_guess_split_k(2560, 2, 10240) = 1
ggml_vk_preallocate_buffers_graph(0xb400007091ffedb0)
ggml_vk_preallocate_buffers_graph(0xb400007091ffef40)
ggml_vk_preallocate_buffers_graph(0xb400007091fff0d0)
ggml_vk_preallocate_buffers_graph(0xb400007091fff260)
ggml_vk_preallocate_buffers_graph(0xb400007091fff3f0)
ggml_vk_preallocate_buffers_graph(0xb400007091fff580)
ggml_vk_preallocate_buffers_graph(0xb400007091fff710)
ggml_vk_create_extra(0xb400007091fff710 (wqkv-12, MUL_MAT))
ggml_vk_guess_split_k(7680, 2, 2560) = 1
ggml_vk_preallocate_buffers_graph(0xb400007091fff8a0)
ggml_vk_preallocate_buffers_graph(0xb400007091fffa30)
ggml_vk_preallocate_buffers_graph(0xb400007091fffbc0)
ggml_vk_preallocate_buffers_graph(0xb400007092000390)
ggml_vk_preallocate_buffers_graph(0xb4000070920006b0)
ggml_vk_preallocate_buffers_graph(0xb400007092000840)
ggml_vk_preallocate_buffers_graph(0xb400007091fffd50)
ggml_vk_preallocate_buffers_graph(0xb400007091fffee0)
ggml_vk_preallocate_buffers_graph(0xb400007092000520)
ggml_vk_preallocate_buffers_graph(0xb4000070920009d0)
ggml_vk_preallocate_buffers_graph(0xb400007092000070)
ggml_vk_preallocate_buffers_graph(0xb400007092000200)
ggml_vk_preallocate_buffers_graph(0xb400007092000e80)
ggml_vk_preallocate_buffers_graph(0xb4000070920011a0)
ggml_vk_preallocate_buffers_graph(0xb400007092000b60)
ggml_vk_preallocate_buffers_graph(0xb400007092000cf0)
ggml_vk_preallocate_buffers_graph(0xb400007092001010)
ggml_vk_preallocate_buffers_graph(0xb400007092001330)
ggml_vk_preallocate_buffers_graph(0xb400007092001b00)
ggml_vk_preallocate_buffers_graph(0xb400007092001650)
ggml_vk_preallocate_buffers_graph(0xb4000070920014c0)
ggml_vk_preallocate_buffers_graph(0xb4000070920017e0)
ggml_vk_create_extra(0xb4000070920017e0 (kq-12, MUL_MAT))
ggml_vk_guess_split_k(32, 2, 80) = 1
ggml_vk_preallocate_buffers_graph(0xb400007092001970)
ggml_vk_preallocate_buffers_graph(0xb400007092001c90)
ggml_vk_create_extra(0xb400007092001c90 (kqv-12, MUL_MAT))
ggml_vk_guess_split_k(80, 2, 32) = 1
ggml_vk_preallocate_buffers_graph(0xb400007092001e20)
ggml_vk_preallocate_buffers_graph(0xb400007092001fb0)
ggml_vk_preallocate_buffers_graph(0xb400007092002140)
ggml_vk_create_extra(0xb400007092002140 (kqv_wo-12, MUL_MAT))
ggml_vk_guess_split_k(2560, 2, 2560) = 1
ggml_vk_preallocate_buffers_graph(0xb4000070920022d0)
ggml_vk_preallocate_buffers_graph(0xb400007092002460)
ggml_vk_create_extra(0xb400007092002460 (ffn_up-12, MUL_MAT))
ggml_vk_guess_split_k(10240, 2, 2560) = 1
ggml_vk_preallocate_buffers_graph(0xb4000070920025f0)
ggml_vk_preallocate_buffers_graph(0xb400007092002780)
ggml_vk_preallocate_buffers_graph(0xb400007092002910)
ggml_vk_create_extra(0xb400007092002910 (ffn_down-12, MUL_MAT))
ggml_vk_guess_split_k(2560, 2, 10240) = 1
ggml_vk_preallocate_buffers_graph(0xb400007092002aa0)
ggml_vk_preallocate_buffers_graph(0xb400007092002c30)
ggml_vk_preallocate_buffers_graph(0xb400007092002dc0)
ggml_vk_preallocate_buffers_graph(0xb400007092002f50)
ggml_vk_preallocate_buffers_graph(0xb4000070920030e0)
ggml_vk_preallocate_buffers_graph(0xb400007092003270)
ggml_vk_preallocate_buffers_graph(0xb400007092003400)
ggml_vk_create_extra(0xb400007092003400 (wqkv-13, MUL_MAT))
ggml_vk_guess_split_k(7680, 2, 2560) = 1
ggml_vk_preallocate_buffers_graph(0xb400007092003590)
ggml_vk_preallocate_buffers_graph(0xb400007092003720)
ggml_vk_preallocate_buffers_graph(0xb4000070920038b0)
ggml_vk_preallocate_buffers_graph(0xb400007092004080)
ggml_vk_preallocate_buffers_graph(0xb4000070920043a0)
ggml_vk_preallocate_buffers_graph(0xb400007092004530)
ggml_vk_preallocate_buffers_graph(0xb400007092003a40)
ggml_vk_preallocate_buffers_graph(0xb400007092003bd0)
ggml_vk_preallocate_buffers_graph(0xb400007092004210)
ggml_vk_preallocate_buffers_graph(0xb4000070920046c0)
ggml_vk_preallocate_buffers_graph(0xb400007092003d60)
ggml_vk_preallocate_buffers_graph(0xb400007092003ef0)
ggml_vk_preallocate_buffers_graph(0xb400007092004b70)
ggml_vk_preallocate_buffers_graph(0xb400007092004e90)
ggml_vk_preallocate_buffers_graph(0xb400007092004850)
ggml_vk_preallocate_buffers_graph(0xb4000070920049e0)
ggml_vk_preallocate_buffers_graph(0xb400007092004d00)
ggml_vk_preallocate_buffers_graph(0xb400007092005020)
ggml_vk_preallocate_buffers_graph(0xb4000070920057f0)
ggml_vk_preallocate_buffers_graph(0xb400007092005340)
ggml_vk_preallocate_buffers_graph(0xb4000070920051b0)
ggml_vk_preallocate_buffers_graph(0xb4000070920054d0)
ggml_vk_create_extra(0xb4000070920054d0 (kq-13, MUL_MAT))
ggml_vk_guess_split_k(32, 2, 80) = 1
ggml_vk_preallocate_buffers_graph(0xb400007092005660)
ggml_vk_preallocate_buffers_graph(0xb400007092005980)
ggml_vk_create_extra(0xb400007092005980 (kqv-13, MUL_MAT))
ggml_vk_guess_split_k(80, 2, 32) = 1
ggml_vk_preallocate_buffers_graph(0xb400007092005b10)
ggml_vk_preallocate_buffers_graph(0xb400007092005ca0)
ggml_vk_preallocate_buffers_graph(0xb400007092005e30)
ggml_vk_create_extra(0xb400007092005e30 (kqv_wo-13, MUL_MAT))
ggml_vk_guess_split_k(2560, 2, 2560) = 1
ggml_vk_preallocate_buffers_graph(0xb400007092005fc0)
ggml_vk_preallocate_buffers_graph(0xb400007092006150)
ggml_vk_create_extra(0xb400007092006150 (ffn_up-13, MUL_MAT))
ggml_vk_guess_split_k(10240, 2, 2560) = 1
ggml_vk_preallocate_buffers_graph(0xb4000070920062e0)
ggml_vk_preallocate_buffers_graph(0xb400007092006470)
ggml_vk_preallocate_buffers_graph(0xb400007092006600)
ggml_vk_create_extra(0xb400007092006600 (ffn_down-13, MUL_MAT))
ggml_vk_guess_split_k(2560, 2, 10240) = 1
ggml_vk_preallocate_buffers_graph(0xb400007092006790)
ggml_vk_preallocate_buffers_graph(0xb400007092006920)
ggml_vk_preallocate_buffers_graph(0xb400007092006ab0)
ggml_vk_preallocate_buffers_graph(0xb400007092006c40)
ggml_vk_preallocate_buffers_graph(0xb400007092006dd0)
ggml_vk_preallocate_buffers_graph(0xb400007092006f60)
ggml_vk_preallocate_buffers_graph(0xb4000070920070f0)
ggml_vk_create_extra(0xb4000070920070f0 (wqkv-14, MUL_MAT))
ggml_vk_guess_split_k(7680, 2, 2560) = 1
ggml_vk_preallocate_buffers_graph(0xb400007092007280)
ggml_vk_preallocate_buffers_graph(0xb400007092007410)
ggml_vk_preallocate_buffers_graph(0xb4000070920075a0)
ggml_vk_preallocate_buffers_graph(0xb400007092007d70)
ggml_vk_preallocate_buffers_graph(0xb400007092008090)
ggml_vk_preallocate_buffers_graph(0xb400007092008220)
ggml_vk_preallocate_buffers_graph(0xb400007092007730)
ggml_vk_preallocate_buffers_graph(0xb4000070920078c0)
ggml_vk_preallocate_buffers_graph(0xb400007092007f00)
ggml_vk_preallocate_buffers_graph(0xb4000070920083b0)
ggml_vk_preallocate_buffers_graph(0xb400007092007a50)
ggml_vk_preallocate_buffers_graph(0xb400007092007be0)
ggml_vk_preallocate_buffers_graph(0xb400007092008860)
ggml_vk_preallocate_buffers_graph(0xb400007092008b80)
ggml_vk_preallocate_buffers_graph(0xb400007092008540)
ggml_vk_preallocate_buffers_graph(0xb4000070920086d0)
ggml_vk_preallocate_buffers_graph(0xb4000070920089f0)
ggml_vk_preallocate_buffers_graph(0xb400007092008d10)
ggml_vk_preallocate_buffers_graph(0xb4000070920094e0)
ggml_vk_preallocate_buffers_graph(0xb400007092009030)
ggml_vk_preallocate_buffers_graph(0xb400007092008ea0)
ggml_vk_preallocate_buffers_graph(0xb4000070920091c0)
ggml_vk_create_extra(0xb4000070920091c0 (kq-14, MUL_MAT))
ggml_vk_guess_split_k(32, 2, 80) = 1
ggml_vk_preallocate_buffers_graph(0xb400007092009350)
ggml_vk_preallocate_buffers_graph(0xb400007092009670)
ggml_vk_create_extra(0xb400007092009670 (kqv-14, MUL_MAT))
ggml_vk_guess_split_k(80, 2, 32) = 1
ggml_vk_preallocate_buffers_graph(0xb400007092009800)
ggml_vk_preallocate_buffers_graph(0xb400007092009990)
ggml_vk_preallocate_buffers_graph(0xb400007092009b20)
ggml_vk_create_extra(0xb400007092009b20 (kqv_wo-14, MUL_MAT))
ggml_vk_guess_split_k(2560, 2, 2560) = 1
ggml_vk_preallocate_buffers_graph(0xb400007092009cb0)
ggml_vk_preallocate_buffers_graph(0xb400007092009e40)
ggml_vk_create_extra(0xb400007092009e40 (ffn_up-14, MUL_MAT))
ggml_vk_guess_split_k(10240, 2, 2560) = 1
ggml_vk_preallocate_buffers_graph(0xb400007092009fd0)
ggml_vk_preallocate_buffers_graph(0xb40000709200a160)
ggml_vk_preallocate_buffers_graph(0xb40000709200a2f0)
ggml_vk_create_extra(0xb40000709200a2f0 (ffn_down-14, MUL_MAT))
ggml_vk_guess_split_k(2560, 2, 10240) = 1
ggml_vk_preallocate_buffers_graph(0xb40000709200a480)
ggml_vk_preallocate_buffers_graph(0xb40000709200a610)
ggml_vk_preallocate_buffers_graph(0xb40000709200a7a0)
ggml_vk_preallocate_buffers_graph(0xb40000709200a930)
ggml_vk_preallocate_buffers_graph(0xb40000709200aac0)
ggml_vk_preallocate_buffers_graph(0xb40000709200ac50)
ggml_vk_preallocate_buffers_graph(0xb40000709200ade0)
ggml_vk_create_extra(0xb40000709200ade0 (wqkv-15, MUL_MAT))
ggml_vk_guess_split_k(7680, 2, 2560) = 1
ggml_vk_preallocate_buffers_graph(0xb40000709200af70)
ggml_vk_preallocate_buffers_graph(0xb40000709200b100)
ggml_vk_preallocate_buffers_graph(0xb40000709200b290)
ggml_vk_preallocate_buffers_graph(0xb40000709200ba60)
ggml_vk_preallocate_buffers_graph(0xb40000709200bd80)
ggml_vk_preallocate_buffers_graph(0xb40000709200bf10)
ggml_vk_preallocate_buffers_graph(0xb40000709200b420)
ggml_vk_preallocate_buffers_graph(0xb40000709200b5b0)
ggml_vk_preallocate_buffers_graph(0xb40000709200bbf0)
ggml_vk_preallocate_buffers_graph(0xb40000709200c0a0)
ggml_vk_preallocate_buffers_graph(0xb40000709200b740)
ggml_vk_preallocate_buffers_graph(0xb40000709200b8d0)
ggml_vk_preallocate_buffers_graph(0xb40000709200c550)
ggml_vk_preallocate_buffers_graph(0xb40000709200c870)
ggml_vk_preallocate_buffers_graph(0xb40000709200c230)
ggml_vk_preallocate_buffers_graph(0xb40000709200c3c0)
ggml_vk_preallocate_buffers_graph(0xb40000709200c6e0)
ggml_vk_preallocate_buffers_graph(0xb40000709200ca00)
ggml_vk_preallocate_buffers_graph(0xb40000709200d1d0)
ggml_vk_preallocate_buffers_graph(0xb40000709200cd20)
ggml_vk_preallocate_buffers_graph(0xb40000709200cb90)
ggml_vk_preallocate_buffers_graph(0xb40000709200ceb0)
ggml_vk_create_extra(0xb40000709200ceb0 (kq-15, MUL_MAT))
ggml_vk_guess_split_k(32, 2, 80) = 1
ggml_vk_preallocate_buffers_graph(0xb40000709200d040)
ggml_vk_preallocate_buffers_graph(0xb40000709200d360)
ggml_vk_create_extra(0xb40000709200d360 (kqv-15, MUL_MAT))
ggml_vk_guess_split_k(80, 2, 32) = 1
ggml_vk_preallocate_buffers_graph(0xb40000709200d4f0)
ggml_vk_preallocate_buffers_graph(0xb40000709200d680)
ggml_vk_preallocate_buffers_graph(0xb40000709200d810)
ggml_vk_create_extra(0xb40000709200d810 (kqv_wo-15, MUL_MAT))
ggml_vk_guess_split_k(2560, 2, 2560) = 1
ggml_vk_preallocate_buffers_graph(0xb40000709200d9a0)
ggml_vk_preallocate_buffers_graph(0xb40000709200db30)
ggml_vk_create_extra(0xb40000709200db30 (ffn_up-15, MUL_MAT))
ggml_vk_guess_split_k(10240, 2, 2560) = 1
ggml_vk_preallocate_buffers_graph(0xb40000709200dcc0)
ggml_vk_preallocate_buffers_graph(0xb40000709200de50)
ggml_vk_preallocate_buffers_graph(0xb40000709200dfe0)
ggml_vk_create_extra(0xb40000709200dfe0 (ffn_down-15, MUL_MAT))
ggml_vk_guess_split_k(2560, 2, 10240) = 1
ggml_vk_preallocate_buffers_graph(0xb40000709200e170)
ggml_vk_preallocate_buffers_graph(0xb40000709200e300)
ggml_vk_preallocate_buffers_graph(0xb40000709200e490)
ggml_vk_preallocate_buffers_graph(0xb40000709200e620)
ggml_vk_preallocate_buffers_graph(0xb40000709200e7b0)
ggml_vk_preallocate_buffers_graph(0xb40000709200e940)
ggml_vk_preallocate_buffers_graph(0xb40000709200ead0)
ggml_vk_create_extra(0xb40000709200ead0 (wqkv-16, MUL_MAT))
ggml_vk_guess_split_k(7680, 2, 2560) = 1
ggml_vk_preallocate_buffers_graph(0xb40000709200ec60)
ggml_vk_preallocate_buffers_graph(0xb40000709200edf0)
ggml_vk_preallocate_buffers_graph(0xb40000709200ef80)
ggml_vk_preallocate_buffers_graph(0xb40000709200f750)
ggml_vk_preallocate_buffers_graph(0xb40000709200fa70)
ggml_vk_preallocate_buffers_graph(0xb40000709200fc00)
ggml_vk_preallocate_buffers_graph(0xb40000709200f110)
ggml_vk_preallocate_buffers_graph(0xb40000709200f2a0)
ggml_vk_preallocate_buffers_graph(0xb40000709200f8e0)
ggml_vk_preallocate_buffers_graph(0xb40000709200fd90)
ggml_vk_preallocate_buffers_graph(0xb40000709200f430)
ggml_vk_preallocate_buffers_graph(0xb40000709200f5c0)
ggml_vk_preallocate_buffers_graph(0xb400007092010240)
ggml_vk_preallocate_buffers_graph(0xb400007092010560)
ggml_vk_preallocate_buffers_graph(0xb40000709200ff20)
ggml_vk_preallocate_buffers_graph(0xb4000070920100b0)
ggml_vk_preallocate_buffers_graph(0xb4000070920103d0)
ggml_vk_preallocate_buffers_graph(0xb4000070920106f0)
ggml_vk_preallocate_buffers_graph(0xb400007092010ec0)
ggml_vk_preallocate_buffers_graph(0xb400007092010a10)
ggml_vk_preallocate_buffers_graph(0xb400007092010880)
ggml_vk_preallocate_buffers_graph(0xb400007092010ba0)
ggml_vk_create_extra(0xb400007092010ba0 (kq-16, MUL_MAT))
ggml_vk_guess_split_k(32, 2, 80) = 1
ggml_vk_preallocate_buffers_graph(0xb400007092010d30)
ggml_vk_preallocate_buffers_graph(0xb400007092011050)
ggml_vk_create_extra(0xb400007092011050 (kqv-16, MUL_MAT))
ggml_vk_guess_split_k(80, 2, 32) = 1
ggml_vk_preallocate_buffers_graph(0xb4000070920111e0)
ggml_vk_preallocate_buffers_graph(0xb400007092011370)
ggml_vk_preallocate_buffers_graph(0xb400007092011500)
ggml_vk_create_extra(0xb400007092011500 (kqv_wo-16, MUL_MAT))
ggml_vk_guess_split_k(2560, 2, 2560) = 1
ggml_vk_preallocate_buffers_graph(0xb400007092011690)
ggml_vk_preallocate_buffers_graph(0xb400007092011820)
ggml_vk_create_extra(0xb400007092011820 (ffn_up-16, MUL_MAT))
ggml_vk_guess_split_k(10240, 2, 2560) = 1
ggml_vk_preallocate_buffers_graph(0xb4000070920119b0)
ggml_vk_preallocate_buffers_graph(0xb400007092011b40)
ggml_vk_preallocate_buffers_graph(0xb400007092011cd0)
ggml_vk_create_extra(0xb400007092011cd0 (ffn_down-16, MUL_MAT))
ggml_vk_guess_split_k(2560, 2, 10240) = 1
ggml_vk_preallocate_buffers_graph(0xb400007092011e60)
ggml_vk_preallocate_buffers_graph(0xb400007092011ff0)
ggml_vk_preallocate_buffers_graph(0xb400007092012180)
ggml_vk_preallocate_buffers_graph(0xb400007092012310)
ggml_vk_preallocate_buffers_graph(0xb4000070920124a0)
ggml_vk_preallocate_buffers_graph(0xb400007092012630)
ggml_vk_preallocate_buffers_graph(0xb4000070920127c0)
ggml_vk_create_extra(0xb4000070920127c0 (wqkv-17, MUL_MAT))
ggml_vk_guess_split_k(7680, 2, 2560) = 1
ggml_vk_preallocate_buffers_graph(0xb400007092012950)
ggml_vk_preallocate_buffers_graph(0xb400007092012ae0)
ggml_vk_preallocate_buffers_graph(0xb400007092012c70)
ggml_vk_preallocate_buffers_graph(0xb400007092013440)
ggml_vk_preallocate_buffers_graph(0xb400007092013760)
ggml_vk_preallocate_buffers_graph(0xb4000070920138f0)
ggml_vk_preallocate_buffers_graph(0xb400007092012e00)
ggml_vk_preallocate_buffers_graph(0xb400007092012f90)
ggml_vk_preallocate_buffers_graph(0xb4000070920135d0)
ggml_vk_preallocate_buffers_graph(0xb400007092013a80)
ggml_vk_preallocate_buffers_graph(0xb400007092013120)
ggml_vk_preallocate_buffers_graph(0xb4000070920132b0)
ggml_vk_preallocate_buffers_graph(0xb400007092013f30)
ggml_vk_preallocate_buffers_graph(0xb400007092014250)
ggml_vk_preallocate_buffers_graph(0xb400007092013c10)
ggml_vk_preallocate_buffers_graph(0xb400007092013da0)
ggml_vk_preallocate_buffers_graph(0xb4000070920140c0)
ggml_vk_preallocate_buffers_graph(0xb4000070920143e0)
ggml_vk_preallocate_buffers_graph(0xb400007092014bb0)
ggml_vk_preallocate_buffers_graph(0xb400007092014700)
ggml_vk_preallocate_buffers_graph(0xb400007092014570)
ggml_vk_preallocate_buffers_graph(0xb400007092014890)
ggml_vk_create_extra(0xb400007092014890 (kq-17, MUL_MAT))
ggml_vk_guess_split_k(32, 2, 80) = 1
ggml_vk_preallocate_buffers_graph(0xb400007092014a20)
ggml_vk_preallocate_buffers_graph(0xb400007092014d40)
ggml_vk_create_extra(0xb400007092014d40 (kqv-17, MUL_MAT))
ggml_vk_guess_split_k(80, 2, 32) = 1
ggml_vk_preallocate_buffers_graph(0xb400007092014ed0)
ggml_vk_preallocate_buffers_graph(0xb400007092015060)
ggml_vk_preallocate_buffers_graph(0xb4000070920151f0)
ggml_vk_create_extra(0xb4000070920151f0 (kqv_wo-17, MUL_MAT))
ggml_vk_guess_split_k(2560, 2, 2560) = 1
ggml_vk_preallocate_buffers_graph(0xb400007092015380)
ggml_vk_preallocate_buffers_graph(0xb400007092015510)
ggml_vk_create_extra(0xb400007092015510 (ffn_up-17, MUL_MAT))
ggml_vk_guess_split_k(10240, 2, 2560) = 1
ggml_vk_preallocate_buffers_graph(0xb4000070920156a0)
ggml_vk_preallocate_buffers_graph(0xb400007092015830)
ggml_vk_preallocate_buffers_graph(0xb4000070920159c0)
ggml_vk_create_extra(0xb4000070920159c0 (ffn_down-17, MUL_MAT))
ggml_vk_guess_split_k(2560, 2, 10240) = 1
ggml_vk_preallocate_buffers_graph(0xb400007092015b50)
ggml_vk_preallocate_buffers_graph(0xb400007092015ce0)
ggml_vk_preallocate_buffers_graph(0xb400007092015e70)
ggml_vk_preallocate_buffers_graph(0xb400007092016000)
ggml_vk_preallocate_buffers_graph(0xb400007092016190)
ggml_vk_preallocate_buffers_graph(0xb400007092016320)
ggml_vk_preallocate_buffers_graph(0xb4000070920164b0)
ggml_vk_create_extra(0xb4000070920164b0 (wqkv-18, MUL_MAT))
ggml_vk_guess_split_k(7680, 2, 2560) = 1
ggml_vk_preallocate_buffers_graph(0xb400007092016640)
ggml_vk_preallocate_buffers_graph(0xb4000070920167d0)
ggml_vk_preallocate_buffers_graph(0xb400007092016960)
ggml_vk_preallocate_buffers_graph(0xb400007092017130)
ggml_vk_preallocate_buffers_graph(0xb400007092017450)
ggml_vk_preallocate_buffers_graph(0xb4000070920175e0)
ggml_vk_preallocate_buffers_graph(0xb400007092016af0)
ggml_vk_preallocate_buffers_graph(0xb400007092016c80)
ggml_vk_preallocate_buffers_graph(0xb4000070920172c0)
ggml_vk_preallocate_buffers_graph(0xb400007092017770)
ggml_vk_preallocate_buffers_graph(0xb400007092016e10)
ggml_vk_preallocate_buffers_graph(0xb400007092016fa0)
ggml_vk_preallocate_buffers_graph(0xb400007092017c20)
ggml_vk_preallocate_buffers_graph(0xb400007092017f40)
ggml_vk_preallocate_buffers_graph(0xb400007092017900)
ggml_vk_preallocate_buffers_graph(0xb400007092017a90)
ggml_vk_preallocate_buffers_graph(0xb400007092017db0)
ggml_vk_preallocate_buffers_graph(0xb4000070920180d0)
ggml_vk_preallocate_buffers_graph(0xb4000070920188a0)
ggml_vk_preallocate_buffers_graph(0xb4000070920183f0)
ggml_vk_preallocate_buffers_graph(0xb400007092018260)
ggml_vk_preallocate_buffers_graph(0xb400007092018580)
ggml_vk_create_extra(0xb400007092018580 (kq-18, MUL_MAT))
ggml_vk_guess_split_k(32, 2, 80) = 1
ggml_vk_preallocate_buffers_graph(0xb400007092018710)
ggml_vk_preallocate_buffers_graph(0xb400007092018a30)
ggml_vk_create_extra(0xb400007092018a30 (kqv-18, MUL_MAT))
ggml_vk_guess_split_k(80, 2, 32) = 1
ggml_vk_preallocate_buffers_graph(0xb400007092018bc0)
ggml_vk_preallocate_buffers_graph(0xb400007092018d50)
ggml_vk_preallocate_buffers_graph(0xb400007092018ee0)
ggml_vk_create_extra(0xb400007092018ee0 (kqv_wo-18, MUL_MAT))
ggml_vk_guess_split_k(2560, 2, 2560) = 1
ggml_vk_preallocate_buffers_graph(0xb400007092019070)
ggml_vk_preallocate_buffers_graph(0xb400007092019200)
ggml_vk_create_extra(0xb400007092019200 (ffn_up-18, MUL_MAT))
ggml_vk_guess_split_k(10240, 2, 2560) = 1
ggml_vk_preallocate_buffers_graph(0xb400007092019390)
ggml_vk_preallocate_buffers_graph(0xb400007092019520)
ggml_vk_preallocate_buffers_graph(0xb4000070920196b0)
ggml_vk_create_extra(0xb4000070920196b0 (ffn_down-18, MUL_MAT))
ggml_vk_guess_split_k(2560, 2, 10240) = 1
ggml_vk_preallocate_buffers_graph(0xb400007092019840)
ggml_vk_preallocate_buffers_graph(0xb4000070920199d0)
ggml_vk_preallocate_buffers_graph(0xb400007092019b60)
ggml_vk_preallocate_buffers_graph(0xb400007092019cf0)
ggml_vk_preallocate_buffers_graph(0xb400007092019e80)
ggml_vk_preallocate_buffers_graph(0xb40000709201a010)
ggml_vk_preallocate_buffers_graph(0xb40000709201a1a0)
ggml_vk_create_extra(0xb40000709201a1a0 (wqkv-19, MUL_MAT))
ggml_vk_guess_split_k(7680, 2, 2560) = 1
ggml_vk_preallocate_buffers_graph(0xb40000709201a330)
ggml_vk_preallocate_buffers_graph(0xb40000709201a4c0)
ggml_vk_preallocate_buffers_graph(0xb40000709201a650)
ggml_vk_preallocate_buffers_graph(0xb40000709201ae20)
ggml_vk_preallocate_buffers_graph(0xb40000709201b140)
ggml_vk_preallocate_buffers_graph(0xb40000709201b2d0)
ggml_vk_preallocate_buffers_graph(0xb40000709201a7e0)
ggml_vk_preallocate_buffers_graph(0xb40000709201a970)
ggml_vk_preallocate_buffers_graph(0xb40000709201afb0)
ggml_vk_preallocate_buffers_graph(0xb40000709201b460)
ggml_vk_preallocate_buffers_graph(0xb40000709201ab00)
ggml_vk_preallocate_buffers_graph(0xb40000709201ac90)
ggml_vk_preallocate_buffers_graph(0xb40000709201b910)
ggml_vk_preallocate_buffers_graph(0xb40000709201bc30)
ggml_vk_preallocate_buffers_graph(0xb40000709201b5f0)
ggml_vk_preallocate_buffers_graph(0xb40000709201b780)
ggml_vk_preallocate_buffers_graph(0xb40000709201baa0)
ggml_vk_preallocate_buffers_graph(0xb40000709201bdc0)
ggml_vk_preallocate_buffers_graph(0xb40000709201c590)
ggml_vk_preallocate_buffers_graph(0xb40000709201c0e0)
ggml_vk_preallocate_buffers_graph(0xb40000709201bf50)
ggml_vk_preallocate_buffers_graph(0xb40000709201c270)
ggml_vk_create_extra(0xb40000709201c270 (kq-19, MUL_MAT))
ggml_vk_guess_split_k(32, 2, 80) = 1
ggml_vk_preallocate_buffers_graph(0xb40000709201c400)
ggml_vk_preallocate_buffers_graph(0xb40000709201c720)
ggml_vk_create_extra(0xb40000709201c720 (kqv-19, MUL_MAT))
ggml_vk_guess_split_k(80, 2, 32) = 1
ggml_vk_preallocate_buffers_graph(0xb40000709201c8b0)
ggml_vk_preallocate_buffers_graph(0xb40000709201ca40)
ggml_vk_preallocate_buffers_graph(0xb40000709201cbd0)
ggml_vk_create_extra(0xb40000709201cbd0 (kqv_wo-19, MUL_MAT))
ggml_vk_guess_split_k(2560, 2, 2560) = 1
ggml_vk_preallocate_buffers_graph(0xb40000709201cd60)
ggml_vk_preallocate_buffers_graph(0xb40000709201cef0)
ggml_vk_create_extra(0xb40000709201cef0 (ffn_up-19, MUL_MAT))
ggml_vk_guess_split_k(10240, 2, 2560) = 1
ggml_vk_preallocate_buffers_graph(0xb40000709201d080)
ggml_vk_preallocate_buffers_graph(0xb40000709201d210)
ggml_vk_preallocate_buffers_graph(0xb40000709201d3a0)
ggml_vk_create_extra(0xb40000709201d3a0 (ffn_down-19, MUL_MAT))
ggml_vk_guess_split_k(2560, 2, 10240) = 1
ggml_vk_preallocate_buffers_graph(0xb40000709201d530)
ggml_vk_preallocate_buffers_graph(0xb40000709201d6c0)
ggml_vk_preallocate_buffers_graph(0xb40000709201d850)
ggml_vk_preallocate_buffers_graph(0xb40000709201d9e0)
ggml_vk_preallocate_buffers_graph(0xb40000709201db70)
ggml_vk_preallocate_buffers_graph(0xb40000709201dd00)
ggml_vk_preallocate_buffers_graph(0xb40000709201de90)
ggml_vk_create_extra(0xb40000709201de90 (wqkv-20, MUL_MAT))
ggml_vk_guess_split_k(7680, 2, 2560) = 1
ggml_vk_preallocate_buffers_graph(0xb40000709201e020)
ggml_vk_preallocate_buffers_graph(0xb40000709201e1b0)
ggml_vk_preallocate_buffers_graph(0xb40000709201e340)
ggml_vk_preallocate_buffers_graph(0xb40000709201eb10)
ggml_vk_preallocate_buffers_graph(0xb40000709201ee30)
ggml_vk_preallocate_buffers_graph(0xb40000709201efc0)
ggml_vk_preallocate_buffers_graph(0xb40000709201e4d0)
ggml_vk_preallocate_buffers_graph(0xb40000709201e660)
ggml_vk_preallocate_buffers_graph(0xb40000709201eca0)
ggml_vk_preallocate_buffers_graph(0xb40000709201f150)
ggml_vk_preallocate_buffers_graph(0xb40000709201e7f0)
ggml_vk_preallocate_buffers_graph(0xb40000709201e980)
ggml_vk_preallocate_buffers_graph(0xb40000709201f600)
ggml_vk_preallocate_buffers_graph(0xb40000709201f920)
ggml_vk_preallocate_buffers_graph(0xb40000709201f2e0)
ggml_vk_preallocate_buffers_graph(0xb40000709201f470)
ggml_vk_preallocate_buffers_graph(0xb40000709201f790)
ggml_vk_preallocate_buffers_graph(0xb40000709201fab0)
ggml_vk_preallocate_buffers_graph(0xb400007092020280)
ggml_vk_preallocate_buffers_graph(0xb40000709201fdd0)
ggml_vk_preallocate_buffers_graph(0xb40000709201fc40)
ggml_vk_preallocate_buffers_graph(0xb40000709201ff60)
ggml_vk_create_extra(0xb40000709201ff60 (kq-20, MUL_MAT))
ggml_vk_guess_split_k(32, 2, 80) = 1
ggml_vk_preallocate_buffers_graph(0xb4000070920200f0)
ggml_vk_preallocate_buffers_graph(0xb400007092020410)
ggml_vk_create_extra(0xb400007092020410 (kqv-20, MUL_MAT))
ggml_vk_guess_split_k(80, 2, 32) = 1
ggml_vk_preallocate_buffers_graph(0xb4000070920205a0)
ggml_vk_preallocate_buffers_graph(0xb400007092020730)
ggml_vk_preallocate_buffers_graph(0xb4000070920208c0)
ggml_vk_create_extra(0xb4000070920208c0 (kqv_wo-20, MUL_MAT))
ggml_vk_guess_split_k(2560, 2, 2560) = 1
ggml_vk_preallocate_buffers_graph(0xb400007092020a50)
ggml_vk_preallocate_buffers_graph(0xb400007092020be0)
ggml_vk_create_extra(0xb400007092020be0 (ffn_up-20, MUL_MAT))
ggml_vk_guess_split_k(10240, 2, 2560) = 1
ggml_vk_preallocate_buffers_graph(0xb400007092020d70)
ggml_vk_preallocate_buffers_graph(0xb400007092020f00)
ggml_vk_preallocate_buffers_graph(0xb400007092021090)
ggml_vk_create_extra(0xb400007092021090 (ffn_down-20, MUL_MAT))
ggml_vk_guess_split_k(2560, 2, 10240) = 1
ggml_vk_preallocate_buffers_graph(0xb400007092021220)
ggml_vk_preallocate_buffers_graph(0xb4000070920213b0)
ggml_vk_preallocate_buffers_graph(0xb400007092021540)
ggml_vk_preallocate_buffers_graph(0xb4000070920216d0)
ggml_vk_preallocate_buffers_graph(0xb400007092021860)
ggml_vk_preallocate_buffers_graph(0xb4000070920219f0)
ggml_vk_preallocate_buffers_graph(0xb400007092021b80)
ggml_vk_create_extra(0xb400007092021b80 (wqkv-21, MUL_MAT))
ggml_vk_guess_split_k(7680, 2, 2560) = 1
ggml_vk_preallocate_buffers_graph(0xb400007092021d10)
ggml_vk_preallocate_buffers_graph(0xb400007092021ea0)
ggml_vk_preallocate_buffers_graph(0xb400007092022030)
ggml_vk_preallocate_buffers_graph(0xb400007092022800)
ggml_vk_preallocate_buffers_graph(0xb400007092022b20)
ggml_vk_preallocate_buffers_graph(0xb400007092022cb0)
ggml_vk_preallocate_buffers_graph(0xb4000070920221c0)
ggml_vk_preallocate_buffers_graph(0xb400007092022350)
ggml_vk_preallocate_buffers_graph(0xb400007092022990)
ggml_vk_preallocate_buffers_graph(0xb400007092022e40)
ggml_vk_preallocate_buffers_graph(0xb4000070920224e0)
ggml_vk_preallocate_buffers_graph(0xb400007092022670)
ggml_vk_preallocate_buffers_graph(0xb4000070920232f0)
ggml_vk_preallocate_buffers_graph(0xb400007092023610)
ggml_vk_preallocate_buffers_graph(0xb400007092022fd0)
ggml_vk_preallocate_buffers_graph(0xb400007092023160)
ggml_vk_preallocate_buffers_graph(0xb400007092023480)
ggml_vk_preallocate_buffers_graph(0xb4000070920237a0)
ggml_vk_preallocate_buffers_graph(0xb400007092023f70)
ggml_vk_preallocate_buffers_graph(0xb400007092023ac0)
ggml_vk_preallocate_buffers_graph(0xb400007092023930)
ggml_vk_preallocate_buffers_graph(0xb400007092023c50)
ggml_vk_create_extra(0xb400007092023c50 (kq-21, MUL_MAT))
ggml_vk_guess_split_k(32, 2, 80) = 1
ggml_vk_preallocate_buffers_graph(0xb400007092023de0)
ggml_vk_preallocate_buffers_graph(0xb400007092024100)
ggml_vk_create_extra(0xb400007092024100 (kqv-21, MUL_MAT))
ggml_vk_guess_split_k(80, 2, 32) = 1
ggml_vk_preallocate_buffers_graph(0xb400007092024290)
ggml_vk_preallocate_buffers_graph(0xb400007092024420)
ggml_vk_preallocate_buffers_graph(0xb4000070920245b0)
ggml_vk_create_extra(0xb4000070920245b0 (kqv_wo-21, MUL_MAT))
ggml_vk_guess_split_k(2560, 2, 2560) = 1
ggml_vk_preallocate_buffers_graph(0xb400007092024740)
ggml_vk_preallocate_buffers_graph(0xb4000070920248d0)
ggml_vk_create_extra(0xb4000070920248d0 (ffn_up-21, MUL_MAT))
ggml_vk_guess_split_k(10240, 2, 2560) = 1
ggml_vk_preallocate_buffers_graph(0xb400007092024a60)
ggml_vk_preallocate_buffers_graph(0xb400007092024bf0)
ggml_vk_preallocate_buffers_graph(0xb400007092024d80)
ggml_vk_create_extra(0xb400007092024d80 (ffn_down-21, MUL_MAT))
ggml_vk_guess_split_k(2560, 2, 10240) = 1
ggml_vk_preallocate_buffers_graph(0xb400007092024f10)
ggml_vk_preallocate_buffers_graph(0xb4000070920250a0)
ggml_vk_preallocate_buffers_graph(0xb400007092025230)
ggml_vk_preallocate_buffers_graph(0xb4000070920253c0)
ggml_vk_preallocate_buffers_graph(0xb400007092025550)
ggml_vk_preallocate_buffers_graph(0xb4000070920256e0)
ggml_vk_preallocate_buffers_graph(0xb400007092025870)
ggml_vk_create_extra(0xb400007092025870 (wqkv-22, MUL_MAT))
ggml_vk_guess_split_k(7680, 2, 2560) = 1
ggml_vk_preallocate_buffers_graph(0xb400007092025a00)
ggml_vk_preallocate_buffers_graph(0xb400007092025b90)
ggml_vk_preallocate_buffers_graph(0xb400007092025d20)
ggml_vk_preallocate_buffers_graph(0xb4000070920264f0)
ggml_vk_preallocate_buffers_graph(0xb400007092026810)
ggml_vk_preallocate_buffers_graph(0xb4000070920269a0)
ggml_vk_preallocate_buffers_graph(0xb400007092025eb0)
ggml_vk_preallocate_buffers_graph(0xb400007092026040)
ggml_vk_preallocate_buffers_graph(0xb400007092026680)
ggml_vk_preallocate_buffers_graph(0xb400007092026b30)
ggml_vk_preallocate_buffers_graph(0xb4000070920261d0)
ggml_vk_preallocate_buffers_graph(0xb400007092026360)
ggml_vk_preallocate_buffers_graph(0xb400007092026fe0)
ggml_vk_preallocate_buffers_graph(0xb400007092027300)
ggml_vk_preallocate_buffers_graph(0xb400007092026cc0)
ggml_vk_preallocate_buffers_graph(0xb400007092026e50)
ggml_vk_preallocate_buffers_graph(0xb400007092027170)
ggml_vk_preallocate_buffers_graph(0xb400007092027490)
ggml_vk_preallocate_buffers_graph(0xb400007092027c60)
ggml_vk_preallocate_buffers_graph(0xb4000070920277b0)
ggml_vk_preallocate_buffers_graph(0xb400007092027620)
ggml_vk_preallocate_buffers_graph(0xb400007092027940)
ggml_vk_create_extra(0xb400007092027940 (kq-22, MUL_MAT))
ggml_vk_guess_split_k(32, 2, 80) = 1
ggml_vk_preallocate_buffers_graph(0xb400007092027ad0)
ggml_vk_preallocate_buffers_graph(0xb400007092027df0)
ggml_vk_create_extra(0xb400007092027df0 (kqv-22, MUL_MAT))
ggml_vk_guess_split_k(80, 2, 32) = 1
ggml_vk_preallocate_buffers_graph(0xb400007092027f80)
ggml_vk_preallocate_buffers_graph(0xb400007092028110)
ggml_vk_preallocate_buffers_graph(0xb4000070920282a0)
ggml_vk_create_extra(0xb4000070920282a0 (kqv_wo-22, MUL_MAT))
ggml_vk_guess_split_k(2560, 2, 2560) = 1
ggml_vk_preallocate_buffers_graph(0xb400007092028430)
ggml_vk_preallocate_buffers_graph(0xb4000070920285c0)
ggml_vk_create_extra(0xb4000070920285c0 (ffn_up-22, MUL_MAT))
ggml_vk_guess_split_k(10240, 2, 2560) = 1
ggml_vk_preallocate_buffers_graph(0xb400007092028750)
ggml_vk_preallocate_buffers_graph(0xb4000070920288e0)
ggml_vk_preallocate_buffers_graph(0xb400007092028a70)
ggml_vk_create_extra(0xb400007092028a70 (ffn_down-22, MUL_MAT))
ggml_vk_guess_split_k(2560, 2, 10240) = 1
ggml_vk_preallocate_buffers_graph(0xb400007092028c00)
ggml_vk_preallocate_buffers_graph(0xb400007092028d90)
ggml_vk_preallocate_buffers_graph(0xb400007092028f20)
ggml_vk_preallocate_buffers_graph(0xb4000070920290b0)
ggml_vk_preallocate_buffers_graph(0xb400007092029240)
ggml_vk_preallocate_buffers_graph(0xb4000070920293d0)
ggml_vk_preallocate_buffers_graph(0xb400007092029560)
ggml_vk_create_extra(0xb400007092029560 (wqkv-23, MUL_MAT))
ggml_vk_guess_split_k(7680, 2, 2560) = 1
ggml_vk_preallocate_buffers_graph(0xb4000070920296f0)
ggml_vk_preallocate_buffers_graph(0xb400007092029880)
ggml_vk_preallocate_buffers_graph(0xb400007092029a10)
ggml_vk_preallocate_buffers_graph(0xb40000709202a1e0)
ggml_vk_preallocate_buffers_graph(0xb40000709202a500)
ggml_vk_preallocate_buffers_graph(0xb40000709202a690)
ggml_vk_preallocate_buffers_graph(0xb400007092029ba0)
ggml_vk_preallocate_buffers_graph(0xb400007092029d30)
ggml_vk_preallocate_buffers_graph(0xb40000709202a370)
ggml_vk_preallocate_buffers_graph(0xb40000709202a820)
ggml_vk_preallocate_buffers_graph(0xb400007092029ec0)
ggml_vk_preallocate_buffers_graph(0xb40000709202a050)
ggml_vk_preallocate_buffers_graph(0xb40000709202acd0)
ggml_vk_preallocate_buffers_graph(0xb40000709202aff0)
ggml_vk_preallocate_buffers_graph(0xb40000709202a9b0)
ggml_vk_preallocate_buffers_graph(0xb40000709202ab40)
ggml_vk_preallocate_buffers_graph(0xb40000709202ae60)
ggml_vk_preallocate_buffers_graph(0xb40000709202b180)
ggml_vk_preallocate_buffers_graph(0xb40000709202b950)
ggml_vk_preallocate_buffers_graph(0xb40000709202b4a0)
ggml_vk_preallocate_buffers_graph(0xb40000709202b310)
ggml_vk_preallocate_buffers_graph(0xb40000709202b630)
ggml_vk_create_extra(0xb40000709202b630 (kq-23, MUL_MAT))
ggml_vk_guess_split_k(32, 2, 80) = 1
ggml_vk_preallocate_buffers_graph(0xb40000709202b7c0)
ggml_vk_preallocate_buffers_graph(0xb40000709202bae0)
ggml_vk_create_extra(0xb40000709202bae0 (kqv-23, MUL_MAT))
ggml_vk_guess_split_k(80, 2, 32) = 1
ggml_vk_preallocate_buffers_graph(0xb40000709202bc70)
ggml_vk_preallocate_buffers_graph(0xb40000709202be00)
ggml_vk_preallocate_buffers_graph(0xb40000709202bf90)
ggml_vk_create_extra(0xb40000709202bf90 (kqv_wo-23, MUL_MAT))
ggml_vk_guess_split_k(2560, 2, 2560) = 1
ggml_vk_preallocate_buffers_graph(0xb40000709202c120)
ggml_vk_preallocate_buffers_graph(0xb40000709202c2b0)
ggml_vk_create_extra(0xb40000709202c2b0 (ffn_up-23, MUL_MAT))
ggml_vk_guess_split_k(10240, 2, 2560) = 1
ggml_vk_preallocate_buffers_graph(0xb40000709202c440)
ggml_vk_preallocate_buffers_graph(0xb40000709202c5d0)
ggml_vk_preallocate_buffers_graph(0xb40000709202c760)
ggml_vk_create_extra(0xb40000709202c760 (ffn_down-23, MUL_MAT))
ggml_vk_guess_split_k(2560, 2, 10240) = 1
ggml_vk_preallocate_buffers_graph(0xb40000709202c8f0)
ggml_vk_preallocate_buffers_graph(0xb40000709202ca80)
ggml_vk_preallocate_buffers_graph(0xb40000709202cc10)
ggml_vk_preallocate_buffers_graph(0xb40000709202cda0)
ggml_vk_preallocate_buffers_graph(0xb40000709202cf30)
ggml_vk_preallocate_buffers_graph(0xb40000709202d0c0)
ggml_vk_preallocate_buffers_graph(0xb40000709202d250)
ggml_vk_create_extra(0xb40000709202d250 (wqkv-24, MUL_MAT))
ggml_vk_guess_split_k(7680, 2, 2560) = 1
ggml_vk_preallocate_buffers_graph(0xb40000709202d3e0)
ggml_vk_preallocate_buffers_graph(0xb40000709202d570)
ggml_vk_preallocate_buffers_graph(0xb40000709202d700)
ggml_vk_preallocate_buffers_graph(0xb40000709202ded0)
ggml_vk_preallocate_buffers_graph(0xb40000709202e1f0)
ggml_vk_preallocate_buffers_graph(0xb40000709202e380)
ggml_vk_preallocate_buffers_graph(0xb40000709202d890)
ggml_vk_preallocate_buffers_graph(0xb40000709202da20)
ggml_vk_preallocate_buffers_graph(0xb40000709202e060)
ggml_vk_preallocate_buffers_graph(0xb40000709202e510)
ggml_vk_preallocate_buffers_graph(0xb40000709202dbb0)
ggml_vk_preallocate_buffers_graph(0xb40000709202dd40)
ggml_vk_preallocate_buffers_graph(0xb40000709202e9c0)
ggml_vk_preallocate_buffers_graph(0xb40000709202ece0)
ggml_vk_preallocate_buffers_graph(0xb40000709202e6a0)
ggml_vk_preallocate_buffers_graph(0xb40000709202e830)
ggml_vk_preallocate_buffers_graph(0xb40000709202eb50)
ggml_vk_preallocate_buffers_graph(0xb40000709202ee70)
ggml_vk_preallocate_buffers_graph(0xb40000709202f640)
ggml_vk_preallocate_buffers_graph(0xb40000709202f190)
ggml_vk_preallocate_buffers_graph(0xb40000709202f000)
ggml_vk_preallocate_buffers_graph(0xb40000709202f320)
ggml_vk_create_extra(0xb40000709202f320 (kq-24, MUL_MAT))
ggml_vk_guess_split_k(32, 2, 80) = 1
ggml_vk_preallocate_buffers_graph(0xb40000709202f4b0)
ggml_vk_preallocate_buffers_graph(0xb40000709202f7d0)
ggml_vk_create_extra(0xb40000709202f7d0 (kqv-24, MUL_MAT))
ggml_vk_guess_split_k(80, 2, 32) = 1
ggml_vk_preallocate_buffers_graph(0xb40000709202f960)
ggml_vk_preallocate_buffers_graph(0xb40000709202faf0)
ggml_vk_preallocate_buffers_graph(0xb40000709202fc80)
ggml_vk_create_extra(0xb40000709202fc80 (kqv_wo-24, MUL_MAT))
ggml_vk_guess_split_k(2560, 2, 2560) = 1
ggml_vk_preallocate_buffers_graph(0xb40000709202fe10)
ggml_vk_preallocate_buffers_graph(0xb40000709202ffa0)
ggml_vk_create_extra(0xb40000709202ffa0 (ffn_up-24, MUL_MAT))
ggml_vk_guess_split_k(10240, 2, 2560) = 1
ggml_vk_preallocate_buffers_graph(0xb400007092030130)
ggml_vk_preallocate_buffers_graph(0xb4000070920302c0)
ggml_vk_preallocate_buffers_graph(0xb400007092030450)
ggml_vk_create_extra(0xb400007092030450 (ffn_down-24, MUL_MAT))
ggml_vk_guess_split_k(2560, 2, 10240) = 1
ggml_vk_preallocate_buffers_graph(0xb4000070920305e0)
ggml_vk_preallocate_buffers_graph(0xb400007092030770)
ggml_vk_preallocate_buffers_graph(0xb400007092030900)
ggml_vk_preallocate_buffers_graph(0xb400007092030a90)
ggml_vk_preallocate_buffers_graph(0xb400007092030c20)
ggml_vk_preallocate_buffers_graph(0xb400007092030db0)
ggml_vk_preallocate_buffers_graph(0xb400007092030f40)
ggml_vk_create_extra(0xb400007092030f40 (wqkv-25, MUL_MAT))
ggml_vk_guess_split_k(7680, 2, 2560) = 1
ggml_vk_preallocate_buffers_graph(0xb4000070920310d0)
ggml_vk_preallocate_buffers_graph(0xb400007092031260)
ggml_vk_preallocate_buffers_graph(0xb4000070920313f0)
ggml_vk_preallocate_buffers_graph(0xb400007092031bc0)
ggml_vk_preallocate_buffers_graph(0xb400007092031ee0)
ggml_vk_preallocate_buffers_graph(0xb400007092032070)
ggml_vk_preallocate_buffers_graph(0xb400007092031580)
ggml_vk_preallocate_buffers_graph(0xb400007092031710)
ggml_vk_preallocate_buffers_graph(0xb400007092031d50)
ggml_vk_preallocate_buffers_graph(0xb400007092032200)
ggml_vk_preallocate_buffers_graph(0xb4000070920318a0)
ggml_vk_preallocate_buffers_graph(0xb400007092031a30)
ggml_vk_preallocate_buffers_graph(0xb4000070920326b0)
ggml_vk_preallocate_buffers_graph(0xb4000070920329d0)
ggml_vk_preallocate_buffers_graph(0xb400007092032390)
ggml_vk_preallocate_buffers_graph(0xb400007092032520)
ggml_vk_preallocate_buffers_graph(0xb400007092032840)
ggml_vk_preallocate_buffers_graph(0xb400007092032b60)
ggml_vk_preallocate_buffers_graph(0xb400007092033330)
ggml_vk_preallocate_buffers_graph(0xb400007092032e80)
ggml_vk_preallocate_buffers_graph(0xb400007092032cf0)
ggml_vk_preallocate_buffers_graph(0xb400007092033010)
ggml_vk_create_extra(0xb400007092033010 (kq-25, MUL_MAT))
ggml_vk_guess_split_k(32, 2, 80) = 1
ggml_vk_preallocate_buffers_graph(0xb4000070920331a0)
ggml_vk_preallocate_buffers_graph(0xb4000070920334c0)
ggml_vk_create_extra(0xb4000070920334c0 (kqv-25, MUL_MAT))
ggml_vk_guess_split_k(80, 2, 32) = 1
ggml_vk_preallocate_buffers_graph(0xb400007092033650)
ggml_vk_preallocate_buffers_graph(0xb4000070920337e0)
ggml_vk_preallocate_buffers_graph(0xb400007092033970)
ggml_vk_create_extra(0xb400007092033970 (kqv_wo-25, MUL_MAT))
ggml_vk_guess_split_k(2560, 2, 2560) = 1
ggml_vk_preallocate_buffers_graph(0xb400007092033b00)
ggml_vk_preallocate_buffers_graph(0xb400007092033c90)
ggml_vk_create_extra(0xb400007092033c90 (ffn_up-25, MUL_MAT))
ggml_vk_guess_split_k(10240, 2, 2560) = 1
ggml_vk_preallocate_buffers_graph(0xb400007092033e20)
ggml_vk_preallocate_buffers_graph(0xb400007092033fb0)
ggml_vk_preallocate_buffers_graph(0xb400007092034140)
ggml_vk_create_extra(0xb400007092034140 (ffn_down-25, MUL_MAT))
ggml_vk_guess_split_k(2560, 2, 10240) = 1
ggml_vk_preallocate_buffers_graph(0xb4000070920342d0)
ggml_vk_preallocate_buffers_graph(0xb400007092034460)
ggml_vk_preallocate_buffers_graph(0xb4000070920345f0)
ggml_vk_preallocate_buffers_graph(0xb400007092034780)
ggml_vk_preallocate_buffers_graph(0xb400007092034910)
ggml_vk_preallocate_buffers_graph(0xb400007092034aa0)
ggml_vk_preallocate_buffers_graph(0xb400007092034c30)
ggml_vk_create_extra(0xb400007092034c30 (wqkv-26, MUL_MAT))
ggml_vk_guess_split_k(7680, 2, 2560) = 1
ggml_vk_preallocate_buffers_graph(0xb400007092034dc0)
ggml_vk_preallocate_buffers_graph(0xb400007092034f50)
ggml_vk_preallocate_buffers_graph(0xb4000070920350e0)
ggml_vk_preallocate_buffers_graph(0xb4000070920358b0)
ggml_vk_preallocate_buffers_graph(0xb400007092035bd0)
ggml_vk_preallocate_buffers_graph(0xb400007092035d60)
ggml_vk_preallocate_buffers_graph(0xb400007092035270)
ggml_vk_preallocate_buffers_graph(0xb400007092035400)
ggml_vk_preallocate_buffers_graph(0xb400007092035a40)
ggml_vk_preallocate_buffers_graph(0xb400007092035ef0)
ggml_vk_preallocate_buffers_graph(0xb400007092035590)
ggml_vk_preallocate_buffers_graph(0xb400007092035720)
ggml_vk_preallocate_buffers_graph(0xb4000070920363a0)
ggml_vk_preallocate_buffers_graph(0xb4000070920366c0)
ggml_vk_preallocate_buffers_graph(0xb400007092036080)
ggml_vk_preallocate_buffers_graph(0xb400007092036210)
ggml_vk_preallocate_buffers_graph(0xb400007092036530)
ggml_vk_preallocate_buffers_graph(0xb400007092036850)
ggml_vk_preallocate_buffers_graph(0xb400007092037020)
ggml_vk_preallocate_buffers_graph(0xb400007092036b70)
ggml_vk_preallocate_buffers_graph(0xb4000070920369e0)
ggml_vk_preallocate_buffers_graph(0xb400007092036d00)
ggml_vk_create_extra(0xb400007092036d00 (kq-26, MUL_MAT))
ggml_vk_guess_split_k(32, 2, 80) = 1
ggml_vk_preallocate_buffers_graph(0xb400007092036e90)
ggml_vk_preallocate_buffers_graph(0xb4000070920371b0)
ggml_vk_create_extra(0xb4000070920371b0 (kqv-26, MUL_MAT))
ggml_vk_guess_split_k(80, 2, 32) = 1
ggml_vk_preallocate_buffers_graph(0xb400007092037340)
ggml_vk_preallocate_buffers_graph(0xb4000070920374d0)
ggml_vk_preallocate_buffers_graph(0xb400007092037660)
ggml_vk_create_extra(0xb400007092037660 (kqv_wo-26, MUL_MAT))
ggml_vk_guess_split_k(2560, 2, 2560) = 1
ggml_vk_preallocate_buffers_graph(0xb4000070920377f0)
ggml_vk_preallocate_buffers_graph(0xb400007092037980)
ggml_vk_create_extra(0xb400007092037980 (ffn_up-26, MUL_MAT))
ggml_vk_guess_split_k(10240, 2, 2560) = 1
ggml_vk_preallocate_buffers_graph(0xb400007092037b10)
ggml_vk_preallocate_buffers_graph(0xb400007092037ca0)
ggml_vk_preallocate_buffers_graph(0xb400007092037e30)
ggml_vk_create_extra(0xb400007092037e30 (ffn_down-26, MUL_MAT))
ggml_vk_guess_split_k(2560, 2, 10240) = 1
ggml_vk_preallocate_buffers_graph(0xb400007092037fc0)
ggml_vk_preallocate_buffers_graph(0xb400007092038150)
ggml_vk_preallocate_buffers_graph(0xb4000070920382e0)
ggml_vk_preallocate_buffers_graph(0xb400007092038470)
ggml_vk_preallocate_buffers_graph(0xb400007092038600)
ggml_vk_preallocate_buffers_graph(0xb400007092038790)
ggml_vk_preallocate_buffers_graph(0xb400007092038920)
ggml_vk_create_extra(0xb400007092038920 (wqkv-27, MUL_MAT))
ggml_vk_guess_split_k(7680, 2, 2560) = 1
ggml_vk_preallocate_buffers_graph(0xb400007092038ab0)
ggml_vk_preallocate_buffers_graph(0xb400007092038c40)
ggml_vk_preallocate_buffers_graph(0xb400007092038dd0)
ggml_vk_preallocate_buffers_graph(0xb4000070920395a0)
ggml_vk_preallocate_buffers_graph(0xb4000070920398c0)
ggml_vk_preallocate_buffers_graph(0xb400007092039a50)
ggml_vk_preallocate_buffers_graph(0xb400007092038f60)
ggml_vk_preallocate_buffers_graph(0xb4000070920390f0)
ggml_vk_preallocate_buffers_graph(0xb400007092039730)
ggml_vk_preallocate_buffers_graph(0xb400007092039be0)
ggml_vk_preallocate_buffers_graph(0xb400007092039280)
ggml_vk_preallocate_buffers_graph(0xb400007092039410)
ggml_vk_preallocate_buffers_graph(0xb40000709203a090)
ggml_vk_preallocate_buffers_graph(0xb40000709203a3b0)
ggml_vk_preallocate_buffers_graph(0xb400007092039d70)
ggml_vk_preallocate_buffers_graph(0xb400007092039f00)
ggml_vk_preallocate_buffers_graph(0xb40000709203a220)
ggml_vk_preallocate_buffers_graph(0xb40000709203a540)
ggml_vk_preallocate_buffers_graph(0xb40000709203ad10)
ggml_vk_preallocate_buffers_graph(0xb40000709203a860)
ggml_vk_preallocate_buffers_graph(0xb40000709203a6d0)
ggml_vk_preallocate_buffers_graph(0xb40000709203a9f0)
ggml_vk_create_extra(0xb40000709203a9f0 (kq-27, MUL_MAT))
ggml_vk_guess_split_k(32, 2, 80) = 1
ggml_vk_preallocate_buffers_graph(0xb40000709203ab80)
ggml_vk_preallocate_buffers_graph(0xb40000709203aea0)
ggml_vk_create_extra(0xb40000709203aea0 (kqv-27, MUL_MAT))
ggml_vk_guess_split_k(80, 2, 32) = 1
ggml_vk_preallocate_buffers_graph(0xb40000709203b030)
ggml_vk_preallocate_buffers_graph(0xb40000709203b1c0)
ggml_vk_preallocate_buffers_graph(0xb40000709203b350)
ggml_vk_create_extra(0xb40000709203b350 (kqv_wo-27, MUL_MAT))
ggml_vk_guess_split_k(2560, 2, 2560) = 1
ggml_vk_preallocate_buffers_graph(0xb40000709203b4e0)
ggml_vk_preallocate_buffers_graph(0xb40000709203b670)
ggml_vk_create_extra(0xb40000709203b670 (ffn_up-27, MUL_MAT))
ggml_vk_guess_split_k(10240, 2, 2560) = 1
ggml_vk_preallocate_buffers_graph(0xb40000709203b800)
ggml_vk_preallocate_buffers_graph(0xb40000709203b990)
ggml_vk_preallocate_buffers_graph(0xb40000709203bb20)
ggml_vk_create_extra(0xb40000709203bb20 (ffn_down-27, MUL_MAT))
ggml_vk_guess_split_k(2560, 2, 10240) = 1
ggml_vk_preallocate_buffers_graph(0xb40000709203bcb0)
ggml_vk_preallocate_buffers_graph(0xb40000709203be40)
ggml_vk_preallocate_buffers_graph(0xb40000709203bfd0)
ggml_vk_preallocate_buffers_graph(0xb40000709203c160)
ggml_vk_preallocate_buffers_graph(0xb40000709203c2f0)
ggml_vk_preallocate_buffers_graph(0xb40000709203c480)
ggml_vk_preallocate_buffers_graph(0xb40000709203c610)
ggml_vk_create_extra(0xb40000709203c610 (wqkv-28, MUL_MAT))
ggml_vk_guess_split_k(7680, 2, 2560) = 1
ggml_vk_preallocate_buffers_graph(0xb40000709203c7a0)
ggml_vk_preallocate_buffers_graph(0xb40000709203c930)
ggml_vk_preallocate_buffers_graph(0xb40000709203cac0)
ggml_vk_preallocate_buffers_graph(0xb40000709203d290)
ggml_vk_preallocate_buffers_graph(0xb40000709203d5b0)
ggml_vk_preallocate_buffers_graph(0xb40000709203d740)
ggml_vk_preallocate_buffers_graph(0xb40000709203cc50)
ggml_vk_preallocate_buffers_graph(0xb40000709203cde0)
ggml_vk_preallocate_buffers_graph(0xb40000709203d420)
ggml_vk_preallocate_buffers_graph(0xb40000709203d8d0)
ggml_vk_preallocate_buffers_graph(0xb40000709203cf70)
ggml_vk_preallocate_buffers_graph(0xb40000709203d100)
ggml_vk_preallocate_buffers_graph(0xb40000709203dd80)
ggml_vk_preallocate_buffers_graph(0xb40000709203e0a0)
ggml_vk_preallocate_buffers_graph(0xb40000709203da60)
ggml_vk_preallocate_buffers_graph(0xb40000709203dbf0)
ggml_vk_preallocate_buffers_graph(0xb40000709203df10)
ggml_vk_preallocate_buffers_graph(0xb40000709203e230)
ggml_vk_preallocate_buffers_graph(0xb40000709203ea00)
ggml_vk_preallocate_buffers_graph(0xb40000709203e550)
ggml_vk_preallocate_buffers_graph(0xb40000709203e3c0)
ggml_vk_preallocate_buffers_graph(0xb40000709203e6e0)
ggml_vk_create_extra(0xb40000709203e6e0 (kq-28, MUL_MAT))
ggml_vk_guess_split_k(32, 2, 80) = 1
ggml_vk_preallocate_buffers_graph(0xb40000709203e870)
ggml_vk_preallocate_buffers_graph(0xb40000709203eb90)
ggml_vk_create_extra(0xb40000709203eb90 (kqv-28, MUL_MAT))
ggml_vk_guess_split_k(80, 2, 32) = 1
ggml_vk_preallocate_buffers_graph(0xb40000709203ed20)
ggml_vk_preallocate_buffers_graph(0xb40000709203eeb0)
ggml_vk_preallocate_buffers_graph(0xb40000709203f040)
ggml_vk_create_extra(0xb40000709203f040 (kqv_wo-28, MUL_MAT))
ggml_vk_guess_split_k(2560, 2, 2560) = 1
ggml_vk_preallocate_buffers_graph(0xb40000709203f1d0)
ggml_vk_preallocate_buffers_graph(0xb40000709203f360)
ggml_vk_create_extra(0xb40000709203f360 (ffn_up-28, MUL_MAT))
ggml_vk_guess_split_k(10240, 2, 2560) = 1
ggml_vk_preallocate_buffers_graph(0xb40000709203f4f0)
ggml_vk_preallocate_buffers_graph(0xb40000709203f680)
ggml_vk_preallocate_buffers_graph(0xb40000709203f810)
ggml_vk_create_extra(0xb40000709203f810 (ffn_down-28, MUL_MAT))
ggml_vk_guess_split_k(2560, 2, 10240) = 1
ggml_vk_preallocate_buffers_graph(0xb40000709203f9a0)
ggml_vk_preallocate_buffers_graph(0xb40000709203fb30)
ggml_vk_preallocate_buffers_graph(0xb40000709203fcc0)
ggml_vk_preallocate_buffers_graph(0xb40000709203fe50)
ggml_vk_preallocate_buffers_graph(0xb40000709203ffe0)
ggml_vk_preallocate_buffers_graph(0xb400007092040170)
ggml_vk_preallocate_buffers_graph(0xb400007092040300)
ggml_vk_create_extra(0xb400007092040300 (wqkv-29, MUL_MAT))
ggml_vk_guess_split_k(7680, 2, 2560) = 1
ggml_vk_preallocate_buffers_graph(0xb400007092040490)
ggml_vk_preallocate_buffers_graph(0xb400007092040620)
ggml_vk_preallocate_buffers_graph(0xb4000070920407b0)
ggml_vk_preallocate_buffers_graph(0xb400007092040f80)
ggml_vk_preallocate_buffers_graph(0xb4000070920412a0)
ggml_vk_preallocate_buffers_graph(0xb400007092041430)
ggml_vk_preallocate_buffers_graph(0xb400007092040940)
ggml_vk_preallocate_buffers_graph(0xb400007092040ad0)
ggml_vk_preallocate_buffers_graph(0xb400007092041110)
ggml_vk_preallocate_buffers_graph(0xb4000070920415c0)
ggml_vk_preallocate_buffers_graph(0xb400007092040c60)
ggml_vk_preallocate_buffers_graph(0xb400007092040df0)
ggml_vk_preallocate_buffers_graph(0xb400007092041a70)
ggml_vk_preallocate_buffers_graph(0xb400007092041d90)
ggml_vk_preallocate_buffers_graph(0xb400007092041750)
ggml_vk_preallocate_buffers_graph(0xb4000070920418e0)
ggml_vk_preallocate_buffers_graph(0xb400007092041c00)
ggml_vk_preallocate_buffers_graph(0xb400007092041f20)
ggml_vk_preallocate_buffers_graph(0xb4000070920426f0)
ggml_vk_preallocate_buffers_graph(0xb400007092042240)
ggml_vk_preallocate_buffers_graph(0xb4000070920420b0)
ggml_vk_preallocate_buffers_graph(0xb4000070920423d0)
ggml_vk_create_extra(0xb4000070920423d0 (kq-29, MUL_MAT))
ggml_vk_guess_split_k(32, 2, 80) = 1
ggml_vk_preallocate_buffers_graph(0xb400007092042560)
ggml_vk_preallocate_buffers_graph(0xb400007092042880)
ggml_vk_create_extra(0xb400007092042880 (kqv-29, MUL_MAT))
ggml_vk_guess_split_k(80, 2, 32) = 1
ggml_vk_preallocate_buffers_graph(0xb400007092042a10)
ggml_vk_preallocate_buffers_graph(0xb400007092042ba0)
ggml_vk_preallocate_buffers_graph(0xb400007092042d30)
ggml_vk_create_extra(0xb400007092042d30 (kqv_wo-29, MUL_MAT))
ggml_vk_guess_split_k(2560, 2, 2560) = 1
ggml_vk_preallocate_buffers_graph(0xb400007092042ec0)
ggml_vk_preallocate_buffers_graph(0xb400007092043050)
ggml_vk_create_extra(0xb400007092043050 (ffn_up-29, MUL_MAT))
ggml_vk_guess_split_k(10240, 2, 2560) = 1
ggml_vk_preallocate_buffers_graph(0xb4000070920431e0)
ggml_vk_preallocate_buffers_graph(0xb400007092043370)
ggml_vk_preallocate_buffers_graph(0xb400007092043500)
ggml_vk_create_extra(0xb400007092043500 (ffn_down-29, MUL_MAT))
ggml_vk_guess_split_k(2560, 2, 10240) = 1
ggml_vk_preallocate_buffers_graph(0xb400007092043690)
ggml_vk_preallocate_buffers_graph(0xb400007092043820)
ggml_vk_preallocate_buffers_graph(0xb4000070920439b0)
ggml_vk_preallocate_buffers_graph(0xb400007092043b40)
ggml_vk_preallocate_buffers_graph(0xb400007092043cd0)
ggml_vk_preallocate_buffers_graph(0xb400007092043e60)
ggml_vk_preallocate_buffers_graph(0xb400007092043ff0)
ggml_vk_create_extra(0xb400007092043ff0 (wqkv-30, MUL_MAT))
ggml_vk_guess_split_k(7680, 2, 2560) = 1
ggml_vk_preallocate_buffers_graph(0xb400007092044180)
ggml_vk_preallocate_buffers_graph(0xb400007092044310)
ggml_vk_preallocate_buffers_graph(0xb4000070920444a0)
ggml_vk_preallocate_buffers_graph(0xb400007092044c70)
ggml_vk_preallocate_buffers_graph(0xb400007092044f90)
ggml_vk_preallocate_buffers_graph(0xb400007092045120)
ggml_vk_preallocate_buffers_graph(0xb400007092044630)
ggml_vk_preallocate_buffers_graph(0xb4000070920447c0)
ggml_vk_preallocate_buffers_graph(0xb400007092044e00)
ggml_vk_preallocate_buffers_graph(0xb4000070920452b0)
ggml_vk_preallocate_buffers_graph(0xb400007092044950)
ggml_vk_preallocate_buffers_graph(0xb400007092044ae0)
ggml_vk_preallocate_buffers_graph(0xb400007092045760)
ggml_vk_preallocate_buffers_graph(0xb400007092045a80)
ggml_vk_preallocate_buffers_graph(0xb400007092045440)
ggml_vk_preallocate_buffers_graph(0xb4000070920455d0)
ggml_vk_preallocate_buffers_graph(0xb4000070920458f0)
ggml_vk_preallocate_buffers_graph(0xb400007092045c10)
ggml_vk_preallocate_buffers_graph(0xb4000070920463e0)
ggml_vk_preallocate_buffers_graph(0xb400007092045f30)
ggml_vk_preallocate_buffers_graph(0xb400007092045da0)
ggml_vk_preallocate_buffers_graph(0xb4000070920460c0)
ggml_vk_create_extra(0xb4000070920460c0 (kq-30, MUL_MAT))
ggml_vk_guess_split_k(32, 2, 80) = 1
ggml_vk_preallocate_buffers_graph(0xb400007092046250)
ggml_vk_preallocate_buffers_graph(0xb400007092046570)
ggml_vk_create_extra(0xb400007092046570 (kqv-30, MUL_MAT))
ggml_vk_guess_split_k(80, 2, 32) = 1
ggml_vk_preallocate_buffers_graph(0xb400007092046700)
ggml_vk_preallocate_buffers_graph(0xb400007092046890)
ggml_vk_preallocate_buffers_graph(0xb400007092046a20)
ggml_vk_create_extra(0xb400007092046a20 (kqv_wo-30, MUL_MAT))
ggml_vk_guess_split_k(2560, 2, 2560) = 1
ggml_vk_preallocate_buffers_graph(0xb400007092046bb0)
ggml_vk_preallocate_buffers_graph(0xb400007092046d40)
ggml_vk_create_extra(0xb400007092046d40 (ffn_up-30, MUL_MAT))
ggml_vk_guess_split_k(10240, 2, 2560) = 1
ggml_vk_preallocate_buffers_graph(0xb400007092046ed0)
ggml_vk_preallocate_buffers_graph(0xb400007092047060)
ggml_vk_preallocate_buffers_graph(0xb4000070920471f0)
ggml_vk_create_extra(0xb4000070920471f0 (ffn_down-30, MUL_MAT))
ggml_vk_guess_split_k(2560, 2, 10240) = 1
ggml_vk_preallocate_buffers_graph(0xb400007092047380)
ggml_vk_preallocate_buffers_graph(0xb400007092047510)
ggml_vk_preallocate_buffers_graph(0xb4000070920476a0)
ggml_vk_preallocate_buffers_graph(0xb400007092047830)
ggml_vk_preallocate_buffers_graph(0xb4000070920479c0)
ggml_vk_preallocate_buffers_graph(0xb400007092047b50)
ggml_vk_preallocate_buffers_graph(0xb400007092047ce0)
ggml_vk_create_extra(0xb400007092047ce0 (wqkv-31, MUL_MAT))
ggml_vk_guess_split_k(7680, 2, 2560) = 1
ggml_vk_preallocate_buffers_graph(0xb400007092047e70)
ggml_vk_preallocate_buffers_graph(0xb400007092048000)
ggml_vk_preallocate_buffers_graph(0xb400007092048190)
ggml_vk_preallocate_buffers_graph(0xb400007092048960)
ggml_vk_preallocate_buffers_graph(0xb400007092048c80)
ggml_vk_preallocate_buffers_graph(0xb400007092048e10)
ggml_vk_preallocate_buffers_graph(0xb400007092048320)
ggml_vk_preallocate_buffers_graph(0xb4000070920484b0)
ggml_vk_preallocate_buffers_graph(0xb400007092048af0)
ggml_vk_preallocate_buffers_graph(0xb400007092048fa0)
ggml_vk_preallocate_buffers_graph(0xb400007092048640)
ggml_vk_preallocate_buffers_graph(0xb4000070920487d0)
ggml_vk_preallocate_buffers_graph(0xb400007092049450)
ggml_vk_preallocate_buffers_graph(0xb400007092049770)
ggml_vk_preallocate_buffers_graph(0xb400007092049130)
ggml_vk_preallocate_buffers_graph(0xb4000070920492c0)
ggml_vk_preallocate_buffers_graph(0xb4000070920495e0)
ggml_vk_preallocate_buffers_graph(0xb400007092049900)
ggml_vk_preallocate_buffers_graph(0xb40000709204a0d0)
ggml_vk_preallocate_buffers_graph(0xb400007092049c20)
ggml_vk_preallocate_buffers_graph(0xb400007092049a90)
ggml_vk_preallocate_buffers_graph(0xb400007092049db0)
ggml_vk_create_extra(0xb400007092049db0 (kq-31, MUL_MAT))
ggml_vk_guess_split_k(32, 2, 80) = 1
ggml_vk_preallocate_buffers_graph(0xb400007092049f40)
ggml_vk_preallocate_buffers_graph(0xb40000709204a260)
ggml_vk_create_extra(0xb40000709204a260 (kqv-31, MUL_MAT))
ggml_vk_guess_split_k(80, 2, 32) = 1
ggml_vk_preallocate_buffers_graph(0xb40000709204a3f0)
ggml_vk_preallocate_buffers_graph(0xb40000709204a580)
ggml_vk_preallocate_buffers_graph(0xb40000709204a710)
ggml_vk_create_extra(0xb40000709204a710 (kqv_wo-31, MUL_MAT))
ggml_vk_guess_split_k(2560, 2, 2560) = 1
ggml_vk_preallocate_buffers_graph(0xb40000709204a8a0)
ggml_vk_preallocate_buffers_graph(0xb40000709204aa30)
ggml_vk_create_extra(0xb40000709204aa30 (ffn_up-31, MUL_MAT))
ggml_vk_guess_split_k(10240, 2, 2560) = 1
ggml_vk_preallocate_buffers_graph(0xb40000709204abc0)
ggml_vk_preallocate_buffers_graph(0xb40000709204ad50)
ggml_vk_preallocate_buffers_graph(0xb40000709204aee0)
ggml_vk_create_extra(0xb40000709204aee0 (ffn_down-31, MUL_MAT))
ggml_vk_guess_split_k(2560, 2, 10240) = 1
ggml_vk_preallocate_buffers_graph(0xb40000709204b070)
ggml_vk_preallocate_buffers_graph(0xb40000709204b200)
ggml_vk_preallocate_buffers_graph(0xb40000709204b390)
ggml_vk_preallocate_buffers_graph(0xb40000709204b520)
ggml_vk_preallocate_buffers_graph(0xb40000709204b6b0)
ggml_vk_preallocate_buffers_graph(0xb40000709204b840)
ggml_vk_preallocate_buffers_graph(0xb40000709204b9d0)
ggml_vk_create_extra(0xb40000709204b9d0 (result_output_no_bias, MUL_MAT))
ggml_vk_guess_split_k(51200, 2, 2560) = 1
ggml_vk_pool_malloc(409600)
ggml_vk_create_buffer(409600, { DeviceLocal })
ggml_vk_preallocate_buffers_graph(0xb40000709204bb60)
ggml_vk_preallocate_buffers()
qx_size: 107520000 qy_size: 81920 x_size: 262144000 y_size: 81920 split_k_size: 0
ggml_vk_create_buffer(104857600, { HostVisible | HostCoherent | HostCached })
ggml_vulkan: Memory allocation of size 104857600 failed.
ggml_vulkan: No suitable memory type found: ErrorOutOfDeviceMemory
libc++abi: terminating due to uncaught exception of type vk::SystemError: No suitable memory type found: ErrorOutOfDeviceMemory
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment