import os
import json
import typing
import pathlib
import argparse
import numpy as np
import numpy.typing as npt
import gguf
from gguf import (
    KEY_ATTENTION_HEAD_COUNT,
    KEY_ATTENTION_LAYERNORM_EPS,
    KEY_BLOCK_COUNT,
    KEY_EMBEDDING_LENGTH,
    KEY_FEED_FORWARD_LENGTH,
    GGUFWriter,
    TokenType,
    SpecialVocab,
)
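
# Below is a minimal, hypothetical sketch (not part of the original gist) of how
# these imports fit together: a GGUFWriter records the hyperparameters named by
# the imported KEY_* constants through its add_* helpers, then streams tensors
# out. The architecture name and every value here are placeholder assumptions.
def _example_write_gguf(out_path: str) -> None:
    writer = GGUFWriter(out_path, arch="llama")  # "llama" is an assumed architecture
    writer.add_block_count(32)             # KEY_BLOCK_COUNT
    writer.add_embedding_length(4096)      # KEY_EMBEDDING_LENGTH
    writer.add_feed_forward_length(11008)  # KEY_FEED_FORWARD_LENGTH
    writer.add_head_count(32)              # KEY_ATTENTION_HEAD_COUNT
    writer.add_layer_norm_eps(1e-5)        # KEY_ATTENTION_LAYERNORM_EPS
    # Tensors must be registered before the header and KV data are written,
    # so that the header's tensor count is correct.
    writer.add_tensor("token_embd.weight", np.zeros((32000, 4096), dtype=np.float32))
    writer.write_header_to_file()
    writer.write_kv_data_to_file()
    writer.write_tensors_to_file()
    writer.close()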
import argparse
import json
import os
import typing

import numpy as np
import numpy.typing as npt

import gguf
from safetensors import safe_open
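
# A hypothetical sketch (not from the original gist) of reading a safetensors
# checkpoint with safe_open as numpy arrays; the framework string "np" and the
# file path passed by the caller are assumptions for illustration.
def _example_load_safetensors(path: str) -> typing.Dict[str, npt.NDArray]:
    tensors: typing.Dict[str, npt.NDArray] = {}
    with safe_open(path, framework="np") as f:
        for name in f.keys():
            tensors[name] = f.get_tensor(name)  # one tensor loaded per key
    return tensors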
from llama_cpp import Llama
from llama_cpp.llama_chat_format import Llava15ChatHandler
chat_handler = Llava15ChatHandler(clip_model_path="llava-1.6-mistral-7b-gguf/mmproj-model-f16.gguf")
llm = Llama(
    model_path="llava-1.6-mistral-7b-gguf/llava-v1.6-mistral-7b.Q4_K_M.gguf",
    chat_handler=chat_handler,
    n_ctx=2048,  # n_ctx should be increased to accommodate the image embedding
    logits_all=True,  # needed to make LLaVA work
    n_gpu_layers=-1,
)
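
# A short usage sketch for the model configured above; the image URL and prompt
# below are placeholders. Llava15ChatHandler accepts OpenAI-style chat messages
# whose content mixes image_url and text parts.
response = llm.create_chat_completion(
    messages=[
        {"role": "system", "content": "You are an assistant who describes images accurately."},
        {
            "role": "user",
            "content": [
                {"type": "image_url", "image_url": {"url": "https://example.com/image.png"}},
                {"type": "text", "text": "Describe this image in detail."},
            ],
        },
    ]
)
print(response["choices"][0]["message"]["content"])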