@abetlen
Last active April 6, 2024 19:42
from llama_cpp import Llama
from llama_cpp.llama_chat_format import Llava15ChatHandler

chat_handler = Llava15ChatHandler(clip_model_path="llava-1.6-mistral-7b-gguf/mmproj-model-f16.gguf")
llm = Llama(
    model_path="llava-1.6-mistral-7b-gguf/llava-v1.6-mistral-7b.Q4_K_M.gguf",
    chat_handler=chat_handler,
    n_ctx=2048,  # n_ctx should be increased to accommodate the image embedding
    logits_all=True,  # needed to make llava work
    n_gpu_layers=-1,
)
llm.create_chat_completion(
    messages=[
        {
            "role": "user",
            "content": [
                {
                    "type": "image_url",
                    "image_url": {
                        "url": "https://user-images.githubusercontent.com/1991296/230134379-7181e485-c521-4d23-a0d6-f7b3b61ba524.png",
                    },
                },
                {
                    "type": "text",
                    "text": "What does the image say? Format your response as a json object with a single 'text' key.",
                },
            ],
        }
    ],
    response_format={
        "type": "json_object",
        "schema": {
            "type": "object",
            "properties": {
                "text": {"type": "string"},
            },
        },
    },
)
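create_chat_completion returns an OpenAI-style completion dict, and even with response_format set to json_object the model's JSON arrives as a string inside the message content. A minimal sketch of pulling it back out — the response value below is a hypothetical stand-in with made-up content, not real model output; only its shape is assumed:

```python
import json

# Hypothetical stand-in for the dict returned by llm.create_chat_completion;
# real content depends on the model, only the structure matters here.
response = {
    "choices": [
        {"message": {"role": "assistant", "content": '{"text": "example"}'}}
    ]
}

# The schema-constrained output is still a JSON string, so parse it.
content = response["choices"][0]["message"]["content"]
parsed = json.loads(content)
print(parsed["text"])  # -> example
```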
pip install llama-cpp-python --extra-index-url https://abetlen.github.io/llama-cpp-python/whl/metal
GIT_LFS_SKIP_SMUDGE=1 git clone https://huggingface.co/cjpais/llava-1.6-mistral-7b-gguf
cd llava-1.6-mistral-7b-gguf
git lfs pull --include=llava-v1.6-mistral-7b.Q4_K_M.gguf
git lfs pull --include=mmproj-model-f16.gguf
cd ..
python3 main.py