@davidmezzetti
Last active July 15, 2024 21:08
from txtai import LLM
# Hugging Face models
llm = LLM("google/gemma-2-9b")
# llama.cpp models, automatically downloaded from the Hugging Face Hub
llm = LLM("bartowski/gemma-2-9b-it-GGUF/gemma-2-9b-it-Q4_K_M.gguf")
# Models served via APIs (OpenAI / Claude / Ollama)
llm = LLM("gpt-4o")
llm = LLM("claude-3-5-sonnet")
llm = LLM("ollama/mistral")
# Inputs as prompt strings
llm("Tell me how to solve complex math problems")
# Inputs as chat messages
llm([
    {"role": "system", "content": "You are a helpful assistant"},
    {"role": "user", "content": "List things to do in DC"}
])
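Chat messages are plain Python dicts, so conversation history can be assembled programmatically before each call. A minimal sketch of that pattern; the `build_messages` helper is hypothetical, not part of txtai:

```python
def build_messages(user_text, system="You are a helpful assistant", history=None):
    """Assemble a chat message list in the role/content format shown above."""
    messages = [{"role": "system", "content": system}]
    # Prior turns, if any, go between the system prompt and the new user message
    messages.extend(history or [])
    messages.append({"role": "user", "content": user_text})
    return messages

msgs = build_messages("List things to do in DC")
# llm(msgs) would then run the chat-style call shown above
```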
# Run as a FastAPI service
# config.yml:
# llm:
#   path: google/gemma-2-9b
$ CONFIG=config.yml uvicorn "txtai.api:app"
$ curl "http://localhost:8000/llm?text=prompt+string"
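The `text` query parameter must be URL-encoded when a prompt contains spaces or punctuation. A sketch using only the standard library; the host and port are the uvicorn defaults, and the `/llm` path matches the curl call above:

```python
from urllib.parse import urlencode

def llm_url(prompt, base="http://localhost:8000"):
    # Build the GET URL for the /llm endpoint, percent-encoding the prompt
    return f"{base}/llm?{urlencode({'text': prompt})}"

print(llm_url("Tell me how to solve complex math problems"))
# http://localhost:8000/llm?text=Tell+me+how+to+solve+complex+math+problems
```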
# Run as a Docker API service
$ docker build -t txtai-api --build-arg BASE_IMAGE=neuml/txtai-gpu api/.
$ docker run -p 8000:8000 txtai-api