pip install --upgrade git+https://github.com/ThatOneDevGuy/guidance_instructor
guidance_instructor lets you reliably get structured outputs from open-source LLMs using guidance + pydantic.
- guidance is a library for restricting LLM outputs based on regular expressions and context-free grammars.
- pydantic is an OpenAPI-friendly library for defining data classes with strong typing and validation.
from enum import Enum
from typing import Optional
from typing_extensions import Annotated
import guidance
from guidance_instructor import generate_object
from pydantic import BaseModel
# Load a chat-finetuned mistral model from a local GGUF file.
model_path = "/bulk/models/openhermes-2.5-mistral-7b.Q5_K_M.gguf"
model = guidance.models.LlamaCppChat(model_path)
# Create a sample pydantic class. This nests a FruitEnum inside a SimpleClass.
# Closed set of fruits. Mixing in `str` makes every member a real string
# that compares equal to its value (e.g. FruitEnum.apple == "apple").
# NOTE: documented with comments rather than a docstring so the class's
# __doc__ is left exactly as the enum machinery sets it.
class FruitEnum(str, Enum):
    pear = "pear"
    banana = "banana"
    apple = "apple"
# Sample pydantic model describing a pony; it nests a FruitEnum field.
# The string inside each Annotated[...] is attached to the field as metadata
# (presumably used as a per-field hint during generation -- confirm against
# guidance_instructor).
class SimpleClass(BaseModel):
    name: Annotated[str, "Put the pony's name here."]
    species: Annotated[str, "Put the pony's species here."]
    # Optional: the value may be None when no fruit applies.
    favorite_fruit: Optional[FruitEnum]
# Send the 'user' message to the LLM. In guidance, messages and generations get appended
# to some pseudo-string that begins with the underlying model. The `with guidance.user()`
# notation is how guidance abstracts different roles that the LLM recognizes.
with guidance.user():
    lm = model + "Extract the following into an object: Applejack is a cute earth pony mare with three apples for a cutie mark, which happen to be her favorite fruit."

# Read the 'assistant' message from the LLM
with guidance.assistant():
    # This returns an "lm" object, which can be continued for further generations, plus an
    # "applejack" object, which contains a SimpleClass representation of Applejack's
    # information as described in the user instruction above.
    lm, applejack = generate_object(lm, SimpleClass)

print(applejack)
# prints: {'name': 'Applejack', 'species': 'earth pony', 'favorite_fruit': 'apple'}