Skip to content

Instantly share code, notes, and snippets.

@cpfiffer
Last active March 7, 2025 19:07
Using Outlines to get structured output from R1
import time
from typing import Literal
import outlines
import re
import torch
from transformers import AutoTokenizer
from outlines.fsm.json_schema import convert_json_schema_to_str
from outlines_core.fsm.json_schema import build_regex_from_schema
from pydantic import BaseModel
# Load the model and its tokenizer.
# Larger distill — uncomment to use it instead of the 1.5B model below.
# model_string = 'deepseek-ai/DeepSeek-R1-Distill-Qwen-7B'
model_string = 'deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B' # For smaller machines
# This uses the `transformers` backend on CUDA; outlines supports other
# backends (e.g. vllm) — swap the factory call to use one of those instead.
model = outlines.models.transformers(model_string, device='cuda')
# Separate tokenizer instance, used below only to apply the chat template.
tokenizer = AutoTokenizer.from_pretrained(model_string)
# Set up response format you want the LLM to respond with.
class YesNo(BaseModel):
    """Structured answer schema: a single field constrained to 'yes' or 'no'.

    outlines compiles this schema into a regex so generation can only emit
    JSON of the form {"answer": "yes"} or {"answer": "no"}.
    """

    # NOTE: the scraped source had this field at column 0, which is an
    # IndentationError; it must be indented inside the class body.
    answer: Literal['yes', 'no']
# Regex matching exactly the JSON documents that conform to the YesNo schema.
yesno_regex = build_regex_from_schema(convert_json_schema_to_str(YesNo))
# Add the thinking prefix to the regex: the alternation below accepts any
# character run that does not contain the literal closing tag '</think>'
# (each branch rules out one more character of the tag), then requires the
# closing tag followed by a newline.
thinking_regex = r'<think>([^<]|<[^\/]|<\/[^t]|<\/t[^h]|<\/th[^i]|<\/thi[^n]|<\/thin[^k]|<\/think[^>])*<\/think>\n'
# Full constraint: a <think>...</think> block, then the schema-shaped JSON.
result_regex = thinking_regex + yesno_regex
print(result_regex)
# Apply the chat template so the prompt is formatted the way the model was
# trained to receive conversations. tokenize=False returns the templated
# string (not token ids); add_generation_prompt=True appends the assistant
# turn marker so the model starts generating the answer.
prompt = tokenizer.apply_chat_template(
[
{'role': 'system', 'content': 'You are a helpful assistant.'},
{'role': 'user', 'content': 'Roses are red. Violets are blue. Are roses and violets the same color? Yes or no. Provide a chain of thought inside a <think> tag, closing with </think> when you are finished. After, please write JSON with the following schema: {"answer": "yes" | "no"}'},
],
tokenize=False,
add_generation_prompt=True,
)
# Build the regex-constrained generator. Compiling the regex into a
# token-level FSM can take noticeable time, so we measure it — using
# time.perf_counter(), a monotonic clock meant for interval timing
# (time.time() is wall-clock and can jump, e.g. on NTP adjustment).
start_time = time.perf_counter()
generator = outlines.generate.regex(model, result_regex)
end_time = time.perf_counter()
print(f"Time taken to create generator: {end_time - start_time} seconds")
# Generate the response (thinking section + JSON), capped at 1000 new tokens;
# if the cap is hit mid-thought the output may lack the closing JSON.
result = generator(prompt, max_tokens=1000)
print(result)
# Parse out the thinking + structured result.
# BUG FIX: thinking_regex's capture group sits inside a (...)* repetition,
# and Python's re module keeps only the LAST repetition for such a group,
# so .group(1) on it returned just the final character(s) of the chain of
# thought. A simple non-greedy pattern extracts the full content instead.
thinking_match = re.search(r'<think>(.*?)</think>', result, re.DOTALL)
if thinking_match is None:
    raise ValueError('No <think>...</think> section found in model output')
thinking_result = thinking_match.group(1).strip()
# Search for the JSON answer only after the thinking section, so any
# JSON-looking text inside the chain of thought cannot be matched by mistake.
structured_match = re.search(yesno_regex, result[thinking_match.end():])
if structured_match is None:
    raise ValueError('No structured JSON answer found in model output')
structured_result = structured_match.group(0).strip()
# Print the result. The first section is the thinking, the second is the
# structured JSON the model was constrained to emit.
print("Chain of thought")
print("----------------")
print(thinking_result)
print("\nStructured output")
print("----------------")
print(structured_result)
# Validate the JSON against the schema, yielding a typed pydantic object.
output = YesNo.model_validate_json(structured_result)
print("\nPydantic output")
print("----------------")
print(output)
@cpfiffer
Copy link
Author

cpfiffer commented Feb 4, 2025

If you want to use this, you should only need to change your prompt and the structure:

# Set up response format you want the LLM to respond with.
class YesNo(BaseModel):
    answer: Literal['yes', 'no']

yesno_regex = build_regex_from_schema(convert_json_schema_to_str(YesNo))

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment