Last active
March 7, 2025 19:07
Using Outlines to get structured output from R1
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import time | |
from typing import Literal | |
import outlines | |
import re | |
import torch | |
from transformers import AutoTokenizer | |
from outlines.fsm.json_schema import convert_json_schema_to_str | |
from outlines_core.fsm.json_schema import build_regex_from_schema | |
from pydantic import BaseModel | |
# Loading the model.
# model_string = 'deepseek-ai/DeepSeek-R1-Distill-Qwen-7B'
model_string = 'deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B' # For smaller machines
# We'll use the transformers backend for this model, but you can use any other backend.
model = outlines.models.transformers(model_string, device='cuda')  # requires a CUDA-capable GPU
tokenizer = AutoTokenizer.from_pretrained(model_string)  # used only to apply the chat template below
# Set up response format you want the LLM to respond with.
class YesNo(BaseModel):
    """Structured response schema: a single ``answer`` field constrained to 'yes' or 'no'."""
    answer: Literal['yes', 'no']
# Build a regex from the JSON schema so generation is constrained to valid output.
yesno_regex = build_regex_from_schema(convert_json_schema_to_str(YesNo))
# Add the thinking prefix to the regex. The alternation matches any text that
# does not contain a closing '</think>' tag. The repeated alternation is a
# NON-capturing group wrapped in ONE outer capturing group, so that
# re.search(thinking_regex, ...).group(1) yields the entire chain of thought.
# (A capturing group placed directly under '*' would retain only the final
# one-or-two-character repetition — a subtle extraction bug.)
thinking_regex = (
    r'<think>((?:[^<]|<[^/]|</[^t]|</t[^h]|</th[^i]|'
    r'</thi[^n]|</thin[^k]|</think[^>])*)</think>\n'
)
result_regex = thinking_regex + yesno_regex
print(result_regex)
# Apply the chat template so the prompt matches the model's expected turn format.
prompt = tokenizer.apply_chat_template(
    [
        {'role': 'system', 'content': 'You are a helpful assistant.'},
        {'role': 'user', 'content': 'Roses are red. Violets are blue. Are roses and violets the same color? Yes or no. Provide a chain of thought inside a <think> tag, closing with </think> when you are finished. After, please write JSON with the following schema: {"answer": "yes" | "no"}'},
    ],
    tokenize=False,              # return the templated prompt as a string, not token ids
    add_generation_prompt=True,  # append the assistant header so the model starts its reply
)
# Generator: compiling the regex into a token-level FSM can take a while,
# so we time it to make the one-off cost visible.
start_time = time.time()
generator = outlines.generate.regex(model, result_regex)
end_time = time.time()
print(f"Time taken to create generator: {end_time - start_time} seconds")
# Generate the response (constrained to match result_regex).
result = generator(prompt, max_tokens=1000)
print(result)
# Parse out the thinking + structured result.
# NOTE: the original used re.search(thinking_regex, result).group(1), but that
# capture group sits directly under a '*' repetition, so group(1) holds only
# the LAST repetition (one or two characters) rather than the whole chain of
# thought. A self-contained non-greedy DOTALL search captures the full
# <think> body instead, and we guard against a missing match rather than
# raising AttributeError on None.
thinking_match = re.search(r'<think>(.*?)</think>', result, re.DOTALL)
thinking_result = thinking_match.group(1).strip() if thinking_match else ''
structured_result = re.search(yesno_regex, result).group(0).strip()
# Print the result. First the chain of thought, then the structured JSON.
print("Chain of thought")
print("----------------")
print(thinking_result)
print("\nStructured output")
print("----------------")
print(structured_result)
# Parse the structured result into the Pydantic model (validates the JSON).
output = YesNo.model_validate_json(structured_result)
print("\nPydantic output")
print("----------------")
print(output)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
If you want to adapt this for your own use, you should only need to change the prompt and the Pydantic response structure.