Skip to content

Instantly share code, notes, and snippets.

@cpfiffer
Created February 7, 2025 20:03
Show Gist options
  • Save cpfiffer/998ce05d8fd089f143484a298e444689 to your computer and use it in GitHub Desktop.
Save cpfiffer/998ce05d8fd089f143484a298e444689 to your computer and use it in GitHub Desktop.
Ultra-simple text-to-SQL with Outlines, using a small subset of the SQL grammar.
import outlines
import os
from pydantic import BaseModel, Field
from transformers import AutoTokenizer
model_str = 'Qwen/Qwen2.5-7B-Instruct-1M'
model = outlines.models.transformers(
model_str,
device='cuda'
)
tokenizer = AutoTokenizer.from_pretrained(model_str)
sql_grammar = """
?start: query
// Keywords
SELECT: "SELECT"
FROM: "FROM"
JOIN: "JOIN"
ON: "ON"
// Rules
query: select_stmt
select_stmt: SELECT columns FROM table_name JOIN table_name ON join_cond
columns: "*"
table_name: ID
join_cond: ID "=" ID
// Tokens
ID: /[a-zA-Z][a-zA-Z0-9_]*/
// Ignore whitespace
%ignore /\s+/
"""
# Set up the prompt with a more specific example
user_prompt="""
Write a SQL query to join the users table with the orders table on user_id.
"""
system_prompt="""
You are a SQL expert. Generate simple JOIN queries.
"""
# Set up the prompt
prompt = tokenizer.apply_chat_template(
[
{'role': 'system', 'content': system_prompt},
{'role': 'user', 'content': user_prompt},
],
tokenize=False,
add_generation_prompt=True,
)
query_generator = outlines.generate.cfg(model, sql_grammar)
query = query_generator(prompt, max_tokens=1000)
print(query)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment