Skip to content

Instantly share code, notes, and snippets.

@metal3d
Created March 13, 2024 08:09
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save metal3d/448da715534baf686c1fffc685483296 to your computer and use it in GitHub Desktop.
Save metal3d/448da715534baf686c1fffc685483296 to your computer and use it in GitHub Desktop.
Generate French products in JSON with AI, using LM Studio or OpenAI
# Generate a list of products in JSON format using AI
# Author: Patrice Ferlet <metal3d@gmail.com>
# License: MIT
#
# This script uses the OpenAI API to generate a list of products in JSON format. You may use
# LM Studio to serve a preferred model locally. Use the API tab to serve the model.
import argparse
import glob
import json
import logging
import os
from openai import OpenAI
from rich.logging import RichHandler
# --- Generation parameters (both can be overridden by CLI flags in main()) ---
NUM_PRODUCTS_PER_REQUEST = 5 # Number of products requested per API call
NUM_PRODUCTS = 50 # Total number of products to generate
# --- API configuration ---
MODEL_NAME = "local-model" # Model name; ignored by LM Studio's local server
API_KEY = "not-needed" # LM Studio does not check the API key
API_URL = (
"http://localhost:1234/v1" # Base URL of the API; to use OpenAI, set it to None
)
# Route log records through Rich for nicely formatted console output.
logging.basicConfig(
level=logging.INFO,
format="%(message)s",
datefmt="[%X]",
handlers=[RichHandler()],
)
# Client pointing at the local LM Studio server (or OpenAI when API_URL is None).
CLIENT = OpenAI(base_url=API_URL, api_key=API_KEY)
# JSON schema example embedded in the prompt so the model mirrors the
# expected structure.  Fix: the array must be closed with "]" — the
# original example closed it with a stray "}", making the sample invalid.
SCHEMA = """
```json
[
{
"name": "string",
"description": "string",
"price": 0.0,
"categories": ["string"]
}, {...}
]
```
"""
# Names of every product generated (or loaded from disk) so far; used to tell
# the model what already exists so it does not repeat itself.
KNOWN_PRODUCTS = []
INIT_HISTORY = [
# The system role sets the language and the persona of the model. We explicitly
# tell the model to respond in French - change the message to your target language.
{
"role": "system",
"content": (
"Tu es un développeur français et ne répond qu'en français. "
"Tu peux proposer des solutions à des problèmes de programmation "
"et générer du contenu technique."
),
},
# Here we ask the model to generate a list of products in JSON format,
# following the schema defined in SCHEMA above.
{
"role": "user",
"content": (
"J'ai besoin que tu me génères, en JSON, une liste de "
f"{NUM_PRODUCTS_PER_REQUEST} produits avec name, "
"description, price et categories. Le nom doit être pertinent et unique, "
"la description claire, et de 2 à 5 catégories. "
"Le contenu doit être en français. "
"Retourne un tableau d'objets JSON. "
"Le schema doit être conforme à : " + SCHEMA
),
},
]
# Prompt appended to the conversation to continue generation: we tell the
# model which products already exist so it continues from that point,
# without resending the entire conversation history.
APPEND_PROMPT = """
J'ai déjà ces produits :
"""
# Console prompt shown to the user when previous output files are detected,
# offering to resume from them or wipe them and start over.
FOUND_PRODUCTS_PROMPT = """Found products files in the current directory.
We can continue from here. The script will load the existing products and ask
the model to generate more products.
A new file will be created with the new products.
Or you can remove the existing files and start from scratch.
Do you want to remove them and start from scratch? (y/N): """
def load_existing_products():
    """Return the names of products already saved in products_*.json files.

    Returns an empty list when no product file is present.
    """
    names = []
    for path in glob.glob("products_*.json"):
        with open(path, "r", encoding="utf-8") as handle:
            # Each file holds a JSON array of product objects.
            names.extend(item["name"] for item in json.load(handle))
    return names
def list_of_products():
    """Return the known product names as a Markdown bullet list.

    The list is prefixed by APPEND_PROMPT so it can be sent as a
    continuation message.  Fix: the original concatenated the stripped
    prompt directly to the first bullet ("...produits :- name"), missing
    the separating newline.
    """
    bullets = "\n".join(f"- {name}" for name in KNOWN_PRODUCTS)
    return APPEND_PROMPT.strip() + "\n" + bullets
def create_completion(continuation=False):
    """Send the chat history to the model and return the completion.

    When continuation is True, or product files already exist on disk,
    a follow-up user message listing the known products is appended so
    the model does not repeat itself.
    """
    # Copy the template so the shared INIT_HISTORY is never mutated.
    messages = list(INIT_HISTORY)
    if continuation or glob.glob("products_*.json"):
        messages.append({"role": "user", "content": list_of_products()})
    logging.info("History: %s", messages)
    return CLIENT.chat.completions.create(
        model=MODEL_NAME,  # this field is currently unused if you use LM Studio
        messages=messages,  # pyright: ignore
        temperature=0.7,
        stream=False,
    )
def extract_json_from_markdown(content):
    """Extract and parse JSON wrapped in a Markdown code fence.

    Returns the parsed object, or None when no fenced block is present
    or its content does not parse as JSON.

    Fixes over the original: the stray `""` literal after the docstring
    is removed, and the "no fence found" case (`find` returning -1) is
    handled explicitly instead of slicing garbage from the string.
    """
    start = content.find("```")
    end = content.rfind("```")
    # Two distinct fences are required to delimit a block.
    if start == -1 or end <= start:
        logging.info("JSON content not found as Markdown")
        return None
    body = content[start + 3 : end]
    # The opening fence may carry a "json" language tag; drop it.
    # (A leading newline, if any, is harmless to json.loads.)
    if body.startswith("json"):
        body = body[4:]
    try:
        return json.loads(body)
    except ValueError:  # json.JSONDecodeError is a ValueError
        logging.info("JSON content not found as Markdown")
        return None
def extract_json_from_content(content):
    """Try to parse the whole response as a JSON array.

    If the text is not already wrapped in brackets, wrap it first so a
    bare object (or comma-separated objects) still parses as an array.
    Returns None when parsing fails.
    """
    text = content.strip()
    if not text.startswith("["):
        text = f"[{text}]"
    try:
        return json.loads(text)
    except Exception:  # pyright: ignore pylint: disable=broad-except
        logging.info("JSON content not found as full response")
        return None
def extract_json_content(content):
    """Parse the model response, trying raw JSON first, then a Markdown fence.

    On success, the product names are recorded in KNOWN_PRODUCTS so later
    requests can tell the model which products already exist.  Returns the
    parsed list, or None when neither strategy yields JSON.
    """
    text = content.strip()
    parsed = extract_json_from_content(text)
    if parsed is None:
        parsed = extract_json_from_markdown(text)
    if parsed is None:
        logging.error("Error parsing JSON content, no content")
        return None
    # Remember the names for continuation prompts.
    KNOWN_PRODUCTS.extend(item["name"] for item in parsed)
    return parsed
def save_json_content(content):
    """Save the JSON content to the next numbered products_NNNN.json file.

    Does nothing (beyond logging a warning) when content is None.
    Fix: the None guard now runs before the filesystem is globbed, so no
    needless directory scan happens when there is nothing to save.
    """
    if content is None:
        logging.warning("No content to save")
        return
    # Next index = number of existing product files + 1.
    index = len(glob.glob("products_*.json")) + 1
    filename = f"products_{index:04}.json"
    with open(filename, "w", encoding="utf-8") as json_file:
        json.dump(content, json_file, indent=2)
def main():
    """Generate a list of products in JSON format.

    Parses CLI flags, optionally resumes from existing products_*.json
    files, then requests batches of products from the model until the
    requested total is reached, saving each batch to a new file.
    """
    global NUM_PRODUCTS_PER_REQUEST, NUM_PRODUCTS, KNOWN_PRODUCTS  # pylint: disable=global-statement
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument(
        "-r",
        "--per-request",
        type=int,
        default=NUM_PRODUCTS_PER_REQUEST,
        help=(
            "The number of products you want to generate per request, "
            f"default is {NUM_PRODUCTS_PER_REQUEST}. "
            "(not guaranteed, the model is sometimes a bit cheeky and decides to do what he wants)"
        ),
    )
    arg_parser.add_argument(
        "-p",
        "--num-products",
        type=int,
        default=NUM_PRODUCTS,
        help=(
            # Fix: the original help text was garbled
            # ("...you want to, default is N. generate...").
            f"The total number of products you want to generate, default is {NUM_PRODUCTS}. "
            "(one more time, the model is a bit cheeky)"
        ),
    )
    args = arg_parser.parse_args()
    NUM_PRODUCTS_PER_REQUEST = args.per_request
    NUM_PRODUCTS = args.num_products
    # Offer to resume from (or wipe) the output of previous runs.
    existing_files = glob.glob("products_*.json")
    if existing_files:
        logging.warning("Existing files: %s", existing_files)
        if input(FOUND_PRODUCTS_PROMPT).lower() == "y":
            for file in existing_files:
                os.remove(file)
        else:
            KNOWN_PRODUCTS = load_existing_products()
    # Initial batch of products.
    completion = create_completion(continuation=False)
    content = str(completion.choices[0].message.content)
    logging.info("Response: %s", content)
    json_content = extract_json_content(content)
    logging.info("Extracted JSON: %s", json_content)
    save_json_content(json_content)
    # Fix: the original looped NUM_PRODUCTS // NUM_PRODUCTS_PER_REQUEST times
    # *in addition to* the initial batch, overshooting the target by one full
    # batch (e.g. 55 products for -p 50 -r 5).  Request only the remaining
    # batches, using ceiling division so a partial batch is still requested.
    remaining_batches = max(0, -(-NUM_PRODUCTS // NUM_PRODUCTS_PER_REQUEST) - 1)
    for _ in range(remaining_batches):
        completion = create_completion(
            continuation=True
        )  # continuation request to generate more products
        content = str(completion.choices[0].message.content)
        json_content = extract_json_content(content)
        logging.info("Extracted JSON: %s", json_content)
        save_json_content(json_content)
if __name__ == "__main__":
    main()
[[source]]
url = "https://pypi.org/simple"
verify_ssl = true
name = "pypi"
[packages]
openai = "*"
rich = "*"
[dev-packages]
[requires]
python_version = "3.12"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment