Skip to content

Instantly share code, notes, and snippets.

@metal3d
Created March 13, 2024 08:09
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save metal3d/448da715534baf686c1fffc685483296 to your computer and use it in GitHub Desktop.
Save metal3d/448da715534baf686c1fffc685483296 to your computer and use it in GitHub Desktop.
Generate French products in JSON with AI, using LM Studio or OpenAI
# Generate a list of products in JSON format using AI
# Author: Patrice Ferlet <metal3d@gmail.com>
# License: MIT
#
# This script uses the OpenAI API to generate a list of products in JSON format. You may use
# LM Studio to serve a preferred model locally. Use the API tab to serve the model.
import argparse
import glob
import json
import logging
import os
from openai import OpenAI
from rich.logging import RichHandler
# --- Generation parameters (both can be overridden by CLI flags in main()) ---
NUM_PRODUCTS_PER_REQUEST = 5 # Number of products requested per API call
NUM_PRODUCTS = 50 # Total number of products to generate
# --- API configuration ---
MODEL_NAME = "local-model" # Model name; ignored by LM Studio's local server
API_KEY = "not-needed" # LM Studio does not check the API key
API_URL = (
"http://localhost:1234/v1" # Base URL of the API; to use OpenAI, set it to None
)
# Route log records through Rich for nicely formatted console output.
logging.basicConfig(
level=logging.INFO,
format="%(message)s",
datefmt="[%X]",
handlers=[RichHandler()],
)
# Client pointing at the local LM Studio server (or OpenAI when API_URL is None).
CLIENT = OpenAI(base_url=API_URL, api_key=API_KEY)
# JSON schema example embedded in the prompt so the model mirrors the
# expected structure.  Fix: the array must be closed with "]" — the
# original example closed it with a stray "}", making the sample invalid.
SCHEMA = """
```json
[
{
"name": "string",
"description": "string",
"price": 0.0,
"categories": ["string"]
}, {...}
]
```
"""
# Names of every product generated (or loaded from disk) so far; used to tell
# the model what already exists so it does not repeat itself.
KNOWN_PRODUCTS = []
INIT_HISTORY = [
# The system role sets the language and the persona of the model. We explicitly
# tell the model to respond in French - change the message to your target language.
{
"role": "system",
"content": (
"Tu es un développeur français et ne répond qu'en français. "
"Tu peux proposer des solutions à des problèmes de programmation "
"et générer du contenu technique."
),
},
# Here we ask the model to generate a list of products in JSON format,
# following the schema defined in SCHEMA above.
{
"role": "user",
"content": (
"J'ai besoin que tu me génères, en JSON, une liste de "
f"{NUM_PRODUCTS_PER_REQUEST} produits avec name, "
"description, price et categories. Le nom doit être pertinent et unique, "
"la description claire, et de 2 à 5 catégories. "
"Le contenu doit être en français. "
"Retourne un tableau d'objets JSON. "
"Le schema doit être conforme à : " + SCHEMA
),
},
]
# Prompt appended to the conversation to continue generation: we tell the
# model which products already exist so it continues from that point,
# without resending the entire conversation history.
APPEND_PROMPT = """
J'ai déjà ces produits :
"""
# Console prompt shown to the user when previous output files are detected,
# offering to resume from them or wipe them and start over.
FOUND_PRODUCTS_PROMPT = """Found products files in the current directory.
We can continue from here. The script will load the existing products and ask
the model to generate more products.
A new file will be created with the new products.
Or you can remove the existing files and start from scratch.
Do you want to remove them and start from scratch? (y/N): """
def load_existing_products():
    """Return the names of products already saved in products_*.json files.

    Returns an empty list when no product file is present.
    """
    names = []
    for path in glob.glob("products_*.json"):
        with open(path, "r", encoding="utf-8") as handle:
            # Each file holds a JSON array of product objects.
            names.extend(item["name"] for item in json.load(handle))
    return names
def list_of_products():
    """Return the known product names as a Markdown bullet list.

    The list is prefixed by APPEND_PROMPT so it can be sent as a
    continuation message.  Fix: the original concatenated the stripped
    prompt directly to the first bullet ("...produits :- name"), missing
    the separating newline.
    """
    bullets = "\n".join(f"- {name}" for name in KNOWN_PRODUCTS)
    return APPEND_PROMPT.strip() + "\n" + bullets
def create_completion(continuation=False):
    """Send the chat history to the model and return the completion.

    When continuation is True, or product files already exist on disk,
    a follow-up user message listing the known products is appended so
    the model does not repeat itself.
    """
    # Copy the template so the shared INIT_HISTORY is never mutated.
    messages = list(INIT_HISTORY)
    if continuation or glob.glob("products_*.json"):
        messages.append({"role": "user", "content": list_of_products()})
    logging.info("History: %s", messages)
    return CLIENT.chat.completions.create(
        model=MODEL_NAME,  # this field is currently unused if you use LM Studio
        messages=messages,  # pyright: ignore
        temperature=0.7,
        stream=False,
    )
def extract_json_from_markdown(content):
    """Extract and parse JSON wrapped in a Markdown code fence.

    Returns the parsed object, or None when no fenced block is present
    or its content does not parse as JSON.

    Fixes over the original: the stray `""` literal after the docstring
    is removed, and the "no fence found" case (`find` returning -1) is
    handled explicitly instead of slicing garbage from the string.
    """
    start = content.find("```")
    end = content.rfind("```")
    # Two distinct fences are required to delimit a block.
    if start == -1 or end <= start:
        logging.info("JSON content not found as Markdown")
        return None
    body = content[start + 3 : end]
    # The opening fence may carry a "json" language tag; drop it.
    # (A leading newline, if any, is harmless to json.loads.)
    if body.startswith("json"):
        body = body[4:]
    try:
        return json.loads(body)
    except ValueError:  # json.JSONDecodeError is a ValueError
        logging.info("JSON content not found as Markdown")
        return None
def extract_json_from_content(content):
    """Try to parse the whole response as a JSON array.

    If the text is not already wrapped in brackets, wrap it first so a
    bare object (or comma-separated objects) still parses as an array.
    Returns None when parsing fails.
    """
    text = content.strip()
    if not text.startswith("["):
        text = f"[{text}]"
    try:
        return json.loads(text)
    except Exception:  # pyright: ignore pylint: disable=broad-except
        logging.info("JSON content not found as full response")
        return None
def extract_json_content(content):
    """Parse the model response, trying raw JSON first, then a Markdown fence.

    On success, the product names are recorded in KNOWN_PRODUCTS so later
    requests can tell the model which products already exist.  Returns the
    parsed list, or None when neither strategy yields JSON.
    """
    text = content.strip()
    parsed = extract_json_from_content(text)
    if parsed is None:
        parsed = extract_json_from_markdown(text)
    if parsed is None:
        logging.error("Error parsing JSON content, no content")
        return None
    # Remember the names for continuation prompts.
    KNOWN_PRODUCTS.extend(item["name"] for item in parsed)
    return parsed
def save_json_content(content):
    """Save the JSON content to the next numbered products_NNNN.json file.

    Does nothing (beyond logging a warning) when content is None.
    Fix: the None guard now runs before the filesystem is globbed, so no
    needless directory scan happens when there is nothing to save.
    """
    if content is None:
        logging.warning("No content to save")
        return
    # Next index = number of existing product files + 1.
    index = len(glob.glob("products_*.json")) + 1
    filename = f"products_{index:04}.json"
    with open(filename, "w", encoding="utf-8") as json_file:
        json.dump(content, json_file, indent=2)
def main():
    """Generate a list of products in JSON format.

    Parses CLI flags, optionally resumes from existing products_*.json
    files, then requests batches of products from the model until the
    requested total is reached, saving each batch to a new file.
    """
    global NUM_PRODUCTS_PER_REQUEST, NUM_PRODUCTS, KNOWN_PRODUCTS  # pylint: disable=global-statement
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument(
        "-r",
        "--per-request",
        type=int,
        default=NUM_PRODUCTS_PER_REQUEST,
        help=(
            "The number of products you want to generate per request, "
            f"default is {NUM_PRODUCTS_PER_REQUEST}. "
            "(not guaranteed, the model is sometimes a bit cheeky and decides to do what he wants)"
        ),
    )
    arg_parser.add_argument(
        "-p",
        "--num-products",
        type=int,
        default=NUM_PRODUCTS,
        help=(
            # Fix: the original help text was garbled
            # ("...you want to, default is N. generate...").
            f"The total number of products you want to generate, default is {NUM_PRODUCTS}. "
            "(one more time, the model is a bit cheeky)"
        ),
    )
    args = arg_parser.parse_args()
    NUM_PRODUCTS_PER_REQUEST = args.per_request
    NUM_PRODUCTS = args.num_products
    # Offer to resume from (or wipe) the output of previous runs.
    existing_files = glob.glob("products_*.json")
    if existing_files:
        logging.warning("Existing files: %s", existing_files)
        if input(FOUND_PRODUCTS_PROMPT).lower() == "y":
            for file in existing_files:
                os.remove(file)
        else:
            KNOWN_PRODUCTS = load_existing_products()
    # Initial batch of products.
    completion = create_completion(continuation=False)
    content = str(completion.choices[0].message.content)
    logging.info("Response: %s", content)
    json_content = extract_json_content(content)
    logging.info("Extracted JSON: %s", json_content)
    save_json_content(json_content)
    # Fix: the original looped NUM_PRODUCTS // NUM_PRODUCTS_PER_REQUEST times
    # *in addition to* the initial batch, overshooting the target by one full
    # batch (e.g. 55 products for -p 50 -r 5).  Request only the remaining
    # batches, using ceiling division so a partial batch is still requested.
    remaining_batches = max(0, -(-NUM_PRODUCTS // NUM_PRODUCTS_PER_REQUEST) - 1)
    for _ in range(remaining_batches):
        completion = create_completion(
            continuation=True
        )  # continuation request to generate more products
        content = str(completion.choices[0].message.content)
        json_content = extract_json_content(content)
        logging.info("Extracted JSON: %s", json_content)
        save_json_content(json_content)
if __name__ == "__main__":
    main()
[[source]]
url = "https://pypi.org/simple"
verify_ssl = true
name = "pypi"
[packages]
openai = "*"
rich = "*"
[dev-packages]
[requires]
python_version = "3.12"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment