Skip to content

Instantly share code, notes, and snippets.

@mtanco
Created March 5, 2024 19:02
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save mtanco/8ba24cd0c3d758c8f0249bdc7efe7830 to your computer and use it in GitHub Desktop.
Save mtanco/8ba24cd0c3d758c8f0249bdc7efe7830 to your computer and use it in GitHub Desktop.
Testing system prompts, models, and specific ways of asking questions to understand how to get Python Executable code from an LLM
"""
Code Execution Testing for LLMs
Michelle Tanco - michelle.tanco@h2o.ai
March 4, 2024
We are attempting to get executable Python code from an LLM. This testing suite helps us with that process.
"""
import os
from h2ogpte import H2OGPTE
from h2ogpte.types import SessionError
from loguru import logger
# System prompt passed with every query. The wording (including its typos) is
# sent to the model verbatim, so it is kept exactly as written.
# NOTE(review): the "# worked" notes in TEST_PROMPTS were presumably recorded
# with this exact text - re-run the prompts if it is ever edited.
SYSTEM_PROMPT = (
    "You are a Python code generator, you only ever response with Python code that can be executed. "
    "You do not wrap your code in triple ticks and you do not return any non-code unless it is commented out. "
    "All comments and explainations are commented out so as to ensure the code is executable."
)

# Identifier of the model under test; passed as `llm=` on every query.
LLM = "h2oai/h2ogpt-32k-codellama-34b-instruct"
# Prompts to send to the model, keyed by a question id that is included in the
# error log when a response is not valid Python. The ids are not sequential:
# entries are grouped by observed outcome, and the loop visits them in the
# order they are written here.
TEST_PROMPTS = {
    # --- Prompts whose responses tended to be executable ---
    1: "Create a function for calculating the factorial of a number.",  # worked
    2: "Create a function for calculating the factorial of a number and execute the function 5 times.",  # worked
    3: "Create a function for calculating the factorial of a number, comment your code.",  # worked
    7: "Print `Hello, world!` to the console.",  # worked
    8: "Write code for printing hello world.",  # worked
    10: "Write a function for summing two numbers",  # worked
    12: "A function for summing two numbers, execute the function 7 times",  # worked

    # --- The model does not cope well with requests to validate input ---
    4: "Create a function for calculating the factorial of a number, check that the input is valid.",
    5: "Write a function for calculating factorials, be sure to handle negative numbers",

    # --- "How to"-style questions: the model insists on explaining itself
    # --- instead of answering with code only
    6: "How do I create a simple Python script that prints 'Hello, world' to the console?",
    9: "How do I create a Python function that takes two numbers as input and returns their sum?",
    11: "I need code to sum two numbers",
    13: "How do I create a Python generator that yields a sequence of numbers.",
    14: "Create a generator for sequences of numbers",
    15: "I need a function for generating sequences of numbers",
}
# Build the h2oGPTe client; the server address and API key are read from the
# H2OGPTE_URL and H2OGPTE_API_TOKEN environment variables (None when unset).
client = H2OGPTE(
    address=os.environ.get("H2OGPTE_URL"),
    api_key=os.environ.get("H2OGPTE_API_TOKEN"),
)

# Create a collection for this test run and open a chat session on it.
# NOTE(review): the queries use rag_type "llm_only", so the collection is
# presumably just a placeholder the chat session needs - confirm.
collection_id = client.create_collection("Fake for testing", "")
chat_session_id = client.create_chat_session(collection_id)

# Log the configuration under test before any prompt is sent.
logger.info("LLM: {}", LLM)
logger.info("System Prompt: {}", SYSTEM_PROMPT)
# Send every test prompt to the LLM and check whether the reply runs as Python.
#
# The outer try/finally removes the test collection and chat session however
# the run ends (all prompts done, SessionError, or the generated code raising
# something like SystemExit). Previously they were only deleted when a
# SessionError occurred, so every normal run left them behind on the server.
try:
    with client.connect(chat_session_id) as session:
        for prompt_id, message in TEST_PROMPTS.items():
            logger.info(f"User Input: {message}")
            try:
                reply = session.query(
                    message=message,
                    system_prompt=SYSTEM_PROMPT,
                    timeout=60,
                    rag_config={"rag_type": "llm_only"},
                    llm=LLM,
                )
                logger.info(f"LLM Output: {reply.content}")

                # SECURITY: this executes model-generated code with the full
                # privileges of this process - only run the harness in a
                # disposable / sandboxed environment.
                #
                # Each reply runs in its own fresh namespace so generated code
                # cannot overwrite this script's globals (client, LLM,
                # TEST_PROMPTS, ...) and one prompt's definitions cannot make a
                # later prompt's output look executable. __name__ is set to
                # "__main__" so an `if __name__ == "__main__":` guard in the
                # generated code still fires, as it does when this file is run
                # as a script.
                exec(reply.content, {"__name__": "__main__"})
                logger.success("The output was executable Python!")
            except SessionError as ex:
                # Something went wrong with h2oGPTe - stop testing; the
                # finally clause below performs the clean-up.
                logger.error(ex)
                break
            except SyntaxError as ex:
                # Output was not valid Python - document it and try the next test.
                logger.error(f"{LLM}\n{SYSTEM_PROMPT}\nQuestion {prompt_id}\n{reply.content}\n{ex}")
            except Exception as ex:
                # Any other failure (e.g. the generated code raised at runtime):
                # log the exception type and message, then move on.
                logger.error(type(ex))
                logger.error(ex)
finally:
    client.delete_collections([collection_id])
    client.delete_chat_sessions([chat_session_id])
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment