Skip to content

Instantly share code, notes, and snippets.

@FoobarProtocol
Created October 21, 2023 23:46
Show Gist options
  • Save FoobarProtocol/19d5187ff7b9e6732261478f81189c56 to your computer and use it in GitHub Desktop.
Save FoobarProtocol/19d5187ff7b9e6732261478f81189c56 to your computer and use it in GitHub Desktop.
This is some really high quality code here from 'Peyton Cleveland' (guy from GitHub here - https://github.com/PeytonCleveland/Fair-Use/blob/main/Darwin-JSTS/main.py)
import os
import logging
import csv
import argparse
from tqdm import tqdm
import sys
from dotenv import load_dotenv
import openai
logging.basicConfig(level=logging.INFO,
format='%(asctime)s - %(levelname)s - %(message)s')
load_dotenv()
openai_api_key = os.environ.get('OPENAI_API_KEY')
if not openai_api_key:
logging.error("OPENAI_API_KEY not found in environment variables")
raise ValueError("OPENAI_API_KEY not found in environment variables")
openai.api_key = openai_api_key
def get_response(prompt_text, max_length=256):
try:
response = openai.ChatCompletion.create(
model="gpt-3.5-turbo",
messages=[{"role": "user", "content": prompt_text}],
temperature=0.8,
max_tokens=max_length # Setting the maximum tokens for the response
)
return response.choices[0].message.content.strip()
except openai.error.OpenAIError as e:
logging.error(
f"Error occurred while fetching response from OpenAI: {e}. Skipping this prompt.")
return None
def read_seed_topics(seed_file):
topics = []
with open(seed_file, 'r') as file:
for line in file:
topic = line.strip()
for level in range(1, 11): # Generate prompts for levels 1 to 10
system_prompt = f"You are a JavaScript tutor from MIT trying to create LeetCode-style coding questions for your student. Create a understandable, readable, and approachable LeetCode-style coding question a topic related to '{topic}' and ask a level {level} question."
topics.append((topic, level, system_prompt))
return topics
def save_to_csv(filename, data, mode='w'):
with open(filename, mode, newline='', encoding='utf-8') as file:
fieldnames = ["Topic", "Question",
"Answer", "Difficulty", "Explanation"]
if any('MultipleChoice' in row for row in data):
fieldnames.append("MultipleChoice")
writer = csv.DictWriter(file, fieldnames=fieldnames)
if mode == 'w':
writer.writeheader()
for row in data:
if 'MultipleChoice' not in row:
# Handle cases where it's not present
row['MultipleChoice'] = ""
writer.writerow({k: v for k, v in row.items() if k in fieldnames})
def generate_initial_questions(topics):
initial_qa_pairs = []
for topic, level, system_prompt in tqdm(topics, desc="Generating Initial Questions", ncols=100):
# Generate a user prompt (question)
question = get_response(
f"Craft a level {level} JavaScript coding question related to '{topic}'.", max_length=256)
# Use the system prompt to guide the assistant's response
answer = get_response(system_prompt, max_length=256)
# Generate a multiple-choice question
multiple_choice_question = get_response(
f"Create a multiple-choice question related to '{topic}' with 4 options.", max_length=256)
# Specify that we want a concise yet insightful explanation.
explanation = get_response(
f"Provide a concise yet insightful explanation supporting the answer to the coding question: '{question}'", max_length=256)
initial_qa_pairs.append({
"Topic": topic,
"Question": question,
"Answer": answer,
"Difficulty": level,
"Explanation": explanation,
"MultipleChoice": multiple_choice_question
})
return initial_qa_pairs
def evolve_questions(qa_pairs, epoch):
evolved_pairs = []
for qa in qa_pairs:
if qa["Difficulty"] >= 10:
evolved_pairs.append(qa)
continue
new_question = get_response(
f"Evolve the JavaScript question '{qa['Question']}' to make it a level {qa['Difficulty'] + 1} question.")
new_explanation = get_response(
f"Provide a indepth explanation for the JavaScript question: {new_question}")
evolved_pairs.append({
"Topic": qa["Topic"],
"Question": new_question,
"Answer": qa["Answer"],
"Difficulty": qa["Difficulty"] + 1,
"Explanation": new_explanation
})
return evolved_pairs
if __name__ == "__main__":
parser = argparse.ArgumentParser(
description='Generate a set of JavaScript questions based on seed topics and evolve them over epochs.')
parser.add_argument('seed_topics', type=str,
help='Input CSV file containing initial topics.')
parser.add_argument('output_file', type=str,
help='Output CSV file to save the generated questions and answers.')
parser.add_argument('--epochs', type=int, default=9,
help='Number of epochs to run the evolution process. Default is 9.')
args = parser.parse_args()
seed_topics = read_seed_topics(args.seed_topics)
initial_qa_pairs = generate_initial_questions(seed_topics)
save_to_csv(args.output_file, initial_qa_pairs)
for epoch in tqdm(range(args.epochs), desc="Evolving Questions", ncols=100):
evolved_qa_pairs = evolve_questions(initial_qa_pairs, epoch + 1)
save_to_csv(args.output_file, evolved_qa_pairs, mode='a')
# Update initial_qa_pairs with evolved questions for the next epoch
initial_qa_pairs = evolved_qa_pairs.copy()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment