Skip to content

Instantly share code, notes, and snippets.

Created May 15, 2024 18:52
Show Gist options
  • Save laura-dietz/8d89da028e33700e6258ef61b9d8f436 to your computer and use it in GitHub Desktop.
Save laura-dietz/8d89da028e33700e6258ef61b9d8f436 to your computer and use it in GitHub Desktop.
Examples of how to use the ChatGPT API from Python
import os
from openai import OpenAI
# 1. Must create an OpenAI account
# 2. Must generate a personalized API key at https://platform.openai.com/api-keys
# 3. Never save API keys in code; instead save the key as an environment
#    variable with `export OPENAI_API_KEY=your_key`

# Read API key and initialize OpenAI client.
# Indexing os.environ raises KeyError for a missing variable (it never yields
# None), so look the key up with .get() to reach the intended RuntimeError.
# An empty value is treated the same as a missing one.
api_key = os.environ.get('OPENAI_API_KEY')
if not api_key:
    raise RuntimeError("Must set environment variable \"OPENAI_API_KEY\"")
client = OpenAI(api_key=api_key)
# Create a chat conversation and send it to the API.
# The message list is the whole conversation so far: a system instruction,
# one completed user/assistant exchange that serves as an example, and a final
# user message for which a reply is requested.
# NOTE(review): the call and its `model` argument were missing from this
# listing; "gpt-3.5-turbo" is the model used by the batch example further
# down in this file - confirm.
completion = client.chat.completions.create(
    model="gpt-3.5-turbo",
    messages=[
        {"role": "system", "content": "You are an assistant that completes sentences."},
        {"role": "user", "content": "The weather will"},
        {"role": "assistant", "content": "be variable in New England."},
        {"role": "user", "content": "The sky will"},
    ],
)
# Print the answer, which is another response from assistant
print(completion.choices[0].message.content)
# There are limits on how many tokens/requests to use per time unit, print how much you used
print(completion.usage)
import os
from openai import OpenAI
# 1. Must create an OpenAI account
# 2. Must generate a personalized API key at https://platform.openai.com/api-keys
# 3. Never save API keys in code; instead save the key as an environment
#    variable with `export OPENAI_API_KEY=your_key`

# Read API key and initialize OpenAI client.
# Indexing os.environ raises KeyError for a missing variable (it never yields
# None), so look the key up with .get() to reach the intended RuntimeError.
# An empty value is treated the same as a missing one.
api_key = os.environ.get('OPENAI_API_KEY')
if not api_key:
    raise RuntimeError("Must set environment variable \"OPENAI_API_KEY\"")
client = OpenAI(api_key=api_key)

# Example with rate limiting for batch processing
print("\n\nExample with rate limiting for batch processing \n\n")
# Background: https://platform.openai.com/docs/guides/rate-limits
import time
import random
import openai
class OpenAIRateLimiter:
    """Client-side bookkeeping for per-minute request and token budgets.

    The limiter keeps two counters (requests and tokens) that are decremented
    by ``update_limits`` after each API call. ``wait_if_needed`` blocks only
    when one of the counters is used up, sleeping for whatever remains of the
    60 seconds since the current window started, and then restores both
    counters and starts a new window.
    """

    def __init__(self, max_requests_per_minute: int = 5000, max_tokens_per_minute: int = 40000):
        """Start a fresh window with full request and token budgets."""
        self.max_requests_per_minute = max_requests_per_minute
        self.max_tokens_per_minute = max_tokens_per_minute
        self.remaining_requests = max_requests_per_minute
        self.remaining_tokens = max_tokens_per_minute
        # Wall-clock time (seconds since the epoch) at which the window began.
        self.start_time = time.time()

    def wait_if_needed(self) -> None:
        """Sleep out the rest of the window if either budget is exhausted.

        Does nothing while both counters are still positive. If the window
        started 60 or more seconds ago the sleep is zero-length, so only the
        counters and the window start are reset.
        """
        if self.remaining_requests <= 0 or self.remaining_tokens <= 0:
            time.sleep(max(60 - (time.time() - self.start_time), 0))
            self.remaining_requests = self.max_requests_per_minute
            self.remaining_tokens = self.max_tokens_per_minute
            self.start_time = time.time()

    def update_limits(self, used_tokens: int) -> None:
        """Charge one request and ``used_tokens`` tokens to the current window."""
        self.remaining_requests -= 1
        # May go negative; wait_if_needed treats any value <= 0 as exhausted.
        self.remaining_tokens -= used_tokens

    def __str__(self) -> str:
        """Return the remaining budgets and the window start as HH:MM:SS local time."""
        if self.start_time:
            fmt_time = time.strftime("%H:%M:%S", time.localtime(self.start_time))
            return f'remaining_request={self.remaining_requests} remaining_tokens={self.remaining_tokens} start_time={fmt_time}'
        return ""
# Initialize rate limiter
# Single module-level instance, built with the constructor's default budgets.
rate_limiter = OpenAIRateLimiter()
# define a retry decorator
def retry_with_exponential_backoff(
    func,
    initial_delay: float = 1,
    exponential_base: float = 2,
    jitter: bool = True,
    max_retries: int = 10,
    errors: tuple = (openai.RateLimitError,),
):
    """Retry a function with exponential backoff.

    Args:
        func: The callable to wrap.
        initial_delay: Starting delay in seconds. It is multiplied before the
            first sleep, so the first wait is already longer than this value.
        exponential_base: Factor the delay is multiplied by after each failure.
        jitter: When true, the multiplier is additionally scaled by a random
            factor in [1, 2) so that retries are spread out.
        max_retries: Number of retries allowed before giving up.
        errors: Exception types that trigger a retry. Any other exception
            raised by ``func`` propagates to the caller unchanged.

    Returns:
        A wrapper that forwards its arguments to ``func`` and returns the
        result of the first call that succeeds.

    Raises:
        Exception: From the wrapper, once more than ``max_retries`` retryable
            errors have occurred; the last error is attached as the cause.
    """
    # Local import so this block does not rely on a file-level functools import.
    from functools import wraps

    @wraps(func)
    def wrapper(*args, **kwargs):
        # Initialize variables
        num_retries = 0
        delay = initial_delay

        # Loop until a successful response or max_retries is hit or an exception is raised
        while True:
            try:
                return func(*args, **kwargs)

            # Retry on specific errors; anything else propagates as-is.
            except errors as e:
                # Increment retries
                num_retries += 1

                # Check if max retries has been reached
                if num_retries > max_retries:
                    raise Exception(
                        f"Maximum number of retries ({max_retries}) exceeded."
                    ) from e

                # Increment the delay
                delay *= exponential_base * (1 + jitter * random.random())

                # Sleep for the delay
                time.sleep(delay)

    return wrapper
def query_gpt_batch_with_rate_limiting(prompt:str, gpt_model:str, max_tokens:int):
    """Send one user prompt to the chat API under the shared rate limiter.

    Uses the module-level ``client`` and ``rate_limiter``: waits if the
    limiter's budget is exhausted, sends the request with retries on
    rate-limit errors, then charges the reported token usage to the limiter.

    Args:
        prompt: Text sent as the single user message.
        gpt_model: Model name passed to the API.
        max_tokens: Upper bound on generated tokens, passed to the API.

    Returns:
        The stripped text of the first returned choice.

    Raises:
        RuntimeError: If the response carries no usage information.
    """
    # Block first if the current one-minute budget is already used up.
    rate_limiter.wait_if_needed()

    messages = [{"role":"user", "content":prompt}]
    create_with_backoff = retry_with_exponential_backoff(client.chat.completions.create)
    completion = create_with_backoff(model=gpt_model, messages=messages, max_tokens=max_tokens)
    # A choice may carry no text content; treat that as an empty string.
    result = [(choice.message.content or "").strip() for choice in completion.choices]

    # Update rate limits
    usage = completion.usage
    print("usage", usage)
    if usage is None:
        raise RuntimeError("usage not provided")
    rate_limiter.update_limits(usage.total_tokens)

    return result[0]
# Run the rate-limited query once as a demonstration and keep the reply text.
answer = query_gpt_batch_with_rate_limiting(
    prompt="The weather will be",
    gpt_model="gpt-3.5-turbo",
    max_tokens=100,
)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment