Skip to content

Instantly share code, notes, and snippets.

@khalido
Last active October 11, 2023 02:00
Show Gist options
  • Save khalido/c4aec1097d6b10228059d5c009fc873a to your computer and use it in GitHub Desktop.
Save khalido/c4aec1097d6b10228059d5c009fc873a to your computer and use it in GitHub Desktop.
how to use ThreadPoolExecutor with map, tqdm and an iterator doing stuff as the futures are returned
# gets movie info from OpenAI
def get_movie_info(movie: str = None, debug=False):
    """Ask the chat-completions API for a summary, review and actors of a movie.

    Sends a single user message asking for a JSON object with the fields
    ``"summary"``, ``"review"`` and ``"actors"`` and parses the reply.

    Args:
        movie: Title of the movie, interpolated into the prompt.
        debug: When true, return the raw completion object instead of
            parsing it.

    Returns:
        The value produced by ``json.loads`` on the first choice's message
        content (expected, but not verified, to be a dict with the fields
        above). If the reply is missing or is not valid JSON, the empty
        string ``""`` is returned instead, so callers should check for a
        falsy result.

    Note:
        Relies on a module-level ``client`` defined elsewhere (presumably an
        OpenAI client -- confirm). Errors raised by the API call itself are
        not caught here and propagate to the caller.
    """
    msg = f"""You love movies and are helping complete a movie database.
Give me a short plot summary, main actors and concise review of the movie '{movie}'. Return the results in Json format with the fields:
["summary", "review", "actors"]."""
    completion = client.chat.completions.create(
        model="gpt-3.5-turbo",
        temperature=0.5,
        messages=[{"role": "user", "content": msg}],
    )
    if debug:
        return completion
    try:
        info = json.loads(completion.choices[0].message.content)
    except (ValueError, TypeError, IndexError, AttributeError):
        # ValueError covers json.JSONDecodeError (reply was not valid JSON);
        # the others cover a missing/None content or an empty choices list.
        # A bare ``except`` would also swallow KeyboardInterrupt/SystemExit.
        info = ""
    return info
# calls openai api using ThreadPoolExecutor
# processes the incoming data as it rolls in and saves to disk every so often
import pickle
from movies import get_movie_info
# I have a list of movies I want to get information about from openai.
# Only titles that are not already keys of openai_movie_info are requested.
get_movies = list(set(movies_list).difference(openai_movie_info.keys()))
print(f"{len(openai_movie_info):,} movie info already present.")
max_threads = 30
save_every = 60  # checkpoint to disk after this many new results
save_path = "../data/openai_movie_info.pkl"


def _save_movie_info():
    """Pickle everything collected so far to ``save_path``."""
    with open(save_path, "wb") as f:
        pickle.dump(openai_movie_info, f)


try:
    with tqdm(total=len(get_movies)) as pbar:
        with ThreadPoolExecutor(max_workers=max_threads) as e:
            # Executor.map yields results in the order of its input, so
            # zipping with get_movies pairs each title with its own result
            # while the remaining requests keep running in the background.
            for i, (title, data) in enumerate(
                zip(get_movies, e.map(get_movie_info, get_movies)), start=1
            ):
                openai_movie_info[title] = data
                pbar.update(1)
                if i % save_every == 0:
                    _save_movie_info()
finally:
    # Always persist the tail end of the run: without this, results after
    # the last checkpoint (or everything gathered before a worker raised)
    # would never reach the disk.
    if get_movies:
        _save_movie_info()
len(openai_movie_info)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment