@gautierdag
Created April 27, 2023 10:57
Simple caching of OpenAI embeddings
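The snippet below keeps an in-memory dict mapping each instruction string to its embedding, so repeated instructions are only sent to the API once, and it batches up to 100 uncached instructions per request to text-embedding-ada-002.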
import openai
from tqdm import tqdm

# NOTE: this targets the pre-1.0 openai SDK; openai.Embedding.create was
# removed in v1.x of the library.
# language_instructions_to_actions is assumed to be defined elsewhere as a
# list of dicts, each with an "instruction" key.

text_to_embeddings_cache = {}

def get_embeddings_for_instructions(instructions: list[str]):
    # embed a batch of strings in a single API call
    response = openai.Embedding.create(
        input=instructions,
        model="text-embedding-ada-002",
    )
    return response["data"]

i = 0
instruction_batch = []
with tqdm(total=len(language_instructions_to_actions)) as pbar:
    while i < len(language_instructions_to_actions):
        pbar.update(1)
        # skip instructions that are already in the cache
        if language_instructions_to_actions[i]["instruction"] in text_to_embeddings_cache:
            i += 1
            continue
        # add instruction to the pending batch
        instruction_batch.append(language_instructions_to_actions[i]["instruction"])
        # send up to 100 instructions per API call
        if len(instruction_batch) == 100:
            embeddings = get_embeddings_for_instructions(instruction_batch)
            assert len(embeddings) == len(instruction_batch)
            for j in range(len(instruction_batch)):
                text_to_embeddings_cache[instruction_batch[j]] = embeddings[j]["embedding"]
            instruction_batch = []
        i += 1

# process any remaining instructions in a final partial batch
if len(instruction_batch) > 0:
    embeddings = get_embeddings_for_instructions(instruction_batch)
    assert len(embeddings) == len(instruction_batch)
    for j in range(len(instruction_batch)):
        text_to_embeddings_cache[instruction_batch[j]] = embeddings[j]["embedding"]
    instruction_batch = []
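Because the dict above lives only in memory, the cache is lost between runs. Here is a minimal sketch of persisting it to disk, assuming the embeddings are plain lists of floats (as the API returns them); the file path and helper names are illustrative, not part of the original gist:

import json
import os

CACHE_PATH = "embeddings_cache.json"  # hypothetical path, adjust as needed

def load_cache(path: str = CACHE_PATH) -> dict:
    # start from a previously saved cache if one exists
    if os.path.exists(path):
        with open(path) as f:
            return json.load(f)
    return {}

def save_cache(cache: dict, path: str = CACHE_PATH) -> None:
    # embeddings are lists of floats, so they round-trip through plain JSON
    with open(path, "w") as f:
        json.dump(cache, f)

text_to_embeddings_cache = load_cache()
# ... run the batching loop above ...
save_cache(text_to_embeddings_cache)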