Skip to content

Instantly share code, notes, and snippets.

@julie-mills
Created April 18, 2023 18:54
Show Gist options
  • Save julie-mills/a4e1ac299159bb72e0b1b2f121fa97ea to your computer and use it in GitHub Desktop.
Save julie-mills/a4e1ac299159bb72e0b1b2f121fa97ea to your computer and use it in GitHub Desktop.
import gzip
import json
import openai
# Download the following file from https://cseweb.ucsd.edu/~jmcauley/datasets/amazon_v2/
# Each line of the gzip archive is parsed as one standalone JSON document.
# The context manager closes the archive even if a line fails to parse.
with gzip.open('./amazon_metadata/meta_Video_Games.json.gz', 'rt', encoding='UTF-8') as metadata_file:
    product_data_full = [json.loads(line) for line in metadata_file]
# Remove products without descriptions and embed a subset of the data to save time (and money)
# Only the first 12000 records are considered. Slicing (rather than indexing
# through range(12000)) avoids an IndexError when fewer records were loaded.
product_data = [
    product
    for product in product_data_full[:12000]
    # .get() treats a missing key like an empty value, so records lacking
    # either field are skipped instead of raising KeyError.
    if product.get('description') and product.get('price')
]
# Create embeddings for each product description
# One API request is issued per product; only the first entry of each
# product's 'description' sequence is embedded, and the resulting vector is
# stored back on the product record under 'description_embedding'.
for item in product_data:
    embedding_response = openai.Embedding.create(
        input=item['description'][0],
        model="text-embedding-ada-002",
    )
    item['description_embedding'] = embedding_response["data"][0]["embedding"]
# Create new file with embeddings
# Output is JSON Lines: one serialized product record per line.
# NOTE(review): the original script never opened `jsonFile`, so the output
# path below is a placeholder choice -- confirm or adjust before running.
with open('./amazon_metadata/product_embeddings.json', 'w', encoding='UTF-8') as embeddings_file:
    # The context manager flushes and closes the file even if serialization fails.
    for item in product_data:
        embeddings_file.write(json.dumps(item) + '\n')
# Generate embedding for future search input
# The free-text query is embedded so it can later be compared against the
# stored product description vectors.
search_query = 'open-world play, multiplayer options, and support for in-game purchases'
search_query_response = openai.Embedding.create(
    model="text-embedding-ada-002",
    input=search_query,
)
search_query_embedding = search_query_response["data"][0]["embedding"]
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment