Skip to content

Instantly share code, notes, and snippets.

@kemingy
Created May 29, 2023 16:22
Show Gist options
  • Save kemingy/72539d6ae272ad4b0a46fd1a18671f3a to your computer and use it in GitHub Desktop.
Save kemingy/72539d6ae272ad4b0a46fd1a18671f3a to your computer and use it in GitHub Desktop.
Benchmark of encoding methods for OpenAI embedding API responses (JSON float list vs. base64 vs. msgpack)
import json
import base64

import msgpack
import numpy as np

# Round-trip benchmark for three ways of shipping one embedding vector:
#   1. a JSON list of floats,
#   2. the raw float32 bytes, base64-encoded inside a JSON string,
#   3. the raw float32 bytes inside a msgpack map.
# Each round trip is first checked for losslessness, then timed.
# NOTE: the timing calls rely on `get_ipython`, which only exists inside an
# IPython session, so run this file with `ipython`, not plain `python`.

dim = 1536  # vector length; presumably the OpenAI embedding size under test
emb = np.random.rand(dim).astype(np.float32)
emb_list = emb.tolist()

# base64-in-JSON: bytes -> base64 text -> JSON -> base64 text -> bytes -> array.
resp_emb = np.frombuffer(
    base64.b64decode(
        json.loads(
            json.dumps({'emb': base64.b64encode(emb.tobytes()).decode('utf-8')})
        )['emb']
    ),
    dtype=np.float32,
)
# array_equal compares shape and every element in one vectorized call,
# instead of summing a boolean array with the Python-level builtin sum().
assert np.array_equal(emb, resp_emb)

# Plain JSON list: the decoded value stays a Python list of floats.
resp_emb_list = json.loads(json.dumps({'emb': emb_list}))['emb']
assert resp_emb_list == emb_list

# msgpack: the raw bytes are packed as-is, with no text encoding step.
msg_emb = np.frombuffer(
    msgpack.unpackb(msgpack.packb({'emb': emb.tobytes()}))['emb'],
    dtype=np.float32,
)
assert np.array_equal(emb, msg_emb)

# Time the three round trips, in the order: JSON list, base64-in-JSON, msgpack.
# The statements are passed as strings and evaluated by %timeit against the
# module-level names defined above.
get_ipython().run_line_magic('timeit', "json.loads(json.dumps({'emb': emb_list}))['emb']")
get_ipython().run_line_magic('timeit', "np.frombuffer(base64.b64decode(json.loads(json.dumps({'emb': base64.b64encode(emb.tobytes()).decode('utf-8')}))['emb']), dtype=np.float32)")
get_ipython().run_line_magic('timeit', "np.frombuffer(msgpack.unpackb(msgpack.packb({'emb': emb.tobytes()}))['emb'], dtype=np.float32)")

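Run it with `ipython bench.py`. The three timings below are, in order, the JSON float-list, base64-in-JSON, and msgpack round trips: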

484 µs ± 2.32 µs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)
22.7 µs ± 38.8 ns per loop (mean ± std. dev. of 7 runs, 10,000 loops each)
764 ns ± 1.92 ns per loop (mean ± std. dev. of 7 runs, 1,000,000 loops each)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment