Skip to content

Instantly share code, notes, and snippets.

@keddad
Created June 28, 2020 15:54
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save keddad/2778018278395643bcf468da6567c336 to your computer and use it in GitHub Desktop.
Save keddad/2778018278395643bcf468da6567c336 to your computer and use it in GitHub Desktop.
import psycopg2
import random
from timeit import default_timer as timer
def setup_db(db):
    """Recreate the ``vectors`` table used by the benchmark.

    Enables the ``cube`` extension if needed, drops any existing ``vectors``
    table, creates it again and adds an index over both cube columns.

    Args:
        db: An open database connection exposing ``cursor()`` (a psycopg2
            connection in this script). Nothing is committed here; the
            caller commits.
    """
    # Use a dedicated, context-managed cursor instead of rebinding ``db``:
    # the connection stays reachable under its own name and the cursor is
    # closed as soon as the DDL has been issued.
    with db.cursor() as cur:
        cur.execute("create extension if not exists cube;")
        cur.execute("drop table if exists vectors")
        # Each row stores its vector as two cube values (low / high half).
        # NOTE(review): presumably split because cube limits the number of
        # dimensions per value -- confirm against the cube documentation.
        cur.execute(
            "create table vectors (id serial, file varchar, vec_low cube, vec_high cube);")
        # NOTE(review): no index method is named, so PostgreSQL picks its
        # default one. The queries in this script order by the cube ``<->``
        # distance; confirm whether a GiST index was intended here.
        cur.execute("create index vectors_vec_idx on vectors (vec_low, vec_high);")
def insert_random(db):
    """Insert one row with a random 128-component vector into ``vectors``.

    The vector is drawn with ``random.random()`` and stored as two cube
    values: components 0-63 in ``vec_low`` and 64-127 in ``vec_high``. The
    ``file`` column is always the literal string ``"file"``.

    Args:
        db: An open database connection exposing ``cursor()`` (a psycopg2
            connection in this script). Nothing is committed here; the
            caller commits.
    """
    encodings = [random.random() for _ in range(128)]
    # Pass the values as query parameters instead of formatting them into
    # the SQL text: psycopg2 adapts each Python list to an ``ARRAY[...]``
    # literal, so the statement sent to the server has the same shape as
    # before without any hand-rolled string building.
    # The cursor is context-managed so that the 10000 calls made by the
    # benchmark do not leave 10000 open cursors behind.
    with db.cursor() as cur:
        cur.execute(
            "INSERT INTO vectors (file, vec_low, vec_high) "
            "VALUES (%s, CUBE(%s), CUBE(%s))",
            ("file", encodings[:64], encodings[64:]),
        )
# Benchmark driver: (1) recreate the schema, (2) insert 10000 random vectors,
# (3) run 1000 nearest-neighbour lookups, printing the wall-clock time of
# each phase.
# NOTE(review): connection settings (including the password) are hard-coded
# for a local test database.
db = psycopg2.connect(
    database="postgres",
    user="postgres",
    password="pass",
    host="127.0.0.1",
    port="5432"
)
# try/finally guarantees the connection is closed even if a phase raises.
try:
    # Phase 1: schema setup; the commit is part of the measured time.
    start = timer()
    setup_db(db)
    db.commit()
    end = timer()
    print(f"DB is ready, {end-start}")

    # Phase 2: bulk insert. The commit happens after the timer stops, so the
    # reported figure covers only the INSERT statements themselves.
    start = timer()
    for _ in range(10000):
        insert_random(db)
    end = timer()
    print(f"Strings are inserted, {end-start}")
    db.commit()

    # Phase 3: top-5 lookups. The distance to a stored row is the Euclidean
    # combination of the two half-vector cube distances. The statement text
    # is loop-invariant, so it is built once and only the two array
    # parameters change per iteration (psycopg2 renders each list as
    # ``ARRAY[...]``).
    query = (
        "SELECT file FROM vectors "
        "ORDER BY sqrt(power(CUBE(%s) <-> vec_low, 2) + "
        "power(CUBE(%s) <-> vec_high, 2)) ASC LIMIT 5"
    )
    start = timer()
    # One cursor is reused for all lookups and closed when the loop is done,
    # instead of opening (and never closing) a new one per iteration.
    with db.cursor() as cur:
        for _ in range(1000):
            encodings = [random.random() for _ in range(128)]
            cur.execute(query, (encodings[:64], encodings[64:]))
    end = timer()
    print(f"Done!, {end-start}, aprox. {(end-start) / 1000} for an operation")
finally:
    db.close()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment