Skip to content

Instantly share code, notes, and snippets.

@vovayartsev
Created November 23, 2019 15:31
Show Gist options
  • Save vovayartsev/dbac1d2ee8b9517fba14de1faf73964b to your computer and use it in GitHub Desktop.
Save vovayartsev/dbac1d2ee8b9517fba14de1faf73964b to your computer and use it in GitHub Desktop.
# BENCHMARK for kNN via PG CUBES
# DB SETUP:
# CREATE extension cube;
# CREATE TABLE test_cubes AS
# SELECT i AS id,
# cube(array[
# random() * 10000 - 5000,
# random() * 10000 - 5000,
# random() * 10000 - 5000,
# random() * 10000 - 5000,
# random() * 10000 - 5000,
# random() * 10000 - 5000,
# random() * 10000 - 5000,
# random() * 10000 - 5000,
# random() * 10000 - 5000,
# random() * 10000 - 5000,
# random() * 10000 - 5000,
# random() * 10000 - 5000,
# random() * 10000 - 5000,
# random() * 10000 - 5000,
# random() * 10000 - 5000,
# random() * 10000 - 5000,
# random() * 10000 - 5000,
# random() * 10000 - 5000,
# random() * 10000 - 5000,
# random() * 10000 - 5000
# ])
# FROM generate_series(1,10000) i;
# ALTER TABLE test_cubes ADD PRIMARY KEY (id);
# CREATE INDEX magic ON test_cubes USING gist (cube);
# Starting Server:
# gunicorn --bind 0.0.0.0:5000 knn:app
# Starting Client:
# ab -n 3000 http://0.0.0.0:5000/
# Document Path: /
# Document Length: 8688 bytes
# Concurrency Level: 1
# Time taken for tests: 13.484 seconds
# Complete requests: 3000
# Failed requests: 0
# Total transferred: 26526000 bytes
# HTML transferred: 26064000 bytes
# Requests per second: 222.48 [#/sec] (mean)
# Time per request: 4.495 [ms] (mean)
# Time per request: 4.495 [ms] (mean, across all concurrent requests)
# Transfer rate: 1921.06 [Kbytes/sec] received
# Connection Times (ms)
# min mean[+/-sd] median max
# Connect: 0 0 0.1 0 1
# Processing: 3 4 1.1 4 54
# Waiting: 3 4 1.1 4 54
# Total: 4 4 1.2 4 55
# Percentage of the requests served within a certain time (ms)
# 50% 4
# 66% 4
# 75% 4
# 80% 5
# 90% 5
# 95% 5
# 98% 6
# 99% 7
# 100% 55 (longest request)
# Memory Usage:
# postres: ~60MB
# python: ~50mb
from flask import Flask
from flask_sqlalchemy import SQLAlchemy
from flask import jsonify
app = Flask(__name__)
app.config['SQLALCHEMY_DATABASE_URI'] = 'postgresql:///cubes'
app.config['SQLALCHEMY_TRACK_MODIFICATIONS'] = False
db = SQLAlchemy(app)
def sql(rawSql, sqlVars={}):
"Execute raw sql, optionally with prepared query"
assert type(rawSql) == str
assert type(sqlVars) == dict
res = db.session.execute(rawSql, sqlVars)
# db.session.commit()
return res
# SQL below is based on this article
# https://www.depesz.com/2016/01/10/waiting-for-9-6-cube-extension-knn-support/
@app.route("/")
def hello():
results = sql("""
SELECT * FROM (
SELECT id, cube, cube <-> cube(array[0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0]) AS distance
FROM test_cubes tc
) AS x
ORDER BY distance ASC LIMIT 22
""")
return jsonify([row._row for row in results]), 200
if __name__ == "__main__":
app.run()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment