Skip to content

Instantly share code, notes, and snippets.

@kmuthukk
Last active June 8, 2022 18:15
Show Gist options
  • Save kmuthukk/5eb29dc11ab736c3fe71f37e84fb95dc to your computer and use it in GitHub Desktop.
Program to test range scan performance on columns with jsonb payload and varying number of jsonb attributes
# Dependencies:
# On CentOS you can install psycopg2 thus:
#
# sudo yum install postgresql-libs
# sudo yum install python-psycopg2
import psycopg2;
import time
import random
from multiprocessing.dummy import Pool as ThreadPool
from functools import partial
# Connection string for a local YugabyteDB server (PostgreSQL wire protocol, port 5433).
connect_str="host=localhost dbname=yugabyte user=yugabyte port=5433"
# Load Phase params
num_write_threads=2  # concurrent writer threads; each inserts its own id-namespaced rows
num_users=10  # distinct user ids per writer thread
num_actions_per_user=1000  # rows inserted per user (also the expected range-scan result size)
# Read Only Phase params
num_reads=100  # number of timed range-scan queries to average over
def create_table(col_type, num_attrs):
    """Drop and re-create the user_activity table with an `action` column of col_type.

    col_type: SQL type name for the payload column ("jsonb", "json" or "text").
              It is interpolated directly into the DDL (identifiers/type names
              cannot be bound as parameters), so it must come from trusted
              code, never from user input.
    num_attrs: unused here; kept for signature parity with the other phases.
    """
    conn = psycopg2.connect(connect_str)
    try:
        conn.set_session(autocommit=True)
        cur = conn.cursor()
        start_time = time.time()
        cur.execute("""DROP TABLE IF EXISTS user_activity""")
        now_time = time.time()
        print("Dropped (if exists): user_activity table")
        print("Time: %s ms" % ((now_time - start_time) * 1000))
        print("====================")
        start_time = time.time()
        cur.execute("""
            CREATE TABLE IF NOT EXISTS user_activity(
              id text,
              action_id integer,
              action """ + col_type + """,
              PRIMARY KEY(id, action_id)
            )
            """)
        now_time = time.time()
        print("Created user_activity table")
        print("Time: %s ms" % ((now_time - start_time) * 1000))
        print("====================")
    finally:
        # Fix: the original leaked the connection; close it even on error.
        conn.close()
def load_data_worker(col_type, num_attrs, thread_num):
    """Insert num_users * num_actions_per_user rows from one writer thread.

    Each row's `action` payload is a JSON object text with num_attrs integer
    attributes ('{"attr_0" : v, ...}'). Row ids are namespaced by thread_num
    ("user-<thread>-<idx>") so concurrent workers never collide on the
    primary key. Prints per-thread throughput stats when done.
    """
    thread_id = str(thread_num)
    conn = psycopg2.connect(connect_str)
    try:
        conn.set_session(autocommit=True)
        cur = conn.cursor()
        print("Thread-" + thread_id + ": Inserting %d users with %d actions with %d attrs in %s column "
              % (num_users, num_actions_per_user, num_attrs, col_type))
        start_time = time.time()
        for idx in range(num_users):
            for jdx in range(num_actions_per_user):
                # Build the payload with a single join (O(n)) instead of the
                # original repeated string concatenation; output is
                # byte-identical: '{"attr_0" : 0, "attr_1" : 1, ...}'.
                json_val = '{' + ', '.join(
                    '"attr_' + str(kdx) + '" : ' + str(1000 * jdx + kdx)
                    for kdx in range(num_attrs)) + '}'
                cur.execute("""INSERT INTO user_activity (id, action_id, action) VALUES (%s, %s, %s)""",
                            ("user-" + thread_id + "-" + str(idx),
                             jdx,
                             json_val))
        now_time = time.time()
        rows_inserted = num_users * num_actions_per_user
        print("Thread-" + thread_id + ": Inserted %d rows" % (rows_inserted))
        print("Thread-" + thread_id + ": Time: %s ms" % ((now_time - start_time) * 1000))
        print("Thread-" + thread_id + ": Inserts/sec: %s" % (rows_inserted / (now_time - start_time)))
        print("Thread-" + thread_id + ": Avg Time: %s ms" % ((now_time - start_time) * 1000 / (rows_inserted)))
    finally:
        # Fix: the original leaked the per-thread connection.
        conn.close()
def load_data(col_type, num_attrs):
    """Run the load phase across num_write_threads concurrent workers and print totals."""
    pool = ThreadPool(num_write_threads)
    t1 = time.time()
    try:
        pool.map(partial(load_data_worker, col_type, num_attrs), range(num_write_threads))
    finally:
        # Fix: the original never closed the pool; release its threads.
        pool.close()
        pool.join()
    t2 = time.time()
    total_users = num_users * num_write_threads
    # BUG FIX: the aggregate Inserts/sec must divide the ROW count by the
    # elapsed time; the original divided the USER count (off by a factor of
    # num_actions_per_user).
    total_rows = total_users * num_actions_per_user
    print("====================")
    print("Inserted %d users with %d actions and %d attributes per action (type %s)" %
          (total_users, num_actions_per_user, num_attrs, col_type))
    print("Total Time: %s ms" % ((t2 - t1) * 1000))
    print("Inserts/sec: %s" % (total_rows / (t2 - t1)))
    print("====================")
def read_data(col_type, num_attrs):
    """Time num_reads range scans over random users and print the average latency.

    Each query fetches all num_actions_per_user rows of one randomly chosen
    (writer thread, user) pair produced by the load phase.
    """
    conn = psycopg2.connect(connect_str)
    try:
        conn.set_session(autocommit=True)
        cur = conn.cursor()
        # Prepare once so each timed iteration only pays for EXECUTE.
        cur.execute("PREPARE my_select(text) AS SELECT action_id, action FROM user_activity WHERE id=$1")
        delta = 0
        for i in range(num_reads):
            # generate a random writer thread id + user id to read
            rand_thread_id = str(random.randint(0, num_write_threads - 1))
            rand_user_id = str(random.randint(0, num_users - 1))
            user_id = "user-" + rand_thread_id + "-" + rand_user_id
            start_time = time.time()
            cur.execute("EXECUTE my_select(%s)", (user_id, ))
            rows = cur.fetchall()
            now_time = time.time()
            delta = delta + (now_time - start_time)
            # For debugging
            # print("Found=" + str(rows))
        print("Avg Time For Range Query: %s ms (returns %d rows each of %d attrs in %s column)" %
              ((delta * 1000) / num_reads, num_actions_per_user, num_attrs, col_type))
    finally:
        # Fix: the original leaked the connection (and its PREPAREd statement).
        conn.close()
# Main driver: for every payload column type, benchmark a full
# create / load / read cycle at increasing attribute counts.
for payload_type in ["jsonb", "json", "text"]:
    for attr_count in [1, 4, 40, 100, 400]:
        create_table(payload_type, attr_count)
        load_data(payload_type, attr_count)
        read_data(payload_type, attr_count)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment