Skip to content

Instantly share code, notes, and snippets.

@kmuthukk
Last active January 8, 2020 22:40
Show Gist options
  • Save kmuthukk/4ee79acf37bab8d080b572b918308708 to your computer and use it in GitHub Desktop.
Save kmuthukk/4ee79acf37bab8d080b572b918308708 to your computer and use it in GitHub Desktop.
# pip install yb-cassandra-driver
from cassandra.cluster import Cluster
from cassandra import ConsistencyLevel
import time
import random
from multiprocessing.dummy import Pool as ThreadPool
# Load Phase params
# Number of concurrent worker threads used by the insert phase.
num_write_threads = 1
# Number of concurrent worker threads used by the read phase.
num_read_threads = 1
# Rows each worker thread inserts (and later reads back).
num_users = 5000

# One driver connection to the local node; `session` is shared by every
# function and worker thread below.
cluster = Cluster(['127.0.0.1'])
session = cluster.connect()
def create_table():
    """Reset the benchmark schema and report how long each stage took.

    Stage 1 ensures keyspace ``k`` exists, switches to it and drops any
    existing ``users`` table. Stage 2 creates ``users`` again with
    transactions disabled. After each stage the elapsed wall-clock time is
    printed in milliseconds.
    """
    separator = "===================="
    # Same text the driver has always been sent, assembled line by line.
    create_users_ddl = (
        "\n"
        "CREATE TABLE IF NOT EXISTS users(\n"
        "id text,\n"
        "name text,\n"
        "age int,\n"
        "PRIMARY KEY(id)\n"
        ") WITH TRANSACTIONS = {'enabled' : false}\n"
    )

    def report(message, began):
        # Capture the end time first so printing is not part of the measurement.
        elapsed_ms = (time.time() - began) * 1000
        print(message)
        print("Time: %s ms" % elapsed_ms)
        print(separator)

    # Stage 1: keyspace + drop.
    began = time.time()
    for statement in (
        "CREATE KEYSPACE IF NOT EXISTS k",
        "USE k",
        "DROP TABLE IF EXISTS users",
    ):
        session.execute(statement)
    report("Dropped (if exists): users table", began)

    # Stage 2: create.
    began = time.time()
    session.execute(create_users_ddl)
    report("Created users table", began)
def load_data_slave(thread_num):
    """Insert ``num_users`` rows for one worker and print its mean insert latency.

    thread_num: index of this worker. It is embedded in every row id
    (``user-<thread_num>-<n>``), so different workers write different keys.
    """
    tid = str(thread_num)
    insert_stmt = session.prepare("INSERT INTO users (id, name, age) VALUES (?, ?, ?)")

    began = time.time()
    for i in range(num_users):
        suffix = str(i)
        # Bind values: id, name, and an age cycling through 20..69.
        row = ["user-" + tid + "-" + suffix, "name--" + suffix, 20 + (i % 50)]
        session.execute(insert_stmt, row)
    finished = time.time()

    avg_ms = (finished - began) * 1000 / (num_users)
    print("Thread-" + tid + ": Avg Insert Time: %s ms" % avg_ms)
def query_data_slave(thread_num):
    """Look up ``num_users`` rows by primary key and print the mean read latency.

    thread_num: index of this worker. The ids queried are
    ``user-<thread_num>-<n>``, the same ids ``load_data_slave`` inserts for
    the same worker index.
    """
    thread_id = str(thread_num)
    prepared = session.prepare("""SELECT name, age FROM users WHERE id = ?""")
    start_time = time.time()
    for idx in range(num_users):
        # The returned rows are intentionally not kept: only the time spent
        # in execute() is being measured.
        session.execute(prepared, ["user-" + thread_id + "-" + str(idx)])
    now_time = time.time()
    print("Thread-" + thread_id + ": Avg Read Time: %s ms" % ((now_time - start_time) * 1000 / (num_users)))
def load_data():
    """Run the insert phase on ``num_write_threads`` threads and print throughput.

    Each worker runs ``load_data_slave`` with its own index. The summary
    reports the total rows inserted, the wall-clock time of the whole phase
    in milliseconds, and the resulting inserts per second.
    """
    pool = ThreadPool(num_write_threads)
    try:
        t1 = time.time()
        # map() blocks until every worker has finished; its return values
        # (all None) are not needed.
        pool.map(load_data_slave, range(num_write_threads))
        t2 = time.time()
    finally:
        # Release the worker threads even if a worker raised. Done outside
        # the timed window so the measurement is unchanged.
        pool.close()
        pool.join()
    total_rows = num_users * num_write_threads
    print("====================")
    print("Inserted %d rows" % (total_rows))
    print("Total Time: %s ms" % ((t2 - t1) * 1000))
    print("Inserts/sec: %s" % (total_rows / (t2 - t1)))
    print("====================")
def query_data():
    """Run the read phase on ``num_read_threads`` threads and print throughput.

    Each worker runs ``query_data_slave`` with its own index. The summary
    reports the total rows queried, the wall-clock time of the whole phase
    in milliseconds, and the resulting queries per second.
    """
    pool = ThreadPool(num_read_threads)
    try:
        t1 = time.time()
        # map() blocks until every worker has finished; its return values
        # (all None) are not needed.
        pool.map(query_data_slave, range(num_read_threads))
        t2 = time.time()
    finally:
        # Release the worker threads even if a worker raised. Done outside
        # the timed window so the measurement is unchanged.
        pool.close()
        pool.join()
    total_rows = num_users * num_read_threads
    print("====================")
    print("Queried %d rows" % (total_rows))
    print("Total Time: %s ms" % ((t2 - t1) * 1000))
    print("Queries/sec: %s" % (total_rows / (t2 - t1)))
    print("====================")
# Main: rebuild the table, then run the insert phase followed by the read phase.
try:
    create_table()
    load_data()
    query_data()
finally:
    # Close the driver's sessions and connections whether or not a phase
    # failed, so the script does not exit with them still open.
    cluster.shutdown()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment