Skip to content

Instantly share code, notes, and snippets.

@kmuthukk
Created August 27, 2019 02:26
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save kmuthukk/c9e7a0e8bf748a37117444aeba6f5457 to your computer and use it in GitHub Desktop.
Save kmuthukk/c9e7a0e8bf748a37117444aeba6f5457 to your computer and use it in GitHub Desktop.
# pip install yb-cassandra-driver
from cassandra.cluster import Cluster
import time
import random
from multiprocessing.dummy import Pool as ThreadPool
# Load phase parameters.
num_write_threads = 4  # size of the writer thread pool
num_users = 500  # rows inserted by each writer thread

# One module-level connection to the local node; the functions in this
# script all issue their statements through this shared session.
cluster = Cluster(["127.0.0.1"])
session = cluster.connect()
def _run_timed(statements, done_message):
    """Execute CQL statements in order, then print done_message and the elapsed ms."""
    start_time = time.time()
    for statement in statements:
        session.execute(statement)
    now_time = time.time()
    print(done_message)
    print("Time: %s ms" % ((now_time - start_time) * 1000))
    print("====================")


def create_table():
    """Recreate the ``users`` table and its ``name_idx`` index in keyspace ``k``.

    Runs three timed phases on the module-level ``session`` and prints the
    elapsed wall-clock time of each one:

    1. Create keyspace ``k`` if missing, switch to it, and drop any existing
       ``users`` table.
    2. Create ``users(id text PRIMARY KEY, name text, age int)`` with
       transactions enabled.
    3. Create the secondary index ``name_idx`` on ``users(name)``.

    Returns:
        None. Exceptions raised by ``session.execute`` propagate unchanged.
    """
    _run_timed(
        [
            "CREATE KEYSPACE IF NOT EXISTS k",
            "USE k",
            "DROP TABLE IF EXISTS users",
        ],
        "Dropped (if exists): users table",
    )

    # NOTE(review): transactions are presumably enabled because the YCQL
    # secondary index created below needs a transactional table -- confirm
    # against the YugabyteDB documentation.
    _run_timed(
        [
            """
            CREATE TABLE IF NOT EXISTS users(
                id text,
                name text,
                age int,
                PRIMARY KEY(id)
            ) WITH TRANSACTIONS = {'enabled' : true}
            """,
        ],
        "Created users table",
    )

    # The original message here repeated "Created users table", which made the
    # index-creation timing indistinguishable from the table-creation timing.
    _run_timed(
        [
            """
            CREATE INDEX IF NOT EXISTS name_idx ON users(name)
            """,
        ],
        "Created name_idx index on users table",
    )
def load_data_slave(thread_num):
    """Insert ``num_users`` rows into ``users`` and print this worker's timings.

    Each row gets the id ``user-<thread_num>-<i>``, the name ``name--<i>`` and
    an age of ``20 + (i % 50)``. Rows are written one statement at a time
    through the module-level ``session``.

    Args:
        thread_num: Worker number; used in the row ids and as the log prefix.
    """
    thread_id = str(thread_num)
    log_prefix = "Thread-" + thread_id
    insert_cql = """INSERT INTO users (id, name, age) VALUES (%s, %s, %s)"""

    print("%s: Inserting %d rows..." % (log_prefix, num_users))
    began = time.time()
    for row_num in range(num_users):
        row = (
            "user-%s-%d" % (thread_id, row_num),
            "name--%d" % row_num,
            20 + (row_num % 50),
        )
        session.execute(insert_cql, row)
    elapsed = time.time() - began

    print("%s: Inserted %d rows" % (log_prefix, num_users))
    print("%s: Time: %s ms" % (log_prefix, elapsed * 1000))
    print("%s: Inserts/sec: %s" % (log_prefix, num_users / elapsed))
    print("%s: Avg Time: %s ms" % (log_prefix, elapsed * 1000 / num_users))
def load_data():
    """Run the load phase on a thread pool and print aggregate throughput.

    Starts ``num_write_threads`` workers, each running ``load_data_slave`` with
    its own worker number (0 .. num_write_threads - 1), waits for all of them,
    then prints the total row count, total wall-clock time and inserts/sec.

    Returns:
        None. An exception raised inside a worker is re-raised here by
        ``pool.map``; the pool is still closed and joined in that case.
    """
    pool = ThreadPool(num_write_threads)
    try:
        t1 = time.time()
        # The workers only print and return None, so the map result is discarded.
        pool.map(load_data_slave, range(num_write_threads))
        t2 = time.time()
    finally:
        # Release the worker threads; the original never closed the pool.
        pool.close()
        pool.join()

    total_rows = num_users * num_write_threads
    print("====================")
    print("Inserted %d rows" % (total_rows))
    print("Total Time: %s ms" % ((t2 - t1) * 1000))
    print("Inserts/sec: %s" % (total_rows / (t2 - t1)))
    print("====================")
# Main: rebuild the schema, then run the load phase.
try:
    create_table()
    load_data()
finally:
    # Shut the cluster down even when a step fails; the original never
    # released the connection opened at module level.
    cluster.shutdown()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment