Skip to content

Instantly share code, notes, and snippets.

@srs81
Created July 10, 2012 18:30
Show Gist options
  • Save srs81/3085350 to your computer and use it in GitHub Desktop.
Save srs81/3085350 to your computer and use it in GitHub Desktop.
HBase write performance testing: including batching and number of threads
from multiprocessing import Pool
import happybase, os, random, base64, time, sys
# Total number of rows to be inserted, summed over all worker processes
# (each worker inserts TOTAL_PUTS divided by NUMBER_OF_THREADS rows)
TOTAL_PUTS = 100000000
# Number of puts that are grouped into one batch operation
BATCH_PUTS = 10000
# Progress output is written once every this many iterations
OUTPUT_ITERATIONS = 50000
# Number of simultaneous workers; they are multiprocessing pool
# processes, not threads, despite the name
NUMBER_OF_THREADS = 10
# Addresses of the HBase Thrift servers; each worker picks one at random
servers = ['127.0.0.1']
# Worker function: pool.map runs it once per process, passing the process index
def hbase(i):
    """Create table "table<i>" and write this worker's share of rows to it.

    Connects to a randomly chosen Thrift server from ``servers``, creates a
    table with the single column family "c", and issues
    TOTAL_PUTS // NUMBER_OF_THREADS puts with random row keys. Puts are sent
    in batches of BATCH_PUTS, and a progress line is written to stdout every
    OUTPUT_ITERATIONS puts.

    i -- index of this worker; used for the table name and the progress label.

    Returns None. Exceptions raised by happybase (for example when the table
    already exists) propagate to the caller after the connection is closed.
    """
    # Assign a random Thrift server
    thriftServer = random.choice(servers)
    processID = "Process ID #" + str(i)
    connection = happybase.Connection(thriftServer)
    try:
        # Create the HBase table
        randomTable = "table" + str(i)
        connection.create_table(randomTable, {"c": {}})
        b = connection.table(randomTable).batch()
        # Floor division keeps the count an integer on Python 3 as well,
        # where "/" would yield a float that range() rejects.
        iterations = TOTAL_PUTS // NUMBER_OF_THREADS
        # Every row gets the same column and value; only the row key varies,
        # so build them once instead of on every iteration.
        column = "c:" + "counter"
        value = "value"
        starttime = time.time()
        for n in range(iterations):
            # Add a random row key to the batch
            randomRowKey = base64.urlsafe_b64encode(os.urandom(10))
            b.put(randomRowKey, {column: value})
            # Count of puts issued so far; using it (rather than the zero-based
            # index) avoids sending a one-row batch on the first iteration.
            done = n + 1
            # If it's time, send the batch
            if done % BATCH_PUTS == 0:
                b.send()
            # If it's time, output
            if done % OUTPUT_ITERATIONS == 0:
                totaltime = time.time() - starttime
                qps = done / totaltime if totaltime > 0 else "Infinity!"
                sys.stdout.write(
                    processID + " => " + str(done) + " items took: "
                    + str(totaltime) + " seconds. Qps = " + str(qps) + "\r"
                )
                sys.stdout.flush()
        # Send the puts left over after the last full batch; without this
        # the tail of the run would never reach HBase.
        if iterations % BATCH_PUTS != 0:
            b.send()
    finally:
        # Release the Thrift connection even if the run failed part-way.
        connection.close()
if __name__ == '__main__':
    # Fire off the "threads": one pool process per worker index, each
    # running hbase() against its own table.
    starttime = time.time()
    pool = Pool(processes=NUMBER_OF_THREADS)
    try:
        pool.map(hbase, range(0, NUMBER_OF_THREADS))
        # Stop the clock as soon as all workers have returned, before
        # the pool shutdown below.
        totaltime = time.time() - starttime
    finally:
        # Shut the worker processes down instead of leaving them to be
        # reaped at interpreter exit.
        pool.close()
        pool.join()
    if totaltime > 0:
        qps = TOTAL_PUTS / totaltime
    else:
        qps = "Infinity!"
    # Blank out the last progress line (which ended with "\r") and return
    # the cursor to column 0 so the summary is not printed after 80 spaces.
    sys.stdout.write("\r" + " " * 80 + "\r")
    sys.stdout.flush()
    print (str(TOTAL_PUTS) + " items took: " + str(totaltime) + " seconds. Qps = " + str(qps) + "\r")
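# Optional cleanup (disabled). WARNING: if uncommented, this disables and
# deletes every table the connection lists, not only the test tables.
# c = happybase.Connection()
# for t in c.tables():
#     c.disable_table(t)
#     c.delete_table(t)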
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment