Created
July 10, 2012 18:30
-
-
Save srs81/3085350 to your computer and use it in GitHub Desktop.
HBase write performance testing: including batching and number of threads
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import base64
import os
import random
import sys
import time
from multiprocessing import Pool

import happybase
# Total number of rows to be inserted (split evenly across the workers)
TOTAL_PUTS = 100000000
# Number of puts buffered client-side before one batch operation is sent
BATCH_PUTS = 10000
# A progress line is written after every this many puts in a worker
OUTPUT_ITERATIONS = 50000
# Number of simultaneous "threads" (actually processes in a multiprocessing Pool)
NUMBER_OF_THREADS = 10
# The HBase Thrift servers; each worker connects to one chosen at random
servers = ['127.0.0.1']
# The process | |
def hbase(i):
    """Run one benchmark worker: create table ``table<i>`` and fill it with random rows.

    The worker connects to a randomly chosen Thrift server from ``servers``,
    creates a table with a single column family ``c`` and writes
    ``TOTAL_PUTS // NUMBER_OF_THREADS`` rows with random row keys, sending the
    buffered puts every ``BATCH_PUTS`` rows and writing a progress line to
    stdout every ``OUTPUT_ITERATIONS`` rows.

    Args:
        i: Worker index; used for the table name and the progress label.

    Raises:
        Whatever ``happybase`` raises, e.g. when the table cannot be created.
    """
    process_id = "Process ID #" + str(i)
    # Assign a random Thrift server
    connection = happybase.Connection(random.choice(servers))
    try:
        # Create the HBase table
        table_name = "table" + str(i)
        connection.create_table(table_name, {"c": {}})
        batch = connection.table(table_name).batch()

        # Loop-invariant cell coordinates, built once instead of per row
        column = "c:" + "counter"
        value = "value"

        starttime = time.time()
        # Floor division keeps this an int on Python 3 (range() rejects floats).
        # If TOTAL_PUTS is not a multiple of NUMBER_OF_THREADS the remainder
        # is not written.
        iterations = TOTAL_PUTS // NUMBER_OF_THREADS
        # 'done' counts completed puts (1-based) so that every batch holds
        # exactly BATCH_PUTS rows and the parameter 'i' is not shadowed.
        for done in range(1, iterations + 1):
            # Add a random row key to the batch
            row_key = base64.urlsafe_b64encode(os.urandom(10))
            batch.put(row_key, {column: value})
            # If it's time, send the batch
            if done % BATCH_PUTS == 0:
                batch.send()
            # If it's time, output
            if done % OUTPUT_ITERATIONS == 0:
                totaltime = time.time() - starttime
                if totaltime > 0:
                    qps = done / totaltime
                else:
                    qps = "Infinity!"
                sys.stdout.write(processID_line(process_id, done, totaltime, qps))
                sys.stdout.flush()
        # Flush the trailing partial batch; without this the last
        # (iterations % BATCH_PUTS) rows would never reach HBase.
        if iterations % BATCH_PUTS != 0:
            batch.send()
    finally:
        # Always release the Thrift socket, even when the run fails
        connection.close()


def processID_line(process_id, done, totaltime, qps):
    """Format one carriage-return-terminated progress line for a worker."""
    return (process_id + " => " + str(done) + " items took: " + str(totaltime)
            + " seconds. Qps = " + str(qps) + "\r")
if __name__ == '__main__':
    # Fire off the "threads": one hbase() call per worker index, each in its
    # own process, and time the whole run.
    starttime = time.time()
    pool = Pool(processes=NUMBER_OF_THREADS)
    try:
        pool.map(hbase, range(0, NUMBER_OF_THREADS))
        # Stop the clock before pool teardown so cleanup is not measured
        totaltime = time.time() - starttime
    finally:
        # map() has returned (or raised), so no useful work is pending;
        # stop the worker processes and reap them instead of leaking them.
        pool.terminate()
        pool.join()
    if totaltime > 0:
        qps = TOTAL_PUTS / totaltime
    else:
        qps = "Infinity!"
    # Blank out the last progress line and return to column 0, so the
    # summary starts at the beginning of the line rather than at column 80.
    sys.stdout.write("\r" + " " * 80 + "\r")
    sys.stdout.flush()
    print(str(TOTAL_PUTS) + " items took: " + str(totaltime) + " seconds. Qps = " + str(qps) + "\r")
# c = happybase.Connection() | |
# for t in c.tables(): | |
# c.disable_table(t) | |
# c.delete_table(t) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment