Skip to content

Instantly share code, notes, and snippets.

@timtadh
Created August 23, 2012 21:43
Show Gist options
  • Save timtadh/3442267 to your computer and use it in GitHub Desktop.
Save timtadh/3442267 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python
import sys
import random
from thrift import Thrift
from thrift.transport import TSocket
from thrift.transport import TTransport
from thrift.protocol import TBinaryProtocol
from hbase import Hbase
from hbase.ttypes import *
from datetime import datetime
# Connect to the Thrift server at localhost:9090. The raw socket is wrapped
# in a buffered transport straight away: buffering is critical, because raw
# sockets are very slow.
transport = TTransport.TBufferedTransport(TSocket.TSocket('localhost', 9090))
# Binary protocol layered on top of the buffered transport.
protocol = TBinaryProtocol.TBinaryProtocol(transport)
# HBase client that issues every request in this script.
client = Hbase.Client(protocol)
transport.open()
# Try to create 'test_table' with a single 'meta' column descriptor. If the
# server reports that the table already exists, print the error message and
# carry on so the benchmark reuses the existing table.
try:
    client.createTable('test_table', [ColumnDescriptor(name='meta')])
except AlreadyExists as tx:  # "as" form parses on Python 2.6+ and Python 3
    # Single-argument print(...) produces the same output on Python 2 and 3.
    print("Thrift exception")
    print('%s' % (tx.message))
# Run the benchmark at 10**0, 10**1, ..., 10**7 rows. Each pass deletes the
# row keys it is about to write, recreates them with one random 'meta:*'
# cell, times a single random row lookup, then times a scan over 'meta:a'.
# The whole run is wrapped in try/finally so the Thrift transport is closed
# even if a request fails part-way through.
try:
    # range(8) goes up to 10,000,000 rows, takes about 3hrs on my 2GB linode
    for benchmark in range(8):
        num = 10 ** benchmark
        print("Benchmark #%d: %d rows" % (benchmark, num))

        # Phase 1: delete row keys "0" .. str(num - 1). Rows with other keys
        # that are already in the table are left alone.
        start = datetime.now()
        for i in range(num):
            client.deleteAllRow('test_table', "%d" % i)
        print(' Rows deleted: %s' % (datetime.now() - start))

        # Phase 2: write one cell per row, in a randomly chosen column
        # among meta:a / meta:b / meta:c, holding a random value in [0, 100).
        create_start = datetime.now()
        for i in range(num):
            column = "meta:%s" % random.choice(['a', 'b', 'c'])
            value = "%d" % random.randrange(100)
            client.mutateRow('test_table', "%d" % i,
                             [Mutation(column=column, value=value)])
        print(' Rows created: %s' % (datetime.now() - create_start))

        # Phase 3: time a single lookup of a randomly chosen row key. The
        # result itself is not needed, only the elapsed time.
        get_start = datetime.now()
        client.getRow('test_table', "%s" % random.randrange(num))
        print(' Row lookup: %s' % (datetime.now() - get_start))

        # Phase 4: scan from the start of the table, restricted to column
        # 'meta:a', counting one per non-empty scannerGet() result.
        scan_start = datetime.now()
        scanner_id = client.scannerOpen('test_table', '', ['meta:a'])
        count = 0
        try:
            # Fetch exactly once before the loop. The original fetched twice
            # here, which threw away the first result and under-counted by
            # one.
            rows = client.scannerGet(scanner_id)
            while rows:
                count += 1
                rows = client.scannerGet(scanner_id)
            # Measure before closing so the close call is not part of the
            # reported scan time.
            scan_elapsed = datetime.now() - scan_start
        finally:
            # Close the scanner so it does not stay open after each pass.
            client.scannerClose(scanner_id)
        print(' Fetched %s rows with meta:A: %s' % (count, scan_elapsed))

        print(' Total Benchmark Time: %s' % (datetime.now() - start))
finally:
    transport.close()
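# Sample output --
# (NOTE: the small runs report ~300 fetched rows even though only 1-100 rows
# were written; presumably the table still held rows left over from an
# earlier, larger run, since each pass only deletes the keys it rewrites.)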
#Benchmark #0: 1 rows
# Rows deleted: 0:00:00.000816
# Rows created: 0:00:00.001384
# Row lookup: 0:00:00.001078
# Fetched 319 rows with meta:A: 0:00:00.176335
# Total Benchmark Time: 0:00:00.179796
#Benchmark #1: 10 rows
# Rows deleted: 0:00:00.004934
# Rows created: 0:00:00.006587
# Row lookup: 0:00:00.000725
# Fetched 316 rows with meta:A: 0:00:00.171574
# Total Benchmark Time: 0:00:00.183974
#Benchmark #2: 100 rows
# Rows deleted: 0:00:00.051719
# Rows created: 0:00:00.064601
# Row lookup: 0:00:00.000687
# Fetched 328 rows with meta:A: 0:00:00.176993
# Total Benchmark Time: 0:00:00.294160
#Benchmark #3: 1000 rows
# Rows deleted: 0:00:00.488865
# Rows created: 0:00:00.585270
# Row lookup: 0:00:00.000675
# Fetched 312 rows with meta:A: 0:00:00.168604
# Total Benchmark Time: 0:00:01.243603
#Benchmark #4: 10000 rows
# Rows deleted: 0:00:04.546011
# Rows created: 0:00:05.570421
# Row lookup: 0:00:00.000995
# Fetched 3372 rows with meta:A: 0:00:01.810908
# Total Benchmark Time: 0:00:11.928531
#Benchmark #5: 100000 rows
# Rows deleted: 0:00:41.258448
# Rows created: 0:00:55.093361
# Row lookup: 0:00:00.000818
# Fetched 33274 rows with meta:A: 0:00:17.005304
# Total Benchmark Time: 0:01:53.358131
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment