Skip to content

Instantly share code, notes, and snippets.

@cnsoft
Forked from toastdriven/RiakHttp10k.txt
Created June 27, 2012 15:39
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save cnsoft/3004903 to your computer and use it in GitHub Desktop.
Save cnsoft/3004903 to your computer and use it in GitHub Desktop.
Benching Riak's performance with the Python client.
import datetime
import json
import random
import riak
import time
import uuid
# Sample data pools. Every generated page-view record draws one entry at
# random from each of the three tuples below.

# Request paths that a simulated hit may target.
URLS = (
    '/',
    '/news/',
    '/news/2010/08/13/first-post/',
    '/news/2010/08/13/second-post/',
    '/weblogs/slc_punk/2010/08/14/the_show/',
)

# Candidate access times, spread over 13-15 August 2010.
ACCESSED = (
    datetime.datetime(2010, 8, 13, 7, 35, 0),
    datetime.datetime(2010, 8, 13, 9, 16, 0),
    datetime.datetime(2010, 8, 14, 10, 47, 0),
    datetime.datetime(2010, 8, 14, 18, 10, 0),
    datetime.datetime(2010, 8, 15, 8, 0, 0),
)

# Candidate client addresses.
IPS = (
    '127.0.0.1',
    '192.168.0.1',
    '107.123.4.7',
    '244.34.156.5',
    '10.0.1.2',
)

# Number of records written by the benchmark run.
TOTAL_HITS = 10000
# Larger alternative run size, kept disabled:
# TOTAL_HITS = 10000000
def make_timestamp(date):
    """Convert a date or datetime into a float timestamp.

    The value's ``timetuple()`` is handed to ``time.mktime``, so anything
    exposing a ``timetuple()`` method (``datetime.date``,
    ``datetime.datetime``) is accepted.
    """
    broken_down = date.timetuple()
    return time.mktime(broken_down)
def get_access():
    """Build one random page-view record.

    Returns a dict with three keys: ``'url'`` (an entry of ``URLS``),
    ``'accessed'`` (an entry of ``ACCESSED`` converted by
    ``make_timestamp``) and ``'ip'`` (an entry of ``IPS``).
    """
    # Draw in the order url -> accessed -> ip so the sequence of random
    # choices stays the same for a given random state.
    url = random.choice(URLS)
    accessed_at = make_timestamp(random.choice(ACCESSED))
    ip_address = random.choice(IPS)
    return {'url': url, 'accessed': accessed_at, 'ip': ip_address}
def time_it(func):
    """Call ``func()`` once and measure how long it takes.

    Returns a 2-tuple ``(elapsed_seconds, result)``: the wall-clock
    difference of two ``time.time()`` readings, and whatever ``func``
    returned.
    """
    began = time.time()
    outcome = func()
    elapsed = time.time() - began
    return (elapsed, outcome)
# Riak implementation
# Module-level connection objects used by the benchmark functions in this
# file. Exactly one `client = ...` assignment should be active at a time.
# HTTP Transport (client constructed with no arguments)
# client = riak.RiakClient()
# Protobuf Transport (port 8087, riak.RiakPbcTransport)
client = riak.RiakClient(port=8087, transport_class=riak.RiakPbcTransport)
# Bucket handle used by write_docs() and nuke_it(); the map/reduce queries
# address the same bucket by its name, 'test_pageviews'.
bucket = client.bucket('test_pageviews')
def write_docs():
    """Store ``TOTAL_HITS`` random page-view records in the bucket.

    Each record comes from ``get_access()`` and is stored under a fresh
    ``uuid.uuid1()`` key. A progress line is printed every 1000 records.
    """
    for index in xrange(TOTAL_HITS):
        if not index % 1000:
            # Parenthesised single-argument print emits the same text as
            # the print statement.
            print(' Inserting %s' % index)

        # Generate the key before the payload, as the record is built.
        key = str(uuid.uuid1())
        record = bucket.new(key, data=get_access())
        record.store()
def read_1000():
    """Run a map-only job over 'test_pageviews' and count up to 1000 rows.

    The JavaScript map phase emits the ``id`` field of each stored JSON
    value. The full result of ``run()`` is fetched and then trimmed to its
    first 1000 entries on the client side; the length of that slice is
    returned.

    NOTE(review): the records built by get_access() carry 'url', 'accessed'
    and 'ip' but no 'id' key -- confirm the mapped field is intended.
    """
    job = riak.RiakMapReduce(client)
    job = job.add('test_pageviews')
    job.map("function(v){ return [JSON.parse(v.values[0].data).id]; }")
    rows = job.run()
    first_thousand = rows[:1000]
    return len(first_thousand)
def _count_matches(map_js):
    """Run a counting map/reduce job over 'test_pageviews'.

    ``map_js`` is the JavaScript source of a map function that emits
    ``[1]`` for a matching record and ``[0]`` otherwise. The emitted values
    are summed by the ``Riak.reduceSum`` reduce phase.

    Returns the first element of the job result, or 0 when the result is
    empty or missing.
    """
    query = riak.RiakMapReduce(client).add('test_pageviews')
    query.map(map_js).reduce('Riak.reduceSum')
    result = query.run()
    # A truthiness check covers both an empty list and a missing (None)
    # result, where len() would raise on the latter.
    if not result:
        return 0
    return result[0]


def _date_window():
    """Return ``(start, end)`` timestamps for 13 and 14 August 2010.

    Both bounds are produced by ``make_timestamp`` from ``datetime.date``
    values; the queries treat the window as ``start <= accessed < end``.
    """
    start_date = make_timestamp(datetime.date(2010, 8, 13))
    end_date = make_timestamp(datetime.date(2010, 8, 14))
    return (start_date, end_date)


def query_by_url():
    """Count records whose ``url`` is "/news/2010/08/13/first-post/".

    Returns the summed match count, or 0 when the job yields no result.
    """
    return _count_matches(
        'function(v) { var data = JSON.parse(v.values[0].data); '
        'if(data.url == "/news/2010/08/13/first-post/") { return [1]; } '
        'else { return [0]; } }'
    )


def query_by_date():
    """Count records whose ``accessed`` timestamp lies in the date window.

    The window is the one returned by ``_date_window()`` (inclusive start,
    exclusive end). Returns the summed match count, or 0 when the job
    yields no result.
    """
    map_template = (
        'function(v) { var data = JSON.parse(v.values[0].data); '
        'if(data.accessed >= %s && data.accessed < %s) { return [1]; } '
        'return [0]; }'
    )
    return _count_matches(map_template % _date_window())


def query_by_url_and_date():
    """Count records matching both the date window and the first-post URL.

    A record matches when its ``accessed`` timestamp lies in the window
    returned by ``_date_window()`` and its ``url`` is
    "/news/2010/08/13/first-post/". Returns the summed match count, or 0
    when the job yields no result.
    """
    map_template = (
        'function(v) { var data = JSON.parse(v.values[0].data); '
        'if(data.accessed >= %s && data.accessed < %s) { '
        'if(data.url == "/news/2010/08/13/first-post/") { return [1]; } } '
        'return [0]; }'
    )
    return _count_matches(map_template % _date_window())
def nuke_it():
    """Delete every object in the bucket, one key at a time.

    Prints a timestamp before and after the key listing, then fetches and
    deletes each key's object, printing a progress line every 1000 keys.
    """
    print(" Getting keys to nuke at %s" % datetime.datetime.now())
    all_keys = bucket.get_keys()
    print(" Got keys to nuke by %s" % datetime.datetime.now())

    for position, key in enumerate(all_keys):
        if not position % 1000:
            print(' Nuking %s' % position)
        # Fetch the object for this key, then remove it.
        bucket.get(key).delete()
if __name__ == '__main__':
    # Benchmark driver: every step is timed with time_it() and reported in
    # the order write -> read -> three count queries -> delete.
    write_seconds = time_it(write_docs)[0]
    print("Writing %s records: %.2f seconds" % (TOTAL_HITS, write_seconds))

    read_seconds = time_it(read_1000)[0]
    print("Read 1000: %.2f seconds" % read_seconds)

    # Each count query reports both its elapsed time and the count it
    # returned, so the whole (elapsed, result) tuple feeds the format.
    counting_queries = (
        ("Query by URL: %.2f seconds (%s docs)", query_by_url),
        ("Query by Date: %.2f seconds (%s docs)", query_by_date),
        ("Query by URL & Date: %.2f seconds (%s docs)", query_by_url_and_date),
    )
    for report_format, benchmark in counting_queries:
        print(report_format % time_it(benchmark))

    nuke_seconds = time_it(nuke_it)[0]
    print("Nuke it: %.2f seconds" % nuke_seconds)
[daniel@Europa:Desktop]: python riak_test.py
Inserting 0
Inserting 1000
Inserting 2000
Inserting 3000
Inserting 4000
Inserting 5000
Inserting 6000
Inserting 7000
Inserting 8000
Inserting 9000
Writing 10000 records: 36.22 seconds
Read 1000: 4.81 seconds
Query by URL: 5.04 seconds (2031 docs)
Query by Date: 4.80 seconds (3949 docs)
Query by URL & Date: 4.92 seconds (797 docs)
Getting keys to nuke at 2010-10-28 19:43:20.862265
Got keys to nuke by 2010-10-28 19:44:11.839581
Nuking 0
Nuking 1000
Nuking 2000
Nuking 3000
Nuking 4000
Nuking 5000
Nuking 6000
Nuking 7000
Nuking 8000
Nuking 9000
Nuke it: 120.11 seconds
[daniel@Europa:Desktop]: python riak_test.py
Inserting 0
Inserting 1000
Inserting 2000
Inserting 3000
Inserting 4000
Inserting 5000
Inserting 6000
Inserting 7000
Inserting 8000
Inserting 9000
Writing 10000 records: 12.38 seconds
Read 1000: 5.86 seconds
Query by URL: 4.65 seconds (1969 docs)
Query by Date: 4.85 seconds (4016 docs)
Query by URL & Date: 4.73 seconds (790 docs)
Getting keys to nuke at 2010-10-28 22:06:18.746901
Got keys to nuke by 2010-10-28 22:06:18.967677
Nuking 0
Nuking 1000
Nuking 2000
Nuking 3000
Nuking 4000
Nuking 5000
Nuking 6000
Nuking 7000
Nuking 8000
Nuking 9000
Nuke it: 23.81 seconds
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment