public
Last active

Test script for CouchDB bulk insertions with different docid selection schemes

  • Download Gist
couchdb_test.py
Python
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116
from datetime import datetime
from inoi.util.random import sequential_id, monotonic_id
 
import httplib2
import json
import random
import time
import sys
import uuid
 
# Maps the scheme name given as the second command-line argument to a
# zero-argument callable that produces a fresh document id.
id_makers = {
    # Random (version 4) UUID rendered as 32 hex characters.
    'random': lambda: uuid.uuid4().hex,
    # Project helpers; presumably they yield ids that sort in creation
    # order -- confirm against their definitions.
    'monotonic': monotonic_id,
    'sequential': sequential_id,
}

# Report wrong usage with a readable message instead of a bare
# IndexError/KeyError traceback. Extra trailing arguments are still ignored.
if len(sys.argv) < 3 or sys.argv[2] not in id_makers:
    sys.exit(
        'usage: %s <database> <%s>'
        % (sys.argv[0], '|'.join(sorted(id_makers)))
    )

database = sys.argv[1]
make_id = id_makers[sys.argv[2]]
# Base URL of the target database on the local server.
baseurl = 'http://localhost:5984/%s' % database

# Number of documents sent per _bulk_docs request.
bulk_size = 2000
# The benchmark stops once at least this many documents have been sent.
total_docs = 2000000

# One HTTP client shared by every request.
http = httplib2.Http()
def send_bulk(bulk):
    """POST a list of documents to the database's ``_bulk_docs`` endpoint.

    :param bulk: list of JSON-serialisable document dicts.
    :raises RuntimeError: if the server answers with a status other than
        201 (Created) or 202 (Accepted). Without this check a rejected
        request would be timed as if the documents had been stored.
    """
    resp, content = http.request(
        baseurl + '/_bulk_docs',
        method='POST',
        body=json.dumps({'docs': bulk}),
        # Declare the body as JSON; servers that enforce the media type
        # reject the request otherwise.
        headers={'Content-Type': 'application/json'},
    )
    if resp.status not in (201, 202):
        raise RuntimeError(
            'bulk insert failed: HTTP %s: %s' % (resp.status, content)
        )
 
def make_bulk(size):
    """Build ``size`` fresh documents ready for a bulk insert.

    Each document carries an ``_id`` from the selected ``make_id`` scheme,
    the current local time as an ISO 8601 string, and a random float in
    ``[0, 2000)`` as payload.

    :param size: number of documents to generate.
    :returns: list of document dicts.
    """
    docs = []
    for _ in range(size):
        docs.append({
            '_id': make_id(),
            'timestamp': datetime.now().isoformat(),
            'data': random.random() * 2000,
        })
    return docs
 
def main():
    """Run the insertion benchmark and print throughput statistics.

    Repeatedly builds and sends bulks of ``bulk_size`` documents until at
    least ``total_docs`` documents have been sent. Statistics are printed
    every 20 bulks and once more after the last bulk.
    """
    # Each record is (loop number, docs/sec). The infinities guarantee that
    # the first measured bulk replaces both placeholders.
    fastest = (-1, float('-inf'))
    slowest = (-1, float('inf'))
    loop = 0

    def report():
        """Print the extremes, the latest bulk rate and the overall rate."""
        elapsed = time.time() - start
        total_rate = (loop * bulk_size) / elapsed

        print('== loop %d ============================' % loop)
        print('peak min: in loop %d, %.2f docs/sec' % slowest)
        print('peak max: in loop %d, %.2f docs/sec' % fastest)
        print('current: %.2f docs/sec' % bulk_rate)
        print('total: %.2f docs/sec' % total_rate)

    start = time.time()
    while True:
        loop += 1

        # The timed section covers both document generation and the request.
        bulk_start = time.time()
        send_bulk(make_bulk(bulk_size))
        bulk_seconds = time.time() - bulk_start
        bulk_rate = bulk_size / bulk_seconds

        if bulk_rate > fastest[1]:
            fastest = (loop, bulk_rate)
        if bulk_rate < slowest[1]:
            slowest = (loop, bulk_rate)

        if loop % 20 == 0:
            report()

        if loop * bulk_size >= total_docs:
            break

    print('')
    print('FINISHED:')
    report()


if __name__ == '__main__':
    main()
 
# python couchdb_test.py test_sequential_id sequential
#
# FINISHED:
# == loop 1000 ============================
# peak min: in loop 5, 3057.95 docs/sec
# peak max: in loop 8, 7904.39 docs/sec
# current: 7449.51 docs/sec
# total: 7294.78 docs/sec
#
# database size on disk: 648548454 bytes = 0.6 GB
 
# python couchdb_test.py test_monotonic_id monotonic
#
# FINISHED:
# == loop 1000 ============================
# peak min: in loop 195, 1911.73 docs/sec
# peak max: in loop 161, 7703.18 docs/sec
# current: 7511.34 docs/sec
# total: 7353.81 docs/sec
#
# database size on disk: 611405926 bytes = 0.6 GB
 
# python couchdb_test.py test_random_id random
#
# FINISHED:
# == loop 1000 ============================
# peak min: in loop 889, 535.66 docs/sec
# peak max: in loop 1, 5473.21 docs/sec
# current: 1685.13 docs/sec
# total: 2133.34 docs/sec
#
# database size on disk: 4330426472 bytes = 4.0 GB

Please sign in to comment on this gist.

Something went wrong with that request. Please try again.