Skip to content

Instantly share code, notes, and snippets.

@nickva
Last active April 13, 2020 23:08
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save nickva/e87aa2f7f896805bfee36a044a3800b0 to your computer and use it in GitHub Desktop.
Save nickva/e87aa2f7f896805bfee36a044a3800b0 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python
#
# Script to benchmark CouchDB compaction. It creates a db, populates it with
# various types of data based on provided parameters, then times the
# compaction.
#
# By default this script uses the 15986 (local) port so it creates and operates
# on one shard only.
#
# Specifying a parameter multiple times runs the script with all possible
# combinations of that parameter along with all possible combinations of the
# other parameters. For example:
#
# $ ./compact_bench.py -r 1 -r 10 -r 50 -r 100 -n 100000
#
# **************** num=100000,revisions=1 ****************
# Updating : 35.3s docs/s:2831 revs/s:2831 fsize:121258183
# Compacting : 10.2s docs/s:9814 revs/s:9814 fsize:21127367
#
# **************** num=100000,revisions=10 ****************
# Updating : 47.4s docs/s:2111 revs/s:21114 fsize:170561735
# Compacting : 17.6s docs/s:5687 revs/s:56871 fsize:52142279
#
# **************** num=100000,revisions=50 ****************
# Updating : 95.6s docs/s:1046 revs/s:52311 fsize:358736071
# Compacting : 30.6s docs/s:3270 revs/s:163530 fsize:176410823
#
# **************** num=100000,revisions=100 ****************
# Updating : 163.2s docs/s:612 revs/s:61260 fsize:594022599
# Compacting : 44.8s docs/s:2232 revs/s:223206 fsize:321802439
import argparse
import sys
import couchdb
import random
import string
import uuid
import time
import copy
import itertools
# Default server URL, used when -u/--url is not given on the command line.
# NOTE(review): the header comment mentions port 15986 while this default
# points at 15984 -- confirm which one is intended.
URL = 'http://adm:pass@127.0.0.1:15984'

# Default database name, used when -d/--dbname is not given.
DBNAME = 'cbenchdb'

# Benchmark parameters. Each entry is
#   (long option / attribute name, short flag, default value, help text).
# The type of the default value decides how command-line values are parsed.
PARAMS = [
    ('num', 'n', 10000, "Number of documents"),
    ('batch_size', 'b', 1000, "Batch size"),
    ('size', 's', 1, "Emit value size"),
    ('revisions', 'r', 1, "Number of revisions / doc"),
    ('min_id_size', 'm', 1, "Minimum ID size"),
    ('random_ids', 'x', True, "Use random IDs?"),
    ('alphabet', 'a', '', "Data generation alphabet"),
    ('attachment_size', 't', 0, "Attachment size"),
]
def main(args):
    """Run the benchmark once for every combination of parameter values.

    Each benchmark parameter in ``args`` is a list of the values given on the
    command line (empty when the option was not used). Parameters without
    values fall back to their default from PARAMS. The cartesian product of
    all value lists is then walked, calling ``run`` once per combination.

    Args:
        args: Parsed argparse namespace as produced by ``_args()``.
    """
    defaults = dict((pname, val) for pname, _, val, _ in PARAMS)
    param_names = [pname for pname, _, _, _ in PARAMS]
    given = vars(args)  # public equivalent of the private args._get_kwargs()

    # Parameters left at their default are kept out of the run title.
    is_default = set()
    param_values = []
    for pname in param_names:
        values = given[pname]
        if not values:
            values = [defaults[pname]]
            is_default.add(pname)
        param_values.append(values)

    for combo in itertools.product(*param_values):
        # Materialize the pairs: they are iterated twice below, and zip()
        # returns a one-shot iterator on Python 3.
        named = list(zip(param_names, combo))
        paramstr = ",".join("%s=%s" % (name, value)
                            for name, value in named
                            if name not in is_default)
        # Shallow copy so every run sees scalar values, not the value lists.
        run_args = copy.copy(args)
        for name, value in named:
            setattr(run_args, name, value)
        run(run_args, paramstr)
        print("")
def get_ids(args):
    """Return the list of ``args.num`` document IDs for this run.

    The global random generator is re-seeded with a fixed value first, so the
    same arguments produce the same sequence of IDs on every run.
    """
    random.seed(42)
    return [_id(i, args) for i in range(args.num)]
def update_docs(db, args, ids):
    """Write one generated document per ID in batches and report the timing.

    Args:
        db: Database object; its ``update(docs, new_edits=False)`` method is
            called once per batch.
        args: Run arguments; ``num`` and ``batch_size`` are read here.
        ids: Sequence of document IDs; the first ``args.num`` are used.

    Raises:
        ValueError: If ``args.batch_size`` is not positive.
    """
    t0 = time.time()
    n = args.num
    b = args.batch_size
    if b <= 0:
        raise ValueError("batch_size must be a positive integer")
    # Leave the line open; _show_info() finishes it with the measurements.
    sys.stdout.write(" Updating : ")
    sys.stdout.flush()
    # Stepping by the batch size covers the final partial batch as well and
    # never sends an empty request when num is a multiple of batch_size.
    for start in range(0, n, b):
        docs = [_doc(doc_id, args) for doc_id in ids[start:min(start + b, n)]]
        # new_edits=False: documents carry their own pre-built revisions.
        db.update(docs, new_edits=False)
    _show_info(args, time.time() - t0, db)
def populate_db(db, args):
    """Generate the document IDs for this run and write the documents."""
    update_docs(db, args, get_ids(args))
def compact(db, args):
    """Start compaction of ``db``, wait for it to finish and report timing.

    When ``args.wait_input_before_compaction`` is set, the user is prompted
    to press enter before compaction is started.

    Raises:
        RuntimeError: If the compaction request is not accepted.
    """
    # Leave the line open; _show_info() finishes it with the measurements.
    sys.stdout.write(" Compacting :")
    sys.stdout.flush()
    if args.wait_input_before_compaction:
        # raw_input() only exists on Python 2; input() replaces it on 3.
        try:
            read_line = raw_input
        except NameError:
            read_line = input
        sys.stdout.write("\n")
        read_line(" --- Press enter to start compaction ---")
        sys.stdout.write("\n")
    else:
        sys.stdout.write(" ")
    # Deliberately not an assert: "python -O" strips assert statements, which
    # would silently drop the compact() call itself.
    if not db.compact():
        raise RuntimeError("Compaction failed to start")
    dt = wait_compaction_done(db)
    _show_info(args, dt, db)
def wait_compaction_done(db, poll_interval=0.25, timeout=None):
    """Poll ``db.info()`` until compaction is no longer reported as running.

    Args:
        db: Object whose ``info()`` returns a dict; its ``compact_running``
            entry (treated as False when missing) is checked on every poll.
        poll_interval: Seconds to sleep between polls.
        timeout: Maximum number of seconds to wait, or None to wait forever.

    Returns:
        Seconds elapsed between the call and the first poll that reported
        compaction as not running.

    Raises:
        RuntimeError: If ``timeout`` is given and is exceeded.
    """
    t0 = time.time()
    while db.info().get('compact_running', False):
        if timeout is not None and time.time() - t0 >= timeout:
            raise RuntimeError(
                "Compaction still running after %s seconds" % timeout)
        time.sleep(poll_interval)
    return time.time() - t0
def run(args, paramstr):
    """Execute one benchmark run: recreate the db, fill it, compact it.

    Args:
        args: Run arguments; ``url`` and ``dbname`` are read here and the
            whole namespace is passed on to the populate and compact steps.
        paramstr: Title describing this run's parameters, printed between
            two rows of asterisks.
    """
    stars = "*" * 16
    # Single-argument print() calls behave the same on Python 2 and 3.
    print("")
    print("%s %s %s" % (stars, paramstr, stars))
    s = couchdb.Server(args.url)
    print("Version: %s" % s.version())
    # Always start from a fresh database.
    if args.dbname in s:
        s.delete(args.dbname)
    s.create(args.dbname)
    db = s[args.dbname]
    populate_db(db, args)
    compact(db, args)
    sys.stdout.flush()
def _show_info(args, dt, db):
    """Print elapsed time, document/revision rates and the db file size.

    Args:
        args: Run arguments; ``num`` and ``revisions`` are read here.
        dt: Elapsed time in seconds for the step being reported.
        db: Database object; ``info()['sizes']['file']`` is read from it.
    """
    if dt > 0:
        seconds = float(dt)
        docrate = int(args.num / seconds)
        revrate = int(args.num * args.revisions / seconds)
    else:
        # No measurable elapsed time: a rate cannot be computed, so report 0
        # instead of failing with ZeroDivisionError.
        docrate = revrate = 0
    fsize = db.info()['sizes']['file']
    print("%.1fs docs/s:%s revs/s:%s fsize:%s " % (dt, docrate, revrate, fsize))
def _doc(_id, args):
    """Build one document dict with generated data and revision history.

    The document has a random value under ``'v'``, ``args.revisions`` random
    revision hashes listed under ``'_revisions'``, a ``'_rev'`` made of the
    hash count and the first hash, and, when ``args.attachment_size`` is
    positive, a base64-encoded inline attachment named ``'att'``.

    Args:
        _id: Document ID to store under ``'_id'``.
        args: Run arguments; ``revisions`` and ``attachment_size`` are read
            here, and the namespace is passed to the data helpers.

    Raises:
        ValueError: If ``args.revisions`` is less than 1.
    """
    # Function-scope import keeps this block self-contained.
    import base64

    if args.revisions < 1:
        raise ValueError("revisions must be at least 1")
    # Random draws happen in a fixed order (value, revisions, attachment) so
    # a given seed always yields the same documents.
    data = _data(args)
    revs = [_rand_id(6) for _ in range(args.revisions)]
    doc = {
        '_id': _id,
        'v': data,
        '_rev': '%d-%s' % (len(revs), revs[0]),
        '_revisions': {
            'start': len(revs),
            'ids': revs,
        },
    }
    if args.attachment_size > 0:
        alphabet = _data_alphabet(args)
        att_data = ''.join(random.choice(alphabet)
                           for _ in range(args.attachment_size))
        # b64encode() needs bytes: text is encoded first on Python 3, while a
        # Python 2 str is already bytes. Unlike the 'base64' str codec it
        # works on both versions and inserts no line breaks.
        raw = att_data if isinstance(att_data, bytes) else att_data.encode('utf-8')
        doc['_attachments'] = {
            'att': {
                'content_type': 'app/binary',
                'data': base64.b64encode(raw).decode('ascii'),
            }
        }
    return doc
# Characters used for generated data when no alphabet is supplied.
_DEFAULT_ALPHABET = string.ascii_letters + string.digits


def _data_alphabet(args):
    """Return ``args.alphabet`` if it is non-empty, else the default one."""
    return args.alphabet or _DEFAULT_ALPHABET
def _data(args):
    """Return a random string of ``args.size`` characters.

    Characters are drawn from the alphabet selected by ``_data_alphabet``.
    """
    alphabet = _data_alphabet(args)
    # range() rather than xrange(), which does not exist on Python 3.
    return ''.join(random.choice(alphabet) for _ in range(args.size))
def _rand_id(size):
    """Return ``size`` random bytes as a zero-padded lowercase hex string.

    The result is ``2 * size`` characters long.
    """
    # "*" takes the field width from the argument tuple.
    return "%0*x" % (2 * size, random.getrandbits(size * 8))
def _id(i, args):
    """Return the document ID for document number ``i``.

    With ``args.random_ids`` set, the ID is a random hex string generated from
    ``max(args.min_id_size, 16)`` random bytes; otherwise it is ``i``
    zero-padded to six digits. IDs shorter than ``args.min_id_size``
    characters are right-padded with ``'x'``.
    """
    if args.random_ids:
        doc_id = _rand_id(max(args.min_id_size, 16))
    else:
        doc_id = '%06d' % i
    # ljust() leaves IDs that are already long enough untouched.
    return doc_id.ljust(args.min_id_size, 'x')
def _str2bool(val):
    """Parse a command-line string as a boolean.

    Returns True for 'true', 't', 'yes' or 'yep' (case-insensitive) and
    False for anything else.
    """
    return val.lower() in ('true', 't', 'yes', 'yep')
def _args():
    """Build the command-line parser and return the parsed arguments.

    Besides the url, dbname and wait options, one repeatable option is added
    per entry of PARAMS. Each of those collects its values into a list, which
    is empty when the option is not given.
    """
    parser = argparse.ArgumentParser(
        description="Make a db, add some docs and then compact")
    parser.add_argument('-u', '--url', default=URL, help="Server URL")
    parser.add_argument('-d', '--dbname', default=DBNAME, help="DB name")
    parser.add_argument(
        '-w', '--wait-input-before-compaction',
        action="store_true",
        default=False,
        help="Pause and wait for keypress before compaction")
    for pname, short, default, hstr in PARAMS:
        # Values are converted with the type of the parameter's default;
        # booleans go through _str2bool instead of bool().
        converter = _str2bool if isinstance(default, bool) else type(default)
        parser.add_argument(
            '-' + short, '--' + pname,
            type=converter,
            action="append",
            default=[],
            help=hstr)
    return parser.parse_args()
# Parse the command line and run the benchmark when executed as a script.
if __name__ == '__main__':
    main(_args())
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment