#!/usr/bin/env python
import sys
import time
import uuid
from argparse import ArgumentParser

from loremipsum import Generator
from shove import Shove


def _generator(count):
    gen = Generator()
    # My docs are ~1K bytes: derive the sentence mean from the mean word length.
    word_mean = sum(len(w) for w in gen.words) / len(gen.words)
    gen.sentence_mean = 1000 / word_mean
    return gen.generate_sentences(count)


def main():
    ap = ArgumentParser()
    ap.add_argument('--items', type=int, default=1100)
    ap.add_argument('--stepsize', type=int, default=100)
    ap.add_argument('--store')
    ap.add_argument('--cache')
    ap.add_argument('--sync', type=int)
    ap.add_argument('--store-sql')
    ap.add_argument('--cache-sql')
    args = ap.parse_args()

    shoveargs = {}
    if args.store:
        shoveargs['store'] = args.store
    if args.cache:
        shoveargs['cache'] = args.cache
    if args.sync:
        shoveargs['sync'] = args.sync
    db = Shove(**shoveargs)

    # Optionally run raw SQL (e.g. pragmas) against the sqlite backends;
    # this reaches into shove's private cursors.
    if args.store_sql:
        db._store._cursor.execute(args.store_sql)
    if args.cache_sql:
        db._cache._cursor.execute(args.cache_sql)

    ts0 = ts = time.time()
    for idx, doc in enumerate(_generator(args.items)):
        # isbn13 is 13 chars long
        key = str(uuid.uuid1())[:13]
        db[key] = doc
        if idx and idx % args.stepsize == 0:
            ots, ts = ts, time.time()
            print('Processing {}, took {:.2f}s at {:.2f} doc/s'.format(
                idx, ts - ots, args.stepsize / (ts - ots)))
    ts = time.time()
    print('Total {} docs in {:.2f}s at {:.2f} doc/s'.format(
        idx + 1, ts - ts0, (idx + 1) / (ts - ts0)))


if __name__ == '__main__':
    try:
        sys.exit(main())
    except KeyboardInterrupt:
        pass
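
For context, the write path being timed is just shove's dict interface on top of a pluggable store/cache pair. A minimal sketch of the same pattern without the benchmark scaffolding (assuming shove's sync()/close() methods flush the buffered writes, which is my reading of the library, not something this gist verifies):

from shove import Shove

db = Shove(store='lite://store.db', cache='simple://', sync=100)
db['978-0000000-1'] = 'document body'  # buffered; written through every `sync` sets
db.sync()    # push any remaining buffered writes down to the store
db.close()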
## Fully in-memory (the speed reference)
# ./shovebench.py --cache simple:// --store simple://
Processing 100, took 0.07s at 1360.55 doc/s
Processing 200, took 0.07s at 1451.46 doc/s
Processing 300, took 0.07s at 1526.46 doc/s
Processing 400, took 0.07s at 1472.12 doc/s
Processing 500, took 0.07s at 1463.23 doc/s
Processing 600, took 0.07s at 1460.13 doc/s
Processing 700, took 0.07s at 1460.00 doc/s
Processing 800, took 0.07s at 1458.81 doc/s
Processing 900, took 0.07s at 1456.62 doc/s
Processing 1000, took 0.07s at 1456.60 doc/s
Total 1099 docs in 0.75s at 1455.80 doc/s
## SQLite on disk for both store and cache (nonsense: the cache layer is as slow as the store)
# ./shovebench.py --cache lite://cache.db --store lite://store.db
Processing 100, took 3.54s at 28.25 doc/s
Processing 200, took 3.52s at 28.42 doc/s
Processing 300, took 3.51s at 28.46 doc/s
Processing 400, took 5.35s at 18.70 doc/s
Processing 500, took 5.37s at 18.63 doc/s
Processing 600, took 5.40s at 18.51 doc/s
Processing 700, took 5.37s at 18.61 doc/s
Processing 800, took 5.51s at 18.14 doc/s
Processing 900, took 5.37s at 18.63 doc/s
Processing 1000, took 5.35s at 18.69 doc/s
Total 1099 docs in 53.63s at 20.49 doc/s
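
The ~70x slowdown is fsync(): each db[key] = doc apparently commits its own durable SQLite transaction, and with SQLite in both layers every write pays the price twice. The per-insert-commit cost is easy to reproduce with the stdlib sqlite3 module (hypothetical scratch table, not shove's actual schema):

import sqlite3
import time

conn = sqlite3.connect('bench.db')
conn.execute('CREATE TABLE IF NOT EXISTS kv (key TEXT PRIMARY KEY, value TEXT)')
t0 = time.time()
for i in range(100):
    with conn:  # one transaction, and normally one fsync(), per document
        conn.execute('INSERT OR REPLACE INTO kv VALUES (?, ?)', (str(i), 'x' * 1000))
print('{:.2f} doc/s'.format(100 / (time.time() - t0)))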
## SQLite for the store, memory for the cache
# ./shovebench.py --cache simple:// --store lite://store.db
Processing 100, took 1.91s at 52.36 doc/s
Processing 200, took 1.87s at 53.40 doc/s
Processing 300, took 1.84s at 54.47 doc/s
Processing 400, took 1.85s at 54.10 doc/s
Processing 500, took 1.86s at 53.67 doc/s
Processing 600, took 1.90s at 52.56 doc/s
Processing 700, took 1.85s at 54.15 doc/s
Processing 800, took 1.79s at 55.99 doc/s
Processing 900, took 1.79s at 55.91 doc/s
Processing 1000, took 1.81s at 55.26 doc/s
Total 1099 docs in 20.25s at 54.27 doc/s
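
Dropping SQLite from the cache layer roughly halves the cost, but each write is still its own commit. Batching writes into fewer transactions recovers most of the remaining gap, which is presumably what the --sync option is for (flush to the store every N writes). The equivalent batching with raw sqlite3:

import sqlite3

conn = sqlite3.connect('bench.db')
conn.execute('CREATE TABLE IF NOT EXISTS kv (key TEXT PRIMARY KEY, value TEXT)')
rows = ((str(i), 'x' * 1000) for i in range(1000))
with conn:  # a single transaction, and a single fsync(), for the whole batch
    conn.executemany('INSERT OR REPLACE INTO kv VALUES (?, ?)', rows)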
## Filesystem backend -- one file per key (problematic when num(keys) > num(inodes))
# ./shovebench.py --cache simple:// --store file://file.db
Processing 100, took 0.07s at 1343.04 doc/s
Processing 200, took 0.06s at 1563.43 doc/s
Processing 300, took 0.06s at 1551.71 doc/s
Processing 400, took 0.07s at 1498.20 doc/s
Processing 500, took 0.07s at 1505.23 doc/s
Processing 600, took 0.07s at 1498.53 doc/s
Processing 700, took 0.07s at 1498.51 doc/s
Processing 800, took 0.07s at 1495.40 doc/s
Processing 900, took 0.07s at 1505.55 doc/s
Processing 1000, took 0.07s at 1502.29 doc/s
Total 1099 docs in 0.74s at 1495.16 doc/s
# ls file.db/ |wc -l
1100
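
The file backend keeps pace with memory, presumably because each write is just a file creation the OS can buffer, with no fsync(). The catch is one inode per key, hence the caveat in the heading. A sketch of the pattern, plus a POSIX-only way to see how many inodes are left:

import os
from pathlib import Path

store = Path('file.db')
store.mkdir(exist_ok=True)
(store / '978-0000000-1').write_text('document body')  # one file, i.e. one inode, per key
print(sum(1 for _ in store.iterdir()))   # same count as `ls file.db/ | wc -l`
print(os.statvfs(str(store)).f_favail)   # free inodes left on this filesystem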
## SQLite with fsync() disabled entirely (acceptable when the db is volatile)
# ./shovebench.py --cache simple:// --store lite://store.db --store-sql "pragma synchronous = off;"
Processing 100, took 0.08s at 1267.89 doc/s
Processing 200, took 0.07s at 1422.56 doc/s
Processing 300, took 0.07s at 1417.64 doc/s
Processing 400, took 0.07s at 1372.55 doc/s
Processing 500, took 0.07s at 1380.20 doc/s
Processing 600, took 0.07s at 1363.46 doc/s
Processing 700, took 0.07s at 1361.21 doc/s
Processing 800, took 0.07s at 1369.51 doc/s
Processing 900, took 0.07s at 1373.34 doc/s
Processing 1000, took 0.07s at 1376.23 doc/s
Total 1099 docs in 0.80s at 1369.56 doc/s
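
Back to in-memory speed. With synchronous = off, SQLite stops calling fsync() after each commit, so an OS crash or power loss (not just a process crash) can corrupt or truncate the database. The --store-sql hook above has to reach into shove's private cursor; against a plain sqlite3 connection the pragma is a single statement:

import sqlite3

conn = sqlite3.connect('store.db')
conn.execute('PRAGMA synchronous = OFF')  # no fsync(): fast, unsafe across OS crashes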