public
Last active

  • Download Gist
benchmark-storage.py
Python
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102
import sys
import collections
import gridfs
import io
import psycopg2
import pymongo
import random
import time
 
# Chunk size in bytes (512 KiB). For fairness both storage back ends write
# with this same chunk size in their store() methods.
CHUNK_SIZE = 1024 * 512
 
class GridFSBench(object):
    """Benchmark target that stores files in MongoDB through GridFS.

    Life cycle: ``setup()`` once, ``store(fp)`` any number of times, then
    ``cleanup()``.  ``setup()`` may be called again after ``cleanup()``.
    """

    DB_NAME = 'gridfs_benchmark_db'

    def setup(self):
        """Connect to the default MongoDB server and open the GridFS store."""
        self.mongo = pymongo.MongoClient()
        self.fs = gridfs.GridFS(self.mongo[self.DB_NAME])

    def store(self, fp):
        """Store the contents of the file-like object *fp* as one GridFS file."""
        self.fs.put(fp, chunk_size=CHUNK_SIZE)

    def cleanup(self):
        """Drop the benchmark database if it exists, then close the client.

        The client is closed even when listing or dropping fails, so a
        failed run does not leave a connection behind.
        """
        client = self.mongo
        try:
            # Newer PyMongo releases replace database_names() with
            # list_database_names().  Probe the class, not the instance:
            # MongoClient resolves unknown attributes to Database objects,
            # so an instance-level lookup would always "succeed".
            if hasattr(type(client), 'list_database_names'):
                names = client.list_database_names()
            else:
                names = client.database_names()
            if self.DB_NAME in names:
                client.drop_database(self.DB_NAME)
        finally:
            client.close()
 
class LobjectBench(object):
    """Benchmark target that stores files as PostgreSQL large objects.

    Life cycle: ``setup()`` once, ``store(fp)`` any number of times, then
    ``cleanup()``.
    """

    def _admin_execute(self, sql):
        """Run one statement against ``template1`` in autocommit mode.

        CREATE/DROP DATABASE cannot run inside a transaction block, hence
        autocommit.  The connection is closed even if the statement fails.
        """
        admin = psycopg2.connect(dbname='template1')
        try:
            admin.autocommit = True
            admin.cursor().execute(sql)
        finally:
            admin.close()

    def setup(self):
        """Create the benchmark database and open a connection to it."""
        self._admin_execute('CREATE DATABASE lobs_benchmark_db')
        self.conn = psycopg2.connect(dbname='lobs_benchmark_db')

    def store(self, fp):
        """Copy the file-like object *fp* into a new large object and commit.

        The data is written in CHUNK_SIZE pieces until ``fp.read`` returns
        an empty result.
        """
        lob = self.conn.lobject(mode='wb')
        while True:
            chunk = fp.read(CHUNK_SIZE)
            if not chunk:
                break
            lob.write(chunk)
        self.conn.commit()

    def cleanup(self):
        """Close the benchmark connection and drop the benchmark database."""
        # The database cannot be dropped while our own session is still open.
        self.conn.close()
        self._admin_execute('DROP DATABASE IF EXISTS lobs_benchmark_db')
 
class RandomData(object):
    """Endless, re-creatable stream of text chunks built from a word list.

    Two instances created with the same *seed* and the same word file yield
    identical chunk sequences, so every benchmark target stores the same data.

    See http://jessenoller.com/blog/2008/05/30/making-re-creatable-random-data-files-really-fast-in-python
    """

    def __init__(self, seed, wordfile="lorem.txt"):
        """Derive a 20-digit rotation key from *seed* and load the words.

        seed     -- any value accepted by ``random.Random``.
        wordfile -- path of the text file supplying the words.
        """
        # random.SystemRandom ignores its seed argument, which made the
        # stream differ on every instantiation; random.Random honours it.
        rng = random.Random(seed)
        self.seed = "".join(str(rng.randint(0, 9)) for _ in range(20))
        with open(wordfile, "r") as source:
            # Newlines are removed, not replaced by spaces, so the last word
            # of a line fuses with the first word of the next one.  Kept as
            # is: only the volume of data matters here, not its wording.
            self.words = source.read().replace("\n", '').split()

    def __iter__(self):
        """Yield space-joined chunks of up to 1024 words, forever.

        After each chunk the word list is rotated by the current key digit
        and the key itself is rotated by one position.
        """
        words = collections.deque(self.words)
        digits = collections.deque(self.seed)
        while True:
            yield ' '.join(list(words)[0:1024])
            words.rotate(int(digits[0]))
            digits.rotate(1)
 
def randombytes(datasource, size):
    """Return a ``BytesIO`` holding exactly *size* bytes taken from *datasource*.

    datasource -- iterator yielding non-empty ``bytes`` or text chunks; text
                  chunks are encoded as UTF-8 before being written.
    size       -- number of bytes wanted; zero or less gives an empty buffer.

    The returned buffer is positioned at offset 0, ready to be read.

    Raises ValueError if the iterator yields an empty chunk (the loop could
    otherwise never finish).  Exhaustion of *datasource* before *size* bytes
    were collected propagates as ``StopIteration``.
    """
    fp = io.BytesIO()
    while fp.tell() < size:
        chunk = next(datasource)
        if not isinstance(chunk, bytes):
            chunk = chunk.encode('utf-8')
        if not chunk:
            raise ValueError('datasource produced an empty chunk')
        fp.write(chunk)
    # Whole chunks are appended, so the last one usually overshoots; cut the
    # buffer back so the payload really has the advertised size.
    fp.truncate(max(size, 0))
    fp.seek(0)
    return fp
 
def humanize_bytes(size):
    """Format a byte count as a whole number with a 'M', 'k' or 'b' suffix.

    The largest unit not exceeding *size* is used and the quotient is
    truncated, e.g. 1536 -> '1k'.  Sizes below 1 fall through to bytes.
    """
    for factor, suffix in ((1 << 20, 'M'), (1 << 10, 'k'), (1, 'b')):
        if size >= factor:
            break
    # Floor division keeps the arithmetic in integers on Python 2 and 3 alike;
    # true division would round very large sizes through a float.
    return '%i%s' % (size // factor, suffix)
 
def benchmark(size, reps):
    """Time *reps* stores of *size*-byte files on GridFS and on lobject.

    One elapsed time in seconds is written per line to
    ``gridfs-<size>-<reps>.txt`` and ``lobject-<size>-<reps>.txt`` in the
    current directory.  Both targets are fed from data sources created with
    the same seed.  Each target's ``cleanup()`` runs even if a store fails,
    and both report files are closed on the way out.
    """
    # time.clock() no longer exists on Python 3.8+, and on Unix it counted
    # only this process's CPU time, leaving out the database server's work.
    # default_timer is the wall clock recommended for benchmarks.
    from timeit import default_timer

    seed = time.time()
    label = humanize_bytes(size)
    gridbench = GridFSBench()
    lobbench = LobjectBench()

    with open('gridfs-%s-%s.txt' % (label, reps), 'w') as gridreport, \
            open('lobject-%s-%s.txt' % (label, reps), 'w') as lobreport:
        for bench, report in ((gridbench, gridreport), (lobbench, lobreport)):
            data = iter(RandomData(seed))
            bench.setup()
            try:
                for _ in range(reps):
                    # Build the payload outside the timed region.
                    fp = randombytes(data, size)
                    start = default_timer()
                    bench.store(fp)
                    elapsed = default_timer() - start
                    report.write('%s\n' % elapsed)
            finally:
                bench.cleanup()
 
# Command-line entry point: benchmark-storage.py SIZE_IN_BYTES REPETITIONS
if __name__ == '__main__':
    if len(sys.argv) < 3:
        # Exit with a usage line instead of an IndexError traceback.
        sys.exit('usage: %s SIZE_IN_BYTES REPETITIONS' % sys.argv[0])
    size = int(sys.argv[1])
    reps = int(sys.argv[2])
    # A single parenthesised argument prints identically on Python 2 and 3.
    print("Benchmarking %s x %s..." % (humanize_bytes(size), reps))
    benchmark(size, reps)
results.md
Markdown

Results summary

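Times are the seconds required to store a single file. "Lobject diff" is how much less time lobject took relative to GridFS (the ministat percentage difference below, rounded).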

File size   GridFS avg      Lobject avg  Lobject diff
==========  ==============  ===========  ============
1k          0.0017968       0.0002748    85%
10k         0.00180485      0.0003415    81%
1M          0.00667265      0.00271265   59%
10M         0.04660705      0.0214903    54%
50M         0.22207705      0.1296414    42%

Detailed results

(Courtesy of ministat.)

1k

$ python benchmark-storage.py 1024 20
$ ministat -c99 -w74 gridfs-1k-20.txt lobject-1k-20.txt
x gridfs-1k-20.txt
+ lobject-1k-20.txt
+--------------------------------------------------------------------------+
|   +                                                                      |
|   +                                                                      |
|   +                                                                      |
|   +                                                                      |
|   +                                                                      |
|   +                                                                      |
|   ++                                                                     |
|   ++                                                                     |
|   ++ +                         x                                         |
|   ++ +                         x   x                                     |
|   ++ +                 +       x  xxxx x x xx x   xx xx x x             x|
||__M_A___|                        |________M_A__________|                 |
+--------------------------------------------------------------------------+
    N           Min           Max        Median           Avg        Stddev
x  20      0.001297       0.00286     0.0017275     0.0017968   0.000412761
+  20      0.000199      0.000995      0.000227     0.0002748 0.00017411327
Difference at 99.0% confidence
    -0.001522 +/- 0.000271665
    -84.7061% +/- 15.1194%
    (Student's t, pooled s = 0.00031677)

10k

$ python benchmark-storage.py 10240 20
$ ministat -c99 -w74 gridfs-10k-20.txt lobject-10k-20.txt
x gridfs-10k-20.txt
+ lobject-10k-20.txt
+--------------------------------------------------------------------------+
|+                                                                         |
|+                                                                         |
|+ +                                                                       |
|+ ++ +                           x                                        |
|++++ +                           x  x  x                                  |
|+++++++                  x x   xxxx xxxxx   xx     x  x                  x|
||_A_|                       |_______M__A__________|                       |
+--------------------------------------------------------------------------+
    N           Min           Max        Median           Avg        Stddev
x  20      0.001258       0.00316     0.0017045    0.00180485 0.00042674154
+  20      0.000253      0.000498     0.0003245     0.0003415 7.8833268e-05
Difference at 99.0% confidence
    -0.00146335 +/- 0.000263164
    -81.0788% +/- 14.5809%
    (Student's t, pooled s = 0.000306857)

1M

$ python benchmark-storage.py 1048576 20
$ ministat -c99 -w74 gridfs-1M-20.txt lobject-1M-20.txt
x gridfs-1M-20.txt
+ lobject-1M-20.txt
+--------------------------------------------------------------------------+
| +                                                                        |
| +                                                                        |
| ++                                   x                                   |
| ++                                   x                                   |
| ++                               x x x                                   |
| ++++                             x x x     x                             |
|+++++  ++ +                     xxx x xxx  xxx                           x|
||_MA_|                         |______M_A_______|                         |
+--------------------------------------------------------------------------+
    N           Min           Max        Median           Avg        Stddev
x  20      0.005882      0.010248     0.0064845    0.00667265 0.00093660683
+  20      0.002418      0.003446     0.0026345    0.00271265  0.0002695611
Difference at 99.0% confidence
    -0.00396 +/- 0.000591034
    -59.3467% +/- 8.85756%
    (Student's t, pooled s = 0.000689165)

10M

$ python benchmark-storage.py 10485760 20
$ ministat -c99 -w74 gridfs-10M-20.txt lobject-10M-20.txt
x gridfs-10M-20.txt
+ lobject-10M-20.txt
+--------------------------------------------------------------------------+
| +                                                                        |
| +                                                                        |
| ++                                                                       |
| ++                                                                       |
| ++                                                                       |
| ++                                         x                             |
|+++                                       xxx                             |
|+++                                       xxx     x                       |
|++++                                      xxx x   x x   xxx x     x      x|
| A|                                      |___M____A________|              |
+--------------------------------------------------------------------------+
    N           Min           Max        Median           Avg        Stddev
x  20      0.042279      0.058557     0.0440435    0.04660705  0.0046465036
+  20      0.020823      0.022263     0.0215675     0.0214903 0.00034488108
Difference at 99.0% confidence
    -0.0251167 +/- 0.00282549
    -53.8905% +/- 6.06237%
    (Student's t, pooled s = 0.00329461)

50M

$ python benchmark-storage.py 52428800 20
$ ministat -c99 -w74 gridfs-50M-20.txt lobject-50M-20.txt
x gridfs-50M-20.txt
+ lobject-50M-20.txt
+--------------------------------------------------------------------------+
| +                                                                        |
|++                                                 x                      |
|++                                                 x                      |
|++                + +                       x  x x x                      |
|++  +           ++++++++                  xxxxxx x xxx       x    x  x   x|
| |________A_________|                      |______M_A_______|             |
+--------------------------------------------------------------------------+
    N           Min           Max        Median           Avg        Stddev
x  20      0.201251      0.269507     0.2179865    0.22207705   0.019609101
+  20       0.10715      0.157961     0.1285205     0.1296414   0.021250329
Difference at 99.0% confidence
    -0.0924356 +/- 0.0175349
    -41.6232% +/- 7.89584%
    (Student's t, pooled s = 0.0204462)

Please sign in to comment on this gist.

Something went wrong with that request. Please try again.