Skip to content

Instantly share code, notes, and snippets.

@schakrava
Created January 31, 2014 00:57
Show Gist options
  • Save schakrava/8724673 to your computer and use it in GitHub Desktop.
Save schakrava/8724673 to your computer and use it in GitHub Desktop.
Small file and write intensive workload
#!/usr/bin/env python
import os
import sys
import subprocess
from os.path import join
import time
# Absolute paths of the external tools this script shells out to.
DD = '/usr/bin/dd'
RCLI = '/opt/rock-dep/bin/rcli'

# Number of small (sf), medium (mf) and large (lf) files written per stride.
# NOTE(review): the trailing figure on each line looks like a scaled-down
# value for quick trial runs -- confirm before relying on it.
sf_per_stride = 20000  # prod = 20000 # 100
mf_per_stride = 125  # prod = 125 # 10
lf_per_stride = 25  # prod = 25 # 2

# File sizes, in KB.
sf_size = 64
mf_size = 1048576  # prod = 1048576 (1GB) # 10240
lf_size = 15728640  # prod = 15728640 (15GB) # 40960

# dd block sizes, in KB (handed to dd as 'bs=<n>k').
sf_bsize = sf_size
mf_bsize = 1024
lf_bsize = 1024

# dd block counts. Floor division keeps these integers on Python 2 and 3.
sf_bcount = 1
mf_bcount = mf_size // mf_bsize  # prod = 1024
lf_bcount = lf_size // lf_bsize  # prod = 15360

num_strides = 24  # prod = 24 (totalling 12TB) # 15

# Aggregate size (KB) and file count, per stride and for the whole run.
tsize_per_stride = ((sf_size * sf_per_stride) + (mf_size * mf_per_stride) +
                    (lf_size * lf_per_stride))
total_size = tsize_per_stride * num_strides
tcount_per_stride = sf_per_stride + mf_per_stride + lf_per_stride
total_count = tcount_per_stride * num_strides
def _output_lines(data):
    """Turn one captured stream from communicate() into a list of lines."""
    if data is None:
        # The stream was not piped, so there is nothing to split.
        data = ''
    elif not isinstance(data, str):
        # Python 3 hands back bytes; decode so split('\n') keeps working.
        data = data.decode('utf-8', 'replace')
    return data.split('\n')


def run_command(cmd, shell=False, stdout=subprocess.PIPE,
                stderr=subprocess.PIPE, throw=True):
    """
    Run an external command and wait for it to finish.

    cmd, shell, stdout and stderr are handed to subprocess.Popen unchanged.

    Returns a tuple (out, err, rc): the captured stdout and stderr, each
    split into a list of lines on newline characters, and the exit status
    of the process. A stream that was not captured (for example
    stdout=None) is reported as [''].

    Raises subprocess.CalledProcessError when throw is true and the command
    exits with a non-zero status. The exception's 'output' and 'stderr'
    attributes carry the captured text.
    """
    p = subprocess.Popen(cmd, shell=shell, stdout=stdout, stderr=stderr)
    out, err = p.communicate()
    out = _output_lines(out)
    err = _output_lines(err)
    rc = p.returncode
    if throw and rc != 0:
        failure = subprocess.CalledProcessError(rc, cmd,
                                                output='\n'.join(out))
        failure.stderr = '\n'.join(err)
        raise failure
    return (out, err, rc)
def create_stride(count, basedir):
    """
    Write one stride of zero-filled files into basedir using dd.

    Small (.sf) files are written first, then medium (.mf), then large
    (.lf). Every file is named after a running number that starts at
    'count' and goes up by one per file. Returns the number that follows
    the last file written.
    """
    # One row per kind of file, in creation order:
    # (progress message, dd 'of=' template, number of files,
    #  dd block size in KB, dd block count)
    plan = (
        ('creating small files [%d, %d)', 'of=%s/%d.sf',
         sf_per_stride, sf_bsize, sf_bcount),
        ('creating medium files [%d, %d)', 'of=%s/%d.mf',
         mf_per_stride, mf_bsize, mf_bcount),
        ('creating large files [%d, %d)', 'of=%s/%d.lf',
         lf_per_stride, lf_bsize, lf_bcount),
    )
    next_id = count
    for message, target, how_many, bsize, bcount in plan:
        print(message % (next_id, next_id + how_many))
        for _ in range(how_many):
            dd_cmd = [DD, 'if=/dev/zero', target % (basedir, next_id),
                      'bs=%sk' % bsize, 'count=%d' % bcount]
            run_command(dd_cmd)
            next_id += 1
    return next_id
def delete_stride(count, basedir):
    """
    Delete part of the stride whose first file is numbered 'count'.

    Removes the first half of the stride's small (.sf) files and the first
    quarter of its medium (.mf) files from basedir using 'rm -f'. Large
    files are left in place. Returns nothing.
    """
    # Floor division so the counts stay integers on Python 2 and 3.
    sf_doomed = sf_per_stride // 2
    mf_doomed = mf_per_stride // 4

    print('deleting small files [%d, %d)' % (count, count + sf_doomed))
    for number in range(count, count + sf_doomed):
        run_command(['/bin/rm', '-f', '%s/%d.sf' % (basedir, number)])

    # create_stride numbers the medium files after *all* of the stride's
    # small files, so they start at count + sf_per_stride -- not right after
    # the last small file deleted above.
    mf_first = count + sf_per_stride
    print('deleting medium files [%d, %d)' % (mf_first,
                                              mf_first + mf_doomed))
    for number in range(mf_first, mf_first + mf_doomed):
        run_command(['/bin/rm', '-f', '%s/%d.mf' % (basedir, number)])
def create_files(num_strides, basedir, share_name):
    """
    Create num_strides strides of files, one stride at a time.

    For every stride after the first, part of the previous stride is
    deleted first (see delete_stride). A 'stride-<i>' directory is then
    created and filled by create_stride, timing figures are printed, and a
    snapshot named 'stride-<i>' is added to the share share_name through
    the rcli tool.

    Each stride directory is created inside the previous one, i.e.
    basedir/stride-0/stride-1/... os.mkdir raises if a directory already
    exists.
    """
    count = 0
    prev_count = 0
    t0 = time.time()
    for i in range(num_strides):
        bt = time.time()
        if i > 0:
            # basedir still names the previous stride's directory here, and
            # prev_count is the number of that stride's first file.
            delete_stride(prev_count, basedir)
        # Descend: the new stride lives inside the previous stride's dir.
        basedir = join(basedir, 'stride-%d' % i)
        os.mkdir(basedir)
        prev_count = count
        count = create_stride(count, basedir)
        at = time.time()
        print('total size = %dKB stride size = %dKB total count = %d '
              'stride_count = %d stride dir = %s. duration = %f' %
              ((tsize_per_stride * (i+1)), tsize_per_stride, count,
               tcount_per_stride, basedir, at - bt))
        print('taking snaphost -- stride-%d' % i)
        run_command([RCLI, 'shares', 'share', share_name, 'snapshot', 'add',
                     'stride-%d' % i])
    t1 = time.time()
    # Derive the total from the num_strides argument rather than from the
    # module-level total_size, which is fixed to the global stride count.
    print('total size = %dKB number of files = %d. strides = %d '
          'count per stride = %d. total_time = %f' %
          (tsize_per_stride * num_strides, count, num_strides,
           sf_per_stride + mf_per_stride + lf_per_stride, t1 - t0))
def main(basedir, share_name):
    """
    Run the whole workload below basedir.

    Hands off to create_files with the module-level num_strides; share_name
    is passed straight through to it.
    """
    create_files(num_strides=num_strides, basedir=basedir,
                 share_name=share_name)
if __name__ == '__main__':
    # Script entry point: print a summary of the planned workload, then run
    # it. Expects two arguments: the base directory and the share name.
    if len(sys.argv) < 3:
        # Fail up front with a usage line instead of an IndexError after
        # all the statistics have already been printed.
        sys.exit('Usage: %s <basedir> <share_name>' % sys.argv[0])

    print('Number of files per stride: %d(sf) %d(mf) %d(lf) %d(total)' %
          (sf_per_stride, mf_per_stride, lf_per_stride,
           (sf_per_stride + mf_per_stride + lf_per_stride)))
    print('Size of files in KB: %d(sf) %d(mf) %d(lf) %d(total)' %
          (sf_size, mf_size, lf_size, sf_size + mf_size + lf_size))
    print('Number of strides: %d' % num_strides)
    print('Total number of files created: %d(sf) %d(mf) %d(lf) %d(total)' %
          (sf_per_stride * num_strides, mf_per_stride * num_strides,
           lf_per_stride * num_strides,
           (sf_per_stride + mf_per_stride + lf_per_stride) * num_strides))

    # Sizes are in KB, like the *_size constants they are derived from.
    sf_size_per_stride = sf_size * sf_per_stride
    mf_size_per_stride = mf_size * mf_per_stride
    lf_size_per_stride = lf_size * lf_per_stride
    total_size_per_stride = (sf_size_per_stride + mf_size_per_stride +
                             lf_size_per_stride)
    print('Size per stride: %d(sf) %d(mf) %d(lf) %d(total)' %
          (sf_size_per_stride, mf_size_per_stride, lf_size_per_stride,
           total_size_per_stride))
    print('Total size of files created: %d(sf) %d(mf) %d(lf) %d(total)' %
          (sf_size_per_stride * num_strides,
           mf_size_per_stride * num_strides,
           lf_size_per_stride * num_strides,
           total_size_per_stride * num_strides))

    # Floor division mirrors the file counts delete_stride iterates over and
    # keeps the derived sizes exact integers on Python 3 as well.
    sf_deleted = sf_per_stride // 2
    mf_deleted = mf_per_stride // 4
    print('Number of files deleted from previous stride: %d(sf) %d(mf) '
          '0(lf) %d(total)' % (sf_deleted, mf_deleted,
                               sf_deleted + mf_deleted))
    sf_size_deleted = sf_deleted * sf_size
    mf_size_deleted = mf_deleted * mf_size
    # Deletion happens before every stride except the first.
    total_sf_size_deleted = sf_size_deleted * (num_strides - 1)
    total_mf_size_deleted = mf_size_deleted * (num_strides - 1)
    print('Deleted size from previous stride: %d(sf) %d(mf) 0(lf) %d(total) '
          % (sf_size_deleted, mf_size_deleted,
             sf_size_deleted + mf_size_deleted))
    print('Total size of files deleted: %d(sf) %d(mf) 0(lf) %d(total)' %
          (total_sf_size_deleted, total_mf_size_deleted,
           total_sf_size_deleted + total_mf_size_deleted))
    main(sys.argv[1], sys.argv[2])
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment