Servers-Per-Port Benchmarking With Slow Disks

Container-server configuration:

```
[DEFAULT]
workers = 8

[pipeline:main]
pipeline = healthcheck recon container-server

[app:container-server]
node_timeout = 4
conn_timeout = 10.0

[container-updater]
interval = 300
concurrency = 1
node_timeout = 4
conn_timeout = 10.0
slowdown = 0.001
account_suppression_time = 60
db_preallocation = False

[container-auditor]
interval = 1800
containers_per_second = 200
db_preallocation = False
```
Hardware:
proxy1: 12x E5-2650 0 @ 2.00GHz (24 HT cores); 32 GiB RAM; 10 Gbps NIC
proxy2: 12x E5-2650 0 @ 2.00GHz (24 HT cores); 32 GiB RAM; 10 Gbps NIC
stor1: 12x E5-2620 0 @ 2.00GHz (24 HT cores); 64 GiB RAM; 10 Gbps NIC; 32x 3 TB object disks; 2x 120 GB account/container SSDs
stor2: 12x E5-2620 0 @ 2.00GHz (24 HT cores); 64 GiB RAM; 10 Gbps NIC; 29x 3 TB object disks; 2x 120 GB account/container SSDs
Operating Systems:
proxy1, stor1, stor2: fully-patched Ubuntu 12.04.5 running the 3.13.0-53-generic kernel
proxy2: fully-patched Ubuntu 14.04.2 running the 3.13.0-53-generic kernel
Software:
Swift with Patch Set 9 of the change under review at https://review.openstack.org/#/c/184189/9/
ssbench 0.3.5
I ran ssbench-master on proxy1, with 4 ssbench-workers on each of proxy1 and proxy2; each worker talked only to its local proxy-server processes (127.0.0.1). The workers were invoked like:
```
ssbench-worker --zmq-host 192.168.201.51 --concurrency 128 --batch-size 1 0
```
The master was invoked as:
```
ssbench-master run-scenario -U bench2 -S http://127.0.0.1/v1/AUTH_bench2 -f Real_base2_simplified.scenario -u 120 -r 1200
```
in groups of 4, with a 120-second sleep in between.
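Something like the following sketch could drive those runs. This is an assumption on my part (the actual driver loop isn't included here); the ssbench-master arguments come from the command above, while NUM_GROUPS and the "4 runs back-to-back, then sleep 120 seconds" ordering are placeholders for how I read the description.

```
#!/usr/bin/env python
# Hypothetical driver loop, not the original orchestration script:
# run ssbench-master 4 times back-to-back, sleep 120 seconds, repeat.
import subprocess
import time

MASTER_CMD = [
    'ssbench-master', 'run-scenario',
    '-U', 'bench2',
    '-S', 'http://127.0.0.1/v1/AUTH_bench2',
    '-f', 'Real_base2_simplified.scenario',
    '-u', '120', '-r', '1200',
]
NUM_GROUPS = 3  # placeholder; not taken from the gist

for group in range(NUM_GROUPS):
    for run in range(4):
        print('group %d, run %d' % (group, run))
        subprocess.check_call(MASTER_CMD)
    time.sleep(120)  # pause between groups of 4 runs
```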
The Swift account used for this benchmarking held 1,200 containers and 7,186,573 objects, storing ~21.6 TB; a second account held 2,800 containers and ~11M objects, storing ~3.24 TB.
io_hammer.py, used to make a disk "slow" for these tests by hammering it with concurrent random O_DIRECT reads:

```
#!/usr/bin/env python
# Run me like:
#   sudo ./io_hammer.py /dev/sds 256
import directio
import os
from random import randint
import sys
import threading
import time

keep_running = [True]


def do_hammer(dev_path):
    # Issue unbuffered (O_DIRECT) reads of 100 blocks at random block-aligned
    # offsets, as fast as possible, until any thread stops or errors out.
    dev_stat = os.statvfs(dev_path)
    fd = os.open(dev_path, os.O_RDONLY | os.O_DIRECT)
    try:
        while keep_running[0]:
            os.lseek(
                fd, randint(0, dev_stat.f_blocks - 100) * dev_stat.f_bsize,
                os.SEEK_SET)
            directio.read(fd, dev_stat.f_bsize * 100)
    except:
        keep_running[0] = False


if __name__ == '__main__':
    dev_path, thread_count = sys.argv[1:]
    thread_count = int(thread_count)
    print "Spawning %d threads to hammer %s" % (thread_count, dev_path)
    threads = []
    for _ in xrange(thread_count):
        thread = threading.Thread(target=do_hammer, args=(dev_path,))
        thread.daemon = True
        thread.start()
        threads.append(thread)
    try:
        time.sleep(10000000)
    except:
        # Ctrl-C (or any other exception) tells all hammer threads to stop.
        keep_running[0] = False
    for thread in threads:
        thread.join()
```
Object-server configuration:

```
[DEFAULT]
backlog = 4096
workers = 90
mount_check = true
servers_per_port = 3
node_timeout = 20
conn_timeout = 10.0
network_chunk_size = 65536
disk_chunk_size = 65536

[pipeline:main]
pipeline = healthcheck recon object-server

[app:object-server]
slow = 0
mb_per_sync = 512
log_requests = True
threads_per_disk = 0
disk_chunk_size = 65536

[object-updater]
interval = 300
concurrency = 1
node_timeout = 20
conn_timeout = 10.0
slowdown = 0.01

[object-auditor]
disk_chunk_size = 1048576
files_per_second = 0
bytes_per_second = 0
zero_byte_files_per_second = 200
```
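To illustrate why servers_per_port helps with slow disks, here is a toy pre-fork sketch of the general idea; this is not Swift's actual implementation, and the disk-to-port map, port numbers, and trivial "handler" are made up. The point is the process layout: each disk gets its own listening port with a small dedicated pool of server processes, so a request that hangs on a slow disk only occupies one of that disk's servers instead of a process from the node-wide worker pool.

```
# Toy illustration only; not Swift code.
import os
import socket

DISK_PORTS = {'d1': 6201, 'd2': 6202}  # hypothetical; in Swift the ring supplies a port per disk
SERVERS_PER_PORT = 3                   # mirrors the servers_per_port = 3 setting above


def serve_forever(listener, disk):
    # Each child accepts connections only on its own disk's port, so a request
    # stuck on a hung disk ties up one of that disk's 3 servers and nothing else.
    while True:
        conn, _ = listener.accept()
        conn.sendall(('handled a request for disk %s\n' % disk).encode('ascii'))
        conn.close()


if __name__ == '__main__':
    for disk, port in DISK_PORTS.items():
        listener = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        listener.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
        listener.bind(('127.0.0.1', port))
        listener.listen(128)
        for _ in range(SERVERS_PER_PORT):
            if os.fork() == 0:
                serve_forever(listener, disk)  # child never returns
        listener.close()  # parent keeps no accept loop for this port
    os.wait()  # parent just blocks; children run until killed
```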
Proxy-server configuration:

```
[DEFAULT]
backlog = 4096
workers = 18
client_timeout = 60
expose_info = True

[pipeline:main]
pipeline = catch_errors healthcheck proxy-logging cache formpost tempurl swc swiftstack_authen swiftstack_authz dlo slo proxy-logging proxy-server

[app:proxy-server]
recheck_account_existence = 60
recheck_container_existence = 60
object_chunk_size = 65536
client_chunk_size = 65536
node_timeout = 10
conn_timeout = 10.0
post_quorum_timeout = 0.01
error_suppression_interval = 60
error_suppression_limit = 10
object_post_as_copy = False
put_queue_depth = 10
sorting_method = timing
timing_expiry = 300

[filter:cache]
memcache_servers = 192.168.3.51:11211,192.168.3.52:11211
max_connections = 20
connect_timeout = 0.6
pool_timeout = 2.5
tries = 3
io_timeout = 4.0

[filter:proxy-logging]
set access_log_facility = LOG_LOCAL2
reveal_sensitive_prefix = 16
access_log_headers = False
access_log_headers_only =
```
ssbench scenario file:

```
{
    "name": "Realistic2",
    "sizes": [{
        "name": "Empty",
        "size_min": 0,
        "size_max": 0
    }, {
        "name": "512KB",
        "size_min": 512000,
        "size_max": 512000
    }, {
        "name": "5MB",
        "size_min": 5000000,
        "size_max": 5000000
    }, {
        "name": "50MB",
        "size_min": 50000000,
        "size_max": 50000000
    }],
    "initial_files": {
        "Empty": 3500,
        "512KB": 58500,
        "5MB": 61400,
        "50MB": 39000
    },
    "container_base": "base2_Realistic2",
    "run_seconds": 7200,
    "container_count": 500,
    "container_concurrency": 50,
    "crud_profile": [86, 197, 0, 0],
    "user_count": 1
}
```
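For context: as I understand ssbench's scenario format, crud_profile holds relative weights for create, read, update, and delete operations, so [86, 197, 0, 0] is roughly a 30% write / 70% read mix. A quick check of that arithmetic:

```
# Relative CRUD weights from the scenario above: [create, read, update, delete].
crud_profile = [86, 197, 0, 0]
total = float(sum(crud_profile))
for name, weight in zip(('create', 'read', 'update', 'delete'), crud_profile):
    print('%-6s %5.1f%%' % (name, 100 * weight / total))
# prints roughly: create 30.4%, read 69.6%, update 0.0%, delete 0.0%
```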

Client concurrency level: 120

"0;..." is no slow disks (just normal swift-object-auditor) "1;..." is one slow disk (out of 61 total disks in cluster) in one of 2 storage nodes "2;..." is two slow disks (out of 61 total disks in cluster), one per each of the 2 storage nodes

Raw Data

| Config | count | avg_req_per_s | min | max | avg | std_dev | 99_pctile |
|---|---|---|---|---|---|---|---|
| 0; servers_per_port=3 | 105861.75 | 87.708292 | 0.006878 | 33.7284175 | 1.343781 | 2.270706 | 11.53051475 |
| 0; workers=90 | 90707.25 | 75.1377795 | 0.0071755 | 26.708472 | 1.57758675 | 2.37150675 | 10.855694 |
| 1; workers=90 | 54308.66667 | 32.31263167 | 0.007210667 | 543.8019563 | 2.607022333 | 26.34763967 | 14.57220133 |
| 1; workers=90;max_clients=2 | 50582 | 30.089187 | 0.0068335 | 525.8224 | 2.8690385 | 25.7079165 | 12.798998 |
| 2; workers=90 | 36168.33333 | 21.45463367 | 0.007056333 | 523.3460433 | 3.922348 | 33.53118167 | 51.96340467 |
| 2; servers_per_port=3 | 93899.33333 | 76.48168167 | 0.00684 | 31.47392 | 1.500636333 | 2.771960667 | 12.94143367 |
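As a quick back-of-the-envelope comparison, using values copied straight from the avg_req_per_s column above, here is how much average throughput drops when two disks go slow under each configuration:

```
# avg_req_per_s from the table above: (0 slow disks, 2 slow disks).
avg_req_per_s = {
    'servers_per_port=3': (87.708292, 76.48168167),
    'workers=90': (75.1377795, 21.45463367),
}
for config, (baseline, two_slow) in sorted(avg_req_per_s.items()):
    drop = 100.0 * (1 - two_slow / baseline)
    print('%-20s loses %.1f%% of avg req/s with 2 slow disks' % (config, drop))
# servers_per_port=3 loses ~12.8%; workers=90 loses ~71.4%.
```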