Skip to content

Instantly share code, notes, and snippets.

@portante
Last active December 13, 2015 19:58
Show Gist options
  • Save portante/4965908 to your computer and use it in GitHub Desktop.
Save portante/4965908 to your computer and use it in GitHub Desktop.
PUT time budget.
[pid 30268] 01:28:38.185609 accept(8, {sa_family=AF_INET, sin_port=htons(42975), sin_addr=inet_addr("127.0.0.1")}, [16]) = 14
[pid 30268] 01:28:38.185682 fcntl(14, F_GETFL) = 0x2 (flags O_RDWR)
[pid 30268] 01:28:38.185718 fcntl(14, F_SETFL, O_RDWR|O_NONBLOCK) = 0
= 109us
# Discussion: fcntl() calls are overhead cost of eventlet
[pid 30268] 01:28:38.185763 fcntl(14, F_GETFL) = 0x802 (flags O_RDWR|O_NONBLOCK)
[pid 30268] 01:28:38.185808 fcntl(14, F_SETFL, O_RDWR|O_NONBLOCK) = 0
[pid 30268] 01:28:38.185897 fcntl(14, F_GETFL) = 0x802 (flags O_RDWR|O_NONBLOCK)
[pid 30268] 01:28:38.185923 fcntl(14, F_SETFL, O_RDWR|O_NONBLOCK) = 0
[pid 30268] 01:28:38.185954 fcntl(14, F_GETFL) = 0x802 (flags O_RDWR|O_NONBLOCK)
[pid 30268] 01:28:38.185976 fcntl(14, F_SETFL, O_RDWR|O_NONBLOCK) = 0
[pid 30268] 01:28:38.186016 fcntl(14, F_GETFL) = 0x802 (flags O_RDWR|O_NONBLOCK)
[pid 30268] 01:28:38.186038 fcntl(14, F_SETFL, O_RDWR|O_NONBLOCK) = 0
[pid 30268] 01:28:38.186074 fcntl(14, F_GETFL) = 0x802 (flags O_RDWR|O_NONBLOCK)
[pid 30268] 01:28:38.186096 fcntl(14, F_SETFL, O_RDWR|O_NONBLOCK) = 0
= 378us
# Discussion: the above sequence of 10 fcntl() calls is completely gratuitous,
# removing them restores one more op per sec and is available in
# v0.12 of eventlet, another 2 iops per second saved.
# Cost: ~2 iops
[pid 30268] 01:28:38.186139 recvfrom(14, "PUT /sdb1/197427/AUTH_del0/58586"..., 8192, 0, NULL, NULL) = 489
[pid 30268] 01:28:38.186347 getsockname(14, {sa_family=AF_INET, sin_port=htons(6010), sin_addr=inet_addr("127.0.0.1")}, [16]) = 0
= 251us
# Discussion: necessary cost of handling requests
[pid 30268] 01:28:38.186715 lstat("/mnt/gluster-object/AUTH_del0_001", {st_mode=S_IFDIR|0755, st_size=80, ...}) = 0
[pid 30268] 01:28:38.186787 lstat("/mnt/gluster-object/AUTH_del0_001/..", {st_mode=S_IFDIR|0755, st_size=4096, ...}) = 0
= 440us
# Discussion: This is the mount check, performed on every single request. We
# could reduce the frequency of those checks to once a minute, or
# eliminate them entirely using a configuration option to tell us
# not to check at all. That would save another 2 iops.
# Cost: ~2 iops
[pid 30268] 01:28:38.186854 stat("/mnt/gluster-object/AUTH_del0_001/58586052B2C246E9E4928080BE25F0391379ADE5/load1ev1/20091012_0/job35051/0001/3505100001649.pdf", {st_mode=S_IFREG|0600, st_size=19288, ...}) = 0
[pid 30268] 01:28:38.187571 getxattr("/mnt/gluster-object/AUTH_del0_001/58586052B2C246E9E4928080BE25F0391379ADE5/load1ev1/20091012_0/job35051/0001/3505100001649.pdf", "user.swift.metadata", 0x0, 0) = 194
[pid 30268] 01:28:38.188024 getxattr("/mnt/gluster-object/AUTH_del0_001/58586052B2C246E9E4928080BE25F0391379ADE5/load1ev1/20091012_0/job35051/0001/3505100001649.pdf", "user.swift.metadata", "\x80\x02...", 194) = 194
= 1237us
# Discussion: For a PUT operation, stat()ing the file and fetching the
# metadata is unnecessary, and is only an artifact of the
# attempt to mimic too closely the existing semantics for
# updating containers on the PUT. We can defer operations
# for checking existence to avoid this happening on PUTs.
# Cost: ~8 iops
[pid 30268] 01:28:38.188479 stat("/mnt/gluster-object/AUTH_del0_001/tmp", {st_mode=S_IFDIR|0755, st_size=6, ...}) = 0
= 455us
# Discussion: It is not necessary to check the existence of the "tmp"
# directory, as we should be creating the target directory
# path and then create the temporary file there, renaming
# into place.
# Cost: ~2 iops
[pid 30268] 01:28:38.188650 open("/mnt/gluster-object/AUTH_del0_001/tmp/tmpgQgiUU", O_RDWR|O_CREAT|O_EXCL|O_NOFOLLOW, 0600) = 16
= 171us
[pid 30268] 01:28:38.190995 fcntl(16, F_GETFD) = 0
[pid 30268] 01:28:38.191046 fcntl(16, F_SETFD, FD_CLOEXEC) = 0
= 2396us
# Discussion: It is not necessary to separately call fcntl() on the
# open file to set CLOEXEC, as RHEL 6.2 and later have
# a linux kernel that supports using O_CLOEXEC on the
# open system call above.
# Cost: ~15 iops
[pid 30268] 01:28:38.191122 sendto(14, "HTTP/1.1 100 Continue\r\n\r\n", 25, 0, NULL, 0) = 25
= 2643us
# Discussion: Longer term, investigate not using Expect/100 HTTP semantics
# to avoid this round trip, having the client send the entire
# request at once, good for small files.
# Cost: ~17 iops
[pid 30268] 01:28:38.191218 recvfrom(14, 0x101ea14, 19288, 0, 0, 0) = -1 EAGAIN (Resource temporarily unavailable)
[pid 30268] 01:28:38.191319 poll([{fd=17, events=POLLIN|POLLPRI|POLLERR|POLLHUP}, {fd=14, events=POLLIN|POLLPRI|POLLERR|POLLHUP}], 2, 60000) = 1 ([{fd=14, revents=POLLIN}])
[pid 30268] 01:28:38.192201 recvfrom(14, "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"..., 19288, 0, NULL, NULL) = 19288
= 1079us
# Discussion: Guessing that the above 1ms delay cannot be removed since we
# still have to get the data, though perhaps a few mics can be
# saved from less system calls ... not considering this here.
[pid 30268] 01:28:38.192470 write(16, "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"..., 19288) = 19288
= 269us
[pid 30268] 01:28:38.192982 fadvise64(16, 0, 19288, POSIX_FADV_DONTNEED) = 0
= 512us
[pid 30268] 01:28:38.193091 futex(0x7fc83c001580, FUTEX_WAKE_PRIVATE, 1) = 1
[pid 30298] 01:28:38.193179 <... futex resumed> ) = 0
[pid 30268] 01:28:38.193204 poll([{fd=17, events=POLLIN|POLLPRI|POLLERR|POLLHUP}], 1, 60000 <unfinished ...>
[pid 30298] 01:28:38.193278 fsync(16) = 0
[pid 30298] 01:28:38.194797 write(18, " ", 1) = 1
[pid 30268] 01:28:38.194893 <... poll resumed> ) = 1 ([{fd=17, revents=POLLIN}])
[pid 30298] 01:28:38.194927 futex(0x7fc83c001580, FUTEX_WAIT_PRIVATE, 0, NULL <unfinished ...>
[pid 30268] 01:28:38.194956 read(17, " ", 1) = 1
[pid 30268] 01:28:38.195079 read(17, 0xf6e084, 1) = -1 EAGAIN (Resource temporarily unavailable)
= 2097us
# Discussion: The fsync() call itself only took 99us, so this overhead is
# just too much. Let's change this to perform the fsync() inline
# for small chunks.
# Cost: ~12 iops
[pid 30268] 01:28:38.195234 setxattr("/mnt/gluster-object/AUTH_del0_001/tmp/tmpgQgiUU", "user.swift.metadata", "\x80\x02...", 194, 0) = 0
[pid 30268] 01:28:38.196043 fchown(16, 4294967295, 4294967295) = 0
[pid 30268] 01:28:38.196834 rename("/mnt/gluster-object/AUTH_del0_001/tmp/tmpgQgiUU", "/mnt/gluster-object/AUTH_del0_001/58586052B2C246E9E4928080BE25F0391379ADE5/load1ev1/20091012_0/job35051/0001/3505100001649.pdf") = 0
[pid 30268] 01:28:38.198229 close(16) = 0
= 3150us
# Discussion: These operations appear to be required, though if we run swift
# under the swift user account, can't we set up the file system
# to properly create the default uid/gid under a container, and
# avoid the fchown() call entirely, which costs 809us?
# Cost: ~4 iops
[pid 30268] 01:28:38.198676 sendto(12, "<150>object-server 127.0.0.1 - -"..., 254, 0, NULL, 0) = 254
= 447us
# Discussion: For performance, we should turn off request logging for the
# object server, log_requests = off
# Cost: ~2 iops
[pid 30268] 01:28:38.199012 sendto(14, "HTTP/1.1 201 Created\r\nEtag: \"c4c"..., 301, 0, NULL, 0) = 301
[pid 30268] 01:28:38.199198 shutdown(14, 2 /* send and receive */) = 0
[pid 30268] 01:28:38.199260 close(14) = 0
= 584us
[pid 30268] 01:28:38.199362 poll([{fd=17, events=POLLIN|POLLPRI|POLLERR|POLLHUP}], 1, 0) = 0 (Timeout)
[pid 30268] 01:28:38.199422 accept(8, 0x7fffc8f403f0, [16]) = -1 EAGAIN (Resource temporarily unavailable)
[pid 30268] 01:28:38.199510 poll([{fd=8, events=POLLIN|POLLPRI|POLLERR|POLLHUP}, {fd=17, events=POLLIN|POLLPRI|POLLERR|POLLHUP}], 2, 60000) = 1 ([{fd=8, revents=POLLIN}])
= 250us
Total = 13901us
@portante
Copy link
Author

I believe I am interpreting the strace output incorrectly. Diffing the timestamps is not sufficient, should be using -ttt and -T to get a more accurate view.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment