Last active
December 13, 2015 19:58
-
-
Save portante/4965908 to your computer and use it in GitHub Desktop.
PUT time budget.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
[pid 30268] 01:28:38.185609 accept(8, {sa_family=AF_INET, sin_port=htons(42975), sin_addr=inet_addr("127.0.0.1")}, [16]) = 14 | |
[pid 30268] 01:28:38.185682 fcntl(14, F_GETFL) = 0x2 (flags O_RDWR) | |
[pid 30268] 01:28:38.185718 fcntl(14, F_SETFL, O_RDWR|O_NONBLOCK) = 0 | |
= 109us | |
# Discussion: fcntl() calls are overhead cost of eventlet | |
[pid 30268] 01:28:38.185763 fcntl(14, F_GETFL) = 0x802 (flags O_RDWR|O_NONBLOCK) | |
[pid 30268] 01:28:38.185808 fcntl(14, F_SETFL, O_RDWR|O_NONBLOCK) = 0 | |
[pid 30268] 01:28:38.185897 fcntl(14, F_GETFL) = 0x802 (flags O_RDWR|O_NONBLOCK) | |
[pid 30268] 01:28:38.185923 fcntl(14, F_SETFL, O_RDWR|O_NONBLOCK) = 0 | |
[pid 30268] 01:28:38.185954 fcntl(14, F_GETFL) = 0x802 (flags O_RDWR|O_NONBLOCK) | |
[pid 30268] 01:28:38.185976 fcntl(14, F_SETFL, O_RDWR|O_NONBLOCK) = 0 | |
[pid 30268] 01:28:38.186016 fcntl(14, F_GETFL) = 0x802 (flags O_RDWR|O_NONBLOCK) | |
[pid 30268] 01:28:38.186038 fcntl(14, F_SETFL, O_RDWR|O_NONBLOCK) = 0 | |
[pid 30268] 01:28:38.186074 fcntl(14, F_GETFL) = 0x802 (flags O_RDWR|O_NONBLOCK) | |
[pid 30268] 01:28:38.186096 fcntl(14, F_SETFL, O_RDWR|O_NONBLOCK) = 0 | |
= 378us | |
# Discussion: the above sequence of 10 fcntl() calls is completely gratuitous, | |
# removing them restores one more op per sec and is available in | |
# v0.12 of eventlet, another 2 iops per second saved. | |
# Cost: ~2 iops | |
[pid 30268] 01:28:38.186139 recvfrom(14, "PUT /sdb1/197427/AUTH_del0/58586"..., 8192, 0, NULL, NULL) = 489 | |
[pid 30268] 01:28:38.186347 getsockname(14, {sa_family=AF_INET, sin_port=htons(6010), sin_addr=inet_addr("127.0.0.1")}, [16]) = 0 | |
= 251us | |
# Discussion: necessary cost of handling requests | |
[pid 30268] 01:28:38.186715 lstat("/mnt/gluster-object/AUTH_del0_001", {st_mode=S_IFDIR|0755, st_size=80, ...}) = 0 | |
[pid 30268] 01:28:38.186787 lstat("/mnt/gluster-object/AUTH_del0_001/..", {st_mode=S_IFDIR|0755, st_size=4096, ...}) = 0 | |
= 440us | |
# Discussion: This is the mount check, performed on every single request. We | |
# could reduce the frequency of those checks to once a minute, or | |
# eliminate them entirely using a configuration option to tell us | |
# not to check at all. That would save another 2 iops. | |
# Cost: ~2 iops | |
[pid 30268] 01:28:38.186854 stat("/mnt/gluster-object/AUTH_del0_001/58586052B2C246E9E4928080BE25F0391379ADE5/load1ev1/20091012_0/job35051/0001/3505100001649.pdf", {st_mode=S_IFREG|0600, st_size=19288, ...}) = 0 | |
[pid 30268] 01:28:38.187571 getxattr("/mnt/gluster-object/AUTH_del0_001/58586052B2C246E9E4928080BE25F0391379ADE5/load1ev1/20091012_0/job35051/0001/3505100001649.pdf", "user.swift.metadata", 0x0, 0) = 194 | |
[pid 30268] 01:28:38.188024 getxattr("/mnt/gluster-object/AUTH_del0_001/58586052B2C246E9E4928080BE25F0391379ADE5/load1ev1/20091012_0/job35051/0001/3505100001649.pdf", "user.swift.metadata", "\x80\x02...", 194) = 194 | |
= 1237us | |
# Discussion: For a PUT operation, stat()ing the file and fetching the | |
# metadata is unnecessary, and is only an artifact of the | |
# attempt to mimic too closely the existing semantics for | |
# updating containers on the PUT. We can defer operations | |
# for checking existence to avoid this happening on PUTs. | |
# Cost: ~8 iops | |
[pid 30268] 01:28:38.188479 stat("/mnt/gluster-object/AUTH_del0_001/tmp", {st_mode=S_IFDIR|0755, st_size=6, ...}) = 0 | |
= 455us | |
# Discussion: It is not necessary to check the existence of the "tmp" | |
# directory, as we should be creating the target directory | |
# path and then create the temporary file there, renaming | |
# into place. | |
# Cost: ~2 iops | |
[pid 30268] 01:28:38.188650 open("/mnt/gluster-object/AUTH_del0_001/tmp/tmpgQgiUU", O_RDWR|O_CREAT|O_EXCL|O_NOFOLLOW, 0600) = 16 | |
= 171us | |
[pid 30268] 01:28:38.190995 fcntl(16, F_GETFD) = 0 | |
[pid 30268] 01:28:38.191046 fcntl(16, F_SETFD, FD_CLOEXEC) = 0 | |
= 2396us | |
# Discussion: It is not necessary to separately call fcntl() on the | |
# open file to set CLOEXEC, as RHEL 6.2 and later have | |
# a linux kernel that supports using O_CLOEXEC on the | |
# open system call above. | |
# Cost: ~15 iops | |
[pid 30268] 01:28:38.191122 sendto(14, "HTTP/1.1 100 Continue\r\n\r\n", 25, 0, NULL, 0) = 25 | |
= 2643us | |
# Discussion: Longer term, investigate not using Expect/100 HTTP semantics | |
# to avoid this round trip, having the client send the entire | |
# request at once, good for small files. | |
# Cost: ~17 iops | |
[pid 30268] 01:28:38.191218 recvfrom(14, 0x101ea14, 19288, 0, 0, 0) = -1 EAGAIN (Resource temporarily unavailable) | |
[pid 30268] 01:28:38.191319 poll([{fd=17, events=POLLIN|POLLPRI|POLLERR|POLLHUP}, {fd=14, events=POLLIN|POLLPRI|POLLERR|POLLHUP}], 2, 60000) = 1 ([{fd=14, revents=POLLIN}]) | |
[pid 30268] 01:28:38.192201 recvfrom(14, "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"..., 19288, 0, NULL, NULL) = 19288 | |
= 1079us | |
# Discussion: Guessing that the above 1ms delay cannot be removed since we | |
# still have to get the data, though perhaps a few mics can be | |
# saved from less system calls ... not considering this here. | |
[pid 30268] 01:28:38.192470 write(16, "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"..., 19288) = 19288 | |
= 269us | |
[pid 30268] 01:28:38.192982 fadvise64(16, 0, 19288, POSIX_FADV_DONTNEED) = 0 | |
= 512us | |
[pid 30268] 01:28:38.193091 futex(0x7fc83c001580, FUTEX_WAKE_PRIVATE, 1) = 1 | |
[pid 30298] 01:28:38.193179 <... futex resumed> ) = 0 | |
[pid 30268] 01:28:38.193204 poll([{fd=17, events=POLLIN|POLLPRI|POLLERR|POLLHUP}], 1, 60000 <unfinished ...> | |
[pid 30298] 01:28:38.193278 fsync(16) = 0 | |
[pid 30298] 01:28:38.194797 write(18, " ", 1) = 1 | |
[pid 30268] 01:28:38.194893 <... poll resumed> ) = 1 ([{fd=17, revents=POLLIN}]) | |
[pid 30298] 01:28:38.194927 futex(0x7fc83c001580, FUTEX_WAIT_PRIVATE, 0, NULL <unfinished ...> | |
[pid 30268] 01:28:38.194956 read(17, " ", 1) = 1 | |
[pid 30268] 01:28:38.195079 read(17, 0xf6e084, 1) = -1 EAGAIN (Resource temporarily unavailable) | |
= 2097us | |
# Discussion: The fsync() call itself only took 99us, so this overhead is | |
# just too much. Let's change this to perform the fsync() inline | |
# for small chunks. | |
# Cost: ~12 iops | |
[pid 30268] 01:28:38.195234 setxattr("/mnt/gluster-object/AUTH_del0_001/tmp/tmpgQgiUU", "user.swift.metadata", "\x80\x02...", 194, 0) = 0 | |
[pid 30268] 01:28:38.196043 fchown(16, 4294967295, 4294967295) = 0 | |
[pid 30268] 01:28:38.196834 rename("/mnt/gluster-object/AUTH_del0_001/tmp/tmpgQgiUU", "/mnt/gluster-object/AUTH_del0_001/58586052B2C246E9E4928080BE25F0391379ADE5/load1ev1/20091012_0/job35051/0001/3505100001649.pdf") = 0 | |
[pid 30268] 01:28:38.198229 close(16) = 0 | |
= 3150us | |
# Discussion: These operations appear to be required, though if we run swift | |
# under the swift user account, can't we set up the file system | |
# to properly create the default uid/gid under a container, and | |
# avoid the fchown() call entirely, which costs 809us? | |
# Cost: ~4 iops | |
[pid 30268] 01:28:38.198676 sendto(12, "<150>object-server 127.0.0.1 - -"..., 254, 0, NULL, 0) = 254 | |
= 447us | |
# Discussion: For performance, we should turn off request logging for the | |
# object server, log_requests = off | |
# Cost: ~2 iops | |
[pid 30268] 01:28:38.199012 sendto(14, "HTTP/1.1 201 Created\r\nEtag: \"c4c"..., 301, 0, NULL, 0) = 301 | |
[pid 30268] 01:28:38.199198 shutdown(14, 2 /* send and receive */) = 0 | |
[pid 30268] 01:28:38.199260 close(14) = 0 | |
= 584us | |
[pid 30268] 01:28:38.199362 poll([{fd=17, events=POLLIN|POLLPRI|POLLERR|POLLHUP}], 1, 0) = 0 (Timeout) | |
[pid 30268] 01:28:38.199422 accept(8, 0x7fffc8f403f0, [16]) = -1 EAGAIN (Resource temporarily unavailable) | |
[pid 30268] 01:28:38.199510 poll([{fd=8, events=POLLIN|POLLPRI|POLLERR|POLLHUP}, {fd=17, events=POLLIN|POLLPRI|POLLERR|POLLHUP}], 2, 60000) = 1 ([{fd=8, revents=POLLIN}]) | |
= 250us | |
Total = 13901us |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
I believe I am interpreting the strace output incorrectly. Diffing the timestamps is not sufficient, should be using -ttt and -T to get a more accurate view.