Skip to content

Instantly share code, notes, and snippets.

@jolynch
Last active June 26, 2023 01:24
Show Gist options
  • Save jolynch/a67a2bbd235dcbc3a6e1b0d47ea6a3be to your computer and use it in GitHub Desktop.
Save jolynch/a67a2bbd235dcbc3a6e1b0d47ea6a3be to your computer and use it in GitHub Desktop.
fdatasync benchmark

Fdatasync Benchmark

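A simple benchmark showing how badly your write performance can tank if you call fdatasync too often. In the results below, each key is the fdatasync interval in bytes (0 means never sync, -1 means sync once after the last write) and each list holds the elapsed milliseconds of the individual trials.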

$ make build && ./fsync_bench -b 4096 -r 2>/dev/null
{
0: [160, 152, 151, 151, 151, 151, 151, 151, 151, 151, 151, 150, 151, 151, 151, 153, 153, 152, 152, 153],
-1: [632, 648, 678, 663, 683, 660, 660, 686, 660, 669, 678, 661, 693, 660, 672, 653, 674, 665, 660, 664],
104857600: [719, 740, 739, 736, 743, 737, 770, 736, 749, 732, 727, 735, 727, 731, 738, 762, 749, 759, 741, 743],
10485760: [1131, 1125, 1127, 1127, 1132, 1118, 1145, 1115, 1115, 1114, 1136, 1133, 1139, 1118, 1119, 1131, 1133, 1132, 1109, 1122],
1048576: [3140, 3171, 3173, 3125, 3153, 3154, 3152, 3193, 3126, 3190, 3155, 3163, 3197, 3162, 3170, 3178, 3123, 3176, 3157, 3136],
524288: [5998, 5890, 5892, 5957, 5980, 5992, 6014, 5985, 5959, 5894, 5948, 5995, 5892, 5953, 5959, 5949, 5937, 5943, 5938, 5978],
262144: [10898, 10703, 10743, 10694, 10706, 10697, 10684, 10682, 10696, 10750, 10671, 10699, 10622, 10609, 10725, 10662, 10695, 10654, 10650, 10816],
131072: [17413, 17635, 17488, 17943, 17851, 17847, 17820, 17838, 17868, 17897, 17936, 17667, 17817, 17855, 17987, 17668, 17819, 17847, 18030, 17740],
65536: [26767, 26643, 26811, 26972, 26739, 26783, 26852, 26996, 26956, 26998, 27085, 26872, 26767, 26743, 26953, 27047, 26988, 26972, 26778, 26969],
32768: [54994, 53198, 52228, 52164, 52581, 52027, 52584, 52347, 51994, 52121, 51922, 52192, 52627, 52265, 52137, 52463, 52190, 52734, 52823, 52530],
16384: [107463, 112095, 111869, 111114, 112511, 111984, 112373, 112409, 111575, 111978, 112228, 112479, 111888, 112323, 112331, 112275, 111966, 111005, 110973, 108589],
}
#include <errno.h>
#include <fcntl.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <unistd.h>
/*
 * fdatasync intervals, in bytes, that the -r benchmark mode iterates over.
 * Two entries are sentinels rather than byte counts: 0 means "never sync"
 * and -1 means "sync once, after the last write".
 */
static long size[] = {
    0,              /* never sync */
    -1,             /* sync at the end */
    100L << 20,     /* every 100 MiB */
    10L << 20,      /* every 10 MiB */
    1L << 20,       /* every 1 MiB */
    512L << 10,     /* every 512 KiB, then halving down to 16 KiB */
    256L << 10,
    128L << 10,
    64L << 10,
    32L << 10,
    16L << 10
};
// Print the command-line help to stderr and terminate the process with
// EXIT_FAILURE. argv[0] is printed as the program name. Does not return.
//
// The defaults shown in parentheses mirror the initial values in main(), and
// the -f description mirrors run_iteration(): 0 never syncs, a negative value
// syncs once after the last write. No positional FILE argument is advertised
// because the program never reads one.
void usage (char *argv[]) {
    fprintf(stderr, "Usage: %s [OPTION]...\n", argv[0]);
    fprintf(stderr, " -b Write block size (4096)\n");
    fprintf(stderr, " -t Target size in bytes (1073741824)\n");
    fprintf(stderr, " -f Call fdatasync after this many bytes. -1 syncs once at the end of writes and 0 disables (-1)\n");
    fprintf(stderr, " -l Number of trials to run per interval during benchmark (20)\n");
    fprintf(stderr, " -r Run a benchmark doing multiple iterations at various fsync intervals\n");
    exit(EXIT_FAILURE);
}
// Write exactly len bytes from buf to fd, retrying after short writes and EINTR.
// Returns 0 on success, or -1 with errno set if write(2) fails or makes no progress.
static int write_all(int fd, const char *buf, size_t len) {
    while (len > 0) {
        ssize_t n = write(fd, buf, len);
        if (n < 0) {
            if (errno == EINTR) {
                continue;
            }
            return -1;
        }
        if (n == 0) {
            // Zero progress would spin forever; report it as an I/O error.
            errno = EIO;
            return -1;
        }
        buf += n;
        len -= (size_t)n;
    }
    return 0;
}

// Time how long it takes to write target_size bytes to "test.bin" in
// buf_size-byte chunks (target_size / buf_size writes in total).
//
// fsync_size selects the fdatasync policy:
//   0    never call fdatasync
//   < 0  call fdatasync once, after the last write
//   > 0  call fdatasync each time at least fsync_size bytes have been
//        written since the previous sync
//
// Progress is logged to stderr. Returns the elapsed CLOCK_MONOTONIC
// milliseconds covering the writes and syncs, or -1 if buf_size is not
// positive or an allocation or system call fails (a diagnostic is printed).
long long run_iteration(long buf_size, long target_size, long fsync_size) {
    // Guards both the division below and the buffer allocation.
    if (buf_size <= 0) {
        fprintf(stderr, "Block size must be positive, got %ld\n", buf_size);
        return -1;
    }

    long num_iterations = target_size / buf_size;
    fprintf(
        stderr,
        "Writing %ld MiB in chunks of %ld for %ld syscalls. Fsync every %ld KiB ...",
        target_size / (1024 * 1024), buf_size, num_iterations, fsync_size / (1024));

    // Heap allocation: the block size comes from the command line, and a
    // variable-length array of that size could overflow the stack.
    char *buf = malloc((size_t)buf_size);
    if (buf == NULL) {
        perror("malloc");
        return -1;
    }
    memset(buf, 'x', (size_t)buf_size);

    int fd = open("test.bin", O_RDWR | O_CREAT, S_IRUSR | S_IWUSR);
    if (fd < 0) {
        perror("open test.bin");
        free(buf);
        return -1;
    }

    long long elapsed_ms = -1;
    long unsynced = 0;  // bytes written since the last fdatasync
    struct timespec start;
    struct timespec end;

    if (clock_gettime(CLOCK_MONOTONIC, &start) != 0) {
        perror("clock_gettime");
        goto out;
    }

    for (long i = 0; i < num_iterations; i++) {
        if (write_all(fd, buf, (size_t)buf_size) != 0) {
            perror("write");
            goto out;
        }
        if (fsync_size > 0) {
            unsynced += buf_size;
            // Sync only once a full interval has accumulated, so the first
            // sync happens after fsync_size bytes rather than after the
            // very first block.
            if (unsynced >= fsync_size) {
                if (fdatasync(fd) != 0) {
                    perror("fdatasync");
                    goto out;
                }
                unsynced = 0;
            }
        }
    }

    // Support "sync on close"
    if (fsync_size < 0 && fdatasync(fd) != 0) {
        perror("fdatasync");
        goto out;
    }

    if (clock_gettime(CLOCK_MONOTONIC, &end) != 0) {
        perror("clock_gettime");
        goto out;
    }

    // TODO
    // read time in queue from /sys/block/nvme0n1/stat
    // divide by elapsed time, get avg queue size.

    // Widen tv_sec before multiplying so the product cannot overflow a
    // narrower time_t.
    elapsed_ms = ((long long)end.tv_sec * 1000 + end.tv_nsec / 1000000)
               - ((long long)start.tv_sec * 1000 + start.tv_nsec / 1000000);
    fprintf(stderr, " Done in %lld millis\n", elapsed_ms);

out:
    close(fd);
    free(buf);
    return elapsed_ms;
}
// Parse a base-10 long from a command-line argument.
// Returns 0 and stores the value in *out on success; returns -1 if text has
// no digits, has trailing characters, or does not fit in a long.
static int parse_long(const char *text, long *out) {
    char *end;
    errno = 0;
    long value = strtol(text, &end, 10);
    if (end == text || *end != '\0' || errno == ERANGE) {
        return -1;
    }
    *out = value;
    return 0;
}

// Entry point. Parses the options and then either:
//   - with -r: runs `trials` iterations for every interval in size[] and
//     prints one "interval: [millis, ...]" line per interval to stdout, or
//   - without -r: runs a single iteration with the requested -f interval and
//     prints "interval -> millis".
// Malformed or out-of-range option values print the usage text and fail.
int main(int argc, char *argv[]) {
    long buf_size = 4096;
    long target_size = 1024L * 1024 * 1024;
    long fsync_size = -1;
    long trials_arg = 20;
    int bench = 0;
    int args_ok = 1;
    int opt;

    while ((opt = getopt(argc, argv, "b:t:f:l:r")) != -1) {
        switch (opt) {
        case 'b':
            // 0 is still accepted and mapped to the default below.
            args_ok = parse_long(optarg, &buf_size) == 0 && buf_size >= 0;
            break;
        case 't':
            args_ok = parse_long(optarg, &target_size) == 0 && target_size >= 0;
            break;
        case 'f':
            // Any value is meaningful here: 0, negative and positive each
            // select a different sync policy.
            args_ok = parse_long(optarg, &fsync_size) == 0;
            break;
        case 'l':
            args_ok = parse_long(optarg, &trials_arg) == 0
                   && trials_arg >= 1 && trials_arg <= INT_MAX;
            break;
        case 'r':
            bench = 1;
            break;
        default:
            args_ok = 0;
            break;
        }
        if (!args_ok) {
            usage(argv);
            return EXIT_FAILURE;
        }
    }

    if (buf_size == 0) {
        buf_size = 4096;
    }
    int trials = (int)trials_arg;

    if (bench == 1) {
        // Derive the count from the table so the two cannot drift apart.
        size_t num_sizes = sizeof(size) / sizeof(size[0]);

        fprintf(stderr, "Running %d trials writing %ldKiB of data\n", trials, target_size / (1024));
        printf("{\n");
        for (size_t i = 0; i < num_sizes; i++) {
            fsync_size = size[i];
            printf(" %ld: [", fsync_size);
            for (int j = 0; j < trials; j++) {
                long long result = run_iteration(buf_size, target_size, fsync_size);
                if (result < 0) {
                    // A negative value is not a valid duration; stop rather
                    // than record it as a measurement.
                    fflush(stdout);
                    return EXIT_FAILURE;
                }
                if (j + 1 < trials) {
                    printf("%lld, ", result);
                } else {
                    printf("%lld", result);
                }
            }
            printf("],\n");
        }
        printf("}\n");
    } else {
        long long result = run_iteration(buf_size, target_size, fsync_size);
        if (result < 0) {
            return EXIT_FAILURE;
        }
        fprintf(stdout, "%ld -> %lld millis\n", fsync_size, result);
    }
    return 0;
}
$ make
gcc benchmark.c -o fsync_bench
echo
echo "Writing 100 MiB in 4kib blocks, no fdatasync"
Writing 100 MiB in 4kib blocks, no fdatasync
./fsync_bench -b 4096 -f 0
Writing 1024 MiB in chunks of 4096 for 262144 syscalls. Fsync every 0 KiB ... Done in 194 millis
0 -> 194 millis
sync
echo
echo "Writing 100 MiB in 4kib blocks, one fdatasync at end"
Writing 100 MiB in 4kib blocks, one fdatasync at end
./fsync_bench -b 4096
Writing 1024 MiB in chunks of 4096 for 262144 syscalls. Fsync every 0 KiB ... Done in 675 millis
-1 -> 675 millis
sync
echo
echo "Writing 100MiB in 4KiB blocks, sync per 100MiB"
Writing 100MiB in 4KiB blocks, sync per 100MiB
./fsync_bench -b 4096 -f 104857600
Writing 1024 MiB in chunks of 4096 for 262144 syscalls. Fsync every 102400 KiB ... Done in 730 millis
104857600 -> 730 millis
sync
echo
echo "Writing 100MiB in 4KiB blocks, sync per 10MiB"
Writing 100MiB in 4KiB blocks, sync per 10MiB
./fsync_bench -b 4096 -f 10485760
Writing 1024 MiB in chunks of 4096 for 262144 syscalls. Fsync every 10240 KiB ... Done in 1105 millis
10485760 -> 1105 millis
sync
echo
echo "Writing 100MiB in 4KiB blocks, sync per 1MiB"
Writing 100MiB in 4KiB blocks, sync per 1MiB
./fsync_bench -b 4096 -f 1048576
Writing 1024 MiB in chunks of 4096 for 262144 syscalls. Fsync every 1024 KiB ... Done in 3144 millis
1048576 -> 3144 millis
sync
echo
echo "Writing 100MiB in 4KiB blocks, sync per 64KiB"
Writing 100MiB in 4KiB blocks, sync per 64KiB
./fsync_bench -b 4096 -f 65536
Writing 1024 MiB in chunks of 4096 for 262144 syscalls. Fsync every 64 KiB ... Done in 28165 millis
65536 -> 28165 millis
sync
# Build fsync_bench and run it once per fdatasync interval.
#
# Every target names an action rather than a file, so declare them phony;
# otherwise a stray file called e.g. "build" would make the rule a no-op.
# No -t is passed, so each run writes the program's default of 1 GiB.
.PHONY: all build test_baseline test_end test_100m test_10m test_1m test_64k

all: build test_baseline test_end test_100m test_10m test_1m test_64k

build:
	gcc benchmark.c -o fsync_bench

# -f 0: fdatasync is never called.
test_baseline: build
	echo
	echo "Writing 1 GiB in 4KiB blocks, no fdatasync"
	./fsync_bench -b 4096 -f 0
	sync

# No -f: the program's default (-1) syncs once after the last write.
test_end: build
	echo
	echo "Writing 1 GiB in 4KiB blocks, one fdatasync at end"
	./fsync_bench -b 4096
	sync

test_100m: build
	echo
	echo "Writing 1 GiB in 4KiB blocks, sync per 100MiB"
	./fsync_bench -b 4096 -f 104857600
	sync

test_10m: build
	echo
	echo "Writing 1 GiB in 4KiB blocks, sync per 10MiB"
	./fsync_bench -b 4096 -f 10485760
	sync

test_1m: build
	echo
	echo "Writing 1 GiB in 4KiB blocks, sync per 1MiB"
	./fsync_bench -b 4096 -f 1048576
	sync

test_64k: build
	echo
	echo "Writing 1 GiB in 4KiB blocks, sync per 64KiB"
	./fsync_bench -b 4096 -f 65536
	sync
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment