Last active
August 20, 2023 23:53
-
-
Save LiosK/6abcd792a159e3a796ca3a52054444e2 to your computer and use it in GitHub Desktop.
A quick benchmark of / and >> in computing millisecond timestamp
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <stdint.h> | |
#include <stdio.h> | |
#include <time.h> | |
static const long REPEAT = 4; | |
static const long NUMBER = 400000000; | |
/// Computes a millisecond timestamp from `struct timespect` components. | |
#define div(tv_sec, tv_nsec) \ | |
((uint64_t)(tv_sec)*1000 + (uint64_t)(tv_nsec) / 1000000) | |
/// Same as `div()`, but uses multiplication and shift operations. | |
/// | |
/// @see https://dl.acm.org/doi/10.1145/773473.178249 | |
#define shr(tv_sec, tv_nsec) \ | |
((uint64_t)(tv_sec)*1000 + ((uint64_t)(tv_nsec)*UINT64_C(2251799814) >> 51)) | |
volatile struct timespec tp; | |
volatile uint64_t discarded; | |
static inline double timespec_diff(const struct timespec *end, | |
const struct timespec *begin) { | |
return (double)(end->tv_sec - begin->tv_sec) + | |
(double)(end->tv_nsec - begin->tv_nsec) / 1000000000.0; | |
} | |
int main(void) { | |
clock_gettime(CLOCK_REALTIME, (struct timespec *)&tp); | |
printf("tv_sec: %ld, tv_nsec: %ld\n", tp.tv_sec, tp.tv_nsec); | |
printf("=> div: %llu msec\n", div(tp.tv_sec, tp.tv_nsec)); | |
printf("=> shr: %llu msec\n", shr(tp.tv_sec, tp.tv_nsec)); | |
struct { | |
double div; | |
double shr; | |
} results = {0.0, 0.0}; | |
printf("\nExecuting %ld ops x %ld...\n", NUMBER, REPEAT); | |
for (long i = 0; i < REPEAT; i++) { | |
struct timespec begin, end; | |
clock_gettime(CLOCK_MONOTONIC, &begin); | |
for (long j = 0; j < NUMBER; j++) { | |
discarded = div(tp.tv_sec, tp.tv_nsec); | |
} | |
clock_gettime(CLOCK_MONOTONIC, &end); | |
results.div += timespec_diff(&end, &begin); | |
clock_gettime(CLOCK_MONOTONIC, &begin); | |
for (long j = 0; j < NUMBER; j++) { | |
discarded = shr(tp.tv_sec, tp.tv_nsec); | |
} | |
clock_gettime(CLOCK_MONOTONIC, &end); | |
results.shr += timespec_diff(&end, &begin); | |
} | |
printf("div: %.3f sec (%.3f nsec/op)\n", results.div / REPEAT, | |
results.div / REPEAT / NUMBER * 1000000000); | |
printf("shr: %.3f sec (%.3f nsec/op)\n", results.shr / REPEAT, | |
results.shr / REPEAT / NUMBER * 1000000000); | |
return 0; | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
.PHONY: run clean | |
run: divshr.O0 divshr.O2 | |
./divshr.O0 | |
./divshr.O2 | |
clean: | |
$(RM) divshr.O* | |
divshr.O0: divshr.c | |
$(CC) -Wall -Wextra -O0 -o$@ $< | |
divshr.O2: divshr.c | |
$(CC) -Wall -Wextra -O2 -o$@ $< |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
cc -Wall -Wextra -O0 -odivshr.O0 divshr.c | |
cc -Wall -Wextra -O2 -odivshr.O2 divshr.c | |
./divshr.O0 | |
tv_sec: 1692460446, tv_nsec: 656331000 | |
=> div: 1692460446656 msec | |
=> shr: 1692460446656 msec | |
Executing 400000000 ops x 4... | |
div: 3.181 sec (7.952 nsec/op) | |
shr: 0.693 sec (1.734 nsec/op) | |
./divshr.O2 | |
tv_sec: 1692460462, tv_nsec: 205994000 | |
=> div: 1692460462205 msec | |
=> shr: 1692460462205 msec | |
Executing 400000000 ops x 4... | |
div: 0.230 sec (0.575 nsec/op) | |
shr: 0.230 sec (0.574 nsec/op) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
That result is to some extent consistent with what I get from assemblies. O2
shr
code emits fewer instructions than doesdiv
, which might result in the throughput difference. As for O0, perhaps division is much faster on ARM than on x86.Anyway, I think those results support my opinion that we don't really need to be worried about divisions.