Skip to content

Instantly share code, notes, and snippets.

@nkurz
Created November 29, 2014 14:10
Show Gist options
  • Save nkurz/d64f5b4ded4e19e17aae to your computer and use it in GitHub Desktop.
Save nkurz/d64f5b4ded4e19e17aae to your computer and use it in GitHub Desktop.
// Measure the CPU cycles spent on function call/return overhead, timed with rdtsc.
// See http://cs.coloradocollege.edu/~bylvisaker/CallReturn/
// Build: gcc -g -std=gnu99 -O3 -Wall -Wextra call-return.c -o call-return
#include <errno.h>
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
// Number of timed f8() invocations when no count is given on the command line.
// Stays an int expression: the usage message prints it with %d.
#define DEFAULT_LOOP_COUNT (1000 * 1000)
// Set `cycles` to the current 64-bit rdtsc value; use at the start of a timed region.
//
// `cycles` must be an lvalue able to hold a uint64_t.
// Sequence: cpuid, then rdtsc, then EDX is copied to cyc_high and EAX to cyc_low,
// and the two halves are combined as (high << 32) | low.
// NOTE(review): cpuid is presumably used as a serializing instruction so earlier
// work cannot overlap the timestamp read -- confirm against Intel's benchmarking guidance.
// rax/rbx/rcx/rdx are listed as clobbers, so the compiler will not place the "=r"
// outputs in those registers.
// Relies on x86 instructions and GCC-style extended asm.
#define RDTSC_START(cycles) \
do { \
register unsigned cyc_high, cyc_low; \
asm volatile("cpuid\n\t" \
"rdtsc\n\t" \
"mov %%edx, %0\n\t" \
"mov %%eax, %1\n\t" \
: "=r" (cyc_high), "=r" (cyc_low) \
:: "%rax", "%rbx", "%rcx", "%rdx"); \
(cycles) = ((uint64_t)cyc_high << 32) | cyc_low; \
} while (0)
// Set `cycles` to the current 64-bit rdtsc value; use at the end of a timed region.
//
// `cycles` must be an lvalue able to hold a uint64_t.
// Sequence: rdtscp, then EDX is copied to cyc_high and EAX to cyc_low, and only
// afterwards cpuid runs -- the copies must come first because the asm declares
// rax/rbx/rcx/rdx clobbered.
// NOTE(review): the rdtscp + trailing cpuid pairing is presumably meant to keep later
// instructions from overlapping the timestamp read -- confirm against Intel's
// benchmarking guidance.
// Relies on x86 instructions and GCC-style extended asm.
#define RDTSC_FINAL(cycles) \
do { \
register unsigned cyc_high, cyc_low; \
asm volatile("rdtscp\n\t" \
"mov %%edx, %0\n\t" \
"mov %%eax, %1\n\t" \
"cpuid\n\t" \
: "=r" (cyc_high), "=r" (cyc_low) \
:: "%rax", "%rbx", "%rcx", "%rdx"); \
(cycles) = ((uint64_t)cyc_high << 32) | cyc_low; \
} while(0)
// Marks a function with the noinline attribute so the compiler does not inline it.
#define COMPILER_NO_INLINE __attribute__((noinline))

/*
 * Leaf of the benchmark call tree: returns x + 1 (wrapping on overflow).
 * The arithmetic is incidental; the function exists to be called.
 */
COMPILER_NO_INLINE uint64_t f0( uint64_t x ) {
    const uint64_t next = x + 1;
    return next;
}
/*
 * Level 1 of the call tree: chains two f0() calls and adds one.
 * Net result is x + 3 -- one per call in this subtree, itself included.
 */
COMPILER_NO_INLINE uint64_t f1( uint64_t x ) {
    const uint64_t first = f0( x );
    const uint64_t second = f0( first );
    return second + 1;
}
/*
 * Level 2 of the call tree: chains two f1() calls and adds one.
 * Net result is x + 7 -- one per call in this subtree, itself included.
 */
COMPILER_NO_INLINE uint64_t f2( uint64_t x ) {
    const uint64_t first = f1( x );
    const uint64_t second = f1( first );
    return second + 1;
}
/*
 * Level 3 of the call tree: chains two f2() calls and adds one.
 * Net result is x + 15 -- one per call in this subtree, itself included.
 */
COMPILER_NO_INLINE uint64_t f3( uint64_t x ) {
    const uint64_t first = f2( x );
    const uint64_t second = f2( first );
    return second + 1;
}
/*
 * Level 4 of the call tree: chains two f3() calls and adds one.
 * Net result is x + 31 -- one per call in this subtree, itself included.
 */
COMPILER_NO_INLINE uint64_t f4( uint64_t x ) {
    const uint64_t first = f3( x );
    const uint64_t second = f3( first );
    return second + 1;
}
/*
 * Level 5 of the call tree: chains two f4() calls and adds one.
 * Net result is x + 63 -- one per call in this subtree, itself included.
 */
COMPILER_NO_INLINE uint64_t f5( uint64_t x ) {
    const uint64_t first = f4( x );
    const uint64_t second = f4( first );
    return second + 1;
}
/*
 * Level 6 of the call tree: chains two f5() calls and adds one.
 * Net result is x + 127 -- one per call in this subtree, itself included.
 */
COMPILER_NO_INLINE uint64_t f6( uint64_t x ) {
    const uint64_t first = f5( x );
    const uint64_t second = f5( first );
    return second + 1;
}
/*
 * Level 7 of the call tree: chains two f6() calls and adds one.
 * Net result is x + 255 -- one per call in this subtree, itself included.
 */
COMPILER_NO_INLINE uint64_t f7( uint64_t x ) {
    const uint64_t first = f6( x );
    const uint64_t second = f6( first );
    return second + 1;
}
/*
 * Root of the call tree: chains two f7() calls and adds one.
 * Net result is x + 511 -- one per call in the whole tree (256 of them to f0),
 * so the return value doubles as a running call counter.
 */
COMPILER_NO_INLINE uint64_t f8( uint64_t x ) {
    const uint64_t first = f7( x );
    const uint64_t second = f7( first );
    return second + 1;
}
/*
 * Benchmark driver: times repeated calls to f8() with the rdtsc macros and
 * reports the average cycle cost per function call.
 *
 * Usage: call-return [count]
 *   count  optional positive decimal number of f8() invocations; must start
 *          with a digit and contain nothing but digits
 *          (defaults to DEFAULT_LOOP_COUNT).
 *
 * Returns 0 on success; 1 on a usage error or when the accumulated result
 * does not match the expected value.
 */
int main(int argc, char** argv) {
    if (argc > 2) goto die_usage;

    uint64_t loop_count = DEFAULT_LOOP_COUNT;
    if (argc > 1) {
        const char *arg = argv[1];
        char *end = NULL;
        // Require a leading digit: strtoull would otherwise accept "-1" and
        // silently wrap it into an enormous unsigned count.
        if (arg[0] < '0' || arg[0] > '9') goto die_usage;
        errno = 0;
        unsigned long long parsed = strtoull(arg, &end, 10);
        // Reject trailing garbage ("10abc") and out-of-range values.
        if (*end != '\0' || errno == ERANGE) goto die_usage;
        loop_count = parsed;
    }
    if (! loop_count) goto die_usage;

    printf("Calling f8() %" PRIu64 " times\n", loop_count);

    // f8(0) yields the number of calls made per f8() invocation (f8 adds one
    // per call in its tree); running it once here also prewarms the instruction cache.
    uint64_t calls_per_iteration = f8(0);
    uint64_t expected_val = calls_per_iteration * loop_count;

    uint64_t cycles_start, cycles_final;
    RDTSC_START(cycles_start);
    uint64_t val = 0;
    while (loop_count--) {
        val = f8(val); // each pass adds the full call count of the f0..f8 tree
    }
    RDTSC_FINAL(cycles_final);

    uint64_t cycles_spent = cycles_final - cycles_start;
    // double rather than float: cycle and call counts easily exceed float's 24-bit mantissa.
    double cycles_per_call = (double) cycles_spent / (double) val;
    // val counts every call in the tree (f0 through f8), not only those to f0.
    printf("%" PRIu64 " function calls took %" PRIu64 " cycles (%.2f cycles per call)\n",
           val, cycles_spent, cycles_per_call);

    // Using val in a comparison keeps the timed loop from being optimized away.
    if (val == expected_val) return 0;
    printf("Warning --- expected %" PRIu64 " calls but got %" PRIu64 "\n", expected_val, val);
    return 1;

die_usage:
    // argv[0] may be NULL when the program is started with an empty argument vector.
    printf("Usage: %s [count] (default count %d)\n",
           (argc > 0 && argv[0]) ? argv[0] : "call-return", DEFAULT_LOOP_COUNT);
    return 1;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment