Skip to content

Instantly share code, notes, and snippets.

@nkurz
Created September 21, 2014 05:12
Show Gist options
  • Save nkurz/985470b01b999e67d04b to your computer and use it in GitHub Desktop.
Save nkurz/985470b01b999e67d04b to your computer and use it in GitHub Desktop.
Sample file showing timings for several alignment implementations.
// gcc -fno-inline -std=gnu99 -Wall -O3 align.c -o align -lm -DLIKWID -llikwid -lpthread
// objdump -d align | less (to confirm that the code hasn't been optimized out)
// likwid -m -C2 -g BRANCH align
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
// Alignment boundary used by the align_* functions below. The #ifndef guard
// lets it be overridden at build time with -DALIGN=<value>; the default is 8.
#ifndef ALIGN
#define ALIGN 8
#endif
// Upper bound of the loop counter in each timed loop in main (i runs from 1 to
// REPEAT inclusive). Overridable with -DREPEAT=<value>; the default is 1000000.
#ifndef REPEAT
#define REPEAT 1000000
#endif
// Control case: hands its argument straight back without modifying it.
// main times this in the "baseline" region alongside the align_* variants.
unsigned int baseline(unsigned int x) {
    unsigned int unchanged = x;
    return unchanged;
}
// Rounds x up to a multiple of ALIGN with a bit mask: add ALIGN - 1, then
// clear the low bits. The mask only isolates multiples of ALIGN when ALIGN is
// a power of two. The addition is unsigned, so it wraps rather than trapping
// if x + ALIGN - 1 exceeds the range of unsigned int.
unsigned int align_1(unsigned int x) {
    unsigned int bumped = x + ALIGN - 1;
    return bumped & ~(ALIGN - 1);
}
// Rounds x up to a multiple of ALIGN by stepping: start at ALIGN and keep
// adding ALIGN until the candidate is no longer below x. The number of steps
// grows linearly with x.
// Note: the search starts at ALIGN, so x == 0 yields ALIGN rather than 0.
// There is no guard against the candidate wrapping around for values of x
// above the largest multiple of ALIGN that fits in an unsigned int.
unsigned int align_2(unsigned int x) {
    unsigned int candidate;
    for (candidate = ALIGN; candidate < x; candidate += ALIGN) {
        // Nothing to do here; the loop header performs the stepping.
    }
    return candidate;
}
// Rounds x up to a multiple of ALIGN using floating point: divide by ALIGN as
// a double, take the ceiling to get a whole number of ALIGN-sized units, and
// multiply back. The double product is converted to unsigned int on return.
unsigned int align_3(unsigned int x) {
    double units = ceil((double)x / ALIGN);
    double rounded = ALIGN * units;
    return rounded;
}
// LIKWID marker API selection. When built with -DLIKWID the real header is
// included. Otherwise each marker call used in main is defined as an empty
// macro, so the calls expand to nothing and the program builds without the
// library.
#ifdef LIKWID
#include <likwid.h>
#else
#define likwid_markerInit()
#define likwid_markerThreadInit()
#define likwid_markerStartRegion(name)
#define likwid_markerStopRegion(name)
#define likwid_markerClose()
#endif // LIKWID
// Benchmark driver. Runs four marker regions in sequence ("baseline",
// "align_1", "align_2", "align_3"). Inside each region the matching function
// is called once for every i from 1 to REPEAT inclusive.
//
// Every call's result is compared against 0. If a result is 0 the program
// prints "Test failed!" and exits with status 1; otherwise it exits with
// status 0 after the last region. The comparison also consumes each return
// value -- presumably so the optimizer cannot discard the calls; confirm with
// a disassembly of the built binary.
//
// argc and argv are not used.
int main(int argc, char **argv) {
// Set up the marker API before the first region is opened.
likwid_markerInit();
likwid_markerThreadInit();
// name holds the label of the region currently being measured.
char *name = "baseline";
// Region 1: identity function, the reference point for the other regions.
likwid_markerStartRegion(name);
for (int i = 1; i <= REPEAT; i++) {
if (baseline(i) == 0) {
printf("Test failed!");
exit(1);
}
}
likwid_markerStopRegion(name);
// Region 2: bit-mask rounding.
name = "align_1";
likwid_markerStartRegion(name);
for (int i = 1; i <= REPEAT; i++) {
if (align_1(i) == 0) {
printf("Test failed!");
exit(1);
}
}
likwid_markerStopRegion(name);
// Region 3: stepping loop; each call's cost grows with i.
name = "align_2";
likwid_markerStartRegion(name);
for (int i = 1; i <= REPEAT; i++) {
if (align_2(i) == 0) {
printf("Test failed!");
exit(1);
}
}
likwid_markerStopRegion(name);
// Region 4: floating-point ceil() rounding.
name = "align_3";
likwid_markerStartRegion(name);
for (int i = 1; i <= REPEAT; i++) {
if (align_3(i) == 0) {
printf("Test failed!");
exit(1);
}
}
likwid_markerStopRegion(name);
// Shut down the marker API after the last region has been closed.
likwid_markerClose();
exit(0);
}
#if LIKWID_RESULTS
-------------------------------------------------------------
-------------------------------------------------------------
CPU type: Intel Core Haswell processor
CPU clock: 3.39 GHz
Measuring group BRANCH
-------------------------------------------------------------
align
=====================
Region: baseline
=====================
+-------------------+------------+
| Region Info | core 2 |
+-------------------+------------+
| RDTSC Runtime [s] | 0.00157839 |
| call count | 1 |
+-------------------+------------+
+------------------------------+-------------+
| Event | core 2 |
+------------------------------+-------------+
| INSTR_RETIRED_ANY | 9.00103e+06 |
| CPU_CLK_UNHALTED_CORE | 5.3404e+06 |
| CPU_CLK_UNHALTED_REF | 5.34018e+06 |
| BR_INST_RETIRED_ALL_BRANCHES | 4.00032e+06 |
| BR_MISP_RETIRED_ALL_BRANCHES | 47 |
+------------------------------+-------------+
+----------------------------+-------------+
| Metric | core 2 |
+----------------------------+-------------+
| Runtime (RDTSC) [s] | 0.00157839 |
| Runtime unhalted [s] | 0.00157435 |
| Clock [MHz] | 3392.28 |
| CPI | 0.59331 |
| Branch rate | 0.444429 |
| Branch misprediction rate | 5.22163e-06 |
| Branch misprediction ratio | 1.17491e-05 |
| Instructions per branch | 2.25008 |
+----------------------------+-------------+
=====================
Region: align_1
=====================
+-------------------+-----------+
| Region Info | core 2 |
+-------------------+-----------+
| RDTSC Runtime [s] | 0.0014741 |
| call count | 1 |
+-------------------+-----------+
+------------------------------+-------------+
| Event | core 2 |
+------------------------------+-------------+
| INSTR_RETIRED_ANY | 1.0001e+07 |
| CPU_CLK_UNHALTED_CORE | 5.00746e+06 |
| CPU_CLK_UNHALTED_REF | 5.00752e+06 |
| BR_INST_RETIRED_ALL_BRANCHES | 4.00031e+06 |
| BR_MISP_RETIRED_ALL_BRANCHES | 40 |
+------------------------------+-------------+
+----------------------------+-------------+
| Metric | core 2 |
+----------------------------+-------------+
| Runtime (RDTSC) [s] | 0.0014741 |
| Runtime unhalted [s] | 0.0014762 |
| Clock [MHz] | 3392.1 |
| CPI | 0.500695 |
| Branch rate | 0.399991 |
| Branch misprediction rate | 3.99959e-06 |
| Branch misprediction ratio | 9.99922e-06 |
| Instructions per branch | 2.50006 |
+----------------------------+-------------+
=====================
Region: align_2
=====================
+-------------------+---------+
| Region Info | core 2 |
+-------------------+---------+
| RDTSC Runtime [s] | 18.4446 |
| call count | 1 |
+-------------------+---------+
+------------------------------+-------------+
| Event | core 2 |
+------------------------------+-------------+
| INSTR_RETIRED_ANY | 1.8751e+11 |
| CPU_CLK_UNHALTED_CORE | 6.25503e+10 |
| CPU_CLK_UNHALTED_REF | 6.25503e+10 |
| BR_INST_RETIRED_ALL_BRANCHES | 6.25045e+10 |
| BR_MISP_RETIRED_ALL_BRANCHES | 1.0001e+06 |
+------------------------------+-------------+
+----------------------------+-------------+
| Metric | core 2 |
+----------------------------+-------------+
| Runtime (RDTSC) [s] | 18.4446 |
| Runtime unhalted [s] | 18.4398 |
| Clock [MHz] | 3392.14 |
| CPI | 0.333583 |
| Branch rate | 0.333339 |
| Branch misprediction rate | 5.33357e-06 |
| Branch misprediction ratio | 1.60004e-05 |
| Instructions per branch | 2.99995 |
+----------------------------+-------------+
=====================
Region: align_3
=====================
+-------------------+------------+
| Region Info | core 2 |
+-------------------+------------+
| RDTSC Runtime [s] | 0.00590322 |
| call count | 1 |
+-------------------+------------+
+------------------------------+-------------+
| Event | core 2 |
+------------------------------+-------------+
| INSTR_RETIRED_ANY | 1.90017e+07 |
| CPU_CLK_UNHALTED_CORE | 2.00174e+07 |
| CPU_CLK_UNHALTED_REF | 2.00175e+07 |
| BR_INST_RETIRED_ALL_BRANCHES | 7.00045e+06 |
| BR_MISP_RETIRED_ALL_BRANCHES | 64 |
+------------------------------+-------------+
+----------------------------+-------------+
| Metric | core 2 |
+----------------------------+-------------+
| Runtime (RDTSC) [s] | 0.00590322 |
| Runtime unhalted [s] | 0.0059011 |
| Clock [MHz] | 3392.12 |
| CPI | 1.05345 |
| Branch rate | 0.368412 |
| Branch misprediction rate | 3.36812e-06 |
| Branch misprediction ratio | 9.14227e-06 |
| Instructions per branch | 2.71435 |
+----------------------------+-------------+
#endif
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment