Skip to content

Instantly share code, notes, and snippets.

@mmozeiko mmozeiko/test.cpp
Created Aug 2, 2015

Embed
What would you like to do?
Testing performance
#include <stdint.h>
#include <stdio.h>
#if _WIN32
#include <intrin.h>
#elif ANDROID
#include <unistd.h>
#include <sys/syscall.h>
// Android NDK doesn't have perf_event.h header :(
// copy&paste from http://lxr.free-electrons.com/source/include/uapi/linux/perf_event.h
/*
 * Event classes accepted in perf_event_attr.type.
 * Mirrors the upstream Linux uapi header; this file only uses
 * PERF_TYPE_HARDWARE.
 */
enum perf_type_id
{
    PERF_TYPE_HARDWARE   = 0,
    PERF_TYPE_SOFTWARE   = 1,
    PERF_TYPE_TRACEPOINT = 2,
    PERF_TYPE_HW_CACHE   = 3,
    PERF_TYPE_RAW        = 4,
    PERF_TYPE_BREAKPOINT = 5,

    /* Not an event class: one past the last valid value. */
    PERF_TYPE_MAX,
};
/*
 * Generic hardware event selectors, stored in perf_event_attr.config when
 * the event type is PERF_TYPE_HARDWARE. Mirrors the upstream Linux uapi
 * header; this file only uses PERF_COUNT_HW_CPU_CYCLES.
 */
enum perf_hw_id
{
    PERF_COUNT_HW_CPU_CYCLES              = 0,
    PERF_COUNT_HW_INSTRUCTIONS            = 1,
    PERF_COUNT_HW_CACHE_REFERENCES        = 2,
    PERF_COUNT_HW_CACHE_MISSES            = 3,
    PERF_COUNT_HW_BRANCH_INSTRUCTIONS     = 4,
    PERF_COUNT_HW_BRANCH_MISSES           = 5,
    PERF_COUNT_HW_BUS_CYCLES              = 6,
    PERF_COUNT_HW_STALLED_CYCLES_FRONTEND = 7,
    PERF_COUNT_HW_STALLED_CYCLES_BACKEND  = 8,
    PERF_COUNT_HW_REF_CPU_CYCLES          = 9,

    /* Not an event: one past the last valid selector. */
    PERF_COUNT_HW_MAX,
};
/*
 * Kernel-style fixed-width aliases used by the perf_event_attr declaration
 * below, mapped onto the <stdint.h> types.
 */
typedef uint32_t __u32; /* unsigned 32-bit */
typedef uint64_t __u64; /* unsigned 64-bit */
typedef int32_t  __s32; /* signed 32-bit */
/*
 * Event description handed by pointer to the perf_event_open syscall
 * (see init_tsc, which fills in `type` and `config`).
 *
 * This is a hand copy of the declaration in the Linux uapi perf_event.h,
 * needed because the Android NDK does not ship that header. The kernel
 * interprets this memory directly, so member order, member widths and the
 * bit-field layout must not be changed or "tidied".
 */
struct perf_event_attr
{
/* Event class; holds an enum perf_type_id value. */
__u32 type;
/* Size of this structure in bytes, for ABI versioning (per upstream header). */
__u32 size;
/* Type-specific event selector; for hardware events an enum perf_hw_id value. */
__u64 config;
/* One 64-bit slot with two names. */
union {
__u64 sample_period;
__u64 sample_freq;
};
__u64 sample_type;
__u64 read_format;
/*
 * One 64-bit flag word: 24 single-bit flags, the 2-bit precise_ip field
 * and 38 reserved bits, which together add up to exactly 64 bits.
 */
__u64 disabled : 1,
inherit : 1,
pinned : 1,
exclusive : 1,
exclude_user : 1,
exclude_kernel : 1,
exclude_hv : 1,
exclude_idle : 1,
mmap : 1,
comm : 1,
freq : 1,
inherit_stat : 1,
enable_on_exec : 1,
task : 1,
watermark : 1,
precise_ip : 2,
mmap_data : 1,
sample_id_all : 1,
exclude_host : 1,
exclude_guest : 1,
exclude_callchain_kernel : 1,
exclude_callchain_user : 1,
mmap2 : 1,
comm_exec : 1,
use_clockid : 1,
__reserved_1 : 38;
/* One 32-bit slot with two names. */
union {
__u32 wakeup_events;
__u32 wakeup_watermark;
};
__u32 bp_type;
/* One 64-bit slot with two names. */
union {
__u64 bp_addr;
__u64 config1;
};
/* One 64-bit slot with two names. */
union {
__u64 bp_len;
__u64 config2;
};
__u64 branch_sample_type;
__u64 sample_regs_user;
__u32 sample_stack_user;
__s32 clockid;
__u64 sample_regs_intr;
__u32 aux_watermark;
/* Padding member from the upstream declaration; never assigned in this file. */
__u32 __reserved_2;
};
/*
 * File descriptor of the CPU-cycle perf event read by __rdtsc().
 * Negative if the event could not be opened.
 */
static int fd;

/*
 * Opens a hardware CPU-cycle counter through the perf_event_open syscall.
 * The constructor attribute makes this run before main().
 *
 * On failure a diagnostic is written to stderr and fd is left negative, so
 * every later __rdtsc() call returns 0.
 */
__attribute__((constructor)) static void init_tsc()
{
    /* Static storage: every member not assigned below is zero. */
    static struct perf_event_attr attr;

    attr.type = PERF_TYPE_HARDWARE;
    /* The kernel uses the size to tell which revision of the struct it is given. */
    attr.size = sizeof(attr);
    attr.config = PERF_COUNT_HW_CPU_CYCLES;

    /* pid = 0, cpu = -1: the calling thread, on whichever CPU it runs.
       group_fd = -1, flags = 0: standalone event, no flags. */
    fd = (int)syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
    if (fd < 0)
    {
        perror("perf_event_open");
    }
}
/*
 * Stand-in for the __rdtsc() intrinsic on this platform: reads the current
 * 64-bit value from the perf event descriptor `fd`.
 *
 * Returns the value read, or 0 if read() fails or yields fewer than
 * sizeof(uint64_t) bytes.
 */
static uint64_t __rdtsc()
{
    uint64_t cycles = 0;
    ssize_t got = read(fd, &cycles, sizeof(cycles));

    return (got >= (ssize_t)sizeof(cycles)) ? cycles : 0;
}
#endif
/*
 * Times two summation passes over separate 1 MiB zero-filled static buffers
 * with __rdtsc() and prints how the elapsed counts compare:
 *   Ex1 reads every byte,
 *   Ex2 reads one byte out of every 0x10.
 *
 * Returns 0 after printing the comparison, or 1 if the second measurement
 * reports zero elapsed cycles (e.g. the counter is unavailable and
 * __rdtsc() keeps returning 0), since the ratio would be meaningless.
 */
int main()
{
    enum { BUFFER_SIZE = 0x100000, STRIDE = 0x10 };

    uint64_t ex1;
    {
        /* Static storage, so the buffer is zero-initialized.
           NOTE(review): it is never written, so an optimizer may be able to
           fold the loads away - check the generated code before trusting
           the numbers. */
        static uint8_t buffer[BUFFER_SIZE];
        uint32_t sum = 0;

        uint64_t t1 = __rdtsc();
        for (size_t i = 0; i < BUFFER_SIZE; i++)
        {
            sum += buffer[i];
        }
        uint64_t t2 = __rdtsc();
        ex1 = t2 - t1;

        /* Volatile store so the computed sum counts as used. */
        volatile uint32_t tmp = sum;
    }

    uint64_t ex2;
    {
        /* A second, distinct buffer: Ex2 does not reuse Ex1's memory. */
        static uint8_t buffer[BUFFER_SIZE];
        uint32_t sum = 0;

        uint64_t t1 = __rdtsc();
        for (size_t i = 0; i < BUFFER_SIZE; i += STRIDE)
        {
            sum += buffer[i];
        }
        uint64_t t2 = __rdtsc();
        ex2 = t2 - t1;

        /* Volatile store so the computed sum counts as used. */
        volatile uint32_t tmp = sum;
    }

    /* A zero divisor would print inf/nan instead of a percentage. */
    if (ex2 == 0)
    {
        fprintf(stderr, "Cycle counter returned no elapsed cycles; cannot compare Ex1 and Ex2\n");
        return 1;
    }

    printf("Ex2 is %.2f%% faster than Ex1\n", ((double)ex1 / (double)ex2 - 1) * 100);
    return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.