Skip to content

Instantly share code, notes, and snippets.

@BensonQiu
Created April 23, 2016 07:46
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save BensonQiu/45d18b0111dd627f05f54d1f9aa6f66f to your computer and use it in GitHub Desktop.
Save BensonQiu/45d18b0111dd627f05f54d1f9aa6f66f to your computer and use it in GitHub Desktop.
Simple mutex vs shared mutex with no contention
#ifndef _SYRAH_CYCLE_TIMER_H_
#define _SYRAH_CYCLE_TIMER_H_
#if defined(__APPLE__)
#if defined(__x86_64__)
#include <sys/sysctl.h>
#else
#include <mach/mach.h>
#include <mach/mach_time.h>
#endif // __x86_64__ or not
#include <stdio.h> // fprintf
#include <stdlib.h> // exit
#elif _WIN32
# include <windows.h>
# include <time.h>
#else
# include <stdio.h>
# include <stdlib.h>
# include <string.h>
# include <sys/time.h>
#endif
// This uses the cycle counter of the processor. Different
// processors in the system will have different values for this. If
// your process moves across processors, then the delta time you
// measure will likely be incorrect. This is mostly for fine-
// grained measurements where the process is likely to stay on the
// same processor. For more global things you should use the
// Time interface.
// Also note that if your processors' speeds change (i.e. processor
// frequency scaling) or if you are in a heterogeneous environment,
// you will likely get spurious results.
// Static-only utility for fine-grained timing. Every member is static and
// the constructor is declared private (and never defined), so the class
// cannot be instantiated.
//
// The raw counter and its unit depend on the platform:
//   * Apple, non-x86_64 : mach_absolute_time()
//   * Windows           : QueryPerformanceCounter()
//   * x86_64            : the rdtsc instruction
//   * anything else     : clock_gettime(CLOCK_THREAD_CPUTIME_ID), in ns
class CycleTimer {
public:
    typedef unsigned long long SysClock;

    //////////
    // Return the current counter value, in platform ticks (see
    // tickUnits()). Time zero is at some arbitrary point in the past.
    static SysClock currentTicks() {
#if defined(__APPLE__) && !defined(__x86_64__)
        return mach_absolute_time();
#elif defined(_WIN32)
        LARGE_INTEGER qwTime;
        QueryPerformanceCounter(&qwTime);
        return qwTime.QuadPart;
#elif defined(__x86_64__)
        // rdtsc delivers the 64-bit counter split across EAX (low half)
        // and EDX (high half).
        unsigned int a, d;
        asm volatile("rdtsc" : "=a" (a), "=d" (d));
        return static_cast<unsigned long long>(a) |
               (static_cast<unsigned long long>(d) << 32);
#elif defined(__ARM_NEON__) && 0 // mrc requires superuser.
        unsigned int val;
        asm volatile("mrc p15, 0, %0, c9, c13, 0" : "=r"(val));
        return val;
#else
        timespec spec;
        clock_gettime(CLOCK_THREAD_CPUTIME_ID, &spec);
        // Combine in integer arithmetic: a float cannot represent
        // nanosecond counts exactly, so casting through float would
        // discard the low-order digits of the reading.
        return static_cast<SysClock>(spec.tv_sec) * 1000000000ULL +
               static_cast<SysClock>(spec.tv_nsec);
#endif
    }

    //////////
    // Return the current counter value converted to seconds.
    // This is slower than currentTicks(). Time zero is at
    // some arbitrary point in the past.
    static double currentSeconds() {
        return currentTicks() * secondsPerTick();
    }

    //////////
    // Return the conversion from seconds to ticks.
    static double ticksPerSecond() {
        return 1.0 / secondsPerTick();
    }

    //////////
    // Return a short label for the unit of currentTicks().
    static const char* tickUnits() {
#if defined(__APPLE__) && !defined(__x86_64__)
        return "ns";
#elif defined(_WIN32) || defined(__x86_64__)
        // _WIN32 is the macro the rest of this class tests for Windows.
        // NOTE(review): on Windows the counter is QueryPerformanceCounter,
        // whose ticks are not literally CPU cycles; the label is kept for
        // backward compatibility.
        return "cycles";
#else
        return "ns"; // clock_gettime
#endif
    }

    //////////
    // Return the conversion from ticks to seconds. The factor is
    // computed on the first call and cached for all later calls.
    static double secondsPerTick() {
        static const double secondsPerTick_val = computeSecondsPerTick();
        return secondsPerTick_val;
    }

    //////////
    // Return the conversion from ticks to milliseconds.
    static double msPerTick() {
        return secondsPerTick() * 1000.0;
    }

private:
    CycleTimer();

    // Work out how many seconds one tick of currentTicks() lasts on this
    // platform. Calls exit(-1) if the platform query fails.
    static double computeSecondsPerTick() {
#if defined(__APPLE__)
#ifdef __x86_64__
        int args[] = {CTL_HW, HW_CPU_FREQ};
        unsigned int Hz;
        size_t len = sizeof(Hz);
        if (sysctl(args, 2, &Hz, &len, NULL, 0) != 0) {
            fprintf(stderr, "Failed to initialize secondsPerTick_val!\n");
            exit(-1);
        }
        return 1.0 / static_cast<double>(Hz);
#else
        mach_timebase_info_data_t time_info;
        mach_timebase_info(&time_info);
        // numer/denom scales ticks to nanoseconds; 1e-9 takes that to seconds.
        return (1e-9 * static_cast<double>(time_info.numer)) /
               static_cast<double>(time_info.denom);
#endif // x86_64 or not
#elif defined(_WIN32)
        LARGE_INTEGER qwTicksPerSec;
        QueryPerformanceFrequency(&qwTicksPerSec);
        return 1.0 / static_cast<double>(qwTicksPerSec.QuadPart);
#elif defined(__x86_64__)
        return secondsPerCycleFromCpuinfo();
#else
        // currentTicks() already reports nanoseconds here, so the factor
        // is fixed and must not be scaled by the CPU frequency.
        return 1e-9;
#endif
    }

    // Derive the length of one CPU cycle from /proc/cpuinfo. Returns 1e-9
    // when no usable frequency is listed; calls exit(-1) when the file
    // cannot be opened.
    static double secondsPerCycleFromCpuinfo() {
        FILE* fp = fopen("/proc/cpuinfo", "r");
        if (!fp) {
            fprintf(stderr, "CycleTimer::resetScale failed: couldn't find /proc/cpuinfo.");
            exit(-1);
        }
        // In case we don't find it, e.g. on the N900
        double seconds_per_cycle = 1e-9;
        char input[1024];
        while (fgets(input, sizeof(input), fp)) {
            // NOTE(boulos): Because reading cpuinfo depends on dynamic
            // frequency scaling it's better to read the @ sign first
            float GHz, MHz;
            if (strstr(input, "model name")) {
                char* at_sign = strchr(input, '@');
                if (at_sign) {
                    char* after_at = at_sign + 1;
                    char* GHz_str = strstr(after_at, "GHz");
                    char* MHz_str = strstr(after_at, "MHz");
                    if (GHz_str) {
                        // Cut the unit off so sscanf sees only the number.
                        *GHz_str = '\0';
                        if (1 == sscanf(after_at, "%f", &GHz) && GHz > 0.0f) {
                            seconds_per_cycle = 1e-9 / static_cast<double>(GHz);
                            break;
                        }
                    } else if (MHz_str) {
                        *MHz_str = '\0';
                        if (1 == sscanf(after_at, "%f", &MHz) && MHz > 0.0f) {
                            // Divide by the value just parsed (MHz).
                            seconds_per_cycle = 1e-6 / static_cast<double>(MHz);
                            break;
                        }
                    }
                }
            } else if (1 == sscanf(input, "cpu MHz : %f", &MHz) && MHz > 0.0f) {
                seconds_per_cycle = 1e-6 / static_cast<double>(MHz);
                break;
            }
        }
        fclose(fp);
        return seconds_per_cycle;
    }
};
#endif // #ifndef _SYRAH_CYCLE_TIMER_H_
#include <iostream>
#include <mutex>
#include <boost/thread/shared_mutex.hpp>
#include "CycleTimer.h"
#define NUM_OPS 10 * 1000 * 1000
int main() {
double start_time, end_time, best_time;
best_time = 1e30;
for (int i = 0; i < 3; i++) {
start_time = CycleTimer::currentSeconds();
boost::shared_mutex mutex;
for (int j = 0; j < NUM_OPS; j++) {
boost::shared_lock<boost::shared_mutex> lock(mutex);
}
end_time = CycleTimer::currentSeconds();
best_time = std::min(best_time, end_time-start_time);
}
std::cout << "Shared mutex time: " << best_time << std::endl;
best_time = 1e30;
for (int i = 0; i < 3; i++) {
start_time = CycleTimer::currentSeconds();
std::mutex mutex;
for (int j = 0; j < NUM_OPS; j++) {
mutex.lock();
mutex.unlock();
}
end_time = CycleTimer::currentSeconds();
best_time = std::min(best_time, end_time-start_time);
}
std::cout << "Simple mutex time: " << best_time << std::endl;
return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment