Created
April 23, 2016 07:46
-
-
Save BensonQiu/45d18b0111dd627f05f54d1f9aa6f66f to your computer and use it in GitHub Desktop.
Simple mutex vs shared mutex with no contention
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#ifndef _SYRAH_CYCLE_TIMER_H_ | |
#define _SYRAH_CYCLE_TIMER_H_ | |
#if defined(__APPLE__) | |
#if defined(__x86_64__) | |
#include <sys/sysctl.h> | |
#else | |
#include <mach/mach.h> | |
#include <mach/mach_time.h> | |
#endif // __x86_64__ or not | |
#include <stdio.h> // fprintf | |
#include <stdlib.h> // exit | |
#elif _WIN32 | |
# include <windows.h> | |
# include <time.h> | |
#else | |
# include <stdio.h> | |
# include <stdlib.h> | |
# include <string.h> | |
# include <sys/time.h> | |
#endif | |
// This uses the cycle counter of the processor. Different
// processors in the system will have different values for this. If
// your process moves across processors, then the delta time you
// measure will likely be incorrect. This is mostly for fine
// grained measurements where the process is likely to be on the
// same processor. For more global things you should use the
// Time interface.
// Also note that if your processors' speeds change (i.e. processor
// frequency scaling) or if you are in a heterogeneous environment,
// you will likely get spurious results.
// Static-only timing helper: every member is static and the constructor is
// declared private, so the class is used purely as a namespace.
class CycleTimer {
 public:
  typedef unsigned long long SysClock;

  //////////
  // Return the current time, in terms of clock ticks.
  // Time zero is at some arbitrary point in the past.
  // The tick source depends on the platform: mach_absolute_time() on
  // non-x86_64 Apple, QueryPerformanceCounter() on Windows, the rdtsc
  // cycle counter on other x86_64 builds, and clock_gettime() in
  // nanoseconds everywhere else.
  static SysClock currentTicks() {
#if defined(__APPLE__) && !defined(__x86_64__)
    return mach_absolute_time();
#elif defined(_WIN32)
    LARGE_INTEGER qwTime;
    QueryPerformanceCounter(&qwTime);
    return qwTime.QuadPart;
#elif defined(__x86_64__)
    unsigned int a, d;
    asm volatile("rdtsc" : "=a" (a), "=d" (d));
    return static_cast<unsigned long long>(a) |
      (static_cast<unsigned long long>(d) << 32);
#elif defined(__ARM_NEON__) && 0 // mrc requires superuser.
    unsigned int val;
    asm volatile("mrc p15, 0, %0, c9, c13, 0" : "=r"(val));
    return val;
#else
    timespec spec;
    clock_gettime(CLOCK_THREAD_CPUTIME_ID, &spec);
    // Combine in integer arithmetic: going through float (24-bit mantissa)
    // would throw away the nanosecond part entirely.
    return static_cast<SysClock>(spec.tv_sec) * 1000000000ULL +
      static_cast<SysClock>(spec.tv_nsec);
#endif
  }

  //////////
  // Return the current time, in terms of seconds.
  // This is slower than currentTicks(). Time zero is at
  // some arbitrary point in the past.
  static double currentSeconds() {
    return currentTicks() * secondsPerTick();
  }

  //////////
  // Return the conversion from seconds to ticks.
  static double ticksPerSecond() {
    return 1.0/secondsPerTick();
  }

  //////////
  // Return a short label for the unit that currentTicks() counts in.
  // NOTE: on Windows the ticks come from QueryPerformanceCounter(); the
  // historical "cycles" label is kept for that platform.
  static const char* tickUnits() {
#if defined(__APPLE__) && !defined(__x86_64__)
    return "ns";
#elif defined(_WIN32) || defined(__WIN32__) || defined(__x86_64__)
    return "cycles";
#else
    return "ns"; // clock_gettime
#endif
  }

  //////////
  // Return the conversion from ticks to seconds.
  // The scale is determined on the first call and cached; a function-local
  // static is initialized exactly once (thread-safely from C++11 on).
  static double secondsPerTick() {
    static const double cached_val = computeSecondsPerTick();
    return cached_val;
  }

  //////////
  // Return the conversion from ticks to milliseconds.
  static double msPerTick() {
    return secondsPerTick() * 1000.0;
  }

 private:
  CycleTimer();

  // Determine how many seconds one currentTicks() tick lasts on this
  // platform. Exits the process if the platform query fails.
  static double computeSecondsPerTick() {
#if defined(__APPLE__)
#ifdef __x86_64__
    int args[] = {CTL_HW, HW_CPU_FREQ};
    unsigned int Hz;
    size_t len = sizeof(Hz);
    if (sysctl(args, 2, &Hz, &len, NULL, 0) != 0) {
      fprintf(stderr, "Failed to initialize secondsPerTick_val!\n");
      exit(-1);
    }
    return 1.0 / static_cast<double>(Hz);
#else
    mach_timebase_info_data_t time_info;
    mach_timebase_info(&time_info);
    // numer/denom scales mach ticks to nanoseconds; 1e-9 takes that to seconds.
    return (1e-9*static_cast<double>(time_info.numer))/
      static_cast<double>(time_info.denom);
#endif // x86_64 or not
#elif defined(_WIN32)
    LARGE_INTEGER qwTicksPerSec;
    QueryPerformanceFrequency(&qwTicksPerSec);
    return 1.0/static_cast<double>(qwTicksPerSec.QuadPart);
#elif defined(__x86_64__)
    // currentTicks() is the rdtsc cycle counter here, so the scale is the
    // CPU clock period, taken from /proc/cpuinfo.
    FILE *fp = fopen("/proc/cpuinfo","r");
    if (!fp) {
      fprintf(stderr, "CycleTimer::secondsPerTick failed: couldn't open /proc/cpuinfo.\n");
      exit(-1);
    }
    // Used when no frequency line can be parsed.
    double result = 1e-9;
    char input[1024];
    while (fgets(input, sizeof(input), fp)) {
      // NOTE(boulos): Because reading cpuinfo depends on dynamic
      // frequency scaling it's better to read the @ sign first
      double freq = 0.0;
      if (strstr(input, "model name")) {
        char* at_sign = strchr(input, '@');
        if (!at_sign) continue;
        char* after_at = at_sign + 1;
        char* GHz_str = strstr(after_at, "GHz");
        char* MHz_str = strstr(after_at, "MHz");
        if (GHz_str) {
          *GHz_str = '\0';  // cut the unit off so only the number is scanned
          if (1 == sscanf(after_at, "%lf", &freq) && freq > 0.0) {
            result = 1e-9 / freq;
            break;
          }
        } else if (MHz_str) {
          *MHz_str = '\0';
          if (1 == sscanf(after_at, "%lf", &freq) && freq > 0.0) {
            // Divide by the value just parsed as MHz.
            result = 1e-6 / freq;
            break;
          }
        }
      } else if (1 == sscanf(input, "cpu MHz : %lf", &freq) && freq > 0.0) {
        result = 1e-6 / freq;
        break;
      }
    }
    fclose(fp);
    return result;
#else
    // currentTicks() already reports nanoseconds via clock_gettime() here,
    // so the scale is fixed and independent of the CPU frequency.
    return 1e-9;
#endif
  }
};
#endif // #ifndef _SYRAH_CYCLE_TIMER_H_ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <iostream> | |
#include <mutex> | |
#include <boost/thread/shared_mutex.hpp> | |
#include "CycleTimer.h" | |
// Number of lock/unlock operations timed per trial. Parenthesized so the
// macro expands as a single value inside larger expressions.
#define NUM_OPS (10 * 1000 * 1000)
int main() { | |
double start_time, end_time, best_time; | |
best_time = 1e30; | |
for (int i = 0; i < 3; i++) { | |
start_time = CycleTimer::currentSeconds(); | |
boost::shared_mutex mutex; | |
for (int j = 0; j < NUM_OPS; j++) { | |
boost::shared_lock<boost::shared_mutex> lock(mutex); | |
} | |
end_time = CycleTimer::currentSeconds(); | |
best_time = std::min(best_time, end_time-start_time); | |
} | |
std::cout << "Shared mutex time: " << best_time << std::endl; | |
best_time = 1e30; | |
for (int i = 0; i < 3; i++) { | |
start_time = CycleTimer::currentSeconds(); | |
std::mutex mutex; | |
for (int j = 0; j < NUM_OPS; j++) { | |
mutex.lock(); | |
mutex.unlock(); | |
} | |
end_time = CycleTimer::currentSeconds(); | |
best_time = std::min(best_time, end_time-start_time); | |
} | |
std::cout << "Simple mutex time: " << best_time << std::endl; | |
return 0; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment