Skip to content

Instantly share code, notes, and snippets.

@wuye9036
Last active December 23, 2015 15:39
Show Gist options
  • Save wuye9036/6657313 to your computer and use it in GitHub Desktop.
Save wuye9036/6657313 to your computer and use it in GitHub Desktop.
#include "stdafx.h"

#include <algorithm>
#include <atomic>
#include <chrono>
#include <cstdint>
#include <cstdlib>
#include <functional>
#include <future>
#include <iostream>
#include <limits>
#include <memory>
#include <thread>
#include <utility>
#include <vector>

#if defined(WIN32)
#define NOMINMAX
#include <Windows.h>
#endif
using namespace std;

// Number of float elements in the benchmark buffer. Builds that define
// _DEBUG get the smaller buffer; every other build gets the larger one.
#if defined(_DEBUG)
static const int ARRAY_SIZE = 512 * 512 * 4;
#else
static const int ARRAY_SIZE = 2048 * 2048 * 4;
#endif
/// Repeated-run timer using a "K-best" stopping rule.
///
/// A callable is timed over and over. The k fastest timings seen so far are
/// kept, and measurement stops as soon as the slowest of those k timings is
/// within a relative tolerance of the fastest timing, or when the run budget
/// is used up.
///
/// The fastest timing and the retained samples are never reset, so they
/// accumulate across calls to test(); use a fresh instance per workload.
struct k_best
{
public:
    /// @param k               Number of fastest samples that must agree.
    ///                        Values <= 0 are treated as 1.
    /// @param epsilon         Relative tolerance. Negative (or NaN) values
    ///                        fall back to 0.05.
    /// @param max_test_count  Maximum number of runs. Raised to k when it is
    ///                        smaller, so at least k samples can be taken.
    k_best(int k, float epsilon, int max_test_count)
        : k_(k > 0 ? k : 1)
        , eps_(epsilon >= 0.0f ? epsilon : 0.05f)
        , max_tests_(max_test_count < k_ ? k_ : max_test_count)
        , min_time_((std::numeric_limits<std::int64_t>::max)())
        , perf_freq_(0)
    {
#if defined(WIN32)
        LARGE_INTEGER freq;
        QueryPerformanceFrequency(&freq);
        perf_freq_ = freq.QuadPart;
#endif
    }

    /// Times `fn` until the stopping rule is met or the run budget is spent.
    ///
    /// Writes "Convergence: <runs> = " or "Convergence: No = " to std::cout
    /// (no newline) so the caller can append the result on the same line.
    ///
    /// @param fn  Workload to time; invoked once per run.
    /// @return    {true, t} if the rule was met, {false, t} otherwise, where
    ///            t is the slowest of the retained fastest samples, in
    ///            microseconds.
    std::pair<bool, std::int64_t> test(std::function<void ()> const& fn)
    {
        for (int i = 0; i < max_tests_; ++i)
        {
            const std::int64_t elapsed_us = time_once(fn);
            // Parenthesised to stay immune to a min() macro from <Windows.h>.
            min_time_ = (std::min)(min_time_, elapsed_us);

            // heap_ is a max-heap capped at k_ entries: pushing and then
            // popping the largest leaves the k_ smallest timings, with the
            // slowest of them at the front.
            heap_.push_back(elapsed_us);
            std::push_heap(heap_.begin(), heap_.end());
            if (heap_.size() > static_cast<std::size_t>(k_))
            {
                std::pop_heap(heap_.begin(), heap_.end());
                heap_.pop_back();
            }

            if (heap_.size() == static_cast<std::size_t>(k_))
            {
                // Inclusive comparison in double: a strict '<' can never hold
                // when the best time is 0us or the tolerance is 0, and float
                // cannot represent large int64 timings exactly.
                const double kth_best = static_cast<double>(heap_.front());
                const double limit =
                    static_cast<double>(min_time_) * (1.0 + static_cast<double>(eps_));
                if (kth_best <= limit)
                {
                    std::cout << "Convergence: " << i + 1 << " = ";
                    return std::make_pair(true, heap_.front());
                }
            }
        }
        // max_tests_ >= k_ >= 1, so at least one sample is always present here.
        std::cout << "Convergence: No = ";
        return std::make_pair(false, heap_.front());
    }

private:
    /// Runs `fn` once and returns the elapsed wall time in microseconds.
    std::int64_t time_once(std::function<void ()> const& fn) const
    {
#if defined(WIN32)
        LARGE_INTEGER beg_time, end_time;
        QueryPerformanceCounter(&beg_time);
        fn();
        QueryPerformanceCounter(&end_time);
        return static_cast<std::int64_t>(
            double(end_time.QuadPart - beg_time.QuadPart) / double(perf_freq_) * 1000000 );
#else
        // steady_clock is monotonic, so an interval can never come out negative.
        const std::chrono::steady_clock::time_point start = std::chrono::steady_clock::now();
        fn();
        const std::chrono::steady_clock::time_point stop = std::chrono::steady_clock::now();
        return static_cast<std::int64_t>(
            std::chrono::duration_cast<std::chrono::microseconds>(stop - start).count());
#endif
    }

    int k_;                           // number of fastest samples that must agree (>= 1)
    float eps_;                       // relative tolerance (>= 0 unless caller passed +inf)
    int max_tests_;                   // run budget (>= k_)
    std::vector<std::int64_t> heap_;  // max-heap of the k_ fastest timings, in microseconds
    std::int64_t min_time_;           // fastest timing seen so far, in microseconds
    std::int64_t perf_freq_;          // QueryPerformanceFrequency result; 0 when WIN32 is not defined
};
/// Abstract benchmark subject.
///
/// Owns a buffer of ARRAY_SIZE floats and exposes the same per-element
/// operation through several call mechanisms (plain function pointer,
/// pointer to member function, std::function, virtual function) so their
/// dispatch costs can be compared. Derived classes pick the operation by
/// assigning fn, mfn and fnobj and by overriding do_with_vfn().
struct calc
{
    /// Allocates the zero-filled buffer; fn and mfn start out null and must
    /// be assigned before do_with_fnptr()/do_with_mfnptr() are called.
    calc()
        : base(new float[ARRAY_SIZE]())
        , fn(nullptr)
        , mfn(nullptr)
    {
    }
    virtual ~calc()
    {
        delete [] base;
    }

    // The buffer is released in the destructor, so a copy would free it
    // twice; the derived classes also store a lambda capturing `this`,
    // which a copy would leave pointing at the source object.
    calc(calc const&) = delete;
    calc& operator=(calc const&) = delete;

    /// Fills the buffer so that element i holds the value i.
    void init()
    {
        for(int i = 0; i < ARRAY_SIZE; ++i)
        {
            base[i] = static_cast<float>(i);
        }
    }

    /// Applies the operation to element i through the plain function pointer.
    inline void do_with_fnptr(int i)
    {
        fn(base, i);
    }
    /// Applies the operation to element i through the member function pointer.
    inline void do_with_mfnptr(int i)
    {
        (this->*mfn)(i);
    }
    /// Applies the operation to element i through the std::function object.
    inline void do_with_fnobj(int i)
    {
        fnobj(i);
    }
    /// Applies the operation to element i through virtual dispatch.
    virtual void do_with_vfn(int i) = 0;

public:
    float* base;                       // owned buffer of ARRAY_SIZE floats
    void (*fn)(float* base, int i);    // operation as a free/static function
    void (calc::*mfn)(int i);          // operation as a member function
    std::function<void (int)> fnobj;   // operation as a type-erased callable

    /// Adds 2 to base[i].
    static void add2(float* base, int i)
    {
        base[i] += 2;
    }
    /// Multiplies base[i] by 7.16 (the factor is not 2, despite the name).
    static void mul2(float* base, int i)
    {
        base[i] *= 7.16f;
    }
    /// Leaves the buffer untouched.
    static void do_nothing(float* /*base*/, int /*i*/)
    {
    }
    /// Member-function form of add2().
    void madd2(int i)
    {
        *(base+i) += 2;
    }
    /// Member-function form of mul2(): multiplies element i by 7.16.
    void mmul2(int i)
    {
        *(base+i) *= 7.16f;
    }
};
struct calc_add2: public calc
{
calc_add2()
{
fn = &calc::add2;
mfn = &calc::madd2;
fnobj = [this](int i) { this->base[i] += 2; };
}
void do_with_vfn(int i)
{
base[i] += 2;
}
};
struct calc_mul2: public calc
{
calc_mul2()
{
fn = &calc::mul2;
mfn = &calc::mmul2;
fnobj = [this](int i) { this->base[i] *= 7.16f; };
}
void do_with_vfn(int i)
{
base[i] *= 7.16f;
}
};
int _tmain(int argc, _TCHAR* argv[])
{
chrono::duration<chrono::high_resolution_clock::rep, chrono::high_resolution_clock::period> duration;
calc* obj = nullptr;
if(argc == 1)
{
obj = new calc_add2();
}
else
{
obj = new calc_mul2();
}
{
k_best measure(16, 0.03f, 500);
obj->init();
auto test_result = measure.test( [=]()
{
if(argc == 1)
{
for(int i = 0; i < ARRAY_SIZE; ++i)
{
obj->base[i] += 2;
}
}
else
{
for(int i = 0; i < ARRAY_SIZE; ++i)
{
obj->base[i] *= 7.16f;
}
}
});
cout << "Branch per batch Elapsed: " << test_result.second << "us" << endl;
}
{
obj->init();
k_best measure(16, 0.03f, 500);
auto test_result = measure.test( [=]()
{
for(int i = 0; i < ARRAY_SIZE; ++i)
{
if(argc == 1)
{
obj->base[i] += 2;
}
else
{
obj->base[i] *= 7.16f;
}
}
});
cout << "Branch per scalar Elapsed: " << test_result.second << "us" << endl;
}
{
obj->init();
k_best measure(16, 0.03f, 500);
auto test_result = measure.test( [=]()
{
for(int i = 0; i < ARRAY_SIZE; ++i)
{
obj->do_with_fnptr(i);
}
});
cout << "Fn Ptr Elapsed: " << test_result.second << "us" << endl;
}
{
obj->init();
k_best measure(16, 0.03f, 500);
auto test_result = measure.test( [=]()
{
for(int i = 0; i < ARRAY_SIZE; ++i)
{
obj->do_with_vfn(i);
}
});
cout << "Virtual Func Elapsed: " << test_result.second << "us" << endl;
}
{
obj->init();
k_best measure(16, 0.03f, 500);
auto test_result = measure.test( [=]()
{
for(int i = 0; i < ARRAY_SIZE; ++i)
{
obj->do_with_mfnptr(i);
}
});
cout << "Member Func Ptr Elapsed: " << test_result.second << "us" << endl;
}
{
obj->init();
k_best measure(16, 0.03f, 500);
auto test_result = measure.test( [=]()
{
for(int i = 0; i < ARRAY_SIZE; ++i)
{
obj->do_with_fnobj(i);
}
});
cout << "Function Object with Lambda Elapsed: " << test_result.second << "us" << endl;
}
delete obj;
system( "pause" );
return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment