Skip to content

Instantly share code, notes, and snippets.

@ACUVE

ACUVE/main.cpp Secret

Last active December 17, 2015 13:09
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ACUVE/b4ee1008a4c539e3f09b to your computer and use it in GitHub Desktop.
Save ACUVE/b4ee1008a4c539e3f09b to your computer and use it in GitHub Desktop.
#include <omp.h>
#include <algorithm>
#include <atomic>
#include <chrono>
#include <condition_variable>
#include <functional>
#include <iostream>
#include <limits>
#include <mutex>
#include <shared_mutex>
#include <thread>
#include <utility>
#include <vector>
// Measure the wall-clock duration of a single invocation of func( args... ).
// Arguments are perfectly forwarded; the callable's return value is discarded.
// Returns the elapsed time as std::chrono::nanoseconds.
template< typename FUNC, typename... Args >
inline std::chrono::nanoseconds time( FUNC &&func, Args &&... args )
{
    using clock = std::chrono::high_resolution_clock;
    auto const before = clock::now();
    func( std::forward< Args >( args )... );
    auto const after = clock::now();
    return std::chrono::duration_cast< std::chrono::nanoseconds >( after - before );
}
// A simple fork-join thread pool: the constructor spawns
// (hardware_concurrency - 1) worker threads, and Do() runs one function on
// every thread (workers plus the calling thread) before returning.
// NOTE(review): Do() is not itself thread-safe — the design assumes a single
// controlling thread drives the pool.
class ThreadPool
{
public:
// Worker entry signature: data is the payload passed to Do(), num is this
// thread's index (0 = the thread that called Do()), thread_num is the total
// number of participating threads.
using func_type = void (*)( void *data, unsigned int num, unsigned int thread_num );
private:
std::atomic< func_type > func;               // job to run; nullptr tells workers to exit
std::atomic< void * > func_data;             // payload forwarded to func
std::mutex mutex;                            // guards counter; pairs with cond
std::atomic< unsigned int > waiting_counter; // how many workers are parked at the top of their loop
std::condition_variable cond;                // wakes workers when counter changes
unsigned int counter;                        // generation count; bumped (under mutex) to release workers
unsigned int const num_thread;               // total threads, including the caller of Do()
std::vector< std::thread > thread;           // the num_thread - 1 worker threads
private:
// Spin (with yield) until every worker has announced it is idle again.
void WaitAllWaiting()
{
unsigned int const len = num_thread - 1;
// Busy-wait: acceptable here because workers re-park quickly after each job.
while( waiting_counter.load() != len )
std::this_thread::yield();
}
public:
// hc == 0 (concurrency unknown) falls back to a single thread (no workers).
ThreadPool( unsigned int const hc = std::thread::hardware_concurrency() )
: func( nullptr )
, func_data( nullptr )
, waiting_counter( 0 )
, counter( 0 )
, num_thread( [ & ]{ if( hc ) return hc; return 1u; }() )
{
unsigned int const len = num_thread - 1;
for( unsigned int i = 0; i < len; ++i )
thread.emplace_back( &ThreadPool::Thread, this, i );
// Do not return until every worker is parked, so the first Do() cannot
// race against thread start-up.
WaitAllWaiting();
}
// Shutdown: bump the generation with func == nullptr so each worker breaks
// out of its loop, then join them all.
~ThreadPool()
{
func.store( nullptr );
{
std::lock_guard< decltype( mutex ) > lock( mutex );
counter += 1;
}
cond.notify_all();
for( auto &&th : thread )
th.join();
}
// Worker loop: announce idle, wait for a generation change, then run func
// (or exit when func is nullptr). The published thread id is num + 1
// because index 0 is reserved for the thread calling Do().
void Thread( unsigned int const num )
{
// NOTE(review): this initial read of counter is not under the mutex; it
// appears benign because Do()/~ThreadPool() only bump counter after
// WaitAllWaiting(), but strictly it is unsynchronized — confirm.
unsigned int c = counter;
unsigned int const num_t = num_thread;
while( true )
{
// Announce "idle" before sleeping; WaitAllWaiting() counts these.
waiting_counter.fetch_add( 1 );
{
std::unique_lock< decltype( mutex ) > lock( mutex );
cond.wait( lock, [ & ] { return c != counter; } );
}
c = counter;
auto f = func.load();
if( !f )
break;
f( func_data.load(), num + 1, num_t );
}
}
// Run f(data, i, num_thread) once for each thread index i in
// [0, num_thread): workers handle i = 1..num_thread-1, the calling thread
// runs i = 0, and Do() returns only after every worker is idle again.
void Do( func_type f, void *data )
{
if( f == nullptr )
return;
// Publish the job before releasing the workers.
func.store( f, std::memory_order_relaxed );
func_data.store( data, std::memory_order_relaxed );
// Reset the idle count; release ordering publishes the relaxed stores
// above together with the reset.
waiting_counter.store( 0, std::memory_order_release );
{
std::lock_guard< decltype( mutex ) > lock( mutex );
counter += 1;
}
cond.notify_all();
// The calling thread participates as thread 0.
f( data, 0, num_thread );
// Fork-join: block until every worker has finished and re-parked.
WaitAllWaiting();
}
};
// Payload handed to aaa() through ThreadPool::Do.
struct ThreadData
{
unsigned int c;           // number of integration intervals to split across threads
std::atomic< double > *s; // shared accumulator the per-thread partial sums are added into
};
// Worker body for ThreadPool::Do: left-rectangle integration of 1/(x^2 + 1)
// over [0, 1) with td->c intervals (the total approaches pi/4). Thread `num`
// of `thread_num` handles a contiguous slice of the intervals and atomically
// adds its partial sum into *td->s.
void aaa( void *data, unsigned int const num, unsigned int const thread_num )
{
    // void* -> T* only needs static_cast; reinterpret_cast was overkill here.
    auto const td = static_cast< ThreadData * >( data );
    auto const c = td->c;
    std::atomic< double > *const ret = td->s;
    // Split c intervals into thread_num near-equal chunks; the first `rest`
    // chunks take one extra interval each. [s, e) is this thread's slice.
    decltype( c ) const
        part = c / thread_num,
        rest = c % thread_num,
        s = part * num + (num < rest ? num : rest),
        e = s + part + (num < rest);
    double sum = 0;
    double const delta = 1.0 / c;
    for( auto i = s; i < e; ++i )
    {
        double const x = delta * i;
        sum += delta / (x * x + 1.0); // dropped the redundant "* 1.0"
    }
    // std::atomic<double> has no fetch_add before C++20, so CAS-loop the add;
    // compare_exchange_weak refreshes `expected` on failure.
    double expected = ret->load();
    while( !ret->compare_exchange_weak( expected, expected + sum ) );
}
// OpenMP counterpart of the thread-pool worker: left-rectangle integration of
// 1/(x^2 + 1) over [0, 1) with c intervals, parallelised with an OpenMP
// reduction. The result (which approaches pi/4) is stored into *v.
void openmp( std::atomic< double > *const v, unsigned int const c )
{
    double acc = 0;
    double const width = 1.0 / c;
    #pragma omp parallel for reduction ( + : acc )
    for( unsigned int i = 0; i < c; ++i )
    {
        double const x = width * i;
        acc += width / (x * x + 1.0);
    }
    v->store( acc );
}
int main( int argc, char **argv )
{
constexpr unsigned int NUM_TEST = 100000;
unsigned int const count = 10 * argc;
unsigned long long int mintime_t = std::numeric_limits< decltype( mintime_t ) >::max();
unsigned long long int mintime_o = std::numeric_limits< decltype( mintime_o ) >::max();
unsigned long long int sumtime_t = 0, sumtime_o = 0;
double minv_t = std::numeric_limits< decltype( minv_t ) >::max();
double minv_o = std::numeric_limits< decltype( minv_o ) >::max();
std::atomic< double > v;
{
ThreadPool pool;
ThreadData td;
td.c = count;
td.s = &v;
for( unsigned int i = 0; i < NUM_TEST; ++i )
{
v = 0;
unsigned long long int const t = time( std::bind( &ThreadPool::Do, &pool, aaa, &td ) ).count();
mintime_t = std::min( mintime_t, t );
minv_t = std::min( minv_t, v.load() );
sumtime_t += t;
}
}
{
for( unsigned int i = 0; i < NUM_TEST; ++i )
{
v = 0;
unsigned long long int const t = time( openmp, &v, count ).count();
mintime_o = std::min( mintime_o, t );
minv_o = std::min( minv_o, v.load() );
sumtime_o += t;
}
}
{
std::cout << "NUM_TSET: " << NUM_TEST << ", count: " << count << std::endl;
std::cout << "Thread min time[ns]: " << mintime_t << ", OpenMP min time[ns]: " << mintime_o << std::endl;
std::cout << "Thread mean time[ns]: " << static_cast< double >( sumtime_t ) / NUM_TEST << ", OpenMP mean time[ns]: " << static_cast< double >( sumtime_o ) / NUM_TEST << std::endl;
std::cout << "Thread_v: " << minv_t << ", OpenMP_v: " << minv_o << std::endl;
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment