@JosephLaurino
Last active December 12, 2015 06:29
Profiling experiment comparing CPU (non-threaded), PPL, and C++ AMP based implementations. The cost of moving data to the GPU can only be recouped if the computation time is much larger than the data transfer time. In this experiment, the GPU (via AMP) only started winning once the loop count increased past 200 iterations. Profile first! PPL might have …
// ----------------------------------------------------------------------------
#include <ppl.h>
#include <amp.h>
#include <amp_math.h>
#include <iostream>
#include <boost/chrono/chrono.hpp>
#include <vector>
using namespace concurrency;
using namespace concurrency::precise_math;
const int size = 80000;
const int loopCount = 1;
/*
Setup: Visual Studio 2012, Win7, i7, GeForce 650M
loopCount = 1
cpu took 0.00463677 seconds
ppl took 0.00497504 seconds
gpu took 0.16018 seconds
loopCount = 20
cpu took 0.0483519 seconds
ppl took 0.0113667 seconds
gpu took 0.166711 seconds
loopCount = 200
cpu took 0.244843 seconds
ppl took 0.061472 seconds
gpu took 0.150505 seconds
loopCount = 2000
cpu took 2.26371 seconds
ppl took 0.533806 seconds
gpu took 0.202005 seconds
loopCount = 20000
cpu took 22.3698 seconds
ppl took 5.25856 seconds
gpu took 0.66479 seconds
*/
void test_PPLMethod(std::vector<float>& result) {
    boost::chrono::steady_clock::time_point start = boost::chrono::steady_clock::now();
    float aCPP[size];
    float bCPP[size];
    float sumCPP[size];
    for( int i = 0; i < size; i++ ) {
        aCPP[i] = i;
        bCPP[i] = i*i;
    }
    // PPL distributes the per-element work across the CPU thread pool.
    parallel_for( 0, size, [&](int idx) {
        sumCPP[idx] = pow(aCPP[idx], bCPP[idx]);
        for( int i = 0; i < loopCount; i++ ) {
            sumCPP[idx] = pow(sumCPP[idx], bCPP[idx]);
        }
    });
    boost::chrono::duration<double> sec = boost::chrono::steady_clock::now() - start;
    std::cout << "ppl took " << sec.count() << " seconds\n";
    result.clear();
    for( int i = 0; i < size; i++ ) {
        result.push_back(sumCPP[i]);
    }
}
void test_AmpMethod(std::vector<float>& result) {
    boost::chrono::steady_clock::time_point start = boost::chrono::steady_clock::now();
    float aCPP[size];
    float bCPP[size];
    float sumCPP[size];
    for( int i = 0; i < size; i++ ) {
        aCPP[i] = i;
        bCPP[i] = i*i;
    }
    // Create C++ AMP objects. The array_views wrap the host arrays; the data
    // is copied to the accelerator when the kernel runs.
    array_view<const float, 1> a(size, aCPP);
    array_view<const float, 1> b(size, bCPP);
    array_view<float, 1> sum(size, sumCPP);
    sum.discard_data(); // sumCPP holds no input, so skip the host-to-device copy
    parallel_for_each( sum.extent, [=](index<1> idx) restrict(amp) {
        sum[idx] = pow(a[idx], b[idx]);
        for( int i = 0; i < loopCount; i++ ) {
            sum[idx] = pow(sum[idx], b[idx]);
        }
    });
    // Block until the kernel finishes and the results are copied from the GPU
    // back to sumCPP, so the transfer cost is included in the timing.
    sum.synchronize();
    boost::chrono::duration<double> sec = boost::chrono::steady_clock::now() - start;
    std::cout << "gpu took " << sec.count() << " seconds\n";
    result.clear();
    for( int i = 0; i < size; i++ ) {
        result.push_back(sum[i]);
    }
}
void test_CPUMethod(std::vector<float>& result) {
    boost::chrono::steady_clock::time_point start = boost::chrono::steady_clock::now();
    float aCPP[size];
    float bCPP[size];
    float sumCPP[size];
    for( int i = 0; i < size; i++ ) {
        aCPP[i] = i;
        bCPP[i] = i*i;
    }
    for( int idx = 0; idx < size; idx++) {
        sumCPP[idx] = pow(aCPP[idx], bCPP[idx]);
        for( int i = 0; i < loopCount; i++ ) {
            sumCPP[idx] = pow(sumCPP[idx], bCPP[idx]);
        }
    }
    boost::chrono::duration<double> sec = boost::chrono::steady_clock::now() - start;
    std::cout << "cpu took " << sec.count() << " seconds\n";
    result.clear();
    for( int i = 0; i < size; i++ ) {
        result.push_back(sumCPP[i]);
    }
}
// ----------------------------------------------------------------------------
int main(int argc, char* argv[]) {
    std::vector<float> cpuResult;
    std::vector<float> pplResult;
    std::vector<float> gpuResult;
    test_CPUMethod(cpuResult);
    test_PPLMethod(pplResult);
    test_AmpMethod(gpuResult);
    // Verify that all three implementations produced the same results.
    for( size_t i = 0; i < cpuResult.size(); i++ )
    {
        if( (cpuResult[i] != pplResult[i]) ||
            (cpuResult[i] != gpuResult[i]) ) {
            std::cout << "bad calc at " << i << "\n";
            std::cout << "cpuResult[i] " << cpuResult[i] << "\n";
            std::cout << "pplResult[i] " << pplResult[i] << "\n";
            std::cout << "gpuResult[i] " << gpuResult[i] << "\n";
            break;
        }
    }
    return 0;
}
@JosephLaurino (Author)

  • Added data verification and the call to sum.synchronize() in test_AmpMethod. Without the call to synchronize, the cost of copying data from the GPU back to the CPU was not included in the profile timings.
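To illustrate the effect, here is a minimal sketch (not part of the gist; timing_sketch and the trivial doubling kernel are made up for the example): parallel_for_each only queues the work on the accelerator, so stopping the clock right after it misses most of the GPU cost, while synchronize() blocks until the kernel finishes and the data is copied back to the host.

// Sketch (assumed names, not from the gist): time the same AMP dispatch with
// and without synchronize() to see where the device-to-host copy cost lands.
#include <amp.h>
#include <boost/chrono/chrono.hpp>
#include <iostream>
#include <vector>

void timing_sketch() {
    using namespace concurrency;
    const int n = 80000;
    std::vector<float> data(n, 1.0f);
    array_view<float, 1> v(n, data);

    boost::chrono::steady_clock::time_point start = boost::chrono::steady_clock::now();
    parallel_for_each(v.extent, [=](index<1> idx) restrict(amp) {
        v[idx] = v[idx] * 2.0f; // trivial kernel, just to have work queued
    });
    boost::chrono::duration<double> queued = boost::chrono::steady_clock::now() - start;

    v.synchronize(); // blocks until the GPU finishes and writes back to 'data'
    boost::chrono::duration<double> done = boost::chrono::steady_clock::now() - start;

    std::cout << "after parallel_for_each: " << queued.count() << " s\n";
    std::cout << "after synchronize():     " << done.count() << " s\n";
}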

@JosephLaurino (Author)

I also discovered that the first run of the C++ AMP code triggers compilation of the kernel for the GPU. To profile properly, one needs to exclude that first run and do multiple runs.
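One way to do that, as a rough sketch (profile_amp is my own name, and it assumes test_AmpMethod from the listing above is visible in the same translation unit): run the AMP path once untimed to absorb the one-time kernel compilation, then average several timed runs.

// Sketch: warm up once, then average several runs of the AMP implementation.
void profile_amp(int runs) {
    std::vector<float> result;
    test_AmpMethod(result); // warm-up run pays the one-time GPU kernel compilation cost; ignore its timing
    boost::chrono::steady_clock::time_point start = boost::chrono::steady_clock::now();
    for( int i = 0; i < runs; i++ ) {
        test_AmpMethod(result); // steady-state runs
    }
    boost::chrono::duration<double> sec = boost::chrono::steady_clock::now() - start;
    std::cout << "gpu average over " << runs << " runs: " << sec.count() / runs << " seconds\n";
}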
