Skip to content

Instantly share code, notes, and snippets.

@bassoy

bassoy/main.cpp

Last active Sep 29, 2018
Embed
What would you like to do?
Example of Tensor/Matrix/Vector Profiler
// Wraps the standard transpose.
//
// Stateless adapter that exposes fhg::tensor's `transpose` member through a
// uniform static interface (Value, Layout, Array, name, call), so generic code
// can select the operation via a template parameter.
//
// Template parameters:
//   ValueT  - element type forwarded to fhg::tensor
//   LayoutT - layout type forwarded to fhg::tensor
//
// The parameter names avoid the `_Uppercase` form, which is reserved for the
// implementation.
template<typename ValueT, typename LayoutT>
struct StandardTranspose
{
    using Value  = ValueT;
    using Layout = LayoutT;
    using Array  = fhg::tensor<Value, Layout>;

    // Label identifying this operation.
    static constexpr auto name = "transpose";

    // Forwards to a.transpose(phi) and returns its result by value
    // (plain `auto` return type deduction).
    static auto call(Array const& a, std::vector<std::size_t> const& phi) { return a.transpose(phi); }
};
// Wraps the blocked transpose
template<typename _Value, typename _Layout>
struct BlockedTranspose
{
using Value = _Value;
using Layout = _Layout;
using Array = fhg::tensor<Value, Layout>;
static constexpr auto name = "blocked_transpose";
static auto call(Array const& a, std::vector<std::size_t> const& phi) { return a.bocked_transpose(phi); }
};
// Takes Standard, Blocked Transpose, etc. -> FunctionT
//
// utl::Profiler implementation that times FunctionT::call on an array whose
// shape is built from the requested dimensions.
//
// FunctionT must provide:
//   Value - element type used to fill the input array
//   Array - array type constructible from an fhg::shape, with begin()/end()
//   name  - label passed to the utl::Profiler base constructor
//   call  - static function invoked as call(array, permutation)
template <typename FunctionT>
struct ProfilerTranspose : public utl::Profiler
{
    using Value = typename FunctionT::Value;
    using Array = typename FunctionT::Array;
    using Base  = utl::Profiler;

    ProfilerTranspose() : Base(std::string(FunctionT::name)) {}
    ProfilerTranspose(const ProfilerTranspose&) = default;

    // Times `iter` consecutive calls of FunctionT::call for the given
    // dimensions and returns the elapsed time divided by `iter`.
    //
    // dim  - extents used to construct the array shape
    // iter - number of timed calls; must be non-zero because the elapsed time
    //        is divided by it (the outcome for 0 depends on utl::NanoSeconds)
    utl::NanoSeconds profile(const utl::Dim& dim, std::size_t iter) override
    {
        auto s1 = fhg::shape{ dim.begin(), dim.end() };

        // One entry per shape element, filled back to front starting at 1,
        // i.e. phi = {n, n-1, ..., 1}.
        auto phi = std::vector<std::size_t>( s1.size() );
        std::iota(phi.rbegin(), phi.rend(), 1);

        // Input array filled with Value(0), Value(1), ... in iteration order.
        Array a ( s1 );
        std::iota(a.begin(), a.end(), Value(0.0));

        timer.tic();
        // The counter has the same type as `iter`, so the comparison is exact
        // even where unsigned long is narrower than std::size_t.
        for (std::size_t i = 0; i < iter; ++i) {
            // Result is bound to a volatile local; presumably so the call is
            // not discarded as unused - TODO confirm intent.
            volatile auto r = FunctionT::call(a, phi);
        }
        timer.toc();

        return timer.elapsed() / iter;
    }

    // Timer used by profile(); it is a member, so profile() mutates the object.
    utl::Timer<utl::NanoSeconds> timer;
};
// ProfileManager takes different transpose functions with different types.
ProfilerManager m("tensor-transpose");
m << new ProfilerTranspose <StandardTranspose <float>>;
m << new ProfilerTranspose <BlockedTranspose <float>>;
m << new ProfilerTranspose <StandardTranspose <double>>;
m << new ProfilerTranspose <BLockedTranspose <double>>;
// runs all of them from (4096x4096) with step size (512x512) to (8192x8192)
m.run(utl::Range{utl::Dim{4096,4096}, utl::Dim{512,512}, utl::Dim{8192,8192}} ,5);
// now we could also be able to serialize
// prints all profiled
std::ofstream out( "data.m", std::ios_base::app); // std::ios_base::out |
utl::Printer printer;
printer.print(out, utl::cat(m.profilerLabel() ,"size" ), m.size() );
printer.print(out, utl::cat(m.profilerLabel() ,"order" ), m.order() );
printer.print(out, utl::cat(m.profilerLabel() ,"time" ), m.time() );
printer.print(out, utl::cat(m.profilerLabel() ,"perf" ), m.perf() );
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment