Last active
April 9, 2024 14:07
-
-
Save ASKabalan/6d25ae71963659dfd87ad05559e05789 to your computer and use it in GitHub Desktop.
One Header Libraries
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/** | |
* @file logger.hpp | |
* @version 0.0.4 | |
* @brief Async Logger for C++ with timestamp, name, and configurable options. | |
* | |
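* Environment variables (each is read once, when the first logger is created):
* - ASYNC_TRACE: Enables info traces for every logger whose name appears in the value.
* - ASYNC_TRACE_VERBOSE: Enables info and verbose traces for every logger whose name appears in the value.
* - ASYNC_TRACE_MAX_BUFFER: Sets the maximum buffer size (in bytes) before buffered entries are flushed.
* - ASYNC_TRACE_OUTPUT_DIR: Sets the output directory for log files and switches output from the console to files.
* - ASYNC_TRACE_CONSOLE: Enables logging to the console (stdout), even when an output directory is set.
* - ASYNC_TRACE_NOBUFFER: Flushes after every write instead of waiting for the buffer to fill.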
* | |
* Example usage: | |
* @code | |
* #include "logger.hpp" | |
* | |
* int main() { | |
* AsyncLogger logger("CUD"); | |
* | |
* StartTraceInfo(logger) << "This is an info message" << '\n'; | |
* StartTraceVerbose(logger) << "This is a verbose message" << '\n'; | |
* | |
* return 0; | |
* } | |
* @endcode | |
* | |
* Async Logger for C++ | |
* with timestamp and name | |
* configurable via environment variables. | |
* | |
* | |
* @author Wassim KABALAN | |
*/ | |
#ifndef ASYNC_LOGGER_HPP | |
#define ASYNC_LOGGER_HPP | |
#include <chrono> | |
#include <cstdlib> | |
#include <cxxabi.h> | |
#include <execinfo.h> | |
#include <fstream> | |
#include <iostream> | |
#include <sstream> | |
#include <filesystem> | |
#ifdef MPI_VERSION | |
#include <mpi.h> | |
#endif | |
/**
 * @brief Buffered trace logger configured through environment variables.
 *
 * Text streamed into the logger is accumulated in an in-memory buffer. The
 * buffer is written out when it reaches its size limit, when unbuffered mode
 * is active, when flush() is called, and when the logger is destroyed. Output
 * goes either to stdout or to "<dir>/AsyncTrace_<name>[_<rank>].log".
 *
 * Every environment variable is read through a function-local static, so it
 * is sampled once per process (when the first logger is constructed); later
 * changes to the environment are not observed.
 */
class AsyncLogger {
public:
  /**
   * @brief Creates a logger and configures it from the environment.
   *
   * - ASYNC_TRACE / ASYNC_TRACE_VERBOSE enable the logger when @p name occurs
   *   anywhere in the variable's value (substring match).
   * - ASYNC_TRACE_MAX_BUFFER sets the flush threshold in bytes; negative
   *   values are ignored instead of wrapping around to a huge size_t.
   * - ASYNC_TRACE_OUTPUT_DIR selects file output and turns the console off.
   * - ASYNC_TRACE_CONSOLE / ASYNC_TRACE_NOBUFFER are boolean switches: any
   *   value enables them except "0", "false", "off" and "no".
   *
   * @param name Logger name; used for matching, in the message prefix and in
   *             the log file name.
   */
  AsyncLogger(const std::string &name) : name(name) {
    static const char *traceEnv = std::getenv("ASYNC_TRACE");
    if (mentions(traceEnv, name)) {
      traceInfo = true;
    }

    static const char *traceEnvVerb = std::getenv("ASYNC_TRACE_VERBOSE");
    if (mentions(traceEnvVerb, name)) {
      traceInfo = true;
      traceVerbose = true;
    }

    static const char *bufferSizeEnv = std::getenv("ASYNC_TRACE_MAX_BUFFER");
    if (bufferSizeEnv != nullptr) {
      const long requested = std::strtol(bufferSizeEnv, nullptr, 10);
      if (requested >= 0) {
        bufferSize = static_cast<size_t>(requested);
      }
    }

    static const char *outputDirEnv = std::getenv("ASYNC_TRACE_OUTPUT_DIR");
    if (outputDirEnv != nullptr) {
      // The directory is not created here; it must already exist.
      outputDir = outputDirEnv;
      traceToConsole = false;
    }

    // Evaluated after the output directory so that an explicit console
    // setting wins over the implicit "directory set => no console" rule.
    static const char *traceToConsoleEnv = std::getenv("ASYNC_TRACE_CONSOLE");
    if (traceToConsoleEnv != nullptr) {
      traceToConsole = flagEnabled(traceToConsoleEnv);
    }

    static const char *nobufferEnv = std::getenv("ASYNC_TRACE_NOBUFFER");
    if (nobufferEnv != nullptr) {
      nobuffer = flagEnabled(nobufferEnv);
    }

#ifdef MPI_VERSION
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
#endif
  }

  /**
   * @brief Starts an info entry by buffering "[timestamp] [INFO] [name] ".
   * @return *this, so the message can be streamed right after the prefix.
   */
  AsyncLogger &startTraceInfo() {
    if (traceInfo || traceVerbose) {
      std::ostringstream ss;
      addTimestamp(ss);
      ss << "[INFO] ";
      ss << "[" << name << "] ";
      buffer += ss.str();
    }
    return *this;
  }

  /**
   * @brief Starts a verbose entry by buffering "[timestamp] [VERB] [name] ".
   * @return *this, so the message can be streamed right after the prefix.
   */
  AsyncLogger &startTraceVerbose() {
    if (traceInfo || traceVerbose) {
      std::ostringstream ss;
      addTimestamp(ss);
      ss << "[VERB] ";
      ss << "[" << name << "] ";
      buffer += ss.str();
    }
    return *this;
  }

  /**
   * @brief Appends the textual form of @p value (as produced by an
   *        std::ostream) to the buffer when the logger is enabled.
   */
  template <typename T> AsyncLogger &operator<<(const T &value) {
    if (traceInfo || traceVerbose) {
      std::ostringstream ss;
      ss << value;
      append(ss.str());
    }
    return *this;
  }

  /** @brief Overload for bool: writes "true"/"false" instead of 1/0. */
  AsyncLogger &operator<<(bool value) {
    if (traceInfo || traceVerbose) {
      std::ostringstream ss;
      ss << std::boolalpha << value;
      append(ss.str());
    }
    return *this;
  }

  /** @brief Overload for stream manipulators such as std::endl. */
  AsyncLogger &operator<<(std::ostream &(*manipulator)(std::ostream &)) {
    if (traceInfo || traceVerbose) {
      std::ostringstream ss;
      ss << manipulator;
      append(ss.str());
    }
    return *this;
  }

  /** @brief Writes out whatever is still buffered. */
  ~AsyncLogger() {
    if (traceInfo || traceVerbose) {
      flush();
    }
  }

  /** @return true when info-level tracing is enabled for this logger. */
  bool getTraceInfo() const { return traceInfo; }

  /** @return true when verbose-level tracing is enabled for this logger. */
  bool getTraceVerbose() const { return traceVerbose; }

  /**
   * @brief Writes the buffer to its destination and empties it.
   *
   * Console mode writes to std::cout. File mode appends to
   * "<dir>/AsyncTrace_<name>[_<rank>].log", where <dir> is the configured
   * output directory, or the current directory when the console was disabled
   * without configuring one. If the file cannot be opened the buffered text
   * is discarded.
   */
  void flush() {
    if (traceToConsole) {
      std::cout << buffer;
    } else {
      const std::string dir = outputDir.empty() ? std::string(".") : outputDir;
      const std::string rankStr =
          rank >= 0 ? "_" + std::to_string(rank) : std::string();
      const std::string path = dir + "/AsyncTrace_" + name + rankStr + ".log";
      std::ofstream outfile(path, std::ios::app);
      if (outfile.is_open()) {
        outfile << buffer;
      }
    }
    buffer.clear();
  }

  /**
   * @brief Appends the current call stack (up to 64 frames) to the buffer.
   *
   * Each frame is demangled when possible and otherwise written exactly as
   * returned by backtrace_symbols().
   */
  void addStackTrace() {
    if (!(traceInfo || traceVerbose)) {
      return;
    }
    const int max_frames = 64;
    void *frame_ptrs[max_frames];
    const int num_frames = backtrace(frame_ptrs, max_frames);
    char **symbols = backtrace_symbols(frame_ptrs, num_frames);
    if (symbols == nullptr) {
      append("Error retrieving backtrace symbols.\n");
      return;
    }
    std::ostringstream ss;
    ss << "Call stack:" << '\n';
    for (int i = 0; i < num_frames; ++i) {
      ss << demangleFrame(symbols[i]) << '\n';
    }
    // backtrace_symbols() hands back a single malloc'ed block.
    free(symbols);
    append(ss.str());
  }

private:
  /** @brief True when @p list is set and contains @p loggerName as a substring. */
  static bool mentions(const char *list, const std::string &loggerName) {
    return list != nullptr &&
           std::string(list).find(loggerName) != std::string::npos;
  }

  /**
   * @brief Interprets the value of a boolean environment switch.
   * @return false for "0", "false", "off" and "no" (ASCII case-insensitive),
   *         true for anything else, so merely defining the variable keeps
   *         enabling the feature.
   */
  static bool flagEnabled(const char *value) {
    std::string text(value);
    for (char &c : text) {
      if (c >= 'A' && c <= 'Z') {
        c = static_cast<char>(c - 'A' + 'a');
      }
    }
    return !(text == "0" || text == "false" || text == "off" || text == "no");
  }

  /**
   * @brief Demangles the C++ symbol contained in one backtrace_symbols() line.
   *
   * Frames shaped like "module(mangled+0xoff) [addr]" (the layout shown in
   * the Linux backtrace(3) manual) have only the part between '(' and '+'
   * demangled. Any other layout is tried as a whole. The original text is
   * returned when demangling fails.
   */
  static std::string demangleFrame(const char *symbol) {
    const std::string frame(symbol);
    const std::string::size_type open = frame.find('(');
    const std::string::size_type plus =
        open == std::string::npos ? std::string::npos : frame.find('+', open);
    const bool embedded = plus != std::string::npos && plus > open + 1;
    const std::string mangled =
        embedded ? frame.substr(open + 1, plus - open - 1) : frame;

    int status = 0;
    char *demangled =
        abi::__cxa_demangle(mangled.c_str(), nullptr, nullptr, &status);
    if (status != 0 || demangled == nullptr) {
      free(demangled);
      return frame;
    }
    const std::string readable =
        embedded ? frame.substr(0, open + 1) + demangled + frame.substr(plus)
                 : std::string(demangled);
    free(demangled);
    return readable;
  }

  /** @brief Buffers @p text and flushes when the limit is hit or buffering is off. */
  void append(const std::string &text) {
    buffer += text;
    if (buffer.size() >= bufferSize || nobuffer) {
      flush();
    }
  }

  /**
   * @brief Writes "[Y/M/D h:m:s:ms] " for the current local time to @p stream.
   *        Fields are not zero-padded.
   */
  void addTimestamp(std::ostringstream &stream) {
    auto now = std::chrono::system_clock::now();
    auto timePoint = std::chrono::system_clock::to_time_t(now);
    auto milliseconds = std::chrono::duration_cast<std::chrono::milliseconds>(
                            now.time_since_epoch()) %
                        1000;
    std::tm tm;
#ifdef _WIN32
    localtime_s(&tm, &timePoint);
#else
    localtime_r(&timePoint, &tm);
#endif
    stream << "[" << tm.tm_year + 1900 << "/" << tm.tm_mon + 1 << "/"
           << tm.tm_mday << " " << tm.tm_hour << ":" << tm.tm_min << ":"
           << tm.tm_sec << ":" << milliseconds.count() << "] ";
  }

  std::string name;                     // logger name (prefix and file name)
  std::string buffer;                   // pending, not yet flushed text
  size_t bufferSize = 10 * 1024 * 1024; // flush threshold in bytes
  std::string outputDir;                // log directory; empty if not set
  bool traceInfo = false;
  bool traceVerbose = false;
  bool traceToConsole = true;
  bool nobuffer = false;                // flush after every append
  int rank = -1;                        // MPI rank, -1 when MPI is not used
};
// Trace macros: evaluate the streamed expression only when the corresponding
// trace level is enabled on `logger`.
//
// They are written as `if (!enabled) {} else <expr>` rather than
// `if (enabled) <expr>`, so an `else` that follows the macro in user code
// binds to the user's own `if` and not to the one hidden inside the macro.
// The argument is parenthesized so any expression can be passed as `logger`.

/// Starts an info entry (timestamp + "[INFO] [name] ") if info tracing is on.
#define StartTraceInfo(logger)                                                 \
  if (!(logger).getTraceInfo()) {                                              \
  } else                                                                       \
    (logger).startTraceInfo()

/// Continues an info entry (no prefix) if info tracing is on.
#define TraceInfo(logger)                                                      \
  if (!(logger).getTraceInfo()) {                                              \
  } else                                                                       \
    (logger)

/// Appends the current call stack if info tracing is on.
#define PrintStack(logger)                                                     \
  if (!(logger).getTraceInfo()) {                                              \
  } else                                                                       \
    (logger).addStackTrace()

/// Starts a verbose entry (timestamp + "[VERB] [name] ") if verbose tracing is on.
#define StartTraceVerbose(logger)                                              \
  if (!(logger).getTraceVerbose()) {                                           \
  } else                                                                       \
    (logger).startTraceVerbose()

/// Continues a verbose entry (no prefix) if verbose tracing is on.
#define TraceVerbose(logger)                                                   \
  if (!(logger).getTraceVerbose()) {                                           \
  } else                                                                       \
    (logger)
#endif // ASYNC_LOGGER_HPP | |
/* | |
Example usage: | |
#include "logger.hpp" | |
int main() { | |
AsyncLogger logger("CUD"); | |
StartTraceInfo(logger) << "This is an info message" << '\n'; | |
StartTraceVerbose(logger) << "This is a verbose message" << '\n'; | |
return 0; | |
} | |
*/ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/** | |
* @file Perfostep.hpp | |
* @version 0.0.4 | |
* @brief A class for measuring the performance of code execution. | |
* @details This header-only C++ class provides functionality for measuring the | |
* performance of code execution, supporting both CPU and GPU measurements. It | |
* utilizes high-resolution timers and optional NVIDIA Tools Extension (NVTX) | |
* for GPU measurements. | |
* | |
* Environment Variable: | |
* - ENABLE_PERFO_STEP: Set this environment variable to enable performance | |
* measurement. Possible values are "TIMER" for CPU timing or "NVTX" for NSYS | |
* profiling or "CUDA" for CUDA timing. | |
* @code
* // Example 1: Measure the performance of a CPU code
* Perfostep perf;
* perf.Start("Code block 1");
* // Code block to measure
* for (int i = 0; i < 1000000; ++i) {
* // Some computation
* }
* perf.Stop();
*
* perf.PrintToMarkdown("perf_report.md");
* perf.PrintToCSV("perf_report.csv");
*
* // Example 2: Measure the performance of GPU code
* Perfostep perf;
* perf.Start("CUDA Kernel");
* // Kernel to measure
* myCUDAKernel<<<blocks, threads>>>(input, output);
* perf.Stop();
*
* perf.PrintToMarkdown("cuperf_report.md");
* perf.PrintToCSV("cuperf_report.csv");
* @endcode
* | |
* | |
* @author Wassim KABALAN | |
*/ | |
#ifndef PERFOSTEP_HPP | |
#define PERFOSTEP_HPP | |
#include <algorithm> | |
#include <cassert> | |
#include <chrono> | |
#include <fstream> | |
#include <iostream> | |
#include <iterator> | |
#include <map> | |
#include <memory> | |
#include <numeric> | |
#include <stdexcept> | |
#include <string> | |
#include <tuple> | |
#include <vector> | |
// Enable the NVTX backend only when an NVTX header is really available, and
// include exactly the header that was probed. (Probing <nvToolsExt.h> while
// including <nvtx3/nvToolsExt.h> breaks the build when only the former exists
// and silently disables NVTX when only the latter does.)
#if __has_include(<nvtx3/nvToolsExt.h>)
#define ENABLE_NVTX
#include <nvtx3/nvToolsExt.h>
#elif __has_include(<nvToolsExt.h>)
#define ENABLE_NVTX
#include <nvToolsExt.h>
#endif
#if defined(__CUDACC__) | |
#include <cuda_runtime.h> | |
#define ENABLE_CUDA | |
#endif | |
// Kept for backward compatibility: code including this header may rely on the
// std::chrono names being visible unqualified.
using namespace std::chrono;

/// Extra report columns: column title -> value repeated on every report row.
using ColumnNames = std::map<const std::string, const std::string>;

/// Measurements: task name -> elapsed time in milliseconds.
using Reports = std::map<const std::string, double>;
class AbstractPerfostep { | |
public: | |
virtual void Start(const std::string &iReport, const ColumnNames &iCol) = 0; | |
virtual double Stop() = 0; | |
virtual void Report(const bool &iPrintTotal = false) const = 0; | |
virtual void PrintToMarkdown(const char *ifilename, | |
const bool &iPrintTotal = false) const = 0; | |
virtual void PrintToCSV(const char *ifilename, | |
const bool &iPrintTotal = false) const = 0; | |
virtual void Switch(const std::string &iReport, const ColumnNames &iCol) = 0; | |
virtual ~AbstractPerfostep() {} | |
protected: | |
Reports m_Reports; /**< The report of measured tasks. */ | |
ColumnNames m_ColNames; | |
}; | |
class BasePerfostep : public AbstractPerfostep { | |
public: | |
void Report(const bool &iPrintTotal = false) const override { | |
if (m_Reports.size() == 0) | |
return; | |
std::cout << "Reporting : " << std::endl; | |
std::cout << "For parameters: " << std::endl; | |
for (const auto &entry : m_ColNames) { | |
std::cout << std::get<0>(entry) << " : " << std::get<1>(entry) | |
<< std::endl; | |
} | |
for (const auto &entry : m_Reports) { | |
std::cout << std::get<0>(entry) << " : " << std::get<1>(entry) << "ms " | |
<< std::endl; | |
} | |
} | |
void PrintToMarkdown(const char *filename, | |
const bool &iPrintTotal = false) const override { | |
if (m_Reports.size() == 0) | |
return; | |
std::ofstream file(filename, std::ios::app); | |
if (!file.is_open()) { | |
throw std::runtime_error("Failed to open file: " + std::string(filename)); | |
} | |
if (file.tellp() == 0) { // Check if file is empty | |
file << "| Task | "; | |
for (const auto &entry : m_ColNames) { | |
file << std::get<0>(entry) << " | "; | |
} | |
file << "Elapsed Time (ms) |" << std::endl; // Header names for columns | |
// For the Task column | |
file << "| ---- | "; | |
// For the other columns | |
for (const auto &entry : m_ColNames) { | |
file << std::string(entry.first.length(), '-') << " | "; | |
} | |
// For the elapsed time column | |
file << " ---------------- |" << std::endl; | |
} | |
std::string colvalues; | |
for (const auto &col : m_ColNames) { | |
colvalues += std::get<1>(col) + " | "; | |
} | |
for (const auto &entry : m_Reports) { | |
file << "| " << std::get<0>(entry) << " | " << colvalues | |
<< std::get<1>(entry) << " |" << std::endl; | |
} | |
if (iPrintTotal) | |
file << "| Total | " << colvalues << GetTotal() << " |" << std::endl; | |
file.close(); | |
} | |
/** | |
* @brief Prints the measured tasks and their elapsed times in a CSV | |
* format to a file. | |
* @param filename The name of the file to write the CSV data to. | |
*/ | |
void PrintToCSV(const char *filename, | |
const bool &iPrintTotal) const override { | |
if (m_Reports.size() == 0) | |
return; | |
std::ofstream file(filename, std::ios::app); // Open file in append mode | |
if (!file.is_open()) { | |
throw std::runtime_error("Failed to open file: " + std::string(filename)); | |
} | |
if (file.tellp() == 0) { // Check if file is empty | |
file << "Task,"; | |
for (const auto &entry : m_ColNames) { | |
file << std::get<0>(entry) << ","; | |
} | |
file << "Elapsed Time (ms)" << std::endl; // Header names for columns | |
} | |
std::string colvalues; | |
for (const auto &col : m_ColNames) { | |
colvalues += std::get<1>(col) + ","; | |
} | |
for (const auto &entry : m_Reports) { | |
file << std::get<0>(entry) << "," << colvalues << std::get<1>(entry) | |
<< std::endl; | |
} | |
if (iPrintTotal) | |
file << "Total," << colvalues << GetTotal() << std::endl; | |
file.close(); | |
} | |
void Switch(const std::string &iReport, const ColumnNames &iCol) override { | |
Stop(); | |
Start(iReport, iCol); | |
} | |
private: | |
double GetTotal() const { | |
double total = std::accumulate( | |
m_Reports.begin(), m_Reports.end(), 0.0, | |
[](double sum, const std::tuple<const std::string, double> &entry) { | |
return sum + std::get<1>(entry); | |
}); | |
return total; | |
} | |
}; | |
typedef std::vector< | |
std::tuple<const std::string, time_point<high_resolution_clock>>> | |
StartTimes; | |
class PerfostepChrono : public BasePerfostep { | |
public: | |
void Start(const std::string &iReport, const ColumnNames &iCol) override { | |
m_StartTimes.push_back( | |
std::make_tuple(iReport, high_resolution_clock::now())); | |
m_ColNames = iCol; | |
} | |
double Stop() override { | |
// Check if there are any start times | |
assert(m_StartTimes.size() > 0); | |
m_EndTime = high_resolution_clock::now(); | |
duration<double> diff = m_EndTime - std::get<1>(m_StartTimes.back()); | |
double elapsed_time = diff.count() * 1000; | |
m_Reports[std::get<0>(m_StartTimes.back())] = elapsed_time; | |
m_StartTimes.pop_back(); | |
return elapsed_time; | |
} | |
~PerfostepChrono() { | |
if (m_StartTimes.size() > 0) { | |
std::cerr << "Warning: There are still start times not stopped" | |
<< std::endl; | |
// print message for each start time | |
for (const auto &entry : m_StartTimes) { | |
std::cerr << "Start time for " << std::get<0>(entry) | |
<< " is not stopped" << std::endl; | |
} | |
} | |
} | |
private: | |
StartTimes m_StartTimes; /**< The start time of the measurement. */ | |
time_point<high_resolution_clock> | |
m_EndTime; /**< The end time of the measurement. */ | |
}; | |
#ifdef ENABLE_NVTX | |
// Credit : https://github.com/NVIDIA/cuDecomp | |
class PerfostepNVTX : public BasePerfostep { | |
public: | |
// ColumnNames are not used in NVTX | |
void Start(const std::string &iReport, const ColumnNames &iCol) override { | |
static constexpr int ncolors_ = 8; | |
static constexpr int colors_[ncolors_] = {0x3366CC, 0xDC3912, 0xFF9900, | |
0x109618, 0x990099, 0x3B3EAC, | |
0x0099C6, 0xDD4477}; | |
std::string range_name(iReport); | |
std::hash<std::string> hash_fn; | |
int color = colors_[hash_fn(range_name) % ncolors_]; | |
nvtxEventAttributes_t ev = {0}; | |
ev.version = NVTX_VERSION; | |
ev.size = NVTX_EVENT_ATTRIB_STRUCT_SIZE; | |
ev.colorType = NVTX_COLOR_ARGB; | |
ev.color = color; | |
ev.messageType = NVTX_MESSAGE_TYPE_ASCII; | |
ev.message.ascii = range_name.c_str(); | |
nvtxRangePushEx(&ev); | |
nvtx_ranges++; | |
} | |
double Stop() override { | |
nvtxRangePop(); | |
nvtx_ranges--; | |
assert(nvtx_ranges >= 0); | |
return 0.0; | |
} | |
~PerfostepNVTX() { | |
if (nvtx_ranges > 0) { | |
std::cerr << "Warning: There are still start times not stopped" | |
<< std::endl; | |
for (int i = 0; i < nvtx_ranges; i++) | |
nvtxRangePop(); | |
} | |
} | |
private: | |
int nvtx_ranges = 0; | |
}; | |
#endif // ENABLE_NVTX | |
#ifdef ENABLE_CUDA | |
typedef std::vector<std::tuple<const std::string, cudaEvent_t>> StartEvents; | |
class PerfostepCUDA : public BasePerfostep { | |
public: | |
PerfostepCUDA() { cudaEventCreate(&m_EndEvent); } | |
void Start(const std::string &iReport, const ColumnNames &iCol) override { | |
cudaEvent_t m_StartEvent; | |
cudaEventCreate(&m_StartEvent); | |
cudaEventRecord(m_StartEvent); | |
m_StartEvents.push_back(std::make_tuple(iReport, m_StartEvent)); | |
m_ColNames = iCol; | |
} | |
double Stop() override { | |
cudaEventRecord(m_EndEvent); | |
cudaEventSynchronize(m_EndEvent); | |
float elapsed; | |
cudaEventElapsedTime(&elapsed, std::get<1>(m_StartEvents.back()), | |
m_EndEvent); | |
double m_ElapsedTime = static_cast<double>(elapsed); | |
cudaEventDestroy(std::get<1>(m_StartEvents.back())); | |
m_Reports[std::get<0>(m_StartEvents.back())] = m_ElapsedTime; | |
m_StartEvents.pop_back(); | |
return m_ElapsedTime; | |
} | |
~PerfostepCUDA() { | |
if (m_StartEvents.size() > 0) { | |
std::cerr << "Warning: There are still start events not stopped" | |
<< std::endl; | |
std::for_each( | |
m_StartEvents.cbegin(), m_StartEvents.cend(), | |
[](const std::tuple<const std::string, cudaEvent_t> &entry) { | |
cudaEventDestroy(std::get<1>(entry)); | |
}); | |
} | |
cudaEventDestroy(m_EndEvent); | |
} | |
private: | |
StartEvents m_StartEvents; /**< The start event for CUDA measurement. */ | |
cudaEvent_t m_EndEvent; /**< The end event for CUDA measurement. */ | |
}; | |
#endif // ENABLE_CUDA | |
class Perfostep { | |
public: | |
Perfostep() { | |
static const char *env = std::getenv("ENABLE_PERFO_STEP"); | |
if (env != nullptr) { | |
std::string envStr(env); | |
if (envStr == "TIMER") { | |
m_Perfostep = std::make_unique<PerfostepChrono>(); | |
m_EnablePerfoStep = true; | |
} else if (envStr == "NVTX") { | |
#ifdef ENABLE_NVTX | |
m_Perfostep = std::make_unique<PerfostepNVTX>(); | |
m_EnablePerfoStep = true; | |
#else | |
throw std::runtime_error( | |
"NVTX is not available. Please install NVTX to use it."); | |
#endif | |
} else if (envStr == "CUDA") { | |
#ifdef ENABLE_CUDA | |
m_Perfostep = std::make_unique<PerfostepCUDA>(); | |
m_EnablePerfoStep = true; | |
#else | |
throw std::runtime_error("CUDA is not available. Please install CUDA " | |
"or compile using nvcc to use it."); | |
#endif | |
} else { | |
throw std::runtime_error( | |
"Invalid value for ENABLE_PERFO_STEP: " + envStr + | |
". Possible values are TIMER, NVTX, or " | |
"CUDA."); | |
} | |
} | |
} | |
void Start(const std::string &iReport, const ColumnNames &iCol = {}) { | |
if (m_EnablePerfoStep) | |
m_Perfostep->Start(iReport, iCol); | |
} | |
double Stop() { | |
if (m_EnablePerfoStep) | |
return m_Perfostep->Stop(); | |
return 0.0; | |
} | |
void Report(const bool &iPrintTotal = false) const { | |
if (m_EnablePerfoStep) | |
m_Perfostep->Report(iPrintTotal); | |
} | |
void PrintToMarkdown(const char *filename, | |
const bool &iPrintTotal = false) const { | |
if (m_EnablePerfoStep) | |
m_Perfostep->PrintToMarkdown(filename, iPrintTotal); | |
} | |
void PrintToCSV(const char *filename, const bool &iPrintTotal = false) const { | |
if (m_EnablePerfoStep) | |
m_Perfostep->PrintToCSV(filename, iPrintTotal); | |
} | |
void Switch(const std::string &iReport, const ColumnNames &iCol = {}) { | |
if (m_EnablePerfoStep) | |
m_Perfostep->Switch(iReport, iCol); | |
} | |
private: | |
std::unique_ptr<AbstractPerfostep> m_Perfostep; | |
bool m_EnablePerfoStep = false; | |
}; | |
#endif // PERFOSTEP_HPP |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/** | |
* @file Perfostep.hpp | |
* @version 0.0.4 | |
* @brief A class for measuring the performance of code execution. | |
* @details This header-only C++ class provides functionality for measuring the | |
* performance of code execution, supporting both CPU and GPU measurements. It | |
* utilizes high-resolution timers and optional NVIDIA Tools Extension (NVTX) | |
* for GPU measurements. | |
* | |
* Environment Variable: | |
* - ENABLE_PERFO_STEP: Set this environment variable to enable performance | |
* measurement. Possible values are "TIMER" for CPU timing or "NVTX" for NSYS | |
* profiling or "CUDA" for CUDA timing. | |
* @code
* // Example 1: Measure the performance of a CPU code
* Perfostep perf;
* perf.Start("Code block 1");
* // Code block to measure
* for (int i = 0; i < 1000000; ++i) {
* // Some computation
* }
* perf.Stop();
*
* perf.PrintToMarkdown("perf_report.md");
* perf.PrintToCSV("perf_report.csv");
*
* // Example 2: Measure the performance of GPU code
* Perfostep perf;
* perf.Start("CUDA Kernel");
* // Kernel to measure
* myCUDAKernel<<<blocks, threads>>>(input, output);
* perf.Stop();
*
* perf.PrintToMarkdown("cuperf_report.md");
* perf.PrintToCSV("cuperf_report.csv");
* @endcode
* | |
* | |
* @author Wassim KABALAN | |
*/ | |
#ifndef PERFOSTEP_HPP | |
#define PERFOSTEP_HPP | |
#include <algorithm> | |
#include <cassert> | |
#include <chrono> | |
#include <fstream> | |
#include <iostream> | |
#include <iterator> | |
#include <map> | |
#include <memory> | |
#include <numeric> | |
#include <stdexcept> | |
#include <string> | |
#include <tuple> | |
#include <vector> | |
// Enable the NVTX backend only when an NVTX header is really available, and
// include exactly the header that was probed. (Probing <nvToolsExt.h> while
// including <nvtx3/nvToolsExt.h> breaks the build when only the former exists
// and silently disables NVTX when only the latter does.)
#if __has_include(<nvtx3/nvToolsExt.h>)
#define ENABLE_NVTX
#include <nvtx3/nvToolsExt.h>
#elif __has_include(<nvToolsExt.h>)
#define ENABLE_NVTX
#include <nvToolsExt.h>
#endif
#if defined(__CUDACC__) | |
#include <cuda_runtime.h> | |
#define ENABLE_CUDA | |
#endif | |
// Kept for backward compatibility: code including this header may rely on the
// std::chrono names being visible unqualified.
using namespace std::chrono;

/// Extra report columns: column title -> value repeated on every report row.
using ColumnNames = std::map<const std::string, const std::string>;

/// Measurements: task name -> elapsed time in milliseconds.
using Reports = std::map<const std::string, double>;
class AbstractPerfostep { | |
public: | |
virtual void Start(const std::string &iReport, const ColumnNames &iCol) = 0; | |
virtual double Stop() = 0; | |
virtual void Report(const bool &iPrintTotal = false) const = 0; | |
virtual void PrintToMarkdown(const char *ifilename, | |
const bool &iPrintTotal = false) const = 0; | |
virtual void PrintToCSV(const char *ifilename, | |
const bool &iPrintTotal = false) const = 0; | |
virtual void Switch(const std::string &iReport, const ColumnNames &iCol) = 0; | |
virtual ~AbstractPerfostep() {} | |
protected: | |
Reports m_Reports; /**< The report of measured tasks. */ | |
ColumnNames m_ColNames; | |
}; | |
class BasePerfostep : public AbstractPerfostep { | |
public: | |
void Report(const bool &iPrintTotal = false) const override { | |
if (m_Reports.size() == 0) return; | |
std::cout << "Reporting : " << std::endl; | |
std::cout << "For parameters: " << std::endl; | |
for (const auto &entry : m_ColNames) { | |
std::cout << std::get<0>(entry) << " : " << std::get<1>(entry) | |
<< std::endl; | |
} | |
for (const auto &entry : m_Reports) { | |
std::cout << std::get<0>(entry) << " : " << std::get<1>(entry) << "ms " | |
<< std::endl; | |
} | |
} | |
void PrintToMarkdown(const char *filename, | |
const bool &iPrintTotal = false) const override { | |
if (m_Reports.size() == 0) return; | |
std::ofstream file(filename, std::ios::app); | |
if (!file.is_open()) { | |
throw std::runtime_error("Failed to open file: " + std::string(filename)); | |
} | |
if (file.tellp() == 0) { // Check if file is empty | |
file << "| Task | "; | |
for (const auto &entry : m_ColNames) { | |
file << std::get<0>(entry) << " | "; | |
} | |
file << "Elapsed Time (ms) |" << std::endl; // Header names for columns | |
// For the Task column | |
file << "| --- | "; | |
// For the other columns | |
for (const auto &entry : m_ColNames) { | |
file << " --- | "; | |
} | |
// For the elapsed time column | |
file << " --------------- |" << std::endl; | |
} | |
std::string colvalues; | |
for (const auto &col : m_ColNames) { | |
colvalues += std::get<1>(col) + " | "; | |
} | |
for (const auto &entry : m_Reports) { | |
file << "| " << std::get<0>(entry) << " | " << colvalues | |
<< std::get<1>(entry) << " |" << std::endl; | |
} | |
if (iPrintTotal) | |
file << "| Total | " << colvalues << GetTotal() << " |" << std::endl; | |
file.close(); | |
} | |
/** | |
* @brief Prints the measured tasks and their elapsed times in a CSV | |
* format to a file. | |
* @param filename The name of the file to write the CSV data to. | |
*/ | |
void PrintToCSV(const char *filename, | |
const bool &iPrintTotal) const override { | |
if (m_Reports.size() == 0) return; | |
std::ofstream file(filename, std::ios::app); // Open file in append mode | |
if (!file.is_open()) { | |
throw std::runtime_error("Failed to open file: " + std::string(filename)); | |
} | |
if (file.tellp() == 0) { // Check if file is empty | |
file << "Task,"; | |
for (const auto &entry : m_ColNames) { | |
file << std::get<0>(entry) << ","; | |
} | |
file << "Elapsed Time (ms)" << std::endl; // Header names for columns | |
} | |
std::string colvalues; | |
for (const auto &col : m_ColNames) { | |
colvalues += std::get<1>(col) + ","; | |
} | |
for (const auto &entry : m_Reports) { | |
file << std::get<0>(entry) << "," << colvalues << std::get<1>(entry) | |
<< std::endl; | |
} | |
if (iPrintTotal) file << "Total," << colvalues << GetTotal() << std::endl; | |
file.close(); | |
} | |
void Switch(const std::string &iReport, const ColumnNames &iCol) override { | |
Stop(); | |
Start(iReport, iCol); | |
} | |
private: | |
double GetTotal() const { | |
double total = std::accumulate( | |
m_Reports.begin(), m_Reports.end(), 0.0, | |
[](double sum, const std::tuple<const std::string, double> &entry) { | |
return sum + std::get<1>(entry); | |
}); | |
return total; | |
} | |
}; | |
typedef std::vector< | |
std::tuple<const std::string, time_point<high_resolution_clock>>> | |
StartTimes; | |
class PerfostepChrono : public BasePerfostep { | |
public: | |
void Start(const std::string &iReport, const ColumnNames &iCol) override { | |
m_StartTimes.push_back( | |
std::make_tuple(iReport, high_resolution_clock::now())); | |
m_ColNames = iCol; | |
} | |
double Stop() override { | |
// Check if there are any start times | |
assert(m_StartTimes.size() > 0); | |
m_EndTime = high_resolution_clock::now(); | |
duration<double> diff = m_EndTime - std::get<1>(m_StartTimes.back()); | |
double elapsed_time = diff.count() * 1000; | |
m_Reports[std::get<0>(m_StartTimes.back())] = elapsed_time; | |
m_StartTimes.pop_back(); | |
return elapsed_time; | |
} | |
~PerfostepChrono() { | |
if (m_StartTimes.size() > 0) { | |
std::cerr << "Warning: There are still start times not stopped" | |
<< std::endl; | |
// print message for each start time | |
for (const auto &entry : m_StartTimes) { | |
std::cerr << "Start time for " << std::get<0>(entry) | |
<< " is not stopped" << std::endl; | |
} | |
} | |
} | |
private: | |
StartTimes m_StartTimes; /**< The start time of the measurement. */ | |
time_point<high_resolution_clock> | |
m_EndTime; /**< The end time of the measurement. */ | |
}; | |
#ifdef ENABLE_NVTX | |
// Credit : https://github.com/NVIDIA/cuDecomp | |
class PerfostepNVTX : public BasePerfostep { | |
public: | |
// ColumnNames are not used in NVTX | |
void Start(const std::string &iReport, const ColumnNames &iCol) override { | |
static constexpr int ncolors_ = 8; | |
static constexpr int colors_[ncolors_] = {0x3366CC, 0xDC3912, 0xFF9900, | |
0x109618, 0x990099, 0x3B3EAC, | |
0x0099C6, 0xDD4477}; | |
std::string range_name(iReport); | |
std::hash<std::string> hash_fn; | |
int color = colors_[hash_fn(range_name) % ncolors_]; | |
nvtxEventAttributes_t ev = {0}; | |
ev.version = NVTX_VERSION; | |
ev.size = NVTX_EVENT_ATTRIB_STRUCT_SIZE; | |
ev.colorType = NVTX_COLOR_ARGB; | |
ev.color = color; | |
ev.messageType = NVTX_MESSAGE_TYPE_ASCII; | |
ev.message.ascii = range_name.c_str(); | |
nvtxRangePushEx(&ev); | |
nvtx_ranges++; | |
} | |
double Stop() override { | |
nvtxRangePop(); | |
nvtx_ranges--; | |
assert(nvtx_ranges >= 0); | |
return 0.0; | |
} | |
~PerfostepNVTX() { | |
if (nvtx_ranges > 0) { | |
std::cerr << "Warning: There are still start times not stopped" | |
<< std::endl; | |
for (int i = 0; i < nvtx_ranges; i++) nvtxRangePop(); | |
} | |
} | |
private: | |
int nvtx_ranges = 0; | |
}; | |
#endif // ENABLE_NVTX | |
#ifdef ENABLE_CUDA | |
typedef std::vector<std::tuple<const std::string, cudaEvent_t>> StartEvents; | |
class PerfostepCUDA : public BasePerfostep { | |
public: | |
PerfostepCUDA() { cudaEventCreate(&m_EndEvent); } | |
void Start(const std::string &iReport, const ColumnNames &iCol) override { | |
cudaEvent_t m_StartEvent; | |
cudaEventCreate(&m_StartEvent); | |
cudaEventRecord(m_StartEvent); | |
m_StartEvents.push_back(std::make_tuple(iReport, m_StartEvent)); | |
m_ColNames = iCol; | |
} | |
double Stop() override { | |
cudaEventRecord(m_EndEvent); | |
cudaEventSynchronize(m_EndEvent); | |
float elapsed; | |
cudaEventElapsedTime(&elapsed, std::get<1>(m_StartEvents.back()), | |
m_EndEvent); | |
double m_ElapsedTime = static_cast<double>(elapsed); | |
cudaEventDestroy(std::get<1>(m_StartEvents.back())); | |
m_Reports[std::get<0>(m_StartEvents.back())] = m_ElapsedTime; | |
m_StartEvents.pop_back(); | |
return m_ElapsedTime; | |
} | |
~PerfostepCUDA() { | |
if (m_StartEvents.size() > 0) { | |
std::cerr << "Warning: There are still start events not stopped" | |
<< std::endl; | |
std::for_each( | |
m_StartEvents.cbegin(), m_StartEvents.cend(), | |
[](const std::tuple<const std::string, cudaEvent_t> &entry) { | |
cudaEventDestroy(std::get<1>(entry)); | |
}); | |
} | |
cudaEventDestroy(m_EndEvent); | |
} | |
private: | |
StartEvents m_StartEvents; /**< The start event for CUDA measurement. */ | |
cudaEvent_t m_EndEvent; /**< The end event for CUDA measurement. */ | |
}; | |
#endif // ENABLE_CUDA | |
class Perfostep { | |
public: | |
Perfostep() { | |
static const char *env = std::getenv("ENABLE_PERFO_STEP"); | |
if (env != nullptr) { | |
std::string envStr(env); | |
if (envStr == "TIMER") { | |
m_Perfostep = std::make_unique<PerfostepChrono>(); | |
m_EnablePerfoStep = true; | |
} else if (envStr == "NVTX") { | |
#ifdef ENABLE_NVTX | |
m_Perfostep = std::make_unique<PerfostepNVTX>(); | |
m_EnablePerfoStep = true; | |
#else | |
throw std::runtime_error( | |
"NVTX is not available. Please install NVTX to use it."); | |
#endif | |
} else if (envStr == "CUDA") { | |
#ifdef ENABLE_CUDA | |
m_Perfostep = std::make_unique<PerfostepCUDA>(); | |
m_EnablePerfoStep = true; | |
#else | |
throw std::runtime_error( | |
"CUDA is not available. Please install CUDA " | |
"or compile using nvcc to use it."); | |
#endif | |
} else { | |
throw std::runtime_error( | |
"Invalid value for ENABLE_PERFO_STEP: " + envStr + | |
". Possible values are TIMER, NVTX, or " | |
"CUDA."); | |
} | |
} | |
} | |
void Start(const std::string &iReport, const ColumnNames &iCol = {}) { | |
if (m_EnablePerfoStep) m_Perfostep->Start(iReport, iCol); | |
} | |
double Stop() { | |
if (m_EnablePerfoStep) return m_Perfostep->Stop(); | |
return 0.0; | |
} | |
void Report(const bool &iPrintTotal = false) const { | |
if (m_EnablePerfoStep) m_Perfostep->Report(iPrintTotal); | |
} | |
void PrintToMarkdown(const char *filename, | |
const bool &iPrintTotal = false) const { | |
if (m_EnablePerfoStep) m_Perfostep->PrintToMarkdown(filename, iPrintTotal); | |
} | |
void PrintToCSV(const char *filename, const bool &iPrintTotal = false) const { | |
if (m_EnablePerfoStep) m_Perfostep->PrintToCSV(filename, iPrintTotal); | |
} | |
void Switch(const std::string &iReport, const ColumnNames &iCol = {}) { | |
if (m_EnablePerfoStep) m_Perfostep->Switch(iReport, iCol); | |
} | |
private: | |
std::unique_ptr<AbstractPerfostep> m_Perfostep; | |
bool m_EnablePerfoStep = false; | |
}; | |
#endif // PERFOSTEP_HPP |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment