Skip to content

Instantly share code, notes, and snippets.

@ASKabalan
Last active April 9, 2024 14:07
Show Gist options
  • Save ASKabalan/6d25ae71963659dfd87ad05559e05789 to your computer and use it in GitHub Desktop.
Save ASKabalan/6d25ae71963659dfd87ad05559e05789 to your computer and use it in GitHub Desktop.
One Header Libraries
/**
* @file logger.hpp
* @version 0.0.4
* @brief Async Logger for C++ with timestamp, name, and configurable options.
*
* Environment variables:
* - ASYNC_TRACE: Enables trace for specific logger name.
* - ASYNC_TRACE_VERBOSE: Enables verbose trace for specific logger name.
* - ASYNC_TRACE_MAX_BUFFER: Sets the maximum buffer size for log entries.
* - ASYNC_TRACE_OUTPUT_DIR: Sets the output directory for log files.
* - ASYNC_TRACE_CONSOLE: Enables logging to the console (stdout).
*
* Example usage:
* @code
* #include "logger.hpp"
*
* int main() {
* AsyncLogger logger("CUD");
*
* StartTraceInfo(logger) << "This is an info message" << '\n';
* StartTraceVerbose(logger) << "This is a verbose message" << '\n';
*
* return 0;
* }
* @endcode
*
* Async Logger for C++
* with timestamp and name
* configurable via environment variables.
*
*
* @author Wassim KABALAN
*/
#ifndef ASYNC_LOGGER_HPP
#define ASYNC_LOGGER_HPP
#include <chrono>
#include <cstdlib>
#include <cxxabi.h>
#include <execinfo.h>
#include <fstream>
#include <iostream>
#include <sstream>
#include <filesystem>
#ifdef MPI_VERSION
#include <mpi.h>
#endif
class AsyncLogger {
public:
AsyncLogger(const std::string &name)
: name(name), bufferSize(10 * 1024 * 1024), buffer(""), traceInfo(false),
traceVerbose(false), traceToConsole(true) {
static const char *traceEnv = std::getenv("ASYNC_TRACE");
if (traceEnv != nullptr) {
std::string traceString = traceEnv;
size_t pos = traceString.find(name);
if (pos != std::string::npos) {
traceInfo = true;
}
}
static const char *traceEnvVerb = std::getenv("ASYNC_TRACE_VERBOSE");
if (traceEnvVerb != nullptr) {
std::string traceString = traceEnvVerb;
size_t pos = traceString.find(name);
if (pos != std::string::npos) {
traceInfo = true;
traceVerbose = true;
}
}
static const char *bufferSizeEnv = std::getenv("ASYNC_TRACE_MAX_BUFFER");
if (bufferSizeEnv != nullptr) {
bufferSize = std::atoi(bufferSizeEnv);
}
static const char *outputDirEnv = std::getenv("ASYNC_TRACE_OUTPUT_DIR");
if (outputDirEnv != nullptr) {
outputDir = outputDirEnv;
// Ensure the output directory exists
//std::filesystem::create_directories(outputDir);
traceToConsole = false;
}
static const char *traceToConsoleEnv = std::getenv("ASYNC_TRACE_CONSOLE");
if (traceToConsoleEnv != nullptr) {
traceToConsole = std::atoi(traceToConsoleEnv) != 0;
traceToConsole = true;
}
static const char *nobufferEnv = std::getenv("ASYNC_TRACE_NOBUFFER");
if (nobufferEnv != nullptr) {
nobuffer = std::atoi(nobufferEnv) != 0;
nobuffer = true;
}
#ifdef MPI_VERSION
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
#endif
}
AsyncLogger &startTraceInfo() {
if (traceInfo || traceVerbose) {
std::ostringstream ss;
addTimestamp(ss);
ss << "[INFO] ";
ss << "[" << name << "] ";
buffer += ss.str();
}
return *this;
}
AsyncLogger &startTraceVerbose() {
if (traceInfo || traceVerbose) {
std::ostringstream ss;
addTimestamp(ss);
ss << "[VERB] ";
ss << "[" << name << "] ";
buffer += ss.str();
}
return *this;
}
template <typename T> AsyncLogger &operator<<(const T &value) {
if (traceInfo || traceVerbose) {
std::ostringstream ss;
ss << value;
buffer += ss.str();
if (buffer.size() >= bufferSize || nobuffer) {
flush();
}
}
return *this;
}
// Specialization for bool
AsyncLogger &operator<<(bool value) {
if (traceInfo || traceVerbose) {
std::ostringstream ss;
ss << std::boolalpha << value;
buffer += ss.str();
if (buffer.size() >= bufferSize || nobuffer) {
flush();
}
}
return *this;
}
// Specialization for std::endl
AsyncLogger &operator<<(std::ostream &(*manipulator)(std::ostream &)) {
if (traceInfo || traceVerbose) {
std::ostringstream ss;
ss << manipulator;
buffer += ss.str();
if (buffer.size() >= bufferSize || nobuffer) {
flush();
}
}
return *this;
}
~AsyncLogger() {
if (traceInfo || traceVerbose) {
flush();
}
}
bool getTraceInfo() const { return traceInfo; }
bool getTraceVerbose() const { return traceVerbose; }
void flush() {
if (traceToConsole) {
std::cout << buffer;
} else {
std::ostringstream filename;
std::string rankStr = rank >= 0 ? "_" + std::to_string(rank) : "";
filename << outputDir << "/AsyncTrace_" << name << rankStr << ".log";
std::ofstream outfile(filename.str(), std::ios::app);
if (outfile.is_open()) {
outfile << buffer;
outfile.close();
}
}
buffer.clear();
}
void addStackTrace() {
if (traceInfo || traceVerbose) {
std::ostringstream ss;
ss << "Call stack:" << std::endl;
const int max_frames = 64;
void *frame_ptrs[max_frames];
int num_frames = backtrace(frame_ptrs, max_frames);
char **symbols = backtrace_symbols(frame_ptrs, num_frames);
if (symbols == nullptr) {
buffer += "Error retrieving backtrace symbols." + std::string("\n");
return;
}
for (int i = 0; i < num_frames; ++i) {
// Demangle the C++ function name
size_t size;
int status;
char *demangled =
abi::__cxa_demangle(symbols[i], nullptr, &size, &status);
if (status == 0) {
ss << demangled << std::endl;
free(demangled);
} else {
// Couldn't demangle, use the original symbol
ss << symbols[i] << std::endl;
}
}
free(symbols);
buffer += ss.str();
if (buffer.size() >= bufferSize || nobuffer) {
flush();
}
}
}
private:
void addTimestamp(std::ostringstream &stream) {
auto now = std::chrono::system_clock::now();
auto timePoint = std::chrono::system_clock::to_time_t(now);
auto milliseconds = std::chrono::duration_cast<std::chrono::milliseconds>(
now.time_since_epoch()) %
1000;
std::tm tm;
#ifdef _WIN32
localtime_s(&tm, &timePoint);
#else
localtime_r(&timePoint, &tm);
#endif
stream << "[" << tm.tm_year + 1900 << "/" << tm.tm_mon + 1 << "/"
<< tm.tm_mday << " " << tm.tm_hour << ":" << tm.tm_min << ":"
<< tm.tm_sec << ":" << milliseconds.count() << "] ";
}
std::string name;
std::string buffer;
size_t bufferSize;
std::string outputDir;
bool traceInfo = false;
bool traceVerbose = false;
bool traceToConsole = true;
bool nobuffer = false;
int rank = -1;
};
#define StartTraceInfo(logger) \
if (logger.getTraceInfo()) \
logger.startTraceInfo()
#define TraceInfo(logger) \
if (logger.getTraceInfo()) \
logger
#define PrintStack(logger) \
if (logger.getTraceInfo()) \
logger.addStackTrace()
#define StartTraceVerbose(logger) \
if (logger.getTraceVerbose()) \
logger.startTraceVerbose()
#define TraceVerbose(logger) \
if (logger.getTraceVerbose()) \
logger
#endif // ASYNC_LOGGER_HPP
/*
Example usage:
#include "logger.hpp"
int main() {
AsyncLogger logger("CUD");
StartTraceInfo(logger) << "This is an info message" << '\n';
StartTraceVerbose(logger) << "This is a verbose message" << '\n';
return 0;
}
*/
/**
* @file Perfostep.hpp
* @version 0.0.4
* @brief A class for measuring the performance of code execution.
* @details This header-only C++ class provides functionality for measuring the
* performance of code execution, supporting both CPU and GPU measurements. It
* utilizes high-resolution timers and optional NVIDIA Tools Extension (NVTX)
* for GPU measurements.
*
* Environment Variable:
* - ENABLE_PERFO_STEP: Set this environment variable to enable performance
* measurement. Possible values are "TIMER" for CPU timing or "NVTX" for NSYS
* profiling or "CUDA" for CUDA timing.
* @code
* // Example 1: Measure the performance of a CPU code
* Perfostep perf;
* perf.start("Code block 1");
* // Code block to measure
* for (int i = 0; i < 1000000; ++i) {
* // Some computation
* }
* perf.stop();
*
* perf.printToMarkdown("perf_report.md");
* perf.printToCSV("perf_report.csv");
*
* // Example 2: Measure the performance of GPU code
* Perfostep perf;
* perf.start("CUDA Kernel");
* // Kernel to measure
* myCUDAKernel<<<blocks, threads>>>(input, output);
* perf.stop();
*
* perf.printToMarkdown("cuperf_report.md");
* perf.printToCSV("cuperf_report.csv");
* @endcode
*
*
* @author Wassim KABALAN
*/
#ifndef PERFOSTEP_HPP
#define PERFOSTEP_HPP
#include <algorithm>
#include <cassert>
#include <chrono>
#include <fstream>
#include <iostream>
#include <iterator>
#include <map>
#include <memory>
#include <numeric>
#include <stdexcept>
#include <string>
#include <tuple>
#include <vector>
#if __has_include(<nvToolsExt.h>)
#define ENABLE_NVTX
#include <nvtx3/nvToolsExt.h>
#endif
#if defined(__CUDACC__)
#include <cuda_runtime.h>
#define ENABLE_CUDA
#endif
using namespace std::chrono;
typedef std::map<const std::string, const std::string> ColumnNames;
typedef std::map<const std::string, double> Reports;
class AbstractPerfostep {
public:
virtual void Start(const std::string &iReport, const ColumnNames &iCol) = 0;
virtual double Stop() = 0;
virtual void Report(const bool &iPrintTotal = false) const = 0;
virtual void PrintToMarkdown(const char *ifilename,
const bool &iPrintTotal = false) const = 0;
virtual void PrintToCSV(const char *ifilename,
const bool &iPrintTotal = false) const = 0;
virtual void Switch(const std::string &iReport, const ColumnNames &iCol) = 0;
virtual ~AbstractPerfostep() {}
protected:
Reports m_Reports; /**< The report of measured tasks. */
ColumnNames m_ColNames;
};
class BasePerfostep : public AbstractPerfostep {
public:
void Report(const bool &iPrintTotal = false) const override {
if (m_Reports.size() == 0)
return;
std::cout << "Reporting : " << std::endl;
std::cout << "For parameters: " << std::endl;
for (const auto &entry : m_ColNames) {
std::cout << std::get<0>(entry) << " : " << std::get<1>(entry)
<< std::endl;
}
for (const auto &entry : m_Reports) {
std::cout << std::get<0>(entry) << " : " << std::get<1>(entry) << "ms "
<< std::endl;
}
}
void PrintToMarkdown(const char *filename,
const bool &iPrintTotal = false) const override {
if (m_Reports.size() == 0)
return;
std::ofstream file(filename, std::ios::app);
if (!file.is_open()) {
throw std::runtime_error("Failed to open file: " + std::string(filename));
}
if (file.tellp() == 0) { // Check if file is empty
file << "| Task | ";
for (const auto &entry : m_ColNames) {
file << std::get<0>(entry) << " | ";
}
file << "Elapsed Time (ms) |" << std::endl; // Header names for columns
// For the Task column
file << "| ---- | ";
// For the other columns
for (const auto &entry : m_ColNames) {
file << std::string(entry.first.length(), '-') << " | ";
}
// For the elapsed time column
file << " ---------------- |" << std::endl;
}
std::string colvalues;
for (const auto &col : m_ColNames) {
colvalues += std::get<1>(col) + " | ";
}
for (const auto &entry : m_Reports) {
file << "| " << std::get<0>(entry) << " | " << colvalues
<< std::get<1>(entry) << " |" << std::endl;
}
if (iPrintTotal)
file << "| Total | " << colvalues << GetTotal() << " |" << std::endl;
file.close();
}
/**
* @brief Prints the measured tasks and their elapsed times in a CSV
* format to a file.
* @param filename The name of the file to write the CSV data to.
*/
void PrintToCSV(const char *filename,
const bool &iPrintTotal) const override {
if (m_Reports.size() == 0)
return;
std::ofstream file(filename, std::ios::app); // Open file in append mode
if (!file.is_open()) {
throw std::runtime_error("Failed to open file: " + std::string(filename));
}
if (file.tellp() == 0) { // Check if file is empty
file << "Task,";
for (const auto &entry : m_ColNames) {
file << std::get<0>(entry) << ",";
}
file << "Elapsed Time (ms)" << std::endl; // Header names for columns
}
std::string colvalues;
for (const auto &col : m_ColNames) {
colvalues += std::get<1>(col) + ",";
}
for (const auto &entry : m_Reports) {
file << std::get<0>(entry) << "," << colvalues << std::get<1>(entry)
<< std::endl;
}
if (iPrintTotal)
file << "Total," << colvalues << GetTotal() << std::endl;
file.close();
}
void Switch(const std::string &iReport, const ColumnNames &iCol) override {
Stop();
Start(iReport, iCol);
}
private:
double GetTotal() const {
double total = std::accumulate(
m_Reports.begin(), m_Reports.end(), 0.0,
[](double sum, const std::tuple<const std::string, double> &entry) {
return sum + std::get<1>(entry);
});
return total;
}
};
typedef std::vector<
std::tuple<const std::string, time_point<high_resolution_clock>>>
StartTimes;
class PerfostepChrono : public BasePerfostep {
public:
void Start(const std::string &iReport, const ColumnNames &iCol) override {
m_StartTimes.push_back(
std::make_tuple(iReport, high_resolution_clock::now()));
m_ColNames = iCol;
}
double Stop() override {
// Check if there are any start times
assert(m_StartTimes.size() > 0);
m_EndTime = high_resolution_clock::now();
duration<double> diff = m_EndTime - std::get<1>(m_StartTimes.back());
double elapsed_time = diff.count() * 1000;
m_Reports[std::get<0>(m_StartTimes.back())] = elapsed_time;
m_StartTimes.pop_back();
return elapsed_time;
}
~PerfostepChrono() {
if (m_StartTimes.size() > 0) {
std::cerr << "Warning: There are still start times not stopped"
<< std::endl;
// print message for each start time
for (const auto &entry : m_StartTimes) {
std::cerr << "Start time for " << std::get<0>(entry)
<< " is not stopped" << std::endl;
}
}
}
private:
StartTimes m_StartTimes; /**< The start time of the measurement. */
time_point<high_resolution_clock>
m_EndTime; /**< The end time of the measurement. */
};
#ifdef ENABLE_NVTX
// Credit : https://github.com/NVIDIA/cuDecomp
class PerfostepNVTX : public BasePerfostep {
public:
// ColumnNames are not used in NVTX
void Start(const std::string &iReport, const ColumnNames &iCol) override {
static constexpr int ncolors_ = 8;
static constexpr int colors_[ncolors_] = {0x3366CC, 0xDC3912, 0xFF9900,
0x109618, 0x990099, 0x3B3EAC,
0x0099C6, 0xDD4477};
std::string range_name(iReport);
std::hash<std::string> hash_fn;
int color = colors_[hash_fn(range_name) % ncolors_];
nvtxEventAttributes_t ev = {0};
ev.version = NVTX_VERSION;
ev.size = NVTX_EVENT_ATTRIB_STRUCT_SIZE;
ev.colorType = NVTX_COLOR_ARGB;
ev.color = color;
ev.messageType = NVTX_MESSAGE_TYPE_ASCII;
ev.message.ascii = range_name.c_str();
nvtxRangePushEx(&ev);
nvtx_ranges++;
}
double Stop() override {
nvtxRangePop();
nvtx_ranges--;
assert(nvtx_ranges >= 0);
return 0.0;
}
~PerfostepNVTX() {
if (nvtx_ranges > 0) {
std::cerr << "Warning: There are still start times not stopped"
<< std::endl;
for (int i = 0; i < nvtx_ranges; i++)
nvtxRangePop();
}
}
private:
int nvtx_ranges = 0;
};
#endif // ENABLE_NVTX
#ifdef ENABLE_CUDA
typedef std::vector<std::tuple<const std::string, cudaEvent_t>> StartEvents;
class PerfostepCUDA : public BasePerfostep {
public:
PerfostepCUDA() { cudaEventCreate(&m_EndEvent); }
void Start(const std::string &iReport, const ColumnNames &iCol) override {
cudaEvent_t m_StartEvent;
cudaEventCreate(&m_StartEvent);
cudaEventRecord(m_StartEvent);
m_StartEvents.push_back(std::make_tuple(iReport, m_StartEvent));
m_ColNames = iCol;
}
double Stop() override {
cudaEventRecord(m_EndEvent);
cudaEventSynchronize(m_EndEvent);
float elapsed;
cudaEventElapsedTime(&elapsed, std::get<1>(m_StartEvents.back()),
m_EndEvent);
double m_ElapsedTime = static_cast<double>(elapsed);
cudaEventDestroy(std::get<1>(m_StartEvents.back()));
m_Reports[std::get<0>(m_StartEvents.back())] = m_ElapsedTime;
m_StartEvents.pop_back();
return m_ElapsedTime;
}
~PerfostepCUDA() {
if (m_StartEvents.size() > 0) {
std::cerr << "Warning: There are still start events not stopped"
<< std::endl;
std::for_each(
m_StartEvents.cbegin(), m_StartEvents.cend(),
[](const std::tuple<const std::string, cudaEvent_t> &entry) {
cudaEventDestroy(std::get<1>(entry));
});
}
cudaEventDestroy(m_EndEvent);
}
private:
StartEvents m_StartEvents; /**< The start event for CUDA measurement. */
cudaEvent_t m_EndEvent; /**< The end event for CUDA measurement. */
};
#endif // ENABLE_CUDA
class Perfostep {
public:
Perfostep() {
static const char *env = std::getenv("ENABLE_PERFO_STEP");
if (env != nullptr) {
std::string envStr(env);
if (envStr == "TIMER") {
m_Perfostep = std::make_unique<PerfostepChrono>();
m_EnablePerfoStep = true;
} else if (envStr == "NVTX") {
#ifdef ENABLE_NVTX
m_Perfostep = std::make_unique<PerfostepNVTX>();
m_EnablePerfoStep = true;
#else
throw std::runtime_error(
"NVTX is not available. Please install NVTX to use it.");
#endif
} else if (envStr == "CUDA") {
#ifdef ENABLE_CUDA
m_Perfostep = std::make_unique<PerfostepCUDA>();
m_EnablePerfoStep = true;
#else
throw std::runtime_error("CUDA is not available. Please install CUDA "
"or compile using nvcc to use it.");
#endif
} else {
throw std::runtime_error(
"Invalid value for ENABLE_PERFO_STEP: " + envStr +
". Possible values are TIMER, NVTX, or "
"CUDA.");
}
}
}
void Start(const std::string &iReport, const ColumnNames &iCol = {}) {
if (m_EnablePerfoStep)
m_Perfostep->Start(iReport, iCol);
}
double Stop() {
if (m_EnablePerfoStep)
return m_Perfostep->Stop();
return 0.0;
}
void Report(const bool &iPrintTotal = false) const {
if (m_EnablePerfoStep)
m_Perfostep->Report(iPrintTotal);
}
void PrintToMarkdown(const char *filename,
const bool &iPrintTotal = false) const {
if (m_EnablePerfoStep)
m_Perfostep->PrintToMarkdown(filename, iPrintTotal);
}
void PrintToCSV(const char *filename, const bool &iPrintTotal = false) const {
if (m_EnablePerfoStep)
m_Perfostep->PrintToCSV(filename, iPrintTotal);
}
void Switch(const std::string &iReport, const ColumnNames &iCol = {}) {
if (m_EnablePerfoStep)
m_Perfostep->Switch(iReport, iCol);
}
private:
std::unique_ptr<AbstractPerfostep> m_Perfostep;
bool m_EnablePerfoStep = false;
};
#endif // PERFOSTEP_HPP
/**
* @file Perfostep.hpp
* @version 0.0.4
* @brief A class for measuring the performance of code execution.
* @details This header-only C++ class provides functionality for measuring the
* performance of code execution, supporting both CPU and GPU measurements. It
* utilizes high-resolution timers and optional NVIDIA Tools Extension (NVTX)
* for GPU measurements.
*
* Environment Variable:
* - ENABLE_PERFO_STEP: Set this environment variable to enable performance
* measurement. Possible values are "TIMER" for CPU timing or "NVTX" for NSYS
* profiling or "CUDA" for CUDA timing.
* @code
* // Example 1: Measure the performance of a CPU code
* Perfostep perf;
* perf.start("Code block 1");
* // Code block to measure
* for (int i = 0; i < 1000000; ++i) {
* // Some computation
* }
* perf.stop();
*
* perf.printToMarkdown("perf_report.md");
* perf.printToCSV("perf_report.csv");
*
* // Example 2: Measure the performance of GPU code
* Perfostep perf;
* perf.start("CUDA Kernel");
* // Kernel to measure
* myCUDAKernel<<<blocks, threads>>>(input, output);
* perf.stop();
*
* perf.printToMarkdown("cuperf_report.md");
* perf.printToCSV("cuperf_report.csv");
* @endcode
*
*
* @author Wassim KABALAN
*/
#ifndef PERFOSTEP_HPP
#define PERFOSTEP_HPP
#include <algorithm>
#include <cassert>
#include <chrono>
#include <fstream>
#include <iostream>
#include <iterator>
#include <map>
#include <memory>
#include <numeric>
#include <stdexcept>
#include <string>
#include <tuple>
#include <vector>
#if __has_include(<nvToolsExt.h>)
#define ENABLE_NVTX
#include <nvtx3/nvToolsExt.h>
#endif
#if defined(__CUDACC__)
#include <cuda_runtime.h>
#define ENABLE_CUDA
#endif
using namespace std::chrono;
typedef std::map<const std::string, const std::string> ColumnNames;
typedef std::map<const std::string, double> Reports;
class AbstractPerfostep {
public:
virtual void Start(const std::string &iReport, const ColumnNames &iCol) = 0;
virtual double Stop() = 0;
virtual void Report(const bool &iPrintTotal = false) const = 0;
virtual void PrintToMarkdown(const char *ifilename,
const bool &iPrintTotal = false) const = 0;
virtual void PrintToCSV(const char *ifilename,
const bool &iPrintTotal = false) const = 0;
virtual void Switch(const std::string &iReport, const ColumnNames &iCol) = 0;
virtual ~AbstractPerfostep() {}
protected:
Reports m_Reports; /**< The report of measured tasks. */
ColumnNames m_ColNames;
};
class BasePerfostep : public AbstractPerfostep {
public:
void Report(const bool &iPrintTotal = false) const override {
if (m_Reports.size() == 0) return;
std::cout << "Reporting : " << std::endl;
std::cout << "For parameters: " << std::endl;
for (const auto &entry : m_ColNames) {
std::cout << std::get<0>(entry) << " : " << std::get<1>(entry)
<< std::endl;
}
for (const auto &entry : m_Reports) {
std::cout << std::get<0>(entry) << " : " << std::get<1>(entry) << "ms "
<< std::endl;
}
}
void PrintToMarkdown(const char *filename,
const bool &iPrintTotal = false) const override {
if (m_Reports.size() == 0) return;
std::ofstream file(filename, std::ios::app);
if (!file.is_open()) {
throw std::runtime_error("Failed to open file: " + std::string(filename));
}
if (file.tellp() == 0) { // Check if file is empty
file << "| Task | ";
for (const auto &entry : m_ColNames) {
file << std::get<0>(entry) << " | ";
}
file << "Elapsed Time (ms) |" << std::endl; // Header names for columns
// For the Task column
file << "| --- | ";
// For the other columns
for (const auto &entry : m_ColNames) {
file << " --- | ";
}
// For the elapsed time column
file << " --------------- |" << std::endl;
}
std::string colvalues;
for (const auto &col : m_ColNames) {
colvalues += std::get<1>(col) + " | ";
}
for (const auto &entry : m_Reports) {
file << "| " << std::get<0>(entry) << " | " << colvalues
<< std::get<1>(entry) << " |" << std::endl;
}
if (iPrintTotal)
file << "| Total | " << colvalues << GetTotal() << " |" << std::endl;
file.close();
}
/**
* @brief Prints the measured tasks and their elapsed times in a CSV
* format to a file.
* @param filename The name of the file to write the CSV data to.
*/
void PrintToCSV(const char *filename,
const bool &iPrintTotal) const override {
if (m_Reports.size() == 0) return;
std::ofstream file(filename, std::ios::app); // Open file in append mode
if (!file.is_open()) {
throw std::runtime_error("Failed to open file: " + std::string(filename));
}
if (file.tellp() == 0) { // Check if file is empty
file << "Task,";
for (const auto &entry : m_ColNames) {
file << std::get<0>(entry) << ",";
}
file << "Elapsed Time (ms)" << std::endl; // Header names for columns
}
std::string colvalues;
for (const auto &col : m_ColNames) {
colvalues += std::get<1>(col) + ",";
}
for (const auto &entry : m_Reports) {
file << std::get<0>(entry) << "," << colvalues << std::get<1>(entry)
<< std::endl;
}
if (iPrintTotal) file << "Total," << colvalues << GetTotal() << std::endl;
file.close();
}
void Switch(const std::string &iReport, const ColumnNames &iCol) override {
Stop();
Start(iReport, iCol);
}
private:
double GetTotal() const {
double total = std::accumulate(
m_Reports.begin(), m_Reports.end(), 0.0,
[](double sum, const std::tuple<const std::string, double> &entry) {
return sum + std::get<1>(entry);
});
return total;
}
};
typedef std::vector<
std::tuple<const std::string, time_point<high_resolution_clock>>>
StartTimes;
class PerfostepChrono : public BasePerfostep {
public:
void Start(const std::string &iReport, const ColumnNames &iCol) override {
m_StartTimes.push_back(
std::make_tuple(iReport, high_resolution_clock::now()));
m_ColNames = iCol;
}
double Stop() override {
// Check if there are any start times
assert(m_StartTimes.size() > 0);
m_EndTime = high_resolution_clock::now();
duration<double> diff = m_EndTime - std::get<1>(m_StartTimes.back());
double elapsed_time = diff.count() * 1000;
m_Reports[std::get<0>(m_StartTimes.back())] = elapsed_time;
m_StartTimes.pop_back();
return elapsed_time;
}
~PerfostepChrono() {
if (m_StartTimes.size() > 0) {
std::cerr << "Warning: There are still start times not stopped"
<< std::endl;
// print message for each start time
for (const auto &entry : m_StartTimes) {
std::cerr << "Start time for " << std::get<0>(entry)
<< " is not stopped" << std::endl;
}
}
}
private:
StartTimes m_StartTimes; /**< The start time of the measurement. */
time_point<high_resolution_clock>
m_EndTime; /**< The end time of the measurement. */
};
#ifdef ENABLE_NVTX
// Credit : https://github.com/NVIDIA/cuDecomp
class PerfostepNVTX : public BasePerfostep {
public:
// ColumnNames are not used in NVTX
void Start(const std::string &iReport, const ColumnNames &iCol) override {
static constexpr int ncolors_ = 8;
static constexpr int colors_[ncolors_] = {0x3366CC, 0xDC3912, 0xFF9900,
0x109618, 0x990099, 0x3B3EAC,
0x0099C6, 0xDD4477};
std::string range_name(iReport);
std::hash<std::string> hash_fn;
int color = colors_[hash_fn(range_name) % ncolors_];
nvtxEventAttributes_t ev = {0};
ev.version = NVTX_VERSION;
ev.size = NVTX_EVENT_ATTRIB_STRUCT_SIZE;
ev.colorType = NVTX_COLOR_ARGB;
ev.color = color;
ev.messageType = NVTX_MESSAGE_TYPE_ASCII;
ev.message.ascii = range_name.c_str();
nvtxRangePushEx(&ev);
nvtx_ranges++;
}
double Stop() override {
nvtxRangePop();
nvtx_ranges--;
assert(nvtx_ranges >= 0);
return 0.0;
}
~PerfostepNVTX() {
if (nvtx_ranges > 0) {
std::cerr << "Warning: There are still start times not stopped"
<< std::endl;
for (int i = 0; i < nvtx_ranges; i++) nvtxRangePop();
}
}
private:
int nvtx_ranges = 0;
};
#endif // ENABLE_NVTX
#ifdef ENABLE_CUDA
typedef std::vector<std::tuple<const std::string, cudaEvent_t>> StartEvents;
class PerfostepCUDA : public BasePerfostep {
public:
PerfostepCUDA() { cudaEventCreate(&m_EndEvent); }
void Start(const std::string &iReport, const ColumnNames &iCol) override {
cudaEvent_t m_StartEvent;
cudaEventCreate(&m_StartEvent);
cudaEventRecord(m_StartEvent);
m_StartEvents.push_back(std::make_tuple(iReport, m_StartEvent));
m_ColNames = iCol;
}
double Stop() override {
cudaEventRecord(m_EndEvent);
cudaEventSynchronize(m_EndEvent);
float elapsed;
cudaEventElapsedTime(&elapsed, std::get<1>(m_StartEvents.back()),
m_EndEvent);
double m_ElapsedTime = static_cast<double>(elapsed);
cudaEventDestroy(std::get<1>(m_StartEvents.back()));
m_Reports[std::get<0>(m_StartEvents.back())] = m_ElapsedTime;
m_StartEvents.pop_back();
return m_ElapsedTime;
}
~PerfostepCUDA() {
if (m_StartEvents.size() > 0) {
std::cerr << "Warning: There are still start events not stopped"
<< std::endl;
std::for_each(
m_StartEvents.cbegin(), m_StartEvents.cend(),
[](const std::tuple<const std::string, cudaEvent_t> &entry) {
cudaEventDestroy(std::get<1>(entry));
});
}
cudaEventDestroy(m_EndEvent);
}
private:
StartEvents m_StartEvents; /**< The start event for CUDA measurement. */
cudaEvent_t m_EndEvent; /**< The end event for CUDA measurement. */
};
#endif // ENABLE_CUDA
class Perfostep {
public:
Perfostep() {
static const char *env = std::getenv("ENABLE_PERFO_STEP");
if (env != nullptr) {
std::string envStr(env);
if (envStr == "TIMER") {
m_Perfostep = std::make_unique<PerfostepChrono>();
m_EnablePerfoStep = true;
} else if (envStr == "NVTX") {
#ifdef ENABLE_NVTX
m_Perfostep = std::make_unique<PerfostepNVTX>();
m_EnablePerfoStep = true;
#else
throw std::runtime_error(
"NVTX is not available. Please install NVTX to use it.");
#endif
} else if (envStr == "CUDA") {
#ifdef ENABLE_CUDA
m_Perfostep = std::make_unique<PerfostepCUDA>();
m_EnablePerfoStep = true;
#else
throw std::runtime_error(
"CUDA is not available. Please install CUDA "
"or compile using nvcc to use it.");
#endif
} else {
throw std::runtime_error(
"Invalid value for ENABLE_PERFO_STEP: " + envStr +
". Possible values are TIMER, NVTX, or "
"CUDA.");
}
}
}
void Start(const std::string &iReport, const ColumnNames &iCol = {}) {
if (m_EnablePerfoStep) m_Perfostep->Start(iReport, iCol);
}
double Stop() {
if (m_EnablePerfoStep) return m_Perfostep->Stop();
return 0.0;
}
void Report(const bool &iPrintTotal = false) const {
if (m_EnablePerfoStep) m_Perfostep->Report(iPrintTotal);
}
void PrintToMarkdown(const char *filename,
const bool &iPrintTotal = false) const {
if (m_EnablePerfoStep) m_Perfostep->PrintToMarkdown(filename, iPrintTotal);
}
void PrintToCSV(const char *filename, const bool &iPrintTotal = false) const {
if (m_EnablePerfoStep) m_Perfostep->PrintToCSV(filename, iPrintTotal);
}
void Switch(const std::string &iReport, const ColumnNames &iCol = {}) {
if (m_EnablePerfoStep) m_Perfostep->Switch(iReport, iCol);
}
private:
std::unique_ptr<AbstractPerfostep> m_Perfostep;
bool m_EnablePerfoStep = false;
};
#endif // PERFOSTEP_HPP
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment