JuanDiegoMontoya/TimerQueryAsync.h

## TimerQueryAsync.h
#include <cstdint>
#include <optional>

// Async N-buffered timer query.
// Does not induce pipeline stalls.
// Useful for measuring performance of passes every frame without causing stalls.
// However, the results returned may be from multiple frames ago,
// and results are not guaranteed to be available.
// In practice, setting N to 5 should allow at least one query to be available.
class TimerQueryAsync
{
public:
  // Creates a timer with N measurement slots. The implementation allocates
  // 2 * N GL query names: one start and one end timestamp per slot.
  // N is asserted to be greater than zero.
  TimerQueryAsync(uint32_t N);
  // Deletes the GL query names and frees the array that holds them.
  ~TimerQueryAsync();

  // Non-copyable and non-movable: the object owns the raw `queries` array
  // (and the GL query names stored in it), which the destructor releases.
  TimerQueryAsync(const TimerQueryAsync&) = delete;
  TimerQueryAsync(TimerQueryAsync&&) = delete;
  TimerQueryAsync& operator=(const TimerQueryAsync&) = delete;
  TimerQueryAsync& operator=(TimerQueryAsync&&) = delete;

  // Begin() records the start timestamp and End() records the end timestamp
  // of one measurement.
  // Always call End after Begin.
  // Never call Begin or End twice in a row.
  // Both calls silently do nothing while all N slots hold unread results.
  // NOTE(review): Begin and End each test the slot count independently, so
  // calling Elapsed_ns() between a skipped Begin and its End appears to let
  // End record against a stale start timestamp - confirm callers never
  // interleave them that way.
  void Begin();
  void End();

  // Returns the oldest pending measurement (end timestamp minus start
  // timestamp) and releases its slot, if both of its results are available.
  // Otherwise returns std::nullopt and leaves the measurement pending.
  [[nodiscard]] std::optional<uint64_t> Elapsed_ns();

private:
  uint32_t start_{}; // index of the next slot to be used for a measurement
  uint32_t count_{}; // number of slots 'buffered', i.e. measurement was started but result not read yet
  const uint32_t capacity_{}; // number of slots (N passed to the constructor)
  // Owning array of 2 * capacity_ GL query names: [0, capacity_) are start
  // timestamps, [capacity_, 2 * capacity_) are the matching end timestamps.
  uint32_t* queries{};
};

## TimerQueryAsyncz.cpp
#include "TimerQueryAsync.h"

TimerQueryAsync::TimerQueryAsync(uint32_t N)
  : capacity_(N)
{
  ASSERT(capacity_ > 0);
  queries = new uint32_t[capacity_ * 2];
  glGenQueries(capacity_ * 2, queries);
}

// Releases everything the constructor acquired: first the 2 * capacity_
// GL query names, then the array that stored them.
TimerQueryAsync::~TimerQueryAsync()
{
  const uint32_t totalQueries = capacity_ * 2;
  glDeleteQueries(totalQueries, queries);
  delete[] queries;
}

// Records the start timestamp of a new measurement in the current slot.
// Does nothing when every slot still holds a result that has not been read.
void TimerQueryAsync::Begin()
{
  // no free slot: drop this measurement
  if (count_ >= capacity_)
  {
    return;
  }

  glQueryCounter(queries[start_], GL_TIMESTAMP);
}

// Records the end timestamp of the measurement in the current slot, marks
// the slot as pending and advances to the next slot.
// Does nothing when every slot still holds a result that has not been read.
void TimerQueryAsync::End()
{
  // no free slot: drop this measurement
  if (count_ >= capacity_)
  {
    return;
  }

  // end queries live in the second half of the array
  const uint32_t endSlot = start_ + capacity_;
  glQueryCounter(queries[endSlot], GL_TIMESTAMP);

  ++count_;
  start_ = (start_ + 1) % capacity_; // wrap around to the first slot
}

std::optional<uint64_t> TimerQueryAsync::Elapsed_ns()
{
  // return nothing if there is no active query
  if (count_ == 0)
  {
    return std::nullopt;
  }

  // get the index of the oldest query
  uint32_t index = (start_ + capacity_ - count_) % capacity_;

  // getting the start result is a sanity check
  GLint startResultAvailable{};
  GLint endResultAvailable{};
  glGetQueryObjectiv(queries[index], GL_QUERY_RESULT_AVAILABLE, &startResultAvailable);
  glGetQueryObjectiv(queries[index + capacity_], GL_QUERY_RESULT_AVAILABLE, &endResultAvailable);

  // the oldest query's result is not available, abandon ship!
  if (startResultAvailable == GL_FALSE || endResultAvailable == GL_FALSE)
  {
    return std::nullopt;
  }

  // pop oldest timing and retrieve result
  count_--;
  uint64_t startTimestamp{};
  uint64_t endTimestamp{};
  glGetQueryObjectui64v(queries[index], GL_QUERY_RESULT, &startTimestamp);
  glGetQueryObjectui64v(queries[index + capacity_], GL_QUERY_RESULT, &endTimestamp);
  return endTimestamp - startTimestamp;
}
	#include <cstdint>
	#include <optional>

	// Async N-buffered timer query.
	// Does not induce pipeline stalls.
	// Useful for measuring performance of passes every frame without causing stalls.
	// However, the results returned may be from multiple frames ago,
	// and results are not guaranteed to be available.
	// In practice, setting N to 5 should allow at least one query to be available.
	class TimerQueryAsync
	{
	public:
	// Creates a timer with N measurement slots. The implementation allocates
	// 2 * N GL query names: one start and one end timestamp per slot.
	// N is asserted to be greater than zero.
	TimerQueryAsync(uint32_t N);
	// Deletes the GL query names and frees the array that holds them.
	~TimerQueryAsync();

	// Non-copyable and non-movable: the object owns the raw `queries` array
	// (and the GL query names stored in it), which the destructor releases.
	TimerQueryAsync(const TimerQueryAsync&) = delete;
	TimerQueryAsync(TimerQueryAsync&&) = delete;
	TimerQueryAsync& operator=(const TimerQueryAsync&) = delete;
	TimerQueryAsync& operator=(TimerQueryAsync&&) = delete;

	// Begin() records the start timestamp and End() records the end timestamp
	// of one measurement.
	// Always call End after Begin.
	// Never call Begin or End twice in a row.
	// Both calls silently do nothing while all N slots hold unread results.
	// NOTE(review): Begin and End each test the slot count independently, so
	// calling Elapsed_ns() between a skipped Begin and its End appears to let
	// End record against a stale start timestamp - confirm callers never
	// interleave them that way.
	void Begin();
	void End();

	// Returns the oldest pending measurement (end timestamp minus start
	// timestamp) and releases its slot, if both of its results are available.
	// Otherwise returns std::nullopt and leaves the measurement pending.
	[[nodiscard]] std::optional<uint64_t> Elapsed_ns();

	private:
	uint32_t start_{}; // index of the next slot to be used for a measurement
	uint32_t count_{}; // number of slots 'buffered', i.e. measurement was started but result not read yet
	const uint32_t capacity_{}; // number of slots (N passed to the constructor)
	// Owning array of 2 * capacity_ GL query names: [0, capacity_) are start
	// timestamps, [capacity_, 2 * capacity_) are the matching end timestamps.
	uint32_t* queries{};
	};
	#include "TimerQueryAsync.h"

	TimerQueryAsync::TimerQueryAsync(uint32_t N)
	: capacity_(N)
	{
	ASSERT(capacity_ > 0);
	queries = new uint32_t[capacity_ * 2];
	glGenQueries(capacity_ * 2, queries);
	}

	// Releases everything the constructor acquired: first the 2 * capacity_
	// GL query names, then the array that stored them.
	TimerQueryAsync::~TimerQueryAsync()
	{
	  const uint32_t totalQueries = capacity_ * 2;
	  glDeleteQueries(totalQueries, queries);
	  delete[] queries;
	}

	// Records the start timestamp of a new measurement in the current slot.
	// Does nothing when every slot still holds a result that has not been read.
	void TimerQueryAsync::Begin()
	{
	  // no free slot: drop this measurement
	  if (count_ >= capacity_)
	  {
	    return;
	  }

	  glQueryCounter(queries[start_], GL_TIMESTAMP);
	}

	// Records the end timestamp of the measurement in the current slot, marks
	// the slot as pending and advances to the next slot.
	// Does nothing when every slot still holds a result that has not been read.
	void TimerQueryAsync::End()
	{
	  // no free slot: drop this measurement
	  if (count_ >= capacity_)
	  {
	    return;
	  }

	  // end queries live in the second half of the array
	  const uint32_t endSlot = start_ + capacity_;
	  glQueryCounter(queries[endSlot], GL_TIMESTAMP);

	  ++count_;
	  start_ = (start_ + 1) % capacity_; // wrap around to the first slot
	}

	// Returns the difference between the end and start timestamps of the oldest
	// pending measurement and frees its slot.
	// Returns std::nullopt when nothing is pending, or when either timestamp of
	// the oldest measurement is not available yet; in the latter case the
	// measurement stays pending so a later call can pick it up.
	std::optional<uint64_t> TimerQueryAsync::Elapsed_ns()
	{
	  // return nothing if there is no active query
	  if (count_ == 0)
	  {
	    return std::nullopt;
	  }

	  // the oldest pending slot sits count_ slots behind start_ (with wrap)
	  const uint32_t index = (start_ + capacity_ - count_) % capacity_;

	  // only ask whether the results exist; checking the start query as well
	  // is a sanity check
	  GLint startResultAvailable{};
	  GLint endResultAvailable{};
	  glGetQueryObjectiv(queries[index], GL_QUERY_RESULT_AVAILABLE, &startResultAvailable);
	  glGetQueryObjectiv(queries[index + capacity_], GL_QUERY_RESULT_AVAILABLE, &endResultAvailable);

	  // the oldest query's result is not available yet: keep it pending
	  // (the operator here was a markdown-escaped "\|\|", which is not valid C++)
	  if (startResultAvailable == GL_FALSE || endResultAvailable == GL_FALSE)
	  {
	    return std::nullopt;
	  }

	  // pop oldest timing and retrieve result
	  count_--;
	  uint64_t startTimestamp{};
	  uint64_t endTimestamp{};
	  glGetQueryObjectui64v(queries[index], GL_QUERY_RESULT, &startTimestamp);
	  glGetQueryObjectui64v(queries[index + capacity_], GL_QUERY_RESULT, &endTimestamp);
	  return endTimestamp - startTimestamp;
	}