kinchungwong/RowFilterLoop_ThreeInput_OneOutput.cpp

## RowFilterLoop_ThreeInput_OneOutput.cpp
//
// RowFilterLoop_ThreeInput_OneOutput.cpp
//

#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <memory>
#include <stdexcept>
#include <utility>

// ======
// Helper classes (stripped out)
//
// These classes are not the focus of the idea to be illustrated
// by this code skeleton. Therefore, they are stripped out to the
// bare minimum to keep C++ syntax parser happy.
// ======

// Image dimensions as plain signed integers.
// RowFilterLoop_ThreeInput_OneOutput uses `w` as the row width (its column
// loop bound and the first argument given to the row cache) and `h` as the
// row count (the upper clamp for row indices and the cache's maximum capacity).
struct Size
{
    int w; // width: number of columns in one row
    int h; // height: number of rows
};

// ======

//
// Keyed store of rows (declaration-only stub).
//
// ElemType is the element type of a row; KeyType identifies a row.
// Only the interface is declared here; no behaviour is defined in this file.
//
template <typename ElemType, typename KeyType>
class RowCacheT
{
public:
    // Takes three sizes. The argument names follow the only call site in this
    // file, which passes (image width, initial capacity, maximum capacity).
    // NOTE(review): confirm against the real implementation.
    RowCacheT(size_t rowWidth, size_t initialCapacity, size_t maxCapacity);

    // Reports whether a row is currently associated with `key`.
    bool Contains(const KeyType& key) const;

    // Read-only access to the row stored under `key`.
    const ElemType* Read(const KeyType& key);

    // Writable access to the row stored under `key`. The caller in this file
    // relies on Write() to provide the storage for a not-yet-cached row.
    ElemType* Write(const KeyType& key);
};

// ======

//
// Abstract producer of image rows.
//
// Implementations report the image dimensions and hand out read-only
// pointers to individual rows. Instances are held and destroyed through
// pointers to this base type (e.g. std::shared_ptr<RowSourceT<uint8_t>>),
// so the destructor must be virtual.
//
template <typename ElemType>
class RowSourceT
{
public:
    // Virtual so that deleting a derived object through a RowSourceT pointer
    // is well-defined.
    virtual ~RowSourceT() = default;

    // Dimensions of the image this source produces.
    virtual Size ImageSize() = 0;

    // Read-only pointer to the elements of the requested row.
    // NOTE(review): how long the returned pointer stays valid is not
    // specified here - confirm with each implementation.
    virtual const ElemType* Read(int row) = 0;
};

// ======

//
// Row kernel: produces one output row from three input rows.
//
// Objects of this class hold configuration but are never modified after
// construction, so every setting the filter needs has to be supplied
// through the constructor.
//
class SimdRowFunc
{
public:
    // Builds the kernel; configuration parameters, if any, belong here.
    SimdRowFunc(/* config params */);

    // Invoked once for every output row.
    // The three inputs are the rows above, at, and below the output row.
    // NOTE(review): the row length is not passed in; presumably it is part
    // of the configuration - confirm with the real implementation.
    void Apply(const uint8_t* inputTopRow, const uint8_t* inputCenterRow,
               const uint8_t* inputBottomRow, uint8_t* outputRow) const;
};

// ======
//
// Beginning of the main focus of this code skeleton.
//
// Note that this shell class is universal: it is applicable to ALL
// row-based filter operations that take three input rows and compute
// one output row.
//
// To perform a different 3x3 image operation, just swap out the
// SimdRowFunc class, or the "SimdRowFunc::Apply()" method.
//
// It is best to let SimdRowFunc::Apply() be an
//   * Interface method, or
//   * Raw function pointer.
//
// The overhead of an indirect function call is justified, because
// an entire row of pixels is processed by each function call,
// which amortizes the overhead of that call over the number of pixels
// (width) of the image.
//
// ======

//
// Row-at-a-time filter stage for operations that need three input rows
// (above, center, below) to compute one output row.
//
// Rows are pulled from an upstream RowSourceT<uint8_t>, passed to
// SimdRowFunc::Apply(), and the computed row is kept in a RowCacheT keyed by
// row number so that a repeated request for the same row is served from the
// cache. At the top and bottom image edges the missing neighbour row is
// replaced by the nearest valid row (index clamping).
//
// The class implements the RowSourceT<uint8_t> interface itself, so that it
// can be used wherever a row source is expected.
//
class RowFilterLoop_ThreeInput_OneOutput final : public RowSourceT<uint8_t>
{
public:
    using ElemType = uint8_t;

public:
    //
    // Binds this stage to its upstream row source and creates the result cache.
    //
    // Throws std::logic_error if `source` is null, and std::invalid_argument
    // (a std::logic_error) if the source reports a negative width or height.
    //
    RowFilterLoop_ThreeInput_OneOutput(std::shared_ptr<RowSourceT<uint8_t>> source)
        : m_simdRowFunc()
        , m_source(std::move(source))
        , m_result()
        , m_size()
    {
        if (!m_source)
        {
            //throw NullSharedPointerException(methodName);
            throw std::logic_error("ctor");
        }
        m_size = m_source->ImageSize();
        //
        // Reject negative dimensions before converting to size_t; a negative
        // int would otherwise wrap around to an enormous unsigned value.
        //
        if (m_size.w < 0 || m_size.h < 0)
        {
            throw std::invalid_argument("ctor: negative image size");
        }
        const size_t rowWidth = static_cast<size_t>(m_size.w);
        const size_t initCap = 0;
        const size_t maxCap = static_cast<size_t>(m_size.h);
        m_result = std::make_shared<RowCacheT<uint8_t, size_t>>(
            rowWidth, initCap, maxCap);
    }

public:
    //
    // RowSourceT interface: image dimensions (same as the upstream source).
    //
    Size ImageSize() override
    {
        return m_size;
    }

    //
    // Const-qualified overload, kept for callers holding a const reference.
    //
    Size ImageSize() const
    {
        return m_size;
    }

    //
    // RowSourceT interface: returns the filtered row; forwards to Get().
    //
    const ElemType* Read(int row) override
    {
        return Get(row);
    }

    //
    // Returns the filtered output row `row`, computing and caching it on
    // first request.
    //
    // Throws std::out_of_range if `row` is outside [0, height).
    //
    const ElemType* Get(int row)
    {
        //
        // Validate first: a negative row would wrap to a huge cache key, and
        // the neighbour arithmetic below assumes 0 <= row < height.
        //
        if (row < 0 || row >= m_size.h)
        {
            throw std::out_of_range("Get");
        }
        const size_t key = static_cast<size_t>(row);
        if (m_result->Contains(key))
        {
            return m_result->Read(key);
        }
        //
        // Get the input row data. Neighbour indices are clamped to the image
        // so the first and last rows reuse themselves as the missing neighbour.
        //
        // This is done BEFORE the output row is registered in the cache: if
        // the upstream source throws, the cache must not be left holding an
        // unpopulated row that a later call would mistake for a valid result.
        //
        const int aboveRow = std::max<int>(row - 1, 0);
        const int belowRow = std::min<int>(row + 1, m_size.h - 1);
        const uint8_t* abovePtr = m_source->Read(aboveRow);
        const uint8_t* centerPtr = m_source->Read(row);
        const uint8_t* belowPtr = m_source->Read(belowRow);
        //
        // Allocate output row for writing.
        //
        // This consists of:
        //    (1) Ensuring the "unused pool" has at least one spare row;
        //        allocate if necessary. However, typically there's enough
        //        pre-allocated rows, based on pre-calculated maximum usage.
        //    (2) Moving that row from "unused pool" to "assigned pool"
        //    (3) Associate that row with the key (the row number), and then
        //        return that row to this function, which will populate its
        //        pixel values in the code below.
        //
        uint8_t* outputPtr = m_result->Write(key);
        //
        // Generate the entire output row with the SIMD row function.
        //
        // If C++ scalar processing is preferred instead, replace this call
        // with a loop over the pixels:
        //     for (int col = 0; col < m_size.w; ++col) { ... }
        //
        m_simdRowFunc.Apply(abovePtr, centerPtr, belowPtr, outputPtr);
        //
        // BEGIN IMPORTANT NOTE
        //
        // Do not call m_source->Remove() here.
        //
        // Only the "outermost scheduling loop" knows whether it is safe
        // to remove something.
        //
        // While this code skeleton is simplified, a more complicated
        // example may involve pipelining (filter cascading), which
        // may require source data to be kept inside RowCache for a
        // longer time.
        //
        // END IMPORTANT NOTE
        //
        return outputPtr;
    }

    //
    // Placeholder for releasing a cached output row. Currently does nothing.
    //
    void Remove(int row)
    {
        // TODO
        // see important note near the end of Get() method.
        (void)row;
    }

private:
    SimdRowFunc m_simdRowFunc;                            // row kernel applied by Get()
    std::shared_ptr<RowSourceT<uint8_t>> m_source;        // upstream row source (never null after construction)
    std::shared_ptr<RowCacheT<uint8_t, size_t>> m_result; // computed output rows, keyed by row number
    Size m_size;                                          // image size reported by m_source at construction
};
	//
	// RowFilterLoop_ThreeInput_OneOutput.cpp
	//

	#include <cstdint>
	#include <memory>

	// ======
	// Helper classes (stripped out)
	//
	// These classes are not the focus of the idea to be illustrated
	// by this code skeleton. Therefore, they are stripped out to the
	// bare minimum to keep C++ syntax parser happy.
	// ======

	// Image dimensions as plain signed integers.
	// RowFilterLoop_ThreeInput_OneOutput uses `w` as the row width (its column
	// loop bound and the first argument given to the row cache) and `h` as the
	// row count (the upper clamp for row indices and the cache's maximum capacity).
	struct Size
	{
	int w; // width: number of columns in one row
	int h; // height: number of rows
	};

	// ======

	//
	// Keyed store of rows (declaration-only stub).
	//
	// ElemType is the element type of a row; KeyType identifies a row.
	// Only the interface is declared here; no behaviour is defined in this file.
	//
	template <typename ElemType, typename KeyType>
	class RowCacheT
	{
	public:
	    // Takes three sizes. The argument names follow the only call site in this
	    // file, which passes (image width, initial capacity, maximum capacity).
	    // NOTE(review): confirm against the real implementation.
	    RowCacheT(size_t rowWidth, size_t initialCapacity, size_t maxCapacity);

	    // Reports whether a row is currently associated with `key`.
	    bool Contains(const KeyType& key) const;

	    // Read-only access to the row stored under `key`.
	    const ElemType* Read(const KeyType& key);

	    // Writable access to the row stored under `key`. The caller in this file
	    // relies on Write() to provide the storage for a not-yet-cached row.
	    ElemType* Write(const KeyType& key);
	};

	// ======

	//
	// Abstract producer of image rows.
	//
	// Implementations report the image dimensions and hand out read-only
	// pointers to individual rows. Instances are held and destroyed through
	// pointers to this base type (e.g. std::shared_ptr<RowSourceT<uint8_t>>),
	// so the destructor must be virtual.
	//
	template <typename ElemType>
	class RowSourceT
	{
	public:
	    // Virtual so that deleting a derived object through a RowSourceT pointer
	    // is well-defined.
	    virtual ~RowSourceT() = default;

	    // Dimensions of the image this source produces.
	    virtual Size ImageSize() = 0;

	    // Read-only pointer to the elements of the requested row.
	    // NOTE(review): how long the returned pointer stays valid is not
	    // specified here - confirm with each implementation.
	    virtual const ElemType* Read(int row) = 0;
	};

	// ======

	//
	// Row kernel: produces one output row from three input rows.
	//
	// Objects of this class hold configuration but are never modified after
	// construction, so every setting the filter needs has to be supplied
	// through the constructor.
	//
	class SimdRowFunc
	{
	public:
	    // Builds the kernel; configuration parameters, if any, belong here.
	    SimdRowFunc(/* config params */);

	    // Invoked once for every output row.
	    // The three inputs are the rows above, at, and below the output row.
	    // NOTE(review): the row length is not passed in; presumably it is part
	    // of the configuration - confirm with the real implementation.
	    void Apply(const uint8_t* inputTopRow, const uint8_t* inputCenterRow,
	               const uint8_t* inputBottomRow, uint8_t* outputRow) const;
	};

	// ======
	//
	// Beginning of the main focus of this code skeleton.
	//
	// Note that this shell class is universal: it is applicable to ALL
	// row-based filter operations that take three input rows and compute
	// one output row.
	//
	// To perform a different 3x3 image operation, just swap out the
	// SimdRowFunc class, or the "SimdRowFunc::Apply()" method.
	//
	// It is best to let SimdRowFunc::Apply() be an
	// * Interface method, or
	// * Raw function pointer.
	//
	// The overhead of an indirect function call is justified, because
	// an entire row of pixels is processed by each function call,
	// which amortizes the overhead of that call over the number of pixels
	// (width) of the image.
	//
	// ======

	//
	// Row-at-a-time filter stage for operations that need three input rows
	// (above, center, below) to compute one output row.
	//
	// Rows are pulled from an upstream RowSourceT<uint8_t>, passed to
	// SimdRowFunc::Apply(), and the computed row is kept in a RowCacheT keyed by
	// row number so that a repeated request for the same row is served from the
	// cache. At the top and bottom image edges the missing neighbour row is
	// replaced by the nearest valid row (index clamping).
	//
	// The class implements the RowSourceT<uint8_t> interface itself, so that it
	// can be used wherever a row source is expected.
	//
	class RowFilterLoop_ThreeInput_OneOutput final : public RowSourceT<uint8_t>
	{
	public:
	    using ElemType = uint8_t;

	public:
	    //
	    // Binds this stage to its upstream row source and creates the result cache.
	    //
	    // Throws std::logic_error if `source` is null, and std::invalid_argument
	    // (a std::logic_error) if the source reports a negative width or height.
	    //
	    RowFilterLoop_ThreeInput_OneOutput(std::shared_ptr<RowSourceT<uint8_t>> source)
	        : m_simdRowFunc()
	        , m_source(std::move(source))
	        , m_result()
	        , m_size()
	    {
	        if (!m_source)
	        {
	            //throw NullSharedPointerException(methodName);
	            throw std::logic_error("ctor");
	        }
	        m_size = m_source->ImageSize();
	        //
	        // Reject negative dimensions before converting to size_t; a negative
	        // int would otherwise wrap around to an enormous unsigned value.
	        //
	        if (m_size.w < 0 || m_size.h < 0)
	        {
	            throw std::invalid_argument("ctor: negative image size");
	        }
	        const size_t rowWidth = static_cast<size_t>(m_size.w);
	        const size_t initCap = 0;
	        const size_t maxCap = static_cast<size_t>(m_size.h);
	        m_result = std::make_shared<RowCacheT<uint8_t, size_t>>(
	            rowWidth, initCap, maxCap);
	    }

	public:
	    //
	    // RowSourceT interface: image dimensions (same as the upstream source).
	    //
	    Size ImageSize() override
	    {
	        return m_size;
	    }

	    //
	    // Const-qualified overload, kept for callers holding a const reference.
	    //
	    Size ImageSize() const
	    {
	        return m_size;
	    }

	    //
	    // RowSourceT interface: returns the filtered row; forwards to Get().
	    //
	    const ElemType* Read(int row) override
	    {
	        return Get(row);
	    }

	    //
	    // Returns the filtered output row `row`, computing and caching it on
	    // first request.
	    //
	    // Throws std::out_of_range if `row` is outside [0, height).
	    //
	    const ElemType* Get(int row)
	    {
	        //
	        // Validate first: a negative row would wrap to a huge cache key, and
	        // the neighbour arithmetic below assumes 0 <= row < height.
	        //
	        if (row < 0 || row >= m_size.h)
	        {
	            throw std::out_of_range("Get");
	        }
	        const size_t key = static_cast<size_t>(row);
	        if (m_result->Contains(key))
	        {
	            return m_result->Read(key);
	        }
	        //
	        // Get the input row data. Neighbour indices are clamped to the image
	        // so the first and last rows reuse themselves as the missing neighbour.
	        //
	        // This is done BEFORE the output row is registered in the cache: if
	        // the upstream source throws, the cache must not be left holding an
	        // unpopulated row that a later call would mistake for a valid result.
	        //
	        const int aboveRow = std::max<int>(row - 1, 0);
	        const int belowRow = std::min<int>(row + 1, m_size.h - 1);
	        const uint8_t* abovePtr = m_source->Read(aboveRow);
	        const uint8_t* centerPtr = m_source->Read(row);
	        const uint8_t* belowPtr = m_source->Read(belowRow);
	        //
	        // Allocate output row for writing.
	        //
	        // This consists of:
	        //    (1) Ensuring the "unused pool" has at least one spare row;
	        //        allocate if necessary. However, typically there's enough
	        //        pre-allocated rows, based on pre-calculated maximum usage.
	        //    (2) Moving that row from "unused pool" to "assigned pool"
	        //    (3) Associate that row with the key (the row number), and then
	        //        return that row to this function, which will populate its
	        //        pixel values in the code below.
	        //
	        uint8_t* outputPtr = m_result->Write(key);
	        //
	        // Generate the entire output row with the SIMD row function.
	        //
	        // If C++ scalar processing is preferred instead, replace this call
	        // with a loop over the pixels:
	        //     for (int col = 0; col < m_size.w; ++col) { ... }
	        //
	        m_simdRowFunc.Apply(abovePtr, centerPtr, belowPtr, outputPtr);
	        //
	        // BEGIN IMPORTANT NOTE
	        //
	        // Do not call m_source->Remove() here.
	        //
	        // Only the "outermost scheduling loop" knows whether it is safe
	        // to remove something.
	        //
	        // While this code skeleton is simplified, a more complicated
	        // example may involve pipelining (filter cascading), which
	        // may require source data to be kept inside RowCache for a
	        // longer time.
	        //
	        // END IMPORTANT NOTE
	        //
	        return outputPtr;
	    }

	    //
	    // Placeholder for releasing a cached output row. Currently does nothing.
	    //
	    void Remove(int row)
	    {
	        // TODO
	        // see important note near the end of Get() method.
	        (void)row;
	    }

	private:
	    SimdRowFunc m_simdRowFunc;                            // row kernel applied by Get()
	    std::shared_ptr<RowSourceT<uint8_t>> m_source;        // upstream row source (never null after construction)
	    std::shared_ptr<RowCacheT<uint8_t, size_t>> m_result; // computed output rows, keyed by row number
	    Size m_size;                                          // image size reported by m_source at construction
	};