markpapadakis/simpleStacklessCorosActors.cpp

## simpleStacklessCorosActors.cpp
// https://gist.github.com/markpapadakis/8dba5c480c13b12a056e (example)
// https://medium.com/@markpapadakis/high-performance-services-using-coroutines-ac8e9f54d727
#include <switch.h>
#include <switch_print.h>
#include <switch_ll.h>
#include <switch_bitops.h>
#include <md5.h>
#include <text.h>
#include <network.h>

// Computed gotos for faster dispatch and lifted restrictions wrt to code that can be placed in switch {}
#define COROS_HAVE_COMPUTEDLABELSDISPATCH

class CorosScheduler;

// Caveats/gotchas:
// 1. you can't rely on stack(stackless coroutines/actors), so you need to use coroutine members for persisting state
// e.g for (uint32_t i = 0; i != 10; ++i) { YieldCoro(); Print(i, "\n"); }   will not work
// whereas if you have a member uint32_t i, then for (i = 0; i != 10; ++i) { YieldCoro(); Print("i, "\n"); } will work fine
//
// 2. If your coroutine calls a function/functor and it or another factor in the call stack wants to yield, then the functor must
// be converted into a coroutine instead and scheduled via WaitCoro()
//
// 3. Because labels are used as return points, you can't create/initialize vars in BeginCoro() .. EndCoro() unless
// they are inside { }. This shouldn't be much of an issue.
//
// Benefits:
// Very lightweight coroutines. Because they are stackless, they can run on any thread, and its fairly trivial to optimize CorosScheduler::Run() to
// e.g consider type of coro and move it to a background thread (e.g for disk I/O), dequeing from a global/per-thread coroutines queue (submitted work) etc
// Priorities allow for an interesting scheme; this is based on Linux Kernel O(1) scheduler implementation semantics.
//
// You can also use thread_local freelists of various coroutine instances, as opposed to deleting them when completed, if you
// creating many 1000s/second and that becomes an issue. It's not implemented in this prototype, but it should be trivial to support that
// kind of reuse semantics.
struct coroutine
{
friend class CorosScheduler;

        using runres_t = coroutine *;
#ifdef COROS_HAVE_COMPUTEDLABELSDISPATCH
        using resume_token_t = const void *;
#else
        using resume_token_t = uint16_t;
#endif

struct coroutine
{
protected:
        switch_dlist schedulerList;
        resume_token_t resumeToken{0};
        coroutine *parent{nullptr};

        // 8 different prioerities, from 0(lowest) to 7(highest)
        // higher priority coros are executed first.
        // e.g no coros of priority 0 will run unless there are no more runnable coros of priority 1 or higher
        uint8_t prio{0};


#pragma mark BEGIN:Coroutiens API

#ifdef COROS_HAVE_COMPUTEDLABELSDISPATCH
// Place at the beginning of your operato() impl.
#define BeginCoro()             if (!resumeToken) { resumeToken = &&___coroEntry; }  else { goto *resumeToken; } ___coroEntry:
// Place at the end of your operator() impl.
#define EndCoro()               return 0
#define YieldCoroImpl(res)      resumeToken = &&__macro_concat(___coroLabel, __LINE__); return (runres_t)(res); __macro_concat(___coroLabel, __LINE__):
// Yield the coro, but next time it it's scheduled, it will be restarted as opposed to continue from where RestartCoro() has been invoked
// This is e.g useful for when you have a coro for network I/O, and you want to restart from beginning whenever it runs
#define RestartCoro()           resumeToken = nullptr; return (runres_t)2
#else
#define BeginCoro()             switch (resumeToken) { case 0:
#define EndCoro()               } return 0
#define YieldCoroImpl(res)      resumeToken = __LINE__; return (runres_t)(res);         case __LINE__:
#define RestartCoro()           resumeToken = 0; return (runres_t)2
#endif

// Exit coro - won't reschedule; will delete/free it
#define ExitCoro()              return 0

// Yield to antother(if any) runnable coro.
// Will exit and return control back to the scheduler, which will place it back in runnable queues, but will
// choose another coro to run
#define YieldCoro()             YieldCoroImpl(1)

// Will yield, but will not be placed back in the runnable queue sto run again
// Instead `c` will be scheduled and when it's done, then this coro will be placed back in the runnable queues to run again
#define WaitCoro(c)             YieldCoroImpl(c)
#define WaitCoroWithPrio(c, p)  YieldCoroImpl(coroutine::SetPrio(c, p))

// Will yield, but coro will not be placed in the runnable queues. It will not be deleted, and is
// expected to be scheduled back again later.
// Useful for when e.g you want to 'block' this thread until say a network event comes in in which case your network I/O logic matches it
// with this coro and schedules it back in. Useful for rate, special-case workloads
// WaitCoroWithPrio() is a handy macro for also setting priority to coro
#define FreezeCoro()            YieldCoroImpl(3)
// You may want to designate other scalar/constants as return values for YieldCoroImpl(). e.g 5 for schedule to bg thread, or 10 for reschedule again after 1 minute, etc.


#pragma mark END:Coroutines API


public:
        coroutine(void)
        {
                switch_dlist_init(&schedulerList);
        }

        virtual ~coroutine(void)
        {

        }

        virtual runres_t operator()(void) = 0;

        _ALWAYS_inline_ auto Priority(void) -> uint8_t
        {
                return prio;
        }

        void SetPriority(const uint8_t p)
        {
                assert(p < 8);
                prio = p;
        }


        // See: WaitCoroWithPrio()
        static _ALWAYS_inline_  coroutine *SetPrio(coroutine *const c, const uint8_t p)
        {
                c->SetPriority(p);
                return c;
        }
};


// An example scheduler implementation
// For an alternative impl. see Run() comments
// A more sophisticated scheduler would run runnable coros, and also dequeue from a coros queue submitted by other threads,
// support delayed re-scheduling, etc.
//
// The overhead compared to not using coros is <= 1us
class CorosScheduler
{
        private:
                switch_dlist corosList[8];      // Multiple runnable queues, for each supported prio
                uint32_t blockedCoros{0};       // How many coros are blocked(created, not runnable) waiting for a child coro to complete
                uint8_t runnableMask{0};

        private:
                void ScheduleCoro(coroutine *const c)
                {
                        const auto p = c->Priority();

                        if (unlikely(p >= 8))
                        {
                                // Low-priority; move to another background thread(e.g disk I/O operation)
                                // IMPLEMENT_ME
                        }
                        else
                        {
                                switch_dlist_insert_before(&corosList[p], &c->schedulerList);
                                runnableMask|=(1U<<p);
                        }
                }

                void ScheduleCoroUnsafe(coroutine *const c)
                {
                        // TODO: IMPLEMENT ME
                }

                // Selects the next runnable; will choose from the highest priority with any runnables queues
                coroutine *NextRunnable(void)
                {
                        if (!runnableMask)
                        {
                                // Nothing in any runnable queue
                                return nullptr;
                        }
                        else
                        {
                                const auto p    = 7 - SwitchBitOps::LeadingZeros(runnableMask);
                                auto &l         = corosList[p];
                                auto *const coro= switch_list_entry(coroutine, schedulerList, l.next);

                                switch_dlist_del_and_reset(&coro->schedulerList);
                                if (switch_dlist_isempty(&l))
                                {
                                        // This runnable queue is now empty
                                        runnableMask&=~(1U << p);
                                }

                                return coro;
                        }
                }

                void FreeCoro(coroutine *const coro)
                {
                        // TODO: maintain thread_local freeList for coro->Type()
                        delete coro;
                }

                void RunCoro(coroutine *const coro)
                {
                        const auto r = (*coro)();

                        switch ((uintptr_t)r)
                        {
                                case 0:
                                        // Coro has endeded
                                        if (coro->parent)
                                        {
                                                // Has a parent waiting for this coro's completion to resume
                                                --blockedCoros;
                                                ScheduleCoro(coro->parent);
                                        }
                                        FreeCoro(coro);
                                        break;

                                case 1:
                                        // Yielded, place back in runnable, but choose another coro now, if available
                                        // See YieldCoro()
                                        ScheduleCoro(coro);
                                        break;

                                case 2:
                                        // Finished, but it wants to run again as soon as it can
                                        // This can be useful, though not sure how yet;)
                                        // See RestartCoro()
                                        ScheduleCoro(coro);
                                        break;

                                case 3:
                                        // Co-ro is frozen. That is, its not runnable, but we won't delete it
                                        // The idea is that another thread or another coro will eventually make it runnable again later
                                        // This is for some edge-cases where you need to freeze the coro, e.g run soemthing on another thread(not as a coro) and then
                                        // notify the thread scheduler to schedule it back in
                                        // See FreezeCoro()
                                        break;

                                default:
                                        // Coro is waiting for another coro, setup parenthood and schedule it (potentially to another thread?)
                                        // See WaitCoro()
                                        ++blockedCoros;
                                        r->parent       = coro;
                                        r->resumeToken  = 0;
                                        ScheduleCoro(r);
                                        break;
                        }
                }

                bool RunNextRunnable(void)
                {
                        if (auto *const coro = NextRunnable())
                        {
                                RunCoro(coro);
                                return true;
                        }
                        else
                                return false;
                }

                inline bool AnyRunnable(void) const
                {
                        return runnableMask;
                }

                inline auto Blocked(void) const -> uint32_t
                {
                        return blockedCoros;
                }


        public:
                CorosScheduler(void)
                {
                        for (auto &it : corosList)
                                switch_dlist_init(&it);
                }

                static void ScheduleInThreadScheduler(coroutine *const c)
                {
                        // Schedule in current thread scheduler
                        // TODO: implement me
                }

                void Schedule(coroutine *const c, const uint8_t prio = 0)
                {
                        ScheduleCoro(c);
                        c->SetPriority(prio);
                        c->resumeToken  = 0;
                }

                // `c` ran on another thread and now it's done, and that thred handed it off back to us
                // We need to check if it has a parent, and if it does, make it runnable again(was waiting for `c`)
                void ProcessCompletedInAnotherThread(coroutine *const c)
                {
                        if (auto *const parent = c->parent)
                        {
                                // submit into e.g thread-specific MPSQ queue to be dequeued later by e.g TryDequeSubmittedCoro() or DequeueSubmittedCoro()
                                ScheduleCoroUnsafe(parent);
                        }

                        delete c;
                }

                virtual void Run(void)
                {
                        // An alternative implementation of this method would
                        // also try dequeing from a thread-specific tasks queue, or a global queue, or whatever else
                        // e.g
                        //
                        // for (;;)
                        // {
                        //      (void)RunNextRunnable();
                        //
                        //      if (AnyRunnable())
                        //      {
                        //              // At least one runnable, don't block waiting for external work
                        //              if (auto *const c = TryDequeSubmittedCoro())
                        //                      ScheduleCoro(c);
                        //      }
                        //      else
                        //      {
                        //              // No runnables, block waiting for external work if needed
                        //              ScheduleCoro(DequeueSubmittedCoro());
                        //      }
                        //}
                        // See also: https://gist.github.com/markpapadakis/8dba5c480c13b12a056e
                        //
                        // We could also implement network I/O poll as another coro with the lowest priority which
                        // returns control to the scheduler with RestartCoro(). See example in this file
                        //
                        // This is optimal for services that accept and manage connections I/O and also execute their requests

                        while (RunNextRunnable())
                        {

                        }
                }
};


// https://twitter.com/ID_AA_Carmack/status/575788622554628096
static CorosScheduler TheScheduler;


struct singer_coro
	: public coroutine
{
	resume_res_t operator()(void) override
	{
		BeginCoro();

		Print("Singing\n");

		EndCoro();
	}
};

struct diskreader_coro
	: public coroutine
{
	strwlen32_t *out; // dummy

	diskreader_coro(strwlen32_t *const o)
		: out(o)
	{

	}

	resume_res_t operator()(void) override
	{
		BeginCoro();
		out->Set(_S("Hello World"));
		EndCoro();
	}
};

struct dancer_coro
	: public coroutine
{
	strwlen32_t localBuf;
	uint32_t i;

	resume_res_t operator()(void) override
	{
		BeginCoro();

		Print("Dancing\n");
		YieldCoro();

		Print("Did Sing!\n");

		// Get some data into localBuf
		// maybe this would block accessing the disk or whatever else.
		// WaitCoro() will put this coro to sleep, waiting until another coro runs and then
		// it's made runnable again
		WaitCoro(new diskreader_coro(&localBuf));
		Print("Got:", localBuf, "\n");

		for (i = 0; i != 10; ++i)
		{
			Print("i = ", i, "\n");
			if (i == 5)
				ExitCoro();
			else
				YieldCoro();

		}


		EndCoro();
	}
};


struct reader_coro
	: public coroutine
{
	int fd;
	const uint64_t offset, len;
	void *const buf;

	reader_coro(int _fd, void *const _buf, const uint64_t _offset, const uint64_t _len)
		: fd(_fd), offset(_offset), len(_len), buf(_buf)
	{

	}

	resume_res_t operator()(void) override
	{
		BeginCoro();
		(void)pread64(fd, buf, len, offset);
		EndCoro();
	}
};


// A simple MD5 checksum coro
struct task_coroutine
	: public coroutine
{
	const char *const path;
	uint64_t fileSize, offset, upto, span;
	CMD5 md5Factory;
	uint8_t buf[1024];
	int fd;

	task_coroutine(const char *const fullPath)
		: path{fullPath}
	{

	}

	resume_res_t operator()(void) override
	{
		BeginCoro();

		fd = open(path, O_RDONLY);
		assert(fd != -1);

		fileSize = lseek64(fd, 0, SEEK_END);
		md5Factory.Init();

		for (offset = 0; offset != fileSize; )
		{
			upto = Min(fileSize, offset + 1024);
			span = upto - offset;

			// If we did have  pread2v(), we could attempt read, and if it failed with EAGAIN, we 'd
			// use WaitCor() which would (based on scheduler semantics) schedule it on another background thread
			// see: https://lwn.net/Articles/612483/
			WaitCoro(new reader_coro(fd, buf, offset, span));

			md5Factory.Update(buf, span);
			offset = upto;
		}
		(void)close(fd);

		uint8_t digest[16];

		md5Factory.Finalize(digest);
		Print(hex_fmt(digest, 16), "\n");

		EndCoro();
	}
};


struct fetchcf_coro
	: public coroutine
{

};


int main(int argc, char *argv[])
{
#if 0
	TheScheduler.Schedule(new dancer_coro());
	TheScheduler.Schedule(new callable_coro(
		[](void)
		{
			Print("Hello World\n");
		}));
	TheScheduler.Schedule(new singer_coro());
#endif
	TheScheduler.Schedule(new task_coroutine("/etc/passwd"));

	TheScheduler.Run();


	return 0;
}
	// https://gist.github.com/markpapadakis/8dba5c480c13b12a056e (example)
	// https://medium.com/@markpapadakis/high-performance-services-using-coroutines-ac8e9f54d727
	#include <switch.h>
	#include <switch_print.h>
	#include <switch_ll.h>
	#include <switch_bitops.h>
	#include <md5.h>
	#include <text.h>
	#include <network.h>

	// Computed gotos for faster dispatch and lifted restrictions wrt to code that can be placed in switch {}
	#define COROS_HAVE_COMPUTEDLABELSDISPATCH

	class CorosScheduler;

	// Caveats/gotchas:
	// 1. you can't rely on stack(stackless coroutines/actors), so you need to use coroutine members for persisting state
	// e.g for (uint32_t i = 0; i != 10; ++i) { YieldCoro(); Print(i, "\n"); } will not work
	// whereas if you have a member uint32_t i, then for (i = 0; i != 10; ++i) { YieldCoro(); Print("i, "\n"); } will work fine
	//
	// 2. If your coroutine calls a function/functor and it or another factor in the call stack wants to yield, then the functor must
	// be converted into a coroutine instead and scheduled via WaitCoro()
	//
	// 3. Because labels are used as return points, you can't create/initialize vars in BeginCoro() .. EndCoro() unless
	// they are inside { }. This shouldn't be much of an issue.
	//
	// Benefits:
	// Very lightweight coroutines. Because they are stackless, they can run on any thread, and its fairly trivial to optimize CorosScheduler::Run() to
	// e.g consider type of coro and move it to a background thread (e.g for disk I/O), dequeing from a global/per-thread coroutines queue (submitted work) etc
	// Priorities allow for an interesting scheme; this is based on Linux Kernel O(1) scheduler implementation semantics.
	//
	// You can also use thread_local freelists of various coroutine instances, as opposed to deleting them when completed, if you
	// creating many 1000s/second and that becomes an issue. It's not implemented in this prototype, but it should be trivial to support that
	// kind of reuse semantics.
	struct coroutine
	{
	friend class CorosScheduler;

	using runres_t = coroutine *;
	#ifdef COROS_HAVE_COMPUTEDLABELSDISPATCH
	using resume_token_t = const void *;
	#else
	using resume_token_t = uint16_t;
	#endif

	struct coroutine
	{
	protected:
	switch_dlist schedulerList;
	resume_token_t resumeToken{0};
	coroutine *parent{nullptr};

	// 8 different prioerities, from 0(lowest) to 7(highest)
	// higher priority coros are executed first.
	// e.g no coros of priority 0 will run unless there are no more runnable coros of priority 1 or higher
	uint8_t prio{0};


	#pragma mark BEGIN:Coroutiens API

	#ifdef COROS_HAVE_COMPUTEDLABELSDISPATCH
	// Place at the beginning of your operato() impl.
	#define BeginCoro() if (!resumeToken) { resumeToken = &&___coroEntry; } else { goto *resumeToken; } ___coroEntry:
	// Place at the end of your operator() impl.
	#define EndCoro() return 0
	#define YieldCoroImpl(res) resumeToken = &&__macro_concat(___coroLabel, __LINE__); return (runres_t)(res); __macro_concat(___coroLabel, __LINE__):
	// Yield the coro, but next time it it's scheduled, it will be restarted as opposed to continue from where RestartCoro() has been invoked
	// This is e.g useful for when you have a coro for network I/O, and you want to restart from beginning whenever it runs
	#define RestartCoro() resumeToken = nullptr; return (runres_t)2
	#else
	#define BeginCoro() switch (resumeToken) { case 0:
	#define EndCoro() } return 0
	#define YieldCoroImpl(res) resumeToken = __LINE__; return (runres_t)(res); case __LINE__:
	#define RestartCoro() resumeToken = 0; return (runres_t)2
	#endif

	// Exit coro - won't reschedule; will delete/free it
	#define ExitCoro() return 0

	// Yield to antother(if any) runnable coro.
	// Will exit and return control back to the scheduler, which will place it back in runnable queues, but will
	// choose another coro to run
	#define YieldCoro() YieldCoroImpl(1)

	// Will yield, but will not be placed back in the runnable queue sto run again
	// Instead `c` will be scheduled and when it's done, then this coro will be placed back in the runnable queues to run again
	#define WaitCoro(c) YieldCoroImpl(c)
	#define WaitCoroWithPrio(c, p) YieldCoroImpl(coroutine::SetPrio(c, p))

	// Will yield, but coro will not be placed in the runnable queues. It will not be deleted, and is
	// expected to be scheduled back again later.
	// Useful for when e.g you want to 'block' this thread until say a network event comes in in which case your network I/O logic matches it
	// with this coro and schedules it back in. Useful for rate, special-case workloads
	// WaitCoroWithPrio() is a handy macro for also setting priority to coro
	#define FreezeCoro() YieldCoroImpl(3)
	// You may want to designate other scalar/constants as return values for YieldCoroImpl(). e.g 5 for schedule to bg thread, or 10 for reschedule again after 1 minute, etc.


	#pragma mark END:Coroutines API




	public:
	coroutine(void)
	{
	switch_dlist_init(&schedulerList);
	}

	virtual ~coroutine(void)
	{

	}

	virtual runres_t operator()(void) = 0;

	_ALWAYS_inline_ auto Priority(void) -> uint8_t
	{
	return prio;
	}

	void SetPriority(const uint8_t p)
	{
	assert(p < 8);
	prio = p;
	}


	// See: WaitCoroWithPrio()
	static _ALWAYS_inline_ coroutine SetPrio(coroutine const c, const uint8_t p)
	{
	c->SetPriority(p);
	return c;
	}
	};





	// An example scheduler implementation
	// For an alternative impl. see Run() comments
	// A more sophisticated scheduler would run runnable coros, and also dequeue from a coros queue submitted by other threads,
	// support delayed re-scheduling, etc.
	//
	// The overhead compared to not using coros is <= 1us
	class CorosScheduler
	{
	private:
	switch_dlist corosList[8]; // Multiple runnable queues, for each supported prio
	uint32_t blockedCoros{0}; // How many coros are blocked(created, not runnable) waiting for a child coro to complete
	uint8_t runnableMask{0};

	private:
	void ScheduleCoro(coroutine *const c)
	{
	const auto p = c->Priority();

	if (unlikely(p >= 8))
	{
	// Low-priority; move to another background thread(e.g disk I/O operation)
	// IMPLEMENT_ME
	}
	else
	{
	switch_dlist_insert_before(&corosList[p], &c->schedulerList);
	runnableMask\|=(1U<<p);
	}
	}

	void ScheduleCoroUnsafe(coroutine *const c)
	{
	// TODO: IMPLEMENT ME
	}

	// Selects the next runnable; will choose from the highest priority with any runnables queues
	coroutine *NextRunnable(void)
	{
	if (!runnableMask)
	{
	// Nothing in any runnable queue
	return nullptr;
	}
	else
	{
	const auto p = 7 - SwitchBitOps::LeadingZeros(runnableMask);
	auto &l = corosList[p];
	auto *const coro= switch_list_entry(coroutine, schedulerList, l.next);

	switch_dlist_del_and_reset(&coro->schedulerList);
	if (switch_dlist_isempty(&l))
	{
	// This runnable queue is now empty
	runnableMask&=~(1U << p);
	}

	return coro;
	}
	}

	void FreeCoro(coroutine *const coro)
	{
	// TODO: maintain thread_local freeList for coro->Type()
	delete coro;
	}

	void RunCoro(coroutine *const coro)
	{
	const auto r = (*coro)();

	switch ((uintptr_t)r)
	{
	case 0:
	// Coro has endeded
	if (coro->parent)
	{
	// Has a parent waiting for this coro's completion to resume
	--blockedCoros;
	ScheduleCoro(coro->parent);
	}
	FreeCoro(coro);
	break;

	case 1:
	// Yielded, place back in runnable, but choose another coro now, if available
	// See YieldCoro()
	ScheduleCoro(coro);
	break;

	case 2:
	// Finished, but it wants to run again as soon as it can
	// This can be useful, though not sure how yet;)
	// See RestartCoro()
	ScheduleCoro(coro);
	break;

	case 3:
	// Co-ro is frozen. That is, its not runnable, but we won't delete it
	// The idea is that another thread or another coro will eventually make it runnable again later
	// This is for some edge-cases where you need to freeze the coro, e.g run soemthing on another thread(not as a coro) and then
	// notify the thread scheduler to schedule it back in
	// See FreezeCoro()
	break;

	default:
	// Coro is waiting for another coro, setup parenthood and schedule it (potentially to another thread?)
	// See WaitCoro()
	++blockedCoros;
	r->parent = coro;
	r->resumeToken = 0;
	ScheduleCoro(r);
	break;
	}
	}

	bool RunNextRunnable(void)
	{
	if (auto *const coro = NextRunnable())
	{
	RunCoro(coro);
	return true;
	}
	else
	return false;
	}

	inline bool AnyRunnable(void) const
	{
	return runnableMask;
	}

	inline auto Blocked(void) const -> uint32_t
	{
	return blockedCoros;
	}


	public:
	CorosScheduler(void)
	{
	for (auto &it : corosList)
	switch_dlist_init(&it);
	}

	static void ScheduleInThreadScheduler(coroutine *const c)
	{
	// Schedule in current thread scheduler
	// TODO: implement me
	}

	void Schedule(coroutine *const c, const uint8_t prio = 0)
	{
	ScheduleCoro(c);
	c->SetPriority(prio);
	c->resumeToken = 0;
	}

	// `c` ran on another thread and now it's done, and that thred handed it off back to us
	// We need to check if it has a parent, and if it does, make it runnable again(was waiting for `c`)
	void ProcessCompletedInAnotherThread(coroutine *const c)
	{
	if (auto *const parent = c->parent)
	{
	// submit into e.g thread-specific MPSQ queue to be dequeued later by e.g TryDequeSubmittedCoro() or DequeueSubmittedCoro()
	ScheduleCoroUnsafe(parent);
	}

	delete c;
	}

	virtual void Run(void)
	{
	// An alternative implementation of this method would
	// also try dequeing from a thread-specific tasks queue, or a global queue, or whatever else
	// e.g
	//
	// for (;;)
	// {
	// (void)RunNextRunnable();
	//
	// if (AnyRunnable())
	// {
	// // At least one runnable, don't block waiting for external work
	// if (auto *const c = TryDequeSubmittedCoro())
	// ScheduleCoro(c);
	// }
	// else
	// {
	// // No runnables, block waiting for external work if needed
	// ScheduleCoro(DequeueSubmittedCoro());
	// }
	//}
	// See also: https://gist.github.com/markpapadakis/8dba5c480c13b12a056e
	//
	// We could also implement network I/O poll as another coro with the lowest priority which
	// returns control to the scheduler with RestartCoro(). See example in this file
	//
	// This is optimal for services that accept and manage connections I/O and also execute their requests

	while (RunNextRunnable())
	{

	}
	}
	};


	// https://twitter.com/ID_AA_Carmack/status/575788622554628096
	static CorosScheduler TheScheduler;


	struct singer_coro
	: public coroutine
	{
	resume_res_t operator()(void) override
	{
	BeginCoro();

	Print("Singing\n");

	EndCoro();
	}
	};

	struct diskreader_coro
	: public coroutine
	{
	strwlen32_t *out; // dummy

	diskreader_coro(strwlen32_t *const o)
	: out(o)
	{

	}

	resume_res_t operator()(void) override
	{
	BeginCoro();
	out->Set(_S("Hello World"));
	EndCoro();
	}
	};

	struct dancer_coro
	: public coroutine
	{
	strwlen32_t localBuf;
	uint32_t i;

	resume_res_t operator()(void) override
	{
	BeginCoro();

	Print("Dancing\n");
	YieldCoro();

	Print("Did Sing!\n");

	// Get some data into localBuf
	// maybe this would block accessing the disk or whatever else.
	// WaitCoro() will put this coro to sleep, waiting until another coro runs and then
	// it's made runnable again
	WaitCoro(new diskreader_coro(&localBuf));
	Print("Got:", localBuf, "\n");

	for (i = 0; i != 10; ++i)
	{
	Print("i = ", i, "\n");
	if (i == 5)
	ExitCoro();
	else
	YieldCoro();

	}


	EndCoro();
	}
	};


	struct reader_coro
	: public coroutine
	{
	int fd;
	const uint64_t offset, len;
	void *const buf;

	reader_coro(int _fd, void *const _buf, const uint64_t _offset, const uint64_t _len)
	: fd(_fd), offset(_offset), len(_len), buf(_buf)
	{

	}

	resume_res_t operator()(void) override
	{
	BeginCoro();
	(void)pread64(fd, buf, len, offset);
	EndCoro();
	}
	};


	// A simple MD5 checksum coro
	struct task_coroutine
	: public coroutine
	{
	const char *const path;
	uint64_t fileSize, offset, upto, span;
	CMD5 md5Factory;
	uint8_t buf[1024];
	int fd;

	task_coroutine(const char *const fullPath)
	: path{fullPath}
	{

	}

	resume_res_t operator()(void) override
	{
	BeginCoro();

	fd = open(path, O_RDONLY);
	assert(fd != -1);

	fileSize = lseek64(fd, 0, SEEK_END);
	md5Factory.Init();

	for (offset = 0; offset != fileSize; )
	{
	upto = Min(fileSize, offset + 1024);
	span = upto - offset;

	// If we did have pread2v(), we could attempt read, and if it failed with EAGAIN, we 'd
	// use WaitCor() which would (based on scheduler semantics) schedule it on another background thread
	// see: https://lwn.net/Articles/612483/
	WaitCoro(new reader_coro(fd, buf, offset, span));

	md5Factory.Update(buf, span);
	offset = upto;
	}
	(void)close(fd);

	uint8_t digest[16];

	md5Factory.Finalize(digest);
	Print(hex_fmt(digest, 16), "\n");

	EndCoro();
	}
	};


	struct fetchcf_coro
	: public coroutine
	{

	};


	int main(int argc, char *argv[])
	{
	#if 0
	TheScheduler.Schedule(new dancer_coro());
	TheScheduler.Schedule(new callable_coro(
	[](void)
	{
	Print("Hello World\n");
	}));
	TheScheduler.Schedule(new singer_coro());
	#endif
	TheScheduler.Schedule(new task_coroutine("/etc/passwd"));

	TheScheduler.Run();


	return 0;
	}