Instantly share code, notes, and snippets.

Embed
What would you like to do?
coroutine benchmark

Benchmark Data

| 组件(Avg) | 协程数:1 切换开销 | 协程数:1000 创建开销 | 协程数:1000 切换开销 | 协程数:30000 创建开销 | 协程数:30000 切换开销 |
| --- | --- | --- | --- | --- | --- |
| 栈大小(如果可指定) | 16 KB | 2 MB | 2 MB | 64 KB | 64 KB |
| libcopp | 60 ns | 3.7 us | 91 ns | 3.5 us | 239 ns |
| libcopp+动态栈池 | 60 ns | 109 ns | 90 ns | 261 ns | 238 ns |
| libcopp+libcotask | 79 ns | 4.2 us | 124 ns | 3.8 us | 338 ns |
| libcopp+libcotask+动态栈池 | 80 ns | 246 ns | 126 ns | 340 ns | 335 ns |
| libco+静态栈池 | 94 ns | 7.1 us | 180 ns | 5.7 us | 451 ns |
| libco(共享栈4K占用) | 94 ns | 3.8 us | 173 ns | 4.0 us | 558 ns |
| libco(共享栈8K占用) | 95 ns | 3.8 us | 1021 ns | 3.8 us | 1810 ns |
| libco(共享栈32K占用) | - | 3.8 us | 6275 ns | 4.0 us | 6429 ns |
| libgo with boost | 197 ns | 5.3 us | 124 ns | 2.3 us | 441 ns |
| libgo with ucontext | 539 ns | 7.0 us | 482 ns | 2.7 us | 921 ns |
| goroutine(golang) | 464 ns | 578 ns | 538 ns | 1.4 us | 799 ns |
| linux ucontext | 356 ns | 4.0 us | 431 ns | 4.5 us | 946 ns |

Build & run script

libcopp&libcotask

bash ./cmake_dev.sh -b RelWithDebInfo -us;
make -j4;
make benchmark;

goroutine

/usr/local/go/bin/go build -o goroutine_benchmark goroutine_benchmark.go;
./goroutine_benchmark 1 3000000;
./goroutine_benchmark 1000 1000;
./goroutine_benchmark 30000 100;

ucontext

g++ -O2 -g -DNDEBUG -ggdb   -Wall -Werror -fPIC ucontext_benchmark.cpp -o ucontext_benchmark;
./ucontext_benchmark 1 3000000 16;
./ucontext_benchmark 1000 1000 2048;
./ucontext_benchmark 30000 100 64;

libco

# build libco
mkdir build && cd build;
cmake ../libco;
make -j4;
cd ..;

# build exe
g++ -O2 -g -DNDEBUG -ggdb -Wall -Werror -fPIC libco_benchmark.cpp build/libcolib.a -o libco_benchmark -Ilibco -lpthread -lz -lm -ldl ;
# static stack pool
./libco_benchmark 1 3000000 16;
./libco_benchmark 1000 1000 2048;
./libco_benchmark 30000 100 64;

# shared stack=4K
./libco_benchmark 1 3000000 16 4;
./libco_benchmark 1000 1000 2048 4;
./libco_benchmark 30000 100 64 4;

# shared stack=8K
./libco_benchmark 1 3000000 16 8;
./libco_benchmark 1000 1000 2048 8;
./libco_benchmark 30000 100 64 8;

# shared stack=32K
./libco_benchmark 1000 1000 2048 32;
./libco_benchmark 30000 100 64 32;

libgo with boost

# build libgo
mkdir build && cd build;
cmake ../libgo -DCMAKE_BUILD_TYPE=RelWithDebInfo -DBOOST_ROOT=/home/owent/prebuilt/boost/1.67.0 -DENABLE_BOOST_CONTEXT=YES;
make -j4;
cd ..;

# build exe
g++ -O2 -g -DNDEBUG -ggdb -Wall -Werror -fPIC libgo_benchmark.cpp -o libgo_benchmark -Ilibgo -Ilibgo/libgo -Ilibgo/libgo/linux -I/home/owent/prebuilt/boost/1.67.0/include -L/home/owent/prebuilt/boost/1.67.0/lib -Lbuild -llibgo -lboost_coroutine -lboost_context -lboost_system -lboost_thread -lrt -lpthread -ldl;
./libgo_benchmark 1 3000000 16;
./libgo_benchmark 1000 1000 2048;
./libgo_benchmark 30000 100 64;

libgo with ucontext

# build libgo
mkdir build && cd build;
cmake ../libgo -DCMAKE_BUILD_TYPE=RelWithDebInfo -DENABLE_BOOST_CONTEXT=NO;
make -j4;
cd ..;

# build exe
g++ -O2 -g -DNDEBUG -ggdb -Wall -Werror -fPIC libgo_benchmark.cpp -o libgo_benchmark -Ilibgo -Ilibgo/libgo -Ilibgo/libgo/linux -Lbuild -llibgo -lrt -lpthread -ldl;
./libgo_benchmark 1 3000000 16;
./libgo_benchmark 1000 1000 2048;
./libgo_benchmark 30000 100 64;
package main
import (
"fmt"
"os"
"strconv"
"time"
)
// runCallback echoes every value received on in back onto out, and returns
// once in has been closed and drained.
func runCallback(in, out chan int64) {
	for value := range in {
		out <- value
	}
}
// avgNs returns total/count, or total unchanged when count is zero, so the
// per-item averages printed by runTest never divide by zero.
func avgNs(total, count int64) int64 {
	if count == 0 {
		return total
	}
	return total / count
}

// runTest runs one benchmark round and prints the elapsed time of each phase:
//  1. allocate one buffered (capacity 1) in/out channel pair per goroutine,
//  2. start coroutineNum goroutines, each running runCallback on its pair,
//  3. perform switchTimes rounds in which every goroutine is sent a 1 and its
//     echo is read back (each exchange is counted as one switch),
//  4. close every channel, which lets the goroutines leave their receive loop.
//
// round is only used to label the output.
func runTest(round int, coroutineNum, switchTimes int64) {
	fmt.Printf("##### Round: %v\n", round)

	start := time.Now()
	channelsIn, channelsOut := make([]chan int64, coroutineNum), make([]chan int64, coroutineNum)
	for i := range channelsIn {
		channelsIn[i] = make(chan int64, 1)
		channelsOut[i] = make(chan int64, 1)
	}
	elapsed := time.Since(start).Nanoseconds()
	fmt.Printf("Create %v goroutines and channels cost %vns, avg %vns\n", coroutineNum, elapsed, avgNs(elapsed, coroutineNum))

	start = time.Now()
	for i := range channelsIn {
		go runCallback(channelsIn[i], channelsOut[i])
	}
	elapsed = time.Since(start).Nanoseconds()
	fmt.Printf("Start %v goroutines and channels cost %vns, avg %vns\n", coroutineNum, elapsed, avgNs(elapsed, coroutineNum))

	// sum counts completed exchanges: every echo carries the value 1.
	var sum int64
	start = time.Now()
	for i := int64(0); i < switchTimes; i++ {
		for j := range channelsIn {
			channelsIn[j] <- 1
			sum += <-channelsOut[j]
		}
	}
	elapsed = time.Since(start).Nanoseconds()
	// sum is zero when either argument is zero; avgNs keeps that from panicking.
	fmt.Printf("Switch %v goroutines for %v times cost %vns, avg %vns\n", coroutineNum, sum, elapsed, avgNs(elapsed, sum))

	start = time.Now()
	for i := range channelsIn {
		close(channelsIn[i])
		close(channelsOut[i])
	}
	elapsed = time.Since(start).Nanoseconds()
	fmt.Printf("Close %v goroutines cost %vns, avg %vns\n", coroutineNum, elapsed, avgNs(elapsed, coroutineNum))
}
// main echoes the command line, reads the optional arguments
//
//	goroutine_benchmark [coroutineNum] [switchTimes]
//
// (defaults 30000 and 1000) and runs five benchmark rounds with them.
// An argument that is not a positive integer is reported on stderr and the
// program exits with status 1 instead of silently running with 0.
func main() {
	var coroutineNum, switchTimes int64 = 30000, 1000

	fmt.Printf("### Run: ")
	for _, v := range os.Args {
		fmt.Printf(" \"%s\"", v)
	}
	fmt.Printf("\n")

	// Optional positional arguments, in order, and where each one is stored.
	targets := []struct {
		name string
		dst  *int64
	}{
		{"coroutineNum", &coroutineNum},
		{"switchTimes", &switchTimes},
	}
	for i, target := range targets {
		if len(os.Args) <= i+1 {
			break
		}
		v, err := strconv.Atoi(os.Args[i+1])
		if err != nil || v <= 0 {
			fmt.Fprintf(os.Stderr, "invalid %s %q: expected a positive integer\n", target.name, os.Args[i+1])
			os.Exit(1)
		}
		*target.dst = int64(v)
	}

	for i := 1; i <= 5; i++ {
		runTest(i, coroutineNum, switchTimes)
	}
}
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <ctime>
#include <inttypes.h>
#include <stdint.h>
#include <vector>
#include <memory>
#include <co_routine.h>
#include <chrono>
// Timing helpers for the benchmark phases.
// A steady (monotonic) clock is used so that wall-clock adjustments made while
// the benchmark runs cannot distort the measured intervals.
#define CALC_CLOCK_T std::chrono::steady_clock::time_point
#define CALC_CLOCK_NOW() std::chrono::steady_clock::now()
// Duration x converted to whole milliseconds, as int.
#define CALC_MS_CLOCK(x) static_cast<int>(std::chrono::duration_cast<std::chrono::milliseconds>(x).count())
// Duration x in nanoseconds divided by y; a y of 0 is treated as 1 to avoid dividing by zero.
#define CALC_NS_AVG_CLOCK(x, y) static_cast<long long>(std::chrono::duration_cast<std::chrono::nanoseconds>(x).count() / ((y) ? (y) : 1))
class coroutine_context;
int switch_count = 100; // number of switches performed by each coroutine
int max_coroutine_number = 100000; // number of coroutines
/**
 * Benchmark wrapper around a single libco coroutine.
 *
 * The coroutine body (start_callback) yields switch_count times, marks the
 * object finished and yields once more; the runner drives it with resume()
 * until is_finished() reports true.
 *
 * The object owns the handle returned by co_create() and releases it in the
 * destructor, so copying is disabled to avoid releasing the handle twice.
 */
class coroutine_context {
public:
    /**
     * Creates the underlying libco coroutine.
     * @param libco_attr  attributes forwarded to co_create()
     * @param share_stack number of stack bytes the coroutine body occupies
     *                    before it starts switching; 0 or less occupies none
     */
    coroutine_context(const stCoRoutineAttr_t* libco_attr, int share_stack) {
        callee_ctx_ = NULL;
        share_stack_ = share_stack;
        is_in_callback_ = false;
        is_finished_ = false;
        co_create(&callee_ctx_, libco_attr, &start_callback, this);
    }

    ~coroutine_context() {
        if (NULL != callee_ctx_) {
            co_release(callee_ctx_);
        }
    }

    // The destructor releases callee_ctx_; a copy would release it a second time.
    coroutine_context(const coroutine_context&) = delete;
    coroutine_context& operator=(const coroutine_context&) = delete;

    /** Switches into the coroutine; does nothing if it is flagged as already running. */
    void resume() {
        if (is_in_callback_) {
            return;
        }

        co_resume(callee_ctx_);
        // Control is back in the caller.
        is_in_callback_ = false;
    }

    /** Switches back to the caller; does nothing unless called from inside the coroutine body. */
    void yield() {
        if (!is_in_callback_) {
            return;
        }

        co_yield(callee_ctx_);
        // The coroutine has been resumed again.
        is_in_callback_ = true;
    }

    /**
     * Coroutine entry point handed to co_create().
     * @param arg the owning coroutine_context
     * @return the stack buffer occupied below (NULL when none was requested)
     */
    static void* start_callback(void* arg) {
        coroutine_context* this_coroutine = static_cast<coroutine_context*>(arg);
        this_coroutine->is_in_callback_ = true;

        // Optionally occupy part of the stack so that the shared-stack copy cost is exercised.
        void* stack_buffer = NULL;
        if (this_coroutine->share_stack_ > 0) {
            const size_t occupy = static_cast<size_t>(this_coroutine->share_stack_);
            // Never copy more than the buffer holds, even for a tiny share_stack_.
            const size_t copy_len = occupy < sizeof(coroutine_context) ? occupy : sizeof(coroutine_context);

            // alloca must stay in this function: the memory lives in this frame.
            stack_buffer = alloca(occupy);
            memset(stack_buffer, 0, occupy);
            // Write to both ends of the buffer.
            memcpy(stack_buffer, this_coroutine, copy_len);
            memcpy(static_cast<char*>(stack_buffer) + occupy - copy_len, this_coroutine, copy_len);
        }

        int count = switch_count; // each coroutine switches this many times
        while (count-- > 0) {
            this_coroutine->yield();
        }

        this_coroutine->is_finished_ = true;
        this_coroutine->yield();

        return stack_buffer;
    }

    /** True once the coroutine body has completed all of its switches. */
    inline bool is_finished() const { return is_finished_; }

private:
    stCoRoutine_t* callee_ctx_; // libco handle, owned
    int share_stack_;           // bytes of stack to occupy in the coroutine body
    bool is_in_callback_;       // true while execution is inside the coroutine body
    bool is_finished_;          // set by the coroutine body after its last switch
};
/**
 * libco benchmark entry point.
 *
 * Usage: libco_benchmark [coroutine_count] [switch_count] [stack_size_kb] [share_stack_kb]
 *
 * Prints the time taken to create the coroutines, to switch through all of
 * them until every one has finished, and to destroy them. When share_stack_kb
 * is non-zero a single shared stack is allocated and every coroutine occupies
 * that many KB of it; otherwise one stack per coroutine is pre-allocated.
 *
 * @return 0 on success, 1 when an argument is out of range
 */
int main(int argc, char *argv[]) {
    puts("###################### libco coroutine ###################");
    printf("########## Cmd:");
    for (int i = 0; i < argc; ++i) {
        printf(" %s", argv[i]);
    }
    puts("");

    // Largest KB count whose size in bytes still fits in a 32-bit int.
    const int max_kb = INT32_MAX / 1024;
    int stack_size_kb = 16;
    int share_stack_kb = 0;

    if (argc > 1) {
        max_coroutine_number = atoi(argv[1]);
    }
    if (argc > 2) {
        switch_count = atoi(argv[2]);
    }
    if (argc > 3) {
        stack_size_kb = atoi(argv[3]);
    }
    if (argc > 4) {
        share_stack_kb = atoi(argv[4]);
    }

    // A negative count would turn into a huge size_t below, and an oversized
    // KB value would overflow the int byte counts.
    if (max_coroutine_number < 0 || switch_count < 0 || stack_size_kb <= 0 || stack_size_kb > max_kb ||
        share_stack_kb < 0 || share_stack_kb > max_kb) {
        fprintf(stderr, "usage: %s [coroutine_count >= 0] [switch_count >= 0] [stack_size_kb > 0] [share_stack_kb >= 0]\n",
                argc > 0 ? argv[0] : "libco_benchmark");
        return 1;
    }

    const int enable_share_stack = share_stack_kb * 1024;

    stCoRoutineAttr_t libco_attr;
    libco_attr.stack_size = stack_size_kb * 1024;
    if (0 != enable_share_stack) {
        libco_attr.share_stack = co_alloc_sharestack(1, libco_attr.stack_size);
    } else {
        libco_attr.share_stack = co_alloc_sharestack(max_coroutine_number, libco_attr.stack_size);
    }

    time_t begin_time = time(NULL);
    CALC_CLOCK_T begin_clock = CALC_CLOCK_NOW();

    // create coroutines, filling the array in strides of 64
    std::vector<std::unique_ptr<coroutine_context> > co_arr;
    co_arr.resize(static_cast<size_t>(max_coroutine_number));
    for (size_t i = 0; i < 64; ++i) {
        for (size_t j = 0; i + j * 64 < co_arr.size(); ++j) {
            co_arr[i + j * 64].reset(new coroutine_context(&libco_attr, enable_share_stack));
        }
    }

    time_t end_time = time(NULL);
    CALC_CLOCK_T end_clock = CALC_CLOCK_NOW();
    printf("create %d coroutine, cost time: %d s, clock time: %d ms, avg: %lld ns\n", max_coroutine_number,
           static_cast<int>(end_time - begin_time), CALC_MS_CLOCK(end_clock - begin_clock),
           CALC_NS_AVG_CLOCK(end_clock - begin_clock, max_coroutine_number));

    begin_time = end_time;
    begin_clock = end_clock;

    // yield & resume from runner: keep sweeping until no coroutine is left unfinished
    bool continue_flag = true;
    long long real_switch_times = static_cast<long long>(0);
    while (continue_flag) {
        continue_flag = false;
        for (size_t i = 0; i < co_arr.size(); ++i) {
            if (false == co_arr[i]->is_finished()) {
                continue_flag = true;
                ++real_switch_times;
                co_arr[i]->resume();
            }
        }
    }

    end_time = time(NULL);
    end_clock = CALC_CLOCK_NOW();
    printf("switch %d coroutine contest %lld times, cost time: %d s, clock time: %d ms, avg: %lld ns\n", max_coroutine_number,
           real_switch_times, static_cast<int>(end_time - begin_time), CALC_MS_CLOCK(end_clock - begin_clock),
           CALC_NS_AVG_CLOCK(end_clock - begin_clock, real_switch_times));

    begin_time = end_time;
    begin_clock = end_clock;

    // destroy every coroutine
    co_arr.clear();

    end_time = time(NULL);
    end_clock = CALC_CLOCK_NOW();
    printf("remove %d coroutine, cost time: %d s, clock time: %d ms, avg: %lld ns\n", max_coroutine_number,
           static_cast<int>(end_time - begin_time), CALC_MS_CLOCK(end_clock - begin_clock),
           CALC_NS_AVG_CLOCK(end_clock - begin_clock, max_coroutine_number));
    return 0;
}
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <ctime>
#include <inttypes.h>
#include <stdint.h>
#include <vector>
#include <memory>
#include <libgo/coroutine.h>
#include <chrono>
// Timing helpers for the benchmark phases.
// A steady (monotonic) clock is used so that wall-clock adjustments made while
// the benchmark runs cannot distort the measured intervals.
#define CALC_CLOCK_T std::chrono::steady_clock::time_point
#define CALC_CLOCK_NOW() std::chrono::steady_clock::now()
// Duration x converted to whole milliseconds, as int.
#define CALC_MS_CLOCK(x) static_cast<int>(std::chrono::duration_cast<std::chrono::milliseconds>(x).count())
// Duration x in nanoseconds divided by y; a y of 0 is treated as 1 to avoid dividing by zero.
#define CALC_NS_AVG_CLOCK(x, y) static_cast<long long>(std::chrono::duration_cast<std::chrono::nanoseconds>(x).count() / ((y) ? (y) : 1))
int switch_count = 100; // number of switches performed by each coroutine
int max_coroutine_number = 100000; // number of coroutines
/**
 * libgo benchmark entry point.
 *
 * Usage: libgo_benchmark [coroutine_count] [switch_count] [stack_size_kb]
 *
 * Prints the time taken to create the coroutines and to run the scheduler
 * until every coroutine has yielded switch_count times and finished.
 *
 * @return 0 on success, 1 when an argument is out of range
 */
int main(int argc, char *argv[]) {
    puts("###################### libgo coroutine ###################");
    printf("########## Cmd:");
    for (int i = 0; i < argc; ++i) {
        printf(" %s", argv[i]);
    }
    puts("");

    int stack_size_kb = 16;
    if (argc > 1) {
        max_coroutine_number = atoi(argv[1]);
    }
    if (argc > 2) {
        switch_count = atoi(argv[2]);
    }
    if (argc > 3) {
        stack_size_kb = atoi(argv[3]);
    }

    // A negative stack size would turn into a huge size_t, and negative counts are meaningless.
    if (max_coroutine_number < 0 || switch_count < 0 || stack_size_kb <= 0) {
        fprintf(stderr, "usage: %s [coroutine_count >= 0] [switch_count >= 0] [stack_size_kb > 0]\n",
                argc > 0 ? argv[0] : "libgo_benchmark");
        return 1;
    }
    const size_t stack_size = static_cast<size_t>(stack_size_kb) * 1024;

    time_t begin_time = time(NULL);
    CALC_CLOCK_T begin_clock = CALC_CLOCK_NOW();

    // create coroutines
    for (int i = 0; i < max_coroutine_number; ++ i) {
        go_stack(stack_size) []{
            int left_count = switch_count;
            while (left_count -- > 0) {
                co_yield;
            }
        };
    }

    time_t end_time = time(NULL);
    CALC_CLOCK_T end_clock = CALC_CLOCK_NOW();
    printf("create %d coroutine, cost time: %d s, clock time: %d ms, avg: %lld ns\n", max_coroutine_number,
           static_cast<int>(end_time - begin_time), CALC_MS_CLOCK(end_clock - begin_clock),
           CALC_NS_AVG_CLOCK(end_clock - begin_clock, max_coroutine_number));

    begin_time = end_time;
    begin_clock = end_clock;

    // yield & resume from runner
    co_sched.RunUntilNoTask();
    // The switch count is computed rather than observed: each coroutine yields switch_count times.
    long long real_switch_times = max_coroutine_number * static_cast<long long>(switch_count);

    end_time = time(NULL);
    end_clock = CALC_CLOCK_NOW();
    printf("switch %d coroutine contest %lld times, cost time: %d s, clock time: %d ms, avg: %lld ns\n", max_coroutine_number,
           real_switch_times, static_cast<int>(end_time - begin_time), CALC_MS_CLOCK(end_clock - begin_clock),
           CALC_NS_AVG_CLOCK(end_clock - begin_clock, real_switch_times));

    begin_time = end_time;
    begin_clock = end_clock;

    // Nothing is torn down between these two samples; the line is printed only
    // to keep the output format aligned with the other benchmarks.
    end_time = time(NULL);
    end_clock = CALC_CLOCK_NOW();
    printf("remove %d coroutine, cost time: %d s, clock time: %d ms, avg: %lld ns\n", max_coroutine_number,
           static_cast<int>(end_time - begin_time), CALC_MS_CLOCK(end_clock - begin_clock),
           CALC_NS_AVG_CLOCK(end_clock - begin_clock, max_coroutine_number));
    return 0;
}
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <ctime>
#include <inttypes.h>
#include <stdint.h>
#include <vector>
#include <memory>
extern "C" {
#include <errno.h>
#include <fcntl.h>
#include <signal.h>
#include <sys/mman.h>
#include <sys/resource.h>
#include <sys/stat.h>
#include <sys/time.h>
#include <sys/types.h>
#include <unistd.h>
#include <ucontext.h>
}
#include <chrono>
// Timing helpers for the benchmark phases.
// A steady (monotonic) clock is used so that wall-clock adjustments made while
// the benchmark runs cannot distort the measured intervals.
#define CALC_CLOCK_T std::chrono::steady_clock::time_point
#define CALC_CLOCK_NOW() std::chrono::steady_clock::now()
// Duration x converted to whole milliseconds, as int.
#define CALC_MS_CLOCK(x) static_cast<int>(std::chrono::duration_cast<std::chrono::milliseconds>(x).count())
// Duration x in nanoseconds divided by y; a y of 0 is treated as 1 to avoid dividing by zero.
#define CALC_NS_AVG_CLOCK(x, y) static_cast<long long>(std::chrono::duration_cast<std::chrono::nanoseconds>(x).count() / ((y) ? (y) : 1))
class coroutine_context;
int switch_count = 100; // number of switches performed by each coroutine
// Coroutine currently executing on this thread; null while the runner itself is executing.
thread_local coroutine_context* this_coroutine = nullptr;
int max_coroutine_number = 100000; // number of coroutines

/**
 * Benchmark coroutine built on ucontext with a private mmap-ed stack.
 *
 * The mapping is laid out as [guard page | usable stack]; the guard page is
 * made inaccessible with mprotect(). The coroutine body (start_callback)
 * yields switch_count times, marks the object finished and yields once more;
 * the runner drives it with resume() until is_finished() reports true.
 *
 * The object owns its mapping and unmaps it in the destructor, so copying is
 * disabled to avoid unmapping the same region twice.
 */
class coroutine_context {
public:
    /** Page granularity used for rounding and for the guard region (hard-coded, as in the rest of this file). */
    static const size_t kPageSize = 4096;

    /**
     * Total mapping size for a requested stack size: the request rounded up
     * to a whole number of pages (at least one), plus one guard page.
     */
    static size_t padded_stack_size(size_t stack_sz) {
        size_t pages = (stack_sz + kPageSize - 1) / kPageSize;
        if (0 == pages) {
            pages = 1; // always leave at least one usable page above the guard page
        }
        return pages * kPageSize + kPageSize;
    }

    /**
     * Maps the stack and prepares the coroutine context.
     * If the mapping fails the error is reported with perror() and the object
     * is marked finished so that the runner skips it.
     * @param stack_sz requested usable stack size in bytes
     */
    coroutine_context(size_t stack_sz) {
        stack_ctx_.size = 0;
        stack_ctx_.sp = NULL;
        caller_coroutine_ = NULL;
        is_in_callback_ = false;
        is_finished_ = false;

        // padding to 4K, plus the guard page
        stack_sz = padded_stack_size(stack_sz);

        // conform to POSIX.4 (POSIX.1b-1993, _POSIX_C_SOURCE=199309L)
        void *start_ptr =
#if defined(macintosh) || defined(__APPLE__) || defined(__APPLE_CC__)
            ::mmap(0, stack_sz, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, 0);
#else
            ::mmap(0, stack_sz, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
#endif

        if (!start_ptr || MAP_FAILED == start_ptr) {
            perror("mmap");
            // No stack: never switch into this coroutine.
            is_finished_ = true;
            return;
        }

        // The lowest page is the guard page.
        ::mprotect(start_ptr, kPageSize, PROT_NONE);

        stack_ctx_.size = stack_sz;
        stack_ctx_.sp = static_cast<char *>(start_ptr) + stack_ctx_.size; // stack down

        // ==============================================================
        getcontext(&callee_ctx_);
        callee_ctx_.uc_stack.ss_sp = static_cast<char *>(start_ptr) + kPageSize;
        callee_ctx_.uc_stack.ss_size = stack_sz - kPageSize;
        callee_ctx_.uc_link = &caller_ctx_;
        makecontext(&callee_ctx_, start_callback, 0);
    }

    ~coroutine_context() {
        if (NULL != stack_ctx_.sp) {
            void *start_ptr = static_cast<char *>(stack_ctx_.sp) - stack_ctx_.size;
            ::munmap(start_ptr, stack_ctx_.size);
        }
    }

    // The destructor unmaps the stack; a copy would unmap it a second time.
    coroutine_context(const coroutine_context&) = delete;
    coroutine_context& operator=(const coroutine_context&) = delete;

    /** Switches into the coroutine; does nothing if it is already running or has no stack. */
    void resume() {
        if (is_in_callback_ || NULL == stack_ctx_.sp) {
            return;
        }

        is_in_callback_ = true;
        caller_coroutine_ = this_coroutine;
        this_coroutine = this;
        swapcontext(&caller_ctx_, &callee_ctx_);
    }

    /** Switches back to the caller; does nothing unless called from inside the coroutine body. */
    void yield() {
        if (!is_in_callback_) {
            return;
        }

        is_in_callback_ = false;
        this_coroutine = caller_coroutine_;
        caller_coroutine_ = NULL;
        swapcontext(&callee_ctx_, &caller_ctx_);
    }

    /** Coroutine entry point passed to makecontext(); operates on this_coroutine. */
    static void start_callback() {
        this_coroutine->is_finished_ = false;

        int count = switch_count; // each coroutine switches this many times
        while (count-- > 0) {
            this_coroutine->yield();
        }

        this_coroutine->is_finished_ = true;
        this_coroutine->yield();
    }

    /** True once the coroutine body has completed all of its switches (or no stack could be mapped). */
    inline bool is_finished() const { return is_finished_; }

private:
    struct stack_context {
        size_t size; /** @brief stack size **/
        void* sp;    /** @brief stack end pointer **/
    };

    stack_context stack_ctx_;              // owned mapping; sp is NULL when mapping failed
    ucontext_t callee_ctx_;                // saved context of the coroutine body
    ucontext_t caller_ctx_;                // saved context of whoever called resume()
    coroutine_context* caller_coroutine_;  // value of this_coroutine to restore on yield()
    bool is_in_callback_;                  // true while execution is inside the coroutine body
    bool is_finished_;                     // set by the coroutine body after its last switch
};
/**
 * ucontext benchmark entry point.
 *
 * Usage: ucontext_benchmark [coroutine_count] [switch_count] [stack_size_kb]
 *
 * Prints the time taken to create the coroutines, to switch through all of
 * them until every one has finished, and to destroy them.
 *
 * @return 0 on success, 1 when an argument is out of range
 */
int main(int argc, char *argv[]) {
    puts("###################### ucontext coroutine ###################");
    printf("########## Cmd:");
    for (int i = 0; i < argc; ++i) {
        printf(" %s", argv[i]);
    }
    puts("");

    int stack_size_kb = 16;
    if (argc > 1) {
        max_coroutine_number = atoi(argv[1]);
    }
    if (argc > 2) {
        switch_count = atoi(argv[2]);
    }
    if (argc > 3) {
        stack_size_kb = atoi(argv[3]);
    }

    // Negative values would turn into huge size_t values below.
    if (max_coroutine_number < 0 || switch_count < 0 || stack_size_kb <= 0) {
        fprintf(stderr, "usage: %s [coroutine_count >= 0] [switch_count >= 0] [stack_size_kb > 0]\n",
                argc > 0 ? argv[0] : "ucontext_benchmark");
        return 1;
    }
    const size_t stack_size = static_cast<size_t>(stack_size_kb) * 1024;

    time_t begin_time = time(NULL);
    CALC_CLOCK_T begin_clock = CALC_CLOCK_NOW();

    // create coroutines, filling the array in strides of 64
    std::vector<std::unique_ptr<coroutine_context> > co_arr;
    co_arr.resize(static_cast<size_t>(max_coroutine_number));
    for (size_t i = 0; i < 64; ++i) {
        for (size_t j = 0; i + j * 64 < co_arr.size(); ++j) {
            co_arr[i + j * 64].reset(new coroutine_context(stack_size));
        }
    }

    time_t end_time = time(NULL);
    CALC_CLOCK_T end_clock = CALC_CLOCK_NOW();
    printf("create %d coroutine, cost time: %d s, clock time: %d ms, avg: %lld ns\n", max_coroutine_number,
           static_cast<int>(end_time - begin_time), CALC_MS_CLOCK(end_clock - begin_clock),
           CALC_NS_AVG_CLOCK(end_clock - begin_clock, max_coroutine_number));

    begin_time = end_time;
    begin_clock = end_clock;

    // yield & resume from runner: keep sweeping until no coroutine is left unfinished
    bool continue_flag = true;
    long long real_switch_times = static_cast<long long>(0);
    while (continue_flag) {
        continue_flag = false;
        for (size_t i = 0; i < co_arr.size(); ++i) {
            if (false == co_arr[i]->is_finished()) {
                continue_flag = true;
                ++real_switch_times;
                co_arr[i]->resume();
            }
        }
    }

    end_time = time(NULL);
    end_clock = CALC_CLOCK_NOW();
    printf("switch %d coroutine contest %lld times, cost time: %d s, clock time: %d ms, avg: %lld ns\n", max_coroutine_number,
           real_switch_times, static_cast<int>(end_time - begin_time), CALC_MS_CLOCK(end_clock - begin_clock),
           CALC_NS_AVG_CLOCK(end_clock - begin_clock, real_switch_times));

    begin_time = end_time;
    begin_clock = end_clock;

    // destroy every coroutine
    co_arr.clear();

    end_time = time(NULL);
    end_clock = CALC_CLOCK_NOW();
    printf("remove %d coroutine, cost time: %d s, clock time: %d ms, avg: %lld ns\n", max_coroutine_number,
           static_cast<int>(end_time - begin_time), CALC_MS_CLOCK(end_clock - begin_clock),
           CALC_NS_AVG_CLOCK(end_clock - begin_clock, max_coroutine_number));
    return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment