Gist by @chichunchen, forked from leifwalsh/A_results.txt, created May 16, 2018 17:03.
mcs lock: userspace C++11 implementation of http://lwn.net/Articles/590243/. Seems to be awful (far worse than a plain spinlock) once nthreads > ncpus, presumably because the strict FIFO hand-off stalls whenever the next waiter in the queue has been preempted.
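For context, each benchmark thread in the test driver further down simply takes the lock and bumps a shared counter in a tight loop. A minimal sketch of that workload, using the mcs_lock defined later in this gist (the name worker is just illustrative; the gist's actual driver is the spinner/test pair below):

#include <cstddef>
#include "mcs_lock.hpp"

// Per-thread workload the timings below measure: acquire, increment a shared
// counter, release, repeated n times.
void worker(mcs_lock &m, int &counter, std::size_t n) {
  for (std::size_t i = 0; i < n; ++i) {
    mcs_lock::scoped_lock lk(m);  // acquire: constructor enqueues and spins
    ++counter;                    // critical section
  }                               // release: lk's destructor hands off the lock
}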
# A_results.txt: each line is "<lock> <nthreads>: <total wall-clock time>"
# clang version 3.4 (tags/RELEASE_34/final)
# Intel(R) Core(TM) i5-3470 CPU @ 3.20GHz
spinlock 1: 3925ms
spinlock 2: 25665ms
spinlock 3: 27722ms
spinlock 4: 42727ms
std::mutex 1: 5526ms
std::mutex 2: 17672ms
std::mutex 3: 31038ms
std::mutex 4: 36890ms
mcs_lock 1: 4014ms
mcs_lock 2: 23112ms
mcs_lock 3: 26940ms
mcs_lock 4: 55371ms
# gcc version 4.8.2 20140206 (prerelease) (GCC)
# Intel(R) Core(TM) i5-3470 CPU @ 3.20GHz
spinlock 1: 5954ms
spinlock 2: 23907ms
spinlock 3: 29344ms
spinlock 4: 39561ms
std::mutex 1: 5549ms
std::mutex 2: 25904ms
std::mutex 3: 40386ms
std::mutex 4: 45023ms
mcs_lock 1: 4450ms
mcs_lock 2: 30693ms
mcs_lock 3: 27023ms
mcs_lock 4: 39660ms
# clang version 3.4 (tags/RELEASE_34/final)
# Intel(R) Core(TM) i5-4300U CPU @ 1.90GHz
spinlock 1: 4426ms
spinlock 2: 36351ms
spinlock 3: 42095ms
spinlock 4: 74889ms
std::mutex 1: 7989ms
std::mutex 2: 23520ms
std::mutex 3: 32154ms
std::mutex 4: 36576ms
mcs_lock 1: 4979ms
mcs_lock 2: 36937ms
mcs_lock 3: 29988ms
mcs_lock 4: 32646ms
# gcc version 4.8.2 20140206 (prerelease) (GCC)
# Intel(R) Core(TM) i5-4300U CPU @ 1.90GHz
spinlock 1: 7193ms
spinlock 2: 12639ms
spinlock 3: 117277ms
spinlock 4: 121082ms
std::mutex 1: 7672ms
std::mutex 2: 23599ms
std::mutex 3: 32680ms
std::mutex 4: 35891ms
mcs_lock 1: 4883ms
mcs_lock 2: 36233ms
mcs_lock 3: 29314ms
mcs_lock 4: 34758ms
# Apple LLVM version 5.1 (clang-503.0.38) (based on LLVM 3.4svn)
# Intel(R) Core(TM) i7-2677M CPU @ 1.80GHz
# (turned n down from 1<<28 to 1<<24)
spinlock 1: 342ms
spinlock 2: 3474ms
spinlock 3: 3848ms
spinlock 4: 3667ms
std::mutex 1: 1307ms
std::mutex 2: 53748ms
std::mutex 3: 53613ms
std::mutex 4: 52825ms
mcs_lock 1: 344ms
mcs_lock 2: 749ms
mcs_lock 3: 1497ms
mcs_lock 4: 1928ms
# g++-4.8 (Ubuntu/Linaro 4.8.1-10ubuntu9) 4.8.1
# Intel(R) Xeon(R) CPU E5540 @ 2.53GHz
spinlock 1: 5967ms
spinlock 2: 26993ms
spinlock 3: 29944ms
spinlock 4: 33079ms
spinlock 5: 44120ms
spinlock 6: 52264ms
spinlock 7: 61463ms
spinlock 8: 67250ms
spinlock 9: 72514ms
spinlock 10: 85820ms
spinlock 11: 89884ms
spinlock 12: 98427ms
spinlock 13: 105303ms
spinlock 14: 110160ms
spinlock 15: 114706ms
spinlock 16: 116581ms
std::mutex 1: 4907ms
std::mutex 2: 14258ms
std::mutex 3: 36275ms
std::mutex 4: 41975ms
std::mutex 5: 40615ms
std::mutex 6: 36456ms
std::mutex 7: 35691ms
std::mutex 8: 29196ms
std::mutex 9: 27997ms
std::mutex 10: 26738ms
std::mutex 11: 26207ms
std::mutex 12: 25737ms
std::mutex 13: 25337ms
std::mutex 14: 24937ms
std::mutex 15: 24852ms
std::mutex 16: 24480ms
mcs_lock 1: 4447ms
mcs_lock 2: 103555ms
mcs_lock 3: 88553ms
mcs_lock 4: 67063ms
mcs_lock 5: 68666ms
mcs_lock 6: 65370ms
mcs_lock 7: 61647ms
mcs_lock 8: 73226ms
mcs_lock 9: 69561ms
mcs_lock 10: 68885ms
mcs_lock 11: 75992ms
mcs_lock 12: 60612ms
mcs_lock 13: 77011ms
mcs_lock 14: 75698ms
mcs_lock 15: 71876ms
mcs_lock 16: 62400ms
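For scale: each run above performs 1<<28 locked increments in total, split across the threads (the i7-2677M run used 1<<24, as its comment notes), so a total time converts directly into lock throughput. Taking the first run (clang 3.4, i5-3470) as a worked example:

  spinlock 1: 2^28 increments / 3.925 s  ≈ 68 million acquire/release pairs per second
  spinlock 4: 2^28 increments / 42.727 s ≈  6 million acquire/release pairs per second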
# Makefile for the benchmark: build with make, then run ./mcs_lock_test
COMPILER=g++
#COMPILER=g++-4.8
#COMPILER=clang++
CXX=$(COMPILER) -std=c++11
# osx:
#CXX=$(COMPILER) -std=c++11 -stdlib=libc++
CXXFLAGS=-Wall -Werror -g -O2
LDFLAGS=-pthread -lpthread
# ubuntu is insane (--as-needed drops libpthread unless told otherwise):
#LDFLAGS=-pthread -lpthread -Wl,--no-as-needed

.PHONY: all clean

all: mcs_lock_test

mcs_lock_test: mcs_lock_test.cpp mcs_lock.hpp
	$(CXX) $(CXXFLAGS) $(LDFLAGS) $< -o $@

clean:
	$(RM) mcs_lock_test
// mcs_lock.hpp: userspace MCS queue lock, after http://lwn.net/Articles/590243/.
// Each waiter spins on a flag in its own queue node, and the holder hands the
// lock to its successor on release, giving FIFO ordering.
#pragma once

#include <atomic>

class mcs_lock {
public:
  class scoped_lock;

private:
  // Tail of the waiter queue; nullptr means the lock is free.
  std::atomic<scoped_lock *> _tail;

public:
  mcs_lock(const mcs_lock&) = delete;
  mcs_lock& operator=(const mcs_lock&) = delete;
  mcs_lock() : _tail(nullptr) {}

  class scoped_lock {
    mcs_lock &_lock;
    scoped_lock * volatile _next;
    volatile bool _owned;

  public:
    scoped_lock(const scoped_lock&) = delete;
    scoped_lock& operator=(const scoped_lock&) = delete;

    // Acquire: swap ourselves in as the new tail; if there was a predecessor,
    // link behind it and spin on our own _owned flag until it hands off.
    scoped_lock(mcs_lock &lock) : _lock(lock), _next(nullptr), _owned(false) {
      scoped_lock *tail = _lock._tail.exchange(this);
      if (tail != nullptr) {
        tail->_next = this;
        while (!_owned) {
          // spin me right round
        }
      }
    }

    // Release: if we are still the tail, swing _tail back to nullptr;
    // otherwise wait for our successor to link itself, then hand off to it.
    ~scoped_lock() {
      scoped_lock *tail = this;
      if (!_lock._tail.compare_exchange_strong(tail, nullptr)) {
        while (_next == nullptr) {
          // baby right round
        }
        _next->_owned = true;
      }
    }
  };
};
#include "mcs_lock.hpp"
#include <atomic>
class spin_lock {
public:
class scoped_lock;
private:
std::atomic<scoped_lock *> _lock;
public:
spin_lock(const spin_lock&) = delete;
spin_lock& operator=(const spin_lock&) = delete;
spin_lock() : _lock(nullptr) {}
class scoped_lock {
spin_lock &_lock;
public:
scoped_lock(const scoped_lock&) = delete;
scoped_lock& operator=(const scoped_lock&) = delete;
scoped_lock(spin_lock &lock) : _lock(lock) {
scoped_lock *expect = nullptr;
while (!_lock._lock.compare_exchange_weak(expect, this)) {
expect = nullptr;
}
}
~scoped_lock() {
_lock._lock = nullptr;
}
};
};
template<class mutex, class lock_object>
void spinner(mutex *m, int *val, std::size_t n) {
for (std::size_t i = 0; i < n; ++i) {
lock_object lk(*m);
(*val)++;
}
}
#include <chrono>
template<typename Clock>
class timer {
std::chrono::time_point<Clock> _t0;
public:
timer() : _t0(Clock::now()) {}
timer(const timer&) = delete;
timer& operator=(const timer&) = delete;
void reset() { _t0 = Clock::now(); }
operator std::chrono::milliseconds() const {
return std::chrono::duration_cast<std::chrono::milliseconds>(Clock::now() - _t0);
}
};
#include <algorithm>
#include <functional>
#include <iostream>
#include <mutex>
#include <string>
#include <thread>
#include <vector>
template<class mutex, class lock_object>
void test(const std::string &name, std::size_t nthreads) {
std::vector<std::thread> threads;
mutex m;
int i = 0;
timer<std::chrono::high_resolution_clock> t;
std::generate_n(std::back_inserter(threads), nthreads,
[&m, &i, nthreads]() { return std::move(std::thread(&spinner<mutex, lock_object>, &m, &i, (1<<28) / nthreads)); });
std::for_each(threads.begin(), threads.end(), std::mem_fn(&std::thread::join));
std::cout << name << " " << nthreads << ":\t" << static_cast<std::chrono::milliseconds>(t).count() << "ms" << std::endl;
}
int main(void) {
for (std::size_t i = 1; i <= std::thread::hardware_concurrency(); ++i) {
test<spin_lock, spin_lock::scoped_lock>("spinlock", i);
}
for (std::size_t i = 1; i <= std::thread::hardware_concurrency(); ++i) {
test<std::mutex, std::unique_lock<std::mutex> >("std::mutex", i);
}
for (std::size_t i = 1; i <= std::thread::hardware_concurrency(); ++i) {
test<mcs_lock, mcs_lock::scoped_lock>("mcs_lock", i);
}
return 0;
}
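One common way to soften the oversubscribed case, sketched here as an aside rather than as part of the gist, is to yield inside the busy-wait loops so that a preempted predecessor, or the holder that must hand the lock to us, can get scheduled. A hypothetical helper along those lines:

#include <thread>

// Hypothetical helper (not in the gist): spin on a predicate, but yield the
// CPU each iteration so a descheduled lock holder or successor can run.
template <class Pred>
inline void spin_yield_until(Pred done) {
  while (!done()) {
    std::this_thread::yield();
  }
}

// In mcs_lock::scoped_lock this would replace the raw busy-waits, e.g.
//   spin_yield_until([this] { return _owned; });            // constructor
//   spin_yield_until([this] { return _next != nullptr; });   // destructor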