@fantix
Forked from divi255/main.rs
Last active November 7, 2022 22:58
Test Rust shared resource guards
#[macro_use]
extern crate lazy_static;

use std::sync::atomic::{AtomicBool, AtomicU64, Ordering};
use std::time::{Duration, Instant};

// Total number of writer-loop iterations per benchmark, split across WRITERS threads.
static N: u64 = 500_000;
static READERS: u64 = 100;
static WRITERS: u64 = 2;
// Readers spin while this flag is true; each benchmark clears it once the writers finish.
static READING: AtomicBool = AtomicBool::new(true);
static READ_COUNT: AtomicU64 = AtomicU64::new(0);
static PROGRESS_EVERY: u64 = 50_000;

lazy_static! {
    static ref S_MUTEX: std::sync::Mutex<u64> = std::sync::Mutex::new(0);
    static ref S_RWLOCK: std::sync::RwLock<u64> = std::sync::RwLock::new(0);
    static ref P_MUTEX: parking_lot::Mutex<u64> = parking_lot::Mutex::new(0);
    static ref P_RWLOCK: parking_lot::RwLock<u64> = parking_lot::RwLock::new(0);
}

macro_rules! print_stat {
    ($title: expr, $duration: expr, $read_count: expr) => {
        println!(
            "{} spent: {} sec, {} reads",
            $title,
            $duration.as_nanos() as f64 / 1_000_000_000.0,
            $read_count,
        );
    };
}

// Benchmark parking_lot::RwLock: spawn free-running reader threads, then time how long
// the writers take; readers keep incrementing READ_COUNT until READING is cleared.
fn bench_parking_lot_rwlock() -> Duration {
    let start = Instant::now();
    let readers = (0..READERS)
        .map(|n| {
            std::thread::spawn(move || {
                let mut i = 0;
                while READING.load(Ordering::Relaxed) {
                    let a = P_RWLOCK.read();
                    i += 1;
                    if i % PROGRESS_EVERY == 0 {
                        println!("read {} @ {} = {}", n, i, *a);
                    }
                    READ_COUNT.fetch_add(1, Ordering::Relaxed);
                }
            })
        })
        .collect::<Vec<_>>();
    (0..WRITERS)
        .map(|i| {
            std::thread::spawn(move || {
                // Each writer does N / WRITERS iterations; every iteration adds 10 to the
                // shared counter while holding the write guard, plus a short busy loop.
                for _ in 0..(N / WRITERS) {
                    let mut a = P_RWLOCK.write();
                    *a += 1;
                    *a += 2;
                    *a += 3;
                    *a += 4;
                    for _ in 0..100 {
                        let _a = 123;
                    }
                }
                println!("write {} done", i);
            })
        })
        .collect::<Vec<_>>()
        .into_iter()
        .map(|h| {
            h.join().unwrap();
        })
        .for_each(drop);
    let spent = Instant::now() - start;
    // All writers together should have added N * 10.
    assert_eq!(*P_RWLOCK.read(), N * 10);
    READING.store(false, Ordering::SeqCst);
    readers
        .into_iter()
        .map(|h| h.join().unwrap())
        .for_each(drop);
    spent
}

fn bench_parking_lot_mutex() -> Duration {
    let start = Instant::now();
    let readers = (0..READERS)
        .map(|n| {
            std::thread::spawn(move || {
                let mut i = 0;
                while READING.load(Ordering::Relaxed) {
                    let a = P_MUTEX.lock();
                    i += 1;
                    if i % PROGRESS_EVERY == 0 {
                        println!("read {} @ {} = {}", n, i, *a);
                    }
                    READ_COUNT.fetch_add(1, Ordering::Relaxed);
                }
            })
        })
        .collect::<Vec<_>>();
    (0..WRITERS)
        .map(|i| {
            std::thread::spawn(move || {
                for _ in 0..(N / WRITERS) {
                    let mut a = P_MUTEX.lock();
                    *a += 1;
                    *a += 2;
                    *a += 3;
                    *a += 4;
                    for _ in 0..100 {
                        let _a = 123;
                    }
                }
                println!("write {} done", i);
            })
        })
        .collect::<Vec<_>>()
        .into_iter()
        .map(|h| {
            h.join().unwrap();
        })
        .for_each(drop);
    let spent = Instant::now() - start;
    assert_eq!(*P_MUTEX.lock(), N * 10);
    READING.store(false, Ordering::SeqCst);
    readers
        .into_iter()
        .map(|h| h.join().unwrap())
        .for_each(drop);
    spent
}

fn bench_sync_rwlock() -> Duration {
    let start = Instant::now();
    let readers = (0..READERS)
        .map(|n| {
            std::thread::spawn(move || {
                let mut i = 0;
                while READING.load(Ordering::Relaxed) {
                    let a = S_RWLOCK.read().unwrap();
                    i += 1;
                    if i % PROGRESS_EVERY == 0 {
                        println!("read {} @ {} = {}", n, i, *a);
                    }
                    READ_COUNT.fetch_add(1, Ordering::Relaxed);
                }
            })
        })
        .collect::<Vec<_>>();
    (0..WRITERS)
        .map(|i| {
            std::thread::spawn(move || {
                for _ in 0..(N / WRITERS) {
                    let mut a = S_RWLOCK.write().unwrap();
                    *a += 1;
                    *a += 2;
                    *a += 3;
                    *a += 4;
                    for _ in 0..100 {
                        let _a = 123;
                    }
                }
                println!("write {} done", i);
            })
        })
        .collect::<Vec<_>>()
        .into_iter()
        .map(|h| {
            h.join().unwrap();
        })
        .for_each(drop);
    let spent = Instant::now() - start;
    assert_eq!(*S_RWLOCK.read().unwrap(), N * 10);
    READING.store(false, Ordering::SeqCst);
    readers
        .into_iter()
        .map(|h| h.join().unwrap())
        .for_each(drop);
    spent
}

fn bench_sync_mutex() -> Duration {
    let start = Instant::now();
    let readers = (0..READERS)
        .map(|n| {
            std::thread::spawn(move || {
                let mut i = 0;
                while READING.load(Ordering::Relaxed) {
                    let a = S_MUTEX.lock().unwrap();
                    i += 1;
                    if i % PROGRESS_EVERY == 0 {
                        println!("read {} @ {} = {}", n, i, *a);
                    }
                    READ_COUNT.fetch_add(1, Ordering::Relaxed);
                }
            })
        })
        .collect::<Vec<_>>();
    (0..WRITERS)
        .map(|i| {
            std::thread::spawn(move || {
                for _ in 0..(N / WRITERS) {
                    let mut a = S_MUTEX.lock().unwrap();
                    *a += 1;
                    *a += 2;
                    *a += 3;
                    *a += 4;
                    for _ in 0..100 {
                        let _a = 123;
                    }
                }
                println!("write {} done", i);
            })
        })
        .collect::<Vec<_>>()
        .into_iter()
        .map(|h| {
            h.join().unwrap();
        })
        .for_each(drop);
    let spent = Instant::now() - start;
    assert_eq!(*S_MUTEX.lock().unwrap(), N * 10);
    READING.store(false, Ordering::SeqCst);
    readers
        .into_iter()
        .map(|h| h.join().unwrap())
        .for_each(drop);
    spent
}

fn main() -> Result<(), Box<dyn std::error::Error>> {
    println!("PARKING_LOT RWLOCK ===============================");
    let p_rwlock = bench_parking_lot_rwlock();
    // Re-arm the readers and reset the shared read counter between benchmarks.
    READING.store(true, Ordering::SeqCst);
    let p_rwlock_r = READ_COUNT.swap(0, Ordering::SeqCst);
    println!("PARKING_LOT MUTEX ================================");
    let p_mutex = bench_parking_lot_mutex();
    READING.store(true, Ordering::SeqCst);
    let p_mutex_r = READ_COUNT.swap(0, Ordering::SeqCst);
    println!("SYNC RWLOCK ======================================");
    let s_rwlock = bench_sync_rwlock();
    READING.store(true, Ordering::SeqCst);
    let s_rwlock_r = READ_COUNT.swap(0, Ordering::SeqCst);
    println!("SYNC MUTEX =======================================");
    let s_mutex = bench_sync_mutex();
    let s_mutex_r = READ_COUNT.swap(0, Ordering::SeqCst);
    print_stat!("PARKING_LOT RWLOCK", p_rwlock, p_rwlock_r);
    print_stat!("PARKING_LOT MUTEX", p_mutex, p_mutex_r);
    print_stat!("SYNC RWLOCK", s_rwlock, s_rwlock_r);
    print_stat!("SYNC MUTEX", s_mutex, s_mutex_r);
    Ok(())
}
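
To build the gist as-is, lazy_static and parking_lot need to be declared as dependencies. A minimal Cargo.toml sketch (the package name and version numbers are assumptions, not taken from the gist):

[package]
name = "lock-bench"
version = "0.1.0"
edition = "2021"

[dependencies]
lazy_static = "1"
parking_lot = "0.12"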
fantix commented Nov 7, 2022

2 writers doing 500,000 writes, 100 free readers (previous revision of the code):

Result on 2020 MBP with 2.3GHz 4-core i7:

PARKING_LOT RWLOCK spent: 11.221159946 sec, 59149773 reads
PARKING_LOT MUTEX spent: 22.538999885 sec, 14961447 reads
SYNC RWLOCK spent: 50.869181373 sec, 53485615 reads
SYNC MUTEX spent: 27.455305881 sec, 37521027 reads

Result on 12-core Ryzen 9 5900X:

PARKING_LOT RWLOCK spent: 111.517005976 sec, 511775779 reads
PARKING_LOT MUTEX spent: 16.854885435 sec, 7977828 reads
SYNC RWLOCK spent: 13.175750669 sec, 111278471 reads
SYNC MUTEX spent: 11.350269887 sec, 37666183 reads

fantix commented Nov 7, 2022

100 readers doing 50,000,000 reads, 2 free writers (previous revision of the code):

Result on 2020 MBP with 2.3GHz 4-core i7:

PARKING_LOT RWLOCK spent: 10.74906391 sec, 4,700,740 writes
PARKING_LOT MUTEX spent: 65.448203744 sec, 19,120,750 writes
SYNC RWLOCK spent: 33.23635836 sec, 3,378,590 writes
SYNC MUTEX spent: 33.950512621 sec, 5,530,290 writes

Result on 12-core Ryzen 9 5900X:

PARKING_LOT RWLOCK spent: 11.403740884 sec, 469,000 writes
PARKING_LOT MUTEX spent: 108.797817425 sec, 36,626,330 writes
SYNC RWLOCK spent: 5.541838849 sec, 2,021,870 writes
SYNC MUTEX spent: 14.064283695 sec, 6,142,770 writes

Findings:

  • parking_lot::RwLock seems to perform better than std::sync::RwLock under lower contention (4 cores: 11 s vs 33 s, 4.7M vs 3.4M writes)
  • parking_lot::RwLock takes a stable amount of time to complete the same number of reads regardless of contention (~11 s on both machines), but its writes degraded from 4.7M to 0.5M under the higher contention of the 12-core machine
  • parking_lot::Mutex is not suitable for this scenario; the CPUs never reach full utilization
  • std::sync::RwLock and std::sync::Mutex are similarly "slow" under lower contention (4 cores: 33-34 s, 3M-5M writes)
  • Under high contention (12 cores), std::sync::RwLock is 2x faster than parking_lot::RwLock (5.5 s vs 11.4 s, 2M vs 0.5M writes)
  • On the 12-core machine, std::sync::RwLock did 9M reads/s and 0.36M writes/s, while std::sync::Mutex did 3.6M reads/s and 0.44M writes/s

std::sync::RwLock is the winner in this test.
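
For context on the API difference the benchmarks exercise: std::sync guards come wrapped in a Result because a panicking holder poisons the lock, which is why the SYNC variants above call .unwrap(), while parking_lot has no poisoning and returns its guards directly. A minimal standalone sketch (the counter and values here are illustrative, not part of the gist):

use std::sync::RwLock;

fn main() {
    // std::sync::RwLock: read()/write() return Result due to lock poisoning.
    let std_lock = RwLock::new(0u64);
    *std_lock.write().unwrap() += 1;
    assert_eq!(*std_lock.read().unwrap(), 1);

    // parking_lot::RwLock: no poisoning, guards are returned directly.
    let pl_lock = parking_lot::RwLock::new(0u64);
    *pl_lock.write() += 1;
    assert_eq!(*pl_lock.read(), 1);
}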

fantix commented Nov 7, 2022

16-second run, 100 free readers + 2 free writers (see the sketch after the findings below):

Result on 2020 MBP with 2.3GHz 4-core i7:

PARKING_LOT RWLOCK: 67,874,030 reads, 6,358,520 writes
PARKING_LOT MUTEX: 11,822,910 reads, 4,041,520 writes
SYNC RWLOCK: 23,697,876 reads, 1,588,900 writes
SYNC MUTEX: 13,482,674 reads, 1,534,450 writes

Result on 12-core Ryzen 9 5900X:

PARKING_LOT RWLOCK: 70,328,321 reads, 670,990 writes
PARKING_LOT MUTEX: 7,424,345 reads, 5,456,960 writes
SYNC RWLOCK: 142,979,987 reads, 5,707,880 writes
SYNC MUTEX: 54,569,975 reads, 7,053,940 writes

Findings:

  • parking_lot doesn't seem to scale well: on the 12-core machine both of its locks complete far fewer reads than std::sync::RwLock, and parking_lot::RwLock finishes roughly 8x fewer writes (670,990 vs 5,707,880)
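
The 16-second numbers above come from a revision where both readers and writers run freely until a fixed deadline; that revision is not shown in this gist. A minimal sketch of how such a time-boxed run could be structured, using parking_lot::RwLock as the example (the RUNNING/READS/WRITES names, the thread counts, and the 16-second sleep are assumptions, not the gist's actual code):

use std::sync::atomic::{AtomicBool, AtomicU64, Ordering};
use std::time::Duration;

static RUNNING: AtomicBool = AtomicBool::new(true);
static READS: AtomicU64 = AtomicU64::new(0);
static WRITES: AtomicU64 = AtomicU64::new(0);

lazy_static::lazy_static! {
    static ref LOCK: parking_lot::RwLock<u64> = parking_lot::RwLock::new(0);
}

fn main() {
    let readers: Vec<_> = (0..100)
        .map(|_| {
            std::thread::spawn(|| {
                while RUNNING.load(Ordering::Relaxed) {
                    let _a = LOCK.read();
                    READS.fetch_add(1, Ordering::Relaxed);
                }
            })
        })
        .collect();
    let writers: Vec<_> = (0..2)
        .map(|_| {
            std::thread::spawn(|| {
                while RUNNING.load(Ordering::Relaxed) {
                    *LOCK.write() += 1;
                    WRITES.fetch_add(1, Ordering::Relaxed);
                }
            })
        })
        .collect();
    // Let both sides run freely for the fixed window, then stop and report.
    std::thread::sleep(Duration::from_secs(16));
    RUNNING.store(false, Ordering::SeqCst);
    for h in readers.into_iter().chain(writers) {
        h.join().unwrap();
    }
    println!(
        "{} reads, {} writes",
        READS.load(Ordering::Relaxed),
        WRITES.load(Ordering::Relaxed)
    );
}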
