Skip to content

Instantly share code, notes, and snippets.

@wathenjiang
Last active December 2, 2023 04:12
Show Gist options
  • Save wathenjiang/30b689a7ef20b4ea667a2e8f358c321d to your computer and use it in GitHub Desktop.
Save wathenjiang/30b689a7ef20b4ea667a2e8f358c321d to your computer and use it in GitHub Desktop.
random_access

The test code:

use rand::Rng;
use std::{
    sync::{Arc, Mutex},
    thread,
    time::Instant,
    vec,
};
/// Assumed size in bytes of one array element (a 64-bit integer).
const INT_SIZE: usize = 8;
/// Total number of array elements across all threads: 4096 MiB worth of
/// `INT_SIZE`-byte elements.
const ARRAY_SIZE: usize = (4096 << 20) / INT_SIZE;
/// Total number of lock-protected random reads per run; `main` divides this
/// evenly between the worker threads.
const ITERATIONS: usize = 100_000_000;

/// Benchmarks lock-protected random reads for a range of thread counts.
///
/// For every entry of `thread_counts` the `ARRAY_SIZE` elements are split into
/// one private array per thread. Each thread then performs its share of
/// `ITERATIONS` random reads from its own array, taking one of `num_of_mutex`
/// shared mutexes (chosen round-robin by iteration number) around every read.
/// The elapsed wall-clock time and the sum of all values read are printed per
/// thread count; printing the sum keeps the reads from being optimised away.
///
/// # Panics
///
/// Panics if `num_of_mutex` is zero, if a mutex is poisoned, or if a worker
/// thread panics.
fn main() {
    // Number of shared locks; change it in
    // [1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024].
    let num_of_mutex: usize = 16;
    assert!(num_of_mutex > 0, "num_of_mutex must be at least 1");
    let thread_counts = [1, 2, 4, 8, 12, 16, 24, 32, 48, 64, 128];

    for thread_count in thread_counts {
        // Elements are explicitly `u64` so that each one really occupies the
        // 8 bytes assumed by INT_SIZE; an untyped `vec![1; size]` would be
        // inferred as `Vec<i32>` and use only half of the documented memory.
        let size = ARRAY_SIZE / thread_count;
        let vecs: Vec<Vec<u64>> = (0..thread_count).map(|_| vec![1_u64; size]).collect();

        // Every lock lives in its own `Arc` so each thread can hold a handle to all of them.
        let global_mutexs: Vec<Arc<Mutex<()>>> = (0..num_of_mutex)
            .map(|_| Arc::new(Mutex::new(())))
            .collect();
        let len = vecs.len();

        // Start timing only once all setup is done; thread spawn and join are included.
        let start_time = Instant::now();
        let threads: Vec<_> = vecs
            .into_iter()
            .map(|data| {
                let mutex = global_mutexs.clone();
                thread::spawn(move || {
                    let mut rng = rand::thread_rng();
                    let mut sum = 0_u64;
                    for i in 0..ITERATIONS / len {
                        // `%` distributes over every lock for any count, whereas the
                        // mask `i & (n - 1)` is only correct when n is a power of two.
                        let guard = mutex[i % num_of_mutex]
                            .lock()
                            .expect("benchmark mutex poisoned");
                        let index = rng.gen_range(0..data.len());
                        sum += data[index];
                        drop(guard);
                    }
                    sum
                })
            })
            .collect();

        let sum: u64 = threads
            .into_iter()
            .map(|handle| handle.join().expect("worker thread panicked"))
            .sum();

        let elapsed = start_time.elapsed();

        println!("threads: {}, elapsed: {:?}", thread_count, elapsed);
        println!("sum : {sum}");
    }
}

The results:

mutexes/threads 1 2 4 8 12 16 24 32 48 64 128
0 5.980172444 2.899437975 1.447906311 0.828731566 0.689618066 0.612355429 0.589401394 0.587380871 0.525477567 0.578456362 0.552132325
1 7.970250774 17.29034894 22.60164692 25.97284605 28.12352579 33.31359697 31.18786342 31.61139126 29.23225856 30.94094675 31.59191497
2 7.883931727 15.97845738 16.11107368 18.73377898 20.34614133 23.02624802 22.69439808 23.15802647 21.80570219 22.48815498 22.98585238
4 7.975676415 10.25364766 11.88074538 15.40198137 15.51024255 16.35328034 15.46874828 15.7982897 15.48703267 15.67227903 15.35829948
8 8.058803258 8.138193999 7.619081588 7.936418179 7.654288652 7.901945312 7.642439744 7.861542054 7.730389506 7.821229611 7.748344488
16 9.797308994 6.213334839 4.455407945 4.496371955 4.291254249 4.130849346 4.347601475 4.294096757 3.990391527 4.028562691 4.059085994
32 8.742854719 4.847656612 3.301780829 2.578327826 2.480488617 2.331294827 2.388718271 2.306257478 2.421350161 2.278177495 2.26569423
64 8.042672888 4.963568223 3.012473492 2.08243512 1.828237002 1.653421053 1.550811454 1.536452054 1.519761769 1.618966043 1.48010674
128 8.62801309 4.978525185 2.637936755 1.777546296 1.549096849 1.359814529 1.43875245 1.385468038 1.238832309 1.249940559 1.248131329
256 8.584906215 4.591742459 2.441556366 1.504790937 1.335449235 1.169191715 1.115906268 1.230570609 1.075581823 1.048285585 1.02977064
512 8.171549127 4.182283461 2.37535305 1.54202412 1.1690348 1.054650104 1.015366906 1.153238581 0.993319168 0.998864737 0.981392837
1024 8.533398132 4.175120792 2.209645233 1.412410651 1.055442085 0.938202817 1.122801927 0.940661156 0.888767412 0.914867532 0.92237305

The drawing script:

import matplotlib.pyplot as plt

# Elapsed time in seconds. Each row holds the measurements for one lock count
# (see row_labels); each column is one thread count (see column_labels).
data = [
    [5.980172444, 2.899437975, 1.447906311, 0.828731566, 0.689618066, 0.612355429, 0.589401394, 0.587380871, 0.525477567, 0.578456362, 0.552132325],
    [7.970250774, 17.29034894, 22.60164692, 25.97284605, 28.12352579, 33.31359697, 31.18786342, 31.61139126, 29.23225856, 30.94094675, 31.59191497],
    [7.883931727, 15.97845738, 16.11107368, 18.73377898, 20.34614133, 23.02624802, 22.69439808, 23.15802647, 21.80570219, 22.48815498, 22.98585238],
    [7.975676415, 10.25364766, 11.88074538, 15.40198137, 15.51024255, 16.35328034, 15.46874828, 15.7982897, 15.48703267, 15.67227903, 15.35829948],
    # NOTE(review): the 2-thread value below was 8.363385774, which disagreed
    # with the published results table (8.138193999); aligned to the table --
    # confirm against the raw measurements.
    [8.058803258, 8.138193999, 7.619081588, 7.936418179, 7.654288652, 7.901945312, 7.642439744, 7.861542054, 7.730389506, 7.821229611, 7.748344488],
    [9.797308994, 6.213334839, 4.455407945, 4.496371955, 4.291254249, 4.130849346, 4.347601475, 4.294096757, 3.990391527, 4.028562691, 4.059085994],
    [8.742854719, 4.847656612, 3.301780829, 2.578327826, 2.480488617, 2.331294827, 2.388718271, 2.306257478, 2.421350161, 2.278177495, 2.26569423],
    [8.042672888, 4.963568223, 3.012473492, 2.08243512, 1.828237002, 1.653421053, 1.550811454, 1.536452054, 1.519761769, 1.618966043, 1.48010674],
    [8.62801309, 4.978525185, 2.637936755, 1.777546296, 1.549096849, 1.359814529, 1.43875245, 1.385468038, 1.238832309, 1.249940559, 1.248131329],
    [8.584906215, 4.591742459, 2.441556366, 1.504790937, 1.335449235, 1.169191715, 1.115906268, 1.230570609, 1.075581823, 1.048285585, 1.02977064],
    [8.171549127, 4.182283461, 2.37535305, 1.54202412, 1.1690348, 1.054650104, 1.015366906, 1.153238581, 0.993319168, 0.998864737, 0.981392837],
    [8.533398132, 4.175120792, 2.209645233, 1.412410651, 1.055442085, 0.938202817, 1.122801927, 0.940661156, 0.888767412, 0.914867532, 0.92237305]
]

# Lock count for each row of data (0 is presumably the run without any lock -- confirm).
row_labels = [0, 1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024]

# Thread count for each column of data.
column_labels = [1, 2, 4, 8, 12, 16, 24, 32, 48, 64, 128]

# Draw one curve per lock count: elapsed time as a function of thread count.
for row, label in zip(data, row_labels):
    plt.plot(column_labels, row, label=f"Locks: {label}", marker='o')

plt.legend()
plt.xlabel("Threads")
plt.ylabel("Time (s)")

plt.show()

The chart:

Figure_1

All tests were conducted on Linux on a 16-core x86-64 CPU.

lscpu
Architecture:        x86_64
CPU op-mode(s):      32-bit, 64-bit
Byte Order:          Little Endian
CPU(s):              16
On-line CPU(s) list: 0-15
Thread(s) per core:  1
Core(s) per socket:  16
Socket(s):           1
NUMA node(s):        1
Vendor ID:           GenuineIntel
BIOS Vendor ID:      Smdbmds
CPU family:          6
Model:               94
Model name:          Intel(R) Xeon(R) Gold 6133 CPU @ 2.50GHz
BIOS Model name:     3.0
Stepping:            3
CPU MHz:             2499.998
BogoMIPS:            4999.99
Hypervisor vendor:   KVM
Virtualization type: full
L1d cache:           32K
L1i cache:           32K
L2 cache:            4096K
L3 cache:            28160K
NUMA node0 CPU(s):   0-15
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment