wathenjiang/random_access.md

## random_access.md

      
    Raw
  

              random_access.md
            
          
    The test code:
use rand::Rng;
use std::{
    sync::{Arc, Mutex},
    thread,
    time::Instant,
    vec,
};
// Intended size of one array element in bytes (8 = one 64-bit integer).
// NOTE(review): this is only the real element size if the arrays built in
// `main` actually hold 8-byte integers — confirm against the element type there.
const INT_SIZE: usize = 8;
// Total number of elements across all per-thread arrays: 4096 MiB worth of
// INT_SIZE-byte elements. The product 4096 * 1024 * 1024 is 2^32, so it needs
// a target whose `usize` is wider than 32 bits.
const ARRAY_SIZE: usize = 4096 * 1024 * 1024 / INT_SIZE;
// Total number of random reads per run; `main` divides it across the threads.
const ITERATIONS: usize = 100_000_000;

/// Random-access benchmark under striped mutex contention.
///
/// For every entry in `thread_counts` this splits `ARRAY_SIZE` elements evenly
/// into one array per thread, spawns the threads, and lets each one perform
/// `ITERATIONS / thread_count` reads at random indices of its own array. Each
/// read happens while holding one of `num_of_mutex` shared mutexes, chosen
/// round-robin by iteration index. After all threads are joined it prints the
/// elapsed wall-clock time and the sum of every value read.
///
/// # Panics
///
/// Panics if a worker thread panics or a mutex is poisoned.
fn main() {
    // Number of shared mutexes; 0 runs the lock-free baseline.
    // Change it in [0, 1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024].
    let num_of_mutex: usize = 16;
    let thread_counts = [1, 2, 4, 8, 12, 16, 24, 32, 48, 64, 128];

    for thread_count in thread_counts {
        // One private array per thread. The element type is spelled out as
        // `u64` so that every element really occupies INT_SIZE (8) bytes; an
        // unsuffixed `1` would fall back to `i32` and halve the footprint.
        let size = ARRAY_SIZE / thread_count;
        let vecs: Vec<Vec<u64>> = (0..thread_count).map(|_| vec![1u64; size]).collect();

        // Every mutex keeps its own `Arc` allocation. They are created before
        // the timer starts because, like the arrays, they are setup rather
        // than measured work.
        let global_mutexs: Vec<Arc<Mutex<()>>> = (0..num_of_mutex)
            .map(|_| Arc::new(Mutex::new(())))
            .collect();

        let start_time = Instant::now();
        let mut threads = Vec::with_capacity(thread_count);
        for data in vecs {
            let mutexes = global_mutexs.clone();
            threads.push(thread::spawn(move || {
                let mut rng = rand::thread_rng();
                let mut sum = 0u64;
                for i in 0..ITERATIONS / thread_count {
                    // `%` instead of `& (n - 1)`: the mask only reaches every
                    // mutex when `n` is a power of two. An empty set means
                    // "no locking" rather than an underflow.
                    let guard = if mutexes.is_empty() {
                        None
                    } else {
                        Some(
                            mutexes[i % mutexes.len()]
                                .lock()
                                .expect("benchmark mutex poisoned"),
                        )
                    };
                    let index = rng.gen_range(0..data.len());
                    sum += data[index];
                    // Release the lock (if any) before the next iteration.
                    drop(guard);
                }
                sum
            }));
        }

        let sum: u64 = threads
            .into_iter()
            .map(|handle| handle.join().expect("worker thread panicked"))
            .sum();

        let elapsed = start_time.elapsed();

        println!("threads: {}, elapsed: {:?}", thread_count, elapsed);
        println!("sum : {sum}");
    }
}
The results (elapsed time in seconds; rows are the number of mutexes, columns are the number of threads):


| mutexes/threads | 1 | 2 | 4 | 8 | 12 | 16 | 24 | 32 | 48 | 64 | 128 |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 5.980172444 | 2.899437975 | 1.447906311 | 0.828731566 | 0.689618066 | 0.612355429 | 0.589401394 | 0.587380871 | 0.525477567 | 0.578456362 | 0.552132325 |
| 1 | 7.970250774 | 17.29034894 | 22.60164692 | 25.97284605 | 28.12352579 | 33.31359697 | 31.18786342 | 31.61139126 | 29.23225856 | 30.94094675 | 31.59191497 |
| 2 | 7.883931727 | 15.97845738 | 16.11107368 | 18.73377898 | 20.34614133 | 23.02624802 | 22.69439808 | 23.15802647 | 21.80570219 | 22.48815498 | 22.98585238 |
| 4 | 7.975676415 | 10.25364766 | 11.88074538 | 15.40198137 | 15.51024255 | 16.35328034 | 15.46874828 | 15.7982897 | 15.48703267 | 15.67227903 | 15.35829948 |
| 8 | 8.058803258 | 8.138193999 | 7.619081588 | 7.936418179 | 7.654288652 | 7.901945312 | 7.642439744 | 7.861542054 | 7.730389506 | 7.821229611 | 7.748344488 |
| 16 | 9.797308994 | 6.213334839 | 4.455407945 | 4.496371955 | 4.291254249 | 4.130849346 | 4.347601475 | 4.294096757 | 3.990391527 | 4.028562691 | 4.059085994 |
| 32 | 8.742854719 | 4.847656612 | 3.301780829 | 2.578327826 | 2.480488617 | 2.331294827 | 2.388718271 | 2.306257478 | 2.421350161 | 2.278177495 | 2.26569423 |
| 64 | 8.042672888 | 4.963568223 | 3.012473492 | 2.08243512 | 1.828237002 | 1.653421053 | 1.550811454 | 1.536452054 | 1.519761769 | 1.618966043 | 1.48010674 |
| 128 | 8.62801309 | 4.978525185 | 2.637936755 | 1.777546296 | 1.549096849 | 1.359814529 | 1.43875245 | 1.385468038 | 1.238832309 | 1.249940559 | 1.248131329 |
| 256 | 8.584906215 | 4.591742459 | 2.441556366 | 1.504790937 | 1.335449235 | 1.169191715 | 1.115906268 | 1.230570609 | 1.075581823 | 1.048285585 | 1.02977064 |
| 512 | 8.171549127 | 4.182283461 | 2.37535305 | 1.54202412 | 1.1690348 | 1.054650104 | 1.015366906 | 1.153238581 | 0.993319168 | 0.998864737 | 0.981392837 |
| 1024 | 8.533398132 | 4.175120792 | 2.209645233 | 1.412410651 | 1.055442085 | 0.938202817 | 1.122801927 | 0.940661156 | 0.888767412 | 0.914867532 | 0.92237305 |


The drawing script:
import matplotlib.pyplot as plt

# Elapsed times in seconds: one row per mutex count (see row_labels), one
# column per thread count (see column_labels). The values mirror the published
# results table; the 8-lock / 2-thread entry previously read 8.363385774 here
# and has been aligned with the table's 8.138193999.
data = [
    [5.980172444, 2.899437975, 1.447906311, 0.828731566, 0.689618066, 0.612355429, 0.589401394, 0.587380871, 0.525477567, 0.578456362, 0.552132325],
    [7.970250774, 17.29034894, 22.60164692, 25.97284605, 28.12352579, 33.31359697, 31.18786342, 31.61139126, 29.23225856, 30.94094675, 31.59191497],
    [7.883931727, 15.97845738, 16.11107368, 18.73377898, 20.34614133, 23.02624802, 22.69439808, 23.15802647, 21.80570219, 22.48815498, 22.98585238],
    [7.975676415, 10.25364766, 11.88074538, 15.40198137, 15.51024255, 16.35328034, 15.46874828, 15.7982897, 15.48703267, 15.67227903, 15.35829948],
    [8.058803258, 8.138193999, 7.619081588, 7.936418179, 7.654288652, 7.901945312, 7.642439744, 7.861542054, 7.730389506, 7.821229611, 7.748344488],
    [9.797308994, 6.213334839, 4.455407945, 4.496371955, 4.291254249, 4.130849346, 4.347601475, 4.294096757, 3.990391527, 4.028562691, 4.059085994],
    [8.742854719, 4.847656612, 3.301780829, 2.578327826, 2.480488617, 2.331294827, 2.388718271, 2.306257478, 2.421350161, 2.278177495, 2.26569423],
    [8.042672888, 4.963568223, 3.012473492, 2.08243512, 1.828237002, 1.653421053, 1.550811454, 1.536452054, 1.519761769, 1.618966043, 1.48010674],
    [8.62801309, 4.978525185, 2.637936755, 1.777546296, 1.549096849, 1.359814529, 1.43875245, 1.385468038, 1.238832309, 1.249940559, 1.248131329],
    [8.584906215, 4.591742459, 2.441556366, 1.504790937, 1.335449235, 1.169191715, 1.115906268, 1.230570609, 1.075581823, 1.048285585, 1.02977064],
    [8.171549127, 4.182283461, 2.37535305, 1.54202412, 1.1690348, 1.054650104, 1.015366906, 1.153238581, 0.993319168, 0.998864737, 0.981392837],
    [8.533398132, 4.175120792, 2.209645233, 1.412410651, 1.055442085, 0.938202817, 1.122801927, 0.940661156, 0.888767412, 0.914867532, 0.92237305]
]

# Number of mutexes used for each row of `data` (0 = no locking).
row_labels = [0, 1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024]

# Number of threads used for each column of `data`.
column_labels = [1, 2, 4, 8, 12, 16, 24, 32, 48, 64, 128]

# zip() silently truncates on a length mismatch, so fail loudly instead of
# plotting a partial or misaligned chart.
assert len(data) == len(row_labels), "one label is required per data row"
assert all(len(row) == len(column_labels) for row in data), \
    "every row needs one value per thread count"

# One line per mutex count: time against thread count.
for row, label in zip(data, row_labels):
    plt.plot(column_labels, row, label=f"Locks: {label}", marker='o')

plt.legend()
plt.xlabel("Threads")
plt.ylabel("Time (s)")

plt.show()
The chart:

All tests were conducted on Linux on a 16-core x86_64 CPU; the `lscpu` output follows.
lscpu
Architecture:        x86_64
CPU op-mode(s):      32-bit, 64-bit
Byte Order:          Little Endian
CPU(s):              16
On-line CPU(s) list: 0-15
Thread(s) per core:  1
Core(s) per socket:  16
Socket(s):           1
NUMA node(s):        1
Vendor ID:           GenuineIntel
BIOS Vendor ID:      Smdbmds
CPU family:          6
Model:               94
Model name:          Intel(R) Xeon(R) Gold 6133 CPU @ 2.50GHz
BIOS Model name:     3.0
Stepping:            3
CPU MHz:             2499.998
BogoMIPS:            4999.99
Hypervisor vendor:   KVM
Virtualization type: full
L1d cache:           32K
L1i cache:           32K
L2 cache:            4096K
L3 cache:            28160K
NUMA node0 CPU(s):   0-15
mutexes/threads	1	2	4	8	12	16	24	32	48	64	128
0	5.980172444	2.899437975	1.447906311	0.828731566	0.689618066	0.612355429	0.589401394	0.587380871	0.525477567	0.578456362	0.552132325
1	7.970250774	17.29034894	22.60164692	25.97284605	28.12352579	33.31359697	31.18786342	31.61139126	29.23225856	30.94094675	31.59191497
2	7.883931727	15.97845738	16.11107368	18.73377898	20.34614133	23.02624802	22.69439808	23.15802647	21.80570219	22.48815498	22.98585238
4	7.975676415	10.25364766	11.88074538	15.40198137	15.51024255	16.35328034	15.46874828	15.7982897	15.48703267	15.67227903	15.35829948
8	8.058803258	8.138193999	7.619081588	7.936418179	7.654288652	7.901945312	7.642439744	7.861542054	7.730389506	7.821229611	7.748344488
16	9.797308994	6.213334839	4.455407945	4.496371955	4.291254249	4.130849346	4.347601475	4.294096757	3.990391527	4.028562691	4.059085994
32	8.742854719	4.847656612	3.301780829	2.578327826	2.480488617	2.331294827	2.388718271	2.306257478	2.421350161	2.278177495	2.26569423
64	8.042672888	4.963568223	3.012473492	2.08243512	1.828237002	1.653421053	1.550811454	1.536452054	1.519761769	1.618966043	1.48010674
128	8.62801309	4.978525185	2.637936755	1.777546296	1.549096849	1.359814529	1.43875245	1.385468038	1.238832309	1.249940559	1.248131329
256	8.584906215	4.591742459	2.441556366	1.504790937	1.335449235	1.169191715	1.115906268	1.230570609	1.075581823	1.048285585	1.02977064
512	8.171549127	4.182283461	2.37535305	1.54202412	1.1690348	1.054650104	1.015366906	1.153238581	0.993319168	0.998864737	0.981392837
1024	8.533398132	4.175120792	2.209645233	1.412410651	1.055442085	0.938202817	1.122801927	0.940661156	0.888767412	0.914867532	0.92237305