samueleresca/l1d-optimization\l1d-optimization.rs

## l1d-optimization\l1d-optimization.rs
use cache_size::l1_cache_line_size;

/// Returns the product of a generated matrix with a copy of itself.
///
/// The operand is built by `generate_matrix(n)` and cloned to obtain the
/// second operand, so the result is that matrix multiplied by itself.
/// The multiplication uses loop tiling: the index space is walked in square
/// tiles whose edge is the number of `f64` values that fit in one L1 cache
/// line, as reported by the `cache_size` crate.
///
/// If the cache line size cannot be determined, a 64-byte line is assumed
/// instead of panicking. The tile edge is never smaller than one element.
///
/// # Arguments
///
/// * `n` - The dimension of the matrix.
///
/// # Returns
///
/// An `n` x `n` matrix stored as a vector of rows.
///
/// # Panics
///
/// Indexing panics if `generate_matrix(n)` yields fewer than `n` rows or
/// columns (it is presumably `n` x `n`; confirm against its definition).
pub fn optimized_tiled(n: usize) -> Vec<Vec<f64>> {
    // Line size assumed when the platform does not report one.
    const FALLBACK_CACHE_LINE_BYTES: usize = 64;

    let m1 = generate_matrix(n);
    let m2 = m1.clone();
    let mut res = vec![vec![0.0; n]; n];

    // Number of f64 elements per cache line; this is the tile edge.
    let line_bytes: usize = l1_cache_line_size().unwrap_or(FALLBACK_CACHE_LINE_BYTES);
    // `step_by` panics on a zero step, so keep the tile edge at one or more.
    let block_size: usize = std::cmp::max(line_bytes / std::mem::size_of::<f64>(), 1);

    // Loop through each block
    for i in (0..n).step_by(block_size) {
        // Clamp the tile to the matrix edge when `n` is not a multiple of the tile size.
        let i_end = std::cmp::min(i + block_size, n);
        for j in (0..n).step_by(block_size) {
            let j_end = std::cmp::min(j + block_size, n);
            for k in (0..n).step_by(block_size) {
                let k_end = std::cmp::min(k + block_size, n);
                // Loop through each element in the block
                for ii in i..i_end {
                    for jj in j..j_end {
                        for kk in k..k_end {
                            // Accumulate the partial dot product for this tile.
                            res[ii][jj] += m1[ii][kk] * m2[kk][jj];
                        }
                    }
                }
            }
        }
    }

    res
}
	use cache_size::l1_cache_line_size;

	// NOTE(review): an item named `optimized_tiled` is also defined earlier in
	// this file; two functions with the same name cannot share a module, so
	// one of the copies presumably has to be removed. Confirm which one.
	/// Returns the product of a generated matrix with a copy of itself.
	///
	/// The operand is built by `generate_matrix(n)` and cloned to obtain the
	/// second operand. The multiplication is tiled: rows, columns and the
	/// reduction index are walked in blocks whose edge is the number of `f64`
	/// values that fit in one L1 cache line, as reported by the `cache_size`
	/// crate.
	///
	/// When the cache line size is unavailable, 64 bytes is assumed rather
	/// than panicking, and the block edge is clamped to at least one element.
	///
	/// # Arguments
	///
	/// * `n` - The dimension of the matrix.
	///
	/// # Returns
	///
	/// An `n` x `n` matrix stored as a vector of rows.
	///
	/// # Panics
	///
	/// Indexing panics if `generate_matrix(n)` yields fewer than `n` rows or
	/// columns (it is presumably `n` x `n`; confirm against its definition).
	pub fn optimized_tiled(n: usize) -> Vec<Vec<f64>> {
	    // Line size assumed when the platform does not report one.
	    const FALLBACK_CACHE_LINE_BYTES: usize = 64;

	    let m1 = generate_matrix(n);
	    let m2 = m1.clone();
	    let mut res = vec![vec![0.0; n]; n];

	    // Get the cache line size, expressed as a count of f64 elements.
	    let line_bytes: usize = l1_cache_line_size().unwrap_or(FALLBACK_CACHE_LINE_BYTES);
	    // A zero step would make `step_by` panic, so never go below one.
	    let block_size: usize = std::cmp::max(line_bytes / std::mem::size_of::<f64>(), 1);

	    // Loop through each block
	    for i in (0..n).step_by(block_size) {
	        // The last block in each dimension may be shorter than `block_size`.
	        let i_end = std::cmp::min(i + block_size, n);
	        for j in (0..n).step_by(block_size) {
	            let j_end = std::cmp::min(j + block_size, n);
	            for k in (0..n).step_by(block_size) {
	                let k_end = std::cmp::min(k + block_size, n);
	                // Loop through each element in the block
	                for ii in i..i_end {
	                    for jj in j..j_end {
	                        for kk in k..k_end {
	                            // Do the actual multiplication
	                            res[ii][jj] += m1[ii][kk] * m2[kk][jj];
	                        }
	                    }
	                }
	            }
	        }
	    }

	    res
	}