Skip to content

Instantly share code, notes, and snippets.

@samueleresca
Created March 26, 2023 16:39
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save samueleresca/899ce8463a45ade58f5d7993a39f338c to your computer and use it in GitHub Desktop.
Save samueleresca/899ce8463a45ade58f5d7993a39f338c to your computer and use it in GitHub Desktop.
use cache_size::l1_cache_line_size;
/// Returns the product of a generated `n`×`n` matrix with itself.
///
/// The matrix is produced by `generate_matrix(n)` and used as both the left and
/// the right operand. The multiplication uses loop tiling: the index space is
/// split into square tiles so that the data touched by the inner loops stays
/// small.
///
/// The tile edge is the L1 cache line size reported by the `cache_size` crate
/// divided by the size of an `f64`. If the cache line size cannot be detected,
/// a 64-byte line is assumed; the tile edge is never allowed to drop below 1.
///
/// # Arguments
///
/// * `n` - The dimension of the matrix.
///
/// # Returns
///
/// The `n`×`n` product as a vector of rows. For `n == 0` the result is empty.
pub fn optimized_tiled(n: usize) -> Vec<Vec<f64>> {
    // Assumed line size when detection fails; 64 bytes is a common value,
    // and a wrong guess only affects speed, never the result.
    const FALLBACK_CACHE_LINE_BYTES: usize = 64;

    // Both operands are the same matrix, so one borrowed copy is enough.
    let m = generate_matrix(n);
    let mut res = vec![vec![0.0; n]; n];

    // Number of f64 values per cache line. Clamped to at least 1 because
    // `step_by(0)` panics.
    let line_bytes = l1_cache_line_size().unwrap_or(FALLBACK_CACHE_LINE_BYTES);
    let block_size = (line_bytes / std::mem::size_of::<f64>()).max(1);

    // Loop through each tile; the tile bounds are computed once per tile.
    for i in (0..n).step_by(block_size) {
        let i_end = (i + block_size).min(n);
        for j in (0..n).step_by(block_size) {
            let j_end = (j + block_size).min(n);
            for k in (0..n).step_by(block_size) {
                let k_end = (k + block_size).min(n);

                // Within a tile, walk rows of both the output and the right
                // operand contiguously. Each output cell still accumulates its
                // terms in ascending `kk` order, so the floating-point result
                // matches the plain `ii`/`jj`/`kk` nesting exactly.
                for ii in i..i_end {
                    let out_row = &mut res[ii][j..j_end];
                    for kk in k..k_end {
                        let lhs = m[ii][kk];
                        for (acc, rhs) in out_row.iter_mut().zip(&m[kk][j..j_end]) {
                            *acc += lhs * rhs;
                        }
                    }
                }
            }
        }
    }
    res
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment