Skip to content

Instantly share code, notes, and snippets.

@ro99
Created October 18, 2021 18:42
Show Gist options
  • Save ro99/bf10834aa73a4c4e47e83c71f65cadae to your computer and use it in GitHub Desktop.
Save ro99/bf10834aa73a4c4e47e83c71f65cadae to your computer and use it in GitHub Desktop.
tests with crossbeam (1.5x speedup, independent of the size of t)
/// Runs kernel `K` over every tile of an `m` x `n` problem, spreading the full
/// (`mr` x `nr`) tiles over worker threads.
///
/// Tiles are addressed as `(ia, ib)` with `ia` in `0..m / mr` and `ib` in
/// `0..n / nr`. The `ib` range is split into at most `t` contiguous chunks and
/// one scoped thread is spawned per chunk; each worker owns a private scratch
/// space and walks every `ia` for its chunk. The partial tiles on the `n`
/// border (`n % nr != 0`) and on the `m` border (`m % mr != 0`) are processed
/// exactly once each, on the calling thread, while the workers are running.
///
/// # Parameters
/// * `m`, `n` - problem extents along the two tiled dimensions.
/// * `t` - upper bound on the number of worker threads; `0` is treated as `1`.
/// * `non_linear` - fused specs forwarded untouched to the scratch space.
///
/// # Errors
/// Currently always returns `Ok(())`.
///
/// # Panics
/// Panics if a worker thread panics, or if the allocated scratch space is not
/// a `ScratchSpaceFusedNonLinear<TI>`. In debug builds it also panics when the
/// kernel returns a non-zero status.
///
/// # Safety
/// This forwards to the unsafe scratch-space and kernel entry points; the
/// caller must uphold whatever they require of `m`, `n` and `non_linear`
/// (NOTE(review): that contract is not visible in this function - confirm
/// against the scratch space implementation). Distinct tiles are processed
/// concurrently, so they are presumably required not to alias - confirm.
unsafe fn run_with_scratch_space_parallel(
    &self,
    m: usize,
    n: usize,
    t: usize,
    non_linear: &[FusedSpec],
) -> anyhow::Result<()> {
    let mr = K::mr();
    let nr = K::nr();
    let full_m_tiles = m / mr;
    let full_n_tiles = n / nr;

    // Guard against `t == 0` (division by zero) and use a ceiling division so
    // the work is spread over as many of the `t` workers as possible. The
    // chunk length is kept >= 1 because `step_by(0)` panics.
    let workers = t.max(1);
    let chunk_len =
        (full_n_tiles / workers + usize::from(full_n_tiles % workers != 0)).max(1);

    crossbeam::scope(|scope| {
        // Full tiles: one worker per contiguous `ib` range. Nothing to spawn
        // when there is no full tile along `m`.
        if full_m_tiles > 0 {
            for ib_start in (0..full_n_tiles).step_by(chunk_len) {
                let ib_end = (ib_start + chunk_len).min(full_n_tiles);
                scope.spawn(move |_| {
                    // Each worker gets its own scratch space, prepared once
                    // and reused for every tile it handles.
                    let mut scratch = self.allocate_scratch_space();
                    let scratch = scratch
                        .downcast_mut::<ScratchSpaceFusedNonLinear<TI>>()
                        .unwrap();
                    scratch.prepare::<K>(&non_linear);
                    for ia in 0..full_m_tiles {
                        for ib in ib_start..ib_end {
                            scratch.for_valid_tile::<K>(&non_linear, ia, ib);
                            let err = K::kernel(&scratch.uspecs());
                            debug_assert_eq!(err, 0, "Kernel return error {}", err);
                        }
                    }
                });
            }
        }

        // Border tiles are handled here, on the calling thread, so that each
        // one is computed exactly once regardless of how many workers exist.
        let mut scratch = self.allocate_scratch_space();
        let scratch = scratch
            .downcast_mut::<ScratchSpaceFusedNonLinear<TI>>()
            .unwrap();
        scratch.prepare::<K>(&non_linear);

        // Partial tiles along `n`, for every full `ia`.
        if n % nr != 0 {
            for ia in 0..full_m_tiles {
                scratch.for_border_tile::<K>(&non_linear, ia, full_n_tiles);
                let err = K::kernel(&scratch.uspecs());
                debug_assert_eq!(err, 0, "Kernel return error {}", err);
                scratch.postprocess_tile::<K>(&non_linear, ia, full_n_tiles, mr, n % nr);
            }
        }

        // Partial tiles along `m`, including the corner tile shared with the
        // `n` border.
        if m % mr != 0 {
            for ib in 0..full_n_tiles {
                scratch.for_border_tile::<K>(&non_linear, full_m_tiles, ib);
                let err = K::kernel(&scratch.uspecs());
                debug_assert_eq!(err, 0, "Kernel return error {}", err);
                scratch.postprocess_tile::<K>(&non_linear, full_m_tiles, ib, m % mr, nr);
            }
            if n % nr != 0 {
                scratch.for_border_tile::<K>(&non_linear, full_m_tiles, full_n_tiles);
                let err = K::kernel(&scratch.uspecs());
                debug_assert_eq!(err, 0, "Kernel return error {}", err);
                scratch.postprocess_tile::<K>(
                    &non_linear,
                    full_m_tiles,
                    full_n_tiles,
                    m % mr,
                    n % nr,
                );
            }
        }
    })
    .expect("worker thread panicked in run_with_scratch_space_parallel");
    Ok(())
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment