Skip to content

Instantly share code, notes, and snippets.

@anowell
Last active June 13, 2018 23:09
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save anowell/9936f3b0f4ba0a8ad2c651bf9baf38ed to your computer and use it in GitHub Desktop.
Save anowell/9936f3b0f4ba0a8ad2c651bf9baf38ed to your computer and use it in GitHub Desktop.
bench iterators and loops of Zip-Map-Sum
(Benchmark disclaimers apply. Don't trust the numbers without double checking my code.)
Scala (functional): 697,099 ns/iter
Scala (for loop): 20,467 ns/iter
Rust (functional): 4,984 ns/iter
Rust (for loop): 4,983 ns/iter // Note: this required adding assertions to prevent bounds checking
C (for loop): 6,172 ns/iter // Note: this used -O2 optimization
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
uint zms(uint *a, uint *b, uint aLen, uint bLen) {
uint sum = 0;
uint minLen = (aLen > bLen) ? aLen : bLen;
for(int i=0; i < minLen; i++) {
sum += (a[i] * b[i]);
}
return sum;
}
unsigned long get_time() {
timespec ts;
clock_gettime(CLOCK_REALTIME, &ts);
return ts.tv_sec * 1e9 + ts.tv_nsec;
}
int main() {
uint *these, *those;
unsigned long start, stop;
these = (uint*) malloc (10000 * sizeof(uint));
those = (uint*) malloc (10000 * sizeof(uint));
for(int i=0; i<10000; i++) {
these[i] = i%997;
those[i] = i%97;
}
// Storing sum to keep loop from being optimized away
uint sum = 0;
start = get_time();
for(int i=0; i<1000; i++) {
sum = zms(these, those, 10000, 10000);
}
stop = get_time();
printf("sum=%d\n", sum);
printf("%d ns/iter", (stop-start)/1000);
}
#![feature(test)]
extern crate test;
pub fn zms(a: &[u32], b: &[u32]) -> u32 {
a.iter()
.zip(b)
.map(|(i,j)| i*j)
.sum()
}
pub fn zms_for_loop(a: &[u32], b: &[u32]) -> u32 {
let mut sum = 0;
let min_len = ::std::cmp::min(a.len(), b.len());
// assert less than length to force LLVM to optimize out the bounds check
// while still avoiding the unsafe block to use .get_unchecked(i)
// otherwise the bounds checks ends up being ~40% of the tight loop
assert!(min_len - 1 < a.len());
assert!(min_len - 1 < b.len());
for i in 0..min_len {
sum += a[i] * b[i];
}
sum
}
#[cfg(test)]
mod tests {
use test::Bencher;
use super::*;
fn prep() -> (Vec<u32>, Vec<u32>) {
let mut these = vec![];
let mut those = vec![];
let count = 10000;
for i in 0..count {
these.push(i%997);
those.push(i%97);
}
(these, those)
}
#[bench]
fn bench_zms(b: &mut Bencher) {
let (these, those) = prep();
b.iter(|| zms(&these, &those) );
}
#[bench]
fn bench_zms_for_loop(b: &mut Bencher) {
let (these, those) = prep();
b.iter(|| zms_for_loop(&these, &those));
}
}
var these: Array[Int] = Array();
var those: Array[Int] = Array();
// This is not efficient, but who cares, we aren't benching this step
for(i <- 0 until 10000) { these = these :+ i%997; those = those :+ i%97 }
def zms(a: Array[Int], b: Array[Int]) = a.zip(b).map{ case (i,j) => i*j }.sum
def zmsLoop(a: Array[Int], b: Array[Int]) = {
var sum=0
val min_len = math.min(a.length, b.length)
for(i <- 0 until min_len) {
sum += (a(i) * b(i))
}
sum
}
// Run zms 1000 times
val start = System.nanoTime; for(i <- 0 until 1000) { zms(these, those) }; val stop = System.nanoTime; val zmsDur = (stop-start)/1000
// Run zmsLoop 1000 times
val start = System.nanoTime; for(i <- 0 until 1000) { zmsLoop(these, those) }; val stop = System.nanoTime; val zmsLoopDur = (stop-start)/1000
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment