A dumb microbenchmark: folding a 10000000 element list in OCaml vs C++ vs Python vs Go
Aggregate results: | |
Wall time: | |
OCaml: 0.76s | |
C++ (gcc defaults): 1.45s | |
C++ (gcc with -O2): 0.62s | |
Python: 2.18s | |
Go: 3.3s | |
Memory consumption: | |
OCaml: 146MiB | |
C++: 230MiB | |
Python: 347MiB | |
Go: 190MiB | |
$ ocamlopt -o list_test_ml ./list_test.ml | |
$ time ./list_test_ml | |
50000005000000 | |
real 0m0.763s | |
user 0m0.669s | |
sys 0m0.087s | |
$ time ./list_test_ml | |
50000005000000 | |
real 0m0.798s | |
user 0m0.720s | |
sys 0m0.071s | |
$ time ./list_test_ml | |
50000005000000 | |
real 0m0.723s | |
user 0m0.647s | |
sys 0m0.070s | |
$ g++ -o list_test_cpp ./list_test.cpp | |
$ time ./list_test_cpp | |
50000005000000 | |
real 0m1.412s | |
user 0m1.316s | |
sys 0m0.091s | |
$ time ./list_test_cpp | |
50000005000000 | |
real 0m1.500s | |
user 0m1.397s | |
sys 0m0.097s | |
$ time ./list_test_cpp | |
50000005000000 | |
real 0m1.431s | |
user 0m1.332s | |
sys 0m0.093s | |
$ time ./list_test.py | |
50000005000000 | |
real 0m2.127s | |
user 0m1.998s | |
sys 0m0.121s | |
$ time ./list_test.py | |
50000005000000 | |
real 0m2.282s | |
user 0m2.149s | |
sys 0m0.124s | |
$ time ./list_test.py | |
50000005000000 | |
real 0m2.142s | |
user 0m2.007s | |
sys 0m0.127s | |
$ go build list-test.go | |
$ time ./list-test | |
50000005000000 | |
real 0m3.384s | |
user 0m3.621s | |
sys 0m0.249s | |
$ time ./list-test | |
50000005000000 | |
real 0m3.136s | |
user 0m4.035s | |
sys 0m0.243s | |
$ time ./list-test | |
50000005000000 | |
real 0m3.382s | |
user 0m3.583s | |
sys 0m0.256s | |
# valgrind calls | |
$ valgrind --tool=massif ./list_test_ml | |
$valgrind --tool=massif ./list_test_cpp | |
$ valgrind --tool=massif python3 ./list_test.py | |
# Peak memory usage viewed with massif-visualizer | |
OCaml: 146MiB | |
C++: 230MiB | |
Python: 347MiB | |
Valgrind doesn't work for go, profiling with pprof tool gives: | |
$ go tool pprof --text mem.pprof | |
190.47MB of 190.47MB total ( 100%) | |
flat flat% sum% cum cum% | |
190.47MB 100% 100% 190.47MB 100% | |
package main | |
import "container/list"; | |
import "fmt" | |
func main() { | |
first := 0; | |
last := 10000000; | |
l := list.New(); | |
// build the list | |
for i := first; i <= last; i = i + 1 { | |
l.PushFront(i); | |
} | |
// fold the list | |
sum := 0 | |
for e := l.Front(); e != nil; e = e.Next() { | |
sum = sum + e.Value.(int) | |
} | |
fmt.Println(sum) | |
} |
#include <iostream> | |
#include <list> | |
int main(void) | |
{ | |
int first = 0; | |
int last = 10000000; | |
std::list<int> l; | |
for(int x=first;x<=last;x++) | |
{ | |
l.push_front(x); // push_front is O(1) | |
} | |
long sum = 0; | |
for (std::list<int>::const_iterator i = l.begin(), end = l.end(); i != end; ++i) | |
{ | |
sum += *i; | |
} | |
std::cout << sum; | |
return(0); | |
} |
let first = 0 | |
let last = 10000000 | |
let make_list first last = | |
let rec aux xs n = | |
if n >= last then xs | |
else | |
let next = n + 1 in aux (next :: xs) next (* :: aka cons is O(1) *) | |
in aux [] first | |
(* If my memory hadn't failed me I wouldn't reimplement it, List.fold_left is already tail recursive *) | |
let fold f a xs = | |
let rec aux f acc ys = | |
match ys with | |
| [] -> acc | |
| y :: ys -> | |
let acc = f acc y in aux f acc ys | |
in aux f a xs | |
let () = | |
let xs = make_list first last in | |
print_endline @@ string_of_int (fold (+) 0 xs) |
#!/usr/bin/env python3 | |
first = 0 | |
last = 10000000 | |
def make_list(first, last): | |
i = first | |
l = [] | |
while i <= last: | |
l.append(i) # append is O(1) | |
i = i + 1 | |
return l | |
def sum_list(list): | |
sum = 0 | |
for i in list: | |
sum = sum + i | |
return sum | |
l = make_list(first, last) | |
print(sum(l)) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
This comment has been minimized.
Interesting results, although I think there are better ways to write the Python program.
Here is some Python code that does exactly the same thing:
In this code I deliberately created a list before summation so it would somewhat resemble your code algorithmically. If I forget about the list and sum the iterator directly it is even faster:
For this test your OCaml program was built with
ocamlbuild list_test.native
.Here are the numbers on my computer averaged over 20 runs:
The timing script I used: