czgdp1807/dict_bench.md

## dict_bench.md

      
    Raw
  

              dict_bench.md
            
          
    Codes
from ltypes import i32, f64

def test_dict():
    # Insert/lookup benchmark: the dict starts with one entry (0 -> 1.1),
    # then roll numbers 1000 .. 1000 + size - 1 are inserted and read back.
    cpi_by_roll: dict[i32, f64] = {0: 1.1}
    roll: i32
    size: i32 = 100000000
    first_roll: i32 = 1000
    last_roll: i32 = first_roll + size
    cpi_sum: f64 = 0.0

    # Phase 1: populate the dictionary.
    for roll in range(first_roll, last_roll):
        cpi_by_roll[roll] = float(roll/100.0 + 5.0)

    # Phase 2: look every key up again and accumulate the values.
    for roll in range(first_roll, last_roll):
        cpi_sum = cpi_sum + cpi_by_roll[roll]

    # The sum and the entry count are printed so the result can be checked.
    print(cpi_sum, len(cpi_by_roll))

test_dict()
#include <unordered_map>
#include <iostream>

void test_dict() {
    // Insert/lookup benchmark on std::unordered_map with the default hash:
    // keys 1000 .. 1000 + size - 1 are inserted and then read back.
    const int32_t size = 100000000;
    const int32_t first_roll = 1000;
    const int32_t last_roll = first_roll + size;

    std::unordered_map<int32_t, double> rollnumber2cpi;

    // Phase 1: populate the map.
    for (int32_t roll = first_roll; roll < last_roll; ++roll) {
        rollnumber2cpi[roll] = roll / 100.0 + 5.0;
    }

    // Phase 2: look every key up again and accumulate the values.
    double total = 0;
    for (int32_t roll = first_roll; roll < last_roll; ++roll) {
        total += rollnumber2cpi[roll];
    }

    // Only the sum is printed (no entry count, no trailing newline).
    std::cout << total;
}

int main() {
    // Run the benchmark once; the exit status is 0, as it was implicitly.
    test_dict();
    return 0;
}
#include <unordered_map>
#include <iostream>

// Number of keys the benchmark inserts; also the modulus used by ModuloHash.
size_t size = 100000000;

// Hash functor for int32_t keys: the key reduced modulo the global `size`.
struct ModuloHash {
    size_t operator() (int32_t key) const
    {
        // The key is widened to size_t first, which is the conversion the
        // mixed signed/unsigned `%` performs anyway.
        const size_t widened = static_cast<size_t>(key);
        return widened % size;
    }
};

void test_dict() {
    // Same insert/lookup workload as the default-hash variant, but hashing
    // with ModuloHash and calling reserve() for `size` elements up front.
    std::unordered_map<int32_t, double, ModuloHash> rollnumber2cpi;
    rollnumber2cpi.reserve(size);

    // `size` is a size_t, so the bound is unsigned and the signed roll number
    // is converted to size_t for the comparison.
    const size_t stop = 1000 + size;

    // Phase 1: populate the map.
    for (int32_t roll = 1000; static_cast<size_t>(roll) < stop; ++roll) {
        rollnumber2cpi[roll] = roll / 100.0 + 5.0;
    }

    // Phase 2: look every key up again and accumulate the values.
    double total = 0;
    for (int32_t roll = 1000; static_cast<size_t>(roll) < stop; ++roll) {
        total += rollnumber2cpi[roll];
    }

    std::cout << total << " " << rollnumber2cpi.size();
}

int main() {
    // Run the benchmark once; the exit status is 0, as it was implicitly.
    test_dict();
    return 0;
}
codon code
def test_dict():
    # Insert/lookup benchmark: the dict starts with one entry (0 -> 1.1),
    # then roll numbers 1000 .. 1000 + size - 1 are inserted and read back.
    cpi_by_roll = {0: 1.1}
    size = 100000000
    first_roll = 1000
    last_roll = first_roll + size
    cpi_sum = 0.0

    # Phase 1: populate the dictionary.
    for roll in range(first_roll, last_roll):
        cpi_by_roll[roll] = float(roll/100.0 + 5.0)

    # Phase 2: look every key up again and accumulate the values.
    for roll in range(first_roll, last_roll):
        cpi_sum = cpi_sum + cpi_by_roll[roll]

    print(cpi_sum, len(cpi_by_roll))

test_dict()
Apple M1 Macbook Pro macOS Monterey 12.5


Compiler
Time [s]
Relative


LPython (dict02)
2.502
2.00


LPython (dict03)
1.87
1.5


LPython (dict03) OptimizedLinearProbing
1.85
1.48


LPython SeparateChaining
4.931
4.0


LPython (dict_neg_keys) --fast
0.984
0.8


LPython (dict_neg_keys)
1.922
1.5


clang++ 13.1.6 arm64-apple-darwin21.6.0 -std=c++11
30.814
24.73


clang++ 13.1.6 arm64-apple-darwin21.6.0
38.581
30.96


clang++ 13.1.6 arm64-apple-darwin21.6.0 ModuloHash
36.846
29.457


LPython (dict02) --fast
1.246
1.0


LPython (dict03) --fast
1.1
0.88


LPython (dict03) --fast OptimizedLinearProbing
1.104
0.88


LPython --fast SeparateChaining
4.270
3.4


clang++ 13.1.6 arm64-apple-darwin21.6.0 -O3 -funroll-loops -std=c++11
7.386
5.93


clang++ 13.1.6 arm64-apple-darwin21.6.0 -O3 -funroll-loops
7.573
6.08


clang++ 13.1.6 arm64-apple-darwin21.6.0 -O3 -funroll-loops -ffast-math ModuloHash
7.814
6.27


Python 3.10.4
22.678
18.20


codon 0.15.5 (codon build -release -exe and time ./executable)
4.983
4.0


Both the computation time (user) and system calls (sys) are lower for LPython.
(lp) 12:52:38:~/lpython_project/lpython % clang++ /Users/czgdp1807/lpython_project/dict_bench_hash.cpp
(lp) 12:52:44:~/lpython_project/lpython % time ./a.out
5.00015e+13 100000000./a.out  34.17s user 1.90s system 96% cpu 37.385 total
(lp) 12:53:24:~/lpython_project/lpython % clang++ -O3 -funroll-loops -ffast-math /Users/czgdp1807/lpython_project/dict_bench_hash.cpp
(lp) 12:54:03:~/lpython_project/lpython % time ./a.out
5.00015e+13 100000000./a.out  5.47s user 1.73s system 87% cpu 8.260 total
(lp) 12:54:14:~/lpython_project/lpython % lpython /Users/czgdp1807/lpython_project/dict_bench.py
(lp) 12:54:25:~/lpython_project/lpython % time ./a.out
50001499500000.00000000000000000 100000001
./a.out  1.35s user 0.33s system 99% cpu 1.691 total
(lp) 12:54:30:~/lpython_project/lpython % lpython --fast /Users/czgdp1807/lpython_project/dict_bench.py
(lp) 12:54:49:~/lpython_project/lpython % time ./a.out
50001499500000.00000000000000000 100000001
./a.out  0.49s user 0.33s system 94% cpu 0.866 total
(lp) 12:54:52:~/lpython_project/lpython %

Ubuntu 18.04.6 on Intel(R) Core(TM) i7-8550U CPU @ 1.80GHz


Compiler
Time [s]
Relative


LPython (dict02)
2.801
1.23


LPython SeparateChaining
4.738
2.1


clang++ version 6.0.0-1ubuntu2 -std=c++11
34.041
15.03


clang++ version 6.0.0-1ubuntu2
34.189
15.09


clang++ version 6.0.0-1ubuntu2 ModuloHash
28.280
12.485


g++ 7.5.0
39.505
17.44


LPython (dict02) --fast
2.265
1.0


LPython --fast SeparateChaining
4.162
1.8


clang++ version 6.0.0-1ubuntu2 -O3 -march=native -funroll-loops -std=c++11
6.482
2.86


clang++ version 6.0.0-1ubuntu2 -O3 -march=native -funroll-loops
6.47
2.85


clang++ version 6.0.0-1ubuntu2 -O3 -march=native -funroll-loops -ffast-math ModuloHash
6.402
2.82


g++ 7.5.0 -O3 -march=native -funroll-loops
8.427
3.72


Python 3.10.5
15.454 (Killed In Between by the OS)
6.82


## dict_bench_2.md

      
    Raw
  

              dict_bench_2.md
            
          
    Benchmarking with a large number of collisions while inserting keys into the hash map.
Codes
from ltypes import i32, f64

def test_dict():
    # Insert/lookup benchmark over every 7th roll number in
    # [1000, 1000 + size); the dict starts with one entry (0 -> 1.1).
    cpi_by_roll: dict[i32, f64] = {0: 1.1}
    roll: i32
    size: i32 = 700000000
    first_roll: i32 = 1000
    last_roll: i32 = first_roll + size
    cpi_sum: f64 = 0.0

    # Phase 1: populate the dictionary.
    for roll in range(first_roll, last_roll, 7):
        cpi_by_roll[roll] = float(roll/100.0 + 5.0)

    # Phase 2: look every key up again and accumulate the values.
    for roll in range(first_roll, last_roll, 7):
        cpi_sum = cpi_sum + cpi_by_roll[roll]

    # The sum is printed as a difference from a hard-coded reference value.
    print(cpi_sum - 350001496500000.06250000000000000, len(cpi_by_roll))

test_dict()
#include <unordered_map>
#include <iostream>


void test_dict() {
    // Insert/lookup benchmark over every 7th key in [1000, 1000 + size),
    // using the default hash and reserving room for size / 7 elements.
    const int32_t size = 700000000;
    const int32_t stride = 7;
    const int32_t first_roll = 1000;
    const int32_t last_roll = first_roll + size;

    std::unordered_map<int32_t, double> rollnumber2cpi;
    rollnumber2cpi.reserve(size / stride);

    // Phase 1: populate the map.
    for (int32_t roll = first_roll; roll < last_roll; roll += stride) {
        rollnumber2cpi[roll] = roll / 100.0 + 5.0;
    }

    // Phase 2: look every key up again and accumulate the values.
    double total = 0;
    for (int32_t roll = first_roll; roll < last_roll; roll += stride) {
        total += rollnumber2cpi[roll];
    }

    // The sum is printed as a difference from a hard-coded reference value.
    std::cout << (total - 350001496500000.06250000000000000) << " " << rollnumber2cpi.size();
}

int main() {
    // Run the benchmark once; the exit status is 0, as it was implicitly.
    test_dict();
    return 0;
}
#include <unordered_map>
#include <iostream>

// Width of the key range walked by the benchmark (keys are taken every 7th).
size_t size = 700000000;

// Hash functor for int32_t keys: the key reduced modulo size / 7.
struct ModuloHash {
    size_t operator() (int32_t key) const
    {
        const size_t modulus = size / 7;
        // The key is widened to size_t first, which is the conversion the
        // mixed signed/unsigned `%` performs anyway.
        return static_cast<size_t>(key) % modulus;
    }
};

void test_dict() {
    // Insert/lookup benchmark over every 7th key in [1000, 1000 + size),
    // hashing with ModuloHash and reserving room for size / 7 elements.
    std::unordered_map<int32_t, double, ModuloHash> rollnumber2cpi;
    rollnumber2cpi.reserve(size / 7);

    // `size` is a size_t, so the bound is unsigned and the signed roll number
    // is converted to size_t for the comparison.
    const size_t stop = 1000 + size;

    // Phase 1: populate the map.
    for (int32_t roll = 1000; static_cast<size_t>(roll) < stop; roll += 7) {
        rollnumber2cpi[roll] = roll / 100.0 + 5.0;
    }

    // Phase 2: look every key up again and accumulate the values.
    double total = 0;
    for (int32_t roll = 1000; static_cast<size_t>(roll) < stop; roll += 7) {
        total += rollnumber2cpi[roll];
    }

    // The sum is printed as a difference from a hard-coded reference value.
    std::cout << (total - 350001496500000.06250000000000000) << " " << rollnumber2cpi.size();
}

int main() {
    // Run the benchmark once; the exit status is 0, as it was implicitly.
    test_dict();
    return 0;
}
codon code
def test_dict():
    # Insert/lookup benchmark over every 7th roll number in
    # [1000, 1000 + size); the dict starts with one entry (0 -> 1.1).
    cpi_by_roll = {0: 1.1}
    size = 700000000
    first_roll = 1000
    last_roll = first_roll + size
    cpi_sum = 0.0

    # Phase 1: populate the dictionary.
    for roll in range(first_roll, last_roll, 7):
        cpi_by_roll[roll] = float(roll/100.0 + 5.0)

    # Phase 2: look every key up again and accumulate the values.
    for roll in range(first_roll, last_roll, 7):
        cpi_sum = cpi_sum + cpi_by_roll[roll]

    # The sum is printed as a difference from a hard-coded reference value.
    print(cpi_sum - 350001496500000.06250000000000000, len(cpi_by_roll))

test_dict()


Compiler
Time [s]
Relative


LPython (main) --fast
2.024
1.0


LPython (main)
3.043
1.5


LPython (dict_neg_keys) --fast
2.612
1.3


LPython (dict_neg_keys)
3.767
1.86


LPython --fast SeparateChaining
4.300
2.1


LPython SeparateChaining
4.679
2.3


codon 0.15.5 (codon build -release -exe and time ./executable)
4.822
2.38


clang++ 13.1.6 arm64-apple-darwin21.6.0 -O3 -funroll-loops -ffast-math ModuloHash
6.832
3.4


clang++ 13.1.6 arm64-apple-darwin21.6.0 -O3 -funroll-loops -ffast-math
6.908
3.4


Python 3.10.4
23.651
11.7


clang++ 13.1.6 arm64-apple-darwin21.6.0
37.670
18.6


clang++ 13.1.6 arm64-apple-darwin21.6.0 ModuloHash
36.132
17.8


## dict_bench_3.md

      
    Raw
  

              dict_bench_3.md
            
          
    Codes
from ltypes import i32, f64

def test_dict():
    # Insert/pop benchmark over every 7th roll number in [1000, 1000 + size):
    # all keys are inserted, then removed again with pop().
    cpi_by_roll: dict[i32, f64] = {}
    roll: i32
    size: i32 = 700000000
    first_roll: i32 = 1000
    last_roll: i32 = first_roll + size
    cpi_sum: f64 = 0.0

    # Phase 1: populate the dictionary.
    for roll in range(first_roll, last_roll, 7):
        cpi_by_roll[roll] = float(roll/100.0 + 5.0)

    # Phase 2: pop every key and accumulate the removed values.
    for roll in range(first_roll, last_roll, 7):
        cpi_sum = cpi_sum + cpi_by_roll.pop(roll)

    # The sum is printed as a difference from a hard-coded reference value,
    # followed by the number of entries left in the dict.
    print(cpi_sum - 350001496500000.06250000000000000, len(cpi_by_roll))

test_dict()
#include <unordered_map>
#include <iostream>

// Width of the key range walked by the benchmark; also the ModuloHash modulus.
size_t size = 700000000;

// Hash functor for int32_t keys: the key reduced modulo the global `size`.
struct ModuloHash {
    size_t operator() (int32_t key) const
    {
        // The key is widened to size_t first, which is the conversion the
        // mixed signed/unsigned `%` performs anyway.
        const size_t widened = static_cast<size_t>(key);
        return widened % size;
    }
};

void test_dict() {
    // Insert/erase benchmark over every 7th key in [1000, 1000 + size),
    // hashing with ModuloHash; each key is read and then erased.
    std::unordered_map<int32_t, double, ModuloHash> rollnumber2cpi;

    // `size` is a size_t, so the bound is unsigned and the signed roll number
    // is converted to size_t for the comparison.
    const size_t stop = 1000 + size;

    // Phase 1: populate the map.
    for (int32_t roll = 1000; static_cast<size_t>(roll) < stop; roll += 7) {
        rollnumber2cpi[roll] = roll / 100.0 + 5.0;
    }

    // Phase 2: read each value, then remove its entry.
    double total = 0;
    for (int32_t roll = 1000; static_cast<size_t>(roll) < stop; roll += 7) {
        total += rollnumber2cpi[roll];
        rollnumber2cpi.erase(roll);
    }

    // The sum is printed as a difference from a hard-coded reference value,
    // followed by the number of entries left in the map.
    std::cout << (total - 350001496500000.06250000000000000) << " " << rollnumber2cpi.size() << "\n";
}

int main() {
    // Run the benchmark once; the exit status is 0, as it was implicitly.
    test_dict();
    return 0;
}
codon code
def test_dict():
    # Insert/pop benchmark over every 7th roll number in [1000, 1000 + size):
    # all keys are inserted, then removed again with pop().
    cpi_by_roll = {}
    size = 700000000
    first_roll = 1000
    last_roll = first_roll + size
    cpi_sum = 0.0

    # Phase 1: populate the dictionary.
    for roll in range(first_roll, last_roll, 7):
        cpi_by_roll[roll] = float(roll/100.0 + 5.0)

    # Phase 2: pop every key and accumulate the removed values.
    for roll in range(first_roll, last_roll, 7):
        cpi_sum = cpi_sum + cpi_by_roll.pop(roll)

    # The sum is printed as a difference from a hard-coded reference value,
    # followed by the number of entries left in the dict.
    print(cpi_sum - 350001496500000.06250000000000000, len(cpi_by_roll))

test_dict()


Compiler
Time [s]
Relative


LPython (dict05) --fast OptimizedLinearProbing
2.315
1.0


LPython (dict05) OptimizedLinearProbing
3.383
1.5


LPython --fast SeparateChaining
3.903
1.7


LPython (dict05) --fast
3.928
1.7


LPython SeparateChaining
4.212
1.8


codon 0.15.5 (codon build -release -exe and time ./executable)
4.456
2.0


LPython (dict05)
4.735
2.0


clang++ 13.1.6 arm64-apple-darwin21.6.0 -O3 -funroll-loops -ffast-math
10.731
4.6


Python 3.10.4
21.690
9.4


clang++ 13.1.6 arm64-apple-darwin21.6.0
56.361
24.3


## dict_bench_int64.md

      
    Raw
  

              dict_bench_int64.md
            
          
    Same as https://gist.github.com/czgdp1807/e7f7b6ca52c57b16b27ec8d0259c6d4a#file-dict_bench_str-md but with 64-bit integers as keys.
The difference can be seen clearly. With strings as keys, LPython loses performance for two reasons:

It cannot take advantage of the spatial locality of array elements (the major factor).
Each stringification calls into the C runtime (a minor factor that accounts for 10% of the total time on my Apple M1 MacBook Pro).

Codes
from ltypes import i32, i64

def test_dict():
    # Insert/pop benchmark with 64-bit keys: key c + n maps to n for
    # n in [1000, 1000 + size); every entry is then popped and summed.
    number2cpi: dict[i64, i64] = {}
    n: i32
    size: i32 = 10000000
    first: i32 = 1000
    stop: i32 = first + size
    c: i64 = 1048576
    correct: i64

    # Phase 1: populate the dictionary.
    for n in range(first, stop):
        number2cpi[c + n] = int(n)

    # Phase 2: pop every key and accumulate the removed values.
    correct = 0
    for n in range(first, stop):
        correct = correct + number2cpi.pop(c + n)

    # The expected sum 50009995000000 does not fit into an i32 constant, so
    # it is built as the product of two i64 factors.
    answer1: i64 = 500099950
    answer2: i64 = 100000
    print(correct - answer1 * answer2, correct)
    assert correct == answer1 * answer2

test_dict()
#include <iostream>
#include <unordered_map>

void test_dict() {
    // Insert/erase benchmark with 64-bit keys and the default hash: key
    // c + n maps to n for n in [1000, 1000 + size); each entry is then read
    // and erased.
    const int32_t size = 10000000;
    const int32_t first = 1000;
    const int32_t stop = first + size;
    const int64_t c = 1048576;

    std::unordered_map<int64_t, int64_t> number2cpi;

    // Phase 1: populate the map.
    for (int32_t n = first; n < stop; ++n) {
        number2cpi[c + n] = n;
    }

    // Phase 2: read each value, then remove its entry.
    int64_t correct = 0;
    for (int32_t n = first; n < stop; ++n) {
        const int64_t key = c + n;
        correct = correct + number2cpi[key];
        number2cpi.erase(key);
    }

    /* The expected sum 50009995000000 does not fit into a 32-bit constant,
       so it is built as the product of two 64-bit factors. */
    const int64_t answer1 = 500099950;
    const int64_t answer2 = 100000;
    std::cout << (correct - answer1 * answer2) << " " << correct << std::endl;
}

// Entry point: runs the benchmark once and exits with status 0.
int main() {
    test_dict();
    return 0;
}
#include <unordered_map>
#include <iostream>
#include <string>

// Number of keys the benchmark inserts; also the modulus used by ModuloHash.
uint32_t size = 10000000;

// Hash functor for the int64_t keys of this benchmark: the key reduced
// modulo the global `size`.
//
// The parameter is int64_t so that it matches the map's key type. With the
// previous int32_t parameter every key was narrowed before hashing, so keys
// that differ only above bit 31 hashed identically. For keys that fit in a
// non-negative int32_t the result is unchanged.
struct ModuloHash {
    size_t operator() (int64_t key) const
    {
        // Reduce in unsigned 64-bit arithmetic so the result is well defined
        // (and non-negative) for every possible key.
        return static_cast<uint64_t>(key) % size;
    }
};

#include <iostream>
#include <unordered_map>
#include <string>

void test_dict() {
    std::unordered_map<int64_t, int64_t, ModuloHash> number2cpi;
    int32_t i;
    int64_t c = 1048576;
    int64_t correct;

    for (i = 1000; i < 1000 + size; i++) {
        number2cpi[c + i] = i;
    }

    correct = 0;
    for (i = 1000; i < 1000 + size; i++) {
        correct = (correct + number2cpi[c + i]);
        number2cpi.erase(c + i);
    }

    /* 50009995000000 doesn't fit into i32
       as a constant, so splitting it into two
       parts and saving each into 64 bit so
       that the product goes into 64 bit
       where it fits easily */
    int64_t answer1 = 500099950;
    int64_t answer2 = 100000;
    std::cout<<(correct - answer1 * answer2)<<" "<<correct<<std::endl;
}

// Entry point: runs the benchmark once and exits with status 0.
int main() {
    test_dict();
    return 0;
}
codon code
def test_dict():
    # Insert/pop benchmark with integer keys: key c + n maps to n for
    # n in [1000, 1000 + size); every entry is then popped and summed.
    number2cpi = {}
    size = 10000000
    first = 1000
    stop = first + size
    c = 1048576

    # Phase 1: populate the dictionary.
    for n in range(first, stop):
        number2cpi[c + n] = int(n)

    # Phase 2: pop every key and accumulate the removed values.
    correct = 0
    for n in range(first, stop):
        correct = correct + number2cpi.pop(c + n)

    # The expected sum 50009995000000 is written as the product of two
    # factors, mirroring the LPython version where it does not fit into an
    # i32 constant.
    answer1 = 500099950
    answer2 = 100000
    print(correct - answer1 * answer2, correct)
    assert correct == answer1 * answer2

test_dict()
Apple M1 Macbook Pro macOS Monterey 12.5


Compiler
Time [s]
Relative


LPython (dict06) --fast
0.099
1.0


LPython (dict06)
0.134
1.4


LPython --fast SeparateChaining
0.216
2.2


codon 0.15.5 (codon build -release -exe and time ./executable)
0.257
2.6


LPython SeparateChaining
0.342
3.4


clang++ 13.1.6 arm64-apple-darwin21.6.0 -O3 -funroll-loops -ffast-math
0.979
9.9


clang++ 13.1.6 arm64-apple-darwin21.6.0 -O3 -funroll-loops -ffast-math ModuloHash
1.010
10.2


Python 3.10.4
1.676
16.9


clang++ 13.1.6 arm64-apple-darwin21.6.0
5.512
55.7


clang++ 13.1.6 arm64-apple-darwin21.6.0 ModuloHash
5.646
57.0


Ubuntu 18.04.6 on Intel(R) Core(TM) i7-8550U CPU @ 1.80GHz


Compiler
Time [s]
Relative


LPython (dict06) --fast
0.372
1.0


LPython (dict06)
0.441
1.2


LPython --fast SeparateChaining
0.533
1.4


LPython SeparateChaining
0.629
1.7


clang++ version 6.0.0-1ubuntu2 -O3 -march=native -funroll-loops -ffast-math
0.779
2.1


clang++ version 6.0.0-1ubuntu2 -O3 -march=native -funroll-loops -ffast-math ModuloHash
0.911
2.4


g++ 7.5.0 -O3 -march=native -funroll-loops -ffast-math
1.015
2.7


g++ 7.5.0 -O3 -march=native -funroll-loops -ffast-math ModuloHash
1.185
3.2


Python 3.10.5
2.534
6.8


clang++ version 6.0.0-1ubuntu2 ModuloHash
3.994
10.7


clang++ version 6.0.0-1ubuntu2
4.519
12.1


g++ 7.5.0 ModuloHash
4.567
12.3


g++ 7.5.0
5.270
14.2


## dict_bench_str.md

      
    Raw
  

              dict_bench_str.md
            
          
    Codes
from ltypes import i32, i64

def test_dict():
    # Insert/pop benchmark with string keys: str(c + n) maps to n for
    # n in [1000, 1000 + size); every entry is then popped and summed.
    number2cpi: dict[str, i64] = {}
    n: i32
    size: i32 = 10000000
    first: i32 = 1000
    stop: i32 = first + size
    key: str
    c: i32 = 1048576
    correct: i64

    # Phase 1: populate the dictionary.
    for n in range(first, stop):
        key = str(c + n)
        number2cpi[key] = int(n)

    # Phase 2: pop every key and accumulate the removed values.
    correct = 0
    for n in range(first, stop):
        key = str(c + n)
        correct = correct + number2cpi.pop(key)

    # The expected sum 50009995000000 does not fit into an i32 constant, so
    # it is built as the product of two i64 factors.
    answer1: i64 = 500099950
    answer2: i64 = 100000
    print(correct - answer1 * answer2, correct)
    assert correct == answer1 * answer2

test_dict()
#include <iostream>
#include <unordered_map>
#include <string>

void test_dict() {
    // Insert/erase benchmark with string keys and the default hash:
    // to_string(c + n) maps to n for n in [1000, 1000 + size); each entry is
    // then read and erased.
    const int32_t size = 10000000;
    const int32_t first = 1000;
    const int32_t stop = first + size;
    const int32_t c = 1048576;

    std::unordered_map<std::string, int64_t> number2cpi;

    // Phase 1: populate the map.
    for (int32_t n = first; n < stop; ++n) {
        number2cpi[std::to_string(c + n)] = n;
    }

    // Phase 2: read each value, then remove its entry. The key string is
    // declared outside the loop and reassigned on every iteration.
    std::string key;
    int64_t correct = 0;
    for (int32_t n = first; n < stop; ++n) {
        key = std::to_string(c + n);
        correct = correct + number2cpi[key];
        number2cpi.erase(key);
    }

    /* The expected sum 50009995000000 does not fit into a 32-bit constant,
       so it is built as the product of two 64-bit factors. */
    const int64_t answer1 = 500099950;
    const int64_t answer2 = 100000;
    std::cout << (correct - answer1 * answer2) << " " << correct << std::endl;
}

// Entry point: runs the benchmark once and exits with status 0.
int main() {
    test_dict();
    return 0;
}
#include <unordered_map>
#include <iostream>
#include <string>

// Number of keys the benchmark inserts; also the final modulus of the hash.
uint32_t size = 10000000;

// Polynomial rolling hash for std::string keys:
//   hash = sum(coeff(s[k]) * p^k) mod m, then reduced modulo the global `size`.
//
// The coefficient of a character is its unsigned byte value plus one. The
// previous `(c - 'a' + 1)` assumed lowercase letters; for the decimal-digit
// keys used here it was a negative int that wrapped around when converted to
// uint64_t, so the result was not the polynomial hash. Hash values therefore
// differ from the old implementation.
struct PolynomialRollingHash {
    size_t operator() (const std::string& key) const
    {
        const uint64_t p = 31;
        const uint64_t m = 100000009;
        uint64_t hash_value = 0;
        uint64_t p_pow = 1;
        for (const char ch : key) {
            // Always in 1..256, so no character contributes a zero or a
            // negative term.
            const uint64_t coeff = static_cast<unsigned char>(ch) + 1u;
            // Both operands stay below 2^27 here, so the product cannot
            // overflow 64 bits before the reduction.
            hash_value = (hash_value + coeff * p_pow) % m;
            p_pow = (p_pow * p) % m;
        }
        return hash_value % size;
    }
};

#include <iostream>
#include <unordered_map>
#include <string>

void test_dict() {
    // Insert/erase benchmark with string keys hashed by
    // PolynomialRollingHash: to_string(c + n) maps to n for
    // n in [1000, 1000 + size); each entry is then read and erased.
    const int32_t c = 1048576;
    std::unordered_map<std::string, int64_t, PolynomialRollingHash> number2cpi;

    // `size` is a uint32_t, so the bound is unsigned and the signed counter
    // is converted to uint32_t for the comparison.
    const uint32_t stop = 1000 + size;

    // Phase 1: populate the map.
    for (int32_t n = 1000; static_cast<uint32_t>(n) < stop; ++n) {
        number2cpi[std::to_string(c + n)] = n;
    }

    // Phase 2: read each value, then remove its entry. The key string is
    // declared outside the loop and reassigned on every iteration.
    std::string key;
    int64_t correct = 0;
    for (int32_t n = 1000; static_cast<uint32_t>(n) < stop; ++n) {
        key = std::to_string(c + n);
        correct = correct + number2cpi[key];
        number2cpi.erase(key);
    }

    /* The expected sum 50009995000000 does not fit into a 32-bit constant,
       so it is built as the product of two 64-bit factors. */
    const int64_t answer1 = 500099950;
    const int64_t answer2 = 100000;
    std::cout << (correct - answer1 * answer2) << " " << correct << std::endl;
}

// Entry point: runs the benchmark once and exits with status 0.
int main() {
    test_dict();
    return 0;
}
codon code
def test_dict():
    # Insert/pop benchmark with string keys: str(c + n) maps to n for
    # n in [1000, 1000 + size); every entry is then popped and summed.
    number2cpi = {}
    size = 10000000
    first = 1000
    stop = first + size
    c = 1048576

    # Phase 1: populate the dictionary.
    for n in range(first, stop):
        key = str(c + n)
        number2cpi[key] = int(n)

    # Phase 2: pop every key and accumulate the removed values.
    correct = 0
    for n in range(first, stop):
        key = str(c + n)
        correct = correct + number2cpi.pop(key)

    # The expected sum 50009995000000 is written as the product of two
    # factors, mirroring the LPython version where it does not fit into an
    # i32 constant.
    answer1 = 500099950
    answer2 = 100000
    print(correct - answer1 * answer2, correct)
    assert correct == answer1 * answer2

test_dict()
Apple M1 Macbook Pro macOS Monterey 12.5


Compiler
Time [s]
Relative


codon 0.15.5 (codon build -release -exe and time ./executable)
2.675
0.25


LPython --fast SeparateChaining
4.831
0.4


LPython SeparateChaining
5.048
0.5


clang++ 13.1.6 arm64-apple-darwin21.6.0 -O3 -funroll-loops -ffast-math PolynomialRollingHash
5.522
0.5


Python 3.10.4
6.050
0.6


LPython (dict08) --fast tripling
6.420
0.6


LPython (dict08) tripling
7.596
0.7


clang++ 13.1.6 arm64-apple-darwin21.6.0 -O3 -funroll-loops -ffast-math
7.608
0.7


LPython (dict06) --fast
10.784
1.0


LPython (dict06)
11.831
1.0


clang++ 13.1.6 arm64-apple-darwin21.6.0 PolynomialRollingHash
14.363
1.3


clang++ 13.1.6 arm64-apple-darwin21.6.0
16.477
1.5


codon output
18:46:31:~/lpython_project % time ./dict_bench
0 50009995000000
./dict_bench  3.19s user 0.09s system 122% cpu 2.675 total

Ubuntu 18.04.6 on Intel(R) Core(TM) i7-8550U CPU @ 1.80GHz


Compiler
Time [s]
Relative


LPython --fast SeparateChaining
8.323
0.3


LPython SeparateChaining
8.436
0.3


Python 3.10.5
9.426
0.4


clang++ version 6.0.0-1ubuntu2 -O3 -march=native -funroll-loops -ffast-math PolynomialRollingHash
11.293
0.5


g++ 7.5.0 -O3 -march=native -funroll-loops -ffast-math PolynomialRollingHash
11.517
0.5


g++ 7.5.0 -O3 -march=native -funroll-loops -ffast-math
12.147
0.5


clang++ version 6.0.0-1ubuntu2 -O3 -march=native -funroll-loops -ffast-math
12.215
0.5


LPython (dict08) --fast tripling
12.415
0.5


LPython (dict08) tripling
12.963
0.5


clang++ version 6.0.0-1ubuntu2
18.36
0.75


g++ 7.5.0
18.985
0.78


g++ 7.5.0 PolynomialRollingHash
20.422
0.8


clang++ version 6.0.0-1ubuntu2 PolynomialRollingHash
21.463
0.88


LPython (dict06) --fast
24.493
1.0


LPython (dict06)
27.328
1.1
Compiler	Time [s]	Relative
LPython (dict02)	2.502	2.00
LPython (dict03)	1.87	1.5
LPython (dict03) OptimizedLinearProbing	1.85	1.48
LPython SeparateChaining	4.931	4.0
LPython (dict_neg_keys) --fast	0.984	0.8
LPython (dict_neg_keys)	1.922	1.5
clang++ 13.1.6 arm64-apple-darwin21.6.0 -std=c++11	30.814	24.73
clang++ 13.1.6 arm64-apple-darwin21.6.0	38.581	30.96
clang++ 13.1.6 arm64-apple-darwin21.6.0 ModuloHash	36.846	29.457
LPython (dict02) --fast	1.246	1.0
LPython (dict03) --fast	1.1	0.88
LPython (dict03) --fast OptimizedLinearProbing	1.104	0.88
LPython --fast SeparateChaining	4.270	3.4
clang++ 13.1.6 arm64-apple-darwin21.6.0 -O3 -funroll-loops -std=c++11	7.386	5.93
clang++ 13.1.6 arm64-apple-darwin21.6.0 -O3 -funroll-loops	7.573	6.08
clang++ 13.1.6 arm64-apple-darwin21.6.0 -O3 -funroll-loops -ffast-math ModuloHash	7.814	6.27
Python 3.10.4	22.678	18.20
codon 0.15.5 (`codon build -release -exe` and `time ./executable`)	4.983	4.0
Compiler	Time [s]	Relative
LPython (dict02)	2.801	1.23
LPython SeparateChaining	4.738	2.1
clang++ version 6.0.0-1ubuntu2 -std=c++11	34.041	15.03
clang++ version 6.0.0-1ubuntu2	34.189	15.09
clang++ version 6.0.0-1ubuntu2 ModuloHash	28.280	12.485
g++ 7.5.0	39.505	17.44
LPython (dict02) --fast	2.265	1.0
LPython --fast SeparateChaining	4.162	1.8
clang++ version 6.0.0-1ubuntu2 -O3 -march=native -funroll-loops -std=c++11	6.482	2.86
clang++ version 6.0.0-1ubuntu2 -O3 -march=native -funroll-loops	6.47	2.85
clang++ version 6.0.0-1ubuntu2 -O3 -march=native -funroll-loops -ffast-math ModuloHash	6.402	2.82
g++ 7.5.0 -O3 -march=native -funroll-loops	8.427	3.72
Python 3.10.5	15.454 (Killed In Between by the OS)	6.82
Compiler	Time [s]	Relative
LPython (main) --fast	2.024	1.0
LPython (main)	3.043	1.5
LPython (dict_neg_keys) --fast	2.612	1.3
LPython (dict_neg_keys)	3.767	1.86
LPython --fast SeparateChaining	4.300	2.1
LPython SeparateChaining	4.679	2.3
codon 0.15.5 (`codon build -release -exe` and `time ./executable`)	4.822	2.38
clang++ 13.1.6 arm64-apple-darwin21.6.0 -O3 -funroll-loops -ffast-math ModuloHash	6.832	3.4
clang++ 13.1.6 arm64-apple-darwin21.6.0 -O3 -funroll-loops -ffast-math	6.908	3.4
Python 3.10.4	23.651	11.7
clang++ 13.1.6 arm64-apple-darwin21.6.0	37.670	18.6
clang++ 13.1.6 arm64-apple-darwin21.6.0 ModuloHash	36.132	17.8
Compiler	Time [s]	Relative
LPython (dict05) --fast OptimizedLinearProbing	2.315	1.0
LPython (dict05) OptimizedLinearProbing	3.383	1.5
LPython --fast SeparateChaining	3.903	1.7
LPython (dict05) --fast	3.928	1.7
LPython SeparateChaining	4.212	1.8
codon 0.15.5 (`codon build -release -exe` and `time ./executable`)	4.456	2.0
LPython (dict05)	4.735	2.0
clang++ 13.1.6 arm64-apple-darwin21.6.0 -O3 -funroll-loops -ffast-math	10.731	4.6
Python 3.10.4	21.690	9.4
clang++ 13.1.6 arm64-apple-darwin21.6.0	56.361	24.3
Compiler	Time [s]	Relative
LPython (dict06) --fast	0.099	1.0
LPython (dict06)	0.134	1.4
LPython --fast SeparateChaining	0.216	2.2
codon 0.15.5 (`codon build -release -exe` and `time ./executable`)	0.257	2.6
LPython SeparateChaining	0.342	3.4
clang++ 13.1.6 arm64-apple-darwin21.6.0 -O3 -funroll-loops -ffast-math	0.979	9.9
clang++ 13.1.6 arm64-apple-darwin21.6.0 -O3 -funroll-loops -ffast-math ModuloHash	1.010	10.2
Python 3.10.4	1.676	16.9
clang++ 13.1.6 arm64-apple-darwin21.6.0	5.512	55.7
clang++ 13.1.6 arm64-apple-darwin21.6.0 ModuloHash	5.646	57.0
Compiler	Time [s]	Relative
LPython (dict06) --fast	0.372	1.0
LPython (dict06)	0.441	1.2
LPython --fast SeparateChaining	0.533	1.4
LPython SeparateChaining	0.629	1.7
clang++ version 6.0.0-1ubuntu2 -O3 -march=native -funroll-loops -ffast-math	0.779	2.1
clang++ version 6.0.0-1ubuntu2 -O3 -march=native -funroll-loops -ffast-math ModuloHash	0.911	2.4
g++ 7.5.0 -O3 -march=native -funroll-loops -ffast-math	1.015	2.7
g++ 7.5.0 -O3 -march=native -funroll-loops -ffast-math ModuloHash	1.185	3.2
Python 3.10.5	2.534	6.8
clang++ version 6.0.0-1ubuntu2 ModuloHash	3.994	10.7
clang++ version 6.0.0-1ubuntu2	4.519	12.1
g++ 7.5.0 ModuloHash	4.567	12.3
g++ 7.5.0	5.270	14.2
Compiler	Time [s]	Relative
codon 0.15.5 (`codon build -release -exe` and `time ./executable`)	2.675	0.25
LPython --fast SeparateChaining	4.831	0.4
LPython SeparateChaining	5.048	0.5
clang++ 13.1.6 arm64-apple-darwin21.6.0 -O3 -funroll-loops -ffast-math PolynomialRollingHash	5.522	0.5
Python 3.10.4	6.050	0.6
LPython (dict08) --fast tripling	6.420	0.6
LPython (dict08) tripling	7.596	0.7
clang++ 13.1.6 arm64-apple-darwin21.6.0 -O3 -funroll-loops -ffast-math	7.608	0.7
LPython (dict06) --fast	10.784	1.0
LPython (dict06)	11.831	1.0
clang++ 13.1.6 arm64-apple-darwin21.6.0 PolynomialRollingHash	14.363	1.3
clang++ 13.1.6 arm64-apple-darwin21.6.0	16.477	1.5
Compiler	Time [s]	Relative
LPython --fast SeparateChaining	8.323	0.3
LPython SeparateChaining	8.436	0.3
Python 3.10.5	9.426	0.4
clang++ version 6.0.0-1ubuntu2 -O3 -march=native -funroll-loops -ffast-math PolynomialRollingHash	11.293	0.5
g++ 7.5.0 -O3 -march=native -funroll-loops -ffast-math PolynomialRollingHash	11.517	0.5
g++ 7.5.0 -O3 -march=native -funroll-loops -ffast-math	12.147	0.5
clang++ version 6.0.0-1ubuntu2 -O3 -march=native -funroll-loops -ffast-math	12.215	0.5
LPython (dict08) --fast tripling	12.415	0.5
LPython (dict08) tripling	12.963	0.5
clang++ version 6.0.0-1ubuntu2	18.36	0.75
g++ 7.5.0	18.985	0.78
g++ 7.5.0 PolynomialRollingHash	20.422	0.8
clang++ version 6.0.0-1ubuntu2 PolynomialRollingHash	21.463	0.88
LPython (dict06) --fast	24.493	1.0
LPython (dict06)	27.328	1.1