Skip to content

Instantly share code, notes, and snippets.

@kokizzu
Forked from spion/a-warning.md
Created April 10, 2021 13:36
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save kokizzu/082a44845887a1b842d141340f6bbdc8 to your computer and use it in GitHub Desktop.
Save kokizzu/082a44845887a1b842d141340f6bbdc8 to your computer and use it in GitHub Desktop.
C++ versus V8 versus luajit versus C benchmark - (hash) tables

Warning

This benchmark has been misleading for a while. It was originally made to demonstrate how JIT compilers can do all sorts of crazy stuff to your code - especially LuaJIT - and was meant to be a starting point of discussion about what exactly LuaJIT does and how.

As a result, its not indicative of what its performance may be on more realistic data. Differences can be expected because

  1. the text will not consist of hard-coded constants
  2. the number of words (and therefore the dictionary) would be larger, and JIT compilers for JS and Lua often have special optimizations for small dictionaries/tables
  3. the words wont be pre-split, and allocating new words adds significant performance penalty (in that case a trie would probably outperform other approaches)
spion@missperfect:~/Projects/testhash$ gcc -O3 test.c -o testc
spion@missperfect:~/Projects/testhash$ time ./testc
The 1000000
a 2000000
at 1000000
brown 1000000
dog 1000000
era 1000000
fox 1000000
jumped 1000000
lake 1000000
lazy 1000000
new 1000000
near 1000000
of 1000000
over 1000000
quick 1000000
restaurant 1000000
the 2000000
real 0m0.234s
user 0m0.228s
sys 0m0.004s
spion@missperfect:~/Projects/testhash$ g++ -O3 -std=gnu++0x test.cpp -o test
spion@missperfect:~/Projects/testhash$ time ./test
the 2000000
at 1000000
a 2000000
brown 1000000
dog 1000000
era 1000000
fox 1000000
jumped 1000000
The 1000000
lake 1000000
lazy 1000000
new 1000000
near 1000000
of 1000000
over 1000000
quick 1000000
restaurant 1000000
real 0m0.321s
user 0m0.320s
sys 0m0.000s
spion@missperfect:~/Projects/testhash$ time node test.js
The 1000000
quick 1000000
brown 1000000
fox 1000000
jumped 1000000
over 1000000
the 2000000
lazy 1000000
dog 1000000
at 1000000
a 2000000
restaurant 1000000
near 1000000
lake 1000000
of 1000000
new 1000000
era 1000000
real 0m0.317s
user 0m0.304s
sys 0m0.012s
spion@missperfect:~/Projects/testhash$ time luajit test.lua
new 1000000
lazy 1000000
The 1000000
era 1000000
of 1000000
at 1000000
fox 1000000
jumped 1000000
quick 1000000
brown 1000000
lake 1000000
over 1000000
near 1000000
the 2000000
dog 1000000
a 2000000
restaurant 1000000
real 0m0.161s
user 0m0.160s
sys 0m0.000s
// Like in the c++ version, we
// have a regular hash table
// with a custom hash function
#include <string.h>
#include <stdlib.h>
#include <stdio.h>
struct fht_node {
char* key;
void* data;
struct fht_node * next;
};
typedef struct fht {
struct fht_node ** nodes;
size_t size;
} FHT;
FHT* fht_create() {
FHT *ht = malloc(sizeof(struct fht));
ht->size = 1 << (sizeof(char)*8);
ht->nodes = calloc(ht->size, sizeof(struct fht_node));
return ht;
}
fht_put(FHT* hashtbl, char* key, void* data) {
struct fht_node *node = hashtbl->nodes[key[0]];
while(node) {
if(!strcmp(node->key, key)) {
node->data=data;
return 0;
}
node=node->next;
}
node=malloc(sizeof(struct fht_node));
node->key= strdup(key);
node->data=data;
node->next=hashtbl->nodes[key[0]];
hashtbl->nodes[key[0]]=node;
}
void* fht_get(FHT* hashtbl, char* key) {
struct fht_node *node = hashtbl->nodes[key[0]];
while (node) {
if (!strcmp(node->key, key)) return node->data;
node = node->next;
}
return NULL;
}
int main() {
char* text[19] = {
"The", "quick", "brown", "fox", "jumped", "over", "the", "lazy", "dog", "at", "a",
"restaurant", "near", "the", "lake", "of", "a", "new", "era"
};
FHT *hashtbl = fht_create();
int textLen = 19;
int times = 1000000;
int k, *cnt;
while (times--)
for (k = 0; k < textLen; ++k) {
cnt = fht_get(hashtbl, text[k]);
if (!cnt) {
cnt = malloc(sizeof(int));
*cnt = 1;
fht_put(hashtbl, text[k], cnt);
} else { *cnt += 1; }
}
for (k = 0; k < hashtbl->size; ++k) {
struct fht_node *n = hashtbl->nodes[k];
while (n) {
printf("%s %d\n", n->key, *((int *)n->data));
n = n->next;
}
}
}
#include <unordered_map>
#include <string>
#include <vector>
#include <iostream>
#include <string>
using namespace std;
// In C++ despite only accessing the hashtable
// once per loop, we also need a custom hash function
// otherwise the program will be 2 times slower.
struct CustomHash {
size_t operator() (const string& s) const { return (size_t)s[0]; }
};
int main() {
vector<string> text = {"The", "quick", "brown", "fox", "jumped", "over", "the", "lazy", "dog", "at", "a", "restaurant", "near", "the", "lake", "of", "a", "new", "era"};
unordered_map<string, int, CustomHash> cnt;
int times = 1000000;
while (times--)
for (auto it = text.begin(); it != text.end(); ++it)
cnt[*it] += 1;
for (auto it = cnt.begin(); it != cnt.end(); ++it) {
cout << it->first << " " << it->second << endl;
}
}
var text = ["The", "quick", "brown", "fox", "jumped", "over", "the", "lazy", "dog", "at", "a", "restaurant", "near", "the", "lake", "of", "a", "new", "era"],
map = {};
times = 1000001;
while (--times)
for (var k = 0; k < text.length; ++k) {
// Unlike luajit, if we put if (map[text[k]]) map[text[k]] += 1
// in v8, this program will become 6 times slower. Instead we create a new object with a counter
// and this way we'll only need to access the hashtable once per loop.
var kth = map[text[k]];
if (kth) kth.c += 1;
else map[text[k]] = {c:1};
}
for (var key in map) {
console.log(key, map[key].c);
}
-- The lua program needs no optimizations.
-- It just works. Fast.
-- Note that luajit probably optimizes away
-- all dictionary lookups :)
local text = {"The", "quick", "brown", "fox", "jumped", "over", "the", "lazy", "dog", "at", "a", "restaurant", "near", "the", "lake", "of", "a", "new", "era"}
local map = {}
local times = 1000000
while times > 0 do
for i = 1, table.getn(text), 1 do
if map[text[i]] then map[text[i]] = map[text[i]] + 1
else map[text[i]] = 1 end
end
times = times - 1;
end
for key, value in pairs(map) do
io.write(key, " ", value, "\n")
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment