jh3141/Makefile

## benchmark
Benchmark updated at Sat Dec  6 23:05:32 UTC 2014

--- time ./test-c
The 1000000
a 2000000
at 1000000
brown 1000000
dog 1000000
era 1000000
fox 1000000
jumped 1000000
lake 1000000
lazy 1000000
new 1000000
near 1000000
of 1000000
over 1000000
quick 1000000
restaurant 1000000
the 2000000
0.16user 0.00system 0:00.33elapsed 50%CPU (0avgtext+0avgdata 532maxresident)k
0inputs+0outputs (0major+174minor)pagefaults 0swaps

--- time ./test-clang
The 1000000
a 2000000
at 1000000
brown 1000000
dog 1000000
era 1000000
fox 1000000
jumped 1000000
lake 1000000
lazy 1000000
new 1000000
near 1000000
of 1000000
over 1000000
quick 1000000
restaurant 1000000
the 2000000
0.19user 0.00system 0:00.19elapsed 98%CPU (0avgtext+0avgdata 536maxresident)k
0inputs+0outputs (0major+174minor)pagefaults 0swaps

--- time ./test-g++
The 1000000
quick 1000000
brown 1000000
fox 1000000
jumped 1000000
over 1000000
the 2000000
lazy 1000000
dog 1000000
at 1000000
a 2000000
restaurant 1000000
near 1000000
lake 1000000
of 1000000
new 1000000
era 1000000
0.08user 0.00system 0:00.09elapsed 96%CPU (0avgtext+0avgdata 1208maxresident)k
0inputs+0outputs (0major+355minor)pagefaults 0swaps

--- time ./test-clang++
The 1000000
quick 1000000
brown 1000000
fox 1000000
jumped 1000000
over 1000000
the 2000000
lazy 1000000
dog 1000000
at 1000000
a 2000000
restaurant 1000000
near 1000000
lake 1000000
of 1000000
new 1000000
era 1000000
0.06user 0.00system 0:00.22elapsed 29%CPU (0avgtext+0avgdata 1204maxresident)k
0inputs+0outputs (0major+355minor)pagefaults 0swaps

--- time node test.js
The 1000000
quick 1000000
brown 1000000
fox 1000000
jumped 1000000
over 1000000
the 2000000
lazy 1000000
dog 1000000
at 1000000
a 2000000
restaurant 1000000
near 1000000
lake 1000000
of 1000000
new 1000000
era 1000000
0.25user 0.01system 0:00.43elapsed 62%CPU (0avgtext+0avgdata 9956maxresident)k
0inputs+8outputs (0major+2798minor)pagefaults 0swaps

--- time luajit test.lua
new 1000000
lazy 1000000
The 1000000
era 1000000
of 1000000
at 1000000
fox 1000000
jumped 1000000
quick 1000000
brown 1000000
lake 1000000
over 1000000
near 1000000
the 2000000
dog 1000000
a 2000000
restaurant 1000000
0.09user 0.00system 0:00.09elapsed 96%CPU (0avgtext+0avgdata 1128maxresident)k
0inputs+0outputs (0major+338minor)pagefaults 0swaps

## Makefile
all: benchmark

benchmark: test-c test-clang test-g++ test-clang++ test.js test.lua
	@echo "Benchmark updated at `date`" > benchmark
	@echo >> benchmark
	@echo '--- time ./test-c' >> benchmark
	@time ./test-c >> benchmark 2>&1
	@echo >> benchmark
	@echo '--- time ./test-clang' >> benchmark
	@time ./test-clang >> benchmark 2>&1
	@echo >> benchmark
	@echo '--- time ./test-g++' >> benchmark
	@time ./test-g++ >> benchmark 2>&1
	@echo >> benchmark
	@echo '--- time ./test-clang++' >> benchmark
	@time ./test-clang++ >> benchmark 2>&1
	@echo >> benchmark
	@echo '--- time node test.js' >> benchmark
	@time node test.js >> benchmark 2>&1
	@echo >> benchmark
	@echo '--- time luajit test.lua' >> benchmark
	@time luajit test.lua >> benchmark 2>&1

	@cat benchmark

test-clang: test.c
	clang -O3 -o test-clang test.c
test-c: test.c
	gcc -O3 -o test-c test.c
test-g++: test.cpp table.hpp
	g++ -std=gnu++0x -O3 -o test-g++ test.cpp
test-clang++: test.cpp table.hpp
	clang++ -std=c++11 -O3 -o test-clang++ test.cpp

## table.hpp
#ifndef TABLE_HPP
#define TABLE_HPP

#include <utility>
#include <vector>

namespace jh3141
{
    template<typename T1, typename T2>
    class table
    {
    public:
        typedef typename std::vector<std::pair<T1,T2>>::iterator iterator;

    private:
        std::vector<std::pair<T1,T2>> data;
        iterator cachedSearchStart;
    public:
        table ()
        {
            cachedSearchStart = data.end();
        }


        T2& operator [] (const T1 & key)
        {
            if (cachedSearchStart != data.end () && cachedSearchStart->first == key)
                return (cachedSearchStart++)->second;

            for (iterator i = data.begin (); i != data.end (); i ++)
                if (i->first == key) {
                    cachedSearchStart = i + 1;
                    return i->second;
                }

            auto newItemPos = data.insert(data.end(), std::make_pair(key, T2()));
            cachedSearchStart = data.begin();
            return newItemPos->second;
        }

        iterator begin() { return data.begin(); }
        iterator end() { return data.end(); }
    };
}

#endif

## test.c
// Like in the c++ version, we
// have a regular hash table
// with a custom hash function

#include <string.h>
#include <stdlib.h>
#include <stdio.h>

struct fht_node {
    char* key;
    void* data;
    struct fht_node * next;
};

typedef struct fht {
    struct fht_node ** nodes;
    size_t size;
} FHT;


FHT* fht_create() {
    FHT *ht = malloc(sizeof(struct fht));
    ht->size = 1 << (sizeof(char)*8);
    ht->nodes = calloc(ht->size, sizeof(struct fht_node));
    return ht;
}

fht_put(FHT* hashtbl, char* key, void* data) {
    struct fht_node *node = hashtbl->nodes[key[0]];

    while(node) {
        if(!strcmp(node->key, key)) {
            node->data=data;
            return 0;
        }
        node=node->next;
    }

    node=malloc(sizeof(struct fht_node));
    node->key= strdup(key);
    node->data=data;
    node->next=hashtbl->nodes[key[0]];
    hashtbl->nodes[key[0]]=node;
}

void* fht_get(FHT* hashtbl, char* key) {
    struct fht_node *node = hashtbl->nodes[key[0]];
    while (node) {
        if (!strcmp(node->key, key)) return node->data;
        node = node->next;
    }
    return NULL;
}

int main() {

    char* text[19] = {
        "The", "quick", "brown", "fox", "jumped", "over", "the", "lazy", "dog", "at", "a",
        "restaurant", "near", "the", "lake", "of", "a", "new", "era"
    };

    FHT *hashtbl = fht_create();

    int textLen = 19;

    int times = 1000000;
    int k, *cnt;


    while (times--)
        for (k = 0; k < textLen; ++k) {
            cnt = fht_get(hashtbl, text[k]);
            if (!cnt) {
                cnt = malloc(sizeof(int));
                *cnt = 1;
                fht_put(hashtbl, text[k], cnt);
            } else { *cnt += 1; }
        }


    for (k = 0; k < hashtbl->size; ++k) {
        struct fht_node *n = hashtbl->nodes[k];
        while (n) {
            printf("%s %d\n", n->key, *((int *)n->data));
            n = n->next;
        }
    }
}

## test.cpp
#include <array>
#include <set>
#include <cstring>
#include <vector>
#include <iostream>
#include <string>
#include "table.hpp"

using namespace std;
using namespace jh3141;

// Use an array to hold the strings rather than vector, as it is a simpler and more easily-optimized object.

// We also use const char * rather than string to avoid the overhead of performing actual string comparisons,
// instead using a set of strings to ensure that all references to identical strings are using the same
// pointer before we begin the operation.  This optimization is probably a key reason why node/luajit were
// originally much faster than the c++ version.

// We also iterate over the text array by index rather than using an iterator as the compiler can't unroll the
// loop when we use an iterator but can when we're doing it numerically (this is only a small win, but still
// worth doing).

// Not flushing the output after each end of line (using "\n" instead of std::endl) gains nothing in terms of
// best times, but appears to significantly reduce the variance of the test times, so is also worth doing.

// Finally, it became apparent that both v8 and luajit use a substantially simpler data structure for storing
// data in this kind of application, which is optimized for small numbers of items with rather regular access
// patterns.  I developed a similar structure myself, based on a table of key/value pairs with a cached
// next-item-to-search pointer.  Switching to this knocked off another 20% of the run time, finally bringing
// this implementation to faster than luajit.

struct cstr_lt {
    bool operator() (const char * left, const char * right) { return strcmp(left, right) < 0; }
};

int main() {
    set<const char *, cstr_lt> string_intern;
    array<const char *, 19> text = {"The", "quick", "brown", "fox", "jumped", "over", "the", "lazy", "dog", "at", "a", "restaurant", "near", "the", "lake", "of", "a", "new", "era"};
    table<const char *, int> cnt;
    int times = 1000000;

    // start off by ensuring all duplicated strings are using the same reference
    for (const char *& str : text)
    {
        auto existing = string_intern.find(str);
        if (existing != string_intern.end())
        {
            str = *existing;
        }
        else
        {
            string_intern.insert(str);
        }
    }
    // now update the counts in the map
    while (times--)
        for (int i = 0; i < text.size(); i++)
            cnt[text[i]] += 1;
    // and produce output
    for (auto it = cnt.begin(); it != cnt.end(); ++it) {
        cout << it->first << " " << it->second << "\n";
    }
}

## test.js
var text = ["The", "quick", "brown", "fox", "jumped", "over", "the", "lazy", "dog", "at", "a", "restaurant", "near", "the", "lake", "of", "a", "new", "era"],
    map = {};
    times = 1000001;

while (--times)
    for (var k = 0; k < text.length; ++k) {
        // Unlike luajit, if we put if (map[text[k]]) map[text[k]] += 1
        // in v8, this program will become 6 times slower. Instead we create a new object with a counter
        // and this way we'll only need to access the hashtable once per loop.
        var kth = map[text[k]];
        if (kth) kth.c += 1;
        else map[text[k]] = {c:1};
    }

for (var key in map) {
    console.log(key, map[key].c);
}

## test.lua
-- The lua program needs no optimizations.
-- It just works. Fast.
-- Note that luajit probably optimizes away
-- all dictionary lookups :)

local text = {"The", "quick", "brown", "fox", "jumped", "over", "the", "lazy", "dog", "at", "a", "restaurant", "near", "the", "lake", "of", "a", "new", "era"}
local map = {}
local times = 1000000

while times > 0 do
    for i = 1, table.getn(text), 1 do
        if map[text[i]] then map[text[i]] = map[text[i]] + 1
        else map[text[i]] = 1 end
    end
    times = times - 1;
end

for key, value in pairs(map) do
    io.write(key, " ", value, "\n")
end
	Benchmark updated at Sat Dec 6 23:05:32 UTC 2014

	--- time ./test-c
	The 1000000
	a 2000000
	at 1000000
	brown 1000000
	dog 1000000
	era 1000000
	fox 1000000
	jumped 1000000
	lake 1000000
	lazy 1000000
	new 1000000
	near 1000000
	of 1000000
	over 1000000
	quick 1000000
	restaurant 1000000
	the 2000000
	0.16user 0.00system 0:00.33elapsed 50%CPU (0avgtext+0avgdata 532maxresident)k
	0inputs+0outputs (0major+174minor)pagefaults 0swaps

	--- time ./test-clang
	The 1000000
	a 2000000
	at 1000000
	brown 1000000
	dog 1000000
	era 1000000
	fox 1000000
	jumped 1000000
	lake 1000000
	lazy 1000000
	new 1000000
	near 1000000
	of 1000000
	over 1000000
	quick 1000000
	restaurant 1000000
	the 2000000
	0.19user 0.00system 0:00.19elapsed 98%CPU (0avgtext+0avgdata 536maxresident)k
	0inputs+0outputs (0major+174minor)pagefaults 0swaps

	--- time ./test-g++
	The 1000000
	quick 1000000
	brown 1000000
	fox 1000000
	jumped 1000000
	over 1000000
	the 2000000
	lazy 1000000
	dog 1000000
	at 1000000
	a 2000000
	restaurant 1000000
	near 1000000
	lake 1000000
	of 1000000
	new 1000000
	era 1000000
	0.08user 0.00system 0:00.09elapsed 96%CPU (0avgtext+0avgdata 1208maxresident)k
	0inputs+0outputs (0major+355minor)pagefaults 0swaps

	--- time ./test-clang++
	The 1000000
	quick 1000000
	brown 1000000
	fox 1000000
	jumped 1000000
	over 1000000
	the 2000000
	lazy 1000000
	dog 1000000
	at 1000000
	a 2000000
	restaurant 1000000
	near 1000000
	lake 1000000
	of 1000000
	new 1000000
	era 1000000
	0.06user 0.00system 0:00.22elapsed 29%CPU (0avgtext+0avgdata 1204maxresident)k
	0inputs+0outputs (0major+355minor)pagefaults 0swaps

	--- time node test.js
	The 1000000
	quick 1000000
	brown 1000000
	fox 1000000
	jumped 1000000
	over 1000000
	the 2000000
	lazy 1000000
	dog 1000000
	at 1000000
	a 2000000
	restaurant 1000000
	near 1000000
	lake 1000000
	of 1000000
	new 1000000
	era 1000000
	0.25user 0.01system 0:00.43elapsed 62%CPU (0avgtext+0avgdata 9956maxresident)k
	0inputs+8outputs (0major+2798minor)pagefaults 0swaps

	--- time luajit test.lua
	new 1000000
	lazy 1000000
	The 1000000
	era 1000000
	of 1000000
	at 1000000
	fox 1000000
	jumped 1000000
	quick 1000000
	brown 1000000
	lake 1000000
	over 1000000
	near 1000000
	the 2000000
	dog 1000000
	a 2000000
	restaurant 1000000
	0.09user 0.00system 0:00.09elapsed 96%CPU (0avgtext+0avgdata 1128maxresident)k
	0inputs+0outputs (0major+338minor)pagefaults 0swaps
	all: benchmark

	benchmark: test-c test-clang test-g++ test-clang++ test.js test.lua
	@echo "Benchmark updated at `date`" > benchmark
	@echo >> benchmark
	@echo '--- time ./test-c' >> benchmark
	@time ./test-c >> benchmark 2>&1
	@echo >> benchmark
	@echo '--- time ./test-clang' >> benchmark
	@time ./test-clang >> benchmark 2>&1
	@echo >> benchmark
	@echo '--- time ./test-g++' >> benchmark
	@time ./test-g++ >> benchmark 2>&1
	@echo >> benchmark
	@echo '--- time ./test-clang++' >> benchmark
	@time ./test-clang++ >> benchmark 2>&1
	@echo >> benchmark
	@echo '--- time node test.js' >> benchmark
	@time node test.js >> benchmark 2>&1
	@echo >> benchmark
	@echo '--- time luajit test.lua' >> benchmark
	@time luajit test.lua >> benchmark 2>&1

	@cat benchmark

	test-clang: test.c
	clang -O3 -o test-clang test.c
	test-c: test.c
	gcc -O3 -o test-c test.c
	test-g++: test.cpp table.hpp
	g++ -std=gnu++0x -O3 -o test-g++ test.cpp
	test-clang++: test.cpp table.hpp
	clang++ -std=c++11 -O3 -o test-clang++ test.cpp
	#ifndef TABLE_HPP
	#define TABLE_HPP

	#include <utility>
	#include <vector>

	namespace jh3141
	{
	template<typename T1, typename T2>
	class table
	{
	public:
	typedef typename std::vector<std::pair<T1,T2>>::iterator iterator;

	private:
	std::vector<std::pair<T1,T2>> data;
	iterator cachedSearchStart;
	public:
	table ()
	{
	cachedSearchStart = data.end();
	}


	T2& operator [] (const T1 & key)
	{
	if (cachedSearchStart != data.end () && cachedSearchStart->first == key)
	return (cachedSearchStart++)->second;

	for (iterator i = data.begin (); i != data.end (); i ++)
	if (i->first == key) {
	cachedSearchStart = i + 1;
	return i->second;
	}

	auto newItemPos = data.insert(data.end(), std::make_pair(key, T2()));
	cachedSearchStart = data.begin();
	return newItemPos->second;
	}

	iterator begin() { return data.begin(); }
	iterator end() { return data.end(); }
	};
	}

	#endif
	// Like in the c++ version, we
	// have a regular hash table
	// with a custom hash function

	#include <string.h>
	#include <stdlib.h>
	#include <stdio.h>

	struct fht_node {
	char* key;
	void* data;
	struct fht_node * next;
	};

	typedef struct fht {
	struct fht_node ** nodes;
	size_t size;
	} FHT;


	FHT* fht_create() {
	FHT *ht = malloc(sizeof(struct fht));
	ht->size = 1 << (sizeof(char)*8);
	ht->nodes = calloc(ht->size, sizeof(struct fht_node));
	return ht;
	}

	fht_put(FHT* hashtbl, char* key, void* data) {
	struct fht_node *node = hashtbl->nodes[key[0]];

	while(node) {
	if(!strcmp(node->key, key)) {
	node->data=data;
	return 0;
	}
	node=node->next;
	}

	node=malloc(sizeof(struct fht_node));
	node->key= strdup(key);
	node->data=data;
	node->next=hashtbl->nodes[key[0]];
	hashtbl->nodes[key[0]]=node;
	}

	void* fht_get(FHT* hashtbl, char* key) {
	struct fht_node *node = hashtbl->nodes[key[0]];
	while (node) {
	if (!strcmp(node->key, key)) return node->data;
	node = node->next;
	}
	return NULL;
	}

	int main() {

	char* text[19] = {
	"The", "quick", "brown", "fox", "jumped", "over", "the", "lazy", "dog", "at", "a",
	"restaurant", "near", "the", "lake", "of", "a", "new", "era"
	};

	FHT *hashtbl = fht_create();

	int textLen = 19;

	int times = 1000000;
	int k, *cnt;


	while (times--)
	for (k = 0; k < textLen; ++k) {
	cnt = fht_get(hashtbl, text[k]);
	if (!cnt) {
	cnt = malloc(sizeof(int));
	*cnt = 1;
	fht_put(hashtbl, text[k], cnt);
	} else { *cnt += 1; }
	}


	for (k = 0; k < hashtbl->size; ++k) {
	struct fht_node *n = hashtbl->nodes[k];
	while (n) {
	printf("%s %d\n", n->key, ((int )n->data));
	n = n->next;
	}
	}
	}
	#include <array>
	#include <set>
	#include <cstring>
	#include <vector>
	#include <iostream>
	#include <string>
	#include "table.hpp"

	using namespace std;
	using namespace jh3141;

	// Use an array to hold the strings rather than vector, as it is a simpler and more easily-optimized object.

	// We also use const char * rather than string to avoid the overhead of performing actual string comparisons,
	// instead using a set of strings to ensure that all references to identical strings are using the same
	// pointer before we begin the operation. This optimization is probably a key reason why node/luajit were
	// originally much faster than the c++ version.

	// We also iterate over the text array by index rather than using an iterator as the compiler can't unroll the
	// loop when we use an iterator but can when we're doing it numerically (this is only a small win, but still
	// worth doing).

	// Not flushing the output after each end of line (using "\n" instead of std::endl) gains nothing in terms of
	// best times, but appears to significantly reduce the variance of the test times, so is also worth doing.

	// Finally, it became apparent that both v8 and luajit use a substantially simpler data structure for storing
	// data in this kind of application, which is optimized for small numbers of items with rather regular access
	// patterns. I developed a similar structure myself, based on a table of key/value pairs with a cached
	// next-item-to-search pointer. Switching to this knocked off another 20% of the run time, finally bringing
	// this implementation to faster than luajit.

	struct cstr_lt {
	bool operator() (const char * left, const char * right) { return strcmp(left, right) < 0; }
	};

	int main() {
	set<const char *, cstr_lt> string_intern;
	array<const char *, 19> text = {"The", "quick", "brown", "fox", "jumped", "over", "the", "lazy", "dog", "at", "a", "restaurant", "near", "the", "lake", "of", "a", "new", "era"};
	table<const char *, int> cnt;
	int times = 1000000;

	// start off by ensuring all duplicated strings are using the same reference
	for (const char *& str : text)
	{
	auto existing = string_intern.find(str);
	if (existing != string_intern.end())
	{
	str = *existing;
	}
	else
	{
	string_intern.insert(str);
	}
	}
	// now update the counts in the map
	while (times--)
	for (int i = 0; i < text.size(); i++)
	cnt[text[i]] += 1;
	// and produce output
	for (auto it = cnt.begin(); it != cnt.end(); ++it) {
	cout << it->first << " " << it->second << "\n";
	}
	}
	var text = ["The", "quick", "brown", "fox", "jumped", "over", "the", "lazy", "dog", "at", "a", "restaurant", "near", "the", "lake", "of", "a", "new", "era"],
	map = {};
	times = 1000001;

	while (--times)
	for (var k = 0; k < text.length; ++k) {
	// Unlike luajit, if we put if (map[text[k]]) map[text[k]] += 1
	// in v8, this program will become 6 times slower. Instead we create a new object with a counter
	// and this way we'll only need to access the hashtable once per loop.
	var kth = map[text[k]];
	if (kth) kth.c += 1;
	else map[text[k]] = {c:1};
	}

	for (var key in map) {
	console.log(key, map[key].c);
	}
	-- The lua program needs no optimizations.
	-- It just works. Fast.
	-- Note that luajit probably optimizes away
	-- all dictionary lookups :)

	local text = {"The", "quick", "brown", "fox", "jumped", "over", "the", "lazy", "dog", "at", "a", "restaurant", "near", "the", "lake", "of", "a", "new", "era"}
	local map = {}
	local times = 1000000

	while times > 0 do
	for i = 1, table.getn(text), 1 do
	if map[text[i]] then map[text[i]] = map[text[i]] + 1
	else map[text[i]] = 1 end
	end
	times = times - 1;
	end

	for key, value in pairs(map) do
	io.write(key, " ", value, "\n")
	end