csabahenk/README.md

## README.md

      
    Raw
  

              README.md
            
          
    Rsync rolling sum calculation in various languages

The programs below calculate the rsync weak sum (of block size 4096)
of the last block of the data available on stdin.
The data is assumed to be at least block size big; the weak sum is calculated in a direct manner for the first block, and from that on,
the sum is adjusted on each incoming byte using the "rollability" of this kind of checksum.
The primary goal was to see the performance of naive implementations; so my metrics was both performance and "how close is the code to
be possible to call it executable pseudocode". (For Python and Ruby, naivity is somewhat compromised by my compulsion of writing code
which runs with both of their major versions (respectively), but hey, my compulsion reflects a real world situation, which in fact does
compromise the easy use of these languages!) So I did not go for using numerical libraries, or mutable monads in case of Haskell.
However, I'd be happy to see contributions which either add other language implementations or improve on performance by any kind of black
magic. If you code up something, to test it for correctness, take a file larger than the block size, then feed various tails of it to the
program like tail -c $n test-file | rols; for $n ≥ block size the sum (the second number in the result) should be invariant (and of course,
match the values seen with the other implementations :) ).

Go is the clear winner by the combined metrics. In pseudocodiness, it wins over Python/Ruby due to the above mentioned compat glue in them
and also because of those ugly modulus operations. Performance is in par wih C.
C rocks, no surprise for such a task.
Python/Ruby sucks, no surprise. Well, tried with Rubinius as well, and that sucks more than expected.
Haskell is ubercool... and ubersucks. Despite my naivity-compatible efforts for optimization (use ByteString, consume input via the
strict fold variant, use O(1)-appendable deques for the rolling window) I ended up with horrible performance, stack overflow,
or consumption of all system memory (in such a mystic way which is worth to elaborate on: got the stack overflow on a 400k input, but
not on a 4M one, that just ate memory w/o stack overflow). Thanks to Balázs Kőműves' scrutine eyes,
the Haskell code can be fixed by enforcing some strictness by means of bang
patterns, by which it ends up with a run time somewhere below that
of LuaJIT.

I'm not writing exact numbers, see for yourself, this is not the "do not try it at home" type of thing.
Update

Also added Javascipt (node.js), Lua and Perl version.

Ruby is twice as fast as Python, Lua and Perl are twice as fast as Ruby, Node is twice as fast as Lua/Perl; however, the speed king of
interpreted dynamic languages is the LuaJIT implementation of Lua, being twice as fast as Node.
OK, this was a bit of oversimplification:

Ruby is twice as fast Python3, actually. Python 2.x is faster than Py3, somewhere between Ruby and Python3.
Node could be the speed king, being twice as fast as LuaJIT, weren't it a dumbass piece of crap who does not know math. If we
used the built-in % modulo operator, it would be like that speed-wise, just we'd get an incorrect result because of JS arithmetics'
weird signedness. So we need to define our modulo function using >>> 0
to sort of "cast to unsigned type". This function quite holds Node back. To make this weird shit even more weirder and shittier, if you were to add the
amended modulo funcion as a Number method, Node will slow down beyond imagination (OK, method dispatch takes some time, but that much??).
So, that incorrect super-fast Node speed would be about fourth times slow than Go/C.


Update

Checked JRuby. Twice as fast as Matz Ruby, somewhere in between Lua and Perl. That's a bit of disappointment, in the light of their agenda about the JVM and JIT.

  
## rols.c
#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>

int main()
{
	int i;
	uint16_t a,b;
	uint8_t win[4096];
	int c;
	int j;

	fread(win, sizeof(win), 1, stdin);

	a=0;
	b=0;
	for (i=0; i < sizeof(win); i++) {
		a += win[i];
		b += (sizeof(win) - i) * win[i];
	}

	j = 0;
	i = 1;
	for (;;) {
		c = getchar();
		if (c == EOF)
			break;
		a = a - win[j] + c;
		b = b - sizeof(win) * win[j] + a;
		win[j] = c;
		j = (j+1) % sizeof(win);
		i++;
	}

	printf("%d %u\n", i, a | ((uint32_t)b) << 16);

	return 0;
}

## rols.go
package main

import (
	"bufio"
	"os"
	"log"
	"fmt"
)

func main() {
	win := make([]byte, 4096)

	in := bufio.NewReader(os.Stdin)

	_, err := in.Read(win)
	if err != nil { log.Fatal(err) }

	a := uint16(0)
	b := uint16(0)

	for i, c := range win {
		a += uint16(c)
		b += uint16((len(win) - i)) * uint16(c)
	}

	j := 0
	i := 1
	for {
		c, err := in.ReadByte()
		if err != nil {
			if err == os.EOF { break }
			log.Fatal(err)
		}
		a = a - uint16(win[j]) + uint16(c)
		b = b - uint16(len(win)) * uint16(win[j]) + a
		win[j] = c
		j = (j + 1) % len(win)
		i++
	}

	fmt.Println(i, uint32(a) | (uint32(b) << 16))
}

## rols.hs
{-# LANGUAGE BangPatterns #-}

import Data.Word
import Data.Char
import Data.Bits
import qualified Data.ByteString.Lazy as BS


{-- direct implementation of amortized O(1)
    double-ended queue, courtesy of Balázs Kőműves
 --}
data Window a = W ![a] ![a]

update :: a -> Window a -> (a, Window a)
update z (W (x:xs) ys) = (x, W xs (z:ys))
update z (W []     ys) = (w, W ws [z]) where (w:ws) = reverse ys

fromList :: [a] -> Window a
fromList xs = W xs []


data Pair a = Pr a a


{-- weak sum of window length n: takes n char off of the incoming data
    returns processed window, rest of data and sum-pair
 --}
weakSumPr_n :: Int -> BS.ByteString -> (Window Word8, BS.ByteString, Pair Word16)
weakSumPr_n n indata = (fromList win_w8, indata', Pr (sum win_w16) (fst $ foldl (\(b, k) c -> (k * c + b, k - 1)) (0 :: Word16, fromIntegral n) $ win_w16))
  where (win_bs, indata') = BS.splitAt (fromIntegral n) indata
        win_w8 = BS.unpack win_bs
        win_w16 = map fromIntegral win_w8

{-- rollsum of window length n, calculates the rollsum pair (with window) from
    window, earlier sumpair, and incoming character
 --}
rollSumPr_n :: Int -> Window Word8 -> Pair Word16 -> Word8 -> (Window Word8, Pair Word16)
rollSumPr_n n win (Pr !a !b) c' = (win', (Pr a' (b - n_w16 * c_w16 + a')))
  where (c, win') = update c' win
        [c_w16, c'_w16] = map fromIntegral [c, c']
        n_w16 = fromIntegral n
        a' = a - c_w16 + c'_w16

{-- represents a pair of words as a 32-bit word --}
pair32 :: Integral a => Pair a -> Word32
pair32 (Pr u v) = v32 `shiftL` 16 + u32
  where [u32, v32] = map fromIntegral [u, v]

main_ii :: Int -> BS.ByteString -> (Int, Pair Word16)
main_ii n indata = (\(x, (y, z)) -> (x, z)) $ BS.foldl' (\(!i, (!win, !sp)) !c -> (i + 1, rollSumPr_n n win sp c)) (1, (win0, wsum0)) indata'
  where (win0, indata', wsum0) = weakSumPr_n n indata

main_i :: BS.ByteString -> (Int, Word32)
main_i = (\(x, y) -> (x, pair32 y)) . main_ii 4096

main = print . main_i =<< BS.getContents

## rols.js
#!/usr/bin/env node

var EventEmitter = require('events').EventEmitter;
var sys = require('sys');

function Roller() {
    EventEmitter.apply(this);
    this.writable = true;

    this.win = new Buffer(4096);
    this.wlen = 0;
    this.a = 0;
    this.b = 0;
}
sys.inherits(Roller, EventEmitter);

function mod(n, m) { return (n >>> 0) % m; }

Roller.prototype.write = function write(data) {
    if (this.wlen < this.win.length) {
        var xlen = data.copy(this.win, this.wlen);
        for (var i = this.wlen; i < this.wlen + xlen; i++) {
            this.a = mod(this.a + this.win[i], 1<<16);
            this.b = mod(this.b + (this.win.length - i) * this.win[i], 1<<16);
        }
        this.wlen += xlen;
        data = data.slice(xlen);
        if (this.wlen == this.win.length) {
            this.i = 1;
            this.j = 0;
        }
    }

    for (var i = 0; i < data.length; i++) {
        this.a = mod(this.a - this.win[this.j] + data[i], 1<<16);
        this.b = mod(this.b - this.win.length * this.win[this.j] + this.a, 1<<16);
        this.win[this.j] = data[i];
        this.j = mod(this.j + 1, this.win.length);
        this.i += 1;
    }
}

Roller.prototype.end = function end() {
    console.log(this.i, (this.a | (this.b << 16)) >>> 0);
}

process.stdin.resume();
process.stdin.pipe(new Roller());

## rols.lua
#!/usr/bin/env lua

local bit = require("bit")

local wins = io.read(4096)
local win = {}
local a = 0
local b = 0
local m16 = bit.lshift(1, 16)
for i=0,#wins - 1 do
  win[i] = wins:byte(i + 1)
  a = (a + win[i]) % m16
  b = (b + (#wins - i) * win[i]) % m16
end

local i = 1
local j = 0
while true do
  local cs = io.read(1)
  if not cs then break end
  local c = cs:byte(1)
  a = (a - win[j] + c) % m16
  b = (b - #wins * win[j] + a) % m16
  win[j] = c
  j = (j+1) % #wins
  i = i + 1
end

print(i, a + b * m16)

## rols.pl
#!/usr/bin/perl -w

my $wins = "";
read(STDIN, $wins, 4096);
my @win = unpack("C*", $wins);
my $win = @win;

my $a = 0;
my $b = 0;
foreach my $i (0..$#win) {
	$a = ($a + $win[$i]) % (1<<16);
	$b = ($b + ($win - $i) * $win[$i]) % (1<<16);
}

my $i = 1;
my $j = 0;
while (defined (my $c = getc())) {
        $c = ord($c);
	$a = ($a - $win[$j] + $c) % (1<<16);
	$b = ($b - $win * $win[$j] + $a) % (1<<16);
	$win[$j] = $c;
	$j = ($j + 1) % $win;
	$i++;
}

print $i, " ",  $a | ($b << 16), "\n";

## rols.py
#!/usr/bin/env python

import sys

if hasattr(sys.stdin, 'buffer'):
    # python 3
    raw_stdin = sys.stdin.buffer
    raw_ord = lambda x: x
else:
    raw_stdin = sys.stdin
    raw_ord = ord

win = [ raw_ord(c) for c in raw_stdin.read(4096) ]

a = b = 0
for i in range(len(win)):
    a = (a + win[i]) % (1<<16)
    b = (b + (len(win) - i) * win[i]) % (1<<16)

i=1
j=0
while True:
    c = raw_stdin.read(1)
    if not c:
        break
    c = raw_ord(c[0])
    a = (a - win[j] + c) % (1<<16)
    b = (b - len(win)*win[j] + a) % (1<<16)
    win[j] = c
    j = (j+1) % len(win)
    i += 1

print(i, a | (b<<16))

## rols.rb
#!/usr/bin/env ruby

if RUBY_VERSION < '1.9'
  class String
    alias getbyte []
  end
end

win = []
STDIN.read(4096).bytes { |b| win << b }

a = b = 0
win.size.times { |i|
  a = (a + win[i]) % (1<<16)
  b = (b + (win.size - i) * win[i]) % (1<<16)
}

i = 1
j = 0
loop do
  c = (STDIN.read(1)||"").getbyte(0)
  break unless c
  a = (a - win[j] + c) % (1<<16)
  b = (b - win.size * win[j] + a) % (1<<16)
  win[j] = c
  j = (j+1) % win.size
  i += 1
end

p [i, a | (b<<16)]
	#include <stdio.h>
	#include <stdint.h>
	#include <stdlib.h>

	int main()
	{
	int i;
	uint16_t a,b;
	uint8_t win[4096];
	int c;
	int j;

	fread(win, sizeof(win), 1, stdin);

	a=0;
	b=0;
	for (i=0; i < sizeof(win); i++) {
	a += win[i];
	b += (sizeof(win) - i) * win[i];
	}

	j = 0;
	i = 1;
	for (;;) {
	c = getchar();
	if (c == EOF)
	break;
	a = a - win[j] + c;
	b = b - sizeof(win) * win[j] + a;
	win[j] = c;
	j = (j+1) % sizeof(win);
	i++;
	}

	printf("%d %u\n", i, a \| ((uint32_t)b) << 16);

	return 0;
	}
	package main

	import (
	"bufio"
	"os"
	"log"
	"fmt"
	)

	func main() {
	win := make([]byte, 4096)

	in := bufio.NewReader(os.Stdin)

	_, err := in.Read(win)
	if err != nil { log.Fatal(err) }

	a := uint16(0)
	b := uint16(0)

	for i, c := range win {
	a += uint16(c)
	b += uint16((len(win) - i)) * uint16(c)
	}

	j := 0
	i := 1
	for {
	c, err := in.ReadByte()
	if err != nil {
	if err == os.EOF { break }
	log.Fatal(err)
	}
	a = a - uint16(win[j]) + uint16(c)
	b = b - uint16(len(win)) * uint16(win[j]) + a
	win[j] = c
	j = (j + 1) % len(win)
	i++
	}

	fmt.Println(i, uint32(a) \| (uint32(b) << 16))
	}
	{-# LANGUAGE BangPatterns #-}

	import Data.Word
	import Data.Char
	import Data.Bits
	import qualified Data.ByteString.Lazy as BS


	{-- direct implementation of amortized O(1)
	double-ended queue, courtesy of Balázs Kőműves
	--}
	data Window a = W ![a] ![a]

	update :: a -> Window a -> (a, Window a)
	update z (W (x:xs) ys) = (x, W xs (z:ys))
	update z (W [] ys) = (w, W ws [z]) where (w:ws) = reverse ys

	fromList :: [a] -> Window a
	fromList xs = W xs []


	data Pair a = Pr a a


	{-- weak sum of window length n: takes n char off of the incoming data
	returns processed window, rest of data and sum-pair
	--}
	weakSumPr_n :: Int -> BS.ByteString -> (Window Word8, BS.ByteString, Pair Word16)
	weakSumPr_n n indata = (fromList win_w8, indata', Pr (sum win_w16) (fst $ foldl (\(b, k) c -> (k * c + b, k - 1)) (0 :: Word16, fromIntegral n) $ win_w16))
	where (win_bs, indata') = BS.splitAt (fromIntegral n) indata
	win_w8 = BS.unpack win_bs
	win_w16 = map fromIntegral win_w8

	{-- rollsum of window length n, calculates the rollsum pair (with window) from
	window, earlier sumpair, and incoming character
	--}
	rollSumPr_n :: Int -> Window Word8 -> Pair Word16 -> Word8 -> (Window Word8, Pair Word16)
	rollSumPr_n n win (Pr !a !b) c' = (win', (Pr a' (b - n_w16 * c_w16 + a')))
	where (c, win') = update c' win
	[c_w16, c'_w16] = map fromIntegral [c, c']
	n_w16 = fromIntegral n
	a' = a - c_w16 + c'_w16

	{-- represents a pair of words as a 32-bit word --}
	pair32 :: Integral a => Pair a -> Word32
	pair32 (Pr u v) = v32 `shiftL` 16 + u32
	where [u32, v32] = map fromIntegral [u, v]

	main_ii :: Int -> BS.ByteString -> (Int, Pair Word16)
	main_ii n indata = (\(x, (y, z)) -> (x, z)) $ BS.foldl' (\(!i, (!win, !sp)) !c -> (i + 1, rollSumPr_n n win sp c)) (1, (win0, wsum0)) indata'
	where (win0, indata', wsum0) = weakSumPr_n n indata

	main_i :: BS.ByteString -> (Int, Word32)
	main_i = (\(x, y) -> (x, pair32 y)) . main_ii 4096

	main = print . main_i =<< BS.getContents
	#!/usr/bin/env node

	var EventEmitter = require('events').EventEmitter;
	var sys = require('sys');

	function Roller() {
	EventEmitter.apply(this);
	this.writable = true;

	this.win = new Buffer(4096);
	this.wlen = 0;
	this.a = 0;
	this.b = 0;
	}
	sys.inherits(Roller, EventEmitter);

	function mod(n, m) { return (n >>> 0) % m; }

	Roller.prototype.write = function write(data) {
	if (this.wlen < this.win.length) {
	var xlen = data.copy(this.win, this.wlen);
	for (var i = this.wlen; i < this.wlen + xlen; i++) {
	this.a = mod(this.a + this.win[i], 1<<16);
	this.b = mod(this.b + (this.win.length - i) * this.win[i], 1<<16);
	}
	this.wlen += xlen;
	data = data.slice(xlen);
	if (this.wlen == this.win.length) {
	this.i = 1;
	this.j = 0;
	}
	}

	for (var i = 0; i < data.length; i++) {
	this.a = mod(this.a - this.win[this.j] + data[i], 1<<16);
	this.b = mod(this.b - this.win.length * this.win[this.j] + this.a, 1<<16);
	this.win[this.j] = data[i];
	this.j = mod(this.j + 1, this.win.length);
	this.i += 1;
	}
	}

	Roller.prototype.end = function end() {
	console.log(this.i, (this.a \| (this.b << 16)) >>> 0);
	}

	process.stdin.resume();
	process.stdin.pipe(new Roller());
	#!/usr/bin/env lua

	local bit = require("bit")

	local wins = io.read(4096)
	local win = {}
	local a = 0
	local b = 0
	local m16 = bit.lshift(1, 16)
	for i=0,#wins - 1 do
	win[i] = wins:byte(i + 1)
	a = (a + win[i]) % m16
	b = (b + (#wins - i) * win[i]) % m16
	end

	local i = 1
	local j = 0
	while true do
	local cs = io.read(1)
	if not cs then break end
	local c = cs:byte(1)
	a = (a - win[j] + c) % m16
	b = (b - #wins * win[j] + a) % m16
	win[j] = c
	j = (j+1) % #wins
	i = i + 1
	end

	print(i, a + b * m16)
	#!/usr/bin/perl -w

	my $wins = "";
	read(STDIN, $wins, 4096);
	my @win = unpack("C*", $wins);
	my $win = @win;

	my $a = 0;
	my $b = 0;
	foreach my $i (0..$#win) {
	$a = ($a + $win[$i]) % (1<<16);
	$b = ($b + ($win - $i) * $win[$i]) % (1<<16);
	}

	my $i = 1;
	my $j = 0;
	while (defined (my $c = getc())) {
	$c = ord($c);
	$a = ($a - $win[$j] + $c) % (1<<16);
	$b = ($b - $win * $win[$j] + $a) % (1<<16);
	$win[$j] = $c;
	$j = ($j + 1) % $win;
	$i++;
	}

	print $i, " ", $a \| ($b << 16), "\n";
	#!/usr/bin/env python

	import sys

	if hasattr(sys.stdin, 'buffer'):
	# python 3
	raw_stdin = sys.stdin.buffer
	raw_ord = lambda x: x
	else:
	raw_stdin = sys.stdin
	raw_ord = ord

	win = [ raw_ord(c) for c in raw_stdin.read(4096) ]

	a = b = 0
	for i in range(len(win)):
	a = (a + win[i]) % (1<<16)
	b = (b + (len(win) - i) * win[i]) % (1<<16)

	i=1
	j=0
	while True:
	c = raw_stdin.read(1)
	if not c:
	break
	c = raw_ord(c[0])
	a = (a - win[j] + c) % (1<<16)
	b = (b - len(win)*win[j] + a) % (1<<16)
	win[j] = c
	j = (j+1) % len(win)
	i += 1

	print(i, a \| (b<<16))
	#!/usr/bin/env ruby

	if RUBY_VERSION < '1.9'
	class String
	alias getbyte []
	end
	end

	win = []
	STDIN.read(4096).bytes { \|b\| win << b }

	a = b = 0
	win.size.times { \|i\|
	a = (a + win[i]) % (1<<16)
	b = (b + (win.size - i) * win[i]) % (1<<16)
	}

	i = 1
	j = 0
	loop do
	c = (STDIN.read(1)\|\|"").getbyte(0)
	break unless c
	a = (a - win[j] + c) % (1<<16)
	b = (b - win.size * win[j] + a) % (1<<16)
	win[j] = c
	j = (j+1) % win.size
	i += 1
	end

	p [i, a \| (b<<16)]