# coolbutuseless/trueskill in julia

## trueskill in julia
#!/usr/bin/env julia

# straight translation of some python code.
# python code (using pypy) takes less than 1 second/iteration  (core i7 imac)
# After removing globals and typing code, julia code takes same time as pypy code.
# Example pairs.csv file: https://dl.dropboxusercontent.com/u/68676/pairs.zip

# Standard normal cumulative distribution function: P(Z <= x) for Z ~ N(0, 1).
#
# Accepts any Real; the computation is carried out in Float64 and a Float64 is
# returned. Method by range of x:
#   * NaN       -> NaN. The series loop below exits only when s == t, which a
#                  NaN never satisfies, so it must be filtered out first.
#   * x > 8.3   -> 1.0. The true value rounds to 1.0 in double precision, and
#                  for |x| > ~37.7 the series would evaluate Inf * 0 = NaN.
#   * x < -5.0  -> continued fraction for the lower tail,
#                  pdf(a) / (a + 1/(a + 2/(a + 3/(a + ...)))) with a = -x.
#                  The series form computes 0.5 + (a value near -0.5) here and
#                  loses every significant digit to cancellation.
#   * otherwise -> Taylor series x + x^3/3 + x^5/(3*5) + ..., summed until
#                  adding another term no longer changes the sum.
function cdf(x::Real)
    z = convert(Float64, x)
    # log(sqrt(2*pi)), folded into the exponent so the density costs one exp().
    logsqrt2pi = 0.91893853320467274178

    if isnan(z)
        return z
    elseif z > 8.3
        return 1.0
    elseif z < -5.0
        a = -z
        # Evaluate the continued fraction from the innermost level outward.
        # 60 levels is far more than needed for a >= 5.
        frac = a
        for k in 60:-1:1
            frac = a + k / frac
        end
        return exp(-0.5*a*a - logsqrt2pi) / frac
    end

    s = z        # running sum of the series
    t = 0.0      # previous value of the sum
    b = z        # current term
    q = z*z
    i = 1.0      # odd denominator 1, 3, 5, ...

    while s != t
        t = s
        i += 2.0
        b *= q / i
        s = t + b
    end

    return 0.5 + s*exp(-0.5*q - logsqrt2pi)
end

# Standard normal probability density at x: exp(-x^2/2) / sqrt(2*pi).
#
# Accepts any Real. The argument is converted to Float64 first, so the result
# is always a Float64 and integer inputs cannot overflow in x*x.
function pdf(x::Real)
    z = convert(Float64, x)
    return exp(-z*z/2.0)/sqrt(2.0*pi)
end


# Run `iters` sweeps of the pairwise rating update over `pair_comparisons`,
# modifying `mu` and `sigma2` in place.
#
#   iters            - number of full passes over the comparison list
#   pair_comparisons - (winner, loser) pairs; each entry is used as an index
#                      into `mu` and `sigma2`
#   mu, sigma2       - per-competitor values, read and overwritten each step
#   initial_sigma    - only used to derive the constants beta and gamma
#
# Prints a progress line at the start of every sweep.
function update(iters::Int64, pair_comparisons::Array{(Int64,Int64),1}, mu::Array{Float64,1}, sigma2::Array{Float64,1}, initial_sigma::Float64)
    epsilon = 0.08
    beta    = initial_sigma/2.0
    gamma   = initial_sigma/300.0  # 30 is a better value than 300
    beta2   = beta*beta
    gamma2  = gamma*gamma

    for iter = 1:iters
        println("Doing iter:", iter)
        for pair in pair_comparisons
            w, l = pair

            # Snapshot both competitors before anything is written back.
            mean_w, var_w = mu[w], sigma2[w]
            mean_l, var_l = mu[l], sigma2[l]

            # Scale factor shared by every term of this step.
            c = (2.0*beta2 + var_w + var_l) ^ (-0.5)
            t = (mean_w - mean_l) * c
            e = epsilon * c

            v      = pdf(t - e) / cdf(t - e)
            shrink = (v * (v + t - e)) * (c*c)

            # Means move apart; winner is written first, then loser.
            mu[w] = mean_w + var_w * c * v
            mu[l] = mean_l - var_l * c * v

            # Variances shrink by the shared factor, then gain gamma2.
            sigma2[w] = var_w * (1.0 - var_w * shrink) + gamma2
            sigma2[l] = var_l * (1.0 - var_l * shrink) + gamma2
        end
    end
    return nothing
end

# Build one tuple per competitor, (mu - 3*sigma, index, mu, sigma), where
# sigma = sigma2 ^ 0.5 and index is the 1-based position in the inputs.
# The result is sorted in descending tuple order, so the highest
# mu - 3*sigma comes first. Only the first min(length(mu), length(sigma2))
# entries are used.
function get_sorted_competitors(mu::Array{Float64,1}, sigma2::Array{Float64,1})
    n = min(length(mu), length(sigma2))
    ranking = [(mu[k] - 3.0*(sigma2[k] ^ 0.5), k, mu[k], sigma2[k] ^ 0.5) for k in 1:n]
    return sort!(ranking, rev=true)
end

# Script driver: read "pairs.csv", run 50 update sweeps, and print the rank
# of competitor 3672 in the sorted results.
#
# Row 1 of the CSV is skipped; columns 1 and 2 of every later row are
# converted to integers and used as (winner, loser) indices. The largest
# index seen sets the number of competitors.
function main()
    initial_mu    = 200.0
    sigma_factor  = 3.0
    initial_sigma = initial_mu / float(sigma_factor)

    # Time the file load.
    tic()
    all_data = readcsv("pairs.csv")
    toc()

    pair_comparisons = (Int64,Int64)[]
    maxnum = 0
    for row in 2:size(all_data, 1)
        A = int(all_data[row, 1])
        B = int(all_data[row, 2])
        maxnum = max(maxnum, A, B)
        push!(pair_comparisons, (A, B))
    end

    # Create initial stats: every competitor starts from the same values.
    mu     = fill(initial_mu, maxnum)
    sigma2 = fill(initial_sigma ^ 2, maxnum)

    # Just checking types
    println(typeof(mu))
    println(typeof(pair_comparisons))

    # Do the actual work (timed).
    tic()
    update(50, pair_comparisons, mu, sigma2, initial_sigma)
    toc()
    #Profile.print()

    # Get the list of competitors in sorted order
    out = get_sorted_competitors(mu, sigma2)

    # Pull out a test subject who should be near the top of the rankings.
    # Entries are indexed rather than destructured into names such as
    # mu/sigma2, which would shadow (or, on old Julia versions, overwrite)
    # the rating arrays above. Element 2 of each entry is the competitor index.
    for (rank, entry) in enumerate(out)
        if entry[2] == 3672
            println("FOUND:", rank)
        end
    end
end


# Run the whole pipeline (load pairs, update ratings, report) as soon as this
# file is evaluated.
main()
	#!/usr/bin/env julia

	# straight translation of some python code.
	# python code (using pypy) takes less than 1 second/iteration (core i7 imac)
	# After removing globals and typing code, julia code takes same time as pypy code.
	# Example pairs.csv file: https://dl.dropboxusercontent.com/u/68676/pairs.zip

	# Standard normal cumulative distribution function: P(Z <= x) for Z ~ N(0, 1).
	#
	# Accepts any Real; the computation is carried out in Float64 and a Float64 is
	# returned. Method by range of x:
	#   * NaN       -> NaN. The series loop below exits only when s == t, which a
	#                  NaN never satisfies, so it must be filtered out first.
	#   * x > 8.3   -> 1.0. The true value rounds to 1.0 in double precision, and
	#                  for |x| > ~37.7 the series would evaluate Inf * 0 = NaN.
	#   * x < -5.0  -> continued fraction for the lower tail,
	#                  pdf(a) / (a + 1/(a + 2/(a + 3/(a + ...)))) with a = -x.
	#                  The series form computes 0.5 + (a value near -0.5) here and
	#                  loses every significant digit to cancellation.
	#   * otherwise -> Taylor series x + x^3/3 + x^5/(3*5) + ..., summed until
	#                  adding another term no longer changes the sum.
	function cdf(x::Real)
	    z = convert(Float64, x)
	    # log(sqrt(2*pi)), folded into the exponent so the density costs one exp().
	    logsqrt2pi = 0.91893853320467274178

	    if isnan(z)
	        return z
	    elseif z > 8.3
	        return 1.0
	    elseif z < -5.0
	        a = -z
	        # Evaluate the continued fraction from the innermost level outward.
	        # 60 levels is far more than needed for a >= 5.
	        frac = a
	        for k in 60:-1:1
	            frac = a + k / frac
	        end
	        return exp(-0.5*a*a - logsqrt2pi) / frac
	    end

	    s = z        # running sum of the series
	    t = 0.0      # previous value of the sum
	    b = z        # current term
	    q = z*z
	    i = 1.0      # odd denominator 1, 3, 5, ...

	    while s != t
	        t = s
	        i += 2.0
	        b *= q / i
	        s = t + b
	    end

	    # The multiplications here are explicit: s * exp(...), 0.5 * q.
	    return 0.5 + s*exp(-0.5*q - logsqrt2pi)
	end

	# Standard normal probability density at x: exp(-x^2/2) / sqrt(2*pi).
	#
	# Accepts any Real. The argument is converted to Float64 first, so the result
	# is always a Float64 and integer inputs cannot overflow in x*x.
	function pdf(x::Real)
	    z = convert(Float64, x)
	    # Explicit products: z*z and 2.0*pi.
	    return exp(-z*z/2.0)/sqrt(2.0*pi)
	end


	# Run `iters` sweeps of the pairwise rating update over `pair_comparisons`,
	# modifying `mu` and `sigma2` in place.
	#
	#   iters            - number of full passes over the comparison list
	#   pair_comparisons - (winner, loser) pairs; each entry is used as an index
	#                      into `mu` and `sigma2`
	#   mu, sigma2       - per-competitor values, read and overwritten each step
	#   initial_sigma    - only used to derive the constants beta and gamma
	#
	# Prints a progress line at the start of every sweep.
	function update(iters::Int64, pair_comparisons::Array{(Int64,Int64),1}, mu::Array{Float64,1}, sigma2::Array{Float64,1}, initial_sigma::Float64)
	    epsilon = 0.08
	    beta    = initial_sigma/2.0
	    gamma   = initial_sigma/300.0 # 30 is a better value than 300
	    beta2   = beta*beta
	    gamma2  = gamma*gamma

	    for iter = 1:iters
	        println("Doing iter:", iter)
	        for pair in pair_comparisons
	            w, l = pair

	            # Snapshot both competitors before anything is written back.
	            mean_w, var_w = mu[w], sigma2[w]
	            mean_l, var_l = mu[l], sigma2[l]

	            # Scale factor shared by every term of this step.
	            c = (2.0*beta2 + var_w + var_l) ^ (-0.5)
	            t = (mean_w - mean_l) * c
	            e = epsilon * c

	            v      = pdf(t - e) / cdf(t - e)
	            shrink = (v * (v + t - e)) * (c*c)

	            # Means move apart; winner is written first, then loser.
	            mu[w] = mean_w + var_w * c * v
	            mu[l] = mean_l - var_l * c * v

	            # Variances shrink by the shared factor, then gain gamma2.
	            sigma2[w] = var_w * (1.0 - var_w * shrink) + gamma2
	            sigma2[l] = var_l * (1.0 - var_l * shrink) + gamma2
	        end
	    end
	    return nothing
	end

	# Build one tuple per competitor, (mu - 3*sigma, index, mu, sigma), where
	# sigma = sigma2 ^ 0.5 and index is the 1-based position in the inputs.
	# The result is sorted in descending tuple order, so the highest
	# mu - 3*sigma comes first. Only the first min(length(mu), length(sigma2))
	# entries are used.
	function get_sorted_competitors(mu::Array{Float64,1}, sigma2::Array{Float64,1})
	    n = min(length(mu), length(sigma2))
	    ranking = [(mu[k] - 3.0*(sigma2[k] ^ 0.5), k, mu[k], sigma2[k] ^ 0.5) for k in 1:n]
	    return sort!(ranking, rev=true)
	end

	# Script driver: read "pairs.csv", run 50 update sweeps, and print the rank
	# of competitor 3672 in the sorted results.
	#
	# Row 1 of the CSV is skipped; columns 1 and 2 of every later row are
	# converted to integers and used as (winner, loser) indices. The largest
	# index seen sets the number of competitors.
	function main()
	    initial_mu    = 200.0
	    sigma_factor  = 3.0
	    initial_sigma = initial_mu / float(sigma_factor)

	    # Time the file load.
	    tic()
	    all_data = readcsv("pairs.csv")
	    toc()

	    pair_comparisons = (Int64,Int64)[]
	    maxnum = 0
	    for row in 2:size(all_data, 1)
	        A = int(all_data[row, 1])
	        B = int(all_data[row, 2])
	        maxnum = max(maxnum, A, B)
	        push!(pair_comparisons, (A, B))
	    end

	    # Create initial stats: every competitor starts from the same values.
	    mu     = fill(initial_mu, maxnum)
	    sigma2 = fill(initial_sigma ^ 2, maxnum)

	    # Just checking types
	    println(typeof(mu))
	    println(typeof(pair_comparisons))

	    # Do the actual work (timed).
	    tic()
	    update(50, pair_comparisons, mu, sigma2, initial_sigma)
	    toc()
	    #Profile.print()

	    # Get the list of competitors in sorted order
	    out = get_sorted_competitors(mu, sigma2)

	    # Pull out a test subject who should be near the top of the rankings.
	    # Entries are indexed rather than destructured into names such as
	    # mu/sigma2, which would shadow (or, on old Julia versions, overwrite)
	    # the rating arrays above. Element 2 of each entry is the competitor index.
	    for (rank, entry) in enumerate(out)
	        if entry[2] == 3672
	            println("FOUND:", rank)
	        end
	    end
	end


	# Run the whole pipeline (load pairs, update ratings, report) as soon as this
	# file is evaluated.
	main()