dustalov/expectation-maximization.rb

## expectation-maximization.rb
#!/usr/bin/env ruby

=begin
http://ai.stanford.edu/~chuongdo/papers/em_tutorial.pdf
http://stats.stackexchange.com/questions/72774/numerical-example-to-understand-expectation-maximization
http://math.stackexchange.com/questions/25111/how-does-expectation-maximization-work
http://math.stackexchange.com/questions/81004/how-does-expectation-maximization-work-in-coin-flipping-problem
http://www.youtube.com/watch?v=7e65vXZEv5Q
=end

# gem install distribution
require 'distribution'

# error bound
EPS = 10**-6

# number of coin tosses
N = 10

# observations
X = [5, 9, 8, 4, 7]

# randomly initialized thetas
theta_a, theta_b = 0.6, 0.5

p [theta_a, theta_b]

loop do
  expectation = X.map do |h|
    like_a = Distribution::Binomial.pdf(h, N, theta_a)
    like_b = Distribution::Binomial.pdf(h, N, theta_b)

    norm_a = like_a / (like_a + like_b)
    norm_b = like_b / (like_a + like_b)

    [norm_a, norm_b, h]
  end

  maximization = expectation.each_with_object([0.0, 0.0, 0.0, 0.0]) do |(norm_a, norm_b, h), r|
    r[0] += norm_a * h; r[1] += norm_a * (N - h)
    r[2] += norm_b * h; r[3] += norm_b * (N - h)
  end

  theta_a_hat = maximization[0] / (maximization[0] + maximization[1])
  theta_b_hat = maximization[2] / (maximization[2] + maximization[3])

  error_a = (theta_a_hat - theta_a).abs / theta_a
  error_b = (theta_b_hat - theta_b).abs / theta_b

  theta_a, theta_b = theta_a_hat, theta_b_hat

  p [theta_a, theta_b]

  break if error_a < EPS && error_b < EPS
end
	#!/usr/bin/env ruby

	=begin
	http://ai.stanford.edu/~chuongdo/papers/em_tutorial.pdf
	http://stats.stackexchange.com/questions/72774/numerical-example-to-understand-expectation-maximization
	http://math.stackexchange.com/questions/25111/how-does-expectation-maximization-work
	http://math.stackexchange.com/questions/81004/how-does-expectation-maximization-work-in-coin-flipping-problem
	http://www.youtube.com/watch?v=7e65vXZEv5Q
	=end

	# gem install distribution
	require 'distribution'

	# error bound
	EPS = 10**-6

	# number of coin tosses
	N = 10

	# observations
	X = [5, 9, 8, 4, 7]

	# randomly initialized thetas
	theta_a, theta_b = 0.6, 0.5

	p [theta_a, theta_b]

	loop do
	expectation = X.map do \|h\|
	like_a = Distribution::Binomial.pdf(h, N, theta_a)
	like_b = Distribution::Binomial.pdf(h, N, theta_b)

	norm_a = like_a / (like_a + like_b)
	norm_b = like_b / (like_a + like_b)

	[norm_a, norm_b, h]
	end

	maximization = expectation.each_with_object([0.0, 0.0, 0.0, 0.0]) do \|(norm_a, norm_b, h), r\|
	r[0] += norm_a * h; r[1] += norm_a * (N - h)
	r[2] += norm_b * h; r[3] += norm_b * (N - h)
	end

	theta_a_hat = maximization[0] / (maximization[0] + maximization[1])
	theta_b_hat = maximization[2] / (maximization[2] + maximization[3])

	error_a = (theta_a_hat - theta_a).abs / theta_a
	error_b = (theta_b_hat - theta_b).abs / theta_b

	theta_a, theta_b = theta_a_hat, theta_b_hat

	p [theta_a, theta_b]

	break if error_a < EPS && error_b < EPS
	end