andreasvc/logprobs.py

## logprobs.py
""" Compare different strategies for adding a large number of small log
probabilities. """
from __future__ import print_function
from math import log, exp, fsum, isinf
from random import expovariate

N = 10000


def logprobadd(x, y):
	""" Add two log probabilities in log space; i.e.:

	>>> a = b = 0.25
	>>> logprobadd(log(a), log(b)) == log(a + b) == log(0.5)
	True

	:param x, y: Python floats with log probabilities; -inf <= x, y <= 0.
	:source: https://facwiki.cs.byu.edu/nlp/index.php/Log_Domain_Computations
	"""
	if isinf(x):
		return y
	elif isinf(y):
		return x
	# If one value is much smaller than the other, keep the larger value.
	elif x < (y - 460):  # ~= log(1e200)
		return y
	elif y < (x - 460):  # ~= log(1e200)
		return x
	diff = y - x
	assert not isinf(diff)
	if isinf(exp(diff)):    # difference is too large
		return x if x > y else y
	# otherwise return the sum.
	return x + log(1.0 + exp(diff))


def logprobsum1(logprobs):
	""" Takes a list of log probabilities and sums them producing a
	normal probability 0 < p <= 1.0; i.e.:

	>>> a = b = c = 0.25
	>>> logprobsum([-log(a), -log(b), -log(c)]) == sum([a, b, c]) == 0.75
	True

	:param logprobs: a list of Python floats with log probilities,
		s.t. -inf <= p <= 0 for each p in ``logprobs``.
	:source: http://blog.smola.org/post/987977550/log-probabilities-semirings-and-floating-point-numbers
	"""
	maxprob = max(logprobs)
	return exp(fsum([maxprob, log(fsum([exp(prob - maxprob)
			for prob in logprobs]))]))


def logprobsum2(logprobs):
	""" Replace first fsum with multiplication. """
	maxprob = max(logprobs)
	return exp(maxprob) * fsum([exp(prob - maxprob) for prob in logprobs])


def logprobsum3(logprobs):
	""" Replace first fsum with normal addition. """
	maxprob = max(logprobs)
	return exp(maxprob + log(fsum([exp(prob - maxprob) for prob in logprobs])))


def incradd(logprobs):
	""" Incremental conversion + addition  """
	incr = 0.0
	for prob in logprobs:
		incr += exp(prob)
	return incr


def incrlogadd(logprobs):
	""" Incremental addition in log space. """
	incr = logprobs[0]
	for prob in logprobs[1:]:
		incr = logprobadd(incr, prob)
	return exp(incr)


def compare(probs, logprobs):
	""" Compare methods of adding probabilities and print a line w/results. """
	ref = fsum(probs)
	results = {name: ref - func(logprobs) for
			name, func in FUNCS.items()}
	results['sum'] = ref - sum(probs)
	for a, b in sorted(results.items(), key=lambda x: abs(x[1])):
		print('%5s %24r' % (a, b), end=' ')
	print()


FUNCS = dict(
		log1=logprobsum1,
		log2=logprobsum2,
		log3=logprobsum3,
		inc=incradd,
		inclg=incrlogadd)


def main():
	"""" Run test 20 times. """
	print('difference with reference, columns ordered by error')
	compare([.1, .1, .1, .1, .1, .1, .1, .1, .1, .1],
			map(log, [.1, .1, .1, .1, .1, .1, .1, .1, .1, .1]))
	for _ in range(20):
		logprobs = [expovariate(-1 / 100.) for _ in range(N)]
		probs = map(exp, logprobs)
		compare(probs, logprobs)

if __name__ == '__main__':
	main()

## results.md

      
    Raw
  

              results.md
            
          
    'log2' appears to be the most accurate; a typical result:
log2                      0.0
log3   -5.684341886080802e-14
log1   -5.684341886080802e-14
sum    7.105427357601002e-14
inc    7.105427357601002e-14
inclg    9.180212146020494e-12

Speed benchmarks:
inclg: 100 loops, best of 3: 10.9 ms per loop
inc: 1000 loops, best of 3: 1.42 ms per loop
log2: 100 loops, best of 3: 11.1 ms per loop
log3: 100 loops, best of 3: 11.1 ms per loop
log1: 100 loops, best of 3: 11.1 ms per loop

Speed of adding normal probabilities:
sum: 10000 loops, best of 3: 148 us per loop
fsum: 100 loops, best of 3: 11.4 ms per loop
	""" Compare different strategies for adding a large number of small log
	probabilities. """
	from __future__ import print_function
	from math import log, exp, fsum, isinf
	from random import expovariate

	N = 10000


	def logprobadd(x, y):
	""" Add two log probabilities in log space; i.e.:

	>>> a = b = 0.25
	>>> logprobadd(log(a), log(b)) == log(a + b) == log(0.5)
	True

	:param x, y: Python floats with log probabilities; -inf <= x, y <= 0.
	:source: https://facwiki.cs.byu.edu/nlp/index.php/Log_Domain_Computations
	"""
	if isinf(x):
	return y
	elif isinf(y):
	return x
	# If one value is much smaller than the other, keep the larger value.
	elif x < (y - 460): # ~= log(1e200)
	return y
	elif y < (x - 460): # ~= log(1e200)
	return x
	diff = y - x
	assert not isinf(diff)
	if isinf(exp(diff)): # difference is too large
	return x if x > y else y
	# otherwise return the sum.
	return x + log(1.0 + exp(diff))


	def logprobsum1(logprobs):
	""" Takes a list of log probabilities and sums them producing a
	normal probability 0 < p <= 1.0; i.e.:

	>>> a = b = c = 0.25
	>>> logprobsum([-log(a), -log(b), -log(c)]) == sum([a, b, c]) == 0.75
	True

	:param logprobs: a list of Python floats with log probilities,
	s.t. -inf <= p <= 0 for each p in ``logprobs``.
	:source: http://blog.smola.org/post/987977550/log-probabilities-semirings-and-floating-point-numbers
	"""
	maxprob = max(logprobs)
	return exp(fsum([maxprob, log(fsum([exp(prob - maxprob)
	for prob in logprobs]))]))


	def logprobsum2(logprobs):
	""" Replace first fsum with multiplication. """
	maxprob = max(logprobs)
	return exp(maxprob) * fsum([exp(prob - maxprob) for prob in logprobs])


	def logprobsum3(logprobs):
	""" Replace first fsum with normal addition. """
	maxprob = max(logprobs)
	return exp(maxprob + log(fsum([exp(prob - maxprob) for prob in logprobs])))


	def incradd(logprobs):
	""" Incremental conversion + addition """
	incr = 0.0
	for prob in logprobs:
	incr += exp(prob)
	return incr


	def incrlogadd(logprobs):
	""" Incremental addition in log space. """
	incr = logprobs[0]
	for prob in logprobs[1:]:
	incr = logprobadd(incr, prob)
	return exp(incr)


	def compare(probs, logprobs):
	""" Compare methods of adding probabilities and print a line w/results. """
	ref = fsum(probs)
	results = {name: ref - func(logprobs) for
	name, func in FUNCS.items()}
	results['sum'] = ref - sum(probs)
	for a, b in sorted(results.items(), key=lambda x: abs(x[1])):
	print('%5s %24r' % (a, b), end=' ')
	print()


	FUNCS = dict(
	log1=logprobsum1,
	log2=logprobsum2,
	log3=logprobsum3,
	inc=incradd,
	inclg=incrlogadd)


	def main():
	"""" Run test 20 times. """
	print('difference with reference, columns ordered by error')
	compare([.1, .1, .1, .1, .1, .1, .1, .1, .1, .1],
	map(log, [.1, .1, .1, .1, .1, .1, .1, .1, .1, .1]))
	for _ in range(20):
	logprobs = [expovariate(-1 / 100.) for _ in range(N)]
	probs = map(exp, logprobs)
	compare(probs, logprobs)

	if __name__ == '__main__':
	main()