JohannesBuchner/sample_broken_powerlaw.py

## sample_broken_powerlaw.py
import numpy

# Sampling from a broken powerlaw or powerlaw segments

# Reference material:
# http://mathworld.wolfram.com/RandomNumber.html
# The strategy is to draw from each powerlaw segment, and make sure the proportions are right based on the segment integrals

# This code does not work correctly, improvements are welcome

def sample_broken(a1, a2, xb, N):
	# a1: index for power-law to left of break
	# a2: index for power-law to right of break
	# xb: break location
	# N: final sample size

	### CDF at break for both power-laws
	n1 = (1-(1+xb)**-a1)
	n2 = (1-(1+xb)**-a2)

	### PDF at break for both power-laws after truncation
	### power-law 1 is truncated at right
	p1 = (a1/(1+xb)**a1) / n1
	### ... while power-law 2 is truncated at left
	p2 = (a2/(1+xb)**a2) / (1 - n2)

	### generate sample size for each power-law
	print(p1, p2, n1, n2)
	mask = numpy.random.uniform(size=N) < p2/(p1+p2)

	### case 1; using inverse CDF from 0 to xb
	### case 2; using inverse CDF from xb to +inf
	f1 = numpy.where(mask, 0, (1 - (1+xb)**-a2))
	f2 = numpy.where(mask, (1 - (1+xb)**-a1), 1)
	a = numpy.where(mask, a1, a2)
	u = numpy.random.uniform(size=N)
	x = (1 - (u*(f2-f1) + f1))**(-1.0/a) - 1

	return x

def sample_broken_powerlaw(xlo, xbrk, xhi, slopelo, slopehi, size=1):
	# first compute normalisation for each segment
	# n = slope - 1, slope = n + 1
	C1 = slopelo / (xhi**slopelo - xbrk**slopelo)
	C2 = slopehi / (xbrk**slopehi - xlo**slopehi)
	#C1 = 1. / C1
	#C2 = 1. / C2
	norm = C1 / (C1 + C2)
	#print(C1, C2, norm)

	#n1 = (1-(1+xbrk)**slopelo)
	#n2 = (1-(1+xbrk)**slopehi)
	print('cdf:', C1, C2)

	### PDF at break for both power-laws after truncation
	### power-law 1 is truncated at right
	#p1 = (slopelo/(1+xbrk)**slopelo) / C1
	### ... while power-law 2 is truncated at left
	#p2 = (slopehi/(1+xbrk)**slopehi) / (1 - C2)
	p1 = C1 * xbrk**(slopelo-1)
	p2 = C2 * xbrk**(slopehi-1)

	norm = p1 / (p1 + p2)
	print('pdf:', p1, p2, norm)
	norm = 1. / (1 + 18)
	# renormalise to 1 across all
	mask = numpy.random.uniform(size=size) > norm

	# draw random numbers proportionally
	slope = numpy.where(mask, slopelo, slopehi)
	xlo = numpy.where(mask, xlo, xbrk)
	xhi = numpy.where(mask, xbrk, xhi)
	y = numpy.random.uniform(size=size)
	x = ((xhi**slope - xlo**slope)*y + xlo**slope)**(1./slope)
	return x

def sample_powerlaw_segments(xbreaks, slopes, size=1):
	# first compute normalisation for each segment

	xlos = numpy.asarray(xbreaks[:-1])
	xhis = numpy.asarray(xbreaks[1:])
	slopes = numpy.asarray(slopes)
	# n = slope - 1
	C = slopes / (xhis**slopes - xlos**slopes)
	norms = 1. / C
	print(norms)
	# renormalise to 1 across all
	norms /= norms.sum()
	k = numpy.random.choice(len(slopes), size=size, p=norms)

	# draw random numbers proportionally
	slope = slopes[k]
	xlo = xlos[k]
	xhi = xhis[k]
	y = numpy.random.uniform(size=size)
	x = ((xhi**slope - xlo**slope)*y + xlo**slope)**(1./slope)
	return x

if __name__ == '__main__':
	import matplotlib.pyplot as plt
	#x = sample_powerlaw_segments([0.5, 1, 5, 50], [-0.4, -2, -3.], size=100000)
	#x = sample_powerlaw_segments([0.1, 1, 50], [-1, -3.], size=100000)
	x = sample_broken_powerlaw(0.1, 1., 10., -1., -2., size=1000000)
	#x = sample_broken(2., 2., 1., 100000)
	plt.hist(numpy.log10(x), bins=100, log=True, histtype='step')
	plt.savefig('bknpowsample.pdf', bbox_inches='tight')
	plt.close()
	import numpy

	# Sampling from a broken powerlaw or powerlaw segments

	# Reference material:
	# http://mathworld.wolfram.com/RandomNumber.html
	# The strategy is to draw from each powerlaw segment, and make sure the proportions are right based on the segment integrals

	# This code does not work correctly, improvements are welcome

	def sample_broken(a1, a2, xb, N):
	# a1: index for power-law to left of break
	# a2: index for power-law to right of break
	# xb: break location
	# N: final sample size

	### CDF at break for both power-laws
	n1 = (1-(1+xb)**-a1)
	n2 = (1-(1+xb)**-a2)

	### PDF at break for both power-laws after truncation
	### power-law 1 is truncated at right
	p1 = (a1/(1+xb)**a1) / n1
	### ... while power-law 2 is truncated at left
	p2 = (a2/(1+xb)**a2) / (1 - n2)

	### generate sample size for each power-law
	print(p1, p2, n1, n2)
	mask = numpy.random.uniform(size=N) < p2/(p1+p2)

	### case 1; using inverse CDF from 0 to xb
	### case 2; using inverse CDF from xb to +inf
	f1 = numpy.where(mask, 0, (1 - (1+xb)**-a2))
	f2 = numpy.where(mask, (1 - (1+xb)**-a1), 1)
	a = numpy.where(mask, a1, a2)
	u = numpy.random.uniform(size=N)
	x = (1 - (u(f2-f1) + f1))*(-1.0/a) - 1

	return x

	def sample_broken_powerlaw(xlo, xbrk, xhi, slopelo, slopehi, size=1):
	# first compute normalisation for each segment
	# n = slope - 1, slope = n + 1
	C1 = slopelo / (xhislopelo - xbrkslopelo)
	C2 = slopehi / (xbrkslopehi - xloslopehi)
	#C1 = 1. / C1
	#C2 = 1. / C2
	norm = C1 / (C1 + C2)
	#print(C1, C2, norm)

	#n1 = (1-(1+xbrk)**slopelo)
	#n2 = (1-(1+xbrk)**slopehi)
	print('cdf:', C1, C2)

	### PDF at break for both power-laws after truncation
	### power-law 1 is truncated at right
	#p1 = (slopelo/(1+xbrk)**slopelo) / C1
	### ... while power-law 2 is truncated at left
	#p2 = (slopehi/(1+xbrk)**slopehi) / (1 - C2)
	p1 = C1 * xbrk**(slopelo-1)
	p2 = C2 * xbrk**(slopehi-1)

	norm = p1 / (p1 + p2)
	print('pdf:', p1, p2, norm)
	norm = 1. / (1 + 18)
	# renormalise to 1 across all
	mask = numpy.random.uniform(size=size) > norm

	# draw random numbers proportionally
	slope = numpy.where(mask, slopelo, slopehi)
	xlo = numpy.where(mask, xlo, xbrk)
	xhi = numpy.where(mask, xbrk, xhi)
	y = numpy.random.uniform(size=size)
	x = ((xhislope - xloslope)y + xloslope)*(1./slope)
	return x

	def sample_powerlaw_segments(xbreaks, slopes, size=1):
	# first compute normalisation for each segment

	xlos = numpy.asarray(xbreaks[:-1])
	xhis = numpy.asarray(xbreaks[1:])
	slopes = numpy.asarray(slopes)
	# n = slope - 1
	C = slopes / (xhisslopes - xlosslopes)
	norms = 1. / C
	print(norms)
	# renormalise to 1 across all
	norms /= norms.sum()
	k = numpy.random.choice(len(slopes), size=size, p=norms)

	# draw random numbers proportionally
	slope = slopes[k]
	xlo = xlos[k]
	xhi = xhis[k]
	y = numpy.random.uniform(size=size)
	x = ((xhislope - xloslope)y + xloslope)*(1./slope)
	return x

	if __name__ == '__main__':
	import matplotlib.pyplot as plt
	#x = sample_powerlaw_segments([0.5, 1, 5, 50], [-0.4, -2, -3.], size=100000)
	#x = sample_powerlaw_segments([0.1, 1, 50], [-1, -3.], size=100000)
	x = sample_broken_powerlaw(0.1, 1., 10., -1., -2., size=1000000)
	#x = sample_broken(2., 2., 1., 100000)
	plt.hist(numpy.log10(x), bins=100, log=True, histtype='step')
	plt.savefig('bknpowsample.pdf', bbox_inches='tight')
	plt.close()