ikegami-yukino/kleinberg.py

## kleinberg.py
from collections import namedtuple
import np

class Bursts:
    def __init__(level, start, end):
        self.level = level
        self.start = start
        self.end = end


def kleinberg(offsets, s=2, gamma=1):
    if s <= 1:
        raise ValueError('s must be greater than 1!')
    if gamma <= 0:
        raise ValueError('gamma must be positive!')
    if length(offsets) < 1:
        raise ValueError('offsets must be non-empty!')
    if len(offsets) == 1:
        return Bursts(level=0, start=offsets[1], end=offsets[1])

    # Get the data into the right format. The value of gaps[t] gives the length
    # of the gap between the events at offsets[t] and offsets[t+1].
    offsets = sorted(offsets)
    gaps = offsets[1:len(offsets)] - offsets[:len(offsets) - 1]
    if np.any(gaps == 0):
        raise ValueError('Input cannot contain events with zero time between!')

    # Compute the average event rate, etc.
    T = np.sum(gaps)
    n = len(gaps)
    ghat = T / n

    # Compute an upper bound on the number of states used in the optimal state
    # sequence, as per Kleinberg.
    k = np.ceil(1 + np.log(T, s) + np.log(1 / np.min(gaps), s))

    # Set up the transition cost function tau, and f, the probability density
    # function for gap lengths when in state j, with precomputed parameters.
    gammalogn = gamma * np.log(n)
    tau = lambda i, j: 0 if i >= j else (j - i) * gammalogn
    alpha = map(lambda x: s ** x / ghat, range(k-1))
    f = lambda j, x: alpha[j] * np.exp(-alpha[j] * x)

    # Compute the optimal state sequence for the model, using the Viterbi
    # algorithm. In each iteration t, we compute for each possible state j the
    # minimum costs of partial state sequences up to iteration t that end in
    # that state. These numbers are stored in C. We use q to keep track of
    # the optimal sequences for each j.
    C = c(0, np.repeat(np.inf, k - 1))
    q = np.zeros((k, 1))
    for t in range(n):
        Cprime = np.repeat(np.nan, k)
        qprime = np.zeros((k, t))
        for j in range(k):
            cost = map(lambda ell: C[ell] + tau(ell, j), range(k))
            ell = np.argmin(cost)
            Cprime[j] = cost[ell] - np.log(f(j, gaps[t]))
            if t > 1:
                qprime[j, 0:(t-1)] = q[ell, 0]
            qprime[j, t] = j
        C = Cprime
        q = qprime

    # Extract the state sequence with the minimum final cost.
    q = q[np.argmin(C)]

    # Compute the number of entries we will need in the output.
    prev_q = 0
    N = 0
    for t in range(n):
        if q[t] > prev_q:
            N += q[t] - prev_q
        prev_q = q[t]

    # Run through the state sequence, and pull out the durations of all the
    # intervals for which the system is at or above a given state greater than 1.
    bursts = Bursts(level=np.repeat(np.nan, N), start=np.repeat(offsets[1], N),
                    end=np.repeat(offsets[1], N))  # Using offsets[1] for the type.

    burstcounter = 0
    prev_q = 0
    stack = np.repeat(np.nan, N) # Keeps track of which bursts are currently open.
    stackcounter = 0
    for t in range(n):
        if q[t] > prev_q:
            num_levels_opened = q[t] - prev_q
            for i in range(num_levels_opened):
                burstcounter += 1
                bursts.level[burstcounter] = prev_q + i
                bursts.start[burstcounter] = offsets[t]
                stackcounter += 1
                stack[stackcounter] = burstcounter
        elif q[t] < prev_q:
            num_levels_closed = prev_q - q[t]
            for i in range(num_levels_closed):
                bursts.end[stack[stackcounter]] = offsets[t]
                stackcounter -= 1
        prev_q = q[t]
        while stackcounter >= 0:
            bursts.end[stack[stackcounter]] = offsets[n + 1]
            stackcounter -= 1
    return bursts
	from collections import namedtuple
	import np

	class Bursts:
	def __init__(level, start, end):
	self.level = level
	self.start = start
	self.end = end


	def kleinberg(offsets, s=2, gamma=1):
	if s <= 1:
	raise ValueError('s must be greater than 1!')
	if gamma <= 0:
	raise ValueError('gamma must be positive!')
	if length(offsets) < 1:
	raise ValueError('offsets must be non-empty!')
	if len(offsets) == 1:
	return Bursts(level=0, start=offsets[1], end=offsets[1])

	# Get the data into the right format. The value of gaps[t] gives the length
	# of the gap between the events at offsets[t] and offsets[t+1].
	offsets = sorted(offsets)
	gaps = offsets[1:len(offsets)] - offsets[:len(offsets) - 1]
	if np.any(gaps == 0):
	raise ValueError('Input cannot contain events with zero time between!')

	# Compute the average event rate, etc.
	T = np.sum(gaps)
	n = len(gaps)
	ghat = T / n

	# Compute an upper bound on the number of states used in the optimal state
	# sequence, as per Kleinberg.
	k = np.ceil(1 + np.log(T, s) + np.log(1 / np.min(gaps), s))

	# Set up the transition cost function tau, and f, the probability density
	# function for gap lengths when in state j, with precomputed parameters.
	gammalogn = gamma * np.log(n)
	tau = lambda i, j: 0 if i >= j else (j - i) * gammalogn
	alpha = map(lambda x: s ** x / ghat, range(k-1))
	f = lambda j, x: alpha[j] * np.exp(-alpha[j] * x)

	# Compute the optimal state sequence for the model, using the Viterbi
	# algorithm. In each iteration t, we compute for each possible state j the
	# minimum costs of partial state sequences up to iteration t that end in
	# that state. These numbers are stored in C. We use q to keep track of
	# the optimal sequences for each j.
	C = c(0, np.repeat(np.inf, k - 1))
	q = np.zeros((k, 1))
	for t in range(n):
	Cprime = np.repeat(np.nan, k)
	qprime = np.zeros((k, t))
	for j in range(k):
	cost = map(lambda ell: C[ell] + tau(ell, j), range(k))
	ell = np.argmin(cost)
	Cprime[j] = cost[ell] - np.log(f(j, gaps[t]))
	if t > 1:
	qprime[j, 0:(t-1)] = q[ell, 0]
	qprime[j, t] = j
	C = Cprime
	q = qprime

	# Extract the state sequence with the minimum final cost.
	q = q[np.argmin(C)]

	# Compute the number of entries we will need in the output.
	prev_q = 0
	N = 0
	for t in range(n):
	if q[t] > prev_q:
	N += q[t] - prev_q
	prev_q = q[t]

	# Run through the state sequence, and pull out the durations of all the
	# intervals for which the system is at or above a given state greater than 1.
	bursts = Bursts(level=np.repeat(np.nan, N), start=np.repeat(offsets[1], N),
	end=np.repeat(offsets[1], N)) # Using offsets[1] for the type.

	burstcounter = 0
	prev_q = 0
	stack = np.repeat(np.nan, N) # Keeps track of which bursts are currently open.
	stackcounter = 0
	for t in range(n):
	if q[t] > prev_q:
	num_levels_opened = q[t] - prev_q
	for i in range(num_levels_opened):
	burstcounter += 1
	bursts.level[burstcounter] = prev_q + i
	bursts.start[burstcounter] = offsets[t]
	stackcounter += 1
	stack[stackcounter] = burstcounter
	elif q[t] < prev_q:
	num_levels_closed = prev_q - q[t]
	for i in range(num_levels_closed):
	bursts.end[stack[stackcounter]] = offsets[t]
	stackcounter -= 1
	prev_q = q[t]
	while stackcounter >= 0:
	bursts.end[stack[stackcounter]] = offsets[n + 1]
	stackcounter -= 1
	return bursts