Skip to content

Instantly share code, notes, and snippets.

@ikegami-yukino
Created March 10, 2015 04:48
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 3 You must be signed in to fork a gist
  • Save ikegami-yukino/fb5fa79fde91ca0bbe98 to your computer and use it in GitHub Desktop.
Save ikegami-yukino/fb5fa79fde91ca0bbe98 to your computer and use it in GitHub Desktop.
Burst detection by kleinberg's algorithm
from collections import namedtuple
import np
class Bursts:
def __init__(level, start, end):
self.level = level
self.start = start
self.end = end
def kleinberg(offsets, s=2, gamma=1):
if s <= 1:
raise ValueError('s must be greater than 1!')
if gamma <= 0:
raise ValueError('gamma must be positive!')
if length(offsets) < 1:
raise ValueError('offsets must be non-empty!')
if len(offsets) == 1:
return Bursts(level=0, start=offsets[1], end=offsets[1])
# Get the data into the right format. The value of gaps[t] gives the length
# of the gap between the events at offsets[t] and offsets[t+1].
offsets = sorted(offsets)
gaps = offsets[1:len(offsets)] - offsets[:len(offsets) - 1]
if np.any(gaps == 0):
raise ValueError('Input cannot contain events with zero time between!')
# Compute the average event rate, etc.
T = np.sum(gaps)
n = len(gaps)
ghat = T / n
# Compute an upper bound on the number of states used in the optimal state
# sequence, as per Kleinberg.
k = np.ceil(1 + np.log(T, s) + np.log(1 / np.min(gaps), s))
# Set up the transition cost function tau, and f, the probability density
# function for gap lengths when in state j, with precomputed parameters.
gammalogn = gamma * np.log(n)
tau = lambda i, j: 0 if i >= j else (j - i) * gammalogn
alpha = map(lambda x: s ** x / ghat, range(k-1))
f = lambda j, x: alpha[j] * np.exp(-alpha[j] * x)
# Compute the optimal state sequence for the model, using the Viterbi
# algorithm. In each iteration t, we compute for each possible state j the
# minimum costs of partial state sequences up to iteration t that end in
# that state. These numbers are stored in C. We use q to keep track of
# the optimal sequences for each j.
C = c(0, np.repeat(np.inf, k - 1))
q = np.zeros((k, 1))
for t in range(n):
Cprime = np.repeat(np.nan, k)
qprime = np.zeros((k, t))
for j in range(k):
cost = map(lambda ell: C[ell] + tau(ell, j), range(k))
ell = np.argmin(cost)
Cprime[j] = cost[ell] - np.log(f(j, gaps[t]))
if t > 1:
qprime[j, 0:(t-1)] = q[ell, 0]
qprime[j, t] = j
C = Cprime
q = qprime
# Extract the state sequence with the minimum final cost.
q = q[np.argmin(C)]
# Compute the number of entries we will need in the output.
prev_q = 0
N = 0
for t in range(n):
if q[t] > prev_q:
N += q[t] - prev_q
prev_q = q[t]
# Run through the state sequence, and pull out the durations of all the
# intervals for which the system is at or above a given state greater than 1.
bursts = Bursts(level=np.repeat(np.nan, N), start=np.repeat(offsets[1], N),
end=np.repeat(offsets[1], N)) # Using offsets[1] for the type.
burstcounter = 0
prev_q = 0
stack = np.repeat(np.nan, N) # Keeps track of which bursts are currently open.
stackcounter = 0
for t in range(n):
if q[t] > prev_q:
num_levels_opened = q[t] - prev_q
for i in range(num_levels_opened):
burstcounter += 1
bursts.level[burstcounter] = prev_q + i
bursts.start[burstcounter] = offsets[t]
stackcounter += 1
stack[stackcounter] = burstcounter
elif q[t] < prev_q:
num_levels_closed = prev_q - q[t]
for i in range(num_levels_closed):
bursts.end[stack[stackcounter]] = offsets[t]
stackcounter -= 1
prev_q = q[t]
while stackcounter >= 0:
bursts.end[stack[stackcounter]] = offsets[n + 1]
stackcounter -= 1
return bursts
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment