zpzim/mp_fast.py

## mp_fast.py
import numpy as np
import math
import numba

def get_precomputes(T, m):
  prefix_sum = np.zeros((len(T),))
  prefix_sum_sq = np.zeros((len(T),))
  n = len(T) - m + 1;
  norms = np.zeros((n,))
  means = np.zeros((n,))
  df = np.zeros((n,))
  dg = np.zeros((n,))

  prefix_sum[0] = T[0]
  prefix_sum_sq[0] = T[0] * T[0]
  for i in range(1,len(T)):
    prefix_sum[i] = T[i] + prefix_sum[i - 1]
    prefix_sum_sq[i] = T[i] * T[i] + prefix_sum_sq[i - 1]

  means[0] = prefix_sum[m - 1] / m
  for i in range(1,n):
    means[i] = (prefix_sum[i + m - 1] - prefix_sum[i - 1]) / m

  s = 0;
  for i in range(m):
    val = T[i] - means[0]
    s += val * val
  norms[0] = s

  for i in range(1,n):
    norms[i] = norms[i - 1] + ((T[i - 1] - means[i - 1]) + (T[i + m - 1] - means[i])) * (T[i + m - 1] - T[i - 1])

  for i in range(n):
    '''
    if nanvalues[i]:
      norms[i] = NaN
    else:
    '''
    norms[i] = 1.0 / math.sqrt(norms[i])

  for i in range(n-1):
    df[i] = (T[i + m] - T[i]) / 2.0;
    dg[i] = (T[i + m] - means[i + 1]) + (T[i] - means[i]);

  return means, norms, df, dg

# Fast mp code for python (only computes self joins)
@numba.jit(nopython=True)
def do_mp(a,w,mua,siga,dfa,dga,mub,sigb,dfb,dgb):

    b = a
    na = len(a) - w + 1
    nb = na

    # Mark the last diagonal
    diagmax = na

    # This is the exclusion zone
    minlag = w // 4 + 1

    #Result MP
    mp = np.zeros((na,))

    #Initialize starting covariance for the first row of the distance matrix
    c = np.zeros((diagmax - minlag,))
    for diag in range(minlag,diagmax):
      c[diag-minlag] = np.sum((a[diag:diag+w]-mua[diag]) * (b[:w]-mub[0]))


    #Compute the first row of correlations, this could be done inside the loop, but because python can't slice arrays like arr[:-0] we need to do the first iteration outside
    result = c*(sigb[0]*siga[minlag:])
    # Reduce along the rows
    mp[minlag:] = np.maximum(mp[minlag:], result)
    # Reduce along the column
    mp[0] = max(mp[0], np.amax(result))
    #The number of diagonals we compute gets smaller each iteration
    #Update the covariance for the next iteration
    c[:-1] = c[:-1] + dfb[0] * dga[minlag:-1] + dfa[minlag:-1]*dgb[0]

    # Repeat the process for each row
    for offset in range(1, nb-minlag):
      # Compute the correlations
      result = c[:-offset]*(sigb[offset]*siga[minlag+offset:])
      # Reduce along the row
      mp[minlag+offset:] = np.maximum(mp[minlag+offset:], result )
      # Reduce along the columns
      mp[offset] = max(mp[offset], np.amax(result))
      # Update the covatiance values for the next iteration
      c[:-offset-1] = c[:-offset-1] + dfb[offset] * dga[minlag+offset:-1] + dfa[minlag+offset:-1]*dgb[offset]

    return mp

def matrix_profile(a, w):
    mua, siga, dfa, dga = get_precomputes(a,w)
    mub = mua
    sigb = siga
    dfb = dfa
    dgb = dga

    return do_mp(a,b,w,mua,siga,dfa,dga,mub,sigb,dfb,dgb)
	import numpy as np
	import math
	import numba

	def get_precomputes(T, m):
	prefix_sum = np.zeros((len(T),))
	prefix_sum_sq = np.zeros((len(T),))
	n = len(T) - m + 1;
	norms = np.zeros((n,))
	means = np.zeros((n,))
	df = np.zeros((n,))
	dg = np.zeros((n,))

	prefix_sum[0] = T[0]
	prefix_sum_sq[0] = T[0] * T[0]
	for i in range(1,len(T)):
	prefix_sum[i] = T[i] + prefix_sum[i - 1]
	prefix_sum_sq[i] = T[i] * T[i] + prefix_sum_sq[i - 1]

	means[0] = prefix_sum[m - 1] / m
	for i in range(1,n):
	means[i] = (prefix_sum[i + m - 1] - prefix_sum[i - 1]) / m

	s = 0;
	for i in range(m):
	val = T[i] - means[0]
	s += val * val
	norms[0] = s

	for i in range(1,n):
	norms[i] = norms[i - 1] + ((T[i - 1] - means[i - 1]) + (T[i + m - 1] - means[i])) * (T[i + m - 1] - T[i - 1])

	for i in range(n):
	'''
	if nanvalues[i]:
	norms[i] = NaN
	else:
	'''
	norms[i] = 1.0 / math.sqrt(norms[i])

	for i in range(n-1):
	df[i] = (T[i + m] - T[i]) / 2.0;
	dg[i] = (T[i + m] - means[i + 1]) + (T[i] - means[i]);

	return means, norms, df, dg

	# Fast mp code for python (only computes self joins)
	@numba.jit(nopython=True)
	def do_mp(a,w,mua,siga,dfa,dga,mub,sigb,dfb,dgb):

	b = a
	na = len(a) - w + 1
	nb = na

	# Mark the last diagonal
	diagmax = na

	# This is the exclusion zone
	minlag = w // 4 + 1

	#Result MP
	mp = np.zeros((na,))

	#Initialize starting covariance for the first row of the distance matrix
	c = np.zeros((diagmax - minlag,))
	for diag in range(minlag,diagmax):
	c[diag-minlag] = np.sum((a[diag:diag+w]-mua[diag]) * (b[:w]-mub[0]))


	#Compute the first row of correlations, this could be done inside the loop, but because python can't slice arrays like arr[:-0] we need to do the first iteration outside
	result = c(sigb[0]siga[minlag:])
	# Reduce along the rows
	mp[minlag:] = np.maximum(mp[minlag:], result)
	# Reduce along the column
	mp[0] = max(mp[0], np.amax(result))
	#The number of diagonals we compute gets smaller each iteration
	#Update the covariance for the next iteration
	c[:-1] = c[:-1] + dfb[0] * dga[minlag:-1] + dfa[minlag:-1]*dgb[0]

	# Repeat the process for each row
	for offset in range(1, nb-minlag):
	# Compute the correlations
	result = c[:-offset](sigb[offset]siga[minlag+offset:])
	# Reduce along the row
	mp[minlag+offset:] = np.maximum(mp[minlag+offset:], result )
	# Reduce along the columns
	mp[offset] = max(mp[offset], np.amax(result))
	# Update the covatiance values for the next iteration
	c[:-offset-1] = c[:-offset-1] + dfb[offset] * dga[minlag+offset:-1] + dfa[minlag+offset:-1]*dgb[offset]

	return mp

	def matrix_profile(a, w):
	mua, siga, dfa, dga = get_precomputes(a,w)
	mub = mua
	sigb = siga
	dfb = dfa
	dgb = dga

	return do_mp(a,b,w,mua,siga,dfa,dga,mub,sigb,dfb,dgb)