My attempt at fast matrix profile (MP) Python code (it only does self-joins and could probably be better)
import numpy as np
import math
import numba
def get_precomputes(T, m):
    prefix_sum = np.zeros((len(T),))
    prefix_sum_sq = np.zeros((len(T),))
    n = len(T) - m + 1
    norms = np.zeros((n,))
    means = np.zeros((n,))
    df = np.zeros((n,))
    dg = np.zeros((n,))
    # Running sums of T and T^2, used to get each window's mean in O(1)
    prefix_sum[0] = T[0]
    prefix_sum_sq[0] = T[0] * T[0]
    for i in range(1, len(T)):
        prefix_sum[i] = T[i] + prefix_sum[i - 1]
        prefix_sum_sq[i] = T[i] * T[i] + prefix_sum_sq[i - 1]
    means[0] = prefix_sum[m - 1] / m
    for i in range(1, n):
        means[i] = (prefix_sum[i + m - 1] - prefix_sum[i - 1]) / m
    # Centered sum of squares for the first window, then an incremental
    # update for each subsequent window
    s = 0
    for i in range(m):
        val = T[i] - means[0]
        s += val * val
    norms[0] = s
    for i in range(1, n):
        norms[i] = norms[i - 1] + ((T[i - 1] - means[i - 1]) + (T[i + m - 1] - means[i])) * (T[i + m - 1] - T[i - 1])
    # Store the inverse norms so computing correlations is a multiply, not a divide
    for i in range(n):
        # if nanvalues[i]:
        #     norms[i] = NaN
        # else:
        norms[i] = 1.0 / math.sqrt(norms[i])
    # df/dg are the terms of the incremental covariance update used in do_mp
    for i in range(n - 1):
        df[i] = (T[i + m] - T[i]) / 2.0
        dg[i] = (T[i + m] - means[i + 1]) + (T[i] - means[i])
    return means, norms, df, dg
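# A minimal sanity-check sketch (my addition, not part of the original gist):
# the streaming statistics above should match direct per-window numpy
# computations. means[i] is the mean of window i, and norms[i] holds the
# inverse centered norm 1 / ||T[i:i+m] - means[i]||.
def check_precomputes(T, m):
    means, norms, df, dg = get_precomputes(T, m)
    n = len(T) - m + 1
    direct_means = np.array([np.mean(T[i:i + m]) for i in range(n)])
    direct_norms = np.array([1.0 / np.linalg.norm(T[i:i + m] - direct_means[i]) for i in range(n)])
    assert np.allclose(means, direct_means)
    assert np.allclose(norms, direct_norms)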
# Fast MP code for Python (only computes self-joins)
@numba.jit(nopython=True)
def do_mp(a, w, mua, siga, dfa, dga, mub, sigb, dfb, dgb):
    b = a
    na = len(a) - w + 1
    nb = na
    # Mark the last diagonal
    diagmax = na
    # This is the exclusion zone
    minlag = w // 4 + 1
    # Result MP
    mp = np.zeros((na,))
    # Initialize the starting covariance for the first row of the distance matrix
    c = np.zeros((diagmax - minlag,))
    for diag in range(minlag, diagmax):
        c[diag - minlag] = np.sum((a[diag:diag + w] - mua[diag]) * (b[:w] - mub[0]))
    # Compute the first row of correlations. This could be done inside the loop,
    # but Python can't slice arrays like arr[:-0], so the first iteration is
    # handled separately here.
    result = c * (sigb[0] * siga[minlag:])
    # Fold the row's correlations into the column entries of the profile
    mp[minlag:] = np.maximum(mp[minlag:], result)
    # Fold the best correlation in the row into the row entry
    mp[0] = max(mp[0], np.amax(result))
    # The number of diagonals we compute gets smaller each iteration.
    # Update the covariance for the next iteration.
    c[:-1] = c[:-1] + dfb[0] * dga[minlag:-1] + dfa[minlag:-1] * dgb[0]
    # Repeat the process for each row
    for offset in range(1, nb - minlag):
        # Compute the correlations
        result = c[:-offset] * (sigb[offset] * siga[minlag + offset:])
        # Fold the row's correlations into the column entries of the profile
        mp[minlag + offset:] = np.maximum(mp[minlag + offset:], result)
        # Fold the best correlation in the row into the row entry
        mp[offset] = max(mp[offset], np.amax(result))
        # Update the covariance values for the next iteration
        c[:-offset - 1] = c[:-offset - 1] + dfb[offset] * dga[minlag + offset:-1] + dfa[minlag + offset:-1] * dgb[offset]
    return mp
def matrix_profile(a, w):
    # For a self-join the "a" and "b" statistics are identical
    mua, siga, dfa, dga = get_precomputes(a, w)
    mub = mua
    sigb = siga
    dfb = dfa
    dgb = dga
    return do_mp(a, w, mua, siga, dfa, dga, mub, sigb, dfb, dgb)
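# Example usage (a sketch on synthetic data, not part of the original gist).
# Note that do_mp returns Pearson correlations rather than distances; the
# standard z-normalized Euclidean distance profile is sqrt(2 * w * (1 - corr)).
if __name__ == '__main__':
    np.random.seed(0)
    T = np.cumsum(np.random.randn(1000))  # random-walk test series
    w = 50
    mp = matrix_profile(T, w)
    dist = np.sqrt(np.maximum(2.0 * w * (1.0 - mp), 0.0))
    print('min/max nearest-neighbor distance:', dist.min(), dist.max())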