Created
September 23, 2019 19:18
-
-
Save zpzim/e32ff85bf5048a0d05932df74be8c648 to your computer and use it in GitHub Desktop.
My attempt at fast MP python code (only does self joins and probably could be better)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np | |
import math | |
import numba | |
def get_precomputes(T, m): | |
prefix_sum = np.zeros((len(T),)) | |
prefix_sum_sq = np.zeros((len(T),)) | |
n = len(T) - m + 1; | |
norms = np.zeros((n,)) | |
means = np.zeros((n,)) | |
df = np.zeros((n,)) | |
dg = np.zeros((n,)) | |
prefix_sum[0] = T[0] | |
prefix_sum_sq[0] = T[0] * T[0] | |
for i in range(1,len(T)): | |
prefix_sum[i] = T[i] + prefix_sum[i - 1] | |
prefix_sum_sq[i] = T[i] * T[i] + prefix_sum_sq[i - 1] | |
means[0] = prefix_sum[m - 1] / m | |
for i in range(1,n): | |
means[i] = (prefix_sum[i + m - 1] - prefix_sum[i - 1]) / m | |
s = 0; | |
for i in range(m): | |
val = T[i] - means[0] | |
s += val * val | |
norms[0] = s | |
for i in range(1,n): | |
norms[i] = norms[i - 1] + ((T[i - 1] - means[i - 1]) + (T[i + m - 1] - means[i])) * (T[i + m - 1] - T[i - 1]) | |
for i in range(n): | |
''' | |
if nanvalues[i]: | |
norms[i] = NaN | |
else: | |
''' | |
norms[i] = 1.0 / math.sqrt(norms[i]) | |
for i in range(n-1): | |
df[i] = (T[i + m] - T[i]) / 2.0; | |
dg[i] = (T[i + m] - means[i + 1]) + (T[i] - means[i]); | |
return means, norms, df, dg | |
# Fast mp code for python (only computes self joins) | |
@numba.jit(nopython=True) | |
def do_mp(a,w,mua,siga,dfa,dga,mub,sigb,dfb,dgb): | |
b = a | |
na = len(a) - w + 1 | |
nb = na | |
# Mark the last diagonal | |
diagmax = na | |
# This is the exclusion zone | |
minlag = w // 4 + 1 | |
#Result MP | |
mp = np.zeros((na,)) | |
#Initialize starting covariance for the first row of the distance matrix | |
c = np.zeros((diagmax - minlag,)) | |
for diag in range(minlag,diagmax): | |
c[diag-minlag] = np.sum((a[diag:diag+w]-mua[diag]) * (b[:w]-mub[0])) | |
#Compute the first row of correlations, this could be done inside the loop, but because python can't slice arrays like arr[:-0] we need to do the first iteration outside | |
result = c*(sigb[0]*siga[minlag:]) | |
# Reduce along the rows | |
mp[minlag:] = np.maximum(mp[minlag:], result) | |
# Reduce along the column | |
mp[0] = max(mp[0], np.amax(result)) | |
#The number of diagonals we compute gets smaller each iteration | |
#Update the covariance for the next iteration | |
c[:-1] = c[:-1] + dfb[0] * dga[minlag:-1] + dfa[minlag:-1]*dgb[0] | |
# Repeat the process for each row | |
for offset in range(1, nb-minlag): | |
# Compute the correlations | |
result = c[:-offset]*(sigb[offset]*siga[minlag+offset:]) | |
# Reduce along the row | |
mp[minlag+offset:] = np.maximum(mp[minlag+offset:], result ) | |
# Reduce along the columns | |
mp[offset] = max(mp[offset], np.amax(result)) | |
# Update the covatiance values for the next iteration | |
c[:-offset-1] = c[:-offset-1] + dfb[offset] * dga[minlag+offset:-1] + dfa[minlag+offset:-1]*dgb[offset] | |
return mp | |
def matrix_profile(a, w): | |
mua, siga, dfa, dga = get_precomputes(a,w) | |
mub = mua | |
sigb = siga | |
dfb = dfa | |
dgb = dga | |
return do_mp(a,b,w,mua,siga,dfa,dga,mub,sigb,dfb,dgb) | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment