Skip to content

Instantly share code, notes, and snippets.

@JVero
Last active April 25, 2018 16:28
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save JVero/9bb4921eeaefba8f0edff41cb584b460 to your computer and use it in GitHub Desktop.
Save JVero/9bb4921eeaefba8f0edff41cb584b460 to your computer and use it in GitHub Desktop.
import numpy as np
from math import sqrt, log10
from scipy.io import loadmat
from sklearn.linear_model import LinearRegression
from matplotlib import pyplot as plt
import seaborn as sns
"""
This code can be found on GitHub using the link below
https://git.io/vH2um
"""
# Example file name with .mat extension
initial_file_name = 'ID_28762.mat'
doPlot = True
file_id = initial_file_name.split('.')[0]
def main():
f = f_for_all_n(initial_file_name, doPlot)
return f
def dfa_linear_fit_example():
"""
This code shows the example of splitting the data into
boxes finding the best linear fit
"""
samp = load_mat_dat(initial_file_name)
y_data = y(samp)[:350].reshape(14, -1)
x_data = np.arange(0, 350).reshape(14, -1)
for i, val in enumerate(y_data):
plt.plot([x_data[i][-1]]*100, np.linspace(-.7, .4, 100), 'k--')
plt.plot(x_data[i], val, 'k')
model = LinearRegression(fit_intercept=True)
x = x_data[i]
model.fit(x[:, np.newaxis], val)
pred_list = []
for x_val in x:
pred_list.append(model.predict(x_val))
plt.plot(x_data[i], pred_list, 'k')
plt.xlabel('Frame number')
plt.ylabel('Detrended Linear Speed (mm/s)')
plt.title('Split series ')
plt.ylim((-.7, .4))
plt.show()
def load_mat_dat(dat):
return loadmat(dat)[file_id]['cut_ls'][0][0]
# Makes fake data, 370 points long, and linear with slope 10
def make_dummy_data():
return np.array([10*x for x in range(370)])
# Cuts off data that doesn't fit into the last box,
# making the length of the data a multiple of the boxsize
def trim_data(series, boxsize):
maxlen = len(series) - len(series) % boxsize
newseries = series[:maxlen]
return newseries
# Calculates y(k) for each point in the series,
# which is the demeaned cumulative sum
def y(series):
return np.cumsum(series-series.mean())
def split_series(series, boxsize):
return series.reshape([len(series)//boxsize, boxsize])
def time(boxsize):
return np.arange(boxsize)
def my_linregress(time, spl):
model = LinearRegression(fit_intercept=True)
model.fit(time[:, np.newaxis], spl)
slope = model.coef_[0]
inter = model.intercept_
return slope, inter
def f(series, boxsize):
N = len(series)
if boxsize > N:
return
if boxsize <= 0:
return
if N % boxsize != 0:
trimmed = trim_data(series, boxsize)
return f(trimmed, boxsize)
ylist = y(series)
spl = split_series(ylist, boxsize)
time = np.arange(boxsize)
numboxes = N//boxsize
slope, intercept = zip(*[my_linregress(time, spl[i])
for i in range(numboxes)])
return (sqrt(np.array([(ylist[i] - (slope[i//boxsize] * (i % boxsize) +
intercept[i//boxsize])) ** 2 for i in range(N)]).sum() * (1/N)))
def f_for_all_n(filename=False, plot=False):
if filename:
samp = load_mat_dat(filename)
else:
samp = make_dummy_data()
plt.plot(samp)
plt.xlabel('Frame number')
plt.ylabel('Linear Speed (mm/s)')
plt.title('Raw Data')
plt.show()
nrange = np.arange(3, len(samp)//10+13) # n (box-size)
f_n = np.array([f(samp, i) for i in nrange]) # F(n)
log_n = np.array([log10(n) for n in nrange])
log_f_n = np.array([log10(f) for f in f_n])
alpha_slopes = []
for n in range(3, 48): # Minimum range to find regression is of length 3
model = LinearRegression(fit_intercept=True)
nrange = np.array([log10(i) for i in np.arange(3, n+3)]).reshape(-1, 1)
model.fit(nrange, log_f_n[:n].reshape(-1, 1))
alpha_slopes.append(model.coef_[0])
val_list = []
for i, val in enumerate(log_n[:n]):
val_list.append(model.predict(val)[0])
plt.plot(log_n[:n], val_list)
plt.plot(log_n, log_f_n, 'ko')
plt.xlabel(r'$log _{10}$ of Box Length')
plt.ylabel(r'$log _{10}$ f(n)')
plt.title('Plotting slopes of increasing length of f(n)')
plt.show()
plt.plot(range(3, 48), alpha_slopes, 'ro')
plt.title('Alphas, based on boxsize (n)')
plt.xlabel('Length of interpolation from n = 3 to Total Length')
plt.ylabel(r'Slope of interpolation of log$_{10}$f(n)')
plt.show()
return alpha_slopes
if __name__ == '__main__':
dfa_linear_fit_example()
f_for_all_n(file_id)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment