Last active
April 25, 2018 16:28
-
-
Save JVero/9bb4921eeaefba8f0edff41cb584b460 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np | |
from math import sqrt, log10 | |
from scipy.io import loadmat | |
from sklearn.linear_model import LinearRegression | |
from matplotlib import pyplot as plt | |
import seaborn as sns | |
""" | |
This code can be found on GitHub using the link below | |
https://git.io/vH2um | |
""" | |
# Example file name with .mat extension | |
initial_file_name = 'ID_28762.mat' | |
doPlot = True | |
file_id = initial_file_name.split('.')[0] | |
def main(): | |
f = f_for_all_n(initial_file_name, doPlot) | |
return f | |
def dfa_linear_fit_example(): | |
""" | |
This code shows the example of splitting the data into | |
boxes finding the best linear fit | |
""" | |
samp = load_mat_dat(initial_file_name) | |
y_data = y(samp)[:350].reshape(14, -1) | |
x_data = np.arange(0, 350).reshape(14, -1) | |
for i, val in enumerate(y_data): | |
plt.plot([x_data[i][-1]]*100, np.linspace(-.7, .4, 100), 'k--') | |
plt.plot(x_data[i], val, 'k') | |
model = LinearRegression(fit_intercept=True) | |
x = x_data[i] | |
model.fit(x[:, np.newaxis], val) | |
pred_list = [] | |
for x_val in x: | |
pred_list.append(model.predict(x_val)) | |
plt.plot(x_data[i], pred_list, 'k') | |
plt.xlabel('Frame number') | |
plt.ylabel('Detrended Linear Speed (mm/s)') | |
plt.title('Split series ') | |
plt.ylim((-.7, .4)) | |
plt.show() | |
def load_mat_dat(dat): | |
return loadmat(dat)[file_id]['cut_ls'][0][0] | |
# Makes fake data, 370 points long, and linear with slope 10 | |
def make_dummy_data(): | |
return np.array([10*x for x in range(370)]) | |
# Cuts off data that doesn't fit into the last box, | |
# making the length of the data a multiple of the boxsize | |
def trim_data(series, boxsize): | |
maxlen = len(series) - len(series) % boxsize | |
newseries = series[:maxlen] | |
return newseries | |
# Calculates y(k) for each point in the series, | |
# which is the demeaned cumulative sum | |
def y(series): | |
return np.cumsum(series-series.mean()) | |
def split_series(series, boxsize): | |
return series.reshape([len(series)//boxsize, boxsize]) | |
def time(boxsize): | |
return np.arange(boxsize) | |
def my_linregress(time, spl): | |
model = LinearRegression(fit_intercept=True) | |
model.fit(time[:, np.newaxis], spl) | |
slope = model.coef_[0] | |
inter = model.intercept_ | |
return slope, inter | |
def f(series, boxsize): | |
N = len(series) | |
if boxsize > N: | |
return | |
if boxsize <= 0: | |
return | |
if N % boxsize != 0: | |
trimmed = trim_data(series, boxsize) | |
return f(trimmed, boxsize) | |
ylist = y(series) | |
spl = split_series(ylist, boxsize) | |
time = np.arange(boxsize) | |
numboxes = N//boxsize | |
slope, intercept = zip(*[my_linregress(time, spl[i]) | |
for i in range(numboxes)]) | |
return (sqrt(np.array([(ylist[i] - (slope[i//boxsize] * (i % boxsize) + | |
intercept[i//boxsize])) ** 2 for i in range(N)]).sum() * (1/N))) | |
def f_for_all_n(filename=False, plot=False): | |
if filename: | |
samp = load_mat_dat(filename) | |
else: | |
samp = make_dummy_data() | |
plt.plot(samp) | |
plt.xlabel('Frame number') | |
plt.ylabel('Linear Speed (mm/s)') | |
plt.title('Raw Data') | |
plt.show() | |
nrange = np.arange(3, len(samp)//10+13) # n (box-size) | |
f_n = np.array([f(samp, i) for i in nrange]) # F(n) | |
log_n = np.array([log10(n) for n in nrange]) | |
log_f_n = np.array([log10(f) for f in f_n]) | |
alpha_slopes = [] | |
for n in range(3, 48): # Minimum range to find regression is of length 3 | |
model = LinearRegression(fit_intercept=True) | |
nrange = np.array([log10(i) for i in np.arange(3, n+3)]).reshape(-1, 1) | |
model.fit(nrange, log_f_n[:n].reshape(-1, 1)) | |
alpha_slopes.append(model.coef_[0]) | |
val_list = [] | |
for i, val in enumerate(log_n[:n]): | |
val_list.append(model.predict(val)[0]) | |
plt.plot(log_n[:n], val_list) | |
plt.plot(log_n, log_f_n, 'ko') | |
plt.xlabel(r'$log _{10}$ of Box Length') | |
plt.ylabel(r'$log _{10}$ f(n)') | |
plt.title('Plotting slopes of increasing length of f(n)') | |
plt.show() | |
plt.plot(range(3, 48), alpha_slopes, 'ro') | |
plt.title('Alphas, based on boxsize (n)') | |
plt.xlabel('Length of interpolation from n = 3 to Total Length') | |
plt.ylabel(r'Slope of interpolation of log$_{10}$f(n)') | |
plt.show() | |
return alpha_slopes | |
if __name__ == '__main__': | |
dfa_linear_fit_example() | |
f_for_all_n(file_id) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment