JVero/dfa.py

## dfa.py
import numpy as np
from math import sqrt, log10
from scipy.io import loadmat
from sklearn.linear_model import LinearRegression
from matplotlib import pyplot as plt
import seaborn as sns

"""
This code can be found on GitHub using the link below
https://git.io/vH2um
"""
# Example file name with .mat extension
initial_file_name = 'ID_28762.mat'
doPlot = True
file_id = initial_file_name.split('.')[0]


def main():
    f = f_for_all_n(initial_file_name, doPlot)
    return f


def dfa_linear_fit_example():
    """
    This code shows the example of splitting the data into
    boxes finding the best linear fit
    """

    samp = load_mat_dat(initial_file_name)
    y_data = y(samp)[:350].reshape(14, -1)

    x_data = np.arange(0, 350).reshape(14, -1)
    for i, val in enumerate(y_data):
        plt.plot([x_data[i][-1]]*100, np.linspace(-.7, .4, 100), 'k--')
        plt.plot(x_data[i], val, 'k')
        model = LinearRegression(fit_intercept=True)
        x = x_data[i]
        model.fit(x[:, np.newaxis], val)
        pred_list = []
        for x_val in x:
            pred_list.append(model.predict(x_val))
        plt.plot(x_data[i], pred_list, 'k')
    plt.xlabel('Frame number')
    plt.ylabel('Detrended Linear Speed (mm/s)')
    plt.title('Split series ')
    plt.ylim((-.7, .4))
    plt.show()


def load_mat_dat(dat):
    return loadmat(dat)[file_id]['cut_ls'][0][0]


# Makes fake data, 370 points long, and linear with slope 10
def make_dummy_data():
    return np.array([10*x for x in range(370)])


# Cuts off data that doesn't fit into the last box,
# making the length of the data a multiple of the boxsize
def trim_data(series, boxsize):
    maxlen = len(series) - len(series) % boxsize
    newseries = series[:maxlen]
    return newseries


# Calculates y(k) for each point in the series,
# which is the demeaned cumulative sum
def y(series):
    return np.cumsum(series-series.mean())


def split_series(series, boxsize):
    return series.reshape([len(series)//boxsize, boxsize])


def time(boxsize):
    return np.arange(boxsize)


def my_linregress(time, spl):
    model = LinearRegression(fit_intercept=True)
    model.fit(time[:, np.newaxis], spl)
    slope = model.coef_[0]
    inter = model.intercept_
    return slope, inter


def f(series, boxsize):
    N = len(series)
    if boxsize > N:
        return
    if boxsize <= 0:
        return
    if N % boxsize != 0:
        trimmed = trim_data(series, boxsize)
        return f(trimmed, boxsize)
    ylist = y(series)
    spl = split_series(ylist, boxsize)
    time = np.arange(boxsize)
    numboxes = N//boxsize
    slope, intercept = zip(*[my_linregress(time, spl[i])
                           for i in range(numboxes)])

    return (sqrt(np.array([(ylist[i] - (slope[i//boxsize] * (i % boxsize) +
            intercept[i//boxsize])) ** 2 for i in range(N)]).sum() * (1/N)))


def f_for_all_n(filename=False, plot=False):
    if filename:
        samp = load_mat_dat(filename)
    else:
        samp = make_dummy_data()
    plt.plot(samp)
    plt.xlabel('Frame number')
    plt.ylabel('Linear Speed (mm/s)')
    plt.title('Raw Data')
    plt.show()
    nrange = np.arange(3, len(samp)//10+13)  # n (box-size)
    f_n = np.array([f(samp, i) for i in nrange])  # F(n)

    log_n = np.array([log10(n) for n in nrange])
    log_f_n = np.array([log10(f) for f in f_n])

    alpha_slopes = []
    for n in range(3, 48):  # Minimum range to find regression is of length 3
        model = LinearRegression(fit_intercept=True)
        nrange = np.array([log10(i) for i in np.arange(3, n+3)]).reshape(-1, 1)
        model.fit(nrange, log_f_n[:n].reshape(-1, 1))
        alpha_slopes.append(model.coef_[0])
        val_list = []
        for i, val in enumerate(log_n[:n]):
            val_list.append(model.predict(val)[0])
        plt.plot(log_n[:n], val_list)
    plt.plot(log_n, log_f_n, 'ko')
    plt.xlabel(r'$log _{10}$ of Box Length')
    plt.ylabel(r'$log _{10}$ f(n)')
    plt.title('Plotting slopes of increasing length of f(n)')
    plt.show()

    plt.plot(range(3, 48), alpha_slopes, 'ro')
    plt.title('Alphas, based on boxsize (n)')
    plt.xlabel('Length of interpolation from n = 3 to Total Length')
    plt.ylabel(r'Slope of interpolation of log$_{10}$f(n)')
    plt.show()
    return alpha_slopes


if __name__ == '__main__':
    dfa_linear_fit_example()

    f_for_all_n(file_id)
	import numpy as np
	from math import sqrt, log10
	from scipy.io import loadmat
	from sklearn.linear_model import LinearRegression
	from matplotlib import pyplot as plt
	import seaborn as sns

	"""
	This code can be found on GitHub using the link below
	https://git.io/vH2um
	"""
	# Example file name with .mat extension
	initial_file_name = 'ID_28762.mat'
	doPlot = True
	file_id = initial_file_name.split('.')[0]


	def main():
	f = f_for_all_n(initial_file_name, doPlot)
	return f


	def dfa_linear_fit_example():
	"""
	This code shows the example of splitting the data into
	boxes finding the best linear fit
	"""

	samp = load_mat_dat(initial_file_name)
	y_data = y(samp)[:350].reshape(14, -1)

	x_data = np.arange(0, 350).reshape(14, -1)
	for i, val in enumerate(y_data):
	plt.plot([x_data[i][-1]]*100, np.linspace(-.7, .4, 100), 'k--')
	plt.plot(x_data[i], val, 'k')
	model = LinearRegression(fit_intercept=True)
	x = x_data[i]
	model.fit(x[:, np.newaxis], val)
	pred_list = []
	for x_val in x:
	pred_list.append(model.predict(x_val))
	plt.plot(x_data[i], pred_list, 'k')
	plt.xlabel('Frame number')
	plt.ylabel('Detrended Linear Speed (mm/s)')
	plt.title('Split series ')
	plt.ylim((-.7, .4))
	plt.show()


	def load_mat_dat(dat):
	return loadmat(dat)[file_id]['cut_ls'][0][0]


	# Makes fake data, 370 points long, and linear with slope 10
	def make_dummy_data():
	return np.array([10*x for x in range(370)])


	# Cuts off data that doesn't fit into the last box,
	# making the length of the data a multiple of the boxsize
	def trim_data(series, boxsize):
	maxlen = len(series) - len(series) % boxsize
	newseries = series[:maxlen]
	return newseries


	# Calculates y(k) for each point in the series,
	# which is the demeaned cumulative sum
	def y(series):
	return np.cumsum(series-series.mean())


	def split_series(series, boxsize):
	return series.reshape([len(series)//boxsize, boxsize])


	def time(boxsize):
	return np.arange(boxsize)


	def my_linregress(time, spl):
	model = LinearRegression(fit_intercept=True)
	model.fit(time[:, np.newaxis], spl)
	slope = model.coef_[0]
	inter = model.intercept_
	return slope, inter


	def f(series, boxsize):
	N = len(series)
	if boxsize > N:
	return
	if boxsize <= 0:
	return
	if N % boxsize != 0:
	trimmed = trim_data(series, boxsize)
	return f(trimmed, boxsize)
	ylist = y(series)
	spl = split_series(ylist, boxsize)
	time = np.arange(boxsize)
	numboxes = N//boxsize
	slope, intercept = zip(*[my_linregress(time, spl[i])
	for i in range(numboxes)])

	return (sqrt(np.array([(ylist[i] - (slope[i//boxsize] * (i % boxsize) +
	intercept[i//boxsize])) ** 2 for i in range(N)]).sum() * (1/N)))


	def f_for_all_n(filename=False, plot=False):
	if filename:
	samp = load_mat_dat(filename)
	else:
	samp = make_dummy_data()
	plt.plot(samp)
	plt.xlabel('Frame number')
	plt.ylabel('Linear Speed (mm/s)')
	plt.title('Raw Data')
	plt.show()
	nrange = np.arange(3, len(samp)//10+13) # n (box-size)
	f_n = np.array([f(samp, i) for i in nrange]) # F(n)

	log_n = np.array([log10(n) for n in nrange])
	log_f_n = np.array([log10(f) for f in f_n])

	alpha_slopes = []
	for n in range(3, 48): # Minimum range to find regression is of length 3
	model = LinearRegression(fit_intercept=True)
	nrange = np.array([log10(i) for i in np.arange(3, n+3)]).reshape(-1, 1)
	model.fit(nrange, log_f_n[:n].reshape(-1, 1))
	alpha_slopes.append(model.coef_[0])
	val_list = []
	for i, val in enumerate(log_n[:n]):
	val_list.append(model.predict(val)[0])
	plt.plot(log_n[:n], val_list)
	plt.plot(log_n, log_f_n, 'ko')
	plt.xlabel(r'$log _{10}$ of Box Length')
	plt.ylabel(r'$log _{10}$ f(n)')
	plt.title('Plotting slopes of increasing length of f(n)')
	plt.show()

	plt.plot(range(3, 48), alpha_slopes, 'ro')
	plt.title('Alphas, based on boxsize (n)')
	plt.xlabel('Length of interpolation from n = 3 to Total Length')
	plt.ylabel(r'Slope of interpolation of log$_{10}$f(n)')
	plt.show()
	return alpha_slopes


	if __name__ == '__main__':
	dfa_linear_fit_example()

	f_for_all_n(file_id)