Skip to content

Instantly share code, notes, and snippets.

@parastuffs
Last active May 26, 2018 11:03
Show Gist options
  • Save parastuffs/209501951964494f04dc800ea604d671 to your computer and use it in GitHub Desktop.
Save parastuffs/209501951964494f04dc800ea604d671 to your computer and use it in GitHub Desktop.
# -*- coding: cp1252 -*-
from __future__ import division # http://stackoverflow.com/questions/1267869/how-can-i-force-division-to-be-floating-point-division-keeps-rounding-down-to-0
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import linregress
from matplotlib.dates import date2num
from datetime import datetime, date
# Numerical guard added inside compute_ln's log so the argument never hits 0.
EPS = 1e-7
# Cutoff on the log-normalised curve: samples below this are truncated away.
LOGEPS = -2
# Default values (overridden per input file just after the data is loaded)
MAXLEN = 300  # maximum number of samples kept per cycle
MINLEN = 150  # minimum cycle length for a cycle to be analysed
DELTA = 50  # minimum head range (max - min) for a cycle to be analysed
start = 1 # 0: dataset begins with a positive slope
# 1: dataset begins with a negative slope
num = 5  # dataset number — only referenced in the commented-out CSV dump at the bottom
# Parse data
def date2int(date_str):
    """Parse a timestamp string and return its proleptic Gregorian ordinal.

    Tries the 'dd-mm-yy HH:MM' format first, then falls back to
    'YYYY/mm/dd HH:MM:SS'. Note that toordinal() keeps only the date part,
    so the time of day is discarded.
    """
    try:
        parsed = datetime.strptime(date_str, '%d-%m-%y %H:%M')
    except ValueError:
        # Second accepted format; a string matching neither raises ValueError.
        parsed = datetime.strptime(date_str, '%Y/%m/%d %H:%M:%S')
    return parsed.toordinal()
# no empty value when parsing
# Input data file; the per-dataset tuning below keys off the digit in its name.
input_file = 'data-ht-7.csv'
# Column 0 is a date string converted to an ordinal; the rest are floats.
timestamp, galery, atm, diff = np.loadtxt(input_file, delimiter=',',
                                          dtype=float, unpack = True,
                                          converters = { 0 : date2int })
# Per-dataset tuning: digit -> (MAXLEN, MINLEN, DELTA, start).
# Matched in order on the first digit found in the file name, which mirrors
# the original if/elif chain exactly.
_presets = [
    ('1', (300, 150, 200, 0)),
    ('2', (300, 150, 150, 0)),
    ('5', (300, 150, 50, 1)),
    ('6', (300, 100, 100, 0)),
    ('7', (300, 100, 100, 1)),
]
for _key, _params in _presets:
    if _key in input_file:
        MAXLEN, MINLEN, DELTA, start = _params
        break
# Order data to get the production and pumping cycles
# Concatenate cycles c1 c2 c3 if len(c2) < EPS and Delta(C2) < x || all elements of C2 in [0.9/1.1 * mean of 10 last elements]
def concatenation_cycles(cycles, label_cycles):
    """Merge cycles that are too short (fewer than 8 samples) into a neighbour.

    A cycle shorter than 8 samples is treated as a spurious split: it is
    appended to the previously kept cycle, together with the cycle that
    follows it, so the two halves of the real cycle are glued back together.

    BUGFIX: the diagnostic `print cycles[i]` used Python-2-only statement
    syntax; it is now a function call, consistent with the other prints in
    this file (identical output for a single argument).

    NOTE(review): the loop stops at len(cycles) - 1, so the very last cycle
    is never emitted unless it was swallowed by a merge — confirm intended.

    Parameters:
        cycles: list of lists of measurements, one inner list per cycle.
        label_cycles: timestamps labelling each cycle, parallel to `cycles`.

    Returns:
        (new_cycles, new_label_cycles). The first input cycle may be mutated
        in place when merges occur (it is shared, not copied).
    """
    new_cycles = [cycles[0]]
    new_label_cycles = [label_cycles[0]]
    i = 1  # cycle 0 is always kept as-is
    while i < len(cycles) - 1:
        if len(cycles[i]) < 8:
            print("Need to merge cycles")
            print("Merge the current cycle with the previous one.")
            print(cycles[i])
            new_cycles[-1].extend(cycles[i])
            print("Also merge the next one.")
            new_cycles[-1].extend(cycles[i + 1])
            i = i + 2  # skip the cycle we just merged forward
        else:
            new_cycles.append(cycles[i])
            new_label_cycles.append(label_cycles[i])
            i = i + 1
    return new_cycles, new_label_cycles
def cut_data(galery, timestamp):
    """Split the head signal into alternating ascending/descending cycles.

    A cycle keeps growing while the signal follows the current direction
    within a 1% tolerance of the running extremum `ref`; when the signal
    reverses, the current cycle is closed, labelled with the timestamp at
    the break point, and a new cycle opens in the opposite direction.

    BUGFIX: the original guarded appends with `cycle is not []`, an identity
    comparison against a fresh list literal that is always True; replaced
    with a plain truthiness test so an empty cycle can never be appended.

    Parameters:
        galery: sequence of head values (Ht), at least 2 samples.
        timestamp: sequence of ordinal timestamps, parallel to `galery`.

    Returns:
        The (cycles, labels) pair produced by concatenation_cycles(), i.e.
        the raw cycles with too-short ones merged into their neighbours.
    """
    galery_cycles = []
    label_cycles = [timestamp[0]]
    ref = galery[0]  # running extremum of the current cycle
    cycle = []
    asc = True if galery[1] > galery[0] else False  # initial direction
    for i, value in enumerate(galery):
        # Stay in the current cycle while the value keeps following the
        # current direction, with a 1% tolerance around the reference.
        if (value >= 0.99*ref and asc) or (value <= 1.01*ref and not asc):
            cycle.append(value)
        else:
            # Direction reversed: close the cycle and start a new one.
            if cycle:
                galery_cycles.append(cycle)
                label_cycles.append(timestamp[i])
            cycle = [value]
            asc = not asc
        if (asc and ref < value) or (not asc and ref > value):
            ref = value  # only replace ref when it is a better extremum
    if cycle:  # append the last, still-open cycle
        galery_cycles.append(cycle)
    return concatenation_cycles(galery_cycles, label_cycles)
def create_corresponding_time(cycles):
    """Return, for each cycle, an integer time axis 0..len(cycle)-1 (numpy arrays)."""
    axes = []
    for cycle in cycles:
        axes.append(np.arange(len(cycle)))
    return axes
# Head difference between the galery sensor and the atmospheric reference.
ht = np.array(galery) - np.array(atm)
cycles, label_cycles = cut_data(ht, timestamp)
# BUGFIX: `print label_cycles` was Python-2-only statement syntax; use a
# function call like every other print in this file.
print(label_cycles)
times = create_corresponding_time(cycles)
# Plot
plt.figure(1)
# All the data
plt.plot(np.array(range(1, len(ht)+1)), ht, label="Ht")
#plt.legend()
#plt.draw()
# Plot each cycle separately
#for i in range(0,len(cycles)) :
#    plt.plot(times1[i], cycles[i], label="ht")
#plt.draw()
####### ### ### #######
## ## ## ## ## ## ## ##
## ## ## ## ## ##
####### ## ## ## ## #######
## ######### ######### ##
## ## ## ## ## ## ## ##
####### ## ## ## ## #######
# Control theory method
# Control-theory ("SAAS") method: treat each analysed cycle as the step
# response of a first-order system, and estimate the coefficient 1/T from the
# time needed to reach 63, 87 or 95% of the cycle's total head range
# (63% ~ 1T, 87% ~ 2T, 95% ~ 3T for a first-order response).
percentages = {95:3, 87:2, 63:1}  # reached percentage -> multiple of T
coef = {95:[], 87:[], 63:[]}  # per-percentage list of estimated 1/T values
coefMerged = []  # all coefficients pooled together, for a global std dev
coefMeans = []  # per-percentage mean of 1/T
coefStd = []  # per-percentage std dev of 1/T
# Cycles alternate between the two regimes; `start` selects which one to analyse.
for i in range(start,len(cycles),2) :
    print("Cycle {}".format(i))
    cycle = cycles[i]
    # Only keep cycles long enough and spanning a large enough head range.
    if len(cycle) > MINLEN and max(cycle) - min(cycle) > DELTA :
        maxCycle = max(cycle)
        minCycle = min(cycle)
        rangeCycle = maxCycle - minCycle
        for p in percentages:
            ratio = percentages[p]
            # First, find the threshold at 95, 87 or 63% of the cycle's range.
            threshold = p * rangeCycle / 100
            # The target is minCycle (the minimum value in the cycle) + threshold
            # (a relative threshold). The lambda finds the closest actual value
            # in the list; its index gives the abscissa of that value. Since all
            # cycles begin at zero, this abscissa is also the T, 2T or 3T of our
            # system, so dividing by `ratio` yields T itself.
            time_carac = cycle.index(min(cycle, key=lambda x:abs(minCycle + threshold-x))) / ratio
            if time_carac > 0:
                coef[p].append(1/time_carac)
# Aggregate the per-percentage statistics.
for c in coef:
    coefMeans.append(np.mean(coef[c]))
    coefStd.append(np.std(coef[c]))
    print("{} mean: {}+/-{}".format(c, coefMeans[-1], coefStd[-1]))
    for i in coef[c]:
        coefMerged.append(i)
print("SAAS method: {}+/-{}".format(np.mean(coefMeans), np.std(coefMerged)))
# Compute the ln
def compute_ln(ht):
    """Log-normalise a drawdown cycle.

    Computes log(|(ht - hf + EPS) / (h0 - hf)|) where h0 and hf are the
    minimum and maximum of the cycle, plus the index of the first sample
    whose log value falls below LOGEPS (0 when no sample does — numpy's
    argmax convention on an all-False boolean array).

    Returns:
        (log_ht, idx): the log-normalised curve as a numpy array, and the
        cutoff index callers use to truncate the tail of the curve.
    """
    h0, hf = min(ht), max(ht)
    normalised = abs((ht - hf + EPS) / (h0 - hf))
    log_ht = np.log(normalised)
    # argmax over a boolean array yields the first True position, or 0 if none.
    first_below = np.argmax(log_ht < LOGEPS)
    return log_ht, first_below
####### ## ##### ######## ######### #######
## ## ## ## ## ## ## ## ## ##
## ## ## ## ## ## ## ##
####### ## ## ## ####### ###### #######
## ## ## ## ## ## ##
## ## ## ## ## ## ## ## ##
####### ######### ##### ## ######### #######
# Per-cycle regression results. These lists are parallel: entry k of each one
# describes the k-th cycle that survived all the filters below.
ln_cycles = []
len_cycles = []
times_ln = []
slopes = []
intercepts = []
std_errors = []
date_strings = []
# Non-discarded cycles
valid_cycles = []
# Tableau-style colour palette so consecutive plotted cycles stay distinguishable.
colors = ["#1f77b4", "#aec7e8", "#ff7f0e", "#ffbb78", "#2ca02c", "#98df8a", "#d62728", "#ff9896", "#9467bd", "#c5b0d5", "#8c564b", "#c49c94", "#e377c2", "#f7b6d2", "#7f7f7f", "#c7c7c7", "#bcbd22", "#dbdb8d", "#17becf", "#9edae5"]
fig, ax = plt.subplots()
ax.set_prop_cycle('color', colors)
# Fit a straight line to the log-normalised curve of every other cycle
# (starting at `start`), trimming the tail until the fit is good.
for i in range(start,len(cycles),2) :
    cycle = cycles[i]
    # Same length/range filter as the SAAS section above.
    if len(cycle) > MINLEN and max(cycle) - min(cycle) > DELTA :
        date_string = date.fromordinal(int(label_cycles[i]))
        time_ln = times[i]
        #compute everything
        ln, idx = compute_ln(cycle)
        if idx != 0 : # otherwise array is empty
            # Truncate where the log curve first drops below LOGEPS.
            ln = ln[:idx]
            time_ln = time_ln[:idx]
            cycle = cycle[:idx]
        if len(cycle) > MAXLEN :
            # Cap the analysed length of the cycle.
            ln = ln[:MAXLEN]
            time_ln = time_ln[:MAXLEN]
            cycle = cycle[:MAXLEN]
        r_value = 0
        offset = 0
        # Shrink the fit window from the tail one sample at a time until the
        # linear fit is good (|R| >= 0.99) or almost no data is left.
        while abs(r_value) < 0.99 and offset < len(ln) - 1:
            offset += 1
            slope, intercept, r_value, p_value, std_err = linregress(time_ln[:-offset], ln[:-offset])
        # print("slope R value: {}".format(r_value))
        # print("Dataset length: {}".format(len(ln)))
        print("Playtime is over --------------------")
        # NOTE(review): if the while loop never runs (len(ln) <= 1), offset
        # stays 0 and the [:-0] slices below empty the arrays, so the max()
        # call would raise — confirm cycles are always long enough here.
        time_ln = time_ln[:-offset]
        ln = ln[:-offset]
        cycle = cycle[:-offset]
        # The trimmed cycle must still span a big enough head range.
        if max(cycle) - min(cycle) > DELTA:
            # Final regression on the kept window.
            slope, intercept, r_value, p_value, std_err = linregress(time_ln, ln)
            print("slope R value: {}".format(r_value))
            print("Dataset length: {}".format(len(ln)))
            print("Slope: {}, date: {}".format(slope, date_string))
            #save in lists
            valid_cycles.append(cycle)
            ln_cycles.append(ln)
            len_cycles.append(len(ln))
            times_ln.append(time_ln)
            slopes.append(slope)
            std_errors.append(std_err)
            intercepts.append(intercept)
            date_strings.append(date_string)
        else:
            print("Discarding cycle from {}, delta too low: {}/{}".format(date_string, max(cycle) - min(cycle), DELTA))
    else:
        print("Dropping a cycle of length {} and delta {}".format(len(cycle), max(cycle) - min(cycle)))
######### ######## ## ########## ######### ########
## ## ## ## ## ## ##
## ## ## ## ## ## ##
###### ## ## ## ###### ########
## ## ## ## ## ## ##
## ## ## ## ## ## ##
## ######## ######### ## ######### ## ##
# Now that everything has been processed, look for abnormalities (slopes more
# than two standard deviations away from the mean) and discard them.
slopeMean = np.mean(slopes)  # mean slope
slopeStd = np.std(slopes)  # slope standard deviation
print("Discarding abnormalities. Original mean and std: {}, {}".format(slopeMean, slopeStd))
# If a slope is further than twice the stdev from the mean, throw it the fuck away.
slopesCpy = slopes[:]  # iterate a snapshot while deleting from the live lists
idxSlopes = 0  # index into the compacted (post-deletion) parallel lists
for i in range(len(slopesCpy)):
    if abs(slopeMean - slopesCpy[i]) > 2 * slopeStd:
        print("Slope is too shitty. Slope: {} from {}, mean: {}, stdev: {}".format(slopesCpy[i], date_strings[idxSlopes], slopeMean, slopeStd))
        del slopes[idxSlopes]
        del std_errors[idxSlopes]
        del intercepts[idxSlopes]
        del ln_cycles[idxSlopes]
        del len_cycles[idxSlopes]
        del date_strings[idxSlopes]
        # BUGFIX: times_ln and valid_cycles are parallel to the lists above
        # but were never compacted, so after any deletion the idxSlopes-based
        # plotting below indexed the wrong cycle. Delete from them too.
        del times_ln[idxSlopes]
        del valid_cycles[idxSlopes]
    else:
        # Plot the fitted line (figure 2) and the raw kept cycle (figure 3).
        plt.figure(2)
        plt.plot(times_ln[idxSlopes], slopes[idxSlopes] * times_ln[idxSlopes] + intercepts[idxSlopes], linestyle='--', label="AL " +str(date_strings[idxSlopes]))
        plt.legend()
        plt.figure(3)
        plt.plot(times_ln[idxSlopes], valid_cycles[idxSlopes], label="Cycle " +str(date_strings[idxSlopes]))
        # If we do not delete an element from the lists,
        # we can increment the index. Otherwise we don't.
        idxSlopes += 1
plt.legend()
# BUGFIX: the original format string had no {} placeholder, so the cycle
# lengths were silently dropped from the output.
print("Cycles length: {}".format(len_cycles))
print("Slopes: {}".format(slopes))
print("Mean slope: {}".format(-1*np.mean(slopes)))
print("Std error : " + str(np.std(slopes)))
print("std/mean: {}".format(-1*np.std(slopes)/np.mean(slopes)))
print("Min slope : " + str(min(slopes)))
print("Max slope : " + str(max(slopes)))
# for i in range(0, len(slopes)):
#     print(str(num) + "," + str(i+1) + "," + str(-slopes[i]*10000))
plt.show()
# TODO
# IF we discard too many values in the cycle, trash it. => max(cycle) - min(cycle) > DELTA still needs to hold.
# Add a feature allowing to tune the time between two measures. By default it is 60 seconds, but this value could change.
# Add time in date string
# Exact coefficients: 63.2, 86.5, 95
# Linear interpolation to determine the correct abscissa for the SAAS coefficients
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment