Instantly share code, notes, and snippets.

# psukys/test_tubes.js Last active Aug 31, 2017

What would you like to do?
TUBE algorithm's utility function http://dx.doi.org/10.1016/j.eswa.2017.08.016
 class TestSuite { constructor() { this.test_subjects = require('./tubes.js'); this.data = [ { x: 1, y: 2 }, { x: 2, y: 3 }, { x: 3, y: 4 }, { x: 4, y: 5 } ]; this.t = new this.test_subjects.Tube(4, 2, 10, this.data); } run() { let final_res = true; for (let name of Object.getOwnPropertyNames(Object.getPrototypeOf(this))) { let method = this[name]; // remove non function, constructor and when name does not start with test_ if (!(method instanceof Function) || method === TestSuite || !name.startsWith('test_')) continue; let res = this[name](); final_res &= res; console.log(name + ': ' + res); } console.log('-----------------'); console.log('Test suite ' + (final_res ? 'passed' : 'failed')); } test_tube_slope() { const slope = this.t.get_tube_slope(this.data); return slope.a == 1 && slope.b == 1; } test_prediction() { return this.t.predict_y(10) == 11; } test_tube_dev() { const dev = this.t.get_tube_dev(); return dev == 0; } test_tube_tolerance() { const tol = this.t.get_tube_tolerance(); return tol == 2; } test_tube_in() { return this.t.in_tube({ x: 1, y: 1 }); } test_tube_not_in() { return !this.t.in_tube({ x: 1, y: 4.1 }); } } testsuite = new TestSuite(); testsuite.run();


The paper uses United States Environmental Protection Agency's (EPA) Statistical Inventory Reconciliation (SIR) software-based underground gasoline tank leak detection method as a baseline for acceptable true positive (not less than 95%) and false positive rates (not more than 5%). The TUBE algorithm, apart from using aforementioned SIR method, uses two-step data filtering mechanism, trend detection over time series using tubes , and detected trends’ assessment for generating a final result

SIR method

Theoretical fuel volume at the end of time frame is $V_{book}$
$V_{book} = V_{open} - V_{sales} + V_{delivery}$
Difference between actual ($V_{close}$) and theoretical ($V_{book}$) fuel volume is variance.
$Var = V_{close} - V_{book}$

Here:

• $V_{book}$ - fuel volume for a single time frame
• $V_{open}$ - starting fuel volume in the tank
• $V_{sales}$ - sold fuel volume
• $V_{delivery}$ - fuel volume that was received into the tank

Cumulative Variance allows detecting overall trends in multiple time intervals, to tell how the fuel tank state changed over time.
$CV = \sum_{i=1}^{n} Var(T_i)$

Data filtering

The filtering in general take care of several significant factors, noises, and spikes, that decrease the quality of data. For this, several techniques are used on a windowed data point (considering values of some neighbors as well):

• Median
• Augmented median (appending various values, like average)
• Linear regression (using least squares method)

TUBE algorithm

Apart from aforementioned SIR and filtering methods, TUBE algorithm also incorporates Trend detection and Trend interpretation. For this, trend interpretation is just a comparion of tube slope to a manually set threshold.

Trend detection

Trends are represented by tubes that are sections with specific outlier tolerance. The graphical representation of a tube is a pair of two functions that are delimiting the filtered CV function from above and below, in relation to the X axis

Upper and lower tube bounds are defined as followed:
$tube_{upper}(x) = tube_{s}(x) + tol$
$tube_{lower}(x) = tube_{s}(x) - tol$

$tol$ is tube tolerance and is calculated as:
$median\{tol_{factor} * tube_{dev} * tol_{min} * tol_{max}\}$
Here:

• $tol_{min}$ - minimal width of the tube
• $tol_{max}$ - maximal width of the tube
• $tol_{factor}$ - scale parameter
• $tube_{dev}$ - average absolute difference between k measurements:
$\frac{1}{k} \sum_{i=1}^{k} |y_i - tube_s (x_i)|$

Data item $y_i$ is added into current tube iff
$y_i \in \langle tube_{lower}(x_i), tube_{upper}(x_i) \rangle$
else a new tube is created with that data item as its first. Each tube has a minimum length k, which means that it is the initial base for each tube.

In above graph, tubes are represented as rectangles , bound by last data point that they have in them (originally paper has them connected). Data points are dots - don't have area, thus some might appear as outside the tube, whereas actually they are on the border.

Below graph, upon hovering a specific tube, information about tube's number, calculated tolerance and slope parameters (a and b) are displayed

Review

1. Even though Augmented Median Filter is constantly mentioned, the configuration of what is being augmented into the set is unclear.
2. Basic TUBE algorithm's requirements, what exactly are they?

References

1. M. Gorawski, A. Gorawska, K. Pasterak, The TUBE algorithm: Discovering trends in time series for the early detection of fuel leaks from underground storage tanks, Expert Systems with Applications, Volume 90, , Pages 356-373, ISSN 0957-4174, http://dx.doi.org/10.1016/j.eswa.2017.08.016. ScienceDirect

Code source

Interactive graph was generated with the help of d3.js v4, sources for this article and code can be found here: gist.github.com/test_tubes.js

 // { return t + d.x }, 0); let y_sum = data.reduce((t, d) => { return t + d.y }, 0); let xy_sum = data.reduce((t, d) => { return t + d.x * d.y }, 0); let xx_sum = data.reduce((t, d) => { return t + d.x * d.x }, 0); let b = (data.length * xy_sum - x_sum * y_sum) / (data.length * xx_sum - x_sum * x_sum); let a = (y_sum - b * x_sum) / data.length; return { a: a, b: b }; } /** * Predict y value * @param {float} x value for y to predict * @return {float} prediction for y value */ predict_y(x) { return this.slope.a + this.slope.b * x; } /** * Average absolute difference between k measurements from the filtered cv. * @return {float} deviation between k measurements */ get_tube_dev() { let len = this.data.length; if (len == 0) { len = 1; } return this.data.reduce((t, i) => { return t + Math.abs(i.y - this.predict_y(i.x)); }, 0) / len; } /** * Find tube tolerance value. * @param {float} tol_factor tolerance factor that scale tube_dev * @param {float} tol_min minimum tolerance * @param {float} tol_max maximum tolerance * @param {list} tube data used to push into tube_dev * @return {float} tube tolerance value */ get_tube_tolerance() { return get_median([this.tol_factor * this.get_tube_dev(), this.tol_min, this.tol_max]); } in_tube(point) { let pred_y = this.predict_y(point.x), tube_lower = pred_y - this.tolerance, tube_upper = pred_y + this.tolerance; return point.y >= tube_lower && point.y <= tube_upper; } } /** Calculates the median for an array of values. * @param {list} data data from which median is calculated * @return {int} median value of data */ function get_median(data) { data.sort(function (l, r) { return l - r }); let low_middle = Math.floor((data.length - 1) / 2); let high_middle = Math.floor((data.length - 1) / 2); return (data[low_middle] + data[high_middle]) / 2; } exports.Tube = Tube; exports.get_median = get_median; //]>
to join this conversation on GitHub. Already have an account? Sign in to comment