Create a gist now

Instantly share code, notes, and snippets.

What would you like to do?
TUBE algorithm's utility function http://dx.doi.org/10.1016/j.eswa.2017.08.016
class TestSuite {
constructor() {
this.test_subjects = require('./tubes.js');
this.data = [
{ x: 1, y: 2 },
{ x: 2, y: 3 },
{ x: 3, y: 4 },
{ x: 4, y: 5 }
];
this.t = new this.test_subjects.Tube(4, 2, 10, this.data);
}
run() {
let final_res = true;
for (let name of Object.getOwnPropertyNames(Object.getPrototypeOf(this))) {
let method = this[name];
// remove non function, constructor and when name does not start with test_
if (!(method instanceof Function) || method === TestSuite || !name.startsWith('test_')) continue;
let res = this[name]();
final_res &= res;
console.log(name + ': ' + res);
}
console.log('-----------------');
console.log('Test suite ' + (final_res ? 'passed' : 'failed'));
}
test_tube_slope() {
const slope = this.t.get_tube_slope(this.data);
return slope.a == 1 && slope.b == 1;
}
test_prediction() {
return this.t.predict_y(10) == 11;
}
test_tube_dev() {
const dev = this.t.get_tube_dev();
return dev == 0;
}
test_tube_tolerance() {
const tol = this.t.get_tube_tolerance();
return tol == 2;
}
test_tube_in() {
return this.t.in_tube({ x: 1, y: 1 });
}
test_tube_not_in() {
return !this.t.in_tube({ x: 1, y: 4.1 });
}
}
testsuite = new TestSuite();
testsuite.run();
<script type="text/javascript" src="https://cdnjs.cloudflare.com/ajax/libs/d3/4.10.0/d3.min.js"></script>
<script type="text/javascript" src="https://cdn.rawgit.com/psukys/01da4408cf409c5b29a58e38c7fd6b7a/raw/b5227434e618147c88cce6a1036e0c487a56d378/tubes.js"></script>
<article>
<p>The paper uses United States Environmental Protection Agency's (EPA) Statistical Inventory Reconciliation (SIR) software-based
underground gasoline tank leak detection method as a baseline for acceptable true positive (not less than 95%) and
false positive rates (not more than 5%). The TUBE algorithm, apart from using aforementioned SIR method, uses
<q cite="#bib1">two-step data filtering mechanism, trend detection over time series using tubes , and detected trends’ assessment
for generating a final result</q>
</p>
<section>
<h2>SIR method</h2>
<p>Theoretical fuel volume at the end of time frame is $V_{book}$
<br /> $V_{book} = V_{open} - V_{sales} + V_{delivery}$
<br /> Difference between actual ($V_{close}$) and theoretical ($V_{book}$) fuel volume is variance.
<br /> $Var = V_{close} - V_{book}$
<br /><br /> Here:
<ul>
<li>$V_{book}$ - fuel volume for a single time frame</li>
<li>$V_{open}$ - starting fuel volume in the tank</li>
<li>$V_{sales}$ - sold fuel volume</li>
<li>$V_{delivery}$ - fuel volume that was received into the tank</li>
</ul>
<br /> Cumulative Variance allows <q cite="#bib1">detecting overall trends in multiple time intervals</q>, to
tell how the fuel tank state changed over time.
<br /> $CV = \sum_{i=1}^{n} Var(T_i)$
</p>
</section>
<section>
<h2>Data filtering</h2>
<p>The filtering in general take care of several significant factors, noises, and spikes, that decrease the quality
of data. For this, several techniques are used on a windowed data point (considering values of some neighbors
as well):
<ul>
<li>Median</li>
<li>Augmented median (appending various values, like average)</li>
<li>Linear regression (using least squares method)</li>
</ul>
</p>
</section>
<section>
<h2>TUBE algorithm</h2>
<p>Apart from aforementioned SIR and filtering methods, TUBE algorithm also incorporates <q cite="#bib1">Trend detection</q> and <q cite="#bib1">Trend interpretation</q>. For this, trend interpretation is just a comparion of tube slope to a manually set threshold.</p>
<section>
<h3>Trend detection</h3>
<p>Trends are represented by tubes that are sections with specific outlier tolerance. <q cite="#bib1">The graphical representation of a tube is a pair of two functions that are delimiting the filtered CV function from above and below, in relation to the X axis</q></p>
<p>Upper and lower tube bounds are defined as followed:
<br /> $tube_{upper}(x) = tube_{s}(x) + tol$
<br /> $tube_{lower}(x) = tube_{s}(x) - tol$
</p>
<p>$tol$ is tube tolerance and is calculated as:
<br /> $median\{tol_{factor} * tube_{dev} * tol_{min} * tol_{max}\}$
<br /> Here:
<ul>
<li>$tol_{min}$ - minimal width of the tube</li>
<li>$tol_{max}$ - maximal width of the tube</li>
<li>$tol_{factor}$ - scale parameter</li>
<li>$tube_{dev}$ - average absolute difference between k measurements:
<br /> $\frac{1}{k} \sum_{i=1}^{k} |y_i - tube_s (x_i)|$
</li>
</ul>
</p>
<p>Data item $y_i$ is added into current tube iff
<br /> $y_i \in \langle tube_{lower}(x_i), tube_{upper}(x_i) \rangle$
<br /> else a new tube is created with that data item as its first. Each tube has a minimum length k, which
means that it is the initial base for each tube.
</p>
<figure>
<div id="slidecontainer">
<input type="range" min="2" max="35" value="3" class="slider" id="k">
<label for="k">Minimum tube size: <span id="k_val">3</span></label><br />
<input type="range" min="1" max="100" value="10" class="slider" id="tol_scale">
<label for="tol_scale">Tolerance scale: <span id="tol_scale_val">10</span></label><br />
<input type="range" min="1" max="100" value="1" class="slider" id="tol_min">
<label for="tol_min">Minimal tolerance: <span id="tol_min_val">3</span></label><br />
<input type="range" min="1" max="100" value="5" class="slider" id="tol_max">
<label for="tol_max">Maximal tolerance: <span id="tol_max_val">5</span></label><br />
</div>
<svg id="tubes" width="400" height="400"></svg>
<script type="text/javascript">
//<![CDATA[
let svg = d3.select('svg#tubes');
let margin = { top: 20, right: 80, bottom: 30, left: 50 };
let width = svg.attr("width") - margin.left - margin.right
height = svg.attr("height") - margin.top - margin.bottom;
svg.attr('transform', 'translate(' + margin.left + ',' + margin.top + ')');
let data = [{ x: 0, y: -1.5 },
{ x: 1, y: -1.5 },
{ x: 2, y: -1.5 },
{ x: 3, y: -2 },
{ x: 4, y: -2.3 },
{ x: 5, y: -2.6 },
{ x: 6, y: -2.85 },
{ x: 7, y: -3.20 },
{ x: 8, y: -3.7 },
{ x: 9, y: -4 },
{ x: 10, y: -3.8 },
{ x: 11, y: -3 },
{ x: 12, y: -1 },
{ x: 13, y: 1 },
{ x: 14, y: 3 },
{ x: 15, y: 4.8 },
{ x: 16, y: 5 },
{ x: 17, y: 4.9 },
{ x: 18, y: 4.5 },
{ x: 19, y: 4 },
{ x: 20, y: 3 },
{ x: 21, y: 2.5 },
{ x: 22, y: 2.35 },
{ x: 23, y: 2.25 },
{ x: 24, y: 2.15 },
{ x: 25, y: 2.10 },
{ x: 26, y: 2 },
{ x: 27, y: 1.9 },
{ x: 28, y: 1.3 },
{ x: 29, y: 0.9 },
{ x: 30, y: 0.2 },
{ x: 31, y: 0.1 }];
let y = d3.scaleLinear()
.domain(d3.extent(data, function (d) { return d.y; }))
.range([height, height / 5]),// trivial value, since boundaries get outside
x = d3.scaleLinear()
.domain(d3.extent(data, function (d) { return d.x; }))
.range([0, width]);
let value_line = d3.line()
.x(function (d) { return x(d.x); })
.y(function (d) { return y(d.y); });
let value_area = d3.area()
.x(function (d) { return x(d.x); })
.y0(function (d) { return y(d.y0); })
.y1(function (d) { return y(d.y1); });
function build_tubes(scale, min_tol, max_tol, step) {
let tubes = [];
let tube = new Tube(scale, max_tol, min_tol, data.slice(0, step));
for (let i = step; i < data.length; i++) {
if (tube.in_tube(data[i])) {
tube.data.push(data[i]);
} else {
tubes.push(tube);
tube = new Tube(scale, max_tol, min_tol, data.slice(i, i + step));
i += step - 1;
}
}
tubes.push(tube); // push last element
// TODO: check for problems with last elements
return tubes;
}
function clear_tubes() {
d3.selectAll('.spec_tube').remove();
}
function draw_tubes(tubes) {
let boundary = [],
tol = null,
y_pred = null;
for (let i = 0; i < tubes.length; i++) {
tol = tubes[i].tolerance;
boundary = tubes[i].data.map(function (d) {
y_pred = tubes[i].predict_y(d.x);
return {
x: d.x,
y0: y_pred - tol,
y1: y_pred + tol
};
});
svg.append('path')
.attr('class', 'line')
.attr('fill', 'none')
.attr('stroke', '#846672')
.attr('transform', 'translate(' + margin.left + ', 0)')
.attr('d', value_area(boundary))
.attr("pointer-events", "fill")
.attr('class', "spec_tube")
.on("mouseover", function (d) {
d3.select(this).style("fill", "green");
document.getElementById('tube_caption').innerHTML = 'Tube ' + i + ', tolerance ' + tol + ', slope y = ' + tubes[i].slope.a + ' + x * ' + tubes[i].slope.b;
})
.on("mouseout", function (d) {
d3.select(this).style("fill", "none");
document.getElementById('tube_caption').innerHTML = 'Hover tube for info';
});
}
}
function update_all() {
let k_val = document.getElementById('k').value;
let tol_scale_val = document.getElementById('tol_scale').value;
let tol_min_val = document.getElementById('tol_min').value;
let tol_max_val = document.getElementById('tol_max').value;
document.getElementById('k_val').innerHTML = k_val;
document.getElementById('tol_scale_val').innerHTML = tol_scale_val;
document.getElementById('tol_min_val').innerHTML = tol_min_val;
document.getElementById('tol_max_val').innerHTML = tol_max_val;
let tubes = build_tubes(parseInt(tol_scale_val), parseInt(tol_min_val), parseInt(tol_max_val), parseInt(k_val));
clear_tubes();
draw_tubes(tubes);
}
document.getElementById('k').oninput = update_all;
document.getElementById('tol_scale').oninput = update_all;
document.getElementById('tol_min').oninput = update_all;
document.getElementById('tol_max').oninput = update_all;
update_all();
svg.append('g')
.attr('class', 'axis axis--y')
.attr('transform', 'translate(' + margin.left + ', 0)')
.call(d3.axisLeft(y));
svg.append('g')
.attr('class', 'axis axis--x')
.attr('transform', 'translate(' + margin.left + ', ' + height + ')')
.call(d3.axisBottom(x));
svg.append('path')
.attr('class', 'line')
.attr('fill', 'none')
.attr('stroke', '#667184')
.attr('transform', 'translate(' + margin.left + ', 0)')
.attr('d', value_line(data));
svg.selectAll('dot')
.data(data)
.enter().append('circle')
.attr('r', 3.5)
.attr('transform', 'translate(' + margin.left + ', 0)')
.attr('cx', function (d) { return x(d.x); })
.attr('cy', function (d) { return y(d.y); });
//]]>
</script>
<figcaption id="tube_caption">Hover tube for info</figcaption>
</figure>
<p>In above graph, tubes are represented as rectangles , bound by last data point that they have in them (originally paper has them connected). Data points are dots - don't have area, thus some might appear as outside the tube, whereas actually they are on the border.</p>
<p>Below graph, upon hovering a specific tube, information about tube's number, calculated tolerance and slope parameters (a and b) are displayed</p>
</section>
</section>
<section>
<h2>Review</h2>
<ol>
<li>Even though Augmented Median Filter is constantly mentioned, the configuration of what is being augmented into
the set is unclear.</li>
<li>Basic TUBE algorithm's requirements, what exactly are they?</li>
</ol>
</section>
<section>
<h2>References</h2>
<ol>
<li id="bib1">M. Gorawski, A. Gorawska, K. Pasterak, <cite>The TUBE algorithm: Discovering trends in time series for the early detection of fuel leaks from underground storage tanks</cite>,
Expert Systems with Applications, Volume 90,
<time>2017</time>, Pages 356-373, ISSN 0957-4174, http://dx.doi.org/10.1016/j.eswa.2017.08.016. <a href="http://www.sciencedirect.com/science/article/pii/S0957417417305547">ScienceDirect</a></li>
</ol>
</section>
<section>
<h2>Code source</h2>
<p>Interactive graph was generated with the help of <a href="https://d3js.org/">d3.js v4</a>, sources for this article
and code can be found here: <a href="https://gist.github.com/psukys/01da4408cf409c5b29a58e38c7fd6b7a">gist.github.com/test_tubes.js</a></p>
</section>
</article>
//<![CDATA[
class Tube {
/**
* Initialize tube
* @param {float} tol_factor tolerance factor that scale tube_dev
* @param {float} tol_min minimum tolerance
* @param {float} tol_max maximum tolerance
* @param {list} data list data points {x, y}
*/
constructor(tol_factor, tol_min, tol_max, data) {
this.tol_factor = tol_factor;
this.tol_min = tol_min;
this.tol_max = tol_max;
this.data = data;
this.slope = this.get_tube_slope(data)
this.tolerance = this.get_tube_tolerance();
}
/**
* Linear function regression to determine tube slope.
* @param {list} data tube's data {x, y} items
* @return {float} slope parameters {a, b}
*/
get_tube_slope(data) {
// https://www.easycalculation.com/analytical/learn-least-square-regression.php
let x_sum = data.reduce((t, d) => { return t + d.x }, 0);
let y_sum = data.reduce((t, d) => { return t + d.y }, 0);
let xy_sum = data.reduce((t, d) => { return t + d.x * d.y }, 0);
let xx_sum = data.reduce((t, d) => { return t + d.x * d.x }, 0);
let b = (data.length * xy_sum - x_sum * y_sum) / (data.length * xx_sum - x_sum * x_sum);
let a = (y_sum - b * x_sum) / data.length;
return { a: a, b: b };
}
/**
* Predict y value
* @param {float} x value for y to predict
* @return {float} prediction for y value
*/
predict_y(x) {
return this.slope.a + this.slope.b * x;
}
/**
* Average absolute difference between k measurements from the filtered cv.
* @return {float} deviation between k measurements
*/
get_tube_dev() {
let len = this.data.length;
if (len == 0) {
len = 1;
}
return this.data.reduce((t, i) => {
return t + Math.abs(i.y - this.predict_y(i.x));
}, 0) / len;
}
/**
* Find tube tolerance value.
* @param {float} tol_factor tolerance factor that scale tube_dev
* @param {float} tol_min minimum tolerance
* @param {float} tol_max maximum tolerance
* @param {list} tube data used to push into tube_dev
* @return {float} tube tolerance value
*/
get_tube_tolerance() {
return get_median([this.tol_factor * this.get_tube_dev(), this.tol_min, this.tol_max]);
}
in_tube(point) {
let pred_y = this.predict_y(point.x),
tube_lower = pred_y - this.tolerance,
tube_upper = pred_y + this.tolerance;
return point.y >= tube_lower && point.y <= tube_upper;
}
}
/** Calculates the median for an array of values.
* @param {list} data data from which median is calculated
* @return {int} median value of data
*/
function get_median(data) {
data.sort(function (l, r) { return l - r });
let low_middle = Math.floor((data.length - 1) / 2);
let high_middle = Math.floor((data.length - 1) / 2);
return (data[low_middle] + data[high_middle]) / 2;
}
exports.Tube = Tube;
exports.get_median = get_median;
//]>
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment