Skip to content

Instantly share code, notes, and snippets.

@AlbinoDrought
Forked from adadgio/data.bits.normalizer.ts
Last active August 27, 2018 18:35
Show Gist options
  • Save AlbinoDrought/9ef941d5e95428bbf29b57fa77c010f0 to your computer and use it in GitHub Desktop.
Save AlbinoDrought/9ef941d5e95428bbf29b57fa77c010f0 to your computer and use it in GitHub Desktop.
non-typescript version of DataBitsNormalizer, with the default export being the created class
/**
 * Reports whether `input` is a genuine Array, judged by the internal class
 * tag that `Object.prototype.toString` reports for it.
 *
 * @param {*} input - Any value, including null and undefined.
 * @returns {boolean} true only when the tag is "[object Array]".
 */
function isArray(input) {
  var tag = Object.prototype.toString.call(input);
  return tag === '[object Array]';
}
// Example dataset showing every value type a row may carry: numbers
// (soilhum, airhum), booleans (airtemp, water), a string (name) and an
// array of strings (cats).
var sampleData = [
  {
    soilhum: 500,
    airtemp: true,
    airhum: 18,
    water: true,
    name: "romain",
    cats: ["a", "b"],
  },
  {
    soilhum: 1050,
    airtemp: false,
    airhum: 21,
    water: true,
    name: "romain",
    cats: ["c", "a"],
  },
  {
    soilhum: 300,
    airtemp: true,
    airhum: 90,
    water: false,
    name: "edwards",
    cats: ["a", "b"],
  },
  {
    soilhum: 950,
    airtemp: true,
    airhum: 26,
    water: true,
    name: "jane",
    cats: ["c", "b"],
  },
  {
    soilhum: 1050,
    airtemp: false,
    airhum: 26,
    water: true,
    name: "romain",
    cats: ["a", "b"],
  },
  {
    soilhum: 1050,
    airtemp: false,
    airhum: 26,
    water: true,
    name: "romain",
    cats: ["b", "c"],
  },
];
/**
 * Converts a dataset of flat object rows into numeric vectors.
 *
 * The first row defines the schema: its own keys are the columns and the
 * types of its values decide how each column is encoded:
 *   - number  -> one value scaled into [0, 1] using the dataset-wide min/max
 *   - boolean -> one value, 0 or 1
 *   - string  -> one slot per distinct string in the dataset (one-hot)
 *   - array   -> one slot per distinct element in the dataset (multi-hot)
 *
 * Columns named via setOutputProperties() are collected into the output
 * vectors, every other column into the input vectors.
 *
 * Usage: new DataBitsNormalizer(rows).setOutputProperties(['x']).normalize()
 */
var DataBitsNormalizer = /** @class */ (function () {
  var hasOwn = Object.prototype.hasOwnProperty;

  /**
   * @param {Object[]} data - Rows to normalize; must be non-empty.
   * @throws {Error} When `data` has no rows or the first row has no properties.
   */
  function DataBitsNormalizer(data) {
    this.dataset = data;
    this.binaryInput = [];
    this.binaryOutput = [];
    this.outputProperties = [];
    // prevent empty data input
    if (this.dataset.length <= 0) {
      throw new Error("Input data cant be empty");
    }
    // prevent data rows to contain no properties
    if (Object.keys(this.dataset[0]).length <= 0) {
      throw new Error("Input data rows has to contain some properties (only 1st row is checked)");
    }
  }

  /**
   * @returns {number} Number of property names registered as outputs
   *   (a count of properties, not of encoded slots).
   */
  DataBitsNormalizer.prototype.getOutputLength = function () {
    return this.outputProperties.length;
  };

  /** @returns {string[]} Property names registered as outputs. */
  DataBitsNormalizer.prototype.getOutputProperties = function () {
    return this.outputProperties;
  };

  /**
   * @returns {number} Length of the first encoded input vector, or 0 when
   *   no input vector exists yet (e.g. normalize() has not been called).
   */
  DataBitsNormalizer.prototype.getInputLength = function () {
    return this.binaryInput.length > 0 ? this.binaryInput[0].length : 0;
  };

  /** @returns {number[][]} Encoded input vectors produced by normalize(). */
  DataBitsNormalizer.prototype.getBinaryInputDataset = function () {
    return this.binaryInput;
  };

  /** @returns {number[][]} Encoded output vectors produced by normalize(). */
  DataBitsNormalizer.prototype.getBinaryOutputDataset = function () {
    return this.binaryOutput;
  };

  /**
   * Encodes every row of the dataset and stores the results, replacing the
   * results of any previous call.
   *
   * Columns are always emitted in the first row's key order so that vectors
   * stay aligned even when later rows list their keys in a different order.
   * A column that a row does not define contributes no slots for that row.
   * Rows that end up with no input (or output) slots are not stored.
   *
   * @throws {TypeError} When a first-row value has an unsupported type, or
   *   when a later row carries a property the first row does not have.
   */
  DataBitsNormalizer.prototype.normalize = function () {
    // The constructor guarantees a non-empty dataset whose first row has
    // at least one property; that row is the schema.
    var firstRow = this.dataset[0];
    var columns = this.distinctProps(firstRow);
    var types = this.distinctTypes(firstRow);
    // Prototype-less map so column names such as "toString" cannot collide
    // with inherited members.
    var metadata = Object.create(null);

    for (var c = 0; c < columns.length; c++) {
      var column = columns[c];
      var info = {
        type: types[column],
        min: null,
        max: null,
        distinctValues: null,
      };
      switch (info.type) {
        case 'number':
          // scaled between 0 and 1 using the dataset-wide range
          var minMax = this.getMinMax(column, this.dataset);
          info.min = minMax[0];
          info.max = minMax[1];
          break;
        case 'boolean':
          // a simple 0 or 1
          info.min = 0;
          info.max = 1;
          break;
        case 'string':
          // one slot per distinct string found in the whole dataset
          info.distinctValues = this.getDistinctVals(column, this.dataset);
          break;
        case 'array':
          // one slot per distinct element found in the whole dataset
          info.distinctValues = this.getDistinctArrayVals(column, this.dataset);
          break;
        default:
          // Fail fast: silently encoding such a column would inject
          // `undefined` into the vectors.
          throw new TypeError('Unsupported value type "' + info.type + '" for property "' + column +
            '" (expected number, boolean, string or array)');
      }
      metadata[column] = info;
    }

    // Start from a clean slate so calling normalize() again does not
    // duplicate every row.
    this.binaryInput = [];
    this.binaryOutput = [];

    for (var r = 0; r < this.dataset.length; r++) {
      var row = this.dataset[r];
      if (row == null) {
        continue; // nothing to encode
      }

      // A property unknown to the schema cannot be encoded.
      var rowKeys = Object.keys(row);
      for (var k = 0; k < rowKeys.length; k++) {
        if (!(rowKeys[k] in metadata)) {
          throw new TypeError('Row ' + r + ' has property "' + rowKeys[k] +
            '" which is missing from the first row');
        }
      }

      var inputBits = [];
      var outputBits = [];
      for (var p = 0; p < columns.length; p++) {
        var prop = columns[p];
        if (!hasOwn.call(row, prop)) {
          continue;
        }
        var value = row[prop];
        var meta = metadata[prop];
        var bitsArr;
        switch (meta.type) {
          case 'number':
            bitsArr = [this.numToBit(meta.min, meta.max, value)]; // scalar to array of 1 length
            break;
          case 'boolean':
            bitsArr = [this.boolToBit(value)]; // scalar to array of 1 length
            break;
          case 'string':
            bitsArr = this.strToBitsArr(meta.distinctValues, value);
            break;
          case 'array':
            bitsArr = this.arrToBitsArr(meta.distinctValues, value);
            break;
        }
        if (this.outputProperties.indexOf(prop) > -1) {
          outputBits = outputBits.concat(bitsArr);
        }
        else {
          inputBits = inputBits.concat(bitsArr);
        }
      }

      if (inputBits.length > 0) {
        this.binaryInput.push(inputBits);
      }
      if (outputBits.length > 0) {
        this.binaryOutput.push(outputBits);
      }
    }
  };

  /**
   * Registers which properties belong to the output vectors.
   *
   * @param {string[]} props - Property names to treat as outputs.
   * @returns {DataBitsNormalizer} This instance, for chaining.
   */
  DataBitsNormalizer.prototype.setOutputProperties = function (props) {
    this.outputProperties = props;
    return this;
  };

  /**
   * Finds the smallest and largest value of one property across all rows.
   *
   * @param {string} prop - Property to inspect.
   * @param {Object[]} data - Rows to scan.
   * @returns {Array} Pair `[min, max]`; both null when `data` is empty.
   */
  DataBitsNormalizer.prototype.getMinMax = function (prop, data) {
    var min = null;
    var max = null;
    for (var i = 0; i < data.length; i++) {
      var val = data[i][prop];
      if (min === null || val < min) {
        min = val;
      }
      if (max === null || val > max) {
        max = val;
      }
    }
    return [min, max];
  };

  /**
   * Collects the distinct values of one property, in order of first appearance.
   *
   * @param {string} property - Property to inspect.
   * @param {Object[]} data - Rows to scan.
   * @returns {Array} Distinct values.
   */
  DataBitsNormalizer.prototype.getDistinctVals = function (property, data) {
    var distinctValues = [];
    for (var i = 0; i < data.length; i++) {
      var val = data[i][property];
      if (distinctValues.indexOf(val) === -1) {
        distinctValues.push(val);
      }
    }
    return distinctValues;
  };

  /**
   * Collects the distinct elements found inside an array-valued property,
   * in order of first appearance.
   *
   * @param {string} property - Property whose value is an array in every row.
   * @param {Object[]} data - Rows to scan.
   * @returns {Array} Distinct elements.
   */
  DataBitsNormalizer.prototype.getDistinctArrayVals = function (property, data) {
    var distinctValues = [];
    for (var i = 0; i < data.length; i++) {
      var arrVal = data[i][property];
      for (var j = 0; j < arrVal.length; j++) {
        var val = arrVal[j];
        if (distinctValues.indexOf(val) === -1) {
          distinctValues.push(val);
        }
      }
    }
    return distinctValues;
  };

  /**
   * Scales `value` linearly so that `min` maps to 0 and `max` maps to 1,
   * rounded to 6 decimals.
   *
   * @param {number} min - Lower bound of the range.
   * @param {number} max - Upper bound of the range.
   * @param {number} value - Value to scale.
   * @returns {number} Scaled value; 0 when the range is empty (min === max).
   */
  DataBitsNormalizer.prototype.numToBit = function (min, max, value) {
    var range = max - min;
    if (range === 0) {
      // A constant column carries no information; avoid dividing by zero.
      return 0;
    }
    return Number(((value - min) / range).toFixed(6));
  };

  /**
   * @param {boolean} val - Value to convert.
   * @returns {number} Numeric coercion of `val` (1 for true, 0 for false).
   */
  DataBitsNormalizer.prototype.boolToBit = function (val) {
    return +val;
  };

  /**
   * Turns a value into a one-hot array over the known distinct values.
   * For example, with distinct values [ 500, 1050, 300, 950 ] a 4-slot array
   * is needed: the 1st value is encoded as [1,0,0,0], the 2nd as [0,1,0,0],
   * and so on. A value that is not among the distinct values yields all zeros.
   *
   * @param {Array} distinctValues - Every value the property can take.
   * @param {*} val - Value to encode.
   * @returns {number[]} Array of 0/1 with the same length as `distinctValues`.
   */
  DataBitsNormalizer.prototype.strToBitsArr = function (distinctValues, val) {
    var bitArr = new Array(distinctValues.length);
    bitArr.fill(0);
    for (var i = 0; i < distinctValues.length; i++) {
      if (val === distinctValues[i]) {
        bitArr[i] = 1;
      }
    }
    return bitArr;
  };

  /**
   * Turns a list of values into a multi-hot array over the known distinct
   * values: the slot of every listed value is set to 1. Values that are not
   * among the distinct values are ignored.
   *
   * @param {Array} distinctValues - Every element the property can contain.
   * @param {Array} vals - Elements present in the current row.
   * @returns {number[]} Array of 0/1 with the same length as `distinctValues`.
   */
  DataBitsNormalizer.prototype.arrToBitsArr = function (distinctValues, vals) {
    var bitArr = new Array(distinctValues.length);
    bitArr.fill(0);
    if (vals == null) {
      return bitArr;
    }
    for (var j = 0; j < vals.length; j++) {
      var idx = distinctValues.indexOf(vals[j]);
      // Writing to index -1 would add a stray "-1" property to the array.
      if (idx !== -1) {
        bitArr[idx] = 1;
      }
    }
    return bitArr;
  };

  /**
   * @param {Object} row - A dataset row.
   * @returns {string[]} The row's own enumerable property names.
   */
  DataBitsNormalizer.prototype.distinctProps = function (row) {
    return Object.keys(row);
  };

  /**
   * Maps each enumerable property of `row` to a type name: 'array' for
   * arrays, 'object' for any other object (including null), otherwise the
   * `typeof` result.
   *
   * @param {Object} row - A dataset row.
   * @returns {Object} Map of property name to type name.
   */
  DataBitsNormalizer.prototype.distinctTypes = function (row) {
    var distinctTypes = {};
    for (var prop in row) {
      var value = row[prop];
      // typeof reports 'object' for arrays, so tell them apart explicitly
      if (Array.isArray(value)) {
        distinctTypes[prop] = 'array';
      }
      else {
        distinctTypes[prop] = typeof value;
      }
    }
    return distinctTypes;
  };

  /**
   * @param {Object} row - A dataset row.
   * @returns {*} Value of the row's first own enumerable property.
   */
  DataBitsNormalizer.prototype.getRow1stValue = function (row) {
    return row[Object.keys(row)[0]];
  };

  return DataBitsNormalizer;
}());
module.exports = DataBitsNormalizer;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment