Skip to content

Instantly share code, notes, and snippets.

@imaximix
Created January 31, 2019 10:27
Show Gist options
  • Save imaximix/9e6535fe68924e1d0386fd4187d592c4 to your computer and use it in GitHub Desktop.
Save imaximix/9e6535fe68924e1d0386fd4187d592c4 to your computer and use it in GitHub Desktop.
wine-classifier. logistic regression
var fs = require('fs');
var csv = require('fast-csv');
var math = require('mathjs');
var R = require('ramda');
var colors = require('colors/safe');
var process = require('process');
// Column that holds the label (the wine's class).
const yKey = 'Class';

// Feature columns, exposed to the rest of the script as x1..x7.
// Column list noted by the original author:
//   Alcohol;Malic acid;Ash;AlcalinityOfAsh;Magnesium;TotalPhenols;Flavanoids
// Note that x1 currently reads 'Proline' rather than 'Alcohol'.
const x1Key = 'Proline';
const x2Key = 'Malic acid';
const x3Key = 'Ash';
const x4Key = 'AlcalinityOfAsh';
const x5Key = 'Magnesium';
const x6Key = 'TotalPhenols';
const x7Key = 'Flavanoids';

// Parsed CSV rows are accumulated here by the parser's "data" handler.
const wines = [];

// Read stream over the dataset file that is handed to the CSV parser.
const stream = fs.createReadStream("wines.csv");
// Parse the semicolon-delimited CSV, then train and evaluate a binary
// ("is this wine class 1 or not?") logistic-regression classifier.
csv
  .fromStream(stream, { headers: true, delimiter: ';' })
  .on('data', function (data) {
    // Rows are read by column name further down (R.prop(key)), so each row
    // is collected as-is.
    wines.push(data);
  })
  .on('end', function () {
    if (wines.length === 0) {
      console.error('No rows were read from the CSV; nothing to train on.');
      return;
    }

    // Single source of truth for the feature columns, so that adding or
    // removing a feature no longer requires touching several places.
    const featureKeys = [x1Key, x2Key, x3Key, x4Key, x5Key, x6Key, x7Key];

    // Per-feature maximum, used to scale every feature by its largest value.
    // The fold is seeded with 0, so this assumes non-negative feature values.
    const featureMaxima = featureKeys.map(key =>
      R.reduce(R.max, 0, R.map(R.pipe(R.prop(key), parseFloat), wines))
    );

    // Build normalised copies instead of mutating the parsed rows:
    //  - the label becomes 1 for class '1' and 0 for every other class;
    //  - each feature is divided by that feature's maximum.
    const class1Wines = wines.map(wine => {
      const prepared = Object.assign({}, wine);
      prepared[yKey] = wine[yKey] === '1' ? 1 : 0;
      featureKeys.forEach((key, index) => {
        prepared[key] = parseFloat(wine[key]) / featureMaxima[index];
      });
      return prepared;
    });

    // 80/20 split in file order. The test size is derived from the training
    // size so the two slices always partition the data exactly.
    // NOTE(review): the rows are not shuffled; if the CSV is sorted by class
    // the test slice may contain few or no class-1 wines — confirm.
    const trainingCount = Math.round(80 / 100 * class1Wines.length);
    const testCount = class1Wines.length - trainingCount;
    const class1Training = R.take(trainingCount, class1Wines);
    const class1Test = R.takeLast(testCount, class1Wines);

    // One row per wine: a leading 1 for the intercept, then the features.
    const toDesignRow = wine => [1].concat(featureKeys.map(key => wine[key]));
    const XTest = R.map(toDesignRow, class1Test);
    const XTraining = R.map(toDesignRow, class1Training);
    const yTraining = R.map(wine => [wine[yKey]], class1Training);

    // Zero-initialised parameters: a 1 x n row of zeros transposed into an
    // n x 1 column, with n = intercept + number of features.
    const initialTheta = math.transpose(
      math.matrix([math.zeros(featureKeys.length + 1)])
    );

    // Reference values kept from the original script (`trainedTheta`, unused
    // there); presumably the result of an earlier training run:
    //   [-36.50400989355553, 39.35967276977222, 14.902893582525891,
    //    20.922553915013747, -32.01471434145231, 7.237059043218884,
    //    1.1149415839415822, 23.126977106507752]
    const trainedThetaVectorized = gradientDescentVectorized(
      XTraining,
      initialTheta,
      yTraining
    );

    // Print the predicted value for every wine in the test slice.
    console.log(hVectorized(XTest, trainedThetaVectorized));
  });
/**
 * Fits logistic-regression parameters with batch gradient descent.
 *
 * Every iteration applies
 *   theta := theta - (alpha / m) * X^T * (hVectorized(X, theta) - y)
 * and a '.' is written to stdout every 100 iterations as a progress marker.
 *
 * @param {Array<Array<number>>} X - Training rows, one array per sample;
 *   `X.length` is used as the sample count m.
 * @param {*} theta - Initial parameters (a column with one row per column
 *   of X, as built by the caller in this file).
 * @param {*} y - Labels, one single-element row per sample.
 * @param {number} [alpha=1] - Learning rate.
 * @param {number} [iterations=100000] - Number of descent steps to run.
 * @returns {*} The parameters after the final step.
 * @throws {Error} When X contains no rows (alpha / m would be undefined).
 */
function gradientDescentVectorized(X, theta, y, alpha = 1, iterations = 100000) {
  const m = X.length;
  if (m === 0) {
    throw new Error('gradientDescentVectorized: X must contain at least one row');
  }

  // Both values are loop-invariant, so compute them once up front.
  const transposedX = math.transpose(X);
  const stepScale = alpha / m;

  let computedTheta = theta;
  for (let i = 0; i < iterations; i++) {
    const residuals = math.subtract(hVectorized(X, computedTheta), y);
    const gradients = math.multiply(stepScale, math.multiply(transposedX, residuals));
    computedTheta = math.subtract(computedTheta, gradients);
    if (i % 100 === 0) {
      process.stdout.write('.');
    }
  }
  return computedTheta;
}
/**
 * Logistic hypothesis evaluated for a whole batch at once.
 *
 * Multiplies `data` by `theta` and passes every entry of the product through
 * the sigmoid 1 / (1 + e^-z).
 *
 * @param {*} data - Input rows (the callers in this file pass arrays of
 *   [1, feature...] rows).
 * @param {*} theta - Parameters to multiply the rows by.
 * @returns {*} The result of `.map` on the product, i.e. one sigmoid value
 *   per entry of `data * theta`.
 */
function hVectorized(data, theta) {
  const sigmoid = (z) => 1 / (1 + math.pow(math.E, -z));
  const linearScores = math.multiply(data, theta);
  return linearScores.map((score) => sigmoid(score));
}
/**
 * Logistic hypothesis for a single wine.
 *
 * Builds the input vector [1, x1..x7] from the wine's feature columns,
 * multiplies the transposed `theta` by it, and applies the sigmoid
 * 1 / (1 + e^-z) to the product.
 *
 * @param {Object} wine - Row object holding the seven feature columns.
 * @param {*} theta - Parameters, intercept first. The product is negated as
 *   a plain number, so presumably this is a flat 8-element vector — confirm.
 * @returns {number} Sigmoid of the linear score.
 */
function h(wine, theta) {
  const featureColumns = [x1Key, x2Key, x3Key, x4Key, x5Key, x6Key, x7Key];
  // Leading 1 pairs with the intercept term in theta.
  const inputs = [1].concat(featureColumns.map((column) => wine[column]));
  const score = math.multiply(math.transpose(theta), inputs);
  const decay = math.pow(math.E, -score);
  return 1 / (1 + decay);
}
/**
 * Average logistic (cross-entropy) cost of `theta` over a set of wines.
 *
 * Each wine contributes -log(1 - h) when its label equals 0 and -log(h)
 * otherwise, where h is the hypothesis `h(wine, theta)`. The contributions
 * are summed and multiplied by 1 / wines.length.
 *
 * @param {Array<Object>} wines - Rows to score; the label is read from the
 *   `yKey` column.
 * @param {*} theta - Parameters forwarded unchanged to `h`.
 * @returns {number} The mean cost (NaN for an empty array, since 1 / 0 * 0).
 */
function cost(wines, theta) {
  let total = 0;
  for (let index = 0; index < wines.length; index++) {
    const wine = wines[index];
    const label = wine[yKey];
    const predicted = h(wine, theta);
    // Loose comparison kept on purpose: a label of '0' (string) must take
    // the same branch as the number 0.
    const penalty = label == 0
      ? -math.log(1 - predicted)
      : -math.log(predicted);
    total = total + penalty;
  }
  return 1 / wines.length * total;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment