Created
January 31, 2019 10:27
-
-
Save imaximix/9e6535fe68924e1d0386fd4187d592c4 to your computer and use it in GitHub Desktop.
wine-classifier. logistic regression
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Core and third-party modules used by this script.
const fs = require('fs');
const csv = require('fast-csv');
const math = require('mathjs');
const R = require('ramda');
const colors = require('colors/safe');
const process = require('process');

// Source data, opened as a stream and handed to the CSV parser below.
const stream = fs.createReadStream('wines.csv');

// Column holding the label the classifier predicts.
const yKey = 'Class';

// Columns used as model features x1..x7.
// NOTE(review): an earlier note listed the candidate columns as
// Alcohol;Malic acid;Ash;AlcalinityOfAsh;Magnesium;TotalPhenols;Flavanoids —
// x1 currently reads 'Proline' instead of 'Alcohol'; confirm that is intended.
const x1Key = 'Proline';
const x2Key = 'Malic acid';
const x3Key = 'Ash';
const x4Key = 'AlcalinityOfAsh';
const x5Key = 'Magnesium';
const x6Key = 'TotalPhenols';
const x7Key = 'Flavanoids';

// Raw CSV rows, filled in by the parser's "data" events.
const wines = [];
// Parse the ';'-delimited CSV (first row = headers) into `wines`; once the
// parser signals "end", train a binary logistic-regression model
// (class '1' versus everything else) and print predictions for the held-out rows.
csv
  .fromStream(stream, { headers: true, delimiter: ';' })
  .on('data', function (row) {
    wines.push(row);
  })
  .on('end', function () {
    // Feature columns, in the order they appear in every design-matrix row.
    const featureKeys = [x1Key, x2Key, x3Key, x4Key, x5Key, x6Key, x7Key];

    // Largest parsed value of each feature. The reduction is seeded with 0,
    // so the scaling below assumes the measurements are positive.
    const featureMax = featureKeys.map(function (key) {
      return R.reduce(R.max, 0, R.map(R.pipe(R.prop(key), parseFloat), wines));
    });

    // Copy each row with a numeric 0/1 label and every feature divided by its
    // maximum. Copies are used so the raw records in `wines` are not mutated.
    const class1Wines = wines.map(function (wine) {
      const scaled = Object.assign({}, wine);
      scaled[yKey] = wine[yKey] === '1' ? 1 : 0;
      featureKeys.forEach(function (key, i) {
        scaled[key] = parseFloat(wine[key]) / featureMax[i];
      });
      return scaled;
    });

    // The first 80% of the rows train the model; the remaining rows are held out.
    // NOTE(review): rows are split in file order without shuffling — if the CSV
    // is sorted by class, the held-out slice may not contain every class.
    const trainingCount = Math.round(0.8 * class1Wines.length);
    const testCount = class1Wines.length - trainingCount;
    const class1Training = R.take(trainingCount, class1Wines);
    const class1Test = R.takeLast(testCount, class1Wines);

    // One design-matrix row per wine: a leading 1 for the intercept, then the features.
    const toDesignRow = function (wine) {
      return [1].concat(featureKeys.map(function (key) {
        return wine[key];
      }));
    };
    const XTest = R.map(toDesignRow, class1Test);
    const XTraining = R.map(toDesignRow, class1Training);
    const yTraining = R.map(function (wine) {
      return [wine[yKey]];
    }, class1Training);

    // Column vector of zeros: one weight per feature plus the intercept, so the
    // size follows `featureKeys` automatically.
    const initialTheta = math.zeros(featureKeys.length + 1, 1);
    const trainedThetaVectorized = gradientDescentVectorized(XTraining, initialTheta, yTraining);

    // Reference weights recorded in an earlier revision of this script
    // (presumably the result of a previous training run):
    // [-36.50400989355553, 39.35967276977222, 14.902893582525891, 20.922553915013747,
    //  -32.01471434145231, 7.237059043218884, 1.1149415839415822, 23.126977106507752]

    // Predicted probability of class '1' for every held-out row.
    console.log(hVectorized(XTest, trainedThetaVectorized));
  });
/**
 * Fits logistic-regression weights with batch gradient descent.
 *
 * Every iteration applies
 *   theta := theta - (alpha / m) * X^T * (hVectorized(X, theta) - y)
 * where m is the number of rows in X. A '.' is written to stdout every
 * 100 iterations as a progress indicator.
 *
 * @param {Array<Array<number>>|Object} X - Design matrix, one row per sample
 *   (nested array or mathjs Matrix).
 * @param {Array|Object} theta - Initial weights; must be conformable with X
 *   (the caller in this file passes a column vector).
 * @param {Array<Array<number>>|Object} y - Labels, one row per sample.
 * @param {number} [alpha=1] - Learning rate.
 * @param {number} [iterations=100000] - Number of update steps to run.
 * @returns {Array|Object} The weights after the last update step.
 * @throws {Error} If X contains no samples (the update would divide by zero).
 */
function gradientDescentVectorized(X, theta, y, alpha = 1, iterations = 100000) {
  // mathjs matrices expose size(); plain nested arrays only have length.
  const m = typeof X.size === 'function' ? X.size()[0] : X.length;
  if (!(m > 0)) {
    throw new Error('gradientDescentVectorized requires at least one training sample');
  }

  const step = alpha / m;
  // X never changes inside the loop, so transpose it once.
  const Xt = math.transpose(X);

  let computedTheta = theta;
  for (let i = 0; i < iterations; i++) {
    const residual = math.subtract(hVectorized(X, computedTheta), y);
    const gradients = math.multiply(step, math.multiply(Xt, residual));
    computedTheta = math.subtract(computedTheta, gradients);
    if (i % 100 === 0) {
      process.stdout.write('.');
    }
  }
  return computedTheta;
}
/**
 * Logistic hypothesis for a batch of samples: applies the sigmoid
 * 1 / (1 + e^-z) to every element of the product `data * theta`.
 *
 * @param {Array|Object} data - Design matrix (nested array or mathjs Matrix).
 * @param {Array|Object} theta - Weights, conformable with `data`.
 * @returns {Array|Object} Element-wise sigmoid of `math.multiply(data, theta)`.
 */
function hVectorized(data, theta) {
  const sigmoid = function (z) {
    return 1 / (1 + math.pow(math.E, -z));
  };
  // math.map visits every element of a Matrix or a nested array alike; the
  // `.map` method would hand whole rows to the callback for plain arrays.
  return math.map(math.multiply(data, theta), sigmoid);
}
/**
 * Logistic hypothesis for a single wine: sigmoid of the dot product between
 * the weights and the feature vector [1, x1, ..., x7].
 *
 * @param {Object} wine - Row holding the feature columns named by x1Key..x7Key.
 * @param {Array|Object} theta - Eight weights (intercept first); accepted as a
 *   flat array, a nested row/column array, or a mathjs Matrix.
 * @returns {number} Value of 1 / (1 + e^-z) for z = theta . x.
 */
function h(wine, theta) {
  const x = [
    1,
    wine[x1Key],
    wine[x2Key],
    wine[x3Key],
    wine[x4Key],
    wine[x5Key],
    wine[x6Key],
    wine[x7Key]
  ];
  // Reduce theta to a flat array so the product below is always a scalar;
  // a column matrix would otherwise produce a one-element vector and NaN.
  const weights = math.flatten(theta).valueOf();
  const z = math.multiply(weights, x);
  return 1 / (1 + math.pow(math.E, -z));
}
/**
 * Mean logistic (cross-entropy) cost of the weights `theta` over `wines`.
 *
 * Each wine contributes -log(1 - h) when its label is 0 and -log(h) otherwise,
 * where h = h(wine, theta); the contributions are averaged over all wines.
 *
 * @param {Array<Object>} wines - Rows carrying the label under `yKey`.
 * @param {Array|Object} theta - Weights passed through to `h`.
 * @returns {number} The average cost; NaN when `wines` is empty (0 / 0).
 */
function cost(wines, theta) {
  const sum = wines.reduce(function (total, wine) {
    const hResult = h(wine, theta);
    // Number() keeps labels that are still strings ('0') in the zero branch.
    const isNegative = Number(wine[yKey]) === 0;
    const term = isNegative ? -math.log(1 - hResult) : -math.log(hResult);
    return total + term;
  }, 0);
  const noOfWines = wines.length;
  return 1 / noOfWines * sum;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment