Last active
October 12, 2017 11:35
-
-
Save hfhchan/b178105921a43c90295c6173ce1ee226 to your computer and use it in GitHub Desktop.
COMP4331 Assignment 1 Question 2
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
1. Use Google Chrome 61.0 or above.
2. Navigate to this page.
3. Press F12.
4. Choose the "Console" tab.
5. Paste in the script and press Enter.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Column headers shared by the data rows below: labels[i] names row[i].
var labels = ["name", "height(cm)", "weight(kg)", "eye-color", "hair-color", "region"];

// Training set: one row per person, columns in `labels` order.
// Every cell is a string, including the numeric height and weight columns.
// Column 5 (region) is the class label.
// Declared with `var` so the declaration can be evaluated more than once
// without a redeclaration error.
var dataset = [
  ["Abel", "180", "73", "hazel", "black", "Europe"],
  ["Bob", "183", "75", "brown", "brown", "Europe"],
  ["Carl", "176", "70", "hazel", "blond", "Europe"],
  ["Dale", "168", "60", "brown", "black", "Asia"],
  ["Eric", "174", "65", "brown", "brown", "Asia"],
  ["Felix", "170", "59", "brown", "blond", "Asia"],
  ["George", "180", "78", "blue", "black", "America"],
  ["Howard", "179", "80", "hazel", "brown", "America"],
  ["Igor", "175", "75", "blue", "blond", "America"]
];
// Question 2a
// Calculate the prior probabilities for each class
console.log('Question 2(a): Calculate the prior probabilities for each class');
// Tally how many training rows belong to each region (column 5 of a row).
// The result maps region name -> row count.
var q2a = dataset.reduce((counts, row) => {
  const region = row[5];
  counts[region] = (counts[region] || 0) + 1;
  return counts;
}, {});
// Output table of region and probability (class count / total row count)
console.table(
  Object.keys(q2a).map((region) => [region, q2a[region] / dataset.length])
);
// Question 2b
// For each class C_i and each test sample in part 1(h), output P(sample|C_i).
console.log('Question 2(b):');
console.log('Output for each class C_i and test sample in part 1h, the value of P(sample|C_i);');
// Test samples to classify. Same column layout as the `dataset` rows;
// the region column holds "?" because it is the value being predicted.
var samples = [
  ["Jack", "173", "65", "brown", "black", "?"],
  ["Todd", "184", "75", "blue", "blond", "?"]
];
// Distinct class labels (column 5), in order of first appearance in `dataset`
// (a Set iterates in insertion order).
var regions = [...new Set(dataset.map((row) => row[5]))];
/**
 * Arithmetic mean of an array of numbers.
 * @param {number[]} arr - values to average
 * @returns {number} sum / count; NaN when `arr` is empty (0 / 0)
 */
var average = (arr) => arr.reduce((a, b) => a + b, 0) / arr.length;
/**
 * Population standard deviation of an array of numbers: the square root of
 * the mean squared deviation from the mean (divides by N, not N - 1).
 * @param {number[]} arr - values to measure
 * @returns {number} standard deviation; NaN when `arr` is empty
 */
var stddev = (arr) => {
  const avg = average(arr);
  const sumOfSquares = arr.reduce((sum, x) => sum + Math.pow(x - avg, 2), 0);
  return Math.sqrt(sumOfSquares / arr.length);
};
/**
 * Calculate probability given continuous attribute A_k.
 *
 * Evaluates the Gaussian density whose mean and (population) standard
 * deviation are estimated from `values`, at the point `x_k`:
 *   exp(-(x_k - mean)^2 / (2 * sigma^2)) / (sqrt(2 * pi) * sigma)
 *
 * @param {number} x_k - attribute value of the sample being scored
 * @param {number[]} values - the attribute's values for one class
 * @returns {number} density at x_k; NaN when `values` is empty or all of its
 *   entries are equal (sigma is 0, so the density is undefined)
 */
var continuous_p = (x_k, values) => {
  const mean = average(values);
  const sigma = stddev(values);
  const exponent = -Math.pow(x_k - mean, 2) / (2 * sigma * sigma);
  // Math.exp is the direct form of e^x; Math.pow(Math.E, x) raises a rounded
  // constant to a power and loses precision for large |x|.
  return Math.exp(exponent) / (Math.sqrt(2 * Math.PI) * sigma);
};
/**
 * Laplace-corrected estimate of P(attribute = value | class) for a discrete
 * attribute.
 *
 * The correction adds 1 to the match count and adds the number of distinct
 * values the attribute takes anywhere in `dataset` to the class size, so the
 * estimates over those values sum to 1 and an unseen value never yields 0.
 *
 * @param {string} value - attribute value of the sample being scored
 * @param {string[][]} classRows - training rows belonging to one class
 * @param {number} column - index of the attribute within a row
 * @returns {number} smoothed conditional probability
 */
var discrete_p = (value, classRows, column) => {
  const distinctValues = new Set(dataset.map((row) => row[column])).size;
  const matches = classRows.filter((row) => row[column] === value).length;
  return (matches + 1) / (classRows.length + distinctValues);
};

// Rows of [sample name, predicted region, P(sample|region)], one per sample.
// `var` rather than `let`: a top-level `let` cannot be redeclared, so pasting
// the script a second time into a console that enforces this would throw.
var answer = [];
samples.forEach((sample) => {
  const name = sample[0];
  console.log(`For ${name}:`);

  // Best class seen so far for this sample (Part 2(c)).
  let best = null;
  let bestScore = -Infinity;

  regions.forEach((region) => {
    const rows = dataset.filter((row) => row[5] === region);
    // Rows of [label, conditional probability], one per attribute.
    const working = [];

    // Continuous attributes: height (column 1) and weight (column 2).
    // Unary + converts the string cells to numbers.
    [1, 2].forEach((column) => {
      working.push([
        `P(${labels[column]}=${sample[column]}|${region})`,
        continuous_p(+sample[column], rows.map((row) => +row[column]))
      ]);
    });

    // Discrete attributes: eye colour (column 3) and hair colour (column 4),
    // with Laplace correction.
    [3, 4].forEach((column) => {
      working.push([
        `P(${labels[column]}=${sample[column]}|${region})`,
        discrete_p(sample[column], rows, column)
      ]);
    });

    // Calculate the probability of P(sample|C_i): the product of the
    // per-attribute conditionals (naive independence assumption).
    const probability = working.map((row) => row[1]).reduce((a, b) => a * b);

    // Get answer for Part 2(c): keep the class maximising
    // P(sample|C_i) * P(C_i). A NaN score never compares greater, so it
    // cannot be selected.
    const prior = rows.length / dataset.length;
    const score = probability * prior;
    if (score > bestScore) {
      bestScore = score;
      best = [name, region, probability];
    }

    // Output working and answer for 2(b)
    console.table(working);
    console.log(
      `P(${name}|${region}) = \r\n${working.map((row) => row[0]).join(' x ')}\r\n${probability}`
    );
  });

  if (best !== null) {
    answer.push(best);
  }
});
console.log('Question 2(c): Predicted Region for each test sample in part 1h:');
console.table(answer);
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment