Last active
December 19, 2017 11:37
-
-
Save hfhchan/5bc83e41e45dcf529cd5f8bac44cf66f to your computer and use it in GitHub Desktop.
ISOM3370 code
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
(async function() {
// Configuration
// use_trim: when true, the trimmed data file is loaded instead of the full one.
const use_trim = true;
// apriori: minimum occurrence count an item (or pair) must reach to be kept;
// 0 keeps everything.
const apriori = 0;

// Filename
// Collect the active options so the downloaded files are labelled with them.
const filename = [];
if (apriori) {
  filename.push('apriori-' + apriori);
}
if (use_trim) {
  filename.push('trimmed');
}
// e.g. ' (apriori-100,trimmed)'; stays empty when no option is active.
const suffix = filename.length ? ' (' + filename.join(',') + ')' : '';
// Functions | |
/**
 * Build every ordered pair of elements taken from two different positions
 * of `arr`.
 *
 * Positions are compared, not values, so duplicate values in `arr` can still
 * be paired with each other. Both [a, b] and [b, a] are produced.
 *
 * @param {Array} arr - Source elements.
 * @returns {Array<Array>} Pairs ordered by first position, then second
 *   position; empty when `arr` has fewer than two elements.
 */
const k_perm = function(arr) {
  const pairs = [];
  arr.forEach((first, firstIndex) => {
    arr.forEach((second, secondIndex) => {
      if (firstIndex !== secondIndex) {
        pairs.push([first, second]);
      }
    });
  });
  return pairs;
};
/**
 * Build triples [x, y, z] from `arr` where x and y come from two positions
 * i < j (so each leading pair appears once, in input order) and z comes from
 * any third position different from both.
 *
 * Positions are compared, not values, so duplicate values are not removed.
 *
 * @param {Array} arr - Source elements.
 * @returns {Array<Array>} Triples ordered by i, then j, then the third
 *   position; empty when `arr` has fewer than three elements.
 */
const k_perm_3 = function(arr) {
  const triples = [];
  arr.forEach((first, i) => {
    arr.forEach((second, j) => {
      // Only the i < j half: the leading pair is treated as unordered.
      if (i >= j) {
        return;
      }
      arr.forEach((third, k) => {
        if (k !== i && k !== j) {
          triples.push([first, second, third]);
        }
      });
    });
  });
  return triples;
};
/**
 * Convert a dictionary-like object into a list of [key, value] tuples,
 * in the object's own enumerable key order.
 *
 * @param {Object} obj - Object whose own enumerable properties are listed.
 * @returns {Array<Array>} One [key, value] tuple per property.
 */
const flatten = (obj) => Object.entries(obj);
/**
 * Inverse of `flatten`: build a plain object from [key, value] tuples.
 * Property insertion follows the order of `arr`; when a key repeats, the
 * last value wins.
 *
 * @param {Array<Array>} arr - Tuples whose first element is the key and
 *   second element is the value.
 * @returns {Object} Plain object holding one property per distinct key.
 */
const inflate = (arr) => {
  const result = {};
  for (const [key, value] of arr) {
    result[key] = value;
  }
  return result;
};
/**
 * Serialise a dictionary of tuple lists into text, one line per key, in the
 * form `(key,[first1,first2,...])`.
 *
 * Only the first element of every tuple is written; any further tuple
 * elements (e.g. a score in second position) are left out of the output.
 *
 * @param {Object} object - Maps each key to an array of tuples.
 * @returns {string} Lines joined with "\n"; empty string for an empty object.
 */
const dump = (object) => {
  const lines = Object.keys(object).map((key) => {
    const names = object[key].map((tuple) => tuple[0]);
    return '(' + key + ',[' + names.join(',') + '])';
  });
  return lines.join("\n");
};
// Timestamp in milliseconds taken before any work starts; read again at the
// end of the script to report the elapsed time.
const start = Date.now();

// Load Data
// The trimmed data file is used when use_trim is set, the full one otherwise.
// NOTE(review): both files are served from cdn.rawgit.com - confirm that host
// is still reachable before relying on this script.
const url = use_trim
  ? 'https://cdn.rawgit.com/hfhchan/57f3e3fed2da473ea434094b69c54c95/raw/42903230b31a580d9a3e3a8f366b7aa33d01079a/browsing-trim.txt'
  : 'https://cdn.rawgit.com/hfhchan/bcaaa02a8a1b13036a9d747ac749e6a4/raw/d08f8c3d19eac02b304dcdf7a76a99ea1ff9b046/browsing.txt';
const response = await fetch(url);
// fetch() only rejects on network failure; an HTTP error page would otherwise
// be parsed as if it were data.
if (!response.ok) {
  throw new Error('Failed to load ' + url + ' (HTTP ' + response.status + ')');
}
const text = await response.text();
// One record per non-empty line; each record is the list of product names on
// that line. Splitting on runs of whitespace keeps empty strings from being
// counted as products.
const records = text
  .split(/\r?\n/)
  .map((line) => line.trim())
  .filter((line) => line !== '')
  .map((line) => line.split(/\s+/));
// Part 1
// Output popularity for each product
// with product name as key and popularity as value
// A prototype-less object is used as the dictionary (instead of an Array) so
// that a product named e.g. "length" is counted like any other key.
let itemset1_1 = Object.create(null);
records.forEach((record) => {
  record.forEach((product) => {
    itemset1_1[product] = (itemset1_1[product] || 0) + 1;
  });
});
// Keep products whose popularity is at least `apriori` and sort by popularity
// (most popular first)
let itemset1_2 = flatten(itemset1_1)
  .filter((item) => item[1] >= apriori)
  .sort((a, b) => b[1] - a[1]);
// Drop the reference to the raw counts; they are not needed past this point.
itemset1_1 = null;
// Cast into Correct Format
const itemset1_3 = inflate(itemset1_2);
itemset1_2 = null;
console.log(itemset1_3);
// Part 2
// Output popularity of co-occurrences of two products
// with product names as key and occurrences as value, with product names
// pre-filtered to the products kept in Part 1
// Keys are "product1 product2" (both orders are counted). A prototype-less
// object is used as the dictionary so no product name can collide with a
// built-in property.
const itemset2_1 = Object.create(null);
records.forEach((record) => {
  // Own-property test: `in` would also accept inherited names such as
  // "constructor".
  const products = record
    .filter((product) => Object.prototype.hasOwnProperty.call(itemset1_3, product))
    .sort();
  k_perm(products).forEach((pair) => {
    const key = pair.join(' ');
    itemset2_1[key] = (itemset2_1[key] || 0) + 1;
  });
});
// Keep pairs whose popularity is at least `apriori` and sort by popularity
// (most popular first)
let itemset2_2 = flatten(itemset2_1)
  .filter((item) => item[1] >= apriori)
  .sort((a, b) => b[1] - a[1]);
// Output popularity of co-occurrences of two products
// with product name as key and tuples of [co-occurring product, ratio] as
// value, where ratio = pair count / count of the first product
const itemset2_3 = Object.create(null);
itemset2_2.forEach((record) => {
  const [product1, product2] = record[0].split(" ");
  itemset2_3[product1] = itemset2_3[product1] || [];
  itemset2_3[product1].push([product2, record[1] / itemset1_3[product1]]);
});
itemset2_2 = null;
// Truncate to most popular 10 | |
// Object.keys(itemset2_3).forEach((key) => itemset2_3[key].length = itemset2_3[key].length > 10 ? 10 : itemset2_3[key].length ); | |
// Save to file
// Serialise the pair table, expose it through an object URL and trigger the
// browser download by clicking an anchor element added to the page.
// Everything is block-scoped so no name leaks out (the URL used to be
// assigned to an undeclared variable, which throws in strict mode).
{
  const reportBlob = new Blob([dump(itemset2_3)], {type: "text/plain"});
  const downloadLink = document.createElement("a");
  downloadLink.href = URL.createObjectURL(reportBlob);
  downloadLink.download = 'Itemset 2' + suffix + '.txt';
  downloadLink.id = "download2";
  document.body.appendChild(downloadLink);
  downloadLink.click();
}
// Part 3
// Output popularity of co-occurrences of three products
// with product names as key and occurrences as value, with product names
// pre-filtered to the products that are keys of itemset2_3
// Keys are "product1 product2 product3". A prototype-less object is used as
// the dictionary so no product name can collide with a built-in property.
let itemset3_1 = Object.create(null);
records.forEach((record) => {
  // Own-property test: `in` would also accept inherited names.
  const products = record
    .filter((product) => Object.prototype.hasOwnProperty.call(itemset2_3, product))
    .sort();
  k_perm_3(products).forEach((triple) => {
    const key = triple.join(' ');
    itemset3_1[key] = (itemset3_1[key] || 0) + 1;
  });
});
// Sort the values (most popular first)
// NOTE(review): unlike Part 2, no `apriori` threshold is applied to the
// triples here - confirm that is intended.
let itemset3_2 = flatten(itemset3_1).sort((a, b) => b[1] - a[1]);
itemset3_1 = null;
// Output popularity of co-occurrences of three products
// with the first two product names as key and tuples of [third product,
// ratio] as value, where ratio = triple count / count of the leading pair
const itemset3_3 = Object.create(null);
itemset3_2.forEach((record) => {
  const [product1, product2, product3] = record[0].split(" ");
  const product1_2 = product1 + " " + product2;
  itemset3_3[product1_2] = itemset3_3[product1_2] || [];
  itemset3_3[product1_2].push([product3, record[1] / itemset2_1[product1_2]]);
});
itemset3_2 = null;
// Truncate to most popular 10 | |
// Object.keys(itemset3_3).forEach((key) => itemset3_3[key].length = itemset3_3[key].length > 10 ? 10 : itemset3_3[key].length ); | |
// Save to file
// Serialise the triple table, expose it through an object URL and trigger the
// browser download by clicking an anchor element added to the page.
// Everything is block-scoped so no name leaks out (the URL used to be
// assigned to an undeclared variable, which throws in strict mode).
{
  const reportBlob = new Blob([dump(itemset3_3)], {type: "text/plain"});
  const downloadLink = document.createElement("a");
  downloadLink.href = URL.createObjectURL(reportBlob);
  downloadLink.download = 'Itemset 3' + suffix + '.txt';
  downloadLink.id = "download3";
  document.body.appendChild(downloadLink);
  downloadLink.click();
}
// Report the elapsed time. Both timestamps are in milliseconds, so the
// difference is converted before being labelled as seconds.
const end = new Date().getTime();
console.log('Completed in ' + ((end - start) / 1000) + ' seconds.');
})();
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment