Skip to content

Instantly share code, notes, and snippets.

@hfhchan
Last active December 19, 2017 11:37
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save hfhchan/5bc83e41e45dcf529cd5f8bac44cf66f to your computer and use it in GitHub Desktop.
Save hfhchan/5bc83e41e45dcf529cd5f8bac44cf66f to your computer and use it in GitHub Desktop.
ISOM3370 code
(async function() {
// Run configuration.
// use_trim: when truthy, the trimmed data set is loaded and "trimmed" is added to the output file names.
// apriori:  minimum occurrence count used by the popularity filters; 0 keeps everything
//           and adds nothing to the output file names.
const use_trim = true;
const apriori = 0;
// Filename
// Collect the tags describing this run; a disabled option contributes no tag.
const filename = [
  apriori ? 'apriori-' + apriori : null,
  use_trim ? 'trimmed' : null,
].filter((tag) => tag !== null);
// Suffix appended to the download names, e.g. " (apriori-100,trimmed)"; empty when there are no tags.
const suffix = filename.length > 0 ? ' (' + filename.join(',') + ')' : '';
// Functions
/**
 * Builds every ordered pair of elements taken from two different positions of `arr`.
 * Both orderings of a pair are produced ([a, b] and [b, a]); an element is never
 * paired with its own position, although equal values at different positions are.
 *
 * @param {Array} arr - source elements
 * @returns {Array<Array>} list of two-element arrays, grouped by the first element's position
 */
const k_perm = function(arr) {
  const pairs = [];
  arr.forEach((first, i) => {
    arr.forEach((second, j) => {
      // Skip pairing a position with itself.
      if (i === j) {
        return;
      }
      pairs.push([first, second]);
    });
  });
  return pairs;
};
/**
 * Builds triples [first, second, third] from `arr` where `first` sits at an earlier
 * position than `second`, and `third` comes from any remaining position.
 * The leading pair is therefore unordered (each pair appears once, in array order),
 * while the third element ranges over every other position.
 *
 * @param {Array} arr - source elements
 * @returns {Array<Array>} list of three-element arrays
 */
const k_perm_3 = function(arr) {
  const triples = [];
  arr.forEach((first, i) => {
    arr.forEach((second, j) => {
      // Only keep leading pairs in array order so each pair is visited once.
      if (i >= j) {
        return;
      }
      arr.forEach((third, k) => {
        // The third element must come from a position not already used.
        if (k === i || k === j) {
          return;
        }
        triples.push([first, second, third]);
      });
    });
  });
  return triples;
};
/**
 * Converts an object into a list of [key, value] pairs, one per own enumerable
 * property, in the order reported by Object.keys.
 *
 * @param {Object} obj - object (or array used as a dictionary) to convert
 * @returns {Array<Array>} list of [key, value] pairs
 */
const flatten = (obj) => {
  const pairs = [];
  for (const key of Object.keys(obj)) {
    pairs.push([key, obj[key]]);
  }
  return pairs;
};
/**
 * Builds a plain object from a list of [key, value] pairs (the inverse of `flatten`).
 * When a key occurs more than once, the last value wins.
 *
 * @param {Array<Array>} arr - list of [key, value] pairs
 * @returns {Object} object mapping each key to its value
 */
const inflate = (arr) => {
  const result = {};
  arr.forEach((pair) => {
    const key = pair[0];
    result[key] = pair[1];
  });
  return result;
};
/**
 * Serialises a dictionary of tuple lists into text, one line per key, in the form
 * "(key,[first1,first2,...])". Only the first element of each tuple is written.
 *
 * @param {Object} object - maps each key to an array of tuples (arrays)
 * @returns {string} the lines joined with "\n"; an empty string when there are no keys
 */
const dump = (object) => {
  const lines = Object.keys(object).map((key) => {
    const members = object[key].map((tuple) => tuple[0]).join(',');
    return `(${key},[${members}])`;
  });
  return lines.join("\n");
};
// Timestamp (milliseconds since the epoch) taken before any work starts.
let start = new Date().getTime();
// Load Data
// The full data set is used unless `use_trim` selects the trimmed variant.
// NOTE(review): both URLs are served through RawGit, which appears to have been retired —
// confirm they still resolve or host the data elsewhere.
let url = 'https://cdn.rawgit.com/hfhchan/bcaaa02a8a1b13036a9d747ac749e6a4/raw/d08f8c3d19eac02b304dcdf7a76a99ea1ff9b046/browsing.txt';
if (use_trim) {
  url = 'https://cdn.rawgit.com/hfhchan/57f3e3fed2da473ea434094b69c54c95/raw/42903230b31a580d9a3e3a8f366b7aa33d01079a/browsing-trim.txt';
}
const response = await fetch(url);
// fetch() only rejects on network failure; an HTTP error status must be checked explicitly,
// otherwise the error page would be parsed as if it were data.
if (!response.ok) {
  throw new Error('Failed to load ' + url + ' (HTTP ' + response.status + ')');
}
const text = await response.text();
// One record per line, each record being the list of product names on that line.
// Splitting on runs of whitespace (and dropping empties) keeps repeated spaces or
// blank lines from producing an empty-string "product".
let records = text.trim().split("\n").map((line) => {
  return line.trim().split(/\s+/).filter((product) => product !== '');
});
// Part 1
// Output popularity for each product
// with product name as key and popularity as value.
// A prototype-less object is used as the dictionary so that a product name such as
// "length" or "constructor" cannot collide with a built-in or inherited property.
let itemset1_1 = Object.create(null);
records.forEach((record) => {
  record.forEach((product) => {
    itemset1_1[product] = (itemset1_1[product] || 0) + 1;
  });
});
// Keep products whose popularity is at least `apriori` and sort by popularity, highest first
let itemset1_2 = flatten(itemset1_1).filter((item) => item[1] >= apriori).sort((a, b) => {
  return b[1] - a[1];
});
// Release the intermediate result.
itemset1_1 = null;
// Cast into Correct Format: back to a dictionary of product name -> popularity
let itemset1_3 = inflate(itemset1_2);
itemset1_2 = null;
console.log(itemset1_3);
// Part 2
// Output popularity of co-occurrences of two products
// with "product1 product2" as key and occurrences as value, with product names pre-filtered
// to those kept in Part 1. Both orderings of every pair are counted.
// Prototype-less objects are used as dictionaries so keys cannot collide with built-in properties.
let itemset2_1 = Object.create(null);
records.forEach((record) => {
  // Own-property test: the `in` operator would also match inherited names such as "toString".
  let products = record.filter((product) => Object.prototype.hasOwnProperty.call(itemset1_3, product)).sort();
  k_perm(products).forEach((pair) => {
    const key = pair.join(' ');
    itemset2_1[key] = (itemset2_1[key] || 0) + 1;
  });
});
// Keep pairs whose popularity is at least `apriori` and sort by popularity, highest first
let itemset2_2 = flatten(itemset2_1).filter((item) => item[1] >= apriori).sort((a, b) => {
  return b[1] - a[1];
});
// Output co-occurrences of two products
// with the first product name as key and, as value, a list of tuples
// [co-occurring product, pair occurrences divided by the first product's popularity]
let itemset2_3 = Object.create(null);
itemset2_2.forEach((record) => {
  let [product1, product2] = record[0].split(" ");
  itemset2_3[product1] = itemset2_3[product1] || [];
  itemset2_3[product1].push([product2, record[1] / itemset1_3[product1]]);
});
itemset2_2 = null;
// Truncate to most popular 10
// Object.keys(itemset2_3).forEach((key) => itemset2_3[key].length = itemset2_3[key].length > 10 ? 10 : itemset2_3[key].length );
// Save to file
// Offers the Part 2 result as a download by clicking a link that points at an in-memory Blob.
// The block scope keeps these temporaries out of the enclosing function's scope.
{
  const contents = dump(itemset2_3);
  // "text/plain" is the MIME type for plain text ("plain/text" is not a valid type).
  const blob = new Blob([contents], {type: "text/plain"});
  // Declared locally: assigning to an undeclared name creates a global, or throws in strict mode.
  const blobUrl = URL.createObjectURL(blob);
  const link = document.createElement("a");
  link.href = blobUrl;
  link.download = 'Itemset 2' + suffix + '.txt';
  link.id = "download2";
  document.body.appendChild(link);
  // Click this element directly; looking it up by id could return an older link
  // with the same id if the script has already run on this page.
  link.click();
}
// Part 3
// Output popularity of co-occurrences of three products
// with "product1 product2 product3" as key and occurrences as value, with product names
// pre-filtered to those that are keys of the Part 2 result.
// Prototype-less objects are used as dictionaries so keys cannot collide with built-in properties.
let itemset3_1 = Object.create(null);
records.forEach((record) => {
  // Own-property test: the `in` operator would also match inherited names such as "map".
  let products = record.filter((product) => Object.prototype.hasOwnProperty.call(itemset2_3, product)).sort();
  k_perm_3(products).forEach((triple) => {
    const key = triple.join(' ');
    itemset3_1[key] = (itemset3_1[key] || 0) + 1;
  });
});
// Sort by popularity, highest first
// NOTE(review): unlike the pair step, triples are not filtered by `apriori` — confirm this is intended.
let itemset3_2 = flatten(itemset3_1).sort((a, b) => {
  return b[1] - a[1];
});
itemset3_1 = null;
// Output co-occurrences of three products
// with "product1 product2" as key and, as value, a list of tuples
// [third product, triple occurrences divided by the occurrences of the "product1 product2" pair]
let itemset3_3 = Object.create(null);
itemset3_2.forEach((record) => {
  let [product1, product2, product3] = record[0].split(" ");
  let product1_2 = product1 + " " + product2;
  itemset3_3[product1_2] = itemset3_3[product1_2] || [];
  itemset3_3[product1_2].push([product3, record[1] / itemset2_1[product1_2]]);
});
itemset3_2 = null;
// Truncate to most popular 10
// Object.keys(itemset3_3).forEach((key) => itemset3_3[key].length = itemset3_3[key].length > 10 ? 10 : itemset3_3[key].length );
// Save to file
// Offers the Part 3 result as a download by clicking a link that points at an in-memory Blob.
// The block scope keeps these temporaries out of the enclosing function's scope.
{
  const contents = dump(itemset3_3);
  // "text/plain" is the MIME type for plain text ("plain/text" is not a valid type).
  const blob = new Blob([contents], {type: "text/plain"});
  // Declared locally: assigning to an undeclared name creates a global, or throws in strict mode.
  const blobUrl = URL.createObjectURL(blob);
  const link = document.createElement("a");
  link.href = blobUrl;
  link.download = 'Itemset 3' + suffix + '.txt';
  link.id = "download3";
  document.body.appendChild(link);
  // Click this element directly; looking it up by id could return an older link
  // with the same id if the script has already run on this page.
  link.click();
}
// Report the elapsed wall-clock time.
let end = new Date().getTime();
// getTime() returns milliseconds, so convert before labelling the value as seconds.
console.log('Completed in ' + ((end - start) / 1000) + ' seconds.');
})();
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment