Last active
May 2, 2022 13:52
-
-
Save theGove/e7c158b8544aaacf2e2dce2a6acfcd52 to your computer and use it in GitHub Desktop.
A Naive Bayes tool for the JADE Excel Add-in
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
const version=16 | |
//e7c158b8544aaacf2e2dce2a6acfcd52 | |
//Naive Bayes | |
async function auto_exec(){ | |
// set the tool css | |
;console.log("===========================",version,"===========================") | |
Jade.set_css(gist_files('style.css'),"naive-bayes") | |
// place the tool html | |
tag("tools-body").innerHTML=gist_files('tool.html') | |
// fill in values from last tool use | |
jade_modules.jet_engine.restore_html_values() | |
} | |
function save_inputs(){ | |
jade_modules.jet_engine.save_html_values(["datasheet","datarange","trainingprecent","predictors","outcome"]) | |
} | |
async function get_address(tag_name){ | |
tag('datarange').value=await jade_modules.jet_engine.selected_range_address() | |
} | |
async function split_data(){ | |
// save the defaults for the tool | |
save_inputs() | |
Excel.run(async function(excel){ | |
const sheet = excel.workbook.worksheets.getItem(tag("datasheet").value) | |
const datarange = sheet.getRange(tag("datarange").value) | |
//datarange.load('columnCount') | |
//sheet.activate() | |
await excel.sync() | |
datarange.getColumnsAfter(1).insert(Excel.InsertShiftDirection.right); | |
const next_col = datarange.getColumnsAfter(1) | |
next_col.load('values') | |
await excel.sync() | |
const train=[["Train"]] | |
for(let x=1;x<next_col.values.length;x++){ | |
if(Math.random()*100>parseFloat(tag("trainingprecent").value)){ | |
train.push([false]) | |
}else{ | |
train.push([true]) | |
} | |
} | |
next_col.values=train | |
await excel.sync() | |
}) | |
} | |
async function train_model(){ | |
// save the defaults for the tool | |
save_inputs() | |
Excel.run(async function(excel){ | |
const sheet = excel.workbook.worksheets.getItem(tag("datasheet").value) | |
const predictors = sheet.getRange(tag("predictors").value) | |
const outcome = sheet.getRange(tag("outcome").value) | |
const train = sheet.getRange(tag("datarange").value).getColumnsAfter(1) | |
predictors.load('values') | |
outcome.load('values') | |
train.load('values') | |
sheet.activate() | |
let out_sheet = excel.workbook.worksheets.getItemOrNullObject("Naive Bayes Results"); | |
await excel.sync() | |
if (out_sheet.isNullObject) { | |
// sheet does not exist yet | |
out_sheet=excel.workbook.worksheets.add("Naive Bayes Results") | |
}else{ | |
// clear the sheet | |
out_sheet.getUsedRange().delete(Excel.DeleteShiftDirection.up); | |
} | |
out_sheet.activate() | |
await excel.sync() | |
const counts = {}; | |
for(let r=1;r<outcome.values.length;r++){ | |
if(train.values[r][0]){ | |
if(!counts[outcome.values[r][0]]){counts[outcome.values[r][0]]={}} | |
for(let c=0;c<predictors.values[0].length;c++){ | |
if(counts[outcome.values[r][0]][predictors.values[r][c]]){ | |
counts[outcome.values[r][0]][predictors.values[r][c]] += 1 | |
}else{ | |
counts[outcome.values[r][0]][predictors.values[r][c]] = 1 | |
} | |
} | |
} | |
} | |
//console.log("sorted uniques",counts) | |
let row=1 | |
probabilities = {} | |
for(const [result,token_counts] of Object.entries(counts)){ | |
probabilities[result] = {} | |
const data = [] | |
data.push(['Token','Count','Probability']) | |
let sum=0 | |
//sum the counts for all categories | |
for(const [token,count] of Object.entries(token_counts)){ | |
sum += count + .01 | |
} | |
for(const [token,count] of Object.entries(token_counts)){ | |
data.push([token,count,(count+.01)/sum]) | |
probabilities[result][token] = (count+.01)/sum | |
} | |
//console.log("data",data) | |
out_sheet.getRangeByIndexes(row, 1, 1, 3).merge(true) | |
out_sheet.getRangeByIndexes(row, 1, 1, 1).values=`Probabilities: ${outcome.values[0][0]}=${result}` | |
out_sheet.getRangeByIndexes(row+1, 1, data.length, 3).values=data | |
jade_modules.jet_engine.border_around(out_sheet, out_sheet.getRangeByIndexes(row+1, 1, data.length, 3),"Medium","Continuous","Black") | |
jade_modules.jet_engine.range_format(out_sheet, out_sheet.getRangeByIndexes(row, 1,1,1), "format/font/bold", true) | |
jade_modules.jet_engine.range_format(out_sheet, out_sheet.getRangeByIndexes(row, 1,1,1), "format/font/italic", true) | |
jade_modules.jet_engine.range_format(out_sheet, out_sheet.getRangeByIndexes(row+1, 1, 1, 3), "format/fill/color", "Maroon") | |
jade_modules.jet_engine.range_format(out_sheet, out_sheet.getRangeByIndexes(row+1, 1, 1, 3), "format/font/color", "White") | |
jade_modules.jet_engine.range_format(out_sheet, out_sheet.getRangeByIndexes(row+1, 1, 1, 3), "format/font/bold", true) | |
console.log("token_counts",token_counts) | |
row += Object.keys(token_counts).length+3 | |
} | |
out_sheet.getRangeByIndexes(1, 1, 1, 3).format.autofitColumns(); | |
// format of the confusion matrix | |
jade_modules.jet_engine.range_format(out_sheet, "F3:H3", "format/fill/color", "Maroon") | |
jade_modules.jet_engine.range_format(out_sheet, "F3:H3", "format/font/color", "White") | |
jade_modules.jet_engine.range_format(out_sheet, "F3:H7", "format/font/bold", true) | |
jade_modules.jet_engine.range_format(out_sheet, "G6:H7", "format/font/bold", false) | |
jade_modules.jet_engine.range_format(out_sheet, "F4:H7", "format/horizontalAlignment", "Center") | |
jade_modules.jet_engine.border_around(out_sheet, "F3:H7","Medium","Continuous","Black") | |
out_sheet.getRangeByIndexes(2,5,3,2).values = [ | |
["Confusion Matrix",null], | |
[null,"Predicted"], | |
["Actual",null], | |
] | |
out_sheet.getRangeByIndexes(2,5,1,3).merge(true) | |
out_sheet.getRangeByIndexes(3,6,1,2).merge(true) | |
console.log("counts",counts) | |
console.log("probabilities",probabilities) | |
// calculating probabilities | |
const predictions={} | |
for(let r=1;r<outcome.values.length;r++){ | |
if(!train.values[r][0]){ | |
// this is not a training value | |
let cum_prob=1 | |
let pointer=predictions | |
for(const token of predictors.values[r]){ | |
cum_prob = cum_prob * probabilities[outcome.values[r][0]][[token]] | |
if(!pointer[token]){pointer[token]={}} | |
pointer=pointer[token] | |
} | |
if(!pointer[outcome.values[r][0]]){ | |
pointer[outcome.values[r][0]]={prob:cum_prob, count:0} | |
} | |
pointer[outcome.values[r][0]].count++ | |
} | |
} | |
console.log("predictions",predictions) | |
const confusion={} | |
for(const out of Object.keys(probabilities)){ | |
confusion[out]={} | |
for(const out2 of Object.keys(probabilities)){ | |
confusion[out][out2]=0 | |
} | |
} | |
compute_confusion(predictions, confusion, counts) | |
console.log("confusion",confusion) | |
const confusion_array=[] | |
const conf_col_head=[[]] | |
const conf_row_head=[] | |
for(const out of Object.keys(probabilities)){ | |
conf_row_head.push([out]) | |
conf_col_head[0].push(out) | |
const temp_array=[] | |
for(const out2 of Object.keys(probabilities)){ | |
temp_array.push(confusion[out][out2]) | |
} | |
confusion_array.push(temp_array) | |
} | |
console.log("conf_col_head",conf_col_head) | |
out_sheet.getRange("G5:H5").values = conf_col_head | |
out_sheet.getRange("F6:F7").values = conf_row_head | |
out_sheet.getRange("G6:H7").values = confusion_array | |
excel.sync() | |
}) | |
} | |
function compute_confusion(obj, confusion, counts){ | |
let check_count=0 | |
// console.log("at comput confusion") | |
for(const key of Object.keys(obj)){ | |
// console.log(key) | |
if(obj[key]){ | |
// console.log("---------------obj[key]",obj[key]) | |
if(obj[key].count !== undefined){ | |
// console.log("------------count incement check_count") | |
check_count++ | |
} | |
if(obj[key].prob !== undefined){ | |
// console.log("prob incement check_count") | |
check_count++ | |
} | |
} | |
} | |
if(check_count===4){ | |
// we have the final node | |
// console.log("---------------in check count = 4") | |
let winner=Object.entries(obj)[0] | |
let loser=Object.entries(obj)[1] | |
// console.log("initial winner:",winner) | |
// console.log("initial loser:",loser) | |
if(loser[1].prob > winner[1].prob){ | |
// console.log("in winner.loser flipper") | |
winner=Object.entries(obj)[1] | |
loser=Object.entries(obj)[0] | |
} | |
//confusion[winner[0]][winner[0]] += winner[1][count] | |
// console.log("obj",obj) | |
// console.log("winner",winner) | |
// console.log("loser",loser) | |
//confusion.prediction.actual | |
for(const outcome of Object.keys(counts)){ | |
confusion[winner[0]][outcome] += obj[outcome].count | |
} | |
}else{ | |
for(const key of Object.keys(obj)){ | |
compute_confusion(obj[key],confusion,counts) | |
} | |
} | |
} | |
function onlyUnique(value, index, self) { | |
return self.indexOf(value) === index; | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**/ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<div class="hidden" id="jet-refresh" style="margin-left: .5rem;"><i title="Re-load this tool." onclick="jade_modules.jet_engine.select_page()" class="fa-solid fa-arrows-rotate" cursor:pointer"></i></div> | |
<div style="margin:1rem; text-align: center;"><div style="display:inline-block; text-align: left;"> | |
<table style="width:100%" align="center" cellspacing="0" cellpadding="4"> | |
<tbody> | |
<tr> | |
<td onclick="jade_modules.jet_engine.toggle_next_row(this)" colspan="2" class="section"> 1. Split Data Set</td> | |
<td class="section"><i title="Hide or show this step." onclick="jade_modules.jet_engine.toggle_table(this)" class="fa-solid fa-chevron-up"></i></td> | |
</tr> | |
<tr class="hidden note"> | |
<td colspan="3"> This step will add a columnn to your data set named "Train" with values of true or false based on the "Training Set %" you specify below.</td> | |
</tr> | |
<tr> | |
<td onclick="jade_modules.jet_engine.toggle_next_row(this)" class="label" >Data Sheet:</td> | |
<td><input size="6" type="text" id="datasheet" onfocus="jade_modules.jet_engine.default_selected_sheet(this)"/></td> | |
<td><i title="Get the name of the active sheet." onclick="jade_modules.jet_engine.get_sheet_name('datasheet')" class="fa-solid fa-file" style="color:#777; cursor:pointer"></i></td> | |
</tr> | |
<tr class="hidden note"> | |
<td colspan="3">This is the name of the worksheet that has your data</td> | |
</tr> | |
<tr> | |
<td onclick="jade_modules.jet_engine.toggle_next_row(this)" class="label" >Data Range:</td> | |
<td><input size="6" type="text" id="datarange" onfocus="jade_modules.jet_engine.default_selected_range(this)"/></td> | |
<td><i title="Get the address of the selected range." onclick="jade_modules.jet_engine.get_multicell_address('datarange')" class="fa-solid fa-table" style="color:#777; cursor:pointer"></i></td> | |
</tr> | |
<tr class="hidden note"> | |
<td colspan="3">This is a reference to your whole dataset, including a header row.</td> | |
</tr> | |
<tr onclick="jade_modules.jet_engine.toggle_next_row(this)" class="label" > | |
<td onclick="jade_modules.jet_engine.toggle_next_row(this)" class="label" >Training Set %:</td> | |
<td><input size="6" type="text" id="trainingprecent" /></td> | |
<td></td> | |
</tr> | |
<tr class="hidden note"> | |
<td colspan="3">This is the percent of your data you want to use to train the model. It is number like 30.</td> | |
</tr> | |
<tr> | |
<td colspan="3" align="right"><button onclick="jade_modules.code.split_data()">Split Data Set</button></td> | |
</tr> | |
</tbody> | |
</table> | |
<div style="margin-top: 1rem;" /> | |
<table style="width:100%" align="center" cellspacing="0" cellpadding="4"> | |
<tbody> | |
<tr> | |
<td onclick="jade_modules.jet_engine.toggle_next_row(this)" colspan="2" class="section"> 2. Run the Model</td> | |
<td class="section"><i title="Hide or show this step." onclick="jade_modules.jet_engine.toggle_table(this)" class="fa-solid fa-chevron-up"></i></td> | |
</tr> | |
<tr class="hidden note"> | |
<td colspan="3">Calcualte token probabilities from the portion of the data set selcted for training in the prior step.</td> | |
</tr> | |
<tr> | |
<td onclick="jade_modules.jet_engine.toggle_next_row(this)" class="label" >Predictors:</td> | |
<td><input size="6" type="text" id="predictors" onfocus="jade_modules.jet_engine.default_selected_range(this)"/></td> | |
<td><i title="Get the data columns from the selected range." onclick="jade_modules.jet_engine.get_data_column_address('predictors')" class="fa-solid fa-table" style="color:#777; cursor:pointer"></i></td> | |
</tr> | |
<tr class="hidden note"> | |
<td colspan="3">The range that holds the model input data (independent variables).</td> | |
</tr> | |
<tr> | |
<td onclick="jade_modules.jet_engine.toggle_next_row(this)" class="label" >Outcome:</td> | |
<td><input size="6" type="text" id="outcome" onfocus="jade_modules.jet_engine.default_selected_range(this)"/></td> | |
<td><i title="Get the data columns from the selecte range." onclick="jade_modules.jet_engine.get_data_column_address('outcome')" class="fa-solid fa-table" style="color:#777; cursor:pointer"></i></td> | |
</tr> | |
<tr class="hidden note"> | |
<td colspan="3">The range that holds the model outcome data (dependent variable).</td> | |
</tr> | |
<tr> | |
<td colspan="3" align="right"><button onclick="jade_modules.code.train_model()">Run Model</button></td> | |
</tr> | |
</tbody> | |
</table> | |
</div></div> | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment