Skip to content

Instantly share code, notes, and snippets.

@theGove
Last active May 2, 2022 13:52
Show Gist options
  • Save theGove/e7c158b8544aaacf2e2dce2a6acfcd52 to your computer and use it in GitHub Desktop.
Save theGove/e7c158b8544aaacf2e2dce2a6acfcd52 to your computer and use it in GitHub Desktop.
A Naive Bayes tool for the JADE Excel Add-in
const version=16
//e7c158b8544aaacf2e2dce2a6acfcd52
//Naive Bayes
async function auto_exec(){
// set the tool css
;console.log("===========================",version,"===========================")
Jade.set_css(gist_files('style.css'),"naive-bayes")
// place the tool html
tag("tools-body").innerHTML=gist_files('tool.html')
// fill in values from last tool use
jade_modules.jet_engine.restore_html_values()
}
function save_inputs(){
jade_modules.jet_engine.save_html_values(["datasheet","datarange","trainingprecent","predictors","outcome"])
}
async function get_address(tag_name){
tag('datarange').value=await jade_modules.jet_engine.selected_range_address()
}
async function split_data(){
// save the defaults for the tool
save_inputs()
Excel.run(async function(excel){
const sheet = excel.workbook.worksheets.getItem(tag("datasheet").value)
const datarange = sheet.getRange(tag("datarange").value)
//datarange.load('columnCount')
//sheet.activate()
await excel.sync()
datarange.getColumnsAfter(1).insert(Excel.InsertShiftDirection.right);
const next_col = datarange.getColumnsAfter(1)
next_col.load('values')
await excel.sync()
const train=[["Train"]]
for(let x=1;x<next_col.values.length;x++){
if(Math.random()*100>parseFloat(tag("trainingprecent").value)){
train.push([false])
}else{
train.push([true])
}
}
next_col.values=train
await excel.sync()
})
}
async function train_model(){
// save the defaults for the tool
save_inputs()
Excel.run(async function(excel){
const sheet = excel.workbook.worksheets.getItem(tag("datasheet").value)
const predictors = sheet.getRange(tag("predictors").value)
const outcome = sheet.getRange(tag("outcome").value)
const train = sheet.getRange(tag("datarange").value).getColumnsAfter(1)
predictors.load('values')
outcome.load('values')
train.load('values')
sheet.activate()
let out_sheet = excel.workbook.worksheets.getItemOrNullObject("Naive Bayes Results");
await excel.sync()
if (out_sheet.isNullObject) {
// sheet does not exist yet
out_sheet=excel.workbook.worksheets.add("Naive Bayes Results")
}else{
// clear the sheet
out_sheet.getUsedRange().delete(Excel.DeleteShiftDirection.up);
}
out_sheet.activate()
await excel.sync()
const counts = {};
for(let r=1;r<outcome.values.length;r++){
if(train.values[r][0]){
if(!counts[outcome.values[r][0]]){counts[outcome.values[r][0]]={}}
for(let c=0;c<predictors.values[0].length;c++){
if(counts[outcome.values[r][0]][predictors.values[r][c]]){
counts[outcome.values[r][0]][predictors.values[r][c]] += 1
}else{
counts[outcome.values[r][0]][predictors.values[r][c]] = 1
}
}
}
}
//console.log("sorted uniques",counts)
let row=1
probabilities = {}
for(const [result,token_counts] of Object.entries(counts)){
probabilities[result] = {}
const data = []
data.push(['Token','Count','Probability'])
let sum=0
//sum the counts for all categories
for(const [token,count] of Object.entries(token_counts)){
sum += count + .01
}
for(const [token,count] of Object.entries(token_counts)){
data.push([token,count,(count+.01)/sum])
probabilities[result][token] = (count+.01)/sum
}
//console.log("data",data)
out_sheet.getRangeByIndexes(row, 1, 1, 3).merge(true)
out_sheet.getRangeByIndexes(row, 1, 1, 1).values=`Probabilities: ${outcome.values[0][0]}=${result}`
out_sheet.getRangeByIndexes(row+1, 1, data.length, 3).values=data
jade_modules.jet_engine.border_around(out_sheet, out_sheet.getRangeByIndexes(row+1, 1, data.length, 3),"Medium","Continuous","Black")
jade_modules.jet_engine.range_format(out_sheet, out_sheet.getRangeByIndexes(row, 1,1,1), "format/font/bold", true)
jade_modules.jet_engine.range_format(out_sheet, out_sheet.getRangeByIndexes(row, 1,1,1), "format/font/italic", true)
jade_modules.jet_engine.range_format(out_sheet, out_sheet.getRangeByIndexes(row+1, 1, 1, 3), "format/fill/color", "Maroon")
jade_modules.jet_engine.range_format(out_sheet, out_sheet.getRangeByIndexes(row+1, 1, 1, 3), "format/font/color", "White")
jade_modules.jet_engine.range_format(out_sheet, out_sheet.getRangeByIndexes(row+1, 1, 1, 3), "format/font/bold", true)
console.log("token_counts",token_counts)
row += Object.keys(token_counts).length+3
}
out_sheet.getRangeByIndexes(1, 1, 1, 3).format.autofitColumns();
// format of the confusion matrix
jade_modules.jet_engine.range_format(out_sheet, "F3:H3", "format/fill/color", "Maroon")
jade_modules.jet_engine.range_format(out_sheet, "F3:H3", "format/font/color", "White")
jade_modules.jet_engine.range_format(out_sheet, "F3:H7", "format/font/bold", true)
jade_modules.jet_engine.range_format(out_sheet, "G6:H7", "format/font/bold", false)
jade_modules.jet_engine.range_format(out_sheet, "F4:H7", "format/horizontalAlignment", "Center")
jade_modules.jet_engine.border_around(out_sheet, "F3:H7","Medium","Continuous","Black")
out_sheet.getRangeByIndexes(2,5,3,2).values = [
["Confusion Matrix",null],
[null,"Predicted"],
["Actual",null],
]
out_sheet.getRangeByIndexes(2,5,1,3).merge(true)
out_sheet.getRangeByIndexes(3,6,1,2).merge(true)
console.log("counts",counts)
console.log("probabilities",probabilities)
// calculating probabilities
const predictions={}
for(let r=1;r<outcome.values.length;r++){
if(!train.values[r][0]){
// this is not a training value
let cum_prob=1
let pointer=predictions
for(const token of predictors.values[r]){
cum_prob = cum_prob * probabilities[outcome.values[r][0]][[token]]
if(!pointer[token]){pointer[token]={}}
pointer=pointer[token]
}
if(!pointer[outcome.values[r][0]]){
pointer[outcome.values[r][0]]={prob:cum_prob, count:0}
}
pointer[outcome.values[r][0]].count++
}
}
console.log("predictions",predictions)
const confusion={}
for(const out of Object.keys(probabilities)){
confusion[out]={}
for(const out2 of Object.keys(probabilities)){
confusion[out][out2]=0
}
}
compute_confusion(predictions, confusion, counts)
console.log("confusion",confusion)
const confusion_array=[]
const conf_col_head=[[]]
const conf_row_head=[]
for(const out of Object.keys(probabilities)){
conf_row_head.push([out])
conf_col_head[0].push(out)
const temp_array=[]
for(const out2 of Object.keys(probabilities)){
temp_array.push(confusion[out][out2])
}
confusion_array.push(temp_array)
}
console.log("conf_col_head",conf_col_head)
out_sheet.getRange("G5:H5").values = conf_col_head
out_sheet.getRange("F6:F7").values = conf_row_head
out_sheet.getRange("G6:H7").values = confusion_array
excel.sync()
})
}
function compute_confusion(obj, confusion, counts){
let check_count=0
// console.log("at comput confusion")
for(const key of Object.keys(obj)){
// console.log(key)
if(obj[key]){
// console.log("---------------obj[key]",obj[key])
if(obj[key].count !== undefined){
// console.log("------------count incement check_count")
check_count++
}
if(obj[key].prob !== undefined){
// console.log("prob incement check_count")
check_count++
}
}
}
if(check_count===4){
// we have the final node
// console.log("---------------in check count = 4")
let winner=Object.entries(obj)[0]
let loser=Object.entries(obj)[1]
// console.log("initial winner:",winner)
// console.log("initial loser:",loser)
if(loser[1].prob > winner[1].prob){
// console.log("in winner.loser flipper")
winner=Object.entries(obj)[1]
loser=Object.entries(obj)[0]
}
//confusion[winner[0]][winner[0]] += winner[1][count]
// console.log("obj",obj)
// console.log("winner",winner)
// console.log("loser",loser)
//confusion.prediction.actual
for(const outcome of Object.keys(counts)){
confusion[winner[0]][outcome] += obj[outcome].count
}
}else{
for(const key of Object.keys(obj)){
compute_confusion(obj[key],confusion,counts)
}
}
}
function onlyUnique(value, index, self) {
return self.indexOf(value) === index;
}
<div class="hidden" id="jet-refresh" style="margin-left: .5rem;"><i title="Re-load this tool." onclick="jade_modules.jet_engine.select_page()" class="fa-solid fa-arrows-rotate" cursor:pointer"></i></div>
<div style="margin:1rem; text-align: center;"><div style="display:inline-block; text-align: left;">
<table style="width:100%" align="center" cellspacing="0" cellpadding="4">
<tbody>
<tr>
<td onclick="jade_modules.jet_engine.toggle_next_row(this)" colspan="2" class="section">&nbsp;1. Split Data Set</td>
<td class="section"><i title="Hide or show this step." onclick="jade_modules.jet_engine.toggle_table(this)" class="fa-solid fa-chevron-up"></i></td>
</tr>
<tr class="hidden note">
<td colspan="3"> This step will add a columnn to your data set named "Train" with values of true or false based on the "Training Set %" you specify below.</td>
</tr>
<tr>
<td onclick="jade_modules.jet_engine.toggle_next_row(this)" class="label" >Data Sheet:</td>
<td><input size="6" type="text" id="datasheet" onfocus="jade_modules.jet_engine.default_selected_sheet(this)"/></td>
<td><i title="Get the name of the active sheet." onclick="jade_modules.jet_engine.get_sheet_name('datasheet')" class="fa-solid fa-file" style="color:#777; cursor:pointer"></i></td>
</tr>
<tr class="hidden note">
<td colspan="3">This is the name of the worksheet that has your data</td>
</tr>
<tr>
<td onclick="jade_modules.jet_engine.toggle_next_row(this)" class="label" >Data Range:</td>
<td><input size="6" type="text" id="datarange" onfocus="jade_modules.jet_engine.default_selected_range(this)"/></td>
<td><i title="Get the address of the selected range." onclick="jade_modules.jet_engine.get_multicell_address('datarange')" class="fa-solid fa-table" style="color:#777; cursor:pointer"></i></td>
</tr>
<tr class="hidden note">
<td colspan="3">This is a reference to your whole dataset, including a header row.</td>
</tr>
<tr onclick="jade_modules.jet_engine.toggle_next_row(this)" class="label" >
<td onclick="jade_modules.jet_engine.toggle_next_row(this)" class="label" >Training Set %:</td>
<td><input size="6" type="text" id="trainingprecent" /></td>
<td></td>
</tr>
<tr class="hidden note">
<td colspan="3">This is the percent of your data you want to use to train the model. It is number like 30.</td>
</tr>
<tr>
<td colspan="3" align="right"><button onclick="jade_modules.code.split_data()">Split Data Set</button></td>
</tr>
</tbody>
</table>
<div style="margin-top: 1rem;" />
<table style="width:100%" align="center" cellspacing="0" cellpadding="4">
<tbody>
<tr>
<td onclick="jade_modules.jet_engine.toggle_next_row(this)" colspan="2" class="section">&nbsp;2. Run the Model</td>
<td class="section"><i title="Hide or show this step." onclick="jade_modules.jet_engine.toggle_table(this)" class="fa-solid fa-chevron-up"></i></td>
</tr>
<tr class="hidden note">
<td colspan="3">Calcualte token probabilities from the portion of the data set selcted for training in the prior step.</td>
</tr>
<tr>
<td onclick="jade_modules.jet_engine.toggle_next_row(this)" class="label" >Predictors:</td>
<td><input size="6" type="text" id="predictors" onfocus="jade_modules.jet_engine.default_selected_range(this)"/></td>
<td><i title="Get the data columns from the selected range." onclick="jade_modules.jet_engine.get_data_column_address('predictors')" class="fa-solid fa-table" style="color:#777; cursor:pointer"></i></td>
</tr>
<tr class="hidden note">
<td colspan="3">The range that holds the model input data (independent variables).</td>
</tr>
<tr>
<td onclick="jade_modules.jet_engine.toggle_next_row(this)" class="label" >Outcome:</td>
<td><input size="6" type="text" id="outcome" onfocus="jade_modules.jet_engine.default_selected_range(this)"/></td>
<td><i title="Get the data columns from the selecte range." onclick="jade_modules.jet_engine.get_data_column_address('outcome')" class="fa-solid fa-table" style="color:#777; cursor:pointer"></i></td>
</tr>
<tr class="hidden note">
<td colspan="3">The range that holds the model outcome data (dependent variable).</td>
</tr>
<tr>
<td colspan="3" align="right"><button onclick="jade_modules.code.train_model()">Run Model</button></td>
</tr>
</tbody>
</table>
</div></div>
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment