Created
February 27, 2016 10:24
-
-
Save malemi/4997821d391886189bc2 to your computer and use it in GitHub Desktop.
Addon for Google Spreadsheet / A bunch of statistical functions in a new menu "QM Stat"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// QM Stat 0.2 | |
// Simple Data Mining Algorithms, (c) Mario Alemi 2010 | |
// | |
// This program is free software: you can redistribute it and/or modify | |
// it under the terms of the GNU Affero General Public License as | |
// published by the Free Software Foundation | |
// This program is distributed in the hope that it will be useful, | |
// but WITHOUT ANY WARRANTY; without even the implied warranty of | |
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
// GNU Affero General Public License for more details. | |
// You should have received a copy of the GNU General Public License | |
// along with this program. If not, see <http://www.gnu.org/licenses/>. | |
function onOpen() { | |
var mySpreadsheet = SpreadsheetApp.getActiveSpreadsheet(); | |
var menuEntries = [ {name: "One attribute Rule (1R)", functionName: "doOneR"}, | |
{name: "Similarity matrix", functionName: "doSimilarity"}, | |
{name: "Frequency Plot Average and StDev", functionName: "ave_stdev"}, | |
{name: "Kurtosis of a distribution", functionName: "getKurtosis"}, | |
{name: "Help", functionName: "doHelp"}]; | |
mySpreadsheet.addMenu("QM Stat", menuEntries); | |
} | |
function doHelp(){ | |
var mydoc = SpreadsheetApp.getActiveSpreadsheet(); | |
var app = UiApp.createApplication().setTitle('QM Stat Help'); | |
var text1 = "Compute the One-attribute-rule error for each attribute. Select at least two columns. The last column is supposed represent the class' values and must be 0/1. The first line gives the name of the attributes. \nSee http://en.wikipedia.org/wiki/Association_rule_learning#One-attribute-rule for a quick explanation of the algorithm. "; | |
var text2 = "Compute the distance between each istance and creates a worksheet with a matrix of the distances. ***Select only the attributes values***. Do not select classes and attributes' name. At the moment only for discreet values."; | |
var text3 = "It computes the average and the standard deviation of a frequency plot. Select two adjacent columns, on the first the Class, on the second the Frequency.<br><i>Nota Bene<i>: values of the class are considered middle value of the bin."; | |
var text4 = "It computes the kurtosis of a distribution. Select two adjacent columns: on the left the X, on the right the counts. <br><i>Nota Bene<i>: values of the class are considered middle value of the bin."; | |
var tabPanel = app.createTabPanel(); | |
var flowPanel1 = app.createFlowPanel(); | |
var flowPanel2 = app.createFlowPanel(); | |
var flowPanel3 = app.createFlowPanel(); | |
var flowPanel4 = app.createFlowPanel(); | |
flowPanel1.add(app.createLabel(text1)); | |
tabPanel.add(flowPanel1, "1R Rule"); | |
flowPanel2.add(app.createLabel(text2)); | |
tabPanel.add(flowPanel2, "Similarity Matrix"); | |
flowPanel3.add(app.createLabel(text3)); | |
tabPanel.add(flowPanel3, "Average and StDev"); | |
flowPanel4.add(app.createLabel(text4)); | |
tabPanel.add(flowPanel4, "Reduced Kurtosis"); | |
tabPanel.selectTab(0); | |
app.add(tabPanel); | |
mydoc.show(app); | |
} | |
function ave_stdev(){ | |
var mySpreadsheet = SpreadsheetApp.getActiveSpreadsheet(); | |
var mySheet = SpreadsheetApp.getActiveSheet(); | |
var mySelection = mySheet.getActiveSelection(); | |
var numCols = mySelection.getNumColumns(); | |
var numRows = mySelection.getNumRows(); | |
if (numCols != 2) { | |
Browser.msgBox("Select *just* two columns. Classes on the first, counts on the second one."); | |
return; | |
} | |
var theClass = mySelection.offset(0, 0, numRows, 1).getValues(); | |
var theCounts = mySelection.offset(0, 1, numRows, 1).getValues(); | |
var hasHeader = 0; | |
if(isNaN(theClass[0]) || isNaN(theCounts[0])){ | |
Browser.msgBox("First line considered as header"); | |
hasHeader = 1; | |
} | |
var ave=0; | |
var stDev=0; | |
var sqAve = 0; | |
var norm = 0; | |
var halfBinWidth = (theClass[hasHeader+1]-theClass[hasHeader])/2.0; | |
// Browser.msgBox("HBW "+halfBinWidth); | |
for(var i=hasHeader; i < numRows; i++){ | |
ave = 1.0*ave + (theClass[i])*theCounts[i]; | |
sqAve = sqAve + theClass[i]*theClass[i]*theCounts[i]; | |
// Browser.msgBox("AVE "+i+" "+ave); | |
// Browser.msgBox("SQ AVE "+i+" "+sqAve); | |
norm = 1.0*norm + 1.0*theCounts[i]; | |
// Browser.msgBox("NORM "+i+" "+norm); | |
} | |
ave = ave/norm; | |
sqAve = sqAve/norm; | |
stDev = Math.sqrt(sqAve - ave*ave); | |
ave = ave; | |
Browser.msgBox("AVE "+ave+"\nSTDEV "+stDev); | |
mySheet.getRange(mySelection.getLastRow()+1, mySelection.getColumn()).setValue("Average: "); | |
mySheet.getRange(mySelection.getLastRow()+2, mySelection.getColumn()).setValue("Standard Deviation: "); | |
mySheet.getRange(mySelection.getLastRow()+1, mySelection.getColumn()+1).setValue(ave); | |
mySheet.getRange(mySelection.getLastRow()+2, mySelection.getColumn()+1).setValue(stDev); | |
mySheet.getRange(mySelection.getLastRow()+1, mySelection.getColumn()).setBackgroundColor('red'); | |
mySheet.getRange(mySelection.getLastRow()+2, mySelection.getColumn()).setBackgroundColor('red'); | |
} | |
function getKurtosis(){ | |
var mySpreadsheet = SpreadsheetApp.getActiveSpreadsheet(); | |
var mySheet = SpreadsheetApp.getActiveSheet(); | |
var mySelection = mySheet.getActiveSelection(); | |
var numCols = mySelection.getNumColumns(); | |
var numRows = mySelection.getNumRows(); | |
if (numCols != 2) { | |
Browser.msgBox("Select *just* two columns. Number of stars on the first, ratings on the second one."); | |
return; | |
} | |
var theClass = mySelection.offset(0, 0, numRows, 1).getValues(); | |
var theCounts = mySelection.offset(0, 1, numRows, 1).getValues(); | |
var hasHeader = 0; | |
if(isNaN(theClass[0]) || isNaN(theCounts[0])){ | |
Browser.msgBox("First line considered as header"); | |
hasHeader = 1; | |
} | |
var ave=0; | |
var stDev=0; | |
var sqAve = 0; | |
var norm = 0; | |
var halfBinWidth = (theClass[hasHeader+1]-theClass[hasHeader])/2.0; | |
// Browser.msgBox("HBW "+halfBinWidth); | |
for(var i=hasHeader; i < numRows; i++){ | |
ave = 1.0*ave + (theClass[i])*theCounts[i]; | |
sqAve = sqAve + theClass[i]*theClass[i]*theCounts[i]; | |
// Browser.msgBox("AVE "+i+" "+ave); | |
// Browser.msgBox("SQ AVE "+i+" "+sqAve); | |
norm = 1.0*norm + 1.0*theCounts[i]; | |
// Browser.msgBox("NORM "+i+" "+norm); | |
} | |
ave = ave/norm; | |
sqAve = sqAve/norm; | |
stDev = Math.sqrt(sqAve - ave*ave); | |
ave = ave; | |
var ms4 = new Array(); // [(CLASS_VALUE - AVERAGE)/STDEV]^4 | |
var normCounts = new Array(); //theClass / norm | |
var kurtosis = 0.0; | |
for(var i=0; i < (numRows - hasHeader); i++){ | |
ms4[i] = Math.pow( (theClass[i+hasHeader] - ave)/stDev, 4); | |
normCounts[i] = theCounts[i+hasHeader] / norm; | |
//OK Browser.msgBox("ms4["+i+"]: "+ms4[i]+" , normCounts["+i+"]= "+normCounts[i]); | |
kurtosis = kurtosis*1.0 + ( ms4[i] * normCounts[i]); | |
} | |
kurtosis = kurtosis - 3.0; | |
var polarization = Math.exp(-2-kurtosis)*10; | |
//Browser.msgBox("AVE "+ave+"\nSTDEV "+stDev); | |
mySheet.getRange(mySelection.getLastRow()+1, mySelection.getColumn()).setValue("Kurtosis: "); | |
mySheet.getRange(mySelection.getLastRow()+2, mySelection.getColumn()).setValue("Polarization: "); | |
mySheet.getRange(mySelection.getLastRow()+1, mySelection.getColumn()+1).setValue(kurtosis); | |
mySheet.getRange(mySelection.getLastRow()+2, mySelection.getColumn()+1).setValue(polarization); | |
mySheet.getRange(mySelection.getLastRow()+1, mySelection.getColumn()).setBackgroundColor('red'); | |
mySheet.getRange(mySelection.getLastRow()+2, mySelection.getColumn()).setBackgroundColor('red'); | |
} | |
function kurtosis(ratings){ | |
maxPol=10; | |
var nonEmpty =0; | |
var ave = 0; | |
var sqAve =0; | |
var norm =0; | |
for(var i=0; i < ratings.length; i++){ | |
ave = 1.0*ave + ratings[i]*(i+1); | |
sqAve = sqAve + (i+1)*(i+1)*ratings[i]; | |
norm = 1.0*norm + 1.0*ratings[i]; | |
if(ratings[i] != 0) nonEmpty++; | |
} | |
if(nonEmpty ==0) return 0; | |
if(nonEmpty ==1) return 0; | |
ave = ave/norm; | |
sqAve = sqAve/norm; | |
stDev = Math.sqrt(sqAve - ave*ave); | |
var ms4 = new Array(); | |
var normCounts = new Array(); | |
var k = 0.0; | |
for(var i=0; i < ratings.length; i++){ | |
ms4[i] = Math.pow( (i+1 - ave)/stDev, 4); | |
normCounts[i] = ratings[i] / norm; | |
k = k+(ms4[i]*normCounts[i]); | |
} | |
return k; | |
} | |
function ratingstdev(ratings){ | |
maxPol=10; | |
var nonEmpty =0; | |
var ave = 0; | |
var sqAve =0; | |
var norm =0; | |
for(var i=0; i < ratings.length; i++){ | |
ave = 1.0*ave + ratings[i]*(i+1); | |
sqAve = sqAve + (i+1)*(i+1)*ratings[i]; | |
norm = 1.0*norm + 1.0*ratings[i]; | |
if(ratings[i] != 0) nonEmpty++; | |
} | |
if(nonEmpty ==0) return 0; | |
if(nonEmpty ==1) return 0; | |
ave = ave/norm; | |
sqAve = sqAve/norm; | |
return Math.sqrt(sqAve - ave*ave); | |
} | |
function doOneR() { | |
var mySpreadsheet = SpreadsheetApp.getActiveSpreadsheet(); | |
var mySheet = SpreadsheetApp.getActiveSheet(); | |
var mySelection = mySheet.getActiveSelection(); | |
var numCols = mySelection.getNumColumns(); | |
var numRows = mySelection.getNumRows(); | |
var newSheet = mySpreadsheet.insertSheet("1R Rule"); | |
newSheet.deleteColumns(5, 15); | |
if (numCols < 2) { | |
Browser.msgBox("Select at least two columns, the last one must be 1/0 (classes).\n The first line gives the name of the attributes"); | |
} | |
if (numCols > 1) { | |
var theClass = mySelection.offset(0, numCols-1, numRows, 1).getValues(); | |
var theAttributes = mySelection.offset(0, 0, numRows, numCols-1).getValues(); | |
theAtts = new Array; | |
theR = new Array; | |
for(var i=0; i < numCols-1; i++){ | |
theAtts[i] = new Attribute(mySelection.offset(0, i, numRows, 1).getValues()); | |
//OK Browser.msgBox(theAtts[i].type); | |
//OK Browser.msgBox(theAtts[i].getPoxValues()); | |
theR[i] = theAtts[i].getOneR(theClass); | |
// Browser.msgBox("R for "+theAtts[i].type+" is "+theR[i]); | |
// mySheet.getRange(mySelection.getLastRow()+i+1, mySelection.getColumn()).setValue("R for "+theAtts[i].type+" is "); | |
// mySheet.getRange(mySelection.getLastRow()+i+1, mySelection.getColumn()+1).setValue(theR[i]); | |
// mySheet.getRange(mySelection.getLastRow()+i+1, mySelection.getColumn()).setBackgroundColor('red'); | |
newSheet.getRange(i+1, 1).setValue("R for "+theAtts[i].type+" is "); | |
newSheet.getRange(i+1, 2).setValue(theR[i]); | |
newSheet.getRange(i+1, 1).setBackgroundColor('red'); | |
} | |
} | |
} | |
function doSimilarity(){ | |
var mySpreadsheet = SpreadsheetApp.getActiveSpreadsheet(); | |
var mySheet = SpreadsheetApp.getActiveSheet(); | |
var mySelection = mySheet.getActiveSelection(); | |
var numCols = mySelection.getNumColumns(); | |
var numRows = mySelection.getNumRows(); | |
var newSheet = mySpreadsheet.insertSheet("SimMtx"); | |
// var binSheet = mySpreadsheet.insertSheet("SimAdjacencyMtx"); | |
var dist = new Array(numRows*(numRows-1)/2); | |
var distAve=0; | |
var variance =0; | |
var stdev =0; | |
var norm=0; | |
var index=0;//to get back rows and column number | |
if (numRows < 2) { | |
Browser.msgBox("Select at least two rows. ***Only attributes values***, no name of attributes, no name of instance."); | |
} | |
if (numRows > 1) { | |
var instances = mySelection.getValues(); | |
for(var r=0; r < numRows; r++){ | |
for(var c=0; c < numRows; c++){ | |
if(r==0){ | |
newSheet.getRange(1, c+2).setValue("INSTANCE"+(c*1+1)); | |
newSheet.getRange(1, c+2).setFontColor('blue'); | |
} | |
if(c==0){ | |
newSheet.getRange(r+2, 1).setValue("INSTANCE"+(1*r+1)); | |
newSheet.getRange(r+2, 1).setFontColor('blue'); | |
} | |
if(c>r){//only half matrix | |
dist[index]= distance(instances[r],instances[c]); | |
newSheet.getRange(r+2, c+2).setValue(dist[index]); | |
distAve= distAve + dist[index]; | |
index++; | |
} | |
} | |
} | |
} | |
/****Uncomment here if you want a new sheet with 3 columns [instance i][instance j][distance ij] | |
distAve = distAve/index; | |
for(var i=0; i < dist.length; i++){ | |
if(isNumber(dist[i]) && dist[i]>0){ | |
variance = variance + (dist[i]-distAve)*(dist[i]-distAve); | |
} | |
} | |
variance=variance/index; | |
stdev=Math.sqrt(variance); | |
//Browser.msgBox("Ave SD "+distAve+" "+stdev+" "+norm); | |
index=0;//to get back rows and column number | |
var line = 0; | |
binSheet.getRange(1,1).setValue("INSTANCE"); | |
binSheet.getRange(1,2).setValue("INSTANCE"); | |
binSheet.getRange(1,3).setValue("Distance"); | |
for(var r=0; r < numRows; r++){ | |
for(var c=0; c < numRows; c++){ | |
if(c>r){ | |
// Browser.msgBox("dist "+dist[index]); | |
// Browser.msgBox("limite "+(distAve-numSigmas*stdev)); | |
// binSheet.getRange(index+1,1).setValue(r+1); | |
// binSheet.getRange(index+1,2).setValue(c+1); | |
// binSheet.getRange(index+1,3).setValue(dist[index]); | |
//^^^^OK, it's a bell distribution | |
// if(dist[index]<distAve-numSigmas*stdev && dist[index]!=0){ | |
//Browser.msgBox("pippo"); | |
binSheet.getRange(line+2,1).setValue(r+1); | |
binSheet.getRange(line+2,2).setValue(c+1); | |
binSheet.getRange(line+2,3).setValue(dist[index]); | |
line++; | |
// } | |
index++; | |
} | |
} | |
} | |
Browser.msgBox("Created two worksheets: 'Similarity Matrix' and 'Similarity Adjacency Matrix'."); | |
****End uncomment***** | |
*/ | |
} | |
//////////////////////////////////////////////////////////////////////////////////////// | |
//function distance(vector1, vector2) | |
//If two elements are both not empty strings, if equal distance=0, otherwise 1. | |
//////////////////////////////////////////////////////////////////////////////////////// | |
function distance(v1,v2){ | |
if(v1.length != v2.length){ | |
Browser.msgBox("function distance(v1,v2): vectors of different lengths"); | |
} | |
//Browser.msgBox("v1 "+v1); | |
//Browser.msgBox("v2 "+v2); | |
var norm=0; | |
var dist=0; | |
for(var i=0;i<v1.length;i++){ | |
if(v1[i].toString()!="" && v2[i].toString()!=""){ | |
norm++; | |
if(v1[i]!=v2[i]){ | |
dist++; | |
// Browser.msgBox("v1 "+v1[i]+" v2 "+v2[i]+" dist "+dist); | |
} | |
} | |
} | |
return dist/norm; | |
} | |
function Attribute (vector) { | |
this.type = vector[0]; | |
this.poxValues = this.computePoxValues(vector); | |
this.nValues = this.poxValues.length; | |
this.oneR = new Array(this.nValues); | |
//* getPoxValues */ | |
this.getPoxValues = function() { | |
return this.poxValues; | |
} | |
////////////////// | |
//* getOneR */ | |
///////////////// | |
this.getOneR = function(theClass) { | |
if(theClass.length != vector.length){ | |
Browser.msgBox("Classes and Attributes array have different length"); | |
return 0; | |
} | |
var norm = new Array(this.nValues); | |
//Initialize oneR and norm to 0 | |
for(var i=0;i<this.nValues;i++){ | |
this.oneR[i]=0.0; | |
norm[i]=0.0; | |
} | |
//We have nValues pox values for the att. | |
for(var i=1; i<theClass.length; i++){ | |
for(var j=0; j<this.nValues; j++){ | |
if(vector[i].toString() == this.poxValues[j].toString()){ | |
this.oneR[j] = 1.0*this.oneR[j] + theClass[i]*1.0; | |
norm[j]++; | |
} | |
} | |
} | |
for(var j=0; j<this.nValues; j++){ | |
this.oneR[j] = Math.min(this.oneR[j]*1.0, norm[j]-this.oneR[j]*1.0); | |
//Browser.msgBox(this.type+": oneR["+this.poxValues[j]+"] = "+this.oneR[j]+"/"+norm[j]); | |
} | |
//Sum errors for each value | |
var R=0; | |
var denominator=0; | |
//Don't want to consider empty values | |
for(var j=0; j<this.nValues; j++){ | |
R = R*1.0 + 1.0*this.oneR[j]; | |
denominator=denominator*1.0+norm[j]; | |
} | |
return R/denominator; | |
} | |
} | |
//////////////////////////////////// | |
//* prototype.computePoxValues */ | |
//////////////////////////////////// | |
Attribute.prototype.computePoxValues = function(vector) { | |
//NB empty values are not considered possible values | |
var values = new Array;//Array of no null strings in case some events have empty cells | |
var o=0; | |
for(var i=1;i<vector.length;i++){ | |
if(vector[i] != ""){ | |
values[o]=vector[i]; | |
o++; | |
} | |
} | |
values.sort(); | |
var j=1; | |
var poxValues = new Array(); | |
poxValues[0]=values[0].toString(); | |
for(var i=1; i<values.length;i++){ | |
if(values[i].toString() != values[i-1].toString()){ | |
poxValues[j]=values[i]; | |
j++; | |
} | |
} | |
return poxValues; | |
} | |
/* var isNumeric = function(x) { | |
// returns true if x is numeric and false if it is not. | |
var RegExp = /^(-)?(\d*)(\.?)(\d*)$/; | |
return String(x).match(RegExp); | |
} | |
*/ | |
function isAlien(a) { | |
return isObject(a) && typeof a.constructor != 'function'; | |
} | |
function isArray(a) { | |
return isObject(a) && a.constructor == Array; | |
} | |
function isBoolean(a) { | |
return typeof a == 'boolean'; | |
} | |
function isEmpty(o) { | |
var i, v; | |
if (isObject(o)) { | |
for (i in o) { | |
v = o[i]; | |
if (isUndefined(v) && isFunction(v)) { | |
return false; | |
} | |
} | |
} | |
return true; | |
} | |
function isFunction(a) { | |
return typeof a == 'function'; | |
} | |
function isNull(a) { | |
return typeof a == 'object' && !a; | |
} | |
function isNumber(a) { | |
return typeof a == 'number' && isFinite(a); | |
} | |
function isObject(a) { | |
return (a && typeof a == 'object') || isFunction(a); | |
} | |
function isString(a) { | |
return typeof a == 'string'; | |
} | |
function isUndefined(a) { | |
return typeof a == 'undefined'; | |
} | |
function TESTPO(ratings){ | |
var mp=10; | |
var ne =0; | |
var ave = 0; | |
var save =0; | |
var norm =0; | |
for(var i=0; i < ratings.length; i++){ | |
ave = 1.0*ave + ratings[i]*(i+1); | |
save = save + (i+1)*(i+1)*ratings[i]; | |
norm = 1.0*norm + 1.0*ratings[i]; | |
if(ratings[i] != 0) ne++; | |
} | |
if(ne ==0) return 0; | |
if(ne ==1) return 0; | |
ave = ave/norm; | |
save = save/norm; | |
sd = Math.sqrt(save - ave*ave); | |
var ms4 = new Array(); | |
var nc = new Array(); | |
var k = 0.0; | |
for(var i=0; i < ratings.length; i++){ | |
ms4[i] = Math.pow( (i+1 - ave)/sd, 4); | |
nc[i] = ratings[i] / norm; | |
k = k+(ms4[i]*nc[i]); | |
} | |
return Math.exp(1.0 - k)*mp*sd/2; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment