Skip to content

Instantly share code, notes, and snippets.

@malemi
Created February 27, 2016 10:24
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save malemi/4997821d391886189bc2 to your computer and use it in GitHub Desktop.
Save malemi/4997821d391886189bc2 to your computer and use it in GitHub Desktop.
Addon for Google Spreadsheet / A bunch of statistical functions in a new menu "QM Stat"
// QM Stat 0.2
// Simple Data Mining Algorithms, (c) Mario Alemi 2010
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as
// published by the Free Software Foundation
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Affero General Public License for more details.
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
function onOpen() {
var mySpreadsheet = SpreadsheetApp.getActiveSpreadsheet();
var menuEntries = [ {name: "One attribute Rule (1R)", functionName: "doOneR"},
{name: "Similarity matrix", functionName: "doSimilarity"},
{name: "Frequency Plot Average and StDev", functionName: "ave_stdev"},
{name: "Kurtosis of a distribution", functionName: "getKurtosis"},
{name: "Help", functionName: "doHelp"}];
mySpreadsheet.addMenu("QM Stat", menuEntries);
}
function doHelp(){
var mydoc = SpreadsheetApp.getActiveSpreadsheet();
var app = UiApp.createApplication().setTitle('QM Stat Help');
var text1 = "Compute the One-attribute-rule error for each attribute. Select at least two columns. The last column is supposed represent the class' values and must be 0/1. The first line gives the name of the attributes. \nSee http://en.wikipedia.org/wiki/Association_rule_learning#One-attribute-rule for a quick explanation of the algorithm. ";
var text2 = "Compute the distance between each istance and creates a worksheet with a matrix of the distances. ***Select only the attributes values***. Do not select classes and attributes' name. At the moment only for discreet values.";
var text3 = "It computes the average and the standard deviation of a frequency plot. Select two adjacent columns, on the first the Class, on the second the Frequency.<br><i>Nota Bene<i>: values of the class are considered middle value of the bin.";
var text4 = "It computes the kurtosis of a distribution. Select two adjacent columns: on the left the X, on the right the counts. <br><i>Nota Bene<i>: values of the class are considered middle value of the bin.";
var tabPanel = app.createTabPanel();
var flowPanel1 = app.createFlowPanel();
var flowPanel2 = app.createFlowPanel();
var flowPanel3 = app.createFlowPanel();
var flowPanel4 = app.createFlowPanel();
flowPanel1.add(app.createLabel(text1));
tabPanel.add(flowPanel1, "1R Rule");
flowPanel2.add(app.createLabel(text2));
tabPanel.add(flowPanel2, "Similarity Matrix");
flowPanel3.add(app.createLabel(text3));
tabPanel.add(flowPanel3, "Average and StDev");
flowPanel4.add(app.createLabel(text4));
tabPanel.add(flowPanel4, "Reduced Kurtosis");
tabPanel.selectTab(0);
app.add(tabPanel);
mydoc.show(app);
}
function ave_stdev(){
var mySpreadsheet = SpreadsheetApp.getActiveSpreadsheet();
var mySheet = SpreadsheetApp.getActiveSheet();
var mySelection = mySheet.getActiveSelection();
var numCols = mySelection.getNumColumns();
var numRows = mySelection.getNumRows();
if (numCols != 2) {
Browser.msgBox("Select *just* two columns. Classes on the first, counts on the second one.");
return;
}
var theClass = mySelection.offset(0, 0, numRows, 1).getValues();
var theCounts = mySelection.offset(0, 1, numRows, 1).getValues();
var hasHeader = 0;
if(isNaN(theClass[0]) || isNaN(theCounts[0])){
Browser.msgBox("First line considered as header");
hasHeader = 1;
}
var ave=0;
var stDev=0;
var sqAve = 0;
var norm = 0;
var halfBinWidth = (theClass[hasHeader+1]-theClass[hasHeader])/2.0;
// Browser.msgBox("HBW "+halfBinWidth);
for(var i=hasHeader; i < numRows; i++){
ave = 1.0*ave + (theClass[i])*theCounts[i];
sqAve = sqAve + theClass[i]*theClass[i]*theCounts[i];
// Browser.msgBox("AVE "+i+" "+ave);
// Browser.msgBox("SQ AVE "+i+" "+sqAve);
norm = 1.0*norm + 1.0*theCounts[i];
// Browser.msgBox("NORM "+i+" "+norm);
}
ave = ave/norm;
sqAve = sqAve/norm;
stDev = Math.sqrt(sqAve - ave*ave);
ave = ave;
Browser.msgBox("AVE "+ave+"\nSTDEV "+stDev);
mySheet.getRange(mySelection.getLastRow()+1, mySelection.getColumn()).setValue("Average: ");
mySheet.getRange(mySelection.getLastRow()+2, mySelection.getColumn()).setValue("Standard Deviation: ");
mySheet.getRange(mySelection.getLastRow()+1, mySelection.getColumn()+1).setValue(ave);
mySheet.getRange(mySelection.getLastRow()+2, mySelection.getColumn()+1).setValue(stDev);
mySheet.getRange(mySelection.getLastRow()+1, mySelection.getColumn()).setBackgroundColor('red');
mySheet.getRange(mySelection.getLastRow()+2, mySelection.getColumn()).setBackgroundColor('red');
}
function getKurtosis(){
var mySpreadsheet = SpreadsheetApp.getActiveSpreadsheet();
var mySheet = SpreadsheetApp.getActiveSheet();
var mySelection = mySheet.getActiveSelection();
var numCols = mySelection.getNumColumns();
var numRows = mySelection.getNumRows();
if (numCols != 2) {
Browser.msgBox("Select *just* two columns. Number of stars on the first, ratings on the second one.");
return;
}
var theClass = mySelection.offset(0, 0, numRows, 1).getValues();
var theCounts = mySelection.offset(0, 1, numRows, 1).getValues();
var hasHeader = 0;
if(isNaN(theClass[0]) || isNaN(theCounts[0])){
Browser.msgBox("First line considered as header");
hasHeader = 1;
}
var ave=0;
var stDev=0;
var sqAve = 0;
var norm = 0;
var halfBinWidth = (theClass[hasHeader+1]-theClass[hasHeader])/2.0;
// Browser.msgBox("HBW "+halfBinWidth);
for(var i=hasHeader; i < numRows; i++){
ave = 1.0*ave + (theClass[i])*theCounts[i];
sqAve = sqAve + theClass[i]*theClass[i]*theCounts[i];
// Browser.msgBox("AVE "+i+" "+ave);
// Browser.msgBox("SQ AVE "+i+" "+sqAve);
norm = 1.0*norm + 1.0*theCounts[i];
// Browser.msgBox("NORM "+i+" "+norm);
}
ave = ave/norm;
sqAve = sqAve/norm;
stDev = Math.sqrt(sqAve - ave*ave);
ave = ave;
var ms4 = new Array(); // [(CLASS_VALUE - AVERAGE)/STDEV]^4
var normCounts = new Array(); //theClass / norm
var kurtosis = 0.0;
for(var i=0; i < (numRows - hasHeader); i++){
ms4[i] = Math.pow( (theClass[i+hasHeader] - ave)/stDev, 4);
normCounts[i] = theCounts[i+hasHeader] / norm;
//OK Browser.msgBox("ms4["+i+"]: "+ms4[i]+" , normCounts["+i+"]= "+normCounts[i]);
kurtosis = kurtosis*1.0 + ( ms4[i] * normCounts[i]);
}
kurtosis = kurtosis - 3.0;
var polarization = Math.exp(-2-kurtosis)*10;
//Browser.msgBox("AVE "+ave+"\nSTDEV "+stDev);
mySheet.getRange(mySelection.getLastRow()+1, mySelection.getColumn()).setValue("Kurtosis: ");
mySheet.getRange(mySelection.getLastRow()+2, mySelection.getColumn()).setValue("Polarization: ");
mySheet.getRange(mySelection.getLastRow()+1, mySelection.getColumn()+1).setValue(kurtosis);
mySheet.getRange(mySelection.getLastRow()+2, mySelection.getColumn()+1).setValue(polarization);
mySheet.getRange(mySelection.getLastRow()+1, mySelection.getColumn()).setBackgroundColor('red');
mySheet.getRange(mySelection.getLastRow()+2, mySelection.getColumn()).setBackgroundColor('red');
}
function kurtosis(ratings){
maxPol=10;
var nonEmpty =0;
var ave = 0;
var sqAve =0;
var norm =0;
for(var i=0; i < ratings.length; i++){
ave = 1.0*ave + ratings[i]*(i+1);
sqAve = sqAve + (i+1)*(i+1)*ratings[i];
norm = 1.0*norm + 1.0*ratings[i];
if(ratings[i] != 0) nonEmpty++;
}
if(nonEmpty ==0) return 0;
if(nonEmpty ==1) return 0;
ave = ave/norm;
sqAve = sqAve/norm;
stDev = Math.sqrt(sqAve - ave*ave);
var ms4 = new Array();
var normCounts = new Array();
var k = 0.0;
for(var i=0; i < ratings.length; i++){
ms4[i] = Math.pow( (i+1 - ave)/stDev, 4);
normCounts[i] = ratings[i] / norm;
k = k+(ms4[i]*normCounts[i]);
}
return k;
}
function ratingstdev(ratings){
maxPol=10;
var nonEmpty =0;
var ave = 0;
var sqAve =0;
var norm =0;
for(var i=0; i < ratings.length; i++){
ave = 1.0*ave + ratings[i]*(i+1);
sqAve = sqAve + (i+1)*(i+1)*ratings[i];
norm = 1.0*norm + 1.0*ratings[i];
if(ratings[i] != 0) nonEmpty++;
}
if(nonEmpty ==0) return 0;
if(nonEmpty ==1) return 0;
ave = ave/norm;
sqAve = sqAve/norm;
return Math.sqrt(sqAve - ave*ave);
}
function doOneR() {
var mySpreadsheet = SpreadsheetApp.getActiveSpreadsheet();
var mySheet = SpreadsheetApp.getActiveSheet();
var mySelection = mySheet.getActiveSelection();
var numCols = mySelection.getNumColumns();
var numRows = mySelection.getNumRows();
var newSheet = mySpreadsheet.insertSheet("1R Rule");
newSheet.deleteColumns(5, 15);
if (numCols < 2) {
Browser.msgBox("Select at least two columns, the last one must be 1/0 (classes).\n The first line gives the name of the attributes");
}
if (numCols > 1) {
var theClass = mySelection.offset(0, numCols-1, numRows, 1).getValues();
var theAttributes = mySelection.offset(0, 0, numRows, numCols-1).getValues();
theAtts = new Array;
theR = new Array;
for(var i=0; i < numCols-1; i++){
theAtts[i] = new Attribute(mySelection.offset(0, i, numRows, 1).getValues());
//OK Browser.msgBox(theAtts[i].type);
//OK Browser.msgBox(theAtts[i].getPoxValues());
theR[i] = theAtts[i].getOneR(theClass);
// Browser.msgBox("R for "+theAtts[i].type+" is "+theR[i]);
// mySheet.getRange(mySelection.getLastRow()+i+1, mySelection.getColumn()).setValue("R for "+theAtts[i].type+" is ");
// mySheet.getRange(mySelection.getLastRow()+i+1, mySelection.getColumn()+1).setValue(theR[i]);
// mySheet.getRange(mySelection.getLastRow()+i+1, mySelection.getColumn()).setBackgroundColor('red');
newSheet.getRange(i+1, 1).setValue("R for "+theAtts[i].type+" is ");
newSheet.getRange(i+1, 2).setValue(theR[i]);
newSheet.getRange(i+1, 1).setBackgroundColor('red');
}
}
}
function doSimilarity(){
var mySpreadsheet = SpreadsheetApp.getActiveSpreadsheet();
var mySheet = SpreadsheetApp.getActiveSheet();
var mySelection = mySheet.getActiveSelection();
var numCols = mySelection.getNumColumns();
var numRows = mySelection.getNumRows();
var newSheet = mySpreadsheet.insertSheet("SimMtx");
// var binSheet = mySpreadsheet.insertSheet("SimAdjacencyMtx");
var dist = new Array(numRows*(numRows-1)/2);
var distAve=0;
var variance =0;
var stdev =0;
var norm=0;
var index=0;//to get back rows and column number
if (numRows < 2) {
Browser.msgBox("Select at least two rows. ***Only attributes values***, no name of attributes, no name of instance.");
}
if (numRows > 1) {
var instances = mySelection.getValues();
for(var r=0; r < numRows; r++){
for(var c=0; c < numRows; c++){
if(r==0){
newSheet.getRange(1, c+2).setValue("INSTANCE"+(c*1+1));
newSheet.getRange(1, c+2).setFontColor('blue');
}
if(c==0){
newSheet.getRange(r+2, 1).setValue("INSTANCE"+(1*r+1));
newSheet.getRange(r+2, 1).setFontColor('blue');
}
if(c>r){//only half matrix
dist[index]= distance(instances[r],instances[c]);
newSheet.getRange(r+2, c+2).setValue(dist[index]);
distAve= distAve + dist[index];
index++;
}
}
}
}
/****Uncomment here if you want a new sheet with 3 columns [instance i][instance j][distance ij]
distAve = distAve/index;
for(var i=0; i < dist.length; i++){
if(isNumber(dist[i]) && dist[i]>0){
variance = variance + (dist[i]-distAve)*(dist[i]-distAve);
}
}
variance=variance/index;
stdev=Math.sqrt(variance);
//Browser.msgBox("Ave SD "+distAve+" "+stdev+" "+norm);
index=0;//to get back rows and column number
var line = 0;
binSheet.getRange(1,1).setValue("INSTANCE");
binSheet.getRange(1,2).setValue("INSTANCE");
binSheet.getRange(1,3).setValue("Distance");
for(var r=0; r < numRows; r++){
for(var c=0; c < numRows; c++){
if(c>r){
// Browser.msgBox("dist "+dist[index]);
// Browser.msgBox("limite "+(distAve-numSigmas*stdev));
// binSheet.getRange(index+1,1).setValue(r+1);
// binSheet.getRange(index+1,2).setValue(c+1);
// binSheet.getRange(index+1,3).setValue(dist[index]);
//^^^^OK, it's a bell distribution
// if(dist[index]<distAve-numSigmas*stdev && dist[index]!=0){
//Browser.msgBox("pippo");
binSheet.getRange(line+2,1).setValue(r+1);
binSheet.getRange(line+2,2).setValue(c+1);
binSheet.getRange(line+2,3).setValue(dist[index]);
line++;
// }
index++;
}
}
}
Browser.msgBox("Created two worksheets: 'Similarity Matrix' and 'Similarity Adjacency Matrix'.");
****End uncomment*****
*/
}
////////////////////////////////////////////////////////////////////////////////////////
//function distance(vector1, vector2)
//If two elements are both not empty strings, if equal distance=0, otherwise 1.
////////////////////////////////////////////////////////////////////////////////////////
function distance(v1,v2){
if(v1.length != v2.length){
Browser.msgBox("function distance(v1,v2): vectors of different lengths");
}
//Browser.msgBox("v1 "+v1);
//Browser.msgBox("v2 "+v2);
var norm=0;
var dist=0;
for(var i=0;i<v1.length;i++){
if(v1[i].toString()!="" && v2[i].toString()!=""){
norm++;
if(v1[i]!=v2[i]){
dist++;
// Browser.msgBox("v1 "+v1[i]+" v2 "+v2[i]+" dist "+dist);
}
}
}
return dist/norm;
}
function Attribute (vector) {
this.type = vector[0];
this.poxValues = this.computePoxValues(vector);
this.nValues = this.poxValues.length;
this.oneR = new Array(this.nValues);
//* getPoxValues */
this.getPoxValues = function() {
return this.poxValues;
}
//////////////////
//* getOneR */
/////////////////
this.getOneR = function(theClass) {
if(theClass.length != vector.length){
Browser.msgBox("Classes and Attributes array have different length");
return 0;
}
var norm = new Array(this.nValues);
//Initialize oneR and norm to 0
for(var i=0;i<this.nValues;i++){
this.oneR[i]=0.0;
norm[i]=0.0;
}
//We have nValues pox values for the att.
for(var i=1; i<theClass.length; i++){
for(var j=0; j<this.nValues; j++){
if(vector[i].toString() == this.poxValues[j].toString()){
this.oneR[j] = 1.0*this.oneR[j] + theClass[i]*1.0;
norm[j]++;
}
}
}
for(var j=0; j<this.nValues; j++){
this.oneR[j] = Math.min(this.oneR[j]*1.0, norm[j]-this.oneR[j]*1.0);
//Browser.msgBox(this.type+": oneR["+this.poxValues[j]+"] = "+this.oneR[j]+"/"+norm[j]);
}
//Sum errors for each value
var R=0;
var denominator=0;
//Don't want to consider empty values
for(var j=0; j<this.nValues; j++){
R = R*1.0 + 1.0*this.oneR[j];
denominator=denominator*1.0+norm[j];
}
return R/denominator;
}
}
////////////////////////////////////
//* prototype.computePoxValues */
////////////////////////////////////
Attribute.prototype.computePoxValues = function(vector) {
//NB empty values are not considered possible values
var values = new Array;//Array of no null strings in case some events have empty cells
var o=0;
for(var i=1;i<vector.length;i++){
if(vector[i] != ""){
values[o]=vector[i];
o++;
}
}
values.sort();
var j=1;
var poxValues = new Array();
poxValues[0]=values[0].toString();
for(var i=1; i<values.length;i++){
if(values[i].toString() != values[i-1].toString()){
poxValues[j]=values[i];
j++;
}
}
return poxValues;
}
/* var isNumeric = function(x) {
// returns true if x is numeric and false if it is not.
var RegExp = /^(-)?(\d*)(\.?)(\d*)$/;
return String(x).match(RegExp);
}
*/
function isAlien(a) {
return isObject(a) && typeof a.constructor != 'function';
}
function isArray(a) {
return isObject(a) && a.constructor == Array;
}
function isBoolean(a) {
return typeof a == 'boolean';
}
function isEmpty(o) {
var i, v;
if (isObject(o)) {
for (i in o) {
v = o[i];
if (isUndefined(v) && isFunction(v)) {
return false;
}
}
}
return true;
}
function isFunction(a) {
return typeof a == 'function';
}
function isNull(a) {
return typeof a == 'object' && !a;
}
function isNumber(a) {
return typeof a == 'number' && isFinite(a);
}
function isObject(a) {
return (a && typeof a == 'object') || isFunction(a);
}
function isString(a) {
return typeof a == 'string';
}
function isUndefined(a) {
return typeof a == 'undefined';
}
function TESTPO(ratings){
var mp=10;
var ne =0;
var ave = 0;
var save =0;
var norm =0;
for(var i=0; i < ratings.length; i++){
ave = 1.0*ave + ratings[i]*(i+1);
save = save + (i+1)*(i+1)*ratings[i];
norm = 1.0*norm + 1.0*ratings[i];
if(ratings[i] != 0) ne++;
}
if(ne ==0) return 0;
if(ne ==1) return 0;
ave = ave/norm;
save = save/norm;
sd = Math.sqrt(save - ave*ave);
var ms4 = new Array();
var nc = new Array();
var k = 0.0;
for(var i=0; i < ratings.length; i++){
ms4[i] = Math.pow( (i+1 - ave)/sd, 4);
nc[i] = ratings[i] / norm;
k = k+(ms4[i]*nc[i]);
}
return Math.exp(1.0 - k)*mp*sd/2;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment