#Useful R functions for psychometrics and personality research, by William Revelle -- copied from http://personality-project.org/r/useful.r
#useful functions for psychometrics and personality research
#a growing compilation of simple functions that I have found useful for doing personality research
#last updated Feb 14, 2005
#by William Revelle
#includes
#alpha.scale #find coefficient alpha for a scale and a dataframe of items
#describe give means, sd, skew, n, and se
#summ.stats #basic summary statistics by a grouping variable
#error.crosses (error bars in two space)
#skew find skew
#panel.cor taken from the examples for pairs
#pairs.panels adapted from panel.cor -- gives a splom, histogram, and correlation matrix
#multi.hist #plot multiple histograms
#correct.cor #given a correlation matrix and a vector of reliabilities, correct for reliability
#fisherz #convert pearson r to fisher z
#paired.r #test for difference of dependent correlations
#count.pairwise #count the number of good cases when doing pairwise analysis
#eigen.loadings #convert eigen vector vectors to factor loadings by unnormalizing them
#principal #yet another way to do a principal components analysis -- brute force eignvalue decomp
#factor.congruence #find the factor congruence coeffiecints
#factor.model #given a factor model, find the correlation matrix
#factor.residuals #how well does it fit?
#factor.rotate # rotate two columns of a factor matrix by theta (in degrees)
#phi2poly #convert a matrix of phi coefficients to polychoric correlations
#define a function to calculate coefficient alpha
alpha.scale=function (x,y) #find coefficient alpha given a scale and a data.frame of the items in the scale
{
n=length(y) #number of variables
Vi=sum(diag(var(y,na.rm=TRUE))) #sum of item variance
Vt=var(x,na.rm=TRUE) #total test variance
((Vt-Vi)/Vt)*(n/(n-1))} #alpha
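#example (a sketch with made-up data): alpha for four simulated items
#items <- data.frame(matrix(rnorm(400),ncol=4)) #hypothetical, uncorrelated items
#alpha.scale(rowSums(items),items) #alpha should be near 0 for uncorrelated items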
#general descriptive statistics
describe <- function (x, digits = 2,na.rm=TRUE) #basic stats after dropping non-numeric data
{ #first, define a local function
valid <- function(x) {
return(sum(!is.na(x)))
}
if (is.vector(x) ) #do it for vectors or
{
stats = matrix(rep(NA,6),ncol=6) #create a temporary array
stats[1, 1] <- mean(x, na.rm=na.rm )
stats[1, 2] <- median(x,na.rm=na.rm )
stats[1, 3] <- min(x, na.rm=na.rm )
stats[1, 4] <- max(x, na.rm=na.rm )
stats[1, 5] <- skew(x,na.rm=na.rm )
stats[1, 6] <- valid(x )
len <- 1;
}
else {len = dim(x)[2] #do it for matrices or data.frames
stats = matrix(rep(NA,len*6),ncol=6) #create a temporary array
for (i in 1:len) {
if (is.numeric(x[,i])) { #just do this for numeric data
stats[i, 1] <- mean(x[,i], na.rm=na.rm )
stats[i, 2] <- median(x[,i],na.rm=na.rm )
stats[i, 3] <- min(x[,i], na.rm=na.rm )
stats[i, 4] <- max(x[,i], na.rm=na.rm )
stats[i, 5] <- skew(x[,i],na.rm=na.rm )
stats[i, 6] <- valid(x[,i] )
}
}
}
if (len==1) {sds <- sd(x,na.rm=TRUE)} else {sds <- sapply(as.data.frame(x),sd,na.rm=TRUE)} #sd() is no longer defined for whole data frames
temp <- data.frame(n = stats[,6],mean = stats[,1], sd = sds, median = stats[,2],min= stats[,3],max=stats[,4], range=stats[,4]-stats[,3],skew = stats[,5])
answer <- round(data.frame(V=seq_len(len),temp, se = temp$sd/sqrt(temp$n)), digits)
return(answer)
}
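#example (using the built-in attitude data set):
#describe(attitude) #n, mean, sd, median, min, max, range, skew, and se for all seven variables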
# find basic summary statistics by groups #adapted from "Kickstarting R"
summ.stats <- function (x,y) { #data are x, grouping variable is y
means <- tapply(x, y, mean, na.rm=TRUE)
sds <- tapply(x,y, sd, na.rm = TRUE)
valid <- function (x) {return(sum(!is.na(x)) )}
ns <- tapply(x,y, valid )
se=sds/sqrt(ns)
answer <-data.frame(mean=means,sd=sds,se=se,n=ns)
return(answer) #return the result explicitly so it is visible when not assigned
}
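#example (using the built-in mtcars data set):
#summ.stats(mtcars$mpg,mtcars$cyl) #mean, sd, se, and n of gas mileage by number of cylinders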
#error bars in a two dimensional plot
old.error.crosses <-function (x,y,z) # x and y are data frames with z points
{for (i in 1:z)
{xcen <- x$mean[i]
ycen <- y$mean[i]
xse <- x$se[i]
yse <- y$se[i]
arrows(xcen-xse,ycen,xcen+xse,ycen,length=.2, angle = 90, code=3,col = par("fg"), lty = NULL, lwd = par("lwd"), xpd = NULL)
arrows(xcen,ycen-yse,xcen,ycen+yse,length=.2, angle = 90, code=3,col = par("fg"), lty = NULL, lwd = par("lwd"), xpd = NULL)
}
}
#error bars in a two dimensional plot with labels
error.crosses <-function (x,y,labels=NULL,pos=NULL,arrow.len=.2,...) # x and y are data frames with mean and se for each point
{z <- dim(x)[1]
if (length(pos)==0) {locate<-rep(1,z)} else {locate <- pos}
if (length(labels)==0) lab<- rep("",z) else lab <-labels
for (i in 1:z)
{xcen <- x$mean[i]
ycen <- y$mean[i]
xse <- x$se[i]
yse <- y$se[i]
arrows(xcen-xse,ycen,xcen+xse,ycen,length=arrow.len, angle = 90, code=3,col = par("fg"), lty = NULL, lwd = par("lwd"), xpd = NULL)
arrows(xcen,ycen-yse,xcen,ycen+yse,length=arrow.len, angle = 90, code=3,col = par("fg"), lty = NULL, lwd = par("lwd"), xpd = NULL)
text(xcen,ycen,labels=lab[i],pos=locate[i],cex=1,offset=arrow.len+1) #puts in labels for all points
}
}
#examples
# plot(mPA,mNA,pch=symb[condit],cex=4.5,col=colors[condit],bg=colors[condit],main="Postive vs. Negative Affect",xlim=c(-1,1.5),ylim=c(-1,1.5),xlab="Positive Affect",ylab="Negative Affect")
#error.crosses(paf.stats,naf.stats) #put in x and y error bars!
#find a vector of skews for a data matrix
skew= function (x, na.rm = FALSE)
{
if (na.rm) x <- x[!is.na(x)] #remove missing values
sum((x - mean(x))^3)/(length(x) * sd(x)^3) #calculate skew
}
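#example: the exponential distribution has a theoretical skew of 2
#skew(rexp(10000)) #should be close to 2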
#some fancy graphics -- adapted from the examples in help(pairs)
panel.cor.scale <- function(x, y, digits=2, prefix="", cex.cor)
{
usr <- par("usr"); on.exit(par(usr))
par(usr = c(0, 1, 0, 1))
r = (cor(x, y,use="pairwise"))
txt <- format(c(r, 0.123456789), digits=digits)[1]
txt <- paste(prefix, txt, sep="")
if(missing(cex.cor)) cex.cor <- 0.8/strwidth(txt) #use the supplied cex.cor when given
text(0.5, 0.5, txt, cex = cex.cor * abs(r))
}
panel.cor <- function(x, y, digits=2, prefix="", cex.cor)
{
usr <- par("usr"); on.exit(par(usr))
par(usr = c(0, 1, 0, 1))
r = (cor(x, y,use="pairwise"))
txt <- format(c(round(r,digits), 0.123456789), digits=digits)[1]
txt <- paste(prefix, txt, sep="")
if(missing(cex.cor)) cex.cor <- 0.8/strwidth(txt) #use the supplied cex.cor when given
text(0.5, 0.5, txt, cex = cex.cor )
}
panel.hist <- function(x, ...)
{
usr <- par("usr"); on.exit(par(usr))
par(usr = c(usr[1:2], 0, 1.5) )
h <- hist(x, plot = FALSE)
breaks <- h$breaks; nB <- length(breaks)
y <- h$counts; y <- y/max(y)
rect(breaks[-nB], 0, breaks[-1], y, col="cyan", ...)
}
pairs.panels <- function (x,y,smooth=TRUE,scale=FALSE,digits=2) #combines a splom, histograms, and correlations
{if (smooth) {
if (scale) {pairs(x,diag.panel=panel.hist,upper.panel=panel.cor.scale,lower.panel=panel.smooth)
} else {pairs(x,diag.panel=panel.hist,upper.panel=panel.cor,lower.panel=panel.smooth)
}
} else { #smooth is not true
if (scale) {pairs(x,diag.panel=panel.hist,upper.panel=panel.cor.scale)
} else {pairs(x,diag.panel=panel.hist,upper.panel=panel.cor) }
} #end of else (smooth)
} #end of function
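#example (using the built-in attitude data set):
#pairs.panels(attitude) #scatter plots below the diagonal, histograms on it, correlations above it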
multi.hist <- function(x,...) {nvar <- dim(x)[2] #number of variables
nsize=trunc(sqrt(nvar))+1 #size of graphic
old.par <- par(no.readonly = TRUE) # all par settings which can be changed
par(mfrow=c(nsize,nsize)) #set new graphic parameters
for (i in 1:nvar) {
name=names(x)[i] #get the names for the variables
hist(x[,i],main=name,xlab=name,...) } #draw the histograms for each variable
on.exit(par(old.par)) #set the graphic parameters back to the original
}
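#example (using the built-in attitude data set):
#multi.hist(attitude) #seven histograms in a 3 x 3 layout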
#function to do lowess fitting if we have missing data
lowess.na <- function(x, y = NULL, f = 2/3,...) { #do lowess with missing data
x1 <- subset(x,(!is.na(x)) &(!is.na(y)))
y1 <- subset(y, (!is.na(x)) &(!is.na(y)))
lowess(x1,y1,f, ...) #return the lowess fit based on complete cases only
}
#function to replace upper triangle of matrix with unattenuated correlations, and the diagonal with reliabilities
#input is a correlation matrix and a 1 x n matrix (row vector) of reliabilities
correct.cor <- function(x,y) { n=dim(x)[1]
{ x[1,1] <- y[1,1]
for (i in 2:n) {
x[i,i] <- y[1,i] #put reliabilities on the diagonal
k=i-1
for (j in 1:k) {
x[j,i] <- x[j,i]/sqrt(y[1,i]*y[1,j]) } #fix the upper triangular part of the matrix
}
return(x) }}
#difference of dependent (paired) correlations (following Steiger,J., 1980, Tests for comparing elements of a correlation matrix. Psychological Bulletin, 87, 245-251)
paired.r <- function(xy,xz,yz,n) {
diff <- xy-xz
determin=1-xy*xy - xz*xz - yz*yz + 2*xy*xz*yz
av=(xy+xz)/2
cube= (1-yz)*(1-yz)*(1-yz)
t2 = diff * sqrt((n-1)*(1+yz)/(((2*(n-1)/(n-3))*determin+av*av*cube)))
return(t2)
}
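#example: does y (r=.5) correlate more with x than z does (r=.3), given r(yz)=.4 and n=100?
#paired.r(.5,.3,.4,100) #returns Steiger's t statistic, with n-3 degrees of freedom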
fisherz <- function(rho) {0.5*log((1+rho)/(1-rho)) } #converts r to z
#a function to call John Fox's polychor function multiple times to form a matrix of polychoric correlations
#needs the polycor package: library(polycor)
polychor.matrix <-function(x,y=NULL) {
sizex <- dim(x)[2]
if (((is.data.frame(y))|(is.matrix(y)))) sizey<-dim(y)[2]
else sizey <- dim(x)[2]
result<-matrix(1,nrow=sizey,ncol=sizex) #create the output array
xnames<- names(x)
colnames(result)<- names(x)
if (((is.data.frame(y))|(is.matrix(y)))) rownames(result) <- names(y)
else rownames(result) <- names(x)
if (!((is.data.frame(y))|(is.matrix(y)))) { #default case returns a square matrix
for (i in 2: sizex ) {
for (j in 1:( i-1)) {
result[j,i]<-polychor(table(x[,j],x[,i]) )
result[i,j] <- result[j,i]
}
}
}
else { #if y is input, then return the rectangular array
for (i in 1: sizex ) {
for (j in 1:sizey) {
result[j,i]<-polychor(table(x[,i],y[,j]) )
}
} }
return (result) }
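#example (a sketch, needs library(polycor) and dichotomous data):
#d <- data.frame(a=rbinom(100,1,.5),b=rbinom(100,1,.5),c=rbinom(100,1,.5)) #hypothetical 0/1 items
#polychor.matrix(d) #3 x 3 matrix of polychoric correlations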
#count.pairwise -- count the number of good cases when doing pairwise analysis (e.g.,for correlations)
count.pairwise <- function(x,y=NULL) { #y defaults to NULL so a single data frame or matrix may be given
sizex <- dim(x)[2]
if ((is.data.frame(y))|(is.matrix(y))) sizey<-dim(y)[2]
else sizey <- dim(x)[2]
result<-matrix(1,nrow=sizey,ncol=sizex) #create the output array
xnames<- names(x)
colnames(result)<- names(x)
if (((is.data.frame(y))|(is.matrix(y)))) rownames(result) <- names(y)
else rownames(result) <- names(x)
if (!((is.data.frame(y))|(is.matrix(y)))) { #default case returns a square matrix
for (i in 2: sizex ) {
for (j in 1:( i-1)) {
result[j,i]<- sum((!is.na(x[,j]))&(!is.na(x[,i])))
result[i,j] <- result[j,i]
}
}
}
else { #if y is input, then return the rectangular array
for (i in 1: sizex ) {
for (j in 1:sizey) {
result[j,i]<-sum((!is.na(x[,i]))&(!is.na(y[,j])))
}
} }
return (result) }
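#example (using the built-in airquality data, which has missing values):
#count.pairwise(airquality) #pairwise n for every pair of variables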
#minor routines to manipulate matrices for factor analysis
#output from eigen is two lists: $values and $vectors
#
eigen.loadings <- function (x) { #convert eigen vectors to loadings by unnormalizing them
n <- length(x$values)
x$values[ x$values<0]<- 0
fix<-sqrt(x$values)
result<- x$vectors * rep(fix, each = n) #scale each eigenvector by the square root of its eigenvalue
return(result)}
#do a principal components analysis by eigenvalue decomposition and then showing the biggest nfactors
principal <-
function(r,nfactors=0) {
n <- dim(r)[1]
result<-list(values=c(rep(0,n)),loadings=matrix(rep(0,n*n),ncol=n),residual=matrix(rep(0,n*n),ncol=n),fit=0)
eigens <-eigen(r) #call the eigen value decomposition routine
result$values <-eigens$values
result$loadings <-eigen.loadings(eigens)
rownames(result$loadings) <- rownames(r)
if (nfactors>0) result$loadings<-result$loadings[,1:nfactors]
residual<- factor.residuals(r,result$loadings)
r2<- sum(r*r)
rstar2<- sum(residual*residual)
result$residual<-residual
result$fit<- 1-rstar2/r2
return(result)
}
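#example (using the built-in Harman74.cor correlation matrix of 24 ability tests):
#pc4 <- principal(Harman74.cor$cov,4) #values, loadings, residuals, and fit for four components
#pc4$fit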
#function to find factor congruence coefficients
factor.congruence <- function (x,y) {
nx<- dim(x)[2]
ny<- dim(y)[2]
cross<- t(y) %*% x #inner product will have dim of ny * nx
sumsx<- sqrt(1/diag(t(x)%*%x))
sumsy<- sqrt(1/diag(t(y)%*%y))
result<- matrix(rep(0,nx*ny),ncol=nx)
result<- sumsy * (cross * rep(sumsx, each = ny))
return(result)
}
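#example (a sketch): congruence of two- and four-component solutions of the same matrix
#f2 <- principal(Harman74.cor$cov,2)$loadings
#f4 <- principal(Harman74.cor$cov,4)$loadings
#factor.congruence(f2,f4) #4 x 2 matrix of congruence coefficients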
#estimate a correlation matrix given a particular factor/component model
factor.model <- function(x) {
result<- x%*%t(x)
return (result)}
#find residuals
factor.residuals <- function(r, f) {
rstar<- r- factor.model(f)
return(rstar)}
factor.fit <-function (r,f) {
r2 <-sum( r*r)
rstar <- factor.residuals(r,f)
rstar2 <- sum(rstar*rstar)
fit<- 1- rstar2/r2
return(fit) }
factor.rotate <-function(f,angle,col1,col2) {
nvar<- dim(f)[2]
rot<- matrix(rep(0,nvar*nvar),ncol=nvar)
rot[cbind(1:nvar, 1:nvar)] <- 1
theta<- 2*pi*angle/360
rot[col1,col1]<-cos(theta)
rot[col2,col2]<-cos(theta)
rot[col1,col2]<- -sin(theta)
rot[col2,col1]<- sin(theta)
result <- f %*% rot
return(result) }
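#example: rotate the first two components of a principal components solution by 30 degrees
#f <- principal(Harman74.cor$cov,2)$loadings
#factor.rotate(f,30,1,2)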
#phi coefficient from a 2 x 2 table (function probably already exists but I can't find it)
phi <- function(t) { #t is a 2 x 2 matrix
r.sum<-rowSums(t)
c.sum <-colSums(t)
total<-sum(r.sum)
r.sum<- r.sum/total
c.sum <-c.sum/total
v<- prod(r.sum,c.sum)
phi <- (t[1,1]/total- c.sum[1]*r.sum[1])/sqrt(v)
return(phi)
}
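#example: a 2 x 2 table with cell counts 30, 20, 20, 30
#phi(matrix(c(30,20,20,30),2,2)) #phi = .2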
#an alternative function to do the same, need to combine into one
phi1 <- function(x) { # returns the phi coefficient; x is a set of 4 numbers, e.g. c(1,2,3,4)
xm <- matrix(x,ncol=2)
total <- sum(xm)
xp <- xm/total
HR <- xp[1,1] + xp[1,2]
SR <- xp[1,1]+ xp[2,1]
VP <- xp[1,1]
phi <- (VP - HR*SR) /sqrt(HR*(1-HR)*(SR)*(1-SR))
return(phi)
}
#convert a phi coefficient to the corresponding polychoric correlation
#needs the polychor function from John Fox's polycor package
phi2poly <- function(ph,cp,cc) {
#ph is the phi coefficient
#cp is the selection ratio of the predictor
#cc is the success rate of the criterion
r.marg<-rep(0,2)
c.marg<- rep(0,2)
p<-array(rep(0,4),dim=c(2,2))
r.marg[1]<- cp
r.marg[2]<- 1 -cp
c.marg[1]<- cc
c.marg[2]<- 1-cc
p[1,1]<- r.marg[1]*c.marg[1]+ ph*sqrt(prod(r.marg,c.marg))
p[2,2]<- r.marg[2]*c.marg[2]+ ph*sqrt(prod(r.marg,c.marg))
p[1,2]<- r.marg[1]*c.marg[2]- ph*sqrt(prod(r.marg,c.marg))
p[2,1]<- r.marg[2]*c.marg[1]- ph*sqrt(prod(r.marg,c.marg))
result<-polychor(p )
return(result)}
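#example (needs library(polycor)): a phi of .3 with 50% cuts on both variables
#phi2poly(.3,.5,.5) #the polychoric estimate should be noticeably larger than the phi of .3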
psycho.demo <- function() {
#simulate correlation matrix with variable cut points -- psychometric demo
#make up some correlations with different cut points
cuts <-c(-2,-1,0,1,2)
nsub<-1000 #how many subjects
r<-.6 #population correlation of observed with theta
theta <-rnorm(nsub) #make up some random normal theta scores
err<- rnorm(nsub) #random normal error scores
obser<- theta*(r) + err*sqrt(1-r*r) #observed = T + E
#convert to 0/1 with different values of cuts
trunc<- matrix(rep(obser,length(cuts)),ncol=length(cuts))
for (i in 1:length(cuts)) {
trunc[obser>cuts[i],i]<- 1
trunc[obser< cuts[i],i]<- 0}
d.mat<- data.frame(theta,obser,trunc) #combine into a data frame
trunc.cor<- cor(d.mat) #find the correlations
freq <- colMeans(d.mat) #find the frequencies of scores (mean() of a data frame is defunct in current R)
#now, convert the upper diagonal to polychorics using John Fox's polychor and my phi2poly
for (i in 4:length(d.mat)) {
for (j in 3:(i-1)) { #only the off-diagonal elements
trunc.cor[j,i]<- phi2poly(trunc.cor[i,j],freq[i],freq[j])
}}
return(trunc.cor) #return the mixed phi/polychoric correlation matrix
}
#quick demonstration of a random walk process
#to show range of variability over trials and replications
#the "confidence lines" represent 2 sd of theoretical sum
random.walk <- function(length=200,n=20,effect=0) {
num <- seq(1:length)
colors <- rainbow(n) #select colors
x <- cumsum(rnorm(length,effect))/num
plot(x,ylim = c(-2.5,2.5),typ="b",col=colors[1],main="Sample means as function of sample size",ylab="sample mean",xlab="sample size")
for (i in 2:n) {
x <- cumsum(rnorm(length,effect))/num #find the next line
points(x,col=colors[i],typ="b") #draw it
}
curve(2/sqrt(x),1,length,101,add=TRUE) #upper confidence region
curve(-2/sqrt(x),1,length,101,add=TRUE) #lower confidence region
text(length/2,2,paste("effect size is",effect)) #label the plot with the effect size
}
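#example: random.walk() #20 running means of samples from N(0,1) with 2/sqrt(n) confidence curves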
#read from the clipboard for the Mac (thanks to Ken Knoblauch for this hint)
#for PCs, use read.table(file("clipboard"))
read.clipboard<-function(header=TRUE,...) {
MAC<-Sys.info()[1]=="Darwin" #are we on a Mac using the Darwin system?
if (!MAC ) {if (header) read.clipboard<-read.table(file("clipboard"),header=TRUE,...)
else read.clipboard<-read.table(file("clipboard"),...) }
else {
if (header) read.clipboard<- read.table(pipe("pbpaste"),header=TRUE,...)
else read.clipboard<- read.table(pipe("pbpaste") ,...)}
}
read.clipboard.csv<-function(header=TRUE,sep=',',...) { #read.clipboard for comma-separated clipboard data
MAC<-Sys.info()[1]=="Darwin" #are we on a Mac using the Darwin system?
if (!MAC ) {if (header) read.clipboard<-read.table(file("clipboard"),header=TRUE,sep=sep,...)
else read.clipboard<-read.table(file("clipboard"),sep=sep,...) }
else {
if (header) read.clipboard<- read.table(pipe("pbpaste"),header=TRUE,sep=sep,...)
else read.clipboard<- read.table(pipe("pbpaste"),sep=sep,...)}
}
#functions to help analyze data from correlation matrices
#used as part of the Synthetic Aperture Personality Assessment (SAPA) project
#
#extract cluster keys from a factor/principal components loadings matrix
factor2cluster <-
function(loads,cut=0) { #assign each variable to the factor on which it loads most highly
#(a sketch of the intended keying rule: +1 or -1 for the largest absolute loading above cut, 0 otherwise)
loads <- as.matrix(loads)
nvar <- dim(loads)[1]
nfact <- dim(loads)[2]
keys <- matrix(rep(0,nvar*nfact),ncol=nfact) #start with all zeros
biggest <- apply(abs(loads),1,which.max) #which factor has the largest absolute loading for each variable
for (i in 1:nvar) {
if (abs(loads[i,biggest[i]]) > cut) keys[i,biggest[i]] <- sign(loads[i,biggest[i]]) }
rownames(keys) <- rownames(loads)
colnames(keys) <- colnames(loads)
return(keys)
}
#find the correlation matrix of scales made up of items defined in a keys matrix (e.g., extracted by factor2cluster)
#takes as input the keys matrix as well as a correlation matrix of all the items
cluster.cor <-
function(keys,r.mat,correct=TRUE) { #function to extract clusters according to the key vector
#default is to correct for attenuation and show this above the diagonal
if(!is.matrix(keys)) keys <- as.matrix(keys) #keys are sometimes a data frame - must be a matrix
covar <- t(keys) %*% r.mat %*% keys #matrix algebra is our friend
var <- diag(covar)
sd.inv <- 1/sqrt(var)
ident.sd <- diag(sd.inv,ncol = length(sd.inv))
cluster.correl <- ident.sd %*% covar %*% ident.sd
key.var <- diag(t(keys) %*% keys)
key.alpha <- ((var-key.var)/var)*(key.var/(key.var-1))
key.alpha[is.nan(key.alpha)] <- 1 #if only 1 variable to the cluster, then alpha is undefined
key.alpha[!is.finite(key.alpha)] <- 1
colnames(cluster.correl) <- names(key.alpha)
rownames(cluster.correl) <- names(key.alpha)
if (correct) {cluster.corrected <- correct.cor(cluster.correl,t(key.alpha))} else {cluster.corrected <- NULL} #correct for attenuation only if requested
return(list(cor=cluster.correl,sd=sqrt(var),corrected= cluster.corrected))
}
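#example (a sketch): key the 24 ability tests by their largest component loading, then correlate the scales
#pc4 <- principal(Harman74.cor$cov,4)$loadings
#keys <- factor2cluster(pc4)
#cluster.cor(keys,Harman74.cor$cov) #scale intercorrelations, corrected for attenuation above the diagonal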
#a function to extract subsets of variables (a and b) from a correlation matrix m
#and find the multiple correlation beta weights + R2 of the a set predicting the b set
mat.regress <- function(m,a,b) {
#first reorder the matrix to select the right variables
nm <- dim(m)[1]
t.mat <- matrix(0,ncol=nm,nrow=nm)
ab <- c(a,b)
numa <- length(a)
numb <- length(b)
nab <- numa+numb
for (i in 1:nab) {
t.mat[i,ab[i]] <- 1 }
reorder <- t.mat %*% m %*% t(t.mat)
a.matrix <- reorder[1:numa,1:numa]
b.matrix <- reorder[1:numa,(numa+1):nab]
model.mat <- solve(a.matrix,b.matrix) #solve the equation bY~aX
if (length(b) >1 ) { rownames(model.mat) <- rownames(m)[a]
colnames(model.mat) <- colnames(m)[b]
r2 <- colSums(model.mat * b.matrix) }
else { r2 <- sum(model.mat * b.matrix)
names(model.mat) <- rownames(m)[a]
names(r2) <- colnames(m)[b]}
mat.regress <- list(beta=model.mat,r2=r2)
return(mat.regress)
}
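#example (using the built-in attitude data set): predict rating (variable 1) from variables 2:4
#mat.regress(cor(attitude),c(2,3,4),1) #standardized betas and R2 from the correlation matrix alone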