Useful R functions for psychometrics and personality research, by William Revelle -- copied from http://personality-project.org/r/useful.r
#useful functions for psychometrics and personality research
#a growing compilation of simple functions that I have found useful for doing personality research
#last updated Feb 14, 2005
#by William Revelle
#includes
#alpha.scale #find coefficient alpha for a scale and a dataframe of items
#describe #give means, sd, skew, n, and se
#summ.stats #basic summary statistics by a grouping variable
#error.crosses #error bars in two space
#skew #find skew
#panel.cor #taken from the examples for pairs
#pairs.panels #adapted from panel.cor -- gives a splom, histogram, and correlation matrix
#multi.hist #plot multiple histograms
#correct.cor #given a correlation matrix and a vector of reliabilities, correct for reliability
#fisherz #convert pearson r to fisher z
#paired.r #test for difference of dependent correlations
#count.pairwise #count the number of good cases when doing pairwise analysis
#eigen.loadings #convert eigen vectors to factor loadings by unnormalizing them
#principal #yet another way to do a principal components analysis -- brute force eigenvalue decomposition
#factor.congruence #find the factor congruence coefficients
#factor.model #given a factor model, find the correlation matrix
#factor.residuals #how well does it fit?
#factor.rotate #rotate two columns of a factor matrix by theta (in degrees)
#phi2poly #convert a matrix of phi coefficients to polychoric correlations
#define a function to calculate coefficient alpha
alpha.scale=function (x,y) #find coefficient alpha given a scale and a data.frame of the items in the scale
{
n=length(y) #number of variables
Vi=sum(diag(var(y,na.rm=TRUE))) #sum of item variances
Vt=var(x,na.rm=TRUE) #total test variance
((Vt-Vi)/Vt)*(n/(n-1))} #alpha
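#example (not run) -- a quick sanity check on simulated data; the object names here are made up for illustration
# items <- data.frame(matrix(rnorm(500),ncol=5)) #5 items, 100 subjects
# items <- items + rnorm(100) #add a common (subject) factor so the items correlate
# alpha.scale(rowSums(items),items) #alpha for the 5 item scale formed by summing the items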
#general descriptive statistics
describe <- function (x, digits = 2,na.rm=TRUE) #basic stats after dropping non-numeric data
{ #first, define a local function
valid <- function(x) {
return(sum(!is.na(x)))
}
if (is.vector(x) ) #do it for vectors or
{
stats = matrix(rep(NA,6),ncol=6) #create a temporary array
stats[1, 1] <- mean(x, na.rm=na.rm )
stats[1, 2] <- median(x,na.rm=na.rm )
stats[1, 3] <- min(x, na.rm=na.rm )
stats[1, 4] <- max(x, na.rm=na.rm )
stats[1, 5] <- skew(x,na.rm=na.rm )
stats[1, 6] <- valid(x )
sds <- sd(x,na.rm=na.rm)
len <- 1
}
else {len = dim(x)[2] #do it for matrices or data.frames
stats = matrix(rep(NA,len*6),ncol=6) #create a temporary array
sds <- rep(NA,len)
for (i in 1:len) {
if (is.numeric(x[,i])) { #just do this for numeric data
stats[i, 1] <- mean(x[,i], na.rm=na.rm )
stats[i, 2] <- median(x[,i],na.rm=na.rm )
stats[i, 3] <- min(x[,i], na.rm=na.rm )
stats[i, 4] <- max(x[,i], na.rm=na.rm )
stats[i, 5] <- skew(x[,i],na.rm=na.rm )
stats[i, 6] <- valid(x[,i] )
sds[i] <- sd(x[,i],na.rm=na.rm) #find the sd column by column (sd does not work on a whole data frame)
}
}
}
temp <- data.frame(n = stats[,6],mean = stats[,1], sd = sds, median = stats[,2],min= stats[,3],max=stats[,4], range=stats[,4]-stats[,3],skew = stats[, 5])
answer <- round(data.frame(V=seq(1:len),temp, se = temp$sd/sqrt(temp$n)), digits)
return(answer)
}
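#example (not run)
# describe(attitude) #descriptive statistics for each variable in the built-in attitude data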
# find basic summary statistics by groups #adapted from "Kickstarting R"
summ.stats <- function (x,y) { #data are x, grouping variable is y
means <- tapply(x, y, mean, na.rm=TRUE)
sds <- tapply(x,y, sd, na.rm = TRUE)
valid <- function (x) {return(sum(!is.na(x)) )}
ns <- tapply(x,y, valid )
se=sds/sqrt(ns)
answer <-data.frame(mean=means,sd=sds,se=se,n=ns)
return(answer)
}
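#example (not run) -- mean, sd, se, and n of rating at three (arbitrary) levels of complaints
# summ.stats(attitude$rating,cut(attitude$complaints,3))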
#error bars in a two dimensional plot
old.error.crosses <-function (x,y,z) # x and y are data frames with z points
{for (i in 1:z)
{xcen <- x$mean[i]
ycen <- y$mean[i]
xse <- x$se[i]
yse <- y$se[i]
arrows(xcen-xse,ycen,xcen+xse,ycen,length=.2, angle = 90, code=3,col = par("fg"), lty = NULL, lwd = par("lwd"), xpd = NULL)
arrows(xcen,ycen-yse,xcen,ycen+yse,length=.2, angle = 90, code=3,col = par("fg"), lty = NULL, lwd = par("lwd"), xpd = NULL)
}
}
#error bars in a two dimensional plot with labels
error.crosses <-function (x,y,labels=NULL,pos=NULL,arrow.len=.2,...) # x and y are data frames with a mean and an se for each point
{z <- dim(x)[1]
if (length(pos)==0) {locate<-rep(1,z)} else {locate <- pos}
if (length(labels)==0) lab<- rep("",z) else lab <-labels
for (i in 1:z)
{xcen <- x$mean[i]
ycen <- y$mean[i]
xse <- x$se[i]
yse <- y$se[i]
arrows(xcen-xse,ycen,xcen+xse,ycen,length=arrow.len, angle = 90, code=3,col = par("fg"), lty = NULL, lwd = par("lwd"), xpd = NULL)
arrows(xcen,ycen-yse,xcen,ycen+yse,length=arrow.len, angle = 90, code=3,col = par("fg"), lty = NULL, lwd = par("lwd"), xpd = NULL)
text(xcen,ycen,labels=lab[i],pos=locate[i],cex=1,offset=arrow.len+1) #puts in labels for all points
}
}
#examples
# plot(mPA,mNA,pch=symb[condit],cex=4.5,col=colors[condit],bg=colors[condit],main="Positive vs. Negative Affect",xlim=c(-1,1.5),ylim=c(-1,1.5),xlab="Positive Affect",ylab="Negative Affect")
#error.crosses(paf.stats,naf.stats) #put in x and y error bars!
#find the skew of a vector (describe applies this column by column to get a vector of skews)
skew= function (x, na.rm = FALSE)
{
if (na.rm) x <- x[!is.na(x)] #remove missing values
sum((x - mean(x))^3)/(length(x) * sd(x)^3) #calculate skew
}
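#example (not run) -- a positively skewed and a symmetric distribution
# skew(rchisq(1000,2)) #should be near 2 (the population skew of a chi-square with 2 df)
# skew(rnorm(1000)) #should be near 0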
#some fancy graphics -- adapted from the examples for pairs
panel.cor.scale <- function(x, y, digits=2, prefix="", cex.cor)
{
usr <- par("usr"); on.exit(par(usr))
par(usr = c(0, 1, 0, 1))
r = (cor(x, y,use="pairwise"))
txt <- format(c(r, 0.123456789), digits=digits)[1]
txt <- paste(prefix, txt, sep="")
if(missing(cex.cor)) cex <- 0.8/strwidth(txt)
text(0.5, 0.5, txt, cex = cex * abs(r))
}
panel.cor <- function(x, y, digits=2, prefix="", cex.cor)
{
usr <- par("usr"); on.exit(par(usr))
par(usr = c(0, 1, 0, 1))
r = (cor(x, y,use="pairwise"))
txt <- format(c(round(r,digits), 0.123456789), digits=digits)[1]
txt <- paste(prefix, txt, sep="")
if(missing(cex.cor)) cex <- 0.8/strwidth(txt)
text(0.5, 0.5, txt, cex = cex )
}
panel.hist <- function(x, ...)
{
usr <- par("usr"); on.exit(par(usr))
par(usr = c(usr[1:2], 0, 1.5) )
h <- hist(x, plot = FALSE)
breaks <- h$breaks; nB <- length(breaks)
y <- h$counts; y <- y/max(y)
rect(breaks[-nB], 0, breaks[-1], y, col="cyan", ...)
}
pairs.panels <- function (x,y,smooth=TRUE,scale=FALSE,digits=2) #combines a splom, histograms, and correlations
{if (smooth ){
if (scale) {
pairs(x,diag.panel=panel.hist,upper.panel=panel.cor.scale,lower.panel=panel.smooth)
}
else {pairs(x,diag.panel=panel.hist,upper.panel=panel.cor,lower.panel=panel.smooth)
}
}
else #smooth is not true
{ if (scale) {pairs(x,diag.panel=panel.hist,upper.panel=panel.cor.scale)
} else {pairs(x,diag.panel=panel.hist,upper.panel=panel.cor) }
} #end of else (smooth)
} #end of function
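#example (not run)
# pairs.panels(attitude) #histograms on the diagonal, scatter plots below it, correlations above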
multi.hist <- function(x,...) {nvar <- dim(x)[2] #number of variables
nsize=trunc(sqrt(nvar))+1 #size of graphic
old.par <- par(no.readonly = TRUE) # all par settings which can be changed
on.exit(par(old.par)) #set the graphic parameters back to the original when done
par(mfrow=c(nsize,nsize)) #set new graphic parameters
for (i in 1:nvar) {
name=names(x)[i] #get the names for the variables
hist(x[,i],main=name,xlab=name,...) } #draw the histograms for each variable
}
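#example (not run)
# multi.hist(attitude) #histograms of all seven attitude variables in one figure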
#function to do lowess fitting if we have missing data
lowess.na <- function(x, y = NULL, f = 2/3,...) { #do lowess with missing data
x1 <- subset(x,(!is.na(x)) &(!is.na(y)))
y1 <- subset(y, (!is.na(x)) &(!is.na(y)))
lowess(x1,y1,f=f, ...) #return the lowess fit of the complete cases
}
#function to replace upper triangle of matrix with unattenuated correlations, and the diagonal with reliabilities
#input is a correlation matrix and a vector of reliabilities
correct.cor <- function(x,y) {
n=dim(x)[1]
x[1,1] <- y[1,1]
for (i in 2:n) {
x[i,i] <- y[1,i] #put reliabilities on the diagonal
k=i-1
for (j in 1:k) {
x[j,i] <- x[j,i]/sqrt(y[1,i]*y[1,j]) } #fix the upper triangular part of the matrix
}
return(x) }
#difference of dependent (paired) correlations (following Steiger, J., 1980, Tests for comparing elements of a correlation matrix. Psychological Bulletin, 87, 245-251)
paired.r <- function(xy,xz,yz,n) {
diff <- xy-xz
determin=1-xy*xy - xz*xz - yz*yz + 2*xy*xz*yz
av=(xy+xz)/2
cube= (1-yz)*(1-yz)*(1-yz)
t2 = diff * sqrt((n-1)*(1+yz)/(((2*(n-1)/(n-3))*determin+av*av*cube)))
return(t2)
}
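#example (not run) -- the result is a t statistic with n-3 degrees of freedom; the values here are made up for illustration
# t <- paired.r(.5,.3,.4,100) #does x correlate .5 with y but only .3 with z, given r(y,z)=.4?
# 2*(1-pt(abs(t),100-3)) #two-tailed p value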
fisherz <- function(rho) {0.5*log((1+rho)/(1-rho)) } #converts r to z
#a function to call John Fox's polychor multiple times to form a matrix of polychoric correlations
#needs the polycor package (for the polychor function)
polychor.matrix <-function(x,y=NULL) {
sizex <- dim(x)[2]
if (((is.data.frame(y))|(is.matrix(y)))) sizey<-dim(y)[2]
else sizey <- dim(x)[2]
result<-matrix(1,nrow=sizey,ncol=sizex) #create the output array
colnames(result)<- names(x)
if (((is.data.frame(y))|(is.matrix(y)))) rownames(result) <- names(y)
else rownames(result) <- names(x)
if (!((is.data.frame(y))|(is.matrix(y)))) { #default case returns a square matrix
for (i in 2: sizex ) {
for (j in 1:( i-1)) {
result[j,i]<-polychor(table(x[,j],x[,i]) )
result[i,j] <- result[j,i]
}
}
}
else { #if y is input, then return the rectangular array
for (i in 1: sizex ) {
for (j in 1:sizey) {
result[j,i]<-polychor(table(x[,i],y[,j]) )
}
} }
return (result) }
#count.pairwise -- count the number of good cases when doing pairwise analysis (e.g., for correlations)
count.pairwise <- function(x,y=NULL) {
sizex <- dim(x)[2]
if (is.null(y)) sizey <- dim(x)[2]
else sizey <- dim(y)[2]
result<-matrix(1,nrow=sizey,ncol=sizex) #create the output array
colnames(result)<- colnames(x)
if (is.null(y)) rownames(result) <- colnames(x)
else rownames(result) <- colnames(y)
if (is.null(y)) { #default case returns a square matrix
for (i in 2: sizex ) {
for (j in 1:( i-1)) {
result[j,i]<- sum((!is.na(x[,j]))&(!is.na(x[,i])))
result[i,j] <- result[j,i]
}
}
}
else { #if y is input, then return the rectangular array
for (i in 1: sizex ) {
for (j in 1:sizey) {
result[j,i]<-sum((!is.na(x[,i]))&(!is.na(y[,j])))
}
} }
return (result) }
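#example (not run)
# x <- matrix(rnorm(1000),ncol=4)
# x[sample(1000,50)] <- NA #punch 50 random holes in the data
# count.pairwise(x) #off-diagonal entries give the pairwise n for each pair of columns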
#minor routines to manipulate matrices for factor analysis
#output from eigen is two lists: $values and $vectors
#
eigen.loadings <- function (x) { #convert eigen vectors to loadings by unnormalizing them
n <- length(x$values)
x$values[ x$values<0]<- 0 #treat negative eigenvalues as 0
fix<-sqrt(x$values)
result<- x$vectors * rep(fix, each = n) #scale each eigen vector by the square root of its eigenvalue
return(result)}
#do a principal components analysis by doing eigen vector decomposition and then showing the biggest nfactors
principal <- function(r,nfactors=0) {
n <- dim(r)[1]
result<-list(values=c(rep(0,n)),loadings=matrix(rep(0,n*n),ncol=n),residual=matrix(rep(0,n*n),ncol=n),fit=0)
eigens <-eigen(r) #call the eigen value decomposition routine
result$values <-eigens$values
result$loadings <-eigen.loadings(eigens)
rownames(result$loadings) <- rownames(r)
if (nfactors>0) result$loadings<-result$loadings[,1:nfactors]
residual<- factor.residuals(r,result$loadings)
r2<- sum(r*r)
rstar2<- sum(residual*residual)
result$residual<-residual
result$fit<- 1-rstar2/r2
return(result)
}
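#example (not run) -- two components from the built-in Harman 24 mental tests correlation matrix
# pc <- principal(Harman74.cor$cov,2)
# pc$fit #proportion of the squared correlations reproduced by the 2 component model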
#function to find factor congruence coefficients
factor.congruence <- function (x,y) {
nx<- dim(x)[2]
ny<- dim(y)[2]
cross<- t(y) %*% x #inner product will have dim of ny * nx
sumsx<- sqrt(1/diag(t(x)%*%x))
sumsy<- sqrt(1/diag(t(y)%*%y))
result<- matrix(rep(0,nx*ny),ncol=nx)
result<- sumsy * (cross * rep(sumsx, each = ny))
return(result)
}
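#example (not run) -- congruence of a solution with a rotated version of itself (uses factor.rotate, defined below)
# f <- principal(Harman74.cor$cov,2)$loadings
# factor.congruence(f,factor.rotate(f,30,1,2))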
#estimate a correlation matrix given a particular factor/component model
factor.model <- function(x) {
result<- x%*%t(x)
return (result)}
#find residuals
factor.residuals <- function(r, f) {
rstar<- r- factor.model(f)
return(rstar)}
factor.fit <-function (r,f) {
r2 <-sum( r*r)
rstar <- factor.residuals(r,f)
rstar2 <- sum(rstar*rstar)
fit<- 1- rstar2/r2
return(fit) }
factor.rotate <-function(f,angle,col1,col2) { #rotate columns col1 and col2 of f by angle (in degrees)
nvar<- dim(f)[2]
rot<- matrix(rep(0,nvar*nvar),ncol=nvar)
rot[cbind(1:nvar, 1:nvar)] <- 1
theta<- 2*pi*angle/360
rot[col1,col1]<-cos(theta)
rot[col2,col2]<-cos(theta)
rot[col1,col2]<- -sin(theta)
rot[col2,col1]<- sin(theta)
result <- f %*% rot
return(result) }
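#example (not run) -- an orthogonal rotation leaves the implied correlation matrix unchanged
# f <- principal(Harman74.cor$cov,2)$loadings
# fr <- factor.rotate(f,45,1,2) #rotate components 1 and 2 by 45 degrees
# max(abs(factor.model(f)-factor.model(fr))) #should be essentially 0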
#phi coefficient from a 2 x 2 table (function probably already exists but I can't find it)
phi <- function(t) { #t is a 2 x 2 matrix
r.sum<-rowSums(t)
c.sum <-colSums(t)
total<-sum(r.sum)
r.sum<- r.sum/total
c.sum <-c.sum/total
v<- prod(r.sum,c.sum)
phi <- (t[1,1]/total- c.sum[1]*r.sum[1])/sqrt(v)
return(phi)
}
#an alternative function to do the same, need to combine into one
phi1 <- function(x) { # returns the phi coefficient; x is a set of 4 numbers, e.g. c(1,2,3,4)
xm <- matrix(x,ncol=2)
total <- sum(xm)
xp <- xm/total
HR <- xp[1,1] + xp[1,2]
SR <- xp[1,1]+ xp[2,1]
VP <- xp[1,1]
phi <- (VP - HR*SR) /sqrt(HR*(1-HR)*(SR)*(1-SR))
return(phi)
}
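#example (not run) -- the two versions agree on the same 2 x 2 table
# t <- matrix(c(40,10,20,30),ncol=2)
# phi(t) #about .41
# phi1(c(40,10,20,30)) #same table, entered column by column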
#convert a phi coefficient to the corresponding polychoric correlation
#needs John Fox's polycor package (for the polychor function)
phi2poly <- function(ph,cp,cc) {
#ph is the phi coefficient
#cp is the selection ratio of the predictor
#cc is the success rate of the criterion
r.marg<-rep(0,2)
c.marg<- rep(0,2)
p<-array(rep(0,4),dim=c(2,2))
r.marg[1]<- cp
r.marg[2]<- 1 -cp
c.marg[1]<- cc
c.marg[2]<- 1-cc
p[1,1]<- r.marg[1]*c.marg[1]+ ph*sqrt(prod(r.marg,c.marg))
p[2,2]<- r.marg[2]*c.marg[2]+ ph*sqrt(prod(r.marg,c.marg))
p[1,2]<- r.marg[1]*c.marg[2]- ph*sqrt(prod(r.marg,c.marg))
p[2,1]<- r.marg[2]*c.marg[1]- ph*sqrt(prod(r.marg,c.marg))
result<-polychor(p )
return(result)}
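#example (not run) -- needs the polycor package loaded
# library(polycor)
# phi2poly(.3,.5,.5) #polychoric equivalent of a phi of .3 with 50/50 splits on both variables (roughly .45)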
psycho.demo <- function() {
#simulate correlation matrix with variable cut points -- psychometric demo
#make up some correlations with different cut points
cuts <-c(-2,-1,0,1,2)
nsub<-1000 #how many subjects
r<-.6 #population correlation of observed with theta
theta <-rnorm(nsub) #make up some random normal theta scores
err<- rnorm(nsub) #random normal error scores
obser<- theta*(r) + err*sqrt(1-r*r) #observed = T + E
#convert to 0/1 with different values of cuts
trunc<- matrix(rep(obser,length(cuts)),ncol=length(cuts))
for (i in 1:length(cuts)) {
trunc[obser>cuts[i],i]<- 1
trunc[obser<=cuts[i],i]<- 0}
d.mat<- data.frame(theta,obser,trunc) #combine into a data frame
trunc.cor<- cor(d.mat) #find the correlations
freq <- colMeans(d.mat) #find the frequencies of scores (the mean of a 0/1 variable is the proportion of 1s)
#now, convert the upper diagonal to polychorics using John Fox's polychor and my phi2poly
for (i in 4:length(d.mat)) {
for (j in 3:(i-1)) {
trunc.cor[j,i]<- phi2poly(trunc.cor[i,j],freq[i],freq[j])
}}
return(trunc.cor)
}
#quick demonstration of a random walk process
#to show range of variability over trials and replications
#the "confidence lines" represent 2 sd of theoretical sum
random.walk <- function(length=200,n=20,effect=0) {
num <- seq(1:length)
colors <- rainbow(n) #select colors
x <- cumsum(rnorm(length,effect))/num
plot(x,ylim = c(-2.5,2.5),typ="b",col=colors[1],main="Sample means as function of sample size",ylab="sample mean",xlab="sample size")
for (i in 2:n) {
x <- cumsum(rnorm(length,effect))/num #find the next line
points(x,col=colors[i],typ="b") #draw it
}
curve(2/sqrt(x),1,length,101,add=TRUE) #upper confidence region
curve(-2/sqrt(x),1,length,101,add=TRUE) #lower confidence region
text(length/2,2,"effect size is " )
text(length/2, 1.8,eval(effect))
}
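#example (not run)
# random.walk(200,10,.2) #10 replications of a running mean with a true effect of .2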
#read from clipboard for Mac (thanks to Ken Knoblauch for this hint)
#for PCs, use read.table(file("clipboard"))
read.clipboard<-function(header=TRUE,...) {
MAC<-Sys.info()[1]=="Darwin" #are we on a Mac using the Darwin system?
if (!MAC ) {if (header) read.clipboard<-read.table(file("clipboard"),header=TRUE,...)
else read.clipboard<-read.table(file("clipboard"),...) }
else {
if (header) read.clipboard<- read.table(pipe("pbpaste"),header=TRUE,...)
else read.clipboard<- read.table(pipe("pbpaste") ,...)}
}
read.clipboard.csv<-function(header=TRUE,sep=',',...) { #same as read.clipboard, but with comma as the default separator
MAC<-Sys.info()[1]=="Darwin" #are we on a Mac using the Darwin system?
if (!MAC ) {if (header) read.clipboard<-read.table(file("clipboard"),header=TRUE,sep=sep,...)
else read.clipboard<-read.table(file("clipboard"),sep=sep,...) }
else {
if (header) read.clipboard<- read.table(pipe("pbpaste"),header=TRUE,sep=sep,...)
else read.clipboard<- read.table(pipe("pbpaste"),sep=sep,...)}
}
#functions to help analyze data from correlation matrices
#used as part of the Synthetic Aperture Personality Assessment (SAPA) project
#
#extract cluster keys from a factor/principal components loadings matrix
#a minimal implementation: each variable is keyed (+1 or -1, matching the sign of the loading) on the
#factor where its absolute loading is largest, provided that loading exceeds the cut value
factor2cluster <- function(loads,cut=0) {
nvar <- dim(loads)[1]
nfact <- dim(loads)[2]
keys <- matrix(rep(0,nvar*nfact),ncol=nfact) #start with nothing keyed
for (i in 1:nvar) {
big <- which.max(abs(loads[i,])) #the factor with the largest absolute loading for this variable
if (abs(loads[i,big]) > cut) keys[i,big] <- sign(loads[i,big]) } #key it, keeping the sign
rownames(keys) <- rownames(loads)
colnames(keys) <- colnames(loads)
return(keys)
}
#find the correlation matrix of scales made up of items defined in a keys matrix (e.g., extracted by factor2cluster)
#takes as input the keys matrix as well as a correlation matrix of all the items
cluster.cor <- function(keys,r.mat,correct=TRUE) { #function to extract clusters according to the keys matrix
#default is to correct for attenuation and show this above the diagonal
if(!is.matrix(keys)) keys <- as.matrix(keys) #keys are sometimes a data frame - must be a matrix
covar <- t(keys) %*% r.mat %*% keys #matrix algebra is our friend
var <- diag(covar)
sd.inv <- 1/sqrt(var)
ident.sd <- diag(sd.inv,ncol = length(sd.inv))
cluster.correl <- ident.sd %*% covar %*% ident.sd
key.var <- diag(t(keys) %*% keys)
key.alpha <- ((var-key.var)/var)*(key.var/(key.var-1))
key.alpha[is.nan(key.alpha)] <- 1 #if only 1 variable in the cluster, then alpha is undefined
key.alpha[!is.finite(key.alpha)] <- 1
colnames(cluster.correl) <- names(key.alpha)
rownames(cluster.correl) <- names(key.alpha)
if (correct) { cluster.corrected <- correct.cor(cluster.correl,t(key.alpha)) #correct for attenuation
return(list(cor=cluster.correl,sd=sqrt(var),corrected=cluster.corrected)) }
else return(list(cor=cluster.correl,sd=sqrt(var)))
}
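#example (not run) -- two hypothetical clusters of attitude items, keyed by hand
# r.mat <- cor(attitude)
# keys <- matrix(c(1,1,1,1,0,0,0, 0,0,0,0,1,1,1),ncol=2) #a 4 item and a 3 item cluster
# cluster.cor(keys,r.mat) #the corrected element has raw r below the diagonal, alphas on it, disattenuated r above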
#a function to extract subsets of variables (a and b) from a correlation matrix m
#and find the multiple correlation beta weights + R2 of the a set predicting the b set
mat.regress <- function(m,a,b) {
#first reorder the matrix to select the right variables
nm <- dim(m)[1]
t.mat <- matrix(0,ncol=nm,nrow=nm)
ab <- c(a,b)
numa <- length(a)
numb <- length(b)
nab <- numa+numb
for (i in 1:nab) {
t.mat[i,ab[i]] <- 1 }
reorder <- t.mat %*% m %*% t(t.mat)
a.matrix <- reorder[1:numa,1:numa]
b.matrix <- reorder[1:numa,(numa+1):nab]
model.mat <- solve(a.matrix,b.matrix) #solve the equation bY~aX
if (length(b) >1 ) { rownames(model.mat) <- rownames(m)[a]
colnames(model.mat) <- colnames(m)[b]
r2 <- colSums(model.mat * b.matrix) }
else { r2 <- sum(model.mat * b.matrix)
names(model.mat) <- rownames(m)[a]
names(r2) <- colnames(m)[b]}
mat.regress <- list(beta=model.mat,r2=r2)
return(mat.regress)
}
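#example (not run) -- predict rating (variable 1) from complaints, privileges, and learning
# R <- cor(attitude)
# mat.regress(R,c(2,3,4),1) #standardized betas and R2, all from the correlation matrix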