Skip to content

Instantly share code, notes, and snippets.

@richarddmorey
Last active March 4, 2017 14:51
Show Gist options
  • Star 4 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save richarddmorey/787d7b7547a736a49c3f to your computer and use it in GitHub Desktop.
Save richarddmorey/787d7b7547a736a49c3f to your computer and use it in GitHub Desktop.
R script to check possible means and standard deviations for likert responses
############
# Option 1: Get all solutions
# using brute force
############
## This function creates every possible distribution of responses for
## a likert scale with nlev responses. This is total brute force. There's
## probably a better way.
## Argument:
## v : initially, the total number of responses
## nlev : number of likert response levels
## level : used for recursive calling (users ignore)
get.ns = function(v, nlev = 2, level = nlev-1){
if(all(v==0)) return(rep(0,length(v)+1))
le = length(v)
n = v[le]
prev = matrix(v[-le],byrow = TRUE,nrow=n+1,ncol=le-1)
x = cbind(prev,0:n,n:0)
if(level==1){
return(x)
}else{
xl = apply(x,1,get.ns,level=level-1)
if(!is.list(xl)) return(t(xl))
return(do.call(rbind,xl))
}
}
## This function counts the total number of response
## distributions, so you know what you're getting into
## Arguments:
## n : initially, the total number of responses
## nlev : number of likert response levels
## level : used for recursive calling (users ignore)
count.ns = function(n,nlev = 2,level = nlev){
if(n == 0) return(1)
if(level == 2) return(n + 1)
if(level == 3) return( (n^2 + 3*n)/2 + 1 )
return(sum(sapply(0:n,count.ns,level = level - 1) ))
}
## parameters for search
nlev = 6 ## likert response levels
N <- 94 ## total responses
mn <- 0.83 ## Mean response for search (assuming first resp is 0)
std.dev <- 1.21 ## std dev of response for search
## How many will we have to generate?
count.ns(N, nlev)
## compute all possible response distributions (may take minutes)
x = get.ns(N,nlev=nlev)
## compute means of response distributions
mns = colSums(t(x/N)*(0:(nlev-1)))
## compute sum of squared response for response distributions
sx2 = colSums(t(x)*(0:(nlev-1))^2)
## compute standard deviation of responses
sds = sqrt((sx2 - N*mns^2)/(N-1))
## sum squared error across mean and std dev (to find "closest" pattern)
err = (mns - mn)^2 + (sds - std.dev)^2
# sort
srt = order(err)
## Paste everything together
res = cbind(x,mns,sx2,sds,err)
colnames(res) = c(paste("resp",0:(nlev-1),sep=""),c("mean","sum x^2","std. dev.","sum error"))
# sort
res = res[srt,]
## delete unnecessary objects from memory
rm(x,mns,sx2,sds,err,srt)
## look at best 100 patterns
res[1:100,]
############
# Option 2: Get approximate solutions fast
# Using Linear Inverse Models
############
## Compute useful quantity
sumX2 = std.dev^2*(N-1) + N*mn^2
### Constraint: The sum of proportions of responses must be 1
E <- matrix(nrow = 1, byrow = TRUE, data = rep(1,nlev))
F <- 1
### Approximate constraint: mean and sum of squares
### Must approximately match the samples'
A <- matrix(nrow = 2, byrow = TRUE, data = c(1:nlev-1,
(1:nlev-1)^2))
B <- c(mn, sumX2/N)
## Inequality constraint: proportions must be >0 and <1
G <- rbind(diag (nrow = nlev),
-diag (nrow = nlev))
H <- c(rep(0, times = nlev),rep(-1, times = nlev))
### Now, sample potential solutions
# loosen up sdB to find more possible solutions
# You may have to install limSolve first
xs <- limSolve::xsample(A = A, B = B, E = E, F = F, G = G, H = H, sdB = 1)
# Create a matrix of solutions (each solution is a row)
# The solutions will NOT be integer; you'll have to round them
# and check if they're acceptable, but it should get you
# in the right ball park
possibleSolutions = xs$X*N
## first 100 (may be duplicates)
possibleSolutions[1:100,]
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment