richarddmorey/likert_check.R

## likert_check.R
############
# Option 1: Get all solutions
# using brute force
############

## This function creates every possible distribution of responses for
## a likert scale with nlev responses. This is total brute force. There's
## probably a better way.
## Argument:
## v : initially, the total number of responses
## nlev : number of likert response levels
## level : used for recursive calling (users ignore)
get.ns = function(v, nlev = 2, level = nlev-1){
  if(all(v==0)) return(rep(0,length(v)+1))
  le = length(v)
  n = v[le]
  prev = matrix(v[-le],byrow = TRUE,nrow=n+1,ncol=le-1)
  x = cbind(prev,0:n,n:0)
  if(level==1){
    return(x)
  }else{
    xl = apply(x,1,get.ns,level=level-1)
    if(!is.list(xl)) return(t(xl))
    return(do.call(rbind,xl))
  }
}

## This function counts the total number of response
## distributions, so you know what you're getting into
## Arguments:
## n : initially, the total number of responses
## nlev : number of likert response levels
## level : used for recursive calling (users ignore)
count.ns = function(n,nlev = 2,level = nlev){
  if(n == 0) return(1)
  if(level == 2) return(n + 1)
  if(level == 3) return( (n^2 + 3*n)/2 + 1 )
  return(sum(sapply(0:n,count.ns,level = level - 1) ))
}


## parameters for search
nlev = 6        ## likert response levels
N <- 94         ## total responses
mn <- 0.83      ## Mean response for search (assuming first resp is 0)
std.dev <- 1.21 ## std dev of response for search

## How many will we have to generate?
count.ns(N, nlev)

## compute all possible response distributions (may take minutes)
x = get.ns(N,nlev=nlev)
## compute means of response distributions
mns = colSums(t(x/N)*(0:(nlev-1)))
## compute sum of squared response for response distributions
sx2 = colSums(t(x)*(0:(nlev-1))^2)
## compute standard deviation of responses
sds = sqrt((sx2 - N*mns^2)/(N-1))
## sum squared error across mean and std dev (to find "closest" pattern)
err = (mns - mn)^2 + (sds - std.dev)^2
# sort
srt = order(err)

## Paste everything together
res = cbind(x,mns,sx2,sds,err)
colnames(res) = c(paste("resp",0:(nlev-1),sep=""),c("mean","sum x^2","std. dev.","sum error"))
# sort
res = res[srt,]

## delete unnecessary objects from memory
rm(x,mns,sx2,sds,err,srt)

## look at best 100 patterns
res[1:100,]

############
# Option 2: Get approximate solutions fast
# Using Linear Inverse Models
############

## Compute useful quantity
sumX2 = std.dev^2*(N-1) + N*mn^2


### Constraint: The sum of proportions of responses must be 1
E <- matrix(nrow = 1, byrow = TRUE, data = rep(1,nlev))
F <- 1

### Approximate constraint: mean and sum of squares
### Must approximately match the samples'
A <- matrix(nrow = 2, byrow = TRUE, data = c(1:nlev-1,
                                             (1:nlev-1)^2))
B <- c(mn, sumX2/N)

## Inequality constraint: proportions must be >0 and <1
G <- rbind(diag (nrow = nlev),
           -diag (nrow = nlev))
H <- c(rep(0, times = nlev),rep(-1, times = nlev))

### Now, sample potential solutions
# loosen up sdB to find more possible solutions
# You may have to install limSolve first
xs <- limSolve::xsample(A = A, B = B, E = E, F = F, G = G, H = H, sdB = 1)

# Create a matrix of solutions (each solution is a row)
# The solutions will NOT be integer; you'll have to round them
# and check if they're acceptable, but it should get you
# in the right ball park
possibleSolutions = xs$X*N

## first 100 (may be duplicates)
possibleSolutions[1:100,]
	############
	# Option 1: Get all solutions
	# using brute force
	############

	## This function creates every possible distribution of responses for
	## a likert scale with nlev responses. This is total brute force. There's
	## probably a better way.
	## Argument:
	## v : initially, the total number of responses
	## nlev : number of likert response levels
	## level : used for recursive calling (users ignore)
	get.ns = function(v, nlev = 2, level = nlev-1){
	if(all(v==0)) return(rep(0,length(v)+1))
	le = length(v)
	n = v[le]
	prev = matrix(v[-le],byrow = TRUE,nrow=n+1,ncol=le-1)
	x = cbind(prev,0:n,n:0)
	if(level==1){
	return(x)
	}else{
	xl = apply(x,1,get.ns,level=level-1)
	if(!is.list(xl)) return(t(xl))
	return(do.call(rbind,xl))
	}
	}

	## This function counts the total number of response
	## distributions, so you know what you're getting into
	## Arguments:
	## n : initially, the total number of responses
	## nlev : number of likert response levels
	## level : used for recursive calling (users ignore)
	count.ns = function(n,nlev = 2,level = nlev){
	if(n == 0) return(1)
	if(level == 2) return(n + 1)
	if(level == 3) return( (n^2 + 3*n)/2 + 1 )
	return(sum(sapply(0:n,count.ns,level = level - 1) ))
	}


	## parameters for search
	nlev = 6 ## likert response levels
	N <- 94 ## total responses
	mn <- 0.83 ## Mean response for search (assuming first resp is 0)
	std.dev <- 1.21 ## std dev of response for search

	## How many will we have to generate?
	count.ns(N, nlev)

	## compute all possible response distributions (may take minutes)
	x = get.ns(N,nlev=nlev)
	## compute means of response distributions
	mns = colSums(t(x/N)*(0:(nlev-1)))
	## compute sum of squared response for response distributions
	sx2 = colSums(t(x)*(0:(nlev-1))^2)
	## compute standard deviation of responses
	sds = sqrt((sx2 - N*mns^2)/(N-1))
	## sum squared error across mean and std dev (to find "closest" pattern)
	err = (mns - mn)^2 + (sds - std.dev)^2
	# sort
	srt = order(err)

	## Paste everything together
	res = cbind(x,mns,sx2,sds,err)
	colnames(res) = c(paste("resp",0:(nlev-1),sep=""),c("mean","sum x^2","std. dev.","sum error"))
	# sort
	res = res[srt,]

	## delete unnecessary objects from memory
	rm(x,mns,sx2,sds,err,srt)

	## look at best 100 patterns
	res[1:100,]

	############
	# Option 2: Get approximate solutions fast
	# Using Linear Inverse Models
	############

	## Compute useful quantity
	sumX2 = std.dev^2(N-1) + Nmn^2


	### Constraint: The sum of proportions of responses must be 1
	E <- matrix(nrow = 1, byrow = TRUE, data = rep(1,nlev))
	F <- 1

	### Approximate constraint: mean and sum of squares
	### Must approximately match the samples'
	A <- matrix(nrow = 2, byrow = TRUE, data = c(1:nlev-1,
	(1:nlev-1)^2))
	B <- c(mn, sumX2/N)

	## Inequality constraint: proportions must be >0 and <1
	G <- rbind(diag (nrow = nlev),
	-diag (nrow = nlev))
	H <- c(rep(0, times = nlev),rep(-1, times = nlev))

	### Now, sample potential solutions
	# loosen up sdB to find more possible solutions
	# You may have to install limSolve first
	xs <- limSolve::xsample(A = A, B = B, E = E, F = F, G = G, H = H, sdB = 1)

	# Create a matrix of solutions (each solution is a row)
	# The solutions will NOT be integer; you'll have to round them
	# and check if they're acceptable, but it should get you
	# in the right ball park
	possibleSolutions = xs$X*N

	## first 100 (may be duplicates)
	possibleSolutions[1:100,]