gdemin/ConjointExample.R

## ConjointExample.R
options(stringsAsFactors = FALSE)
library(ChoiceModelR)
library(foreign)
w = read.spss("data.sav",use.value.labels = FALSE, to.data.frame = TRUE)
colnames(w) = tolower(colnames(w))
# token - unique id
# sort by token
if (!anyDuplicated(w$token)) w = w[order(w$token),] else stop("Duplicated ID.")

w1 = subset(w,!is.na(q4_1),select = c(token,s1,q4_1:q4_9))
str(w1)

# design of survey - 27 cards
# card num, brand, price
plan = unlist(strsplit(
"1,BrandA,161
14,BrandB,195.21
20,BrandC,177.46
2,BrandA,177.46
12,BrandB,185.15
21,BrandC,185.15
8,BrandA,212.95
15,BrandB,199.1
22,BrandC,190.05
7,BrandA,204.08
10,BrandB,161
25,BrandC,204.08
6,BrandA,199.1
11,BrandB,177.46
23,BrandC,195.21
4,BrandA,190.05
13,BrandB,190.05
24,BrandC,199.1
5,BrandA,195.21
18,BrandB,217.2
19,BrandC,161
9,BrandA,217.2
16,BrandB,204.08
26,BrandC,212.95
3,BrandA,185.15
17,BrandB,212.95
27,BrandC,217.2",
",|\n"))

plan = as.data.frame(matrix(plan,ncol=3,byrow=TRUE))
str(plan)
colnames(plan) = c("card","brand","price")

plan$card = as.numeric(plan$card)
plan$brand = match(plan$brand,sort(unique(plan$brand)))

plan$price = match(as.numeric(plan$price),sort(as.numeric(unique(plan$price))))


plan # quick look at the plan

# we will use mixed model
# s1 - city (two possible values, 1 and 2)
# if distinct values more than two, we should encode variable to dummies
city = scale(ifelse(w1$s1==1,0,1),scale=F)


#### warnings here
#### we prepare data for ChoiceModelR. See ?choicemodelr
res=apply(subset(w1,select=-s1), 1,function(resp){
	cbind(id=resp[1],plan[,-1],choice=1*(plan$card %in% resp[-1]))
})

str(res)
w2=do.call(rbind,res) ## for each respondent we have several records with choices

str(w2)
table(w2$choice)
### here we assign number for choice. Choices from the same task have the same number
w2$task=ave(w2$id,w2$id,FUN=function(x) rep(1:9,each=3))
str(w2)

# take care about "Hard to say"
w2$choice2=ave(w2$choice,w2$id,w2$task,FUN=function(choice){
		if (all(choice==0)) choice2=c(4,c(0,0)) else choice2=c(which(choice>0),c(0,0))
		if (length(choice2)!=3) stop (choice2," unexpected")
		choice2
})

# alternative number in each task
w2$alt=1:3
str(w2)

# rearrange coolumns
w3=subset(w2,select=c(id,task,alt,`brand`:`price`,choice2))
str(w3)


# infrom choicemodelr about type of variables
# we consider both brand and price as caategorical variables
xcoding=rep(0,2)

### put constraints on prices: higher prices should have lower utilities
constr=function(size,interval=NULL,value=0){
	res=matrix(0,nrow=size,ncol=size)
	trig=upper.tri(res)
	if (!is.null(interval)) {
		trig[-interval,]=FALSE
		trig[,-interval]=FALSE
	}
	res[trig]=value
	res
}

# • c1[i, j] = 1, beta_i > beta_j

# • c1[i, j] = -1, beta_i < beta_j

# • c1[i, j] = 0, no constraint

constraints=list(
	# brand
	constr(3),
	# price
	constr(9,NULL,1)

)


# just to make things a little faster
library(compiler)
enableJIT(3)
########
mcmc=list(R=8000,use=4000)
opt=list(none=TRUE) # we have "Hard to say" option


cconj=cmpfun(choicemodelr)
conj=function(){
	print(date())
	flush.console()
	res=cconj(w3,xcoding=xcoding,demos=city,mcmc=mcmc,constraints=constraints,options=opt)
	date()
}

# It's take rather long time to compute, may be several hours. Utilities will be
# located in the file rbetas.csv in your working folder.
conj()
	options(stringsAsFactors = FALSE)
	library(ChoiceModelR)
	library(foreign)
	w = read.spss("data.sav",use.value.labels = FALSE, to.data.frame = TRUE)
	colnames(w) = tolower(colnames(w))
	# token - unique id
	# sort by token
	if (!anyDuplicated(w$token)) w = w[order(w$token),] else stop("Duplicated ID.")

	w1 = subset(w,!is.na(q4_1),select = c(token,s1,q4_1:q4_9))
	str(w1)

	# design of survey - 27 cards
	# card num, brand, price
	plan = unlist(strsplit(
	"1,BrandA,161
	14,BrandB,195.21
	20,BrandC,177.46
	2,BrandA,177.46
	12,BrandB,185.15
	21,BrandC,185.15
	8,BrandA,212.95
	15,BrandB,199.1
	22,BrandC,190.05
	7,BrandA,204.08
	10,BrandB,161
	25,BrandC,204.08
	6,BrandA,199.1
	11,BrandB,177.46
	23,BrandC,195.21
	4,BrandA,190.05
	13,BrandB,190.05
	24,BrandC,199.1
	5,BrandA,195.21
	18,BrandB,217.2
	19,BrandC,161
	9,BrandA,217.2
	16,BrandB,204.08
	26,BrandC,212.95
	3,BrandA,185.15
	17,BrandB,212.95
	27,BrandC,217.2",
	",\|\n"))

	plan = as.data.frame(matrix(plan,ncol=3,byrow=TRUE))
	str(plan)
	colnames(plan) = c("card","brand","price")

	plan$card = as.numeric(plan$card)
	plan$brand = match(plan$brand,sort(unique(plan$brand)))

	plan$price = match(as.numeric(plan$price),sort(as.numeric(unique(plan$price))))


	plan # quick look at the plan

	# we will use mixed model
	# s1 - city (two possible values, 1 and 2)
	# if distinct values more than two, we should encode variable to dummies
	city = scale(ifelse(w1$s1==1,0,1),scale=F)


	#### warnings here
	#### we prepare data for ChoiceModelR. See ?choicemodelr
	res=apply(subset(w1,select=-s1), 1,function(resp){
	cbind(id=resp[1],plan[,-1],choice=1*(plan$card %in% resp[-1]))
	})

	str(res)
	w2=do.call(rbind,res) ## for each respondent we have several records with choices

	str(w2)
	table(w2$choice)
	### here we assign number for choice. Choices from the same task have the same number
	w2$task=ave(w2$id,w2$id,FUN=function(x) rep(1:9,each=3))
	str(w2)

	# take care about "Hard to say"
	w2$choice2=ave(w2$choice,w2$id,w2$task,FUN=function(choice){
	if (all(choice==0)) choice2=c(4,c(0,0)) else choice2=c(which(choice>0),c(0,0))
	if (length(choice2)!=3) stop (choice2," unexpected")
	choice2
	})

	# alternative number in each task
	w2$alt=1:3
	str(w2)

	# rearrange coolumns
	w3=subset(w2,select=c(id,task,alt,`brand`:`price`,choice2))
	str(w3)


	# infrom choicemodelr about type of variables
	# we consider both brand and price as caategorical variables
	xcoding=rep(0,2)

	### put constraints on prices: higher prices should have lower utilities
	constr=function(size,interval=NULL,value=0){
	res=matrix(0,nrow=size,ncol=size)
	trig=upper.tri(res)
	if (!is.null(interval)) {
	trig[-interval,]=FALSE
	trig[,-interval]=FALSE
	}
	res[trig]=value
	res
	}

	# • c1[i, j] = 1, beta_i > beta_j

	# • c1[i, j] = -1, beta_i < beta_j

	# • c1[i, j] = 0, no constraint

	constraints=list(
	# brand
	constr(3),
	# price
	constr(9,NULL,1)

	)


	# just to make things a little faster
	library(compiler)
	enableJIT(3)
	########
	mcmc=list(R=8000,use=4000)
	opt=list(none=TRUE) # we have "Hard to say" option


	cconj=cmpfun(choicemodelr)
	conj=function(){
	print(date())
	flush.console()
	res=cconj(w3,xcoding=xcoding,demos=city,mcmc=mcmc,constraints=constraints,options=opt)
	date()
	}

	# It's take rather long time to compute, may be several hours. Utilities will be
	# located in the file rbetas.csv in your working folder.
	conj()