This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
require(mlogit) # 0.2-4 | |
as.mldata <- function(data){ | |
# convert HC dataset | |
# The alternatives are | |
# 1. Gas central heat with cooling (gcc) | |
# 2. Electric central resistence heat with cooling (ecc) | |
# 3. Electric room resistence heat with cooling (erc) | |
# 4. Electric heat pump, which provides cooling also (hpc) | |
# 5. Gas central heat without cooling (gc) | |
# 6. Electric central resistence heat without cooling (ec) | |
# 7. Electric room resistence heat without cooling (er) | |
# depvar gives the name of the chosen alternative, | |
# ich.alt are the installation cost for the heating portion of the system, | |
# icca is the installation cost for cooling | |
# och.alt are the operating cost for the heating portion of the system | |
# occa is the operating cost for cooling | |
# income is the annual income of the household | |
data <- mlogit.data(data, varying = c(2:8, 10:16), choice = "depvar", shape = "wide") | |
cooling.modes <- index(data)$alt %in% c('gcc', 'ecc', 'erc', 'hpc') | |
room.modes <- index(data)$alt %in% c('erc', 'er') | |
# installation / operating costs for cooling are constants, | |
# only relevant for mixed systems | |
data$icca[!cooling.modes] <- 0 | |
data$occa[!cooling.modes] <- 0 | |
# create income variables for two sets cooling and rooms | |
data$inc.cooling <- data$inc.room <- 0 | |
data$inc.cooling[cooling.modes] <- data$income[cooling.modes] | |
data$inc.room[room.modes] <- data$income[room.modes] | |
# create an intercet for cooling modes | |
data$int.cooling <- as.numeric(cooling.modes) | |
return(data) | |
} | |
logloss <- function(y, pred){ | |
# y: vector | |
# pred: wide matrix | |
return(-mean(apply(data.frame(y=y, pred), 1, function(x){sum(log(as.numeric(x[x[1]])))}))) | |
} | |
data("HC", package = "mlogit") | |
set.seed(42) | |
train <- 1: nrow(HC) %in% sample(x = 1:nrow(HC), size = 100, replace = F) | |
df.train <- as.mldata(HC[train, ]) | |
df.test <- as.mldata(HC[!train, ]) | |
y <- as.character(HC$depvar) | |
# nested | |
nl <- mlogit(depvar ~ ich + och + icca + occa + inc.room + inc.cooling + int.cooling | 0, df.train, | |
nests = list(cooling = c('gcc','ecc','erc','hpc'), | |
other = c('gc', 'ec', 'er')), | |
un.nest.el = TRUE) | |
# multinomial | |
ml <- update(nl, nests = NULL) | |
summary(nl) | |
summary(ml) | |
logloss(y[!train], predict(object = nl, newdata=df.test)) | |
logloss(y[!train], predict(object = ml, newdata=df.test)) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment