Skip to content

Instantly share code, notes, and snippets.

@alcazar90
Last active August 29, 2017 03:44
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save alcazar90/7585306aef0e5f1adabe8bc00e79b014 to your computer and use it in GitHub Desktop.
Save alcazar90/7585306aef0e5f1adabe8bc00e79b014 to your computer and use it in GitHub Desktop.
library(ISLR)
# prepare data example
df <- ISLR::Hitters[, c("Salary", "Years")]
row.names(df) <- NULL
df$Salary <- log(df$Salary)
# remove the observation with missing values in the response variable
df <- df[!is.na(df$Salary), ]
x <- df$Years
y <- df$Salary
# sort the variable to split
to_s <- sort(x)
# unique observation
to_s <- unique(to_s)
# create the mid-point as a cut-points candidate to split the space
s <- vector("double", length(to_s) - 1)
i <- 1
while (i < length(to_s)) {
s[i] <- (to_s[i + 1] + to_s[i]) / 2
i <- i + 1
}
# compute the loss function for each s candidate
output <- vector("double", length(s))
var <- x
for (i in 1:length(s)) {
half_plane <- var > s[i]
yhat1 <- mean(y[half_plane])
yhat2 <- mean(y[!half_plane])
e1 <- y[half_plane] - yhat1
e2 <- y[!half_plane] - yhat2
SRC1 <- sum(e1 ^ 2)
SRC2 <- sum(e2 ^ 2)
SRCT <- SRC1 + SRC2
output[i] <- SRCT
names(output)[i] <- s[i]
}
# plot the loss function; exist two possible values as minimum
p <- data.frame(x = as.double(names(output)), y = output,
stringsAsFactors = FALSE)
plot(p$x, p$y, type = "l")
output[which.min(output)]
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment