Skip to content

Instantly share code, notes, and snippets.

@vikjam
Last active June 30, 2017 11:47
Show Gist options
  • Save vikjam/007b850b110f27c9cfca to your computer and use it in GitHub Desktop.
Save vikjam/007b850b110f27c9cfca to your computer and use it in GitHub Desktop.
Visualizing probit regressions in R
library(readxl)
library(ggplot2)
library(reshape2)
asq.data <- read_excel("ASQ.xlsx")
variables <- c("Interval" ,
"CommunicationResults" ,
"GrossMotorResults" ,
"FineMotorResults" ,
"ProblemSolvingResults",
"PersonalSocialResults")
asq.abstract <- asq.data[ , variables]
asq.abstract$months <- as.numeric(gsub("Month", "", asq.data$Interval))
recode.result <- function(x) {
as.numeric(grepl("Above", x))
}
recoded <- sapply(asq.abstract[ , 2:6], recode.result)
recoded.names <- c("Communication" ,
"GrossMotor" ,
"FineMotor" ,
"ProblemSolving",
"PersonalSocial")
colnames(recoded) <- recoded.names
asq.coded <- cbind(asq.abstract, recoded)
asq.coded$months2 <- asq.coded$months^2
asq.coded$monthcuts <- cut(asq.coded$months, breaks = 10)
binscatter <- aggregate(asq.coded[ , recoded.names],
list(asq.coded$monthcuts),
mean)
colnames(binscatter) <- c("Interval", recoded.names)
melted <- melt(binscatter,
id.vars = c("Interval"))
colnames(melted) <- c("Interval", "Skill", "Fraction")
g <- ggplot(melted, aes(x = Interval,
y = Fraction,
group = Skill))
plot.bins <- g + geom_line(aes(colour = Skill)) +
geom_point(aes(colour = Skill)) +
theme(axis.text.x = element_text(angle = 45,
hjust = 1,
vjust = 1))
logit_x <- function(x) {
glm(as.formula(paste(x, " ~ months + months2")),
family = binomial(link = "logit"),
data = asq.coded)
}
logit_list <- lapply(recoded.names, logit_x)
for (l in 1:5) {
print(paste("Results for", recoded.names[l]))
print(summary(logit_list[[l]]))
}
# End of script
# load ggplot2 for graphing
library(ggplot2)
# load reshape2 for reshaping
library(reshape2)
# Generate simulated data
x <- runif(n = 1000, min = -10, max = 10)
ystar <- x^2 + rnorm(n = 1000, sd = 10) # Latent
y <- as.numeric(50 <= ystar) # Observed
sim <- data.frame(x = x,
x2 = x^2,
ystar = ystar,
y = y)
# Inspect the data with little assumptions (first, with loess, then by binning)
l <- ggplot(sim, aes(x = x, y = y))
plot.loess <- l + stat_smooth(method = "loess")
ggsave(file = "plot-loess.pdf", plot = plot.loess)
sim$cuts <- as.factor(cut(sim$x, breaks = 21))
binscatter <- aggregate(sim$y, list(sim$cuts), mean)
colnames(binscatter) <- c("Interval", "Fraction")
g <- ggplot(binscatter, aes(x = Interval, y = Fraction))
plot.bins <- g + geom_point() +
theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1))
ggsave(file = "plot-bin.pdf", plot = plot.bins)
# Probit regression with linear term
probit_l <- glm(y ~ x,
family = binomial(link = "probit"),
data = sim)
# Probit regression with quadratic term
probit_q <- glm(y ~ x + x2,
family = binomial(link = "probit"),
data = sim)
# Plot predictions
sim$Linear <- predict(probit_l, type = "response")
sim$Quadratic <- predict(probit_q, type = "response")
sim$Truth <- pnorm(sim$x2 - 50, sd = 10)
melted <- melt(sim,
measure.vars = c("Linear", "Quadratic", "Truth"))
colnames(melted) <- c("x", "x2", "ystar", "y", "cuts", "Model", "Probability")
p <- ggplot(melted, aes(x = x, y = Probability, group = Model))
plot.predict <- p + geom_line(aes(colour = Model))
ggsave(file = "plot-predict.pdf", plot = plot.predict)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment