andrewheiss/ologit_sans_tidyverse.R

## ologit_sans_tidyverse.R
library(MASS) # For ordered logit with polr()
library(reshape2)  # For reshaping data
library(ggplot2)  # For fun plots
library(broom)  # For converting models into data frames
library(haven)  # For read_stata

example_df <- read_stata("http://stats.idre.ucla.edu/stat/data/ologit.dta")
example_df$apply <- factor(example_df$apply, levels = 1:3,
                           labels = c("unlikely", "somewhat likely", "very likely"),
                           ordered = TRUE)

model <- polr(apply ~ pared + public + gpa,
              data = example_df, Hess = TRUE)

model_results <- tidy(model, exponentiate = TRUE, conf.int = TRUE)

# For whatever reason, polr() doesn't give p-values, so you have to make them yourself
model_results$p_value <- pnorm(abs(model_results$statistic), lower.tail = FALSE) * 2
model_results
#> # A tibble: 5 x 8
#>   term  estimate std.error statistic conf.low conf.high coefficient_type p_value
#>   <chr>    <dbl>     <dbl>     <dbl>    <dbl>     <dbl> <chr>              <dbl>
#> 1 gpa     1.62e0     0.504    0.955     0.604      4.39 coefficient       0.340
#> 2 pared   1.65e0     0.406    1.23      0.729      3.62 coefficient       0.219
#> 3 publ…   2.22e0     0.479    1.67      0.848      5.63 coefficient       0.0955
#> 4 some…   5.45e8   721.       0.0279   NA         NA    zeta              0.978
#> 5 unli…   2.03e1     1.54     1.95     NA         NA    zeta              0.0507

# Make a new data frame for values that'll be held at their means and values that'll be varied
# expand.grid() makes a data frame with every combination of the different values
newdata <- expand.grid(
  pared = c(0, 1),
  public = c(0, 1),
  gpa = seq(1.9, 4, by = 0.1))

# Add predictions
newdata <- cbind(newdata, predict(model, newdata, type = "probs"))

# This is the original way to reshape data from wide to long
newdata_long <- melt(newdata, id.vars = c("pared", "public", "gpa"),
                     variable.name = "Level", value.name = "Probability")

# I find that it's easier with pivot_longer() from the tidyr package
# library(tidyr)
# newdata_long <- pivot_longer(newdata,
#                              cols = c("unlikely", "somewhat likely", "very likely"),
#                              names_to = "Level", values_to = "Probability")

ggplot(newdata_long, aes(x = gpa, y = Probability, color = Level)) +
  geom_line() +
  facet_grid(pared ~ public)
	library(MASS) # For ordered logit with polr()
	library(reshape2) # For reshaping data
	library(ggplot2) # For fun plots
	library(broom) # For converting models into data frames
	library(haven) # For read_stata

	example_df <- read_stata("http://stats.idre.ucla.edu/stat/data/ologit.dta")
	example_df$apply <- factor(example_df$apply, levels = 1:3,
	labels = c("unlikely", "somewhat likely", "very likely"),
	ordered = TRUE)

	model <- polr(apply ~ pared + public + gpa,
	data = example_df, Hess = TRUE)

	model_results <- tidy(model, exponentiate = TRUE, conf.int = TRUE)

	# For whatever reason, polr() doesn't give p-values, so you have to make them yourself
	model_results$p_value <- pnorm(abs(model_results$statistic), lower.tail = FALSE) * 2
	model_results
	#> # A tibble: 5 x 8
	#> term estimate std.error statistic conf.low conf.high coefficient_type p_value
	#> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <chr> <dbl>
	#> 1 gpa 1.62e0 0.504 0.955 0.604 4.39 coefficient 0.340
	#> 2 pared 1.65e0 0.406 1.23 0.729 3.62 coefficient 0.219
	#> 3 publ… 2.22e0 0.479 1.67 0.848 5.63 coefficient 0.0955
	#> 4 some… 5.45e8 721. 0.0279 NA NA zeta 0.978
	#> 5 unli… 2.03e1 1.54 1.95 NA NA zeta 0.0507

	# Make a new data frame for values that'll be held at their means and values that'll be varied
	# expand.grid() makes a data frame with every combination of the different values
	newdata <- expand.grid(
	pared = c(0, 1),
	public = c(0, 1),
	gpa = seq(1.9, 4, by = 0.1))

	# Add predictions
	newdata <- cbind(newdata, predict(model, newdata, type = "probs"))

	# This is the original way to reshape data from wide to long
	newdata_long <- melt(newdata, id.vars = c("pared", "public", "gpa"),
	variable.name = "Level", value.name = "Probability")

	# I find that it's easier with pivot_longer() from the tidyr package
	# library(tidyr)
	# newdata_long <- pivot_longer(newdata,
	# cols = c("unlikely", "somewhat likely", "very likely"),
	# names_to = "Level", values_to = "Probability")

	ggplot(newdata_long, aes(x = gpa, y = Probability, color = Level)) +
	geom_line() +
	facet_grid(pared ~ public)