Skip to content

Instantly share code, notes, and snippets.

@topepo
Created February 25, 2020 18:29
Show Gist options
  • Save topepo/952fee9ff3cdce5544dde925b6c87532 to your computer and use it in GitHub Desktop.
Save topepo/952fee9ff3cdce5544dde925b6c87532 to your computer and use it in GitHub Desktop.
Data analysis for the NYC flights data set
library(tidymodels)
library(nycflights13)
# Seed the RNG so the 10,000-row sample drawn below is reproducible.
set.seed(25213)

# Build the modeling data set: label each flight "late" or "on_time", attach
# the weather rows that match on origin airport and hour, drop incomplete
# rows, and keep a random sample of 10,000 flights.
flight_data <-
  flights %>%
  mutate(
    # arr_delay >= 30 is labelled "late". A missing arr_delay yields NA here,
    # and that row is removed by na.omit() further down.
    delay = ifelse(arr_delay >= 30, "late", "on_time"),
    delay = factor(delay),
    date = as.Date(time_hour)
  ) %>%
  # Each column is listed exactly once; time_hour is carried along only
  # because it is a join key for the weather table.
  select(
    dep_time, flight, origin, dest, air_time, distance,
    date, delay, time_hour
  ) %>%
  inner_join(
    # Drop the weather table's own calendar columns before joining.
    weather %>% select(-month, -day, -hour, -year),
    by = c("origin", "time_hour")
  ) %>%
  select(-time_hour) %>%
  na.omit() %>%
  # sample_n() is kept as-is so the draw made under the seed above is not
  # disturbed.
  sample_n(10000) %>%
  # Convert every remaining character column to a factor.
  mutate(across(where(is.character), as.factor))
# Reproducible train/holdout partition: three quarters of the rows are used
# for training, the remainder is held out for evaluation.
set.seed(48256)
data_split <- flight_data %>% initial_split(prop = 3 / 4)

train_data <- data_split %>% training()
holdout_data <- data_split %>% testing()
# Model specification: logistic regression fitted with the "glm" engine.
lr_model <- set_engine(logistic_reg(), "glm")
# Preprocessing recipe, assembled one step at a time. `delay` is the outcome
# and every other column of train_data starts out as a predictor.
flights_rec <- recipe(delay ~ ., data = train_data)

# Derive day-of-week and month features from the flight date.
flights_rec <- step_date(flights_rec, date, features = c("dow", "month"))

# Add holiday features for the holidays listed by timeDate for "US".
flights_rec <- step_holiday(
  flights_rec,
  date,
  holidays = timeDate::listHolidays("US")
)

# Give the raw date column the non-predictor role "date".
flights_rec <- update_role(flights_rec, date, new_role = "date")

# Dummy-encode all nominal columns except the outcome.
flights_rec <- step_dummy(flights_rec, all_nominal(), -all_outcomes())

# Drop zero-variance predictors.
flights_rec <- step_zv(flights_rec, all_predictors())
# Bundle the model specification and the preprocessing recipe into a single
# workflow, adding the model first and the recipe second.
flights_wflow <- workflow()
flights_wflow <- add_model(flights_wflow, lr_model)
flights_wflow <- add_recipe(flights_wflow, flights_rec)

# Fit the workflow (recipe and model) on the training rows.
flights_fit <- flights_wflow %>% fit(data = train_data)
# Class-probability predictions for the holdout rows, placed side by side
# with the observed `delay` column so they can be scored.
flights_pred <- bind_cols(
  predict(flights_fit, holdout_data, type = "prob"),
  select(holdout_data, delay)
)
# Plot the holdout ROC curve, using the predicted probability of "late" as
# the score and `delay` as the truth column.
autoplot(roc_curve(flights_pred, truth = delay, .pred_late))

# Area under that ROC curve.
flights_pred %>%
  roc_auc(truth = delay, .pred_late)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment