Skip to content

Instantly share code, notes, and snippets.

@emilyriederer
Created October 10, 2022 13:44
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save emilyriederer/4dcc6a05ea53c82db175e15f698a1fb6 to your computer and use it in GitHub Desktop.
Save emilyriederer/4dcc6a05ea53c82db175e15f698a1fb6 to your computer and use it in GitHub Desktop.
# data source: http://web.mta.info/developers/turnstile.html
library(ggplot2)
library(dplyr)
library(tidyr)
library(readr)
# define read function with schema ----
read_data <- function(url) {
readr::read_csv(url,
col_names = TRUE,
col_types =
cols(
`C/A` = col_character(),
UNIT = col_character(),
SCP = col_character(),
STATION = col_character(),
LINENAME = col_character(),
DIVISION = col_character(),
DATE = col_date(format = "%m/%d/%Y"),
TIME = col_time(format = ""),
DESC = col_character(),
ENTRIES = col_integer(),
EXITS = col_integer()
))
}
# ridership data ----
dates <- seq.Date(from = as.Date('2020-01-11'), to =, as.Date('2020-04-11'), by = '7 days')
dates_str <- format(dates, format = '%y%m%d')
dates_url <- sprintf('http://web.mta.info/developers/data/nyct/turnstile/turnstile_%s.txt', dates_str)
datasets <- lapply(dates_url, FUN = read_data)
full_data <- do.call(rbind, datasets)
full_data <- full_data[full_data$DESC == "REGULAR",]
names(full_data)[1] <- "CA"
rm(datasets)
# max date ----
df_max_date <-
full_data %>%
group_by(CA, UNIT, SCP) %>%
summarize(min_date = min(DATE), max_date = max(DATE)) %>%
ungroup() %>%
arrange(max_date) %>%
mutate(TURNSTILE_ID = row_number())
ggplot(df_max_date %>% filter(TURNSTILE_ID <= 50)) +
aes(y = -1*TURNSTILE_ID, yend = -1*TURNSTILE_ID,
x = min_date, xend = max_date) +
geom_segment(
arrow = arrow(length = unit(0.01, "npc"))
) +
geom_point() +
theme(
axis.text.y = element_blank(),
axis.title = element_blank()
)
df_data_miss <-
full_data %>%
transmute(T = as.POSIXct(DATE, TIME),
TURNSTILE_ID = paste(CA, UNIT, SCP),
ENTRIES) %>%
complete(T, TURNSTILE_ID) %>%
mutate(IND_MISSING = is.na(ENTRIES)) %>%
group_by(T) %>%
summarize(P = sum(IND_MISSING))
ggplot(df_data_miss) +
aes(x = T, y = P) +
geom_point(color = 'darkblue') +
ylim(0,NA) +
theme(
#axis.text.y = element_blank(),
axis.title = element_blank(),
legend.position = 'none'
)
# monotonicity? ----
#> normal ----
df_monotonicity_good <-
full_data %>%
filter(CA == 'A033', UNIT == 'R170', SCP == '02-00-05') %>%
mutate(T = as.POSIXct(paste(DATE, TIME))) %>%
mutate(ENTRIES = (ENTRIES - min(ENTRIES)) / (max(ENTRIES) - min(ENTRIES)) )
ggplot(df_monotonicity_good) +
aes(x = T, y = ENTRIES) +
geom_line(size = 1, color = "darkblue") +
theme(
axis.text.y = element_blank(),
axis.title = element_blank(),
legend.position = 'none'
)
#> this guy always goes down ----
df_monotonicity_down <-
full_data %>%
filter(CA == 'N559', UNIT == 'R425', SCP == '00-06-01') %>%
mutate(T = as.POSIXct(paste(DATE, TIME))) %>%
mutate(ENTRIES = (ENTRIES - min(ENTRIES)) / (max(ENTRIES) - min(ENTRIES)) )
ggplot(df_monotonicity_down) +
aes(x = T, y = ENTRIES) +
geom_line(size = 1, color = "darkblue") +
theme(
axis.text.y = element_blank(),
axis.title = element_blank()
)
#> this plummets and restarts ----
df_monotonicity_drop <-
full_data %>%
filter(CA == 'PTH03', UNIT == 'R552', SCP == '00-00-07') %>%
mutate(T = as.POSIXct(paste(DATE, TIME))) %>%
mutate(ENTRIES = (ENTRIES - min(ENTRIES)) / (max(ENTRIES) - min(ENTRIES)) )
ggplot(df_monotonicity_drop) +
aes(x = T, y = ENTRIES) +
geom_line(size = 1, color = "darkblue") +
theme(
axis.text.y = element_blank(),
axis.title = element_blank(),
text=element_text(size=16, family="Montserrat")
)
#> all of above ----
df_monotonicity <- bind_rows(df_monotonicity_down, df_monotonicity_good, df_monotonicity_drop)
ggplot(df_monotonicity) +
aes(x = T, y = ENTRIES, color = paste(CA, UNIT, SCP)) +
geom_line(size = 1) +
scale_color_manual(values = c("darkblue", "cornflowerblue", "lightblue")) +
theme(
axis.text.y = element_blank(),
axis.title = element_blank(),
text=element_text(size=16, family="Montserrat"),
legend.position = "none"
)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment