# Piping Operator %>% in dplyr
# %>% OPERATOR ---------------------------------------------------------------
# With the %>% operator: the steps read top to bottom, and the result of each
# verb is passed as the first argument of the next one.
# Assumes dplyr and the hflights data set are already attached
# (e.g. library(dplyr); library(hflights)) -- confirm in the calling session.
hflights %>%
  # Add diff, the difference between the TaxiOut and TaxiIn columns.
  mutate(diff = TaxiOut - TaxiIn) %>%
  # Drop rows where diff is NA so that mean() below is not NA.
  filter(!is.na(diff)) %>%
  # Collapse to a single row holding the average difference.
  summarise(avg = mean(diff))
# Without the %>% operator: the same computation written as nested calls.
# It has to be read inside-out (mutate -> filter -> summarise), and each
# function's arguments drift further and further away from its name.
summarise(
  filter(
    mutate(hflights, diff = TaxiOut - TaxiIn),
    !is.na(diff)
  ),
  avg = mean(diff)
)
# With the %>% operator: keep four columns of hflights, then derive
#   RealTime = ActualElapsedTime + 100
#   mph      = Distance / RealTime * 60
# and store the result in d.
d <- hflights %>%
  select(Dest, UniqueCarrier, Distance, ActualElapsedTime) %>%
  mutate(
    RealTime = ActualElapsedTime + 100,
    # mph refers to RealTime, which was created earlier in this same mutate().
    mph = Distance / RealTime * 60
  )
# Without the %>% operator: the same d built with nested calls.
# select() runs first (innermost); mutate() then adds RealTime and mph.
d <- mutate(
  select(hflights, Dest, UniqueCarrier, Distance, ActualElapsedTime),
  RealTime = ActualElapsedTime + 100,
  mph = Distance / RealTime * 60
)
# Filter and summarise d: among rows with a non-missing mph below 70, report
#   n_less   - the number of such rows,
#   n_dest   - the number of distinct destinations,
#   min_dist - the smallest Distance,
#   max_dist - the largest Distance.
d %>%
  filter(!is.na(mph), mph < 70) %>%
  summarise(
    n_less = n(),
    n_dest = n_distinct(Dest),
    min_dist = min(Distance),
    max_dist = max(Distance)
  )
# Let's define preferable flights as flights that are 150% faster than driving,
# i.e. that travel 105 mph or greater in real time. Also, assume that cancelled
# or diverted flights are less preferable than driving.

# ADVANCED PIPING EXERCISES
# Use one single piped call to print a summary with the following variables:
#   n_non    - the number of non-preferable flights in hflights,
#   p_non    - the percentage of non-preferable flights in hflights,
#   n_dest   - the number of destinations that non-preferable flights
#              traveled to,
#   min_dist - the minimum distance that non-preferable flights traveled,
#   max_dist - the maximum distance that non-preferable flights traveled
hflights %>%
  mutate(
    RealTime = ActualElapsedTime + 100,
    mph = Distance / RealTime * 60
  ) %>%
  # A flight is non-preferable if ANY of the three conditions holds. With `|`,
  # a row is kept when one condition is TRUE even if another evaluates to NA.
  filter(mph < 105 | Cancelled == 1 | Diverted == 1) %>%
  summarise(
    n_non = n(),
    # n_non was defined just above in this same summarise(); nrow(hflights)
    # is the row count of the full, unfiltered data set.
    p_non = 100 * n_non / nrow(hflights),
    n_dest = n_distinct(Dest),
    min_dist = min(Distance),
    max_dist = max(Distance)
  )
# Use summarise() to create a summary of hflights with a single variable, n,
# that counts the number of overnight flights. These flights have an arrival
# time that is earlier than their departure time. Only include flights that
# have no NA values for both DepTime and ArrTime in your count.
hflights %>%
  # The NA checks state the requirement explicitly. filter() would drop rows
  # where the comparison is NA anyway, so the count is unchanged.
  filter(!is.na(DepTime), !is.na(ArrTime), ArrTime < DepTime) %>%
  summarise(n = n())
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment