Created
December 19, 2015 07:47
-
-
Save anirudhjayaraman/291ac968e54f24fabea4 to your computer and use it in GitHub Desktop.
Piping Operator %>% in dplyr
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# %>% OPERATOR ----------------------------------------------------------------

# with %>% operator
# Each step receives the result of the previous one as its first argument,
# so the code reads top to bottom in the order the operations are applied.
hflights %>%
  mutate(diff = TaxiOut - TaxiIn) %>%
  filter(!is.na(diff)) %>%
  summarise(avg = mean(diff))

# without %>% operator
# arguments get further and further apart: the innermost call runs first and
# each verb's own arguments end up far from the verb's name
summarise(
  filter(mutate(hflights, diff = TaxiOut - TaxiIn), !is.na(diff)),
  avg = mean(diff)
)
# with %>% operator
# Keep four columns, then derive RealTime (elapsed time plus a fixed 100) and
# mph from it.
# NOTE(review): the * 60 conversion presumes ActualElapsedTime is in minutes
# and Distance is in miles -- confirm against the hflights documentation.
d <- hflights %>%
  select(Dest, UniqueCarrier, Distance, ActualElapsedTime) %>%
  mutate(
    RealTime = ActualElapsedTime + 100,
    mph = Distance / RealTime * 60
  )

# without %>% operator
# Same computation as above written as nested calls; it overwrites d with an
# identical result.
d <- mutate(
  select(hflights, Dest, UniqueCarrier, Distance, ActualElapsedTime),
  RealTime = ActualElapsedTime + 100,
  mph = Distance / RealTime * 60
)
# Filter and summarise d
# Keep rows with a known mph below 70, then report how many such flights there
# are, how many distinct destinations they cover, and their distance range.
d %>%
  filter(!is.na(mph), mph < 70) %>%
  summarise(
    n_less = n(),
    n_dest = n_distinct(Dest),
    min_dist = min(Distance),
    max_dist = max(Distance)
  )
# Let's define preferable flights as flights that are 1.5 times as fast as
# driving (i.e. 50% faster than a 70 mph drive), which means they travel
# 105 mph or greater in real time. Also, assume that cancelled or diverted
# flights are less preferable than driving.

# ADVANCED PIPING EXERCISES

# Use one single piped call to print a summary with the following variables:
# n_non - the number of non-preferable flights in hflights,
# p_non - the percentage of non-preferable flights in hflights,
# n_dest - the number of destinations that non-preferable flights traveled to,
# min_dist - the minimum distance that non-preferable flights traveled,
# max_dist - the maximum distance that non-preferable flights traveled
hflights %>%
  mutate(
    RealTime = ActualElapsedTime + 100,
    mph = Distance / RealTime * 60
  ) %>%
  # A flight is non-preferable if it is slower than 105 mph, or was cancelled
  # or diverted (those rows are kept even when mph is NA).
  filter(mph < 105 | Cancelled == 1 | Diverted == 1) %>%
  summarise(
    n_non = n(),
    # percentage relative to the full, unfiltered hflights table
    p_non = 100 * n_non / nrow(hflights),
    n_dest = n_distinct(Dest),
    min_dist = min(Distance),
    max_dist = max(Distance)
  )
# Use summarise() to create a summary of hflights with a single variable, n,
# that counts the number of overnight flights. These flights have an arrival
# time that is earlier than their departure time. Only include flights that have
# no NA values for both DepTime and ArrTime in your count.
hflights %>%
  # The NA checks state the requirement explicitly; filter() would also drop
  # rows where the comparison itself evaluates to NA.
  filter(!is.na(DepTime), !is.na(ArrTime), ArrTime < DepTime) %>%
  summarise(n = n())
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment