Last active
September 24, 2017 07:51
-
-
Save aravindhebbali/55c4f40476028c09949b73af97bb1619 to your computer and use it in GitHub Desktop.
Data Wrangling with dplyr - Part 3
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Install ----
# Install dplyr and readr only when they are missing, so that re-running the
# script does not reinstall them (and does not need a CRAN connection) each
# time it is sourced.
if (!requireNamespace("dplyr", quietly = TRUE)) {
  install.packages("dplyr")
}
if (!requireNamespace("readr", quietly = TRUE)) {
  install.packages("readr")
}

# Library ----
library(dplyr)
library(readr)

# Import data ----
# Read the web traffic data set straight from GitHub into `ecom` and print it.
ecom <- readr::read_csv(
  "https://raw.githubusercontent.com/rsquaredacademy/datasets/master/web.csv"
)
ecom
# Distinct values ----
# Unique sources of traffic (referrers) present in the data.
distinct(ecom, referrer)

# Unique device types present in the data.
distinct(ecom, device)

# Rename columns ----
# Show the data with `duration` renamed to `time_on_site`. The result is only
# printed; it is not assigned, so `ecom` itself keeps the original name.
rename(ecom, time_on_site = duration)
# Sampling data ----
# Draw 700 rows at random from the whole data set.
sample_n(ecom, size = 700)

# Draw 100 rows at random from each referrer group.
ecom %>%
  group_by(referrer) %>%
  sample_n(size = 100)

# Draw a random 70% of all rows.
sample_frac(ecom, size = 0.7)

# Draw a random 30% of the rows from each referrer group.
ecom %>%
  group_by(referrer) %>%
  sample_frac(size = 0.3)
# Extract columns ----
# Extract the `device` column.
pull(ecom, device)

# Extract the first column (a positive position counts from the left).
pull(ecom, 1)

# Extract the last column (a negative position counts from the right).
pull(ecom, -1)

# Extract rows ----
# Extract the first 20 rows.
slice(ecom, 1:20)

# Extract the last row; n() gives the number of rows.
slice(ecom, n())
# Counting observations ----
# Total number of observations in the data.
tally(ecom)

# Number of observations for each type of referrer.
ecom %>%
  group_by(referrer) %>%
  tally()

# Number of observations for each referrer / bouncers combination.
ecom %>%
  group_by(referrer, bouncers) %>%
  tally()

# Number of observations for each referrer / purchase combination.
ecom %>%
  group_by(referrer, purchase) %>%
  tally()

# Same counts, restricted to the rows where a purchase was made.
# as.logical() keeps the filter working whether `purchase` was imported as a
# logical column or as the strings "true"/"false"; comparing a logical column
# with the string "true" would silently match no rows.
# NOTE(review): confirm how `purchase` is typed in the imported data.
ecom %>%
  group_by(referrer, purchase) %>%
  tally() %>%
  filter(as.logical(purchase))

# count() does the grouping and the tallying in a single call.
count(ecom, referrer, purchase)
# Top 2 referrers that bring orders ----
# Count visits per referrer / purchase combination, keep the purchasing rows,
# sort by count (largest first) and keep the two largest counts.
# as.logical() makes the filter work for both a logical `purchase` column and
# one holding the strings "true"/"false".
# NOTE(review): confirm how `purchase` is typed in the imported data.
# `wt = n` names the ranking column explicitly instead of relying on top_n()
# picking the last column by default.
ecom %>%
  count(referrer, purchase) %>%
  filter(as.logical(purchase)) %>%
  arrange(desc(n)) %>%
  top_n(n = 2, wt = n)
# between() ----
# For each visit, test whether the number of pages viewed lies between
# 5 and 15.
between(pull(ecom, n_pages), 5, 15)
# case_when() ----
# Add a `type` label to a subset of the mtcars columns. The conditions are
# listed in priority order: `disp > 200` first, then `cyl == 8`, and the final
# `TRUE` branch labels every remaining row.
mtcars %>%
  select(mpg, disp, cyl, gear, carb) %>%
  mutate(type = case_when(
    disp > 200 ~ "High",
    cyl == 8 ~ "Eight",
    TRUE ~ "True"
  ))
# nth(), last() and first() ----
# Referrer at position 1.
nth(pull(ecom, referrer), 1)

# Referrer at position 1000.
nth(pull(ecom, referrer), 1000)

# Last referrer in the column.
last(pull(ecom, referrer))

# First referrer in the column.
first(pull(ecom, referrer))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment