Last active
September 24, 2017 07:10
-
-
Save aravindhebbali/7758b86c2bc13ff1e5d88d9d1c204f8c to your computer and use it in GitHub Desktop.
Data Wrangling with dplyr
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# install | |
install.packages('dplyr') | |
install.packages('readr') | |
# library | |
library(dplyr) | |
library(readr) | |
# read data | |
ecom <- readr::read_csv('https://raw.githubusercontent.com/rsquaredacademy/datasets/master/web.csv') | |
# average order value | |
ecom %>% | |
filter(purchase == 'true') %>% | |
select(device, order_value, order_items) %>% | |
group_by(device) %>% | |
summarise_all(funs(sum)) %>% | |
mutate( | |
aov = order_value / order_items | |
) %>% | |
select(device, aov) | |
# visits from mobile | |
filter(ecom, device == "mobile") | |
# visits from mobile that converted | |
filter(ecom, device == "mobile", purchase == "true") | |
# visits from mobile that visited more than 5 pages | |
filter(ecom, device == "mobile", n_pages > 5) | |
# for our case study, we want all visits that converted. | |
filter(ecom, purchase == "true") | |
# select device and purchase columns | |
select(ecom, device, purchase) | |
# select all columns from device to purchase | |
select(ecom, device:purchase) | |
# select all columns excluding id and country | |
select(ecom, -id, -country) | |
# for our case study, we need to select the columns order value | |
# and order items to calculate the AOV, we also need to select | |
# the device column as we are computing the AOV for different devices. | |
select(ecom, device, order_value, order_items) | |
# select ecom, device, order_value and order_items for purchasers | |
ecom1 <- filter(ecom, purchase == "true") | |
ecom2 <- select(ecom1, device, order_value, order_items) | |
ecom2 | |
# group data by referrer type | |
group_by(ecom, referrer) | |
# for our case study, we need to group the data by device type. | |
ecom3 <- group_by(ecom2, device) | |
ecom3 | |
# total order value and order items | |
ecom4 <- summarise(ecom3, total_value = sum(order_value), | |
total_items = sum(order_items)) | |
ecom4 | |
# compute aov | |
ecom5 <- mutate(ecom4, aov = order_value / order_items) | |
ecom5 | |
# select relevant columns | |
ecom6 <- select(ecom5, device, aov) | |
ecom6 | |
# full code | |
ecom1 <- filter(ecom, purchase == "true") | |
ecom2 <- select(ecom1, device, order_value, order_items) | |
ecom3 <- group_by(ecom2, device) | |
ecom4 <- summarise_all(ecom3, funs(sum)) | |
ecom5 <- mutate(ecom4, aov = order_value / order_items) | |
ecom6 <- select(ecom5, device, aov) | |
ecom6 | |
# full code using pipe | |
ecom %>% | |
filter(purchase == 'true') %>% | |
select(device, order_value, order_items) %>% | |
group_by(device) %>% | |
summarise_all(funs(sum)) %>% | |
mutate( | |
aov = order_value / order_items | |
) %>% | |
select(device, aov) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment