Skip to content

Instantly share code, notes, and snippets.

@aravindhebbali
Last active September 24, 2017 07:10
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save aravindhebbali/7758b86c2bc13ff1e5d88d9d1c204f8c to your computer and use it in GitHub Desktop.
Save aravindhebbali/7758b86c2bc13ff1e5d88d9d1c204f8c to your computer and use it in GitHub Desktop.
Data Wrangling with dplyr
# install
install.packages('dplyr')
install.packages('readr')
# library
library(dplyr)
library(readr)
# read data
ecom <- readr::read_csv('https://raw.githubusercontent.com/rsquaredacademy/datasets/master/web.csv')
# average order value
ecom %>%
filter(purchase == 'true') %>%
select(device, order_value, order_items) %>%
group_by(device) %>%
summarise_all(funs(sum)) %>%
mutate(
aov = order_value / order_items
) %>%
select(device, aov)
# visits from mobile
filter(ecom, device == "mobile")
# visits from mobile that converted
filter(ecom, device == "mobile", purchase == "true")
# visits from mobile that visited more than 5 pages
filter(ecom, device == "mobile", n_pages > 5)
# for our case study, we want all visits that converted.
filter(ecom, purchase == "true")
# select device and purchase columns
select(ecom, device, purchase)
# select all columns from device to purchase
select(ecom, device:purchase)
# select all columns excluding id and country
select(ecom, -id, -country)
# for our case study, we need to select the columns order value
# and order items to calculate the AOV, we also need to select
# the device column as we are computing the AOV for different devices.
select(ecom, device, order_value, order_items)
# select ecom, device, order_value and order_items for purchasers
ecom1 <- filter(ecom, purchase == "true")
ecom2 <- select(ecom1, device, order_value, order_items)
ecom2
# group data by referrer type
group_by(ecom, referrer)
# for our case study, we need to group the data by device type.
ecom3 <- group_by(ecom2, device)
ecom3
# total order value and order items
ecom4 <- summarise(ecom3, total_value = sum(order_value),
total_items = sum(order_items))
ecom4
# compute aov
ecom5 <- mutate(ecom4, aov = order_value / order_items)
ecom5
# select relevant columns
ecom6 <- select(ecom5, device, aov)
ecom6
# full code
ecom1 <- filter(ecom, purchase == "true")
ecom2 <- select(ecom1, device, order_value, order_items)
ecom3 <- group_by(ecom2, device)
ecom4 <- summarise_all(ecom3, funs(sum))
ecom5 <- mutate(ecom4, aov = order_value / order_items)
ecom6 <- select(ecom5, device, aov)
ecom6
# full code using pipe
ecom %>%
filter(purchase == 'true') %>%
select(device, order_value, order_items) %>%
group_by(device) %>%
summarise_all(funs(sum)) %>%
mutate(
aov = order_value / order_items
) %>%
select(device, aov)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment