Skip to content

Instantly share code, notes, and snippets.

@berkorbay
Last active May 24, 2020 11:58
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save berkorbay/eaec5e414bcb1e1ba90d0972fde011f2 to your computer and use it in GitHub Desktop.
Save berkorbay/eaec5e414bcb1e1ba90d0972fde011f2 to your computer and use it in GitHub Desktop.
Parses Yemeksepeti (YS) order confirmation emails and combines them into a single data frame.
options(stringsAsFactors=FALSE)
library(tidyverse)
library(rvest)
library(gmailr)
## FOLLOW AUTH INSTRUCTIONS FROM HERE https://gmailr.r-lib.org/articles/gmailr.html
#' Parse one Yemeksepeti order-confirmation email into a tibble of order lines
#'
#' The HTML body of the message is parsed once. Inside the main content cell,
#' the second child holds the "key: value" order details and the third child
#' holds the restaurant name and the basket rows.
#'
#' @param my_msg A gmailr message; `gm_body(my_msg)` must yield the HTML of an
#'   order-confirmation email.
#' @param full_info_list If `TRUE`, also return every "key: value" detail
#'   found in the email, not only the fields used in the order table.
#' @return A tibble with one row per basket item and the columns `date`,
#'   `city`, `neighborhood`, `place`, `item`, `unit_cost`, `units` and
#'   `total_cost`. When `full_info_list` is `TRUE`, a list with elements
#'   `order_df` (that tibble) and `info_list` (a named list of the details,
#'   ordered by key).
parse_order_table <- function(my_msg, full_info_list = FALSE) {
  # Amounts are expected in Turkish notation (e.g. "1.234,50 TL"): drop the
  # currency suffix and the thousands dots, then turn the decimal comma into
  # a dot so as.numeric() can read the value.
  parse_tl_amount <- function(x) {
    x <- gsub(" TL", "", x, fixed = TRUE)
    x <- gsub(".", "", x, fixed = TRUE)
    as.numeric(gsub(",", ".", x, fixed = TRUE))
  }

  # Parse the HTML a single time; both sections are children of the same cell.
  sections <- read_html(gm_body(my_msg)) %>%
    html_nodes(xpath = "/html/body/table/tr/td/center/table[1]/tr[2]/td") %>%
    html_children()
  if (length(sections) < 3) {
    stop(
      "Unexpected email layout: expected at least 3 content sections, found ",
      length(sections), ".",
      call. = FALSE
    )
  }
  my_info <- sections[[2]]
  my_order <- sections[[3]]

  # Order details: each cell reads "key: value". Only the first colon splits,
  # so values that contain colons stay intact; cells without a colon get an
  # NA value instead of a warning.
  info_data <- tibble(
    info_raw = my_info %>%
      html_nodes("td.padding-right-20") %>%
      html_text() %>%
      str_trim()
  ) %>%
    separate(
      info_raw,
      into = c("key", "value"),
      sep = ":",
      extra = "merge",
      fill = "right"
    ) %>%
    mutate(key = str_trim(key), value = str_trim(value))
  # Keep the details sorted by key so the returned list has a stable order.
  info_data <- info_data[order(info_data$key), ]
  info_list <- as.list(setNames(info_data$value, info_data$key))

  # Basket: one table per ordered item.
  basket_rows <- my_order %>%
    html_nodes("table.basket-item.full-width.border-bottom-e5e5e5.nbs")
  the_place <- my_order %>%
    html_nodes("td.sub-columns.twelve.padding-left-20") %>%
    html_text()
  order_items <- basket_rows %>%
    html_nodes("td.padding-top-10.sub-columns.six.left-text-pad.nls") %>%
    html_text()

  # The centered cells alternate: unit cost, unit count, unit cost, ...
  # seq_along() keeps the mask empty when there are no cells at all.
  cost_and_unit_cells <- basket_rows %>%
    html_nodes("td.padding-top-10.sub-columns.two.center.last") %>%
    html_text()
  is_cost_cell <- seq_along(cost_and_unit_cells) %% 2 == 1
  item_unit_costs <- parse_tl_amount(cost_and_unit_cells[is_cost_cell])
  item_units <- as.numeric(cost_and_unit_cells[!is_cost_cell])

  item_total_costs <- basket_rows %>%
    html_nodes("td.padding-top-10.sub-columns.two.right.last") %>%
    html_text() %>%
    parse_tl_amount()

  order_df <- tibble(
    date = parse_date(info_list$`Gönderim Tarihi`, format = "%d.%m.%Y"),
    city = info_list$`Şehir`,
    neighborhood = info_list$`Semt`,
    place = the_place,
    item = order_items,
    unit_cost = item_unit_costs,
    units = item_units,
    total_cost = item_total_costs
  )

  if (full_info_list) {
    return(list(order_df = order_df, info_list = info_list))
  }
  order_df
}
# Search the mailbox for Yemeksepeti order mails (at most 500 results).
ys_threads <- gm_threads(
  "from:siparis@news.yemeksepeti.com",
  num_results = 500
)

# Each element of ys_threads carries its own `$threads` list; gather every
# thread id into one flat vector. Going through lapply()/unlist() instead of
# 1:length() index loops means an empty search result (or an element without
# threads) simply contributes nothing rather than failing on a `[[1]]` lookup.
the_vec <- unlist(
  lapply(ys_threads, function(page) {
    lapply(page$threads, function(thread) thread$id)
  }),
  use.names = FALSE
)
# Fetch every thread, parse the order mails and stack them into one tibble.
# Parsed tables are collected in a preallocated list and bound once at the
# end instead of re-binding a growing tibble on every iteration.
order_tables <- vector("list", length(the_vec))
for (i in seq_along(the_vec)) {
  print(i)
  latest_thread <- gm_thread(the_vec[i])
  # Only the first message of the thread is inspected.
  my_msg <- latest_thread$messages[[1]]
  if (grepl("Yemeksepeti Sipariş Onay", gm_subject(my_msg))) {
    # A mail that fails to parse is reported and skipped, not fatal.
    parsed_order <- tryCatch(
      parse_order_table(my_msg),
      error = function(e) {
        print(e)
        NULL
      }
    )
    # Assigning NULL into a list would delete the slot, so only store
    # successful results; untouched slots stay NULL and are ignored below.
    if (!is.null(parsed_order)) {
      order_tables[[i]] <- parsed_order
    }
  } else {
    print("This is not an order. Order titles start with Yemeksepeti Sipariş Onay")
  }
  print("======")
  # Short pause between requests to the Gmail API.
  Sys.sleep(0.5)
}
all_orders_df <- bind_rows(order_tables)
print(all_orders_df)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment