Last active
May 24, 2020 11:58
-
-
Save berkorbay/eaec5e414bcb1e1ba90d0972fde011f2 to your computer and use it in GitHub Desktop.
Parses YS order emails and brings them under a single data frame
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
options(stringsAsFactors=FALSE) | |
library(tidyverse) | |
library(rvest) | |
library(gmailr) | |
## Follow the authentication instructions at https://gmailr.r-lib.org/articles/gmailr.html before running this script.
#' Parse a Yemeksepeti order e-mail into a table of ordered items
#'
#' Reads the HTML body of `my_msg` once and works on the children of the cell
#' matched by the layout XPath below: the second child is read as the order
#' metadata ("key: value" cells) and the third child as the basket contents.
#'
#' @param my_msg A message object accepted by `gm_body()`.
#' @param full_info_list If `TRUE`, return the parsed metadata alongside the
#'   item table.
#' @return By default a tibble with one row per basket item and the columns
#'   `date`, `city`, `neighborhood`, `place`, `item`, `unit_cost`, `units` and
#'   `total_cost`. When `full_info_list` is `TRUE`, a list with the elements
#'   `order_df` (that tibble) and `info_list` (the metadata as a named list).
parse_order_table <- function(my_msg, full_info_list = FALSE) {
  # Converts amounts such as "1.234,50 TL" to 1234.5: drop the currency
  # suffix and the "." separators, then turn the decimal comma into a dot.
  parse_tl_amount <- function(x) {
    as.numeric(gsub(",", ".", gsub("\\.", "", gsub(" TL", "", x))))
  }

  # Parse the body a single time; both sections are children of the same cell.
  sections <- read_html(gm_body(my_msg)) %>%
    html_nodes(xpath = "/html/body/table/tr/td/center/table[1]/tr[2]/td") %>%
    html_children()
  my_info <- sections[[2]]
  my_order <- sections[[3]]

  # Metadata cells are split at the first ":" only (extra = "merge"), so
  # values that themselves contain ":" stay intact.
  info_data <- tibble(
    info_raw = str_trim(
      my_info %>% html_nodes("td.padding-right-20") %>% html_text()
    )
  ) %>%
    separate(info_raw, into = c("key", "value"), sep = ":", extra = "merge") %>%
    mutate_all(str_trim)
  info_list <- info_data %>%
    spread(key, value) %>%
    as.list()

  the_place <- my_order %>%
    html_nodes("td.sub-columns.twelve.padding-left-20") %>%
    html_text()

  # Every item-level field is read from the basket item tables.
  basket_rows <- my_order %>%
    html_nodes("table.basket-item.full-width.border-bottom-e5e5e5.nbs")

  order_items <- basket_rows %>%
    html_nodes("td.padding-top-10.sub-columns.six.left-text-pad.nls") %>%
    html_text()

  # These cells alternate: odd positions are read as the unit cost, even
  # positions as the number of units. seq_along() keeps the split correct
  # (empty) when no cells are found, unlike 1:length().
  cost_and_unit_cells <- basket_rows %>%
    html_nodes("td.padding-top-10.sub-columns.two.center.last") %>%
    html_text()
  cell_pos <- seq_along(cost_and_unit_cells)
  item_unit_costs <- parse_tl_amount(cost_and_unit_cells[cell_pos %% 2 == 1])
  item_units <- as.numeric(cost_and_unit_cells[cell_pos %% 2 == 0])

  item_total_costs <- basket_rows %>%
    html_nodes("td.padding-top-10.sub-columns.two.right.last") %>%
    html_text() %>%
    parse_tl_amount()

  order_df <- tibble(
    date = parse_date(info_list$`Gönderim Tarihi`, format = "%d.%m.%Y"),
    city = info_list$`Şehir`,
    neighborhood = info_list$`Semt`,
    place = the_place,
    item = order_items,
    unit_cost = item_unit_costs,
    units = item_units,
    total_cost = item_total_costs
  )

  if (full_info_list) {
    return(list(order_df = order_df, info_list = info_list))
  }
  order_df
}
# Search Gmail for threads sent by the Yemeksepeti order address.
ys_threads <- gm_threads(
  "from:siparis@news.yemeksepeti.com",
  num_results = 500
)

# Flatten the result into one vector of thread ids. Each element of
# `ys_threads` is read for its `threads` list, and each entry there for its
# `id`. lapply() copes with an empty result or an element without `threads`,
# where the previous 1:length() loops would index out of bounds.
the_vec <- unlist(
  lapply(ys_threads, function(page) {
    lapply(page$threads, function(thread) thread$id)
  }),
  use.names = FALSE
)
# Fetch every collected thread, parse the first message of each one whose
# subject marks it as an order confirmation, and combine the parsed tables.
# Results are gathered in a preallocated list and bound once at the end
# instead of growing a data frame on every iteration.
order_tables <- vector("list", length(the_vec))
for (i in seq_along(the_vec)) {
  print(i)
  latest_thread <- gm_thread(the_vec[i])
  my_msg <- latest_thread$messages[[1]]
  if (grepl("Yemeksepeti Sipariş Onay", gm_subject(my_msg))) {
    # A message that fails to parse is reported and skipped so that one bad
    # e-mail does not abort the whole run.
    parsed_order <- tryCatch(
      parse_order_table(my_msg),
      error = function(e) {
        print(e)
        NULL
      }
    )
    # Guarded because assigning NULL with [[<- would delete the list slot.
    if (!is.null(parsed_order)) {
      order_tables[[i]] <- parsed_order
    }
  } else {
    print("This is not an order. Order titles start with Yemeksepeti Sipariş Onay")
  }
  print("======")
  # Pause between requests to the Gmail API.
  Sys.sleep(0.5)
}
# Slots left NULL (skipped or failed messages) are ignored by bind_rows().
all_orders_df <- bind_rows(order_tables)
print(all_orders_df)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment