Created
June 5, 2019 10:19
-
-
Save RickPack/297ac7c29f6c9abb6b004ee1c38c712a to your computer and use it in GitHub Desktop.
Use SurveyMonkey responses to remove unsubscribes and bounced emails from an Excel file with an Email_Address column
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#################################################### | |
## Reads email addresses copied and pasted from ## | |
## Surveymonkey screens into .txt files and then ## | |
## removes them from the original list. The ## | |
## final Excel workbook has the cleaned emails, ## | |
## a copy of the previously created second sheet ## | |
## (invalid emails, produced by ## | |
## excel_email_cleaner_new.R, found at: ## | |
## https://gist.github.com/RickPack/907a200cd40c786e19d045b379527f6d) ## | |
## and then worksheets with (i) bounced email ## | |
## addresses and (ii) opt-out + unsubscribes ## | |
#################################################### | |
library(readr) | |
library(readxl) | |
library(dplyr) | |
library(openxlsx) | |
out_folder <- 'Output/' | |
fl <- paste0(out_folder, 'old100BM_Email_Contacts.xlsx') | |
# Read the first sheet in the last cleaned emails Excel file in the output folder | |
# Sheet is named Valid_2019_02_14 | |
originalxls1 <- read_excel(fl, col_types = "text", sheet = 1) | |
# Now sheet 2, named Invalid_2019_02_14 | |
originalxls2 <- read_excel(fl, col_types = "text", sheet = 2) | |
# Read the bounced email addresses copied and pasted from SurveyMonkey | |
# read_delim is used because fortunately, copying and pasting caused | |
# columns to be separated by spaces, which I set as the delimiter. | |
# This causes a false read of other columns but I only want the first | |
# (email_addresses), hence the use of select(1) | |
bounced_emails <- readr::read_delim('Survey1_bounces.txt', " ") %>% | |
select(1) %>% | |
mutate(Email_Address = toupper(stringr::str_trim(Email_Address))) | |
unsub_emails <- readr::read_delim('Survey1_unsubscribes.txt', " ") %>% | |
select(1) %>% | |
mutate(Email_Address = toupper(stringr::str_trim(Email_Address))) | |
# Confirm email counts match .txt files | |
nrow(bounced_emails) | |
nrow(unsub_emails) | |
# Now let's remove the email addresses that were either bounced or unsubscribes | |
remove_emails <- bind_rows(bounced_emails, unsub_emails) | |
## Are there any strange characters that we want to remove? | |
remove_emails %>% | |
dplyr::filter(grepl("[^A-Z0-9_@.\\-]", Email_Address)) | |
## No additional cleaning needed so let's remove any email addresses | |
## from the original emails that were found in the remove list | |
newxls1 <- anti_join(originalxls1, remove_emails) | |
# Count of emails in list went from 1593 to 1388 | |
# Finally, let's create the new output Excel file | |
########################################### | |
# This is code using openxlsx functions # | |
# that I often reuse. # | |
# It emboldens and enlarges the header # | |
# row and facilitates adding worksheets # | |
# with desired names. # | |
########################################### | |
### Create the Excel workbook | |
### style to embolden the first row | |
bldStyle <- createStyle(fontSize = 14, fontColour = "black", textDecoration = c("BOLD")) | |
### function to position data frames on separate worksheets, then save workbook at end | |
### When n_xlsx argument equals max_nxlsx argument, save workbook | |
xlsxformat <- function(wb, namxlsx="", wksht_name, df_inxlsx, nxlsx, max_nxlsx){ | |
if (nxlsx == 1) { | |
wb <- createWorkbook() | |
} | |
addWorksheet(wb, wksht_name) | |
writeData(wb, nxlsx, df_inxlsx, colNames = TRUE, headerStyle = bldStyle) | |
setColWidths(wb, sheet = nxlsx, cols = 1:ncol(df_inxlsx), widths = "auto") | |
if (max_nxlsx == nxlsx) { | |
saveWorkbook(wb, paste0(namxlsx, ".xlsx"), overwrite = TRUE) | |
} | |
invisible(wb) | |
} | |
date_creation <- gsub("-", "_", Sys.Date()) | |
wb <- xlsxformat(wb, | |
wksht_name = paste0("Valid_", date_creation), | |
df_inxlsx = newxls1, nxlsx = 1, max_nxlsx = 4) | |
wb <- xlsxformat(wb, namxlsx = paste0(out_folder, "100BM_Email_Contacts"), | |
wksht_name = paste0("Invalid_", date_creation), | |
df_inxlsx = originalxls2, nxlsx = 2, max_nxlsx = 4) | |
wb <- xlsxformat(wb, namxlsx = paste0(out_folder, "100BM_Email_Contacts"), | |
wksht_name = "Bounced Emails", | |
df_inxlsx = bounced_emails, nxlsx = 3, max_nxlsx = 4) | |
wb <- xlsxformat(wb, namxlsx = paste0(out_folder, "100BM_Email_Contacts"), | |
wksht_name = "Unsubscribed Emails", | |
df_inxlsx = unsub_emails, nxlsx = 4, max_nxlsx = 4) | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment