# kierisi/code_to_fix_distro.R

## code_to_fix_distro.R
# load required packages via tidyverse
library(tidyverse)

# here() used to write csv to correct folder - need to spend time sorting how to use it on file import
library(here)

# create a single contact list from our entire event email distribution
# (currently 10 separate files in ./data/); all files are known to share the
# same column headers, so they can be stacked row-wise
# `pattern` is a regular expression, not a shell glob: "\\.csv$" only matches
# names ending in ".csv", whereas "*.csv" could also match names such as
# "contacts.csv.bak"
contact_files <- list.files(
  path = "./data/",
  pattern = "\\.csv$",
  full.names = TRUE
)

# stop early with a clear message instead of failing later on missing columns
if (length(contact_files) == 0) {
  stop("No .csv files found in ./data/", call. = FALSE)
}

# read every column as character (no type guessing), then stack the files
orig_contacts <-
  contact_files %>%
  map(read_csv, col_types = cols(.default = "c")) %>%
  bind_rows()

# inspect the column headers that came through from the combined import
names(orig_contacts)

# keep only two column ranges, FirstName through `TT Participant ID` and
# Program through District; the remaining columns were added on export
orig_contacts_2 <- select(
  orig_contacts,
  FirstName:`TT Participant ID`,
  Program:District
)
names(orig_contacts_2)

# Qualtrics tags emails that have been sent with various statuses
# We need to see what's been sent in our 10 distributions, and then filter for email addresses that haven't been opened

# distribution history - what's already been sent (one export per
# distribution, stored in ./data/sent/)
# `pattern` is a regular expression, not a shell glob: "\\.csv$" only matches
# names ending in ".csv"
sent_files <- list.files(
  path = "./data/sent/",
  pattern = "\\.csv$",
  full.names = TRUE
)

# stop early with a clear message instead of failing later on missing columns
if (length(sent_files) == 0) {
  stop("No .csv files found in ./data/sent/", call. = FALSE)
}

# read every column as character (no type guessing), then stack the files
distro_hx <-
  sent_files %>%
  map(read_csv, col_types = cols(.default = "c")) %>%
  bind_rows()

# inspect the distribution history: column headers and the first few rows
names(distro_hx)
head(distro_hx)

# tally how many rows carry each Status value that Qualtrics assigned
count(distro_hx, Status)
# Status values seen in this data:
# Email Bounced, Email Sent, Finished Survey, Opted Out,
# Partially Completed Survey, Skipped as Duplicate, Started Survey

# creating the new email distribution list
# history rows whose Status is still "Email Sent" are looked up in the original
# contact list, and the matching contacts are written out as the new list
# `Last Name` is renamed to last_name to keep the embedded data column headers
# consistent with orig_contacts_2, where last_name is the join key
# NOTE(review): last name is the only join key, so every contact sharing a last
# name with an "Email Sent" row is included - confirm this is acceptable or
# join on a unique identifier instead
distro_merge <- distro_hx %>%
  rename(last_name = `Last Name`) %>%
  filter(Status == "Email Sent") %>%
  # one row per last name is enough: Status is constant after the filter, and
  # collapsing before the join keeps repeated history rows from multiplying
  # every matching contact
  distinct(last_name) %>%
  inner_join(orig_contacts_2, by = "last_name") %>%
  # remove duplicate contacts - semi-redundant, as Qualtrics will also screen
  # duplicates
  distinct()

# write_csv() does not create missing folders, so make sure results/ exists
dir.create(here("results"), showWarnings = FALSE, recursive = TRUE)
write_csv(distro_merge, here("results", "distro_merge.csv"))

names(distro_merge)
	# load required packages via tidyverse
	library(tidyverse)

	# here() used to write csv to correct folder - need to spend time sorting how to use it on file import
	library(here)

	# create a single contact list from our entire event email distribution
	# (currently 10 separate files in ./data/); all files are known to share the
	# same column headers, so they can be stacked row-wise
	# `pattern` is a regular expression, not a shell glob: "\\.csv$" only matches
	# names ending in ".csv", whereas "*.csv" could also match names such as
	# "contacts.csv.bak"
	contact_files <- list.files(
	  path = "./data/",
	  pattern = "\\.csv$",
	  full.names = TRUE
	)

	# stop early with a clear message instead of failing later on missing columns
	if (length(contact_files) == 0) {
	  stop("No .csv files found in ./data/", call. = FALSE)
	}

	# read every column as character (no type guessing), then stack the files
	orig_contacts <-
	  contact_files %>%
	  map(read_csv, col_types = cols(.default = "c")) %>%
	  bind_rows()

	# inspect the column headers that came through from the combined import
	names(orig_contacts)

	# keep only two column ranges, FirstName through `TT Participant ID` and
	# Program through District; the remaining columns were added on export
	orig_contacts_2 <- select(
	  orig_contacts,
	  FirstName:`TT Participant ID`,
	  Program:District
	)
	names(orig_contacts_2)

	# Qualtrics tags emails that have been sent with various statuses
	# We need to see what's been sent in our 10 distributions, and then filter for email addresses that haven't been opened

	# distribution history - what's already been sent (one export per
	# distribution, stored in ./data/sent/)
	# `pattern` is a regular expression, not a shell glob: "\\.csv$" only matches
	# names ending in ".csv"
	sent_files <- list.files(
	  path = "./data/sent/",
	  pattern = "\\.csv$",
	  full.names = TRUE
	)

	# stop early with a clear message instead of failing later on missing columns
	if (length(sent_files) == 0) {
	  stop("No .csv files found in ./data/sent/", call. = FALSE)
	}

	# read every column as character (no type guessing), then stack the files
	distro_hx <-
	  sent_files %>%
	  map(read_csv, col_types = cols(.default = "c")) %>%
	  bind_rows()

	# inspect the distribution history: column headers and the first few rows
	names(distro_hx)
	head(distro_hx)

	# tally how many rows carry each Status value that Qualtrics assigned
	count(distro_hx, Status)
	# Status values seen in this data:
	# Email Bounced, Email Sent, Finished Survey, Opted Out,
	# Partially Completed Survey, Skipped as Duplicate, Started Survey

	# creating the new email distribution list
	# history rows whose Status is still "Email Sent" are looked up in the original
	# contact list, and the matching contacts are written out as the new list
	# `Last Name` is renamed to last_name to keep the embedded data column headers
	# consistent with orig_contacts_2, where last_name is the join key
	# NOTE(review): last name is the only join key, so every contact sharing a last
	# name with an "Email Sent" row is included - confirm this is acceptable or
	# join on a unique identifier instead
	distro_merge <- distro_hx %>%
	  rename(last_name = `Last Name`) %>%
	  filter(Status == "Email Sent") %>%
	  # one row per last name is enough: Status is constant after the filter, and
	  # collapsing before the join keeps repeated history rows from multiplying
	  # every matching contact
	  distinct(last_name) %>%
	  inner_join(orig_contacts_2, by = "last_name") %>%
	  # remove duplicate contacts - semi-redundant, as Qualtrics will also screen
	  # duplicates
	  distinct()

	# write_csv() does not create missing folders, so make sure results/ exists
	dir.create(here("results"), showWarnings = FALSE, recursive = TRUE)
	write_csv(distro_merge, here("results", "distro_merge.csv"))

	names(distro_merge)