@tts
Last active November 21, 2016 08:34
Shiny web app on the CRIS project emails
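Two scripts follow: the Shiny app itself, and the preprocessing script that parses the raw email dump into the CSV files and wordcloud images the app reads.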
library(shiny)
library(highcharter)
library(dygraphs)
library(xts)
library(dplyr)
shinyApp(
  ui = fluidPage(
    fluidRow(
      column(12,
        mainPanel(
          tabsetPanel(
            tabPanel("Time series",
                     h4("My CRIS project emails"),
                     dygraphOutput("dp", width = "100%", height = "700")),
            tabPanel("Hour",
                     h4("Emails/hour"),
                     highchartOutput("hchr", width = "100%", height = "500")),
            tabPanel("Weekday",
                     h4("Emails/weekday"),
                     highchartOutput("hcwd", width = "100%", height = "500")),
            tabPanel("Top 400 words 2015",
                     imageOutput("preImage2015")),
            tabPanel("Top 400 words 2016",
                     imageOutput("preImage2016")),
            tabPanel("Co-occurrence of most frequent words",
                     imageOutput("occ", height = 600))
          )
        )
      )
    )
  ),
  server = function(input, output) {
    # Import daily stats for the dygraph
    emailstats <- read.csv("emailstats.csv", stringsAsFactors = FALSE)
    # hourly stats for highcharts
    emailstatsdetail <- read.csv("emaildetailstats.csv", stringsAsFactors = FALSE)
    # and weekday stats
    emailstatsweekday <- read.csv("emailwdstats.csv", stringsAsFactors = FALSE)
    # Prepare the dygraph object
    #
    # Fill in missing dates
    # http://stackoverflow.com/a/31484550
    ts <- seq.POSIXt(as.POSIXct("2012-03-23", format = "%Y-%m-%d"),
                     as.POSIXct("2016-10-25", format = "%Y-%m-%d"),
                     by = "day")
    df <- data.frame(timestamp = ts)
    # as.POSIXct() takes the format via the format argument; passing it
    # positionally would be read as the tz argument
    emailstats$date <- as.POSIXct(emailstats$date, format = "%Y-%m-%d")
    data_with_missing_dates <- full_join(df, emailstats, by = c("timestamp" = "date"))
    # Replace NA with 0
    data_with_missing_dates$emails <- ifelse(is.na(data_with_missing_dates$emails),
                                             0, data_with_missing_dates$emails)
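    # Alternative sketch (not in the original app): tidyr::complete() can fill
    # the missing dates and the zero counts in one step, assuming tidyr is
    # installed:
    # emailstats <- tidyr::complete(emailstats,
    #                               date = seq(min(date), max(date), by = "day"),
    #                               fill = list(emails = 0))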
    # Build the xts object directly from the two columns; converting the whole
    # data frame with as.xts() would sweep the timestamp column into the data matrix
    emailstats.xts <- xts(as.matrix(data_with_missing_dates["emails"]),
                          order.by = data_with_missing_dates$timestamp)
    dygraph_plot <- dygraph(emailstats.xts) %>%
      dyOptions(stepPlot = TRUE,
                pointSize = 7,
                fillGraph = TRUE,
                colors = RColorBrewer::brewer.pal(3, "Set1"),
                strokeWidth = 3,
                strokePattern = "dashed") %>%
      dyLegend(width = 200,
               show = "follow",
               hideOnMouseOut = FALSE) %>%
      dyEvent("2012-05-24", "Project set", labelLoc = "top") %>%
      dyEvent("2013-10-02", "Investment decision", labelLoc = "top") %>%
      dyEvent("2013-12-19", "Call for tender", labelLoc = "top") %>%
      dyEvent("2014-06-05", "Procurement decision", labelLoc = "top") %>%
      dyEvent("2014-09-12", "Kick-off", labelLoc = "top") %>%
      dyEvent("2016-09-05", "Portal open!", labelLoc = "top")
    emailstatsdetail <- emailstatsdetail %>%
      group_by(hour) %>%
      summarise(emails = sum(emails))
    # lubridate::hour() produces hours 0-23, so the factor levels must match
    emailstatsdetail$hour <- factor(emailstatsdetail$hour, levels = 0:23)
    # Hourly stats (newer highcharter versions take the aesthetics via hcaes())
    hchart_hr <- hchart(emailstatsdetail, "column", hcaes(x = hour, y = emails)) %>%
      hc_colors("orange")
    # and weekday stats
    hchart_wd <- hchart(emailstatsweekday, "column", hcaes(x = weekday, y = emails)) %>%
      hc_colors("orange")
    output$dp <- renderDygraph(dygraph_plot)
    output$hchr <- renderHighchart(hchart_hr)
    output$hcwd <- renderHighchart(hchart_wd)
    output$preImage2015 <- renderImage({
      filename <- normalizePath(file.path("./images/emailcloud_2015.png"))
      list(src = filename,
           alt = "Wordcloud 2015")
    }, deleteFile = FALSE)
    output$preImage2016 <- renderImage({
      filename <- normalizePath(file.path("./images/emailcloud_2016.png"))
      list(src = filename,
           alt = "Wordcloud 2016")
    }, deleteFile = FALSE)
    output$occ <- renderImage({
      filename <- normalizePath(file.path("./images/word_occ_ggraph.png"))
      list(src = filename,
           alt = "Word co-occurrences")
    }, deleteFile = FALSE)
  }
)
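# To run the app locally, save the block above as app.R next to the CSV files
# and the images/ directory produced by the script below, then call
# shiny::runApp().

# --- Preprocessing: parse the email dump, compute stats, draw wordclouds ---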
library(dplyr)
library(stringr)
library(tidyr)
library(wordcloud)
library(tm)
library(lubridate)
raw <- readLines("parsed_w_awk_notempty.txt", encoding = "UTF-8")
lines <- tibble(raw = raw) # data_frame() is deprecated; tibble() is re-exported by dplyr
# http://stackoverflow.com/a/12985882
# Number the emails in sequence
lines$email <- NA
# Determine which head/body lines belong to which email
x <- grepl("^head ", lines$raw)
spanEmail <- rle(x)$lengths[rle(x)$values == TRUE]
lines$email[x] <- rep(seq_along(spanEmail), times = spanEmail)
x <- grepl("^body ", lines$raw)
spanEmail <- rle(x)$lengths[rle(x)$values == TRUE]
lines$email[x] <- rep(seq_along(spanEmail), times = spanEmail)
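# A toy illustration of the rle() trick above (commented out, illustrative
# only): each consecutive run of TRUEs is assigned the next id, so all lines
# of one email share a number.
# x  <- c(TRUE, TRUE, FALSE, TRUE, FALSE, FALSE, TRUE)
# sp <- rle(x)$lengths[rle(x)$values]    # lengths of the TRUE runs: 2 1 1
# id <- rep(NA_integer_, length(x))
# id[x] <- rep(seq_along(sp), times = sp)
# id                                     # 1 1 NA 2 NA NA 3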
# Group by email. Within each group, paste together and clean head and body
lines_g <- lines %>%
  group_by(email) %>%
  summarize(head_raw = str_c(raw, collapse = " "),
            body_raw = str_c(raw, collapse = " ")) %>% # hacky
  mutate(head = gsub("body .*", "", head_raw)) %>% # cleaning up the hack, with a caveat:
  # if the email is a forwarded one, Sent etc. end up in the body part
  mutate(body = gsub("(^head .*)(body .*?)", "\\2", body_raw))
lines_g$head <- gsub("head ", "", lines_g$head)
lines_g$body <- gsub("body ", "", lines_g$body)
# In header, separate From, Sent/To, and Subject to own columns.
# First, add a separator
lines_g$head <- gsub(" Sent:", "%Sent:", lines_g$head)
lines_g$head <- gsub(" Date:", "%Sent:", lines_g$head)
lines_g$head <- gsub(" To:", "%To:", lines_g$head)
lines_g$head <- gsub(" Subject:", "%Subject:", lines_g$head)
lines_g_h <- lines_g %>%
  separate(head, c("From", "Sent", "To", "Subject"), sep = "%", remove = FALSE) %>%
  # only the first occurrences are parsed, which is OK
  select(email, From, Sent, To, Subject, body)
# Remove labels and whitespace
lines_g_h$From <- gsub("From:", "", lines_g_h$From)
lines_g_h$Sent <- gsub("Sent:", "", lines_g_h$Sent)
lines_g_h$To <- gsub("To:", "", lines_g_h$To)
lines_g_h$Subject <- gsub("Subject:", "", lines_g_h$Subject)
lines_g_h$From <- gsub("\t", "", lines_g_h$From)
lines_g_h$Sent <- gsub("\t", "", lines_g_h$Sent)
lines_g_h$To <- gsub("\t", "", lines_g_h$To)
lines_g_h$Subject <- gsub("\t", "", lines_g_h$Subject)
# Trouble with those emails where the head doesn't contain all necessary fields.
# Leaving them out.
lines_g_h <- lines_g_h[!is.na(lines_g_h$Sent),]
# Parse Sent into a date-time, after translating Finnish month and weekday names
lines_g_h$Sent <- gsub("tammikuuta","January", lines_g_h$Sent)
lines_g_h$Sent <- gsub("helmikuuta","February", lines_g_h$Sent)
lines_g_h$Sent <- gsub("maaliskuuta","March", lines_g_h$Sent)
lines_g_h$Sent <- gsub("huhtikuuta","April", lines_g_h$Sent)
lines_g_h$Sent <- gsub("toukokuuta","May", lines_g_h$Sent)
lines_g_h$Sent <- gsub("kesäkuuta","June", lines_g_h$Sent)
lines_g_h$Sent <- gsub("heinäkuuta","July", lines_g_h$Sent)
lines_g_h$Sent <- gsub("elokuuta","August", lines_g_h$Sent)
lines_g_h$Sent <- gsub("syyskuuta","September", lines_g_h$Sent)
lines_g_h$Sent <- gsub("lokakuuta","October", lines_g_h$Sent)
lines_g_h$Sent <- gsub("marraskuuta","November", lines_g_h$Sent)
lines_g_h$Sent <- gsub("joulukuuta","December", lines_g_h$Sent)
lines_g_h$Sent <- gsub("maanantaina","Monday", lines_g_h$Sent)
lines_g_h$Sent <- gsub("tiistaina","Tuesday", lines_g_h$Sent)
lines_g_h$Sent <- gsub("keskiviikkona","Wednesday", lines_g_h$Sent)
lines_g_h$Sent <- gsub("torstaina","Thursday", lines_g_h$Sent)
lines_g_h$Sent <- gsub("perjantaina","Friday", lines_g_h$Sent)
lines_g_h$Sent <- gsub("lauantaina","Saturday", lines_g_h$Sent)
lines_g_h$Sent <- gsub("sununtaina","Sunday", lines_g_h$Sent)
# lubridate is already loaded; don't shadow dmy_hm() with a variable of the same name
lines_g_h$Sent <- dmy_hm(lines_g_h$Sent)
# Leaving out those that failed to parse
lines_g_h_p <- lines_g_h[!is.na(lines_g_h$Sent),]
# Without an explicit TZ, the xts object built later is off by one day
Sys.setenv(TZ = Sys.timezone())
# Daily statistics
dailysums <- lines_g_h_p %>%
  mutate(day = format(Sent, "%d"),
         month = format(Sent, "%m"),
         year = format(Sent, "%Y")) %>%
  group_by(day, month, year) %>%
  summarise(emails = n())
stats_sum <- dailysums %>%
  mutate(date = as.POSIXct(paste(day, month, year, sep = "-"),
                           format = "%d-%m-%Y", tz = Sys.timezone()))
# tz belongs inside as.POSIXct(); outside the call it would only add a stray column
stats <- stats_sum[, c("date", "emails")]
# Hourly statistics
timesums <- lines_g_h_p %>%
  mutate(hour = hour(Sent),
         day = day(Sent),
         month = month(Sent),
         year = year(Sent)) %>%
  group_by(year, month, day, hour) %>%
  summarise(emails = n())
# lubridate::hour() returns 0-23
timesums$hour <- factor(timesums$hour, levels = 0:23)
# Weekday statistics
timesums_wd <- lines_g_h_p %>%
  mutate(weekday = weekdays(Sent)) %>%
  group_by(weekday) %>%
  summarise(emails = n())
timesums_wd$weekday <- factor(as.character(timesums_wd$weekday),
                              levels = c("Monday", "Tuesday", "Wednesday", "Thursday",
                                         "Friday", "Saturday", "Sunday"))
timesums_wd <- timesums_wd[order(timesums_wd$weekday), ]
# First, a fair bit of cleaning (names, URLs etc.)
lines_g_h_p$body <- gsub("[strings not shown here on purpose]", "", lines_g_h_p$body)
# No FI Pure emails
lines_g_h_p <- lines_g_h_p[!grepl(".*FIPure.*", lines_g_h_p$Subject),]
# Take the first 400 chars only
lines_g_h_p$body <- substr(lines_g_h_p$body,1,400)
# All years in data
years <- sort(unique(year(lines_g_h_p$Sent)))
# Plot a wordcloud for each year in the data. do_cloud() builds a corpus from
# the email bodies, removes punctuation, lowercases, strips English plus a few
# Finnish stopwords, and writes the cloud to a PNG named after the year.
do_cloud <- function(df, year) {
  # Newer tm versions expect doc_id/text columns in DataframeSource,
  # so build the corpus from the body column directly
  res.corpus <- Corpus(VectorSource(df$body))
  res.corpus <- tm_map(res.corpus, removePunctuation)
  res.corpus <- tm_map(res.corpus, content_transformer(tolower))
  res.corpus <- tm_map(res.corpus, removeWords,
                       c(stopwords("english"), "että", "kun", "jos", "ole"))
  tdm <- TermDocumentMatrix(res.corpus)
  m <- as.matrix(tdm)
  v <- sort(rowSums(m), decreasing = TRUE)
  d <- data.frame(word = names(v), freq = v)
  pal2 <- brewer.pal(8, "Dark2")
  png(paste0("emailcloud_", year, ".png"), width = 1280, height = 800)
  wordcloud(d$word,
            d$freq,
            scale = c(8, .2),
            min.freq = 3,
            max.words = 400,
            random.order = FALSE,
            rot.per = .15,
            colors = pal2)
  dev.off()
}
# The function must be defined before it is called
sapply(years, function(x) {
  thisyear <- lines_g_h_p %>%
    filter(year(Sent) == x)
  do_cloud(thisyear, x)
})
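# The PNGs written above (emailcloud_<year>.png) are the wordcloud images the
# Shiny app serves with renderImage(); the app expects them under ./images/,
# so copy them there. word_occ_ggraph.png is produced outside this gist.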
# Save CSV data for the Shiny app
write.csv(stats, file = "emailstats.csv", row.names = F)
write.csv(timesums, file = "emaildetailstats.csv", row.names = F)
write.csv(timesums_wd, file = "emailwdstats.csv", row.names = F)
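# These three CSVs are the files read by read.csv() at the top of the app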