Last active
January 18, 2017 08:04
-
-
Save slarge/379ee3ab8d111d7e426f1b57b30ad7c2 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
############################
# How many pages of Advice #
############################
##SOURCE
#----pdftools package
#https://cran.rstudio.com/web/packages/pdftools
#INSTALL PACKAGES
#install.packages("pdftools", dependencies=TRUE) #only once
#LOAD LIBS
library(pdftools)
library(ggplot2)
library(lubridate)

# Cumulative page count of advice PDFs by week of creation ----
#
# Finds the PDF files below one year's advice folder on the O: drive, reads
# each file's page count and creation timestamp with pdf_info(), sums the
# pages per week of the year and plots the running total.
# Only base R, pdftools, ggplot2 and lubridate are used; the aggregation no
# longer relies on dplyr verbs, which this script never loaded.

# ODRIVE
oDrive <- "O:/ADVISORY SERVICES/"

# Folder to scan. The script used to assign the "ICES Advice 2016" folder and
# then immediately overwrite it with "ICES Advice 2010", so only 2010 was ever
# read. The effective value (2010) is kept; change adviceYear to scan another
# year.
adviceYear <- 2010
adviceRoot <- paste0(oDrive, "ICES Advice ", adviceYear)

# Files created after this date are left out of the plot.
# NOTE(review): the cutoff is 2016 while the folder is 2010 -- confirm whether
# it should follow adviceYear.
cutoffDate <- as.Date("2016-12-31")

# Collect every PDF below adviceRoot. The pattern is anchored and the dot is
# escaped so that only a real ".pdf" extension matches (any letter case).
adviceDF <- list.files(
  adviceRoot,
  pattern = "\\.pdf$",
  recursive = TRUE,
  full.names = TRUE,
  ignore.case = TRUE
)

# Drop other versions we are not interested in: any path containing "ersion"
# (matches both "Version" and "version", in file or folder names).
adviceDF <- adviceDF[!grepl("ersion", adviceDF, fixed = TRUE)]

if (length(adviceDF) == 0L) {
  stop("No advice PDFs found under ", adviceRoot, call. = FALSE)
}

# Read the metadata once per file and pull both fields from the same result.
pdfMeta <- lapply(adviceDF, pdf_info)
pages <- vapply(pdfMeta, function(info) as.numeric(info$pages), numeric(1))
# Creation time as seconds since the epoch; NA when a file carries no single
# creation timestamp, so the vector always lines up with `pages`.
createdSecs <- vapply(
  pdfMeta,
  function(info) {
    if (length(info$created) == 1L) as.numeric(info$created) else NA_real_
  },
  numeric(1)
)
created <- as.POSIXct(createdSecs, origin = "1970-01-01")

# Keep the creation day as a real Date (local time zone) instead of a
# platform-dependent "%D" string that has to be parsed back again.
adviceOutput <- data.frame(
  PAGE_COUNT = pages,
  dates = as.Date(format(created, "%Y-%m-%d"))
)

# Sum pages per week of the year. Using a factor with all 53 levels keeps
# weeks without any file in the result; they come back as NA and are set to 0.
keep <- !is.na(adviceOutput$dates) & adviceOutput$dates <= cutoffDate
weekOfYear <- factor(week(adviceOutput$dates[keep]), levels = 1:53)
pagesPerWeek <- tapply(adviceOutput$PAGE_COUNT[keep], weekOfYear, sum)
pagesPerWeek[is.na(pagesPerWeek)] <- 0

weeklyPages <- data.frame(
  week = 1:53,
  sum = as.vector(pagesPerWeek)
)
weeklyPages$cumulative <- cumsum(weeklyPages$sum)

advicePlot <- ggplot(weeklyPages, aes(x = week, y = cumulative)) +
  geom_line() +
  xlab("Week") +
  ylab("Cumulative page count")
# Explicit print so the plot also appears when the script is run via source().
print(advicePlot)
# Further information about a PDF file can be displayed with the calls below
# info <- pdf_info(out_put_pdf)
# text <- pdf_text(out_put_pdf)
# fonts <- pdf_fonts(out_put_pdf)
# files <- pdf_attachments(out_put_pdf)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
The answer is 2028