| library(neotoma) | |
| library(ggplot2) | |
| library(lubridate) | |
| library(reshape2) | |
| all.ds <- get_dataset() | |
| sub_tables <- function(x){ | |
| dates <- try(as.Date(x$submission[,1])) | |
| if(class(dates) == 'try-error')dates <- as.Date('1998-01-01') | |
| dates <- round_date(dates[which.max(dates)], unit = 'month') | |
| data.frame(name = x$dataset.meta$dataset.name, | |
| types = x$dataset.meta$dataset.type, | |
| dates = dates) | |
| } | |
| test_samples <- do.call(rbind.data.frame, lapply(all.ds,sub_tables)) | |
| test_cast <- dcast(test_samples, | |
| formula = types ~ dates, fun.aggregate = length) | |
| new.names <- data.frame(old = c("pollen surface sample", "pollen", | |
| "loss-on-ignition", "vertebrate fauna", | |
| "plant macrofossil", "macroinvertebrate", | |
| "geochronologic", "physical sedimentology", | |
| "geochemistry", "diatom", "charcoal", | |
| "testate amoebae", "water chemistry", | |
| "ostracode surface sample", "insect", | |
| "ostracode", | |
| "Energy dispersive X-ray spectroscopy (EDS/EDX)", | |
| "X-ray fluorescence (XRF)", "All Records"), | |
| new = c('Modern Pollen', "Fossil Pollen", | |
| 'LOI', 'Vertebrate Fauna', 'Plant Macros', | |
| 'Macro-Inverts.', 'Geochronological', | |
| 'Geophysical', 'Geochemical', | |
| 'Diatoms', 'Charcoal', 'Testate Amoebae', | |
| 'Water chem.', 'Modern Ostracodes', 'Insects', | |
| 'Fossil Ostracodes', 'EDS//EDX', 'XRF', 'All Records'), | |
| stringsAsFactors = FALSE) | |
| test_cast[,1] <- as.character(test_cast[,1]) | |
| test_cast[,1] <- new.names$new[match(test_cast[,1], new.names$old)] | |
| test_cast <- rbind(test_cast, test_cast[1,]) | |
| test_cast$types[nrow(test_cast)] <- "All Records" | |
| test_cast[nrow(test_cast),2:ncol(test_cast)] <- colSums(test_cast[1:(nrow(test_cast)-1),2:ncol(test_cast)]) | |
| test_cumsum <- test_cast | |
| test_cumsum[,7] <- rowSums(test_cumsum[,2:7]) | |
| test_cumsum[nrow(test_cumsum), 8] <- test_cumsum[nrow(test_cumsum), 7] + test_cumsum[nrow(test_cumsum), 8] | |
| test_cumsum[,8:ncol(test_cumsum)] <- t(apply(test_cumsum[,8:ncol(test_cumsum)], 1, cumsum)) | |
| # We know that 'all.records` ranges from 10738 to 12761: | |
| test_cumsum[nrow(test_cumsum),2:ncol(test_cumsum)] <- ((test_cumsum[nrow(test_cumsum),2:ncol(test_cumsum)] - 9000) / 4000) * 600 + 100 | |
| test_plotter <- melt(test_cumsum[,c(1, 8:ncol(test_cumsum))]) | |
| test_plotter$cumulative[test_plotter$types == 'All Records'] <- "Legacy Records" | |
| test_plotter$cumulative[!test_plotter$types == 'All Records'] <- "New Records" | |
| test_plotter$variable <- as.Date(as.character(test_plotter$variable)) | |
| neotomaplot <- ggplot(test_plotter, aes(x = variable, y = value, group = types)) + | |
| geom_path(aes(color = types, size = cumulative)) + | |
| theme_bw() + | |
| xlab('Date') + | |
| ylab('Records Uploaded') + | |
| scale_size_discrete(range=c(1.5, 1), guide = 'none') + | |
| theme(axis.title = element_text(family = 'serif', size = 16, face = 'bold'), | |
| axis.text = element_text(family = 'serif', size = 14), | |
| legend.text = element_text(family='serif', size = 12), | |
| legend.title=element_blank()) | |
| ggsave(filename = 'neotomacumulative.png', plot = neotomaplot, width = 8, height = 6, dpi = 300) | |
This comment has been minimized.
Produces something like this plot: