Skip to content

Instantly share code, notes, and snippets.

@willpearse
Last active February 18, 2020 03:47
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save willpearse/1bdfaff2eb8a93080f159d7a77993d96 to your computer and use it in GitHub Desktop.
Save willpearse/1bdfaff2eb8a93080f159d7a77993d96 to your computer and use it in GitHub Desktop.
MADcomm broken (fixable?) functions
#' @export
.reed.2017a <- function(...) {
data <-read.csv("http://pasta.lternet.edu/package/data/eml/knb-lter-sbc/17/30/a7899f2e57ea29a240be2c00cce7a0d4", as.is=TRUE)
names(data) <- tolower(names(data))
data$count[data$count < 0] <- 0
data$taxon_species[data$taxon_species == -99999] <- NA
data$taxon_genus[data$taxon_genus == -99999] <- NA
data$species <- with(data, paste(taxon_genus, taxon_species, sep="_"))
data$site <- with(data, paste(site, transect, sep="_"))
data$site_year <- with(data, paste(site, year, sep="_"))
data <- with(data, tapply(count, list(site_year, species), sum, na.rm = TRUE))
data[is.na(data)] <- 0
temp <- strsplit(rownames(data), "_")
year <- matrix(unlist(temp), ncol=3, byrow=TRUE)[,3]
name <- matrix(unlist(temp), ncol=3, byrow=TRUE)[,1]
return(.matrix.melt(data,
data.frame(units="#"),
data.frame(id=rownames(data), year, name, lat=NA, long=NA, address=NA, area=NA),
data.frame(species=colnames(data), taxonomy=NA)))
}
#' @export
.reed.2017b <- function(...) {
data <-read.csv("https://pasta.lternet.edu/package/data/eml/knb-lter-sbc/19/23/5daf0da45925ba9014872c6bc9f6c8bb")
names(data) <- tolower(names(data))
data$count[data$count < 0] <- 0
data$taxon_species[data$taxon_species == -99999] <- NA
data$species <- with(data, paste(taxon_genus, taxon_species, sep="_"))
data$site <- with(data, paste(site, transect, sep="_"))
data$site_year <- with(data, paste(site, year, sep="_"))
data <- with(data, tapply(count, list(site_year, species), sum, na.rm = TRUE))
data[is.na(data)] <- 0
temp <- strsplit(rownames(data), "_")
year <- matrix(unlist(temp), ncol=3, byrow=TRUE)[,3]
name <- matrix(unlist(temp), ncol=3, byrow=TRUE)[,1]
return(.matrix.melt(data,
data.frame(units="#"),
data.frame(id=rownames(data), year, name, lat=NA, long=NA, address=NA, area=NA),
data.frame(species=colnames(data), taxonomy=NA)))
}
#' @export
.rodriguezBuritica.2013 <- function(...){
data <- read.csv(suppdata("E094-083","SMCover.csv",from = "esa_archives"))
species.data <- read.csv(suppdata("E094-083","Species.csv",from = "esa_archives"))
species.data$ReportedName <- sub(" ", "_", species.data$ReportedName)
species.data$AcceptedName <- sub(" ", "_", species.data$AcceptedName)
data$species <- species.data$AcceptedName[match(data$Code, species.data$Code)]
data$plot_year <- with(data, paste(Plot, Year, sep = "_"))
transformed.data <- with(data, tapply(Cover, list(plot_year, species), sum, na.rm=TRUE))
transformed.data[is.na(transformed.data)] <- 0
temp <- strsplit(rownames(transformed.data), "_")
year <- matrix(unlist(temp), ncol=2, byrow=TRUE)[,2]
name <- matrix(unlist(temp), ncol=2, byrow=TRUE)[,1]
return(.matrix.melt(transformed.data,
data.frame(units="#"),
data.frame(id=rownames(transformed.data), year, name, lat=NA, long=NA, address=NA, area=NA),
data.frame(species=colnames(transformed.data), taxonomy=NA)))
}
#' @export
.ross.2014 <- function(...){
data <- read.csv(file = "https://portal.edirepository.org/nis/dataviewer?packageid=knb-lter-and.3136.5&entityid=88e40dc185bd3f00e7464398b61f40fc", header = TRUE)
species <- data$SCI_NAME
data$id <- rep(paste(data$BIOGEOGRAPHY,data$DATE))
site.metadata <- data[!duplicated(data$id),]
site.metadata <- with(site.metadata,
data.frame(id=id, year=DATE, name=BIOGEOGRAPHY, lat=NA,long=NA, address=NA,area=NA)
)
site <- rep(paste(data$BIOGEOGRAPHY,data$DATE), 856)
abundance <- as.vector(data$INDIVIDUALS)
abundance[is.na(abundance)] <- 0
return(.df.melt(species, site, abundance,
study.metadata=data.frame(units="#"),
site.metadata,
species.metadata=data.frame(species=unique(species), taxonomy=NA)
))
}
#' @export
.truxa.2015 <- function(...){
data <- as.data.frame(read_xlsx(suppdata("10.5061/dryad.fg8f6/1", "Appendix_3.xlsx"), skip=1)) #use skip to skip any rows that you don't want/aren't useful
comm <- data[,-1:-3] #get rid of columns you don't want
rownames(comm) <- data$Species #name the rows what you want
comm <- t(comm) #t=transpose, flip the rows and columns
return(.matrix.melt(comm,
data.frame(units="#"),
data.frame(id=rownames(comm),year="2006-2008",
name=c("Danube non-flooded", "Danude flooded", "Leitha non-flooded", "Leitha flooded", "Morava non-flooded", "Morava flooded"),
lat=c("16\u00BA41'24", "16\u00BA42'20", "16\u00BA51'32", "16\u00BA53'26", "16\u00BA53'22"),
long=c("48\u00BA08'41", "48\u00BA07'53", "48\u00BA00'19", "48\u00BA03'28", "48\u00BA17'00", "48\u00BA17'96"),
address="Eastern Austria",area="na"),
data.frame(species=colnames(comm),taxonomy="Lepidoptera")))
}
#' @export
.schmitt.2012 <- function(...){
addr <- "https://pasta.lternet.edu/package/data/eml/knb-lter-sbc/46/3/4ded739e78e50552837cf100f251f7ab"
addr <- sub("^https","http",addr)
data <-read.csv(addr,header=F, skip=1, sep=",", quote='"',
col.names=c("YEAR", "MONTH", "DATE", "SITE", "DEPTH", "REP",
"SP_CODE", "COUNT", "COMMENTS", "Common_Name",
"taxon_GROUP", "SURVEY", "taxon_PHYLUM",
"taxon_CLASS", "taxon_ORDER", "taxon_FAMILY",
"taxon_GENUS", "taxon_SPECIES"), check.names=TRUE)
data$species <- with(data, paste(taxon_GENUS, taxon_SPECIES, sep="_"))
data$site.year.depth <- with(data, paste(SITE, YEAR, DEPTH, sep="_"))
site.id <- unique(data$site.year.depth)
year <- data$YEAR[!duplicated(data$site.year.depth)]
name <- data$SITE[!duplicated(data$site.year.depth)]
return(.df.melt(data$species, data$site.year.depth, data$COUNT,
data.frame(units="#"),
data.frame(id=site.id, year, name, lat=NA, long=NA, address="Santa Cruz Island, CA, USA", area=NA),
data.frame(species=unique(data$species), taxonomy="Pycnopodia")))
}
#' @export
.sandau.2017 <- function(...){
tmp.file <- tempfile()
download.file("https://www.datadryad.org/bitstream/handle/10255/dryad.129944/BB_all_4_SimilMatrices_Dryad.xlsx?sequence=1", tmp.file)
data <- read.xls(tmp.file, sheet=2)
lookup <- read.xls(suppdata("10.5061/dryad.44bm6", "BB_all_4_SimilMatrices_Dryad.xlsx"), sheet=1, skip=5, header=FALSE, as.is=TRUE)[-1:-8,]
lookup[,2] <- .sanitize.text(lookup[,2])
lookup[,2] <- sapply(strsplit(lookup[,2], " "), function(x) paste(x[1:2],collapse="_"))
lookup <- setNames(lookup[,2], lookup[,1])
names(data)[names(data) %in% names(lookup)] <- lookup[names(data)[names(data) %in% names(lookup)]]
site_year <- with(data, paste(data$PlotID, Year, sep="_"))
data <- cbind(site_year, data)
comm.mat <-data[-1:-11]
#This sets the row names to the unique plot_year identifier
rownames(comm.mat) <-data[,1]
site.metadata <- data[!duplicated(data$site_year),]
return(.matrix.melt(comm.mat,
data.frame(units="%", treatment=""),
data.frame(id=site.metadata$site_year, name=site.metadata$PlotID, year=site.metadata$Year, lat=NA, long=NA, address="Grandcour", treatment=site.metadata$Treat, area="20 x 20 m"),
data.frame(species=unique(lookup, taxonomy="Plantae"))))
}
#' @export
.russo.2015 <- function(...){
species <- read.xls(suppdata("10.5061/dryad.6cr82", "DataforDryad_netmaludome.xlsx"), header=FALSE, as.is=TRUE, nrow=2)[2:1,]
species <- unname(apply(as.matrix(species), 2, paste, collapse="_"))[-1]
data <- read.xls(suppdata("10.5061/dryad.6cr82", "DataforDryad_netmaludome.xlsx"), as.is=TRUE, skip=3)
comm <- as.matrix(data[,-1])
colnames(comm) <- species; rownames(comm) <- data[,1]
return(.matrix.melt(comm,
data.frame(units="#"),
data.frame(id=rownames(comm), name=colnames(comm), year="2008-2013", lat=NA, long=NA, address=NA, area="New York state, USA"),
data.frame(species=colnames(comm), taxonomy=NA)
))
}
# YEAR UNKNOWN
#' @export
.mcknight.2000 <- function(...){
data <- read.csv(file="https://pasta.lternet.edu/package/data/eml/knb-lter-mcm/12/3/7f8537c0f0f80a255551ad61d9d512dc",header=TRUE)
species <- unique(data$Species)
data$id <- rep(paste(data$Location,data$Date))
site.metadata <- data[!duplicated(data$id),]
site.metadata <- with(site.metadata,
data.frame(id=id, year=Date, name=Location, lat=NA,long=NA, address="antarctica",area=NA)
)
site <- rep(paste(data$Location,data$Date), 27)
abundance <- as.vector(data[,10])
abundance[is.na(abundance)] <- 0
return(.df.melt(species, site, abundance,
study.metadata=data.frame(units="#"),
site.metadata,
species.metadata=data.frame(species=unique(species), taxonomy=NA)
))
}
#' @export
.mcmahon.2017 <- function(...){
abun <- read.csv(file = "https://portal.edirepository.org/nis/dataviewer?packageid=knb-lter-ntl.349.2&entityid=da11cbc268d91fef78c78bd2813adbf6", header = TRUE)
site_meta1 <- read.csv(file = "https://portal.edirepository.org/nis/dataviewer?packageid=knb-lter-ntl.349.2&entityid=a508f609c7d45f1c10604a4722acfd04", header = TRUE)
site_meta2 <- read.csv(file = "https://portal.edirepository.org/nis/dataviewer?packageid=knb-lter-ntl.349.2&entityid=d35b86dbfcf7bf6eab90a2fd5539809c", header = TRUE)
org_meta <- read.csv(file = "https://portal.edirepository.org/nis/dataviewer?packageid=knb-lter-ntl.349.2&entityid=5c558e387eadadf707a3f84742b0d3e1", header = TRUE)
colnames(abun)[1] <- "OTU"
data_meta1 <- merge(abun, site_meta1, by = "Sample_Name")
site_data <- merge(data_meta1, site_meta2, by = "Sample_Name")
data <- merge(org_meta, site_data, by = "OTU")
comm <- with(data, tapply(value, list(paste(Lake,Collection_Date,sep="_", OTU), length)))
site.names <- sapply(strsplit(rownames(comm), "_"), function(x) x[1])
years <- sapply(strsplit(rownames(comm), "_"), function(x) x[2])
comm[is.na(comm)] <- 0
unique <- data[!duplicated(data$OTU),]
colnames(unique)[1] <- 'Species'
unique <- unique[,-9:-20]
return(.matrix.melt(comm,
data.frame(units="p/a"),
data.frame(id=rownames(comm),years,site.names,lat=NA,long=NA,address="North of Minocqua, Wisconsin USA",area="Depth"),
data.frame(species=colnames(comm),taxonomy=unique, )))
}
#' @export
.miller.2013 <- function(...){
data<- read.csv(file = "https://portal.edirepository.org/nis/dataviewer?packageid=knb-lter-and.2739.7&entityid=1743caa458ea7bb640833d884576f51c", header = TRUE)
species <- data$ENTITY
data$id <- rep(paste(data$TRAPID,data$YEAR))
site.metadata <- data[!duplicated(data$id),]
site.metadata <- with(site.metadata,
data.frame(id=id, year=YEAR, name=TRAPID, lat=NA,long=NA, address="Willamette National Forest Oregon USA",area=NA)
)
site <- rep(paste(data$TRAPID,data$YEAR), 17663)
abundance <- as.vector(data$NO_INDIV)
abundance[is.na(abundance)] <- 0
return(.df.melt(species, site, abundance,
study.metadata=data.frame(units="#"),
site.metadata,
species.metadata=data.frame(species=unique(species), taxonomy=NA)
))
}
#' @export
.myster.2010 <- function(...){
addr <- "https://pasta.lternet.edu/package/data/eml/knb-lter-luq/100/246250/f718e683c7c425207c7d1f7adeddf85f"
addr <- sub("^https","http", addr)
data <-read.csv(addr, header=F, skip=1, sep=",", col.names=c("date", "plot", "species", "percent.cover"), check.names=TRUE)
data$date <- format(as.Date(data$date, format="%d/%m/%Y"),"%Y")
data$plot.year <- with(data, paste(plot, date, sep="_"))
site.id <- unique(data$plot.year)
year <- data$date[!duplicated(data$plot.year)]
name <- data$plot[!duplicated(data$plot.year)]
return(.df.melt(data$species, data$plot.year, data$percent.cover,
data.frame(units="area"),
data.frame(id=site.id, year, name, lat="-65.8257", long="18.3382", address="Luquillo Experimental Forest, Puerto Rico, USA", area="2mX5m"),
data.frame(species=unique(data$species), taxonomy="Plantae")))
}
#' @export
.nichols.2006 <- function(...) {
addr <- "https://pasta.lternet.edu/package/data/eml/knb-lter-ntl/61/3/wgnhs_macrophyte_aquaplt2"
addr <- sub("^https","http",addr)
abundanceData <-read.csv(addr, header=F, skip=1, sep=",", quote='"',
col.names=c("mwbc", "lake_unique", "lakename",
"county", "county_id", "month", "year4",
"spcode", "aqstano", "visual_abundance"),
check.names=TRUE)
specAddr <- "https://pasta.lternet.edu/package/data/eml/knb-lter-ntl/61/3/wgnhs_macrophyte_pltname"
specAddr <- sub("^https","http",specAddr)
specData <-read.csv(specAddr, header=F, skip=1, sep=",", quote='"',
col.names=c("spcode", "spec_no", "scientific_name",
"common_name", "lifeform", "spec_category",
"genus"), check.names=TRUE)
abundanceData$site.year <- with(abundanceData, paste(lakename, year4, sep=">"))
abundanceData$species <- specData$scientific_name[match(abundanceData$spcode, specData$spcode)]
data <- with(abundanceData, tapply(visual_abundance, list(site.year, species), sum, na.rm = TRUE))
data[is.na(data)] <- 0
temp <- unlist(strsplit(rownames(data), ">", fixed=T))
name <- temp[seq(1,length(temp), 2)]
year <- temp[seq(2,length(temp), 2)]
return(.matrix.melt(data,
data.frame(units="#"),
data.frame(id=rownames(data), year, name, lat=NA, long=NA, address=NA, area=NA),
data.frame(species=colnames(data), taxonomy=NA)))
}
#' @export
.lorite.2017<-function(...){
expdata<-read.delim("https://doi.org/10.1371/journal.pone.0182414.s003", nrows=410)
lookup <- read.delim("https://doi.org/10.1371/journal.pone.0182414.s003", skip=414, nrows=34,as.is = TRUE,header = FALSE)
lookup<-lookup[,1:2]
expdata$new.site<-paste(expdata$Site,expdata$transect,expdata$quadrat,sep="_")
names(expdata)[7:40]<-lookup[,2]
comm<-cbind(id=expdata[,41],expdata[,7:40])
#needs meta data, loc: scattered through paper/tables but existant.
return(.matrix.melt(comm,
data.frame(units="percent"),
data.frame(id=comm$id,year=NA),
data.frame(species=lookup[,2],taxonomy=NA)
))
}
#' @export
.kaspari.2016 <- function(...) {
addr <- "https://pasta.lternet.edu/package/data/eml/msb-tempbiodev/1111170/1/cfd3a55deef52e3a93469057053f5404"
addr <- sub("^https", "http", addr)
data <-read.csv(addr, header=F, skip=1, sep=",",
col.names=c("location", "distance", "direction",
"plotcode", "taxon", "abundance"),
check.names=TRUE)
return(.df.melt(data$taxon, data$plotcode, data$abundance,
data.frame(units="#"),
data.frame(id=unique(data$plotcode), year="2016", name=unique(data$plotcode), lat=NA, long=NA, address=NA, area=NA),
data.frame(species=unique(data$taxon), taxonomy="Arthropoda")))
}
#' @export
.johnson.2017 <- function(...){
datam<-read.csv(suppdata("10.5061/dryad.cb13r","Species_x_SiteMatrix.csv"), as.is=TRUE)
sitedataA<-read.csv(suppdata("10.5061/dryad.cb13r","RawSoilData.csv"),as.is = TRUE)
sitedataB<-read.csv(suppdata("10.5061/dryad.cb13r","VacantLot_DemolitionDate.csv"),as.is = TRUE)
sppdata<-read.csv(suppdata("10.5061/dryad.cb13r","Species_x_TraitsMatrix.csv"),as.is = TRUE)
comm<-datam[,-(1:2)]
sitedataB <- rbind(sitedataB, sitedataB)
sitedataB$new.code <- paste(sitedataB$Code, rep(c("BF","RG"), each=nrow(sitedataB)/2), sep=".")
sitedataA$new.code <- paste(sitedataA$LotID, rep(c("BF","RG"), each=nrow(sitedataA)/2), sep=".")
sitedata<-merge(sitedataA,sitedataB,by="new.code",all.x=TRUE,all.y = TRUE)
names(sitedata)[c(1,27)] <- c("id","address")
sitedata$lat <- NA;sitedata$long <-NA; sitedata$area <- NA
sitedata$year <- "2012-2013"
sitedata$name <- sitedata$id
names(sppdata)[1:2] <- c("species","taxonomy")
return(.matrix.melt(comm,
data.frame(units="percent"),
sitedata,
sppdata)
)
}
#' @export
.hollibaugh.2017 <- function(...){
data <- read.csv(file = "https://pasta.lternet.edu/package/data/eml/knb-lter-pal/114/2/3ab81d869107c4b3a7f0fb76fed55ed4", header = TRUE)
names(data)[7:8] <- c("latitude","longitude")
taxon <- rep(c("Eub","AOB","Archaea","Cren","AOA", "AOB","Eub","AOB","Archaea","Cren","AOA","AOB"), nrow(data))
data$id <- paste(data$Station,data$Datetime.GMT)
site.metadata <- data[!duplicated(data$id),]
site.metadata <- with(site.metadata,
data.frame(id=id, year=Datetime.GMT, name=Station, lat=latitude, long=longitude, address=NA, area=NA)
)
site <- rep(paste(data$Station,data$Datetime.GMT), 12)
abundance <- unname(unlist(data[,10:21]))
return(.df.melt(taxon, site , abundance,
study.metadata=data.frame(units="#"),
site.metadata,
species.metadata=data.frame(species=unique(taxon), taxonomy=NA)))
}
#' @export
.harrower.2017<-function(...){
birddata<-read.csv(suppdata("10.5061/dryad.365dr", "bird_data.csv"),as.is = TRUE)
envdata<-read.csv(suppdata("10.5061/dryad.365dr","envr_data.csv"),as.is=TRUE)
envdata$name<-paste(envdata$block,envdata$transect,sep="_")
birddata$id<-paste(birddata$block,birddata$transect,birddata$year,sep="_")
birddata$name<-paste(birddata$block,birddata$transect,sep="_")
birddata$lat<-"50o39'59\" N"
birddata$long<-"120o19'09\" W"
birddata$address<- "Lac du Bois Provincial Park near Kamloops, British Columbia, Canada"
birddata$area<-"20ha"
birddata$binom<-paste(birddata$genus,birddata$species,sep=".")
comm <- with(birddata, tapply(binom, list(binom, site), length))
comm[is.na(comm)] <- 0
comm<-t(comm)
birdsub<-birddata[!duplicated(birddata$site),]
envsub<-envdata[,c(3,8)]
envtest<-merge(birdsub,envsub,by="name")
envtest<-envtest[,-c(6:11,17)]
return(.matrix.melt(comm,
data.frame(units="#"),
envtest,
data.frame(species=birddata$binom, taxonomy=NA)
)
)
}
#' @export
.franklin.2018 <- function(...) {
data <- read.xls("CopyofWESTCOSPPCOVER.xlsx", as.is=TRUE)
ground_data <- read.xls("WEST CO GROUND COVER.xlsx")
data$R4_SPP <- NULL
colnames(data) <- colnames(ground_data)
combined.data <- rbind(data, ground_data)
combined.data$year <- NA
for(i in seq_len(nrow(combined.data))){
t <- as.numeric(regexpr("[0-9]{4}", combined.data$SITE_ID[i]))[1]
combined.data$year[i] <- substr(combined.data$SITE_ID[i], t, t+4)
}
metadata <- read.xls("WEST CO SAGEBRUSH PLOTS.xlsx", as.is=TRUE)
combined.data$SITE_ID <- gsub(" ", "", combined.data$SITE_ID)
combined.data$lat <- metadata$LATITUDE[match(combined.data$SITE_ID, metadata$SITE_ID)]
combined.data$long <- metadata$LONGITUDE[match(combined.data$SITE_ID, metadata$SITE_ID)]
combined.data$elevation.ft <- metadata$Elev..ft.[match(combined.data$SITE_ID, metadata$SITE_ID)]
combined.data$aspect <- metadata$Aspect[match(combined.data$SITE_ID, metadata$SITE_ID)]
combined.data$pct.slope <- metadata$Pct_Slope[match(combined.data$SITE_ID, metadata$SITE_ID)]
combined.data$project <- metadata$PROJECT[match(combined.data$SITE_ID, metadata$SITE_ID)]
combined.data$COVER_PERCENT <- as.numeric(combined.data$COVER_PERCENT)
combined.data$COVER_PERCENT[is.na(combined.data$COVER_PERCENT)] <- 0
return(.df.melt(combined.data$NAME,
combined.data$SITE_ID,
combined.data$COVER_PERCENT,
data.frame(units="area"),
data.frame(id=unique(combined.data$SITE_ID),
year=combined.data$year[!duplicated(combined.data$SITE_ID)],
name=unique(combined.data$SITE_ID),
lat=combined.data$lat[!duplicated(combined.data$SITE_ID)],
long=combined.data$long[!duplicated(combined.data$SITE_ID)],
address=NA,
area="0.1ha",
elevation.ft=combined.data$elevation.ft[!duplicated(combined.data$SITE_ID)],
aspect=combined.data$aspect[!duplicated(combined.data$SITE_ID)],
pct.slope=combined.data$pct.slope[!duplicated(combined.data$SITE_ID)],
project=combined.data$project[!duplicated(combined.data$SITE_ID)]),
data.frame(species=unique(combined.data$NAME),
taxonomy=NA,
other="Plant study; Percent cover of species and ground")))
}
#' @export
.ellison.2017 <- function(...){
data <- read.csv(file="https://portal.edirepository.org/nis/dataviewer?packageid=knb-lter-hfr.97.23&entityid=a840ed1f4c891cd7e6abe660aecb797a", header=TRUE)
species <- data$species
data$id <- rep(paste(data$plot,data$date))
site.metadata <- data[!duplicated(data$id),]
site.metadata <- with(site.metadata,
data.frame(id=id, year=date, name=plot, lat=NA,long=NA, address="North of West Point, New York, USA",area=NA)
)
site <- rep(paste(data$plot,data$date), 3120)
abundance <- as.vector(data$no.ants)
abundance[is.na(abundance)] <- 0
return(.df.melt(species, site, abundance,
study.metadata=data.frame(units="#"),
site.metadata,
species.metadata=data.frame(species=unique(species), taxonomy=NA)
))
}
#' @export
.collins.2018 <- function(...) {
# The species in this dataset are not named; Generic identifiers are given (e.g. 'sp1')
# Species codes were added due to people not wanting scott to publish their data.
data <- read.csv("https://pasta.lternet.edu/package/data/eml/edi/15/5/f69c8fe563067164191d61b6e33eff03", as.is=TRUE)
names(data) <- tolower(names(data))
metadata <- read.csv("https://pasta.lternet.edu/package/data/eml/edi/15/5/8284876afe3a1cb0a919d37e1164357f", as.is=TRUE)
names(metadata) <- tolower(names(metadata))
data$site_year <- with(data, paste(data$sitesubplot, experiment_year, sep="_"))
data$latitude <- metadata$lat[match(data$site_project_comm, metadata$site_project_comm)]
data$longitude <- metadata$long[match(data$site_project_comm, metadata$site_project_comm)]
data$address <- metadata$location[match(data$site_project_comm, metadata$site_project_comm)]
data$area <- metadata$plot_size[match(data$site_project_comm, metadata$site_project_comm)]
return(.df.melt(data$species,
data$site_year,
data$relcover,
data.frame(units="%"),
data.frame(id=unique(data$site_year),
year=data$experiment_year[!duplicated(data$site_year)],
name=data$sitesubplot[!duplicated(data$site_year)],
lat=data$latitude[!duplicated(data$site_year)],
long=data$longitude[!duplicated(data$site_year)],
address=data$address[!duplicated(data$site_year)],
area=data$area[!duplicated(data$site_year)]),
data.frame(species=unique(data$species), taxonomy="Plantae")))
}
#' @export
.coblentz.2015 <- function(...){
# This won't work on Windows OS. I might be wrong but I think that it has
# something to do with the spaces in the file name.
data <- read.xls(suppdata("10.5061/dryad.j2c13", "Invert Community Data 2012 RAW.xlsx"), stringsAsFactors=FALSE)
colnames(data) <- with(data, paste(colnames(data), data[3,], sep="_"))
data <- data[-1:-3,]
species <- data[,1]
data <- data[,-1]
data <- sapply(data, as.numeric)
rownames(data) <- species
return(.matrix.melt(data))
}
#' @export
.chamailleJammes.2016 <- function(...){
data <- read.csv(suppdata("10.1371/journal.pone.0153639", 1), stringsAsFactors=FALSE)
year <- (1992:2005)[-6] # Study excluded the year of 1997
data <- aggregate(. ~ WATERHOLE, data = data, FUN=sum)
species <- colnames(data)
data <- reshape(data, varying = list(names(data)[2:ncol(data)]), v.names = "Count",
idvar = "WATERHOLE", times = c("ELEPHANT", "GIRAFFE", "IMPALA","KUDU",
"ROAN", "SABLE", "WILDEBEEST", "ZEBRA"), timevar = "species", direction = "long")
rownames(data) <- NULL
id <- unique(data$WATERHOLE)
year <- rep(year, each=length(id))
temp <- paste(data$WATERHOLE, year, sep="_")
return(.df.melt(data$species,
data$WATERHOLE,
data$Count,
data.frame(units="#"),
data.frame(id=, lat="18", long="26", address="Hwange National Park, Zimbabwe, Africa", area=NA),
data.frame(species=unique(data$species, taxonomy="Mammalia"))))
}
.brant.2018 <- function(...){
tmp.file <- tempfile()
download.file("https://zenodo.org/record/1198846/files/template_MosquitoDataBrant77.xlsx", tmp.file)
DailyHLC <- read.xls(tmp.file, sheet=4, as.is=TRUE, skip=9)
lookup <- read.xls(tmp.file, sheet=3, as.is=TRUE)
lookup[,2] <- .sanitize.text(lookup[,2])
#lookup[,2] <- sapply(strsplit(lookup[,2], " "), function(x) paste(x[1:2],collapse="_"))
lookup <- setNames(lookup[,2], lookup[,1])
names(DailyHLC) <- gsub("_count", "", names(DailyHLC), fixed=TRUE)
names(lookup) <- gsub(".", "_", names(lookup), fixed=TRUE)
names(DailyHLC)[names(DailyHLC) %in% names(lookup)] <- lookup[names(DailyHLC)[names(DailyHLC) %in% names(lookup)]]
DailyHLC$site_year <- with(DailyHLC, paste(field_name, Location, Date, sep="_"))
#community matrix
comm <- as.matrix(DailyHLC[,c(-1:-7,-ncol(DailyHLC))])
rownames(comm) <- DailyHLC$site_year
site.metadata <- DailyHLC[,1:7]
species.meta <- data.frame(species=colnames(comm), taxonomy="Insecta")
return(.matrix.melt(comm,
data.frame(units="#"),
data.frame(id=DailyHLC$site_year, name=site.metadata$Location, year=site.metadata$Date, lat="4.6353 to 4.9654", long="116.9542 to 117.8004", address="SAFE project, Borneo", area="attracted to humans"),
species.meta))
}
.lightfoot.2016 <- function(...) {
data <- read.table("http://sev.lternet.edu/sites/default/files/data/sev-106/sev106_hopperdynamics_20150826.txt", header=T, sep=",")
data$month.year <- format(as.Date(data$DATE, format="%m/%d/%Y"),"%m/%Y")
spec_codes <- c("ACPI","AGDE","AMCO","ARCO","ARPS","AUEL","AUFE","BOAR",
"BRMA","CIPA","COCR","COOC","COTE","DABI","ERSI","HATR",
"HERU","HEVI","HICA","LAAZ","LEWH","MEAR","MEAZ","MEBO",
"MEGL","MELA","MEOC","METE","OPOB","PAPA","PHQU","PHRO",
"PSDE","PSTE","SCNI","SYMO","TRCA","TRFO","TRKI","TRPA",
"TRPI","XACO","XAMO")
species <- c("Acantherus piperatus","Ageneotettix deorum",
"Amphitornus coloradus","Arphia conspersa",
"Arphia pseudonietana","Aulocara elliotti",
"Aulocara femoratum","Bootettix argentatus",
"Brachystola magna","Cibolacris parviceps",
"Cordillacris crenulata","Cordillacris occipitalis",
"Conozoa texana","Dactylotum bicolor",
"Eritettix simplex","Hadtrotettix trifasciatus",
"Heliaula rufa","Hesperotettix viridis",
"Hippopedon capito","Lactista azteca","Leprus wheeleri",
"Melanoplus aridus","Melanoplus arizonae",
"Melanoplus bowditchi","Melanoplus gladstoni",
"Melanoplus lakinus","Melanoplus occidentalis",
"Mermeria texana","Opeia obscura","Paropomala pallida",
"Phlibostroma quadrimaculatum","Phrynotettix robustus",
"Psoloessa delicatula","Psoloessa texana",
"Schistocerca nitens","Syrbula montezuma",
"Trimerotropis californicus","Tropidolophus formosus",
"Trachyrhachis kiowa","Trimerotropis pallidipennis",
"Trimerotropis pistrinaria","Xanthippus corallipes",
"Xanthippus montanus")
metadata <- data.frame(spec_codes, species)
data$SPECIES <- metadata$species[match(data$SPECIES, metadata$spec_codes)]
data <- with(data, tapply(CNT, list(site_year, SPECIES), sum, na.rm=TRUE))
data$site_year <- with(data, paste(SITE, year, sep="_"))
temp <- strsplit(rownames(data), "_")
year <- matrix(unlist(temp), ncol=2, byrow=TRUE)[,2]
year <- format(as.Date(data$DATE, format="%m/%Y"),"%Y")
name <- matrix(unlist(temp), ncol=2, byrow=TRUE)[,1]
#needs some "burned" info?...
return(.df.melt(data$species, data$SITE, data$CNT,
data.frame(units="#"),
data.frame(id=unique(data$plot_year), year, name, lat=NA, long=NA, address="Sevilleta National Wildlife Refuge, New Mexico", area=NA),
data.frame(species=unique(data$species), taxonomy="Orthoptera")))
}
.fia.2018 <- function(...){
.get.fia <- function(state, var, select){
t.zip <- tempfile()
download.file(paste0("https://apps.fs.usda.gov/fia/datamart/CSV/",state,"_",var,".zip"), t.zip)
unzip(t.zip)
data <- fread(paste0(state,"_",var,".csv"), select=select)
unlink(paste0(state,"_",var,".csv"))
return(data)
}
states <- c("AK","AL","AZ","AR","CA","CO","CT","DE","FL","GA","HI","IA","ID","IL","IN","KS","KY","LA","ME","MD",
"MA","MI","MN","MS","MO","MT","NC","NE","NH","NV","NM","NJ","NY","ND","OH","OK","OR","PA","RI",
"SC","SD","TN","TX","UT","VA","VT","WA","WI","WV","WY","VI","PR")
data <- vector("list", length(states))
for(i in seq_along(states)){
#Download/read in data
tree <- .get.fia(states[i], "TREE", c("CN","PLT_CN","PLOT","SPCD","DIA","INVYR"))
cond <- .get.fia(states[i], "COND", c("PLT_CN","PLOT","STDAGE","FORTYPCD","CONDID"))
plot <- .get.fia(states[i], "PLOT", c("PLOT","LAT","LON","ELEV", "CN"))
#Subset everything, remove sites with multiple/ambiguous codings, merge
tree <- tree[tree$DIA > 1.96,]
cond <- cond[cond$PLT_CN %in% as.integer64(names(Filter(function(x) x==1, table(cond$PLT_CN)))),]
data[[i]] <- merge(tree, merge(cond, plot, by.x="PLT_CN", by.y="CN"), by.x="PLT_CN", by.y="PLT_CN")
data[[i]]$state <- states[i]
}
data <- rbindlist(data)
t <- setNames(seq_along(unique(data$PLT_CN)), unique(data$PLT_CN))
data$site.id <- paste0(data$state, "_", t[as.character(data$PLT_CN)])
uniq.site <- as.data.frame(unique(data[, 15:16]))
sample.sites <- as.data.frame(uniq.site %>% group_by(state) %>% sample_n(size = 30))
data <- merge(sample.sites, data, by="site.id")
data$site.id <- paste0(data$site.id, "_", data$INVYR)
fia.spp <- read.csv("FIA_SppList.csv") #currently in raw_data folder
fia.spp <- data.table(fia.spp$SPCD, paste0(fia.spp$GENUS, "_", fia.spp$SPECIES))
data <- merge(data, fia.spp, by.x="SPCD", by.y="V1")
data <- data.frame(data$V2, data$site.id, data$LAT, data$LON, data$ELEV,
data$STDAGE, data$FORTYPCD, data$CONDID, data$DIA)
names(data) <- c("species", "site.id", "lat", "long", "elev", "stdage", "forestclass", "condclass", "diameter")
comm <- t(as.matrix(with(data, table(species,site.id))))
dia <- aggregate(diameter~species, data, mean)
dia.count <- aggregate(diameter~species, data, length)
dia$diameter.n <- dia.count$diameter
site.df <- data[!duplicated(data$site.id),]
site.df <- site.df[,2:8]
sites <- rownames(comm)
site.df <- site.df[match(sites, site.df$site.id), ]
return(.matrix.melt(comm,
data.frame(units="#"),
data.frame(id=site.df$site.id, name=NA, year=NA, lat=site.df$lat,
long=site.df$long, address=NA, area=NA,
elevation=site.df$elev, class=site.df$forestclass),
data.frame(species=dia$species, taxonomy=NA, diameter=dia$diameter)))
}
.tomasovych.2010a <- function(...){
species <- read.xls(suppdata("10.5061/dryad.1225", "abundances-S California 1975.xls"), skip=1, header=TRUE)
species.clean <- species[,-1]
comm <- t(as.matrix(species.clean))
rownames(comm) <- species$X
rownames(comm)
}
.mendonca.2018 <- function(...){
# need to fix the years
tmp <- tempfile()
download.file("https://esajournals.onlinelibrary.wiley.com/action/downloadSupplement?doi=10.1002%2Fecy.2367&attachmentId=2208200269", tmp)
data <- read.csv(.unzip("CERRADO_SM_Capture.csv", tmp), as.is=TRUE, fileEncoding = "Latin1")
data <- data[!is.na(data$Individuals_captured),]
data$Year_finsh <- as.numeric(data$Year_finish)
data <- data[!is.na(data$Year_finish),]
ids <- paste(data$id, data$Year_finish)
#ids <- ids[-c(1513:1536)]
# lat/long data
tmp2 <- tempfile()
download.file("https://esajournals.onlinelibrary.wiley.com/action/downloadSupplement?doi=10.1002%2Fecy.2367&attachmentId=2208200269", tmp2)
ll_data <- read.csv(.unzip("CERRADO_SM_Study_Site.csv", tmp), as.is=TRUE, fileEncoding = "Latin1")
ll_data <- ll_data[,c(1,7,8)]
ll_data$id <- unique(ids)
names(ll_data) <- c("id", "lat", "long")
ll_data$year <- ll_data$id; ll_data$name <- ll_data$id
ll_data$address <- "Cerrado ecosystem: Brazil, Boliva, Paraguay"; ll_data$area <- "live_trap"
return(.df.melt(data$Actual_species_name,
ids,
data$Individuals_captured,
data.frame(units = "#"),
ll_data,
data.frame(species = unique(data$Actual_species_name), taxonomy = "Animalia")
)
)
}
# Error in data.frame(id = rownames(data), year = years, name =
# names, lat = NA, : arguments imply differing number of rows: 20,
# 24, 1
.sepulveda.2016 <- function(...){
tmp <- tempfile()
download.file("http://journals.plos.org/plosone/article/file?type=supplementary&id=info:doi/10.1371/journal.pone.0157910.s001", tmp)
data <- read.xls(tmp, 1, skip=1, fileEncoding="Latin1")
data <- data[1:20,]
years <- colnames(data)[2:25]
names(data) <- c("species", paste(rep(c("Cocholgue", "Hualpen", "Llico", "Mehuin", "La Mision", "Maicolpue"),each=4), names(data)[2:25], sep="_"))
d2 <- t(data)
names <- rep(c("Cocholgue", "Hualpen", "Llico", "Mehuin", "La Mision", "Maicolpue"),each=4)
return(.matrix.melt(data,
data.frame(units = "#"),
data.frame(id = rownames(data), year = years, name= names, lat= NA, long= NA, address="Southwestern Chilean coast", area = NA),
data.frame(species=colnames(data), taxonomy = NA)
)
)
}
# Metadata woes
.bried.2017 <- function(...){
tmp <- tempfile()
download.file("https://datadryad.org/bitstream/handle/10255/dryad.151171/Dryad.data.xlsx?sequence=1", tmp)
data <- read.xls(tmp, 1)
n <- paste(data$Latitude, data$Longitude, sep = "_")
comm <- data[,-c(1:4)]
comm$Region <- n
return(.matrix.melt(comm,
data.frame(units = "#"),
data.frame(id = rownames(data), year = 2017, name = , lat= , long = , address = "", area = NA),
data.frame(species = colnames(data), taxonomy = "Insecta")
)
)
}
# datasets on chiclids - each function downloads a community
# dataset for a different region
# Kigoma town
.britton.2017.a <- function(...){
tmp <- tempfile()
download.file("https://datadryad.org/bitstream/handle/10255/dryad.148126/BrittonEtAl2017_KigomaTown.csv?sequence=3", tmp)
data <- read.csv(tmp, skip=1)
data <- data[-c(1,2),]
names(data)[1] <- "species"
comm <- t(data)
return(.matrix.melt(comm,
data.frame(units = "#"),
data.frame(id = rownames(data), year = 2016, name = , lat = NA, long = NA, address = "", area = NA),
data.frame(species = colnames(data), taxonomy = "Chiclidae")
))
}
# Kigoma deforested
.britton.2017.b <- function(...){
tmp <- tempfile()
download.file("https://datadryad.org/bitstream/handle/10255/dryad.148127/BrittonEtAl2017_KigomaDeforested.csv?sequence=3", tmp)
data <- read.csv(tmp, skip=1)
data <- data[-c(1,2),]
names(data)[1] <- "species"
comm <- t(data)
return(.matrix.melt(comm,
data.frame(units = "#"),
data.frame(id = rownames(data), year = 2016, name = , lat = NA, long = NA, address = "", area = NA),
data.frame(species = colnames(data), taxonomy = "Chiclidae")
))
}
# Kalilani village
.britton.2017.c <- function(...){
tmp <- tempfile()
download.file("https://datadryad.org/bitstream/handle/10255/dryad.148128/BrittonEtAl2017_KalilaniVillage.csv?sequence=1", tmp)
data <- read.csv(tmp, skip=1)
data <- data[-c(1,2),]
names(data)[1] <- "species"
comm <- t(data)
return(.matrix.melt(comm,
data.frame(units = "#"),
data.frame(id = rownames(data), year = 2016, name = , lat = NA, long = NA, address = "", area = NA),
data.frame(species = colnames(data), taxonomy = "Chiclidae")
))
}
# Jakobsen's beach
.britton.2017.d <- function(...){
tmp <- tempfile()
download.file("https://datadryad.org/bitstream/handle/10255/dryad.148129/BrittonEtAl2017_Jakobsen%27sBeach.csv?sequence=3", tmp)
data <- read.csv(tmp, skip=1)
data <- data[-c(1,2),]
names(data)[1] <- "species"
comm <- t(data)
return(.matrix.melt(comm,
data.frame(units = "#"),
data.frame(id = rownames(data), year = 2016, name = , lat = NA, long = NA, address = "", area = NA),
data.frame(species = colnames(data), taxonomy = "Chiclidae")
))
}
# Gombe stream
.britton.2017.e <- function(...){
tmp <- tempfile()
download.file("https://datadryad.org/bitstream/handle/10255/dryad.148130/BrittonEtAl2017_GombeNP.csv?sequence=1", tmp)
data <- read.csv(tmp, skip=1)
data <- data[-c(1,2),]
names(data)[1] <- "species"
comm <- t(data)
return(.matrix.melt(comm,
data.frame(units = "#"),
data.frame(id = rownames(data), year = 2016, name = , lat = NA, long = NA, address = "", area = NA),
data.frame(species = colnames(data), taxonomy = "Chiclidae")
))
}
# Mahale mountain 1
.britton.2017.f <- function(...){
tmp <- tempfile()
download.file("https://datadryad.org/bitstream/handle/10255/dryad.148131/BrittonEtAl2017_MahaleNPS1.csv?sequence=1", tmp)
data <- read.csv(tmp, skip=1)
data <- data[-c(1,2),]
names(data)[1] <- "species"
comm <- t(data)
return(.matrix.melt(comm,
data.frame(units = "#"),
data.frame(id = rownames(data), year = 2016, name = , lat = NA, long = NA, address = "", area = NA),
data.frame(species = colnames(data), taxonomy = "Chiclidae")
))
}
# Mahale mountain 2
.britton.2017.g <- function(...){
tmp <- tempfile()
download.file("https://datadryad.org/bitstream/handle/10255/dryad.148132/BrittonEtAl2017_MahaleNPS2.csv?sequence=3", tmp)
data <- read.csv(tmp, skip=1)
data <- data[-c(1,2),]
names(data)[1] <- "species"
comm <- t(data)
return(.matrix.melt(comm,
data.frame(units = "#"),
data.frame(id = rownames(data), year = 2016, name = , lat = NA, long = NA, address = "", area = NA),
data.frame(species = colnames(data), taxonomy = "Chiclidae")
))
}
.drew.2015<-function(...){
expdata<-read.csv("https://datadryad.org/bitstream/handle/10255/dryad.93108/Supplemental%201.csv?sequence=1",as.is = TRUE)
expdata$binom<-paste(expdata$Genus,expdata$species,sep=".")
comm<-t(expdata[,4:6])
colnames(comm)<-expdata$binom
#meta data is basically not a thing, so this may need to be scrapped after all
return(.matrix.melt(comm,
data.frame(units="p/a"),
sitedata,
data.frame(speccies=expdata$binom,taxonomy=NA)
)
)
}
.osuri.2016<-function(...){
expdata<-read.csv("https://datadryad.org/bitstream/handle/10255/dryad.109139/Osuri_Sanakran_2016_JAE_plot_data.csv?sequence=2",as.is = TRUE)
comm <- with(expdata, tapply(species, list(species, site.name), length))
comm[is.na(comm)] <- 0
comm<-t(comm)
#meta data is limited, what could be easily found is in the expdata data frame
return(.matrix.melt(comm,
data.frame(units="#"),
sitedata,
data.frame(species=expdata$species,taxonomy=NA)
)
)
}
.helmus.2013 <- function(...){
library(pez) # This isn't how we declare packages in 'real'
# packages for the time being this is sufficient
data(laja)
return(.matrix.melt(invert.sites))
}
.jain.2017 <- function(...){
species <- read.xls(suppdata("10.5061/dryad.177q4", "Jain_etal_2016_Butterfly%20abundance%20across%20sites_22Dec2016.xlsx"), skip=5, header=TRUE, as.is=TRUE)
species.clean <- species[,c(-1:-15,-38)]
comm <- t(as.matrix(species.clean))
colnames(comm) <- species$Scientific.name
return(.matrix.melt(comm,
data.frame(units="#", treatment=NA),
data.frame(id=rownames(comm), year=site.metadata$Date, name=site.metadata$SiteCombo, lat=NA, long=NA, address = "British Columbia", area=site.metadata$HaSurveyed),
data.frame(species=colnames(comm), taxonomy=NA)))
}
.lightfoot.2016 <- function(...) {
data <- read.table("http://sev.lternet.edu/sites/default/files/data/sev-106/sev106_hopperdynamics_20150826.txt", header=T, sep=",")
data$month.year <- format(as.Date(data$DATE, format="%m/%d/%Y"),"%m/%Y")
spec_codes <- c("ACPI","AGDE","AMCO","ARCO","ARPS","AUEL","AUFE","BOAR",
"BRMA","CIPA","COCR","COOC","COTE","DABI","ERSI","HATR",
"HERU","HEVI","HICA","LAAZ","LEWH","MEAR","MEAZ","MEBO",
"MEGL","MELA","MEOC","METE","OPOB","PAPA","PHQU","PHRO",
"PSDE","PSTE","SCNI","SYMO","TRCA","TRFO","TRKI","TRPA",
"TRPI","XACO","XAMO")
species <- c("Acantherus piperatus","Ageneotettix deorum",
"Amphitornus coloradus","Arphia conspersa",
"Arphia pseudonietana","Aulocara elliotti",
"Aulocara femoratum","Bootettix argentatus",
"Brachystola magna","Cibolacris parviceps",
"Cordillacris crenulata","Cordillacris occipitalis",
"Conozoa texana","Dactylotum bicolor",
"Eritettix simplex","Hadtrotettix trifasciatus",
"Heliaula rufa","Hesperotettix viridis",
"Hippopedon capito","Lactista azteca","Leprus wheeleri",
"Melanoplus aridus","Melanoplus arizonae",
"Melanoplus bowditchi","Melanoplus gladstoni",
"Melanoplus lakinus","Melanoplus occidentalis",
"Mermeria texana","Opeia obscura","Paropomala pallida",
"Phlibostroma quadrimaculatum","Phrynotettix robustus",
"Psoloessa delicatula","Psoloessa texana",
"Schistocerca nitens","Syrbula montezuma",
"Trimerotropis californicus","Tropidolophus formosus",
"Trachyrhachis kiowa","Trimerotropis pallidipennis",
"Trimerotropis pistrinaria","Xanthippus corallipes",
"Xanthippus montanus")
metadata <- data.frame(spec_codes, species)
data$SPECIES <- metadata$species[match(data$SPECIES, metadata$spec_codes)]
data <- with(data, tapply(CNT, list(site_year, SPECIES), sum, na.rm=TRUE))
data$site_year <- with(data, paste(SITE, year, sep="_"))
temp <- strsplit(rownames(data), "_")
year <- matrix(unlist(temp), ncol=2, byrow=TRUE)[,2]
year <- format(as.Date(data$DATE, format="%m/%Y"),"%Y")
name <- matrix(unlist(temp), ncol=2, byrow=TRUE)[,1]
#needs some "burned" info?...
return(.df.melt(data$species, data$SITE, data$CNT,
data.frame(units="#"),
data.frame(id=unique(data$plot_year), year, name, lat=NA, long=NA, address="Sevilleta National Wildlife Refuge, New Mexico", area=NA),
data.frame(species=unique(data$species), taxonomy="Orthoptera")))
}
.fia.2018 <- function(...){
.get.fia <- function(state, var, select){
t.zip <- tempfile()
download.file(paste0("https://apps.fs.usda.gov/fia/datamart/CSV/",state,"_",var,".zip"), t.zip)
unzip(t.zip)
data <- fread(paste0(state,"_",var,".csv"), select=select)
unlink(paste0(state,"_",var,".csv"))
return(data)
}
states <- c("AK","AL","AZ","AR","CA","CO","CT","DE","FL","GA","HI","IA","ID","IL","IN","KS","KY","LA","ME","MD",
"MA","MI","MN","MS","MO","MT","NC","NE","NH","NV","NM","NJ","NY","ND","OH","OK","OR","PA","RI",
"SC","SD","TN","TX","UT","VA","VT","WA","WI","WV","WY","VI","PR")
data <- vector("list", length(states))
for(i in seq_along(states)){
#Download/read in data
tree <- .get.fia(states[i], "TREE", c("CN","PLT_CN","PLOT","SPCD","DIA","INVYR"))
cond <- .get.fia(states[i], "COND", c("PLT_CN","PLOT","STDAGE","FORTYPCD","CONDID"))
plot <- .get.fia(states[i], "PLOT", c("PLOT","LAT","LON","ELEV", "CN"))
#Subset everything, remove sites with multiple/ambiguous codings, merge
tree <- tree[tree$DIA > 1.96,]
cond <- cond[cond$PLT_CN %in% as.integer64(names(Filter(function(x) x==1, table(cond$PLT_CN)))),]
data[[i]] <- merge(tree, merge(cond, plot, by.x="PLT_CN", by.y="CN"), by.x="PLT_CN", by.y="PLT_CN")
data[[i]]$state <- states[i]
}
data <- rbindlist(data)
t <- setNames(seq_along(unique(data$PLT_CN)), unique(data$PLT_CN))
data$site.id <- paste0(data$state, "_", t[as.character(data$PLT_CN)])
uniq.site <- as.data.frame(unique(data[, 15:16]))
sample.sites <- as.data.frame(uniq.site %>% group_by(state) %>% sample_n(size = 30))
data <- merge(sample.sites, data, by="site.id")
data$site.id <- paste0(data$site.id, "_", data$INVYR)
fia.spp <- read.csv("FIA_SppList.csv") #currently in raw_data folder
fia.spp <- data.table(fia.spp$SPCD, paste0(fia.spp$GENUS, "_", fia.spp$SPECIES))
data <- merge(data, fia.spp, by.x="SPCD", by.y="V1")
data <- data.frame(data$V2, data$site.id, data$LAT, data$LON, data$ELEV,
data$STDAGE, data$FORTYPCD, data$CONDID, data$DIA)
names(data) <- c("species", "site.id", "lat", "long", "elev", "stdage", "forestclass", "condclass", "diameter")
comm <- t(as.matrix(with(data, table(species,site.id))))
dia <- aggregate(diameter~species, data, mean)
dia.count <- aggregate(diameter~species, data, length)
dia$diameter.n <- dia.count$diameter
site.df <- data[!duplicated(data$site.id),]
site.df <- site.df[,2:8]
sites <- rownames(comm)
site.df <- site.df[match(sites, site.df$site.id), ]
return(.matrix.melt(comm,
data.frame(units="#"),
data.frame(id=site.df$site.id, name=NA, year=NA, lat=site.df$lat,
long=site.df$long, address=NA, area=NA,
elevation=site.df$elev, class=site.df$forestclass),
data.frame(species=dia$species, taxonomy=NA, diameter=dia$diameter)))
}
.tomasovych.2010a <- function(...){
species <- read.xls(suppdata("10.5061/dryad.1225", "abundances-S California 1975.xls"), skip=1, header=TRUE)
species.clean <- species[,-1]
comm <- t(as.matrix(species.clean))
rownames(comm) <- species$X
rownames(comm)
}
.mendonca.2018 <- function(...){
# need to fix the years
tmp <- tempfile()
download.file("https://esajournals.onlinelibrary.wiley.com/action/downloadSupplement?doi=10.1002%2Fecy.2367&attachmentId=2208200269", tmp)
data <- read.csv(.unzip("CERRADO_SM_Capture.csv", tmp), as.is=TRUE, fileEncoding = "Latin1")
data <- data[!is.na(data$Individuals_captured),]
data$Year_finsh <- as.numeric(data$Year_finish)
data <- data[!is.na(data$Year_finish),]
ids <- paste(data$id, data$Year_finish)
#ids <- ids[-c(1513:1536)]
# lat/long data
tmp2 <- tempfile()
download.file("https://esajournals.onlinelibrary.wiley.com/action/downloadSupplement?doi=10.1002%2Fecy.2367&attachmentId=2208200269", tmp2)
ll_data <- read.csv(.unzip("CERRADO_SM_Study_Site.csv", tmp), as.is=TRUE, fileEncoding = "Latin1")
ll_data <- ll_data[,c(1,7,8)]
ll_data$id <- unique(ids)
names(ll_data) <- c("id", "lat", "long")
ll_data$year <- ll_data$id; ll_data$name <- ll_data$id
ll_data$address <- "Cerrado ecosystem: Brazil, Boliva, Paraguay"; ll_data$area <- "live_trap"
return(.df.melt(data$Actual_species_name,
ids,
data$Individuals_captured,
data.frame(units = "#"),
ll_data,
data.frame(species = unique(data$Actual_species_name), taxonomy = "Animalia")
)
)
}
# Error in data.frame(id = rownames(data), year = years, name =
# names, lat = NA, : arguments imply differing number of rows: 20,
# 24, 1
.sepulveda.2016 <- function(...){
tmp <- tempfile()
download.file("http://journals.plos.org/plosone/article/file?type=supplementary&id=info:doi/10.1371/journal.pone.0157910.s001", tmp)
data <- read.xls(tmp, 1, skip=1, fileEncoding="Latin1")
data <- data[1:20,]
years <- colnames(data)[2:25]
names(data) <- c("species", paste(rep(c("Cocholgue", "Hualpen", "Llico", "Mehuin", "La Mision", "Maicolpue"),each=4), names(data)[2:25], sep="_"))
d2 <- t(data)
names <- rep(c("Cocholgue", "Hualpen", "Llico", "Mehuin", "La Mision", "Maicolpue"),each=4)
return(.matrix.melt(data,
data.frame(units = "#"),
data.frame(id = rownames(data), year = years, name= names, lat= NA, long= NA, address="Southwestern Chilean coast", area = NA),
data.frame(species=colnames(data), taxonomy = NA)
)
)
}
# Metadata woes
.bried.2017 <- function(...){
tmp <- tempfile()
download.file("https://datadryad.org/bitstream/handle/10255/dryad.151171/Dryad.data.xlsx?sequence=1", tmp)
data <- read.xls(tmp, 1)
n <- paste(data$Latitude, data$Longitude, sep = "_")
comm <- data[,-c(1:4)]
comm$Region <- n
return(.matrix.melt(comm,
data.frame(units = "#"),
data.frame(id = rownames(data), year = 2017, name = , lat= , long = , address = "", area = NA),
data.frame(species = colnames(data), taxonomy = "Insecta")
)
)
}
# datasets on chiclids - each function downloads a community
# dataset for a different region
# Kigoma town
.britton.2017.a <- function(...){
tmp <- tempfile()
download.file("https://datadryad.org/bitstream/handle/10255/dryad.148126/BrittonEtAl2017_KigomaTown.csv?sequence=3", tmp)
data <- read.csv(tmp, skip=1)
data <- data[-c(1,2),]
names(data)[1] <- "species"
comm <- t(data)
return(.matrix.melt(comm,
data.frame(units = "#"),
data.frame(id = rownames(data), year = 2016, name = , lat = NA, long = NA, address = "", area = NA),
data.frame(species = colnames(data), taxonomy = "Chiclidae")
))
}
# Kigoma deforested
.britton.2017.b <- function(...){
tmp <- tempfile()
download.file("https://datadryad.org/bitstream/handle/10255/dryad.148127/BrittonEtAl2017_KigomaDeforested.csv?sequence=3", tmp)
data <- read.csv(tmp, skip=1)
data <- data[-c(1,2),]
names(data)[1] <- "species"
comm <- t(data)
return(.matrix.melt(comm,
data.frame(units = "#"),
data.frame(id = rownames(data), year = 2016, name = , lat = NA, long = NA, address = "", area = NA),
data.frame(species = colnames(data), taxonomy = "Chiclidae")
))
}
# Kalilani village
.britton.2017.c <- function(...){
tmp <- tempfile()
download.file("https://datadryad.org/bitstream/handle/10255/dryad.148128/BrittonEtAl2017_KalilaniVillage.csv?sequence=1", tmp)
data <- read.csv(tmp, skip=1)
data <- data[-c(1,2),]
names(data)[1] <- "species"
comm <- t(data)
return(.matrix.melt(comm,
data.frame(units = "#"),
data.frame(id = rownames(data), year = 2016, name = , lat = NA, long = NA, address = "", area = NA),
data.frame(species = colnames(data), taxonomy = "Chiclidae")
))
}
# Jakobsen's beach
.britton.2017.d <- function(...){
tmp <- tempfile()
download.file("https://datadryad.org/bitstream/handle/10255/dryad.148129/BrittonEtAl2017_Jakobsen%27sBeach.csv?sequence=3", tmp)
data <- read.csv(tmp, skip=1)
data <- data[-c(1,2),]
names(data)[1] <- "species"
comm <- t(data)
return(.matrix.melt(comm,
data.frame(units = "#"),
data.frame(id = rownames(data), year = 2016, name = , lat = NA, long = NA, address = "", area = NA),
data.frame(species = colnames(data), taxonomy = "Chiclidae")
))
}
# Gombe stream
.britton.2017.e <- function(...){
tmp <- tempfile()
download.file("https://datadryad.org/bitstream/handle/10255/dryad.148130/BrittonEtAl2017_GombeNP.csv?sequence=1", tmp)
data <- read.csv(tmp, skip=1)
data <- data[-c(1,2),]
names(data)[1] <- "species"
comm <- t(data)
return(.matrix.melt(comm,
data.frame(units = "#"),
data.frame(id = rownames(data), year = 2016, name = , lat = NA, long = NA, address = "", area = NA),
data.frame(species = colnames(data), taxonomy = "Chiclidae")
))
}
# Mahale mountain 1
.britton.2017.f <- function(...){
tmp <- tempfile()
download.file("https://datadryad.org/bitstream/handle/10255/dryad.148131/BrittonEtAl2017_MahaleNPS1.csv?sequence=1", tmp)
data <- read.csv(tmp, skip=1)
data <- data[-c(1,2),]
names(data)[1] <- "species"
comm <- t(data)
return(.matrix.melt(comm,
data.frame(units = "#"),
data.frame(id = rownames(data), year = 2016, name = , lat = NA, long = NA, address = "", area = NA),
data.frame(species = colnames(data), taxonomy = "Chiclidae")
))
}
# Mahale mountain 2
.britton.2017.g <- function(...){
tmp <- tempfile()
download.file("https://datadryad.org/bitstream/handle/10255/dryad.148132/BrittonEtAl2017_MahaleNPS2.csv?sequence=3", tmp)
data <- read.csv(tmp, skip=1)
data <- data[-c(1,2),]
names(data)[1] <- "species"
comm <- t(data)
return(.matrix.melt(comm,
data.frame(units = "#"),
data.frame(id = rownames(data), year = 2016, name = , lat = NA, long = NA, address = "", area = NA),
data.frame(species = colnames(data), taxonomy = "Chiclidae")
))
}
.drew.2015<-function(...){
expdata<-read.csv("https://datadryad.org/bitstream/handle/10255/dryad.93108/Supplemental%201.csv?sequence=1",as.is = TRUE)
expdata$binom<-paste(expdata$Genus,expdata$species,sep=".")
comm<-t(expdata[,4:6])
colnames(comm)<-expdata$binom
#meta data is basically not a thing, so this may need to be scrapped after all
return(.matrix.melt(comm,
data.frame(units="p/a"),
sitedata,
data.frame(speccies=expdata$binom,taxonomy=NA)
)
)
}
.osuri.2016<-function(...){
expdata<-read.csv("https://datadryad.org/bitstream/handle/10255/dryad.109139/Osuri_Sanakran_2016_JAE_plot_data.csv?sequence=2",as.is = TRUE)
comm <- with(expdata, tapply(species, list(species, site.name), length))
comm[is.na(comm)] <- 0
comm<-t(comm)
#meta data is limited, what could be easily found is in the expdata data frame
return(.matrix.melt(comm,
data.frame(units="#"),
sitedata,
data.frame(species=expdata$species,taxonomy=NA)
)
)
}
.helmus.2013 <- function(...){
library(pez) # This isn't how we declare packages in 'real'
# packages for the time being this is sufficient
data(laja)
return(.matrix.melt(invert.sites))
}
.jain.2017 <- function(...){
species <- read.xls(suppdata("10.5061/dryad.177q4", "Jain_etal_2016_Butterfly%20abundance%20across%20sites_22Dec2016.xlsx"), skip=5, header=TRUE, as.is=TRUE)
species.clean <- species[,c(-1:-15,-38)]
comm <- t(as.matrix(species.clean))
colnames(comm) <- species$Scientific.name
return(.matrix.melt(comm,
data.frame(units="#", treatment=NA),
data.frame(id=rownames(comm), year=site.metadata$Date, name=site.metadata$SiteCombo, lat=NA, long=NA, address = "British Columbia", area=site.metadata$HaSurveyed),
data.frame(species=colnames(comm), taxonomy=NA)))
}
## Error in data.frame(id = id, year = YEAR, name = Waterbody_Name, lat = lat, : arguments imply differing number of rows: 7556, 20027, 1
## does not yet work
.rypel.2018 <- function(...){
tmp.file <- tempfile()
download.file("https://portal.edirepository.org/nis/dataviewer?packageid=knb-lter-ntl.356.3&entityid=829ef0e4eea5e6392b19e595aa775832", tmp.file)
abun <- read.csv(tmp.file, header=TRUE)
taxon_inf <- read.csv(file="https://portal.edirepository.org/nis/dataviewer?packageid=knb-lter-ntl.356.3&entityid=490295acdaf716c90b58a5a089ab9847",header=TRUE)
location <- read.csv(file="https://portal.edirepository.org/nis/dataviewer?packageid=knb-lter-ntl.356.3&entityid=3c23c7e39d30f047fe6b229d85df2a88",header=TRUE)
abun <- merge(abun, taxon_inf, by = "taxon_id")
data <- merge(abun, location, by = "WBIC")
species <- data$taxon_name
lat <- data$Latitude
long <- data$Longitude
data$id <- rep(paste(data$Waterbody_Name,data$YEAR))
site.metadata <- data[!duplicated(data$id),]
site.metadata <- with(site.metadata,
data.frame(id=id, year=YEAR, name=Waterbody_Name, lat=lat,long=long, address="Wisconsin USA",area=NA)
)
site <- rep(paste(data$Waterbody_Name,data$Year), 7556)
data$site.id <- paste(data$Waterbody_Name,data$Year)
comm <- with(data, tapply(N, list(site.id, taxon_name), sum))
comm[is.na(comm)] <- 0
return(.df.melt(species, site, comm,
study.metadata=data.frame(units="#"),
site.metadata,
species.metadata=data.frame(species=unique(species), taxonomy=NA)
))
}
################################
# ARGON FUNCTIONS ##############
# - WORKING BUT NOT DATA RELEASE
################################
if(FALSE){
#' @export
.branstetter.2018 <- function(...) {
data <- read.csv("TableA3.csv")
metadata <- read.csv("TableA2.csv")
rownames(data) <- data[,1]
data[,1] <- NULL
colnames(data) <- gsub(".", "-", colnames(data), fixed=TRUE)
data <- t(data)
rownames(data) <- paste(rownames(data), year, sep="_")
metadata$year <- format(as.Date(metadata$datecollected, format="%d-%b-%Y"),"%Y")
year <- metadata$year[!duplicated(metadata$site)]
name <- unique(metadata$site)
lat <- metadata$latitude[!duplicated(metadata$site)]
long <- metadata$longitude[!duplicated(metadata$site)]
return(.matrix.melt(data,
data.frame(units="#"),
data.frame(id=rownames(data), year, name, lat, long,
address=NA, area=NA),
data.frame(species=colnames(data), taxonomy="Hymenoptera")))
}
.cobb.2016 <- function(...) {
data <- read.xls("COMPLETE Dataset as of 4_recovery2.xlsx")
data$name <- with(data, paste("study.area", Study.Area, "site", Site, sep="_"))
data$month.year <- with(data, paste(Month, Year, sep="-"))
data$site.year <- with(data, paste(name, month.year, sep="_"))
metadata <- data[,c(1:11, 148, 149, 150)]
metadata$Longitude <- gsub("\342\200\223", "-", metadata$Longitude)
data[,1] <- data$site.year
data[,c(2:11, 148, 149, 150)] <- NULL
data <- aggregate(.~Sample.., data=data, FUN=sum)
rownames(data) <- data[,1]
data[,1] <- NULL
rownames <- rownames(data)
data <- apply(data, 2, as.numeric)
rownames(data) <- rownames
name <- metadata$name[match(rownames(data), metadata$site.year)]
year <- metadata$Year[match(rownames(data), metadata$site.year)]
lat <- metadata$Latitude[match(rownames(data), metadata$site.year)]
long <- metadata$Longitude[match(rownames(data), metadata$site.year)]
veg.type <- metadata$Veg.type[match(rownames(data), metadata$site.year)]
burned <- metadata$Burn[match(rownames(data), metadata$site.year)]
monsoon <- metadata$Monsoon[match(rownames(data), metadata$site.year)]
return(.matrix.melt(data,
data.frame(units="STD.#"),
data.frame(id=rownames(data),
year,
name,
lat,
long,
address=NA,
area=NA,
veg.type,
burned,
monsoon),
data.frame(species=colnames(data), taxonomy="Arthropoda")))
}
.mooney.2018 <- function(...) {
#will need to loop through and do this for each year (sheet) in the dataset.
data <- read.xls("Insect Abundance Population Summaries.xlsx", sheet="#")
data <- data[which(data$Response == "Total"),]
#remove all rows that contain only NA values
data <- data[ ,!apply(data, 2, function(x) all(is.na(x)))]
data <- melt(data, id=c("Population", "Response"))
}
.dyer.2017 <- function(...) {
# Location is not always GPS coordinates in this dataset. Some are descriptions or titles of the locations.
# Some of the values in data are blank. These do not mean that the value is zero but that the data is not complete. (Lee has the code to complete it).
data <- read.xls("SWRS_plots_updated_nov_3_2017.xlsx")
data<-data[,1:24]
data$year <- format(as.Date(data$Date..D.M.Y., format="%Y-%m-%d"),"%Y")
data$plot.year <- with(data, paste(X.number, year, sep="."))
return(.df.melt(data$plant.sp,
data$plot.year,
data$Leaf.area..cm.2.,
data.frame(units="area"),
data.frame(id=unique(data$plot.year),
year=data$year[!duplicated(data$plot.year)],
name=data$X.number[!duplicated(data$plot.year)],
lat=NA,
long=NA,
address=NA,
area="cm2"),
data.frame(species=unique(data$plant.sp), taxonomy="Plantae")))
}
}
#' @export
.fia.2018 <- function(...){
.get.fia <- function(state, var, select){
t.zip <- tempfile()
download.file(paste0("https://apps.fs.usda.gov/fia/datamart/CSV/",state,"_",var,".zip"), t.zip)
unzip(t.zip)
data <- fread(paste0(state,"_",var,".csv"), select=select)
unlink(paste0(state,"_",var,".csv"))
return(data)
}
states <- c("AL", "AK") #c("AL","AK","AZ","CA","CO","FL","GA","HI","KS","MD","MA","MI","NH","NM","ND","OK","TN","TX","UT","VA","WA","WI","WY")
data <- vector("list", length(states))
for(i in seq_along(states)){
#Download/read in data
tree <- .get.fia(states[i], "TREE", c("CN","PLT_CN","PLOT","SPCD","DIA","INVYR"))
cond <- .get.fia(states[i], "COND", c("PLT_CN","PLOT","STDAGE","FORTYPCD","CONDID"))
plot <- .get.fia(states[i], "PLOT", c("PLOT","LAT","LON","ELEV", "CN"))
#Subset everything, remove sites with multiple/ambiguous codings, merge
tree <- tree[tree$DIA > 1.96,]
cond <- cond[cond$PLT_CN %in% as.integer64(names(Filter(function(x) x==1, table(cond$PLT_CN)))),]
data[[i]] <- merge(tree, merge(cond, plot, by.x="PLT_CN", by.y="CN"), by.x="PLT_CN", by.y="PLT_CN")
data[[i]]$state <- states[i]
}
data <- rbindlist(data)
t <- setNames(seq_along(unique(data$PLT_CN)), unique(data$PLT_CN))
data$site.id <- paste0(data$state, "_", t[as.character(data$PLT_CN)])
data$site.id <- paste0(data$site.id, "_", data$INVYR)
rndata <- with(data, ave(data, state, FUN=function(x) {sample.int(length(x))}))
fia.spp <- read.csv("FIA_SppList.csv") #currently in the pglmm raw data folder
fia.spp <- data.table(fia.spp$SPCD, paste0(fia.spp$GENUS, "_", fia.spp$SPECIES))
data <- merge(data, fia.spp, by.x="SPCD", by.y="V1")
data <- data.frame(data$V2, data$site.id, data$LAT, data$LON, data$ELEV,
data$STDAGE, data$FORTYPCD, data$CONDID, data$DIA)
names(data) <- c("species", "site.id", "lat", "long", "elev", "stdage", "forestclass", "condclass", "diameter")
comm <- t(as.matrix(with(data, table(species,site.id))))
# To get mean diameter of each species at each site:
dia <- aggregate(diameter~species, data, mean)
# To get count of diameters :
dia.count <- aggregate(diameter~species, data, length)
# data frame with diameter mean and count per species-site combination
dia$diameter.n <- dia.count$diameter
site.df <- data[!duplicated(data$site.id),]
site.df <- site.df[,2:8]
#site.df$site.id <- as.character(site.df$site.id); dia$species <- as.character(dia$species)
sites <- rownames(comm)
site.df <- site.df[match(sites, site.df$site.id), ]
return(.matrix.melt(comm,
data.frame(units="#"),
data.frame(id=site.df$site.id, name=NA, year=NA, lat=site.df$lat,
long=site.df$long, address=NA, area=NA,
elevation=site.df$elev, class=site.df$forestclass),
data.frame(species=dia$species, taxonomy=NA)))
}
#' @export
.heidi.2018 <- function(...) {
data <- read.xls("Heidi_Species_Cover_2017_Final_121817.xlsx", sheet=2, stringsAsFactors=FALSE)
metadata <- read.xls("SiteSpeciesList_argon.xlsx", fileEncoding="latin1", stringsAsFactors=FALSE)
data$geo <- metadata$Lat[match(data$Site, metadata$Site.Name)]
data$lat <- NA
data$long <- NA
temp <- strsplit(data$geo, split=",")
data$lat[1:471] <- matrix(unlist(temp[1:471]), ncol=2, byrow=TRUE)[,1]
data$long[1:471] <- matrix(unlist(temp[1:471]), ncol=2, byrow=TRUE)[,2]
data$Date <- format(as.Date(data$Date, format="%Y-%m-%d"),"%Y")
data$site_plot <- with(data, paste(Site, Plot, Date, sep="_"))
site.id <- unique(data$site_plot)
year <- data$Date[!duplicated(data$site_plot)]
name <- data$Site[!duplicated(data$site_plot)]
return(.df.melt(data$Species.Ground.Cover,
data$site_plot,
data$Count,
data.frame(units="#"),
data.frame(id=unique(data$site_plot), year, name, lat=data$lat[!duplicated(data$site_plot)], long=data$long[!duplicated(data$site_plot)], address=NA, area=NA),
data.frame(species=unique(data$Species.Ground.Cover), taxonomy=NA)))
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment