Skip to content

Instantly share code, notes, and snippets.

@mbjones
Created September 22, 2015 21:18
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save mbjones/cbe1b3ca6f7556d9ac16 to your computer and use it in GitHub Desktop.
Save mbjones/cbe1b3ca6f7556d9ac16 to your computer and use it in GitHub Desktop.
Demo to iterate across a directory and execute functions in R files in that directory.
#' Produces a merged foo dataset where the columns are merged from separate data files.
#'
#' Sources every R script found in the `data` directory (relative to the
#' current working directory), calls the loader function each script is
#' expected to define under the script's own base name (e.g. `data/data1.R`
#' defines `data1()`), and left-joins the results together with `merge()`.
#' The merged table is written to `CoPrct.csv` in the working directory.
#'
#' If a data frame named `CoPrct` is already visible from this function (for
#' example in the global environment) it is used as the starting table;
#' otherwise the first loaded dataset seeds the merge.
#'
#' Scripts are sourced with the `source()` defaults, so whatever they define
#' lands in the global environment.
#'
#' @param datasetnums a list of the numeric datasets to be merged. Currently
#'   unused: every R script in `data` is processed.
#' @return the merged data frame, invisibly
mergedata <- function(datasetnums) {
  # Assume directory structure such as:
  # data
  #  -- data1.R
  #  -- data2.R
  #  -- data3.R

  # Only pick up R scripts; stray files in the directory cannot be sourced.
  ds_list <- list.files("data", pattern = "\\.[Rr]$")

  # Start from an existing CoPrct data frame when one is in scope, otherwise
  # let the first dataset seed the merge.
  seed <- get0("CoPrct", ifnotfound = NULL)
  merged <- if (is.data.frame(seed)) seed else NULL

  # Loop over all scripts in directory 'data' and merge the data from each one
  # by executing the data load function found in the script, which has the
  # same name as the R file in which the function is found.
  for (dsname in ds_list) {
    source(file.path("data", dsname))
    func_name <- tools::file_path_sans_ext(dsname)
    # Pass the *value* of func_name so the loader named after the file runs.
    df <- do.call(func_name, list())
    merged <- if (is.null(merged)) df else merge(merged, df, all.x = TRUE)
  }

  # Alternative: call each data load function and merge it manually, e.g.
  # CoPrct <- merge(CoPrct, data1(), all.x = TRUE)

  if (is.null(merged)) {
    stop("No R scripts found in 'data' and no existing CoPrct data frame to write.",
         call. = FALSE)
  }

  write.csv(merged, file = "CoPrct.csv", row.names = FALSE)
  invisible(merged)
}
#' Load the Pinks dataset from its Google Drive download link.
#'
#' Fetches the file with `GET()`, extracts the response body as text with
#' `content()` (presumably the httr functions; confirm against the packages
#' the caller loads), and parses that text as CSV.
#'
#' @return a data frame parsed from the downloaded CSV text, with strings kept
#'   as character vectors
data1 <- function() {
  URL_Pinks <- "https://drive.google.com/uc?export=download&id=0By1iaulIAI-uVEZya3VTVnE3Wk0"
  PinksGet <- GET(URL_Pinks)
  Pinks1 <- content(PinksGet, as = "text")
  # Parse through `text =` so read.csv opens and closes its own connection;
  # a bare textConnection() here would be left open on every call.
  read.csv(text = Pinks1, stringsAsFactors = FALSE)
}
#' Placeholder loader for dataset 2.
#'
#' The real loading code is still to be written; for now this only prints a
#' greeting.
#'
#' @return the printed string, invisibly (the value of `print()`)
data2 <- function() {
  # Code needed to load dataset 2 goes here
  greeting <- "Hi!"
  print(greeting)
}
#' Example refactored function that can load data from multiple URLs of similar type
#'
#' Downloads a whitespace-delimited text file with `GET()`/`content()`, takes
#' the column names from its first line, and reads the rows that follow the
#' header block.
#'
#' @param header_lines number of leading header lines to skip before the data
#'   rows
#' @param data_url URL of the text file to download
#' @return a data frame whose columns are named from the first header line,
#'   with `#YY` replaced by `YYYY`, and with `WDIR`/`PRES` renamed to
#'   `WD`/`BAR` when those columns are present
data3 <- function(header_lines, data_url) {
  response <- GET(data_url)
  body_text <- content(response, as = "text")

  # Read the first header line; `text =` lets scan() manage (and close) its
  # own connection instead of leaking a textConnection().
  col_names <- scan(text = body_text, nlines = 1, what = character())
  # Get rid of the column name with '#' in it.
  col_names <- gsub("#YY", "YYYY", col_names, fixed = TRUE)

  buoy <- read.table(
    text = body_text,
    stringsAsFactors = FALSE,
    skip = header_lines,
    header = FALSE
  )
  names(buoy) <- col_names # paste the header line in

  # Rename only when the column exists, so files that already use WD/BAR
  # load without error.
  names(buoy)[names(buoy) == "WDIR"] <- "WD"
  names(buoy)[names(buoy) == "PRES"] <- "BAR"
  buoy
}
# Load two yearly files for station 46076 with data3(); the 2007 call skips
# one header line and the 2008 call skips two.
S76_07 <- data3(
  header_lines = 1,
  data_url = "http://www.ndbc.noaa.gov/view_text_file.php?filename=46076h2007.txt.gz&dir=data/historical/stdmet/"
)
S76_08 <- data3(
  header_lines = 2,
  data_url = "http://www.ndbc.noaa.gov/view_text_file.php?filename=46076h2008.txt.gz&dir=data/historical/stdmet/"
)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment