# mbjones/data_load_snippits.R

## data_load_snippits.R
#' Produces a merged foo dataset where the columns are merged from separate data files.
#'
#' Sources every R script found in the `data` directory (relative to the
#' current working directory), calls the loader function that has the same
#' name as the script (e.g. `data1()` for `data/data1.R`) with no arguments,
#' and left-merges each result onto the `CoPrct` data frame. The merged
#' result is written to `CoPrct.csv` in the working directory.
#'
#' `CoPrct` is not created here (see the TODO in the body); it must already
#' exist in an enclosing environment when this function is called.
#'
#' @param datasetnums a list of the numeric datasets to be merged. Currently
#'   unused: every R script in `data` is loaded.
#' @return the merged data frame, invisibly
mergedata <- function(datasetnums) {

    # Assume directory structure such as:
    #data
    #   -- data1.R
    #   -- data2.R
    #   -- data3.R

    # TODO: Prep overall data frame by creating CoPrct
    # Until that is done, fail with a clear message rather than a bare
    # "object not found" from inside merge().
    if (!exists("CoPrct")) {
        stop("`CoPrct` must exist before calling mergedata()", call. = FALSE)
    }
    merged <- CoPrct

    # Only R scripts are loaders; skip any other file kept in the directory.
    ds_list <- dir("data", pattern = "\\.[Rr]$")

    # Loop over all scripts in directory 'data' and merge the data from each
    # one by executing the data load function found in the script, which has
    # the same name as the R file in which the function is found
    for (dsname in ds_list) {
        source(file.path("data", dsname))
        # Strip the extension literally. An unescaped ".R" pattern matches any
        # character followed by "R" (e.g. the "AR" in "ARdata.R").
        func_name <- tools::file_path_sans_ext(dsname)
        # Pass the value of func_name so the loader named after the script is
        # called, not a function literally called "func_name".
        df <- do.call(func_name, list())
        merged <- merge(merged, df, all.x = TRUE)
    }

    # Alternative, Call each data load function, and merge it manually
    #CoPrct <- merge(CoPrct,data1(),all.x=T)
    #CoPrct <- merge(CoPrct,data2(),all.x=T)
    #CoPrct <- merge(CoPrct,data3(),all.x=T)
    #CoPrct <- merge(CoPrct,data4(),all.x=T)
    #CoPrct <- merge(CoPrct,data5(),all.x=T)

    write.csv(merged, file = "CoPrct.csv", row.names = FALSE)
    invisible(merged)
}

#' Loads the "Pinks" dataset from a CSV file hosted on Google Drive.
#'
#' Relies on `GET()` and `content()` being in scope (presumably from httr;
#' confirm the package is attached by whoever sources this file).
#'
#' @return a data frame parsed from the downloaded CSV, with strings kept as
#'   character vectors
data1 <- function() {
    URL_Pinks <- "https://drive.google.com/uc?export=download&id=0By1iaulIAI-uVEZya3VTVnE3Wk0"
    PinksGet <- GET(URL_Pinks)
    # Fail loudly on an HTTP error instead of parsing an error page as CSV.
    httr::stop_for_status(PinksGet)
    Pinks1 <- content(PinksGet, as = "text")
    # `text =` lets read.csv open and close its own connection; a bare
    # textConnection() passed as `file` is never closed.
    WPinks <- read.csv(text = Pinks1, stringsAsFactors = FALSE)
    WPinks
}

# Placeholder loader for dataset 2: no data is loaded yet, it only prints a
# greeting (and, like print(), returns that string invisibly).
data2 <- function() {
    # TODO: code needed to load dataset 2
    greeting <- "Hi!"
    print(greeting)
}

#' Example refactored function that can load data from multiple URLs of similar type
#'
#' Downloads a whitespace-delimited text file, takes the column names from
#' its first line, reads the data rows after skipping `header_lines` lines,
#' and renames the `WDIR` and `PRES` columns (when present) to `WD` and `BAR`.
#'
#' Relies on `GET()` and `content()` being in scope (presumably from httr;
#' confirm the package is attached by whoever sources this file).
#'
#' @param header_lines number of leading lines to skip before the data rows
#' @param data_url URL of the text file to download
#' @return a data frame with one column per token of the first header line
data3 <- function(header_lines, data_url) {
    S76_07Get <- GET(data_url)
    # Fail loudly on an HTTP error instead of parsing an error page as data.
    httr::stop_for_status(S76_07Get)
    S76_071 <- content(S76_07Get, as = "text")

    # `text =` makes scan()/read.table() manage their own connections; a bare
    # textConnection() passed as `file` is never closed.
    S76_07_h <- scan(text = S76_071, nlines = 1, what = character())  # reads first header line
    S76_07_h <- gsub("#YY", "YYYY", S76_07_h)  # gets rid of column name with # in it
    #S76_07_units <- scan(textConnection(S76_071), skip=1, nlines=1, what=character()) #reads second header line
    S76_07 <- read.table(text = S76_071, stringsAsFactors = FALSE,
                         skip = header_lines, header = FALSE)
    names(S76_07) <- S76_07_h   # pastes the header line in

    # Rename only the columns that are actually present, so a file whose
    # header lacks WDIR/PRES does not abort the load.
    new_names <- c(WDIR = "WD", PRES = "BAR")
    hit <- names(S76_07) %in% names(new_names)
    names(S76_07)[hit] <- unname(new_names[names(S76_07)[hit]])
    S76_07
}

# Fetch two files through data3(); header_lines is the number of leading
# lines skipped before the data rows of each file.
S76_07 <- data3(
    header_lines = 1,
    data_url = "http://www.ndbc.noaa.gov/view_text_file.php?filename=46076h2007.txt.gz&dir=data/historical/stdmet/"
)
S76_08 <- data3(
    header_lines = 2,
    data_url = "http://www.ndbc.noaa.gov/view_text_file.php?filename=46076h2008.txt.gz&dir=data/historical/stdmet/"
)
	#' Produces a merged foo dataset where the columns are merged from separate data files.
	#'
	#' Sources every R script found in the `data` directory (relative to the
	#' current working directory), calls the loader function that has the same
	#' name as the script (e.g. `data1()` for `data/data1.R`) with no arguments,
	#' and left-merges each result onto the `CoPrct` data frame. The merged
	#' result is written to `CoPrct.csv` in the working directory.
	#'
	#' `CoPrct` is not created here (see the TODO in the body); it must already
	#' exist in an enclosing environment when this function is called.
	#'
	#' @param datasetnums a list of the numeric datasets to be merged. Currently
	#'   unused: every R script in `data` is loaded.
	#' @return the merged data frame, invisibly
	mergedata <- function(datasetnums) {
		# NOTE(review): another definition of mergedata appears earlier in this
		# file; this later one is the binding left in place after sourcing.

		# Assume directory structure such as:
		#data
		# -- data1.R
		# -- data2.R
		# -- data3.R

		# TODO: Prep overall data frame by creating CoPrct
		# Until that is done, fail with a clear message rather than a bare
		# "object not found" from inside merge().
		if (!exists("CoPrct")) {
			stop("`CoPrct` must exist before calling mergedata()", call. = FALSE)
		}
		merged <- CoPrct

		# Only R scripts are loaders; skip any other file kept in the directory.
		ds_list <- dir("data", pattern = "\\.[Rr]$")

		# Loop over all scripts in directory 'data' and merge the data from each
		# one by executing the data load function found in the script, which has
		# the same name as the R file in which the function is found
		for (dsname in ds_list) {
			source(file.path("data", dsname))
			# Strip the extension literally. An unescaped ".R" pattern matches
			# any character followed by "R" (e.g. the "AR" in "ARdata.R").
			func_name <- tools::file_path_sans_ext(dsname)
			# Pass the value of func_name so the loader named after the script
			# is called, not a function literally called "func_name".
			df <- do.call(func_name, list())
			merged <- merge(merged, df, all.x = TRUE)
		}

		# Alternative, Call each data load function, and merge it manually
		#CoPrct <- merge(CoPrct,data1(),all.x=T)
		#CoPrct <- merge(CoPrct,data2(),all.x=T)
		#CoPrct <- merge(CoPrct,data3(),all.x=T)
		#CoPrct <- merge(CoPrct,data4(),all.x=T)
		#CoPrct <- merge(CoPrct,data5(),all.x=T)

		write.csv(merged, file = "CoPrct.csv", row.names = FALSE)
		invisible(merged)
	}

	#' Loads the "Pinks" dataset from a CSV file hosted on Google Drive.
	#'
	#' Relies on `GET()` and `content()` being in scope (presumably from httr;
	#' confirm the package is attached by whoever sources this file).
	#'
	#' @return a data frame parsed from the downloaded CSV, with strings kept as
	#'   character vectors
	data1 <- function() {
		# NOTE(review): another definition of data1 appears earlier in this
		# file; this later one is the binding left in place after sourcing.
		URL_Pinks <- "https://drive.google.com/uc?export=download&id=0By1iaulIAI-uVEZya3VTVnE3Wk0"
		PinksGet <- GET(URL_Pinks)
		# Fail loudly on an HTTP error instead of parsing an error page as CSV.
		httr::stop_for_status(PinksGet)
		Pinks1 <- content(PinksGet, as = "text")
		# `text =` lets read.csv open and close its own connection; a bare
		# textConnection() passed as `file` is never closed.
		WPinks <- read.csv(text = Pinks1, stringsAsFactors = FALSE)
		WPinks
	}

	# Placeholder loader for dataset 2: no data is loaded yet, it only prints a
	# greeting (and, like print(), returns that string invisibly).
	# NOTE(review): another definition of data2 appears earlier in this file.
	data2 <- function() {
		# TODO: code needed to load dataset 2
		greeting <- "Hi!"
		print(greeting)
	}

	#' Example refactored function that can load data from multiple URLs of similar type
	#'
	#' Downloads a whitespace-delimited text file, takes the column names from
	#' its first line, reads the data rows after skipping `header_lines` lines,
	#' and renames the `WDIR` and `PRES` columns (when present) to `WD` and `BAR`.
	#'
	#' Relies on `GET()` and `content()` being in scope (presumably from httr;
	#' confirm the package is attached by whoever sources this file).
	#'
	#' @param header_lines number of leading lines to skip before the data rows
	#' @param data_url URL of the text file to download
	#' @return a data frame with one column per token of the first header line
	data3 <- function(header_lines, data_url) {
		# NOTE(review): another definition of data3 appears earlier in this
		# file; this later one is the binding left in place after sourcing.
		S76_07Get <- GET(data_url)
		# Fail loudly on an HTTP error instead of parsing an error page as data.
		httr::stop_for_status(S76_07Get)
		S76_071 <- content(S76_07Get, as = "text")

		# `text =` makes scan()/read.table() manage their own connections; a
		# bare textConnection() passed as `file` is never closed.
		S76_07_h <- scan(text = S76_071, nlines = 1, what = character()) # reads first header line
		S76_07_h <- gsub("#YY", "YYYY", S76_07_h) # gets rid of column name with # in it
		#S76_07_units <- scan(textConnection(S76_071), skip=1, nlines=1, what=character()) #reads second header line
		S76_07 <- read.table(text = S76_071, stringsAsFactors = FALSE,
		                     skip = header_lines, header = FALSE)
		names(S76_07) <- S76_07_h # pastes the header line in

		# Rename only the columns that are actually present, so a file whose
		# header lacks WDIR/PRES does not abort the load.
		new_names <- c(WDIR = "WD", PRES = "BAR")
		hit <- names(S76_07) %in% names(new_names)
		names(S76_07)[hit] <- unname(new_names[names(S76_07)[hit]])
		S76_07
	}

	# Fetch two files through data3(); header_lines is the number of leading
	# lines skipped before the data rows of each file.
	# NOTE(review): the same two calls also appear earlier in this file.
	S76_07 <- data3(
		header_lines = 1,
		data_url = "http://www.ndbc.noaa.gov/view_text_file.php?filename=46076h2007.txt.gz&dir=data/historical/stdmet/"
	)
	S76_08 <- data3(
		header_lines = 2,
		data_url = "http://www.ndbc.noaa.gov/view_text_file.php?filename=46076h2008.txt.gz&dir=data/historical/stdmet/"
	)