# dreidpath/replicated_analysis.R

## replicated_analysis.R
################################################################
# A student has five imputed datasets.  She wanted to know the #
# easiest way to repeat the analysis with each dataset and     #
# store the results.  This may not be the "easiest" way, but   #
# it is a way                                                  #
################################################################
#
################################################################
# Create five toy datasets to demonstrate the approach         #
################################################################
#
data("Loblolly")  # Loblolly ships with the "datasets" package in base R

# Each toy dataset is a resample of the Loblolly rows, drawn with replacement
# and with as many rows as the original, keeping only the two columns used by
# the model below. The row count is read from the data rather than hard-coded,
# and TRUE is spelled out because T is an ordinary variable that can be
# reassigned. No seed is set, so the five datasets differ from run to run.
DF1 <- Loblolly[sample.int(nrow(Loblolly), replace = TRUE), c("height", "age")]
DF2 <- Loblolly[sample.int(nrow(Loblolly), replace = TRUE), c("height", "age")]
DF3 <- Loblolly[sample.int(nrow(Loblolly), replace = TRUE), c("height", "age")]
DF4 <- Loblolly[sample.int(nrow(Loblolly), replace = TRUE), c("height", "age")]
DF5 <- Loblolly[sample.int(nrow(Loblolly), replace = TRUE), c("height", "age")]
rm(Loblolly)  # remove the original Loblolly data frame from the workspace

################################################################
# Start the actual code for looping through the datasets       #
################################################################
#
# You need a list of the names of the relevant datasets.  You could use the ls()
# command at this point, but it is safer to list them by hand
myDF <- c("DF1", "DF2", "DF3", "DF4", "DF5")  # names of the data frames to analyse

# One slot per dataset, named after it, so each model can be looked up by name
all_results <- setNames(vector("list", length(myDF)), myDF)

for (i in myDF) {  # i is the *name* of a data frame, not the data frame itself
  print(i)  # progress: show which dataset is being fitted
  # Substitute the dataset's name into the call before evaluating it, so each
  # model records e.g. `data = DF1` rather than `data = get(i)`.  With `get(i)`
  # stored in the call, every summary shows the same uninformative Call line,
  # and re-evaluating the call after the loop (e.g. with update()) would look
  # up whatever `i` holds by then, i.e. the last dataset.
  model_i <- eval(bquote(lm(height ~ age, data = .(as.name(i)))))
  all_results[[i]] <- model_i  # store the fitted model under the dataset's name
}

################################################################
# Use "summary()" to show an example of the data stored in an  #
# element of the "all_results" list                            #
################################################################
#
# Works the same way for DF1..DF5: the complete fitted model is stored in the
# list.  print() is explicit so the summary is also shown when the script is
# run through source(), which does not auto-print top-level values by default.
# `[[` is used instead of `$` so the element name must match exactly.
print(summary(all_results[["DF1"]]))
	################################################################
	# A student has five imputed datasets. She wanted to know the #
	# easiest way to repeat the analysis with each dataset and #
	# store the results. This may not be the "easiest" way, but #
	# it is a way #
	################################################################
	#
	################################################################
	# Create five toy datasets to demonstrate the approach #
	################################################################
	#
	data("Loblolly")  # Loblolly ships with the "datasets" package in base R

	# Each toy dataset is a resample of the Loblolly rows, drawn with replacement
	# and with as many rows as the original, keeping only the two columns used by
	# the model below. The row count is read from the data rather than hard-coded,
	# and TRUE is spelled out because T is an ordinary variable that can be
	# reassigned. No seed is set, so the five datasets differ from run to run.
	DF1 <- Loblolly[sample.int(nrow(Loblolly), replace = TRUE), c("height", "age")]
	DF2 <- Loblolly[sample.int(nrow(Loblolly), replace = TRUE), c("height", "age")]
	DF3 <- Loblolly[sample.int(nrow(Loblolly), replace = TRUE), c("height", "age")]
	DF4 <- Loblolly[sample.int(nrow(Loblolly), replace = TRUE), c("height", "age")]
	DF5 <- Loblolly[sample.int(nrow(Loblolly), replace = TRUE), c("height", "age")]
	rm(Loblolly)  # remove the original Loblolly data frame from the workspace

	################################################################
	# Start the actual code for looping through the datasets #
	################################################################
	#
	# You need a list of the names of the relevant datasets. You could use the ls()
	# command at this point, but it is safer to list them by hand
	myDF <- c("DF1", "DF2", "DF3", "DF4", "DF5")  # names of the data frames to analyse

	# One slot per dataset, named after it, so each model can be looked up by name
	all_results <- setNames(vector("list", length(myDF)), myDF)

	for (i in myDF) {  # i is the *name* of a data frame, not the data frame itself
	  print(i)  # progress: show which dataset is being fitted
	  # Substitute the dataset's name into the call before evaluating it, so each
	  # model records e.g. `data = DF1` rather than `data = get(i)`.  With `get(i)`
	  # stored in the call, every summary shows the same uninformative Call line,
	  # and re-evaluating the call after the loop (e.g. with update()) would look
	  # up whatever `i` holds by then, i.e. the last dataset.
	  model_i <- eval(bquote(lm(height ~ age, data = .(as.name(i)))))
	  all_results[[i]] <- model_i  # store the fitted model under the dataset's name
	}

	################################################################
	# Use "summary()" to show an example of the data stored in an #
	# element of the "all_results" list #
	################################################################
	#
	# Works the same way for DF1..DF5: the complete fitted model is stored in the
	# list.  print() is explicit so the summary is also shown when the script is
	# run through source(), which does not auto-print top-level values by default.
	# `[[` is used instead of `$` so the element name must match exactly.
	print(summary(all_results[["DF1"]]))