walterst/MME_R_script_growth_modeling.txt

## MME_R_script_growth_modeling.txt
# This code will read in the STARR heights and weight data that accompanied the article:
# "A modified Michaelis-Menten equation estimates growth from birth to 3 years in healthy babies in the US"
# The filepaths will need to be modified for the correct local filepath. dplyr and ggplot2, gplots, & gridExtra graphics
# libraries are needed. Interpolation of weight/heights from a given age in days
# would be done through the predict() function, passing the fitted model and a dataframe of days.
# Subjects that fail to fit due to errors with nls() will be plotted as raw data, if errors occur.
# Increase the default number_of_subjects_to_fit to 100 to see an example.

library(dplyr)
library(ggplot2)
library(gplots)
library(gridExtra)

number_of_subjects_to_fit = 5


#### CHANGE filepaths below to the correct local filepath

weights <- read.csv("starr_source_weight_data.txt", sep="\t", header = T)

heights <- read.csv("starr_source_height_data.txt", sep="\t", header = T)


# The dataframe should have anon_id (for the subjects), AgeBabyDays for the age, and weightkg for weights/Heightcm for heights. If other data are used, change the names accordingly (or modify the code below to use the proper names)
head(weights)
head(heights)


# In the case of failures, one can set trace=TRUE to examine the parameter values through iterations.
# For failures, it's often the case that a1 and b1 increase without bound.

# These starting parameter values were based upon weight in kg, age in days
fit_MM_curves_weights <- function(dates,weights) {
  fitted_curve <-nls(weights~(c1+(a1*dates)/(b1+dates)), start = list(a1 = 5, b1 = 20, c1=2.5), trace = FALSE)
  return(fitted_curve)
}


# These starting parameter values were based upon height in cm, age in days
fit_MM_curves_heights <- function(dates,heights) {
  fitted_curve <-nls(heights~(c1+(a1*dates)/(b1+dates)), start = list(a1 = 60, b1 = 530, c1=50), trace = FALSE)
  return(fitted_curve)
}

five_weights <- dplyr::filter(weights, anon_id %in% unique(weights$anon_id)[1:number_of_subjects_to_fit])
five_heights <- dplyr::filter(heights, anon_id %in% unique(heights$anon_id)[1:number_of_subjects_to_fit])

### Weight modeling
subjects_weight <- unique(five_weights$anon_id)

error_subjects_weights = character()
weight_fitting_data = data.frame(anon_id=character(), date_series=numeric(),weight_series=numeric(), fitted_curve=numeric())
error_data_weights = data.frame(anon_id=character(), date_series=numeric(),weight_series=numeric())

for (subject in subjects_weight) {
skip_to_next <- FALSE
target_subject = dplyr::filter(five_weights, anon_id == subject)

date_series <- target_subject$AgeBabyDays
weight_series <- target_subject$weightkg

possible_error <- tryCatch( {
  fitted_curve <- fit_MM_curves_weights(date_series, weight_series)
  }, error = function(e){
    skip_to_next <<- TRUE
  })

if(skip_to_next) {
  error_subjects_weights <- c(error_subjects_weights, subject)
  anon_ids = rep(as.character(subject), length(date_series))
  curr_df = data.frame(anon_id = anon_ids, date_series=date_series, weight_series=weight_series)
  error_data_weights <- dplyr::bind_rows(error_data_weights, curr_df)
  next
  }

anon_ids = rep(subject, length(date_series))
curr_df = data.frame(anon_id = anon_ids, date_series=date_series, weight_series=weight_series, fitted_curve=predict(fitted_curve))

weight_fitting_data <- dplyr::bind_rows(weight_fitting_data, curr_df)
}

print("Total Number of Subjects for weight modeling:")
print(length(subjects_weight))
print("Number of subjects with errors fitting for weight modeling:")
print(length(error_subjects_weights))

filtered_remaining_subjects <- setdiff(subjects_weight, error_subjects_weights)

weight_fitted_data = dplyr::filter(weight_fitting_data, anon_id %in% filtered_remaining_subjects)


p1 <- ggplot() + geom_point(data=weight_fitted_data, aes(x=date_series, y = weight_series)) + geom_line(data=weight_fitted_data, aes(x=date_series, y=fitted_curve, color="red")) + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), panel.background = element_blank(), axis.line = element_line(colour = "black"))  + theme(legend.position = "none") + facet_wrap(~anon_id)
#ggsave(file = "Add_filepath_and_uncomment_if_used.pdf", width = 20, height = 16)
p1


# Only attempt if there are errors
if (length(error_subjects_weights) > 0) {
p2 <- ggplot() + geom_line(data=error_data_weights, aes(x=date_series, y = weight_series)) + facet_wrap(~anon_id) + geom_point(data=error_data_weights, aes(x=date_series, y = weight_series)) + theme(legend.position = "none") + facet_wrap(~anon_id)
#ggsave(file = "Add_filepath_and_uncomment_if_used.pdf", width = 20, height = 16)
p2
}

#### Height modeling

subjects_height <- unique(five_heights$anon_id)

error_subjects_height = character()
height_fitting_data = data.frame(anon_id=character(), date_series=numeric(),height_series=numeric(), fitted_curve=numeric())
error_data_height = data.frame(anon_id=character(), date_series=numeric(),height_series=numeric())

for (subject in subjects_height) {
skip_to_next <- FALSE
target_subject = dplyr::filter(five_heights, anon_id == subject)

date_series <- target_subject$AgeBabyDays
height_series <- target_subject$Heightcm

possible_error <- tryCatch( {
  fitted_curve <- fit_MM_curves_heights(date_series, height_series)
  }, error = function(e){
    skip_to_next <<- TRUE
  })

if(skip_to_next) {
  error_subjects_height <- c(error_subjects_height, subject)
  anon_ids = rep(as.character(subject), length(date_series))
  curr_df = data.frame(anon_id = anon_ids, date_series=date_series, height_series=height_series)
  error_data_height <- dplyr::bind_rows(error_data_height, curr_df)
  next
  }

anon_ids = rep(subject, length(date_series))
curr_df = data.frame(anon_id = anon_ids, date_series=date_series, height_series=height_series, fitted_curve=predict(fitted_curve))

height_fitting_data <- dplyr::bind_rows(height_fitting_data, curr_df)
}

print("Total Number of Subjects for height modeling:")
print(length(subjects_height))
print("Number of subjects with errors fitting for weight modeling:")
print(length(error_subjects_height))

filtered_remaining_subjects <- setdiff(subjects_height, error_subjects_height)

height_fitted_data = dplyr::filter(height_fitting_data, anon_id %in% filtered_remaining_subjects)


p3 <- ggplot() + geom_point(data=height_fitted_data, aes(x=date_series, y = height_series)) + geom_line(data=height_fitted_data, aes(x=date_series, y=fitted_curve, color="red")) + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), panel.background = element_blank(), axis.line = element_line(colour = "black"))  + theme(legend.position = "none") + facet_wrap(~anon_id)
#ggsave(file = "Add_filepath_and_uncomment_if_used.pdf", width = 20, height = 16)
p3


# Only attempt if there are errors
if (length(error_subjects_height) > 0) {
p4 <- ggplot() + geom_line(data=error_data_height, aes(x=date_series, y = height_series)) + facet_wrap(~anon_id) + geom_point(data=error_data_height, aes(x=date_series, y = height_series)) + theme(legend.position = "none") + facet_wrap(~anon_id)
#ggsave(file = "Add_filepath_and_uncomment_if_used.pdf", width = 20, height = 16)
p4
}
	# This code will read in the STARR heights and weight data that accompanied the article:
	# "A modified Michaelis-Menten equation estimates growth from birth to 3 years in healthy babies in the US"
	# The filepaths will need to be modified for the correct local filepath. dplyr and ggplot2, gplots, & gridExtra graphics
	# libraries are needed. Interpolation of weight/heights from a given age in days
	# would be done through the predict() function, passing the fitted model and a dataframe of days.
	# Subjects that fail to fit due to errors with nls() will be plotted as raw data, if errors occur.
	# Increase the default number_of_subjects_to_fit to 100 to see an example.

	library(dplyr)
	library(ggplot2)
	library(gplots)
	library(gridExtra)

	number_of_subjects_to_fit = 5


	#### CHANGE filepaths below to the correct local filepath

	weights <- read.csv("starr_source_weight_data.txt", sep="\t", header = T)

	heights <- read.csv("starr_source_height_data.txt", sep="\t", header = T)


	# The dataframe should have anon_id (for the subjects), AgeBabyDays for the age, and weightkg for weights/Heightcm for heights. If other data are used, change the names accordingly (or modify the code below to use the proper names)
	head(weights)
	head(heights)


	# In the case of failures, one can set trace=TRUE to examine the parameter values through iterations.
	# For failures, it's often the case that a1 and b1 increase without bound.

	# These starting parameter values were based upon weight in kg, age in days
	fit_MM_curves_weights <- function(dates,weights) {
	fitted_curve <-nls(weights~(c1+(a1*dates)/(b1+dates)), start = list(a1 = 5, b1 = 20, c1=2.5), trace = FALSE)
	return(fitted_curve)
	}


	# These starting parameter values were based upon height in cm, age in days
	fit_MM_curves_heights <- function(dates,heights) {
	fitted_curve <-nls(heights~(c1+(a1*dates)/(b1+dates)), start = list(a1 = 60, b1 = 530, c1=50), trace = FALSE)
	return(fitted_curve)
	}

	five_weights <- dplyr::filter(weights, anon_id %in% unique(weights$anon_id)[1:number_of_subjects_to_fit])
	five_heights <- dplyr::filter(heights, anon_id %in% unique(heights$anon_id)[1:number_of_subjects_to_fit])

	### Weight modeling
	subjects_weight <- unique(five_weights$anon_id)

	error_subjects_weights = character()
	weight_fitting_data = data.frame(anon_id=character(), date_series=numeric(),weight_series=numeric(), fitted_curve=numeric())
	error_data_weights = data.frame(anon_id=character(), date_series=numeric(),weight_series=numeric())

	for (subject in subjects_weight) {
	skip_to_next <- FALSE
	target_subject = dplyr::filter(five_weights, anon_id == subject)

	date_series <- target_subject$AgeBabyDays
	weight_series <- target_subject$weightkg

	possible_error <- tryCatch( {
	fitted_curve <- fit_MM_curves_weights(date_series, weight_series)
	}, error = function(e){
	skip_to_next <<- TRUE
	})

	if(skip_to_next) {
	error_subjects_weights <- c(error_subjects_weights, subject)
	anon_ids = rep(as.character(subject), length(date_series))
	curr_df = data.frame(anon_id = anon_ids, date_series=date_series, weight_series=weight_series)
	error_data_weights <- dplyr::bind_rows(error_data_weights, curr_df)
	next
	}

	anon_ids = rep(subject, length(date_series))
	curr_df = data.frame(anon_id = anon_ids, date_series=date_series, weight_series=weight_series, fitted_curve=predict(fitted_curve))

	weight_fitting_data <- dplyr::bind_rows(weight_fitting_data, curr_df)
	}

	print("Total Number of Subjects for weight modeling:")
	print(length(subjects_weight))
	print("Number of subjects with errors fitting for weight modeling:")
	print(length(error_subjects_weights))

	filtered_remaining_subjects <- setdiff(subjects_weight, error_subjects_weights)

	weight_fitted_data = dplyr::filter(weight_fitting_data, anon_id %in% filtered_remaining_subjects)


	p1 <- ggplot() + geom_point(data=weight_fitted_data, aes(x=date_series, y = weight_series)) + geom_line(data=weight_fitted_data, aes(x=date_series, y=fitted_curve, color="red")) + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), panel.background = element_blank(), axis.line = element_line(colour = "black")) + theme(legend.position = "none") + facet_wrap(~anon_id)
	#ggsave(file = "Add_filepath_and_uncomment_if_used.pdf", width = 20, height = 16)
	p1


	# Only attempt if there are errors
	if (length(error_subjects_weights) > 0) {
	p2 <- ggplot() + geom_line(data=error_data_weights, aes(x=date_series, y = weight_series)) + facet_wrap(~anon_id) + geom_point(data=error_data_weights, aes(x=date_series, y = weight_series)) + theme(legend.position = "none") + facet_wrap(~anon_id)
	#ggsave(file = "Add_filepath_and_uncomment_if_used.pdf", width = 20, height = 16)
	p2
	}

	#### Height modeling

	subjects_height <- unique(five_heights$anon_id)

	error_subjects_height = character()
	height_fitting_data = data.frame(anon_id=character(), date_series=numeric(),height_series=numeric(), fitted_curve=numeric())
	error_data_height = data.frame(anon_id=character(), date_series=numeric(),height_series=numeric())

	for (subject in subjects_height) {
	skip_to_next <- FALSE
	target_subject = dplyr::filter(five_heights, anon_id == subject)

	date_series <- target_subject$AgeBabyDays
	height_series <- target_subject$Heightcm

	possible_error <- tryCatch( {
	fitted_curve <- fit_MM_curves_heights(date_series, height_series)
	}, error = function(e){
	skip_to_next <<- TRUE
	})

	if(skip_to_next) {
	error_subjects_height <- c(error_subjects_height, subject)
	anon_ids = rep(as.character(subject), length(date_series))
	curr_df = data.frame(anon_id = anon_ids, date_series=date_series, height_series=height_series)
	error_data_height <- dplyr::bind_rows(error_data_height, curr_df)
	next
	}

	anon_ids = rep(subject, length(date_series))
	curr_df = data.frame(anon_id = anon_ids, date_series=date_series, height_series=height_series, fitted_curve=predict(fitted_curve))

	height_fitting_data <- dplyr::bind_rows(height_fitting_data, curr_df)
	}

	print("Total Number of Subjects for height modeling:")
	print(length(subjects_height))
	print("Number of subjects with errors fitting for weight modeling:")
	print(length(error_subjects_height))

	filtered_remaining_subjects <- setdiff(subjects_height, error_subjects_height)

	height_fitted_data = dplyr::filter(height_fitting_data, anon_id %in% filtered_remaining_subjects)


	p3 <- ggplot() + geom_point(data=height_fitted_data, aes(x=date_series, y = height_series)) + geom_line(data=height_fitted_data, aes(x=date_series, y=fitted_curve, color="red")) + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), panel.background = element_blank(), axis.line = element_line(colour = "black")) + theme(legend.position = "none") + facet_wrap(~anon_id)
	#ggsave(file = "Add_filepath_and_uncomment_if_used.pdf", width = 20, height = 16)
	p3


	# Only attempt if there are errors
	if (length(error_subjects_height) > 0) {
	p4 <- ggplot() + geom_line(data=error_data_height, aes(x=date_series, y = height_series)) + facet_wrap(~anon_id) + geom_point(data=error_data_height, aes(x=date_series, y = height_series)) + theme(legend.position = "none") + facet_wrap(~anon_id)
	#ggsave(file = "Add_filepath_and_uncomment_if_used.pdf", width = 20, height = 16)
	p4
	}