cmt2/bgb_to_revgadgets.R

## bgb_to_revgadgets.R
bgb_to_revgadgets <- function(results_path, geo_data_path, tree_path, area_names = NULL) {
  # load biogeobears results object
  load(results_path)
  # change data directories in results object
  res[["inputs"]]$geogfn <- geo_data_path
  res[["inputs"]]$trfn <- tree_path

  ##### Process data for plotting #####

  # read in tree separately

  tree <- RevGadgets::readTrees(paths = res[["inputs"]]$trfn)
  states <- res$inputs$all_geog_states_list_usually_inferred_from_areas_maxareas

  # create a dataframe with results in revgadgets compliant format
  rev_data <- data.frame(matrix(nrow = nrow(res$relative_probs_of_each_state_at_branch_bottom_below_node_UPPASS),
                                ncol = 15))
  colnames(rev_data) <- c("end_state_1", "end_state_2", "end_state_3",
                          "end_state_1_pp", "end_state_2_pp", "end_state_3_pp",
                          "end_state_other_pp",
                          "start_state_1", "start_state_2", "start_state_3",
                          "start_state_1_pp", "start_state_2_pp", "start_state_3_pp",
                          "start_state_other_pp",
                          "node")
  # get end states
  for (i in 1:nrow(res$ML_marginal_prob_each_state_at_branch_top_AT_node)) {
    row <- res$ML_marginal_prob_each_state_at_branch_top_AT_node[i,]
    rev_data[i, 1] <- order(row,decreasing=T)[1]
    rev_data[i, 2] <- order(row,decreasing=T)[2]
    rev_data[i, 3] <- order(row,decreasing=T)[3]
    rev_data[i, 4] <- row[order(row,decreasing=T)[1]]
    rev_data[i, 5] <- row[order(row,decreasing=T)[2]]
    rev_data[i, 6] <- row[order(row,decreasing=T)[3]]
    rev_data[i, 7] <- sum(row[order(row,decreasing=T)[4:length(row)]])
  }
  # get start states
  for (i in 1:nrow(res$ML_marginal_prob_each_state_at_branch_bottom_below_node)) {
    row <- res$ML_marginal_prob_each_state_at_branch_bottom_below_node[i,]
    rev_data[i, 8] <- order(row,decreasing=T)[1]
    rev_data[i, 9] <- order(row,decreasing=T)[2]
    rev_data[i, 10] <- order(row,decreasing=T)[3]
    rev_data[i, 11] <- row[order(row,decreasing=T)[1]]
    rev_data[i, 12] <- row[order(row,decreasing=T)[2]]
    rev_data[i, 13] <- row[order(row,decreasing=T)[3]]
    rev_data[i, 14] <- sum(row[order(row,decreasing=T)[4:length(row)]])
  }
  rev_data$node <- 1:nrow(res$ML_marginal_prob_each_state_at_branch_bottom_below_node)

  # make better labels
  tipranges <- getranges_from_LagrangePHYLIP(res[["inputs"]]$geogfn)

  geo <- res$inputs$all_geog_states_list_usually_inferred_from_areas_maxareas
  geo_num <- unlist(lapply(lapply(geo, as.character), paste0, collapse ="_"))
  if (is.null(area_names)) {
    area_names <- colnames(tipranges@df)
  }
  if (length(area_names) != length(colnames(tipranges@df))) stop("Number of specified area names is incorrect. Check your geo data file.")
  number_codes <- 0:(length(area_names)-1)
  geo_letters <- geo_num

  recodes <- paste0(paste0(number_codes, " = '", area_names, "'"), collapse = "; ")
  for (i in 1:length(geo_letters)) {
    code <- geo_letters[i]
    code_split <- unlist(strsplit(code, "_"))
    geo_letters[i] <- paste0(car::recode(code_split, recodes), collapse = "")
  }

  #area_names <- rev(area_names)
  label_dict <- data.frame(lab_num_short = 1:length(geo),
                           lab_num_long = geo_num,
                           lab_letters = geo_letters)

  # replace short number codes with letters
  not_state_cols <- c(grep("_pp", colnames(rev_data)),
                      grep("node", colnames(rev_data)))
  state_cols <- c(1:ncol(rev_data))[!c(1:ncol(rev_data)) %in% not_state_cols]
  for (i in state_cols) { # loop through by column indices for the state columns
    col <- as.character(rev_data[,i])
    for (j in 1:length(col)) { # loop through each item in the column and replace with letters
      col[j] <- label_dict$lab_letters[which(label_dict$lab_num_short == col[j])]
    }
    rev_data[,i] <- col
  }

  #change "NA" to NA
  rev_data %>%
    naniar::replace_with_na_all(condition = ~.x == "NA") -> rev_data
  # change any NAs in PP columns to 0
  pp_cols <- grep("_pp", colnames(rev_data))
  for (p in pp_cols) { rev_data[ ,p][is.na(rev_data[ ,p])] <- 0 }

  # make treedata object (combine data and tree)
  tibble::as_tibble(tree[[1]][[1]]) %>%
    full_join(rev_data, by = 'node') %>%
    as.treedata() -> rev_treedata
  #add list of states
  attributes(rev_treedata)$state_labels <- as.character(na.omit(unique(unlist(rev_data[,c(1:3, 8:10)]))))

  return(rev_treedata)
}
	bgb_to_revgadgets <- function(results_path, geo_data_path, tree_path, area_names = NULL) {
	# load biogeobears results object
	load(results_path)
	# change data directories in results object
	res[["inputs"]]$geogfn <- geo_data_path
	res[["inputs"]]$trfn <- tree_path

	##### Process data for plotting #####

	# read in tree separately

	tree <- RevGadgets::readTrees(paths = res[["inputs"]]$trfn)
	states <- res$inputs$all_geog_states_list_usually_inferred_from_areas_maxareas

	# create a dataframe with results in revgadgets compliant format
	rev_data <- data.frame(matrix(nrow = nrow(res$relative_probs_of_each_state_at_branch_bottom_below_node_UPPASS),
	ncol = 15))
	colnames(rev_data) <- c("end_state_1", "end_state_2", "end_state_3",
	"end_state_1_pp", "end_state_2_pp", "end_state_3_pp",
	"end_state_other_pp",
	"start_state_1", "start_state_2", "start_state_3",
	"start_state_1_pp", "start_state_2_pp", "start_state_3_pp",
	"start_state_other_pp",
	"node")
	# get end states
	for (i in 1:nrow(res$ML_marginal_prob_each_state_at_branch_top_AT_node)) {
	row <- res$ML_marginal_prob_each_state_at_branch_top_AT_node[i,]
	rev_data[i, 1] <- order(row,decreasing=T)[1]
	rev_data[i, 2] <- order(row,decreasing=T)[2]
	rev_data[i, 3] <- order(row,decreasing=T)[3]
	rev_data[i, 4] <- row[order(row,decreasing=T)[1]]
	rev_data[i, 5] <- row[order(row,decreasing=T)[2]]
	rev_data[i, 6] <- row[order(row,decreasing=T)[3]]
	rev_data[i, 7] <- sum(row[order(row,decreasing=T)[4:length(row)]])
	}
	# get start states
	for (i in 1:nrow(res$ML_marginal_prob_each_state_at_branch_bottom_below_node)) {
	row <- res$ML_marginal_prob_each_state_at_branch_bottom_below_node[i,]
	rev_data[i, 8] <- order(row,decreasing=T)[1]
	rev_data[i, 9] <- order(row,decreasing=T)[2]
	rev_data[i, 10] <- order(row,decreasing=T)[3]
	rev_data[i, 11] <- row[order(row,decreasing=T)[1]]
	rev_data[i, 12] <- row[order(row,decreasing=T)[2]]
	rev_data[i, 13] <- row[order(row,decreasing=T)[3]]
	rev_data[i, 14] <- sum(row[order(row,decreasing=T)[4:length(row)]])
	}
	rev_data$node <- 1:nrow(res$ML_marginal_prob_each_state_at_branch_bottom_below_node)

	# make better labels
	tipranges <- getranges_from_LagrangePHYLIP(res[["inputs"]]$geogfn)

	geo <- res$inputs$all_geog_states_list_usually_inferred_from_areas_maxareas
	geo_num <- unlist(lapply(lapply(geo, as.character), paste0, collapse ="_"))
	if (is.null(area_names)) {
	area_names <- colnames(tipranges@df)
	}
	if (length(area_names) != length(colnames(tipranges@df))) stop("Number of specified area names is incorrect. Check your geo data file.")
	number_codes <- 0:(length(area_names)-1)
	geo_letters <- geo_num

	recodes <- paste0(paste0(number_codes, " = '", area_names, "'"), collapse = "; ")
	for (i in 1:length(geo_letters)) {
	code <- geo_letters[i]
	code_split <- unlist(strsplit(code, "_"))
	geo_letters[i] <- paste0(car::recode(code_split, recodes), collapse = "")
	}

	#area_names <- rev(area_names)
	label_dict <- data.frame(lab_num_short = 1:length(geo),
	lab_num_long = geo_num,
	lab_letters = geo_letters)

	# replace short number codes with letters
	not_state_cols <- c(grep("_pp", colnames(rev_data)),
	grep("node", colnames(rev_data)))
	state_cols <- c(1:ncol(rev_data))[!c(1:ncol(rev_data)) %in% not_state_cols]
	for (i in state_cols) { # loop through by column indices for the state columns
	col <- as.character(rev_data[,i])
	for (j in 1:length(col)) { # loop through each item in the column and replace with letters
	col[j] <- label_dict$lab_letters[which(label_dict$lab_num_short == col[j])]
	}
	rev_data[,i] <- col
	}

	#change "NA" to NA
	rev_data %>%
	naniar::replace_with_na_all(condition = ~.x == "NA") -> rev_data
	# change any NAs in PP columns to 0
	pp_cols <- grep("_pp", colnames(rev_data))
	for (p in pp_cols) { rev_data[ ,p][is.na(rev_data[ ,p])] <- 0 }

	# make treedata object (combine data and tree)
	tibble::as_tibble(tree[[1]][[1]]) %>%
	full_join(rev_data, by = 'node') %>%
	as.treedata() -> rev_treedata
	#add list of states
	attributes(rev_treedata)$state_labels <- as.character(na.omit(unique(unlist(rev_data[,c(1:3, 8:10)]))))

	return(rev_treedata)
	}