nmatzke/Beast2_GeneralSubstitutionModel_v1.R

## Beast2_GeneralSubstitutionModel_v1.R


#######################################################
# Figuring out the Beast2 GeneralSubstitutionModel
#######################################################


# The Beast2 documentation says:
#
# file:///Applications/BEAST_2.4.3/Beast2_XML_as_HTML_help/beast.evolution.substitutionmodel.GeneralSubstitutionModel.html
#
# BEAST v2.4.4 Prerelease Documentation: beast.evolution.substitutionmodel.GeneralSubstitutionModel
# type: beast.core.Function
# Rate parameter which defines the transition rate matrix. Only the off-diagonal
# entries need to be specified (diagonal makes row sum to zero in a rate matrix).
# Entry i specifies the rate from floor(i/(n-1)) to i%(n-1)+delta where n is the
# number of states and delta=1 if floor(i/(n-1)) >= i%(n-1) and 0 otherwise.
#
# NOTE: I think the formula must actually be:
# Entry i specifies the rate from floor(i/(n-1)) to i%(n-1)+delta where n is the
# number of states and delta=1 if floor(i/(n-1)) <= i%(n-1) and 0 otherwise.
#
#
# NOTE!!  THE 'i' ARE THE 0-BASED INDICES OF 'i'!!!

#####################################################
# First, source some utility functions
#####################################################
source("https://gist.githubusercontent.com/nmatzke/99617e9b994c4d66d786af3ee057413c/raw/a8d72d19fbabb458df0c0540c293a1a5a2719814/data_frame_utilities_v1.R")


# This function appears to work (switching the >= to a <= )
from_i_get_from_to <- function(i, numstates)
	{
	setup='
	i = 0:19
	numstates = 5
	from_i_get_from_to(i, numstates)

	i = 0
	numstates = 5
	from_i_get_from_to(i, numstates)
	'


	if (length(i) > 1)
		{
		tdf = t(sapply(X=i, from_i_get_from_to, numstates=numstates))
		tdf = as.data.frame(tdf, stringsAsFactors=FALSE)
		names(tdf) = c("i", "from_rownum", "to_colnum")
		tdf = unlist_df4(tdf)
		return(tdf)
		}


	if (floor(i/(numstates-1)) <= i%%(numstates-1))
		{
		delta = 1
		} else {
		delta = 0
		}

	from_rownum = floor(i/(numstates-1))
	to_colnum = i%%(numstates-1)+delta

	outrow = c(i, from_rownum, to_colnum)
	outrow = matrix(data=outrow, nrow=1)
	outrow = as.data.frame(outrow, stringsAsFactors=FALSE)
	names(outrow) = c("i", "from_rownum", "to_colnum")
	return(outrow)
	}


	i = 0:19
	numstates = 5
	from_i_get_from_to(i, numstates)


# Show the indices in the matrix
make_indices_table_into_matrix <- function(tdf, numstates)
	{
	setup = '
	i = 0:19
	numstates = 5
	tdf = from_i_get_from_to(i, numstates)
	tdf
	tmpmat = make_indices_table_into_matrix(tdf, numstates)
	tmpmat
	'

	tmpmat = matrix(data=NA, nrow=numstates, ncol=numstates)

	for (j in 1:nrow(tdf))
		{
		tmpmat[(tdf$from_rownum[j]+1), (tdf$to_colnum[j]+1)] = tdf$i[j]
		}

	return(tmpmat)
	}


i = 0:19
numstates = 5
tdf = from_i_get_from_to(i, numstates)
tdf
tmpmat = make_indices_table_into_matrix(tdf, numstates)
tmpmat

# Good result
# > tmpmat
#      [,1] [,2] [,3] [,4] [,5]
# [1,]   NA    0    1    2    3
# [2,]    4   NA    5    6    7
# [3,]    8    9   NA   10   11
# [4,]   12   13   14   NA   15
# [5,]   16   17   18   19   NA


# This is the formula in the documentation,
# which produces a weird result
#
from_i_get_from_to_WEIRD <- function(i, numstates)
	{
	setup='
	i = 0:19
	numstates = 5
	from_i_get_from_to(i, numstates)

	i = 0
	numstates = 5
	from_i_get_from_to(i, numstates)
	'


	if (length(i) > 1)
		{
		tdf = t(sapply(X=i, from_i_get_from_to_WEIRD, numstates=numstates))
		tdf = as.data.frame(tdf, stringsAsFactors=FALSE)
		names(tdf) = c("i", "from_rownum", "to_colnum")
		tdf = unlist_df4(tdf)
		return(tdf)
		}


	if (floor(i/(numstates-1)) >= i%%(numstates-1))
		{
		delta = 1
		} else {
		delta = 0
		}

	from_rownum = floor(i/(numstates-1))
	to_colnum = i%%(numstates-1)+delta

	outrow = c(i, from_rownum, to_colnum)
	outrow = matrix(data=outrow, nrow=1)
	outrow = as.data.frame(outrow, stringsAsFactors=FALSE)
	names(outrow) = c("i", "from_rownum", "to_colnum")
	return(outrow)
	}


	i = 0:19
	numstates = 5
	from_i_get_from_to(i, numstates)


i = 0:19
numstates = 5
tdf = from_i_get_from_to_WEIRD(i, numstates)
tdf
tmpmat = make_indices_table_into_matrix(tdf, numstates)
tmpmat

# Weird result:
# tmpmat
#      [,1] [,2] [,3] [,4] [,5]
# [1,]   NA    1    2    3   NA
# [2,]   NA    4    6    7   NA
# [3,]   NA    8    9   11   NA
# [4,]   NA   12   13   14   15
# [5,]   NA   16   17   18   19


	#######################################################
	# Figuring out the Beast2 GeneralSubstitutionModel
	#######################################################


	# The Beast2 documentation says:
	#
	# file:///Applications/BEAST_2.4.3/Beast2_XML_as_HTML_help/beast.evolution.substitutionmodel.GeneralSubstitutionModel.html
	#
	# BEAST v2.4.4 Prerelease Documentation: beast.evolution.substitutionmodel.GeneralSubstitutionModel
	# type: beast.core.Function
	# Rate parameter which defines the transition rate matrix. Only the off-diagonal
	# entries need to be specified (diagonal makes row sum to zero in a rate matrix).
	# Entry i specifies the rate from floor(i/(n-1)) to i%(n-1)+delta where n is the
	# number of states and delta=1 if floor(i/(n-1)) >= i%(n-1) and 0 otherwise.
	#
	# NOTE: I think the formula must actually be:
	# Entry i specifies the rate from floor(i/(n-1)) to i%(n-1)+delta where n is the
	# number of states and delta=1 if floor(i/(n-1)) <= i%(n-1) and 0 otherwise.
	#
	#
	# NOTE!! THE 'i' ARE THE 0-BASED INDICES OF 'i'!!!

	#####################################################
	# First, source some utility functions
	#####################################################
	source("https://gist.githubusercontent.com/nmatzke/99617e9b994c4d66d786af3ee057413c/raw/a8d72d19fbabb458df0c0540c293a1a5a2719814/data_frame_utilities_v1.R")



	# This function appears to work (switching the >= to a <= )
	from_i_get_from_to <- function(i, numstates)
	{
	setup='
	i = 0:19
	numstates = 5
	from_i_get_from_to(i, numstates)

	i = 0
	numstates = 5
	from_i_get_from_to(i, numstates)
	'


	if (length(i) > 1)
	{
	tdf = t(sapply(X=i, from_i_get_from_to, numstates=numstates))
	tdf = as.data.frame(tdf, stringsAsFactors=FALSE)
	names(tdf) = c("i", "from_rownum", "to_colnum")
	tdf = unlist_df4(tdf)
	return(tdf)
	}


	if (floor(i/(numstates-1)) <= i%%(numstates-1))
	{
	delta = 1
	} else {
	delta = 0
	}

	from_rownum = floor(i/(numstates-1))
	to_colnum = i%%(numstates-1)+delta

	outrow = c(i, from_rownum, to_colnum)
	outrow = matrix(data=outrow, nrow=1)
	outrow = as.data.frame(outrow, stringsAsFactors=FALSE)
	names(outrow) = c("i", "from_rownum", "to_colnum")
	return(outrow)
	}


	i = 0:19
	numstates = 5
	from_i_get_from_to(i, numstates)


	# Show the indices in the matrix
	make_indices_table_into_matrix <- function(tdf, numstates)
	{
	setup = '
	i = 0:19
	numstates = 5
	tdf = from_i_get_from_to(i, numstates)
	tdf
	tmpmat = make_indices_table_into_matrix(tdf, numstates)
	tmpmat
	'

	tmpmat = matrix(data=NA, nrow=numstates, ncol=numstates)

	for (j in 1:nrow(tdf))
	{
	tmpmat[(tdf$from_rownum[j]+1), (tdf$to_colnum[j]+1)] = tdf$i[j]
	}

	return(tmpmat)
	}


	i = 0:19
	numstates = 5
	tdf = from_i_get_from_to(i, numstates)
	tdf
	tmpmat = make_indices_table_into_matrix(tdf, numstates)
	tmpmat

	# Good result
	# > tmpmat
	# [,1] [,2] [,3] [,4] [,5]
	# [1,] NA 0 1 2 3
	# [2,] 4 NA 5 6 7
	# [3,] 8 9 NA 10 11
	# [4,] 12 13 14 NA 15
	# [5,] 16 17 18 19 NA





	# This is the formula in the documentation,
	# which produces a weird result
	#
	from_i_get_from_to_WEIRD <- function(i, numstates)
	{
	setup='
	i = 0:19
	numstates = 5
	from_i_get_from_to(i, numstates)

	i = 0
	numstates = 5
	from_i_get_from_to(i, numstates)
	'



	if (length(i) > 1)
	{
	tdf = t(sapply(X=i, from_i_get_from_to_WEIRD, numstates=numstates))
	tdf = as.data.frame(tdf, stringsAsFactors=FALSE)
	names(tdf) = c("i", "from_rownum", "to_colnum")
	tdf = unlist_df4(tdf)
	return(tdf)
	}


	if (floor(i/(numstates-1)) >= i%%(numstates-1))
	{
	delta = 1
	} else {
	delta = 0
	}

	from_rownum = floor(i/(numstates-1))
	to_colnum = i%%(numstates-1)+delta

	outrow = c(i, from_rownum, to_colnum)
	outrow = matrix(data=outrow, nrow=1)
	outrow = as.data.frame(outrow, stringsAsFactors=FALSE)
	names(outrow) = c("i", "from_rownum", "to_colnum")
	return(outrow)
	}


	i = 0:19
	numstates = 5
	from_i_get_from_to(i, numstates)



	i = 0:19
	numstates = 5
	tdf = from_i_get_from_to_WEIRD(i, numstates)
	tdf
	tmpmat = make_indices_table_into_matrix(tdf, numstates)
	tmpmat

	# Weird result:
	# tmpmat
	# [,1] [,2] [,3] [,4] [,5]
	# [1,] NA 1 2 3 NA
	# [2,] NA 4 6 7 NA
	# [3,] NA 8 9 11 NA
	# [4,] NA 12 13 14 15
	# [5,] NA 16 17 18 19