bomeara/distances.R

## distances.R
chunks <- seq(from=0, to=1, length.out=7) # how finely we want to divide each univariate character

# 0.0000000 0.1666667 0.3333333 0.5000000 0.6666667 0.8333333 1.0000000

possible_states <- expand.grid(a=chunks, b=chunks, c=chunks) # all possible combinations (not adding up to 1).
 #Doing just three chars here, you can add more: d=chunks, e=chunks....
sums <- apply(possible_states,1, sum)
possible_states <- possible_states[which(abs(sums-1)<0.00001),] # now limiting to those that add up to 1
rownames(possible_states) <- sequence(nrow(possible_states)) - 1 # state 0, 1, 2...

# Now we have a matrix:

#       a         b         c
# 0 1.0000000 0.0000000 0.0000000
# 1 0.6666667 0.3333333 0.0000000
# 2 0.5000000 0.5000000 0.0000000
# 3 0.3333333 0.6666667 0.0000000
# 4 0.0000000 1.0000000 0.0000000
# 5 0.6666667 0.1666667 0.1666667
# ... and so forth

# These are all the possible combinations of the multivariate trait that add up to 1

# You then need to convert your traits to these states (0, 1, 2...)
# Basically, for each species, which row of possible_states is closest
# Not doing it here, but one way would be just to append the values for one species
# to the possible_states matrix, see which row above is minimum distance from it
# and the rowname of that row (aka row index - 1) is the state for that species.

# Next, we're going to get a transition rate matrix

distances <- as.matrix(dist(possible_states, upper=TRUE))
rate_mat <- distances
diag(rate_mat) <- 10 # just to prevent NA issues
for (i in sequence(nrow(rate_mat))) {
    min_dist <- min(rate_mat[i,],na.rm=TRUE)
    rate_mat[i,unname(which(rate_mat[i,]>min_dist))] <- 0
    rate_mat[i,unname(which(rate_mat[i,]==min_dist))] <- 1
}
diag(rate_mat) <- NA

# The idea of this matrix is that we only allow moves to the nearest character frequency,
# and that all such moves have the same rate (the latter can be relaxed by having numbers
# greater than one in the matrix: rate class 2, 3, 4, etc.)

# 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21
# 0  NA  0  0  0  0  1  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
# 1   0 NA  0  0  0  0  1  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
# 2   0  1 NA  1  0  0  1  1  0  0  0  0  0  0  0  0  0  0  0  0  0  0
# 3   0  0  0 NA  0  0  0  1  0  0  0  0  0  0  0  0  0  0  0  0  0  0
# 4   0  0  0  0 NA  0  0  0  1  0  0  0  0  0  0  0  0  0  0  0  0  0
# 5   0  0  0  0  0 NA  1  0  0  0  1  0  0  0  0  0  0  0  0  0  0  0
# 6   0  1  0  0  0  1 NA  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
# 7   0  0  0  1  0  0  0 NA  1  0  0  0  0  0  0  0  0  0  0  0  0  0
# 8   0  0  0  0  0  0  0  1 NA  0  0  0  1  0  0  0  0  0  0  0  0  0
# 9   0  0  0  0  0  0  0  0  0 NA  1  0  0  0  0  0  0  0  0  0  0  0
# 10  0  0  0  0  0  1  0  0  0  1 NA  0  0  0  0  0  0  0  0  0  0  0
# 11  0  0  0  0  0  0  1  1  0  0  1 NA  1  0  0  1  1  0  0  0  0  0
# 12  0  0  0  0  0  0  0  0  1  0  0  0 NA  1  0  0  0  0  0  0  0  0
# 13  0  0  0  0  0  0  0  0  0  0  0  0  1 NA  0  0  0  0  0  0  0  0
# 14  0  0  0  0  0  0  0  0  0  1  1  0  0  0 NA  1  0  0  1  0  0  0
# 15  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0 NA  0  0  1  1  0  0
# 16  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0 NA  0  0  1  1  0
# 17  0  0  0  0  0  0  0  0  0  0  0  0  1  1  0  0  1 NA  0  0  1  0
# 18  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  1  0  0 NA  0  0  0
# 19  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  1  1  0  0 NA  0  0
# 20  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  1  0  0  0 NA  0
# 21  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  1  0 NA

# now reconstruct
results <- corHMM::rayDISC(phy, data, ntraits=1, charnum=1, rate.mat=rate_mat, model="ER")
	chunks <- seq(from=0, to=1, length.out=7) # how finely we want to divide each univariate character

	# 0.0000000 0.1666667 0.3333333 0.5000000 0.6666667 0.8333333 1.0000000

	possible_states <- expand.grid(a=chunks, b=chunks, c=chunks) # all possible combinations (not adding up to 1).
	#Doing just three chars here, you can add more: d=chunks, e=chunks....
	sums <- apply(possible_states,1, sum)
	possible_states <- possible_states[which(abs(sums-1)<0.00001),] # now limiting to those that add up to 1
	rownames(possible_states) <- sequence(nrow(possible_states)) - 1 # state 0, 1, 2...

	# Now we have a matrix:

	# a b c
	# 0 1.0000000 0.0000000 0.0000000
	# 1 0.6666667 0.3333333 0.0000000
	# 2 0.5000000 0.5000000 0.0000000
	# 3 0.3333333 0.6666667 0.0000000
	# 4 0.0000000 1.0000000 0.0000000
	# 5 0.6666667 0.1666667 0.1666667
	# ... and so forth

	# These are all the possible combinations of the multivariate trait that add up to 1

	# You then need to convert your traits to these states (0, 1, 2...)
	# Basically, for each species, which row of possible_states is closest
	# Not doing it here, but one way would be just to append the values for one species
	# to the possible_states matrix, see which row above is minimum distance from it
	# and the rowname of that row (aka row index - 1) is the state for that species.

	# Next, we're going to get a transition rate matrix

	distances <- as.matrix(dist(possible_states, upper=TRUE))
	rate_mat <- distances
	diag(rate_mat) <- 10 # just to prevent NA issues
	for (i in sequence(nrow(rate_mat))) {
	min_dist <- min(rate_mat[i,],na.rm=TRUE)
	rate_mat[i,unname(which(rate_mat[i,]>min_dist))] <- 0
	rate_mat[i,unname(which(rate_mat[i,]==min_dist))] <- 1
	}
	diag(rate_mat) <- NA

	# The idea of this matrix is that we only allow moves to the nearest character frequency,
	# and that all such moves have the same rate (the latter can be relaxed by having numbers
	# greater than one in the matrix: rate class 2, 3, 4, etc.)

	# 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21
	# 0 NA 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
	# 1 0 NA 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
	# 2 0 1 NA 1 0 0 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0
	# 3 0 0 0 NA 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0
	# 4 0 0 0 0 NA 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0
	# 5 0 0 0 0 0 NA 1 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0
	# 6 0 1 0 0 0 1 NA 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
	# 7 0 0 0 1 0 0 0 NA 1 0 0 0 0 0 0 0 0 0 0 0 0 0
	# 8 0 0 0 0 0 0 0 1 NA 0 0 0 1 0 0 0 0 0 0 0 0 0
	# 9 0 0 0 0 0 0 0 0 0 NA 1 0 0 0 0 0 0 0 0 0 0 0
	# 10 0 0 0 0 0 1 0 0 0 1 NA 0 0 0 0 0 0 0 0 0 0 0
	# 11 0 0 0 0 0 0 1 1 0 0 1 NA 1 0 0 1 1 0 0 0 0 0
	# 12 0 0 0 0 0 0 0 0 1 0 0 0 NA 1 0 0 0 0 0 0 0 0
	# 13 0 0 0 0 0 0 0 0 0 0 0 0 1 NA 0 0 0 0 0 0 0 0
	# 14 0 0 0 0 0 0 0 0 0 1 1 0 0 0 NA 1 0 0 1 0 0 0
	# 15 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 NA 0 0 1 1 0 0
	# 16 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 NA 0 0 1 1 0
	# 17 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 0 1 NA 0 0 1 0
	# 18 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 NA 0 0 0
	# 19 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 0 NA 0 0
	# 20 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 NA 0
	# 21 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 NA

	# now reconstruct
	results <- corHMM::rayDISC(phy, data, ntraits=1, charnum=1, rate.mat=rate_mat, model="ER")