Skip to content

Instantly share code, notes, and snippets.

@nmatzke
Last active February 2, 2016 14:03
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save nmatzke/8f80723b6e1fc80ed5ac to your computer and use it in GitHub Desktop.
Save nmatzke/8f80723b6e1fc80ed5ac to your computer and use it in GitHub Desktop.
Calculating the number of unobservable site patterns for the Mk-parsimony-informative ascertainment bias correction
#######################################################
# Calculating the number of unobservable site patterns
# for the Mk-parsimony-informative ascertainment bias
# correction
#
# by Nick Matzke, 2015-2016
#
# Free to re-use; we are preparing an ms on this issue,
# please email nick.matzke@anu.edu.au if interested.
#
#######################################################
# Allman et al. 2010 as Mk pars-inf
#
# Allman ES, Holder MT, Rhodes JA (2010).
# Estimating trees from filtered data: Identifiability of models
# for morphological phylogenetics. Journal of Theoretical Biology
# 263 (1):108-119
#######################################################
# Permutations
# http://stackoverflow.com/questions/7906332/how-to-calculate-combination-and-permutation-in-r
#######################################################
perm <- function(n, k)
{
return(factorial(n) / factorial(n-k))
}
# Number of unobservable patterns in Mk-parsimony-informative ascertainment bias correction
num_unobservable_patterns_ParsInf <- function(ntaxa, nstates, printflag=TRUE)
{
defaults='
ntaxa=5
nstates=2
printflag=TRUE
'
total_number_of_possible_patterns = nstates^ntaxa
number_of_unobservable_patterns_given_nstates_observed = 0
number_of_unobservable_patterns_given_j_states_in_pattern = 0
number_of_unobservable_patterns_given_j_states_in_pattern_AND_patterns_are_uniform = 0
# Initialize table
pattern_counts_table = NULL
# First, get the number of states in the patterns
for (i in nstates:1)
{
# Then, make a list of the number of combinations of states that are possible
number_of_combinations_of_states = choose(n=nstates, k=i)
# For each of these combinations, 1 of the states will be the
# dominant state, and (i-1) will be autapomorphic states
# This can happen nstates i choose 1 ways
number_of_ways_to_pick_the_dominant_state = choose(n=i, k=1)
# After a dominant state is chosen, there are ntaxa choose (i-1)
# ways to pick the autapomorphic taxa
number_of_ways_to_pick_autapomorphic_taxa = choose(n=ntaxa, k=(i-1))
# Given that the autapomorphic taxa have been picked, there are
# (i-1) PERMUTE (i-1) ways to assign the autapomorphic states
number_of_ways_to_assign_autapo_states_to_autapo_taxa = perm(n=(i-1), k=(i-1))
product = number_of_combinations_of_states * number_of_ways_to_pick_the_dominant_state * number_of_ways_to_pick_autapomorphic_taxa * number_of_ways_to_assign_autapo_states_to_autapo_taxa
tmprow = c(number_of_combinations_of_states, number_of_ways_to_pick_the_dominant_state, number_of_ways_to_pick_autapomorphic_taxa, number_of_ways_to_assign_autapo_states_to_autapo_taxa, product)
pattern_counts_table = rbind(pattern_counts_table, tmprow)
}
abbreviations = c("ncomb_sts", "npicks_domstate", "npicks_autapo_taxa", "n_assign_autapo_states", "product")
meaning = c("number of states in the pattern", "number_of_combinations_of_states", "number_of_ways_to_pick_the_dominant_state", "number_of_ways_to_pick_autapomorphic_taxa", "number_of_ways_to_assign_autapo_states_to_autapo_taxa", "number of unobservable patterns given this number of states in the pattern")
headers_explained = cbind(c("(row names)", abbreviations), meaning)
headers_explained = as.data.frame(headers_explained, stringsAsFactors=FALSE)
names(headers_explained) = c("abbreviation", "meaning")
row.names(headers_explained) = NULL
# Label the table
pattern_counts_table = as.data.frame(pattern_counts_table, stringsAsFactors=FALSE)
names(pattern_counts_table) = abbreviations
row.names(pattern_counts_table) = paste0("when_nstates=", nstates:1)
number_of_unobservable_patterns = sum(pattern_counts_table$product)
percentage_of_patterns_unobservable = number_of_unobservable_patterns / total_number_of_possible_patterns * 100
summary_table = matrix(data=c(ntaxa, nstates, number_of_unobservable_patterns, total_number_of_possible_patterns, percentage_of_patterns_unobservable), nrow=1)
summary_table = as.data.frame(summary_table, stringsAsFactors=FALSE)
names(summary_table) = c("ntaxa", "nstates", "number_of_unobservable_patterns", "total_number_of_possible_patterns", "percentage_of_patterns_unobservable")
res = NULL
res$headers_explained = headers_explained
res$pattern_counts_table = pattern_counts_table
res$summary_table = summary_table
if (printflag == TRUE)
{
require(BioGeoBEARS)
cat("\n\nTable counting the number of unobservable patterns:\n\n")
print(conditional_format_table(pattern_counts_table))
cat("\n\nExplanation of the table headers:\n\n")
print(headers_explained)
cat("\n\nSummary of possible and unobservable patterns:\n\n")
print(conditional_format_table(summary_table))
}
return(res)
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment