Skip to content

Instantly share code, notes, and snippets.

@kgori
Created November 11, 2014 15:37
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save kgori/e7df5e3dc16bb0e3375a to your computer and use it in GitHub Desktop.
Save kgori/e7df5e3dc16bb0e3375a to your computer and use it in GitHub Desktop.
Basic tree builder using PLL, outputs all parameters
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>
#include <pll/pll.h>
#define EXIT_NO_OVERWRITE 4
#define EXIT_INCOMPATIBLE_TREE_ALIGNMENT 3
#define EXIT_PARTITIONS_PARSE_FAILURE 2
#define EXIT_ALIGNMENT_PARSE_FAILURE 1
#define EXIT_SUCCESS 0
/* REF: http://stackoverflow.com/a/122721 */
char *trimwhitespace(char *str) {
char *end;
// Trim leading space
while(isspace(*str)) str++;
if(*str == 0) // All spaces?
return str;
// Trim trailing space
end = str + strlen(str) - 1;
while(end > str && isspace(*end)) end--;
// Write new null terminator
*(end+1) = 0;
return str;
}
int main (int argc, char * argv[]) {
pllAlignmentData * alignment;
pllInstance * tr;
partitionList * partitions;
pllQueue * partitionInfo;
pllInstanceAttr attr;
if (argc != 5){
fprintf (stderr, "usage: %s [phylip-file] [partition-file] [nthreads] [outfile]\n", argv[0]);
return (EXIT_FAILURE);
}
/* File checking */
FILE *ofp;
ofp = fopen(argv[4], "w");
if (ofp == NULL) {
fprintf(stderr, "Couldn't open %s for writing\n", argv[4]);
fclose(ofp);
}
/* Set the PLL instance attributes */
attr.rateHetModel = PLL_GAMMA;
attr.fastScaling = PLL_TRUE;
attr.saveMemory = PLL_FALSE;
attr.useRecom = PLL_FALSE;
attr.randomNumberSeed = 0xDEADBEEF;
attr.numberOfThreads = atoi(argv[3]); /* This only affects the pthreads version */
/* Create a PLL tree */
tr = pllCreateInstance (&attr);
/* Parse a PHYLIP file */
alignment = pllParseAlignmentFile (PLL_FORMAT_PHYLIP, argv[1]);
if (!alignment) {
fprintf (stderr, "Error while parsing %s\n", argv[1]);
return (EXIT_ALIGNMENT_PARSE_FAILURE);
}
/* Parse the partitions file into a partition queue structure */
partitionInfo = pllPartitionParse (argv[2]);
/* Validate the partitions */
if (!pllPartitionsValidate (partitionInfo, alignment)) {
fprintf (stderr, "Error: Partitions do not cover all sites\n");
return (EXIT_PARTITIONS_PARSE_FAILURE);
}
/* Commit the partitions and build a partitions structure */
partitions = pllPartitionsCommit (partitionInfo, alignment);
/* We don't need the the intermedia partition queue structure anymore */
pllQueuePartitionsDestroy (&partitionInfo);
/* eliminate duplicate sites from the alignment and update weights vector */
pllAlignmentRemoveDups (alignment, partitions);
/* Or instead of the previous function use the next commented line to create
a random tree topology */
pllTreeInitTopologyRandom(tr, alignment->sequenceCount, alignment->sequenceLabels);
/* Connect the alignment and partition structure with the tree structure */
if (!pllLoadAlignment (tr, alignment, partitions)) {
fprintf (stderr, "Incompatible tree/alignment combination\n");
return (EXIT_INCOMPATIBLE_TREE_ALIGNMENT);
}
pllComputeRandomizedStepwiseAdditionParsimonyTree(tr, partitions);
/* Initialize the model. Note that this function will also perform a full
tree traversal and evaluate the likelihood of the tree. Therefore, you
have the guarantee that tr->likelihood is the valid likelihood */
pllInitModel(tr, partitions);
pllAlignmentDataDestroy (alignment);
printf ("Optimising: ... \n");
pllRaxmlSearchAlgorithm(tr, partitions, PLL_TRUE);
/* WRITE THE OUTPUT */
fprintf(ofp, "{\n \"likelihood\": %.12f,\n", tr->likelihood);
pllTreeToNewick(tr->tree_string, tr, partitions,
tr->start->back, PLL_TRUE, PLL_TRUE,
PLL_TRUE, PLL_FALSE, PLL_FALSE,
PLL_SUMMARIZE_LH, 0,0);
char* newick = trimwhitespace(tr->tree_string);
fprintf(ofp, " \"tree\": \"%s\",\n", newick);
fprintf(ofp, " \"partitions\": {\n");
int i;
for (i = 0; i < partitions->numberOfPartitions; i++) {
fprintf(ofp, " \"%d\": {\n", i);
fprintf(ofp, " \"name\": \"%s\",\n", partitions->partitionData[i]->partitionName);
fprintf(ofp, " \"alpha\": %0.12f,\n", pllGetAlpha(partitions, i));
fprintf(ofp, " \"frequencies\": [\n");
int j;
int num_states = partitions->partitionData[i]->states;
for (j = 0; j < num_states - 1; j++) {
fprintf(ofp, " %.12f,\n", partitions->partitionData[i]->frequencies[j]);
}
fprintf(ofp, " %.12f\n ]\n", partitions->partitionData[i]->frequencies[num_states - 1]);
if (partitions->partitionData[i]->dataType == PLL_DNA_DATA) {
int num_rates = (num_states * (num_states - 1)) / 2;
fprintf(ofp, " \"rates\": [\n");
for (j = 0; j < num_rates - 1; j++) {
fprintf(ofp, " %.12f,\n", partitions->partitionData[i]->substRates[j]);
}
fprintf(ofp, " %.12f\n ]\n", partitions->partitionData[i]->substRates[num_rates - 1]);
}
if (i == partitions->numberOfPartitions - 1) {
fprintf(ofp, " }\n");
}
else {
fprintf(ofp, " },\n");
}
}
fprintf(ofp, " }\n}");
fclose(ofp);
/* free all allocated memory to eliminate memory leaks */
pllPartitionsDestroy (tr, &partitions);
pllDestroyInstance(tr);
return (EXIT_SUCCESS);
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment