Created
November 11, 2014 15:37
-
-
Save kgori/e7df5e3dc16bb0e3375a to your computer and use it in GitHub Desktop.
Basic tree builder using PLL, outputs all parameters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <stdio.h> | |
#include <stdlib.h> | |
#include <string.h> | |
#include <assert.h> | |
#include <pll/pll.h> | |
#define EXIT_NO_OVERWRITE 4 | |
#define EXIT_INCOMPATIBLE_TREE_ALIGNMENT 3 | |
#define EXIT_PARTITIONS_PARSE_FAILURE 2 | |
#define EXIT_ALIGNMENT_PARSE_FAILURE 1 | |
#define EXIT_SUCCESS 0 | |
/* REF: http://stackoverflow.com/a/122721 */ | |
char *trimwhitespace(char *str) { | |
char *end; | |
// Trim leading space | |
while(isspace(*str)) str++; | |
if(*str == 0) // All spaces? | |
return str; | |
// Trim trailing space | |
end = str + strlen(str) - 1; | |
while(end > str && isspace(*end)) end--; | |
// Write new null terminator | |
*(end+1) = 0; | |
return str; | |
} | |
int main (int argc, char * argv[]) { | |
pllAlignmentData * alignment; | |
pllInstance * tr; | |
partitionList * partitions; | |
pllQueue * partitionInfo; | |
pllInstanceAttr attr; | |
if (argc != 5){ | |
fprintf (stderr, "usage: %s [phylip-file] [partition-file] [nthreads] [outfile]\n", argv[0]); | |
return (EXIT_FAILURE); | |
} | |
/* File checking */ | |
FILE *ofp; | |
ofp = fopen(argv[4], "w"); | |
if (ofp == NULL) { | |
fprintf(stderr, "Couldn't open %s for writing\n", argv[4]); | |
fclose(ofp); | |
} | |
/* Set the PLL instance attributes */ | |
attr.rateHetModel = PLL_GAMMA; | |
attr.fastScaling = PLL_TRUE; | |
attr.saveMemory = PLL_FALSE; | |
attr.useRecom = PLL_FALSE; | |
attr.randomNumberSeed = 0xDEADBEEF; | |
attr.numberOfThreads = atoi(argv[3]); /* This only affects the pthreads version */ | |
/* Create a PLL tree */ | |
tr = pllCreateInstance (&attr); | |
/* Parse a PHYLIP file */ | |
alignment = pllParseAlignmentFile (PLL_FORMAT_PHYLIP, argv[1]); | |
if (!alignment) { | |
fprintf (stderr, "Error while parsing %s\n", argv[1]); | |
return (EXIT_ALIGNMENT_PARSE_FAILURE); | |
} | |
/* Parse the partitions file into a partition queue structure */ | |
partitionInfo = pllPartitionParse (argv[2]); | |
/* Validate the partitions */ | |
if (!pllPartitionsValidate (partitionInfo, alignment)) { | |
fprintf (stderr, "Error: Partitions do not cover all sites\n"); | |
return (EXIT_PARTITIONS_PARSE_FAILURE); | |
} | |
/* Commit the partitions and build a partitions structure */ | |
partitions = pllPartitionsCommit (partitionInfo, alignment); | |
/* We don't need the the intermedia partition queue structure anymore */ | |
pllQueuePartitionsDestroy (&partitionInfo); | |
/* eliminate duplicate sites from the alignment and update weights vector */ | |
pllAlignmentRemoveDups (alignment, partitions); | |
/* Or instead of the previous function use the next commented line to create | |
a random tree topology */ | |
pllTreeInitTopologyRandom(tr, alignment->sequenceCount, alignment->sequenceLabels); | |
/* Connect the alignment and partition structure with the tree structure */ | |
if (!pllLoadAlignment (tr, alignment, partitions)) { | |
fprintf (stderr, "Incompatible tree/alignment combination\n"); | |
return (EXIT_INCOMPATIBLE_TREE_ALIGNMENT); | |
} | |
pllComputeRandomizedStepwiseAdditionParsimonyTree(tr, partitions); | |
/* Initialize the model. Note that this function will also perform a full | |
tree traversal and evaluate the likelihood of the tree. Therefore, you | |
have the guarantee that tr->likelihood is the valid likelihood */ | |
pllInitModel(tr, partitions); | |
pllAlignmentDataDestroy (alignment); | |
printf ("Optimising: ... \n"); | |
pllRaxmlSearchAlgorithm(tr, partitions, PLL_TRUE); | |
/* WRITE THE OUTPUT */ | |
fprintf(ofp, "{\n \"likelihood\": %.12f,\n", tr->likelihood); | |
pllTreeToNewick(tr->tree_string, tr, partitions, | |
tr->start->back, PLL_TRUE, PLL_TRUE, | |
PLL_TRUE, PLL_FALSE, PLL_FALSE, | |
PLL_SUMMARIZE_LH, 0,0); | |
char* newick = trimwhitespace(tr->tree_string); | |
fprintf(ofp, " \"tree\": \"%s\",\n", newick); | |
fprintf(ofp, " \"partitions\": {\n"); | |
int i; | |
for (i = 0; i < partitions->numberOfPartitions; i++) { | |
fprintf(ofp, " \"%d\": {\n", i); | |
fprintf(ofp, " \"name\": \"%s\",\n", partitions->partitionData[i]->partitionName); | |
fprintf(ofp, " \"alpha\": %0.12f,\n", pllGetAlpha(partitions, i)); | |
fprintf(ofp, " \"frequencies\": [\n"); | |
int j; | |
int num_states = partitions->partitionData[i]->states; | |
for (j = 0; j < num_states - 1; j++) { | |
fprintf(ofp, " %.12f,\n", partitions->partitionData[i]->frequencies[j]); | |
} | |
fprintf(ofp, " %.12f\n ]\n", partitions->partitionData[i]->frequencies[num_states - 1]); | |
if (partitions->partitionData[i]->dataType == PLL_DNA_DATA) { | |
int num_rates = (num_states * (num_states - 1)) / 2; | |
fprintf(ofp, " \"rates\": [\n"); | |
for (j = 0; j < num_rates - 1; j++) { | |
fprintf(ofp, " %.12f,\n", partitions->partitionData[i]->substRates[j]); | |
} | |
fprintf(ofp, " %.12f\n ]\n", partitions->partitionData[i]->substRates[num_rates - 1]); | |
} | |
if (i == partitions->numberOfPartitions - 1) { | |
fprintf(ofp, " }\n"); | |
} | |
else { | |
fprintf(ofp, " },\n"); | |
} | |
} | |
fprintf(ofp, " }\n}"); | |
fclose(ofp); | |
/* free all allocated memory to eliminate memory leaks */ | |
pllPartitionsDestroy (tr, &partitions); | |
pllDestroyInstance(tr); | |
return (EXIT_SUCCESS); | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment