Skip to content

Instantly share code, notes, and snippets.

@spond
Created August 15, 2015 18:23
Show Gist options
  • Save spond/abc305ea3b91f2620677 to your computer and use it in GitHub Desktop.
Save spond/abc305ea3b91f2620677 to your computer and use it in GitHub Desktop.
EDEPS HBL files
// ---- Environment and library setup --------------------------------------------------------
// States the HyPhy version this script was written against ("2.22").
RequireVersion ("2.22");
VERBOSITY_LEVEL = 1;
// namespace 'io' for interactive/datamonkey i/o functions
LoadFunctionLibrary("lib2014/IOFunctions.bf");
// namespace 'utility' for convenience functions
LoadFunctionLibrary("lib2014/UtilityFunctions.bf");
// Announce the analysis (description, version, citation, authors, contact, input requirements).
io.displayAnalysisBanner ({"info" : "EDEPS is a modification of DEPS to look for directional evolution along all or
a subset of tree branches (episodic directional evolution of protein sequences).",
"version" : "1.00",
"reference" : "PLoS Comput Biol. 2012;8(5):e1002507",
"authors" : "Sergei L Kosakovsky Pond, Ben Murrell, and Konrad Scheffler",
"contact" : "spond@ucsd.edu",
"requirements" : "an aminoacid alignment and a rooted phylogenetic tree (optionally annotated with {})"
} );
// NOTE(review): these two libraries presumably supply the tree-annotation and ancestral
// state helpers used later in the script (PostOrderAVL2StringAnnotateAux,
// _buildAncestralCache, _substitutionsBySite, ...) -- confirm which file defines which.
LoadFunctionLibrary ("GrabBag.bf");
LoadFunctionLibrary ("AncestralMapper.bf");
/*--------------------------------------------------------------------------------------------*/
// The 20-letter residue alphabet. A residue's position in this string is the integer
// "residue index" used throughout the script (loop counters, DoResults argument,
// column 1 of test_p_values).
AAString = "ACDEFGHIKLMNPQRSTVWY";
// Reverse lookup: residue letter -> index in AAString.
AACharToIdx = {};
for (k=0; k<20; k += 1) {
AACharToIdx [AAString[k]] = k;
}
SKIP_MODEL_PARAMETER_LIST = 0;
// NOTE(review): presumably these provide AddABiasREL / promptModel / fixGlobalParameters
// and the tree utilities called below -- verify against the library sources.
LoadFunctionLibrary ("AddABias");
LoadFunctionLibrary ("TreeTools");
// One row per residue; DoResults stores the LRT p-value in column 0 and the residue
// index in column 1. The reporting section later reorders the rows by column 0.
test_p_values = {20,2};
/*--------------------------------------------------------------------------------------------*/
function GetEqFreqs (ModelMatrixName&, baseFreqs) {
    /*
        Returns the equilibrium distribution (a 1x20 row) implied by the rate
        matrix referenced by ModelMatrixName once its off-diagonal entries have
        been scaled by the target-state frequencies in baseFreqs.

        ModelMatrixName : name (by reference) of a 20x20 rate matrix
        baseFreqs       : 20 frequencies, indexed by column (target state)

        The function runs in global scope: it overwrites t, ri, ci, numRateMx
        and numRateMxI.
    */

    // the matrix is copied after t is set to 1
    // NOTE(review): presumably its entries are expressions in t -- confirm
    t = 1;
    numRateMx = ModelMatrixName;

    for (ri = 0; ri < 20; ri += 1) {
        for (ci = 0; ci < 20; ci += 1) {
            if (ri == ci) {
                continue;
            }
            // scale the rate by the frequency of the target state and
            // accumulate minus the row total on the diagonal
            numRateMx[ri][ci] = numRateMx[ri][ci] * baseFreqs[ci];
            numRateMx[ri][ri] = numRateMx[ri][ri] - numRateMx[ri][ci];
        }
        // Row ri is complete; replace its last entry with 1. With the whole last
        // column set to ones, row 19 of the inverse is the vector p that satisfies
        // p*Q = 0 in the first 19 columns and sum(p) = 1.
        numRateMx[ri][19] = 1;
    }

    numRateMxI = Inverse (numRateMx);
    return numRateMxI [19][-1];
}
/*--------------------------------------------------------------------------------------------*/
// ---- Phase 0: read the data, pick the branches to test, fit the baseline model ------------
// Prompt for the alignment, remember where it came from (all output files are written
// next to it) and build a data filter over the dataset.
SetDialogPrompt ("PROTEIN file to examine:");
DataSet ds = ReadDataFile (PROMPT_FOR_FILE);
basePath = LAST_FILE_PATH;
DataSetFilter filteredData = CreateFilter (ds,1);
ACCEPT_ROOTED_TREES = 1;
// tree_definition is an associative array; this script reads its "string" key here and
// selectBranchesToTest reads "model_list" / "model_map".
tree_definition = utility.loadAnnotatedTopology(1);
// NOTE(review): promptModel is defined outside this file; presumably it selects the
// baseline protein model and defines modelNameString / vectorOfFrequencies, which are
// used later without being assigned here -- confirm.
promptModel (0);
ExecuteCommands ("Tree givenTree = " +tree_definition["string"]);
// AVL representation of the tree; entry 0 holds the index of the root node.
treeAVL = givenTree^0;
rootNode = treeAVL[(treeAVL[0])["Root"]];
io.checkAssertion ("Abs(rootNode[\"Children\"]) == 2", "Please ensure that the tree is rooted");
// EDEPS_SET: associative array keyed by branch name for every branch under test.
EDEPS_SET = selectBranchesToTest (tree_definition);
EDEPS_SET_str = Join (",",Rows(EDEPS_SET));
io.reportProgressMessage ("EDEPS", "Selected " + Abs (EDEPS_SET) + " branches to test for directional selection: " + EDEPS_SET_str);
// Tie the lengths of the two branches descending from the root in givenTree
// (left := right).
root_left = "givenTree." + (treeAVL[(rootNode["Children"])[0]])["Name"] + ".t";
root_right = "givenTree." + (treeAVL[(rootNode["Children"])[1]])["Name"] + ".t";
ExecuteCommands (root_left + ":=" + root_right);
// From here on root_left / root_right name the same two parameters in biasedTree;
// they are consumed inside the per-residue loop.
root_left = "biasedTree." + (treeAVL[(rootNode["Children"])[0]])["Name"] + ".t";
root_right = "biasedTree." + (treeAVL[(rootNode["Children"])[1]])["Name"] + ".t";
LikelihoodFunction lf = (filteredData, givenTree);
// Recover the name of the model attached to the baseline likelihood function.
GetString (lfInfo, lf, -1);
baseline_model = (lfInfo["Models"])[0];
io.reportProgressMessage ("EDEPS","Baseline model fit");
// Label every node: branches in the test set get "biasedModel", all others keep the
// baseline model. The labels are then serialized into an annotated tree string that
// the per-residue loop uses to build biasedTree.
for (k = 1; k < Abs(treeAVL); k += 1) {
if (EDEPS_SET[(treeAVL[k])["Name"]] == 0) {
(treeAVL[k])["Model"] = baseline_model;
}
else {
(treeAVL[k])["Model"] = "biasedModel";
}
}
biasedTreeString = PostOrderAVL2StringAnnotateAux (treeAVL,0,"Model","{}");
fprintf (stdout, biasedTreeString, "\n");
// Fit the baseline model and save the fitted likelihood function to <alignment>.base
Optimize (res0,lf);
LIKELIHOOD_FUNCTION_OUTPUT = 7;
outPath = basePath + ".base";
fprintf (outPath, CLEAR_FILE, lf);
baselineLogL = res0[1][0];
baselineBL = BranchLength (givenTree,-1);
// NOTE(review): unary + on a matrix presumably sums its entries, giving the total
// tree length reported below -- confirm.
referenceL = +baselineBL;
// Accumulate root-to-tip ("R2T") distances by adding each node's .t to its parent's
// R2T (the loop starts at index 2, so index 1 never receives an R2T entry of its own),
// then average over the nodes that have no children. r2t_mean is later passed to
// computeDelta as the time at which the expected frequency change is evaluated.
preOrder = givenTree^1;
r2t_mean = 0;
r2t_count = 0;
for (k = 2; k < Abs(preOrder); k += 1) {
(preOrder[k])["R2T"] = Eval ("givenTree." + (preOrder[k])["Name"] + ".t") +
(preOrder[(preOrder[k])["Parent"]])["R2T"];
if (Abs ((preOrder[k])["Children"]) == 0) {
r2t_count += 1;
r2t_mean += (preOrder[k])["R2T"];
}
}
r2t_mean = r2t_mean / r2t_count;
// Output files, all derived from the alignment path.
summaryPath = basePath+".summary";
substitutionsPath = basePath+"_subs.csv";
siteReportMap = basePath+"_bysite.csv";
fprintf (summaryPath, CLEAR_FILE, KEEP_OPEN);
io.reportProgressMessage ("EDEPS", "Standard model fit. Log-L = " + baselineLogL + ". Tree length = " + referenceL + " subs/site");
fprintf (summaryPath, "[PHASE 0.2] Standard model fit. Log-L = ",baselineLogL,". Tree length = ",referenceL, " subs/site \n");
// NOTE(review): fixGlobalParameters is defined elsewhere; presumably it holds the
// baseline model's global parameters at their fitted values -- confirm.
fixGlobalParameters ("lf");
// Filled in by DoResults: per-residue results keyed by residue letter, and per-site
// lists of residues keyed by 0-based site index.
byResidueSummary = {};
bySiteSummary = {};
/*------------------------------------------------------------------------------*/
// ---- Phases 1..20: fit one biased model per target residue --------------------------------
// Under MPI, MPINodeStatus has one entry per worker node (node i+1 <-> entry i):
// -1 means idle, otherwise the residue index the node is currently working on.
if (MPI_NODE_COUNT > 1) {
MPINodeStatus = {MPI_NODE_COUNT-1,1}["-1"];
}
for (residue = 0; residue < 20; residue += 1) {
// NOTE(review): AddABiasREL is defined outside this file; presumably it (re)defines
// "biasedMatrix" as the baseline matrix with a bias towards `residue`, together with
// the globals P_bias / rateBiasTo and the category variables read by DoResults -- confirm.
AddABiasREL (modelNameString,"biasedMatrix",residue);
global P_bias2 := 1;
global relBias := 1;
Model biasedModel = (biasedMatrix, vectorOfFrequencies, 1);
// Rebuild the tree from the annotated string so that test-set branches pick up
// biasedModel and the remaining branches keep the baseline model.
Tree biasedTree = biasedTreeString;
// Tie every biasedTree branch parameter to treeScaler times the corresponding
// givenTree value (the trailing "__" appears in the constraint template as written).
global treeScaler = 1;
ReplicateConstraint ("this1.?.?:=treeScaler*this2.?.?__",biasedTree,givenTree);
// root_left / root_right name the two root-adjacent branch lengths of biasedTree.
// NOTE(review): assigning each parameter to itself with "=" presumably replaces the
// constraint set just above with a free parameter at its current value -- confirm.
ExecuteCommands (root_left + "=" + root_left);
ExecuteCommands (root_right + "=" + root_right);
LikelihoodFunction lfb = (filteredData, biasedTree);
if (MPI_NODE_COUNT > 1) {
// hand the fit to a worker (SendAJob waits for one when all are busy)
SendAJob (residue);
}
else {
Optimize (lfb_MLES,lfb);
DoResults (residue);
}
}
/*------------------------------------------------------------------------------*/
// Under MPI, collect the results of the jobs still running once all 20 residues have
// been dispatched. jobsLeft counts the MPINodeStatus entries that are >= 0.
if (MPI_NODE_COUNT > 1) {
jobsLeft = ({1,MPI_NODE_COUNT-1}["1"] * MPINodeStatus["_MATRIX_ELEMENT_VALUE_>=0"])[0];
for (nodeID = 0; nodeID < jobsLeft; nodeID += 1) {
MPIReceive (-1, fromNode, theJob);
// residue index the reporting node was working on
oldRes = MPINodeStatus[fromNode-1];
// theJob is the text returned by the worker; it is executed before DoResults runs
ExecuteCommands (theJob);
DoResults (oldRes);
}
}
/*------------------------------------------------------------------------------*/
// ---- Reporting ----------------------------------------------------------------------------
// Writes three outputs:
//   <alignment>_subs.csv   : inferred substitution counts per site (Site,From,To,Count)
//   <alignment>_bysite.csv : per-residue LRT p-values, then per-site Bayes factors
//   <alignment>.summary    : human-readable summary (also echoed to stdout)
//
// Sites are reported when their Bayes factor exceeds 100. The same cutoff is applied
// in DoResults when bySiteSummary is populated, so the user-facing messages below
// state "Bayes Factor > 100" (they previously claimed 20, contradicting the code).
fprintf (substitutionsPath, CLEAR_FILE, KEEP_OPEN, "Site,From,To,Count");
fprintf (siteReportMap, CLEAR_FILE, KEEP_OPEN, "Site");
for (k=0; k<20; k+=1) {
    fprintf (siteReportMap, ",", AAString[k]);
}
fprintf (siteReportMap, "\nLRT p-value");

// Reorder the (p-value, residue index) rows on column 0; the step-down loop further
// below relies on the rows being in increasing p-value order.
test_p_values = test_p_values % 0;
rejectedHypotheses = {};

for (k=0; k<20; k+=1) {
    pv = (byResidueSummary[AAString[k]])["p"];
    fprintf (siteReportMap, ",", pv);
}

io.reportProgressMessage ("EDEPS", "Residues (and p-values) for which there is evidence of directional selection (at p<=0.05)");
fprintf (summaryPath, "\nResidues (and p-values) for which there is evidence of directional selection (at p<=0.05)");

// Step-down multiple-testing correction: the k-th smallest p-value is compared with
// 0.05/(20-k); stop at the first residue that fails. Rejected residues are recorded
// under both their numeric index and their letter, because the code below looks
// them up both ways.
for (k=0; k<20; k += 1) {
    if (test_p_values[k][0] < (0.05/(20-k)))
    {
        rejectedHypotheses [test_p_values[k][1]] = 1;
        rejectedHypotheses [AAString[test_p_values[k][1]]] = 1;
        io.reportProgressMessage ("EDEPS", AAString[test_p_values[k][1]] + " : " + test_p_values[k][0] );
        fprintf (summaryPath, "\n\t", AAString[test_p_values[k][1]], " : ",test_p_values[k][0] );
    }
    else {
        break;
    }
}
fprintf (summaryPath, "\n");

// Substitution counts are taken from the baseline likelihood function ("lf").
ancCacheID = _buildAncestralCache ("lf", 0);
outputcount = 0;

for (k=0; k<filteredData.sites; k += 1) {
    thisSite = _substitutionsBySite (ancCacheID,k);

    // every non-zero off-diagonal count goes to the substitutions CSV
    for (char1 = 0; char1 < 20; char1 += 1) {
        for (char2 = 0; char2 < 20; char2 += 1) {
            if (char1 != char2 && (thisSite["COUNTS"])[char1][char2]) {
                ccount = (thisSite["COUNTS"])[char1][char2];
                fprintf (substitutionsPath, "\n", k+1, ",", AAString[char1], ",", AAString[char2], "," , ccount);
            }
        }
    }

    // only sites flagged by DoResults for at least one residue get a row
    if (Abs(bySiteSummary[k])) {
        fprintf (siteReportMap, "\n", k+1);
        didSomething = 0;
        pv = 0;    // running maximum Bayes factor over the rejected residues
        for (k2=0; k2<20; k2 += 1) {
            if (Abs((byResidueSummary[AAString[k2]])["BFs"]) == 0 || rejectedHypotheses[k2] == 0) {
                fprintf (siteReportMap, ",N/A");
            }
            else {
                thisSitePV = ((byResidueSummary[AAString[k2]])["BFs"])[k];
                pv = Max(pv,thisSitePV);
                fprintf (siteReportMap, ",", thisSitePV);
                if (pv > 100) {
                    didSomething = 1;
                }
            }
        }

        if (!didSomething) {
            continue;
        }

        // header is printed once, before the first reported site
        if (outputcount == 0) {
            outputcount = 1;
            io.reportProgressMessage ("EDEPS", "The list of sites which show evidence of directional selection (Bayes Factor > 100) together with the target residues and inferred substitution counts ");
            fprintf (summaryPath, "\nThe list of sites which show evidence of directional selection (Bayes Factor > 100)\n",
                                  "together with the target residues and inferred substitution counts\n");
        }

        fprintf (stdout, "\nSite ", Format (k+1,8,0), " (max BF = ", pv, ")\n Preferred residues: ");
        fprintf (summaryPath, "\nSite ", Format (k+1,8,0), " (max BF = ", pv, ")\n Preferred residues: ");

        // bySiteSummary[k] lists residue letters; print those whose test was rejected
        for (k2 = 0; k2 < Abs (bySiteSummary[k]); k2 += 1) {
            thisChar = (bySiteSummary[k])[k2];
            if (rejectedHypotheses[thisChar]) {
                fprintf (stdout, thisChar);
                fprintf (summaryPath, thisChar);
            }
        }

        fprintf (stdout, "\n Substitution counts:");
        fprintf (summaryPath, "\n Substitution counts:");

        // each unordered residue pair once, showing both directions
        for (char1 = 0; char1 < 20; char1 = char1+1) {
            for (char2 = char1+1; char2 < 20; char2 = char2+1)
            {
                ccount = (thisSite["COUNTS"])[char1][char2];
                ccount2 = (thisSite["COUNTS"])[char2][char1];
                if (ccount+ccount2)
                {
                    fprintf (stdout, "\n\t", AAString[char1], "->", AAString[char2], ":", Format (ccount, 5, 0), "/",
                                     AAString[char2], "->", AAString[char1], ":", Format (ccount2, 5, 0));
                    fprintf (summaryPath, "\n\t", AAString[char1], "->", AAString[char2], ":", Format (ccount, 5, 0), "/",
                                          AAString[char2], "->", AAString[char1], ":", Format (ccount2, 5, 0));
                }
            }
        }
    }
}

_destroyAncestralCache (ancCacheID);
fprintf (substitutionsPath, CLOSE_FILE);
fprintf (summaryPath, CLOSE_FILE);
fprintf (siteReportMap, CLOSE_FILE);
fprintf (stdout, "\n");
/*--------------------------------------------------------------------------------------------*/
/*
    Returns Transpose(efv) * (Exp(Q) - I), where Q is built from the matrix referenced
    by ModelMatrixName: entry [r][c] is scaled by efv[c] and the diagonal is set to
    minus the accumulated row total. DoResults indexes the result by residue and
    reports it (x100) as the "Exp freq increase".

    ModelMatrixName : name (by reference) of a 20x20 rate matrix
    efv             : 20 frequencies
    t_0             : value assigned to the global t before the matrix is copied
    which_cat       : value assigned to the global catVar before the matrix is copied

    Runs in global scope: overwrites t, c, catVar, rmx, r and diag.
    NOTE(review): Exp applied to a matrix is presumably the matrix exponential -- confirm.
*/
function computeDelta (ModelMatrixName&, efv, t_0, which_cat) {
// set the globals first, then copy the matrix
// NOTE(review): presumably the matrix entries are expressions in t, c and catVar, so the
// copy below captures their values at (t_0, 1, which_cat) -- confirm against AddABiasREL.
t = t_0;
c = 1;
catVar = which_cat;
rmx = ModelMatrixName;
for (r=0; r<20; r += 1) {
diag = 0;
// c is reused as the column index from here on (it ends at 20), which overwrites the
// c = 1 set above; rmx was already copied by then.
// The c == r term is also subtracted into diag before rmx[r][r] is replaced below.
for (c=0; c<20; c += 1) {
rmx[r][c] = rmx[r][c] * efv[c];
diag = diag - rmx[r][c];
}
rmx[r][r] = diag;
}
// the bracketed formula builds the 20x20 identity (1 where row == column)
return Transpose(efv)*(Exp (rmx) - {20,20}["_MATRIX_ELEMENT_ROW_==_MATRIX_ELEMENT_COLUMN_"]);
}
/*------------------------------------------------------------------------------*/
function SendAJob (residueIn) {
    /*
        Dispatch the current likelihood function (global lfb) for residue index
        residueIn to an MPI worker node.

        MPINodeStatus[i] describes node i+1: a negative value marks the node as
        idle, any other value is the residue index it is working on.

        Runs in global scope: writes nodeID, and (when every node is busy)
        fromNode, theJob and oldRes. Always returns 0.
    */

    // first choice: an idle node takes the job immediately
    for (nodeID = 0; nodeID < MPI_NODE_COUNT - 1; nodeID += 1) {
        if (MPINodeStatus[nodeID] < 0) {
            MPISend (nodeID + 1, lfb);
            MPINodeStatus[nodeID] = residueIn;
            return 0;
        }
    }

    // every node is busy (nodeID == MPI_NODE_COUNT - 1 here): wait for whichever
    // node finishes next, give it the new job, then process the result it returned
    MPIReceive (-1, fromNode, theJob);
    oldRes = MPINodeStatus[fromNode - 1];
    MPINodeStatus[fromNode - 1] = residueIn;
    MPISend (fromNode, lfb);
    ExecuteCommands (theJob);
    DoResults (oldRes);
    return 0;
}
/*------------------------------------------------------------------------------*/
/*
    Record and report the fit of the model biased towards residue index residueIn.

    Reads globals: lfb, lfb_MLES (optimisation result), baselineLogL, r2t_mean,
    vectorOfFrequencies, P_bias and rateBiasTo (not assigned anywhere in this file),
    summaryPath, basePath.
    Writes globals: byResidueSummary[letter] ("p", "sites", "BFs"), test_p_values row
    residueIn, bySiteSummary, plus scratch variables (pv, fr1, k, k2, mmx, ...).
    Side effects: appends to the summary file, prints to stdout, saves lfb to
    <alignment>.<letter>. Always returns 0.
*/
function DoResults (residueIn)
{
residueC = AAString[residueIn];
fprintf (summaryPath, "[PHASE ",residueIn+1,".1] Model biased for ",residueC,"\n");
// p-value from twice the log-likelihood gain over the baseline, with 3 as the second
// argument of CChi2 (note the implicit multiplication in "2(...)").
pv = 1-CChi2(2(lfb_MLES[1][0]-baselineLogL),3);
io.reportProgressMessage ("EDEPS", "Finished with the model biased for " + residueC + ". Log-L = " + Format(lfb_MLES[1][0],8,3) + "\n");
fprintf (summaryPath, "[PHASE ",residueIn+1,".2] Finished with the model biased for ",residueC,". Log-L = ",Format(lfb_MLES[1][0],8,3),"\n");
fr1 = P_bias;
// computeDelta is evaluated at t = r2t_mean with catVar = 1; the entry for this residue
// is reported below (x100) as "Exp freq increase".
rateAccel1 = (computeDelta("biasedMatrix",vectorOfFrequencies,r2t_mean,1))[residueIn];
fprintf (stdout, "\tBias term = ", Format(rateBiasTo,8,3),
"\n\tproportion = ", Format(fr1,8,3),
"\n\tExp freq increase = ", Format(rateAccel1*100,8,3), "%",
"\n\tp-value = ", Format(pv,8,3),"\n");
fprintf (summaryPath, "\n\tBias term = ", Format(rateBiasTo,8,3),
"\n\tproportion = ", Format(fr1,8,3),
"\n\tExp freq increase = ", Format(rateAccel1*100,8,3), "%",
"\n\tp-value = ", Format(pv,8,3),"\n");
// save the fitted biased likelihood function next to the alignment
LIKELIHOOD_FUNCTION_OUTPUT = 7;
outPath = basePath + "." + residueC;
fprintf (outPath, CLEAR_FILE, lfb);
byResidueSummary [residueC] = {};
(byResidueSummary [residueC])["p"] = pv;
test_p_values [residueIn][0] = pv;
test_p_values [residueIn][1] = residueIn;
(byResidueSummary [residueC])["sites"] = {};
(byResidueSummary [residueC])["BFs"] = {};
// NOTE(review): mmx presumably holds per-site conditional likelihoods, one row per
// combination of the category variables c and catVar and one column per site -- confirm.
ConstructCategoryMatrix (mmx,lfb,COMPLETE);
GetInformation (catOrder, lfb);
dim = Columns (mmx);
// Two rows by site: row 0 / row 1 accumulate the likelihood for the first / second
// catVar class after summing over the classes of c, weighted by cInfo[1][.].
_MARGINAL_MATRIX_ = {2, dim};
GetInformation (cInfo, c);
GetInformation (_CATEGORY_VARIABLE_CDF_, catVar);
ccc = Columns (cInfo);
// keep only row 1 of the catVar information (used as class weights below)
_CATEGORY_VARIABLE_CDF_ = _CATEGORY_VARIABLE_CDF_[1][-1];
// The row layout of mmx depends on which category variable is listed first:
// with "c" first, the two catVar classes of c-class k2 sit in rows 2*k2 and 2*k2+1;
// otherwise rows 0..ccc-1 belong to the first catVar class and rows ccc..2*ccc-1
// to the second.
if (catOrder [0] == "c")
{
for (k=0; k<dim; k=k+1)
{
for (k2 = 0; k2 < ccc; k2=k2+1)
{
_MARGINAL_MATRIX_ [0][k] = _MARGINAL_MATRIX_ [0][k] + mmx[2*k2][k] *cInfo[1][k2];
_MARGINAL_MATRIX_ [1][k] = _MARGINAL_MATRIX_ [1][k] + mmx[2*k2+1][k]*cInfo[1][k2];
}
}
}
else
{
for (k=0; k<dim; k=k+1)
{
for (k2 = 0; k2 < ccc; k2=k2+1)
{
_MARGINAL_MATRIX_ [0][k] = _MARGINAL_MATRIX_ [0][k] + mmx[k2][k]*cInfo[1][k2];
_MARGINAL_MATRIX_ [1][k] = _MARGINAL_MATRIX_ [1][k] + mmx[ccc+k2][k]*cInfo[1][k2];
}
}
}
// NOTE(review): posteriors.ibf is an external include; presumably it consumes
// _MARGINAL_MATRIX_ and _CATEGORY_VARIABLE_CDF_ -- nothing it defines is read by name
// below; verify whether it modifies those two variables in place.
ExecuteAFile (HYPHY_LIB_DIRECTORY + "ChartAddIns" + DIRECTORY_SEPARATOR + "DistributionAddIns" + DIRECTORY_SEPARATOR + "Includes" + DIRECTORY_SEPARATOR + "posteriors.ibf");
// prior odds of the second catVar class
prior = (_CATEGORY_VARIABLE_CDF_[1])/(1-_CATEGORY_VARIABLE_CDF_[1]);
for (k=0; k<dim; k+=1) {
// ratio of the two marginal rows divided by the prior odds
bayesF = _MARGINAL_MATRIX_[1][k]/_MARGINAL_MATRIX_[0][k]/prior;
((byResidueSummary [residueC])["BFs"])[k] = bayesF;
// Sites with a Bayes factor above 100 are recorded twice: as a 1-based site number
// under this residue, and as this residue's letter under the 0-based site index.
if (bayesF > 100) {
((byResidueSummary [residueC])["sites"])[Abs((byResidueSummary [residueC])["sites"])] = k+1;
if (Abs(bySiteSummary[k]) == 0) {
bySiteSummary[k] = {};
}
(bySiteSummary[k])[Abs(bySiteSummary[k])] = residueC;
}
}
return 0;
}
//------------------------------------------------------------------------------
lfunction selectBranchesToTest (tree_definition) {
    /*
        Interactively choose the branches to test.

        tree_definition : associative array; the keys read here are
                          "string"     - tree string used to build a Topology
                          "model_list" - labels found in the tree annotation
                          "model_map"  - branch name -> label

        Returns an associative array keyed by branch name (value 1) for every
        selected branch.

        The menu offers, in order: "All", "Internal", "Leaves", one "Set <label>"
        entry per non-empty label, then one entry per individual branch. Picking
        one of the group entries returns that group immediately; individual
        branches accumulate.
    */

    // non-empty annotation labels become the "Set ..." menu entries
    labeled_sets = {};
    label_list   = tree_definition["model_list"];
    for (i = 0; i < Columns (label_list); i += 1) {
        label = label_list[i];
        if (label != "") {
            labeled_sets + label;
        }
    }
    set_count = Abs (labeled_sets);

    UseModel (USE_NO_MODEL);
    ExecuteCommands ("Topology T = " + tree_definition["string"]);
    tAVL = T^0;

    // tAVL[1] .. tAVL[totalBranchCount] are the entries offered as branches
    totalBranchCount = Abs (tAVL) - 2;
    first_branch_row = 3 + set_count;

    selectedBranches = {};
    menu = {totalBranchCount + first_branch_row, 2};

    menu [0][0] = "All";
    menu [0][1] = "Test for selection on all branches jointly";
    menu [1][0] = "Internal";
    menu [1][1] = "Test for selection on all internal branches jointly";
    menu [2][0] = "Leaves";
    menu [2][1] = "Test for selection on all terminal branches jointly";

    for (i = 0; i < set_count; i += 1) {
        menu [3+i][0] = "Set " + labeled_sets[i];
        menu [3+i][1] = "Test for selection on all branches labeled with {" + labeled_sets[i] + "} jointly";
    }

    for (i = 0; i < totalBranchCount; i += 1) {
        row = i + first_branch_row;
        menu [row][0] = (tAVL[i+1])["Name"];
        menu [row][1] = "Add branch '" + menu [row][0] + "' to the test set";
    }

    ChoiceList (picks,"Which branches to test?",0,NO_SKIP,menu);

    for (i = 0; i < Columns (picks); i += 1) {
        choice = picks [i];

        // 0 = All, 1 = Internal, 2 = Leaves
        if (choice < 3) {
            for (node = 1; node <= totalBranchCount; node += 1) {
                has_children = Abs ((tAVL[node])["Children"]) > 0;
                if (choice == 0 || (choice == 1 && has_children) || (choice == 2 && !has_children)) {
                    selectedBranches [(tAVL[node])["Name"]] = 1;
                }
            }
            return selectedBranches;
        }

        // a labeled set: every branch whose label in model_map matches
        if (choice < first_branch_row) {
            wanted_label = labeled_sets [choice - 3];
            for (node = 1; node <= totalBranchCount; node += 1) {
                branch_name = (tAVL[node])["Name"];
                if ((tree_definition["model_map"])[branch_name] == wanted_label) {
                    selectedBranches [branch_name] = 1;
                }
            }
            return selectedBranches;
        }

        // an individual branch: menu row first_branch_row + j corresponds to tAVL[j+1]
        selectedBranches [(tAVL[choice - 2 - set_count])["Name"]] = 1;
    }

    return selectedBranches;
}
#NEXUS
[
Generated by HYPHY 2.220141217beta(MP) for MacOS(Universal Binary) on Sat Aug 15 12:50:02 2015
]
BEGIN TAXA;
DIMENSIONS NTAX = 34;
TAXLABELS
'N1199101' 'T1199908' 'N2199809' 'N3199901' 'T3200011' 'N4199904' 'T4200608' 'N5199906' 'T5200107' 'N6199906' 'T6200007' 'N7199611' 'T7199811' 'N8199907' 'T8200310' 'N9199909' 'T9200006' 'N10199911' 'T10200305' 'N11199911' 'T11200103' 'N12200008' 'T12200101' 'N13200010' 'T13200309' 'N14200012' 'T14200204' 'N15200101' 'T15200201' 'N16200103' 'T16200304' 'N17199606' 'T17199703' 'N18200108' ;
END;
BEGIN CHARACTERS;
DIMENSIONS NCHAR = 335;
FORMAT
DATATYPE = PROTEIN
GAP=-
MISSING=-
NOLABELS
;
MATRIX
-------------------------LTEEKIKALIEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDKEFRKYTAFTIPSINNETPGIRYQFNVLPQGWKGSPSIFQSSMTKILEFFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGFTTPDKKHQKEPPFLWMGYELHPDKWTVQPIE------------------------------------------------------------------------------------------
PISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALIFICAEMEFEGKISKIGPENPYNTP-FAIKRKFGTRWRKLIDLRDLNKKTQDFWEIQLGIPHPAGLKK-KSVTVLDVGDAYYSVFFDKDFRKYTAFTIPSINNETPGIRYQYNVLPMGWKGSPSIFQSSMTKILEPFRFQNPDIVI-FY-DDLYVGSDLEIGQHRTK-EELRQHLLKWGFTTP--K-QKEPPF-WMGYELHPDKWTVQPIELPEKDSWTVNDIQKLVGKLNWASQIYAGIKVKQLC-LLKGAKALTEVIPLTAEAELEL--------------------------------
PISPIETVPVKLKPGMDGP-VKQWPL-EFKIKALIEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKKTQDFWEVFLGIPHPAGLK-KKSVTFLDVGDAYFSVPLDKDFRKYTAFTIPSINNETPGIRYQYNVLPQGWKGSPA-FQ-SMTFILEPFRKQNPDIVIYQYMDDFYVGSDFEIGEHRTKIEELREHLLKWGFFTPFKKHQKEPPFLWMGYELHPDKWTVQPIVLPEKDSWTVNDIQKLVGKLNWASQIYAGI-------------------------------------------------------------
PISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVFLHEDFRKYTAFTIPSINNETPGVRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPEIVIYQYMDDLYVGSDL-IGQHR-KIDELRFHLL-WGFTTPDK--------------------------------------------------------------------------------------------------------------------
PISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKRKDGTRWRKLIDLRELNKRTQDFWEVQLGIPHPAGLKKNKSVT-LDVGDAFYSVPLHEDFRKYTAFTIPSINNETPGIRYQYNVLPMGWKGSPAIFQSSMTKILEPFRKQNPEIVIYQYVDDLYV-SDLEIGQHRTKIDELRQHLLRWGFTTPD-KHQKEPPFLWMGYELHPDKWTVQPIVLPDKDFWTVNFIQKLVGKLNWASQIYAGIKVKQLCKLLKG-KSLTEIVPLTEEAE-----------------------------------
PIS-I-TVPVKLKPGMDGPKVKQWFLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKRKSVTVFDVGDAYFSVPLDEDFRKYTAFTIPSINNETPGVRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRAKIEELREHLLKWGLTTPDKKHQKEPPFLWMGYELHPDKWTFQPIVLPEKDSWTVNDIQKLVGKLNWASQIYAGIFVKQLCKLLRGTKALTEVV------------------------------------------
PISSIFTVPVKLKPGVDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKSKSVTVLDVGDAYFSVPLDEDFRKYTAFTIPSINNETPFVRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDI-IYQYMDDLYVASDLEIGQHRAKIEELREHLFKWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIVLPEKDSWTVNDIQKLVGKLNWASQIYAGIKVKQLCKLLRGTKALTEVVALTEEAEL----------------------------------
PISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALIEICTEMFKEFKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGFKKKKSVTVLDVGDAYFSIPLDEDFRKYTAFTIPSRNNETPG-RYQYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPFVLPEKDSWTVNDIQKLVGKLNWASQIYAGIKFKQLFKLLRGTK------------------------------------------------
PISPIETVPVKLKPGMDGPKVKFWPLTEEKIKALIEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKFVDFRELNKRTQDFWEVQLGIPHPAGFKKKKSVTVLDVGDAYFSIPLDEDFRKYTAFTIPSRNNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVICQYMDDLYVGSDLEIGQHRTKIEELRQHLLFWGLTTPDKKHQFEPPFLWMGYELHPDKWTVQPIVLPEKDSWTVNDIQKLVGKLNWASQIYAGIKVKQLCKLLRGTKGLTEVIPLTEEAE-----------------------------------
PISPIDTVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISRIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPSGLKKKKSVTVLDVGDAYFSVPLDKDFRKYTAFTIPSTNNETPGIRY-YNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELREHLLRWFFTTPDKKHQKEPPFLWMGYELHPDKWTVQPIVLPEKDSWTVNDIQKLVGKLNWASQIYAGI-I-QLCKLLRGTKALTEVVPFTEEAELE---------------------------------
PISPIDTVPVKLKPGMDGPKVKQWPLFEEKIKALVEICTEMEKEGKISRIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPSGLKKKKSVTVLDVGDAYFSVPLDKDFRKFTAFTIPSTNNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELREHLLRWGFTTPDKKHQKEPPFLWMGYELHPDKWTFQPI-LPEKDSWTVNDIQKLVGKLNWASQIYAGIKIRQLCKLLRGTKALTEVVPLTEE-ELEL--------------------------------
---------------------KQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDKDFRKYTAFTIPSVNNETPGIRYQYNVLPQGWKGSPAIFQASMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRIKIEELREHLLKWGFTTPDKKHQKEPPFLWMGYE------------------------------------------------------------------------------------------------------
------------------------------IKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDKDFRKYTAFTIPSVNNETPGIRYQYNV-PQGWKGSPAIFQASMTKILEP-RKQNPDIVIYQYMDDLYVGSDL-IGQHR-KIEELREHLLKWGFTTPDKKHQKEPPFLWMGYE------------------------------------------------------------------------------------------------------
PISPIDTVPVKLKPGMDGPKVFQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFREFNKRTQDFWEFQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDKDFRKYTAFTIPSINNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRIKIEELRQHLLRWGFTTPDKKHQKEPPFLWMGYELHPDKWTVQPIVLPEKDSWTVNDIQKLVGKLNWASQIYPGIKVRHLCKLLRGTKALTEVVPLTEEAELEL--------------------------------
PISPIDTVPVKLKPGMDGPKVFQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKFDSTKWRKLVDFREFNKRTQDFWEFQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDKDFRKYTAFTIPSINNETPFIRYQFNVFPQGWKGSPAIFQSFMT-ILEPFRKQFPDIFIYQYMDDLYVGSFLEIFFHRIKIEELRQHLF-WFFTTPDKFHQKEPPFLWMGYELHPDKWFVQPIVLPEKDSWTVNDIQKLVGKLNWASQIYPGIKVRHLC-L-RGTKAFTEVVPLTEEAE-----------------------------------
PISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKITKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDEDFRKYTAFTIPSINNETPGIRYQYNVLPQGWKGSPAIFQCSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGFTTPDKKHQKEPPFLWMGYELHPDKWTVQQIVLPEKDSWTVNDIQKLVGFLNWASQIYAGIKVKQLCKLLRGAKALTEVVPLTEEAELELA-------------------------------
PISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKITKIGPENPYNTPVF-IKKKDSTKWRKLVDFRELNKRTQDFWEIQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDEDFRKYTAFTIPSINNETPGIRYQYNVLPQGWKGSPAIFQCSMTKILEPFRKQNPDIVIYQYVDDLYVASDLEIGQHRTKIEELRQHLLRWGFTTPDKKHQKEPPFLWMGYELHPDKWTVQQIVLPEKDSWTVNDIQKLVGKLNWASQIYAGIKVKQLCKLLRGAKALTEVVPLTEEAELEL--------------------------------
PISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKW-KLVDFRELNKKTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAFFSVPLDKDFRKYTAFTIPSINNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFR-QNPDMVIFQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGFTTPDKKHQKEPPFLWMGYFLHPDKWTVQPIELPEKDSWTVNDIQKLVGKLNWASQIYAGIKVRQLCKLLRGTKALTEVVPLTEEAE-----------------------------------
PISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKKTQDFWEVQLGIPHPAGLKKNKSVTVLDVGDAYFSVPLDKDFRKYTAFTIPSINNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDMVIYQYVDDLYVGSDLEIGQHRTKIEELRQHLLRWGFTTPDKKHQKEPPFLWMGYELHPDKWTVQPIELPEKDSWTVNDIQKLVGKLNWASQIYAGIKVRQLCKLLRGTKALTEVVPLTEEAE-----------------------------------
PISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDKEFRKYTAFTIPSINNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPEIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGFTTPDKKHQKEPPFLWMGYELHPDKWTVQPIVLPEKDSWTVNDIQKLVGKLNWASQIYAGIKFKQLCKLLRGTKALTEVVPLTEEA-LEL--------------------------------
PISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKNKSVTVLDVGDAYFSVPLDKEFRKYTAFTIPS-NNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPEIVIYQYVDDLYVGSDLEIGQHRTKIEELRQHLLRWGFTTPDKKHQKEPPFLWMGYELHPDKWTVQPIVLPEKDSWTVNDIQKLVGKLNWASQIYAGIKVKQLCKLLRGTKALTEVVPLTEEAELE---------------------------------
PISPIETFPVRL-PGMDGPKVKQWPLTEEKIKALIEICTEMEKEGKITKIGPENPYNTPVFAIKKKDGNKWRKLVDFRELNKRTQDFWEVQLGIPHPSGLKKKKSVTVLDVGDAYFSVPLYEDFRKYTAFTIPSTNNETPGIRYQYNVLPQGWKGSPAIFQSSMTRILEPFRKQNPEIVFYQYMDDLYVGSDLEIGQHRTKIEELRQHLLKWGFTTPDKKHQKEPPFLWMGYELHPDKWTVQPIVLPEKDSWTVNDIQKLVGKLNWASQIYAGIKVRQLCKLLRGAKSLTEVIPLTKEAE-----------------------------------
PISPIETVPVRLKPGMDGPKVKQWPLTEEKIKALIEICTEMEKEGKITKIGPENPYNTPVFAIKKKDGNKWRKLVDFRELNKRTQDFWEVQLGIPHPSGLKKKKSVTVLDVGDAYFSVPLYEDFRKYTAFTIPSTNNETPGIRYQYNVLPQGWKGSPAIFQSSMTRILEPFRKQNPEIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLKWGFTTPDKKHQKEPPFLWMGYELHPDKWTVQPIVLPEKDSWTVNDIQKLVGKLNWASQIYAGIKVRQLCKLLRGAKSLTEVIPLTKEAE-----------------------------------
PISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMFKEGKISKIGPENPYNTPVFFIKKKDSTKWFFLVDFRELNKRTQDFWEVQLGIPFFAGLK-KKSVTVLDVGDAYFSVPLDEDFRKFTAFTIPSINNETPGIRYQYNVFPQGWKGSPAIFQSSMTKFFEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRFKIEELRQHLLRWGFTTPDKKHQKEFPFLWMGYELHPDKWTVQPIVLPEKDSWTVNDIQKLVGKFNWASQIYAGFKVRQLCKLLRFAKALTEVIPLTKEAE-----------------------------------
PISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGL-QKKSVTVLDVGDAYFSVPLDEDFRKYTAFTIPSINNETPGIRYQYNVLPQGWKGSPAIFQSFMTKILEPFRKQNPDIVIYQYVDDLYVGSDLEIGQHRTKIEELRQHLLRWGFTTPDKKHQKEFPFFWMGYELHPFKWTVQPFVLPEKDSWTVNDIQKLVGKLNWASQIYAGIKVRQFCKLLRGAKALTEVIPLTKEAE-----------------------------------
PISPIETVPVKLKPGMDGP-VKQWPLTEEKIKALTEICTEMEKEGKISKIGPENPYNTPIFAIKKKDSTKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDKDFRKYTAFTIPSINNETPGIRYQYNVLPQGWKGSPAIFQ-SMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGFHRTKI-ELRQHLLRWGFTTPDKKHQKEPPFLWMGYELHPDKWTVQPIMLPEQFSWTVNDIQKLVGKLNWASQIYPGIKVKQLCKLLRGAKALTEVVSLTEEAE-----------------------------------
PISPIETVFVKLKPGMDGPKVKQWPLTEEKIKALTEICTEMEKEGKFSKIGPEFPFNTPIFAIKKK-STKWRKLVDFRELNKRTQD-WE-FLGIPHPAGLKK-KSVTVLDVGDA-FSVPLDKDFRKYTAFTIPSINNETPGIRYQYNVLPQGWKGSPAIFQYSMTKILEPFRKQNPDIVIYQYVDDLYV-SDFEIGQHRTKIDELRQHLLRWGFTTPDKK-QKEPPFLWMGYELHPD-WTVQPIMLPEQDSWTVNDIQKLVGKLNWASQIYPGIKVKQLCKLLRGAKALTEVVSLTEE-------------------------------------
PISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDGTKWRKLVDFFELNKRTQDFWEVQLGIPHPAGLKKKKSVTVFDVGDAYFSVPLDEDFRKYTAFFIPSINNETPGIRYQYNVLPQGWKGSPAIFQFSMIFILEPFRKQNPDIVIFQYMDDLYVGSDLEIGQHRAKIEDLREHLLRWFFTTPDKKHQKEPPFLWMGYELHPDKWTVQPIVLPEKFSWTVNDIQKLVGKLNWASQIYPGIKVKQLCRLLRGTKALTFVVPLTEEAE-----------------------------------
PISPIETVPVKLKPGMDGPKVFQWPLTFEKIKALVEICTFMEKEGKISKIGPENPYNTPVFAIKKKDGTKWRKLVFFFELNKRTQDFWEVFLGIPHPAGLKKNKSVTVFDVGDAYFSVPLDFDFRKYTAFFFPSINNETPGIRYQYNVLPQGWKGSPAIFQFSMLKILEPFRKQNPDIVIYQYMDFLYVGSDLEIGQHR-KIF-FREHLLRWFFTTPDKKHQKEPPFLWMGYELHPDKWTVQPIVLPEKFSWTVNDIQKLVGKLNWASQIYPGIKVKQLCRLLRGTKALTEVVPLTEEAELE---------------------------------
PISPIETVPVKLKPGMDGP-VKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSSKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDKDFRKYTAFTIPSINNETPGIRYQYNVLPQGWKGSPAIFQCSMTKILEPFRKQNPEIVIYQYMDDLYVGSDLEIGQHRAKVEELRQHLLRWGFTTPDKKHQKEPFFLWMGYELHPDKWTVQPIQLPEKDSWTVNDIQKLVGKLNWASQIYPGIKVRQLCKLLRGTKALTEVIPLTEEAE-----------------------------------
PISPIETVPVKLKPGMDGPKVKQWPLTEEKIKAFVEICTEMEKEGKISFIGPENPYNTPVFAIKKKDSSKWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDKDFRKYTAFTIPSINNETPGIRYQYNVLPQGWKGSPAIFQCSMTKILEPFRKQNPEIVIYQYVDDLYVGSDLEIFQHRTKIEELRQHLLRWGFTTPDKKHQKEPPFLWMGYEFHPDKWTVQPIKLPEKDSWTVNDIQKLVGKLNWASQIYPGIKVRQLCKLLRGTKAFTEFIPLTEEAE-----------------------------------
------------------------PLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKKTQDFWEVQLGIPHPAGLKKKKSVTVLDVGDAYFSVPLDKDFRKYTAFTIPSINNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLRWGLTTPDKKHQKEPPFLWMGYELHPDKWTVQPIVLPEKDSW-----------------------------------------------------------------------------------
------------------------PLTEEKIKALVEICAELEKEGKISKIGPENPYNTPVFAIKKKDSTKWRKLVDFRELNKKTQDFWEVQLGIPHPAGLKKNKSVTVLDVGDAYFSVPLDKDFRKYTAFTIPSINNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVICQYMDDLYVGSDLEIGQHRKKIEELRQHLWKWGFYTPDKKHQKEPPFRWMGYELHPDKWTVQPIVLPEKDSW-----------------------------------------------------------------------------------
PISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKFDSTKWRKLVDFRELNKFTQDFWEVQLGIPHFAGLKKKKSVTVLDVGDAYFSVPLDKDFRKYTAFTIPSINNETPGIRYQYNVLPQGWKGSPAIFQCSMTKILEPFRKQNPDIVIYQYMDFLYVGSDLEIEQHRAKIEELRQHLLRWGFTTPDKKHQKEPPFFWMGYELHPDKWTVQPIVL-Q-DSWTVNDIQKLVGKLNWASQIYPGIKVKQLCKLLRGTKALTEVIPLTAEAE-----------------------------------;
END;
BEGIN TREES;
TREE tree = (N1199101,((((N4199904,T4200608{Treated}),(N5199906,T5200107{Treated})),(((((T1199908{Treated},(N14200012,T14200204{Treated})),(N3199901,T3200011{Treated})),(N9199909,T9200006{Treated})),((N2199809,(N12200008,T12200101{Treated})),(((N6199906,T6200007{Treated}),(N7199611,T7199811{Treated})),(N8199907,T8200310{Treated})))),(((N10199911,T10200305{Treated}),(N17199606,T17199703{Treated})),((N11199911,T11200103{Treated}),((N15200101,T15200201{Treated}),((N16200103,T16200304{Treated}),N18200108)))))),(N13200010,T13200309{Treated})));
END;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment