From ee58330698cc18f14c237928aa26048b7cfbfdb0 Mon Sep 17 00:00:00 2001 From: Stepan Saenko <123179229+StepanSaenko@users.noreply.github.com> Date: Thu, 29 Jun 2023 09:22:56 +0200 Subject: [PATCH 1/8] Added a function which can check the node names in .nwk If the node is named with only digit symbols, it can be the reason of the error: terminate called after throwing an instance of 'std::out_of_range' what(): vector::_M_range_check: __n (which is 12157) >= this->size() (which is 3) Aborted (core dumped) A function added to phylotree.cc shows a possible explanation --- src/phylotree.cc | 62 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 62 insertions(+) diff --git a/src/phylotree.cc b/src/phylotree.cc index af637c41..cf68fea2 100644 --- a/src/phylotree.cc +++ b/src/phylotree.cc @@ -61,8 +61,70 @@ void PhyloTree::printTree() const { } } +/*function will check if there any node named only with digits in .nwk file */ +bool printNodeNames(const std::string& newickString) { + std::stack nodeStack; + std::string currentName; + std::regex regex("[0-9]+"); + bool readingLength = false; + + for (char c : newickString) { + if (c == '(') { + nodeStack.push(currentName); + currentName.clear(); + } + else if (c == ',') { + if (!currentName.empty()) { + if (std::regex_match(currentName, regex)){ + return false; + } + currentName.clear(); + } + readingLength = false; + } + else if (c == ')') { + if (!currentName.empty()) { + if (std::regex_match(currentName, regex)){ + return false; + } + currentName.clear(); + } + if (!nodeStack.empty()) { + currentName = nodeStack.top(); + nodeStack.pop(); + } + readingLength = false; + } + else if (c == ':') { + readingLength = true; + } + else if (c == ';' && !currentName.empty()) { + if (std::regex_match(currentName, regex)) { + return false; + } + currentName.clear(); + readingLength = false; + } + else if (!readingLength) { + currentName += c; + } + } + return true; +} + PhyloTree::PhyloTree(string filename){ +/*check if any nodes contains only digits, it caused the error*/ + filebuf fb0; + fb0.open(filename.c_str(),ios::in); + if (fb0.is_open()){ + std::string fileContents((std::istreambuf_iterator(&fb0)), std::istreambuf_iterator()); + if (!printNodeNames(fileContents)) { + throw ProjectError("the parsing of " + filename + " has been unsuccessful. Probably, at least one node named with only digits. Please check, whether all the nodenames of your input named with at least one latin symbol" ); + } + fb0.close(); + } + #ifdef COMPGENEPRED filebuf fb; fb.open(filename.c_str(),ios::in); From c11b8d66af732e44b243316e41669ae17546ca7b Mon Sep 17 00:00:00 2001 From: Stepan Saenko <123179229+StepanSaenko@users.noreply.github.com> Date: Fri, 30 Jun 2023 14:27:34 +0200 Subject: [PATCH 2/8] Update phylotree.cc Added an std::out_of_range exception --- src/phylotree.cc | 77 +++++++----------------------------------------- 1 file changed, 11 insertions(+), 66 deletions(-) diff --git a/src/phylotree.cc b/src/phylotree.cc index cf68fea2..d8e61937 100644 --- a/src/phylotree.cc +++ b/src/phylotree.cc @@ -61,70 +61,8 @@ void PhyloTree::printTree() const { } } -/*function will check if there any node named only with digits in .nwk file */ -bool printNodeNames(const std::string& newickString) { - std::stack nodeStack; - std::string currentName; - std::regex regex("[0-9]+"); - bool readingLength = false; - - for (char c : newickString) { - if (c == '(') { - nodeStack.push(currentName); - currentName.clear(); - } - else if (c == ',') { - if (!currentName.empty()) { - if (std::regex_match(currentName, regex)){ - return false; - } - currentName.clear(); - } - readingLength = false; - } - else if (c == ')') { - if (!currentName.empty()) { - if (std::regex_match(currentName, regex)){ - return false; - } - currentName.clear(); - } - if (!nodeStack.empty()) { - currentName = nodeStack.top(); - nodeStack.pop(); - } - readingLength = false; - } - else if (c == ':') { - readingLength = true; - } - else if (c == ';' && !currentName.empty()) { - if (std::regex_match(currentName, regex)) { - return false; - } - currentName.clear(); - readingLength = false; - } - else if (!readingLength) { - currentName += c; - } - } - return true; -} - PhyloTree::PhyloTree(string filename){ -/*check if any nodes contains only digits, it caused the error*/ - filebuf fb0; - fb0.open(filename.c_str(),ios::in); - if (fb0.is_open()){ - std::string fileContents((std::istreambuf_iterator(&fb0)), std::istreambuf_iterator()); - if (!printNodeNames(fileContents)) { - throw ProjectError("the parsing of " + filename + " has been unsuccessful. Probably, at least one node named with only digits. Please check, whether all the nodenames of your input named with at least one latin symbol" ); - } - fb0.close(); - } - #ifdef COMPGENEPRED filebuf fb; fb.open(filename.c_str(),ios::in); @@ -139,11 +77,18 @@ PhyloTree::PhyloTree(string filename){ * start parsing * if error_message=0 parsing was successful, if error_message=1 input syntax is wrong */ - int error_message = parser.parse(); + try { + int error_message = parser.parse(); + if(error_message == 1){ + throw ProjectError("the parsing of " + filename + " has been unsuccessful. Please check, whether the syntax of your input file is correct" ); + } + } + catch (const std::out_of_range& e) { + std::cout << "An out_of_range exception occurred: " << e.what() << std::endl; + std::cout << "the parsing of " + filename + " has been unsuccessful. At least one node named with only digits. Please check, whether the nodenames of your input are correct" << std::endl; + } fb.close(); - if(error_message == 1){ - throw ProjectError("the parsing of " + filename + " has been unsuccessful. Please check, whether the syntax of your input file is correct" ); - } + /* * check if all branch lengths >0 */ From 308848399a36b3b573501376667840cfa22439e7 Mon Sep 17 00:00:00 2001 From: Stepan Saenko <123179229+StepanSaenko@users.noreply.github.com> Date: Fri, 30 Jun 2023 14:49:37 +0200 Subject: [PATCH 3/8] Update phylotree.cc --- src/phylotree.cc | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/phylotree.cc b/src/phylotree.cc index d8e61937..ee1b3a5e 100644 --- a/src/phylotree.cc +++ b/src/phylotree.cc @@ -17,6 +17,7 @@ #include #include #include +#include #ifdef COMPGENEPRED #include "parser/parser.h" @@ -85,7 +86,7 @@ PhyloTree::PhyloTree(string filename){ } catch (const std::out_of_range& e) { std::cout << "An out_of_range exception occurred: " << e.what() << std::endl; - std::cout << "the parsing of " + filename + " has been unsuccessful. At least one node named with only digits. Please check, whether the nodenames of your input are correct" << std::endl; + throw ProjectError("the parsing of " + filename + " has been unsuccessful. Probably, at least one node named with only digits. Please check, whether the syntax of your input file is correct" ) } fb.close(); From ea08fceac413c58bc028952d2ccfc38f0cf53fad Mon Sep 17 00:00:00 2001 From: Stepan Saenko <123179229+StepanSaenko@users.noreply.github.com> Date: Sat, 1 Jul 2023 13:08:05 +0200 Subject: [PATCH 4/8] Update phylotree.cc --- src/phylotree.cc | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/phylotree.cc b/src/phylotree.cc index ee1b3a5e..16ae2253 100644 --- a/src/phylotree.cc +++ b/src/phylotree.cc @@ -62,8 +62,6 @@ void PhyloTree::printTree() const { } } -PhyloTree::PhyloTree(string filename){ - #ifdef COMPGENEPRED filebuf fb; fb.open(filename.c_str(),ios::in); From fbc995a3555895db02dd6f2b2c76354aa79f983d Mon Sep 17 00:00:00 2001 From: Stepan Saenko <123179229+StepanSaenko@users.noreply.github.com> Date: Sat, 1 Jul 2023 13:23:52 +0200 Subject: [PATCH 5/8] Update phylotree.cc --- src/phylotree.cc | 1 + 1 file changed, 1 insertion(+) diff --git a/src/phylotree.cc b/src/phylotree.cc index 16ae2253..1e341855 100644 --- a/src/phylotree.cc +++ b/src/phylotree.cc @@ -61,6 +61,7 @@ void PhyloTree::printTree() const { (*node)->printNode(); } } +PhyloTree::PhyloTree(string filename){ #ifdef COMPGENEPRED filebuf fb; From e94b5060f8a6b71a2dfbcb7099a7def10aec904b Mon Sep 17 00:00:00 2001 From: Stepan Saenko <123179229+StepanSaenko@users.noreply.github.com> Date: Sat, 1 Jul 2023 13:30:41 +0200 Subject: [PATCH 6/8] Update phylotree.cc --- src/phylotree.cc | 705 ++++++++++++++++++++++++----------------------- 1 file changed, 357 insertions(+), 348 deletions(-) diff --git a/src/phylotree.cc b/src/phylotree.cc index 1e341855..fca5a286 100644 --- a/src/phylotree.cc +++ b/src/phylotree.cc @@ -17,27 +17,32 @@ #include #include #include +#include +#include +#include +#include +#include #include - #ifdef COMPGENEPRED #include "parser/parser.h" #endif double PhyloTree::phylo_factor=1; + void Treenode::printNode() const { cout<<"Species: "<species<<" (idx="<idx <<")"<<" Distance: "<distance; if (this->parent == NULL){ - cout<<" root "; + cout<<" root "; } if (this->children.empty()){ - cout<<" leaf "<::const_iterator node = this->children.begin(); node != this->children.end(); node++){ - cout<<(*node)->species<::const_iterator node = this->children.begin(); node != this->children.end(); node++){ + cout<<(*node)->species<::const_iterator node = this->treenodes.begin(); node != this->treenodes.end(); node++){ - (*node)->printNode(); + (*node)->printNode(); } } -PhyloTree::PhyloTree(string filename){ + +PhyloTree::PhyloTree(string filename){ + /*check if any nodes contains only digits, it caused the error*/ + /*filebuf fb0; + fb0.open(filename.c_str(),ios::in); + if (fb0.is_open()){ + + std::string fileContents((std::istreambuf_iterator(&fb0)), std::istreambuf_iterator()); + if (!printNodeNames(fileContents)) { + throw ProjectError("the parsing of " + filename + " has been unsuccessful. At least one node named with only digits. Please check, whether the nodenames of your input are correct " ); + } + fb0.close(); + }*/ #ifdef COMPGENEPRED filebuf fb; fb.open(filename.c_str(),ios::in); if (fb.is_open()){ - istream istrm(&fb); - vector species; - Parser parser(&treenodes, &species, istrm); //define an object of the Parser class + istream istrm(&fb); + vector species; + Parser parser(&treenodes, &species, istrm); //define an object of the Parser class #ifndef DEBUG - parser.setDebug(false); + parser.setDebug(false); #endif - /* - * start parsing - * if error_message=0 parsing was successful, if error_message=1 input syntax is wrong - */ + /* + * start parsing + * if error_message=0 parsing was successful, if error_message=1 input syntax is wrong + */ try { int error_message = parser.parse(); if(error_message == 1){ - throw ProjectError("the parsing of " + filename + " has been unsuccessful. Please check, whether the syntax of your input file is correct" ); - } - } - catch (const std::out_of_range& e) { - std::cout << "An out_of_range exception occurred: " << e.what() << std::endl; - throw ProjectError("the parsing of " + filename + " has been unsuccessful. Probably, at least one node named with only digits. Please check, whether the syntax of your input file is correct" ) + throw ProjectError("the parsing of " + filename + " has been unsuccessful. Please check, whether the syntax of your input file is correct" ); + } + } + catch (const std::out_of_range& e) { + std::cout << "An out_of_range exception occurred: " << e.what() << std::endl; + throw ProjectError("the parsing of " + filename + " has been unsuccessful. Probably, at least one node named with only digits. Please check, whether the syntax of your input file is correct" ); + } + fb.close(); + + + /* + * check if all branch lengths >0 + */ + vector bs; + getBranchLengths(bs); + for(int i=0; i < bs.size(); i++){ + if(bs[i] <= 0) + throw ProjectError("Branch lengths must be > 0 in " + filename + "."); } - fb.close(); - - /* - * check if all branch lengths >0 - */ - vector bs; - getBranchLengths(bs); - for(int i=0; i < bs.size(); i++){ - if(bs[i] <= 0) - throw ProjectError("Branch lengths must be > 0 in " + filename + "."); - } - numSp=species.size(); - /* - * if only a subset of the species is sought, drop all leaf nodes - * which are not in the given subset - */ - string only_species = Constant::speciesfilenames; // by default use only the species from speciesfilenames - /* old code: read in species list from separate file - * only required with mysql dbaccess - * if(only_species.empty()) - * Properties::assignProperty("/CompPred/only_species", only_species); - */ - if(!only_species.empty()){ - - ifstream ifstrm(only_species.c_str()); - if (ifstrm.is_open()){ - vector keep; // the subset of species to be kept - string line; - while(getline(ifstrm, line)){ - stringstream stm(line); - string s; - if(stm >> s) - keep.push_back(s); - } - ifstrm.close(); - for(int i=0; i keep; // the subset of species to be kept + string line; + while(getline(ifstrm, line)){ + stringstream stm(line); + string s; + if(stm >> s) + keep.push_back(s); + } + ifstrm.close(); + for(int i=0; i &speciesnames, double b){ Treenode *root = new Treenode; for(int i=0;iaddChild(leaf); - this->treenodes.push_back(leaf); + Treenode *leaf = new Treenode(speciesnames[i],b); + root->addChild(leaf); + this->treenodes.push_back(leaf); } this->treenodes.push_back(root); numSp=speciesnames.size(); @@ -170,76 +187,75 @@ PhyloTree::PhyloTree(const vector &speciesnames, double b){ PhyloTree::PhyloTree(const PhyloTree &other){ this->numSp=other.numSp; if(!other.treenodes.empty()){ - Treenode *root=other.treenodes.back(); - if(!root->isRoot()){ - throw ProjectError("last node in tree ist not the root"); - } - Treenode *root_copy = new Treenode(); - this->treenodes.push_front(root_copy); - list stack; - list stack_copy; - stack.push_front(root); - stack_copy.push_front(root_copy); - while(!stack.empty()){ - Treenode *p = stack.front(); - stack.pop_front(); - Treenode *p_copy = stack_copy.front(); - stack_copy.pop_front(); - if(!p->isLeaf()){ - for(list::reverse_iterator it = p->children.rbegin(); it != p->children.rend(); it++){ - Treenode *copy = new Treenode((*it)->getSpecies(), (*it)->getDist(), (*it)->getIdx()); - p_copy->addChild(copy); - this->treenodes.push_front(copy); - stack_copy.push_front(copy); - stack.push_front(*it); - } - } - } + Treenode *root=other.treenodes.back(); + if(!root->isRoot()){ + throw ProjectError("last node in tree ist not the root"); + } + Treenode *root_copy = new Treenode(); + this->treenodes.push_front(root_copy); + list stack; + list stack_copy; + stack.push_front(root); + stack_copy.push_front(root_copy); + while(!stack.empty()){ + Treenode *p = stack.front(); + stack.pop_front(); + Treenode *p_copy = stack_copy.front(); + stack_copy.pop_front(); + if(!p->isLeaf()){ + for(list::reverse_iterator it = p->children.rbegin(); it != p->children.rend(); it++){ + Treenode *copy = new Treenode((*it)->getSpecies(), (*it)->getDist(), (*it)->getIdx()); + p_copy->addChild(copy); + this->treenodes.push_front(copy); + stack_copy.push_front(copy); + stack.push_front(*it); + } + } + } } } PhyloTree::~PhyloTree(){ for(list::iterator it = treenodes.begin(); it != treenodes.end(); it++){ - delete *it; + delete *it; } treenodes.clear(); } void PhyloTree::printNewick(string filename) const { - if(!treenodes.empty()){ - ofstream file; - file.open(filename.c_str()); - recursiveNWK(file,treenodes.back()); - file.close(); + ofstream file; + file.open(filename.c_str()); + recursiveNWK(file,treenodes.back()); + file.close(); } } void PhyloTree::recursiveNWK(ofstream &file, Treenode *node) const { if(node->isLeaf()){ - file<getSpecies()<<":"<getDist(); + file<getSpecies()<<":"<getDist(); } else{ - file<<"("; - for(list::iterator it=node->children.begin(); it !=node->children.end(); it++){ - if( it != node->children.begin() ) - file<<","; - recursiveNWK(file,*it); - } - if(node->isRoot()){ - file<<")"<getSpecies()<<";"; - } - else{ - file<<")"<getSpecies()<<":"<getDist(); - } + file<<"("; + for(list::iterator it=node->children.begin(); it !=node->children.end(); it++){ + if( it != node->children.begin() ) + file<<","; + recursiveNWK(file,*it); + } + if(node->isRoot()){ + file<<")"<getSpecies()<<";"; + } + else{ + file<<")"<getSpecies()<<":"<getDist(); + } } } double PhyloTree::pruningAlgor(string labelpattern, Evo *evo, int u){ vector tuple; for (int i=0; i &tuple, Evo *evo, int u){ int states = evo->getNumStates(); - for(list::iterator node = treenodes.begin(); node != treenodes.end(); node++){ - if((*node)->isLeaf()){ - // initialization - int c = tuple[(*node)->getIdx()]; - if(c >= states || c < 0){ - (*node)->resizeTable(states,1); // in the case of unknown characters, we sum over all possibilities - } - else{ - (*node)->resizeTable(states,0); - (*node)->setTable(c,1); - } - } - else{ - //recursion for the interior nodes - (*node)->resizeTable(states); - // cout<<"transition matrix P for omega "<::iterator it = (*node)->children.begin(); it != (*node)->children.end(); it++){ - double sum=0; - gsl_matrix *P = evo->getSubMatrixP(u,(*it)->getDist()); - //cout<<"---------- Transition Matrix for omega nr "<getTable(j); - //cout<setTable(i, score); - } - } + if((*node)->isLeaf()){ + // initialization + int c = tuple[(*node)->getIdx()]; + if(c >= states || c < 0){ + (*node)->resizeTable(states,1); // in the case of unknown characters, we sum over all possibilities + } + else{ + (*node)->resizeTable(states,0); + (*node)->setTable(c,1); + } + } + else{ + //recursion for the interior nodes + (*node)->resizeTable(states); + // cout<<"transition matrix P for omega "<::iterator it = (*node)->children.begin(); it != (*node)->children.end(); it++){ + double sum=0; + gsl_matrix *P = evo->getSubMatrixP(u,(*it)->getDist()); + //cout<<"---------- Transition Matrix for omega nr "<getTable(j); + //cout<setTable(i, score); + } + } } //printRecursionTable(); //in the root, we take the weighted average over all states double tree_score = 0; - for (int i=0; igetPi(i) * treenodes.back()->getTable(i)); - tree_score += ts; + for(int i=0; igetPi(i) * treenodes.back()->getTable(i)); + tree_score += ts; } return log(tree_score); } @@ -301,30 +316,29 @@ void PhyloTree::printRecursionTable() const{ cout<table.size(); i++){ - cout<<"state "<::const_iterator it = treenodes.begin(); it != treenodes.end(); it++){ - cout<getSpecies(); - for(int i=0; i<(*it)->table.size(); i++){ - if( (*it)->getTable(i) == - std::numeric_limits::max()){ - cout<getTable(i); - } - } - cout<getSpecies(); + for(int i=0; i<(*it)->table.size(); i++){ + if( (*it)->getTable(i) == - std::numeric_limits::max()){ + cout<getTable(i); + } + } + cout< &branchset) const { - for(list::const_iterator node = treenodes.begin(); node != treenodes.end(); node++){ - if(!((*node)->isRoot())) - branchset.push_back((*node)->getDist()); + if(!((*node)->isRoot())) + branchset.push_back((*node)->getDist()); } } @@ -332,58 +346,55 @@ void PhyloTree::getSpeciesNames(vector &speciesnames) { for(list::const_iterator node = treenodes.begin(); node != treenodes.end(); node++){ if((*node)->isLeaf()){ speciesnames.push_back((*node)->getSpecies()); - // set species idx - int idx = speciesnames.size()-1 ; - (*node)->setIdx(idx); - } + // set species idx + int idx = speciesnames.size()-1 ; + (*node)->setIdx(idx); + } } } Treenode *PhyloTree::getLeaf(string species) const { for(list::const_iterator node = treenodes.begin(); node != treenodes.end(); node++){ - if( (*node)->getSpecies() == species) - return (*node); + if( (*node)->getSpecies() == species) + return (*node); } throw ProjectError("species " + species + " not in tree") ; } void PhyloTree::drop(string species){ - Treenode *node=getLeaf(species); drop(node,NULL); } void PhyloTree::drop(Treenode *node, Evo *evo){ - if(node->isLeaf()){ - Treenode *p=node->getParent(); - if(p){ - p->removeChild(node); - if(p->children.size()==1){ - collapse(p,evo); - } - } - treenodes.remove(node); - delete node; - numSp--; + Treenode *p=node->getParent(); + if(p){ + p->removeChild(node); + if(p->children.size()==1){ + collapse(p,evo); + } + } + treenodes.remove(node); + delete node; + numSp--; } } void PhyloTree::collapse(Treenode *node, Evo *evo){ - Treenode *child = node->children.front(); if(!(node->isRoot())){ - Treenode *parent = node->getParent(); - parent->removeChild(node); - parent->addChild(child); - double dist= child->getDist() + node->getDist(); - child->addDistance(dist); - if(evo) - evo->addBranchLength(dist); + Treenode *parent = node->getParent(); + parent->removeChild(node); + parent->addChild(child); + double dist= child->getDist() + node->getDist(); + child->addDistance(dist); + if(evo) + evo->addBranchLength(dist); } else{ - child->makeRoot(); + child->makeRoot(); } treenodes.remove(node); delete node; @@ -395,97 +406,95 @@ void PhyloTree::collapse(Treenode *node, Evo *evo){ */ double PhyloTree::MAP(vector &labels, vector &weights, Evo *evo, bool fixLeafLabels){ - int states = evo->getNumStates(); for(list::iterator node = treenodes.begin(); node != treenodes.end(); node++){ - if((*node)->isLeaf()){ - // initialization - int idx = (*node)->getIdx(); - int c = labels[idx]; - if(c >= 4 || c < 0) - throw ProjectError("PhyloTree::MAP(): species with index "+ itoa(idx) + " does not exist in tree"); - double weight = weights[idx]; - (*node)->resizeTable(states,-std::numeric_limits::max()); - if(fixLeafLabels || c >= 2){ // c=2: EC absent, but aligned is always a fixed state - (c == 1)? (*node)->setTable(c,weight) : (*node)->setTable(c,0); - } - else{ - for(int i = 0; i < 2; i++) - (*node)->setTable(i,i*weight); - } - (*node)->bestAssign.clear(); - (*node)->bestAssign.resize(states); - } - else{ - //recursion for the interior nodes - (*node)->resizeTable(states); - (*node)->bestAssign.clear(); - (*node)->bestAssign.resize(states); - for(int i=0; i::iterator it = (*node)->children.begin(); it != (*node)->children.end(); it++){ - double max = -std::numeric_limits::max(); - int bestAssign = -1; - gsl_matrix *P = (*it)->getLogP(); - if(!P){ // upon the first call, set pointer to the corresponding probability matrix - P = evo->getSubMatrixLogP(0,(*it)->getDist()); - (*it)->setLogP(P); - } - for(int j=0; jgetTable(j); - if(max < branch_score){ - max = branch_score; - bestAssign=j; - } - } - score+=max; - (*it)->bestAssign[i]=bestAssign; - } - (*node)->setTable(i,score); - } - } + if((*node)->isLeaf()){ + // initialization + int idx = (*node)->getIdx(); + int c = labels[idx]; + if(c >= 4 || c < 0) + throw ProjectError("PhyloTree::MAP(): species with index "+ itoa(idx) + " does not exist in tree"); + double weight = weights[idx]; + (*node)->resizeTable(states,-std::numeric_limits::max()); + if(fixLeafLabels || c >= 2){ // c=2: EC absent, but aligned is always a fixed state + (c == 1)? (*node)->setTable(c,weight) : (*node)->setTable(c,0); + } + else{ + for(int i = 0; i < 2; i++) + (*node)->setTable(i,i*weight); + } + (*node)->bestAssign.clear(); + (*node)->bestAssign.resize(states); + } + else{ + //recursion for the interior nodes + (*node)->resizeTable(states); + (*node)->bestAssign.clear(); + (*node)->bestAssign.resize(states); + for(int i=0; i::iterator it = (*node)->children.begin(); it != (*node)->children.end(); it++){ + double max = -std::numeric_limits::max(); + int bestAssign = -1; + gsl_matrix *P = (*it)->getLogP(); + if(!P){ // upon the first call, set pointer to the corresponding probability matrix + P = evo->getSubMatrixLogP(0,(*it)->getDist()); + (*it)->setLogP(P); + } + for(int j=0; jgetTable(j); + if(max < branch_score){ + max = branch_score; + bestAssign=j; + } + } + score+=max; + (*it)->bestAssign[i]=bestAssign; + } + (*node)->setTable(i,score); + } + } } //printRecursionTable(); double max = -std::numeric_limits::max(); if(!treenodes.empty()){ - int bestAssign = -1; - Treenode* root = treenodes.back(); - for(int i=0; igetLogPi(i))) + root->getTable(i); - if(max < root_score){ - max = root_score; - bestAssign=i; - } - } - // backtracking to assign leaf nodes to the MAP labels - //if(!fixLeafLabels) - MAPbacktrack(labels, root, bestAssign, fixLeafLabels); + int bestAssign = -1; + Treenode* root = treenodes.back(); + for(int i=0; igetLogPi(i))) + root->getTable(i); + if(max < root_score){ + max = root_score; + bestAssign=i; + } + } + // backtracking to assign leaf nodes to the MAP labels + //if(!fixLeafLabels) + MAPbacktrack(labels, root, bestAssign, fixLeafLabels); } return max; } void PhyloTree::MAPbacktrack(vector &labels, Treenode* root, int bestAssign, bool fixLeafLabels){ - list< pair > stack; stack.push_front(make_pair(root,bestAssign)); while(!stack.empty()){ - pair p = stack.front(); - stack.pop_front(); - Treenode *node=p.first; - bestAssign=p.second; - if(node->isLeaf()){ - int idx = node->getIdx(); - if( (fixLeafLabels || labels[idx] >= 2 ) && bestAssign != labels[idx]) - throw ProjectError("in MAPbacktrack: fixLeafNodes is one but different node labels"); - labels[idx]=bestAssign; - } - else{ - for(list::iterator it=node->children.begin(); it!=node->children.end(); it++){ - stack.push_front(make_pair(*it, (*it)->bestAssign[bestAssign])); - } - } + pair p = stack.front(); + stack.pop_front(); + Treenode *node=p.first; + bestAssign=p.second; + if(node->isLeaf()){ + int idx = node->getIdx(); + if( (fixLeafLabels || labels[idx] >= 2 ) && bestAssign != labels[idx]) + throw ProjectError("in MAPbacktrack: fixLeafNodes is one but different node labels"); + labels[idx]=bestAssign; + } + else{ + for(list::iterator it=node->children.begin(); it!=node->children.end(); it++){ + stack.push_front(make_pair(*it, (*it)->bestAssign[bestAssign])); + } + } } } @@ -495,8 +504,8 @@ void PhyloTree::MAPbacktrack(vector &labels, Treenode* root, int bestAssign double PhyloTree::getDiversity(){ double div=0.0; for(list::const_iterator node = treenodes.begin(); node != treenodes.end(); node++){ - if(!((*node)->isRoot())) - div+=(*node)->getDist(); + if(!((*node)->isRoot())) + div+=(*node)->getDist(); } return div; } @@ -506,11 +515,11 @@ double PhyloTree::getDiversity(){ */ void PhyloTree::scaleTree(double factor){ for(list::const_iterator node = treenodes.begin(); node != treenodes.end(); node++){ - if(!((*node)->isRoot())){ - double dist = (*node)->getDist(); - dist*=factor; - (*node)->addDistance(dist); - } + if(!((*node)->isRoot())){ + double dist = (*node)->getDist(); + dist*=factor; + (*node)->addDistance(dist); + } } } @@ -521,70 +530,70 @@ void PhyloTree::scaleTree(double factor){ void PhyloTree::prune(bit_vector &bv, Evo *evo){ start: for(list::iterator node = treenodes.begin(); node != treenodes.end(); node++){ - if((*node)->isLeaf()){ - if(!bv[(*node)->getIdx()]){ - Treenode* tmp=*node; - if(node == treenodes.begin()){ - drop(tmp,evo); - goto start; - } - else{ - node--; - drop(tmp,evo); - } - } - } + if((*node)->isLeaf()){ + if(!bv[(*node)->getIdx()]){ + Treenode* tmp=*node; + if(node == treenodes.begin()){ + drop(tmp,evo); + goto start; + } + else{ + node--; + drop(tmp,evo); + } + } + } } } int PhyloTree::fitch(vector &labels, int states){ - for (list::iterator node = treenodes.begin(); node != treenodes.end(); node++){ - if ((*node)->isLeaf()){ - // initialization - int idx = (*node)->getIdx(); - int c = labels[idx]; - if(c >= states || c < 0) - throw ProjectError("PhyloTree::fitch(): index "+ itoa(c) + " out of bounds."); - - (*node)->resizeTable(states, 100000); // any number > 1 - (*node)->setTable(c,0); - (*node)->bestAssign.clear(); - (*node)->bestAssign.resize(states); - } - else{ - // recursion for the interior nodes - (*node)->resizeTable(states); - (*node)->bestAssign.clear(); - (*node)->bestAssign.resize(states); - for (int i=0; i::iterator it = (*node)->children.begin(); it != (*node)->children.end(); it++){ - double min = std::numeric_limits::max(); - int bestAssign = -1; - for (int j=0; jgetTable(j); - if (i != j) - branch_score++; // count one substitution - if (min > branch_score){ - min = branch_score; - bestAssign=j; - } - } - score += min; - (*it)->bestAssign[i] = bestAssign; - } - (*node)->setTable(i,score); - } - } + for(list::iterator node = treenodes.begin(); node != treenodes.end(); node++){ + if((*node)->isLeaf()){ + // initialization + int idx = (*node)->getIdx(); + int c = labels[idx]; + if(c >= states || c < 0) + throw ProjectError("PhyloTree::fitch(): index "+ itoa(c) + " out of bounds."); + + (*node)->resizeTable(states, 100000); // any number > 1 + (*node)->setTable(c,0); + (*node)->bestAssign.clear(); + (*node)->bestAssign.resize(states); + } + else{ + //recursion for the interior nodes + (*node)->resizeTable(states); + (*node)->bestAssign.clear(); + (*node)->bestAssign.resize(states); + for(int i=0; i::iterator it = (*node)->children.begin(); it != (*node)->children.end(); it++){ + double min = std::numeric_limits::max(); + int bestAssign = -1; + for(int j=0; jgetTable(j); + if(i != j) + branch_score++; // count one substitution + if(min > branch_score){ + min = branch_score; + bestAssign=j; + } + } + score+=min; + (*it)->bestAssign[i]=bestAssign; + } + (*node)->setTable(i,score); + } + } } double min = std::numeric_limits::max(); - if (!treenodes.empty()){ - Treenode* root = treenodes.back(); - for (int i=0; i root->getTable(i)) - min = root->getTable(i); - } - } else { + if(!treenodes.empty()){ + Treenode* root = treenodes.back(); + for(int i=0; i root->getTable(i)) + min = root->getTable(i); + } + }else{ min = -1; } return min; From 83762a9a8aa0f3acccb5cdb7841f9c4baae9bc65 Mon Sep 17 00:00:00 2001 From: Stepan Saenko <123179229+StepanSaenko@users.noreply.github.com> Date: Sat, 1 Jul 2023 13:42:19 +0200 Subject: [PATCH 7/8] Update phylotree.cc --- src/phylotree.cc | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/src/phylotree.cc b/src/phylotree.cc index fca5a286..7980d0cc 100644 --- a/src/phylotree.cc +++ b/src/phylotree.cc @@ -69,16 +69,6 @@ void PhyloTree::printTree() const { PhyloTree::PhyloTree(string filename){ /*check if any nodes contains only digits, it caused the error*/ - /*filebuf fb0; - fb0.open(filename.c_str(),ios::in); - if (fb0.is_open()){ - - std::string fileContents((std::istreambuf_iterator(&fb0)), std::istreambuf_iterator()); - if (!printNodeNames(fileContents)) { - throw ProjectError("the parsing of " + filename + " has been unsuccessful. At least one node named with only digits. Please check, whether the nodenames of your input are correct " ); - } - fb0.close(); - }*/ #ifdef COMPGENEPRED filebuf fb; From 8005e6e3ce06419a4a2e1642a832e2a56d07d823 Mon Sep 17 00:00:00 2001 From: Stepan Saenko <123179229+StepanSaenko@users.noreply.github.com> Date: Fri, 8 Sep 2023 09:21:17 +0200 Subject: [PATCH 8/8] Update phylotree.cc --- src/phylotree.cc | 20 +++++++------------- 1 file changed, 7 insertions(+), 13 deletions(-) diff --git a/src/phylotree.cc b/src/phylotree.cc index 7980d0cc..c8793331 100644 --- a/src/phylotree.cc +++ b/src/phylotree.cc @@ -17,12 +17,7 @@ #include #include #include -#include -#include -#include -#include -#include -#include + #ifdef COMPGENEPRED #include "parser/parser.h" #endif @@ -57,7 +52,7 @@ void Treenode::makeRoot(){ } void Treenode::resizeTable(int size, double val){ - table.clear(); + table.clear(); table.resize(size,val); } @@ -88,15 +83,14 @@ PhyloTree::PhyloTree(string filename){ int error_message = parser.parse(); if(error_message == 1){ throw ProjectError("the parsing of " + filename + " has been unsuccessful. Please check, whether the syntax of your input file is correct" ); + } + } + catch (const std::out_of_range& e) { + std::cout << "An out_of_range exception occurred: " << e.what() << std::endl; + throw ProjectError("the parsing of " + filename + " has been unsuccessful. Probably, at least one node named with only digits. Please check, whether the syntax of your input file is correct" ); } - } - catch (const std::out_of_range& e) { - std::cout << "An out_of_range exception occurred: " << e.what() << std::endl; - throw ProjectError("the parsing of " + filename + " has been unsuccessful. Probably, at least one node named with only digits. Please check, whether the syntax of your input file is correct" ); - } fb.close(); - /* * check if all branch lengths >0 */