Created
October 8, 2011 15:39
-
-
Save baoilleach/1272440 to your computer and use it in GitHub Desktop.
Add support for specify atom order for SMILES
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| /********************************************************************** | |
| Copyright (C) 2005-2007 by Craig A. James, eMolecules Inc. | |
| Some portions Copyright (C) 1998-2001 by OpenEye Scientific Software, Inc. | |
| Some portions Copyright (C) 2001-2008 by Geoffrey R. Hutchison | |
| Some portions Copyright (C) 2004 by Chris Morley | |
| This program is free software; you can redistribute it and/or modify | |
| it under the terms of the GNU General Public License as published by | |
| the Free Software Foundation version 2 of the License. | |
| This program is distributed in the hope that it will be useful, | |
| but WITHOUT ANY WARRANTY; without even the implied warranty of | |
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
| GNU General Public License for more details. | |
| ***********************************************************************/ | |
| // This code uses the old OpenEye SMILES parser | |
| // but replaces the SMILES export with Craig James canonical smiles | |
| // (For regular SMILES, the canonical order is not computed and ignored) | |
| #include <openbabel/babelconfig.h> | |
| #include <openbabel/obmolecformat.h> | |
| #include <openbabel/chiral.h> | |
| #include <openbabel/atomclass.h> | |
| #include <openbabel/stereo/tetrahedral.h> | |
| #include <openbabel/stereo/cistrans.h> | |
| #include <openbabel/stereo/squareplanar.h> | |
| #include <openbabel/stereo/stereo.h> | |
| #include <openbabel/graphsym.h> | |
| #include <openbabel/canon.h> | |
| #include <limits> | |
| #include <iostream> | |
| #include <cassert> | |
| #include <string> | |
| //#define DEBUG 1 | |
| #define IMPLICIT_CIS_RING_SIZE 12 | |
| using namespace std; | |
| namespace OpenBabel { | |
| // some constant variables | |
| const char BondUpChar = '\\'; | |
| const char BondDownChar = '/'; | |
| //Base class for SMIFormat and CANSIFormat with most of the functionality | |
| class SMIBaseFormat : public OBMoleculeFormat | |
| { | |
| public: | |
| virtual const char* GetMIMEType() | |
| { return "chemical/x-daylight-smiles"; }; | |
| //////////////////////////////////////////////////// | |
| /// The "API" interface functions | |
| virtual bool ReadMolecule(OBBase* pOb, OBConversion* pConv); | |
| virtual bool WriteMolecule(OBBase* pOb, OBConversion* pConv); | |
| /////////////////////////////////////////////////////// | |
| virtual const char* TargetClassDescription(){return OBMol::ClassDescription();}; | |
| virtual const char* SpecificationURL() | |
| {return "http://www.daylight.com/smiles/";}; | |
| virtual int SkipObjects(int n, OBConversion* pConv) | |
| { | |
| if(n==0) return 1; //already points after current line | |
| istream& ifs = *pConv->GetInStream(); | |
| if (ifs.eof()) | |
| return -1; | |
| int i=0; | |
| while(i<n && ifs.good()) | |
| { | |
| if(ifs.peek()!='#') | |
| i++; | |
| ifs.ignore(numeric_limits<streamsize>::max(),'\n'); | |
| } | |
| return ifs ? 1 : -1; | |
| } | |
| }; | |
| //************************************************** | |
| class SMIFormat : public SMIBaseFormat | |
| { | |
| public: | |
| //Register this format type ID | |
| SMIFormat() | |
| { | |
| OBConversion::RegisterFormat("smi",this, "chemical/x-daylight-smiles"); | |
| OBConversion::RegisterFormat("smiles",this, "chemical/x-daylight-smiles"); | |
| OBConversion::RegisterOptionParam("n", this); | |
| OBConversion::RegisterOptionParam("t", this); | |
| OBConversion::RegisterOptionParam("r", this); | |
| OBConversion::RegisterOptionParam("a", this); | |
| OBConversion::RegisterOptionParam("h", this); | |
| OBConversion::RegisterOptionParam("x", this); | |
| OBConversion::RegisterOptionParam("C", this); // "anti-canonical" form (random order) | |
| } | |
| virtual const char* Description() | |
| { | |
| return | |
| "SMILES format\n" | |
| "A linear text format which can describe the connectivity and chirality of a molecule\n" | |
| "Open Babel implements the `OpenSMILES specification <http://opensmiles.org>`_.\n\n" | |
| "It also implements an extension to this specification for radicals.\n\n" | |
| "Note that the ``l <atomno>`` option, used to specify a \"last\" atom, is\n" | |
| "intended for the generation of SMILES strings to which additional atoms\n" | |
| "will be concatenated. If the atom specified has an explicit H within a bracket\n" | |
| "(e.g. ``[nH]`` or ``[C@@H]``) the output will have the H removed along with any\n" | |
| "associated stereo symbols.\n\n" | |
| ".. seealso::\n\n" | |
| " The :ref:`Canonical_SMILES_format` produces a canonical representation\n" | |
| " of the molecule in SMILES format. This is the same as the ``c`` option\n" | |
| " below but may be more convenient to use.\n\n" | |
| "Write Options e.g. -xt\n" | |
| " a Output atomclass like [C:2], if available\n" | |
| " c Output in canonical form\n" | |
| " h Output explicit hydrogens as such\n" | |
| " i Do not include isotopic or chiral markings\n" | |
| " n No molecule name\n" | |
| " r Radicals lower case eg ethyl is Cc\n" | |
| " t Molecule name only\n" | |
| " x append X/Y coordinates in canonical-SMILES order\n" | |
| " C 'anti-canonical' random order (mostly for testing)\n" | |
| " f <atomno> Specify the first atom\n" | |
| " This atom will be used to begin the SMILES string.\n" | |
| " l <atomno> Specify the last atom\n" | |
| " The output will be rearranged so that any additional\n" | |
| " SMILES added to the end will be attached to this atom.\n\n"; | |
| } | |
| }; | |
| //Make an instance of the format class | |
| SMIFormat theSMIFormat; | |
| //************************************************** | |
| class CANSMIFormat : public SMIBaseFormat | |
| { | |
| public: | |
| //Register this format type ID | |
| CANSMIFormat() | |
| { | |
| OBConversion::RegisterFormat("can", this, "chemical/x-daylight-cansmiles"); | |
| } | |
| virtual bool WriteMolecule(OBBase* pOb, OBConversion* pConv) | |
| { | |
| //The "c" option sets us to use canonical ordering | |
| pConv->AddOption("c",OBConversion::OUTOPTIONS); | |
| return SMIBaseFormat::WriteMolecule(pOb, pConv); | |
| } | |
| /////////////////////////////////////////////////////// | |
| virtual const char* Description() { | |
| return | |
| "Canonical SMILES format\n" | |
| "A canonical form of the SMILES linear text format\n" | |
| "The SMILES format is a linear text format which can describe the\n" | |
| "connectivity " | |
| "and chirality of a molecule. Canonical SMILES gives a single\n" | |
| "'canonical' form for any particular molecule.\n\n" | |
| ".. seealso::\n\n" | |
| " The \"regular\" :ref:`SMILES_format` gives faster\n" | |
| " output, since no canonical numbering is performed.\n\n" | |
| "Write Options e.g. -xt\n" | |
| " a Output atomclass like [C:2], if available\n" | |
| " h Output explicit hydrogens as such\n" | |
| " i Do not include isotopic or chiral markings\n" | |
| " n No molecule name\n" | |
| " r Radicals lower case eg ethyl is Cc\n" | |
| " t Molecule name only\n" | |
| " f <atomno> Specify the first atom\n" | |
| " This atom will be used to begin the SMILES string.\n" | |
| " l <atomno> Specify the last atom\n" | |
| " The output will be rearranged so that any additional\n" | |
| " SMILES added to the end will be attached to this atom.\n" | |
| " See the :ref:`SMILES_format` for more information.\n\n"; | |
| }; | |
| }; | |
| // Make an instance of the format class | |
| CANSMIFormat theCANSMIFormat; | |
| //************************************************************ | |
| class OBSmilesParser | |
| { | |
| // simple structs to make code more readable | |
| // see _extbond | |
| struct ExternalBond | |
| { | |
| int digit; | |
| int prev; | |
| int order; | |
| char updown; | |
| }; | |
| // see _rclose | |
| struct RingClosureBond | |
| { | |
| int digit; | |
| int prev; | |
| int order; | |
| char updown; | |
| int numConnections; | |
| }; | |
| char _updown; | |
| int _order; | |
| int _prev; | |
| char *_ptr; | |
| vector<int> _vprev; | |
| vector<RingClosureBond> _rclose; | |
| vector<ExternalBond> _extbond; | |
| vector<int> _path; | |
| vector<bool> _avisit; | |
| vector<bool> _bvisit; | |
| char _buffer[BUFF_SIZE]; | |
| vector<int> PosDouble; //for extension: lc atoms as conjugated double bonds | |
| OBAtomClassData _classdata; // to hold atom class data like [C:2] | |
| struct StereoRingBond | |
| { | |
| vector<OBAtom*> atoms; | |
| vector<char> updown; | |
| }; | |
| map<OBBond*, StereoRingBond> _stereorbond; // Remember info on the stereo ring closure bonds | |
| // stereochimistry | |
| bool chiralWatch; // set when a tetrahedral atom is read | |
| map<OBAtom*, OBTetrahedralStereo::Config*> _tetrahedralMap; // map of tetrahedral atoms and their data | |
| map<OBBond*, char> _upDownMap; // store the '/' & '\' as they occured in smiles | |
| map<unsigned int, char> _chiralLonePair; // for atoms with potential chiral lone pairs, remember when the l.p. was encountered | |
| bool squarePlanarWatch; // set when a square planar atom is read | |
| map<OBAtom*, OBSquarePlanarStereo::Config*> _squarePlanarMap; | |
| public: | |
| OBSmilesParser() { } | |
| ~OBSmilesParser() { } | |
| bool SmiToMol(OBMol&,const string&); | |
| bool ParseSmiles(OBMol&); | |
| bool ParseSimple(OBMol&); | |
| bool ParseComplex(OBMol&); | |
| bool ParseRingBond(OBMol&); | |
| bool ParseExternalBond(OBMol&); | |
| bool CapExternalBonds(OBMol &mol); | |
| void FindAromaticBonds(OBMol &mol,OBAtom*,int); | |
| void FindAromaticBonds(OBMol&); | |
| void FindOrphanAromaticAtoms(OBMol &mol); //CM 18 Sept 2003 | |
| int NumConnections(OBAtom *); | |
| void CreateCisTrans(OBMol &mol); | |
| char SetRingClosureStereo(StereoRingBond rcstereo, OBBond* dbl_bond); | |
| void InsertTetrahedralRef(OBMol &mol, unsigned long id); | |
| void InsertSquarePlanarRef(OBMol &mol, unsigned long id); | |
| bool IsUp(OBBond*); | |
| bool IsDown(OBBond*); | |
| }; | |
| ///////////////////////////////////////////////////////////////// | |
| /* Lines starting with # are ignored. Whitespace at the start (including | |
| blank lines) terminate the input unless -e option is used. | |
| Valid SMILES reactions such as [C]=O.O>[Fe]>O=C=O.[H][H] with non-null | |
| reactant and product are accepted and the reactant, product and | |
| possibly the agent molecules are output when using the Convert interface | |
| (babel commandline). With the OBConversion functions Read, ReadString | |
| and ReadFile all SMILES reactions give an error when read with this format. | |
| */ | |
| bool SMIBaseFormat::ReadMolecule(OBBase* pOb, OBConversion* pConv) | |
| { | |
| OBMol* pmol = pOb->CastAndClear<OBMol>(); | |
| istream &ifs = *pConv->GetInStream(); | |
| string ln, smiles, title; | |
| string::size_type pos, pos2; | |
| //Ignore lines that start with # | |
| while(ifs && ifs.peek()=='#') | |
| if(!getline(ifs, ln)) | |
| return false; | |
| //Get title | |
| if(getline(ifs, ln)) | |
| { | |
| pos = ln.find_first_of(" \t"); | |
| if(pos!=string::npos) | |
| { | |
| smiles = ln.substr(0,pos); | |
| title = ln.substr(pos+1); | |
| Trim(title); | |
| pmol->SetTitle(title.c_str()); | |
| } | |
| else | |
| smiles = ln; | |
| } | |
| pos= smiles.find_first_of(",<\"\'!^&_|{}"); | |
| if(pos!=string::npos) | |
| { | |
| obErrorLog.ThrowError(__FUNCTION__, | |
| smiles + " contained a character '" + smiles[pos] + "' which is invalid in SMILES", obError); | |
| return false; | |
| } | |
| pmol->SetDimension(0); | |
| OBSmilesParser sp; | |
| pos = smiles.find('>'); | |
| if(pos==string::npos) | |
| return sp.SmiToMol(*pmol, smiles); //normal return | |
| //Possibly a SMILES reaction | |
| OBMol* pmol1 = new OBMol; | |
| OBMol* pmol2 = new OBMol; | |
| if(sp.SmiToMol(*pmol1, smiles.substr(0,pos)) //reactant | |
| && (pos2 = smiles.find('>', pos+1))!=string::npos) //second > | |
| { | |
| if((pos2-pos==1 //no agent | |
| || sp.SmiToMol(*pmol2, smiles.substr(pos+1,pos2-pos-1))) //agent | |
| && sp.SmiToMol(*pmol, smiles.substr(pos2+1))) //product | |
| { | |
| //Is a valid reaction. Output species | |
| pmol1->SetDimension(0); | |
| pmol1->SetTitle(title); | |
| pmol2->SetTitle(title); | |
| pmol->SetTitle(title); | |
| pmol2->SetDimension(0); | |
| if(pConv->AddChemObject( | |
| pmol1->DoTransformations(pConv->GetOptions(OBConversion::GENOPTIONS),pConv))<0)//using Read or ReadString or ReadFile | |
| { | |
| obErrorLog.ThrowError(__FUNCTION__, smiles + | |
| " SmilesFormat accepts reactions only with the \"Convert\" (commandline) interface", obError); | |
| return false; //Error | |
| } | |
| if(pmol2->NumAtoms()) | |
| pConv->AddChemObject( | |
| pmol2->DoTransformations(pConv->GetOptions(OBConversion::GENOPTIONS),pConv)); | |
| return true; //valid reaction return | |
| } | |
| } | |
| obErrorLog.ThrowError(__FUNCTION__, smiles + " contained '>' but was not a acceptable reaction", obError); | |
| return false; | |
| } | |
| ////////////////////////////////////////////// | |
| bool OBSmilesParser::SmiToMol(OBMol &mol,const string &s) | |
| { | |
| strncpy(_buffer,s.c_str(), BUFF_SIZE); | |
| _buffer[BUFF_SIZE - 1] = '\0'; | |
| _vprev.clear(); | |
| _rclose.clear(); | |
| _prev=0; | |
| chiralWatch=false; | |
| squarePlanarWatch = false; | |
| if (!ParseSmiles(mol) || mol.NumAtoms() == 0) | |
| { | |
| mol.Clear(); | |
| return(false); | |
| } | |
| map<OBAtom*, OBTetrahedralStereo::Config*>::iterator i; | |
| for (i = _tetrahedralMap.begin(); i != _tetrahedralMap.end(); ++i) | |
| delete i->second; | |
| _tetrahedralMap.clear(); | |
| map<OBAtom*, OBSquarePlanarStereo::Config*>::iterator j; | |
| for (j = _squarePlanarMap.begin(); j != _squarePlanarMap.end(); ++j) | |
| delete j->second; | |
| _squarePlanarMap.clear(); | |
| mol.SetAutomaticFormalCharge(false); | |
| return(true); | |
| } | |
| bool OBSmilesParser::ParseSmiles(OBMol &mol) | |
| { | |
| mol.BeginModify(); | |
| for (_ptr=_buffer;*_ptr;_ptr++) | |
| { | |
| //cerr << " parsing " << _ptr << endl; | |
| if (*_ptr<0 || isspace(*_ptr)) | |
| continue; | |
| else if (isdigit(*_ptr) || *_ptr == '%') //ring open/close | |
| { | |
| if(!ParseRingBond(mol)) | |
| return false; | |
| continue; | |
| } | |
| else if(*_ptr == '&') //external bond | |
| { | |
| ParseExternalBond(mol); | |
| continue; | |
| } | |
| else | |
| switch(*_ptr) | |
| { | |
| case '.': | |
| _prev=0; | |
| break; | |
| case '(': | |
| _vprev.push_back(_prev); | |
| break; | |
| case ')': | |
| if(_vprev.empty()) //CM | |
| return false; | |
| _prev = _vprev.back(); | |
| _vprev.pop_back(); | |
| break; | |
| case '[': | |
| if (!ParseComplex(mol)) | |
| { | |
| mol.EndModify(); | |
| mol.Clear(); | |
| return(false); | |
| } | |
| break; | |
| case '-': | |
| _order = 1; | |
| break; | |
| case '=': | |
| _order = 2; | |
| break; | |
| case '#': | |
| _order = 3; | |
| break; | |
| case '$': | |
| _order = 4; | |
| break; | |
| case ':': | |
| _order = 5; | |
| break; | |
| case '/': | |
| _updown = BondDownChar; | |
| break; | |
| case '\\': | |
| _updown = BondUpChar; | |
| break; | |
| default: | |
| if (!ParseSimple(mol)) | |
| { | |
| mol.EndModify(); | |
| mol.Clear(); | |
| return(false); | |
| } | |
| } // end switch | |
| } // end for _ptr | |
| // place dummy atoms for each unfilled external bond | |
| if(!_extbond.empty()) | |
| CapExternalBonds(mol); | |
| //Save atom class values in OBGenericData object if there are any | |
| if(_classdata.size()>0) | |
| mol.SetData(new OBAtomClassData(_classdata)); | |
| // Check to see if we've balanced out all ring closures | |
| // They are removed from _rclose when matched | |
| if (!_rclose.empty()) { | |
| mol.EndModify(); | |
| mol.Clear(); | |
| stringstream errorMsg; | |
| errorMsg << "Invalid SMILES string: " << _rclose.size() << " unmatched " | |
| << "ring bonds." << endl; | |
| obErrorLog.ThrowError(__FUNCTION__, errorMsg.str(), obWarning); | |
| return false; // invalid SMILES since rings aren't properly closed | |
| } | |
| //set aromatic bond orders | |
| mol.SetAromaticPerceived(); | |
| FindAromaticBonds(mol); | |
| FindOrphanAromaticAtoms(mol);// CM 18 Sept 2003 | |
| mol.AssignSpinMultiplicity(); | |
| mol.UnsetAromaticPerceived(); | |
| mol.EndModify(); | |
| //Extension which interprets cccc with conjugated double bonds if niether | |
| //of its atoms is aromatic. | |
| vector<int>::iterator itr; | |
| for(itr=PosDouble.begin();itr!=PosDouble.end();++itr) | |
| { | |
| OBBond* bond = mol.GetBond(*itr); | |
| if(!bond->GetBeginAtom()->IsAromatic() && !bond->GetEndAtom()->IsAromatic()) | |
| { | |
| bond->SetBO(2); | |
| mol.UnsetImplicitValencePerceived(); | |
| } | |
| } | |
| // Add the data stored inside the _tetrahedralMap to the atoms now after end | |
| // modify so they don't get lost. | |
| if(_tetrahedralMap.size() > 0) { | |
| OBAtom* atom; | |
| map<OBAtom*, OBTetrahedralStereo::Config*>::iterator ChiralSearch; | |
| for(ChiralSearch = _tetrahedralMap.begin(); ChiralSearch != _tetrahedralMap.end(); ChiralSearch++) { | |
| atom = ChiralSearch->first; | |
| OBTetrahedralStereo::Config *ts = ChiralSearch->second; | |
| if (!ts) | |
| continue; | |
| if (ts->refs.size() != 3) | |
| continue; | |
| if (ts->refs[2] == OBStereo::NoRef) { | |
| // This happens where there is chiral lone pair or where there simply aren't enough connections | |
| // around a chiral atom. We handle the case where there is a S with a chiral lone pair. | |
| // All other cases are ignored, and raise a warning. (Note that S can be chiral even without | |
| // a lone pair, think of C[S@](=X)(=Y)Cl. | |
| // We have remembered where to insert the lone pair in the _chiralLonePair map | |
| map<unsigned int, char>::iterator m_it = _chiralLonePair.find(atom->GetIdx()); | |
| if (atom->GetAtomicNum() == 16 && m_it != _chiralLonePair.end()) { // Sulfur | |
| ts->refs[2] = ts->refs[1]; ts->refs[1] = ts->refs[0]; | |
| if (m_it->second == 0) { // Insert in the 'from' position | |
| ts->refs[0] = ts->from; | |
| ts->from = OBStereo::ImplicitRef; | |
| } | |
| else // Insert in the refs[0] position | |
| ts->refs[0] = OBStereo::ImplicitRef; | |
| } | |
| else { // Ignored by Open Babel | |
| stringstream ss; | |
| ss << "Ignoring stereochemistry. Not enough connections to this atom. " << mol.GetTitle(); | |
| obErrorLog.ThrowError(__FUNCTION__, ss.str(), obWarning); | |
| continue; | |
| } | |
| } | |
| // cout << "*ts = " << *ts << endl; | |
| OBTetrahedralStereo *obts = new OBTetrahedralStereo(&mol); | |
| obts->SetConfig(*ts); | |
| mol.SetData(obts); | |
| } | |
| } | |
| // Add the data stored inside the _squarePlanarMap to the atoms now after end | |
| // modify so they don't get lost. | |
| if(_squarePlanarMap.size() > 0) { | |
| OBAtom* atom; | |
| map<OBAtom*, OBSquarePlanarStereo::Config*>::iterator ChiralSearch; | |
| for(ChiralSearch = _squarePlanarMap.begin(); ChiralSearch != _squarePlanarMap.end(); ChiralSearch++) { | |
| atom = ChiralSearch->first; | |
| OBSquarePlanarStereo::Config *sp = ChiralSearch->second; | |
| if (!sp) | |
| continue; | |
| if (sp->refs.size() != 4) | |
| continue; | |
| // cout << "*ts = " << *ts << endl; | |
| OBSquarePlanarStereo *obsp = new OBSquarePlanarStereo(&mol); | |
| obsp->SetConfig(*sp); | |
| mol.SetData(obsp); | |
| } | |
| } | |
| CreateCisTrans(mol); | |
| StereoFrom0D(&mol); | |
| return(true); | |
| } | |
| bool OBSmilesParser::IsUp(OBBond *bond) | |
| { | |
| map<OBBond*, char>::iterator UpDownSearch; | |
| UpDownSearch = _upDownMap.find(bond); | |
| if (UpDownSearch != _upDownMap.end()) | |
| if (UpDownSearch->second == BondUpChar) | |
| return true; | |
| return false; | |
| } | |
| bool OBSmilesParser::IsDown(OBBond *bond) | |
| { | |
| map<OBBond*, char>::iterator UpDownSearch; | |
| UpDownSearch = _upDownMap.find(bond); | |
| if (UpDownSearch != _upDownMap.end()) | |
| if (UpDownSearch->second == BondDownChar) | |
| return true; | |
| return false; | |
| } | |
| char OBSmilesParser::SetRingClosureStereo(StereoRingBond rcstereo, OBBond* dbl_bond) | |
| { | |
| // Ring Closure bonds appear twice (at opening and closure). | |
| // If involved in cis/trans stereo, then the stereo may be | |
| // specified at either end or indeed both. Although Open Babel | |
| // will only write out SMILES with the stereo at one end (the end | |
| // on the double bond), it must handle all cases when reading. | |
| // For example: | |
| // | |
| // C | |
| // /| | |
| // C = C | | |
| // / \| | |
| // C N | |
| // | |
| // Can be written as: | |
| // (a) C/C=C/1\NC1 -- preferred | |
| // (b) C/C=C1\NC\1 | |
| // (c) C/C=C/1\NC\1 | |
| // or indeed by replacing the "\N" with "N". | |
| // If the stereo chemistry for a ring closure is inconsistently specified, | |
| // it is ignored. In that case, if a stereo symbol does not exist for its | |
| // partner bond on the double bond (e.g. (b) below), then the stereo is unspecified. | |
| // (a) C/C=C/1NC\1 -- specified stereo | |
| // (b) C/C=C/1NC/1 -- ignore ring closure stereo => treated as C/C=C1NC1 => CC=C1NC1 | |
| // (c) C/C=C/1\NC/1 -- ignore ring closure stereo => treated as C/C=C1\NC1 => C/C=C/1\NC1 | |
| // The ring closure bond is either up or down with respect | |
| // to the double bond. Our task here is to figure out which it is, | |
| // based on the contents of _stereorbond. | |
| bool found = false; // We have found the answer | |
| bool updown = true; // The answer | |
| if (rcstereo.updown[0] == BondUpChar || rcstereo.updown[0] == BondDownChar) { // Is there a stereo symbol at the opening? | |
| bool on_dbl_bond = (rcstereo.atoms[0] == dbl_bond->GetBeginAtom() || rcstereo.atoms[0] == dbl_bond->GetEndAtom()); | |
| updown = (rcstereo.updown[0]==BondUpChar) ^ on_dbl_bond; | |
| found = true; | |
| } | |
| if (rcstereo.updown[1] == BondUpChar || rcstereo.updown[1] == BondDownChar) { // Is there a stereo symbol at the closing? | |
| bool on_dbl_bond = (rcstereo.atoms[1] == dbl_bond->GetBeginAtom() || rcstereo.atoms[1] == dbl_bond->GetEndAtom()); | |
| bool new_updown = (rcstereo.updown[1]==BondUpChar) ^ on_dbl_bond; | |
| if (!found) { | |
| updown = new_updown; | |
| found = true; | |
| } | |
| else if (new_updown != updown) { | |
| obErrorLog.ThrowError(__FUNCTION__, "Ignoring the cis/trans stereochemistry specified for the ring closure\n as it is inconsistent.", obWarning); | |
| found = false; | |
| } | |
| } | |
| if (!found) | |
| return 0; | |
| else | |
| return updown ? 1 : 2; | |
| } | |
| void OBSmilesParser::CreateCisTrans(OBMol &mol) | |
| { | |
| // Create a vector of CisTransStereo objects for the molecule | |
| FOR_BONDS_OF_MOL(dbi, mol) { | |
| OBBond *dbl_bond = &(*dbi); | |
| // Not a double bond? | |
| if (!dbl_bond->IsDouble() || dbl_bond->IsAromatic()) | |
| continue; | |
| // Find the single bonds around the atoms connected by the double bond. | |
| // FIXME: "While we're at it, note whether the pair of atoms on either end are | |
| // identical, in which case it's not cis/trans." This code must be in the original | |
| // smilesformat.cpp | |
| OBAtom *a1 = dbl_bond->GetBeginAtom(); | |
| OBAtom *a2 = dbl_bond->GetEndAtom(); | |
| // Check that both atoms on the double bond have at least one | |
| // other neighbor, but not more than two other neighbors; | |
| int v1 = a1->GetValence(); | |
| int v2 = a2->GetValence(); | |
| if (v1 < 2 || v1 > 3 || v2 < 2 || v2 > 3) { | |
| continue; | |
| } | |
| vector<OBAtom*> dbl_bond_atoms; | |
| dbl_bond_atoms.push_back(a1); | |
| dbl_bond_atoms.push_back(a2); | |
| vector<bool> bond_stereo(2, true); // Store the stereo of the chosen bonds at each end of the dbl bond | |
| vector<OBBond*> stereo_bond(2, (OBBond*) NULL); // These are the chosen stereo bonds | |
| vector<OBBond*> other_bond(2, (OBBond*) NULL); // These are the 'other' bonds at each end | |
| for (int i = 0; i < 2; ++i) { // Loop over each end of the double bond in turn | |
| FOR_BONDS_OF_ATOM(bi, dbl_bond_atoms[i]) { | |
| OBBond *b = &(*bi); | |
| if (b == dbl_bond) continue; | |
| if (!(IsUp(b) || IsDown(b))) { | |
| other_bond[i] = b; // Use this for the 'other' bond | |
| continue; | |
| } | |
| bool found = true; | |
| bool stereo; | |
| map<OBBond*, StereoRingBond>::iterator sb_it = _stereorbond.find(b); | |
| if (sb_it == _stereorbond.end()) // Not a ring closure | |
| // True/False for "up/down if moved to before the double bond C" | |
| stereo = !(IsUp(b) ^ (b->GetNbrAtomIdx(dbl_bond_atoms[i]) < dbl_bond_atoms[i]->GetIdx())) ; | |
| else { // Is a ring closure | |
| char bc_result = SetRingClosureStereo(sb_it->second, dbl_bond); | |
| if (bc_result) | |
| stereo = bc_result == 1 ? true : false; | |
| else | |
| found = false; | |
| } | |
| if (!found) { // This cannot be used as the stereo bond | |
| other_bond[i] = b; // Use this for the 'other' bond | |
| continue; | |
| } | |
| if (stereo_bond[i] == NULL) { // This is a first stereo bond | |
| stereo_bond[i] = b; // Use this for the 'stereo' bond | |
| bond_stereo[i] = stereo; | |
| } | |
| else { // This is a second stereo bond | |
| if (stereo != bond_stereo[i]) { // Verify that the other stereo bond (on the same atom) has opposite stereo | |
| other_bond[i] = b; // Use this for the 'other' bond | |
| } | |
| else { | |
| obErrorLog.ThrowError(__FUNCTION__, "Error in cis/trans stereochemistry specified for the double bond\n", obWarning); | |
| stereo_bond[i] = (OBBond*) NULL; | |
| } | |
| } | |
| } | |
| } | |
| if (stereo_bond[0] == NULL || stereo_bond[1] == NULL) continue; // No cis/trans | |
| // other_bond will contain NULLs if there are bonds to implicit hydrogens | |
| unsigned int second = (other_bond[0] == NULL) ? OBStereo::ImplicitRef : other_bond[0]->GetNbrAtom(a1)->GetId(); | |
| unsigned int fourth = (other_bond[1] == NULL) ? OBStereo::ImplicitRef : other_bond[1]->GetNbrAtom(a2)->GetId(); | |
| OBCisTransStereo *ct = new OBCisTransStereo(&mol); | |
| OBCisTransStereo::Config cfg; | |
| cfg.begin = a1->GetId(); | |
| cfg.end = a2->GetId(); | |
| // If bond_stereo[0]==bond_stereo[1], this means cis for stereo_bond[0] and stereo_bond[1]. | |
| if (bond_stereo[0] == bond_stereo[1]) | |
| cfg.refs = OBStereo::MakeRefs(stereo_bond[0]->GetNbrAtom(a1)->GetId(), second, | |
| fourth, stereo_bond[1]->GetNbrAtom(a2)->GetId()); | |
| else | |
| cfg.refs = OBStereo::MakeRefs(stereo_bond[0]->GetNbrAtom(a1)->GetId(), second, | |
| stereo_bond[1]->GetNbrAtom(a2)->GetId(), fourth); | |
| ct->SetConfig(cfg); | |
| // add the data to the atom | |
| mol.SetData(ct); | |
| } | |
| } | |
| void OBSmilesParser::FindOrphanAromaticAtoms(OBMol &mol) | |
| { | |
| //Facilitates the use lower case shorthand for radical entry | |
| //Atoms which are marked as aromatic but have no aromatic bonds | |
| //are taken to be radical centres | |
| OBAtom *atom; | |
| vector<OBAtom*>::iterator j; | |
| for (atom = mol.BeginAtom(j);atom;atom = mol.NextAtom(j)) | |
| if(atom->IsAromatic()) | |
| { | |
| if(atom->CountBondsOfOrder(5)<2) //bonds order 5 set in FindAromaticBonds() | |
| //not proper aromatic atoms - could be conjugated chain or radical centre | |
| atom->UnsetAromatic(); | |
| else | |
| { | |
| //recognized as aromatic, so are not radicals | |
| atom->SetSpinMultiplicity(0); | |
| } | |
| } | |
| } | |
| void OBSmilesParser::FindAromaticBonds(OBMol &mol) | |
| { | |
| _path.clear(); | |
| _avisit.clear(); | |
| _bvisit.clear(); | |
| _avisit.resize(mol.NumAtoms()+1); | |
| _bvisit.resize(mol.NumBonds()); | |
| _path.resize(mol.NumAtoms()+1); | |
| OBBond *bond; | |
| vector<OBBond*>::iterator i; | |
| for (bond = mol.BeginBond(i);bond;bond = mol.NextBond(i)) | |
| if (!bond->GetBeginAtom()->IsAromatic() || | |
| !bond->GetEndAtom()->IsAromatic()) | |
| _bvisit[bond->GetIdx()] = true; | |
| OBAtom *atom; | |
| vector<OBAtom*>::iterator j; | |
| for (atom = mol.BeginAtom(j);atom;atom = mol.NextAtom(j)) | |
| if(!_avisit[atom->GetIdx()] && atom->IsAromatic()) | |
| FindAromaticBonds(mol,atom,0); | |
| } | |
| void OBSmilesParser::FindAromaticBonds(OBMol &mol,OBAtom *atom,int depth ) | |
| { | |
| OBBond *bond; | |
| vector<OBBond*>::iterator k; | |
| if (_avisit[atom->GetIdx()]) | |
| { | |
| int j = depth-1; | |
| bond=mol.GetBond(_path[j--]); | |
| if (bond->GetBO() != 2) { // don't rewrite explicit double bonds | |
| bond->SetBO(5); | |
| } | |
| while( j >= 0 ) | |
| { | |
| bond=mol.GetBond(_path[j--]); | |
| if (bond->GetBO() != 2) { // don't rewrite explicit double bonds | |
| bond->SetBO(5); | |
| } | |
| if(bond->GetBeginAtom() == atom || bond->GetEndAtom() == atom) | |
| break; | |
| } | |
| } | |
| else | |
| { | |
| _avisit[atom->GetIdx()] = true; | |
| for(bond = atom->BeginBond(k);bond;bond = atom->NextBond(k)) | |
| if( !_bvisit[bond->GetIdx()]) | |
| { | |
| _path[depth] = bond->GetIdx(); | |
| _bvisit[bond->GetIdx()] = true; | |
| FindAromaticBonds(mol,bond->GetNbrAtom(atom),depth+1); | |
| } | |
| } | |
| } | |
| void OBSmilesParser::InsertTetrahedralRef(OBMol &mol, unsigned long id) | |
| { | |
| map<OBAtom*, OBTetrahedralStereo::Config*>::iterator ChiralSearch; | |
| ChiralSearch = _tetrahedralMap.find(mol.GetAtom(_prev)); | |
| if (ChiralSearch != _tetrahedralMap.end() && ChiralSearch->second != NULL) | |
| { | |
| int insertpos = NumConnections(ChiralSearch->first) - 2; | |
| if (insertpos > 2) | |
| return; | |
| if (insertpos < 0) { | |
| if (ChiralSearch->second->from != OBStereo::NoRef) | |
| obErrorLog.ThrowError(__FUNCTION__, "Warning: Overwriting previous from reference id.", obWarning); | |
| (ChiralSearch->second)->from = id; | |
| // cerr << "Adding " << id << " at Config.from to " << ChiralSearch->second << endl; | |
| } else { | |
| if (ChiralSearch->second->refs[insertpos] != OBStereo::NoRef) | |
| obErrorLog.ThrowError(__FUNCTION__, "Warning: Overwriting previously set reference id.", obWarning); | |
| (ChiralSearch->second)->refs[insertpos] = id; | |
| // cerr << "Adding " << id << " at " << insertpos << " to " << ChiralSearch->second << endl; | |
| } | |
| } | |
| } | |
| void OBSmilesParser::InsertSquarePlanarRef(OBMol &mol, unsigned long id) | |
| { | |
| map<OBAtom*, OBSquarePlanarStereo::Config*>::iterator ChiralSearch; | |
| ChiralSearch = _squarePlanarMap.find(mol.GetAtom(_prev)); | |
| if (ChiralSearch != _squarePlanarMap.end() && ChiralSearch->second != NULL) | |
| { | |
| int insertpos = NumConnections(ChiralSearch->first) - 1; | |
| if (insertpos < 0) { | |
| if (ChiralSearch->second->refs[0] != OBStereo::NoRef) | |
| obErrorLog.ThrowError(__FUNCTION__, "Warning: Overwriting previous from reference id.", obWarning); | |
| (ChiralSearch->second)->refs[0] = id; | |
| //cerr << "Adding " << id << " at Config.refs[0] to " << ChiralSearch->second << endl; | |
| } else { | |
| if (ChiralSearch->second->refs[insertpos] != OBStereo::NoRef) | |
| obErrorLog.ThrowError(__FUNCTION__, "Warning: Overwriting previously set reference id.", obWarning); | |
| (ChiralSearch->second)->refs[insertpos] = id; | |
| //cerr << "Adding " << id << " at " << insertpos << " to " << ChiralSearch->second << endl; | |
| } | |
| } | |
| } | |
| bool OBSmilesParser::ParseSimple(OBMol &mol) | |
| { | |
| char symbol[3]; | |
| int element; | |
| bool arom=false; | |
| memset(symbol,'\0',sizeof(char)*3); | |
| if (isupper(*_ptr)) | |
| switch(*_ptr) | |
| { | |
| case 'C': | |
| _ptr++; | |
| if (*_ptr == 'l') | |
| { | |
| strcpy(symbol,"Cl"); | |
| element = 17; | |
| } | |
| else | |
| { | |
| symbol[0] = 'C'; | |
| element = 6; | |
| _ptr--; | |
| } | |
| break; | |
| case 'N': | |
| element = 7; | |
| symbol[0] = 'N'; | |
| break; | |
| case 'O': | |
| element = 8; | |
| symbol[0] = 'O'; | |
| break; | |
| case 'S': | |
| element = 16; | |
| symbol[0] = 'S'; | |
| break; | |
| case 'P': | |
| element = 15; | |
| symbol[0] = 'P'; | |
| break; | |
| case 'F': | |
| element = 9; | |
| symbol[0] = 'F'; | |
| break; | |
| case 'I': | |
| element = 53; | |
| symbol[0] = 'I'; | |
| break; | |
| case 'B': | |
| _ptr++; | |
| if (*_ptr == 'r') | |
| { | |
| element = 35; | |
| strcpy(symbol,"Br"); | |
| } | |
| else | |
| { | |
| element = 5; | |
| symbol[0] = 'B'; | |
| _ptr--; | |
| } | |
| break; | |
| default: | |
| return(false); | |
| } | |
| else | |
| { | |
| arom = true; | |
| switch(*_ptr) | |
| { | |
| case 'c': | |
| element = 6; | |
| symbol[0] = 'C'; | |
| break; | |
| case 'n': | |
| element = 7; | |
| symbol[0] = 'N'; | |
| break; | |
| case 'o': | |
| element = 8; | |
| symbol[0] = 'O'; | |
| break; | |
| case 'p': | |
| element = 15; | |
| symbol[0] = 'P'; | |
| break; | |
| case 's': | |
| element = 16; | |
| symbol[0] = 'S'; | |
| break; | |
| case '*': | |
| element = 0; | |
| strcpy(symbol,"Du"); | |
| arom = false; | |
| break; | |
| case 'b': | |
| obErrorLog.ThrowError(__FUNCTION__, "Illegal aromatic element b", obWarning); | |
| element = 5; | |
| strcpy(symbol,"B"); | |
| break; | |
| default: | |
| return(false); | |
| } | |
| } | |
| OBAtom *atom = mol.NewAtom(); | |
| atom->SetAtomicNum(element); | |
| atom->SetType(symbol); | |
| if (arom) | |
| { | |
| atom->SetAromatic(); | |
| atom->SetSpinMultiplicity(2); // CM 18 Sept 2003 | |
| } | |
| else | |
| atom->ForceImplH();//ensures atom is never hydrogen deficient | |
| // Untrue, but necessary to avoid perception being called in OBAtom::IsAromatic() | |
| // on incomplete molecule. Undone at end of function. | |
| mol.SetAromaticPerceived(); | |
| if (_prev) //need to add bond | |
| { | |
| /* CM 18 Sept 2003 | |
| An extension to the SMILES format has been added so that lower case c,n,o can | |
| represent a radical centre: CcC is isopropyl radical; | |
| and cccc... a carbon chain bonded by conjugated double bonds. | |
| Fails sometimes when using c as both aromatic and as the extened form. | |
| For benzyl radical C6H5CH2. c1ccccc1c is ok; c1cc(c)ccc1 fails. | |
| Radical centres should not be involved in ring closure: | |
| for cyclohexyl radical C1cCCCC1 is ok, c1CCCCC1 is not. | |
| Implementation | |
| Atoms c,n,o, etc initially added as a radical centre | |
| unless _prev is a radical centre when both are made a normal atoms | |
| connected by a double bond. But making this bond double is deferred until | |
| the molecule has been constructed, because it is not appropriate if | |
| either of the atoms is really part of an aromatic ring. | |
| Since they are still marked as aromatic, FindAromaticBonds() will | |
| replace the bonds by aromatic bonds if they are in a ring. | |
| FindOrphanAromand removes the aromatic tag from the atoms not found in this way | |
| and removes stray radical centres. | |
| */ | |
| OBAtom* prevatom = mol.GetAtom(_prev); | |
| assert(prevatom); | |
| if(arom && prevatom->IsAromatic()) | |
| { | |
| if (_order != 2) _order=5; //Potential aromatic bond -- but flag explicit double bonds | |
| if (prevatom->GetSpinMultiplicity()) | |
| { | |
| //Previous atom had been marked, so bond is potentially a double bond | |
| //if it is not part of an aromatic ring. This will be decided when all | |
| //molecule has been constructed. | |
| PosDouble.push_back(mol.NumBonds()); //saves index of bond about to be added | |
| prevatom->SetSpinMultiplicity(0); | |
| atom->SetSpinMultiplicity(0); | |
| } | |
| } | |
| mol.AddBond(_prev, mol.NumAtoms(), _order); | |
| // store up/down | |
| if (_updown == BondUpChar || _updown == BondDownChar) | |
| _upDownMap[mol.GetBond(_prev, mol.NumAtoms())] = _updown; | |
| InsertTetrahedralRef(mol, mol.NumAtoms() - 1); | |
| InsertSquarePlanarRef(mol, mol.NumAtoms() - 1); | |
| } | |
| //set values | |
| _prev = mol.NumAtoms(); | |
| _order = 1; | |
| _updown = ' '; | |
| mol.UnsetAromaticPerceived(); //undo | |
| return(true); | |
| } | |
| bool OBSmilesParser::ParseComplex(OBMol &mol) | |
| { | |
| char symbol[7]; | |
| int element=0; | |
| int isotope=0; | |
| int isoPtr=0; | |
| bool arom=false; | |
| memset(symbol,'\0',sizeof(char)*7); | |
| _ptr++; | |
| //grab isotope information | |
| for (;*_ptr && isdigit(*_ptr) && (isoPtr <= 6);_ptr++) | |
| { | |
| symbol[isoPtr] = *_ptr; | |
| isoPtr++; | |
| } | |
| if (isoPtr >= 6) | |
| return false; | |
| isotope = atoi(symbol); | |
| //parse element data | |
| if (isupper(*_ptr)) | |
| switch(*_ptr) | |
| { | |
| case 'C': | |
| _ptr++; | |
| switch(*_ptr) | |
| { | |
| case 'a': | |
| element = 20; | |
| strcpy(symbol,"Ca"); | |
| break; | |
| case 'd': | |
| element = 48; | |
| strcpy(symbol,"Cd"); | |
| break; | |
| case 'e': | |
| element = 58; | |
| strcpy(symbol,"Ce"); | |
| break; | |
| case 'f': | |
| element = 98; | |
| strcpy(symbol,"Cf"); | |
| break; | |
| case 'l': | |
| element = 17; | |
| strcpy(symbol,"Cl"); | |
| break; | |
| case 'm': | |
| element = 96; | |
| strcpy(symbol,"Cm"); | |
| break; | |
| case 'n': | |
| element = 112; | |
| strcpy(symbol,"Cn"); | |
| break; | |
| case 'o': | |
| element = 27; | |
| strcpy(symbol,"Co"); | |
| break; | |
| case 'r': | |
| element = 24; | |
| strcpy(symbol,"Cr"); | |
| break; | |
| case 's': | |
| element = 55; | |
| strcpy(symbol,"Cs"); | |
| break; | |
| case 'u': | |
| element = 29; | |
| strcpy(symbol,"Cu"); | |
| break; | |
| default: | |
| element = 6; | |
| symbol[0] = 'C'; | |
| _ptr--; | |
| } | |
| break; | |
| case 'N': | |
| _ptr++; | |
| switch(*_ptr) | |
| { | |
| case 'a': | |
| element = 11; | |
| strcpy(symbol,"Na"); | |
| break; | |
| case 'b': | |
| element = 41; | |
| strcpy(symbol,"Nb"); | |
| break; | |
| case 'd': | |
| element = 60; | |
| strcpy(symbol,"Nd"); | |
| break; | |
| case 'e': | |
| element = 10; | |
| strcpy(symbol,"Ne"); | |
| break; | |
| case 'i': | |
| element = 28; | |
| strcpy(symbol,"Ni"); | |
| break; | |
| case 'o': | |
| element = 102; | |
| strcpy(symbol,"No"); | |
| break; | |
| case 'p': | |
| element = 93; | |
| strcpy(symbol,"Np"); | |
| break; | |
| default: | |
| element = 7; | |
| symbol[0] = 'N'; | |
| _ptr--; | |
| } | |
| break; | |
| case('O'): | |
| _ptr++; | |
| if(*_ptr == 's') | |
| { | |
| element = 76; | |
| strcpy(symbol,"Os"); | |
| } | |
| else | |
| { | |
| element = 8; | |
| symbol[0] = 'O'; | |
| _ptr--; | |
| } | |
| break; | |
| case 'P': | |
| _ptr++; | |
| switch(*_ptr) | |
| { | |
| case 'a': | |
| element = 91; | |
| strcpy(symbol,"Pa"); | |
| break; | |
| case 'b': | |
| element = 82; | |
| strcpy(symbol,"Pb"); | |
| break; | |
| case 'd': | |
| element = 46; | |
| strcpy(symbol,"Pd"); | |
| break; | |
| case 'm': | |
| element = 61; | |
| strcpy(symbol,"Pm"); | |
| break; | |
| case 'o': | |
| element = 84; | |
| strcpy(symbol,"Po"); | |
| break; | |
| case 'r': | |
| element = 59; | |
| strcpy(symbol,"Pr"); | |
| break; | |
| case 't': | |
| element = 78; | |
| strcpy(symbol,"Pt"); | |
| break; | |
| case 'u': | |
| element = 94; | |
| strcpy(symbol,"Pu"); | |
| break; | |
| default: | |
| element = 15; | |
| symbol[0] = 'P'; | |
| _ptr--; | |
| } | |
| break; | |
| case('S'): | |
| _ptr++; | |
| switch(*_ptr) | |
| { | |
| case 'b': | |
| element = 51; | |
| strcpy(symbol,"Sb"); | |
| break; | |
| case 'c': | |
| element = 21; | |
| strcpy(symbol,"Sc"); | |
| break; | |
| case 'e': | |
| element = 34; | |
| strcpy(symbol,"Se"); | |
| break; | |
| case 'g': | |
| element = 106; | |
| strcpy(symbol,"Sg"); | |
| break; | |
| case 'i': | |
| element = 14; | |
| strcpy(symbol,"Si"); | |
| break; | |
| case 'm': | |
| element = 62; | |
| strcpy(symbol,"Sm"); | |
| break; | |
| case 'n': | |
| element = 50; | |
| strcpy(symbol,"Sn"); | |
| break; | |
| case 'r': | |
| element = 38; | |
| strcpy(symbol,"Sr"); | |
| break; | |
| default: | |
| element = 16; | |
| symbol[0] = 'S'; | |
| _ptr--; | |
| } | |
| break; | |
| case 'B': | |
| _ptr++; | |
| switch(*_ptr) | |
| { | |
| case 'a': | |
| element = 56; | |
| strcpy(symbol,"Ba"); | |
| break; | |
| case 'e': | |
| element = 4; | |
| strcpy(symbol,"Be"); | |
| break; | |
| case 'h': | |
| element = 107; | |
| strcpy(symbol,"Bh"); | |
| break; | |
| case 'i': | |
| element = 83; | |
| strcpy(symbol,"Bi"); | |
| break; | |
| case 'k': | |
| element = 97; | |
| strcpy(symbol,"Bk"); | |
| break; | |
| case 'r': | |
| element = 35; | |
| strcpy(symbol,"Br"); | |
| break; | |
| default: | |
| element = 5; | |
| symbol[0] = 'B'; | |
| _ptr--; | |
| } | |
| break; | |
| case 'F': | |
| _ptr++; | |
| switch(*_ptr) | |
| { | |
| case 'e': | |
| element = 26; | |
| strcpy(symbol,"Fe"); | |
| break; | |
| case 'm': | |
| element = 100; | |
| strcpy(symbol,"Fm"); | |
| break; | |
| case 'r': | |
| element = 87; | |
| strcpy(symbol,"Fr"); | |
| break; | |
| default: | |
| element = 9; | |
| symbol[0] = 'F'; | |
| _ptr--; | |
| } | |
| break; | |
| case 'I': | |
| _ptr++; | |
| switch(*_ptr) | |
| { | |
| case 'n': | |
| element = 49; | |
| strcpy(symbol,"In"); | |
| break; | |
| case 'r': | |
| element = 77; | |
| strcpy(symbol,"Ir"); | |
| break; | |
| default: | |
| element = 53; | |
| symbol[0] = 'I'; | |
| _ptr--; | |
| } | |
| break; | |
| case 'A': | |
| _ptr++; | |
| switch(*_ptr) | |
| { | |
| case 'c': | |
| element = 89; | |
| strcpy(symbol,"Ac"); | |
| break; | |
| case 'g': | |
| element = 47; | |
| strcpy(symbol,"Ag"); | |
| break; | |
| case 'l': | |
| element = 13; | |
| strcpy(symbol,"Al"); | |
| break; | |
| case 'm': | |
| element = 95; | |
| strcpy(symbol,"Am"); | |
| break; | |
| case 'r': | |
| element = 18; | |
| strcpy(symbol,"Ar"); | |
| break; | |
| case 's': | |
| element = 33; | |
| strcpy(symbol,"As"); | |
| break; | |
| case 't': | |
| element = 85; | |
| strcpy(symbol,"At"); | |
| break; | |
| case 'u': | |
| element = 79; | |
| strcpy(symbol,"Au"); | |
| break; | |
| default: | |
| _ptr--; | |
| return(false); | |
| } | |
| break; | |
| case 'D': | |
| _ptr++; | |
| switch(*_ptr) | |
| { | |
| case 'b': | |
| element = 105; | |
| strcpy(symbol,"Db"); | |
| break; | |
| case 's': | |
| element = 110; | |
| strcpy(symbol,"Ds"); | |
| break; | |
| case 'y': | |
| element = 66; | |
| strcpy(symbol,"Dy"); | |
| break; | |
| default: | |
| _ptr--; | |
| return(false); | |
| } | |
| break; | |
| case 'E': | |
| _ptr++; | |
| switch(*_ptr) | |
| { | |
| case 'r': | |
| element = 68; | |
| strcpy(symbol,"Er"); | |
| break; | |
| case 's': | |
| element = 99; | |
| strcpy(symbol,"Es"); | |
| break; | |
| case 'u': | |
| element = 63; | |
| strcpy(symbol,"Eu"); | |
| break; | |
| default: | |
| _ptr--; | |
| return(false); | |
| } | |
| break; | |
| case 'G': | |
| _ptr++; | |
| switch (*_ptr) | |
| { | |
| case 'a': | |
| element = 31; | |
| strcpy(symbol,"Ga"); | |
| break; | |
| case 'd': | |
| element = 64; | |
| strcpy(symbol,"Gd"); | |
| break; | |
| case 'e': | |
| element = 32; | |
| strcpy(symbol,"Ge"); | |
| break; | |
| default: | |
| _ptr--; | |
| return(false); | |
| } | |
| break; | |
| case 'H': | |
| _ptr++; | |
| switch (*_ptr) | |
| { | |
| case 'e': | |
| element = 2; | |
| strcpy(symbol,"He"); | |
| break; | |
| case 'f': | |
| element = 72; | |
| strcpy(symbol,"Hf"); | |
| break; | |
| case 'g': | |
| element = 80; | |
| strcpy(symbol,"Hg"); | |
| break; | |
| case 'o': | |
| element = 67; | |
| strcpy(symbol,"Ho"); | |
| break; | |
| case 's': | |
| element = 108; | |
| strcpy(symbol,"Hs"); | |
| break; | |
| default: | |
| element = 1; | |
| symbol[0] = 'H'; | |
| _ptr--; | |
| } | |
| break; | |
| case 'K': | |
| _ptr++; | |
| if(*_ptr == 'r') | |
| { | |
| element = 36; | |
| strcpy(symbol,"Kr"); | |
| } | |
| else | |
| { | |
| element = 19; | |
| symbol[0] = 'K'; | |
| _ptr--; | |
| } | |
| break; | |
| case 'L': | |
| _ptr++; | |
| switch(*_ptr) | |
| { | |
| case 'a': | |
| element = 57; | |
| strcpy(symbol,"La"); | |
| break; | |
| case 'i': | |
| element = 3; | |
| strcpy(symbol,"Li"); | |
| break; | |
| case 'r': | |
| element = 103; | |
| strcpy(symbol,"Lr"); | |
| break; | |
| case 'u': | |
| element = 71; | |
| strcpy(symbol,"Lu"); | |
| break; | |
| default: | |
| _ptr--; | |
| return(false); | |
| } | |
| break; | |
| case 'M': | |
| _ptr++; | |
| switch(*_ptr) | |
| { | |
| case 'd': | |
| element = 101; | |
| strcpy(symbol,"Md"); | |
| break; | |
| case 'g': | |
| element = 12; | |
| strcpy(symbol,"Mg"); | |
| break; | |
| case 'n': | |
| element = 25; | |
| strcpy(symbol,"Mn"); | |
| break; | |
| case 'o': | |
| element = 42; | |
| strcpy(symbol,"Mo"); | |
| break; | |
| case 't': | |
| element = 109; | |
| strcpy(symbol,"Mt"); | |
| break; | |
| default: | |
| _ptr--; | |
| return(false); | |
| } | |
| break; | |
| case 'R': | |
| _ptr++; | |
| switch(*_ptr) | |
| { | |
| case 'a': | |
| element = 88; | |
| strcpy(symbol,"Ra"); | |
| break; | |
| case 'b': | |
| element = 37; | |
| strcpy(symbol,"Rb"); | |
| break; | |
| case 'e': | |
| element = 75; | |
| strcpy(symbol,"Re"); | |
| break; | |
| case 'f': | |
| element = 104; | |
| strcpy(symbol,"Rf"); | |
| break; | |
| case 'g': | |
| element = 111; | |
| strcpy(symbol,"Rg"); | |
| break; | |
| case 'h': | |
| element = 45; | |
| strcpy(symbol,"Rh"); | |
| break; | |
| case 'n': | |
| element = 86; | |
| strcpy(symbol,"Rn"); | |
| break; | |
| case 'u': | |
| element = 44; | |
| strcpy(symbol,"Ru"); | |
| break; | |
| default: | |
| _ptr--; | |
| return(false); | |
| } | |
| break; | |
| case 'T': | |
| _ptr++; | |
| switch(*_ptr) | |
| { | |
| case 'a': | |
| element = 73; | |
| strcpy(symbol,"Ta"); | |
| break; | |
| case 'b': | |
| element = 65; | |
| strcpy(symbol,"Tb"); | |
| break; | |
| case 'c': | |
| element = 43; | |
| strcpy(symbol,"Tc"); | |
| break; | |
| case 'e': | |
| element = 52; | |
| strcpy(symbol,"Te"); | |
| break; | |
| case 'h': | |
| element = 90; | |
| strcpy(symbol,"Th"); | |
| break; | |
| case 'i': | |
| element = 22; | |
| strcpy(symbol,"Ti"); | |
| break; | |
| case 'l': | |
| element = 81; | |
| strcpy(symbol,"Tl"); | |
| break; | |
| case 'm': | |
| element = 69; | |
| strcpy(symbol,"Tm"); | |
| break; | |
| default: | |
| _ptr--; | |
| return(false); | |
| } | |
| break; | |
| case('U'): element = 92; | |
| symbol[0] = 'U'; | |
| break; | |
| case('V'): element = 23; | |
| symbol[0] = 'V'; | |
| break; | |
| case('W'): element = 74; | |
| symbol[0] = 'W'; | |
| break; | |
| case('X'): | |
| _ptr++; | |
| if (*_ptr == 'e') | |
| { | |
| element = 54; | |
| strcpy(symbol,"Xe"); | |
| } | |
| else | |
| { | |
| _ptr--; | |
| return(false); | |
| } | |
| break; | |
| case('Y'): | |
| _ptr++; | |
| if (*_ptr == 'b') | |
| { | |
| element = 70; | |
| strcpy(symbol,"Yb"); | |
| } | |
| else | |
| { | |
| element = 39; | |
| symbol[0] = 'Y'; | |
| _ptr--; | |
| } | |
| break; | |
| case('Z'): | |
| _ptr++; | |
| switch(*_ptr) | |
| { | |
| case 'n': | |
| element = 30; | |
| strcpy(symbol,"Zn"); | |
| break; | |
| case 'r': | |
| element = 40; | |
| strcpy(symbol,"Zr"); | |
| break; | |
| default: | |
| _ptr--; | |
| return(false); | |
| } | |
| break; | |
| } | |
| else | |
| { | |
| arom = true; | |
| switch(*_ptr) | |
| { | |
| case 'c': | |
| element = 6; | |
| symbol[0] = 'C'; | |
| break; | |
| case 'n': | |
| element = 7; | |
| symbol[0] = 'N'; | |
| break; | |
| case 'o': | |
| element = 8; | |
| symbol[0] = 'O'; | |
| break; | |
| case 'p': | |
| element = 15; | |
| symbol[0] = 'P'; | |
| break; | |
| case 'a': | |
| _ptr++; | |
| if (*_ptr == 's') | |
| { | |
| element = 33; | |
| strcpy(symbol,"As"); | |
| } | |
| else | |
| return(false); | |
| break; | |
| case '*': | |
| element = 0; | |
| strcpy(symbol,"Du"); | |
| arom = false; | |
| break; | |
| case 's': //note fall through | |
| _ptr++; | |
| if (*_ptr == 'e') | |
| { | |
| element = 34; | |
| strcpy(symbol,"Se"); | |
| break; | |
| } | |
| else if (*_ptr == 'i' || *_ptr == 'n' || *_ptr == 'b') | |
| { | |
| _ptr--; | |
| } | |
| else | |
| { | |
| element = 16; | |
| symbol[0] = 'S'; | |
| _ptr--; | |
| break; | |
| } | |
| //fall through | |
| default: | |
| strncpy(symbol, _ptr, 2); | |
| string symb(symbol); | |
| symbol[0] = toupper(symbol[0]); | |
| obErrorLog.ThrowError(__FUNCTION__, "Illegal aromatic element " + symb, obWarning); | |
| //But convert anyway | |
| ++_ptr; | |
| if(symb=="si") | |
| { | |
| element = 14; | |
| break; | |
| } | |
| else if(symb=="ge") | |
| { | |
| element = 32; | |
| break; | |
| } | |
| else if(symb=="sb") | |
| { | |
| element = 51; | |
| break; | |
| } | |
| else if(symb=="bi") | |
| { | |
| element = 83; | |
| break; | |
| } | |
| else if(symb=="te") | |
| { | |
| element = 52; | |
| break; | |
| } | |
| else if(symb=="sn") | |
| { | |
| element = 50; | |
| break; | |
| } | |
| else | |
| return(false); | |
| } | |
| } | |
| //handle hydrogen count, stereochemistry, and charge | |
| OBAtom *atom = mol.NewAtom(); | |
| int hcount = 0; | |
| int charge=0; | |
| int rad=0; | |
| int clval=0; | |
| char tmpc[2]; | |
| tmpc[1] = '\0'; | |
| stringstream errorMsg; | |
| for (_ptr++;*_ptr && *_ptr != ']';_ptr++) | |
| { | |
| switch(*_ptr) | |
| { | |
| case '@': | |
| _ptr++; | |
| if (*_ptr == 'S') { | |
| // square planar atom found | |
| squarePlanarWatch = true; | |
| if (_squarePlanarMap.find(atom)==_squarePlanarMap.end()) // Prevent memory leak for malformed smiles (PR#3428432) | |
| _squarePlanarMap[atom] = new OBSquarePlanarStereo::Config; | |
| _squarePlanarMap[atom]->refs = OBStereo::Refs(4, OBStereo::NoRef); | |
| _squarePlanarMap[atom]->center = atom->GetId(); | |
| _ptr += 2; | |
| if (*_ptr == '1') | |
| _squarePlanarMap[atom]->shape = OBStereo::ShapeU; | |
| if (*_ptr == '2') | |
| _squarePlanarMap[atom]->shape = OBStereo::Shape4; | |
| if (*_ptr == '3') | |
| _squarePlanarMap[atom]->shape = OBStereo::ShapeZ; | |
| } else { | |
| // tetrahedral atom found | |
| chiralWatch=true; | |
| if (_tetrahedralMap.find(atom)==_tetrahedralMap.end()) // Prevent memory leak for malformed smiles (PR#3428432) | |
| _tetrahedralMap[atom] = new OBTetrahedralStereo::Config; | |
| _tetrahedralMap[atom]->refs = OBStereo::Refs(3, OBStereo::NoRef); | |
| _tetrahedralMap[atom]->center = atom->GetId(); | |
| if (*_ptr == '@') { | |
| _tetrahedralMap[atom]->winding = OBStereo::Clockwise; | |
| } else if (*_ptr == '?') { | |
| _tetrahedralMap[atom]->specified = false; | |
| } else { | |
| _tetrahedralMap[atom]->winding = OBStereo::AntiClockwise; | |
| _ptr--; | |
| } | |
| } | |
| break; | |
| case '-': | |
| _ptr++; | |
| if (!isdigit(*_ptr)) | |
| charge--; | |
| while( isdigit(*_ptr) ) // go number by number | |
| charge = charge*10 - ((*_ptr++)-'0'); | |
| _ptr--; | |
| break; | |
| case '+': | |
| _ptr++; | |
| if (!isdigit(*_ptr)) | |
| charge++; | |
| while( isdigit(*_ptr) ) // go number by number | |
| charge = charge*10 + ((*_ptr++)-'0'); | |
| _ptr--; | |
| break; | |
| case 'H': | |
| _ptr++; | |
| if (isdigit(*_ptr)) | |
| { | |
| tmpc[0] = *_ptr; | |
| hcount = atoi(tmpc); | |
| } | |
| else | |
| { | |
| hcount = 1; | |
| _ptr--; | |
| } | |
| break; | |
| case '.': //CM Feb05 | |
| rad=2; | |
| if(*(++_ptr)=='.') | |
| rad=3; | |
| else | |
| _ptr--; | |
| break; | |
| case ':': | |
| if(!isdigit(*(++_ptr))) | |
| { | |
| obErrorLog.ThrowError(__FUNCTION__,"The atom class following : must be a number", obWarning); | |
| return false; | |
| } | |
| while( isdigit(*_ptr) ) | |
| clval = clval*10 + ((*_ptr++)-'0'); | |
| --_ptr; | |
| _classdata.Add(atom->GetIdx(), clval); | |
| break; | |
| default: | |
| return(false); | |
| } | |
| } | |
| if (!*_ptr || *_ptr != ']') | |
| return(false); // we should have a trailing ']' now | |
| if (charge) { | |
| atom->SetFormalCharge(charge); | |
| if (abs(charge) > 10 || charge > element) { // if the charge is +/- 10 or more than the number of electrons | |
| errorMsg << "Atom " << atom->GetIdx() << " had an unrealistic charge of " << charge | |
| << "." << endl; | |
| obErrorLog.ThrowError(__FUNCTION__, errorMsg.str(), obWarning); | |
| } | |
| } | |
| if (rad) | |
| atom->SetSpinMultiplicity(rad); | |
| atom->SetAtomicNum(element); | |
| atom->SetIsotope(isotope); | |
| atom->SetType(symbol); | |
| if (arom) | |
| atom->SetAromatic(); | |
| if (_prev) //need to add bond | |
| { | |
| OBAtom* prevatom = mol.GetAtom(_prev); | |
| mol.SetAromaticPerceived(); // prevent aromaticity analysis | |
| if(arom && prevatom->IsAromatic()) | |
| { | |
| _order=5; //Potential aromatic bond | |
| if (prevatom->GetSpinMultiplicity()) | |
| { | |
| // Previous atom had been marked, so bond is potentially a double bond | |
| // if it is not part of an aromatic ring. This will be decided when all | |
| // molecule has been constructed. | |
| PosDouble.push_back(mol.NumBonds()); //saves index of bond about to be added | |
| prevatom->SetSpinMultiplicity(0); | |
| atom->SetSpinMultiplicity(0); | |
| } | |
| } | |
| mol.UnsetAromaticPerceived(); | |
| mol.AddBond(_prev, mol.NumAtoms(), _order); | |
| // store up/down | |
| if (_updown == BondUpChar || _updown == BondDownChar) | |
| _upDownMap[mol.GetBond(_prev, mol.NumAtoms())] = _updown; | |
| if(chiralWatch) { // if tetrahedral atom, set previous as from atom | |
| _tetrahedralMap[atom]->from = mol.GetAtom(_prev)->GetId(); | |
| if (element == 16) // Handle chiral lone pair as in X[S@@](Y)Z | |
| _chiralLonePair[mol.NumAtoms()] = 1; // First of the refs | |
| //cerr <<"NB7: line 1622: Added atom ref "<<_prev<<" at " << 0 << " to "<<_mapcd[atom]<<endl; | |
| } | |
| if (squarePlanarWatch) { // if squareplanar atom, set previous atom as first ref | |
| _squarePlanarMap[atom]->refs[0] = mol.GetAtom(_prev)->GetId(); | |
| //cerr <<"TV7: line 1748: Added atom ref " << mol.GetAtom(_prev)->GetId() | |
| // << " at " << 0 << " to " << _squarePlanarMap[atom] << endl; | |
| } | |
| InsertTetrahedralRef(mol, atom->GetId()); | |
| InsertSquarePlanarRef(mol, atom->GetId()); | |
| } | |
| else | |
| { | |
| // Handle chiral lone pair as in [S@@](X)(Y)Z | |
| if (chiralWatch && element == 16) // Handle chiral lone pair (only S at the moment) | |
| _chiralLonePair[mol.NumAtoms()] = 0; // 'from' atom | |
| } | |
| //set values | |
| _prev = mol.NumAtoms(); | |
| _order = 1; | |
| _updown = ' '; | |
| //now add hydrogens | |
| if(hcount==0) | |
| atom->ForceNoH();//ensure AssignMultiplicity regards [C] as C atom | |
| for (int i = 0;i < hcount;i++) | |
| { | |
| atom = mol.NewAtom(); | |
| atom->SetAtomicNum(1); | |
| atom->SetType("H"); | |
| mol.AddBond(_prev, mol.NumAtoms(), 1); | |
| // store up/down | |
| if (_updown == BondUpChar || _updown == BondDownChar) | |
| _upDownMap[mol.GetBond(_prev, mol.NumAtoms())] = _updown; | |
| if(chiralWatch) | |
| InsertTetrahedralRef(mol, atom->GetId()); | |
| if (squarePlanarWatch) | |
| InsertSquarePlanarRef(mol, atom->GetId()); | |
| } | |
| chiralWatch=false; | |
| squarePlanarWatch = false; | |
| return(true); | |
| } | |
| bool OBSmilesParser::CapExternalBonds(OBMol &mol) | |
| { | |
| if (_extbond.empty()) | |
| return true; | |
| OBAtom *atom; | |
| vector<ExternalBond>::iterator bond; | |
| for (bond = _extbond.begin(); bond != _extbond.end(); bond++) { | |
| // create new dummy atom | |
| atom = mol.NewAtom(); | |
| atom->SetAtomicNum(0); | |
| atom->SetType("*"); | |
| // bond dummy atom to mol via external bond | |
| mol.AddBond(bond->prev, atom->GetIdx(), bond->order); | |
| // store up/down | |
| if (bond->updown == BondUpChar || bond->updown == BondDownChar) | |
| _upDownMap[mol.GetBond(bond->prev, atom->GetIdx())] = bond->updown; | |
| OBBond *refbond = atom->GetBond(mol.GetAtom(bond->prev)); | |
| //record external bond information | |
| OBExternalBondData *xbd; | |
| if (mol.HasData(OBGenericDataType::ExternalBondData)) { | |
| xbd = (OBExternalBondData*) mol.GetData(OBGenericDataType::ExternalBondData); | |
| } else { | |
| xbd = new OBExternalBondData; | |
| xbd->SetOrigin(fileformatInput); | |
| mol.SetData(xbd); | |
| } | |
| xbd->SetData(atom,refbond, bond->digit); | |
| //this data gets cleaned up in mol.Clear. | |
| } | |
| return true; | |
| } | |
| bool OBSmilesParser::ParseExternalBond(OBMol &mol) | |
| { | |
| int digit; | |
| char str[10]; | |
| //*_ptr should == '&' | |
| _ptr++; | |
| switch (*_ptr) // check for bond order indicators CC&=1.C&1 | |
| { | |
| case '-': | |
| _order = 1; | |
| _ptr++; | |
| break; | |
| case '=': | |
| _order = 2; | |
| _ptr++; | |
| break; | |
| case '#': | |
| _order = 3; | |
| _ptr++; | |
| break; | |
| case '$': | |
| _order = 4; | |
| _ptr++; | |
| break; | |
| case ';': | |
| _order = 5; | |
| _ptr++; | |
| break; | |
| case '/': //chiral, but _order still == 1 | |
| _updown = BondDownChar; | |
| _ptr++; | |
| break; | |
| _ptr++; | |
| case '\\': // chiral, but _order still == 1 | |
| _updown = BondUpChar; | |
| _ptr++; | |
| break; | |
| default: // no bond indicator just leave order = 1 | |
| break; | |
| } | |
| if (*_ptr == '%') // external bond indicator > 10 | |
| { | |
| _ptr++; | |
| str[0] = *_ptr; | |
| _ptr++; | |
| str[1] = *_ptr; | |
| str[2] = '\0'; | |
| } | |
| else // simple single digit external bond indicator | |
| { | |
| str[0] = *_ptr; | |
| str[1] = '\0'; | |
| } | |
| digit = atoi(str); // convert indicator to digit | |
| //check for dot disconnect closures | |
| vector<ExternalBond>::iterator bond; | |
| int upDown, bondOrder; | |
| for (bond = _extbond.begin(); bond != _extbond.end(); bond++) { | |
| if (bond->digit == digit) { | |
| upDown = (_updown > bond->updown) ? _updown : bond->updown; | |
| bondOrder = (_order > bond->order) ? _order : bond->order; | |
| mol.AddBond(bond->prev, _prev, bondOrder); | |
| // store up/down | |
| if (upDown == BondUpChar || upDown == BondDownChar) | |
| _upDownMap[mol.GetBond(bond->prev, _prev)] = upDown; | |
| // after adding a bond to atom "_prev" | |
| // search to see if atom is bonded to a chiral atom | |
| InsertTetrahedralRef(mol, bond->prev - 1); | |
| InsertSquarePlanarRef(mol, bond->prev - 1); | |
| _extbond.erase(bond); | |
| _updown = ' '; | |
| _order = 0; | |
| return true; | |
| } | |
| } | |
| //since no closures save another ext bond | |
| ExternalBond extBond; | |
| extBond.digit = digit; | |
| extBond.prev = _prev; | |
| extBond.order = _order; | |
| extBond.updown = _updown; | |
| _extbond.push_back(extBond); | |
| _order = 1; | |
| _updown = ' '; | |
| return(true); | |
| } | |
| bool OBSmilesParser::ParseRingBond(OBMol &mol) | |
| { | |
| int digit; | |
| char str[10]; | |
| if (*_ptr == '%') | |
| { | |
| _ptr++; | |
| str[0] = *_ptr; | |
| _ptr++; | |
| str[1] = *_ptr; | |
| str[2] = '\0'; | |
| } | |
| else | |
| { | |
| str[0] = *_ptr; | |
| str[1] = '\0'; | |
| } | |
| digit = atoi(str); | |
| vector<RingClosureBond>::iterator bond; | |
| int upDown, bondOrder; | |
| for (bond = _rclose.begin(); bond != _rclose.end(); ++bond) { | |
| if (bond->digit == digit) { | |
| upDown = (_updown > bond->updown) ? _updown : bond->updown; | |
| bondOrder = (_order > bond->order) ? _order : bond->order; | |
| // Check if this ring closure bond may be aromatic and set order accordingly | |
| if (bondOrder == 1) { | |
| OBAtom *a1 = mol.GetAtom(bond->prev); | |
| OBAtom *a2 = mol.GetAtom(_prev); | |
| mol.SetAromaticPerceived(); // prevent aromaticity analysis | |
| if (a1->IsAromatic() && a2->IsAromatic()) | |
| bondOrder = 5; | |
| mol.UnsetAromaticPerceived(); | |
| } | |
| mol.AddBond(bond->prev, _prev, bondOrder, 0, bond->numConnections); | |
| // store up/down | |
| if (upDown == BondUpChar || upDown == BondDownChar) | |
| _upDownMap[mol.GetBond(bond->prev, _prev)] = upDown; | |
| // For assigning cis/trans in the presence of bond closures, we need to | |
| // remember info on all bond closure bonds. | |
| StereoRingBond sb; | |
| sb.updown.push_back(_updown); | |
| sb.atoms.push_back(mol.GetAtom(_prev)); | |
| sb.updown.push_back(bond->updown); | |
| sb.atoms.push_back(mol.GetAtom(bond->prev)); | |
| _stereorbond[mol.GetBond(bond->prev, _prev)] = sb; // Store for later | |
| // after adding a bond to atom "_prev" | |
| // search to see if atom is bonded to a chiral atom | |
| // need to check both _prev and bond->prev as closure is direction independent | |
| InsertTetrahedralRef(mol, bond->prev - 1); | |
| InsertSquarePlanarRef(mol, bond->prev - 1); | |
| // FIXME: needed for squreplanar too?? | |
| map<OBAtom*, OBTetrahedralStereo::Config*>::iterator ChiralSearch; | |
| ChiralSearch = _tetrahedralMap.find(mol.GetAtom(bond->prev)); | |
| if (ChiralSearch != _tetrahedralMap.end() && ChiralSearch->second != NULL) { | |
| int insertpos = bond->numConnections - 1; | |
| if (insertpos < 0) { | |
| if (ChiralSearch->second->from != OBStereo::NoRef) | |
| obErrorLog.ThrowError(__FUNCTION__, "Warning: Overwriting previous from reference id.", obWarning); | |
| (ChiralSearch->second)->from = mol.GetAtom(_prev)->GetId(); | |
| // cerr << "Adding " << mol.GetAtom(_prev)->GetId() << " at Config.from to " << ChiralSearch->second << endl; | |
| } else { | |
| if (ChiralSearch->second->refs[insertpos] != OBStereo::NoRef) | |
| obErrorLog.ThrowError(__FUNCTION__, "Warning: Overwriting previously set reference id.", obWarning); | |
| (ChiralSearch->second)->refs[insertpos] = mol.GetAtom(_prev)->GetId(); | |
| // cerr << "Adding " << mol.GetAtom(_prev)->GetId() << " at " | |
| // << insertpos << " to " << ChiralSearch->second << endl; | |
| } | |
| } | |
| //CM ensure neither atoms in ring closure is a radical centre | |
| OBAtom* patom = mol.GetAtom(_prev); | |
| patom->SetSpinMultiplicity(0); | |
| patom = mol.GetAtom(bond->prev); | |
| patom->SetSpinMultiplicity(0); | |
| //CM end | |
| _rclose.erase(bond); | |
| _updown = ' '; | |
| _order = 1; | |
| return true; | |
| } | |
| } | |
| //since no closures save another rclose bond | |
| RingClosureBond ringClosure; | |
| ringClosure.digit = digit; | |
| ringClosure.prev = _prev; | |
| ringClosure.order = _order; | |
| ringClosure.updown = _updown; | |
| OBAtom* atom = mol.GetAtom(_prev); | |
| if (!atom) { | |
| obErrorLog.ThrowError(__FUNCTION__,"Number not parsed correctly as a ring bond", obWarning); | |
| return false; | |
| } | |
| ringClosure.numConnections = NumConnections(atom); //store position to insert closure bond | |
| _rclose.push_back(ringClosure); | |
| _order = 1; | |
| _updown = ' '; | |
| return(true); | |
| } | |
| // NumConnections finds the number of connections already made to | |
| // a particular atom. This is used to figure out the correct position | |
| // to insert an atom ID into atom4refs | |
| int OBSmilesParser::NumConnections(OBAtom *atom) { | |
| int val = atom->GetValence(); | |
| int idx = atom->GetIdx(); | |
| vector<RingClosureBond>::iterator bond; | |
| //correct for multiple closure bonds to a single atom | |
| for (bond = _rclose.begin(); bond != _rclose.end(); ++bond) | |
| if (bond->prev == idx) | |
| val++; | |
| return val; | |
| } | |
| /*---------------------------------------------------------------------- | |
| * CLASS: OBBondClosureInfo: For recording bond-closure digits as | |
| * work progresses on canonical SMILES. | |
| ----------------------------------------------------------------------*/ | |
| class OBBondClosureInfo | |
| { | |
| public: | |
| OBAtom *toatom; // second atom in SMILES order | |
| OBAtom *fromatom; // first atom in SMILES order | |
| OBBond *bond; | |
| int ringdigit; | |
| int is_open; // TRUE if SMILES processing hasn't reached 'toatom' yet | |
| OBBondClosureInfo(OBAtom *, OBAtom*, OBBond*, int, bool); | |
| ~OBBondClosureInfo(); | |
| }; | |
| OBBondClosureInfo::OBBondClosureInfo(OBAtom *a1, OBAtom *a2, OBBond *b, int rd, bool open) | |
| { | |
| toatom = a1; | |
| fromatom = a2; | |
| bond = b; | |
| ringdigit = rd; | |
| is_open = open; | |
| } | |
| OBBondClosureInfo::~OBBondClosureInfo() | |
| { | |
| } | |
| /*---------------------------------------------------------------------- | |
| * CLASS: OBCanSmiNode: A Tree structure, each node of which is an atom in | |
| * the tree being built to write out the SMILES. | |
| ----------------------------------------------------------------------*/ | |
| class OBCanSmiNode | |
| { | |
| OBAtom *_atom,*_parent; | |
| std::vector<OBCanSmiNode*> _child_nodes; | |
| std::vector<OBBond*> _child_bonds; | |
| public: | |
| OBCanSmiNode(OBAtom *atom); | |
| ~OBCanSmiNode(); | |
| int Size() | |
| { | |
| return(_child_nodes.empty() ? 0 : _child_nodes.size()); | |
| } | |
| void SetParent(OBAtom *a) | |
| { | |
| _parent = a; | |
| } | |
| void AddChildNode(OBCanSmiNode*,OBBond*); | |
| OBAtom *GetAtom() | |
| { | |
| return(_atom); | |
| } | |
| OBAtom *GetParent() | |
| { | |
| return(_parent); | |
| } | |
| OBAtom *GetChildAtom(int i) | |
| { | |
| return(_child_nodes[i]->GetAtom()); | |
| } | |
| OBBond *GetChildBond(int i) | |
| { | |
| return(_child_bonds[i]); | |
| } | |
| OBCanSmiNode *GetChildNode(int i) | |
| { | |
| return(_child_nodes[i]); | |
| } | |
| }; | |
| OBCanSmiNode::OBCanSmiNode(OBAtom *atom) | |
| { | |
| _atom = atom; | |
| _parent = NULL; | |
| _child_nodes.clear(); | |
| _child_bonds.clear(); | |
| } | |
| void OBCanSmiNode::AddChildNode(OBCanSmiNode *node,OBBond *bond) | |
| { | |
| _child_nodes.push_back(node); | |
| _child_bonds.push_back(bond); | |
| } | |
| OBCanSmiNode::~OBCanSmiNode() | |
| { | |
| vector<OBCanSmiNode*>::iterator i; | |
| for (i = _child_nodes.begin();i != _child_nodes.end();i++) | |
| delete (*i); | |
| } | |
| /*---------------------------------------------------------------------- | |
| * CLASS OBMol2Cansmi - Declarations | |
| ----------------------------------------------------------------------*/ | |
| class OBMol2Cansmi | |
| { | |
| std::vector<int> _atmorder; | |
| std::vector<bool> _aromNH; | |
| OBBitVec _uatoms,_ubonds; | |
| std::vector<OBBondClosureInfo> _vopen; | |
| std::string _canorder; | |
| std::vector<OBCisTransStereo> _cistrans, _unvisited_cistrans; | |
| std::map<OBBond *, bool> _isup; | |
| bool _canonicalOutput; // regular or canonical SMILES | |
| OBConversion* _pconv; | |
| OBAtomClassData* _pac; | |
| OBAtom* _endatom; | |
| OBAtom* _startatom; | |
| public: | |
| OBMol2Cansmi() | |
| { | |
| } | |
| ~OBMol2Cansmi() {} | |
| void Init(bool canonicalOutput = true, OBConversion* pconv=NULL); | |
| void CreateCisTrans(OBMol&); | |
| char GetCisTransBondSymbol(OBBond *, OBCanSmiNode *); | |
| void AddHydrogenToChiralCenters(OBMol &mol, OBBitVec &frag_atoms); | |
| bool AtomIsChiral(OBAtom *atom); | |
| bool BuildCanonTree(OBMol &mol, OBBitVec &frag_atoms, | |
| vector<unsigned int> &canonical_order, | |
| OBCanSmiNode *node); | |
| void CorrectAromaticAmineCharge(OBMol&); | |
| void CreateFragCansmiString(OBMol&, OBBitVec&, bool, char *); | |
| bool GetTetrahedralStereo(OBCanSmiNode*, | |
| vector<OBAtom*>&chiral_neighbors, | |
| vector<unsigned int> &symmetry_classes, | |
| char*); | |
| bool GetSquarePlanarStereo(OBCanSmiNode*, | |
| vector<OBAtom*>&chiral_neighbors, | |
| vector<unsigned int> &symmetry_classes, | |
| char*); | |
| bool GetSmilesElement(OBCanSmiNode*, | |
| vector<OBAtom*>&chiral_neighbors, | |
| vector<unsigned int> &symmetry_classes, | |
| char*, | |
| bool isomeric); | |
| int GetSmilesValence(OBAtom *atom); | |
| int GetUnusedIndex(); | |
| vector<OBBondClosureInfo> | |
| GetCanonClosureDigits(OBAtom *atom, | |
| OBBitVec &frag_atoms, | |
| vector<unsigned int> &canonical_order); | |
| bool IsSuppressedHydrogen(OBAtom *atom); | |
| void ToCansmilesString(OBCanSmiNode *node, | |
| char *buffer, | |
| OBBitVec &frag_atoms, | |
| vector<unsigned int> &symmetry_classes, | |
| vector<unsigned int> &canonical_order, | |
| bool isomeric); | |
| bool HasStereoDblBond(OBBond *, OBAtom *atom); | |
| void MyFindChildren(OBMol &mol, vector<OBAtom*> &children, OBBitVec &seen, OBAtom *end); | |
| std::string &GetOutputOrder() | |
| { | |
| return _canorder; | |
| } | |
| bool ParseInChI(OBMol &mol, vector<int> &atom_order); | |
| }; | |
| /*---------------------------------------------------------------------- | |
| * CLASS OBMol2Cansmi - implementation | |
| ----------------------------------------------------------------------*/ | |
| /*************************************************************************** | |
| * FUNCTION: Init | |
| * | |
| * DESCRIPTION: | |
| * Initializes the OBMol2Cansmi writer object. | |
| ***************************************************************************/ | |
| void OBMol2Cansmi::Init(bool canonical, OBConversion* pconv) | |
| { | |
| _atmorder.clear(); | |
| _aromNH.clear(); | |
| _uatoms.Clear(); | |
| _ubonds.Clear(); | |
| _vopen.clear(); | |
| _canorder.clear(); | |
| _pac = NULL; | |
| _pconv = pconv; | |
| _canonicalOutput = canonical; | |
| _endatom = NULL; | |
| _startatom = NULL; | |
| } | |
| /*************************************************************************** | |
| * FUNCTION: GetUnusedIndex | |
| * | |
| * DESCRIPTION: | |
| * Returns the next available bond-closure index for a SMILES. | |
| * | |
| * You could just do this sequentially, not reusing bond-closure | |
| * digits, thus: | |
| * | |
| * c1cc2ccccc2cc1 napthalene | |
| * c1ccccc1c2ccccc2 biphenyl | |
| * | |
| * But molecules with more than ten rings, this requires the use of | |
| * two-digit ring closures (like c1ccccc1C...c%11ccccc%11). To help | |
| * avoid digit reuse, this finds the lowest digit that's not currently | |
| * "open", thus | |
| * | |
| * c1cc2ccccc2cc1 napthalene (same) | |
| * c1ccccc1c1ccccc1 biphenyl (reuses "1") | |
| * | |
| ***************************************************************************/ | |
| int OBMol2Cansmi::GetUnusedIndex() | |
| { | |
| int idx=1; | |
| vector<OBBondClosureInfo>::iterator j; | |
| for (j = _vopen.begin();j != _vopen.end();) | |
| if (j->ringdigit == idx) | |
| { | |
| idx++; //increment idx and start over if digit is already used | |
| j = _vopen.begin(); | |
| } | |
| else j++; | |
| return(idx); | |
| } | |
| /*************************************************************************** | |
| * FUNCTION: CorrectAromaticAmineCharge | |
| * | |
| * DESCRIPTION: | |
| * Finds all aromatic nitrogens, and updates the _aromNH vector to | |
| * note which aromatic nitrogens require an H to balance the charge. | |
| ***************************************************************************/ | |
| void OBMol2Cansmi::CorrectAromaticAmineCharge(OBMol &mol) | |
| { | |
| OBAtom *atom; | |
| vector<OBNodeBase*>::iterator i; | |
| _aromNH.clear(); | |
| _aromNH.resize(mol.NumAtoms()+1); | |
| for (atom = mol.BeginAtom(i);atom;atom = mol.NextAtom(i)) | |
| if (atom->IsNitrogen() && atom->IsAromatic()) | |
| if (atom->GetHvyValence() == 2) | |
| { | |
| if (atom->GetValence() == 3 || atom->GetImplicitValence() == 3) | |
| _aromNH[atom->GetIdx()] = true; | |
| } | |
| } | |
| void OBMol2Cansmi::CreateCisTrans(OBMol &mol) | |
| { | |
| std::vector<OBGenericData*> vdata = mol.GetAllData(OBGenericDataType::StereoData); | |
| for (std::vector<OBGenericData*>::iterator data = vdata.begin(); data != vdata.end(); ++data) { | |
| if (((OBStereoBase*)*data)->GetType() != OBStereo::CisTrans) | |
| continue; | |
| OBCisTransStereo *ct = dynamic_cast<OBCisTransStereo*>(*data); | |
| if (ct && ct->GetConfig().specified) { | |
| OBCisTransStereo::Config config = ct->GetConfig(); | |
| OBBond* dbl_bond = mol.GetBond(mol.GetAtomById(config.begin), mol.GetAtomById(config.end)); | |
| if (!dbl_bond) | |
| continue; | |
| // Do not output cis/trans bond symbols for double bonds in rings of size IMPLICIT_CIS_RING_SIZE or less | |
| OBRing* ring = dbl_bond->FindSmallestRing(); | |
| if (!ring || ring->Size()>IMPLICIT_CIS_RING_SIZE) | |
| _cistrans.push_back(*ct); | |
| } | |
| } | |
| _unvisited_cistrans = _cistrans; // Make a copy of _cistrans | |
| } | |
| bool OBMol2Cansmi::HasStereoDblBond(OBBond *bond, OBAtom *atom) | |
| { | |
| // This is a helper function for determining whether to | |
| // consider writing a cis/trans bond symbol for bond closures. | |
| // Returns TRUE only if the atom is connected to the cis/trans | |
| // double bond. To handle the case of conjugated bonds, one must | |
| // remember that the ring opening preceded the closure, so if the | |
| // ring opening bond was on a stereocenter, it got the symbol already. | |
| if (!bond || !atom) | |
| return false; | |
| std::vector<OBCisTransStereo>::iterator ChiralSearch; | |
| OBAtom *nbr_atom = bond->GetNbrAtom(atom); | |
| bool stereo_dbl = false; | |
| if (atom->HasDoubleBond()) { | |
| stereo_dbl = true; | |
| if (nbr_atom->HasDoubleBond()) | |
| // Check whether the nbr_atom is a begin or end in any CisTransStereo. If so, | |
| // then the ring opening already had the symbol. | |
| for (ChiralSearch = _cistrans.begin(); ChiralSearch != _cistrans.end(); ChiralSearch++) { | |
| OBCisTransStereo::Config cfg = ChiralSearch->GetConfig(); | |
| if (nbr_atom->GetId() == cfg.begin || nbr_atom->GetId() == cfg.end) { | |
| // I don't think I need to check whether it has a bond with atom | |
| stereo_dbl = false; | |
| break; | |
| } | |
| } | |
| } | |
| return stereo_dbl; | |
| } | |
| char OBMol2Cansmi::GetCisTransBondSymbol(OBBond *bond, OBCanSmiNode *node) | |
| { | |
| // Given a cis/trans bond and the node in the SMILES tree, figures out | |
| // whether to write a '/' or '\' symbol. | |
| // See the comments smilesformat.cpp: FixCisTransBonds(). | |
| // | |
| // The OBCanSmiNode is the most-recently-written atom in the SMILES string | |
| // we're creating. If it is the double-bonded atom, then the substituent | |
| // follows, so that "up" means '/' and "down" means '\'. If the OBCanSmiNode | |
| // atom is the single-bonded atom then the double-bonded atom comes next, | |
| // in which case "up" means '\' and "down" means '/'. | |
| // | |
| // Note that the story is not so simple for conjugated systems where | |
| // we need to take into account what symbol was already used. | |
| if (!bond /*|| (!bond->IsUp() && !bond->IsDown())*/) | |
| return '\0'; | |
| OBAtom *atom = node->GetAtom(); | |
| OBAtom *nbr_atom = bond->GetNbrAtom(atom); | |
| OBMol *mol = atom->GetParent(); | |
| // If this bond is in two different obcistransstereos (e.g. a conjugated system) | |
| // choose the one where the dbl bond atom is *atom (i.e. the one which comes first) | |
| std::vector<OBCisTransStereo>::iterator ChiralSearch; | |
| std::vector<unsigned long>::iterator lookup; | |
| bool dbl_bond_first = false; | |
| if (atom->HasDoubleBond()) | |
| { | |
| if (nbr_atom->HasDoubleBond()) | |
| // Check whether the atom is a center in any CisTransStereo. If so,# | |
| // then this CisTransStereo takes precedence over any other | |
| for (ChiralSearch = _cistrans.begin(); ChiralSearch != _cistrans.end(); ChiralSearch++) | |
| { | |
| OBCisTransStereo::Config cfg = ChiralSearch->GetConfig(); | |
| if (atom->GetId() == cfg.begin || atom->GetId() == cfg.end) { | |
| // I don't think I need to check whether it has a bond with nbr_atom | |
| dbl_bond_first = true; | |
| break; | |
| } | |
| } | |
| else | |
| dbl_bond_first = true; | |
| } | |
| // Has the symbol for this bond already been set? | |
| if (_isup.find(bond) == _isup.end()) // No it hasn't | |
| { | |
| unsigned int endatom, centeratom; | |
| if (dbl_bond_first) { | |
| if (atom->IsAromatic()) | |
| FOR_BONDS_OF_ATOM (bond, atom) | |
| if (bond->IsAromatic() && bond->IsDouble()) | |
| return 0; | |
| endatom = nbr_atom->GetId(); | |
| centeratom = atom->GetId(); | |
| } | |
| else { | |
| if (nbr_atom->IsAromatic()) | |
| FOR_BONDS_OF_ATOM (bond, nbr_atom) | |
| if (bond->IsAromatic() && bond->IsDouble()) | |
| return 0; | |
| endatom = atom->GetId(); | |
| centeratom = nbr_atom->GetId(); | |
| } | |
| for (ChiralSearch = _unvisited_cistrans.begin(); ChiralSearch != _unvisited_cistrans.end(); ChiralSearch++) | |
| { | |
| OBCisTransStereo::Config cfg = ChiralSearch->GetConfig(OBStereo::ShapeU); | |
| lookup = std::find(cfg.refs.begin(), cfg.refs.end(), endatom); | |
| if (lookup != cfg.refs.end() && (cfg.begin == centeratom || cfg.end == centeratom)) | |
| { // Atoms endatom and centeratom are in this OBCisTransStereo | |
| std::vector<OBBond *> refbonds(4, (OBBond*)NULL); | |
| refbonds[0] = mol->GetBond(mol->GetAtomById(cfg.refs[0]), mol->GetAtomById(cfg.begin)); | |
| if (cfg.refs[1] != OBStereo::ImplicitRef) // Could be a hydrogen | |
| refbonds[1] = mol->GetBond(mol->GetAtomById(cfg.refs[1]), mol->GetAtomById(cfg.begin)); | |
| if (cfg.refs[2] != OBStereo::ImplicitRef) // Could be a hydrogen | |
| refbonds[2] = mol->GetBond(mol->GetAtomById(cfg.refs[2]), mol->GetAtomById(cfg.end)); | |
| if (cfg.refs[3] != OBStereo::ImplicitRef) // Could be a hydrogen | |
| refbonds[3] = mol->GetBond(mol->GetAtomById(cfg.refs[3]), mol->GetAtomById(cfg.end)); | |
| // What symbol would the four refs use if before the dbl bond? | |
| bool config[4] = {true, false, false, true}; | |
| bool use_same_config = true; | |
| // (The actual config used will be config ^ use_same_config) | |
| // Make sure that the symbol for this bond is true. This ensures | |
| // a canonical string, so that it's always C/C=C/C and not C\C=C\C. | |
| for(int i=0;i<4;i++) | |
| if (refbonds[i] == bond) | |
| if (!config[i]) | |
| { | |
| use_same_config = false; | |
| break; | |
| } | |
| // If any of the bonds have been previously set, now set them all | |
| // in the opposite sense | |
| for(int i=0;i<4;i++) | |
| if (_isup.find(refbonds[i]) != _isup.end()) // We have already set this one (conjugated bond) | |
| if (_isup[refbonds[i]] == (config[i] ^ use_same_config)) | |
| { | |
| use_same_config = !use_same_config; | |
| break; | |
| } | |
| // Set the configuration | |
| for(int i=0;i<4;i++) | |
| if (refbonds[i] != NULL) | |
| _isup[refbonds[i]] = config[i] ^ use_same_config; | |
| _unvisited_cistrans.erase(ChiralSearch); | |
| break; // break out of the ChiralSearch | |
| } | |
| } | |
| } | |
| // If ChiralSearch didn't find the bond, we can't set this symbol | |
| if (_isup.find(bond) == _isup.end()) return '\0'; | |
| if (dbl_bond_first) { // double-bonded atom is first in the SMILES | |
| if (_isup[bond]) | |
| return '/'; | |
| else | |
| return '\\'; | |
| } | |
| else { // double-bonded atom is second in the SMILES | |
| if (_isup[bond]) | |
| return '\\'; | |
| else | |
| return '/'; | |
| } | |
| } | |
| // Helper function | |
| // Is this atom an oxygen in a water molecule | |
| // We know the oxygen is connected to one ion, but check for non-hydrogens | |
| // Returns: true if the atom is an oxygen and connected to two hydrogens + one coordinated atom | |
| bool isWaterOxygen(OBAtom *atom) | |
| { | |
| if (!atom->IsOxygen()) | |
| return false; | |
| int nonHydrogenCount = 0; | |
| int hydrogenCount = 0; | |
| FOR_NBORS_OF_ATOM(neighbor, *atom) { | |
| if (!neighbor->IsHydrogen()) | |
| nonHydrogenCount++; | |
| else | |
| hydrogenCount++; | |
| } | |
| return (hydrogenCount == 2 && nonHydrogenCount == 1); | |
| } | |
| /*************************************************************************** | |
| * FUNCTION: GetSmilesElement | |
| * | |
| * DESCRIPTION: | |
| * Writes the symbol for an atom, e.g. "C" or "[NH2]" or "[C@@H]". | |
| * | |
| * RETURNS: true (always) | |
| ***************************************************************************/ | |
| bool OBMol2Cansmi::GetSmilesElement(OBCanSmiNode *node, | |
| vector<OBAtom*>&chiral_neighbors, | |
| vector<unsigned int> &symmetry_classes, | |
| char *buffer, | |
| bool isomeric) | |
| { | |
| char symbol[10]; | |
| symbol[0] = '\0'; // make sure to initialize for all paths below | |
| char bracketBuffer[32]; | |
| bool bracketElement = false; | |
| bool normalValence = true; | |
| bool writeExplicitHydrogen = false; | |
| OBAtom *atom = node->GetAtom(); | |
| int bosum = atom->KBOSum(); | |
| int maxBonds = etab.GetMaxBonds(atom->GetAtomicNum()); | |
| // default -- bracket if we have more bonds than possible | |
| // we have some special cases below | |
| bracketElement = !(normalValence = (bosum <= maxBonds)); | |
| int element = atom->GetAtomicNum(); | |
| switch (element) { | |
| case 0: // pseudo '*' atom has no normal valence, needs brackets if it has any hydrogens | |
| normalValence = (atom->ExplicitHydrogenCount() == 0); | |
| bracketElement = !normalValence; | |
| break; | |
| case 5: | |
| bracketElement = !(normalValence = (bosum == 3)); | |
| break; | |
| case 6: break; | |
| case 7: | |
| if (atom->IsAromatic() | |
| && atom->GetHvyValence() == 2 | |
| && atom->GetImplicitValence() == 3) { | |
| #ifdef __INTEL_COMPILER | |
| #pragma warning (disable:187) | |
| #endif | |
| // warning #187 is use of "=" where "==" may have been intended | |
| bracketElement = !(normalValence = false); | |
| #ifdef __INTEL_COMPILER | |
| #pragma warning (default:187) | |
| #endif | |
| break; | |
| } | |
| else | |
| bracketElement = !(normalValence = (bosum == 3 || bosum == 5)); | |
| break; | |
| case 8: break; | |
| case 9: break; | |
| case 15: break; | |
| case 16: | |
| bracketElement = !(normalValence = (bosum == 2 || bosum == 4 || bosum == 6)); | |
| break; | |
| case 17: break; | |
| case 35: break; | |
| case 53: break; | |
| default: bracketElement = true; | |
| } | |
| if (atom->GetFormalCharge() != 0) //bracket charged elements | |
| bracketElement = true; | |
| if(isomeric && atom->GetIsotope()) | |
| bracketElement = true; | |
| //If the molecule has Atom Class data and -xa option set and atom has data | |
| if(_pac && _pac->HasClass(atom->GetIdx())) | |
| bracketElement = true; | |
| char stereo[5] = ""; | |
| if (GetSmilesValence(atom) > 2 && isomeric) { | |
| if (GetTetrahedralStereo(node, chiral_neighbors, symmetry_classes, stereo)) | |
| strcat(buffer,stereo); | |
| if (GetSquarePlanarStereo(node, chiral_neighbors, symmetry_classes, stereo)) | |
| strcat(buffer,stereo); | |
| } | |
| if (stereo[0] != '\0') | |
| bracketElement = true; | |
| if (atom->GetSpinMultiplicity()) { | |
| //For radicals output bracket form anyway unless r option specified | |
| if(!(_pconv && _pconv->IsOption ("r"))) | |
| bracketElement = true; | |
| } | |
| // Add brackets and explicit hydrogens for coordinated water molecules | |
| // PR#2505562 | |
| if (isWaterOxygen(atom)) { | |
| bracketElement = true; | |
| writeExplicitHydrogen = true; | |
| } | |
| //Output as [CH3][CH3] rather than CC if -xh option has been specified | |
| if (!bracketElement && _pconv && _pconv->IsOption("h") && atom->ExplicitHydrogenCount() > 0) { | |
| bracketElement = true; | |
| writeExplicitHydrogen = true; | |
| } | |
| if (!bracketElement) { | |
| // Ordinary non-bracket element | |
| if (element) { | |
| strcpy(symbol,etab.GetSymbol(atom->GetAtomicNum())); | |
| if (atom->IsAromatic()) | |
| symbol[0] = tolower(symbol[0]); | |
| //Radical centres lc if r option set | |
| if(atom->GetSpinMultiplicity() && _pconv && _pconv->IsOption ("r")) | |
| symbol[0] = tolower(symbol[0]); | |
| } | |
| // Atomic number zero - either '*' or an external atom | |
| else { | |
| bool external = false; | |
| vector<pair<int,pair<OBAtom *,OBBond *> > > *externalBonds = | |
| (vector<pair<int,pair<OBAtom *,OBBond *> > > *)((OBMol*)atom->GetParent())->GetData("extBonds"); | |
| vector<pair<int,pair<OBAtom *,OBBond *> > >::iterator externalBond; | |
| if (externalBonds) | |
| for(externalBond = externalBonds->begin();externalBond != externalBonds->end();externalBond++) { | |
| if (externalBond->second.first == atom) { | |
| external = true; | |
| strcpy(symbol,"&"); | |
| OBBond *bond = externalBond->second.second; | |
| if (bond->IsUp()) { | |
| if ( (bond->GetBeginAtom())->HasDoubleBond() || | |
| (bond->GetEndAtom())->HasDoubleBond() ) | |
| strcat(symbol,"\\"); | |
| } | |
| if (bond->IsDown()) { | |
| if ( (bond->GetBeginAtom())->HasDoubleBond() || | |
| (bond->GetEndAtom())->HasDoubleBond() ) | |
| strcat(symbol,"/"); | |
| } | |
| if (bond->GetBO() == 2 && !bond->IsAromatic()) | |
| strcat(symbol,"="); | |
| if (bond->GetBO() == 2 && bond->IsAromatic()) | |
| strcat(symbol,":"); | |
| if (bond->GetBO() == 3) | |
| strcat(symbol,"#"); | |
| if (bond->GetBO() == 4) | |
| strcat(symbol,"$"); | |
| sprintf(symbol+strlen(symbol),"%d",externalBond->first); | |
| break; | |
| } | |
| } | |
| if(!external) | |
| strcpy(symbol,"*"); | |
| } | |
| strcpy(buffer, symbol); | |
| return(true); | |
| } | |
| // Bracketed atoms, e.g. [Pb], [OH-], [C@] | |
| bracketBuffer[0] = '\0'; | |
| if (isomeric && atom->GetIsotope()) { | |
| char iso[4]; | |
| sprintf(iso,"%d",atom->GetIsotope()); | |
| strcat(bracketBuffer,iso); | |
| } | |
| if (!atom->GetAtomicNum()) | |
| strcpy(symbol,"*"); | |
| else { | |
| strcpy(symbol,etab.GetSymbol(atom->GetAtomicNum())); | |
| if (atom->IsAromatic()) | |
| symbol[0] = tolower(symbol[0]); | |
| } | |
| strcat(bracketBuffer,symbol); | |
| // If chiral, append '@' or '@@' | |
| if (stereo[0] != '\0') | |
| strcat(bracketBuffer, stereo); | |
| // don't try to handle implicit hydrogens for metals | |
| if ( (element >= 21 && element <= 30) | |
| || (element >= 39 && element <= 49) | |
| || (element >= 71 && element <= 82) ) | |
| writeExplicitHydrogen = true; | |
| // Add extra hydrogens. If this is a bracket-atom *only* because the | |
| // "-xh" option was specified, then we're writing a SMARTS, so we | |
| // write the explicit hydrogen atom count only. Otherwise we write | |
| // the proper total number of hydrogens. | |
| if (!atom->IsHydrogen()) { | |
| int hcount; | |
| if (writeExplicitHydrogen) | |
| hcount = atom->ExplicitHydrogenCount(); | |
| else | |
| // if "isomeric", doesn't count isotopic H | |
| hcount = atom->ImplicitHydrogenCount() + atom->ExplicitHydrogenCount(isomeric); | |
| // OK, see if we need to decrease the H-count due to hydrogens we'll write later | |
| FOR_NBORS_OF_ATOM(nbr, atom) { | |
| if (nbr->IsHydrogen() && ( (nbr->GetFormalCharge() != 0) | |
| || (nbr->GetValence() > 1) ) ) { | |
| hcount--; | |
| } | |
| } | |
| if ((atom == _endatom || atom == _startatom) && hcount>0) // Leave a free valence for attachment | |
| hcount--; | |
| if (hcount != 0) { | |
| strcat(bracketBuffer,"H"); | |
| if (hcount > 1) { | |
| char tcount[10]; | |
| sprintf(tcount,"%d", hcount); | |
| strcat(bracketBuffer,tcount); | |
| } | |
| } | |
| } | |
| // Append charge to the end | |
| if (atom->GetFormalCharge() != 0) { | |
| if (atom->GetFormalCharge() > 0) | |
| strcat(bracketBuffer,"+"); | |
| else | |
| strcat(bracketBuffer,"-"); | |
| if (abs(atom->GetFormalCharge()) > 1) | |
| sprintf(bracketBuffer+strlen(bracketBuffer), "%d", abs(atom->GetFormalCharge())); | |
| } | |
| //atom class e.g. [C:2] | |
| if (_pac) | |
| strcat(bracketBuffer, _pac->GetClassString(atom->GetIdx()).c_str()); | |
| // Check if this is an aromatic "n" or "s" that doesn't need a hydrogen after all | |
| if (atom->IsAromatic() && strlen(bracketBuffer) == 1) | |
| bracketElement = false; | |
| // if the element is supposed to be bracketed (e.g., [U]), *always* use brackets | |
| if (strlen(bracketBuffer) > 1 || bracketElement) { | |
| strcpy(buffer, "["); | |
| strcat(buffer, bracketBuffer); | |
| strcat(buffer,"]"); | |
| } else { | |
| strcpy(buffer, bracketBuffer); | |
| } | |
| return(true); | |
| } | |
| /*************************************************************************** | |
| * FUNCTION: AtomIsChiral | |
| * | |
| * DESCRIPTION: | |
| * Returns TRUE if the atom is genuinely chiral, that is, it meets | |
| * the criteria from OBAtom::IsChiral, and additionally it actually | |
| * has a connected hash or wedge bond. | |
| * | |
| * We arbitrarily reject chiral nitrogen because for our purposes there's | |
| * no need to consider it. | |
| * | |
| * NOTE: This is a simplistic test. When the full SMILES canonicalization | |
| * includes chiral markings, this should check the symmetry classes | |
| * of the neighbors, not the hash/wedge bonds. | |
| ***************************************************************************/ | |
| bool OBMol2Cansmi::AtomIsChiral(OBAtom *atom) | |
| { | |
| OBMol *mol = dynamic_cast<OBMol*>(atom->GetParent()); | |
| OBStereoFacade stereoFacade(mol); | |
| return stereoFacade.HasTetrahedralStereo(atom->GetId()); | |
| /* | |
| if (!atom->IsChiral()) | |
| return false; | |
| if (atom->IsNitrogen()) | |
| return false; | |
| // Added by ghutchis 2007-06-04 -- make sure to check for 3D molecules | |
| // Fixes PR#1699418 | |
| if (atom->GetParent()->GetDimension() == 3) | |
| return true; // no hash/wedge for 3D molecules | |
| vector<int> symclass; | |
| FOR_BONDS_OF_ATOM(bond, atom) { | |
| if (bond->IsHash() || bond->IsWedge()) | |
| return true; | |
| } | |
| return false; | |
| */ | |
| } | |
| /*************************************************************************** | |
| * FUNCTION: GetTetrahedralStereo | |
| * | |
| * DESCRIPTION: | |
| * If the atom is chiral, fills in the string with either '@', '@@' | |
| * or '@?' and returns true, otherwise returns false. | |
| ***************************************************************************/ | |
| bool OBMol2Cansmi::GetTetrahedralStereo(OBCanSmiNode *node, | |
| vector<OBAtom*> &chiral_neighbors, | |
| vector<unsigned int> &symmetry_classes, | |
| char *stereo) | |
| { | |
| OBAtom *atom = node->GetAtom(); | |
| OBMol *mol = (OBMol*) atom->GetParent(); | |
| // If not enough chiral neighbors were passed in, we're done | |
| if (chiral_neighbors.size() < 4) | |
| return false; | |
| // OBStereoFacade will run symmetry analysis & stereo perception if needed | |
| OBStereoFacade stereoFacade(mol); | |
| OBTetrahedralStereo *ts = stereoFacade.GetTetrahedralStereo(atom->GetId()); | |
| // If atom is not a tetrahedral center, we're done | |
| if (!ts) | |
| return false; | |
| // get the Config struct defining the stereochemistry | |
| OBTetrahedralStereo::Config atomConfig = ts->GetConfig(); | |
| // Don't write '@?' for unspecified or unknown stereochemistry | |
| if (!atomConfig.specified || (atomConfig.specified && atomConfig.winding==OBStereo::UnknownWinding)) { | |
| // strcpy(stereo, "@?"); | |
| return true; | |
| } | |
| // create a Config struct with the chiral_neighbors in canonical output order | |
| OBStereo::Refs canonRefs; | |
| for (vector<OBAtom*>::const_iterator atom_it = chiral_neighbors.begin() + 1; atom_it != chiral_neighbors.end(); ++atom_it) { | |
| if (*atom_it) | |
| canonRefs.push_back((*atom_it)->GetId()); | |
| else // Handle a chiral lone pair, represented by a NULL OBAtom* in chiral_neighbors | |
| canonRefs.push_back(OBStereo::ImplicitRef); | |
| } | |
| OBTetrahedralStereo::Config canConfig; | |
| canConfig.center = atom->GetId(); | |
| if (chiral_neighbors[0]) | |
| canConfig.from = chiral_neighbors[0]->GetId(); | |
| else // Handle a chiral lone pair, represented by a NULL OBAtom* in chiral_neighbors | |
| canConfig.from = OBStereo::ImplicitRef; | |
| canConfig.refs = canonRefs; | |
| //cout << "atomConfig = " << atomConfig << endl; | |
| //cout << "canConfig = " << canConfig << endl; | |
| // canConfig is clockwise | |
| if (atomConfig == canConfig) | |
| strcpy(stereo, "@@"); | |
| else | |
| strcpy(stereo, "@"); | |
| return true; | |
| } | |
| /*************************************************************************** | |
| * FUNCTION: GetSquarePlanarStereo | |
| * | |
| * DESCRIPTION: | |
| * If the atom is chiral, fills in the string with either '@', '@@' | |
| * or '@?' and returns true, otherwise returns false. | |
| ***************************************************************************/ | |
| bool OBMol2Cansmi::GetSquarePlanarStereo(OBCanSmiNode *node, | |
| vector<OBAtom*> &chiral_neighbors, | |
| vector<unsigned int> &symmetry_classes, | |
| char *stereo) | |
| { | |
| OBAtom *atom = node->GetAtom(); | |
| OBMol *mol = (OBMol*) atom->GetParent(); | |
| // If no chiral neighbors were passed in, we're done | |
| if (chiral_neighbors.size() < 4) | |
| return false; | |
| // OBStereoFacade will run symmetry analysis & stereo perception if needed | |
| OBStereoFacade stereoFacade(mol); | |
| OBSquarePlanarStereo *sp = stereoFacade.GetSquarePlanarStereo(atom->GetId()); | |
| // If atom is not a square-planar center, we're done | |
| if (!sp) | |
| return false; | |
| // get the Config struct defining the stereochemistry | |
| OBSquarePlanarStereo::Config atomConfig = sp->GetConfig(); | |
| if (!atomConfig.specified) { | |
| // write '@?' for unspecified (unknown) stereochemistry | |
| // strcpy(stereo, "@?"); | |
| //return true; | |
| return false; | |
| } | |
| // create a Config struct with the chiral_neighbors in canonical output order | |
| OBStereo::Refs canonRefs = OBStereo::MakeRefs(chiral_neighbors[0]->GetId(), | |
| chiral_neighbors[1]->GetId(), chiral_neighbors[2]->GetId(), chiral_neighbors[3]->GetId()); | |
| OBSquarePlanarStereo::Config canConfig; | |
| canConfig.center = atom->GetId(); | |
| canConfig.refs = canonRefs; | |
| //cout << "atomConfig = " << atomConfig << endl; | |
| //cout << "canConfig = " << canConfig << endl; | |
| // canConfig is U shape | |
| if (atomConfig == canConfig) { | |
| strcpy(stereo, "@SP1"); | |
| return true; | |
| } | |
| canConfig.shape = OBStereo::Shape4; | |
| //cout << "canConfig = " << canConfig << endl; | |
| if (atomConfig == canConfig) { | |
| strcpy(stereo, "@SP2"); | |
| return true; | |
| } | |
| canConfig.shape = OBStereo::ShapeZ; | |
| //cout << "canConfig = " << canConfig << endl; | |
| if (atomConfig == canConfig) { | |
| strcpy(stereo, "@SP3"); | |
| return true; | |
| } | |
| return false; | |
| } | |
| //! Adaptation of OBMol::FindChildren to allow a vector of OBAtoms to be passed in | |
| // MOVE THIS TO OBMOL FOR 2.4 | |
| void OBMol2Cansmi::MyFindChildren(OBMol &mol, vector<OBAtom*> &children, OBBitVec &seen, OBAtom *end) | |
| { | |
| OBBitVec curr,next; | |
| OBBitVec used(seen); | |
| used |= end->GetIdx(); | |
| curr |= end->GetIdx(); | |
| children.clear(); | |
| int i; | |
| OBAtom *atom,*nbr; | |
| vector<OBBond*>::iterator j; | |
| for (;;) | |
| { | |
| next.Clear(); | |
| for (i = curr.NextBit(-1);i != curr.EndBit();i = curr.NextBit(i)) | |
| { | |
| atom = mol.GetAtom(i); | |
| for (nbr = atom->BeginNbrAtom(j);nbr;nbr = atom->NextNbrAtom(j)) | |
| if (!used[nbr->GetIdx()]) | |
| { | |
| children.push_back(nbr); | |
| next |= nbr->GetIdx(); | |
| used |= nbr->GetIdx(); | |
| } | |
| } | |
| if (next.Empty()) | |
| break; | |
| curr = next; | |
| } | |
| } | |
| /*************************************************************************** | |
| * FUNCTION: BuildCanonTree | |
| * | |
| * DESCRIPTION: | |
| * Builds the SMILES tree, in canonical order, for the specified | |
| * molecular fragment. | |
| ***************************************************************************/ | |
| bool OBMol2Cansmi::BuildCanonTree(OBMol &mol, | |
| OBBitVec &frag_atoms, | |
| vector<unsigned int> &canonical_order, | |
| OBCanSmiNode *node) | |
| { | |
| vector<OBEdgeBase*>::iterator i; | |
| OBAtom *nbr, *atom; | |
| vector<OBAtom *> sort_nbrs; | |
| vector<OBAtom *>::iterator ai; | |
| OBBond *bond; | |
| OBCanSmiNode *next; | |
| int idx; | |
| atom = node->GetAtom(); | |
| #if DEBUG | |
| cout << "BuildCanonTree: " << etab.GetSymbol(atom->GetAtomicNum()) << ", " << atom->GetIdx() << ", canorder " << canonical_order[atom->GetIdx()-1] << "\n"; | |
| #endif | |
| // Create a vector of neighbors sorted by canonical order, but favor | |
| // double and triple bonds over single and aromatic. This causes | |
| // ring-closure digits to avoid double and triple bonds. | |
| // | |
| // Since there are typically just one to three neighbors, we just do a | |
| // ordered insertion rather than sorting. | |
| bool favor_multiple = true; // Visit 'multiple' bonds first | |
| if (_pconv->IsOption("I") || _pconv->IsOption("N")) | |
| favor_multiple = false; // Visit in strict canonical order | |
| for (nbr = atom->BeginNbrAtom(i); nbr; nbr = atom->NextNbrAtom(i)) { | |
| idx = nbr->GetIdx(); | |
| if (nbr->IsHydrogen() && IsSuppressedHydrogen(nbr)) { | |
| _uatoms.SetBitOn(nbr->GetIdx()); // mark suppressed hydrogen, so it won't be considered | |
| continue; // later when looking for more fragments. | |
| } | |
| if (_uatoms[idx] || !frag_atoms.BitIsOn(idx)) | |
| continue; | |
| OBBond *nbr_bond = atom->GetBond(nbr); | |
| int new_needs_bsymbol = nbr_bond->IsDouble() || nbr_bond->IsTriple(); | |
| for (ai = sort_nbrs.begin(); ai != sort_nbrs.end(); ++ai) { | |
| bond = atom->GetBond(*ai); | |
| int sorted_needs_bsymbol = bond->IsDouble() || bond->IsTriple(); | |
| if (favor_multiple && new_needs_bsymbol && !sorted_needs_bsymbol) { | |
| sort_nbrs.insert(ai, nbr); | |
| ai = sort_nbrs.begin();//insert invalidated ai; set it to fail next test | |
| break; | |
| } | |
| if ( (!favor_multiple || new_needs_bsymbol == sorted_needs_bsymbol) | |
| && canonical_order[idx-1] < canonical_order[(*ai)->GetIdx()-1]) { | |
| sort_nbrs.insert(ai, nbr); | |
| ai = sort_nbrs.begin();//insert invalidated ai; set it to fail next test | |
| break; | |
| } | |
| } | |
| if (ai == sort_nbrs.end()) | |
| sort_nbrs.push_back(nbr); | |
| } | |
| _uatoms.SetBitOn(atom->GetIdx()); //mark the atom as visited | |
| cout << "Visited atom " << atom->GetIdx() << " with canonical value of " << canonical_order.at(atom->GetIdx()-1) << "\n"; | |
| if (_endatom && !_uatoms.BitIsSet(_endatom->GetIdx()) && sort_nbrs.size() > 1) { | |
| // If you have specified an _endatom, the following section rearranges | |
| // sort_nbrs as follows: | |
| // - if a branch does not lead to the end atom, move it to the front | |
| // (i.e. visit it first) | |
| // - otherwise move it to the end | |
| // This section is skipped if sort_nbrs has only a single member, or if | |
| // we have already visited _endatom. | |
| vector<OBAtom*> children; | |
| MyFindChildren(mol, children, _uatoms, _endatom); | |
| vector<OBAtom*> front, end; | |
| for (vector<OBAtom *>::iterator it=sort_nbrs.begin(); it!=sort_nbrs.end(); ++it) | |
| if (std::find(children.begin(), children.end(), *it) == children.end() && *it != _endatom) | |
| front.push_back(*it); | |
| else | |
| end.push_back(*it); | |
| sort_nbrs = front; | |
| sort_nbrs.insert(sort_nbrs.end(), end.begin(), end.end()); | |
| } | |
| // Build the next layer of nodes, in canonical order | |
| for (ai = sort_nbrs.begin(); ai != sort_nbrs.end(); ++ai) { | |
| nbr = *ai; | |
| idx = nbr->GetIdx(); | |
| if (_uatoms[idx]) // depth-first search may have used this atom since | |
| continue; // we sorted the bonds above | |
| bond = atom->GetBond(nbr); | |
| _ubonds.SetBitOn(bond->GetIdx()); | |
| next = new OBCanSmiNode(nbr); | |
| next->SetParent(atom); | |
| node->AddChildNode(next, bond); | |
| BuildCanonTree(mol, frag_atoms, canonical_order, next); | |
| } | |
| return(true); | |
| } | |
| /*************************************************************************** | |
| * FUNCTION: GetCanonClosureDigits | |
| * | |
| * DESCRIPTION: | |
| * Given an atom, returns the ring-closure digits for that atom, in | |
| * the form of a vector of digit/OBBond* pair. Some of the digits may | |
| * be for newly-opened rings (the matching digit occurs later in the | |
| * SMILES string), and some may be for closing rings (the matching | |
| * digit occured earlier in the string). | |
| * | |
| * Canonicalization requires that atoms with more than one digit | |
| * have the digits assigned in a canonical fashion. For example, | |
| * the SMILES "CC12(NCCC2)CCC1" and "CC12(NCCC1)CCC2" are the | |
| * same molecule; we need to assign the digits of the first "C12" | |
| * such that it always comes out one way or the other. | |
| * | |
| * This needs to find closing bonds (ring bonds already assigned a | |
| * digit) and opening bonds (ring bonds not encountered yet). | |
| * | |
| * Closing Bonds: | |
| * This is easy: open bonds are already stored in the _vopen vector, | |
| * in canonical order. Just find open bonds to this atom and copy | |
| * them from _vopen to our return vector. | |
| * | |
| * Opening Bonds: | |
| * This function looks through the bonds for this atoms and finds | |
| * any that aren't on the _ubonds "used" list, (and also are non-H | |
| * and are in this fragment). Any such bonds must be ring-closure | |
| * bonds. If there is more than one, they are ordered by the | |
| * canonical order of the bonds' neighbor atoms; that is, the bond | |
| * to the lowest canonical-ordered neighbor is assigned the first | |
| * available number, and upwards in neighbor-atom canonical order. | |
| ***************************************************************************/ | |
| vector<OBBondClosureInfo> | |
| OBMol2Cansmi::GetCanonClosureDigits(OBAtom *atom, | |
| OBBitVec &frag_atoms, | |
| vector<unsigned int> &canonical_order) | |
| { | |
| vector<OBBondClosureInfo> vp_closures; | |
| vector<OBBond*> vbonds; | |
| vector<OBBond*>::iterator bi; | |
| vector<OBEdgeBase*>::iterator i; | |
| OBBond *bond1, *bond2; | |
| OBAtom *nbr1, *nbr2; | |
| int nbr1_canorder, nbr2_canorder; | |
| vp_closures.clear(); | |
| vbonds.clear(); | |
| // Find new ring-closure bonds for this atom | |
| for (bond1 = atom->BeginBond(i); bond1; bond1 = atom->NextBond(i)) { | |
| // Is this a ring-closure neighbor? | |
| if (_ubonds.BitIsOn(bond1->GetIdx())) | |
| continue; | |
| nbr1 = bond1->GetNbrAtom(atom); | |
| // Skip hydrogens before checking canonical_order | |
| // PR#1999348 | |
| if ( (nbr1->IsHydrogen() && IsSuppressedHydrogen(nbr1)) | |
| || !frag_atoms.BitIsOn(nbr1->GetIdx())) | |
| continue; | |
| nbr1_canorder = canonical_order[nbr1->GetIdx()-1]; | |
| // Insert into the bond-vector in canonical order (by neighbor atom order) | |
| for (bi = vbonds.begin(); bi != vbonds.end(); ++bi) { | |
| bond2 = *bi; | |
| nbr2 = bond2->GetNbrAtom(atom); | |
| nbr2_canorder = canonical_order[nbr2->GetIdx()-1]; | |
| if (nbr1_canorder < nbr2_canorder) { | |
| vbonds.insert(bi, bond1); | |
| bi = vbonds.begin();//insert invalidated bi; set it to fail next test | |
| break; | |
| } | |
| } | |
| if (bi == vbonds.end()) // highest one (or first one) - append to end | |
| vbonds.push_back(bond1); | |
| } | |
| // If we found new open bonds, assign a bond-closure digits to each one, | |
| // add it to _vopen, and add it to the return vector. | |
| for (bi = vbonds.begin(); bi != vbonds.end(); ++bi) { | |
| bond1 = *bi; | |
| _ubonds.SetBitOn(bond1->GetIdx()); | |
| int digit = GetUnusedIndex(); | |
| int bo = (bond1->IsAromatic())? 1 : bond1->GetBO(); // CJ: why was this line added? bo is never used? | |
| _vopen.push_back(OBBondClosureInfo(bond1->GetNbrAtom(atom), atom, bond1, digit, true)); | |
| vp_closures.push_back(OBBondClosureInfo(bond1->GetNbrAtom(atom), atom, bond1, digit, true)); | |
| } | |
| // Now look through the list of open closure-bonds and find any to this | |
| // atom (but watch out for the ones we just added). For each one found, | |
| // add it to the return vector, and erase it from _vopen. | |
| if (!_vopen.empty()) { | |
| vector<OBBondClosureInfo>::iterator j; | |
| for (j = _vopen.begin(); j != _vopen.end(); ) { | |
| if (j->toatom == atom) { | |
| OBBondClosureInfo bci = *j; | |
| _vopen.erase(j); // take bond off "open" list | |
| bci.is_open = false; // mark it "closed" | |
| vp_closures.push_back(bci); // and add it to this atom's list | |
| j = _vopen.begin(); // reset iterator | |
| } | |
| else | |
| j++; | |
| } | |
| } | |
| return(vp_closures); | |
| } | |
| /*************************************************************************** | |
| * FUNCTION: IsSuppressedHydrogen | |
| * | |
| * DESCRIPTION: | |
| * For a hydrogen atom, returns TRUE if the atom is not [2H] or [3H], only | |
| * has one bond, and is not bonded to another hydrogen. | |
| * | |
| * NOTE: Return value is nonsensical if you pass it a non-hydrogen | |
| * atom. Presumably, you're calling this because you've found a | |
| * hydrogen and want to know if it goes in the SMILES. | |
| ***************************************************************************/ | |
| bool OBMol2Cansmi::IsSuppressedHydrogen(OBAtom *atom) | |
| { | |
| if (atom->GetIsotope() != 0) // Deuterium or Tritium | |
| return false; | |
| if (atom->GetValence() != 1) // not exactly one bond | |
| return false; | |
| FOR_NBORS_OF_ATOM(nbr, atom) { | |
| if (nbr->GetAtomicNum() == 1) // neighbor is hydrogen | |
| return false; | |
| } | |
| return true; | |
| } | |
| /*************************************************************************** | |
| * FUNCTION: GetSmilesValence | |
| * | |
| * DESCRIPTION: | |
| * This is like GetHvyValence(), but it returns the "valence" of an | |
| * atom as it appears in the SMILES string. In particular, hydrogens | |
| * count if they will appear explicitly -- see IsSuppressedHydrogen() | |
| * above. | |
| ***************************************************************************/ | |
| int OBMol2Cansmi::GetSmilesValence(OBAtom *atom) | |
| { | |
| int count = 0; | |
| if (atom->IsHydrogen()) | |
| return atom->GetValence(); | |
| if (_pconv && _pconv->IsOption("h")) | |
| return atom->GetValence(); | |
| FOR_NBORS_OF_ATOM(nbr, atom) { | |
| if ( !nbr->IsHydrogen() | |
| || nbr->GetIsotope() != 0 | |
| || nbr->GetValence() != 1) | |
| count++; | |
| } | |
| return(count); | |
| } | |
| /*************************************************************************** | |
| * FUNCTION: ToCansmilesString | |
| * | |
| * DESCRIPTION: | |
| * Recursively writes the canonical SMILES string to a buffer. Writes | |
| * this node, then selects each of the child nodes (in canonical | |
| * order) and writes them. | |
| * | |
| * Chirality is the tricky bit here. Before we can write out a chiral | |
| * atom, we have to "look ahead" to determine the order in which the | |
| * neighbor atoms are/will be written. | |
| * | |
| * The SMILES language defines the order-of-appearance of a ring-closure | |
| * bond as the position of the digit, in the SMILES, not the actual atom. | |
| * For example, the fragments N[C@H](C)Br, and N[C@H]1(Br)CCCC1 have | |
| * the same chiral center, because the "1" in the second one is a "stand | |
| * in" for the "C" in the first, even though the actual carbon atom appears | |
| * after the Bromine atom in the second string. | |
| ***************************************************************************/ | |
| void OBMol2Cansmi::ToCansmilesString(OBCanSmiNode *node, | |
| char *buffer, | |
| OBBitVec &frag_atoms, | |
| vector<unsigned int> &symmetry_classes, | |
| vector<unsigned int> &canonical_order, | |
| bool isomeric) | |
| { | |
| OBAtom *atom = node->GetAtom(); | |
| vector<OBAtom *> chiral_neighbors; | |
| // Get the ring-closure digits in canonical order. We'll use these in | |
| // two places: First, for figuring out chirality, then later for writing | |
| // the actual ring-closure digits to the string. | |
| vector<OBBondClosureInfo> vclose_bonds = GetCanonClosureDigits(atom, frag_atoms, canonical_order); | |
| // First thing: Figure out chirality. We start by creating a vector of the neighbors | |
| // in the order in which they'll appear in the canonical SMILES string. This is more | |
| // complex than you'd guess because of implicit/explicit H and ring-closure digits. | |
| // Don't include chiral symbol on _endatom or _startatom. | |
| // Otherwise, we end up with C[C@@H](Br)(Cl), where the C has 4 neighbours already | |
| // and we cannot concatenate another SMILES string without creating a 5-valent C. | |
| bool is_chiral = AtomIsChiral(atom); | |
| if (is_chiral && atom!=_endatom && atom!=_startatom) { | |
| // If there's a parent node, it's the first atom in the ordered neighbor-vector | |
| // used for chirality. | |
| if (node->GetParent()) { | |
| chiral_neighbors.push_back(node->GetParent()); | |
| } | |
| // Next for chirality order will be hydrogen -- since it occurs | |
| // inside the atom's [] brackets, it's always before other neighbors. | |
| // | |
| // Note that we check the regular neighbor list, NOT the canonical | |
| // SMILES tree, because hydrogens normally aren't part of the canonical | |
| // SMILES, but we still need them to figure out chirality. | |
| // | |
| // There are two cases: it's explicit in the OBMol object but should be | |
| // written inside the brackets, i.e. "[C@H]", or it is explicit and | |
| // must be outside the brackets, such as for deuterium. (A hydrogen | |
| // that will appear explicitly in the SMILES as a separate atom is | |
| // treated like any other atom when calculating the chirality.) | |
| FOR_NBORS_OF_ATOM(i_nbr, atom) { | |
| OBAtom *nbr = &(*i_nbr); | |
| if (nbr->IsHydrogen() && IsSuppressedHydrogen(nbr) ) { | |
| chiral_neighbors.push_back(nbr); | |
| break; // quit loop: only be one H if atom is chiral | |
| } | |
| } | |
| // Ok, done with H. Now we need to consider the case where there is a chiral | |
| // lone pair. If it exists (and we won't know for sure until we've counted up | |
| // all the neighbours) it will go in here | |
| int lonepair_location = chiral_neighbors.size(); | |
| // Ok, done with all that. Next in the SMILES will be the ring-closure characters. | |
| // So we need to find the corresponding atoms and add them to the list. | |
| // (We got the canonical ring-closure list earlier.) | |
| if (!vclose_bonds.empty()) { | |
| vector<OBBondClosureInfo>::iterator i; | |
| for (i = vclose_bonds.begin();i != vclose_bonds.end();i++) { | |
| OBBond *bond = i->bond; | |
| OBAtom *nbr = bond->GetNbrAtom(atom); | |
| chiral_neighbors.push_back(nbr); | |
| } | |
| } | |
| // Finally, add the "regular" neighbors, the "child" nodes in the | |
| // canonical-SMILES tree, to the chiral-neighbors list. | |
| for (int i = 0; i < node->Size(); i++) { | |
| OBAtom *nbr = node->GetChildAtom(i); | |
| chiral_neighbors.push_back(nbr); | |
| } | |
| // Handle a chiral lone-pair on a sulfur, by inserting a NULL OBAtom* at the | |
| // appropriate location | |
| if (chiral_neighbors.size() == 3 && atom->GetAtomicNum() == 16) // Handle sulfur | |
| chiral_neighbors.insert(chiral_neighbors.begin() + lonepair_location, static_cast<OBAtom*> (NULL)); | |
| } | |
| // Write the current atom to the string | |
| GetSmilesElement(node, chiral_neighbors, symmetry_classes, buffer+strlen(buffer), isomeric); | |
| _atmorder.push_back(atom->GetIdx()); //store the atom ordering | |
| // Write ring-closure digits | |
| if (!vclose_bonds.empty()) { | |
| vector<OBBondClosureInfo>::iterator bci; | |
| for (bci = vclose_bonds.begin();bci != vclose_bonds.end();bci++) { | |
| if (!bci->is_open) | |
| { // Ring closure | |
| char bs[2] = {'\0', '\0'}; | |
| // Only get symbol for ring closures on the dbl bond | |
| if (HasStereoDblBond(bci->bond, node->GetAtom())) | |
| bs[0] = GetCisTransBondSymbol(bci->bond, node); | |
| if (bs[0]) | |
| strcat(buffer, bs); // append "/" or "\" | |
| else | |
| { | |
| if (bci->bond->GetBO() == 2 && !bci->bond->IsAromatic()) strcat(buffer,"="); | |
| if (bci->bond->GetBO() == 3) strcat(buffer,"#"); | |
| if (bci->bond->GetBO() == 4) strcat(buffer,"$"); | |
| } | |
| } | |
| else | |
| { // Ring opening | |
| char bs[2] = {'\0', '\0'}; | |
| // Only get symbol for ring openings on the dbl bond | |
| if (!HasStereoDblBond(bci->bond, bci->bond->GetNbrAtom(node->GetAtom()))) | |
| bs[0] = GetCisTransBondSymbol(bci->bond, node); | |
| if (bs[0]) | |
| strcat(buffer, bs); // append "/" or "\" | |
| } | |
| if (bci->ringdigit > 9) strcat(buffer,"%"); | |
| sprintf(buffer+strlen(buffer), "%d", bci->ringdigit); | |
| } | |
| } | |
| // Write child bonds, then recursively follow paths to child nodes | |
| // to print the SMILES for each child branch. | |
| OBBond *bond; | |
| for (int i = 0;i < node->Size();i++) { | |
| bond = node->GetChildBond(i); | |
| if (i+1 < node->Size() || node->GetAtom() == _endatom) | |
| strcat(buffer,"("); | |
| //if (bond->IsUp() || bond->IsDown()) { | |
| char cc[2]; | |
| cc[0] = GetCisTransBondSymbol(bond, node); | |
| cc[1] = '\0'; | |
| if (cc[0] == BondUpChar || cc[0] == BondDownChar) | |
| strcat(buffer, cc); | |
| //} | |
| else if (bond->GetBO() == 2 && !bond->IsAromatic()) | |
| strcat(buffer,"="); | |
| else if (bond->GetBO() == 3) | |
| strcat(buffer,"#"); | |
| else if (bond->GetBO() == 4) | |
| strcat(buffer,"$"); | |
| ToCansmilesString(node->GetChildNode(i),buffer, frag_atoms, symmetry_classes, canonical_order, isomeric); | |
| if (i+1 < node->Size() || node->GetAtom() == _endatom) | |
| strcat(buffer,")"); | |
| } | |
| } | |
| /**************************************************************************** | |
| * FUNCTION: StandardLabels | |
| * | |
| * DESCRIPTION: | |
| * Creates a set of non-canonical labels for the fragment atoms | |
| * ***************************************************************************/ | |
| void StandardLabels(OBMol *pMol, OBBitVec *frag_atoms, | |
| vector<unsigned int> &symmetry_classes, | |
| vector<unsigned int> &labels) | |
| { | |
| FOR_ATOMS_OF_MOL(atom, *pMol) { | |
| if (frag_atoms->BitIsOn(atom->GetIdx())) { | |
| labels.push_back(atom->GetIdx() - 1); | |
| symmetry_classes.push_back(atom->GetIdx() - 1); | |
| } | |
| else{ | |
| labels.push_back(OBStereo::ImplicitRef); //to match situation when canonical ordering. Just a big number? | |
| symmetry_classes.push_back(OBStereo::ImplicitRef); | |
| } | |
| } | |
| } | |
| /*************************************************************************** | |
| * FUNCTION: RandomLabels | |
| * | |
| * DESCRIPTION: | |
| * Creates a set of random labels for the fragment atoms. Primarily | |
| * for testing: you can create a bunch of random SMILES for the same | |
| * molecule, and use those to test the canonicalizer. | |
| ***************************************************************************/ | |
| static int timeseed = 0; | |
| void RandomLabels(OBMol *pMol, OBBitVec &frag_atoms, | |
| vector<unsigned int> &symmetry_classes, | |
| vector<unsigned int> &labels) | |
| { | |
| int natoms = pMol->NumAtoms(); | |
| OBBitVec used(natoms); | |
| if (!timeseed) { | |
| OBRandom rand; | |
| rand.TimeSeed(); | |
| timeseed = 1; | |
| } | |
| FOR_ATOMS_OF_MOL(atom, *pMol) { | |
| if (frag_atoms.BitIsOn(atom->GetIdx())) { | |
| int r = rand() % natoms; | |
| while (used.BitIsOn(r)) { | |
| r = (r + 1) % natoms; // find an unused number | |
| } | |
| used.SetBitOn(r); | |
| labels.push_back(r); | |
| symmetry_classes.push_back(r); | |
| } | |
| else{ | |
| labels.push_back(OBStereo::ImplicitRef); //to match situation when canonical ordering. Just a big number? | |
| symmetry_classes.push_back(OBStereo::ImplicitRef); | |
| } | |
| } | |
| } | |
| //! Same as tokenize, except in treatment of multiple delimiters. Tokenize | |
| //! treats multiple delimiters as a single delimiter. It also ignores delimiters | |
| //! in the first or last position. In contrast, mytokenize treats each instance of | |
| //! the delimiter as the end/start of a new token. | |
| bool mytokenize(std::vector<std::string> &vcr, std::string &s, | |
| const char *delimstr) | |
| { | |
| vcr.clear(); | |
| size_t startpos=0,endpos=0; | |
| size_t s_size = s.size(); | |
| for (;;) | |
| { | |
| //startpos = s.find_first_not_of(delimstr,startpos); | |
| endpos = s.find_first_of(delimstr,startpos); | |
| if (endpos <= s_size && startpos <= s_size) | |
| { | |
| vcr.push_back(s.substr(startpos,endpos-startpos)); | |
| } | |
| else | |
| { | |
| if (startpos <= s_size) | |
| vcr.push_back(s.substr(startpos,s_size-startpos)); | |
| break; | |
| } | |
| startpos = endpos+1; | |
| } | |
| return(true); | |
| } | |
| bool OBMol2Cansmi::ParseInChI(OBMol &mol, vector<int> &atom_order) | |
| { | |
| OBConversion MolConv(*_pconv); //new copy to use to write associated MOL | |
| MolConv.SetAuxConv(NULL); //temporary until a proper OBConversion copy constructor written | |
| OBFormat* pInChIFormat = _pconv->FindFormat("InChI"); | |
| if(pInChIFormat==NULL) { | |
| obErrorLog.ThrowError(__FUNCTION__, "InChI format not available", obError); | |
| return false; | |
| } | |
| stringstream newstream; | |
| MolConv.SetOutStream(&newstream); | |
| MolConv.SetOptions("aX'RecMet'", OBConversion::OUTOPTIONS); | |
| pInChIFormat->WriteMolecule(&mol, &MolConv); | |
| cout << newstream.str() << "\n"; | |
| vector<string> splitlines; | |
| tokenize(splitlines, newstream.str(),"\n"); | |
| vector<string> split, split_aux; | |
| size_t rm_start = splitlines.at(0).find("/r"); // Correct for reconnected metal if necessary | |
| if (rm_start == string::npos) { | |
| tokenize(split, splitlines.at(0),"/"); | |
| tokenize(split_aux, splitlines.at(1),"/"); | |
| } | |
| else { | |
| tokenize(split, splitlines.at(0).substr(rm_start), "/"); | |
| split.insert(split.begin(), ""); | |
| size_t rm_start_b = splitlines.at(1).find("/R:"); | |
| tokenize(split_aux, splitlines.at(1).substr(rm_start_b), "/"); | |
| } | |
| vector<string> component, s_conn_table, multiple; | |
| vector<vector<int> > conn_table; // Vector for each disconnected component | |
| int N_components = 0; | |
| bool has_connection_layer = true; | |
| if (split.size() > 2 && split[2][0] == 'c') { // e.g. /c1-2-3;4*1-2;1-3-5-4-2;/ | |
| mytokenize(component, split[2].substr(1), ";"); | |
| for(vector<string>::iterator it=component.begin(); it!=component.end(); ++it) { | |
| if (*it == "") { | |
| conn_table.push_back(vector<int>(1, 1)); | |
| continue; | |
| } | |
| // Find whether these connections represent multiple components | |
| tokenize(multiple, *it, "*"); | |
| int factor = 1; | |
| if (multiple.size() == 2) { | |
| factor = atoi(multiple.at(0).c_str()); | |
| tokenize(s_conn_table, multiple.at(1), "(),-"); | |
| } | |
| else | |
| tokenize(s_conn_table, *it, "(),-"); | |
| // Now parse the connection table for this component | |
| vector<int> conns; | |
| for(vector<string>::iterator bit=s_conn_table.begin(); bit!=s_conn_table.end(); ++bit) | |
| conns.push_back(atoi(bit->c_str())); | |
| for (int i=0; i<factor; ++i) | |
| conn_table.push_back(conns); | |
| N_components += factor; | |
| } | |
| } | |
| else { // The connection layer will be missing if all of its disconnected components have a single atom | |
| vector<string> norm_comps; | |
| tokenize(norm_comps, split_aux.at(2).substr(2), ";"); | |
| N_components = norm_comps.size(); | |
| for(int i=0; i<N_components; ++i) | |
| conn_table.push_back(vector<int>(1, 1)); | |
| } | |
| // Extract the normalized atom numbers | |
| vector<string> s_normalized; | |
| tokenize(s_normalized, split_aux.at(2).substr(2), ",;"); | |
| vector<int> normalized; | |
| for(vector<string>::iterator it=s_normalized.begin(); it!=s_normalized.end(); ++it) | |
| normalized.push_back(atoi(it->c_str())); | |
| // Add some sanity checks | |
| cout << "InChI Order: "; | |
| int N_atoms = 0; | |
| for (int i=0; i<conn_table.size(); ++i) | |
| { | |
| for (int j=0; j<conn_table.at(i).size(); ++j) { | |
| atom_order.push_back(normalized.at(N_atoms + conn_table.at(i).at(j) - 1)); | |
| cout << atom_order.back() << ","; | |
| } | |
| N_atoms += *std::max_element(conn_table.at(i).begin(), conn_table.at(i).end()); | |
| } | |
| cout << "\n"; | |
| return true; | |
| } | |
| /*************************************************************************** | |
| * FUNCTION: CreateFragCansmiString | |
| * | |
| * DESCRIPTION: | |
| * Selects the "root" atom, which will be first in the SMILES, then | |
| * builds a tree in canonical order, and finally generates the SMILES. | |
| * If there are then atoms that haven't been visited (i.e. a molecule | |
| * with disconnected parts), selects a new root from the remaining | |
| * atoms and repeats the process. | |
| ***************************************************************************/ | |
| void OBMol2Cansmi::CreateFragCansmiString(OBMol &mol, OBBitVec &frag_atoms, bool isomeric, char *buffer) | |
| { | |
| //cout << "CreateFragCansmiString()" << endl; | |
| OBAtom *atom; | |
| OBCanSmiNode *root; | |
| buffer[0] = '\0'; | |
| vector<OBNodeBase*>::iterator ai; | |
| vector<unsigned int> symmetry_classes, canonical_order; | |
| //Pointer to Atom Class data set if -xa option and the molecule has any; NULL otherwise. | |
| if(_pconv->IsOption("a")) | |
| _pac = static_cast<OBAtomClassData*>(mol.GetData("Atom Class")); | |
| // Remember the desired endatom, if specified | |
| const char* pp = _pconv->IsOption("l"); | |
| int atom_idx = pp ? atoi(pp) : -1; | |
| if (atom_idx >= 1 && atom_idx <= mol.NumAtoms()) | |
| _endatom = mol.GetAtom(atom_idx); | |
| // Was a start atom specified? | |
| pp = _pconv->IsOption("f"); | |
| atom_idx = pp ? atoi(pp) : -1; | |
| if (atom_idx >= 1 && atom_idx <= mol.NumAtoms()) | |
| _startatom = mol.GetAtom(atom_idx); | |
| // Was an atom ordering specified? | |
| const char* ppI = _pconv->IsOption("I"); | |
| vector<string> s_atom_order; | |
| vector<int> atom_order; | |
| if (ppI) { | |
| tokenize(s_atom_order,ppI,"-()"); | |
| if (s_atom_order.size() != mol.NumHvyAtoms()) | |
| ppI = NULL; | |
| else { | |
| for (vector<string>::const_iterator cit=s_atom_order.begin(); cit!=s_atom_order.end(); ++cit) | |
| atom_order.push_back(atoi(cit->c_str())); | |
| atom_idx = atom_order.at(0); | |
| if (atom_idx >= 1 && atom_idx <= mol.NumAtoms()) | |
| _startatom = mol.GetAtom(atom_idx); | |
| } | |
| } | |
| if (_pconv->IsOption("N")) { | |
| bool useInChIOrder = ParseInChI(mol, atom_order); | |
| if (!useInChIOrder) | |
| _pconv->RemoveOption("N", OBConversion::OUTOPTIONS); | |
| } | |
| // First, create a canonical ordering vector for the atoms. Canonical | |
| // labels are zero indexed, corresponding to "atom->GetIdx()-1". | |
| if (_canonicalOutput) { | |
| OBGraphSym gs(&mol, &frag_atoms); | |
| gs.GetSymmetry(symmetry_classes); | |
| CanonicalLabels(&mol, symmetry_classes, canonical_order, frag_atoms); | |
| } | |
| else { | |
| if (_pconv->IsOption("C")) { // "C" == "anti-canonical form" | |
| RandomLabels(&mol, frag_atoms, symmetry_classes, canonical_order); | |
| } else if (ppI || _pconv->IsOption("N")) { // "I" (user specified order) or "N" (InChI order) | |
| canonical_order.resize(atom_order.size()); | |
| symmetry_classes.resize(atom_order.size()); | |
| int idx = 1; | |
| for (int i=0; i<atom_order.size(); ++i) { | |
| if (canonical_order[atom_order[i] - 1] == 0) { // Ignore ring closures (for "N") | |
| canonical_order[atom_order[i] - 1] = idx; | |
| symmetry_classes[atom_order[i] - 1] = idx; | |
| ++idx; | |
| } | |
| } | |
| } else { | |
| StandardLabels(&mol, &frag_atoms, symmetry_classes, canonical_order); | |
| } | |
| } | |
| // OUTER LOOP: Handles dot-disconnected structures. Finds the | |
| // lowest unmarked canorder atom, and starts there to generate a SMILES. | |
| // Repeats until no atoms remain unmarked. | |
| while (1) { | |
| // It happens that the lowest canonically-numbered atom is usually | |
| // a good place to start the canonical SMILES. | |
| OBAtom *root_atom; | |
| int lowest_canorder = 999999; | |
| root_atom = NULL; | |
| // If we specified a startatom_idx & it's in this fragment, use it to start the fragment | |
| if (_startatom) | |
| if (!_uatoms[_startatom->GetIdx()] && frag_atoms.BitIsOn(_startatom->GetIdx())) | |
| root_atom = _startatom; | |
| if (root_atom == NULL) { | |
| for (atom = mol.BeginAtom(ai); atom; atom = mol.NextAtom(ai)) { | |
| int idx = atom->GetIdx(); | |
| if (!_uatoms[idx] // skip atoms already used (for fragments) | |
| && frag_atoms.BitIsOn(idx) // skip atoms not in this fragment | |
| && canonical_order[idx-1] < lowest_canorder) { | |
| if (_pconv->IsOption("N") || // skip the following conditions if using InChI order | |
| (!atom->IsHydrogen() // don't start with a hydrogen | |
| //&& !atom->IsChiral() // don't use chiral atoms as root node | |
| && !_uatoms[idx])) // skip atoms already used (for fragments) | |
| { | |
| root_atom = atom; | |
| lowest_canorder = canonical_order[idx-1]; | |
| } | |
| } | |
| } | |
| } | |
| // If we didn't pick an atom, it is because the fragment is made | |
| // entirely of hydrogen atoms (e.g. [H][H]). Repeat the loop but | |
| // allow hydrogens this time. | |
| if (root_atom == NULL) { | |
| for (atom = mol.BeginAtom(ai); atom; atom = mol.NextAtom(ai)) { | |
| int idx = atom->GetIdx(); | |
| if (!_uatoms[idx] // skip atoms already used (for fragments) | |
| && frag_atoms.BitIsOn(idx)// skip atoms not in this fragment | |
| && canonical_order[idx-1] < lowest_canorder) { | |
| root_atom = atom; | |
| lowest_canorder = canonical_order[idx-1]; | |
| } | |
| } | |
| } | |
| // No atom found? We've done all fragments. | |
| if (root_atom == NULL) | |
| break; | |
| // Clear out closures in case structure is dot disconnected | |
| // _atmorder.clear(); | |
| _vopen.clear(); | |
| // Dot disconnected structure? | |
| if (strlen(buffer) > 0) strcat(buffer,"."); | |
| root = new OBCanSmiNode (root_atom); | |
| BuildCanonTree(mol, frag_atoms, canonical_order, root); | |
| ToCansmilesString(root, buffer, frag_atoms, symmetry_classes, canonical_order, isomeric); | |
| delete root; | |
| } | |
| // save the canonical order as a space-separated string | |
| // which will be returned by GetOutputOrder() for incorporation | |
| // into an OBPairData keyed "canonical order" | |
| if (_atmorder.size()) { | |
| stringstream temp; | |
| vector<int>::iterator can_iter = _atmorder.begin(); | |
| if (can_iter != _atmorder.end()) { | |
| temp << (*can_iter++); | |
| } | |
| for (; can_iter != _atmorder.end(); ++can_iter) { | |
| if (*can_iter <= mol.NumAtoms()) | |
| temp << " " << (*can_iter); | |
| } | |
| _canorder = temp.str(); // returned by GetOutputOrder() | |
| } | |
| } | |
| /*************************************************************************** | |
| * FUNCTION: OBMol2Cansmi::AddHydrogenToChiralCenters | |
| * | |
| * DESCRIPTION: | |
| * Adds an explicit hydrogen to any chiral center that only has three | |
| * atoms. This makes analysis much easier since the algorithms can | |
| * assume that all tetrahedral carbons have four neighbors. | |
| ***************************************************************************/ | |
| void OBMol2Cansmi::AddHydrogenToChiralCenters(OBMol &mol, OBBitVec &frag_atoms) | |
| { | |
| bool is_modified = false; | |
| vector <OBAtom *> atomList; | |
| int element; | |
| bool hasChiralityPerceived = mol.HasChiralityPerceived(); // remember to restore | |
| // Find all appropriate atoms to add hydrogens | |
| FOR_ATOMS_OF_MOL(atom, mol) | |
| { | |
| if (!frag_atoms[atom->GetIdx()] || !AtomIsChiral(&*atom)) | |
| continue; | |
| // don't mess with transition elements! | |
| element = atom->GetAtomicNum(); | |
| if ( (element >= 21 && element <= 30) | |
| || (element >= 39 && element <= 49) | |
| || (element >= 71 && element <= 82) ) | |
| continue; | |
| if (GetSmilesValence(&*atom) == 3 && atom->GetValence() == 3) { // implicit H? | |
| atomList.push_back(&*atom); | |
| } | |
| } | |
| // Now add hydrogens to the list | |
| if (atomList.size() > 0) { | |
| mol.BeginModify(); | |
| vector<OBAtom*>::iterator i; | |
| // OBAtom *atom; | |
| for (i = atomList.begin(); i != atomList.end(); ++i) { | |
| #if DEBUG | |
| cout << "AddHydrogenToChiralCenters: Adding H to atom " << (*i)->GetIdx() << "\n"; | |
| #endif | |
| // Add the H atom | |
| mol.AddHydrogens(*i); | |
| frag_atoms.SetBitOn(mol.NumAtoms()); | |
| } | |
| mol.EndModify(); | |
| // Don't lose the ChiralityPerceived flag... | |
| if (hasChiralityPerceived) | |
| mol.SetChiralityPerceived(); | |
| } | |
| } | |
| /*---------------------------------------------------------------------- | |
| * END OF CLASS: OBMol2Cansmi | |
| ----------------------------------------------------------------------*/ | |
| /*************************************************************************** | |
| * FUNCTION: CreateCansmiString | |
| * | |
| * DESCRIPTION: | |
| * Writes the canonical SMILES for a molecule or molecular fragment | |
| * to the given buffer. | |
| * | |
| * frag_atoms represents atoms in a fragment of the molecule; the | |
| * SMILES will contain those atoms only. | |
| * | |
| * (Note: This is an ordinary public C++ function, not a member | |
| * of any class.) | |
| * | |
| ***************************************************************************/ | |
| void CreateCansmiString(OBMol &mol, char *buffer, OBBitVec &frag_atoms, bool iso, OBConversion* pConv) | |
| { | |
| bool canonical = pConv->IsOption("c")!=NULL; | |
| // This is a hack to prevent recursion problems. | |
| // we still need to fix the underlying problem -GRH | |
| if (mol.NumAtoms() > 1000) { | |
| stringstream errorMsg; | |
| errorMsg << | |
| "SMILES Conversion failed: Molecule is too large to convert." | |
| "Open Babel is currently limited to 1000 atoms." << endl; | |
| errorMsg << " Molecule size: " << mol.NumAtoms() << " atoms " << endl; | |
| obErrorLog.ThrowError(__FUNCTION__, errorMsg.str(), obError); | |
| return; | |
| } | |
| OBMol2Cansmi m2s; | |
| m2s.Init(canonical, pConv); | |
| // GRH Added 2008-06-05 | |
| // This came from the WriteMolecule call below | |
| // It doesn't seem to have any effect | |
| // m2s.CorrectAromaticAmineCharge(mol); | |
| if (iso) { | |
| PerceiveStereo(&mol); | |
| m2s.CreateCisTrans(mol); // No need for this if not iso | |
| m2s.AddHydrogenToChiralCenters(mol, frag_atoms); | |
| } else { | |
| // Not isomeric - be sure there are no Z coordinates, clear | |
| // all stereo-center and cis/trans information. | |
| OBBond *bond; | |
| vector<OBEdgeBase*>::iterator bi; | |
| vector<OBNodeBase*>::iterator ai; | |
| for (bond = mol.BeginBond(bi); bond; bond = mol.NextBond(bi)) { | |
| bond->UnsetUp(); | |
| bond->UnsetDown(); | |
| bond->UnsetHash(); | |
| bond->UnsetWedge(); | |
| } | |
| } | |
| // If the fragment includes ordinary hydrogens, get rid of them. | |
| // They won't appear in the SMILES anyway (unless they're attached to | |
| // a chiral center, or it's something like [H][H]). | |
| FOR_ATOMS_OF_MOL(iatom, mol) { | |
| OBAtom *atom = &(*iatom); | |
| if (frag_atoms.BitIsOn(atom->GetIdx()) && atom->IsHydrogen() | |
| && (!iso || m2s.IsSuppressedHydrogen(atom))) { | |
| frag_atoms.SetBitOff(atom->GetIdx()); | |
| } | |
| } | |
| m2s.CreateFragCansmiString(mol, frag_atoms, iso, buffer); | |
| // Could also save canonical bond order if anyone desires | |
| if (!mol.HasData("SMILES Atom Order")) { | |
| // This atom order data is useful not just for canonical SMILES | |
| OBPairData *canData = new OBPairData; | |
| canData->SetAttribute("SMILES Atom Order"); | |
| canData->SetValue(m2s.GetOutputOrder()); | |
| // cout << "SMILES_atom_order "<<m2s.GetOutputOrder() << "\n"; | |
| canData->SetOrigin(OpenBabel::local); | |
| mol.SetData(canData); | |
| } | |
| } | |
| ////////////////////////////////////////////////// | |
| bool SMIBaseFormat::WriteMolecule(OBBase* pOb,OBConversion* pConv) | |
| { | |
| //cout << "SMIBaseFromat::WriteMolecule()" << endl; | |
| OBMol* pmol = dynamic_cast<OBMol*>(pOb); | |
| // Define some references so we can use the old parameter names | |
| ostream &ofs = *pConv->GetOutStream(); | |
| OBMol mol = *pmol; | |
| // Title only option? | |
| if(pConv->IsOption("t")) { | |
| ofs << mol.GetTitle() <<endl; | |
| return true; | |
| } | |
| char buffer[BUFF_SIZE]; | |
| *buffer = '\0'; // clear the buffer | |
| // This is a hack to prevent recursion problems. | |
| // we still need to fix the underlying problem (mainly chiral centers) -GRH | |
| if (mol.NumAtoms() > 1000) { | |
| stringstream errorMsg; | |
| errorMsg << | |
| "SMILES Conversion failed: Molecule is too large to convert." | |
| "Open Babel is currently limited to 1000 atoms." << endl; | |
| errorMsg << " Molecule size: " << mol.NumAtoms() << " atoms " << endl; | |
| obErrorLog.ThrowError(__FUNCTION__, errorMsg.str(), obError); | |
| return(false); | |
| } | |
| // If there is data attached called "SMILES_Fragment", then it's | |
| // an ascii OBBitVec, representing the atoms of a fragment. The | |
| // SMILES generated will only include these fragment atoms. | |
| OBBitVec fragatoms(mol.NumAtoms()); | |
| OBPairData *dp = (OBPairData *) mol.GetData("SMILES_Fragment"); | |
| if (dp) { | |
| fragatoms.FromString(dp->GetValue(), mol.NumAtoms()); | |
| } | |
| // If no "SMILES_Fragment" data, fill the entire OBBitVec | |
| // with 1's so that the SMILES will be for the whole molecule. | |
| else { | |
| FOR_ATOMS_OF_MOL(a, mol) | |
| { | |
| fragatoms.SetBitOn(a->GetIdx()); | |
| } | |
| } | |
| if (mol.NumAtoms() > 0) { | |
| CreateCansmiString(mol, buffer, fragatoms, !pConv->IsOption("i"), pConv); | |
| } | |
| ofs << buffer; | |
| if(!pConv->IsOption("smilesonly")) { | |
| if(!pConv->IsOption("n")) | |
| ofs << '\t' << mol.GetTitle(); | |
| if (pConv->IsOption("x") && mol.HasData("SMILES Atom Order")) { | |
| vector<string> vs; | |
| string canorder = mol.GetData("SMILES Atom Order")->GetValue(); | |
| tokenize(vs, canorder); | |
| ofs << '\t'; | |
| for (int i = 0; i < vs.size(); i++) { | |
| int idx = atoi(vs[i].c_str()); | |
| OBAtom *atom = mol.GetAtom(idx); | |
| if (i > 0) | |
| ofs << ","; | |
| ofs << atom->GetX() << "," << atom->GetY(); | |
| } | |
| } | |
| if(!pConv->IsOption("nonewline")) | |
| ofs << endl; | |
| } | |
| //cout << "end SMIBaseFromat::WriteMolecule()" << endl; | |
| return true; | |
| } | |
| //******************************************************** | |
| class FIXFormat : public OBMoleculeFormat | |
| { | |
| public: | |
| //Register this format type ID | |
| FIXFormat() | |
| { | |
| OBConversion::RegisterFormat("fix",this); | |
| } | |
| virtual const char* Description() //required | |
| { | |
| return | |
| "SMILES FIX format\n" | |
| " No comments yet\n"; | |
| }; | |
| virtual const char* SpecificationURL() | |
| {return "";}; //optional | |
| //Flags() can return be any the following combined by | or be omitted if none apply | |
| // NOTREADABLE READONEONLY NOTWRITABLE WRITEONEONLY | |
| virtual unsigned int Flags() | |
| { | |
| return NOTREADABLE; | |
| }; | |
| //////////////////////////////////////////////////// | |
| /// The "API" interface functions | |
| virtual bool WriteMolecule(OBBase* pOb, OBConversion* pConv); | |
| }; | |
| //Make an instance of the format class | |
| FIXFormat theFIXFormat; | |
| ///////////////////////////////////////////////////////////////// | |
| bool FIXFormat::WriteMolecule(OBBase* pOb, OBConversion* pConv) | |
| { | |
| //cout << "FIXFromat::WriteMolecule()" << endl; | |
| OBMol* pmol = dynamic_cast<OBMol*>(pOb); | |
| if(pmol==NULL) | |
| return false; | |
| //Define some references so we can use the old parameter names | |
| ostream &ofs = *pConv->GetOutStream(); | |
| OBMol &mol = *pmol; | |
| char buffer[BUFF_SIZE]; | |
| OBMol2Cansmi m2s; | |
| // This is a hack to prevent recursion problems. | |
| // we still need to fix the underlying problem -GRH | |
| if (mol.NumAtoms() > 1000) | |
| { | |
| stringstream errorMsg; | |
| errorMsg << "SMILES Conversion failed: Molecule is too large to convert. Open Babel is currently limited to 1000 atoms." << endl; | |
| errorMsg << " Molecule size: " << mol.NumAtoms() << " atoms " << endl; | |
| obErrorLog.ThrowError(__FUNCTION__, errorMsg.str(), obWarning); | |
| return(false); | |
| } | |
| // Write the SMILES in a FIX with canonical order | |
| m2s.Init(true, pConv); | |
| // From 2.1 code. | |
| m2s.CorrectAromaticAmineCharge(mol); | |
| // We're outputting a full molecule | |
| // so we pass a bitvec for all atoms | |
| OBBitVec allbits(mol.NumAtoms()); | |
| FOR_ATOMS_OF_MOL(a, mol) { | |
| allbits.SetBitOn(a->GetIdx()); | |
| } | |
| if (mol.NumAtoms() > 0) { | |
| CreateCansmiString(mol, buffer, allbits, !pConv->IsOption("i"), pConv); | |
| } | |
| ofs << buffer << endl; | |
| OBAtom *atom; | |
| vector<int>::iterator i; | |
| // Retrieve the canonical order of the molecule | |
| string orderString = m2s.GetOutputOrder(); | |
| vector<string> canonical_order; | |
| tokenize(canonical_order, orderString); | |
| int j; | |
| int atomIdx; | |
| for (j = 0;j < mol.NumConformers();j++) | |
| { | |
| mol.SetConformer(j); | |
| for (unsigned int index = 0; index < canonical_order.size(); | |
| ++index) { | |
| atomIdx = atoi(canonical_order[index].c_str()); | |
| atom = mol.GetAtom(atomIdx); | |
| sprintf(buffer,"%9.3f %9.3f %9.3f",atom->GetX(),atom->GetY(),atom->GetZ()); | |
| ofs << buffer<< endl; | |
| } | |
| } | |
| return(true); | |
| } | |
| } // end namespace OpenBabel |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment