baoilleach/smilesformat.cpp

## smilesformat.cpp
/**********************************************************************
Copyright (C) 2005-2007 by Craig A. James, eMolecules Inc.
Some portions Copyright (C) 1998-2001 by OpenEye Scientific Software, Inc.
Some portions Copyright (C) 2001-2008 by Geoffrey R. Hutchison
Some portions Copyright (C) 2004 by Chris Morley

This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation version 2 of the License.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.
***********************************************************************/

// This code uses the old OpenEye SMILES parser
// but replaces the SMILES export with Craig James canonical smiles
// (For regular SMILES, the canonical order is not computed and ignored)

#include <openbabel/babelconfig.h>
#include <openbabel/obmolecformat.h>
#include <openbabel/chiral.h>
#include <openbabel/atomclass.h>

#include <openbabel/stereo/tetrahedral.h>
#include <openbabel/stereo/cistrans.h>
#include <openbabel/stereo/squareplanar.h>
#include <openbabel/stereo/stereo.h>

#include <openbabel/graphsym.h>
#include <openbabel/canon.h>

#include <limits>
#include <iostream>
#include <cassert>
#include <string>

//#define DEBUG 1
#define IMPLICIT_CIS_RING_SIZE 12

using namespace std;

namespace OpenBabel {

  // some constant variables
  const char BondUpChar = '\\';
  const char BondDownChar = '/';

  //Base class for SMIFormat and CANSIFormat with most of the functionality
  class SMIBaseFormat : public OBMoleculeFormat
  {
  public:
    virtual const char* GetMIMEType()
    { return "chemical/x-daylight-smiles"; };

    ////////////////////////////////////////////////////
    /// The "API" interface functions
    virtual bool ReadMolecule(OBBase* pOb, OBConversion* pConv);
    virtual bool WriteMolecule(OBBase* pOb, OBConversion* pConv);

    ///////////////////////////////////////////////////////

    virtual const char* TargetClassDescription(){return OBMol::ClassDescription();};

    virtual const char* SpecificationURL()
    {return "http://www.daylight.com/smiles/";};

    virtual int SkipObjects(int n, OBConversion* pConv)
    {
      if(n==0) return 1; //already points after current line
      istream& ifs = *pConv->GetInStream();
      if (ifs.eof())
        return -1;

      int i=0;
      while(i<n && ifs.good())
        {
          if(ifs.peek()!='#')
            i++;
          ifs.ignore(numeric_limits<streamsize>::max(),'\n');
        }
      return ifs ? 1 : -1;
    }
  };

  //**************************************************
  class SMIFormat : public SMIBaseFormat
  {
  public:
    //Register this format type ID
    SMIFormat()
    {
      OBConversion::RegisterFormat("smi",this, "chemical/x-daylight-smiles");
      OBConversion::RegisterFormat("smiles",this, "chemical/x-daylight-smiles");
      OBConversion::RegisterOptionParam("n", this);
      OBConversion::RegisterOptionParam("t", this);
      OBConversion::RegisterOptionParam("r", this);
      OBConversion::RegisterOptionParam("a", this);
      OBConversion::RegisterOptionParam("h", this);
      OBConversion::RegisterOptionParam("x", this);
      OBConversion::RegisterOptionParam("C", this);	// "anti-canonical" form (random order)
    }
    virtual const char* Description()
    {
      return
        "SMILES format\n"
        "A linear text format which can describe the connectivity and chirality of a molecule\n"
        "Open Babel implements the `OpenSMILES specification <http://opensmiles.org>`_.\n\n"

        "It also implements an extension to this specification for radicals.\n\n"

        "Note that the ``l <atomno>`` option, used to specify a \"last\" atom, is\n"
        "intended for the generation of SMILES strings to which additional atoms\n"
        "will be concatenated. If the atom specified has an explicit H within a bracket\n"
        "(e.g. ``[nH]`` or ``[C@@H]``) the output will have the H removed along with any\n"
        "associated stereo symbols.\n\n"

        ".. seealso::\n\n"

        "  The :ref:`Canonical_SMILES_format` produces a canonical representation\n"
        "  of the molecule in SMILES format. This is the same as the ``c`` option\n"
        "  below but may be more convenient to use.\n\n"

        "Write Options e.g. -xt\n"
        "  a  Output atomclass like [C:2], if available\n"
        "  c  Output in canonical form\n"
        "  h  Output explicit hydrogens as such\n"
        "  i  Do not include isotopic or chiral markings\n"
        "  n  No molecule name\n"
        "  r  Radicals lower case eg ethyl is Cc\n"
        "  t  Molecule name only\n"
        "  x  append X/Y coordinates in canonical-SMILES order\n"
        "  C  'anti-canonical' random order (mostly for testing)\n"
        "  f  <atomno> Specify the first atom\n"
        "     This atom will be used to begin the SMILES string.\n"
        "  l  <atomno> Specify the last atom\n"
        "     The output will be rearranged so that any additional\n"
        "     SMILES added to the end will be attached to this atom.\n\n";
    }


  };

  //Make an instance of the format class
  SMIFormat theSMIFormat;

  //**************************************************
  class CANSMIFormat : public SMIBaseFormat
  {
  public:
    //Register this format type ID
    CANSMIFormat()
    {
      OBConversion::RegisterFormat("can", this, "chemical/x-daylight-cansmiles");
    }

    virtual bool WriteMolecule(OBBase* pOb, OBConversion* pConv)
    {
      //The "c" option sets us to use canonical ordering
      pConv->AddOption("c",OBConversion::OUTOPTIONS);
      return SMIBaseFormat::WriteMolecule(pOb, pConv);
    }

    ///////////////////////////////////////////////////////

    virtual const char* Description() {
      return
        "Canonical SMILES format\n"
        "A canonical form of the SMILES linear text format\n"
        "The SMILES format is a linear text format which can describe the\n"
       	"connectivity "
        "and chirality of a molecule. Canonical SMILES gives a single\n"
       	"'canonical' form for any particular molecule.\n\n"

        ".. seealso::\n\n"

        "  The \"regular\" :ref:`SMILES_format` gives faster\n"
        "  output, since no canonical numbering is performed.\n\n"

        "Write Options e.g. -xt\n"
        "  a  Output atomclass like [C:2], if available\n"
        "  h  Output explicit hydrogens as such\n"
        "  i  Do not include isotopic or chiral markings\n"
        "  n  No molecule name\n"
        "  r  Radicals lower case eg ethyl is Cc\n"
        "  t  Molecule name only\n"
        "  f  <atomno> Specify the first atom\n"
        "     This atom will be used to begin the SMILES string.\n"
        "  l  <atomno> Specify the last atom\n"
        "     The output will be rearranged so that any additional\n"
        "     SMILES added to the end will be attached to this atom.\n"
        "     See the :ref:`SMILES_format` for more information.\n\n";
    };

  };

  // Make an instance of the format class
  CANSMIFormat theCANSMIFormat;

  //************************************************************

  class OBSmilesParser
  {
    // simple structs to make code more readable

    // see _extbond
    struct ExternalBond
    {
      int digit;
      int prev;
      int order;
      char updown;
    };
    // see _rclose
    struct RingClosureBond
    {
      int digit;
      int prev;
      int order;
      char updown;
      int numConnections;
    };


    char _updown;
    int _order;
    int _prev;
    char *_ptr;
    vector<int>             _vprev;
    vector<RingClosureBond> _rclose;
    vector<ExternalBond>    _extbond;
    vector<int>             _path;
    vector<bool>            _avisit;
    vector<bool>            _bvisit;
    char                    _buffer[BUFF_SIZE];
    vector<int> PosDouble; //for extension: lc atoms as conjugated double bonds
    OBAtomClassData _classdata; // to hold atom class data like [C:2]

    struct StereoRingBond
    {
      vector<OBAtom*> atoms;
      vector<char> updown;
    };
    map<OBBond*, StereoRingBond> _stereorbond; // Remember info on the stereo ring closure bonds

    // stereochimistry
    bool chiralWatch; // set when a tetrahedral atom is read
    map<OBAtom*, OBTetrahedralStereo::Config*> _tetrahedralMap; // map of tetrahedral atoms and their data
    map<OBBond*, char> _upDownMap; // store the '/' & '\' as they occured in smiles
    map<unsigned int, char> _chiralLonePair; // for atoms with potential chiral lone pairs, remember when the l.p. was encountered
    bool squarePlanarWatch; // set when a square planar atom is read
    map<OBAtom*, OBSquarePlanarStereo::Config*> _squarePlanarMap;

  public:

    OBSmilesParser() { }
    ~OBSmilesParser() { }

    bool SmiToMol(OBMol&,const string&);
    bool ParseSmiles(OBMol&);
    bool ParseSimple(OBMol&);
    bool ParseComplex(OBMol&);
    bool ParseRingBond(OBMol&);
    bool ParseExternalBond(OBMol&);
    bool CapExternalBonds(OBMol &mol);
    void FindAromaticBonds(OBMol &mol,OBAtom*,int);
    void FindAromaticBonds(OBMol&);
    void FindOrphanAromaticAtoms(OBMol &mol); //CM 18 Sept 2003
    int NumConnections(OBAtom *);
    void CreateCisTrans(OBMol &mol);
    char SetRingClosureStereo(StereoRingBond rcstereo, OBBond* dbl_bond);
    void InsertTetrahedralRef(OBMol &mol, unsigned long id);
    void InsertSquarePlanarRef(OBMol &mol, unsigned long id);

    bool IsUp(OBBond*);
    bool IsDown(OBBond*);
  };

  /////////////////////////////////////////////////////////////////
  /* Lines starting with # are ignored. Whitespace at the start (including
     blank lines) terminate the input unless -e option is used.
     Valid SMILES reactions such as [C]=O.O>[Fe]>O=C=O.[H][H] with non-null
     reactant and product are accepted and the reactant, product and
     possibly the agent molecules are output when using the Convert interface
     (babel commandline). With the OBConversion functions Read, ReadString
     and ReadFile all SMILES reactions give an error when read with this format.
  */
  bool SMIBaseFormat::ReadMolecule(OBBase* pOb, OBConversion* pConv)
  {
    OBMol* pmol = pOb->CastAndClear<OBMol>();

    istream &ifs = *pConv->GetInStream();
    string ln, smiles, title;
    string::size_type pos, pos2;

    //Ignore lines that start with #
    while(ifs && ifs.peek()=='#')
      if(!getline(ifs, ln))
        return false;

    //Get title
    if(getline(ifs, ln))
    {
      pos = ln.find_first_of(" \t");
      if(pos!=string::npos)
      {
        smiles = ln.substr(0,pos);
        title = ln.substr(pos+1);
        Trim(title);
        pmol->SetTitle(title.c_str());
      }
      else
        smiles = ln;
    }
    pos= smiles.find_first_of(",<\"\'!^&_|{}");
    if(pos!=string::npos)
    {
      obErrorLog.ThrowError(__FUNCTION__,
        smiles + " contained a character '" + smiles[pos] + "' which is invalid in SMILES", obError);
      return false;
    }

    pmol->SetDimension(0);
    OBSmilesParser sp;

    pos = smiles.find('>');
    if(pos==string::npos)
      return sp.SmiToMol(*pmol, smiles); //normal return

    //Possibly a SMILES reaction
    OBMol* pmol1 = new OBMol;
    OBMol* pmol2 = new OBMol;
    if(sp.SmiToMol(*pmol1, smiles.substr(0,pos))                  //reactant
      && (pos2 = smiles.find('>', pos+1))!=string::npos)          //second >
    {
      if((pos2-pos==1                                             //no agent
         || sp.SmiToMol(*pmol2, smiles.substr(pos+1,pos2-pos-1))) //agent
         && sp.SmiToMol(*pmol, smiles.substr(pos2+1)))            //product
      {
        //Is a valid reaction. Output species
        pmol1->SetDimension(0);
        pmol1->SetTitle(title);
        pmol2->SetTitle(title);
        pmol->SetTitle(title);
        pmol2->SetDimension(0);
        if(pConv->AddChemObject(
          pmol1->DoTransformations(pConv->GetOptions(OBConversion::GENOPTIONS),pConv))<0)//using Read or ReadString or ReadFile
        {
          obErrorLog.ThrowError(__FUNCTION__, smiles +
            " SmilesFormat accepts reactions only with the \"Convert\" (commandline) interface", obError);

          return false; //Error
        }
        if(pmol2->NumAtoms())
           pConv->AddChemObject(
             pmol2->DoTransformations(pConv->GetOptions(OBConversion::GENOPTIONS),pConv));
        return true; //valid reaction return
      }
    }
    obErrorLog.ThrowError(__FUNCTION__, smiles + " contained '>' but was not a acceptable reaction", obError);
    return false;
  }

  //////////////////////////////////////////////

  bool OBSmilesParser::SmiToMol(OBMol &mol,const string &s)
  {
    strncpy(_buffer,s.c_str(), BUFF_SIZE);
    _buffer[BUFF_SIZE - 1] = '\0';

    _vprev.clear();
    _rclose.clear();
    _prev=0;
    chiralWatch=false;
    squarePlanarWatch = false;

    if (!ParseSmiles(mol) || mol.NumAtoms() == 0)
      {
        mol.Clear();
        return(false);
      }

    map<OBAtom*, OBTetrahedralStereo::Config*>::iterator i;
    for (i = _tetrahedralMap.begin(); i != _tetrahedralMap.end(); ++i)
      delete i->second;
    _tetrahedralMap.clear();

    map<OBAtom*, OBSquarePlanarStereo::Config*>::iterator j;
    for (j = _squarePlanarMap.begin(); j != _squarePlanarMap.end(); ++j)
      delete j->second;
    _squarePlanarMap.clear();

    mol.SetAutomaticFormalCharge(false);

    return(true);
  }

  bool OBSmilesParser::ParseSmiles(OBMol &mol)
  {
    mol.BeginModify();

    for (_ptr=_buffer;*_ptr;_ptr++)
      {
        //cerr << " parsing " << _ptr << endl;

        if (*_ptr<0 || isspace(*_ptr))
          continue;
        else if (isdigit(*_ptr) || *_ptr == '%') //ring open/close
          {
            if(!ParseRingBond(mol))
              return false;
            continue;
          }
        else if(*_ptr == '&') //external bond
          {
            ParseExternalBond(mol);
            continue;
          }
        else
          switch(*_ptr)
            {
            case '.':
              _prev=0;
              break;
            case '(':
              _vprev.push_back(_prev);
              break;
            case ')':
              if(_vprev.empty()) //CM
                return false;
              _prev = _vprev.back();
              _vprev.pop_back();
              break;
            case '[':
              if (!ParseComplex(mol))
                {
                  mol.EndModify();
                  mol.Clear();
                  return(false);
                }
              break;
            case '-':
              _order = 1;
              break;
            case '=':
              _order = 2;
              break;
            case '#':
              _order = 3;
              break;
            case '$':
              _order = 4;
              break;
            case ':':
              _order = 5;
              break;
            case '/':
              _updown = BondDownChar;
              break;
            case '\\':
              _updown = BondUpChar;
              break;
            default:
              if (!ParseSimple(mol))
                {
                  mol.EndModify();
                  mol.Clear();
                  return(false);
                }
            } // end switch
      } // end for _ptr

    // place dummy atoms for each unfilled external bond
    if(!_extbond.empty())
      CapExternalBonds(mol);

    //Save atom class values in OBGenericData object if there are any
    if(_classdata.size()>0)
      mol.SetData(new OBAtomClassData(_classdata));

    // Check to see if we've balanced out all ring closures
    // They are removed from _rclose when matched
    if (!_rclose.empty()) {
      mol.EndModify();
      mol.Clear();

      stringstream errorMsg;
      errorMsg << "Invalid SMILES string: " << _rclose.size() << " unmatched "
               << "ring bonds." << endl;
      obErrorLog.ThrowError(__FUNCTION__, errorMsg.str(), obWarning);
      return false; // invalid SMILES since rings aren't properly closed
    }

    //set aromatic bond orders
    mol.SetAromaticPerceived();
    FindAromaticBonds(mol);
    FindOrphanAromaticAtoms(mol);// CM 18 Sept 2003
    mol.AssignSpinMultiplicity();
    mol.UnsetAromaticPerceived();

    mol.EndModify();

    //Extension which interprets cccc with conjugated double bonds if niether
    //of its atoms is aromatic.
    vector<int>::iterator itr;
    for(itr=PosDouble.begin();itr!=PosDouble.end();++itr)
      {
        OBBond* bond = mol.GetBond(*itr);
        if(!bond->GetBeginAtom()->IsAromatic() && !bond->GetEndAtom()->IsAromatic())
          {
            bond->SetBO(2);
            mol.UnsetImplicitValencePerceived();
          }
      }

    // Add the data stored inside the _tetrahedralMap to the atoms now after end
    // modify so they don't get lost.
    if(_tetrahedralMap.size() > 0) {
      OBAtom* atom;
      map<OBAtom*, OBTetrahedralStereo::Config*>::iterator ChiralSearch;
      for(ChiralSearch = _tetrahedralMap.begin(); ChiralSearch != _tetrahedralMap.end(); ChiralSearch++) {
        atom = ChiralSearch->first;
        OBTetrahedralStereo::Config *ts = ChiralSearch->second;
        if (!ts)
          continue;
        if (ts->refs.size() != 3)
          continue;
        if (ts->refs[2] == OBStereo::NoRef) {
          // This happens where there is chiral lone pair or where there simply aren't enough connections
          // around a chiral atom. We handle the case where there is a S with a chiral lone pair.
          // All other cases are ignored, and raise a warning. (Note that S can be chiral even without
          // a lone pair, think of C[S@](=X)(=Y)Cl.

          // We have remembered where to insert the lone pair in the _chiralLonePair map
          map<unsigned int, char>::iterator m_it = _chiralLonePair.find(atom->GetIdx());
          if (atom->GetAtomicNum() == 16 && m_it != _chiralLonePair.end()) { // Sulfur
            ts->refs[2] = ts->refs[1]; ts->refs[1] = ts->refs[0];
            if (m_it->second == 0) { // Insert in the 'from' position
              ts->refs[0] = ts->from;
              ts->from = OBStereo::ImplicitRef;
            }
            else // Insert in the refs[0] position
              ts->refs[0] = OBStereo::ImplicitRef;
          }
          else { // Ignored by Open Babel
            stringstream ss;
            ss << "Ignoring stereochemistry. Not enough connections to this atom. " << mol.GetTitle();
            obErrorLog.ThrowError(__FUNCTION__, ss.str(), obWarning);
            continue;
          }
        }

        // cout << "*ts = " << *ts << endl;
        OBTetrahedralStereo *obts = new OBTetrahedralStereo(&mol);
        obts->SetConfig(*ts);
        mol.SetData(obts);
      }
    }

    // Add the data stored inside the _squarePlanarMap to the atoms now after end
    // modify so they don't get lost.
    if(_squarePlanarMap.size() > 0) {
      OBAtom* atom;
      map<OBAtom*, OBSquarePlanarStereo::Config*>::iterator ChiralSearch;
      for(ChiralSearch = _squarePlanarMap.begin(); ChiralSearch != _squarePlanarMap.end(); ChiralSearch++) {
        atom = ChiralSearch->first;
        OBSquarePlanarStereo::Config *sp = ChiralSearch->second;
        if (!sp)
          continue;
        if (sp->refs.size() != 4)
          continue;

        // cout << "*ts = " << *ts << endl;
        OBSquarePlanarStereo *obsp = new OBSquarePlanarStereo(&mol);
        obsp->SetConfig(*sp);
        mol.SetData(obsp);
      }
    }

    CreateCisTrans(mol);
    StereoFrom0D(&mol);

    return(true);
  }

  bool OBSmilesParser::IsUp(OBBond *bond)
  {
    map<OBBond*, char>::iterator UpDownSearch;
    UpDownSearch = _upDownMap.find(bond);
    if (UpDownSearch != _upDownMap.end())
      if (UpDownSearch->second == BondUpChar)
        return true;
    return false;
  }

  bool OBSmilesParser::IsDown(OBBond *bond)
  {
    map<OBBond*, char>::iterator UpDownSearch;
    UpDownSearch = _upDownMap.find(bond);
    if (UpDownSearch != _upDownMap.end())
      if (UpDownSearch->second == BondDownChar)
        return true;
    return false;
  }

  char OBSmilesParser::SetRingClosureStereo(StereoRingBond rcstereo, OBBond* dbl_bond)
  {
    // Ring Closure bonds appear twice (at opening and closure).
    // If involved in cis/trans stereo, then the stereo may be
    // specified at either end or indeed both. Although Open Babel
    // will only write out SMILES with the stereo at one end (the end
    // on the double bond), it must handle all cases when reading.

    // For example:
    //
    //         C
    //        /|
    //   C = C |
    //  /     \|
    // C       N
    //
    // Can be written as:
    // (a) C/C=C/1\NC1 -- preferred
    // (b) C/C=C1\NC\1
    // (c) C/C=C/1\NC\1
    //  or indeed by replacing the "\N" with "N".

    // If the stereo chemistry for a ring closure is inconsistently specified,
    // it is ignored. In that case, if a stereo symbol does not exist for its
    // partner bond on the double bond (e.g. (b) below), then the stereo is unspecified.

    // (a) C/C=C/1NC\1 -- specified stereo
    // (b) C/C=C/1NC/1  -- ignore ring closure stereo => treated as C/C=C1NC1  => CC=C1NC1
    // (c) C/C=C/1\NC/1 -- ignore ring closure stereo => treated as C/C=C1\NC1 => C/C=C/1\NC1

    // The ring closure bond is either up or down with respect
    // to the double bond. Our task here is to figure out which it is,
    // based on the contents of _stereorbond.

    bool found = false; // We have found the answer
    bool updown = true; // The answer

    if (rcstereo.updown[0] == BondUpChar || rcstereo.updown[0] == BondDownChar) { // Is there a stereo symbol at the opening?
      bool on_dbl_bond = (rcstereo.atoms[0] == dbl_bond->GetBeginAtom() || rcstereo.atoms[0] == dbl_bond->GetEndAtom());
      updown = (rcstereo.updown[0]==BondUpChar) ^ on_dbl_bond;
      found = true;
    }
    if (rcstereo.updown[1] == BondUpChar || rcstereo.updown[1] == BondDownChar) { // Is there a stereo symbol at the closing?
      bool on_dbl_bond = (rcstereo.atoms[1] == dbl_bond->GetBeginAtom() || rcstereo.atoms[1] == dbl_bond->GetEndAtom());
      bool new_updown = (rcstereo.updown[1]==BondUpChar) ^ on_dbl_bond;
      if (!found) {
        updown = new_updown;
        found = true;
      }
      else if (new_updown != updown) {
        obErrorLog.ThrowError(__FUNCTION__, "Ignoring the cis/trans stereochemistry specified for the ring closure\n  as it is inconsistent.", obWarning);
        found = false;
      }
    }

    if (!found)
      return 0;
    else
      return updown ? 1 : 2;
  }

  void OBSmilesParser::CreateCisTrans(OBMol &mol)
  {
    // Create a vector of CisTransStereo objects for the molecule
    FOR_BONDS_OF_MOL(dbi, mol) {

      OBBond *dbl_bond = &(*dbi);

      // Not a double bond?
      if (!dbl_bond->IsDouble() || dbl_bond->IsAromatic())
        continue;

      // Find the single bonds around the atoms connected by the double bond.
      // FIXME: "While we're at it, note whether the pair of atoms on either end are
      // identical, in which case it's not cis/trans." This code must be in the original
      // smilesformat.cpp

      OBAtom *a1 = dbl_bond->GetBeginAtom();
      OBAtom *a2 = dbl_bond->GetEndAtom();

      // Check that both atoms on the double bond have at least one
      // other neighbor, but not more than two other neighbors;
      int v1 = a1->GetValence();
      int v2 = a2->GetValence();
      if (v1 < 2 || v1 > 3 || v2 < 2 || v2 > 3) {
        continue;
      }

      vector<OBAtom*> dbl_bond_atoms;
      dbl_bond_atoms.push_back(a1);
      dbl_bond_atoms.push_back(a2);

      vector<bool> bond_stereo(2, true); // Store the stereo of the chosen bonds at each end of the dbl bond
      vector<OBBond*> stereo_bond(2, (OBBond*) NULL); // These are the chosen stereo bonds
      vector<OBBond*> other_bond(2, (OBBond*) NULL);  // These are the 'other' bonds at each end

      for (int i = 0; i < 2; ++i) { // Loop over each end of the double bond in turn

        FOR_BONDS_OF_ATOM(bi, dbl_bond_atoms[i]) {
          OBBond *b = &(*bi);
          if (b == dbl_bond) continue;
          if (!(IsUp(b) || IsDown(b))) {
            other_bond[i] = b; // Use this for the 'other' bond
            continue;
          }

          bool found = true;
          bool stereo;
          map<OBBond*, StereoRingBond>::iterator sb_it = _stereorbond.find(b);
          if (sb_it == _stereorbond.end()) // Not a ring closure
            // True/False for "up/down if moved to before the double bond C"
            stereo = !(IsUp(b) ^ (b->GetNbrAtomIdx(dbl_bond_atoms[i]) < dbl_bond_atoms[i]->GetIdx())) ;
          else  {                                                               // Is a ring closure
            char bc_result = SetRingClosureStereo(sb_it->second, dbl_bond);
            if (bc_result)
              stereo = bc_result == 1 ? true : false;
            else
              found = false;
          }

          if (!found) { // This cannot be used as the stereo bond
            other_bond[i] = b; // Use this for the 'other' bond
            continue;
          }

          if (stereo_bond[i] == NULL) { // This is a first stereo bond
            stereo_bond[i] = b; // Use this for the 'stereo' bond
            bond_stereo[i] = stereo;
          }
          else {               // This is a second stereo bond
            if (stereo != bond_stereo[i]) { // Verify that the other stereo bond (on the same atom) has opposite stereo
              other_bond[i] = b; // Use this for the 'other' bond
            }
            else  {
              obErrorLog.ThrowError(__FUNCTION__, "Error in cis/trans stereochemistry specified for the double bond\n", obWarning);
              stereo_bond[i] = (OBBond*) NULL;
            }
          }
        }
      }

      if (stereo_bond[0] == NULL || stereo_bond[1] == NULL) continue; // No cis/trans

      // other_bond will contain NULLs if there are bonds to implicit hydrogens
      unsigned int second = (other_bond[0] == NULL) ? OBStereo::ImplicitRef : other_bond[0]->GetNbrAtom(a1)->GetId();
      unsigned int fourth = (other_bond[1] == NULL) ? OBStereo::ImplicitRef : other_bond[1]->GetNbrAtom(a2)->GetId();


      OBCisTransStereo *ct = new OBCisTransStereo(&mol);
      OBCisTransStereo::Config cfg;
      cfg.begin = a1->GetId();
      cfg.end = a2->GetId();

       // If bond_stereo[0]==bond_stereo[1], this means cis for stereo_bond[0] and stereo_bond[1].
      if (bond_stereo[0] == bond_stereo[1])
        cfg.refs = OBStereo::MakeRefs(stereo_bond[0]->GetNbrAtom(a1)->GetId(), second,
                                      fourth, stereo_bond[1]->GetNbrAtom(a2)->GetId());
      else
        cfg.refs = OBStereo::MakeRefs(stereo_bond[0]->GetNbrAtom(a1)->GetId(), second,
                                      stereo_bond[1]->GetNbrAtom(a2)->GetId(), fourth);
      ct->SetConfig(cfg);
      // add the data to the atom
      mol.SetData(ct);
    }
  }

  void OBSmilesParser::FindOrphanAromaticAtoms(OBMol &mol)
  {
    //Facilitates the use lower case shorthand for radical entry
    //Atoms which are marked as aromatic but have no aromatic bonds
    //are taken to be radical centres
    OBAtom *atom;
    vector<OBAtom*>::iterator j;

    for (atom = mol.BeginAtom(j);atom;atom = mol.NextAtom(j))
      if(atom->IsAromatic())
        {
          if(atom->CountBondsOfOrder(5)<2) //bonds order 5 set in FindAromaticBonds()
            //not proper aromatic atoms - could be conjugated chain or radical centre
            atom->UnsetAromatic();
          else
            {
              //recognized as aromatic, so are not radicals
              atom->SetSpinMultiplicity(0);
            }
        }
  }

  void OBSmilesParser::FindAromaticBonds(OBMol &mol)
  {
    _path.clear();
    _avisit.clear();
    _bvisit.clear();
    _avisit.resize(mol.NumAtoms()+1);
    _bvisit.resize(mol.NumBonds());
    _path.resize(mol.NumAtoms()+1);

    OBBond *bond;
    vector<OBBond*>::iterator i;
    for (bond = mol.BeginBond(i);bond;bond = mol.NextBond(i))
      if (!bond->GetBeginAtom()->IsAromatic() ||
          !bond->GetEndAtom()->IsAromatic())
        _bvisit[bond->GetIdx()] = true;

    OBAtom *atom;
    vector<OBAtom*>::iterator j;

    for (atom = mol.BeginAtom(j);atom;atom = mol.NextAtom(j))
      if(!_avisit[atom->GetIdx()] && atom->IsAromatic())
        FindAromaticBonds(mol,atom,0);
  }

  void OBSmilesParser::FindAromaticBonds(OBMol &mol,OBAtom *atom,int depth )
  {
    OBBond *bond;
    vector<OBBond*>::iterator k;

    if (_avisit[atom->GetIdx()])
      {
        int j = depth-1;
        bond=mol.GetBond(_path[j--]);
        if (bond->GetBO() != 2) { // don't rewrite explicit double bonds
          bond->SetBO(5);
        }
        while( j >= 0 )
          {
            bond=mol.GetBond(_path[j--]);
            if (bond->GetBO() != 2) { // don't rewrite explicit double bonds
              bond->SetBO(5);
            }
            if(bond->GetBeginAtom() == atom || bond->GetEndAtom() == atom)
              break;
          }
      }
    else
      {
        _avisit[atom->GetIdx()] = true;
        for(bond = atom->BeginBond(k);bond;bond = atom->NextBond(k))
          if( !_bvisit[bond->GetIdx()])
            {
              _path[depth] = bond->GetIdx();
              _bvisit[bond->GetIdx()] = true;
              FindAromaticBonds(mol,bond->GetNbrAtom(atom),depth+1);
            }
      }
  }

  void OBSmilesParser::InsertTetrahedralRef(OBMol &mol, unsigned long id)
  {
    map<OBAtom*, OBTetrahedralStereo::Config*>::iterator ChiralSearch;
    ChiralSearch = _tetrahedralMap.find(mol.GetAtom(_prev));
    if (ChiralSearch != _tetrahedralMap.end() && ChiralSearch->second != NULL)
    {
      int insertpos = NumConnections(ChiralSearch->first) - 2;
      if (insertpos > 2)
        return;
      if (insertpos < 0) {
        if (ChiralSearch->second->from != OBStereo::NoRef)
          obErrorLog.ThrowError(__FUNCTION__, "Warning: Overwriting previous from reference id.", obWarning);

        (ChiralSearch->second)->from = id;
        // cerr << "Adding " << id << " at Config.from to " << ChiralSearch->second << endl;
      } else {
        if (ChiralSearch->second->refs[insertpos] != OBStereo::NoRef)
          obErrorLog.ThrowError(__FUNCTION__, "Warning: Overwriting previously set reference id.", obWarning);

        (ChiralSearch->second)->refs[insertpos] = id;
        // cerr << "Adding " << id << " at " << insertpos << " to " << ChiralSearch->second << endl;
      }
    }
  }

  void OBSmilesParser::InsertSquarePlanarRef(OBMol &mol, unsigned long id)
  {
    map<OBAtom*, OBSquarePlanarStereo::Config*>::iterator ChiralSearch;
    ChiralSearch = _squarePlanarMap.find(mol.GetAtom(_prev));
    if (ChiralSearch != _squarePlanarMap.end() && ChiralSearch->second != NULL)
    {
      int insertpos = NumConnections(ChiralSearch->first) - 1;
      if (insertpos < 0) {
        if (ChiralSearch->second->refs[0] != OBStereo::NoRef)
          obErrorLog.ThrowError(__FUNCTION__, "Warning: Overwriting previous from reference id.", obWarning);

        (ChiralSearch->second)->refs[0] = id;
        //cerr << "Adding " << id << " at Config.refs[0] to " << ChiralSearch->second << endl;
      } else {
        if (ChiralSearch->second->refs[insertpos] != OBStereo::NoRef)
          obErrorLog.ThrowError(__FUNCTION__, "Warning: Overwriting previously set reference id.", obWarning);

        (ChiralSearch->second)->refs[insertpos] = id;
        //cerr << "Adding " << id << " at " << insertpos << " to " << ChiralSearch->second << endl;
      }
    }
  }


  bool OBSmilesParser::ParseSimple(OBMol &mol)
  {
    char symbol[3];
    int element;
    bool arom=false;
    memset(symbol,'\0',sizeof(char)*3);

    if (isupper(*_ptr))
      switch(*_ptr)
        {
        case 'C':
          _ptr++;
          if (*_ptr == 'l')
            {
              strcpy(symbol,"Cl");
              element = 17;
            }
          else
            {
              symbol[0] = 'C';
              element = 6;
              _ptr--;
            }
          break;

        case 'N':
          element = 7;
          symbol[0] = 'N';
          break;
        case 'O':
          element = 8;
          symbol[0] = 'O';
          break;
        case 'S':
          element = 16;
          symbol[0] = 'S';
          break;
        case 'P':
          element = 15;
          symbol[0] = 'P';
          break;
        case 'F':
          element = 9;
          symbol[0] = 'F';
          break;
        case 'I':
          element = 53;
          symbol[0] = 'I';
          break;

        case 'B':
          _ptr++;
          if (*_ptr == 'r')
            {
              element = 35;
              strcpy(symbol,"Br");
            }
          else
            {
              element = 5;
              symbol[0] = 'B';
              _ptr--;
            }
          break;
        default:
          return(false);
        }
    else
      {
        arom = true;
        switch(*_ptr)
          {
          case 'c':
            element = 6;
            symbol[0] = 'C';
            break;
          case 'n':
            element = 7;
            symbol[0] = 'N';
            break;
          case 'o':
            element = 8;
            symbol[0] = 'O';
            break;
          case 'p':
            element = 15;
            symbol[0] = 'P';
            break;
          case 's':
            element = 16;
            symbol[0] = 'S';
            break;
          case '*':
            element = 0;
            strcpy(symbol,"Du");
            arom = false;
            break;
          case 'b':
            obErrorLog.ThrowError(__FUNCTION__, "Illegal aromatic element b", obWarning);
            element = 5;
            strcpy(symbol,"B");
            break;
          default:
            return(false);
          }
      }

    OBAtom *atom = mol.NewAtom();
    atom->SetAtomicNum(element);
    atom->SetType(symbol);

    if (arom)
      {
        atom->SetAromatic();
        atom->SetSpinMultiplicity(2); // CM 18 Sept 2003
      }
    else
      atom->ForceImplH();//ensures atom is never hydrogen deficient


    // Untrue, but necessary to avoid perception being called in OBAtom::IsAromatic()
    // on incomplete molecule. Undone at end of function.
    mol.SetAromaticPerceived();

    if (_prev) //need to add bond
      {
        /* CM 18 Sept 2003
           An extension to the SMILES format has been added so that lower case c,n,o can
           represent a radical centre: CcC is isopropyl radical;
           and cccc... a carbon chain bonded by conjugated double bonds.
           Fails sometimes when using c as both aromatic and as the extened form.
           For benzyl radical C6H5CH2. c1ccccc1c is ok; c1cc(c)ccc1 fails.
           Radical centres should not be involved in ring closure:
           for cyclohexyl radical C1cCCCC1 is ok, c1CCCCC1 is not.

           Implementation
           Atoms c,n,o, etc initially added as a radical centre
           unless _prev is a radical centre when both are made a normal atoms
           connected by a double bond. But making this bond double is deferred until
           the molecule has been constructed, because it is not appropriate if
           either of the atoms is really part of an aromatic ring.

           Since they are still marked as aromatic, FindAromaticBonds() will
           replace the bonds by aromatic bonds if they are in a ring.
           FindOrphanAromand removes the aromatic tag from the atoms not found in this way
           and removes stray radical centres.
        */
        OBAtom* prevatom = mol.GetAtom(_prev);
        assert(prevatom);
        if(arom && prevatom->IsAromatic())
          {
            if (_order != 2) _order=5; //Potential aromatic bond -- but flag explicit double bonds

            if (prevatom->GetSpinMultiplicity())
              {
                //Previous atom had been marked, so bond is potentially a double bond
                //if it is not part of an aromatic ring. This will be decided when all
                //molecule has been constructed.
                PosDouble.push_back(mol.NumBonds()); //saves index of bond about to be added
                prevatom->SetSpinMultiplicity(0);
                atom->SetSpinMultiplicity(0);
              }
          }

        mol.AddBond(_prev, mol.NumAtoms(), _order);
        // store up/down
        if (_updown == BondUpChar || _updown == BondDownChar)
          _upDownMap[mol.GetBond(_prev, mol.NumAtoms())] = _updown;

        InsertTetrahedralRef(mol, mol.NumAtoms() - 1);
        InsertSquarePlanarRef(mol, mol.NumAtoms() - 1);
      }

    //set values
    _prev = mol.NumAtoms();
    _order = 1;
    _updown = ' ';

    mol.UnsetAromaticPerceived(); //undo
    return(true);
  }

  bool OBSmilesParser::ParseComplex(OBMol &mol)
  {
    char symbol[7];
    int element=0;
    int isotope=0;
    int isoPtr=0;
    bool arom=false;
    memset(symbol,'\0',sizeof(char)*7);

    _ptr++;

    //grab isotope information
    for (;*_ptr && isdigit(*_ptr) && (isoPtr <= 6);_ptr++)
      {
        symbol[isoPtr] = *_ptr;
        isoPtr++;
      }
    if (isoPtr >= 6)
      return false;
    isotope = atoi(symbol);

    //parse element data
    if (isupper(*_ptr))
      switch(*_ptr)
        {
        case 'C':
          _ptr++;
          switch(*_ptr)
            {
            case 'a':
              element = 20;
              strcpy(symbol,"Ca");
              break;
            case 'd':
              element = 48;
              strcpy(symbol,"Cd");
              break;
            case 'e':
              element = 58;
              strcpy(symbol,"Ce");
              break;
            case 'f':
              element = 98;
              strcpy(symbol,"Cf");
              break;
            case 'l':
              element = 17;
              strcpy(symbol,"Cl");
              break;
            case 'm':
              element = 96;
              strcpy(symbol,"Cm");
              break;
            case 'n':
              element = 112;
              strcpy(symbol,"Cn");
              break;
            case 'o':
              element = 27;
              strcpy(symbol,"Co");
              break;
            case 'r':
              element = 24;
              strcpy(symbol,"Cr");
              break;
            case 's':
              element = 55;
              strcpy(symbol,"Cs");
              break;
            case 'u':
              element = 29;
              strcpy(symbol,"Cu");
              break;
            default:
              element =  6;
              symbol[0] = 'C';
              _ptr--;
            }
          break;

        case 'N':
          _ptr++;
          switch(*_ptr)
            {
            case 'a':
              element =  11;
              strcpy(symbol,"Na");
              break;
            case 'b':
              element =  41;
              strcpy(symbol,"Nb");
              break;
            case 'd':
              element =  60;
              strcpy(symbol,"Nd");
              break;
            case 'e':
              element =  10;
              strcpy(symbol,"Ne");
              break;
            case 'i':
              element =  28;
              strcpy(symbol,"Ni");
              break;
            case 'o':
              element = 102;
              strcpy(symbol,"No");
              break;
            case 'p':
              element =  93;
              strcpy(symbol,"Np");
              break;
            default:
              element =   7;
              symbol[0] = 'N';
              _ptr--;
            }
          break;

        case('O'):
          _ptr++;
          if(*_ptr == 's')
            {
              element = 76;
              strcpy(symbol,"Os");
            }
          else
            {
              element = 8;
              symbol[0] = 'O';
              _ptr--;
            }
          break;

        case 'P':
          _ptr++;
          switch(*_ptr)
            {
            case 'a':
              element = 91;
              strcpy(symbol,"Pa");
              break;
            case 'b':
              element = 82;
              strcpy(symbol,"Pb");
              break;
            case 'd':
              element = 46;
              strcpy(symbol,"Pd");
              break;
            case 'm':
              element = 61;
              strcpy(symbol,"Pm");
              break;
            case 'o':
              element = 84;
              strcpy(symbol,"Po");
              break;
            case 'r':
              element = 59;
              strcpy(symbol,"Pr");
              break;
            case 't':
              element = 78;
              strcpy(symbol,"Pt");
              break;
            case 'u':
              element = 94;
              strcpy(symbol,"Pu");
              break;
            default:
              element = 15;
              symbol[0] = 'P';
              _ptr--;
            }
          break;

        case('S'):
          _ptr++;
          switch(*_ptr)
            {
            case 'b':
              element = 51;
              strcpy(symbol,"Sb");
              break;
            case 'c':
              element = 21;
              strcpy(symbol,"Sc");
              break;
            case 'e':
              element = 34;
              strcpy(symbol,"Se");
              break;
            case 'g':
              element = 106;
              strcpy(symbol,"Sg");
              break;
            case 'i':
              element = 14;
              strcpy(symbol,"Si");
              break;
            case 'm':
              element = 62;
              strcpy(symbol,"Sm");
              break;
            case 'n':
              element = 50;
              strcpy(symbol,"Sn");
              break;
            case 'r':
              element = 38;
              strcpy(symbol,"Sr");
              break;
            default:
              element = 16;
              symbol[0] = 'S';
              _ptr--;
            }
          break;

        case 'B':
          _ptr++;
          switch(*_ptr)
            {
            case 'a':
              element = 56;
              strcpy(symbol,"Ba");
              break;
            case 'e':
              element =  4;
              strcpy(symbol,"Be");
              break;
            case 'h':
              element =  107;
              strcpy(symbol,"Bh");
              break;
            case 'i':
              element = 83;
              strcpy(symbol,"Bi");
              break;
            case 'k':
              element = 97;
              strcpy(symbol,"Bk");
              break;
            case 'r':
              element = 35;
              strcpy(symbol,"Br");
              break;
            default:
              element = 5;
              symbol[0] = 'B';
              _ptr--;
            }
          break;

        case 'F':
          _ptr++;
          switch(*_ptr)
            {
            case 'e':
              element = 26;
              strcpy(symbol,"Fe");
              break;
            case 'm':
              element = 100;
              strcpy(symbol,"Fm");
              break;
            case 'r':
              element = 87;
              strcpy(symbol,"Fr");
              break;
            default:
              element = 9;
              symbol[0] = 'F';
              _ptr--;
            }
          break;

        case 'I':
          _ptr++;
          switch(*_ptr)
            {
            case 'n':
              element = 49;
              strcpy(symbol,"In");
              break;
            case 'r':
              element = 77;
              strcpy(symbol,"Ir");
              break;
            default:
              element = 53;
              symbol[0] = 'I';
              _ptr--;
            }
          break;

        case 'A':
          _ptr++;
          switch(*_ptr)
            {
            case 'c':
              element = 89;
              strcpy(symbol,"Ac");
              break;
            case 'g':
              element = 47;
              strcpy(symbol,"Ag");
              break;
            case 'l':
              element = 13;
              strcpy(symbol,"Al");
              break;
            case 'm':
              element = 95;
              strcpy(symbol,"Am");
              break;
            case 'r':
              element = 18;
              strcpy(symbol,"Ar");
              break;
            case 's':
              element = 33;
              strcpy(symbol,"As");
              break;
            case 't':
              element = 85;
              strcpy(symbol,"At");
              break;
            case 'u':
              element = 79;
              strcpy(symbol,"Au");
              break;
            default:
              _ptr--;
              return(false);
            }
          break;

        case 'D':
          _ptr++;
          switch(*_ptr)
            {
            case 'b':
              element = 105;
              strcpy(symbol,"Db");
              break;
            case 's':
              element = 110;
              strcpy(symbol,"Ds");
              break;
            case 'y':
              element = 66;
              strcpy(symbol,"Dy");
              break;
            default:
              _ptr--;
              return(false);
            }
          break;

        case 'E':
          _ptr++;
          switch(*_ptr)
            {
            case 'r':
              element = 68;
              strcpy(symbol,"Er");
              break;
            case 's':
              element = 99;
              strcpy(symbol,"Es");
              break;
            case 'u':
              element = 63;
              strcpy(symbol,"Eu");
              break;
            default:
              _ptr--;
              return(false);
            }
          break;

        case 'G':
          _ptr++;
          switch (*_ptr)
            {
            case 'a':
              element = 31;
              strcpy(symbol,"Ga");
              break;
            case 'd':
              element = 64;
              strcpy(symbol,"Gd");
              break;
            case 'e':
              element = 32;
              strcpy(symbol,"Ge");
              break;
            default:
              _ptr--;
              return(false);
            }
          break;

        case 'H':
          _ptr++;
          switch (*_ptr)
            {
            case 'e':
              element =  2;
              strcpy(symbol,"He");
              break;
            case 'f':
              element = 72;
              strcpy(symbol,"Hf");
              break;
            case 'g':
              element = 80;
              strcpy(symbol,"Hg");
              break;
            case 'o':
              element = 67;
              strcpy(symbol,"Ho");
              break;
            case 's':
              element = 108;
              strcpy(symbol,"Hs");
              break;
            default:
              element = 1;
              symbol[0] = 'H';
              _ptr--;
            }
          break;

        case 'K':
          _ptr++;
          if(*_ptr == 'r')
            {
              element = 36;
              strcpy(symbol,"Kr");
            }
          else
            {
              element = 19;
              symbol[0] = 'K';
              _ptr--;
            }
          break;

        case 'L':
          _ptr++;
          switch(*_ptr)
            {
            case 'a':
              element =  57;
              strcpy(symbol,"La");
              break;
            case 'i':
              element =   3;
              strcpy(symbol,"Li");
              break;
            case 'r':
              element = 103;
              strcpy(symbol,"Lr");
              break;
            case 'u':
              element =  71;
              strcpy(symbol,"Lu");
              break;
            default:
              _ptr--;
              return(false);
            }
          break;

        case 'M':
          _ptr++;
          switch(*_ptr)
            {
            case 'd':
              element = 101;
              strcpy(symbol,"Md");
              break;
            case 'g':
              element =  12;
              strcpy(symbol,"Mg");
              break;
            case 'n':
              element =  25;
              strcpy(symbol,"Mn");
              break;
            case 'o':
              element =  42;
              strcpy(symbol,"Mo");
              break;
            case 't':
              element =  109;
              strcpy(symbol,"Mt");
              break;
            default:
              _ptr--;
              return(false);
            }
          break;

        case 'R':
          _ptr++;
          switch(*_ptr)
            {
            case 'a':
              element = 88;
              strcpy(symbol,"Ra");
              break;
            case 'b':
              element = 37;
              strcpy(symbol,"Rb");
              break;
            case 'e':
              element = 75;
              strcpy(symbol,"Re");
              break;
            case 'f':
              element = 104;
              strcpy(symbol,"Rf");
              break;
            case 'g':
              element = 111;
              strcpy(symbol,"Rg");
              break;
            case 'h':
              element = 45;
              strcpy(symbol,"Rh");
              break;
            case 'n':
              element = 86;
              strcpy(symbol,"Rn");
              break;
            case 'u':
              element = 44;
              strcpy(symbol,"Ru");
              break;
            default:
              _ptr--;
              return(false);
            }
          break;

        case 'T':
          _ptr++;
          switch(*_ptr)
            {
            case 'a':
              element = 73;
              strcpy(symbol,"Ta");
              break;
            case 'b':
              element = 65;
              strcpy(symbol,"Tb");
              break;
            case 'c':
              element = 43;
              strcpy(symbol,"Tc");
              break;
            case 'e':
              element = 52;
              strcpy(symbol,"Te");
              break;
            case 'h':
              element = 90;
              strcpy(symbol,"Th");
              break;
            case 'i':
              element = 22;
              strcpy(symbol,"Ti");
              break;
            case 'l':
              element = 81;
              strcpy(symbol,"Tl");
              break;
            case 'm':
              element = 69;
              strcpy(symbol,"Tm");
              break;
            default:
              _ptr--;
              return(false);
            }
          break;

        case('U'):  element = 92;
          symbol[0] = 'U';
          break;
        case('V'):  element = 23;
          symbol[0] = 'V';
          break;
        case('W'):  element = 74;
          symbol[0] = 'W';
          break;

        case('X'):
          _ptr++;
          if (*_ptr == 'e')
            {
              element = 54;
              strcpy(symbol,"Xe");
            }
          else
            {
              _ptr--;
              return(false);
            }
          break;

        case('Y'):
          _ptr++;
          if (*_ptr == 'b')
            {
              element = 70;
              strcpy(symbol,"Yb");
            }
          else
            {
              element = 39;
              symbol[0] = 'Y';
              _ptr--;
            }
          break;

        case('Z'):
          _ptr++;
          switch(*_ptr)
            {
            case 'n':
              element = 30;
              strcpy(symbol,"Zn");
              break;
            case 'r':
              element = 40;
              strcpy(symbol,"Zr");
              break;
            default:
              _ptr--;
              return(false);
            }
          break;
        }
    else
      {
        arom = true;
        switch(*_ptr)
          {
          case 'c':
            element = 6;
            symbol[0] = 'C';
            break;
          case 'n':
            element = 7;
            symbol[0] = 'N';
            break;
          case 'o':
            element = 8;
            symbol[0] = 'O';
            break;
          case 'p':
            element = 15;
            symbol[0] = 'P';
            break;
          case 'a':
            _ptr++;
            if (*_ptr == 's')
              {
                element = 33;
                strcpy(symbol,"As");
              }
            else
              return(false);
            break;
          case '*':
            element = 0;
            strcpy(symbol,"Du");
            arom = false;
            break;
          case 's': //note fall through
            _ptr++;
            if (*_ptr == 'e')
              {
                element = 34;
                strcpy(symbol,"Se");
                break;
              }
            else if (*_ptr == 'i' || *_ptr == 'n' || *_ptr == 'b')
              {
                _ptr--;
              }
            else
              {
                element = 16;
                symbol[0] = 'S';
                _ptr--;
                break;
              }
            //fall through
          default:
            strncpy(symbol, _ptr, 2);
            string symb(symbol);
            symbol[0] = toupper(symbol[0]);
            obErrorLog.ThrowError(__FUNCTION__, "Illegal aromatic element " + symb, obWarning);
            //But convert anyway
            ++_ptr;
            if(symb=="si")
              {
                element = 14;
                break;
              }
            else if(symb=="ge")
              {
                element = 32;
                break;
              }
            else if(symb=="sb")
              {
                element = 51;
                break;
              }
            else if(symb=="bi")
              {
                element = 83;
                break;
              }
            else if(symb=="te")
              {
                element = 52;
                break;
              }
            else if(symb=="sn")
              {
                element = 50;
                break;
              }
            else
              return(false);
          }
      }

    //handle hydrogen count, stereochemistry, and charge

    OBAtom *atom = mol.NewAtom();
    int hcount = 0;
    int charge=0;
    int rad=0;
    int clval=0;
    char tmpc[2];
    tmpc[1] = '\0';

    stringstream errorMsg;

    for (_ptr++;*_ptr && *_ptr != ']';_ptr++)
      {
        switch(*_ptr)
          {
          case '@':
            _ptr++;
            if (*_ptr == 'S') {
              // square planar atom found
              squarePlanarWatch = true;
              if (_squarePlanarMap.find(atom)==_squarePlanarMap.end()) // Prevent memory leak for malformed smiles (PR#3428432)
                _squarePlanarMap[atom] = new OBSquarePlanarStereo::Config;
              _squarePlanarMap[atom]->refs = OBStereo::Refs(4, OBStereo::NoRef);
              _squarePlanarMap[atom]->center = atom->GetId();
              _ptr += 2;
              if (*_ptr == '1')
                _squarePlanarMap[atom]->shape = OBStereo::ShapeU;
              if (*_ptr == '2')
                _squarePlanarMap[atom]->shape = OBStereo::Shape4;
              if (*_ptr == '3')
                _squarePlanarMap[atom]->shape = OBStereo::ShapeZ;
            } else {
              // tetrahedral atom found
              chiralWatch=true;
              if (_tetrahedralMap.find(atom)==_tetrahedralMap.end()) // Prevent memory leak for malformed smiles (PR#3428432)
                _tetrahedralMap[atom] = new OBTetrahedralStereo::Config;
              _tetrahedralMap[atom]->refs = OBStereo::Refs(3, OBStereo::NoRef);
              _tetrahedralMap[atom]->center = atom->GetId();
              if (*_ptr == '@') {
                _tetrahedralMap[atom]->winding = OBStereo::Clockwise;
              } else if (*_ptr == '?') {
                _tetrahedralMap[atom]->specified = false;
              } else {
                _tetrahedralMap[atom]->winding = OBStereo::AntiClockwise;
                _ptr--;
              }
            }
            break;
          case '-':
            _ptr++;
            if (!isdigit(*_ptr))
              charge--;
            while( isdigit(*_ptr) ) // go number by number
              charge = charge*10 - ((*_ptr++)-'0');
            _ptr--;
            break;
          case '+':
            _ptr++;
            if (!isdigit(*_ptr))
              charge++;
            while( isdigit(*_ptr) ) // go number by number
              charge = charge*10 + ((*_ptr++)-'0');
            _ptr--;
            break;
          case 'H':
            _ptr++;
            if (isdigit(*_ptr))
              {
                tmpc[0] = *_ptr;
                hcount = atoi(tmpc);
              }
            else
              {
                hcount = 1;
                _ptr--;
              }
            break;
          case '.': //CM Feb05
            rad=2;
            if(*(++_ptr)=='.')
              rad=3;
            else
              _ptr--;
            break;

          case ':':
            if(!isdigit(*(++_ptr)))
              {
                obErrorLog.ThrowError(__FUNCTION__,"The atom class following : must be a number", obWarning);
                return false;
              }
            while( isdigit(*_ptr) )
              clval = clval*10 + ((*_ptr++)-'0');
            --_ptr;
            _classdata.Add(atom->GetIdx(), clval);
            break;

          default:
            return(false);
          }
      }

    if (!*_ptr || *_ptr != ']')
      return(false); // we should have a trailing ']' now

    if (charge) {
      atom->SetFormalCharge(charge);
      if (abs(charge) > 10 || charge > element) { // if the charge is +/- 10 or more than the number of electrons
        errorMsg << "Atom " << atom->GetIdx() << " had an unrealistic charge of " << charge
                 << "." << endl;
        obErrorLog.ThrowError(__FUNCTION__, errorMsg.str(), obWarning);
      }
    }
    if (rad)
      atom->SetSpinMultiplicity(rad);
    atom->SetAtomicNum(element);
    atom->SetIsotope(isotope);
    atom->SetType(symbol);
    if (arom)
      atom->SetAromatic();

    if (_prev) //need to add bond
      {
        OBAtom* prevatom = mol.GetAtom(_prev);
        mol.SetAromaticPerceived();             // prevent aromaticity analysis
        if(arom && prevatom->IsAromatic())
          {
            _order=5; //Potential aromatic bond

            if (prevatom->GetSpinMultiplicity())
              {
                // Previous atom had been marked, so bond is potentially a double bond
                // if it is not part of an aromatic ring. This will be decided when all
                // molecule has been constructed.
                PosDouble.push_back(mol.NumBonds()); //saves index of bond about to be added
                prevatom->SetSpinMultiplicity(0);
                atom->SetSpinMultiplicity(0);
              }
          }
        mol.UnsetAromaticPerceived();
        mol.AddBond(_prev, mol.NumAtoms(), _order);
        // store up/down
        if (_updown == BondUpChar || _updown == BondDownChar)
          _upDownMap[mol.GetBond(_prev, mol.NumAtoms())] = _updown;

        if(chiralWatch) { // if tetrahedral atom, set previous as from atom
          _tetrahedralMap[atom]->from = mol.GetAtom(_prev)->GetId();
          if (element == 16) // Handle chiral lone pair as in X[S@@](Y)Z
            _chiralLonePair[mol.NumAtoms()] = 1; // First of the refs

          //cerr <<"NB7: line 1622: Added atom ref "<<_prev<<" at " << 0 << " to "<<_mapcd[atom]<<endl;
        }
        if (squarePlanarWatch) { // if squareplanar atom, set previous atom as first ref
          _squarePlanarMap[atom]->refs[0] = mol.GetAtom(_prev)->GetId();
          //cerr <<"TV7: line 1748: Added atom ref " << mol.GetAtom(_prev)->GetId()
          //     << " at " << 0 << " to " << _squarePlanarMap[atom] << endl;
        }
        InsertTetrahedralRef(mol, atom->GetId());
        InsertSquarePlanarRef(mol, atom->GetId());
      }
    else
      {
        // Handle chiral lone pair as in [S@@](X)(Y)Z
        if (chiralWatch && element == 16) // Handle chiral lone pair (only S at the moment)
          _chiralLonePair[mol.NumAtoms()] = 0; // 'from' atom
      }

    //set values
    _prev = mol.NumAtoms();
    _order = 1;
    _updown = ' ';

    //now add hydrogens
    if(hcount==0)
      atom->ForceNoH();//ensure AssignMultiplicity regards [C] as C atom

    for (int i = 0;i < hcount;i++)
      {
        atom = mol.NewAtom();
        atom->SetAtomicNum(1);
        atom->SetType("H");
        mol.AddBond(_prev, mol.NumAtoms(), 1);
        // store up/down
        if (_updown == BondUpChar || _updown == BondDownChar)
          _upDownMap[mol.GetBond(_prev, mol.NumAtoms())] = _updown;

        if(chiralWatch)
          InsertTetrahedralRef(mol, atom->GetId());
        if (squarePlanarWatch)
          InsertSquarePlanarRef(mol, atom->GetId());
      }
    chiralWatch=false;
    squarePlanarWatch = false;
    return(true);
  }

  bool OBSmilesParser::CapExternalBonds(OBMol &mol)
  {
    if (_extbond.empty())
      return true;

    OBAtom *atom;
    vector<ExternalBond>::iterator bond;
    for (bond = _extbond.begin(); bond != _extbond.end(); bond++) {
      // create new dummy atom
      atom = mol.NewAtom();
      atom->SetAtomicNum(0);
      atom->SetType("*");

      // bond dummy atom to mol via external bond
      mol.AddBond(bond->prev, atom->GetIdx(), bond->order);
      // store up/down
      if (bond->updown == BondUpChar || bond->updown == BondDownChar)
        _upDownMap[mol.GetBond(bond->prev, atom->GetIdx())] = bond->updown;

      OBBond *refbond = atom->GetBond(mol.GetAtom(bond->prev));

      //record external bond information
      OBExternalBondData *xbd;
      if (mol.HasData(OBGenericDataType::ExternalBondData)) {
        xbd = (OBExternalBondData*) mol.GetData(OBGenericDataType::ExternalBondData);
      } else {
        xbd = new OBExternalBondData;
        xbd->SetOrigin(fileformatInput);
        mol.SetData(xbd);
      }
      xbd->SetData(atom,refbond, bond->digit);
      //this data gets cleaned up in mol.Clear.
    }

    return true;
  }

  bool OBSmilesParser::ParseExternalBond(OBMol &mol)
  {
    int digit;
    char str[10];

    //*_ptr should == '&'
    _ptr++;

    switch (*_ptr) // check for bond order indicators CC&=1.C&1
      {
      case '-':
        _order = 1;
        _ptr++;
        break;
      case '=':
        _order = 2;
        _ptr++;
        break;
      case '#':
        _order = 3;
        _ptr++;
        break;
      case '$':
        _order = 4;
        _ptr++;
        break;
      case ';':
        _order = 5;
        _ptr++;
        break;
      case '/': //chiral, but _order still == 1
        _updown = BondDownChar;
        _ptr++;
        break;
        _ptr++;
      case '\\': // chiral, but _order still == 1
        _updown = BondUpChar;
        _ptr++;
        break;
      default: // no bond indicator just leave order = 1
        break;
      }

    if (*_ptr == '%') // external bond indicator > 10
      {
        _ptr++;
        str[0] = *_ptr;
        _ptr++;
        str[1] = *_ptr;
        str[2] = '\0';
      }
    else // simple single digit external bond indicator
      {
        str[0] = *_ptr;
        str[1] = '\0';
      }
    digit = atoi(str);  // convert indicator to digit

    //check for dot disconnect closures
    vector<ExternalBond>::iterator bond;
    int upDown, bondOrder;
    for (bond = _extbond.begin(); bond != _extbond.end(); bond++) {

      if (bond->digit == digit) {
        upDown = (_updown > bond->updown) ? _updown : bond->updown;
        bondOrder = (_order > bond->order) ? _order : bond->order;
        mol.AddBond(bond->prev, _prev, bondOrder);
        // store up/down
        if (upDown == BondUpChar || upDown == BondDownChar)
          _upDownMap[mol.GetBond(bond->prev, _prev)] = upDown;


        // after adding a bond to atom "_prev"
        // search to see if atom is bonded to a chiral atom
        InsertTetrahedralRef(mol, bond->prev - 1);
        InsertSquarePlanarRef(mol, bond->prev - 1);

        _extbond.erase(bond);
        _updown = ' ';
        _order = 0;
        return true;
      }
    }

    //since no closures save another ext bond
    ExternalBond extBond;
    extBond.digit  = digit;
    extBond.prev   = _prev;
    extBond.order  = _order;
    extBond.updown = _updown;

    _extbond.push_back(extBond);
    _order = 1;
    _updown = ' ';

    return(true);

  }

  bool OBSmilesParser::ParseRingBond(OBMol &mol)
  {
    int digit;
    char str[10];

    if (*_ptr == '%')
      {
        _ptr++;
        str[0] = *_ptr;
        _ptr++;
        str[1] = *_ptr;
        str[2] = '\0';
      }
    else
      {
        str[0] = *_ptr;
        str[1] = '\0';
      }
    digit = atoi(str);

    vector<RingClosureBond>::iterator bond;
    int upDown, bondOrder;
    for (bond = _rclose.begin(); bond != _rclose.end(); ++bond) {
      if (bond->digit == digit) {
        upDown = (_updown > bond->updown) ? _updown : bond->updown;
        bondOrder = (_order > bond->order) ? _order : bond->order;
        // Check if this ring closure bond may be aromatic and set order accordingly
        if (bondOrder == 1) {
          OBAtom *a1 = mol.GetAtom(bond->prev);
          OBAtom *a2 = mol.GetAtom(_prev);
          mol.SetAromaticPerceived();                 // prevent aromaticity analysis
          if (a1->IsAromatic() && a2->IsAromatic())
            bondOrder = 5;
          mol.UnsetAromaticPerceived();
        }

        mol.AddBond(bond->prev, _prev, bondOrder, 0, bond->numConnections);
        // store up/down
        if (upDown == BondUpChar || upDown == BondDownChar)
          _upDownMap[mol.GetBond(bond->prev, _prev)] = upDown;

        // For assigning cis/trans in the presence of bond closures, we need to
        // remember info on all bond closure bonds.
        StereoRingBond sb;
        sb.updown.push_back(_updown);
        sb.atoms.push_back(mol.GetAtom(_prev));
        sb.updown.push_back(bond->updown);
        sb.atoms.push_back(mol.GetAtom(bond->prev));
        _stereorbond[mol.GetBond(bond->prev, _prev)] = sb; // Store for later

        // after adding a bond to atom "_prev"
        // search to see if atom is bonded to a chiral atom
        // need to check both _prev and bond->prev as closure is direction independent
        InsertTetrahedralRef(mol, bond->prev - 1);
        InsertSquarePlanarRef(mol, bond->prev - 1);

        // FIXME: needed for squreplanar too??
        map<OBAtom*, OBTetrahedralStereo::Config*>::iterator ChiralSearch;
        ChiralSearch = _tetrahedralMap.find(mol.GetAtom(bond->prev));
        if (ChiralSearch != _tetrahedralMap.end() && ChiralSearch->second != NULL) {
          int insertpos = bond->numConnections - 1;
          if (insertpos < 0) {
            if (ChiralSearch->second->from != OBStereo::NoRef)
              obErrorLog.ThrowError(__FUNCTION__, "Warning: Overwriting previous from reference id.", obWarning);

            (ChiralSearch->second)->from = mol.GetAtom(_prev)->GetId();
            // cerr << "Adding " << mol.GetAtom(_prev)->GetId() << " at Config.from to " << ChiralSearch->second << endl;
          } else {
            if (ChiralSearch->second->refs[insertpos] != OBStereo::NoRef)
              obErrorLog.ThrowError(__FUNCTION__, "Warning: Overwriting previously set reference id.", obWarning);

            (ChiralSearch->second)->refs[insertpos] = mol.GetAtom(_prev)->GetId();
            // cerr << "Adding " << mol.GetAtom(_prev)->GetId() << " at "
            //     << insertpos << " to " << ChiralSearch->second << endl;
          }
       }

        //CM ensure neither atoms in ring closure is a radical centre
        OBAtom* patom = mol.GetAtom(_prev);
        patom->SetSpinMultiplicity(0);
        patom = mol.GetAtom(bond->prev);
        patom->SetSpinMultiplicity(0);
        //CM end
        _rclose.erase(bond);
        _updown = ' ';
        _order = 1;
        return true;
      }
    }

    //since no closures save another rclose bond
    RingClosureBond ringClosure;
    ringClosure.digit  = digit;
    ringClosure.prev   = _prev;
    ringClosure.order  = _order;
    ringClosure.updown = _updown;

    OBAtom* atom = mol.GetAtom(_prev);
    if (!atom) {
      obErrorLog.ThrowError(__FUNCTION__,"Number not parsed correctly as a ring bond", obWarning);
      return false;
    }

    ringClosure.numConnections = NumConnections(atom); //store position to insert closure bond

    _rclose.push_back(ringClosure);
    _order = 1;
    _updown = ' ';

    return(true);
  }

  // NumConnections finds the number of connections already made to
  // a particular atom. This is used to figure out the correct position
  // to insert an atom ID into atom4refs
  int OBSmilesParser::NumConnections(OBAtom *atom) {
    int val = atom->GetValence();
    int idx = atom->GetIdx();
    vector<RingClosureBond>::iterator bond;
    //correct for multiple closure bonds to a single atom
    for (bond = _rclose.begin(); bond != _rclose.end(); ++bond)
      if (bond->prev == idx)
        val++;

    return val;
  }


  /*----------------------------------------------------------------------
   * CLASS: OBBondClosureInfo: For recording bond-closure digits as
   * work progresses on canonical SMILES.
   ----------------------------------------------------------------------*/

  class OBBondClosureInfo
  {
  public:
    OBAtom *toatom;       // second atom in SMILES order
    OBAtom *fromatom;     // first atom in SMILES order
    OBBond *bond;
    int    ringdigit;
    int    is_open;       // TRUE if SMILES processing hasn't reached 'toatom' yet

    OBBondClosureInfo(OBAtom *, OBAtom*, OBBond*, int, bool);
    ~OBBondClosureInfo();
  };

  OBBondClosureInfo::OBBondClosureInfo(OBAtom *a1, OBAtom *a2, OBBond *b, int rd, bool open)
  {
    toatom    = a1;
    fromatom  = a2;
    bond      = b;
    ringdigit = rd;
    is_open   = open;
  }

  OBBondClosureInfo::~OBBondClosureInfo()
  {
  }


  /*----------------------------------------------------------------------
   * CLASS: OBCanSmiNode: A Tree structure, each node of which is an atom in
   * the tree being built to write out the SMILES.
   ----------------------------------------------------------------------*/

  class OBCanSmiNode
  {
    OBAtom *_atom,*_parent;
    std::vector<OBCanSmiNode*> _child_nodes;
    std::vector<OBBond*> _child_bonds;

  public:
    OBCanSmiNode(OBAtom *atom);
    ~OBCanSmiNode();

    int Size()
    {
      return(_child_nodes.empty() ? 0 : _child_nodes.size());
    }

    void SetParent(OBAtom *a)
    {
      _parent = a;
    }

    void AddChildNode(OBCanSmiNode*,OBBond*);

    OBAtom *GetAtom()
    {
      return(_atom);
    }

    OBAtom *GetParent()
    {
      return(_parent);
    }

    OBAtom *GetChildAtom(int i)
    {
      return(_child_nodes[i]->GetAtom());
    }

    OBBond *GetChildBond(int i)
    {
      return(_child_bonds[i]);
    }

    OBCanSmiNode *GetChildNode(int i)
    {
      return(_child_nodes[i]);
    }
  };


  OBCanSmiNode::OBCanSmiNode(OBAtom *atom)
  {
    _atom = atom;
    _parent = NULL;
    _child_nodes.clear();
    _child_bonds.clear();
  }

  void OBCanSmiNode::AddChildNode(OBCanSmiNode *node,OBBond *bond)
  {
    _child_nodes.push_back(node);
    _child_bonds.push_back(bond);
  }

  OBCanSmiNode::~OBCanSmiNode()
  {
    vector<OBCanSmiNode*>::iterator i;
    for (i = _child_nodes.begin();i != _child_nodes.end();i++)
      delete (*i);
  }

  /*----------------------------------------------------------------------
   * CLASS OBMol2Cansmi - Declarations
   ----------------------------------------------------------------------*/

  class OBMol2Cansmi
  {
    std::vector<int> _atmorder;
    std::vector<bool> _aromNH;
    OBBitVec _uatoms,_ubonds;
    std::vector<OBBondClosureInfo> _vopen;
    std::string       _canorder;
    std::vector<OBCisTransStereo> _cistrans, _unvisited_cistrans;
    std::map<OBBond *, bool> _isup;

    bool          _canonicalOutput; // regular or canonical SMILES

    OBConversion* _pconv;
    OBAtomClassData* _pac;

    OBAtom* _endatom;
    OBAtom* _startatom;

  public:
    OBMol2Cansmi()
    {
    }
    ~OBMol2Cansmi() {}

    void         Init(bool canonicalOutput = true, OBConversion* pconv=NULL);

    void         CreateCisTrans(OBMol&);
    char         GetCisTransBondSymbol(OBBond *, OBCanSmiNode *);
    void         AddHydrogenToChiralCenters(OBMol &mol, OBBitVec &frag_atoms);
    bool         AtomIsChiral(OBAtom *atom);
    bool         BuildCanonTree(OBMol &mol, OBBitVec &frag_atoms,
                                vector<unsigned int> &canonical_order,
                                OBCanSmiNode *node);
    void         CorrectAromaticAmineCharge(OBMol&);
    void         CreateFragCansmiString(OBMol&, OBBitVec&, bool, char *);
    bool         GetTetrahedralStereo(OBCanSmiNode*,
                                      vector<OBAtom*>&chiral_neighbors,
                                      vector<unsigned int> &symmetry_classes,
                                      char*);
    bool         GetSquarePlanarStereo(OBCanSmiNode*,
                                       vector<OBAtom*>&chiral_neighbors,
                                       vector<unsigned int> &symmetry_classes,
                                       char*);
    bool         GetSmilesElement(OBCanSmiNode*,
                                  vector<OBAtom*>&chiral_neighbors,
                                  vector<unsigned int> &symmetry_classes,
                                  char*,
                                  bool isomeric);
    int          GetSmilesValence(OBAtom *atom);
    int          GetUnusedIndex();
    vector<OBBondClosureInfo>
    GetCanonClosureDigits(OBAtom *atom,
                          OBBitVec &frag_atoms,
                          vector<unsigned int> &canonical_order);
    bool         IsSuppressedHydrogen(OBAtom *atom);
    void         ToCansmilesString(OBCanSmiNode *node,
                                   char *buffer,
                                   OBBitVec &frag_atoms,
                                   vector<unsigned int> &symmetry_classes,
                                   vector<unsigned int> &canonical_order,
                                   bool isomeric);
    bool         HasStereoDblBond(OBBond *, OBAtom *atom);
    void MyFindChildren(OBMol &mol, vector<OBAtom*> &children, OBBitVec &seen, OBAtom *end);
    std::string &GetOutputOrder()
    {
      return _canorder;
    }
    bool         ParseInChI(OBMol &mol, vector<int> &atom_order);
  };


  /*----------------------------------------------------------------------
   * CLASS OBMol2Cansmi - implementation
   ----------------------------------------------------------------------*/

  /***************************************************************************
   * FUNCTION: Init
   *
   * DESCRIPTION:
   *       Initializes the OBMol2Cansmi writer object.
   ***************************************************************************/

  void OBMol2Cansmi::Init(bool canonical, OBConversion* pconv)
  {
    _atmorder.clear();
    _aromNH.clear();
    _uatoms.Clear();
    _ubonds.Clear();
    _vopen.clear();
    _canorder.clear();
    _pac = NULL;

    _pconv = pconv;
    _canonicalOutput = canonical;

    _endatom = NULL;
    _startatom = NULL;
  }


  /***************************************************************************
   * FUNCTION: GetUnusedIndex
   *
   * DESCRIPTION:
   *       Returns the next available bond-closure index for a SMILES.
   *
   *       You could just do this sequentially, not reusing bond-closure
   *       digits, thus:
   *
   *               c1cc2ccccc2cc1          napthalene
   *               c1ccccc1c2ccccc2        biphenyl
   *
   *       But molecules with more than ten rings, this requires the use of
   *       two-digit ring closures (like c1ccccc1C...c%11ccccc%11).  To help
   *       avoid digit reuse, this finds the lowest digit that's not currently
   *       "open", thus
   *
   *               c1cc2ccccc2cc1          napthalene (same)
   *               c1ccccc1c1ccccc1        biphenyl (reuses "1")
   *
   ***************************************************************************/


  int OBMol2Cansmi::GetUnusedIndex()
  {
    int idx=1;

    vector<OBBondClosureInfo>::iterator j;
    for (j = _vopen.begin();j != _vopen.end();)
      if (j->ringdigit == idx)
        {
          idx++; //increment idx and start over if digit is already used
          j = _vopen.begin();
        }
      else j++;

    return(idx);
  }

  /***************************************************************************
   * FUNCTION: CorrectAromaticAmineCharge
   *
   * DESCRIPTION:
   *       Finds all aromatic nitrogens, and updates the _aromNH vector to
   *       note which aromatic nitrogens require an H to balance the charge.
   ***************************************************************************/

  void OBMol2Cansmi::CorrectAromaticAmineCharge(OBMol &mol)
  {
    OBAtom *atom;
    vector<OBNodeBase*>::iterator i;

    _aromNH.clear();
    _aromNH.resize(mol.NumAtoms()+1);

    for (atom = mol.BeginAtom(i);atom;atom = mol.NextAtom(i))
      if (atom->IsNitrogen() && atom->IsAromatic())
        if (atom->GetHvyValence() == 2)
          {
            if (atom->GetValence() == 3 || atom->GetImplicitValence() == 3)
              _aromNH[atom->GetIdx()] = true;
          }
  }

  void OBMol2Cansmi::CreateCisTrans(OBMol &mol)
  {
    std::vector<OBGenericData*> vdata = mol.GetAllData(OBGenericDataType::StereoData);
    for (std::vector<OBGenericData*>::iterator data = vdata.begin(); data != vdata.end(); ++data) {
      if (((OBStereoBase*)*data)->GetType() != OBStereo::CisTrans)
        continue;
      OBCisTransStereo *ct = dynamic_cast<OBCisTransStereo*>(*data);
      if (ct && ct->GetConfig().specified) {
        OBCisTransStereo::Config config = ct->GetConfig();
        OBBond* dbl_bond = mol.GetBond(mol.GetAtomById(config.begin), mol.GetAtomById(config.end));
        if (!dbl_bond)
          continue;
        // Do not output cis/trans bond symbols for double bonds in rings of size IMPLICIT_CIS_RING_SIZE or less
        OBRing* ring = dbl_bond->FindSmallestRing();
        if (!ring || ring->Size()>IMPLICIT_CIS_RING_SIZE)
          _cistrans.push_back(*ct);
      }
    }

    _unvisited_cistrans = _cistrans; // Make a copy of _cistrans
  }

  bool OBMol2Cansmi::HasStereoDblBond(OBBond *bond, OBAtom *atom)
  {
    // This is a helper function for determining whether to
    // consider writing a cis/trans bond symbol for bond closures.
    // Returns TRUE only if the atom is connected to the cis/trans
    // double bond. To handle the case of conjugated bonds, one must
    // remember that the ring opening preceded the closure, so if the
    // ring opening bond was on a stereocenter, it got the symbol already.

    if (!bond || !atom)
      return false;

    std::vector<OBCisTransStereo>::iterator ChiralSearch;
    OBAtom *nbr_atom = bond->GetNbrAtom(atom);

    bool stereo_dbl = false;
    if (atom->HasDoubleBond()) {
      stereo_dbl = true;
      if (nbr_atom->HasDoubleBond())
        // Check whether the nbr_atom is a begin or end in any CisTransStereo. If so,
        // then the ring opening already had the symbol.
        for (ChiralSearch = _cistrans.begin(); ChiralSearch != _cistrans.end(); ChiralSearch++) {
          OBCisTransStereo::Config cfg = ChiralSearch->GetConfig();
          if (nbr_atom->GetId() == cfg.begin || nbr_atom->GetId() == cfg.end) {
            // I don't think I need to check whether it has a bond with atom
            stereo_dbl = false;
            break;
          }
        }
    }
    return stereo_dbl;
  }

  char OBMol2Cansmi::GetCisTransBondSymbol(OBBond *bond, OBCanSmiNode *node)
  {
    // Given a cis/trans bond and the node in the SMILES tree, figures out
    // whether to write a '/' or '\' symbol.
    // See the comments smilesformat.cpp: FixCisTransBonds().
    //
    // The OBCanSmiNode is the most-recently-written atom in the SMILES string
    // we're creating.  If it is the double-bonded atom, then the substituent
    // follows, so that "up" means '/' and "down" means '\'.  If the OBCanSmiNode
    // atom is the single-bonded atom then the double-bonded atom comes next,
    // in which case "up" means '\' and "down" means '/'.
    //
    // Note that the story is not so simple for conjugated systems where
    // we need to take into account what symbol was already used.

    if (!bond /*|| (!bond->IsUp() && !bond->IsDown())*/)
      return '\0';
    OBAtom *atom = node->GetAtom();
    OBAtom *nbr_atom = bond->GetNbrAtom(atom);
    OBMol *mol = atom->GetParent();

    // If this bond is in two different obcistransstereos (e.g. a conjugated system)
    // choose the one where the dbl bond atom is *atom (i.e. the one which comes first)
    std::vector<OBCisTransStereo>::iterator ChiralSearch;
    std::vector<unsigned long>::iterator lookup;

    bool dbl_bond_first = false;
    if (atom->HasDoubleBond())
    {
      if (nbr_atom->HasDoubleBond())
        // Check whether the atom is a center in any CisTransStereo. If so,#
        // then this CisTransStereo takes precedence over any other
        for (ChiralSearch = _cistrans.begin(); ChiralSearch != _cistrans.end(); ChiralSearch++)
        {
          OBCisTransStereo::Config cfg = ChiralSearch->GetConfig();
          if (atom->GetId() == cfg.begin || atom->GetId() == cfg.end) {
            // I don't think I need to check whether it has a bond with nbr_atom
            dbl_bond_first = true;
            break;
          }
        }
      else
        dbl_bond_first = true;
    }

    // Has the symbol for this bond already been set?
    if (_isup.find(bond) == _isup.end()) // No it hasn't
    {
      unsigned int endatom, centeratom;
      if (dbl_bond_first) {
        if (atom->IsAromatic())
          FOR_BONDS_OF_ATOM (bond, atom)
            if (bond->IsAromatic() && bond->IsDouble())
              return 0;
        endatom = nbr_atom->GetId();
        centeratom = atom->GetId();
      }
      else {
        if (nbr_atom->IsAromatic())
          FOR_BONDS_OF_ATOM (bond, nbr_atom)
            if (bond->IsAromatic() && bond->IsDouble())
              return 0;
        endatom = atom->GetId();
        centeratom = nbr_atom->GetId();
      }

      for (ChiralSearch = _unvisited_cistrans.begin(); ChiralSearch != _unvisited_cistrans.end(); ChiralSearch++)
      {
        OBCisTransStereo::Config cfg = ChiralSearch->GetConfig(OBStereo::ShapeU);
        lookup = std::find(cfg.refs.begin(), cfg.refs.end(), endatom);
        if (lookup != cfg.refs.end() && (cfg.begin == centeratom || cfg.end == centeratom))
        { // Atoms endatom and centeratom are in this OBCisTransStereo

          std::vector<OBBond *> refbonds(4, (OBBond*)NULL);
          refbonds[0] = mol->GetBond(mol->GetAtomById(cfg.refs[0]), mol->GetAtomById(cfg.begin));

          if (cfg.refs[1] != OBStereo::ImplicitRef) // Could be a hydrogen
            refbonds[1] = mol->GetBond(mol->GetAtomById(cfg.refs[1]), mol->GetAtomById(cfg.begin));

          if (cfg.refs[2] != OBStereo::ImplicitRef) // Could be a hydrogen
            refbonds[2] = mol->GetBond(mol->GetAtomById(cfg.refs[2]), mol->GetAtomById(cfg.end));

          if (cfg.refs[3] != OBStereo::ImplicitRef) // Could be a hydrogen
            refbonds[3] = mol->GetBond(mol->GetAtomById(cfg.refs[3]), mol->GetAtomById(cfg.end));

          // What symbol would the four refs use if before the dbl bond?
          bool config[4] = {true, false, false, true};
          bool use_same_config = true;
          // (The actual config used will be config ^ use_same_config)

          // Make sure that the symbol for this bond is true. This ensures
          // a canonical string, so that it's always C/C=C/C and not C\C=C\C.
          for(int i=0;i<4;i++)
            if (refbonds[i] == bond)
              if (!config[i])
              {
                  use_same_config = false;
                  break;
              }
          // If any of the bonds have been previously set, now set them all
          // in the opposite sense
          for(int i=0;i<4;i++)
            if (_isup.find(refbonds[i]) != _isup.end()) // We have already set this one (conjugated bond)
              if (_isup[refbonds[i]] == (config[i] ^ use_same_config))
              {
                use_same_config = !use_same_config;
                break;
              }
          // Set the configuration
          for(int i=0;i<4;i++)
            if (refbonds[i] != NULL)
              _isup[refbonds[i]] = config[i] ^ use_same_config;
          _unvisited_cistrans.erase(ChiralSearch);
          break; // break out of the ChiralSearch
        }
      }
    }

     // If ChiralSearch didn't find the bond, we can't set this symbol
    if (_isup.find(bond) == _isup.end()) return '\0';

    if (dbl_bond_first) { // double-bonded atom is first in the SMILES
      if (_isup[bond])
        return '/';
      else
        return '\\';
    }
    else { // double-bonded atom is second in the SMILES
      if (_isup[bond])
        return '\\';
      else
        return '/';
    }
  }


  // Helper function
  // Is this atom an oxygen in a water molecule
  // We know the oxygen is connected to one ion, but check for non-hydrogens
  // Returns: true if the atom is an oxygen and connected to two hydrogens + one coordinated atom
  bool isWaterOxygen(OBAtom *atom)
  {
    if (!atom->IsOxygen())
      return false;

    int nonHydrogenCount = 0;
    int hydrogenCount = 0;
    FOR_NBORS_OF_ATOM(neighbor, *atom) {
      if (!neighbor->IsHydrogen())
        nonHydrogenCount++;
      else
        hydrogenCount++;
    }

    return (hydrogenCount == 2 && nonHydrogenCount == 1);
  }


  /***************************************************************************
   * FUNCTION: GetSmilesElement
   *
   * DESCRIPTION:
   *       Writes the symbol for an atom, e.g. "C" or "[NH2]" or "[C@@H]".
   *
   * RETURNS: true (always)
   ***************************************************************************/


  bool OBMol2Cansmi::GetSmilesElement(OBCanSmiNode *node,
                                      vector<OBAtom*>&chiral_neighbors,
                                      vector<unsigned int> &symmetry_classes,
                                      char *buffer,
                                      bool isomeric)
  {
    char symbol[10];
    symbol[0] = '\0'; // make sure to initialize for all paths below
    char bracketBuffer[32];
    bool bracketElement = false;
    bool normalValence = true;
    bool writeExplicitHydrogen = false;

    OBAtom *atom = node->GetAtom();

    int bosum = atom->KBOSum();
    int maxBonds = etab.GetMaxBonds(atom->GetAtomicNum());
    // default -- bracket if we have more bonds than possible
    // we have some special cases below
    bracketElement = !(normalValence = (bosum <= maxBonds));

    int element = atom->GetAtomicNum();
    switch (element) {
    case 0:	// pseudo '*' atom has no normal valence, needs brackets if it has any hydrogens
      normalValence = (atom->ExplicitHydrogenCount() == 0);
      bracketElement = !normalValence;
      break;
    case 5:
      bracketElement = !(normalValence = (bosum == 3));
      break;
    case 6: break;
    case 7:
      if (atom->IsAromatic()
          && atom->GetHvyValence() == 2
          && atom->GetImplicitValence() == 3) {
#ifdef __INTEL_COMPILER
#pragma warning (disable:187)
#endif
// warning #187 is use of "=" where "==" may have been intended
        bracketElement = !(normalValence = false);
#ifdef __INTEL_COMPILER
#pragma warning (default:187)
#endif
        break;
      }
      else
        bracketElement = !(normalValence = (bosum == 3 || bosum == 5));
      break;
    case 8: break;
    case 9: break;
    case 15: break;
    case 16:
      bracketElement = !(normalValence = (bosum == 2 || bosum == 4 || bosum == 6));
      break;
    case 17: break;
    case 35: break;
    case 53: break;
    default: bracketElement = true;
    }

    if (atom->GetFormalCharge() != 0) //bracket charged elements
      bracketElement = true;

    if(isomeric && atom->GetIsotope())
      bracketElement = true;

    //If the molecule has Atom Class data and -xa option set and atom has data
    if(_pac && _pac->HasClass(atom->GetIdx()))
      bracketElement = true;

    char stereo[5] = "";
    if (GetSmilesValence(atom) > 2 && isomeric) {
      if (GetTetrahedralStereo(node, chiral_neighbors, symmetry_classes, stereo))
        strcat(buffer,stereo);
      if (GetSquarePlanarStereo(node, chiral_neighbors, symmetry_classes, stereo))
        strcat(buffer,stereo);
    }
    if (stereo[0] != '\0')
      bracketElement = true;


    if (atom->GetSpinMultiplicity()) {
      //For radicals output bracket form anyway unless r option specified
      if(!(_pconv && _pconv->IsOption ("r")))
        bracketElement = true;
    }

    // Add brackets and explicit hydrogens for coordinated water molecules
    // PR#2505562
    if (isWaterOxygen(atom)) {
      bracketElement = true;
      writeExplicitHydrogen = true;
    }

    //Output as [CH3][CH3] rather than CC if -xh option has been specified
    if (!bracketElement && _pconv && _pconv->IsOption("h") && atom->ExplicitHydrogenCount() > 0) {
      bracketElement = true;
      writeExplicitHydrogen = true;
    }

    if (!bracketElement) {

      // Ordinary non-bracket element
      if (element) {
        strcpy(symbol,etab.GetSymbol(atom->GetAtomicNum()));
        if (atom->IsAromatic())
          symbol[0] = tolower(symbol[0]);

        //Radical centres lc if r option set
        if(atom->GetSpinMultiplicity() && _pconv && _pconv->IsOption ("r"))
          symbol[0] = tolower(symbol[0]);
      }

      // Atomic number zero - either '*' or an external atom
      else {
        bool external = false;
        vector<pair<int,pair<OBAtom *,OBBond *> > > *externalBonds =
          (vector<pair<int,pair<OBAtom *,OBBond *> > > *)((OBMol*)atom->GetParent())->GetData("extBonds");
        vector<pair<int,pair<OBAtom *,OBBond *> > >::iterator externalBond;

        if (externalBonds)
          for(externalBond = externalBonds->begin();externalBond != externalBonds->end();externalBond++) {
            if (externalBond->second.first == atom) {
              external = true;
              strcpy(symbol,"&");
              OBBond *bond = externalBond->second.second;
              if (bond->IsUp()) {
                if ( (bond->GetBeginAtom())->HasDoubleBond() ||
                     (bond->GetEndAtom())->HasDoubleBond() )
                  strcat(symbol,"\\");
              }
              if (bond->IsDown()) {
                if ( (bond->GetBeginAtom())->HasDoubleBond() ||
                     (bond->GetEndAtom())->HasDoubleBond() )
                  strcat(symbol,"/");
              }
              if (bond->GetBO() == 2 && !bond->IsAromatic())
                strcat(symbol,"=");
              if (bond->GetBO() == 2 && bond->IsAromatic())
                strcat(symbol,":");
              if (bond->GetBO() == 3)
                strcat(symbol,"#");
              if (bond->GetBO() == 4)
                strcat(symbol,"$");
              sprintf(symbol+strlen(symbol),"%d",externalBond->first);
              break;
            }
          }

        if(!external)
          strcpy(symbol,"*");
      }

      strcpy(buffer, symbol);
      return(true);
    }

    // Bracketed atoms, e.g. [Pb], [OH-], [C@]
    bracketBuffer[0] = '\0';
    if (isomeric && atom->GetIsotope()) {
      char iso[4];
      sprintf(iso,"%d",atom->GetIsotope());
      strcat(bracketBuffer,iso);
    }
    if (!atom->GetAtomicNum())
      strcpy(symbol,"*");
    else {
      strcpy(symbol,etab.GetSymbol(atom->GetAtomicNum()));
      if (atom->IsAromatic())
        symbol[0] = tolower(symbol[0]);
    }
    strcat(bracketBuffer,symbol);

    // If chiral, append '@' or '@@'
    if (stereo[0] != '\0')
      strcat(bracketBuffer, stereo);

    // don't try to handle implicit hydrogens for metals
    if ( (element >= 21 && element <= 30)
         || (element >= 39 && element <= 49)
         || (element >= 71 && element <= 82) )
      writeExplicitHydrogen = true;

    // Add extra hydrogens.  If this is a bracket-atom *only* because the
    // "-xh" option was specified, then we're writing a SMARTS, so we
    // write the explicit hydrogen atom count only.  Otherwise we write
    // the proper total number of hydrogens.
    if (!atom->IsHydrogen()) {
      int hcount;
      if (writeExplicitHydrogen)
        hcount = atom->ExplicitHydrogenCount();
      else
        // if "isomeric", doesn't count isotopic H
        hcount = atom->ImplicitHydrogenCount() + atom->ExplicitHydrogenCount(isomeric);

      // OK, see if we need to decrease the H-count due to hydrogens we'll write later
      FOR_NBORS_OF_ATOM(nbr, atom) {
        if (nbr->IsHydrogen() && ( (nbr->GetFormalCharge() != 0)
                                   || (nbr->GetValence() > 1) ) ) {
            hcount--;
        }
      }
      if ((atom == _endatom || atom == _startatom) && hcount>0) // Leave a free valence for attachment
        hcount--;

      if (hcount != 0) {
        strcat(bracketBuffer,"H");
        if (hcount > 1) {
          char tcount[10];
          sprintf(tcount,"%d", hcount);
          strcat(bracketBuffer,tcount);
        }
      }
    }

    // Append charge to the end
    if (atom->GetFormalCharge() != 0) {
      if (atom->GetFormalCharge() > 0)
        strcat(bracketBuffer,"+");
      else
        strcat(bracketBuffer,"-");

      if (abs(atom->GetFormalCharge()) > 1)
        sprintf(bracketBuffer+strlen(bracketBuffer), "%d", abs(atom->GetFormalCharge()));
    }

    //atom class e.g. [C:2]
    if (_pac)
      strcat(bracketBuffer, _pac->GetClassString(atom->GetIdx()).c_str());

    // Check if this is an aromatic "n" or "s" that doesn't need a hydrogen after all
    if (atom->IsAromatic() && strlen(bracketBuffer) == 1)
      bracketElement = false;

    // if the element is supposed to be bracketed (e.g., [U]), *always* use brackets
    if (strlen(bracketBuffer) > 1 || bracketElement) {
      strcpy(buffer, "[");
      strcat(buffer, bracketBuffer);
      strcat(buffer,"]");
    } else {
      strcpy(buffer, bracketBuffer);
    }

    return(true);
  }

  /***************************************************************************
   * FUNCTION: AtomIsChiral
   *
   * DESCRIPTION:
   *       Returns TRUE if the atom is genuinely chiral, that is, it meets
   *       the criteria from OBAtom::IsChiral, and additionally it actually
   *       has a connected hash or wedge bond.
   *
   *       We arbitrarily reject chiral nitrogen because for our purposes there's
   *       no need to consider it.
   *
   *       NOTE: This is a simplistic test.  When the full SMILES canonicalization
   *       includes chiral markings, this should check the symmetry classes
   *       of the neighbors, not the hash/wedge bonds.
   ***************************************************************************/

  bool OBMol2Cansmi::AtomIsChiral(OBAtom *atom)
  {
    OBMol *mol = dynamic_cast<OBMol*>(atom->GetParent());
    OBStereoFacade stereoFacade(mol);
    return stereoFacade.HasTetrahedralStereo(atom->GetId());
    /*
    if (!atom->IsChiral())
      return false;
    if (atom->IsNitrogen())
      return false;
    // Added by ghutchis 2007-06-04 -- make sure to check for 3D molecules
    // Fixes PR#1699418
    if (atom->GetParent()->GetDimension() == 3)
      return true; // no hash/wedge for 3D molecules

    vector<int> symclass;
    FOR_BONDS_OF_ATOM(bond, atom) {
      if (bond->IsHash() || bond->IsWedge())
        return true;
    }
    return false;
    */
  }

  /***************************************************************************
   * FUNCTION: GetTetrahedralStereo
   *
   * DESCRIPTION:
   *       If the atom is chiral, fills in the string with either '@', '@@'
   *       or '@?' and returns true, otherwise returns false.
   ***************************************************************************/

  bool OBMol2Cansmi::GetTetrahedralStereo(OBCanSmiNode *node,
                                          vector<OBAtom*> &chiral_neighbors,
                                          vector<unsigned int> &symmetry_classes,
                                          char *stereo)
  {
    OBAtom *atom = node->GetAtom();
    OBMol *mol = (OBMol*) atom->GetParent();

    // If not enough chiral neighbors were passed in, we're done
    if (chiral_neighbors.size() < 4)
      return false;

    // OBStereoFacade will run symmetry analysis & stereo perception if needed
    OBStereoFacade stereoFacade(mol);
    OBTetrahedralStereo *ts = stereoFacade.GetTetrahedralStereo(atom->GetId());
    // If atom is not a tetrahedral center, we're done
    if (!ts)
      return false;

    // get the Config struct defining the stereochemistry
    OBTetrahedralStereo::Config atomConfig = ts->GetConfig();

    // Don't write '@?' for unspecified or unknown stereochemistry
    if (!atomConfig.specified || (atomConfig.specified && atomConfig.winding==OBStereo::UnknownWinding)) {
      // strcpy(stereo, "@?");
      return true;
    }

    // create a Config struct with the chiral_neighbors in canonical output order
    OBStereo::Refs canonRefs;
    for (vector<OBAtom*>::const_iterator atom_it = chiral_neighbors.begin() + 1; atom_it != chiral_neighbors.end(); ++atom_it) {
      if (*atom_it)
        canonRefs.push_back((*atom_it)->GetId());
      else // Handle a chiral lone pair, represented by a NULL OBAtom* in chiral_neighbors
        canonRefs.push_back(OBStereo::ImplicitRef);
    }
    OBTetrahedralStereo::Config canConfig;
    canConfig.center = atom->GetId();
    if (chiral_neighbors[0])
      canConfig.from = chiral_neighbors[0]->GetId();
    else // Handle a chiral lone pair, represented by a NULL OBAtom* in chiral_neighbors
      canConfig.from = OBStereo::ImplicitRef;
    canConfig.refs = canonRefs;

    //cout << "atomConfig = " << atomConfig << endl;
    //cout << "canConfig = " << canConfig << endl;

    // canConfig is clockwise
    if (atomConfig == canConfig)
      strcpy(stereo, "@@");
    else
      strcpy(stereo, "@");

    return true;
  }

  /***************************************************************************
   * FUNCTION: GetSquarePlanarStereo
   *
   * DESCRIPTION:
   *       If the atom is chiral, fills in the string with either '@', '@@'
   *       or '@?' and returns true, otherwise returns false.
   ***************************************************************************/

  bool OBMol2Cansmi::GetSquarePlanarStereo(OBCanSmiNode *node,
                                           vector<OBAtom*> &chiral_neighbors,
                                           vector<unsigned int> &symmetry_classes,
                                           char *stereo)
  {
    OBAtom *atom = node->GetAtom();
    OBMol *mol = (OBMol*) atom->GetParent();

    // If no chiral neighbors were passed in, we're done
    if (chiral_neighbors.size() < 4)
      return false;

    // OBStereoFacade will run symmetry analysis & stereo perception if needed
    OBStereoFacade stereoFacade(mol);
    OBSquarePlanarStereo *sp = stereoFacade.GetSquarePlanarStereo(atom->GetId());
    // If atom is not a square-planar center, we're done
    if (!sp)
      return false;

    // get the Config struct defining the stereochemistry
    OBSquarePlanarStereo::Config atomConfig = sp->GetConfig();

    if (!atomConfig.specified) {
      // write '@?' for unspecified (unknown) stereochemistry
      // strcpy(stereo, "@?");
      //return true;
      return false;
    }

    // create a Config struct with the chiral_neighbors in canonical output order
    OBStereo::Refs canonRefs = OBStereo::MakeRefs(chiral_neighbors[0]->GetId(),
        chiral_neighbors[1]->GetId(), chiral_neighbors[2]->GetId(), chiral_neighbors[3]->GetId());
    OBSquarePlanarStereo::Config canConfig;
    canConfig.center = atom->GetId();
    canConfig.refs = canonRefs;

    //cout << "atomConfig = " << atomConfig << endl;
    //cout << "canConfig = " << canConfig << endl;

    // canConfig is U shape
    if (atomConfig == canConfig) {
      strcpy(stereo, "@SP1");
      return true;
    }

    canConfig.shape = OBStereo::Shape4;
    //cout << "canConfig = " << canConfig << endl;
    if (atomConfig == canConfig) {
      strcpy(stereo, "@SP2");
      return true;
    }

    canConfig.shape = OBStereo::ShapeZ;
    //cout << "canConfig = " << canConfig << endl;
    if (atomConfig == canConfig) {
      strcpy(stereo, "@SP3");
      return true;
    }

    return false;
  }

  //! Adaptation of OBMol::FindChildren to allow a vector of OBAtoms to be passed in
  //  MOVE THIS TO OBMOL FOR 2.4
  void OBMol2Cansmi::MyFindChildren(OBMol &mol, vector<OBAtom*> &children, OBBitVec &seen, OBAtom *end)
  {
    OBBitVec curr,next;

    OBBitVec used(seen);
    used |= end->GetIdx();
    curr |= end->GetIdx();
    children.clear();

    int i;
    OBAtom *atom,*nbr;
    vector<OBBond*>::iterator j;

    for (;;)
      {
        next.Clear();
        for (i = curr.NextBit(-1);i != curr.EndBit();i = curr.NextBit(i))
          {
            atom = mol.GetAtom(i);
            for (nbr = atom->BeginNbrAtom(j);nbr;nbr = atom->NextNbrAtom(j))
              if (!used[nbr->GetIdx()])
                {
                  children.push_back(nbr);
                  next |= nbr->GetIdx();
                  used |= nbr->GetIdx();
                }
          }
        if (next.Empty())
          break;
        curr = next;
      }
  }

  /***************************************************************************
   * FUNCTION: BuildCanonTree
   *
   * DESCRIPTION:
   *       Builds the SMILES tree, in canonical order, for the specified
   *       molecular fragment.
   ***************************************************************************/

  bool OBMol2Cansmi::BuildCanonTree(OBMol &mol,
                                    OBBitVec &frag_atoms,
                                    vector<unsigned int> &canonical_order,
                                    OBCanSmiNode *node)
  {
    vector<OBEdgeBase*>::iterator i;
    OBAtom *nbr, *atom;
    vector<OBAtom *> sort_nbrs;
    vector<OBAtom *>::iterator ai;
    OBBond *bond;
    OBCanSmiNode *next;
    int idx;

    atom = node->GetAtom();

#if DEBUG
    cout << "BuildCanonTree: " << etab.GetSymbol(atom->GetAtomicNum()) << ", " << atom->GetIdx() << ", canorder " << canonical_order[atom->GetIdx()-1] << "\n";
#endif

    // Create a vector of neighbors sorted by canonical order, but favor
    // double and triple bonds over single and aromatic.  This causes
    // ring-closure digits to avoid double and triple bonds.
    //
    // Since there are typically just one to three neighbors, we just do a
    // ordered insertion rather than sorting.

    bool favor_multiple = true; // Visit 'multiple' bonds first
    if (_pconv->IsOption("I") || _pconv->IsOption("N"))
      favor_multiple = false;   // Visit in strict canonical order

    for (nbr = atom->BeginNbrAtom(i); nbr; nbr = atom->NextNbrAtom(i)) {

      idx = nbr->GetIdx();
      if (nbr->IsHydrogen() && IsSuppressedHydrogen(nbr)) {
        _uatoms.SetBitOn(nbr->GetIdx());        // mark suppressed hydrogen, so it won't be considered
        continue;                               // later when looking for more fragments.
      }
      if (_uatoms[idx] || !frag_atoms.BitIsOn(idx))
        continue;

      OBBond *nbr_bond = atom->GetBond(nbr);
      int new_needs_bsymbol = nbr_bond->IsDouble() || nbr_bond->IsTriple();

      for (ai = sort_nbrs.begin(); ai != sort_nbrs.end(); ++ai) {
        bond = atom->GetBond(*ai);
        int sorted_needs_bsymbol = bond->IsDouble() || bond->IsTriple();
        if (favor_multiple && new_needs_bsymbol && !sorted_needs_bsymbol) {
          sort_nbrs.insert(ai, nbr);
          ai = sort_nbrs.begin();//insert invalidated ai; set it to fail next test
          break;
        }
        if (   (!favor_multiple || new_needs_bsymbol == sorted_needs_bsymbol)
               && canonical_order[idx-1] < canonical_order[(*ai)->GetIdx()-1]) {
          sort_nbrs.insert(ai, nbr);
          ai = sort_nbrs.begin();//insert invalidated ai; set it to fail next test
          break;
        }
      }
      if (ai == sort_nbrs.end())
        sort_nbrs.push_back(nbr);
    }

    _uatoms.SetBitOn(atom->GetIdx());     //mark the atom as visited
    cout << "Visited atom " << atom->GetIdx() << " with canonical value of " << canonical_order.at(atom->GetIdx()-1) << "\n";

    if (_endatom && !_uatoms.BitIsSet(_endatom->GetIdx()) && sort_nbrs.size() > 1) {
      // If you have specified an _endatom, the following section rearranges
      // sort_nbrs as follows:
      //   - if a branch does not lead to the end atom, move it to the front
      //     (i.e. visit it first)
      //   - otherwise move it to the end
      // This section is skipped if sort_nbrs has only a single member, or if
      // we have already visited _endatom.

      vector<OBAtom*> children;
      MyFindChildren(mol, children, _uatoms, _endatom);

      vector<OBAtom*> front, end;
      for (vector<OBAtom *>::iterator it=sort_nbrs.begin(); it!=sort_nbrs.end(); ++it)
        if (std::find(children.begin(), children.end(), *it) == children.end() && *it != _endatom)
          front.push_back(*it);
        else
          end.push_back(*it);
      sort_nbrs = front;
      sort_nbrs.insert(sort_nbrs.end(), end.begin(), end.end());
    }

    // Build the next layer of nodes, in canonical order
    for (ai = sort_nbrs.begin(); ai != sort_nbrs.end(); ++ai) {
      nbr = *ai;
      idx = nbr->GetIdx();
      if (_uatoms[idx])   // depth-first search may have used this atom since
        continue;         // we sorted the bonds above
      bond = atom->GetBond(nbr);
      _ubonds.SetBitOn(bond->GetIdx());
      next = new OBCanSmiNode(nbr);
      next->SetParent(atom);
      node->AddChildNode(next, bond);
      BuildCanonTree(mol, frag_atoms, canonical_order, next);
    }

    return(true);
  }


  /***************************************************************************
   * FUNCTION: GetCanonClosureDigits
   *
   * DESCRIPTION:
   *       Given an atom, returns the ring-closure digits for that atom, in
   *       the form of a vector of digit/OBBond* pair.  Some of the digits may
   *       be for newly-opened rings (the matching digit occurs later in the
   *       SMILES string), and some may be for closing rings (the matching
   *       digit occured earlier in the string).
   *
   *       Canonicalization requires that atoms with more than one digit
   *       have the digits assigned in a canonical fashion.  For example,
   *       the SMILES  "CC12(NCCC2)CCC1" and "CC12(NCCC1)CCC2" are the
   *       same molecule; we need to assign the digits of the first "C12"
   *       such that it always comes out one way or the other.
   *
   *       This needs to find closing bonds (ring bonds already assigned a
   *       digit) and opening bonds (ring bonds not encountered yet).
   *
   *    Closing Bonds:
   *       This is easy: open bonds are already stored in the _vopen vector,
   *       in canonical order.  Just find open bonds to this atom and copy
   *       them from _vopen to our return vector.
   *
   *    Opening Bonds:
   *       This function looks through the bonds for this atoms and finds
   *       any that aren't on the _ubonds "used" list, (and also are non-H
   *       and are in this fragment).  Any such bonds must be ring-closure
   *       bonds.  If there is more than one, they are ordered by the
   *       canonical order of the bonds' neighbor atoms; that is, the bond
   *       to the lowest canonical-ordered neighbor is assigned the first
   *       available number, and upwards in neighbor-atom canonical order.
   ***************************************************************************/

  vector<OBBondClosureInfo>
  OBMol2Cansmi::GetCanonClosureDigits(OBAtom *atom,
                                      OBBitVec &frag_atoms,
                                      vector<unsigned int> &canonical_order)
  {
    vector<OBBondClosureInfo> vp_closures;
    vector<OBBond*> vbonds;
    vector<OBBond*>::iterator bi;
    vector<OBEdgeBase*>::iterator i;
    OBBond *bond1, *bond2;
    OBAtom *nbr1, *nbr2;
    int nbr1_canorder, nbr2_canorder;

    vp_closures.clear();
    vbonds.clear();

    // Find new ring-closure bonds for this atom
    for (bond1 = atom->BeginBond(i); bond1; bond1 = atom->NextBond(i)) {

      // Is this a ring-closure neighbor?
      if (_ubonds.BitIsOn(bond1->GetIdx()))
        continue;
      nbr1 = bond1->GetNbrAtom(atom);
      // Skip hydrogens before checking canonical_order
      // PR#1999348
      if (   (nbr1->IsHydrogen() && IsSuppressedHydrogen(nbr1))
             || !frag_atoms.BitIsOn(nbr1->GetIdx()))
        continue;

      nbr1_canorder = canonical_order[nbr1->GetIdx()-1];

      // Insert into the bond-vector in canonical order (by neighbor atom order)
      for (bi = vbonds.begin(); bi != vbonds.end(); ++bi) {
        bond2 = *bi;
        nbr2 = bond2->GetNbrAtom(atom);
        nbr2_canorder = canonical_order[nbr2->GetIdx()-1];
        if (nbr1_canorder < nbr2_canorder) {
          vbonds.insert(bi, bond1);
          bi = vbonds.begin();//insert invalidated bi; set it to fail next test
          break;
        }
      }
      if (bi == vbonds.end())     // highest one (or first one) - append to end
        vbonds.push_back(bond1);
    }

    // If we found new open bonds, assign a bond-closure digits to each one,
    // add it to _vopen, and add it to the return vector.
    for (bi = vbonds.begin(); bi != vbonds.end(); ++bi) {
      bond1 = *bi;
      _ubonds.SetBitOn(bond1->GetIdx());
      int digit = GetUnusedIndex();
      int bo = (bond1->IsAromatic())? 1 : bond1->GetBO();  // CJ: why was this line added?  bo is never used?
      _vopen.push_back(OBBondClosureInfo(bond1->GetNbrAtom(atom), atom, bond1, digit, true));
      vp_closures.push_back(OBBondClosureInfo(bond1->GetNbrAtom(atom), atom, bond1, digit, true));
    }


    // Now look through the list of open closure-bonds and find any to this
    // atom (but watch out for the ones we just added).  For each one found,
    // add it to the return vector, and erase it from _vopen.

    if (!_vopen.empty()) {
      vector<OBBondClosureInfo>::iterator j;
      for (j = _vopen.begin(); j != _vopen.end(); ) {
        if (j->toatom == atom) {
          OBBondClosureInfo bci = *j;
          _vopen.erase(j);                // take bond off "open" list
          bci.is_open = false;            // mark it "closed"
          vp_closures.push_back(bci);     // and add it to this atom's list
          j = _vopen.begin();             // reset iterator
        }
        else
          j++;
      }
    }

    return(vp_closures);
  }


  /***************************************************************************
   * FUNCTION: IsSuppressedHydrogen
   *
   * DESCRIPTION:
   *       For a hydrogen atom, returns TRUE if the atom is not [2H] or [3H], only
   *       has one bond, and is not bonded to another hydrogen.
   *
   *       NOTE: Return value is nonsensical if you pass it a non-hydrogen
   *       atom.  Presumably, you're calling this because you've found a
   *       hydrogen and want to know if it goes in the SMILES.
   ***************************************************************************/

  bool OBMol2Cansmi::IsSuppressedHydrogen(OBAtom *atom)
  {
    if (atom->GetIsotope() != 0)          // Deuterium or Tritium
      return false;
    if (atom->GetValence() != 1)          // not exactly one bond
      return false;

    FOR_NBORS_OF_ATOM(nbr, atom) {
      if (nbr->GetAtomicNum() == 1)       // neighbor is hydrogen
        return false;
    }

    return true;
  }

  /***************************************************************************
   * FUNCTION: GetSmilesValence
   *
   * DESCRIPTION:
   *       This is like GetHvyValence(), but it returns the "valence" of an
   *       atom as it appears in the SMILES string.  In particular, hydrogens
   *       count if they will appear explicitly -- see IsSuppressedHydrogen()
   *       above.
   ***************************************************************************/

  int OBMol2Cansmi::GetSmilesValence(OBAtom *atom)
  {
    int count = 0;

    if (atom->IsHydrogen())
      return atom->GetValence();

    if (_pconv && _pconv->IsOption("h"))
      return atom->GetValence();

    FOR_NBORS_OF_ATOM(nbr, atom) {
      if (  !nbr->IsHydrogen()
            || nbr->GetIsotope() != 0
            || nbr->GetValence() != 1)
        count++;
    }
    return(count);
  }


  /***************************************************************************
   * FUNCTION: ToCansmilesString
   *
   * DESCRIPTION:
   *       Recursively writes the canonical SMILES string to a buffer.  Writes
   *       this node, then selects each of the child nodes (in canonical
   *       order) and writes them.
   *
   *       Chirality is the tricky bit here.  Before we can write out a chiral
   *       atom, we have to "look ahead" to determine the order in which the
   *       neighbor atoms are/will be written.
   *
   *       The SMILES language defines the order-of-appearance of a ring-closure
   *       bond as the position of the digit, in the SMILES, not the actual atom.
   *       For example, the fragments N[C@H](C)Br, and N[C@H]1(Br)CCCC1 have
   *       the same chiral center, because the "1" in the second one is a "stand
   *       in" for the "C" in the first, even though the actual carbon atom appears
   *       after the Bromine atom in the second string.
   ***************************************************************************/

  void OBMol2Cansmi::ToCansmilesString(OBCanSmiNode *node,
                                       char *buffer,
                                       OBBitVec &frag_atoms,
                                       vector<unsigned int> &symmetry_classes,
                                       vector<unsigned int> &canonical_order,
                                       bool isomeric)
  {
    OBAtom *atom = node->GetAtom();
    vector<OBAtom *> chiral_neighbors;

    // Get the ring-closure digits in canonical order.  We'll use these in
    // two places: First, for figuring out chirality, then later for writing
    // the actual ring-closure digits to the string.
    vector<OBBondClosureInfo> vclose_bonds = GetCanonClosureDigits(atom, frag_atoms, canonical_order);

    // First thing: Figure out chirality.  We start by creating a vector of the neighbors
    // in the order in which they'll appear in the canonical SMILES string.  This is more
    // complex than you'd guess because of implicit/explicit H and ring-closure digits.

    // Don't include chiral symbol on _endatom or _startatom.
    // Otherwise, we end up with C[C@@H](Br)(Cl), where the C has 4 neighbours already
    // and we cannot concatenate another SMILES string without creating a 5-valent C.

    bool is_chiral = AtomIsChiral(atom);
    if (is_chiral && atom!=_endatom && atom!=_startatom) {

      // If there's a parent node, it's the first atom in the ordered neighbor-vector
      // used for chirality.
      if (node->GetParent()) {
        chiral_neighbors.push_back(node->GetParent());
      }

      // Next for chirality order will be hydrogen -- since it occurs
      // inside the atom's [] brackets, it's always before other neighbors.
      //
      // Note that we check the regular neighbor list, NOT the canonical
      // SMILES tree, because hydrogens normally aren't part of the canonical
      // SMILES, but we still need them to figure out chirality.
      //
      // There are two cases: it's explicit in the OBMol object but should be
      // written inside the brackets, i.e. "[C@H]", or it is explicit and
      // must be outside the brackets, such as for deuterium.  (A hydrogen
      // that will appear explicitly in the SMILES as a separate atom is
      // treated like any other atom when calculating the chirality.)

      FOR_NBORS_OF_ATOM(i_nbr, atom) {
        OBAtom *nbr = &(*i_nbr);
        if (nbr->IsHydrogen() && IsSuppressedHydrogen(nbr) ) {
          chiral_neighbors.push_back(nbr);
          break;        // quit loop: only be one H if atom is chiral
        }
      }

      // Ok, done with H. Now we need to consider the case where there is a chiral
      // lone pair. If it exists (and we won't know for sure until we've counted up
      // all the neighbours) it will go in here
      int lonepair_location = chiral_neighbors.size();

      // Ok, done with all that. Next in the SMILES will be the ring-closure characters.
      // So we need to find the corresponding atoms and add them to the list.
      // (We got the canonical ring-closure list earlier.)
      if (!vclose_bonds.empty()) {
        vector<OBBondClosureInfo>::iterator i;
        for (i = vclose_bonds.begin();i != vclose_bonds.end();i++) {
          OBBond *bond = i->bond;
          OBAtom *nbr = bond->GetNbrAtom(atom);
          chiral_neighbors.push_back(nbr);
        }
      }

      // Finally, add the "regular" neighbors, the "child" nodes in the
      // canonical-SMILES tree, to the chiral-neighbors list.
      for (int i = 0; i < node->Size(); i++) {
        OBAtom *nbr = node->GetChildAtom(i);
        chiral_neighbors.push_back(nbr);
      }

      // Handle a chiral lone-pair on a sulfur, by inserting a NULL OBAtom* at the
      // appropriate location
      if (chiral_neighbors.size() == 3 && atom->GetAtomicNum() == 16) // Handle sulfur
        chiral_neighbors.insert(chiral_neighbors.begin() + lonepair_location, static_cast<OBAtom*> (NULL));

    }

    // Write the current atom to the string
    GetSmilesElement(node, chiral_neighbors, symmetry_classes, buffer+strlen(buffer), isomeric);

    _atmorder.push_back(atom->GetIdx());  //store the atom ordering

    // Write ring-closure digits
    if (!vclose_bonds.empty()) {
      vector<OBBondClosureInfo>::iterator bci;
      for (bci = vclose_bonds.begin();bci != vclose_bonds.end();bci++) {
        if (!bci->is_open)
        { // Ring closure
          char bs[2] = {'\0', '\0'};
          // Only get symbol for ring closures on the dbl bond
          if (HasStereoDblBond(bci->bond, node->GetAtom()))
            bs[0] = GetCisTransBondSymbol(bci->bond, node);
          if (bs[0])
            strcat(buffer, bs);	// append "/" or "\"
          else
          {
            if (bci->bond->GetBO() == 2 && !bci->bond->IsAromatic())  strcat(buffer,"=");
            if (bci->bond->GetBO() == 3)                              strcat(buffer,"#");
            if (bci->bond->GetBO() == 4)                              strcat(buffer,"$");
          }
        }
        else
        { // Ring opening
          char bs[2] = {'\0', '\0'};
          // Only get symbol for ring openings on the dbl bond
          if (!HasStereoDblBond(bci->bond, bci->bond->GetNbrAtom(node->GetAtom())))
            bs[0] = GetCisTransBondSymbol(bci->bond, node);
          if (bs[0])
            strcat(buffer, bs);	// append "/" or "\"
        }
        if (bci->ringdigit > 9) strcat(buffer,"%");
        sprintf(buffer+strlen(buffer), "%d", bci->ringdigit);
      }
    }

    // Write child bonds, then recursively follow paths to child nodes
    // to print the SMILES for each child branch.

    OBBond *bond;
    for (int i = 0;i < node->Size();i++) {
      bond = node->GetChildBond(i);
      if (i+1 < node->Size() || node->GetAtom() == _endatom)
        strcat(buffer,"(");

      //if (bond->IsUp() || bond->IsDown()) {
      char cc[2];
      cc[0] = GetCisTransBondSymbol(bond, node);
      cc[1] = '\0';
      if (cc[0] == BondUpChar || cc[0] == BondDownChar)
        strcat(buffer, cc);
      //}
      else if (bond->GetBO() == 2 && !bond->IsAromatic())
        strcat(buffer,"=");
      else if (bond->GetBO() == 3)
        strcat(buffer,"#");
      else if (bond->GetBO() == 4)
        strcat(buffer,"$");

      ToCansmilesString(node->GetChildNode(i),buffer, frag_atoms, symmetry_classes, canonical_order, isomeric);
      if (i+1 < node->Size() || node->GetAtom() == _endatom)
        strcat(buffer,")");
    }
  }

  /****************************************************************************
   * FUNCTION: StandardLabels
   *
   * DESCRIPTION:
   *        Creates a set of non-canonical labels for the fragment atoms
   * ***************************************************************************/
  void StandardLabels(OBMol *pMol, OBBitVec *frag_atoms,
                      vector<unsigned int> &symmetry_classes,
                      vector<unsigned int> &labels)
  {
    FOR_ATOMS_OF_MOL(atom, *pMol) {
      if (frag_atoms->BitIsOn(atom->GetIdx())) {
        labels.push_back(atom->GetIdx() - 1);
        symmetry_classes.push_back(atom->GetIdx() - 1);
      }
      else{
        labels.push_back(OBStereo::ImplicitRef); //to match situation when canonical ordering. Just a big number?
        symmetry_classes.push_back(OBStereo::ImplicitRef);
      }
    }
  }

  /***************************************************************************
   * FUNCTION: RandomLabels
   *
   * DESCRIPTION:
   *    Creates a set of random labels for the fragment atoms.  Primarily
   *    for testing: you can create a bunch of random SMILES for the same
   *    molecule, and use those to test the canonicalizer.
   ***************************************************************************/

  static int timeseed = 0;

  void RandomLabels(OBMol *pMol, OBBitVec &frag_atoms,
      vector<unsigned int> &symmetry_classes,
      vector<unsigned int> &labels)
  {
    int natoms = pMol->NumAtoms();
    OBBitVec used(natoms);

    if (!timeseed) {
      OBRandom rand;
      rand.TimeSeed();
      timeseed = 1;
    }


    FOR_ATOMS_OF_MOL(atom, *pMol) {
      if (frag_atoms.BitIsOn(atom->GetIdx())) {
        int r = rand() % natoms;
        while (used.BitIsOn(r)) {
          r = (r + 1) % natoms;         // find an unused number
        }
        used.SetBitOn(r);
        labels.push_back(r);
        symmetry_classes.push_back(r);
      }
      else{
        labels.push_back(OBStereo::ImplicitRef); //to match situation when canonical ordering. Just a big number?
        symmetry_classes.push_back(OBStereo::ImplicitRef);
      }
    }
  }

  //! Same as tokenize, except in treatment of multiple delimiters. Tokenize
  //! treats multiple delimiters as a single delimiter. It also ignores delimiters
  //! in the first or last position. In contrast, mytokenize treats each instance of
  //! the delimiter as the end/start of a new token.
  bool mytokenize(std::vector<std::string> &vcr, std::string &s,
                      const char *delimstr)
  {
    vcr.clear();
    size_t startpos=0,endpos=0;

    size_t s_size = s.size();
    for (;;)
      {
        //startpos = s.find_first_not_of(delimstr,startpos);
        endpos = s.find_first_of(delimstr,startpos);
        if (endpos <= s_size && startpos <= s_size)
          {
            vcr.push_back(s.substr(startpos,endpos-startpos));
          }
        else
          {
            if (startpos <= s_size)
              vcr.push_back(s.substr(startpos,s_size-startpos));
            break;
          }

        startpos = endpos+1;
      }
    return(true);
  }


  bool OBMol2Cansmi::ParseInChI(OBMol &mol, vector<int> &atom_order)
  {
    OBConversion MolConv(*_pconv); //new copy to use to write associated MOL
    MolConv.SetAuxConv(NULL); //temporary until a proper OBConversion copy constructor written

    OBFormat* pInChIFormat = _pconv->FindFormat("InChI");
    if(pInChIFormat==NULL) {
      obErrorLog.ThrowError(__FUNCTION__, "InChI format not available", obError);
      return false;
    }
    stringstream newstream;
    MolConv.SetOutStream(&newstream);
    MolConv.SetOptions("aX'RecMet'", OBConversion::OUTOPTIONS);
    pInChIFormat->WriteMolecule(&mol, &MolConv);
    cout << newstream.str() << "\n";

    vector<string> splitlines;
    tokenize(splitlines, newstream.str(),"\n");
    vector<string> split, split_aux;

    size_t rm_start = splitlines.at(0).find("/r"); // Correct for reconnected metal if necessary
    if (rm_start == string::npos) {
      tokenize(split, splitlines.at(0),"/");
      tokenize(split_aux, splitlines.at(1),"/");
    }
    else {
      tokenize(split, splitlines.at(0).substr(rm_start), "/");
      split.insert(split.begin(), "");
      size_t rm_start_b = splitlines.at(1).find("/R:");
      tokenize(split_aux, splitlines.at(1).substr(rm_start_b), "/");
    }

    vector<string> component, s_conn_table, multiple;
    vector<vector<int> > conn_table; // Vector for each disconnected component
    int N_components = 0;
    bool has_connection_layer = true;
    if (split.size() > 2 && split[2][0] == 'c') { // e.g. /c1-2-3;4*1-2;1-3-5-4-2;/
      mytokenize(component, split[2].substr(1), ";");
      for(vector<string>::iterator it=component.begin(); it!=component.end(); ++it) {
        if (*it == "") {
          conn_table.push_back(vector<int>(1, 1));
          continue;
        }
        // Find whether these connections represent multiple components
        tokenize(multiple, *it, "*");
        int factor = 1;
        if (multiple.size() == 2) {
          factor = atoi(multiple.at(0).c_str());
          tokenize(s_conn_table, multiple.at(1), "(),-");
        }
        else
          tokenize(s_conn_table, *it, "(),-");
        // Now parse the connection table for this component
        vector<int> conns;
        for(vector<string>::iterator bit=s_conn_table.begin(); bit!=s_conn_table.end(); ++bit)
          conns.push_back(atoi(bit->c_str()));
        for (int i=0; i<factor; ++i)
          conn_table.push_back(conns);
        N_components += factor;
      }
    }
    else { // The connection layer will be missing if all of its disconnected components have a single atom
      vector<string> norm_comps;
      tokenize(norm_comps, split_aux.at(2).substr(2), ";");
      N_components = norm_comps.size();
      for(int i=0; i<N_components; ++i)
        conn_table.push_back(vector<int>(1, 1));
    }

    // Extract the normalized atom numbers
    vector<string> s_normalized;
    tokenize(s_normalized, split_aux.at(2).substr(2), ",;");
    vector<int> normalized;
    for(vector<string>::iterator it=s_normalized.begin(); it!=s_normalized.end(); ++it)
      normalized.push_back(atoi(it->c_str()));

    // Add some sanity checks
    cout << "InChI Order: ";
    int N_atoms = 0;
    for (int i=0; i<conn_table.size(); ++i)
    {
      for (int j=0; j<conn_table.at(i).size(); ++j) {
        atom_order.push_back(normalized.at(N_atoms + conn_table.at(i).at(j) - 1));
        cout << atom_order.back() << ",";
      }
      N_atoms += *std::max_element(conn_table.at(i).begin(), conn_table.at(i).end());
    }
    cout << "\n";

    return true;
  }

  /***************************************************************************
   * FUNCTION: CreateFragCansmiString
   *
   * DESCRIPTION:
   *       Selects the "root" atom, which will be first in the SMILES, then
   *       builds a tree in canonical order, and finally generates the SMILES.
   *       If there are then atoms that haven't been visited (i.e. a molecule
   *       with disconnected parts), selects a new root from the remaining
   *       atoms and repeats the process.
   ***************************************************************************/

  void OBMol2Cansmi::CreateFragCansmiString(OBMol &mol, OBBitVec &frag_atoms, bool isomeric, char *buffer)
  {
    //cout << "CreateFragCansmiString()" << endl;
    OBAtom *atom;
    OBCanSmiNode *root;
    buffer[0] = '\0';
    vector<OBNodeBase*>::iterator ai;
    vector<unsigned int> symmetry_classes, canonical_order;

    //Pointer to Atom Class data set if -xa option and the molecule has any; NULL otherwise.
    if(_pconv->IsOption("a"))
      _pac = static_cast<OBAtomClassData*>(mol.GetData("Atom Class"));

    // Remember the desired endatom, if specified
    const char* pp = _pconv->IsOption("l");
    int atom_idx  = pp ? atoi(pp) : -1;
    if (atom_idx >= 1 && atom_idx <= mol.NumAtoms())
      _endatom = mol.GetAtom(atom_idx);
    // Was a start atom specified?
    pp = _pconv->IsOption("f");
    atom_idx  = pp ? atoi(pp) : -1;
    if (atom_idx >= 1 && atom_idx <= mol.NumAtoms())
      _startatom = mol.GetAtom(atom_idx);

    // Was an atom ordering specified?
    const char* ppI = _pconv->IsOption("I");
    vector<string> s_atom_order;
    vector<int> atom_order;
    if (ppI) {
      tokenize(s_atom_order,ppI,"-()");
      if (s_atom_order.size() != mol.NumHvyAtoms())
        ppI = NULL;
      else {
        for (vector<string>::const_iterator cit=s_atom_order.begin(); cit!=s_atom_order.end(); ++cit)
          atom_order.push_back(atoi(cit->c_str()));
        atom_idx = atom_order.at(0);
        if (atom_idx >= 1 && atom_idx <= mol.NumAtoms())
          _startatom = mol.GetAtom(atom_idx);
      }
    }

    if (_pconv->IsOption("N")) {
      bool useInChIOrder = ParseInChI(mol, atom_order);
      if (!useInChIOrder)
        _pconv->RemoveOption("N", OBConversion::OUTOPTIONS);
    }

    // First, create a canonical ordering vector for the atoms.  Canonical
    // labels are zero indexed, corresponding to "atom->GetIdx()-1".
    if (_canonicalOutput) {
      OBGraphSym gs(&mol, &frag_atoms);
      gs.GetSymmetry(symmetry_classes);
      CanonicalLabels(&mol, symmetry_classes, canonical_order, frag_atoms);
    }
    else {
      if (_pconv->IsOption("C")) {      // "C" == "anti-canonical form"
        RandomLabels(&mol, frag_atoms, symmetry_classes, canonical_order);
      } else if (ppI || _pconv->IsOption("N")) { // "I" (user specified order) or "N" (InChI order)
        canonical_order.resize(atom_order.size());
        symmetry_classes.resize(atom_order.size());
        int idx = 1;
        for (int i=0; i<atom_order.size(); ++i) {
          if (canonical_order[atom_order[i] - 1] == 0) { // Ignore ring closures (for "N")
            canonical_order[atom_order[i] - 1] = idx;
            symmetry_classes[atom_order[i] - 1] = idx;
            ++idx;
          }
        }
      } else {
        StandardLabels(&mol, &frag_atoms, symmetry_classes, canonical_order);
      }
    }

    // OUTER LOOP: Handles dot-disconnected structures.  Finds the
    // lowest unmarked canorder atom, and starts there to generate a SMILES.
    // Repeats until no atoms remain unmarked.

    while (1) {

      // It happens that the lowest canonically-numbered atom is usually
      // a good place to start the canonical SMILES.
      OBAtom *root_atom;
      int lowest_canorder = 999999;
      root_atom = NULL;

      // If we specified a startatom_idx & it's in this fragment, use it to start the fragment
      if (_startatom)
        if (!_uatoms[_startatom->GetIdx()] && frag_atoms.BitIsOn(_startatom->GetIdx()))
          root_atom = _startatom;

      if (root_atom == NULL) {
        for (atom = mol.BeginAtom(ai); atom; atom = mol.NextAtom(ai)) {
          int idx = atom->GetIdx();
          if (!_uatoms[idx]              // skip atoms already used (for fragments)
              && frag_atoms.BitIsOn(idx) // skip atoms not in this fragment
              && canonical_order[idx-1] < lowest_canorder) {

              if (_pconv->IsOption("N") ||  // skip the following conditions if using InChI order
                  (!atom->IsHydrogen()      // don't start with a hydrogen
                  //&& !atom->IsChiral()    // don't use chiral atoms as root node
                  && !_uatoms[idx]))        // skip atoms already used (for fragments)
              {
                root_atom = atom;
                lowest_canorder = canonical_order[idx-1];
              }
          }
        }
      }

      // If we didn't pick an atom, it is because the fragment is made
      // entirely of hydrogen atoms (e.g. [H][H]).  Repeat the loop but
      // allow hydrogens this time.
      if (root_atom == NULL) {
        for (atom = mol.BeginAtom(ai); atom; atom = mol.NextAtom(ai)) {
          int idx = atom->GetIdx();
          if (!_uatoms[idx]           // skip atoms already used (for fragments)
              && frag_atoms.BitIsOn(idx)// skip atoms not in this fragment
              && canonical_order[idx-1] < lowest_canorder) {
            root_atom = atom;
            lowest_canorder = canonical_order[idx-1];
          }
        }
      }

      // No atom found?  We've done all fragments.
      if (root_atom == NULL)
        break;

      // Clear out closures in case structure is dot disconnected
      //      _atmorder.clear();
      _vopen.clear();

      // Dot disconnected structure?
      if (strlen(buffer) > 0) strcat(buffer,".");
      root = new OBCanSmiNode (root_atom);

      BuildCanonTree(mol, frag_atoms, canonical_order, root);
      ToCansmilesString(root, buffer, frag_atoms, symmetry_classes, canonical_order, isomeric);
      delete root;
    }

    // save the canonical order as a space-separated string
    // which will be returned by GetOutputOrder() for incorporation
    // into an OBPairData keyed "canonical order"
    if (_atmorder.size()) {
      stringstream temp;
      vector<int>::iterator can_iter = _atmorder.begin();
      if (can_iter != _atmorder.end()) {
        temp << (*can_iter++);
      }

      for (; can_iter != _atmorder.end(); ++can_iter) {
        if (*can_iter <= mol.NumAtoms())
          temp << " " << (*can_iter);
      }
      _canorder = temp.str(); // returned by GetOutputOrder()
    }
  }

  /***************************************************************************
   * FUNCTION: OBMol2Cansmi::AddHydrogenToChiralCenters
   *
   * DESCRIPTION:
   *       Adds an explicit hydrogen to any chiral center that only has three
   *       atoms.  This makes analysis much easier since the algorithms can
   *       assume that all tetrahedral carbons have four neighbors.
   ***************************************************************************/

  void OBMol2Cansmi::AddHydrogenToChiralCenters(OBMol &mol, OBBitVec &frag_atoms)
  {
    bool is_modified = false;
    vector <OBAtom *> atomList;
    int element;
    bool hasChiralityPerceived = mol.HasChiralityPerceived(); // remember to restore

    // Find all appropriate atoms to add hydrogens
    FOR_ATOMS_OF_MOL(atom, mol)
      {
        if (!frag_atoms[atom->GetIdx()] || !AtomIsChiral(&*atom))
          continue;

        // don't mess with transition elements!
        element = atom->GetAtomicNum();
        if ( (element >= 21 && element <= 30)
             || (element >= 39 && element <= 49)
             || (element >= 71 && element <= 82) )
          continue;

        if (GetSmilesValence(&*atom) == 3 && atom->GetValence() == 3) {       // implicit H?
          atomList.push_back(&*atom);
        }
      }

    // Now add hydrogens to the list
    if (atomList.size() > 0) {
      mol.BeginModify();

      vector<OBAtom*>::iterator i;
      //      OBAtom *atom;
      for (i = atomList.begin(); i != atomList.end(); ++i) {

#if DEBUG
        cout << "AddHydrogenToChiralCenters: Adding H to atom " << (*i)->GetIdx() << "\n";
#endif

        // Add the H atom
        mol.AddHydrogens(*i);

        frag_atoms.SetBitOn(mol.NumAtoms());
      }

      mol.EndModify();
      // Don't lose the ChiralityPerceived flag...
      if (hasChiralityPerceived)
        mol.SetChiralityPerceived();
    }
  }

  /*----------------------------------------------------------------------
   * END OF CLASS: OBMol2Cansmi
   ----------------------------------------------------------------------*/


  /***************************************************************************
   * FUNCTION: CreateCansmiString
   *
   * DESCRIPTION:
   *       Writes the canonical SMILES for a molecule or molecular fragment
   *       to the given buffer.
   *
   *       frag_atoms represents atoms in a fragment of the molecule; the
   *       SMILES will contain those atoms only.
   *
   *       (Note: This is an ordinary public C++ function, not a member
   *       of any class.)
   *
   ***************************************************************************/

  void CreateCansmiString(OBMol &mol, char *buffer, OBBitVec &frag_atoms, bool iso, OBConversion* pConv)
  {
    bool canonical = pConv->IsOption("c")!=NULL;

    // This is a hack to prevent recursion problems.
    //  we still need to fix the underlying problem -GRH
    if (mol.NumAtoms() > 1000) {
      stringstream errorMsg;
      errorMsg <<
        "SMILES Conversion failed: Molecule is too large to convert."
        "Open Babel is currently limited to 1000 atoms." << endl;
      errorMsg << "  Molecule size: " << mol.NumAtoms() << " atoms " << endl;
      obErrorLog.ThrowError(__FUNCTION__, errorMsg.str(), obError);
      return;
    }

    OBMol2Cansmi m2s;
    m2s.Init(canonical, pConv);
    // GRH Added 2008-06-05
    // This came from the WriteMolecule call below
    // It doesn't seem to have any effect
    //    m2s.CorrectAromaticAmineCharge(mol);

    if (iso) {
      PerceiveStereo(&mol);
      m2s.CreateCisTrans(mol); // No need for this if not iso
      m2s.AddHydrogenToChiralCenters(mol, frag_atoms);
    } else {
      // Not isomeric - be sure there are no Z coordinates, clear
      // all stereo-center and cis/trans information.
      OBBond *bond;
      vector<OBEdgeBase*>::iterator bi;
      vector<OBNodeBase*>::iterator ai;
      for (bond = mol.BeginBond(bi); bond; bond = mol.NextBond(bi)) {
        bond->UnsetUp();
        bond->UnsetDown();
        bond->UnsetHash();
        bond->UnsetWedge();
      }
    }

    // If the fragment includes ordinary hydrogens, get rid of them.
    // They won't appear in the SMILES anyway (unless they're attached to
    // a chiral center, or it's something like [H][H]).
    FOR_ATOMS_OF_MOL(iatom, mol) {
      OBAtom *atom = &(*iatom);
      if (frag_atoms.BitIsOn(atom->GetIdx()) && atom->IsHydrogen()
          && (!iso || m2s.IsSuppressedHydrogen(atom))) {
        frag_atoms.SetBitOff(atom->GetIdx());
      }
    }

    m2s.CreateFragCansmiString(mol, frag_atoms, iso, buffer);

    // Could also save canonical bond order if anyone desires
    if (!mol.HasData("SMILES Atom Order")) {
      // This atom order data is useful not just for canonical SMILES
      OBPairData *canData = new OBPairData;
      canData->SetAttribute("SMILES Atom Order");
      canData->SetValue(m2s.GetOutputOrder());
      // cout << "SMILES_atom_order "<<m2s.GetOutputOrder() << "\n";
      canData->SetOrigin(OpenBabel::local);
      mol.SetData(canData);
    }
  }

  //////////////////////////////////////////////////
  bool SMIBaseFormat::WriteMolecule(OBBase* pOb,OBConversion* pConv)
  {
    //cout << "SMIBaseFromat::WriteMolecule()" << endl;
    OBMol* pmol = dynamic_cast<OBMol*>(pOb);

    // Define some references so we can use the old parameter names
    ostream &ofs = *pConv->GetOutStream();
    OBMol mol = *pmol;

    // Title only option?
    if(pConv->IsOption("t")) {
      ofs << mol.GetTitle() <<endl;
      return true;
    }

    char buffer[BUFF_SIZE];
    *buffer = '\0'; // clear the buffer

    // This is a hack to prevent recursion problems.
    //  we still need to fix the underlying problem (mainly chiral centers) -GRH
    if (mol.NumAtoms() > 1000) {
      stringstream errorMsg;
      errorMsg <<
        "SMILES Conversion failed: Molecule is too large to convert."
        "Open Babel is currently limited to 1000 atoms." << endl;
      errorMsg << "  Molecule size: " << mol.NumAtoms() << " atoms " << endl;
      obErrorLog.ThrowError(__FUNCTION__, errorMsg.str(), obError);
      return(false);
    }

    // If there is data attached called "SMILES_Fragment", then it's
    // an ascii OBBitVec, representing the atoms of a fragment.  The
    // SMILES generated will only include these fragment atoms.

    OBBitVec fragatoms(mol.NumAtoms());

    OBPairData *dp = (OBPairData *) mol.GetData("SMILES_Fragment");
    if (dp) {
      fragatoms.FromString(dp->GetValue(), mol.NumAtoms());
    }

    // If no "SMILES_Fragment" data, fill the entire OBBitVec
    // with 1's so that the SMILES will be for the whole molecule.
    else {
      FOR_ATOMS_OF_MOL(a, mol)
        {
          fragatoms.SetBitOn(a->GetIdx());
        }
    }

    if (mol.NumAtoms() > 0) {
      CreateCansmiString(mol, buffer, fragatoms, !pConv->IsOption("i"), pConv);
    }

    ofs << buffer;
    if(!pConv->IsOption("smilesonly")) {

      if(!pConv->IsOption("n"))
        ofs << '\t' <<  mol.GetTitle();

      if (pConv->IsOption("x") && mol.HasData("SMILES Atom Order")) {
        vector<string> vs;
        string canorder = mol.GetData("SMILES Atom Order")->GetValue();
        tokenize(vs, canorder);
        ofs << '\t';
        for (int i = 0; i < vs.size(); i++) {
          int idx = atoi(vs[i].c_str());
          OBAtom *atom = mol.GetAtom(idx);
          if (i > 0)
            ofs << ",";
          ofs << atom->GetX() << "," << atom->GetY();
        }
      }

      if(!pConv->IsOption("nonewline"))
        ofs << endl;
    }

    //cout << "end SMIBaseFromat::WriteMolecule()" << endl;
    return true;
  }

  //********************************************************
  class FIXFormat : public OBMoleculeFormat
  {
  public:
    //Register this format type ID
    FIXFormat()
    {
      OBConversion::RegisterFormat("fix",this);
    }

    virtual const char* Description() //required
    {
      return
        "SMILES FIX format\n"
        "  No comments yet\n";
    };

    virtual const char* SpecificationURL()
    {return "";}; //optional

    //Flags() can return be any the following combined by | or be omitted if none apply
    // NOTREADABLE  READONEONLY  NOTWRITABLE  WRITEONEONLY
    virtual unsigned int Flags()
    {
      return NOTREADABLE;
    };

    ////////////////////////////////////////////////////
    /// The "API" interface functions
    virtual bool WriteMolecule(OBBase* pOb, OBConversion* pConv);

  };

  //Make an instance of the format class
  FIXFormat theFIXFormat;

  /////////////////////////////////////////////////////////////////

  bool FIXFormat::WriteMolecule(OBBase* pOb, OBConversion* pConv)
  {
    //cout << "FIXFromat::WriteMolecule()" << endl;
    OBMol* pmol = dynamic_cast<OBMol*>(pOb);
    if(pmol==NULL)
      return false;

    //Define some references so we can use the old parameter names
    ostream &ofs = *pConv->GetOutStream();
    OBMol &mol = *pmol;

    char buffer[BUFF_SIZE];
    OBMol2Cansmi m2s;

    // This is a hack to prevent recursion problems.
    //  we still need to fix the underlying problem -GRH
    if (mol.NumAtoms() > 1000)
      {
        stringstream errorMsg;
        errorMsg << "SMILES Conversion failed: Molecule is too large to convert. Open Babel is currently limited to 1000 atoms." << endl;
        errorMsg << "  Molecule size: " << mol.NumAtoms() << " atoms " << endl;
        obErrorLog.ThrowError(__FUNCTION__, errorMsg.str(), obWarning);
        return(false);
      }

    // Write the SMILES in a FIX with canonical order
    m2s.Init(true, pConv);
    // From 2.1 code.
    m2s.CorrectAromaticAmineCharge(mol);

    // We're outputting a full molecule
    // so we pass a bitvec for all atoms
    OBBitVec allbits(mol.NumAtoms());
    FOR_ATOMS_OF_MOL(a, mol) {
      allbits.SetBitOn(a->GetIdx());
    }

    if (mol.NumAtoms() > 0) {
      CreateCansmiString(mol, buffer, allbits, !pConv->IsOption("i"), pConv);
    }
    ofs << buffer << endl;

    OBAtom *atom;
    vector<int>::iterator i;
    // Retrieve the canonical order of the molecule
    string orderString = m2s.GetOutputOrder();
    vector<string> canonical_order;
    tokenize(canonical_order, orderString);

    int j;
    int atomIdx;
    for (j = 0;j < mol.NumConformers();j++)
      {
        mol.SetConformer(j);
        for (unsigned int index = 0; index < canonical_order.size();
             ++index) {
          atomIdx = atoi(canonical_order[index].c_str());
          atom = mol.GetAtom(atomIdx);
          sprintf(buffer,"%9.3f %9.3f %9.3f",atom->GetX(),atom->GetY(),atom->GetZ());
          ofs << buffer<< endl;
        }
      }
    return(true);
  }

} // end namespace OpenBabel
No results found