-
-
Save leelasd/51781922adedd586ad76283e84e30d4d to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import openbabel | |
# Copyright 2009 TJ O'Donnell | |
class recap: | |
# RECAP-Retrosynthetic Combinatorial Analysis Procedure | |
# J. Chem. Inf. Comput. Sci. 1998, 38, 511-522 | |
def __init__(self, mol, minsize=5): | |
self.mol = mol; | |
# minimum allowed size (atom count) of fragment | |
self.minsize = minsize; | |
# bonded atom pairs populated by the apply method, | |
# subsequently used by split and add_star | |
self.atom_pairs = list() | |
def apply(self, pat, patnum): | |
if pat.Match(self.mol): | |
# find all atom pairs that match | |
for p in pat.GetUMapList(): | |
i = 0 | |
atoms = list() | |
for a in openbabel.OBMolAtomIter(self.mol): | |
i += 1 | |
if i in p: | |
atoms.append(a) | |
if self.small_fragment(atoms[0], atoms[1]): | |
#print True | |
pass | |
else: | |
atoms.append(patnum) | |
self.atom_pairs.append(atoms) | |
#print False | |
def split(self): | |
for a in self.atom_pairs: | |
a[0].SetIsotope(a[2]) | |
a[1].SetIsotope(a[2]) | |
bond = a[0].GetBond(a[1]) | |
self.mol.DeleteBond(bond) | |
def add_star(self): | |
for pair in self.atom_pairs: | |
self.mol.AddBond(pair[0].GetIdx(),self.mol.NewAtom().GetIdx(),1) | |
self.mol.AddBond(pair[1].GetIdx(),self.mol.NewAtom().GetIdx(),1) | |
def decide_multiples(self): | |
# some smarts (e.g. ether, amine) allow multiple bonds to the | |
# central atom to be broken. Yet it appears the central atom | |
# needs to be retained in one of the multiple fragments. | |
# If multiple fragments, let it stay with the smallest fragment. | |
# If tied, pick the first fragment. | |
multiples = [0]*self.mol.NumAtoms() | |
for pair in self.atom_pairs: | |
multiples[pair[0].GetIdx()] += 1 | |
multiples[pair[1].GetIdx()] += 1 | |
#print multiples | |
currsize = -1 | |
currpair = None | |
for pair in self.atom_pairs: | |
a = pair[0] | |
b = pair[1] | |
if multiples[a.GetIdx()] > 1 or multiples[b.GetIdx()] > 1: | |
# remove larger fragment(s) if a-b were broken | |
#print a.GetIdx(),b.GetIdx(), | |
fsize = self.fragment_size(a,b) | |
if currpair == None: | |
currpair = pair | |
currsize = fsize | |
else: | |
if fsize < currsize: | |
self.atom_pairs.remove(pair) | |
else: | |
self.atom_pairs.remove(currpair) | |
currpair = pair | |
currsize = fsize | |
def fragment_size(self, a, b): | |
# size of fragment b if a-b were broken | |
c1 = openbabel.vectorInt() | |
self.mol.FindChildren(c1,a.GetIdx(),b.GetIdx()) | |
#for atom in c1: | |
# if self.mol.GetAtom(atom).GetValence() == 1: | |
return 1+len(c1) | |
def small_fragment(self, a, b): | |
# if we were to break the bond between a and b, | |
# would either fragment be too small? | |
#print a.GetIdx(), b.GetIdx(), | |
if self.fragment_size(a,b) < self.minsize: return True | |
if self.fragment_size(b,a) < self.minsize: return True | |
return False | |
import sys | |
# each smarts must contain only two atoms representing the bond to be broken. | |
# of course, each atom may be a complex atom smarts, ala [$(whatever)] | |
smarts = { | |
' 1.amide':'[$([C;!$(C([#7])[#7])](=!@[O]))]!@[$([#7;+0;!D1])]', | |
' 2.ester':'[$(C=!@O)]!@[$([O;+0])]', | |
' 3.amine':'[$([N;!D1;+0;!$(N-C=[#7,#8,#15,#16])](-!@[*]))]-!@[$([*])]', | |
' 4.urea':'[$(C(=!@O)([#7;+0;D2,D3])!@[#7;+0;D2,D3])]!@[$([#7;+0;D2,D3])]', | |
' 5.ether':'[$([O;+0](-!@[#6!$(C=O)])-!@[#6!$(C=O)])]-!@[$([#6!$(C=O)])]', | |
' 6.olefin':'C=!@C', | |
' 7.quaternaryN':'[N;+1;D4]!@[#6]', | |
' 8.aromaticN-aliphaticC':'[$([n;+0])]-!@C', | |
' 9.lactamN-aromaticC':'[$([O]=[C]-@[N;+0])]-!@[$([C])]', | |
'10.aromaticC-aromaticC':'c-!@c', | |
'11.sulphonamide':'[$([#7;+0;D2,D3])]-!@[$([S](=[O])=[O])]' | |
} | |
pat = openbabel.OBSmartsPattern(); | |
obc = openbabel.OBConversion() | |
obc.SetInAndOutFormats("smi","can") | |
mol = openbabel.OBMol() | |
for smi in sys.stdin.readlines(): | |
mol.Clear() | |
obc.ReadString(mol, smi) | |
amol = mol | |
bonds = list() | |
Recap = recap(amol,4) | |
i = 0 | |
for stype in sorted(smarts.keys()): | |
pat.Init(smarts[stype]) | |
#for sma in smarts: | |
# pat.Init(sma) | |
i += 1 | |
Recap.apply(pat, i) | |
Recap.decide_multiples() | |
Recap.split() | |
#Recap.add_star() | |
cansmi = obc.WriteString(amol,1) | |
print cansmi |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment