Skip to content

Instantly share code, notes, and snippets.

@baoilleach
Last active April 26, 2018 20:53
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save baoilleach/39614be35a8133b2f52086aedd807451 to your computer and use it in GitHub Desktop.
Save baoilleach/39614be35a8133b2f52086aedd807451 to your computer and use it in GitHub Desktop.
Create random SMILES
import random
import pybel
def create_mutants(A, B, swap_threshold=0.66):
    """Return four shuffled cross-over mutants built from strings A and B.

    A cross-over point is chosen independently in each parent, the parents
    are split there, and the four halves are recombined into four children.
    Each child is then scanned left to right; at every position that still
    has a right-hand neighbour, the pair may be swapped (and both characters
    skipped). Finally the list of mutants is shuffled so that no
    recombination pattern is favoured by position.

    Every mutant is a permutation of the characters of its child, so the
    four mutants together always contain each character of A and B exactly
    twice.

    Args:
        A: first parent string.
        B: second parent string.
        swap_threshold: a neighbouring pair is swapped when a uniform draw
            from [0, 1) is strictly greater than this value, i.e. with
            probability ``1 - swap_threshold`` (0.34, roughly 1 in 3, by
            default). Values >= 1 disable swapping; negative values swap
            every available pair.

    Returns:
        A list of four strings in random order.
    """
    # randint is inclusive at both ends, so a cut at 0 or at len() is
    # possible and either half of a parent may be empty.
    c1 = random.randint(0, len(A))
    c2 = random.randint(0, len(B))
    startA, endA = A[:c1], A[c1:]
    startB, endB = B[:c2], B[c2:]
    children = [
        startA + endB, startB + endA,  # somewhat sensible
        endA + startB, endB + startA,  # less sensible
    ]

    # Mutate a few characters by randomly swapping neighbours.
    mutant_children = []
    for child in children:
        # Collect characters in a list and join once, rather than growing
        # a string with += inside the loop.
        chars = []
        i = 0
        N = len(child)
        while i < N:
            # The random draw only happens when a right-hand neighbour
            # exists (short-circuit), so the last character never costs one.
            if i + 1 < N and random.random() > swap_threshold:
                chars.append(child[i + 1])
                chars.append(child[i])
                i += 2  # both characters of the swapped pair are consumed
            else:
                chars.append(child[i])
                i += 1
        mutant_children.append("".join(chars))

    random.shuffle(mutant_children)  # don't favour any of them
    return mutant_children
def get_mutant(smiA, smiB):
    """Return the first mutant of smiA and smiB that pybel can read.

    Up to 50 batches of mutants are generated with create_mutants(); within
    each batch the candidates are tried in the order returned. A candidate
    for which pybel.readstring("smi", ...) raises IOError is treated as
    having bad syntax and skipped.

    Returns:
        A tuple (mutant string, molecule returned by pybel.readstring) for
        the first candidate that is read successfully, or ("", None) if no
        candidate from any of the 50 batches could be read.
    """
    rounds_left = 50  # try combining these 50 times
    while rounds_left > 0:
        rounds_left -= 1
        for candidate in create_mutants(smiA, smiB):
            try:
                parsed = pybel.readstring("smi", candidate)
            except IOError:
                # bad syntax: move on to the next candidate
                pass
            else:
                return candidate, parsed
    return "", None
if __name__ == "__main__":
    # Load every line of the ChEMBL file, then keep a random sample of 1000
    # lines. Only the text before the first tab of each sampled line is used
    # (presumably the SMILES column -- verify against the file layout).
    with open(r"C:\Tools\LargeData\chembl_23.smi") as inp:
        allchembl = list(inp)
    sampled_lines = random.sample(allchembl, 1000)
    dataset = [line.partition("\t")[0] for line in sampled_lines]

    # Run until interrupted: keep crossing random pairs from the sample and
    # report a mutant only when its molecular weight beats the best so far.
    maxwt = 0
    while True:
        smiA, smiB = random.sample(dataset, 2)
        mutantsmi, mol = get_mutant(smiA, smiB)
        if not (mol and mol.molwt > maxwt):
            continue  # nothing readable, or not a new record
        print(mutantsmi)
        print(mol.molwt)
        maxwt = mol.molwt
        print()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment