Skip to content

Instantly share code, notes, and snippets.

C01 C02 C03 C04 C05 C06
4193 58.52690209 19.79238528 105.52763504 94.95405256 31.71595781 118.67890655
1644 97.88520438 32.16221987 80.88268546 99.14777684 29.55725639 90.61186379
598 2.87791536 1.25763403 49.85402356 2.28738267 2.77951966 65.14421558
7153 10.14441154 13.59621158 57.16808301 7.37447958 2.33377616 77.48738463
4790 22.75508944 42.99521409 109.28704983 36.73099591 34.4355488 89.54434147
6774 4.09136397 7.24804356 108.45800573 40.2471083 4.58565529 106.56955414
3156 78.35995401 54.8259949 114.3780661 87.60616517 37.04524797 104.79968629
5916 23.89803961 9.43186629 84.96080843 13.23830152 22.095928 68.91517787
2932 27.36320561 1.68586442 112.55169248 21.23236686 3.56942032 77.29558595
import selfies, random, sys
# Number of distinct entries to collect in rnd_smi (the while loop below runs
# until the set holds N items).
N = 1000
# NOTE(review): M is not used in the visible part of this snippet; presumably a
# size/length cap for the generated strings -- confirm against the full script.
M = 500
# Select the "octet_rule" semantic-constraint preset before reading the alphabet.
selfies.set_semantic_constraints("octet_rule")
# Materialize the semantically robust SELFIES alphabet as a list.
alphabet= list(selfies.get_semantic_robust_alphabet())
# Keep only symbols that contain neither '+' nor '-' (presumably this drops the
# charged symbols -- confirm).
alphabet = list(filter(lambda x: '+' not in x and '-' not in x, alphabet))
# A set, so duplicate entries are discarded automatically; presumably holds the
# generated random SMILES.
rnd_smi = set()
while len(rnd_smi) < N:
# Find the entry of an SDF file that is most similar to a query structure,
# comparing fingerprints of every loaded molecule against the query.
library(rcdk)
library(fingerprint)

# Query structure, given as a SMILES string.
query_smi <- "CCCCCCCC"

# Load every molecule from the SDF file.
hmdb <- load.molecules("smpdb_structures/compounds-1.sdf")

# Use each molecule's DATABASE_ID property as its title. The value stored in
# `junk` is not used further in this snippet.
junk <- lapply(hmdb, function(mol) {
  set.title(mol, get.property(mol, "DATABASE_ID"))
})

# Fingerprints for the query and for every loaded molecule.
query_mol <- parse.smiles(query_smi)[[1]]
query_fp <- get.fingerprint(query_mol)
hmdb_fp <- lapply(hmdb, get.fingerprint)

# One row per loaded molecule, one column for the single query fingerprint.
sm <- fp.sim.matrix(hmdb_fp, list(query_fp))
rownames(sm) <- sapply(hmdb, function(mol) get.property(mol, "DATABASE_ID"))

# Show the row with the highest similarity to the query.
best_row <- which.max(sm[, 1])
sm[best_row, ]
library(rcdk)
#' Compute the Murcko fragments of one molecule and print a progress line.
#'
#' @param mol A molecule object accepted by `get.murcko.fragments()` and
#'   `get.property()`.
#' @return The first element of the `get.murcko.fragments()` result for `mol`;
#'   this function reads its `rings` and `frameworks` components.
f <- function(mol) {
  frags <- get.murcko.fragments(list(mol), min.frag.size = 3)[[1]]
  # The leading "\r" returns the cursor to the start of the line so successive
  # progress messages overwrite each other instead of scrolling.
  cat(
    "\rProcessed molregno", get.property(mol, "cdk:Title"), "which had",
    length(frags$rings), "rings and", length(frags$frameworks), "frameworks"
  )
  frags
}
# Open the SMILES file for iteration; `miter` is consumed one molecule at a
# time by the foreach loop below (presumably so the whole file is never held in
# memory at once -- confirm against the rcdk documentation).
miter <- iload.molecules('/Users/guha/chembl28_10k.smi', type='smi')
# Apply f() to every molecule from the iterator and collect the results.
# NOTE(review): foreach() and %do% come from the foreach package, which is not
# attached anywhere in the visible code -- confirm it is loaded elsewhere.
frags <- foreach(mol=miter) %do% f(mol)
@rajarshi
rajarshi / foo.py
Created November 17, 2021 15:18
stripping chars in pandas
import pandas as pd
if __name__ == '__main__':
    # Small demo frame: col2 holds strings with braces at one or both ends.
    d = {'col1': [1, 2, 3],
         'col2': ['{abc defg', '{xyz pqr}', '{nan, foo}']}
    df = pd.DataFrame(d)
    print(df)
    # str.strip() treats its argument as a set of characters, not a pattern:
    # any leading/trailing '{', '|' or '}' is removed from each value.
    df['col3'] = df['col2'].str.strip("{|}").astype('str')
https://mit.zoom.us/j/94364555006?pwd=bFBGeGlvY0cxV0lTRzZPMWZDdDJNQT09#success
from typing import List, Union
class Unit:
    """Unit type referenced by ``Result``; holds no state of its own."""

    def __init__(self):
        # Nothing to initialize: instances carry no attributes.
        pass
class Result:
def __init__(self, value: float, unit: Unit):
https://zoom.us/w/94280441863?tk=FwYQPHrVQzCxdXDQjONgSnw7TnOH2IfrzqdF7Oae7Cg.DQIAAAAV841gBxZsakpEWVQtSFR4Q3NXb01CTzhNV2lRAAAAAAAAAAAAAAAAAAAAAAAAAAAA&pwd=dGVxZ2g3c01PNis5RGxxVVlRQks0Zz09
@rajarshi
rajarshi / fda_drug_cores.smi
Created February 10, 2020 13:57
Bemis Murcko scaffolds derived from FDA Approved Drugs using RDKit
C1=C2CCCCC2C2CCC3C(c4cccnc4)=CCC3C2C1
C1=C[C@H](N[C@H]2CC[C@@H](O[C@H]3CC[C@@H](O[C@H]4CCCOC4)OC3)OC2)CCC1
c1ccccc1
O=c1oc2ccccc2cc1Cc1ccccc1
c1ccc2c(c1)Nc1ccccc1S2
c1ccc2c(c1)Nc1ccccc1S2
c1ccccc1
c1nncs1
O=C(NC1CCCCC1)NS(=O)(=O)c1ccccc1
c1ccc2c(c1)Sc1ccccc1N2CCCN1CCNCC1