Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
Introductory tutorial on how to use rcdk to conduct an SAR
require(rcdk) # chemical informatics functionality in R
require(gap) # for qq plots later
options(stringsAsFactors=FALSE)
setwd('c:/sci/037cinf/analysis/sar/')
# plot molecules in R plot window instead of separate Java window
rcdkplot = function(molecule,width=500,height=500,marg=0,main='') {
par(mar=c(marg,marg,marg,marg)) # set margins to zero since this isn't a real plot
temp1 = view.image.2d(molecule,width,height) # get Java representation into an image matrix. set number of pixels you want horiz and vertical
plot(NA,NA,xlim=c(1,10),ylim=c(1,10),xaxt='n',yaxt='n',xlab='',ylab='',main=main) # create an empty plot
rasterImage(temp1,1,1,10,10) # boundaries of raster: xmin, ymin, xmax, ymax. here i set them equal to plot boundaries
}
# first look at curcumin
curcumin = parse.smiles("O=C(\\C=C\\c1ccc(O)c(OC)c1)CC(=O)\\C=C\\c2cc(OC)c(O)cc2")[[1]]
rcdkplot(curcumin)
curc.frags = get.murcko.fragments(curcumin)
curc.frags
#[[1]]
#[[1]]$rings
#[1] "c1ccccc1"
#
#[[1]]$frameworks
#[1] "c1ccc(cc1)C=CCCCC=Cc2ccccc2"
png('curcumin.murcko.fragments.png',width=600,height=300)
par(mfrow=c(1,2))
rcdkplot(parse.smiles(curc.frags[[1]]$rings)[[1]])
rcdkplot(parse.smiles(curc.frags[[1]]$frameworks)[[1]])
dev.off()
anle138b = parse.smiles("C1OC2=C(O1)C=C(C=C2)C3=CC(=NN3)C4=CC(=CC=C4)Br")[[1]]
rcdkplot(anle138b)
anle138b.frags = get.murcko.fragments(anle138b)
anle138b.frags
# [[1]]
# [[1]]$rings
# [1] "c1ccccc1" "c1ccc2OCOc2(c1)"
#
# [[1]]$frameworks
# [1] "c1cccc(c1)c2n[nH]cc2" "c1n[nH]c(c1)c2ccc3OCOc3(c2)"
# [3] "c1cccc(c1)c2n[nH]c(c2)c3ccc4OCOc4(c3)"
png('anle138b.murcko.fragments.png',width=600,height=120)
par(mfrow=c(1,5))
rcdkplot(parse.smiles(anle138b.frags[[1]]$rings[1])[[1]])
rcdkplot(parse.smiles(anle138b.frags[[1]]$rings[2])[[1]])
rcdkplot(parse.smiles(anle138b.frags[[1]]$frameworks[1])[[1]])
rcdkplot(parse.smiles(anle138b.frags[[1]]$frameworks[2])[[1]])
rcdkplot(parse.smiles(anle138b.frags[[1]]$frameworks[3])[[1]])
dev.off()
anle138b.frags = get.murcko.fragments(anle138b,min.frag.size=3)
anle138b.frags[[1]]$rings
# [1] "c1ccccc1" "c1n[nH]cc1" "c1ccc2OCOc2(c1)"
# now for a pretend SAR
# load FDA drug list
drugs = read.table('http://www.cureffi.org/wp-content/uploads/2013/10/drugs.txt',
sep='\t',header=TRUE,allowEscapes=FALSE,quote='',comment.char='')
# remove those with no SMILES or with a really huge smiles
# otherwise R will hang on the macromolecules
drugs = drugs[nchar(drugs$smiles) > 0 & nchar(drugs$smiles) < 200,]
drug.objects = parse.smiles(drugs$smiles) # create rcdk IAtomContainer objects for each drug
# check that lengths are same
dim(drugs) # 1467 3
length(drug.objects) # 1467
statins = c("atorvastatin","fluvastatin","lovastatin","pitavastatin","pravastatin","rosuvastatin","simvastatin")
drugs[tolower(drugs$generic_name) %in% statins,] # check that the statins are in the drug list
# examine statin structures
png('statin.structures.png',width=1200,height=600)
par(mfrow=c(2,4))
for (statin in statins) {
rcdkplot(drug.objects[tolower(drugs$generic_name) == statin][[1]],marg=2,main=statin)
}
dev.off()
# get all murcko fragments
mfrags = get.murcko.fragments(drug.objects,min.frag.size=3)
# get a list of all possible fragments in any of these drugs
allfrags = unique(unlist(mfrags))
length(allfrags) # 2035
# get only the ring systems
allrings = unique(unlist(lapply(mfrags,"[[",1)))
length(allrings) # 556
# convert to a matrix
mat = matrix(nrow=length(drug.objects), ncol=length(allrings))
for (i in 1:length(drug.objects)) {
mat[i,] = allrings %in% mfrags[[i]]$rings
}
# aside: if you want rings and frameworks thrown in together,
# do.call(c,mfrags[[1]]) will concatenate the elements of mfrags[[1]] into a vector
# lapply then does this to each element of the list
mfrags_all = lapply(mfrags,do.call,what=c)
is_statin = tolower(drugs$generic_name) %in% statins # is each drug a statin?
pvals = numeric(length(allrings)) # vector to hold p vals
for (j in 1:length(allrings)) { # iterate over rings
hasfrag = mat[,j] # does each drug contain this fragment?
contingency_table = table(data.frame(is_statin,hasfrag)) # 2x2 table
pvals[j] = fisher.test(contingency_table)$p.value # Fisher test of independence
}
png('murcko.ring.statin.qqplot.png',width=500,height=500)
qqunif(pvals,pch=19,cex.main=.8,
main='Association of Murcko ring systems with statin activity\namong FDA-approved drugs')
dev.off()
png('statins.most.enriched.ring.system.png',width=300,height=300)
allrings[pvals < .000001]
# [1] "C=1CCC2CCCC=C2(C=1)"
# what statins contain it?
drugs$generic_name[is_statin & mat[,which(pvals < .000001)]]
# "Lovastatin" "Pravastatin" "Simvastatin"
# how many other drugs contain it
sum(!is_statin & mat[,which(pvals < .000001)])
# 0
rcdkplot(parse.smiles(allrings[pvals < .000001])[[1]],marg=1,main=allrings[pvals < .000001])
dev.off()
png('frag.freq.hist1.png',width=600,height=400)
hist(colSums(mat),col='yellow',breaks=100,
xlab='# of molecules containing a given fragment',
ylab='# of fragments like this',
main='Histogram of fragment frequencies')
dev.off()
sum(colSums(mat)==1)
# [1] 351
sum(colSums(mat[is_statin,]) > 0)
# 8
start_time = Sys.time()
get.exhaustive.fragments(drug.objects[[which(drugs$generic_name=='Rosuvastatin')]],min.frag.size=3)
Sys.time() - start_time
# Time difference of 3.394177 mins
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.