Skip to content

Instantly share code, notes, and snippets.

@abhik1368
Created July 17, 2014 20:41
Show Gist options
  • Save abhik1368/88634a9c3185c40f7fa3 to your computer and use it in GitHub Desktop.
Save abhik1368/88634a9c3185c40f7fa3 to your computer and use it in GitHub Desktop.
install.packages('RPostgreSQL', type='source')
#initiate connection
library(RPostgreSQL)
library(BMS)
library(ggplot2)
drv <- dbDriver("PostgreSQL")
con <- dbConnect(drv, dbname="chembl_18")
#query the database
rs <- dbSendQuery(con,"select chembl_id,standard_value,ecfp4 from rdkfps_1 where target_chembl_id ='CHEMBL224' and published_type ='IC50'")
#fetch all the results
d<-fetch(rs ,n =-1)
#close resultSet rs
dbClearResult(rs)
#remove duplicates ids
d<-d[!duplicated(d[,1]), ]
# remove the \\x from the hexadecimal string and convert hexadecimal to binary format
ad<-data.frame()
for (i in 1:dim(d)[1]){
a = hex2bin(gsub("[\\x]","",d[i,3]))
ad<-rbind(a,ad)
}
# remove the third columns
d[,3]<-NULL
#append the descriptor set
d<-cbind(d,ad)
#Near constant columns and NA
descs <- d[, !apply(d, 2, function(x) any(is.na(x)) )]
descs <- d[, !apply( d, 2, function(x) length(unique(x)) == 1 )]
# Remove correlated columns > 0.25
r2 <- which(cor(descs[3:511])^2 > .25, arr.ind=TRUE)
r2 <- r2[ r2[,1] > r2[,2] , ]
data <- descs[, -unique(r2[,2])]
#Check Dimensions of the dataset
dim(data)
#-------------------------------------------------------------
#train and test set
names(set)[2]<-"Activity" # change the column name
#Convert IC50 column to pIC50
data[,2]<- -(log(data[,2]*10^-9))
# split train and test set
ind<-sample(2,nrow(data),replace=TRUE,prob=c(0.8,0.2))
trainsetX<-data[ind==1,2:dim(data)[2]]
trainY<-trainsetX$Activity
testsetX<-data[ind==2,2:dim(data)[2]]
testY<- testsetX$Activity
#-------------------------------------------------------------
# Function to compute RMSE and R2
r2se<- function (obs,pred){
rmse<-(mean((obs -pred)^2))^0.5
ssr<-sum((obs - pred)^2)
sst<-sum((obs - mean(obs))^2)
R2<-1-(ssr/sst)
output<-list(RMSE=rmse,RSquared=R2)
return(output)
}
#-------------------------------------------------------------
# Function to plot qsar results with ggplot2
plotsar <-function(results){
myplot<-ggplot(results,aes(x=observed,y=predicted,color=factor(set),shape=factor(set)))+
geom_point()+scale_colour_manual(values=c("blue", "yellow"))+
theme(axis.line = element_line(colour = "white"))+
theme(panel.grid.major = element_line(colour = "white", linetype = "dotted"))+
theme(axis.ticks.y=element_line(size=1),axis.text.y=element_text(size=16),axis.ticks.length=unit(0.25,"cm"),
panel.background = element_rect(fill = "black", colour = NA))+
theme(legend.key = element_rect(fill = "black"))+
theme(axis.title=element_text(size=12,face="bold",colour = 'white'),
plot.background = element_rect(colour = 'black', fill = 'black'))+
theme( legend.title = element_text(size = 12, face = "bold", hjust = 0, colour = 'white'),
legend.background=element_rect(color="black",fill="black"),
legend.text=element_text(size=12,color="white",face="bold"),
axis.title.x = element_text(size = 12, colour = 'white', vjust = 1) ,
axis.title.y = element_text(size = 12, colour = 'white'))+
ggtitle("Predicted v/s test set QSAR results")+
theme(plot.title=element_text(lineheight=.8,face="bold",color="white",size=14))+stat_smooth(span = 0.9)
return(myplot)
}
#-------------------------------------------------------------
# Ridge Regression Model
library(elasticnet)
ridgeModel <- enet(x = as.matrix(trainsetX), y = trainY,lambda = 0.01)
ridgetest <- predict(ridgeModel, newx = as.matrix(testsetX),
s = 1, mode = "fraction",type = "fit")
ridgetrain <- predict(ridgeModel, newx = as.matrix(trainsetX),
s = 1, mode = "fraction",type = "fit")
ridgeval1<-data.frame(obs = trainY, pred = ridgetrain$fit)
ridgeval2<-data.frame(obs = testY, pred = ridgetest$fit)
r2se(ridgeval1$obs,ridgeval1$pred)
r2se(ridgeval2$obs,ridgeval2$pred)
train.results<-data.frame(observed=trainY,predicted=ridgetrain$fit,set="train")
test.results <- data.frame(observed=testY,predicted=ridgetest$fit,set="test")
results<-rbind(train.results,test.results)
plotsar(results)
## close connection con
dbDisconnect(con)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment