Skip to content

Instantly share code, notes, and snippets.

-- Build tmp_data: the distinct rows obtained by joining each activity record
-- to its compound (molecule_dictionary, compound_structures), its assay and
-- the assay's target. Only rows with standard_units = 'nM',
-- standard_value < 50 and a 'Homo sapiens' target are kept.
CREATE TABLE tmp_data AS
SELECT DISTINCT
    md.chembl_id,
    cs.canonical_smiles,
    a.tid,
    td.chembl_id AS target_chembl_id,
    td.organism,
    act.*,  -- every column of activities is carried through unchanged
    a.chembl_id AS assay_chembl_id
FROM molecule_dictionary AS md
INNER JOIN compound_structures AS cs
    ON cs.molregno = md.molregno
INNER JOIN activities AS act
    ON act.molregno = md.molregno
INNER JOIN assays AS a
    ON a.assay_id = act.assay_id
INNER JOIN target_dictionary AS td
    ON td.tid = a.tid
WHERE act.standard_units = 'nM'
    AND act.standard_value < 50
    AND td.organism = 'Homo sapiens';
# Install the PostgreSQL driver from source only when it is not already
# available, so re-running the script does not rebuild the package each time.
if (!requireNamespace("RPostgreSQL", quietly = TRUE)) {
  install.packages('RPostgreSQL', type='source')
}

# Attach the packages this script loads.
library(RPostgreSQL)
library(BMS)
library(ggplot2)

# Initiate connection: load the PostgreSQL driver and connect to the
# "chembl_18" database (only the database name is supplied here).
drv <- dbDriver("PostgreSQL")
con <- dbConnect(drv, dbname="chembl_18")
@abhik1368
abhik1368 / inchifrommol.sql
Created July 21, 2014 14:58
inchifrommol
# Create function
create or replace function inchi_mol(x text)
returns text
AS $$
from rdkit import Chem
from rdkit.Chem import AllChem
file = open("/home/chembl/logerror2.txt","a")
try :
m=Chem.MolFromInchi(x)
if m is not None :
@abhik1368
abhik1368 / auc.R
Last active August 29, 2015 14:06
Function to calculate AUC
# x = a vector of scores
# y = a vector of labels
function (x, y, decreasing = TRUE, top = 1)
{
if (length(x) != length(y)) {
stop(paste("Length of scores does not match with labels."))
}
N <- length(y)
n <- sum(y == 1)
@abhik1368
abhik1368 / bedroc.R
Created September 12, 2014 02:57
Boltzmann-Enhanced Discrimination of ROC
# x = a vector of scores
# y = a vector of labels
function (x, y, decreasing = TRUE, alpha = 20)
{
if (length(x) != length(y)) {
stop(paste("The length of scores should be equal to number of labels."))
}
N <- length(y)
n <- length(which(y == 1))
@abhik1368
abhik1368 / rie.R
Created September 12, 2014 02:58
Robust Initial Enhancement Metric
# x = a vector of scores
# y = a vector of labels
function (x, y, decreasing = TRUE, alpha = 20)
{
if (length(x) != length(y)) {
stop(paste("The length of scores should be equal to number of labels."))
}
N <- length(y)
n <- length(which(y == 1))
@abhik1368
abhik1368 / random_walk.R
Last active August 29, 2015 14:06
Random_walk_with_restart
# Restart probability (parameter r)
r <- 0.8
# Convergence cutoff
conv_cut <- 1e-10
RWR <- function(M, p_0, r,conv_cut, prop=FALSE) {
# use Network propagation when prop=TRUE
if(prop) {
w<-colSums(M)
@abhik1368
abhik1368 / mclust.R
Last active August 29, 2015 14:08
mclust.R
# Algorithm to perform MCL Clustering in R
# Add self loops: return M plus an identity matrix that has as many
# rows (and columns) as M has rows.
add.selfloops <- function(M) {
  M + diag(nrow(M))
}
# Inflation step of MCL
inflate <- function (M,inf) {
M <- M^(inf)
@abhik1368
abhik1368 / nbi.R
Created February 26, 2015 05:50
Network_Inference
nbi < - function (A){
# A is the n x m adjacency matrix here
n <- nrow(A)
m <- ncol(A)
# You need to calculate the degree of columns to use it as node weight
Ky <- diag(1/colSums(A))
Ky[is.infinite(Ky) | is.na(Ky)] <- 0
kx <- rowSums(A)
Nx <- 1/(matrix(kx, nrow=n, ncol=n, byrow=TRUE))
@abhik1368
abhik1368 / Link_Prediction_by_Regression.R
Last active August 29, 2015 14:18
Link_Prediction_by_Regression