Skip to content

Instantly share code, notes, and snippets.

@aschreyer
Created November 12, 2012 17:15
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save aschreyer/4060598 to your computer and use it in GitHub Desktop.
Save aschreyer/4060598 to your computer and use it in GitHub Desktop.
The multimcs() function in the PostgreSQL OpenEye cartridge
SELECT assay_id,
count(DISTINCT molregno) AS compounds,
openeye.multimcs(array_agg(ism), threshold:=0.8) AS mcs
FROM chembl.target_dictionary
JOIN chembl.assay2target USING(tid)
JOIN chembl.activities a USING(assay_id)
JOIN chembl.compound_smiles cs USING(molregno)
WHERE protein_accession = 'P53779'
GROUP BY assay_id
HAVING count(DISTINCT molregno) >= 5
AND openeye.multimcs(array_agg(ism), threshold:=0.8) IS NOT NULL;
assay_id compounds mcs
-------- --------- -----------------------------------------------------
216665 5 C1=CC=C(C=C1)C2=C(C(=O)NC2=O)NC3=CC=CC=C3
302631 16 CN=CC=CC1=C(N=NC=C1)C2=CC=CC=C2
321305 10 C1=CC=C(C=C1)C2=NNC3=C2C=CC(=C3)NC4=CC=CC=C4
321351 6 C1CC2=NC(=C(N2[C@@H]1CO)C3=NC(=NC=C3)N)C4=CC=C(C=C4)F
321643 6 C1CC2=NC(=C(N2[C@H]1CO)C3=NC(=NC=C3)N)C4=CC=C(C=C4)F
325077 20 C1=CC=CC=C1
326746 23 C=CC=CC1=CC2=C(C=C1)C(=O)C=C(O2)N3CCOCC3
339825 25 C1=CC=C(C=C1)NC2=NC=CC(=C2)C3=CC(=NC=C3)N
353787 39 CC1=NN(C2=C1C(=O)C3=CC=CC=C3N2O)C
354918 34 CC[NH+](C)CC1=CC2=C(C=C1OC)N=CN=C2NC3=C(C(=CC=C3)Cl)F
363283 6 CC(=O)NC1=NC=CC(=C1)N
367254 14 CC(=CC=C)CNC(=O)C1=CC=CC=N1
379765 7 CC(C)C1=NN=C2N1C=C(C=C2)CC(=N)C3=CC=CC=C3
422241 6 CC1=C(C(=CC=C1)C)/C=C/C2=CN=CC=C2
424398 10 CCN(C1=NN(C(=O)C=C1)C2=CC=CC=C2Cl)C3=CC=CC=C3Cl
424576 55 CC1=CN=C(C=C1N)C(=O)NCC2=CC=C(C=C2)S(=O)(=O)C
428302 33 CNC(=O)C1=CC(=CN1)C2=C(C=NN2)C3=CC(=CC=C3)Cl
428524 6 C(CC1=CC=NC=C1)C2=CC=C(C=C2)F
446769 23 CCC1=CSC(=C1C#N)NC=O
455592 15 CC[C@@H](C)NC1=NC=CC(=N1)C=CNC=CC=C
458261 26 C1CC2=CC(=CN=C2C=C1)C3=CC=CC=C3
459041 19 C1=CC=CC=C1
463964 5 CC1=CC=CC=C1
487994 9 C1=CC=C(C=C1)F
497090 13 CC1=CC=CC=C1C2=CC=CC=C2
537618 17 C1=CC=CC=C1
539863 10 C1CC1NC(=O)C2=CC=CC(=C2)C3=CC(=CN)CC=C3
586022 49 C1=CC=C2C(=C1)/C(=N/O)/C(=O)N2
586406 24 C1=CC=CC=C1
586459 38 C1=CC=CC=C1
586525 7 C1=CC=CC=C1
596583 12 C1=CC=C(C=C1)C2=NC(=NC=C2)NC3=CC=CC=C3
621741 23 C1CN=C(C2=CC=CC=C21)C3=CC=CC=C3
624060 10 CC(=C)C(=C)N
636458 10 CC=CC(=C)C1=CC2=C(NCCN2CC3=CC=CC=C3)N=C1
652139 8 C[NH2+]CCNC(=O)C1=CC=C(C=C1)C2=CN=C3N2N=C(C=C3)NC
655427 7 C1=CC=CC=C1
687741 57 C=CC=CCC(=O)NC=C
688427 362 CC=CC=C
700502 32 C1=CC=C2C=C(C=CC2=C1)N3C(=NNC3=O)C4=CC(=NC=C4)N
701187 6 CCC(CC)NC1=NC=CC(=C1)C2=NNC(=O)N2C3=CC4=CC=CC=C4C=C3
727960 33 C=CCNC(=O)C
739505 31 C1=CC=C2C(=C1)C=NC(=N2)NC3=CC=C(C=C3)N4C=NC=N4
744647 30 C(N=CC=CC1=NNC=C1)NC2=CC=CC=C2
750091 45 CCN(CC)C1=CC=CC=C1NC(=O)C=CC=C
751380 26 C1=CC=C2C=CC=CC2=C1
767484 29 CN=CC1=C(SC=C1Br)NC(=O)C
771557 5 CCCN1C=C(C(=O)C2=C1C=C(C(=C2N)F)NCC)C(=O)[O-]
774371 72 C1=CC=CC=C1
789934 29 CC=CC=CCC(=O)NC
789936 34 C(N=CC=CNC1=CC=CC=C1C(=O)N)OC2=CC=CC=C2
800186 22 CCN1C2=NC(=NC=C2N=C1NC3=CC=CC=C3F)N
809123 44 C1=CC=CC=C1
SELECT assay_id,
count(DISTINCT molregno) AS compounds,
openeye.multimcs(array_agg(ism)) AS mcs
FROM chembl.target_dictionary
JOIN chembl.assay2target USING(tid)
JOIN chembl.activities a USING(assay_id)
JOIN chembl.compound_smiles cs USING(molregno)
WHERE protein_accession = 'P53779'
GROUP BY assay_id
HAVING count(DISTINCT molregno) >= 5
AND openeye.multimcs(array_agg(ism)) IS NOT NULL;
assay_id compounds mcs
-------- --------- -----------------------------------------------------
216665 5 C1=CC=C(C=C1)C2=C(C(=O)NC2=O)NC3=CC=CC=C3
302631 16 C=NC=CCC1=C(N=NC=C1)C2=CC=CC=C2
321305 10 C1=CC=C(C=C1)C2=NNC3=C2C=CC(=C3)NC4=CC=CC=C4
321351 6 C1CC2=NC(=C(N2[C@@H]1CO)C3=NC(=NC=C3)N)C4=CC=C(C=C4)F
321643 6 C1CC2=NC(=C(N2[C@H]1CO)C3=NC(=NC=C3)N)C4=CC=C(C=C4)F
325077 20 C1=CC=CC=C1
326746 23 C1COCCN1C2=CC(=O)C3=CC=CC=C3O2
339825 25 C1=CC=C(C=C1)NC2=NC=CC(=C2)C3=CC(=NC=C3)N
353787 39 C1CC2=CC=CC=C2NCNN=C1
354918 34 CC[NH2+]CC1=CC2=C(C=C1OC)N=CN=C2NC3=CC(=CC=C3)Cl
363283 6 CC(=O)NC1=NC=CC(=C1)N
367254 14 C1=CC=NC=C1
379765 7 C(C(=N)C1=CC=CC=C1)C2=CN3C=NN=C3C=C2
422241 6 CC1=C(C(=CC=C1)C)/C=C/C2=CN=CC=C2
424398 10 CCN(C1=NN(C(=O)C=C1)C2=CC=CC=C2Cl)C3=CC=CC=C3Cl
424576 55 CC1=CN=C(C=C1N)C(=O)NCC2=CC=C(C=C2)S(=O)(=O)C
428302 33 C1=CC=C(C=C1)C2=C(NN=C2)C3=CNC(=C3)C(=O)N
428524 6 C(CC1=CC=NC=C1)C2=CC=C(C=C2)F
446769 23 CCC1=CSC(=C1C#N)N
455592 15 CC=CC=NC=CC1=NC(=NC=C1)NC(C)C
458261 26 C1CC2=CC(=CN=C2C=C1)C3=CC=CC=C3
459041 19 C1=CC=CC=C1
463964 5 CC1=CC=CC=C1
487994 9 C1=CC=C(C=C1)F
497090 13 CC1=CC=CC=C1C2=CC=CC=C2
537618 17 CC=CC=C
539863 10 C1CC1NC(=O)C2=CC=CC(=C2)C3=CC(=CN)CC=C3
586022 49 C1=CC=C2C(=C1)/C(=N/O)/C(=O)N2
596583 12 C1NC=CC(=N1)C2=CC=CC=C2
621741 23 CCC1=CC=CC=C1C
636458 10 C=CC(=C)C1=CC2=C(NCCN2CC3=CC=CC=C3)N=C1
652139 8 C[NH2+]CCNC(=O)C1=CC=C(C=C1)C2=CN=C3N2N=C(C=C3)NC
655427 7 C1=CC=CC=C1
687741 57 CCNC(=O)CC=CC
700502 32 CC=C(C=C)CN=CN
701187 6 CCC(C)NC1=NC=CC(=C1)C2=NNC(=O)N2C3=CC4=CC=CC=C4C=C3
727960 33 CC=CNC(=O)C
739505 31 C1=CC=C2C(=C1)C=NC(=N2)N
744647 30 C(N=CC=CC1=NNC=C1)NC2=CC=CC=C2
750091 45 CCN(CC)C1=CC=CC=C1NC(=O)C
751380 26 C=CC=C1CC=CC=C1
767484 29 CN=CC1=C(SC=C1)NC(=O)C
771557 5 CCN1C=C(C(=O)C2=C1C=C(C(=C2N)F)NCC)C(=O)[O-]
789934 29 C=CC=CC=CN=C
789936 34 C(C=CN=COC1=CC=CC=C1)NC2=CC=CC=C2
800186 22 C1=CC=C(C=C1)NC2=NC3=CN=C(N=C3N2)N
select count(*) from adrian.benzotriazole;
count
-----
3669
select openeye.multimcs(array_agg(ism)) AS mcs from adrian.benzotriazole;
mcs
-------------------
C1=CC=C2C(=C1)NN=N2
(1 row)
Time: 225.499 ms
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment