- To create UniRef90 protein databases for NCBI BLAST and DIAMOND BLAST
- To create a tab-delimited taxid mapping file with two columns: sequenceID\tNCBITaxonID
Download the UniRef90 XML file first (warning: this is ~15 GB and will take a while).
Download the UniRef90 XML file first (warning: this is ~15 GB and will take a while).
mol.1 "C1=CC2=CC=C3C4=CC5=CC6=CC=CC=C6C=C5C=C4C=CC3=C2C=C1" | |
mol.2 "C1=CC2=CC3=CC=CC=C3C=C2C=C1" | |
mol.3 "C1=CC2=CC=CC=C2C=C1" | |
mol.4 "C1=CC=CC=C1" | |
mol.5 "C1CCCCC1" | |
mol.6 "C1CCC=CC1" | |
mol.7 "C1CC=CC=C1" | |
mol.8 "CCCCCC" | |
mol.9 "CCCCC(C)C" | |
mol.10 "CC1=CC(C)=CC=C1" |
"InChI=1S/C23H30/c24-15(25-24)4-2-1-3(2)6(1,2,4,15)5(2,4,15,36(15)32(15)45(4,5,15,36)43(4,5,15)28(4)15)7(1,2,3,4,6,15,27(2)41(1,2,7)35(1,2)7)12(1,2,3,6)10-11(12)13(1,3,10,12,30-31(13)46(3,12,13,30)42(1,3,12)13)19(3,10,11,12)9(6)8(4,5,6)14(4,5,6,9,15)17(8,9,19)16(9,10,11,19)18(8,9,14,17,19,47(8,9,14,17)48(8,9,14)37(8,14)38(8,14)48)20(3,9,10,11,12,13,16,17,19)21(10,11,13,19)22(10,11,19,20,26-39(10,21)22,33(21)34(21)22,40(21)44(11,21,22)49(10,11,21,22)40)23(9,10,11,16,17,18,19,20,21)29(16)52(16,17,18,23)50(16,17,18,23)51(16,17,18,23)52/h28H", | |
"InChI=1S/C4Cl12/c5-1-2(5)3(1)4(1,2)7(3)8(3,4)12(1,2,3,4)10(1,2,3,4)6(1,2,5,14(1,2,3,4,5,10,12)16(1,2,3,4,7,8,10)12)13(1,2,3,4,5)9(1,2,3,4,5)11(1,2,3,4,7,13)15(1,2,3,4,7,8,9)13", | |
"InChI=1S/C30H28N6O6S4/c37-27-23(54(27)55(23)27)25(27,60(23,27)61(23,27)37)11-12(25)24(11,43-44-25)14-19(9-6-2-3(6,48-2,51(2)6)8(2,6,9)17(6,9,19,52(8)9)28(9,14,19,24,63(8,9,17)19)34(14,17,19,24,57(14,19)28)36(11,12,14,24,28,66(14,24,28)34)40(11,12,24)33(11,12,23,25,27,59(23,25)27)41(11,12,25,36)40) |
Nina's example for CDK Hashed Fingerprinter failure. | |
"InChI=1S/C23H30/c24-15(25-24)4-2-1-3(2)6(1,2,4,15)5(2,4,15,36(15)32(15)45(4,5,15,36)43(4,5,15)28(4)15)7(1,2,3,4,6,15,27(2)41(1,2,7)35(1,2)7)12(1,2,3,6)10-11(12)13(1,3,10,12,30-31(13)46(3,12,13,30)42(1,3,12)13)19(3,10,11,12)9(6)8(4,5,6)14(4,5,6,9,15)17(8,9,19)16(9,10,11,19)18(8,9,14,17,19,47(8,9,14,17)48(8,9,14)37(8,14)38(8,14)48)20(3,9,10,11,12,13,16,17,19)21(10,11,13,19)22(10,11,19,20,26-39(10,21)22,33(21)34(21)22,40(21)44(11,21,22)49(10,11,21,22)40)23(9,10,11,16,17,18,19,20,21)29(16)52(16,17,18,23)50(16,17,18,23)51(16,17,18,23)52/h28H", | |
"InChI=1S/C4Cl12/c5-1-2(5)3(1)4(1,2)7(3)8(3,4)12(1,2,3,4)10(1,2,3,4)6(1,2,5,14(1,2,3,4,5,10,12)16(1,2,3,4,7,8,10)12)13(1,2,3,4,5)9(1,2,3,4,5)11(1,2,3,4,7,13)15(1,2,3,4,7,8,9)13", | |
"InChI=1S/C30H28N6O6S4/c37-27-23(54(27)55(23)27)25(27,60(23,27)61(23,27)37)11-12(25)24(11,43-44-25)14-19(9-6-2-3(6,48-2,51(2)6)8(2,6,9)17(6,9,19,52(8)9)28(9,14,19,24,63(8,9,17)19)34(14,17,19,24,57(14,19)28)36(11,12,14, |
asad:cdksmsdgithub Asad$ ant -Dmodule=smsd qa-module | |
Buildfile: /users/Asad/Software/GITROOT/cdksmsdgithub/build.xml | |
checkPlatforms: | |
check: | |
dist.init: | |
qa-module: |
KEGG ID Atom Count Formula Failed Cases | |
C00023.mol 1 Fe Fe, | |
C00032.mol 43 C34FeN4O4 Fe, | |
C00034.mol 1 Mn Mn, | |
C00038.mol 1 Zn Zn, | |
C00070.mol 1 Cu Cu, | |
C00125.mol 65 C42FeN8O8S2R4 Fe, | |
C00126.mol 65 C42FeN8O8S2R4 Fe, | |
C00150.mol 1 Mo Mo, | |
C00194.mol 109 C72CoN18O17P Co, |
package tools; | |
import java.io.IOException; | |
import java.util.ArrayList; | |
import java.util.List; | |
import java.util.Map; | |
import org.junit.Test; | |
import org.openscience.cdk.AtomContainer; | |
import org.openscience.cdk.Bond; | |
import org.openscience.cdk.DefaultChemObjectBuilder; |
rBLAST 02051100062D | |
31 33 0 0 0 0 999 V2000 | |
-10.5253 -1.9067 0.0000 O 0 5 0 0 0 0 0 0 0 0 0 0 | |
-11.7515 -0.8026 0.0000 O 0 5 0 0 0 0 0 0 0 0 0 0 | |
-11.6905 -1.9677 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0 | |
-11.1384 -1.3546 0.0000 P 0 0 0 0 0 0 0 0 0 0 0 0 | |
-10.5864 -0.7415 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0 | |
-9.7794 -0.9131 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 |
311 | |
CDK 0421111417 | |
21 20 0 0 0 0 0 0 0 0999 V2000 | |
4.7690 -1.0005 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0 | |
6.0010 -0.1345 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0 | |
5.1350 1.3655 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0 | |
2.5369 -2.1345 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0 | |
2.2690 1.5976 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0 | |
4.2690 -2.1345 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0 |
OpenBabel03011112432D | |
7 7 0 0 0 0 0 0 0 0999 V2000 | |
0.0000 0.0000 0.0000 C 0 0 0 0 0 | |
0.0000 0.0000 0.0000 C 0 0 0 0 0 | |
0.0000 0.0000 0.0000 C 0 0 0 0 0 | |
0.0000 0.0000 0.0000 C 0 0 0 0 0 | |
0.0000 0.0000 0.0000 I 0 0 0 0 0 | |
0.0000 0.0000 0.0000 C 0 0 0 0 0 |