Skip to content

Instantly share code, notes, and snippets.

@atgctg
Last active September 12, 2022 05:49
Show Gist options
  • Save atgctg/9819ffb6dc4a49920267ae2f2ca540f8 to your computer and use it in GitHub Desktop.
Save atgctg/9819ffb6dc4a49920267ae2f2ca540f8 to your computer and use it in GitHub Desktop.
Sample run of process_data.py
  • Took around an hour on an M1 Mac
  • Memory use ~13GB at the end

Compound_000000001_000500000.jsonl

{"commonchem": {"version": 10}, "defaults": {"atom": {"z": 6, "impHs": 0, "chg": 0, "nRad": 0, "isotope": 0, "stereo": "unspecified"}, "bond": {"bo": 1, "stereo": "unspecified"}}, "molecules": [{"name": "1", "atoms": [{"z": 8}, {"z": 8, "chg": -1}, {"z": 8}, {"z": 8}, {"z": 7, "chg": 1}, {"impHs": 2}, {"impHs": 1}, {"impHs": 3}, {"impHs": 3}, {"impHs": 3}, {"impHs": 2}, {}, {}, {"impHs": 3}], "bonds": [{"atoms": [0, 6]}, {"atoms": [0, 12]}, {"atoms": [1, 11]}, {"bo": 2, "atoms": [2, 11]}, {"bo": 2, "atoms": [3, 12]}, {"atoms": [4, 5]}, {"atoms": [4, 7]}, {"atoms": [4, 8]}, {"atoms": [4, 9]}, {"atoms": [5, 6]}, {"atoms": [6, 10]}, {"atoms": [10, 11]}, {"atoms": [12, 13]}], "conformers": [{"dim": 2, "coords": [[2.866, 0.75], [2.866, -2.25], [2.0, -0.75], [3.732, 2.25], [5.4641, 0.25], [4.5981, 0.75], [3.732, 0.25], [6.3301, -0.25], [5.9641, 1.116], [4.9641, -0.616], [3.732, -0.75], [2.866, -1.25], [2.866, 1.75], [2.0, 2.25]]}], "properties": {"PUBCHEM_COMPOUND_CID": 1, "PUBCHEM_COMPOUND_CANONICALIZED": 1, "PUBCHEM_CACTVS_COMPLEXITY": 214, "PUBCHEM_CACTVS_HBOND_ACCEPTOR": 4, "PUBCHEM_CACTVS_HBOND_DONOR": 0, "PUBCHEM_CACTVS_ROTATABLE_BOND": 5, "PUBCHEM_CACTVS_SUBSKEYS": "AAADceByOAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAHgAAAAAACBThgAYCCAMABAAIAACQCAAAAAAAAAAAAAEIAAACABQAgAAHAAAFIAAQAAAkAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA==", "PUBCHEM_IUPAC_OPENEYE_NAME": "3-acetoxy-4-(trimethylammonio)butanoate", "PUBCHEM_IUPAC_CAS_NAME": "3-acetyloxy-4-(trimethylammonio)butanoate", "PUBCHEM_IUPAC_NAME_MARKUP": "3-acetyloxy-4-(trimethylazaniumyl)butanoate", "PUBCHEM_IUPAC_NAME": "3-acetyloxy-4-(trimethylazaniumyl)butanoate", "PUBCHEM_IUPAC_SYSTEMATIC_NAME": "3-acetyloxy-4-(trimethylazaniumyl)butanoate", "PUBCHEM_IUPAC_TRADITIONAL_NAME": "3-acetoxy-4-(trimethylammonio)butyrate", "PUBCHEM_IUPAC_INCHI": "InChI=1S/C9H17NO4/c1-7(11)14-8(5-9(12)13)6-10(2,3)4/h8H,5-6H2,1-4H3", "PUBCHEM_IUPAC_INCHIKEY": "RDHQFKQIGNGIED-UHFFFAOYSA-N", "PUBCHEM_XLOGP3_AA": 0.4, "PUBCHEM_EXACT_MASS": 203.1157, "PUBCHEM_MOLECULAR_FORMULA": "C9H17NO4", "PUBCHEM_MOLECULAR_WEIGHT": 203.24, "PUBCHEM_OPENEYE_CAN_SMILES": "CC(=O)OC(CC(=O)[O-])C[N+](C)(C)C", "PUBCHEM_OPENEYE_ISO_SMILES": "CC(=O)OC(CC(=O)[O-])C[N+](C)(C)C", "PUBCHEM_CACTVS_TPSA": 66.4, "PUBCHEM_MONOISOTOPIC_WEIGHT": 203.1157, "PUBCHEM_TOTAL_CHARGE": 0, "PUBCHEM_HEAVY_ATOM_COUNT": 14, "PUBCHEM_ATOM_DEF_STEREO_COUNT": 0, "PUBCHEM_ATOM_UDEF_STEREO_COUNT": 1, "PUBCHEM_BOND_DEF_STEREO_COUNT": 0, "PUBCHEM_BOND_UDEF_STEREO_COUNT": 0, "PUBCHEM_ISOTOPIC_ATOM_COUNT": 0, "PUBCHEM_COMPONENT_COUNT": 1, "PUBCHEM_CACTVS_TAUTO_COUNT": 1, "PUBCHEM_COORDINATE_TYPE": "1\n5\n255", "PUBCHEM_BONDANNOTATIONS": "7  11  3"}, "extensions": [{"name": "rdkitRepresentation", "formatVersion": 2, "toolkitVersion": "2022.03.5", "cipRanks": [9, 8, 10, 11, 7, 3, 4, 2, 2, 2, 1, 5, 6, 0]}]}], "CAN_SELFIE": "[C][C][=Branch1][C][=O][O][C][Branch1][#Branch1][C][C][=Branch1][C][=O][O-1][C][N+1][Branch1][C][C][Branch1][C][C][C]"}
{"commonchem": {"version": 10}, "defaults": {"atom": {"z": 6, "impHs": 0, "chg": 0, "nRad": 0, "isotope": 0, "stereo": "unspecified"}, "bond": {"bo": 1, "stereo": "unspecified"}}, "molecules": [{"name": "2", "atoms": [{"z": 8}, {"z": 8, "impHs": 1}, {"z": 8}, {"z": 8}, {"z": 7, "chg": 1}, {"impHs": 2}, {"impHs": 1}, {"impHs": 3}, {"impHs": 3}, {"impHs": 3}, {"impHs": 2}, {}, {}, {"impHs": 3}], "bonds": [{"atoms": [0, 6]}, {"atoms": [0, 12]}, {"atoms": [1, 11]}, {"bo": 2, "atoms": [2, 11]}, {"bo": 2, "atoms": [3, 12]}, {"atoms": [4, 5]}, {"atoms": [4, 7]}, {"atoms": [4, 8]}, {"atoms": [4, 9]}, {"atoms": [5, 6]}, {"atoms": [6, 10]}, {"atoms": [10, 11]}, {"atoms": [12, 13]}], "conformers": [{"dim": 2, "coords": [[4.269, -0.75], [2.5369, 2.25], [4.269, 2.25], [2.5369, -0.75], [6.001, 0.25], [5.135, 0.75], [4.269, 0.25], [6.8671, -0.25], [6.501, 1.116], [5.501, -0.616], [3.403, 0.75], [3.403, 1.75], [3.403, -1.25], [3.403, -2.25]]}], "properties": {"PUBCHEM_COMPOUND_CID": 2, "PUBCHEM_COMPOUND_CANONICALIZED": 1, "PUBCHEM_CACTVS_COMPLEXITY": 219, "PUBCHEM_CACTVS_HBOND_ACCEPTOR": 4, "PUBCHEM_CACTVS_HBOND_DONOR": 1, "PUBCHEM_CACTVS_ROTATABLE_BOND": 6, "PUBCHEM_CACTVS_SUBSKEYS": "AAADceByOAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAHgAACAAACBThgAYCCAMABgAIAACQCAAAAAAAAAAAAAEIAAACABQAgAAHQAAFIAAQAAAkAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA==", "PUBCHEM_IUPAC_OPENEYE_NAME": "(2-acetoxy-3-carboxy-propyl)-trimethyl-ammonium", "PUBCHEM_IUPAC_CAS_NAME": "(2-acetyloxy-3-carboxypropyl)-trimethylammonium", "PUBCHEM_IUPAC_NAME_MARKUP": "(2-acetyloxy-3-carboxypropyl)-trimethylazanium", "PUBCHEM_IUPAC_NAME": "(2-acetyloxy-3-carboxypropyl)-trimethylazanium", "PUBCHEM_IUPAC_SYSTEMATIC_NAME": "(2-acetyloxy-4-oxidanyl-4-oxidanylidene-butyl)-trimethyl-azanium", "PUBCHEM_IUPAC_TRADITIONAL_NAME": "(2-acetoxy-3-carboxy-propyl)-trimethyl-ammonium", "PUBCHEM_IUPAC_INCHI": "InChI=1S/C9H17NO4/c1-7(11)14-8(5-9(12)13)6-10(2,3)4/h8H,5-6H2,1-4H3/p+1", "PUBCHEM_IUPAC_INCHIKEY": "RDHQFKQIGNGIED-UHFFFAOYSA-O", "PUBCHEM_XLOGP3_AA": -0.3, "PUBCHEM_EXACT_MASS": 204.1235, "PUBCHEM_MOLECULAR_FORMULA": "C9H18NO4+", "PUBCHEM_MOLECULAR_WEIGHT": 204.24, "PUBCHEM_OPENEYE_CAN_SMILES": "CC(=O)OC(CC(=O)O)C[N+](C)(C)C", "PUBCHEM_OPENEYE_ISO_SMILES": "CC(=O)OC(CC(=O)O)C[N+](C)(C)C", "PUBCHEM_CACTVS_TPSA": 63.6, "PUBCHEM_MONOISOTOPIC_WEIGHT": 204.1235, "PUBCHEM_TOTAL_CHARGE": 1, "PUBCHEM_HEAVY_ATOM_COUNT": 14, "PUBCHEM_ATOM_DEF_STEREO_COUNT": 0, "PUBCHEM_ATOM_UDEF_STEREO_COUNT": 1, "PUBCHEM_BOND_DEF_STEREO_COUNT": 0, "PUBCHEM_BOND_UDEF_STEREO_COUNT": 0, "PUBCHEM_ISOTOPIC_ATOM_COUNT": 0, "PUBCHEM_COMPONENT_COUNT": 1, "PUBCHEM_CACTVS_TAUTO_COUNT": 1, "PUBCHEM_COORDINATE_TYPE": "1\n5\n255", "PUBCHEM_BONDANNOTATIONS": "7  11  3"}, "extensions": [{"name": "rdkitRepresentation", "formatVersion": 2, "toolkitVersion": "2022.03.5", "cipRanks": [9, 8, 10, 11, 7, 3, 4, 2, 2, 2, 1, 5, 6, 0]}]}], "CAN_SELFIE": "[C][C][=Branch1][C][=O][O][C][Branch1][#Branch1][C][C][=Branch1][C][=O][O][C][N+1][Branch1][C][C][Branch1][C][C][C]"}
{"commonchem": {"version": 10}, "defaults": {"atom": {"z": 6, "impHs": 0, "chg": 0, "nRad": 0, "isotope": 0, "stereo": "unspecified"}, "bond": {"bo": 1, "stereo": "unspecified"}}, "molecules": [{"name": "3", "atoms": [{"z": 8, "impHs": 1}, {"z": 8, "impHs": 1}, {"z": 8, "impHs": 1}, {"z": 8}, {"impHs": 1}, {"impHs": 1}, {}, {"impHs": 1}, {"impHs": 1}, {"impHs": 1}, {}], "bonds": [{"atoms": [0, 4]}, {"atoms": [1, 5]}, {"atoms": [2, 10]}, {"bo": 2, "atoms": [3, 10]}, {"atoms": [4, 5]}, {"atoms": [4, 6]}, {"atoms": [5, 7]}, {"bo": 2, "atoms": [6, 8]}, {"atoms": [6, 10]}, {"bo": 2, "atoms": [7, 9]}, {"atoms": [8, 9]}], "conformers": [{"dim": 2, "coords": [[5.135, 1.345], [6.8671, 0.345], [2.5369, -0.155], [3.403, 1.345], [5.135, 0.345], [6.001, -0.155], [4.269, -0.155], [6.001, -1.155], [4.269, -1.155], [5.135, -1.655], [3.403, 0.345]]}], "properties": {"PUBCHEM_COMPOUND_CID": 3, "PUBCHEM_COMPOUND_CANONICALIZED": 1, "PUBCHEM_CACTVS_COMPLEXITY": 229, "PUBCHEM_CACTVS_HBOND_ACCEPTOR": 4, "PUBCHEM_CACTVS_HBOND_DONOR": 3, "PUBCHEM_CACTVS_ROTATABLE_BOND": 1, "PUBCHEM_CACTVS_SUBSKEYS": "AAADccBgOAAAAAAAAAAAAAAAAAAAAAAAAAAgAAAAAAAAAAAAAAAAGgAACAAADBSggAIACAAAAgCIAiDSCAAAAAAgAAAICAEAAEgJFBYAAQAAUAAF4AAJkYLKAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA==", "PUBCHEM_IUPAC_OPENEYE_NAME": "5,6-dihydroxycyclohexa-1,3-diene-1-carboxylic acid", "PUBCHEM_IUPAC_CAS_NAME": "5,6-dihydroxy-1-cyclohexa-1,3-dienecarboxylic acid", "PUBCHEM_IUPAC_NAME_MARKUP": "5,6-dihydroxycyclohexa-1,3-diene-1-carboxylic acid", "PUBCHEM_IUPAC_NAME": "5,6-dihydroxycyclohexa-1,3-diene-1-carboxylic acid", "PUBCHEM_IUPAC_SYSTEMATIC_NAME": "5,6-bis(oxidanyl)cyclohexa-1,3-diene-1-carboxylic acid", "PUBCHEM_IUPAC_TRADITIONAL_NAME": "5,6-dihydroxycyclohexa-1,3-diene-1-carboxylic acid", "PUBCHEM_IUPAC_INCHI": "InChI=1S/C7H8O4/c8-5-3-1-2-4(6(5)9)7(10)11/h1-3,5-6,8-9H,(H,10,11)", "PUBCHEM_IUPAC_INCHIKEY": "INCSWYKICIYAHB-UHFFFAOYSA-N", "PUBCHEM_XLOGP3_AA": -0.3, "PUBCHEM_EXACT_MASS": 156.0422, "PUBCHEM_MOLECULAR_FORMULA": "C7H8O4", "PUBCHEM_MOLECULAR_WEIGHT": 156.14, "PUBCHEM_OPENEYE_CAN_SMILES": "C1=CC(C(C(=C1)C(=O)O)O)O", "PUBCHEM_OPENEYE_ISO_SMILES": "C1=CC(C(C(=C1)C(=O)O)O)O", "PUBCHEM_CACTVS_TPSA": 77.8, "PUBCHEM_MONOISOTOPIC_WEIGHT": 156.0422, "PUBCHEM_TOTAL_CHARGE": 0, "PUBCHEM_HEAVY_ATOM_COUNT": 11, "PUBCHEM_ATOM_DEF_STEREO_COUNT": 0, "PUBCHEM_ATOM_UDEF_STEREO_COUNT": 2, "PUBCHEM_BOND_DEF_STEREO_COUNT": 0, "PUBCHEM_BOND_UDEF_STEREO_COUNT": 0, "PUBCHEM_ISOTOPIC_ATOM_COUNT": 0, "PUBCHEM_COMPONENT_COUNT": 1, "PUBCHEM_CACTVS_TAUTO_COUNT": 1, "PUBCHEM_COORDINATE_TYPE": "1\n5\n255", "PUBCHEM_BONDANNOTATIONS": "5  12  3\n6  13  3"}, "extensions": [{"name": "rdkitRepresentation", "formatVersion": 2, "toolkitVersion": "2022.03.5", "cipRanks": [8, 7, 9, 10, 5, 4, 3, 2, 1, 0, 6], "atomRings": [[4, 6, 8, 9, 7, 5]]}]}], "CAN_SELFIE": "[C][=C][C][Branch1][S][C][Branch1][N][C][=Branch1][Ring2][=C][Ring1][=Branch1][C][=Branch1][C][=O][O][O][O]"}
...
{"commonchem": {"version": 10}, "defaults": {"atom": {"z": 6, "impHs": 0, "chg": 0, "nRad": 0, "isotope": 0, "stereo": "unspecified"}, "bond": {"bo": 1, "stereo": "unspecified"}}, "molecules": [{"name": "336251", "atoms": [{"z": 16}, {"z": 7}, {"z": 7}, {"z": 7}, {"z": 7}, {}, {"impHs": 2}, {"impHs": 2}, {"impHs": 2}, {"impHs": 2}, {}, {}, {}, {}, {}, {"impHs": 1}, {"impHs": 3}], "bonds": [{"atoms": [0, 10]}, {"atoms": [0, 12]}, {"atoms": [1, 13]}, {"atoms": [1, 14]}, {"atoms": [1, 15]}, {"atoms": [2, 12]}, {"bo": 2, "atoms": [2, 14]}, {"atoms": [3, 4]}, {"bo": 2, "atoms": [3, 13]}, {"bo": 2, "atoms": [4, 15]}, {"atoms": [5, 6]}, {"bo": 2, "atoms": [5, 10]}, {"atoms": [5, 11]}, {"atoms": [6, 7]}, {"atoms": [7, 8]}, {"atoms": [8, 9]}, {"atoms": [9, 10]}, {"bo": 2, "atoms": [11, 12]}, {"atoms": [11, 13]}, {"atoms": [14, 16]}], "conformers": [{"dim": 2, "coords": [[5.9989, -0.2387], [3.5298, 1.4284], [5.2619, 1.4284], [2.5836, 0.1236], [2.0, 0.9284], [4.6051, -1.0434], [3.9843, -1.8722], [4.399, -2.821], [5.4289, -2.9284], [6.0303, -2.0855], [5.5938, -1.1465], [4.3958, -0.0716], [5.2619, 0.4284], [3.5298, 0.4284], [4.3958, 1.9284], [2.5836, 1.7331], [4.3958, 2.9284]]}], "properties": {"PUBCHEM_COMPOUND_CID": 336251, "PUBCHEM_COMPOUND_CANONICALIZED": 1, "PUBCHEM_CACTVS_COMPLEXITY": 313, "PUBCHEM_CACTVS_HBOND_ACCEPTOR": 4, "PUBCHEM_CACTVS_HBOND_DONOR": 0, "PUBCHEM_CACTVS_ROTATABLE_BOND": 0, "PUBCHEM_CACTVS_SUBSKEYS": "AAADccBzgABAAAAAAAAAAAAAAAAAAWJAAAA8QAAAAAAAAFgB+AAAHAQIAAAADACBWwQHsd8MGAimAgJhZACT1KtxiLgV2DA4RJiIKCLg2NGEhAxoiALoyCcQgMAOgAAAAAAAAAAAAAAAAAgAAAAAAAAAAA==", "PUBCHEM_IUPAC_OPENEYE_NAME": "7-methyl-10-thia-3,4,6,8-tetrazatetracyclo[7.7.0.02,6.011,16]hexadeca-1(9),2,4,7,11(16)-pentaene", "PUBCHEM_IUPAC_CAS_NAME": "7-methyl-10-thia-3,4,6,8-tetrazatetracyclo[7.7.0.02,6.011,16]hexadeca-1(9),2,4,7,11(16)-pentaene", "PUBCHEM_IUPAC_NAME_MARKUP": "7-methyl-10-thia-3,4,6,8-tetrazatetracyclo[7.7.0.0<SUP>2,6</SUP>.0<SUP>11,16</SUP>]hexadeca-1(9),2,4,7,11(16)-pentaene", "PUBCHEM_IUPAC_NAME": "7-methyl-10-thia-3,4,6,8-tetrazatetracyclo[7.7.0.02,6.011,16]hexadeca-1(9),2,4,7,11(16)-pentaene", "PUBCHEM_IUPAC_SYSTEMATIC_NAME": "7-methyl-10-thia-3,4,6,8-tetrazatetracyclo[7.7.0.02,6.011,16]hexadeca-1(9),2,4,7,11(16)-pentaene", "PUBCHEM_IUPAC_TRADITIONAL_NAME": "7-methyl-10-thia-3,4,6,8-tetrazatetracyclo[7.7.0.02,6.011,16]hexadeca-1(9),2,4,7,11(16)-pentaene", "PUBCHEM_IUPAC_INCHI": "InChI=1S/C12H12N4S/c1-7-14-12-10(11-15-13-6-16(7)11)8-4-2-3-5-9(8)17-12/h6H,2-5H2,1H3", "PUBCHEM_IUPAC_INCHIKEY": "PONPELTUPXRSHB-UHFFFAOYSA-N", "PUBCHEM_XLOGP3_AA": 3.5, "PUBCHEM_EXACT_MASS": 244.0782, "PUBCHEM_MOLECULAR_FORMULA": "C12H12N4S", "PUBCHEM_MOLECULAR_WEIGHT": 244.32, "PUBCHEM_OPENEYE_CAN_SMILES": "CC1=NC2=C(C3=C(S2)CCCC3)C4=NN=CN14", "PUBCHEM_OPENEYE_ISO_SMILES": "CC1=NC2=C(C3=C(S2)CCCC3)C4=NN=CN14", "PUBCHEM_CACTVS_TPSA": 71.3, "PUBCHEM_MONOISOTOPIC_WEIGHT": 244.0782, "PUBCHEM_TOTAL_CHARGE": 0, "PUBCHEM_HEAVY_ATOM_COUNT": 17, "PUBCHEM_ATOM_DEF_STEREO_COUNT": 0, "PUBCHEM_ATOM_UDEF_STEREO_COUNT": 0, "PUBCHEM_BOND_DEF_STEREO_COUNT": 0, "PUBCHEM_BOND_UDEF_STEREO_COUNT": 0, "PUBCHEM_ISOTOPIC_ATOM_COUNT": 0, "PUBCHEM_COMPONENT_COUNT": 1, "PUBCHEM_CACTVS_TAUTO_COUNT": -1, "PUBCHEM_COORDINATE_TYPE": "1\n5\n255", "PUBCHEM_BONDANNOTATIONS": "1  11  8\n1  13  8\n12  13  8\n12  14  8\n2  14  8\n2  15  8\n2  16  8\n3  13  8\n3  15  8\n4  14  8\n4  5  8\n5  16  8\n6  11  8\n6  12  8"}, "extensions": [{"name": "rdkitRepresentation", "formatVersion": 2, "toolkitVersion": "2022.03.5", "aromaticAtoms": [0, 1, 2, 3, 4, 5, 10, 11, 12, 13, 14, 15], "aromaticBonds": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 11, 12, 17, 18], "cipRanks": [16, 13, 12, 15, 14, 5, 3, 1, 2, 4, 10, 6, 11, 9, 8, 7, 0], "atomRings": [[0, 10, 5, 11, 12], [2, 14, 1, 13, 11, 12], [3, 4, 15, 1, 13], [6, 7, 8, 9, 10, 5]]}]}], "CAN_SELFIE": "[C][C][=N][C][=C][Branch1][=C][C][=C][Branch1][Ring2][S][Ring1][Branch1][C][C][C][C][Ring1][#Branch1][C][=N][N][=C][N][Ring1][S][Ring1][Branch1]"}
{"commonchem": {"version": 10}, "defaults": {"atom": {"z": 6, "impHs": 0, "chg": 0, "nRad": 0, "isotope": 0, "stereo": "unspecified"}, "bond": {"bo": 1, "stereo": "unspecified"}}, "molecules": [{"name": "336252", "atoms": [{"z": 16}, {"z": 7}, {"z": 7}, {"z": 7}, {"z": 7}, {}, {"impHs": 2}, {"impHs": 2}, {"impHs": 2}, {"impHs": 2}, {}, {}, {}, {}, {"impHs": 1}, {"impHs": 1}], "bonds": [{"atoms": [0, 10]}, {"atoms": [0, 12]}, {"atoms": [1, 13]}, {"atoms": [1, 14]}, {"atoms": [1, 15]}, {"atoms": [2, 12]}, {"bo": 2, "atoms": [2, 14]}, {"atoms": [3, 4]}, {"bo": 2, "atoms": [3, 13]}, {"bo": 2, "atoms": [4, 15]}, {"atoms": [5, 6]}, {"bo": 2, "atoms": [5, 10]}, {"atoms": [5, 11]}, {"atoms": [6, 7]}, {"atoms": [7, 8]}, {"atoms": [8, 9]}, {"atoms": [9, 10]}, {"bo": 2, "atoms": [11, 12]}, {"atoms": [11, 13]}], "conformers": [{"dim": 2, "coords": [[5.9989, 0.2613], [3.5298, 1.9284], [5.2619, 1.9284], [2.5836, 0.6236], [2.0, 1.4284], [4.6051, -0.5435], [3.9843, -1.3722], [4.399, -2.321], [5.4289, -2.4284], [6.0303, -1.5855], [5.5938, -0.6465], [4.3958, 0.4284], [5.2619, 0.9284], [3.5298, 0.9284], [4.3958, 2.4284], [2.5836, 2.2331]]}], "properties": {"PUBCHEM_COMPOUND_CID": 336252, "PUBCHEM_COMPOUND_CANONICALIZED": 1, "PUBCHEM_CACTVS_COMPLEXITY": 287, "PUBCHEM_CACTVS_HBOND_ACCEPTOR": 4, "PUBCHEM_CACTVS_HBOND_DONOR": 0, "PUBCHEM_CACTVS_ROTATABLE_BOND": 0, "PUBCHEM_CACTVS_SUBSKEYS": "AAADccBzgABAAAAAAAAAAAAAAAAAAWJAAAA8QAAAAAAAAFgB+AAAHAQIAAAADACBWwQFsd8MGAimAgJhZACT1KtxiLgV2DAoRJiIKCLg2NGEhAxoiAJoyCcQgMAOgAAAAAAAAAAAAAAAAAgAAAAAAAAAAA==", "PUBCHEM_IUPAC_OPENEYE_NAME": "10-thia-3,4,6,8-tetrazatetracyclo[7.7.0.02,6.011,16]hexadeca-1(9),2,4,7,11(16)-pentaene", "PUBCHEM_IUPAC_CAS_NAME": "10-thia-3,4,6,8-tetrazatetracyclo[7.7.0.02,6.011,16]hexadeca-1(9),2,4,7,11(16)-pentaene", "PUBCHEM_IUPAC_NAME_MARKUP": "10-thia-3,4,6,8-tetrazatetracyclo[7.7.0.0<SUP>2,6</SUP>.0<SUP>11,16</SUP>]hexadeca-1(9),2,4,7,11(16)-pentaene", "PUBCHEM_IUPAC_NAME": "10-thia-3,4,6,8-tetrazatetracyclo[7.7.0.02,6.011,16]hexadeca-1(9),2,4,7,11(16)-pentaene", "PUBCHEM_IUPAC_SYSTEMATIC_NAME": "10-thia-3,4,6,8-tetrazatetracyclo[7.7.0.02,6.011,16]hexadeca-1(9),2,4,7,11(16)-pentaene", "PUBCHEM_IUPAC_TRADITIONAL_NAME": "10-thia-3,4,6,8-tetrazatetracyclo[7.7.0.02,6.011,16]hexadeca-1(9),2,4,7,11(16)-pentaene", "PUBCHEM_IUPAC_INCHI": "InChI=1S/C11H10N4S/c1-2-4-8-7(3-1)9-10-14-13-6-15(10)5-12-11(9)16-8/h5-6H,1-4H2", "PUBCHEM_IUPAC_INCHIKEY": "GOMHVJXSMQEXRS-UHFFFAOYSA-N", "PUBCHEM_XLOGP3_AA": 3.1, "PUBCHEM_EXACT_MASS": 230.0626, "PUBCHEM_MOLECULAR_FORMULA": "C11H10N4S", "PUBCHEM_MOLECULAR_WEIGHT": 230.29, "PUBCHEM_OPENEYE_CAN_SMILES": "C1CCC2=C(C1)C3=C(S2)N=CN4C3=NN=C4", "PUBCHEM_OPENEYE_ISO_SMILES": "C1CCC2=C(C1)C3=C(S2)N=CN4C3=NN=C4", "PUBCHEM_CACTVS_TPSA": 71.3, "PUBCHEM_MONOISOTOPIC_WEIGHT": 230.0626, "PUBCHEM_TOTAL_CHARGE": 0, "PUBCHEM_HEAVY_ATOM_COUNT": 16, "PUBCHEM_ATOM_DEF_STEREO_COUNT": 0, "PUBCHEM_ATOM_UDEF_STEREO_COUNT": 0, "PUBCHEM_BOND_DEF_STEREO_COUNT": 0, "PUBCHEM_BOND_UDEF_STEREO_COUNT": 0, "PUBCHEM_ISOTOPIC_ATOM_COUNT": 0, "PUBCHEM_COMPONENT_COUNT": 1, "PUBCHEM_CACTVS_TAUTO_COUNT": 1, "PUBCHEM_COORDINATE_TYPE": "1\n5\n255", "PUBCHEM_BONDANNOTATIONS": "1  11  8\n1  13  8\n12  13  8\n12  14  8\n2  14  8\n2  15  8\n2  16  8\n3  13  8\n3  15  8\n4  14  8\n4  5  8\n5  16  8\n6  11  8\n6  12  8"}, "extensions": [{"name": "rdkitRepresentation", "formatVersion": 2, "toolkitVersion": "2022.03.5", "aromaticAtoms": [0, 1, 2, 3, 4, 5, 10, 11, 12, 13, 14, 15], "aromaticBonds": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 11, 12, 17, 18], "cipRanks": [15, 12, 11, 14, 13, 4, 2, 0, 1, 3, 9, 5, 10, 8, 6, 7], "atomRings": [[0, 10, 5, 11, 12], [2, 14, 1, 13, 11, 12], [3, 4, 15, 1, 13], [6, 7, 8, 9, 10, 5]]}]}], "CAN_SELFIE": "[C][C][C][C][=C][Branch1][Ring2][C][Ring1][=Branch1][C][=C][Branch1][Ring2][S][Ring1][=Branch1][N][=C][N][C][Ring1][#Branch1][=N][N][=C][Ring1][Branch1]"}

Compound_000000001_000500000_failed.jsonl

{"commonchem": {"version": 10}, "defaults": {"atom": {"z": 6, "impHs": 0, "chg": 0, "nRad": 0, "isotope": 0, "stereo": "unspecified"}, "bond": {"bo": 1, "stereo": "unspecified"}}, "molecules": [{"name": "9717", "atoms": [{"z": 15, "chg": -1}, {"z": 9}, {"z": 9}, {"z": 9}, {"z": 9}, {"z": 9}, {"z": 9}, {"z": 7, "chg": 1}, {"z": 7}, {}, {"impHs": 1}, {"impHs": 1}, {"impHs": 1}, {"impHs": 1}, {"impHs": 1}], "bonds": [{"atoms": [0, 1]}, {"atoms": [0, 2]}, {"atoms": [0, 3]}, {"atoms": [0, 4]}, {"atoms": [0, 5]}, {"atoms": [0, 6]}, {"bo": 3, "atoms": [7, 8]}, {"atoms": [7, 9]}, {"bo": 2, "atoms": [9, 10]}, {"atoms": [9, 11]}, {"atoms": [10, 12]}, {"bo": 2, "atoms": [11, 13]}, {"bo": 2, "atoms": [12, 14]}, {"atoms": [13, 14]}], "conformers": [{"dim": 2, "coords": [[5.672, 2.31], [6.538, 2.81], [4.8059, 1.81], [4.8059, 2.81], [5.672, 1.31], [5.672, 3.31], [6.538, 1.81], [1.403, 3.62], [1.403, 4.62], [1.403, 2.62], [0.5369, 2.12], [2.269, 2.12], [0.5369, 1.12], [2.269, 1.12], [1.403, 0.62]]}], "properties": {"PUBCHEM_COMPOUND_CID": 9717, "PUBCHEM_COMPOUND_CANONICALIZED": 1, "PUBCHEM_CACTVS_COMPLEXITY": 168, "PUBCHEM_CACTVS_HBOND_ACCEPTOR": 8, "PUBCHEM_CACTVS_HBOND_DONOR": 0, "PUBCHEM_CACTVS_ROTATABLE_BOND": 0, "PUBCHEM_CACTVS_SUBSKEYS": "AAADcYBjAcIAAAAAAAAAAAAAAAAAAAAAAAAwAAAAAAAAAAABAAAAHAAIAAgACAiBEAAwwIAAAACAACRCQACCAAAgAgAIiAAAZIgIICKAkZGAIABggAAIyAcQAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA==", "PUBCHEM_IUPAC_OPENEYE_NAME": "benzenediazonium;hexafluorophosphate", "PUBCHEM_IUPAC_CAS_NAME": "benzenediazonium;hexafluorophosphate", "PUBCHEM_IUPAC_NAME_MARKUP": "benzenediazonium;hexafluorophosphate", "PUBCHEM_IUPAC_NAME": "benzenediazonium;hexafluorophosphate", "PUBCHEM_IUPAC_SYSTEMATIC_NAME": "benzenediazonium;hexafluorophosphate", "PUBCHEM_IUPAC_TRADITIONAL_NAME": "benzenediazonium;hexafluorophosphate", "PUBCHEM_IUPAC_INCHI": "InChI=1S/C6H5N2.F6P/c7-8-6-4-2-1-3-5-6;1-7(2,3,4,5)6/h1-5H;/q+1;-1", "PUBCHEM_IUPAC_INCHIKEY": "GIDPFONPWSEBOB-UHFFFAOYSA-N", "PUBCHEM_EXACT_MASS": 250.0094, "PUBCHEM_MOLECULAR_FORMULA": "C6H5F6N2P", "PUBCHEM_MOLECULAR_WEIGHT": 250.08, "PUBCHEM_OPENEYE_CAN_SMILES": "C1=CC=C(C=C1)[N+]#N.F[P-](F)(F)(F)(F)F", "PUBCHEM_OPENEYE_ISO_SMILES": "C1=CC=C(C=C1)[N+]#N.F[P-](F)(F)(F)(F)F", "PUBCHEM_CACTVS_TPSA": 28.2, "PUBCHEM_MONOISOTOPIC_WEIGHT": 250.0094, "PUBCHEM_TOTAL_CHARGE": 0, "PUBCHEM_HEAVY_ATOM_COUNT": 15, "PUBCHEM_ATOM_DEF_STEREO_COUNT": 0, "PUBCHEM_ATOM_UDEF_STEREO_COUNT": 0, "PUBCHEM_BOND_DEF_STEREO_COUNT": 0, "PUBCHEM_BOND_UDEF_STEREO_COUNT": 0, "PUBCHEM_ISOTOPIC_ATOM_COUNT": 0, "PUBCHEM_COMPONENT_COUNT": 2, "PUBCHEM_CACTVS_TAUTO_COUNT": -1, "PUBCHEM_COORDINATE_TYPE": "1\n5\n255", "PUBCHEM_BONDANNOTATIONS": "10  11  8\n10  12  8\n11  13  8\n12  14  8\n13  15  8\n14  15  8"}, "extensions": [{"name": "rdkitRepresentation", "formatVersion": 2, "toolkitVersion": "2022.03.5", "aromaticAtoms": [9, 10, 11, 12, 13, 14], "aromaticBonds": [8, 9, 10, 11, 12, 13], "cipRanks": [7, 6, 6, 6, 6, 6, 6, 5, 4, 3, 2, 2, 1, 1, 0], "atomRings": [[9, 11, 13, 14, 12, 10]]}]}], "ERROR": "input violates the currently-set semantic constraints\n\tSMILES: C1=CC=C(C=C1)[N+]#N.F[P-](F)(F)(F)(F)F\n\tErrors:\n\t[[P-1] with 6 bond(s) - a max. of 4 bond(s) was specified]\n"}
{"commonchem": {"version": 10}, "defaults": {"atom": {"z": 6, "impHs": 0, "chg": 0, "nRad": 0, "isotope": 0, "stereo": "unspecified"}, "bond": {"bo": 1, "stereo": "unspecified"}}, "molecules": [{"name": "9885", "atoms": [{"z": 15, "chg": -1}, {"z": 9}, {"z": 9}, {"z": 9}, {"z": 9}, {"z": 9}, {"z": 9}, {"z": 7, "chg": 1}, {"impHs": 2}, {"impHs": 2}, {"impHs": 2}, {"impHs": 2}, {"impHs": 3}, {"impHs": 3}, {"impHs": 3}, {"impHs": 3}], "bonds": [{"atoms": [0, 1]}, {"atoms": [0, 2]}, {"atoms": [0, 3]}, {"atoms": [0, 4]}, {"atoms": [0, 5]}, {"atoms": [0, 6]}, {"atoms": [7, 8]}, {"atoms": [7, 9]}, {"atoms": [7, 10]}, {"atoms": [7, 11]}, {"atoms": [8, 12]}, {"atoms": [9, 13]}, {"atoms": [10, 14]}, {"atoms": [11, 15]}], "conformers": [{"dim": 2, "coords": [[2.269, 1.0], [3.135, 1.5], [1.403, 0.5], [1.403, 1.5], [2.269, 0.0], [2.269, 2.0], [3.135, 0.5], [2.269, 6.269], [3.135, 6.769], [1.403, 5.769], [1.769, 7.135], [2.769, 5.403], [4.001, 6.269], [0.5369, 6.269], [2.269, 8.001], [2.269, 4.5369]]}], "properties": {"PUBCHEM_COMPOUND_CID": 9885, "PUBCHEM_COMPOUND_CANONICALIZED": 1, "PUBCHEM_CACTVS_COMPLEXITY": 110, "PUBCHEM_CACTVS_HBOND_ACCEPTOR": 7, "PUBCHEM_CACTVS_HBOND_DONOR": 0, "PUBCHEM_CACTVS_ROTATABLE_BOND": 4, "PUBCHEM_CACTVS_SUBSKEYS": "AAADceByAcIAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAHAAAAAgAAADBAAQCAAMAAAAAAAAAAAAAAAAAAAAAAAAIAAAAAAAAAAAAAAAAAACAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA==", "PUBCHEM_IUPAC_OPENEYE_NAME": "tetraethylammonium;hexafluorophosphate", "PUBCHEM_IUPAC_CAS_NAME": "tetraethylammonium;hexafluorophosphate", "PUBCHEM_IUPAC_NAME_MARKUP": "tetraethylazanium;hexafluorophosphate", "PUBCHEM_IUPAC_NAME": "tetraethylazanium;hexafluorophosphate", "PUBCHEM_IUPAC_SYSTEMATIC_NAME": "tetraethylazanium;hexafluorophosphate", "PUBCHEM_IUPAC_TRADITIONAL_NAME": "tetraethylammonium;hexafluorophosphate", "PUBCHEM_IUPAC_INCHI": "InChI=1S/C8H20N.F6P/c1-5-9(6-2,7-3)8-4;1-7(2,3,4,5)6/h5-8H2,1-4H3;/q+1;-1", "PUBCHEM_IUPAC_INCHIKEY": "KLKUOIXSIDDDCN-UHFFFAOYSA-N", "PUBCHEM_EXACT_MASS": 275.1237, "PUBCHEM_MOLECULAR_FORMULA": "C8H20F6NP", "PUBCHEM_MOLECULAR_WEIGHT": 275.22, "PUBCHEM_OPENEYE_CAN_SMILES": "CC[N+](CC)(CC)CC.F[P-](F)(F)(F)(F)F", "PUBCHEM_OPENEYE_ISO_SMILES": "CC[N+](CC)(CC)CC.F[P-](F)(F)(F)(F)F", "PUBCHEM_CACTVS_TPSA": 0, "PUBCHEM_MONOISOTOPIC_WEIGHT": 275.1237, "PUBCHEM_TOTAL_CHARGE": 0, "PUBCHEM_HEAVY_ATOM_COUNT": 16, "PUBCHEM_ATOM_DEF_STEREO_COUNT": 0, "PUBCHEM_ATOM_UDEF_STEREO_COUNT": 0, "PUBCHEM_BOND_DEF_STEREO_COUNT": 0, "PUBCHEM_BOND_UDEF_STEREO_COUNT": 0, "PUBCHEM_ISOTOPIC_ATOM_COUNT": 0, "PUBCHEM_COMPONENT_COUNT": 2, "PUBCHEM_CACTVS_TAUTO_COUNT": 1, "PUBCHEM_COORDINATE_TYPE": "1\n5\n255"}, "extensions": [{"name": "rdkitRepresentation", "formatVersion": 2, "toolkitVersion": "2022.03.5", "cipRanks": [4, 3, 3, 3, 3, 3, 3, 2, 1, 1, 1, 1, 0, 0, 0, 0]}]}], "ERROR": "input violates the currently-set semantic constraints\n\tSMILES: CC[N+](CC)(CC)CC.F[P-](F)(F)(F)(F)F\n\tErrors:\n\t[[P-1] with 6 bond(s) - a max. of 4 bond(s) was specified]\n"}
{"commonchem": {"version": 10}, "defaults": {"atom": {"z": 6, "impHs": 0, "chg": 0, "nRad": 0, "isotope": 0, "stereo": "unspecified"}, "bond": {"bo": 1, "stereo": "unspecified"}}, "molecules": [{"name": "9886", "atoms": [{"z": 15, "chg": -1}, {"z": 9}, {"z": 9}, {"z": 9}, {"z": 9}, {"z": 9}, {"z": 9}], "bonds": [{"atoms": [0, 1]}, {"atoms": [0, 2]}, {"atoms": [0, 3]}, {"atoms": [0, 4]}, {"atoms": [0, 5]}, {"atoms": [0, 6]}], "conformers": [{"dim": 2, "coords": [[2.866, 0.0], [3.732, 0.5], [2.0, -0.5], [2.0, 0.5], [2.866, -1.0], [2.866, 1.0], [3.732, -0.5]]}], "properties": {"PUBCHEM_COMPOUND_CID": 9886, "PUBCHEM_COMPOUND_CANONICALIZED": 1, "PUBCHEM_CACTVS_COMPLEXITY": 62.7, "PUBCHEM_CACTVS_HBOND_ACCEPTOR": 7, "PUBCHEM_CACTVS_HBOND_DONOR": 0, "PUBCHEM_CACTVS_ROTATABLE_BOND": 0, "PUBCHEM_CACTVS_SUBSKEYS": "AAADcQAAAcIAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAgAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA==", "PUBCHEM_IUPAC_OPENEYE_NAME": "hexafluorophosphate", "PUBCHEM_IUPAC_CAS_NAME": "hexafluorophosphate", "PUBCHEM_IUPAC_NAME_MARKUP": "hexafluorophosphate", "PUBCHEM_IUPAC_NAME": "hexafluorophosphate", "PUBCHEM_IUPAC_SYSTEMATIC_NAME": "hexafluorophosphate", "PUBCHEM_IUPAC_TRADITIONAL_NAME": "hexafluorophosphate", "PUBCHEM_IUPAC_INCHI": "InChI=1S/F6P/c1-7(2,3,4,5)6/q-1", "PUBCHEM_IUPAC_INCHIKEY": "LJQLCJWAZJINEB-UHFFFAOYSA-N", "PUBCHEM_XLOGP3_AA": 3.2, "PUBCHEM_EXACT_MASS": 144.9641, "PUBCHEM_MOLECULAR_FORMULA": "F6P-", "PUBCHEM_MOLECULAR_WEIGHT": 144.9641, "PUBCHEM_OPENEYE_CAN_SMILES": "F[P-](F)(F)(F)(F)F", "PUBCHEM_OPENEYE_ISO_SMILES": "F[P-](F)(F)(F)(F)F", "PUBCHEM_CACTVS_TPSA": 0, "PUBCHEM_MONOISOTOPIC_WEIGHT": 144.9641, "PUBCHEM_TOTAL_CHARGE": -1, "PUBCHEM_HEAVY_ATOM_COUNT": 7, "PUBCHEM_ATOM_DEF_STEREO_COUNT": 0, "PUBCHEM_ATOM_UDEF_STEREO_COUNT": 0, "PUBCHEM_BOND_DEF_STEREO_COUNT": 0, "PUBCHEM_BOND_UDEF_STEREO_COUNT": 0, "PUBCHEM_ISOTOPIC_ATOM_COUNT": 0, "PUBCHEM_COMPONENT_COUNT": 1, "PUBCHEM_CACTVS_TAUTO_COUNT": -1, "PUBCHEM_COORDINATE_TYPE": "1\n5\n255"}, "extensions": [{"name": "rdkitRepresentation", "formatVersion": 2, "toolkitVersion": "2022.03.5", "cipRanks": [1, 0, 0, 0, 0, 0, 0]}]}], "ERROR": "input violates the currently-set semantic constraints\n\tSMILES: F[P-](F)(F)(F)(F)F\n\tErrors:\n\t[[P-1] with 6 bond(s) - a max. of 4 bond(s) was specified]\n"}
...
{"commonchem": {"version": 10}, "defaults": {"atom": {"z": 6, "impHs": 0, "chg": 0, "nRad": 0, "isotope": 0, "stereo": "unspecified"}, "bond": {"bo": 1, "stereo": "unspecified"}}, "molecules": [{"name": "499576", "atoms": [{"z": 92}, {"z": 78}, {"z": 42}, {"z": 15, "impHs": 1, "chg": 1}, {"z": 15, "impHs": 1}, {"z": 15, "chg": -1}, {"z": 9}, {"z": 9}, {"z": 9}, {"z": 9}, {"z": 9}, {"z": 9}, {"z": 8, "chg": 1}, {"z": 8, "chg": 1}, {"z": 7}, {}, {}, {}, {"impHs": 1}, {"impHs": 1}, {"impHs": 1}, {"impHs": 1}, {"impHs": 1}, {"impHs": 1}, {}, {}, {"impHs": 1}, {"impHs": 1}, {"impHs": 1}, {"impHs": 1}, {"impHs": 1}, {"impHs": 1}, {"impHs": 1}, {"impHs": 1}, {"impHs": 1}, {"impHs": 1}, {"impHs": 1}, {"impHs": 1}, {"impHs": 1}, {"impHs": 1}, {"impHs": 1}, {"impHs": 1}, {"impHs": 1}, {"impHs": 1}, {"impHs": 1}, {"impHs": 1, "nRad": 1}, {"impHs": 1, "nRad": 1}, {"impHs": 1, "nRad": 1}, {"impHs": 1, "nRad": 1}, {"impHs": 1, "nRad": 1}, {"impHs": 3}, {}, {"chg": -1}, {"chg": -1}], "bonds": [{"atoms": [3, 15]}, {"atoms": [3, 16]}, {"atoms": [3, 17]}, {"atoms": [4, 24]}, {"atoms": [4, 25]}, {"atoms": [5, 6]}, {"atoms": [5, 7]}, {"atoms": [5, 8]}, {"atoms": [5, 9]}, {"atoms": [5, 10]}, {"atoms": [5, 11]}, {"bo": 3, "atoms": [12, 52]}, {"bo": 3, "atoms": [13, 53]}, {"bo": 3, "atoms": [14, 51]}, {"bo": 2, "atoms": [15, 18]}, {"atoms": [15, 21]}, {"bo": 2, "atoms": [16, 19]}, {"atoms": [16, 22]}, {"bo": 2, "atoms": [17, 20]}, {"atoms": [17, 23]}, {"atoms": [18, 26]}, {"atoms": [19, 27]}, {"atoms": [20, 28]}, {"bo": 2, "atoms": [21, 29]}, {"bo": 2, "atoms": [22, 30]}, {"bo": 2, "atoms": [23, 31]}, {"bo": 2, "atoms": [24, 35]}, {"atoms": [24, 37]}, {"bo": 2, "atoms": [25, 36]}, {"atoms": [25, 38]}, {"bo": 2, "atoms": [26, 32]}, {"bo": 2, "atoms": [27, 33]}, {"bo": 2, "atoms": [28, 34]}, {"atoms": [29, 32]}, {"atoms": [30, 33]}, {"atoms": [31, 34]}, {"atoms": [35, 39]}, {"atoms": [36, 40]}, {"bo": 2, "atoms": [37, 41]}, {"bo": 2, "atoms": [38, 42]}, {"bo": 2, "atoms": [39, 43]}, {"bo": 2, "atoms": [40, 44]}, {"atoms": [41, 43]}, {"atoms": [42, 44]}, {"atoms": [45, 46]}, {"atoms": [45, 47]}, {"atoms": [46, 48]}, {"atoms": [47, 49]}, {"atoms": [48, 49]}, {"atoms": [50, 51]}], "conformers": [{"dim": 2, "coords": [[0.5326, 1.9869], [9.3867, 3.6939], [8.3529, 9.0972], [8.4085, 3.486], [10.0558, 2.9508], [1.3987, 5.9868], [2.2647, 6.4868], [0.5326, 5.4868], [0.5326, 6.4868], [1.3987, 4.9868], [1.3987, 6.9868], [2.2647, 5.4868], [10.085, 10.0972], [2.2647, 2.9869], [10.3534, 13.6341], [8.9085, 2.62], [8.3388, 4.4836], [7.411, 3.5558], [9.7746, 2.12], [9.1678, 5.0428], [6.8517, 2.7268], [8.0425, 2.12], [7.44, 4.922], [6.9726, 4.4546], [11.0461, 2.8116], [10.3648, 3.9018], [9.7746, 1.12], [9.098, 6.0404], [5.8542, 2.7965], [8.0425, 1.12], [7.3702, 5.9195], [5.975, 4.5243], [8.9085, 0.62], [8.1992, 6.4787], [5.4158, 3.6953], [11.6617, 3.5996], [11.343, 4.1098], [11.4207, 1.8844], [9.6957, 4.645], [12.652, 3.4605], [11.652, 5.0608], [12.4109, 1.7453], [10.0047, 5.596], [13.0266, 2.5333], [10.9829, 5.804], [2.2077, 10.4395], [1.3987, 11.0273], [1.8987, 9.4884], [0.5897, 10.4395], [0.8987, 9.4884], [8.6214, 12.6341], [9.4874, 13.1341], [9.219, 9.5972], [1.3987, 2.4869]]}], "properties": {"PUBCHEM_COMPOUND_CID": 499576, "PUBCHEM_COMPOUND_CANONICALIZED": 1, "PUBCHEM_CACTVS_COMPLEXITY": 523, "PUBCHEM_CACTVS_HBOND_ACCEPTOR": 10, "PUBCHEM_CACTVS_HBOND_DONOR": 0, "PUBCHEM_CACTVS_ROTATABLE_BOND": 5, "PUBCHEM_CACTVS_SUBSKEYS": "AAADcfB+McMAAAAAAEAABAAAIAAAAYAAAAAwYMGDAAAAAAABVAAAHggAAAgQCACBEAAyAIAAACCQBCBCAAACAAAgAAAIiAAAAIgIICKAERCAIAAggAAIiAcAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA==", "PUBCHEM_IUPAC_INCHI": "InChI=1S/C18H15P.C12H11P.C5H5.C2H3N.2CO.F6P.Mo.Pt.U/c1-4-10-16(11-5-1)19(17-12-6-2-7-13-17)18-14-8-3-9-15-18;1-3-7-11(8-4-1)13-12-9-5-2-6-10-12;1-2-4-5-3-1;1-2-3;2*1-2;1-7(2,3,4,5)6;;;/h1-15H;1-10,13H;1-5H;1H3;;;;;;/q;;;;;;-1;;;/p+1", "PUBCHEM_IUPAC_INCHIKEY": "KQWMQADRALXQKS-UHFFFAOYSA-O", "PUBCHEM_EXACT_MASS": 1287.0994, "PUBCHEM_MOLECULAR_FORMULA": "C39H35F6MoNO2P3PtU", "PUBCHEM_MOLECULAR_WEIGHT": 1285.7, "PUBCHEM_OPENEYE_CAN_SMILES": "CC#N.[C-]#[O+].[C-]#[O+].C1=CC=C(C=C1)PC2=CC=CC=C2.C1=CC=C(C=C1)[PH+](C2=CC=CC=C2)C3=CC=CC=C3.[CH]1[CH][CH][CH][CH]1.F[P-](F)(F)(F)(F)F.[Mo].[Pt].[U]", "PUBCHEM_OPENEYE_ISO_SMILES": "CC#N.[C-]#[O+].[C-]#[O+].C1=CC=C(C=C1)PC2=CC=CC=C2.C1=CC=C(C=C1)[PH+](C2=CC=CC=C2)C3=CC=CC=C3.[CH]1[CH][CH][CH][CH]1.F[P-](F)(F)(F)(F)F.[Mo].[Pt].[U]", "PUBCHEM_CACTVS_TPSA": 25.8, "PUBCHEM_MONOISOTOPIC_WEIGHT": 1287.0994, "PUBCHEM_TOTAL_CHARGE": 0, "PUBCHEM_HEAVY_ATOM_COUNT": 54, "PUBCHEM_ATOM_DEF_STEREO_COUNT": 0, "PUBCHEM_ATOM_UDEF_STEREO_COUNT": 0, "PUBCHEM_BOND_DEF_STEREO_COUNT": 0, "PUBCHEM_BOND_UDEF_STEREO_COUNT": 0, "PUBCHEM_ISOTOPIC_ATOM_COUNT": 0, "PUBCHEM_COMPONENT_COUNT": 10, "PUBCHEM_CACTVS_TAUTO_COUNT": -1, "PUBCHEM_NONSTANDARDBOND": "2  4  6\n2  5  6", "PUBCHEM_COORDINATE_TYPE": "1\n5\n255", "PUBCHEM_BONDANNOTATIONS": "16  19  8\n16  22  8\n17  20  8\n17  23  8\n18  21  8\n18  24  8\n19  27  8\n20  28  8\n21  29  8\n22  30  8\n23  31  8\n24  32  8\n25  36  8\n25  38  8\n26  37  8\n26  39  8\n27  33  8\n28  34  8\n29  35  8\n30  33  8\n31  34  8\n32  35  8\n36  40  8\n37  41  8\n38  42  8\n39  43  8\n40  44  8\n41  45  8\n42  44  8\n43  45  8"}, "extensions": [{"name": "rdkitRepresentation", "formatVersion": 2, "toolkitVersion": "2022.03.5", "aromaticAtoms": [15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "aromaticBonds": [14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43], "cipRanks": [20, 19, 18, 16, 15, 17, 14, 14, 14, 14, 14, 14, 13, 13, 12, 11, 11, 11, 7, 7, 7, 7, 7, 7, 10, 10, 5, 5, 5, 5, 5, 5, 3, 3, 3, 6, 6, 6, 6, 4, 4, 4, 4, 2, 2, 1, 1, 1, 1, 1, 0, 8, 9, 9], "atomRings": [[18, 26, 32, 29, 21, 15], [19, 27, 33, 30, 22, 16], [20, 28, 34, 31, 23, 17], [35, 39, 43, 41, 37, 24], [36, 40, 44, 42, 38, 25], [45, 46, 48, 49, 47]]}]}], "ERROR": "input violates the currently-set semantic constraints\n\tSMILES: CC#N.[C-]#[O+].[C-]#[O+].C1=CC=C(C=C1)PC2=CC=CC=C2.C1=CC=C(C=C1)[PH+](C2=CC=CC=C2)C3=CC=CC=C3.[CH]1[CH][CH][CH][CH]1.F[P-](F)(F)(F)(F)F.[Mo].[Pt].[U]\n\tErrors:\n\t[[P-1] with 6 bond(s) - a max. of 4 bond(s) was specified]\n"}
{"commonchem": {"version": 10}, "defaults": {"atom": {"z": 6, "impHs": 0, "chg": 0, "nRad": 0, "isotope": 0, "stereo": "unspecified"}, "bond": {"bo": 1, "stereo": "unspecified"}}, "molecules": [{"name": "499597", "atoms": [{"z": 25}, {"z": 15, "chg": -1}, {"z": 9}, {"z": 9}, {"z": 9}, {"z": 9}, {"z": 9}, {"z": 9}, {"z": 8}, {"z": 8}, {"z": 8}, {"z": 8, "chg": 1}, {"z": 8, "chg": 1}, {"z": 8, "chg": 1}, {}, {"chg": 1}, {}, {}, {"impHs": 1, "nRad": 1}, {"impHs": 3}, {"impHs": 1, "nRad": 1}, {"impHs": 1, "nRad": 1}, {}, {"impHs": 3}, {"impHs": 1, "nRad": 1}, {"impHs": 1, "nRad": 1}, {}, {"impHs": 1}, {"impHs": 1}, {}, {"impHs": 1}, {"impHs": 3}, {"impHs": 3}, {"chg": -1}, {"chg": -1}, {"chg": -1}], "bonds": [{"atoms": [1, 2]}, {"atoms": [1, 3]}, {"atoms": [1, 4]}, {"atoms": [1, 5]}, {"atoms": [1, 6]}, {"atoms": [1, 7]}, {"atoms": [8, 26]}, {"atoms": [8, 31]}, {"bo": 2, "atoms": [9, 26]}, {"atoms": [10, 29]}, {"atoms": [10, 32]}, {"bo": 3, "atoms": [11, 33]}, {"bo": 3, "atoms": [12, 34]}, {"bo": 3, "atoms": [13, 35]}, {"atoms": [14, 15]}, {"atoms": [14, 17]}, {"atoms": [14, 18]}, {"atoms": [14, 19]}, {"atoms": [15, 16]}, {"atoms": [15, 21]}, {"atoms": [16, 20]}, {"atoms": [16, 23]}, {"atoms": [16, 26]}, {"bo": 2, "atoms": [17, 22]}, {"atoms": [17, 27]}, {"atoms": [18, 24]}, {"atoms": [20, 24]}, {"atoms": [21, 25]}, {"atoms": [22, 25]}, {"atoms": [22, 28]}, {"bo": 2, "atoms": [27, 29]}, {"bo": 2, "atoms": [28, 30]}, {"atoms": [29, 30]}], "conformers": [{"dim": 2, "coords": [[1.732, 6.4819], [8.7906, 1.0], [9.6566, 1.5], [7.9245, 0.5], [7.9245, 1.5], [8.7906, 0.0], [8.7906, 2.0], [9.6566, 0.5], [6.5198, 5.4375], [8.0297, 4.5888], [10.6477, 10.9247], [0.0, 7.4819], [1.732, 4.4819], [3.4641, 7.4819], [8.9036, 7.8554], [8.9036, 6.8554], [8.0096, 6.3208], [9.7696, 8.3554], [8.0096, 8.3901], [8.9036, 8.8554], [7.1036, 6.8346], [9.7696, 6.3554], [10.6356, 7.8554], [8.5196, 5.4606], [7.1036, 7.8762], [10.6356, 6.8554], [7.5197, 5.449], [9.7536, 9.397], [11.5456, 8.3623], [10.6516, 9.9247], [11.5537, 9.4039], [6.0298, 4.5657], [11.5118, 11.428], [0.866, 6.9819], [1.732, 5.4819], [2.5981, 6.9819]]}], "properties": {"PUBCHEM_COMPOUND_CID": 499597, "PUBCHEM_COMPOUND_CANONICALIZED": 1, "PUBCHEM_CACTVS_COMPLEXITY": 507, "PUBCHEM_CACTVS_HBOND_ACCEPTOR": 13, "PUBCHEM_CACTVS_HBOND_DONOR": 0, "PUBCHEM_CACTVS_ROTATABLE_BOND": 3, "PUBCHEM_CACTVS_SUBSKEYS": "AAADceB4OcIAAABAAAAAAAAAAAAAAAAAAAAwYMAAAAAAAADBAAAAGgAAAAgADgSAmAIyDoAABACIAiDSCAACCAAgIAAIiAEGCIgMJjKEMRqAMCAkwBEIqAeIyPCPoAABAAAQAADAAAYAACAAAAAAAAAAAA==", "PUBCHEM_IUPAC_INCHI": "InChI=1S/C19H20O3.3CO.F6P.Mn/c1-18-10-5-11-19(2,17(20)22-4)16(18)9-7-13-6-8-14(21-3)12-15(13)18;3*1-2;1-7(2,3,4,5)6;/h5-12H,1-4H3;;;;;/q+1;;;;-1;", "PUBCHEM_IUPAC_INCHIKEY": "CZQVTDNHTUIRDD-UHFFFAOYSA-N", "PUBCHEM_EXACT_MASS": 580.0282, "PUBCHEM_MOLECULAR_FORMULA": "C22H20F6MnO6P", "PUBCHEM_MOLECULAR_WEIGHT": 580.3, "PUBCHEM_OPENEYE_CAN_SMILES": "CC12[CH][CH][CH]C([C+]1[CH][CH]C3=C2C=C(C=C3)OC)(C)C(=O)OC.[C-]#[O+].[C-]#[O+].[C-]#[O+].F[P-](F)(F)(F)(F)F.[Mn]", "PUBCHEM_OPENEYE_ISO_SMILES": "CC12[CH][CH][CH]C([C+]1[CH][CH]C3=C2C=C(C=C3)OC)(C)C(=O)OC.[C-]#[O+].[C-]#[O+].[C-]#[O+].F[P-](F)(F)(F)(F)F.[Mn]", "PUBCHEM_CACTVS_TPSA": 38.5, "PUBCHEM_MONOISOTOPIC_WEIGHT": 580.0282, "PUBCHEM_TOTAL_CHARGE": 0, "PUBCHEM_HEAVY_ATOM_COUNT": 36, "PUBCHEM_ATOM_DEF_STEREO_COUNT": 0, "PUBCHEM_ATOM_UDEF_STEREO_COUNT": 2, "PUBCHEM_BOND_DEF_STEREO_COUNT": 0, "PUBCHEM_BOND_UDEF_STEREO_COUNT": 0, "PUBCHEM_ISOTOPIC_ATOM_COUNT": 0, "PUBCHEM_COMPONENT_COUNT": 6, "PUBCHEM_CACTVS_TAUTO_COUNT": -1, "PUBCHEM_COORDINATE_TYPE": "1\n5\n255", "PUBCHEM_BONDANNOTATIONS": "15  20  3\n17  24  3\n18  23  8\n18  28  8\n23  29  8\n28  30  8\n29  31  8\n30  31  8"}, "extensions": [{"name": "rdkitRepresentation", "formatVersion": 2, "toolkitVersion": "2022.03.5", "aromaticAtoms": [17, 22, 27, 28, 29, 30], "aromaticBonds": [23, 24, 29, 30, 31, 32], "cipRanks": [26, 25, 24, 24, 24, 24, 24, 24, 21, 22, 20, 23, 23, 23, 11, 7, 14, 13, 4, 0, 6, 3, 12, 1, 2, 5, 19, 10, 8, 17, 9, 16, 15, 18, 18, 18], "atomRings": [[16, 20, 24, 18, 14, 15], [21, 25, 22, 17, 14, 15], [27, 29, 30, 28, 22, 17]]}]}], "ERROR": "input violates the currently-set semantic constraints\n\tSMILES: CC12[CH][CH][CH]C([C+]1[CH][CH]C3=C2C=C(C=C3)OC)(C)C(=O)OC.[C-]#[O+].[C-]#[O+].[C-]#[O+].F[P-](F)(F)(F)(F)F.[Mn]\n\tErrors:\n\t[[P-1] with 6 bond(s) - a max. of 4 bond(s) was specified]\n"}

Output Sample:

| Processed 18000 molecules
Failed to encode 19654 with error input violates the currently-set semantic constraints
        SMILES: OCl(=O)=O
        Errors:
        [Cl with 5 bond(s) - a max. of 1 bond(s) was specified]

| Processed 19000 molecules
| Processed 20000 molecules
| Processed 21000 molecules
Failed to encode 22939 with error input violates the currently-set semantic constraints
        SMILES: C[N+](C)(C)CC1=CC=CC=C1.F[P-](F)(F)(F)(F)F
        Errors:
        [[P-1] with 6 bond(s) - a max. of 4 bond(s) was specified]

| Processed 22000 molecules
Failed to encode 24247 with error input violates the currently-set semantic constraints
        SMILES: OCl(=O)(=O)=O
        Errors:
        [Cl with 7 bond(s) - a max. of 1 bond(s) was specified]

[06:42:47] Explicit valence for atom # 0 Cl, 7, is greater than permitted
[06:42:47] ERROR: Could not sanitize molecule ending on line 4301269
[06:42:47] ERROR: Explicit valence for atom # 0 Cl, 7, is greater than permitted
Failed to encode 24345 with error input violates the currently-set semantic constraints
        SMILES: OI(=O)=O
        Errors:
        [I with 5 bond(s) - a max. of 1 bond(s) was specified]

Failed to encode 24445 with error input violates the currently-set semantic constraints
        SMILES: OBr(=O)=O
        Errors:
        [Br with 5 bond(s) - a max. of 1 bond(s) was specified]

Failed to encode 24453 with error input violates the currently-set semantic constraints
        SMILES: OCl=O
        Errors:
        [Cl with 3 bond(s) - a max. of 1 bond(s) was specified]

Failed to encode 24505 with error input violates the currently-set semantic constraints
        SMILES: [O-]Cl(=O)(=O)=O.[K+]
        Errors:
        [Cl with 7 bond(s) - a max. of 1 bond(s) was specified]

Failed to encode 24562 with error input violates the currently-set semantic constraints
        SMILES: [O-]Cl(=O)(=O)=O.[Ag+]
        Errors:
        [Cl with 7 bond(s) - a max. of 1 bond(s) was specified]

[06:42:49] Explicit valence for atom # 0 Br, 3, is greater than permitted
[06:42:49] ERROR: Could not sanitize molecule ending on line 4345855
[06:42:49] ERROR: Explicit valence for atom # 0 Br, 3, is greater than permitted
[06:42:49] Explicit valence for atom # 0 Br, 5, is greater than permitted
[06:42:49] ERROR: Could not sanitize molecule ending on line 4347130
[06:42:49] ERROR: Explicit valence for atom # 0 Br, 5, is greater than permitted
Failed to encode 24619 with error input violates the currently-set semantic constraints
        SMILES: [O-]I(=O)=O.[O-]I(=O)=O.[Ca+2]
        Errors:
        [I with 5 bond(s) - a max. of 1 bond(s) was specified]
        [I with 5 bond(s) - a max. of 1 bond(s) was specified]

| Processed 23000 molecules
[06:42:49] Explicit valence for atom # 0 Cl, 3, is greater than permitted
[06:42:49] ERROR: Could not sanitize molecule ending on line 4350864
[06:42:49] ERROR: Explicit valence for atom # 0 Cl, 3, is greater than permitted
Failed to encode 24639 with error input violates the currently-set semantic constraints
        SMILES: [NH4+].[O-]Cl(=O)(=O)=O
        Errors:
        [Cl with 7 bond(s) - a max. of 1 bond(s) was specified]

Failed to encode 24641 with error input violates the currently-set semantic constraints
        SMILES: [O-]Cl(=O)=O.[O-]Cl(=O)=O.[Sr+2]
        Errors:
        [Cl with 5 bond(s) - a max. of 1 bond(s) was specified]
        [Cl with 5 bond(s) - a max. of 1 bond(s) was specified]

[06:42:50] WARNING: not removing hydrogen atom without neighbors
Failed to encode 24840 with error input violates the currently-set semantic constraints
        SMILES: [O-]Cl(=O)(=O)=O.[O-]Cl(=O)(=O)=O.[Mg+2]
        Errors:
        [Cl with 7 bond(s) - a max. of 1 bond(s) was specified]
        [Cl with 7 bond(s) - a max. of 1 bond(s) was specified]

Failed to encode 24870 with error input violates the currently-set semantic constraints
        SMILES: O=Cl[O]
        Errors:
        [Cl with 3 bond(s) - a max. of 1 bond(s) was specified]

Failed to encode 24978 with error input violates the currently-set semantic constraints
        SMILES: [O-]Cl(=O)=O.[O-]Cl(=O)=O.[Ca+2]
        Errors:
        [Cl with 5 bond(s) - a max. of 1 bond(s) was specified]
        [Cl with 5 bond(s) - a max. of 1 bond(s) was specified]

Failed to encode 25155 with error input violates the currently-set semantic constraints
        SMILES: [O-]Cl(=O)=O.[O-]Cl(=O)=O.[Mg+2]
        Errors:
        [Cl with 5 bond(s) - a max. of 1 bond(s) was specified]
        [Cl with 5 bond(s) - a max. of 1 bond(s) was specified]

Failed to encode 25206 with error input violates the currently-set semantic constraints
        SMILES: [O-]Cl(=O)=O.[O-]Cl(=O)=O.[Zn+2]
        Errors:
        [Cl with 5 bond(s) - a max. of 1 bond(s) was specified]
        [Cl with 5 bond(s) - a max. of 1 bond(s) was specified]

Failed to encode 25289 with error input violates the currently-set semantic constraints
        SMILES: OI(=O)(O)(O)(O)O
        Errors:
        [I with 7 bond(s) - a max. of 1 bond(s) was specified]

| Processed 24000 molecules
Failed to encode 26059 with error input violates the currently-set semantic constraints
        SMILES: [O-]Cl(=O)=O.[O-]Cl(=O)=O.[Ba+2]
        Errors:
        [Cl with 5 bond(s) - a max. of 1 bond(s) was specified]
        [Cl with 5 bond(s) - a max. of 1 bond(s) was specified]

Failed to encode 26064 with error input violates the currently-set semantic constraints
        SMILES: [O-]Cl(=O)(=O)=O.[O-]Cl(=O)(=O)=O.[Co+2]
        Errors:
        [Cl with 7 bond(s) - a max. of 1 bond(s) was specified]
        [Cl with 7 bond(s) - a max. of 1 bond(s) was specified]

Failed to encode 26158 with error input violates the currently-set semantic constraints
        SMILES: [O-]Cl(=O)(=O)=O.[O-]Cl(=O)(=O)=O.[Ni+2]
        Errors:
        [Cl with 7 bond(s) - a max. of 1 bond(s) was specified]
        [Cl with 7 bond(s) - a max. of 1 bond(s) was specified]

| Processed 25000 molecules
Failed to encode 27335 with error input violates the currently-set semantic constraints
        SMILES: CN(C)P(=O)(N(C)C)OP(=O)(N(C)C)N(C)C.CN(C)P(=O)(N(C)C)OP(=O)(N(C)C)N(C)C.CN(C)P(=O)(N(C)C)OP(=O)(N(C)C)N(C)C.[O-]Cl(=O)(=O)=O.[O-]Cl(=O)(=O)=O.[Cu+2]
        Errors:
        [Cl with 7 bond(s) - a max. of 1 bond(s) was specified]
        [Cl with 7 bond(s) - a max. of 1 bond(s) was specified]

Failed to encode 27416 with error input violates the currently-set semantic constraints
        SMILES: CN(C)P(=O)(N(C)C)OP(=O)(N(C)C)N(C)C.CN(C)P(=O)(N(C)C)OP(=O)(N(C)C)N(C)C.CN(C)P(=O)(N(C)C)OP(=O)(N(C)C)N(C)C.[O-]Cl(=O)(=O)=O.[O-]Cl(=O)(=O)=O.[O-]Cl(=O)(=O)=O.[Fe+3]
        Errors:
        [Cl with 7 bond(s) - a max. of 1 bond(s) was specified]
        [Cl with 7 bond(s) - a max. of 1 bond(s) was specified]
        [Cl with 7 bond(s) - a max. of 1 bond(s) was specified]

Failed to encode 27464 with error input violates the currently-set semantic constraints
        SMILES: C(C[NH3+])[NH3+].[O-]Cl(=O)(=O)=O.[O-]Cl(=O)(=O)=O
        Errors:
        [Cl with 7 bond(s) - a max. of 1 bond(s) was specified]
        [Cl with 7 bond(s) - a max. of 1 bond(s) was specified]

| Processed 26000 molecules
Failed to encode 27988 with error input violates the currently-set semantic constraints
        SMILES: C[N+](C)(C)CCC(=O)OC.[O-]Cl(=O)(=O)=O
        Errors:
        [Cl with 7 bond(s) - a max. of 1 bond(s) was specified]

[06:43:10] Explicit valence for atom # 1 Si, 8, is greater than permitted
[06:43:10] ERROR: Could not sanitize molecule ending on line 4947186
[06:43:10] ERROR: Explicit valence for atom # 1 Si, 8, is greater than permitted
[06:43:10] Explicit valence for atom # 0 Si, 8, is greater than permitted
[06:43:10] ERROR: Could not sanitize molecule ending on line 4947304
[06:43:10] ERROR: Explicit valence for atom # 0 Si, 8, is greater than permitted
[06:43:10] WARNING: not removing hydrogen atom without neighbors
[06:43:10] Explicit valence for atom # 0 Si, 8, is greater than permitted
[06:43:10] ERROR: Could not sanitize molecule ending on line 4949030
[06:43:10] ERROR: Explicit valence for atom # 0 Si, 8, is greater than permitted
[06:43:10] WARNING: not removing hydrogen atom without neighbors
[06:43:10] Explicit valence for atom # 0 Si, 8, is greater than permitted
[06:43:10] ERROR: Could not sanitize molecule ending on line 4952274
[06:43:10] ERROR: Explicit valence for atom # 0 Si, 8, is greater than permitted
[06:43:10] WARNING: not removing hydrogen atom without neighbors
[06:43:10] WARNING: not removing hydrogen atom without neighbors
Failed to encode 28211 with error input violates the currently-set semantic constraints
        SMILES: [O-]Cl(=O)(=O)=O.[O-]Cl(=O)(=O)=O.[Cu+2]
        Errors:
        [Cl with 7 bond(s) - a max. of 1 bond(s) was specified]
        [Cl with 7 bond(s) - a max. of 1 bond(s) was specified]

[06:43:11] Explicit valence for atom # 1 Si, 8, is greater than permitted
[06:43:11] ERROR: Could not sanitize molecule ending on line 4979701
[06:43:11] ERROR: Explicit valence for atom # 1 Si, 8, is greater than permitted
[06:43:11] WARNING: not removing hydrogen atom without neighbors
| Processed 27000 molecules
Failed to encode 29690 with error input violates the currently-set semantic constraints
        SMILES: CC[N+](CC)(CC)CC1=CC=CC=C1.[O-]I(=O)=O
        Errors:
        [I with 5 bond(s) - a max. of 1 bond(s) was specified]

[06:43:21] WARNING: not removing hydrogen atom without neighbors
[06:43:21] WARNING: not removing hydrogen atom without neighbors
| Processed 28000 molecules
| Processed 29000 molecules
Failed to encode 31589 with error input violates the currently-set semantic constraints
        SMILES: CCCN(CCC)C=CC=CC=[N+](CCC)CCC.[O-]Cl(=O)(=O)=O
        Errors:
        [Cl with 7 bond(s) - a max. of 1 bond(s) was specified]

| Processed 30000 molecules
Failed to encode 32961 with error input violates the currently-set semantic constraints
        SMILES: CC[NH+](CC)CCCON=C1C2=CC=CC=C2C=CC3=CC=CC=C31.[O-]Cl(=O)(=O)=O
        Errors:
        [Cl with 7 bond(s) - a max. of 1 bond(s) was specified]

Failed to encode 33343 with error input violates the currently-set semantic constraints
        SMILES: CN(C)P(=O)(N(C)C)OP(=O)(N(C)C)N(C)C.CN(C)P(=O)(N(C)C)OP(=O)(N(C)C)N(C)C.CN(C)P(=O)(N(C)C)OP(=O)(N(C)C)N(C)C.[O-]Cl(=O)(=O)=O.[O-]Cl(=O)(=O)=O.[Co+2]
        Errors:
        [Cl with 7 bond(s) - a max. of 1 bond(s) was specified]
        [Cl with 7 bond(s) - a max. of 1 bond(s) was specified]

| Processed 31000 molecules
| Processed 32000 molecules
| Processed 33000 molecules
[06:43:56] Explicit valence for atom # 0 Ge, 8, is greater than permitted
[06:43:56] ERROR: Could not sanitize molecule ending on line 6309909
[06:43:56] ERROR: Explicit valence for atom # 0 Ge, 8, is greater than permitted
| Processed 34000 molecules
| Processed 35000 molecules
| Processed 36000 molecules
Failed to encode 39606 with error input violates the currently-set semantic constraints
        SMILES: COC=[O+]C.F[P-](F)(F)(F)(F)F
        Errors:
        [[P-1] with 6 bond(s) - a max. of 4 bond(s) was specified]

| Processed 37000 molecules
Failed to encode 40464 with error input violates the currently-set semantic constraints
        SMILES: CC1=CC2=NC3=C(C=C(C4=CC=CC=C43)N)OC2=CC1=[N+](C)C.[O-]Cl(=O)(=O)=O
        Errors:
        [Cl with 7 bond(s) - a max. of 1 bond(s) was specified]

Failed to encode 40736 with error input violates the currently-set semantic constraints
        SMILES: CC[N+](=C1C=CC2=NC3=C(C=C(C4=CC=CC=C43)N)OC2=C1)CC.[O-]Cl(=O)(=O)=O
        Errors:
        [Cl with 7 bond(s) - a max. of 1 bond(s) was specified]

Failed to encode 40892 with error input violates the currently-set semantic constraints
        SMILES: CCCCNCCCC.F[P-](F)(F)(F)(F)F
        Errors:
        [[P-1] with 6 bond(s) - a max. of 4 bond(s) was specified]

| Processed 38000 molecules
| Processed 39000 molecules
| Processed 40000 molecules
| Processed 41000 molecules
| Processed 42000 molecules
| Processed 43000 molecules
| Processed 44000 molecules
| Processed 45000 molecules
Failed to encode 50244 with error input violates the currently-set semantic constraints
        SMILES: C[N+]1=CC=C(C=C1)NC2=CC=C(C=C2)S(=O)(=O)NC3=CC=C(C=C3)NC4=C5C=CC(=CC5=[N+](C6=CC=CC=C64)C)N.[O-]Cl(=O)(=O)=O.[O-]Cl(=O)(=O)=O
        Errors:
        [Cl with 7 bond(s) - a max. of 1 bond(s) was specified]
        [Cl with 7 bond(s) - a max. of 1 bond(s) was specified]

Failed to encode 50320 with error input violates the currently-set semantic constraints
        SMILES: CC(=O)OI(C1=CC=CC=C1OC)OC(=O)C
        Errors:
        [I with 3 bond(s) - a max. of 1 bond(s) was specified]

| Processed 46000 molecules
| Processed 47000 molecules
[06:45:29] Explicit valence for atom # 2 Cl, 3, is greater than permitted
[06:45:29] ERROR: Could not sanitize molecule ending on line 9206824
[06:45:29] ERROR: Explicit valence for atom # 2 Cl, 3, is greater than permitted
[06:45:29] Explicit valence for atom # 2 Cl, 3, is greater than permitted
[06:45:29] ERROR: Could not sanitize molecule ending on line 9206916
[06:45:29] ERROR: Explicit valence for atom # 2 Cl, 3, is greater than permitted
Failed to encode 52033 with error input violates the currently-set semantic constraints
        SMILES: CCCCCCCCCCCCC1=CC=C(C=C1)C[N+](C)(C)C.F[P-](F)(F)(F)(F)F
        Errors:
        [[P-1] with 6 bond(s) - a max. of 4 bond(s) was specified]

| Processed 48000 molecules
Failed to encode 53136 with error input violates the currently-set semantic constraints
        SMILES: C[N+](C)(C)CCOCCCl.[O-]Cl(=O)(=O)=O
        Errors:
        [Cl with 7 bond(s) - a max. of 1 bond(s) was specified]

Failed to encode 53472 with error input violates the currently-set semantic constraints
        SMILES: C1=CC=C2C=[N+](C=CC2=C1)N.[O-]Cl(=O)(=O)=O
        Errors:
        [Cl with 7 bond(s) - a max. of 1 bond(s) was specified]

| Processed 49000 molecules
| Processed 50000 molecules
| Processed 51000 molecules
| Processed 52000 molecules
Failed to encode 57348 with error input violates the currently-set semantic constraints
        SMILES: CC1=[N+](C2=C(C3=CC(=C(C=C3C(=C2C4=CC(=C(C=C14)OC)OC)OC(=O)C)OC)OC)OC(=O)C)C.[O-]Cl(=O)(=O)=O
        Errors:
        [Cl with 7 bond(s) - a max. of 1 bond(s) was specified]

Failed to encode 58215 with error input violates the currently-set semantic constraints
        SMILES: O.O.O.O.O.O.[O-]Cl(=O)=O.[O-]Cl(=O)=O.[Cl-].[Cl-].[Ca+2].[Ca+2]
        Errors:
        [Cl with 5 bond(s) - a max. of 1 bond(s) was specified]
        [Cl with 5 bond(s) - a max. of 1 bond(s) was specified]

| Processed 53000 molecules
| Processed 54000 molecules
Failed to encode 59903 with error input violates the currently-set semantic constraints
        SMILES: CC1=[N+](C2=C(C3=CC(=C(C=C13)OC)OC)C(=C(C4=CC(=C(C=C42)OC)OC)OC(=O)C)OC(=O)C)C.[O-]Cl(=O)(=O)=O
        Errors:
        [Cl with 7 bond(s) - a max. of 1 bond(s) was specified]

| Processed 55000 molecules
Failed to encode 61478 with error input violates the currently-set semantic constraints
        SMILES: [O-]Br(=O)=O.[O-]Br(=O)=O.[Ca+2]
        Errors:
        [Br with 5 bond(s) - a max. of 1 bond(s) was specified]
        [Br with 5 bond(s) - a max. of 1 bond(s) was specified]

Failed to encode 61491 with error input violates the currently-set semantic constraints
        SMILES: N.OCl(=O)=O
        Errors:
        [Cl with 5 bond(s) - a max. of 1 bond(s) was specified]

| Processed 56000 molecules
Failed to encode 61607 with error input violates the currently-set semantic constraints
        SMILES: [O-]Cl(=O)(=O)=O.[O-]Cl(=O)(=O)=O.[Sr+2]
        Errors:
        [Cl with 7 bond(s) - a max. of 1 bond(s) was specified]
        [Cl with 7 bond(s) - a max. of 1 bond(s) was specified]

Failed to encode 61610 with error input violates the currently-set semantic constraints
        SMILES: [O-]Cl(=O)=O.[Tl+]
        Errors:
        [Cl with 5 bond(s) - a max. of 1 bond(s) was specified]

Failed to encode 61623 with error input violates the currently-set semantic constraints
        SMILES: [O-]Cl(=O)(=O)=O.[O-]Cl(=O)(=O)=O.[Ba+2]
        Errors:
        [Cl with 7 bond(s) - a max. of 1 bond(s) was specified]
        [Cl with 7 bond(s) - a max. of 1 bond(s) was specified]

Failed to encode 61629 with error input violates the currently-set semantic constraints
        SMILES: [O-]Cl(=O)(=O)=O.[O-]Cl(=O)(=O)=O.[Ca+2]
        Errors:
        [Cl with 7 bond(s) - a max. of 1 bond(s) was specified]
        [Cl with 7 bond(s) - a max. of 1 bond(s) was specified]

Failed to encode 61644 with error input violates the currently-set semantic constraints
        SMILES: [O-]Cl(=O)(=O)=O.[O-]Cl(=O)(=O)=O.[O-]Cl(=O)(=O)=O.[Cr+3]
        Errors:
        [Cl with 7 bond(s) - a max. of 1 bond(s) was specified]
        [Cl with 7 bond(s) - a max. of 1 bond(s) was specified]
        [Cl with 7 bond(s) - a max. of 1 bond(s) was specified]

[06:46:27] Explicit valence for atom # 0 Cl, 5, is greater than permitted
[06:46:27] ERROR: Could not sanitize molecule ending on line 11005834
[06:46:27] ERROR: Explicit valence for atom # 0 Cl, 5, is greater than permitted
Failed to encode 61655 with error input violates the currently-set semantic constraints
        SMILES: [O-]Cl(=O)(=O)=O.[O-]Cl(=O)(=O)=O.[Pb+2]
        Errors:
        [Cl with 7 bond(s) - a max. of 1 bond(s) was specified]
        [Cl with 7 bond(s) - a max. of 1 bond(s) was specified]

Failed to encode 61691 with error input violates the currently-set semantic constraints
        SMILES: [NH4+].[O-]Br(=O)=O
        Errors:
        [Br with 5 bond(s) - a max. of 1 bond(s) was specified]

Failed to encode 61706 with error input violates the currently-set semantic constraints
        SMILES: [O-]Br(=O)=O.[O-]Br(=O)=O.[Ba+2]
        Errors:
        [Br with 5 bond(s) - a max. of 1 bond(s) was specified]
        [Br with 5 bond(s) - a max. of 1 bond(s) was specified]

Failed to encode 61749 with error input violates the currently-set semantic constraints
        SMILES: [O-]Br(=O)=O.[O-]Br(=O)=O.[Zn+2]
        Errors:
        [Br with 5 bond(s) - a max. of 1 bond(s) was specified]
        [Br with 5 bond(s) - a max. of 1 bond(s) was specified]

Failed to encode 61750 with error input violates the currently-set semantic constraints
        SMILES: [O-]Br(=O)=O.[O-]Br(=O)=O.[Mg+2]
        Errors:
        [Br with 5 bond(s) - a max. of 1 bond(s) was specified]
        [Br with 5 bond(s) - a max. of 1 bond(s) was specified]

Failed to encode 61757 with error input violates the currently-set semantic constraints
        SMILES: [O-]Cl=O.[O-]Cl=O.[Ca+2]
        Errors:
        [Cl with 3 bond(s) - a max. of 1 bond(s) was specified]
        [Cl with 3 bond(s) - a max. of 1 bond(s) was specified]

Failed to encode 61805 with error input violates the currently-set semantic constraints
        SMILES: C1=CC=NC=C1.OCl(=O)(=O)=O
        Errors:
        [Cl with 7 bond(s) - a max. of 1 bond(s) was specified]

[06:46:28] Explicit valence for atom # 2 Si, 8, is greater than permitted
[06:46:28] ERROR: Could not sanitize molecule ending on line 11032342
[06:46:28] ERROR: Explicit valence for atom # 2 Si, 8, is greater than permitted
[06:46:28] WARNING: not removing hydrogen atom without neighbors
[06:46:28] WARNING: not removing hydrogen atom without neighbors
[06:46:28] Explicit valence for atom # 0 Si, 8, is greater than permitted
[06:46:28] ERROR: Could not sanitize molecule ending on line 11033470
[06:46:28] ERROR: Explicit valence for atom # 0 Si, 8, is greater than permitted
Failed to encode 62070 with error input violates the currently-set semantic constraints
        SMILES: O.O=Cl[O]
        Errors:
        [Cl with 3 bond(s) - a max. of 1 bond(s) was specified]

[06:46:31] Explicit valence for atom # 0 Cl, 3, is greater than permitted
[06:46:31] ERROR: Could not sanitize molecule ending on line 11115648
[06:46:31] ERROR: Explicit valence for atom # 0 Cl, 3, is greater than permitted
[06:46:33] WARNING: not removing hydrogen atom without neighbors
| Processed 57000 molecules
| Processed 58000 molecules
Failed to encode 64625 with error input violates the currently-set semantic constraints
        SMILES: CCOP(=O)(NC12CC3CC(C1)CC(C3)C2)OC4=CC=CC=C4OCCOCCOCCOC5=CC=CC=C5OP(=O)(NC67CC8CC(C6)CC(C8)C7)OCC.[O-]Cl(=O)(=O)=O.[O-]Cl(=O)(=O)=O.[Ca+2]
        Errors:
        [Cl with 7 bond(s) - a max. of 1 bond(s) was specified]
        [Cl with 7 bond(s) - a max. of 1 bond(s) was specified]

Failed to encode 64628 with error input violates the currently-set semantic constraints
        SMILES: C1COCCOC2=CC=CC=C2OP(=O)(OC3=CC=CC=C3OCCO1)NC45CC6CC(C4)CC(C6)C5.C1COCCOC2=CC=CC=C2OP(=O)(OC3=CC=CC=C3OCCO1)NC45CC6CC(C4)CC(C6)C5.[O-]Cl(=O)(=O)=O.[O-]Cl(=O)(=O)=O.[Ca+2]
        Errors:
        [Cl with 7 bond(s) - a max. of 1 bond(s) was specified]
        [Cl with 7 bond(s) - a max. of 1 bond(s) was specified]

| Processed 59000 molecules
Failed to encode 65185 with error input violates the currently-set semantic constraints
        SMILES: OI(=O)(=O)=O
        Errors:
        [I with 7 bond(s) - a max. of 1 bond(s) was specified]

Failed to encode 65202 with error input violates the currently-set semantic constraints
        SMILES: CCNC1=CC2=C(C=C1C)C(=C3C=C(C(=[NH+]CC)C=C3O2)C)C4=CC=CC=C4C(=O)OCC.[O-]Cl(=O)(=O)=O
        Errors:
        [Cl with 7 bond(s) - a max. of 1 bond(s) was specified]

Failed to encode 65205 with error input violates the currently-set semantic constraints
        SMILES: CCN(CC)C1=CC2=C(C=C1)C(=C3C=CC(=[N+](CC)CC)C=C3O2)C4=CC=CC=C4C(=O)OCC.[O-]Cl(=O)(=O)=O
        Errors:
        [Cl with 7 bond(s) - a max. of 1 bond(s) was specified]

Failed to encode 65215 with error input violates the currently-set semantic constraints
        SMILES: CCN(CC)C1=CC2=C(C=C1C)C(=C3C=C(C(=[N+](CC)CC)C=C3O2)C)C4=CC=CC=C4C(=O)O.[O-]Cl(=O)(=O)=O
        Errors:
        [Cl with 7 bond(s) - a max. of 1 bond(s) was specified]

Failed to encode 65216 with error input violates the currently-set semantic constraints
        SMILES: CCN(CC)C1=CC2=C(C=C1)C=C3C=CC(=[N+](CC)CC)C=C3O2.[O-]Cl(=O)(=O)=O
        Errors:
        [Cl with 7 bond(s) - a max. of 1 bond(s) was specified]

Failed to encode 65219 with error input violates the currently-set semantic constraints
        SMILES: CN(C)C1=CC2=C(C=C1)C(=C3C=CC(=[N+](C)C)C=C3O2)C4=CC=CC=C4C(=O)O.[O-]Cl(=O)(=O)=O
        Errors:
        [Cl with 7 bond(s) - a max. of 1 bond(s) was specified]

Failed to encode 65221 with error input violates the currently-set semantic constraints
        SMILES: CNC1=CC2=C(C=C1)C(=C3C=CC(=[NH+]C)C=C3O2)C4=CC=CC=C4C(=O)O.[O-]Cl(=O)(=O)=O
        Errors:
        [Cl with 7 bond(s) - a max. of 1 bond(s) was specified]

| Processed 60000 molecules
| Processed 61000 molecules
Failed to encode 67231 with error input violates the currently-set semantic constraints
        SMILES: C1=CC=C2C(=C1)C(=O)OI2O
        Errors:
        [I with 3 bond(s) - a max. of 1 bond(s) was specified]

Failed to encode 67537 with error input violates the currently-set semantic constraints
        SMILES: C1=CC=C(C(=C1)C(=O)O)I=O
        Errors:
        [I with 3 bond(s) - a max. of 1 bond(s) was specified]

| Processed 62000 molecules
| Processed 63000 molecules
Failed to encode 70076 with error input violates the currently-set semantic constraints
        SMILES: ClI(Cl)Cl
        Errors:
        [I with 3 bond(s) - a max. of 1 bond(s) was specified]

| Processed 64000 molecules
| Processed 65000 molecules
| Processed 66000 molecules
[06:57:08] WARNING: not removing hydrogen atom without neighbors
[06:57:08] WARNING: not removing hydrogen atom without neighbors
| Processed 67000 molecules
Failed to encode 73875 with error input violates the currently-set semantic constraints
        SMILES: C1=CC=C(C=C1)C2=CC(=[O+]C(=C2)C3=CC=CC=C3)C4=CC=CC=C4.[O-]Cl(=O)(=O)=O
        Errors:
        [Cl with 7 bond(s) - a max. of 1 bond(s) was specified]

Failed to encode 74723 with error input violates the currently-set semantic constraints
        SMILES: CCCC[N+](CCCC)(CCCC)CCCC.[O-]Cl(=O)(=O)=O
        Errors:
        [Cl with 7 bond(s) - a max. of 1 bond(s) was specified]

| Processed 68000 molecules
| Processed 69000 molecules
Failed to encode 76227 with error input violates the currently-set semantic constraints
        SMILES: CN(C)C1=CC=C(C=C1)N=[NH2+].F[P-](F)(F)(F)(F)F
        Errors:
        [[P-1] with 6 bond(s) - a max. of 4 bond(s) was specified]

Failed to encode 76724 with error input violates the currently-set semantic constraints
        SMILES: CC(=O)OI(C1=CC=CC=C1)OC(=O)C
        Errors:
        [I with 3 bond(s) - a max. of 1 bond(s) was specified]

| Processed 70000 molecules
[06:57:36] Explicit valence for atom # 0 Br, 3, is greater than permitted
[06:57:36] ERROR: Could not sanitize molecule ending on line 13865189
[06:57:36] ERROR: Explicit valence for atom # 0 Br, 3, is greater than permitted
[06:57:36] Explicit valence for atom # 0 Br, 3, is greater than permitted
[06:57:36] ERROR: Could not sanitize molecule ending on line 13865289
[06:57:36] ERROR: Explicit valence for atom # 0 Br, 3, is greater than permitted
| Processed 71000 molecules
Failed to encode 77938 with error input violates the currently-set semantic constraints
        SMILES: CCOC1=CC(=C(C=C1[N+]#N)OCC)N2CCOCC2.F[P-](F)(F)(F)(F)F
        Errors:
        [[P-1] with 6 bond(s) - a max. of 4 bond(s) was specified]

Failed to encode 78388 with error input violates the currently-set semantic constraints
        SMILES: CCCCCC[N+](CCCCCC)(CCCCCC)CCCCCC.[O-]Cl(=O)(=O)=O
        Errors:
        [Cl with 7 bond(s) - a max. of 1 bond(s) was specified]

| Processed 72000 molecules
Failed to encode 79165 with error input violates the currently-set semantic constraints
        SMILES: C1=CC=C2C(=C1)[N+](=CS2)CCCCCCCCCC[N+]3=CSC4=CC=CC=C43.[O-]Cl(=O)(=O)=O.[O-]Cl(=O)(=O)=O
        Errors:
        [Cl with 7 bond(s) - a max. of 1 bond(s) was specified]
        [Cl with 7 bond(s) - a max. of 1 bond(s) was specified]

| Processed 73000 molecules
Failed to encode 80794 with error input violates the currently-set semantic constraints
        SMILES: CC(C)(C)OCl(=O)(=O)=O
        Errors:
        [Cl with 7 bond(s) - a max. of 1 bond(s) was specified]

| Processed 74000 molecules
Failed to encode 81730 with error input violates the currently-set semantic constraints
        SMILES: CC1=CC(=NCCN1)C.OCl(=O)(=O)=O
        Errors:
        [Cl with 7 bond(s) - a max. of 1 bond(s) was specified]

[06:58:01] WARNING: not removing hydrogen atom without neighbors
| Processed 75000 molecules
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment