Skip to content

Instantly share code, notes, and snippets.

@iwatobipen
Created April 6, 2023 12:50
Show Gist options
  • Save iwatobipen/f4bd06c0e38fd07cd463ad9a21e115b1 to your computer and use it in GitHub Desktop.
Save iwatobipen/f4bd06c0e38fd07cd463ad9a21e115b1 to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "3cae514e",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"[21:10:28] Initializing Normalizer\n",
"[21:10:28] Explicit valence for atom # 8 Sb, 9, is greater than permitted\n",
"[21:10:28] ERROR: Could not sanitize molecule on line 1825\n",
"[21:10:28] ERROR: Explicit valence for atom # 8 Sb, 9, is greater than permitted\n",
"[21:10:28] Explicit valence for atom # 9 N, 5, is greater than permitted\n",
"[21:10:28] ERROR: Could not sanitize molecule on line 2097\n",
"[21:10:28] ERROR: Explicit valence for atom # 9 N, 5, is greater than permitted\n",
"[21:10:28] Explicit valence for atom # 4 Al, 9, is greater than permitted\n",
"[21:10:28] ERROR: Could not sanitize molecule on line 3226\n",
"[21:10:28] ERROR: Explicit valence for atom # 4 Al, 9, is greater than permitted\n",
"[21:10:28] Explicit valence for atom # 4 C, 5, is greater than permitted\n",
"[21:10:28] ERROR: Could not sanitize molecule on line 3399\n",
"[21:10:28] ERROR: Explicit valence for atom # 4 C, 5, is greater than permitted\n",
"[21:10:28] Explicit valence for atom # 2 O, 4, is greater than permitted\n",
"[21:10:28] ERROR: Could not sanitize molecule on line 4508\n",
"[21:10:28] ERROR: Explicit valence for atom # 2 O, 4, is greater than permitted\n",
"[21:10:28] Explicit valence for atom # 3 Be, 4, is greater than permitted\n",
"[21:10:28] ERROR: Could not sanitize molecule on line 4596\n",
"[21:10:28] ERROR: Explicit valence for atom # 3 Be, 4, is greater than permitted\n"
]
}
],
"source": [
"from rdkit import Chem\n",
"from rdkit.Chem import AllChem\n",
"from rdkit.Chem.Draw import IPythonConsole\n",
"from chembl_structure_pipeline import standardizer\n",
"from rdkit import RDLogger\n",
"import time\n",
"# Silence RDKit info-level log messages.\n",
"RDLogger.DisableLog('rdApp.info')\n",
"# Tab-delimited SMILES file with the SMILES string in the first column.\n",
"suppl = Chem.SmilesMolSupplier('./first_5K.smi', smilesColumn=0, delimiter='\\t')\n",
"# The supplier can yield None for entries it fails to parse (see the\n",
"# sanitization errors in this cell's output); drop those entries.\n",
"mols = [m for m in suppl if m is not None]\n",
"# Only the first 1000 parsed molecules are used in the rest of the notebook.\n",
"mols = mols[:1000]\n"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "74b598d5",
"metadata": {},
"outputs": [],
"source": [
"\n",
"def stdmol(mol):\n",
"    \"\"\"Return the parent molecule of the standardized form of ``mol``.\n",
"\n",
"    ``mol`` is passed through ``standardizer.standardize_mol`` and the result\n",
"    through ``standardizer.get_parent_mol``, which returns a pair; only the\n",
"    first element of that pair is returned.\n",
"    \"\"\"\n",
"    standardized = standardizer.standardize_mol(mol)\n",
"    parent, _ = standardizer.get_parent_mol(standardized)\n",
"    return parent\n",
"# Replace every loaded molecule with the result of stdmol.\n",
"mols = list(map(stdmol, mols))"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "f750f1e4",
"metadata": {},
"outputs": [],
"source": [
"def confgen(tpl, mols=mols):\n",
"    \"\"\"Embed and MMFF-optimize one engine's share of ``mols``.\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    tpl : tuple\n",
"        ``(nengine, clsidx)``. The molecule at position ``idx`` is processed\n",
"        only when ``idx % nengine == clsidx``.\n",
"    mols : sequence of molecules\n",
"        Defaults to the notebook-level ``mols`` as bound when this function\n",
"        was defined.\n",
"\n",
"    Returns\n",
"    -------\n",
"    int\n",
"        Number of selected molecules for which ``EmbedMolecule`` returned 0\n",
"        and the MMFF optimization call completed without raising.\n",
"    \"\"\"\n",
"    # Imported inside the function, presumably so the names resolve on the\n",
"    # ipyparallel engines that execute it -- TODO confirm.\n",
"    from rdkit import Chem\n",
"    from rdkit.Chem import AllChem\n",
"    nengine, clsidx = tpl\n",
"    nwrite = 0\n",
"    for idx, mol in enumerate(mols):\n",
"        # Skip molecules that belong to another share.\n",
"        if idx % nengine != clsidx:\n",
"            continue\n",
"        try:\n",
"            hmol = Chem.AddHs(mol)\n",
"            # NOTE(review): no random seed is passed to the embedder, so the\n",
"            # count may differ between runs -- confirm whether that is intended.\n",
"            if AllChem.EmbedMolecule(hmol) != 0:\n",
"                continue\n",
"            AllChem.MMFFOptimizeMolecule(hmol)\n",
"            nwrite += 1\n",
"        except Exception:\n",
"            # Best effort: a molecule that raises is skipped and not counted.\n",
"            # Catching Exception instead of using a bare except lets\n",
"            # KeyboardInterrupt and SystemExit propagate.\n",
"            continue\n",
"    return nwrite"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "525afae9",
"metadata": {},
"outputs": [],
"source": [
"import ipyparallel as ipp"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "470611a5",
"metadata": {},
"outputs": [],
"source": [
"def runtask(nengines=1):\n",
"    \"\"\"Run ``confgen`` on a temporary ipyparallel cluster and time it.\n",
"\n",
"    Starts a cluster with ``nengines`` engines, submits one ``confgen`` task\n",
"    per share ``(nengines, idx)`` through a load-balanced view, prints the\n",
"    list of per-task results, and then prints the elapsed seconds. The timed\n",
"    interval spans the whole ``with`` block, so it includes entering and\n",
"    leaving the cluster context.\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    nengines : int\n",
"        Number of engines to start and number of tasks to submit.\n",
"    \"\"\"\n",
"    # perf_counter is monotonic, so the measured interval cannot be distorted\n",
"    # by system clock adjustments the way time.time() can.\n",
"    start = time.perf_counter()\n",
"    with ipp.Cluster(n=nengines) as rc:\n",
"        view = rc.load_balanced_view()\n",
"        tasks = [(nengines, idx) for idx in range(nengines)]\n",
"        asyncresult = view.map_async(confgen, tasks)\n",
"        asyncresult.wait_interactive()\n",
"        # retrieve actual results\n",
"        result = asyncresult.get()\n",
"        print(result)\n",
"    finish = time.perf_counter()\n",
"    print(finish - start)"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "11e016c5",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Starting 1 engines with <class 'ipyparallel.cluster.launcher.LocalEngineSetLauncher'>\n"
]
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "53bc35b5a1364998b4749afac3a3a0d1",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
" 0%| | 0/1 [00:00<?, ?engine/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "24386b2a4ea047be88cae75313cf5a8e",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"confgen: 0%| | 0/1 [00:00<?, ?tasks/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"[992]\n",
"Stopping engine(s): 1680783042\n",
"engine set stopped 1680783042: {'engines': {'0': {'exit_code': 0, 'pid': 429592, 'identifier': '0'}}, 'exit_code': 0}\n",
"Stopping controller\n",
"Controller stopped: {'exit_code': 0, 'pid': 429563, 'identifier': 'ipcontroller-1680783041-w6r9-429542'}\n",
"32.62002611160278\n"
]
}
],
"source": [
"# Baseline run with a single engine.\n",
"runtask(nengines=1)"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "679451a7",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Starting 16 engines with <class 'ipyparallel.cluster.launcher.LocalEngineSetLauncher'>\n"
]
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "a5058565a70948b3a453522b496044b3",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
" 0%| | 0/16 [00:00<?, ?engine/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "c7599b5a98ad43dc8e3cf4d187689fef",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"confgen: 0%| | 0/16 [00:00<?, ?tasks/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"[63, 61, 63, 63, 63, 62, 63, 63, 61, 61, 61, 62, 62, 62, 62, 61]\n",
"Stopping engine(s): 1680783075\n",
"engine set stopped 1680783075: {'engines': {'0': {'exit_code': 0, 'pid': 429664, 'identifier': '0'}, '1': {'exit_code': 0, 'pid': 429666, 'identifier': '1'}, '2': {'exit_code': 0, 'pid': 429668, 'identifier': '2'}, '3': {'exit_code': 0, 'pid': 429670, 'identifier': '3'}, '4': {'exit_code': 0, 'pid': 429676, 'identifier': '4'}, '5': {'exit_code': 0, 'pid': 429682, 'identifier': '5'}, '6': {'exit_code': 0, 'pid': 429688, 'identifier': '6'}, '7': {'exit_code': 0, 'pid': 429700, 'identifier': '7'}, '8': {'exit_code': 0, 'pid': 429713, 'identifier': '8'}, '9': {'exit_code': 0, 'pid': 429728, 'identifier': '9'}, '10': {'exit_code': 0, 'pid': 429738, 'identifier': '10'}, '11': {'exit_code': 0, 'pid': 429754, 'identifier': '11'}, '12': {'exit_code': 0, 'pid': 429770, 'identifier': '12'}, '13': {'exit_code': 0, 'pid': 429786, 'identifier': '13'}, '14': {'exit_code': 0, 'pid': 429802, 'identifier': '14'}, '15': {'exit_code': 0, 'pid': 429818, 'identifier': '15'}}, 'exit_code': 0}\n",
"Stopping controller\n",
"Controller stopped: {'exit_code': 0, 'pid': 429633, 'identifier': 'ipcontroller-1680783074-y6mr-429542'}\n",
"12.983936071395874\n"
]
}
],
"source": [
"# Same workload split across 16 engines.\n",
"runtask(nengines=16)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "55eeb63e",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.16"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment