Created
September 1, 2022 15:55
-
-
Save greglandrum/5cf3d6710659bde75519c7325bdbbcf8 to your computer and use it in GitHub Desktop.
fingerprint_screenout_with_words.ipynb
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "from rdkit import Chem\nfrom rdkit.Chem import DataStructs", | |
"execution_count": 1, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "ms = [Chem.MolFromSmiles(x) for x in ('Cc1ccccc1','Cc1ncccc1')]\nq = Chem.MolFromSmiles('c1ccccc1')\n[x.HasSubstructMatch(q) for x in ms]", | |
"execution_count": 2, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"execution_count": 2, | |
"data": { | |
"text/plain": "[True, False]" | |
}, | |
"metadata": {} | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "pfps = [Chem.PatternFingerprint(x,fpSize=1024) for x in ms]\nqfp = Chem.PatternFingerprint(q,fpSize=1024)", | |
"execution_count": 3, | |
"outputs": [] | |
}, | |
{ | |
"metadata": {}, | |
"cell_type": "markdown", | |
"source": "Check the condition for substructure matching with the pattern fingerprints:" | |
}, | |
{ | |
"metadata": { | |
"trusted": true, | |
"scrolled": true | |
}, | |
"cell_type": "code", | |
"source": "[x&qfp == qfp for x in pfps]", | |
"execution_count": 4, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"execution_count": 4, | |
"data": { | |
"text/plain": "[True, False]" | |
}, | |
"metadata": {} | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "def convert_bv_to_64bit_ints(fp):\n ''' use a combination of RDKit+python functionality to convert a\n fingerprint into a list of 64bit ints \n '''\n qt = DataStructs.BitVectToBinaryText(fp)\n words = [int.from_bytes(qt[i:i+8],'big') for i in range(0,len(qt),8)]\n return words", | |
"execution_count": 5, | |
"outputs": [] | |
}, | |
{ | |
"metadata": {}, | |
"cell_type": "markdown", | |
"source": "Convert the query fp and each of the molecule fps to lists of 64 bit ints" | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "qwords = convert_bv_to_64bit_ints(qfp)\npwords = [convert_bv_to_64bit_ints(x) for x in pfps]", | |
"execution_count": 6, | |
"outputs": [] | |
}, | |
{ | |
"metadata": {}, | |
"cell_type": "markdown", | |
"source": "Here's the simple python way to check the substructure condition using those vectors of ints" | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "[all(x&y==y for x,y in zip(pw,qwords)) for pw in pwords]", | |
"execution_count": 7, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"execution_count": 7, | |
"data": { | |
"text/plain": "[True, False]" | |
}, | |
"metadata": {} | |
} | |
] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"name": "python3", | |
"display_name": "Python 3 (ipykernel)", | |
"language": "python" | |
}, | |
"toc": { | |
"nav_menu": {}, | |
"number_sections": true, | |
"sideBar": true, | |
"skip_h1_title": false, | |
"base_numbering": 1, | |
"title_cell": "Table of Contents", | |
"title_sidebar": "Contents", | |
"toc_cell": false, | |
"toc_position": {}, | |
"toc_section_display": true, | |
"toc_window_display": false | |
}, | |
"language_info": { | |
"name": "python", | |
"version": "3.9.12", | |
"mimetype": "text/x-python", | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"pygments_lexer": "ipython3", | |
"nbconvert_exporter": "python", | |
"file_extension": ".py" | |
}, | |
"gist": { | |
"id": "", | |
"data": { | |
"description": "fingerprint_screenout_with_words.ipynb", | |
"public": true | |
} | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 5 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment