Skip to content

Instantly share code, notes, and snippets.

@PatWalters
Created October 2, 2018 01:20
Show Gist options
  • Save PatWalters/c046fee2760e6894ed13e19b8c99193b to your computer and use it in GitHub Desktop.
Save PatWalters/c046fee2760e6894ed13e19b8c99193b to your computer and use it in GitHub Desktop.
An improved script to extract a ligand from a protein-ligand complex and assign bond orders
#!/usr/bin/env python
import sys
from prody import *
from rdkit import Chem
from rdkit.Chem import AllChem
from io import StringIO
import pypdb
def get_pdb_components(pdb_id):
"""
Split a protein-ligand pdb into protein and ligand components
:param pdb_id:
:return:
"""
pdb = parsePDB(pdb_id)
protein = pdb.select('protein')
ligand = pdb.select('not protein and not water')
return protein, ligand
def process_ligand(ligand, res_name):
"""
Add bond orders to a pdb ligand
1. Select the ligand component with name "res_name"
2. Get the corresponding SMILES from pypdb
3. Create a template molecule from the SMILES in step 2
4. Write the PDB file to a stream
5. Read the stream into an RDKit molecule
6. Assign the bond orders from the template from step 3
:param ligand: ligand as generated by prody
:param res_name: residue name of ligand to extract
:return: molecule with bond orders assigned
"""
output = StringIO()
sub_mol = ligand.select(f"resname {res_name}")
chem_desc = pypdb.describe_chemical(f"{res_name}")
sub_smiles = chem_desc["describeHet"]["ligandInfo"]["ligand"]["smiles"]
template = AllChem.MolFromSmiles(sub_smiles)
writePDBStream(output, sub_mol)
pdb_string = output.getvalue()
rd_mol = AllChem.MolFromPDBBlock(pdb_string)
new_mol = AllChem.AssignBondOrdersFromTemplate(template, rd_mol)
return new_mol
def write_pdb(protein, pdb_name):
"""
Write a prody protein to a pdb file
:param protein: protein object from prody
:param pdb_name: base name for the pdb file
:return: None
"""
output_pdb_name = f"{pdb_name}_protein.pdb"
writePDB(f"{output_pdb_name}", protein)
print(f"wrote {output_pdb_name}")
def write_sdf(new_mol, pdb_name, res_name):
"""
Write an RDKit molecule to an SD file
:param new_mol:
:param pdb_name:
:param res_name:
:return:
"""
outfile_name = f"{pdb_name}_{res_name}_ligand.sdf"
writer = Chem.SDWriter(f"{outfile_name}")
writer.write(new_mol)
print(f"wrote {outfile_name}")
def main(pdb_name):
"""
Read Ligand Expo data, split pdb into protein and ligands,
write protein pdb, write ligand sdf files
:param pdb_name: id from the pdb, doesn't need to have an extension
:return:
"""
protein, ligand = get_pdb_components(pdb_name)
write_pdb(protein, pdb_name)
res_name_list = list(set(ligand.getResnames()))
for res in res_name_list:
new_mol = process_ligand(ligand, res)
write_sdf(new_mol, pdb_name, res)
if __name__ == "__main__":
if len(sys.argv) == 2:
main(sys.argv[1])
else:
print("Usage: {sys.argv[1]} pdb_id", file=sys.stderr)
@iamkaant
Copy link

Dear Dr. Walters,

Thank you for sharing this script; it works like a charm! The only thing I had to update was due to the changes in pypdb. Instead of
sub_smiles = chem_desc["describeHet"]["ligandInfo"]["ligand"]["smiles"]
I used

    sub_smiles = None
    for item in chem_desc.get('pdbx_chem_comp_descriptor', []):
        if item.get('type') == 'SMILES':
            sub_smiles = item.get('descriptor')
            break

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment