Created
September 27, 2022 10:56
-
-
Save padix-key/ecd5c5b494242d7eb1c4004e677d8191 to your computer and use it in GitHub Desktop.
Removal of incomplete residues
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np | |
import biotite.structure as struc | |
import biotite.structure.io.mmtf as mmtf | |
import biotite.structure.info as info | |
import biotite.database.rcsb as rcsb | |
def remove_incomplete_residues(atoms): | |
include_mask = np.zeros(atoms.array_length(), dtype=bool) | |
residue_starts = struc.get_residue_starts(atoms, add_exclusive_stop=True) | |
# Iterate over each residue in the input structure | |
for i in range(len(residue_starts) - 1): | |
start = residue_starts[i] | |
stop = residue_starts[i+1] | |
residue_length = stop - start | |
# Obtain the reference residue | |
# from Chemical Component Dictionary | |
ref_residue = info.residue(atoms.res_name[start]) | |
ref_residue = ref_residue[ref_residue.element != "H"] | |
if stop != atoms.array_length(): | |
# If this residue is not the final residue, | |
# the 'OXT' atom is missing due to the peptide bond | |
ref_residue = ref_residue[ref_residue.atom_name != "OXT"] | |
ref_length = ref_residue.array_length() | |
# Expect at least as many atoms in the residue as in the reference | |
# If this is not the case, at least one atom is missing and the | |
# entire residue is filtered out | |
if residue_length >= ref_length: | |
include_mask[start : stop] = True | |
atoms = atoms[include_mask] | |
return atoms | |
### Example code | |
atoms = mmtf.get_structure( | |
mmtf.MMTFFile.read(rcsb.fetch("1l2y", "mmtf")), | |
model=1 | |
) | |
atoms = atoms[atoms.element != "H"] | |
print(len(atoms)) | |
# All residues are complete -> no atom is filtered out | |
atoms = remove_incomplete_residues(atoms) | |
print(len(atoms)) | |
# Remove a single atom... | |
atoms = atoms[1:] | |
# ... -> the whole residue is filtered out | |
atoms = remove_incomplete_residues(atoms) | |
print(len(atoms)) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment