Skip to content

Instantly share code, notes, and snippets.

@JoaoRodrigues
Created August 22, 2020 22:40
Show Gist options
  • Save JoaoRodrigues/39134ebe6fac51de8f702ab3d151bc01 to your computer and use it in GitHub Desktop.
Save JoaoRodrigues/39134ebe6fac51de8f702ab3d151bc01 to your computer and use it in GitHub Desktop.
Script to test PDBParser (biopython)
#!/usr/bin/env python
"""
Script to benchmark Bio.PDB PDBParser on a collection
of PDB files.
"""
import argparse
import gzip
import pathlib
import time
import traceback
import psutil
from Bio.PDB import PDBParser
from Bio.PDB import PDBIO
parser = PDBParser(PERMISSIVE=1, QUIET=1)
writer = PDBIO()
def get_models(s):
return sorted(m.id for m in s)
def get_chains(s):
return sorted(c.id for c in s.get_chains())
def get_resids(s):
return sorted(r.id[1] for r in s.get_residues())
def get_anames(s):
return sorted(
a.name for r in s.get_residues() for a in r.get_unpacked_list()
)
def read_pdb(fpath):
"""Round-trip parse/write/parse a PDB file."""
try:
with gzip.open(fpath, mode='rt') as handle:
t0 = time.time()
s1 = parser.get_structure(fpath.name, handle)
t1 = time.time()
except:
raise
else:
nmodels = len(s1)
nchains = len(s1[0])
nres = sum(1 for r in s1.get_residues())
natoms = sum(
1 for r in s1.get_residues() for a in r.get_unpacked_list()
)
with fpath.with_suffix('.success').open('w') as f:
print(f'Read time: {t1 - t0:8.3f}', file=f)
print(f'No. Models: {nmodels}', file=f)
print(f'No. Chains: {nchains}', file=f)
print(f'No. Residues: {nres}', file=f)
print(f'No. Atoms: {natoms}', file=f)
t0 = time.time()
writer.set_structure(s1)
writer.save('save.pdb')
t1 = time.time()
print(f'Write time: {t1 - t0:8.3f}', file=f)
s2 = parser.get_structure('redo', 'save.pdb')
assert get_models(s1) == get_models(s2), 'Models differ'
assert get_chains(s1) == get_chains(s2), 'Chains differ'
assert get_resids(s1) == get_resids(s2), 'Resids differ'
assert get_anames(s1) == get_anames(s2), 'Atoms differ'
print('Round trip: OK', file=f)
def main():
ap = argparse.ArgumentParser(description=__doc__)
ap.add_argument(
'folder',
type=pathlib.Path,
help='Top-level folder with PDB files'
)
args = ap.parse_args()
pdblist = list(args.folder.rglob('*.ent.gz'))
print(f'Found {len(pdblist)} files')
for idx, pdbf in enumerate(pdblist, start=1):
try:
read_pdb(pdbf)
except Exception as err:
with pdbf.with_suffix('.failed').open('w') as f:
print(err, file=f)
print(traceback.format_exc(), file=f)
memuse = psutil.virtual_memory().percent
print(f'[{idx:>7d}/{len(pdblist)}] Memory = {memuse}%')
if __name__ == '__main__':
main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment