Last active
February 3, 2020 11:47
-
-
Save rob-miller/4da6a4bab5fa91635f5b748d4e60e875 to your computer and use it in GitHub Desktop.
demo for internal_coords pull request to Biopython
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/local/bin/python3 | |
# Copyright (c) 2019 Robert T. Miller. All rights reserved. | |
# -*- coding: latin-1 -*- | |
"""Interconvert PDB internal and external coordinates. | |
Interconvert PDB Structure data between external (X, Y, Z cartesian) | |
coordinates and internal (bond length, angle and dihedral angle) measurements. | |
""" | |
# | |
# replicate buildprot with biopython | |
# | |
import argparse | |
import os | |
import sys | |
import re | |
import cProfile | |
import timeit | |
import sys | |
print(sys.path) | |
# import pyximport | |
# pyximport.install(language_level=3) | |
# import itertools | |
# print(sys.path) | |
import gzip | |
import warnings | |
from Bio.PDB.PDBParser import PDBParser | |
from Bio.PDB.MMCIFParser import MMCIFParser | |
from Bio.PDB.mmcifio import MMCIFIO | |
from io import StringIO | |
from Bio.PDB.ic_rebuild import ( | |
report_IC, | |
structure_rebuild_test, | |
write_PDB, | |
IC_duplicate, | |
) | |
from Bio.PDB.PICIO import write_PIC, read_PIC | |
from Bio.PDB.internal_coords import IC_Residue, AtomKey, IC_Chain | |
from Bio.PDB.SCADIO import write_SCAD | |
PDB_repository_base = None | |
if os.path.isdir("/media/data/pdb"): | |
PDB_repository_base = "/media/data/pdb/" | |
elif os.path.isdir("/Volumes/data/pdb"): | |
PDB_repository_base = "/Volumes/data/pdb/" | |
print(sys.version) | |
scale_val = 2 | |
scadCode_val = True | |
ti_iter = 3 | |
arg_parser = argparse.ArgumentParser( | |
description="Interconvert .pic (protein internal coordinates) and " | |
".pdb (protein data bank) files." | |
) | |
arg_parser.add_argument( | |
"file", | |
nargs="*", | |
help="a .pdb, .cif or .pic path/filename to read, or a PDB idCode with " | |
"optional chain ID to read from {0} as .ent.gz".format( | |
( | |
PDB_repository_base | |
or "[PDB resource not defined - please configure before use]" | |
) | |
), | |
) | |
arg_parser.add_argument( | |
"-f", | |
dest="filelist", | |
help="a Dunbrack cullPDB pdb ID list to read from {0} as .ent.gz".format( | |
( | |
PDB_repository_base | |
or "[PDB resource not defined - please configure before use]" | |
) | |
), | |
) | |
arg_parser.add_argument( | |
"-skip", dest="skip_count", help="count of pdb ID list entries to skip" | |
) | |
arg_parser.add_argument( | |
"-limit", dest="limit_count", help="stop after this many pdb ID list entries" | |
) | |
arg_parser.add_argument( | |
"-chain", dest="sel_chain", help="chain to pick from PDB/mmCIF file" | |
) | |
arg_parser.add_argument( | |
"-wp", help="write pdb file with .pdb extension", action="store_true" | |
) | |
arg_parser.add_argument( | |
"-wip", | |
help="convert to internal coords and back, write .pdb file", | |
action="store_true", | |
) | |
arg_parser.add_argument( | |
"-wpi", help="convert to atom coords and back, write .pic file", action="store_true" | |
) | |
arg_parser.add_argument( | |
"-wc", help="write MMCIF file with .cif extension", action="store_true" | |
) | |
arg_parser.add_argument( | |
"-wi", | |
help="write internal coordinates file with .pic extension", | |
action="store_true", | |
) | |
arg_parser.add_argument( | |
"-ws", help="write OpenSCAD file with .scad extension", action="store_true" | |
) | |
arg_parser.add_argument( | |
"-scale", | |
dest="scale", | |
help="OpenSCAD output: units (usually mm) per " | |
"angstrom, default " + str(scale_val), | |
) | |
arg_parser.add_argument( | |
"-nsc", help="OpenSCAD output: no OpenSCAD code, just arrays", action="store_true" | |
) | |
arg_parser.add_argument( | |
"-flex", help="OpenSCAD output: rotatable backbone CA bonds", action="store_true" | |
) | |
arg_parser.add_argument( | |
"-hb", help="OpenSCAD output: magnetic backbone H bonds", action="store_true" | |
) | |
arg_parser.add_argument( | |
"-maxp", | |
dest="maxp", | |
help="max N-C peptide bond length for chain breaks, " | |
"default " + str(IC_Chain.MaxPeptideBond), | |
) | |
arg_parser.add_argument( | |
"-backbone", help="OpenSCAD output: skip sidechains", action="store_true" | |
) | |
arg_parser.add_argument( | |
"-t", help="test conversion pdb/pic to pic/pdb", action="store_true" | |
) | |
arg_parser.add_argument( | |
"-tv", help="verbose test conversion pdb<>pic", action="store_true" | |
) | |
arg_parser.add_argument("-cp", help="cprofile option for -t, -tv", action="store_true") | |
arg_parser.add_argument("-ti", help="timeit option for -t, -tv", action="store_true") | |
arg_parser.add_argument( | |
"-ti_iter", | |
dest="ti_iter", | |
help="timeit iteration count option for -t, -tv, " "default " + str(ti_iter), | |
) | |
arg_parser.add_argument("-nh", help="ignore hydrogens on PDB read", action="store_true") | |
arg_parser.add_argument( | |
"-d2h", help="swap D (deuterium) for H on PDB read", action="store_true" | |
) | |
arg_parser.add_argument( | |
"-amide", help="only amide proton, skip other Hs on PDB read", action="store_true" | |
) | |
arg_parser.add_argument("-gcb", help="generate GLY C-beta atoms", action="store_true") | |
arg_parser.add_argument( | |
"-rama", help="print psi, phi, omega values", action="store_true" | |
) | |
arg_parser.add_argument( | |
"-rama2", help="test setting dihedral angles", action="store_true" | |
) | |
arg_parser.add_argument( | |
"-i", help="test just convert to internal coordinates", action="store_true" | |
) | |
args = arg_parser.parse_args() | |
# print(args) | |
if args.nh: | |
IC_Residue.accept_atoms = IC_Residue.accept_mainchain | |
if args.amide: | |
IC_Residue.accept_atoms = IC_Residue.accept_mainchain + ("H",) | |
if args.d2h: | |
IC_Residue.accept_atoms += IC_Residue.accept_deuteriums | |
AtomKey.d2h = True | |
if args.gcb: | |
IC_Residue.gly_Cbeta = True | |
if args.maxp: | |
IC_Chain.MaxPeptideBond = float(args.maxp) | |
if args.scale: | |
scale_val = float(args.scale) | |
if args.skip_count: | |
args.skip_count = int(args.skip_count) | |
if args.limit_count: | |
args.limit_count = int(args.limit_count) | |
toProcess = args.file | |
pdbidre = re.compile(r"(^\d(\w\w)\w)(\w)?$") | |
if args.filelist: | |
flist = open(args.filelist, "r") | |
for aline in flist: | |
fields = aline.split() | |
pdbidMatch = pdbidre.match(fields[0]) | |
if pdbidMatch: | |
# print(m.group(1) + ' ' + m.group(2)) | |
# toProcess.append(PDB_repository_base + m.group(2) | |
# + '/pdb' + m.group(1) + '.ent.gz' ) | |
toProcess.append(pdbidMatch.group(0)) | |
if len(toProcess): | |
print(len(toProcess), "entries to process") | |
else: | |
print("no files to process. use '-h' for help") | |
sys.exit(0) | |
PDB_parser = PDBParser(PERMISSIVE=True, QUIET=True) | |
CIF_parser = MMCIFParser(QUIET=True) | |
fileNo = 1 | |
for target in toProcess: | |
if args.skip_count and fileNo <= args.skip_count: | |
fileNo += 1 | |
continue | |
if args.limit_count is not None: | |
if args.limit_count <= 0: | |
sys.exit(0) | |
args.limit_count -= 1 | |
pdb_input = False | |
pic_input = False | |
cif_input = False | |
pdb_structure = None | |
# pdb_chain = None | |
prot_id = "" | |
outfile = os.path.basename(target) | |
pdbidMatch = pdbidre.match(target) | |
if pdbidMatch is not None: | |
assert PDB_repository_base, "PDB repository base directory missing, " | |
"please configure for this host" | |
pdb_input = True | |
filename = ( | |
PDB_repository_base | |
+ pdbidMatch.group(2).lower() | |
+ "/pdb" | |
+ pdbidMatch.group(1).lower() | |
+ ".ent.gz" | |
) | |
prot_id = pdbidMatch.group(1) | |
else: | |
filename = target | |
pdbidre = re.compile(r"(\d\w\w\w)(\w)?") # find anywhere in string | |
pdbidMatch2 = pdbidre.search(target) | |
if pdbidMatch2: | |
prot_id = pdbidMatch2.group(0) | |
else: | |
prot_id = target | |
if not pdb_input: | |
try: | |
pdb_structure = read_PIC( | |
gzip.open(filename, mode="rt") if filename.endswith(".gz") else filename | |
) | |
except FileNotFoundError: # OSError python2 | |
# print(print(os.getcwd()), filename, "not found") | |
print(os.getcwd(), filename, "not found") | |
continue | |
if pdb_structure is not None: | |
pic_input = True | |
if pdb_structure is None: | |
pdb_input = True | |
try: | |
pdb_structure = PDB_parser.get_structure( | |
prot_id, | |
gzip.open(filename, mode="rt") | |
if filename.endswith(".gz") | |
else filename, | |
) | |
except FileNotFoundError: # OSError python2 | |
# print(print(os.getcwd()), filename, "not found") | |
print(os.getcwd(), filename, "not found") | |
continue | |
except Exception: | |
try: | |
pdb_structure = CIF_parser.get_structure( | |
prot_id, | |
gzip.open(filename, mode="rt") | |
if filename.endswith(".gz") | |
else filename, | |
) | |
except Exception: | |
print("unable to open ", filename, " as PDB, MMCIF or PIC file format.") | |
continue | |
# get specified chain if given | |
if pdbidMatch is not None and pdbidMatch.group(3) is not None: | |
# have chain specifier for PDBid | |
# if pdb_structure[0][pdbidMatch.group(3)] is not None: | |
if pdbidMatch.group(3) in pdb_structure[0]: | |
pdb_chain = pdb_structure[0][pdbidMatch.group(3)] | |
pdb_structure = pdb_chain | |
else: | |
print("chain " + pdbidMatch.group(3) + " not found in " + filename) | |
continue | |
elif args.sel_chain is not None: | |
if args.sel_chain in pdb_structure[0]: | |
pdb_chain = pdb_structure[0][args.sel_chain] | |
pdb_structure = pdb_chain | |
else: | |
print("chain " + args.sel_chain + " not found in " + filename) | |
continue | |
if pdb_input: | |
if not args.t: | |
print(fileNo, "parsed pdb input ID", prot_id, "file:", filename) | |
# print('header:', pdb_structure.header.get('head', 'NONE')) | |
# print('idcode:', pdb_structure.header.get('idcode', 'NONE')) | |
# print('deposition date:', pdb_structure.header.get( | |
# 'deposition_date', 'NONE')) | |
# for res in pdb_chain.get_residues(): # pdb_structure.get_residues(): | |
# print(res.get_full_id(), res.resname, | |
# 'disordered' if res.disordered else '') | |
else: | |
print(fileNo, "parsed pic input ", filename) | |
report_IC(pdb_structure, verbose=True) | |
# print(pdb_structure.header['idcode'], pdb_chain.id, ':', | |
# pdb_structure.header['head']) | |
if args.wp: | |
if pic_input: | |
pdb_structure.internal_to_atom_coordinates() | |
write_PDB(pdb_structure, outfile + ".pdb") | |
print("wrote pdb output for", outfile) | |
if args.wip: | |
if pdb_input: | |
pdb2 = IC_duplicate(pdb_structure) | |
pdb2.internal_to_atom_coordinates() | |
write_PDB(pdb2, outfile + ".pdb") | |
print("wrote pdb output for converted", outfile) | |
else: | |
print("-wip requires PDB or MMCIF input") | |
if args.wpi: | |
if pic_input: | |
pdb_structure.internal_to_atom_coordinates() | |
pdb_structure.atom_to_internal_coordinates() | |
write_PIC(pdb_structure, outfile + ".pic") | |
else: | |
print("-wpi requires PIC input") | |
if args.wc: | |
if pic_input: | |
pdb_structure.internal_to_atom_coordinates() | |
io = MMCIFIO() | |
io.set_structure(pdb_structure) | |
io.save(outfile + ".cif") | |
print("wrote cif output for", outfile) | |
if args.i: | |
if pdb_input: | |
# add_PIC(pdb_structure) | |
pdb_structure.atom_to_internal_coordinates() | |
if args.wi: | |
if pdb_input: | |
# add_PIC(pdb_structure) | |
pdb_structure.atom_to_internal_coordinates() | |
write_PIC(pdb_structure, outfile + ".pic") | |
print("wrote pic output for", outfile) | |
if args.t or args.tv: | |
sp = StringIO() | |
if pdb_input: | |
with warnings.catch_warnings(record=True) as w: | |
# warnings.simplefilter("error") | |
if True: # try: | |
if args.cp: | |
cProfile.run( | |
"r = structure_rebuild_test(pdb_structure, args.tv)", | |
sort="time", | |
) | |
elif args.ti: | |
tr = timeit.repeat( | |
stmt="structure_rebuild_test(pdb_structure, args.tv)", | |
repeat=ti_iter, | |
number=1, | |
globals=globals(), | |
) | |
print(min(tr)) | |
else: | |
r = structure_rebuild_test(pdb_structure, args.tv) | |
warns = len(w) > 0 | |
if args.tv and warns: | |
for wrn in w: | |
print(wrn.message) | |
if not args.ti: | |
print( | |
prot_id, fileNo, r["report"], ("WARNINGS" if warns else "") | |
) | |
# except Exception as e: | |
# print(prot_id, fileNo, "EXCEPTION:", type(e), e) | |
elif pic_input: | |
pdb_structure.internal_to_atom_coordinates() | |
write_PDB(pdb_structure, sp) | |
sp.seek(0) | |
pdb2 = PDB_parser.get_structure(prot_id, sp) | |
pdb2.atom_to_internal_coordinates() | |
sp2 = StringIO() | |
write_PIC(pdb2, sp2) | |
sp2.seek(0) | |
inf = open(filename, "r") | |
lineCount = 0 | |
matchCount = 0 | |
diffCount = 0 | |
# for line1, line2 in itertools.zip_longest(inf, sp2): | |
for line1, line2 in zip(inf, sp2): | |
lineCount += 1 | |
if line1 == line2: | |
matchCount += 1 | |
else: | |
diffCount += 1 | |
if args.tv: | |
print(line1, "!=", line2) | |
print(lineCount, matchCount, diffCount) | |
if args.rama: | |
if pdb_input: | |
pdb_structure.atom_to_internal_coordinates() | |
for r in pdb_structure.get_residues(): | |
ric = r.internal_coord | |
if ric: | |
print( | |
r, | |
ric.get_angle("psi"), | |
ric.get_angle("phi"), | |
ric.get_angle("omg"), | |
ric.get_angle("tau"), | |
ric.get_angle("chi2"), | |
ric.get_length("0C:1N"), | |
) | |
print( | |
r, | |
ric.get_angle("N:CA:C:1N"), | |
ric.get_angle("-1C:N:CA:C"), | |
ric.get_angle("-1CA:-1C:N:CA"), | |
ric.get_angle("N:CA:C"), | |
ric.get_angle("CA:CB:CG:CD"), | |
None | |
if not ric.rnext | |
else ric.get_length((ric.rak("C"), ric.rnext[0].rak("N"))), | |
) | |
# print(r.internal_coord.get_dihedral('N:CA:C:O')) | |
if args.rama2: | |
if pdb_input: | |
pdb_structure.atom_to_internal_coordinates() | |
nvc1 = {} | |
nvpsi = {} | |
nvtau = {} | |
c1count = 0 | |
psicount = 0 | |
taucount = 0 | |
for r in pdb_structure.get_residues(): | |
# print(r.internal_coord.get_dihedral('N:CA:C:O')) | |
if r.internal_coord: | |
ric = r.internal_coord | |
chi1 = ric.get_angle("chi1") | |
if chi1 is not None: | |
c1count += 1 | |
nv = chi1 + 90 | |
if nv > 180.0: | |
nv -= 360.0 | |
ric.set_angle("chi1", nv) | |
nvc1[str(r)] = nv | |
psi = ric.get_angle("psi") | |
if psi is not None: | |
psicount += 1 | |
nv = psi - 90 | |
if nv < -180.0: | |
nv += 360.0 | |
ric.set_angle("psi", nv) | |
nvpsi[str(r)] = nv | |
tau = ric.get_angle("tau") | |
if tau is not None: | |
taucount += 1 | |
nv = tau - 5 | |
ric.set_angle("tau", nv) | |
nvtau[str(r)] = nv | |
pdb2 = IC_duplicate(pdb_structure) | |
write_PIC(pdb2, "foo.pic") | |
pdb2.internal_to_atom_coordinates() | |
sf = StringIO() | |
write_PDB(pdb2, sf) | |
sf.seek(0) | |
new_pdb = PDB_parser.get_structure("1NEW", sf) | |
new_pdb.atom_to_internal_coordinates() | |
c1tcount = 0 | |
psitcount = 0 | |
tautcount = 0 | |
for r in new_pdb.get_residues(): | |
ric = r.internal_coord | |
if ric: | |
chi1 = ric.get_angle("chi1") | |
if chi1 is not None: | |
c1tcount += 1 | |
print( | |
f"chi1 {chi1} should be {nvc1[str(r)]} rslt= {chi1 == nvc1[str(r)]}" | |
) | |
psi = ric.get_angle("psi") | |
if psi is not None: | |
psitcount += 1 | |
print( | |
f"psi {psi} should be {nvpsi[str(r)]} rslt= {psi == nvpsi[str(r)]}" | |
) | |
tau = ric.get_angle("tau") | |
if tau is not None: | |
tautcount += 1 | |
print( | |
f"tau {tau} should be {nvtau[str(r)]} rslt= {tau == nvtau[str(r)]}" | |
) | |
print(f"chi1count {c1count} {c1tcount} {c1count == c1tcount}") | |
print(f"psicount {psicount} {psitcount} {psicount == psitcount}") | |
print(f"taucount {taucount} {tautcount} {taucount == tautcount}") | |
if args.ws: | |
if pdb_input: | |
pdb_structure.atom_to_internal_coordinates() | |
if args.flex: | |
if args.gcb and pic_input: | |
pdb_structure.internal_to_atom_coordinates() | |
pdb_structure.atom_to_internal_coordinates() # build C-beta's | |
for r in pdb_structure.get_residues(): | |
if r.internal_coord: | |
r.internal_coord.set_flexible() | |
if args.hb: | |
for r in pdb_structure.get_residues(): | |
if r.internal_coord: | |
r.internal_coord.set_hbond() | |
write_SCAD( | |
pdb_structure, | |
outfile + ".scad", | |
scale_val, | |
pdbid=prot_id, | |
backboneOnly=args.backbone, | |
includeCode=(not args.nsc), | |
) | |
fileNo += 1 | |
print("normal termination") |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment