Skip to content

Instantly share code, notes, and snippets.

@fabian-paul
Last active April 5, 2020 08:18
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save fabian-paul/1cac21668db608d4954fdbf81521bf1f to your computer and use it in GitHub Desktop.
Save fabian-paul/1cac21668db608d4954fdbf81521bf1f to your computer and use it in GitHub Desktop.
Generate trajectories for computational alanine scan (mass generation of mutant structures)
import numpy as np
import mdtraj
import os
import os.path
import argparse
def prepare_maps(mut_pdb='mutant.pdb', wt_pdb=None):
if wt_pdb is None:
wt_pdb = os.path.expanduser('~/system/crystal.pdb')
wt = mdtraj.load(wt_pdb)
mut = mdtraj.load(mut_pdb)
wt = wt.atom_slice(wt.top.select('protein'))
mut = mut.atom_slice(mut.top.select('protein'))
dest = []
source = []
# go throught list of unmodified residues and map atoms
for rw, rm in zip(wt.top.residues, mut.top.residues):
if rw.name == rm.name and rw.index == rm.index:
for aw,am in zip(rw.atoms, rm.atoms):
assert aw.name == am.name
i = aw.index
source.append(i)
j = am.index
dest.append(j)
# add mutated backbone to the map
away = None
for rw, rm in zip(wt.top.residues, mut.top.residues):
if rw.name != rm.name and rw.index == rm.index:
if away is None:
away = rm.index
else:
assert rm.index == away # allow only one mutation at the same time
for aw in rw.atoms:
i = aw.index
if aw.name in ['C','O','N','H','CA','HA','H1','H2','H3','OXT']:
j = next(am.index for am in rm.atoms if am.name == aw.name)
source.append(i)
dest.append(j)
assert away is not None # should have least one mutation
source = np.array(source)
dest = np.array(dest)
# find the indices of ALA-HB1 to ALA-HB3 in mutant topology
r_mut_ala = next(rm for rm in mut.top.residues if rm.index == away)
h_and_cb_indices = []
for n in ['HB1', 'HB2', 'HB3', 'CB']:
h_and_cb_indices.append(next(a.index for a in r_mut_ala.atoms if a.name == n))
h_and_cb_indices = np.array(h_and_cb_indices)
# find the indices of mutated-away backbone in wt topology
r_wt_away = next(rw for rw in wt.top.residues if rw.index == away)
bb_frame = []
for n in ['C', 'CA', 'N', 'O', 'CB']:
bb_frame.append(next(a.index for a in r_wt_away.atoms if a.name == n))
# CG is kind of tough...
for n in ['CG', 'CG2', 'OG']:
if any(a.name == n for a in r_wt_away.atoms):
bb_frame.append(next(a.index for a in r_wt_away.atoms if a.name == n))
# find HB2 and HB3 positions
if r_wt_away.name=='THR': # THR is an exception
bb_frame.append(next(a.index for a in r_wt_away.atoms if a.name == 'OG1'))
bb_frame.append(next(a.index for a in r_wt_away.atoms if a.name == 'HB'))
if r_wt_away.name=='ILE' or r_wt_away.name=='VAL':
bb_frame.append(next(a.index for a in r_wt_away.atoms if a.name == 'CG1'))
bb_frame.append(next(a.index for a in r_wt_away.atoms if a.name == 'HB'))
else:
bb_frame.append(next(a.index for a in r_wt_away.atoms if a.name == 'HB2'))
bb_frame.append(next(a.index for a in r_wt_away.atoms if a.name == 'HB3'))
bb_frame = np.array(bb_frame)
# if we are mutating a proline, we have to place the bb hydrogen too ...
if wt.top.residue(away).name == 'PRO':
proline = True
PRO_H_index = next(am.index for am in mut.top.residue(away).atoms if am.name=='H')
PRO_N_CD = np.array([
next(aw.index for aw in wt.top.residue(away).atoms if aw.name=='N'),
next(aw.index for aw in wt.top.residue(away).atoms if aw.name=='CD')
])
else:
proline = False
PRO_H_index = None
PRO_N_CD = None
return source, dest, h_and_cb_indices, bb_frame, proline, PRO_H_index, PRO_N_CD, wt, mut
# positions of hydrogens in the coordiante system attached to the CA of alanine
# * z-axis is oriented along the CA-CB bond
# * x-axis is oriented along the direction from CA to GG orthogonalized w.r.t the z-axis (with Gautam method)
# * x-axis is oriented along along the bisectrix of the angle (N-CA-C) orthogonalized w.r.t the z-axis (with my method)
t = 1.91113553093 # the diamond angle 2*arctan(sqrt(2)) +/- Amber stupidity
c = 0.1526 # bond length CA-CB (Amber)
b = 0.109 # bond length CB-H (Amber)
sqrt_3 = np.sqrt(3.0)
H1 = np.array([b*np.sin(t), 0.0, -b*np.cos(t)])
H2 = np.array([-0.5*b*np.sin(t), 0.5*sqrt_3*b*np.sin(t), -b*np.cos(t)])
H3 = np.array([-0.5*b*np.sin(t), -0.5*sqrt_3*b*np.sin(t), -b*np.cos(t)])
ceZ = np.array([0.0, 0.0, c])
def normalized(v):
return v/np.linalg.norm(v)
def orthogonalized(v, w):
return v - v.dot(w)/w.dot(w)* w
def orthonormalized(v, w):
return normalized(orthogonalized(v, w))
def compute(frame, massova=False, constant_angles=False, fixCB=False):
C, CA, N, O, CB, CG, HB2, HB3 = frame
O = np.zeros((3,3))
vZ = normalized(CB-CA)
if constant_angles:
assert not fixCB, 'not implemented'
H1p = b*normalized(CG-CB) + CB
H2p = b*normalized(HB2-CB) + CB
H3p = b*normalized(HB3-CB) + CB
CBp = CB
return np.vstack((H1p, H2p, H3p, CBp))
if massova:
vX = orthonormalized(CG-CB, vZ)
else:
cc = orthonormalized(C-CA, vZ)
n = orthonormalized(N-CA, vZ)
vX = normalized(cc+n)
vY = np.cross(vZ, vX)
assert np.cross(vX, vY).dot(vZ) > 0
# O = map from the coordiante system attached to ALA-CA to the laboratory frame
O[:,0] = vX # remember: columns of a matrix are the images of the canonical unit vectors
O[:,1] = vY
O[:,2] = vZ
if fixCB: # everything relative to CA
H1p = O.dot(H1 + ceZ) + CA
H2p = O.dot(H2 + ceZ) + CA
H3p = O.dot(H3 + ceZ) + CA
CBp = O.dot(ceZ) + CA
else: # everything relative to CB
H1p = O.dot(H1) + CB
H2p = O.dot(H2) + CB
H3p = O.dot(H3) + CB
CBp = CB
return np.vstack((H1p, H2p, H3p, CBp))
def place_PRO_hydrogen(frame):
k = 0.101 # bond length N-H in Amber
N,CD = frame
v = normalized(CD-N)
return N + k*v
if __name__=='__main__':
parser = argparse.ArgumentParser(description='mutate trajectory')
parser.add_argument('-i', '--trajectory', required=True, help='(input) trajectory', metavar='file')
parser.add_argument('-m', '--mutant_topology', required=True, help='(input) topology file for mutant', metavar='file')
parser.add_argument('-w', '--wild_type_topology', required=True, help='(input) topology file for wild type', metavar='file')
parser.add_argument('-o', '--outdir', default='.', help='directory where mutant trajectories are written to, default=.', metavar='path')
parser.add_argument('-g', '--massova', default=False, help='orient ALA methyl group like CG atom, default=false', action='store_true')
parser.add_argument('-b', '--fixcb', default=False, help='reset C-alpha-C-beta bond length to rest length, default=false', action='store_true')
parser.add_argument('-c', '--cangles', default=False, help='keep all angles and dihedrals, default=false', action='store_true')
parser.add_argument('-s', '--stride', default=1, help='apply a stride of n steps to the mutant trajectory default=1', metavar='steps')
# fixcb: place all hydrogens and CB to rest positions
# default: place all hydrogens to rest positions
# massova: like default but keep the rotation of the dihedral, reset angles and bond lengths
# cangles: keep all angles and dihedrals, only reset hydrogen bond lengths
args = parser.parse_args()
stride = int(args.stride)
source, dest, h_and_cb_indices, bb_frame, proline, PRO_H_index, PRO_N_CD, wt, mut = prepare_maps(mut_pdb=args.mutant_topology, wt_pdb=args.wild_type_topology)
# load trajectory
wt_traj = mdtraj.load(args.trajectory, top=wt.top).atom_slice(wt.top.select('protein'))
n_frames = len(wt_traj)
n_frames_out = ((n_frames - 1) // stride) + 1
mut_traj = mdtraj.Trajectory(np.zeros((n_frames_out, mut.top.n_atoms, 3))*np.nan, mut.top, time=wt_traj.time[::stride],
unitcell_lengths=wt_traj.unitcell_lengths[::stride], unitcell_angles=wt_traj.unitcell_angles[::stride])
if proline:
assert len(source)==len(dest)
assert len(source)+len(h_and_cb_indices)+1==mut.n_atoms
else:
assert len(source)==len(dest)==mut.n_atoms-len(h_and_cb_indices)
for t in range(n_frames_out):
mut_traj.xyz[t, dest] = wt_traj.xyz[t*stride, source]
mut_traj.xyz[t, h_and_cb_indices] = compute(wt_traj.xyz[t*stride, bb_frame], constant_angles=args.cangles, massova=args.massova, fixCB=args.fixcb)
if proline:
mut_traj.xyz[t, PRO_H_index] = place_PRO_hydrogen(wt_traj.xyz[t*stride, PRO_N_CD])
assert not np.any(np.isnan(mut_traj.xyz))
#mut_traj.save_xtc(args.outdir + os.sep + os.path.basename(args.trajectory))
#mut_traj.save_netcdf(args.outdir + os.sep + os.path.splitext(os.path.basename(args.trajectory))[0] + '.nc')
mut_traj.save_dcd(args.outdir + os.sep + os.path.splitext(os.path.basename(args.trajectory))[0] + '.dcd')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment