Skip to content

Instantly share code, notes, and snippets.

@bougui505
Last active April 10, 2019 14:02
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save bougui505/6fe478600ebf82420713d529107c0c05 to your computer and use it in GitHub Desktop.
Save bougui505/6fe478600ebf82420713d529107c0c05 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python
# -*- coding: UTF8 -*-
# Author: Guillaume Bouvier -- guillaume.bouvier@pasteur.fr
# https://research.pasteur.fr/en/member/guillaume-bouvier/
# 2019-04-08 16:15:11 (UTC+0200)
import sys
import __main__
__main__.pymol_argv = ['pymol', '-cq'] # Optionnaly pass options to pymol
import pymol
stdout = sys.stdout # To get the stdout in the notebook instead of in the pymol console
import pymol.cmd as cmd
pymol.finish_launching()
sys.stdout = stdout
import itertools
import argparse
import numpy
import mrcfile
from scipy.spatial import cKDTree
def read_mrc(mrcfilename):
"""
Read a mrc file and return the xyz and density values at the given level
if given
"""
print "Reading mrc file ..."
xyz = []
with mrcfile.open(mrcfilename) as emd:
nx, ny, nz = emd.header['nx'], emd.header['ny'], emd.header['nz']
x0, y0, z0 = emd.header['origin']['x'], emd.header['origin']['y'],\
emd.header['origin']['z']
x, y, z = numpy.float(x0), numpy.float(y0), numpy.float(z0)
dx, dy, dz = emd.voxel_size['x'], emd.voxel_size['y'],\
emd.voxel_size['z']
xyz = numpy.meshgrid(numpy.arange(x0, x0+nx*dx, dx),
numpy.arange(y0, y0+ny*dy, dy),
numpy.arange(z0, z0+nz*dz, dz),
indexing='ij')
xyz = numpy.asarray(xyz)
xyz = xyz.reshape(3, nx*ny*nz)
xyz = xyz.T
print "done"
return xyz, emd.data.flatten(order='F').reshape(nx, ny, nz)
def save_density(data, grid_spacing, outfilename, origin=None):
"""
Save the density to an mrc file. The origin of the grid will be (0,0,0)
• outfilename: the mrc file name for the output
"""
print "Saving mrc file ..."
data = data.astype('float32')
with mrcfile.new(outfilename, overwrite=True) as mrc:
mrc.set_data(data.T)
mrc.voxel_size = grid_spacing
if origin is not None:
mrc.header['origin']['x'] = origin[0]
mrc.header['origin']['y'] = origin[1]
mrc.header['origin']['z'] = origin[2]
mrc.update_header_from_data()
mrc.update_header_stats()
print "done"
def read_pdb(pdbfilename):
"""
Read the given pdb file
"""
print "Reading pdb file ..."
cmd.load(pdbfilename, 'struct')
coords = cmd.get_coords('struct')
print "done"
return coords
def box_cut(emd, grid, coords, radius, padding=7):
"""
Cut a box around coords
"""
print "Cropping density ..."
nx, ny, nz = emd.shape
spacing = numpy.linalg.norm(numpy.diff(grid, axis=0)[0])
xm, ym, zm = coords.min(axis=0) - radius - spacing*padding
xM, yM, zM = coords.max(axis=0) + radius + spacing*padding
x0, y0, z0 = grid[0]
x1, y1, z1 = grid[-1] + spacing
X = numpy.arange(x0, x1, spacing)
Y = numpy.arange(y0, y1, spacing)
Z = numpy.arange(z0, z1, spacing)
selx = numpy.logical_and(X>=xm, X<=xM)
sely = numpy.logical_and(Y>=ym, Y<=yM)
selz = numpy.logical_and(Z>=zm, Z<=zM)
indx = numpy.where(selx)[0]
indy = numpy.where(sely)[0]
indz = numpy.where(selz)[0]
i0, i1 = indx[0], indx[-1]
j0, j1 = indy[0], indy[-1]
k0, k1 = indz[0], indz[-1]
emd = emd[i0:i1, j0:j1, k0:k1]
grid = grid.reshape((nx, ny, nz, 3))
grid = grid[i0:i1, j0:j1, k0:k1, :].reshape((-1, 3))
print "done"
return grid, emd
def zone(emd, grid, coords, radius, padding=7):
"""
• emd: EM density data
• grid: em grid
• coords: the atomic coordinates of the model
• radius: distance threshold in Å
• padding: padding (margin) to add to the resulting density
"""
print "Zone selection ..."
ni, nj, nk = emd.shape
ravel = lambda x: numpy.ravel_multi_index(x, (ni, nj, nk))
kdtree = cKDTree(grid)
invoxels_ = kdtree.query_ball_point(coords, radius)
invoxels = []
for l in invoxels_:
invoxels.extend(l)
invoxels = numpy.unique(invoxels)
sel = numpy.zeros(grid.shape[0], dtype=bool)
sel[invoxels] = True
sel = sel.reshape(emd.shape)
emd[~sel] = emd.min()
selindex = numpy.asarray(numpy.where(emd > emd.min()))
ind_min = selindex.min(axis=1)
ind_max = selindex.max(axis=1)
ind_min = numpy.maximum(ind_min-padding, 0)
ind_max = numpy.minimum(ind_max+padding, emd.shape)
i0, j0, k0 = ind_min
i1, j1, k1 = ind_max
emd = emd[i0:i1, j0:j1, k0:k1]
origin = grid[ravel((i0, j0, k0))]
print "done"
return emd, origin
if __name__ == '__main__':
PARSER = argparse.ArgumentParser(description=\
'Zone selection of EM density')
PARSER.add_argument('--mrc',
dest='MRCFILENAME',
type=str,
help='mrc file name')
PARSER.add_argument('--pdb',
dest='PDBFILENAME',
type=str,
help='pdb file name')
PARSER.add_argument('--threshold',
dest='THRESHOLD',
type=float,
help='distance threshold in Å')
PARSER.add_argument('--out',
dest='OUTFILENAME',
type=str,
help='OUTPUT MRC FILENAME')
ARGS = PARSER.parse_args()
GRID, EMD = read_mrc(ARGS.MRCFILENAME)
COORDS = read_pdb(ARGS.PDBFILENAME)
GRID, EMD = box_cut(EMD, GRID, COORDS, ARGS.THRESHOLD)
SPACING = numpy.linalg.norm(numpy.diff(GRID, axis=0)[0])
EMD, ORIGIN = zone(EMD, GRID, COORDS, ARGS.THRESHOLD)
save_density(EMD, SPACING, ARGS.OUTFILENAME, origin=ORIGIN)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment