Last active
April 10, 2019 14:02
-
-
Save bougui505/6fe478600ebf82420713d529107c0c05 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
# -*- coding: UTF8 -*- | |
# Author: Guillaume Bouvier -- guillaume.bouvier@pasteur.fr | |
# https://research.pasteur.fr/en/member/guillaume-bouvier/ | |
# 2019-04-08 16:15:11 (UTC+0200) | |
import sys | |
import __main__ | |
__main__.pymol_argv = ['pymol', '-cq'] # Optionnaly pass options to pymol | |
import pymol | |
stdout = sys.stdout # To get the stdout in the notebook instead of in the pymol console | |
import pymol.cmd as cmd | |
pymol.finish_launching() | |
sys.stdout = stdout | |
import itertools | |
import argparse | |
import numpy | |
import mrcfile | |
from scipy.spatial import cKDTree | |
def read_mrc(mrcfilename): | |
""" | |
Read a mrc file and return the xyz and density values at the given level | |
if given | |
""" | |
print "Reading mrc file ..." | |
xyz = [] | |
with mrcfile.open(mrcfilename) as emd: | |
nx, ny, nz = emd.header['nx'], emd.header['ny'], emd.header['nz'] | |
x0, y0, z0 = emd.header['origin']['x'], emd.header['origin']['y'],\ | |
emd.header['origin']['z'] | |
x, y, z = numpy.float(x0), numpy.float(y0), numpy.float(z0) | |
dx, dy, dz = emd.voxel_size['x'], emd.voxel_size['y'],\ | |
emd.voxel_size['z'] | |
xyz = numpy.meshgrid(numpy.arange(x0, x0+nx*dx, dx), | |
numpy.arange(y0, y0+ny*dy, dy), | |
numpy.arange(z0, z0+nz*dz, dz), | |
indexing='ij') | |
xyz = numpy.asarray(xyz) | |
xyz = xyz.reshape(3, nx*ny*nz) | |
xyz = xyz.T | |
print "done" | |
return xyz, emd.data.flatten(order='F').reshape(nx, ny, nz) | |
def save_density(data, grid_spacing, outfilename, origin=None): | |
""" | |
Save the density to an mrc file. The origin of the grid will be (0,0,0) | |
• outfilename: the mrc file name for the output | |
""" | |
print "Saving mrc file ..." | |
data = data.astype('float32') | |
with mrcfile.new(outfilename, overwrite=True) as mrc: | |
mrc.set_data(data.T) | |
mrc.voxel_size = grid_spacing | |
if origin is not None: | |
mrc.header['origin']['x'] = origin[0] | |
mrc.header['origin']['y'] = origin[1] | |
mrc.header['origin']['z'] = origin[2] | |
mrc.update_header_from_data() | |
mrc.update_header_stats() | |
print "done" | |
def read_pdb(pdbfilename): | |
""" | |
Read the given pdb file | |
""" | |
print "Reading pdb file ..." | |
cmd.load(pdbfilename, 'struct') | |
coords = cmd.get_coords('struct') | |
print "done" | |
return coords | |
def box_cut(emd, grid, coords, radius, padding=7): | |
""" | |
Cut a box around coords | |
""" | |
print "Cropping density ..." | |
nx, ny, nz = emd.shape | |
spacing = numpy.linalg.norm(numpy.diff(grid, axis=0)[0]) | |
xm, ym, zm = coords.min(axis=0) - radius - spacing*padding | |
xM, yM, zM = coords.max(axis=0) + radius + spacing*padding | |
x0, y0, z0 = grid[0] | |
x1, y1, z1 = grid[-1] + spacing | |
X = numpy.arange(x0, x1, spacing) | |
Y = numpy.arange(y0, y1, spacing) | |
Z = numpy.arange(z0, z1, spacing) | |
selx = numpy.logical_and(X>=xm, X<=xM) | |
sely = numpy.logical_and(Y>=ym, Y<=yM) | |
selz = numpy.logical_and(Z>=zm, Z<=zM) | |
indx = numpy.where(selx)[0] | |
indy = numpy.where(sely)[0] | |
indz = numpy.where(selz)[0] | |
i0, i1 = indx[0], indx[-1] | |
j0, j1 = indy[0], indy[-1] | |
k0, k1 = indz[0], indz[-1] | |
emd = emd[i0:i1, j0:j1, k0:k1] | |
grid = grid.reshape((nx, ny, nz, 3)) | |
grid = grid[i0:i1, j0:j1, k0:k1, :].reshape((-1, 3)) | |
print "done" | |
return grid, emd | |
def zone(emd, grid, coords, radius, padding=7): | |
""" | |
• emd: EM density data | |
• grid: em grid | |
• coords: the atomic coordinates of the model | |
• radius: distance threshold in Å | |
• padding: padding (margin) to add to the resulting density | |
""" | |
print "Zone selection ..." | |
ni, nj, nk = emd.shape | |
ravel = lambda x: numpy.ravel_multi_index(x, (ni, nj, nk)) | |
kdtree = cKDTree(grid) | |
invoxels_ = kdtree.query_ball_point(coords, radius) | |
invoxels = [] | |
for l in invoxels_: | |
invoxels.extend(l) | |
invoxels = numpy.unique(invoxels) | |
sel = numpy.zeros(grid.shape[0], dtype=bool) | |
sel[invoxels] = True | |
sel = sel.reshape(emd.shape) | |
emd[~sel] = emd.min() | |
selindex = numpy.asarray(numpy.where(emd > emd.min())) | |
ind_min = selindex.min(axis=1) | |
ind_max = selindex.max(axis=1) | |
ind_min = numpy.maximum(ind_min-padding, 0) | |
ind_max = numpy.minimum(ind_max+padding, emd.shape) | |
i0, j0, k0 = ind_min | |
i1, j1, k1 = ind_max | |
emd = emd[i0:i1, j0:j1, k0:k1] | |
origin = grid[ravel((i0, j0, k0))] | |
print "done" | |
return emd, origin | |
if __name__ == '__main__': | |
PARSER = argparse.ArgumentParser(description=\ | |
'Zone selection of EM density') | |
PARSER.add_argument('--mrc', | |
dest='MRCFILENAME', | |
type=str, | |
help='mrc file name') | |
PARSER.add_argument('--pdb', | |
dest='PDBFILENAME', | |
type=str, | |
help='pdb file name') | |
PARSER.add_argument('--threshold', | |
dest='THRESHOLD', | |
type=float, | |
help='distance threshold in Å') | |
PARSER.add_argument('--out', | |
dest='OUTFILENAME', | |
type=str, | |
help='OUTPUT MRC FILENAME') | |
ARGS = PARSER.parse_args() | |
GRID, EMD = read_mrc(ARGS.MRCFILENAME) | |
COORDS = read_pdb(ARGS.PDBFILENAME) | |
GRID, EMD = box_cut(EMD, GRID, COORDS, ARGS.THRESHOLD) | |
SPACING = numpy.linalg.norm(numpy.diff(GRID, axis=0)[0]) | |
EMD, ORIGIN = zone(EMD, GRID, COORDS, ARGS.THRESHOLD) | |
save_density(EMD, SPACING, ARGS.OUTFILENAME, origin=ORIGIN) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment