benjaminhwilliams/background_and_peak_pixels.py

## background_and_peak_pixels.py
#!dials.python
# -*- coding:utf-8 -*-

# The lines above just let your computer know how to read this file and
# that this program should be run with the in-built dials python distribution.

u"""
Filter background pixels from signal pixels

This program separates pixels that are likely to be part of reflections
from pixels that belong to the background, and pixels that are classed as
'bad' (either having negative intensity, or having an intensity outside the
detector's trusted range, referred to as a 'hot' pixel.  The program works
from a datablock file as generated by dials.import.

You can run the individual commands, one by one, in the dials interpreter,
or you can run the whole program all at once, by calling
    dials.python background_and_peak_pixels.py
in the terminal.  It will leave two PNG plots in your working directory.
"""

# The dials flex module lets you manipulate reflection tables, and the
# columns of which they are constructed, in clever ways.
from dials.array_family import flex
# In order to load the datablock, we need some functionality from teh
# diffraction experiment toolbox (dxtbx, a component of dials).
from dxtbx.datablock import DataBlockFactory
# We want to know whether a pixel passes muster as a signal pixel,
# potentially part of a reflection shoebox, so we get the relevant
# spotfinder threshold algorithm from dials:
from dials.extensions.dispersion_spotfinder_threshold_ext import \
    DispersionSpotFinderThresholdExt
# This import just provides us with the default spotfinding parameters,
# as used by dials.find_spots.  We will pass them to the above threshold
# algorithm, to save us having to set up all the parameters ourselves:
from dials.command_line.find_spots import phil_scope
# We use pyplot and matplotlib.colors to plot the data:
from matplotlib import pyplot
from matplotlib import colors as colours

# Get the datablock from storage in its Json file.
dblock = DataBlockFactory.from_args(['datablock.json'])[0]
# Get the first (only) imageset associated with the datablock, which is the
# set of images from the rotation sweep.
imageset = dblock.extract_imagesets()[0]
# We need to get the detector info from the imageset too, since we need to
# know the detector trusted range in order to determine whether a pixel is hot.
detector = imageset.get_detector()[0]
trusted = detector.get_trusted_range()

# From just the first image, we extract the raw data, in the form of a vector
# of all the pixel intensities.  The data are no-longer stored as a 2-d map
# of the detector.
data = imageset.get_raw_data(0)[0]
# Pixels are considered bad if they have negative intensity or intensity
# outside the trusted range.
negative = (data < 0)
hot = (data > int(round(trusted[1])))
bad = negative | hot

# Here we set up the spotfinding threshold algorithm, and flag pixels that
# have the characteristics of belonging to reflections.
data = data.as_double()
thresholder = DispersionSpotFinderThresholdExt(phil_scope.extract())
peak = thresholder.compute_threshold(data, ~bad)

# Most of the pixels don't belong to reflections, so if we want to
# extract just the background, it makes sense to start with a copy of the
# data...
background = data.deep_copy()
# ...from which we can remove pixels flagged as bad or belonging to
# reflections, by setting their intensity to zero.
not_background = (bad | peak)
background.set_selected(not_background, 0)
# Using the dimensions of the detector, in pixels, allows us to reconstruct
# the shape of the background data from the one-dimensional intensity data.
background_2d = background.as_numpy_array()
background_2d.resize(detector.get_image_size()[::-1])

# Now we can plot the background pixels.
pyplot.pcolormesh(background_2d,
                  norm=colours.SymLogNorm(.0001),
                  cmap='copper')
pyplot.axes().invert_yaxis()
pyplot.axes().xaxis.tick_top()
pyplot.axes().set_aspect('equal', 'box')
pyplot.savefig('background_pixels', transparent=True, dpi=300)
pyplot.clf()

# By contrast to the background, the signal pixels are few and far between.
# It therefore makes sense to start with a column of zeroes then and add in
# the intensity of the signal pixels.
signal = flex.double(data.size(), 0)
peak = peak.iselection()
signal.set_selected(peak, data.select(peak))
# Once again, we then need to reconstruct the layout of the pixels on the
# detector.
signal_2d = signal.as_numpy_array()
signal_2d.resize(detector.get_image_size()[::-1])

# And finally try to plot it.  Feel free to play with this to try to make
# the pixels more visible.  I haven't done a very good job!
pyplot.pcolormesh(signal_2d,
                  norm=colours.SymLogNorm(.0001),
                  cmap='pink')
pyplot.axes().invert_yaxis()
pyplot.axes().xaxis.tick_top()
pyplot.axes().set_aspect('equal', 'box')
pyplot.savefig('peak_pixels', transparent=True, dpi=300)
pyplot.clf()

## fleeting_spots.py
#!dials.python
# -*- coding:utf-8 -*-

# The lines above just let your computer know how to read this file and
# that this program should be run with the in-built dials python distribution.

u"""
Discover the rotation axis of a sweep by finding reflections in a single image

A simple program for finding those reflections, as discovered by
dials.find_spots, that are only present on a single image, within the first
1200 frames.  This would be useful to determine the axis of rotation in a
sample where radiation damage had compromised the later frames.

You can run the individual commands, one by one, in the dials interpreter,
or you can run the whole program all at once, by calling
    dials.python fleeting_spots.py
in the terminal.  It will leave a PNG plot in your working directory.
"""

# The pickle module is used to read the reflection table files into the
# program:
import cPickle as pickle
# We use pyplot to plot the data:
from matplotlib import pyplot

# We'll want to load the strong spots (the output of dials.find_spots):
with open('strong.pickle') as f:
    refl = pickle.load(f)

# We want to get the image-number (z-component) of the spot positions,
# so we can select only those on the first 1200 frames.
z = refl['xyzobs.px.value'].parts()[2]
refl = refl.select(z < 1200)

# Then we'll want to look at the reflection bounding boxes.  The bounding box
# encloses the region between x₀ and x₁, between y₀ and y₁ and between z₀
# and z₁, and the information is stored as a list [x₀, x₁, y₀, y₁, z₀, z₁].
# Since we want to know which spots occur on just one image, i.e. z₁ - z₀ = 1,
# we should extract the 4th and 5th components of the list and compare their
# values, keeping only those reflections that meet the criterion.
z0, z1 = refl['bbox'].parts()[4:]
refl = refl.select((z1 - z0) == 1)

# Now we have found the spots we're interested in, we would like to
# visualise them in teh detector plane.  We get the x, y (and z) values in
# the same way as we did to get the z-values for selecting the first 1200
# frames above:
x, y, z = refl['xyzobs.px.value'].parts()

# And simply plot a 2-d histogram of the spot positions.
pyplot.hist2d(x, y, bins=[100,100])
pyplot.axes().invert_yaxis()
pyplot.axes().xaxis.tick_top()
pyplot.axes().set_aspect('equal', 'box')
pyplot.savefig('fleeting_spots', transparent=True)
pyplot.clf()

## spot_sizes.py
#!dials.python
# -*- coding:utf-8 -*-

# The lines above just let your computer know how to read this file and
# that this program should be run with the in-built dials python distribution.

u"""
Plot the distribution of spot sizes

A simple program for finding the distribution of sizes of strong spots,
as discovered by dials.find_spots.

You can run the individual commands, one by one, in the dials interpreter,
or you can run the whole program all at once, by calling
    dials.python spot_sizes.py
in the terminal.  It will leave a PNG plot in your working directory.
"""

# The pickle module is used to read the reflection table files into the
# program:
import cPickle as pickle
# We use pyplot to plot the data:
from matplotlib import pyplot
# The dials flex module lets you manipulate reflection tables, and the
# columns of which they are constructed, in clever ways.
from dials.array_family import flex
# And we'll need the information about the different masks, or flags,
# that the pixels within the reflection shoebox are labelled with:
from dials.algorithms.shoebox import MaskCode

# We load the strong spots, the output of dials.find_spots:
with open('strong.pickle') as f:
    refl = pickle.load(f)

# We define a 'good' pixel as one that is labelled with a 'foreground' mask
# (i.e. it is not a background pixel) or it is marked as 'valid', i.e. it is
# not 'hot' or adjacent to a pixel from another reflection shoebox.
good = (MaskCode.Foreground | MaskCode.Valid)
# Now we count the 'good' pixels in each shoebox, and keep these counts as a
# list (actually, a flex array, a type of data structure specific to flex):
n_signal = refl['shoebox'].count_mask_values(good)

# Finally we plot a histogram of reflection pixel counts.
max_signal = flex.max(n_signal)
pyplot.hist(n_signal,
            bins=max_signal,
            range=[0, max_signal],
            log=True)
pyplot.xlabel('Number of good pixels in a reflection')
pyplot.ylabel('Occurrences (number of reflections)')
pyplot.savefig('strong_spots', transparent=True)
pyplot.clf()

## systematic_absences.py
#!dials.python
# -*- coding:utf-8 -*-

# The lines above just let your computer know how to read this file and
# that this program should be run with the in-built dials python distribution.

u"""
Plot the distribution of integrated I/σ for systematically absent spots

A simple program for exploring the intensities of systematic absences in a
dataset with a screw axis, which indexes in P4₃2₁2.

You can run the individual commands, one by one, in the dials interpreter,
or you can run the whole program all at once, by calling
    dials.python systematic_absences.py
in the terminal.  It will leave a PNG plot in your working directory.
"""

# The pickle module is used to read the reflection table files into the
# program:
import cPickle as pickle
# We use pyplot to plot the data:
from matplotlib import pyplot
# The dials flex module lets you manipulate reflection tables, and the
# columns of which they are constructed, in clever ways.
from dials.array_family import flex
# The space group toolbox, from the computational crystallography toolbox,
# gives you access to all the mathematical properties of the space groups.
from cctbx import sgtbx

# Let's get some info about space group P4₃2₁2
sg = sgtbx.space_group_info('P43212').group()

# And load the integrated data (the output of dials.integrate):
with open('integrated.pickle') as f:
    data = pickle.load(f)

# We want to mae sure we have only those reflections that were
# successfully integrated.  They are flagged as such, so we can select them
# alone:
data = data.select(data.get_flags(data.flags.integrated))
# Now, for each reflection, we can use the reflection's Miller index to
# determine whether is will be systematically absent.  We'll keep only those
# such reflections:
absent = data.select(flex.bool(
    [sg.is_sys_absent(m) for m in data['miller_index']]))

# Now it only remains to get the integrated intensity...
i = absent['intensity.prf.value']
# ...and σ values for each systematically absent reflection:
s = flex.sqrt(absent['intensity.prf.variance'])

# And we can plot a histogram of the I/σ values, which we expect to be a
# distribution centred on zero.
pyplot.hist(i/s)
pyplot.xlabel(u'I/σ')
pyplot.ylabel('Number of (absent) reflections')
pyplot.savefig('absence_i_over_sigma', transparent=True)
pyplot.clf()
	#!dials.python
	# -- coding:utf-8 --

	# The lines above just let your computer know how to read this file and
	# that this program should be run with the in-built dials python distribution.

	u"""
	Filter background pixels from signal pixels

	This program separates pixels that are likely to be part of reflections
	from pixels that belong to the background, and pixels that are classed as
	'bad' (either having negative intensity, or having an intensity outside the
	detector's trusted range, referred to as a 'hot' pixel. The program works
	from a datablock file as generated by dials.import.

	You can run the individual commands, one by one, in the dials interpreter,
	or you can run the whole program all at once, by calling
	dials.python background_and_peak_pixels.py
	in the terminal. It will leave two PNG plots in your working directory.
	"""

	# The dials flex module lets you manipulate reflection tables, and the
	# columns of which they are constructed, in clever ways.
	from dials.array_family import flex
	# In order to load the datablock, we need some functionality from teh
	# diffraction experiment toolbox (dxtbx, a component of dials).
	from dxtbx.datablock import DataBlockFactory
	# We want to know whether a pixel passes muster as a signal pixel,
	# potentially part of a reflection shoebox, so we get the relevant
	# spotfinder threshold algorithm from dials:
	from dials.extensions.dispersion_spotfinder_threshold_ext import \
	DispersionSpotFinderThresholdExt
	# This import just provides us with the default spotfinding parameters,
	# as used by dials.find_spots. We will pass them to the above threshold
	# algorithm, to save us having to set up all the parameters ourselves:
	from dials.command_line.find_spots import phil_scope
	# We use pyplot and matplotlib.colors to plot the data:
	from matplotlib import pyplot
	from matplotlib import colors as colours

	# Get the datablock from storage in its Json file.
	dblock = DataBlockFactory.from_args(['datablock.json'])[0]
	# Get the first (only) imageset associated with the datablock, which is the
	# set of images from the rotation sweep.
	imageset = dblock.extract_imagesets()[0]
	# We need to get the detector info from the imageset too, since we need to
	# know the detector trusted range in order to determine whether a pixel is hot.
	detector = imageset.get_detector()[0]
	trusted = detector.get_trusted_range()

	# From just the first image, we extract the raw data, in the form of a vector
	# of all the pixel intensities. The data are no-longer stored as a 2-d map
	# of the detector.
	data = imageset.get_raw_data(0)[0]
	# Pixels are considered bad if they have negative intensity or intensity
	# outside the trusted range.
	negative = (data < 0)
	hot = (data > int(round(trusted[1])))
	bad = negative \| hot

	# Here we set up the spotfinding threshold algorithm, and flag pixels that
	# have the characteristics of belonging to reflections.
	data = data.as_double()
	thresholder = DispersionSpotFinderThresholdExt(phil_scope.extract())
	peak = thresholder.compute_threshold(data, ~bad)

	# Most of the pixels don't belong to reflections, so if we want to
	# extract just the background, it makes sense to start with a copy of the
	# data...
	background = data.deep_copy()
	# ...from which we can remove pixels flagged as bad or belonging to
	# reflections, by setting their intensity to zero.
	not_background = (bad \| peak)
	background.set_selected(not_background, 0)
	# Using the dimensions of the detector, in pixels, allows us to reconstruct
	# the shape of the background data from the one-dimensional intensity data.
	background_2d = background.as_numpy_array()
	background_2d.resize(detector.get_image_size()[::-1])

	# Now we can plot the background pixels.
	pyplot.pcolormesh(background_2d,
	norm=colours.SymLogNorm(.0001),
	cmap='copper')
	pyplot.axes().invert_yaxis()
	pyplot.axes().xaxis.tick_top()
	pyplot.axes().set_aspect('equal', 'box')
	pyplot.savefig('background_pixels', transparent=True, dpi=300)
	pyplot.clf()

	# By contrast to the background, the signal pixels are few and far between.
	# It therefore makes sense to start with a column of zeroes then and add in
	# the intensity of the signal pixels.
	signal = flex.double(data.size(), 0)
	peak = peak.iselection()
	signal.set_selected(peak, data.select(peak))
	# Once again, we then need to reconstruct the layout of the pixels on the
	# detector.
	signal_2d = signal.as_numpy_array()
	signal_2d.resize(detector.get_image_size()[::-1])

	# And finally try to plot it. Feel free to play with this to try to make
	# the pixels more visible. I haven't done a very good job!
	pyplot.pcolormesh(signal_2d,
	norm=colours.SymLogNorm(.0001),
	cmap='pink')
	pyplot.axes().invert_yaxis()
	pyplot.axes().xaxis.tick_top()
	pyplot.axes().set_aspect('equal', 'box')
	pyplot.savefig('peak_pixels', transparent=True, dpi=300)
	pyplot.clf()