Created
December 27, 2016 22:25
-
-
Save lgarrison/e6376d2cba4d10b42f7b797640c1e39b to your computer and use it in GitHub Desktop.
Fast repeated histogramming in numpy
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Note: this function uses the term 'radius', but the histogramming is completely general | |
import numpy as np | |
def RadialBin(radii, values, bin_edges, bin_indices=None): | |
""" | |
Radial histogramming that supports fast re-evaluation | |
for the same `radii` with new `values` by passing back `bin_indices`. | |
Parameters | |
---------- | |
radii: ndarray | |
The radii of the samples | |
values: ndarray | |
The values of the samples | |
bin_edges: array_like, length nbins+1 | |
The radial bin edges | |
bin_indices: ndarray, shape `radii.shape`, optional | |
The bin into which each radius falls. Default: None. | |
Returns | |
------- | |
bin_radii: ndarray, shape (nbins,) | |
The mean radius of the samples in the bin. | |
Empty bins use the bin center. | |
mean: ndarray, shape (nbins,) | |
The mean value of the samples in each bin. | |
count: ndarray, shape (nbins,) | |
The number of samples in each bin | |
bin_indices: ndarray, shape (radii.size,) | |
The bin into which each radius falls. | |
""" | |
radii = radii.reshape(-1) | |
values = values.reshape(-1) | |
nbins = len(bin_edges) - 1 | |
if bin_indices is not None: | |
# In theory, we could save `count` and `bin_radii` just like bin_indices. | |
# But hopefully these are fast. | |
count = np.bincount(bin_indices, minlength=nbins+2) | |
bin_radii = np.bincount(bin_indices, weights=radii, minlength=nbins+2) | |
weight = np.bincount(bin_indices, weights=values, minlength=nbins+2) | |
# The first element of `count` will be the number of occurences of 0 | |
# which is the number of elements below the first bin. Discard this. | |
# Even though values in the 1st bin are assigned to count[1], discarding | |
# count[0] will shift these down so count[0] holds the first bin. | |
# Values above the last bin will be assigned `nbins+1`. Discard these. | |
count = count[1:-1] | |
bin_radii = bin_radii[1:-1] | |
weight = weight[1:-1] | |
zeros = count == 0 | |
count[zeros] = 1 | |
mean = weight/count | |
bin_radii = bin_radii/count | |
mean[zeros] = 0 | |
count[zeros] = 0 | |
bin_centers = (bin_edges[:-1] + bin_edges[1:])/2. | |
bin_radii[zeros] = bin_centers[zeros] | |
return bin_radii, mean, count, bin_indices | |
# Note: digitize seems to convert radii to double internally. | |
bin_indices = np.digitize(radii, bin_edges) | |
return RadialBin(radii, values, bin_edges, bin_indices=bin_indices) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment