xmodar/num_bins_in_hist.py

## num_bins_in_hist.py
import torch

def _indexer_into(x, dim=0, keepdim=False):
    '''indexes into x along dim.'''
    def indexer(i):
        # (e.g., x[:, 2, :] is indexer(2) if dim == 1)
        out = x[[slice(None, None)] * dim + [i, ...]]
        return out.unsqueeze(dim) if keepdim and x.dim() != out.dim() else out
    return indexer


def percentile(x, q, dim=0, keepdim=False, sort=True):
    '''The percentile of data samples.

    Args:
        x: Data tensor.
        q: The percentage in [0, 100].
        dim: The dimension along which we perform the operation.
        keepdim: Whether to keep the dimension of operation.
        sort: Set to False only if x is sorted to save computation.

    Returns:
        The `q`th percentile of the data.
    '''
    if sort:
        x = x.data.sort(dim)[0]
    if not (0 <= q <= 100):
        raise ValueError('`q` must be in between 0 and 100.')
    value = (q / 100) * x.size(dim) - 1
    if value < 0:
        return 0
    index = int(value)
    frac = value - index
    next_index = min(index + 1, x.size(dim) - 1)
    X = _indexer_into(x.data, dim, keepdim)
    return (1 - frac) * X(index) + frac * X(next_index)


def inter_quartile_range(x, dim=0, keepdim=False, sort=True):
    '''IQR: the inter-quartile range of samples of data.

    Args:
        x: Data tensor.
        dim: The dimension along which we perform the operation.
        keepdim: Whether to keep the dimension of operation.
        sort: Set to False only if x is sorted to save computation.

    Returns:
        The IQR of the data.
    '''
    if sort:
        x = x.data.sort(dim)[0]
    q1 = percentile(x, 25, dim=dim, keepdim=keepdim, sort=False)
    q3 = percentile(x, 75, dim=dim, keepdim=keepdim, sort=False)
    return q3 - q1


def num_hist_bins(x, min_bins=1, max_bins=1e6,
                  dim=0, keepdim=False, sort=True):
    '''Freedman-Diaconis rule for the number of bins in a histogram.

    Args:
        x: Data tensor.
        dim: The dimension along which we perform the operation.
        keepdim: Whether to keep the dimension of operation.
        min_bins: The minimum number of bins.
        max_bins: The maximum number of bins.
        sort: Set to False only if x is sorted to save computation.

    Returns:
        The appropriate number of bins to histogram the given data.
    '''
    if sort:
        x = x.data.sort(dim)[0]
    long = lambda v: torch.tensor(v, dtype=torch.long, device=x.device)
    iqr = inter_quartile_range(x, dim=dim, keepdim=keepdim, sort=False)
    bin_width = 2 * x.size(dim)**(-1 / 3) * iqr
    X = _indexer_into(x.data, dim, keepdim)
    num_bins = ((X(-1) - X(0)) / bin_width).round().long()
    return num_bins.clamp(min=long(min_bins), max=long(max_bins))
	import torch

	def _indexer_into(x, dim=0, keepdim=False):
	'''indexes into x along dim.'''
	def indexer(i):
	# (e.g., x[:, 2, :] is indexer(2) if dim == 1)
	out = x[[slice(None, None)] * dim + [i, ...]]
	return out.unsqueeze(dim) if keepdim and x.dim() != out.dim() else out
	return indexer


	def percentile(x, q, dim=0, keepdim=False, sort=True):
	'''The percentile of data samples.

	Args:
	x: Data tensor.
	q: The percentage in [0, 100].
	dim: The dimension along which we perform the operation.
	keepdim: Whether to keep the dimension of operation.
	sort: Set to False only if x is sorted to save computation.

	Returns:
	The `q`th percentile of the data.
	'''
	if sort:
	x = x.data.sort(dim)[0]
	if not (0 <= q <= 100):
	raise ValueError('`q` must be in between 0 and 100.')
	value = (q / 100) * x.size(dim) - 1
	if value < 0:
	return 0
	index = int(value)
	frac = value - index
	next_index = min(index + 1, x.size(dim) - 1)
	X = _indexer_into(x.data, dim, keepdim)
	return (1 - frac) * X(index) + frac * X(next_index)


	def inter_quartile_range(x, dim=0, keepdim=False, sort=True):
	'''IQR: the inter-quartile range of samples of data.

	Args:
	x: Data tensor.
	dim: The dimension along which we perform the operation.
	keepdim: Whether to keep the dimension of operation.
	sort: Set to False only if x is sorted to save computation.

	Returns:
	The IQR of the data.
	'''
	if sort:
	x = x.data.sort(dim)[0]
	q1 = percentile(x, 25, dim=dim, keepdim=keepdim, sort=False)
	q3 = percentile(x, 75, dim=dim, keepdim=keepdim, sort=False)
	return q3 - q1


	def num_hist_bins(x, min_bins=1, max_bins=1e6,
	dim=0, keepdim=False, sort=True):
	'''Freedman-Diaconis rule for the number of bins in a histogram.

	Args:
	x: Data tensor.
	dim: The dimension along which we perform the operation.
	keepdim: Whether to keep the dimension of operation.
	min_bins: The minimum number of bins.
	max_bins: The maximum number of bins.
	sort: Set to False only if x is sorted to save computation.

	Returns:
	The appropriate number of bins to histogram the given data.
	'''
	if sort:
	x = x.data.sort(dim)[0]
	long = lambda v: torch.tensor(v, dtype=torch.long, device=x.device)
	iqr = inter_quartile_range(x, dim=dim, keepdim=keepdim, sort=False)
	bin_width = 2 * x.size(dim)*(-1 / 3) iqr
	X = _indexer_into(x.data, dim, keepdim)
	num_bins = ((X(-1) - X(0)) / bin_width).round().long()
	return num_bins.clamp(min=long(min_bins), max=long(max_bins))