Source code for pygram11.hist

"""pygram11 Histogram API."""

# MIT License
#
# Copyright (c) 2020 Douglas Davis
#
# Permission is hereby granted, free of charge, to any person
# obtaining a copy of this software and associated documentation files
# (the "Software"), to deal in the Software without restriction,
# including without limitation the rights to use, copy, modify, merge,
# publish, distribute, sublicense, and/or sell copies of the Software,
# and to permit persons to whom the Software is furnished to do so,
# subject to the following conditions:
#
# The above copyright notice and this permission notice shall be
# included in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
# BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

import numpy as np
import numbers

from pygram11._backend1d import _v1dw, _f1dw, _f1dmw, _v1dmw
from pygram11._backend2d import _f2dw, _v2dw


def _likely_uniform_bins(edges):
    """Check whether a set of bin edges looks uniformly spaced.

    The width of every bin is divided by the widest and by the
    narrowest width; the edges are treated as uniform only when both
    sets of ratios are all close to one (per :func:`numpy.allclose`).

    Parameters
    ----------
    edges : array_like
        The bin edges to inspect.

    Returns
    -------
    bool
        True if all bin widths are approximately equal.

    """
    widths = np.ediff1d(edges)
    unity = np.ones_like(widths)
    # The reference (second) argument of allclose is the ratio, so the
    # relative tolerance scales with the ratio, not with one.
    relative_to_widest = widths / np.amax(widths)
    matches_widest = np.allclose(unity, relative_to_widest)
    relative_to_narrowest = widths / np.amin(widths)
    matches_narrowest = np.allclose(unity, relative_to_narrowest)
    return matches_widest and matches_narrowest


def fix1d(x, bins=10, range=None, weights=None, density=False, flow=False):
    r"""Histogram data using bins of fixed (uniform) width.

    Parameters
    ----------
    x : array_like
        Data to histogram.
    bins : int
        The number of bins.
    range : (float, float), optional
        The minimum and maximum of the histogram axis. If not given,
        the minimum and maximum of ``x`` are used.
    weights : array_like, optional
        The weights for each element of ``x``. If not given, every
        element is assigned a weight of one.
    density : bool
        If True, normalize histogram bins as value of PDF such that
        the integral over the range is one.
    flow : bool
        If True, the under and overflow bin contents are added to the
        first and last bins, respectively.

    Returns
    -------
    :py:obj:`numpy.ndarray`
        The bin counts.
    :py:obj:`numpy.ndarray`
        The standard error of each bin count, :math:`\sqrt{\sum_i w_i^2}`.

    Examples
    --------
    A histogram of ``x`` with 20 bins between 0 and 100:

    >>> h, __ = fix1d(x, bins=20, range=(0, 100))

    The same data, now histogrammed with weights:

    >>> w = np.abs(np.random.randn(x.shape[0]))
    >>> h, h_err = fix1d(x, bins=20, range=(0, 100), weights=w)

    """
    x = np.ascontiguousarray(x)

    if weights is None:
        # Default to unit weights; they inherit the dtype of x, so
        # promote anything that is not already float32/float64.
        weights = np.ones_like(x, order="C")
        if weights.dtype != np.float32 and weights.dtype != np.float64:
            weights = weights.astype(np.float64)
    else:
        # User supplied weights are only made contiguous.
        weights = np.ascontiguousarray(weights)

    if range is None:
        start, stop = np.amin(x), np.amax(x)
    else:
        start, stop = range[0], range[1]

    # NOTE(review): the meaning of the trailing True is defined by the
    # compiled backend and is not visible from this module.
    return _f1dw(x, weights, bins, start, stop, flow, density, True)


def fix1dmw(x, weights, bins=10, range=None, flow=False):
    r"""Histogram data with several weight variations and fixed width bins.

    Parameters
    ----------
    x : array_like
        Data to histogram.
    weights : array_like
        The weight variations for the elements of ``x``, first
        dimension is the length of ``x``, second dimension is the
        number of weight variations. Weights that are not float32 or
        float64 are converted to float64.
    bins : int
        The number of bins.
    range : (float, float), optional
        The minimum and maximum of the histogram axis. If not given,
        the minimum and maximum of ``x`` are used.
    flow : bool
        If True, the under and overflow bin contents are added to the
        first and last bins, respectively.

    Returns
    -------
    :py:obj:`numpy.ndarray`
        The bin counts.
    :py:obj:`numpy.ndarray`
        The standard error of each bin count, :math:`\sqrt{\sum_i w_i^2}`.

    Examples
    --------
    Multiple histograms of ``x`` with 50 bins between -3 and 3; using
    20 different weight variations:

    >>> x = np.random.randn(10000)
    >>> twenty_weights = np.random.rand(x.shape[0], 20)
    >>> h, err = fix1dmw(x, twenty_weights, bins=50, range=(-3, 3))

    ``h`` and ``err`` are now shape ``(50, 20)``. Each column
    represents the histogram of the data using its respective weight.

    """
    x = np.ascontiguousarray(x)
    weights = np.ascontiguousarray(weights)
    # Anything other than float32/float64 is promoted to double.
    if weights.dtype != np.float32 and weights.dtype != np.float64:
        weights = weights.astype(np.float64)

    if range is None:
        start, stop = np.amin(x), np.amax(x)
    else:
        start, stop = range[0], range[1]

    # NOTE(review): the meaning of the trailing True is defined by the
    # compiled backend and is not visible from this module.
    return _f1dmw(x, weights, bins, start, stop, flow, True)


def var1d(x, bins, weights=None, density=False, flow=False):
    r"""Histogram data using bins of variable width.

    If the given edges turn out to be (approximately) uniformly
    spaced, the fixed width implementation is used instead.

    Parameters
    ----------
    x : array_like
        Data to histogram.
    bins : array_like
        The bin edges.
    weights : array_like, optional
        The weight for each element of ``x``. If not given, every
        element is assigned a weight of one.
    density : bool
        If True, normalize histogram bins as value of PDF such that
        the integral over the range is one.
    flow : bool
        If True, the under and overflow bin contents are added to the
        first and last bins, respectively.

    Returns
    -------
    :py:obj:`numpy.ndarray`
        The bin counts.
    :py:obj:`numpy.ndarray`
        The standard error of each bin count, :math:`\sqrt{\sum_i w_i^2}`.

    Raises
    ------
    ValueError
        If any bin edge is smaller than the edge preceding it.

    Examples
    --------
    A simple histogram with variable width bins:

    >>> x = np.random.randn(10000)
    >>> bin_edges = [-3.0, -2.5, -1.5, -0.25, 0.25, 2.0, 3.0]
    >>> h, __ = var1d(x, bin_edges)

    """
    x = np.ascontiguousarray(x)

    if weights is None:
        # Default to unit weights; they inherit the dtype of x, so
        # promote anything that is not already float32/float64.
        weights = np.ones_like(x, order="C")
        if weights.dtype != np.float32 and weights.dtype != np.float64:
            weights = weights.astype(np.float64)
    else:
        # User supplied weights are only made contiguous.
        weights = np.ascontiguousarray(weights)

    bins = np.ascontiguousarray(bins)
    lower_edges, upper_edges = bins[:-1], bins[1:]
    if not np.all(upper_edges >= lower_edges):
        raise ValueError("bins sequence must monotonically increase")

    if not _likely_uniform_bins(bins):
        return _v1dw(x, weights, bins, flow, density, True)

    # Evenly spaced edges: describe the axis as (nbins, first, last)
    # and hand off to the fixed width backend.
    nbins = len(bins) - 1
    return _f1dw(x, weights, nbins, bins[0], bins[-1], flow, density, True)


def var1dmw(x, weights, bins, flow=False):
    r"""Histogram data with several weight variations and variable width bins.

    If the given edges turn out to be (approximately) uniformly
    spaced, the fixed width implementation is used instead.

    Parameters
    ----------
    x : array_like
        Data to histogram.
    weights : array_like
        The weight variations for the elements of ``x``, first
        dimension is the length of ``x``, second dimension is the
        number of weight variations. Weights that are not float32 or
        float64 are converted to float64.
    bins : array_like
        The bin edges.
    flow : bool
        If True, the under and overflow bin contents are added to the
        first and last bins, respectively.

    Returns
    -------
    :py:obj:`numpy.ndarray`
        The bin counts.
    :py:obj:`numpy.ndarray`
        The standard error of each bin count, :math:`\sqrt{\sum_i w_i^2}`.

    Raises
    ------
    ValueError
        If any bin edge is smaller than the edge preceding it.

    Examples
    --------
    Using three different weight variations:

    >>> x = np.random.randn(10000)
    >>> weights = np.abs(np.random.randn(x.shape[0], 3))
    >>> bin_edges = [-3.0, -2.5, -1.5, -0.25, 0.25, 2.0, 3.0]
    >>> h, err = var1dmw(x, weights, bin_edges)
    >>> h.shape
    (6, 3)
    >>> err.shape
    (6, 3)

    """
    x = np.ascontiguousarray(x)
    weights = np.ascontiguousarray(weights)
    # Anything other than float32/float64 is promoted to double.
    if weights.dtype != np.float32 and weights.dtype != np.float64:
        weights = weights.astype(np.float64)

    bins = np.ascontiguousarray(bins)
    lower_edges, upper_edges = bins[:-1], bins[1:]
    if not np.all(upper_edges >= lower_edges):
        raise ValueError("bins sequence must monotonically increase")

    if not _likely_uniform_bins(bins):
        return _v1dmw(x, weights, bins, flow, True)

    # Evenly spaced edges: describe the axis as (nbins, first, last)
    # and hand off to the fixed width backend.
    nbins = len(bins) - 1
    return _f1dmw(x, weights, nbins, bins[0], bins[-1], flow, True)


def histogram(x, bins=10, range=None, weights=None, density=False, flow=False):
    r"""Histogram data in one dimension.

    Dispatches to :func:`fix1d`, :func:`fix1dmw`, :func:`var1d` or
    :func:`var1dmw` depending on the type of ``bins`` and on whether
    the shape of ``weights`` matches the shape of ``x``.

    Parameters
    ----------
    x : array_like
        Data to histogram.
    bins : int or array_like
        If int: the number of bins; if array_like: the bin edges.
    range : tuple(float, float), optional
        The definition of the edges of the bin range (start, stop).
        Must be None when ``bins`` defines the edges.
    weights : array_like, optional
        A set of weights associated with the elements of ``x``. This
        can also be a two dimensional set of multiple weight
        variations with shape (len(x), n_weight_variations).
    density : bool
        Normalize counts such that the integral over the range is
        equal to 1. If ``weights`` describes multiple weight
        variations this argument is ignored.
    flow : bool
        If True, include under/overflow in the first/last bins.

    Returns
    -------
    :py:obj:`numpy.ndarray`
        The bin counts.
    :py:obj:`numpy.ndarray`
        The standard error of each bin count, :math:`\sqrt{\sum_i w_i^2}`.

    Raises
    ------
    TypeError
        If ``range`` is given while ``bins`` is not an integer.

    Examples
    --------
    A simple fixed width histogram:

    >>> h, __ = histogram(x, bins=20, range=(0, 100))

    And with variable width histograms and weights:

    >>> h, err = histogram(x, bins=[-3, -2, -1.5, 1.5, 3.5], weights=w)

    """
    fixed_width = isinstance(bins, numbers.Integral)
    if not fixed_width and range is not None:
        raise TypeError("range must be None if bins is non-int")

    # np.shape works for any array_like (lists, tuples, ...) whereas
    # the ``.shape`` attribute only exists on array objects; for
    # objects that do have ``.shape`` the result is the same.
    multiple_weights = weights is not None and np.shape(weights) != np.shape(x)

    if fixed_width:
        if multiple_weights:
            return fix1dmw(x, weights, bins=bins, range=range, flow=flow)
        return fix1d(
            x, weights=weights, bins=bins, range=range, density=density, flow=flow
        )

    if multiple_weights:
        return var1dmw(x, weights, bins=bins, flow=flow)
    return var1d(x, weights=weights, bins=bins, density=density, flow=flow)


def fix2d(x, y, bins=10, range=None, weights=None):
    r"""Histogram the ``x``, ``y`` data with fixed (uniform) binning.

    Parameters
    ----------
    x : array_like
        First entries in the data pairs to histogram.
    y : array_like
        Second entries in the data pairs to histogram.
    bins : int or iterable
        If int, both dimensions will have that many bins; if
        iterable, the number of bins for each dimension.
    range : iterable, optional
        Axis limits to histogram over in the form
        ``[(xmin, xmax), (ymin, ymax)]``. If not given, the minimum
        and maximum of ``x`` and of ``y`` are used.
    weights : array_like, optional
        The weight for each :math:`(x_i, y_i)` pair. If not given,
        every pair is assigned a weight of one.

    Returns
    -------
    :py:obj:`numpy.ndarray`
        The bin counts.
    :py:obj:`numpy.ndarray`
        The standard error of each bin count, :math:`\sqrt{\sum_i w_i^2}`.

    Raises
    ------
    ValueError
        If ``x`` and ``y`` do not have the same shape.

    Examples
    --------
    A histogram of (``x``, ``y``) with 20 bins between 0 and 100 in
    the ``x`` dimension and 10 bins between 0 and 50 in the ``y``
    dimension:

    >>> h, __ = fix2d(x, y, bins=(20, 10), range=((0, 100), (0, 50)))

    The same data, now histogrammed weighted (via ``w``):

    >>> h, err = fix2d(x, y, bins=(20, 10), range=((0, 100), (0, 50)), weights=w)

    """
    x = np.ascontiguousarray(x)
    y = np.ascontiguousarray(y)
    if x.shape != y.shape:
        raise ValueError("x and y must be the same shape")

    if weights is not None:
        weights = np.ascontiguousarray(weights)
    else:
        # Unit weights in double precision.
        weights = np.ones_like(x, dtype=np.float64)

    # A single integer applies to both axes.
    if isinstance(bins, numbers.Integral):
        nx, ny = bins, bins
    else:
        nx, ny = bins

    if range is not None:
        (xmin, xmax), (ymin, ymax) = range
    else:
        xmin, xmax = x.min(), x.max()
        ymin, ymax = y.min(), y.max()

    # NOTE(review): the meaning of the trailing (False, True) flags is
    # defined by the compiled backend and is not visible from this module.
    return _f2dw(x, y, weights, nx, xmin, xmax, ny, ymin, ymax, False, True)


def var2d(x, y, xbins, ybins, weights=None):
    r"""Histogram the ``x``, ``y`` data with variable width binning.

    Parameters
    ----------
    x : array_like
        First entries in the data pairs to histogram.
    y : array_like
        Second entries in the data pairs to histogram.
    xbins : array_like
        The bin edges for the ``x`` dimension.
    ybins : array_like
        The bin edges for the ``y`` dimension.
    weights : array_like, optional
        The weight for each :math:`(x_i, y_i)` pair. If not given,
        every pair is assigned a weight of one.

    Returns
    -------
    :py:obj:`numpy.ndarray`
        The bin counts.
    :py:obj:`numpy.ndarray`
        The standard error of each bin count, :math:`\sqrt{\sum_i w_i^2}`.

    Raises
    ------
    ValueError
        If ``x`` and ``y`` do not have the same shape, or if any edge
        in ``xbins`` or ``ybins`` is smaller than the edge preceding it.

    Examples
    --------
    A histogram of (``x``, ``y``) where the edges are defined by a
    :func:`numpy.logspace` in both dimensions:

    >>> bins = numpy.logspace(0.1, 1.0, 10, endpoint=True)
    >>> h, __ = var2d(x, y, bins, bins)

    """
    x = np.ascontiguousarray(x)
    y = np.ascontiguousarray(y)
    if x.shape != y.shape:
        raise ValueError("x and y must be the same shape")

    xbins = np.ascontiguousarray(xbins)
    ybins = np.ascontiguousarray(ybins)
    x_ordered = np.all(xbins[1:] >= xbins[:-1])
    if not x_ordered:
        raise ValueError("xbins sequence must monotonically increase")
    y_ordered = np.all(ybins[1:] >= ybins[:-1])
    if not y_ordered:
        raise ValueError("ybins sequence must monotonically increase")

    if weights is not None:
        weights = np.ascontiguousarray(weights)
    else:
        # Unit weights in double precision.
        weights = np.ones_like(x, dtype=np.float64)

    # NOTE(review): the meaning of the trailing (False, True) flags is
    # defined by the compiled backend and is not visible from this module.
    return _v2dw(x, y, weights, xbins, ybins, False, True)


def histogram2d(x, y, bins=10, range=None, weights=None):
    r"""Histogram data in two dimensions.

    This function provides an API very similar to
    :func:`numpy.histogram2d`. Keep in mind that the returns are
    different.

    Parameters
    ----------
    x : array_like
        Array representing the ``x`` coordinate of the data to histogram.
    y : array_like
        Array representing the ``y`` coordinate of the data to histogram.
    bins : int or array_like or [int, int] or [array, array], optional
        The bin specification:
           * If `int`, the number of bins for the two dimensions
             (``nx = ny = bins``).
           * If `array_like`, the bin edges for the two dimensions
             (``x_edges = y_edges = bins``).
           * If [int, int], the number of bins in each dimension
             (``nx, ny = bins``).
           * If [`array_like`, `array_like`], the bin edges in each
             dimension (``x_edges, y_edges = bins``).
    range : array_like, shape(2,2), optional
        The edges of this histogram along each dimension. If ``bins``
        is not integral, then this parameter is ignored. If None, the
        default is ``[[x.min(), x.max()], [y.min(), y.max()]]``.
    weights : array_like, optional
        An array of weights associated to each element :math:`(x_i,
        y_i)` pair.  Each pair of the data will contribute its
        associated weight to the bin count.

    Returns
    -------
    :py:obj:`numpy.ndarray`
        The bin counts.
    :py:obj:`numpy.ndarray`
        The standard error of each bin count, :math:`\sqrt{\sum_i w_i^2}`.

    Examples
    --------
    >>> h, err = histogram2d(x, y, weights=w)

    """
    try:
        n_spec = len(bins)
    except TypeError:
        # Objects without a length (e.g. a plain int) count as one entry.
        n_spec = 1

    # One entry: a bin count shared by both axes.
    if n_spec == 1:
        return fix2d(x, y, bins=bins, range=range, weights=weights)

    # Neither one nor two entries: a single set of edges for both axes.
    if n_spec != 2:
        return var2d(x, y, bins, bins, weights=weights)

    # Two entries: a pair of bin counts, otherwise a pair of edge arrays.
    pair_of_counts = all(isinstance(bins[i], numbers.Integral) for i in (0, 1))
    if pair_of_counts:
        return fix2d(x, y, bins=bins, range=range, weights=weights)
    return var2d(x, y, bins[0], bins[1], weights=weights)