"""pygram11 Histogram API."""
# MIT License
#
# Copyright (c) 2020 Douglas Davis
#
# Permission is hereby granted, free of charge, to any person
# obtaining a copy of this software and associated documentation files
# (the "Software"), to deal in the Software without restriction,
# including without limitation the rights to use, copy, modify, merge,
# publish, distribute, sublicense, and/or sell copies of the Software,
# and to permit persons to whom the Software is furnished to do so,
# subject to the following conditions:
#
# The above copyright notice and this permission notice shall be
# included in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
# BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
import numpy as np
import numbers
from pygram11._backend1d import _v1dw, _f1dw, _f1dmw, _v1dmw
from pygram11._backend2d import _f2dw, _v2dw
def _likely_uniform_bins(edges):
    """Test if bin edges describe a set of fixed width bins.

    Parameters
    ----------
    edges : array_like
        The bin edges to test; at least two edges are required.

    Returns
    -------
    bool
        True if every bin width agrees (within the default
        :func:`numpy.allclose` tolerances) with both the widest and
        the narrowest bin width, False otherwise.

    Raises
    ------
    ValueError
        If fewer than two edges are given, since there is then no
        bin width to inspect.

    """
    diffs = np.ediff1d(edges)
    if diffs.size == 0:
        # Previously this surfaced as an opaque zero-size reduction
        # error from NumPy; keep the exception type, clarify the cause.
        raise ValueError("at least two bin edges are required")
    widest = np.amax(diffs)
    narrowest = np.amin(diffs)
    # If either extreme width is zero the ratios below can never all be
    # close to one, so answer directly instead of dividing by zero
    # (which would only produce inf/nan plus RuntimeWarnings).
    if widest == 0 or narrowest == 0:
        return False
    ones = np.ones_like(diffs)
    max_close = np.allclose(ones, diffs / widest)
    min_close = np.allclose(ones, diffs / narrowest)
    return max_close and min_close
def fix1d(x, bins=10, range=None, weights=None, density=False, flow=False):
    r"""Histogram data using bins of a single, fixed width.

    Parameters
    ----------
    x : array_like
        The data to histogram.
    bins : int
        How many bins to use.
    range : (float, float), optional
        Lower and upper limit of the histogram axis. When omitted the
        minimum and maximum of ``x`` are used.
    weights : array_like, optional
        A weight for every element of ``x``. When omitted every
        element gets unit weight.
    density : bool
        If True, normalize the bins as values of a PDF so that the
        integral over the range is one.
    flow : bool
        If True, underflow and overflow are folded into the first and
        last bins, respectively.

    Returns
    -------
    :py:obj:`numpy.ndarray`
        The bin counts.
    :py:obj:`numpy.ndarray`
        The standard error of each bin count, :math:`\sqrt{\sum_i w_i^2}`.

    Examples
    --------
    Twenty bins between 0 and 100:

    >>> h, __ = fix1d(x, bins=20, range=(0, 100))

    The same binning, this time with weights:

    >>> w = np.abs(np.random.randn(x.shape[0]))
    >>> h, h_err = fix1d(x, bins=20, range=(0, 100), weights=w)

    """
    x = np.ascontiguousarray(x)

    if weights is None:
        weights = np.ones_like(x, order="C")
    else:
        weights = np.ascontiguousarray(weights)
    # Anything that is not already single or double precision floating
    # point is promoted to float64 before reaching the backend.
    if weights.dtype not in (np.float32, np.float64):
        weights = weights.astype(np.float64)

    if range is None:
        start, stop = np.amin(x), np.amax(x)
    else:
        start, stop = range[0], range[1]

    # NOTE(review): the trailing positional ``True`` is a backend option
    # whose meaning is not visible from this module.
    return _f1dw(x, weights, bins, start, stop, flow, density, True)
def fix1dmw(x, weights, bins=10, range=None, flow=False):
    r"""Histogram data with several weight variations and fixed width bins.

    Parameters
    ----------
    x : array_like
        The data to histogram.
    weights : array_like
        The weight variations for the elements of ``x``; the first
        dimension is the length of ``x`` and the second dimension is
        the number of weight variations.
    bins : int
        How many bins to use.
    range : (float, float), optional
        Lower and upper limit of the histogram axis. When omitted the
        minimum and maximum of ``x`` are used.
    flow : bool
        If True, underflow and overflow are folded into the first and
        last bins, respectively.

    Returns
    -------
    :py:obj:`numpy.ndarray`
        The bin counts.
    :py:obj:`numpy.ndarray`
        The standard error of each bin count, :math:`\sqrt{\sum_i w_i^2}`.

    Examples
    --------
    Histograms of ``x`` with 50 bins between -3 and 3, one for each of
    20 different weight variations:

    >>> x = np.random.randn(10000)
    >>> twenty_weights = np.random.rand(x.shape[0], 20)
    >>> h, err = fix1dmw(x, twenty_weights, bins=50, range=(-3, 3))

    ``h`` and ``err`` are now shape ``(50, 20)``. Each column
    represents the histogram of the data using its respective weight.

    """
    x = np.ascontiguousarray(x)
    weights = np.ascontiguousarray(weights)
    # Promote any non float32/float64 weights to float64.
    if weights.dtype not in (np.float32, np.float64):
        weights = weights.astype(np.float64)

    if range is None:
        start, stop = np.amin(x), np.amax(x)
    else:
        start, stop = range[0], range[1]

    # NOTE(review): the trailing positional ``True`` is a backend option
    # whose meaning is not visible from this module.
    return _f1dmw(x, weights, bins, start, stop, flow, True)
def var1d(x, bins, weights=None, density=False, flow=False):
    r"""Histogram data using bins of variable width.

    Parameters
    ----------
    x : array_like
        The data to histogram.
    bins : array_like
        The bin edges; each edge must be greater than or equal to the
        one before it.
    weights : array_like, optional
        A weight for every element of ``x``. When omitted every
        element gets unit weight.
    density : bool
        If True, normalize the bins as values of a PDF so that the
        integral over the range is one.
    flow : bool
        If True, underflow and overflow are folded into the first and
        last bins, respectively.

    Returns
    -------
    :py:obj:`numpy.ndarray`
        The bin counts.
    :py:obj:`numpy.ndarray`
        The standard error of each bin count, :math:`\sqrt{\sum_i w_i^2}`.

    Raises
    ------
    ValueError
        If an edge in ``bins`` is smaller than its predecessor.

    Examples
    --------
    A simple histogram with variable width bins:

    >>> x = np.random.randn(10000)
    >>> bin_edges = [-3.0, -2.5, -1.5, -0.25, 0.25, 2.0, 3.0]
    >>> h, __ = var1d(x, bin_edges)

    """
    x = np.ascontiguousarray(x)

    if weights is None:
        weights = np.ones_like(x, order="C")
    else:
        weights = np.ascontiguousarray(weights)
    # Promote any non float32/float64 weights to float64.
    if weights.dtype not in (np.float32, np.float64):
        weights = weights.astype(np.float64)

    bins = np.ascontiguousarray(bins)
    edges_ordered = np.all(bins[:-1] <= bins[1:])
    if not edges_ordered:
        raise ValueError("bins sequence must monotonically increase")

    # NOTE(review): the trailing positional ``True`` in the backend calls
    # is an option whose meaning is not visible from this module.
    if not _likely_uniform_bins(bins):
        return _v1dw(x, weights, bins, flow, density, True)
    # Edges that look evenly spaced are routed to the fixed width backend.
    nbins = len(bins) - 1
    return _f1dw(x, weights, nbins, bins[0], bins[-1], flow, density, True)
def var1dmw(x, weights, bins, flow=False):
    r"""Histogram data with several weight variations and variable width bins.

    Parameters
    ----------
    x : array_like
        The data to histogram.
    weights : array_like
        The weight variations for the elements of ``x``; the first
        dimension is the length of ``x`` and the second dimension is
        the number of weight variations.
    bins : array_like
        The bin edges; each edge must be greater than or equal to the
        one before it.
    flow : bool
        If True, underflow and overflow are folded into the first and
        last bins, respectively.

    Returns
    -------
    :py:obj:`numpy.ndarray`
        The bin counts.
    :py:obj:`numpy.ndarray`
        The standard error of each bin count, :math:`\sqrt{\sum_i w_i^2}`.

    Raises
    ------
    ValueError
        If an edge in ``bins`` is smaller than its predecessor.

    Examples
    --------
    Using three different weight variations:

    >>> x = np.random.randn(10000)
    >>> weights = np.abs(np.random.randn(x.shape[0], 3))
    >>> bin_edges = [-3.0, -2.5, -1.5, -0.25, 0.25, 2.0, 3.0]
    >>> h, err = var1dmw(x, weights, bin_edges)
    >>> h.shape
    (6, 3)
    >>> err.shape
    (6, 3)

    """
    x = np.ascontiguousarray(x)
    weights = np.ascontiguousarray(weights)
    # Promote any non float32/float64 weights to float64.
    if weights.dtype not in (np.float32, np.float64):
        weights = weights.astype(np.float64)

    bins = np.ascontiguousarray(bins)
    edges_ordered = np.all(bins[:-1] <= bins[1:])
    if not edges_ordered:
        raise ValueError("bins sequence must monotonically increase")

    # NOTE(review): the trailing positional ``True`` in the backend calls
    # is an option whose meaning is not visible from this module.
    if not _likely_uniform_bins(bins):
        return _v1dmw(x, weights, bins, flow, True)
    # Edges that look evenly spaced are routed to the fixed width backend.
    nbins = len(bins) - 1
    return _f1dmw(x, weights, nbins, bins[0], bins[-1], flow, True)
def histogram(x, bins=10, range=None, weights=None, density=False, flow=False):
    r"""Histogram data in one dimension.

    Dispatches to :func:`fix1d`, :func:`fix1dmw`, :func:`var1d` or
    :func:`var1dmw` depending on the type of ``bins`` and on whether
    the shape of ``weights`` matches the shape of ``x``.

    Parameters
    ----------
    x : array_like
        The data to histogram.
    bins : int or array_like
        If int: the number of bins; if array_like: the bin edges.
    range : tuple(float, float), optional
        The definition of the edges of the bin range (start, stop).
        Must be None when ``bins`` is not an integer.
    weights : array_like, optional
        A set of weights associated with the elements of ``x``. This
        can also be a two dimensional set of multiple weight
        variations with shape (len(x), n_weight_variations).
    density : bool
        Normalize counts such that the integral over the range is
        equal to 1. If the shape of ``weights`` differs from the shape
        of ``x`` (multiple weight variations) this argument is ignored.
    flow : bool
        If True, include under/overflow in the first/last bins.

    Returns
    -------
    :py:obj:`numpy.ndarray`
        The bin counts.
    :py:obj:`numpy.ndarray`
        The standard error of each bin count, :math:`\sqrt{\sum_i w_i^2}`.

    Raises
    ------
    TypeError
        If ``range`` is given while ``bins`` is not an integer.

    Examples
    --------
    A simple fixed width histogram:

    >>> h, __ = histogram(x, bins=20, range=(0, 100))

    And with variable width histograms and weights:

    >>> h, err = histogram(x, bins=[-3, -2, -1.5, 1.5, 3.5], weights=w)

    """
    # ``np.shape`` is used instead of the ``.shape`` attribute so plain
    # sequences (lists, tuples), which the documentation allows as
    # ``array_like``, do not trigger an AttributeError here.

    # fixed bins
    if isinstance(bins, numbers.Integral):
        if weights is not None and np.shape(weights) != np.shape(x):
            return fix1dmw(x, weights, bins=bins, range=range, flow=flow)
        return fix1d(
            x, weights=weights, bins=bins, range=range, density=density, flow=flow
        )

    # variable bins
    if range is not None:
        raise TypeError("range must be None if bins is non-int")
    if weights is not None and np.shape(weights) != np.shape(x):
        return var1dmw(x, weights, bins=bins, flow=flow)
    return var1d(x, weights=weights, bins=bins, density=density, flow=flow)
def fix2d(x, y, bins=10, range=None, weights=None):
    r"""Histogram the ``x``, ``y`` data with fixed (uniform) binning.

    Parameters
    ----------
    x : array_like
        First entries in the data pairs to histogram.
    y : array_like
        Second entries in the data pairs to histogram.
    bins : int or iterable
        If int, both dimensions will have that many bins; if
        iterable, the number of bins for each dimension.
    range : iterable, optional
        Axis limits to histogram over in the form
        ``[(xmin, xmax), (ymin, ymax)]``. When omitted the minimum and
        maximum of ``x`` and ``y`` are used.
    weights : array_like, optional
        Weight for each :math:`(x_i, y_i)` pair. When omitted every
        pair gets unit weight.

    Returns
    -------
    :py:obj:`numpy.ndarray`
        The bin counts.
    :py:obj:`numpy.ndarray`
        The standard error of each bin count, :math:`\sqrt{\sum_i w_i^2}`.

    Raises
    ------
    ValueError
        If ``x`` and ``y`` differ in shape, or if ``weights`` is given
        and its shape differs from that of ``x`` and ``y``.

    Examples
    --------
    A histogram of (``x``, ``y``) with 20 bins between 0 and 100 in
    the ``x`` dimension and 10 bins between 0 and 50 in the ``y``
    dimension:

    >>> h, __ = fix2d(x, y, bins=(20, 10), range=((0, 100), (0, 50)))

    The same data, now histogrammed weighted (via ``w``):

    >>> h, err = fix2d(x, y, bins=(20, 10), range=((0, 100), (0, 50)), weights=w)

    """
    x = np.ascontiguousarray(x)
    y = np.ascontiguousarray(y)
    if x.shape != y.shape:
        raise ValueError("x and y must be the same shape")

    if weights is None:
        weights = np.ones_like(x, dtype=np.float64)
    else:
        weights = np.ascontiguousarray(weights)
        # One weight per (x, y) pair: reject mismatched buffers here
        # rather than handing them to the compiled backend.
        if weights.shape != x.shape:
            raise ValueError("weights must be the same shape as x and y")
        # Same dtype normalization the 1D functions apply to weights.
        if not (weights.dtype == np.float32 or weights.dtype == np.float64):
            weights = weights.astype(np.float64)

    if isinstance(bins, numbers.Integral):
        nx = ny = bins
    else:
        nx, ny = bins

    if range is None:
        range = [(x.min(), x.max()), (y.min(), y.max())]
    (xmin, xmax), (ymin, ymax) = range

    # NOTE(review): the trailing positional ``False, True`` are backend
    # options whose meaning is not visible from this module.
    return _f2dw(x, y, weights, nx, xmin, xmax, ny, ymin, ymax, False, True)
def var2d(x, y, xbins, ybins, weights=None):
    r"""Histogram the ``x``, ``y`` data with variable width binning.

    Parameters
    ----------
    x : array_like
        First entries in the data pairs to histogram.
    y : array_like
        Second entries in the data pairs to histogram.
    xbins : array_like
        Bin edges for the ``x`` dimension.
    ybins : array_like
        Bin edges for the ``y`` dimension.
    weights : array_like, optional
        Weight for each :math:`(x_i, y_i)` pair. When omitted every
        pair gets unit weight.

    Returns
    -------
    :py:obj:`numpy.ndarray`
        The bin counts.
    :py:obj:`numpy.ndarray`
        The standard error of each bin count, :math:`\sqrt{\sum_i w_i^2}`.

    Raises
    ------
    ValueError
        If ``x`` and ``y`` differ in shape, if an edge in ``xbins`` or
        ``ybins`` is smaller than its predecessor, or if ``weights``
        is given and its shape differs from that of ``x`` and ``y``.

    Examples
    --------
    A histogram of (``x``, ``y``) where the edges are defined by a
    :func:`numpy.logspace` in both dimensions:

    >>> bins = numpy.logspace(0.1, 1.0, 10, endpoint=True)
    >>> h, __ = var2d(x, y, bins, bins)

    """
    x = np.ascontiguousarray(x)
    y = np.ascontiguousarray(y)
    if x.shape != y.shape:
        raise ValueError("x and y must be the same shape")

    xbins = np.ascontiguousarray(xbins)
    ybins = np.ascontiguousarray(ybins)
    if not np.all(xbins[1:] >= xbins[:-1]):
        raise ValueError("xbins sequence must monotonically increase")
    if not np.all(ybins[1:] >= ybins[:-1]):
        raise ValueError("ybins sequence must monotonically increase")

    if weights is None:
        weights = np.ones_like(x, dtype=np.float64)
    else:
        weights = np.ascontiguousarray(weights)
        # One weight per (x, y) pair: reject mismatched buffers here
        # rather than handing them to the compiled backend.
        if weights.shape != x.shape:
            raise ValueError("weights must be the same shape as x and y")
        # Same dtype normalization the 1D functions apply to weights.
        if not (weights.dtype == np.float32 or weights.dtype == np.float64):
            weights = weights.astype(np.float64)

    # NOTE(review): the trailing positional ``False, True`` are backend
    # options whose meaning is not visible from this module.
    return _v2dw(x, y, weights, xbins, ybins, False, True)
def histogram2d(x, y, bins=10, range=None, weights=None):
    r"""Histogram data in two dimensions.

    This function provides an API very similar to
    :func:`numpy.histogram2d`. Keep in mind that the returns are
    different.

    Parameters
    ----------
    x: array_like
        Array representing the ``x`` coordinate of the data to histogram.
    y: array_like
        Array representing the ``y`` coordinate of the data to histogram.
    bins: int or array_like or [int, int] or [array, array], optional
        The bin specification:

        * If `int`, the number of bins for the two dimensions
          (``nx = ny = bins``).
        * If `array_like`, the bin edges for the two dimensions
          (``x_edges = y_edges = bins``).
        * If [int, int], the number of bins in each dimension
          (``nx, ny = bins``).
        * If [`array_like`, `array_like`], the bin edges in each
          dimension (``x_edges, y_edges = bins``).
    range: array_like, shape(2,2), optional
        The edges of this histogram along each dimension. If ``bins``
        is not integral, then this parameter is ignored. If None, the
        default is ``[[x.min(), x.max()], [y.min(), y.max()]]``.
    weights: array_like
        An array of weights associated to each element :math:`(x_i,
        y_i)` pair. Each pair of the data will contribute its
        associated weight to the bin count.

    Returns
    -------
    :py:obj:`numpy.ndarray`
        The bin counts.
    :py:obj:`numpy.ndarray`
        The standard error of each bin count, :math:`\sqrt{\sum_i w_i^2}`.

    Examples
    --------
    >>> h, err = histogram2d(x, y, weights=w)

    """
    # Objects without a length (e.g. a plain int) count as a single
    # bin specification.
    try:
        n_spec = len(bins)
    except TypeError:
        n_spec = 1

    if n_spec == 1:
        return fix2d(x, y, bins=bins, range=range, weights=weights)

    if n_spec != 2:
        # Any other length: one shared set of edges for both axes.
        return var2d(x, y, bins, bins, weights=weights)

    # Exactly two entries: either a pair of bin counts or a pair of
    # edge arrays.
    both_integral = isinstance(bins[0], numbers.Integral) and isinstance(
        bins[1], numbers.Integral
    )
    if both_integral:
        return fix2d(x, y, bins=bins, range=range, weights=weights)
    return var2d(x, y, bins[0], bins[1], weights=weights)