# Source code for pygram11._hist

# stdlib
from typing import Any, Sequence, Tuple, Optional, Union

# third party
import numpy as np

# pygram11
from pygram11._backend import (
_f1d,
_v1d,
_f1dw,
_v1dw,
_f1dmw,
_v1dmw,
_f2d,
_f2dw,
_v2d,
_v2dw,
)
from ._helpers import limits_1d, limits_2d, likely_uniform_bins

def _densify_fixed_counts(counts: np.ndarray, width: float) -> np.ndarray:
"""Convert fixed width histogram to unity integral over PDF."""
return np.array(counts / (width * counts.sum()), dtype=np.float64)

def _densify_fixed_weighted_counts(
raw: Tuple[np.ndarray, np.ndarray], width: float
) -> Tuple[np.ndarray, np.ndarray]:
"""Convert fixed width weighted histogram to unity integral over PDF."""
counts = raw
integral = counts.sum()
res0 = _densify_fixed_counts(counts, width)
variances = raw
f1 = 1.0 / ((width * integral) ** 2)
f2 = counts / integral
res1 = f1 * (variances + (f2 * f2 * variances.sum()))
return res0, np.sqrt(res1)

def _densify_variable_counts(
counts: np.ndarray,
edges: np.ndarray,
) -> np.ndarray:
"""Convert variable width histogram to unity integral over PDF."""
widths = edges[1:] - edges[:-1]
integral = float(np.sum(counts))
return np.array(counts / widths / integral, dtype=np.float64)

def _densify_variable_weighted_counts(
raw: Tuple[np.ndarray, np.ndarray], edges: np.ndarray
) -> Tuple[np.ndarray, np.ndarray]:
"""Convert variable width histogram to unity integral over PDF."""
counts = raw
variances = raw
integral = counts.sum()
widths = edges[1:] - edges[:-1]
res0 = _densify_variable_counts(counts, edges)
f1 = 1.0 / ((widths * integral) ** 2)
f2 = counts / integral
res1 = f1 * (variances + (f2 * f2 * variances.sum()))
return res0, res1

def bin_edges(bins: int, range: Tuple[float, float]) -> np.ndarray:
    """Construct bin edges given number of bins and axis limits.

    Parameters
    ----------
    bins : int
        Total number of bins.
    range : (float, float)
        Minimum and maximum of the histogram axis.

    Returns
    -------
    numpy.ndarray
        Edges defined by the number of bins and axis limits; the
        array has ``bins + 1`` entries.

    Examples
    --------
    >>> bin_edges(bins=8, range=(-2, 2))
    array([-2. , -1.5, -1. , -0.5,  0. ,  0.5,  1. ,  1.5,  2. ])

    """
    lower, upper = range[0], range[1]
    return np.linspace(lower, upper, bins + 1)

def bin_centers(
    bins: Union[int, Sequence[float], np.ndarray],
    range: Optional[Tuple[float, float]] = None,
) -> np.ndarray:
    """Construct array of center values for each bin.

    Parameters
    ----------
    bins : int or array_like
        Number of bins or bin edges array.
    range : (float, float), optional
        The minimum and maximum of the histogram axis.

    Returns
    -------
    numpy.ndarray
        Array of bin centers.

    Raises
    ------
    ValueError
        If ``bins`` is an integer and ``range`` is undefined (None).

    Examples
    --------
    The centers given the number of bins and max/min:

    >>> bin_centers(10, range=(-3, 3))
    array([-2.7, -2.1, -1.5, -0.9, -0.3,  0.3,  0.9,  1.5,  2.1,  2.7])

    Or given bin edges:

    >>> bin_centers([0, 1, 2, 3, 4])
    array([0.5, 1.5, 2.5, 3.5])

    """
    # NumPy integer scalars count as "number of bins" too; otherwise
    # they would be treated as a (zero dimensional) edges array.
    if isinstance(bins, (int, np.integer)):
        if range is None:
            raise ValueError("Integer bins requires defining range")
        bins = bin_edges(int(bins), range=range)
    edges = np.asarray(bins)
    return 0.5 * (edges[1:] + edges[:-1])

def fix1d(
    x: np.ndarray,
    bins: int = 10,
    range: Optional[Tuple[float, float]] = None,
    weights: Optional[np.ndarray] = None,
    density: bool = False,
    flow: bool = False,
    cons_var: bool = False,
) -> Tuple[np.ndarray, Union[np.ndarray, None]]:
    r"""Histogram data with fixed (uniform) bin widths.

    Parameters
    ----------
    x : numpy.ndarray
        Data to histogram.
    bins : int
        The number of bins.
    range : (float, float), optional
        The minimum and maximum of the histogram axis. If ``None``,
        min and max of ``x`` will be used.
    weights : numpy.ndarray, optional
        The weights for each element of ``x``. If weights are absent,
        the second return type will be ``None``.
    density : bool
        Normalize histogram counts as value of PDF such that the
        integral over the range is unity.
    flow : bool
        Include under/overflow in the first/last bins.
    cons_var : bool
        If ``True``, conserve the variance rather than return the
        standard error (square root of the variance).

    Raises
    ------
    ValueError
        If ``x`` and ``weights`` have incompatible shapes.
    TypeError
        If ``x`` or ``weights`` are unsupported types

    Returns
    -------
    :py:obj:`numpy.ndarray`
        The resulting histogram bin counts.
    :py:obj:`numpy.ndarray`, optional
        The standard error of each bin count, :math:`\sqrt{\sum_i
        w_i^2}`. The return is ``None`` if weights are not used. If
        ``cons_var`` is ``True``, the variances are returned.

    Examples
    --------
    A histogram of ``x`` with 20 bins between 0 and 100:

    >>> rng = np.random.default_rng(123)
    >>> x = rng.uniform(0, 100, size=(100,))
    >>> h, __ = fix1d(x, bins=20, range=(0, 100))

    When weights are absent the second return is ``None``. The same
    data, now histogrammed with weights and over/underflow included:

    >>> rng = np.random.default_rng(123)
    >>> x = rng.uniform(0, 100, size=(100,))
    >>> w = rng.uniform(0.1, 0.9, x.shape)
    >>> h, stderr = fix1d(x, bins=20, range=(0, 100), weights=w, flow=True)

    """
    # Validate before touching the data so bad input fails fast.
    if weights is not None and np.shape(x) != np.shape(weights):
        raise ValueError("x and weights must have the same shape")

    xmin, xmax = limits_1d(x, range)
    # Use the same integer bin count for the backend call and for the
    # bin width so the two can never disagree.
    nbins = int(bins)

    if weights is None:
        result = _f1d(x, nbins, xmin, xmax, flow)
        if density:
            width = (xmax - xmin) / nbins
            result = _densify_fixed_counts(result, width)
        return result, None

    result = _f1dw(x, weights, nbins, xmin, xmax, flow)
    if density:
        width = (xmax - xmin) / nbins
        result = _densify_fixed_weighted_counts(result, width)
    counts, variances = result

    if cons_var:
        return counts, variances
    return counts, np.sqrt(variances)

def fix1dmw(
    x: np.ndarray,
    weights: np.ndarray,
    bins: int = 10,
    range: Optional[Tuple[float, float]] = None,
    flow: bool = False,
    cons_var: bool = False,
) -> Tuple[np.ndarray, np.ndarray]:
    r"""Histogram data with multiple weight variations and fixed width bins.

    The weights array must have a total number of rows equal to the
    length of the input data. The number of columns in the weights
    array is equal to the number of weight variations. (The weights
    array must be an M x N matrix where M is the length of ``x`` and
    N is the number of weight variations).

    Parameters
    ----------
    x : numpy.ndarray
        Data to histogram.
    weights : numpy.ndarray
        The weight variations for the elements of ``x``, first
        dimension is the length of ``x``, second dimension is the
        number of weights variations.
    bins : int
        The number of bins.
    range : (float, float), optional
        The minimum and maximum of the histogram axis. If ``None``,
        min and max of ``x`` will be used.
    flow : bool
        Include under/overflow in the first/last bins.
    cons_var : bool
        If ``True``, conserve the variance rather than return the
        standard error (square root of the variance).

    Raises
    ------
    ValueError
        If ``x`` and ``weights`` have incompatible shapes (if
        ``x.shape[0] != weights.shape[0]``).
    ValueError
        If ``weights`` is not a two dimensional array.
    TypeError
        If ``x`` or ``weights`` are unsupported types

    Returns
    -------
    :py:obj:`numpy.ndarray`
        The bin counts.
    :py:obj:`numpy.ndarray`
        The standard error of each bin count, :math:`\sqrt{\sum_i
        w_i^2}`. If ``cons_var`` is ``True``, the variances are
        returned.

    Examples
    --------
    Multiple histograms of ``x`` using 20 different weight variations:

    >>> rng = np.random.default_rng(123)
    >>> x = rng.standard_normal(10000)
    >>> twenty_weights = np.abs(rng.standard_normal((x.shape[0], 20)))
    >>> h, err = fix1dmw(x, twenty_weights, bins=50, range=(-3, 3))

    ``h`` and ``err`` are now shape ``(50, 20)``. Each column
    represents the histogram of the data using its respective weight.

    """
    weights_shape = np.shape(weights)
    if len(weights_shape) != 2:
        raise ValueError("weights must be a two dimensional array.")

    # x is one dimensional and weights is (len(x), n_variations), so
    # only the leading dimensions are comparable.
    if np.shape(x)[0] != weights_shape[0]:
        raise ValueError("x and weights have incompatible shapes.")

    xmin, xmax = limits_1d(x, range)
    counts, variances = _f1dmw(x, weights, int(bins), xmin, xmax, flow)
    if cons_var:
        return counts, variances
    return counts, np.sqrt(variances)

def var1d(
    x: np.ndarray,
    bins: np.ndarray,
    weights: Optional[np.ndarray] = None,
    density: bool = False,
    flow: bool = False,
    cons_var: bool = False,
) -> Tuple[np.ndarray, Optional[np.ndarray]]:
    r"""Histogram data with variable bin widths.

    Parameters
    ----------
    x : numpy.ndarray
        Data to histogram
    bins : numpy.ndarray
        Bin edges
    weights : numpy.ndarray, optional
        The weights for each element of ``x``. If weights are absent,
        the second return type will be ``None``.
    density : bool
        Normalize histogram counts as value of PDF such that the
        integral over the range is unity.
    flow : bool
        Include under/overflow in the first/last bins.
    cons_var : bool
        If ``True``, conserve the variance rather than return the
        standard error (square root of the variance).

    Raises
    ------
    ValueError
        If the array of bin edges is not monotonically increasing.
    ValueError
        If ``x`` and ``weights`` have incompatible shapes.
    TypeError
        If ``x`` or ``weights`` are unsupported types

    Returns
    -------
    :py:obj:`numpy.ndarray`
        The bin counts.
    :py:obj:`numpy.ndarray`, optional
        The standard error of each bin count, :math:`\sqrt{\sum_i
        w_i^2}`. If ``cons_var`` is ``True``, the variances are
        returned. The return is ``None`` if weights are not used.

    Examples
    --------
    A simple histogram with variable width bins:

    >>> rng = np.random.default_rng(123)
    >>> x = rng.standard_normal(1000)
    >>> edges = np.array([-3.0, -2.5, -1.5, -0.25, 0.25, 2.0, 3.0])
    >>> h, __ = var1d(x, edges)

    """
    # Convert up front so the monotonicity test is elementwise even
    # when a plain sequence is given (list comparison would be
    # lexicographic and yield a single bool).
    bins = np.asarray(bins, dtype=np.float64)
    if not np.all(bins[1:] >= bins[:-1]):
        raise ValueError("bins sequence must monotonically increase")

    # Uniformly spaced edges are delegated to the fixed width routine.
    if likely_uniform_bins(bins):
        return fix1d(
            x,
            bins=bins.shape[0] - 1,
            weights=weights,
            range=(bins[0], bins[-1]),
            flow=flow,
            density=density,
            cons_var=cons_var,
        )

    if weights is None:
        result = _v1d(x, bins, flow)
        if density:
            result = _densify_variable_counts(result, bins)
        return result, None

    if np.shape(x) != np.shape(weights):
        raise ValueError("x and weights have incompatible shapes.")

    result = _v1dw(x, weights, bins, flow)
    if density:
        result = _densify_variable_weighted_counts(result, bins)
    counts, variances = result
    if cons_var:
        return counts, variances
    return counts, np.sqrt(variances)

def var1dmw(
    x: np.ndarray,
    weights: np.ndarray,
    bins: np.ndarray,
    flow: bool = False,
    cons_var: bool = False,
) -> Tuple[np.ndarray, np.ndarray]:
    r"""Histogram data with multiple weight variations and variable width bins.

    The weights array must have a total number of rows equal to the
    length of the input data. The number of columns in the weights
    array is equal to the number of weight variations. (The weights
    array must be an M x N matrix where M is the length of ``x`` and
    N is the number of weight variations).

    Parameters
    ----------
    x : numpy.ndarray
        Data to histogram.
    weights : numpy.ndarray
        Weight variations for the elements of ``x``, first dimension
        is the length of ``x``, second dimension is the number of
        weights.
    bins : numpy.ndarray
        Bin edges.
    flow : bool
        Include under/overflow in the first/last bins.
    cons_var : bool
        If ``True``, conserve the variance rather than return the
        standard error (square root of the variance).

    Raises
    ------
    ValueError
        If the array of bin edges is not monotonically increasing.
    ValueError
        If ``x`` and ``weights`` have incompatible shapes.
    ValueError
        If ``weights`` is not a two dimensional array.
    TypeError
        If ``x`` or ``weights`` are unsupported types

    Returns
    -------
    :py:obj:`numpy.ndarray`
        The bin counts.
    :py:obj:`numpy.ndarray`
        The standard error of each bin count, :math:`\sqrt{\sum_i
        w_i^2}`. If ``cons_var`` is ``True``, the variances are
        returned.

    Examples
    --------
    Using three different weight variations:

    >>> rng = np.random.default_rng(123)
    >>> x = rng.standard_normal(10000)
    >>> weights = np.abs(rng.standard_normal((x.shape[0], 3)))
    >>> edges = np.array([-3.0, -2.5, -1.5, -0.25, 0.25, 2.0, 3.0])
    >>> h, err = var1dmw(x, weights, edges)
    >>> h.shape
    (6, 3)
    >>> err.shape
    (6, 3)

    """
    weights_shape = np.shape(weights)
    if len(weights_shape) != 2:
        raise ValueError("weights must be a two dimensional array.")
    # x is one dimensional and weights is (len(x), n_variations), so
    # only the leading dimensions are comparable.
    if np.shape(x)[0] != weights_shape[0]:
        raise ValueError("x and weights have incompatible shapes.")

    # Ensure an array so the monotonicity test is elementwise even
    # when a plain sequence is given; arrays pass through unchanged.
    bins = np.asarray(bins)
    if not np.all(bins[1:] >= bins[:-1]):
        raise ValueError("bins sequence must monotonically increase.")

    # Uniformly spaced edges are delegated to the fixed width routine.
    if likely_uniform_bins(bins):
        return fix1dmw(
            x,
            weights,
            bins=(len(bins) - 1),
            range=(bins[0], bins[-1]),
            flow=flow,
            cons_var=cons_var,
        )

    counts, variances = _v1dmw(x, weights, bins, flow)
    if cons_var:
        return counts, variances
    return counts, np.sqrt(variances)

def fix2d(
    x: np.ndarray,
    y: np.ndarray,
    bins: Union[int, Tuple[int, int]] = 10,
    range: Optional[Sequence[Tuple[float, float]]] = None,
    weights: Optional[np.ndarray] = None,
    flow: bool = False,
    cons_var: bool = False,
) -> Tuple[np.ndarray, Optional[np.ndarray]]:
    r"""Histogram two dimensional data with fixed (uniform) binning.

    The two input arrays (``x`` and ``y``) must be the same length
    (shape).

    Parameters
    ----------
    x : numpy.ndarray
        First entries in data pairs to histogram.
    y : numpy.ndarray
        Second entries in data pairs to histogram.
    bins : int or (int, int)
        If int, both dimensions will have that many bins;
        if tuple, the number of bins for each dimension
    range : Sequence[Tuple[float, float]], optional
        Axis limits in the form ``[(xmin, xmax), (ymin, ymax)]``. If
        ``None`` the input data min and max will be used.
    weights : array_like, optional
        The weights for data element. If weights are absent, the
        second return type will be ``None``.
    flow : bool
        Include over/underflow.
    cons_var : bool
        If ``True``, conserve the variance rather than return the
        standard error (square root of the variance).

    Raises
    ------
    ValueError
        If ``x`` and ``y`` have incompatible shapes.
    ValueError
        If the shape of ``weights`` is incompatible with ``x`` and ``y``
    TypeError
        If ``x``, ``y``, or ``weights`` are unsupported types

    Returns
    -------
    :py:obj:`numpy.ndarray`
        The bin counts.
    :py:obj:`numpy.ndarray`, optional
        The standard error of each bin count, :math:`\sqrt{\sum_i
        w_i^2}`. If ``cons_var`` is ``True``, the variances are
        returned. The return is ``None`` if weights are not used.

    Examples
    --------
    A histogram of (``x``, ``y``) with 20 bins between 0 and 100 in
    the ``x`` dimension and 10 bins between 0 and 50 in the ``y``
    dimension:

    >>> rng = np.random.default_rng(123)
    >>> x = rng.uniform(0, 100, size=(200,))
    >>> y = rng.uniform(0, 50, size=(200,))
    >>> h, __ = fix2d(x, y, bins=(20, 10), range=((0, 100), (0, 50)))

    The same data, now histogrammed weighted (via ``w``):

    >>> w = rng.uniform(0.2, 0.9, size=x.shape)
    >>> h, err = fix2d(x, y, bins=(20, 10), range=((0, 100), (0, 50)), weights=w)

    """
    if np.shape(x) != np.shape(y):
        raise ValueError("x and y must be the same shape.")
    if weights is not None and np.shape(weights) != np.shape(x):
        raise ValueError("data and weights must be the same shape.")

    # A single integer (Python or NumPy) applies to both axes;
    # anything else is unpacked as a pair of per-axis bin counts.
    if isinstance(bins, (int, np.integer)):
        nx = ny = int(bins)
    else:
        nx, ny = bins
        nx, ny = int(nx), int(ny)

    xmin, xmax, ymin, ymax = limits_2d(x, y, range)

    if weights is None:
        result = _f2d(x, y, nx, xmin, xmax, ny, ymin, ymax, flow)
        return result, None

    counts, variances = _f2dw(x, y, weights, nx, xmin, xmax, ny, ymin, ymax, flow)

    if cons_var:
        return counts, variances
    return counts, np.sqrt(variances)

def var2d(
    x: np.ndarray,
    y: np.ndarray,
    xbins: np.ndarray,
    ybins: np.ndarray,
    weights: Optional[np.ndarray] = None,
    flow: bool = False,
    cons_var: bool = False,
) -> Tuple[np.ndarray, Optional[np.ndarray]]:
    r"""Histogram two dimensional data with variable width binning.

    The two input arrays (``x`` and ``y``) must be the same length
    (shape).

    Parameters
    ----------
    x : numpy.ndarray
        First entries in data pairs to histogram.
    y : numpy.ndarray
        Second entries in data pairs to histogram.
    xbins : numpy.ndarray
        Bin edges for the ``x`` dimension.
    ybins : numpy.ndarray
        Bin edges for the ``y`` dimension.
    weights : array_like, optional
        The weights for data element. If weights are absent, the
        second return type will be ``None``.
    flow : bool
        Include under/overflow.
    cons_var : bool
        If ``True``, conserve the variance rather than return the
        standard error (square root of the variance).

    Raises
    ------
    ValueError
        If ``x`` and ``y`` have different shape.
    ValueError
        If either bin edge definition is not monotonically increasing.
    ValueError
        If the shape of ``weights`` differs from the shape of ``x``.
    TypeError
        If ``x``, ``y``, or ``weights`` are unsupported types

    Returns
    -------
    :py:obj:`numpy.ndarray`
        The bin counts.
    :py:obj:`numpy.ndarray`, optional
        The standard error of each bin count, :math:`\sqrt{\sum_i
        w_i^2}`. If ``cons_var`` is ``True``, the variances are
        returned. The return is ``None`` if weights are not used.

    Examples
    --------
    A histogram of (``x``, ``y``) where the edges are defined by a
    :func:`numpy.logspace` in both dimensions:

    >>> x = np.exp(np.random.uniform(0, 1, size=(10000,)))
    >>> y = np.exp(np.random.uniform(0, 1, size=(10000,)))
    >>> bins = np.logspace(0.1, 1.0, 10, endpoint=True)
    >>> h, __ = var2d(x, y, bins, bins)

    """
    if np.shape(x) != np.shape(y):
        raise ValueError("x and y must be the same shape.")

    # Ensure arrays so the monotonicity tests are elementwise even
    # when plain sequences are given; arrays pass through unchanged.
    xbins = np.asarray(xbins)
    ybins = np.asarray(ybins)
    if not np.all(xbins[1:] >= xbins[:-1]):
        raise ValueError("xbins sequence must monotonically increase.")
    if not np.all(ybins[1:] >= ybins[:-1]):
        raise ValueError("ybins sequence must monotonically increase.")

    if weights is None:
        result = _v2d(x, y, xbins, ybins, flow)
        return result, None

    weights = np.asarray(weights)
    if np.shape(weights) != np.shape(x):
        raise ValueError("data and weights must be the same shape.")

    counts, variances = _v2dw(x, y, weights, xbins, ybins, flow)
    if cons_var:
        return counts, variances
    return counts, np.sqrt(variances)