You are viewing the latest unreleased documentation v3.3.dev0. You may prefer a stable version.

Source code for iris.analysis

# Copyright Iris contributors
#
# This file is part of Iris and is released under the LGPL license.
# See COPYING and COPYING.LESSER in the root of the repository for full
# licensing details.
"""
A package providing :class:`iris.cube.Cube` analysis support.

This module defines a suite of :class:`~iris.analysis.Aggregator` instances,
which are used to specify the statistical measure to calculate over a
:class:`~iris.cube.Cube`, using methods such as
:meth:`~iris.cube.Cube.aggregated_by` and :meth:`~iris.cube.Cube.collapsed`.

The :class:`~iris.analysis.Aggregator` is a convenience class that allows
specific statistical aggregation operators to be defined and instantiated.
These operators can then be used to collapse, or partially collapse, one or
more dimensions of a :class:`~iris.cube.Cube`, as discussed in
:ref:`cube-statistics`.

In particular, :ref:`cube-statistics-collapsing` discusses how to use
:const:`MEAN` to average over one dimension of a :class:`~iris.cube.Cube`,
and also how to perform weighted :ref:`cube-statistics-collapsing-average`.
While :ref:`cube-statistics-aggregated-by` shows how to aggregate similar
groups of data points along a single dimension, to result in fewer points
in that dimension.

The gallery contains several interesting worked examples of how an
:class:`~iris.analysis.Aggregator` may be used, including:

 * :ref:`sphx_glr_generated_gallery_meteorology_plot_COP_1d.py`
 * :ref:`sphx_glr_generated_gallery_general_plot_SOI_filtering.py`
 * :ref:`sphx_glr_generated_gallery_meteorology_plot_hovmoller.py`
 * :ref:`sphx_glr_generated_gallery_meteorology_plot_lagged_ensemble.py`
 * :ref:`sphx_glr_generated_gallery_general_plot_custom_aggregation.py`

"""

from collections import OrderedDict
from collections.abc import Iterable
import functools
from functools import wraps

import dask.array as da
import numpy as np
import numpy.ma as ma
import scipy.interpolate
import scipy.stats.mstats

import iris._lazy_data
from iris.analysis._area_weighted import AreaWeightedRegridder
from iris.analysis._interpolation import (
    EXTRAPOLATION_MODES,
    RectilinearInterpolator,
)
from iris.analysis._regrid import CurvilinearRegridder, RectilinearRegridder
import iris.coords
from iris.exceptions import LazyAggregatorError

__all__ = (
    "Aggregator",
    "AreaWeighted",
    "COUNT",
    "GMEAN",
    "HMEAN",
    "Linear",
    "MAX",
    "MAX_RUN",
    "MEAN",
    "MEDIAN",
    "MIN",
    "Nearest",
    "PEAK",
    "PERCENTILE",
    "PROPORTION",
    "PointInCell",
    "RMS",
    "STD_DEV",
    "SUM",
    "UnstructuredNearest",
    "VARIANCE",
    "WPERCENTILE",
    "WeightedAggregator",
    "clear_phenomenon_identity",
    "create_weighted_aggregator_fn",
)


class _CoordGroup:
    """
    Represents a list of coordinates, one for each given cube. Which can be
    operated on conveniently.

    """

    def __init__(self, coords, cubes):
        self.coords = coords
        self.cubes = cubes

    def __iter__(self):
        return iter(self.coords)

    def __getitem__(self, key):
        return list(self).__getitem__(key)

    def _first_coord_w_cube(self):
        """
        Return the first none None coordinate, and its associated cube
        as (cube, coord).

        """
        return next(
            filter(
                lambda cube_coord: cube_coord[1] is not None,
                zip(self.cubes, self.coords),
            )
        )

    def __repr__(self):
        # No exact repr, so a helpful string is given instead
        return (
            "["
            + ", ".join(
                [
                    coord.name() if coord is not None else "None"
                    for coord in self
                ]
            )
            + "]"
        )

    def name(self):
        _, first_coord = self._first_coord_w_cube()
        return first_coord.name()

    def _oid_tuple(self):
        """Return a tuple of object ids for this _CoordGroup's coordinates"""
        return tuple((id(coord) for coord in self))

    def __hash__(self):
        return hash(self._oid_tuple())

    def __eq__(self, other):
        # equals is overridden to guarantee that two _CoordGroups are only
        # equal if their coordinates are the same objects (by object id)
        # this is useful in the context of comparing _CoordGroups if they are
        # part of a set operation such as that in coord_compare, but
        # not useful in many other circumstances (i.e. deepcopying a
        # _CoordGroups instance would mean that copy != original)
        result = NotImplemented
        if isinstance(other, _CoordGroup):
            result = self._oid_tuple() == other._oid_tuple()
        return result

    def matches(self, predicate, default_val=True):
        """
        Apply a function to a coord group returning a list of bools
        for each coordinate.

        The predicate function should take exactly 2 arguments (cube, coord)
        and return a boolean.

        If None is in the coord group then return True.

        """
        for cube, coord in zip(self.cubes, self.coords):
            if coord is None:
                yield default_val
            else:
                yield predicate(cube, coord)

    def matches_all(self, predicate):
        """
        Return whether all coordinates match the given function after running
        it through :meth:`matches`.

        If None is in the coord group then return True.

        """
        return all(self.matches(predicate))

    def matches_any(self, predicate):
        """
        Return whether any coordinates match the given function after running
        it through :meth:`matches`.

        If None is in the coord group then return True.

        """
        return any(self.matches(predicate))


def _dimensional_metadata_comparison(*cubes, object_get=None):
    """
    Convenience function to help compare coordinates, cell-measures or
    ancillary-variables, on one or more cubes, by their metadata.

    .. Note::

        Up to Iris 2.x, this _used_ to be the public API method
        "iris.analysis.coord_comparison".
        It has since been generalised, and made private.
        However, the cube elements handled are still mostly referred to as 'coords' /
        'coordinates' throughout, for simplicity :  In fact, they will all be either
        `iris.coords.Coord`, `iris.coords.CellMeasure` or
        `iris.coords.AncillaryVariable`, the cube element type being controlled by the
        'object_get' keyword.

    Args:

    * cubes (iterable of `iris.cube.Cube`):
        a set of cubes whose coordinates, cell-measures or ancillary-variables are to
        be compared.

    Kwargs:

    * object_get (callable(cube) or None):
        If not None, this must be a cube method returning a list of all cube elements
        of the required type, i.e. one of `iris.cube.Cube.coords`,
        `iris.cube.Cube.cell_measures`, or `iris.cube.Cube.ancillary_variables`.
        If not specified, defaults to `iris.cube.Cube.coords`

    Returns:

        result (dict mapping string: list of _CoordGroup):
            A dictionary whose keys are match categories and values are groups of
            coordinates, cell-measures or ancillary-variables.

    The values of the returned dictionary are lists of _CoordGroup representing
    grouped coordinates.  Each _CoordGroup contains all the input 'cubes', and a
    matching list of the coord within each cube that matches some specific CoordDefn
    (or maybe None).

    The keys of the returned dictionary are strings naming 'categories' :  Each
    represents a statement,
    "Given these cubes list the coordinates which,
    when grouped by metadata, are/have..."

    Returned Keys:

    * grouped_coords
       A list of coordinate groups of all the coordinates grouped together
       by their coordinate definition
    * ungroupable
       A list of coordinate groups which contain at least one None,
       meaning not all Cubes provide an equivalent coordinate
    * not_equal
       A list of coordinate groups of which not all are equal
       (superset of ungroupable)
    * no_data_dimension
       A list of coordinate groups of which all have no data dimensions on
       their respective cubes
    * scalar
       A list of coordinate groups of which all have shape (1, )
    * non_equal_data_dimension
       A list of coordinate groups of which not all have the same
       data dimension on their respective cubes
    * non_equal_shape
       A list of coordinate groups of which not all have the same shape
    * equal_data_dimension
       A list of coordinate groups of which all have the same data dimension
       on their respective cubes
    * equal
       A list of coordinate groups of which all are equal
    * ungroupable_and_dimensioned
       A list of coordinate groups of which not all cubes had an equivalent
       (in metadata) coordinate which also describe a data dimension
    * dimensioned
       A list of coordinate groups of which all describe a data dimension on
       their respective cubes
    * ignorable
       A list of scalar, ungroupable non_equal coordinate groups
    * resamplable
        A list of equal, different data dimensioned coordinate groups
    * transposable
       A list of non equal, same data dimensioned, non scalar coordinate groups

    Example usage::

        result = _dimensional_metadata_comparison(cube1, cube2)
        print('All equal coordinates: ', result['equal'])

    """
    if object_get is None:
        from iris.cube import Cube

        object_get = Cube.coords

    all_coords = [object_get(cube) for cube in cubes]
    grouped_coords = []

    # set of coordinates id()s of coordinates which have been processed
    processed_coords = set()

    # iterate through all cubes, then by each coordinate in the cube looking
    # for coordinate groups
    for cube, coords in zip(cubes, all_coords):
        for coord in coords:

            # if this coordinate has already been processed, then continue on
            # to the next one
            if id(coord) in processed_coords:
                continue

            # setup a list to hold the coordinates which will be turned into a
            # coordinate group and added to the grouped_coords list
            this_coords_coord_group = []

            for other_cube_i, other_cube in enumerate(cubes):
                # setup a variable to hold the coordinate which will be added
                # to the coordinate group for this cube
                coord_to_add_to_group = None

                # don't bother checking if the current cube is the one we are
                # trying to match coordinates too
                if other_cube is cube:
                    coord_to_add_to_group = coord
                else:
                    # iterate through all coordinates in this cube
                    for other_coord in all_coords[other_cube_i]:
                        # for optimisation, check that the name is equivalent
                        # *before* checking all of the metadata is equivalent
                        eq = (
                            other_coord is coord
                            or other_coord.name() == coord.name()
                            and other_coord.metadata == coord.metadata
                        )
                        if eq:
                            coord_to_add_to_group = other_coord
                            break

                # add the coordinate to the group
                if coord_to_add_to_group is None:
                    this_coords_coord_group.append(None)
                else:
                    this_coords_coord_group.append(coord_to_add_to_group)
                    # add the object id of the coordinate which is being added
                    # to the group to the processed coordinate list
                    processed_coords.add(id(coord_to_add_to_group))

            # add the group to the list of groups
            grouped_coords.append(_CoordGroup(this_coords_coord_group, cubes))

    # define some sets which will be populated in the subsequent loop
    ungroupable = set()
    different_shaped_coords = set()
    different_data_dimension = set()
    no_data_dimension = set()
    scalar_coords = set()
    not_equal = set()

    for coord_group in grouped_coords:
        first_cube, first_coord = coord_group._first_coord_w_cube()

        # Get all coordinate groups which aren't complete (i.e. there is a
        # None in the group)
        def coord_is_None_fn(cube, coord):
            return coord is None

        if coord_group.matches_any(coord_is_None_fn):
            ungroupable.add(coord_group)

        # Get all coordinate groups which don't all equal one another
        # (None -> group not all equal)
        def not_equal_fn(cube, coord):
            return coord != first_coord

        if coord_group.matches_any(not_equal_fn):
            not_equal.add(coord_group)

        # Get all coordinate groups which don't all share the same shape
        # (None -> group has different shapes)
        def diff_shape_fn(cube, coord):
            return coord.shape != first_coord.shape

        if coord_group.matches_any(diff_shape_fn):
            different_shaped_coords.add(coord_group)

        # Get all coordinate groups which don't all share the same data
        # dimension on their respective cubes
        # (None -> group describes a different dimension)
        def diff_data_dim_fn(cube, coord):
            return coord.cube_dims(cube) != first_coord.cube_dims(first_cube)

        if coord_group.matches_any(diff_data_dim_fn):
            different_data_dimension.add(coord_group)

        # get all coordinate groups which don't describe a dimension
        # (None -> doesn't describe a dimension)
        def no_data_dim_fn(cube, coord):
            return coord.cube_dims(cube) == ()

        if coord_group.matches_all(no_data_dim_fn):
            no_data_dimension.add(coord_group)

        # get all coordinate groups which don't describe a dimension
        # (None -> not a scalar coordinate)
        def no_data_dim_fn(cube, coord):
            return coord.shape == (1,)

        if coord_group.matches_all(no_data_dim_fn):
            scalar_coords.add(coord_group)

    result = {}
    result["grouped_coords"] = set(grouped_coords)
    result["not_equal"] = not_equal
    result["ungroupable"] = ungroupable
    result["no_data_dimension"] = no_data_dimension
    result["scalar"] = scalar_coords
    result["non_equal_data_dimension"] = different_data_dimension
    result["non_equal_shape"] = different_shaped_coords

    result["equal_data_dimension"] = (
        result["grouped_coords"] - result["non_equal_data_dimension"]
    )
    result["equal"] = result["grouped_coords"] - result["not_equal"]
    result["dimensioned"] = (
        result["grouped_coords"] - result["no_data_dimension"]
    )
    result["ungroupable_and_dimensioned"] = (
        result["ungroupable"] & result["dimensioned"]
    )
    result["ignorable"] = (
        result["not_equal"] | result["ungroupable"]
    ) & result["no_data_dimension"]
    result["resamplable"] = (
        result["not_equal"] & result["equal_data_dimension"] - result["scalar"]
    )
    result["transposable"] = (
        result["equal"] & result["non_equal_data_dimension"]
    )

    # for convenience, turn all of the sets in the dictionary into lists,
    # sorted by the name of the group
    for key, groups in result.items():
        result[key] = sorted(groups, key=lambda group: group.name())

    return result


class _Aggregator:
    """
    The :class:`_Aggregator` base class provides common aggregation
    functionality.

    """

    def __init__(
        self, cell_method, call_func, units_func=None, lazy_func=None, **kwargs
    ):
        r"""
        Create an aggregator for the given :data:`call_func`.

        Args:

        * cell_method (string):
            Cell method definition formatter.  Used in the fashion
            "cell_method.format(\**kwargs)", to produce a cell-method string
            which can include keyword values.

        * call_func (callable):
            | *Call signature*: (data, axis=None, \**kwargs)

            Data aggregation function.
            Returns an aggregation result, collapsing the 'axis' dimension of
            the 'data' argument.

        Kwargs:

        * units_func (callable):
            | *Call signature*: (units)

            If provided, called to convert a cube's units.
            Returns an :class:`cf_units.Unit`, or a
            value that can be made into one.

        * lazy_func (callable or None):
            An alternative to :data:`call_func` implementing a lazy
            aggregation. Note that, it need not support all features of the
            main operation, but should raise an error in unhandled cases.

        Additional kwargs::
            Passed through to :data:`call_func` and :data:`lazy_func`.

        Aggregators are used by cube aggregation methods such as
        :meth:`~iris.cube.Cube.collapsed` and
        :meth:`~iris.cube.Cube.aggregated_by`.  For example::

            result = cube.collapsed('longitude', iris.analysis.MEAN)

        A variety of ready-made aggregators are provided in this module, such
        as :data:`~iris.analysis.MEAN` and :data:`~iris.analysis.MAX`.  Custom
        aggregators can also be created for special purposes, see
        :ref:`sphx_glr_generated_gallery_general_plot_custom_aggregation.py`
        for a worked example.

        """
        #: Cube cell method string.
        self.cell_method = cell_method
        #: Data aggregation function.
        self.call_func = call_func
        #: Unit conversion function.
        self.units_func = units_func
        #: Lazy aggregation function, may be None to indicate that a lazy
        #: operation is not available.
        self.lazy_func = lazy_func

        self._kwargs = kwargs

    def lazy_aggregate(self, data, axis, **kwargs):
        """
        Perform aggregation over the data with a lazy operation, analogous to
        the 'aggregate' result.

        Keyword arguments are passed through to the data aggregation function
        (for example, the "percent" keyword for a percentile aggregator).
        This function is usually used in conjunction with update_metadata(),
        which should be passed the same keyword arguments.

        Args:

        * data (array):
            A lazy array (:class:`dask.array.Array`).

        * axis (int or list of int):
            The dimensions to aggregate over -- note that this is defined
            differently to the 'aggregate' method 'axis' argument, which only
            accepts a single dimension index.

        Kwargs:

        * kwargs:
            All keyword arguments are passed through to the data aggregation
            function.

        Returns:
            A lazy array representing the aggregation operation
            (:class:`dask.array.Array`).

        """
        if self.lazy_func is None:
            msg = "{} aggregator does not support lazy operation."
            raise LazyAggregatorError(msg.format(self.name()))

        # Combine keyword args with `kwargs` taking priority over those
        # provided to __init__.
        kwargs = dict(list(self._kwargs.items()) + list(kwargs.items()))

        return self.lazy_func(data, axis=axis, **kwargs)

    def aggregate(self, data, axis, **kwargs):
        """
        Perform the aggregation function given the data.

        Keyword arguments are passed through to the data aggregation function
        (for example, the "percent" keyword for a percentile aggregator).
        This function is usually used in conjunction with update_metadata(),
        which should be passed the same keyword arguments.

        Args:

        * data (array):
            Data array.

        * axis (int):
            Axis to aggregate over.

        Kwargs:

        * mdtol (float):
            Tolerance of missing data. The value returned will be masked if
            the fraction of data to missing data is less than or equal to
            mdtol.  mdtol=0 means no missing data is tolerated while mdtol=1
            will return the resulting value from the aggregation function.
            Defaults to 1.

        * kwargs:
            All keyword arguments apart from those specified above, are
            passed through to the data aggregation function.

        Returns:
            The aggregated data.

        """
        kwargs = dict(list(self._kwargs.items()) + list(kwargs.items()))
        mdtol = kwargs.pop("mdtol", None)

        result = self.call_func(data, axis=axis, **kwargs)
        if (
            mdtol is not None
            and ma.is_masked(data)
            and result is not ma.masked
        ):
            fraction_not_missing = data.count(axis=axis) / data.shape[axis]
            mask_update = 1 - mdtol > fraction_not_missing
            if ma.isMaskedArray(result):
                result.mask = result.mask | mask_update
            else:
                result = ma.array(result, mask=mask_update)

        return result

    def update_metadata(self, cube, coords, **kwargs):
        """
        Update common cube metadata w.r.t the aggregation function.

        Args:

        * cube (:class:`iris.cube.Cube`):
            Source cube that requires metadata update.
        * coords (:class:`iris.coords.Coord`):
            The one or more coordinates that were aggregated.

        Kwargs:

        * This function is intended to be used in conjunction with aggregate()
          and should be passed the same keywords (for example, the "ddof"
          keyword for a standard deviation aggregator).

        """
        # Update the units if required.
        if self.units_func is not None:
            cube.units = self.units_func(cube.units)

    def post_process(self, collapsed_cube, data_result, coords, **kwargs):
        """
        Process the result from :func:`iris.analysis.Aggregator.aggregate`.

        Args:

        * collapsed_cube:
            A :class:`iris.cube.Cube`.
        * data_result:
            Result from :func:`iris.analysis.Aggregator.aggregate`
        * coords:
            The one or more coordinates that were aggregated over.

        Kwargs:

        * This function is intended to be used in conjunction with aggregate()
          and should be passed the same keywords (for example, the "ddof"
          keyword from a standard deviation aggregator).

        Returns:
            The collapsed cube with its aggregated data payload.

        """
        collapsed_cube.data = data_result
        return collapsed_cube

    def aggregate_shape(self, **kwargs):
        """
        The shape of the new dimension/s created by the aggregator.

        Kwargs:

        * This function is intended to be used in conjunction with aggregate()
          and should be passed the same keywords.

        Returns:
            A tuple of the aggregate shape.

        """
        return ()

    def name(self):
        """
        Returns the name of the aggregator.

        """
        try:
            name = "_".join(self.cell_method.split())
        except AttributeError:
            name = "unknown"
        return name


class PercentileAggregator(_Aggregator):
    """
    The :class:`PercentileAggregator` class provides percentile aggregation
    functionality.

    This aggregator *may* introduce a new dimension to the data for the
    statistic being calculated, but only if more than one quantile is required.
    For example, calculating the 50th and 90th percentile will result in a new
    data dimension with an extent of 2, for each of the quantiles calculated.

    """

    def __init__(self, units_func=None, **kwargs):
        """
        Create a percentile aggregator.

        Kwargs:

        * units_func (callable):
            | *Call signature*: (units)

            If provided, called to convert a cube's units.
            Returns an :class:`cf_units.Unit`, or a
            value that can be made into one.

        Additional kwargs::
            Passed through to :data:`call_func` and :data:`lazy_func`.

        This aggregator can used by cube aggregation methods such as
        :meth:`~iris.cube.Cube.collapsed` and
        :meth:`~iris.cube.Cube.aggregated_by`.  For example::

            cube.collapsed('longitude', iris.analysis.PERCENTILE, percent=50)

        """
        self._name = "percentile"
        self._args = ["percent"]
        _Aggregator.__init__(
            self,
            None,
            _percentile,
            units_func=units_func,
            lazy_func=_build_dask_mdtol_function(_percentile),
            **kwargs,
        )

    def aggregate(self, data, axis, **kwargs):
        """
        Perform the percentile aggregation over the given data.

        Keyword arguments are passed through to the data aggregation function
        (for example, the "percent" keyword for a percentile aggregator).
        This function is usually used in conjunction with update_metadata(),
        which should be passed the same keyword arguments.

        Args:

        * data (array):
            Data array.

        * axis (int):
            Axis to aggregate over.

        Kwargs:

        * mdtol (float):
            Tolerance of missing data. The value returned will be masked if
            the fraction of data to missing data is less than or equal to
            mdtol.  mdtol=0 means no missing data is tolerated while mdtol=1
            will return the resulting value from the aggregation function.
            Defaults to 1.

        * kwargs:
            All keyword arguments apart from those specified above, are
            passed through to the data aggregation function.

        Returns:
            The aggregated data.

        """

        msg = "{} aggregator requires the mandatory keyword argument {!r}."
        for arg in self._args:
            if arg not in kwargs:
                raise ValueError(msg.format(self.name(), arg))

        return _Aggregator.aggregate(self, data, axis, **kwargs)

    def lazy_aggregate(self, data, axis, **kwargs):
        """
        Perform aggregation over the data with a lazy operation, analogous to
        the 'aggregate' result.

        Keyword arguments are passed through to the data aggregation function
        (for example, the "percent" keyword for a percentile aggregator).
        This function is usually used in conjunction with update_metadata(),
        which should be passed the same keyword arguments.

        Args:

        * data (array):
            A lazy array (:class:`dask.array.Array`).

        * axis (int or list of int):
            The dimensions to aggregate over -- note that this is defined
            differently to the 'aggregate' method 'axis' argument, which only
            accepts a single dimension index.

        Kwargs:

        * kwargs:
            All keyword arguments are passed through to the data aggregation
            function.

        Returns:
            A lazy array representing the result of the aggregation operation
            (:class:`dask.array.Array`).

        """

        msg = "{} aggregator requires the mandatory keyword argument {!r}."
        for arg in self._args:
            if arg not in kwargs:
                raise ValueError(msg.format(self.name(), arg))

        return _Aggregator.lazy_aggregate(self, data, axis, **kwargs)

    def post_process(self, collapsed_cube, data_result, coords, **kwargs):
        """
        Process the result from :func:`iris.analysis.Aggregator.aggregate`.

        Args:

        * collapsed_cube:
            A :class:`iris.cube.Cube`.
        * data_result:
            Result from :func:`iris.analysis.Aggregator.aggregate`
        * coords:
            The one or more coordinates that were aggregated over.

        Kwargs:

        * This function is intended to be used in conjunction with aggregate()
          and should be passed the same keywords (for example, the "percent"
          keywords from a percentile aggregator).

        Returns:
            The collapsed cube with it's aggregated data payload.

        """
        cubes = iris.cube.CubeList()
        # The additive aggregator requires a mandatory keyword.
        msg = "{} aggregator requires the mandatory keyword argument {!r}."
        for arg in self._args:
            if arg not in kwargs:
                raise ValueError(msg.format(self.name(), arg))

        points = kwargs[self._args[0]]
        # Derive the name of the additive coordinate.
        names = [coord.name() for coord in coords]
        coord_name = "{}_over_{}".format(self.name(), "_".join(names))

        if not isinstance(points, Iterable):
            points = [points]

        # Decorate a collapsed cube with a scalar additive coordinate
        # for each of the additive points, to result in a possibly higher
        # order cube.
        for point in points:
            cube = collapsed_cube.copy()
            coord = iris.coords.AuxCoord(
                point, long_name=coord_name, units="percent"
            )
            cube.add_aux_coord(coord)
            cubes.append(cube)

        collapsed_cube = cubes.merge_cube()

        # Ensure to roll the data payload additive dimension, which should
        # be the last dimension for an additive operation with more than
        # one point, to be the first dimension, thus matching the collapsed
        # cube.
        if self.aggregate_shape(**kwargs):
            # Roll the last additive dimension to be the first.
            data_result = np.rollaxis(data_result, -1)

        # Marry the collapsed cube and the data payload together.
        result = _Aggregator.post_process(
            self, collapsed_cube, data_result, coords, **kwargs
        )
        return result

    def aggregate_shape(self, **kwargs):
        """
        The shape of the additive dimension created by the aggregator.

        Kwargs:

        * This function is intended to be used in conjunction with aggregate()
          and should be passed the same keywords.

        Returns:
            A tuple of the additive dimension shape.

        """

        msg = "{} aggregator requires the mandatory keyword argument {!r}."
        for arg in self._args:
            if arg not in kwargs:
                raise ValueError(msg.format(self.name(), arg))

        points = kwargs[self._args[0]]
        shape = ()

        if not isinstance(points, Iterable):
            points = [points]

        points = np.array(points)

        if points.shape > (1,):
            shape = points.shape

        return shape

    def name(self):
        """
        Returns the name of the aggregator.

        """
        return self._name


class WeightedPercentileAggregator(PercentileAggregator):
    """
    The :class:`WeightedPercentileAggregator` class provides percentile
    aggregation functionality.

    This aggregator *may* introduce a new dimension to the data for the
    statistic being calculated, but only if more than one quantile is required.
    For example, calculating the 50th and 90th percentile will result in a new
    data dimension with an extent of 2, for each of the quantiles calculated.

    """

    def __init__(self, units_func=None, lazy_func=None, **kwargs):
        """
        Create a weighted percentile aggregator.

        Kwargs:

        * units_func (callable):
            | *Call signature*: (units)

            If provided, called to convert a cube's units.
            Returns an :class:`cf_units.Unit`, or a
            value that can be made into one.

        * lazy_func (callable or None):
            An alternative to :data:`call_func` implementing a lazy
            aggregation. Note that, it need not support all features of the
            main operation, but should raise an error in unhandled cases.

        Additional kwargs::
            Passed through to :data:`call_func` and :data:`lazy_func`.

        This aggregator can used by cube aggregation methods such as
        :meth:`~iris.cube.Cube.collapsed` and
        :meth:`~iris.cube.Cube.aggregated_by`.  For example::

            cube.collapsed('longitude', iris.analysis.WPERCENTILE, percent=50,
                             weights=iris.analysis.cartography.area_weights(cube))

        """
        _Aggregator.__init__(
            self,
            None,
            _weighted_percentile,
            units_func=units_func,
            lazy_func=lazy_func,
            **kwargs,
        )

        self._name = "weighted_percentile"
        self._args = ["percent", "weights"]

        #: A list of keywords associated with weighted behaviour.
        self._weighting_keywords = ["returned", "weights"]

    def post_process(self, collapsed_cube, data_result, coords, **kwargs):
        """
        Process the result from :func:`iris.analysis.Aggregator.aggregate`.

        Returns a tuple(cube, weights) if a tuple(data, weights) was returned
        from :func:`iris.analysis.Aggregator.aggregate`.

        Args:

        * collapsed_cube:
            A :class:`iris.cube.Cube`.
        * data_result:
            Result from :func:`iris.analysis.Aggregator.aggregate`
        * coords:
            The one or more coordinates that were aggregated over.

        Kwargs:

        * This function is intended to be used in conjunction with aggregate()
          and should be passed the same keywords (for example, the "weights"
          keyword).

        Returns:
            The collapsed cube with it's aggregated data payload. Or a tuple
            pair of (cube, weights) if the keyword "returned" is specified
            and True.

        """
        if kwargs.get("returned", False):
            # Package the data into the cube and return a tuple
            collapsed_cube = PercentileAggregator.post_process(
                self, collapsed_cube, data_result[0], coords, **kwargs
            )

            result = (collapsed_cube, data_result[1])
        else:
            result = PercentileAggregator.post_process(
                self, collapsed_cube, data_result, coords, **kwargs
            )

        return result


[docs]class Aggregator(_Aggregator): """ The :class:`Aggregator` class provides common aggregation functionality. """
[docs] def update_metadata(self, cube, coords, **kwargs): """ Update cube cell method metadata w.r.t the aggregation function. Args: * cube (:class:`iris.cube.Cube`): Source cube that requires metadata update. * coords (:class:`iris.coords.Coord`): The one or more coordinates that were aggregated. Kwargs: * This function is intended to be used in conjunction with aggregate() and should be passed the same keywords (for example, the "ddof" keyword for a standard deviation aggregator). """ _Aggregator.update_metadata(self, cube, coords, **kwargs) kwargs = dict(list(self._kwargs.items()) + list(kwargs.items())) if not isinstance(coords, (list, tuple)): coords = [coords] coord_names = [] for coord in coords: if not isinstance(coord, iris.coords.Coord): raise TypeError( "Coordinate instance expected to the " "Aggregator object." ) coord_names.append(coord.name()) # Add a cell method. if self.cell_method is not None: method_name = self.cell_method.format(**kwargs) cell_method = iris.coords.CellMethod(method_name, coord_names) cube.add_cell_method(cell_method)
[docs]class WeightedAggregator(Aggregator): """ Convenience class that supports common weighted aggregation functionality. """ def __init__( self, cell_method, call_func, units_func=None, lazy_func=None, **kwargs ): """ Create a weighted aggregator for the given :data:`call_func`. Args: * cell_method (string): Cell method string that supports string format substitution. * call_func (callable): Data aggregation function. Call signature `(data, axis, **kwargs)`. Kwargs: * units_func (callable): Units conversion function. * lazy_func (callable or None): An alternative to :data:`call_func` implementing a lazy aggregation. Note that, it need not support all features of the main operation, but should raise an error in unhandled cases. Additional kwargs: Passed through to :data:`call_func` and :data:`lazy_func`. """ Aggregator.__init__( self, cell_method, call_func, units_func=units_func, lazy_func=lazy_func, **kwargs, ) #: A list of keywords that trigger weighted behaviour. self._weighting_keywords = ["returned", "weights"]
[docs] def uses_weighting(self, **kwargs): """ Determine whether this aggregator uses weighting. Kwargs: * kwargs: Arguments to filter of weighted keywords. Returns: Boolean. """ result = False for kwarg in kwargs.keys(): if kwarg in self._weighting_keywords: result = True break return result
[docs] def post_process(self, collapsed_cube, data_result, coords, **kwargs): """ Process the result from :func:`iris.analysis.Aggregator.aggregate`. Returns a tuple(cube, weights) if a tuple(data, weights) was returned from :func:`iris.analysis.Aggregator.aggregate`. Args: * collapsed_cube: A :class:`iris.cube.Cube`. * data_result: Result from :func:`iris.analysis.Aggregator.aggregate` * coords: The one or more coordinates that were aggregated over. Kwargs: * This function is intended to be used in conjunction with aggregate() and should be passed the same keywords (for example, the "weights" keywords from a mean aggregator). Returns: The collapsed cube with it's aggregated data payload. Or a tuple pair of (cube, weights) if the keyword "returned" is specified and True. """ if kwargs.get("returned", False): # Package the data into the cube and return a tuple collapsed_cube.data, collapsed_weights = data_result result = (collapsed_cube, collapsed_weights) else: result = Aggregator.post_process( self, collapsed_cube, data_result, coords, **kwargs ) return result
[docs]def create_weighted_aggregator_fn(aggregator_fn, axis, **kwargs): """Return an aggregator function that can explicitely handle weights. Args: * aggregator_fn (callable): An aggregator function, i.e., a callable that takes arguments ``data``, ``axis`` and ``**kwargs`` and returns an array. Examples: :meth:`Aggregator.aggregate`, :meth:`Aggregator.lazy_aggregate`. This function should accept the keyword argument ``weights``. * axis (int): Axis to aggregate over. This argument is directly passed to ``aggregator_fn``. Kwargs: * Arbitrary keyword arguments passed to ``aggregator_fn``. Should not include ``weights`` (this will be removed if present). Returns: A function that takes two arguments ``data_arr`` and ``weights`` (both should be an array of the same shape) and returns an array. """ kwargs_copy = dict(kwargs) kwargs_copy.pop("weights", None) aggregator_fn = functools.partial(aggregator_fn, axis=axis, **kwargs_copy) def new_aggregator_fn(data_arr, weights): """Weighted aggregation.""" if weights is None: return aggregator_fn(data_arr) return aggregator_fn(data_arr, weights=weights) return new_aggregator_fn
def _build_dask_mdtol_function(dask_stats_function): """ Make a wrapped dask statistic function that supports the 'mdtol' keyword. 'dask_function' must be a dask statistical function, compatible with the call signature : "dask_stats_function(data, axis=axis, **kwargs)". It must be masked-data tolerant, i.e. it ignores masked input points and performs a calculation on only the unmasked points. For example, mean([1, --, 2]) = (1 + 2) / 2 = 1.5. If an additional dimension is created by 'dask_function', it is assumed to be the trailing one (as for '_percentile'). The returned value is a new function operating on dask arrays. It has the call signature `stat(data, axis=-1, mdtol=None, **kwargs)`. """ @wraps(dask_stats_function) def inner_stat(array, axis=-1, mdtol=None, **kwargs): # Call the statistic to get the basic result (missing-data tolerant). dask_result = dask_stats_function(array, axis=axis, **kwargs) if mdtol is None or mdtol >= 1.0: result = dask_result else: # Build a lazy computation to compare the fraction of missing # input points at each output point to the 'mdtol' threshold. point_mask_counts = da.sum(da.ma.getmaskarray(array), axis=axis) points_per_calc = array.size / dask_result.size masked_point_fractions = point_mask_counts / points_per_calc boolean_mask = masked_point_fractions > mdtol if dask_result.ndim > boolean_mask.ndim: # dask_stats_function created trailing dimension. boolean_mask = da.broadcast_to( boolean_mask.reshape(boolean_mask.shape + (1,)), dask_result.shape, ) # Return an mdtol-masked version of the basic result. result = da.ma.masked_array( da.ma.getdata(dask_result), boolean_mask ) return result return inner_stat def _axis_to_single_trailing(stats_function): """ Given a statistical function that acts on the trailing axis of a 1D or 2D array, wrap it so that higher dimension arrays can be passed, as well as any axis as int or tuple. """ @wraps(stats_function) def inner_stat(data, axis, *args, **kwargs): # Get data as a 1D or 2D view with the target axis as the trailing one. if not isinstance(axis, Iterable): axis = (axis,) end = range(-len(axis), 0) data = np.moveaxis(data, axis, end) shape = data.shape[: -len(axis)] # Shape of dims we won't collapse. if shape: data = data.reshape(np.prod(shape), -1) else: data = data.flatten() result = stats_function(data, *args, **kwargs) # Ensure to unflatten any leading dimensions. if shape: # Account for the additive dimension if necessary. if result.size > np.prod(shape): shape += (-1,) result = result.reshape(shape) return result return inner_stat def _calc_percentile(data, percent, fast_percentile_method=False, **kwargs): """ Calculate percentiles along the trailing axis of a 1D or 2D array. """ if fast_percentile_method: msg = "Cannot use fast np.percentile method with masked array." if ma.is_masked(data): raise TypeError(msg) result = np.percentile(data, percent, axis=-1) result = result.T else: quantiles = percent / 100.0 result = scipy.stats.mstats.mquantiles( data, quantiles, axis=-1, **kwargs ) if not ma.isMaskedArray(data) and not ma.is_masked(result): return np.asarray(result) else: return ma.MaskedArray(result) @_axis_to_single_trailing def _percentile(data, percent, fast_percentile_method=False, **kwargs): """ The percentile aggregator is an additive operation. This means that it *may* introduce a new dimension to the data for the statistic being calculated, but only if more than one percentile point is requested. If a new additive dimension is formed, then it will always be the last dimension of the resulting percentile data payload. Args: * data (array-like) array from which percentiles are to be calculated Kwargs: * fast_percentile_method (boolean) When set to True, uses the numpy.percentiles method as a faster alternative to the scipy.mstats.mquantiles method. Does not handle masked arrays. **kwargs passed to scipy.stats.mstats.mquantiles if fast_percentile_method is False """ if not isinstance(percent, Iterable): percent = [percent] percent = np.array(percent) # Perform the percentile calculation. _partial_percentile = functools.partial( _calc_percentile, percent=percent, fast_percentile_method=fast_percentile_method, **kwargs, ) result = iris._lazy_data.map_complete_blocks( data, _partial_percentile, (-1,), percent.shape ) # Check whether to reduce to a scalar result, as per the behaviour # of other aggregators. if result.shape == (1,): result = np.squeeze(result) return result def _weighted_quantile_1D(data, weights, quantiles, **kwargs): """ Compute the weighted quantile of a 1D numpy array. Adapted from `wquantiles <https://github.com/nudomarinero/wquantiles/>`_ Args: * data (array) One dimensional data array * weights (array) Array of the same size of `data`. If data is masked, weights must have matching mask. * quantiles : (float or sequence of floats) Quantile(s) to compute. Must have a value between 0 and 1. **kwargs passed to `scipy.interpolate.interp1d` Returns: array or float. Calculated quantile values (set to np.nan wherever sum of weights is zero or masked) """ # Return np.nan if no useable points found if np.isclose(weights.sum(), 0.0) or ma.is_masked(weights.sum()): return np.resize(np.array(np.nan), len(quantiles)) # Sort the data ind_sorted = ma.argsort(data) sorted_data = data[ind_sorted] sorted_weights = weights[ind_sorted] # Compute the auxiliary arrays Sn = np.cumsum(sorted_weights) Pn = (Sn - 0.5 * sorted_weights) / np.sum(sorted_weights) # Get the value of the weighted quantiles interpolator = scipy.interpolate.interp1d( Pn, sorted_data, bounds_error=False, **kwargs ) result = interpolator(quantiles) # Set cases where quantile falls outside data range to min or max np.place(result, Pn.min() > quantiles, sorted_data.min()) np.place(result, Pn.max() < quantiles, sorted_data.max()) return result def _weighted_percentile( data, axis, weights, percent, returned=False, **kwargs ): """ The weighted_percentile aggregator is an additive operation. This means that it *may* introduce a new dimension to the data for the statistic being calculated, but only if more than one percentile point is requested. If a new additive dimension is formed, then it will always be the last dimension of the resulting percentile data payload. Args: * data: ndarray or masked array * axis: int axis to calculate percentiles over * weights: ndarray array with the weights. Must have same shape as data * percent: float or sequence of floats Percentile rank/s at which to extract value/s. * returned: bool, optional Default False. If True, returns a tuple with the percentiles as the first element and the sum of the weights as the second element. """ # Ensure that data and weights arrays are same shape. if data.shape != weights.shape: raise ValueError("_weighted_percentile: weights wrong shape.") # Ensure that the target axis is the last dimension. data = np.rollaxis(data, axis, start=data.ndim) weights = np.rollaxis(weights, axis, start=data.ndim) quantiles = np.array(percent) / 100.0 # Add data mask to weights if necessary. if ma.isMaskedArray(data): weights = ma.array(weights, mask=data.mask) shape = data.shape[:-1] # Flatten any leading dimensions and loop over them if shape: data = data.reshape([np.prod(shape), data.shape[-1]]) weights = weights.reshape([np.prod(shape), data.shape[-1]]) result = np.empty((np.prod(shape), quantiles.size)) # Perform the percentile calculation. for res, dat, wt in zip(result, data, weights): res[:] = _weighted_quantile_1D(dat, wt, quantiles, **kwargs) else: # Data is 1D result = _weighted_quantile_1D(data, weights, quantiles, **kwargs) if np.any(np.isnan(result)): result = ma.masked_invalid(result) if not ma.isMaskedArray(data) and not ma.is_masked(result): result = np.asarray(result) # Ensure to unflatten any leading dimensions. if shape: if not isinstance(percent, Iterable): percent = [percent] percent = np.array(percent) # Account for the additive dimension. if percent.shape > (1,): shape += percent.shape result = result.reshape(shape) # Check whether to reduce to a scalar result, as per the behaviour # of other aggregators. if result.shape == (1,) and quantiles.ndim == 0: result = result[0] if returned: return result, weights.sum(axis=-1) else: return result @_build_dask_mdtol_function def _lazy_count(array, **kwargs): array = iris._lazy_data.as_lazy_data(array) func = kwargs.pop("function", None) if not callable(func): emsg = "function must be a callable. Got {}." raise TypeError(emsg.format(type(func))) return da.sum(func(array), **kwargs) def _proportion(array, function, axis, **kwargs): count = iris._lazy_data.non_lazy(_lazy_count) # if the incoming array is masked use that to count the total number of # values if ma.isMaskedArray(array): # calculate the total number of non-masked values across the given axis if array.mask is np.bool_(False): # numpy will return a single boolean as a mask if the mask # was not explicitly specified on array construction, so in this # case pass the array shape instead of the mask: total_non_masked = array.shape[axis] else: total_non_masked = count( array.mask, axis=axis, function=np.logical_not, **kwargs ) total_non_masked = ma.masked_equal(total_non_masked, 0) else: total_non_masked = array.shape[axis] # Sanitise the result of this operation thru ma.asarray to ensure that # the dtype of the fill-value and the dtype of the array are aligned. # Otherwise, it is possible for numpy to return a masked array that has # a dtype for its data that is different to the dtype of the fill-value, # which can cause issues outside this function. # Reference - tests/unit/analyis/test_PROPORTION.py Test_masked.test_ma numerator = count(array, axis=axis, function=function, **kwargs) result = ma.asarray(numerator / total_non_masked) return result def _lazy_max_run(array, axis=-1, **kwargs): """ Lazily perform the calculation of maximum run lengths along the given axis """ array = iris._lazy_data.as_lazy_data(array) func = kwargs.pop("function", None) if not callable(func): emsg = "function must be a callable. Got {}." raise TypeError(emsg.format(type(func))) bool_array = da.ma.getdata(func(array)) bool_array = da.logical_and( bool_array, da.logical_not(da.ma.getmaskarray(array)) ) padding = [(0, 0)] * array.ndim padding[axis] = (0, 1) ones_zeros = da.pad(bool_array, padding).astype(int) cum_sum = da.cumsum(ones_zeros, axis=axis) run_totals = da.where(ones_zeros == 0, cum_sum, 0) stepped_run_lengths = da.reductions.cumreduction( np.maximum.accumulate, np.maximum, np.NINF, run_totals, axis=axis, dtype=cum_sum.dtype, out=None, method="sequential", preop=None, ) run_lengths = da.diff(stepped_run_lengths, axis=axis) result = da.max(run_lengths, axis=axis) # Check whether to reduce to a scalar result, as per the behaviour # of other aggregators. if result.shape == (1,): result = da.squeeze(result) return result def _rms(array, axis, **kwargs): # XXX due to the current limitations in `da.average` (see below), maintain # an explicit non-lazy aggregation function for now. # Note: retaining this function also means that if weights are passed to # the lazy aggregator, the aggregation will fall back to using this # non-lazy aggregator. rval = np.sqrt(ma.average(np.square(array), axis=axis, **kwargs)) if not ma.isMaskedArray(array): rval = np.asarray(rval) return rval @_build_dask_mdtol_function def _lazy_rms(array, axis, **kwargs): # XXX This should use `da.average` and not `da.mean`, as does the above. # However `da.average` current doesn't handle masked weights correctly # (see https://github.com/dask/dask/issues/3846). # To work around this we use da.mean, which doesn't support weights at # all. Thus trying to use this aggregator with weights will currently # raise an error in dask due to the unexpected keyword `weights`, # rather than silently returning the wrong answer. return da.sqrt(da.mean(array**2, axis=axis, **kwargs)) @_build_dask_mdtol_function def _lazy_sum(array, **kwargs): array = iris._lazy_data.as_lazy_data(array) # weighted or scaled sum axis_in = kwargs.get("axis", None) weights_in = kwargs.pop("weights", None) returned_in = kwargs.pop("returned", False) if weights_in is not None: wsum = da.sum(weights_in * array, **kwargs) else: wsum = da.sum(array, **kwargs) if returned_in: if weights_in is None: weights = iris._lazy_data.as_lazy_data(np.ones_like(array)) else: weights = weights_in rvalue = (wsum, da.sum(weights, axis=axis_in)) else: rvalue = wsum return rvalue def _peak(array, **kwargs): def column_segments(column): nan_indices = np.where(np.isnan(column))[0] columns = [] if len(nan_indices) == 0: columns.append(column) else: for index, nan_index in enumerate(nan_indices): if index == 0: if index != nan_index: columns.append(column[:nan_index]) elif nan_indices[index - 1] != (nan_index - 1): columns.append( column[nan_indices[index - 1] + 1 : nan_index] ) if nan_indices[-1] != len(column) - 1: columns.append(column[nan_indices[-1] + 1 :]) return columns def interp_order(length): if length == 1: k = None elif length > 5: k = 5 else: k = length - 1 return k # Collapse array to its final data shape. slices = [slice(None)] * array.ndim endslice = slice(0, 1) if len(slices) == 1 else 0 slices[-1] = endslice slices = tuple(slices) # Numpy>=1.16 : index with tuple, *not* list. if isinstance(array.dtype, np.float64): data = array[slices] else: # Cast non-float data type. data = array.astype("float32")[slices] # Generate nd-index iterator over array. shape = list(array.shape) shape[-1] = 1 ndindices = np.ndindex(*shape) for ndindex in ndindices: ndindex_slice = list(ndindex) ndindex_slice[-1] = slice(None) column_slice = array[tuple(ndindex_slice)] # Check if the column slice contains a single value, nans only, # masked values only or if the values are all equal. equal_slice = ( np.ones(column_slice.size, dtype=column_slice.dtype) * column_slice[0] ) if ( column_slice.size == 1 or all(np.isnan(column_slice)) or ma.count(column_slice) == 0 or np.all(np.equal(equal_slice, column_slice)) ): continue # Check if the column slice is masked. if ma.isMaskedArray(column_slice): # Check if the column slice contains only nans, without inf # or -inf values, regardless of the mask. if not np.any(np.isfinite(column_slice)) and not np.any( np.isinf(column_slice) ): data[ndindex[:-1]] = np.nan continue # Replace masked values with nans. column_slice = column_slice.filled(np.nan) # Determine the column segments that require a fitted spline. columns = column_segments(column_slice) column_peaks = [] for column in columns: # Determine the interpolation order for the spline fit. k = interp_order(column.size) if k is None: column_peaks.append(column[0]) continue tck = scipy.interpolate.splrep(np.arange(column.size), column, k=k) npoints = column.size * 100 points = np.linspace(0, column.size - 1, npoints) spline = scipy.interpolate.splev(points, tck) column_max = np.max(column) spline_max = np.max(spline) # Check if the max value of the spline is greater than the # max value of the column. if spline_max > column_max: column_peaks.append(spline_max) else: column_peaks.append(column_max) data[ndindex[:-1]] = np.max(column_peaks) return data # # Common partial Aggregation class constructors. # COUNT = Aggregator( "count", iris._lazy_data.non_lazy(_lazy_count), units_func=lambda units: 1, lazy_func=_lazy_count, ) """ An :class:`~iris.analysis.Aggregator` instance that counts the number of :class:`~iris.cube.Cube` data occurrences that satisfy a particular criterion, as defined by a user supplied *function*. **Required** kwargs associated with the use of this aggregator: * function (callable): A function which converts an array of data values into a corresponding array of True/False values. **For example**: To compute the number of *ensemble members* with precipitation exceeding 10 (in cube data units) could be calculated with:: result = precip_cube.collapsed('ensemble_member', iris.analysis.COUNT, function=lambda values: values > 10) .. seealso:: The :func:`~iris.analysis.PROPORTION` aggregator. This aggregator handles masked data. """ MAX_RUN = Aggregator( None, iris._lazy_data.non_lazy(_lazy_max_run), units_func=lambda units: 1, lazy_func=_build_dask_mdtol_function(_lazy_max_run), ) """ An :class:`~iris.analysis.Aggregator` instance that finds the longest run of :class:`~iris.cube.Cube` data occurrences that satisfy a particular criterion, as defined by a user supplied *function*, along the given axis. **Required** kwargs associated with the use of this aggregator: * function (callable): A function which converts an array of data values into a corresponding array of True/False values. **For example**: The longest run of days with precipitation exceeding 10 (in cube data units) at each grid location could be calculated with:: result = precip_cube.collapsed('time', iris.analysis.MAX_RUN, function=lambda values: values > 10) This aggregator handles masked data, which it treats as interrupting a run. """ MAX_RUN.name = lambda: "max_run" GMEAN = Aggregator("geometric_mean", scipy.stats.mstats.gmean) """ An :class:`~iris.analysis.Aggregator` instance that calculates the geometric mean over a :class:`~iris.cube.Cube`, as computed by :func:`scipy.stats.mstats.gmean`. **For example**: To compute zonal geometric means over the *longitude* axis of a cube:: result = cube.collapsed('longitude', iris.analysis.GMEAN) This aggregator handles masked data. """ HMEAN = Aggregator("harmonic_mean", scipy.stats.mstats.hmean) """ An :class:`~iris.analysis.Aggregator` instance that calculates the harmonic mean over a :class:`~iris.cube.Cube`, as computed by :func:`scipy.stats.mstats.hmean`. **For example**: To compute zonal harmonic mean over the *longitude* axis of a cube:: result = cube.collapsed('longitude', iris.analysis.HMEAN) .. note:: The harmonic mean is only valid if all data values are greater than zero. This aggregator handles masked data. """ MEAN = WeightedAggregator( "mean", ma.average, lazy_func=_build_dask_mdtol_function(da.ma.average) ) """ An :class:`~iris.analysis.Aggregator` instance that calculates the mean over a :class:`~iris.cube.Cube`, as computed by :func:`numpy.ma.average`. Additional kwargs associated with the use of this aggregator: * mdtol (float): Tolerance of missing data. The value returned in each element of the returned array will be masked if the fraction of masked data contributing to that element exceeds mdtol. This fraction is calculated based on the number of masked elements. mdtol=0 means no missing data is tolerated while mdtol=1 means the resulting element will be masked if and only if all the contributing elements are masked. Defaults to 1. * weights (float ndarray): Weights matching the shape of the cube or the length of the window for rolling window operations. Note that, latitude/longitude area weights can be calculated using :func:`iris.analysis.cartography.area_weights`. * returned (boolean): Set this to True to indicate that the collapsed weights are to be returned along with the collapsed data. Defaults to False. **For example**: To compute zonal means over the *longitude* axis of a cube:: result = cube.collapsed('longitude', iris.analysis.MEAN) To compute a weighted area average:: coords = ('longitude', 'latitude') collapsed_cube, collapsed_weights = cube.collapsed(coords, iris.analysis.MEAN, weights=weights, returned=True) .. note:: Lazy operation is supported, via :func:`dask.array.ma.average`. This aggregator handles masked data. """ MEDIAN = Aggregator("median", ma.median) """ An :class:`~iris.analysis.Aggregator` instance that calculates the median over a :class:`~iris.cube.Cube`, as computed by :func:`numpy.ma.median`. **For example**: To compute zonal medians over the *longitude* axis of a cube:: result = cube.collapsed('longitude', iris.analysis.MEDIAN) This aggregator handles masked data. """ MIN = Aggregator( "minimum", ma.min, lazy_func=_build_dask_mdtol_function(da.min) ) """ An :class:`~iris.analysis.Aggregator` instance that calculates the minimum over a :class:`~iris.cube.Cube`, as computed by :func:`numpy.ma.min`. **For example**: To compute zonal minimums over the *longitude* axis of a cube:: result = cube.collapsed('longitude', iris.analysis.MIN) This aggregator handles masked data. """ MAX = Aggregator( "maximum", ma.max, lazy_func=_build_dask_mdtol_function(da.max) ) """ An :class:`~iris.analysis.Aggregator` instance that calculates the maximum over a :class:`~iris.cube.Cube`, as computed by :func:`numpy.ma.max`. **For example**: To compute zonal maximums over the *longitude* axis of a cube:: result = cube.collapsed('longitude', iris.analysis.MAX) This aggregator handles masked data. """ PEAK = Aggregator("peak", _peak) """ An :class:`~iris.analysis.Aggregator` instance that calculates the peak value derived from a spline interpolation over a :class:`~iris.cube.Cube`. The peak calculation takes into account nan values. Therefore, if the number of non-nan values is zero the result itself will be an array of nan values. The peak calculation also takes into account masked values. Therefore, if the number of non-masked values is zero the result itself will be a masked array. If multiple coordinates are specified, then the peak calculations are performed individually, in sequence, for each coordinate specified. **For example**: To compute the peak over the *time* axis of a cube:: result = cube.collapsed('time', iris.analysis.PEAK) This aggregator handles masked data. """ PERCENTILE = PercentileAggregator(alphap=1, betap=1) """ A :class:`~iris.analysis.PercentileAggregator` instance that calculates the percentile over a :class:`~iris.cube.Cube`, as computed by :func:`scipy.stats.mstats.mquantiles` (default) or :func:`numpy.percentile` (if fast_percentile_method is True). **Required** kwargs associated with the use of this aggregator: * percent (float or sequence of floats): Percentile rank/s at which to extract value/s. Additional kwargs associated with the use of this aggregator: * alphap (float): Plotting positions parameter, see :func:`scipy.stats.mstats.mquantiles`. Defaults to 1. * betap (float): Plotting positions parameter, see :func:`scipy.stats.mstats.mquantiles`. Defaults to 1. * fast_percentile_method (boolean): When set to True, uses :func:`numpy.percentile` method as a faster alternative to the :func:`scipy.stats.mstats.mquantiles` method. alphap and betap are ignored. An exception is raised if the data are masked. Defaults to False. **For example**: To compute the 10th and 90th percentile over *time*:: result = cube.collapsed('time', iris.analysis.PERCENTILE, percent=[10, 90]) This aggregator handles masked data and lazy data. .. note:: Performance of this aggregator on lazy data is particularly sensitive to the dask array chunking, so it may be useful to test with various chunk sizes for a given application. Any chunking along the dimensions to be aggregated is removed by the aggregator prior to calculating the percentiles. """ PROPORTION = Aggregator("proportion", _proportion, units_func=lambda units: 1) """ An :class:`~iris.analysis.Aggregator` instance that calculates the proportion, as a fraction, of :class:`~iris.cube.Cube` data occurrences that satisfy a particular criterion, as defined by a user supplied *function*. **Required** kwargs associated with the use of this aggregator: * function (callable): A function which converts an array of data values into a corresponding array of True/False values. **For example**: To compute the probability of precipitation exceeding 10 (in cube data units) across *ensemble members* could be calculated with:: result = precip_cube.collapsed('ensemble_member', iris.analysis.PROPORTION, function=lambda values: values > 10) Similarly, the proportion of *time* precipitation exceeded 10 (in cube data units) could be calculated with:: result = precip_cube.collapsed('time', iris.analysis.PROPORTION, function=lambda values: values > 10) .. seealso:: The :func:`~iris.analysis.COUNT` aggregator. This aggregator handles masked data. """ RMS = WeightedAggregator( "root mean square", _rms, lazy_func=_build_dask_mdtol_function(_lazy_rms) ) """ An :class:`~iris.analysis.Aggregator` instance that calculates the root mean square over a :class:`~iris.cube.Cube`, as computed by ((x0**2 + x1**2 + ... + xN-1**2) / N) ** 0.5. Additional kwargs associated with the use of this aggregator: * weights (float ndarray): Weights matching the shape of the cube or the length of the window for rolling window operations. The weights are applied to the squares when taking the mean. **For example**: To compute the zonal root mean square over the *longitude* axis of a cube:: result = cube.collapsed('longitude', iris.analysis.RMS) This aggregator handles masked data. """ STD_DEV = Aggregator( "standard_deviation", ma.std, ddof=1, lazy_func=_build_dask_mdtol_function(da.std), ) """ An :class:`~iris.analysis.Aggregator` instance that calculates the standard deviation over a :class:`~iris.cube.Cube`, as computed by :func:`numpy.ma.std`. Additional kwargs associated with the use of this aggregator: * ddof (integer): Delta degrees of freedom. The divisor used in calculations is N - ddof, where N represents the number of elements. Defaults to 1. **For example**: To compute zonal standard deviations over the *longitude* axis of a cube:: result = cube.collapsed('longitude', iris.analysis.STD_DEV) To obtain the biased standard deviation:: result = cube.collapsed('longitude', iris.analysis.STD_DEV, ddof=0) .. note:: Lazy operation is supported, via :func:`dask.array.nanstd`. This aggregator handles masked data. """ SUM = WeightedAggregator( "sum", iris._lazy_data.non_lazy(_lazy_sum), lazy_func=_build_dask_mdtol_function(_lazy_sum), ) """ An :class:`~iris.analysis.Aggregator` instance that calculates the sum over a :class:`~iris.cube.Cube`, as computed by :func:`numpy.ma.sum`. Additional kwargs associated with the use of this aggregator: * weights (float ndarray): Weights matching the shape of the cube, or the length of the window for rolling window operations. Weights should be normalized before using them with this aggregator if scaling is not intended. * returned (boolean): Set this to True to indicate the collapsed weights are to be returned along with the collapsed data. Defaults to False. **For example**: To compute an accumulation over the *time* axis of a cube:: result = cube.collapsed('time', iris.analysis.SUM) To compute a weighted rolling sum e.g. to apply a digital filter:: weights = np.array([.1, .2, .4, .2, .1]) result = cube.rolling_window('time', iris.analysis.SUM, len(weights), weights=weights) This aggregator handles masked data. """ VARIANCE = Aggregator( "variance", ma.var, units_func=lambda units: units * units, lazy_func=_build_dask_mdtol_function(da.var), ddof=1, ) """ An :class:`~iris.analysis.Aggregator` instance that calculates the variance over a :class:`~iris.cube.Cube`, as computed by :func:`numpy.ma.var`. Additional kwargs associated with the use of this aggregator: * ddof (integer): Delta degrees of freedom. The divisor used in calculations is N - ddof, where N represents the number of elements. Defaults to 1. **For example**: To compute zonal variance over the *longitude* axis of a cube:: result = cube.collapsed('longitude', iris.analysis.VARIANCE) To obtain the biased variance:: result = cube.collapsed('longitude', iris.analysis.VARIANCE, ddof=0) .. note:: Lazy operation is supported, via :func:`dask.array.nanvar`. This aggregator handles masked data. """ WPERCENTILE = WeightedPercentileAggregator() """ An :class:`~iris.analysis.WeightedPercentileAggregator` instance that calculates the weighted percentile over a :class:`~iris.cube.Cube`. **Required** kwargs associated with the use of this aggregator: * percent (float or sequence of floats): Percentile rank/s at which to extract value/s. * weights (float ndarray): Weights matching the shape of the cube or the length of the window for rolling window operations. Note that, latitude/longitude area weights can be calculated using :func:`iris.analysis.cartography.area_weights`. Additional kwargs associated with the use of this aggregator: * returned (boolean): Set this to True to indicate that the collapsed weights are to be returned along with the collapsed data. Defaults to False. * kind (string or int): Specifies the kind of interpolation used, see :func:`scipy.interpolate.interp1d` Defaults to "linear", which is equivalent to alphap=0.5, betap=0.5 in `iris.analysis.PERCENTILE` """ class _Groupby: """ Convenience class to determine group slices over one or more group-by coordinates. Generate the coordinate slices for the groups and calculate the new group-by coordinates and the new shared coordinates given the group slices. Note that, new shared coordinates will be bounded coordinates. Assumes that all the coordinates share the same axis, therefore all of the coordinates must be of the same length. Group-by coordinates are those coordinates over which value groups are to be determined. Shared coordinates are those coordinates which share the same axis as group-by coordinates, but which are not to be included in the group-by analysis. """ def __init__( self, groupby_coords, shared_coords=None, climatological=False ): """ Determine the group slices over the group-by coordinates. Args: * groupby_coords (list :class:`iris.coords.Coord` instances): One or more coordinates from the same axis over which to group-by. Kwargs: * shared_coords (list of (:class:`iris.coords.Coord`, `int`) pairs): One or more coordinates (including multidimensional coordinates) that share the same group-by coordinate axis. The `int` identifies which dimension of the coord is on the group-by coordinate axis. * climatological (bool): Indicates whether the output is expected to be climatological. For any aggregated time coord(s), this causes the climatological flag to be set and the point for each cell to equal its first bound, thereby preserving the time of year. """ #: Group-by and shared coordinates that have been grouped. self.coords = [] self._groupby_coords = [] self._shared_coords = [] self._slices_by_key = OrderedDict() self._stop = None # Ensure group-by coordinates are iterable. if not isinstance(groupby_coords, Iterable): raise TypeError( "groupby_coords must be a " "`collections.Iterable` type." ) # Add valid group-by coordinates. for coord in groupby_coords: self._add_groupby_coord(coord) # Add the coordinates sharing the same axis as the group-by # coordinates. if shared_coords is not None: # Ensure shared coordinates are iterable. if not isinstance(shared_coords, Iterable): raise TypeError( "shared_coords must be a " "`collections.Iterable` type." ) # Add valid shared coordinates. for coord, dim in shared_coords: self._add_shared_coord(coord, dim) # Aggregation is climatological in nature self.climatological = climatological # Stores mapping from original cube coords to new ones, as metadata may # not match self.coord_replacement_mapping = [] def _add_groupby_coord(self, coord): if coord.ndim != 1: raise iris.exceptions.CoordinateMultiDimError(coord) if self._stop is None: self._stop = coord.shape[0] if coord.shape[0] != self._stop: raise ValueError("Group-by coordinates have different lengths.") self._groupby_coords.append(coord) def _add_shared_coord(self, coord, dim): if coord.shape[dim] != self._stop and self._stop is not None: raise ValueError("Shared coordinates have different lengths.") self._shared_coords.append((coord, dim)) def group(self): """ Calculate the groups and associated slices over one or more group-by coordinates. Also creates new group-by and shared coordinates given the calculated group slices. Returns: A generator of the coordinate group slices. """ if self._groupby_coords: if not self._slices_by_key: items = [] groups = [] for coord in self._groupby_coords: groups.append(iris.coords._GroupIterator(coord.points)) items.append(next(groups[-1])) # Construct the group slice for each group over the group-by # coordinates. Keep constructing until all group-by coordinate # groups are exhausted. while any([item is not None for item in items]): # Determine the extent (start, stop) of the group given # each current group-by coordinate group. start = max( [ item.groupby_slice.start for item in items if item is not None ] ) stop = min( [ item.groupby_slice.stop for item in items if item is not None ] ) # Construct composite group key for the group using the # start value from each group-by coordinate. key = tuple( [coord.points[start] for coord in self._groupby_coords] ) # Associate group slice with group key within the ordered # dictionary. self._slices_by_key.setdefault(key, []).append( slice(start, stop) ) # Prepare for the next group slice construction over the # group-by coordinates. for item_index, item in enumerate(items): if item is None: continue # Get coordinate current group slice. groupby_slice = item.groupby_slice # Determine whether coordinate has spanned all its # groups i.e. its full length # or whether we need to get the coordinates next group. if groupby_slice.stop == self._stop: # This coordinate has exhausted all its groups, # so remove it. items[item_index] = None elif groupby_slice.stop == stop: # The current group of this coordinate is # exhausted, so get the next one. items[item_index] = next(groups[item_index]) # Merge multiple slices together into one tuple. self._slice_merge() # Calculate the new group-by coordinates. self._compute_groupby_coords() # Calculate the new shared coordinates. self._compute_shared_coords() # Generate the group-by slices/groups. for groupby_slice in self._slices_by_key.values(): yield groupby_slice return def _slice_merge(self): """ Merge multiple slices into one tuple and collapse items from containing list. """ # Iterate over the ordered dictionary in order to reduce # multiple slices into a single tuple and collapse # all items from containing list. for key, groupby_slices in self._slices_by_key.items(): if len(groupby_slices) > 1: # Compress multiple slices into tuple representation. groupby_indicies = [] for groupby_slice in groupby_slices: groupby_indicies.extend( range(groupby_slice.start, groupby_slice.stop) ) self._slices_by_key[key] = tuple(groupby_indicies) else: # Remove single inner slice from list. self._slices_by_key[key] = groupby_slices[0] def _compute_groupby_coords(self): """Create new group-by coordinates given the group slices.""" groupby_slice = [] # Iterate over the ordered dictionary in order to construct # a group-by slice that samples the first element from each group. for key_slice in self._slices_by_key.values(): if isinstance(key_slice, tuple): groupby_slice.append(key_slice[0]) else: groupby_slice.append(key_slice.start) groupby_slice = np.array(groupby_slice) # Create new group-by coordinates from the group-by slice. self.coords = [coord[groupby_slice] for coord in self._groupby_coords] def _compute_shared_coords(self): """Create the new shared coordinates given the group slices.""" groupby_indices = [] groupby_bounds = [] # Iterate over the ordered dictionary in order to construct a list of # tuple group indices, and a list of the respective bounds of those # indices. for key_slice in self._slices_by_key.values(): if isinstance(key_slice, tuple): indices = key_slice else: indices = tuple(range(*key_slice.indices(self._stop))) groupby_indices.append(indices) groupby_bounds.append((indices[0], indices[-1])) # Create new shared bounded coordinates. for coord, dim in self._shared_coords: climatological_coord = ( self.climatological and coord.units.is_time_reference() ) if coord.points.dtype.kind in "SU": if coord.bounds is None: new_points = [] new_bounds = None # np.apply_along_axis does not work with str.join, so we # need to loop through the array directly. First move axis # of interest to trailing dim and flatten the others. work_arr = np.moveaxis(coord.points, dim, -1) shape = work_arr.shape work_shape = (-1, shape[-1]) new_shape = (len(self),) if coord.ndim > 1: new_shape += shape[:-1] work_arr = work_arr.reshape(work_shape) for indices in groupby_indices: for arr in work_arr: new_points.append("|".join(arr.take(indices))) # Reinstate flattened dimensions. Aggregated dim now leads. new_points = np.array(new_points).reshape(new_shape) # Move aggregated dimension back to position it started in. new_points = np.moveaxis(new_points, 0, dim) else: msg = ( "collapsing the bounded string coordinate {0!r}" " is not supported".format(coord.name()) ) raise ValueError(msg) else: new_bounds = [] if coord.has_bounds(): # Derive new coord's bounds from bounds. item = coord.bounds maxmin_axis = (dim, -1) first_choices = coord.bounds.take(0, -1) last_choices = coord.bounds.take(1, -1) else: # Derive new coord's bounds from points. item = coord.points maxmin_axis = dim first_choices = last_choices = coord.points # Check whether item is monotonic along the dimension of interest. deltas = np.diff(item, 1, dim) monotonic = np.all(deltas >= 0) or np.all(deltas <= 0) # Construct list of coordinate group boundary pairs. if monotonic: # Use first and last bound or point for new bounds. for start, stop in groupby_bounds: if ( getattr(coord, "circular", False) and (stop + 1) == self._stop ): new_bounds.append( [ first_choices.take(start, dim), first_choices.take(0, dim) + coord.units.modulus, ] ) else: new_bounds.append( [ first_choices.take(start, dim), last_choices.take(stop, dim), ] ) else: # Use min and max bound or point for new bounds. for indices in groupby_indices: item_slice = item.take(indices, dim) new_bounds.append( [ item_slice.min(axis=maxmin_axis), item_slice.max(axis=maxmin_axis), ] ) # Bounds needs to be an array with the length 2 start-stop # dimension last, and the aggregated dimension back in its # original position. new_bounds = np.moveaxis( np.array(new_bounds), (0, 1), (dim, -1) ) # Now create the new bounded group shared coordinate. try: if climatological_coord: # Use the first bound as the point new_points = new_bounds[..., 0] else: new_points = new_bounds.mean(-1) except TypeError: msg = ( "The {0!r} coordinate on the collapsing dimension" " cannot be collapsed.".format(coord.name()) ) raise ValueError(msg) try: new_coord = coord.copy(points=new_points, bounds=new_bounds) except ValueError: # non monotonic points/bounds new_coord = iris.coords.AuxCoord.from_coord(coord).copy( points=new_points, bounds=new_bounds ) if climatological_coord: new_coord.climatological = True self.coord_replacement_mapping.append((coord, new_coord)) self.coords.append(new_coord) def __len__(self): """Calculate the number of groups given the group-by coordinates.""" if self._slices_by_key: value = len(self._slices_by_key) else: value = len([s for s in self.group()]) return value def __repr__(self): groupby_coords = [coord.name() for coord in self._groupby_coords] if self._shared_coords_by_name: shared_coords = [coord.name() for coord in self._shared_coords] shared_string = ", shared_coords=%r)" % shared_coords else: shared_string = ")" return "%s(%r%s" % ( self.__class__.__name__, groupby_coords, shared_string, )
[docs]def clear_phenomenon_identity(cube): """ Helper function to clear the standard_name, attributes, and cell_methods of a cube. """ cube.rename(None) cube.attributes.clear() cube.cell_methods = tuple()
############################################################################### # # Interpolation API # ###############################################################################
[docs]class Linear: """ This class describes the linear interpolation and regridding scheme for interpolating or regridding over one or more orthogonal coordinates, typically for use with :meth:`iris.cube.Cube.interpolate()` or :meth:`iris.cube.Cube.regrid()`. """ LINEAR_EXTRAPOLATION_MODES = list(EXTRAPOLATION_MODES.keys()) + ["linear"] def __init__(self, extrapolation_mode="linear"): """ Linear interpolation and regridding scheme suitable for interpolating or regridding over one or more orthogonal coordinates. Kwargs: * extrapolation_mode: Must be one of the following strings: * 'extrapolate' or 'linear' - The extrapolation points will be calculated by extending the gradient of the closest two points. * 'nan' - The extrapolation points will be be set to NaN. * 'error' - A ValueError exception will be raised, notifying an attempt to extrapolate. * 'mask' - The extrapolation points will always be masked, even if the source data is not a MaskedArray. * 'nanmask' - If the source data is a MaskedArray the extrapolation points will be masked. Otherwise they will be set to NaN. The default mode of extrapolation is 'linear'. """ if extrapolation_mode not in self.LINEAR_EXTRAPOLATION_MODES: msg = "Extrapolation mode {!r} not supported." raise ValueError(msg.format(extrapolation_mode)) self.extrapolation_mode = extrapolation_mode def __repr__(self): return "Linear({!r})".format(self.extrapolation_mode) def _normalised_extrapolation_mode(self): mode = self.extrapolation_mode if mode == "linear": mode = "extrapolate" return mode
[docs] def interpolator(self, cube, coords): """ Creates a linear interpolator to perform interpolation over the given :class:`~iris.cube.Cube` specified by the dimensions of the given coordinates. Typically you should use :meth:`iris.cube.Cube.interpolate` for interpolating a cube. There are, however, some situations when constructing your own interpolator is preferable. These are detailed in the :ref:`user guide <caching_an_interpolator>`. Args: * cube: The source :class:`iris.cube.Cube` to be interpolated. * coords: The names or coordinate instances that are to be interpolated over. Returns: A callable with the interface: `callable(sample_points, collapse_scalar=True)` where `sample_points` is a sequence containing an array of values for each of the coordinates passed to this method, and `collapse_scalar` determines whether to remove length one dimensions in the result cube caused by scalar values in `sample_points`. The values for coordinates that correspond to date/times may optionally be supplied as datetime.datetime or cftime.datetime instances. For example, for the callable returned by: `Linear().interpolator(cube, ['latitude', 'longitude'])`, sample_points must have the form `[new_lat_values, new_lon_values]`. """ return RectilinearInterpolator( cube, coords, "linear", self._normalised_extrapolation_mode() )
[docs] def regridder(self, src_grid, target_grid): """ Creates a linear regridder to perform regridding from the source grid to the target grid. Typically you should use :meth:`iris.cube.Cube.regrid` for regridding a cube. There are, however, some situations when constructing your own regridder is preferable. These are detailed in the :ref:`user guide <caching_a_regridder>`. Supports lazy regridding. Any `chunks <https://docs.dask.org/en/latest/array-chunks.html>`__ in horizontal dimensions will be combined before regridding. Args: * src_grid: The :class:`~iris.cube.Cube` defining the source grid. * target_grid: The :class:`~iris.cube.Cube` defining the target grid. Returns: A callable with the interface: `callable(cube)` where `cube` is a cube with the same grid as `src_grid` that is to be regridded to the `target_grid`. """ return RectilinearRegridder( src_grid, target_grid, "linear", self._normalised_extrapolation_mode(), )
[docs]class AreaWeighted: """ This class describes an area-weighted regridding scheme for regridding between 'ordinary' horizontal grids with separated X and Y coordinates in a common coordinate system. Typically for use with :meth:`iris.cube.Cube.regrid()`. """ def __init__(self, mdtol=1): """ Area-weighted regridding scheme suitable for regridding between different orthogonal XY grids in the same coordinate system. Kwargs: * mdtol (float): Tolerance of missing data. The value returned in each element of the returned array will be masked if the fraction of missing data exceeds mdtol. This fraction is calculated based on the area of masked cells within each target cell. mdtol=0 means no masked data is tolerated while mdtol=1 will mean the resulting element will be masked if and only if all the overlapping elements of the source grid are masked. Defaults to 1. .. Note: Both sourge and target cubes must have an XY grid defined by separate X and Y dimensions with dimension coordinates. All of the XY dimension coordinates must also be bounded, and have the same cooordinate system. """ if not (0 <= mdtol <= 1): msg = "Value for mdtol must be in range 0 - 1, got {}." raise ValueError(msg.format(mdtol)) self.mdtol = mdtol def __repr__(self): return "AreaWeighted(mdtol={})".format(self.mdtol)
[docs] def regridder(self, src_grid_cube, target_grid_cube): """ Creates an area-weighted regridder to perform regridding from the source grid to the target grid. Typically you should use :meth:`iris.cube.Cube.regrid` for regridding a cube. There are, however, some situations when constructing your own regridder is preferable. These are detailed in the :ref:`user guide <caching_a_regridder>`. Supports lazy regridding. Any `chunks <https://docs.dask.org/en/latest/array-chunks.html>`__ in horizontal dimensions will be combined before regridding. Args: * src_grid_cube: The :class:`~iris.cube.Cube` defining the source grid. * target_grid_cube: The :class:`~iris.cube.Cube` defining the target grid. Returns: A callable with the interface: `callable(cube)` where `cube` is a cube with the same grid as `src_grid_cube` that is to be regridded to the grid of `target_grid_cube`. """ return AreaWeightedRegridder( src_grid_cube, target_grid_cube, mdtol=self.mdtol )
[docs]class Nearest: """ This class describes the nearest-neighbour interpolation and regridding scheme for interpolating or regridding over one or more orthogonal coordinates, typically for use with :meth:`iris.cube.Cube.interpolate()` or :meth:`iris.cube.Cube.regrid()`. """ def __init__(self, extrapolation_mode="extrapolate"): """ Nearest-neighbour interpolation and regridding scheme suitable for interpolating or regridding over one or more orthogonal coordinates. Kwargs: * extrapolation_mode: Must be one of the following strings: * 'extrapolate' - The extrapolation points will take their value from the nearest source point. * 'nan' - The extrapolation points will be be set to NaN. * 'error' - A ValueError exception will be raised, notifying an attempt to extrapolate. * 'mask' - The extrapolation points will always be masked, even if the source data is not a MaskedArray. * 'nanmask' - If the source data is a MaskedArray the extrapolation points will be masked. Otherwise they will be set to NaN. The default mode of extrapolation is 'extrapolate'. """ if extrapolation_mode not in EXTRAPOLATION_MODES: msg = "Extrapolation mode {!r} not supported." raise ValueError(msg.format(extrapolation_mode)) self.extrapolation_mode = extrapolation_mode def __repr__(self): return "Nearest({!r})".format(self.extrapolation_mode)
[docs] def interpolator(self, cube, coords): """ Creates a nearest-neighbour interpolator to perform interpolation over the given :class:`~iris.cube.Cube` specified by the dimensions of the specified coordinates. Typically you should use :meth:`iris.cube.Cube.interpolate` for interpolating a cube. There are, however, some situations when constructing your own interpolator is preferable. These are detailed in the :ref:`user guide <caching_an_interpolator>`. Args: * cube: The source :class:`iris.cube.Cube` to be interpolated. * coords: The names or coordinate instances that are to be interpolated over. Returns: A callable with the interface: `callable(sample_points, collapse_scalar=True)` where `sample_points` is a sequence containing an array of values for each of the coordinates passed to this method, and `collapse_scalar` determines whether to remove length one dimensions in the result cube caused by scalar values in `sample_points`. The values for coordinates that correspond to date/times may optionally be supplied as datetime.datetime or cftime.datetime instances. For example, for the callable returned by: `Nearest().interpolator(cube, ['latitude', 'longitude'])`, sample_points must have the form `[new_lat_values, new_lon_values]`. """ return RectilinearInterpolator( cube, coords, "nearest", self.extrapolation_mode )
[docs] def regridder(self, src_grid, target_grid): """ Creates a nearest-neighbour regridder to perform regridding from the source grid to the target grid. Typically you should use :meth:`iris.cube.Cube.regrid` for regridding a cube. There are, however, some situations when constructing your own regridder is preferable. These are detailed in the :ref:`user guide <caching_a_regridder>`. Supports lazy regridding. Any `chunks <https://docs.dask.org/en/latest/array-chunks.html>`__ in horizontal dimensions will be combined before regridding. Args: * src_grid: The :class:`~iris.cube.Cube` defining the source grid. * target_grid: The :class:`~iris.cube.Cube` defining the target grid. Returns: A callable with the interface: `callable(cube)` where `cube` is a cube with the same grid as `src_grid` that is to be regridded to the `target_grid`. """ return RectilinearRegridder( src_grid, target_grid, "nearest", self.extrapolation_mode )
[docs]class UnstructuredNearest: """ This is a nearest-neighbour regridding scheme for regridding data whose horizontal (X- and Y-axis) coordinates are mapped to the *same* dimensions, rather than being orthogonal on independent dimensions. For latitude-longitude coordinates, the nearest-neighbour distances are computed on the sphere, otherwise flat Euclidean distances are used. The source X and Y coordinates can have any shape. The target grid must be of the "normal" kind, i.e. it has separate, 1-dimensional X and Y coordinates. Source and target XY coordinates must have the same coordinate system, which may also be None. If any of the XY coordinates are latitudes or longitudes, then they *all* must be. Otherwise, the corresponding X and Y coordinates must have the same units in the source and grid cubes. .. Note:: Currently only supports regridding, not interpolation. """ # Note: the argument requirements are simply those of the underlying # regridder class, # :class:`iris.analysis.trajectory.UnstructuredNearestNeigbourRegridder`. def __init__(self): """ Nearest-neighbour interpolation and regridding scheme suitable for interpolating or regridding from un-gridded data such as trajectories or other data where the X and Y coordinates share the same dimensions. """ pass def __repr__(self): return "UnstructuredNearest()" # TODO: add interpolator usage # def interpolator(self, cube):
[docs] def regridder(self, src_cube, target_grid): """ Creates a nearest-neighbour regridder, of the :class:`~iris.analysis.trajectory.UnstructuredNearestNeigbourRegridder` type, to perform regridding from the source grid to the target grid. This can then be applied to any source data with the same structure as the original 'src_cube'. Typically you should use :meth:`iris.cube.Cube.regrid` for regridding a cube. There are, however, some situations when constructing your own regridder is preferable. These are detailed in the :ref:`user guide <caching_a_regridder>`. Does not support lazy regridding. Args: * src_cube: The :class:`~iris.cube.Cube` defining the source grid. The X and Y coordinates can have any shape, but must be mapped over the same cube dimensions. * target_grid: The :class:`~iris.cube.Cube` defining the target grid. The X and Y coordinates must be one-dimensional dimension coordinates, mapped to different dimensions. All other cube components are ignored. Returns: A callable with the interface: `callable(cube)` where `cube` is a cube with the same grid as `src_cube` that is to be regridded to the `target_grid`. """ from iris.analysis.trajectory import ( UnstructuredNearestNeigbourRegridder, ) return UnstructuredNearestNeigbourRegridder(src_cube, target_grid)
[docs]class PointInCell: """ This class describes the point-in-cell regridding scheme for use typically with :meth:`iris.cube.Cube.regrid()`. Each result datapoint is an average over all source points that fall inside that (bounded) target cell. The PointInCell regridder can regrid data from a source grid of any dimensionality and in any coordinate system. The location of each source point is specified by X and Y coordinates mapped over the same cube dimensions, aka "grid dimensions" : the grid may have any dimensionality. The X and Y coordinates must also have the same, defined coord_system. The weights, if specified, must have the same shape as the X and Y coordinates. The output grid can be any 'normal' XY grid, specified by *separate* X and Y coordinates : That is, X and Y have two different cube dimensions. The output X and Y coordinates must also have a common, specified coord_system. """ def __init__(self, weights=None): """ Point-in-cell regridding scheme suitable for regridding from a source cube with X and Y coordinates all on the same dimensions, to a target cube with bounded X and Y coordinates on separate X and Y dimensions. Each result datapoint is an average over all source points that fall inside that (bounded) target cell. Optional Args: * weights: A :class:`numpy.ndarray` instance that defines the weights for the grid cells of the source grid. Must have the same shape as the data of the source grid. If unspecified, equal weighting is assumed. """ self.weights = weights
[docs] def regridder(self, src_grid, target_grid): """ Creates a point-in-cell regridder to perform regridding from the source grid to the target grid. Typically you should use :meth:`iris.cube.Cube.regrid` for regridding a cube. There are, however, some situations when constructing your own regridder is preferable. These are detailed in the :ref:`user guide <caching_a_regridder>`. Does not support lazy regridding. Args: * src_grid: The :class:`~iris.cube.Cube` defining the source grid. * target_grid: The :class:`~iris.cube.Cube` defining the target grid. Returns: A callable with the interface: `callable(cube)` where `cube` is a cube with the same grid as `src_grid` that is to be regridded to the `target_grid`. """ return CurvilinearRegridder(src_grid, target_grid, self.weights)