Source code for iris.experimental.ugrid.utils

# Copyright Iris contributors
#
# This file is part of Iris and is released under the LGPL license.
# See COPYING and COPYING.LESSER in the root of the repository for full
# licensing details.

"""
Utility operations specific to unstructured data.

"""
from typing import AnyStr, Iterable, Union

import dask.array as da
import numpy as np

from iris.cube import Cube


[docs]def recombine_submeshes( mesh_cube: Cube, submesh_cubes: Union[Iterable[Cube], Cube], index_coord_name: AnyStr = "i_mesh_index", ) -> Cube: """ Put data from sub-meshes back onto the original full mesh. The result is a cube like ``mesh_cube``, but with its data replaced by a combination of the data in the ``submesh_cubes``. Parameters ---------- mesh_cube : Cube Describes the mesh and mesh-location onto which the all the ``submesh-cubes``' data are mapped, and acts as a template for the result. Must have a :class:`~iris.experimental.ugrid.mesh.Mesh`. submesh_cubes : iterable of Cube, or Cube Cubes, each with data on a _subset_ of the ``mesh_cube`` datapoints (within the mesh dimension). The submesh cubes do not need to have a mesh. There must be at least 1 of them, to determine the result phenomenon. Their metadata (names, units and attributes) must all be the same, _except_ that 'var_name' is ignored. Their dtypes must all be the same. Their shapes and dimension-coords must all match those of ``mesh_cube``, except in the mesh dimension, which can have different sizes between the submeshes, and from the ``mesh_cube``. The mesh dimension of each must have a 1-D coord named by ``index_coord_name``. These "index coords" can vary in length, but they must all have matching metadata (units, attributes and names except 'var_name'), and must also match the coord of that name in ``mesh_cube``, if there is one. The ".points" values of the index coords specify, for each datapoint, its location in the original mesh -- i.e. they are indices into the relevant mesh-location dimension. index_coord_name : Cube Coord name of an index coord containing the mesh location indices, in every submesh cube. Returns ------- result_cube A cube with the same mesh, location, and shape as ``mesh_cube``, but with its data replaced by that from the``submesh_cubes``. The result phenomeon identity is also that of the``submesh_cubes``, i.e. units, attributes and names (except 'var_name', which is None). Notes ----- Where regions overlap, the result data comes from the submesh cube containing that location which appears _last_ in ``submesh_cubes``. Where no region contains a datapoint, it will be masked in the result. HINT: alternatively, values covered by no region can be set to the original 'full_mesh_cube' data value, if 'full_mesh_cube' is *also* passed as the first of the 'region_cubes'. The ``result_cube`` dtype is that of the ``submesh_cubes``. """ if not submesh_cubes: raise ValueError("'submesh_cubes' must be non-empty.") mesh_dim = mesh_cube.mesh_dim() if mesh_dim is None: raise ValueError("'mesh_cube' has no \".mesh\".") # # Perform consistency checks on all the region-cubes. # if not isinstance(submesh_cubes, Iterable): # Treat a single submesh cube input as a list-of-1. submesh_cubes = [submesh_cubes] result_metadata = None result_dtype = None indexcoord_metadata = None for i_sub, cube in enumerate(submesh_cubes): sub_str = ( f"Submesh cube #{i_sub + 1}/{len(submesh_cubes)}, " f'"{cube.name()}"' ) # Check dimensionality. if cube.ndim != mesh_cube.ndim: err = ( f"{sub_str} has {cube.ndim} dimensions, but " f"'mesh_cube' has {mesh_cube.ndim}." ) raise ValueError(err) # Get cube metadata + dtype : must match, and will apply to the result dtype = cube.dtype metadata = cube.metadata._replace(var_name=None) if i_sub == 0: # Store the first-cube metadata + dtype as reference result_metadata = metadata result_dtype = dtype else: # Check subsequent region-cubes metadata + dtype against the first if metadata != result_metadata: err = ( f"{sub_str} has metadata {metadata}, " "which does not match that of the other region_cubes, " f"which is {result_metadata}." ) raise ValueError(err) elif dtype != result_dtype: err = ( f"{sub_str} has a dtype of {dtype}, " "which does not match that of the other region_cubes, " f"which is {result_dtype}." ) raise ValueError(err) # For each dim, check that coords match other regions, and full-cube for i_dim in range(mesh_cube.ndim): if i_dim == mesh_dim: # mesh dim : look for index coords (by name) full_coord = mesh_cube.coords( name_or_coord=index_coord_name, dimensions=(i_dim,) ) sub_coord = cube.coords( name_or_coord=index_coord_name, dimensions=(i_dim,) ) else: # non-mesh dims : look for dim-coords (only) full_coord = mesh_cube.coords( dim_coords=True, dimensions=(i_dim,) ) sub_coord = cube.coords(dim_coords=True, dimensions=(i_dim,)) if full_coord: (full_coord,) = full_coord full_dimname = full_coord.name() full_metadata = full_coord.metadata._replace(var_name=None) if sub_coord: (sub_coord,) = sub_coord sub_dimname = sub_coord.name() sub_metadata = sub_coord.metadata._replace(var_name=None) err = None # N.B. checks for mesh- and non-mesh-dims are different if i_dim != mesh_dim: # i_dim == mesh_dim : checks for non-mesh dims if full_coord and not sub_coord: err = ( f"{sub_str} has no dim-coord for dimension " f"{i_dim}, to match the 'mesh_cube' dimension " f'"{full_dimname}".' ) elif sub_coord and not full_coord: err = ( f'{sub_str} has a dim-coord "{sub_dimname}" for ' f"dimension {i_dim}, but 'mesh_cube' has none." ) elif sub_coord != full_coord: err = ( f'{sub_str} has a dim-coord "{sub_dimname}" for ' f"dimension {i_dim}, which does not match that " f"of 'mesh_cube', \"{full_dimname}\"." ) else: # i_dim == mesh_dim : different rules for this one if not sub_coord: # Must have an index coord on the mesh dimension err = ( f'{sub_str} has no "{index_coord_name}" coord on ' f"the mesh dimension (dimension {mesh_dim})." ) elif full_coord and sub_metadata != full_metadata: # May *not* have full-cube index, but if so it must match err = ( f"{sub_str} has an index coord " f'"{index_coord_name}" whose ".metadata" does not ' f"match that of the same name in 'mesh_cube' : " f"{sub_metadata} != {full_metadata}." ) else: # At this point, we know we *have* an index coord, and it does # not conflict with the one on 'mesh_cube' (if any). # Now check for matches between the region cubes. if indexcoord_metadata is None: # Store first occurrence (from first region-cube) indexcoord_metadata = sub_metadata elif sub_metadata != indexcoord_metadata: # Compare subsequent occurrences (from other region-cubes) err = ( f"{sub_str} has an index coord " f'"{index_coord_name}" whose ".metadata" does not ' f"match that of the other submesh-cubes : " f"{sub_metadata} != {indexcoord_metadata}." ) if err: raise ValueError(err) # Use the mesh_dim to transpose inputs + outputs, if required, as it is # simpler for all the array operations to always have the mesh dim *last*. if mesh_dim == mesh_cube.ndim - 1: # Mesh dim is already the last one : no tranpose required untranspose_dims = None else: dim_range = np.arange(mesh_cube.ndim, dtype=int) # Transpose all inputs to mesh-last order tranpose_dims = [i_dim for i_dim in dim_range if i_dim != mesh_dim] + [ mesh_dim ] # chop out mesh_dim + put it at the end def transposed_copy(cube, dim_order): cube = cube.copy() cube.transpose(dim_order) return cube mesh_cube = transposed_copy(mesh_cube, tranpose_dims) submesh_cubes = [ transposed_copy(region_cube, tranpose_dims) for region_cube in submesh_cubes ] # Also prepare for transforming the output back to the original order untranspose_dims = dim_range.copy() # Neat trick to produce the reverse operation untranspose_dims[tranpose_dims] = dim_range # # Here's the core operation.. # def fill_region(target, regiondata, regioninds): if not target.flags.writeable: # The initial input can be a section of a da.zeros(), which has no # real array "behind" it. This means that real arrays created in # memory are only chunk-sized, but it also means that 'target' may # not be writeable. So take a copy to fix that, where needed. target = target.copy() # N.B. Indices are basically 1D, but may have leading *1 dims for # alignment, to satisfy da.map_blocks assert all(size == 1 for size in regioninds.shape[:-1]) inds = regioninds.flatten() # Assign blocks with indexing on the last dim only target[..., inds] = regiondata return target # Create an initially 'empty' (all-masked) dask array matching the input. # N.B. this does not use the mesh_cube.lazy_data() array, but only its # shape and dtype, since the data itself is not used in the calculation. # N.B. chunking matches the input cube, allowing performance control. input_data = mesh_cube.lazy_data() result_array = da.ma.masked_array( da.zeros( input_data.shape, dtype=result_dtype, chunks=input_data.chunksize, ), True, ) # Wrap this repeatedly with a lazy operation to assign each region. # It is done this way because we couldn't get map_blocks to correctly wrap # a function which does all regions in a single operation. # TODO: replace with a single-stage solution: Probably better, if possible. # Notes on resultant calculation properties: # 1. map_blocks is chunk-mapped, so it is parallelisable and space-saving # 2. However, fetching less than a whole chunk is not efficient for cube in submesh_cubes: # Lazy data array from the region cube sub_data = cube.lazy_data() # Lazy indices from the mesh-dim coord mesh_dimcoord = cube.coord( name_or_coord=index_coord_name, dimensions=cube.ndim - 1 ) indarr = mesh_dimcoord.lazy_points() # Extend indarr dimensions to align it with the 'target' array dims assert indarr.ndim == 1 shape = (1,) * (cube.ndim - 1) + indarr.shape indarr = indarr.reshape(shape) # Apply the operation to paste from one region into the target # N.B. replacing 'result_array' each time around the loop result_array = da.map_blocks( fill_region, result_array, sub_data, indarr, dtype=result_array.dtype, meta=np.ndarray, ) # Construct the result cube result_cube = mesh_cube.copy() result_cube.data = result_array # Copy names, units + attributes from region data (N.B. but not var_name) result_cube.metadata = result_metadata if untranspose_dims is not None: # Re-order dims as in the original input result_cube.transpose(untranspose_dims) return result_cube