
Source code for iris.fileformats.um._fast_load

# Copyright Iris contributors
#
# This file is part of Iris and is released under the BSD license.
# See LICENSE in the root of the repository for full licensing details.
"""Support for "fast" loading of structured UM files in iris load functions.

That is, :func:`iris.load` and its associates.

This provides a context manager to enable structured loading via all the iris
load function variants.  It is made public in :mod:`iris.fileformats.um`.

This works with either PP files or Fieldsfiles.
The various existing PP and FF format loaders all call into
:func:`iris.fileformats.pp._load_cubes_variable_loader`.
When structured loading is enabled, that function calls private functions in
this module, which replace the 'normal' calls with 'structured' loading.

At present, there is *no* public low-level fields-to-cube interface, equivalent
to the pp "as_pairs" functions.

"""

from contextlib import contextmanager
import os.path
import threading

import numpy as np

# Be minimal about what we import from iris, to avoid circular imports.
# Below, other parts of iris.fileformats are accessed via deferred imports.
from iris.coords import DimCoord
from iris.cube import CubeList
from iris.exceptions import TranslationError
from iris.fileformats.um._fast_load_structured_fields import (
    BasicFieldCollation,
    group_structured_fields,
)

# Strings to identify the PP and FF file format handler specs.
_FF_SPEC_NAME = "UM Fieldsfile"
_PP_SPEC_NAME = "UM Post Processing file"


class FieldCollation(BasicFieldCollation):
    # This class specialises the BasicFieldCollation by adding the file-index
    # and file-path concepts.
    # This preserves the more abstract scope of the original 'FieldCollation'
    # class, now renamed 'BasicFieldCollation'.

    def __init__(self, fields, filepath):
        """FieldCollation initialise.

        Parameters
        ----------
        fields : iterable of :class:`iris.fileformats.pp.PPField`
            The fields in the collation.
        filepath : str
            The path of the file the collation is loaded from.

        """
        super().__init__(fields)
        self._load_filepath = filepath

    @property
    def data_filepath(self):
        return self._load_filepath

    @property
    def data_field_indices(self):
        """Field indices of the contained PPFields in the input file.

        This records the original file location of the individual data fields
        contained, within the input datafile.

        Returns
        -------
        An integer array of shape `self.vector_dims_shape`.

        """
        # Get shape : N.B. this calculates (and caches) the structure.
        vector_dims_shape = self.vector_dims_shape
        # Get index-in-file of each contained field.
        indices = np.array(
            [field._index_in_structured_load_file for field in self._fields],
            dtype=np.int64,
        )
        return indices.reshape(vector_dims_shape)
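

# A minimal sketch (not part of the original module) of a load callback using
# the FieldCollation passed as its 'field' argument during structured
# loading.  The 'provenance' attribute name added here is purely
# illustrative.
def _example_structured_callback(cube, field, filename):
    # 'field.fields' is the list of raw PPFields the cube was built from;
    # 'data_filepath' and 'data_field_indices' record where each of those
    # fields sits in the input file.
    cube.attributes["provenance"] = "{}: fields {}".format(
        field.data_filepath,
        field.data_field_indices.flatten().tolist(),
    )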


def _basic_load_function(filename, pp_filter=None, **kwargs):
    # The low-level 'fields from filename' loader.
    #
    # This is the 'loader.generator' in the control structure passed to the
    # generic rules code, :func:`iris.fileformats.rules.load_cubes`.
    #
    # As called by generic rules code, this replaces pp.load (and the like).
    # It yields a sequence of "fields", but in this case the 'fields' are
    # :class:`iris.fileformats.um._fast_load_structured_fields.FieldCollation`
    # objects.
    #
    # NOTE: so, a FieldCollation is passed as the 'field' in user callbacks.
    #
    # Also in our case, we need to support the basic single-field filtering
    # operation that speeds up phenomenon selection.
    # Therefore, the actual loader will pass this as the 'pp_filter' keyword,
    # when it is present.
    # Additional load keywords are 'passed on' to the lower-level function.

    # Helper function to select the correct fields loader call.
    def _select_raw_fields_loader(fname):
        # Return the PPField loading function for a file name.
        #
        # This decides whether the underlying file is an FF or PP file.
        # Because it would be too awkward to modify the whole iris loading
        # callchain to "pass down" the file format, this function instead
        # 'recreates' that information by calling the format picker again.
        # NOTE: this may be inefficient, especially for web resources.
        from iris.fileformats import FORMAT_AGENT
        from iris.fileformats.pp import load as pp_load
        from iris.fileformats.um import um_to_pp

        with open(fname, "rb") as fh:
            spec = FORMAT_AGENT.get_spec(os.path.basename(fname), fh)
        if spec.name.startswith(_FF_SPEC_NAME):
            loader = um_to_pp
        elif spec.name.startswith(_PP_SPEC_NAME):
            loader = pp_load
        else:
            emsg = "Require {!r} to be a structured FieldsFile or a PP file."
            raise ValueError(emsg.format(fname))
        return loader

    loader = _select_raw_fields_loader(filename)

    def iter_fields_decorated_with_load_indices(fields_iter):
        for i_field, field in enumerate(fields_iter):
            field._index_in_structured_load_file = i_field
            yield field

    fields = iter_fields_decorated_with_load_indices(
        field
        for field in loader(filename, **kwargs)
        if pp_filter is None or pp_filter(field)
    )

    return group_structured_fields(
        fields, collation_class=FieldCollation, filepath=filename
    )


# Define the preferred order of candidate dimension coordinates, as used by
# _convert_collation.
_HINT_COORDS = ["time", "forecast_reference_time", "model_level_number"]
_HINTS = {name: i for i, name in enumerate(_HINT_COORDS)}


def _convert_collation(collation):
    """Convert a FieldCollation into the corresponding items of Cube metadata.

    Parameters
    ----------
    collation : FieldCollation
        A FieldCollation object.

    Returns
    -------
    A :class:`iris.fileformats.rules.ConversionMetadata` object.

    Notes
    -----
    .. note::

        This is the 'loader.converter', in the control structure passed to
        the generic rules code, :func:`iris.fileformats.rules.load_cubes`.

    """
    from iris.fileformats.pp_load_rules import (
        _all_other_rules,
        _convert_scalar_pseudo_level_coords,
        _convert_scalar_realization_coords,
        _convert_time_coords,
        _convert_vertical_coords,
    )
    from iris.fileformats.rules import ConversionMetadata

    # For all the scalar conversions, all fields in the collation will
    # give the same result, so the choice is arbitrary.
    field = collation.fields[0]

    # Call "all other" rules.
    (
        references,
        standard_name,
        long_name,
        units,
        attributes,
        cell_methods,
        dim_coords_and_dims,
        aux_coords_and_dims,
    ) = _all_other_rules(field)

    # Adjust any dimension bindings to account for the extra leading
    # dimensions added by the collation.
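    # (Illustrative note, not in the original: with vector_dims_shape ==
    # (3, 7), for example, two leading collation dimensions are added, so a
    # coordinate previously bound to data dimension 0 moves to dimension 2.)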
    if collation.vector_dims_shape:

        def _adjust_dims(coords_and_dims, n_dims):
            def adjust(dims):
                if dims is not None:
                    dims += n_dims
                return dims

            return [(coord, adjust(dims)) for coord, dims in coords_and_dims]

        n_collation_dims = len(collation.vector_dims_shape)
        dim_coords_and_dims = _adjust_dims(dim_coords_and_dims, n_collation_dims)
        aux_coords_and_dims = _adjust_dims(aux_coords_and_dims, n_collation_dims)

    # Dimensions to which we've already assigned dimension coordinates.
    dim_coord_dims = set()

    # Helper call to choose which coords are dimensions and which auxiliary.
    def _bind_coords(
        coords_and_dims,
        dim_coord_dims,
        dim_coords_and_dims,
        aux_coords_and_dims,
    ):
        def key_func(item):
            return _HINTS.get(item[0].name(), len(_HINTS))

        # Target the first DimCoord for a dimension at dim_coords,
        # and target everything else at aux_coords.
        for coord, dims in sorted(coords_and_dims, key=key_func):
            if (
                isinstance(coord, DimCoord)
                and dims is not None
                and len(dims) == 1
                and dims[0] not in dim_coord_dims
            ):
                dim_coords_and_dims.append((coord, dims))
                dim_coord_dims.add(dims[0])
            else:
                aux_coords_and_dims.append((coord, dims))

    # Call "time" rules.
    #
    # For "normal" (non-cross-sectional) time values.
    vector_headers = collation.element_arrays_and_dims
    # If the collation doesn't define a vector of values for a
    # particular header then it must be constant over all fields in the
    # collation.  In which case it's safe to get the value from any field.
    t1, t1_dims = vector_headers.get("t1", (field.t1, ()))
    t2, t2_dims = vector_headers.get("t2", (field.t2, ()))
    lbft, lbft_dims = vector_headers.get("lbft", (field.lbft, ()))

    coords_and_dims = _convert_time_coords(
        field.lbcode,
        field.lbtim,
        field.time_unit("hours"),
        t1,
        t2,
        lbft,
        t1_dims,
        t2_dims,
        lbft_dims,
    )

    # Bind resulting coordinates to dimensions, where suitable.
    _bind_coords(
        coords_and_dims,
        dim_coord_dims,
        dim_coords_and_dims,
        aux_coords_and_dims,
    )

    # Call "vertical" rules.
    #
    # "Normal" (non-cross-sectional) vertical levels.
    blev, blev_dims = vector_headers.get("blev", (field.blev, ()))
    lblev, lblev_dims = vector_headers.get("lblev", (field.lblev, ()))
    bhlev, bhlev_dims = vector_headers.get("bhlev", (field.bhlev, ()))
    bhrlev, bhrlev_dims = vector_headers.get("bhrlev", (field.bhrlev, ()))
    brsvd1, brsvd1_dims = vector_headers.get("brsvd1", (field.brsvd[0], ()))
    brsvd2, brsvd2_dims = vector_headers.get("brsvd2", (field.brsvd[1], ()))
    brlev, brlev_dims = vector_headers.get("brlev", (field.brlev, ()))

    # Find all the non-trivial dimension values.
    dims = set(
        filter(
            None,
            [
                blev_dims,
                lblev_dims,
                bhlev_dims,
                bhrlev_dims,
                brsvd1_dims,
                brsvd2_dims,
                brlev_dims,
            ],
        )
    )
    if len(dims) > 1:
        raise TranslationError("Unsupported multiple values for vertical dimension.")
    if dims:
        v_dims = dims.pop()
        if len(v_dims) > 1:
            raise TranslationError("Unsupported multi-dimension vertical headers.")
    else:
        v_dims = ()
    coords_and_dims, factories = _convert_vertical_coords(
        field.lbcode,
        field.lbvc,
        blev,
        lblev,
        field.stash,
        bhlev,
        bhrlev,
        brsvd1,
        brsvd2,
        brlev,
        v_dims,
    )

    # Bind resulting coordinates to dimensions, where suitable.
    _bind_coords(
        coords_and_dims,
        dim_coord_dims,
        dim_coords_and_dims,
        aux_coords_and_dims,
    )

    # Realization (aka ensemble) (--> scalar coordinates).
    aux_coords_and_dims.extend(
        _convert_scalar_realization_coords(lbrsvd4=field.lbrsvd[3])
    )

    # Pseudo-level coordinate (--> scalar coordinates).
    aux_coords_and_dims.extend(
        _convert_scalar_pseudo_level_coords(lbuser5=field.lbuser[4])
    )

    return ConversionMetadata(
        factories,
        references,
        standard_name,
        long_name,
        units,
        attributes,
        cell_methods,
        dim_coords_and_dims,
        aux_coords_and_dims,
    )


def _combine_structured_cubes(cubes):
    # Combine structured cubes from different sourcefiles, in the style of
    # merge/concatenate.
    #
    # Because standard Cube.merge employed in loading can't do this.
    if STRUCTURED_LOAD_CONTROLS.structured_load_is_raw:
        # Cross-file concatenate is disabled, during a "load_raw" call.
        result = cubes
    else:
        result = iter(CubeList(cubes).concatenate())
    return result


class StructuredLoadFlags(threading.local):
    # A thread-safe object to control structured loading operations.
    # The object properties are the control flags.
    #
    # Inheriting from 'threading.local' provides a *separate* set of the
    # object properties for each thread.
    def __init__(self):
        # Control whether iris load functions do structured loads.
        self.loads_use_structured = False
        # Control whether structured load 'combine' is enabled.
        self.structured_load_is_raw = False

    @contextmanager
    def context(self, loads_use_structured=None, structured_load_is_raw=None):
        # Snapshot current states, for restoration afterwards.
        old_structured = self.loads_use_structured
        old_raw_load = self.structured_load_is_raw
        try:
            # Set flags for duration, as requested.
            if loads_use_structured is not None:
                self.loads_use_structured = loads_use_structured
            if structured_load_is_raw is not None:
                self.structured_load_is_raw = structured_load_is_raw
            # Yield to caller operation.
            yield
        finally:
            # Restore entry state of flags.
            self.loads_use_structured = old_structured
            self.structured_load_is_raw = old_raw_load


# A singleton structured-load-control object.
# Used in :func:`iris.fileformats.pp._load_cubes_variable_loader`.
STRUCTURED_LOAD_CONTROLS = StructuredLoadFlags()
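

# A minimal sketch (not part of the original module) illustrating the
# snapshot-and-restore behaviour of StructuredLoadFlags.context().
def _example_flag_scoping():
    flags = STRUCTURED_LOAD_CONTROLS
    entry_state = flags.loads_use_structured
    with flags.context(loads_use_structured=True):
        # Within the scope, structured loading is enabled for this thread.
        assert flags.loads_use_structured is True
    # On exit, the entry state is restored, even if an error was raised.
    assert flags.loads_use_structured == entry_state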


@contextmanager
def structured_um_loading():
    """Load cubes from structured UM Fieldsfile and PP files.

    "Structured" loading is a streamlined, fast load operation, to be used
    **only** on fieldsfiles or PP files whose fields repeat regularly over
    the same vertical levels and times (see full details below).

    This function is a context manager which enables an alternative loading
    mechanism for 'structured' UM files, providing much faster load times.
    Within the scope of the context manager, this affects all standard Iris
    load functions (:func:`~iris.load`, :func:`~iris.load_cube`,
    :func:`~iris.load_cubes` and :func:`~iris.load_raw`), when loading from
    UM format files (PP or fieldsfiles).

    For example:

        >>> import iris
        >>> filepath = iris.sample_data_path('uk_hires.pp')
        >>> from iris.fileformats.um import structured_um_loading
        >>> with structured_um_loading():
        ...     cube = iris.load_cube(filepath, 'air_potential_temperature')
        ...
        >>> cube
        <iris 'Cube' of air_potential_temperature / (K) \
(time: 3; model_level_number: 7; grid_latitude: 204; grid_longitude: 187)>

    The results from this are normally equivalent to those generated by
    :func:`iris.load`, but the operation is substantially faster for input
    which is structured.

    For calls other than :func:`~iris.load_raw`, the resulting cubes are
    concatenated over all the input files, so there is normally just one
    output cube per phenomenon.

    However, actual loaded results are somewhat different from non-structured
    loads in many cases, and in a variety of ways.  Most commonly, dimension
    ordering and the choice of dimension coordinates are often different.

    Use of load callbacks:
        When a user callback function is used with structured-loading, it is
        called in a somewhat different way than in a 'normal' load :
        the callback is called once for each basic *structured* cube loaded,
        which is normally the whole of one phenomenon from a single input
        file.
        In particular, the callback's "field" argument is a
        :class:`~iris.fileformats.um.FieldCollation`, from which
        "field.fields" gives a *list* of PPFields from which that cube was
        built, and the properties "field.data_filepath" and
        "field.data_field_indices" reference the original file locations of
        the cube data.
        The code required is therefore different from a 'normal' callback.
        For an example of this, see `this example in the Iris test code
        <https://github.com/SciTools/iris/
        blob/ddb46f78e54b6ef4110357dfe9cfcffa7d186d90/
        lib/iris/tests/integration/fast_load/test_fast_load.py#L409>`_.

    Notes on applicability:
        For results to be **correct and reliable**, the input files must
        conform to the following requirements :

        * the file must contain fields for all possible combinations of the
          vertical levels and time points found in the file.

        * the fields must occur in a regular repeating order within the
          file, within the fields of each phenomenon.

          For example: a sequence of fields for NV vertical levels, repeated
          for NP different forecast periods, repeated for NT different
          forecast times.

        * all other metadata must be identical across all fields of the same
          phenomenon.

        Each group of fields with the same values of LBUSER4, LBUSER7 and
        LBPROC is identified as a separate phenomenon:  these groups are
        processed independently and returned as separate result cubes.

        The need for a regular sequence of fields applies separately to the
        fields of each phenomenon, such that different phenomena may have
        different field structures, and can be interleaved in any way at all.

        .. note::

            At present, fields with different values of 'LBUSER5'
            (pseudo-level) are *also* treated internally as different
            phenomena, yielding a raw cube per level.
            The effects of this are not normally noticed, as the resulting
            multiple raw cubes merge together again in a 'normal' load.
            However, it is not an ideal solution, as operation is less
            efficient (in particular, slower) :  it is done to avoid a
            limitation in the underlying code which would otherwise load
            data on pseudo-levels incorrectly.  In future, this may be
            corrected.

    Known current shortcomings:
        * orography fields may be returned with extra dimensions, e.g. time,
          where multiple fields exist in an input file.

        * if some input files contain a *single* coordinate value while
          others contain *multiple* values, these will not be merged into a
          single cube over all input files :  instead, the single- and
          multiple-valued sets will typically produce two separate cubes
          with overlapping coordinates.

          * this can be worked around by loading files individually, or with
            :func:`~iris.load_raw`, and merging/concatenating explicitly
            (see the sketch after this function).

    .. note::

        The resulting time-related coordinates ('time',
        'forecast_reference_time' and 'forecast_period') may be mapped to
        shared cube dimensions and in some cases can also be
        multidimensional.  However, the vertical level information *must*
        have a simple one-dimensional structure, independent of the time
        points, otherwise an error will be raised.

    .. note::

        Where input data does *not* have a fully regular arrangement, the
        corresponding result cube will have a single anonymous extra
        dimension which indexes over all the input fields.

        This can happen if, for example, some fields are missing; or have
        slightly different metadata; or appear out of order in the file.

    .. warning::

        Restrictions and limitations:

        Any non-regular metadata variation in the input should be strictly
        avoided, as not all irregularities are detected, which can cause
        erroneous results.

        Various field header words which can in some cases vary are assumed
        to have a constant value throughout a given phenomenon.  This is
        **not** checked, and can lead to erroneous results if it is not the
        case.  Header elements of potential concern include LBTIM, LBCODE,
        LBVC and LBRSVD4 (ensemble number).

    """
    with STRUCTURED_LOAD_CONTROLS.context(loads_use_structured=True):
        yield
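

# A minimal sketch (not part of the original module) of the explicit
# merge/concatenate workaround mentioned in the docstring above: load each
# file raw under structured loading, then combine the results by hand.
# The 'filepaths' argument is an illustrative assumption.
def _example_explicit_combine(filepaths):
    import iris

    raw_cubes = CubeList()
    with structured_um_loading():
        for filepath in filepaths:
            raw_cubes.extend(iris.load_raw(filepath))
    # Merge scalar-coordinate cubes first, then concatenate along existing
    # dimensions, instead of relying on the automatic cross-file
    # concatenation performed for non-raw loads.
    return raw_cubes.merge().concatenate()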


@contextmanager
def _raw_structured_loading():
    """Prevent structured loading from concatenating its result cubes.

    Private context manager called by :func:`iris.load_raw`, to prevent
    structured loading from concatenating its result cubes in that case.

    """
    with STRUCTURED_LOAD_CONTROLS.context(structured_load_is_raw=True):
        yield