You are viewing the latest unreleased documentation 3.10.0.dev17. You can switch to a stable version.

Source code for iris.io

# Copyright Iris contributors
#
# This file is part of Iris and is released under the BSD license.
# See LICENSE in the root of the repository for full licensing details.

"""Provides an interface to manage URI scheme support in iris."""

import collections
from collections import OrderedDict
import glob
import os.path
import pathlib
import re

import iris.exceptions


# Saving routines, indexed by file extension.
class _SaversDict(dict):
    """A dictionary that can only have string keys with no overlap."""

    def __setitem__(self, key, value):
        if not isinstance(key, str):
            raise ValueError("key is not a string")
        if key in self:
            raise ValueError("A saver already exists for", key)
        for k in self.keys():
            if k.endswith(key) or key.endswith(k):
                raise ValueError("key %s conflicts with existing key %s" % (key, k))
        dict.__setitem__(self, key, value)


_savers = _SaversDict()


[docs] def run_callback(callback, cube, field, filename): """Run the callback mechanism given the appropriate arguments. Parameters ---------- callback : A function to add metadata from the originating field and/or URI which obeys the following rules: 1. Function signature must be: ``(cube, field, filename)``. 2. Modifies the given cube inplace, unless a new cube is returned by the function. 3. If the cube is to be rejected the callback must raise an :class:`iris.exceptions.IgnoreCubeException`. Notes ----- It is possible that this function returns None for certain callbacks, the caller of this function should handle this case. This function maintains laziness when called; it does not realise data. See more at :doc:`/userguide/real_and_lazy_data`. """ from iris.cube import Cube if callback is None: return cube # Call the callback function on the cube, generally the function will # operate on the cube in place, but it is also possible that the function # will return a completely new cube instance. try: result = callback(cube, field, filename) except iris.exceptions.IgnoreCubeException: result = None else: if result is None: result = cube elif not isinstance(result, Cube): raise TypeError("Callback function returned an unhandled data type.") return result
[docs] def decode_uri(uri, default="file"): r"""Decode a single URI into scheme and scheme-specific parts. In addition to well-formed URIs, it also supports bare file paths as strings or :class:`pathlib.PurePath`. Both Windows and UNIX style paths are accepted. It also supports 'bare objects', i.e. anything which is not a string. These are identified with a scheme of 'data', and returned unchanged. .. testsetup:: from iris.io import * Examples -------- >>> from iris.io import decode_uri >>> print(decode_uri('https://www.thing.com:8080/resource?id=a:b')) ('https', '//www.thing.com:8080/resource?id=a:b') >>> print(decode_uri('file:///data/local/dataZoo/...')) ('file', '///data/local/dataZoo/...') >>> print(decode_uri('/data/local/dataZoo/...')) ('file', '/data/local/dataZoo/...') >>> print(decode_uri('file:///C:\data\local\dataZoo\...')) ('file', '///C:\\data\\local\\dataZoo\\...') >>> print(decode_uri('C:\data\local\dataZoo\...')) ('file', 'C:\\data\\local\\dataZoo\\...') >>> print(decode_uri('dataZoo/...')) ('file', 'dataZoo/...') >>> print(decode_uri({})) ('data', {}) """ if isinstance(uri, pathlib.PurePath): uri = str(uri) if isinstance(uri, str): # make sure scheme has at least 2 letters to avoid windows drives # put - last in the brackets so it refers to the character, not a range # reference on valid schemes: https://tools.ietf.org/html/std66#section-3.1 match = re.match(r"^([a-zA-Z][a-zA-Z0-9+.-]+):(.+)", uri) if match: scheme = match.group(1) part = match.group(2) else: # Catch bare UNIX and Windows paths scheme = default part = uri else: # We can pass things other than strings, like open files. # These are simply identified as 'data objects'. scheme = "data" part = uri return scheme, part
[docs] def expand_filespecs(file_specs, files_expected=True): """Find all matching file paths from a list of file-specs. Parameters ---------- file_specs : iterable of str File paths which may contain ``~`` elements or wildcards. files_expected : bool, default=True Whether file is expected to exist (i.e. for load). Returns ------- list of str If files_expected is ``True``: A well-ordered list of matching absolute file paths. If any of the file-specs match no existing files, an exception is raised. If files_expected is ``False``: A list of expanded file paths. """ # Remove any hostname component - currently unused filenames = [ os.path.abspath(os.path.expanduser(fn[2:] if fn.startswith("//") else fn)) for fn in file_specs ] if files_expected: # Try to expand all filenames as globs glob_expanded = OrderedDict([[fn, sorted(glob.glob(fn))] for fn in filenames]) # If any of the specs expanded to an empty list then raise an error all_expanded = glob_expanded.values() if not all(all_expanded): msg = "One or more of the files specified did not exist:" for pattern, expanded in glob_expanded.items(): if expanded: msg += '\n - "{}" matched {} file(s)'.format( pattern, len(expanded) ) else: msg += '\n * "{}" didn\'t match any files'.format(pattern) raise IOError(msg) result = [fname for fnames in all_expanded for fname in fnames] else: result = filenames return result
[docs] def load_files(filenames, callback, constraints=None): """Create a generator of Cubes from given files. Take a list of filenames which may also be globs, and optionally a constraint set and a callback function, and returns a generator of Cubes from the given files. Notes ----- Typically, this function should not be called directly; instead, the intended interface for loading is :func:`iris.load`. """ from iris.fileformats import FORMAT_AGENT all_file_paths = expand_filespecs(filenames) # Create default dict mapping iris format handler to its associated filenames handler_map = collections.defaultdict(list) for fn in all_file_paths: with open(fn, "rb") as fh: handling_format_spec = FORMAT_AGENT.get_spec(os.path.basename(fn), fh) handler_map[handling_format_spec].append(fn) # Call each iris format handler with the appropriate filenames for handling_format_spec in sorted(handler_map): fnames = handler_map[handling_format_spec] if handling_format_spec.constraint_aware_handler: for cube in handling_format_spec.handler(fnames, callback, constraints): yield cube else: for cube in handling_format_spec.handler(fnames, callback): yield cube
[docs] def load_http(urls, callback): """Create generator of Cubes from the given OPeNDAP URLs. Take a list of OPeNDAP URLs and a callback function, and returns a generator of Cubes from the given URLs. Notes ----- Typically, this function should not be called directly; instead, the intended interface for loading is :func:`iris.load`. """ # # NOTE: this routine is *also* called by "load_data_objects", in which case the # 'urls' will actually be 'data objects'. # In principle, however, their scopes are different, so it's just an implementation # detail that right now the same code will do for both. # If that changes sometime, the two routines may go their separate ways. # Create default dict mapping iris format handler to its associated filenames from iris.fileformats import FORMAT_AGENT handler_map = collections.defaultdict(list) for url in urls: handling_format_spec = FORMAT_AGENT.get_spec(url, None) handler_map[handling_format_spec].append(url) # Call each iris format handler with the appropriate filenames for handling_format_spec in sorted(handler_map): fnames = handler_map[handling_format_spec] for cube in handling_format_spec.handler(fnames, callback): yield cube
[docs] def load_data_objects(urls, callback): """Take a list of data-source objects and a callback function, returns a generator of Cubes. The 'objects' take the place of 'uris' in the load calls. The appropriate types of the data-source objects are expected to be recognised by the handlers : This is done in the usual way by passing the context to the format picker to get a handler for each. .. note:: Typically, this function should not be called directly; instead, the intended interface for loading is :func:`iris.load`. """ # NOTE: this operation is currently *identical* to the http one. But it seems # sensible to provide a distinct handler function for this scheme. yield from load_http(urls, callback)
def _dot_save(cube, target): # A simple wrapper for `iris.fileformats.dot.save` which allows the # saver to be registered without triggering the import of # `iris.fileformats.dot`. from iris.fileformats.dot import save return save(cube, target) def _dot_save_png(cube, target, **kwargs): # A simple wrapper for `iris.fileformats.dot.save_png` which allows the # saver to be registered without triggering the import of # `iris.fileformats.dot`. from iris.fileformats.dot import save_png return save_png(cube, target, **kwargs) def _grib_save(cube, target, append=False, **kwargs): # A simple wrapper for the grib save routine, which allows the saver to be # registered without having the grib implementation installed. try: from iris_grib import save_grib2 except ImportError: raise RuntimeError( "Unable to save GRIB file - " '"iris_grib" package is not installed.' ) save_grib2(cube, target, append, **kwargs) def _check_init_savers(): from iris.fileformats import netcdf, pp if "pp" not in _savers: _savers.update( { "pp": pp.save, "nc": netcdf.save, "dot": _dot_save, "dotpng": _dot_save_png, "grib2": _grib_save, } )
[docs] def add_saver(file_extension, new_saver): """Add a custom saver to the Iris session. Parameters ---------- file_extension : str A string such as "pp" or "my_format". new_saver : function A function of the form ``my_saver(cube, target)``. See Also -------- iris.io.save : Save one or more Cubes to file (or other writeable). """ # Make sure it's a func with 2+ args if not hasattr(new_saver, "__call__") or new_saver.__code__.co_argcount < 2: raise ValueError("Saver routines must be callable with 2+ arguments.") # Try to add this saver. Invalid keys will be rejected. _savers[file_extension] = new_saver
[docs] def find_saver(filespec): """Find the saver function appropriate to the given filename or extension. Parameters ---------- filespec : str A string such as "my_file.pp" or "PP". Returns ------- Save function Save functions can be passed to :func:`iris.io.save`. Value may also be None. """ _check_init_savers() matches = [ ext for ext in _savers if filespec.lower().endswith("." + ext) or filespec.lower() == ext ] # Multiple matches could occur if one of the savers included a '.': # e.g. _savers = {'.dot.png': dot_png_saver, '.png': png_saver} if len(matches) > 1: fmt = "Multiple savers found for %r: %s" matches = ", ".join(map(repr, matches)) raise ValueError(fmt % (filespec, matches)) return _savers[matches[0]] if matches else None
[docs] def save(source, target, saver=None, **kwargs): """Save one or more Cubes to file (or other writeable). Iris currently supports three file formats for saving, which it can recognise by filename extension: * **netCDF** - the Unidata network Common Data Format, see :func:`iris.fileformats.netcdf.save` * **GRIB2** - the WMO GRIdded Binary data format, see :func:`iris_grib.save_grib2`. * **PP** - the Met Office UM Post Processing Format, see :func:`iris.fileformats.pp.save` A custom saver can be provided to the function to write to a different file format. Parameters ---------- source : :class:`iris.cube.Cube` or :class:`iris.cube.CubeList` target : str or pathlib.PurePath or io.TextIOWrapper When given a filename or file, Iris can determine the file format. saver : str or function, optional Specifies the file format to save. If omitted, Iris will attempt to determine the format. If a string, this is the recognised filename extension (where the actual filename may not have it). Otherwise the value is a saver function, of the form: ``my_saver(cube, target)`` plus any custom keywords. It is assumed that a saver will accept an ``append`` keyword if its file format can handle multiple cubes. See also :func:`iris.io.add_saver`. **kwargs : dict, optional All other keywords are passed through to the saver function; see the relevant saver documentation for more information on keyword arguments. Warnings -------- Saving a cube whose data has been loaded lazily (if `cube.has_lazy_data()` returns `True`) to the same file it expects to load data from will cause both the data in-memory and the data on disk to be lost. .. code-block:: python cube = iris.load_cube("somefile.nc") # The next line causes data loss in 'somefile.nc' and the cube. iris.save(cube, "somefile.nc") In general, overwriting a file which is the source for any lazily loaded data can result in corruption. Users should proceed with caution when attempting to overwrite an existing file. Examples -------- >>> # Setting up >>> import iris >>> my_cube = iris.load_cube(iris.sample_data_path('air_temp.pp')) >>> my_cube_list = iris.load(iris.sample_data_path('space_weather.nc')) >>> # Save a cube to PP >>> iris.save(my_cube, "myfile.pp") >>> # Save a cube list to a PP file, appending to the contents of the file >>> # if it already exists >>> iris.save(my_cube_list, "myfile.pp", append=True) >>> # Save a cube to netCDF, defaults to NETCDF4 file format >>> iris.save(my_cube, "myfile.nc") >>> # Save a cube list to netCDF, using the NETCDF3_CLASSIC storage option >>> iris.save(my_cube_list, "myfile.nc", netcdf_format="NETCDF3_CLASSIC") Notes ----- This function maintains laziness when called; it does not realise data. See more at :doc:`/userguide/real_and_lazy_data`. """ from iris.cube import Cube, CubeList # Determine format from filename if isinstance(target, pathlib.PurePath): target = str(target) if isinstance(target, str) and saver is None: # Converts tilde or wildcards to absolute path (target,) = expand_filespecs([str(target)], False) saver = find_saver(target) elif hasattr(target, "name") and saver is None: saver = find_saver(target.name) elif isinstance(saver, str): saver = find_saver(saver) if saver is None: raise ValueError("Cannot save; no saver") # Single cube? if isinstance(source, Cube): result = saver(source, target, **kwargs) # CubeList or sequence of cubes? elif isinstance(source, CubeList) or ( isinstance(source, (list, tuple)) and all([isinstance(i, Cube) for i in source]) ): # Only allow cubelist saving for those fileformats that are capable. if "iris.fileformats.netcdf" not in saver.__module__: # Make sure the saver accepts an append keyword if "append" not in saver.__code__.co_varnames: raise ValueError( "Cannot append cubes using saver function " "'%s' in '%s'" % (saver.__code__.co_name, saver.__code__.co_filename) ) # Force append=True for the tail cubes. Don't modify the incoming # kwargs. kwargs = kwargs.copy() for i, cube in enumerate(source): if i != 0: kwargs["append"] = True saver(cube, target, **kwargs) result = None # Netcdf saver. else: result = saver(source, target, **kwargs) else: raise ValueError("Cannot save; non Cube found in source") return result