Source code for iris.io

# Copyright Iris contributors
#
# This file is part of Iris and is released under the LGPL license.
# See COPYING and COPYING.LESSER in the root of the repository for full
# licensing details.
"""
Provides an interface to manage URI scheme support in iris.

"""

import collections
from collections import OrderedDict
import glob
import os.path
import re

import iris.exceptions


# Saving routines, indexed by file extension.
class _SaversDict(dict):
    """A dictionary that can only have string keys with no overlap."""

    def __setitem__(self, key, value):
        if not isinstance(key, str):
            raise ValueError("key is not a string")
        if key in self:
            raise ValueError("A saver already exists for", key)
        for k in self.keys():
            if k.endswith(key) or key.endswith(k):
                raise ValueError(
                    "key %s conflicts with existing key %s" % (key, k)
                )
        dict.__setitem__(self, key, value)


_savers = _SaversDict()


[docs]def run_callback(callback, cube, field, filename): """ Runs the callback mechanism given the appropriate arguments. Args: * callback: A function to add metadata from the originating field and/or URI which obeys the following rules: 1. Function signature must be: ``(cube, field, filename)``. 2. Modifies the given cube inplace, unless a new cube is returned by the function. 3. If the cube is to be rejected the callback must raise an :class:`iris.exceptions.IgnoreCubeException`. .. note:: It is possible that this function returns None for certain callbacks, the caller of this function should handle this case. """ from iris.cube import Cube if callback is None: return cube # Call the callback function on the cube, generally the function will # operate on the cube in place, but it is also possible that the function # will return a completely new cube instance. try: result = callback(cube, field, filename) except iris.exceptions.IgnoreCubeException: result = None else: if result is None: result = cube elif not isinstance(result, Cube): raise TypeError( "Callback function returned an " "unhandled data type." ) return result
[docs]def decode_uri(uri, default="file"): r""" Decodes a single URI into scheme and scheme-specific parts. In addition to well-formed URIs, it also supports bare file paths. Both Windows and UNIX style paths are accepted. .. testsetup:: from iris.io import * Examples: >>> from iris.io import decode_uri >>> print(decode_uri('http://www.thing.com:8080/resource?id=a:b')) ('http', '//www.thing.com:8080/resource?id=a:b') >>> print(decode_uri('file:///data/local/dataZoo/...')) ('file', '///data/local/dataZoo/...') >>> print(decode_uri('/data/local/dataZoo/...')) ('file', '/data/local/dataZoo/...') >>> print(decode_uri('file:///C:\data\local\dataZoo\...')) ('file', '///C:\\data\\local\\dataZoo\\...') >>> print(decode_uri('C:\data\local\dataZoo\...')) ('file', 'C:\\data\\local\\dataZoo\\...') >>> print(decode_uri('dataZoo/...')) ('file', 'dataZoo/...') """ # make sure scheme has at least 2 letters to avoid windows drives # put - last in the brackets so it refers to the character, not a range # reference on valid schemes: http://tools.ietf.org/html/std66#section-3.1 match = re.match(r"^([a-zA-Z][a-zA-Z0-9+.-]+):(.+)", uri) if match: scheme = match.group(1) part = match.group(2) else: # Catch bare UNIX and Windows paths scheme = default part = uri return scheme, part
[docs]def expand_filespecs(file_specs): """ Find all matching file paths from a list of file-specs. Args: * file_specs (iterable of string): File paths which may contain '~' elements or wildcards. Returns: A well-ordered list of matching absolute file paths. If any of the file-specs match no existing files, an exception is raised. """ # Remove any hostname component - currently unused filenames = [ os.path.abspath( os.path.expanduser(fn[2:] if fn.startswith("//") else fn) ) for fn in file_specs ] # Try to expand all filenames as globs glob_expanded = OrderedDict( [[fn, sorted(glob.glob(fn))] for fn in filenames] ) # If any of the specs expanded to an empty list then raise an error all_expanded = glob_expanded.values() if not all(all_expanded): msg = "One or more of the files specified did not exist:" for pattern, expanded in glob_expanded.items(): if expanded: msg += '\n - "{}" matched {} file(s)'.format( pattern, len(expanded) ) else: msg += '\n * "{}" didn\'t match any files'.format(pattern) raise IOError(msg) return [fname for fnames in all_expanded for fname in fnames]
[docs]def load_files(filenames, callback, constraints=None): """ Takes a list of filenames which may also be globs, and optionally a constraint set and a callback function, and returns a generator of Cubes from the given files. .. note:: Typically, this function should not be called directly; instead, the intended interface for loading is :func:`iris.load`. """ from iris.fileformats import FORMAT_AGENT all_file_paths = expand_filespecs(filenames) # Create default dict mapping iris format handler to its associated filenames handler_map = collections.defaultdict(list) for fn in all_file_paths: with open(fn, "rb") as fh: handling_format_spec = FORMAT_AGENT.get_spec( os.path.basename(fn), fh ) handler_map[handling_format_spec].append(fn) # Call each iris format handler with the approriate filenames for handling_format_spec in sorted(handler_map): fnames = handler_map[handling_format_spec] if handling_format_spec.constraint_aware_handler: for cube in handling_format_spec.handler( fnames, callback, constraints ): yield cube else: for cube in handling_format_spec.handler(fnames, callback): yield cube
[docs]def load_http(urls, callback): """ Takes a list of urls and a callback function, and returns a generator of Cubes from the given URLs. .. note:: Typically, this function should not be called directly; instead, the intended interface for loading is :func:`iris.load`. """ # Create default dict mapping iris format handler to its associated filenames handler_map = collections.defaultdict(list) for url in urls: handling_format_spec = iris.fileformats.FORMAT_AGENT.get_spec( url, None ) handler_map[handling_format_spec].append(url) # Call each iris format handler with the appropriate filenames for handling_format_spec in sorted(handler_map): fnames = handler_map[handling_format_spec] for cube in handling_format_spec.handler(fnames, callback): yield cube
def _dot_save(cube, target): # A simple wrapper for `iris.fileformats.dot.save` which allows the # saver to be registered without triggering the import of # `iris.fileformats.dot`. from iris.fileformats.dot import save return save(cube, target) def _dot_save_png(cube, target, **kwargs): # A simple wrapper for `iris.fileformats.dot.save_png` which allows the # saver to be registered without triggering the import of # `iris.fileformats.dot`. from iris.fileformats.dot import save_png return save_png(cube, target, **kwargs) def _grib_save(cube, target, append=False, **kwargs): # A simple wrapper for the grib save routine, which allows the saver to be # registered without having the grib implementation installed. try: from iris_grib import save_grib2 except ImportError: raise RuntimeError( "Unable to save GRIB file - " '"iris_grib" package is not installed.' ) save_grib2(cube, target, append, **kwargs) def _check_init_savers(): from iris.fileformats import netcdf, pp if "pp" not in _savers: _savers.update( { "pp": pp.save, "nc": netcdf.save, "dot": _dot_save, "dotpng": _dot_save_png, "grib2": _grib_save, } )
[docs]def add_saver(file_extension, new_saver): """ Add a custom saver to the Iris session. Args: * file_extension: A string such as "pp" or "my_format". * new_saver: A function of the form ``my_saver(cube, target)``. See also :func:`iris.io.save` """ # Make sure it's a func with 2+ args if ( not hasattr(new_saver, "__call__") or new_saver.__code__.co_argcount < 2 ): raise ValueError("Saver routines must be callable with 2+ arguments.") # Try to add this saver. Invalid keys will be rejected. _savers[file_extension] = new_saver
[docs]def find_saver(filespec): """ Find the saver function appropriate to the given filename or extension. Args: * filespec - A string such as "my_file.pp" or "PP". Returns: A save function or None. Save functions can be passed to :func:`iris.io.save`. """ _check_init_savers() matches = [ ext for ext in _savers if filespec.lower().endswith("." + ext) or filespec.lower() == ext ] # Multiple matches could occur if one of the savers included a '.': # e.g. _savers = {'.dot.png': dot_png_saver, '.png': png_saver} if len(matches) > 1: fmt = "Multiple savers found for %r: %s" matches = ", ".join(map(repr, matches)) raise ValueError(fmt % (filespec, matches)) return _savers[matches[0]] if matches else None
[docs]def save(source, target, saver=None, **kwargs): """ Save one or more Cubes to file (or other writeable). Iris currently supports three file formats for saving, which it can recognise by filename extension: * netCDF - the Unidata network Common Data Format: * see :func:`iris.fileformats.netcdf.save` * GRIB2 - the WMO GRIdded Binary data format: * see :func:`iris_grib.save_grib2`. * PP - the Met Office UM Post Processing Format: * see :func:`iris.fileformats.pp.save` A custom saver can be provided to the function to write to a different file format. Args: * source: :class:`iris.cube.Cube`, :class:`iris.cube.CubeList` or sequence of cubes. * target: A filename (or writeable, depending on file format). When given a filename or file, Iris can determine the file format. Kwargs: * saver: Optional. Specifies the file format to save. If omitted, Iris will attempt to determine the format. If a string, this is the recognised filename extension (where the actual filename may not have it). Otherwise the value is a saver function, of the form: ``my_saver(cube, target)`` plus any custom keywords. It is assumed that a saver will accept an ``append`` keyword if it's file format can handle multiple cubes. See also :func:`iris.io.add_saver`. All other keywords are passed through to the saver function; see the relevant saver documentation for more information on keyword arguments. Examples:: # Save a cube to PP iris.save(my_cube, "myfile.pp") # Save a cube list to a PP file, appending to the contents of the file # if it already exists iris.save(my_cube_list, "myfile.pp", append=True) # Save a cube to netCDF, defaults to NETCDF4 file format iris.save(my_cube, "myfile.nc") # Save a cube list to netCDF, using the NETCDF3_CLASSIC storage option iris.save(my_cube_list, "myfile.nc", netcdf_format="NETCDF3_CLASSIC") .. warning:: Saving a cube whose data has been loaded lazily (if `cube.has_lazy_data()` returns `True`) to the same file it expects to load data from will cause both the data in-memory and the data on disk to be lost. .. code-block:: python cube = iris.load_cube("somefile.nc") # The next line causes data loss in 'somefile.nc' and the cube. iris.save(cube, "somefile.nc") In general, overwriting a file which is the source for any lazily loaded data can result in corruption. Users should proceed with caution when attempting to overwrite an existing file. """ from iris.cube import Cube, CubeList # Determine format from filename if isinstance(target, str) and saver is None: saver = find_saver(target) elif hasattr(target, "name") and saver is None: saver = find_saver(target.name) elif isinstance(saver, str): saver = find_saver(saver) if saver is None: raise ValueError("Cannot save; no saver") # Single cube? if isinstance(source, Cube): saver(source, target, **kwargs) # CubeList or sequence of cubes? elif isinstance(source, CubeList) or ( isinstance(source, (list, tuple)) and all([isinstance(i, Cube) for i in source]) ): # Only allow cubelist saving for those fileformats that are capable. if "iris.fileformats.netcdf" not in saver.__module__: # Make sure the saver accepts an append keyword if "append" not in saver.__code__.co_varnames: raise ValueError( "Cannot append cubes using saver function " "'%s' in '%s'" % (saver.__code__.co_name, saver.__code__.co_filename) ) # Force append=True for the tail cubes. Don't modify the incoming # kwargs. kwargs = kwargs.copy() for i, cube in enumerate(source): if i != 0: kwargs["append"] = True saver(cube, target, **kwargs) # Netcdf saver. else: saver(source, target, **kwargs) else: raise ValueError("Cannot save; non Cube found in source")