You are viewing the latest unreleased documentation v3.1.dev0. You may prefer a stable version.

Source code for iris.pandas

# Copyright Iris contributors
#
# This file is part of Iris and is released under the LGPL license.
# See COPYING and COPYING.LESSER in the root of the repository for full
# licensing details.
"""
Provide conversion to and from Pandas data structures.

See also: http://pandas.pydata.org/

"""

import datetime

import cf_units
from cf_units import Unit
import cftime
import numpy as np
import numpy.ma as ma
import pandas

try:
    from pandas.core.indexes.datetimes import DatetimeIndex  # pandas >=0.20
except ImportError:
    from pandas.tseries.index import DatetimeIndex  # pandas <0.20

import iris
from iris.coords import AuxCoord, DimCoord
from iris.cube import Cube


def _add_iris_coord(cube, name, points, dim, calendar=None):
    """
    Attach a coordinate built from a Pandas index or columns array to a cube.

    Date-like points (a DatetimeIndex, or an object array consisting solely
    of datetime / cftime datetime objects) are encoded as
    "hours since epoch" in the requested calendar; Gregorian is assumed when
    no calendar is given.  Numeric, strictly monotonic points are added as a
    DimCoord on dimension ``dim``; anything else is added as an AuxCoord on
    that dimension.

    """
    if calendar is None:
        calendar = cf_units.CALENDAR_GREGORIAN

    # Pandas timestamps are first turned into plain python datetime objects.
    if isinstance(points, DatetimeIndex):
        points = np.array([stamp.to_pydatetime() for stamp in points])

    # Units stay "unknown" unless the points turn out to be dates.
    coord_units = Unit("unknown")
    date_types = (datetime.datetime, cftime.datetime)
    if points.dtype == object and all(
        isinstance(item, date_types) for item in points
    ):
        # Encode the datetime objects as numbers relative to the epoch.
        coord_units = Unit("hours since epoch", calendar=calendar)
        points = coord_units.date2num(points)

    values = np.array(points)
    is_dimension = np.issubdtype(
        values.dtype, np.number
    ) and iris.util.monotonic(values, strict=True)

    coord_class = DimCoord if is_dimension else AuxCoord
    coord = coord_class(values, units=coord_units)
    coord.rename(name)
    if is_dimension:
        cube.add_dim_coord(coord, dim)
    else:
        cube.add_aux_coord(coord, dim)

def as_cube(pandas_array, copy=True, calendars=None):
    """
    Convert a Pandas array into an Iris cube.

    Args:

        * pandas_array - A Pandas Series or DataFrame.

    Kwargs:

        * copy      - Whether to make a copy of the data.
                      Defaults to True.

        * calendars - A dict mapping a dimension to a calendar.
                      Required to convert datetime indices/columns.

    Returns:
        A cube wrapping the data (invalid values masked), with an "index"
        coordinate on dimension 0 and, for 2D input, a "columns" coordinate
        on dimension 1.

    Raises:
        ValueError: if ``pandas_array`` is not 1D or 2D.

    Example usage::

        as_cube(series, calendars={0: cf_units.CALENDAR_360_DAY})
        as_cube(data_frame, calendars={1: cf_units.CALENDAR_GREGORIAN})

    .. note:: This function will copy your data by default.

    """
    calendars = calendars or {}
    if pandas_array.ndim not in [1, 2]:
        raise ValueError(
            "Only 1D or 2D Pandas arrays "
            "can currently be converted to Iris cubes."
        )

    if copy:
        # Force C-order: the memory layout of the input does not need to be
        # preserved, and this keeps the copy consistent across NumPy versions.
        data = np.array(pandas_array, copy=True, order="C")
    else:
        # np.asarray only copies when it cannot be avoided.  Since NumPy 2,
        # np.array(..., copy=False) raises instead of falling back to a copy,
        # so it must not be used to express "avoid copying if possible".
        data = np.asarray(pandas_array, order="A")

    cube = Cube(np.ma.masked_invalid(data, copy=False))
    _add_iris_coord(cube, "index", pandas_array.index, 0, calendars.get(0))
    if pandas_array.ndim == 2:
        _add_iris_coord(
            cube,
            "columns",
            pandas_array.columns.values,
            1,
            calendars.get(1),
        )
    return cube
def _as_pandas_coord(coord):
    """
    Return the points of an Iris Coord for use as a Pandas index or columns.

    Points of a coordinate whose units are a time reference are converted
    to dates with the units' ``num2date``; other points are returned as-is.

    """
    points = coord.points
    units = coord.units
    return units.num2date(points) if units.is_time_reference() else points


def _assert_shared(np_obj, pandas_obj):
    """
    Check that a Pandas object is backed by the same memory as a NumPy array.

    Both ``np_obj`` and ``pandas_obj.values`` are traced back through their
    ``base`` references; an AssertionError is raised unless they end at the
    very same object.

    """

    def _root(array):
        # Follow the chain of `base` references down to the owning array.
        parent = array.base
        while parent is not None:
            array, parent = parent, parent.base
        return array

    pandas_root = _root(pandas_obj.values)
    numpy_root = _root(np_obj)
    if pandas_root is numpy_root:
        return
    kind = type(pandas_obj).__name__
    raise AssertionError("Pandas {} does not share memory".format(kind))
def as_series(cube, copy=True):
    """
    Convert a 1D cube to a Pandas Series.

    Args:

        * cube - The cube to convert to a Pandas Series.

    Kwargs:

        * copy - Whether to make a copy of the data.
                 Defaults to True. Must be True for masked data.

    Returns:
        A pandas.Series of the cube data, indexed by the cube's first
        dimension coordinate when it has one.

    Raises:
        ValueError: if the data is masked and ``copy`` is False.
        AssertionError: if ``copy`` is False but the resulting Series does
            not share memory with the cube data.

    .. note::

        This function will copy your data by default.
        If you have a large array that cannot be copied,
        make sure it is not masked and use copy=False.

    .. note::

        Masked points are replaced by NaN, so masked data is returned with
        a floating point dtype: at least float32, and wider when needed to
        hold the original values without loss of precision.

    """
    data = cube.data
    if ma.isMaskedArray(data):
        if not copy:
            raise ValueError("Masked arrays must always be copied.")
        # NaN needs a float dtype.  Use at least float32, but never narrow
        # wider numeric data (e.g. float64, int64): that would silently
        # lose precision.
        if data.dtype.kind in "biuf":
            fill_dtype = np.promote_types(data.dtype, np.float32)
        else:
            fill_dtype = np.dtype("f")
        data = data.astype(fill_dtype).filled(np.nan)
    elif copy:
        data = data.copy()

    index = None
    if cube.dim_coords:
        index = _as_pandas_coord(cube.dim_coords[0])

    series = pandas.Series(data, index)
    if not copy:
        # Pandas may have copied the data anyway; fail loudly if it did.
        _assert_shared(data, series)

    return series
def as_data_frame(cube, copy=True):
    """
    Convert a 2D cube to a Pandas DataFrame.

    Args:

        * cube - The cube to convert to a Pandas DataFrame.

    Kwargs:

        * copy - Whether to make a copy of the data.
                 Defaults to True. Must be True for masked data
                 and some data types (see notes below).

    Returns:
        A pandas.DataFrame of the cube data.  The coordinate on dimension 0
        (if any) provides the index and the coordinate on dimension 1
        (if any) provides the columns.

    Raises:
        ValueError: if the data is masked and ``copy`` is False.
        AssertionError: if ``copy`` is False but the resulting DataFrame
            does not share memory with the cube data.

    .. note::

        This function will copy your data by default.
        If you have a large array that cannot be copied,
        make sure it is not masked and use copy=False.

    .. note::

        Pandas will sometimes make a copy of the array,
        for example when creating from an int32 array.
        Iris will detect this and raise an exception if copy=False.

    .. note::

        Masked points are replaced by NaN, so masked data is returned with
        a floating point dtype: at least float32, and wider when needed to
        hold the original values without loss of precision.

    """
    data = cube.data
    if ma.isMaskedArray(data):
        if not copy:
            raise ValueError("Masked arrays must always be copied.")
        # NaN needs a float dtype.  Use at least float32, but never narrow
        # wider numeric data (e.g. float64, int64): that would silently
        # lose precision.
        if data.dtype.kind in "biuf":
            fill_dtype = np.promote_types(data.dtype, np.float32)
        else:
            fill_dtype = np.dtype("f")
        data = data.astype(fill_dtype).filled(np.nan)
    elif copy:
        data = data.copy()

    index = columns = None
    if cube.coords(dimensions=[0]):
        index = _as_pandas_coord(cube.coord(dimensions=[0]))
    if cube.coords(dimensions=[1]):
        columns = _as_pandas_coord(cube.coord(dimensions=[1]))

    data_frame = pandas.DataFrame(data, index, columns)
    if not copy:
        # Pandas may have copied the data anyway; fail loudly if it did.
        _assert_shared(data, data_frame)

    return data_frame