# Copyright Iris contributors
#
# This file is part of Iris and is released under the LGPL license.
# See COPYING and COPYING.LESSER in the root of the repository for full
# licensing details.
"""
Provide conversion to and from Pandas data structures.
See also: http://pandas.pydata.org/
"""
import datetime
import cf_units
from cf_units import Unit
import cftime
import numpy as np
import numpy.ma as ma
import pandas
try:
from pandas.core.indexes.datetimes import DatetimeIndex # pandas >=0.20
except ImportError:
from pandas.tseries.index import DatetimeIndex # pandas <0.20
import iris
from iris.coords import AuxCoord, DimCoord
from iris.cube import Cube
def _add_iris_coord(cube, name, points, dim, calendar=None):
    """
    Add a coordinate to a cube, built from a Pandas index or columns array.

    The coordinate is added as a DimCoord when the points are numeric and
    strictly monotonic; otherwise it is attached as an AuxCoord.
    If no calendar is specified for a time series, Gregorian is assumed.
    """
    units = Unit("unknown")
    if calendar is None:
        calendar = cf_units.CALENDAR_GREGORIAN

    # Convert pandas datetime objects to python datetime objects.
    if isinstance(points, DatetimeIndex):
        points = np.array([i.to_pydatetime() for i in points])

    # Convert datetime objects to Iris' current datetime representation
    # (numeric values relative to a time-reference unit).
    if points.dtype == object:
        dt_types = (datetime.datetime, cftime.datetime)
        if all(isinstance(i, dt_types) for i in points):
            units = Unit("hours since epoch", calendar=calendar)
            points = units.date2num(points)

    points = np.array(points)
    if np.issubdtype(points.dtype, np.number) and iris.util.monotonic(
        points, strict=True
    ):
        coord = DimCoord(points, units=units)
        coord.rename(name)
        cube.add_dim_coord(coord, dim)
    else:
        coord = AuxCoord(points, units=units)
        coord.rename(name)
        cube.add_aux_coord(coord, dim)
def as_cube(pandas_array, copy=True, calendars=None):
    """
    Convert a Pandas array into an Iris cube.

    Args:

    * pandas_array - A Pandas Series or DataFrame.

    Kwargs:

    * copy - Whether to make a copy of the data.
             Defaults to True.

    * calendars - A dict mapping a dimension to a calendar.
                  Required to convert datetime indices/columns.

    Example usage::

        as_cube(series, calendars={0: cf_units.CALENDAR_360_DAY})
        as_cube(data_frame, calendars={1: cf_units.CALENDAR_GREGORIAN})

    .. note:: This function will copy your data by default.

    """
    calendars = calendars or {}
    if pandas_array.ndim not in [1, 2]:
        raise ValueError(
            "Only 1D or 2D Pandas arrays "
            "can currently be converted to Iris cubes."
        )

    # Make the copy work consistently across NumPy 1.6 and 1.7.
    # (When 1.7 takes a copy it preserves the C/Fortran ordering, but
    # 1.6 doesn't. Since we don't care about preserving the order we can
    # just force it back to C-order.)
    order = "C" if copy else "A"
    data = np.array(pandas_array, copy=copy, order=order)
    cube = Cube(np.ma.masked_invalid(data, copy=False))
    _add_iris_coord(
        cube, "index", pandas_array.index, 0, calendars.get(0, None)
    )
    if pandas_array.ndim == 2:
        _add_iris_coord(
            cube,
            "columns",
            pandas_array.columns.values,
            1,
            calendars.get(1, None),
        )
    return cube
def _as_pandas_coord(coord):
"""Convert an Iris Coord into a Pandas index or columns array."""
index = coord.points
if coord.units.is_time_reference():
index = coord.units.num2date(index)
return index
def _assert_shared(np_obj, pandas_obj):
"""Ensure the pandas object shares memory."""
values = pandas_obj.values
def _get_base(array):
# Chase the stack of NumPy `base` references back to the original array
while array.base is not None:
array = array.base
return array
base = _get_base(values)
np_base = _get_base(np_obj)
if base is not np_base:
msg = "Pandas {} does not share memory".format(
type(pandas_obj).__name__
)
raise AssertionError(msg)
def as_series(cube, copy=True):
    """
    Convert a 1D cube to a Pandas Series.

    Args:

    * cube - The cube to convert to a Pandas Series.

    Kwargs:

    * copy - Whether to make a copy of the data.
             Defaults to True. Must be True for masked data.

    .. note::

        This function will copy your data by default.
        If you have a large array that cannot be copied,
        make sure it is not masked and use copy=False.

    """
    data = cube.data
    if ma.isMaskedArray(data):
        if not copy:
            raise ValueError("Masked arrays must always be copied.")
        # Represent masked points as NaN in a float-typed copy.
        data = data.astype("f").filled(np.nan)
    elif copy:
        data = data.copy()
    index = None
    if cube.dim_coords:
        index = _as_pandas_coord(cube.dim_coords[0])

    series = pandas.Series(data, index)
    if not copy:
        # Guard against pandas having silently copied the data.
        _assert_shared(data, series)

    return series
def as_data_frame(cube, copy=True):
    """
    Convert a 2D cube to a Pandas DataFrame.

    Args:

    * cube - The cube to convert to a Pandas DataFrame.

    Kwargs:

    * copy - Whether to make a copy of the data.
             Defaults to True. Must be True for masked data
             and some data types (see notes below).

    .. note::

        This function will copy your data by default.
        If you have a large array that cannot be copied,
        make sure it is not masked and use copy=False.

    .. note::

        Pandas will sometimes make a copy of the array,
        for example when creating from an int32 array.
        Iris will detect this and raise an exception if copy=False.

    """
    data = cube.data
    if ma.isMaskedArray(data):
        if not copy:
            raise ValueError("Masked arrays must always be copied.")
        # Represent masked points as NaN in a float-typed copy.
        data = data.astype("f").filled(np.nan)
    elif copy:
        data = data.copy()

    index = columns = None
    if cube.coords(dimensions=[0]):
        index = _as_pandas_coord(cube.coord(dimensions=[0]))
    if cube.coords(dimensions=[1]):
        columns = _as_pandas_coord(cube.coord(dimensions=[1]))

    data_frame = pandas.DataFrame(data, index, columns)
    if not copy:
        # Guard against pandas having silently copied the data.
        _assert_shared(data, data_frame)

    return data_frame