Source code for dantro.plot.funcs.generic

"""Generic, DAG-based plot functions for the
:py:class:`~dantro.plot.creators.pyplot.PyPlotCreator` and derived plot
creators.
"""

import copy
import logging
from functools import partial as _partial
from typing import (
    TYPE_CHECKING,
    Any,
    Callable,
    Dict,
    List,
    Literal,
    Tuple,
    Union,
)

if TYPE_CHECKING:
    import xarray

import matplotlib.colors as mcolors
import paramspace as psp

from ..._import_tools import LazyLoader
from ...exceptions import PlotConfigError, PlottingError, UpdatePlotConfig
from ...tools import recursive_update
from ..plot_helper import PlotHelper
from ..utils import figure_leak_prevention, is_plot_func
from ..utils.color_mngr import ColorManager, parse_cmap_and_norm_kwargs
from ._utils import determine_ideal_col_wrap
from ._utils import plot_errorbar as _plot_errorbar

# Local constants and lazy module imports
log = logging.getLogger(__name__)

xr = LazyLoader("xarray")

ENSURE_UNIQUE_DIMS: Dict[Tuple[str, bool], Union[bool, str]] = {
    ("raise_auto", True): False,
    ("raise_auto", False): True,
    ("warn_auto", True): False,
    ("warn_auto", False): "warn",
}
"""For ``auto`` mode, maps (``ensure_unique_dims``, ``data_vars is not None``)
tuples to the appropriate evaluated parameter."""

# .............................................................................

# fmt: off

_XR_PLOT_KINDS = {  # --- start literalinclude
    "scatter":      ("free", "hue", "col", "row"),
    "line":         ("x", "hue", "col", "row"),
    "step":         ("x", "col", "row"),
    "contourf":     ("x", "y", "col", "row"),
    "contour":      ("x", "y", "col", "row"),
    "imshow":       ("x", "y", "col", "row"),
    "pcolormesh":   ("x", "y", "col", "row"),
    "hist":         ("free",),  # can also set ("free", ...) to absorb all
}   # --- end literalinclude
"""The available plot kinds for the *xarray* plotting interface, together with
the supported layout specifier keywords."""

_FACET_GRID_KINDS = {
    # based on xarray plotting functions
    "scatter":      ("free", "hue", "col", "row", ("files", ...), "frames"),
    "line":         ("x", "hue", "col", "row", ("files", ...), "frames"),
    "step":         ("x", "col", "row", ("files", ...), "frames"),
    "contourf":     ("x", "y", "col", "row", ("files", ...), "frames"),
    "contour":      ("x", "y", "col", "row", ("files", ...), "frames"),
    "imshow":       ("x", "y", "col", "row", ("files", ...), "frames"),
    "pcolormesh":   ("x", "y", "col", "row", ("files", ...), "frames"),
    "hist":         ("free", ("files", ...), "frames",),

    # based on dantro plotting functions
    # NOTE These are dynamically added but generally look similar to the above:
    # "errorbars":    ("x", "hue", "col", "row", ("files", ...), "frames"),
    # "scatter3d":    ("hue", "col", "row", ("files", ...), "frames"),
}
"""The available plot kinds for the *dantro* plotting interface, together with
the supported layout specifiers, which include the ``frames`` option."""

_AUTO_PLOT_KINDS = {  # --- start literalinclude
    1:               "line",
    2:               "line",
    3:               "line",
    4:               "pcolormesh",
    5:               "pcolormesh",
    "with_hue":      "line",         # used when `hue` is explicitly set
    "with_x_and_y":  "pcolormesh",   # used when _both_ `x` and `y` were set
    "dataset":       "scatter",      # used for xr.Dataset-like data
    "fallback":      "hist",         # used when none of the above matches
}   # --- end literalinclude
"""A mapping from data dimensionality to preferred plot kind, used in automatic
plot kind selection. This assumes the specifiers of ``_FACET_GRID_KINDS``"""

# fmt: on

_FACET_GRID_FUNCS: Dict[str, Callable] = {}
"""A dict mapping additional facet grid kinds to callables.
This is populated by the ``make_facet_grid_plot`` decorator."""


# -----------------------------------------------------------------------------
# -- Helper functions ---------------------------------------------------------
# -----------------------------------------------------------------------------



[docs]
def _fmt_spec(spec: Union[str, Tuple[str, int]]) -> str:
    """Formats a single encoding specification, taking care of the various
    kinds of possible encodings, e.g. potential Ellipsis sizes."""
    if isinstance(spec, str):
        return spec
    spec, nd = spec
    if nd == 1:
        return spec
    elif nd is Ellipsis:
        return f"{spec}…"
    return f"{spec} ({nd}×)"




[docs]
def _fmt_specs(specs: list) -> str:
    """Formats an encoding specifications list, typically a list of strings
    or tuples."""
    return ", ".join(_fmt_spec(spec) for spec in specs)




[docs]
def _fmt_encoding(enc: dict, fstr="{s}: {d}") -> str:
    """Formats an encoding dictionary into a single-line, comma-separated
    string, taking care of multi-dimensional encoding specifiers."""

    def fmt_dim(d: Union[str, list]) -> str:
        if isinstance(d, (tuple, list)):
            return f"[{', '.join(str(_d) for _d in d)}]"
        return d

    return ",  ".join(fstr.format(s=s, d=fmt_dim(d)) for s, d in enc.items())



# .............................................................................



[docs]
def determine_plot_kind(
    d: Union["xarray.DataArray", "xarray.Dataset"],
    *,
    kind: Union[str, dict],
    default_kind_map: dict = _AUTO_PLOT_KINDS,
    **plot_kwargs,
) -> str:
    """Determines the plot kind to use for the given data. If ``kind: auto``,
    this will determine the plot kind depending on the dimensionality of the
    data and other (potentially fixed) encoding specifiers. Otherwise, it will
    simply return ``kind``.

    **What if layout encodings were partly fixed?** There are two special cases
    where this is of relevance, and both these cases are covered explicitly:

        - If *both* ``x`` and ``y`` are given, ``line``- or ``hist``-like plot
          kinds are no longer possible; hence, a ``pcolormesh``-like kind has
          to be chosen.
        - In turn, if ``hue`` was given, ``pcolormesh``-like plot kinds are no
          longer applicable, thus a ``line``-like argument needs to be chosen.

    These two special cases are specified via the extra keys ``with_x_and_y``
    and ``with_hue`` in the kind mapping.

    A kind mapping may look like this:

    .. literalinclude:: ../../dantro/plot/funcs/generic.py
        :language: python
        :start-after: _AUTO_PLOT_KINDS = {  # --- start literalinclude
        :end-before:  }   # --- end literalinclude
        :dedent: 4

    Args:
        d (Union[xarray.DataArray, xarray.Dataset]): The data for which to
            determine the plot kind.
        kind (Union[str, dict]): The given kind argument. If it is ``auto``,
            the ``kind_map`` is used to determine the ``kind`` from the
            dimensionality of ``d``.
            If it is a dict, ``auto`` is implied and the dict is assumed to be
            a (ndim -> kind) mapping, *updating* the ``default_kind_map``.
        default_kind_map (dict, optional): The default mapping to use for
            ``kind: auto``, with keys being ``d``'s dimensionality and values
            being the plot kind to use.
            There are two special keys, ``fallback`` and ``dataset``. The
            value belonging to ``dataset`` is used for data that is dataset-
            like, i.e. does not have an ``ndim`` attribute. The value of
            ``fallback`` specifies the plot kind for data dimensionalities
            that match no other key.
        **plot_kwargs: All remaining plot function arguments, including any
            layout encoding arguments that aim to *fix* a dimension; these are
            used to determine the ``with_hue`` and ``with_x_and_y`` special
            cases. Everything else is ignored.

    Returns:
        str: The selected plot kind. This is equal to the *given* ``kind`` if
            it was None or a string unequal to ``auto``.
    """
    # Was the plot kind already specified?
    if kind is None or (isinstance(kind, str) and kind != "auto"):
        # Yes. Just return that value.
        return kind

    # else: Need to determine it by inspecting the data and the kind mapping.
    # First, determine the mapping.
    kind_map = copy.deepcopy(default_kind_map)
    if isinstance(kind, dict):
        kind_map.update(kind)

    # Handle special cases ...
    # ... for datasets: always fall back to the specified default kind
    if not hasattr(d, "ndim"):
        return kind_map["dataset"]

    # ... for given x *and* y layout specifiers
    elif plot_kwargs.get("x") and plot_kwargs.get("y"):
        return kind_map["with_x_and_y"]

    # ... for given hue layout specifier
    elif plot_kwargs.get("hue"):
        return kind_map["with_hue"]

    # Select the kind from the dimensionality. If this fails, use the default
    # value instead.
    try:
        kind = kind_map[d.ndim]
    except KeyError:
        kind = kind_map["fallback"]

    log.remark("Using plot kind '%s' for %d-dimensional data.", kind, d.ndim)
    return kind




[docs]
def parse_encoding_spec(s: Union[str, Tuple[str, int]]) -> Tuple[str, int]:
    """Brings an encoding specification into a uniform 2-tuple shape, where
    the first is the name of the encoding and the second is how many dimensions
    it may absorb. The second value can also be ``Ellipsis`` to denote that
    all remaining dimensions are to be absorbed."""
    if isinstance(s, str):
        return (s, 1)
    name, nd = s
    if nd in (Ellipsis, ..., "...", "…"):
        nd = ...
    return (name, nd)




[docs]
def map_dims_to_encoding(
    all_specs: List[Union[str, Tuple[str, int]]],
    all_dims: List[str],
    *,
    encoding: Dict[str, Union[str, Tuple[str, ...]]] = None,
    drop_missing_dims: bool = False,
    data_vars: List[str] = None,
    ignore_encodings: List[str] = None,
    ensure_unique_dims: Union[bool, Literal["warn"]] = False,
) -> Tuple[
    Dict[str, Union[str, Tuple[str, ...]]], List[Tuple[str, int]], List[str]
]:
    """Maps encoding specifiers to one or multiple dimension names.

    Encoding specifiers are given as a list of encoding names that are filled,
    one by one, with a free dimension. The encoding specifier can be given as a
    string, assuming that it can absorb a single dimension; alternatively, if a
    2-tuple ``(name, num_dims)`` is given, the second entry denotes the number
    of dimensions that specifier can absorb.

    Specifiers are assigned in the given order. They may also appear multiple
    times, in which case they are handled as multi-dimensional encodings.

    .. code-block:: python

        specs = [x, hue, col, row]           # -> 1 absorbing dim each
        specs = [(x, 1), (hue, 1), (col, 1), (row, 1)]   # same as above
        specs = [x, col, row, (files, 5)]    # -> files absorbs 5 dims
        specs = [x, files, hue, (files, 4)]  # -> files absorbs 5 dims, but hue
                                             #    will be assigned in between
        specs = [x, (files, ...), hue]       # -> files absorbs all free dims

    Args:
        all_specs (List[Union[str, Tuple[str, int]]]): All available encoding
            specifiers as a list of string or list of ``(name, num_dims)``
            tuples. ``num_dims`` can also be ``...`` (an ``Ellipsis``) to
            denote that this dimension will absorb remaining free dimensions.
        all_dims (List[str]): List of all available dimension names; these will
            be the values of the returned mapping.
        encoding (Dict[str, Union[str, Tuple[str, ...]]], optional): If given,
            denotes which encodings and dimensions are already in use.
        drop_missing_dims (bool, optional): If True, will drop those entries
            in ``encoding`` that use a dimension name that is not part of
            ``all_dims``.
        ignore_encodings (List[str], optional): Names of encoding specifiers
            that should be ignored, i.e. which remain in ``all_specs`` but are
            not automatically assigned dimensions; note that they remain in
            ``encoding`` and retain the value they have received manually.
        ensure_unique_dims (Union[bool, Literal["warn"]], optional): If True,
            will make sure that the user-specified ``encoding`` does not cause
            dimensions to be assigned more than once. This should be set if the
            plot function does not support duplicate encodings, e.g. because it
            involves a sequential dimensionality reduction. It should *not* be
            set for plot functions that allow parallel encodings, e.g. scatter
            plots with ``hue`` and ``size`` encodings shared within a subplot.
            If set to ``warn``, will warn instead of raise.

    Returns:
        Tuple[Dict[str, Union[str, Tuple[str, ...]]], List[Tuple[str, int]], List[str]]:
            A 3-tuple ``(mapping, free_specs, free_dims)`` containing the
            desired mapping dictionary and information about possibly free
            encoding specifiers or dimensions.
    """

    def unpack_nested(l: list) -> list:
        """[1, 2, [3, 4], 5] --> [1, 2, 3, 4, 5]"""
        return [
            e for e in l for e in (e if isinstance(e, (list, tuple)) else [e])
        ]

    def get_spec_size(name: str, all_specs: list) -> Union[int, "Ellipsis"]:
        size = 0
        for spec, nd in all_specs:
            if name == spec:
                if nd is Ellipsis:
                    return Ellipsis
                size += nd
        return size

    # Sometimes includes Nones, which we can't do anything with here.
    all_dims = [d for d in all_dims if d is not None]

    if len(set(all_dims)) != len(all_dims):
        raise ValueError(
            "Dimension names to map to encodings need to be unique, but "
            f"duplicate dimension names were given:  {', '.join(all_dims)}"
        )

    # Bring dims and specs into a normalized form
    all_dims: List[str] = list(all_dims)
    log.remark("   data dims:     %s", ", ".join(all_dims))

    data_vars: List[str] = list(data_vars) if data_vars else []
    if data_vars:
        log.remark("   data vars:     %s", ", ".join(data_vars))

    all_specs: List[Tuple[str, int]] = [
        parse_encoding_spec(s) for s in all_specs
    ]

    all_spec_names: List[str] = [spec for spec, _ in all_specs]
    all_specs_sizes: Dict[str, int] = {
        spec_name: get_spec_size(spec_name, all_specs)
        for spec_name in set(all_spec_names)
    }
    multi_dim_specs = set(
        spec for spec, size in all_specs_sizes.items() if size not in (0, 1)
    )

    # Set up the target encoding dict, starting from the given keys
    encoding = encoding if encoding else {}
    encoding = {
        spec: dim if not dim or isinstance(dim, str) else tuple(dim)
        for spec, dim in encoding.items()
    }

    # For easier comprehension when writing this, sort the dict
    _seen = set()  # to only keep the first occurrence of a spec (if multiple)
    encoding = {
        spec: encoding[spec]
        for spec in all_spec_names
        if spec in encoding and spec not in _seen and not _seen.add(spec)
    }
    del _seen

    log.remark("   given:         %s", _fmt_encoding(encoding))

    # May want to modify the given encoding, e.g. if dimensions have been
    # specified that are missing in the data, they should be dropped
    used_dims: List[str] = unpack_nested(encoding.values())
    missing_dims = [
        dim
        for dim in used_dims
        if dim and dim not in all_dims and dim not in data_vars
    ]

    if missing_dims:
        log.caution("   missing dims:  %s", ", ".join(missing_dims))
        specs_with_missing_dims = [
            spec
            for spec, dim in encoding.items()
            if (isinstance(dim, str) and dim in missing_dims)
            or any(_dim in missing_dims for _dim in dim)
        ]
        if drop_missing_dims:
            # Filter out missing dimensions, either by dropping the whole entry
            # for a spec or dropping the dimension from a multi-dim spec.
            # This may result in empty lists, but that's fine.
            log.remark(
                "     → dropping from affected encodings (%s)",
                ", ".join(specs_with_missing_dims),
            )
            encoding = {
                spec: (
                    dim
                    if isinstance(dim, str)
                    else [_dim for _dim in dim if _dim not in missing_dims]
                )
                for spec, dim in encoding.items()
                if dim not in missing_dims or not isinstance(dim, str)
            }
            used_dims = unpack_nested(encoding.values())
        else:
            log.warning(
                "     → may cause errors downstream related to the "
                "affected encodings:  %s",
                ", ".join(specs_with_missing_dims),
            )

    # Check how many of these may have already been set in the encoding,
    # i.e. find out the free dimensions and free encoding specifiers.
    # Start with the used and free dimensions:
    used_dims: List[str] = [
        dim
        for dim in unpack_nested(encoding.values())
        if dim and dim not in data_vars
    ]
    used_data_vars: List[str] = [
        dim
        for dim in unpack_nested(encoding.values())
        if dim and dim in data_vars
    ]
    free_dims = [dim for dim in all_dims if dim not in used_dims]

    if ensure_unique_dims and len(set(used_dims)) != len(used_dims):
        if ensure_unique_dims == "warn":
            log.caution("   ∃ duplicates:  %s", _fmt_encoding(encoding))
            log.warning(
                "     → may cause errors downstream related to sequential "
                "dimensionality reduction of the specified dimensions.",
            )
        else:
            raise ValueError(
                "The given encoding contains duplicate dimension names! Make "
                "sure that each dimension only appears once.\n"
                f"  Encoding:         {_fmt_encoding(encoding)}\n"
                f"  Data dimensions:  {', '.join(all_dims)}\n"
                f"  Data variables:   {data_vars}"
            )

    # To determine the free specifiers, we first need to know which specifiers
    # have been previously set or should otherwise be regarded as "used";
    # either because they were already set or because they have been denoted as
    # to-be-ignored.
    # The used specs can be a dict because the order is not relevant.
    used_specs: Dict[str, int] = {
        spec: len(dim if dim else []) if not isinstance(dim, str) else 1
        for spec, dim in encoding.items()
    }

    if ignore_encodings:
        log.remark("   ignoring:      %s", ", ".join(ignore_encodings))
        for spec in ignore_encodings:
            used_specs[spec] = all_specs_sizes[spec]

    log.debug("   used specs:    %s", _fmt_specs(used_specs.items()))
    log.debug("   used data vars:  %s", ", ".join(used_data_vars))

    # Knowing the used specifiers, we can determine the free specifiers by
    # deducting the number of used dimensions from all specifiers. This is done
    # in the order in which they are to be filled.
    free_specs = []
    _used_specs = copy.copy(used_specs)  # to allow counting down
    for spec, nd in all_specs:
        if spec not in _used_specs:
            free_specs.append((spec, nd))

        elif _used_specs[spec] >= 1:
            free_specs.append(
                (spec, nd - 1 if nd is not Ellipsis else Ellipsis)
            )

            # Reduce and maybe remove the entry
            _used_specs[spec] -= 1
            if _used_specs[spec] <= 0:
                del _used_specs[spec]

    free_specs = [(spec, nd) for spec, nd in free_specs if nd != 0]
    del _used_specs

    if not free_dims:
        # No need to continue
        return encoding, free_specs, free_dims

    log.remark("   available:     %s", _fmt_specs(free_specs))
    log.remark("   free dims:     %s", ", ".join(free_dims))

    # Evaluate Ellipsis to fill unspecified encodings depending on the whole
    # sequence of available encodings.
    # For this, need to make sure that there is only one Ellipsis specified.
    # Then, count how many dimensions need to be absorbed by the Ellipsis
    # depending on how many dimensions are available in total and how man
    # other encodings come after the Ellipsis.
    # Also, the given encoding may already include an explicit setting of a
    # dimension to the ellipsis spec, we don't want to re-set that one!
    ellipses_specs = [spec for spec, nd in all_specs if nd is Ellipsis]
    if len(ellipses_specs) > 1:
        raise ValueError(
            "Only one encoding can be an Ellipsis, "
            f"got multiple:  {', '.join(ellipses_specs)}"
        )

    elif len(ellipses_specs) == 1:
        # How many dimensions are to be taken up by other free specifiers?
        n_other = sum(nd for _, nd in free_specs if nd is not Ellipsis)

        # How many of the free dimensions does the Ellipsis need to use up?
        n_free = max(0, len(free_dims) - n_other)
        free_specs = [
            (spec, nd if nd is not Ellipsis else n_free)
            for spec, nd in free_specs
        ]

    log.debug("   eff. free specs:  %s", _fmt_specs(free_specs))

    # Go over the dimensions, one by one, and map them to an encoding specifier
    while free_dims and free_specs:
        spec, nd = free_specs.pop(0)
        if nd < 1:
            continue

        dim = free_dims.pop(0)

        # Need to distinguish between multi-dim encodings and scalar ones
        if spec in multi_dim_specs:
            encoding[spec]: Tuple[str, ...] = tuple(encoding.get(spec, ())) + (
                dim,
            )
        else:
            encoding[spec] = dim

        # May need to put the specifier back with a reduced counter such that
        # it may be picked again in the next iteration.
        # NOTE No Ellipses are remaining here, they were made explicit above.
        if nd > 1:
            free_specs.insert(0, (spec, nd - 1))

    # Drop the zero-sized specs, they are all used up
    free_specs = [(spec, nd) for spec, nd in free_specs if nd != 0]

    # Return the mapping and the remaining free specs and free dims
    return encoding, free_specs, free_dims




[docs]
def determine_encoding(
    dims: Union[List[str], Dict[str, int]],
    *,
    kind: str,
    auto_encoding: Union[bool, dict],
    default_encodings: dict,
    plot_kwargs: dict,
    data_vars: List[str] = None,
    allow_y_for_x: List[str] = ("line",),
    drop_missing_dims: bool = False,
    ignore_encodings: List[str] = None,
    ensure_unique_dims: Union[bool, str] = False,
    return_encoding_info: bool = False,
) -> dict:
    """Determines the layout encoding for the given plot kind and the available
    data dimensions (as specified by the ``dims`` argument).

    If ``auto_encoding`` does not evaluate to true or ``kind is None``, this
    function does nothing and simply returns all given plotting arguments.

    Otherwise, it uses the chosen plot ``kind`` to associate layout specifiers
    with dimension names of ``d``.
    The available layout encoding specifiers (``x``, ``y``, ``col`` etc.) can
    be specified in two ways:

        - By default, ``default_encodings`` is used as a map from plot kind to
          a sequence of available layout encodings.
        - If ``auto_encoding`` is a dictionary, the default map will be
          *updated* with that dictionary.

    The association is done in the following way:

        1. Inspecting ``plot_kwargs``, all layout encoding specifiers are
           extracted, regardless of their value.
        2. The encodings mapping is determined (see above).
        3. The available dimension names are determined from ``dims``.
        4. Depending on ``kind`` and the already fixed specifiers, the *free*
           encoding specifiers and dimension names are extracted.
        5. These free specifiers are associated with free dimension names,
           in order of descending dimension size. Encoding specifiers that have
           previously been set will keep that value, even if it was ``None``.

    **Example:** Assume, the available specifiers are ``('x', 'y', 'col')`` and
    the data has dimensions ``dim0``, ``dim1`` and ``dim2``. Let's further say
    that ``y`` was already fixed to ``dim2``, leaving ``x`` and ``col`` as
    available encodings and ``dim0`` and ``dim1`` as free dimensions.
    With ``x`` being specified before ``col`` in the list of available
    encodings, ``x`` would be associated to the remaining dimension with the
    *larger* size and ``col`` to the remaining one.

    An encodings mapping may look like this:

    .. literalinclude:: ../../dantro/plot/funcs/generic.py
        :language: python
        :start-after: _XR_PLOT_KINDS = {  # --- start literalinclude
        :end-before:  }   # --- end literalinclude
        :dedent: 4

    Here, string-like specifiers denote encodings that can represent only a
    single data dimension. The ``(name, ndim)`` syntax can be used to let an
    encoding absorb ``ndim`` dimensions. Setting ``ndim`` to an Ellipsis
    (``...``, ``Ellipsis`` or the ``'...'`` string) specifies that encoding to
    take up *all* data dimensions that are not taken-up by other encodings.
    Encodings with ``ndim > 1`` are always multi-dimensional, regardless of
    how many dimensions will be associated with it.

    **Example:** Let's assume the available encoding is ``x, hue, files…`` and
    there are five free dimensions to assign. In this case, the largest will go
    to ``x``, the next-largest to ``hue`` and the remaining three to the
    multi-dimensional``files`` encoding.

    The ``drop_missing_dims`` option will unset a previously set encoding if
    that dimension does not exist in the data; a log message will inform about
    this case. Setting this can be useful to make a plot definition more
    flexible.

    The ``ignore_encodings`` option allows to not automatically assign certain
    encodings, e.g. if it is desired that an encoding is typically kept
    unassigned. Effectively, it is never regarded as an available encoding,
    regardless of its value. This can be useful to set if it is undesired to
    change the ``auto_encoding`` dict.

    When working with :py:class:`xarray.Dataset` objects, its data variables
    may play a role in the encoding as some specifiers (like ``hue`` in a
    ``scatter`` plot) accept both dimension names and data variables, behaving
    differently depending on which one was passed.
    By passing on the data variables via the ``data_vars`` argument, the
    encoding algorithm can take into account that a specified encoding does
    perhaps not refer to a data dimension but to a data *variable*.

    This function also implements **automatic column wrapping**, aiming to
    produce a efficient figure use with column wrapping. The prerequisites
    are the following:

        * The ``dims`` argument is a dict, containing size information
        * The ``col_wrap`` argument is given and set to ``"auto"``
        * The ``col`` specifier is in use
        * The ``row`` specifier is *not* used, i.e. wrapping is possible
        * There are more than three columns

    To determine the column wrapping number, a little optimization routine
    tries to reduce the number of empty spots in the last row while trying to
    get a square-like grid.
    To skip the optimization, potentially leading to last rows that have only
    one or few subplots, set ``col_wrap`` to ``"square"``, in which case
    wrapping will happen after ``ceil(sqrt(num_cols))`` columns; see
    :py:func:`~dantro.plot.funcs._utils.determine_ideal_col_wrap` for more
    information and implementation.

    Args:
        dims (Union[List[str], Dict[str, int]]): The dimension names (and, if
            given as dict: their sizes) that are to be encoded. If no sizes are
            provided, the assignment order will be the same as in the given
            sequence of dimension names. If sizes are given, these will be used
            to sort the dimension names in descending order of their sizes.
            For xarray objects, ``da.sizes`` or ``ds.sizes`` should be used.
        kind (str): The chosen plot kind. If this was None, will directly
            return, because auto-encoding information is missing.
        auto_encoding (Union[bool, dict]): Whether to perform auto-encoding.
            If a dict, will regard it as a mapping of available encodings and
            update ``default_encodings``.
        default_encodings (dict): A map from plot kinds to available layout
            specifiers, e.g. ``{"line": ("x", "hue", "col", "row")}``.
        allow_y_for_x (List[str], optional): A list of plot kinds for which the
            following replacement will be allowed: if a ``y`` specifier is
            given but *no* ``x`` specifier, the ``"x"`` in the list of
            available encodings will be replaced by a ``"y"``. This is to
            support plots that allow *either* an ``x`` or a ``y`` specifier,
            like the ``line`` kind.
        plot_kwargs (dict): The actual plot function arguments, including any
            layout encoding arguments that aim to *fix* a dimension. Everything
            else is ignored.
        drop_missing_dims (bool, optional): If set, will drop pre-specified
            encodings from ``plot_kwargs`` if they refer to a dimension that
            is not available in ``dims``. The encoding can then be filled with
            another dimension.
        data_vars (List[str], optional): If given, names of data variables
            that may (in addition to the ``dims``) be used for encoding; this
            is relevant when determining whether an encoding includes a
            missing dimension, as some encodings may also refer not to
            dimensions but to data variables.
        ignore_encodings (List[str], optional): If given, will ignore these
            encodings when automatically assigning.
        ensure_unique_dims (Union[bool, str], optional): If True,
            will make sure that the user-specified ``encoding`` does not cause
            dimensions to be assigned more than once. This should be set if the
            plot function does not support duplicate encodings, e.g. because it
            involves a sequential dimensionality reduction. It should *not* be
            set for plot functions that allow parallel encodings, e.g. scatter
            plots with ``hue`` and ``size`` encodings shared within a subplot.
            If set to ``warn``, will warn instead of raise.
            If set to ``warn_auto`` or ``raise_auto``, will warn or raise only
            if ``data_vars is None``; in such a case, encoding is typically
            used for dimensionality reduction, which can only be done once…
        return_encoding_info (bool, optional): If set, will return a 2-tuple of
            the updated plots config *and* the encoding information as a
            3-tuple ``(encoding, free_specs, free_dims)``.
    """

    if not auto_encoding or kind is None:
        log.debug("Layout auto-encoding was disabled (kind: %s).", kind)
        if not return_encoding_info:
            return plot_kwargs
        return plot_kwargs, ({}, (), ())

    log.note(
        "Automatically determining layout encoding for kind '%s' ...", kind
    )

    # Evaluate supported encodings, then get the available encoding specifiers
    encs = copy.deepcopy(default_encodings)
    if isinstance(auto_encoding, dict):
        encs.update(auto_encoding)

    try:
        all_specs = encs[kind]
    except KeyError as err:
        raise PlotConfigError(
            f"Unknown `kind` '{kind}' specified in auto-encoding. "
            f"Make sure you chose a valid kind ({', '.join(encs)}) or supply "
            "a default encoding explicitly."
        ) from err

    # Special case for line-like kinds
    if allow_y_for_x and kind in allow_y_for_x:
        if plot_kwargs.get("y") and not plot_kwargs.get("x"):
            all_specs = [s if s != "x" else "y" for s in all_specs]

    # Bring encoding specifiers into uniform shape
    all_specs = [parse_encoding_spec(spec) for spec in all_specs]
    all_spec_names: List[str] = [spec for spec, _ in all_specs]

    # Split plotting kwargs into a dict of layout specifiers (encoding) and one
    # that only includes the remaining plotting kwargs
    encoding: Dict[str, Union[str, Tuple[str, ...]]] = {
        k: v for k, v in plot_kwargs.items() if k in all_spec_names
    }
    plot_kwargs = {k: v for k, v in plot_kwargs.items() if k not in encoding}

    # Get all available dimension names. If size-information is available,
    # sort them by size (descending), otherwise just use them as they are.
    if hasattr(dims, "items"):
        all_dims = [
            name
            for name, _ in sorted(
                dims.items(), key=lambda kv: kv[1], reverse=True
            )
        ]
    else:
        all_dims = list(dims)

    # TODO Warn upon non-indexed dimensions?

    # Evaluate ensure_unique_dims depending on whether there are data variables
    # or not
    if isinstance(ensure_unique_dims, str) and "auto" in ensure_unique_dims:
        ensure_unique_dims = ENSURE_UNIQUE_DIMS[
            (ensure_unique_dims, data_vars is not None)
        ]

    # From these two lists, assign free dimension names to free encodings;
    encoding, free_specs, free_dims = map_dims_to_encoding(
        all_specs,
        all_dims,
        encoding=encoding,
        drop_missing_dims=drop_missing_dims,
        data_vars=data_vars,
        ignore_encodings=ignore_encodings,
        ensure_unique_dims=ensure_unique_dims,
    )

    # Drop those encoding specifiers that are effectively unset.
    #   Will remove things like {s: None} -> {},
    #   but also from multi-dim specs: {s: [None, foo, None]} -> {s: [foo]}
    # Need to do this in two steps, first dropping nested Nones, then Nones
    # or empty lists.
    encoding = {
        s: (
            tuple([_dim for _dim in dim if _dim])
            if isinstance(dim, (tuple, list))
            else dim
        )
        for s, dim in encoding.items()
    }
    encoding = {s: dim for s, dim in encoding.items() if dim}

    # Provide information about the chosen encoding
    log.remark("   → encoding:    %s", _fmt_encoding(encoding))

    if free_specs:
        log.remark("   remaining:     %s", _fmt_specs(free_specs))

    if free_dims:
        log.remark("   free dims:     %s", ", ".join(free_dims))

    # -- Automatic column wrapping
    if plot_kwargs.get("col_wrap") in ("auto", "square"):
        if (
            not encoding.get("row")
            and encoding.get("col")
            and hasattr(dims, "items")  # i.e.: have size information
            and dims[encoding["col"]] >= 4
        ):
            num_cols = dims[encoding["col"]]
            col_wrap_mode = plot_kwargs["col_wrap"]
            plot_kwargs["col_wrap"] = determine_ideal_col_wrap(
                num_cols, fill_last_row=(col_wrap_mode == "auto")
            )
            log.remark(
                "   col_wrap:      %d  (mode '%s', col dim size: %d)",
                plot_kwargs["col_wrap"],
                col_wrap_mode,
                num_cols,
            )
        else:
            # Remove it to avoid a plot warning or "unexpected argument"
            del plot_kwargs["col_wrap"]

    # Finally, return the merged layout specifiers and plot kwargs
    updated_plot_cfg = dict(**plot_kwargs, **encoding)

    if not return_encoding_info:
        return updated_plot_cfg
    return updated_plot_cfg, (encoding, free_specs, free_dims)




[docs]
def build_pspace_selector(
    d: Union[xr.DataArray, xr.Dataset], dims: List[str], **sel
) -> Dict[str, Union[psp.ParamDim, Any]]:
    """Builds a selector for :py:meth:`~xarray.DataArray.sel` operations that
    uses :py:class:`~paramspace.paramdim.ParamDim` as values.

    This method also combines the parameter space selector with an existing
    selector dict, ``sel``, and throws an error if there is an overlap between
    keys in ``dims`` and ``sel``.
    """
    psp_sel: Dict[str, psp.ParamDim] = {
        dim: psp.ParamDim(
            default=d.coords[dim].values.tolist()[0],
            values=d.coords[dim].values.tolist(),
        )
        for dim in dims
    }
    # NOTE Using ``tolist`` here to get native data types.
    #      The default value should not be needed, but we still specify sth.

    if any(_sel in psp_sel for _sel in sel):
        raise PlotConfigError(
            f"Cannot combine parameter sweep selector ({', '.join(dims)}) "
            f"with existing selector ({sel}) because there are "
            "overlapping dimension names! Remove them and retry."
        )

    return dict(**psp_sel, **sel)



# -----------------------------------------------------------------------------



[docs]
class make_facet_grid_plot:
    """This is a decorator class that transforms a plot function that works on
    a single axis into one that supports faceting via
    :py:class:`xarray.plot.FacetGrid`.

    Additionally, it allows to register the plotting function with the generic
    :py:func:`~dantro.plot.funcs.generic.facet_grid` plot by adding the
    callable to ``_FACET_GRID_FUNCS``.
    """

    MAP_FUNCS = {
        "dataset": lambda fg, f, **kws: fg.map_dataset(f, **kws),
        "dataarray": lambda fg, f, **kws: fg.map_dataarray(f, **kws),
        "dataarray_line": lambda fg, f, **kws: fg.map_dataarray_line(f, **kws),
    }
    """The available mapping functions in :py:class:`xarray.plot.FacetGrid`"""

    DEFAULT_ENCODINGS = ("col", "row", ("files", ...), "frames")
    """The default encodings the facet grid supplies; these are those supported
    by the generic facet grid function, irrespective of chosen ``kind``"""

    DEFAULT_DROP_KWARGS = ("_fg", "meta_data", "hue_style", "add_guide")
    """The default kwargs that are to be dropped rather than passed on to the
    wrapped plotting function.
    Can be customized via ``drop_kwargs`` argument."""


[docs]
    def __init__(
        self,
        *,
        map_as: str,
        encodings: Tuple[str],
        supported_hue_styles: Tuple[str] = None,
        register_as_kind: Union[bool, str] = True,
        overwrite_existing: bool = False,
        drop_kwargs: Tuple[str] = DEFAULT_DROP_KWARGS,
        parse_cmap_and_norm_kwargs: bool = True,
        **default_kwargs,
    ):
        """Initialize the decorator, making the decorated function capable of
        performing a facet grid plot.

        Args:
            map_as (str): Which mapping to use. Available: ``dataset``,
                ``dataarray`` and ``dataarray_line``.
            encodings (Tuple[str]): The encodings supported by the wrapped
                plot function, e.g. ``("x", "hue")``.
                Note that these *need to be dimensionality-reducing encodings*
                that have a qualitatively similar effect as ``col`` & ``row``
                in that they consume a data *dimension*. This is in contrast to
                plots that may represent multiple data *variables*, e.g. if the
                data comes from a  :py:class:`xarray.Dataset`; those should not
                be specified here.
            supported_hue_styles (Tuple[str]): Which hue styles are
                supported by the wrapped plot function. It is suggested to set
                this value if mapping via ``dataset`` or ``dataarray_line`` in
                order to disallow configurations that will not work with the
                wrapped plot function. If set to None, no check will be done.
            register_as_kind (Union[bool, str], optional): If boolean, controls
                *whether* to register the wrapped function with the generic
                facet grid plot, using its own name. If a string, uses that
                name for registration.
            overwrite_existing (bool, optional): Whether to overwrite an
                existing registration in ``_FACET_GRID_FUNCS``. If False, an
                existing entry of the same ``register_as_kind`` value will
                lead to an error.
            drop_kwargs (Tuple[str], optional): Which keyword arguments to
                drop before invocation of the wrapped function; this can be
                useful to trim down the signature of the wrapped function.
            parse_cmap_and_norm_kwargs (bool, optional): Whether to parse
                colormap-related plot function arguments using the
                :py:func:`~dantro.plot.utils.color_mngr.parse_cmap_and_norm_kwargs`
                function. Should be set to false if the decorated plot function
                takes care of these arguments itself.
            **default_kwargs: Additional arguments that are passed to the
                single-axis plotting function. These are used both when calling
                it via the selected mapping function and when invoking it
                without a facet grid.
                These are recursively updated with those given upon plot
                function invocation.
        """
        try:
            self.map_func = self.MAP_FUNCS[map_as]
        except KeyError:
            raise ValueError(
                f"Unsupported value for `map_as` argument: '{map_as}'! Needs "
                f"to be one of:  {', '.join(self.MAP_FUNCS)}"
            )

        self.encodings = encodings
        self.supported_hue_styles = supported_hue_styles
        self.register_as_kind = register_as_kind
        self.overwrite_existing = overwrite_existing
        self.drop_kwargs = drop_kwargs if drop_kwargs else ()
        self.default_kwargs = default_kwargs
        self.parse_cmap_and_norm_kwargs = parse_cmap_and_norm_kwargs



[docs]
    def parse_wpf_kwargs(self, data, **kwargs) -> dict:
        """Parses the keyword arguments in preparation for invoking the wrapped
        plot function. This can happen both in context of a facet grid mapping
        and a single invocation.
        """
        # Update from defaults
        kwargs = recursive_update(copy.deepcopy(self.default_kwargs), kwargs)

        # Some checks
        if (
            self.supported_hue_styles is not None
            and "hue_style" in kwargs
            and kwargs["hue_style"] not in self.supported_hue_styles
        ):
            raise ValueError(
                f"The selected `hue_style` '{kwargs['hue_style']}' is not "
                "supported for this plotting function! May only be:  "
                f"{', '.join(self.supported_hue_styles)}"
            )

        # Parse colormap-related arguments
        if self.parse_cmap_and_norm_kwargs:
            kwargs = parse_cmap_and_norm_kwargs(**copy.deepcopy(kwargs))

        # Can do more pre-processing here
        # ...

        return kwargs



[docs]
    def __call__(self, plot_single_axis: Callable) -> Callable:
        """Generates a standalone DAG-based plotting function that supports
        faceting. Additionally, integrates it as ``kind`` for the
        general facet grid plotting function by adding it to the global
        ``_FACET_GRID_FUNCS`` dictionary.
        """

        # First, wrap the single-axis plot function to achieve helper support
        def wrapped_plot_func(
            *args,
            hlpr: PlotHelper,
            _is_facetgrid: bool,
            ax=None,
            _fg: "xr.plot.FacetGrid" = None,
            **kwargs,
        ):
            """Wraps the single-axis plotting function and performs the
            following additional operations before invoking it:

                1. Sync the plot helper to the given axis (if faceting)
                2. Evaluates ``drop_kwargs`` to reduce the passed arguments
            """
            # If this is called as part of a facet grid plot, we need to sync
            # the helper to the given axis, otherwise the helper cannot be used
            if _is_facetgrid:
                hlpr.select_axis(ax=ax)

            # Prepare kwargs, optionally dropping some keys that bloat the
            # function signature ...
            kwargs["_fg"] = _fg
            kwargs["_is_facetgrid"] = _is_facetgrid
            kwargs = {
                k: v for k, v in kwargs.items() if k not in self.drop_kwargs
            }

            # Now invoke the single-axis plotting function
            return plot_single_axis(*args, hlpr=hlpr, **kwargs)

        # Get the mapping function
        map_to_facet_grid = self.map_func

        # Now, generate the facet-grid supporting function
        def fgplot(
            data,
            *,
            hlpr=None,
            col: str = None,
            row: str = None,
            col_wrap: int = None,
            sharex: bool = True,
            sharey: bool = True,
            figsize: tuple = None,
            aspect: float = 1.0,
            size: float = 3.0,
            subplot_kws: dict = None,
            **kwargs,
        ):
            """A facet-grid capable version of the given plot function.

            Explicitly named arguments here are passed to the setup of the
            :py:class:`xarray.plot.FacetGrid`; all ``kwargs`` are passed on to
            the selected mapping function and subsequently: the wrapped
            single-axis plot function.
            """
            # Without columns or rows, cannot use facet grid. Make a primitive
            # plot instead, directly using the wrapped plot function.
            if not col and not row:
                log.debug("No `col` or `row` set. Not using a facet grid.")

                kwargs = self.parse_wpf_kwargs(data, **kwargs)
                log.debug(
                    "Invoking single-axis plot function with kwargs:  %s",
                    kwargs,
                )

                hlpr.setup_figure()  # TODO Find out why this is necessary ...
                return wrapped_plot_func(
                    data, hlpr=hlpr, _is_facetgrid=False, **kwargs
                )

            # Prepare facet grid and helper
            log.debug(
                "Setting up a facet grid (col: %s, row: %s) ...", col, row
            )
            fg = xr.plot.FacetGrid(
                data,
                col=col,
                row=row,
                col_wrap=col_wrap,
                sharex=sharex,
                sharey=sharey,
                figsize=figsize,
                aspect=aspect,
                size=size,
                subplot_kws=subplot_kws if subplot_kws else {},
            )
            hlpr.attach_figure_and_axes(fig=fg.fig, axes=fg.axs)

            # Make the FacetGrid object available to the helper
            hlpr._attrs["facet_grid"] = fg

            # Parse arguments expected by wrapped plot function
            kwargs = self.parse_wpf_kwargs(data, **kwargs)

            # Prepare mapping keyword arguments and apply the mapping
            log.debug("Invoking mapping function with kwargs  %s  ...", kwargs)
            try:
                map_to_facet_grid(
                    fg, wrapped_plot_func, hlpr=hlpr, _fg=fg, **kwargs
                )

            except Exception as exc:
                raise PlottingError(
                    f"Failed mapping {type(data)} data to facet grid! Check "
                    "the given arguments, dimensionality, dimension names, "
                    "and whether the dimensions have coordinates associated.\n"
                    f"Got a {type(exc).__name__}: {exc}"
                ) from exc

            # Return the FacetGrid object for further handling
            return fg

        # facet grid plot function constructed now.
        # ... register it as a single-axis facet grid plot kind.
        if self.register_as_kind:
            if isinstance(self.register_as_kind, str):
                regname = self.register_as_kind
            else:
                regname = plot_single_axis.__name__

            if regname in _FACET_GRID_FUNCS or regname in _XR_PLOT_KINDS:
                if not self.overwrite_existing:
                    _in_use = ", ".join(
                        list(_FACET_GRID_FUNCS) + list(_XR_PLOT_KINDS)
                    )
                    raise ValueError(
                        f"The plot function name '{regname}' is already used! "
                        "Either set `register_as_kind` to a different value, "
                        "or set `overwrite_existing`. Registered functions: "
                        f"{_in_use}"
                    )

            # Register the callable for the non-standalone case
            _FACET_GRID_FUNCS[regname] = fgplot
            log.debug("Registered '%s' as special facet grid kind.", regname)

            _FACET_GRID_KINDS[regname] = (
                self.encodings + self.DEFAULT_ENCODINGS
            )
            log.debug(
                "Registered '%s' encodings:  %s",
                regname,
                _fmt_specs(_FACET_GRID_KINDS[regname]),
            )

        # Build the standalone plot function, which takes the place of the
        # decorated plot function
        @is_plot_func(use_dag=True, required_dag_tags=("data",))
        def standalone(*, data: dict, hlpr: PlotHelper, **kwargs):
            try:
                return fgplot(data["data"], hlpr=hlpr, **kwargs)

            except Exception as exc:
                raise PlottingError(
                    "Standalone facet grid plotting for plot function "
                    f"'{plot_single_axis.__name__}' failed!\n"
                    f"Got {type(exc).__name__}: {exc}\n\n"
                    f"Given arguments:\n  {kwargs}\n\n"
                    f"Selected data:\n  {str(data['data'])}\n"
                ) from exc

        return standalone




# -----------------------------------------------------------------------------
# -- Facet Grid ---------------------------------------------------------------
# -----------------------------------------------------------------------------



[docs]
@is_plot_func(
    use_dag=True, required_dag_tags=("data",), supports_animation=True
)
def facet_grid(
    *,
    data: dict,
    hlpr: PlotHelper,
    kind: Union[str, dict] = None,
    auto_encoding: Union[bool, dict] = False,
    auto_encoding_options: dict = None,
    title_kwargs: dict = None,
    suptitle_kwargs: dict = None,
    squeeze: bool = True,
    drop_nonindexed_coords: bool = False,
    sel: dict = None,
    show_data: bool = False,
    **plot_kwargs,
):
    """A generic facet grid plot function for high dimensional data.

    This function calls the ``data['data'].plot`` function if no plot ``kind``
    is given, otherwise ``data['data'].plot.<kind>``.
    It is designed for `plotting with xarray objects <http://xarray.pydata.org/en/stable/plotting.html>`_,
    i.e. :py:class:`xarray.DataArray` and :py:class:`xarray.Dataset`.
    Specifying the kind of plot requires the data to be of one of those types
    and have a dimensionality that can be represented in these plots. See
    `the correponding API documentation <https://docs.xarray.dev/en/stable/api/plotting.html>`_ for more information.

    In most cases, this function creates a so-called
    :py:class:`xarray.plot.FacetGrid` object that automatically layouts and
    chooses a visual representation that fits the dimensionality of the data.
    To specify which data dimension should be represented in which way, it
    supports a declarative syntax: via the optional keyword arguments ``x``,
    ``y``, ``row``, ``col``, and/or ``hue`` (available options are listed in
    the corresponding `plot function documentation <https://docs.xarray.dev/en/stable/api/plotting.html>`_),
    the representation of the data dimensions can be selected.
    In dantro, this is referred to as "layout encoding".

    dantro not only wraps this interface, but adds the following functionality:

        * the ``frames`` layout encoding argument, which behaves in the same
          way as the other encodings, but leads to an *animation* being
          generated, thus opening up one further dimension of representation;
        * the ``files`` encoding, which triggers plot config updating and
          thereby allows to represent data of arbitrary dimensionality; this is
          achieved by performing a parameter sweep plot where each point
          corresponds to a single plot file of a subspace of the data;
        * the ``auto_encoding`` feature, which allows to assign layout-
          encodings automatically, depending on dimensions and dimension
          sizes of the data;
        * the ``kind: 'auto'`` option, which can be used in conjunction
          with ``auto_encoding`` to choose the plot kind automatically as well;
        * the ``col_wrap: 'auto'`` option, which selects the value such that
          the figure becomes more square-like (requires ``auto_encoding``);
        * and allowing to register additional plot ``kind`` values that create
          plots with a custom single-axis plotting function, using the
          :py:class:`~dantro.plot.funcs.generic.make_facet_grid_plot`
          decorator.

    For details about auto-encoding and how the plot ``kind`` is chosen, see
    :py:func:`~dantro.plot.funcs.generic.determine_encoding`
    and :py:func:`~dantro.plot.funcs.generic.determine_plot_kind`.

    .. note::

        The way the plot data is labelled for the facet grid plot is very
        important to understand how this plot function behaves.

        **Background:**
        One can distinguish different categories of xarray data dimensions,
        most relevant for association of encodings: those *with* and those
        *without* coordinate labels. If coordinates are available, the
        corresponding dimension is called *indexed*, otherwise it is a
        *non-indexed* dimension, no coordinate labels exist and hence only
        trivial indexing is possible.

        xarray objects may also contain additional (scalar) coordinate metadata
        which has no relation to the data dimensions and is ignored here.

        Furthermore, there can be additional non-scalar coordinates that *are*
        associated with existing data dimensions, but are *not* acting as their
        index; these run "in parallel" to the existing coordinates along that
        dimension.


    .. note::

        When specifying ``frames``, the ``animation`` arguments also need to be
        specified. See :ref:`here <pcr_pyplot_animations>` for more information
        on the expected animation parameters.

        The value of the ``animation.enabled`` key is not relevant for this
        function; it will automatically enter or exit animation mode,
        depending on whether the ``frames`` argument is given or not. This uses
        the :ref:`animation mode switching <pcr_pyplot_animation_mode_switching>`
        feature.

    .. note::

        Internally, this function by default call ``.squeeze`` on the selected
        data (controlled by the ``squeeze`` argument), thus being more tolerant
        with data that has size-1 dimension coordinates.
        To suppress this behaviour, set the ``squeeze`` argument accordingly.

    .. warning::

        Depending on ``kind`` and the dimensionality of the data, some plot
        functions might create their own figure, disregarding any previously
        set up figure. This includes the figure from the plot helper.

        To control figure aesthetics, you can either specify matplotlib RC
        :ref:`style parameters <pcr_pyplot_style>` (via the ``style`` argument),
        or you can use the ``plot_kwargs`` to pass arguments to the respective
        plot functions. For the latter, refer to the respective documentation
        to find out about available arguments.

    Args:
        data (dict): The data selected by the data transformation framework,
            expecting the ``data`` key.
        hlpr (PlotHelper): The plot helper
        kind (str, optional): The kind of plot to use. Options are:
            ``contourf``, ``contour``, ``imshow``, ``line``, ``pcolormesh``,
            ``step``, ``hist``, ``scatter``, ``errorbars`` and any plot kinds
            that were additionally registered via the
            :py:class:`~dantro.plot.funcs.generic.make_facet_grid_plot`
            decorator.
            With ``auto``, dantro chooses an appropriate kind by itself; this
            setting is useful when also using the ``auto_encoding`` feature;
            see :ref:`dag_generic_facet_grid_auto_kind` for more information.
            If None is given, xarray automatically determines it using the
            dimensionality of the data, frequently falling back to ``hist``
            for higher-dimensional data or lacking specifiers.
        frames (str, optional): Data dimension from which to create animation
            frames. If given, this results in the creation of an animation. If
            not given, a single plot is generated. Note that this requires
            ``animation`` options as part of the plot configuration.
        auto_encoding (Union[bool, dict], optional): Whether to choose the
            layout encoding options automatically. For further options, can
            pass a dict. See :ref:`dag_generic_auto_encoding` for more info.
        auto_encoding_options (dict, optional): Additional arguments for
            :py:func:`~dantro.plot.funcs.generic.determine_encoding`.
        title_kwargs (dict, optional): Keyword arguments passed on
            :py:meth:`xarray.plot.FacetGrid.set_titles` to set the ``template``
            (allowing ``{coord}`` and ``{value}`` placeholders), ``maxchar``
            and other properties of the title strings.
            Invoked only if a FacetGrid object is produced, i.e. if ``col``
            and/or ``row`` encodings are used. If not given, FacetGrid still
            invokes the same method, but then uses default arguments.
        suptitle_kwargs (dict, optional): Keyword arguments passed on to the
            PlotHelper's ``set_suptitle`` helper function. Only used if
            animations are enabled. The ``title`` entry can be a format string
            with the following keys, which are updated for each frame of the
            animation: ``dim``, ``value``. Default: ``{dim:} = {value:.3g}``.
        squeeze (bool, optional): whether to squeeze the data before plotting,
            such that size-1 dimensions do not take up encoding dimensions.
        drop_nonindexed_coords (bool, optional): If true, non-indexed
            coordinates will be dropped.
        sel (dict, optional): A selector dict that is applied to the data to
            use only a subset of it for the plot; passed to
            :py:meth:`xarray.Dataset.sel` or :py:meth:`xarray.DataArray.sel`.
            Note that this requires the data to have indexed dimensions.
        show_data (bool, optional): If true, shows the head of the data that
            will be used for plotting.
        **plot_kwargs: Passed on to ``<data>.plot`` or ``<data>.plot.<kind>``
            These should include the layout encoding specifiers (``x``, ``y``,
            ``hue``, ``col``, ``row``, ``frames``, ``files``, ...).

    Raises:
        AttributeError: Upon unsupported ``kind`` value
        ValueError: Upon *any* upstream error in invocation of the xarray
            plotting capabilities. This wraps the given error message and
            provides additional information that helps to track down why the
            plotting failed.
        UpdatePlotConfig: To rewrite the plot configuration and restart this
            plot with a new configuration.
        EnterAnimationMode: To enter animation mode if not already in it.
        ExitAnimationMode: To exit animation mode if unnecessarily in it.
    """
    import matplotlib.pyplot as plt

    # Make sure to have the latest module-level variables available here; this
    # is important to ensure that those `kind`s registered by the
    # make_facet_grid_plot decorator are available here.
    from .generic import _FACET_GRID_FUNCS, _FACET_GRID_KINDS

    # .........................................................................
    def plot_frame(
        _d, *, kind: str, plot_kwargs: dict, groupby_dim: str = None
    ):
        """Plot a FacetGrid frame"""
        # Prepare data, getting rid of size-1 dimensions resulting from
        # a potential outside groupby operation.
        # Importantly, other dimensions should not be squeezed out here!
        if groupby_dim:
            _d = _d.squeeze(groupby_dim)

        # Retrieve the generic or specialized plot function, depending on kind
        if kind is None:
            plot_func = _d.plot

        elif kind in _FACET_GRID_FUNCS:
            _plot_func = _FACET_GRID_FUNCS[kind]

            # Bind the data and helper to the function
            plot_func = _partial(_plot_func, _d, hlpr=hlpr)

        else:
            try:
                plot_func = getattr(_d.plot, kind)

            except AttributeError as err:
                _available_xr = ", ".join(_XR_PLOT_KINDS)
                _available_dtr = ", ".join(_FACET_GRID_FUNCS)
                raise AttributeError(
                    f"The plot kind '{kind}' seems not to be available for "
                    f"data of type {type(_d)}! Please check the documentation "
                    "regarding the expected data types. For xarray data "
                    f"structures, valid choices are:  {_available_xr}.\n"
                    "Additionally, the following facet grid kinds were "
                    f"registered from within dantro:  {_available_dtr}"
                ) from err

        # Make sure to work on a fully cleared figure. This is important for
        # *some* specialized plot functions and for certain dimensionality of
        # the data: in these specific cases, an existing figure can be
        # re-used, in some cases leading to plotting artifacts.
        # In other cases, a new figure is opened by the plot function. The
        # currently attached helper figure is then discarded below.
        hlpr.fig.clear()

        # Invoke the specialized plot function, taking care that no figures
        # that are additionally created survive beyond that point, which would
        # lead to figure leakage, gobbling up memory.
        with figure_leak_prevention(close_current_fig_on_raise=True):
            try:
                rv = plot_func(**plot_kwargs)

            except Exception as exc:
                raise PlottingError(
                    "facet_grid plotting failed, most probably because the "
                    "dimensionality of the data, the chosen plot kind "
                    f"({kind}) and the specified layout encoding were not "
                    "compatible or because the selected data was missing "
                    "coordinates for one or more dimensions.\n"
                    "For debugging, inspect the chained traceback and the "
                    "information below.\n\n"
                    f"The upstream error was a {type(exc).__name__}: {exc}\n\n"
                    f"xr.plot.FacetGrid arguments:\n  {plot_kwargs}\n\n"
                    f"Data:\n  {str(_d)}\n"
                ) from exc
        # NOTE rv usually is a xarray.FaceGrid object but not always: `hist`
        #      returns what matplotlib.pyplot.hist returns.
        #      This leads to the question why `hist`s do not seem to be
        #      possible in `xarray.FacetGrid`s, although they would be useful?
        #      Gaining a deeper understanding of this issue and corresponding
        #      xarray functionality is something to investigate in the future.

        # Determine which figure and axes to attach to the PlotHelper.
        # This is necessary because a figure might have been created in the
        # invoked plot function and we need to make sure that we attach it
        # correctly, otherwise there will be no plot output.
        if isinstance(rv, xr.plot.FacetGrid):
            fig = rv.fig
            axes = rv.axs

            # Allow re-writing the axis titles with custom kwargs
            if title_kwargs is not None:
                rv.set_titles(**title_kwargs)
        else:
            # Use the currently set figure and its axes.
            fig = plt.gcf()
            axes = plt.gca()

        # When now attaching the new figure and axes, the previously existing
        # figure (the one .clear()-ed above) is closed and discarded.
        # If the figure extracted here is identical to the already-associated
        # figure, nothing happens.
        hlpr.attach_figure_and_axes(fig=fig, axes=axes, skip_if_identical=True)

        # Store the FacetGrid instance for potential later manipulation
        hlpr._attrs["facet_grid"] = rv

        # Done with this frame now.

    def set_suptitle_kwargs(
        st_kwargs: dict, dim: str, value: Any, suptitle_warning_issued
    ) -> Tuple[dict, bool]:
        try:
            st_kwargs["title"] = st_kwargs["title"].format(
                dim=dim, value=value
            )
        except Exception as exc:
            # Warn (once)
            if not suptitle_warning_issued:
                log.caution(
                    "Failed to format suptitle using '%s'! Got %s: %s",
                    st_kwargs["title"],
                    type(exc).__name__,
                    exc,
                )
                log.remark(
                    "Falling back to string-based formatting "
                    "(not warning again)."
                )
                suptitle_warning_issued = True

            # Fall back to string-based format
            st_kwargs["title"] = "{dim:s} = {value:s}".format(
                dim=dim, value=value
            )

        return st_kwargs, suptitle_warning_issued

    # Actual plotting routine starts here .....................................
    # Get the Dataset, DataArray, or other compatible data
    d = data["data"]

    # .. Prepare data . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
    log.note("Preparing data for facet grid plot ...")
    if show_data:
        log.remark("%s", d.head())

    # Can apply a data subselection
    if sel:
        log.remark(
            "Applying data selection:  %s",
            ",  ".join(f"{dim}: {val}" for dim, val in sel.items()),
        )
        d = d.sel(**sel)

    # Squeeze size-1 dimension coordinates to non-dimension coordinates
    if squeeze and 1 in d.sizes.values():
        log.remark(
            "Squeezing ... (1-sized dimensions: %s)",
            ", ".join(d for d, size in d.sizes.items() if size == 1),
        )
        d = d.squeeze()

    # Drop unwanted non-indexed coordinates
    nonindexed_coords = [c for c in d.coords if c not in d.indexes]
    if nonindexed_coords and drop_nonindexed_coords:
        log.remark(
            "Dropping non-indexed coordinate variables:  %s",
            ", ".join(nonindexed_coords),
        )
        d = d.drop_vars(nonindexed_coords)

    # Determine kind and encoding, updating the plot kwargs accordingly.
    # NOTE Need to pop all explicitly given specifiers in order to not have
    #      them appear as part of plot_kwargs further downstream.
    kind = determine_plot_kind(
        d, kind=kind, default_kind_map=_AUTO_PLOT_KINDS, **plot_kwargs
    )
    plot_kwargs, (encoding, _, _) = determine_encoding(
        d.sizes,
        kind=kind,
        auto_encoding=auto_encoding,
        default_encodings=_FACET_GRID_KINDS,
        data_vars=(list(d.data_vars) if hasattr(d, "data_vars") else None),
        plot_kwargs=plot_kwargs,
        return_encoding_info=True,
        **(auto_encoding_options if auto_encoding_options else {}),
    )
    free = plot_kwargs.pop("free", None)  # TODO Should this be an argument?
    frames = plot_kwargs.pop("frames", None)
    files = plot_kwargs.pop("files", None)
    _special_specs = ("free", "frames", "files")

    if free:
        free = (free,) if isinstance(free, str) else tuple(free)
        log.remark("   deliberately free:    %s", ", ".join(free))

    # Potentially perform files iteration
    # This may leave the plotting function via a messaging exception; the next
    # time we arrive here, `files` should no longer be set.
    if files:
        log.info(
            "Initiating %d-dimensional files iteration:  %s",
            len(files),
            ", ".join(files),
        )

        # Build selector, including existing values
        # TODO What about index selection?! May not always have coordinates
        sel = build_pspace_selector(d, files, **(sel if sel else {}))

        raise UpdatePlotConfig(
            "facet_grid files iteration",
            from_pspace=dict(sel=sel),
            #
            # Explicitly pass the encoding so it's not tinkered with again
            **{s: d for s, d in encoding.items() if s not in _special_specs},
            free=free,
            frames=frames,
            files=None,  # TODO Consider setting to some informative value
        )

    # Parse colorbar-related arguments
    plot_kwargs = parse_cmap_and_norm_kwargs(**plot_kwargs)

    # Done parsing arguments
    log.note("Facet grid plot of kind '%s' now commencing ...", kind)

    # If no animation is desired, the plotting routine is really simple
    if not frames:
        # Exit animation mode, if it was enabled. Then plot the figure. Done.
        hlpr.disable_animation()
        plot_frame(d, kind=kind, plot_kwargs=plot_kwargs)
        return

    # else: Animation is desired. Might have to enable it.
    # If not already in animation mode, the plot function will be exited here
    # and be invoked anew in animation mode. It will end up in this branch
    # again, and will then be able to proceed past this point...
    hlpr.enable_animation()

    # Prepare some parameters for the update routine
    suptitle_kwargs = suptitle_kwargs if suptitle_kwargs else {}
    if "title" not in suptitle_kwargs:
        suptitle_kwargs["title"] = "{dim:} = {value:.3g}"

    # Define an animation update function. All frames are plotted therein.
    # There is no need to plot the first frame _outside_ the update function,
    # because it would be discarded anyway.
    def update():
        """The animation update function: a python generator"""
        suptitle_warning_issued = False

        # Go over all available frame data dimension
        for f_value, f_data in d.groupby(frames):
            # Plot a frame. It attaches the new figure and axes to the hlpr
            plot_frame(
                f_data, kind=kind, plot_kwargs=plot_kwargs, groupby_dim=frames
            )

            # Apply the suptitle format string and invoke the helper to set it
            st_kwargs = copy.deepcopy(suptitle_kwargs)
            st_kwargs, suptitle_warning_issued = set_suptitle_kwargs(
                st_kwargs, frames, f_value, suptitle_warning_issued
            )
            hlpr.invoke_helper("set_suptitle", **st_kwargs)

            # Done with this frame. Let the writer grab it.
            yield

    # Register the animation update with the helper
    hlpr.register_animation_update(update, invoke_helpers_before_grab=True)



# -- Additional facet-grid supporting plots -----------------------------------


# TODO Should support errors along x as well!

[docs]
@make_facet_grid_plot(
    map_as="dataset",
    encodings=("x", "hue"),
    supported_hue_styles=("discrete",),
    #
    # defaults
    hue_style="discrete",
    add_guide=False,
)
def errorbars(
    ds: "xarray.Dataset",
    *,
    _is_facetgrid: bool,
    hlpr: PlotHelper,
    y: str,
    yerr: str,
    x: str = None,
    hue: str = None,
    hue_fstr: str = "{value:}",
    use_bands: bool = False,
    add_legend: bool = True,
    **kwargs,
):
    """An errorbar plot supporting facet grid.

    This function makes use of a decorator to implement faceting support:
    :py:class:`~dantro.plot.funcs.generic.make_facet_grid_plot`.
    It additionally registers this plot as an available plot ``kind`` in
    :py:func:`~dantro.plot.funcs.generic.facet_grid`.

    .. note::

        This plot function is heavily wrapped by the decorator, which is why
        not all functionality is exposed here. Instead, the arguments seen here
        are those that apply to a *single* subplot of a facet grid.

    Uses :py:func:`~dantro.plot.funcs._utils.plot_errorbar` for
    plotting individual lines.

    Args:
        ds (xarray.Dataset): The dataset containing the errorbar data
        _is_facetgrid (bool): Indicates whether this plot is called as part of
            a facet grid or whether no faceting takes place (i.e. when neither
            columns nor rows are available for faceting). In such a case, this
            plot supplies metadata to the plot helper to draw axis labels etc.
            (For internal use only, no need to pass this parameter.)
        hlpr (PlotHelper): The plot helper, exposing the currently selected
            axis via ``hlpr.ax``.
        y (str): Which data variable to use for the y-axis values
        yerr (str): Which data variable to use for the errorbars or bands
        x (str, optional): Which data dimension to plot on the x-axis
        hue (str, optional): Which data dimension to represent via hues
        hue_fstr (str, optional): A format string that is used to build the
            label of discrete hue encoding.
        use_bands (bool, optional): Whether to use errorbands instead of bars.
        add_legend (bool, optional): Whether to add a legend to the individual
            plot or to the figure
        **kwargs: Passed on to ``hlpr.ax.errorbar`` via
            :py:func:`~dantro.plot.funcs._utils.plot_errorbar`.
    """
    # Prepare data
    _y = ds[y]
    _yerr = ds[yerr]

    # Try to infer x, if not given
    x = x if x else [dim for dim in _y.dims if dim not in (hue,)][0]
    _x = ds.coords[x]

    # If this is not a facet grid, still show some labels
    if not _is_facetgrid:
        # FIXME Should do this via helper, but not working (see #82)
        # hlpr.provide_defaults("set_labels", x=x, y=f"{y} & {yerr}")

        # Workaround:
        hlpr.ax.set_xlabel(x)
        hlpr.ax.set_ylabel(f"{y}, {yerr}")

    # Case: No hue dimension -> plot single errorbar line
    if hue is None:
        _plot_errorbar(
            ax=hlpr.ax,
            x=_x,
            y=_y,
            yerr=_yerr,
            fill_between=use_bands,
            **kwargs,
        )
        return

    # else: will plot multiple lines
    # Keep track of legend handles and labels
    _handles, _labels = [], []

    # Group by the hue dimension and perform plots. To be a bit more permissive
    # regarding data shape, squeeze out any additional dimensions that might
    # have been left over.
    def ordered_groupby(ds, dim):
        for v in ds[dim].values:  # preserves existing order
            yield (v, ds.sel({dim: v}, drop=False))

    hue_iter = zip(
        ordered_groupby(_y, hue),
        ordered_groupby(_yerr, hue),
    )

    for (_y_coord, _y_vals), (_yerr_coord, _yerr_vals) in hue_iter:
        _y_vals = _y_vals.squeeze(drop=True)
        _yerr_vals = _yerr_vals.squeeze(drop=True)

        label = hue_fstr.format(dim=hue, value=_y_coord)
        handle = _plot_errorbar(
            ax=hlpr.ax,
            x=_x,
            y=_y_vals,
            yerr=_yerr_vals,
            label=label,
            fill_between=use_bands,
            **kwargs,
        )
        _handles.append(handle)
        _labels.append(label)

    # Either do a single-axis legend or prepare for figure-level legend
    if not _is_facetgrid:
        if add_legend:
            hlpr.ax.legend(_handles, _labels, title=hue)

    else:
        hlpr.track_handles_labels(_handles, _labels)
        if add_legend:
            hlpr.provide_defaults("set_figlegend", title=hue)



# .............................................................................



[docs]
@make_facet_grid_plot(
    map_as="dataset",
    register_as_kind="scatter3d",
    encodings=("hue", "markersize"),
    supported_hue_styles=("continuous",),
    parse_cmap_and_norm_kwargs=False,
    # defaults
    # hue_style="discrete",  # FIXME setting to 'discrete' fails, but shouldn't
)
def scatter3d(
    ds: "xarray.Dataset",
    *,
    _is_facetgrid: bool,
    hlpr: PlotHelper,
    x: str,
    y: str,
    z: str,
    hue: str = None,
    markersize: Union[float, str] = None,
    size_mapping: dict = None,
    cmap: Union[str, dict, mcolors.Colormap] = None,
    norm: Union[str, dict, mcolors.Normalize] = None,
    vmin: float = None,
    vmax: float = None,
    add_colorbar: bool = True,
    cbar_kwargs: dict = None,
    **kwargs,
):
    """A 3-dimensional scatter plot supporting facet grid.

    This function makes use of a decorator to implement faceting support:
    :py:class:`~dantro.plot.funcs.generic.make_facet_grid_plot`.
    It additionally registers this plot as an available plot ``kind`` in
    :py:func:`~dantro.plot.funcs.generic.facet_grid`.

    .. note::

        This plot relies on the figure projection having been set to 3D,
        which can be achieved via:

        .. code-block:: yaml

            my_3d_plot:
              # ...
              # for faceting:
              subplot_kws: &projection
                projection: 3d

              # for single plot:
              helpers:
                set_figure:
                  subplot_kw:  # sic
                    <<: *projection

        There *may* also be a base plot configuration that does this.

    .. warning::

        Support of :ref:`auto-encoding <dag_generic_auto_encoding>` and of the
        ``hue`` and ``markersize`` encodings is not as general as it could be.
        If you get dimensionality- or size-related errors, that's probably due
        to an incompatible combination of encodings.

    .. note::

        This plot function is heavily wrapped by the decorator, which is why
        not all functionality is exposed here. Instead, the arguments seen here
        are those that apply to a *single* subplot of a facet grid.

    Args:
        ds (xarray.Dataset): The dataset containing the data
        _is_facetgrid (bool): Indicates whether this plot is called as part of
            a facet grid or whether no faceting takes place (i.e. when neither
            columns nor rows are available for faceting). In such a case, this
            plot supplies metadata to the plot helper to draw axis labels etc.
            (For internal use only, no need to pass this parameter.)
        hlpr (PlotHelper): The plot helper, exposing the currently selected
            axis via ``hlpr.ax``.
        x (str): Which data variable to plot on the x-axis
        y (str): Which data variable to plot on the y-axis
        z (str): Which data variable to plot on the z-axis
        hue (str, optional): Which dimension or variable to represent via hues
        markersize: (str, optional): Which data *dimension* to plot using the
            markersize. Note that if ``hue`` is given this needs to match the
            size of that dimension.
            Whether using data *variables*  here depends on the dimensionality
            of the data; don't be surprised by a cryptic error message from
            deep within xarray.
        size_mapping: (dict, optional): A dictionary containing the facet grid
            ``size_mapping``. Is overwritten by ``markersize``, if passed.
        cmap (Union[str, dict, matplotlib.colors.Colormap], optional): The
            colormap, passed to the
            :py:class:`~dantro.plot.utils.color_mngr.ColorManager`.
        norm (Union[str, dict, matplotlib.colors.Normalize], optional):
            The norm that is applied for the color-mapping.
        vmin (float, optional): The lower bound of the color-mapping,
            passed to the
            :py:class:`~dantro.plot.utils.color_mngr.ColorManager`.
            Ignored if norm evaluates to ``BoundaryNorm``.
        vmax (float, optional): The upper bound of the color-mapping,
            passed to the
            :py:class:`~dantro.plot.utils.color_mngr.ColorManager`.
            Ignored if norm evaluates to ``BoundaryNorm``.
        add_colorbar (bool, optional): Whether to add a colorbar
        cbar_kwargs (dict, optional): Arguments for colorbar creation.
        **kwargs: Passed on to :py:func:`matplotlib.axes.Axes.scatter` or, if
            ``z`` is given, the equivalent 3D axes.

    Raises:
        AttributeError: If the active axes does not have a ``zaxis``.
            In that case, you probably forgot to set the figure's projection,
            see above.
    """

    def get_var(v: str) -> xr.DataArray:
        """Retrieves a data variable from the dataset, making some checks"""
        d = ds[v]
        if d.ndim != 1:
            raise ValueError(
                f"Unexpected data dimensionality for variable '{v}'! "
                "On the subplot-level, data variables should be 1D, but "
                f"ds['{v}'] was {d.ndim}-dimensional: {dict(d.sizes)}"
            )
        return d

    if not hasattr(hlpr.ax, "zaxis"):
        raise AttributeError(
            "Missing z-axis! Did you set the "
            "projection (via `subplot_kws` or `setup_figure` helper)?"
        )

    cm = ColorManager(
        cmap=cmap,
        norm=norm,
        vmin=vmin,
        vmax=vmax,
    )

    shared_kwargs = dict(
        c=get_var(hue) if hue is not None else None,
        cmap=cm.cmap if cmap is not None else None,
        norm=cm.norm if norm is not None else None,
        vmin=vmin if norm is None else None,
        vmax=vmax if norm is None else None,
    )

    # Add the 's' key to the kwargs. If both size_mapping and markersize are
    # passed, 'markersize' will take precedent.
    if size_mapping is not None:
        shared_kwargs["s"] = size_mapping.values

    if not _is_facetgrid and markersize is not None:
        shared_kwargs["s"] = get_var(markersize).values

    im = hlpr.ax.scatter(
        get_var(x),
        get_var(y),
        get_var(z),
        **shared_kwargs,
        **kwargs,
    )

    # Postprocess
    if not _is_facetgrid and hue is not None and add_colorbar:
        # TODO This should read information from the FacetGrid's cbar_kwargs,
        #      which are also parsed there...
        cm.create_cbar(
            im,
            fig=hlpr.fig,
            ax=hlpr.ax,
            **(cbar_kwargs if cbar_kwargs else {}),
        )

    # FIXME Should do this via helper, but not working (see #82)
    # hlpr.provide_defaults("set_labels", x=x, y=y, z=z)
    hlpr.ax.set_xlabel(x)
    hlpr.ax.set_ylabel(y)
    hlpr.ax.set_zlabel(z)

    return im