Source code for dantro.data_loaders.pandas

"""Defines a loader mixin to load data via :py:mod:`pandas`"""

from ..containers import PassthroughContainer
from ._registry import add_loader

# -----------------------------------------------------------------------------


class PandasLoaderMixin:
    """Loader mixin providing :py:mod:`pandas`-based file readers."""

    # NOTE(review): the loaders below take no ``self`` argument; presumably
    # the ``add_loader`` decorator accounts for that -- confirm in
    # ``_registry``.

    @add_loader(TargetCls=PassthroughContainer)
    def _load_pandas_csv(
        filepath: str,
        *,
        TargetCls: type,
        **load_kwargs,
    ) -> PassthroughContainer:
        """Reads a CSV file with :py:func:`pandas.read_csv` and wraps the
        resulting :py:class:`pandas.DataFrame` into a
        :py:class:`~dantro.containers.general.PassthroughContainer`.

        .. note::

            dantro has no proper equivalent of a
            :py:class:`pandas.DataFrame` (yet), and unpacking the dataframe
            into a dantro group would reduce functionality. Hence, a
            passthrough-container is used, which behaves mostly like the
            object it wraps. In some cases, the underlying data may have to
            be retrieved via the ``.data`` property.

        Args:
            filepath (str): Location of the CSV file
            TargetCls (type): The class constructor; called with the loaded
                dataframe as ``data`` and the file path stored in ``attrs``
            **load_kwargs: Forwarded to :py:func:`pandas.read_csv`

        Returns:
            PassthroughContainer: Container whose payload is the loaded CSV
                data as a :py:class:`pandas.DataFrame`.
        """
        # Function-scope import: pandas is only needed once this loader runs
        import pandas as pd

        frame = pd.read_csv(filepath, **load_kwargs)
        metadata = {"filepath": filepath}
        return TargetCls(data=frame, attrs=metadata)

    @add_loader(TargetCls=PassthroughContainer)
    def _load_pandas_generic(
        filepath: str,
        *,
        TargetCls: type,
        reader: str,
        **load_kwargs,
    ) -> PassthroughContainer:
        """Reads a file with one of the :py:mod:`pandas` ``read_*``
        functions and wraps the resulting :py:class:`pandas.DataFrame` into
        a :py:class:`~dantro.containers.general.PassthroughContainer`.

        The ``reader`` argument selects the function: it is looked up as
        ``pandas.read_<reader>`` and thus needs to match a reader function
        from `pandas IO
        <https://pandas.pydata.org/pandas-docs/stable/user_guide/io.html>`_.

        .. note::

            dantro has no proper equivalent of a
            :py:class:`pandas.DataFrame` (yet), and unpacking the dataframe
            into a dantro group would reduce functionality. Hence, a
            passthrough-container is used, which behaves mostly like the
            object it wraps. In some cases, the underlying data may have to
            be retrieved via the ``.data`` property.

        .. note::

            Some of pandas' reader functions require additional packages to
            have been installed.

        .. warning::

            In principle, this also gives access to reader functions that
            are *not* file-based. Calling those will most probably fail,
            because they do not expect a file path as their first argument.

        Args:
            filepath (str): Location of the data file
            TargetCls (type): The class constructor; called with the loaded
                dataframe as ``data`` and the file path stored in ``attrs``
            reader (str): Name of the pandas IO reader function to use,
                without the ``read_`` prefix
            **load_kwargs: Forwarded to the selected reader function

        Returns:
            PassthroughContainer: Container whose payload is the loaded data
                as a :py:class:`pandas.DataFrame`.

        Raises:
            ValueError: If pandas has no ``read_<reader>`` attribute; the
                message lists the available file-based reader names.
        """
        # Function-scope import: pandas is only needed once this loader runs
        import pandas as pd

        try:
            reader_func = getattr(pd, f"read_{reader}")

        except AttributeError as err:
            prefix = "read_"

            # Readers that are not file-based; kept out of the hint below
            non_file_readers = (
                "clipboard",
                "gbq",
                "sql",
                "sql_query",
                "sql_table",
            )

            # All ``read_*`` names exposed by pandas, with the prefix removed
            candidates = (
                name[len(prefix):]
                for name in dir(pd)
                if name.startswith(prefix)
            )
            _avail = ", ".join(
                short for short in candidates if short not in non_file_readers
            )

            raise ValueError(
                f"Invalid pandas reader name '{reader}'!\n"
                f"Available readers: {_avail}"
            ) from err

        frame = reader_func(filepath, **load_kwargs)
        return TargetCls(data=frame, attrs={"filepath": filepath})