# Source code for dask.dataframe.accessor

from __future__ import annotations

import functools
import warnings

import numpy as np
import pandas as pd

from dask.utils import derived_from


def _bind_method(cls, pd_cls, attr, min_version=None):
    """
    Install a method called ``attr`` on ``cls`` that delegates to
    ``self._function_map``.

    Parameters
    ----------
    cls : type
        Class that receives the generated method.
    pd_cls : type
        Class whose attribute of the same name is recorded as
        ``__wrapped__`` and which is handed to ``derived_from``.
    attr : str
        Name of the method to generate.
    min_version : optional
        Forwarded to ``derived_from`` as its ``version`` argument.
    """

    def func(self, *args, **kwargs):
        # All real work is done by the owning object's ``_function_map``.
        return self._function_map(attr, *args, **kwargs)

    # Make the generated function present itself under the bound name.
    func.__name__ = attr
    func.__qualname__ = f"{cls.__name__}.{attr}"
    try:
        # Best effort only: record which attribute of ``pd_cls`` is mirrored.
        func.__wrapped__ = getattr(pd_cls, attr)
    except Exception:
        pass
    decorated = derived_from(pd_cls, version=min_version)(func)
    setattr(cls, attr, decorated)


def _bind_property(cls, pd_cls, attr, min_version=None):
    """
    Install a read-only property called ``attr`` on ``cls`` that delegates
    to ``self._property_map``.

    Parameters
    ----------
    cls : type
        Class that receives the generated property.
    pd_cls : type
        Class whose attribute of the same name is inspected to find the
        underlying getter, and which is handed to ``derived_from``.
    attr : str
        Name of the property to generate.
    min_version : optional
        Forwarded to ``derived_from`` as its ``version`` argument.
    """

    def func(self):
        return self._property_map(attr)

    func.__name__ = attr
    func.__qualname__ = f"{cls.__name__}.{attr}"
    try:
        # Attempt to determine the function we are wrapping: unwrap
        # ``property`` / ``cached_property`` down to the underlying getter.
        original_prop = getattr(pd_cls, attr)
        if isinstance(original_prop, property):
            method = original_prop.fget
        elif isinstance(original_prop, functools.cached_property):
            method = original_prop.func
        else:
            method = original_prop
        # Record the wrapped function for every branch above, not only for
        # the fallback case.
        func.__wrapped__ = method
    except Exception:
        # If we can't then no matter, the function still works.
        pass
    setattr(cls, attr, property(derived_from(pd_cls, version=min_version)(func)))


def maybe_wrap_pandas(obj, x):
    """
    Wrap a NumPy array result in a pandas container matching ``obj``.

    Parameters
    ----------
    obj : object
        The object the result was computed from. Only checked for being a
        ``pd.Series``.
    x : object
        The result to wrap.

    Returns
    -------
    object
        ``x`` unchanged when it is not an ``np.ndarray``. Otherwise a
        ``pd.Series`` carrying ``obj.index`` and ``x.dtype`` when ``obj`` is
        a ``pd.Series``, or a ``pd.Index`` built from ``x`` for any other
        ``obj``.
    """
    # Anything that is not a raw array is passed straight through.
    if not isinstance(x, np.ndarray):
        return x
    if isinstance(obj, pd.Series):
        return pd.Series(x, index=obj.index, dtype=x.dtype)
    return pd.Index(x)


# Ported from pandas
# https://github.com/pandas-dev/pandas/blob/master/pandas/core/accessor.py
class CachedAccessor:
    """
    Descriptor that creates an accessor on first access and stores it on
    the owning instance.

    Parameters
    ----------
    name : str
        Attribute name the accessor is exposed under, e.g. ``df.foo``
    accessor : cls
        Class providing the extension methods. It is called with the owning
        object as its single argument.
    """

    def __init__(self, name, accessor):
        self._name, self._accessor = name, accessor

    def __get__(self, obj, cls):
        accessor_cls = self._accessor
        if obj is not None:
            instance = accessor_cls(obj)
            # Store the built accessor under the same name on the instance so
            # that later lookups find it there instead of calling this
            # descriptor again. ``object.__setattr__`` is used so that any
            # ``__setattr__`` defined by the owning type is bypassed.
            object.__setattr__(obj, self._name, instance)
            return instance
        # Looked up on the class itself (e.g. ``Dataset.geo``): hand back the
        # accessor class rather than an instance.
        return accessor_cls


def _register_accessor(name, cls):
    """
    Build a class decorator that registers an accessor on ``cls``.

    Parameters
    ----------
    name : str
        Attribute name under which the accessor is installed on ``cls``.
    cls : type
        Class receiving the accessor. Its ``_accessors`` attribute must
        support ``add``.

    Returns
    -------
    callable
        Decorator that installs the decorated class as a ``CachedAccessor``
        on ``cls`` and returns the decorated class unchanged.
    """

    def decorator(accessor):
        name_taken = hasattr(cls, name)
        if name_taken:
            # Registration still goes ahead; the caller is only warned that
            # an existing attribute is being replaced.
            message = (
                "registration of accessor {!r} under name {!r} for type "
                "{!r} is overriding a preexisting attribute with the same "
                "name."
            ).format(accessor, name, cls)
            warnings.warn(message, UserWarning, stacklevel=2)
        descriptor = CachedAccessor(name, accessor)
        setattr(cls, name, descriptor)
        cls._accessors.add(name)
        return accessor

    return decorator


def register_dataframe_accessor(name):
    """
    Register a custom accessor on :class:`dask.dataframe.DataFrame`.

    See :func:`pandas.api.extensions.register_dataframe_accessor` for more.

    Parameters
    ----------
    name : str
        Attribute name the accessor is registered under.

    Returns
    -------
    callable
        The decorator returned by ``_register_accessor(name, DataFrame)``.
    """
    # Imported at call time rather than at module import.
    # NOTE(review): presumably to avoid a circular import -- confirm.
    from dask.dataframe import DataFrame

    return _register_accessor(name, DataFrame)
def register_series_accessor(name):
    """
    Register a custom accessor on :class:`dask.dataframe.Series`.

    See :func:`pandas.api.extensions.register_series_accessor` for more.

    Parameters
    ----------
    name : str
        Attribute name the accessor is registered under.

    Returns
    -------
    callable
        The decorator returned by ``_register_accessor(name, Series)``.
    """
    # Imported at call time rather than at module import.
    # NOTE(review): presumably to avoid a circular import -- confirm.
    from dask.dataframe import Series

    return _register_accessor(name, Series)
def register_index_accessor(name):
    """
    Register a custom accessor on :class:`dask.dataframe.Index`.

    See :func:`pandas.api.extensions.register_index_accessor` for more.

    Parameters
    ----------
    name : str
        Attribute name the accessor is registered under.

    Returns
    -------
    callable
        The decorator returned by ``_register_accessor(name, Index)``.
    """
    # Imported at call time rather than at module import.
    # NOTE(review): presumably to avoid a circular import -- confirm.
    from dask.dataframe import Index

    return _register_accessor(name, Index)