Garve/scikit-bonus

View on GitHub
skbonus/preprocessing/saturation.py

Summary

Maintainability
A
0 mins
Test Coverage
"""Saturation classes."""

from __future__ import annotations

from abc import ABC, abstractmethod

import numpy as np
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.utils.validation import check_is_fitted, check_array


class Saturation(BaseEstimator, TransformerMixin, ABC):
    """
    Base class for all saturations, such as Box-Cox, Adbudg, ...

    Input validation and feature-count bookkeeping are handled here. Subclasses only have to
    implement `_transformation`, which receives the validated array.
    """

    def fit(self, X: np.ndarray, y: None = None) -> Saturation:
        """
        Fit the transformer.

        No parameters are learned. The input is only validated and its number of columns is
        stored so that `transform` can reject data of a different width.

        Parameters
        ----------
        X : np.ndarray
            Data used to validate the input and to record the number of features.

        y : Ignored
            Not used, present here for API consistency by convention.

        Returns
        -------
        Saturation
            Fitted transformer.
        """
        X = check_array(X)
        # Set the public attribute directly rather than going through the private
        # BaseEstimator._check_n_features helper, which is not present in every scikit-learn release.
        self.n_features_in_ = X.shape[1]

        return self

    def transform(self, X: np.ndarray) -> np.ndarray:
        """
        Apply the saturation effect.

        Parameters
        ----------
        X : np.ndarray
            Data to be transformed.

        Returns
        -------
        np.ndarray
            Data with saturation effect applied.

        Raises
        ------
        ValueError
            If `X` does not have the number of features recorded during `fit`.
        """
        check_is_fitted(self)
        X = check_array(X)

        # A missing attribute is tolerated (no check is done), mirroring the helper this replaces.
        expected = getattr(self, "n_features_in_", None)
        n_features = X.shape[1]
        if expected is not None and n_features != expected:
            raise ValueError(
                f"X has {n_features} features, but {self.__class__.__name__} "
                f"is expecting {expected} features as input."
            )

        return self._transformation(X)

    @abstractmethod
    def _transformation(self, X: np.ndarray) -> np.ndarray:
        """Generate the transformation formula."""


class BoxCoxSaturation(Saturation):
    """
    Apply the Box-Cox saturation.

    The formula is ((x + shift) ** exponent - 1) / exponent if exponent != 0, else ln(x + shift).

    Parameters
    ----------
    exponent: float, default=1.0
        The exponent. A value of exactly 0 selects the logarithmic form.

    shift : float, default=1.0
        The shift added to the input before the power or logarithm is taken.

    Examples
    --------
    >>> import numpy as np
    >>> X = np.array([[1, 1000], [2, 1000], [3, 1000]])
    >>> BoxCoxSaturation(exponent=0.5).fit_transform(X)
    array([[ 0.82842712, 61.27716808],
           [ 1.46410162, 61.27716808],
           [ 2.        , 61.27716808]])
    """

    def __init__(self, exponent: float = 1.0, shift: float = 1.0) -> None:
        """Store the hyperparameters unchanged."""
        self.exponent = exponent
        self.shift = shift

    def _transformation(self, X: np.ndarray) -> np.ndarray:
        """Evaluate the Box-Cox formula element-wise on the shifted input."""
        shifted = X + self.shift

        # The power form divides by the exponent, so exponent == 0 gets the logarithmic form instead.
        if self.exponent == 0:
            return np.log(shifted)

        return (shifted ** self.exponent - 1) / self.exponent


class AdbudgSaturation(Saturation):
    """
    Apply the Adbudg saturation.

    The formula is x ** exponent / (denominator_shift + x ** exponent).

    Parameters
    ----------
    exponent : float, default=1.0
        The exponent applied to the input.

    denominator_shift : float, default=1.0
        The constant added to the powered input in the denominator.

    Notes
    -----
    This version produces saturated values in the interval [0, 1]. You can use `LinearShift` from the shift module to
    bring it between some interval [a, b].

    Examples
    --------
    >>> import numpy as np
    >>> X = np.array([[1, 1000], [2, 1000], [3, 1000]])
    >>> AdbudgSaturation().fit_transform(X)
    array([[0.5       , 0.999001  ],
           [0.66666667, 0.999001  ],
           [0.75      , 0.999001  ]])
    """

    def __init__(self, exponent: float = 1.0, denominator_shift: float = 1.0) -> None:
        """Store the hyperparameters unchanged."""
        self.exponent = exponent
        self.denominator_shift = denominator_shift

    def _transformation(self, X: np.ndarray) -> np.ndarray:
        """Evaluate the Adbudg formula element-wise."""
        # The powered input appears in both numerator and denominator; compute it once.
        powered = X ** self.exponent
        denominator = self.denominator_shift + powered

        return powered / denominator


class HillSaturation(Saturation):
    """
    Apply the Hill saturation.

    The formula is 1 / (1 + (half_saturation / x) ** exponent).

    Parameters
    ----------
    exponent : float, default=1.0
        The exponent applied to the ratio half_saturation / x.

    half_saturation : float, default=1.0
        The point of half saturation, i.e. Hill(half_saturation) = 0.5.

    Examples
    --------
    >>> import numpy as np
    >>> X = np.array([[1, 1000], [2, 1000], [3, 1000]])
    >>> HillSaturation().fit_transform(X)
    array([[0.5       , 0.999001  ],
           [0.66666667, 0.999001  ],
           [0.75      , 0.999001  ]])
    """

    def __init__(self, exponent: float = 1.0, half_saturation: float = 1.0) -> None:
        """Store the hyperparameters unchanged."""
        self.half_saturation = half_saturation
        self.exponent = exponent

    def _transformation(self, X: np.ndarray) -> np.ndarray:
        """Evaluate the Hill formula element-wise."""
        # Machine epsilon is added to the input so that x == 0 does not divide by zero.
        tiny = np.finfo(np.float64).eps
        ratio = self.half_saturation / (X + tiny)

        return 1 / (1 + ratio ** self.exponent)


class ExponentialSaturation(Saturation):
    """
    Apply exponential saturation.

    The formula is 1 - exp(-exponent * x).

    Parameters
    ----------
    exponent : float, default=1.0
        The exponent, i.e. the rate multiplied with the input inside the exponential.

    Notes
    -----
    For non-negative inputs and a non-negative exponent, this version produces saturated values in the interval
    [0, 1]. You can use `LinearShift` from the shift module to bring it between some interval [a, b].

    The formula is evaluated with `np.expm1`, so very small values of exponent * x keep their precision instead of
    being rounded to exactly 0.

    Examples
    --------
    >>> import numpy as np
    >>> X = np.array([[1, 1000], [2, 1000], [3, 1000]])
    >>> ExponentialSaturation().fit_transform(X)
    array([[0.63212056, 1.        ],
           [0.86466472, 1.        ],
           [0.95021293, 1.        ]])
    """

    def __init__(self, exponent: float = 1.0) -> None:
        """Store the hyperparameter unchanged."""
        self.exponent = exponent

    def _transformation(self, X: np.ndarray) -> np.ndarray:
        """Evaluate 1 - exp(-exponent * x) element-wise."""
        # 1 - exp(z) cancels catastrophically for tiny |z| (e.g. z = -1e-20 yields exactly 0.0);
        # expm1 computes exp(z) - 1 accurately in that regime.
        # Subtracting from 0.0, rather than negating, keeps a zero result as +0.0.
        return 0.0 - np.expm1(-self.exponent * X)