# rasa/core/policies/policy.py

from __future__ import annotations
import abc
import copy
import logging
from enum import Enum
from pathlib import Path
from typing import (
    Any,
    List,
    Optional,
    Text,
    Dict,
    Callable,
    Tuple,
    TypeVar,
    TYPE_CHECKING,
)

import numpy as np

from rasa.engine.graph import GraphComponent, ExecutionContext
from rasa.engine.storage.resource import Resource
from rasa.engine.storage.storage import ModelStorage
from rasa.core.featurizers.precomputation import MessageContainerForCoreFeaturization
from rasa.core.featurizers.tracker_featurizers import TrackerFeaturizer
from rasa.core.featurizers.tracker_featurizers import MaxHistoryTrackerFeaturizer
from rasa.core.featurizers.single_state_featurizer import SingleStateFeaturizer
from rasa.core.featurizers.tracker_featurizers import FEATURIZER_FILE
import rasa.utils.common
import rasa.shared.utils.io
from rasa.shared.exceptions import RasaException, FileIOException
from rasa.shared.nlu.constants import ENTITIES, INTENT, TEXT, ACTION_TEXT, ACTION_NAME
from rasa.shared.core.domain import Domain, State
from rasa.shared.core.events import Event
from rasa.shared.core.trackers import DialogueStateTracker
from rasa.shared.core.generator import TrackerWithCachedStates
from rasa.core.constants import (
    DEFAULT_POLICY_PRIORITY,
    POLICY_PRIORITY,
    POLICY_MAX_HISTORY,
)
from rasa.shared.core.constants import USER, SLOTS, PREVIOUS_ACTION, ACTIVE_LOOP
import rasa.shared.utils.common


if TYPE_CHECKING:
    from rasa.shared.nlu.training_data.features import Features


logger = logging.getLogger(__name__)

TrackerListTypeVar = TypeVar(
    "TrackerListTypeVar", List[DialogueStateTracker], List[TrackerWithCachedStates]
)


class SupportedData(Enum):
    """Enumeration of a policy's supported training data type."""

    # policy only supports ML-based training data ("stories")
    ML_DATA = 1

    # policy only supports rule-based data ("rules")
    RULE_DATA = 2

    # policy supports both ML-based and rule-based data ("stories" as well as "rules")
    ML_AND_RULE_DATA = 3

    @staticmethod
    def trackers_for_supported_data(
        supported_data: SupportedData,
        trackers: TrackerListTypeVar,
    ) -> TrackerListTypeVar:
        """Return trackers for a given policy.

        Args:
            supported_data: Supported data filter for the `trackers`.
            trackers: Trackers to split.

        Returns:
            Trackers from ML-based training data and/or rule-based data.
        """
        if supported_data == SupportedData.RULE_DATA:
            return [tracker for tracker in trackers if tracker.is_rule_tracker]

        if supported_data == SupportedData.ML_DATA:
            return [tracker for tracker in trackers if not tracker.is_rule_tracker]

        # `supported_data` is `SupportedData.ML_AND_RULE_DATA`
        return trackers
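
    # Illustrative usage sketch (assumes `story_tracker` and `rule_tracker` are
    # trackers whose `is_rule_tracker` attribute is `False` and `True`
    # respectively; the names are hypothetical):
    #
    #     ml_trackers = SupportedData.trackers_for_supported_data(
    #         SupportedData.ML_DATA, [story_tracker, rule_tracker]
    #     )
    #     # -> [story_tracker]; rule trackers are filtered out for ML-only policies.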


class Policy(GraphComponent):
    """Common parent class for all dialogue policies."""

    @staticmethod
    def supported_data() -> SupportedData:
        """The type of data supported by this policy.

        By default, this is only ML-based training data. If policies support rule data,
        or both ML-based data and rule data, they need to override this method.

        Returns:
            The data type supported by this policy (ML-based training data).
        """
        return SupportedData.ML_DATA

    def __init__(
        self,
        config: Dict[Text, Any],
        model_storage: ModelStorage,
        resource: Resource,
        execution_context: ExecutionContext,
        featurizer: Optional[TrackerFeaturizer] = None,
    ) -> None:
        """Constructs a new Policy object."""
        self.config = config
        if featurizer is None:
            featurizer = self._create_featurizer()
        self.__featurizer = featurizer

        self.priority = config.get(POLICY_PRIORITY, DEFAULT_POLICY_PRIORITY)
        self.finetune_mode = execution_context.is_finetuning

        self._model_storage = model_storage
        self._resource = resource

    @classmethod
    def create(
        cls,
        config: Dict[Text, Any],
        model_storage: ModelStorage,
        resource: Resource,
        execution_context: ExecutionContext,
        **kwargs: Any,
    ) -> Policy:
        """Creates a new untrained policy (see parent class for full docstring)."""
        return cls(config, model_storage, resource, execution_context)

    def _create_featurizer(self) -> TrackerFeaturizer:
        policy_config = copy.deepcopy(self.config)

        featurizer_configs = policy_config.get("featurizer")

        if not featurizer_configs:
            return self._standard_featurizer()

        featurizer_func = _get_featurizer_from_config(
            featurizer_configs,
            self.__class__.__name__,
            lookup_path="rasa.core.featurizers.tracker_featurizers",
        )
        featurizer_config = featurizer_configs[0]

        state_featurizer_configs = featurizer_config.pop("state_featurizer", None)
        if state_featurizer_configs:
            state_featurizer_func = _get_featurizer_from_config(
                state_featurizer_configs,
                self.__class__.__name__,
                lookup_path="rasa.core.featurizers.single_state_featurizer",
            )
            state_featurizer_config = state_featurizer_configs[0]

            featurizer_config["state_featurizer"] = state_featurizer_func(
                **state_featurizer_config
            )

        featurizer = featurizer_func(**featurizer_config)
        if (
            isinstance(featurizer, MaxHistoryTrackerFeaturizer)
            and POLICY_MAX_HISTORY in policy_config
            and POLICY_MAX_HISTORY not in featurizer_config
        ):
            featurizer.max_history = policy_config[POLICY_MAX_HISTORY]
        return featurizer
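
    # Illustrative sketch of the `featurizer` config shape this method parses.
    # The names below resolve against the lookup paths used above; the
    # `max_history` value of 5 is an arbitrary example:
    #
    #     config = {
    #         "featurizer": [
    #             {
    #                 "name": "MaxHistoryTrackerFeaturizer",
    #                 "max_history": 5,
    #                 "state_featurizer": [{"name": "SingleStateFeaturizer"}],
    #             }
    #         ]
    #     }
    #
    # With this config the method returns (roughly)
    # `MaxHistoryTrackerFeaturizer(state_featurizer=SingleStateFeaturizer(),
    # max_history=5)`.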

    def _standard_featurizer(self) -> MaxHistoryTrackerFeaturizer:
        """Initializes the standard featurizer for this policy."""
        return MaxHistoryTrackerFeaturizer(
            SingleStateFeaturizer(), self.config.get(POLICY_MAX_HISTORY)
        )

    @property
    def featurizer(self) -> TrackerFeaturizer:
        """Returns the policy's featurizer."""
        return self.__featurizer

    @staticmethod
    def _get_valid_params(func: Callable, **kwargs: Any) -> Dict:
        """Filters out kwargs that cannot be passed to func.

        Args:
            func: a callable function

        Returns:
            the dictionary of parameters
        """
        valid_keys = rasa.shared.utils.common.arguments_of(func)

        params = {key: kwargs.get(key) for key in valid_keys if kwargs.get(key)}
        ignored_params = {
            key: kwargs.get(key) for key in kwargs.keys() if not params.get(key)
        }
        logger.debug(f"Parameters ignored by `model.fit(...)`: {ignored_params}")
        return params
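
    # Illustrative sketch (hypothetical `fit` function and values):
    #
    #     def fit(x, y, epochs=1): ...
    #
    #     Policy._get_valid_params(fit, x=[1], y=[0], epochs=5, verbose=True)
    #     # -> {"x": [1], "y": [0], "epochs": 5}
    #
    # `verbose` is ignored because `fit` does not accept it; note that falsy
    # values are also dropped by the truthiness check above.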

    def _featurize_for_training(
        self,
        training_trackers: List[DialogueStateTracker],
        domain: Domain,
        precomputations: Optional[MessageContainerForCoreFeaturization],
        bilou_tagging: bool = False,
        **kwargs: Any,
    ) -> Tuple[
        List[List[Dict[Text, List[Features]]]],
        np.ndarray,
        List[List[Dict[Text, List[Features]]]],
    ]:
        """Transform training trackers into a vector representation.

        The trackers, consisting of multiple turns, will be transformed
        into a float vector which can be used by a ML model.

        Args:
            training_trackers:
                the list of the :class:`rasa.core.trackers.DialogueStateTracker`
            domain: the :class:`rasa.shared.core.domain.Domain`
            precomputations: Contains precomputed features and attributes.
            bilou_tagging: indicates whether BILOU tagging should be used or not

        Returns:
            - a dictionary of attribute (INTENT, TEXT, ACTION_NAME, ACTION_TEXT,
              ENTITIES, SLOTS, FORM) to a list of features for all dialogue turns in
              all training trackers
            - the label ids (e.g. action ids) for every dialogue turn in all training
              trackers
            - a dictionary of entity type (ENTITY_TAGS) to a list of features
              containing entity tag ids for text user inputs (an empty dict
              otherwise) for all dialogue turns in all training trackers
        """
        state_features, label_ids, entity_tags = self.featurizer.featurize_trackers(
            training_trackers,
            domain,
            precomputations=precomputations,
            bilou_tagging=bilou_tagging,
            ignore_action_unlikely_intent=self.supported_data()
            == SupportedData.ML_DATA,
        )

        max_training_samples = kwargs.get("max_training_samples")
        if max_training_samples is not None:
            logger.debug(
                f"Limiting training data to {max_training_samples} "
                f"training samples."
            )
            state_features = state_features[:max_training_samples]
            label_ids = label_ids[:max_training_samples]
            entity_tags = entity_tags[:max_training_samples]

        return state_features, label_ids, entity_tags

    def _prediction_states(
        self,
        tracker: DialogueStateTracker,
        domain: Domain,
        use_text_for_last_user_input: bool = False,
        rule_only_data: Optional[Dict[Text, Any]] = None,
    ) -> List[State]:
        """Transforms tracker to states for prediction.

        Args:
            tracker: The tracker to be featurized.
            domain: The Domain.
            use_text_for_last_user_input: Indicates whether to use text or intent label
                for featurizing last user input.
            rule_only_data: Slots and loops which are specific to rules and hence
                should be ignored by this policy.

        Returns:
            A list of states.
        """
        return self.featurizer.prediction_states(
            [tracker],
            domain,
            use_text_for_last_user_input=use_text_for_last_user_input,
            ignore_rule_only_turns=self.supported_data() == SupportedData.ML_DATA,
            rule_only_data=rule_only_data,
            ignore_action_unlikely_intent=self.supported_data()
            == SupportedData.ML_DATA,
        )[0]

    def _featurize_for_prediction(
        self,
        tracker: DialogueStateTracker,
        domain: Domain,
        precomputations: Optional[MessageContainerForCoreFeaturization],
        rule_only_data: Optional[Dict[Text, Any]],
        use_text_for_last_user_input: bool = False,
    ) -> List[List[Dict[Text, List[Features]]]]:
        """Transforms training tracker into a vector representation.

        The trackers, consisting of multiple turns, will be transformed
        into a float vector which can be used by a ML model.

        Args:
            tracker: The tracker to be featurized.
            domain: The Domain.
            precomputations: Contains precomputed features and attributes.
            use_text_for_last_user_input: Indicates whether to use text or intent label
                for featurizing last user input.
            rule_only_data: Slots and loops which are specific to rules and hence
                should be ignored by this policy.

        Returns:
            A list (corresponds to the list of trackers)
            of lists (corresponds to all dialogue turns)
            of dictionaries of state type (INTENT, TEXT, ACTION_NAME, ACTION_TEXT,
            ENTITIES, SLOTS, ACTIVE_LOOP) to a list of features for all dialogue
            turns in all trackers.
        """
        return self.featurizer.create_state_features(
            [tracker],
            domain,
            precomputations=precomputations,
            use_text_for_last_user_input=use_text_for_last_user_input,
            ignore_rule_only_turns=self.supported_data() == SupportedData.ML_DATA,
            rule_only_data=rule_only_data,
            ignore_action_unlikely_intent=self.supported_data()
            == SupportedData.ML_DATA,
        )

    @abc.abstractmethod
    def train(
        self,
        training_trackers: List[TrackerWithCachedStates],
        domain: Domain,
        **kwargs: Any,
    ) -> Resource:
        """Trains a policy.

        Args:
            training_trackers: The story and rules trackers from the training data.
            domain: The model's domain.
            **kwargs: Depending on the specified `needs` section and the resulting
                graph structure the policy can use different input to train itself.

        Returns:
            A policy must return its resource locator so that potential child
            nodes can load the policy from the resource.
        """
        raise NotImplementedError("Policy must have the capacity to train.")

    @abc.abstractmethod
    def predict_action_probabilities(
        self,
        tracker: DialogueStateTracker,
        domain: Domain,
        rule_only_data: Optional[Dict[Text, Any]] = None,
        **kwargs: Any,
    ) -> PolicyPrediction:
        """Predicts the next action the bot should take after seeing the tracker.

        Args:
            tracker: The tracker containing the conversation history up to now.
            domain: The model's domain.
            rule_only_data: Slots and loops which are specific to rules and hence
                should be ignored by this policy.
            **kwargs: Depending on the specified `needs` section and the resulting
                graph structure the policy can use different input to make predictions.

        Returns:
             The prediction.
        """
        raise NotImplementedError("Policy must have the capacity to predict.")

    def _prediction(
        self,
        probabilities: List[float],
        events: Optional[List[Event]] = None,
        optional_events: Optional[List[Event]] = None,
        is_end_to_end_prediction: bool = False,
        is_no_user_prediction: bool = False,
        diagnostic_data: Optional[Dict[Text, Any]] = None,
        action_metadata: Optional[Dict[Text, Any]] = None,
    ) -> PolicyPrediction:
        return PolicyPrediction(
            probabilities,
            self.__class__.__name__,
            self.priority,
            events,
            optional_events,
            is_end_to_end_prediction,
            is_no_user_prediction,
            diagnostic_data,
            action_metadata=action_metadata,
        )

    @classmethod
    def load(
        cls,
        config: Dict[Text, Any],
        model_storage: ModelStorage,
        resource: Resource,
        execution_context: ExecutionContext,
        **kwargs: Any,
    ) -> Policy:
        """Loads a trained policy (see parent class for full docstring)."""
        featurizer = None

        try:
            with model_storage.read_from(resource) as path:
                if (Path(path) / FEATURIZER_FILE).is_file():
                    featurizer = TrackerFeaturizer.load(path)

                config.update(kwargs)

        except (ValueError, FileNotFoundError, FileIOException):
            logger.debug(
                f"Couldn't load metadata for policy '{cls.__name__}' as the "
                f"persisted metadata couldn't be read."
            )

        return cls(
            config, model_storage, resource, execution_context, featurizer=featurizer
        )

    def _default_predictions(self, domain: Domain) -> List[float]:
        """Creates a list of zeros.

        Args:
            domain: the :class:`rasa.shared.core.domain.Domain`
        Returns:
            the list of the length of the number of actions
        """
        return [0.0] * domain.num_actions

    @staticmethod
    def format_tracker_states(states: List[Dict]) -> Text:
        """Format tracker states to human readable format on debug log.

        Args:
            states: list of tracker states dicts

        Returns:
            the string of the states with user intents and actions
        """
        # empty string to insert line break before first state
        formatted_states = [""]
        if states:
            for index, state in enumerate(states):
                state_messages = []
                if state:
                    if USER in state:
                        if TEXT in state[USER]:
                            state_messages.append(f"user text: {state[USER][TEXT]!s}")
                        if INTENT in state[USER]:
                            state_messages.append(
                                f"user intent: {state[USER][INTENT]!s}"
                            )
                        if ENTITIES in state[USER]:
                            state_messages.append(
                                f"user entities: {state[USER][ENTITIES]!s}"
                            )
                    if PREVIOUS_ACTION in state:
                        if ACTION_NAME in state[PREVIOUS_ACTION]:
                            state_messages.append(
                                f"previous action name: "
                                f"{state[PREVIOUS_ACTION][ACTION_NAME]!s}"
                            )
                        if ACTION_TEXT in state[PREVIOUS_ACTION]:
                            state_messages.append(
                                f"previous action text: "
                                f"{state[PREVIOUS_ACTION][ACTION_TEXT]!s}"
                            )
                    if ACTIVE_LOOP in state:
                        state_messages.append(f"active loop: {state[ACTIVE_LOOP]!s}")
                    if SLOTS in state:
                        state_messages.append(f"slots: {state[SLOTS]!s}")
                    state_message_formatted = " | ".join(state_messages)
                    state_formatted = f"[state {index!s}] {state_message_formatted}"
                    formatted_states.append(state_formatted)

        return "\n".join(formatted_states)

    def __repr__(self) -> Text:
        """Returns text representation of object."""
        return f"{self.__class__.__name__}@{id(self)}"


class PolicyPrediction:
    """Stores information about the prediction of a `Policy`."""

    def __init__(
        self,
        probabilities: List[float],
        policy_name: Optional[Text],
        policy_priority: int = 1,
        events: Optional[List[Event]] = None,
        optional_events: Optional[List[Event]] = None,
        is_end_to_end_prediction: bool = False,
        is_no_user_prediction: bool = False,
        diagnostic_data: Optional[Dict[Text, Any]] = None,
        hide_rule_turn: bool = False,
        action_metadata: Optional[Dict[Text, Any]] = None,
    ) -> None:
        """Creates a `PolicyPrediction`.

        Args:
            probabilities: The probabilities for each action.
            policy_name: Name of the policy which made the prediction.
            policy_priority: The priority of the policy which made the prediction.
            events: Events which the `Policy` needs to have applied to the tracker
                after the prediction. These events are applied independently of
                whether the policy wins against other policies or not. Be careful
                which events you return as they can potentially influence the
                conversation flow.
            optional_events: Events which the `Policy` needs to have applied to the
                tracker after the prediction in case it wins. These events are only
                applied in case the policy's prediction wins. Be careful which events
                you return as they can potentially influence the conversation flow.
            is_end_to_end_prediction: `True` if the prediction used the text of the
                user message instead of the intent.
            is_no_user_prediction: `True` if the prediction uses neither the text
                of the user message nor the intent. This is, for example, the case
                for happy loop paths.
            diagnostic_data: Intermediate results or other information that is not
                necessary for Rasa to function, but intended for debugging and
                fine-tuning purposes.
            hide_rule_turn: `True` if the prediction was made by rules which
                do not appear in the stories.
            action_metadata: Specifies additional metadata that can be passed
                by policies.
        """
        self.probabilities = probabilities
        self.policy_name = policy_name
        self.policy_priority = policy_priority
        self.events = events or []
        self.optional_events = optional_events or []
        self.is_end_to_end_prediction = is_end_to_end_prediction
        self.is_no_user_prediction = is_no_user_prediction
        self.diagnostic_data = diagnostic_data or {}
        self.hide_rule_turn = hide_rule_turn
        self.action_metadata = action_metadata

    @staticmethod
    def for_action_name(
        domain: Domain,
        action_name: Text,
        policy_name: Optional[Text] = None,
        confidence: float = 1.0,
        action_metadata: Optional[Dict[Text, Any]] = None,
    ) -> "PolicyPrediction":
        """Create a prediction for a given action.

        Args:
            domain: The current model domain.
            action_name: The action which should be predicted.
            policy_name: The policy which made the prediction.
            confidence: The prediction confidence.
            action_metadata: Additional metadata to be attached to the prediction.

        Returns:
            The prediction.
        """
        probabilities = confidence_scores_for(action_name, confidence, domain)

        return PolicyPrediction(
            probabilities, policy_name, action_metadata=action_metadata
        )
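
    # Illustrative sketch (assumes `domain` is a loaded `Domain` that defines an
    # action named "utter_greet"):
    #
    #     prediction = PolicyPrediction.for_action_name(
    #         domain, "utter_greet", policy_name="RulePolicy", confidence=1.0
    #     )
    #     # `prediction.probabilities` is all zeros except a 1.0 at the index
    #     # of "utter_greet" in the domain's action list.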

    def __eq__(self, other: Any) -> bool:
        """Checks if the two objects are equal.

        Args:
            other: Any other object.

        Returns:
            `True` if other has the same type and the values are the same.
        """
        if not isinstance(other, PolicyPrediction):
            return False

        return (
            self.probabilities == other.probabilities
            and self.policy_name == other.policy_name
            and self.policy_priority == other.policy_priority
            and self.events == other.events
            and self.optional_events == other.optional_events
            and self.is_end_to_end_prediction == other.is_end_to_end_prediction
            and self.is_no_user_prediction == other.is_no_user_prediction
            and self.hide_rule_turn == other.hide_rule_turn
            and self.action_metadata == other.action_metadata
            # We do not compare `diagnostic_data`, because it has no effect on the
            # action prediction.
        )

    @property
    def max_confidence_index(self) -> int:
        """Gets the index of the action prediction with the highest confidence.

        Returns:
            The index of the action with the highest confidence.
        """
        return self.probabilities.index(self.max_confidence)

    @property
    def max_confidence(self) -> float:
        """Gets the highest predicted confidence.

        Returns:
            The highest predicted confidence.
        """
        return max(self.probabilities, default=0.0)
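
    # Illustrative sketch (hypothetical probabilities):
    #
    #     prediction = PolicyPrediction(
    #         [0.1, 0.7, 0.2], policy_name="MemoizationPolicy"
    #     )
    #     prediction.max_confidence        # -> 0.7
    #     prediction.max_confidence_index  # -> 1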


def confidence_scores_for(
    action_name: Text, value: float, domain: Domain
) -> List[float]:
    """Returns confidence scores if a single action is predicted.

    Args:
        action_name: the name of the action for which the score should be set
        value: the confidence for `action_name`
        domain: the :class:`rasa.shared.core.domain.Domain`

    Returns:
        a list of zeros of length `domain.num_actions`, with `value` set at the
        index of `action_name`
    """
    results = [0.0] * domain.num_actions
    idx = domain.index_for_action(action_name)
    results[idx] = value

    return results


class InvalidPolicyConfig(RasaException):
    """Exception that can be raised when policy config is not valid."""


def _get_featurizer_from_config(
    config: List[Dict[Text, Any]], policy_name: Text, lookup_path: Text
) -> Callable[..., TrackerFeaturizer]:
    """Gets the featurizer initializer and its arguments from a policy config."""
    # Only 1 featurizer is allowed
    if len(config) > 1:
        featurizer_names = [
            featurizer_config.get("name") for featurizer_config in config
        ]
        raise InvalidPolicyConfig(
            f"Every policy can only have 1 featurizer but '{policy_name}' "
            f"uses {len(config)} featurizers ('{', '.join(featurizer_names)}')."
        )

    featurizer_config = config[0]
    featurizer_name = featurizer_config.pop("name")
    featurizer_func = rasa.shared.utils.common.class_from_module_path(
        featurizer_name, lookup_path=lookup_path
    )

    return featurizer_func
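

# Illustrative sketch: a single-entry featurizer config resolves to the featurizer
# class of that name under the given lookup path ("TEDPolicy" is only an example
# policy name; note that `name` is popped from the config dict in place):
#
#     featurizer_config = [{"name": "MaxHistoryTrackerFeaturizer", "max_history": 5}]
#     featurizer_cls = _get_featurizer_from_config(
#         featurizer_config,
#         "TEDPolicy",
#         lookup_path="rasa.core.featurizers.tracker_featurizers",
#     )
#     featurizer = featurizer_cls(**featurizer_config[0])  # max_history=5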