rasa/engine/graph.py
from __future__ import annotations
import dataclasses
from abc import ABC, abstractmethod
from dataclasses import dataclass, field
import logging
from typing import Any, Callable, Dict, List, Optional, Text, Type, Tuple, Union
from rasa.engine.exceptions import (
GraphComponentException,
GraphRunError,
GraphSchemaException,
)
import rasa.shared.utils.common
import rasa.utils.common
from rasa.engine.storage.resource import Resource
from rasa.engine.storage.storage import ModelStorage
from rasa.shared.exceptions import InvalidConfigException, RasaException
from rasa.shared.data import TrainingType
logger = logging.getLogger(__name__)
@dataclass
class SchemaNode:
"""Represents one node in the schema.
Args:
needs: describes which parameters in `fn` (or `constructor_name`
if `eager==False`) are filled by which parent nodes.
uses: The class which models the behavior of this specific graph node.
constructor_name: The name of the constructor which should be used to
instantiate the component. If `eager==False` then the `constructor` can
also specify parameters which are filled by parent nodes. This is e.g.
useful if a parent node returns a `Resource` and this node wants to
directly load itself from this resource.
fn: The name of the function which should be called on the instantiated
component when the graph is executed. The parameters from `needs` are
filled from the parent nodes.
config: The user's configuration for this graph node. This configuration
does not need to be specify all possible parameters; the default values
for missing parameters will be filled in later.
eager: If `eager` then the component is instantiated before the graph is run.
Otherwise it's instantiated as the graph runs (lazily). Usually we always
instantiated lazily during training and eagerly during inference (to
avoid that the first prediction takes longer).
is_target: If `True` then this node can't be pruned during fingerprinting
(it might be replaced with a cached value though). This is e.g. used for
all components which train as their result always needs to be added to
the model archive so that the data is available during inference.
is_input: Nodes with `is_input` are _always_ run (also during the fingerprint
run). This makes sure that we e.g. detect changes in file contents.
resource: If given, then the graph node is loaded from an existing resource
instead of instantiated from scratch. This is e.g. used to load a trained
component for predictions.
"""
needs: Dict[Text, Text]
uses: Type[GraphComponent]
constructor_name: Text
fn: Text
config: Dict[Text, Any]
eager: bool = False
is_target: bool = False
is_input: bool = False
resource: Optional[Resource] = None
@dataclass
class GraphSchema:
"""Represents a graph for training a model or making predictions."""
nodes: Dict[Text, SchemaNode]
def as_dict(self) -> Dict[Text, Any]:
"""Returns graph schema in a serializable format.
Returns:
The graph schema in a format which can be dumped as JSON or other formats.
"""
serializable_graph_schema: Dict[Text, Dict[Text, Any]] = {"nodes": {}}
for node_name, node in self.nodes.items():
serializable = dataclasses.asdict(node)
# Classes are not JSON serializable (surprise)
serializable["uses"] = f"{node.uses.__module__}.{node.uses.__name__}"
serializable_graph_schema["nodes"][node_name] = serializable
return serializable_graph_schema
@classmethod
def from_dict(cls, serialized_graph_schema: Dict[Text, Any]) -> GraphSchema:
"""Loads a graph schema which has been serialized using `schema.as_dict()`.
Args:
serialized_graph_schema: A serialized graph schema.
Returns:
A properly loaded schema.
Raises:
GraphSchemaException: In case the component class for a node couldn't be
found.
"""
nodes = {}
for node_name, serialized_node in serialized_graph_schema["nodes"].items():
try:
serialized_node[
"uses"
] = rasa.shared.utils.common.class_from_module_path(
serialized_node["uses"]
)
resource = serialized_node["resource"]
if resource:
serialized_node["resource"] = Resource(**resource)
except ImportError as e:
raise GraphSchemaException(
"Error deserializing graph schema. Can't "
"find class for graph component type "
f"'{serialized_node['uses']}'."
) from e
nodes[node_name] = SchemaNode(**serialized_node)
return GraphSchema(nodes)
@property
def target_names(self) -> List[Text]:
"""Returns the names of all target nodes."""
return [node_name for node_name, node in self.nodes.items() if node.is_target]
def minimal_graph_schema(self, targets: Optional[List[Text]] = None) -> GraphSchema:
"""Returns a new schema where all nodes are a descendant of a target."""
dependencies = self._all_dependencies_schema(
targets if targets else self.target_names
)
return GraphSchema(
{
node_name: node
for node_name, node in self.nodes.items()
if node_name in dependencies
}
)
def _all_dependencies_schema(self, targets: List[Text]) -> List[Text]:
required = []
for target in targets:
required.append(target)
try:
target_dependencies = self.nodes[target].needs.values()
except KeyError: # This can happen if the target is an input placeholder.
continue
for dependency in target_dependencies:
required += self._all_dependencies_schema([dependency])
return required
class GraphComponent(ABC):
"""Interface for any component which will run in a graph."""
@classmethod
def required_components(cls) -> List[Type]:
"""Components that should be included in the pipeline before this component."""
return []
@classmethod
@abstractmethod
def create(
cls,
config: Dict[Text, Any],
model_storage: ModelStorage,
resource: Resource,
execution_context: ExecutionContext,
) -> GraphComponent:
"""Creates a new `GraphComponent`.
Args:
config: This config overrides the `default_config`.
model_storage: Storage which graph components can use to persist and load
themselves.
resource: Resource locator for this component which can be used to persist
and load itself from the `model_storage`.
execution_context: Information about the current graph run.
Returns: An instantiated `GraphComponent`.
"""
...
@classmethod
def load(
cls,
config: Dict[Text, Any],
model_storage: ModelStorage,
resource: Resource,
execution_context: ExecutionContext,
**kwargs: Any,
) -> GraphComponent:
"""Creates a component using a persisted version of itself.
If not overridden this method merely calls `create`.
Args:
config: The config for this graph component. This is the default config of
the component merged with config specified by the user.
model_storage: Storage which graph components can use to persist and load
themselves.
resource: Resource locator for this component which can be used to persist
and load itself from the `model_storage`.
execution_context: Information about the current graph run.
kwargs: Output values from previous nodes might be passed in as `kwargs`.
Returns:
An instantiated, loaded `GraphComponent`.
"""
return cls.create(config, model_storage, resource, execution_context)
@staticmethod
def get_default_config() -> Dict[Text, Any]:
"""Returns the component's default config.
Default config and user config are merged by the `GraphNode` before the
config is passed to the `create` and `load` method of the component.
Returns:
The default config of the component.
"""
return {}
@staticmethod
def supported_languages() -> Optional[List[Text]]:
"""Determines which languages this component can work with.
Returns: A list of supported languages, or `None` to signify all are supported.
"""
return None
@staticmethod
def not_supported_languages() -> Optional[List[Text]]:
"""Determines which languages this component cannot work with.
Returns: A list of not supported languages, or
`None` to signify all are supported.
"""
return None
@staticmethod
def required_packages() -> List[Text]:
"""Any extra python dependencies required for this component to run."""
return []
@classmethod
def fingerprint_addon(cls, config: Dict[str, Any]) -> Optional[str]:
"""Adds additional data to the fingerprint calculation.
This is useful if a component uses external data that is not provided
by the graph.
"""
return None
class GraphNodeHook(ABC):
"""Holds functionality to be run before and after a `GraphNode`."""
@abstractmethod
def on_before_node(
self,
node_name: Text,
execution_context: ExecutionContext,
config: Dict[Text, Any],
received_inputs: Dict[Text, Any],
) -> Dict:
"""Runs before the `GraphNode` executes.
Args:
node_name: The name of the node being run.
execution_context: The execution context of the current graph run.
config: The node's config.
received_inputs: Mapping from parameter name to input value.
Returns:
Data that is then passed to `on_after_node`
"""
...
@abstractmethod
def on_after_node(
self,
node_name: Text,
execution_context: ExecutionContext,
config: Dict[Text, Any],
output: Any,
input_hook_data: Dict,
) -> None:
"""Runs after the `GraphNode` as executed.
Args:
node_name: The name of the node that has run.
execution_context: The execution context of the current graph run.
config: The node's config.
output: The output of the node.
input_hook_data: Data returned from `on_before_node`.
"""
...
@dataclass
class ExecutionContext:
"""Holds information about a single graph run."""
graph_schema: GraphSchema = field(repr=False)
model_id: Optional[Text] = None
should_add_diagnostic_data: bool = False
is_finetuning: bool = False
# This is set by the `GraphNode` before it is passed to the `GraphComponent`.
node_name: Optional[Text] = None
class GraphNode:
"""Instantiates and runs a `GraphComponent` within a graph.
A `GraphNode` is a wrapper for a `GraphComponent` that allows it to be executed
in the context of a graph. It is responsible for instantiating the component at the
correct time, collecting the inputs from the parent nodes, running the run function
of the component and passing the output onwards.
"""
def __init__(
self,
node_name: Text,
component_class: Type[GraphComponent],
constructor_name: Text,
component_config: Dict[Text, Any],
fn_name: Text,
inputs: Dict[Text, Text],
eager: bool,
model_storage: ModelStorage,
resource: Optional[Resource],
execution_context: ExecutionContext,
hooks: Optional[List[GraphNodeHook]] = None,
) -> None:
"""Initializes `GraphNode`.
Args:
node_name: The name of the node in the schema.
component_class: The class to be instantiated and run.
constructor_name: The method used to instantiate the component.
component_config: Config to be passed to the component.
fn_name: The function on the instantiated `GraphComponent` to be run when
the node executes.
inputs: A map from input name to parent node name that provides it.
eager: Determines if the node is instantiated right away, or just before
being run.
model_storage: Storage which graph components can use to persist and load
themselves.
resource: If given the `GraphComponent` will be loaded from the
`model_storage` using the given resource.
execution_context: Information about the current graph run.
hooks: These are called before and after execution.
"""
self._node_name: Text = node_name
self._component_class: Type[GraphComponent] = component_class
self._constructor_name: Text = constructor_name
self._constructor_fn: Callable = getattr(
self._component_class, self._constructor_name
)
self._component_config: Dict[Text, Any] = rasa.utils.common.override_defaults(
self._component_class.get_default_config(), component_config
)
self._fn_name: Text = fn_name
self._fn: Callable = getattr(self._component_class, self._fn_name)
self._inputs: Dict[Text, Text] = inputs
self._eager: bool = eager
self._model_storage = model_storage
self._existing_resource = resource
self._execution_context: ExecutionContext = dataclasses.replace(
execution_context, node_name=self._node_name
)
self._hooks: List[GraphNodeHook] = hooks if hooks else []
self._component: Optional[GraphComponent] = None
if self._eager:
self._load_component()
def _load_component(self, **kwargs: Any) -> None:
logger.debug(
f"Node '{self._node_name}' loading "
f"'{self._component_class.__name__}.{self._constructor_name}' "
f"and kwargs: '{kwargs}'."
)
constructor = getattr(self._component_class, self._constructor_name)
try:
self._component: GraphComponent = constructor( # type: ignore[no-redef]
config=self._component_config,
model_storage=self._model_storage,
resource=self._get_resource(kwargs),
execution_context=self._execution_context,
**kwargs,
)
except InvalidConfigException:
# Pass through somewhat expected exception to allow more fine granular
# handling of exceptions.
raise
except Exception as e:
if not isinstance(e, RasaException):
raise GraphComponentException(
f"Error initializing graph component for node {self._node_name}."
) from e
else:
logger.error(
f"Error initializing graph component for node {self._node_name}."
)
raise
def _get_resource(self, kwargs: Dict[Text, Any]) -> Resource:
if "resource" in kwargs:
# A parent node provides resource during training. The component wrapped
# by this `GraphNode` will load itself from this resource.
return kwargs.pop("resource")
if self._existing_resource:
# The component should be loaded from a trained resource during inference.
# E.g. a classifier might train and persist itself during training and will
# then load itself from this resource during inference.
return self._existing_resource
# The component gets a chance to persist itself
return Resource(self._node_name)
def __call__(
self, *inputs_from_previous_nodes: Union[Tuple[Text, Any], Text]
) -> Tuple[Text, Any]:
"""Calls the `GraphComponent` run method when the node executes in the graph.
Args:
*inputs_from_previous_nodes: The output of all parent nodes. Each is a
dictionary with a single item mapping the node's name to its output.
If the node couldn't be resolved and has no output, the node name is
provided instead of a tuple.
Returns:
The node name and its output.
"""
# filter out arguments that dask couldn't lookup
received_inputs: Dict[Text, Any] = {}
for i in inputs_from_previous_nodes:
if isinstance(i, tuple):
node_name, node_output = i
received_inputs[node_name] = node_output
else:
logger.warning(
f"Node '{i}' was not resolved, there is no putput. "
f"Another component should have provided this as an "
f"output."
)
kwargs = {}
for input_name, input_provider_node_name in self._inputs.items():
if input_provider_node_name not in received_inputs:
raise GraphRunError(
f"Missing input to run node '{self._node_name}'. "
f"Expected input '{input_provider_node_name}' to "
f"provide parameter '{input_name}'."
)
kwargs[input_name] = received_inputs[input_provider_node_name]
input_hook_outputs = self._run_before_hooks(kwargs)
if not self._eager:
constructor_kwargs = rasa.shared.utils.common.minimal_kwargs(
kwargs, self._constructor_fn
)
self._load_component(**constructor_kwargs)
run_kwargs = {
k: v for k, v in kwargs.items() if k not in constructor_kwargs
}
else:
run_kwargs = kwargs
logger.debug(
f"Node '{self._node_name}' running "
f"'{self._component_class.__name__}.{self._fn_name}'."
)
try:
output = self._fn(self._component, **run_kwargs)
except InvalidConfigException:
# Pass through somewhat expected exception to allow more fine granular
# handling of exceptions.
raise
except Exception as e:
if not isinstance(e, RasaException):
raise GraphComponentException(
f"Error running graph component for node {self._node_name}."
) from e
else:
logger.error(
f"Error running graph component for node {self._node_name}."
)
raise
self._run_after_hooks(input_hook_outputs, output)
return self._node_name, output
def _run_after_hooks(self, input_hook_outputs: List[Dict], output: Any) -> None:
for hook, hook_data in zip(self._hooks, input_hook_outputs):
try:
logger.debug(
f"Hook '{hook.__class__.__name__}.on_after_node' "
f"running for node '{self._node_name}'."
)
hook.on_after_node(
node_name=self._node_name,
execution_context=self._execution_context,
config=self._component_config,
output=output,
input_hook_data=hook_data,
)
except Exception as e:
raise GraphComponentException(
f"Error running after hook for node '{self._node_name}'."
) from e
def _run_before_hooks(self, received_inputs: Dict[Text, Any]) -> List[Dict]:
input_hook_outputs = []
for hook in self._hooks:
try:
logger.debug(
f"Hook '{hook.__class__.__name__}.on_before_node' "
f"running for node '{self._node_name}'."
)
hook_output = hook.on_before_node(
node_name=self._node_name,
execution_context=self._execution_context,
config=self._component_config,
received_inputs=received_inputs,
)
input_hook_outputs.append(hook_output)
except Exception as e:
raise GraphComponentException(
f"Error running before hook for node '{self._node_name}'."
) from e
return input_hook_outputs
@classmethod
def from_schema_node(
cls,
node_name: Text,
schema_node: SchemaNode,
model_storage: ModelStorage,
execution_context: ExecutionContext,
hooks: Optional[List[GraphNodeHook]] = None,
) -> GraphNode:
"""Creates a `GraphNode` from a `SchemaNode`."""
return cls(
node_name=node_name,
component_class=schema_node.uses,
constructor_name=schema_node.constructor_name,
component_config=schema_node.config,
fn_name=schema_node.fn,
inputs=schema_node.needs,
eager=schema_node.eager,
model_storage=model_storage,
execution_context=execution_context,
resource=schema_node.resource,
hooks=hooks,
)
@dataclass()
class GraphModelConfiguration:
"""The model configuration to run as a graph during training and prediction."""
train_schema: GraphSchema
predict_schema: GraphSchema
training_type: TrainingType
assistant_id: Optional[Text]
language: Optional[Text]
core_target: Optional[Text]
nlu_target: Optional[Text]
spaces: Optional[Dict[Text, Text]] = None