RasaHQ/rasa_core

View on GitHub
tests/core/test_featurizer.py

Summary

Maintainability
F
4 days
Test Coverage
from rasa.core.featurizers import TrackerFeaturizer, \
    BinarySingleStateFeaturizer, LabelTokenizerSingleStateFeaturizer
import numpy as np


def test_fail_to_load_non_existent_featurizer():
    assert TrackerFeaturizer.load("non_existent_class") is None


def test_binary_featurizer_handles_on_non_existing_features():
    f = BinarySingleStateFeaturizer()
    f.input_state_map = {"a": 0, "b": 3, "c": 2, "d": 1}
    f.num_features = len(f.input_state_map)
    encoded = f.encode({"a": 1.0, "b": 1.0, "c": 0.0, "e": 1.0})
    assert (encoded == np.array([1, 0, 0, 1])).all()


def test_binary_featurizer_uses_correct_dtype_int():
    f = BinarySingleStateFeaturizer()
    f.input_state_map = {"a": 0, "b": 3, "c": 2, "d": 1}
    f.num_features = len(f.input_state_map)
    encoded = f.encode({"a": 1.0, "b": 1.0, "c": 0.0})
    assert encoded.dtype == np.int32


def test_binary_featurizer_uses_correct_dtype_float():
    f = BinarySingleStateFeaturizer()
    f.input_state_map = {"a": 0, "b": 3, "c": 2, "d": 1}
    f.num_features = len(f.input_state_map)
    encoded = f.encode({"a": 1.0, "b": 0.2, "c": 0.0})
    assert encoded.dtype == np.float64


def test_binary_featurizer_handles_on_non_existing_probabilistic_features():
    f = BinarySingleStateFeaturizer()
    f.input_state_map = {"a": 0, "b": 3, "c": 2, "d": 1}
    f.num_features = len(f.input_state_map)
    encoded = f.encode({"a": 1.0, "b": 0.2, "c": 0.0, "e": 1.0})
    assert (encoded == np.array([1, 0, 0, 0.2])).all()


def test_binary_featurizer_handles_probabilistic_intents():
    f = BinarySingleStateFeaturizer()
    f.input_state_map = {"intent_a": 0, "b": 3, "intent_c": 2, "d": 1}
    f.num_features = len(f.input_state_map)
    encoded = f.encode({"intent_a": 0.5, "b": 0.2, "intent_c": 1.0})
    assert (encoded == np.array([0.5, 0, 1.0, 0.2])).all()


def test_label_tokenizer_featurizer_handles_on_non_existing_features():
    f = LabelTokenizerSingleStateFeaturizer()
    f.user_labels = ["a_d"]
    f.bot_labels = ["c_b"]
    f.user_vocab = {"a": 0, "d": 1}
    f.bot_vocab = {"b": 1, "c": 0}
    f.num_features = (len(f.user_vocab) +
                      len(f.slot_labels) +
                      len(f.bot_vocab))
    encoded = f.encode({"a_d": 1.0, "prev_c_b": 0.0, "e": 1.0,
                        "prev_action_listen": 1.0})
    assert (encoded == np.array([1, 1, 0, 0])).all()


def test_label_tokenizer_featurizer_uses_correct_dtype_int():
    f = LabelTokenizerSingleStateFeaturizer()
    f.user_labels = ["a_d"]
    f.bot_labels = ["c_b"]
    f.user_vocab = {"a": 0, "d": 1}
    f.bot_vocab = {"b": 1, "c": 0}
    f.num_features = (len(f.user_vocab) +
                      len(f.slot_labels) +
                      len(f.bot_vocab))
    encoded = f.encode({"a_d": 1.0, "prev_c_b": 0.0,
                        "prev_action_listen": 1.0})
    assert encoded.dtype == np.int32


def test_label_tokenizer_featurizer_uses_correct_dtype_float():
    f = LabelTokenizerSingleStateFeaturizer()
    f.user_labels = ["a_d"]
    f.bot_labels = ["c_b"]
    f.user_vocab = {"a": 0, "d": 1}
    f.bot_vocab = {"b": 1, "c": 0}
    f.num_features = (len(f.user_vocab) +
                      len(f.slot_labels) +
                      len(f.bot_vocab))
    encoded = f.encode({"a_d": 0.2, "prev_c_b": 0.0,
                        "prev_action_listen": 1.0})
    assert encoded.dtype == np.float64


def test_label_tokenizer_featurizer_handles_on_non_existing_probabilistic():
    f = LabelTokenizerSingleStateFeaturizer()
    f.user_labels = ["a_d"]
    f.bot_labels = ["c_b"]
    f.user_vocab = {"a": 0, "d": 1}
    f.bot_vocab = {"b": 1, "c": 0}
    f.num_features = (len(f.user_vocab) +
                      len(f.slot_labels) +
                      len(f.bot_vocab))
    encoded = f.encode({"a_d": 0.2, "prev_c_b": 1.0, "c": 0.0, "e": 1.0,
                        "prev_action_listen": 1.0})
    assert (encoded == np.array([0.2, 0.2, 1.0, 1.0])).all()


def test_label_tokenizer_featurizer_handles_probabilistic_intents():
    f = LabelTokenizerSingleStateFeaturizer()
    f.user_labels = ["intent_a", "intent_d"]
    f.bot_labels = ["c", "b"]
    f.user_vocab = {"intent": 2, "a": 0, "d": 1}
    f.bot_vocab = {"b": 1, "c": 0}
    f.num_features = (len(f.user_vocab) +
                      len(f.slot_labels) +
                      len(f.bot_vocab))
    encoded = f.encode({"intent_a": 0.5, "prev_b": 0.2, "intent_d": 1.0,
                        "prev_action_listen": 1.0})
    assert (encoded == np.array([0.5, 1.0, 1.5, 0.0, 0.2])).all()