# src/so_magic/data/features/features.py
from abc import ABC, abstractmethod
import attr
from so_magic.data.variables.types import VariableTypeFactory
class AttributeReporterInterface(ABC):
    """Interface for objects able to report information about one attribute/variable.

    Implementations receive some structured data (the observations/datapoints) together
    with the name of an attribute and answer questions about that attribute.
    """

    @abstractmethod
    def values(self, datapoints, attribute, **kwargs):
        """Report the values that all datapoints hold for the given attribute.

        Given N datapoints with D attributes each (N x D), the result is the
        [N x 1] vector of values that corresponds to the requested attribute.

        Args:
            datapoints (Datapoints): the observations to read the values from
            attribute (str): the attribute/variable whose values are requested

        Returns:
            (numpy.ndarray): the values in a [N x 1] vector
        """
        raise NotImplementedError

    @abstractmethod
    def variable_type(self, datapoints, attribute, **kwargs):
        """Report the variable type of the given attribute of the datapoints.

        Args:
            datapoints (Datapoints): the observations holding the attribute
            attribute (str): the attribute/variable whose type is requested

        Returns:
            (str): the variable type, as decided by the implementation
        """
        raise NotImplementedError

    @abstractmethod
    def value_set(self, datapoints, attribute, **kwargs):
        """Report the distinct values that the datapoints hold for the given attribute.

        Args:
            datapoints (Datapoints): the observations to read the values from
            attribute (str): the attribute/variable whose distinct values are requested
        """
        raise NotImplementedError
class BaseAttributeReporter(AttributeReporterInterface):
    """Straightforward reporter that reads everything directly off the datapoints object."""

    def values(self, datapoints, attribute, **kwargs):
        """Return whatever the datapoints yield when subscripted with the attribute."""
        return datapoints[attribute]

    def variable_type(self, datapoints, attribute, **kwargs):
        """Delegate the type inference to VariableTypeFactory, forwarding any extra kwargs."""
        inferred = VariableTypeFactory.infer(datapoints, attribute, **kwargs)
        return inferred

    def value_set(self, datapoints, attribute, **kwargs):
        """Return the distinct entries of the datapoints' column for the attribute, as a set."""
        column = datapoints.column(attribute)
        return set(column)
#### HELPERS
def _list_validator(_self, _attribute, value):
if not isinstance(value, list):
raise ValueError(f'Expected a list; instead a {type(value).__name__} was given.')
def _string_validator(_self, _attribute, value):
if not isinstance(value, str):
raise ValueError(f'Expected a string; instead a {type(value).__name__} was given.')
@attr.s
class AttributeReporter:
    """Binds an attribute label to a reporter, so that callers only need to supply datapoints.

    Every query is forwarded to the reporter together with the stored label.
    """
    label = attr.ib(init=True)
    # NOTE: the default is a single BaseAttributeReporter instance created at class definition time
    reporter = attr.ib(init=True, default=BaseAttributeReporter())

    def values(self, datapoints):
        """Ask the reporter for the values of the datapoints under this label."""
        reporter = self.reporter
        return reporter.values(datapoints, self.label)

    def variable_type(self, datapoints):
        """Ask the reporter for the variable type of the datapoints under this label."""
        reporter = self.reporter
        return reporter.variable_type(datapoints, self.label)

    def value_set(self, datapoints):
        """Ask the reporter for the value set of the datapoints under this label."""
        reporter = self.reporter
        return reporter.value_set(datapoints, self.label)

    def __str__(self):
        """Use the label as the string form."""
        return self.label
@attr.s
class FeatureState:
    """Value object pairing a state's key with the reporter registered under that key."""
    # identifier of the state; also used as the string form of the object
    key = attr.ib(init=True)
    # the object associated with the state; presumably a callable taking a dataset -- verify against callers
    reporter = attr.ib(init=True)

    def __str__(self):
        """Use the key as the string form."""
        return self.key
def is_callable(_self, _attribute, value):
    """Validator (attrs-style signature) for callables that accept at least one positional argument.

    The argument check relies on signature introspection instead of the Python 2 only
    ``func_code`` attribute, so it works on Python 3 and for any kind of callable
    (plain functions, lambdas, bound methods, classes, objects defining ``__call__``).

    Args:
        _self: the instance being validated (unused)
        _attribute: the attribute being validated (unused)
        value: the object to check

    Raises:
        ValueError: if the value is not callable, or if it cannot receive any positional argument
    """
    # Imported locally so the validator does not depend on the module's import block.
    import inspect

    if not callable(value):
        raise ValueError(f"Expected a callable object; instead {type(value)} was given.")
    try:
        parameters = inspect.signature(value).parameters.values()
    except (TypeError, ValueError):
        # Some C-implemented callables expose no signature; do not reject what cannot be inspected.
        return
    positional_kinds = (
        inspect.Parameter.POSITIONAL_ONLY,
        inspect.Parameter.POSITIONAL_OR_KEYWORD,
        inspect.Parameter.VAR_POSITIONAL,  # *args can receive the single expected argument
    )
    if not any(parameter.kind in positional_kinds for parameter in parameters):
        raise ValueError("Expected a callable that takes at least 1 argument; "
                         "instead a callable that takes no arguments was given.")
@attr.s
class FeatureFunction:
    """A labeled callable that extracts feature values from a dataset.

    Example: assume we have a datapoint v = [v_1, v_2, .., v_n] and 2 feature functions f_1, f_2.
    Then we can produce an encoded vector (eg to feed for training a ML model) like:
    encoded_vector = [f_1(v), f_2(v)]

    Args:
        function (callable): the callable computing the feature; checked by the is_callable validator
        label (str, optional): name of the feature; when None it defaults to the name of the callable
    """
    function = attr.ib(init=True, validator=is_callable)
    label = attr.ib(init=True, default=None)

    @label.validator
    def is_label(self, _attribute, value):
        """Fill in a missing label with the name of the wrapped callable."""
        if value is None:
            # Python 3 exposes the name as __name__ (func_name was Python 2 only). Callables
            # without a __name__ (eg functools.partial objects) fall back to their type's name.
            self.label = getattr(self.function, '__name__', type(self.function).__name__)

    def values(self, dataset):
        """Apply the wrapped callable to the dataset and return its result."""
        return self.function(dataset)

    @property
    def state(self):
        """A FeatureState built from the label (as key) and the wrapped callable (as reporter)."""
        return FeatureState(self.label, self.function)
@attr.s
class StateMachine:
    """Holds a mapping of states and remembers which key is the current one.

    Args:
        states: mapping from state key to the object registered for that state
        init_state: the key that is current right after construction
    """
    states = attr.ib(init=True)
    init_state = attr.ib(init=True)
    # not an __init__ argument; starts out equal to init_state
    _current = attr.ib(init=False, default=attr.Factory(lambda self: self.init_state, takes_self=True))

    @property
    def current(self):
        """The key of the current state."""
        return self._current

    def update(self, *args, **kwargs):
        """Switch the current state, optionally registering it first.

        With two (or more) positional arguments, the first is a key and the second the object
        to store under it; the key then becomes current. With exactly one positional argument,
        it must be an already registered key, which becomes current. With no positional
        arguments nothing happens. Keyword arguments are accepted but not used.

        Raises:
            RuntimeError: if a single key is given and it is not among the registered states
        """
        if not args:
            return
        key = args[0]
        if len(args) > 1:
            # register (or overwrite) the state before making it current
            self.states[key] = args[1]
            self._current = key
            return
        if key not in self.states:
            raise RuntimeError(f"Requested to set the current state to '{args[0]}', "
                               f"it is not in existing [{', '.join(sorted(self.states))}]")
        self._current = key

    @property
    def state(self):
        """Construct an object representing the current state"""
        key = self._current
        return FeatureState(key, self.states[key])
@attr.s
class TrackingFeature:
    """A feature coupled with a state machine that decides how its values are computed.

    Args:
        feature: object exposing a `label` attribute (eg a FeatureFunction)
        state_machine: object exposing `state` and `update` (eg a StateMachine)
        variable_type: optional variable type information; defaults to None
    """
    feature = attr.ib(init=True)
    state_machine = attr.ib(init=True)
    variable_type = attr.ib(init=True, default=None)

    @classmethod
    def from_callable(cls, a_callable, label=None, variable_type=None):
        """Construct a feature that has one extract/report capability.

        The callable is wrapped in a FeatureFunction and also registered as the 'raw' state
        of a fresh StateMachine, with 'raw' being the initial state.
        """
        # NOTE(review): this always builds a TrackingFeature, even when called on a subclass
        feature_function = FeatureFunction(a_callable, label)
        machine = StateMachine({'raw': a_callable}, 'raw')
        return TrackingFeature(feature_function, machine, variable_type)

    def values(self, dataset):
        """Call the reporter of the current state on the dataset and return the result."""
        current_state = self.state_machine.state
        return current_state.reporter(dataset)

    def label(self):
        """Return the label of the wrapped feature (a regular method, not a property)."""
        return self.feature.label

    @property
    def state(self):
        """Returns the current state"""
        machine = self.state_machine
        return machine.state

    def update(self, *args, **kwargs):
        """Forward all arguments to the state machine's update method."""
        machine = self.state_machine
        machine.update(*args, **kwargs)
@attr.s
class FeatureIndex:
    """Holds a list of keys; construction fails with ValueError unless `keys` is a list."""
    keys = attr.ib(validator=_list_validator, init=True)
class PhiFeatureFunction:
    """Callable placeholder: invoking an instance always raises NotImplementedError."""

    def __call__(self, *args, **kwargs):
        """Accept any arguments and raise NotImplementedError."""
        raise NotImplementedError