KarrLab/obj_tables

View on GitHub
obj_tables/math/numeric.py

Summary

Maintainability
D
2 days
Test Coverage
A
100%
""" Math attributes

:Author: Jonathan Karr <karr@mssm.edu>
:Date: 2017-05-10
:Copyright: 2017, Karr Lab
:License: MIT
"""

from .. import core
import json
import numpy
import pandas

__all__ = [
    'ArrayAttribute',
    'TableAttribute',
]


class ArrayAttribute(core.LiteralAttribute):
    """ numpy.ndarray attribute

    Attributes:
        min_length (:obj:`int`): minimum length
        max_length (:obj:`int`): maximum length
        default (:obj:`numpy.ndarray`): default value
    """

    def __init__(self, min_length=0, max_length=float('inf'), default=None, none_value=None, verbose_name='', description='',
                 primary=False, unique=False):
        """
        Args:
            min_length (:obj:`int`, optional): minimum length
            max_length (:obj:`int`, optional): maximum length
            default (:obj:`numpy.array`, optional): default value
            none_value (:obj:`object`, optional): none value
            verbose_name (:obj:`str`, optional): verbose name
            description (:obj:`str`, optional): description
            primary (:obj:`bool`, optional): indicate if attribute is primary attribute
            unique (:obj:`bool`, optional): indicate if attribute value must be unique
        """
        if default is not None and not isinstance(default, numpy.ndarray):
            raise ValueError('`default` must be a `numpy.array` or `None`')
        if not isinstance(min_length, (int, float)) or min_length < 0:
            raise ValueError('`min_length` must be a non-negative integer')
        if not isinstance(max_length, (int, float)) or max_length < min_length:
            raise ValueError('`max_length` must be an integer greater than or equal to `min_length`')

        super(ArrayAttribute, self).__init__(default=default, none_value=none_value,
                                             verbose_name=verbose_name, description=description,
                                             primary=primary, unique=unique)

        if primary:
            self.type = numpy.ndarray
        else:
            self.type = (numpy.ndarray, None.__class__)
        self.min_length = min_length
        self.max_length = max_length

    def deserialize(self, value):
        """ Deserialize value

        Args:
            value (:obj:`str`): semantically equivalent representation

        Returns:
            :obj:`tuple` of :obj:`numpy.array`, :obj:`core.InvalidAttribute` or :obj:`None`: tuple of cleaned value and cleaning error
        """
        if self.default is not None:
            dtype = self.default.dtype.type
        else:
            dtype = None

        if value is None:
            value = None
            error = None
        elif isinstance(value, str) and value == '':
            value = None
            error = None
        elif isinstance(value, str):
            try:
                value = numpy.array(json.loads(value), dtype)
                error = None
            except Exception:
                value = None
                error = 'Unable to parse numpy array from string'
        elif isinstance(value, (list, tuple, numpy.ndarray)):
            value = numpy.array(value, dtype)
            error = None
        else:
            value = None
            error = core.InvalidAttribute(self, [
                ('ArrayAttribute must be None, an empty string, '
                 'a JSON-formatted array, a tuple, a list, '
                 'or a numpy array')
            ])
        return (value, error)

    def validate(self, obj, value):
        """ Determine if :obj:`value` is a valid value

        Args:
            obj (:obj:`Model`): class being validated
            value (:obj:`numpy.array`): value of attribute to validate

        Returns:
            :obj:`core.InvalidAttribute` or None: None if attribute is valid, other return
                list of errors as an instance of :obj:`core.InvalidAttribute`
        """
        errors = []

        if value is not None:
            if not isinstance(value, numpy.ndarray):
                errors.append('Value must be an instance of `numpy.ndarray`')
            elif self.default is not None:
                for elem in numpy.nditer(value):
                    if not isinstance(elem, self.default.dtype.type):
                        errors.append('Array elements must be of type `{}`'.format(self.default.dtype.type.__name__))
                        break

        if self.min_length and (value is None or len(value) < self.min_length):
            errors.append('Value must be at least {:d} characters'.format(self.min_length))

        if self.max_length and value is not None and len(value) > self.max_length:
            errors.append('Value must be less than {:d} characters'.format(self.max_length))

        if self.primary and (value is None or len(value) == 0):
            errors.append('{} value for primary attribute cannot be empty'.format(
                self.__class__.__name__))

        if errors:
            return core.InvalidAttribute(self, errors)
        return None

    def validate_unique(self, objects, values):
        """ Determine if the attribute values are unique

        Args:
            objects (:obj:`list` of :obj:`Model`): list of :obj:`Model` objects
            values (:obj:`list` of :obj:`numpy.array`): list of values

        Returns:
            :obj:`core.InvalidAttribute` or None: None if values are unique, otherwise return a
                list of errors as an instance of :obj:`core.InvalidAttribute`
        """
        str_values = []
        for v in values:
            str_values.append(self.serialize(v))
        return super(ArrayAttribute, self).validate_unique(objects, str_values)

    def serialize(self, value):
        """ Serialize string

        Args:
            value (:obj:`numpy.array`): Python representation

        Returns:
            :obj:`str`: simple Python representation
        """
        if value is not None:
            return json.dumps(value.tolist())
        return ''

    def to_builtin(self, value):
        """ Encode a value of the attribute using a simple Python representation (dict, list, str, float, bool, None)
        that is compatible with JSON and YAML

        Args:
            value (:obj:`numpy.array`): value of the attribute

        Returns:
            :obj:`list`: simple Python representation of a value of the attribute
        """
        if value is None:
            return None
        else:
            return value.tolist()

    def from_builtin(self, json):
        """ Decode a simple Python representation (dict, list, str, float, bool, None) of a value of the attribute
        that is compatible with JSON and YAML

        Args:
            json (:obj:`list`): simple Python representation of a value of the attribute

        Returns:
            :obj:`numpy.array`: decoded value of the attribute
        """
        if json is None:
            return None
        else:
            if self.default is not None:
                dtype = self.default.dtype.type
            else:
                dtype = None
            return numpy.array(json, dtype)


class TableAttribute(core.LiteralAttribute):
    """ pandas.DataFrame attribute

    Attributes:
        default (:obj:`pandas.DataFrame`): default value
    """

    def __init__(self, default=None, none_value=None, verbose_name='', description='',
                 primary=False, unique=False):
        """
        Args:
            default (:obj:`pandas.DataFrame`, optional): default value
            none_value (:obj:`object`, optional): none value
            verbose_name (:obj:`str`, optional): verbose name
            description (:obj:`str`, optional): description
            primary (:obj:`bool`, optional): indicate if attribute is primary attribute
            unique (:obj:`bool`, optional): indicate if attribute value must be unique
        """
        if default is not None and not isinstance(default, pandas.DataFrame):
            raise ValueError('`default` must be a `pandas.DataFrame` or `None`')

        super(TableAttribute, self).__init__(default=default, none_value=none_value,
                                             verbose_name=verbose_name, description=description,
                                             primary=primary, unique=unique)
        if primary:
            self.type = pandas.DataFrame
        else:
            self.type = (pandas.DataFrame, None.__class__)

    def deserialize(self, value):
        """ Deserialize value

        Args:
            value (:obj:`str`): semantically equivalent representation

        Returns:
            :obj:`tuple` of :obj:`pandas.DataFrame`, :obj:`core.InvalidAttribute` or :obj:`None`: tuple of cleaned value and cleaning error
        """
        if self.default is not None:
            dtype = self.default.values.dtype.type
        else:
            dtype = None

        if value is None:
            value = None
            error = None
        elif isinstance(value, str) and value == '':
            value = None
            error = None
        elif isinstance(value, str):
            try:
                dict_value = json.loads(value)
                index = dict_value.pop('_index')
                value = pandas.DataFrame.from_dict(dict_value, dtype=dtype)
                value.index = pandas.Index(index)
                error = None
            except Exception:
                value = None
                error = 'Unable to parse pandas.DataFrame from string'
        elif isinstance(value, dict):
            try:
                index = value.pop('_index')
                value = pandas.DataFrame(value)
                value = value.astype(dtype)
                value.index = pandas.Index(index)
                error = None
            except Exception:
                value = None
                error = 'Unable to parse pandas.DataFrame from dict'
        elif isinstance(value, pandas.DataFrame):
            error = None
        else:
            value = None
            error = core.InvalidAttribute(self, [
                ('TableAttribute must be None, an empty string, '
                 'a JSON-formatted dict, a dict, '
                 'or a pandas.DataFrame')
            ])
        return (value, error)

    def validate(self, obj, value):
        """ Determine if :obj:`value` is a valid value

        Args:
            obj (:obj:`Model`): class being validated
            value (:obj:`pandas.DataFrame`): value of attribute to validate

        Returns:
            :obj:`core.InvalidAttribute` or None: None if attribute is valid, other return list of
                errors as an instance of :obj:`core.InvalidAttribute`
        """
        errors = []

        if value is not None:
            if not isinstance(value, pandas.DataFrame):
                errors.append('Value must be an instance of `pandas.DataFrame`')
            elif self.default is not None:
                for elem in numpy.nditer(value.values):
                    if not issubclass(elem.dtype.type, self.default.values.dtype.type):
                        errors.append('Array elements must be of type `{}`'.format(self.default.values.dtype.type.__name__))
                        break

        if self.primary and (value is None or value.values.size == 0):
            errors.append('{} value for primary attribute cannot be empty'.format(
                self.__class__.__name__))

        if errors:
            return core.InvalidAttribute(self, errors)
        return None

    def validate_unique(self, objects, values):
        """ Determine if the attribute values are unique

        Args:
            objects (:obj:`list` of :obj:`Model`): list of :obj:`Model` objects
            values (:obj:`list` of :obj:`pandas.DataFrame`): list of values

        Returns:
            :obj:`core.InvalidAttribute` or None: None if values are unique, otherwise return a
                list of errors as an instance of :obj:`core.InvalidAttribute`
        """
        str_values = []
        for v in values:
            str_values.append(self.serialize(v))
        return super(TableAttribute, self).validate_unique(objects, str_values)

    def serialize(self, value):
        """ Serialize string

        Args:
            value (:obj:`pandas.DataFrame`): Python representation

        Returns:
            :obj:`str`: simple Python representation
        """
        if value is not None:
            dict_value = value.to_dict()
            dict_value['_index'] = value.index.values.tolist()
            return json.dumps(dict_value)
        return ''

    def to_builtin(self, value):
        """ Encode a value of the attribute using a simple Python representation (dict, list, str, float, bool, None)
        that is compatible with JSON and YAML

        Args:
            value (:obj:`pandas.DataFrame`): value of the attribute

        Returns:
            :obj:`dict`: simple Python representation of a value of the attribute
        """
        if value is None:
            return None
        else:
            dict_value = value.to_dict()
            dict_value['_index'] = value.index.values.tolist()
            return dict_value

    def from_builtin(self, json):
        """ Decode a simple Python representation (dict, list, str, float, bool, None) of a value of the attribute
        that is compatible with JSON and YAML

        Args:
            json (:obj:`dict`): simple Python representation of a value of the attribute

        Returns:
            :obj:`pandas.DataFrame`: decoded value of the attribute
        """
        if json is None:
            return None
        else:
            if self.default is not None:
                dtype = self.default.values.dtype.type
            else:
                dtype = None
            index = json.pop('_index')
            value = pandas.DataFrame.from_dict(json, dtype=dtype)
            value.index = pandas.Index(index)
            return value