KarrLab/obj_tables

View on GitHub
obj_tables/sci/refs.py

Summary

Maintainability
F
6 days
Test Coverage
A
100%
""" Attributes for references

:Author: Jonathan Karr <karr@mssm.edu>
:Date: 2019-09-20
:Copyright: 2019, Karr Lab
:License: MIT
"""

from .. import core
import re
import wc_utils.workbook.io

__all__ = [
    'Identifier',
    'IdentifierAttribute',
    'IdentifiersAttribute',
    'DoiAttribute',
    'DoisAttribute',
    'PubMedIdAttribute',
    'PubMedIdsAttribute',
]


class Identifier(object):
    """ An identifier in a namespace registered with Identifiers.org

    The string representation is ``'id' @ 'namespace'``. Single quotes inside the id or
    the namespace are written backslash-escaped so that the representation can be parsed
    back by :obj:`from_str`.

    Attributes:
        namespace (:obj:`str`): namespace
        id (:obj:`str`): identifier in namespace
    """

    def __init__(self, namespace=None, id=None):
        """
        Args:
            namespace (:obj:`str`, optional): namespace
            id (:obj:`str`, optional): identifier in namespace
        """
        # empty strings are normalized to None
        self.namespace = namespace or None
        self.id = id or None

    def to_dict(self):
        """ Get a dictionary representation

        Returns:
            :obj:`dict`: dictionary with the keys ``namespace`` and ``id``
        """
        return {'namespace': self.namespace, 'id': self.id}

    def from_dict(self, value):
        """ Set value from a dictionary representation

        Args:
            value (:obj:`dict`): dictionary with the keys ``namespace`` and ``id``

        Returns:
            :obj:`Identifier`: self

        Raises:
            :obj:`KeyError`: if ``namespace`` or ``id`` is missing from :obj:`value`
        """
        self.namespace = value['namespace']
        self.id = value['id']
        return self

    @staticmethod
    def _escape(value):
        """ Backslash-escape the single quotes in a string

        Args:
            value (:obj:`str`): raw string

        Returns:
            :obj:`str`: string with each single quote preceded by a backslash
        """
        return value.replace("'", "\\'")

    @staticmethod
    def _unescape(value):
        """ Reverse :obj:`_escape`

        Args:
            value (:obj:`str`): string with backslash-escaped single quotes

        Returns:
            :obj:`str`: raw string
        """
        return value.replace("\\'", "'")

    def __str__(self):
        """ Generate a string representation (``'id' @ 'namespace'``)

        Only single quotes are escaped; other backslashes are written verbatim so that
        values without quotes serialize exactly as before. As a consequence, a value in
        which a backslash directly precedes a quote (or which ends in a backslash)
        cannot be parsed back.

        Returns:
            :obj:`str`: string representation

        Raises:
            :obj:`AttributeError`: if the id or the namespace is :obj:`None`
        """
        return "'{}' @ '{}'".format(self._escape(self.id),
                                    self._escape(self.namespace))

    def to_str(self):
        """ Generate a string representation (``'id' @ 'namespace'``)

        Returns:
            :obj:`str`: string representation
        """
        return str(self)

    def from_str(self, value):
        """ Set value from a string representation (``'id' @ 'namespace'``)

        Args:
            value (:obj:`str`): string representation

        Returns:
            :obj:`Identifier`: self

        Raises:
            :obj:`ValueError`: if string representation doesn't match the pattern
                ``'id' @ 'namespace'``
        """
        # each quoted part is a run of plain characters or backslash-escaped characters
        match = re.match(r"^'((?:[^'\\]|\\.)*)' *@ *'((?:[^'\\]|\\.)*)'$", value, re.IGNORECASE)
        if not match:
            raise ValueError("Value must follow the pattern 'id' @ 'namespace'")
        self.namespace = self._unescape(match.group(2))
        self.id = self._unescape(match.group(1))
        return self

    def get_url(self):
        """ Get the URL for the webpage for the identifier

        Returns:
            :obj:`str`: URL for the webpage for the identifier
        """
        return 'https://identifiers.org/{}:{}'.format(self.namespace, self.id)


class IdentifierAttribute(core.LiteralAttribute):
    """ Attribute whose value is a single :obj:`Identifier` (or :obj:`None`) """

    def __init__(self):
        description = (
            "An identifier."
            "\n"
            "\nExample:"
            "\n  '16333295' @ 'pubmed'"
        )
        super(IdentifierAttribute, self).__init__(
            default=None,
            none_value=None,
            verbose_name='Identifier',
            description=description,
            primary=False,
            unique=False)
        # values are either an identifier or None
        self.type = (Identifier, None.__class__)

    def deserialize(self, value):
        """ Parse the string representation of an identifier

        Args:
            value (:obj:`str`): string representation (``'id' @ 'namespace'``)

        Returns:
            :obj:`tuple`:

                * :obj:`Identifier`: parsed value, or :obj:`None` for an empty or invalid value
                * :obj:`core.InvalidAttribute`: parsing error, or :obj:`None` on success
        """
        if not value:
            return (None, None)

        if not isinstance(value, str):
            return (None, core.InvalidAttribute(self, ['Value must be an identifier']))

        try:
            identifier = Identifier().from_str(value)
        except ValueError as error:
            return (None, core.InvalidAttribute(self, [str(error)]))
        return (identifier, None)

    def validate(self, obj, value):
        """ Check that :obj:`value` is :obj:`None` or an instance of :obj:`Identifier`

        Args:
            obj (:obj:`Model`): object whose attribute is being validated
            value (:obj:`Identifier`): value of attribute to validate

        Returns:
            :obj:`core.InvalidAttribute` or None: None if the value is valid, otherwise
                the errors as an instance of :obj:`core.InvalidAttribute`
        """
        if value is None or isinstance(value, Identifier):
            return None
        return core.InvalidAttribute(self, ['Value must be an identifier'])

    def serialize(self, value):
        """ Generate the string representation of an identifier

        Args:
            value (:obj:`Identifier`): value of the attribute

        Returns:
            :obj:`str`: string representation, or an empty string for a false value
        """
        return str(value) if value else ''

    def to_builtin(self, value):
        """ Encode a value of the attribute using a simple Python representation (dict, list, str, float, bool, None)
        that is compatible with JSON and YAML

        Args:
            value (:obj:`Identifier`): value of the attribute

        Returns:
            :obj:`dict`: result of :obj:`Identifier.to_dict`, or :obj:`None` for a false value
        """
        return value.to_dict() if value else None

    def from_builtin(self, json):
        """ Decode a simple Python representation (dict, list, str, float, bool, None) of a value of the attribute
        that is compatible with JSON and YAML

        Args:
            json (:obj:`dict`): dictionary representation of an identifier

        Returns:
            :obj:`Identifier`: decoded value, or :obj:`None` for a false value
        """
        if not json:
            return None
        return Identifier().from_dict(json)

    def get_xlsx_validation(self, sheet_models=None, doc_metadata_model=None):
        """ Get XLSX validation

        Args:
            sheet_models (:obj:`list` of :obj:`Model`, optional): models encoded as separate sheets
            doc_metadata_model (:obj:`type`): model whose worksheet contains the document metadata

        Returns:
            :obj:`wc_utils.workbook.io.FieldValidation`: validation
        """
        validation = super(IdentifierAttribute, self).get_xlsx_validation(
            sheet_models=sheet_models, doc_metadata_model=doc_metadata_model)

        # identifiers are free text, so any cell content is allowed by the spreadsheet
        validation.type = wc_utils.workbook.io.FieldValidationType.any

        extra_input = 'Enter an identifier.'
        extra_error = 'Value must be an identifier.'

        # separate the additional text from any message set by the parent class
        input_separator = '\n\n' if validation.input_message else ''
        validation.input_message += input_separator + extra_input

        error_separator = '\n\n' if validation.error_message else ''
        validation.error_message += error_separator + extra_error

        return validation


class IdentifiersAttribute(core.LiteralAttribute):
    """ Attribute whose value is a list of :obj:`Identifier` """

    def __init__(self):
        super(IdentifiersAttribute, self).__init__(
            default=None,
            none_value=None,
            verbose_name='Identifiers',
            description=(
                "A list of identifiers."
                "\n"
                "\nExample:"
                "\n  '16333295' @ 'pubmed', 'CHEBI:36927' @ 'chebi'"
            ),
            primary=False,
            unique=False)
        self.type = list

    def deserialize(self, value):
        """ Parse a comma-separated list of identifiers

        Args:
            value (:obj:`str`): comma-separated list of ``'id' @ 'namespace'`` items

        Returns:
            :obj:`tuple`:

                * :obj:`list` of :obj:`Identifier`: parsed value (empty list for an empty
                  value, :obj:`None` for an invalid value)
                * :obj:`core.InvalidAttribute`: parsing error, or :obj:`None` on success
        """
        if value:
            if not isinstance(value, str):
                return (None, core.InvalidAttribute(self, ['Value must be a list of identifiers']))

            id_pattern = r"'((?:[^'\\]|\\.)*)' *@ *'((?:[^'\\]|\\.)*)'"
            # first check that the whole value is a well-formed list, then extract the items
            match = re.match(r"^{}(, *{})*$".format(id_pattern, id_pattern), value, re.IGNORECASE)
            if not match:
                return (None, core.InvalidAttribute(self, ['Value must be a list of identifiers']))

            # the pattern accepts backslash-escaped quotes; strip the escaping so that the
            # parsed ids and namespaces contain the quotes themselves
            identifiers = [
                Identifier(namespace.replace("\\'", "'"), id.replace("\\'", "'"))
                for id, namespace in re.findall(id_pattern, value, re.IGNORECASE)
            ]
            return (identifiers, None)
        return ([], None)

    def validate(self, obj, value):
        """ Determine if :obj:`value` is a list of instances of :obj:`Identifier`

        Args:
            obj (:obj:`Model`): object whose attribute is being validated
            value (:obj:`list` of :obj:`Identifier`): value of attribute to validate

        Returns:
            :obj:`core.InvalidAttribute` or None: None if attribute is valid, otherwise
                the errors as an instance of :obj:`core.InvalidAttribute`
        """
        errors = []

        if not isinstance(value, list):
            # don't iterate over a non-list (e.g., None): report it as invalid instead
            errors.append('Value must be a list of identifiers')
        elif not all(isinstance(v, Identifier) for v in value):
            errors.append('Value must be a list of identifiers')

        if errors:
            return core.InvalidAttribute(self, errors)
        return None

    def serialize(self, value):
        """ Generate a comma-separated string representation

        Args:
            value (:obj:`list` of :obj:`Identifier`): value of the attribute

        Returns:
            :obj:`str`: comma-separated list of the string representations of the identifiers
        """
        return ', '.join(str(v) for v in value)

    def to_builtin(self, value):
        """ Encode a value of the attribute using a simple Python representation (dict, list, str, float, bool, None)
        that is compatible with JSON and YAML

        Args:
            value (:obj:`list` of :obj:`Identifier`): value of the attribute

        Returns:
            :obj:`list` of :obj:`dict`: dictionary representation of each identifier
        """
        return [v.to_dict() for v in value]

    def from_builtin(self, json):
        """ Decode a simple Python representation (dict, list, str, float, bool, None) of a value of the attribute
        that is compatible with JSON and YAML

        Args:
            json (:obj:`list` of :obj:`dict`): dictionary representation of each identifier

        Returns:
            :obj:`list` of :obj:`Identifier`: decoded value of the attribute
        """
        return [Identifier().from_dict(v) for v in json]

    def get_xlsx_validation(self, sheet_models=None, doc_metadata_model=None):
        """ Get XLSX validation

        Args:
            sheet_models (:obj:`list` of :obj:`Model`, optional): models encoded as separate sheets
            doc_metadata_model (:obj:`type`): model whose worksheet contains the document metadata

        Returns:
            :obj:`wc_utils.workbook.io.FieldValidation`: validation
        """
        validation = super(IdentifiersAttribute, self).get_xlsx_validation(sheet_models=sheet_models,
                                                                           doc_metadata_model=doc_metadata_model)

        validation.type = wc_utils.workbook.io.FieldValidationType.any

        input_message = ['Enter a comma-separated list of identifiers.']
        error_message = ['Value must be a comma-separated list of identifiers.']

        # append to, rather than replace, any message set by the parent class
        if validation.input_message:
            validation.input_message += '\n\n'
        validation.input_message += '\n\n'.join(input_message)

        if validation.error_message:
            validation.error_message += '\n\n'
        validation.error_message += '\n\n'.join(error_message)

        return validation


class DoiAttribute(core.RegexAttribute):
    """ DOI attribute """

    def __init__(self, primary=False, unique=True):
        """
        Args:
            primary (:obj:`bool`, optional): indicate if attribute is primary attribute
            unique (:obj:`bool`, optional): indicate if attribute value must be unique
        """
        # the dot after the "10" directory indicator is escaped so that it only
        # matches a literal dot rather than any character
        pattern = r'^10\.\d{4,9}/[-._;()/:A-Z0-9]+$'
        super(DoiAttribute, self).__init__(pattern, re.IGNORECASE,
                                           none=True, default=None, none_value=None,
                                           verbose_name='DOI',
                                           description='Digital Object Identifier (DOI)',
                                           primary=primary, unique=unique)

    @staticmethod
    def get_url(doi):
        """ Get the URL for a DOI

        Args:
            doi (:obj:`str`): DOI

        Returns:
            :obj:`str`: URL for the DOI at https://doi.org
        """
        return 'https://doi.org/' + doi


class DoisAttribute(core.LiteralAttribute):
    """ DOIs attribute

    Attributes:
        PATTERN (:obj:`str`): regular expression that each DOI must match (case-insensitively)
    """

    # the dot after the "10" directory indicator is escaped so that it only matches a literal dot
    PATTERN = r'^10\.\d{4,9}/[-._;()/:A-Z0-9]+$'

    def __init__(self):
        super(DoisAttribute, self).__init__(
            default=[],
            none_value=[],
            verbose_name='DOIs',
            description=(
                "A list of DOIs."
                "\n"
                "\nExample:"
                "\n  10.1016/j.mib.2015.06.004,"
                " 10.1016/j.coisb.2017.10.005"
            ),
            primary=False,
            unique=False)
        self.type = list

    def deserialize(self, value):
        """ Parse a comma-separated list of DOIs

        The items are only split and stripped here; their format is checked by
        :obj:`validate`.

        Args:
            value (:obj:`str`): comma-separated list of DOIs

        Returns:
            :obj:`tuple`:

                * :obj:`list` of :obj:`str`: parsed value (empty list for an empty value,
                  :obj:`None` for a non-string value)
                * :obj:`core.InvalidAttribute`: parsing error, or :obj:`None` on success
        """
        if value:
            if not isinstance(value, str):
                return (None, core.InvalidAttribute(self, ['Value must be a comma-separated list of DOIs']))

            return ([v.strip() for v in value.split(',')], None)
        return ([], None)

    def validate(self, obj, value):
        """ Determine if :obj:`value` is a list of strings that match :obj:`PATTERN`

        Args:
            obj (:obj:`Model`): object whose attribute is being validated
            value (:obj:`list` of :obj:`str`): value of attribute to validate

        Returns:
            :obj:`core.InvalidAttribute` or None: None if attribute is valid, otherwise
                the errors as an instance of :obj:`core.InvalidAttribute`
        """
        errors = []

        if not isinstance(value, list):
            # don't iterate over a non-list (e.g., None or a string): report it as invalid
            errors.append('Value must be a list of DOIs')
        else:
            for v in value:
                # non-string items are invalid; re.match would raise TypeError for them
                if not isinstance(v, str) or not re.match(self.PATTERN, v, re.IGNORECASE):
                    errors.append('Value must be a list of DOIs')
                    break

        if errors:
            return core.InvalidAttribute(self, errors)
        return None

    def serialize(self, value):
        """ Generate a comma-separated string representation

        Args:
            value (:obj:`list` of :obj:`str`): list of DOIs

        Returns:
            :obj:`str`: comma-separated list of DOIs
        """
        return ', '.join(value)

    def to_builtin(self, value):
        """ Encode a value of the attribute using a simple Python representation (dict, list, str, float, bool, None)
        that is compatible with JSON and YAML

        Args:
            value (:obj:`list` of :obj:`str`): value of the attribute

        Returns:
            :obj:`list` of :obj:`str`: the value itself, which is already a simple Python representation
        """
        return value

    def from_builtin(self, json):
        """ Decode a simple Python representation (dict, list, str, float, bool, None) of a value of the attribute
        that is compatible with JSON and YAML

        Args:
            json (:obj:`list` of :obj:`str`): simple Python representation of a value of the attribute

        Returns:
            :obj:`list` of :obj:`str`: decoded value of the attribute (returned unchanged)
        """
        return json

    def get_xlsx_validation(self, sheet_models=None, doc_metadata_model=None):
        """ Get XLSX validation

        Args:
            sheet_models (:obj:`list` of :obj:`Model`, optional): models encoded as separate sheets
            doc_metadata_model (:obj:`type`): model whose worksheet contains the document metadata

        Returns:
            :obj:`wc_utils.workbook.io.FieldValidation`: validation
        """
        validation = super(DoisAttribute, self).get_xlsx_validation(sheet_models=sheet_models,
                                                                    doc_metadata_model=doc_metadata_model)

        validation.type = wc_utils.workbook.io.FieldValidationType.any

        input_message = ['Enter a comma-separated list of DOIs.']
        error_message = ['Value must be a comma-separated list of DOIs.']

        # append to, rather than replace, any message set by the parent class
        if validation.input_message:
            validation.input_message += '\n\n'
        validation.input_message += '\n\n'.join(input_message)

        if validation.error_message:
            validation.error_message += '\n\n'
        validation.error_message += '\n\n'.join(error_message)

        return validation


class PubMedIdAttribute(core.IntegerAttribute):
    """ Attribute whose value is a single PubMed id (an optional integer with a minimum of 0) """

    def __init__(self, primary=False, unique=True):
        """
        Args:
            primary (:obj:`bool`, optional): indicate if attribute is primary attribute
            unique (:obj:`bool`, optional): indicate if attribute value must be unique
        """
        super(PubMedIdAttribute, self).__init__(
            none=True,
            default=None,
            none_value=None,
            min=0,
            verbose_name='PubMed id',
            description='PubMed identifier',
            primary=primary,
            unique=unique)

    @staticmethod
    def get_url(pmid):
        """ Get the URL of the PubMed webpage for a PubMed id

        Args:
            pmid (:obj:`int`): PubMed id

        Returns:
            :obj:`str`: URL for the PubMed id
        """
        base_url = 'https://www.ncbi.nlm.nih.gov/pubmed/'
        return base_url + str(pmid)


class PubMedIdsAttribute(core.LiteralAttribute):
    """ PubMed ids attribute """

    def __init__(self):
        super(PubMedIdsAttribute, self).__init__(
            default=[],
            none_value=[],
            verbose_name='PubMed ids',
            description=(
                "A list of PubMed ids."
                "\n"
                "\nExample:"
                "\n  16333295, 16333299"
            ),
            primary=False,
            unique=False)
        self.type = list

    def deserialize(self, value):
        """ Parse a comma-separated list of PubMed ids

        Each item must be the text of an integer; integer-valued decimals such as
        ``16333295.0`` are also accepted. The sign is not checked here: negative ids
        are rejected by :obj:`validate`.

        Args:
            value (:obj:`str`): comma-separated list of integers

        Returns:
            :obj:`tuple`:

                * :obj:`list` of :obj:`int`: parsed value (empty list for an empty value,
                  :obj:`None` for an invalid value)
                * :obj:`core.InvalidAttribute`: parsing error, or :obj:`None` on success
        """
        if value:
            if not isinstance(value, str):
                return (None, core.InvalidAttribute(self, ['Value must be a string']))

            values = []
            for v in value.split(','):
                v = v.strip()
                try:
                    # parse integers directly so that large values are not rounded by
                    # a round trip through float
                    v_int = int(v)
                except (TypeError, ValueError):
                    try:
                        v_float = float(v)
                        # int() raises ValueError for nan and OverflowError for infinity
                        v_int = int(v_float)
                    except (TypeError, ValueError, OverflowError):
                        return (None, core.InvalidAttribute(self, ['Value must be a comma-separated list of non-negative integers']))
                    if v_int != v_float:
                        return (None, core.InvalidAttribute(self, ['Value must be a comma-separated list of non-negative integers']))
                values.append(v_int)

            return (values, None)
        return ([], None)

    @staticmethod
    def _is_non_negative_integer(value):
        """ Determine if a value is a non-negative, integer-valued number

        Args:
            value (:obj:`object`): value to check

        Returns:
            :obj:`bool`: whether the value is an int or an integer-valued float that is at least 0
        """
        if not isinstance(value, (int, float)):
            return False
        # is_integer is False for nan and infinity, for which int() would raise
        if isinstance(value, float) and not value.is_integer():
            return False
        return value >= 0

    def validate(self, obj, value):
        """ Determine if :obj:`value` is a list of non-negative integers

        Args:
            obj (:obj:`Model`): object whose attribute is being validated
            value (:obj:`list` of :obj:`int`): value of attribute to validate

        Returns:
            :obj:`core.InvalidAttribute` or None: None if attribute is valid, otherwise
                the errors as an instance of :obj:`core.InvalidAttribute`
        """
        errors = []

        if not isinstance(value, list):
            # don't iterate over a non-list (e.g., None): report it as invalid instead
            errors.append('Value must be a list of non-negative integers')
        elif not all(self._is_non_negative_integer(v) for v in value):
            errors.append('Value must be a list of non-negative integers')

        if errors:
            return core.InvalidAttribute(self, errors)
        return None

    def serialize(self, value):
        """ Generate a comma-separated string representation

        Args:
            value (:obj:`list` of :obj:`int`): list of PubMed ids

        Returns:
            :obj:`str`: comma-separated list of PubMed ids
        """
        return ', '.join(str(v) for v in value)

    def to_builtin(self, value):
        """ Encode a value of the attribute using a simple Python representation (dict, list, str, float, bool, None)
        that is compatible with JSON and YAML

        Args:
            value (:obj:`list` of :obj:`int`): value of the attribute

        Returns:
            :obj:`list` of :obj:`int`: the value itself, which is already a simple Python representation
        """
        return value

    def from_builtin(self, json):
        """ Decode a simple Python representation (dict, list, str, float, bool, None) of a value of the attribute
        that is compatible with JSON and YAML

        Args:
            json (:obj:`list` of :obj:`int`): simple Python representation of a value of the attribute

        Returns:
            :obj:`list` of :obj:`int`: decoded value of the attribute (returned unchanged)
        """
        return json

    def get_xlsx_validation(self, sheet_models=None, doc_metadata_model=None):
        """ Get XLSX validation

        Args:
            sheet_models (:obj:`list` of :obj:`Model`, optional): models encoded as separate sheets
            doc_metadata_model (:obj:`type`): model whose worksheet contains the document metadata

        Returns:
            :obj:`wc_utils.workbook.io.FieldValidation`: validation
        """
        validation = super(PubMedIdsAttribute, self).get_xlsx_validation(sheet_models=sheet_models,
                                                                         doc_metadata_model=doc_metadata_model)

        validation.type = wc_utils.workbook.io.FieldValidationType.any

        input_message = ['Enter a comma-separated list of non-negative integers.']
        error_message = ['Value must be a comma-separated list of non-negative integers.']

        # append to, rather than replace, any message set by the parent class
        if validation.input_message:
            validation.input_message += '\n\n'
        validation.input_message += '\n\n'.join(input_message)

        if validation.error_message:
            validation.error_message += '\n\n'
        validation.error_message += '\n\n'.join(error_message)

        return validation