KarrLab/wc_lang

View on GitHub
wc_lang/io.py

Summary

Maintainability
C
1 day
Test Coverage
A
100%
""" Reading and writing models to/from files.

Supported file types:

* Comma separated values (.csv)
* Excel (.xlsx)
* Tab separated values (.tsv)

:Author: Jonathan Karr <karr@mssm.edu>
:Author: Arthur Goldberg <Arthur.Goldberg@mssm.edu>
:Date: 2016-12-05
:Copyright: 2016, Karr Lab
:License: MIT
"""

from wc_lang import core
from wc_utils.util.string import indent_forest
from wc_utils.util.list import get_count_limited_class
import obj_tables
import obj_tables.io
import obj_tables.utils
import os
import wc_lang
import wc_lang.config.core
from wc_utils.util import git


def get_root_model(models):
    """ Get a root model class from an iterator over `obj_tables.Model` classes

    The root model is obtained by name, rather than from `core.Model`, because `models` may have
    been obtained from `obj_tables` migration, which loads other versions of `wc_lang.core` in
    private imports.

    Args:
        models (:obj:`iterator`): subclasses of :obj:`obj_tables.Model` being read or written

    Returns:
        :obj:`type`: a subclass of `obj_tables.Model` whose name is `root_model_name`
    """
    # the singleton, root obj_tables.Model in wc_lang.core
    ROOT_MODEL_NAME = 'Model'
    return get_count_limited_class(models, ROOT_MODEL_NAME)


class Writer(obj_tables.io.Writer):
    """ Write model to file(s) """

    MODELS = (
        core.Model, core.Taxon, core.Environment,
        core.Submodel, core.Compartment, core.SpeciesType, core.Species,
        core.DistributionInitConcentration, core.Observable, core.Function,
        core.Reaction, core.RateLaw,
        core.DfbaObjective, core.DfbaObjReaction, core.DfbaObjSpecies,
        core.Parameter, core.StopCondition,
        core.Observation, core.ObservationSet, core.Conclusion, core.Reference, core.Author, core.Change,
    )

    def run(self, path, model, models=None, get_related=True, include_all_attributes=False, validate=None,
            title=None, description=None, keywords=None, version=None, language=None, creator=None,
            write_schema=False, write_toc=True, extra_entries=0, data_repo_metadata=False, schema_package=None,
            protected=True):
        """ Write a list of model classes to an Excel file, with one worksheet for each model, or to
            a set of .csv or .tsv files, with one file for each model.

        Args:
            path (:obj:`str`): path to write file(s)
            model (:obj:`type`): a `wc_lang` Model that's a subclass of `obj_tables.Model`
            models (:obj:`list` of :obj:`Model`, optional): models in the order that they should
                appear as worksheets; all models which are not in `models` will
                follow in alphabetical order
            get_related (:obj:`bool`, optional): if :obj:`True`, write object and all related objects
            include_all_attributes (:obj:`bool`, optional): if :obj:`True`, export all attributes including those
                not explictly included in `Model.Meta.attribute_order`
            validate (:obj:`bool`, optional): if :obj:`True`, validate the data
            title (:obj:`str`, optional): title
            description (:obj:`str`, optional): description
            keywords (:obj:`str`, optional): keywords
            version (:obj:`str`, optional): version
            language (:obj:`str`, optional): language
            creator (:obj:`str`, optional): creator
            write_schema (:obj:`bool`, optional): if :obj:`True`, include additional worksheet with schema
            write_toc (:obj:`bool`, optional): if :obj:`True`, include additional worksheet with table of contents
            extra_entries (:obj:`int`, optional): additional entries to display
            data_repo_metadata (:obj:`bool`, optional): if :obj:`True`, try to write metadata information
                about the file's Git repo; the repo must be current with origin, except for the file
            schema_package (:obj:`str`, optional): the package which defines the `obj_tables` schema
                used by the file; if not :obj:`None`, try to write metadata information about the
                the schema's Git repository: the repo must be current with origin
            protected (:obj:`bool`, optional): if :obj:`True`, protect the worksheet
        """
        if models is None:
            models = self.MODELS
        root_model = get_root_model(models)

        if issubclass(self.get_writer(path), obj_tables.io.WorkbookWriter):
            self.validate_implicit_relationships(root_model)
            self.validate_implicit_relationships_are_set(model, root_model)

        config = wc_lang.config.core.get_config()['wc_lang']['io']
        if validate is None:
            validate = config['validate']

        # default metadata for exported file
        if title is None:
            title = model.id
        if description is None:
            description = model.name
        if version is None:
            version = model.version
        if language is None:
            language = 'wc_lang'
        if creator is None:
            creator = '{}.{}'.format(self.__class__.__module__, self.__class__.__name__)

        super(Writer, self).run(path, model, schema_name='wc_lang', models=models, get_related=get_related,
                                include_all_attributes=include_all_attributes, validate=validate,
                                title=title, description=description, version=version, language=language,
                                creator=creator,
                                write_schema=write_schema, write_toc=write_toc,
                                extra_entries=extra_entries,
                                data_repo_metadata=data_repo_metadata, schema_package=schema_package,
                                protected=protected)

    @classmethod
    def validate_implicit_relationships(cls, root_model):
        """ Check that relationships to :obj:`root_model` do not need to be explicitly exported because
            they can be inferred by :obj:`Reader.run`. This is necessary to enable the relationships
            to :obj:`root_model` to not be exported in workbooks, and instead added by :obj:`Reader.run`.

        Raises:
            :obj:`Exception`: if there are relationships from :obj:`core.Model` or one-to-many or
                many-to-many relationships to :obj:`root_model`
        """
        for attr in root_model.Meta.attributes.values():
            if isinstance(attr, obj_tables.RelatedAttribute) and \
                    attr.related_class.__name__ != 'Identifier':
                raise Exception('Relationships from `Model` not supported')

        for attr in root_model.Meta.related_attributes.values():
            if not isinstance(attr, (obj_tables.OneToOneAttribute, obj_tables.ManyToOneAttribute)):
                raise Exception('Only one-to-one and many-to-one relationships are supported to `{}`'.format(
                    root_model.__name__))

    def validate_implicit_relationships_are_set(self, model, root_model):
        """ Check that there is only one instance of :obj:`root_model` and that each relationship to
        :obj:`root_model` is set. This is necessary to enable the relationships to :obj:`root_model`
        to not be exported in workbooks, and instead added by :obj:`Reader.run`.

        Args:
            model (:obj:`obj_tables.Model`): the root model instance
            root_model (:obj:`type`): the type of `model`, a subclass of :obj:`obj_tables.Model`

        Raises:
            :obj:`ValueError`: if there are multiple instances of `root_model` in the object graph
        """
        for obj in model.get_related():
            for attr in obj.Meta.attributes.values():
                if isinstance(attr, obj_tables.RelatedAttribute) and \
                        attr.related_class == root_model:
                    if getattr(obj, attr.name) != model:
                        raise ValueError('{}.{} must be set to the instance of `Model`'.format(obj.__class__.__name__, attr.name))


class Reader(obj_tables.io.Reader):
    """ Read model from file(s) """

    MODELS = Writer.MODELS

    def run(self, path, models=None,
            ignore_missing_models=None, ignore_extra_models=None, ignore_sheet_order=None,
            include_all_attributes=False, ignore_missing_attributes=None, ignore_extra_attributes=None,
            ignore_attribute_order=None, validate=None):
        """ Read a list of model objects from file(s) and, optionally, validate them

        Args:
            path (:obj:`str`): path to file(s)
            models (:obj:`types.TypeType` or :obj:`list` of :obj:`types.TypeType`, optional): type
                of object to read or list of types of objects to read
            ignore_missing_models (:obj:`bool`, optional): if :obj:`False`, report an error if a worksheet/
                file is missing for one or more models
            ignore_extra_models (:obj:`bool`, optional): if :obj:`True` and all `models` are found, ignore
                other worksheets or files
            ignore_sheet_order (:obj:`bool`, optional): if :obj:`True`, do not require the sheets to be provided
                in the canonical order
            include_all_attributes (:obj:`bool`, optional): if :obj:`True`, export all attributes including those
                not explictly included in `Model.Meta.attribute_order`
            ignore_missing_attributes (:obj:`bool`, optional): if :obj:`False`, report an error if a
                worksheet/file doesn't contain all of attributes in a model in `models`
            ignore_extra_attributes (:obj:`bool`, optional): if :obj:`True`, do not report errors if
                attributes in the data are not in the model
            ignore_attribute_order (:obj:`bool`): if :obj:`True`, do not require the attributes to be provided
                in the canonical order
            validate (:obj:`bool`, optional): if :obj:`True`, validate the data

        Returns:
            :obj:`dict`: model objects grouped by `obj_tables.Model` class

        Raises:
            :obj:`ValueError`: if the file defines zero or multiple models or the model defined in the file(s) is
                invalid
        """
        if models is None:
            models = self.MODELS

        root_model = get_root_model(models)
        if issubclass(self.get_reader(path), obj_tables.io.WorkbookReader):
            Writer.validate_implicit_relationships(root_model)

        config = wc_lang.config.core.get_config()['wc_lang']['io']
        if ignore_missing_models is None:
            ignore_missing_models = not config['strict']
        if ignore_extra_models is None:
            ignore_extra_models = not config['strict']
        if ignore_sheet_order is None:
            ignore_sheet_order = not config['strict']
        if ignore_missing_attributes is None:
            ignore_missing_attributes = not config['strict']
        if ignore_extra_attributes is None:
            ignore_extra_attributes = not config['strict']
        if ignore_attribute_order is None:
            ignore_attribute_order = not config['strict']

        objects = super(Reader, self).run(path, schema_name='wc_lang', models=models,
                                          ignore_missing_models=ignore_missing_models,
                                          ignore_extra_models=ignore_extra_models,
                                          ignore_sheet_order=ignore_sheet_order,
                                          include_all_attributes=include_all_attributes,
                                          ignore_missing_attributes=ignore_missing_attributes,
                                          ignore_extra_attributes=ignore_extra_attributes,
                                          ignore_attribute_order=ignore_attribute_order,
                                          group_objects_by_model=True,
                                          validate=False)

        # check that file only has 1 wc_lang Model instance
        root_model = get_root_model(objects)
        if len(objects[root_model]) != 1:
            raise ValueError('"{}" should define one model'.format(path))
        model = objects[root_model][0]

        # add implicit relationships to `Model`
        if issubclass(self.get_reader(path), obj_tables.io.WorkbookReader):
            for cls, cls_objects in objects.items():
                for attr in cls.Meta.attributes.values():
                    if isinstance(attr, obj_tables.RelatedAttribute) and \
                            attr.related_class == root_model:
                        for cls_obj in cls_objects:
                            setattr(cls_obj, attr.name, model)

        # validate
        config = wc_lang.config.core.get_config()['wc_lang']['io']
        if (validate is not None and validate) or (validate is None and config['validate']):
            objs = []
            for cls_objs in objects.values():
                objs.extend(cls_objs)

            errors = obj_tables.Validator().validate(objs)
            if errors:
                raise ValueError(
                    indent_forest(['The model cannot be loaded because it fails to validate:', [errors]]))

        # return model
        return objects


def convert(source, destination, protected=True):
    """ Convert among Excel (.xlsx), comma separated (.csv), and tab separated (.tsv) file formats

    Read a model from the `source` files(s) and write it to the `destination` files(s). A path to a
    delimiter separated set of models must be represented by a Unix glob pattern (with a \\*) that
    matches all delimiter separated files.

    Args:
        source (:obj:`str`): path to source file(s)
        destination (:obj:`str`): path to save converted file
        protected (:obj:`bool`, optional): if :obj:`True`, protect the worksheet
    """
    model = Reader().run(source)[core.Model][0]
    Writer().run(destination, model, data_repo_metadata=False, protected=protected)


def create_template(path, write_schema=False, write_toc=True,
                    extra_entries=10, data_repo_metadata=True,
                    protected=True):
    """ Create file with model template, including row and column headings

    Args:
        path (:obj:`str`): path to file(s)
        write_schema (:obj:`bool`, optional): if :obj:`True`, include additional worksheet with schema
        write_toc (:obj:`bool`, optional): if :obj:`True`, include additional worksheet with table of contents
        extra_entries (:obj:`int`, optional): additional entries to display
        data_repo_metadata (:obj:`bool`, optional): if :obj:`True`, try to write metadata information
            about the file's Git repo
        protected (:obj:`bool`, optional): if :obj:`True`, protect the worksheet
    """
    model = core.Model(id='template', name='Template', version=wc_lang.__version__)
    Writer().run(path, model,
                 write_schema=write_schema, write_toc=write_toc,
                 extra_entries=extra_entries,
                 data_repo_metadata=data_repo_metadata,
                 protected=protected)