KarrLab/obj_tables

View on GitHub
obj_tables/web_service.py

Summary

Maintainability
D
2 days
Test Coverage
A
100%
""" Web service

:Author: Jonathan Karr <karr@mssm.edu>
:Date: 2019-09-15
:Copyright: 2019, Karr Lab
:License: MIT
"""

from . import core
from . import io
from . import utils
from .__main__ import get_schema_models, DEFAULT_WRITER_ARGS, DEFAULT_READER_ARGS
from wc_utils.util.string import indent_forest
from werkzeug.datastructures import FileStorage
import copy
import flask
import flask_cors
import flask_restplus
import flask_restplus.inputs
import glob
import obj_tables
import os
import shutil
import tempfile
import zipfile

# Create the Flask application and enable cross-origin requests: every route
# accepts any origin, and the content-disposition header is exposed to
# cross-origin clients.
app = flask.Flask(__name__)
cors = flask_cors.CORS(
    app,
    expose_headers=["content-disposition"],
    resources={r"/*": {"origins": "*"}},
)


class PrefixMiddleware(object):
    """ WSGI middleware that serves the wrapped application under a URL prefix

    Requests whose path lies under the prefix are forwarded to the wrapped
    application with the prefix moved from ``PATH_INFO`` to ``SCRIPT_NAME``.
    All other requests receive a plain-text 404 response.

    Attributes:
        app (:obj:`callable`): wrapped WSGI application
        prefix (:obj:`str`): URL prefix under which the application is served
    """

    def __init__(self, app, prefix=''):
        """
        Args:
            app (:obj:`callable`): WSGI application to wrap
            prefix (:obj:`str`, optional): URL prefix under which to serve the application
        """
        self.app = app
        self.prefix = prefix

    def __call__(self, environ, start_response):
        """ Handle a WSGI request

        Args:
            environ (:obj:`dict`): WSGI environment; ``PATH_INFO`` and ``SCRIPT_NAME``
                are rewritten in place when the request is forwarded
            start_response (:obj:`callable`): WSGI ``start_response`` callable

        Returns:
            :obj:`object`: iterable response body (the wrapped application's response,
            or a one-element list with the 404 message)
        """
        # PEP 3333 allows servers to omit PATH_INFO when it would be empty
        path = environ.get('PATH_INFO', '')
        rest = path[len(self.prefix):]

        # require the prefix to end at a path-segment boundary so that, e.g.,
        # '/apix' is not mistaken for a URL under '/api'
        at_boundary = (not self.prefix or self.prefix.endswith('/')
                       or not rest or rest.startswith('/'))

        if path.startswith(self.prefix) and at_boundary:
            environ['PATH_INFO'] = rest
            environ['SCRIPT_NAME'] = self.prefix
            return self.app(environ, start_response)

        # WSGI status strings must contain both the code and a reason phrase
        start_response('404 Not Found', [('Content-Type', 'text/plain')])
        return ["This url does not belong to the app.".encode()]


# Serve every route of the application under the ``/api`` prefix
app.wsgi_app = PrefixMiddleware(app.wsgi_app, prefix='/api')

# REST API object on which the endpoints below are registered
api = flask_restplus.Api(
    app,
    version=obj_tables.__version__,
    title='ObjTables web service',
    description='Web service for generating and working with schemas for tabular-formatted datasets',
    contact='info@objtables.org',
    license='MIT',
    license_url='https://github.com/KarrLab/obj_tables/blob/master/LICENSE',
    doc='/',
)

""" Convert """
# Request arguments accepted by the ``/convert/`` endpoint: two uploaded files
# (schema and workbook) plus options that control the output workbook.
convert_parser = api.parser()
convert_parser.add_argument(
    'schema',
    type=FileStorage,
    location='files',
    required=True,
    help='Schema file (.csv, .tsv, .xlsx)',
)
convert_parser.add_argument(
    'workbook',
    type=FileStorage,
    location='files',
    required=True,
    help='Workbook (.csv, .json, .tsv, .yml, .xlsx, .zip of .csv or .tsv)',
)
convert_parser.add_argument(
    'format',
    type=flask_restplus.inputs.regex(r'^(csv|multi\.csv|json|tsv|multi\.tsv|xlsx|yml)$'),
    required=False,
    default='xlsx',
    help='Format to convert workbook',
)
convert_parser.add_argument(
    'write-toc',
    type=flask_restplus.inputs.boolean,
    required=False,
    default=False,
    help='If true, save table of contents with file',
)
convert_parser.add_argument(
    'write-schema',
    type=flask_restplus.inputs.boolean,
    required=False,
    default=False,
    help='If true, save schema with file',
)
convert_parser.add_argument(
    'protected',
    type=flask_restplus.inputs.boolean,
    required=False,
    default=True,
    help='If true, protect the table headings in the file from editing',
)


@api.route("/convert/",
           doc={'description': 'Convert a schema-encoded workbook to another format (CSV, multi-CSV, JSON, TSV, multi-TSV, XLSX, YAML)'})
@api.expect(convert_parser)
class Convert(flask_restplus.Resource):
    """ Convert a schema-encoded workbook to another format (CSV, multi-CSV, JSON, TSV, multi-TSV, XLSX, YAML) """

    def post(self):
        """ Convert a schema-encoded workbook to another format (CSV, multi-CSV, JSON, TSV, multi-TSV, XLSX, YAML)
        """
        # Returns:
        #     :obj:`flask.Response`: response with workbook
        #
        # NOTE: the return description is kept outside the docstring, presumably
        # so that it is not included in the generated API documentation.
        args = convert_parser.parse_args()
        out_format = args['format']

        schema_dir, schema_filename = save_schema(args['schema'])
        in_wb_dir = None
        try:
            # Saving the workbook can abort the request (e.g., unsupported file
            # extension). It is done inside this ``try`` so that the schema
            # directory created above is still removed in that case.
            in_wb_dir, in_wb_filename = save_in_workbook(args['workbook'])

            try:
                schema_name, schema, models = get_schema_models(schema_filename)
                objs, doc_metadata, model_metadata = read_workbook(in_wb_filename, models, schema_name=schema_name)
                out_wb_dir, out_wb_filename, out_wb_mimetype = save_out_workbook(
                    out_format, objs, schema_name, doc_metadata, model_metadata, models=models,
                    write_toc=args['write-toc'],
                    write_schema=args['write-schema'],
                    protected=args['protected'],
                    **DEFAULT_WRITER_ARGS)
            except Exception as err:
                # report problems with the uploaded files as client errors
                flask_restplus.abort(400, str(err))
        finally:
            shutil.rmtree(schema_dir)
            if in_wb_dir is not None:
                shutil.rmtree(in_wb_dir)

        # remove the directory with the converted workbook once the response has been built
        @flask.after_this_request
        def remove_out_file(response):
            shutil.rmtree(out_wb_dir)
            return response

        return flask.send_file(out_wb_filename,
                               attachment_filename=os.path.basename(out_wb_filename),
                               mimetype=out_wb_mimetype,
                               as_attachment=True)


""" Difference """
# Request arguments accepted by the ``/diff/`` endpoint: a schema, the name of
# the model to compare, and the two workbooks to compare.
diff_parser = api.parser()
diff_parser.add_argument(
    'schema',
    type=FileStorage,
    location='files',
    required=True,
    help='Schema file (.csv, .tsv, .xlsx)',
)
diff_parser.add_argument(
    'model',
    type=str,
    required=True,
    help='Type of objects to compare',
)
diff_parser.add_argument(
    'workbook',
    type=FileStorage,
    location='files',
    required=True,
    help='First workbook (.csv, .json, .tsv, .yml, .xlsx, .zip of .csv or .tsv)',
)
diff_parser.add_argument(
    'workbook-2',
    type=FileStorage,
    location='files',
    required=True,
    help='Second workbook (.csv, .json, .tsv, .yml, .xlsx, .zip of .csv or .tsv)',
)


@api.route("/diff/",
           doc={'description': 'Calculate the difference between two workbooks according to a schema'})
@api.expect(diff_parser)
class Diff(flask_restplus.Resource):
    """ Calculate the difference between two workbooks according to a schema """

    def post(self):
        """ Calculate the difference between two workbooks according to a schema
        """
        # Returns:
        #     :obj:`list` of :obj:`str`: list of difference between workbooks
        #
        # NOTE: the return description is kept outside the docstring, presumably
        # so that it is not included in the generated API documentation.
        args = diff_parser.parse_args()
        model_name = args['model']

        # Every temporary directory is recorded as soon as it exists so that a
        # single ``finally`` removes all of them, including when a later upload
        # is rejected (which aborts the request).
        temp_dirs = []
        try:
            schema_dir, schema_filename = save_schema(args['schema'])
            temp_dirs.append(schema_dir)
            wb_dir_1, wb_filename_1 = save_in_workbook(args['workbook'])
            temp_dirs.append(wb_dir_1)
            wb_dir_2, wb_filename_2 = save_in_workbook(args['workbook-2'])
            temp_dirs.append(wb_dir_2)

            try:
                schema_name, schema, models = get_schema_models(schema_filename)
                diffs = utils.diff_workbooks(wb_filename_1, wb_filename_2,
                                             models, model_name,
                                             schema_name=schema_name,
                                             **DEFAULT_READER_ARGS)
            except Exception as err:
                # report problems with the uploaded files as client errors
                flask_restplus.abort(400, str(err))
        finally:
            for temp_dir in temp_dirs:
                shutil.rmtree(temp_dir)

        return diffs


""" Generate template """
# Request arguments accepted by the ``/gen-template/`` endpoint: a schema plus
# options that control the generated template workbook.
gen_template_parser = api.parser()
gen_template_parser.add_argument(
    'schema',
    type=FileStorage,
    location='files',
    required=True,
    help='Schema file (.csv, .tsv, .xlsx)',
)
gen_template_parser.add_argument(
    'format',
    type=flask_restplus.inputs.regex(r'^(csv|multi\.csv|json|tsv|multi\.tsv|xlsx|yml)$'),
    required=False,
    default='xlsx',
    help='Format for template',
)
gen_template_parser.add_argument(
    'write-toc',
    type=flask_restplus.inputs.boolean,
    required=False,
    default=False,
    help='If true, save table of contents with file',
)
gen_template_parser.add_argument(
    'write-schema',
    type=flask_restplus.inputs.boolean,
    required=False,
    default=False,
    help='If true, save schema with file',
)
gen_template_parser.add_argument(
    'protected',
    type=flask_restplus.inputs.boolean,
    required=False,
    default=True,
    help='If true, protect the table headings in the file from editing',
)


@api.route("/gen-template/",
           doc={'description':
                'Generate a template workbook (CSV, multi-CSV, TSV, multi-TSV, XLSX) for a schema or declarative description of a schema'})
@api.expect(gen_template_parser)
class GenTemplate(flask_restplus.Resource):
    """ Generate a template workbook (CSV, multi-CSV, TSV, multi-TSV, XLSX) for a schema or declarative description of a schema """

    def post(self):
        """ Generate a template workbook (CSV, multi-CSV, TSV, multi-TSV, XLSX) for a schema or declarative description of a schema
        """
        # Returns:
        #     :obj:`flask.Response`: response with workbook
        args = gen_template_parser.parse_args()
        schema_dir, schema_filename = save_schema(args['schema'])
        out_format = args['format']

        # the uploaded schema is only needed to build the models
        try:
            schema_name, schema, models = get_schema_models(schema_filename)
        except Exception as err:
            flask_restplus.abort(400, str(err))
        finally:
            shutil.rmtree(schema_dir)

        # a template has no objects, so empty models and columns must be written
        writer_args = copy.copy(DEFAULT_WRITER_ARGS)
        writer_args['write_empty_models'] = True
        writer_args['write_empty_cols'] = True

        out_wb_dir, out_wb_filename, out_wb_mimetype = save_out_workbook(
            out_format, [], schema_name, {}, {},
            models=models,
            write_toc=args['write-toc'],
            write_schema=args['write-schema'],
            protected=args['protected'],
            **writer_args)

        # remove the directory with the template once the response has been built
        @flask.after_this_request
        def remove_out_file(response):
            shutil.rmtree(out_wb_dir)
            return response

        return flask.send_file(
            out_wb_filename,
            as_attachment=True,
            attachment_filename=os.path.basename(out_wb_filename),
            mimetype=out_wb_mimetype)


""" Init schema """
# Request arguments accepted by the ``/init-schema/`` endpoint: the uploaded
# tabular description of the schema.
init_schema_parser = api.parser()
init_schema_parser.add_argument(
    'schema',
    type=FileStorage,
    location='files',
    required=True,
    help='File with tabular description of schema (.csv, .tsv, .xlsx)',
)


@api.route("/init-schema/",
           doc={'description': 'Initialize a Python schema from a declarative description of the schema in a table (CSV, TSV, XLSX)'})
@api.expect(init_schema_parser)
class InitSchema(flask_restplus.Resource):
    """ Initialize a Python schema from a declarative description of the schema in a table (CSV, TSV, XLSX) """

    def post(self):
        """ Initialize a Python schema from a declarative description of the schema in a table (CSV, TSV, XLSX)
        """
        # Returns:
        #     :obj:`flask.Response`: response with Python schema
        args = init_schema_parser.parse_args()
        schema_dir, schema_filename = save_schema(args['schema'])

        # directory and file into which the Python schema is generated
        out_dir = tempfile.mkdtemp()
        out_filename = os.path.join(out_dir, 'schema.py')

        try:
            utils.init_schema(schema_filename, out_filename=out_filename)
        except Exception as err:
            flask_restplus.abort(400, str(err))
        finally:
            shutil.rmtree(schema_dir)

            # Registered inside ``finally`` so that the output directory is
            # scheduled for removal on the failure path as well as on success.
            @flask.after_this_request
            def remove_out_file(response):
                shutil.rmtree(out_dir)
                return response

        return flask.send_file(
            out_filename,
            as_attachment=True,
            attachment_filename='schema.py',
            mimetype='text/x-python')


""" Normalize """
# Request arguments accepted by the ``/normalize/`` endpoint: a schema, the name
# of the model to normalize, the workbook, and options for the output workbook.
norm_parser = api.parser()
norm_parser.add_argument(
    'schema',
    type=FileStorage,
    location='files',
    required=True,
    help='Schema file (.csv, .tsv, .xlsx)',
)
norm_parser.add_argument(
    'model',
    type=str,
    required=True,
    help='Type of objects to normalize',
)
norm_parser.add_argument(
    'workbook',
    type=FileStorage,
    location='files',
    required=True,
    help='Workbook (.csv, .json, .tsv, .yml, .xlsx, .zip of .csv or .tsv)',
)
norm_parser.add_argument(
    'format',
    type=flask_restplus.inputs.regex(r'^(csv|multi\.csv|json|tsv|multi\.tsv|xlsx|yml)$'),
    required=False,
    default='xlsx',
    help='Format for normalized workbook',
)
norm_parser.add_argument(
    'write-toc',
    type=flask_restplus.inputs.boolean,
    required=False,
    default=False,
    help='If true, save table of contents with file',
)
norm_parser.add_argument(
    'write-schema',
    type=flask_restplus.inputs.boolean,
    required=False,
    default=False,
    help='If true, save schema with file',
)
norm_parser.add_argument(
    'protected',
    type=flask_restplus.inputs.boolean,
    required=False,
    default=True,
    help='If true, protect the table headings in the file from editing',
)


@api.route("/normalize/",
           doc={'description': 'Normalize a workbook according to a schema'})
@api.expect(norm_parser)
class Normalize(flask_restplus.Resource):
    """ Normalize a workbook according to a schema """

    def post(self):
        """ Normalize a workbook according to a schema
        """
        # Returns:
        #     :obj:`flask.Response`: response with workbook
        #
        # NOTE: the return description is kept outside the docstring, presumably
        # so that it is not included in the generated API documentation.
        args = norm_parser.parse_args()
        model_name = args['model']
        out_format = args['format']

        schema_dir, schema_filename = save_schema(args['schema'])
        in_wb_dir = None
        try:
            # Saving the workbook and looking up the model can both abort the
            # request. They run inside this ``try`` so that the temporary
            # directories are removed on every exit path.
            in_wb_dir, in_wb_filename = save_in_workbook(args['workbook'])

            try:
                schema_name, schema, models = get_schema_models(schema_filename)
            except Exception as err:
                flask_restplus.abort(400, str(err))

            # not wrapped in the ``except`` blocks so that its own 400 message
            # is propagated unchanged
            model = get_model(models, model_name)

            try:
                objs, doc_metadata, model_metadata = read_workbook(in_wb_filename, models, schema_name=schema_name)
                # only instances of the requested model are normalized
                for obj in objs:
                    if isinstance(obj, model):
                        obj.normalize()
            except Exception as err:
                flask_restplus.abort(400, str(err))
        finally:
            shutil.rmtree(schema_dir)
            if in_wb_dir is not None:
                shutil.rmtree(in_wb_dir)

        out_wb_dir, out_wb_filename, out_wb_mimetype = save_out_workbook(
            out_format, objs, schema_name, doc_metadata, model_metadata, models=models,
            write_toc=args['write-toc'],
            write_schema=args['write-schema'],
            protected=args['protected'],
            **DEFAULT_WRITER_ARGS)

        # remove the directory with the normalized workbook once the response has been built
        @flask.after_this_request
        def remove_out_file(response):
            shutil.rmtree(out_wb_dir)
            return response

        return flask.send_file(out_wb_filename,
                               attachment_filename=os.path.basename(out_wb_filename),
                               mimetype=out_wb_mimetype,
                               as_attachment=True)


""" Validate """
# Request arguments accepted by the ``/validate/`` endpoint: the schema and the
# workbook to validate against it.
validate_parser = api.parser()
validate_parser.add_argument(
    'schema',
    type=FileStorage,
    location='files',
    required=True,
    help='Schema file (.csv, .tsv, .xlsx)',
)
validate_parser.add_argument(
    'workbook',
    type=FileStorage,
    location='files',
    required=True,
    help='Workbook (.csv, .json, .tsv, .yml, .xlsx, .zip of .csv or .tsv)',
)


# NOTE(review): the endpoint description is passed to ``api.route`` like the
# other endpoints in this module; ``api.expect`` does not appear to use a
# ``doc`` keyword -- confirm against the installed flask-restplus.
@api.route("/validate/",
           doc={'description': 'Validate that a workbook is consistent with a schema, and report any errors'})
@api.expect(validate_parser)
class Validate(flask_restplus.Resource):
    """ Validate that a workbook is consistent with a schema, and report any errors """

    def post(self):
        """ Validate that a workbook is consistent with a schema, and report any errors
        """
        # Returns:
        #     :obj:`str`: errors
        #
        # NOTE: the return description is kept outside the docstring, presumably
        # so that it is not included in the generated API documentation.
        args = validate_parser.parse_args()

        schema_dir, schema_filename = save_schema(args['schema'])
        wb_dir = None
        try:
            # Saving the workbook can abort the request (e.g., unsupported file
            # extension). It is done inside this ``try`` so that the schema
            # directory created above is still removed in that case.
            wb_dir, wb_filename = save_in_workbook(args['workbook'])

            try:
                schema_name, schema, models = get_schema_models(schema_filename)
                # validation is disabled while reading; it is run explicitly below
                objs = io.Reader().run(wb_filename,
                                       schema_name=schema_name,
                                       models=models,
                                       group_objects_by_model=False,
                                       validate=False,
                                       **DEFAULT_READER_ARGS)
            except Exception as err:
                # report problems with the uploaded files as client errors
                flask_restplus.abort(400, str(err))
        finally:
            shutil.rmtree(schema_dir)
            if wb_dir is not None:
                shutil.rmtree(wb_dir)

        errors = core.Validator().validate(objs)
        if errors:
            msg = indent_forest(['The dataset is invalid:', [errors]])
        else:
            msg = 'The dataset is valid'

        return msg


""" Visualize schema """
# Request arguments accepted by the ``/viz-schema/`` endpoint: the schema and
# the image format of the diagram.
viz_parser = api.parser()
viz_parser.add_argument(
    'schema',
    type=FileStorage,
    location='files',
    required=True,
    help='Schema file (.csv, .tsv, .xlsx)',
)
viz_parser.add_argument(
    'format',
    type=flask_restplus.inputs.regex(r'^(pdf|png|svg)$'),
    required=False,
    default='svg',
    help='Format for UML diagram',
)


# NOTE(review): the endpoint description is passed to ``api.route`` like the
# other endpoints in this module; ``api.expect`` does not appear to use a
# ``doc`` keyword -- confirm against the installed flask-restplus.
@api.route("/viz-schema/",
           doc={'description': 'Generate a UML diagram for a schema'})
@api.expect(viz_parser)
class VizSchema(flask_restplus.Resource):
    """ Generate a UML diagram for a schema """

    def post(self):
        """ Generate a UML diagram for a schema
        """
        # Returns:
        #     :obj:`flask.Response`: response with the diagram as an attachment
        #
        # NOTE: the return description is kept outside the docstring, presumably
        # so that it is not included in the generated API documentation.
        args = viz_parser.parse_args()
        schema_dir, schema_filename = save_schema(args['schema'])

        # the uploaded schema is only needed to build the schema object
        try:
            _, schema, _ = get_schema_models(schema_filename)
        except Exception as err:
            flask_restplus.abort(400, str(err))
        finally:
            shutil.rmtree(schema_dir)

        img_format = args['format']
        img_dir = tempfile.mkdtemp()
        img_file = os.path.join(img_dir, 'schema.' + img_format)
        try:
            utils.viz_schema(schema, img_file)
        except Exception as err:
            # no response file will be sent, so remove the directory immediately
            shutil.rmtree(img_dir)
            flask_restplus.abort(400, str(err))

        # remove the directory with the image once the response has been built
        @flask.after_this_request
        def remove_out_file(response):
            shutil.rmtree(img_dir)
            return response

        # the request parser restricts the format to exactly these three values
        mimetypes = {
            'pdf': 'application/pdf',
            'png': 'image/png',
            'svg': 'image/svg+xml',
        }
        mimetype = mimetypes[img_format]

        return flask.send_file(img_file,
                               attachment_filename=os.path.basename(img_file),
                               mimetype=mimetype,
                               as_attachment=True)


def save_schema(file_storage):
    """ Save schema to a temporary directory

    The request is aborted with a 400 error when the uploaded file does not
    have a supported extension.

    Args:
        file_storage (:obj:`FileStorage`): uploaded file

    Returns:
        :obj:`tuple`:

            * :obj:`str`: temporary directory with schema
            * :obj:`str`: local path to schema file
    """
    # The filename is supplied by the client. Keep only its final component so
    # that names such as ``../x.csv`` or ``/abs/x.csv`` cannot place the file
    # outside of the temporary directory. A missing filename is treated as
    # empty and rejected by the extension check below.
    upload_name = os.path.basename(file_storage.filename or '')

    if os.path.splitext(upload_name)[1] not in ['.csv', '.tsv', '.xlsx']:
        flask_restplus.abort(400, 'Schema must be a .csv, .tsv or .xlsx file.')

    temp_dir = tempfile.mkdtemp()
    filename = os.path.join(temp_dir, upload_name)
    file_storage.save(filename)
    file_storage.close()

    return temp_dir, filename


def save_in_workbook(file_storage):
    """ Save workbook to a temporary directory

    A ``.zip`` upload is extracted into the temporary directory and must contain
    either ``.csv`` files or ``.tsv`` files (but not both). The request is
    aborted with a 400 error when the upload is not acceptable; in that case,
    and when saving fails, the temporary directories created here are removed.

    Args:
        file_storage (:obj:`FileStorage`): uploaded file

    Returns:
        :obj:`tuple`:

            * :obj:`str`: temporary directory with workbook
            * :obj:`str`: local path to workbook file, or a glob pattern
              (``*.csv`` / ``*.tsv``) for the files extracted from a ``.zip`` upload
    """
    # The filename is supplied by the client. Keep only its final component so
    # that it cannot place the file outside of the temporary directory. A
    # missing filename is treated as empty and rejected below.
    upload_name = os.path.basename(file_storage.filename or '')
    ext = os.path.splitext(upload_name)[1]

    if ext not in ['.csv', '.json', '.tsv', '.xlsx', '.yml', '.zip']:
        flask_restplus.abort(400, 'Workbook must be a .csv, .json, .tsv .xlsx, .yml, or .zip file.')

    temp_dir = tempfile.mkdtemp()
    try:
        if ext == '.zip':
            # the archive itself is stored in a separate directory so that it
            # does not end up next to the extracted files
            zip_file_dir = tempfile.mkdtemp()
            try:
                zip_filename = os.path.join(zip_file_dir, 'tmp.zip')
                file_storage.save(zip_filename)
                file_storage.close()

                try:
                    zip_file = zipfile.ZipFile(zip_filename, 'r')
                except zipfile.BadZipFile:
                    # an unreadable archive is a client error
                    flask_restplus.abort(400, 'Workbook must be a valid .zip file.')

                with zip_file:
                    member_exts = set(os.path.splitext(member.filename)[1]
                                      for member in zip_file.infolist())
                    has_csv = '.csv' in member_exts
                    has_tsv = '.tsv' in member_exts

                    # exactly one of the two tabular formats must be present
                    if has_csv == has_tsv:
                        flask_restplus.abort(400, 'Workbook must contain .csv or .tsv files.')

                    filename = os.path.join(temp_dir, '*.csv' if has_csv else '*.tsv')
                    zip_file.extractall(temp_dir)
            finally:
                shutil.rmtree(zip_file_dir)
        else:
            filename = os.path.join(temp_dir, upload_name)
            file_storage.save(filename)
            file_storage.close()
    except Exception:
        # the caller never learns the directory's path on failure, so it must
        # be removed here
        shutil.rmtree(temp_dir)
        raise

    return (temp_dir, filename)


def read_workbook(filename, models, schema_name=None):
    """ Read the objects and metadata stored in a workbook

    Args:
        filename (:obj:`str`): path to workbook
        models (:obj:`list` of :obj:`core.Model`): models
        schema_name (:obj:`str`, optional): schema name

    Returns:
        :obj:`tuple`:

            * :obj:`object`: objects returned by the reader (read with
              ``group_objects_by_model=False``)
            * :obj:`dict`: dictionary of document metadata
            * :obj:`dict`: dictionary of model metadata
    """
    wb_reader = io.Reader()
    objs = wb_reader.run(filename,
                         models=models,
                         schema_name=schema_name,
                         group_objects_by_model=False,
                         **DEFAULT_READER_ARGS)

    # the metadata is taken from the reader after ``run`` has executed
    doc_metadata = wb_reader._doc_metadata
    model_metadata = wb_reader._model_metadata
    return objs, doc_metadata, model_metadata


def save_out_workbook(format, objs, schema_name, doc_metadata, model_metadata, models,
                      write_toc=False, write_schema=False, write_empty_models=True, write_empty_cols=True,
                      protected=True):
    """ Write objects to a workbook in a new temporary directory

    For the ``csv`` and ``tsv`` formats, one file is written per glob match of
    ``*.csv`` / ``*.tsv`` and the files are bundled into a single zip archive.
    For all other formats a single ``workbook.<ext>`` file is written. If
    writing fails, the temporary directory is removed before the exception
    is re-raised.

    Args:
        format (:obj:`str`): format (csv, multi.csv, json, tsv, multi.tsv, xlsx, yml)
        objs (:obj:`object`): objects to write (the endpoints in this module pass
            the objects returned by the reader, or an empty list)
        schema_name (:obj:`str`): schema name
        doc_metadata (:obj:`dict`): dictionary of document metadata
        model_metadata (:obj:`dict`): dictionary of model metadata
        models (:obj:`list` of :obj:`core.Model`): models
        write_toc (:obj:`bool`, optional): if :obj:`True`, write
            a table of contents with the file
        write_schema (:obj:`bool`, optional): if :obj:`True`, write
            schema with file
        write_empty_models (:obj:`bool`, optional): if :obj:`True`, write models even when there are no instances
        write_empty_cols (:obj:`bool`, optional): if :obj:`True`, write columns even when all values are :obj:`None`
        protected (:obj:`bool`, optional): if :obj:`True`, protect the worksheet

    Returns:
        :obj:`tuple`:

            * :obj:`str`: temporary directory with workbook
            * :obj:`str`: path to workbook file
            * :obj:`str`: mimetype of workbook
    """
    is_zipped = format in ['csv', 'tsv']
    is_multi = format in ['multi.csv', 'multi.tsv']

    temp_dir = tempfile.mkdtemp()
    try:
        if is_zipped:
            # glob pattern: the writer is given a pattern rather than a single path
            temp_filename = os.path.join(temp_dir, '*.' + format)
        elif is_multi:
            temp_filename = os.path.join(temp_dir, 'workbook.' + format.replace('multi.', ''))
        else:
            temp_filename = os.path.join(temp_dir, 'workbook.' + format)

        io.Writer().run(temp_filename, objs, schema_name=schema_name, doc_metadata=doc_metadata, model_metadata=model_metadata,
                        models=models, write_toc=write_toc, write_schema=write_schema,
                        write_empty_models=write_empty_models,
                        write_empty_cols=write_empty_cols,
                        protected=protected)

        if is_zipped:
            # bundle the written files into one archive so they can be sent as a single file
            filename = os.path.join(temp_dir, 'workbook.{}.zip'.format(format))
            mimetype = 'application/zip'
            with zipfile.ZipFile(filename, 'w') as zip_file:
                for temp_model_filename in glob.glob(temp_filename):
                    zip_file.write(temp_model_filename, os.path.basename(temp_model_filename))
        else:
            filename = temp_filename
            if is_multi:
                mimetype = 'text/plain'
            elif format == 'json':
                mimetype = 'application/json'
            elif format == 'yml':
                mimetype = 'text/vnd.yaml'
            else:
                mimetype = 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet'
    except Exception:
        # the caller never learns the directory's path on failure, so it must
        # be removed here
        shutil.rmtree(temp_dir)
        raise

    return temp_dir, filename, mimetype


def get_model(models, name):
    """ Get the model with name :obj:`name`

    The request is aborted with a 400 error when no model has the requested
    name, including when :obj:`models` is empty.

    Args:
        models (:obj:`list` of :obj:`core.Model`): models
        name (:obj:`str`): model name

    Returns:
        :obj:`core.Model`: model
    """
    for model in models:
        if model.__name__ == name:
            return model

    # reached only when no model matched; also covers an empty list of models,
    # for which the loop variable would never be bound
    flask_restplus.abort(400, 'Workbook does not have model "{}".'.format(name))