Ensembl/ensembl-hive

View on GitHub
sql/patch_2014-04-14.sqlite

Summary

Maintainability
Test Coverage

-- ---------------------------------------------------------------------------------------------------
-- Create   `worker_resource_usage` table to be a meadow-agnostic replacement for `lsf_report` (no data is copied over)
-- Create   `resource_usage_stats` view as a meadow-agnostic substitute for `lsf_usage` view
-- Key      on `worker` table to speed up the worker_id<->process_id mapping
-- ---------------------------------------------------------------------------------------------------

CREATE TEMPORARY VIEW exp_ver AS SELECT "58" AS expected_version;

.bail ON

SELECT ('The patch only applies to schema version '
    || expected_version
    || ', but the current schema version is '
    || meta_value
    || ', skipping the rest.') AS ''
    FROM hive_meta JOIN exp_ver WHERE meta_key='hive_sql_schema_version' AND meta_value<>expected_version;

INSERT INTO hive_meta (meta_key, meta_value)
    SELECT hm.* FROM hive_meta AS hm JOIN exp_ver WHERE meta_key='hive_sql_schema_version' AND meta_value<>expected_version;

SELECT ('The patch seems to be compatible with schema version '
    || expected_version
    || ', applying the patch...') AS '' FROM exp_ver;


-- ----------------------------------<actual_patch> -------------------------------------------------

    -- add a new meadow-agnostic table for tracking the resource usage:
CREATE TABLE worker_resource_usage (
    worker_id               INTEGER         NOT NULL,
    exit_status             VARCHAR(255)    DEFAULT NULL,
    mem_megs                FLOAT           DEFAULT NULL,
    swap_megs               FLOAT           DEFAULT NULL,
    pending_sec             FLOAT           DEFAULT NULL,
    cpu_sec                 FLOAT           DEFAULT NULL,
    lifespan_sec            FLOAT           DEFAULT NULL,
    exception_status        VARCHAR(255)    DEFAULT NULL,

    PRIMARY KEY (worker_id)
);

    -- add a stats view over the new table:
CREATE VIEW IF NOT EXISTS resource_usage_stats AS
    SELECT a.logic_name || '(' || a.analysis_id || ')' analysis,
           w.meadow_type,
           rc.name || '(' || rc.resource_class_id || ')' resource_class,
           count(*) workers,
           min(mem_megs) AS min_mem_megs, avg(mem_megs) AS avg_mem_megs, max(mem_megs) AS max_mem_megs,
           min(swap_megs) AS min_swap_megs, avg(swap_megs) AS avg_swap_megs, max(swap_megs) AS max_swap_megs
    FROM resource_class rc
    JOIN analysis_base a USING(resource_class_id)
    LEFT JOIN worker w USING(analysis_id)
    LEFT JOIN worker_resource_usage USING (worker_id)
    GROUP BY analysis_id, w.meadow_type, rc.resource_class_id
    ORDER BY analysis_id, w.meadow_type;

    -- add a new key to worker table to speed up mapping between process_id and worker_id:
CREATE        INDEX meadow_process ON worker (meadow_type, meadow_name, process_id);

-- ----------------------------------</actual_patch> -------------------------------------------------


    -- increase the schema version by one:
UPDATE hive_meta SET meta_value=meta_value+1 WHERE meta_key='hive_sql_schema_version';