sql/patch_2014-04-14.sqlite
-- ---------------------------------------------------------------------------------------------------
-- Create `worker_resource_usage` table to be a meadow-agnostic replacement for `lsf_report` (no data is copied over)
-- Create `resource_usage_stats` view as a meadow-agnostic substitute for `lsf_usage` view
-- Key on `worker` table to speed up the worker_id<->process_id mapping
-- ---------------------------------------------------------------------------------------------------
CREATE TEMPORARY VIEW exp_ver AS SELECT "58" AS expected_version;
.bail ON
SELECT ('The patch only applies to schema version '
|| expected_version
|| ', but the current schema version is '
|| meta_value
|| ', skipping the rest.') AS ''
FROM hive_meta JOIN exp_ver WHERE meta_key='hive_sql_schema_version' AND meta_value<>expected_version;
INSERT INTO hive_meta (meta_key, meta_value)
SELECT hm.* FROM hive_meta AS hm JOIN exp_ver WHERE meta_key='hive_sql_schema_version' AND meta_value<>expected_version;
SELECT ('The patch seems to be compatible with schema version '
|| expected_version
|| ', applying the patch...') AS '' FROM exp_ver;
-- ----------------------------------<actual_patch> -------------------------------------------------
-- add a new meadow-agnostic table for tracking the resource usage:
CREATE TABLE worker_resource_usage (
worker_id INTEGER NOT NULL,
exit_status VARCHAR(255) DEFAULT NULL,
mem_megs FLOAT DEFAULT NULL,
swap_megs FLOAT DEFAULT NULL,
pending_sec FLOAT DEFAULT NULL,
cpu_sec FLOAT DEFAULT NULL,
lifespan_sec FLOAT DEFAULT NULL,
exception_status VARCHAR(255) DEFAULT NULL,
PRIMARY KEY (worker_id)
);
-- add a stats view over the new table:
CREATE VIEW IF NOT EXISTS resource_usage_stats AS
SELECT a.logic_name || '(' || a.analysis_id || ')' analysis,
w.meadow_type,
rc.name || '(' || rc.resource_class_id || ')' resource_class,
count(*) workers,
min(mem_megs) AS min_mem_megs, avg(mem_megs) AS avg_mem_megs, max(mem_megs) AS max_mem_megs,
min(swap_megs) AS min_swap_megs, avg(swap_megs) AS avg_swap_megs, max(swap_megs) AS max_swap_megs
FROM resource_class rc
JOIN analysis_base a USING(resource_class_id)
LEFT JOIN worker w USING(analysis_id)
LEFT JOIN worker_resource_usage USING (worker_id)
GROUP BY analysis_id, w.meadow_type, rc.resource_class_id
ORDER BY analysis_id, w.meadow_type;
-- add a new key to worker table to speed up mapping between process_id and worker_id:
CREATE INDEX meadow_process ON worker (meadow_type, meadow_name, process_id);
-- ----------------------------------</actual_patch> -------------------------------------------------
-- increase the schema version by one:
UPDATE hive_meta SET meta_value=meta_value+1 WHERE meta_key='hive_sql_schema_version';