SiLeBAT/FSK-Lab

View on GitHub
de.bund.bfr.knime.pmm.common/src/de/bund/bfr/knime/pmm/common/QualityMeasurementComputation.java

Summary

Maintainability
F
6 days
Test Coverage
/*******************************************************************************
 * Copyright (c) 2015 Federal Institute for Risk Assessment (BfR), Germany
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 *
 * Contributors:
 *     Department Biological Safety - BfR
 *******************************************************************************/
package de.bund.bfr.knime.pmm.common;

import java.util.ArrayList;
import java.util.LinkedHashMap;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;

import org.lsmp.djep.djep.DJep;
import org.nfunk.jep.Node;
import org.nfunk.jep.ParseException;

import de.bund.bfr.knime.pmm.common.generictablemodel.KnimeTuple;
import de.bund.bfr.knime.pmm.common.math.MathUtilities;
import de.bund.bfr.knime.pmm.common.pmmtablemodel.AttributeUtilities;
import de.bund.bfr.knime.pmm.common.pmmtablemodel.Model1Schema;
import de.bund.bfr.knime.pmm.common.pmmtablemodel.Model2Schema;
import de.bund.bfr.knime.pmm.common.pmmtablemodel.PmmUtilities;
import de.bund.bfr.knime.pmm.common.pmmtablemodel.TimeSeriesSchema;

/**
 * Static helpers that compute goodness-of-fit figures (SSE, RMSE, R squared,
 * AIC and degrees of freedom) for primary and secondary models by evaluating
 * the model formula against the data carried by the tuples. The results are
 * written into the {@link EstModelXml} of new tuples created from the input
 * tuples.
 */
public class QualityMeasurementComputation {

    private QualityMeasurementComputation() {
    }

    /**
     * Computes the quality figures of primary models.
     * <p>
     * Tuples are grouped by the id of their estimated model. If
     * {@code perDataSet} is set, or the model has at most one independent
     * variable, the condition id is appended to the group key so that every
     * data set forms its own group. Within a group each condition id
     * contributes its time series only once. A tuple contributes no data when
     * a value for one of the model's non-time variables is missing in its misc
     * values.
     * <p>
     * No figures are written for a group when its formula is missing or cannot
     * be parsed, when one of its parameters has no value, or when no data point
     * yields a finite prediction.
     *
     * @param tuples
     *            the tuples holding the fitted primary models and their data
     * @param perDataSet
     *            if {@code true}, figures are computed per data set and the
     *            RMSE is computed from SSE and sample count only; otherwise the
     *            number of parameters is passed to the RMSE computation as well
     * @return one new tuple per input tuple, in input order; tuples without an
     *         estimated model id or without computable figures keep the values
     *         of the input tuple
     */
    public static List<KnimeTuple> computePrimary(List<KnimeTuple> tuples,
            boolean perDataSet) {
        Map<String, KnimeTuple> tupleMap = new LinkedHashMap<>();
        Map<String, Set<Integer>> usedCondIDs = new LinkedHashMap<>();
        Map<String, List<Double>> targetValueMap = new LinkedHashMap<>();
        Map<String, Map<String, List<Double>>> variableValueMap = new LinkedHashMap<>();

        for (KnimeTuple tuple : tuples) {
            String id = primaryKey(tuple, perDataSet);

            if (id == null) {
                continue;
            }

            if (!tupleMap.containsKey(id)) {
                // The first tuple of a group defines the variables of the model.
                Map<String, List<Double>> emptyVariableValues = new LinkedHashMap<>();

                for (PmmXmlElementConvertable el : tuple.getPmmXml(
                        Model1Schema.ATT_INDEPENDENT).getElementSet()) {
                    emptyVariableValues.put(((IndepXml) el).name,
                            new ArrayList<Double>());
                }

                tupleMap.put(id, tuple);
                usedCondIDs.put(id, new LinkedHashSet<Integer>());
                targetValueMap.put(id, new ArrayList<Double>());
                variableValueMap.put(id, emptyVariableValues);
            }

            // Every condition id may contribute its time series only once.
            if (!usedCondIDs.get(id).add(
                    tuple.getInt(TimeSeriesSchema.ATT_CONDID))) {
                continue;
            }

            Map<String, List<Double>> variableValues = variableValueMap.get(id);
            Map<String, Double> miscValues = collectMiscValues(
                    variableValues.keySet(),
                    tuple.getPmmXml(TimeSeriesSchema.ATT_MISC));

            if (miscValues == null) {
                continue;
            }

            List<Double> targetValues = targetValueMap.get(id);
            PmmXmlDoc timeSeriesXml = tuple
                    .getPmmXml(TimeSeriesSchema.ATT_TIMESERIES);

            for (PmmXmlElementConvertable el : timeSeriesXml.getElementSet()) {
                TimeSeriesXml point = (TimeSeriesXml) el;

                if (point.time == null || point.concentration == null) {
                    continue;
                }

                targetValues.add(point.concentration);
                variableValues.get(AttributeUtilities.TIME).add(point.time);

                // The misc values are constant for the whole time series.
                for (Map.Entry<String, List<Double>> entry : variableValues
                        .entrySet()) {
                    if (!entry.getKey().equals(AttributeUtilities.TIME)) {
                        entry.getValue().add(miscValues.get(entry.getKey()));
                    }
                }
            }
        }

        Map<String, FitQuality> qualityMap = new LinkedHashMap<>();

        for (Map.Entry<String, KnimeTuple> entry : tupleMap.entrySet()) {
            String id = entry.getKey();
            KnimeTuple tuple = entry.getValue();
            String formula = ((CatalogModelXml) tuple.getPmmXml(
                    Model1Schema.ATT_MODELCATALOG).get(0)).formula;
            Map<String, List<Double>> variableValues = variableValueMap.get(id);
            FitQuality quality = computeFitQuality(formula,
                    tuple.getPmmXml(Model1Schema.ATT_PARAMETER),
                    variableValues.keySet(), variableValues,
                    targetValueMap.get(id), !perDataSet);

            if (quality != null) {
                qualityMap.put(id, quality);
            }
        }

        List<KnimeTuple> newTuples = new ArrayList<>();

        for (KnimeTuple tuple : tuples) {
            KnimeTuple newTuple = new KnimeTuple(tuple.getSchema(), tuple
                    .getSchema().createSpec(), tuple);
            String id = primaryKey(tuple, perDataSet);

            if (id != null && qualityMap.containsKey(id)) {
                applyQuality(newTuple, Model1Schema.ATT_ESTMODEL,
                        qualityMap.get(id));
            }

            newTuples.add(newTuple);
        }

        return newTuples;
    }

    /**
     * Computes the quality figures of secondary models.
     * <p>
     * Tuples are grouped by the name of the secondary model's dependent
     * variable and the id of the primary catalog model. For every tuple the
     * value of the primary parameter named like the dependent variable is the
     * target value, and the tuple's misc values are the variable values. Rows
     * whose target value or any independent variable value is missing are
     * dropped before the formula is evaluated.
     * <p>
     * No figures are written for a group when its formula is missing or cannot
     * be parsed, when one of its parameters has no value, or when no row yields
     * a finite prediction.
     *
     * @param tuples
     *            the tuples holding the fitted secondary models
     * @return one new tuple per input tuple, in input order; tuples of groups
     *         without computable figures keep the values of the input tuple
     */
    public static List<KnimeTuple> computeSecondary(List<KnimeTuple> tuples) {
        Map<String, SecondaryModelData> dataMap = new LinkedHashMap<>();
        List<String> miscParams = PmmUtilities.getMiscParams(tuples);

        for (KnimeTuple tuple : tuples) {
            String id = secondaryKey(tuple);
            SecondaryModelData data = dataMap.get(id);

            if (data == null) {
                // The first tuple of a group defines the secondary model.
                String formulaSec = ((CatalogModelXml) tuple.getPmmXml(
                        Model2Schema.ATT_MODELCATALOG).get(0)).formula;
                String depVarSec = ((DepXml) tuple.getPmmXml(
                        Model2Schema.ATT_DEPENDENT).get(0)).name;

                data = new SecondaryModelData(formulaSec, depVarSec,
                        tuple.getPmmXml(Model2Schema.ATT_PARAMETER),
                        tuple.getPmmXml(Model2Schema.ATT_INDEPENDENT),
                        miscParams);
                dataMap.put(id, data);
            }

            PmmXmlDoc paramXml = tuple.getPmmXml(Model1Schema.ATT_PARAMETER);
            int depVarIndex = CellIO.getNameList(paramXml).indexOf(data.depVar);
            // A primary model without that parameter yields a missing value,
            // which is dropped together with the other incomplete rows.
            Double depVarValue = depVarIndex >= 0 ? ((ParamXml) paramXml
                    .get(depVarIndex)).value : null;

            data.depVarData.add(depVarValue);

            PmmXmlDoc misc = tuple.getPmmXml(TimeSeriesSchema.ATT_MISC);

            for (String param : miscParams) {
                data.miscData.get(param).add(findMiscValue(misc, param));
            }
        }

        Map<String, FitQuality> qualityMap = new LinkedHashMap<>();

        for (Map.Entry<String, SecondaryModelData> entry : dataMap.entrySet()) {
            SecondaryModelData data = entry.getValue();
            List<String> indepVars = CellIO.getNameList(data.indepXml);

            removeIncompleteRows(data.depVarData, data.miscData, indepVars,
                    miscParams);

            FitQuality quality = computeFitQuality(data.formula, data.paramXml,
                    indepVars, data.miscData, data.depVarData, true);

            if (quality != null) {
                qualityMap.put(entry.getKey(), quality);
            }
        }

        List<KnimeTuple> newTuples = new ArrayList<>();

        for (KnimeTuple tuple : tuples) {
            KnimeTuple newTuple = new KnimeTuple(tuple.getSchema(), tuple
                    .getSchema().createSpec(), tuple);
            String id = secondaryKey(tuple);

            if (qualityMap.containsKey(id)) {
                applyQuality(newTuple, Model2Schema.ATT_ESTMODEL,
                        qualityMap.get(id));
            }

            newTuples.add(newTuple);
        }

        return newTuples;
    }

    /**
     * Builds the grouping key of a primary model tuple, or returns
     * {@code null} if its estimated model has no id.
     */
    private static String primaryKey(KnimeTuple tuple, boolean perDataSet) {
        EstModelXml estModel = (EstModelXml) tuple.getPmmXml(
                Model1Schema.ATT_ESTMODEL).get(0);

        if (estModel.id == null) {
            return null;
        }

        String key = String.valueOf(estModel.id);

        if (perDataSet
                || tuple.getPmmXml(Model1Schema.ATT_INDEPENDENT).size() <= 1) {
            key += "(" + tuple.getInt(TimeSeriesSchema.ATT_CONDID) + ")";
        }

        return key;
    }

    /** Builds the grouping key of a secondary model tuple. */
    private static String secondaryKey(KnimeTuple tuple) {
        DepXml depXml = (DepXml) tuple.getPmmXml(Model2Schema.ATT_DEPENDENT)
                .get(0);
        CatalogModelXml primModelXml = (CatalogModelXml) tuple.getPmmXml(
                Model1Schema.ATT_MODELCATALOG).get(0);

        return depXml.name + " (" + primModelXml.id + ")";
    }

    /**
     * Looks up the misc value of every variable except the time variable.
     *
     * @return the values by variable name, or {@code null} if a variable is
     *         not contained in {@code miscXml} or has no value
     */
    private static Map<String, Double> collectMiscValues(
            Set<String> variables, PmmXmlDoc miscXml) {
        List<String> miscNames = CellIO.getNameList(miscXml);
        Map<String, Double> miscValues = new LinkedHashMap<>();

        for (String var : variables) {
            if (var.equals(AttributeUtilities.TIME)) {
                continue;
            }

            if (!miscNames.contains(var)) {
                return null;
            }

            Double value = ((MiscXml) miscXml.get(miscNames.indexOf(var))).value;

            if (value == null) {
                return null;
            }

            miscValues.put(var, value);
        }

        return miscValues;
    }

    /**
     * Returns the value of the first misc element with the given name, or
     * {@code null} if there is no such element.
     */
    private static Double findMiscValue(PmmXmlDoc misc, String name) {
        for (PmmXmlElementConvertable el : misc.getElementSet()) {
            MiscXml element = (MiscXml) el;

            if (name.equals(element.name)) {
                return element.value;
            }
        }

        return null;
    }

    /**
     * Removes every row whose target value or any independent variable value
     * is missing from the target list and from all misc value lists.
     */
    private static void removeIncompleteRows(List<Double> depVarData,
            Map<String, List<Double>> miscData, List<String> indepVars,
            List<String> miscParams) {
        // Iterate backwards: removing row i shifts only the rows behind it,
        // which have been checked already, so no row is skipped.
        for (int i = depVarData.size() - 1; i >= 0; i--) {
            if (isRowComplete(depVarData, miscData, indepVars, i)) {
                continue;
            }

            depVarData.remove(i);

            for (String param : miscParams) {
                miscData.get(param).remove(i);
            }
        }
    }

    /**
     * Tells whether row {@code row} has a target value and a value for every
     * independent variable. A variable without any misc data counts as
     * missing.
     */
    private static boolean isRowComplete(List<Double> depVarData,
            Map<String, List<Double>> miscData, List<String> indepVars,
            int row) {
        if (depVarData.get(row) == null) {
            return false;
        }

        for (String var : indepVars) {
            List<Double> values = miscData.get(var);

            if (values == null || values.get(row) == null) {
                return false;
            }
        }

        return true;
    }

    /**
     * Evaluates the right-hand side of {@code formula} for every data row and
     * derives the quality figures from the residuals. Rows whose prediction
     * cannot be evaluated or is not finite are left out.
     *
     * @param formula
     *            the model formula; only the part behind the first "=" is
     *            evaluated
     * @param paramXml
     *            the model parameters with their values
     * @param variables
     *            the names of the variables set for each row
     * @param variableValues
     *            the values per variable name, one entry per row
     * @param targetValues
     *            the observed values, one entry per row
     * @param dofCorrectedRms
     *            if {@code true}, the number of parameters is passed to the
     *            RMSE computation in addition to SSE and sample count
     * @return the quality figures, or {@code null} if the formula is missing
     *         or unparsable, a parameter has no value, or no row could be used
     */
    private static FitQuality computeFitQuality(String formula,
            PmmXmlDoc paramXml, Iterable<String> variables,
            Map<String, List<Double>> variableValues,
            List<Double> targetValues, boolean dofCorrectedRms) {
        DJep parser = MathUtilities.createParser();
        Node function = parseRightHandSide(parser, formula);

        if (function == null) {
            return null;
        }

        for (PmmXmlElementConvertable el : paramXml.getElementSet()) {
            ParamXml param = (ParamXml) el;

            if (param.value == null) {
                return null;
            }

            parser.addVariable(param.name, param.value);
        }

        for (String var : variables) {
            parser.addVariable(var, 0.0);
        }

        double sse = 0.0;
        List<Double> usedTargetValues = new ArrayList<>();

        for (int i = 0; i < targetValues.size(); i++) {
            for (String var : variables) {
                parser.setVarValue(var, variableValues.get(var).get(i));
            }

            Double predicted = evaluateFinite(parser, function);

            if (predicted != null) {
                double diff = targetValues.get(i) - predicted;

                sse += diff * diff;
                usedTargetValues.add(targetValues.get(i));
            }
        }

        if (usedTargetValues.isEmpty()) {
            return null;
        }

        int paramCount = paramXml.getElementSet().size();
        int sampleCount = usedTargetValues.size();
        Double rms;

        if (dofCorrectedRms) {
            rms = MathUtilities.getRMSE(sse, paramCount, sampleCount);
        } else {
            rms = MathUtilities.getRMSE(sse, sampleCount);
        }

        Double rSquared = MathUtilities.getRSquared(sse, usedTargetValues);
        Double aic = MathUtilities.akaikeCriterion(paramCount, sampleCount,
                sse);

        return new FitQuality(sse, rms, rSquared, aic, sampleCount
                - paramCount);
    }

    /**
     * Parses the part of {@code formula} behind the first "=".
     *
     * @return the parsed expression, or {@code null} if {@code formula} is
     *         {@code null} or cannot be parsed
     */
    private static Node parseRightHandSide(DJep parser, String formula) {
        if (formula == null) {
            return null;
        }

        try {
            return parser.parse(formula.substring(formula.indexOf("=") + 1));
        } catch (ParseException e) {
            e.printStackTrace();
            return null;
        }
    }

    /**
     * Evaluates {@code function} with the variable values currently set in
     * {@code parser}.
     *
     * @return the result, or {@code null} if the evaluation fails or the
     *         result is not a finite {@link Double}
     */
    private static Double evaluateFinite(DJep parser, Node function) {
        Object value = null;

        try {
            value = parser.evaluate(function);
        } catch (ParseException e) {
            // Best effort: the row is left out of the statistics.
            e.printStackTrace();
        }

        if (value instanceof Double) {
            Double result = (Double) value;

            if (!result.isNaN() && !result.isInfinite()) {
                return result;
            }
        }

        return null;
    }

    /**
     * Writes the quality figures into the first element of the estimated model
     * stored under {@code estModelAttribute} and sets it on the tuple.
     */
    private static void applyQuality(KnimeTuple tuple,
            String estModelAttribute, FitQuality quality) {
        PmmXmlDoc estModelXml = tuple.getPmmXml(estModelAttribute);
        EstModelXml estModel = (EstModelXml) estModelXml.get(0);

        estModel.sse = quality.sse;
        estModel.rms = quality.rms;
        estModel.r2 = quality.rSquared;
        estModel.aic = quality.aic;
        estModel.dof = quality.dof;

        tuple.setValue(estModelAttribute, estModelXml);
    }

    /** The quality figures computed for one model. */
    private static final class FitQuality {

        final Double sse;
        final Double rms;
        final Double rSquared;
        final Double aic;
        final Integer dof;

        FitQuality(Double sse, Double rms, Double rSquared, Double aic,
                Integer dof) {
            this.sse = sse;
            this.rms = rms;
            this.rSquared = rSquared;
            this.aic = aic;
            this.dof = dof;
        }
    }

    /** The model definition and the collected data of one secondary model. */
    private static final class SecondaryModelData {

        final String formula;
        final String depVar;
        final PmmXmlDoc paramXml;
        final PmmXmlDoc indepXml;
        final List<Double> depVarData = new ArrayList<>();
        final Map<String, List<Double>> miscData = new LinkedHashMap<>();

        SecondaryModelData(String formula, String depVar, PmmXmlDoc paramXml,
                PmmXmlDoc indepXml, List<String> miscParams) {
            this.formula = formula;
            this.depVar = depVar;
            this.paramXml = paramXml;
            this.indepXml = indepXml;

            for (String param : miscParams) {
                miscData.put(param, new ArrayList<Double>());
            }
        }
    }

}