SiLeBAT/FSK-Lab

View on GitHub
de.bund.bfr.knime.fsklab.metadata.model/src/metadata/swagger/DataModelSheetImporter.java

Summary

Maintainability
F
4 days
Test Coverage
package metadata.swagger;

import java.util.HashMap;

import org.apache.poi.ss.usermodel.Cell;
import org.apache.poi.ss.usermodel.CellType;
import org.apache.poi.ss.usermodel.Row;
import org.apache.poi.ss.usermodel.Sheet;
import org.threeten.bp.LocalDate;

import de.bund.bfr.metadata.swagger.Assay;
import de.bund.bfr.metadata.swagger.Contact;
import de.bund.bfr.metadata.swagger.DataModel;
import de.bund.bfr.metadata.swagger.DataModelGeneralInformation;
import de.bund.bfr.metadata.swagger.DataModelModelMath;
import de.bund.bfr.metadata.swagger.DietaryAssessmentMethod;
import de.bund.bfr.metadata.swagger.GenericModelDataBackground;
import de.bund.bfr.metadata.swagger.GenericModelScope;
import de.bund.bfr.metadata.swagger.Laboratory;
import de.bund.bfr.metadata.swagger.Model;
import de.bund.bfr.metadata.swagger.Parameter;
import de.bund.bfr.metadata.swagger.Reference;
import de.bund.bfr.metadata.swagger.Study;
import de.bund.bfr.metadata.swagger.StudySample;

/**
 * Builds a {@link DataModel} from a spreadsheet {@link Sheet} whose metadata is laid out at
 * fixed row and column positions.
 *
 * <p>
 * Single-valued properties are read from the cell at column constant {@code J} of a fixed row.
 * Multi-valued properties (creators, references, products, ...) are read from consecutive rows
 * through {@link ImporterUtils}, using the per-type column maps built in the constructor.
 *
 * <p>
 * The import is best-effort: rows or cells that are missing, of an unexpected cell type or
 * rejected by {@link ImporterUtils} are skipped instead of aborting the whole import.
 *
 * <p>
 * NOTE(review): the column constants ({@code J}, {@code L}, ...) are not declared in this class;
 * presumably they are inherited from {@link SheetImporter} - confirm.
 */
public class DataModelSheetImporter implements SheetImporter {

    // Row indices (as passed to Sheet#getRow) of the single-valued general information properties.
    private static final int GENERAL_INFORMATION__NAME = 1;
    private static final int GENERAL_INFORMATION__SOURCE = 2;
    private static final int GENERAL_INFORMATION__IDENTIFIER = 3;
    private static final int GENERAL_INFORMATION__CREATION_DATE = 6;
    private static final int GENERAL_INFORMATION__RIGHTS = 8;
    private static final int GENERAL_INFORMATION__AVAILABLE = 9;
    private static final int GENERAL_INFORMATION__URL = 10;
    private static final int GENERAL_INFORMATION__FORMAT = 11;
    private static final int GENERAL_INFORMATION__LANGUAGE = 24;
    private static final int GENERAL_INFORMATION__STATUS = 32;
    private static final int GENERAL_INFORMATION__OBJECTIVE = 25;
    private static final int GENERAL_INFORMATION__DESCRIPTION = 26;

    // Row indices of the single-valued scope properties.
    private static final int SCOPE__GENERAL_COMMENT = 65;
    private static final int SCOPE__TEMPORAL_INFORMATION = 66;

    // Row indices of the study properties.
    private static final int STUDY__STUDY_IDENTIFIER = 69;
    private static final int STUDY__STUDY_TITLE = 70;
    private static final int STUDY__STUDY_DESCRIPTION = 71;
    private static final int STUDY__STUDY_DESIGN_TYPE = 72;
    private static final int STUDY__STUDY_ASSAY_MEASUREMENT_TYPE = 73;
    private static final int STUDY__STUDY_ASSAY_TECHNOLOGY_TYPE = 74;
    private static final int STUDY__STUDY_ASSAY_TECHNOLOGY_PLATFORM = 75;
    private static final int STUDY__ACCREDITATION_PROCEDURE_FOR_THE_ASSAY_TECHNOLOGY = 76;
    private static final int STUDY__STUDY_PROTOCOL_NAME = 77;
    private static final int STUDY__STUDY_PROTOCOL_TYPE = 78;
    private static final int STUDY__STUDY_PROTOCOL_DESCRIPTION = 79;
    private static final int STUDY__STUDY_PROTOCOL_URI = 80;
    private static final int STUDY__STUDY_PROTOCOL_VERSION = 81;
    private static final int STUDY__STUDY_PROTOCOL_PARAMETERS_NAME = 82;
    private static final int STUDY__STUDY_PROTOCOL_COMPONENTS_NAME = 83;
    private static final int STUDY__STUDY_PROTOCOL_COMPONENTS_TYPE = 84;

    // First row index of each multi-row (tabular) section.
    private static final int GI_CREATOR_ROW = 3;
    private static final int GI_REFERENCE_ROW = 14;
    private static final int SCOPE_PRODHAZPOP_ROW = 30;
    private static final int BG_STUDY_SAMPLE_ROW = 88;
    private static final int BG_DIET_ASSESS_ROW = 94;
    private static final int BG_LABORATORY_ROW = 101;
    private static final int BG_ASSAY_ROW = 107;

    private static final int MM_PARAMETER_ROW = 115;

    // The column maps are kept as HashMap (not Map) because they are handed to ImporterUtils,
    // whose parameter types are not visible from this class.

    /** Columns for each of the properties of DietaryAssessmentMethod. */
    private final HashMap<String, Integer> methodColumns;

    /** Columns for each of the properties of Laboratory. */
    private final HashMap<String, Integer> laboratoryColumns;

    /** Columns for each of the properties of Reference. */
    private final HashMap<String, Integer> referenceColumns;

    /** Columns for each of the properties of Product. */
    private final HashMap<String, Integer> productColumns;

    /** Columns for each of the properties of Creator. */
    private final HashMap<String, Integer> creatorColumns;

    /** Columns for each of the properties of Author. */
    private final HashMap<String, Integer> authorColumns;

    /** Columns for each of the properties of Parameter. */
    private final HashMap<String, Integer> parameterColumns;

    /** Columns for each of the properties of StudySample. */
    private final HashMap<String, Integer> sampleColumns;

    /** Columns for each of the properties of Hazard. */
    private final HashMap<String, Integer> hazardColumns;

    /** Columns for each of the properties of PopulationGroup. */
    private final HashMap<String, Integer> populationColumns;

    /** Columns for each of the properties of Assay. */
    private final HashMap<String, Integer> assayColumns;

    /** Creates an importer and fills the property-name to column lookup tables. */
    public DataModelSheetImporter() {

        methodColumns = new HashMap<>();
        methodColumns.put("collectionTool", L);
        methodColumns.put("numberOfNonConsecutiveOneDay", M);
        methodColumns.put("softwareTool", N);
        methodColumns.put("numberOfFoodItems", O);
        methodColumns.put("recordTypes", P);
        methodColumns.put("foodDescriptors", Q);

        laboratoryColumns = new HashMap<>();
        laboratoryColumns.put("accreditation", L);
        laboratoryColumns.put("name", M);
        laboratoryColumns.put("country", N);

        creatorColumns = new HashMap<>();
        creatorColumns.put("mail", S);
        creatorColumns.put("title", L);
        creatorColumns.put("familyName", P);
        creatorColumns.put("givenName", N);
        creatorColumns.put("telephone", R);
        creatorColumns.put("streetAddress", X);
        creatorColumns.put("country", T);
        creatorColumns.put("city", U);
        creatorColumns.put("zipCode", V);
        creatorColumns.put("region", Z);
        creatorColumns.put("organization", Q);

        authorColumns = new HashMap<>();
        authorColumns.put("title", AB);
        authorColumns.put("name", AC);
        authorColumns.put("givenName", AD);
        authorColumns.put("additionalName", AE);
        authorColumns.put("familyName", AF);
        authorColumns.put("organization", AG);
        authorColumns.put("telephone", AH);
        authorColumns.put("mail", AI);
        authorColumns.put("country", AJ);
        authorColumns.put("city", AK);
        authorColumns.put("zipCode", AL);
        authorColumns.put("postOfficeBox", AM);
        authorColumns.put("streetAddress", AN);
        authorColumns.put("extendedAddress", AO);
        authorColumns.put("region", AP);

        referenceColumns = new HashMap<>();
        referenceColumns.put("referenceDescription", L);
        referenceColumns.put("type", M);
        referenceColumns.put("date", N);
        referenceColumns.put("pmid", O);
        referenceColumns.put("doi", P);
        referenceColumns.put("author", Q);
        referenceColumns.put("title", R);
        referenceColumns.put("abstract", S);
        referenceColumns.put("status", U);
        referenceColumns.put("website", V);
        referenceColumns.put("comment", W);

        productColumns = new HashMap<>();
        productColumns.put("name", L);
        productColumns.put("description", M);
        productColumns.put("unit", N);
        productColumns.put("productionMethod", O);
        productColumns.put("packaging", P);
        productColumns.put("treatment", Q);
        productColumns.put("originCountry", R);
        productColumns.put("originArea", S);
        productColumns.put("fisheriesArea", T);
        productColumns.put("productionDate", U);
        productColumns.put("expiryDate", V);

        parameterColumns = new HashMap<>();
        parameterColumns.put("id", L);
        parameterColumns.put("classification", M);
        parameterColumns.put("name", N);
        parameterColumns.put("description", O);
        parameterColumns.put("unit", P);
        parameterColumns.put("unitCategory", Q);
        parameterColumns.put("dataType", R);
        parameterColumns.put("source", S);
        parameterColumns.put("subject", T);
        parameterColumns.put("distribution", U);
        parameterColumns.put("value", V);
        parameterColumns.put("reference", W);
        parameterColumns.put("variability", X);
        parameterColumns.put("max", Y);
        parameterColumns.put("min", Z);
        parameterColumns.put("error", AA);

        sampleColumns = new HashMap<>();
        sampleColumns.put("sample", L);
        sampleColumns.put("protocolOfSampleCollection", M);
        sampleColumns.put("samplingStrategy", N);
        sampleColumns.put("samplingProgramType", O);
        sampleColumns.put("samplingMethod", P);
        sampleColumns.put("samplingPlan", Q);
        sampleColumns.put("samplingWeight", R);
        sampleColumns.put("samplingSize", S);
        sampleColumns.put("lotSizeUnit", T);
        sampleColumns.put("samplingPoint", U);

        hazardColumns = new HashMap<>();
        hazardColumns.put("type", W);
        hazardColumns.put("name", X);
        hazardColumns.put("description", Y);
        hazardColumns.put("unit", Z);
        hazardColumns.put("adverseEffect", AA);
        hazardColumns.put("sourceOfContamination", AB);
        hazardColumns.put("benchmarkDose", AC);
        hazardColumns.put("maximumResidueLimit", AD);
        hazardColumns.put("noObservedAdverseAffectLevel", AE);
        hazardColumns.put("lowestObservedAdverseAffectLevel", AF);
        hazardColumns.put("acceptableOperatorsExposureLevel", AG);
        hazardColumns.put("acuteReferenceDose", AH);
        hazardColumns.put("acceptableDailyIntake", AI);
        hazardColumns.put("indSum", AJ);

        assayColumns = new HashMap<>();
        assayColumns.put("name", L);
        assayColumns.put("description", M);
        assayColumns.put("moisturePercentage", N);
        assayColumns.put("fatPercentage", O);
        assayColumns.put("detectionLimit", P);
        assayColumns.put("quantificationLimit", Q);
        assayColumns.put("leftCensoredData", R);
        assayColumns.put("contaminationRange", S);
        assayColumns.put("uncertaintyValue", T);

        populationColumns = new HashMap<>();
        populationColumns.put("name", AK);
        populationColumns.put("targetPopulation", AL);
        populationColumns.put("span", AM);
        populationColumns.put("description", AN);
        populationColumns.put("age", AO);
        populationColumns.put("gender", AP);
        populationColumns.put("bmi", AQ);
        populationColumns.put("diet", AR);
        populationColumns.put("consumption", AS);
        populationColumns.put("region", AT);
        populationColumns.put("country", AU);
        populationColumns.put("risk", AV);
        populationColumns.put("season", AW);
    }

    /**
     * Looks up the cell at column {@code J} of the given row without failing on sparse sheets.
     *
     * @param sheet    sheet to read from
     * @param rowIndex row index as understood by {@link Sheet#getRow(int)}
     * @return the cell, or {@code null} if either the row or the cell does not exist
     */
    private static Cell findCell(Sheet sheet, int rowIndex) {
        Row row = sheet.getRow(rowIndex);
        // Both getRow and getCell may return null for rows/cells that were never written.
        return row == null ? null : row.getCell(J);
    }

    /**
     * Reads the text at column {@code J} of the given row.
     *
     * @param sheet    sheet to read from
     * @param rowIndex row index as understood by {@link Sheet#getRow(int)}
     * @return the string value, or {@code null} if the row or cell is missing or the cell is not
     *         of type {@link CellType#STRING}
     */
    private static String readString(Sheet sheet, int rowIndex) {
        Cell cell = findCell(sheet, rowIndex);
        if (cell == null || cell.getCellTypeEnum() != CellType.STRING) {
            return null;
        }
        return cell.getStringCellValue();
    }

    /**
     * Reads the general information section. A property is only set when its cell holds a string
     * (or a numeric value for the creation date); everything else is left untouched.
     *
     * @param sheet sheet to read from
     * @return the populated general information, never {@code null}
     */
    private DataModelGeneralInformation retrieveGeneralInformation(Sheet sheet) {

        DataModelGeneralInformation information = new DataModelGeneralInformation();

        String name = readString(sheet, GENERAL_INFORMATION__NAME);
        if (name != null) {
            information.setName(name);
        }

        String source = readString(sheet, GENERAL_INFORMATION__SOURCE);
        if (source != null) {
            information.setSource(source);
        }

        String identifier = readString(sheet, GENERAL_INFORMATION__IDENTIFIER);
        if (identifier != null) {
            information.setIdentifier(identifier);
        }

        // Creators and authors share the same six rows but use different columns.
        for (int numRow = GI_CREATOR_ROW; numRow < GI_CREATOR_ROW + 6; numRow++) {
            Row row = sheet.getRow(numRow);
            if (row == null) {
                continue;
            }

            try {
                Contact contact = ImporterUtils.retrieveContact(row, creatorColumns);
                information.addCreatorItem(contact);
            } catch (Exception ignored) {
                // Best effort: a row without a usable creator is skipped.
            }

            try {
                Contact author = ImporterUtils.retrieveContact(row, authorColumns);
                information.addAuthorItem(author);
            } catch (Exception ignored) {
                // Best effort: a row without a usable author is skipped.
            }
        }

        Cell creationDateCell = findCell(sheet, GENERAL_INFORMATION__CREATION_DATE);
        if (creationDateCell != null && creationDateCell.getCellTypeEnum() == CellType.NUMERIC) {
            LocalDate localDate = ImporterUtils.retrieveDate(creationDateCell);
            information.setCreationDate(localDate);
        }

        // TODO: modificationDate

        String rights = readString(sheet, GENERAL_INFORMATION__RIGHTS);
        if (rights != null) {
            information.setRights(rights);
        }

        String availability = readString(sheet, GENERAL_INFORMATION__AVAILABLE);
        if (availability != null) {
            information.setAvailability(availability);
        }

        String url = readString(sheet, GENERAL_INFORMATION__URL);
        if (url != null) {
            information.setUrl(url);
        }

        String format = readString(sheet, GENERAL_INFORMATION__FORMAT);
        if (format != null) {
            information.setFormat(format);
        }

        // reference (1..n)
        for (int numRow = GI_REFERENCE_ROW; numRow < GI_REFERENCE_ROW + 4; numRow++) {
            Row row = sheet.getRow(numRow);
            if (row == null) {
                continue;
            }
            try {
                Reference reference = ImporterUtils.retrieveReference(row, referenceColumns);
                information.addReferenceItem(reference);
            } catch (Exception ignored) {
                // Best effort: a row without a usable reference is skipped.
            }
        }

        String language = readString(sheet, GENERAL_INFORMATION__LANGUAGE);
        if (language != null) {
            information.setLanguage(language);
        }

        // model category (0..n)

        String status = readString(sheet, GENERAL_INFORMATION__STATUS);
        if (status != null) {
            information.setStatus(status);
        }

        String objective = readString(sheet, GENERAL_INFORMATION__OBJECTIVE);
        if (objective != null) {
            information.setObjective(objective);
        }

        String description = readString(sheet, GENERAL_INFORMATION__DESCRIPTION);
        if (description != null) {
            information.setDescription(description);
        }

        return information;
    }

    /**
     * Reads the scope section: products, hazards and population groups (which share the same
     * block of rows) followed by the general comment and the temporal information.
     *
     * @param sheet sheet to read from
     * @return the populated scope, never {@code null}
     */
    private GenericModelScope retrieveScope(Sheet sheet) {

        GenericModelScope scope = new GenericModelScope();

        for (int numRow = SCOPE_PRODHAZPOP_ROW; numRow <= SCOPE_PRODHAZPOP_ROW + 11; numRow++) {

            Row row = sheet.getRow(numRow);
            if (row == null) {
                // Nothing was ever written to this row, so there is nothing to import.
                continue;
            }

            try {
                scope.addProductItem(ImporterUtils.retrieveProduct(row, productColumns));
            } catch (IllegalArgumentException ignored) {
                // ignore exception since products are optional (*)
            }

            try {
                scope.addHazardItem(ImporterUtils.retrieveHazard(row, hazardColumns));
            } catch (IllegalArgumentException ignored) {
                // ignore exception since hazards are optional (*)
            }

            try {
                scope.addPopulationGroupItem(ImporterUtils.retrievePopulationGroup(row, populationColumns));
            } catch (IllegalArgumentException ignored) {
                // ignore exception since population groups are optional (*)
            }
        }

        String generalComment = readString(sheet, SCOPE__GENERAL_COMMENT);
        if (generalComment != null) {
            scope.setGeneralComment(generalComment);
        }

        String temporalInformation = readString(sheet, SCOPE__TEMPORAL_INFORMATION);
        if (temporalInformation != null) {
            scope.setTemporalInformation(temporalInformation);
        }

        // TODO: Spatial information

        return scope;
    }

    /**
     * Reads the data background section: the study plus up to three rows each of study samples,
     * dietary assessment methods, laboratories and assays.
     *
     * @param sheet sheet to read from
     * @return the populated data background, never {@code null}
     */
    private GenericModelDataBackground retrieveBackground(Sheet sheet) {

        GenericModelDataBackground background = new GenericModelDataBackground();

        try {
            Study study = retrieveStudy(sheet);
            background.setStudy(study);
        } catch (IllegalArgumentException ignored) {
            // Skip faulty study (missing mandatory title) and continue
        }

        for (int numRow = BG_STUDY_SAMPLE_ROW; numRow < BG_STUDY_SAMPLE_ROW + 3; numRow++) {
            Row row = sheet.getRow(numRow);
            if (row == null) {
                continue;
            }
            try {
                StudySample sample = ImporterUtils.retrieveStudySample(row, sampleColumns);
                background.addStudySampleItem(sample);
            } catch (Exception ignored) {
                // Skip faulty sample and continue
            }
        }

        for (int numRow = BG_DIET_ASSESS_ROW; numRow < BG_DIET_ASSESS_ROW + 3; numRow++) {
            Row row = sheet.getRow(numRow);
            if (row == null) {
                continue;
            }
            try {
                DietaryAssessmentMethod method = ImporterUtils.retrieveDietaryAssessmentMethod(row, methodColumns);
                background.addDietaryAssessmentMethodItem(method);
            } catch (Exception ignored) {
                // Skip faulty method and continue
            }
        }

        for (int numRow = BG_LABORATORY_ROW; numRow < BG_LABORATORY_ROW + 3; numRow++) {
            Row row = sheet.getRow(numRow);
            if (row == null) {
                continue;
            }
            try {
                Laboratory laboratory = ImporterUtils.retrieveLaboratory(row, laboratoryColumns);
                background.addLaboratoryItem(laboratory);
            } catch (Exception ignored) {
                // Skip faulty laboratory and continue
            }
        }

        for (int numRow = BG_ASSAY_ROW; numRow < BG_ASSAY_ROW + 3; numRow++) {
            Row row = sheet.getRow(numRow);
            if (row == null) {
                continue;
            }
            try {
                Assay assay = ImporterUtils.retrieveAssay(row, assayColumns);
                background.addAssayItem(assay);
            } catch (Exception ignored) {
                // ignore errors since Assay is optional
            }
        }

        return background;
    }

    /**
     * Reads the parameters, starting at {@code MM_PARAMETER_ROW} and running up to and including
     * the last row of the sheet.
     *
     * @param sheet sheet to read from
     * @return the populated model math, never {@code null}
     */
    private DataModelModelMath retrieveModelMath(Sheet sheet) {

        DataModelModelMath math = new DataModelModelMath();

        // getLastRowNum() is the 0-based index of the last row, so the bound must be inclusive;
        // otherwise the final parameter row would never be read.
        int lastRow = sheet.getLastRowNum();
        for (int numRow = MM_PARAMETER_ROW; numRow <= lastRow; numRow++) {
            Row row = sheet.getRow(numRow);
            if (row == null) {
                continue;
            }
            try {
                Parameter param = ImporterUtils.retrieveParameter(row, parameterColumns);
                math.addParameterItem(param);
            } catch (Exception ignored) {
                // Best effort: a row without a usable parameter is skipped.
            }
        }

        return math;
    }

    /**
     * Reads the study. The title is mandatory; every other property is only set when its cell
     * holds a string.
     *
     * @param sheet sheet to read from
     * @return the populated study
     * @throws IllegalArgumentException if the title cell is missing or does not hold a string
     */
    private Study retrieveStudy(Sheet sheet) {

        // Check first mandatory properties
        String title = readString(sheet, STUDY__STUDY_TITLE);
        if (title == null) {
            throw new IllegalArgumentException("Missing study title");
        }

        Study study = new Study();

        String identifier = readString(sheet, STUDY__STUDY_IDENTIFIER);
        if (identifier != null) {
            study.setIdentifier(identifier);
        }

        study.setTitle(title);

        String description = readString(sheet, STUDY__STUDY_DESCRIPTION);
        if (description != null) {
            study.setDescription(description);
        }

        String designType = readString(sheet, STUDY__STUDY_DESIGN_TYPE);
        if (designType != null) {
            study.setDesignType(designType);
        }

        String measurementType = readString(sheet, STUDY__STUDY_ASSAY_MEASUREMENT_TYPE);
        if (measurementType != null) {
            study.setAssayMeasurementType(measurementType);
        }

        String technologyType = readString(sheet, STUDY__STUDY_ASSAY_TECHNOLOGY_TYPE);
        if (technologyType != null) {
            study.setAssayTechnologyType(technologyType);
        }

        String technologyPlatform = readString(sheet, STUDY__STUDY_ASSAY_TECHNOLOGY_PLATFORM);
        if (technologyPlatform != null) {
            study.setAssayTechnologyPlatform(technologyPlatform);
        }

        String accreditationProcedure = readString(sheet,
                STUDY__ACCREDITATION_PROCEDURE_FOR_THE_ASSAY_TECHNOLOGY);
        if (accreditationProcedure != null) {
            study.setAccreditationProcedureForTheAssayTechnology(accreditationProcedure);
        }

        String protocolName = readString(sheet, STUDY__STUDY_PROTOCOL_NAME);
        if (protocolName != null) {
            study.setProtocolName(protocolName);
        }

        String protocolType = readString(sheet, STUDY__STUDY_PROTOCOL_TYPE);
        if (protocolType != null) {
            study.setProtocolType(protocolType);
        }

        String protocolDescription = readString(sheet, STUDY__STUDY_PROTOCOL_DESCRIPTION);
        if (protocolDescription != null) {
            study.setProtocolDescription(protocolDescription);
        }

        String protocolUri = readString(sheet, STUDY__STUDY_PROTOCOL_URI);
        if (protocolUri != null) {
            study.setProtocolURI(protocolUri);
        }

        String protocolVersion = readString(sheet, STUDY__STUDY_PROTOCOL_VERSION);
        if (protocolVersion != null) {
            study.setProtocolVersion(protocolVersion);
        }

        String parametersName = readString(sheet, STUDY__STUDY_PROTOCOL_PARAMETERS_NAME);
        if (parametersName != null) {
            study.setProtocolParametersName(parametersName);
        }

        String componentsName = readString(sheet, STUDY__STUDY_PROTOCOL_COMPONENTS_NAME);
        if (componentsName != null) {
            study.setProtocolComponentsName(componentsName);
        }

        String componentsType = readString(sheet, STUDY__STUDY_PROTOCOL_COMPONENTS_TYPE);
        if (componentsType != null) {
            study.setProtocolComponentsType(componentsType);
        }

        return study;
    }

    /**
     * Imports a complete data model (general information, scope, data background and model math)
     * from the given sheet.
     *
     * @param sheet sheet holding the model metadata
     * @return a {@link DataModel} whose model type is {@code "dataModel"}
     */
    @Override
    public Model retrieveModel(Sheet sheet) {

        DataModel model = new DataModel();
        model.setModelType("dataModel");
        model.setGeneralInformation(retrieveGeneralInformation(sheet));
        model.setScope(retrieveScope(sheet));
        model.setDataBackground(retrieveBackground(sheet));
        model.setModelMath(retrieveModelMath(sheet));

        return model;
    }
}