// de.bund.bfr.knime.fsklab.metadata.model/src/metadata/swagger/DataModelSheetImporter.java
package metadata.swagger;
import java.util.HashMap;
import org.apache.poi.ss.usermodel.Cell;
import org.apache.poi.ss.usermodel.CellType;
import org.apache.poi.ss.usermodel.Row;
import org.apache.poi.ss.usermodel.Sheet;
import org.threeten.bp.LocalDate;
import de.bund.bfr.metadata.swagger.Assay;
import de.bund.bfr.metadata.swagger.Contact;
import de.bund.bfr.metadata.swagger.DataModel;
import de.bund.bfr.metadata.swagger.DataModelGeneralInformation;
import de.bund.bfr.metadata.swagger.DataModelModelMath;
import de.bund.bfr.metadata.swagger.DietaryAssessmentMethod;
import de.bund.bfr.metadata.swagger.GenericModelDataBackground;
import de.bund.bfr.metadata.swagger.GenericModelScope;
import de.bund.bfr.metadata.swagger.Laboratory;
import de.bund.bfr.metadata.swagger.Model;
import de.bund.bfr.metadata.swagger.Parameter;
import de.bund.bfr.metadata.swagger.Reference;
import de.bund.bfr.metadata.swagger.Study;
import de.bund.bfr.metadata.swagger.StudySample;
/**
 * Reads a {@link DataModel} from a spreadsheet {@link Sheet}.
 *
 * <p>Single-valued properties are read from column {@code J} of fixed rows. Repeated items
 * (creators, authors, references, products, hazards, population groups, study samples, dietary
 * assessment methods, laboratories, assays and parameters) are read from fixed row ranges, using
 * the property-name-to-column maps built in the constructor.
 *
 * <p>The column constants ({@code J}, {@code L}, {@code M}, ...) are not declared in this class;
 * presumably they are inherited from {@link SheetImporter} - confirm.
 *
 * <p>Importing is best effort: rows or cells that are missing from the sheet are skipped, and a
 * repeated item that cannot be read is dropped without failing the whole import.
 */
public class DataModelSheetImporter implements SheetImporter {

  // Row indexes (as passed to Sheet#getRow) of the single-valued general information properties.
  private static final int GENERAL_INFORMATION__NAME = 1;
  private static final int GENERAL_INFORMATION__SOURCE = 2;
  private static final int GENERAL_INFORMATION__IDENTIFIER = 3;
  private static final int GENERAL_INFORMATION_CREATION_DATE = 6;
  private static final int GENERAL_INFORMATION__RIGHTS = 8;
  private static final int GENERAL_INFORMATION__AVAILABLE = 9;
  private static final int GENERAL_INFORMATION__URL = 10;
  private static final int GENERAL_INFORMATION__FORMAT = 11;
  private static final int GENERAL_INFORMATION__LANGUAGE = 24;
  // NOTE(review): 32 is out of sequence with the neighbouring rows (24, 25, 26) and falls inside
  // the product/hazard/population row range (30-41) - confirm against the spreadsheet template.
  private static final int GENERAL_INFORMATION__STATUS = 32;
  private static final int GENERAL_INFORMATION__OBJECTIVE = 25;
  private static final int GENERAL_INFORMATION__DESCRIPTION = 26;

  // Row indexes of the single-valued scope properties.
  private static final int SCOPE__GENERAL_COMMENT = 65;
  private static final int SCOPE__TEMPORAL_INFORMATION = 66;

  // Row indexes of the study properties.
  private static final int STUDY__STUDY_IDENTIFIER = 69;
  private static final int STUDY__STUDY_TITLE = 70;
  private static final int STUDY__STUDY_DESCRIPTION = 71;
  private static final int STUDY__STUDY_DESIGN_TYPE = 72;
  private static final int STUDY__STUDY_ASSAY_MEASUREMENT_TYPE = 73;
  private static final int STUDY__STUDY_ASSAY_TECHNOLOGY_TYPE = 74;
  private static final int STUDY__STUDY_ASSAY_TECHNOLOGY_PLATFORM = 75;
  private static final int STUDY__ACCREDITATION_PROCEDURE_FOR_THE_ASSAY_TECHNOLOGY = 76;
  private static final int STUDY__STUDY_PROTOCOL_NAME = 77;
  private static final int STUDY__STUDY_PROTOCOL_TYPE = 78;
  private static final int STUDY__STUDY_PROTOCOL_DESCRIPTION = 79;
  private static final int STUDY__STUDY_PROTOCOL_URI = 80;
  private static final int STUDY__STUDY_PROTOCOL_VERSION = 81;
  private static final int STUDY__STUDY_PROTOCOL_PARAMETERS_NAME = 82;
  private static final int STUDY__STUDY_PROTOCOL_COMPONENTS_NAME = 83;
  private static final int STUDY__STUDY_PROTOCOL_COMPONENTS_TYPE = 84;

  // First row of each block of repeated items.
  private static final int GI_CREATOR_ROW = 3;
  private static final int GI_REFERENCE_ROW = 14;
  private static final int SCOPE_PRODHAZPOP_ROW = 30;
  private static final int BG_STUDY_SAMPLE_ROW = 88;
  private static final int BG_DIET_ASSESS_ROW = 94;
  private static final int BG_LABORATORY_ROW = 101;
  private static final int BG_ASSAY_ROW = 107;
  private static final int MM_PARAMETER_ROW = 115;

  // Number of rows scanned in each fixed-size block of repeated items.
  private static final int GI_CREATOR_ROW_COUNT = 6;
  private static final int GI_REFERENCE_ROW_COUNT = 4;
  private static final int SCOPE_PRODHAZPOP_ROW_COUNT = 12;
  private static final int BG_ROW_COUNT = 3;

  /** Columns for each of the properties of DietaryAssessmentMethod. */
  private final HashMap<String, Integer> methodColumns;

  /** Columns for each of the properties of Laboratory. */
  private final HashMap<String, Integer> laboratoryColumns;

  /** Columns for each of the properties of Reference. */
  private final HashMap<String, Integer> referenceColumns;

  /** Columns for each of the properties of Product. */
  private final HashMap<String, Integer> productColumns;

  /** Columns for each of the properties of a creator (read as a Contact). */
  private final HashMap<String, Integer> creatorColumns;

  /** Columns for each of the properties of an author (read as a Contact). */
  private final HashMap<String, Integer> authorColumns;

  /** Columns for each of the properties of Parameter. */
  private final HashMap<String, Integer> parameterColumns;

  /** Columns for each of the properties of StudySample. */
  private final HashMap<String, Integer> sampleColumns;

  /** Columns for each of the properties of Hazard. */
  private final HashMap<String, Integer> hazardColumns;

  /** Columns for each of the properties of PopulationGroup. */
  private final HashMap<String, Integer> populationColumns;

  /** Columns for each of the properties of Assay. */
  private final HashMap<String, Integer> assayColumns;

  /** Creates an importer and fills the property-name-to-column map of every repeated item. */
  public DataModelSheetImporter() {

    methodColumns = new HashMap<>();
    methodColumns.put("collectionTool", L);
    methodColumns.put("numberOfNonConsecutiveOneDay", M);
    methodColumns.put("softwareTool", N);
    methodColumns.put("numberOfFoodItems", O);
    methodColumns.put("recordTypes", P);
    methodColumns.put("foodDescriptors", Q);

    laboratoryColumns = new HashMap<>();
    laboratoryColumns.put("accreditation", L);
    laboratoryColumns.put("name", M);
    laboratoryColumns.put("country", N);

    creatorColumns = new HashMap<>();
    creatorColumns.put("mail", S);
    creatorColumns.put("title", L);
    creatorColumns.put("familyName", P);
    creatorColumns.put("givenName", N);
    creatorColumns.put("telephone", R);
    creatorColumns.put("streetAddress", X);
    creatorColumns.put("country", T);
    creatorColumns.put("city", U);
    creatorColumns.put("zipCode", V);
    creatorColumns.put("region", Z);
    creatorColumns.put("organization", Q);

    authorColumns = new HashMap<>();
    authorColumns.put("title", AB);
    authorColumns.put("name", AC);
    authorColumns.put("givenName", AD);
    authorColumns.put("additionalName", AE);
    authorColumns.put("familyName", AF);
    authorColumns.put("organization", AG);
    authorColumns.put("telephone", AH);
    authorColumns.put("mail", AI);
    authorColumns.put("country", AJ);
    authorColumns.put("city", AK);
    authorColumns.put("zipCode", AL);
    authorColumns.put("postOfficeBox", AM);
    authorColumns.put("streetAddress", AN);
    authorColumns.put("extendedAddress", AO);
    authorColumns.put("region", AP);

    referenceColumns = new HashMap<>();
    referenceColumns.put("referenceDescription", L);
    referenceColumns.put("type", M);
    referenceColumns.put("date", N);
    referenceColumns.put("pmid", O);
    referenceColumns.put("doi", P);
    referenceColumns.put("author", Q);
    referenceColumns.put("title", R);
    referenceColumns.put("abstract", S);
    referenceColumns.put("status", U);
    referenceColumns.put("website", V);
    referenceColumns.put("comment", W);

    productColumns = new HashMap<>();
    productColumns.put("name", L);
    productColumns.put("description", M);
    productColumns.put("unit", N);
    productColumns.put("productionMethod", O);
    productColumns.put("packaging", P);
    productColumns.put("treatment", Q);
    productColumns.put("originCountry", R);
    productColumns.put("originArea", S);
    productColumns.put("fisheriesArea", T);
    productColumns.put("productionDate", U);
    productColumns.put("expiryDate", V);

    parameterColumns = new HashMap<>();
    parameterColumns.put("id", L);
    parameterColumns.put("classification", M);
    parameterColumns.put("name", N);
    parameterColumns.put("description", O);
    parameterColumns.put("unit", P);
    parameterColumns.put("unitCategory", Q);
    parameterColumns.put("dataType", R);
    parameterColumns.put("source", S);
    parameterColumns.put("subject", T);
    parameterColumns.put("distribution", U);
    parameterColumns.put("value", V);
    parameterColumns.put("reference", W);
    parameterColumns.put("variability", X);
    parameterColumns.put("max", Y);
    parameterColumns.put("min", Z);
    parameterColumns.put("error", AA);

    sampleColumns = new HashMap<>();
    sampleColumns.put("sample", L);
    sampleColumns.put("protocolOfSampleCollection", M);
    sampleColumns.put("samplingStrategy", N);
    sampleColumns.put("samplingProgramType", O);
    sampleColumns.put("samplingMethod", P);
    sampleColumns.put("samplingPlan", Q);
    sampleColumns.put("samplingWeight", R);
    sampleColumns.put("samplingSize", S);
    sampleColumns.put("lotSizeUnit", T);
    sampleColumns.put("samplingPoint", U);

    hazardColumns = new HashMap<>();
    hazardColumns.put("type", W);
    hazardColumns.put("name", X);
    hazardColumns.put("description", Y);
    hazardColumns.put("unit", Z);
    hazardColumns.put("adverseEffect", AA);
    hazardColumns.put("sourceOfContamination", AB);
    hazardColumns.put("benchmarkDose", AC);
    hazardColumns.put("maximumResidueLimit", AD);
    hazardColumns.put("noObservedAdverseAffectLevel", AE);
    hazardColumns.put("lowestObservedAdverseAffectLevel", AF);
    hazardColumns.put("acceptableOperatorsExposureLevel", AG);
    hazardColumns.put("acuteReferenceDose", AH);
    hazardColumns.put("acceptableDailyIntake", AI);
    hazardColumns.put("indSum", AJ);

    assayColumns = new HashMap<>();
    assayColumns.put("name", L);
    assayColumns.put("description", M);
    assayColumns.put("moisturePercentage", N);
    assayColumns.put("fatPercentage", O);
    assayColumns.put("detectionLimit", P);
    assayColumns.put("quantificationLimit", Q);
    assayColumns.put("leftCensoredData", R);
    assayColumns.put("contaminationRange", S);
    assayColumns.put("uncertaintyValue", T);

    populationColumns = new HashMap<>();
    populationColumns.put("name", AK);
    populationColumns.put("targetPopulation", AL);
    populationColumns.put("span", AM);
    populationColumns.put("description", AN);
    populationColumns.put("age", AO);
    populationColumns.put("gender", AP);
    populationColumns.put("bmi", AQ);
    populationColumns.put("diet", AR);
    populationColumns.put("consumption", AS);
    populationColumns.put("region", AT);
    populationColumns.put("country", AU);
    populationColumns.put("risk", AV);
    populationColumns.put("season", AW);
  }

  /**
   * Looks up a cell without failing on sparse sheets.
   *
   * @param sheet sheet to read from
   * @param rowIndex index of the row, as passed to {@link Sheet#getRow(int)}
   * @param columnIndex index of the column, as passed to {@link Row#getCell(int)}
   * @return the cell, or {@code null} when the row or the cell is not defined
   */
  private static Cell cellAt(Sheet sheet, int rowIndex, int columnIndex) {
    Row row = sheet.getRow(rowIndex);
    return row == null ? null : row.getCell(columnIndex);
  }

  /**
   * Reads the text stored in column {@code J} of a row.
   *
   * @param sheet sheet to read from
   * @param rowIndex index of the row, as passed to {@link Sheet#getRow(int)}
   * @return the string value, or {@code null} when the row or cell is missing or the cell is not
   *     of type {@link CellType#STRING}
   */
  private static String stringAt(Sheet sheet, int rowIndex) {
    Cell cell = cellAt(sheet, rowIndex, J);
    if (cell == null || cell.getCellTypeEnum() != CellType.STRING) {
      return null;
    }
    return cell.getStringCellValue();
  }

  /**
   * Reads the general information: single-valued properties from column {@code J}, plus the
   * creator, author and reference rows. Properties whose cell is missing or has an unexpected
   * type are left unset.
   *
   * @param sheet sheet to read from
   * @return the populated general information, never {@code null}
   */
  private DataModelGeneralInformation retrieveGeneralInformation(Sheet sheet) {

    DataModelGeneralInformation information = new DataModelGeneralInformation();

    String name = stringAt(sheet, GENERAL_INFORMATION__NAME);
    if (name != null) {
      information.setName(name);
    }

    String source = stringAt(sheet, GENERAL_INFORMATION__SOURCE);
    if (source != null) {
      information.setSource(source);
    }

    String identifier = stringAt(sheet, GENERAL_INFORMATION__IDENTIFIER);
    if (identifier != null) {
      information.setIdentifier(identifier);
    }

    // Creators and authors share the same rows but use different columns.
    for (int numRow = GI_CREATOR_ROW; numRow < GI_CREATOR_ROW + GI_CREATOR_ROW_COUNT; numRow++) {
      Row row = sheet.getRow(numRow);
      if (row == null) {
        continue; // row not present in the sheet: nothing to import
      }

      try {
        Contact contact = ImporterUtils.retrieveContact(row, creatorColumns);
        information.addCreatorItem(contact);
      } catch (Exception ignored) {
        // Best effort: a row without a readable creator is skipped.
      }

      try {
        Contact author = ImporterUtils.retrieveContact(row, authorColumns);
        information.addAuthorItem(author);
      } catch (Exception ignored) {
        // Best effort: a row without a readable author is skipped.
      }
    }

    Cell creationDateCell = cellAt(sheet, GENERAL_INFORMATION_CREATION_DATE, J);
    if (creationDateCell != null && creationDateCell.getCellTypeEnum() == CellType.NUMERIC) {
      LocalDate localDate = ImporterUtils.retrieveDate(creationDateCell);
      information.setCreationDate(localDate);
    }

    // TODO: modificationDate

    String rights = stringAt(sheet, GENERAL_INFORMATION__RIGHTS);
    if (rights != null) {
      information.setRights(rights);
    }

    String availability = stringAt(sheet, GENERAL_INFORMATION__AVAILABLE);
    if (availability != null) {
      information.setAvailability(availability);
    }

    String url = stringAt(sheet, GENERAL_INFORMATION__URL);
    if (url != null) {
      information.setUrl(url);
    }

    String format = stringAt(sheet, GENERAL_INFORMATION__FORMAT);
    if (format != null) {
      information.setFormat(format);
    }

    // reference (1..n)
    for (int numRow = GI_REFERENCE_ROW; numRow < GI_REFERENCE_ROW + GI_REFERENCE_ROW_COUNT;
        numRow++) {
      Row row = sheet.getRow(numRow);
      if (row == null) {
        continue;
      }
      try {
        Reference reference = ImporterUtils.retrieveReference(row, referenceColumns);
        information.addReferenceItem(reference);
      } catch (Exception ignored) {
        // Best effort: a row without a readable reference is skipped.
      }
    }

    String language = stringAt(sheet, GENERAL_INFORMATION__LANGUAGE);
    if (language != null) {
      information.setLanguage(language);
    }

    // model category (0..n)

    String status = stringAt(sheet, GENERAL_INFORMATION__STATUS);
    if (status != null) {
      information.setStatus(status);
    }

    String objective = stringAt(sheet, GENERAL_INFORMATION__OBJECTIVE);
    if (objective != null) {
      information.setObjective(objective);
    }

    String description = stringAt(sheet, GENERAL_INFORMATION__DESCRIPTION);
    if (description != null) {
      information.setDescription(description);
    }

    return information;
  }

  /**
   * Reads the scope: products, hazards and population groups (which share the same rows but use
   * different columns), plus the general comment and temporal information.
   *
   * @param sheet sheet to read from
   * @return the populated scope, never {@code null}
   */
  private GenericModelScope retrieveScope(Sheet sheet) {

    GenericModelScope scope = new GenericModelScope();

    for (int numRow = SCOPE_PRODHAZPOP_ROW;
        numRow < SCOPE_PRODHAZPOP_ROW + SCOPE_PRODHAZPOP_ROW_COUNT; numRow++) {

      Row row = sheet.getRow(numRow);
      if (row == null) {
        continue; // row not present in the sheet: no product, hazard or population group
      }

      try {
        scope.addProductItem(ImporterUtils.retrieveProduct(row, productColumns));
      } catch (IllegalArgumentException ignored) {
        // ignore exception since products are optional (*)
      }

      try {
        scope.addHazardItem(ImporterUtils.retrieveHazard(row, hazardColumns));
      } catch (IllegalArgumentException ignored) {
        // ignore exception since hazards are optional (*)
      }

      try {
        scope.addPopulationGroupItem(ImporterUtils.retrievePopulationGroup(row, populationColumns));
      } catch (IllegalArgumentException ignored) {
        // ignore exception since population groups are optional (*)
      }
    }

    String generalComment = stringAt(sheet, SCOPE__GENERAL_COMMENT);
    if (generalComment != null) {
      scope.setGeneralComment(generalComment);
    }

    String temporalInformation = stringAt(sheet, SCOPE__TEMPORAL_INFORMATION);
    if (temporalInformation != null) {
      scope.setTemporalInformation(temporalInformation);
    }

    // TODO: Spatial information

    return scope;
  }

  /**
   * Reads the data background: the study, and the study sample, dietary assessment method,
   * laboratory and assay rows. Any item that cannot be read is skipped.
   *
   * @param sheet sheet to read from
   * @return the populated data background, never {@code null}
   */
  private GenericModelDataBackground retrieveBackground(Sheet sheet) {

    GenericModelDataBackground background = new GenericModelDataBackground();

    try {
      Study study = retrieveStudy(sheet);
      background.setStudy(study);
    } catch (Exception ignored) {
      // Skip faulty study and continue
    }

    for (int numRow = BG_STUDY_SAMPLE_ROW; numRow < BG_STUDY_SAMPLE_ROW + BG_ROW_COUNT; numRow++) {
      Row row = sheet.getRow(numRow);
      if (row == null) {
        continue;
      }
      try {
        StudySample sample = ImporterUtils.retrieveStudySample(row, sampleColumns);
        background.addStudySampleItem(sample);
      } catch (Exception ignored) {
        // Skip faulty sample and continue
      }
    }

    for (int numRow = BG_DIET_ASSESS_ROW; numRow < BG_DIET_ASSESS_ROW + BG_ROW_COUNT; numRow++) {
      Row row = sheet.getRow(numRow);
      if (row == null) {
        continue;
      }
      try {
        DietaryAssessmentMethod method =
            ImporterUtils.retrieveDietaryAssessmentMethod(row, methodColumns);
        background.addDietaryAssessmentMethodItem(method);
      } catch (Exception ignored) {
        // Skip faulty method and continue
      }
    }

    for (int numRow = BG_LABORATORY_ROW; numRow < BG_LABORATORY_ROW + BG_ROW_COUNT; numRow++) {
      Row row = sheet.getRow(numRow);
      if (row == null) {
        continue;
      }
      try {
        Laboratory laboratory = ImporterUtils.retrieveLaboratory(row, laboratoryColumns);
        background.addLaboratoryItem(laboratory);
      } catch (Exception ignored) {
        // Skip faulty laboratory and continue
      }
    }

    for (int numRow = BG_ASSAY_ROW; numRow < BG_ASSAY_ROW + BG_ROW_COUNT; numRow++) {
      Row row = sheet.getRow(numRow);
      if (row == null) {
        continue;
      }
      try {
        Assay assay = ImporterUtils.retrieveAssay(row, assayColumns);
        background.addAssayItem(assay);
      } catch (Exception ignored) {
        // ignore errors since Assay is optional
      }
    }

    return background;
  }

  /**
   * Reads the parameters from the first parameter row down to the last row of the sheet. Rows
   * that are missing or do not hold a readable parameter are skipped.
   *
   * @param sheet sheet to read from
   * @return the populated model math, never {@code null}
   */
  private DataModelModelMath retrieveModelMath(Sheet sheet) {

    DataModelModelMath math = new DataModelModelMath();

    // getLastRowNum() is the index of the last row, so the bound must be inclusive or the final
    // parameter row is never read.
    int lastRow = sheet.getLastRowNum();
    for (int numRow = MM_PARAMETER_ROW; numRow <= lastRow; numRow++) {
      Row row = sheet.getRow(numRow);
      if (row == null) {
        continue;
      }
      try {
        Parameter param = ImporterUtils.retrieveParameter(row, parameterColumns);
        math.addParameterItem(param);
      } catch (Exception ignored) {
        // Best effort: a row without a readable parameter is skipped.
      }
    }

    return math;
  }

  /**
   * Reads the study from column {@code J} of the study rows. Only the title is mandatory; every
   * other property is left unset when its cell is missing or is not a string.
   *
   * @param sheet sheet to read from
   * @return the populated study
   * @throws IllegalArgumentException if the study title cell is missing or is not a string
   */
  private Study retrieveStudy(Sheet sheet) {

    // Check first mandatory properties
    String title = stringAt(sheet, STUDY__STUDY_TITLE);
    if (title == null) {
      throw new IllegalArgumentException("Missing study title");
    }

    Study study = new Study();

    String identifier = stringAt(sheet, STUDY__STUDY_IDENTIFIER);
    if (identifier != null) {
      study.setIdentifier(identifier);
    }

    study.setTitle(title);

    String description = stringAt(sheet, STUDY__STUDY_DESCRIPTION);
    if (description != null) {
      study.setDescription(description);
    }

    String designType = stringAt(sheet, STUDY__STUDY_DESIGN_TYPE);
    if (designType != null) {
      study.setDesignType(designType);
    }

    String measurementType = stringAt(sheet, STUDY__STUDY_ASSAY_MEASUREMENT_TYPE);
    if (measurementType != null) {
      study.setAssayMeasurementType(measurementType);
    }

    String technologyType = stringAt(sheet, STUDY__STUDY_ASSAY_TECHNOLOGY_TYPE);
    if (technologyType != null) {
      study.setAssayTechnologyType(technologyType);
    }

    String technologyPlatform = stringAt(sheet, STUDY__STUDY_ASSAY_TECHNOLOGY_PLATFORM);
    if (technologyPlatform != null) {
      study.setAssayTechnologyPlatform(technologyPlatform);
    }

    String accreditationProcedure =
        stringAt(sheet, STUDY__ACCREDITATION_PROCEDURE_FOR_THE_ASSAY_TECHNOLOGY);
    if (accreditationProcedure != null) {
      study.setAccreditationProcedureForTheAssayTechnology(accreditationProcedure);
    }

    String protocolName = stringAt(sheet, STUDY__STUDY_PROTOCOL_NAME);
    if (protocolName != null) {
      study.setProtocolName(protocolName);
    }

    String protocolType = stringAt(sheet, STUDY__STUDY_PROTOCOL_TYPE);
    if (protocolType != null) {
      study.setProtocolType(protocolType);
    }

    String protocolDescription = stringAt(sheet, STUDY__STUDY_PROTOCOL_DESCRIPTION);
    if (protocolDescription != null) {
      study.setProtocolDescription(protocolDescription);
    }

    String protocolUri = stringAt(sheet, STUDY__STUDY_PROTOCOL_URI);
    if (protocolUri != null) {
      study.setProtocolURI(protocolUri);
    }

    String protocolVersion = stringAt(sheet, STUDY__STUDY_PROTOCOL_VERSION);
    if (protocolVersion != null) {
      study.setProtocolVersion(protocolVersion);
    }

    String parametersName = stringAt(sheet, STUDY__STUDY_PROTOCOL_PARAMETERS_NAME);
    if (parametersName != null) {
      study.setProtocolParametersName(parametersName);
    }

    String componentsName = stringAt(sheet, STUDY__STUDY_PROTOCOL_COMPONENTS_NAME);
    if (componentsName != null) {
      study.setProtocolComponentsName(componentsName);
    }

    String componentsType = stringAt(sheet, STUDY__STUDY_PROTOCOL_COMPONENTS_TYPE);
    if (componentsType != null) {
      study.setProtocolComponentsType(componentsType);
    }

    return study;
  }

  /**
   * Builds a {@link DataModel} with model type {@code "dataModel"} from the given sheet.
   *
   * @param sheet sheet to read from
   * @return the imported model, with general information, scope, data background and model math
   *     set
   */
  @Override
  public Model retrieveModel(Sheet sheet) {

    DataModel model = new DataModel();
    model.setModelType("dataModel");
    model.setGeneralInformation(retrieveGeneralInformation(sheet));
    model.setScope(retrieveScope(sheet));
    model.setDataBackground(retrieveBackground(sheet));
    model.setModelMath(retrieveModelMath(sheet));

    return model;
  }
}