migraf/fhir-kindling

View on GitHub
fhir_kindling/benchmark/data.py

Summary

Maintainability
A
1 hr
Test Coverage
import random

from fhir_kindling.benchmark.constants import Codes
from fhir_kindling.generators.dataset import DatasetGenerator
from fhir_kindling.generators.field_generator import FieldGenerator
from fhir_kindling.generators.resource_generator import (
    FieldValue,
    GeneratorParameters,
    ResourceGenerator,
)
from fhir_kindling.generators.time_series_generator import TimeSeriesGenerator
from fhir_kindling.util.date_utils import (
    local_now,
    local_now_string,
    subtract,
    to_iso_string,
)

# Default number of base "Patient" resources; used as the default value of
# ``n_patients`` in ``generate_benchmark_data``.
N_BASE_RESOURCES = 100


def _covid_vaccination_generator(
    date_generator: FieldGenerator,
) -> ResourceGenerator:
    """Build a generator for one completed COVID mRNA ``Immunization``.

    Args:
        date_generator: Field generator supplying ``occurrenceDateTime``.

    Returns:
        A new ``ResourceGenerator`` backed by its own ``GeneratorParameters``.
    """
    params = GeneratorParameters(
        field_values=[
            FieldValue(field="vaccineCode", value=Codes.COVID_VACC_RNA.value),
            FieldValue(field="status", value="completed"),
        ],
        field_generators=[date_generator],
    )
    return ResourceGenerator("Immunization", generator_parameters=params)


def _past_period_generator(start_days_ago: int, end_days_ago: int) -> FieldGenerator:
    """Build a ``period`` field generator lying in the past.

    Args:
        start_days_ago: Days subtracted from ``local_now()`` for ``start``.
        end_days_ago: Days subtracted from ``local_now()`` for ``end``.

    Returns:
        A ``FieldGenerator`` whose function returns a dict with ISO-formatted
        ``start`` and ``end`` entries.
    """
    return FieldGenerator(
        field="period",
        # local_now() is read inside the lambda, i.e. each time the generator
        # function is invoked, not when this helper is called.
        generator_function=lambda: {
            "start": to_iso_string(subtract(local_now(), days=start_days_ago)),
            "end": to_iso_string(subtract(local_now(), days=end_days_ago)),
        },
    )


def _quantity_observation_generator(code, sample_value, unit: str) -> ResourceGenerator:
    """Build a generator for ``final`` Observations carrying a ``valueQuantity``.

    Args:
        code: Value assigned to the Observation's ``code`` field.
        sample_value: Zero-argument callable returning the numeric value; it is
            called once per invocation of the field generator function.
        unit: Unit string stored next to the value.

    Returns:
        A ``ResourceGenerator`` for ``Observation`` resources.
    """
    params = GeneratorParameters(
        field_values=[
            FieldValue(field="code", value=code),
            FieldValue(field="status", value="final"),
        ],
        field_generators=[
            FieldGenerator(
                field="valueQuantity",
                generator_function=lambda: {
                    "value": sample_value(),
                    "unit": unit,
                },
            ),
        ],
    )
    return ResourceGenerator("Observation", generator_parameters=params)


def generate_benchmark_data(n_patients: int = N_BASE_RESOURCES) -> DatasetGenerator:
    """Configure the dataset generator used for the benchmark.

    Starting from ``n_patients`` "Patient" resources, the following resource
    generators are registered, in this order:

    * ``covid``: a COVID ``Condition``.
    * ``vacc-mrna-1`` / ``-2`` / ``-3``: COVID mRNA ``Immunization`` resources,
      each later shot depending on the earlier ones.
    * ``emergency-encounter`` and ``icu-encounter``: finished ``Encounter``
      resources, the ICU encounter depending on the emergency encounter.
    * ``blood-oxygen-saturation``: a daily time series of 10 ``Observation``
      resources.
    * ``body-temperature``, ``respiratory-rate``, ``heart-rate``:
      ``Observation`` resources depending on the ICU encounter.

    Args:
        n_patients: Value passed as ``n`` to the "Patient" ``DatasetGenerator``.

    Returns:
        The configured ``DatasetGenerator``; nothing is generated here.
    """
    dataset_generator = DatasetGenerator("Patient", n=n_patients)
    # NOTE(review): "base" in depends_on presumably names the Patient
    # generator created above -- confirm against DatasetGenerator.

    # covid
    covid_params = GeneratorParameters(
        field_values=[
            FieldValue(field="code", value=Codes.COVID.value),
        ]
    )
    dataset_generator.add_resource_generator(
        ResourceGenerator("Condition", generator_parameters=covid_params),
        name="covid",
        depends_on="base",
        reference_field="subject",
    )

    # covid vaccination(s): one date generator instance is shared by all shots
    vaccination_date_generator = FieldGenerator(
        field="occurrenceDateTime",
        generator_function=local_now_string,
    )

    # first shot covid vaccine
    dataset_generator.add_resource_generator(
        _covid_vaccination_generator(vaccination_date_generator),
        "vacc-mrna-1",
        depends_on="base",
        likelihood=0.7,
        reference_field="patient",
    )

    # second shot
    dataset_generator.add_resource_generator(
        _covid_vaccination_generator(vaccination_date_generator),
        "vacc-mrna-2",
        depends_on=["base", "vacc-mrna-1"],
        likelihood=0.9,
        reference_field=["patient", None],
    )

    # third shot
    dataset_generator.add_resource_generator(
        _covid_vaccination_generator(vaccination_date_generator),
        "vacc-mrna-3",
        depends_on=["base", "vacc-mrna-1", "vacc-mrna-2"],
        reference_field=["patient", None, None],
        likelihood=0.7,
    )

    # emergency encounter: 730 to 729 days before now
    emergency_encounter_params = GeneratorParameters(
        field_values=[
            FieldValue(field="class", value=Codes.EMERGENCY_ENCOUNTER.value),
            FieldValue(field="status", value="finished"),
        ],
        field_generators=[_past_period_generator(730, 729)],
    )
    dataset_generator.add_resource_generator(
        ResourceGenerator("Encounter", generator_parameters=emergency_encounter_params),
        "emergency-encounter",
        depends_on="base",
        reference_field="subject",
    )

    # icu encounter: 720 to 710 days before now
    icu_encounter_params = GeneratorParameters(
        field_values=[
            FieldValue(field="class", value=Codes.ICU_ENCOUNTER.value),
            FieldValue(
                field="type", value=Codes.ICU_ENCOUNTER_TYPE.value, list_field=True
            ),
            FieldValue(field="status", value="finished"),
        ],
        field_generators=[_past_period_generator(720, 710)],
    )
    dataset_generator.add_resource_generator(
        ResourceGenerator("Encounter", generator_parameters=icu_encounter_params),
        "icu-encounter",
        depends_on=["base", "emergency-encounter"],
        reference_field=["subject", None],
    )

    # generate observations

    # blood oxygen saturation: daily time series of 10 observations
    bo_time_series_generator = TimeSeriesGenerator(
        resource_generator=_quantity_observation_generator(
            Codes.OXYGEN_SATURATION.value,
            lambda: random.randint(90, 100),
            "%",
        ),
        start=subtract(local_now(), days=730),
        n=10,
        time_field="effectiveDateTime",
        freq="daily",
    )
    dataset_generator.add_resource_generator(
        bo_time_series_generator,
        "blood-oxygen-saturation",
        depends_on="base",
        reference_field="subject",
    )

    # body temperature: integer part 36..40 plus a fraction in [0, 1)
    dataset_generator.add_resource_generator(
        _quantity_observation_generator(
            Codes.BODY_TEMPERATURE.value,
            lambda: random.randint(36, 40) + random.random(),
            "°C",
        ),
        "body-temperature",
        depends_on="icu-encounter",
        reference_field="encounter",
    )

    # respiratory rate
    dataset_generator.add_resource_generator(
        _quantity_observation_generator(
            Codes.RESPIRATORY_RATE.value,
            lambda: random.randint(12, 30),
            "breaths/min",
        ),
        "respiratory-rate",
        depends_on="icu-encounter",
        reference_field="encounter",
    )

    # heart rate
    dataset_generator.add_resource_generator(
        _quantity_observation_generator(
            Codes.HEART_RATE.value,
            lambda: random.randint(60, 100),
            "beats/min",
        ),
        name="heart-rate",
        depends_on="icu-encounter",
        reference_field="encounter",
    )

    return dataset_generator