Skip to content

Data Generators

CdsDataGenerator

A class to generate CDS (Clinical Decision Support) data based on specified workflows and constraints.

ATTRIBUTE DESCRIPTION
registry

A registry of data generators.

TYPE: dict

mappings

A mapping of workflows to their respective data generators.

TYPE: dict

data

The generated CDS FHIR data.

TYPE: CdsFhirData

Source code in healthchain/data_generators/cdsdatagenerator.py
class CdsDataGenerator:
    """
    Generate CDS (Clinical Decision Support) FHIR data for a chosen workflow.

    Attributes:
        registry (dict): A registry of data generators, looked up by generator name.
        mappings (dict): A mapping of workflows to the generators run for them.
        workflow: The workflow selected with `set_workflow`, or None if none was set.
        data (CdsFhirData): The most recently generated CDS FHIR data, or None.
    """

    # TODO: Add ordering and logic so that patient/encounter IDs are passed to subsequent generators
    # TODO: Some of the resources should be allowed to be multiplied

    # Generators that are run, in listed order, for each supported workflow.
    default_workflow_mappings = {
        Workflow.encounter_discharge: [
            {"generator": "EncounterGenerator"},
            {"generator": "ConditionGenerator"},
            {"generator": "ProcedureGenerator"},
            {"generator": "MedicationRequestGenerator"},
        ],
        Workflow.patient_view: [
            {"generator": "PatientGenerator"},
            {"generator": "EncounterGenerator"},
            {"generator": "ConditionGenerator"},
        ],
    }

    def __init__(self):
        self.registry = generator_registry
        # Copy the class-level default so that editing one instance's mappings
        # never changes the default seen by every other instance.
        self.mappings = {
            workflow: list(generators)
            for workflow, generators in self.default_workflow_mappings.items()
        }
        # Initialised here so generate() can report a missing workflow clearly
        # instead of failing with AttributeError.
        self.workflow = None
        self.data: Optional[CdsFhirData] = None

    def fetch_generator(self, generator_name: str) -> Callable:
        """
        Fetches a data generator by its name from the registry.

        Parameters:
            generator_name (str): The name of the data generator to fetch.

        Returns:
            Callable: The registered generator, or None when the name is not
                in the registry.
        """
        return self.registry.get(generator_name)

    def set_workflow(self, workflow: str) -> None:
        """
        Sets the current workflow to be used for data generation.

        Parameters:
            workflow (str): The workflow to set; it must be a key of
                `self.mappings` for `generate` to succeed.
        """
        self.workflow = workflow

    def generate(
        self,
        constraints: Optional[list] = None,
        free_text_path: Optional[str] = None,
        column_name: Optional[str] = None,
        random_seed: Optional[int] = None,
    ) -> BaseModel:
        """
        Generates CDS data based on the current workflow, constraints, and optional free text data.

        Parameters:
            constraints (Optional[list]): A list of constraints to apply to the data generation.
            free_text_path (Optional[str]): The path to a CSV file containing free text data.
            column_name (Optional[str]): The column name in the CSV file to use for free text data.
            random_seed (Optional[int]): The random seed to use for reproducible data generation.

        Returns:
            BaseModel: The generated CDS FHIR data (also stored on `self.data`).

        Raises:
            ValueError: If no workflow has been set, the workflow has no
                mapping, a mapped generator is not registered, or
                `free_text_path` is given without `column_name`.
            FileNotFoundError: If `free_text_path` is given but is not a file.
        """
        if self.workflow is None:
            raise ValueError("No workflow set; call set_workflow() before generate().")

        if self.workflow not in self.mappings:
            raise ValueError(f"Workflow {self.workflow} not found in mappings")

        results = []
        for resource in self.mappings[self.workflow]:
            generator_name = resource["generator"]
            generator = self.fetch_generator(generator_name)
            if generator is None:
                # Fail with the offending name rather than an AttributeError
                # raised from calling .generate on None.
                raise ValueError(f"Generator {generator_name} not found in registry")
            result = generator.generate(
                constraints=constraints, random_seed=random_seed
            )
            results.append(BundleEntry(resource=result))

        parsed_free_text = (
            self.free_text_parser(free_text_path, column_name)
            if free_text_path
            else None
        )
        if parsed_free_text:
            # A dedicated seeded RNG keeps the free-text pick reproducible when
            # a seed is supplied; otherwise fall back to the module-level RNG.
            chooser = random.Random(random_seed) if random_seed is not None else random
            results.append(BundleEntry(resource=chooser.choice(parsed_free_text)))

        output = CdsFhirData(prefetch=Bundle(resourceType="Bundle", entry=results))
        self.data = output
        return output

    def free_text_parser(self, path_to_csv: str, column_name: str) -> list:
        """
        Parses free text data from a CSV file and converts it into a list of DocumentReference models.

        Parameters:
            path_to_csv (str): The path to the CSV file containing free text data.
            column_name (str): The column name in the CSV file to use for free text data.

        Returns:
            list: One DocumentReference per row read from the CSV. Errors while
                reading are logged, and the rows read so far are still returned.

        Raises:
            FileNotFoundError: If `path_to_csv` is not an existing file.
            ValueError: If `column_name` is None.
        """
        # Check that path_to_csv is a valid path with pathlib
        path = Path(path_to_csv)
        if not path.is_file():
            raise FileNotFoundError(
                f"The file {path_to_csv} does not exist or is not a file."
            )

        # Validate outside the try block so this error reaches the caller
        # instead of being swallowed by the best-effort handler below.
        if column_name is None:
            raise ValueError("Column name must be provided when header is True.")

        column_data = []
        try:
            with path.open(mode="r", newline="") as file:
                # Append row by row so rows read before a failure are kept.
                for row in csv.DictReader(file):
                    column_data.append(row[column_name])
        except Exception as ex:
            # Best-effort read: log the problem and continue with what we have.
            logger.error(f"An error occurred: {ex}")

        # NOTE: cell text is inserted into the XHTML div verbatim (not escaped).
        return [
            DocumentReference(
                resourceType="DocumentReference",
                text=Narrative(
                    status="generated",
                    div=f'<div xmlns="http://www.w3.org/1999/xhtml">{cell}</div>',
                ),
            )
            for cell in column_data
        ]

fetch_generator(generator_name)

Fetches a data generator function by its name from the registry.

PARAMETER DESCRIPTION
generator_name

The name of the data generator to fetch.

TYPE: str

RETURNS DESCRIPTION
Callable

The data generator function.

TYPE: Callable

Source code in healthchain/data_generators/cdsdatagenerator.py
def fetch_generator(self, generator_name: str) -> Callable:
    """
    Look up a data generator in the registry by name.

    Parameters:
        generator_name (str): Registry key of the generator to retrieve.

    Returns:
        Callable: Whatever `self.registry.get` yields for that key (a plain
            dict yields None for an unknown name).
    """
    registry = self.registry
    return registry.get(generator_name)

free_text_parser(path_to_csv, column_name)

Parses free text data from a CSV file and converts it into a list of DocumentReference models.

PARAMETER DESCRIPTION
path_to_csv

The path to the CSV file containing free text data.

TYPE: str

column_name

The column name in the CSV file to use for free text data.

TYPE: str

RETURNS DESCRIPTION
list

A list of DocumentReference models, one per parsed free-text row.

TYPE: list

Source code in healthchain/data_generators/cdsdatagenerator.py
def free_text_parser(self, path_to_csv: str, column_name: str) -> list:
    """
    Parses free text data from a CSV file and converts it into a list of DocumentReference models.

    Parameters:
        path_to_csv (str): The path to the CSV file containing free text data.
        column_name (str): The column name in the CSV file to use for free text data.

    Returns:
        list: One DocumentReference per row read from the CSV. Errors while
            reading are logged, and the rows read so far are still returned.

    Raises:
        FileNotFoundError: If `path_to_csv` is not an existing file.
        ValueError: If `column_name` is None.
    """
    # Check that path_to_csv is a valid path with pathlib
    path = Path(path_to_csv)
    if not path.is_file():
        raise FileNotFoundError(
            f"The file {path_to_csv} does not exist or is not a file."
        )

    # Validate outside the try block so this error reaches the caller instead
    # of being swallowed by the best-effort handler below.
    if column_name is None:
        raise ValueError("Column name must be provided when header is True.")

    column_data = []
    try:
        with path.open(mode="r", newline="") as file:
            # Append row by row so rows read before a failure are kept.
            for row in csv.DictReader(file):
                column_data.append(row[column_name])
    except Exception as ex:
        # Best-effort read: log the problem and continue with what we have.
        logger.error(f"An error occurred: {ex}")

    # NOTE: cell text is inserted into the XHTML div verbatim (not escaped).
    return [
        DocumentReference(
            resourceType="DocumentReference",
            text=Narrative(
                status="generated",
                div=f'<div xmlns="http://www.w3.org/1999/xhtml">{cell}</div>',
            ),
        )
        for cell in column_data
    ]

generate(constraints=None, free_text_path=None, column_name=None, random_seed=None)

Generates CDS data based on the current workflow, constraints, and optional free text data.

PARAMETER DESCRIPTION
constraints

A list of constraints to apply to the data generation.

TYPE: Optional[list] DEFAULT: None

free_text_path

The path to a CSV file containing free text data.

TYPE: Optional[str] DEFAULT: None

column_name

The column name in the CSV file to use for free text data.

TYPE: Optional[str] DEFAULT: None

random_seed

The random seed to use for reproducible data generation.

TYPE: Optional[int] DEFAULT: None

RETURNS DESCRIPTION
BaseModel

The generated CDS FHIR data.

TYPE: BaseModel

Source code in healthchain/data_generators/cdsdatagenerator.py
def generate(
    self,
    constraints: Optional[list] = None,
    free_text_path: Optional[str] = None,
    column_name: Optional[str] = None,
    random_seed: Optional[int] = None,
) -> BaseModel:
    """
    Generates CDS data based on the current workflow, constraints, and optional free text data.

    Parameters:
        constraints (Optional[list]): A list of constraints to apply to the data generation.
        free_text_path (Optional[str]): The path to a CSV file containing free text data.
        column_name (Optional[str]): The column name in the CSV file to use for free text data.
        random_seed (Optional[int]): The random seed to use for reproducible data generation.

    Returns:
        BaseModel: The generated CDS FHIR data (also stored on `self.data`).

    Raises:
        ValueError: If no workflow has been set, the workflow has no mapping,
            or a mapped generator is not registered.
    """
    # getattr: the workflow attribute only exists once set_workflow() has run.
    workflow = getattr(self, "workflow", None)
    if workflow is None:
        raise ValueError("No workflow set; call set_workflow() before generate().")

    if workflow not in self.mappings:
        raise ValueError(f"Workflow {workflow} not found in mappings")

    results = []
    for resource in self.mappings[workflow]:
        generator_name = resource["generator"]
        generator = self.fetch_generator(generator_name)
        if generator is None:
            # Fail with the offending name rather than an AttributeError
            # raised from calling .generate on None.
            raise ValueError(f"Generator {generator_name} not found in registry")
        result = generator.generate(
            constraints=constraints, random_seed=random_seed
        )
        results.append(BundleEntry(resource=result))

    parsed_free_text = (
        self.free_text_parser(free_text_path, column_name)
        if free_text_path
        else None
    )
    if parsed_free_text:
        # A dedicated seeded RNG keeps the free-text pick reproducible when a
        # seed is supplied; otherwise fall back to the module-level RNG.
        chooser = random.Random(random_seed) if random_seed is not None else random
        results.append(BundleEntry(resource=chooser.choice(parsed_free_text)))

    output = CdsFhirData(prefetch=Bundle(resourceType="Bundle", entry=results))
    self.data = output
    return output

set_workflow(workflow)

Sets the current workflow to be used for data generation.

PARAMETER DESCRIPTION
workflow

The name of the workflow to set.

TYPE: str

Source code in healthchain/data_generators/cdsdatagenerator.py
def set_workflow(self, workflow: str) -> None:
    """
    Select the workflow that later data generation will use.

    Parameters:
        workflow (str): The workflow to store on the instance as
            `self.workflow`.
    """
    # Stored without validation.
    self.workflow = workflow

ClassGenerator

Bases: BaseGenerator

A generator class for creating FHIR Class resources.

METHOD DESCRIPTION
generate

Generates a FHIR Class resource.

Source code in healthchain/data_generators/encountergenerators.py
@register_generator
class ClassGenerator(BaseGenerator):
    """
    Generator for the encounter class, expressed as a CodeableConcept.

    Methods:
        generate() -> CodeableConcept:
            Picks one encounter class code at random and wraps it in a
            single-coding CodeableConcept.
    """

    @staticmethod
    def generate() -> CodeableConcept:
        # Code -> display text; the keys double as the candidates to draw from.
        display_by_code = {"IMP": "inpatient", "AMB": "ambulatory"}
        chosen_code = faker.random_element(elements=tuple(display_by_code))
        coding = Coding(
            system="http://terminology.hl7.org/CodeSystem/v3-ActCode",
            code=chosen_code,
            display=display_by_code.get(chosen_code),
        )
        return CodeableConcept(coding=[coding])

EncounterGenerator

Bases: BaseGenerator

A generator class for creating FHIR Encounter resources.

METHOD DESCRIPTION
generate

generate(constraints: Optional[list] = None, random_seed: Optional[int] = None) -> Encounter: Generates a FHIR Encounter resource with optional constraints and random_seed.

Source code in healthchain/data_generators/encountergenerators.py
@register_generator
class EncounterGenerator(BaseGenerator):
    """
    Generator that assembles a FHIR Encounter resource from the other registered generators.

    Methods:
        generate(constraints: Optional[list] = None, random_seed: Optional[int] = None) -> Encounter:
            Seeds Faker with random_seed and returns a randomly populated Encounter.
    """

    @staticmethod
    def generate(
        constraints: Optional[list] = None,
        random_seed: Optional[int] = None,
    ) -> Encounter:
        # `constraints` is accepted but not used by this implementation.
        Faker.seed(random_seed)

        def build(generator_name):
            # Run the named generator from the shared registry.
            return generator_registry.get(generator_name).generate()

        patient_reference = "Patient/123"

        # The values below are produced in the same order as the Encounter
        # fields they feed, so the sequence of random draws stays stable.
        encounter_id = build("IdGenerator")
        status = faker.random_element(
            elements=(
                "planned",
                "in-progress",
                "on-hold",
                "discharged",
                "cancelled",
            )
        )
        encounter_class = build("ClassGenerator")
        priority = build("EncounterPriorityGenerator")
        encounter_type = build("EncounterTypeGenerator")
        subject = {"reference": patient_reference, "display": patient_reference}
        actual_period = build("PeriodGenerator")
        encounter_location = build("EncounterLocationGenerator")

        return Encounter(
            resourceType="Encounter",
            id=encounter_id,
            status=status,
            class_field=[encounter_class],
            priority=priority,
            type_field=[encounter_type],
            subject=subject,
            actualPeriod=actual_period,
            location=[encounter_location],
        )

EncounterLocationGenerator

Bases: BaseGenerator

A generator class for creating FHIR EncounterLocation resources.

METHOD DESCRIPTION
generate

Generates a FHIR EncounterLocation resource.

Source code in healthchain/data_generators/encountergenerators.py
@register_generator
class EncounterLocationGenerator(BaseGenerator):
    """
    Generator for the location entry of an Encounter.

    Methods:
        generate() -> EncounterLocation:
            Returns an EncounterLocation with a fixed location reference,
            a random status and a generated period.
    """

    @staticmethod
    def generate() -> EncounterLocation:
        # Built in field order (location, status, period) so the sequence of
        # random draws is unchanged.
        location_ref = Reference(reference="Location/123")
        location_status = faker.random_element(elements=("active", "completed"))
        location_period = generator_registry.get("PeriodGenerator").generate()
        return EncounterLocation(
            location=location_ref,
            status=location_status,
            period=location_period,
        )

EncounterPriorityGenerator

Bases: BaseGenerator

A generator class for creating FHIR EncounterPriority resources.

METHOD DESCRIPTION
generate

Generates a FHIR EncounterPriority resource.

Source code in healthchain/data_generators/encountergenerators.py
@register_generator
class EncounterPriorityGenerator(BaseGenerator):
    """
    Generator for the encounter priority, expressed as a CodeableConcept.

    Methods:
        generate() -> CodeableConcept:
            Picks one priority code at random and wraps it in a
            single-coding CodeableConcept.
    """

    @staticmethod
    def generate() -> CodeableConcept:
        # Code -> display text; the keys double as the candidates to draw from.
        display_by_code = {"17621005": "normal", "24484000": "critical"}
        chosen_code = faker.random_element(elements=tuple(display_by_code))
        coding = Coding(
            system="http://snomed.info/sct",
            code=chosen_code,
            display=display_by_code.get(chosen_code),
        )
        return CodeableConcept(coding=[coding])

EncounterTypeGenerator

Bases: BaseGenerator

A generator class for creating FHIR EncounterType resources.

METHOD DESCRIPTION
generate

Generates a FHIR EncounterType resource.

Source code in healthchain/data_generators/encountergenerators.py
@register_generator
class EncounterTypeGenerator(BaseGenerator):
    """
    Generator for the encounter type, expressed as a CodeableConcept.

    Methods:
        generate() -> CodeableConcept:
            Picks one encounter type code at random and wraps it in a
            single-coding CodeableConcept.
    """

    @staticmethod
    def generate() -> CodeableConcept:
        # Code -> display text; the keys double as the candidates to draw from.
        display_by_code = {"11429006": "consultation", "50849002": "emergency"}
        chosen_code = faker.random_element(elements=tuple(display_by_code))
        coding = Coding(
            system="http://snomed.info/sct",
            code=chosen_code,
            display=display_by_code.get(chosen_code),
        )
        return CodeableConcept(coding=[coding])

PeriodGenerator

Bases: BaseGenerator

A generator class for creating FHIR Period resources.

METHOD DESCRIPTION
generate

Generates a FHIR Period resource with random start and end times.

Source code in healthchain/data_generators/encountergenerators.py
@register_generator
class PeriodGenerator(BaseGenerator):
    """
    A generator class for creating FHIR Period resources.

    Methods:
        generate() -> Period:
            Generates a FHIR Period resource with random start and end times.
    """

    @staticmethod
    def generate() -> Period:
        """
        Build a Period with a random start and an end drawn after it.

        Returns:
            Period: Start and end are ISO-8601 strings wrapped in dateTimeModel.
        """
        start = faker.date_time()
        # The end is drawn with the start as its lower bound.
        end = faker.date_time_between(start_date=start)
        return Period(
            start=dateTimeModel(start.isoformat()),
            end=dateTimeModel(end.isoformat()),
        )