Merge branch 'main' into refactor/streamlit-demo

dotimplement · Nov 26, 2024 · 4ba9c11 · 4ba9c11
2 parents 67316f0 + 6e80363
commit 4ba9c11
Show file tree

Hide file tree

Showing 77 changed files with 4,431 additions and 1,598 deletions.
diff --git a/README.md b/README.md
@@ -19,6 +19,8 @@ pip install healthchain
 ```
 First time here? Check out our [Docs](https://dotimplement.github.io/HealthChain/) page!
 
+Came here from NHS RPySOC 2024 ✨? Check out the [CDS sandbox walkthrough](https://dotimplement.github.io/HealthChain/cookbook/cds_sandbox/)!
+
 ## Features
 - [x] 🛠️ Build custom pipelines or use [pre-built ones](https://dotimplement.github.io/HealthChain/reference/pipeline/pipeline/#prebuilt) for your healthcare NLP and ML tasks
 - [x] 🏗️ Add built-in [CDA and FHIR parsers](https://dotimplement.github.io/HealthChain/reference/utilities/cda_parser/) to connect your pipeline to interoperability standards
@@ -40,7 +42,7 @@ Pipelines provide a flexible way to build and manage processing pipelines for NL
 ```python
 from healthchain.io.containers import Document
 from healthchain.pipeline import Pipeline
-from healthchain.pipeline.components import TextPreProcessor, Model, TextPostProcessor
+from healthchain.pipeline.components import TextPreProcessor, SpacyNLP, TextPostProcessor
 
 # Initialize the pipeline
 nlp_pipeline = Pipeline[Document]()
@@ -50,8 +52,8 @@ preprocessor = TextPreProcessor(tokenizer="spacy")
 nlp_pipeline.add_node(preprocessor)
 
 # Add spaCy NLP component (assuming the spaCy model is installed)
-model = Model(model_path="path/to/pretrained/model")
-nlp_pipeline.add_node(model)
+spacy_nlp = SpacyNLP.from_model_id("en_core_sci_md", source="spacy")
+nlp_pipeline.add_node(spacy_nlp)
 
 # Add TextPostProcessor component
 postprocessor = TextPostProcessor(
@@ -68,7 +70,7 @@ nlp = nlp_pipeline.build()
 # Use the pipeline
 result = nlp(Document("Patient has a history of heart attack and high blood pressure."))
 
-print(f"Entities: {result.entities}")
+print(f"Entities: {result.nlp.spacy_doc.ents}")
 ```
 
 #### Adding connectors
@@ -96,7 +98,13 @@ Pre-built pipelines are use case specific end-to-end workflows that already have
 from healthchain.pipeline import MedicalCodingPipeline
 from healthchain.models import CdaRequest
 
-pipeline = MedicalCodingPipeline.load("./path/to/model")
+# Load from model ID
+pipeline = MedicalCodingPipeline.from_model_id(
+    model="blaze999/Medical-NER", task="token-classification", source="huggingface"
+)
+
+# Or load from local model
+pipeline = MedicalCodingPipeline.from_local_model("./path/to/model", source="spacy")
 
 cda_data = CdaRequest(document="<CDA XML content>")
 output = pipeline(cda_data)
@@ -129,7 +137,9 @@ from typing import List
 @hc.sandbox
 class MyCDS(ClinicalDecisionSupport):
     def __init__(self) -> None:
-        self.pipeline = SummarizationPipeline.load("./path/to/model")
+        self.pipeline = SummarizationPipeline.from_model_id(
+            "facebook/bart-large-cnn", source="huggingface"
+        )
         self.data_generator = CdsDataGenerator()
 
     # Sets up an instance of a mock EHR client of the specified workflow
@@ -162,7 +172,9 @@ from healthchain.models import CcdData, CdaRequest, CdaResponse
 @hc.sandbox
 class NotereaderSandbox(ClinicalDocumentation):
     def __init__(self):
-        self.pipeline = MedicalCodingPipeline.load("./path/to/model")
+        self.pipeline = MedicalCodingPipeline.from_model_id(
+            "en_core_sci_md", source="spacy"
+        )
 
     # Load an existing CDA file
     @hc.ehr(workflow="sign-note-inpatient")
@@ -192,9 +204,9 @@ Then run:
 healthchain run mycds.py
 ```
 By default, the server runs at `http://127.0.0.1:8000`, and you can interact with the exposed endpoints at `/docs`.
+
 ## Road Map
 - [ ] 🎛️ Versioning and artifact management for pipelines and sandbox EHR configurations
-- [ ] 🤖 Integrations with other pipeline libraries such as spaCy, HuggingFace, LangChain etc.
 - [ ] ❓ Testing and evaluation framework for pipelines and use cases
 - [ ] 🧠 Multi-modal pipelines that have built-in NLP to utilize unstructured data
 - [ ] ✨ Improvements to synthetic data generator methods

diff --git a/cookbook/cds_discharge_summarizer_hf_chat.py b/cookbook/cds_discharge_summarizer_hf_chat.py
@@ -0,0 +1,67 @@
+import healthchain as hc
+
+from healthchain.pipeline import SummarizationPipeline
+from healthchain.use_cases import ClinicalDecisionSupport
+from healthchain.models import CdsFhirData, CDSRequest, CDSResponse
+from healthchain.data_generators import CdsDataGenerator
+
+from langchain_huggingface.llms import HuggingFaceEndpoint
+from langchain_huggingface import ChatHuggingFace
+
+from langchain_core.prompts import PromptTemplate
+from langchain_core.output_parsers import StrOutputParser
+
+import getpass
+import os
+
+
+if not os.getenv("HUGGINGFACEHUB_API_TOKEN"):
+    os.environ["HUGGINGFACEHUB_API_TOKEN"] = getpass.getpass("Enter your token: ")
+
+
+def create_summarization_chain():
+    hf = HuggingFaceEndpoint(
+        repo_id="HuggingFaceH4/zephyr-7b-beta",
+        task="text-generation",
+        max_new_tokens=512,
+        do_sample=False,
+        repetition_penalty=1.03,
+    )
+    model = ChatHuggingFace(llm=hf)
+    template = """
+    You are a bed planner for a hospital. Provide a concise, objective summary of the input text in short bullet points separated by new lines,
+    focusing on key actions such as appointments and medication dispense instructions, without using second or third person pronouns.\n'''{text}'''
+    """
+    prompt = PromptTemplate.from_template(template)
+    return prompt | model | StrOutputParser()
+
+
+@hc.sandbox
+class DischargeNoteSummarizer(ClinicalDecisionSupport):
+    def __init__(self):
+        # Initialize pipeline and data generator
+        chain = create_summarization_chain()
+        self.pipeline = SummarizationPipeline.load(
+            chain, source="langchain", template_path="templates/cds_card_template.json"
+        )
+        self.data_generator = CdsDataGenerator()
+
+    @hc.ehr(workflow="encounter-discharge")
+    def load_data_in_client(self) -> CdsFhirData:
+        # Generate synthetic FHIR data for testing
+        data = self.data_generator.generate(
+            free_text_path="data/discharge_notes.csv", column_name="text"
+        )
+        return data
+
+    @hc.api
+    def my_service(self, request: CDSRequest) -> CDSResponse:
+        # Process the request through our pipeline
+        result = self.pipeline(request)
+        return result
+
+
+if __name__ == "__main__":
+    # Start the sandbox server
+    summarizer = DischargeNoteSummarizer()
+    summarizer.start_sandbox()
diff --git a/cookbook/cds_discharge_summarizer_hf_trf.py b/cookbook/cds_discharge_summarizer_hf_trf.py
@@ -0,0 +1,39 @@
+import healthchain as hc
+
+from healthchain.pipeline import SummarizationPipeline
+from healthchain.use_cases import ClinicalDecisionSupport
+from healthchain.models import CdsFhirData, CDSRequest, CDSResponse
+from healthchain.data_generators import CdsDataGenerator
+
+import getpass
+import os
+
+
+if not os.getenv("HUGGINGFACEHUB_API_TOKEN"):
+    os.environ["HUGGINGFACEHUB_API_TOKEN"] = getpass.getpass("Enter your token: ")
+
+
+@hc.sandbox
+class DischargeNoteSummarizer(ClinicalDecisionSupport):
+    def __init__(self):
+        self.pipeline = SummarizationPipeline.from_model_id(
+            "google/pegasus-xsum", source="huggingface", task="summarization"
+        )
+        self.data_generator = CdsDataGenerator()
+
+    @hc.ehr(workflow="encounter-discharge")
+    def load_data_in_client(self) -> CdsFhirData:
+        data = self.data_generator.generate(
+            free_text_path="data/discharge_notes.csv", column_name="text"
+        )
+        return data
+
+    @hc.api
+    def my_service(self, request: CDSRequest) -> CDSResponse:
+        result = self.pipeline(request)
+        return result
+
+
+if __name__ == "__main__":
+    summarizer = DischargeNoteSummarizer()
+    summarizer.start_sandbox()
diff --git a/cookbook/data/discharge_notes.csv b/cookbook/data/discharge_notes.csv
@@ -0,0 +1,4 @@
+text
+"Your hospital stay for pneumonia is now complete and you are cleared for discharge home today. During your 5-day admission, you received intravenous antibiotics which have now been changed to oral Co-amoxiclav 625mg three times daily for 5 more days, and you should complete this full course. Take regular Paracetamol 1g four times daily as needed for fever or discomfort, and continue using your regular inhalers as prescribed. You should rest at home for at least 7 days and gradually increase your activity level as you feel able. Use 2-3 pillows when sleeping to help with breathing and try to drink at least 6-8 glasses of water daily. Call your GP or return to hospital immediately if you develop increased shortness of breath, chest pain, fever above 38°C, or coughing up blood. Schedule a follow-up appointment with your GP within 7 days, and attend your chest X-ray appointment scheduled for next Thursday at 2:30 PM to confirm the pneumonia has cleared. The district nurse will visit you at home tomorrow to check your progress and oxygen levels.",
+"73-year-old male post CVA ready for discharge to Cedar House Rehabilitation facility tomorrow. Transport booked for 1100hrs - requires bariatric ambulance and 2 crew members (confirmed). Medication reconciliation completed - pharmacy preparing discharge medications (Apixaban 5mg, Baclofen 20mg MR, new anticoagulation card) for collection by daughter at 0900. Patient requires hoisting and pressure-relieving equipment - ward to arrange hospital bed and mattress from equipment library before transfer. Outstanding tasks: 1) Final INR check due 0800 tomorrow, 2) SALT assessment scheduled 0830 tomorrow - must be completed prior to transfer, 3) MAR charts and medication administration record to be faxed to Cedar House before 1000. Social services have confirmed funding for 6-week rehabilitation placement. Daughter (NOK) aware of discharge plan and will bring clothing. Follow-up arrangements needed: Stroke clinic in 4 weeks, SALT outpatient review in 7 days - appointments pending. Current location: Stroke Unit bed 12, side room (previous MRSA). Deep clean required post-discharge. Patient requires NIL BY MOUTH status until SALT assessment completed. Obs stable: BP 135/82, HR 72, afebrile. Ward clerk to notify bed management once patient leaves ward. GP summary to be completed and sent with transfer documentation.",
+"Mr. Thompson's discharge from the Stroke Unit to Cedar House Rehabilitation Centre has been approved for tomorrow morning contingent on three requirements: the pharmacy must prepare his modified-release Baclofen 20mg and new anticoagulation medication pack (Apixaban 5mg) for collection by his daughter before 9am, hospital transport must be confirmed for an 11am pickup (bariatric ambulance and two crew members required due to hoisting needs), and the rehabilitation centre must receive his completed medication administration record by fax before accepting admission. The ward needs to arrange collection of his pressure-relieving mattress from the equipment library for transport with him, and his current hospital bed must be deep-cleaned after discharge due to previous MRSA status. Prior to discharge, the Stroke Early Supported Discharge team must complete their initial assessment at 8:30am, and his daughter needs to bring appropriate clothing as hospital gowns cannot be taken to the rehabilitation facility. The patient requires two additional outpatient appointments to be booked: a swallowing assessment with Speech and Language Therapy within 7 days, and a follow-up with the Stroke Consultant in 4 weeks. The social worker must confirm that the family has received the rehabilitation centre's payment schedule and admission documentation. Additionally, the ward must ensure his discharge summary is sent to both his GP and the rehabilitation centre, with a copy of his anticoagulation monitoring booklet and most recent INR results."
diff --git a/cookbook/templates/cds_card_template.json b/cookbook/templates/cds_card_template.json
@@ -0,0 +1,6 @@
+{
+    "summary": "Action Required",
+    "indicator": "info",
+    "source": {{ default_source | tojson }},
+    "detail": "{{ model_output }}"
+}
diff --git a/docs/api/component.md b/docs/api/component.md
@@ -1,6 +1,7 @@
 # Component
 
 ::: healthchain.pipeline.components.base
+::: healthchain.pipeline.components.integrations
 ::: healthchain.pipeline.components.preprocessors
-::: healthchain.pipeline.components.model
 ::: healthchain.pipeline.components.postprocessors
+::: healthchain.pipeline.components.cdscardcreator