Skip to content

Commit

Permalink
Release v1.0.0 (#108)
Browse files — browse the repository at this point in the history
* 🐛 Fix allergy_type typo and reaction filtering bug

* 🚚 Move lookup data outside package (#107)

* Switch to Ruff for formatting and linting (#104)
  • Loading branch information
jenniferjiangkells authored Dec 1, 2023
1 parent fab4a25 commit bed10b2
Show file tree
Hide file tree
Showing 47 changed files with 794 additions and 747 deletions.
4 changes: 4 additions & 0 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -31,3 +31,7 @@ jobs:
pip install https://huggingface.co/kormilitzin/en_core_med7_lg/resolve/main/en_core_med7_lg-any-py3-none-any.whl
- name: run pytest
run: pytest ./tests/*
- name: install ruff
run: pip install ruff
- name: ruff format
run: ruff format --check .
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,9 @@ __pycache__/
.ipynb_checkpoints/
.idea/

# Linting
.ruff_cache/

# Pytest
.pytest_cache/

Expand Down
2 changes: 2 additions & 0 deletions configs/miade_config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,10 @@ annotators:
meds/allergies: MedsAllergiesAnnotator
general:
problems:
lookup_data_path: ./lookup_data/
negation_detection: None
disable: []
meds/allergies:
lookup_data_path: ./lookup_data/
negation_detection: None
disable: []
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
5 changes: 5 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -56,3 +56,8 @@ where = ["src"]
[tool.setuptools.package-data]
miade = ["data/*.csv"]

[tool.ruff]
line-length = 120

[tool.ruff.lint]
ignore = ["E721"]
384 changes: 239 additions & 145 deletions src/miade/annotators.py

Large diffs are not rendered by default.

11 changes: 4 additions & 7 deletions src/miade/concept.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,6 @@ def __init__(
meta_anns: Optional[List[MetaAnnotations]] = None,
debug_dict: Optional[Dict] = None,
):

self.name = name
self.id = id
self.category = category
Expand All @@ -54,7 +53,9 @@ def from_entity(cls, entity: [Dict]):

return Concept(
id=entity["cui"],
name=entity["source_value"], # can also use detected_name which is spell checked but delimited by ~ e.g. liver~failure
name=entity[
"source_value"
], # can also use detected_name which is spell checked but delimited by ~ e.g. liver~failure
category=None,
start=entity["start"],
end=entity["end"],
Expand All @@ -72,11 +73,7 @@ def __hash__(self):
return hash((self.id, self.name, self.category))

def __eq__(self, other):
return (
self.id == other.id
and self.name == other.name
and self.category == other.category
)
return self.id == other.id and self.name == other.name and self.category == other.category

def __lt__(self, other):
return int(self.id) < int(other.id)
Expand Down
50 changes: 26 additions & 24 deletions src/miade/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,15 +3,14 @@
import yaml
import logging

from negspacy.negation import Negex
from negspacy.negation import Negex # noqa: F401
from pathlib import Path
from typing import List, Optional, Dict

from .concept import Concept, Category
from .note import Note
from .annotators import Annotator, ProblemsAnnotator, MedsAllergiesAnnotator
from .annotators import Annotator, ProblemsAnnotator, MedsAllergiesAnnotator # noqa: F401
from .dosageextractor import DosageExtractor
from .utils.metaannotationstypes import SubstanceCategory
from .utils.miade_cat import MiADE_CAT
from .utils.modelfactory import ModelFactory
from .utils.annotatorconfig import AnnotatorConfig
Expand All @@ -29,11 +28,15 @@ def create_annotator(name: str, model_factory: ModelFactory):
"""
name = name.lower()
if name not in model_factory.models:
raise ValueError(f"MedCAT model for {name} does not exist: either not configured in config.yaml or "
f"missing from models directory")
raise ValueError(
f"MedCAT model for {name} does not exist: either not configured in config.yaml or "
f"missing from models directory"
)

if name in model_factory.annotators.keys():
return model_factory.annotators[name](cat=model_factory.models.get(name), config=model_factory.configs.get(name))
return model_factory.annotators[name](
cat=model_factory.models.get(name), config=model_factory.configs.get(name)
)
else:
log.warning(f"Annotator {name} does not exist, loading generic Annotator")
return Annotator(model_factory.models[name])
Expand All @@ -48,14 +51,15 @@ class NoteProcessor:
:param device (str) whether inference should be run on cpu or gpu - default "cpu"
:param custom_annotators (List[Annotators]) List of custom annotators
"""

def __init__(
self,
model_directory: Path,
model_config_path: Path = None,
log_level: int = logging.INFO,
dosage_extractor_log_level: int = logging.INFO,
device: str = "cpu",
custom_annotators: Optional[List[Annotator]] = None
custom_annotators: Optional[List[Annotator]] = None,
):
logging.getLogger("miade").setLevel(log_level)
logging.getLogger("miade.dosageextractor").setLevel(dosage_extractor_log_level)
Expand Down Expand Up @@ -122,7 +126,7 @@ def _load_model_factory(self, custom_annotators: Optional[List[Annotator]] = Non
continue
mapped_models[name] = cat_model
else:
log.warning(f"No model ids configured!")
log.warning("No model ids configured!")

mapped_annotators = {}
# {name: <class Annotator>}
Expand All @@ -140,7 +144,7 @@ def _load_model_factory(self, custom_annotators: Optional[List[Annotator]] = Non
except AttributeError as e:
log.warning(f"{annotator_string} not found: {e}")
else:
log.warning(f"No annotators configured!")
log.warning("No annotators configured!")

mapped_configs = {}
if "general" in config_dict:
Expand All @@ -152,13 +156,10 @@ def _load_model_factory(self, custom_annotators: Optional[List[Annotator]] = Non
else:
log.warning("No general settings configured, using default settings.")

model_factory_config = {"models": mapped_models,
"annotators": mapped_annotators,
"configs": mapped_configs}
model_factory_config = {"models": mapped_models, "annotators": mapped_annotators, "configs": mapped_configs}

return ModelFactory(**model_factory_config)


def add_annotator(self, name: str) -> None:
"""
Adds annotators to processor
Expand All @@ -167,7 +168,9 @@ def add_annotator(self, name: str) -> None:
"""
try:
annotator = create_annotator(name, self.model_factory)
log.info(f"Added {type(annotator).__name__} to processor with config {self.model_factory.configs.get(name)}")
log.info(
f"Added {type(annotator).__name__} to processor with config {self.model_factory.configs.get(name)}"
)
except Exception as e:
raise Exception(f"Error creating annotator: {e}")

Expand Down Expand Up @@ -214,11 +217,9 @@ def process(self, note: Note, record_concepts: Optional[List[Concept]] = None) -

return concepts

def get_concept_dicts(self,
note: Note,
filter_uncategorized: bool = True,
record_concepts: Optional[List[Concept]] = None
) -> List[Dict]:
def get_concept_dicts(
self, note: Note, filter_uncategorized: bool = True, record_concepts: Optional[List[Concept]] = None
) -> List[Dict]:
"""
Returns concepts in dictionary format
:param note: (Note) note containing text to extract concepts from
Expand All @@ -233,10 +234,12 @@ def get_concept_dicts(self,
continue
concept_dict = concept.__dict__
if concept.dosage is not None:
concept_dict["dosage"] = {"dose": concept.dosage.dose.dict() if concept.dosage.dose else None,
"duration": concept.dosage.duration.dict() if concept.dosage.duration else None,
"frequency": concept.dosage.frequency.dict() if concept.dosage.frequency else None,
"route": concept.dosage.route.dict() if concept.dosage.route else None}
concept_dict["dosage"] = {
"dose": concept.dosage.dose.dict() if concept.dosage.dose else None,
"duration": concept.dosage.duration.dict() if concept.dosage.duration else None,
"frequency": concept.dosage.frequency.dict() if concept.dosage.frequency else None,
"route": concept.dosage.route.dict() if concept.dosage.route else None,
}
if concept.meta is not None:
meta_anns = []
for meta in concept.meta:
Expand All @@ -249,4 +252,3 @@ def get_concept_dicts(self,
concept_list.append(concept_dict)

return concept_list

21 changes: 7 additions & 14 deletions src/miade/dosage.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,9 +62,7 @@ class Route(BaseModel):
code_system: Optional[str] = ROUTE_CODE_SYSTEM


def parse_dose(
text: str, quantities: List[str], units: List[str], results: Dict
) -> Optional[Dose]:
def parse_dose(text: str, quantities: List[str], units: List[str], results: Dict) -> Optional[Dose]:
"""
:param text: (str) string containing dose
:param quantities: (list) list of quantity entities NER
Expand Down Expand Up @@ -99,15 +97,15 @@ def parse_dose(
else:
try:
quantity_dosage.value = float(quantities[0])
except:
except ValueError:
quantity_dosage.value = float(re.sub(r"[^\d.]+", "", quantities[0]))
quantity_dosage.unit = units[0]
elif len(quantities) == 2 and len(units) == 2:
quantities.sort()
try:
quantity_dosage.low = float(quantities[0])
quantity_dosage.high = float(quantities[1])
except:
except ValueError:
quantity_dosage.low = float(re.sub(r"[^\d.]+", "", quantities[0]))
quantity_dosage.high = float(re.sub(r"[^\d.]+", "", quantities[1]))
if units[0] == units[1]:
Expand All @@ -119,8 +117,7 @@ def parse_dose(
# use caliber results as backup
if results["units"] is not None:
log.debug(
f"Inconclusive dose entities {quantities}, "
f"using lookup results {results['qty']} {results['units']}"
f"Inconclusive dose entities {quantities}, " f"using lookup results {results['qty']} {results['units']}"
)
quantity_dosage.unit = results["units"]
# only autofill 1 if non-quantitative units e.g. tab, cap, puff
Expand Down Expand Up @@ -165,7 +162,7 @@ def parse_frequency(text: str, results: Dict) -> Optional[Frequency]:
if results["freq"] is not None and results["time"] is not None:
try:
frequency_dosage.value = results["time"] / results["freq"]
except ZeroDivisionError as e:
except ZeroDivisionError:
frequency_dosage.value = None
# here i convert time to hours if not institution specified
# (every X hrs as opposed to X times day) but it's arbitrary really...
Expand Down Expand Up @@ -327,13 +324,9 @@ def from_doc(cls, doc: Doc, calculate: bool = True):
# if duration not given in text could extract this from total dose if given
if total_dose is not None and dose is not None and doc._.results["freq"]:
if dose.value is not None:
daily_dose = float(dose.value) * (
round(doc._.results["freq"] / doc._.results["time"])
)
daily_dose = float(dose.value) * (round(doc._.results["freq"] / doc._.results["time"]))
elif dose.high is not None:
daily_dose = float(dose.high) * (
round(doc._.results["freq"] / doc._.results["time"])
)
daily_dose = float(dose.high) * (round(doc._.results["freq"] / doc._.results["time"]))

duration = parse_duration(
text=duration_text,
Expand Down
16 changes: 5 additions & 11 deletions src/miade/dosageextractor.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,9 @@
from typing import Optional

from .dosage import Dosage
from .drugdoseade.preprocessor import Preprocessor
from .drugdoseade.pattern_matcher import PatternMatcher
from .drugdoseade.entities_refiner import EntitiesRefiner

from .drugdoseade.preprocessor import Preprocessor # noqa: F401
from .drugdoseade.pattern_matcher import PatternMatcher # noqa: F401
from .drugdoseade.entities_refiner import EntitiesRefiner # noqa: F401

log = logging.getLogger(__name__)

Expand Down Expand Up @@ -46,17 +45,12 @@ def extract(self, text: str, calculate: bool = True) -> Optional[Dosage]:
"""
doc = self.dosage_extractor(text)

log.debug(
f"NER results: {[(e.text, e.label_, e._.total_dose) for e in doc.ents]}"
)
log.debug(f"NER results: {[(e.text, e.label_, e._.total_dose) for e in doc.ents]}")
log.debug(f"Lookup results: {doc._.results}")

dosage = Dosage.from_doc(doc=doc, calculate=calculate)

if all(
v is None
for v in [dosage.dose, dosage.frequency, dosage.route, dosage.duration]
):
if all(v is None for v in [dosage.dose, dosage.frequency, dosage.route, dosage.duration]):
return None

return dosage
Expand Down
6 changes: 1 addition & 5 deletions src/miade/drugdoseade/entities_refiner.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,11 +14,7 @@ def EntitiesRefiner(doc):
new_ents = []
for ind, ent in enumerate(doc.ents):
# combine consecutive labels with the same tag
if (
ent.label_ == "DURATION"
or ent.label_ == "FREQUENCY"
or ent.label_ == "DOSAGE"
) and ind != 0:
if (ent.label_ == "DURATION" or ent.label_ == "FREQUENCY" or ent.label_ == "DOSAGE") and ind != 0:
prev_ent = doc.ents[ind - 1]
if prev_ent.label_ == ent.label_:
new_ent = Span(doc, prev_ent.start, ent.end, label=ent.label)
Expand Down
14 changes: 3 additions & 11 deletions src/miade/drugdoseade/pattern_matcher.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,11 +18,7 @@
@spacy.registry.misc("patterns_lookup_table.v1")
def create_patterns_dict():
patterns_data = pkgutil.get_data(__name__, "../data/patterns.csv")
patterns_dict = (
pd.read_csv(io.BytesIO(patterns_data), index_col=0)
.squeeze("columns")
.T.to_dict()
)
patterns_dict = pd.read_csv(io.BytesIO(patterns_data), index_col=0).squeeze("columns").T.to_dict()

return patterns_dict

Expand Down Expand Up @@ -67,9 +63,7 @@ def __call__(self, doc: Doc) -> Doc:
# rule-based matching based on structure of dosage - HIE medication e.g. take 2 every day, 24 tablets
expression = r"(?P<dose_string>start [\w\s,-]+ ), (?P<total_dose>\d+) (?P<unit>[a-z]+ )?$"
for match in re.finditer(expression, dose_string):
dose_string = match.group(
"dose_string"
) # remove total dose component for lookup
dose_string = match.group("dose_string") # remove total dose component for lookup
start, end = match.span("total_dose")
total_dose_span = doc.char_span(start, end, alignment_mode="contract")
total_dose_span.label_ = "DOSAGE"
Expand All @@ -81,9 +75,7 @@ def __call__(self, doc: Doc) -> Doc:
unit_span = doc.char_span(start, end, alignment_mode="contract")
unit_span.label_ = "FORM"
unit_span._.total_dose = True
doc._.results[
"units"
] = unit_span.text # set unit in results dict as well
doc._.results["units"] = unit_span.text # set unit in results dict as well
new_entities.append(unit_span)

# lookup patterns from CALIBERdrugdose - returns dosage results in doc._.results attribute
Expand Down
7 changes: 2 additions & 5 deletions src/miade/drugdoseade/preprocessor.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,8 +81,7 @@ def __call__(self, doc: Doc) -> Doc:

# remove numbers relating to strength of med e.g. aspirin 200mg tablets...
processed_text = re.sub(
r" (\d+\.?\d*) (mg|ml|g|mcg|microgram|gram|%)"
r"(\s|/)(tab|cap|gel|cream|dose|pessaries)",
r" (\d+\.?\d*) (mg|ml|g|mcg|microgram|gram|%)" r"(\s|/)(tab|cap|gel|cream|dose|pessaries)",
"",
processed_text,
)
Expand All @@ -102,9 +101,7 @@ def __call__(self, doc: Doc) -> Doc:
if replacement == " ":
log.debug(f"Removed multiword match '{words}'")
else:
log.debug(
f"Replaced multiword match '{words}' with '{replacement}'"
)
log.debug(f"Replaced multiword match '{words}' with '{replacement}'")
processed_text = new_text

# numbers replace 2
Expand Down
4 changes: 1 addition & 3 deletions src/miade/drugdoseade/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,9 +92,7 @@ def numbers_replace(text):
text,
)
# 3 weeks...
text = re.sub(
r" ([\d.]+) (week) ", lambda m: " {:g} days ".format(int(m.group(1)) * 7), text
)
text = re.sub(r" ([\d.]+) (week) ", lambda m: " {:g} days ".format(int(m.group(1)) * 7), text)
# 3 months ... NB assume 30 days in a month
text = re.sub(
r" ([\d.]+) (month) ",
Expand Down
Loading

0 comments on commit bed10b2

Please sign in to comment.