From a0ced051291aa46d1c8a811a578b4d581cdff996 Mon Sep 17 00:00:00 2001 From: Rabah Abdul Khalek Date: Tue, 16 Jan 2024 15:11:02 +0100 Subject: [PATCH 1/2] modified behaviour of tests w.r.t. cropped DL --- .../dataloaders/wrappers.py | 12 ++++++++- .../landmark_detection/tests/base.py | 21 +++++++++++---- .../tests_and_metrics/test_tests.py | 27 +++++++++++++++++++ 3 files changed, 54 insertions(+), 6 deletions(-) create mode 100644 tests/landmark_detection/tests_and_metrics/test_tests.py diff --git a/giskard_vision/landmark_detection/dataloaders/wrappers.py b/giskard_vision/landmark_detection/dataloaders/wrappers.py index 4da7d086..de89ef68 100644 --- a/giskard_vision/landmark_detection/dataloaders/wrappers.py +++ b/giskard_vision/landmark_detection/dataloaders/wrappers.py @@ -47,7 +47,7 @@ def __init__( self._margins = margins @property - def name(self): + def name(self) -> str: """ Gets the name of the cropped data loader. @@ -55,6 +55,16 @@ def name(self): str: The name of the cropped data loader. """ return f"{self._wrapped_dataloader.name} cropped on {self._part.name}" + + @property + def facial_part(self) -> FacialPart: + """ + Gets the facial_part used for the cropping. + + Returns: + FacialPart: The facial part used for the cropping. 
+ """ + return self._part def get_image(self, idx: int) -> np.ndarray: """ diff --git a/giskard_vision/landmark_detection/tests/base.py b/giskard_vision/landmark_detection/tests/base.py index 25e3c1ee..75736f79 100644 --- a/giskard_vision/landmark_detection/tests/base.py +++ b/giskard_vision/landmark_detection/tests/base.py @@ -38,6 +38,7 @@ class TestResult: passed: bool description: Optional[str] = None prediction_time: Optional[float] = None + prediction_fail_rate: Optional[float] = None facial_part: Optional[FacialPart] = None metric_name: Optional[str] = None model_name: Optional[str] = None @@ -113,6 +114,8 @@ def to_dict(self): "facial_part": self.facial_part.name, "model": self.model_name, "dataloader": self.dataloader_name, + "prediction_time": self.prediction_time, + "prediction_fail_rate": self.prediction_fail_rate, } if self.dataloader_ref_name: output.update({"dataloader_ref": self.dataloader_ref_name}) @@ -204,7 +207,7 @@ def run( self, model: FaceLandmarksModelBase, dataloader: DataIteratorBase, - facial_part: FacialPart = FacialParts.ENTIRE.value, + facial_part: FacialPart = None, ) -> TestResult: """Run the test on the specified model and dataloader. Passes if metric <= threhsold. @@ -212,12 +215,13 @@ def run( Args: model (FaceLandmarksModelBase): Model to be evaluated. dataloader (DataIteratorBase): Dataloader providing input data. - facial_part (FacialPart, optional): Facial part to consider during the evaluation. Defaults to entire face. - + facial_part (FacialPart, optional): Facial part to consider during the evaluation. Defaults to entire face if dataloader doesn't have facial_part as property. + Returns: TestResult: Result of the test. 
""" + facial_part = getattr(dataloader, "facial_part", FacialParts.ENTIRE.value) if facial_part is None else facial_part ground_truth = dataloader.all_marks prediction_result = model.predict(dataloader, facial_part=facial_part) metric_value = self.metric.get(prediction_result, ground_truth) @@ -229,6 +233,7 @@ def run( prediction_results=[prediction_result], passed=metric_value <= self.threshold, prediction_time=prediction_result.prediction_time, + prediction_fail_rate=prediction_result.prediction_fail_rate, facial_part=facial_part, metric_name=self.metric.name, model_name=model.name, @@ -255,7 +260,7 @@ def run( model: FaceLandmarksModelBase, dataloader: DataIteratorBase, dataloader_ref: DataIteratorBase, - facial_part: FacialPart = FacialParts.ENTIRE.value, + facial_part: Optional[FacialPart] = None # FacialParts.ENTIRE.value, ) -> TestResult: """Run the differential test on the specified model and dataloaders. Defined as metric_diff = (metric_ref-metric)/metric_ref. @@ -265,12 +270,14 @@ def run( model (FaceLandmarksModelBase): Model to be evaluated. dataloader (DataIteratorBase): Main dataloader. dataloader_ref (DataIteratorBase): Reference dataloader for comparison. - facial_part (FacialPart, optional): Facial part to consider during the evaluation. Defaults to entire face. + facial_part (FacialPart, optional): Facial part to consider during the evaluation. Defaults to entire face if dataloader doesn't have facial_part as property. Returns: TestResult: Result of the differential test. 
""" + facial_part = getattr(dataloader, "facial_part", FacialParts.ENTIRE.value) if facial_part is None else facial_part + prediction_result = model.predict(dataloader, facial_part=facial_part) prediction_result_ref = model.predict(dataloader_ref, facial_part=facial_part) @@ -285,6 +292,9 @@ def run( prediction_results = [prediction_result, prediction_result_ref] prediction_time = prediction_result.prediction_time + prediction_result_ref.prediction_time + prediction_fail_rate = np.mean( + [prediction_result.prediction_fail_rate, prediction_result_ref.prediction_fail_rate] + ) return TestResult( test_name=self.__class__.__name__, description=self.metric.description, @@ -293,6 +303,7 @@ def run( prediction_results=prediction_results, passed=abs(metric_value) <= self.threshold, prediction_time=prediction_time, + prediction_fail_rate=prediction_fail_rate, facial_part=facial_part, metric_name=self.metric.name, model_name=model.name, diff --git a/tests/landmark_detection/tests_and_metrics/test_tests.py b/tests/landmark_detection/tests_and_metrics/test_tests.py new file mode 100644 index 00000000..c0eaad9f --- /dev/null +++ b/tests/landmark_detection/tests_and_metrics/test_tests.py @@ -0,0 +1,27 @@ +from giskard_vision.landmark_detection.dataloaders.wrappers import CroppedDataLoader + +from giskard_vision.landmark_detection.tests.performance import NMEMean +from giskard_vision.landmark_detection.tests.base import Test, TestDiff +from giskard_vision.landmark_detection.marks.facial_parts import FacialParts + + +def test_tests_on_cropped_dl(opencv_model, dataset_300w): + + fp = FacialParts.LEFT_HALF.value + dl = CroppedDataLoader(dataset_300w, part=fp) + + for test in [Test, TestDiff]: + kwargs = { + "model": opencv_model, + "dataloader": dl, + "facial_part": fp + } + if test == TestDiff: + kwargs["dataloader_ref"] = dataset_300w + + test1 = test(metric=NMEMean, threshold=1).run(**kwargs) + kwargs.pop("facial_part") + test2 = test(metric=NMEMean, threshold=1).run(**kwargs) + 
+ assert test1.metric_value == test2.metric_value + \ No newline at end of file From d2db888124c8b4fefa0c657c001ca63be282bf2c Mon Sep 17 00:00:00 2001 From: Rabah Abdul Khalek Date: Tue, 16 Jan 2024 15:14:13 +0100 Subject: [PATCH 2/2] pdm format --- .../landmark_detection/dataloaders/wrappers.py | 2 +- giskard_vision/landmark_detection/tests/base.py | 14 +++++++++----- .../tests_and_metrics/test_tests.py | 17 +++++------------ 3 files changed, 15 insertions(+), 18 deletions(-) diff --git a/giskard_vision/landmark_detection/dataloaders/wrappers.py b/giskard_vision/landmark_detection/dataloaders/wrappers.py index de89ef68..adf67faa 100644 --- a/giskard_vision/landmark_detection/dataloaders/wrappers.py +++ b/giskard_vision/landmark_detection/dataloaders/wrappers.py @@ -55,7 +55,7 @@ def name(self) -> str: str: The name of the cropped data loader. """ return f"{self._wrapped_dataloader.name} cropped on {self._part.name}" - + @property def facial_part(self) -> FacialPart: """ diff --git a/giskard_vision/landmark_detection/tests/base.py b/giskard_vision/landmark_detection/tests/base.py index 75736f79..27b13c29 100644 --- a/giskard_vision/landmark_detection/tests/base.py +++ b/giskard_vision/landmark_detection/tests/base.py @@ -216,12 +216,14 @@ def run( model (FaceLandmarksModelBase): Model to be evaluated. dataloader (DataIteratorBase): Dataloader providing input data. facial_part (FacialPart, optional): Facial part to consider during the evaluation. Defaults to entire face if dataloader doesn't have facial_part as property. - + Returns: TestResult: Result of the test. 
""" - facial_part = getattr(dataloader, "facial_part", FacialParts.ENTIRE.value) if facial_part is None else facial_part + facial_part = ( + getattr(dataloader, "facial_part", FacialParts.ENTIRE.value) if facial_part is None else facial_part + ) ground_truth = dataloader.all_marks prediction_result = model.predict(dataloader, facial_part=facial_part) metric_value = self.metric.get(prediction_result, ground_truth) @@ -260,7 +262,7 @@ def run( model: FaceLandmarksModelBase, dataloader: DataIteratorBase, dataloader_ref: DataIteratorBase, - facial_part: Optional[FacialPart] = None # FacialParts.ENTIRE.value, + facial_part: Optional[FacialPart] = None, # FacialParts.ENTIRE.value, ) -> TestResult: """Run the differential test on the specified model and dataloaders. Defined as metric_diff = (metric_ref-metric)/metric_ref. @@ -276,8 +278,10 @@ def run( TestResult: Result of the differential test. """ - facial_part = getattr(dataloader, "facial_part", FacialParts.ENTIRE.value) if facial_part is None else facial_part - + facial_part = ( + getattr(dataloader, "facial_part", FacialParts.ENTIRE.value) if facial_part is None else facial_part + ) + prediction_result = model.predict(dataloader, facial_part=facial_part) prediction_result_ref = model.predict(dataloader_ref, facial_part=facial_part) diff --git a/tests/landmark_detection/tests_and_metrics/test_tests.py b/tests/landmark_detection/tests_and_metrics/test_tests.py index c0eaad9f..2b2fc288 100644 --- a/tests/landmark_detection/tests_and_metrics/test_tests.py +++ b/tests/landmark_detection/tests_and_metrics/test_tests.py @@ -1,27 +1,20 @@ from giskard_vision.landmark_detection.dataloaders.wrappers import CroppedDataLoader - -from giskard_vision.landmark_detection.tests.performance import NMEMean -from giskard_vision.landmark_detection.tests.base import Test, TestDiff from giskard_vision.landmark_detection.marks.facial_parts import FacialParts +from giskard_vision.landmark_detection.tests.base import Test, TestDiff +from 
giskard_vision.landmark_detection.tests.performance import NMEMean def test_tests_on_cropped_dl(opencv_model, dataset_300w): - fp = FacialParts.LEFT_HALF.value dl = CroppedDataLoader(dataset_300w, part=fp) - + for test in [Test, TestDiff]: - kwargs = { - "model": opencv_model, - "dataloader": dl, - "facial_part": fp - } + kwargs = {"model": opencv_model, "dataloader": dl, "facial_part": fp} if test == TestDiff: kwargs["dataloader_ref"] = dataset_300w test1 = test(metric=NMEMean, threshold=1).run(**kwargs) kwargs.pop("facial_part") test2 = test(metric=NMEMean, threshold=1).run(**kwargs) - + assert test1.metric_value == test2.metric_value - \ No newline at end of file