diff --git a/robotoff/insights/annotate.py b/robotoff/insights/annotate.py index 93ceb1c3c4..40839ef143 100644 --- a/robotoff/insights/annotate.py +++ b/robotoff/insights/annotate.py @@ -28,6 +28,7 @@ update_expiration_date, update_quantity, ) +from robotoff.prediction.utils import get_image_rotation, get_nutrition_table_prediction from robotoff.products import get_image_id, get_product from robotoff.types import InsightAnnotation, InsightType, JSONType, NutrientData from robotoff.utils import get_logger @@ -734,6 +735,7 @@ def process_annotation( if is_vote: return CANNOT_VOTE_RESULT + insight_updated = False # The annotator can change the nutrient values to fix the model errors if data is not None: try: @@ -749,22 +751,32 @@ def process_annotation( # user insight.data["annotation"] = validated_nutrients.model_dump() insight.data["was_updated"] = True - insight.save() + insight_updated = True else: validated_nutrients = NutrientData.model_validate(insight.data) validated_nutrients = cls.add_default_unit(validated_nutrients) insight.data["annotation"] = validated_nutrients.model_dump() insight.data["was_updated"] = False + insight_updated = True + + product_id = insight.get_product_id() + product = get_product(product_id, ["code", "images", "lang"]) + + if product is None: + return MISSING_PRODUCT_RESULT + + if insight_updated: insight.save() save_nutrients( - product_id=insight.get_product_id(), + product_id=product_id, nutrient_data=validated_nutrients, insight_id=insight.id, auth=auth, is_vote=is_vote, ) + cls.select_nutrition_image(insight, product, auth) return UPDATED_ANNOTATION_RESULT @classmethod @@ -788,6 +800,80 @@ def validate_data(cls, data: JSONType) -> NutrientData: raise ValidationError("missing 'nutrients' field") return NutrientData.model_validate(data) + @classmethod + def select_nutrition_image( + cls, + insight: ProductInsight, + product: JSONType, + auth: OFFAuthentication | None = None, + ) -> None: + """If the insight is validated, select the source image as nutrition image. + + We fetch the image orientation from the `predictions` table and the prediction + of the nutrition table detector from the `image_predictions` table to know the + rotation angle and the bounding box of the nutrition table. + If any of these predictions are missing, we just select the image without any + rotation or crop bounding box. + + :param insight: the original `nutrient_extraction` insight + :param product: the product data + :param auth: the user authentication data + """ + + if insight.source_image is None: + return None + + image_id = get_image_id(insight.source_image) + images = product.get("images", {}) + image_meta: JSONType | None = images.get(image_id) + + if not image_id or not image_meta: + return None + + # Use the language of the product. This field should always be available, + # but we provide a default value just in case. + lang = product.get("lang", "en") + image_key = f"nutrition_{lang}" + # We don't want to select the nutrition image if one has already been + # selected + if image_key in images: + return None + + rotation = get_image_rotation(insight.source_image) + + nutrition_table_detections = get_nutrition_table_prediction( + insight.source_image, threshold=0.5 + ) + bounding_box = None + # Only crop according to the model predicted bounding box if there is exactly + # one nutrition table detected + if nutrition_table_detections and len(nutrition_table_detections) == 1: + bounding_box = nutrition_table_detections[0]["bounding_box"] + + crop_bounding_box: tuple[float, float, float, float] | None = None + if bounding_box: + rotation = rotation or 0 + # convert crop bounding box to the format expected by Product + # Opener + image_size = image_meta["sizes"]["full"] + width = image_size["w"] + height = image_size["h"] + crop_bounding_box = convert_crop_bounding_box( + bounding_box, width, height, rotation + ) + + product_id = insight.get_product_id() + select_rotate_image( + product_id=product_id, + image_id=image_id, + image_key=image_key, + rotate=rotation, + crop_bounding_box=crop_bounding_box, + auth=auth, + is_vote=False, + insight_id=insight.id, + ) + ANNOTATOR_MAPPING: dict[str, Type] = { InsightType.packager_code.name: PackagerCodeAnnotator, diff --git a/robotoff/prediction/nutrition_extraction.py b/robotoff/prediction/nutrition_extraction.py index bb8f29d435..1cc099ceba 100644 --- a/robotoff/prediction/nutrition_extraction.py +++ b/robotoff/prediction/nutrition_extraction.py @@ -569,6 +569,11 @@ def match_nutrient_value( ) ) and ( value in ("08", "09") + or ( + len(value) > 2 + and "." not in value + and (value.endswith("8") or (value.endswith("9"))) + ) or (value.endswith("8") and "." in value and not value.endswith(".8")) or (value.endswith("9") and "." in value and not value.endswith(".9")) ): diff --git a/robotoff/prediction/ocr/image_lang.py b/robotoff/prediction/ocr/image_lang.py index 4dff1341b5..20eb0d09e5 100644 --- a/robotoff/prediction/ocr/image_lang.py +++ b/robotoff/prediction/ocr/image_lang.py @@ -1,4 +1,4 @@ -from typing import Optional, Union +from typing import Optional, TypedDict, Union from openfoodfacts.ocr import OCRResult @@ -9,6 +9,11 @@ PREDICTOR_VERSION = "1" +class ImageLangDataType(TypedDict): + count: dict[str, int] + percent: dict[str, float] + + def get_image_lang(ocr_result: Union[OCRResult, str]) -> list[Prediction]: if isinstance(ocr_result, str): return [] diff --git a/robotoff/prediction/utils.py b/robotoff/prediction/utils.py new file mode 100644 index 0000000000..14722d9e4c --- /dev/null +++ b/robotoff/prediction/utils.py @@ -0,0 +1,74 @@ +from robotoff.models import ImageModel, ImagePrediction, Prediction +from robotoff.prediction.ocr.image_lang import ImageLangDataType +from robotoff.types import ObjectDetectionModel, PredictionType + + +def get_image_rotation(source_image: str) -> int | None: + """Return the image rotation of the image, by fetching the associated + `image_orientation` prediction from the DB. + + The image orientation is represented by a rotation angle in degrees: + - 0: upright + - 90: left + - 180: upside down + - 270: right + + If no prediction is found, return None. + + :param source_image: the source image of the prediction + :return: the rotation angle of the image, or None if no prediction is found + """ + image_orientation_prediction = Prediction.get_or_none( + Prediction.type == PredictionType.image_orientation, + Prediction.source_image == source_image, + ) + + if image_orientation_prediction is None: + return None + + return image_orientation_prediction.data["rotation"] + + +def get_image_lang(source_image: str) -> ImageLangDataType | None: + """Return the name of the language detected in the image, by fetching the + associated `image_lang` prediction from the DB. + + If no prediction is found, return None. + + :param source_image: the source image of the prediction + :return: the name of the language detected in the image, or None if no prediction + is found + """ + image_lang_prediction = Prediction.get_or_none( + Prediction.type == PredictionType.image_lang, + Prediction.source_image == source_image, + ) + + if image_lang_prediction is None: + return None + + return image_lang_prediction.data + + +def get_nutrition_table_prediction( + source_image: str, threshold: float = 0.5 +) -> list | None: + """Return the nutrition table prediction associated with the image. + + :param source_image: the source image of the prediction + :return: the nutrition table prediction associated with the image + """ + image_prediction = ( + ImagePrediction.select() + .join(ImageModel) + .where( + ImagePrediction.model_name == ObjectDetectionModel.nutrition_table.name, + ImageModel.source_image == source_image, + ) + ).get_or_none() + + if image_prediction is None: + return None + + objects = image_prediction.data["objects"] + return [obj for obj in objects if obj["score"] >= threshold] diff --git a/tests/integration/insights/test_annotate.py b/tests/integration/insights/test_annotate.py index 6a2e2aad62..01268fa89b 100644 --- a/tests/integration/insights/test_annotate.py +++ b/tests/integration/insights/test_annotate.py @@ -1,6 +1,7 @@ from unittest.mock import Mock import pytest +from openfoodfacts.types import JSONType from robotoff.insights.annotate import ( INVALID_DATA, @@ -8,10 +9,18 @@ AnnotationResult, CategoryAnnotator, IngredientSpellcheckAnnotator, + NutrientExtractionAnnotator, ) from robotoff.models import ProductInsight +from robotoff.types import ObjectDetectionModel, PredictionType -from ..models_utils import ProductInsightFactory, clean_db +from ..models_utils import ( + ImageModelFactory, + ImagePredictionFactory, + PredictionFactory, + ProductInsightFactory, + clean_db, +) @pytest.fixture(autouse=True) @@ -103,7 +112,6 @@ def test_process_annotation_with_invalid_user_input_data(self, user_data, mocker class TestIngredientSpellcheckAnnotator: - @pytest.fixture def mock_save_ingredients(self, mocker) -> Mock: return mocker.patch("robotoff.insights.annotate.save_ingredients") @@ -160,3 +168,137 @@ def test_process_annotate_no_user_data( assert annotation_result == UPDATED_ANNOTATION_RESULT assert "annotation" not in spellcheck_insight.data mock_save_ingredients.assert_called() + + class TestNutrientExtractionAnnotator: + SOURCE_IMAGE = "/872/032/603/7888/1.jpg" + + @pytest.fixture + def mock_select_rotate_image(self, mocker) -> Mock: + return mocker.patch("robotoff.insights.annotate.select_rotate_image") + + @pytest.fixture + def nutrient_extraction_insight(self): + return ProductInsightFactory( + type="nutrient_extraction", source_image=self.SOURCE_IMAGE + ) + + def test_select_nutrition_image_no_image_id( + self, + mock_select_rotate_image: Mock, + nutrient_extraction_insight: ProductInsightFactory, + ): + product: JSONType = {"images": {}, "lang": "fr"} + NutrientExtractionAnnotator.select_nutrition_image( + insight=nutrient_extraction_insight, + product=product, + ) + mock_select_rotate_image.assert_not_called() + + def test_select_nutrition_image_no_image_meta( + self, + mock_select_rotate_image: Mock, + nutrient_extraction_insight: ProductInsightFactory, + ): + product: JSONType = {"images": {"1": {}}, "lang": "fr"} + NutrientExtractionAnnotator.select_nutrition_image( + insight=nutrient_extraction_insight, + product=product, + ) + mock_select_rotate_image.assert_not_called() + + def test_select_nutrition_image_already_selected( + self, + mock_select_rotate_image: Mock, + nutrient_extraction_insight: ProductInsightFactory, + ): + product: JSONType = { + "images": { + "1": {"sizes": {"full": {"w": 1000, "h": 2000}}}, + "nutrition_fr": {}, + }, + "lang": "fr", + } + NutrientExtractionAnnotator.select_nutrition_image( + insight=nutrient_extraction_insight, + product=product, + ) + mock_select_rotate_image.assert_not_called() + + def test_select_nutrition_image( + self, + mock_select_rotate_image: Mock, + nutrient_extraction_insight: ProductInsightFactory, + ): + product = { + "images": {"1": {"sizes": {"full": {"w": 1000, "h": 2000}}}}, + "lang": "fr", + } + NutrientExtractionAnnotator.select_nutrition_image( + insight=nutrient_extraction_insight, + product=product, + ) + mock_select_rotate_image.assert_called_once_with( + product_id=nutrient_extraction_insight.get_product_id(), + image_id="1", + image_key="nutrition_fr", + rotate=None, + crop_bounding_box=None, + auth=None, + is_vote=False, + insight_id=nutrient_extraction_insight.id, + ) + + def test_select_nutrition_image_with_rotation_and_nutrition_table_detection( + self, + mock_select_rotate_image: Mock, + nutrient_extraction_insight: ProductInsightFactory, + ): + product = { + "images": {"1": {"sizes": {"full": {"w": 1000, "h": 2000}}}}, + "lang": "fr", + } + rotation_data = {"rotation": 90} + PredictionFactory( + type=PredictionType.image_orientation, + data=rotation_data, + source_image=self.SOURCE_IMAGE, + ) + image_model = ImageModelFactory(source_image=self.SOURCE_IMAGE) + detection_data = { + "objects": [ + { + "label": "nutrition-table", + "score": 0.550762104988098, + "bounding_box": [ + 0.06199073791503906, + 0.20298996567726135, + 0.4177824556827545, + 0.9909706115722656, + ], + }, + ] + } + ImagePredictionFactory( + model_name=ObjectDetectionModel.nutrition_table.name, + data=detection_data, + image=image_model, + ) + NutrientExtractionAnnotator.select_nutrition_image( + insight=nutrient_extraction_insight, + product=product, + ) + mock_select_rotate_image.assert_called_once_with( + product_id=nutrient_extraction_insight.get_product_id(), + image_id="1", + image_key="nutrition_fr", + rotate=rotation_data["rotation"], + crop_bounding_box=( + 202.98996567726135, + 1164.435088634491, + 990.9706115722656, + 1876.0185241699219, + ), + auth=None, + is_vote=False, + insight_id=nutrient_extraction_insight.id, + ) diff --git a/tests/integration/insights/test_category_import.py b/tests/integration/insights/test_category_import.py index 8ac2ab0e0a..fd18919941 100644 --- a/tests/integration/insights/test_category_import.py +++ b/tests/integration/insights/test_category_import.py @@ -25,6 +25,7 @@ def _set_up_and_tear_down(peewee_db): value_tag="en:salmons", automatic_processing=False, predictor="matcher", + predictor_version="matcher_v1", ) ProductInsightFactory( id=insight_id1, @@ -32,6 +33,7 @@ def _set_up_and_tear_down(peewee_db): type="category", value_tag="en:salmons", predictor="matcher", + predictor_version="matcher_v1", ) # Run the test case. yield @@ -50,6 +52,7 @@ def matcher_prediction(category): }, automatic_processing=False, predictor="matcher", + predictor_version="matcher_v1", ) @@ -61,6 +64,7 @@ def neural_prediction(category, confidence=0.7, auto=False): data={"lang": "xx"}, automatic_processing=auto, predictor="neural", + predictor_version="neural_v1", confidence=confidence, ) @@ -72,7 +76,7 @@ class TestCategoryImporter: """ def fake_product_store(self): - return {DEFAULT_PRODUCT_ID: Product({"categories_tags": ["en:fish"]})} + return {DEFAULT_PRODUCT_ID: Product({"categories_tags": ["en:fishes"]})} def _run_import(self, predictions, product_store=None): if product_store is None: @@ -85,16 +89,33 @@ def _run_import(self, predictions, product_store=None): "predictions", [ # category already on product - [matcher_prediction("en:fish")], - [neural_prediction("en:fish")], + [matcher_prediction("en:fishes")], + [neural_prediction("en:fishes")], + ], + ) + def test_import_one_insight_to_delete(self, predictions): + """Test when there is an import with a value already present in the product, + the insight should be deleted.""" + original_insights = ProductInsight.select() + assert len(original_insights) == 1 + import_result = self._run_import(predictions) + assert import_result.created_insights_count() == 0 + assert import_result.updated_insights_count() == 0 + assert import_result.deleted_insights_count() == 1 + insights = list(ProductInsight.select()) + assert len(insights) == 0 + + @pytest.mark.parametrize( + "predictions", + [ # category already in insights [matcher_prediction("en:salmons")], [neural_prediction("en:salmons")], # both [ - matcher_prediction("en:fish"), + matcher_prediction("en:fishes"), matcher_prediction("en:salmons"), - neural_prediction("en:fish"), + neural_prediction("en:fishes"), neural_prediction("en:salmons"), ], ], diff --git a/tests/integration/models_utils.py b/tests/integration/models_utils.py index 2918dd740f..30b07ee5e4 100644 --- a/tests/integration/models_utils.py +++ b/tests/integration/models_utils.py @@ -75,6 +75,11 @@ class Meta: predictor_version: Optional[str] = None confidence: Optional[float] = None server_type: str = "off" + source_image = factory.LazyAttribute( + lambda o: generate_image_path( + ProductIdentifier(o.barcode, ServerType[o.server_type]), "1" + ) + ) class AnnotationVoteFactory(UuidSequencer, PeeweeModelFactory): diff --git a/tests/integration/prediction/test_utils.py b/tests/integration/prediction/test_utils.py new file mode 100644 index 0000000000..8e3cd78696 --- /dev/null +++ b/tests/integration/prediction/test_utils.py @@ -0,0 +1,157 @@ +import pytest + +from robotoff.prediction.utils import ( + get_image_lang, + get_image_rotation, + get_nutrition_table_prediction, +) +from robotoff.types import ObjectDetectionModel, PredictionType +from tests.integration.models_utils import ( + ImageModelFactory, + ImagePredictionFactory, + PredictionFactory, + clean_db, +) + + +@pytest.fixture(autouse=True) +def _set_up_and_tear_down(peewee_db): + with peewee_db: + # clean db + clean_db() + # Run the test case. + yield + clean_db() + + +class TestGetImageRotation: + def test_get_image_rotation(self, peewee_db): + with peewee_db: + prediction = PredictionFactory( + type=PredictionType.image_orientation, data={"rotation": 90} + ) + source_image = prediction.source_image + # Create other prediction + PredictionFactory(type=PredictionType.image_lang, source_image=source_image) + result = get_image_rotation(source_image) + assert result == prediction.data["rotation"] + + def test_get_no_result(self, peewee_db): + source_image = "/872/032/603/7888/1.jpg" + with peewee_db: + PredictionFactory(type=PredictionType.image_lang, source_image=source_image) + result = get_image_rotation(source_image) + assert result is None + + +class TestGetImageLang: + def test_get_image_lang(self, peewee_db): + lang_data = { + "count": {"en": 20, "fil": 7, "null": 12, "words": 39}, + "percent": { + "en": 51.282051282051285, + "fil": 17.94871794871795, + "null": 30.76923076923077, + }, + } + with peewee_db: + prediction = PredictionFactory( + type=PredictionType.image_lang, data=lang_data + ) + source_image = prediction.source_image + # Create other prediction + PredictionFactory( + type=PredictionType.image_orientation, source_image=source_image + ) + result = get_image_lang(source_image) + assert result == lang_data + + def test_get_no_result(self, peewee_db): + source_image = "/872/032/603/7888/2.jpg" + with peewee_db: + PredictionFactory( + type=PredictionType.image_orientation, source_image=source_image + ) + result = get_image_lang(source_image) + assert result is None + + +class TestGetNutritionTablePrediction: + def test_get_nutrition_table_prediction(self, peewee_db): + detection_data = { + "objects": [ + { + "label": "nutrition-table", + "score": 0.9000762104988098, + "bounding_box": [ + 0.06199073791503906, + 0.20298996567726135, + 0.4177824556827545, + 0.9909706115722656, + ], + }, + { + "label": "nutrition-table", + "score": 0.53344119787216187, + "bounding_box": [ + 0.3770750164985657, + 0.0008307297830469906, + 0.5850498080253601, + 0.15185657143592834, + ], + }, + ] + } + with peewee_db: + image_model = ImageModelFactory(source_image="/872/032/603/7888/3.jpg") + ImagePredictionFactory( + model_name=ObjectDetectionModel.nutrition_table.name, + data=detection_data, + image=image_model, + ) + source_image = image_model.source_image + result = get_nutrition_table_prediction(source_image) + assert result == detection_data["objects"] + + def test_get_no_result(self, peewee_db): + source_image = "/872/032/603/7888/4.jpg" + with peewee_db: + ImageModelFactory(source_image=source_image) + result = get_nutrition_table_prediction(source_image) + assert result is None + + def test_get_below_threshold(self, peewee_db): + detection_data = { + "objects": [ + { + "label": "nutrition-table", + "score": 0.9000762104988098, + "bounding_box": [ + 0.06199073791503906, + 0.20298996567726135, + 0.4177824556827545, + 0.9909706115722656, + ], + }, + { + "label": "nutrition-table", + "score": 0.53344119787216187, + "bounding_box": [ + 0.3770750164985657, + 0.0008307297830469906, + 0.5850498080253601, + 0.15185657143592834, + ], + }, + ] + } + with peewee_db: + image_model = ImageModelFactory(source_image="/872/032/603/7888/5.jpg") + ImagePredictionFactory( + model_name=ObjectDetectionModel.nutrition_table.name, + data=detection_data, + image=image_model, + ) + source_image = image_model.source_image + result = get_nutrition_table_prediction(source_image, threshold=0.95) + assert result == [] diff --git a/tests/unit/prediction/test_nutrition_extraction.py b/tests/unit/prediction/test_nutrition_extraction.py index b072b0c4a4..fc21bfbcaf 100644 --- a/tests/unit/prediction/test_nutrition_extraction.py +++ b/tests/unit/prediction/test_nutrition_extraction.py @@ -401,10 +401,12 @@ def test_aggregate_entities_multiple_entities(self): ("09", "salt_100g", ("0", "g", True)), # Missing unit but value does not end with '8' or '9' ("091", "proteins_100g", ("091", None, True)), + ("219", "proteins_100g", ("21", "g", True)), + ("318", "carbohydrates_100g", ("31", "g", True)), + ("105", "proteins_100g", ("105", None, True)), ], ) def test_match_nutrient_value(words_str: str, entity_label: str, expected_output): - assert match_nutrient_value(words_str, entity_label) == expected_output