From 6335457acbb14a4c8fcf5fe1bb9d1270d8169a05 Mon Sep 17 00:00:00 2001 From: Nicolas Oudard Date: Thu, 2 Jan 2025 12:12:46 +0100 Subject: [PATCH] Add tests --- .../tasks/transform/transform_column.py | 5 +- .../tasks/transform/test_transform_column.py | 172 +++++++++++------- .../tasks/transform/test_transform_df.py | 85 ++++++++- dev-requirements.in | 1 + dev-requirements.txt | 5 + 5 files changed, 196 insertions(+), 72 deletions(-) diff --git a/dags/sources/tasks/transform/transform_column.py b/dags/sources/tasks/transform/transform_column.py index 2289f8d38..8d4f1cc5f 100644 --- a/dags/sources/tasks/transform/transform_column.py +++ b/dags/sources/tasks/transform/transform_column.py @@ -101,7 +101,6 @@ def clean_acteur_type_code(value, _): "association, entreprise de l'economie sociale et solidaire (ess)": "ess", "etablissement de sante": "ets_sante", "decheterie": "decheterie", - "pharmacie": "commerce", "point d'apport volontaire prive": "pav_prive", "plateforme inertes": "plateforme_inertes", "magasin / franchise, enseigne commerciale / distributeur / point de vente " @@ -205,9 +204,9 @@ def clean_souscategorie_codes_sinoe( sscat_list = [ v.strip() for v in sscat_list - if v.strip().lower() not in ("", "nan", "np", "None") + if v.strip().lower() not in ("", "nan", "np", "none") ] - + print(sscat_list) sscat_list = [ dechet_mapping[v] for v in sscat_list diff --git a/dags_unit_tests/sources/tasks/transform/test_transform_column.py b/dags_unit_tests/sources/tasks/transform/test_transform_column.py index 47c54b734..9c4b6e0a2 100644 --- a/dags_unit_tests/sources/tasks/transform/test_transform_column.py +++ b/dags_unit_tests/sources/tasks/transform/test_transform_column.py @@ -3,6 +3,7 @@ import pytest from sources.tasks.transform.transform_column import ( cast_eo_boolean_or_string_to_boolean, + clean_acteur_type_code, clean_code_postal, clean_number, clean_public_accueilli, @@ -10,6 +11,7 @@ clean_siren, clean_siret, clean_souscategorie_codes, + clean_souscategorie_codes_sinoe, clean_url, convert_opening_hours, strip_string, @@ -150,8 +152,47 @@ def test_strip_string(self, input, output): class TestCleanActeurTypeCode: - # FIXME : Add tests - pass + @pytest.mark.parametrize( + "value, expected_code", + [ + ("solution en ligne (site web, app. mobile)", "acteur_digital"), + ("artisan, commerce independant", "artisan"), + ( + "magasin / franchise, enseigne commerciale / distributeur /" + " point de vente", + "commerce", + ), + ("point d'apport volontaire publique", "pav_public"), + ("association, entreprise de l'economie sociale et solidaire (ess)", "ess"), + ("etablissement de sante", "ets_sante"), + ("decheterie", "decheterie"), + ("point d'apport volontaire prive", "pav_prive"), + ("plateforme inertes", "plateforme_inertes"), + ( + "magasin / franchise, enseigne commerciale / distributeur / " + "point de vente / franchise, enseigne commerciale / distributeur /" + " point de vente", + "commerce", + ), + ("point d'apport volontaire ephemere / ponctuel", "pav_ponctuel"), + (" Dèchëtérie ", "decheterie"), + ], + ) + def test_clean_acteur_type_code(self, value, expected_code): + assert clean_acteur_type_code(value, None) == expected_code + + @pytest.mark.parametrize( + "value", + [ + ("unknown type"), + ("another unknown type"), + ], + ) + def test_clean_acteur_type_code_invalid(self, value): + with pytest.raises( + ValueError, match=f"Acteur type `{value}` not found in mapping" + ): + clean_acteur_type_code(value, None) class TestCleanPublicAccueilli: @@ -272,65 +313,68 @@ def test_clean_souscategorie_codes_raise(self, dag_config): class TestCleanSouscategorieCodesSinoe: - # FIXME : Add tests - pass - - # @pytest.mark.parametrize("produitsdechets_acceptes", (None, "NP|01.22")) - # def test_produitsdechets_acceptes_exclude_entries_not_mapped( - # self, - # product_mapping, - # dechet_mapping, - # produitsdechets_acceptes, - # ): - # df_normalised = pd.DataFrame( - # { - # "identifiant_externe": ["DECHET_2"], - # "ANNEE": [2024], - # "_geopoint": ["48.4812237361283,3.120109493179493"], - # "produitsdechets_acceptes": [produitsdechets_acceptes], - # "public_accueilli": ["DMA"], - # }, - # ) - - # df = df_normalize_sinoe( - # df=df_normalised, - # product_mapping=product_mapping, - # dechet_mapping=dechet_mapping, - # ) - # assert len(df) == 0 - - # @pytest.mark.parametrize( - # "produitsdechets_acceptes, produitsdechets_acceptes_expected", - # ( - # [ - # "01.1|07.25|07.6", - # ["Solvants usés", "Papiers cartons mêlés triés", "Déchets textiles"], - # ], - # ["07.6", ["Déchets textiles"]], - # ), - # ) - # def test_produitsdechets_acceptes_convert_dechet_codes_to_our_codes( - # self, - # product_mapping, - # dechet_mapping, - # produitsdechets_acceptes, - # produitsdechets_acceptes_expected, - # ): - # df_normalised = pd.DataFrame( - # { - # "identifiant_externe": ["DECHET_2"], - # "ANNEE": [2024], - # "_geopoint": ["48.4812237361283,3.120109493179493"], - # "produitsdechets_acceptes": [produitsdechets_acceptes], - # "public_accueilli": ["DMA"], - # }, - # ) - # df = df_normalize_sinoe( - # df=df_normalised, - # product_mapping=product_mapping, - # dechet_mapping=dechet_mapping, - # ) - # assert ( - # df.iloc[0]["produitsdechets_acceptes"] == - # produitsdechets_acceptes_expected - # ) + @pytest.mark.parametrize( + "sscats, dechet_mapping, product_mapping, expected_output", + [ + (None, {}, {}, []), + ("", {}, {}, []), + ( + "01.3|02.31", + {"01.3": "mapped1", "02.31": "mapped2"}, + {"mapped1": "product1", "mapped2": "product2"}, + ["product1", "product2"], + ), + ( + "01.3|02.31|01.3", + {"01.3": "mapped1", "02.31": "mapped2"}, + {"mapped1": "product1", "mapped2": "product2"}, + ["product1", "product2"], + ), + ( + "01.3|02.31", + {"01.3": "mapped1", "02.31": "mapped2"}, + {"mapped1": "product1"}, + ["product1"], + ), + ( + "01.3|02.31", + {"01.3": "mapped1", "02.31": "mapped2"}, + {"mapped2": "product2"}, + ["product2"], + ), + ("01.3|02.31", {"01.3": "mapped1", "02.31": "mapped2"}, {}, []), + ( + "01.3|nan|02.31", + {"01.3": "mapped1", "02.31": "mapped2"}, + {"mapped1": "product1", "mapped2": "product2"}, + ["product1", "product2"], + ), + ( + "01.3|np|02.31", + {"01.3": "mapped1", "02.31": "mapped2"}, + {"mapped1": "product1", "mapped2": "product2"}, + ["product1", "product2"], + ), + ( + "01.3|None|02.31", + {"01.3": "mapped1", "02.31": "mapped2"}, + {"mapped1": "product1", "mapped2": "product2"}, + ["product1", "product2"], + ), + ( + "01.3 | | 02.31", + {"01.3": "mapped1", "02.31": "mapped2"}, + {"mapped1": "product1", "mapped2": "product2"}, + ["product1", "product2"], + ), + ], + ) + def test_clean_souscategorie_codes_sinoe( + self, sscats, dechet_mapping, product_mapping, expected_output, dag_config + ): + # Mock the DAGConfig + dag_config.dechet_mapping = dechet_mapping + dag_config.product_mapping = product_mapping + + result = clean_souscategorie_codes_sinoe(sscats, dag_config) + assert sorted(result) == sorted(expected_output) diff --git a/dags_unit_tests/sources/tasks/transform/test_transform_df.py b/dags_unit_tests/sources/tasks/transform/test_transform_df.py index f7aab3f70..351ac8d24 100644 --- a/dags_unit_tests/sources/tasks/transform/test_transform_df.py +++ b/dags_unit_tests/sources/tasks/transform/test_transform_df.py @@ -2,12 +2,14 @@ import pytest from sources.tasks.transform.transform_df import ( clean_action_codes, + clean_adresse, clean_identifiant_externe, clean_identifiant_unique, clean_label_codes, clean_siret_and_siren, clean_telephone, get_latlng_from_geopoint, + merge_and_clean_souscategorie_codes, merge_duplicates, merge_sous_categories_columns, ) @@ -274,9 +276,66 @@ def test_merge_sscat_columns(self, row_columns, expected_produitsdechets_accepte class TestCleanAdresse: - # FIXME : Add tests - # @patch("sources.tasks.transform.transform_df._get_address") - pass + @pytest.mark.parametrize( + "adresse_format_ban, expected_adresse", + [ + ( + "123 Rue de Paris 75001 Paris", + { + "adresse": "123 Rue de Paris", + "code_postal": "75001", + "ville": "Paris", + }, + ), + ( + " 123 Rue de Paris 75001 Paris ", + { + "adresse": "123 Rue de Paris", + "code_postal": "75001", + "ville": "Paris", + }, + ), + ( + "75001 Paris", + { + "adresse": None, + "code_postal": "75001", + "ville": "Paris", + }, + ), + ( + " 123 Rue de Paris 75001 Paris CEDEX 01123", + { + "adresse": "123 Rue de Paris", + "code_postal": "75001", + "ville": "Paris", + }, + ), + ], + ) + def test_clean_adresse_without_ban( + self, adresse_format_ban, expected_adresse, dag_config + ): + dag_config.validate_address_with_ban = False + row = pd.Series({"adresse_format_ban": adresse_format_ban}) + assert dict(clean_adresse(row, dag_config)) == expected_adresse + + def test_clean_adresse_with_ban(self, dag_config, mocker): + def _get_address(_): + # Mock implementation of _get_address + return "Mock Address", "Mock Postal Code", "Mock City" + + mocker.patch( + "sources.tasks.transform.transform_df._get_address", + side_effect=_get_address, + ) + dag_config.validate_address_with_ban = True + row = pd.Series({"adresse_format_ban": "fake adresse"}) + assert dict(clean_adresse(row, dag_config)) == { + "adresse": "Mock Address", + "code_postal": "Mock Postal Code", + "ville": "Mock City", + } class TestCleanActeurserviceCodes: @@ -366,8 +425,24 @@ def test_ess_label(self, dag_config): class TestMergeAndCleanSouscategorieCodes: - # FIXME : Add tests - pass + @pytest.mark.parametrize( + "row_data, expected_output", + [ + ({"col1": "sscat1", "col2": "sscat2"}, ["mapped1", "mapped2"]), + ({"col1": "sscat1", "col2": "sscat1"}, ["mapped1"]), + ({"col1": None, "col2": "sscat2"}, ["mapped2"]), + ({"col1": "sscat1", "col2": None}, ["mapped1"]), + ({"col1": None, "col2": None}, []), + ], + ) + def test_merge_and_clean_souscategorie_codes( + self, row_data, expected_output, dag_config + ): + dag_config.product_mapping = {"sscat1": "mapped1", "sscat2": "mapped2"} + + row = pd.Series(row_data) + result = merge_and_clean_souscategorie_codes(row, dag_config) + assert sorted(result["souscategorie_codes"]) == sorted(expected_output) class TestGetLatLngFromGeopoint: diff --git a/dev-requirements.in b/dev-requirements.in index 4daa98c58..5d8ac365c 100644 --- a/dev-requirements.in +++ b/dev-requirements.in @@ -15,6 +15,7 @@ ptpython pytest pytest-django pytest-dotenv +pytest-mock python-Levenshtein ratelimit ruff diff --git a/dev-requirements.txt b/dev-requirements.txt index debbcc96f..b2bb17c3d 100644 --- a/dev-requirements.txt +++ b/dev-requirements.txt @@ -1842,6 +1842,7 @@ pytest==8.3.4 \ # -r dev-requirements.in # pytest-django # pytest-dotenv + # pytest-mock pytest-django==4.9.0 \ --hash=sha256:1d83692cb39188682dbb419ff0393867e9904094a549a7d38a3154d5731b2b99 \ --hash=sha256:8bf7bc358c9ae6f6fc51b6cebb190fe20212196e6807121f11bd6a3b03428314 @@ -1850,6 +1851,10 @@ pytest-dotenv==0.5.2 \ --hash=sha256:2dc6c3ac6d8764c71c6d2804e902d0ff810fa19692e95fe138aefc9b1aa73732 \ --hash=sha256:40a2cece120a213898afaa5407673f6bd924b1fa7eafce6bda0e8abffe2f710f # via -r dev-requirements.in +pytest-mock==3.14.0 \ + --hash=sha256:0b72c38033392a5f4621342fe11e9219ac11ec9d375f8e2a0c164539e0d70f6f \ + --hash=sha256:2719255a1efeceadbc056d6bf3df3d1c5015530fb40cf347c0f9afac88410bd0 + # via -r dev-requirements.in python-daemon==3.1.2 \ --hash=sha256:b906833cef63502994ad48e2eab213259ed9bb18d54fa8774dcba2ff7864cec6 \ --hash=sha256:f7b04335adc473de877f5117e26d5f1142f4c9f7cd765408f0877757be5afbf4