Skip to content

Commit

Permalink
Add tests
Browse files Browse the repository at this point in the history
  • Loading branch information
kolok committed Jan 6, 2025
1 parent c36cd94 commit 6335457
Show file tree
Hide file tree
Showing 5 changed files with 196 additions and 72 deletions.
5 changes: 2 additions & 3 deletions dags/sources/tasks/transform/transform_column.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,6 @@ def clean_acteur_type_code(value, _):
"association, entreprise de l'economie sociale et solidaire (ess)": "ess",
"etablissement de sante": "ets_sante",
"decheterie": "decheterie",
"pharmacie": "commerce",
"point d'apport volontaire prive": "pav_prive",
"plateforme inertes": "plateforme_inertes",
"magasin / franchise, enseigne commerciale / distributeur / point de vente "
Expand Down Expand Up @@ -205,9 +204,9 @@ def clean_souscategorie_codes_sinoe(
sscat_list = [
v.strip()
for v in sscat_list
if v.strip().lower() not in ("", "nan", "np", "None")
if v.strip().lower() not in ("", "nan", "np", "none")
]

print(sscat_list)
sscat_list = [
dechet_mapping[v]
for v in sscat_list
Expand Down
172 changes: 108 additions & 64 deletions dags_unit_tests/sources/tasks/transform/test_transform_column.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,15 @@
import pytest
from sources.tasks.transform.transform_column import (
cast_eo_boolean_or_string_to_boolean,
clean_acteur_type_code,
clean_code_postal,
clean_number,
clean_public_accueilli,
clean_reprise,
clean_siren,
clean_siret,
clean_souscategorie_codes,
clean_souscategorie_codes_sinoe,
clean_url,
convert_opening_hours,
strip_string,
Expand Down Expand Up @@ -150,8 +152,47 @@ def test_strip_string(self, input, output):


class TestCleanActeurTypeCode:
    """Unit tests for ``clean_acteur_type_code``.

    The function is called as ``clean_acteur_type_code(value, None)``: the
    second positional argument is passed as ``None`` in every case here.
    """

    @pytest.mark.parametrize(
        "value, expected_code",
        [
            ("solution en ligne (site web, app. mobile)", "acteur_digital"),
            ("artisan, commerce independant", "artisan"),
            (
                "magasin / franchise, enseigne commerciale / distributeur /"
                " point de vente",
                "commerce",
            ),
            ("point d'apport volontaire publique", "pav_public"),
            ("association, entreprise de l'economie sociale et solidaire (ess)", "ess"),
            ("etablissement de sante", "ets_sante"),
            ("decheterie", "decheterie"),
            ("point d'apport volontaire prive", "pav_prive"),
            ("plateforme inertes", "plateforme_inertes"),
            (
                "magasin / franchise, enseigne commerciale / distributeur / "
                "point de vente / franchise, enseigne commerciale / distributeur /"
                " point de vente",
                "commerce",
            ),
            ("point d'apport volontaire ephemere / ponctuel", "pav_ponctuel"),
            # Surrounding whitespace, mixed case and accents are expected to be
            # normalised before the label is looked up.
            (" Dèchëtérie ", "decheterie"),
        ],
    )
    def test_clean_acteur_type_code(self, value, expected_code):
        """Each known source label resolves to its expected acteur type code."""
        assert clean_acteur_type_code(value, None) == expected_code

    @pytest.mark.parametrize(
        "value",
        [
            "unknown type",
            "another unknown type",
        ],
    )
    def test_clean_acteur_type_code_invalid(self, value):
        """A label outside the mapping raises ``ValueError`` naming that label."""
        # `match` is applied as a regular-expression search on the message; the
        # values used here contain no regex metacharacters.
        with pytest.raises(
            ValueError, match=f"Acteur type `{value}` not found in mapping"
        ):
            clean_acteur_type_code(value, None)


class TestCleanPublicAccueilli:
Expand Down Expand Up @@ -272,65 +313,68 @@ def test_clean_souscategorie_codes_raise(self, dag_config):


class TestCleanSouscategorieCodesSinoe:
    """Unit tests for ``clean_souscategorie_codes_sinoe``.

    Each case provides a ``|``-separated string of codes together with the two
    mappings set on ``dag_config`` (``dechet_mapping`` then
    ``product_mapping``) and the list of product codes expected back.
    """

    @pytest.mark.parametrize(
        "sscats, dechet_mapping, product_mapping, expected_output",
        [
            # Missing or empty input yields an empty list.
            (None, {}, {}, []),
            ("", {}, {}, []),
            # Every code is mapped through both dictionaries.
            (
                "01.3|02.31",
                {"01.3": "mapped1", "02.31": "mapped2"},
                {"mapped1": "product1", "mapped2": "product2"},
                ["product1", "product2"],
            ),
            # A repeated code appears only once in the result.
            (
                "01.3|02.31|01.3",
                {"01.3": "mapped1", "02.31": "mapped2"},
                {"mapped1": "product1", "mapped2": "product2"},
                ["product1", "product2"],
            ),
            # Codes with no entry in product_mapping are left out.
            (
                "01.3|02.31",
                {"01.3": "mapped1", "02.31": "mapped2"},
                {"mapped1": "product1"},
                ["product1"],
            ),
            (
                "01.3|02.31",
                {"01.3": "mapped1", "02.31": "mapped2"},
                {"mapped2": "product2"},
                ["product2"],
            ),
            ("01.3|02.31", {"01.3": "mapped1", "02.31": "mapped2"}, {}, []),
            # Placeholder tokens ("nan", "np", "None", blank) are ignored.
            (
                "01.3|nan|02.31",
                {"01.3": "mapped1", "02.31": "mapped2"},
                {"mapped1": "product1", "mapped2": "product2"},
                ["product1", "product2"],
            ),
            (
                "01.3|np|02.31",
                {"01.3": "mapped1", "02.31": "mapped2"},
                {"mapped1": "product1", "mapped2": "product2"},
                ["product1", "product2"],
            ),
            (
                "01.3|None|02.31",
                {"01.3": "mapped1", "02.31": "mapped2"},
                {"mapped1": "product1", "mapped2": "product2"},
                ["product1", "product2"],
            ),
            # Whitespace around the separators is tolerated.
            (
                "01.3 | | 02.31",
                {"01.3": "mapped1", "02.31": "mapped2"},
                {"mapped1": "product1", "mapped2": "product2"},
                ["product1", "product2"],
            ),
        ],
    )
    def test_clean_souscategorie_codes_sinoe(
        self, sscats, dechet_mapping, product_mapping, expected_output, dag_config
    ):
        """The cleaned codes equal the expected products, in any order."""
        # `dag_config` is a fixture defined outside this file; only the two
        # mappings are overridden for the case under test.
        dag_config.dechet_mapping = dechet_mapping
        dag_config.product_mapping = product_mapping

        result = clean_souscategorie_codes_sinoe(sscats, dag_config)
        # Compare sorted copies so the assertion does not depend on ordering.
        assert sorted(result) == sorted(expected_output)
85 changes: 80 additions & 5 deletions dags_unit_tests/sources/tasks/transform/test_transform_df.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,14 @@
import pytest
from sources.tasks.transform.transform_df import (
clean_action_codes,
clean_adresse,
clean_identifiant_externe,
clean_identifiant_unique,
clean_label_codes,
clean_siret_and_siren,
clean_telephone,
get_latlng_from_geopoint,
merge_and_clean_souscategorie_codes,
merge_duplicates,
merge_sous_categories_columns,
)
Expand Down Expand Up @@ -274,9 +276,66 @@ def test_merge_sscat_columns(self, row_columns, expected_produitsdechets_accepte


class TestCleanAdresse:
    """Unit tests for ``clean_adresse``.

    Both paths selected by ``dag_config.validate_address_with_ban`` are
    covered: with the flag off, and with the flag on and ``_get_address``
    patched.
    """

    @pytest.mark.parametrize(
        "adresse_format_ban, expected_adresse",
        [
            (
                "123 Rue de Paris 75001 Paris",
                {
                    "adresse": "123 Rue de Paris",
                    "code_postal": "75001",
                    "ville": "Paris",
                },
            ),
            # Leading and trailing whitespace is ignored.
            (
                " 123 Rue de Paris 75001 Paris ",
                {
                    "adresse": "123 Rue de Paris",
                    "code_postal": "75001",
                    "ville": "Paris",
                },
            ),
            # With no street part, `adresse` is None.
            (
                "75001 Paris",
                {
                    "adresse": None,
                    "code_postal": "75001",
                    "ville": "Paris",
                },
            ),
            # A trailing CEDEX mention is not kept in the city name.
            (
                " 123 Rue de Paris 75001 Paris CEDEX 01123",
                {
                    "adresse": "123 Rue de Paris",
                    "code_postal": "75001",
                    "ville": "Paris",
                },
            ),
        ],
    )
    def test_clean_adresse_without_ban(
        self, adresse_format_ban, expected_adresse, dag_config
    ):
        """With BAN validation off, the address string is split into its parts."""
        dag_config.validate_address_with_ban = False
        row = pd.Series({"adresse_format_ban": adresse_format_ban})
        assert dict(clean_adresse(row, dag_config)) == expected_adresse

    def test_clean_adresse_with_ban(self, dag_config, mocker):
        """With BAN validation on, the values returned by ``_get_address`` are used."""

        def fake_get_address(_):
            # Stand-in for `_get_address`, returning a fixed triple.
            return "Mock Address", "Mock Postal Code", "Mock City"

        # `mocker` is the fixture supplied by the pytest-mock plugin.
        mocker.patch(
            "sources.tasks.transform.transform_df._get_address",
            side_effect=fake_get_address,
        )
        dag_config.validate_address_with_ban = True
        row = pd.Series({"adresse_format_ban": "fake adresse"})
        assert dict(clean_adresse(row, dag_config)) == {
            "adresse": "Mock Address",
            "code_postal": "Mock Postal Code",
            "ville": "Mock City",
        }


class TestCleanActeurserviceCodes:
Expand Down Expand Up @@ -366,8 +425,24 @@ def test_ess_label(self, dag_config):


class TestMergeAndCleanSouscategorieCodes:
    """Unit tests for ``merge_and_clean_souscategorie_codes``.

    Each case builds a two-column row and checks the ``souscategorie_codes``
    entry of the result against the expected mapped codes.
    """

    @pytest.mark.parametrize(
        "row_data, expected_output",
        [
            ({"col1": "sscat1", "col2": "sscat2"}, ["mapped1", "mapped2"]),
            # The same code in both columns is reported once.
            ({"col1": "sscat1", "col2": "sscat1"}, ["mapped1"]),
            # Missing values are ignored.
            ({"col1": None, "col2": "sscat2"}, ["mapped2"]),
            ({"col1": "sscat1", "col2": None}, ["mapped1"]),
            ({"col1": None, "col2": None}, []),
        ],
    )
    def test_merge_and_clean_souscategorie_codes(
        self, row_data, expected_output, dag_config
    ):
        """The row's columns are merged and mapped through ``product_mapping``."""
        # `dag_config` is a fixture defined outside this file; only its
        # product mapping is overridden here.
        dag_config.product_mapping = {"sscat1": "mapped1", "sscat2": "mapped2"}

        row = pd.Series(row_data)
        result = merge_and_clean_souscategorie_codes(row, dag_config)
        # Compare sorted copies so the assertion does not depend on ordering.
        assert sorted(result["souscategorie_codes"]) == sorted(expected_output)


class TestGetLatLngFromGeopoint:
Expand Down
1 change: 1 addition & 0 deletions dev-requirements.in
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ ptpython
pytest
pytest-django
pytest-dotenv
pytest-mock
python-Levenshtein
ratelimit
ruff
Expand Down
5 changes: 5 additions & 0 deletions dev-requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -1842,6 +1842,7 @@ pytest==8.3.4 \
# -r dev-requirements.in
# pytest-django
# pytest-dotenv
# pytest-mock
pytest-django==4.9.0 \
--hash=sha256:1d83692cb39188682dbb419ff0393867e9904094a549a7d38a3154d5731b2b99 \
--hash=sha256:8bf7bc358c9ae6f6fc51b6cebb190fe20212196e6807121f11bd6a3b03428314
Expand All @@ -1850,6 +1851,10 @@ pytest-dotenv==0.5.2 \
--hash=sha256:2dc6c3ac6d8764c71c6d2804e902d0ff810fa19692e95fe138aefc9b1aa73732 \
--hash=sha256:40a2cece120a213898afaa5407673f6bd924b1fa7eafce6bda0e8abffe2f710f
# via -r dev-requirements.in
pytest-mock==3.14.0 \
--hash=sha256:0b72c38033392a5f4621342fe11e9219ac11ec9d375f8e2a0c164539e0d70f6f \
--hash=sha256:2719255a1efeceadbc056d6bf3df3d1c5015530fb40cf347c0f9afac88410bd0
# via -r dev-requirements.in
python-daemon==3.1.2 \
--hash=sha256:b906833cef63502994ad48e2eab213259ed9bb18d54fa8774dcba2ff7864cec6 \
--hash=sha256:f7b04335adc473de877f5117e26d5f1142f4c9f7cd765408f0877757be5afbf4
Expand Down

0 comments on commit 6335457

Please sign in to comment.