Skip to content

Commit

Permalink
add tests
Browse files Browse the repository at this point in the history
  • Loading branch information
kolok committed Jan 6, 2025
1 parent 6335457 commit be97c03
Show file tree
Hide file tree
Showing 3 changed files with 228 additions and 43 deletions.
1 change: 0 additions & 1 deletion dags/sources/tasks/transform/transform_column.py
Original file line number Diff line number Diff line change
Expand Up @@ -206,7 +206,6 @@ def clean_souscategorie_codes_sinoe(
for v in sscat_list
if v.strip().lower() not in ("", "nan", "np", "none")
]
print(sscat_list)
sscat_list = [
dechet_mapping[v]
for v in sscat_list
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
# FIXME : ces tests sont à revoir, pas très à propos depuis la refacto

import pandas as pd
import pytest
from sources.tasks.business_logic.propose_services import propose_services
Expand Down Expand Up @@ -133,8 +131,6 @@ def test_create_proposition_services_services(
actions_id_by_code=actions_id_by_code,
)

print(result["df"])
print(expected_df)
assert result["df"].equals(expected_df)
assert result["metadata"] == expected_metadata

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from sources.config.airflow_params import TRANSFORMATION_MAPPING
from sources.tasks.airflow_logic.config_management import DAGConfig
from sources.tasks.business_logic.source_data_normalize import (
_remove_undesired_lines,
df_normalize_pharmacie,
df_normalize_sinoe,
source_data_normalize,
Expand Down Expand Up @@ -90,23 +91,33 @@ def test_drop_annee_column(
assert "ANNEE" not in df.columns


# Sample normalization rules exercising every rule shape accepted by
# DAGConfig.normalization_rules (behavior pinned by the assertions in
# TestSourceDataNormalize below).
NORMALIZATION_RULES = [
    # Rename rule: "origin" column is renamed to "destination" as-is.
    {
        "origin": "col_to_rename",
        "destination": "col_renamed",
    },
    # Transformation rule: "origin" value goes through the "test_fct"
    # transformation before landing in the "destination" column.
    {
        "origin": "nom origin",
        "transformation": "test_fct",
        "destination": "nom destination",
    },
    # Constant-column rules: set a column to a fixed scalar / list value.
    {"column": "string_col", "value": "value of col"},
    {"column": "list_col", "value": ["col1", "col2"]},
    # List form with origin == destination: transform the column in place.
    {
        "origin": ["nom"],
        "transformation": "test_fct",
        "destination": ["nom"],
    },
    # Keep rule: column is carried over untouched.
    {"keep": "identifiant_unique"},
    # Remove rule: column is dropped from the normalized output.
    {"remove": "col_to_remove"},
]


class TestSourceDataNormalize:
# FIXME : Add tests to check all kind of transformation is applied
def test_normalization_rules_is_called(self):

def test_source_data_normalize_normalization_rules_are_called(self):
dag_config_kwargs = {
"normalization_rules": [
{
"origin": "nom origin",
"transformation": "test_fct",
"destination": "nom destination",
},
{
"origin": "nom",
"transformation": "test_fct",
"destination": "nom",
},
{"keep": "identifiant_unique"},
],
"normalization_rules": NORMALIZATION_RULES,
"product_mapping": {},
"endpoint": "http://example.com/api",
}
Expand All @@ -115,19 +126,62 @@ def test_normalization_rules_is_called(self):
df_acteur_from_source=pd.DataFrame(
{
"identifiant_unique": ["id"],
"col_to_remove": ["fake remove"],
"col_to_rename": ["fake rename"],
"nom origin": ["nom origin 1"],
"nom": ["nom"],
}
),
dag_config=DAGConfig.model_validate(dag_config_kwargs),
dag_id="dag_id",
)

assert "col_to_rename" not in df.columns
assert "col_renamed" in df.columns
assert df["col_renamed"].iloc[0] == "fake rename"

assert "nom destination" in df.columns
assert df["nom destination"].iloc[0] == "success"

assert "string_col" in df.columns
assert df["string_col"].iloc[0] == "value of col"

assert "list_col" in df.columns
assert df["list_col"].iloc[0] == ["col1", "col2"]

assert "nom" in df.columns
assert df["nom"].iloc[0] == "success"

assert "identifiant_unique" in df.columns
assert df["identifiant_unique"].iloc[0] == "id"

assert "col_to_remove" not in df.columns

def test_source_data_normalize_unhandles_column_raise(self):
dag_config_kwargs = {
"normalization_rules": NORMALIZATION_RULES,
"product_mapping": {},
"endpoint": "http://example.com/api",
}

with pytest.raises(ValueError) as erreur:
source_data_normalize(
df_acteur_from_source=pd.DataFrame(
{
"identifiant_unique": ["id"],
"col_to_remove": ["fake remove"],
"col_to_rename": ["fake rename"],
"nom origin": ["nom origin 1"],
"nom": ["nom"],
"col_make_it_raise": ["fake"],
}
),
dag_config=DAGConfig.model_validate(dag_config_kwargs),
dag_id="dag_id",
)
assert "Le dataframe n'a pas les colonnes attendues" in str(erreur.value)
assert "col_make_it_raise" in str(erreur.value)

# @pytest.mark.parametrize(
# "statut, statut_expected",
# [
Expand Down Expand Up @@ -223,27 +277,163 @@ def _enrich_from_ban_api(row):
)


# Placeholder test class: the draft below was never converted into real tests.
class TestRemoveUndesired_lines:
    # FIXME : Add tests
    pass

    # Draft (commented out): "service_a_domicile" filtering via
    # propose_acteur_changes — kept for reference until real tests exist.
    # # "service_a_domicile"
    # def test_service_a_domicile(
    #     self,
    #     df_empty_acteurs_from_db,
    # ):

    #     result = propose_acteur_changes(
    #         df=pd.DataFrame(
    #             {
    #                 "identifiant_unique": ["1", "2"],
    #                 "service_a_domicile": ["Oui exclusivement", "Non"],
    #             }
    #         ),
    #         df_acteurs=df_empty_acteurs_from_db,
    #     )
    #     result_df = result["df"]
class TestRemoveUndesiredLines:
    """Tests for _remove_undesired_lines: row filtering and duplicate handling.

    Relies on the project-provided ``dag_config`` fixture (not visible here);
    each test passes a small acteurs DataFrame and checks which rows survive.
    """

    @pytest.mark.parametrize(
        "df, expected_df",
        [
            # Case: rows offering home service only are removed
            (
                pd.DataFrame(
                    {
                        "identifiant_unique": ["id1", "id2", "id3"],
                        "service_a_domicile": [
                            "non",
                            "oui exclusivement",
                            "service à domicile uniquement",
                        ],
                        "public_accueilli": [
                            "Particuliers",
                            "Particuliers",
                            "Particuliers",
                        ],
                        "souscategorie_codes": [["code1"], ["code2"], ["code3"]],
                    }
                ),
                pd.DataFrame(
                    {
                        "identifiant_unique": ["id1"],
                        "service_a_domicile": ["non"],
                        "public_accueilli": ["Particuliers"],
                        "souscategorie_codes": [["code1"]],
                    }
                ),
            ),
            # Case: rows serving professionals only are removed
            (
                pd.DataFrame(
                    {
                        "identifiant_unique": ["id1", "id2", "id3", "id4"],
                        "service_a_domicile": ["non", "non", "non", "oui"],
                        "public_accueilli": [
                            "Particuliers",
                            "Particuliers et professionnels",
                            "Professionnels",
                            "Aucun",
                        ],
                        "souscategorie_codes": [
                            ["code1"],
                            ["code2"],
                            ["code3"],
                            ["code4"],
                        ],
                    }
                ),
                pd.DataFrame(
                    {
                        "identifiant_unique": ["id1", "id2", "id4"],
                        "service_a_domicile": ["non", "non", "oui"],
                        "public_accueilli": [
                            "Particuliers",
                            "Particuliers et professionnels",
                            "Aucun",
                        ],
                        "souscategorie_codes": [["code1"], ["code2"], ["code4"]],
                    }
                ),
            ),
            # Case: rows with no accepted product codes are removed
            (
                pd.DataFrame(
                    {
                        "identifiant_unique": ["id1", "id2", "id3"],
                        "service_a_domicile": ["non", "non", "non"],
                        "public_accueilli": [
                            "Particuliers",
                            "Particuliers",
                            "Particuliers",
                        ],
                        "souscategorie_codes": [["code1"], [], ["code3"]],
                    }
                ),
                pd.DataFrame(
                    {
                        "identifiant_unique": ["id1", "id3"],
                        "service_a_domicile": ["non", "non"],
                        "public_accueilli": ["Particuliers", "Particuliers"],
                        "souscategorie_codes": [["code1"], ["code3"]],
                    }
                ),
            ),
        ],
    )
    def test_remove_undesired_lines_suppressions(self, df, expected_df, dag_config):
        """Undesired rows are dropped; indexes are reset before comparison."""
        result_df = _remove_undesired_lines(df, dag_config)
        pd.testing.assert_frame_equal(
            result_df.reset_index(drop=True), expected_df.reset_index(drop=True)
        )

    def test_merge_duplicated_acteurs(self, dag_config):
        """With merge_duplicated_acteurs set, rows sharing an identifiant_unique
        are merged and their souscategorie_codes concatenated."""
        dag_config.merge_duplicated_acteurs = True
        result = _remove_undesired_lines(
            pd.DataFrame(
                {
                    "identifiant_unique": ["id1", "id1", "id2"],
                    "service_a_domicile": ["non", "non", "non"],
                    "public_accueilli": [
                        "Particuliers",
                        "Particuliers",
                        "Particuliers",
                    ],
                    "souscategorie_codes": [["code1"], ["code2"], ["code3"]],
                }
            ),
            dag_config,
        )
        # Normalize row order and index so the frame comparison is stable.
        result = result.sort_values("identifiant_unique").reset_index(drop=True)

        expected_df = pd.DataFrame(
            {
                "identifiant_unique": ["id1", "id2"],
                "service_a_domicile": ["non", "non"],
                "public_accueilli": ["Particuliers", "Particuliers"],
                "souscategorie_codes": [["code1", "code2"], ["code3"]],
            }
        )

        pd.testing.assert_frame_equal(result, expected_df)

    def test_ignore_duplicates(self, dag_config):
        """With ignore_duplicates set, only the first row of each duplicated
        identifiant_unique is kept (codes are NOT merged)."""
        dag_config.ignore_duplicates = True
        result = _remove_undesired_lines(
            pd.DataFrame(
                {
                    "identifiant_unique": ["id1", "id1", "id2"],
                    "service_a_domicile": ["non", "non", "non"],
                    "public_accueilli": [
                        "Particuliers",
                        "Particuliers",
                        "Particuliers",
                    ],
                    "souscategorie_codes": [["code1"], ["code2"], ["code3"]],
                }
            ),
            dag_config,
        )
        # Normalize row order and index so the frame comparison is stable.
        result = result.sort_values("identifiant_unique").reset_index(drop=True)

        expected_df = pd.DataFrame(
            {
                "identifiant_unique": ["id1", "id2"],
                "service_a_domicile": ["non", "non"],
                "public_accueilli": ["Particuliers", "Particuliers"],
                "souscategorie_codes": [["code1"], ["code3"]],
            }
        )

        pd.testing.assert_frame_equal(result, expected_df)

0 comments on commit be97c03

Please sign in to comment.