Skip to content

Commit

Permalink
Mettre à jour et normaliser des codes de l'objet Source (#1187)
Browse files Browse the repository at this point in the history
  • Loading branch information
kolok authored Jan 13, 2025
1 parent c688024 commit faa8e1a
Show file tree
Hide file tree
Showing 22 changed files with 319 additions and 60 deletions.
2 changes: 2 additions & 0 deletions dags/sources/config/airflow_params.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
clean_souscategorie_codes_sinoe,
clean_url,
convert_opening_hours,
strip_lower_string,
strip_string,
)
from sources.tasks.transform.transform_df import (
Expand Down Expand Up @@ -63,6 +64,7 @@
"clean_url": clean_url,
"clean_souscategorie_codes_sinoe": clean_souscategorie_codes_sinoe,
"get_latlng_from_geopoint": get_latlng_from_geopoint,
"strip_lower_string": strip_lower_string,
}


Expand Down
9 changes: 5 additions & 4 deletions dags/sources/dags/source_aliapur.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,6 @@
params={
"normalization_rules": [
# 1. Renommage des colonnes
{
"origin": "ecoorganisme",
"destination": "source_code",
},
{
"origin": "nom_de_lorganisme",
"destination": "nom",
Expand All @@ -35,6 +31,11 @@
"destination": "latitude",
},
# 2. Transformation des colonnes
{
"origin": "ecoorganisme",
"transformation": "strip_lower_string",
"destination": "source_code",
},
{
"origin": "type_de_point_de_collecte",
"transformation": "clean_acteur_type_code",
Expand Down
9 changes: 5 additions & 4 deletions dags/sources/dags/source_citeo.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,6 @@
params={
"normalization_rules": [
# 1. Renommage des colonnes
{
"origin": "ecoorganisme",
"destination": "source_code",
},
{
"origin": "nom_de_lorganisme",
"destination": "nom",
Expand All @@ -35,6 +31,11 @@
"destination": "latitude",
},
# 2. Transformation des colonnes
{
"origin": "ecoorganisme",
"transformation": "strip_lower_string",
"destination": "source_code",
},
{
"origin": "type_de_point_de_collecte",
"transformation": "clean_acteur_type_code",
Expand Down
2 changes: 1 addition & 1 deletion dags/sources/dags/source_cma.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@
},
{
"column": "source_code",
"value": "cma_reparacteur",
"value": "cmareparacteur",
},
# 4. Transformation du dataframe
{
Expand Down
9 changes: 5 additions & 4 deletions dags/sources/dags/source_corepile.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,6 @@
params={
"normalization_rules": [
# 1. Renommage des colonnes
{
"origin": "ecoorganisme",
"destination": "source_code",
},
{
"origin": "nom_de_lorganisme",
"destination": "nom",
Expand All @@ -35,6 +31,11 @@
"destination": "latitude",
},
# 2. Transformation des colonnes
{
"origin": "ecoorganisme",
"transformation": "strip_lower_string",
"destination": "source_code",
},
{
"origin": "type_de_point_de_collecte",
"transformation": "clean_acteur_type_code",
Expand Down
9 changes: 5 additions & 4 deletions dags/sources/dags/source_ecodds.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,6 @@
params={
"normalization_rules": [
# 1. Renommage des colonnes
{
"origin": "ecoorganisme",
"destination": "source_code",
},
{
"origin": "nom_de_lorganisme",
"destination": "nom",
Expand All @@ -35,6 +31,11 @@
"destination": "latitude",
},
# 2. Transformation des colonnes
{
"origin": "ecoorganisme",
"transformation": "strip_lower_string",
"destination": "source_code",
},
{
"origin": "type_de_point_de_collecte",
"transformation": "clean_acteur_type_code",
Expand Down
9 changes: 5 additions & 4 deletions dags/sources/dags/source_ecologic.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,6 @@
params={
"normalization_rules": [
# 1. Renommage des colonnes
{
"origin": "ecoorganisme",
"destination": "source_code",
},
{
"origin": "nom_de_lorganisme",
"destination": "nom",
Expand All @@ -35,6 +31,11 @@
"destination": "latitude",
},
# 2. Transformation des colonnes
{
"origin": "ecoorganisme",
"transformation": "strip_lower_string",
"destination": "source_code",
},
{
"origin": "type_de_point_de_collecte",
"transformation": "clean_acteur_type_code",
Expand Down
9 changes: 5 additions & 4 deletions dags/sources/dags/source_ecomaison.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,6 @@
params={
"normalization_rules": [
# 1. Renommage des colonnes
{
"origin": "ecoorganisme",
"destination": "source_code",
},
{
"origin": "nom_de_lorganisme",
"destination": "nom",
Expand All @@ -35,6 +31,11 @@
"destination": "latitude",
},
# 2. Transformation des colonnes
{
"origin": "ecoorganisme",
"transformation": "strip_lower_string",
"destination": "source_code",
},
{
"origin": "site_web",
"transformation": "clean_url",
Expand Down
9 changes: 5 additions & 4 deletions dags/sources/dags/source_ecosystem.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,6 @@
params={
"normalization_rules": [
# 1. Renommage des colonnes
{
"origin": "ecoorganisme",
"destination": "source_code",
},
{
"origin": "nom_de_lorganisme",
"destination": "nom",
Expand All @@ -35,6 +31,11 @@
"destination": "latitude",
},
# 2. Transformation des colonnes
{
"origin": "ecoorganisme",
"transformation": "strip_lower_string",
"destination": "source_code",
},
{
"origin": "type_de_point_de_collecte",
"transformation": "clean_acteur_type_code",
Expand Down
9 changes: 5 additions & 4 deletions dags/sources/dags/source_ocab.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,6 @@
params={
"normalization_rules": [
# 1. Renommage des colonnes
{
"origin": "ecoorganisme",
"destination": "source_code",
},
{
"origin": "nom_de_lorganisme",
"destination": "nom",
Expand All @@ -35,6 +31,11 @@
"destination": "latitude",
},
# 2. Transformation des colonnes
{
"origin": "ecoorganisme",
"transformation": "strip_lower_string",
"destination": "source_code",
},
{
"origin": "type_de_point_de_collecte",
"transformation": "clean_acteur_type_code",
Expand Down
4 changes: 0 additions & 4 deletions dags/sources/dags/source_ocad3e.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,6 @@
params={
"normalization_rules": [
# 1. Renommage des colonnes
# {
# "origin": "ecoorganisme",
# "destination": "source_code",
# },
{
"origin": "nom_de_lorganisme",
"destination": "nom",
Expand Down
9 changes: 5 additions & 4 deletions dags/sources/dags/source_pyreo.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,6 @@
params={
"normalization_rules": [
# 1. Renommage des colonnes
{
"origin": "ecoorganisme",
"destination": "source_code",
},
{
"origin": "nom_de_lorganisme",
"destination": "nom",
Expand All @@ -35,6 +31,11 @@
"destination": "latitude",
},
# 2. Transformation des colonnes
{
"origin": "ecoorganisme",
"transformation": "strip_lower_string",
"destination": "source_code",
},
# {
# "origin": "site_web",
# "transformation": "clean_url",
Expand Down
9 changes: 5 additions & 4 deletions dags/sources/dags/source_refashion.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,6 @@
params={
"normalization_rules": [
# 1. Renommage des colonnes
{
"origin": "ecoorganisme",
"destination": "source_code",
},
{
"origin": "nom_de_lorganisme",
"destination": "nom",
Expand All @@ -39,6 +35,11 @@
"destination": "horaires_description",
},
# 2. Transformation des colonnes
{
"origin": "ecoorganisme",
"transformation": "strip_lower_string",
"destination": "source_code",
},
{
"origin": "site_web",
"transformation": "clean_url",
Expand Down
9 changes: 5 additions & 4 deletions dags/sources/dags/source_screlec.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,10 +19,6 @@
),
"normalization_rules": [
# 1. Renommage des colonnes
{
"origin": "ecoorganisme",
"destination": "source_code",
},
{
"origin": "nom_de_lorganisme",
"destination": "nom",
Expand All @@ -40,6 +36,11 @@
"destination": "latitude",
},
# 2. Transformation des colonnes
{
"origin": "ecoorganisme",
"transformation": "strip_lower_string",
"destination": "source_code",
},
# {
# "origin": "site_web",
# "transformation": "clean_url",
Expand Down
2 changes: 1 addition & 1 deletion dags/sources/dags/source_sinoe.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@
},
{
"column": "source_code",
"value": "ADEME_SINOE_Decheteries",
"value": "ademesinoedecheteries",
},
# 4. Transformation du dataframe
{
Expand Down
9 changes: 5 additions & 4 deletions dags/sources/dags/source_soren.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,6 @@
params={
"normalization_rules": [
# 1. Renommage des colonnes
{
"origin": "ecoorganisme",
"destination": "source_code",
},
{
"origin": "nom_de_lorganisme",
"destination": "nom",
Expand All @@ -35,6 +31,11 @@
"destination": "latitude",
},
# 2. Transformation des colonnes
{
"origin": "ecoorganisme",
"transformation": "strip_lower_string",
"destination": "source_code",
},
# {
# "origin": "site_web",
# "transformation": "clean_url",
Expand Down
9 changes: 5 additions & 4 deletions dags/sources/dags/source_valdelia.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,6 @@
params={
"normalization_rules": [
# 1. Renommage des colonnes
{
"origin": "ecoorganisme",
"destination": "source_code",
},
{
"origin": "nom_de_lorganisme",
"destination": "nom",
Expand All @@ -35,6 +31,11 @@
"destination": "latitude",
},
# 2. Transformation des colonnes
{
"origin": "ecoorganisme",
"transformation": "strip_lower_string",
"destination": "source_code",
},
# {
# "origin": "site_web",
# "transformation": "clean_url",
Expand Down
4 changes: 4 additions & 0 deletions dags/sources/tasks/transform/transform_column.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,10 @@ def strip_string(value: str | None, _) -> str:
return str(value).strip() if not pd.isna(value) and value else ""


def strip_lower_string(value: str | None, _) -> str:
return str(value).strip().lower() if not pd.isna(value) and value else ""


def clean_acteur_type_code(value, _):
mapping_dict = {
# Here we store key without accents and special characters
Expand Down
20 changes: 20 additions & 0 deletions dags_unit_tests/sources/tasks/transform/test_transform_column.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
clean_souscategorie_codes_sinoe,
clean_url,
convert_opening_hours,
strip_lower_string,
strip_string,
)

Expand Down Expand Up @@ -151,6 +152,25 @@ def test_strip_string(self, input, output):
assert strip_string(input, None) == output


class TestStriplowerString:
    """Parametrized checks for ``strip_lower_string``."""

    @pytest.mark.parametrize(
        "input, output",
        [
            # Missing values collapse to the empty string
            (None, ""),
            (pd.NA, ""),
            (np.nan, ""),
            # Whitespace-only input
            (" ", ""),
            # Non-string input is stringified
            (75001, "75001"),
            # Stripping and/or lower-casing
            (" adresse postale ", "adresse postale"),
            ("AdreSse posTale", "adresse postale"),
            (" AdreSse posTale ", "adresse postale"),
        ],
    )
    def test_strip_lower_string(self, input, output):
        """Each input must normalise to the expected output."""
        result = strip_lower_string(input, None)
        assert result == output


class TestCleanActeurTypeCode:
@pytest.mark.parametrize(
"value, expected_code",
Expand Down
Loading

0 comments on commit faa8e1a

Please sign in to comment.