diff --git a/core/models.py b/core/models.py new file mode 100644 index 000000000..1d4b399a3 --- /dev/null +++ b/core/models.py @@ -0,0 +1,10 @@ +from django.db import models +from django.db.models.functions import Now + + +class TimestampedModel(models.Model): + cree_le = models.DateTimeField(auto_now_add=True, db_default=Now()) + modifie_le = models.DateTimeField(auto_now=True, db_default=Now()) + + class Meta: + abstract = True diff --git a/core/settings.py b/core/settings.py index 540982469..de5a7a448 100644 --- a/core/settings.py +++ b/core/settings.py @@ -61,10 +61,10 @@ "core", "qfdmd", "qfdmo", + "data", "corsheaders", ] -# FIXME : check if we can manage django forms templating with jinja2 FORM_RENDERER = "django.forms.renderers.TemplatesSetting" diff --git a/core/urls.py b/core/urls.py index 03b434959..c0ffdf69b 100644 --- a/core/urls.py +++ b/core/urls.py @@ -60,6 +60,7 @@ class PaginatedSitemap(GenericSitemap): path("dsfr/", include(("dsfr_hacks.urls", "dsfr_hacks"), namespace="dsfr_hacks")), path("", include(("qfdmo.urls", "qfdmo"), namespace="qfdmo")), path("", include(("qfdmd.urls", "qfdmd"), namespace="qfdmd")), + path("data/", include(("data.urls", "data"), namespace="data")), path("docs/", TemplateView.as_view(template_name="techdocs.html"), name="techdocs"), ] diff --git a/core/views.py b/core/views.py new file mode 100644 index 000000000..4439d12e9 --- /dev/null +++ b/core/views.py @@ -0,0 +1,8 @@ +from django.contrib.auth.mixins import LoginRequiredMixin + + +class IsStaffMixin(LoginRequiredMixin): + def dispatch(self, request, *args, **kwargs): + if not request.user.is_staff: + return self.handle_no_permission() + return super().dispatch(request, *args, **kwargs) diff --git a/dags/ingest_validated_dataset_to_db.py b/dags/ingest_validated_dataset_to_db.py index d4a61e029..1ac93cca2 100755 --- a/dags/ingest_validated_dataset_to_db.py +++ b/dags/ingest_validated_dataset_to_db.py @@ -1,3 +1,7 @@ +""" +DEPRECATED : utiliser le dag apply_suggestions 
+""" + from datetime import timedelta import pandas as pd @@ -20,9 +24,12 @@ dag = DAG( dag_id="validate_and_process_dagruns", - dag_display_name="Traitement des cohortes de données validées", + dag_display_name="DEPRECATED : Traitement des cohortes de données validées", default_args=default_args, - description="Check for VALIDATE in qfdmo_dagrun and process qfdmo_dagrunchange", + description=""" + DEPRECATED : Check for VALIDATE in qfdmo_dagrun and process qfdmo_dagrunchange + util uniquement pour les cohortes de siretisations + """, schedule="*/5 * * * *", catchup=False, max_active_runs=1, diff --git a/dags/sources/config/airflow_params.py b/dags/sources/config/airflow_params.py index a42644e89..0ae7754b6 100644 --- a/dags/sources/config/airflow_params.py +++ b/dags/sources/config/airflow_params.py @@ -26,6 +26,7 @@ clean_label_codes, clean_siret_and_siren, clean_telephone, + compute_location, get_latlng_from_geopoint, merge_and_clean_souscategorie_codes, merge_sous_categories_columns, @@ -65,6 +66,7 @@ "clean_souscategorie_codes_sinoe": clean_souscategorie_codes_sinoe, "get_latlng_from_geopoint": get_latlng_from_geopoint, "strip_lower_string": strip_lower_string, + "compute_location": compute_location, } diff --git a/dags/sources/config/shared_constants.py b/dags/sources/config/shared_constants.py index 030cf2e33..e25e7dc09 100755 --- a/dags/sources/config/shared_constants.py +++ b/dags/sources/config/shared_constants.py @@ -1,9 +1,24 @@ -# DagRun statuts +# DEPRECATED DagRun statuts DAGRUN_TOVALIDATE = "TO_VALIDATE" DAGRUN_TOINSERT = "TO_INSERT" DAGRUN_REJECTED = "REJECTED" DAGRUN_FINISHED = "FINISHED" +# Suggestion statuts (pour cohorte et unitaire) +SUGGESTION_AVALIDER = "AVALIDER" +SUGGESTION_REJETEE = "REJETEE" +SUGGESTION_PARTIEL = "PARTIEL" +SUGGESTION_ATRAITER = "ATRAITER" +SUGGESTION_ENCOURS = "ENCOURS" +SUGGESTION_ERREUR = "ERREUR" +SUGGESTION_SUCCES = "SUCCES" + +# SuggestionCohorte actions +SUGGESTION_CLUSTERING = "CLUSTERING" 
+SUGGESTION_SOURCE_AJOUT = "SOURCE_AJOUT" +SUGGESTION_SOURCE_MODIFICATION = "SOURCE_MODIFICATION" +SUGGESTION_SOURCE_SUPRESSION = "SOURCE_SUPRESSION" + # Public accueilli PUBLIC_PAR = "Particuliers" PUBLIC_PRO = "Professionnels" diff --git a/dags/sources/dags/source_aliapur.py b/dags/sources/dags/source_aliapur.py index 9f4bd9c4f..89b4d216c 100755 --- a/dags/sources/dags/source_aliapur.py +++ b/dags/sources/dags/source_aliapur.py @@ -46,21 +46,6 @@ "transformation": "clean_public_accueilli", "destination": "public_accueilli", }, - # { - # "origin": "uniquement_sur_rdv", - # "transformation": "cast_eo_boolean_or_string_to_boolean", - # "destination": "uniquement_sur_rdv", - # }, - # { - # "origin": "exclusivite_de_reprisereparation", - # "transformation": "cast_eo_boolean_or_string_to_boolean", - # "destination": "exclusivite_de_reprisereparation", - # }, - # { - # "origin": "reprise", - # "transformation": "clean_reprise", - # "destination": "reprise", - # }, { "origin": "produitsdechets_acceptes", "transformation": "clean_souscategorie_codes", @@ -72,6 +57,11 @@ "value": constants.ACTEUR_ACTIF, }, # 4. 
Transformation du dataframe + { + "origin": ["latitude", "longitude"], + "transformation": "compute_location", + "destination": ["location"], + }, { "origin": ["labels_etou_bonus", "acteur_type_code"], "transformation": "clean_label_codes", diff --git a/dags/sources/dags/source_citeo.py b/dags/sources/dags/source_citeo.py index 6351503a2..17608fa84 100755 --- a/dags/sources/dags/source_citeo.py +++ b/dags/sources/dags/source_citeo.py @@ -18,10 +18,6 @@ "origin": "nom_de_lorganisme", "destination": "nom", }, - # { - # "origin": "enseigne_commerciale", - # "destination": "nom_commercial", - # }, { "origin": "longitudewgs84", "destination": "longitude", @@ -46,21 +42,6 @@ "transformation": "clean_public_accueilli", "destination": "public_accueilli", }, - # { - # "origin": "uniquement_sur_rdv", - # "transformation": "cast_eo_boolean_or_string_to_boolean", - # "destination": "uniquement_sur_rdv", - # }, - # { - # "origin": "exclusivite_de_reprisereparation", - # "transformation": "cast_eo_boolean_or_string_to_boolean", - # "destination": "exclusivite_de_reprisereparation", - # }, - # { - # "origin": "reprise", - # "transformation": "clean_reprise", - # "destination": "reprise", - # }, { "origin": "produitsdechets_acceptes", "transformation": "clean_souscategorie_codes", @@ -76,6 +57,11 @@ "value": [], }, # 4. Transformation du dataframe + { + "origin": ["latitude", "longitude"], + "transformation": "compute_location", + "destination": ["location"], + }, { "origin": ["id_point_apport_ou_reparation", "nom"], "transformation": "clean_identifiant_externe", diff --git a/dags/sources/dags/source_cma.py b/dags/sources/dags/source_cma.py index f74d40c17..78246df48 100755 --- a/dags/sources/dags/source_cma.py +++ b/dags/sources/dags/source_cma.py @@ -94,6 +94,11 @@ "value": "cmareparacteur", }, # 4. 
Transformation du dataframe + { + "origin": ["final_latitude", "final_longitude"], + "transformation": "compute_location", + "destination": ["location"], + }, { "origin": ["telephone", "code_postal"], "transformation": "clean_telephone", @@ -160,7 +165,6 @@ "ignore_duplicates": False, "validate_address_with_ban": False, "product_mapping": get_mapping_config(mapping_key="sous_categories_cma"), - "source_code": "cma_reparacteur", }, schedule=None, ) as dag: diff --git a/dags/sources/dags/source_corepile.py b/dags/sources/dags/source_corepile.py index 10e54b7cd..21e5feef3 100755 --- a/dags/sources/dags/source_corepile.py +++ b/dags/sources/dags/source_corepile.py @@ -46,21 +46,6 @@ "transformation": "clean_public_accueilli", "destination": "public_accueilli", }, - # { - # "origin": "uniquement_sur_rdv", - # "transformation": "cast_eo_boolean_or_string_to_boolean", - # "destination": "uniquement_sur_rdv", - # }, - # { - # "origin": "exclusivite_de_reprisereparation", - # "transformation": "cast_eo_boolean_or_string_to_boolean", - # "destination": "exclusivite_de_reprisereparation", - # }, - # { - # "origin": "reprise", - # "transformation": "clean_reprise", - # "destination": "reprise", - # }, { "origin": "produitsdechets_acceptes", "transformation": "clean_souscategorie_codes", @@ -76,11 +61,11 @@ "value": [], }, # 4. Transformation du dataframe - # { - # "origin": ["labels_etou_bonus", "acteur_type_code"], - # "transformation": "clean_label_codes", - # "destination": ["label_codes"], - # }, + { + "origin": ["latitude", "longitude"], + "transformation": "compute_location", + "destination": ["location"], + }, { "origin": ["id_point_apport_ou_reparation", "nom"], "transformation": "clean_identifiant_externe", @@ -140,15 +125,6 @@ {"remove": "point_de_reparation"}, # 6. 
Colonnes à garder (rien à faire, utilisé pour le controle) ], - "column_mapping": { - "id_point_apport_ou_reparation": "identifiant_externe", - "type_de_point_de_collecte": "acteur_type_id", - "ecoorganisme": "source_id", - "nom_de_lorganisme": "nom", - "enseigne_commerciale": "nom_commercial", - "longitudewgs84": "longitude", - "latitudewgs84": "latitude", - }, "endpoint": ( "https://data.pointsapport.ademe.fr/data-fair/api/v1/datasets/" "donnees-eo-corepile/lines?size=10000" diff --git a/dags/sources/dags/source_ecodds.py b/dags/sources/dags/source_ecodds.py index 324adc883..d02c0ec80 100755 --- a/dags/sources/dags/source_ecodds.py +++ b/dags/sources/dags/source_ecodds.py @@ -46,11 +46,6 @@ "transformation": "clean_public_accueilli", "destination": "public_accueilli", }, - # { - # "origin": "uniquement_sur_rdv", - # "transformation": "cast_eo_boolean_or_string_to_boolean", - # "destination": "uniquement_sur_rdv", - # }, { "origin": "exclusivite_de_reprisereparation", "transformation": "cast_eo_boolean_or_string_to_boolean", @@ -72,6 +67,11 @@ "value": constants.ACTEUR_ACTIF, }, # 4. 
Transformation du dataframe + { + "origin": ["latitude", "longitude"], + "transformation": "compute_location", + "destination": ["location"], + }, { "origin": ["labels_etou_bonus", "acteur_type_code"], "transformation": "clean_label_codes", @@ -90,11 +90,6 @@ "transformation": "clean_identifiant_unique", "destination": ["identifiant_unique"], }, - # { - # "origin": ["siret"], - # "transformation": "clean_siret_and_siren", - # "destination": ["siret", "siren"], - # }, { "origin": ["adresse_format_ban"], "transformation": "clean_adresse", diff --git a/dags/sources/dags/source_ecologic.py b/dags/sources/dags/source_ecologic.py index 809ccbbdf..b4a0d9642 100755 --- a/dags/sources/dags/source_ecologic.py +++ b/dags/sources/dags/source_ecologic.py @@ -18,10 +18,6 @@ "origin": "nom_de_lorganisme", "destination": "nom", }, - # { - # "origin": "enseigne_commerciale", - # "destination": "nom_commercial", - # }, { "origin": "longitudewgs84", "destination": "longitude", @@ -46,16 +42,6 @@ "transformation": "clean_public_accueilli", "destination": "public_accueilli", }, - # { - # "origin": "uniquement_sur_rdv", - # "transformation": "cast_eo_boolean_or_string_to_boolean", - # "destination": "uniquement_sur_rdv", - # }, - # { - # "origin": "exclusivite_de_reprisereparation", - # "transformation": "cast_eo_boolean_or_string_to_boolean", - # "destination": "exclusivite_de_reprisereparation", - # }, { "origin": "reprise", "transformation": "clean_reprise", @@ -76,11 +62,11 @@ "value": [], }, # 4. 
Transformation du dataframe - # { - # "origin": ["labels_etou_bonus", "acteur_type_code"], - # "transformation": "clean_label_codes", - # "destination": ["label_codes"], - # }, + { + "origin": ["latitude", "longitude"], + "transformation": "compute_location", + "destination": ["location"], + }, { "origin": ["id_point_apport_ou_reparation", "nom"], "transformation": "clean_identifiant_externe", @@ -94,11 +80,6 @@ "transformation": "clean_identifiant_unique", "destination": ["identifiant_unique"], }, - # { - # "origin": ["siret"], - # "transformation": "clean_siret_and_siren", - # "destination": ["siret", "siren"], - # }, { "origin": ["adresse_format_ban"], "transformation": "clean_adresse", @@ -135,7 +116,6 @@ {"remove": "adresse_format_ban"}, {"remove": "id_point_apport_ou_reparation"}, {"remove": "point_de_collecte_ou_de_reprise_des_dechets"}, - # {"remove": "labels_etou_bonus"}, {"remove": "point_dapport_de_service_reparation"}, {"remove": "point_dapport_pour_reemploi"}, {"remove": "point_de_reparation"}, @@ -145,9 +125,6 @@ "https://data.pointsapport.ademe.fr/data-fair/api/v1/datasets/" "donnees-eo-ecologic/lines?size=10000" ), - "columns_to_add_by_default": { - "statut": constants.ACTEUR_ACTIF, - }, "ignore_duplicates": False, "validate_address_with_ban": False, "merge_duplicated_acteurs": True, # In case of multi ecoorganisme or filiere diff --git a/dags/sources/dags/source_ecomaison.py b/dags/sources/dags/source_ecomaison.py index f386e620c..be1afecb3 100755 --- a/dags/sources/dags/source_ecomaison.py +++ b/dags/sources/dags/source_ecomaison.py @@ -56,16 +56,6 @@ "transformation": "cast_eo_boolean_or_string_to_boolean", "destination": "uniquement_sur_rdv", }, - # { - # "origin": "exclusivite_de_reprisereparation", - # "transformation": "cast_eo_boolean_or_string_to_boolean", - # "destination": "exclusivite_de_reprisereparation", - # }, - # { - # "origin": "reprise", - # "transformation": "clean_reprise", - # "destination": "reprise", - # }, { "origin": 
"produitsdechets_acceptes", "transformation": "clean_souscategorie_codes", @@ -81,11 +71,11 @@ "value": [], }, # 4. Transformation du dataframe - # { - # "origin": ["labels_etou_bonus", "acteur_type_code"], - # "transformation": "clean_label_codes", - # "destination": ["label_codes"], - # }, + { + "origin": ["latitude", "longitude"], + "transformation": "compute_location", + "destination": ["location"], + }, { "origin": ["id_point_apport_ou_reparation", "nom"], "transformation": "clean_identifiant_externe", diff --git a/dags/sources/dags/source_ecosystem.py b/dags/sources/dags/source_ecosystem.py index a3745786f..fa438259b 100755 --- a/dags/sources/dags/source_ecosystem.py +++ b/dags/sources/dags/source_ecosystem.py @@ -46,21 +46,6 @@ "transformation": "clean_public_accueilli", "destination": "public_accueilli", }, - # { - # "origin": "uniquement_sur_rdv", - # "transformation": "cast_eo_boolean_or_string_to_boolean", - # "destination": "uniquement_sur_rdv", - # }, - # { - # "origin": "exclusivite_de_reprisereparation", - # "transformation": "cast_eo_boolean_or_string_to_boolean", - # "destination": "exclusivite_de_reprisereparation", - # }, - # { - # "origin": "reprise", - # "transformation": "clean_reprise", - # "destination": "reprise", - # }, { "origin": "produitsdechets_acceptes", "transformation": "clean_souscategorie_codes", @@ -76,11 +61,11 @@ "value": [], }, # 4. 
Transformation du dataframe - # { - # "origin": ["labels_etou_bonus", "acteur_type_code"], - # "transformation": "clean_label_codes", - # "destination": ["label_codes"], - # }, + { + "origin": ["latitude", "longitude"], + "transformation": "compute_location", + "destination": ["location"], + }, { "origin": ["id_point_apport_ou_reparation", "nom"], "transformation": "clean_identifiant_externe", @@ -106,8 +91,6 @@ }, { "origin": [ - # "point_dapport_de_service_reparation", - # "point_de_reparation", "point_dapport_pour_reemploi", "point_de_collecte_ou_de_reprise_des_dechets", ], @@ -116,8 +99,6 @@ }, { "origin": [ - # "point_dapport_de_service_reparation", - # "point_de_reparation", "point_dapport_pour_reemploi", "point_de_collecte_ou_de_reprise_des_dechets", ], @@ -135,10 +116,7 @@ {"remove": "adresse_format_ban"}, {"remove": "id_point_apport_ou_reparation"}, {"remove": "point_de_collecte_ou_de_reprise_des_dechets"}, - # {"remove": "labels_etou_bonus"}, - # {"remove": "point_dapport_de_service_reparation"}, {"remove": "point_dapport_pour_reemploi"}, - # {"remove": "point_de_reparation"}, {"remove": "siret"}, # 6. 
Colonnes à garder (rien à faire, utilisé pour le controle) {"keep": "adresse_complement"}, diff --git a/dags/sources/dags/source_ocab.py b/dags/sources/dags/source_ocab.py index 197c24e9a..e23f970fd 100755 --- a/dags/sources/dags/source_ocab.py +++ b/dags/sources/dags/source_ocab.py @@ -46,21 +46,6 @@ "transformation": "clean_public_accueilli", "destination": "public_accueilli", }, - # { - # "origin": "uniquement_sur_rdv", - # "transformation": "cast_eo_boolean_or_string_to_boolean", - # "destination": "uniquement_sur_rdv", - # }, - # { - # "origin": "exclusivite_de_reprisereparation", - # "transformation": "cast_eo_boolean_or_string_to_boolean", - # "destination": "exclusivite_de_reprisereparation", - # }, - # { - # "origin": "reprise", - # "transformation": "clean_reprise", - # "destination": "reprise", - # }, { "origin": "produitsdechets_acceptes", "transformation": "clean_souscategorie_codes", @@ -76,11 +61,11 @@ "value": [], }, # 4. Transformation du dataframe - # { - # "origin": ["labels_etou_bonus", "acteur_type_code"], - # "transformation": "clean_label_codes", - # "destination": ["label_codes"], - # }, + { + "origin": ["latitude", "longitude"], + "transformation": "compute_location", + "destination": ["location"], + }, { "origin": ["id_point_apport_ou_reparation", "nom"], "transformation": "clean_identifiant_externe", @@ -135,10 +120,7 @@ {"remove": "adresse_format_ban"}, {"remove": "id_point_apport_ou_reparation"}, {"remove": "point_de_collecte_ou_de_reprise_des_dechets"}, - # {"remove": "labels_etou_bonus"}, - # {"remove": "point_dapport_de_service_reparation"}, {"remove": "point_dapport_pour_reemploi"}, - # {"remove": "point_de_reparation"}, # 6. 
Colonnes à garder (rien à faire, utilisé pour le controle) ], "endpoint": ( diff --git a/dags/sources/dags/source_ocad3e.py b/dags/sources/dags/source_ocad3e.py index c8f53345f..367891773 100755 --- a/dags/sources/dags/source_ocad3e.py +++ b/dags/sources/dags/source_ocad3e.py @@ -46,21 +46,11 @@ "transformation": "clean_public_accueilli", "destination": "public_accueilli", }, - # { - # "origin": "uniquement_sur_rdv", - # "transformation": "cast_eo_boolean_or_string_to_boolean", - # "destination": "uniquement_sur_rdv", - # }, { "origin": "exclusivite_de_reprisereparation", "transformation": "cast_eo_boolean_or_string_to_boolean", "destination": "exclusivite_de_reprisereparation", }, - # { - # "origin": "reprise", - # "transformation": "clean_reprise", - # "destination": "reprise", - # }, { "origin": "produitsdechets_acceptes", "transformation": "clean_souscategorie_codes", @@ -75,11 +65,12 @@ "column": "statut", "value": constants.ACTEUR_ACTIF, }, - # { - # "column": "label_codes", - # "value": [], - # }, # 4. 
Transformation du dataframe + { + "origin": ["latitude", "longitude"], + "transformation": "compute_location", + "destination": ["location"], + }, { "origin": ["labels_etou_bonus", "acteur_type_code"], "transformation": "clean_label_codes", @@ -117,8 +108,6 @@ "origin": [ "point_dapport_de_service_reparation", "point_de_reparation", - # "point_dapport_pour_reemploi", - # "point_de_collecte_ou_de_reprise_des_dechets", ], "transformation": "clean_acteurservice_codes", "destination": ["acteurservice_codes"], @@ -127,8 +116,6 @@ "origin": [ "point_dapport_de_service_reparation", "point_de_reparation", - # "point_dapport_pour_reemploi", - # "point_de_collecte_ou_de_reprise_des_dechets", ], "transformation": "clean_action_codes", "destination": ["action_codes"], @@ -143,10 +130,8 @@ {"remove": "_score"}, {"remove": "adresse_format_ban"}, {"remove": "id_point_apport_ou_reparation"}, - # {"remove": "point_de_collecte_ou_de_reprise_des_dechets"}, {"remove": "labels_etou_bonus"}, {"remove": "point_dapport_de_service_reparation"}, - # {"remove": "point_dapport_pour_reemploi"}, {"remove": "point_de_reparation"}, {"remove": "perimetre_dintervention"}, {"remove": "ecoorganisme"}, diff --git a/dags/sources/dags/source_pharmacies.py b/dags/sources/dags/source_pharmacies.py index 24e899c3b..6ca5ced72 100755 --- a/dags/sources/dags/source_pharmacies.py +++ b/dags/sources/dags/source_pharmacies.py @@ -11,10 +11,6 @@ params={ "normalization_rules": [ # 1. Renommage des colonnes - # { # fait en dur dans la code car l'apostrophe est mal géré par airflow - # "origin": "Numéro d\\'établissement", - # "destination": "identifiant_externe", - # }, # 2. Transformation des colonnes { "origin": "Raison sociale", diff --git a/dags/sources/dags/source_pyreo.py b/dags/sources/dags/source_pyreo.py index a16aa675a..f87eff16a 100755 --- a/dags/sources/dags/source_pyreo.py +++ b/dags/sources/dags/source_pyreo.py @@ -81,6 +81,11 @@ # "value": [], # }, # 4. 
Transformation du dataframe + { + "origin": ["latitude", "longitude"], + "transformation": "compute_location", + "destination": ["location"], + }, { "origin": ["labels_etou_bonus", "acteur_type_code"], "transformation": "clean_label_codes", diff --git a/dags/sources/dags/source_refashion.py b/dags/sources/dags/source_refashion.py index a135aff76..ce5ddc54b 100755 --- a/dags/sources/dags/source_refashion.py +++ b/dags/sources/dags/source_refashion.py @@ -80,11 +80,12 @@ "column": "statut", "value": constants.ACTEUR_ACTIF, }, - # { - # "column": "label_codes", - # "value": [], - # }, # 4. Transformation du dataframe + { + "origin": ["latitude", "longitude"], + "transformation": "compute_location", + "destination": ["location"], + }, { "origin": ["labels_etou_bonus", "acteur_type_code"], "transformation": "clean_label_codes", diff --git a/dags/sources/dags/source_screlec.py b/dags/sources/dags/source_screlec.py index 80d353102..4b7ddf7d0 100644 --- a/dags/sources/dags/source_screlec.py +++ b/dags/sources/dags/source_screlec.py @@ -41,11 +41,6 @@ "transformation": "strip_lower_string", "destination": "source_code", }, - # { - # "origin": "site_web", - # "transformation": "clean_url", - # "destination": "url", - # }, { "origin": "type_de_point_de_collecte", "transformation": "clean_acteur_type_code", @@ -81,11 +76,12 @@ "column": "statut", "value": constants.ACTEUR_ACTIF, }, - # { - # "column": "label_codes", - # "value": [], - # }, # 4. 
Transformation du dataframe + { + "origin": ["latitude", "longitude"], + "transformation": "compute_location", + "destination": ["location"], + }, { "origin": ["labels_etou_bonus", "acteur_type_code"], "transformation": "clean_label_codes", @@ -114,11 +110,6 @@ "transformation": "clean_adresse", "destination": ["adresse", "code_postal", "ville"], }, - # { - # "origin": ["telephone", "code_postal"], - # "transformation": "clean_telephone", - # "destination": ["telephone"], - # }, { "origin": [ "point_dapport_de_service_reparation", diff --git a/dags/sources/dags/source_sinoe.py b/dags/sources/dags/source_sinoe.py index b2a2c699d..b3a117c25 100755 --- a/dags/sources/dags/source_sinoe.py +++ b/dags/sources/dags/source_sinoe.py @@ -84,6 +84,11 @@ "transformation": "get_latlng_from_geopoint", "destination": ["latitude", "longitude"], }, + { + "origin": ["latitude", "longitude"], + "transformation": "compute_location", + "destination": ["location"], + }, { "origin": ["TEL_SERVICE", "code_postal"], "transformation": "clean_telephone", diff --git a/dags/sources/dags/source_soren.py b/dags/sources/dags/source_soren.py index c53c33474..9c4998e87 100755 --- a/dags/sources/dags/source_soren.py +++ b/dags/sources/dags/source_soren.py @@ -18,10 +18,6 @@ "origin": "nom_de_lorganisme", "destination": "nom", }, - # { - # "origin": "enseigne_commerciale", - # "destination": "nom_commercial", - # }, { "origin": "longitudewgs84", "destination": "longitude", @@ -36,11 +32,6 @@ "transformation": "strip_lower_string", "destination": "source_code", }, - # { - # "origin": "site_web", - # "transformation": "clean_url", - # "destination": "url", - # }, { "origin": "horaires_douverture", "transformation": "convert_opening_hours", @@ -81,11 +72,12 @@ "column": "statut", "value": constants.ACTEUR_ACTIF, }, - # { - # "column": "label_codes", - # "value": [], - # }, # 4. 
Transformation du dataframe + { + "origin": ["latitude", "longitude"], + "transformation": "compute_location", + "destination": ["location"], + }, { "origin": ["labels_etou_bonus", "acteur_type_code"], "transformation": "clean_label_codes", @@ -109,11 +101,6 @@ "transformation": "clean_adresse", "destination": ["adresse", "code_postal", "ville"], }, - # { - # "origin": ["telephone", "code_postal"], - # "transformation": "clean_telephone", - # "destination": ["telephone"], - # }, { "origin": [ "point_dapport_de_service_reparation", diff --git a/dags/sources/dags/source_valdelia.py b/dags/sources/dags/source_valdelia.py index 6bee2054f..afd37f36e 100755 --- a/dags/sources/dags/source_valdelia.py +++ b/dags/sources/dags/source_valdelia.py @@ -36,11 +36,6 @@ "transformation": "strip_lower_string", "destination": "source_code", }, - # { - # "origin": "site_web", - # "transformation": "clean_url", - # "destination": "url", - # }, { "origin": "type_de_point_de_collecte", "transformation": "clean_acteur_type_code", @@ -76,11 +71,12 @@ "column": "statut", "value": constants.ACTEUR_ACTIF, }, - # { - # "column": "label_codes", - # "value": [], - # }, # 4. Transformation du dataframe + { + "origin": ["latitude", "longitude"], + "transformation": "compute_location", + "destination": ["location"], + }, { "origin": ["labels_etou_bonus", "acteur_type_code"], "transformation": "clean_label_codes", @@ -151,28 +147,6 @@ {"remove": "point_de_reparation"}, # 6. 
Colonnes à garder (rien à faire, utilisé pour le controle) ], - "column_mapping": { - "id_point_apport_ou_reparation": "identifiant_externe", - "type_de_point_de_collecte": "acteur_type_id", - "exclusivite_de_reprisereparation": "exclusivite_de_reprisereparation", - "uniquement_sur_rdv": "uniquement_sur_rdv", - "public_accueilli": "public_accueilli", - "reprise": "reprise", - "siret": "siret", - "telephone": "telephone", - "produitsdechets_acceptes": "produitsdechets_acceptes", - "labels_etou_bonus": "labels_etou_bonus", - "point_de_reparation": "point_de_reparation", - "ecoorganisme": "source_id", - "adresse_format_ban": "adresse_format_ban", - "nom_de_lorganisme": "nom", - "perimetre_dintervention": "perimetre_dintervention", - "longitudewgs84": "longitude", - "latitudewgs84": "latitude", - }, - "columns_to_add_by_default": { - "statut": constants.ACTEUR_ACTIF, - }, "endpoint": ( "https://data.pointsapport.ademe.fr/data-fair/api/v1/datasets/" "donnees-eo-valdelia/lines?size=10000" diff --git a/dags/sources/tasks/airflow_logic/db_data_prepare_task.py b/dags/sources/tasks/airflow_logic/db_data_prepare_task.py index 005a6077a..8bb5093c4 100644 --- a/dags/sources/tasks/airflow_logic/db_data_prepare_task.py +++ b/dags/sources/tasks/airflow_logic/db_data_prepare_task.py @@ -28,6 +28,7 @@ def db_data_prepare_wrapper(**kwargs): df_pssc = kwargs["ti"].xcom_pull(task_ids="propose_services_sous_categories") df_labels = kwargs["ti"].xcom_pull(task_ids="propose_labels") df_acteur_services = kwargs["ti"].xcom_pull(task_ids="propose_acteur_services") + df_acteurs_from_db = kwargs["ti"].xcom_pull(task_ids="db_read_acteur") source_id_by_code = read_mapping_from_postgres(table_name="qfdmo_source") acteurtype_id_by_code = read_mapping_from_postgres(table_name="qfdmo_acteurtype") @@ -37,6 +38,7 @@ def db_data_prepare_wrapper(**kwargs): log.preview("df_pssc", df_pssc) log.preview("df_labels", df_labels) log.preview("df_acteur_services", df_acteur_services) + 
log.preview("df_acteurs_from_db", df_acteurs_from_db) log.preview("source_id_by_code", source_id_by_code) log.preview("acteurtype_id_by_code", acteurtype_id_by_code) @@ -47,6 +49,7 @@ def db_data_prepare_wrapper(**kwargs): df_pssc=df_pssc, df_labels=df_labels, df_acteur_services=df_acteur_services, + df_acteurs_from_db=df_acteurs_from_db, source_id_by_code=source_id_by_code, acteurtype_id_by_code=acteurtype_id_by_code, ) diff --git a/dags/sources/tasks/airflow_logic/db_write_type_action_suggestions_task.py b/dags/sources/tasks/airflow_logic/db_write_type_action_suggestions_task.py new file mode 100644 index 000000000..cdc1e5e25 --- /dev/null +++ b/dags/sources/tasks/airflow_logic/db_write_type_action_suggestions_task.py @@ -0,0 +1,51 @@ +import logging + +from airflow import DAG +from airflow.operators.python import PythonOperator +from sources.tasks.business_logic.db_write_type_action_suggestions import ( + db_write_type_action_suggestions, +) +from utils import logging_utils as log + +logger = logging.getLogger(__name__) + + +def db_write_type_action_suggestions_task(dag: DAG) -> PythonOperator: + return PythonOperator( + task_id="db_write_suggestion", + python_callable=db_write_type_action_suggestions_wrapper, + dag=dag, + ) + + +def db_write_type_action_suggestions_wrapper(**kwargs) -> None: + dag_name = kwargs["dag"].dag_display_name or kwargs["dag"].dag_id + run_id = kwargs["run_id"] + dfs_acteur = kwargs["ti"].xcom_pull(task_ids="db_data_prepare") + df_acteur_to_delete = dfs_acteur["df_acteur_to_delete"] + df_acteur_to_create = dfs_acteur["df_acteur_to_create"] + df_acteur_to_update = dfs_acteur["df_acteur_to_update"] + + log.preview("dag_name", dag_name) + log.preview("run_id", run_id) + log.preview("df_acteur_to_delete", df_acteur_to_delete) + log.preview("df_acteur_to_create", df_acteur_to_create) + log.preview("df_acteur_to_update", df_acteur_to_update) + + if ( + df_acteur_to_create.empty + and df_acteur_to_delete.empty + and df_acteur_to_update.empty + 
): + logger.warning("!!! Aucune suggestion à traiter pour cette source !!!") + # set the task to airflow skip status + kwargs["ti"].xcom_push(key="skip", value=True) + return + + return db_write_type_action_suggestions( + dag_name=dag_name, + run_id=run_id, + df_acteur_to_create=df_acteur_to_create, + df_acteur_to_delete=df_acteur_to_delete, + df_acteur_to_update=df_acteur_to_update, + ) diff --git a/dags/sources/tasks/airflow_logic/operators.py b/dags/sources/tasks/airflow_logic/operators.py index d37098470..efa5fba9f 100755 --- a/dags/sources/tasks/airflow_logic/operators.py +++ b/dags/sources/tasks/airflow_logic/operators.py @@ -2,12 +2,14 @@ from airflow import DAG from airflow.models.baseoperator import chain -from shared.tasks.airflow_logic.write_data_task import write_data_task from sources.tasks.airflow_logic.db_data_prepare_task import db_data_prepare_task from sources.tasks.airflow_logic.db_read_acteur_task import db_read_acteur_task from sources.tasks.airflow_logic.db_read_propositions_max_id_task import ( db_read_propositions_max_id_task, ) +from sources.tasks.airflow_logic.db_write_type_action_suggestions_task import ( + db_write_type_action_suggestions_task, +) from sources.tasks.airflow_logic.propose_acteur_changes_task import ( propose_acteur_changes_task, ) @@ -91,5 +93,5 @@ def eo_task_chain(dag: DAG) -> None: create_tasks, propose_services_sous_categories_task(dag), db_data_prepare_task(dag), - write_data_task(dag), + db_write_type_action_suggestions_task(dag), ) diff --git a/dags/sources/tasks/airflow_logic/propose_acteur_changes_task.py b/dags/sources/tasks/airflow_logic/propose_acteur_changes_task.py index fe9e05d14..68bcf70e1 100644 --- a/dags/sources/tasks/airflow_logic/propose_acteur_changes_task.py +++ b/dags/sources/tasks/airflow_logic/propose_acteur_changes_task.py @@ -13,18 +13,13 @@ def propose_acteur_changes_task(dag: DAG) -> PythonOperator: def propose_acteur_changes_wrapper(**kwargs): - df = 
kwargs["ti"].xcom_pull(task_ids="source_data_normalize") - df_acteurs = kwargs["ti"].xcom_pull(task_ids="db_read_acteur") + df_acteur = kwargs["ti"].xcom_pull(task_ids="source_data_normalize") + df_acteur_from_db = kwargs["ti"].xcom_pull(task_ids="db_read_acteur") - params = kwargs["params"] - column_to_drop = params.get("column_to_drop", []) - - log.preview("df (source_data_normalize)", df) - log.preview("df_acteurs", df_acteurs) - log.preview("column_to_drop", column_to_drop) + log.preview("df (source_data_normalize)", df_acteur) + log.preview("df_acteurs", df_acteur_from_db) return propose_acteur_changes( - df=df, - df_acteurs=df_acteurs, - column_to_drop=column_to_drop, + df_acteur=df_acteur, + df_acteur_from_db=df_acteur_from_db, ) diff --git a/dags/sources/tasks/business_logic/db_data_prepare.py b/dags/sources/tasks/business_logic/db_data_prepare.py index 1d4145cae..c2e5e216e 100644 --- a/dags/sources/tasks/business_logic/db_data_prepare.py +++ b/dags/sources/tasks/business_logic/db_data_prepare.py @@ -14,22 +14,20 @@ def db_data_prepare( df_pssc: pd.DataFrame, df_labels: pd.DataFrame, df_acteur_services: pd.DataFrame, + df_acteurs_from_db: pd.DataFrame, source_id_by_code: dict, acteurtype_id_by_code: dict, ): + update_actors_columns = ["identifiant_unique", "statut", "cree_le"] - df_acteur_to_delete["row_updates"] = df_acteur_to_delete[ + df_acteur_to_delete["suggestion"] = df_acteur_to_delete[ update_actors_columns ].apply(lambda row: json.dumps(row.to_dict(), default=str), axis=1) - # Created or updated Acteurs - df_acteur_services = ( - df_acteur_services - if df_acteur_services is not None - else pd.DataFrame(columns=["acteur_id", "acteurservice_id"]) - ) if df_acteur.empty: - raise ValueError("df_actors est vide") + raise ValueError("df_acteur est vide") + if df_acteur_services.empty: + raise ValueError("df_acteur_services est vide") if df_ps.empty: raise ValueError("df_ps est vide") if df_pssc.empty: @@ -41,6 +39,8 @@ def db_data_prepare( 
acteurtype_id_by_code ) + # FIXME: A bouger dans un tache compute_ps qui remplacera propose_services et + # propose_services_sous_categories aggregated_pdsc = ( df_pssc.groupby("propositionservice_id") .apply(lambda x: x.to_dict("records") if not x.empty else []) @@ -57,11 +57,9 @@ def db_data_prepare( df_pds_joined["propositionservice_id"] = df_pds_joined[ "propositionservice_id" ].astype(str) - df_pds_joined["pds_sous_categories"] = df_pds_joined["pds_sous_categories"].apply( lambda x: x if isinstance(x, list) else [] ) - df_pds_joined.drop("id", axis=1, inplace=True) aggregated_pds = ( @@ -124,11 +122,24 @@ def db_data_prepare( df_joined = df_joined.where(pd.notna(df_joined), None) - df_joined["row_updates"] = df_joined.apply( + df_joined["suggestion"] = df_joined.apply( lambda row: json.dumps(row.to_dict(), default=str), axis=1 ) df_joined.drop_duplicates("identifiant_unique", keep="first", inplace=True) - log.preview("df_joined", df_joined) + + df_acteur_to_create = df_joined[ + ~df_joined["identifiant_unique"].isin(df_acteurs_from_db["identifiant_unique"]) + ] + df_acteur_to_update = df_joined[ + df_joined["identifiant_unique"].isin(df_acteurs_from_db["identifiant_unique"]) + ] + + log.preview("df_acteur_to_create", df_acteur_to_create) + log.preview("df_acteur_to_update", df_acteur_to_update) log.preview("df_acteur_to_delete", df_acteur_to_delete) - return {"all": {"df": df_joined}, "to_disable": {"df": df_acteur_to_delete}} + return { + "df_acteur_to_create": df_acteur_to_create, + "df_acteur_to_update": df_acteur_to_update, + "df_acteur_to_delete": df_acteur_to_delete, + } diff --git a/dags/sources/tasks/business_logic/db_write_type_action_suggestions.py b/dags/sources/tasks/business_logic/db_write_type_action_suggestions.py new file mode 100644 index 000000000..eaed2cead --- /dev/null +++ b/dags/sources/tasks/business_logic/db_write_type_action_suggestions.py @@ -0,0 +1,94 @@ +import json +import logging +from datetime import datetime + +import pandas as 
pd +from shared.tasks.database_logic.db_manager import PostgresConnectionManager +from sources.config import shared_constants as constants + +logger = logging.getLogger(__name__) + + +def db_write_type_action_suggestions( + dag_name: str, + run_id: str, + df_acteur_to_create: pd.DataFrame, + df_acteur_to_delete: pd.DataFrame, + df_acteur_to_update: pd.DataFrame, +): + + metadata = {} + + run_name = run_id.replace("__", " - ") + + insert_suggestion( + df=df_acteur_to_create, + metadata=metadata, + dag_name=f"{dag_name} - AJOUT", + run_name=run_name, + type_action=constants.SUGGESTION_SOURCE_AJOUT, + ) + insert_suggestion( + df=df_acteur_to_delete, + metadata=metadata, + dag_name=f"{dag_name} - SUPRESSION", + run_name=run_name, + type_action=constants.SUGGESTION_SOURCE_SUPRESSION, + ) + insert_suggestion( + df=df_acteur_to_update, + metadata=metadata, + dag_name=f"{dag_name} - MISES A JOUR", + run_name=run_name, + type_action=constants.SUGGESTION_SOURCE_MODIFICATION, + ) + + +def insert_suggestion( + df: pd.DataFrame, metadata: dict, dag_name: str, run_name: str, type_action: str +): + if df.empty: + return + engine = PostgresConnectionManager().engine + current_date = datetime.now() + + with engine.connect() as conn: + # Insert a new suggestion + result = conn.execute( + """ + INSERT INTO data_suggestioncohorte + ( + identifiant_action, + identifiant_execution, + type_action, + statut, + metadata, + cree_le, + modifie_le + ) + VALUES (%s, %s, %s, %s, %s, %s, %s) + RETURNING ID; + """, + ( + dag_name, + run_name, + type_action, + constants.SUGGESTION_AVALIDER, + json.dumps(metadata), + current_date, + current_date, + ), + ) + suggestion_cohorte_id = result.fetchone()[0] + + # Insert dag_run_change + df["suggestion_cohorte_id"] = suggestion_cohorte_id + df["statut"] = constants.SUGGESTION_AVALIDER + df[["suggestion", "suggestion_cohorte_id", "statut"]].to_sql( + "data_suggestion", + engine, + if_exists="append", + index=False, + method="multi", + chunksize=1000, + ) 
diff --git a/dags/sources/tasks/business_logic/propose_acteur_changes.py b/dags/sources/tasks/business_logic/propose_acteur_changes.py index 4171c9942..4e459d51d 100644 --- a/dags/sources/tasks/business_logic/propose_acteur_changes.py +++ b/dags/sources/tasks/business_logic/propose_acteur_changes.py @@ -3,58 +3,41 @@ import numpy as np import pandas as pd -from utils.base_utils import transform_location -from utils.mapping_utils import parse_float logger = logging.getLogger(__name__) def propose_acteur_changes( - df: pd.DataFrame, - df_acteurs: pd.DataFrame, - column_to_drop: list = [], + df_acteur: pd.DataFrame, + df_acteur_from_db: pd.DataFrame, ): - - # TODO: à déplacer dans la source_data_normalize - # intersection of columns in df and column_to_drop - column_to_drop = list(set(column_to_drop) & set(df.columns)) - df = df.drop(column_to_drop, axis=1) - - if "latitude" in df.columns and "longitude" in df.columns: - df["latitude"] = df["latitude"].apply(parse_float) - df["longitude"] = df["longitude"].apply(parse_float) - df["location"] = df.apply( - lambda row: transform_location(row["longitude"], row["latitude"]), - axis=1, - ) - # On garde le cree_le de qfdmo_acteur - df.drop(columns=["cree_le"], inplace=True, errors="ignore") - df = df.merge( - df_acteurs[["identifiant_unique", "cree_le"]], + df_acteur.drop(columns=["cree_le"], inplace=True, errors="ignore") + df_acteur = df_acteur.merge( + df_acteur_from_db[["identifiant_unique", "cree_le"]], on="identifiant_unique", how="left", ) - df["cree_le"] = df["cree_le"].fillna(datetime.now()) + df_acteur["cree_le"] = df_acteur["cree_le"].fillna(datetime.now()) # On met à jour le modifie_le de qfdmo_acteur - df["modifie_le"] = datetime.now() + df_acteur["modifie_le"] = datetime.now() - df = df.replace({np.nan: None}) + df_acteur = df_acteur.replace({np.nan: None}) - duplicates_mask = df.duplicated("identifiant_unique", keep=False) - duplicate_ids = df.loc[duplicates_mask, "identifiant_unique"].unique() + 
duplicates_mask = df_acteur.duplicated("identifiant_unique", keep=False) + duplicate_ids = df_acteur.loc[duplicates_mask, "identifiant_unique"].unique() number_of_duplicates = len(duplicate_ids) metadata = { "number_of_duplicates": number_of_duplicates, "duplicate_ids": list(duplicate_ids), - "acteurs_to_add_or_update": len(df), + "acteurs_to_add_or_update": len(df_acteur), } - df = df.drop_duplicates(subset="identifiant_unique", keep="first") - df["event"] = "CREATE" + df_acteur = df_acteur.drop_duplicates(subset="identifiant_unique", keep="first") + df_acteur["event"] = "CREATE" return { - "df": df, + "df": df_acteur, "metadata": metadata, } diff --git a/dags/sources/tasks/business_logic/propose_acteur_to_delete.py b/dags/sources/tasks/business_logic/propose_acteur_to_delete.py index 64a667c2c..c8e800f99 100644 --- a/dags/sources/tasks/business_logic/propose_acteur_to_delete.py +++ b/dags/sources/tasks/business_logic/propose_acteur_to_delete.py @@ -24,6 +24,10 @@ def propose_acteur_to_delete( df_acteur_to_delete["statut"] = "SUPPRIME" df_acteur_to_delete["event"] = "UPDATE_ACTOR" + # FIXME: ajouter le contexte de la suppression + # ajouter une colonne context avec le contenu de df_acteurs_for_db en json pour + # chaque colonne en jonction sur identifiant_unique + return { "metadata": {"number_of_removed_actors": len(df_acteur_to_delete)}, "df_acteur_to_delete": df_acteur_to_delete, diff --git a/dags/sources/tasks/business_logic/source_data_normalize.py b/dags/sources/tasks/business_logic/source_data_normalize.py index 8c4faa99c..40026ec3d 100755 --- a/dags/sources/tasks/business_logic/source_data_normalize.py +++ b/dags/sources/tasks/business_logic/source_data_normalize.py @@ -14,7 +14,7 @@ NormalizationColumnTransform, NormalizationDFTransform, ) -from sources.tasks.transform.transform_df import merge_duplicates +from sources.tasks.transform.transform_df import compute_location, merge_duplicates from sqlalchemy import text from tenacity import retry, 
stop_after_attempt, wait_fixed from utils import logging_utils as log @@ -70,7 +70,6 @@ def _transform_df(df: pd.DataFrame, dag_config: DAGConfig) -> pd.DataFrame: for column_to_transform_df in columns_to_transform_df: function_name = column_to_transform_df.transformation normalisation_function = get_transformation_function(function_name, dag_config) - logger.warning(f"Transformation {function_name}") df[column_to_transform_df.destination] = df[ column_to_transform_df.origin @@ -142,6 +141,22 @@ def _remove_undesired_lines(df: pd.DataFrame, dag_config: DAGConfig) -> pd.DataF return df +def _display_warning_about_missing_location(df: pd.DataFrame) -> None: + # TODO: A voir ce qu'on doit faire de ces acteurs non digitaux mais sans + # localisation (proposition : les afficher en erreur directement ?) + if "location" in df.columns and "acteur_type_code" in df.columns: + df_acteur_sans_loc = df[ + (df["location"].isnull()) & (df["acteur_type_code"] != "acteur_digital") + ] + if not df_acteur_sans_loc.empty: + nb_acteurs = len(df) + logger.warning( + f"Nombre d'acteur sans localisation: {len(df_acteur_sans_loc)} / " + f"{nb_acteurs}" + ) + log.preview("Acteurs sans localisation", df_acteur_sans_loc) + + def source_data_normalize( df_acteur_from_source: pd.DataFrame, dag_config: DAGConfig, @@ -187,35 +202,19 @@ def source_data_normalize( # TODO: Remplacer par le dag_id if dag_id == "sinoe": - df = df_normalize_sinoe( - df, - product_mapping=dag_config.product_mapping, - dechet_mapping=dag_config.dechet_mapping, - ) + df = df_normalize_sinoe(df) # Merge et suppression des lignes indésirables df = _remove_undesired_lines(df, dag_config) + # Log si des localisations sont manquantes parmis les acteurs non digitaux + _display_warning_about_missing_location(df) + log.preview("df après normalisation", df) if df.empty: raise ValueError("Plus aucune donnée disponible après normalisation") return df - # # TODO: Je n'ai pas vu la source qui applique cette règle - # if "statut" in 
df.columns: - # df["statut"] = df["statut"].map( - # { - # 1: constants.ACTEUR_ACTIF, - # 0: constants.ACTEUR_SUPPRIME, - # constants.ACTEUR_ACTIF: constants.ACTEUR_ACTIF, - # "INACTIF": constants.ACTEUR_INACTIF, - # "SUPPRIME": constants.ACTEUR_SUPPRIME, - # } - # ) - # df["statut"] = df["statut"].fillna(constants.ACTEUR_ACTIF) - # else: - # df["statut"] = constants.ACTEUR_ACTIF - def df_normalize_pharmacie(df: pd.DataFrame) -> pd.DataFrame: # FIXME : à déplacer dans une fonction df ? @@ -234,8 +233,6 @@ def df_normalize_pharmacie(df: pd.DataFrame) -> pd.DataFrame: def df_normalize_sinoe( df: pd.DataFrame, - product_mapping: dict, - dechet_mapping: dict, ) -> pd.DataFrame: # DOUBLONS: extra sécurité: même si on ne devrait pas obtenir @@ -259,7 +256,7 @@ def enrich_from_ban_api(row: pd.Series) -> pd.Series: ban_cache_row = engine.execute( text( - "SELECT * FROM qfdmo_bancache WHERE adresse = :adresse and code_postal = " + "SELECT * FROM data_bancache WHERE adresse = :adresse and code_postal = " ":code_postal and ville = :ville and modifie_le > now() - interval '30 day'" " order by modifie_le desc limit 1" ), @@ -279,7 +276,7 @@ def enrich_from_ban_api(row: pd.Series) -> pd.Series: result = r.json() engine.execute( text( - "INSERT INTO qfdmo_bancache" + "INSERT INTO data_bancache" " (adresse, code_postal, ville, ban_returned, modifie_le)" " VALUES (:adresse, :code_postal, :ville, :result, NOW())" ), @@ -312,6 +309,8 @@ def enrich_from_ban_api(row: pd.Series) -> pd.Series: else: row["longitude"] = 0 row["latitude"] = 0 + + row["location"] = compute_location(row[["latitude", "longitude"]], None) return row diff --git a/dags/sources/tasks/transform/transform_column.py b/dags/sources/tasks/transform/transform_column.py index 6f8dcebe0..9dc36c8fe 100644 --- a/dags/sources/tasks/transform/transform_column.py +++ b/dags/sources/tasks/transform/transform_column.py @@ -175,7 +175,6 @@ def clean_souscategorie_codes( return souscategorie_codes product_mapping = 
dag_config.product_mapping - logger.warning(f"{sscat_list=}") for sscat in sscat_list.split("|"): sscat = sscat.strip().lower() if not sscat: diff --git a/dags/sources/tasks/transform/transform_df.py b/dags/sources/tasks/transform/transform_df.py index 2bbe93441..4c8a8c42b 100644 --- a/dags/sources/tasks/transform/transform_df.py +++ b/dags/sources/tasks/transform/transform_df.py @@ -11,7 +11,9 @@ clean_siren, clean_siret, ) +from utils.base_utils import transform_location from utils.formatter import format_libelle_to_code +from utils.mapping_utils import parse_float logger = logging.getLogger(__name__) @@ -218,6 +220,16 @@ def get_latlng_from_geopoint(row: pd.Series, _) -> pd.Series: return row[["latitude", "longitude"]] +def compute_location(row: pd.Series, _): + # first column is latitude, second is longitude + lat_column = row.keys()[0] + lng_column = row.keys()[1] + row[lat_column] = parse_float(row[lat_column]) + row[lng_column] = parse_float(row[lng_column]) + row["location"] = transform_location(row[lng_column], row[lat_column]) + return row[["location"]] + + ### Fonctions de résolution de l'adresse au format BAN et avec vérification via l'API # adresse.data.gouv.fr en option # TODO : A déplacer ? 
diff --git a/dags/suggestions/dags/apply_suggestions.py b/dags/suggestions/dags/apply_suggestions.py new file mode 100755 index 000000000..abbca01ae --- /dev/null +++ b/dags/suggestions/dags/apply_suggestions.py @@ -0,0 +1,36 @@ +from datetime import timedelta + +from airflow.models import DAG +from airflow.utils.dates import days_ago +from suggestions.tasks.airflow_logic import ( + db_normalize_suggestion_task, + db_read_suggestiontoprocess_task, + db_write_validsuggestions_task, + launch_compute_carte_acteur_task, +) + +default_args = { + "owner": "airflow", + "depends_on_past": False, + "start_date": days_ago(1), + "retries": 1, + "retry_delay": timedelta(minutes=5), +} + +dag = DAG( + dag_id="apply_suggestions", + dag_display_name="Application des suggestions validées", + default_args=default_args, + description="traiter les suggestions à traiter", + schedule="*/5 * * * *", + catchup=False, + max_active_runs=1, +) + + +( + db_read_suggestiontoprocess_task(dag) + >> db_normalize_suggestion_task(dag) + >> db_write_validsuggestions_task(dag) + >> launch_compute_carte_acteur_task(dag) +) diff --git a/dags/suggestions/tasks/airflow_logic/__init__.py b/dags/suggestions/tasks/airflow_logic/__init__.py new file mode 100644 index 000000000..a9e6fc049 --- /dev/null +++ b/dags/suggestions/tasks/airflow_logic/__init__.py @@ -0,0 +1,4 @@ +from suggestions.tasks.airflow_logic.db_normalize_suggestion_task import * # noqa +from suggestions.tasks.airflow_logic.db_read_suggestiontoprocess_task import * # noqa +from suggestions.tasks.airflow_logic.db_write_validsuggestions_task import * # noqa +from suggestions.tasks.airflow_logic.launch_compute_carte_acteur_task import * # noqa diff --git a/dags/suggestions/tasks/airflow_logic/db_normalize_suggestion_task.py b/dags/suggestions/tasks/airflow_logic/db_normalize_suggestion_task.py new file mode 100644 index 000000000..decfee015 --- /dev/null +++ b/dags/suggestions/tasks/airflow_logic/db_normalize_suggestion_task.py @@ -0,0 +1,17 @@ 
+from airflow.models import DAG +from airflow.operators.python import PythonOperator +from suggestions.tasks.business_logic.db_normalize_suggestion import ( + db_normalize_suggestion, +) + + +def db_normalize_suggestion_task(dag: DAG): + return PythonOperator( + task_id="db_normalize_suggestion", + python_callable=db_normalize_suggestion_wrapper, + dag=dag, + ) + + +def db_normalize_suggestion_wrapper(**kwargs): + return db_normalize_suggestion() diff --git a/dags/suggestions/tasks/airflow_logic/db_read_suggestiontoprocess_task.py b/dags/suggestions/tasks/airflow_logic/db_read_suggestiontoprocess_task.py new file mode 100644 index 000000000..d86347ff9 --- /dev/null +++ b/dags/suggestions/tasks/airflow_logic/db_read_suggestiontoprocess_task.py @@ -0,0 +1,13 @@ +from airflow.models import DAG +from airflow.operators.python import ShortCircuitOperator +from suggestions.tasks.business_logic.db_read_suggestiontoprocess import ( + db_read_suggestiontoprocess, +) + + +def db_read_suggestiontoprocess_task(dag: DAG): + return ShortCircuitOperator( + task_id="check_suggestion_to_process", + python_callable=db_read_suggestiontoprocess, + dag=dag, + ) diff --git a/dags/suggestions/tasks/airflow_logic/db_write_validsuggestions_task.py b/dags/suggestions/tasks/airflow_logic/db_write_validsuggestions_task.py new file mode 100644 index 000000000..9a010d83c --- /dev/null +++ b/dags/suggestions/tasks/airflow_logic/db_write_validsuggestions_task.py @@ -0,0 +1,25 @@ +from airflow.models import DAG +from airflow.operators.python import PythonOperator +from suggestions.tasks.business_logic.db_write_validsuggestions import ( + db_write_validsuggestions, +) +from utils import logging_utils as log + + +def db_write_validsuggestions_task(dag: DAG) -> PythonOperator: + return PythonOperator( + task_id="db_write_validsuggestions", + python_callable=db_write_validsuggestions_wrapper, + dag=dag, + ) + + +def db_write_validsuggestions_wrapper(**kwargs): + data_acteurs_normalized = 
kwargs["ti"].xcom_pull(task_ids="db_normalize_suggestion") + + log.preview("data_acteurs_normalized acteur", data_acteurs_normalized["actors"]) + log.preview( + "data_acteurs_normalized change_type", data_acteurs_normalized["change_type"] + ) + + return db_write_validsuggestions(data_acteurs_normalized=data_acteurs_normalized) diff --git a/dags/suggestions/tasks/airflow_logic/launch_compute_carte_acteur_task.py b/dags/suggestions/tasks/airflow_logic/launch_compute_carte_acteur_task.py new file mode 100644 index 000000000..963593762 --- /dev/null +++ b/dags/suggestions/tasks/airflow_logic/launch_compute_carte_acteur_task.py @@ -0,0 +1,10 @@ +from airflow.models import DAG +from airflow.operators.trigger_dagrun import TriggerDagRunOperator + + +def launch_compute_carte_acteur_task(dag: DAG) -> TriggerDagRunOperator: + return TriggerDagRunOperator( + task_id="launch_compute_carte_acteur", + trigger_dag_id="compute_carte_acteur", + dag=dag, + ) diff --git a/dags/suggestions/tasks/business_logic/db_normalize_suggestion.py b/dags/suggestions/tasks/business_logic/db_normalize_suggestion.py new file mode 100644 index 000000000..5551da313 --- /dev/null +++ b/dags/suggestions/tasks/business_logic/db_normalize_suggestion.py @@ -0,0 +1,104 @@ +import pandas as pd +from shared.tasks.database_logic.db_manager import PostgresConnectionManager +from sources.config import shared_constants as constants +from suggestions.tasks.business_logic.db_read_suggestiontoprocess import ( + get_first_suggetsioncohorte_to_insert, +) +from utils import logging_utils as log + + +def db_normalize_suggestion(): + suggestion_cohorte = get_first_suggetsioncohorte_to_insert() + if suggestion_cohorte is None: + raise ValueError("No suggestion found") + suggestion_cohorte_id = suggestion_cohorte["id"] + type_action = suggestion_cohorte["type_action"] + + engine = PostgresConnectionManager().engine + + df_sql = pd.read_sql_query( + f""" + SELECT * FROM data_suggestion + WHERE suggestion_cohorte_id = 
'{suggestion_cohorte_id}' + """, + engine, + ) + log.preview("df_acteur_to_delete", df_sql) + + if ( + type_action + in [ + constants.SUGGESTION_SOURCE_AJOUT, + constants.SUGGESTION_SOURCE_MODIFICATION, + ] + and not df_sql.empty + ): + normalized_dfs = df_sql["suggestion"].apply(pd.json_normalize) + df_acteur = pd.concat(normalized_dfs.tolist(), ignore_index=True) + return normalize_acteur_update_for_db( + df_acteur, suggestion_cohorte_id, engine, type_action + ) + if type_action == constants.SUGGESTION_SOURCE_SUPRESSION and not df_sql.empty: + normalized_dfs = df_sql["suggestion"].apply(pd.json_normalize) + df_acteur = pd.concat(normalized_dfs.tolist(), ignore_index=True) + log.preview("df_acteur_to_delete", df_acteur) + return { + "actors": df_acteur, + "dag_run_id": suggestion_cohorte_id, + "change_type": type_action, + } + + raise ValueError("No suggestion found") + + +def normalize_acteur_update_for_db(df_actors, dag_run_id, engine, type_action): + df_labels = process_many2many_df(df_actors, "labels") + df_acteur_services = process_many2many_df( + df_actors, "acteur_services", df_columns=["acteur_id", "acteurservice_id"] + ) + + max_id_pds = pd.read_sql_query( + "SELECT max(id) FROM qfdmo_propositionservice", engine + )["max"][0] + normalized_pds_dfs = df_actors["proposition_services"].apply(pd.json_normalize) + df_pds = pd.concat(normalized_pds_dfs.tolist(), ignore_index=True) + ids_range = range(max_id_pds + 1, max_id_pds + 1 + len(df_pds)) + + df_pds["id"] = ids_range + df_pds["pds_sous_categories"] = df_pds.apply( + lambda row: [ + {**d, "propositionservice_id": row["id"]} + for d in row["pds_sous_categories"] + ], + axis=1, + ) + + normalized_pdssc_dfs = df_pds["pds_sous_categories"].apply(pd.json_normalize) + df_pdssc = pd.concat(normalized_pdssc_dfs.tolist(), ignore_index=True) + + return { + "actors": df_actors, + "pds": df_pds[["id", "action_id", "acteur_id"]], + "pds_sous_categories": df_pdssc[ + ["propositionservice_id", "souscategorieobjet_id"] + 
], + "dag_run_id": dag_run_id, + "labels": df_labels[["acteur_id", "labelqualite_id"]], + "acteur_services": df_acteur_services[["acteur_id", "acteurservice_id"]], + "change_type": type_action, + } + + +def process_many2many_df(df, column_name, df_columns=["acteur_id", "labelqualite_id"]): + try: + # Attempt to process the 'labels' column if it exists and is not empty + normalized_df = df[column_name].dropna().apply(pd.json_normalize) + if normalized_df.empty: + return pd.DataFrame( + columns=df_columns + ) # Return empty DataFrame if no data to process + else: + return pd.concat(normalized_df.tolist(), ignore_index=True) + except KeyError: + # Handle the case where the specified column does not exist + return pd.DataFrame(columns=df_columns) diff --git a/dags/suggestions/tasks/business_logic/db_read_suggestiontoprocess.py b/dags/suggestions/tasks/business_logic/db_read_suggestiontoprocess.py new file mode 100644 index 000000000..12e8154f1 --- /dev/null +++ b/dags/suggestions/tasks/business_logic/db_read_suggestiontoprocess.py @@ -0,0 +1,24 @@ +import pandas as pd +from shared.tasks.database_logic.db_manager import PostgresConnectionManager +from sources.config import shared_constants as constants + + +def get_first_suggetsioncohorte_to_insert(): + engine = PostgresConnectionManager().engine + + # get first cohorte suggestion to process as a dict + suggestion_cohorte = pd.read_sql_query( + f""" + SELECT * FROM data_suggestioncohorte + WHERE statut = '{constants.SUGGESTION_ATRAITER}' + LIMIT 1 + """, + engine, + ) + if suggestion_cohorte.empty: + return None + return suggestion_cohorte.to_dict(orient="records")[0] + + +def db_read_suggestiontoprocess(**kwargs): + return bool(get_first_suggetsioncohorte_to_insert()) diff --git a/dags/suggestions/tasks/business_logic/db_write_validsuggestions.py b/dags/suggestions/tasks/business_logic/db_write_validsuggestions.py new file mode 100644 index 000000000..05c3543f6 --- /dev/null +++ 
b/dags/suggestions/tasks/business_logic/db_write_validsuggestions.py @@ -0,0 +1,173 @@ +import logging + +from shared.tasks.database_logic.db_manager import PostgresConnectionManager +from sources.config import shared_constants as constants +from utils import logging_utils as log + +logger = logging.getLogger(__name__) + + +def db_write_validsuggestions(data_acteurs_normalized: dict): + # If data_set is empty, nothing to do + dag_run_id = data_acteurs_normalized["dag_run_id"] + engine = PostgresConnectionManager().engine + if "actors" not in data_acteurs_normalized: + with engine.begin() as connection: + update_suggestion_status( + connection, dag_run_id, constants.SUGGESTION_ENCOURS + ) + return + df_actors = data_acteurs_normalized["actors"] + df_labels = data_acteurs_normalized.get("labels") + df_acteur_services = data_acteurs_normalized.get("acteur_services") + df_pds = data_acteurs_normalized.get("pds") + df_pdssc = data_acteurs_normalized.get("pds_sous_categories") + dag_run_id = data_acteurs_normalized["dag_run_id"] + change_type = data_acteurs_normalized.get("change_type", "CREATE") + + with engine.begin() as connection: + if change_type in [ + constants.SUGGESTION_SOURCE_AJOUT, + constants.SUGGESTION_SOURCE_MODIFICATION, + ]: + db_write_acteurupdate( + connection, df_actors, df_labels, df_acteur_services, df_pds, df_pdssc + ) + elif change_type == constants.SUGGESTION_SOURCE_SUPRESSION: + db_write_acteurdelete(connection, df_actors) + else: + raise ValueError("Invalid change_type") + + update_suggestion_status(connection, dag_run_id, constants.SUGGESTION_SUCCES) + + +def db_write_acteurupdate( + connection, df_actors, df_labels, df_acteur_services, df_pds, df_pdssc +): + logger.warning("Création ou mise à jour des acteurs") + + df_actors[["identifiant_unique"]].to_sql( + "temp_actors", connection, if_exists="replace" + ) + + delete_queries = [ + """ + DELETE FROM qfdmo_propositionservice_sous_categories + WHERE propositionservice_id IN ( + SELECT id FROM 
qfdmo_propositionservice + WHERE acteur_id IN ( SELECT identifiant_unique FROM temp_actors ) + ); + """, + """ + DELETE FROM qfdmo_acteur_labels + WHERE acteur_id IN ( SELECT identifiant_unique FROM temp_actors ); + """, + """ + DELETE FROM qfdmo_acteur_acteur_services + WHERE acteur_id IN ( SELECT identifiant_unique FROM temp_actors ); + """, + """ + DELETE FROM qfdmo_propositionservice + WHERE acteur_id IN ( SELECT identifiant_unique FROM temp_actors ); + """, + """ + DELETE FROM qfdmo_acteur WHERE identifiant_unique + IN ( SELECT identifiant_unique FROM temp_actors); + """, + ] + + for query in delete_queries: + connection.execute(query) + + # Liste des colonnes souhaitées + collection = connection.execute( + "SELECT column_name FROM information_schema.columns WHERE table_name =" + " 'qfdmo_acteur';" + ) + colonnes_souhaitees = [col[0] for col in collection] + + # Filtrer les colonnes qui existent dans le DataFrame + colonnes_existantes = [ + col for col in colonnes_souhaitees if col in df_actors.columns + ] + + df_actors[colonnes_existantes].to_sql( + "qfdmo_acteur", + connection, + if_exists="append", + index=False, + method="multi", + chunksize=1000, + ) + + df_labels = df_labels[["acteur_id", "labelqualite_id"]] + df_labels.drop_duplicates(inplace=True) + df_labels[["acteur_id", "labelqualite_id"]].to_sql( + "qfdmo_acteur_labels", + connection, + if_exists="append", + index=False, + method="multi", + chunksize=1000, + ) + + df_acteur_services = df_acteur_services[["acteur_id", "acteurservice_id"]] + df_acteur_services.drop_duplicates(inplace=True) + df_acteur_services.to_sql( + "qfdmo_acteur_acteur_services", + connection, + if_exists="append", + index=False, + method="multi", + chunksize=1000, + ) + + df_pds[["id", "action_id", "acteur_id"]].to_sql( + "qfdmo_propositionservice", + connection, + if_exists="append", + index=False, + method="multi", + chunksize=1000, + ) + + df_pdssc[["propositionservice_id", "souscategorieobjet_id"]].to_sql( + 
"qfdmo_propositionservice_sous_categories", + connection, + if_exists="append", + index=False, + method="multi", + chunksize=1000, + ) + + +def db_write_acteurdelete(connection, df_acteur_to_delete): + # mettre le statut des acteur à "SUPPRIMER" pour tous les acteurs à supprimer + logger.warning("Suppression des acteurs") + identifiant_uniques = list( + set(df_acteur_to_delete[["identifiant_unique"]].values.flatten()) + ) + quoted_identifiant_uniques = [ + f"'{identifiant_unique}'" for identifiant_unique in identifiant_uniques + ] + query_acteur_to_delete = f""" + UPDATE qfdmo_acteur + SET statut='{constants.ACTEUR_SUPPRIME}' + WHERE identifiant_unique IN ({",".join(quoted_identifiant_uniques)}); + UPDATE qfdmo_revisionacteur + SET statut='{constants.ACTEUR_SUPPRIME}' + WHERE identifiant_unique IN ({",".join(quoted_identifiant_uniques)}); + """ + log.preview("query_acteur_to_delete", query_acteur_to_delete) + connection.execute(query_acteur_to_delete) + + +def update_suggestion_status( + connection, suggestion_id, statut=constants.SUGGESTION_ENCOURS +): + query = f""" + UPDATE data_suggestioncohorte + SET statut = '{statut}' + WHERE id = {suggestion_id}; + """ + connection.execute(query) diff --git a/dags/utils/base_utils.py b/dags/utils/base_utils.py index 135d8b964..85d01a098 100755 --- a/dags/utils/base_utils.py +++ b/dags/utils/base_utils.py @@ -114,6 +114,8 @@ def extract_details(row, col="adresse_format_ban"): def transform_location(longitude, latitude): + if not longitude or not latitude or math.isnan(longitude) or math.isnan(latitude): + return None return wkb.dumps(Point(longitude, latitude)).hex() diff --git a/dags_unit_tests/sources/tasks/business_logic/test_propose_acteur_changes.py b/dags_unit_tests/sources/tasks/business_logic/test_propose_acteur_changes.py index 5ec9808c7..46903ae8d 100644 --- a/dags_unit_tests/sources/tasks/business_logic/test_propose_acteur_changes.py +++ b/dags_unit_tests/sources/tasks/business_logic/test_propose_acteur_changes.py 
@@ -2,8 +2,6 @@ import pandas as pd import pytest -from shapely import wkb -from shapely.geometry import Point from sources.tasks.business_logic.propose_acteur_changes import propose_acteur_changes @@ -61,43 +59,11 @@ def test_create_actors_cree_le( expected_cree_le, ): result = propose_acteur_changes( - df=df_data_from_api, - df_acteurs=df_acteur, + df_acteur=df_data_from_api, + df_acteur_from_db=df_acteur, ) df_result = result["df"] assert "cree_le" in df_result.columns assert df_result["cree_le"].notnull().all() assert df_result["cree_le"][0].date() == expected_cree_le - - -class TestActorsLocation: - @pytest.mark.parametrize( - "latitude, longitude", - [ - (48.8566, 2.3522), - ("48.8566", "2.3522"), - ("48,8566", "2,3522"), - ], - ) - def test_create_actors_location( - self, - df_empty_acteurs_from_db, - latitude, - longitude, - ): - result = propose_acteur_changes( - df=pd.DataFrame( - { - "identifiant_unique": ["1"], - "latitude": [latitude], - "longitude": [longitude], - } - ), - df_acteurs=df_empty_acteurs_from_db, - ) - df_result = result["df"] - - expected_location = wkb.dumps(Point(2.3522, 48.8566)).hex() - - assert df_result["location"].iloc[0] == expected_location diff --git a/dags_unit_tests/sources/tasks/business_logic/test_source_data_normalize.py b/dags_unit_tests/sources/tasks/business_logic/test_source_data_normalize.py index 6922886da..d5cc28479 100755 --- a/dags_unit_tests/sources/tasks/business_logic/test_source_data_normalize.py +++ b/dags_unit_tests/sources/tasks/business_logic/test_source_data_normalize.py @@ -76,8 +76,6 @@ def test_annee_unique(self, product_mapping, dechet_mapping, acteurtype_id_by_co with pytest.raises(ValueError): df = df_normalize_sinoe( df=df, - product_mapping=product_mapping, - dechet_mapping=dechet_mapping, ) def test_drop_annee_column( @@ -85,8 +83,6 @@ def test_drop_annee_column( ): df = df_normalize_sinoe( df=df_sinoe, - product_mapping=product_mapping, - dechet_mapping=dechet_mapping, ) assert "ANNEE" not in 
df.columns diff --git a/dags_unit_tests/sources/tasks/transform/test_transform_df.py b/dags_unit_tests/sources/tasks/transform/test_transform_df.py index 351ac8d24..03e6ae344 100644 --- a/dags_unit_tests/sources/tasks/transform/test_transform_df.py +++ b/dags_unit_tests/sources/tasks/transform/test_transform_df.py @@ -8,6 +8,7 @@ clean_label_codes, clean_siret_and_siren, clean_telephone, + compute_location, get_latlng_from_geopoint, merge_and_clean_souscategorie_codes, merge_duplicates, @@ -451,3 +452,33 @@ def test_get_latlng_from_geopoint(self): result = get_latlng_from_geopoint(row, None) assert result["latitude"] == 48.8588443 assert result["longitude"] == 2.2943506 + + +PARIS_LOCATION = ( + "0101000000a835cd3b4ed1024076e09c11a56d4840" # pragma: allowlist secret +) + + +LONDON_LOCATION = ( + "0101000000ebe2361ac05bc0bfc5feb27bf2c04940" # pragma: allowlist secret +) + + +class TestComputeLocation: + + @pytest.mark.parametrize( + "latitude, longitude, expected_location", + [ + (48.8566, 2.3522, PARIS_LOCATION), + ("48.8566", "2.3522", PARIS_LOCATION), + (51.5074, -0.1278, LONDON_LOCATION), + (None, None, None), # Missing lat and long + ], + ) + def test_compute_location(self, latitude, longitude, expected_location): + + result = compute_location( + pd.Series({"latitude": latitude, "longitude": longitude}), None + ) + print(result["location"]) + assert result["location"] == expected_location diff --git a/dags_unit_tests/utils/test_db_data_prepare.py b/dags_unit_tests/utils/test_db_data_prepare.py index 047ef17e0..dde80bb35 100644 --- a/dags_unit_tests/utils/test_db_data_prepare.py +++ b/dags_unit_tests/utils/test_db_data_prepare.py @@ -12,7 +12,7 @@ class TestDBDataPrepare: [ ( pd.DataFrame(columns=["acteur_id", "labelqualite_id"]), - {0: None, 1: None}, + [None, None], ), ( pd.DataFrame( @@ -21,14 +21,14 @@ class TestDBDataPrepare: "labelqualite_id": [1, 1, 2], } ), - { - 0: [ + [ + [ { "acteur_id": 1, "labelqualite_id": 1, } ], - 1: [ + [ { "acteur_id": 2, 
"labelqualite_id": 1, @@ -38,7 +38,7 @@ class TestDBDataPrepare: "labelqualite_id": 2, }, ], - }, + ], ), ], ) @@ -48,6 +48,7 @@ def test_db_data_prepare_labels( df_proposition_services_sous_categories, propose_labels, expected_labels, + df_acteurs_from_db, source_id_by_code, acteurtype_id_by_code, ): @@ -71,25 +72,26 @@ def test_db_data_prepare_labels( df_pssc=df_proposition_services_sous_categories, df_labels=propose_labels, df_acteur_services=pd.DataFrame( - columns=["acteur_id", "acteurservice_id", "acteurservice"] + { + "acteur_id": [1, 2], + "acteurservice_id": [10, 10], + "acteurservice": [ + "Service de réparation", + "Service de réparation", + ], + } ), + df_acteurs_from_db=df_acteurs_from_db, source_id_by_code=source_id_by_code, acteurtype_id_by_code=acteurtype_id_by_code, ) - result = df_result["all"]["df"].to_dict() - labels = result["labels"] - assert labels == expected_labels + assert "labels" in df_result["df_acteur_to_create"].columns + assert list(df_result["df_acteur_to_create"]["labels"]) == expected_labels @pytest.mark.parametrize( "propose_acteur_services, expected_acteur_services", [ - ( - pd.DataFrame( - columns=["acteur_id", "acteurservice_id", "acteurservice"] - ), - {0: None, 1: None}, - ), ( pd.DataFrame( { @@ -102,15 +104,15 @@ def test_db_data_prepare_labels( ], } ), - { - 0: [ + [ + [ { "acteur_id": 1, "acteurservice": "Service de réparation", "acteurservice_id": 10, } ], - 1: [ + [ { "acteur_id": 2, "acteurservice": "Service de réparation", @@ -122,7 +124,7 @@ def test_db_data_prepare_labels( "acteurservice_id": 20, }, ], - }, + ], ), ], ) @@ -132,6 +134,7 @@ def test_db_data_prepare_acteur_services( df_proposition_services_sous_categories, propose_acteur_services, expected_acteur_services, + df_acteurs_from_db, source_id_by_code, acteurtype_id_by_code, ): @@ -154,10 +157,55 @@ def test_db_data_prepare_acteur_services( df_pssc=df_proposition_services_sous_categories, df_labels=pd.DataFrame(columns=["acteur_id", "labelqualite_id"]), 
df_acteur_services=propose_acteur_services, + df_acteurs_from_db=df_acteurs_from_db, source_id_by_code=source_id_by_code, acteurtype_id_by_code=acteurtype_id_by_code, ) - result = df_result["all"]["df"].to_dict() - acteur_services = result["acteur_services"] - assert acteur_services == expected_acteur_services + assert "acteur_services" in df_result["df_acteur_to_create"].columns + assert ( + list(df_result["df_acteur_to_create"]["acteur_services"]) + == expected_acteur_services + ) + + def test_db_data_prepare_acteur_services_empty( + self, + df_proposition_services, + df_proposition_services_sous_categories, + df_acteurs_from_db, + source_id_by_code, + acteurtype_id_by_code, + ): + + with pytest.raises(ValueError) as erreur: + db_data_prepare( + df_acteur_to_delete=pd.DataFrame( + { + "identifiant_unique": [3], + "statut": ["ACTIF"], + "cree_le": [datetime(2024, 1, 1)], + } + ), + df_acteur=pd.DataFrame( + { + "identifiant_unique": [1, 2], + "source_code": ["source1", "source2"], + "acteur_type_code": ["commerce", "commerce"], + } + ), + df_ps=df_proposition_services, + df_pssc=df_proposition_services_sous_categories, + df_labels=pd.DataFrame(columns=["acteur_id", "labelqualite_id"]), + df_acteur_services=pd.DataFrame( + columns=["acteur_id", "acteurservice_id", "acteurservice"] + ), + df_acteurs_from_db=df_acteurs_from_db, + source_id_by_code=source_id_by_code, + acteurtype_id_by_code=acteurtype_id_by_code, + ) + assert str(erreur.value) == "df_acteur_services est vide" + + +class TestActeurToCreateToDeleteToUpdate: + # FIXME : tests à écrire + pass diff --git a/data/__init__.py b/data/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/data/admin.py b/data/admin.py new file mode 100644 index 000000000..b15dcb50f --- /dev/null +++ b/data/admin.py @@ -0,0 +1,15 @@ +from django.contrib.gis import admin + +from data.models import Suggestion, SuggestionCohorte + + +class SuggestionCohorteAdmin(admin.ModelAdmin): + pass + + +class 
SuggestionAdmin(admin.ModelAdmin): + pass + + +admin.site.register(SuggestionCohorte, SuggestionCohorteAdmin) +admin.site.register(Suggestion, SuggestionAdmin) diff --git a/data/apps.py b/data/apps.py new file mode 100644 index 000000000..29617343f --- /dev/null +++ b/data/apps.py @@ -0,0 +1,8 @@ +from django.apps import AppConfig + + +class DataConfig(AppConfig): + default_auto_field = "django.db.models.BigAutoField" + name = "data" + label = "data" + verbose_name = "Gestion des interactions avec la plateforme de données" diff --git a/data/forms.py b/data/forms.py new file mode 100644 index 000000000..9ab32ea91 --- /dev/null +++ b/data/forms.py @@ -0,0 +1,18 @@ +from django import forms + +from data.models import SuggestionCohorte, SuggestionStatut + + +class SuggestionCohorteForm(forms.Form): + suggestion_cohorte = forms.ModelChoiceField( + label="Séléctionner l'execution d'un DAG", + widget=forms.Select( + attrs={ + "class": "fr-select", + } + ), + queryset=SuggestionCohorte.objects.filter( + statut=SuggestionStatut.AVALIDER.value + ), + required=True, + ) diff --git a/data/migrations/0001_bancache.py b/data/migrations/0001_bancache.py new file mode 100644 index 000000000..286ee979f --- /dev/null +++ b/data/migrations/0001_bancache.py @@ -0,0 +1,55 @@ +# Generated by Django 5.1.4 on 2025-01-09 14:04 + +import django.contrib.gis.db.models.fields +import django.core.validators +import django.db.models.deletion +import django.db.models.functions.datetime +from django.db import migrations, models + + +class Migration(migrations.Migration): + + initial = True + + dependencies = [] + + operations = [ + migrations.CreateModel( + name="BANCache", + fields=[ + ( + "id", + models.BigAutoField( + auto_created=True, + primary_key=True, + serialize=False, + verbose_name="ID", + ), + ), + ("adresse", models.CharField(blank=True, null=True)), + ( + "code_postal", + models.CharField(blank=True, null=True), + ), + ("ville", models.CharField(blank=True, null=True)), + ( + 
"location", + django.contrib.gis.db.models.fields.PointField( + blank=True, null=True, srid=4326 + ), + ), + ("ban_returned", models.JSONField(blank=True, null=True)), + ( + "modifie_le", + models.DateTimeField( + auto_now=True, + db_default=django.db.models.functions.datetime.Now(), + ), + ), + ], + options={ + "verbose_name": "Cache BAN", + "verbose_name_plural": "Caches BAN", + }, + ), + ] diff --git a/data/migrations/0002_tables_suggestion.py b/data/migrations/0002_tables_suggestion.py new file mode 100644 index 000000000..2b51a67c4 --- /dev/null +++ b/data/migrations/0002_tables_suggestion.py @@ -0,0 +1,154 @@ +# Generated by Django 5.1.4 on 2025-01-09 14:04 + +import django.contrib.gis.db.models.fields +import django.core.validators +import django.db.models.deletion +import django.db.models.functions.datetime +from django.db import migrations, models + + +class Migration(migrations.Migration): + + initial = True + + dependencies = [ + ("data", "0001_bancache"), + ] + + operations = [ + migrations.CreateModel( + name="SuggestionCohorte", + fields=[ + ("id", models.AutoField(primary_key=True, serialize=False)), + ( + "identifiant_action", + models.CharField( + verbose_name="Identifiant de l'action", + help_text="(ex : dag_id pour Airflow)", + ), + ), + ( + "identifiant_execution", + models.CharField( + verbose_name="Identifiant de l'execution", + help_text="(ex : run_id pour Airflow)", + ), + ), + ( + "type_action", + models.CharField( + blank=True, + choices=[ + ("CLUSTERING", "regroupement/déduplication des acteurs"), + ( + "SOURCE_AJOUT", + "ingestion de source de données - nouveau acteur", + ), + ( + "SOURCE_MODIFICATION", + "ingestion de source de données - modification d'acteur existant", + ), + ("SOURCE_SUPRESSION", "ingestion de source de données"), + ], + max_length=50, + ), + ), + ( + "statut", + models.CharField( + choices=[ + ("AVALIDER", "À valider"), + ("REJETEE", "Rejetée"), + ("ATRAITER", "À traiter"), + ("ENCOURS", "En cours de traitement"), + 
("ERREUR", "Fini en erreur"), + ("PARTIEL", "Fini avec succès partiel"), + ("SUCCES", "Fini avec succès"), + ], + default="AVALIDER", + max_length=50, + ), + ), + ( + "metadata", + models.JSONField( + blank=True, + verbose_name="Metadata de la cohorte, données statistiques", + null=True, + ), + ), + ( + "cree_le", + models.DateTimeField( + auto_now_add=True, + db_default=django.db.models.functions.datetime.Now(), + ), + ), + ( + "modifie_le", + models.DateTimeField( + auto_now=True, + db_default=django.db.models.functions.datetime.Now(), + ), + ), + ], + ), + migrations.CreateModel( + name="Suggestion", + fields=[ + ("id", models.AutoField(primary_key=True, serialize=False)), + ( + "statut", + models.CharField( + choices=[ + ("AVALIDER", "À valider"), + ("REJETEE", "Rejetée"), + ("ATRAITER", "À traiter"), + ("ENCOURS", "En cours de traitement"), + ("ERREUR", "Fini en erreur"), + ("PARTIEL", "Fini avec succès partiel"), + ("SUCCES", "Fini avec succès"), + ], + default="AVALIDER", + max_length=50, + ), + ), + ( + "context", + models.JSONField( + blank=True, + verbose_name="Contexte de la suggestion : données initiales", + null=True, + ), + ), + ( + "suggestion", + models.JSONField( + blank=True, verbose_name="Suggestion de modification" + ), + ), + ( + "cree_le", + models.DateTimeField( + auto_now_add=True, + db_default=django.db.models.functions.datetime.Now(), + ), + ), + ( + "modifie_le", + models.DateTimeField( + auto_now=True, + db_default=django.db.models.functions.datetime.Now(), + ), + ), + ( + "suggestion_cohorte", + models.ForeignKey( + on_delete=django.db.models.deletion.CASCADE, + related_name="suggestion_unitaires", + to="data.suggestioncohorte", + ), + ), + ], + ), + ] diff --git a/data/migrations/__init__.py b/data/migrations/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/data/models.py b/data/models.py new file mode 100644 index 000000000..c8f0730ce --- /dev/null +++ b/data/models.py @@ -0,0 +1,162 @@ +from 
django.contrib.gis.db import models +from django.db.models.functions import Now + +from core.models import TimestampedModel +from dags.sources.config.shared_constants import ( + SUGGESTION_ATRAITER, + SUGGESTION_AVALIDER, + SUGGESTION_CLUSTERING, + SUGGESTION_ENCOURS, + SUGGESTION_ERREUR, + SUGGESTION_PARTIEL, + SUGGESTION_REJETEE, + SUGGESTION_SOURCE_AJOUT, + SUGGESTION_SOURCE_MODIFICATION, + SUGGESTION_SOURCE_SUPRESSION, + SUGGESTION_SUCCES, +) +from qfdmo.models.acteur import ActeurType, Source + + +class SuggestionStatut(models.TextChoices): + AVALIDER = SUGGESTION_AVALIDER, "À valider" + REJETEE = SUGGESTION_REJETEE, "Rejetée" + ATRAITER = SUGGESTION_ATRAITER, "À traiter" + ENCOURS = SUGGESTION_ENCOURS, "En cours de traitement" + ERREUR = SUGGESTION_ERREUR, "Fini en erreur" + PARTIEL = SUGGESTION_PARTIEL, "Fini avec succès partiel" + SUCCES = SUGGESTION_SUCCES, "Fini avec succès" + + +class SuggestionAction(models.TextChoices): + CLUSTERING = SUGGESTION_CLUSTERING, "regroupement/déduplication des acteurs" + SOURCE_AJOUT = ( + SUGGESTION_SOURCE_AJOUT, + "ingestion de source de données - nouveau acteur", + ) + SOURCE_MODIFICATION = ( + SUGGESTION_SOURCE_MODIFICATION, + "ingestion de source de données - modification d'acteur existant", + ) + SOURCE_SUPPRESSION = SUGGESTION_SOURCE_SUPRESSION, "ingestion de source de données" + + +class SuggestionCohorte(TimestampedModel): + id = models.AutoField(primary_key=True) + # On utilise identifiant car le champ n'est pas utilisé pour résoudre une relation + # en base de données + identifiant_action = models.CharField( + verbose_name="Identifiant de l'action", help_text="(ex : dag_id pour Airflow)" + ) + identifiant_execution = models.CharField( + verbose_name="Identifiant de l'execution", + help_text="(ex : run_id pour Airflow)", + ) + type_action = models.CharField( + choices=SuggestionAction.choices, + max_length=50, + blank=True, + ) + statut = models.CharField( + max_length=50, + choices=SuggestionStatut.choices, + 
default=SuggestionStatut.AVALIDER, + ) + metadata = models.JSONField( + null=True, + blank=True, + verbose_name="Metadata de la cohorte, données statistiques", + ) + + @property + def is_source_type(self) -> bool: + return self.type_action in [ + SuggestionAction.SOURCE_AJOUT, + SuggestionAction.SOURCE_MODIFICATION, + SuggestionAction.SOURCE_SUPPRESSION, + ] + + @property + def is_clustering_type(self) -> bool: + return self.type_action == SuggestionAction.CLUSTERING + + def __str__(self) -> str: + return f"{self.identifiant_action} - {self.identifiant_execution}" + + +class Suggestion(models.Model): + id = models.AutoField(primary_key=True) + suggestion_cohorte = models.ForeignKey( + SuggestionCohorte, on_delete=models.CASCADE, related_name="suggestion_unitaires" + ) + statut = models.CharField( + max_length=50, + choices=SuggestionStatut.choices, + default=SuggestionStatut.AVALIDER, + ) + context = models.JSONField( + null=True, + blank=True, + verbose_name="Contexte de la suggestion : données initiales", + ) + suggestion = models.JSONField(blank=True, verbose_name="Suggestion de modification") + cree_le = models.DateTimeField(auto_now_add=True, db_default=Now()) + modifie_le = models.DateTimeField(auto_now=True, db_default=Now()) + + # FIXME: A revoir + def display_acteur_details(self) -> dict: + displayed_details = {} + for field, field_value in { + "nom": "Nom", + "nom_commercial": "Nom commercial", + "siret": "SIRET", + "siren": "SIREN", + "url": "Site web", + "email": "Email", + "telephone": "Téléphone", + "adresse": "Adresse", + "adresse_complement": "Complement d'adresse", + "code_postal": "Code postal", + "ville": "Ville", + "commentaires": "Commentaires", + "horaires_description": "Horaires", + "latitude": "latitude", + "longitude": "longitude", + "identifiant_unique": "identifiant_unique", + "identifiant_externe": "identifiant_externe", + }.items(): + if value := self.suggestion.get(field): + displayed_details[field_value] = value + if value := 
self.suggestion.get("acteur_type_id"): + displayed_details["Type d'acteur"] = ActeurType.objects.get( + pk=value + ).libelle + if value := self.suggestion.get("source_id"): + displayed_details["Source"] = Source.objects.get(pk=value).libelle + if value := self.suggestion.get("labels"): + displayed_details["Labels"] = ", ".join( + [str(v["labelqualite_id"]) for v in value] + ) + if value := self.suggestion.get("acteur_services"): + displayed_details["Acteur Services"] = ", ".join( + [str(v["acteurservice_id"]) for v in value] + ) + + return displayed_details + + # FIXME: A revoir + def display_proposition_service(self): + return self.suggestion.get("proposition_services", []) + + +class BANCache(models.Model): + class Meta: + verbose_name = "Cache BAN" + verbose_name_plural = "Caches BAN" + + adresse = models.CharField(blank=True, null=True) + code_postal = models.CharField(blank=True, null=True) + ville = models.CharField(blank=True, null=True) + location = models.PointField(blank=True, null=True) + ban_returned = models.JSONField(blank=True, null=True) + modifie_le = models.DateTimeField(auto_now=True, db_default=Now()) diff --git a/data/urls.py b/data/urls.py new file mode 100644 index 000000000..b4026f355 --- /dev/null +++ b/data/urls.py @@ -0,0 +1,11 @@ +from django.urls import path + +from data.views import SuggestionManagement + +urlpatterns = [ + path( + "suggestions/", + SuggestionManagement.as_view(), + name="suggestions", + ), +] diff --git a/data/views.py b/data/views.py new file mode 100644 index 000000000..e1ebbfb12 --- /dev/null +++ b/data/views.py @@ -0,0 +1,71 @@ +""" +DEPRECATED: cette vue sera bentôt caduque, on utilisera l'administration django +""" + +from django.contrib import messages +from django.shortcuts import render +from django.urls import reverse +from django.views.generic.edit import FormView + +from core.views import IsStaffMixin +from data.forms import SuggestionCohorteForm +from data.models import SuggestionAction, SuggestionStatut 
# Verb inserted in the cohort description shown to staff, keyed by action type.
ACTION_TO_VERB = {
    SuggestionAction.SOURCE_AJOUT: "ajoutera",
    SuggestionAction.SOURCE_SUPPRESSION: "supprimera",
    SuggestionAction.SOURCE_MODIFICATION: "modifiera",
}

# Fallback verb for action types without a dedicated entry above (e.g.
# clustering cohorts, or a cohort whose type_action was left blank).
_DEFAULT_ACTION_VERB = "traitera"


class SuggestionManagement(IsStaffMixin, FormView):
    """Staff-only form view used to inspect and validate/reject a cohort.

    The same form is posted for two purposes:

    * with a ``search`` POST field: re-render the page with statistics about
      the selected cohort and a random sample of its suggestions;
    * otherwise: set the cohort and all of its suggestions to "à traiter"
      (when ``dag_valid == "1"``) or "rejetée", then redirect.
    """

    form_class = SuggestionCohorteForm
    template_name = "data/dags_validations.html"

    def get_success_url(self) -> str:
        """Return the URL of this same page, resolved by name."""
        return reverse("data:suggestions")

    def form_valid(self, form):
        """Handle a valid POST: either preview the cohort or update its status.

        Args:
            form: the bound, validated ``SuggestionCohorteForm``.

        Returns:
            The rendered preview page when ``search`` was posted, otherwise
            the redirect produced by ``FormView.form_valid``.
        """
        # Function-scope import: only this method needs it.
        from django.db import transaction

        suggestion_cohorte = form.cleaned_data["suggestion_cohorte"]

        # Preview branch: display the cohort details, nothing is modified.
        if self.request.POST.get("search"):
            suggestion_unitaires = suggestion_cohorte.suggestion_unitaires.all()
            # .get() instead of [...]: the action type may be absent from
            # ACTION_TO_VERB, and a plain lookup would raise KeyError.
            verb = ACTION_TO_VERB.get(
                suggestion_cohorte.type_action, _DEFAULT_ACTION_VERB
            )
            context = {
                "form": form,
                "suggestion_cohorte_instance": suggestion_cohorte,
                "metadata": {
                    "nb_suggestions": suggestion_unitaires.count(),
                    "description": (
                        "La validation de cette cohorte de suggestion "
                        f"{verb} l'ensemble des "
                        "acteurs"
                    ),
                    "source": suggestion_cohorte.identifiant_action,
                },
                # Random sample capped at 100 rows to keep the page light.
                "suggestion_unitaires": suggestion_unitaires.order_by("?")[:100],
            }
            return render(self.request, self.template_name, context)

        # Decision branch: propagate the new status to the cohort and to every
        # suggestion attached to it.
        new_status = (
            SuggestionStatut.ATRAITER.value
            if self.request.POST.get("dag_valid") == "1"
            else SuggestionStatut.REJETEE.value
        )

        # Both writes succeed or fail together, so the cohort never ends up
        # with a status different from its suggestions.
        with transaction.atomic():
            # NOTE: QuerySet.update() bypasses Model.save(), so auto_now
            # fields on the suggestions are not refreshed by this call.
            suggestion_cohorte.suggestion_unitaires.all().update(statut=new_status)
            suggestion_cohorte.statut = new_status
            suggestion_cohorte.save()

        messages.success(
            self.request,
            f"La cohorte {suggestion_cohorte} a été mise à jour avec le "
            f"statut {new_status}",
        )

        return super().form_valid(form)

    def form_invalid(self, form):
        """Flash an error message, then fall back to the default re-render."""
        messages.error(self.request, "Il y a des erreurs dans le formulaire.")
        return super().form_invalid(form)
diff --git a/dev-requirements.txt b/dev-requirements.txt index 854156923..c6ed309ca 100644 --- a/dev-requirements.txt +++ b/dev-requirements.txt @@ -1024,6 +1024,10 @@ jmespath==1.0.1 \ # via # -c requirements.txt # apache-airflow-providers-fab +joblib==1.4.2 \ + --hash=sha256:06d478d5674cbc267e7496a410ee875abd68e4340feff4490bcb7afb88060ae6 \ + --hash=sha256:2382c5816b2636fbd20a09e0f4e9dad4736765fdfb7dca582943b9c1366b3f0e + # via scikit-learn jsonschema==4.23.0 \ --hash=sha256:d71497fef26351a33265337fa77ffeb82423f3ea21283cd9467bb03999266bc4 \ --hash=sha256:fbadb6f8b144a8f8cf9f0b89ba94501d143e50411a1278633f56a7acf7fd5566 @@ -1439,6 +1443,9 @@ numpy==1.26.4 \ --hash=sha256:ffa75af20b44f8dba823498024771d5ac50620e6915abac414251bd971b4529f # via # -c requirements.txt + # pandas + # scikit-learn + # scipy # shapely opentelemetry-api==1.29.0 \ --hash=sha256:5fcd94c4141cc49c736271f3e1efb777bebe9cc535759c54c936cca4f1b312b8 \ @@ -1503,6 +1510,35 @@ packaging==24.2 \ # marshmallow-sqlalchemy # pytest # sphinx +pandas==2.1.4 \ + --hash=sha256:00028e6737c594feac3c2df15636d73ace46b8314d236100b57ed7e4b9ebe8d9 \ + --hash=sha256:0aa6e92e639da0d6e2017d9ccff563222f4eb31e4b2c3cf32a2a392fc3103c0d \ + --hash=sha256:1ebfd771110b50055712b3b711b51bee5d50135429364d0498e1213a7adc2be8 \ + --hash=sha256:294d96cfaf28d688f30c918a765ea2ae2e0e71d3536754f4b6de0ea4a496d034 \ + --hash=sha256:3f06bda01a143020bad20f7a85dd5f4a1600112145f126bc9e3e42077c24ef34 \ + --hash=sha256:426dc0f1b187523c4db06f96fb5c8d1a845e259c99bda74f7de97bd8a3bb3139 \ + --hash=sha256:45d63d2a9b1b37fa6c84a68ba2422dc9ed018bdaa668c7f47566a01188ceeec1 \ + --hash=sha256:482d5076e1791777e1571f2e2d789e940dedd927325cc3cb6d0800c6304082f6 \ + --hash=sha256:6b728fb8deba8905b319f96447a27033969f3ea1fea09d07d296c9030ab2ed1d \ + --hash=sha256:8a706cfe7955c4ca59af8c7a0517370eafbd98593155b48f10f9811da440248b \ + --hash=sha256:8ea107e0be2aba1da619cc6ba3f999b2bfc9669a83554b1904ce3dd9507f0860 \ + 
--hash=sha256:ab5796839eb1fd62a39eec2916d3e979ec3130509930fea17fe6f81e18108f6a \ + --hash=sha256:b0513a132a15977b4a5b89aabd304647919bc2169eac4c8536afb29c07c23540 \ + --hash=sha256:b7d852d16c270e4331f6f59b3e9aa23f935f5c4b0ed2d0bc77637a8890a5d092 \ + --hash=sha256:bd7d5f2f54f78164b3d7a40f33bf79a74cdee72c31affec86bfcabe7e0789821 \ + --hash=sha256:bdec823dc6ec53f7a6339a0e34c68b144a7a1fd28d80c260534c39c62c5bf8c9 \ + --hash=sha256:d2d3e7b00f703aea3945995ee63375c61b2e6aa5aa7871c5d622870e5e137623 \ + --hash=sha256:d65148b14788b3758daf57bf42725caa536575da2b64df9964c563b015230984 \ + --hash=sha256:d797591b6846b9db79e65dc2d0d48e61f7db8d10b2a9480b4e3faaddc421a171 \ + --hash=sha256:dc9bf7ade01143cddc0074aa6995edd05323974e6e40d9dbde081021ded8510e \ + --hash=sha256:e9f17f2b6fc076b2a0078862547595d66244db0f41bf79fc5f64a5c4d635bead \ + --hash=sha256:edbaf9e8d3a63a9276d707b4d25930a262341bca9874fcb22eff5e3da5394732 \ + --hash=sha256:f237e6ca6421265643608813ce9793610ad09b40154a3344a088159590469e46 \ + --hash=sha256:f69b0c9bb174a2342818d3e2778584e18c740d56857fc5cdb944ec8bbe4082cf \ + --hash=sha256:fcb68203c833cc735321512e13861358079a96c174a61f5116a1de89c58c0ef7 + # via + # -c requirements.txt + # -r dev-requirements.in parso==0.8.4 \ --hash=sha256:a418670a20291dacd2dddc80c377c5c3791378ee1e8d12bffc35420643d43f18 \ --hash=sha256:eb3a7b58240fb99099a345571deecc0f9540ea5f4dd2fe14c2a99d6b281ab92d @@ -1870,6 +1906,7 @@ python-dateutil==2.9.0.post0 \ # croniter # faker # flask-appbuilder + # pandas # pendulum # time-machine python-dotenv==1.0.1 \ @@ -1896,6 +1933,7 @@ pytz==2024.2 \ # -c requirements.txt # croniter # flask-babel + # pandas pyyaml==6.0.2 \ --hash=sha256:01179a4a8559ab5de078078f37e5c1a30d76bb88519906844fd7bdea1b7729ff \ --hash=sha256:0833f8694549e586547b576dcfaba4a6b55b9e96098b36cdc7ebefe667dfed48 \ @@ -2216,6 +2254,76 @@ ruff==0.9.1 \ --hash=sha256:f0c8b149e9c7353cace7d698e1656ffcf1e36e50f8ea3b5d5f7f87ff9986a7ca \ 
--hash=sha256:fd2b25ecaf907d6458fa842675382c8597b3c746a2dde6717fe3415425df0c17 # via -r dev-requirements.in +scikit-learn==1.3.2 \ + --hash=sha256:0402638c9a7c219ee52c94cbebc8fcb5eb9fe9c773717965c1f4185588ad3107 \ + --hash=sha256:0ee107923a623b9f517754ea2f69ea3b62fc898a3641766cb7deb2f2ce450161 \ + --hash=sha256:1215e5e58e9880b554b01187b8c9390bf4dc4692eedeaf542d3273f4785e342c \ + --hash=sha256:15e1e94cc23d04d39da797ee34236ce2375ddea158b10bee3c343647d615581d \ + --hash=sha256:18424efee518a1cde7b0b53a422cde2f6625197de6af36da0b57ec502f126157 \ + --hash=sha256:1d08ada33e955c54355d909b9c06a4789a729977f165b8bae6f225ff0a60ec4a \ + --hash=sha256:3271552a5eb16f208a6f7f617b8cc6d1f137b52c8a1ef8edf547db0259b2c9fb \ + --hash=sha256:35a22e8015048c628ad099da9df5ab3004cdbf81edc75b396fd0cff8699ac58c \ + --hash=sha256:535805c2a01ccb40ca4ab7d081d771aea67e535153e35a1fd99418fcedd1648a \ + --hash=sha256:5b2de18d86f630d68fe1f87af690d451388bb186480afc719e5f770590c2ef6c \ + --hash=sha256:61a6efd384258789aa89415a410dcdb39a50e19d3d8410bd29be365bcdd512d5 \ + --hash=sha256:64381066f8aa63c2710e6b56edc9f0894cc7bf59bd71b8ce5613a4559b6145e0 \ + --hash=sha256:67f37d708f042a9b8d59551cf94d30431e01374e00dc2645fa186059c6c5d78b \ + --hash=sha256:6c43290337f7a4b969d207e620658372ba3c1ffb611f8bc2b6f031dc5c6d1d03 \ + --hash=sha256:6fb6bc98f234fda43163ddbe36df8bcde1d13ee176c6dc9b92bb7d3fc842eb66 \ + --hash=sha256:763f0ae4b79b0ff9cca0bf3716bcc9915bdacff3cebea15ec79652d1cc4fa5c9 \ + --hash=sha256:785a2213086b7b1abf037aeadbbd6d67159feb3e30263434139c98425e3dcfcf \ + --hash=sha256:8db94cd8a2e038b37a80a04df8783e09caac77cbe052146432e67800e430c028 \ + --hash=sha256:a19f90f95ba93c1a7f7924906d0576a84da7f3b2282ac3bfb7a08a32801add93 \ + --hash=sha256:a2f54c76accc15a34bfb9066e6c7a56c1e7235dda5762b990792330b52ccfb05 \ + --hash=sha256:b8692e395a03a60cd927125eef3a8e3424d86dde9b2370d544f0ea35f78a8073 \ + --hash=sha256:cb06f8dce3f5ddc5dee1715a9b9f19f20d295bed8e3cd4fa51e1d050347de525 \ + 
--hash=sha256:dc9002fc200bed597d5d34e90c752b74df516d592db162f756cc52836b38fe0e \ + --hash=sha256:e326c0eb5cf4d6ba40f93776a20e9a7a69524c4db0757e7ce24ba222471ee8a1 \ + --hash=sha256:ed932ea780517b00dae7431e031faae6b49b20eb6950918eb83bd043237950e0 \ + --hash=sha256:fc4144a5004a676d5022b798d9e573b05139e77f271253a4703eed295bde0433 + # via -r dev-requirements.in +scipy==1.15.1 \ + --hash=sha256:033a75ddad1463970c96a88063a1df87ccfddd526437136b6ee81ff0312ebdf6 \ + --hash=sha256:0458839c9f873062db69a03de9a9765ae2e694352c76a16be44f93ea45c28d2b \ + --hash=sha256:070d10654f0cb6abd295bc96c12656f948e623ec5f9a4eab0ddb1466c000716e \ + --hash=sha256:09c52320c42d7f5c7748b69e9f0389266fd4f82cf34c38485c14ee976cb8cb04 \ + --hash=sha256:0ac102ce99934b162914b1e4a6b94ca7da0f4058b6d6fd65b0cef330c0f3346f \ + --hash=sha256:0fb57b30f0017d4afa5fe5f5b150b8f807618819287c21cbe51130de7ccdaed2 \ + --hash=sha256:100193bb72fbff37dbd0bf14322314fc7cbe08b7ff3137f11a34d06dc0ee6b85 \ + --hash=sha256:14eaa373c89eaf553be73c3affb11ec6c37493b7eaaf31cf9ac5dffae700c2e0 \ + --hash=sha256:2114a08daec64980e4b4cbdf5bee90935af66d750146b1d2feb0d3ac30613692 \ + --hash=sha256:21e10b1dd56ce92fba3e786007322542361984f8463c6d37f6f25935a5a6ef52 \ + --hash=sha256:2722a021a7929d21168830790202a75dbb20b468a8133c74a2c0230c72626b6c \ + --hash=sha256:395be70220d1189756068b3173853029a013d8c8dd5fd3d1361d505b2aa58fa7 \ + --hash=sha256:3fe1d95944f9cf6ba77aa28b82dd6bb2a5b52f2026beb39ecf05304b8392864b \ + --hash=sha256:491d57fe89927fa1aafbe260f4cfa5ffa20ab9f1435025045a5315006a91b8f5 \ + --hash=sha256:4b17d4220df99bacb63065c76b0d1126d82bbf00167d1730019d2a30d6ae01ea \ + --hash=sha256:4c9d8fc81d6a3b6844235e6fd175ee1d4c060163905a2becce8e74cb0d7554ce \ + --hash=sha256:55cc79ce4085c702ac31e49b1e69b27ef41111f22beafb9b49fea67142b696c4 \ + --hash=sha256:5b190b935e7db569960b48840e5bef71dc513314cc4e79a1b7d14664f57fd4ff \ + --hash=sha256:5bd8d27d44e2c13d0c1124e6a556454f52cd3f704742985f6b09e75e163d20d2 \ + 
--hash=sha256:5dff14e75cdbcf07cdaa1c7707db6017d130f0af9ac41f6ce443a93318d6c6e0 \ + --hash=sha256:5eb0ca35d4b08e95da99a9f9c400dc9f6c21c424298a0ba876fdc69c7afacedf \ + --hash=sha256:63b9b6cd0333d0eb1a49de6f834e8aeaefe438df8f6372352084535ad095219e \ + --hash=sha256:667f950bf8b7c3a23b4199db24cb9bf7512e27e86d0e3813f015b74ec2c6e3df \ + --hash=sha256:6b3e71893c6687fc5e29208d518900c24ea372a862854c9888368c0b267387ab \ + --hash=sha256:71ba9a76c2390eca6e359be81a3e879614af3a71dfdabb96d1d7ab33da6f2364 \ + --hash=sha256:74bb864ff7640dea310a1377d8567dc2cb7599c26a79ca852fc184cc851954ac \ + --hash=sha256:82add84e8a9fb12af5c2c1a3a3f1cb51849d27a580cb9e6bd66226195142be6e \ + --hash=sha256:837299eec3d19b7e042923448d17d95a86e43941104d33f00da7e31a0f715d3c \ + --hash=sha256:900f3fa3db87257510f011c292a5779eb627043dd89731b9c461cd16ef76ab3d \ + --hash=sha256:9f151e9fb60fbf8e52426132f473221a49362091ce7a5e72f8aa41f8e0da4f25 \ + --hash=sha256:af0b61c1de46d0565b4b39c6417373304c1d4f5220004058bdad3061c9fa8a95 \ + --hash=sha256:bc7136626261ac1ed988dca56cfc4ab5180f75e0ee52e58f1e6aa74b5f3eacd5 \ + --hash=sha256:be3deeb32844c27599347faa077b359584ba96664c5c79d71a354b80a0ad0ce0 \ + --hash=sha256:c09aa9d90f3500ea4c9b393ee96f96b0ccb27f2f350d09a47f533293c78ea776 \ + --hash=sha256:c352c1b6d7cac452534517e022f8f7b8d139cd9f27e6fbd9f3cbd0bfd39f5bef \ + --hash=sha256:c64ded12dcab08afff9e805a67ff4480f5e69993310e093434b10e85dc9d43e1 \ + --hash=sha256:cdde8414154054763b42b74fe8ce89d7f3d17a7ac5dd77204f0e142cdc9239e9 \ + --hash=sha256:ce3a000cd28b4430426db2ca44d96636f701ed12e2b3ca1f2b1dd7abdd84b39a \ + --hash=sha256:f735bc41bd1c792c96bc426dece66c8723283695f02df61dcc4d0a707a42fc54 \ + --hash=sha256:f82fcf4e5b377f819542fbc8541f7b5fbcf1c0017d0df0bc22c781bf60abc4d8 + # via scikit-learn setproctitle==1.3.4 \ --hash=sha256:020ea47a79b2bbd7bd7b94b85ca956ba7cb026e82f41b20d2e1dac4008cead25 \ --hash=sha256:02ca3802902d91a89957f79da3ec44b25b5804c88026362cb85eea7c1fbdefd1 \ @@ -2489,6 +2597,10 @@ text-unidecode==1.3 \ 
--hash=sha256:1311f10e8b895935241623731c2ba64f4c455287888b18189350b67134a822e8 \ --hash=sha256:bad6603bb14d279193107714b288be206cac565dfa49aa5b105294dd5c4aab93 # via python-slugify +threadpoolctl==3.5.0 \ + --hash=sha256:082433502dd922bf738de0d8bcc4fdcbf0979ff44c42bd40f5af8a282f6fa107 \ + --hash=sha256:56c1e26c150397e58c4926da8eeee87533b1e32bef131bd4bf6a2f45f3185467 + # via scikit-learn time-machine==2.16.0 \ --hash=sha256:01bc257e9418980a4922de94775be42a966e1a082fb01a1635917f9afc7b84ca \ --hash=sha256:09531af59fdfb39bfd24d28bd1e837eff5a5d98318509a31b6cfd57d27801e52 \ @@ -2569,6 +2681,7 @@ tzdata==2024.2 \ --hash=sha256:a48093786cdcde33cad18c2555e8532f34422074448fbc874186f0abd79565cd # via # -c requirements.txt + # pandas # pendulum uc-micro-py==1.0.3 \ --hash=sha256:d321b92cff673ec58027c04015fcaa8bb1e005478643ff4a500882eaab88c48a \ @@ -2767,107 +2880,3 @@ zipp==3.21.0 \ --hash=sha256:2c9958f6430a2040341a52eb608ed6dd93ef4392e02ffe219417c1b28b5dd1f4 \ --hash=sha256:ac1bbe05fd2991f160ebce24ffbac5f6d11d83dc90891255885223d42b3cd931 # via importlib-metadata -joblib==1.4.2 \ - --hash=sha256:06d478d5674cbc267e7496a410ee875abd68e4340feff4490bcb7afb88060ae6 \ - --hash=sha256:2382c5816b2636fbd20a09e0f4e9dad4736765fdfb7dca582943b9c1366b3f0e - # via scikit-learn -scikit-learn==1.3.2 \ - --hash=sha256:0402638c9a7c219ee52c94cbebc8fcb5eb9fe9c773717965c1f4185588ad3107 \ - --hash=sha256:0ee107923a623b9f517754ea2f69ea3b62fc898a3641766cb7deb2f2ce450161 \ - --hash=sha256:1215e5e58e9880b554b01187b8c9390bf4dc4692eedeaf542d3273f4785e342c \ - --hash=sha256:15e1e94cc23d04d39da797ee34236ce2375ddea158b10bee3c343647d615581d \ - --hash=sha256:18424efee518a1cde7b0b53a422cde2f6625197de6af36da0b57ec502f126157 \ - --hash=sha256:1d08ada33e955c54355d909b9c06a4789a729977f165b8bae6f225ff0a60ec4a \ - --hash=sha256:3271552a5eb16f208a6f7f617b8cc6d1f137b52c8a1ef8edf547db0259b2c9fb \ - --hash=sha256:35a22e8015048c628ad099da9df5ab3004cdbf81edc75b396fd0cff8699ac58c \ - 
--hash=sha256:535805c2a01ccb40ca4ab7d081d771aea67e535153e35a1fd99418fcedd1648a \ - --hash=sha256:5b2de18d86f630d68fe1f87af690d451388bb186480afc719e5f770590c2ef6c \ - --hash=sha256:61a6efd384258789aa89415a410dcdb39a50e19d3d8410bd29be365bcdd512d5 \ - --hash=sha256:64381066f8aa63c2710e6b56edc9f0894cc7bf59bd71b8ce5613a4559b6145e0 \ - --hash=sha256:67f37d708f042a9b8d59551cf94d30431e01374e00dc2645fa186059c6c5d78b \ - --hash=sha256:6c43290337f7a4b969d207e620658372ba3c1ffb611f8bc2b6f031dc5c6d1d03 \ - --hash=sha256:6fb6bc98f234fda43163ddbe36df8bcde1d13ee176c6dc9b92bb7d3fc842eb66 \ - --hash=sha256:763f0ae4b79b0ff9cca0bf3716bcc9915bdacff3cebea15ec79652d1cc4fa5c9 \ - --hash=sha256:785a2213086b7b1abf037aeadbbd6d67159feb3e30263434139c98425e3dcfcf \ - --hash=sha256:8db94cd8a2e038b37a80a04df8783e09caac77cbe052146432e67800e430c028 \ - --hash=sha256:a19f90f95ba93c1a7f7924906d0576a84da7f3b2282ac3bfb7a08a32801add93 \ - --hash=sha256:a2f54c76accc15a34bfb9066e6c7a56c1e7235dda5762b990792330b52ccfb05 \ - --hash=sha256:b8692e395a03a60cd927125eef3a8e3424d86dde9b2370d544f0ea35f78a8073 \ - --hash=sha256:cb06f8dce3f5ddc5dee1715a9b9f19f20d295bed8e3cd4fa51e1d050347de525 \ - --hash=sha256:dc9002fc200bed597d5d34e90c752b74df516d592db162f756cc52836b38fe0e \ - --hash=sha256:e326c0eb5cf4d6ba40f93776a20e9a7a69524c4db0757e7ce24ba222471ee8a1 \ - --hash=sha256:ed932ea780517b00dae7431e031faae6b49b20eb6950918eb83bd043237950e0 \ - --hash=sha256:fc4144a5004a676d5022b798d9e573b05139e77f271253a4703eed295bde0433 - # via -r req_sickit.in -scipy==1.15.1 \ - --hash=sha256:033a75ddad1463970c96a88063a1df87ccfddd526437136b6ee81ff0312ebdf6 \ - --hash=sha256:0458839c9f873062db69a03de9a9765ae2e694352c76a16be44f93ea45c28d2b \ - --hash=sha256:070d10654f0cb6abd295bc96c12656f948e623ec5f9a4eab0ddb1466c000716e \ - --hash=sha256:09c52320c42d7f5c7748b69e9f0389266fd4f82cf34c38485c14ee976cb8cb04 \ - --hash=sha256:0ac102ce99934b162914b1e4a6b94ca7da0f4058b6d6fd65b0cef330c0f3346f \ - 
--hash=sha256:0fb57b30f0017d4afa5fe5f5b150b8f807618819287c21cbe51130de7ccdaed2 \ - --hash=sha256:100193bb72fbff37dbd0bf14322314fc7cbe08b7ff3137f11a34d06dc0ee6b85 \ - --hash=sha256:14eaa373c89eaf553be73c3affb11ec6c37493b7eaaf31cf9ac5dffae700c2e0 \ - --hash=sha256:2114a08daec64980e4b4cbdf5bee90935af66d750146b1d2feb0d3ac30613692 \ - --hash=sha256:21e10b1dd56ce92fba3e786007322542361984f8463c6d37f6f25935a5a6ef52 \ - --hash=sha256:2722a021a7929d21168830790202a75dbb20b468a8133c74a2c0230c72626b6c \ - --hash=sha256:395be70220d1189756068b3173853029a013d8c8dd5fd3d1361d505b2aa58fa7 \ - --hash=sha256:3fe1d95944f9cf6ba77aa28b82dd6bb2a5b52f2026beb39ecf05304b8392864b \ - --hash=sha256:491d57fe89927fa1aafbe260f4cfa5ffa20ab9f1435025045a5315006a91b8f5 \ - --hash=sha256:4b17d4220df99bacb63065c76b0d1126d82bbf00167d1730019d2a30d6ae01ea \ - --hash=sha256:4c9d8fc81d6a3b6844235e6fd175ee1d4c060163905a2becce8e74cb0d7554ce \ - --hash=sha256:55cc79ce4085c702ac31e49b1e69b27ef41111f22beafb9b49fea67142b696c4 \ - --hash=sha256:5b190b935e7db569960b48840e5bef71dc513314cc4e79a1b7d14664f57fd4ff \ - --hash=sha256:5bd8d27d44e2c13d0c1124e6a556454f52cd3f704742985f6b09e75e163d20d2 \ - --hash=sha256:5dff14e75cdbcf07cdaa1c7707db6017d130f0af9ac41f6ce443a93318d6c6e0 \ - --hash=sha256:5eb0ca35d4b08e95da99a9f9c400dc9f6c21c424298a0ba876fdc69c7afacedf \ - --hash=sha256:63b9b6cd0333d0eb1a49de6f834e8aeaefe438df8f6372352084535ad095219e \ - --hash=sha256:667f950bf8b7c3a23b4199db24cb9bf7512e27e86d0e3813f015b74ec2c6e3df \ - --hash=sha256:6b3e71893c6687fc5e29208d518900c24ea372a862854c9888368c0b267387ab \ - --hash=sha256:71ba9a76c2390eca6e359be81a3e879614af3a71dfdabb96d1d7ab33da6f2364 \ - --hash=sha256:74bb864ff7640dea310a1377d8567dc2cb7599c26a79ca852fc184cc851954ac \ - --hash=sha256:82add84e8a9fb12af5c2c1a3a3f1cb51849d27a580cb9e6bd66226195142be6e \ - --hash=sha256:837299eec3d19b7e042923448d17d95a86e43941104d33f00da7e31a0f715d3c \ - --hash=sha256:900f3fa3db87257510f011c292a5779eb627043dd89731b9c461cd16ef76ab3d \ - 
--hash=sha256:9f151e9fb60fbf8e52426132f473221a49362091ce7a5e72f8aa41f8e0da4f25 \ - --hash=sha256:af0b61c1de46d0565b4b39c6417373304c1d4f5220004058bdad3061c9fa8a95 \ - --hash=sha256:bc7136626261ac1ed988dca56cfc4ab5180f75e0ee52e58f1e6aa74b5f3eacd5 \ - --hash=sha256:be3deeb32844c27599347faa077b359584ba96664c5c79d71a354b80a0ad0ce0 \ - --hash=sha256:c09aa9d90f3500ea4c9b393ee96f96b0ccb27f2f350d09a47f533293c78ea776 \ - --hash=sha256:c352c1b6d7cac452534517e022f8f7b8d139cd9f27e6fbd9f3cbd0bfd39f5bef \ - --hash=sha256:c64ded12dcab08afff9e805a67ff4480f5e69993310e093434b10e85dc9d43e1 \ - --hash=sha256:cdde8414154054763b42b74fe8ce89d7f3d17a7ac5dd77204f0e142cdc9239e9 \ - --hash=sha256:ce3a000cd28b4430426db2ca44d96636f701ed12e2b3ca1f2b1dd7abdd84b39a \ - --hash=sha256:f735bc41bd1c792c96bc426dece66c8723283695f02df61dcc4d0a707a42fc54 \ - --hash=sha256:f82fcf4e5b377f819542fbc8541f7b5fbcf1c0017d0df0bc22c781bf60abc4d8 - # via scikit-learn -threadpoolctl==3.5.0 \ - --hash=sha256:082433502dd922bf738de0d8bcc4fdcbf0979ff44c42bd40f5af8a282f6fa107 \ - --hash=sha256:56c1e26c150397e58c4926da8eeee87533b1e32bef131bd4bf6a2f45f3185467 - # via scikit-learn -pandas==2.1.4 \ - --hash=sha256:00028e6737c594feac3c2df15636d73ace46b8314d236100b57ed7e4b9ebe8d9 \ - --hash=sha256:0aa6e92e639da0d6e2017d9ccff563222f4eb31e4b2c3cf32a2a392fc3103c0d \ - --hash=sha256:1ebfd771110b50055712b3b711b51bee5d50135429364d0498e1213a7adc2be8 \ - --hash=sha256:294d96cfaf28d688f30c918a765ea2ae2e0e71d3536754f4b6de0ea4a496d034 \ - --hash=sha256:3f06bda01a143020bad20f7a85dd5f4a1600112145f126bc9e3e42077c24ef34 \ - --hash=sha256:426dc0f1b187523c4db06f96fb5c8d1a845e259c99bda74f7de97bd8a3bb3139 \ - --hash=sha256:45d63d2a9b1b37fa6c84a68ba2422dc9ed018bdaa668c7f47566a01188ceeec1 \ - --hash=sha256:482d5076e1791777e1571f2e2d789e940dedd927325cc3cb6d0800c6304082f6 \ - --hash=sha256:6b728fb8deba8905b319f96447a27033969f3ea1fea09d07d296c9030ab2ed1d \ - --hash=sha256:8a706cfe7955c4ca59af8c7a0517370eafbd98593155b48f10f9811da440248b \ - 
--hash=sha256:8ea107e0be2aba1da619cc6ba3f999b2bfc9669a83554b1904ce3dd9507f0860 \ - --hash=sha256:ab5796839eb1fd62a39eec2916d3e979ec3130509930fea17fe6f81e18108f6a \ - --hash=sha256:b0513a132a15977b4a5b89aabd304647919bc2169eac4c8536afb29c07c23540 \ - --hash=sha256:b7d852d16c270e4331f6f59b3e9aa23f935f5c4b0ed2d0bc77637a8890a5d092 \ - --hash=sha256:bd7d5f2f54f78164b3d7a40f33bf79a74cdee72c31affec86bfcabe7e0789821 \ - --hash=sha256:bdec823dc6ec53f7a6339a0e34c68b144a7a1fd28d80c260534c39c62c5bf8c9 \ - --hash=sha256:d2d3e7b00f703aea3945995ee63375c61b2e6aa5aa7871c5d622870e5e137623 \ - --hash=sha256:d65148b14788b3758daf57bf42725caa536575da2b64df9964c563b015230984 \ - --hash=sha256:d797591b6846b9db79e65dc2d0d48e61f7db8d10b2a9480b4e3faaddc421a171 \ - --hash=sha256:dc9bf7ade01143cddc0074aa6995edd05323974e6e40d9dbde081021ded8510e \ - --hash=sha256:e9f17f2b6fc076b2a0078862547595d66244db0f41bf79fc5f64a5c4d635bead \ - --hash=sha256:edbaf9e8d3a63a9276d707b4d25930a262341bca9874fcb22eff5e3da5394732 \ - --hash=sha256:f237e6ca6421265643608813ce9793610ad09b40154a3344a088159590469e46 \ - --hash=sha256:f69b0c9bb174a2342818d3e2778584e18c740d56857fc5cdb944ec8bbe4082cf \ - --hash=sha256:fcb68203c833cc735321512e13861358079a96c174a61f5116a1de89c58c0ef7 \ No newline at end of file diff --git a/docs/reference/303-systeme-de-suggestions.md b/docs/reference/303-systeme-de-suggestions.md new file mode 100644 index 000000000..4eea24829 --- /dev/null +++ b/docs/reference/303-systeme-de-suggestions.md @@ -0,0 +1,33 @@ +# Système de suggestion + +**Statut : ❓ À approuver** + +Cette proposition de modification de l'architecture pour faire évoluer le système de suggestion est un travail itératif. Il est donc nécessaire de garder en tête la cible et le moyen d'y parvenir. + +## Système de Suggestion + +Les suggestions sont créées par l'exécution d'un pipeline ou d'un script.
Les suggestions sont faites par paquets qu'on appelle **Cohortes** : chaque cohorte comprend un ensemble de suggestions de modification. + +Les cohortes ont un type d'événement : `clustering`, `enrichissement`, `source` selon le type de l'action lancée à l'origine de la suggestion de modification. + +Les cohortes et les suggestions ont un statut de traitement qui représente leur cycle de vie : `à valider`, `rejetée`, `à traiter`, `en cours de traitement`, `fini avec succès`, `fini avec succès partiel` (uniquement pour les cohortes), `fini en erreur`. + +### Représentation dans Django + +- SuggestionCohorte représente les cohortes, c'est-à-dire un ensemble de suggestions de la même nature +- Suggestion représente les propositions de modification + +### Cycle de vie d'une suggestion + +```mermaid +--- +title: Cycle de vie d'une suggestion (cohorte et unitaire) +--- + +flowchart TB + + AVALIDER[À valider] --> ATRAITER[À traiter] --> ENCOURS[En cours de traitement] --> SUCCES[Fini avec succès] + AVALIDER[À valider] --> REJETEE[Rejetée] + ENCOURS --> PARTIEL[Fini avec succès partiel] + ENCOURS --> ERREUR[Fini en erreur] +``` diff --git a/jinja2/qfdmo/dags_validations.html b/jinja2/qfdmo/dags_validations.html index 6ff4c93eb..5122f2b60 100644 --- a/jinja2/qfdmo/dags_validations.html +++ b/jinja2/qfdmo/dags_validations.html @@ -3,7 +3,10 @@ {% block content %}
-

Validations des «DAGs»

+

Interface dépréciée

+ + La nouvelle interface est disponible à l'adresse suivante : /data/suggestions +

Validations des «DAGs»

Cette page permet de valider les données des «DAGs». diff --git a/lvao_data/adhoc_analysis/refashion_adhoc.ipynb b/lvao_data/adhoc_analysis/refashion_adhoc.ipynb deleted file mode 100644 index 042b6244b..000000000 --- a/lvao_data/adhoc_analysis/refashion_adhoc.ipynb +++ /dev/null @@ -1,2416 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "id": "a00ac129-f66d-41b7-9605-9ddb6b8b8fca", - "metadata": {}, - "source": [ - "## Intégrer Refashion depuis l'api pointsapport :\n", - "\n", - "- Récupérer les données-eo-refashion depuis l'api pointsapport.\n", - "- Créer et mapper les données vers les tables Acteurs, Proposition de Services et Sous-catégories.\n", - "- Enregistrer chaque table dans un fichier CSV." - ] - }, - { - "cell_type": "markdown", - "id": "fa714639-b9f1-4a6f-8ef6-582956f2223a", - "metadata": {}, - "source": [ - "#### préprod" - ] - }, - { - "cell_type": "code", - "execution_count": 50, - "id": "ae6d5405-0e71-4f1b-9acb-c3ef733c900e", - "metadata": {}, - "outputs": [], - "source": [ - "from sqlalchemy import create_engine\n", - "from dotenv import load_dotenv\n", - "import os\n", - "\n", - "# Load environment variables from .env file\n", - "load_dotenv()\n", - "\n", - "# Accessing environment variables\n", - "user = os.getenv('DB_USER')\n", - "password = os.getenv('DB_PASSWORD')\n", - "host = os.getenv('DB_HOST')\n", - "port = os.getenv('DB_PORT') # Default PostgreSQL port is 5432, but we're using a custom one here\n", - "db_name = os.getenv('DB_NAME')\n", - "\n", - "# Create the connection URL\n", - "connection_string = f'postgresql://{user}:{password}@{host}:{port}/{db_name}'\n", - "\n", - "# Create the engine\n", - "engine = create_engine(connection_string) \n" - ] - }, - { - "cell_type": "markdown", - "id": "f75dab43-a1f4-4347-87ee-c35dbe7c4469", - "metadata": {}, - "source": [ - "#### prod" - ] - }, - { - "cell_type": "code", - "execution_count": 31, - "id": "f370e9c3-749d-4c2e-87ca-87fe6e7610e9", - "metadata": {}, - "outputs": [], - 
"source": [ - "from sqlalchemy import create_engine\n", - "from dotenv import load_dotenv\n", - "import os\n", - "\n", - "# Load environment variables from .env file\n", - "load_dotenv()\n", - "\n", - "# Accessing environment variables\n", - "user = os.getenv('DB_USER_PROD')\n", - "password = os.getenv('DB_PASSWORD_PROD')\n", - "host = os.getenv('DB_HOST_PROD')\n", - "port = os.getenv('DB_PORT_PROD') # Default PostgreSQL port is 5432, but we're using a custom one here\n", - "db_name = os.getenv('DB_NAME_PROD')\n", - "\n", - "# Create the connection URL\n", - "connection_string = f'postgresql://{user}:{password}@{host}:{port}/{db_name}'\n", - "\n", - "# Create the engine\n", - "engine_prod = create_engine(connection_string)\n" - ] - }, - { - "cell_type": "markdown", - "id": "92d728a8-6324-46ce-b18a-16d371092df4", - "metadata": {}, - "source": [ - "## Get data from point apport " - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "id": "e751080b-b54d-4f3c-a97b-0224ddb4d1b4", - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/hamzaa/data/quefairedemesobjets/venv/lib/python3.9/site-packages/urllib3/__init__.py:34: NotOpenSSLWarning: urllib3 v2.0 only supports OpenSSL 1.1.1+, currently the 'ssl' module is compiled with 'LibreSSL 2.8.3'. 
See: https://github.com/urllib3/urllib3/issues/3020\n", - " warnings.warn(\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "https://data.pointsapport.ademe.fr/data-fair/api/v1/datasets/zkt20z09p8jl6oix18a5kcte/lines?size=10000&after=1709624636413%2C403043131910\n", - "https://data.pointsapport.ademe.fr/data-fair/api/v1/datasets/zkt20z09p8jl6oix18a5kcte/lines?size=10000&after=1709624630377%2C403037095920\n", - "https://data.pointsapport.ademe.fr/data-fair/api/v1/datasets/zkt20z09p8jl6oix18a5kcte/lines?size=10000&after=1709624624501%2C403031219930\n", - "None\n" - ] - } - ], - "source": [ - "import requests\n", - "import pandas as pd\n", - "\n", - "\n", - "def fetch_all_data(url):\n", - " all_data = []\n", - " while url:\n", - " response = requests.get(url)\n", - " if response.status_code == 200:\n", - " data = response.json()\n", - " all_data.extend(data['results'])\n", - " # Check if there's a next page link\n", - " url = data.get('next', None)\n", - " print(url)\n", - " else:\n", - " print(f\"Failed to fetch data: {response.status_code}\")\n", - " break\n", - " return all_data\n", - "\n", - "api_url = \"https://data.pointsapport.ademe.fr/data-fair/api/v1/datasets/donnees-eo-refashion/lines?size=10000\"\n", - "\n", - "data = fetch_all_data(api_url)\n", - "\n", - "df = pd.DataFrame(data)\n", - "\n" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "id": "14ddd3e2-c35c-433b-8d90-cf827849988d", - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/hamzaa/data/quefairedemesobjets/venv/lib/python3.9/site-packages/pandas/io/sql.py:1665: SAWarning: Did not recognize type 'geometry' of column 'location'\n", - " self.meta.reflect(bind=self.con, only=[table_name], views=True)\n", - "/Users/hamzaa/data/quefairedemesobjets/venv/lib/python3.9/site-packages/pandas/io/sql.py:1665: SAWarning: Did not recognize type 'geometry' of column 'location'\n", - " self.meta.reflect(bind=self.con, 
only=[table_name], views=True)\n", - "/Users/hamzaa/data/quefairedemesobjets/venv/lib/python3.9/site-packages/pandas/io/sql.py:1665: SAWarning: Did not recognize type 'geometry' of column 'location'\n", - " self.meta.reflect(bind=self.con, only=[table_name], views=True)\n" - ] - } - ], - "source": [ - "df_acteurtype = pd.read_sql_table('qfdmo_acteurtype', engine)\n", - "df_sources = pd.read_sql_table('qfdmo_source', engine)\n", - "df_da = pd.read_sql_table('qfdmo_displayedacteur', engine)\n", - "df_ps = pd.read_sql_table('qfdmo_propositionservice', engine)\n", - "df_ps['id'].max()\n", - "df_pssc = pd.read_sql_table('qfdmo_propositionservice_sous_categories', engine)\n", - "df_action = pd.read_sql_table('qfdmo_action', engine)\n", - "df_ac = pd.read_sql_table('qfdmo_acteur', engine)\n", - "df_libel = pd.read_sql_table('qfdmo_labelqualite', engine)\n" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "id": "367b49e9-c625-4933-a7b4-f77b8f9a9803", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "

\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
idlibellecodeafficherbonusurllogo_file
01Repar'ActeurreparacteurTrueFalsehttps://www.artisanat.fr/annuaire-repar-acteurs
13Re_fashionrefashionTrueTruehttps://refashion.fr/citoyen/fr/bonus-reparationlogos/Refashion_32.png
24EcomaisonecomaisonTrueTruehttps://ecomaison.com/developper-reparation/logos/ecomaison32.png
35Bonus RéparbonusreparTrueTrueNonelogos/BonusRepar32.png
42QualiRéparqualireparTrueTruehttps://www.label-qualirepar.fr/logos/logo-qualirepar.png
\n", - "
" - ], - "text/plain": [ - " id libelle code afficher bonus \\\n", - "0 1 Repar'Acteur reparacteur True False \n", - "1 3 Re_fashion refashion True True \n", - "2 4 Ecomaison ecomaison True True \n", - "3 5 Bonus Répar bonusrepar True True \n", - "4 2 QualiRépar qualirepar True True \n", - "\n", - " url logo_file \n", - "0 https://www.artisanat.fr/annuaire-repar-acteurs \n", - "1 https://refashion.fr/citoyen/fr/bonus-reparation logos/Refashion_32.png \n", - "2 https://ecomaison.com/developper-reparation/ logos/ecomaison32.png \n", - "3 None logos/BonusRepar32.png \n", - "4 https://www.label-qualirepar.fr/ logos/logo-qualirepar.png " - ] - }, - "execution_count": 6, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df_libel" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "id": "36c97099-1826-44df-bdf4-9259f70c9bca", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "Engine(postgresql://quefairedem_2657:***@quefairedem-2657.postgresql.a.osc-fr1.scalingo-dbs.com:33517/quefairedem_2657)" - ] - }, - "execution_count": 4, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df_action = pd.read_sql_table('qfdmo_action', engine)\n", - "engine" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "id": "b8ebc8c3-87a0-4dec-905d-5afc2fc85278", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
idcodelibelleorderdescriptioncouleuriconafficher
09echangeréchanger7Noneblue-cumulusfr-icon-action-echangerTrue
16mettreenlocationmettre en location4Mettre en locationpurple-glycinefr-icon-action-mettreenlocationTrue
25louerlouer3Nonepurple-glycinefr-icon-action-louerTrue
34donnerdonner6Noneyellow-tournesolfr-icon-action-donnerTrue
43revendrevendre9Nonebrown-cafe-cremefr-icon-action-vendreTrue
52acheteracheter de seconde main8acheter d'occasionbrown-cafe-cremefr-icon-action-acheterTrue
61reparerréparer5Nonegreen-menthefr-icon-action-reparerTrue
78preterprêter1Noneorange-terre-battuefr-icon-action-preterTrue
87emprunteremprunter2Noneorange-terre-battuefr-icon-action-emprunterTrue
943triertrier10trier pour recycleryellow-tournesolfr-icon-recycle-lineTrue
\n", - "
" - ], - "text/plain": [ - " id code libelle order description \\\n", - "0 9 echanger échanger 7 None \n", - "1 6 mettreenlocation mettre en location 4 Mettre en location \n", - "2 5 louer louer 3 None \n", - "3 4 donner donner 6 None \n", - "4 3 revendre vendre 9 None \n", - "5 2 acheter acheter de seconde main 8 acheter d'occasion \n", - "6 1 reparer réparer 5 None \n", - "7 8 preter prêter 1 None \n", - "8 7 emprunter emprunter 2 None \n", - "9 43 trier trier 10 trier pour recycler \n", - "\n", - " couleur icon afficher \n", - "0 blue-cumulus fr-icon-action-echanger True \n", - "1 purple-glycine fr-icon-action-mettreenlocation True \n", - "2 purple-glycine fr-icon-action-louer True \n", - "3 yellow-tournesol fr-icon-action-donner True \n", - "4 brown-cafe-creme fr-icon-action-vendre True \n", - "5 brown-cafe-creme fr-icon-action-acheter True \n", - "6 green-menthe fr-icon-action-reparer True \n", - "7 orange-terre-battue fr-icon-action-preter True \n", - "8 orange-terre-battue fr-icon-action-emprunter True \n", - "9 yellow-tournesol fr-icon-recycle-line True " - ] - }, - "execution_count": 5, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df_action" - ] - }, - { - "cell_type": "markdown", - "id": "5877b711-6b06-4f69-a22d-9746aaae7c8d", - "metadata": {}, - "source": [ - "### Mappers" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "id": "99ba1159-5704-478a-8cac-01fd89a8339f", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "{'id_point_apport_ou_reparation': 'identifiant_externe', 'adresse_complement': 'adresse_complement', 'type_de_point_de_collecte': 'acteur_type_id', 'telephone': 'telephone', 'siret': 'siret', 'uniquement_sur_rdv': '', 'exclusivite_de_reprisereparation': '', 'filiere': '', 'public_accueilli': '', 'produitsdechets_acceptes': '', 'labels_etou_bonus': '', 'reprise': '', 'point_de_reparation': '', 'ecoorganisme': 'source_id', 'adresse_format_ban': 
'adresse', 'nom_de_lorganisme': 'nom', 'enseigne_commerciale': 'nom_commercial', '_updatedAt': 'cree_le', 'site_web': 'url', 'email': 'email', 'perimetre_dintervention': '', 'longitudewgs84': 'location', 'latitudewgs84': 'location', 'horaires_douverture': 'horaires', 'consignes_dacces': 'description'}\n" - ] - } - ], - "source": [ - "column_mapping = {\n", - " 'id_point_apport_ou_reparation': 'identifiant_externe',\n", - " 'adresse_complement': 'adresse_complement',\n", - " 'type_de_point_de_collecte': 'acteur_type_id',\n", - " 'telephone': 'telephone',\n", - " 'siret': 'siret',\n", - " 'uniquement_sur_rdv': '',\n", - " 'exclusivite_de_reprisereparation': '',\n", - " 'filiere': '',\n", - " 'public_accueilli': '',\n", - " 'produitsdechets_acceptes': '',\n", - " 'labels_etou_bonus': '',\n", - " 'reprise': '',\n", - " 'point_de_reparation': '',\n", - " 'ecoorganisme': 'source_id',\n", - " 'adresse_format_ban': 'adresse',\n", - " 'nom_de_lorganisme': 'nom',\n", - " 'enseigne_commerciale':'nom_commercial',\n", - " '_updatedAt':'cree_le',\n", - " 'site_web': 'url',\n", - " 'email': 'email',\n", - " 'perimetre_dintervention': '',\n", - " 'longitudewgs84': 'location', \n", - " 'latitudewgs84': 'location', \n", - " 'horaires_douverture': 'horaires',\n", - " 'consignes_dacces': 'description',\n", - "}\n", - "\n", - "\n", - "# Print the dictionary for visual confirmation\n", - "print(column_mapping)" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "id": "eb706aae-67f4-4810-a6fb-77f21b534798", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "Index(['id_point_apport_ou_reparation', 'adresse_complement',\n", - " 'type_de_point_de_collecte', 'telephone', '_i', 'siret',\n", - " 'uniquement_sur_rdv', 'exclusivite_de_reprisereparation', 'filiere',\n", - " 'public_accueilli', '_rand', 'point_dapport_pour_reemploi',\n", - " 'point_de_collecte_ou_de_reprise_des_dechets',\n", - " 'produitsdechets_acceptes', 'labels_etou_bonus', 'reprise',\n", - " 
'point_de_reparation', 'ecoorganisme', 'adresse_format_ban',\n", - " 'nom_de_lorganisme', 'enseigne_commerciale', '_updatedAt',\n", - " 'point_dapport_de_service_reparation', 'site_web', '_score', '_id',\n", - " 'service_a_domicile', 'email', 'perimetre_dintervention',\n", - " 'longitudewgs84', '_geopoint', 'latitudewgs84', 'horaires_douverture',\n", - " 'consignes_dacces', 'identifiant_externe', 'acteur_type_id'],\n", - " dtype='object')" - ] - }, - "execution_count": 10, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df.columns" - ] - }, - { - "cell_type": "markdown", - "id": "efbd6891-e90a-49ae-ae76-ff02a48ffde7", - "metadata": {}, - "source": [ - "### Transformations" - ] - }, - { - "cell_type": "markdown", - "id": "1799717a-a4ff-4039-b1eb-7c4669384841", - "metadata": {}, - "source": [ - "#### Create Actors" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "id": "8fba9bda-e2d6-448a-80ee-8386e42f6677", - "metadata": {}, - "outputs": [], - "source": [ - "from shapely.geometry import Point\n", - "from shapely import wkb\n", - "import re\n", - "import hashlib\n", - "\n", - "\n", - "selected_columns = ['nom', 'adresse', 'type_de_point_de_collecte', 'id_point_apport_ou_reparation','identifiant_externe']\n", - "\n", - "def generate_unique_id(row):\n", - " unique_str = '_'.join([str(row[col]) for col in selected_columns])\n", - " return hashlib.sha256(unique_str.encode()).hexdigest()\n", - "def transform_acteur_type_id(value):\n", - " mapping_dict = {\n", - " \"Solution en ligne (site web, app. 
mobile)\": \"en ligne (web, mobile)\",\n", - " \"Artisan, commerce indépendant\": \"artisan, commerce indépendant\",\n", - " \"Magasin / Franchise, Enseigne commerciale / Distributeur / Point de vente\": \"commerce\",\n", - " \"Point d'Apport Volontaire Publique\": \"point d'apport volontaire public\",\n", - " \"Association, entreprise de l’économie sociale et solidaire (ESS)\": \"Association, entreprise de l'ESS\",\n", - " \"Déchèterie\": \"déchèterie\",\n", - " }\n", - " libelle = mapping_dict.get(value)\n", - " id_value = df_acteurtype.loc[df_acteurtype['libelle'] == libelle, 'id'].values[0] if any(df_acteurtype['libelle'] == libelle) else None\n", - " return id_value\n", - "\n", - "\n", - "\n", - "def transform_location(longitude, latitude):\n", - " point = Point(longitude, latitude)\n", - " \n", - " transformed_location_binary = wkb.dumps(point)\n", - " transformed_location_hex = transformed_location_binary.hex()\n", - "\n", - " return transformed_location_hex\n", - "\n", - "def transform_ecoorganisme(value):\n", - " \n", - " id_value = df_sources.loc[df_sources['code'].str.lower() == value.lower(), 'id'].values[0] if any(df_sources['code'].str.lower() == value.lower()) else None\n", - " return id_value\n", - "\n", - "def extract_details(row):\n", - " pattern = re.compile(r'\\b(\\d{5})\\s+(.*)')\n", - " \n", - " address = None\n", - " postal_code = None\n", - " city = None\n", - " if pd.isnull(row['adresse_format_ban']):\n", - " return pd.Series([None, None, None])\n", - "\n", - " # Ensure adress_ban is treated as a string\n", - " adress_ban = str(row['adresse_format_ban'])\n", - " \n", - " # Search for the pattern\n", - " match = pattern.search(adress_ban)\n", - " if match:\n", - " postal_code = match.group(1)\n", - " city = match.group(2)\n", - " address = adress_ban[:match.start()].strip()\n", - " \n", - " return pd.Series([address, postal_code, city])\n", - "\n", - "# Apply the function and assign the result to new columns\n", - "for old_col, new_col in 
column_mapping.items():\n", - " if new_col: \n", - " if old_col == 'type_de_point_de_collecte':\n", - " df[new_col] = df[old_col].apply(transform_acteur_type_id)\n", - " elif old_col in ('longitudewgs84', 'latitudewgs84'):\n", - " df['location'] = df.apply(lambda row: transform_location(row['longitudewgs84'], row['latitudewgs84']), axis=1)\n", - " elif old_col == 'ecoorganisme':\n", - " df[new_col] = df[old_col].apply(transform_ecoorganisme)\n", - " elif old_col == 'adresse_format_ban':\n", - " df[['adresse', 'code_postal', 'ville']] = df.apply(extract_details, axis=1)\n", - " else:\n", - " df[new_col] = df[old_col]\n", - "df['label_reparacteur']=False\n", - "df['statut']='ACTIF'\n", - "df['identifiant_unique'] = df.apply(generate_unique_id, axis=1)\n", - " \n" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "id": "42ec6cd5-9bea-4a30-82bb-4e8176ebb935", - "metadata": {}, - "outputs": [], - "source": [ - "df.loc[df['service_a_domicile']=='service à domicile uniquement','statut'] = 'SUPPRIME'" - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "id": "fdf57e11-358a-4671-81c8-234f5046e5a7", - "metadata": {}, - "outputs": [], - "source": [ - "df['modifie_le'] = df['cree_le']\n", - "df['siret'] = df['siret'].astype(str).apply(lambda x : x[:14])\n", - "df['telephone'] = df['telephone'].dropna().apply(lambda x: x.replace(' ', ''))\n", - "df['telephone'] = df['telephone'].dropna().apply(lambda x: '0' + x[2:] if x.startswith('33') else x)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "be500ad1-3b1f-4966-8ad3-4d6a96cca0a2", - "metadata": {}, - "outputs": [], - "source": [ - "df.drop_duplicates('identifiant_unique', keep='first', inplace=True)" - ] - }, - { - "cell_type": "markdown", - "id": "4006cd25-abb6-48d2-b0b0-57a59465c217", - "metadata": {}, - "source": [ - "#### Create Proposition de services" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "c1ffc6bf-b0d7-49fe-a10d-4da1f7d79e64", - "metadata": 
{}, - "outputs": [], - "source": [ - "rows_list = []\n", - "\n", - "for index, row in df.iterrows():\n", - " acteur_id = row['identifiant_unique']\n", - " action_id = None\n", - " sous_categories = row['produitsdechets_acceptes']\n", - " if row['point_dapport_de_service_reparation']:\n", - " acteur_service_id = 17\n", - " action_id = 1\n", - " elif row['point_dapport_pour_reemploi']:\n", - " acteur_service_id = 4\n", - " action_id = 4\n", - " elif row['point_de_reparation']:\n", - " acteur_service_id = 15\n", - " action_id = 1\n", - " elif row['point_de_collecte_ou_de_reprise_des_dechets']:\n", - " acteur_service_id = 4\n", - " action_id = 43\n", - " else:\n", - " continue # Skip rows that don't match any criteria\n", - " \n", - " rows_list.append({\"acteur_service_id\": acteur_service_id, \"action_id\": action_id, \"acteur_id\": acteur_id, \"sous_categories\":sous_categories})\n", - "\n", - "df_pds = pd.DataFrame(rows_list)\n", - "df_pds.index = range(df_ps['id'].max()+1, df_ps['id'].max()+1 + len(df_pds))\n", - "\n", - "df_pds['id'] = df_pds.index\n" - ] - }, - { - "cell_type": "markdown", - "id": "21cd6591-c3bf-4e88-b9fa-5624acaeede4", - "metadata": {}, - "source": [ - "#### Create sous categories" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "dd0aa7a8-5a29-4425-bf00-90b24dba3ff3", - "metadata": {}, - "outputs": [], - "source": [ - "rows_list=[]\n", - "sous_categories = { \n", - " \"Vêtement\" : 107,\n", - " \"Linge\" : 104,\n", - " \"Chaussure\":109\n", - "}\n", - "for index, row in df_pds.iterrows():\n", - " products = str(row[\"sous_categories\"]).split(\"|\")\n", - " for product in products:\n", - " if product.strip() in sous_categories:\n", - " rows_list.append({\n", - " 'propositionservice_id': row['id'], \n", - " 'souscategorieobjet_id': sous_categories[product.strip()]\n", - " })\n", - "\n", - "df_sous_categories = pd.DataFrame(rows_list, columns=['propositionservice_id', 'souscategorieobjet_id'])\n", - "\n", - 
"df_sous_categories" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "id": "3fa2edc3-e1ce-4ed4-a6d2-5e4fb91b15a4", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "array(['Agréé Bonus Réparation', nan], dtype=object)" - ] - }, - "execution_count": 10, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df['labels_etou_bonus'].unique()" - ] - }, - { - "cell_type": "markdown", - "id": "35a5b05a-f8b5-4a1a-8962-8f830319fe61", - "metadata": {}, - "source": [ - "#### Create libellé" - ] - }, - { - "cell_type": "code", - "execution_count": 16, - "id": "922b66b1-75a8-4779-aef2-8117d4a9f29e", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
acteur_idlabelqualite_idlabelqualite
05c761cd79c679af340a540c4d77cca46bd5cef232e2fcf...3Re_fashion
1da991542b3c34ec43043501b5e8c0fa7025c9e4d485a97...3Re_fashion
236ff562c2a87bda80a22950f01bb7cf66c4f2dcda31949...3Re_fashion
3c868120d7e4f4f2d400672ac8af567bbc771966814ff37...3Re_fashion
4c1dc4d491615af30a41ea0042ce0ac785d0fb434361d92...3Re_fashion
............
916f978ca97c24b6f509e97008edbac01e2b7b6f9335026b4...3Re_fashion
9170cf31f76552155f68a6b5bf3d6ac7b27c402a329306998...3Re_fashion
918dc9ec750646094de9057bfa26d06d8eaee5030dbf17458...3Re_fashion
91999ec7e15a132bc57a741cbb65f5bae338a5a1af13de8f9...3Re_fashion
92032b19f5dd787346d34bbb537864d4607eaa8d9ef9324cd...3Re_fashion
\n", - "

921 rows × 3 columns

\n", - "
" - ], - "text/plain": [ - " acteur_id labelqualite_id \\\n", - "0 5c761cd79c679af340a540c4d77cca46bd5cef232e2fcf... 3 \n", - "1 da991542b3c34ec43043501b5e8c0fa7025c9e4d485a97... 3 \n", - "2 36ff562c2a87bda80a22950f01bb7cf66c4f2dcda31949... 3 \n", - "3 c868120d7e4f4f2d400672ac8af567bbc771966814ff37... 3 \n", - "4 c1dc4d491615af30a41ea0042ce0ac785d0fb434361d92... 3 \n", - ".. ... ... \n", - "916 f978ca97c24b6f509e97008edbac01e2b7b6f9335026b4... 3 \n", - "917 0cf31f76552155f68a6b5bf3d6ac7b27c402a329306998... 3 \n", - "918 dc9ec750646094de9057bfa26d06d8eaee5030dbf17458... 3 \n", - "919 99ec7e15a132bc57a741cbb65f5bae338a5a1af13de8f9... 3 \n", - "920 32b19f5dd787346d34bbb537864d4607eaa8d9ef9324cd... 3 \n", - "\n", - " labelqualite \n", - "0 Re_fashion \n", - "1 Re_fashion \n", - "2 Re_fashion \n", - "3 Re_fashion \n", - "4 Re_fashion \n", - ".. ... \n", - "916 Re_fashion \n", - "917 Re_fashion \n", - "918 Re_fashion \n", - "919 Re_fashion \n", - "920 Re_fashion \n", - "\n", - "[921 rows x 3 columns]" - ] - }, - "execution_count": 16, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "rows_list= []\n", - "for index, row in df.iterrows():\n", - " label = str(row[\"labels_etou_bonus\"])\n", - " if label == 'Agréé Bonus Réparation':\n", - " rows_list.append({\n", - " 'acteur_id': row['identifiant_unique'], \n", - " 'labelqualite_id': 3,\n", - " 'labelqualite': df_libel.loc[df_libel[\"id\"]==3,\"libelle\"].tolist()[0]\n", - " })\n", - "\n", - "df_libelles = pd.DataFrame(rows_list, columns=['acteur_id', 'labelqualite_id', 'labelqualite'])\n", - "\n", - "df_libelles" - ] - }, - { - "cell_type": "code", - "execution_count": 15, - "id": "d3a66633-862c-43e2-a6c3-e2e61900f0f1", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "'Re_fashion'" - ] - }, - "execution_count": 15, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [] - }, - { - "cell_type": "markdown", - "id": "23321c55-0d13-466a-8ea0-10a4f87d640e", - 
"metadata": {}, - "source": [ - "#### Add to DB" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "8e4b3c28-5f10-45f2-b194-8d8fce24a26f", - "metadata": {}, - "outputs": [], - "source": [ - "\n", - "df[[\n", - " 'identifiant_unique',\n", - " 'acteur_type_id',\n", - "'adresse',\n", - " 'code_postal', 'ville',\n", - " 'adresse_complement',\n", - " 'commentaires',\n", - " 'description',\n", - " 'email',\n", - " 'horaires',\n", - " 'identifiant_externe',\n", - " 'label_reparacteur',\n", - " 'nom_commercial',\n", - " 'nom',\n", - " 'location',\n", - "'cree_le',\n", - "'modifie_le',\n", - "'multi_base',\n", - "'manuel',\n", - "'statut',\n", - " 'siret',\n", - " 'source_id',\n", - " 'telephone',\n", - " 'url'\n", - "]].to_sql(\"qfdmo_acteur\",engine, if_exists='append',index=False,method='multi',chunksize=1000)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "b59130e3-4843-45f4-a435-f48afe795b81", - "metadata": {}, - "outputs": [], - "source": [ - "df_pds[['acteur_service_id','action_id','acteur_id','id']].to_csv('refashion_propositionservice.csv')\n", - "df_pds[['id','acteur_service_id','action_id','acteur_id']].to_sql(\"qfdmo_propositionservice\",engine, if_exists='append',index=False,method='multi',chunksize=1000)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "6be62cd5-afbe-4dd8-ab05-00149be0fb23", - "metadata": {}, - "outputs": [], - "source": [ - "df_sous_categories[['propositionservice_id','souscategorieobjet_id']].to_csv('refashion_sous_categories.csv')" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "8bda7e30-4236-4d36-829b-f60c5682d5f6", - "metadata": {}, - "outputs": [], - "source": [ - "df_sous_categories.to_sql(\"qfdmo_propositionservice_sous_categories\",engine, if_exists='append',index=False,method='multi',chunksize=1000)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "e148879d-1c5d-4d44-97fb-75f1170c84fb", - "metadata": {}, - "outputs": 
[], - "source": [ - "delete_query= \"\"\"\n", - "DELETE FROM qfdmo_propositionservice_sous_categories\n", - " USING qfdmo_propositionservice_sous_categories_refashion\n", - " WHERE qfdmo_propositionservice_sous_categories.propositionservice_id = qfdmo_propositionservice_sous_categories_refashion.propositionservice_id\n", - " AND qfdmo_propositionservice_sous_categories.souscategorieobjet_id = qfdmo_propositionservice_sous_categories_refashion.souscategorieobjet_id;\"\"\"" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "0daa4b03-4b75-4e17-9ae1-04a01b3d9b84", - "metadata": {}, - "outputs": [], - "source": [ - "import psycopg2\n", - "from psycopg2 import sql\n", - "from sqlalchemy import create_engine\n", - "user = \n", - "password = \n", - "host = \n", - "port = '33517' # default PostgreSQL port is 5432\n", - "db_name = 'quefairedem_2657'\n", - "# Access variables in .env\n", - "conn = psycopg2.connect(\n", - " dbname=db_name, \n", - " user=user, \n", - " password=password, \n", - " host=host,\n", - " port=port\n", - ")\n", - "conn.autocommit = True\n", - "cursor = conn.cursor()\n", - "\n", - "query = sql.SQL(sql_query)\n", - "cursor.execute(query)\n", - "\n", - "cursor.close()\n", - "conn.close()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "3d8bece0-bb45-494d-a115-5533e5b4fb98", - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "8900582c-e316-46fc-9b3e-26930baeda73", - "metadata": {}, - "outputs": [], - "source": [ - "df.loc[df['service_a_domicile']=='service à domicile uniquement',['statut','identifiant_unique']].to_sql(\"qfdmo_acteur_fix_sd\",engined)" - ] - }, - { - "cell_type": "markdown", - "id": "ffb1a425-7ab2-47bc-b829-ff636e9f8729", - "metadata": {}, - "source": [ - "## Revision Christian --> revisionacteur" - ] - }, - { - "cell_type": "code", - "execution_count": 65, - "id": "839957c6-a8d0-4da9-8a91-f0adec021026", - "metadata": {}, - 
"outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/var/folders/0b/ssm8dl5n5td_t_2lb_8qn6500000gn/T/ipykernel_47073/3244965094.py:1: DtypeWarning: Columns (8,12,22,24) have mixed types. Specify dtype option on import or set low_memory=False.\n", - " df_chris_rev = pd.read_csv(\"./../../refashion_acteurs_chris_28032024.csv\")\n" - ] - } - ], - "source": [ - "df_chris_rev = pd.read_csv(\"./../../refashion_acteurs_chris_28032024.csv\")" - ] - }, - { - "cell_type": "code", - "execution_count": 69, - "id": "a0901dad-3648-461a-91c0-5dc3af1b4f07", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
nomhorairesurladresse_complement
0MFCNaNhttps://www.lamanufacture49.fr/SAINT-PIERRE-MONTLIMART
1TILLINaNhttps://tilli.fr/NaN
2TILLINaNhttps://tilli.fr/NaN
3TILLINaNhttps://tilli.fr/NaN
4A.C.L PROXI POL.NaNNaNNaN
...............
35925APIVET24h/24. 7j/7https://www.apivet49.com/Place Tout Blanc
35926APIVET24h/24. 7j/7https://www.apivet49.com/NaN
35927APIVET24h/24. 7j/7https://www.apivet49.com/NaN
35928APIVET24h/24. 7j/7https://www.apivet49.com/NaN
35929APIVET24h/24. 7j/7https://www.apivet49.com/NaN
\n", - "

35930 rows × 4 columns

\n", - "
" - ], - "text/plain": [ - " nom horaires url \\\n", - "0 MFC NaN https://www.lamanufacture49.fr/ \n", - "1 TILLI NaN https://tilli.fr/ \n", - "2 TILLI NaN https://tilli.fr/ \n", - "3 TILLI NaN https://tilli.fr/ \n", - "4 A.C.L PROXI POL. NaN NaN \n", - "... ... ... ... \n", - "35925 APIVET 24h/24. 7j/7 https://www.apivet49.com/ \n", - "35926 APIVET 24h/24. 7j/7 https://www.apivet49.com/ \n", - "35927 APIVET 24h/24. 7j/7 https://www.apivet49.com/ \n", - "35928 APIVET 24h/24. 7j/7 https://www.apivet49.com/ \n", - "35929 APIVET 24h/24. 7j/7 https://www.apivet49.com/ \n", - "\n", - " adresse_complement \n", - "0 SAINT-PIERRE-MONTLIMART \n", - "1 NaN \n", - "2 NaN \n", - "3 NaN \n", - "4 NaN \n", - "... ... \n", - "35925 Place Tout Blanc \n", - "35926 NaN \n", - "35927 NaN \n", - "35928 NaN \n", - "35929 NaN \n", - "\n", - "[35930 rows x 4 columns]" - ] - }, - "execution_count": 69, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df_chris_rev[['nom','horaires','url','adresse_complement']]" - ] - }, - { - "cell_type": "code", - "execution_count": 33, - "id": "49a64056-f1ed-4994-b1c1-7acb322fe6a6", - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/hamzaa/data/quefairedemesobjets/venv/lib/python3.9/site-packages/pandas/io/sql.py:1665: SAWarning: Did not recognize type 'geometry' of column 'location'\n", - " self.meta.reflect(bind=self.con, only=[table_name], views=True)\n", - "/Users/hamzaa/data/quefairedemesobjets/venv/lib/python3.9/site-packages/pandas/io/sql.py:1665: SAWarning: Did not recognize type 'geometry' of column 'location'\n", - " self.meta.reflect(bind=self.con, only=[table_name], views=True)\n" - ] - } - ], - "source": [ - "df_revact = pd.read_sql_table(\"qfdmo_revisionacteur\",engine_prod)\n", - "df_actprod = pd.read_sql_table(\"qfdmo_acteur\",engine_prod)" - ] - }, - { - "cell_type": "code", - "execution_count": 44, - "id": "c5453169-209a-41d3-b1ff-7f1b9d580f08", - 
"metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "160552" - ] - }, - "execution_count": 44, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df_revact['identifiant_unique'].count()" - ] - }, - { - "cell_type": "code", - "execution_count": 45, - "id": "aa95a640-2312-45d2-82c4-5d7712acbe80", - "metadata": {}, - "outputs": [], - "source": [ - "df_rev_man = pd.merge(df_actprod[(df_actprod['source_id']==45)][['identifiant_unique']],df_revact, on = ['identifiant_unique'])" - ] - }, - { - "cell_type": "code", - "execution_count": 60, - "id": "487b683a-ccb9-4088-837c-7536f37a0c1e", - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/hamzaa/data/quefairedemesobjets/venv/lib/python3.9/site-packages/pandas/io/sql.py:1665: SAWarning: Did not recognize type 'geometry' of column 'location'\n", - " self.meta.reflect(bind=self.con, only=[table_name], views=True)\n" - ] - } - ], - "source": [ - "df_refashion = pd.read_sql_table(\"qfdmo_acteur\", engine)" - ] - }, - { - "cell_type": "code", - "execution_count": 62, - "id": "1b6f06bf-005b-4a6c-84c1-383ef39af61b", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
nomidentifiant_uniqueadresseadresse_complementcode_postalvilleurlemaillocationtelephone...acteur_type_idstatutsource_idcree_lemodifie_lenaf_principalcommentaireshoraires_osmdescriptionhoraires_description
21690Ghenam Reparationrefashion_SWKLYBWCFOLZ7 Rue FranklinNone93100Montreuilhttps://ghenam-reparation.jimdosite.com/nos-ta...None0101000020E6100000990F0874268D034068321CCF676E...None...3ACTIF452023-11-08 17:28:48.329397+00:002023-11-24 10:17:25.965587+00:00NoneNoneNoneNone
42031Patine Studiorefashion_FINMHILZPXHW8 rue MartelNone75010Parishttps://www.patine.fr/blogs/book-a-sessionhello@patine.fr0101000020E6100000D828EB3713D30240A75D4C33DD6F...0181701618...3ACTIF452024-01-11 12:55:34.682766+00:002024-01-11 12:56:35.642529+00:0047.91ATu-Sa 11:00-19:00NoneNone
42232Au fil et à mesurerefashion_WWXFNUVVIMHC7 Rue de la VendéeNone79130Secondignyhttps://secondigny.fr/utile/annuaire-entrepris...None0101000020E61000001477BCC96FD1DABF58478E74064E...06 24 35 91 55...3ACTIF452024-01-17 07:55:28.288884+00:002024-01-17 07:55:28.288894+00:0014.13ZNoneNoneNone
42420La Réserve Varzyrefashion_KFUJSXXOKJPM19 Rue DelangleNone58210Varzyhttps://www.facebook.com/lareservevarzy/asso.lareservevarzy@gmail.com0101000020E61000007FA4880CAB180B400853944BE3AD...06 04 01 18 10...4ACTIF452024-03-06 17:44:16.226699+00:002024-03-06 17:44:16.226711+00:0047.29ZNoneNone
168751MFCrefashion_TLC-REFASHION-REP-455001208507113095_dRTE CHAUDRONSAINT-PIERRE-MONTLIMART49110MONTREVAULT-SUR-EVREhttps://www.lamanufacture49.fr/None0101000020E6100000000000000000F87F000000000000...0241754850...5ACTIF452024-03-05 07:44:03.205000+00:002024-03-05 07:44:03.205000+00:00NoneNoneNoneNoneNone
..................................................................
205929APIVETrefashion_TLC-REFASHION-PAV-3271797Angle rue Bertin et Avenue Jean JoxePlace Tout Blanc49000AngersNoneNone0101000020E610000082C5E1CCAF66E1BF82548A1D8DBD...None...10ACTIF452024-03-05 07:43:41.565000+00:002024-03-05 07:43:41.565000+00:00NoneNoneNoneNone24h/24. 7j/7
205930APIVETrefashion_TLC-REFASHION-PAV-3271796rue Louis GainNone49000AngersNoneNone0101000020E6100000925852EE3E47E1BF2B4CDF6B08BC...None...10ACTIF452024-03-05 07:43:41.565000+00:002024-03-05 07:43:41.565000+00:00NoneNoneNoneNone24h/24. 7j/7
205931APIVETrefashion_TLC-REFASHION-PAV-3271795Place André LeroyNone49000AngersNoneNone0101000020E6100000B2BCAB1E308FE1BFBA1457957DBB...None...10ACTIF452024-03-05 07:43:41.565000+00:002024-03-05 07:43:41.565000+00:00NoneNoneNoneNone24h/24. 7j/7
205932APIVETrefashion_TLC-REFASHION-PAV-3271794101 RUE ST NICOLASNone49000AngersNoneNone0101000020E6100000822A244F4821E2BF03DA0C26B4BC...None...10ACTIF452024-03-05 07:43:41.565000+00:002024-03-05 07:43:41.565000+00:00NoneNoneNoneNone24h/24. 7j/7
205933APIVETrefashion_TLC-REFASHION-PAV-3271793Place BichonNone49000AngersNoneNone0101000020E6100000EE60C43E0114E2BFFAB9A1293BBD...None...10ACTIF452024-03-05 07:43:41.565000+00:002024-03-05 07:43:41.565000+00:00NoneNoneNoneNone24h/24. 7j/7
\n", - "

35934 rows × 24 columns

\n", - "
" - ], - "text/plain": [ - " nom identifiant_unique \\\n", - "21690 Ghenam Reparation refashion_SWKLYBWCFOLZ \n", - "42031 Patine Studio refashion_FINMHILZPXHW \n", - "42232 Au fil et à mesure refashion_WWXFNUVVIMHC \n", - "42420 La Réserve Varzy refashion_KFUJSXXOKJPM \n", - "168751 MFC refashion_TLC-REFASHION-REP-455001208507113095_d \n", - "... ... ... \n", - "205929 APIVET refashion_TLC-REFASHION-PAV-3271797 \n", - "205930 APIVET refashion_TLC-REFASHION-PAV-3271796 \n", - "205931 APIVET refashion_TLC-REFASHION-PAV-3271795 \n", - "205932 APIVET refashion_TLC-REFASHION-PAV-3271794 \n", - "205933 APIVET refashion_TLC-REFASHION-PAV-3271793 \n", - "\n", - " adresse adresse_complement \\\n", - "21690 7 Rue Franklin None \n", - "42031 8 rue Martel None \n", - "42232 7 Rue de la Vendée None \n", - "42420 19 Rue Delangle None \n", - "168751 RTE CHAUDRON SAINT-PIERRE-MONTLIMART \n", - "... ... ... \n", - "205929 Angle rue Bertin et Avenue Jean Joxe Place Tout Blanc \n", - "205930 rue Louis Gain None \n", - "205931 Place André Leroy None \n", - "205932 101 RUE ST NICOLAS None \n", - "205933 Place Bichon None \n", - "\n", - " code_postal ville \\\n", - "21690 93100 Montreuil \n", - "42031 75010 Paris \n", - "42232 79130 Secondigny \n", - "42420 58210 Varzy \n", - "168751 49110 MONTREVAULT-SUR-EVRE \n", - "... ... ... \n", - "205929 49000 Angers \n", - "205930 49000 Angers \n", - "205931 49000 Angers \n", - "205932 49000 Angers \n", - "205933 49000 Angers \n", - "\n", - " url \\\n", - "21690 https://ghenam-reparation.jimdosite.com/nos-ta... \n", - "42031 https://www.patine.fr/blogs/book-a-session \n", - "42232 https://secondigny.fr/utile/annuaire-entrepris... \n", - "42420 https://www.facebook.com/lareservevarzy/ \n", - "168751 https://www.lamanufacture49.fr/ \n", - "... ... 
\n", - "205929 None \n", - "205930 None \n", - "205931 None \n", - "205932 None \n", - "205933 None \n", - "\n", - " email \\\n", - "21690 None \n", - "42031 hello@patine.fr \n", - "42232 None \n", - "42420 asso.lareservevarzy@gmail.com \n", - "168751 None \n", - "... ... \n", - "205929 None \n", - "205930 None \n", - "205931 None \n", - "205932 None \n", - "205933 None \n", - "\n", - " location telephone \\\n", - "21690 0101000020E6100000990F0874268D034068321CCF676E... None \n", - "42031 0101000020E6100000D828EB3713D30240A75D4C33DD6F... 0181701618 \n", - "42232 0101000020E61000001477BCC96FD1DABF58478E74064E... 06 24 35 91 55 \n", - "42420 0101000020E61000007FA4880CAB180B400853944BE3AD... 06 04 01 18 10 \n", - "168751 0101000020E6100000000000000000F87F000000000000... 0241754850 \n", - "... ... ... \n", - "205929 0101000020E610000082C5E1CCAF66E1BF82548A1D8DBD... None \n", - "205930 0101000020E6100000925852EE3E47E1BF2B4CDF6B08BC... None \n", - "205931 0101000020E6100000B2BCAB1E308FE1BFBA1457957DBB... None \n", - "205932 0101000020E6100000822A244F4821E2BF03DA0C26B4BC... None \n", - "205933 0101000020E6100000EE60C43E0114E2BFFAB9A1293BBD... None \n", - "\n", - " ... acteur_type_id statut source_id cree_le \\\n", - "21690 ... 3 ACTIF 45 2023-11-08 17:28:48.329397+00:00 \n", - "42031 ... 3 ACTIF 45 2024-01-11 12:55:34.682766+00:00 \n", - "42232 ... 3 ACTIF 45 2024-01-17 07:55:28.288884+00:00 \n", - "42420 ... 4 ACTIF 45 2024-03-06 17:44:16.226699+00:00 \n", - "168751 ... 5 ACTIF 45 2024-03-05 07:44:03.205000+00:00 \n", - "... ... ... ... ... ... \n", - "205929 ... 10 ACTIF 45 2024-03-05 07:43:41.565000+00:00 \n", - "205930 ... 10 ACTIF 45 2024-03-05 07:43:41.565000+00:00 \n", - "205931 ... 10 ACTIF 45 2024-03-05 07:43:41.565000+00:00 \n", - "205932 ... 10 ACTIF 45 2024-03-05 07:43:41.565000+00:00 \n", - "205933 ... 
10 ACTIF 45 2024-03-05 07:43:41.565000+00:00 \n", - "\n", - " modifie_le naf_principal commentaires \\\n", - "21690 2023-11-24 10:17:25.965587+00:00 None \n", - "42031 2024-01-11 12:56:35.642529+00:00 47.91A \n", - "42232 2024-01-17 07:55:28.288894+00:00 14.13Z \n", - "42420 2024-03-06 17:44:16.226711+00:00 47.29Z \n", - "168751 2024-03-05 07:44:03.205000+00:00 None None \n", - "... ... ... ... \n", - "205929 2024-03-05 07:43:41.565000+00:00 None None \n", - "205930 2024-03-05 07:43:41.565000+00:00 None None \n", - "205931 2024-03-05 07:43:41.565000+00:00 None None \n", - "205932 2024-03-05 07:43:41.565000+00:00 None None \n", - "205933 2024-03-05 07:43:41.565000+00:00 None None \n", - "\n", - " horaires_osm description horaires_description \n", - "21690 None None None \n", - "42031 Tu-Sa 11:00-19:00 None None \n", - "42232 None None None \n", - "42420 None None \n", - "168751 None None None \n", - "... ... ... ... \n", - "205929 None None 24h/24. 7j/7 \n", - "205930 None None 24h/24. 7j/7 \n", - "205931 None None 24h/24. 7j/7 \n", - "205932 None None 24h/24. 7j/7 \n", - "205933 None None 24h/24. 
7j/7 \n", - "\n", - "[35934 rows x 24 columns]" - ] - }, - "execution_count": 62, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "filtered_df = df_refashion[df_refashion['identifiant_unique'].str.startswith(\"refashion_\")]\n", - "filtered_df" - ] - }, - { - "cell_type": "code", - "execution_count": 47, - "id": "19a4623f-4118-40c0-95ad-d163012b6021", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "81" - ] - }, - "execution_count": 47, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df_rev_man.to_sql(\"rev_refashion\",engine_prod, if_exists='replace')" - ] - }, - { - "cell_type": "code", - "execution_count": 140, - "id": "3e0a7cf1-6c27-4e8c-b8c9-d572347df0d5", - "metadata": {}, - "outputs": [], - "source": [ - "import psycopg2\n", - "from psycopg2 import sql\n", - "\n", - "# Connect to the database again\n", - "conn = psycopg2.connect(\n", - " dbname=db_name, \n", - " user=user, \n", - " password=password, \n", - " host=host,\n", - " port=port\n", - ")\n", - "conn.autocommit = True\n", - "cursor = conn.cursor()\n", - "\n", - "# Perform the update\n", - "cursor.execute(\"\"\"\n", - " UPDATE qfdmo_revisionacteur\n", - "SET \n", - " acteur_type_id = NULL,\n", - " adresse = NULL,\n", - " code_postal = NULL, \n", - " ville = NULL,\n", - " email = NULL,\n", - " horaires = NULL,\n", - " identifiant_externe = NULL,\n", - " label_reparacteur = qfdmo_revision_acteur_enrich_christian.label_reparacteur,\n", - " nom_commercial = NULL,\n", - " nom = NULL,\n", - " location = NULL,\n", - " cree_le = NOW(),\n", - " modifie_le = NOW(),\n", - " statut = qfdmo_revision_acteur_enrich_christian.statut,\n", - " siret = NULL,\n", - " source_id = NULL,\n", - " telephone = NULL,\n", - " description = qfdmo_revision_acteur_enrich_christian.description,\n", - " adresse_complement = qfdmo_revision_acteur_enrich_christian.adresse_complement,\n", - " url = qfdmo_revision_acteur_enrich_christian.url\n", - "FROM 
rev_refashion\n", - "WHERE qfdmo_revisionacteur.identifiant_unique = qfdmo_revision_acteur_enrich_christian.identifiant_unique;\n", - "\n", - "\"\"\")\n", - "\n", - "# Cleanup\n", - "cursor.close()\n", - "conn.close()" - ] - }, - { - "cell_type": "markdown", - "id": "98499bbf-de06-40da-8e10-ae33ac429c69", - "metadata": {}, - "source": [ - "DELETE FROM qfdmo_displayedpropositionservice_sous_categories\n", - "WHERE propositionservice_id IN (\n", - " SELECT id FROM qfdmo_propositionservice\n", - " WHERE acteur_id IN (\n", - " SELECT identifiant_unique FROM qfdmo_acteur WHERE source_id = 45\n", - " )\n", - ");\n", - "DELETE 105969\n", - "quefairedem_2657=> DELETE FROM qfdmo_propositionservice \n", - "WHERE acteur_id IN (\n", - " SELECT identifiant_unique FROM qfdmo_acteur WHERE source_id = 45\n", - ");\n", - "DELETE 35930\n", - "quefairedem_2657=> delete from qfdmo_acteur where identifiant_unique =45;\n", - "ERROR: operator does not exist: character varying = integer\n", - "LINE 1: delete from qfdmo_acteur where identifiant_unique =45;\n", - " ^\n", - "HINT: No operator matches the given name and argument types. 
You might need to add explicit type casts.\n", - "quefairedem_2657=> delete from qfdmo_acteur where source_id =45;\n" - ] - }, - { - "cell_type": "code", - "execution_count": 133, - "id": "5ab625ac-398e-4558-b6fb-97cd6c6bf2d3", - "metadata": {}, - "outputs": [], - "source": [ - "df = pd.read_sql_table(\"rev_refashion\",engine)" - ] - }, - { - "cell_type": "code", - "execution_count": 149, - "id": "6a4233c1-e4cc-4852-a18f-9a4736b40187", - "metadata": {}, - "outputs": [], - "source": [ - "sources = pd.read_sql_table(\"qfdmo_sources_acteurs\",engine)" - ] - }, - { - "cell_type": "code", - "execution_count": 180, - "id": "e88cb3b8-33a5-484e-b01b-6ae34a9a89ff", - "metadata": {}, - "outputs": [], - "source": [ - "df_rev = pd.merge(df[df['source_id_y']==45],sources[['identifiant_unique','identifiant_externe']],left_on=['identifiant_externe_y'], right_on=['identifiant_externe'])" - ] - }, - { - "cell_type": "code", - "execution_count": 182, - "id": "cedb934c-3a27-44b9-b767-af9708edfeee", - "metadata": {}, - "outputs": [], - "source": [ - "df_rev = df_rev.drop(columns=['identifiant_unique_x'])\n", - "df_rev = df_rev.rename(columns={'identifiant_unique_y':'identifiant_unique'})\n", - "df_rev.drop_duplicates()" - ] - }, - { - "cell_type": "code", - "execution_count": 203, - "id": "9b04521a-d325-4e34-9ca5-8432595913e6", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "35929" - ] - }, - "execution_count": 203, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df_rev[\n", - " [\n", - " \"identifiant_unique\",\n", - " \"nom\",\n", - " \"adresse\",\n", - " \"adresse_complement\",\n", - " \"code_postal\",\n", - " \"ville\",\n", - " \"url\",\n", - " \"email\",\n", - " \"location\",\n", - " \"telephone\",\n", - " \"nom_commercial\",\n", - " \"nom_officiel\",\n", - " \"siret\",\n", - " \"identifiant_externe\",\n", - " \"acteur_type_id\",\n", - " \"statut\",\n", - " \"cree_le\",\n", - " \"modifie_le\",\n", - " \"naf_principal\",\n", - " 
\"commentaires\",\n", - " \"horaires_osm\",\n", - " \"horaires_description\",\n", - " \"description\",\n", - " ]\n", - " ].to_sql('qfdmo_revisionacteur',engine, index=False,\n", - "if_exists=\"append\",method=\"multi\",\n", - " chunksize=1000)" - ] - }, - { - "cell_type": "code", - "execution_count": 20, - "id": "bdbd3d74-97d6-4e47-92de-e8b73eaddd53", - "metadata": {}, - "outputs": [], - "source": [ - "df_sql = pd.read_sql_query(\n", - " \"SELECT * FROM qfdmo_dagrunchange WHERE \"\n", - " \"dag_run_id IN \"\n", - " \"(SELECT id FROM qfdmo_dagrun WHERE status = 'DagRunStatus.TO_INSERT')\",\n", - " engine,\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 21, - "id": "84faf089-9319-4cd2-bab6-87d839984d67", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
idchange_typemeta_datarow_updatesdag_run_id
0646816CREATENone{'nom': 'MFC', 'url': 'https://www.lamanufactu...30
1646817CREATENone{'nom': 'TILLI', 'url': 'https://tilli.fr/', '...30
2646818CREATENone{'nom': 'TILLI', 'url': 'https://tilli.fr/', '...30
3646819CREATENone{'nom': 'TILLI', 'url': 'https://tilli.fr/', '...30
4646820CREATENone{'nom': 'A.C.L PROXI POL.', 'url': None, 'emai...30
..................
35925682741CREATENone{'nom': 'APIVET', 'url': None, 'email': None, ...30
35926682742CREATENone{'nom': 'APIVET', 'url': None, 'email': None, ...30
35927682743CREATENone{'nom': 'APIVET', 'url': None, 'email': None, ...30
35928682744CREATENone{'nom': 'APIVET', 'url': None, 'email': None, ...30
35929682745CREATENone{'nom': 'APIVET', 'url': None, 'email': None, ...30
\n", - "

35930 rows × 5 columns

\n", - "
" - ], - "text/plain": [ - " id change_type meta_data \\\n", - "0 646816 CREATE None \n", - "1 646817 CREATE None \n", - "2 646818 CREATE None \n", - "3 646819 CREATE None \n", - "4 646820 CREATE None \n", - "... ... ... ... \n", - "35925 682741 CREATE None \n", - "35926 682742 CREATE None \n", - "35927 682743 CREATE None \n", - "35928 682744 CREATE None \n", - "35929 682745 CREATE None \n", - "\n", - " row_updates dag_run_id \n", - "0 {'nom': 'MFC', 'url': 'https://www.lamanufactu... 30 \n", - "1 {'nom': 'TILLI', 'url': 'https://tilli.fr/', '... 30 \n", - "2 {'nom': 'TILLI', 'url': 'https://tilli.fr/', '... 30 \n", - "3 {'nom': 'TILLI', 'url': 'https://tilli.fr/', '... 30 \n", - "4 {'nom': 'A.C.L PROXI POL.', 'url': None, 'emai... 30 \n", - "... ... ... \n", - "35925 {'nom': 'APIVET', 'url': None, 'email': None, ... 30 \n", - "35926 {'nom': 'APIVET', 'url': None, 'email': None, ... 30 \n", - "35927 {'nom': 'APIVET', 'url': None, 'email': None, ... 30 \n", - "35928 {'nom': 'APIVET', 'url': None, 'email': None, ... 30 \n", - "35929 {'nom': 'APIVET', 'url': None, 'email': None, ... 30 \n", - "\n", - "[35930 rows x 5 columns]" - ] - }, - "execution_count": 21, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df_sql" - ] - }, - { - "cell_type": "code", - "execution_count": 22, - "id": "66eca3b3-2146-45fd-99c1-38ae83c861a2", - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/var/folders/0b/ssm8dl5n5td_t_2lb_8qn6500000gn/T/ipykernel_47073/3724072109.py:13: FutureWarning: The behavior of DataFrame concatenation with empty or all-NA entries is deprecated. In a future version, this will no longer exclude empty or all-NA columns when determining the result dtypes. 
To retain the old behavior, exclude the relevant entries before the concat operation.\n", - " df_actors = pd.concat(normalized_dfs.tolist(), ignore_index=True)\n" - ] - }, - { - "ename": "NotImplementedError", - "evalue": "", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mNotImplementedError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[22], line 15\u001b[0m\n\u001b[1;32m 12\u001b[0m normalized_dfs \u001b[38;5;241m=\u001b[39m df_sql[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mrow_updates\u001b[39m\u001b[38;5;124m\"\u001b[39m]\u001b[38;5;241m.\u001b[39mapply(pd\u001b[38;5;241m.\u001b[39mjson_normalize)\n\u001b[1;32m 13\u001b[0m df_actors \u001b[38;5;241m=\u001b[39m pd\u001b[38;5;241m.\u001b[39mconcat(normalized_dfs\u001b[38;5;241m.\u001b[39mtolist(), ignore_index\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m)\n\u001b[0;32m---> 15\u001b[0m normalized_labels_dfs \u001b[38;5;241m=\u001b[39m \u001b[43mdf_actors\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mlabels\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m]\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mapply\u001b[49m\u001b[43m(\u001b[49m\u001b[43mpd\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mjson_normalize\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 16\u001b[0m df_labels \u001b[38;5;241m=\u001b[39m pd\u001b[38;5;241m.\u001b[39mconcat(normalized_labels_dfs\u001b[38;5;241m.\u001b[39mtolist(), ignore_index\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m)\n\u001b[1;32m 18\u001b[0m normalized_pds_dfs \u001b[38;5;241m=\u001b[39m df_actors[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mproposition_services\u001b[39m\u001b[38;5;124m\"\u001b[39m]\u001b[38;5;241m.\u001b[39mapply(pd\u001b[38;5;241m.\u001b[39mjson_normalize)\n", - "File 
\u001b[0;32m~/data/quefairedemesobjets/venv/lib/python3.9/site-packages/pandas/core/series.py:4757\u001b[0m, in \u001b[0;36mSeries.apply\u001b[0;34m(self, func, convert_dtype, args, by_row, **kwargs)\u001b[0m\n\u001b[1;32m 4629\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mapply\u001b[39m(\n\u001b[1;32m 4630\u001b[0m \u001b[38;5;28mself\u001b[39m,\n\u001b[1;32m 4631\u001b[0m func: AggFuncType,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 4636\u001b[0m \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs,\n\u001b[1;32m 4637\u001b[0m ) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m DataFrame \u001b[38;5;241m|\u001b[39m Series:\n\u001b[1;32m 4638\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 4639\u001b[0m \u001b[38;5;124;03m Invoke function on values of Series.\u001b[39;00m\n\u001b[1;32m 4640\u001b[0m \n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 4755\u001b[0m \u001b[38;5;124;03m dtype: float64\u001b[39;00m\n\u001b[1;32m 4756\u001b[0m \u001b[38;5;124;03m \"\"\"\u001b[39;00m\n\u001b[0;32m-> 4757\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mSeriesApply\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 4758\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 4759\u001b[0m \u001b[43m \u001b[49m\u001b[43mfunc\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 4760\u001b[0m \u001b[43m \u001b[49m\u001b[43mconvert_dtype\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mconvert_dtype\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 4761\u001b[0m \u001b[43m \u001b[49m\u001b[43mby_row\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mby_row\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 4762\u001b[0m \u001b[43m \u001b[49m\u001b[43margs\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 4763\u001b[0m \u001b[43m 
\u001b[49m\u001b[43mkwargs\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 4764\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mapply\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n", - "File \u001b[0;32m~/data/quefairedemesobjets/venv/lib/python3.9/site-packages/pandas/core/apply.py:1209\u001b[0m, in \u001b[0;36mSeriesApply.apply\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 1206\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mapply_compat()\n\u001b[1;32m 1208\u001b[0m \u001b[38;5;66;03m# self.func is Callable\u001b[39;00m\n\u001b[0;32m-> 1209\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mapply_standard\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n", - "File \u001b[0;32m~/data/quefairedemesobjets/venv/lib/python3.9/site-packages/pandas/core/apply.py:1289\u001b[0m, in \u001b[0;36mSeriesApply.apply_standard\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 1283\u001b[0m \u001b[38;5;66;03m# row-wise access\u001b[39;00m\n\u001b[1;32m 1284\u001b[0m \u001b[38;5;66;03m# apply doesn't have a `na_action` keyword and for backward compat reasons\u001b[39;00m\n\u001b[1;32m 1285\u001b[0m \u001b[38;5;66;03m# we need to give `na_action=\"ignore\"` for categorical data.\u001b[39;00m\n\u001b[1;32m 1286\u001b[0m \u001b[38;5;66;03m# TODO: remove the `na_action=\"ignore\"` when that default has been changed in\u001b[39;00m\n\u001b[1;32m 1287\u001b[0m \u001b[38;5;66;03m# Categorical (GH51645).\u001b[39;00m\n\u001b[1;32m 1288\u001b[0m action \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mignore\u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(obj\u001b[38;5;241m.\u001b[39mdtype, CategoricalDtype) \u001b[38;5;28;01melse\u001b[39;00m 
\u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[0;32m-> 1289\u001b[0m mapped \u001b[38;5;241m=\u001b[39m \u001b[43mobj\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_map_values\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 1290\u001b[0m \u001b[43m \u001b[49m\u001b[43mmapper\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcurried\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mna_action\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43maction\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mconvert\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mconvert_dtype\u001b[49m\n\u001b[1;32m 1291\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1293\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mlen\u001b[39m(mapped) \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(mapped[\u001b[38;5;241m0\u001b[39m], ABCSeries):\n\u001b[1;32m 1294\u001b[0m \u001b[38;5;66;03m# GH#43986 Need to do list(mapped) in order to get treated as nested\u001b[39;00m\n\u001b[1;32m 1295\u001b[0m \u001b[38;5;66;03m# See also GH#25959 regarding EA support\u001b[39;00m\n\u001b[1;32m 1296\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m obj\u001b[38;5;241m.\u001b[39m_constructor_expanddim(\u001b[38;5;28mlist\u001b[39m(mapped), index\u001b[38;5;241m=\u001b[39mobj\u001b[38;5;241m.\u001b[39mindex)\n", - "File \u001b[0;32m~/data/quefairedemesobjets/venv/lib/python3.9/site-packages/pandas/core/base.py:921\u001b[0m, in \u001b[0;36mIndexOpsMixin._map_values\u001b[0;34m(self, mapper, na_action, convert)\u001b[0m\n\u001b[1;32m 918\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(arr, ExtensionArray):\n\u001b[1;32m 919\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m arr\u001b[38;5;241m.\u001b[39mmap(mapper, na_action\u001b[38;5;241m=\u001b[39mna_action)\n\u001b[0;32m--> 921\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m 
\u001b[43malgorithms\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mmap_array\u001b[49m\u001b[43m(\u001b[49m\u001b[43marr\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mmapper\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mna_action\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mna_action\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mconvert\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mconvert\u001b[49m\u001b[43m)\u001b[49m\n", - "File \u001b[0;32m~/data/quefairedemesobjets/venv/lib/python3.9/site-packages/pandas/core/algorithms.py:1814\u001b[0m, in \u001b[0;36mmap_array\u001b[0;34m(arr, mapper, na_action, convert)\u001b[0m\n\u001b[1;32m 1812\u001b[0m values \u001b[38;5;241m=\u001b[39m arr\u001b[38;5;241m.\u001b[39mastype(\u001b[38;5;28mobject\u001b[39m, copy\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mFalse\u001b[39;00m)\n\u001b[1;32m 1813\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m na_action \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[0;32m-> 1814\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mlib\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mmap_infer\u001b[49m\u001b[43m(\u001b[49m\u001b[43mvalues\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mmapper\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mconvert\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mconvert\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1815\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 1816\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m lib\u001b[38;5;241m.\u001b[39mmap_infer_mask(\n\u001b[1;32m 1817\u001b[0m values, mapper, mask\u001b[38;5;241m=\u001b[39misna(values)\u001b[38;5;241m.\u001b[39mview(np\u001b[38;5;241m.\u001b[39muint8), convert\u001b[38;5;241m=\u001b[39mconvert\n\u001b[1;32m 1818\u001b[0m )\n", - "File \u001b[0;32mlib.pyx:2926\u001b[0m, in \u001b[0;36mpandas._libs.lib.map_infer\u001b[0;34m()\u001b[0m\n", - "File 
\u001b[0;32m~/data/quefairedemesobjets/venv/lib/python3.9/site-packages/pandas/io/json/_normalize.py:445\u001b[0m, in \u001b[0;36mjson_normalize\u001b[0;34m(data, record_path, meta, meta_prefix, record_prefix, errors, sep, max_level)\u001b[0m\n\u001b[1;32m 443\u001b[0m data \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mlist\u001b[39m(data)\n\u001b[1;32m 444\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m--> 445\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mNotImplementedError\u001b[39;00m\n\u001b[1;32m 447\u001b[0m \u001b[38;5;66;03m# check to see if a simple recursive function is possible to\u001b[39;00m\n\u001b[1;32m 448\u001b[0m \u001b[38;5;66;03m# improve performance (see #15621) but only for cases such\u001b[39;00m\n\u001b[1;32m 449\u001b[0m \u001b[38;5;66;03m# as pd.Dataframe(data) or pd.Dataframe(data, sep)\u001b[39;00m\n\u001b[1;32m 450\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m (\n\u001b[1;32m 451\u001b[0m record_path \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[1;32m 452\u001b[0m \u001b[38;5;129;01mand\u001b[39;00m meta \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 455\u001b[0m \u001b[38;5;129;01mand\u001b[39;00m max_level \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[1;32m 456\u001b[0m ):\n", - "\u001b[0;31mNotImplementedError\u001b[0m: " - ] - } - ], - "source": [ - "max_id_pds = pd.read_sql_query(\n", - " \"SELECT max(id) FROM qfdmo_displayedpropositionservice\", engine\n", - ")[\"max\"][0]\n", - "df_sql = pd.read_sql_query(\n", - " \"SELECT * FROM qfdmo_dagrunchange WHERE \"\n", - " \"dag_run_id IN \"\n", - " \"(SELECT id FROM qfdmo_dagrun WHERE status = 'DagRunStatus.TO_INSERT')\",\n", - " engine,\n", - ")\n", - "dag_run_id = df_sql[\"dag_run_id\"].iloc[0]\n", - "\n", - "normalized_dfs = df_sql[\"row_updates\"].apply(pd.json_normalize)\n", - "df_actors = pd.concat(normalized_dfs.tolist(), 
ignore_index=True)\n", - "\n", - "normalized_labels_dfs = df_actors[\"labels\"].apply(pd.json_normalize)\n", - "df_labels = pd.concat(normalized_labels_dfs.tolist(), ignore_index=True)\n", - "\n", - "normalized_pds_dfs = df_actors[\"proposition_services\"].apply(pd.json_normalize)\n", - "df_pds = pd.concat(normalized_pds_dfs.tolist(), ignore_index=True)\n", - "ids_range = range(max_id_pds + 1, max_id_pds + 1 + len(df_pds))\n", - "\n", - "df_pds[\"id\"] = ids_range\n", - "df_pds[\"pds_sous_categories\"] = df_pds.apply(\n", - " lambda row: [\n", - " {**d, \"propositionservice_id\": row[\"id\"]}\n", - " for d in row[\"pds_sous_categories\"]\n", - " ],\n", - " axis=1,\n", - ")\n", - "\n", - "normalized_pdssc_dfs = df_pds[\"pds_sous_categories\"].apply(pd.json_normalize)\n", - "df_pdssc = pd.concat(normalized_pdssc_dfs.tolist(), ignore_index=True)" - ] - }, - { - "cell_type": "code", - "execution_count": 26, - "id": "88a31925-803d-4571-9e47-72ff4f3ad38a", - "metadata": {}, - "outputs": [ - { - "ename": "NotImplementedError", - "evalue": "", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mNotImplementedError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[26], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m normalized_labels_dfs \u001b[38;5;241m=\u001b[39m \u001b[43mdf_actors\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mlabels\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m]\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mapply\u001b[49m\u001b[43m(\u001b[49m\u001b[43mpd\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mjson_normalize\u001b[49m\u001b[43m)\u001b[49m\n", - "File \u001b[0;32m~/data/quefairedemesobjets/venv/lib/python3.9/site-packages/pandas/core/series.py:4757\u001b[0m, in \u001b[0;36mSeries.apply\u001b[0;34m(self, func, convert_dtype, args, by_row, 
**kwargs)\u001b[0m\n\u001b[1;32m 4629\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mapply\u001b[39m(\n\u001b[1;32m 4630\u001b[0m \u001b[38;5;28mself\u001b[39m,\n\u001b[1;32m 4631\u001b[0m func: AggFuncType,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 4636\u001b[0m \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs,\n\u001b[1;32m 4637\u001b[0m ) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m DataFrame \u001b[38;5;241m|\u001b[39m Series:\n\u001b[1;32m 4638\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 4639\u001b[0m \u001b[38;5;124;03m Invoke function on values of Series.\u001b[39;00m\n\u001b[1;32m 4640\u001b[0m \n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 4755\u001b[0m \u001b[38;5;124;03m dtype: float64\u001b[39;00m\n\u001b[1;32m 4756\u001b[0m \u001b[38;5;124;03m \"\"\"\u001b[39;00m\n\u001b[0;32m-> 4757\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mSeriesApply\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 4758\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 4759\u001b[0m \u001b[43m \u001b[49m\u001b[43mfunc\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 4760\u001b[0m \u001b[43m \u001b[49m\u001b[43mconvert_dtype\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mconvert_dtype\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 4761\u001b[0m \u001b[43m \u001b[49m\u001b[43mby_row\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mby_row\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 4762\u001b[0m \u001b[43m \u001b[49m\u001b[43margs\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 4763\u001b[0m \u001b[43m \u001b[49m\u001b[43mkwargs\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 4764\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mapply\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n", - 
"File \u001b[0;32m~/data/quefairedemesobjets/venv/lib/python3.9/site-packages/pandas/core/apply.py:1209\u001b[0m, in \u001b[0;36mSeriesApply.apply\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 1206\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mapply_compat()\n\u001b[1;32m 1208\u001b[0m \u001b[38;5;66;03m# self.func is Callable\u001b[39;00m\n\u001b[0;32m-> 1209\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mapply_standard\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n", - "File \u001b[0;32m~/data/quefairedemesobjets/venv/lib/python3.9/site-packages/pandas/core/apply.py:1289\u001b[0m, in \u001b[0;36mSeriesApply.apply_standard\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 1283\u001b[0m \u001b[38;5;66;03m# row-wise access\u001b[39;00m\n\u001b[1;32m 1284\u001b[0m \u001b[38;5;66;03m# apply doesn't have a `na_action` keyword and for backward compat reasons\u001b[39;00m\n\u001b[1;32m 1285\u001b[0m \u001b[38;5;66;03m# we need to give `na_action=\"ignore\"` for categorical data.\u001b[39;00m\n\u001b[1;32m 1286\u001b[0m \u001b[38;5;66;03m# TODO: remove the `na_action=\"ignore\"` when that default has been changed in\u001b[39;00m\n\u001b[1;32m 1287\u001b[0m \u001b[38;5;66;03m# Categorical (GH51645).\u001b[39;00m\n\u001b[1;32m 1288\u001b[0m action \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mignore\u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(obj\u001b[38;5;241m.\u001b[39mdtype, CategoricalDtype) \u001b[38;5;28;01melse\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[0;32m-> 1289\u001b[0m mapped \u001b[38;5;241m=\u001b[39m \u001b[43mobj\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_map_values\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 1290\u001b[0m \u001b[43m 
\u001b[49m\u001b[43mmapper\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcurried\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mna_action\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43maction\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mconvert\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mconvert_dtype\u001b[49m\n\u001b[1;32m 1291\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1293\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mlen\u001b[39m(mapped) \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(mapped[\u001b[38;5;241m0\u001b[39m], ABCSeries):\n\u001b[1;32m 1294\u001b[0m \u001b[38;5;66;03m# GH#43986 Need to do list(mapped) in order to get treated as nested\u001b[39;00m\n\u001b[1;32m 1295\u001b[0m \u001b[38;5;66;03m# See also GH#25959 regarding EA support\u001b[39;00m\n\u001b[1;32m 1296\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m obj\u001b[38;5;241m.\u001b[39m_constructor_expanddim(\u001b[38;5;28mlist\u001b[39m(mapped), index\u001b[38;5;241m=\u001b[39mobj\u001b[38;5;241m.\u001b[39mindex)\n", - "File \u001b[0;32m~/data/quefairedemesobjets/venv/lib/python3.9/site-packages/pandas/core/base.py:921\u001b[0m, in \u001b[0;36mIndexOpsMixin._map_values\u001b[0;34m(self, mapper, na_action, convert)\u001b[0m\n\u001b[1;32m 918\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(arr, ExtensionArray):\n\u001b[1;32m 919\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m arr\u001b[38;5;241m.\u001b[39mmap(mapper, na_action\u001b[38;5;241m=\u001b[39mna_action)\n\u001b[0;32m--> 921\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43malgorithms\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mmap_array\u001b[49m\u001b[43m(\u001b[49m\u001b[43marr\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mmapper\u001b[49m\u001b[43m,\u001b[49m\u001b[43m 
\u001b[49m\u001b[43mna_action\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mna_action\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mconvert\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mconvert\u001b[49m\u001b[43m)\u001b[49m\n", - "File \u001b[0;32m~/data/quefairedemesobjets/venv/lib/python3.9/site-packages/pandas/core/algorithms.py:1814\u001b[0m, in \u001b[0;36mmap_array\u001b[0;34m(arr, mapper, na_action, convert)\u001b[0m\n\u001b[1;32m 1812\u001b[0m values \u001b[38;5;241m=\u001b[39m arr\u001b[38;5;241m.\u001b[39mastype(\u001b[38;5;28mobject\u001b[39m, copy\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mFalse\u001b[39;00m)\n\u001b[1;32m 1813\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m na_action \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[0;32m-> 1814\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mlib\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mmap_infer\u001b[49m\u001b[43m(\u001b[49m\u001b[43mvalues\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mmapper\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mconvert\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mconvert\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1815\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 1816\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m lib\u001b[38;5;241m.\u001b[39mmap_infer_mask(\n\u001b[1;32m 1817\u001b[0m values, mapper, mask\u001b[38;5;241m=\u001b[39misna(values)\u001b[38;5;241m.\u001b[39mview(np\u001b[38;5;241m.\u001b[39muint8), convert\u001b[38;5;241m=\u001b[39mconvert\n\u001b[1;32m 1818\u001b[0m )\n", - "File \u001b[0;32mlib.pyx:2926\u001b[0m, in \u001b[0;36mpandas._libs.lib.map_infer\u001b[0;34m()\u001b[0m\n", - "File \u001b[0;32m~/data/quefairedemesobjets/venv/lib/python3.9/site-packages/pandas/io/json/_normalize.py:445\u001b[0m, in \u001b[0;36mjson_normalize\u001b[0;34m(data, record_path, meta, meta_prefix, record_prefix, errors, sep, 
max_level)\u001b[0m\n\u001b[1;32m 443\u001b[0m data \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mlist\u001b[39m(data)\n\u001b[1;32m 444\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m--> 445\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mNotImplementedError\u001b[39;00m\n\u001b[1;32m 447\u001b[0m \u001b[38;5;66;03m# check to see if a simple recursive function is possible to\u001b[39;00m\n\u001b[1;32m 448\u001b[0m \u001b[38;5;66;03m# improve performance (see #15621) but only for cases such\u001b[39;00m\n\u001b[1;32m 449\u001b[0m \u001b[38;5;66;03m# as pd.Dataframe(data) or pd.Dataframe(data, sep)\u001b[39;00m\n\u001b[1;32m 450\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m (\n\u001b[1;32m 451\u001b[0m record_path \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[1;32m 452\u001b[0m \u001b[38;5;129;01mand\u001b[39;00m meta \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 455\u001b[0m \u001b[38;5;129;01mand\u001b[39;00m max_level \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[1;32m 456\u001b[0m ):\n", - "\u001b[0;31mNotImplementedError\u001b[0m: " - ] - } - ], - "source": [ - "normalized_labels_dfs = df_actors[\"labels\"].apply(pd.json_normalize)\n" - ] - }, - { - "cell_type": "code", - "execution_count": 30, - "id": "20d8d270-8209-41b1-bc89-21187de8b5c3", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
acteur_idlabelqualitelabelqualite_id
0refashion_TLC-REFASHION-REP-455001208507113095_dRe_fashion3
\n", - "
" - ], - "text/plain": [ - " acteur_id labelqualite \\\n", - "0 refashion_TLC-REFASHION-REP-455001208507113095_d Re_fashion \n", - "\n", - " labelqualite_id \n", - "0 3 " - ] - }, - "execution_count": 30, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df_actors[\"labels\"].dropna().apply(pd.json_normalize)[0]" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "b4a72b9e-e22d-4ffb-aa17-4547c8d212e4", - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.9.6" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/qfdmo/forms.py b/qfdmo/forms.py index fd4f9fdd8..b8ec9a27f 100644 --- a/qfdmo/forms.py +++ b/qfdmo/forms.py @@ -342,6 +342,10 @@ def load_choices( class DagsForm(forms.Form): + """ + DEPRECATED, should use the data django app + """ + dagrun = forms.ModelChoiceField( label="Séléctionner l'execution d'un DAG", widget=forms.Select( @@ -475,7 +479,6 @@ def load_choices(self): "data-search-solution-form-target": "direction", }, ), - # FIXME: I guess async error comes from here choices=[ ("carte", "Carte"), ("form", "Formulaire"), diff --git a/qfdmo/migrations/0111_delete_bancache.py b/qfdmo/migrations/0111_delete_bancache.py new file mode 100644 index 000000000..4ef826cdf --- /dev/null +++ b/qfdmo/migrations/0111_delete_bancache.py @@ -0,0 +1,16 @@ +# Generated by Django 5.1.4 on 2025-01-14 09:56 + +from django.db import migrations + + +class Migration(migrations.Migration): + + dependencies = [ + ("qfdmo", "0110_alter_source_code"), + ] + + operations = [ + migrations.DeleteModel( + name="BANCache", + ), + ] diff --git 
a/qfdmo/models/acteur.py b/qfdmo/models/acteur.py index f3bd8a3b7..4721dbc30 100644 --- a/qfdmo/models/acteur.py +++ b/qfdmo/models/acteur.py @@ -17,7 +17,6 @@ from django.core.cache import cache from django.core.files.images import get_image_dimensions from django.db.models import Case, Exists, Min, OuterRef, Q, Value, When -from django.db.models.functions import Now from django.forms import ValidationError, model_to_dict from django.http import HttpRequest from django.urls import reverse @@ -25,6 +24,7 @@ from unidecode import unidecode from core.constants import DIGITAL_ACTEUR_CODE +from core.models import TimestampedModel from dags.sources.config.shared_constants import REPRISE_1POUR0, REPRISE_1POUR1 from qfdmo.models.action import Action, get_action_instances from qfdmo.models.categorie_objet import SousCategorieObjet @@ -268,7 +268,7 @@ def get_queryset(self): return DisplayedActeurQuerySet(self.model, using=self._db) -class BaseActeur(NomAsNaturalKeyModel): +class BaseActeur(TimestampedModel, NomAsNaturalKeyModel): class Meta: abstract = True @@ -303,8 +303,6 @@ class Meta: ) naf_principal = models.CharField(max_length=255, blank=True, null=True) commentaires = models.TextField(blank=True, null=True) - cree_le = models.DateTimeField(auto_now_add=True, db_default=Now()) - modifie_le = models.DateTimeField(auto_now=True, db_default=Now()) horaires_osm = models.CharField( blank=True, null=True, validators=[validate_opening_hours] ) diff --git a/qfdmo/models/data.py b/qfdmo/models/data.py index ad8cf0ca4..58427d5af 100644 --- a/qfdmo/models/data.py +++ b/qfdmo/models/data.py @@ -1,5 +1,8 @@ +""" +DEPRECATED, should use the data django app +""" + from django.contrib.gis.db import models -from django.db.models.functions import Now from dags.sources.config.shared_constants import ( DAGRUN_FINISHED, @@ -143,16 +146,3 @@ def update_row_update_candidate(self, status, index): def get_candidat(self, index): return self.row_updates["ae_result"][int(index) - 1] - - -class 
BANCache(models.Model): - class Meta: - verbose_name = "Cache BAN" - verbose_name_plural = "Cache BAN" - - adresse = models.CharField(max_length=255, blank=True, null=True) - code_postal = models.CharField(max_length=255, blank=True, null=True) - ville = models.CharField(max_length=255, blank=True, null=True) - location = models.PointField(blank=True, null=True) - ban_returned = models.JSONField(blank=True, null=True) - modifie_le = models.DateTimeField(auto_now=True, db_default=Now()) diff --git a/qfdmo/urls.py b/qfdmo/urls.py index fa903e306..2fe90c501 100644 --- a/qfdmo/urls.py +++ b/qfdmo/urls.py @@ -87,11 +87,6 @@ TemplateView.as_view(template_name="tests/iframe.html"), name="test_iframe", ), - path( - "dags/validations", - DagsValidation.as_view(), - name="dags_validations", - ), path( "configurateur", ConfiguratorView.as_view(), @@ -102,4 +97,10 @@ AdvancedConfiguratorView.as_view(), name="advanced_iframe_configurator", ), + # DEPRECATED, should use the data django app + path( + "dags/validations", + DagsValidation.as_view(), + name="dags_validations", + ), ] diff --git a/qfdmo/views/dags.py b/qfdmo/views/dags.py index 34db7ea76..582b2a5e6 100644 --- a/qfdmo/views/dags.py +++ b/qfdmo/views/dags.py @@ -1,21 +1,18 @@ +""" +DEPRECATED, should use the data django app +""" + import logging -from django.contrib.auth.mixins import LoginRequiredMixin from django.core.paginator import Paginator from django.shortcuts import render from django.views.generic.edit import FormView +from core.views import IsStaffMixin from qfdmo.forms import DagsForm from qfdmo.models.data import DagRun, DagRunStatus -class IsStaffMixin(LoginRequiredMixin): - def dispatch(self, request, *args, **kwargs): - if not request.user.is_staff: - return self.handle_no_permission() - return super().dispatch(request, *args, **kwargs) - - class DagsValidation(IsStaffMixin, FormView): form_class = DagsForm template_name = "qfdmo/dags_validations.html" diff --git 
a/templates/data/_partials/source_ajout_event.html b/templates/data/_partials/source_ajout_event.html new file mode 100644 index 000000000..4618c92f8 --- /dev/null +++ b/templates/data/_partials/source_ajout_event.html @@ -0,0 +1,44 @@ + + + Acteur (après mise à jour) + Proposition de service + suggestion + + + + {% for suggestion_unitaire in suggestion_unitaires %} + + + {% for key, value in suggestion_unitaire.display_acteur_details.items %} +

{{ key }} : {{ value }}

+ {% endfor %} + + + + + + + + {% for service in suggestion_unitaire.display_proposition_service %} + + + + + {% endfor %} +
ActionSous-Catégories
{{ service.action }} +
    + {% for sous_cat in service.pds_sous_categories %} +
  • {{ sous_cat.souscategorie }}
  • + {% endfor %} +
+
+ + +
+ Données brutes +
{{ suggestion_unitaire.suggestion }}
+
+ + + {% endfor %} + diff --git a/templates/data/_partials/source_event.html b/templates/data/_partials/source_event.html new file mode 100644 index 000000000..e830a6acf --- /dev/null +++ b/templates/data/_partials/source_event.html @@ -0,0 +1,25 @@ +

Instance du DAG : {{ suggestion_cohorte_instance }}

+

Meta données

+ +

Source : {{ metadata.source }} +
Statut de la cohorte : {{ suggestion_cohorte_instance.get_statut_display }} +
Description : {{ metadata.description }} +
Nb de suggestions : {{ metadata.nb_suggestions }}

+ +

Exemples

+
+ + + {% if suggestion_cohorte_instance.type_action == "SOURCE_AJOUT" or suggestion_cohorte_instance.type_action == "SOURCE_MODIFICATION" %} + {% include 'data/_partials/source_ajout_event.html' %} + {% elif suggestion_cohorte_instance.type_action == "SOURCE_SUPRESSION" %} + {% include 'data/_partials/source_supression_event.html' %} + {% else %} + {% include 'data/_partials/create_actor_event.html' %} + {% endif %} +
Suggestion de source à valider
+
+
+ + +
diff --git a/templates/data/_partials/source_supression_event.html b/templates/data/_partials/source_supression_event.html new file mode 100644 index 000000000..3b6257b24 --- /dev/null +++ b/templates/data/_partials/source_supression_event.html @@ -0,0 +1,27 @@ + + + + type_action + Acteur + suggestion + + + + + {% for suggestion_unitaire in suggestion_unitaires %} + + {{ suggestion_unitaire.get_type_action_display }} + + {% for key, value in suggestion_unitaire.display_acteur_details.items %} +

{{ key }} : {{ value }}

+ {% endfor %} + + +
+ Données brutes +
{{ suggestion_unitaire.suggestion }}
+
+ + + {% endfor %} + diff --git a/templates/data/_partials/update_actor_event.html b/templates/data/_partials/update_actor_event.html new file mode 100644 index 000000000..25eeb6323 --- /dev/null +++ b/templates/data/_partials/update_actor_event.html @@ -0,0 +1,55 @@ +{% if suggestion_unitaires|length > 0 and suggestion_unitaires[0].change_type == 'UPDATE_ACTOR' %} + + + + change_type + meta_data + Identifiant Unique + Candidats + suggestion + + + + + {% for suggestion_unitaire in suggestion_unitaires if suggestion_unitaire.change_type == 'UPDATE_ACTOR' %} + + {{ suggestion_unitaire.get_change_type_display() }} + {{ suggestion_unitaire.meta_data if suggestion_unitaire.meta_data else "-" }} + + {% with identifiant_unique=suggestion_unitaire.display_acteur_details().identifiant_unique %} + {{ identifiant_unique }} + {% endwith %} + + + {% with candidat=candidat, index=loop.index, suggestion_cohorte=request.GET.suggestion_cohorte, + identifiant_unique=suggestion_unitaire.display_acteur_details().identifiant_unique %} + {% include 'qfdmo/partials/candidat_row.html' %} + + {% endwith %} + + +
+ Données brutes +
{{ suggestion_unitaire.suggestion }}
+
+ + +{% endfor %} + +{% if suggestion_unitaires.has_other_pages %} +
+ + + + +
+{% endif %} +{% endif %} diff --git a/templates/data/base.html b/templates/data/base.html new file mode 100644 index 000000000..2423a1f3a --- /dev/null +++ b/templates/data/base.html @@ -0,0 +1,60 @@ +{% load dsfr_tags qfdmd_tags static %} + + + + + + + + + + + {# Title and desc #} + {% block page_title %}Longue vie aux objets{% endblock %} + + + {% favicon %} + + {# Css #} + {% dsfr_css %} + + + {% block css_extras %}{% endblock %} + + {# Js #} + {% block javascript_extras %}{% endblock %} + + + + +
+ +
+
+

Une mise à jour de votre navigateur est nécessaire

+

La version de votre navigateur étant trop ancienne, nous ne pouvons vous garantir une expérience optimale sur la carte Longue vie aux objets. Si vous le souhaitez, vous pouvez aussi essayer de vous connecter sur un autre navigateur.

+
+
+ {% block content %}{% endblock %} +
+ + {% block modals %}{% endblock %} + {% block js %} + {% endblock js %} + + diff --git a/templates/data/dags_validations.html b/templates/data/dags_validations.html new file mode 100644 index 000000000..7a73d0a86 --- /dev/null +++ b/templates/data/dags_validations.html @@ -0,0 +1,39 @@ +{% extends 'data/base.html' %} + +{% block content %} + +
+

Gestion des suggestions de modification

+ +

+ Cette page permet de valider les données des «DAGs». +

+ {% if messages %} + + {% endif %} +
+ {% csrf_token %} + {{ form }} +
+ +
+ + {% if suggestion_cohorte_instance %} + {% if suggestion_cohorte_instance.is_source_type %} + {% include 'data/_partials/source_event.html' %} + {% elif suggestion_cohorte_instance.is_enrichissement_type %} + L'interface de suggestions d'enrichissement n'est pas encore implémentée + {% elif suggestion_cohorte_instance.is_clustering_type %} + L'interface de suggestion de clustering n'est pas encore implémentée + {% endif %} + + {% endif %} +
+ +
+ +{% endblock content %} diff --git a/unit_tests/data/test_models.py b/unit_tests/data/test_models.py new file mode 100644 index 000000000..84703d66e --- /dev/null +++ b/unit_tests/data/test_models.py @@ -0,0 +1,36 @@ +import pytest + +from data.models import ( # Models exercised by the tests below + SuggestionAction, + SuggestionCohorte, +) + + +@pytest.mark.parametrize( + "type_action, expected_result", + [ + (SuggestionAction.SOURCE_AJOUT, True), + (SuggestionAction.SOURCE_MODIFICATION, True), + (SuggestionAction.SOURCE_SUPPRESSION, True), + (SuggestionAction.CLUSTERING, False), + ("other_action", False), + ], +) +def test_is_source_type(type_action, expected_result): + instance = SuggestionCohorte(type_action=type_action) + assert instance.is_source_type == expected_result + + +@pytest.mark.parametrize( + "type_action, expected_result", + [ + (SuggestionAction.CLUSTERING, True), + (SuggestionAction.SOURCE_AJOUT, False), + (SuggestionAction.SOURCE_MODIFICATION, False), + (SuggestionAction.SOURCE_SUPPRESSION, False), + ("other_action", False), + ], +) +def test_is_clustering_type(type_action, expected_result): + instance = SuggestionCohorte(type_action=type_action) + assert instance.is_clustering_type == expected_result