From eff178cd3c60fe37b57a8f52718d8d53987ab925 Mon Sep 17 00:00:00 2001 From: Nicolas Oudard Date: Thu, 2 Jan 2025 08:15:40 +0100 Subject: [PATCH 01/26] =?UTF-8?q?D=C3=A9but=20de=20la=20refactorisation=20?= =?UTF-8?q?de=20l'interface=20dags/validations?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- jinja2/qfdmo/dags_validations.html | 13 ++++++-- qfdmo/views/dags.py | 48 ++++++++++++++++++++++++++++++ 2 files changed, 58 insertions(+), 3 deletions(-) diff --git a/jinja2/qfdmo/dags_validations.html b/jinja2/qfdmo/dags_validations.html index 6ff4c93eb..ec8eda4c5 100644 --- a/jinja2/qfdmo/dags_validations.html +++ b/jinja2/qfdmo/dags_validations.html @@ -8,12 +8,19 @@

Validations des «DAGs»

Cette page permet de valider les données des «DAGs».

- -
+ {% if messages %} + + {% endif %} + {{ csrf_input }} {{ form }} + {{ form.dagrun.value() }}
- +
{% if dagrun_instance %} diff --git a/qfdmo/views/dags.py b/qfdmo/views/dags.py index 34db7ea76..65a0bbedb 100644 --- a/qfdmo/views/dags.py +++ b/qfdmo/views/dags.py @@ -1,5 +1,6 @@ import logging +from django.contrib import messages from django.contrib.auth.mixins import LoginRequiredMixin from django.core.paginator import Paginator from django.shortcuts import render @@ -21,12 +22,59 @@ class DagsValidation(IsStaffMixin, FormView): template_name = "qfdmo/dags_validations.html" success_url = "/dags/validations" + def form_valid(self, form): + # MANAGE search and display dagrun details + if self.request.POST.get("search"): + dagrun = form.cleaned_data["dagrun"] + context = {"form": form} + context["dagrun_instance"] = dagrun + dagrun_lines = dagrun.dagrunchanges.all().order_by("?")[:100] + context["dagrun_lines"] = dagrun_lines + return render(self.request, "qfdmo/dags_validations.html", context) + + # ELSE: update the status of the dagrun and its dagrunlines + dagrun = form.cleaned_data["dagrun"] + new_status = ( + DagRunStatus.TO_INSERT.value + if self.request.POST.get("dag_valid") == "1" + else DagRunStatus.REJECTED.value + ) + + # FIXME: I am not sure we need the filter here + dagrun.dagrunchanges.filter(status=DagRunStatus.TO_VALIDATE.value).update( + status=new_status + ) + dagrun.status = new_status + dagrun.save() + messages.success( + self.request, + f"La cohorte {dagrun} a été mise à jour avec le statut {new_status}", + ) + + return super().form_valid(form) + + def form_invalid(self, form): + messages.error(self.request, "Il y a des erreurs dans le formulaire.") + return super().form_invalid(form) + + # def form_valid(self, form): + # if self.request.POST.get("search"): + # messages.add_message(self.request, messages.INFO, "Form Valid.") + # return super().form_valid(form) + + +class DagsValidation1(IsStaffMixin, FormView): + form_class = DagsForm + template_name = "qfdmo/dags_validations.html" + success_url = "/dags/validations" + def get_initial(self): initial = super().get_initial() initial["dagrun"] = self.request.GET.get("dagrun") return initial def post(self, request, *args, **kwargs): + dag_valid = request.POST.get("dag_valid") if dag_valid in ["1", "0"]: return self.form_valid(self.get_form()) From 2e02f6c0e3e042259495b840615666fdd465f4ea Mon Sep 17 00:00:00 2001 From: Nicolas Oudard Date: Thu, 2 Jan 2025 11:37:03 +0100 Subject: [PATCH 02/26] simplification et proposition de modification --- dags/ingest_validated_dataset_to_db.py | 29 ++++--------- docs/reference/303-systeme-de-suggestions.md | 44 ++++++++++++++++++++ 2 files changed, 51 insertions(+), 22 deletions(-) create mode 100644 docs/reference/303-systeme-de-suggestions.md diff --git a/dags/ingest_validated_dataset_to_db.py b/dags/ingest_validated_dataset_to_db.py index d4a61e029..e8cf9a80b 100755 --- a/dags/ingest_validated_dataset_to_db.py +++ b/dags/ingest_validated_dataset_to_db.py @@ -2,7 +2,7 @@ import pandas as pd from airflow.models import DAG -from airflow.operators.python import BranchPythonOperator, PythonOperator +from airflow.operators.python import PythonOperator, ShortCircuitOperator from airflow.operators.trigger_dagrun import TriggerDagRunOperator from airflow.providers.postgres.hooks.postgres import PostgresHook from airflow.utils.dates import days_ago @@ -39,14 +39,10 @@ def _get_first_dagrun_to_insert(): return row -def check_for_validation(**kwargs): +def check_suggestion_to_process(**kwargs): # get first row from table qfdmo_dagrun with status TO_INSERT row = _get_first_dagrun_to_insert() - - 
# Skip if row is None
-    if row is None:
-        return "skip_processing"
-    return "fetch_and_parse_data"
+    return bool(row)


 def fetch_and_parse_data(**context):
@@ -125,19 +121,9 @@ def write_data_to_postgres(**kwargs):
     )


-def skip_processing(**kwargs):
-    print("No records to validate. DAG run completes successfully.")
-
-
-skip_processing_task = PythonOperator(
-    task_id="skip_processing",
-    python_callable=skip_processing,
-    dag=dag,
-)
-
-branch_task = BranchPythonOperator(
-    task_id="branch_processing",
-    python_callable=check_for_validation,
+check_suggestion_to_process_task = ShortCircuitOperator(
+    task_id="check_suggestion_to_process",
+    python_callable=check_suggestion_to_process,
     dag=dag,
 )

@@ -153,9 +139,8 @@ def skip_processing(**kwargs):
     dag=dag,
 )

-branch_task >> skip_processing_task
 (
-    branch_task
+    check_suggestion_to_process_task
     >> fetch_parse_task
     >> write_to_postgres_task
     >> trigger_create_final_actors_dag
diff --git a/docs/reference/303-systeme-de-suggestions.md b/docs/reference/303-systeme-de-suggestions.md
new file mode 100644
index 000000000..e6aa76f91
--- /dev/null
+++ b/docs/reference/303-systeme-de-suggestions.md
@@ -0,0 +1,44 @@
+# Système de suggestion
+
+**Statut : ❓ À approuver**
+
+Cette proposition de modification de l'architecture pour faire évoluer le système de suggestion est un travail itératif. Il est donc nécessaire de garder en tête la cible et le moyen d'y aller.
+
+## Existant et problématique
+
+Il existe les tables `dagrun` et `dagrunchange` :
+
+- `dagrun` représente un ensemble de suggestions produit par l'exécution d'un DAG Airflow
+- `dagrunchange` représente la suggestion de modification pour une ligne donnée
+
+On a quelques problèmes de lisibilité de ces tables :
+
+- les types d'événements sont imprécis et utilisés à plusieurs fins : par exemple, `UPDATE_ACTOR` est utilisé pour des propositions de sirétisation et de suppression d'acteurs lors de l'ingestion de la source
+- les types d'événements sont définis au niveau de chaque ligne : pour connaître le type d'une cohorte, il faut donc parcourir toutes ses lignes
+- si une ligne est problématique, aucune ligne n'est mise à jour
+- on n'a pas de vue sur les DAG qui ont réussi ou qui se sont terminés en erreur
+
+## Proposition d'amélioration
+
+### Base de données
+
+- Renommage des tables : `dagrun` -> `suggestion_cohorte`, `dagrunchange` -> `suggestion_ligne`
+- Écrire les champs en français comme le reste des tables de l'application
+- Revue des statuts de `suggestion_cohorte` : à traiter, en cours de traitement, fini avec succès, fini avec succès partiel, fini en erreur
+- Ajout d'un type d'événement à `suggestion_cohorte` : source, enrichissement
+- Ajout d'un sous-type d'événement à `suggestion_cohorte` : source - ajout acteur, source - suppression acteur, source - modification acteur, enrichissement - déménagement…
+
+### Interface
+
+Si possible, utiliser l'interface d'administration de Django pour gérer les suggestions (cela devrait bien fonctionner au moins pour la partie `ingestion des sources`).
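+
+À titre d'illustration, voici une esquisse minimale d'enregistrement dans l'admin Django. C'est une hypothèse de travail : les noms de modèles `SuggestionCohorte` / `SuggestionUnitaire`, leurs champs (`identifiant_action`, `type_action`, `statut`, `cree_le`), le chemin d'import et la valeur de statut `"REJETER"` sont supposés d'après ce document et `dags/sources/config/shared_constants.py`.
+
+```python
+from django.contrib import admin
+
+# Chemin d'import supposé (les modèles Django ne sont pas encore créés).
+from qfdmo.models.data import SuggestionCohorte, SuggestionUnitaire
+
+
+@admin.action(description="Rejeter les suggestions sélectionnées")
+def rejeter(modeladmin, request, queryset):
+    # "REJETER" : valeur supposée, alignée sur shared_constants.SUGGESTION_REJETER
+    queryset.update(statut="REJETER")
+
+
+@admin.register(SuggestionCohorte)
+class SuggestionCohorteAdmin(admin.ModelAdmin):
+    list_display = ("identifiant_action", "type_action", "statut", "cree_le")
+    list_filter = ("statut", "type_action")
+
+
+@admin.register(SuggestionUnitaire)
+class SuggestionUnitaireAdmin(admin.ModelAdmin):
+    list_display = ("suggestion_cohorte", "type_action", "statut")
+    list_filter = ("statut",)
+    actions = [rejeter]  # action de masse sur les suggestions cochées
+```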
+
+- Division des interfaces de validation :
+  - `ingestion des sources` : nouvelles sources ou nouvelle version d'une source existante
+  - `enrichissements` : fermetures, déménagements, enrichissement avec annuaire-entreprise, l'API BAN ou d'autres API
+- Ajout de filtre sur le statut (« à traiter » est sélectionné par défaut)
+- Ajout de la pagination
+- Permettre de cocher les suggestions et d'exécuter une action pour l'ensemble
+
+### Pipeline
+
+- Le DAG de validation des cohortes doit intégrer la même architecture que les autres DAGs

From 7dcb234caa1eae4dffba80183c09f18fd78b56a3 Mon Sep 17 00:00:00 2001
From: Nicolas Oudard
Date: Wed, 8 Jan 2025 17:52:53 +0100
Subject: [PATCH 03/26] WIP

---
 dags/annuaire_entreprise_checks.py            |    2 +-
 dags/ingest_validated_dataset_to_db.py        |   82 +-
 .../shared/tasks/business_logic/write_data.py |    4 +-
 dags/sources/config/shared_constants.py       |   21 +-
 .../tasks/business_logic/db_data_prepare.py   |    4 +-
 dags/utils/dag_eo_utils.py                    |   41 +-
 dags/utils/dag_ingest_validated_utils.py      |    8 +-
 docs/reference/303-systeme-de-suggestions.md  |   35 +-
 jinja2/qfdmo/create_actor_event.html          |   16 +-
 jinja2/qfdmo/dags_validations.html            |   55 -
 jinja2/qfdmo/partials/candidat_row.html       |   10 +-
 jinja2/qfdmo/update_actor_event.html          |   26 +-
 .../adhoc_analysis/refashion_adhoc.ipynb      | 2416 -----------------
 qfdmo/admin/__init__.py                       |    1 +
 qfdmo/admin/data.py                           |   15 +
 qfdmo/forms.py                                |    9 +-
 .../commands/reinitialize_dagrun.py           |   12 +-
 qfdmo/migrations/0052_dagrun.py               |    2 +-
 ...te_remove_dagrunchange_dag_run_and_more.py |  181 ++
 qfdmo/models/data.py                          |  187 +-
 qfdmo/views/dags.py                           |  118 +-
 .../data/_partials/create_actor_event.html    |   50 +
 templates/data/_partials/source_event.html    |   21 +
 .../data/_partials/update_actor_event.html    |   55 +
 templates/data/base.html                      |   61 +
 templates/data/dags_validations.html          |   39 +
 26 files changed, 784 insertions(+), 2687 deletions(-)
 delete mode 100644 jinja2/qfdmo/dags_validations.html
 delete mode 100644 lvao_data/adhoc_analysis/refashion_adhoc.ipynb
 create mode 100644 qfdmo/admin/data.py
 create mode 100644 qfdmo/migrations/0109_suggestioncohorte_remove_dagrunchange_dag_run_and_more.py
 create mode 100644 templates/data/_partials/create_actor_event.html
 create mode 100644 templates/data/_partials/source_event.html
 create mode 100644 templates/data/_partials/update_actor_event.html
 create mode 100644 templates/data/base.html
 create mode 100644 templates/data/dags_validations.html

diff --git a/dags/annuaire_entreprise_checks.py b/dags/annuaire_entreprise_checks.py
index 2a878c927..f93ae2352 100755
--- a/dags/annuaire_entreprise_checks.py
+++ b/dags/annuaire_entreprise_checks.py
@@ -282,7 +282,7 @@ def db_data_prepare(**kwargs):
     serialized_data = {}
     for key, df in data.items():
         df["event"] = "UPDATE_ACTOR"
-        df["row_updates"] = df[columns].apply(
+        df["suggestion"] = df[columns].apply(
             lambda row: json.dumps(row.to_dict(), default=str), axis=1
         )
         serialized_data[key] = {"df": df, "metadata": {"updated_rows": len(df)}}
diff --git a/dags/ingest_validated_dataset_to_db.py b/dags/ingest_validated_dataset_to_db.py
index e8cf9a80b..f74130959 100755
--- a/dags/ingest_validated_dataset_to_db.py
+++ b/dags/ingest_validated_dataset_to_db.py
@@ -19,68 +19,104 @@
 }

 dag = DAG(
-    dag_id="validate_and_process_dagruns",
+    dag_id="validate_and_process_suggestions",
     dag_display_name="Traitement des cohortes de données validées",
     default_args=default_args,
-    description="Check for VALIDATE in qfdmo_dagrun and process qfdmo_dagrunchange",
+    description="traiter les suggestions à traiter",
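+    # Commentaire indicatif (ajout de l'éditeur) : le DAG est planifié toutes les
+    # 5 minutes ; la tâche ShortCircuitOperator "check_suggestion_to_process"
+    # court-circuite les tâches suivantes lorsqu'aucune suggestion_cohorte au
+    # statut ATRAITER n'est trouvée (le callable renvoie False).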
     schedule="*/5 * * * *",
     catchup=False,
     max_active_runs=1,
 )


-def _get_first_dagrun_to_insert():
+def _get_first_suggestioncohorte_to_insert():
     hook = PostgresHook(postgres_conn_id="qfdmo_django_db")
-    # get first row from table qfdmo_dagrun with status TO_INSERT
     row = hook.get_first(
-        f"SELECT * FROM qfdmo_dagrun WHERE status = '{constants.DAGRUN_TOINSERT}'"
-        " LIMIT 1"
+        f"""
+        SELECT * FROM qfdmo_suggestioncohorte
+        WHERE statut = '{constants.SUGGESTION_ATRAITER}'
+        LIMIT 1
+        """
     )
     return row


 def check_suggestion_to_process(**kwargs):
-    # get first row from table qfdmo_dagrun with status TO_INSERT
-    row = _get_first_dagrun_to_insert()
+    row = _get_first_suggestioncohorte_to_insert()
     return bool(row)


 def fetch_and_parse_data(**context):
-    row = _get_first_dagrun_to_insert()
-    dag_run_id = row[0]
+    row = _get_first_suggestioncohorte_to_insert()
+    suggestion_cohorte_id = row[0]
     engine = PostgresConnectionManager().engine

     df_sql = pd.read_sql_query(
-        f"SELECT * FROM qfdmo_dagrunchange WHERE dag_run_id = '{dag_run_id}'",
+        f"""
+        SELECT * FROM qfdmo_suggestionunitaire
+        WHERE suggestion_cohorte_id = '{suggestion_cohorte_id}'
+        """,
         engine,
     )

-    df_create = df_sql[df_sql["change_type"] == "CREATE"]
-    df_update_actor = df_sql[df_sql["change_type"] == "UPDATE_ACTOR"]
-
-    if not df_create.empty:
-        normalized_dfs = df_create["row_updates"].apply(pd.json_normalize)
-        df_actors_create = pd.concat(normalized_dfs.tolist(), ignore_index=True)
+    df_acteur_to_create = df_sql[
+        df_sql["type_action"] == constants.SUGGESTION_SOURCE_AJOUT
+    ]
+    df_acteur_to_update = df_sql[
+        df_sql["type_action"] == constants.SUGGESTION_SOURCE_MISESAJOUR
+    ]
+    df_acteur_to_delete = df_sql[
+        df_sql["type_action"] == constants.SUGGESTION_SOURCE_SUPRESSION
+    ]
+    df_acteur_to_enrich = df_sql[
+        df_sql["type_action"] == constants.SUGGESTION_ENRICHISSEMENT
+    ]
+
+    df_update_actor = df_sql[df_sql["type_action"] == "UPDATE_ACTOR"]
+
+    if not df_acteur_to_create.empty:
+        normalized_dfs = df_acteur_to_create["suggestion"].apply(pd.json_normalize)
+        df_acteur = pd.concat(normalized_dfs.tolist(), ignore_index=True)
+        return dag_ingest_validated_utils.handle_create_event(
+            df_acteur, suggestion_cohorte_id, engine
+        )
+    if not df_acteur_to_update.empty:
+        normalized_dfs = df_acteur_to_update["suggestion"].apply(pd.json_normalize)
+        df_acteur = pd.concat(normalized_dfs.tolist(), ignore_index=True)
         return dag_ingest_validated_utils.handle_create_event(
-            df_actors_create, dag_run_id, engine
+            df_acteur, suggestion_cohorte_id, engine
+        )
+    if not df_acteur_to_delete.empty:
+        normalized_dfs = df_acteur_to_delete["suggestion"].apply(pd.json_normalize)
+        df_actors_update_actor = pd.concat(normalized_dfs.tolist(), ignore_index=True)
+        status_repeated = (
+            df_acteur_to_delete["status"]
+            .repeat(df_acteur_to_delete["suggestion"].apply(len))
+            .reset_index(drop=True)
+        )
+        df_actors_update_actor["status"] = status_repeated
+
+        return dag_ingest_validated_utils.handle_update_actor_event(
+            df_actors_update_actor, suggestion_cohorte_id
         )

-    if not df_update_actor.empty:
-        normalized_dfs = df_update_actor["row_updates"].apply(pd.json_normalize)
+    if not df_acteur_to_enrich.empty:
+
+        normalized_dfs = df_update_actor["suggestion"].apply(pd.json_normalize)
         df_actors_update_actor = pd.concat(normalized_dfs.tolist(), ignore_index=True)
         status_repeated = (
             df_update_actor["status"]
-            .repeat(df_update_actor["row_updates"].apply(len))
+            .repeat(df_update_actor["suggestion"].apply(len))
             .reset_index(drop=True)
         )
         df_actors_update_actor["status"] = status_repeated

         return dag_ingest_validated_utils.handle_update_actor_event(
-            df_actors_update_actor, dag_run_id
+            df_actors_update_actor, suggestion_cohorte_id
         )

     return {
-        "dag_run_id": dag_run_id,
+        "dag_run_id": suggestion_cohorte_id,
     }
diff --git a/dags/shared/tasks/business_logic/write_data.py b/dags/shared/tasks/business_logic/write_data.py
index 722663af6..4ab49057f 100644
--- a/dags/shared/tasks/business_logic/write_data.py
+++ b/dags/shared/tasks/business_logic/write_data.py
@@ -1,4 +1,4 @@
-from utils.dag_eo_utils import insert_dagrun_and_process_df
+from utils.dag_eo_utils import insert_suggestion_and_process_df


 def write_data(
@@ -20,4 +20,4 @@ def write_data(
         run_name = run_id.replace("__", " - ")
         df = data["df"]
         metadata.update(data.get("metadata", {}))
-        insert_dagrun_and_process_df(df, metadata, dag_name_suffixed, run_name)
+        insert_suggestion_and_process_df(df, metadata, dag_name_suffixed, run_name)
diff --git a/dags/sources/config/shared_constants.py b/dags/sources/config/shared_constants.py
index 030cf2e33..33fb402da 100755
--- a/dags/sources/config/shared_constants.py
+++ b/dags/sources/config/shared_constants.py
@@ -1,8 +1,19 @@
-# DagRun statuts
-DAGRUN_TOVALIDATE = "TO_VALIDATE"
-DAGRUN_TOINSERT = "TO_INSERT"
-DAGRUN_REJECTED = "REJECTED"
-DAGRUN_FINISHED = "FINISHED"
+# Suggestion statuts (pour cohorte et unitaire)
+SUGGESTION_AVALIDER = "AVALIDER"
+SUGGESTION_REJETER = "REJETER"
+SUGGESTION_PARTIEL = "PARTIEL"
+SUGGESTION_ATRAITER = "ATRAITER"
+SUGGESTION_ENCOURS = "ENCOURS"
+SUGGESTION_ERREUR = "ERREUR"
+SUGGESTION_SUCCES = "SUCCES"
+
+# SuggestionCohorte actions
+SUGGESTION_CLUSTERING = "CLUSTERING"
+SUGGESTION_SOURCE = "SOURCE"
+SUGGESTION_SOURCE_AJOUT = "SOURCE_AJOUT"
+SUGGESTION_SOURCE_MISESAJOUR = "SOURCE_MISESAJOUR"
+SUGGESTION_SOURCE_SUPRESSION = "SOURCE_SUPRESSION"
+SUGGESTION_ENRICHISSEMENT = "ENRICHISSEMENT"

 # Public accueilli
 PUBLIC_PAR = "Particuliers"
diff --git a/dags/sources/tasks/business_logic/db_data_prepare.py b/dags/sources/tasks/business_logic/db_data_prepare.py
index 1d4145cae..6d577ef19 100644
--- a/dags/sources/tasks/business_logic/db_data_prepare.py
+++ b/dags/sources/tasks/business_logic/db_data_prepare.py
@@ -18,7 +18,7 @@ def db_data_prepare(
     acteurtype_id_by_code: dict,
 ):
     update_actors_columns = ["identifiant_unique", "statut", "cree_le"]
-    df_acteur_to_delete["row_updates"] = df_acteur_to_delete[
+    df_acteur_to_delete["suggestion"] = df_acteur_to_delete[
         update_actors_columns
     ].apply(lambda row: json.dumps(row.to_dict(), default=str), axis=1)
     # Created or updated Acteurs
@@ -124,7 +124,7 @@ def db_data_prepare(

     df_joined = df_joined.where(pd.notna(df_joined), None)

-    df_joined["row_updates"] = df_joined.apply(
+    df_joined["suggestion"] = df_joined.apply(
         lambda row: json.dumps(row.to_dict(), default=str), axis=1
     )
     df_joined.drop_duplicates("identifiant_unique", keep="first", inplace=True)
diff --git a/dags/utils/dag_eo_utils.py b/dags/utils/dag_eo_utils.py
index 1f98b6e09..6c5b5b090 100755
--- a/dags/utils/dag_eo_utils.py
+++ b/dags/utils/dag_eo_utils.py
@@ -8,38 +8,53 @@
 logger = logging.getLogger(__name__)


-def insert_dagrun_and_process_df(df_acteur_updates, metadata, dag_name, run_name):
+def insert_suggestion_and_process_df(df_acteur_updates, metadata, dag_name, run_name):
     if df_acteur_updates.empty:
         return
     engine = PostgresConnectionManager().engine
     current_date = datetime.now()
-
+    logger.warning(dag_name)
+    logger.warning(run_name)
+    logger.warning(constants.SUGGESTION_SOURCE)
+    logger.warning(constants.SUGGESTION_ATRAITER)
+    logger.warning(json.dumps(metadata))
     with engine.connect() as conn:
-        # Insert a new dagrun
+        # Insert a new suggestion
         result = conn.execute(
             """
-            INSERT INTO qfdmo_dagrun
-            (dag_id, run_id, status, meta_data, created_date, updated_date)
-            VALUES (%s, %s, %s, %s, %s, %s)
+            INSERT INTO qfdmo_suggestioncohorte
+            (
+                identifiant_action,
+                identifiant_execution,
+                type_action,
+                statut,
+                metadata,
+                cree_le,
+                modifie_le
+            )
+            VALUES (%s, %s, %s, %s, %s, %s, %s)
             RETURNING ID;
             """,
             (
                 dag_name,
                 run_name,
-                "TO_VALIDATE",
+                constants.SUGGESTION_SOURCE,  # FIXME: spécialiser les sources
+                constants.SUGGESTION_ATRAITER,
                 json.dumps(metadata),
                 current_date,
                 current_date,
             ),
         )
-        dag_run_id = result.fetchone()[0]
+        suggestion_cohorte_id = result.fetchone()[0]

     # Insert dag_run_change
-    df_acteur_updates["change_type"] = df_acteur_updates["event"]
-    df_acteur_updates["dag_run_id"] = dag_run_id
-    df_acteur_updates["status"] = constants.DAGRUN_TOVALIDATE
-    df_acteur_updates[["row_updates", "dag_run_id", "change_type", "status"]].to_sql(
-        "qfdmo_dagrunchange",
+    df_acteur_updates["type_action"] = df_acteur_updates["event"]
+    df_acteur_updates["suggestion_cohorte_id"] = suggestion_cohorte_id
+    df_acteur_updates["statut"] = constants.SUGGESTION_ATRAITER
+    df_acteur_updates[
+        ["suggestion", "suggestion_cohorte_id", "type_action", "statut"]
+    ].to_sql(
+        "qfdmo_suggestionunitaire",
         engine,
         if_exists="append",
         index=False,
diff --git a/dags/utils/dag_ingest_validated_utils.py b/dags/utils/dag_ingest_validated_utils.py
index a4a3f648c..7bdc2126c 100755
--- a/dags/utils/dag_ingest_validated_utils.py
+++ b/dags/utils/dag_ingest_validated_utils.py
@@ -76,7 +76,7 @@ def handle_update_actor_event(df_actors, dag_run_id):
     ]
     current_time = datetime.now().astimezone().isoformat(timespec="microseconds")

-    df_actors = df_actors[df_actors["status"] == shared_constants.DAGRUN_TOINSERT]
+    df_actors = df_actors[df_actors["status"] == shared_constants.SUGGESTION_ATRAITER]
     df_actors = df_actors.apply(mapping_utils.replace_with_selected_candidat, axis=1)
     df_actors[["adresse", "code_postal", "ville"]] = df_actors.apply(
         lambda row: base_utils.extract_details(row, col="adresse_candidat"), axis=1
@@ -312,7 +312,9 @@ def handle_write_data_update_actor_event(connection, df_actors):


 def update_dag_run_status(
-    connection, dag_run_id, statut=shared_constants.DAGRUN_FINISHED
+    connection, dag_run_id, statut=shared_constants.SUGGESTION_SUCCES
 ):
-    query = f"UPDATE qfdmo_dagrun SET status = '{statut}' WHERE id = {dag_run_id}"
+    query = f"""
+    UPDATE qfdmo_suggestioncohorte SET statut = '{statut}' WHERE id = {dag_run_id}
+    """
     connection.execute(query)
diff --git a/docs/reference/303-systeme-de-suggestions.md b/docs/reference/303-systeme-de-suggestions.md
index e6aa76f91..f42cee410 100644
--- a/docs/reference/303-systeme-de-suggestions.md
+++ b/docs/reference/303-systeme-de-suggestions.md
@@ -22,11 +22,14 @@

 ### Base de données

-- Renommage des tables : `dagrun` -> `suggestion_cohorte`, `dagrunchange` -> `suggestion_ligne`
+- Renommage des tables : `dagrun` -> `suggestion_cohorte`, `dagrunchange` -> `suggestion_unitaire`
 - Écrire les champs en français comme le reste des tables de l'application
 - Revue des statuts de `suggestion_cohorte` : à traiter, en cours de traitement, fini avec succès, fini avec succès partiel, fini en erreur
 - Ajout d'un type d'événement à `suggestion_cohorte` : source, enrichissement
 - Ajout d'un sous-type d'événement à `suggestion_cohorte` : source - ajout acteur, source - suppression acteur, source - modification acteur, enrichissement - déménagement…
+- Ajout d'un champ pour stocker le message de sortie (au moins en cas d'erreur)
+- Paramètre de tolérance d'erreur
+- 2 champs JSON : 1 contexte initial, 1 suggestion

 ### Interface

@@ -42,3 +45,33 @@ Si possible, utiliser l'interface d'administration de Django pour gérer les sug
 ### Pipeline

 - Le DAG de validation des cohortes doit intégrer la même architecture que les autres DAGs
+
+# Cible
+
+## Système de suggestion
+
+Les suggestions sont créées par l'exécution d'un pipeline ou d'un script. Les suggestions sont faites par paquets qu'on appelle **Cohortes** ; les cohortes comprennent un ensemble de suggestions de modification.
+
+Les cohortes ont un type d'événement : `clustering`, `enrichissement`, `source`, selon le type de l'action lancée à l'origine de la suggestion de modification.
+
+Les cohortes et les suggestions ont un statut de traitement qui représente leur cycle de vie : `à valider`, `rejeter`, `à traiter`, `en cours de traitement`, `fini avec succès`, `fini avec succès partiel` (uniquement pour les cohortes), `fini en erreur`.
+
+### Représentation dans Django
+
+- SuggestionCohorte représente les cohortes
+- SuggestionUnitaire représente les propositions de modification
+
+### Cycle de vie d'une suggestion
+
+```mermaid
+---
+title: Cycle de vie d'une suggestion (cohorte et unitaire)
+---
+
+flowchart TB
+
+    AVALIDER[À valider] --> ATRAITER[À traiter] --> ENCOURS[En cours de traitement] --> SUCCES[Fini avec succès]
+    AVALIDER[À valider] --> REJETER[Rejeter]
+    ENCOURS --> PARTIEL[Fini avec succès partiel]
+    ENCOURS --> ERREUR[Fini en erreur]
+```
diff --git a/jinja2/qfdmo/create_actor_event.html b/jinja2/qfdmo/create_actor_event.html
index bd5103df2..c70079843 100644
--- a/jinja2/qfdmo/create_actor_event.html
+++ b/jinja2/qfdmo/create_actor_event.html
@@ -1,4 +1,4 @@
-{% if dagrun_lines|length > 0 and dagrun_lines[0].change_type == 'CREATE' %}
+{% if suggestion_unitaires|length > 0 and suggestion_unitaires[0].change_type == 'CREATE' %}
@@ -6,17 +6,17 @@
 meta_data
 Acteur
 Proposition de service
-row_updates
+suggestion
-    {% for dagrun_line in dagrun_lines if dagrun_line.change_type == 'CREATE' %}
+    {% for suggestion_unitaire in suggestion_unitaires if suggestion_unitaire.change_type == 'CREATE' %}
-        {{ dagrun_line.get_change_type_display() }}
-        {{ dagrun_line.meta_data if dagrun_line.meta_data else "-" }}
+        {{ suggestion_unitaire.get_change_type_display() }}
+        {{ suggestion_unitaire.meta_data if suggestion_unitaire.meta_data else "-" }}
-            {% for key, value in dagrun_line.display_acteur_details().items() %}
+            {% for key, value in suggestion_unitaire.display_acteur_details().items() %}

{{ key }} : {{ value }}

{% endfor %} @@ -26,7 +26,7 @@ Action Sous-Catégories - {% for service in dagrun_line.display_proposition_service() %} + {% for service in suggestion_unitaire.display_proposition_service() %} {{ service.action }} @@ -43,7 +43,7 @@
Données brutes -
{{ dagrun_line.row_updates }}
+
{{ suggestion_unitaire.suggestion }}
diff --git a/jinja2/qfdmo/dags_validations.html b/jinja2/qfdmo/dags_validations.html deleted file mode 100644 index ec8eda4c5..000000000 --- a/jinja2/qfdmo/dags_validations.html +++ /dev/null @@ -1,55 +0,0 @@ -{% extends 'layout/base.html' %} - -{% block content %} - -
-

Validations des «DAGs»

- -

- Cette page permet de valider les données des «DAGs». -

- {% if messages %} -
    - {% for message in messages %} - {{ message }} - {% endfor %} -
- {% endif %} - - {{ csrf_input }} - {{ form }} - {{ form.dagrun.value() }} -
- -
- - {% if dagrun_instance %} -

Instance du DAG : {{ dagrun_instance }}

-

Meta données

- {% for (meta_title, meta_data) in dagrun_instance.display_meta_data().items() %} -

{{ meta_title }} : {{meta_data}}

- {% endfor %} -
- meta_data brutes -
{{ dagrun_instance.meta_data }}
-
-

Exemples

- -
- - - {% include 'qfdmo/update_actor_event.html' %} - {% include 'qfdmo/create_actor_event.html' %} - -
Résumé du tableau (accessibilité)
-
-
- - -
- {% endif %} - - -
- -{% endblock %} diff --git a/jinja2/qfdmo/partials/candidat_row.html b/jinja2/qfdmo/partials/candidat_row.html index ae1950030..8136f849f 100644 --- a/jinja2/qfdmo/partials/candidat_row.html +++ b/jinja2/qfdmo/partials/candidat_row.html @@ -24,17 +24,17 @@ Meilleure proposition Map Link - {% for candidat in dagrun_line.row_updates.ae_result %} + {% for candidat in suggestion_unitaire.suggestion.ae_result %} {% if candidat.etat_admin_candidat != 'F' %} - {% with comparison_result=(dagrun_line.row_updates.best_candidat_index and loop.index == dagrun_line.row_updates.best_candidat_index|int) %} + {% with comparison_result=(suggestion_unitaire.suggestion.best_candidat_index and loop.index == suggestion_unitaire.suggestion.best_candidat_index|int) %} {# Ces valeurs sont définies dans dags/utils/shared_constants.py, à garder synchronisées entre Django et Airflow #} - +
{{ csrf_input }} - - + + diff --git a/jinja2/qfdmo/update_actor_event.html b/jinja2/qfdmo/update_actor_event.html index 334e84217..25eeb6323 100644 --- a/jinja2/qfdmo/update_actor_event.html +++ b/jinja2/qfdmo/update_actor_event.html @@ -1,4 +1,4 @@ -{% if dagrun_lines|length > 0 and dagrun_lines[0].change_type == 'UPDATE_ACTOR' %} +{% if suggestion_unitaires|length > 0 and suggestion_unitaires[0].change_type == 'UPDATE_ACTOR' %} @@ -6,23 +6,23 @@ meta_data Identifiant Unique Candidats - row_updates + suggestion - {% for dagrun_line in dagrun_lines if dagrun_line.change_type == 'UPDATE_ACTOR' %} + {% for suggestion_unitaire in suggestion_unitaires if suggestion_unitaire.change_type == 'UPDATE_ACTOR' %} - {{ dagrun_line.get_change_type_display() }} - {{ dagrun_line.meta_data if dagrun_line.meta_data else "-" }} + {{ suggestion_unitaire.get_change_type_display() }} + {{ suggestion_unitaire.meta_data if suggestion_unitaire.meta_data else "-" }} - {% with identifiant_unique=dagrun_line.display_acteur_details().identifiant_unique %} + {% with identifiant_unique=suggestion_unitaire.display_acteur_details().identifiant_unique %} {{ identifiant_unique }} {% endwith %} - {% with candidat=candidat, index=loop.index, dagrun=request.GET.dagrun, - identifiant_unique=dagrun_line.display_acteur_details().identifiant_unique %} + {% with candidat=candidat, index=loop.index, suggestion_cohorte=request.GET.suggestion_cohorte, + identifiant_unique=suggestion_unitaire.display_acteur_details().identifiant_unique %} {% include 'qfdmo/partials/candidat_row.html' %} {% endwith %} @@ -30,23 +30,23 @@
Données brutes -
{{ dagrun_line.row_updates }}
+
{{ suggestion_unitaire.suggestion }}
{% endfor %} -{% if dagrun_lines.has_other_pages %} +{% if suggestion_unitaires.has_other_pages %} - + diff --git a/lvao_data/adhoc_analysis/refashion_adhoc.ipynb b/lvao_data/adhoc_analysis/refashion_adhoc.ipynb deleted file mode 100644 index 042b6244b..000000000 --- a/lvao_data/adhoc_analysis/refashion_adhoc.ipynb +++ /dev/null @@ -1,2416 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "id": "a00ac129-f66d-41b7-9605-9ddb6b8b8fca", - "metadata": {}, - "source": [ - "## Intégrer Refashion depuis l'api pointsapport :\n", - "\n", - "- Récupérer les données-eo-refashion depuis l'api pointsapport.\n", - "- Créer et mapper les données vers les tables Acteurs, Proposition de Services et Sous-catégories.\n", - "- Enregistrer chaque table dans un fichier CSV." - ] - }, - { - "cell_type": "markdown", - "id": "fa714639-b9f1-4a6f-8ef6-582956f2223a", - "metadata": {}, - "source": [ - "#### préprod" - ] - }, - { - "cell_type": "code", - "execution_count": 50, - "id": "ae6d5405-0e71-4f1b-9acb-c3ef733c900e", - "metadata": {}, - "outputs": [], - "source": [ - "from sqlalchemy import create_engine\n", - "from dotenv import load_dotenv\n", - "import os\n", - "\n", - "# Load environment variables from .env file\n", - "load_dotenv()\n", - "\n", - "# Accessing environment variables\n", - "user = os.getenv('DB_USER')\n", - "password = os.getenv('DB_PASSWORD')\n", - "host = os.getenv('DB_HOST')\n", - "port = os.getenv('DB_PORT') # Default PostgreSQL port is 5432, but we're using a custom one here\n", - "db_name = os.getenv('DB_NAME')\n", - "\n", - "# Create the connection URL\n", - "connection_string = f'postgresql://{user}:{password}@{host}:{port}/{db_name}'\n", - "\n", - "# Create the engine\n", - "engine = create_engine(connection_string) \n" - ] - }, - { - "cell_type": "markdown", - "id": "f75dab43-a1f4-4347-87ee-c35dbe7c4469", - "metadata": {}, - "source": [ - "#### prod" - ] - }, - { - "cell_type": "code", - "execution_count": 31, - "id": "f370e9c3-749d-4c2e-87ca-87fe6e7610e9", - "metadata": {}, - "outputs": [], - "source": [ - "from sqlalchemy import create_engine\n", - "from dotenv import load_dotenv\n", - "import os\n", - "\n", - "# Load environment variables from .env file\n", - "load_dotenv()\n", - "\n", - "# Accessing environment variables\n", - "user = os.getenv('DB_USER_PROD')\n", - "password = os.getenv('DB_PASSWORD_PROD')\n", - "host = os.getenv('DB_HOST_PROD')\n", - "port = os.getenv('DB_PORT_PROD') # Default PostgreSQL port is 5432, but we're using a custom one here\n", - "db_name = os.getenv('DB_NAME_PROD')\n", - "\n", - "# Create the connection URL\n", - "connection_string = f'postgresql://{user}:{password}@{host}:{port}/{db_name}'\n", - "\n", - "# Create the engine\n", - "engine_prod = create_engine(connection_string)\n" - ] - }, - { - "cell_type": "markdown", - "id": "92d728a8-6324-46ce-b18a-16d371092df4", - "metadata": {}, - "source": [ - "## Get data from point apport " - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "id": "e751080b-b54d-4f3c-a97b-0224ddb4d1b4", - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/hamzaa/data/quefairedemesobjets/venv/lib/python3.9/site-packages/urllib3/__init__.py:34: NotOpenSSLWarning: urllib3 v2.0 only supports OpenSSL 1.1.1+, currently the 'ssl' module is compiled with 'LibreSSL 2.8.3'. 
See: https://github.com/urllib3/urllib3/issues/3020\n", - " warnings.warn(\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "https://data.pointsapport.ademe.fr/data-fair/api/v1/datasets/zkt20z09p8jl6oix18a5kcte/lines?size=10000&after=1709624636413%2C403043131910\n", - "https://data.pointsapport.ademe.fr/data-fair/api/v1/datasets/zkt20z09p8jl6oix18a5kcte/lines?size=10000&after=1709624630377%2C403037095920\n", - "https://data.pointsapport.ademe.fr/data-fair/api/v1/datasets/zkt20z09p8jl6oix18a5kcte/lines?size=10000&after=1709624624501%2C403031219930\n", - "None\n" - ] - } - ], - "source": [ - "import requests\n", - "import pandas as pd\n", - "\n", - "\n", - "def fetch_all_data(url):\n", - " all_data = []\n", - " while url:\n", - " response = requests.get(url)\n", - " if response.status_code == 200:\n", - " data = response.json()\n", - " all_data.extend(data['results'])\n", - " # Check if there's a next page link\n", - " url = data.get('next', None)\n", - " print(url)\n", - " else:\n", - " print(f\"Failed to fetch data: {response.status_code}\")\n", - " break\n", - " return all_data\n", - "\n", - "api_url = \"https://data.pointsapport.ademe.fr/data-fair/api/v1/datasets/donnees-eo-refashion/lines?size=10000\"\n", - "\n", - "data = fetch_all_data(api_url)\n", - "\n", - "df = pd.DataFrame(data)\n", - "\n" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "id": "14ddd3e2-c35c-433b-8d90-cf827849988d", - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/hamzaa/data/quefairedemesobjets/venv/lib/python3.9/site-packages/pandas/io/sql.py:1665: SAWarning: Did not recognize type 'geometry' of column 'location'\n", - " self.meta.reflect(bind=self.con, only=[table_name], views=True)\n", - "/Users/hamzaa/data/quefairedemesobjets/venv/lib/python3.9/site-packages/pandas/io/sql.py:1665: SAWarning: Did not recognize type 'geometry' of column 'location'\n", - " self.meta.reflect(bind=self.con, only=[table_name], views=True)\n", - "/Users/hamzaa/data/quefairedemesobjets/venv/lib/python3.9/site-packages/pandas/io/sql.py:1665: SAWarning: Did not recognize type 'geometry' of column 'location'\n", - " self.meta.reflect(bind=self.con, only=[table_name], views=True)\n" - ] - } - ], - "source": [ - "df_acteurtype = pd.read_sql_table('qfdmo_acteurtype', engine)\n", - "df_sources = pd.read_sql_table('qfdmo_source', engine)\n", - "df_da = pd.read_sql_table('qfdmo_displayedacteur', engine)\n", - "df_ps = pd.read_sql_table('qfdmo_propositionservice', engine)\n", - "df_ps['id'].max()\n", - "df_pssc = pd.read_sql_table('qfdmo_propositionservice_sous_categories', engine)\n", - "df_action = pd.read_sql_table('qfdmo_action', engine)\n", - "df_ac = pd.read_sql_table('qfdmo_acteur', engine)\n", - "df_libel = pd.read_sql_table('qfdmo_labelqualite', engine)\n" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "id": "367b49e9-c625-4933-a7b4-f77b8f9a9803", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
idlibellecodeafficherbonusurllogo_file
01Repar'ActeurreparacteurTrueFalsehttps://www.artisanat.fr/annuaire-repar-acteurs
13Re_fashionrefashionTrueTruehttps://refashion.fr/citoyen/fr/bonus-reparationlogos/Refashion_32.png
24EcomaisonecomaisonTrueTruehttps://ecomaison.com/developper-reparation/logos/ecomaison32.png
35Bonus RéparbonusreparTrueTrueNonelogos/BonusRepar32.png
42QualiRéparqualireparTrueTruehttps://www.label-qualirepar.fr/logos/logo-qualirepar.png
\n", - "
" - ], - "text/plain": [ - " id libelle code afficher bonus \\\n", - "0 1 Repar'Acteur reparacteur True False \n", - "1 3 Re_fashion refashion True True \n", - "2 4 Ecomaison ecomaison True True \n", - "3 5 Bonus Répar bonusrepar True True \n", - "4 2 QualiRépar qualirepar True True \n", - "\n", - " url logo_file \n", - "0 https://www.artisanat.fr/annuaire-repar-acteurs \n", - "1 https://refashion.fr/citoyen/fr/bonus-reparation logos/Refashion_32.png \n", - "2 https://ecomaison.com/developper-reparation/ logos/ecomaison32.png \n", - "3 None logos/BonusRepar32.png \n", - "4 https://www.label-qualirepar.fr/ logos/logo-qualirepar.png " - ] - }, - "execution_count": 6, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df_libel" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "id": "36c97099-1826-44df-bdf4-9259f70c9bca", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "Engine(postgresql://quefairedem_2657:***@quefairedem-2657.postgresql.a.osc-fr1.scalingo-dbs.com:33517/quefairedem_2657)" - ] - }, - "execution_count": 4, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df_action = pd.read_sql_table('qfdmo_action', engine)\n", - "engine" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "id": "b8ebc8c3-87a0-4dec-905d-5afc2fc85278", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
idcodelibelleorderdescriptioncouleuriconafficher
09echangeréchanger7Noneblue-cumulusfr-icon-action-echangerTrue
16mettreenlocationmettre en location4Mettre en locationpurple-glycinefr-icon-action-mettreenlocationTrue
25louerlouer3Nonepurple-glycinefr-icon-action-louerTrue
34donnerdonner6Noneyellow-tournesolfr-icon-action-donnerTrue
43revendrevendre9Nonebrown-cafe-cremefr-icon-action-vendreTrue
52acheteracheter de seconde main8acheter d'occasionbrown-cafe-cremefr-icon-action-acheterTrue
61reparerréparer5Nonegreen-menthefr-icon-action-reparerTrue
78preterprêter1Noneorange-terre-battuefr-icon-action-preterTrue
87emprunteremprunter2Noneorange-terre-battuefr-icon-action-emprunterTrue
943triertrier10trier pour recycleryellow-tournesolfr-icon-recycle-lineTrue
\n", - "
" - ], - "text/plain": [ - " id code libelle order description \\\n", - "0 9 echanger échanger 7 None \n", - "1 6 mettreenlocation mettre en location 4 Mettre en location \n", - "2 5 louer louer 3 None \n", - "3 4 donner donner 6 None \n", - "4 3 revendre vendre 9 None \n", - "5 2 acheter acheter de seconde main 8 acheter d'occasion \n", - "6 1 reparer réparer 5 None \n", - "7 8 preter prêter 1 None \n", - "8 7 emprunter emprunter 2 None \n", - "9 43 trier trier 10 trier pour recycler \n", - "\n", - " couleur icon afficher \n", - "0 blue-cumulus fr-icon-action-echanger True \n", - "1 purple-glycine fr-icon-action-mettreenlocation True \n", - "2 purple-glycine fr-icon-action-louer True \n", - "3 yellow-tournesol fr-icon-action-donner True \n", - "4 brown-cafe-creme fr-icon-action-vendre True \n", - "5 brown-cafe-creme fr-icon-action-acheter True \n", - "6 green-menthe fr-icon-action-reparer True \n", - "7 orange-terre-battue fr-icon-action-preter True \n", - "8 orange-terre-battue fr-icon-action-emprunter True \n", - "9 yellow-tournesol fr-icon-recycle-line True " - ] - }, - "execution_count": 5, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df_action" - ] - }, - { - "cell_type": "markdown", - "id": "5877b711-6b06-4f69-a22d-9746aaae7c8d", - "metadata": {}, - "source": [ - "### Mappers" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "id": "99ba1159-5704-478a-8cac-01fd89a8339f", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "{'id_point_apport_ou_reparation': 'identifiant_externe', 'adresse_complement': 'adresse_complement', 'type_de_point_de_collecte': 'acteur_type_id', 'telephone': 'telephone', 'siret': 'siret', 'uniquement_sur_rdv': '', 'exclusivite_de_reprisereparation': '', 'filiere': '', 'public_accueilli': '', 'produitsdechets_acceptes': '', 'labels_etou_bonus': '', 'reprise': '', 'point_de_reparation': '', 'ecoorganisme': 'source_id', 'adresse_format_ban': 'adresse', 'nom_de_lorganisme': 'nom', 'enseigne_commerciale': 'nom_commercial', '_updatedAt': 'cree_le', 'site_web': 'url', 'email': 'email', 'perimetre_dintervention': '', 'longitudewgs84': 'location', 'latitudewgs84': 'location', 'horaires_douverture': 'horaires', 'consignes_dacces': 'description'}\n" - ] - } - ], - "source": [ - "column_mapping = {\n", - " 'id_point_apport_ou_reparation': 'identifiant_externe',\n", - " 'adresse_complement': 'adresse_complement',\n", - " 'type_de_point_de_collecte': 'acteur_type_id',\n", - " 'telephone': 'telephone',\n", - " 'siret': 'siret',\n", - " 'uniquement_sur_rdv': '',\n", - " 'exclusivite_de_reprisereparation': '',\n", - " 'filiere': '',\n", - " 'public_accueilli': '',\n", - " 'produitsdechets_acceptes': '',\n", - " 'labels_etou_bonus': '',\n", - " 'reprise': '',\n", - " 'point_de_reparation': '',\n", - " 'ecoorganisme': 'source_id',\n", - " 'adresse_format_ban': 'adresse',\n", - " 'nom_de_lorganisme': 'nom',\n", - " 'enseigne_commerciale':'nom_commercial',\n", - " '_updatedAt':'cree_le',\n", - " 'site_web': 'url',\n", - " 'email': 'email',\n", - " 'perimetre_dintervention': '',\n", - " 'longitudewgs84': 'location', \n", - " 'latitudewgs84': 'location', \n", - " 'horaires_douverture': 'horaires',\n", - " 'consignes_dacces': 'description',\n", - "}\n", - "\n", - "\n", - "# Print the dictionary for visual confirmation\n", - "print(column_mapping)" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "id": "eb706aae-67f4-4810-a6fb-77f21b534798", - "metadata": {}, - "outputs": [ - { - 
"data": { - "text/plain": [ - "Index(['id_point_apport_ou_reparation', 'adresse_complement',\n", - " 'type_de_point_de_collecte', 'telephone', '_i', 'siret',\n", - " 'uniquement_sur_rdv', 'exclusivite_de_reprisereparation', 'filiere',\n", - " 'public_accueilli', '_rand', 'point_dapport_pour_reemploi',\n", - " 'point_de_collecte_ou_de_reprise_des_dechets',\n", - " 'produitsdechets_acceptes', 'labels_etou_bonus', 'reprise',\n", - " 'point_de_reparation', 'ecoorganisme', 'adresse_format_ban',\n", - " 'nom_de_lorganisme', 'enseigne_commerciale', '_updatedAt',\n", - " 'point_dapport_de_service_reparation', 'site_web', '_score', '_id',\n", - " 'service_a_domicile', 'email', 'perimetre_dintervention',\n", - " 'longitudewgs84', '_geopoint', 'latitudewgs84', 'horaires_douverture',\n", - " 'consignes_dacces', 'identifiant_externe', 'acteur_type_id'],\n", - " dtype='object')" - ] - }, - "execution_count": 10, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df.columns" - ] - }, - { - "cell_type": "markdown", - "id": "efbd6891-e90a-49ae-ae76-ff02a48ffde7", - "metadata": {}, - "source": [ - "### Transformations" - ] - }, - { - "cell_type": "markdown", - "id": "1799717a-a4ff-4039-b1eb-7c4669384841", - "metadata": {}, - "source": [ - "#### Create Actors" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "id": "8fba9bda-e2d6-448a-80ee-8386e42f6677", - "metadata": {}, - "outputs": [], - "source": [ - "from shapely.geometry import Point\n", - "from shapely import wkb\n", - "import re\n", - "import hashlib\n", - "\n", - "\n", - "selected_columns = ['nom', 'adresse', 'type_de_point_de_collecte', 'id_point_apport_ou_reparation','identifiant_externe']\n", - "\n", - "def generate_unique_id(row):\n", - " unique_str = '_'.join([str(row[col]) for col in selected_columns])\n", - " return hashlib.sha256(unique_str.encode()).hexdigest()\n", - "def transform_acteur_type_id(value):\n", - " mapping_dict = {\n", - " \"Solution en ligne (site web, app. 
mobile)\": \"en ligne (web, mobile)\",\n", - " \"Artisan, commerce indépendant\": \"artisan, commerce indépendant\",\n", - " \"Magasin / Franchise, Enseigne commerciale / Distributeur / Point de vente\": \"commerce\",\n", - " \"Point d'Apport Volontaire Publique\": \"point d'apport volontaire public\",\n", - " \"Association, entreprise de l’économie sociale et solidaire (ESS)\": \"Association, entreprise de l'ESS\",\n", - " \"Déchèterie\": \"déchèterie\",\n", - " }\n", - " libelle = mapping_dict.get(value)\n", - " id_value = df_acteurtype.loc[df_acteurtype['libelle'] == libelle, 'id'].values[0] if any(df_acteurtype['libelle'] == libelle) else None\n", - " return id_value\n", - "\n", - "\n", - "\n", - "def transform_location(longitude, latitude):\n", - " point = Point(longitude, latitude)\n", - " \n", - " transformed_location_binary = wkb.dumps(point)\n", - " transformed_location_hex = transformed_location_binary.hex()\n", - "\n", - " return transformed_location_hex\n", - "\n", - "def transform_ecoorganisme(value):\n", - " \n", - " id_value = df_sources.loc[df_sources['code'].str.lower() == value.lower(), 'id'].values[0] if any(df_sources['code'].str.lower() == value.lower()) else None\n", - " return id_value\n", - "\n", - "def extract_details(row):\n", - " pattern = re.compile(r'\\b(\\d{5})\\s+(.*)')\n", - " \n", - " address = None\n", - " postal_code = None\n", - " city = None\n", - " if pd.isnull(row['adresse_format_ban']):\n", - " return pd.Series([None, None, None])\n", - "\n", - " # Ensure adress_ban is treated as a string\n", - " adress_ban = str(row['adresse_format_ban'])\n", - " \n", - " # Search for the pattern\n", - " match = pattern.search(adress_ban)\n", - " if match:\n", - " postal_code = match.group(1)\n", - " city = match.group(2)\n", - " address = adress_ban[:match.start()].strip()\n", - " \n", - " return pd.Series([address, postal_code, city])\n", - "\n", - "# Apply the function and assign the result to new columns\n", - "for old_col, new_col in column_mapping.items():\n", - " if new_col: \n", - " if old_col == 'type_de_point_de_collecte':\n", - " df[new_col] = df[old_col].apply(transform_acteur_type_id)\n", - " elif old_col in ('longitudewgs84', 'latitudewgs84'):\n", - " df['location'] = df.apply(lambda row: transform_location(row['longitudewgs84'], row['latitudewgs84']), axis=1)\n", - " elif old_col == 'ecoorganisme':\n", - " df[new_col] = df[old_col].apply(transform_ecoorganisme)\n", - " elif old_col == 'adresse_format_ban':\n", - " df[['adresse', 'code_postal', 'ville']] = df.apply(extract_details, axis=1)\n", - " else:\n", - " df[new_col] = df[old_col]\n", - "df['label_reparacteur']=False\n", - "df['statut']='ACTIF'\n", - "df['identifiant_unique'] = df.apply(generate_unique_id, axis=1)\n", - " \n" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "id": "42ec6cd5-9bea-4a30-82bb-4e8176ebb935", - "metadata": {}, - "outputs": [], - "source": [ - "df.loc[df['service_a_domicile']=='service à domicile uniquement','statut'] = 'SUPPRIME'" - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "id": "fdf57e11-358a-4671-81c8-234f5046e5a7", - "metadata": {}, - "outputs": [], - "source": [ - "df['modifie_le'] = df['cree_le']\n", - "df['siret'] = df['siret'].astype(str).apply(lambda x : x[:14])\n", - "df['telephone'] = df['telephone'].dropna().apply(lambda x: x.replace(' ', ''))\n", - "df['telephone'] = df['telephone'].dropna().apply(lambda x: '0' + x[2:] if x.startswith('33') else x)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": 
"be500ad1-3b1f-4966-8ad3-4d6a96cca0a2", - "metadata": {}, - "outputs": [], - "source": [ - "df.drop_duplicates('identifiant_unique', keep='first', inplace=True)" - ] - }, - { - "cell_type": "markdown", - "id": "4006cd25-abb6-48d2-b0b0-57a59465c217", - "metadata": {}, - "source": [ - "#### Create Proposition de services" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "c1ffc6bf-b0d7-49fe-a10d-4da1f7d79e64", - "metadata": {}, - "outputs": [], - "source": [ - "rows_list = []\n", - "\n", - "for index, row in df.iterrows():\n", - " acteur_id = row['identifiant_unique']\n", - " action_id = None\n", - " sous_categories = row['produitsdechets_acceptes']\n", - " if row['point_dapport_de_service_reparation']:\n", - " acteur_service_id = 17\n", - " action_id = 1\n", - " elif row['point_dapport_pour_reemploi']:\n", - " acteur_service_id = 4\n", - " action_id = 4\n", - " elif row['point_de_reparation']:\n", - " acteur_service_id = 15\n", - " action_id = 1\n", - " elif row['point_de_collecte_ou_de_reprise_des_dechets']:\n", - " acteur_service_id = 4\n", - " action_id = 43\n", - " else:\n", - " continue # Skip rows that don't match any criteria\n", - " \n", - " rows_list.append({\"acteur_service_id\": acteur_service_id, \"action_id\": action_id, \"acteur_id\": acteur_id, \"sous_categories\":sous_categories})\n", - "\n", - "df_pds = pd.DataFrame(rows_list)\n", - "df_pds.index = range(df_ps['id'].max()+1, df_ps['id'].max()+1 + len(df_pds))\n", - "\n", - "df_pds['id'] = df_pds.index\n" - ] - }, - { - "cell_type": "markdown", - "id": "21cd6591-c3bf-4e88-b9fa-5624acaeede4", - "metadata": {}, - "source": [ - "#### Create sous categories" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "dd0aa7a8-5a29-4425-bf00-90b24dba3ff3", - "metadata": {}, - "outputs": [], - "source": [ - "rows_list=[]\n", - "sous_categories = { \n", - " \"Vêtement\" : 107,\n", - " \"Linge\" : 104,\n", - " \"Chaussure\":109\n", - "}\n", - "for index, row in df_pds.iterrows():\n", - " products = str(row[\"sous_categories\"]).split(\"|\")\n", - " for product in products:\n", - " if product.strip() in sous_categories:\n", - " rows_list.append({\n", - " 'propositionservice_id': row['id'], \n", - " 'souscategorieobjet_id': sous_categories[product.strip()]\n", - " })\n", - "\n", - "df_sous_categories = pd.DataFrame(rows_list, columns=['propositionservice_id', 'souscategorieobjet_id'])\n", - "\n", - "df_sous_categories" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "id": "3fa2edc3-e1ce-4ed4-a6d2-5e4fb91b15a4", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "array(['Agréé Bonus Réparation', nan], dtype=object)" - ] - }, - "execution_count": 10, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df['labels_etou_bonus'].unique()" - ] - }, - { - "cell_type": "markdown", - "id": "35a5b05a-f8b5-4a1a-8962-8f830319fe61", - "metadata": {}, - "source": [ - "#### Create libellé" - ] - }, - { - "cell_type": "code", - "execution_count": 16, - "id": "922b66b1-75a8-4779-aef2-8117d4a9f29e", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
acteur_idlabelqualite_idlabelqualite
05c761cd79c679af340a540c4d77cca46bd5cef232e2fcf...3Re_fashion
1da991542b3c34ec43043501b5e8c0fa7025c9e4d485a97...3Re_fashion
236ff562c2a87bda80a22950f01bb7cf66c4f2dcda31949...3Re_fashion
3c868120d7e4f4f2d400672ac8af567bbc771966814ff37...3Re_fashion
4c1dc4d491615af30a41ea0042ce0ac785d0fb434361d92...3Re_fashion
............
916f978ca97c24b6f509e97008edbac01e2b7b6f9335026b4...3Re_fashion
9170cf31f76552155f68a6b5bf3d6ac7b27c402a329306998...3Re_fashion
918dc9ec750646094de9057bfa26d06d8eaee5030dbf17458...3Re_fashion
91999ec7e15a132bc57a741cbb65f5bae338a5a1af13de8f9...3Re_fashion
92032b19f5dd787346d34bbb537864d4607eaa8d9ef9324cd...3Re_fashion
\n", - "

921 rows × 3 columns

\n", - "
" - ], - "text/plain": [ - " acteur_id labelqualite_id \\\n", - "0 5c761cd79c679af340a540c4d77cca46bd5cef232e2fcf... 3 \n", - "1 da991542b3c34ec43043501b5e8c0fa7025c9e4d485a97... 3 \n", - "2 36ff562c2a87bda80a22950f01bb7cf66c4f2dcda31949... 3 \n", - "3 c868120d7e4f4f2d400672ac8af567bbc771966814ff37... 3 \n", - "4 c1dc4d491615af30a41ea0042ce0ac785d0fb434361d92... 3 \n", - ".. ... ... \n", - "916 f978ca97c24b6f509e97008edbac01e2b7b6f9335026b4... 3 \n", - "917 0cf31f76552155f68a6b5bf3d6ac7b27c402a329306998... 3 \n", - "918 dc9ec750646094de9057bfa26d06d8eaee5030dbf17458... 3 \n", - "919 99ec7e15a132bc57a741cbb65f5bae338a5a1af13de8f9... 3 \n", - "920 32b19f5dd787346d34bbb537864d4607eaa8d9ef9324cd... 3 \n", - "\n", - " labelqualite \n", - "0 Re_fashion \n", - "1 Re_fashion \n", - "2 Re_fashion \n", - "3 Re_fashion \n", - "4 Re_fashion \n", - ".. ... \n", - "916 Re_fashion \n", - "917 Re_fashion \n", - "918 Re_fashion \n", - "919 Re_fashion \n", - "920 Re_fashion \n", - "\n", - "[921 rows x 3 columns]" - ] - }, - "execution_count": 16, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "rows_list= []\n", - "for index, row in df.iterrows():\n", - " label = str(row[\"labels_etou_bonus\"])\n", - " if label == 'Agréé Bonus Réparation':\n", - " rows_list.append({\n", - " 'acteur_id': row['identifiant_unique'], \n", - " 'labelqualite_id': 3,\n", - " 'labelqualite': df_libel.loc[df_libel[\"id\"]==3,\"libelle\"].tolist()[0]\n", - " })\n", - "\n", - "df_libelles = pd.DataFrame(rows_list, columns=['acteur_id', 'labelqualite_id', 'labelqualite'])\n", - "\n", - "df_libelles" - ] - }, - { - "cell_type": "code", - "execution_count": 15, - "id": "d3a66633-862c-43e2-a6c3-e2e61900f0f1", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "'Re_fashion'" - ] - }, - "execution_count": 15, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [] - }, - { - "cell_type": "markdown", - "id": "23321c55-0d13-466a-8ea0-10a4f87d640e", - "metadata": {}, - "source": [ - "#### Add to DB" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "8e4b3c28-5f10-45f2-b194-8d8fce24a26f", - "metadata": {}, - "outputs": [], - "source": [ - "\n", - "df[[\n", - " 'identifiant_unique',\n", - " 'acteur_type_id',\n", - "'adresse',\n", - " 'code_postal', 'ville',\n", - " 'adresse_complement',\n", - " 'commentaires',\n", - " 'description',\n", - " 'email',\n", - " 'horaires',\n", - " 'identifiant_externe',\n", - " 'label_reparacteur',\n", - " 'nom_commercial',\n", - " 'nom',\n", - " 'location',\n", - "'cree_le',\n", - "'modifie_le',\n", - "'multi_base',\n", - "'manuel',\n", - "'statut',\n", - " 'siret',\n", - " 'source_id',\n", - " 'telephone',\n", - " 'url'\n", - "]].to_sql(\"qfdmo_acteur\",engine, if_exists='append',index=False,method='multi',chunksize=1000)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "b59130e3-4843-45f4-a435-f48afe795b81", - "metadata": {}, - "outputs": [], - "source": [ - "df_pds[['acteur_service_id','action_id','acteur_id','id']].to_csv('refashion_propositionservice.csv')\n", - "df_pds[['id','acteur_service_id','action_id','acteur_id']].to_sql(\"qfdmo_propositionservice\",engine, if_exists='append',index=False,method='multi',chunksize=1000)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "6be62cd5-afbe-4dd8-ab05-00149be0fb23", - "metadata": {}, - "outputs": [], - "source": [ - "df_sous_categories[['propositionservice_id','souscategorieobjet_id']].to_csv('refashion_sous_categories.csv')" - ] - 
}, - { - "cell_type": "code", - "execution_count": null, - "id": "8bda7e30-4236-4d36-829b-f60c5682d5f6", - "metadata": {}, - "outputs": [], - "source": [ - "df_sous_categories.to_sql(\"qfdmo_propositionservice_sous_categories\",engine, if_exists='append',index=False,method='multi',chunksize=1000)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "e148879d-1c5d-4d44-97fb-75f1170c84fb", - "metadata": {}, - "outputs": [], - "source": [ - "delete_query= \"\"\"\n", - "DELETE FROM qfdmo_propositionservice_sous_categories\n", - " USING qfdmo_propositionservice_sous_categories_refashion\n", - " WHERE qfdmo_propositionservice_sous_categories.propositionservice_id = qfdmo_propositionservice_sous_categories_refashion.propositionservice_id\n", - " AND qfdmo_propositionservice_sous_categories.souscategorieobjet_id = qfdmo_propositionservice_sous_categories_refashion.souscategorieobjet_id;\"\"\"" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "0daa4b03-4b75-4e17-9ae1-04a01b3d9b84", - "metadata": {}, - "outputs": [], - "source": [ - "import psycopg2\n", - "from psycopg2 import sql\n", - "from sqlalchemy import create_engine\n", - "user = \n", - "password = \n", - "host = \n", - "port = '33517' # default PostgreSQL port is 5432\n", - "db_name = 'quefairedem_2657'\n", - "# Access variables in .env\n", - "conn = psycopg2.connect(\n", - " dbname=db_name, \n", - " user=user, \n", - " password=password, \n", - " host=host,\n", - " port=port\n", - ")\n", - "conn.autocommit = True\n", - "cursor = conn.cursor()\n", - "\n", - "query = sql.SQL(sql_query)\n", - "cursor.execute(query)\n", - "\n", - "cursor.close()\n", - "conn.close()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "3d8bece0-bb45-494d-a115-5533e5b4fb98", - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "8900582c-e316-46fc-9b3e-26930baeda73", - "metadata": {}, - "outputs": [], - "source": [ - "df.loc[df['service_a_domicile']=='service à domicile uniquement',['statut','identifiant_unique']].to_sql(\"qfdmo_acteur_fix_sd\",engined)" - ] - }, - { - "cell_type": "markdown", - "id": "ffb1a425-7ab2-47bc-b829-ff636e9f8729", - "metadata": {}, - "source": [ - "## Revision Christian --> revisionacteur" - ] - }, - { - "cell_type": "code", - "execution_count": 65, - "id": "839957c6-a8d0-4da9-8a91-f0adec021026", - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/var/folders/0b/ssm8dl5n5td_t_2lb_8qn6500000gn/T/ipykernel_47073/3244965094.py:1: DtypeWarning: Columns (8,12,22,24) have mixed types. Specify dtype option on import or set low_memory=False.\n", - " df_chris_rev = pd.read_csv(\"./../../refashion_acteurs_chris_28032024.csv\")\n" - ] - } - ], - "source": [ - "df_chris_rev = pd.read_csv(\"./../../refashion_acteurs_chris_28032024.csv\")" - ] - }, - { - "cell_type": "code", - "execution_count": 69, - "id": "a0901dad-3648-461a-91c0-5dc3af1b4f07", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n",
- "[pandas HTML table render stripped during extraction; same dataframe as the text/plain output below: 35930 rows × 4 columns (nom, horaires, url, adresse_complement)]
" - ], - "text/plain": [ - " nom horaires url \\\n", - "0 MFC NaN https://www.lamanufacture49.fr/ \n", - "1 TILLI NaN https://tilli.fr/ \n", - "2 TILLI NaN https://tilli.fr/ \n", - "3 TILLI NaN https://tilli.fr/ \n", - "4 A.C.L PROXI POL. NaN NaN \n", - "... ... ... ... \n", - "35925 APIVET 24h/24. 7j/7 https://www.apivet49.com/ \n", - "35926 APIVET 24h/24. 7j/7 https://www.apivet49.com/ \n", - "35927 APIVET 24h/24. 7j/7 https://www.apivet49.com/ \n", - "35928 APIVET 24h/24. 7j/7 https://www.apivet49.com/ \n", - "35929 APIVET 24h/24. 7j/7 https://www.apivet49.com/ \n", - "\n", - " adresse_complement \n", - "0 SAINT-PIERRE-MONTLIMART \n", - "1 NaN \n", - "2 NaN \n", - "3 NaN \n", - "4 NaN \n", - "... ... \n", - "35925 Place Tout Blanc \n", - "35926 NaN \n", - "35927 NaN \n", - "35928 NaN \n", - "35929 NaN \n", - "\n", - "[35930 rows x 4 columns]" - ] - }, - "execution_count": 69, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df_chris_rev[['nom','horaires','url','adresse_complement']]" - ] - }, - { - "cell_type": "code", - "execution_count": 33, - "id": "49a64056-f1ed-4994-b1c1-7acb322fe6a6", - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/hamzaa/data/quefairedemesobjets/venv/lib/python3.9/site-packages/pandas/io/sql.py:1665: SAWarning: Did not recognize type 'geometry' of column 'location'\n", - " self.meta.reflect(bind=self.con, only=[table_name], views=True)\n", - "/Users/hamzaa/data/quefairedemesobjets/venv/lib/python3.9/site-packages/pandas/io/sql.py:1665: SAWarning: Did not recognize type 'geometry' of column 'location'\n", - " self.meta.reflect(bind=self.con, only=[table_name], views=True)\n" - ] - } - ], - "source": [ - "df_revact = pd.read_sql_table(\"qfdmo_revisionacteur\",engine_prod)\n", - "df_actprod = pd.read_sql_table(\"qfdmo_acteur\",engine_prod)" - ] - }, - { - "cell_type": "code", - "execution_count": 44, - "id": "c5453169-209a-41d3-b1ff-7f1b9d580f08", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "160552" - ] - }, - "execution_count": 44, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df_revact['identifiant_unique'].count()" - ] - }, - { - "cell_type": "code", - "execution_count": 45, - "id": "aa95a640-2312-45d2-82c4-5d7712acbe80", - "metadata": {}, - "outputs": [], - "source": [ - "df_rev_man = pd.merge(df_actprod[(df_actprod['source_id']==45)][['identifiant_unique']],df_revact, on = ['identifiant_unique'])" - ] - }, - { - "cell_type": "code", - "execution_count": 60, - "id": "487b683a-ccb9-4088-837c-7536f37a0c1e", - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/hamzaa/data/quefairedemesobjets/venv/lib/python3.9/site-packages/pandas/io/sql.py:1665: SAWarning: Did not recognize type 'geometry' of column 'location'\n", - " self.meta.reflect(bind=self.con, only=[table_name], views=True)\n" - ] - } - ], - "source": [ - "df_refashion = pd.read_sql_table(\"qfdmo_acteur\", engine)" - ] - }, - { - "cell_type": "code", - "execution_count": 62, - "id": "1b6f06bf-005b-4a6c-84c1-383ef39af61b", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n",
- "[pandas HTML table render stripped during extraction; same dataframe as the text/plain output below: 35934 rows × 24 columns]
" - ], - "text/plain": [ - " nom identifiant_unique \\\n", - "21690 Ghenam Reparation refashion_SWKLYBWCFOLZ \n", - "42031 Patine Studio refashion_FINMHILZPXHW \n", - "42232 Au fil et à mesure refashion_WWXFNUVVIMHC \n", - "42420 La Réserve Varzy refashion_KFUJSXXOKJPM \n", - "168751 MFC refashion_TLC-REFASHION-REP-455001208507113095_d \n", - "... ... ... \n", - "205929 APIVET refashion_TLC-REFASHION-PAV-3271797 \n", - "205930 APIVET refashion_TLC-REFASHION-PAV-3271796 \n", - "205931 APIVET refashion_TLC-REFASHION-PAV-3271795 \n", - "205932 APIVET refashion_TLC-REFASHION-PAV-3271794 \n", - "205933 APIVET refashion_TLC-REFASHION-PAV-3271793 \n", - "\n", - " adresse adresse_complement \\\n", - "21690 7 Rue Franklin None \n", - "42031 8 rue Martel None \n", - "42232 7 Rue de la Vendée None \n", - "42420 19 Rue Delangle None \n", - "168751 RTE CHAUDRON SAINT-PIERRE-MONTLIMART \n", - "... ... ... \n", - "205929 Angle rue Bertin et Avenue Jean Joxe Place Tout Blanc \n", - "205930 rue Louis Gain None \n", - "205931 Place André Leroy None \n", - "205932 101 RUE ST NICOLAS None \n", - "205933 Place Bichon None \n", - "\n", - " code_postal ville \\\n", - "21690 93100 Montreuil \n", - "42031 75010 Paris \n", - "42232 79130 Secondigny \n", - "42420 58210 Varzy \n", - "168751 49110 MONTREVAULT-SUR-EVRE \n", - "... ... ... \n", - "205929 49000 Angers \n", - "205930 49000 Angers \n", - "205931 49000 Angers \n", - "205932 49000 Angers \n", - "205933 49000 Angers \n", - "\n", - " url \\\n", - "21690 https://ghenam-reparation.jimdosite.com/nos-ta... \n", - "42031 https://www.patine.fr/blogs/book-a-session \n", - "42232 https://secondigny.fr/utile/annuaire-entrepris... \n", - "42420 https://www.facebook.com/lareservevarzy/ \n", - "168751 https://www.lamanufacture49.fr/ \n", - "... ... \n", - "205929 None \n", - "205930 None \n", - "205931 None \n", - "205932 None \n", - "205933 None \n", - "\n", - " email \\\n", - "21690 None \n", - "42031 hello@patine.fr \n", - "42232 None \n", - "42420 asso.lareservevarzy@gmail.com \n", - "168751 None \n", - "... ... \n", - "205929 None \n", - "205930 None \n", - "205931 None \n", - "205932 None \n", - "205933 None \n", - "\n", - " location telephone \\\n", - "21690 0101000020E6100000990F0874268D034068321CCF676E... None \n", - "42031 0101000020E6100000D828EB3713D30240A75D4C33DD6F... 0181701618 \n", - "42232 0101000020E61000001477BCC96FD1DABF58478E74064E... 06 24 35 91 55 \n", - "42420 0101000020E61000007FA4880CAB180B400853944BE3AD... 06 04 01 18 10 \n", - "168751 0101000020E6100000000000000000F87F000000000000... 0241754850 \n", - "... ... ... \n", - "205929 0101000020E610000082C5E1CCAF66E1BF82548A1D8DBD... None \n", - "205930 0101000020E6100000925852EE3E47E1BF2B4CDF6B08BC... None \n", - "205931 0101000020E6100000B2BCAB1E308FE1BFBA1457957DBB... None \n", - "205932 0101000020E6100000822A244F4821E2BF03DA0C26B4BC... None \n", - "205933 0101000020E6100000EE60C43E0114E2BFFAB9A1293BBD... None \n", - "\n", - " ... acteur_type_id statut source_id cree_le \\\n", - "21690 ... 3 ACTIF 45 2023-11-08 17:28:48.329397+00:00 \n", - "42031 ... 3 ACTIF 45 2024-01-11 12:55:34.682766+00:00 \n", - "42232 ... 3 ACTIF 45 2024-01-17 07:55:28.288884+00:00 \n", - "42420 ... 4 ACTIF 45 2024-03-06 17:44:16.226699+00:00 \n", - "168751 ... 5 ACTIF 45 2024-03-05 07:44:03.205000+00:00 \n", - "... ... ... ... ... ... \n", - "205929 ... 10 ACTIF 45 2024-03-05 07:43:41.565000+00:00 \n", - "205930 ... 10 ACTIF 45 2024-03-05 07:43:41.565000+00:00 \n", - "205931 ... 
10 ACTIF 45 2024-03-05 07:43:41.565000+00:00 \n", - "205932 ... 10 ACTIF 45 2024-03-05 07:43:41.565000+00:00 \n", - "205933 ... 10 ACTIF 45 2024-03-05 07:43:41.565000+00:00 \n", - "\n", - " modifie_le naf_principal commentaires \\\n", - "21690 2023-11-24 10:17:25.965587+00:00 None \n", - "42031 2024-01-11 12:56:35.642529+00:00 47.91A \n", - "42232 2024-01-17 07:55:28.288894+00:00 14.13Z \n", - "42420 2024-03-06 17:44:16.226711+00:00 47.29Z \n", - "168751 2024-03-05 07:44:03.205000+00:00 None None \n", - "... ... ... ... \n", - "205929 2024-03-05 07:43:41.565000+00:00 None None \n", - "205930 2024-03-05 07:43:41.565000+00:00 None None \n", - "205931 2024-03-05 07:43:41.565000+00:00 None None \n", - "205932 2024-03-05 07:43:41.565000+00:00 None None \n", - "205933 2024-03-05 07:43:41.565000+00:00 None None \n", - "\n", - " horaires_osm description horaires_description \n", - "21690 None None None \n", - "42031 Tu-Sa 11:00-19:00 None None \n", - "42232 None None None \n", - "42420 None None \n", - "168751 None None None \n", - "... ... ... ... \n", - "205929 None None 24h/24. 7j/7 \n", - "205930 None None 24h/24. 7j/7 \n", - "205931 None None 24h/24. 7j/7 \n", - "205932 None None 24h/24. 7j/7 \n", - "205933 None None 24h/24. 7j/7 \n", - "\n", - "[35934 rows x 24 columns]" - ] - }, - "execution_count": 62, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "filtered_df = df_refashion[df_refashion['identifiant_unique'].str.startswith(\"refashion_\")]\n", - "filtered_df" - ] - }, - { - "cell_type": "code", - "execution_count": 47, - "id": "19a4623f-4118-40c0-95ad-d163012b6021", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "81" - ] - }, - "execution_count": 47, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df_rev_man.to_sql(\"rev_refashion\",engine_prod, if_exists='replace')" - ] - }, - { - "cell_type": "code", - "execution_count": 140, - "id": "3e0a7cf1-6c27-4e8c-b8c9-d572347df0d5", - "metadata": {}, - "outputs": [], - "source": [ - "import psycopg2\n", - "from psycopg2 import sql\n", - "\n", - "# Connect to the database again\n", - "conn = psycopg2.connect(\n", - " dbname=db_name, \n", - " user=user, \n", - " password=password, \n", - " host=host,\n", - " port=port\n", - ")\n", - "conn.autocommit = True\n", - "cursor = conn.cursor()\n", - "\n", - "# Perform the update\n", - "cursor.execute(\"\"\"\n", - " UPDATE qfdmo_revisionacteur\n", - "SET \n", - " acteur_type_id = NULL,\n", - " adresse = NULL,\n", - " code_postal = NULL, \n", - " ville = NULL,\n", - " email = NULL,\n", - " horaires = NULL,\n", - " identifiant_externe = NULL,\n", - " label_reparacteur = qfdmo_revision_acteur_enrich_christian.label_reparacteur,\n", - " nom_commercial = NULL,\n", - " nom = NULL,\n", - " location = NULL,\n", - " cree_le = NOW(),\n", - " modifie_le = NOW(),\n", - " statut = qfdmo_revision_acteur_enrich_christian.statut,\n", - " siret = NULL,\n", - " source_id = NULL,\n", - " telephone = NULL,\n", - " description = qfdmo_revision_acteur_enrich_christian.description,\n", - " adresse_complement = qfdmo_revision_acteur_enrich_christian.adresse_complement,\n", - " url = qfdmo_revision_acteur_enrich_christian.url\n", - "FROM rev_refashion\n", - "WHERE qfdmo_revisionacteur.identifiant_unique = qfdmo_revision_acteur_enrich_christian.identifiant_unique;\n", - "\n", - "\"\"\")\n", - "\n", - "# Cleanup\n", - "cursor.close()\n", - "conn.close()" - ] - }, - { - "cell_type": "markdown", - "id": "98499bbf-de06-40da-8e10-ae33ac429c69", - 
"metadata": {}, - "source": [ - "DELETE FROM qfdmo_displayedpropositionservice_sous_categories\n", - "WHERE propositionservice_id IN (\n", - " SELECT id FROM qfdmo_propositionservice\n", - " WHERE acteur_id IN (\n", - " SELECT identifiant_unique FROM qfdmo_acteur WHERE source_id = 45\n", - " )\n", - ");\n", - "DELETE 105969\n", - "quefairedem_2657=> DELETE FROM qfdmo_propositionservice \n", - "WHERE acteur_id IN (\n", - " SELECT identifiant_unique FROM qfdmo_acteur WHERE source_id = 45\n", - ");\n", - "DELETE 35930\n", - "quefairedem_2657=> delete from qfdmo_acteur where identifiant_unique =45;\n", - "ERROR: operator does not exist: character varying = integer\n", - "LINE 1: delete from qfdmo_acteur where identifiant_unique =45;\n", - " ^\n", - "HINT: No operator matches the given name and argument types. You might need to add explicit type casts.\n", - "quefairedem_2657=> delete from qfdmo_acteur where source_id =45;\n" - ] - }, - { - "cell_type": "code", - "execution_count": 133, - "id": "5ab625ac-398e-4558-b6fb-97cd6c6bf2d3", - "metadata": {}, - "outputs": [], - "source": [ - "df = pd.read_sql_table(\"rev_refashion\",engine)" - ] - }, - { - "cell_type": "code", - "execution_count": 149, - "id": "6a4233c1-e4cc-4852-a18f-9a4736b40187", - "metadata": {}, - "outputs": [], - "source": [ - "sources = pd.read_sql_table(\"qfdmo_sources_acteurs\",engine)" - ] - }, - { - "cell_type": "code", - "execution_count": 180, - "id": "e88cb3b8-33a5-484e-b01b-6ae34a9a89ff", - "metadata": {}, - "outputs": [], - "source": [ - "df_rev = pd.merge(df[df['source_id_y']==45],sources[['identifiant_unique','identifiant_externe']],left_on=['identifiant_externe_y'], right_on=['identifiant_externe'])" - ] - }, - { - "cell_type": "code", - "execution_count": 182, - "id": "cedb934c-3a27-44b9-b767-af9708edfeee", - "metadata": {}, - "outputs": [], - "source": [ - "df_rev = df_rev.drop(columns=['identifiant_unique_x'])\n", - "df_rev = df_rev.rename(columns={'identifiant_unique_y':'identifiant_unique'})\n", - "df_rev.drop_duplicates()" - ] - }, - { - "cell_type": "code", - "execution_count": 203, - "id": "9b04521a-d325-4e34-9ca5-8432595913e6", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "35929" - ] - }, - "execution_count": 203, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df_rev[\n", - " [\n", - " \"identifiant_unique\",\n", - " \"nom\",\n", - " \"adresse\",\n", - " \"adresse_complement\",\n", - " \"code_postal\",\n", - " \"ville\",\n", - " \"url\",\n", - " \"email\",\n", - " \"location\",\n", - " \"telephone\",\n", - " \"nom_commercial\",\n", - " \"nom_officiel\",\n", - " \"siret\",\n", - " \"identifiant_externe\",\n", - " \"acteur_type_id\",\n", - " \"statut\",\n", - " \"cree_le\",\n", - " \"modifie_le\",\n", - " \"naf_principal\",\n", - " \"commentaires\",\n", - " \"horaires_osm\",\n", - " \"horaires_description\",\n", - " \"description\",\n", - " ]\n", - " ].to_sql('qfdmo_revisionacteur',engine, index=False,\n", - "if_exists=\"append\",method=\"multi\",\n", - " chunksize=1000)" - ] - }, - { - "cell_type": "code", - "execution_count": 20, - "id": "bdbd3d74-97d6-4e47-92de-e8b73eaddd53", - "metadata": {}, - "outputs": [], - "source": [ - "df_sql = pd.read_sql_query(\n", - " \"SELECT * FROM qfdmo_dagrunchange WHERE \"\n", - " \"dag_run_id IN \"\n", - " \"(SELECT id FROM qfdmo_dagrun WHERE status = 'DagRunStatus.TO_INSERT')\",\n", - " engine,\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 21, - "id": "84faf089-9319-4cd2-bab6-87d839984d67", - 
"metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n",
- "[pandas HTML table render stripped during extraction; same dataframe as the text/plain output below: 35930 rows × 5 columns (id, change_type, meta_data, row_updates, dag_run_id)]
" - ], - "text/plain": [ - " id change_type meta_data \\\n", - "0 646816 CREATE None \n", - "1 646817 CREATE None \n", - "2 646818 CREATE None \n", - "3 646819 CREATE None \n", - "4 646820 CREATE None \n", - "... ... ... ... \n", - "35925 682741 CREATE None \n", - "35926 682742 CREATE None \n", - "35927 682743 CREATE None \n", - "35928 682744 CREATE None \n", - "35929 682745 CREATE None \n", - "\n", - " row_updates dag_run_id \n", - "0 {'nom': 'MFC', 'url': 'https://www.lamanufactu... 30 \n", - "1 {'nom': 'TILLI', 'url': 'https://tilli.fr/', '... 30 \n", - "2 {'nom': 'TILLI', 'url': 'https://tilli.fr/', '... 30 \n", - "3 {'nom': 'TILLI', 'url': 'https://tilli.fr/', '... 30 \n", - "4 {'nom': 'A.C.L PROXI POL.', 'url': None, 'emai... 30 \n", - "... ... ... \n", - "35925 {'nom': 'APIVET', 'url': None, 'email': None, ... 30 \n", - "35926 {'nom': 'APIVET', 'url': None, 'email': None, ... 30 \n", - "35927 {'nom': 'APIVET', 'url': None, 'email': None, ... 30 \n", - "35928 {'nom': 'APIVET', 'url': None, 'email': None, ... 30 \n", - "35929 {'nom': 'APIVET', 'url': None, 'email': None, ... 30 \n", - "\n", - "[35930 rows x 5 columns]" - ] - }, - "execution_count": 21, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df_sql" - ] - }, - { - "cell_type": "code", - "execution_count": 22, - "id": "66eca3b3-2146-45fd-99c1-38ae83c861a2", - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/var/folders/0b/ssm8dl5n5td_t_2lb_8qn6500000gn/T/ipykernel_47073/3724072109.py:13: FutureWarning: The behavior of DataFrame concatenation with empty or all-NA entries is deprecated. In a future version, this will no longer exclude empty or all-NA columns when determining the result dtypes. To retain the old behavior, exclude the relevant entries before the concat operation.\n", - " df_actors = pd.concat(normalized_dfs.tolist(), ignore_index=True)\n" - ] - }, - { - "ename": "NotImplementedError", - "evalue": "", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mNotImplementedError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[22], line 15\u001b[0m\n\u001b[1;32m 12\u001b[0m normalized_dfs \u001b[38;5;241m=\u001b[39m df_sql[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mrow_updates\u001b[39m\u001b[38;5;124m\"\u001b[39m]\u001b[38;5;241m.\u001b[39mapply(pd\u001b[38;5;241m.\u001b[39mjson_normalize)\n\u001b[1;32m 13\u001b[0m df_actors \u001b[38;5;241m=\u001b[39m pd\u001b[38;5;241m.\u001b[39mconcat(normalized_dfs\u001b[38;5;241m.\u001b[39mtolist(), ignore_index\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m)\n\u001b[0;32m---> 15\u001b[0m normalized_labels_dfs \u001b[38;5;241m=\u001b[39m \u001b[43mdf_actors\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mlabels\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m]\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mapply\u001b[49m\u001b[43m(\u001b[49m\u001b[43mpd\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mjson_normalize\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 16\u001b[0m df_labels \u001b[38;5;241m=\u001b[39m pd\u001b[38;5;241m.\u001b[39mconcat(normalized_labels_dfs\u001b[38;5;241m.\u001b[39mtolist(), ignore_index\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m)\n\u001b[1;32m 18\u001b[0m normalized_pds_dfs \u001b[38;5;241m=\u001b[39m 
df_actors[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mproposition_services\u001b[39m\u001b[38;5;124m\"\u001b[39m]\u001b[38;5;241m.\u001b[39mapply(pd\u001b[38;5;241m.\u001b[39mjson_normalize)\n", - "File \u001b[0;32m~/data/quefairedemesobjets/venv/lib/python3.9/site-packages/pandas/core/series.py:4757\u001b[0m, in \u001b[0;36mSeries.apply\u001b[0;34m(self, func, convert_dtype, args, by_row, **kwargs)\u001b[0m\n\u001b[1;32m 4629\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mapply\u001b[39m(\n\u001b[1;32m 4630\u001b[0m \u001b[38;5;28mself\u001b[39m,\n\u001b[1;32m 4631\u001b[0m func: AggFuncType,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 4636\u001b[0m \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs,\n\u001b[1;32m 4637\u001b[0m ) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m DataFrame \u001b[38;5;241m|\u001b[39m Series:\n\u001b[1;32m 4638\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 4639\u001b[0m \u001b[38;5;124;03m Invoke function on values of Series.\u001b[39;00m\n\u001b[1;32m 4640\u001b[0m \n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 4755\u001b[0m \u001b[38;5;124;03m dtype: float64\u001b[39;00m\n\u001b[1;32m 4756\u001b[0m \u001b[38;5;124;03m \"\"\"\u001b[39;00m\n\u001b[0;32m-> 4757\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mSeriesApply\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 4758\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 4759\u001b[0m \u001b[43m \u001b[49m\u001b[43mfunc\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 4760\u001b[0m \u001b[43m \u001b[49m\u001b[43mconvert_dtype\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mconvert_dtype\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 4761\u001b[0m \u001b[43m \u001b[49m\u001b[43mby_row\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mby_row\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 4762\u001b[0m \u001b[43m \u001b[49m\u001b[43margs\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 4763\u001b[0m \u001b[43m \u001b[49m\u001b[43mkwargs\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 4764\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mapply\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n", - "File \u001b[0;32m~/data/quefairedemesobjets/venv/lib/python3.9/site-packages/pandas/core/apply.py:1209\u001b[0m, in \u001b[0;36mSeriesApply.apply\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 1206\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mapply_compat()\n\u001b[1;32m 1208\u001b[0m \u001b[38;5;66;03m# self.func is Callable\u001b[39;00m\n\u001b[0;32m-> 1209\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mapply_standard\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n", - "File \u001b[0;32m~/data/quefairedemesobjets/venv/lib/python3.9/site-packages/pandas/core/apply.py:1289\u001b[0m, in \u001b[0;36mSeriesApply.apply_standard\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 1283\u001b[0m \u001b[38;5;66;03m# row-wise access\u001b[39;00m\n\u001b[1;32m 1284\u001b[0m \u001b[38;5;66;03m# apply doesn't have a `na_action` keyword and for backward compat reasons\u001b[39;00m\n\u001b[1;32m 1285\u001b[0m \u001b[38;5;66;03m# we need to give `na_action=\"ignore\"` for categorical data.\u001b[39;00m\n\u001b[1;32m 1286\u001b[0m 
\u001b[38;5;66;03m# TODO: remove the `na_action=\"ignore\"` when that default has been changed in\u001b[39;00m\n\u001b[1;32m 1287\u001b[0m \u001b[38;5;66;03m# Categorical (GH51645).\u001b[39;00m\n\u001b[1;32m 1288\u001b[0m action \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mignore\u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(obj\u001b[38;5;241m.\u001b[39mdtype, CategoricalDtype) \u001b[38;5;28;01melse\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[0;32m-> 1289\u001b[0m mapped \u001b[38;5;241m=\u001b[39m \u001b[43mobj\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_map_values\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 1290\u001b[0m \u001b[43m \u001b[49m\u001b[43mmapper\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcurried\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mna_action\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43maction\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mconvert\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mconvert_dtype\u001b[49m\n\u001b[1;32m 1291\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1293\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mlen\u001b[39m(mapped) \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(mapped[\u001b[38;5;241m0\u001b[39m], ABCSeries):\n\u001b[1;32m 1294\u001b[0m \u001b[38;5;66;03m# GH#43986 Need to do list(mapped) in order to get treated as nested\u001b[39;00m\n\u001b[1;32m 1295\u001b[0m \u001b[38;5;66;03m# See also GH#25959 regarding EA support\u001b[39;00m\n\u001b[1;32m 1296\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m obj\u001b[38;5;241m.\u001b[39m_constructor_expanddim(\u001b[38;5;28mlist\u001b[39m(mapped), index\u001b[38;5;241m=\u001b[39mobj\u001b[38;5;241m.\u001b[39mindex)\n", - "File \u001b[0;32m~/data/quefairedemesobjets/venv/lib/python3.9/site-packages/pandas/core/base.py:921\u001b[0m, in \u001b[0;36mIndexOpsMixin._map_values\u001b[0;34m(self, mapper, na_action, convert)\u001b[0m\n\u001b[1;32m 918\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(arr, ExtensionArray):\n\u001b[1;32m 919\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m arr\u001b[38;5;241m.\u001b[39mmap(mapper, na_action\u001b[38;5;241m=\u001b[39mna_action)\n\u001b[0;32m--> 921\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43malgorithms\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mmap_array\u001b[49m\u001b[43m(\u001b[49m\u001b[43marr\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mmapper\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mna_action\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mna_action\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mconvert\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mconvert\u001b[49m\u001b[43m)\u001b[49m\n", - "File \u001b[0;32m~/data/quefairedemesobjets/venv/lib/python3.9/site-packages/pandas/core/algorithms.py:1814\u001b[0m, in \u001b[0;36mmap_array\u001b[0;34m(arr, mapper, na_action, convert)\u001b[0m\n\u001b[1;32m 1812\u001b[0m values \u001b[38;5;241m=\u001b[39m arr\u001b[38;5;241m.\u001b[39mastype(\u001b[38;5;28mobject\u001b[39m, copy\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mFalse\u001b[39;00m)\n\u001b[1;32m 1813\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m na_action \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[0;32m-> 1814\u001b[0m 
\u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mlib\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mmap_infer\u001b[49m\u001b[43m(\u001b[49m\u001b[43mvalues\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mmapper\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mconvert\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mconvert\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1815\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 1816\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m lib\u001b[38;5;241m.\u001b[39mmap_infer_mask(\n\u001b[1;32m 1817\u001b[0m values, mapper, mask\u001b[38;5;241m=\u001b[39misna(values)\u001b[38;5;241m.\u001b[39mview(np\u001b[38;5;241m.\u001b[39muint8), convert\u001b[38;5;241m=\u001b[39mconvert\n\u001b[1;32m 1818\u001b[0m )\n", - "File \u001b[0;32mlib.pyx:2926\u001b[0m, in \u001b[0;36mpandas._libs.lib.map_infer\u001b[0;34m()\u001b[0m\n", - "File \u001b[0;32m~/data/quefairedemesobjets/venv/lib/python3.9/site-packages/pandas/io/json/_normalize.py:445\u001b[0m, in \u001b[0;36mjson_normalize\u001b[0;34m(data, record_path, meta, meta_prefix, record_prefix, errors, sep, max_level)\u001b[0m\n\u001b[1;32m 443\u001b[0m data \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mlist\u001b[39m(data)\n\u001b[1;32m 444\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m--> 445\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mNotImplementedError\u001b[39;00m\n\u001b[1;32m 447\u001b[0m \u001b[38;5;66;03m# check to see if a simple recursive function is possible to\u001b[39;00m\n\u001b[1;32m 448\u001b[0m \u001b[38;5;66;03m# improve performance (see #15621) but only for cases such\u001b[39;00m\n\u001b[1;32m 449\u001b[0m \u001b[38;5;66;03m# as pd.Dataframe(data) or pd.Dataframe(data, sep)\u001b[39;00m\n\u001b[1;32m 450\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m (\n\u001b[1;32m 451\u001b[0m record_path \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[1;32m 452\u001b[0m \u001b[38;5;129;01mand\u001b[39;00m meta \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 455\u001b[0m \u001b[38;5;129;01mand\u001b[39;00m max_level \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[1;32m 456\u001b[0m ):\n", - "\u001b[0;31mNotImplementedError\u001b[0m: " - ] - } - ], - "source": [ - "max_id_pds = pd.read_sql_query(\n", - " \"SELECT max(id) FROM qfdmo_displayedpropositionservice\", engine\n", - ")[\"max\"][0]\n", - "df_sql = pd.read_sql_query(\n", - " \"SELECT * FROM qfdmo_dagrunchange WHERE \"\n", - " \"dag_run_id IN \"\n", - " \"(SELECT id FROM qfdmo_dagrun WHERE status = 'DagRunStatus.TO_INSERT')\",\n", - " engine,\n", - ")\n", - "dag_run_id = df_sql[\"dag_run_id\"].iloc[0]\n", - "\n", - "normalized_dfs = df_sql[\"row_updates\"].apply(pd.json_normalize)\n", - "df_actors = pd.concat(normalized_dfs.tolist(), ignore_index=True)\n", - "\n", - "normalized_labels_dfs = df_actors[\"labels\"].apply(pd.json_normalize)\n", - "df_labels = pd.concat(normalized_labels_dfs.tolist(), ignore_index=True)\n", - "\n", - "normalized_pds_dfs = df_actors[\"proposition_services\"].apply(pd.json_normalize)\n", - "df_pds = pd.concat(normalized_pds_dfs.tolist(), ignore_index=True)\n", - "ids_range = range(max_id_pds + 1, max_id_pds + 1 + len(df_pds))\n", - "\n", - "df_pds[\"id\"] = ids_range\n", - "df_pds[\"pds_sous_categories\"] = df_pds.apply(\n", - " lambda row: [\n", - " {**d, \"propositionservice_id\": row[\"id\"]}\n", - " for d in 
row[\"pds_sous_categories\"]\n", - " ],\n", - " axis=1,\n", - ")\n", - "\n", - "normalized_pdssc_dfs = df_pds[\"pds_sous_categories\"].apply(pd.json_normalize)\n", - "df_pdssc = pd.concat(normalized_pdssc_dfs.tolist(), ignore_index=True)" - ] - }, - { - "cell_type": "code", - "execution_count": 26, - "id": "88a31925-803d-4571-9e47-72ff4f3ad38a", - "metadata": {}, - "outputs": [ - { - "ename": "NotImplementedError", - "evalue": "", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mNotImplementedError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[26], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m normalized_labels_dfs \u001b[38;5;241m=\u001b[39m \u001b[43mdf_actors\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mlabels\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m]\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mapply\u001b[49m\u001b[43m(\u001b[49m\u001b[43mpd\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mjson_normalize\u001b[49m\u001b[43m)\u001b[49m\n", - "File \u001b[0;32m~/data/quefairedemesobjets/venv/lib/python3.9/site-packages/pandas/core/series.py:4757\u001b[0m, in \u001b[0;36mSeries.apply\u001b[0;34m(self, func, convert_dtype, args, by_row, **kwargs)\u001b[0m\n\u001b[1;32m 4629\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mapply\u001b[39m(\n\u001b[1;32m 4630\u001b[0m \u001b[38;5;28mself\u001b[39m,\n\u001b[1;32m 4631\u001b[0m func: AggFuncType,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 4636\u001b[0m \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs,\n\u001b[1;32m 4637\u001b[0m ) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m DataFrame \u001b[38;5;241m|\u001b[39m Series:\n\u001b[1;32m 4638\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 4639\u001b[0m \u001b[38;5;124;03m Invoke function on values of Series.\u001b[39;00m\n\u001b[1;32m 4640\u001b[0m \n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 4755\u001b[0m \u001b[38;5;124;03m dtype: float64\u001b[39;00m\n\u001b[1;32m 4756\u001b[0m \u001b[38;5;124;03m \"\"\"\u001b[39;00m\n\u001b[0;32m-> 4757\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mSeriesApply\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 4758\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 4759\u001b[0m \u001b[43m \u001b[49m\u001b[43mfunc\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 4760\u001b[0m \u001b[43m \u001b[49m\u001b[43mconvert_dtype\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mconvert_dtype\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 4761\u001b[0m \u001b[43m \u001b[49m\u001b[43mby_row\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mby_row\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 4762\u001b[0m \u001b[43m \u001b[49m\u001b[43margs\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 4763\u001b[0m \u001b[43m \u001b[49m\u001b[43mkwargs\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 4764\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mapply\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n", - "File \u001b[0;32m~/data/quefairedemesobjets/venv/lib/python3.9/site-packages/pandas/core/apply.py:1209\u001b[0m, in \u001b[0;36mSeriesApply.apply\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 1206\u001b[0m 
\u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mapply_compat()\n\u001b[1;32m 1208\u001b[0m \u001b[38;5;66;03m# self.func is Callable\u001b[39;00m\n\u001b[0;32m-> 1209\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mapply_standard\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n", - "File \u001b[0;32m~/data/quefairedemesobjets/venv/lib/python3.9/site-packages/pandas/core/apply.py:1289\u001b[0m, in \u001b[0;36mSeriesApply.apply_standard\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 1283\u001b[0m \u001b[38;5;66;03m# row-wise access\u001b[39;00m\n\u001b[1;32m 1284\u001b[0m \u001b[38;5;66;03m# apply doesn't have a `na_action` keyword and for backward compat reasons\u001b[39;00m\n\u001b[1;32m 1285\u001b[0m \u001b[38;5;66;03m# we need to give `na_action=\"ignore\"` for categorical data.\u001b[39;00m\n\u001b[1;32m 1286\u001b[0m \u001b[38;5;66;03m# TODO: remove the `na_action=\"ignore\"` when that default has been changed in\u001b[39;00m\n\u001b[1;32m 1287\u001b[0m \u001b[38;5;66;03m# Categorical (GH51645).\u001b[39;00m\n\u001b[1;32m 1288\u001b[0m action \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mignore\u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(obj\u001b[38;5;241m.\u001b[39mdtype, CategoricalDtype) \u001b[38;5;28;01melse\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[0;32m-> 1289\u001b[0m mapped \u001b[38;5;241m=\u001b[39m \u001b[43mobj\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_map_values\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 1290\u001b[0m \u001b[43m \u001b[49m\u001b[43mmapper\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcurried\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mna_action\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43maction\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mconvert\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mconvert_dtype\u001b[49m\n\u001b[1;32m 1291\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1293\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mlen\u001b[39m(mapped) \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(mapped[\u001b[38;5;241m0\u001b[39m], ABCSeries):\n\u001b[1;32m 1294\u001b[0m \u001b[38;5;66;03m# GH#43986 Need to do list(mapped) in order to get treated as nested\u001b[39;00m\n\u001b[1;32m 1295\u001b[0m \u001b[38;5;66;03m# See also GH#25959 regarding EA support\u001b[39;00m\n\u001b[1;32m 1296\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m obj\u001b[38;5;241m.\u001b[39m_constructor_expanddim(\u001b[38;5;28mlist\u001b[39m(mapped), index\u001b[38;5;241m=\u001b[39mobj\u001b[38;5;241m.\u001b[39mindex)\n", - "File \u001b[0;32m~/data/quefairedemesobjets/venv/lib/python3.9/site-packages/pandas/core/base.py:921\u001b[0m, in \u001b[0;36mIndexOpsMixin._map_values\u001b[0;34m(self, mapper, na_action, convert)\u001b[0m\n\u001b[1;32m 918\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(arr, ExtensionArray):\n\u001b[1;32m 919\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m arr\u001b[38;5;241m.\u001b[39mmap(mapper, na_action\u001b[38;5;241m=\u001b[39mna_action)\n\u001b[0;32m--> 921\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m 
\u001b[43malgorithms\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mmap_array\u001b[49m\u001b[43m(\u001b[49m\u001b[43marr\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mmapper\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mna_action\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mna_action\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mconvert\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mconvert\u001b[49m\u001b[43m)\u001b[49m\n", - "File \u001b[0;32m~/data/quefairedemesobjets/venv/lib/python3.9/site-packages/pandas/core/algorithms.py:1814\u001b[0m, in \u001b[0;36mmap_array\u001b[0;34m(arr, mapper, na_action, convert)\u001b[0m\n\u001b[1;32m 1812\u001b[0m values \u001b[38;5;241m=\u001b[39m arr\u001b[38;5;241m.\u001b[39mastype(\u001b[38;5;28mobject\u001b[39m, copy\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mFalse\u001b[39;00m)\n\u001b[1;32m 1813\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m na_action \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[0;32m-> 1814\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mlib\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mmap_infer\u001b[49m\u001b[43m(\u001b[49m\u001b[43mvalues\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mmapper\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mconvert\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mconvert\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1815\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 1816\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m lib\u001b[38;5;241m.\u001b[39mmap_infer_mask(\n\u001b[1;32m 1817\u001b[0m values, mapper, mask\u001b[38;5;241m=\u001b[39misna(values)\u001b[38;5;241m.\u001b[39mview(np\u001b[38;5;241m.\u001b[39muint8), convert\u001b[38;5;241m=\u001b[39mconvert\n\u001b[1;32m 1818\u001b[0m )\n", - "File \u001b[0;32mlib.pyx:2926\u001b[0m, in \u001b[0;36mpandas._libs.lib.map_infer\u001b[0;34m()\u001b[0m\n", - "File \u001b[0;32m~/data/quefairedemesobjets/venv/lib/python3.9/site-packages/pandas/io/json/_normalize.py:445\u001b[0m, in \u001b[0;36mjson_normalize\u001b[0;34m(data, record_path, meta, meta_prefix, record_prefix, errors, sep, max_level)\u001b[0m\n\u001b[1;32m 443\u001b[0m data \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mlist\u001b[39m(data)\n\u001b[1;32m 444\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m--> 445\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mNotImplementedError\u001b[39;00m\n\u001b[1;32m 447\u001b[0m \u001b[38;5;66;03m# check to see if a simple recursive function is possible to\u001b[39;00m\n\u001b[1;32m 448\u001b[0m \u001b[38;5;66;03m# improve performance (see #15621) but only for cases such\u001b[39;00m\n\u001b[1;32m 449\u001b[0m \u001b[38;5;66;03m# as pd.Dataframe(data) or pd.Dataframe(data, sep)\u001b[39;00m\n\u001b[1;32m 450\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m (\n\u001b[1;32m 451\u001b[0m record_path \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[1;32m 452\u001b[0m \u001b[38;5;129;01mand\u001b[39;00m meta \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 455\u001b[0m \u001b[38;5;129;01mand\u001b[39;00m max_level \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[1;32m 456\u001b[0m ):\n", - "\u001b[0;31mNotImplementedError\u001b[0m: " - ] - } - ], - "source": [ - "normalized_labels_dfs = df_actors[\"labels\"].apply(pd.json_normalize)\n" - ] - }, - { - "cell_type": 
"code", - "execution_count": 30, - "id": "20d8d270-8209-41b1-bc89-21187de8b5c3", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n",
- "[pandas HTML table render stripped during extraction; same single-row dataframe as the text/plain output below (acteur_id, labelqualite, labelqualite_id)]
" - ], - "text/plain": [ - " acteur_id labelqualite \\\n", - "0 refashion_TLC-REFASHION-REP-455001208507113095_d Re_fashion \n", - "\n", - " labelqualite_id \n", - "0 3 " - ] - }, - "execution_count": 30, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df_actors[\"labels\"].dropna().apply(pd.json_normalize)[0]" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "b4a72b9e-e22d-4ffb-aa17-4547c8d212e4", - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.9.6" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/qfdmo/admin/__init__.py b/qfdmo/admin/__init__.py index 983bf9b07..627508b0b 100644 --- a/qfdmo/admin/__init__.py +++ b/qfdmo/admin/__init__.py @@ -1,3 +1,4 @@ from .acteur import * # noqa from .action import * # noqa from .categorie_objet import * # noqa +from .data import * # noqa diff --git a/qfdmo/admin/data.py b/qfdmo/admin/data.py new file mode 100644 index 000000000..14a7d9870 --- /dev/null +++ b/qfdmo/admin/data.py @@ -0,0 +1,15 @@ +from django.contrib.gis import admin + +from qfdmo.models import SuggestionCohorte, SuggestionUnitaire + + +class SuggestionCohorteAdmin(admin.ModelAdmin): + pass + + +class SuggestionUnitaireAdmin(admin.ModelAdmin): + pass + + +admin.site.register(SuggestionCohorte, SuggestionCohorteAdmin) +admin.site.register(SuggestionUnitaire, SuggestionUnitaireAdmin) diff --git a/qfdmo/forms.py b/qfdmo/forms.py index fd4f9fdd8..a4c2ba924 100644 --- a/qfdmo/forms.py +++ b/qfdmo/forms.py @@ -9,7 +9,7 @@ from qfdmo.fields import GroupeActionChoiceField from qfdmo.geo_api import epcis_from, formatted_epcis_as_list_of_tuple -from qfdmo.models import DagRun, DagRunStatus, SousCategorieObjet +from qfdmo.models import SousCategorieObjet, SuggestionCohorte from qfdmo.models.action import ( Action, GroupeAction, @@ -17,6 +17,7 @@ get_directions, get_ordered_directions, ) +from qfdmo.models.data import SuggestionStatut from qfdmo.widgets import ( AutoCompleteInput, DSFRCheckboxSelectMultiple, @@ -342,14 +343,16 @@ def load_choices( class DagsForm(forms.Form): - dagrun = forms.ModelChoiceField( + suggestion_cohorte = forms.ModelChoiceField( label="Séléctionner l'execution d'un DAG", widget=forms.Select( attrs={ "class": "fr-select", } ), - queryset=DagRun.objects.filter(status=DagRunStatus.TO_VALIDATE.value), + queryset=SuggestionCohorte.objects.filter( + statut=SuggestionStatut.ATRAITER.value + ), required=True, ) diff --git a/qfdmo/management/commands/reinitialize_dagrun.py b/qfdmo/management/commands/reinitialize_dagrun.py index eea492018..6560a8f5f 100644 --- a/qfdmo/management/commands/reinitialize_dagrun.py +++ b/qfdmo/management/commands/reinitialize_dagrun.py @@ -7,9 +7,13 @@ class Command(BaseCommand): def handle(self, *args, **options): with connection.cursor() as cursor: - # Truncate the table qfdmo_dagrun and qfdmo_dagrunchange - cursor.execute("TRUNCATE TABLE qfdmo_dagrun CASCADE") + # Truncate the table qfdmo_suggestioncohorte and qfdmo_suggestionunitaire + cursor.execute("TRUNCATE TABLE qfdmo_suggestioncohorte CASCADE") # Set auto-increment to 1 - cursor.execute("ALTER SEQUENCE qfdmo_dagrun_id_seq RESTART WITH 1") - 
cursor.execute("ALTER SEQUENCE qfdmo_dagrunchange_id_seq RESTART WITH 1") + cursor.execute( + "ALTER SEQUENCE qfdmo_suggestioncohorte_id_seq RESTART WITH 1" + ) + cursor.execute( + "ALTER SEQUENCE qfdmo_suggestionunitaire_id_seq RESTART WITH 1" + ) diff --git a/qfdmo/migrations/0052_dagrun.py b/qfdmo/migrations/0052_dagrun.py index d3b95f1ae..b9f9bd3d9 100644 --- a/qfdmo/migrations/0052_dagrun.py +++ b/qfdmo/migrations/0052_dagrun.py @@ -51,7 +51,7 @@ class Migration(migrations.Migration): ), ), ("meta_data", models.JSONField(blank=True, null=True)), - ("row_updates", models.JSONField(blank=True, null=True)), + ("suggestion", models.JSONField(blank=True, null=True)), ( "dag_run", models.ForeignKey( diff --git a/qfdmo/migrations/0109_suggestioncohorte_remove_dagrunchange_dag_run_and_more.py b/qfdmo/migrations/0109_suggestioncohorte_remove_dagrunchange_dag_run_and_more.py new file mode 100644 index 000000000..afd0cc0f8 --- /dev/null +++ b/qfdmo/migrations/0109_suggestioncohorte_remove_dagrunchange_dag_run_and_more.py @@ -0,0 +1,181 @@ +# Generated by Django 5.1.4 on 2025-01-08 16:36 + +import django.db.models.deletion +import django.db.models.functions.datetime +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ("qfdmo", "0108_remove_lvaobaserevision_lvao_base_and_more"), + ] + + operations = [ + migrations.CreateModel( + name="SuggestionCohorte", + fields=[ + ("id", models.AutoField(primary_key=True, serialize=False)), + ( + "identifiant_action", + models.CharField( + help_text="Identifiant de l'action (ex : dag_id pour Airflow)", + max_length=250, + ), + ), + ( + "identifiant_execution", + models.CharField( + help_text="Identifiant de l'execution (ex : run_id pour Airflow)", + max_length=250, + ), + ), + ( + "type_action", + models.CharField( + blank=True, + choices=[ + ("CLUSTERING", "regroupement/déduplication des acteurs"), + ("SOURCE", "ingestion de source de données"), + ( + "SOURCE_AJOUT", + "ingestion de source de données - nouveau acteur", + ), + ( + "SOURCE_MISESAJOUR", + "ingestion de source de données - modification d'acteur existant", + ), + ("SOURCE_SUPRESSION", "ingestion de source de données"), + ], + max_length=250, + ), + ), + ( + "statut", + models.CharField( + choices=[ + ("AVALIDER", "À valider"), + ("REJETER", "Rejeter"), + ("ATRAITER", "À traiter"), + ("ENCOURS", "En cours de traitement"), + ("ERREUR", "Fini en erreur"), + ("PARTIEL", "Fini avec succès partiel"), + ("SUCCES", "Fini avec succès"), + ], + default="AVALIDER", + max_length=50, + ), + ), + ( + "metadata", + models.JSONField( + null=True, + blank=True, + help_text="Metadata de la cohorte, données statistiques", + ), + ), + ( + "cree_le", + models.DateTimeField( + auto_now_add=True, + db_default=django.db.models.functions.datetime.Now(), + ), + ), + ( + "modifie_le", + models.DateTimeField( + auto_now=True, + db_default=django.db.models.functions.datetime.Now(), + ), + ), + ], + ), + migrations.RemoveField( + model_name="dagrunchange", + name="dag_run", + ), + migrations.CreateModel( + name="SuggestionUnitaire", + fields=[ + ("id", models.AutoField(primary_key=True, serialize=False)), + ( + "type_action", + models.CharField( + blank=True, + choices=[ + ("CLUSTERING", "regroupement/déduplication des acteurs"), + ("SOURCE", "ingestion de source de données"), + ( + "SOURCE_AJOUT", + "ingestion de source de données - nouveau acteur", + ), + ( + "SOURCE_MISESAJOUR", + "ingestion de source de données - modification d'acteur existant", + ), + 
("SOURCE_SUPRESSION", "ingestion de source de données"), + ], + max_length=250, + ), + ), + ( + "statut", + models.CharField( + choices=[ + ("AVALIDER", "À valider"), + ("REJETER", "Rejeter"), + ("ATRAITER", "À traiter"), + ("ENCOURS", "En cours de traitement"), + ("ERREUR", "Fini en erreur"), + ("PARTIEL", "Fini avec succès partiel"), + ("SUCCES", "Fini avec succès"), + ], + default="AVALIDER", + max_length=50, + ), + ), + ( + "context", + models.JSONField( + null=True, + blank=True, + help_text="Contexte de la suggestion : données initiales", + ), + ), + ( + "suggestion", + models.JSONField( + blank=True, help_text="Suggestion de modification" + ), + ), + ( + "cree_le", + models.DateTimeField( + auto_now_add=True, + db_default=django.db.models.functions.datetime.Now(), + ), + ), + ( + "modifie_le", + models.DateTimeField( + auto_now=True, + db_default=django.db.models.functions.datetime.Now(), + ), + ), + ( + "suggestion_cohorte", + models.ForeignKey( + on_delete=django.db.models.deletion.CASCADE, + related_name="suggestion_unitaires", + to="qfdmo.suggestioncohorte", + ), + ), + ], + ), + migrations.DeleteModel( + name="DagRun", + ), + migrations.DeleteModel( + name="DagRunChange", + ), + ] diff --git a/qfdmo/models/data.py b/qfdmo/models/data.py index ad8cf0ca4..a13448a4d 100644 --- a/qfdmo/models/data.py +++ b/qfdmo/models/data.py @@ -2,73 +2,133 @@ from django.db.models.functions import Now from dags.sources.config.shared_constants import ( - DAGRUN_FINISHED, - DAGRUN_REJECTED, - DAGRUN_TOINSERT, - DAGRUN_TOVALIDATE, + SUGGESTION_ATRAITER, + SUGGESTION_AVALIDER, + SUGGESTION_CLUSTERING, + SUGGESTION_ENCOURS, + SUGGESTION_ERREUR, + SUGGESTION_PARTIEL, + SUGGESTION_REJETER, + SUGGESTION_SOURCE, + SUGGESTION_SOURCE_AJOUT, + SUGGESTION_SOURCE_MISESAJOUR, + SUGGESTION_SOURCE_SUPRESSION, + SUGGESTION_SUCCES, ) from qfdmo.models.acteur import ActeurType, Source -class DagRunStatus(models.TextChoices): - TO_VALIDATE = DAGRUN_TOVALIDATE - TO_INSERT = DAGRUN_TOINSERT - REJECTED = DAGRUN_REJECTED - FINISHED = DAGRUN_FINISHED +class SuggestionStatut(models.TextChoices): + AVALIDER = SUGGESTION_AVALIDER, "À valider" + REJETER = SUGGESTION_REJETER, "Rejeter" + ATRAITER = SUGGESTION_ATRAITER, "À traiter" + ENCOURS = SUGGESTION_ENCOURS, "En cours de traitement" + ERREUR = SUGGESTION_ERREUR, "Fini en erreur" + PARTIEL = SUGGESTION_PARTIEL, "Fini avec succès partiel" + SUCCES = SUGGESTION_SUCCES, "Fini avec succès" -class DagRun(models.Model): +class SuggestionAction(models.TextChoices): + CLUSTERING = SUGGESTION_CLUSTERING, "regroupement/déduplication des acteurs" + SOURCE = ( + SUGGESTION_SOURCE, + "ingestion de source de données", + ) + SOURCE_AJOUT = ( + SUGGESTION_SOURCE_AJOUT, + "ingestion de source de données - nouveau acteur", + ) + SOURCE_MISESAJOUR = ( + SUGGESTION_SOURCE_MISESAJOUR, + "ingestion de source de données - modification d'acteur existant", + ) + SOURCE_SUPPRESSION = SUGGESTION_SOURCE_SUPRESSION, "ingestion de source de données" + # A venir + # ENRICHISSEMENT… + + +class SuggestionCohorte(models.Model): id = models.AutoField(primary_key=True) - dag_id = models.CharField(max_length=250) - run_id = models.CharField(max_length=250) - created_date = models.DateTimeField(auto_now_add=True) - updated_date = models.DateTimeField(auto_now=True) - status = models.CharField( + # On utilise identifiant car le champ n'est pas utilisé pour résoudre une relation + # en base de données + identifiant_action = models.CharField( + max_length=250, help_text="Identifiant de l'action (ex : dag_id pour 
Airflow)" + ) + identifiant_execution = models.CharField( + max_length=250, + help_text="Identifiant de l'execution (ex : run_id pour Airflow)", + ) + type_action = models.CharField( + choices=SuggestionAction.choices, + max_length=250, + blank=True, + ) + statut = models.CharField( max_length=50, - choices=DagRunStatus.choices, - default=DagRunStatus.TO_VALIDATE, + choices=SuggestionStatut.choices, + default=SuggestionStatut.AVALIDER, + ) + metadata = models.JSONField( + null=True, blank=True, help_text="Metadata de la cohorte, données statistiques" ) - # {to_create : 134, to_update : 0, to_delete : 0, to_ignore : 0, errors : 0} - meta_data = models.JSONField(null=True, blank=True) + cree_le = models.DateTimeField(auto_now_add=True, db_default=Now()) + modifie_le = models.DateTimeField(auto_now=True, db_default=Now()) + + @property + def is_source_type(self) -> bool: + # FIXME: ajout de tests + return self.type_action in [ + SuggestionAction.SOURCE, + SuggestionAction.SOURCE_AJOUT, + SuggestionAction.SOURCE_MISESAJOUR, + SuggestionAction.SOURCE_SUPPRESSION, + ] + + @property + def is_clustering_type(self) -> bool: + # FIXME: ajout de tests + return self.type_action == SuggestionAction.CLUSTERING def __str__(self) -> str: - return f"{self.dag_id} - {self.run_id}" + return f"{self.identifiant_action} - {self.identifiant_execution}" def display_meta_data(self) -> dict: displayed_metadata = {} - displayed_metadata["Nombre d'acteur à créer"] = self.meta_data.get( - "added_rows", 0 + displayed_metadata["Nombre d'acteur à créer ou mettre à jour"] = ( + self.metadata.get("acteurs_to_add_or_update", 0) ) - displayed_metadata["Nombre de duplicats"] = self.meta_data.get( + displayed_metadata["Nombre de duplicats"] = self.metadata.get( "number_of_duplicates", 0 ) - displayed_metadata["Nombre d'acteur MAJ"] = self.meta_data.get( - "updated_rows", 0 + displayed_metadata["Nombre d'acteur à supprimer"] = self.metadata.get( + "number_of_removed_actors", 0 ) return displayed_metadata -class DagRunChangeType(models.Choices): - CREATE = "CREATE" - UPDATE = "UPDATE" - DELETE = "DELETE" - - -class DagRunChange(models.Model): +class SuggestionUnitaire(models.Model): id = models.AutoField(primary_key=True) - dag_run = models.ForeignKey( - DagRun, on_delete=models.CASCADE, related_name="dagrunchanges" + suggestion_cohorte = models.ForeignKey( + SuggestionCohorte, on_delete=models.CASCADE, related_name="suggestion_unitaires" + ) + type_action = models.CharField( + choices=SuggestionAction.choices, + max_length=250, + blank=True, ) - change_type = models.CharField(max_length=50, choices=DagRunChangeType.choices) - meta_data = models.JSONField(null=True, blank=True) - # metadata : JSON of any error or information about the line to change - row_updates = models.JSONField(null=True, blank=True) - status = models.CharField( + statut = models.CharField( max_length=50, - choices=DagRunStatus.choices, - default=DagRunStatus.TO_VALIDATE, + choices=SuggestionStatut.choices, + default=SuggestionStatut.AVALIDER, + ) + context = models.JSONField( + null=True, blank=True, help_text="Contexte de la suggestion : données initiales" ) + suggestion = models.JSONField(blank=True, help_text="Suggestion de modification") + cree_le = models.DateTimeField(auto_now_add=True, db_default=Now()) + modifie_le = models.DateTimeField(auto_now=True, db_default=Now()) + # FIXME: A revoir def display_acteur_details(self) -> dict: displayed_details = {} for field, field_value in { @@ -90,59 +150,28 @@ def display_acteur_details(self) -> dict: 
"identifiant_unique": "identifiant_unique", "identifiant_externe": "identifiant_externe", }.items(): - if value := self.row_updates.get(field): + if value := self.suggestion.get(field): displayed_details[field_value] = value - if value := self.row_updates.get("acteur_type_id"): + if value := self.suggestion.get("acteur_type_id"): displayed_details["Type d'acteur"] = ActeurType.objects.get( pk=value ).libelle - if value := self.row_updates.get("source_id"): + if value := self.suggestion.get("source_id"): displayed_details["Source"] = Source.objects.get(pk=value).libelle - if value := self.row_updates.get("labels"): + if value := self.suggestion.get("labels"): displayed_details["Labels"] = ", ".join( [str(v["labelqualite_id"]) for v in value] ) - if value := self.row_updates.get("acteur_services"): + if value := self.suggestion.get("acteur_services"): displayed_details["Acteur Services"] = ", ".join( [str(v["acteurservice_id"]) for v in value] ) return displayed_details + # FIXME: A revoir def display_proposition_service(self): - return self.row_updates.get("proposition_services", []) - - def update_row_update_field(self, field_name, value): - if self.row_updates is None: - self.row_updates = {} - - if field_name in self.row_updates and self.row_updates[field_name] == value: - del self.row_updates[field_name] - else: - self.row_updates[field_name] = value - - self.save() - - def update_row_update_candidate(self, status, index): - if self.row_updates is None: - self.row_updates = {} - - if ( - self.status == status - and "best_candidat_index" in self.row_updates - and self.row_updates["best_candidat_index"] == index - ): - self.status = DagRunStatus.TO_VALIDATE.value - del self.row_updates["best_candidat_index"] - - else: - self.status = status - self.row_updates["best_candidat_index"] = index - - self.save() - - def get_candidat(self, index): - return self.row_updates["ae_result"][int(index) - 1] + return self.suggestion.get("proposition_services", []) class BANCache(models.Model): diff --git a/qfdmo/views/dags.py b/qfdmo/views/dags.py index 65a0bbedb..926cf4ddd 100644 --- a/qfdmo/views/dags.py +++ b/qfdmo/views/dags.py @@ -7,7 +7,7 @@ from django.views.generic.edit import FormView from qfdmo.forms import DagsForm -from qfdmo.models.data import DagRun, DagRunStatus +from qfdmo.models.data import SuggestionCohorte, SuggestionStatut class IsStaffMixin(LoginRequiredMixin): @@ -19,36 +19,40 @@ def dispatch(self, request, *args, **kwargs): class DagsValidation(IsStaffMixin, FormView): form_class = DagsForm - template_name = "qfdmo/dags_validations.html" + template_name = "data/dags_validations.html" success_url = "/dags/validations" def form_valid(self, form): - # MANAGE search and display dagrun details + # MANAGE search and display suggestion_cohorte details if self.request.POST.get("search"): - dagrun = form.cleaned_data["dagrun"] + suggestion_cohorte = form.cleaned_data["suggestion_cohorte"] context = {"form": form} - context["dagrun_instance"] = dagrun - dagrun_lines = dagrun.dagrunchanges.all().order_by("?")[:100] - context["dagrun_lines"] = dagrun_lines - return render(self.request, "qfdmo/dags_validations.html", context) + context["suggestion_cohorte_instance"] = suggestion_cohorte + suggestion_unitaires = ( + suggestion_cohorte.suggestion_unitaires.all().order_by("?")[:100] + ) + context["suggestion_unitaires"] = suggestion_unitaires + return render(self.request, self.template_name, context) - # ELSE: update the status of the dagrun and its dagrunlines - dagrun = 
form.cleaned_data["dagrun"] + # ELSE: update the status of the suggestion_cohorte and its + # suggestion_cohortelines + suggestion_cohorte = form.cleaned_data["suggestion_cohorte"] new_status = ( - DagRunStatus.TO_INSERT.value + SuggestionStatut.ATRAITER.value if self.request.POST.get("dag_valid") == "1" - else DagRunStatus.REJECTED.value + else SuggestionStatut.REJETER.value ) # FIXME: I am not sure we need the filter here - dagrun.dagrunchanges.filter(status=DagRunStatus.TO_VALIDATE.value).update( - status=new_status - ) - dagrun.status = new_status - dagrun.save() + suggestion_cohorte.suggestion_unitaires.filter( + statut=SuggestionStatut.AVALIDER.value + ).update(statut=new_status) + suggestion_cohorte.statut = new_status + suggestion_cohorte.save() messages.success( self.request, - f"La cohorte {dagrun} a été mise à jour avec le statut {new_status}", + f"La cohorte {suggestion_cohorte} a été mise à jour avec le " + f"statut {new_status}", ) return super().form_valid(form) @@ -57,20 +61,15 @@ def form_invalid(self, form): messages.error(self.request, "Il y a des erreurs dans le formulaire.") return super().form_invalid(form) - # def form_valid(self, form): - # if self.request.POST.get("search"): - # messages.add_message(self.request, messages.INFO, "Form Valid.") - # return super().form_valid(form) - -class DagsValidation1(IsStaffMixin, FormView): +class DagsValidationDeprecated(IsStaffMixin, FormView): form_class = DagsForm template_name = "qfdmo/dags_validations.html" success_url = "/dags/validations" def get_initial(self): initial = super().get_initial() - initial["dagrun"] = self.request.GET.get("dagrun") + initial["suggestion_cohorte"] = self.request.GET.get("suggestion_cohorte") return initial def post(self, request, *args, **kwargs): @@ -79,23 +78,27 @@ def post(self, request, *args, **kwargs): if dag_valid in ["1", "0"]: return self.form_valid(self.get_form()) else: - dagrun_obj = DagRun.objects.get(pk=request.POST.get("dagrun")) + suggestion_cohorte_obj = SuggestionCohorte.objects.get( + pk=request.POST.get("suggestion_cohorte") + ) id = request.POST.get("id") - dagrun_line = dagrun_obj.dagrunchanges.filter(id=id).first() + suggestion_unitaire = suggestion_cohorte_obj.suggestion_unitaires.filter( + id=id + ).first() identifiant_unique = request.POST.get("identifiant_unique") index = request.POST.get("index") action = request.POST.get("action") if action == "validate": - dagrun_line.update_row_update_candidate( - DagRunStatus.TO_INSERT.value, index + suggestion_unitaire.update_row_update_candidate( + SuggestionStatut.ATRAITER.value, index ) elif action == "reject": - dagrun_line.update_row_update_candidate( - DagRunStatus.REJECTED.value, index + suggestion_unitaire.update_row_update_candidate( + SuggestionStatut.REJETER.value, index ) - updated_candidat = dagrun_line.get_candidat(index) + updated_candidat = suggestion_unitaire.get_candidat(index) return render( request, @@ -105,48 +108,57 @@ def post(self, request, *args, **kwargs): "candidat": updated_candidat, "index": index, "request": request, - "dagrun": request.POST.get("dagrun"), - "dagrun_line": dagrun_line, + "suggestion_cohorte": request.POST.get("suggestion_cohorte"), + "suggestion_unitaire": suggestion_unitaire, }, ) def get_context_data(self, **kwargs): context = super().get_context_data(**kwargs) - if self.request.GET.get("dagrun"): - dagrun = DagRun.objects.get(pk=self.request.GET.get("dagrun")) - context["dagrun_instance"] = dagrun - dagrun_lines = dagrun.dagrunchanges.all().order_by("?")[:100] - 
context["dagrun_lines"] = dagrun_lines + if self.request.GET.get("suggestion_cohorte"): + suggestion_cohorte = SuggestionCohorte.objects.get( + pk=self.request.GET.get("suggestion_cohorte") + ) + context["suggestion_cohorte_instance"] = suggestion_cohorte + suggestion_unitaires = ( + suggestion_cohorte.suggestion_unitaires.all().order_by("?")[:100] + ) + context["suggestion_unitaires"] = suggestion_unitaires - if dagrun_lines and dagrun_lines[0].change_type == "UPDATE_ACTOR": + if ( + suggestion_unitaires + and suggestion_unitaires[0].change_type == "UPDATE_ACTOR" + ): # Pagination - dagrun_lines = dagrun.dagrunchanges.all().order_by("id") - paginator = Paginator(dagrun_lines, 100) + suggestion_unitaires = ( + suggestion_cohorte.suggestion_unitaires.all().order_by("id") + ) + paginator = Paginator(suggestion_unitaires, 100) page_number = self.request.GET.get("page") page_obj = paginator.get_page(page_number) - context["dagrun_lines"] = page_obj + context["suggestion_unitaires"] = page_obj return context def form_valid(self, form): if not form.is_valid(): raise ValueError("Form is not valid") - dagrun_id = form.cleaned_data["dagrun"].id - dagrun_obj = DagRun.objects.get(pk=dagrun_id) + suggestion_cohorte_id = form.cleaned_data["suggestion_cohorte"].id + suggestion_cohorte_obj = SuggestionCohorte.objects.get(pk=suggestion_cohorte_id) new_status = ( - DagRunStatus.TO_INSERT.value + SuggestionStatut.ATRAITER.value if self.request.POST.get("dag_valid") == "1" - else DagRunStatus.REJECTED.value + else SuggestionStatut.REJETER.value ) # FIXME: I am not sure we need the filter here - dagrun_obj.dagrunchanges.filter(status=DagRunStatus.TO_VALIDATE.value).update( - status=new_status - ) + suggestion_cohorte_obj.suggestion_unitaires.filter( + status=SuggestionStatut.AVALIDER.value + ).update(status=new_status) - logging.info(f"{dagrun_id} - {self.request.user}") + logging.info(f"{suggestion_cohorte_id} - {self.request.user}") - dagrun_obj.status = new_status - dagrun_obj.save() + suggestion_cohorte_obj.statut = new_status + suggestion_cohorte_obj.save() return super().form_valid(form) diff --git a/templates/data/_partials/create_actor_event.html b/templates/data/_partials/create_actor_event.html new file mode 100644 index 000000000..6d9272f4c --- /dev/null +++ b/templates/data/_partials/create_actor_event.html @@ -0,0 +1,50 @@ + + + + change_type + meta_data + Acteur + Proposition de service + suggestion + + + + + {% for suggestion_unitaire in suggestion_unitaires %} + + {{ suggestion_unitaire.get_change_type_display }} + {{ suggestion_unitaire.meta_data|default:'-' }} + + {% for key, value in suggestion_unitaire.display_acteur_details.items %} +

{{ key }} : {{ value }}

+ {% endfor %} + + + + + + + + {% for service in suggestion_unitaire.display_proposition_service %} + + + + + {% endfor %} +
Action Sous-Catégories
{{ service.action }} +
    + {% for sous_cat in service.pds_sous_categories %} +
  • {{ sous_cat.souscategorie }}
  • + {% endfor %} +
+
+ + +
+ Données brutes +
{{ suggestion_unitaire.suggestion }}
+
+ + + {% endfor %} + diff --git a/templates/data/_partials/source_event.html b/templates/data/_partials/source_event.html new file mode 100644 index 000000000..4b07c4d53 --- /dev/null +++ b/templates/data/_partials/source_event.html @@ -0,0 +1,21 @@ +

Instance du DAG : {{ suggestion_cohorte_instance }}

+

Métadonnées

+{% for meta_title, meta_data in suggestion_cohorte_instance.display_meta_data.items %} +

{{ meta_title }} : {{ meta_data }}

+{% endfor %} +
+ meta_data brutes +
{{ suggestion_cohorte_instance.metadata }}
+
+

Exemples

+ +
+ + + {% include 'data/_partials/create_actor_event.html' %} +
Suggestion de source à valider
+
+
+ + +
diff --git a/templates/data/_partials/update_actor_event.html b/templates/data/_partials/update_actor_event.html new file mode 100644 index 000000000..25eeb6323 --- /dev/null +++ b/templates/data/_partials/update_actor_event.html @@ -0,0 +1,55 @@ +{% if suggestion_unitaires|length > 0 and suggestion_unitaires[0].change_type == 'UPDATE_ACTOR' %} + + + + change_type + meta_data + Identifiant Unique + Candidats + suggestion + + + + + {% for suggestion_unitaire in suggestion_unitaires if suggestion_unitaire.change_type == 'UPDATE_ACTOR' %} + + {{ suggestion_unitaire.get_change_type_display() }} + {{ suggestion_unitaire.meta_data if suggestion_unitaire.meta_data else "-" }} + + {% with identifiant_unique=suggestion_unitaire.display_acteur_details().identifiant_unique %} + {{ identifiant_unique }} + {% endwith %} + + + {% with candidat=candidat, index=loop.index, suggestion_cohorte=request.GET.suggestion_cohorte, + identifiant_unique=suggestion_unitaire.display_acteur_details().identifiant_unique %} + {% include 'qfdmo/partials/candidat_row.html' %} + + {% endwith %} + + +
+ Données brutes +
{{ suggestion_unitaire.suggestion }}
+
+ + +{% endfor %} + +{% if suggestion_unitaires.has_other_pages %} + + + + + +
+{% endif %} +{% endif %} diff --git a/templates/data/base.html b/templates/data/base.html new file mode 100644 index 000000000..8b4acc3eb --- /dev/null +++ b/templates/data/base.html @@ -0,0 +1,61 @@ +{% load dsfr_tags qfdmd_tags static %} + + + + + + + + + + + {# Title and desc #} + {% block page_title %}Longue vie aux objets{% endblock %} + + + {% favicon %} + + {# Css #} + {% dsfr_css %} + + + {% block css_extras %}{% endblock %} + + {# Js #} + + {% block javascript_extras %}{% endblock %} + + + + +
+ +
+
+

Une mise à jour de votre navigateur est nécessaire

+

La version de votre navigateur étant trop ancienne, nous ne pouvons vous garantir une expérience optimale sur la carte Longue vie aux objets. Si vous le souhaitez, vous pouvez aussi essayer de vous connecter sur un autre navigateur.

+
+
+ {% block content %}{% endblock %} +
+ + {% block modals %}{% endblock %} + {% block js %} + {% endblock js %} + + diff --git a/templates/data/dags_validations.html b/templates/data/dags_validations.html new file mode 100644 index 000000000..bad1665a9 --- /dev/null +++ b/templates/data/dags_validations.html @@ -0,0 +1,39 @@ +{% extends 'data/base.html' %} + +{% block content %} + +
+

Validations des «DAGs»

+ +

+ Cette page permet de valider les données des «DAGs». +

+ {% if messages %} +
    + {% for message in messages %} + {{ message }} + {% endfor %} +
+ {% endif %} +
+ {% csrf_token %} + {{ form }} +
+ +
+ + {% if suggestion_cohorte_instance %} + {% if suggestion_cohorte_instance.is_source_type %} + {% include 'data/_partials/source_event.html' %} + {% elif suggestion_cohorte_instance.is_enrichissement_type %} + L'interface de suggestions d'enrichissement n'est pas encore implémentée + {% elif suggestion_cohorte_instance.is_clustering_type %} + L'interface de suggestion de clustering n'est pas encore implémentée + {% endif %} + + {% endif %} +
+ +
+ +{% endblock content %} From 10e41f5e5431fc5ed60c69b76493186d34d8d07e Mon Sep 17 00:00:00 2001 From: Nicolas Oudard Date: Thu, 9 Jan 2025 18:44:56 +0100 Subject: [PATCH 04/26] isolation de data --- core/settings.py | 1 + core/urls.py | 1 + dags/ingest_validated_dataset_to_db.py | 6 +- dags/sources/config/airflow_params.py | 2 + dags/sources/dags/source_pyreo.py | 5 + .../airflow_logic/db_data_prepare_task.py | 3 + .../airflow_logic/db_write_suggestion_task.py | 49 ++++ dags/sources/tasks/airflow_logic/operators.py | 6 +- .../propose_acteur_changes_task.py | 17 +- .../tasks/business_logic/db_data_prepare.py | 37 ++- .../business_logic/db_write_suggestion.py | 95 ++++++++ .../business_logic/propose_acteur_changes.py | 45 ++-- .../propose_acteur_to_delete.py | 4 + .../business_logic/source_data_normalize.py | 23 +- dags/sources/tasks/transform/transform_df.py | 13 + dags/utils/dag_eo_utils.py | 13 +- dags/utils/dag_ingest_validated_utils.py | 2 +- .../test_propose_acteur_changes.py | 8 +- data/__init__.py | 0 qfdmo/admin/data.py => data/admin.py | 2 +- data/apps.py | 6 + data/forms.py | 18 ++ data/migrations/0001_initial.py | 225 ++++++++++++++++++ data/migrations/__init__.py | 0 qfdmo/models/data.py => data/models.py | 11 +- data/urls.py | 11 + data/views.py | 175 ++++++++++++++ qfdmo/admin/__init__.py | 1 - qfdmo/forms.py | 18 +- .../commands/reinitialize_dagrun.py | 8 +- ...e_pourcentage_erreurs_tolerees_and_more.py | 64 +++++ .../0111_delete_bancache_and_more.py | 26 ++ qfdmo/models/__init__.py | 1 - qfdmo/urls.py | 6 - qfdmo/views/dags.py | 164 ------------- .../data/_partials/create_actor_event.html | 50 ---- .../data/_partials/source_ajout_event.html | 44 ++++ templates/data/_partials/source_event.html | 22 +- .../_partials/source_supression_event.html | 27 +++ templates/data/base.html | 1 - 40 files changed, 861 insertions(+), 349 deletions(-) create mode 100644 dags/sources/tasks/airflow_logic/db_write_suggestion_task.py create mode 100644 dags/sources/tasks/business_logic/db_write_suggestion.py create mode 100644 data/__init__.py rename qfdmo/admin/data.py => data/admin.py (82%) create mode 100644 data/apps.py create mode 100644 data/forms.py create mode 100644 data/migrations/0001_initial.py create mode 100644 data/migrations/__init__.py rename qfdmo/models/data.py => data/models.py (94%) create mode 100644 data/urls.py create mode 100644 data/views.py create mode 100644 qfdmo/migrations/0110_suggestioncohorte_pourcentage_erreurs_tolerees_and_more.py create mode 100644 qfdmo/migrations/0111_delete_bancache_and_more.py delete mode 100644 qfdmo/views/dags.py delete mode 100644 templates/data/_partials/create_actor_event.html create mode 100644 templates/data/_partials/source_ajout_event.html create mode 100644 templates/data/_partials/source_supression_event.html diff --git a/core/settings.py b/core/settings.py index 540982469..efc2d116a 100644 --- a/core/settings.py +++ b/core/settings.py @@ -61,6 +61,7 @@ "core", "qfdmd", "qfdmo", + "data", "corsheaders", ] diff --git a/core/urls.py b/core/urls.py index 03b434959..fd4de64b9 100644 --- a/core/urls.py +++ b/core/urls.py @@ -60,6 +60,7 @@ class PaginatedSitemap(GenericSitemap): path("dsfr/", include(("dsfr_hacks.urls", "dsfr_hacks"), namespace="dsfr_hacks")), path("", include(("qfdmo.urls", "qfdmo"), namespace="qfdmo")), path("", include(("qfdmd.urls", "qfdmd"), namespace="qfdmd")), + path("", include(("data.urls", "data"), namespace="data")), path("docs/", TemplateView.as_view(template_name="techdocs.html"), name="techdocs"), ] 
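For reference, the lifecycle wired up by these patches is: a source DAG inserts a SuggestionCohorte with statut AVALIDER, a staff user accepts or rejects it through DagsValidation, and the ingest DAG below then picks up cohorts whose statut is ATRAITER. A minimal sketch of the validation step, reusing the model and field names from the diffs (the standalone helper itself is illustrative, not part of the patch):

from data.models import SuggestionCohorte, SuggestionStatut


def validate_cohorte(cohorte_id: int, accept: bool) -> SuggestionCohorte:
    # Same propagation as DagsValidation.form_valid: every SuggestionUnitaire
    # of the cohort first, then the cohort itself.
    cohorte = SuggestionCohorte.objects.get(pk=cohorte_id)
    new_status = (
        SuggestionStatut.ATRAITER.value if accept else SuggestionStatut.REJETER.value
    )
    cohorte.suggestion_unitaires.all().update(statut=new_status)
    cohorte.statut = new_status
    cohorte.save()
    return cohorte

Once statut reaches ATRAITER, _get_first_suggetsioncohorte_to_insert in the diff below selects the cohort with a plain SQL query on data_suggestioncohorte.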
diff --git a/dags/ingest_validated_dataset_to_db.py b/dags/ingest_validated_dataset_to_db.py index f74130959..d4778d964 100755 --- a/dags/ingest_validated_dataset_to_db.py +++ b/dags/ingest_validated_dataset_to_db.py @@ -33,7 +33,7 @@ def _get_first_suggetsioncohorte_to_insert(): hook = PostgresHook(postgres_conn_id="qfdmo_django_db") row = hook.get_first( f""" - SELECT * FROM qfdmo_suggestioncohorte + SELECT * FROM data_suggestioncohorte WHERE statut = '{constants.SUGGESTION_ATRAITER}' LIMIT 1 """ @@ -54,7 +54,7 @@ def fetch_and_parse_data(**context): df_sql = pd.read_sql_query( f""" - SELECT * FROM qfdmo_suggestionunitaire + SELECT * FROM data_suggestionunitaire WHERE suggestion_cohorte_id = '{suggestion_cohorte_id}' """, engine, @@ -91,7 +91,7 @@ def fetch_and_parse_data(**context): normalized_dfs = df_acteur_to_delete["suggestion"].apply(pd.json_normalize) df_actors_update_actor = pd.concat(normalized_dfs.tolist(), ignore_index=True) status_repeated = ( - df_acteur_to_delete["status"] + df_acteur_to_delete["statut"] .repeat(df_acteur_to_delete["suggestion"].apply(len)) .reset_index(drop=True) ) diff --git a/dags/sources/config/airflow_params.py b/dags/sources/config/airflow_params.py index a42644e89..d095ae980 100644 --- a/dags/sources/config/airflow_params.py +++ b/dags/sources/config/airflow_params.py @@ -26,6 +26,7 @@ clean_label_codes, clean_siret_and_siren, clean_telephone, + compute_location, get_latlng_from_geopoint, merge_and_clean_souscategorie_codes, merge_sous_categories_columns, @@ -65,6 +66,7 @@ "clean_souscategorie_codes_sinoe": clean_souscategorie_codes_sinoe, "get_latlng_from_geopoint": get_latlng_from_geopoint, "strip_lower_string": strip_lower_string, + "clean_location": compute_location, } diff --git a/dags/sources/dags/source_pyreo.py b/dags/sources/dags/source_pyreo.py index a16aa675a..5dc541254 100755 --- a/dags/sources/dags/source_pyreo.py +++ b/dags/sources/dags/source_pyreo.py @@ -81,6 +81,11 @@ # "value": [], # }, # 4. 
Transformation du dataframe + { + "origin": ["latitude", "longitude"], + "transformation": "clean_location", + "destination": ["location"], + }, { "origin": ["labels_etou_bonus", "acteur_type_code"], "transformation": "clean_label_codes", diff --git a/dags/sources/tasks/airflow_logic/db_data_prepare_task.py b/dags/sources/tasks/airflow_logic/db_data_prepare_task.py index 005a6077a..8bb5093c4 100644 --- a/dags/sources/tasks/airflow_logic/db_data_prepare_task.py +++ b/dags/sources/tasks/airflow_logic/db_data_prepare_task.py @@ -28,6 +28,7 @@ def db_data_prepare_wrapper(**kwargs): df_pssc = kwargs["ti"].xcom_pull(task_ids="propose_services_sous_categories") df_labels = kwargs["ti"].xcom_pull(task_ids="propose_labels") df_acteur_services = kwargs["ti"].xcom_pull(task_ids="propose_acteur_services") + df_acteurs_from_db = kwargs["ti"].xcom_pull(task_ids="db_read_acteur") source_id_by_code = read_mapping_from_postgres(table_name="qfdmo_source") acteurtype_id_by_code = read_mapping_from_postgres(table_name="qfdmo_acteurtype") @@ -37,6 +38,7 @@ def db_data_prepare_wrapper(**kwargs): log.preview("df_pssc", df_pssc) log.preview("df_labels", df_labels) log.preview("df_acteur_services", df_acteur_services) + log.preview("df_acteurs_from_db", df_acteurs_from_db) log.preview("source_id_by_code", source_id_by_code) log.preview("acteurtype_id_by_code", acteurtype_id_by_code) @@ -47,6 +49,7 @@ def db_data_prepare_wrapper(**kwargs): df_pssc=df_pssc, df_labels=df_labels, df_acteur_services=df_acteur_services, + df_acteurs_from_db=df_acteurs_from_db, source_id_by_code=source_id_by_code, acteurtype_id_by_code=acteurtype_id_by_code, ) diff --git a/dags/sources/tasks/airflow_logic/db_write_suggestion_task.py b/dags/sources/tasks/airflow_logic/db_write_suggestion_task.py new file mode 100644 index 000000000..f372d0e0b --- /dev/null +++ b/dags/sources/tasks/airflow_logic/db_write_suggestion_task.py @@ -0,0 +1,49 @@ +import logging + +from airflow import DAG +from airflow.operators.python import PythonOperator +from sources.tasks.business_logic.db_write_suggestion import db_write_suggestion +from utils import logging_utils as log + +logger = logging.getLogger(__name__) + + +def db_write_suggestion_task(dag: DAG) -> PythonOperator: + return PythonOperator( + task_id="db_write_suggestion", + python_callable=db_write_suggestion_wrapper, + dag=dag, + ) + + +def db_write_suggestion_wrapper(**kwargs) -> None: + dag_name = kwargs["dag"].dag_display_name or kwargs["dag"].dag_id + run_id = kwargs["run_id"] + dfs_acteur = kwargs["ti"].xcom_pull(task_ids="db_data_prepare") + df_acteur_to_delete = dfs_acteur["df_acteur_to_delete"] + df_acteur_to_create = dfs_acteur["df_acteur_to_create"] + df_acteur_to_update = dfs_acteur["df_acteur_to_update"] + + log.preview("dag_name", dag_name) + log.preview("run_id", run_id) + log.preview("df_acteur_to_delete", df_acteur_to_delete) + log.preview("df_acteur_to_create", df_acteur_to_create) + log.preview("df_acteur_to_update", df_acteur_to_update) + + if ( + df_acteur_to_create.empty + and df_acteur_to_delete.empty + and df_acteur_to_update.empty + ): + logger.warning("!!! 
Aucune suggestion à traiter pour cette source !!!") + # set the task to airflow skip status + kwargs["ti"].xcom_push(key="skip", value=True) + return + + return db_write_suggestion( + dag_name=dag_name, + run_id=run_id, + df_acteur_to_create=df_acteur_to_create, + df_acteur_to_delete=df_acteur_to_delete, + df_acteur_to_update=df_acteur_to_update, + ) diff --git a/dags/sources/tasks/airflow_logic/operators.py b/dags/sources/tasks/airflow_logic/operators.py index d37098470..a17e0052e 100755 --- a/dags/sources/tasks/airflow_logic/operators.py +++ b/dags/sources/tasks/airflow_logic/operators.py @@ -2,12 +2,14 @@ from airflow import DAG from airflow.models.baseoperator import chain -from shared.tasks.airflow_logic.write_data_task import write_data_task from sources.tasks.airflow_logic.db_data_prepare_task import db_data_prepare_task from sources.tasks.airflow_logic.db_read_acteur_task import db_read_acteur_task from sources.tasks.airflow_logic.db_read_propositions_max_id_task import ( db_read_propositions_max_id_task, ) +from sources.tasks.airflow_logic.db_write_suggestion_task import ( + db_write_suggestion_task, +) from sources.tasks.airflow_logic.propose_acteur_changes_task import ( propose_acteur_changes_task, ) @@ -91,5 +93,5 @@ def eo_task_chain(dag: DAG) -> None: create_tasks, propose_services_sous_categories_task(dag), db_data_prepare_task(dag), - write_data_task(dag), + db_write_suggestion_task(dag), ) diff --git a/dags/sources/tasks/airflow_logic/propose_acteur_changes_task.py b/dags/sources/tasks/airflow_logic/propose_acteur_changes_task.py index fe9e05d14..68bcf70e1 100644 --- a/dags/sources/tasks/airflow_logic/propose_acteur_changes_task.py +++ b/dags/sources/tasks/airflow_logic/propose_acteur_changes_task.py @@ -13,18 +13,13 @@ def propose_acteur_changes_task(dag: DAG) -> PythonOperator: def propose_acteur_changes_wrapper(**kwargs): - df = kwargs["ti"].xcom_pull(task_ids="source_data_normalize") - df_acteurs = kwargs["ti"].xcom_pull(task_ids="db_read_acteur") + df_acteur = kwargs["ti"].xcom_pull(task_ids="source_data_normalize") + df_acteur_from_db = kwargs["ti"].xcom_pull(task_ids="db_read_acteur") - params = kwargs["params"] - column_to_drop = params.get("column_to_drop", []) - - log.preview("df (source_data_normalize)", df) - log.preview("df_acteurs", df_acteurs) - log.preview("column_to_drop", column_to_drop) + log.preview("df (source_data_normalize)", df_acteur) + log.preview("df_acteurs", df_acteur_from_db) return propose_acteur_changes( - df=df, - df_acteurs=df_acteurs, - column_to_drop=column_to_drop, + df_acteur=df_acteur, + df_acteur_from_db=df_acteur_from_db, ) diff --git a/dags/sources/tasks/business_logic/db_data_prepare.py b/dags/sources/tasks/business_logic/db_data_prepare.py index 6d577ef19..98e804a49 100644 --- a/dags/sources/tasks/business_logic/db_data_prepare.py +++ b/dags/sources/tasks/business_logic/db_data_prepare.py @@ -14,22 +14,26 @@ def db_data_prepare( df_pssc: pd.DataFrame, df_labels: pd.DataFrame, df_acteur_services: pd.DataFrame, + df_acteurs_from_db: pd.DataFrame, source_id_by_code: dict, acteurtype_id_by_code: dict, ): + update_actors_columns = ["identifiant_unique", "statut", "cree_le"] df_acteur_to_delete["suggestion"] = df_acteur_to_delete[ update_actors_columns ].apply(lambda row: json.dumps(row.to_dict(), default=str), axis=1) # Created or updated Acteurs - df_acteur_services = ( - df_acteur_services - if df_acteur_services is not None - else pd.DataFrame(columns=["acteur_id", "acteurservice_id"]) - ) + # df_acteur_services = ( + # 
df_acteur_services + # if df_acteur_services is not None + # else pd.DataFrame(columns=["acteur_id", "acteurservice_id"]) + # ) if df_acteur.empty: - raise ValueError("df_actors est vide") + raise ValueError("df_acteur est vide") + if df_acteur_services.empty: + raise ValueError("df_acteur_services est vide") if df_ps.empty: raise ValueError("df_ps est vide") if df_pssc.empty: @@ -41,6 +45,8 @@ def db_data_prepare( acteurtype_id_by_code ) + # FIXME: A bouger dans un tache compute_ps qui remplacera propose_services et + # propose_services_sous_categories aggregated_pdsc = ( df_pssc.groupby("propositionservice_id") .apply(lambda x: x.to_dict("records") if not x.empty else []) @@ -57,11 +63,9 @@ def db_data_prepare( df_pds_joined["propositionservice_id"] = df_pds_joined[ "propositionservice_id" ].astype(str) - df_pds_joined["pds_sous_categories"] = df_pds_joined["pds_sous_categories"].apply( lambda x: x if isinstance(x, list) else [] ) - df_pds_joined.drop("id", axis=1, inplace=True) aggregated_pds = ( @@ -128,7 +132,20 @@ def db_data_prepare( lambda row: json.dumps(row.to_dict(), default=str), axis=1 ) df_joined.drop_duplicates("identifiant_unique", keep="first", inplace=True) - log.preview("df_joined", df_joined) + + df_acteur_to_create = df_joined[ + ~df_joined["identifiant_unique"].isin(df_acteurs_from_db["identifiant_unique"]) + ] + df_acteur_to_update = df_joined[ + df_joined["identifiant_unique"].isin(df_acteurs_from_db["identifiant_unique"]) + ] + + log.preview("df_acteur_to_create", df_acteur_to_create) + log.preview("df_acteur_to_update", df_acteur_to_update) log.preview("df_acteur_to_delete", df_acteur_to_delete) - return {"all": {"df": df_joined}, "to_disable": {"df": df_acteur_to_delete}} + return { + "df_acteur_to_create": df_acteur_to_create, + "df_acteur_to_update": df_acteur_to_update, + "df_acteur_to_delete": df_acteur_to_delete, + } diff --git a/dags/sources/tasks/business_logic/db_write_suggestion.py b/dags/sources/tasks/business_logic/db_write_suggestion.py new file mode 100644 index 000000000..a1f60e8f4 --- /dev/null +++ b/dags/sources/tasks/business_logic/db_write_suggestion.py @@ -0,0 +1,95 @@ +import json +import logging +from datetime import datetime + +import pandas as pd +from shared.tasks.database_logic.db_manager import PostgresConnectionManager +from sources.config import shared_constants as constants + +logger = logging.getLogger(__name__) + + +def db_write_suggestion( + dag_name: str, + run_id: str, + df_acteur_to_create: pd.DataFrame, + df_acteur_to_delete: pd.DataFrame, + df_acteur_to_update: pd.DataFrame, +): + + metadata = {} + + run_name = run_id.replace("__", " - ") + + insert_suggestion( + df=df_acteur_to_create, + metadata=metadata, + dag_name=f"{dag_name} - AJOUT", + run_name=run_name, + action_type=constants.SUGGESTION_SOURCE_AJOUT, + ) + insert_suggestion( + df=df_acteur_to_delete, + metadata=metadata, + dag_name=f"{dag_name} - SUPRESSION", + run_name=run_name, + action_type=constants.SUGGESTION_SOURCE_SUPRESSION, + ) + insert_suggestion( + df=df_acteur_to_update, + metadata=metadata, + dag_name=f"{dag_name} - MISES A JOUR", + run_name=run_name, + action_type=constants.SUGGESTION_SOURCE_MISESAJOUR, + ) + + +def insert_suggestion( + df: pd.DataFrame, metadata: dict, dag_name: str, run_name: str, action_type: str +): + if df.empty: + return + engine = PostgresConnectionManager().engine + current_date = datetime.now() + + with engine.connect() as conn: + # Insert a new suggestion + result = conn.execute( + """ + INSERT INTO data_suggestioncohorte + ( + 
identifiant_action, + identifiant_execution, + type_action, + statut, + metadata, + cree_le, + modifie_le + ) + VALUES (%s, %s, %s, %s, %s, %s, %s) + RETURNING ID; + """, + ( + dag_name, + run_name, + action_type, # FIXME: spécialiser les sources + constants.SUGGESTION_AVALIDER, + json.dumps(metadata), + current_date, + current_date, + ), + ) + suggestion_cohorte_id = result.fetchone()[0] + + # Insert dag_run_change + df["type_action"] = action_type + df["suggestion_cohorte_id"] = suggestion_cohorte_id + df["statut"] = constants.SUGGESTION_AVALIDER + df[["suggestion", "suggestion_cohorte_id", "type_action", "statut"]].to_sql( + "data_suggestionunitaire", + engine, + if_exists="append", + index=False, + method="multi", + chunksize=1000, + ) diff --git a/dags/sources/tasks/business_logic/propose_acteur_changes.py b/dags/sources/tasks/business_logic/propose_acteur_changes.py index 4171c9942..4e459d51d 100644 --- a/dags/sources/tasks/business_logic/propose_acteur_changes.py +++ b/dags/sources/tasks/business_logic/propose_acteur_changes.py @@ -3,58 +3,41 @@ import numpy as np import pandas as pd -from utils.base_utils import transform_location -from utils.mapping_utils import parse_float logger = logging.getLogger(__name__) def propose_acteur_changes( - df: pd.DataFrame, - df_acteurs: pd.DataFrame, - column_to_drop: list = [], + df_acteur: pd.DataFrame, + df_acteur_from_db: pd.DataFrame, ): - - # TODO: à déplacer dans la source_data_normalize - # intersection of columns in df and column_to_drop - column_to_drop = list(set(column_to_drop) & set(df.columns)) - df = df.drop(column_to_drop, axis=1) - - if "latitude" in df.columns and "longitude" in df.columns: - df["latitude"] = df["latitude"].apply(parse_float) - df["longitude"] = df["longitude"].apply(parse_float) - df["location"] = df.apply( - lambda row: transform_location(row["longitude"], row["latitude"]), - axis=1, - ) - # On garde le cree_le de qfdmo_acteur - df.drop(columns=["cree_le"], inplace=True, errors="ignore") - df = df.merge( - df_acteurs[["identifiant_unique", "cree_le"]], + df_acteur.drop(columns=["cree_le"], inplace=True, errors="ignore") + df_acteur = df_acteur.merge( + df_acteur_from_db[["identifiant_unique", "cree_le"]], on="identifiant_unique", how="left", ) - df["cree_le"] = df["cree_le"].fillna(datetime.now()) + df_acteur["cree_le"] = df_acteur["cree_le"].fillna(datetime.now()) # On met à jour le modifie_le de qfdmo_acteur - df["modifie_le"] = datetime.now() + df_acteur["modifie_le"] = datetime.now() - df = df.replace({np.nan: None}) + df_acteur = df_acteur.replace({np.nan: None}) - duplicates_mask = df.duplicated("identifiant_unique", keep=False) - duplicate_ids = df.loc[duplicates_mask, "identifiant_unique"].unique() + duplicates_mask = df_acteur.duplicated("identifiant_unique", keep=False) + duplicate_ids = df_acteur.loc[duplicates_mask, "identifiant_unique"].unique() number_of_duplicates = len(duplicate_ids) metadata = { "number_of_duplicates": number_of_duplicates, "duplicate_ids": list(duplicate_ids), - "acteurs_to_add_or_update": len(df), + "acteurs_to_add_or_update": len(df_acteur), } - df = df.drop_duplicates(subset="identifiant_unique", keep="first") - df["event"] = "CREATE" + df_acteur = df_acteur.drop_duplicates(subset="identifiant_unique", keep="first") + df_acteur["event"] = "CREATE" return { - "df": df, + "df": df_acteur, "metadata": metadata, } diff --git a/dags/sources/tasks/business_logic/propose_acteur_to_delete.py b/dags/sources/tasks/business_logic/propose_acteur_to_delete.py index 64a667c2c..c8e800f99 
100644 --- a/dags/sources/tasks/business_logic/propose_acteur_to_delete.py +++ b/dags/sources/tasks/business_logic/propose_acteur_to_delete.py @@ -24,6 +24,10 @@ def propose_acteur_to_delete( df_acteur_to_delete["statut"] = "SUPPRIME" df_acteur_to_delete["event"] = "UPDATE_ACTOR" + # FIXME: ajouter le contexte de la suppression + # ajouter une colonne context avec le contenu de df_acteurs_for_db en json pour + # chaque colonne en jonction sur identifiant_unique + return { "metadata": {"number_of_removed_actors": len(df_acteur_to_delete)}, "df_acteur_to_delete": df_acteur_to_delete, diff --git a/dags/sources/tasks/business_logic/source_data_normalize.py b/dags/sources/tasks/business_logic/source_data_normalize.py index 8c4faa99c..7194ffcf7 100755 --- a/dags/sources/tasks/business_logic/source_data_normalize.py +++ b/dags/sources/tasks/business_logic/source_data_normalize.py @@ -187,11 +187,7 @@ def source_data_normalize( # TODO: Remplacer par le dag_id if dag_id == "sinoe": - df = df_normalize_sinoe( - df, - product_mapping=dag_config.product_mapping, - dechet_mapping=dag_config.dechet_mapping, - ) + df = df_normalize_sinoe(df) # Merge et suppression des lignes indésirables df = _remove_undesired_lines(df, dag_config) @@ -201,21 +197,6 @@ def source_data_normalize( raise ValueError("Plus aucune donnée disponible après normalisation") return df - # # TODO: Je n'ai pas vu la source qui applique cette règle - # if "statut" in df.columns: - # df["statut"] = df["statut"].map( - # { - # 1: constants.ACTEUR_ACTIF, - # 0: constants.ACTEUR_SUPPRIME, - # constants.ACTEUR_ACTIF: constants.ACTEUR_ACTIF, - # "INACTIF": constants.ACTEUR_INACTIF, - # "SUPPRIME": constants.ACTEUR_SUPPRIME, - # } - # ) - # df["statut"] = df["statut"].fillna(constants.ACTEUR_ACTIF) - # else: - # df["statut"] = constants.ACTEUR_ACTIF - def df_normalize_pharmacie(df: pd.DataFrame) -> pd.DataFrame: # FIXME : à déplacer dans une fonction df ? @@ -234,8 +215,6 @@ def df_normalize_pharmacie(df: pd.DataFrame) -> pd.DataFrame: def df_normalize_sinoe( df: pd.DataFrame, - product_mapping: dict, - dechet_mapping: dict, ) -> pd.DataFrame: # DOUBLONS: extra sécurité: même si on ne devrait pas obtenir diff --git a/dags/sources/tasks/transform/transform_df.py b/dags/sources/tasks/transform/transform_df.py index 2bbe93441..507d144d5 100644 --- a/dags/sources/tasks/transform/transform_df.py +++ b/dags/sources/tasks/transform/transform_df.py @@ -11,7 +11,9 @@ clean_siren, clean_siret, ) +from utils.base_utils import transform_location from utils.formatter import format_libelle_to_code +from utils.mapping_utils import parse_float logger = logging.getLogger(__name__) @@ -218,6 +220,17 @@ def get_latlng_from_geopoint(row: pd.Series, _) -> pd.Series: return row[["latitude", "longitude"]] +def compute_location(row: pd.Series, _): + # FIXME : tests à déplacer + # first column is latitude, second is longitude + lat_column = row.keys()[0] + lng_column = row.keys()[1] + row[lat_column] = parse_float(row[lat_column]) + row[lng_column] = parse_float(row[lng_column]) + row["location"] = transform_location(row[lng_column], row[lat_column]) + return row[["location"]] + + ### Fonctions de résolution de l'adresse au format BAN et avec vérification via l'API # adresse.data.gouv.fr en option # TODO : A déplacer ? 
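The compute_location helper added above follows the row-wise transformation contract used by the normalization rules (origin columns in, destination columns out) and is registered as "clean_location" in airflow_params.py. A minimal usage sketch under that contract; the sample coordinates and the None passed for the unused second argument (presumably the DAG config slot) are illustrative:

import pandas as pd

from sources.tasks.transform.transform_df import compute_location

df = pd.DataFrame({"latitude": ["48.8566"], "longitude": ["2.3522"]})

# Mirrors the source_pyreo rule above:
# origin=["latitude", "longitude"] -> destination=["location"].
# compute_location ignores its second argument, hence the None placeholder.
df[["location"]] = df[["latitude", "longitude"]].apply(
    compute_location, args=(None,), axis=1
)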
diff --git a/dags/utils/dag_eo_utils.py b/dags/utils/dag_eo_utils.py index 6c5b5b090..f55c1dcba 100755 --- a/dags/utils/dag_eo_utils.py +++ b/dags/utils/dag_eo_utils.py @@ -13,16 +13,11 @@ def insert_suggestion_and_process_df(df_acteur_updates, metadata, dag_name, run_ return engine = PostgresConnectionManager().engine current_date = datetime.now() - logger.warning(dag_name) - logger.warning(run_name) - logger.warning(constants.SUGGESTION_SOURCE) - logger.warning(constants.SUGGESTION_ATRAITER) - logger.warning(json.dumps(metadata)) with engine.connect() as conn: # Insert a new suggestion result = conn.execute( """ - INSERT INTO qfdmo_suggestioncohorte + INSERT INTO data_suggestioncohorte ( identifiant_action, identifiant_execution, @@ -39,7 +34,7 @@ def insert_suggestion_and_process_df(df_acteur_updates, metadata, dag_name, run_ dag_name, run_name, constants.SUGGESTION_SOURCE, # FIXME: spécialiser les sources - constants.SUGGESTION_ATRAITER, + constants.SUGGESTION_AVALIDER, json.dumps(metadata), current_date, current_date, @@ -50,11 +45,11 @@ def insert_suggestion_and_process_df(df_acteur_updates, metadata, dag_name, run_ # Insert dag_run_change df_acteur_updates["type_action"] = df_acteur_updates["event"] df_acteur_updates["suggestion_cohorte_id"] = suggestion_cohorte_id - df_acteur_updates["statut"] = constants.SUGGESTION_ATRAITER + df_acteur_updates["statut"] = constants.SUGGESTION_AVALIDER df_acteur_updates[ ["suggestion", "suggestion_cohorte_id", "type_action", "statut"] ].to_sql( - "qfdmo_suggestionunitaire", + "data_suggestionunitaire", engine, if_exists="append", index=False, diff --git a/dags/utils/dag_ingest_validated_utils.py b/dags/utils/dag_ingest_validated_utils.py index 7bdc2126c..db24171ab 100755 --- a/dags/utils/dag_ingest_validated_utils.py +++ b/dags/utils/dag_ingest_validated_utils.py @@ -315,6 +315,6 @@ def update_dag_run_status( connection, dag_run_id, statut=shared_constants.SUGGESTION_SUCCES ): query = f""" - UPDATE qfdmo_suggestioncohorte SET statut = '{statut}' WHERE id = {dag_run_id} + UPDATE data_suggestioncohorte SET statut = '{statut}' WHERE id = {dag_run_id} """ connection.execute(query) diff --git a/dags_unit_tests/sources/tasks/business_logic/test_propose_acteur_changes.py b/dags_unit_tests/sources/tasks/business_logic/test_propose_acteur_changes.py index 5ec9808c7..87dc5c50b 100644 --- a/dags_unit_tests/sources/tasks/business_logic/test_propose_acteur_changes.py +++ b/dags_unit_tests/sources/tasks/business_logic/test_propose_acteur_changes.py @@ -61,8 +61,8 @@ def test_create_actors_cree_le( expected_cree_le, ): result = propose_acteur_changes( - df=df_data_from_api, - df_acteurs=df_acteur, + df_acteur=df_data_from_api, + df_acteur_from_db=df_acteur, ) df_result = result["df"] @@ -87,14 +87,14 @@ def test_create_actors_location( longitude, ): result = propose_acteur_changes( - df=pd.DataFrame( + df_acteur=pd.DataFrame( { "identifiant_unique": ["1"], "latitude": [latitude], "longitude": [longitude], } ), - df_acteurs=df_empty_acteurs_from_db, + df_acteur_from_db=df_empty_acteurs_from_db, ) df_result = result["df"] diff --git a/data/__init__.py b/data/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/qfdmo/admin/data.py b/data/admin.py similarity index 82% rename from qfdmo/admin/data.py rename to data/admin.py index 14a7d9870..11b18b4dd 100644 --- a/qfdmo/admin/data.py +++ b/data/admin.py @@ -1,6 +1,6 @@ from django.contrib.gis import admin -from qfdmo.models import SuggestionCohorte, SuggestionUnitaire +from data.models import 
SuggestionCohorte, SuggestionUnitaire class SuggestionCohorteAdmin(admin.ModelAdmin): diff --git a/data/apps.py b/data/apps.py new file mode 100644 index 000000000..b882be950 --- /dev/null +++ b/data/apps.py @@ -0,0 +1,6 @@ +from django.apps import AppConfig + + +class DataConfig(AppConfig): + default_auto_field = "django.db.models.BigAutoField" + name = "data" diff --git a/data/forms.py b/data/forms.py new file mode 100644 index 000000000..9ab32ea91 --- /dev/null +++ b/data/forms.py @@ -0,0 +1,18 @@ +from django import forms + +from data.models import SuggestionCohorte, SuggestionStatut + + +class SuggestionCohorteForm(forms.Form): + suggestion_cohorte = forms.ModelChoiceField( + label="Séléctionner l'execution d'un DAG", + widget=forms.Select( + attrs={ + "class": "fr-select", + } + ), + queryset=SuggestionCohorte.objects.filter( + statut=SuggestionStatut.AVALIDER.value + ), + required=True, + ) diff --git a/data/migrations/0001_initial.py b/data/migrations/0001_initial.py new file mode 100644 index 000000000..e12402e22 --- /dev/null +++ b/data/migrations/0001_initial.py @@ -0,0 +1,225 @@ +# Generated by Django 5.1.4 on 2025-01-09 14:04 + +import django.contrib.gis.db.models.fields +import django.core.validators +import django.db.models.deletion +import django.db.models.functions.datetime +from django.db import migrations, models + + +class Migration(migrations.Migration): + + initial = True + + dependencies = [] + + operations = [ + migrations.CreateModel( + name="BANCache", + fields=[ + ( + "id", + models.BigAutoField( + auto_created=True, + primary_key=True, + serialize=False, + verbose_name="ID", + ), + ), + ("adresse", models.CharField(blank=True, max_length=255, null=True)), + ( + "code_postal", + models.CharField(blank=True, max_length=255, null=True), + ), + ("ville", models.CharField(blank=True, max_length=255, null=True)), + ( + "location", + django.contrib.gis.db.models.fields.PointField( + blank=True, null=True, srid=4326 + ), + ), + ("ban_returned", models.JSONField(blank=True, null=True)), + ( + "modifie_le", + models.DateTimeField( + auto_now=True, + db_default=django.db.models.functions.datetime.Now(), + ), + ), + ], + options={ + "verbose_name": "Cache BAN", + "verbose_name_plural": "Cache BAN", + }, + ), + migrations.CreateModel( + name="SuggestionCohorte", + fields=[ + ("id", models.AutoField(primary_key=True, serialize=False)), + ( + "identifiant_action", + models.CharField( + help_text="Identifiant de l'action (ex : dag_id pour Airflow)", + max_length=250, + ), + ), + ( + "identifiant_execution", + models.CharField( + help_text="Identifiant de l'execution (ex : run_id pour Airflow)", + max_length=250, + ), + ), + ( + "type_action", + models.CharField( + blank=True, + choices=[ + ("CLUSTERING", "regroupement/déduplication des acteurs"), + ("SOURCE", "ingestion de source de données"), + ( + "SOURCE_AJOUT", + "ingestion de source de données - nouveau acteur", + ), + ( + "SOURCE_MISESAJOUR", + "ingestion de source de données - modification d'acteur existant", + ), + ("SOURCE_SUPRESSION", "ingestion de source de données"), + ("ENRICHISSEMENT", "suggestion d'enrichissement"), + ], + max_length=250, + ), + ), + ( + "statut", + models.CharField( + choices=[ + ("AVALIDER", "À valider"), + ("REJETER", "Rejeter"), + ("ATRAITER", "À traiter"), + ("ENCOURS", "En cours de traitement"), + ("ERREUR", "Fini en erreur"), + ("PARTIEL", "Fini avec succès partiel"), + ("SUCCES", "Fini avec succès"), + ], + default="AVALIDER", + max_length=50, + ), + ), + ( + "metadata", + 
models.JSONField( + blank=True, + help_text="Metadata de la cohorte, données statistiques", + null=True, + ), + ), + ( + "pourcentage_erreurs_tolerees", + models.IntegerField( + db_default=0, + default=0, + help_text="Nombre d'erreurs tolérées en pourcentage", + validators=[ + django.core.validators.MinValueValidator(0), + django.core.validators.MaxValueValidator(100), + ], + ), + ), + ( + "cree_le", + models.DateTimeField( + auto_now_add=True, + db_default=django.db.models.functions.datetime.Now(), + ), + ), + ( + "modifie_le", + models.DateTimeField( + auto_now=True, + db_default=django.db.models.functions.datetime.Now(), + ), + ), + ], + ), + migrations.CreateModel( + name="SuggestionUnitaire", + fields=[ + ("id", models.AutoField(primary_key=True, serialize=False)), + ( + "type_action", + models.CharField( + blank=True, + choices=[ + ("CLUSTERING", "regroupement/déduplication des acteurs"), + ("SOURCE", "ingestion de source de données"), + ( + "SOURCE_AJOUT", + "ingestion de source de données - nouveau acteur", + ), + ( + "SOURCE_MISESAJOUR", + "ingestion de source de données - modification d'acteur existant", + ), + ("SOURCE_SUPRESSION", "ingestion de source de données"), + ("ENRICHISSEMENT", "suggestion d'enrichissement"), + ], + max_length=250, + ), + ), + ( + "statut", + models.CharField( + choices=[ + ("AVALIDER", "À valider"), + ("REJETER", "Rejeter"), + ("ATRAITER", "À traiter"), + ("ENCOURS", "En cours de traitement"), + ("ERREUR", "Fini en erreur"), + ("PARTIEL", "Fini avec succès partiel"), + ("SUCCES", "Fini avec succès"), + ], + default="AVALIDER", + max_length=50, + ), + ), + ( + "context", + models.JSONField( + blank=True, + help_text="Contexte de la suggestion : données initiales", + null=True, + ), + ), + ( + "suggestion", + models.JSONField( + blank=True, help_text="Suggestion de modification" + ), + ), + ( + "cree_le", + models.DateTimeField( + auto_now_add=True, + db_default=django.db.models.functions.datetime.Now(), + ), + ), + ( + "modifie_le", + models.DateTimeField( + auto_now=True, + db_default=django.db.models.functions.datetime.Now(), + ), + ), + ( + "suggestion_cohorte", + models.ForeignKey( + on_delete=django.db.models.deletion.CASCADE, + related_name="suggestion_unitaires", + to="data.suggestioncohorte", + ), + ), + ], + ), + ] diff --git a/data/migrations/__init__.py b/data/migrations/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/qfdmo/models/data.py b/data/models.py similarity index 94% rename from qfdmo/models/data.py rename to data/models.py index a13448a4d..61c6e4abb 100644 --- a/qfdmo/models/data.py +++ b/data/models.py @@ -1,4 +1,5 @@ from django.contrib.gis.db import models +from django.core.validators import MaxValueValidator, MinValueValidator from django.db.models.functions import Now from dags.sources.config.shared_constants import ( @@ -6,6 +7,7 @@ SUGGESTION_AVALIDER, SUGGESTION_CLUSTERING, SUGGESTION_ENCOURS, + SUGGESTION_ENRICHISSEMENT, SUGGESTION_ERREUR, SUGGESTION_PARTIEL, SUGGESTION_REJETER, @@ -43,8 +45,7 @@ class SuggestionAction(models.TextChoices): "ingestion de source de données - modification d'acteur existant", ) SOURCE_SUPPRESSION = SUGGESTION_SOURCE_SUPRESSION, "ingestion de source de données" - # A venir - # ENRICHISSEMENT… + SOURCE_ENRICHISSEMENT = SUGGESTION_ENRICHISSEMENT, "suggestion d'enrichissement" class SuggestionCohorte(models.Model): @@ -71,6 +72,12 @@ class SuggestionCohorte(models.Model): metadata = models.JSONField( null=True, blank=True, help_text="Metadata de la cohorte, données 
statistiques" ) + pourcentage_erreurs_tolerees = models.IntegerField( + default=0, + db_default=0, + help_text="Nombre d'erreurs tolérées en pourcentage", + validators=[MinValueValidator(0), MaxValueValidator(100)], + ) cree_le = models.DateTimeField(auto_now_add=True, db_default=Now()) modifie_le = models.DateTimeField(auto_now=True, db_default=Now()) diff --git a/data/urls.py b/data/urls.py new file mode 100644 index 000000000..7a81e6b91 --- /dev/null +++ b/data/urls.py @@ -0,0 +1,11 @@ +from django.urls import path + +from data.views import DagsValidation + +urlpatterns = [ + path( + "dags/validations", + DagsValidation.as_view(), + name="dags_validations", + ), +] diff --git a/data/views.py b/data/views.py new file mode 100644 index 000000000..ec2dee9e3 --- /dev/null +++ b/data/views.py @@ -0,0 +1,175 @@ +from django.contrib import messages +from django.contrib.auth.mixins import LoginRequiredMixin +from django.shortcuts import render +from django.views.generic.edit import FormView + +from data.forms import SuggestionCohorteForm +from data.models import SuggestionAction, SuggestionStatut + + +class IsStaffMixin(LoginRequiredMixin): + def dispatch(self, request, *args, **kwargs): + if not request.user.is_staff: + return self.handle_no_permission() + return super().dispatch(request, *args, **kwargs) + + +ACTION_TO_VERB = { + SuggestionAction.SOURCE_AJOUT: "ajoutera", + SuggestionAction.SOURCE_SUPPRESSION: "supprimera", + SuggestionAction.SOURCE_MISESAJOUR: "mettra à jour", +} + + +class DagsValidation(IsStaffMixin, FormView): + form_class = SuggestionCohorteForm + template_name = "data/dags_validations.html" + success_url = "/dags/validations" + + def form_valid(self, form): + # MANAGE search and display suggestion_cohorte details + suggestion_cohorte = form.cleaned_data["suggestion_cohorte"] + if self.request.POST.get("search"): + context = {"form": form} + context["suggestion_cohorte_instance"] = suggestion_cohorte + suggestion_unitaires = suggestion_cohorte.suggestion_unitaires.all() + context["metadata"] = { + "nb_suggestions": suggestion_unitaires.count(), + "description": ( + "La validation de cette cohorte de suggestion " + f"{ACTION_TO_VERB[suggestion_cohorte.type_action]} l'ensemble des " + "acteurs" + ), + "source": suggestion_cohorte.identifiant_action, + } + suggestion_unitaires = suggestion_unitaires.order_by("?")[:100] + context["suggestion_unitaires"] = suggestion_unitaires + return render(self.request, self.template_name, context) + # ELSE: update the status of the suggestion_cohorte and its + # suggestion_cohortelines + suggestion_cohorte = form.cleaned_data["suggestion_cohorte"] + new_status = ( + SuggestionStatut.ATRAITER.value + if self.request.POST.get("dag_valid") == "1" + else SuggestionStatut.REJETER.value + ) + + suggestion_cohorte.suggestion_unitaires.all().update(statut=new_status) + suggestion_cohorte.statut = new_status + suggestion_cohorte.save() + + messages.success( + self.request, + f"La cohorte {suggestion_cohorte} a été mise à jour avec le " + f"statut {new_status}", + ) + + return super().form_valid(form) + + def form_invalid(self, form): + messages.error(self.request, "Il y a des erreurs dans le formulaire.") + return super().form_invalid(form) + + +# class DagsValidationDeprecated(IsStaffMixin, FormView): +# form_class = SuggestionCohorteForm +# template_name = "qfdmo/dags_validations.html" +# success_url = "/dags/validations" + +# def get_initial(self): +# initial = super().get_initial() +# initial["suggestion_cohorte"] = 
self.request.GET.get("suggestion_cohorte") +# return initial + +# def post(self, request, *args, **kwargs): + +# dag_valid = request.POST.get("dag_valid") +# if dag_valid in ["1", "0"]: +# return self.form_valid(self.get_form()) +# else: +# suggestion_cohorte_obj = SuggestionCohorte.objects.get( +# pk=request.POST.get("suggestion_cohorte") +# ) +# id = request.POST.get("id") +# suggestion_unitaire = suggestion_cohorte_obj.suggestion_unitaires.filter( +# id=id +# ).first() +# identifiant_unique = request.POST.get("identifiant_unique") +# index = request.POST.get("index") +# action = request.POST.get("action") + +# if action == "validate": +# suggestion_unitaire.update_row_update_candidate( +# SuggestionStatut.ATRAITER.value, index +# ) +# elif action == "reject": +# suggestion_unitaire.update_row_update_candidate( +# SuggestionStatut.REJETER.value, index +# ) + +# updated_candidat = suggestion_unitaire.get_candidat(index) + +# return render( +# request, +# "qfdmo/partials/candidat_row.html", +# { +# "identifiant_unique": identifiant_unique, +# "candidat": updated_candidat, +# "index": index, +# "request": request, +# "suggestion_cohorte": request.POST.get("suggestion_cohorte"), +# "suggestion_unitaire": suggestion_unitaire, +# }, +# ) + +# def get_context_data(self, **kwargs): +# context = super().get_context_data(**kwargs) +# if self.request.GET.get("suggestion_cohorte"): +# suggestion_cohorte = SuggestionCohorte.objects.get( +# pk=self.request.GET.get("suggestion_cohorte") +# ) +# context["suggestion_cohorte_instance"] = suggestion_cohorte +# suggestion_unitaires = ( +# suggestion_cohorte.suggestion_unitaires.all().order_by("?")[:100] +# ) +# context["suggestion_unitaires"] = suggestion_unitaires + +# if ( +# suggestion_unitaires +# and suggestion_unitaires[0].change_type == "UPDATE_ACTOR" +# ): +# # Pagination +# suggestion_unitaires = ( +# suggestion_cohorte.suggestion_unitaires.all().order_by("id") +# ) +# paginator = Paginator(suggestion_unitaires, 100) +# page_number = self.request.GET.get("page") +# page_obj = paginator.get_page(page_number) +# context["suggestion_unitaires"] = page_obj + +# return context + +# def form_valid(self, form): +# if not form.is_valid(): +# raise ValueError("Form is not valid") +# suggestion_cohorte_id = form.cleaned_data["suggestion_cohorte"].id +# suggestion_cohorte_obj = ( +# SuggestionCohorte.objects.get(pk=suggestion_cohorte_id) +# ) +# new_status = ( +# SuggestionStatut.ATRAITER.value +# if self.request.POST.get("dag_valid") == "1" +# else SuggestionStatut.REJETER.value +# ) + +# # FIXME: I am not sure we need the filter here +# suggestion_cohorte_obj.suggestion_unitaires.filter( +# status=SuggestionStatut.AVALIDER.value +# ).update(status=new_status) + +# logging.info(f"{suggestion_cohorte_id} - {self.request.user}") + +# suggestion_cohorte_obj.statut = new_status +# suggestion_cohorte_obj.save() + +# return super().form_valid(form) diff --git a/qfdmo/admin/__init__.py b/qfdmo/admin/__init__.py index 627508b0b..983bf9b07 100644 --- a/qfdmo/admin/__init__.py +++ b/qfdmo/admin/__init__.py @@ -1,4 +1,3 @@ from .acteur import * # noqa from .action import * # noqa from .categorie_objet import * # noqa -from .data import * # noqa diff --git a/qfdmo/forms.py b/qfdmo/forms.py index a4c2ba924..56e38568d 100644 --- a/qfdmo/forms.py +++ b/qfdmo/forms.py @@ -9,7 +9,7 @@ from qfdmo.fields import GroupeActionChoiceField from qfdmo.geo_api import epcis_from, formatted_epcis_as_list_of_tuple -from qfdmo.models import SousCategorieObjet, SuggestionCohorte +from 
qfdmo.models import SousCategorieObjet from qfdmo.models.action import ( Action, GroupeAction, @@ -17,7 +17,6 @@ get_directions, get_ordered_directions, ) -from qfdmo.models.data import SuggestionStatut from qfdmo.widgets import ( AutoCompleteInput, DSFRCheckboxSelectMultiple, @@ -342,21 +341,6 @@ def load_choices( ) -class DagsForm(forms.Form): - suggestion_cohorte = forms.ModelChoiceField( - label="Séléctionner l'execution d'un DAG", - widget=forms.Select( - attrs={ - "class": "fr-select", - } - ), - queryset=SuggestionCohorte.objects.filter( - statut=SuggestionStatut.ATRAITER.value - ), - required=True, - ) - - class ConfiguratorForm(DsfrBaseForm): # TODO: rename this field in all codebase -> actions_displayed action_list = GroupeActionChoiceField( diff --git a/qfdmo/management/commands/reinitialize_dagrun.py b/qfdmo/management/commands/reinitialize_dagrun.py index 6560a8f5f..1dcfcac0b 100644 --- a/qfdmo/management/commands/reinitialize_dagrun.py +++ b/qfdmo/management/commands/reinitialize_dagrun.py @@ -7,13 +7,13 @@ class Command(BaseCommand): def handle(self, *args, **options): with connection.cursor() as cursor: - # Truncate the table qfdmo_suggestioncohorte and qfdmo_suggestionunitaire - cursor.execute("TRUNCATE TABLE qfdmo_suggestioncohorte CASCADE") + # Truncate the table data_suggestioncohorte and data_suggestionunitaire + cursor.execute("TRUNCATE TABLE data_suggestioncohorte CASCADE") # Set auto-increment to 1 cursor.execute( - "ALTER SEQUENCE qfdmo_suggestioncohorte_id_seq RESTART WITH 1" + "ALTER SEQUENCE data_suggestioncohorte_id_seq RESTART WITH 1" ) cursor.execute( - "ALTER SEQUENCE qfdmo_suggestionunitaire_id_seq RESTART WITH 1" + "ALTER SEQUENCE data_suggestionunitaire_id_seq RESTART WITH 1" ) diff --git a/qfdmo/migrations/0110_suggestioncohorte_pourcentage_erreurs_tolerees_and_more.py b/qfdmo/migrations/0110_suggestioncohorte_pourcentage_erreurs_tolerees_and_more.py new file mode 100644 index 000000000..9ca12f647 --- /dev/null +++ b/qfdmo/migrations/0110_suggestioncohorte_pourcentage_erreurs_tolerees_and_more.py @@ -0,0 +1,64 @@ +# Generated by Django 5.1.4 on 2025-01-09 12:38 + +import django.core.validators +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ("qfdmo", "0109_suggestioncohorte_remove_dagrunchange_dag_run_and_more"), + ] + + operations = [ + migrations.AddField( + model_name="suggestioncohorte", + name="pourcentage_erreurs_tolerees", + field=models.IntegerField( + default=0, + help_text="Nombre d'erreurs tolérées en pourcentage", + validators=[ + django.core.validators.MinValueValidator(0), + django.core.validators.MaxValueValidator(100), + ], + ), + ), + migrations.AlterField( + model_name="suggestioncohorte", + name="type_action", + field=models.CharField( + blank=True, + choices=[ + ("CLUSTERING", "regroupement/déduplication des acteurs"), + ("SOURCE", "ingestion de source de données"), + ("SOURCE_AJOUT", "ingestion de source de données - nouveau acteur"), + ( + "SOURCE_MISESAJOUR", + "ingestion de source de données - modification d'acteur existant", + ), + ("SOURCE_SUPRESSION", "ingestion de source de données"), + ("ENRICHISSEMENT", "suggestion d'enrichissement"), + ], + max_length=250, + ), + ), + migrations.AlterField( + model_name="suggestionunitaire", + name="type_action", + field=models.CharField( + blank=True, + choices=[ + ("CLUSTERING", "regroupement/déduplication des acteurs"), + ("SOURCE", "ingestion de source de données"), + ("SOURCE_AJOUT", "ingestion de source de données - nouveau 
acteur"), + ( + "SOURCE_MISESAJOUR", + "ingestion de source de données - modification d'acteur existant", + ), + ("SOURCE_SUPRESSION", "ingestion de source de données"), + ("ENRICHISSEMENT", "suggestion d'enrichissement"), + ], + max_length=250, + ), + ), + ] diff --git a/qfdmo/migrations/0111_delete_bancache_and_more.py b/qfdmo/migrations/0111_delete_bancache_and_more.py new file mode 100644 index 000000000..a0d8d291e --- /dev/null +++ b/qfdmo/migrations/0111_delete_bancache_and_more.py @@ -0,0 +1,26 @@ +# Generated by Django 5.1.4 on 2025-01-09 13:50 + +from django.db import migrations + + +class Migration(migrations.Migration): + + dependencies = [ + ("qfdmo", "0110_suggestioncohorte_pourcentage_erreurs_tolerees_and_more"), + ] + + operations = [ + migrations.DeleteModel( + name="BANCache", + ), + migrations.RemoveField( + model_name="suggestionunitaire", + name="suggestion_cohorte", + ), + migrations.DeleteModel( + name="SuggestionCohorte", + ), + migrations.DeleteModel( + name="SuggestionUnitaire", + ), + ] diff --git a/qfdmo/models/__init__.py b/qfdmo/models/__init__.py index 23b9c02d5..80f4d9eba 100644 --- a/qfdmo/models/__init__.py +++ b/qfdmo/models/__init__.py @@ -1,5 +1,4 @@ from .acteur import * # noqa from .action import * # noqa from .categorie_objet import * # noqa -from .data import * # noqa from .utils import * # noqa diff --git a/qfdmo/urls.py b/qfdmo/urls.py index fa903e306..0c99afa07 100644 --- a/qfdmo/urls.py +++ b/qfdmo/urls.py @@ -16,7 +16,6 @@ ) from qfdmo.views.auth import LVAOLoginView from qfdmo.views.configurator import AdvancedConfiguratorView, ConfiguratorView -from qfdmo.views.dags import DagsValidation urlpatterns = [ path("", direct_access, name="direct_access"), @@ -87,11 +86,6 @@ TemplateView.as_view(template_name="tests/iframe.html"), name="test_iframe", ), - path( - "dags/validations", - DagsValidation.as_view(), - name="dags_validations", - ), path( "configurateur", ConfiguratorView.as_view(), diff --git a/qfdmo/views/dags.py b/qfdmo/views/dags.py deleted file mode 100644 index 926cf4ddd..000000000 --- a/qfdmo/views/dags.py +++ /dev/null @@ -1,164 +0,0 @@ -import logging - -from django.contrib import messages -from django.contrib.auth.mixins import LoginRequiredMixin -from django.core.paginator import Paginator -from django.shortcuts import render -from django.views.generic.edit import FormView - -from qfdmo.forms import DagsForm -from qfdmo.models.data import SuggestionCohorte, SuggestionStatut - - -class IsStaffMixin(LoginRequiredMixin): - def dispatch(self, request, *args, **kwargs): - if not request.user.is_staff: - return self.handle_no_permission() - return super().dispatch(request, *args, **kwargs) - - -class DagsValidation(IsStaffMixin, FormView): - form_class = DagsForm - template_name = "data/dags_validations.html" - success_url = "/dags/validations" - - def form_valid(self, form): - # MANAGE search and display suggestion_cohorte details - if self.request.POST.get("search"): - suggestion_cohorte = form.cleaned_data["suggestion_cohorte"] - context = {"form": form} - context["suggestion_cohorte_instance"] = suggestion_cohorte - suggestion_unitaires = ( - suggestion_cohorte.suggestion_unitaires.all().order_by("?")[:100] - ) - context["suggestion_unitaires"] = suggestion_unitaires - return render(self.request, self.template_name, context) - - # ELSE: update the status of the suggestion_cohorte and its - # suggestion_cohortelines - suggestion_cohorte = form.cleaned_data["suggestion_cohorte"] - new_status = ( - SuggestionStatut.ATRAITER.value - if 
self.request.POST.get("dag_valid") == "1" - else SuggestionStatut.REJETER.value - ) - - # FIXME: I am not sure we need the filter here - suggestion_cohorte.suggestion_unitaires.filter( - statut=SuggestionStatut.AVALIDER.value - ).update(statut=new_status) - suggestion_cohorte.statut = new_status - suggestion_cohorte.save() - messages.success( - self.request, - f"La cohorte {suggestion_cohorte} a été mise à jour avec le " - f"statut {new_status}", - ) - - return super().form_valid(form) - - def form_invalid(self, form): - messages.error(self.request, "Il y a des erreurs dans le formulaire.") - return super().form_invalid(form) - - -class DagsValidationDeprecated(IsStaffMixin, FormView): - form_class = DagsForm - template_name = "qfdmo/dags_validations.html" - success_url = "/dags/validations" - - def get_initial(self): - initial = super().get_initial() - initial["suggestion_cohorte"] = self.request.GET.get("suggestion_cohorte") - return initial - - def post(self, request, *args, **kwargs): - - dag_valid = request.POST.get("dag_valid") - if dag_valid in ["1", "0"]: - return self.form_valid(self.get_form()) - else: - suggestion_cohorte_obj = SuggestionCohorte.objects.get( - pk=request.POST.get("suggestion_cohorte") - ) - id = request.POST.get("id") - suggestion_unitaire = suggestion_cohorte_obj.suggestion_unitaires.filter( - id=id - ).first() - identifiant_unique = request.POST.get("identifiant_unique") - index = request.POST.get("index") - action = request.POST.get("action") - - if action == "validate": - suggestion_unitaire.update_row_update_candidate( - SuggestionStatut.ATRAITER.value, index - ) - elif action == "reject": - suggestion_unitaire.update_row_update_candidate( - SuggestionStatut.REJETER.value, index - ) - - updated_candidat = suggestion_unitaire.get_candidat(index) - - return render( - request, - "qfdmo/partials/candidat_row.html", - { - "identifiant_unique": identifiant_unique, - "candidat": updated_candidat, - "index": index, - "request": request, - "suggestion_cohorte": request.POST.get("suggestion_cohorte"), - "suggestion_unitaire": suggestion_unitaire, - }, - ) - - def get_context_data(self, **kwargs): - context = super().get_context_data(**kwargs) - if self.request.GET.get("suggestion_cohorte"): - suggestion_cohorte = SuggestionCohorte.objects.get( - pk=self.request.GET.get("suggestion_cohorte") - ) - context["suggestion_cohorte_instance"] = suggestion_cohorte - suggestion_unitaires = ( - suggestion_cohorte.suggestion_unitaires.all().order_by("?")[:100] - ) - context["suggestion_unitaires"] = suggestion_unitaires - - if ( - suggestion_unitaires - and suggestion_unitaires[0].change_type == "UPDATE_ACTOR" - ): - # Pagination - suggestion_unitaires = ( - suggestion_cohorte.suggestion_unitaires.all().order_by("id") - ) - paginator = Paginator(suggestion_unitaires, 100) - page_number = self.request.GET.get("page") - page_obj = paginator.get_page(page_number) - context["suggestion_unitaires"] = page_obj - - return context - - def form_valid(self, form): - if not form.is_valid(): - raise ValueError("Form is not valid") - suggestion_cohorte_id = form.cleaned_data["suggestion_cohorte"].id - suggestion_cohorte_obj = SuggestionCohorte.objects.get(pk=suggestion_cohorte_id) - new_status = ( - SuggestionStatut.ATRAITER.value - if self.request.POST.get("dag_valid") == "1" - else SuggestionStatut.REJETER.value - ) - - # FIXME: I am not sure we need the filter here - suggestion_cohorte_obj.suggestion_unitaires.filter( - status=SuggestionStatut.AVALIDER.value - ).update(status=new_status) 
-
-        logging.info(f"{suggestion_cohorte_id} - {self.request.user}")
-
-        suggestion_cohorte_obj.statut = new_status
-        suggestion_cohorte_obj.save()
-
-        return super().form_valid(form)
diff --git a/templates/data/_partials/create_actor_event.html b/templates/data/_partials/create_actor_event.html
deleted file mode 100644
index 6d9272f4c..000000000
--- a/templates/data/_partials/create_actor_event.html
+++ /dev/null
@@ -1,50 +0,0 @@
-<table>
-  <thead>
-    <tr>
-      <th scope="col">change_type</th>
-      <th scope="col">meta_data</th>
-      <th scope="col">Acteur</th>
-      <th scope="col">Proposition de service</th>
-      <th scope="col">suggestion</th>
-    </tr>
-  </thead>
-  <tbody>
-    {% for suggestion_unitaire in suggestion_unitaires %}
-      <tr>
-        <td>{{ suggestion_unitaire.get_change_type_display }}</td>
-        <td>{{ suggestion_unitaire.meta_data|default:'-' }}</td>
-        <td>
-          <ul>
-            {% for key, value in suggestion_unitaire.display_acteur_details.items %}
-              <li>{{ key }} : {{ value }}</li>
-            {% endfor %}
-          </ul>
-        </td>
-        <td>
-          <table>
-            <thead>
-              <tr>
-                <th scope="col">Action</th>
-                <th scope="col">Sous-Catégories</th>
-              </tr>
-            </thead>
-            <tbody>
-              {% for service in suggestion_unitaire.display_proposition_service %}
-                <tr>
-                  <td>{{ service.action }}</td>
-                  <td>
-                    <ul>
-                      {% for sous_cat in service.pds_sous_categories %}
-                        <li>{{ sous_cat.souscategorie }}</li>
-                      {% endfor %}
-                    </ul>
-                  </td>
-                </tr>
-              {% endfor %}
-            </tbody>
-          </table>
-        </td>
-        <td>
-          <details>
-            <summary>Données brutes</summary>
-            <pre>{{ suggestion_unitaire.suggestion }}</pre>
-          </details>
-        </td>
-      </tr>
-    {% endfor %}
-  </tbody>
-</table>
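For reference, a minimal sketch of the cohort decision implemented by the new data/views.py above (names taken from that file; this is an illustration of the flow, not additional patch content):

    from data.models import SuggestionStatut

    def apply_cohorte_decision(suggestion_cohorte, dag_valid: str) -> None:
        # Mirrors form_valid: "1" validates the cohort, anything else rejects it.
        new_status = (
            SuggestionStatut.ATRAITER.value
            if dag_valid == "1"
            else SuggestionStatut.REJETER.value
        )
        # Every unit suggestion follows the cohort-level decision.
        suggestion_cohorte.suggestion_unitaires.all().update(statut=new_status)
        suggestion_cohorte.statut = new_status
        suggestion_cohorte.save()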
diff --git a/templates/data/_partials/source_ajout_event.html b/templates/data/_partials/source_ajout_event.html
new file mode 100644
index 000000000..4618c92f8
--- /dev/null
+++ b/templates/data/_partials/source_ajout_event.html
@@ -0,0 +1,44 @@
+<table>
+  <thead>
+    <tr>
+      <th scope="col">Acteur (après mise à jour)</th>
+      <th scope="col">Proposition de service</th>
+      <th scope="col">suggestion</th>
+    </tr>
+  </thead>
+  <tbody>
+    {% for suggestion_unitaire in suggestion_unitaires %}
+      <tr>
+        <td>
+          <ul>
+            {% for key, value in suggestion_unitaire.display_acteur_details.items %}
+              <li>{{ key }} : {{ value }}</li>
+            {% endfor %}
+          </ul>
+        </td>
+        <td>
+          <table>
+            <thead>
+              <tr>
+                <th scope="col">Action</th>
+                <th scope="col">Sous-Catégories</th>
+              </tr>
+            </thead>
+            <tbody>
+              {% for service in suggestion_unitaire.display_proposition_service %}
+                <tr>
+                  <td>{{ service.action }}</td>
+                  <td>
+                    <ul>
+                      {% for sous_cat in service.pds_sous_categories %}
+                        <li>{{ sous_cat.souscategorie }}</li>
+                      {% endfor %}
+                    </ul>
+                  </td>
+                </tr>
+              {% endfor %}
+            </tbody>
+          </table>
+        </td>
+        <td>
+          <details>
+            <summary>Données brutes</summary>
+            <pre>{{ suggestion_unitaire.suggestion }}</pre>
+          </details>
+        </td>
+      </tr>
+    {% endfor %}
+  </tbody>
+</table>
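The `metadata` values rendered by these partials are built in `DagsValidation.form_valid` shown earlier; a sketch of the dict it passes to the template (the verb in the description comes from `ACTION_TO_VERB[suggestion_cohorte.type_action]`):

    metadata = {
        "nb_suggestions": suggestion_unitaires.count(),
        "description": (
            "La validation de cette cohorte de suggestion "
            f"{ACTION_TO_VERB[suggestion_cohorte.type_action]} l'ensemble des acteurs"
        ),
        "source": suggestion_cohorte.identifiant_action,
    }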
diff --git a/templates/data/_partials/source_event.html b/templates/data/_partials/source_event.html
index 4b07c4d53..acf653d6b 100644
--- a/templates/data/_partials/source_event.html
+++ b/templates/data/_partials/source_event.html
@@ -1,18 +1,22 @@
 <h2>Instance du DAG : {{ suggestion_cohorte_instance }}</h2>
 <h3>Meta données</h3>
-{% for meta_title, meta_data in suggestion_cohorte_instance.display_meta_data.items %}
-  <div>{{ meta_title }} : {{ meta_data }}</div>
-{% endfor %}
-<details>
-  <summary>meta_data brutes</summary>
-  <pre>{{ suggestion_cohorte_instance.metadata }}</pre>
-</details>
-
-<h3>Exemples</h3>
+<div>Source : {{ metadata.source }}</div>
+<div>Statut de la cohorte : {{ suggestion_cohorte_instance.get_statut_display }}</div>
+<div>Description : {{ metadata.description }}</div>
+<div>Nb de suggestions : {{ metadata.nb_suggestions }}</div>
+
+<h3>Exemples</h3>
 <div>
-  {% include 'data/_partials/create_actor_event.html' %}
+  {% if suggestion_cohorte_instance.type_action == "SOURCE_AJOUT" or suggestion_cohorte_instance.type_action == "SOURCE_MISESAJOUR" %}
+    {% include 'data/_partials/source_ajout_event.html' %}
+  {% elif suggestion_cohorte_instance.type_action == "SOURCE_SUPRESSION" %}
+    {% include 'data/_partials/source_supression_event.html' %}
+  {% else %}
+    {% include 'data/_partials/create_actor_event.html' %}
+  {% endif %}
 </div>
 <h3>Suggestion de source à valider</h3>
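The `type_action` values tested by the include dispatch above correspond to the choices declared in migration 0110; a hedged sketch of the matching enum (the real declaration lives in data/models.py, which this series does not show in full, and the `SOURCE_SUPRESSION` spelling is kept as it appears in the code):

    from django.db import models

    class SuggestionAction(models.TextChoices):
        CLUSTERING = "CLUSTERING", "regroupement/déduplication des acteurs"
        SOURCE_AJOUT = "SOURCE_AJOUT", "ingestion de source de données - nouveau acteur"
        SOURCE_MISESAJOUR = (
            "SOURCE_MISESAJOUR",
            "ingestion de source de données - modification d'acteur existant",
        )
        SOURCE_SUPRESSION = "SOURCE_SUPRESSION", "ingestion de source de données"
        ENRICHISSEMENT = "ENRICHISSEMENT", "suggestion d'enrichissement"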
diff --git a/templates/data/_partials/source_supression_event.html b/templates/data/_partials/source_supression_event.html
new file mode 100644
index 000000000..b348c79b4
--- /dev/null
+++ b/templates/data/_partials/source_supression_event.html
@@ -0,0 +1,27 @@
+<table>
+  <thead>
+    <tr>
+      <th scope="col">action_type</th>
+      <th scope="col">Acteur</th>
+      <th scope="col">suggestion</th>
+    </tr>
+  </thead>
+  <tbody>
+    {% for suggestion_unitaire in suggestion_unitaires %}
+      <tr>
+        <td>{{ suggestion_unitaire.get_action_type_display }}</td>
+        <td>
+          <ul>
+            {% for key, value in suggestion_unitaire.display_acteur_details.items %}
+              <li>{{ key }} : {{ value }}</li>
+            {% endfor %}
+          </ul>
+        </td>
+        <td>
+          <details>
+            <summary>Données brutes</summary>
+            <pre>{{ suggestion_unitaire.suggestion }}</pre>
+          </details>
+        </td>
+      </tr>
+ + + {% endfor %} + diff --git a/templates/data/base.html b/templates/data/base.html index 8b4acc3eb..2423a1f3a 100644 --- a/templates/data/base.html +++ b/templates/data/base.html @@ -26,7 +26,6 @@ {% block css_extras %}{% endblock %} {# Js #} - {% block javascript_extras %}{% endblock %} From ae95aa285c4d0a0d87c3d3964c67622df299f9d8 Mon Sep 17 00:00:00 2001 From: Nicolas Oudard Date: Fri, 10 Jan 2025 08:22:52 +0100 Subject: [PATCH 05/26] manage location --- dags/sources/config/airflow_params.py | 2 +- dags/sources/dags/source_aliapur.py | 5 +++++ dags/sources/dags/source_citeo.py | 5 +++++ dags/sources/dags/source_cma.py | 5 +++++ dags/sources/dags/source_corepile.py | 5 +++++ dags/sources/dags/source_ecodds.py | 5 +++++ dags/sources/dags/source_ecologic.py | 8 +++++--- dags/sources/dags/source_ecomaison.py | 5 +++++ dags/sources/dags/source_ecosystem.py | 5 +++++ dags/sources/dags/source_ocab.py | 5 +++++ dags/sources/dags/source_ocad3e.py | 5 +++++ dags/sources/dags/source_pyreo.py | 2 +- dags/sources/dags/source_refashion.py | 5 +++++ dags/sources/dags/source_screlec.py | 5 +++++ dags/sources/dags/source_sinoe.py | 5 +++++ dags/sources/dags/source_soren.py | 5 +++++ dags/sources/dags/source_valdelia.py | 8 +++++--- .../sources/tasks/business_logic/source_data_normalize.py | 8 +++++--- 18 files changed, 82 insertions(+), 11 deletions(-) diff --git a/dags/sources/config/airflow_params.py b/dags/sources/config/airflow_params.py index d095ae980..0ae7754b6 100644 --- a/dags/sources/config/airflow_params.py +++ b/dags/sources/config/airflow_params.py @@ -66,7 +66,7 @@ "clean_souscategorie_codes_sinoe": clean_souscategorie_codes_sinoe, "get_latlng_from_geopoint": get_latlng_from_geopoint, "strip_lower_string": strip_lower_string, - "clean_location": compute_location, + "compute_location": compute_location, } diff --git a/dags/sources/dags/source_aliapur.py b/dags/sources/dags/source_aliapur.py index 9f4bd9c4f..28fb8989d 100755 --- a/dags/sources/dags/source_aliapur.py +++ b/dags/sources/dags/source_aliapur.py @@ -72,6 +72,11 @@ "value": constants.ACTEUR_ACTIF, }, # 4. Transformation du dataframe + { + "origin": ["latitude", "longitude"], + "transformation": "compute_location", + "destination": ["location"], + }, { "origin": ["labels_etou_bonus", "acteur_type_code"], "transformation": "clean_label_codes", diff --git a/dags/sources/dags/source_citeo.py b/dags/sources/dags/source_citeo.py index 6351503a2..c41972843 100755 --- a/dags/sources/dags/source_citeo.py +++ b/dags/sources/dags/source_citeo.py @@ -76,6 +76,11 @@ "value": [], }, # 4. Transformation du dataframe + { + "origin": ["latitude", "longitude"], + "transformation": "compute_location", + "destination": ["location"], + }, { "origin": ["id_point_apport_ou_reparation", "nom"], "transformation": "clean_identifiant_externe", diff --git a/dags/sources/dags/source_cma.py b/dags/sources/dags/source_cma.py index f74d40c17..4b2e132ae 100755 --- a/dags/sources/dags/source_cma.py +++ b/dags/sources/dags/source_cma.py @@ -94,6 +94,11 @@ "value": "cmareparacteur", }, # 4. 
Transformation du dataframe + { + "origin": ["final_latitude", "final_longitude"], + "transformation": "compute_location", + "destination": ["location"], + }, { "origin": ["telephone", "code_postal"], "transformation": "clean_telephone", diff --git a/dags/sources/dags/source_corepile.py b/dags/sources/dags/source_corepile.py index 10e54b7cd..515b01fbb 100755 --- a/dags/sources/dags/source_corepile.py +++ b/dags/sources/dags/source_corepile.py @@ -76,6 +76,11 @@ "value": [], }, # 4. Transformation du dataframe + { + "origin": ["latitude", "longitude"], + "transformation": "compute_location", + "destination": ["location"], + }, # { # "origin": ["labels_etou_bonus", "acteur_type_code"], # "transformation": "clean_label_codes", diff --git a/dags/sources/dags/source_ecodds.py b/dags/sources/dags/source_ecodds.py index 324adc883..271712aed 100755 --- a/dags/sources/dags/source_ecodds.py +++ b/dags/sources/dags/source_ecodds.py @@ -72,6 +72,11 @@ "value": constants.ACTEUR_ACTIF, }, # 4. Transformation du dataframe + { + "origin": ["latitude", "longitude"], + "transformation": "compute_location", + "destination": ["location"], + }, { "origin": ["labels_etou_bonus", "acteur_type_code"], "transformation": "clean_label_codes", diff --git a/dags/sources/dags/source_ecologic.py b/dags/sources/dags/source_ecologic.py index 809ccbbdf..cb4d4f6dc 100755 --- a/dags/sources/dags/source_ecologic.py +++ b/dags/sources/dags/source_ecologic.py @@ -76,6 +76,11 @@ "value": [], }, # 4. Transformation du dataframe + { + "origin": ["latitude", "longitude"], + "transformation": "compute_location", + "destination": ["location"], + }, # { # "origin": ["labels_etou_bonus", "acteur_type_code"], # "transformation": "clean_label_codes", @@ -145,9 +150,6 @@ "https://data.pointsapport.ademe.fr/data-fair/api/v1/datasets/" "donnees-eo-ecologic/lines?size=10000" ), - "columns_to_add_by_default": { - "statut": constants.ACTEUR_ACTIF, - }, "ignore_duplicates": False, "validate_address_with_ban": False, "merge_duplicated_acteurs": True, # In case of multi ecoorganisme or filiere diff --git a/dags/sources/dags/source_ecomaison.py b/dags/sources/dags/source_ecomaison.py index f386e620c..a15e5be39 100755 --- a/dags/sources/dags/source_ecomaison.py +++ b/dags/sources/dags/source_ecomaison.py @@ -81,6 +81,11 @@ "value": [], }, # 4. Transformation du dataframe + { + "origin": ["latitude", "longitude"], + "transformation": "compute_location", + "destination": ["location"], + }, # { # "origin": ["labels_etou_bonus", "acteur_type_code"], # "transformation": "clean_label_codes", diff --git a/dags/sources/dags/source_ecosystem.py b/dags/sources/dags/source_ecosystem.py index a3745786f..5152bc695 100755 --- a/dags/sources/dags/source_ecosystem.py +++ b/dags/sources/dags/source_ecosystem.py @@ -76,6 +76,11 @@ "value": [], }, # 4. Transformation du dataframe + { + "origin": ["latitude", "longitude"], + "transformation": "compute_location", + "destination": ["location"], + }, # { # "origin": ["labels_etou_bonus", "acteur_type_code"], # "transformation": "clean_label_codes", diff --git a/dags/sources/dags/source_ocab.py b/dags/sources/dags/source_ocab.py index 197c24e9a..ad45453e6 100755 --- a/dags/sources/dags/source_ocab.py +++ b/dags/sources/dags/source_ocab.py @@ -76,6 +76,11 @@ "value": [], }, # 4. 
Transformation du dataframe + { + "origin": ["latitude", "longitude"], + "transformation": "compute_location", + "destination": ["location"], + }, # { # "origin": ["labels_etou_bonus", "acteur_type_code"], # "transformation": "clean_label_codes", diff --git a/dags/sources/dags/source_ocad3e.py b/dags/sources/dags/source_ocad3e.py index c8f53345f..945553359 100755 --- a/dags/sources/dags/source_ocad3e.py +++ b/dags/sources/dags/source_ocad3e.py @@ -80,6 +80,11 @@ # "value": [], # }, # 4. Transformation du dataframe + { + "origin": ["latitude", "longitude"], + "transformation": "compute_location", + "destination": ["location"], + }, { "origin": ["labels_etou_bonus", "acteur_type_code"], "transformation": "clean_label_codes", diff --git a/dags/sources/dags/source_pyreo.py b/dags/sources/dags/source_pyreo.py index 5dc541254..f87eff16a 100755 --- a/dags/sources/dags/source_pyreo.py +++ b/dags/sources/dags/source_pyreo.py @@ -83,7 +83,7 @@ # 4. Transformation du dataframe { "origin": ["latitude", "longitude"], - "transformation": "clean_location", + "transformation": "compute_location", "destination": ["location"], }, { diff --git a/dags/sources/dags/source_refashion.py b/dags/sources/dags/source_refashion.py index a135aff76..9960f06a3 100755 --- a/dags/sources/dags/source_refashion.py +++ b/dags/sources/dags/source_refashion.py @@ -85,6 +85,11 @@ # "value": [], # }, # 4. Transformation du dataframe + { + "origin": ["latitude", "longitude"], + "transformation": "compute_location", + "destination": ["location"], + }, { "origin": ["labels_etou_bonus", "acteur_type_code"], "transformation": "clean_label_codes", diff --git a/dags/sources/dags/source_screlec.py b/dags/sources/dags/source_screlec.py index 80d353102..db259fbc1 100644 --- a/dags/sources/dags/source_screlec.py +++ b/dags/sources/dags/source_screlec.py @@ -86,6 +86,11 @@ # "value": [], # }, # 4. Transformation du dataframe + { + "origin": ["latitude", "longitude"], + "transformation": "compute_location", + "destination": ["location"], + }, { "origin": ["labels_etou_bonus", "acteur_type_code"], "transformation": "clean_label_codes", diff --git a/dags/sources/dags/source_sinoe.py b/dags/sources/dags/source_sinoe.py index b2a2c699d..b3a117c25 100755 --- a/dags/sources/dags/source_sinoe.py +++ b/dags/sources/dags/source_sinoe.py @@ -84,6 +84,11 @@ "transformation": "get_latlng_from_geopoint", "destination": ["latitude", "longitude"], }, + { + "origin": ["latitude", "longitude"], + "transformation": "compute_location", + "destination": ["location"], + }, { "origin": ["TEL_SERVICE", "code_postal"], "transformation": "clean_telephone", diff --git a/dags/sources/dags/source_soren.py b/dags/sources/dags/source_soren.py index c53c33474..04d8b0f47 100755 --- a/dags/sources/dags/source_soren.py +++ b/dags/sources/dags/source_soren.py @@ -86,6 +86,11 @@ # "value": [], # }, # 4. Transformation du dataframe + { + "origin": ["latitude", "longitude"], + "transformation": "compute_location", + "destination": ["location"], + }, { "origin": ["labels_etou_bonus", "acteur_type_code"], "transformation": "clean_label_codes", diff --git a/dags/sources/dags/source_valdelia.py b/dags/sources/dags/source_valdelia.py index 6bee2054f..cbd161091 100755 --- a/dags/sources/dags/source_valdelia.py +++ b/dags/sources/dags/source_valdelia.py @@ -81,6 +81,11 @@ # "value": [], # }, # 4. 
Transformation du dataframe + { + "origin": ["latitude", "longitude"], + "transformation": "compute_location", + "destination": ["location"], + }, { "origin": ["labels_etou_bonus", "acteur_type_code"], "transformation": "clean_label_codes", @@ -170,9 +175,6 @@ "longitudewgs84": "longitude", "latitudewgs84": "latitude", }, - "columns_to_add_by_default": { - "statut": constants.ACTEUR_ACTIF, - }, "endpoint": ( "https://data.pointsapport.ademe.fr/data-fair/api/v1/datasets/" "donnees-eo-valdelia/lines?size=10000" diff --git a/dags/sources/tasks/business_logic/source_data_normalize.py b/dags/sources/tasks/business_logic/source_data_normalize.py index 7194ffcf7..4bc546239 100755 --- a/dags/sources/tasks/business_logic/source_data_normalize.py +++ b/dags/sources/tasks/business_logic/source_data_normalize.py @@ -14,7 +14,7 @@ NormalizationColumnTransform, NormalizationDFTransform, ) -from sources.tasks.transform.transform_df import merge_duplicates +from sources.tasks.transform.transform_df import compute_location, merge_duplicates from sqlalchemy import text from tenacity import retry, stop_after_attempt, wait_fixed from utils import logging_utils as log @@ -238,7 +238,7 @@ def enrich_from_ban_api(row: pd.Series) -> pd.Series: ban_cache_row = engine.execute( text( - "SELECT * FROM qfdmo_bancache WHERE adresse = :adresse and code_postal = " + "SELECT * FROM data_bancache WHERE adresse = :adresse and code_postal = " ":code_postal and ville = :ville and modifie_le > now() - interval '30 day'" " order by modifie_le desc limit 1" ), @@ -258,7 +258,7 @@ def enrich_from_ban_api(row: pd.Series) -> pd.Series: result = r.json() engine.execute( text( - "INSERT INTO qfdmo_bancache" + "INSERT INTO data_bancache" " (adresse, code_postal, ville, ban_returned, modifie_le)" " VALUES (:adresse, :code_postal, :ville, :result, NOW())" ), @@ -291,6 +291,8 @@ def enrich_from_ban_api(row: pd.Series) -> pd.Series: else: row["longitude"] = 0 row["latitude"] = 0 + + row["location"] = compute_location(row[["latitude", "longitude"]], None) return row From fe8d8de3f6fa6d5683fbacb298113aa8de9f5e9c Mon Sep 17 00:00:00 2001 From: Nicolas Oudard Date: Fri, 10 Jan 2025 10:08:45 +0100 Subject: [PATCH 06/26] Ignorer les localisations nulle --- .../tasks/business_logic/source_data_normalize.py | 13 +++++++++++++ dags/sources/tasks/transform/transform_column.py | 1 - dags/utils/base_utils.py | 2 ++ 3 files changed, 15 insertions(+), 1 deletion(-) diff --git a/dags/sources/tasks/business_logic/source_data_normalize.py b/dags/sources/tasks/business_logic/source_data_normalize.py index 4bc546239..d68f01fa5 100755 --- a/dags/sources/tasks/business_logic/source_data_normalize.py +++ b/dags/sources/tasks/business_logic/source_data_normalize.py @@ -192,6 +192,19 @@ def source_data_normalize( # Merge et suppression des lignes indésirables df = _remove_undesired_lines(df, dag_config) + # TODO: A voir ce qu'on doit faire de ces acteurs non digitaus mais sans + # localisation (proposition : les afficher en erreur directement ?) 
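+    # The "location" column holds a WKB hex point built by compute_location,
+    # e.g. Point(2.3522, 48.8566) -> "0101000000a835cd3b4ed1024076e09c11a56d4840";
+    # digital actors legitimately have none, hence the filter below.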
+ df_acteur_sans_loc = df[ + (df["location"].isnull()) & (df["acteur_type_code"] != "acteur_digital") + ] + if not df_acteur_sans_loc.empty: + nb_acteurs = len(df) + logger.warning( + f"Nombre d'acteur sans localisation: {len(df_acteur_sans_loc)} / " + f"{nb_acteurs}" + ) + log.preview("Acteurs sans localisation", df_acteur_sans_loc) + log.preview("df après normalisation", df) if df.empty: raise ValueError("Plus aucune donnée disponible après normalisation") diff --git a/dags/sources/tasks/transform/transform_column.py b/dags/sources/tasks/transform/transform_column.py index 6f8dcebe0..9dc36c8fe 100644 --- a/dags/sources/tasks/transform/transform_column.py +++ b/dags/sources/tasks/transform/transform_column.py @@ -175,7 +175,6 @@ def clean_souscategorie_codes( return souscategorie_codes product_mapping = dag_config.product_mapping - logger.warning(f"{sscat_list=}") for sscat in sscat_list.split("|"): sscat = sscat.strip().lower() if not sscat: diff --git a/dags/utils/base_utils.py b/dags/utils/base_utils.py index 135d8b964..c841cdcb1 100755 --- a/dags/utils/base_utils.py +++ b/dags/utils/base_utils.py @@ -114,6 +114,8 @@ def extract_details(row, col="adresse_format_ban"): def transform_location(longitude, latitude): + if not longitude or not latitude: + return None return wkb.dumps(Point(longitude, latitude)).hex() From a5a6286d7eb3d8f61bcf4e9d0b7a319776c48023 Mon Sep 17 00:00:00 2001 From: Nicolas Oudard Date: Mon, 13 Jan 2025 11:17:20 +0100 Subject: [PATCH 07/26] update migrations --- dags/ingest_validated_dataset_to_db.py | 2 + dags/utils/dag_eo_utils.py | 2 +- qfdmo/migrations/0052_dagrun.py | 2 +- ...te_remove_dagrunchange_dag_run_and_more.py | 181 ------------------ ...e_pourcentage_erreurs_tolerees_and_more.py | 64 ------- ...e_remove_dagrunchange_dag_run_and_more.py} | 12 +- 6 files changed, 10 insertions(+), 253 deletions(-) delete mode 100644 qfdmo/migrations/0109_suggestioncohorte_remove_dagrunchange_dag_run_and_more.py delete mode 100644 qfdmo/migrations/0110_suggestioncohorte_pourcentage_erreurs_tolerees_and_more.py rename qfdmo/migrations/{0111_delete_bancache_and_more.py => 0111_delete_bancache_remove_dagrunchange_dag_run_and_more.py} (53%) diff --git a/dags/ingest_validated_dataset_to_db.py b/dags/ingest_validated_dataset_to_db.py index d4778d964..fb56f20ea 100755 --- a/dags/ingest_validated_dataset_to_db.py +++ b/dags/ingest_validated_dataset_to_db.py @@ -1,3 +1,5 @@ +# FIXME: intégrer ce dag dans l'architecture cible + from datetime import timedelta import pandas as pd diff --git a/dags/utils/dag_eo_utils.py b/dags/utils/dag_eo_utils.py index f55c1dcba..b5a311c78 100755 --- a/dags/utils/dag_eo_utils.py +++ b/dags/utils/dag_eo_utils.py @@ -33,7 +33,7 @@ def insert_suggestion_and_process_df(df_acteur_updates, metadata, dag_name, run_ ( dag_name, run_name, - constants.SUGGESTION_SOURCE, # FIXME: spécialiser les sources + constants.SUGGESTION_SOURCE, constants.SUGGESTION_AVALIDER, json.dumps(metadata), current_date, diff --git a/qfdmo/migrations/0052_dagrun.py b/qfdmo/migrations/0052_dagrun.py index b9f9bd3d9..d3b95f1ae 100644 --- a/qfdmo/migrations/0052_dagrun.py +++ b/qfdmo/migrations/0052_dagrun.py @@ -51,7 +51,7 @@ class Migration(migrations.Migration): ), ), ("meta_data", models.JSONField(blank=True, null=True)), - ("suggestion", models.JSONField(blank=True, null=True)), + ("row_updates", models.JSONField(blank=True, null=True)), ( "dag_run", models.ForeignKey( diff --git a/qfdmo/migrations/0109_suggestioncohorte_remove_dagrunchange_dag_run_and_more.py 
b/qfdmo/migrations/0109_suggestioncohorte_remove_dagrunchange_dag_run_and_more.py deleted file mode 100644 index afd0cc0f8..000000000 --- a/qfdmo/migrations/0109_suggestioncohorte_remove_dagrunchange_dag_run_and_more.py +++ /dev/null @@ -1,181 +0,0 @@ -# Generated by Django 5.1.4 on 2025-01-08 16:36 - -import django.db.models.deletion -import django.db.models.functions.datetime -from django.db import migrations, models - - -class Migration(migrations.Migration): - - dependencies = [ - ("qfdmo", "0108_remove_lvaobaserevision_lvao_base_and_more"), - ] - - operations = [ - migrations.CreateModel( - name="SuggestionCohorte", - fields=[ - ("id", models.AutoField(primary_key=True, serialize=False)), - ( - "identifiant_action", - models.CharField( - help_text="Identifiant de l'action (ex : dag_id pour Airflow)", - max_length=250, - ), - ), - ( - "identifiant_execution", - models.CharField( - help_text="Identifiant de l'execution (ex : run_id pour Airflow)", - max_length=250, - ), - ), - ( - "type_action", - models.CharField( - blank=True, - choices=[ - ("CLUSTERING", "regroupement/déduplication des acteurs"), - ("SOURCE", "ingestion de source de données"), - ( - "SOURCE_AJOUT", - "ingestion de source de données - nouveau acteur", - ), - ( - "SOURCE_MISESAJOUR", - "ingestion de source de données - modification d'acteur existant", - ), - ("SOURCE_SUPRESSION", "ingestion de source de données"), - ], - max_length=250, - ), - ), - ( - "statut", - models.CharField( - choices=[ - ("AVALIDER", "À valider"), - ("REJETER", "Rejeter"), - ("ATRAITER", "À traiter"), - ("ENCOURS", "En cours de traitement"), - ("ERREUR", "Fini en erreur"), - ("PARTIEL", "Fini avec succès partiel"), - ("SUCCES", "Fini avec succès"), - ], - default="AVALIDER", - max_length=50, - ), - ), - ( - "metadata", - models.JSONField( - null=True, - blank=True, - help_text="Metadata de la cohorte, données statistiques", - ), - ), - ( - "cree_le", - models.DateTimeField( - auto_now_add=True, - db_default=django.db.models.functions.datetime.Now(), - ), - ), - ( - "modifie_le", - models.DateTimeField( - auto_now=True, - db_default=django.db.models.functions.datetime.Now(), - ), - ), - ], - ), - migrations.RemoveField( - model_name="dagrunchange", - name="dag_run", - ), - migrations.CreateModel( - name="SuggestionUnitaire", - fields=[ - ("id", models.AutoField(primary_key=True, serialize=False)), - ( - "type_action", - models.CharField( - blank=True, - choices=[ - ("CLUSTERING", "regroupement/déduplication des acteurs"), - ("SOURCE", "ingestion de source de données"), - ( - "SOURCE_AJOUT", - "ingestion de source de données - nouveau acteur", - ), - ( - "SOURCE_MISESAJOUR", - "ingestion de source de données - modification d'acteur existant", - ), - ("SOURCE_SUPRESSION", "ingestion de source de données"), - ], - max_length=250, - ), - ), - ( - "statut", - models.CharField( - choices=[ - ("AVALIDER", "À valider"), - ("REJETER", "Rejeter"), - ("ATRAITER", "À traiter"), - ("ENCOURS", "En cours de traitement"), - ("ERREUR", "Fini en erreur"), - ("PARTIEL", "Fini avec succès partiel"), - ("SUCCES", "Fini avec succès"), - ], - default="AVALIDER", - max_length=50, - ), - ), - ( - "context", - models.JSONField( - null=True, - blank=True, - help_text="Contexte de la suggestion : données initiales", - ), - ), - ( - "suggestion", - models.JSONField( - blank=True, help_text="Suggestion de modification" - ), - ), - ( - "cree_le", - models.DateTimeField( - auto_now_add=True, - db_default=django.db.models.functions.datetime.Now(), - ), - ), - ( - "modifie_le", 
- models.DateTimeField( - auto_now=True, - db_default=django.db.models.functions.datetime.Now(), - ), - ), - ( - "suggestion_cohorte", - models.ForeignKey( - on_delete=django.db.models.deletion.CASCADE, - related_name="suggestion_unitaires", - to="qfdmo.suggestioncohorte", - ), - ), - ], - ), - migrations.DeleteModel( - name="DagRun", - ), - migrations.DeleteModel( - name="DagRunChange", - ), - ] diff --git a/qfdmo/migrations/0110_suggestioncohorte_pourcentage_erreurs_tolerees_and_more.py b/qfdmo/migrations/0110_suggestioncohorte_pourcentage_erreurs_tolerees_and_more.py deleted file mode 100644 index 9ca12f647..000000000 --- a/qfdmo/migrations/0110_suggestioncohorte_pourcentage_erreurs_tolerees_and_more.py +++ /dev/null @@ -1,64 +0,0 @@ -# Generated by Django 5.1.4 on 2025-01-09 12:38 - -import django.core.validators -from django.db import migrations, models - - -class Migration(migrations.Migration): - - dependencies = [ - ("qfdmo", "0109_suggestioncohorte_remove_dagrunchange_dag_run_and_more"), - ] - - operations = [ - migrations.AddField( - model_name="suggestioncohorte", - name="pourcentage_erreurs_tolerees", - field=models.IntegerField( - default=0, - help_text="Nombre d'erreurs tolérées en pourcentage", - validators=[ - django.core.validators.MinValueValidator(0), - django.core.validators.MaxValueValidator(100), - ], - ), - ), - migrations.AlterField( - model_name="suggestioncohorte", - name="type_action", - field=models.CharField( - blank=True, - choices=[ - ("CLUSTERING", "regroupement/déduplication des acteurs"), - ("SOURCE", "ingestion de source de données"), - ("SOURCE_AJOUT", "ingestion de source de données - nouveau acteur"), - ( - "SOURCE_MISESAJOUR", - "ingestion de source de données - modification d'acteur existant", - ), - ("SOURCE_SUPRESSION", "ingestion de source de données"), - ("ENRICHISSEMENT", "suggestion d'enrichissement"), - ], - max_length=250, - ), - ), - migrations.AlterField( - model_name="suggestionunitaire", - name="type_action", - field=models.CharField( - blank=True, - choices=[ - ("CLUSTERING", "regroupement/déduplication des acteurs"), - ("SOURCE", "ingestion de source de données"), - ("SOURCE_AJOUT", "ingestion de source de données - nouveau acteur"), - ( - "SOURCE_MISESAJOUR", - "ingestion de source de données - modification d'acteur existant", - ), - ("SOURCE_SUPRESSION", "ingestion de source de données"), - ("ENRICHISSEMENT", "suggestion d'enrichissement"), - ], - max_length=250, - ), - ), - ] diff --git a/qfdmo/migrations/0111_delete_bancache_and_more.py b/qfdmo/migrations/0111_delete_bancache_remove_dagrunchange_dag_run_and_more.py similarity index 53% rename from qfdmo/migrations/0111_delete_bancache_and_more.py rename to qfdmo/migrations/0111_delete_bancache_remove_dagrunchange_dag_run_and_more.py index a0d8d291e..ce1aa0c50 100644 --- a/qfdmo/migrations/0111_delete_bancache_and_more.py +++ b/qfdmo/migrations/0111_delete_bancache_remove_dagrunchange_dag_run_and_more.py @@ -1,4 +1,4 @@ -# Generated by Django 5.1.4 on 2025-01-09 13:50 +# Generated by Django 5.1.4 on 2025-01-13 10:17 from django.db import migrations @@ -6,7 +6,7 @@ class Migration(migrations.Migration): dependencies = [ - ("qfdmo", "0110_suggestioncohorte_pourcentage_erreurs_tolerees_and_more"), + ("qfdmo", "0110_alter_source_code"), ] operations = [ @@ -14,13 +14,13 @@ class Migration(migrations.Migration): name="BANCache", ), migrations.RemoveField( - model_name="suggestionunitaire", - name="suggestion_cohorte", + model_name="dagrunchange", + name="dag_run", ), 
migrations.DeleteModel( - name="SuggestionCohorte", + name="DagRun", ), migrations.DeleteModel( - name="SuggestionUnitaire", + name="DagRunChange", ), ] From 0bcf0aad2b93e1197cc2f93495e332c11117e0c1 Mon Sep 17 00:00:00 2001 From: Nicolas Oudard Date: Mon, 13 Jan 2025 17:25:30 +0100 Subject: [PATCH 08/26] update tests --- .../business_logic/db_write_suggestion.py | 2 +- .../business_logic/source_data_normalize.py | 5 +- .../test_propose_acteur_changes.py | 34 ------- .../test_source_data_normalize.py | 4 - dags_unit_tests/utils/test_db_data_prepare.py | 92 ++++++++++++++----- 5 files changed, 73 insertions(+), 64 deletions(-) diff --git a/dags/sources/tasks/business_logic/db_write_suggestion.py b/dags/sources/tasks/business_logic/db_write_suggestion.py index a1f60e8f4..832869fa5 100644 --- a/dags/sources/tasks/business_logic/db_write_suggestion.py +++ b/dags/sources/tasks/business_logic/db_write_suggestion.py @@ -72,7 +72,7 @@ def insert_suggestion( ( dag_name, run_name, - action_type, # FIXME: spécialiser les sources + action_type, constants.SUGGESTION_AVALIDER, json.dumps(metadata), current_date, diff --git a/dags/sources/tasks/business_logic/source_data_normalize.py b/dags/sources/tasks/business_logic/source_data_normalize.py index d68f01fa5..c201cb8cc 100755 --- a/dags/sources/tasks/business_logic/source_data_normalize.py +++ b/dags/sources/tasks/business_logic/source_data_normalize.py @@ -52,7 +52,7 @@ def _transform_columns(df: pd.DataFrame, dag_config: DAGConfig) -> pd.DataFrame: for column_to_transform in columns_to_transform: function_name = column_to_transform.transformation normalisation_function = get_transformation_function(function_name, dag_config) - logger.warning(f"Transformation {function_name}") + # logger.warning(f"Transformation {function_name}") df[column_to_transform.destination] = df[column_to_transform.origin].apply( normalisation_function ) @@ -70,8 +70,7 @@ def _transform_df(df: pd.DataFrame, dag_config: DAGConfig) -> pd.DataFrame: for column_to_transform_df in columns_to_transform_df: function_name = column_to_transform_df.transformation normalisation_function = get_transformation_function(function_name, dag_config) - - logger.warning(f"Transformation {function_name}") + # logger.warning(f"Transformation {function_name}") df[column_to_transform_df.destination] = df[ column_to_transform_df.origin ].apply(normalisation_function, axis=1) diff --git a/dags_unit_tests/sources/tasks/business_logic/test_propose_acteur_changes.py b/dags_unit_tests/sources/tasks/business_logic/test_propose_acteur_changes.py index 87dc5c50b..46903ae8d 100644 --- a/dags_unit_tests/sources/tasks/business_logic/test_propose_acteur_changes.py +++ b/dags_unit_tests/sources/tasks/business_logic/test_propose_acteur_changes.py @@ -2,8 +2,6 @@ import pandas as pd import pytest -from shapely import wkb -from shapely.geometry import Point from sources.tasks.business_logic.propose_acteur_changes import propose_acteur_changes @@ -69,35 +67,3 @@ def test_create_actors_cree_le( assert "cree_le" in df_result.columns assert df_result["cree_le"].notnull().all() assert df_result["cree_le"][0].date() == expected_cree_le - - -class TestActorsLocation: - @pytest.mark.parametrize( - "latitude, longitude", - [ - (48.8566, 2.3522), - ("48.8566", "2.3522"), - ("48,8566", "2,3522"), - ], - ) - def test_create_actors_location( - self, - df_empty_acteurs_from_db, - latitude, - longitude, - ): - result = propose_acteur_changes( - df_acteur=pd.DataFrame( - { - "identifiant_unique": ["1"], - "latitude": [latitude], - 
"longitude": [longitude], - } - ), - df_acteur_from_db=df_empty_acteurs_from_db, - ) - df_result = result["df"] - - expected_location = wkb.dumps(Point(2.3522, 48.8566)).hex() - - assert df_result["location"].iloc[0] == expected_location diff --git a/dags_unit_tests/sources/tasks/business_logic/test_source_data_normalize.py b/dags_unit_tests/sources/tasks/business_logic/test_source_data_normalize.py index 6922886da..d5cc28479 100755 --- a/dags_unit_tests/sources/tasks/business_logic/test_source_data_normalize.py +++ b/dags_unit_tests/sources/tasks/business_logic/test_source_data_normalize.py @@ -76,8 +76,6 @@ def test_annee_unique(self, product_mapping, dechet_mapping, acteurtype_id_by_co with pytest.raises(ValueError): df = df_normalize_sinoe( df=df, - product_mapping=product_mapping, - dechet_mapping=dechet_mapping, ) def test_drop_annee_column( @@ -85,8 +83,6 @@ def test_drop_annee_column( ): df = df_normalize_sinoe( df=df_sinoe, - product_mapping=product_mapping, - dechet_mapping=dechet_mapping, ) assert "ANNEE" not in df.columns diff --git a/dags_unit_tests/utils/test_db_data_prepare.py b/dags_unit_tests/utils/test_db_data_prepare.py index 047ef17e0..dde80bb35 100644 --- a/dags_unit_tests/utils/test_db_data_prepare.py +++ b/dags_unit_tests/utils/test_db_data_prepare.py @@ -12,7 +12,7 @@ class TestDBDataPrepare: [ ( pd.DataFrame(columns=["acteur_id", "labelqualite_id"]), - {0: None, 1: None}, + [None, None], ), ( pd.DataFrame( @@ -21,14 +21,14 @@ class TestDBDataPrepare: "labelqualite_id": [1, 1, 2], } ), - { - 0: [ + [ + [ { "acteur_id": 1, "labelqualite_id": 1, } ], - 1: [ + [ { "acteur_id": 2, "labelqualite_id": 1, @@ -38,7 +38,7 @@ class TestDBDataPrepare: "labelqualite_id": 2, }, ], - }, + ], ), ], ) @@ -48,6 +48,7 @@ def test_db_data_prepare_labels( df_proposition_services_sous_categories, propose_labels, expected_labels, + df_acteurs_from_db, source_id_by_code, acteurtype_id_by_code, ): @@ -71,25 +72,26 @@ def test_db_data_prepare_labels( df_pssc=df_proposition_services_sous_categories, df_labels=propose_labels, df_acteur_services=pd.DataFrame( - columns=["acteur_id", "acteurservice_id", "acteurservice"] + { + "acteur_id": [1, 2], + "acteurservice_id": [10, 10], + "acteurservice": [ + "Service de réparation", + "Service de réparation", + ], + } ), + df_acteurs_from_db=df_acteurs_from_db, source_id_by_code=source_id_by_code, acteurtype_id_by_code=acteurtype_id_by_code, ) - result = df_result["all"]["df"].to_dict() - labels = result["labels"] - assert labels == expected_labels + assert "labels" in df_result["df_acteur_to_create"].columns + assert list(df_result["df_acteur_to_create"]["labels"]) == expected_labels @pytest.mark.parametrize( "propose_acteur_services, expected_acteur_services", [ - ( - pd.DataFrame( - columns=["acteur_id", "acteurservice_id", "acteurservice"] - ), - {0: None, 1: None}, - ), ( pd.DataFrame( { @@ -102,15 +104,15 @@ def test_db_data_prepare_labels( ], } ), - { - 0: [ + [ + [ { "acteur_id": 1, "acteurservice": "Service de réparation", "acteurservice_id": 10, } ], - 1: [ + [ { "acteur_id": 2, "acteurservice": "Service de réparation", @@ -122,7 +124,7 @@ def test_db_data_prepare_labels( "acteurservice_id": 20, }, ], - }, + ], ), ], ) @@ -132,6 +134,7 @@ def test_db_data_prepare_acteur_services( df_proposition_services_sous_categories, propose_acteur_services, expected_acteur_services, + df_acteurs_from_db, source_id_by_code, acteurtype_id_by_code, ): @@ -154,10 +157,55 @@ def test_db_data_prepare_acteur_services( 
df_pssc=df_proposition_services_sous_categories, df_labels=pd.DataFrame(columns=["acteur_id", "labelqualite_id"]), df_acteur_services=propose_acteur_services, + df_acteurs_from_db=df_acteurs_from_db, source_id_by_code=source_id_by_code, acteurtype_id_by_code=acteurtype_id_by_code, ) - result = df_result["all"]["df"].to_dict() - acteur_services = result["acteur_services"] - assert acteur_services == expected_acteur_services + assert "acteur_services" in df_result["df_acteur_to_create"].columns + assert ( + list(df_result["df_acteur_to_create"]["acteur_services"]) + == expected_acteur_services + ) + + def test_db_data_prepare_acteur_services_empty( + self, + df_proposition_services, + df_proposition_services_sous_categories, + df_acteurs_from_db, + source_id_by_code, + acteurtype_id_by_code, + ): + + with pytest.raises(ValueError) as erreur: + db_data_prepare( + df_acteur_to_delete=pd.DataFrame( + { + "identifiant_unique": [3], + "statut": ["ACTIF"], + "cree_le": [datetime(2024, 1, 1)], + } + ), + df_acteur=pd.DataFrame( + { + "identifiant_unique": [1, 2], + "source_code": ["source1", "source2"], + "acteur_type_code": ["commerce", "commerce"], + } + ), + df_ps=df_proposition_services, + df_pssc=df_proposition_services_sous_categories, + df_labels=pd.DataFrame(columns=["acteur_id", "labelqualite_id"]), + df_acteur_services=pd.DataFrame( + columns=["acteur_id", "acteurservice_id", "acteurservice"] + ), + df_acteurs_from_db=df_acteurs_from_db, + source_id_by_code=source_id_by_code, + acteurtype_id_by_code=acteurtype_id_by_code, + ) + assert str(erreur.value) == "df_acteur_services est vide" + + +class TestActeurToCreateToDeleteToUpdate: + # FIXME : tests à écrire + pass From c8fba02a02410308c35b91360773aff0a921487d Mon Sep 17 00:00:00 2001 From: Nicolas Oudard Date: Tue, 14 Jan 2025 09:46:53 +0100 Subject: [PATCH 09/26] Update tests --- .../business_logic/source_data_normalize.py | 23 ++++++++++--------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/dags/sources/tasks/business_logic/source_data_normalize.py b/dags/sources/tasks/business_logic/source_data_normalize.py index c201cb8cc..8830fc8cb 100755 --- a/dags/sources/tasks/business_logic/source_data_normalize.py +++ b/dags/sources/tasks/business_logic/source_data_normalize.py @@ -191,18 +191,19 @@ def source_data_normalize( # Merge et suppression des lignes indésirables df = _remove_undesired_lines(df, dag_config) - # TODO: A voir ce qu'on doit faire de ces acteurs non digitaus mais sans + # TODO: A voir ce qu'on doit faire de ces acteurs non digitaux mais sans # localisation (proposition : les afficher en erreur directement ?) 
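+    # Only run the check when both columns are present: minimal dataframes
+    # fed by the unit tests may omit location/acteur_type_code entirely.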
- df_acteur_sans_loc = df[ - (df["location"].isnull()) & (df["acteur_type_code"] != "acteur_digital") - ] - if not df_acteur_sans_loc.empty: - nb_acteurs = len(df) - logger.warning( - f"Nombre d'acteur sans localisation: {len(df_acteur_sans_loc)} / " - f"{nb_acteurs}" - ) - log.preview("Acteurs sans localisation", df_acteur_sans_loc) + if "location" in df.columns and "acteur_type_code" in df.columns: + df_acteur_sans_loc = df[ + (df["location"].isnull()) & (df["acteur_type_code"] != "acteur_digital") + ] + if not df_acteur_sans_loc.empty: + nb_acteurs = len(df) + logger.warning( + f"Nombre d'acteur sans localisation: {len(df_acteur_sans_loc)} / " + f"{nb_acteurs}" + ) + log.preview("Acteurs sans localisation", df_acteur_sans_loc) log.preview("df après normalisation", df) if df.empty: From 1c8c8d1558c25deb8b7f7b5f044de4cd95ecdda4 Mon Sep 17 00:00:00 2001 From: Nicolas Oudard Date: Tue, 14 Jan 2025 11:16:31 +0100 Subject: [PATCH 10/26] keep dagrun managment and create suggestion aside it --- core/urls.py | 2 +- dags/annuaire_entreprise_checks.py | 2 +- .../shared/tasks/business_logic/write_data.py | 4 +- dags/sources/config/shared_constants.py | 6 + dags/sources/tasks/transform/transform_df.py | 1 + dags/utils/base_utils.py | 3 +- dags/utils/dag_eo_utils.py | 36 ++--- dags/utils/dag_ingest_validated_utils.py | 8 +- .../tasks/transform/test_transform_df.py | 31 ++++ data/urls.py | 8 +- data/views.py | 2 +- jinja2/qfdmo/create_actor_event.html | 16 +- jinja2/qfdmo/dags_validations.html | 51 ++++++ jinja2/qfdmo/partials/candidat_row.html | 10 +- jinja2/qfdmo/update_actor_event.html | 26 +-- qfdmo/forms.py | 19 ++- .../commands/reinitialize_dagrun.py | 12 +- qfdmo/migrations/0111_delete_bancache.py | 16 ++ ...he_remove_dagrunchange_dag_run_and_more.py | 26 --- qfdmo/models/__init__.py | 1 + qfdmo/models/data.py | 148 ++++++++++++++++++ qfdmo/urls.py | 7 + qfdmo/views/dags.py | 108 +++++++++++++ templates/data/dags_validations.html | 2 +- 24 files changed, 445 insertions(+), 100 deletions(-) create mode 100644 jinja2/qfdmo/dags_validations.html create mode 100644 qfdmo/migrations/0111_delete_bancache.py delete mode 100644 qfdmo/migrations/0111_delete_bancache_remove_dagrunchange_dag_run_and_more.py create mode 100644 qfdmo/models/data.py create mode 100644 qfdmo/views/dags.py diff --git a/core/urls.py b/core/urls.py index fd4de64b9..c0ffdf69b 100644 --- a/core/urls.py +++ b/core/urls.py @@ -60,7 +60,7 @@ class PaginatedSitemap(GenericSitemap): path("dsfr/", include(("dsfr_hacks.urls", "dsfr_hacks"), namespace="dsfr_hacks")), path("", include(("qfdmo.urls", "qfdmo"), namespace="qfdmo")), path("", include(("qfdmd.urls", "qfdmd"), namespace="qfdmd")), - path("", include(("data.urls", "data"), namespace="data")), + path("data/", include(("data.urls", "data"), namespace="data")), path("docs/", TemplateView.as_view(template_name="techdocs.html"), name="techdocs"), ] diff --git a/dags/annuaire_entreprise_checks.py b/dags/annuaire_entreprise_checks.py index f93ae2352..2a878c927 100755 --- a/dags/annuaire_entreprise_checks.py +++ b/dags/annuaire_entreprise_checks.py @@ -282,7 +282,7 @@ def db_data_prepare(**kwargs): serialized_data = {} for key, df in data.items(): df["event"] = "UPDATE_ACTOR" - df["suggestion"] = df[columns].apply( + df["row_updates"] = df[columns].apply( lambda row: json.dumps(row.to_dict(), default=str), axis=1 ) serialized_data[key] = {"df": df, "metadata": {"updated_rows": len(df)}} diff --git a/dags/shared/tasks/business_logic/write_data.py 
b/dags/shared/tasks/business_logic/write_data.py index 4ab49057f..722663af6 100644 --- a/dags/shared/tasks/business_logic/write_data.py +++ b/dags/shared/tasks/business_logic/write_data.py @@ -1,4 +1,4 @@ -from utils.dag_eo_utils import insert_suggestion_and_process_df +from utils.dag_eo_utils import insert_dagrun_and_process_df def write_data( @@ -20,4 +20,4 @@ def write_data( run_name = run_id.replace("__", " - ") df = data["df"] metadata.update(data.get("metadata", {})) - insert_suggestion_and_process_df(df, metadata, dag_name_suffixed, run_name) + insert_dagrun_and_process_df(df, metadata, dag_name_suffixed, run_name) diff --git a/dags/sources/config/shared_constants.py b/dags/sources/config/shared_constants.py index 33fb402da..18330f7a4 100755 --- a/dags/sources/config/shared_constants.py +++ b/dags/sources/config/shared_constants.py @@ -1,3 +1,9 @@ +# DEPRECATED DagRun statuts +DAGRUN_TOVALIDATE = "TO_VALIDATE" +DAGRUN_TOINSERT = "TO_INSERT" +DAGRUN_REJECTED = "REJECTED" +DAGRUN_FINISHED = "FINISHED" + # Suggestion statuts (pour cohorte et unitaire) SUGGESTION_AVALIDER = "AVALIDER" SUGGESTION_REJETER = "REJETER" diff --git a/dags/sources/tasks/transform/transform_df.py b/dags/sources/tasks/transform/transform_df.py index 507d144d5..df1516781 100644 --- a/dags/sources/tasks/transform/transform_df.py +++ b/dags/sources/tasks/transform/transform_df.py @@ -227,6 +227,7 @@ def compute_location(row: pd.Series, _): lng_column = row.keys()[1] row[lat_column] = parse_float(row[lat_column]) row[lng_column] = parse_float(row[lng_column]) + print(row[lat_column], row[lng_column]) row["location"] = transform_location(row[lng_column], row[lat_column]) return row[["location"]] diff --git a/dags/utils/base_utils.py b/dags/utils/base_utils.py index c841cdcb1..98548e25a 100755 --- a/dags/utils/base_utils.py +++ b/dags/utils/base_utils.py @@ -114,7 +114,8 @@ def extract_details(row, col="adresse_format_ban"): def transform_location(longitude, latitude): - if not longitude or not latitude: + if not longitude or not latitude or math.isnan(longitude) or math.isnan(latitude): + print("Longitude or latitude is missing.") return None return wkb.dumps(Point(longitude, latitude)).hex() diff --git a/dags/utils/dag_eo_utils.py b/dags/utils/dag_eo_utils.py index b5a311c78..1f98b6e09 100755 --- a/dags/utils/dag_eo_utils.py +++ b/dags/utils/dag_eo_utils.py @@ -8,48 +8,38 @@ logger = logging.getLogger(__name__) -def insert_suggestion_and_process_df(df_acteur_updates, metadata, dag_name, run_name): +def insert_dagrun_and_process_df(df_acteur_updates, metadata, dag_name, run_name): if df_acteur_updates.empty: return engine = PostgresConnectionManager().engine current_date = datetime.now() + with engine.connect() as conn: - # Insert a new suggestion + # Insert a new dagrun result = conn.execute( """ - INSERT INTO data_suggestioncohorte - ( - identifiant_action, - identifiant_execution, - type_action, - statut, - metadata, - cree_le, - modifie_le - ) - VALUES (%s, %s, %s, %s, %s, %s, %s) + INSERT INTO qfdmo_dagrun + (dag_id, run_id, status, meta_data, created_date, updated_date) + VALUES (%s, %s, %s, %s, %s, %s) RETURNING ID; """, ( dag_name, run_name, - constants.SUGGESTION_SOURCE, - constants.SUGGESTION_AVALIDER, + "TO_VALIDATE", json.dumps(metadata), current_date, current_date, ), ) - suggestion_cohorte_id = result.fetchone()[0] + dag_run_id = result.fetchone()[0] # Insert dag_run_change - df_acteur_updates["type_action"] = df_acteur_updates["event"] - df_acteur_updates["suggestion_cohorte_id"] = 
suggestion_cohorte_id - df_acteur_updates["statut"] = constants.SUGGESTION_AVALIDER - df_acteur_updates[ - ["suggestion", "suggestion_cohorte_id", "type_action", "statut"] - ].to_sql( - "data_suggestionunitaire", + df_acteur_updates["change_type"] = df_acteur_updates["event"] + df_acteur_updates["dag_run_id"] = dag_run_id + df_acteur_updates["status"] = constants.DAGRUN_TOVALIDATE + df_acteur_updates[["row_updates", "dag_run_id", "change_type", "status"]].to_sql( + "qfdmo_dagrunchange", engine, if_exists="append", index=False, diff --git a/dags/utils/dag_ingest_validated_utils.py b/dags/utils/dag_ingest_validated_utils.py index db24171ab..a4a3f648c 100755 --- a/dags/utils/dag_ingest_validated_utils.py +++ b/dags/utils/dag_ingest_validated_utils.py @@ -76,7 +76,7 @@ def handle_update_actor_event(df_actors, dag_run_id): ] current_time = datetime.now().astimezone().isoformat(timespec="microseconds") - df_actors = df_actors[df_actors["status"] == shared_constants.SUGGESTION_ATRAITER] + df_actors = df_actors[df_actors["status"] == shared_constants.DAGRUN_TOINSERT] df_actors = df_actors.apply(mapping_utils.replace_with_selected_candidat, axis=1) df_actors[["adresse", "code_postal", "ville"]] = df_actors.apply( lambda row: base_utils.extract_details(row, col="adresse_candidat"), axis=1 @@ -312,9 +312,7 @@ def handle_write_data_update_actor_event(connection, df_actors): def update_dag_run_status( - connection, dag_run_id, statut=shared_constants.SUGGESTION_SUCCES + connection, dag_run_id, statut=shared_constants.DAGRUN_FINISHED ): - query = f""" - UPDATE data_suggestioncohorte SET statut = '{statut}' WHERE id = {dag_run_id} - """ + query = f"UPDATE qfdmo_dagrun SET status = '{statut}' WHERE id = {dag_run_id}" connection.execute(query) diff --git a/dags_unit_tests/sources/tasks/transform/test_transform_df.py b/dags_unit_tests/sources/tasks/transform/test_transform_df.py index 351ac8d24..03e6ae344 100644 --- a/dags_unit_tests/sources/tasks/transform/test_transform_df.py +++ b/dags_unit_tests/sources/tasks/transform/test_transform_df.py @@ -8,6 +8,7 @@ clean_label_codes, clean_siret_and_siren, clean_telephone, + compute_location, get_latlng_from_geopoint, merge_and_clean_souscategorie_codes, merge_duplicates, @@ -451,3 +452,33 @@ def test_get_latlng_from_geopoint(self): result = get_latlng_from_geopoint(row, None) assert result["latitude"] == 48.8588443 assert result["longitude"] == 2.2943506 + + +PARIS_LOCATION = ( + "0101000000a835cd3b4ed1024076e09c11a56d4840" # pragma: allowlist secret +) + + +LONDON_LOCATION = ( + "0101000000ebe2361ac05bc0bfc5feb27bf2c04940" # pragma: allowlist secret +) + + +class TestComputeLocation: + + @pytest.mark.parametrize( + "latitude, longitude, expected_location", + [ + (48.8566, 2.3522, PARIS_LOCATION), + ("48.8566", "2.3522", PARIS_LOCATION), + (51.5074, -0.1278, LONDON_LOCATION), + (None, None, None), # Missing lat and long + ], + ) + def test_compute_location(self, latitude, longitude, expected_location): + + result = compute_location( + pd.Series({"latitude": latitude, "longitude": longitude}), None + ) + print(result["location"]) + assert result["location"] == expected_location diff --git a/data/urls.py b/data/urls.py index 7a81e6b91..3419263eb 100644 --- a/data/urls.py +++ b/data/urls.py @@ -1,11 +1,11 @@ from django.urls import path -from data.views import DagsValidation +from data.views import SuggestionManagment urlpatterns = [ path( - "dags/validations", - DagsValidation.as_view(), - name="dags_validations", + "suggestions", + SuggestionManagment.as_view(), 
+ name="suggestions", ), ] diff --git a/data/views.py b/data/views.py index ec2dee9e3..259a073e7 100644 --- a/data/views.py +++ b/data/views.py @@ -21,7 +21,7 @@ def dispatch(self, request, *args, **kwargs): } -class DagsValidation(IsStaffMixin, FormView): +class SuggestionManagment(IsStaffMixin, FormView): form_class = SuggestionCohorteForm template_name = "data/dags_validations.html" success_url = "/dags/validations" diff --git a/jinja2/qfdmo/create_actor_event.html b/jinja2/qfdmo/create_actor_event.html index c70079843..bd5103df2 100644 --- a/jinja2/qfdmo/create_actor_event.html +++ b/jinja2/qfdmo/create_actor_event.html @@ -1,4 +1,4 @@ -{% if suggestion_unitaires|length > 0 and suggestion_unitaires[0].change_type == 'CREATE' %} +{% if dagrun_lines|length > 0 and dagrun_lines[0].change_type == 'CREATE' %} @@ -6,17 +6,17 @@ meta_data Acteur Proposition de service - suggestion + row_updates - {% for suggestion_unitaire in suggestion_unitaires if suggestion_unitaire.change_type == 'CREATE' %} + {% for dagrun_line in dagrun_lines if dagrun_line.change_type == 'CREATE' %} - {{ suggestion_unitaire.get_change_type_display() }} - {{ suggestion_unitaire.meta_data if suggestion_unitaire.meta_data else "-" }} + {{ dagrun_line.get_change_type_display() }} + {{ dagrun_line.meta_data if dagrun_line.meta_data else "-" }} - {% for key, value in suggestion_unitaire.display_acteur_details().items() %} + {% for key, value in dagrun_line.display_acteur_details().items() %}

{{ key }} : {{ value }}

{% endfor %} @@ -26,7 +26,7 @@ Action Sous-Catégories - {% for service in suggestion_unitaire.display_proposition_service() %} + {% for service in dagrun_line.display_proposition_service() %} {{ service.action }} @@ -43,7 +43,7 @@
Données brutes -
{{ suggestion_unitaire.suggestion }}
+
{{ dagrun_line.row_updates }}
diff --git a/jinja2/qfdmo/dags_validations.html b/jinja2/qfdmo/dags_validations.html new file mode 100644 index 000000000..9a5c7bf6d --- /dev/null +++ b/jinja2/qfdmo/dags_validations.html @@ -0,0 +1,51 @@ +{% extends 'layout/base.html' %} + +{% block content %} + +
+

Interface déprécier

+ + La nouvelle interface est disponible à l'adresse suivante : /data/suggestions +

Validations des «DAGs»

+ +

+ Cette page permet de valider les données des «DAGs». +

+ +
+ {{ csrf_input }} + {{ form }} +
+ +
+ + {% if dagrun_instance %} +

Instance du DAG : {{ dagrun_instance }}

+

Métadonnées

+ {% for (meta_title, meta_data) in dagrun_instance.display_meta_data().items() %} +

{{ meta_title }} : {{ meta_data }}

+ {% endfor %} +
+ meta_data brutes +
{{ dagrun_instance.meta_data }}
+
+

Exemples

+ +
+ + + {% include 'qfdmo/update_actor_event.html' %} + {% include 'qfdmo/create_actor_event.html' %} + +
Résumé du tableau (accessibilité)
+
+
+ + +
+ {% endif %} +
+ +
+ +{% endblock %} diff --git a/jinja2/qfdmo/partials/candidat_row.html b/jinja2/qfdmo/partials/candidat_row.html index 8136f849f..ae1950030 100644 --- a/jinja2/qfdmo/partials/candidat_row.html +++ b/jinja2/qfdmo/partials/candidat_row.html @@ -24,17 +24,17 @@ Meilleure proposition Map Link - {% for candidat in suggestion_unitaire.suggestion.ae_result %} + {% for candidat in dagrun_line.row_updates.ae_result %} {% if candidat.etat_admin_candidat != 'F' %} - {% with comparison_result=(suggestion_unitaire.suggestion.best_candidat_index and loop.index == suggestion_unitaire.suggestion.best_candidat_index|int) %} + {% with comparison_result=(dagrun_line.row_updates.best_candidat_index and loop.index == dagrun_line.row_updates.best_candidat_index|int) %} {# Ces valeurs sont définies dans dags/utils/shared_constants.py, à garder synchronisées entre Django et Airflow #} - +
{{ csrf_input }} - - + + diff --git a/jinja2/qfdmo/update_actor_event.html b/jinja2/qfdmo/update_actor_event.html index 25eeb6323..334e84217 100644 --- a/jinja2/qfdmo/update_actor_event.html +++ b/jinja2/qfdmo/update_actor_event.html @@ -1,4 +1,4 @@ -{% if suggestion_unitaires|length > 0 and suggestion_unitaires[0].change_type == 'UPDATE_ACTOR' %} +{% if dagrun_lines|length > 0 and dagrun_lines[0].change_type == 'UPDATE_ACTOR' %} @@ -6,23 +6,23 @@ meta_data Identifiant Unique Candidats - suggestion + row_updates - {% for suggestion_unitaire in suggestion_unitaires if suggestion_unitaire.change_type == 'UPDATE_ACTOR' %} + {% for dagrun_line in dagrun_lines if dagrun_line.change_type == 'UPDATE_ACTOR' %} - {{ suggestion_unitaire.get_change_type_display() }} - {{ suggestion_unitaire.meta_data if suggestion_unitaire.meta_data else "-" }} + {{ dagrun_line.get_change_type_display() }} + {{ dagrun_line.meta_data if dagrun_line.meta_data else "-" }} - {% with identifiant_unique=suggestion_unitaire.display_acteur_details().identifiant_unique %} + {% with identifiant_unique=dagrun_line.display_acteur_details().identifiant_unique %} {{ identifiant_unique }} {% endwith %} - {% with candidat=candidat, index=loop.index, suggestion_cohorte=request.GET.suggestion_cohorte, - identifiant_unique=suggestion_unitaire.display_acteur_details().identifiant_unique %} + {% with candidat=candidat, index=loop.index, dagrun=request.GET.dagrun, + identifiant_unique=dagrun_line.display_acteur_details().identifiant_unique %} {% include 'qfdmo/partials/candidat_row.html' %} {% endwith %} @@ -30,23 +30,23 @@
Données brutes -
{{ suggestion_unitaire.suggestion }}
+
{{ dagrun_line.row_updates }}
{% endfor %} -{% if suggestion_unitaires.has_other_pages %} +{% if dagrun_lines.has_other_pages %} - + diff --git a/qfdmo/forms.py b/qfdmo/forms.py index 56e38568d..b637c8a22 100644 --- a/qfdmo/forms.py +++ b/qfdmo/forms.py @@ -9,7 +9,7 @@ from qfdmo.fields import GroupeActionChoiceField from qfdmo.geo_api import epcis_from, formatted_epcis_as_list_of_tuple -from qfdmo.models import SousCategorieObjet +from qfdmo.models import DagRun, DagRunStatus, SousCategorieObjet from qfdmo.models.action import ( Action, GroupeAction, @@ -341,6 +341,23 @@ def load_choices( ) +class DagsForm(forms.Form): + """ + DEPRECATED, should use the data django app + """ + + dagrun = forms.ModelChoiceField( + label="Séléctionner l'execution d'un DAG", + widget=forms.Select( + attrs={ + "class": "fr-select", + } + ), + queryset=DagRun.objects.filter(status=DagRunStatus.TO_VALIDATE.value), + required=True, + ) + + class ConfiguratorForm(DsfrBaseForm): # TODO: rename this field in all codebase -> actions_displayed action_list = GroupeActionChoiceField( diff --git a/qfdmo/management/commands/reinitialize_dagrun.py b/qfdmo/management/commands/reinitialize_dagrun.py index 1dcfcac0b..eea492018 100644 --- a/qfdmo/management/commands/reinitialize_dagrun.py +++ b/qfdmo/management/commands/reinitialize_dagrun.py @@ -7,13 +7,9 @@ class Command(BaseCommand): def handle(self, *args, **options): with connection.cursor() as cursor: - # Truncate the table data_suggestioncohorte and data_suggestionunitaire - cursor.execute("TRUNCATE TABLE data_suggestioncohorte CASCADE") + # Truncate the table qfdmo_dagrun and qfdmo_dagrunchange + cursor.execute("TRUNCATE TABLE qfdmo_dagrun CASCADE") # Set auto-increment to 1 - cursor.execute( - "ALTER SEQUENCE data_suggestioncohorte_id_seq RESTART WITH 1" - ) - cursor.execute( - "ALTER SEQUENCE data_suggestionunitaire_id_seq RESTART WITH 1" - ) + cursor.execute("ALTER SEQUENCE qfdmo_dagrun_id_seq RESTART WITH 1") + cursor.execute("ALTER SEQUENCE qfdmo_dagrunchange_id_seq RESTART WITH 1") diff --git a/qfdmo/migrations/0111_delete_bancache.py b/qfdmo/migrations/0111_delete_bancache.py new file mode 100644 index 000000000..4ef826cdf --- /dev/null +++ b/qfdmo/migrations/0111_delete_bancache.py @@ -0,0 +1,16 @@ +# Generated by Django 5.1.4 on 2025-01-14 09:56 + +from django.db import migrations + + +class Migration(migrations.Migration): + + dependencies = [ + ("qfdmo", "0110_alter_source_code"), + ] + + operations = [ + migrations.DeleteModel( + name="BANCache", + ), + ] diff --git a/qfdmo/migrations/0111_delete_bancache_remove_dagrunchange_dag_run_and_more.py b/qfdmo/migrations/0111_delete_bancache_remove_dagrunchange_dag_run_and_more.py deleted file mode 100644 index ce1aa0c50..000000000 --- a/qfdmo/migrations/0111_delete_bancache_remove_dagrunchange_dag_run_and_more.py +++ /dev/null @@ -1,26 +0,0 @@ -# Generated by Django 5.1.4 on 2025-01-13 10:17 - -from django.db import migrations - - -class Migration(migrations.Migration): - - dependencies = [ - ("qfdmo", "0110_alter_source_code"), - ] - - operations = [ - migrations.DeleteModel( - name="BANCache", - ), - migrations.RemoveField( - model_name="dagrunchange", - name="dag_run", - ), - migrations.DeleteModel( - name="DagRun", - ), - migrations.DeleteModel( - name="DagRunChange", - ), - ] diff --git a/qfdmo/models/__init__.py b/qfdmo/models/__init__.py index 80f4d9eba..23b9c02d5 100644 --- a/qfdmo/models/__init__.py +++ b/qfdmo/models/__init__.py @@ -1,4 +1,5 @@ from .acteur import * # noqa from .action import * # noqa from .categorie_objet import * 
# noqa +from .data import * # noqa from .utils import * # noqa diff --git a/qfdmo/models/data.py b/qfdmo/models/data.py new file mode 100644 index 000000000..58427d5af --- /dev/null +++ b/qfdmo/models/data.py @@ -0,0 +1,148 @@ +""" +DEPRECATED, should use the data django app +""" + +from django.contrib.gis.db import models + +from dags.sources.config.shared_constants import ( + DAGRUN_FINISHED, + DAGRUN_REJECTED, + DAGRUN_TOINSERT, + DAGRUN_TOVALIDATE, +) +from qfdmo.models.acteur import ActeurType, Source + + +class DagRunStatus(models.TextChoices): + TO_VALIDATE = DAGRUN_TOVALIDATE + TO_INSERT = DAGRUN_TOINSERT + REJECTED = DAGRUN_REJECTED + FINISHED = DAGRUN_FINISHED + + +class DagRun(models.Model): + id = models.AutoField(primary_key=True) + dag_id = models.CharField(max_length=250) + run_id = models.CharField(max_length=250) + created_date = models.DateTimeField(auto_now_add=True) + updated_date = models.DateTimeField(auto_now=True) + status = models.CharField( + max_length=50, + choices=DagRunStatus.choices, + default=DagRunStatus.TO_VALIDATE, + ) + # {to_create : 134, to_update : 0, to_delete : 0, to_ignore : 0, errors : 0} + meta_data = models.JSONField(null=True, blank=True) + + def __str__(self) -> str: + return f"{self.dag_id} - {self.run_id}" + + def display_meta_data(self) -> dict: + displayed_metadata = {} + displayed_metadata["Nombre d'acteur à créer"] = self.meta_data.get( + "added_rows", 0 + ) + displayed_metadata["Nombre de duplicats"] = self.meta_data.get( + "number_of_duplicates", 0 + ) + displayed_metadata["Nombre d'acteur MAJ"] = self.meta_data.get( + "updated_rows", 0 + ) + return displayed_metadata + + +class DagRunChangeType(models.Choices): + CREATE = "CREATE" + UPDATE = "UPDATE" + DELETE = "DELETE" + + +class DagRunChange(models.Model): + id = models.AutoField(primary_key=True) + dag_run = models.ForeignKey( + DagRun, on_delete=models.CASCADE, related_name="dagrunchanges" + ) + change_type = models.CharField(max_length=50, choices=DagRunChangeType.choices) + meta_data = models.JSONField(null=True, blank=True) + # metadata : JSON of any error or information about the line to change + row_updates = models.JSONField(null=True, blank=True) + status = models.CharField( + max_length=50, + choices=DagRunStatus.choices, + default=DagRunStatus.TO_VALIDATE, + ) + + def display_acteur_details(self) -> dict: + displayed_details = {} + for field, field_value in { + "nom": "Nom", + "nom_commercial": "Nom commercial", + "siret": "SIRET", + "siren": "SIREN", + "url": "Site web", + "email": "Email", + "telephone": "Téléphone", + "adresse": "Adresse", + "adresse_complement": "Complement d'adresse", + "code_postal": "Code postal", + "ville": "Ville", + "commentaires": "Commentaires", + "horaires_description": "Horaires", + "latitude": "latitude", + "longitude": "longitude", + "identifiant_unique": "identifiant_unique", + "identifiant_externe": "identifiant_externe", + }.items(): + if value := self.row_updates.get(field): + displayed_details[field_value] = value + if value := self.row_updates.get("acteur_type_id"): + displayed_details["Type d'acteur"] = ActeurType.objects.get( + pk=value + ).libelle + if value := self.row_updates.get("source_id"): + displayed_details["Source"] = Source.objects.get(pk=value).libelle + if value := self.row_updates.get("labels"): + displayed_details["Labels"] = ", ".join( + [str(v["labelqualite_id"]) for v in value] + ) + if value := self.row_updates.get("acteur_services"): + displayed_details["Acteur Services"] = ", ".join( + 
[str(v["acteurservice_id"]) for v in value] + ) + + return displayed_details + + def display_proposition_service(self): + return self.row_updates.get("proposition_services", []) + + def update_row_update_field(self, field_name, value): + if self.row_updates is None: + self.row_updates = {} + + if field_name in self.row_updates and self.row_updates[field_name] == value: + del self.row_updates[field_name] + else: + self.row_updates[field_name] = value + + self.save() + + def update_row_update_candidate(self, status, index): + if self.row_updates is None: + self.row_updates = {} + + if ( + self.status == status + and "best_candidat_index" in self.row_updates + and self.row_updates["best_candidat_index"] == index + ): + self.status = DagRunStatus.TO_VALIDATE.value + del self.row_updates["best_candidat_index"] + + else: + self.status = status + self.row_updates["best_candidat_index"] = index + + self.save() + + def get_candidat(self, index): + return self.row_updates["ae_result"][int(index) - 1] diff --git a/qfdmo/urls.py b/qfdmo/urls.py index 0c99afa07..2fe90c501 100644 --- a/qfdmo/urls.py +++ b/qfdmo/urls.py @@ -16,6 +16,7 @@ ) from qfdmo.views.auth import LVAOLoginView from qfdmo.views.configurator import AdvancedConfiguratorView, ConfiguratorView +from qfdmo.views.dags import DagsValidation urlpatterns = [ path("", direct_access, name="direct_access"), @@ -96,4 +97,10 @@ AdvancedConfiguratorView.as_view(), name="advanced_iframe_configurator", ), + # DEPRECATED, should use the data django app + path( + "dags/validations", + DagsValidation.as_view(), + name="dags_validations", + ), ] diff --git a/qfdmo/views/dags.py b/qfdmo/views/dags.py new file mode 100644 index 000000000..4db1618a5 --- /dev/null +++ b/qfdmo/views/dags.py @@ -0,0 +1,108 @@ +""" +DEPRECATED, should use the data django app +""" + +import logging + +from django.contrib.auth.mixins import LoginRequiredMixin +from django.core.paginator import Paginator +from django.shortcuts import render +from django.views.generic.edit import FormView + +from qfdmo.forms import DagsForm +from qfdmo.models.data import DagRun, DagRunStatus + + +class IsStaffMixin(LoginRequiredMixin): + def dispatch(self, request, *args, **kwargs): + if not request.user.is_staff: + return self.handle_no_permission() + return super().dispatch(request, *args, **kwargs) + + +class DagsValidation(IsStaffMixin, FormView): + form_class = DagsForm + template_name = "qfdmo/dags_validations.html" + success_url = "/dags/validations" + + def get_initial(self): + initial = super().get_initial() + initial["dagrun"] = self.request.GET.get("dagrun") + return initial + + def post(self, request, *args, **kwargs): + dag_valid = request.POST.get("dag_valid") + if dag_valid in ["1", "0"]: + return self.form_valid(self.get_form()) + else: + dagrun_obj = DagRun.objects.get(pk=request.POST.get("dagrun")) + id = request.POST.get("id") + dagrun_line = dagrun_obj.dagrunchanges.filter(id=id).first() + identifiant_unique = request.POST.get("identifiant_unique") + index = request.POST.get("index") + action = request.POST.get("action") + + if action == "validate": + dagrun_line.update_row_update_candidate( + DagRunStatus.TO_INSERT.value, index + ) + elif action == "reject": + dagrun_line.update_row_update_candidate( + DagRunStatus.REJECTED.value, index + ) + + updated_candidat = dagrun_line.get_candidat(index) + + return render( + request, + "qfdmo/partials/candidat_row.html", + { + "identifiant_unique": identifiant_unique, + "candidat": updated_candidat, + "index": index, + "request": 
request, + "dagrun": request.POST.get("dagrun"), + "dagrun_line": dagrun_line, + }, + ) + + def get_context_data(self, **kwargs): + context = super().get_context_data(**kwargs) + if self.request.GET.get("dagrun"): + dagrun = DagRun.objects.get(pk=self.request.GET.get("dagrun")) + context["dagrun_instance"] = dagrun + dagrun_lines = dagrun.dagrunchanges.all().order_by("?")[:100] + context["dagrun_lines"] = dagrun_lines + + if dagrun_lines and dagrun_lines[0].change_type == "UPDATE_ACTOR": + # Pagination + dagrun_lines = dagrun.dagrunchanges.all().order_by("id") + paginator = Paginator(dagrun_lines, 100) + page_number = self.request.GET.get("page") + page_obj = paginator.get_page(page_number) + context["dagrun_lines"] = page_obj + + return context + + def form_valid(self, form): + if not form.is_valid(): + raise ValueError("Form is not valid") + dagrun_id = form.cleaned_data["dagrun"].id + dagrun_obj = DagRun.objects.get(pk=dagrun_id) + new_status = ( + DagRunStatus.TO_INSERT.value + if self.request.POST.get("dag_valid") == "1" + else DagRunStatus.REJECTED.value + ) + + # FIXME: I am not sure we need the filter here + dagrun_obj.dagrunchanges.filter(status=DagRunStatus.TO_VALIDATE.value).update( + status=new_status + ) + + logging.info(f"{dagrun_id} - {self.request.user}") + + dagrun_obj.status = new_status + dagrun_obj.save() + + return super().form_valid(form) diff --git a/templates/data/dags_validations.html b/templates/data/dags_validations.html index bad1665a9..7a73d0a86 100644 --- a/templates/data/dags_validations.html +++ b/templates/data/dags_validations.html @@ -3,7 +3,7 @@ {% block content %}
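For orientation, a rough sketch of how the deprecated candidate workflow above is driven over HTTP, e.g. from a Django test; the staff user and the primary keys are illustrative assumptions:

# Sketch only: exercises DagsValidation.post() above with example values.
from django.test import Client

client = Client()
client.force_login(staff_user)  # assumes an existing user with is_staff=True

response = client.post(
    "/dags/validations",
    {
        "dagrun": "1",  # DagRun pk (assumed to exist)
        "id": "42",  # pk of one of its dagrunchanges
        "identifiant_unique": "x",  # echoed back into the partial
        "index": "2",  # 1-based candidate index
        "action": "validate",  # or "reject" to set REJECTED
    },
)
# Only qfdmo/partials/candidat_row.html is re-rendered (HTMX-style swap).
assert response.status_code == 200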
-

Validations des «DAGs»

+

Gestion des suggestions de modification

Cette page permet de valider les données des «DAGs». From 46d344f1fa01a1f3647086b397296abb0535550e Mon Sep 17 00:00:00 2001 From: Nicolas Oudard Date: Tue, 14 Jan 2025 14:30:30 +0100 Subject: [PATCH 11/26] isolation du dags d'ingestion des suggestions --- dags/ingest_validated_dataset_to_db.py | 120 ++++++------ .../db_normalize_suggestion_task.py | 15 ++ .../db_read_suggestiontoprocess_task.py | 13 ++ .../db_write_validsuggestions_task.py | 23 +++ .../launch_compute_carte_acteur_task.py | 10 + .../business_logic/db_normalize_suggestion.py | 113 ++++++++++++ .../db_read_suggestiontoprocess.py | 18 ++ .../db_write_validsuggestions.py | 172 ++++++++++++++++++ dags/suggestions/dags/apply_suggestions.py | 42 +++++ data/views.py | 2 +- 10 files changed, 459 insertions(+), 69 deletions(-) create mode 100644 dags/suggestions/airflow_logic/db_normalize_suggestion_task.py create mode 100644 dags/suggestions/airflow_logic/db_read_suggestiontoprocess_task.py create mode 100644 dags/suggestions/airflow_logic/db_write_validsuggestions_task.py create mode 100644 dags/suggestions/airflow_logic/launch_compute_carte_acteur_task.py create mode 100644 dags/suggestions/business_logic/db_normalize_suggestion.py create mode 100644 dags/suggestions/business_logic/db_read_suggestiontoprocess.py create mode 100644 dags/suggestions/business_logic/db_write_validsuggestions.py create mode 100755 dags/suggestions/dags/apply_suggestions.py diff --git a/dags/ingest_validated_dataset_to_db.py b/dags/ingest_validated_dataset_to_db.py index fb56f20ea..1ac93cca2 100755 --- a/dags/ingest_validated_dataset_to_db.py +++ b/dags/ingest_validated_dataset_to_db.py @@ -1,10 +1,12 @@ -# FIXME: intégrer ce dag dans l'architecture cible +""" +DEPRECATED : utiliser le dag apply_suggestions +""" from datetime import timedelta import pandas as pd from airflow.models import DAG -from airflow.operators.python import PythonOperator, ShortCircuitOperator +from airflow.operators.python import BranchPythonOperator, PythonOperator from airflow.operators.trigger_dagrun import TriggerDagRunOperator from airflow.providers.postgres.hooks.postgres import PostgresHook from airflow.utils.dates import days_ago @@ -21,104 +23,75 @@ } dag = DAG( - dag_id="validate_and_process_suggestions", - dag_display_name="Traitement des cohortes de données validées", + dag_id="validate_and_process_dagruns", + dag_display_name="DEPRECATED : Traitement des cohortes de données validées", default_args=default_args, - description="traiter les suggestions à traiter", + description=""" + DEPRECATED : Check for VALIDATE in qfdmo_dagrun and process qfdmo_dagrunchange + util uniquement pour les cohortes de siretisations + """, schedule="*/5 * * * *", catchup=False, max_active_runs=1, ) -def _get_first_suggetsioncohorte_to_insert(): +def _get_first_dagrun_to_insert(): hook = PostgresHook(postgres_conn_id="qfdmo_django_db") + # get first row from table qfdmo_dagrun with status TO_INSERT row = hook.get_first( - f""" - SELECT * FROM data_suggestioncohorte - WHERE statut = '{constants.SUGGESTION_ATRAITER}' - LIMIT 1 - """ + f"SELECT * FROM qfdmo_dagrun WHERE status = '{constants.DAGRUN_TOINSERT}'" + " LIMIT 1" ) return row -def check_suggestion_to_process(**kwargs): - row = _get_first_suggetsioncohorte_to_insert() - return bool(row) +def check_for_validation(**kwargs): + # get first row from table qfdmo_dagrun with status TO_INSERT + row = _get_first_dagrun_to_insert() + + # Skip if row is None + if row is None: + return "skip_processing" + return "fetch_and_parse_data" def 
fetch_and_parse_data(**context): - row = _get_first_suggetsioncohorte_to_insert() - suggestion_cohorte_id = row[0] + row = _get_first_dagrun_to_insert() + dag_run_id = row[0] engine = PostgresConnectionManager().engine df_sql = pd.read_sql_query( - f""" - SELECT * FROM data_suggestionunitaire - WHERE suggestion_cohorte_id = '{suggestion_cohorte_id}' - """, + f"SELECT * FROM qfdmo_dagrunchange WHERE dag_run_id = '{dag_run_id}'", engine, ) - df_acteur_to_create = df_sql[ - df_sql["type_action"] == constants.SUGGESTION_SOURCE_AJOUT - ] - df_acteur_to_update = df_sql[ - df_sql["type_action"] == constants.SUGGESTION_SOURCE_AJOUT - ] - df_acteur_to_delete = df_sql[ - df_sql["type_action"] == constants.SUGGESTION_SOURCE_SUPRESSION - ] - df_acteur_to_enrich = df_sql[ - df_sql["type_action"] == constants.SUGGESTION_ENRICHISSEMENT - ] - - df_update_actor = df_sql[df_sql["type_action"] == "UPDATE_ACTOR"] - - if not df_acteur_to_create.empty: - normalized_dfs = df_acteur_to_create["suggestion"].apply(pd.json_normalize) - df_acteur = pd.concat(normalized_dfs.tolist(), ignore_index=True) - return dag_ingest_validated_utils.handle_create_event( - df_acteur, suggestion_cohorte_id, engine - ) - if not df_acteur_to_update.empty: - normalized_dfs = df_acteur_to_update["suggestion"].apply(pd.json_normalize) - df_acteur = pd.concat(normalized_dfs.tolist(), ignore_index=True) - return dag_ingest_validated_utils.handle_create_event( - df_acteur, suggestion_cohorte_id, engine - ) - if not df_acteur_to_delete.empty: - normalized_dfs = df_acteur_to_delete["suggestion"].apply(pd.json_normalize) - df_actors_update_actor = pd.concat(normalized_dfs.tolist(), ignore_index=True) - status_repeated = ( - df_acteur_to_delete["statut"] - .repeat(df_acteur_to_delete["suggestion"].apply(len)) - .reset_index(drop=True) - ) - df_actors_update_actor["status"] = status_repeated + df_create = df_sql[df_sql["change_type"] == "CREATE"] + df_update_actor = df_sql[df_sql["change_type"] == "UPDATE_ACTOR"] - return dag_ingest_validated_utils.handle_update_actor_event( - df_actors_update_actor, suggestion_cohorte_id + if not df_create.empty: + normalized_dfs = df_create["row_updates"].apply(pd.json_normalize) + df_actors_create = pd.concat(normalized_dfs.tolist(), ignore_index=True) + return dag_ingest_validated_utils.handle_create_event( + df_actors_create, dag_run_id, engine ) + if not df_update_actor.empty: - if not df_acteur_to_enrich.empty: - - normalized_dfs = df_update_actor["suggestion"].apply(pd.json_normalize) + normalized_dfs = df_update_actor["row_updates"].apply(pd.json_normalize) df_actors_update_actor = pd.concat(normalized_dfs.tolist(), ignore_index=True) status_repeated = ( df_update_actor["status"] - .repeat(df_update_actor["suggestion"].apply(len)) + .repeat(df_update_actor["row_updates"].apply(len)) .reset_index(drop=True) ) df_actors_update_actor["status"] = status_repeated return dag_ingest_validated_utils.handle_update_actor_event( - df_actors_update_actor, suggestion_cohorte_id + df_actors_update_actor, dag_run_id ) return { - "dag_run_id": suggestion_cohorte_id, + "dag_run_id": dag_run_id, } @@ -159,9 +132,19 @@ def write_data_to_postgres(**kwargs): ) -check_suggestion_to_process_task = ShortCircuitOperator( - task_id="check_suggestion_to_process", - python_callable=check_suggestion_to_process, +def skip_processing(**kwargs): + print("No records to validate. 
DAG run completes successfully.") + + +skip_processing_task = PythonOperator( + task_id="skip_processing", + python_callable=skip_processing, + dag=dag, +) + +branch_task = BranchPythonOperator( + task_id="branch_processing", + python_callable=check_for_validation, dag=dag, ) @@ -177,8 +160,9 @@ def write_data_to_postgres(**kwargs): dag=dag, ) +branch_task >> skip_processing_task ( - check_suggestion_to_process_task + branch_task >> fetch_parse_task >> write_to_postgres_task >> trigger_create_final_actors_dag diff --git a/dags/suggestions/airflow_logic/db_normalize_suggestion_task.py b/dags/suggestions/airflow_logic/db_normalize_suggestion_task.py new file mode 100644 index 000000000..908776b16 --- /dev/null +++ b/dags/suggestions/airflow_logic/db_normalize_suggestion_task.py @@ -0,0 +1,15 @@ +from airflow.models import DAG +from airflow.operators.python import PythonOperator +from suggestions.business_logic.db_normalize_suggestion import db_normalize_suggestion + + +def db_normalize_suggestion_task(dag: DAG): + return PythonOperator( + task_id="db_normalize_suggestion", + python_callable=db_normalize_suggestion_wrapper, + dag=dag, + ) + + +def db_normalize_suggestion_wrapper(**kwargs): + return db_normalize_suggestion() diff --git a/dags/suggestions/airflow_logic/db_read_suggestiontoprocess_task.py b/dags/suggestions/airflow_logic/db_read_suggestiontoprocess_task.py new file mode 100644 index 000000000..3dcd36249 --- /dev/null +++ b/dags/suggestions/airflow_logic/db_read_suggestiontoprocess_task.py @@ -0,0 +1,13 @@ +from airflow.models import DAG +from airflow.operators.python import ShortCircuitOperator +from suggestions.business_logic.db_read_suggestiontoprocess import ( + db_read_suggestiontoprocess, +) + + +def db_read_suggestiontoprocess_task(dag: DAG): + return ShortCircuitOperator( + task_id="check_suggestion_to_process", + python_callable=db_read_suggestiontoprocess, + dag=dag, + ) diff --git a/dags/suggestions/airflow_logic/db_write_validsuggestions_task.py b/dags/suggestions/airflow_logic/db_write_validsuggestions_task.py new file mode 100644 index 000000000..80f097955 --- /dev/null +++ b/dags/suggestions/airflow_logic/db_write_validsuggestions_task.py @@ -0,0 +1,23 @@ +from airflow.models import DAG +from airflow.operators.python import PythonOperator +from suggestions.business_logic.db_write_validsuggestions import ( + db_write_validsuggestions, +) +from utils import logging_utils as log + + +def db_write_validsuggestions_task(dag: DAG) -> PythonOperator: + return PythonOperator( + task_id="db_write_validsuggestions", + python_callable=db_write_validsuggestions_wrapper, + dag=dag, + ) + + +def db_write_validsuggestions_wrapper(**kwargs): + data_from_db = kwargs["ti"].xcom_pull(task_ids="db_normalize_suggestion") + + log.preview("data_from_db acteur", data_from_db["actors"]) + log.preview("data_from_db change_type", data_from_db["change_type"]) + + return db_write_validsuggestions(data_from_db=data_from_db) diff --git a/dags/suggestions/airflow_logic/launch_compute_carte_acteur_task.py b/dags/suggestions/airflow_logic/launch_compute_carte_acteur_task.py new file mode 100644 index 000000000..963593762 --- /dev/null +++ b/dags/suggestions/airflow_logic/launch_compute_carte_acteur_task.py @@ -0,0 +1,10 @@ +from airflow.models import DAG +from airflow.operators.trigger_dagrun import TriggerDagRunOperator + + +def launch_compute_carte_acteur_task(dag: DAG) -> TriggerDagRunOperator: + return TriggerDagRunOperator( + task_id="launch_compute_carte_acteur", + 
trigger_dag_id="compute_carte_acteur",
+        dag=dag,
+    )
diff --git a/dags/suggestions/business_logic/db_normalize_suggestion.py b/dags/suggestions/business_logic/db_normalize_suggestion.py
new file mode 100644
index 000000000..7b33e281c
--- /dev/null
+++ b/dags/suggestions/business_logic/db_normalize_suggestion.py
@@ -0,0 +1,113 @@
+import pandas as pd
+from shared.tasks.database_logic.db_manager import PostgresConnectionManager
+from sources.config import shared_constants as constants
+from suggestions.business_logic.db_read_suggestiontoprocess import (
+    get_first_suggetsioncohorte_to_insert,
+)
+from utils import logging_utils as log
+
+
+def db_normalize_suggestion():
+    row = get_first_suggetsioncohorte_to_insert()
+    suggestion_cohorte_id = row[0]
+
+    engine = PostgresConnectionManager().engine
+
+    df_sql = pd.read_sql_query(
+        f"""
+        SELECT * FROM data_suggestionunitaire
+        WHERE suggestion_cohorte_id = '{suggestion_cohorte_id}'
+        """,
+        engine,
+    )
+
+    df_acteur_to_create = df_sql[
+        df_sql["type_action"] == constants.SUGGESTION_SOURCE_AJOUT
+    ]
+    df_acteur_to_update = df_sql[
+        df_sql["type_action"] == constants.SUGGESTION_SOURCE_MISESAJOUR
+    ]
+    df_acteur_to_delete = df_sql[
+        df_sql["type_action"] == constants.SUGGESTION_SOURCE_SUPRESSION
+    ]
+    if not df_acteur_to_create.empty:
+        normalized_dfs = df_acteur_to_create["suggestion"].apply(pd.json_normalize)
+        df_acteur = pd.concat(normalized_dfs.tolist(), ignore_index=True)
+        return normalize_acteur_update_for_db(df_acteur, suggestion_cohorte_id, engine)
+    if not df_acteur_to_update.empty:
+        normalized_dfs = df_acteur_to_update["suggestion"].apply(pd.json_normalize)
+        df_acteur = pd.concat(normalized_dfs.tolist(), ignore_index=True)
+        return normalize_acteur_update_for_db(df_acteur, suggestion_cohorte_id, engine)
+    if not df_acteur_to_delete.empty:
+        normalized_dfs = df_acteur_to_delete["suggestion"].apply(pd.json_normalize)
+        df_acteur = pd.concat(normalized_dfs.tolist(), ignore_index=True)
+        log.preview("df_acteur_to_delete", df_acteur)
+        return {
+            "actors": df_acteur,
+            "dag_run_id": suggestion_cohorte_id,
+            "change_type": constants.SUGGESTION_SOURCE_SUPRESSION,
+        }
+
+    raise ValueError("No suggestion found")
+
+
+def normalize_acteur_update_for_db(df_actors, dag_run_id, engine):
+    df_labels = process_many2many_df(df_actors, "labels")
+    df_acteur_services = process_many2many_df(
+        df_actors, "acteur_services", df_columns=["acteur_id", "acteurservice_id"]
+    )
+
+    max_id_pds = pd.read_sql_query(
+        "SELECT max(id) FROM qfdmo_propositionservice", engine
+    )["max"][0]
+    normalized_pds_dfs = df_actors["proposition_services"].apply(pd.json_normalize)
+    df_pds = pd.concat(normalized_pds_dfs.tolist(), ignore_index=True)
+    ids_range = range(max_id_pds + 1, max_id_pds + 1 + len(df_pds))
+
+    df_pds["id"] = ids_range
+    df_pds["pds_sous_categories"] = df_pds.apply(
+        lambda row: [
+            {**d, "propositionservice_id": row["id"]}
+            for d in row["pds_sous_categories"]
+        ],
+        axis=1,
+    )
+
+    normalized_pdssc_dfs = df_pds["pds_sous_categories"].apply(pd.json_normalize)
+    df_pdssc = pd.concat(normalized_pdssc_dfs.tolist(), ignore_index=True)
+
+    return {
+        "actors": df_actors,
+        "pds": df_pds[["id", "action_id", "acteur_id"]],
+        "pds_sous_categories": df_pdssc[
+            ["propositionservice_id", "souscategorieobjet_id"]
+        ],
+        "dag_run_id": dag_run_id,
+        "labels": df_labels[["acteur_id", "labelqualite_id"]],
+        "acteur_services": df_acteur_services[["acteur_id", "acteurservice_id"]],
+        "change_type": constants.SUGGESTION_SOURCE,
+    }
+
+
+def process_many2many_df(df,
column_name, df_columns=["acteur_id", "labelqualite_id"]): + try: + # Attempt to process the 'labels' column if it exists and is not empty + normalized_df = df[column_name].dropna().apply(pd.json_normalize) + if normalized_df.empty: + return pd.DataFrame( + columns=df_columns + ) # Return empty DataFrame if no data to process + else: + return pd.concat(normalized_df.tolist(), ignore_index=True) + except KeyError: + # Handle the case where the specified column does not exist + return pd.DataFrame(columns=df_columns) + + +def normalize_acteur_delete_for_db(df_actors, dag_run_id): + + return { + "actors": df_actors, + "dag_run_id": dag_run_id, + "change_type": constants.SUGGESTION_SOURCE_SUPRESSION, + } diff --git a/dags/suggestions/business_logic/db_read_suggestiontoprocess.py b/dags/suggestions/business_logic/db_read_suggestiontoprocess.py new file mode 100644 index 000000000..02975f494 --- /dev/null +++ b/dags/suggestions/business_logic/db_read_suggestiontoprocess.py @@ -0,0 +1,18 @@ +from airflow.providers.postgres.hooks.postgres import PostgresHook +from sources.config import shared_constants as constants + + +def get_first_suggetsioncohorte_to_insert(): + hook = PostgresHook(postgres_conn_id="qfdmo_django_db") + row = hook.get_first( + f""" + SELECT * FROM data_suggestioncohorte + WHERE statut = '{constants.SUGGESTION_ATRAITER}' + LIMIT 1 + """ + ) + return row + + +def db_read_suggestiontoprocess(**kwargs): + return bool(get_first_suggetsioncohorte_to_insert()) diff --git a/dags/suggestions/business_logic/db_write_validsuggestions.py b/dags/suggestions/business_logic/db_write_validsuggestions.py new file mode 100644 index 000000000..52ff9e156 --- /dev/null +++ b/dags/suggestions/business_logic/db_write_validsuggestions.py @@ -0,0 +1,172 @@ +import logging + +from shared.tasks.database_logic.db_manager import PostgresConnectionManager +from sources.config import shared_constants as constants +from utils import logging_utils as log + + +def db_write_validsuggestions(data_from_db: dict): + # If data_set is empty, nothing to do + dag_run_id = data_from_db["dag_run_id"] + engine = PostgresConnectionManager().engine + if "actors" not in data_from_db: + with engine.begin() as connection: + update_suggestion_status( + connection, dag_run_id, constants.SUGGESTION_ENCOURS + ) + return + df_actors = data_from_db["actors"] + df_labels = data_from_db.get("labels") + df_acteur_services = data_from_db.get("acteur_services") + df_pds = data_from_db.get("pds") + df_pdssc = data_from_db.get("pds_sous_categories") + dag_run_id = data_from_db["dag_run_id"] + change_type = data_from_db.get("change_type", "CREATE") + + with engine.begin() as connection: + if change_type in [ + constants.SUGGESTION_SOURCE, + constants.SUGGESTION_SOURCE_AJOUT, + constants.SUGGESTION_SOURCE_MISESAJOUR, + ]: + db_write_acteurupdate( + connection, df_actors, df_labels, df_acteur_services, df_pds, df_pdssc + ) + elif change_type == constants.SUGGESTION_SOURCE_SUPRESSION: + db_write_acteurdelete(connection, df_actors) + else: + raise ValueError("Invalid change_type") + + update_suggestion_status(connection, dag_run_id, constants.SUGGESTION_SUCCES) + + +def db_write_acteurupdate( + connection, df_actors, df_labels, df_acteur_services, df_pds, df_pdssc +): + logging.warning("Création ou mise à jour des acteurs") + + df_actors[["identifiant_unique"]].to_sql( + "temp_actors", connection, if_exists="replace" + ) + + delete_queries = [ + """ + DELETE FROM qfdmo_propositionservice_sous_categories + WHERE propositionservice_id IN ( + 
SELECT id FROM qfdmo_propositionservice + WHERE acteur_id IN ( SELECT identifiant_unique FROM temp_actors ) + ); + """, + """ + DELETE FROM qfdmo_acteur_labels + WHERE acteur_id IN ( SELECT identifiant_unique FROM temp_actors ); + """, + """ + DELETE FROM qfdmo_acteur_acteur_services + WHERE acteur_id IN ( SELECT identifiant_unique FROM temp_actors ); + """, + """ + DELETE FROM qfdmo_propositionservice + WHERE acteur_id IN ( SELECT identifiant_unique FROM temp_actors ); + """, + """ + DELETE FROM qfdmo_acteur WHERE identifiant_unique + IN ( SELECT identifiant_unique FROM temp_actors); + """, + ] + + for query in delete_queries: + connection.execute(query) + + # Liste des colonnes souhaitées + collection = connection.execute( + "SELECT column_name FROM information_schema.columns WHERE table_name =" + " 'qfdmo_acteur';" + ) + colonnes_souhaitees = [col[0] for col in collection] + + # Filtrer les colonnes qui existent dans le DataFrame + colonnes_existantes = [ + col for col in colonnes_souhaitees if col in df_actors.columns + ] + + df_actors[colonnes_existantes].to_sql( + "qfdmo_acteur", + connection, + if_exists="append", + index=False, + method="multi", + chunksize=1000, + ) + + df_labels = df_labels[["acteur_id", "labelqualite_id"]] + df_labels.drop_duplicates(inplace=True) + df_labels[["acteur_id", "labelqualite_id"]].to_sql( + "qfdmo_acteur_labels", + connection, + if_exists="append", + index=False, + method="multi", + chunksize=1000, + ) + + df_acteur_services = df_acteur_services[["acteur_id", "acteurservice_id"]] + df_acteur_services.drop_duplicates(inplace=True) + df_acteur_services.to_sql( + "qfdmo_acteur_acteur_services", + connection, + if_exists="append", + index=False, + method="multi", + chunksize=1000, + ) + + df_pds[["id", "action_id", "acteur_id"]].to_sql( + "qfdmo_propositionservice", + connection, + if_exists="append", + index=False, + method="multi", + chunksize=1000, + ) + + df_pdssc[["propositionservice_id", "souscategorieobjet_id"]].to_sql( + "qfdmo_propositionservice_sous_categories", + connection, + if_exists="append", + index=False, + method="multi", + chunksize=1000, + ) + + +def db_write_acteurdelete(connection, df_acteur_to_delete): + # mettre le statut des acteur à "SUPPRIMER" pour tous les acteurs à supprimer + logging.warning("Suppression des acteurs") + identifiant_uniques = list( + set(df_acteur_to_delete[["identifiant_unique"]].values.flatten()) + ) + quoted_identifiant_uniques = [ + f"'{identifiant_unique}'" for identifiant_unique in identifiant_uniques + ] + query_acteur_to_delete = f""" + UPDATE qfdmo_acteur + SET statut='{constants.ACTEUR_SUPPRIME}' + WHERE identifiant_unique IN ({",".join(quoted_identifiant_uniques)}); + UPDATE qfdmo_revisionacteur + SET statut='{constants.ACTEUR_SUPPRIME}' + WHERE identifiant_unique IN ({",".join(quoted_identifiant_uniques)}); + """ + log.preview("query_acteur_to_delete", query_acteur_to_delete) + connection.execute(query_acteur_to_delete) + + +def update_suggestion_status( + connection, suggestion_id, statut=constants.SUGGESTION_ENCOURS +): + query = f""" + UPDATE data_suggestioncohorte + SET status = '{statut}' + WHERE id = {suggestion_id}; + """ + connection.execute(query) diff --git a/dags/suggestions/dags/apply_suggestions.py b/dags/suggestions/dags/apply_suggestions.py new file mode 100755 index 000000000..39e14e97a --- /dev/null +++ b/dags/suggestions/dags/apply_suggestions.py @@ -0,0 +1,42 @@ +from datetime import timedelta + +from airflow.models import DAG +from airflow.utils.dates import days_ago +from 
suggestions.airflow_logic.db_normalize_suggestion_task import ( + db_normalize_suggestion_task, +) +from suggestions.airflow_logic.db_read_suggestiontoprocess_task import ( + db_read_suggestiontoprocess_task, +) +from suggestions.airflow_logic.db_write_validsuggestions_task import ( + db_write_validsuggestions_task, +) +from suggestions.airflow_logic.launch_compute_carte_acteur_task import ( + launch_compute_carte_acteur_task, +) + +default_args = { + "owner": "airflow", + "depends_on_past": False, + "start_date": days_ago(1), + "retries": 1, + "retry_delay": timedelta(minutes=5), +} + +dag = DAG( + dag_id="apply_suggestions", + dag_display_name="Application des suggestions validées", + default_args=default_args, + description="traiter les suggestions à traiter", + schedule="*/5 * * * *", + catchup=False, + max_active_runs=1, +) + + +( + db_read_suggestiontoprocess_task(dag) + >> db_normalize_suggestion_task(dag) + >> db_write_validsuggestions_task(dag) + >> launch_compute_carte_acteur_task(dag) +) diff --git a/data/views.py b/data/views.py index 259a073e7..7e9b0a7fe 100644 --- a/data/views.py +++ b/data/views.py @@ -24,7 +24,7 @@ def dispatch(self, request, *args, **kwargs): class SuggestionManagment(IsStaffMixin, FormView): form_class = SuggestionCohorteForm template_name = "data/dags_validations.html" - success_url = "/dags/validations" + success_url = "/data/suggestions" def form_valid(self, form): # MANAGE search and display suggestion_cohorte details From afb7446a9545389f81a1d3096a9393f1fd24b78b Mon Sep 17 00:00:00 2001 From: Nicolas Oudard Date: Thu, 16 Jan 2025 14:40:38 +0100 Subject: [PATCH 12/26] update statut --- dags/suggestions/business_logic/db_write_validsuggestions.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dags/suggestions/business_logic/db_write_validsuggestions.py b/dags/suggestions/business_logic/db_write_validsuggestions.py index 52ff9e156..f1e03c549 100644 --- a/dags/suggestions/business_logic/db_write_validsuggestions.py +++ b/dags/suggestions/business_logic/db_write_validsuggestions.py @@ -166,7 +166,7 @@ def update_suggestion_status( ): query = f""" UPDATE data_suggestioncohorte - SET status = '{statut}' + SET statut = '{statut}' WHERE id = {suggestion_id}; """ connection.execute(query) From 0bff9385dd99ce8af5b1baca22bf3d309d252fb4 Mon Sep 17 00:00:00 2001 From: Nicolas Oudard Date: Thu, 16 Jan 2025 15:22:30 +0100 Subject: [PATCH 13/26] Update jinja2/qfdmo/dags_validations.html Co-authored-by: Fabien Le Frapper --- jinja2/qfdmo/dags_validations.html | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/jinja2/qfdmo/dags_validations.html b/jinja2/qfdmo/dags_validations.html index 9a5c7bf6d..5122f2b60 100644 --- a/jinja2/qfdmo/dags_validations.html +++ b/jinja2/qfdmo/dags_validations.html @@ -3,7 +3,7 @@ {% block content %}
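A side note on update_suggestion_status, whose column name is fixed by patch 12 above: the query still interpolates values straight into the SQL string. A minimal parameterized sketch, assuming SQLAlchemy's text() construct (the connection comes from an SQLAlchemy engine):

# Sketch: same behaviour, but with bound parameters instead of f-strings.
from sqlalchemy import text

def update_suggestion_status(connection, suggestion_id, statut):
    connection.execute(
        text("UPDATE data_suggestioncohorte SET statut = :statut WHERE id = :id"),
        {"statut": statut, "id": suggestion_id},
    )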

-

Interface déprécier

+

Interface dépréciée

La nouvelle interface est disponible à l'adresse suivante : /data/suggestions

Validations des «DAGs»

From cacbf74ae4b96c52d738dc9dbbda9d62c5518d94 Mon Sep 17 00:00:00 2001 From: Nicolas Oudard Date: Thu, 16 Jan 2025 16:28:54 +0100 Subject: [PATCH 14/26] =?UTF-8?q?d=C3=A9placement=20de=20fichiers=20et=20s?= =?UTF-8?q?uppression=20de=20champs=20inutilis=C3=A9s?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../business_logic/db_write_suggestion.py | 13 +- dags/suggestions/dags/apply_suggestions.py | 8 +- .../tasks/airflow_logic/__init__.py | 4 + .../db_normalize_suggestion_task.py | 4 +- .../db_read_suggestiontoprocess_task.py | 2 +- .../db_write_validsuggestions_task.py | 2 +- .../launch_compute_carte_acteur_task.py | 0 .../business_logic/db_normalize_suggestion.py | 2 +- .../db_read_suggestiontoprocess.py | 0 .../db_write_validsuggestions.py | 0 data/migrations/0001_bancache.py | 55 +++++ ...1_initial.py => 0002_tables_suggestion.py} | 54 +---- data/models.py | 25 -- dev-requirements.txt | 217 +++++++++--------- .../_partials/source_supression_event.html | 4 +- 15 files changed, 190 insertions(+), 200 deletions(-) create mode 100644 dags/suggestions/tasks/airflow_logic/__init__.py rename dags/suggestions/{ => tasks}/airflow_logic/db_normalize_suggestion_task.py (78%) rename dags/suggestions/{ => tasks}/airflow_logic/db_read_suggestiontoprocess_task.py (81%) rename dags/suggestions/{ => tasks}/airflow_logic/db_write_validsuggestions_task.py (90%) rename dags/suggestions/{ => tasks}/airflow_logic/launch_compute_carte_acteur_task.py (100%) rename dags/suggestions/{ => tasks}/business_logic/db_normalize_suggestion.py (98%) rename dags/suggestions/{ => tasks}/business_logic/db_read_suggestiontoprocess.py (100%) rename dags/suggestions/{ => tasks}/business_logic/db_write_validsuggestions.py (100%) create mode 100644 data/migrations/0001_bancache.py rename data/migrations/{0001_initial.py => 0002_tables_suggestion.py} (77%) diff --git a/dags/sources/tasks/business_logic/db_write_suggestion.py b/dags/sources/tasks/business_logic/db_write_suggestion.py index 832869fa5..2e921599d 100644 --- a/dags/sources/tasks/business_logic/db_write_suggestion.py +++ b/dags/sources/tasks/business_logic/db_write_suggestion.py @@ -26,26 +26,26 @@ def db_write_suggestion( metadata=metadata, dag_name=f"{dag_name} - AJOUT", run_name=run_name, - action_type=constants.SUGGESTION_SOURCE_AJOUT, + type_action=constants.SUGGESTION_SOURCE_AJOUT, ) insert_suggestion( df=df_acteur_to_delete, metadata=metadata, dag_name=f"{dag_name} - SUPRESSION", run_name=run_name, - action_type=constants.SUGGESTION_SOURCE_SUPRESSION, + type_action=constants.SUGGESTION_SOURCE_SUPRESSION, ) insert_suggestion( df=df_acteur_to_update, metadata=metadata, dag_name=f"{dag_name} - MISES A JOUR", run_name=run_name, - action_type=constants.SUGGESTION_SOURCE_MISESAJOUR, + type_action=constants.SUGGESTION_SOURCE_MISESAJOUR, ) def insert_suggestion( - df: pd.DataFrame, metadata: dict, dag_name: str, run_name: str, action_type: str + df: pd.DataFrame, metadata: dict, dag_name: str, run_name: str, type_action: str ): if df.empty: return @@ -72,7 +72,7 @@ def insert_suggestion( ( dag_name, run_name, - action_type, + type_action, constants.SUGGESTION_AVALIDER, json.dumps(metadata), current_date, @@ -82,10 +82,9 @@ def insert_suggestion( suggestion_cohorte_id = result.fetchone()[0] # Insert dag_run_change - df["type_action"] = action_type df["suggestion_cohorte_id"] = suggestion_cohorte_id df["statut"] = constants.SUGGESTION_AVALIDER - df[["suggestion", "suggestion_cohorte_id", "type_action", "statut"]].to_sql( 
+ df[["suggestion", "suggestion_cohorte_id", "statut"]].to_sql( "data_suggestionunitaire", engine, if_exists="append", diff --git a/dags/suggestions/dags/apply_suggestions.py b/dags/suggestions/dags/apply_suggestions.py index 39e14e97a..abbca01ae 100755 --- a/dags/suggestions/dags/apply_suggestions.py +++ b/dags/suggestions/dags/apply_suggestions.py @@ -2,16 +2,10 @@ from airflow.models import DAG from airflow.utils.dates import days_ago -from suggestions.airflow_logic.db_normalize_suggestion_task import ( +from suggestions.tasks.airflow_logic import ( db_normalize_suggestion_task, -) -from suggestions.airflow_logic.db_read_suggestiontoprocess_task import ( db_read_suggestiontoprocess_task, -) -from suggestions.airflow_logic.db_write_validsuggestions_task import ( db_write_validsuggestions_task, -) -from suggestions.airflow_logic.launch_compute_carte_acteur_task import ( launch_compute_carte_acteur_task, ) diff --git a/dags/suggestions/tasks/airflow_logic/__init__.py b/dags/suggestions/tasks/airflow_logic/__init__.py new file mode 100644 index 000000000..a9e6fc049 --- /dev/null +++ b/dags/suggestions/tasks/airflow_logic/__init__.py @@ -0,0 +1,4 @@ +from suggestions.tasks.airflow_logic.db_normalize_suggestion_task import * # noqa +from suggestions.tasks.airflow_logic.db_read_suggestiontoprocess_task import * # noqa +from suggestions.tasks.airflow_logic.db_write_validsuggestions_task import * # noqa +from suggestions.tasks.airflow_logic.launch_compute_carte_acteur_task import * # noqa diff --git a/dags/suggestions/airflow_logic/db_normalize_suggestion_task.py b/dags/suggestions/tasks/airflow_logic/db_normalize_suggestion_task.py similarity index 78% rename from dags/suggestions/airflow_logic/db_normalize_suggestion_task.py rename to dags/suggestions/tasks/airflow_logic/db_normalize_suggestion_task.py index 908776b16..decfee015 100644 --- a/dags/suggestions/airflow_logic/db_normalize_suggestion_task.py +++ b/dags/suggestions/tasks/airflow_logic/db_normalize_suggestion_task.py @@ -1,6 +1,8 @@ from airflow.models import DAG from airflow.operators.python import PythonOperator -from suggestions.business_logic.db_normalize_suggestion import db_normalize_suggestion +from suggestions.tasks.business_logic.db_normalize_suggestion import ( + db_normalize_suggestion, +) def db_normalize_suggestion_task(dag: DAG): diff --git a/dags/suggestions/airflow_logic/db_read_suggestiontoprocess_task.py b/dags/suggestions/tasks/airflow_logic/db_read_suggestiontoprocess_task.py similarity index 81% rename from dags/suggestions/airflow_logic/db_read_suggestiontoprocess_task.py rename to dags/suggestions/tasks/airflow_logic/db_read_suggestiontoprocess_task.py index 3dcd36249..d86347ff9 100644 --- a/dags/suggestions/airflow_logic/db_read_suggestiontoprocess_task.py +++ b/dags/suggestions/tasks/airflow_logic/db_read_suggestiontoprocess_task.py @@ -1,6 +1,6 @@ from airflow.models import DAG from airflow.operators.python import ShortCircuitOperator -from suggestions.business_logic.db_read_suggestiontoprocess import ( +from suggestions.tasks.business_logic.db_read_suggestiontoprocess import ( db_read_suggestiontoprocess, ) diff --git a/dags/suggestions/airflow_logic/db_write_validsuggestions_task.py b/dags/suggestions/tasks/airflow_logic/db_write_validsuggestions_task.py similarity index 90% rename from dags/suggestions/airflow_logic/db_write_validsuggestions_task.py rename to dags/suggestions/tasks/airflow_logic/db_write_validsuggestions_task.py index 80f097955..071b1db3f 100644 --- 
a/dags/suggestions/airflow_logic/db_write_validsuggestions_task.py +++ b/dags/suggestions/tasks/airflow_logic/db_write_validsuggestions_task.py @@ -1,6 +1,6 @@ from airflow.models import DAG from airflow.operators.python import PythonOperator -from suggestions.business_logic.db_write_validsuggestions import ( +from suggestions.tasks.business_logic.db_write_validsuggestions import ( db_write_validsuggestions, ) from utils import logging_utils as log diff --git a/dags/suggestions/airflow_logic/launch_compute_carte_acteur_task.py b/dags/suggestions/tasks/airflow_logic/launch_compute_carte_acteur_task.py similarity index 100% rename from dags/suggestions/airflow_logic/launch_compute_carte_acteur_task.py rename to dags/suggestions/tasks/airflow_logic/launch_compute_carte_acteur_task.py diff --git a/dags/suggestions/business_logic/db_normalize_suggestion.py b/dags/suggestions/tasks/business_logic/db_normalize_suggestion.py similarity index 98% rename from dags/suggestions/business_logic/db_normalize_suggestion.py rename to dags/suggestions/tasks/business_logic/db_normalize_suggestion.py index 7b33e281c..01249eda8 100644 --- a/dags/suggestions/business_logic/db_normalize_suggestion.py +++ b/dags/suggestions/tasks/business_logic/db_normalize_suggestion.py @@ -1,7 +1,7 @@ import pandas as pd from shared.tasks.database_logic.db_manager import PostgresConnectionManager from sources.config import shared_constants as constants -from suggestions.business_logic.db_read_suggestiontoprocess import ( +from suggestions.tasks.business_logic.db_read_suggestiontoprocess import ( get_first_suggetsioncohorte_to_insert, ) from utils import logging_utils as log diff --git a/dags/suggestions/business_logic/db_read_suggestiontoprocess.py b/dags/suggestions/tasks/business_logic/db_read_suggestiontoprocess.py similarity index 100% rename from dags/suggestions/business_logic/db_read_suggestiontoprocess.py rename to dags/suggestions/tasks/business_logic/db_read_suggestiontoprocess.py diff --git a/dags/suggestions/business_logic/db_write_validsuggestions.py b/dags/suggestions/tasks/business_logic/db_write_validsuggestions.py similarity index 100% rename from dags/suggestions/business_logic/db_write_validsuggestions.py rename to dags/suggestions/tasks/business_logic/db_write_validsuggestions.py diff --git a/data/migrations/0001_bancache.py b/data/migrations/0001_bancache.py new file mode 100644 index 000000000..887041a9a --- /dev/null +++ b/data/migrations/0001_bancache.py @@ -0,0 +1,55 @@ +# Generated by Django 5.1.4 on 2025-01-09 14:04 + +import django.contrib.gis.db.models.fields +import django.core.validators +import django.db.models.deletion +import django.db.models.functions.datetime +from django.db import migrations, models + + +class Migration(migrations.Migration): + + initial = True + + dependencies = [] + + operations = [ + migrations.CreateModel( + name="BANCache", + fields=[ + ( + "id", + models.BigAutoField( + auto_created=True, + primary_key=True, + serialize=False, + verbose_name="ID", + ), + ), + ("adresse", models.CharField(blank=True, max_length=255, null=True)), + ( + "code_postal", + models.CharField(blank=True, max_length=255, null=True), + ), + ("ville", models.CharField(blank=True, max_length=255, null=True)), + ( + "location", + django.contrib.gis.db.models.fields.PointField( + blank=True, null=True, srid=4326 + ), + ), + ("ban_returned", models.JSONField(blank=True, null=True)), + ( + "modifie_le", + models.DateTimeField( + auto_now=True, + db_default=django.db.models.functions.datetime.Now(), + 
), + ), + ], + options={ + "verbose_name": "Cache BAN", + "verbose_name_plural": "Cache BAN", + }, + ), + ] diff --git a/data/migrations/0001_initial.py b/data/migrations/0002_tables_suggestion.py similarity index 77% rename from data/migrations/0001_initial.py rename to data/migrations/0002_tables_suggestion.py index e12402e22..0cdbcef99 100644 --- a/data/migrations/0001_initial.py +++ b/data/migrations/0002_tables_suggestion.py @@ -11,47 +11,11 @@ class Migration(migrations.Migration): initial = True - dependencies = [] + dependencies = [ + ("data", "0001_bancache"), + ] operations = [ - migrations.CreateModel( - name="BANCache", - fields=[ - ( - "id", - models.BigAutoField( - auto_created=True, - primary_key=True, - serialize=False, - verbose_name="ID", - ), - ), - ("adresse", models.CharField(blank=True, max_length=255, null=True)), - ( - "code_postal", - models.CharField(blank=True, max_length=255, null=True), - ), - ("ville", models.CharField(blank=True, max_length=255, null=True)), - ( - "location", - django.contrib.gis.db.models.fields.PointField( - blank=True, null=True, srid=4326 - ), - ), - ("ban_returned", models.JSONField(blank=True, null=True)), - ( - "modifie_le", - models.DateTimeField( - auto_now=True, - db_default=django.db.models.functions.datetime.Now(), - ), - ), - ], - options={ - "verbose_name": "Cache BAN", - "verbose_name_plural": "Cache BAN", - }, - ), migrations.CreateModel( name="SuggestionCohorte", fields=[ @@ -115,18 +79,6 @@ class Migration(migrations.Migration): null=True, ), ), - ( - "pourcentage_erreurs_tolerees", - models.IntegerField( - db_default=0, - default=0, - help_text="Nombre d'erreurs tolérées en pourcentage", - validators=[ - django.core.validators.MinValueValidator(0), - django.core.validators.MaxValueValidator(100), - ], - ), - ), ( "cree_le", models.DateTimeField( diff --git a/data/models.py b/data/models.py index 61c6e4abb..329fc4872 100644 --- a/data/models.py +++ b/data/models.py @@ -1,5 +1,4 @@ from django.contrib.gis.db import models -from django.core.validators import MaxValueValidator, MinValueValidator from django.db.models.functions import Now from dags.sources.config.shared_constants import ( @@ -72,12 +71,6 @@ class SuggestionCohorte(models.Model): metadata = models.JSONField( null=True, blank=True, help_text="Metadata de la cohorte, données statistiques" ) - pourcentage_erreurs_tolerees = models.IntegerField( - default=0, - db_default=0, - help_text="Nombre d'erreurs tolérées en pourcentage", - validators=[MinValueValidator(0), MaxValueValidator(100)], - ) cree_le = models.DateTimeField(auto_now_add=True, db_default=Now()) modifie_le = models.DateTimeField(auto_now=True, db_default=Now()) @@ -99,30 +92,12 @@ def is_clustering_type(self) -> bool: def __str__(self) -> str: return f"{self.identifiant_action} - {self.identifiant_execution}" - def display_meta_data(self) -> dict: - displayed_metadata = {} - displayed_metadata["Nombre d'acteur à créer ou mettre à jour"] = ( - self.metadata.get("acteurs_to_add_or_update", 0) - ) - displayed_metadata["Nombre de duplicats"] = self.metadata.get( - "number_of_duplicates", 0 - ) - displayed_metadata["Nombre d'acteur à supprimer"] = self.metadata.get( - "number_of_removed_actors", 0 - ) - return displayed_metadata - class SuggestionUnitaire(models.Model): id = models.AutoField(primary_key=True) suggestion_cohorte = models.ForeignKey( SuggestionCohorte, on_delete=models.CASCADE, related_name="suggestion_unitaires" ) - type_action = models.CharField( - choices=SuggestionAction.choices, - 
max_length=250, - blank=True, - ) statut = models.CharField( max_length=50, choices=SuggestionStatut.choices, diff --git a/dev-requirements.txt b/dev-requirements.txt index 854156923..c6ed309ca 100644 --- a/dev-requirements.txt +++ b/dev-requirements.txt @@ -1024,6 +1024,10 @@ jmespath==1.0.1 \ # via # -c requirements.txt # apache-airflow-providers-fab +joblib==1.4.2 \ + --hash=sha256:06d478d5674cbc267e7496a410ee875abd68e4340feff4490bcb7afb88060ae6 \ + --hash=sha256:2382c5816b2636fbd20a09e0f4e9dad4736765fdfb7dca582943b9c1366b3f0e + # via scikit-learn jsonschema==4.23.0 \ --hash=sha256:d71497fef26351a33265337fa77ffeb82423f3ea21283cd9467bb03999266bc4 \ --hash=sha256:fbadb6f8b144a8f8cf9f0b89ba94501d143e50411a1278633f56a7acf7fd5566 @@ -1439,6 +1443,9 @@ numpy==1.26.4 \ --hash=sha256:ffa75af20b44f8dba823498024771d5ac50620e6915abac414251bd971b4529f # via # -c requirements.txt + # pandas + # scikit-learn + # scipy # shapely opentelemetry-api==1.29.0 \ --hash=sha256:5fcd94c4141cc49c736271f3e1efb777bebe9cc535759c54c936cca4f1b312b8 \ @@ -1503,6 +1510,35 @@ packaging==24.2 \ # marshmallow-sqlalchemy # pytest # sphinx +pandas==2.1.4 \ + --hash=sha256:00028e6737c594feac3c2df15636d73ace46b8314d236100b57ed7e4b9ebe8d9 \ + --hash=sha256:0aa6e92e639da0d6e2017d9ccff563222f4eb31e4b2c3cf32a2a392fc3103c0d \ + --hash=sha256:1ebfd771110b50055712b3b711b51bee5d50135429364d0498e1213a7adc2be8 \ + --hash=sha256:294d96cfaf28d688f30c918a765ea2ae2e0e71d3536754f4b6de0ea4a496d034 \ + --hash=sha256:3f06bda01a143020bad20f7a85dd5f4a1600112145f126bc9e3e42077c24ef34 \ + --hash=sha256:426dc0f1b187523c4db06f96fb5c8d1a845e259c99bda74f7de97bd8a3bb3139 \ + --hash=sha256:45d63d2a9b1b37fa6c84a68ba2422dc9ed018bdaa668c7f47566a01188ceeec1 \ + --hash=sha256:482d5076e1791777e1571f2e2d789e940dedd927325cc3cb6d0800c6304082f6 \ + --hash=sha256:6b728fb8deba8905b319f96447a27033969f3ea1fea09d07d296c9030ab2ed1d \ + --hash=sha256:8a706cfe7955c4ca59af8c7a0517370eafbd98593155b48f10f9811da440248b \ + --hash=sha256:8ea107e0be2aba1da619cc6ba3f999b2bfc9669a83554b1904ce3dd9507f0860 \ + --hash=sha256:ab5796839eb1fd62a39eec2916d3e979ec3130509930fea17fe6f81e18108f6a \ + --hash=sha256:b0513a132a15977b4a5b89aabd304647919bc2169eac4c8536afb29c07c23540 \ + --hash=sha256:b7d852d16c270e4331f6f59b3e9aa23f935f5c4b0ed2d0bc77637a8890a5d092 \ + --hash=sha256:bd7d5f2f54f78164b3d7a40f33bf79a74cdee72c31affec86bfcabe7e0789821 \ + --hash=sha256:bdec823dc6ec53f7a6339a0e34c68b144a7a1fd28d80c260534c39c62c5bf8c9 \ + --hash=sha256:d2d3e7b00f703aea3945995ee63375c61b2e6aa5aa7871c5d622870e5e137623 \ + --hash=sha256:d65148b14788b3758daf57bf42725caa536575da2b64df9964c563b015230984 \ + --hash=sha256:d797591b6846b9db79e65dc2d0d48e61f7db8d10b2a9480b4e3faaddc421a171 \ + --hash=sha256:dc9bf7ade01143cddc0074aa6995edd05323974e6e40d9dbde081021ded8510e \ + --hash=sha256:e9f17f2b6fc076b2a0078862547595d66244db0f41bf79fc5f64a5c4d635bead \ + --hash=sha256:edbaf9e8d3a63a9276d707b4d25930a262341bca9874fcb22eff5e3da5394732 \ + --hash=sha256:f237e6ca6421265643608813ce9793610ad09b40154a3344a088159590469e46 \ + --hash=sha256:f69b0c9bb174a2342818d3e2778584e18c740d56857fc5cdb944ec8bbe4082cf \ + --hash=sha256:fcb68203c833cc735321512e13861358079a96c174a61f5116a1de89c58c0ef7 + # via + # -c requirements.txt + # -r dev-requirements.in parso==0.8.4 \ --hash=sha256:a418670a20291dacd2dddc80c377c5c3791378ee1e8d12bffc35420643d43f18 \ --hash=sha256:eb3a7b58240fb99099a345571deecc0f9540ea5f4dd2fe14c2a99d6b281ab92d @@ -1870,6 +1906,7 @@ python-dateutil==2.9.0.post0 \ # croniter # faker # flask-appbuilder + # pandas # 
pendulum # time-machine python-dotenv==1.0.1 \ @@ -1896,6 +1933,7 @@ pytz==2024.2 \ # -c requirements.txt # croniter # flask-babel + # pandas pyyaml==6.0.2 \ --hash=sha256:01179a4a8559ab5de078078f37e5c1a30d76bb88519906844fd7bdea1b7729ff \ --hash=sha256:0833f8694549e586547b576dcfaba4a6b55b9e96098b36cdc7ebefe667dfed48 \ @@ -2216,6 +2254,76 @@ ruff==0.9.1 \ --hash=sha256:f0c8b149e9c7353cace7d698e1656ffcf1e36e50f8ea3b5d5f7f87ff9986a7ca \ --hash=sha256:fd2b25ecaf907d6458fa842675382c8597b3c746a2dde6717fe3415425df0c17 # via -r dev-requirements.in +scikit-learn==1.3.2 \ + --hash=sha256:0402638c9a7c219ee52c94cbebc8fcb5eb9fe9c773717965c1f4185588ad3107 \ + --hash=sha256:0ee107923a623b9f517754ea2f69ea3b62fc898a3641766cb7deb2f2ce450161 \ + --hash=sha256:1215e5e58e9880b554b01187b8c9390bf4dc4692eedeaf542d3273f4785e342c \ + --hash=sha256:15e1e94cc23d04d39da797ee34236ce2375ddea158b10bee3c343647d615581d \ + --hash=sha256:18424efee518a1cde7b0b53a422cde2f6625197de6af36da0b57ec502f126157 \ + --hash=sha256:1d08ada33e955c54355d909b9c06a4789a729977f165b8bae6f225ff0a60ec4a \ + --hash=sha256:3271552a5eb16f208a6f7f617b8cc6d1f137b52c8a1ef8edf547db0259b2c9fb \ + --hash=sha256:35a22e8015048c628ad099da9df5ab3004cdbf81edc75b396fd0cff8699ac58c \ + --hash=sha256:535805c2a01ccb40ca4ab7d081d771aea67e535153e35a1fd99418fcedd1648a \ + --hash=sha256:5b2de18d86f630d68fe1f87af690d451388bb186480afc719e5f770590c2ef6c \ + --hash=sha256:61a6efd384258789aa89415a410dcdb39a50e19d3d8410bd29be365bcdd512d5 \ + --hash=sha256:64381066f8aa63c2710e6b56edc9f0894cc7bf59bd71b8ce5613a4559b6145e0 \ + --hash=sha256:67f37d708f042a9b8d59551cf94d30431e01374e00dc2645fa186059c6c5d78b \ + --hash=sha256:6c43290337f7a4b969d207e620658372ba3c1ffb611f8bc2b6f031dc5c6d1d03 \ + --hash=sha256:6fb6bc98f234fda43163ddbe36df8bcde1d13ee176c6dc9b92bb7d3fc842eb66 \ + --hash=sha256:763f0ae4b79b0ff9cca0bf3716bcc9915bdacff3cebea15ec79652d1cc4fa5c9 \ + --hash=sha256:785a2213086b7b1abf037aeadbbd6d67159feb3e30263434139c98425e3dcfcf \ + --hash=sha256:8db94cd8a2e038b37a80a04df8783e09caac77cbe052146432e67800e430c028 \ + --hash=sha256:a19f90f95ba93c1a7f7924906d0576a84da7f3b2282ac3bfb7a08a32801add93 \ + --hash=sha256:a2f54c76accc15a34bfb9066e6c7a56c1e7235dda5762b990792330b52ccfb05 \ + --hash=sha256:b8692e395a03a60cd927125eef3a8e3424d86dde9b2370d544f0ea35f78a8073 \ + --hash=sha256:cb06f8dce3f5ddc5dee1715a9b9f19f20d295bed8e3cd4fa51e1d050347de525 \ + --hash=sha256:dc9002fc200bed597d5d34e90c752b74df516d592db162f756cc52836b38fe0e \ + --hash=sha256:e326c0eb5cf4d6ba40f93776a20e9a7a69524c4db0757e7ce24ba222471ee8a1 \ + --hash=sha256:ed932ea780517b00dae7431e031faae6b49b20eb6950918eb83bd043237950e0 \ + --hash=sha256:fc4144a5004a676d5022b798d9e573b05139e77f271253a4703eed295bde0433 + # via -r dev-requirements.in +scipy==1.15.1 \ + --hash=sha256:033a75ddad1463970c96a88063a1df87ccfddd526437136b6ee81ff0312ebdf6 \ + --hash=sha256:0458839c9f873062db69a03de9a9765ae2e694352c76a16be44f93ea45c28d2b \ + --hash=sha256:070d10654f0cb6abd295bc96c12656f948e623ec5f9a4eab0ddb1466c000716e \ + --hash=sha256:09c52320c42d7f5c7748b69e9f0389266fd4f82cf34c38485c14ee976cb8cb04 \ + --hash=sha256:0ac102ce99934b162914b1e4a6b94ca7da0f4058b6d6fd65b0cef330c0f3346f \ + --hash=sha256:0fb57b30f0017d4afa5fe5f5b150b8f807618819287c21cbe51130de7ccdaed2 \ + --hash=sha256:100193bb72fbff37dbd0bf14322314fc7cbe08b7ff3137f11a34d06dc0ee6b85 \ + --hash=sha256:14eaa373c89eaf553be73c3affb11ec6c37493b7eaaf31cf9ac5dffae700c2e0 \ + --hash=sha256:2114a08daec64980e4b4cbdf5bee90935af66d750146b1d2feb0d3ac30613692 \ + 
--hash=sha256:21e10b1dd56ce92fba3e786007322542361984f8463c6d37f6f25935a5a6ef52 \ + --hash=sha256:2722a021a7929d21168830790202a75dbb20b468a8133c74a2c0230c72626b6c \ + --hash=sha256:395be70220d1189756068b3173853029a013d8c8dd5fd3d1361d505b2aa58fa7 \ + --hash=sha256:3fe1d95944f9cf6ba77aa28b82dd6bb2a5b52f2026beb39ecf05304b8392864b \ + --hash=sha256:491d57fe89927fa1aafbe260f4cfa5ffa20ab9f1435025045a5315006a91b8f5 \ + --hash=sha256:4b17d4220df99bacb63065c76b0d1126d82bbf00167d1730019d2a30d6ae01ea \ + --hash=sha256:4c9d8fc81d6a3b6844235e6fd175ee1d4c060163905a2becce8e74cb0d7554ce \ + --hash=sha256:55cc79ce4085c702ac31e49b1e69b27ef41111f22beafb9b49fea67142b696c4 \ + --hash=sha256:5b190b935e7db569960b48840e5bef71dc513314cc4e79a1b7d14664f57fd4ff \ + --hash=sha256:5bd8d27d44e2c13d0c1124e6a556454f52cd3f704742985f6b09e75e163d20d2 \ + --hash=sha256:5dff14e75cdbcf07cdaa1c7707db6017d130f0af9ac41f6ce443a93318d6c6e0 \ + --hash=sha256:5eb0ca35d4b08e95da99a9f9c400dc9f6c21c424298a0ba876fdc69c7afacedf \ + --hash=sha256:63b9b6cd0333d0eb1a49de6f834e8aeaefe438df8f6372352084535ad095219e \ + --hash=sha256:667f950bf8b7c3a23b4199db24cb9bf7512e27e86d0e3813f015b74ec2c6e3df \ + --hash=sha256:6b3e71893c6687fc5e29208d518900c24ea372a862854c9888368c0b267387ab \ + --hash=sha256:71ba9a76c2390eca6e359be81a3e879614af3a71dfdabb96d1d7ab33da6f2364 \ + --hash=sha256:74bb864ff7640dea310a1377d8567dc2cb7599c26a79ca852fc184cc851954ac \ + --hash=sha256:82add84e8a9fb12af5c2c1a3a3f1cb51849d27a580cb9e6bd66226195142be6e \ + --hash=sha256:837299eec3d19b7e042923448d17d95a86e43941104d33f00da7e31a0f715d3c \ + --hash=sha256:900f3fa3db87257510f011c292a5779eb627043dd89731b9c461cd16ef76ab3d \ + --hash=sha256:9f151e9fb60fbf8e52426132f473221a49362091ce7a5e72f8aa41f8e0da4f25 \ + --hash=sha256:af0b61c1de46d0565b4b39c6417373304c1d4f5220004058bdad3061c9fa8a95 \ + --hash=sha256:bc7136626261ac1ed988dca56cfc4ab5180f75e0ee52e58f1e6aa74b5f3eacd5 \ + --hash=sha256:be3deeb32844c27599347faa077b359584ba96664c5c79d71a354b80a0ad0ce0 \ + --hash=sha256:c09aa9d90f3500ea4c9b393ee96f96b0ccb27f2f350d09a47f533293c78ea776 \ + --hash=sha256:c352c1b6d7cac452534517e022f8f7b8d139cd9f27e6fbd9f3cbd0bfd39f5bef \ + --hash=sha256:c64ded12dcab08afff9e805a67ff4480f5e69993310e093434b10e85dc9d43e1 \ + --hash=sha256:cdde8414154054763b42b74fe8ce89d7f3d17a7ac5dd77204f0e142cdc9239e9 \ + --hash=sha256:ce3a000cd28b4430426db2ca44d96636f701ed12e2b3ca1f2b1dd7abdd84b39a \ + --hash=sha256:f735bc41bd1c792c96bc426dece66c8723283695f02df61dcc4d0a707a42fc54 \ + --hash=sha256:f82fcf4e5b377f819542fbc8541f7b5fbcf1c0017d0df0bc22c781bf60abc4d8 + # via scikit-learn setproctitle==1.3.4 \ --hash=sha256:020ea47a79b2bbd7bd7b94b85ca956ba7cb026e82f41b20d2e1dac4008cead25 \ --hash=sha256:02ca3802902d91a89957f79da3ec44b25b5804c88026362cb85eea7c1fbdefd1 \ @@ -2489,6 +2597,10 @@ text-unidecode==1.3 \ --hash=sha256:1311f10e8b895935241623731c2ba64f4c455287888b18189350b67134a822e8 \ --hash=sha256:bad6603bb14d279193107714b288be206cac565dfa49aa5b105294dd5c4aab93 # via python-slugify +threadpoolctl==3.5.0 \ + --hash=sha256:082433502dd922bf738de0d8bcc4fdcbf0979ff44c42bd40f5af8a282f6fa107 \ + --hash=sha256:56c1e26c150397e58c4926da8eeee87533b1e32bef131bd4bf6a2f45f3185467 + # via scikit-learn time-machine==2.16.0 \ --hash=sha256:01bc257e9418980a4922de94775be42a966e1a082fb01a1635917f9afc7b84ca \ --hash=sha256:09531af59fdfb39bfd24d28bd1e837eff5a5d98318509a31b6cfd57d27801e52 \ @@ -2569,6 +2681,7 @@ tzdata==2024.2 \ --hash=sha256:a48093786cdcde33cad18c2555e8532f34422074448fbc874186f0abd79565cd # via # -c requirements.txt + # pandas # 
pendulum uc-micro-py==1.0.3 \ --hash=sha256:d321b92cff673ec58027c04015fcaa8bb1e005478643ff4a500882eaab88c48a \ @@ -2767,107 +2880,3 @@ zipp==3.21.0 \ --hash=sha256:2c9958f6430a2040341a52eb608ed6dd93ef4392e02ffe219417c1b28b5dd1f4 \ --hash=sha256:ac1bbe05fd2991f160ebce24ffbac5f6d11d83dc90891255885223d42b3cd931 # via importlib-metadata -joblib==1.4.2 \ - --hash=sha256:06d478d5674cbc267e7496a410ee875abd68e4340feff4490bcb7afb88060ae6 \ - --hash=sha256:2382c5816b2636fbd20a09e0f4e9dad4736765fdfb7dca582943b9c1366b3f0e - # via scikit-learn -scikit-learn==1.3.2 \ - --hash=sha256:0402638c9a7c219ee52c94cbebc8fcb5eb9fe9c773717965c1f4185588ad3107 \ - --hash=sha256:0ee107923a623b9f517754ea2f69ea3b62fc898a3641766cb7deb2f2ce450161 \ - --hash=sha256:1215e5e58e9880b554b01187b8c9390bf4dc4692eedeaf542d3273f4785e342c \ - --hash=sha256:15e1e94cc23d04d39da797ee34236ce2375ddea158b10bee3c343647d615581d \ - --hash=sha256:18424efee518a1cde7b0b53a422cde2f6625197de6af36da0b57ec502f126157 \ - --hash=sha256:1d08ada33e955c54355d909b9c06a4789a729977f165b8bae6f225ff0a60ec4a \ - --hash=sha256:3271552a5eb16f208a6f7f617b8cc6d1f137b52c8a1ef8edf547db0259b2c9fb \ - --hash=sha256:35a22e8015048c628ad099da9df5ab3004cdbf81edc75b396fd0cff8699ac58c \ - --hash=sha256:535805c2a01ccb40ca4ab7d081d771aea67e535153e35a1fd99418fcedd1648a \ - --hash=sha256:5b2de18d86f630d68fe1f87af690d451388bb186480afc719e5f770590c2ef6c \ - --hash=sha256:61a6efd384258789aa89415a410dcdb39a50e19d3d8410bd29be365bcdd512d5 \ - --hash=sha256:64381066f8aa63c2710e6b56edc9f0894cc7bf59bd71b8ce5613a4559b6145e0 \ - --hash=sha256:67f37d708f042a9b8d59551cf94d30431e01374e00dc2645fa186059c6c5d78b \ - --hash=sha256:6c43290337f7a4b969d207e620658372ba3c1ffb611f8bc2b6f031dc5c6d1d03 \ - --hash=sha256:6fb6bc98f234fda43163ddbe36df8bcde1d13ee176c6dc9b92bb7d3fc842eb66 \ - --hash=sha256:763f0ae4b79b0ff9cca0bf3716bcc9915bdacff3cebea15ec79652d1cc4fa5c9 \ - --hash=sha256:785a2213086b7b1abf037aeadbbd6d67159feb3e30263434139c98425e3dcfcf \ - --hash=sha256:8db94cd8a2e038b37a80a04df8783e09caac77cbe052146432e67800e430c028 \ - --hash=sha256:a19f90f95ba93c1a7f7924906d0576a84da7f3b2282ac3bfb7a08a32801add93 \ - --hash=sha256:a2f54c76accc15a34bfb9066e6c7a56c1e7235dda5762b990792330b52ccfb05 \ - --hash=sha256:b8692e395a03a60cd927125eef3a8e3424d86dde9b2370d544f0ea35f78a8073 \ - --hash=sha256:cb06f8dce3f5ddc5dee1715a9b9f19f20d295bed8e3cd4fa51e1d050347de525 \ - --hash=sha256:dc9002fc200bed597d5d34e90c752b74df516d592db162f756cc52836b38fe0e \ - --hash=sha256:e326c0eb5cf4d6ba40f93776a20e9a7a69524c4db0757e7ce24ba222471ee8a1 \ - --hash=sha256:ed932ea780517b00dae7431e031faae6b49b20eb6950918eb83bd043237950e0 \ - --hash=sha256:fc4144a5004a676d5022b798d9e573b05139e77f271253a4703eed295bde0433 - # via -r req_sickit.in -scipy==1.15.1 \ - --hash=sha256:033a75ddad1463970c96a88063a1df87ccfddd526437136b6ee81ff0312ebdf6 \ - --hash=sha256:0458839c9f873062db69a03de9a9765ae2e694352c76a16be44f93ea45c28d2b \ - --hash=sha256:070d10654f0cb6abd295bc96c12656f948e623ec5f9a4eab0ddb1466c000716e \ - --hash=sha256:09c52320c42d7f5c7748b69e9f0389266fd4f82cf34c38485c14ee976cb8cb04 \ - --hash=sha256:0ac102ce99934b162914b1e4a6b94ca7da0f4058b6d6fd65b0cef330c0f3346f \ - --hash=sha256:0fb57b30f0017d4afa5fe5f5b150b8f807618819287c21cbe51130de7ccdaed2 \ - --hash=sha256:100193bb72fbff37dbd0bf14322314fc7cbe08b7ff3137f11a34d06dc0ee6b85 \ - --hash=sha256:14eaa373c89eaf553be73c3affb11ec6c37493b7eaaf31cf9ac5dffae700c2e0 \ - --hash=sha256:2114a08daec64980e4b4cbdf5bee90935af66d750146b1d2feb0d3ac30613692 \ - 
--hash=sha256:21e10b1dd56ce92fba3e786007322542361984f8463c6d37f6f25935a5a6ef52 \ - --hash=sha256:2722a021a7929d21168830790202a75dbb20b468a8133c74a2c0230c72626b6c \ - --hash=sha256:395be70220d1189756068b3173853029a013d8c8dd5fd3d1361d505b2aa58fa7 \ - --hash=sha256:3fe1d95944f9cf6ba77aa28b82dd6bb2a5b52f2026beb39ecf05304b8392864b \ - --hash=sha256:491d57fe89927fa1aafbe260f4cfa5ffa20ab9f1435025045a5315006a91b8f5 \ - --hash=sha256:4b17d4220df99bacb63065c76b0d1126d82bbf00167d1730019d2a30d6ae01ea \ - --hash=sha256:4c9d8fc81d6a3b6844235e6fd175ee1d4c060163905a2becce8e74cb0d7554ce \ - --hash=sha256:55cc79ce4085c702ac31e49b1e69b27ef41111f22beafb9b49fea67142b696c4 \ - --hash=sha256:5b190b935e7db569960b48840e5bef71dc513314cc4e79a1b7d14664f57fd4ff \ - --hash=sha256:5bd8d27d44e2c13d0c1124e6a556454f52cd3f704742985f6b09e75e163d20d2 \ - --hash=sha256:5dff14e75cdbcf07cdaa1c7707db6017d130f0af9ac41f6ce443a93318d6c6e0 \ - --hash=sha256:5eb0ca35d4b08e95da99a9f9c400dc9f6c21c424298a0ba876fdc69c7afacedf \ - --hash=sha256:63b9b6cd0333d0eb1a49de6f834e8aeaefe438df8f6372352084535ad095219e \ - --hash=sha256:667f950bf8b7c3a23b4199db24cb9bf7512e27e86d0e3813f015b74ec2c6e3df \ - --hash=sha256:6b3e71893c6687fc5e29208d518900c24ea372a862854c9888368c0b267387ab \ - --hash=sha256:71ba9a76c2390eca6e359be81a3e879614af3a71dfdabb96d1d7ab33da6f2364 \ - --hash=sha256:74bb864ff7640dea310a1377d8567dc2cb7599c26a79ca852fc184cc851954ac \ - --hash=sha256:82add84e8a9fb12af5c2c1a3a3f1cb51849d27a580cb9e6bd66226195142be6e \ - --hash=sha256:837299eec3d19b7e042923448d17d95a86e43941104d33f00da7e31a0f715d3c \ - --hash=sha256:900f3fa3db87257510f011c292a5779eb627043dd89731b9c461cd16ef76ab3d \ - --hash=sha256:9f151e9fb60fbf8e52426132f473221a49362091ce7a5e72f8aa41f8e0da4f25 \ - --hash=sha256:af0b61c1de46d0565b4b39c6417373304c1d4f5220004058bdad3061c9fa8a95 \ - --hash=sha256:bc7136626261ac1ed988dca56cfc4ab5180f75e0ee52e58f1e6aa74b5f3eacd5 \ - --hash=sha256:be3deeb32844c27599347faa077b359584ba96664c5c79d71a354b80a0ad0ce0 \ - --hash=sha256:c09aa9d90f3500ea4c9b393ee96f96b0ccb27f2f350d09a47f533293c78ea776 \ - --hash=sha256:c352c1b6d7cac452534517e022f8f7b8d139cd9f27e6fbd9f3cbd0bfd39f5bef \ - --hash=sha256:c64ded12dcab08afff9e805a67ff4480f5e69993310e093434b10e85dc9d43e1 \ - --hash=sha256:cdde8414154054763b42b74fe8ce89d7f3d17a7ac5dd77204f0e142cdc9239e9 \ - --hash=sha256:ce3a000cd28b4430426db2ca44d96636f701ed12e2b3ca1f2b1dd7abdd84b39a \ - --hash=sha256:f735bc41bd1c792c96bc426dece66c8723283695f02df61dcc4d0a707a42fc54 \ - --hash=sha256:f82fcf4e5b377f819542fbc8541f7b5fbcf1c0017d0df0bc22c781bf60abc4d8 - # via scikit-learn -threadpoolctl==3.5.0 \ - --hash=sha256:082433502dd922bf738de0d8bcc4fdcbf0979ff44c42bd40f5af8a282f6fa107 \ - --hash=sha256:56c1e26c150397e58c4926da8eeee87533b1e32bef131bd4bf6a2f45f3185467 - # via scikit-learn -pandas==2.1.4 \ - --hash=sha256:00028e6737c594feac3c2df15636d73ace46b8314d236100b57ed7e4b9ebe8d9 \ - --hash=sha256:0aa6e92e639da0d6e2017d9ccff563222f4eb31e4b2c3cf32a2a392fc3103c0d \ - --hash=sha256:1ebfd771110b50055712b3b711b51bee5d50135429364d0498e1213a7adc2be8 \ - --hash=sha256:294d96cfaf28d688f30c918a765ea2ae2e0e71d3536754f4b6de0ea4a496d034 \ - --hash=sha256:3f06bda01a143020bad20f7a85dd5f4a1600112145f126bc9e3e42077c24ef34 \ - --hash=sha256:426dc0f1b187523c4db06f96fb5c8d1a845e259c99bda74f7de97bd8a3bb3139 \ - --hash=sha256:45d63d2a9b1b37fa6c84a68ba2422dc9ed018bdaa668c7f47566a01188ceeec1 \ - --hash=sha256:482d5076e1791777e1571f2e2d789e940dedd927325cc3cb6d0800c6304082f6 \ - 
--hash=sha256:6b728fb8deba8905b319f96447a27033969f3ea1fea09d07d296c9030ab2ed1d \ - --hash=sha256:8a706cfe7955c4ca59af8c7a0517370eafbd98593155b48f10f9811da440248b \ - --hash=sha256:8ea107e0be2aba1da619cc6ba3f999b2bfc9669a83554b1904ce3dd9507f0860 \ - --hash=sha256:ab5796839eb1fd62a39eec2916d3e979ec3130509930fea17fe6f81e18108f6a \ - --hash=sha256:b0513a132a15977b4a5b89aabd304647919bc2169eac4c8536afb29c07c23540 \ - --hash=sha256:b7d852d16c270e4331f6f59b3e9aa23f935f5c4b0ed2d0bc77637a8890a5d092 \ - --hash=sha256:bd7d5f2f54f78164b3d7a40f33bf79a74cdee72c31affec86bfcabe7e0789821 \ - --hash=sha256:bdec823dc6ec53f7a6339a0e34c68b144a7a1fd28d80c260534c39c62c5bf8c9 \ - --hash=sha256:d2d3e7b00f703aea3945995ee63375c61b2e6aa5aa7871c5d622870e5e137623 \ - --hash=sha256:d65148b14788b3758daf57bf42725caa536575da2b64df9964c563b015230984 \ - --hash=sha256:d797591b6846b9db79e65dc2d0d48e61f7db8d10b2a9480b4e3faaddc421a171 \ - --hash=sha256:dc9bf7ade01143cddc0074aa6995edd05323974e6e40d9dbde081021ded8510e \ - --hash=sha256:e9f17f2b6fc076b2a0078862547595d66244db0f41bf79fc5f64a5c4d635bead \ - --hash=sha256:edbaf9e8d3a63a9276d707b4d25930a262341bca9874fcb22eff5e3da5394732 \ - --hash=sha256:f237e6ca6421265643608813ce9793610ad09b40154a3344a088159590469e46 \ - --hash=sha256:f69b0c9bb174a2342818d3e2778584e18c740d56857fc5cdb944ec8bbe4082cf \ - --hash=sha256:fcb68203c833cc735321512e13861358079a96c174a61f5116a1de89c58c0ef7 \ No newline at end of file diff --git a/templates/data/_partials/source_supression_event.html b/templates/data/_partials/source_supression_event.html index b348c79b4..3b6257b24 100644 --- a/templates/data/_partials/source_supression_event.html +++ b/templates/data/_partials/source_supression_event.html @@ -1,7 +1,7 @@ - action_type + type_action Acteur suggestion @@ -10,7 +10,7 @@ {% for suggestion_unitaire in suggestion_unitaires %} - {{ suggestion_unitaire.get_action_type_display }} + {{ suggestion_unitaire.get_type_action_display }} {% for key, value in suggestion_unitaire.display_acteur_details.items %}

{{ key }} : {{ value }}

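Note on the template fix above: it leans on Django's generated get_FOO_display() accessors, so renaming the model field from action_type to type_action also renames the accessor the partial must call. A minimal sketch of the mechanism, using a hypothetical Demo model and a shortened TypeAction choices class in place of the real suggestion model:

    from django.db import models

    class TypeAction(models.TextChoices):
        # Hypothetical subset of the real choices, kept short for illustration
        SOURCE_AJOUT = "SOURCE_AJOUT", "ingestion de source de données - nouveau acteur"
        SOURCE_SUPRESSION = "SOURCE_SUPRESSION", "ingestion de source de données"

    class Demo(models.Model):
        # Every field declared with `choices` gets a get_<field>_display()
        # method returning the human-readable label of the stored value.
        type_action = models.CharField(max_length=50, choices=TypeAction.choices)

In a template, {{ demo.get_type_action_display }} renders the label, while the stale {{ demo.get_action_type_display }} fails silently (Django falls back to string_if_invalid, an empty string by default), which is why the rename had to reach the partial as well.
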
From ec116f92c63092f698e28d963be0a8aefceb5b2d Mon Sep 17 00:00:00 2001 From: Nicolas Oudard Date: Thu, 16 Jan 2025 17:33:38 +0100 Subject: [PATCH 15/26] suppression type_action inutiles --- dags/sources/config/shared_constants.py | 2 -- .../business_logic/db_normalize_suggestion.py | 26 +++++++++---------- .../db_write_validsuggestions.py | 1 - data/migrations/0002_tables_suggestion.py | 23 ---------------- data/models.py | 7 ----- 5 files changed, 12 insertions(+), 47 deletions(-) diff --git a/dags/sources/config/shared_constants.py b/dags/sources/config/shared_constants.py index 18330f7a4..bf6c9961e 100755 --- a/dags/sources/config/shared_constants.py +++ b/dags/sources/config/shared_constants.py @@ -15,11 +15,9 @@ # SuggestionCohorte actions SUGGESTION_CLUSTERING = "CLUSTERING" -SUGGESTION_SOURCE = "SOURCE" SUGGESTION_SOURCE_AJOUT = "SOURCE_AJOUT" SUGGESTION_SOURCE_MISESAJOUR = "SOURCE_MISESAJOUR" SUGGESTION_SOURCE_SUPRESSION = "SOURCE_SUPRESSION" -SUGGESTION_ENRICHISSEMENT = "ENRICHISSEMENT" # Public accueilli PUBLIC_PAR = "Particuliers" diff --git a/dags/suggestions/tasks/business_logic/db_normalize_suggestion.py b/dags/suggestions/tasks/business_logic/db_normalize_suggestion.py index 01249eda8..57ddd2c9c 100644 --- a/dags/suggestions/tasks/business_logic/db_normalize_suggestion.py +++ b/dags/suggestions/tasks/business_logic/db_normalize_suggestion.py @@ -25,7 +25,7 @@ def db_normalize_suggestion(): df_sql["type_action"] == constants.SUGGESTION_SOURCE_AJOUT ] df_acteur_to_update = df_sql[ - df_sql["type_action"] == constants.SUGGESTION_SOURCE_AJOUT + df_sql["type_action"] == constants.SUGGESTION_SOURCE_MISESAJOUR ] df_acteur_to_delete = df_sql[ df_sql["type_action"] == constants.SUGGESTION_SOURCE_SUPRESSION @@ -33,11 +33,18 @@ def db_normalize_suggestion(): if not df_acteur_to_create.empty: normalized_dfs = df_acteur_to_create["suggestion"].apply(pd.json_normalize) df_acteur = pd.concat(normalized_dfs.tolist(), ignore_index=True) - return normalize_acteur_update_for_db(df_acteur, suggestion_cohorte_id, engine) + return normalize_acteur_update_for_db( + df_acteur, suggestion_cohorte_id, engine, constants.SUGGESTION_SOURCE_AJOUT + ) if not df_acteur_to_update.empty: normalized_dfs = df_acteur_to_update["suggestion"].apply(pd.json_normalize) df_acteur = pd.concat(normalized_dfs.tolist(), ignore_index=True) - return normalize_acteur_update_for_db(df_acteur, suggestion_cohorte_id, engine) + return normalize_acteur_update_for_db( + df_acteur, + suggestion_cohorte_id, + engine, + constants.SUGGESTION_SOURCE_MISESAJOUR, + ) if not df_acteur_to_delete.empty: normalized_dfs = df_acteur_to_delete["suggestion"].apply(pd.json_normalize) df_acteur = pd.concat(normalized_dfs.tolist(), ignore_index=True) @@ -51,7 +58,7 @@ def db_normalize_suggestion(): raise ValueError("No suggestion found") -def normalize_acteur_update_for_db(df_actors, dag_run_id, engine): +def normalize_acteur_update_for_db(df_actors, dag_run_id, engine, type_action): df_labels = process_many2many_df(df_actors, "labels") df_acteur_services = process_many2many_df( df_actors, "acteur_services", df_columns=["acteur_id", "acteurservice_id"] @@ -85,7 +92,7 @@ def normalize_acteur_update_for_db(df_actors, dag_run_id, engine): "dag_run_id": dag_run_id, "labels": df_labels[["acteur_id", "labelqualite_id"]], "acteur_services": df_acteur_services[["acteur_id", "acteurservice_id"]], - "change_type": constants.SUGGESTION_SOURCE, + "change_type": type_action, } @@ -102,12 +109,3 @@ def process_many2many_df(df, column_name, 
df_columns=["acteur_id", "labelqualite except KeyError: # Handle the case where the specified column does not exist return pd.DataFrame(columns=df_columns) - - -def normalize_acteur_delete_for_db(df_actors, dag_run_id): - - return { - "actors": df_actors, - "dag_run_id": dag_run_id, - "change_type": constants.SUGGESTION_SOURCE_SUPRESSION, - } diff --git a/dags/suggestions/tasks/business_logic/db_write_validsuggestions.py b/dags/suggestions/tasks/business_logic/db_write_validsuggestions.py index f1e03c549..eb84d02d9 100644 --- a/dags/suggestions/tasks/business_logic/db_write_validsuggestions.py +++ b/dags/suggestions/tasks/business_logic/db_write_validsuggestions.py @@ -25,7 +25,6 @@ def db_write_validsuggestions(data_from_db: dict): with engine.begin() as connection: if change_type in [ - constants.SUGGESTION_SOURCE, constants.SUGGESTION_SOURCE_AJOUT, constants.SUGGESTION_SOURCE_MISESAJOUR, ]: diff --git a/data/migrations/0002_tables_suggestion.py b/data/migrations/0002_tables_suggestion.py index 0cdbcef99..781e61179 100644 --- a/data/migrations/0002_tables_suggestion.py +++ b/data/migrations/0002_tables_suggestion.py @@ -40,7 +40,6 @@ class Migration(migrations.Migration): blank=True, choices=[ ("CLUSTERING", "regroupement/déduplication des acteurs"), - ("SOURCE", "ingestion de source de données"), ( "SOURCE_AJOUT", "ingestion de source de données - nouveau acteur", @@ -50,7 +49,6 @@ class Migration(migrations.Migration): "ingestion de source de données - modification d'acteur existant", ), ("SOURCE_SUPRESSION", "ingestion de source de données"), - ("ENRICHISSEMENT", "suggestion d'enrichissement"), ], max_length=250, ), @@ -99,27 +97,6 @@ class Migration(migrations.Migration): name="SuggestionUnitaire", fields=[ ("id", models.AutoField(primary_key=True, serialize=False)), - ( - "type_action", - models.CharField( - blank=True, - choices=[ - ("CLUSTERING", "regroupement/déduplication des acteurs"), - ("SOURCE", "ingestion de source de données"), - ( - "SOURCE_AJOUT", - "ingestion de source de données - nouveau acteur", - ), - ( - "SOURCE_MISESAJOUR", - "ingestion de source de données - modification d'acteur existant", - ), - ("SOURCE_SUPRESSION", "ingestion de source de données"), - ("ENRICHISSEMENT", "suggestion d'enrichissement"), - ], - max_length=250, - ), - ), ( "statut", models.CharField( diff --git a/data/models.py b/data/models.py index 329fc4872..730ec4247 100644 --- a/data/models.py +++ b/data/models.py @@ -6,11 +6,9 @@ SUGGESTION_AVALIDER, SUGGESTION_CLUSTERING, SUGGESTION_ENCOURS, - SUGGESTION_ENRICHISSEMENT, SUGGESTION_ERREUR, SUGGESTION_PARTIEL, SUGGESTION_REJETER, - SUGGESTION_SOURCE, SUGGESTION_SOURCE_AJOUT, SUGGESTION_SOURCE_MISESAJOUR, SUGGESTION_SOURCE_SUPRESSION, @@ -31,10 +29,6 @@ class SuggestionStatut(models.TextChoices): class SuggestionAction(models.TextChoices): CLUSTERING = SUGGESTION_CLUSTERING, "regroupement/déduplication des acteurs" - SOURCE = ( - SUGGESTION_SOURCE, - "ingestion de source de données", - ) SOURCE_AJOUT = ( SUGGESTION_SOURCE_AJOUT, "ingestion de source de données - nouveau acteur", @@ -44,7 +38,6 @@ class SuggestionAction(models.TextChoices): "ingestion de source de données - modification d'acteur existant", ) SOURCE_SUPPRESSION = SUGGESTION_SOURCE_SUPRESSION, "ingestion de source de données" - SOURCE_ENRICHISSEMENT = SUGGESTION_ENRICHISSEMENT, "suggestion d'enrichissement" class SuggestionCohorte(models.Model): From 5505b48ac2ec774f150a825ee8f9945916312097 Mon Sep 17 00:00:00 2001 From: Nicolas Oudard Date: Thu, 16 Jan 2025 17:50:09 +0100 
Subject: [PATCH 16/26] clean source dags files --- dags/sources/dags/source_aliapur.py | 15 ------------- dags/sources/dags/source_citeo.py | 19 ----------------- dags/sources/dags/source_cma.py | 1 - dags/sources/dags/source_corepile.py | 29 -------------------------- dags/sources/dags/source_ecodds.py | 10 --------- dags/sources/dags/source_ecologic.py | 25 ---------------------- dags/sources/dags/source_ecomaison.py | 15 ------------- dags/sources/dags/source_ecosystem.py | 27 ------------------------ dags/sources/dags/source_ocab.py | 23 -------------------- dags/sources/dags/source_ocad3e.py | 20 ------------------ dags/sources/dags/source_pharmacies.py | 4 ---- dags/sources/dags/source_refashion.py | 4 ---- dags/sources/dags/source_screlec.py | 14 ------------- dags/sources/dags/source_soren.py | 18 ---------------- dags/sources/dags/source_valdelia.py | 28 ------------------------- 15 files changed, 252 deletions(-) diff --git a/dags/sources/dags/source_aliapur.py b/dags/sources/dags/source_aliapur.py index 28fb8989d..89b4d216c 100755 --- a/dags/sources/dags/source_aliapur.py +++ b/dags/sources/dags/source_aliapur.py @@ -46,21 +46,6 @@ "transformation": "clean_public_accueilli", "destination": "public_accueilli", }, - # { - # "origin": "uniquement_sur_rdv", - # "transformation": "cast_eo_boolean_or_string_to_boolean", - # "destination": "uniquement_sur_rdv", - # }, - # { - # "origin": "exclusivite_de_reprisereparation", - # "transformation": "cast_eo_boolean_or_string_to_boolean", - # "destination": "exclusivite_de_reprisereparation", - # }, - # { - # "origin": "reprise", - # "transformation": "clean_reprise", - # "destination": "reprise", - # }, { "origin": "produitsdechets_acceptes", "transformation": "clean_souscategorie_codes", diff --git a/dags/sources/dags/source_citeo.py b/dags/sources/dags/source_citeo.py index c41972843..17608fa84 100755 --- a/dags/sources/dags/source_citeo.py +++ b/dags/sources/dags/source_citeo.py @@ -18,10 +18,6 @@ "origin": "nom_de_lorganisme", "destination": "nom", }, - # { - # "origin": "enseigne_commerciale", - # "destination": "nom_commercial", - # }, { "origin": "longitudewgs84", "destination": "longitude", @@ -46,21 +42,6 @@ "transformation": "clean_public_accueilli", "destination": "public_accueilli", }, - # { - # "origin": "uniquement_sur_rdv", - # "transformation": "cast_eo_boolean_or_string_to_boolean", - # "destination": "uniquement_sur_rdv", - # }, - # { - # "origin": "exclusivite_de_reprisereparation", - # "transformation": "cast_eo_boolean_or_string_to_boolean", - # "destination": "exclusivite_de_reprisereparation", - # }, - # { - # "origin": "reprise", - # "transformation": "clean_reprise", - # "destination": "reprise", - # }, { "origin": "produitsdechets_acceptes", "transformation": "clean_souscategorie_codes", diff --git a/dags/sources/dags/source_cma.py b/dags/sources/dags/source_cma.py index 4b2e132ae..78246df48 100755 --- a/dags/sources/dags/source_cma.py +++ b/dags/sources/dags/source_cma.py @@ -165,7 +165,6 @@ "ignore_duplicates": False, "validate_address_with_ban": False, "product_mapping": get_mapping_config(mapping_key="sous_categories_cma"), - "source_code": "cma_reparacteur", }, schedule=None, ) as dag: diff --git a/dags/sources/dags/source_corepile.py b/dags/sources/dags/source_corepile.py index 515b01fbb..21e5feef3 100755 --- a/dags/sources/dags/source_corepile.py +++ b/dags/sources/dags/source_corepile.py @@ -46,21 +46,6 @@ "transformation": "clean_public_accueilli", "destination": "public_accueilli", }, - # { - # "origin": 
"uniquement_sur_rdv", - # "transformation": "cast_eo_boolean_or_string_to_boolean", - # "destination": "uniquement_sur_rdv", - # }, - # { - # "origin": "exclusivite_de_reprisereparation", - # "transformation": "cast_eo_boolean_or_string_to_boolean", - # "destination": "exclusivite_de_reprisereparation", - # }, - # { - # "origin": "reprise", - # "transformation": "clean_reprise", - # "destination": "reprise", - # }, { "origin": "produitsdechets_acceptes", "transformation": "clean_souscategorie_codes", @@ -81,11 +66,6 @@ "transformation": "compute_location", "destination": ["location"], }, - # { - # "origin": ["labels_etou_bonus", "acteur_type_code"], - # "transformation": "clean_label_codes", - # "destination": ["label_codes"], - # }, { "origin": ["id_point_apport_ou_reparation", "nom"], "transformation": "clean_identifiant_externe", @@ -145,15 +125,6 @@ {"remove": "point_de_reparation"}, # 6. Colonnes à garder (rien à faire, utilisé pour le controle) ], - "column_mapping": { - "id_point_apport_ou_reparation": "identifiant_externe", - "type_de_point_de_collecte": "acteur_type_id", - "ecoorganisme": "source_id", - "nom_de_lorganisme": "nom", - "enseigne_commerciale": "nom_commercial", - "longitudewgs84": "longitude", - "latitudewgs84": "latitude", - }, "endpoint": ( "https://data.pointsapport.ademe.fr/data-fair/api/v1/datasets/" "donnees-eo-corepile/lines?size=10000" diff --git a/dags/sources/dags/source_ecodds.py b/dags/sources/dags/source_ecodds.py index 271712aed..d02c0ec80 100755 --- a/dags/sources/dags/source_ecodds.py +++ b/dags/sources/dags/source_ecodds.py @@ -46,11 +46,6 @@ "transformation": "clean_public_accueilli", "destination": "public_accueilli", }, - # { - # "origin": "uniquement_sur_rdv", - # "transformation": "cast_eo_boolean_or_string_to_boolean", - # "destination": "uniquement_sur_rdv", - # }, { "origin": "exclusivite_de_reprisereparation", "transformation": "cast_eo_boolean_or_string_to_boolean", @@ -95,11 +90,6 @@ "transformation": "clean_identifiant_unique", "destination": ["identifiant_unique"], }, - # { - # "origin": ["siret"], - # "transformation": "clean_siret_and_siren", - # "destination": ["siret", "siren"], - # }, { "origin": ["adresse_format_ban"], "transformation": "clean_adresse", diff --git a/dags/sources/dags/source_ecologic.py b/dags/sources/dags/source_ecologic.py index cb4d4f6dc..b4a0d9642 100755 --- a/dags/sources/dags/source_ecologic.py +++ b/dags/sources/dags/source_ecologic.py @@ -18,10 +18,6 @@ "origin": "nom_de_lorganisme", "destination": "nom", }, - # { - # "origin": "enseigne_commerciale", - # "destination": "nom_commercial", - # }, { "origin": "longitudewgs84", "destination": "longitude", @@ -46,16 +42,6 @@ "transformation": "clean_public_accueilli", "destination": "public_accueilli", }, - # { - # "origin": "uniquement_sur_rdv", - # "transformation": "cast_eo_boolean_or_string_to_boolean", - # "destination": "uniquement_sur_rdv", - # }, - # { - # "origin": "exclusivite_de_reprisereparation", - # "transformation": "cast_eo_boolean_or_string_to_boolean", - # "destination": "exclusivite_de_reprisereparation", - # }, { "origin": "reprise", "transformation": "clean_reprise", @@ -81,11 +67,6 @@ "transformation": "compute_location", "destination": ["location"], }, - # { - # "origin": ["labels_etou_bonus", "acteur_type_code"], - # "transformation": "clean_label_codes", - # "destination": ["label_codes"], - # }, { "origin": ["id_point_apport_ou_reparation", "nom"], "transformation": "clean_identifiant_externe", @@ -99,11 +80,6 @@ "transformation": 
"clean_identifiant_unique", "destination": ["identifiant_unique"], }, - # { - # "origin": ["siret"], - # "transformation": "clean_siret_and_siren", - # "destination": ["siret", "siren"], - # }, { "origin": ["adresse_format_ban"], "transformation": "clean_adresse", @@ -140,7 +116,6 @@ {"remove": "adresse_format_ban"}, {"remove": "id_point_apport_ou_reparation"}, {"remove": "point_de_collecte_ou_de_reprise_des_dechets"}, - # {"remove": "labels_etou_bonus"}, {"remove": "point_dapport_de_service_reparation"}, {"remove": "point_dapport_pour_reemploi"}, {"remove": "point_de_reparation"}, diff --git a/dags/sources/dags/source_ecomaison.py b/dags/sources/dags/source_ecomaison.py index a15e5be39..be1afecb3 100755 --- a/dags/sources/dags/source_ecomaison.py +++ b/dags/sources/dags/source_ecomaison.py @@ -56,16 +56,6 @@ "transformation": "cast_eo_boolean_or_string_to_boolean", "destination": "uniquement_sur_rdv", }, - # { - # "origin": "exclusivite_de_reprisereparation", - # "transformation": "cast_eo_boolean_or_string_to_boolean", - # "destination": "exclusivite_de_reprisereparation", - # }, - # { - # "origin": "reprise", - # "transformation": "clean_reprise", - # "destination": "reprise", - # }, { "origin": "produitsdechets_acceptes", "transformation": "clean_souscategorie_codes", @@ -86,11 +76,6 @@ "transformation": "compute_location", "destination": ["location"], }, - # { - # "origin": ["labels_etou_bonus", "acteur_type_code"], - # "transformation": "clean_label_codes", - # "destination": ["label_codes"], - # }, { "origin": ["id_point_apport_ou_reparation", "nom"], "transformation": "clean_identifiant_externe", diff --git a/dags/sources/dags/source_ecosystem.py b/dags/sources/dags/source_ecosystem.py index 5152bc695..fa438259b 100755 --- a/dags/sources/dags/source_ecosystem.py +++ b/dags/sources/dags/source_ecosystem.py @@ -46,21 +46,6 @@ "transformation": "clean_public_accueilli", "destination": "public_accueilli", }, - # { - # "origin": "uniquement_sur_rdv", - # "transformation": "cast_eo_boolean_or_string_to_boolean", - # "destination": "uniquement_sur_rdv", - # }, - # { - # "origin": "exclusivite_de_reprisereparation", - # "transformation": "cast_eo_boolean_or_string_to_boolean", - # "destination": "exclusivite_de_reprisereparation", - # }, - # { - # "origin": "reprise", - # "transformation": "clean_reprise", - # "destination": "reprise", - # }, { "origin": "produitsdechets_acceptes", "transformation": "clean_souscategorie_codes", @@ -81,11 +66,6 @@ "transformation": "compute_location", "destination": ["location"], }, - # { - # "origin": ["labels_etou_bonus", "acteur_type_code"], - # "transformation": "clean_label_codes", - # "destination": ["label_codes"], - # }, { "origin": ["id_point_apport_ou_reparation", "nom"], "transformation": "clean_identifiant_externe", @@ -111,8 +91,6 @@ }, { "origin": [ - # "point_dapport_de_service_reparation", - # "point_de_reparation", "point_dapport_pour_reemploi", "point_de_collecte_ou_de_reprise_des_dechets", ], @@ -121,8 +99,6 @@ }, { "origin": [ - # "point_dapport_de_service_reparation", - # "point_de_reparation", "point_dapport_pour_reemploi", "point_de_collecte_ou_de_reprise_des_dechets", ], @@ -140,10 +116,7 @@ {"remove": "adresse_format_ban"}, {"remove": "id_point_apport_ou_reparation"}, {"remove": "point_de_collecte_ou_de_reprise_des_dechets"}, - # {"remove": "labels_etou_bonus"}, - # {"remove": "point_dapport_de_service_reparation"}, {"remove": "point_dapport_pour_reemploi"}, - # {"remove": "point_de_reparation"}, {"remove": "siret"}, # 6. 
Colonnes à garder (rien à faire, utilisé pour le controle) {"keep": "adresse_complement"}, diff --git a/dags/sources/dags/source_ocab.py b/dags/sources/dags/source_ocab.py index ad45453e6..e23f970fd 100755 --- a/dags/sources/dags/source_ocab.py +++ b/dags/sources/dags/source_ocab.py @@ -46,21 +46,6 @@ "transformation": "clean_public_accueilli", "destination": "public_accueilli", }, - # { - # "origin": "uniquement_sur_rdv", - # "transformation": "cast_eo_boolean_or_string_to_boolean", - # "destination": "uniquement_sur_rdv", - # }, - # { - # "origin": "exclusivite_de_reprisereparation", - # "transformation": "cast_eo_boolean_or_string_to_boolean", - # "destination": "exclusivite_de_reprisereparation", - # }, - # { - # "origin": "reprise", - # "transformation": "clean_reprise", - # "destination": "reprise", - # }, { "origin": "produitsdechets_acceptes", "transformation": "clean_souscategorie_codes", @@ -81,11 +66,6 @@ "transformation": "compute_location", "destination": ["location"], }, - # { - # "origin": ["labels_etou_bonus", "acteur_type_code"], - # "transformation": "clean_label_codes", - # "destination": ["label_codes"], - # }, { "origin": ["id_point_apport_ou_reparation", "nom"], "transformation": "clean_identifiant_externe", @@ -140,10 +120,7 @@ {"remove": "adresse_format_ban"}, {"remove": "id_point_apport_ou_reparation"}, {"remove": "point_de_collecte_ou_de_reprise_des_dechets"}, - # {"remove": "labels_etou_bonus"}, - # {"remove": "point_dapport_de_service_reparation"}, {"remove": "point_dapport_pour_reemploi"}, - # {"remove": "point_de_reparation"}, # 6. Colonnes à garder (rien à faire, utilisé pour le controle) ], "endpoint": ( diff --git a/dags/sources/dags/source_ocad3e.py b/dags/sources/dags/source_ocad3e.py index 945553359..367891773 100755 --- a/dags/sources/dags/source_ocad3e.py +++ b/dags/sources/dags/source_ocad3e.py @@ -46,21 +46,11 @@ "transformation": "clean_public_accueilli", "destination": "public_accueilli", }, - # { - # "origin": "uniquement_sur_rdv", - # "transformation": "cast_eo_boolean_or_string_to_boolean", - # "destination": "uniquement_sur_rdv", - # }, { "origin": "exclusivite_de_reprisereparation", "transformation": "cast_eo_boolean_or_string_to_boolean", "destination": "exclusivite_de_reprisereparation", }, - # { - # "origin": "reprise", - # "transformation": "clean_reprise", - # "destination": "reprise", - # }, { "origin": "produitsdechets_acceptes", "transformation": "clean_souscategorie_codes", @@ -75,10 +65,6 @@ "column": "statut", "value": constants.ACTEUR_ACTIF, }, - # { - # "column": "label_codes", - # "value": [], - # }, # 4. 
Transformation du dataframe { "origin": ["latitude", "longitude"], @@ -122,8 +108,6 @@ "origin": [ "point_dapport_de_service_reparation", "point_de_reparation", - # "point_dapport_pour_reemploi", - # "point_de_collecte_ou_de_reprise_des_dechets", ], "transformation": "clean_acteurservice_codes", "destination": ["acteurservice_codes"], @@ -132,8 +116,6 @@ "origin": [ "point_dapport_de_service_reparation", "point_de_reparation", - # "point_dapport_pour_reemploi", - # "point_de_collecte_ou_de_reprise_des_dechets", ], "transformation": "clean_action_codes", "destination": ["action_codes"], @@ -148,10 +130,8 @@ {"remove": "_score"}, {"remove": "adresse_format_ban"}, {"remove": "id_point_apport_ou_reparation"}, - # {"remove": "point_de_collecte_ou_de_reprise_des_dechets"}, {"remove": "labels_etou_bonus"}, {"remove": "point_dapport_de_service_reparation"}, - # {"remove": "point_dapport_pour_reemploi"}, {"remove": "point_de_reparation"}, {"remove": "perimetre_dintervention"}, {"remove": "ecoorganisme"}, diff --git a/dags/sources/dags/source_pharmacies.py b/dags/sources/dags/source_pharmacies.py index 24e899c3b..6ca5ced72 100755 --- a/dags/sources/dags/source_pharmacies.py +++ b/dags/sources/dags/source_pharmacies.py @@ -11,10 +11,6 @@ params={ "normalization_rules": [ # 1. Renommage des colonnes - # { # fait en dur dans la code car l'apostrophe est mal géré par airflow - # "origin": "Numéro d\\'établissement", - # "destination": "identifiant_externe", - # }, # 2. Transformation des colonnes { "origin": "Raison sociale", diff --git a/dags/sources/dags/source_refashion.py b/dags/sources/dags/source_refashion.py index 9960f06a3..ce5ddc54b 100755 --- a/dags/sources/dags/source_refashion.py +++ b/dags/sources/dags/source_refashion.py @@ -80,10 +80,6 @@ "column": "statut", "value": constants.ACTEUR_ACTIF, }, - # { - # "column": "label_codes", - # "value": [], - # }, # 4. Transformation du dataframe { "origin": ["latitude", "longitude"], diff --git a/dags/sources/dags/source_screlec.py b/dags/sources/dags/source_screlec.py index db259fbc1..4b7ddf7d0 100644 --- a/dags/sources/dags/source_screlec.py +++ b/dags/sources/dags/source_screlec.py @@ -41,11 +41,6 @@ "transformation": "strip_lower_string", "destination": "source_code", }, - # { - # "origin": "site_web", - # "transformation": "clean_url", - # "destination": "url", - # }, { "origin": "type_de_point_de_collecte", "transformation": "clean_acteur_type_code", @@ -81,10 +76,6 @@ "column": "statut", "value": constants.ACTEUR_ACTIF, }, - # { - # "column": "label_codes", - # "value": [], - # }, # 4. 
Transformation du dataframe { "origin": ["latitude", "longitude"], @@ -119,11 +110,6 @@ "transformation": "clean_adresse", "destination": ["adresse", "code_postal", "ville"], }, - # { - # "origin": ["telephone", "code_postal"], - # "transformation": "clean_telephone", - # "destination": ["telephone"], - # }, { "origin": [ "point_dapport_de_service_reparation", diff --git a/dags/sources/dags/source_soren.py b/dags/sources/dags/source_soren.py index 04d8b0f47..9c4998e87 100755 --- a/dags/sources/dags/source_soren.py +++ b/dags/sources/dags/source_soren.py @@ -18,10 +18,6 @@ "origin": "nom_de_lorganisme", "destination": "nom", }, - # { - # "origin": "enseigne_commerciale", - # "destination": "nom_commercial", - # }, { "origin": "longitudewgs84", "destination": "longitude", @@ -36,11 +32,6 @@ "transformation": "strip_lower_string", "destination": "source_code", }, - # { - # "origin": "site_web", - # "transformation": "clean_url", - # "destination": "url", - # }, { "origin": "horaires_douverture", "transformation": "convert_opening_hours", @@ -81,10 +72,6 @@ "column": "statut", "value": constants.ACTEUR_ACTIF, }, - # { - # "column": "label_codes", - # "value": [], - # }, # 4. Transformation du dataframe { "origin": ["latitude", "longitude"], @@ -114,11 +101,6 @@ "transformation": "clean_adresse", "destination": ["adresse", "code_postal", "ville"], }, - # { - # "origin": ["telephone", "code_postal"], - # "transformation": "clean_telephone", - # "destination": ["telephone"], - # }, { "origin": [ "point_dapport_de_service_reparation", diff --git a/dags/sources/dags/source_valdelia.py b/dags/sources/dags/source_valdelia.py index cbd161091..afd37f36e 100755 --- a/dags/sources/dags/source_valdelia.py +++ b/dags/sources/dags/source_valdelia.py @@ -36,11 +36,6 @@ "transformation": "strip_lower_string", "destination": "source_code", }, - # { - # "origin": "site_web", - # "transformation": "clean_url", - # "destination": "url", - # }, { "origin": "type_de_point_de_collecte", "transformation": "clean_acteur_type_code", @@ -76,10 +71,6 @@ "column": "statut", "value": constants.ACTEUR_ACTIF, }, - # { - # "column": "label_codes", - # "value": [], - # }, # 4. Transformation du dataframe { "origin": ["latitude", "longitude"], @@ -156,25 +147,6 @@ {"remove": "point_de_reparation"}, # 6. 
Colonnes à garder (rien à faire, utilisé pour le controle) ], - "column_mapping": { - "id_point_apport_ou_reparation": "identifiant_externe", - "type_de_point_de_collecte": "acteur_type_id", - "exclusivite_de_reprisereparation": "exclusivite_de_reprisereparation", - "uniquement_sur_rdv": "uniquement_sur_rdv", - "public_accueilli": "public_accueilli", - "reprise": "reprise", - "siret": "siret", - "telephone": "telephone", - "produitsdechets_acceptes": "produitsdechets_acceptes", - "labels_etou_bonus": "labels_etou_bonus", - "point_de_reparation": "point_de_reparation", - "ecoorganisme": "source_id", - "adresse_format_ban": "adresse_format_ban", - "nom_de_lorganisme": "nom", - "perimetre_dintervention": "perimetre_dintervention", - "longitudewgs84": "longitude", - "latitudewgs84": "latitude", - }, "endpoint": ( "https://data.pointsapport.ademe.fr/data-fair/api/v1/datasets/" "donnees-eo-valdelia/lines?size=10000" From 7c8bbeb4a9e9da9d9509f645a53dd1cd5f4d66bb Mon Sep 17 00:00:00 2001 From: Nicolas Oudard Date: Sun, 19 Jan 2025 21:24:19 +0100 Subject: [PATCH 17/26] renomme fonction db_write_type_action_suggestions --- ...k.py => db_write_type_action_suggestions_task.py} | 12 +++++++----- dags/sources/tasks/airflow_logic/operators.py | 4 ++-- dags/sources/tasks/business_logic/db_data_prepare.py | 6 ------ ...estion.py => db_write_type_action_suggestions.py} | 2 +- 4 files changed, 10 insertions(+), 14 deletions(-) rename dags/sources/tasks/airflow_logic/{db_write_suggestion_task.py => db_write_type_action_suggestions_task.py} (79%) rename dags/sources/tasks/business_logic/{db_write_suggestion.py => db_write_type_action_suggestions.py} (98%) diff --git a/dags/sources/tasks/airflow_logic/db_write_suggestion_task.py b/dags/sources/tasks/airflow_logic/db_write_type_action_suggestions_task.py similarity index 79% rename from dags/sources/tasks/airflow_logic/db_write_suggestion_task.py rename to dags/sources/tasks/airflow_logic/db_write_type_action_suggestions_task.py index f372d0e0b..cdc1e5e25 100644 --- a/dags/sources/tasks/airflow_logic/db_write_suggestion_task.py +++ b/dags/sources/tasks/airflow_logic/db_write_type_action_suggestions_task.py @@ -2,21 +2,23 @@ from airflow import DAG from airflow.operators.python import PythonOperator -from sources.tasks.business_logic.db_write_suggestion import db_write_suggestion +from sources.tasks.business_logic.db_write_type_action_suggestions import ( + db_write_type_action_suggestions, +) from utils import logging_utils as log logger = logging.getLogger(__name__) -def db_write_suggestion_task(dag: DAG) -> PythonOperator: +def db_write_type_action_suggestions_task(dag: DAG) -> PythonOperator: return PythonOperator( task_id="db_write_suggestion", - python_callable=db_write_suggestion_wrapper, + python_callable=db_write_type_action_suggestions_wrapper, dag=dag, ) -def db_write_suggestion_wrapper(**kwargs) -> None: +def db_write_type_action_suggestions_wrapper(**kwargs) -> None: dag_name = kwargs["dag"].dag_display_name or kwargs["dag"].dag_id run_id = kwargs["run_id"] dfs_acteur = kwargs["ti"].xcom_pull(task_ids="db_data_prepare") @@ -40,7 +42,7 @@ def db_write_suggestion_wrapper(**kwargs) -> None: kwargs["ti"].xcom_push(key="skip", value=True) return - return db_write_suggestion( + return db_write_type_action_suggestions( dag_name=dag_name, run_id=run_id, df_acteur_to_create=df_acteur_to_create, diff --git a/dags/sources/tasks/airflow_logic/operators.py b/dags/sources/tasks/airflow_logic/operators.py index a17e0052e..ad4cddfb6 100755 --- 
a/dags/sources/tasks/airflow_logic/operators.py +++ b/dags/sources/tasks/airflow_logic/operators.py @@ -8,7 +8,7 @@ db_read_propositions_max_id_task, ) from sources.tasks.airflow_logic.db_write_suggestion_task import ( - db_write_suggestion_task, + db_write_type_action_suggestions_task, ) from sources.tasks.airflow_logic.propose_acteur_changes_task import ( propose_acteur_changes_task, @@ -93,5 +93,5 @@ def eo_task_chain(dag: DAG) -> None: create_tasks, propose_services_sous_categories_task(dag), db_data_prepare_task(dag), - db_write_suggestion_task(dag), + db_write_type_action_suggestions_task(dag), ) diff --git a/dags/sources/tasks/business_logic/db_data_prepare.py b/dags/sources/tasks/business_logic/db_data_prepare.py index 98e804a49..c2e5e216e 100644 --- a/dags/sources/tasks/business_logic/db_data_prepare.py +++ b/dags/sources/tasks/business_logic/db_data_prepare.py @@ -23,12 +23,6 @@ def db_data_prepare( df_acteur_to_delete["suggestion"] = df_acteur_to_delete[ update_actors_columns ].apply(lambda row: json.dumps(row.to_dict(), default=str), axis=1) - # Created or updated Acteurs - # df_acteur_services = ( - # df_acteur_services - # if df_acteur_services is not None - # else pd.DataFrame(columns=["acteur_id", "acteurservice_id"]) - # ) if df_acteur.empty: raise ValueError("df_acteur est vide") diff --git a/dags/sources/tasks/business_logic/db_write_suggestion.py b/dags/sources/tasks/business_logic/db_write_type_action_suggestions.py similarity index 98% rename from dags/sources/tasks/business_logic/db_write_suggestion.py rename to dags/sources/tasks/business_logic/db_write_type_action_suggestions.py index 2e921599d..24d48931b 100644 --- a/dags/sources/tasks/business_logic/db_write_suggestion.py +++ b/dags/sources/tasks/business_logic/db_write_type_action_suggestions.py @@ -9,7 +9,7 @@ logger = logging.getLogger(__name__) -def db_write_suggestion( +def db_write_type_action_suggestions( dag_name: str, run_id: str, df_acteur_to_create: pd.DataFrame, From a64edb43c833b757411fb95683d626f2cd260251 Mon Sep 17 00:00:00 2001 From: Nicolas Oudard Date: Mon, 20 Jan 2025 09:41:21 +0100 Subject: [PATCH 18/26] =?UTF-8?q?corrections=20propos=C3=A9=20lors=20de=20?= =?UTF-8?q?la=20relecture?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- core/views.py | 8 ++ dags/sources/tasks/airflow_logic/operators.py | 2 +- .../business_logic/source_data_normalize.py | 35 ++--- dags/sources/tasks/transform/transform_df.py | 1 - dags/utils/base_utils.py | 1 - data/admin.py | 6 +- data/apps.py | 2 + data/migrations/0001_bancache.py | 6 +- data/migrations/0002_tables_suggestion.py | 16 +-- data/models.py | 28 ++-- data/urls.py | 6 +- data/views.py | 126 ++---------------- docs/reference/303-systeme-de-suggestions.md | 52 +------- qfdmo/views/dags.py | 9 +- 14 files changed, 80 insertions(+), 218 deletions(-) create mode 100644 core/views.py diff --git a/core/views.py b/core/views.py new file mode 100644 index 000000000..4439d12e9 --- /dev/null +++ b/core/views.py @@ -0,0 +1,8 @@ +from django.contrib.auth.mixins import LoginRequiredMixin + + +class IsStaffMixin(LoginRequiredMixin): + def dispatch(self, request, *args, **kwargs): + if not request.user.is_staff: + return self.handle_no_permission() + return super().dispatch(request, *args, **kwargs) diff --git a/dags/sources/tasks/airflow_logic/operators.py b/dags/sources/tasks/airflow_logic/operators.py index ad4cddfb6..efa5fba9f 100755 --- a/dags/sources/tasks/airflow_logic/operators.py +++ 
b/dags/sources/tasks/airflow_logic/operators.py @@ -7,7 +7,7 @@ from sources.tasks.airflow_logic.db_read_propositions_max_id_task import ( db_read_propositions_max_id_task, ) -from sources.tasks.airflow_logic.db_write_suggestion_task import ( +from sources.tasks.airflow_logic.db_write_type_action_suggestions_task import ( db_write_type_action_suggestions_task, ) from sources.tasks.airflow_logic.propose_acteur_changes_task import ( diff --git a/dags/sources/tasks/business_logic/source_data_normalize.py b/dags/sources/tasks/business_logic/source_data_normalize.py index 8830fc8cb..40026ec3d 100755 --- a/dags/sources/tasks/business_logic/source_data_normalize.py +++ b/dags/sources/tasks/business_logic/source_data_normalize.py @@ -52,7 +52,7 @@ def _transform_columns(df: pd.DataFrame, dag_config: DAGConfig) -> pd.DataFrame: for column_to_transform in columns_to_transform: function_name = column_to_transform.transformation normalisation_function = get_transformation_function(function_name, dag_config) - # logger.warning(f"Transformation {function_name}") + logger.warning(f"Transformation {function_name}") df[column_to_transform.destination] = df[column_to_transform.origin].apply( normalisation_function ) @@ -70,7 +70,7 @@ def _transform_df(df: pd.DataFrame, dag_config: DAGConfig) -> pd.DataFrame: for column_to_transform_df in columns_to_transform_df: function_name = column_to_transform_df.transformation normalisation_function = get_transformation_function(function_name, dag_config) - # logger.warning(f"Transformation {function_name}") + logger.warning(f"Transformation {function_name}") df[column_to_transform_df.destination] = df[ column_to_transform_df.origin ].apply(normalisation_function, axis=1) @@ -141,6 +141,22 @@ def _remove_undesired_lines(df: pd.DataFrame, dag_config: DAGConfig) -> pd.DataF return df +def _display_warning_about_missing_location(df: pd.DataFrame) -> None: + # TODO: A voir ce qu'on doit faire de ces acteurs non digitaux mais sans + # localisation (proposition : les afficher en erreur directement ?) + if "location" in df.columns and "acteur_type_code" in df.columns: + df_acteur_sans_loc = df[ + (df["location"].isnull()) & (df["acteur_type_code"] != "acteur_digital") + ] + if not df_acteur_sans_loc.empty: + nb_acteurs = len(df) + logger.warning( + f"Nombre d'acteur sans localisation: {len(df_acteur_sans_loc)} / " + f"{nb_acteurs}" + ) + log.preview("Acteurs sans localisation", df_acteur_sans_loc) + + def source_data_normalize( df_acteur_from_source: pd.DataFrame, dag_config: DAGConfig, @@ -191,19 +207,8 @@ def source_data_normalize( # Merge et suppression des lignes indésirables df = _remove_undesired_lines(df, dag_config) - # TODO: A voir ce qu'on doit faire de ces acteurs non digitaux mais sans - # localisation (proposition : les afficher en erreur directement ?) 
-    if "location" in df.columns and "acteur_type_code" in df.columns:
-        df_acteur_sans_loc = df[
-            (df["location"].isnull()) & (df["acteur_type_code"] != "acteur_digital")
-        ]
-        if not df_acteur_sans_loc.empty:
-            nb_acteurs = len(df)
-            logger.warning(
-                f"Nombre d'acteur sans localisation: {len(df_acteur_sans_loc)} / "
-                f"{nb_acteurs}"
-            )
-            log.preview("Acteurs sans localisation", df_acteur_sans_loc)
+    # Log si des localisations sont manquantes parmi les acteurs non digitaux
+    _display_warning_about_missing_location(df)
 
     log.preview("df après normalisation", df)
     if df.empty:
diff --git a/dags/sources/tasks/transform/transform_df.py b/dags/sources/tasks/transform/transform_df.py
index df1516781..507d144d5 100644
--- a/dags/sources/tasks/transform/transform_df.py
+++ b/dags/sources/tasks/transform/transform_df.py
@@ -227,7 +227,6 @@ def compute_location(row: pd.Series, _):
     lng_column = row.keys()[1]
     row[lat_column] = parse_float(row[lat_column])
     row[lng_column] = parse_float(row[lng_column])
-    print(row[lat_column], row[lng_column])
     row["location"] = transform_location(row[lng_column], row[lat_column])
     return row[["location"]]
diff --git a/dags/utils/base_utils.py b/dags/utils/base_utils.py
index 98548e25a..85d01a098 100755
--- a/dags/utils/base_utils.py
+++ b/dags/utils/base_utils.py
@@ -115,7 +115,6 @@ def extract_details(row, col="adresse_format_ban"):
 
 def transform_location(longitude, latitude):
     if not longitude or not latitude or math.isnan(longitude) or math.isnan(latitude):
-        print("Longitude or latitude is missing.")
         return None
     return wkb.dumps(Point(longitude, latitude)).hex()
diff --git a/data/admin.py b/data/admin.py
index 11b18b4dd..b15dcb50f 100644
--- a/data/admin.py
+++ b/data/admin.py
@@ -1,15 +1,15 @@
 from django.contrib.gis import admin
 
-from data.models import SuggestionCohorte, SuggestionUnitaire
+from data.models import Suggestion, SuggestionCohorte
 
 
 class SuggestionCohorteAdmin(admin.ModelAdmin):
     pass
 
 
-class SuggestionUnitaireAdmin(admin.ModelAdmin):
+class SuggestionAdmin(admin.ModelAdmin):
     pass
 
 
 admin.site.register(SuggestionCohorte, SuggestionCohorteAdmin)
-admin.site.register(SuggestionUnitaire, SuggestionUnitaireAdmin)
+admin.site.register(Suggestion, SuggestionAdmin)
diff --git a/data/apps.py b/data/apps.py
index b882be950..29617343f 100644
--- a/data/apps.py
+++ b/data/apps.py
@@ -4,3 +4,5 @@
 class DataConfig(AppConfig):
     default_auto_field = "django.db.models.BigAutoField"
     name = "data"
+    label = "data"
+    verbose_name = "Gestion des interactions avec la plateforme de données"
diff --git a/data/migrations/0001_bancache.py b/data/migrations/0001_bancache.py
index 887041a9a..01ef4502d 100644
--- a/data/migrations/0001_bancache.py
+++ b/data/migrations/0001_bancache.py
@@ -26,12 +26,12 @@ class Migration(migrations.Migration):
                         verbose_name="ID",
                     ),
                 ),
-                ("adresse", models.CharField(blank=True, max_length=255, null=True)),
+                ("adresse", models.CharField(blank=True, null=True)),
                 (
                     "code_postal",
-                    models.CharField(blank=True, max_length=255, null=True),
+                    models.CharField(blank=True, null=True),
                 ),
-                ("ville", models.CharField(blank=True, max_length=255, null=True)),
+                ("ville", models.CharField(blank=True, null=True)),
                 (
                     "location",
                     django.contrib.gis.db.models.fields.PointField(
diff --git a/data/migrations/0002_tables_suggestion.py b/data/migrations/0002_tables_suggestion.py
index 781e61179..9008c58aa 100644
--- a/data/migrations/0002_tables_suggestion.py
+++ b/data/migrations/0002_tables_suggestion.py
@@ -23,15 +23,15 @@ class Migration(migrations.Migration):
                 (
                    
"identifiant_action", models.CharField( - help_text="Identifiant de l'action (ex : dag_id pour Airflow)", - max_length=250, + verbose_name="Identifiant de l'action", + help_text="(ex : dag_id pour Airflow)", ), ), ( "identifiant_execution", models.CharField( - help_text="Identifiant de l'execution (ex : run_id pour Airflow)", - max_length=250, + verbose_name="Identifiant de l'execution", + help_text="(ex : run_id pour Airflow)", ), ), ( @@ -50,7 +50,7 @@ class Migration(migrations.Migration): ), ("SOURCE_SUPRESSION", "ingestion de source de données"), ], - max_length=250, + max_length=50, ), ), ( @@ -94,7 +94,7 @@ class Migration(migrations.Migration): ], ), migrations.CreateModel( - name="SuggestionUnitaire", + name="Suggestion", fields=[ ("id", models.AutoField(primary_key=True, serialize=False)), ( @@ -117,14 +117,14 @@ class Migration(migrations.Migration): "context", models.JSONField( blank=True, - help_text="Contexte de la suggestion : données initiales", + verbose_name="Contexte de la suggestion : données initiales", null=True, ), ), ( "suggestion", models.JSONField( - blank=True, help_text="Suggestion de modification" + blank=True, verbose_name="Suggestion de modification" ), ), ( diff --git a/data/models.py b/data/models.py index 730ec4247..eb0fc140c 100644 --- a/data/models.py +++ b/data/models.py @@ -45,15 +45,15 @@ class SuggestionCohorte(models.Model): # On utilise identifiant car le champ n'est pas utilisé pour résoudre une relation # en base de données identifiant_action = models.CharField( - max_length=250, help_text="Identifiant de l'action (ex : dag_id pour Airflow)" + verbose_name="Identifiant de l'action", help_text="(ex : dag_id pour Airflow)" ) identifiant_execution = models.CharField( - max_length=250, - help_text="Identifiant de l'execution (ex : run_id pour Airflow)", + verbose_name="Identifiant de l'execution", + help_text="(ex : run_id pour Airflow)", ) type_action = models.CharField( choices=SuggestionAction.choices, - max_length=250, + max_length=50, blank=True, ) statut = models.CharField( @@ -62,7 +62,9 @@ class SuggestionCohorte(models.Model): default=SuggestionStatut.AVALIDER, ) metadata = models.JSONField( - null=True, blank=True, help_text="Metadata de la cohorte, données statistiques" + null=True, + blank=True, + verbose_name="Metadata de la cohorte, données statistiques", ) cree_le = models.DateTimeField(auto_now_add=True, db_default=Now()) modifie_le = models.DateTimeField(auto_now=True, db_default=Now()) @@ -86,7 +88,7 @@ def __str__(self) -> str: return f"{self.identifiant_action} - {self.identifiant_execution}" -class SuggestionUnitaire(models.Model): +class Suggestion(models.Model): id = models.AutoField(primary_key=True) suggestion_cohorte = models.ForeignKey( SuggestionCohorte, on_delete=models.CASCADE, related_name="suggestion_unitaires" @@ -97,9 +99,11 @@ class SuggestionUnitaire(models.Model): default=SuggestionStatut.AVALIDER, ) context = models.JSONField( - null=True, blank=True, help_text="Contexte de la suggestion : données initiales" + null=True, + blank=True, + verbose_name="Contexte de la suggestion : données initiales", ) - suggestion = models.JSONField(blank=True, help_text="Suggestion de modification") + suggestion = models.JSONField(blank=True, verbose_name="Suggestion de modification") cree_le = models.DateTimeField(auto_now_add=True, db_default=Now()) modifie_le = models.DateTimeField(auto_now=True, db_default=Now()) @@ -152,11 +156,11 @@ def display_proposition_service(self): class BANCache(models.Model): class Meta: verbose_name 
= "Cache BAN" - verbose_name_plural = "Cache BAN" + verbose_name_plural = "Caches BAN" - adresse = models.CharField(max_length=255, blank=True, null=True) - code_postal = models.CharField(max_length=255, blank=True, null=True) - ville = models.CharField(max_length=255, blank=True, null=True) + adresse = models.CharField(blank=True, null=True) + code_postal = models.CharField(blank=True, null=True) + ville = models.CharField(blank=True, null=True) location = models.PointField(blank=True, null=True) ban_returned = models.JSONField(blank=True, null=True) modifie_le = models.DateTimeField(auto_now=True, db_default=Now()) diff --git a/data/urls.py b/data/urls.py index 3419263eb..b4026f355 100644 --- a/data/urls.py +++ b/data/urls.py @@ -1,11 +1,11 @@ from django.urls import path -from data.views import SuggestionManagment +from data.views import SuggestionManagement urlpatterns = [ path( - "suggestions", - SuggestionManagment.as_view(), + "suggestions/", + SuggestionManagement.as_view(), name="suggestions", ), ] diff --git a/data/views.py b/data/views.py index 7e9b0a7fe..b6ab0e7fb 100644 --- a/data/views.py +++ b/data/views.py @@ -1,19 +1,16 @@ +""" +DEPRECATED: cette vue sera bentôt caduque, on utilisera l'administration django +""" + from django.contrib import messages -from django.contrib.auth.mixins import LoginRequiredMixin from django.shortcuts import render +from django.urls import reverse from django.views.generic.edit import FormView +from core.views import IsStaffMixin from data.forms import SuggestionCohorteForm from data.models import SuggestionAction, SuggestionStatut - -class IsStaffMixin(LoginRequiredMixin): - def dispatch(self, request, *args, **kwargs): - if not request.user.is_staff: - return self.handle_no_permission() - return super().dispatch(request, *args, **kwargs) - - ACTION_TO_VERB = { SuggestionAction.SOURCE_AJOUT: "ajoutera", SuggestionAction.SOURCE_SUPPRESSION: "supprimera", @@ -21,10 +18,13 @@ def dispatch(self, request, *args, **kwargs): } -class SuggestionManagment(IsStaffMixin, FormView): +class SuggestionManagement(IsStaffMixin, FormView): form_class = SuggestionCohorteForm template_name = "data/dags_validations.html" - success_url = "/data/suggestions" + # success_url = "/data/suggestions" + + def get_success_url(self) -> str: + return reverse("data:suggestions") def form_valid(self, form): # MANAGE search and display suggestion_cohorte details @@ -69,107 +69,3 @@ def form_valid(self, form): def form_invalid(self, form): messages.error(self.request, "Il y a des erreurs dans le formulaire.") return super().form_invalid(form) - - -# class DagsValidationDeprecated(IsStaffMixin, FormView): -# form_class = SuggestionCohorteForm -# template_name = "qfdmo/dags_validations.html" -# success_url = "/dags/validations" - -# def get_initial(self): -# initial = super().get_initial() -# initial["suggestion_cohorte"] = self.request.GET.get("suggestion_cohorte") -# return initial - -# def post(self, request, *args, **kwargs): - -# dag_valid = request.POST.get("dag_valid") -# if dag_valid in ["1", "0"]: -# return self.form_valid(self.get_form()) -# else: -# suggestion_cohorte_obj = SuggestionCohorte.objects.get( -# pk=request.POST.get("suggestion_cohorte") -# ) -# id = request.POST.get("id") -# suggestion_unitaire = suggestion_cohorte_obj.suggestion_unitaires.filter( -# id=id -# ).first() -# identifiant_unique = request.POST.get("identifiant_unique") -# index = request.POST.get("index") -# action = request.POST.get("action") - -# if action == "validate": -# 
-#                 suggestion_unitaire.update_row_update_candidate(
-#                     SuggestionStatut.ATRAITER.value, index
-#                 )
-#             elif action == "reject":
-#                 suggestion_unitaire.update_row_update_candidate(
-#                     SuggestionStatut.REJETER.value, index
-#                 )
-
-#             updated_candidat = suggestion_unitaire.get_candidat(index)
-
-#             return render(
-#                 request,
-#                 "qfdmo/partials/candidat_row.html",
-#                 {
-#                     "identifiant_unique": identifiant_unique,
-#                     "candidat": updated_candidat,
-#                     "index": index,
-#                     "request": request,
-#                     "suggestion_cohorte": request.POST.get("suggestion_cohorte"),
-#                     "suggestion_unitaire": suggestion_unitaire,
-#                 },
-#             )
-
-#     def get_context_data(self, **kwargs):
-#         context = super().get_context_data(**kwargs)
-#         if self.request.GET.get("suggestion_cohorte"):
-#             suggestion_cohorte = SuggestionCohorte.objects.get(
-#                 pk=self.request.GET.get("suggestion_cohorte")
-#             )
-#             context["suggestion_cohorte_instance"] = suggestion_cohorte
-#             suggestion_unitaires = (
-#                 suggestion_cohorte.suggestion_unitaires.all().order_by("?")[:100]
-#             )
-#             context["suggestion_unitaires"] = suggestion_unitaires
-
-#             if (
-#                 suggestion_unitaires
-#                 and suggestion_unitaires[0].change_type == "UPDATE_ACTOR"
-#             ):
-#                 # Pagination
-#                 suggestion_unitaires = (
-#                     suggestion_cohorte.suggestion_unitaires.all().order_by("id")
-#                 )
-#                 paginator = Paginator(suggestion_unitaires, 100)
-#                 page_number = self.request.GET.get("page")
-#                 page_obj = paginator.get_page(page_number)
-#                 context["suggestion_unitaires"] = page_obj
-
-#         return context
-
-#     def form_valid(self, form):
-#         if not form.is_valid():
-#             raise ValueError("Form is not valid")
-#         suggestion_cohorte_id = form.cleaned_data["suggestion_cohorte"].id
-#         suggestion_cohorte_obj = (
-#             SuggestionCohorte.objects.get(pk=suggestion_cohorte_id)
-#         )
-#         new_status = (
-#             SuggestionStatut.ATRAITER.value
-#             if self.request.POST.get("dag_valid") == "1"
-#             else SuggestionStatut.REJETER.value
-#         )
-
-#         # FIXME: I am not sure we need the filter here
-#         suggestion_cohorte_obj.suggestion_unitaires.filter(
-#             status=SuggestionStatut.AVALIDER.value
-#         ).update(status=new_status)
-
-#         logging.info(f"{suggestion_cohorte_id} - {self.request.user}")
-
-#         suggestion_cohorte_obj.statut = new_status
-#         suggestion_cohorte_obj.save()
-
-#         return super().form_valid(form)
diff --git a/docs/reference/303-systeme-de-suggestions.md b/docs/reference/303-systeme-de-suggestions.md
index f42cee410..dc2f7e062 100644
--- a/docs/reference/303-systeme-de-suggestions.md
+++ b/docs/reference/303-systeme-de-suggestions.md
@@ -4,62 +4,18 @@
 Cette proposition de modification de l'architecture pour faire évoluer le système de suggestion est un travail itératif. Il est donc nessaire de garder en tête la cibe et le moyen d'y aller.
 
-## Existant et problématique
-
-il existe les tables `dagrun` et `dagrunchange`:
-
-- `dagrun` représente un ensemble de suggestions produit par l'execution d'un DAG airflow
-- `dagrinchange` représente la suggestion de modification pour une ligne donnée
-
-On a quelques problème de lisibilité des ces tables:
-
-- les types des évenements sont imprécis et utilisé pour plusieurs propos, par exemple, `UPDATE_ACTOR` est utilisé pour des propositions de siretisation et de suppression de acteurs lors de l'ingestion de la source
-- les types des évenements sont définis au niveau de chaque ligne, pour connaitre le type de
-- si une ligne est problématique, aucune ligne n'est mise à jour
-- on n'à pas de vu sur les DAG qui on réussi ou se sont terminés en erreur
-
-## Proposition d'amélioration
-
-### Base de données
-
-- Renommage des tables : `dagrun` -> `suggestion_cohorte` , `dagrunchange` -> `suggestion_unitaire`
-- Écrire les champs en français comme le reste des tables de l'application
-- Revu des statuts de `suggestion_cohorte` : à traiter, en cours de traitement, fini avec succès, fini avec succès partiel, fini en erreur
-- Ajout d'un type d'évenement à `suggestion_cohorte` : source, enrichissement
-- Ajout d'un sous-type d'évenement à `suggestion_cohorte` : source - ajout acteur, source - suppression acteur, source - modification acteur, enrichissement - déménagement…
-- Ajout de champ pour stocker le message de sortie (au moins en cas d'erreur)
-- Paramettre de tolérance d'erreur
-- 2 champs JSON, 1 context initial, 1 suggestion
-
-### Interface
-
-Si possible, utiliser l'interface d'administration de Django pour gérer les suggestions (cela devrait bien fonctionner au mons pour la partie `ingestion des sources`).
-
-- Division des interfaces de validation :
-  - `ingestion des sources` : nouvelles sources ou nouvelle version d'une source existante
-  - `enrichissements` : fermetures, démenagements, enrichissement avec annuaire-entrprise, l'API BAN ou d'autres API
-- Ajout de filtre sur le statut (à traiter est sélectionné par défaut)
-- Ajout de la pagination
-- permettre de cocher les suggestions et d'executer une action our l'ensemble
-
-### Pipeline
-
-- Le DAG de validation des cohortes doit intégrer la même architecture que les autres DAGs
-
-# Cible
-
 ## Systeme de Suggestion
 
-Les suggestions sont crées par l'exécution d'un pipeline ou d'un script. Les suggestions sont faites par paquet qu'on appelle **Cohorte**, les Cohortes comprennent un ensemble de suggestion de mofification
+Les suggestions sont créées par l'exécution d'un pipeline ou d'un script. Elles sont regroupées par paquets qu'on appelle **Cohortes** : chaque cohorte comprend un ensemble de suggestions de modification.
 
 Les cohortes ont un type d'événement : `clustering`, `enrichissement`, `source` selon le type de l'action lancée à l'origine de la suggestion de modification
 
 Les cohortes et les suggestions ont un statut de traitement qui représente leur cycle de vie : `à valider`, `rejeter`, `à traiter`, `en cours de traitement`, `fini avec succès`, `fini avec succès partiel` (uniquement pour les cohortes), `fini en erreur`
 
-### Representation dans Django
+### Représentation dans Django
 
-- SuggestionCohorte représente les cohortes
-- SuggestionUnitaire représente les propositions de modification
+- SuggestionCohorte représente les cohortes, c'est-à-dire un ensemble de suggestions de la même nature
+- Suggestion représente les propositions de modification
 
 ### Cycle de vie d'une suggestion
 
diff --git a/qfdmo/views/dags.py b/qfdmo/views/dags.py
index 4db1618a5..582b2a5e6 100644
--- a/qfdmo/views/dags.py
+++ b/qfdmo/views/dags.py
@@ -4,22 +4,15 @@
 
 import logging
 
-from django.contrib.auth.mixins import LoginRequiredMixin
 from django.core.paginator import Paginator
 from django.shortcuts import render
 from django.views.generic.edit import FormView
 
+from core.views import IsStaffMixin
 from qfdmo.forms import DagsForm
 from qfdmo.models.data import DagRun, DagRunStatus
 
 
-class IsStaffMixin(LoginRequiredMixin):
-    def dispatch(self, request, *args, **kwargs):
-        if not request.user.is_staff:
-            return self.handle_no_permission()
-        return super().dispatch(request, *args, **kwargs)
-
-
 class DagsValidation(IsStaffMixin, FormView):
     form_class = DagsForm
     template_name = "qfdmo/dags_validations.html"

From 9f62facb48a93a2ee2124d5f6195fba4a5bad632 Mon Sep 17 00:00:00 2001
From: Nicolas Oudard
Date: Mon, 20 Jan 2025 11:25:57 +0100
Subject: [PATCH 19/26] fix suggestion process

---
 .../db_write_type_action_suggestions.py       |  2 +-
 .../business_logic/db_normalize_suggestion.py | 54 +++++++++----------
 .../db_read_suggestiontoprocess.py            | 16 ++++--
 data/models.py                                |  1 -
 unit_tests/data/test_models.py                | 36 +++++++++++++
 5 files changed, 75 insertions(+), 34 deletions(-)
 create mode 100644 unit_tests/data/test_models.py

diff --git a/dags/sources/tasks/business_logic/db_write_type_action_suggestions.py b/dags/sources/tasks/business_logic/db_write_type_action_suggestions.py
index 24d48931b..d8f9fe7be 100644
--- a/dags/sources/tasks/business_logic/db_write_type_action_suggestions.py
+++ b/dags/sources/tasks/business_logic/db_write_type_action_suggestions.py
@@ -85,7 +85,7 @@ def insert_suggestion(
     df["suggestion_cohorte_id"] = suggestion_cohorte_id
     df["statut"] = constants.SUGGESTION_AVALIDER
     df[["suggestion", "suggestion_cohorte_id", "statut"]].to_sql(
-        "data_suggestionunitaire",
+        "data_suggestion",
         engine,
         if_exists="append",
         index=False,
diff --git a/dags/suggestions/tasks/business_logic/db_normalize_suggestion.py b/dags/suggestions/tasks/business_logic/db_normalize_suggestion.py
index 57ddd2c9c..27e094844 100644
--- a/dags/suggestions/tasks/business_logic/db_normalize_suggestion.py
+++ b/dags/suggestions/tasks/business_logic/db_normalize_suggestion.py
@@ -1,3 +1,5 @@
+import logging
+
 import pandas as pd
 from shared.tasks.database_logic.db_manager import PostgresConnectionManager
 from sources.config import shared_constants as constants
@@ -8,51 +10,49 @@
 
 
 def db_normalize_suggestion():
-    row = get_first_suggetsioncohorte_to_insert()
-    suggestion_cohorte_id = row[0]
+    suggestion_cohorte = get_first_suggetsioncohorte_to_insert()
+    if suggestion_cohorte is None:
+        raise ValueError("No suggestion found")
+    suggestion_cohorte_id = suggestion_cohorte["id"]
+    type_action = suggestion_cohorte["type_action"]
+    logging.warning(f"Processing suggestion_cohorte_id: {suggestion_cohorte_id}")
+    logging.warning(f"Processing suggestion_cohorte: {suggestion_cohorte}")
+    logging.warning(
+        f"Processing suggestion_cohorte: {suggestion_cohorte['type_action']}"
+    )
 
     engine = PostgresConnectionManager().engine
 
     df_sql = pd.read_sql_query(
         f"""
-        SELECT * FROM data_suggestionunitaire
+        SELECT * FROM data_suggestion
         WHERE suggestion_cohorte_id = '{suggestion_cohorte_id}'
         """,
         engine,
     )
 
+    log.preview("df_sql", df_sql)
 
-    df_acteur_to_create = df_sql[
-        df_sql["type_action"] == constants.SUGGESTION_SOURCE_AJOUT
-    ]
-    df_acteur_to_update = df_sql[
-        df_sql["type_action"] == constants.SUGGESTION_SOURCE_MISESAJOUR
-    ]
-    df_acteur_to_delete = df_sql[
-        df_sql["type_action"] == constants.SUGGESTION_SOURCE_SUPRESSION
-    ]
-    if not df_acteur_to_create.empty:
-        normalized_dfs = df_acteur_to_create["suggestion"].apply(pd.json_normalize)
-        df_acteur = pd.concat(normalized_dfs.tolist(), ignore_index=True)
-        return normalize_acteur_update_for_db(
-            df_acteur, suggestion_cohorte_id, engine, constants.SUGGESTION_SOURCE_AJOUT
-        )
-    if not df_acteur_to_update.empty:
-        normalized_dfs = df_acteur_to_update["suggestion"].apply(pd.json_normalize)
+    if (
+        type_action
+        in [
+            constants.SUGGESTION_SOURCE_AJOUT,
+            constants.SUGGESTION_SOURCE_MISESAJOUR,
+        ]
+        and not df_sql.empty
+    ):
+        normalized_dfs = df_sql["suggestion"].apply(pd.json_normalize)
         df_acteur = pd.concat(normalized_dfs.tolist(), ignore_index=True)
         return normalize_acteur_update_for_db(
-            df_acteur,
-            suggestion_cohorte_id,
-            engine,
-            constants.SUGGESTION_SOURCE_MISESAJOUR,
+            df_acteur, suggestion_cohorte_id, engine, type_action
         )
-    if not df_acteur_to_delete.empty:
-        normalized_dfs = df_acteur_to_delete["suggestion"].apply(pd.json_normalize)
+    if type_action == constants.SUGGESTION_SOURCE_SUPRESSION and not df_sql.empty:
+        normalized_dfs = df_sql["suggestion"].apply(pd.json_normalize)
         df_acteur = pd.concat(normalized_dfs.tolist(), ignore_index=True)
         log.preview("df_acteur_to_delete", df_acteur)
         return {
             "actors": df_acteur,
             "dag_run_id": suggestion_cohorte_id,
-            "change_type": constants.SUGGESTION_SOURCE_SUPRESSION,
+            "change_type": type_action,
         }
 
     raise ValueError("No suggestion found")
diff --git a/dags/suggestions/tasks/business_logic/db_read_suggestiontoprocess.py b/dags/suggestions/tasks/business_logic/db_read_suggestiontoprocess.py
index 02975f494..12e8154f1 100644
--- a/dags/suggestions/tasks/business_logic/db_read_suggestiontoprocess.py
+++ b/dags/suggestions/tasks/business_logic/db_read_suggestiontoprocess.py
@@ -1,17 +1,23 @@
-from airflow.providers.postgres.hooks.postgres import PostgresHook
+import pandas as pd
+from shared.tasks.database_logic.db_manager import PostgresConnectionManager
 from sources.config import shared_constants as constants
 
 
 def get_first_suggetsioncohorte_to_insert():
-    hook = PostgresHook(postgres_conn_id="qfdmo_django_db")
-    row = hook.get_first(
+    engine = PostgresConnectionManager().engine
+
+    # get first cohorte suggestion to process as a dict
+    suggestion_cohorte = pd.read_sql_query(
         f"""
         SELECT * FROM data_suggestioncohorte
         WHERE statut = '{constants.SUGGESTION_ATRAITER}'
        LIMIT 1
-        """
+        """,
+        engine,
     )
-    return row
+    if suggestion_cohorte.empty:
+        return None
+    return suggestion_cohorte.to_dict(orient="records")[0]
 
 
 def db_read_suggestiontoprocess(**kwargs):
diff --git a/data/models.py b/data/models.py
index eb0fc140c..7bb9afe6e 100644
--- a/data/models.py
+++ b/data/models.py
@@ -73,7 +73,6 @@ class SuggestionCohorte(models.Model):
     def is_source_type(self) -> bool:
         # FIXME: ajout de tests
         return self.type_action in [
-            SuggestionAction.SOURCE,
             SuggestionAction.SOURCE_AJOUT,
             SuggestionAction.SOURCE_MISESAJOUR,
             SuggestionAction.SOURCE_SUPPRESSION,
         ]
diff --git a/unit_tests/data/test_models.py b/unit_tests/data/test_models.py
new file mode 100644
index 000000000..8fee546c5
--- /dev/null
+++ b/unit_tests/data/test_models.py
@@ -0,0 +1,36 @@
+import pytest
+
+from data.models import (
+    SuggestionAction,
+    SuggestionCohorte,
+)
+
+
+@pytest.mark.parametrize(
+    "type_action, expected_result",
+    [
+        (SuggestionAction.SOURCE_AJOUT, True),
+        (SuggestionAction.SOURCE_MISESAJOUR, True),
+        (SuggestionAction.SOURCE_SUPPRESSION, True),
+        (SuggestionAction.CLUSTERING, False),
+        ("other_action", False),
+    ],
+)
+def test_is_source_type(type_action, expected_result):
+    instance = SuggestionCohorte(type_action=type_action)
+    assert instance.is_source_type == expected_result
+
+
+@pytest.mark.parametrize(
+    "type_action, expected_result",
+    [
+        (SuggestionAction.CLUSTERING, True),
+        (SuggestionAction.SOURCE_AJOUT, False),
+        (SuggestionAction.SOURCE_MISESAJOUR, False),
+        (SuggestionAction.SOURCE_SUPPRESSION, False),
+        ("other_action", False),
+    ],
+)
+def test_is_clustering_type(type_action, expected_result):
+    instance = SuggestionCohorte(type_action=type_action)
+    assert instance.is_clustering_type == expected_result

From 9d2a0cefed6a834e1904b6a6c75508dc6d52771e Mon Sep 17 00:00:00 2001
From: Nicolas Oudard
Date: Mon, 20 Jan 2025 11:31:20 +0100
Subject: [PATCH 20/26] logger stuff

---
 .../tasks/business_logic/db_normalize_suggestion.py   | 7 -------
 .../tasks/business_logic/db_write_validsuggestions.py | 6 ++++--
 2 files changed, 4 insertions(+), 9 deletions(-)

diff --git a/dags/suggestions/tasks/business_logic/db_normalize_suggestion.py b/dags/suggestions/tasks/business_logic/db_normalize_suggestion.py
index 27e094844..f45b50e95 100644
--- a/dags/suggestions/tasks/business_logic/db_normalize_suggestion.py
+++ b/dags/suggestions/tasks/business_logic/db_normalize_suggestion.py
@@ -1,5 +1,3 @@
-import logging
-
 import pandas as pd
 from shared.tasks.database_logic.db_manager import PostgresConnectionManager
 from sources.config import shared_constants as constants
@@ -15,11 +13,6 @@ def db_normalize_suggestion():
         raise ValueError("No suggestion found")
     suggestion_cohorte_id = suggestion_cohorte["id"]
     type_action = suggestion_cohorte["type_action"]
-    logging.warning(f"Processing suggestion_cohorte_id: {suggestion_cohorte_id}")
-    logging.warning(f"Processing suggestion_cohorte: {suggestion_cohorte}")
-    logging.warning(
-        f"Processing suggestion_cohorte: {suggestion_cohorte['type_action']}"
-    )
 
     engine = PostgresConnectionManager().engine
 
diff --git a/dags/suggestions/tasks/business_logic/db_write_validsuggestions.py b/dags/suggestions/tasks/business_logic/db_write_validsuggestions.py
index eb84d02d9..71b00fe73 100644
--- a/dags/suggestions/tasks/business_logic/db_write_validsuggestions.py
+++ b/dags/suggestions/tasks/business_logic/db_write_validsuggestions.py
@@ -4,6 +4,8 @@
 from sources.config import shared_constants as constants
 from utils import logging_utils as log
 
+logger = logging.getLogger(__name__)
+
 
 def db_write_validsuggestions(data_from_db: dict):
     # If data_set is empty, nothing to do
     dag_run_id = data_from_db["dag_run_id"]
     engine = PostgresConnectionManager().engine
@@ -42,7 +44,7 @@ def db_write_acteurupdate(
     connection, df_actors, df_labels, df_acteur_services, df_pds, df_pdssc
 ):
-    logging.warning("Création ou mise à jour des acteurs")
+    logger.warning("Création ou mise à jour des acteurs")
 
     df_actors[["identifiant_unique"]].to_sql(
         "temp_actors", connection, if_exists="replace"
@@ -141,7 +143,7 @@ def db_write_acteurupdate(
 
 
 def db_write_acteurdelete(connection, df_acteur_to_delete):
     # mettre le statut des acteur à "SUPPRIMER" pour tous les acteurs à supprimer
-    logging.warning("Suppression des acteurs")
+    logger.warning("Suppression des acteurs")
     identifiant_uniques = list(
         set(df_acteur_to_delete[["identifiant_unique"]].values.flatten())
     )

From a37aba867fe8be7cea820b99286f68610a08d65c Mon Sep 17 00:00:00 2001
From: Nicolas Oudard
Date: Mon, 20 Jan 2025 11:34:12 +0100
Subject: [PATCH 21/26] rename

---
 .../airflow_logic/db_write_validsuggestions_task.py | 10 ++++++----
 .../business_logic/db_write_validsuggestions.py     | 20 +++++++++----------
 2 files changed, 16 insertions(+), 14 deletions(-)

diff --git a/dags/suggestions/tasks/airflow_logic/db_write_validsuggestions_task.py b/dags/suggestions/tasks/airflow_logic/db_write_validsuggestions_task.py
index 071b1db3f..9a010d83c 100644
--- a/dags/suggestions/tasks/airflow_logic/db_write_validsuggestions_task.py
+++ b/dags/suggestions/tasks/airflow_logic/db_write_validsuggestions_task.py
@@ -15,9 +15,11 @@ def db_write_validsuggestions_task(dag: DAG) -> PythonOperator:
 
 
 def db_write_validsuggestions_wrapper(**kwargs):
-    data_from_db = kwargs["ti"].xcom_pull(task_ids="db_normalize_suggestion")
+    data_acteurs_normalized = kwargs["ti"].xcom_pull(task_ids="db_normalize_suggestion")
 
-    log.preview("data_from_db acteur", data_from_db["actors"])
-    log.preview("data_from_db change_type", data_from_db["change_type"])
+    log.preview("data_acteurs_normalized acteur", data_acteurs_normalized["actors"])
+    log.preview(
+        "data_acteurs_normalized change_type", data_acteurs_normalized["change_type"]
+    )
 
-    return db_write_validsuggestions(data_from_db=data_from_db)
+    return db_write_validsuggestions(data_acteurs_normalized=data_acteurs_normalized)
diff --git a/dags/suggestions/tasks/business_logic/db_write_validsuggestions.py b/dags/suggestions/tasks/business_logic/db_write_validsuggestions.py
index 71b00fe73..c4f3a456b 100644
--- a/dags/suggestions/tasks/business_logic/db_write_validsuggestions.py
+++ b/dags/suggestions/tasks/business_logic/db_write_validsuggestions.py
@@ -7,23 +7,23 @@
 logger = logging.getLogger(__name__)
 
 
-def db_write_validsuggestions(data_from_db: dict):
+def db_write_validsuggestions(data_acteurs_normalized: dict):
     # If data_set is empty, nothing to do
-    dag_run_id = data_from_db["dag_run_id"]
+    dag_run_id = data_acteurs_normalized["dag_run_id"]
     engine = PostgresConnectionManager().engine
-    if "actors" not in data_from_db:
+    if "actors" not in data_acteurs_normalized:
         with engine.begin() as connection:
             update_suggestion_status(
                 connection, dag_run_id, constants.SUGGESTION_ENCOURS
             )
         return
-    df_actors = data_from_db["actors"]
-    df_labels = data_from_db.get("labels")
-    df_acteur_services = data_from_db.get("acteur_services")
-    df_pds = data_from_db.get("pds")
-    df_pdssc = data_from_db.get("pds_sous_categories")
-    dag_run_id = data_from_db["dag_run_id"]
-    change_type = data_from_db.get("change_type", "CREATE")
+    df_actors = data_acteurs_normalized["actors"]
+    df_labels = data_acteurs_normalized.get("labels")
data_acteurs_normalized.get("labels") + df_acteur_services = data_acteurs_normalized.get("acteur_services") + df_pds = data_acteurs_normalized.get("pds") + df_pdssc = data_acteurs_normalized.get("pds_sous_categories") + dag_run_id = data_acteurs_normalized["dag_run_id"] + change_type = data_acteurs_normalized.get("change_type", "CREATE") with engine.begin() as connection: if change_type in [ From 23ff59709b73e1980824f3c96b4112e335f940f7 Mon Sep 17 00:00:00 2001 From: Nicolas Oudard Date: Mon, 20 Jan 2025 14:24:36 +0100 Subject: [PATCH 22/26] update migrations --- data/migrations/0001_bancache.py | 2 +- data/migrations/0002_tables_suggestion.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/data/migrations/0001_bancache.py b/data/migrations/0001_bancache.py index 01ef4502d..286ee979f 100644 --- a/data/migrations/0001_bancache.py +++ b/data/migrations/0001_bancache.py @@ -49,7 +49,7 @@ class Migration(migrations.Migration): ], options={ "verbose_name": "Cache BAN", - "verbose_name_plural": "Cache BAN", + "verbose_name_plural": "Caches BAN", }, ), ] diff --git a/data/migrations/0002_tables_suggestion.py b/data/migrations/0002_tables_suggestion.py index 9008c58aa..670188d61 100644 --- a/data/migrations/0002_tables_suggestion.py +++ b/data/migrations/0002_tables_suggestion.py @@ -73,7 +73,7 @@ class Migration(migrations.Migration): "metadata", models.JSONField( blank=True, - help_text="Metadata de la cohorte, données statistiques", + verbose_name="Metadata de la cohorte, données statistiques", null=True, ), ), From f68434d40278cb8dafb441dc0ea030e15cc518fc Mon Sep 17 00:00:00 2001 From: Nicolas Oudard Date: Mon, 20 Jan 2025 15:09:19 +0100 Subject: [PATCH 23/26] REJETER -> REJETEE --- dags/sources/config/shared_constants.py | 2 +- data/migrations/0002_tables_suggestion.py | 4 ++-- data/models.py | 4 ++-- data/views.py | 2 +- docs/reference/303-systeme-de-suggestions.md | 2 +- 5 files changed, 7 insertions(+), 7 deletions(-) diff --git a/dags/sources/config/shared_constants.py b/dags/sources/config/shared_constants.py index bf6c9961e..1a15dbe9e 100755 --- a/dags/sources/config/shared_constants.py +++ b/dags/sources/config/shared_constants.py @@ -6,7 +6,7 @@ # Suggestion statuts (pour cohorte et unitaire) SUGGESTION_AVALIDER = "AVALIDER" -SUGGESTION_REJETER = "REJETER" +SUGGESTION_REJETEE = "REJETEE" SUGGESTION_PARTIEL = "PARTIEL" SUGGESTION_ATRAITER = "ATRAITER" SUGGESTION_ENCOURS = "ENCOURS" diff --git a/data/migrations/0002_tables_suggestion.py b/data/migrations/0002_tables_suggestion.py index 670188d61..ee6b3047c 100644 --- a/data/migrations/0002_tables_suggestion.py +++ b/data/migrations/0002_tables_suggestion.py @@ -58,7 +58,7 @@ class Migration(migrations.Migration): models.CharField( choices=[ ("AVALIDER", "À valider"), - ("REJETER", "Rejeter"), + ("REJETEE", "Rejetée"), ("ATRAITER", "À traiter"), ("ENCOURS", "En cours de traitement"), ("ERREUR", "Fini en erreur"), @@ -102,7 +102,7 @@ class Migration(migrations.Migration): models.CharField( choices=[ ("AVALIDER", "À valider"), - ("REJETER", "Rejeter"), + ("REJETEE", "Rejetée"), ("ATRAITER", "À traiter"), ("ENCOURS", "En cours de traitement"), ("ERREUR", "Fini en erreur"), diff --git a/data/models.py b/data/models.py index 7bb9afe6e..521b87af6 100644 --- a/data/models.py +++ b/data/models.py @@ -8,7 +8,7 @@ SUGGESTION_ENCOURS, SUGGESTION_ERREUR, SUGGESTION_PARTIEL, - SUGGESTION_REJETER, + SUGGESTION_REJETEE, SUGGESTION_SOURCE_AJOUT, SUGGESTION_SOURCE_MISESAJOUR, SUGGESTION_SOURCE_SUPRESSION, @@ -19,7 +19,7 @@ class 
     AVALIDER = SUGGESTION_AVALIDER, "À valider"
-    REJETER = SUGGESTION_REJETER, "Rejeter"
+    REJETEE = SUGGESTION_REJETEE, "Rejetée"
     ATRAITER = SUGGESTION_ATRAITER, "À traiter"
     ENCOURS = SUGGESTION_ENCOURS, "En cours de traitement"
     ERREUR = SUGGESTION_ERREUR, "Fini en erreur"
diff --git a/data/views.py b/data/views.py
index b6ab0e7fb..4eedabffa 100644
--- a/data/views.py
+++ b/data/views.py
@@ -51,7 +51,7 @@ def form_valid(self, form):
         new_status = (
             SuggestionStatut.ATRAITER.value
             if self.request.POST.get("dag_valid") == "1"
-            else SuggestionStatut.REJETER.value
+            else SuggestionStatut.REJETEE.value
         )
 
         suggestion_cohorte.suggestion_unitaires.all().update(statut=new_status)
diff --git a/docs/reference/303-systeme-de-suggestions.md b/docs/reference/303-systeme-de-suggestions.md
index dc2f7e062..4eea24829 100644
--- a/docs/reference/303-systeme-de-suggestions.md
+++ b/docs/reference/303-systeme-de-suggestions.md
@@ -27,7 +27,7 @@ title: Cycle de vie d'une suggestion (cohorte et unitaire)
 flowchart TB
     AVALIDER[À valider] --> ATRAITER[À traiter] --> ENCOURS[En cours de traitement] --> SUCCES[Fini avec succès]
-    AVALIDER[À valider] --> REJETER[Rejeter]
+    AVALIDER[À valider] --> REJETEE[Rejetée]
     ENCOURS --> PARTIEL[Fini avec succès partiel]
     ENCOURS --> ERREUR[Fini en erreur]
 ```

From c0190cf2317d0f13ac301c4f7161ad95956bdd11 Mon Sep 17 00:00:00 2001
From: Nicolas Oudard
Date: Mon, 20 Jan 2025 15:24:50 +0100
Subject: [PATCH 24/26] remove useless FIXME

---
 core/settings.py                             | 1 -
 dags/sources/tasks/transform/transform_df.py | 1 -
 data/models.py                               | 2 --
 qfdmo/forms.py                               | 1 -
 4 files changed, 5 deletions(-)

diff --git a/core/settings.py b/core/settings.py
index efc2d116a..de5a7a448 100644
--- a/core/settings.py
+++ b/core/settings.py
@@ -65,7 +65,6 @@
     "corsheaders",
 ]
 
-# FIXME : check if we can manage django forms templating with jinja2
 FORM_RENDERER = "django.forms.renderers.TemplatesSetting"
 
 
diff --git a/dags/sources/tasks/transform/transform_df.py b/dags/sources/tasks/transform/transform_df.py
index 507d144d5..4c8a8c42b 100644
--- a/dags/sources/tasks/transform/transform_df.py
+++ b/dags/sources/tasks/transform/transform_df.py
@@ -221,7 +221,6 @@ def get_latlng_from_geopoint(row: pd.Series, _) -> pd.Series:
 
 
 def compute_location(row: pd.Series, _):
-    # FIXME : tests à déplacer
     # first column is latitude, second is longitude
     lat_column = row.keys()[0]
     lng_column = row.keys()[1]
diff --git a/data/models.py b/data/models.py
index 521b87af6..ca445298e 100644
--- a/data/models.py
+++ b/data/models.py
@@ -71,7 +71,6 @@ class SuggestionCohorte(models.Model):
 
     @property
     def is_source_type(self) -> bool:
-        # FIXME: ajout de tests
         return self.type_action in [
             SuggestionAction.SOURCE_AJOUT,
             SuggestionAction.SOURCE_MISESAJOUR,
@@ -80,7 +79,6 @@ def is_source_type(self) -> bool:
 
     @property
     def is_clustering_type(self) -> bool:
-        # FIXME: ajout de tests
         return self.type_action == SuggestionAction.CLUSTERING
 
     def __str__(self) -> str:
diff --git a/qfdmo/forms.py b/qfdmo/forms.py
index b637c8a22..b8ec9a27f 100644
--- a/qfdmo/forms.py
+++ b/qfdmo/forms.py
@@ -479,7 +479,6 @@ def load_choices(self):
                 "data-search-solution-form-target": "direction",
             },
         ),
-        # FIXME: I guess async error comes from here
         choices=[
             ("carte", "Carte"),
             ("form", "Formulaire"),

From 963820b00d1ffe096cc83b94a42f4ee44aad8894 Mon Sep 17 00:00:00 2001
From: Nicolas Oudard
Date: Mon, 20 Jan 2025 16:51:17 +0100
Subject: [PATCH 25/26] =?UTF-8?q?extract=20cree=5Fle=20et=20modifie=5Fle?=
 =?UTF-8?q?=20dans=20un=20mod=C3=A8le=20abstrait=20et=20partag=C3=A9?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 core/models.py         | 10 ++++++++++
 data/models.py         |  5 ++---
 qfdmo/models/acteur.py |  6 ++----
 3 files changed, 14 insertions(+), 7 deletions(-)
 create mode 100644 core/models.py

diff --git a/core/models.py b/core/models.py
new file mode 100644
index 000000000..1d4b399a3
--- /dev/null
+++ b/core/models.py
@@ -0,0 +1,10 @@
+from django.db import models
+from django.db.models.functions import Now
+
+
+class TimestampedModel(models.Model):
+    cree_le = models.DateTimeField(auto_now_add=True, db_default=Now())
+    modifie_le = models.DateTimeField(auto_now=True, db_default=Now())
+
+    class Meta:
+        abstract = True
diff --git a/data/models.py b/data/models.py
index ca445298e..80112310c 100644
--- a/data/models.py
+++ b/data/models.py
@@ -1,6 +1,7 @@
 from django.contrib.gis.db import models
 from django.db.models.functions import Now
 
+from core.models import TimestampedModel
 from dags.sources.config.shared_constants import (
     SUGGESTION_ATRAITER,
     SUGGESTION_AVALIDER,
@@ -40,7 +41,7 @@ class SuggestionAction(models.TextChoices):
     SOURCE_SUPPRESSION = SUGGESTION_SOURCE_SUPRESSION, "ingestion de source de données"
 
 
-class SuggestionCohorte(models.Model):
+class SuggestionCohorte(TimestampedModel):
     id = models.AutoField(primary_key=True)
     # On utilise identifiant car le champ n'est pas utilisé pour résoudre une relation
     # en base de données
@@ -66,8 +67,6 @@ class SuggestionCohorte(models.Model):
         blank=True,
         verbose_name="Metadata de la cohorte, données statistiques",
     )
-    cree_le = models.DateTimeField(auto_now_add=True, db_default=Now())
-    modifie_le = models.DateTimeField(auto_now=True, db_default=Now())
 
     @property
     def is_source_type(self) -> bool:
diff --git a/qfdmo/models/acteur.py b/qfdmo/models/acteur.py
index f3bd8a3b7..4721dbc30 100644
--- a/qfdmo/models/acteur.py
+++ b/qfdmo/models/acteur.py
@@ -17,7 +17,6 @@
 from django.core.cache import cache
 from django.core.files.images import get_image_dimensions
 from django.db.models import Case, Exists, Min, OuterRef, Q, Value, When
-from django.db.models.functions import Now
 from django.forms import ValidationError, model_to_dict
 from django.http import HttpRequest
 from django.urls import reverse
@@ -25,6 +24,7 @@
 from unidecode import unidecode
 
 from core.constants import DIGITAL_ACTEUR_CODE
+from core.models import TimestampedModel
 from dags.sources.config.shared_constants import REPRISE_1POUR0, REPRISE_1POUR1
 from qfdmo.models.action import Action, get_action_instances
 from qfdmo.models.categorie_objet import SousCategorieObjet
@@ -268,7 +268,7 @@ def get_queryset(self):
         return DisplayedActeurQuerySet(self.model, using=self._db)
 
 
-class BaseActeur(NomAsNaturalKeyModel):
+class BaseActeur(TimestampedModel, NomAsNaturalKeyModel):
     class Meta:
         abstract = True
 
@@ -303,8 +303,6 @@ class Meta:
     )
     naf_principal = models.CharField(max_length=255, blank=True, null=True)
     commentaires = models.TextField(blank=True, null=True)
-    cree_le = models.DateTimeField(auto_now_add=True, db_default=Now())
-    modifie_le = models.DateTimeField(auto_now=True, db_default=Now())
     horaires_osm = models.CharField(
         blank=True, null=True, validators=[validate_opening_hours]
     )

From e009a1d658c65df5ee29e98c5bb228bed0f49183 Mon Sep 17 00:00:00 2001
From: Nicolas Oudard
Date: Tue, 21 Jan 2025 08:31:10 +0100
Subject: [PATCH 26/26] =?UTF-8?q?utiliser=20modification=20plut=C3=B4t=20q?=
 =?UTF-8?q?ue=20mises=20=C3=A0=20jour?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 dags/sources/config/shared_constants.py                |  2 +-
 .../business_logic/db_write_type_action_suggestions.py |  2 +-
 .../tasks/business_logic/db_normalize_suggestion.py    |  2 +-
 .../tasks/business_logic/db_write_validsuggestions.py  |  2 +-
 data/migrations/0002_tables_suggestion.py              |  2 +-
 data/models.py                                         |  8 ++++----
 data/views.py                                          |  2 +-
 templates/data/_partials/source_event.html             |  2 +-
 unit_tests/data/test_models.py                         |  4 ++--
 9 files changed, 13 insertions(+), 13 deletions(-)

diff --git a/dags/sources/config/shared_constants.py b/dags/sources/config/shared_constants.py
index 1a15dbe9e..e25e7dc09 100755
--- a/dags/sources/config/shared_constants.py
+++ b/dags/sources/config/shared_constants.py
@@ -16,7 +16,7 @@
 # SuggestionCohorte actions
 SUGGESTION_CLUSTERING = "CLUSTERING"
 SUGGESTION_SOURCE_AJOUT = "SOURCE_AJOUT"
-SUGGESTION_SOURCE_MISESAJOUR = "SOURCE_MISESAJOUR"
+SUGGESTION_SOURCE_MODIFICATION = "SOURCE_MODIFICATION"
 SUGGESTION_SOURCE_SUPRESSION = "SOURCE_SUPRESSION"
 
 # Public accueilli
diff --git a/dags/sources/tasks/business_logic/db_write_type_action_suggestions.py b/dags/sources/tasks/business_logic/db_write_type_action_suggestions.py
index d8f9fe7be..eaed2cead 100644
--- a/dags/sources/tasks/business_logic/db_write_type_action_suggestions.py
+++ b/dags/sources/tasks/business_logic/db_write_type_action_suggestions.py
@@ -40,7 +40,7 @@ def db_write_type_action_suggestions(
         metadata=metadata,
         dag_name=f"{dag_name} - MISES A JOUR",
         run_name=run_name,
-        type_action=constants.SUGGESTION_SOURCE_MISESAJOUR,
+        type_action=constants.SUGGESTION_SOURCE_MODIFICATION,
     )
 
 
diff --git a/dags/suggestions/tasks/business_logic/db_normalize_suggestion.py b/dags/suggestions/tasks/business_logic/db_normalize_suggestion.py
index f45b50e95..5551da313 100644
--- a/dags/suggestions/tasks/business_logic/db_normalize_suggestion.py
+++ b/dags/suggestions/tasks/business_logic/db_normalize_suggestion.py
@@ -29,7 +29,7 @@ def db_normalize_suggestion():
         type_action
         in [
             constants.SUGGESTION_SOURCE_AJOUT,
-            constants.SUGGESTION_SOURCE_MISESAJOUR,
+            constants.SUGGESTION_SOURCE_MODIFICATION,
         ]
         and not df_sql.empty
     ):
diff --git a/dags/suggestions/tasks/business_logic/db_write_validsuggestions.py b/dags/suggestions/tasks/business_logic/db_write_validsuggestions.py
index c4f3a456b..05c3543f6 100644
--- a/dags/suggestions/tasks/business_logic/db_write_validsuggestions.py
+++ b/dags/suggestions/tasks/business_logic/db_write_validsuggestions.py
@@ -28,7 +28,7 @@ def db_write_validsuggestions(data_acteurs_normalized: dict):
     with engine.begin() as connection:
         if change_type in [
             constants.SUGGESTION_SOURCE_AJOUT,
-            constants.SUGGESTION_SOURCE_MISESAJOUR,
+            constants.SUGGESTION_SOURCE_MODIFICATION,
         ]:
             db_write_acteurupdate(
                 connection, df_actors, df_labels, df_acteur_services, df_pds, df_pdssc
             )
diff --git a/data/migrations/0002_tables_suggestion.py b/data/migrations/0002_tables_suggestion.py
index ee6b3047c..2b51a67c4 100644
--- a/data/migrations/0002_tables_suggestion.py
+++ b/data/migrations/0002_tables_suggestion.py
@@ -45,7 +45,7 @@ class Migration(migrations.Migration):
                             "ingestion de source de données - nouveau acteur",
                         ),
                         (
-                            "SOURCE_MISESAJOUR",
+                            "SOURCE_MODIFICATION",
                             "ingestion de source de données - modification d'acteur existant",
                         ),
                         ("SOURCE_SUPRESSION", "ingestion de source de données"),
diff --git a/data/models.py b/data/models.py
index 80112310c..c8f0730ce 100644
--- a/data/models.py
+++ b/data/models.py
@@ -11,7 +11,7 @@
     SUGGESTION_PARTIEL,
     SUGGESTION_REJETEE,
     SUGGESTION_SOURCE_AJOUT,
-    SUGGESTION_SOURCE_MISESAJOUR,
+    SUGGESTION_SOURCE_MODIFICATION,
     SUGGESTION_SOURCE_SUPRESSION,
     SUGGESTION_SUCCES,
 )
@@ -34,8 +34,8 @@ class SuggestionAction(models.TextChoices):
         SUGGESTION_SOURCE_AJOUT,
         "ingestion de source de données - nouveau acteur",
     )
-    SOURCE_MISESAJOUR = (
-        SUGGESTION_SOURCE_MISESAJOUR,
+    SOURCE_MODIFICATION = (
+        SUGGESTION_SOURCE_MODIFICATION,
         "ingestion de source de données - modification d'acteur existant",
     )
     SOURCE_SUPPRESSION = SUGGESTION_SOURCE_SUPRESSION, "ingestion de source de données"
@@ -72,7 +72,7 @@ class SuggestionCohorte(TimestampedModel):
     def is_source_type(self) -> bool:
         return self.type_action in [
             SuggestionAction.SOURCE_AJOUT,
-            SuggestionAction.SOURCE_MISESAJOUR,
+            SuggestionAction.SOURCE_MODIFICATION,
             SuggestionAction.SOURCE_SUPPRESSION,
         ]
 
diff --git a/data/views.py b/data/views.py
index 4eedabffa..e1ebbfb12 100644
--- a/data/views.py
+++ b/data/views.py
@@ -14,7 +14,7 @@
 ACTION_TO_VERB = {
     SuggestionAction.SOURCE_AJOUT: "ajoutera",
     SuggestionAction.SOURCE_SUPPRESSION: "supprimera",
-    SuggestionAction.SOURCE_MISESAJOUR: "mettra à jour",
+    SuggestionAction.SOURCE_MODIFICATION: "modifiera",
 }
 
 
diff --git a/templates/data/_partials/source_event.html b/templates/data/_partials/source_event.html
index acf653d6b..e830a6acf 100644
--- a/templates/data/_partials/source_event.html
+++ b/templates/data/_partials/source_event.html
@@ -10,7 +10,7 @@

Exemples

-    {% if suggestion_cohorte_instance.type_action == "SOURCE_AJOUT" or suggestion_cohorte_instance.type_action == "SOURCE_MISESAJOUR" %}
+    {% if suggestion_cohorte_instance.type_action == "SOURCE_AJOUT" or suggestion_cohorte_instance.type_action == "SOURCE_MODIFICATION" %}
         {% include 'data/_partials/source_ajout_event.html' %}
     {% elif suggestion_cohorte_instance.type_action == "SOURCE_SUPRESSION" %}
         {% include 'data/_partials/source_supression_event.html' %}
diff --git a/unit_tests/data/test_models.py b/unit_tests/data/test_models.py
index 8fee546c5..84703d66e 100644
--- a/unit_tests/data/test_models.py
+++ b/unit_tests/data/test_models.py
@@ -10,7 +10,7 @@
     "type_action, expected_result",
     [
         (SuggestionAction.SOURCE_AJOUT, True),
-        (SuggestionAction.SOURCE_MISESAJOUR, True),
+        (SuggestionAction.SOURCE_MODIFICATION, True),
         (SuggestionAction.SOURCE_SUPPRESSION, True),
         (SuggestionAction.CLUSTERING, False),
         ("other_action", False),
@@ -26,7 +26,7 @@ def test_is_source_type(type_action, expected_result):
     [
         (SuggestionAction.CLUSTERING, True),
         (SuggestionAction.SOURCE_AJOUT, False),
-        (SuggestionAction.SOURCE_MISESAJOUR, False),
+        (SuggestionAction.SOURCE_MODIFICATION, False),
         (SuggestionAction.SOURCE_SUPPRESSION, False),
         ("other_action", False),
     ],
Suggestion de source à valider
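
Note: a minimal sketch of the validation step described in docs/reference/303-systeme-de-suggestions.md, assuming the state of data/models.py after patch 26. The helper name approve_cohorte is hypothetical and not part of this series; the related_name "suggestion_unitaires", the SuggestionStatut values, and the update logic are taken from the patches above.

from data.models import SuggestionCohorte, SuggestionStatut


def approve_cohorte(cohorte: SuggestionCohorte) -> None:
    # Validating a cohort moves it and its pending suggestions from
    # AVALIDER to ATRAITER, mirroring form_valid() in data/views.py;
    # the suggestions DAG then picks up the first cohort whose statut
    # is ATRAITER and applies each suggestion to the database.
    cohorte.suggestion_unitaires.filter(
        statut=SuggestionStatut.AVALIDER.value
    ).update(statut=SuggestionStatut.ATRAITER.value)
    cohorte.statut = SuggestionStatut.ATRAITER.value
    cohorte.save()

Rejection is symmetric: the same transition with SuggestionStatut.REJETEE terminates the lifecycle shown in the mermaid diagram.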