-
Notifications
You must be signed in to change notification settings - Fork 5
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Isolation du DAG d'ingestion des suggestions
- Loading branch information
Showing
10 changed files
with
459 additions
and
69 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
15 changes: 15 additions & 0 deletions
15
dags/suggestions/airflow_logic/db_normalize_suggestion_task.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,15 @@ | ||
from airflow.models import DAG | ||
from airflow.operators.python import PythonOperator | ||
from suggestions.business_logic.db_normalize_suggestion import db_normalize_suggestion | ||
|
||
|
||
def db_normalize_suggestion_task(dag: DAG):
    """Create the ``db_normalize_suggestion`` task and attach it to ``dag``.

    The task is a ``PythonOperator`` whose callable is
    ``db_normalize_suggestion_wrapper``.
    """
    operator_kwargs = {
        "task_id": "db_normalize_suggestion",
        "python_callable": db_normalize_suggestion_wrapper,
        "dag": dag,
    }
    return PythonOperator(**operator_kwargs)
|
||
|
||
def db_normalize_suggestion_wrapper(**kwargs):
    """Run ``db_normalize_suggestion`` and return its result.

    The keyword arguments received from the operator are accepted but not used.
    """
    normalized = db_normalize_suggestion()
    return normalized
13 changes: 13 additions & 0 deletions
13
dags/suggestions/airflow_logic/db_read_suggestiontoprocess_task.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
from airflow.models import DAG | ||
from airflow.operators.python import ShortCircuitOperator | ||
from suggestions.business_logic.db_read_suggestiontoprocess import ( | ||
db_read_suggestiontoprocess, | ||
) | ||
|
||
|
||
def db_read_suggestiontoprocess_task(dag: DAG):
    """Create the ``check_suggestion_to_process`` gate task on ``dag``.

    The task is a ``ShortCircuitOperator`` driven by the boolean returned by
    ``db_read_suggestiontoprocess``.
    """
    gate = ShortCircuitOperator(
        dag=dag,
        python_callable=db_read_suggestiontoprocess,
        task_id="check_suggestion_to_process",
    )
    return gate
23 changes: 23 additions & 0 deletions
23
dags/suggestions/airflow_logic/db_write_validsuggestions_task.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,23 @@ | ||
from airflow.models import DAG | ||
from airflow.operators.python import PythonOperator | ||
from suggestions.business_logic.db_write_validsuggestions import ( | ||
db_write_validsuggestions, | ||
) | ||
from utils import logging_utils as log | ||
|
||
|
||
def db_write_validsuggestions_task(dag: DAG) -> PythonOperator:
    """Create the ``db_write_validsuggestions`` task and attach it to ``dag``.

    The task is a ``PythonOperator`` whose callable is
    ``db_write_validsuggestions_wrapper``.
    """
    operator_kwargs = {
        "task_id": "db_write_validsuggestions",
        "python_callable": db_write_validsuggestions_wrapper,
        "dag": dag,
    }
    return PythonOperator(**operator_kwargs)
|
||
|
||
def db_write_validsuggestions_wrapper(**kwargs):
    """Forward the normalized suggestions to ``db_write_validsuggestions``.

    The payload is pulled from the XCom of the ``db_normalize_suggestion``
    task through the task instance found under ``kwargs["ti"]``. Its
    ``actors`` and ``change_type`` entries are previewed in the logs before
    the write is delegated.
    """
    task_instance = kwargs["ti"]
    normalized = task_instance.xcom_pull(task_ids="db_normalize_suggestion")

    # Preview the two entries in a fixed order: actors first, then change type.
    previews = (
        ("data_from_db acteur", "actors"),
        ("data_from_db change_type", "change_type"),
    )
    for label, key in previews:
        log.preview(label, normalized[key])

    return db_write_validsuggestions(data_from_db=normalized)
10 changes: 10 additions & 0 deletions
10
dags/suggestions/airflow_logic/launch_compute_carte_acteur_task.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
from airflow.models import DAG | ||
from airflow.operators.trigger_dagrun import TriggerDagRunOperator | ||
|
||
|
||
def launch_compute_carte_acteur_task(dag: DAG) -> TriggerDagRunOperator:
    """Create the task that triggers the ``compute_carte_acteur`` DAG.

    The returned ``TriggerDagRunOperator`` is attached to ``dag`` under the
    ``launch_compute_carte_acteur`` task id.
    """
    trigger = TriggerDagRunOperator(
        dag=dag,
        trigger_dag_id="compute_carte_acteur",
        task_id="launch_compute_carte_acteur",
    )
    return trigger
113 changes: 113 additions & 0 deletions
113
dags/suggestions/business_logic/db_normalize_suggestion.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,113 @@ | ||
import pandas as pd | ||
from shared.tasks.database_logic.db_manager import PostgresConnectionManager | ||
from sources.config import shared_constants as constants | ||
from suggestions.business_logic.db_read_suggestiontoprocess import ( | ||
get_first_suggetsioncohorte_to_insert, | ||
) | ||
from utils import logging_utils as log | ||
|
||
|
||
def db_normalize_suggestion():
    """Normalize the unit suggestions of the first cohort waiting to be processed.

    The first pending cohort is read with
    ``get_first_suggetsioncohorte_to_insert``; its unit suggestions are loaded
    from ``data_suggestionunitaire`` and dispatched on their ``type_action``.

    Returns:
        dict: the payload built by ``normalize_acteur_update_for_db`` for
        creations/updates, or by ``normalize_acteur_delete_for_db`` for
        deletions.

    Raises:
        ValueError: when there is no pending cohort, or when the cohort holds
            no suggestion of a handled ``type_action``.
    """
    row = get_first_suggetsioncohorte_to_insert()
    if not row:
        # Fail with an explicit message instead of a TypeError on ``row[0]``.
        raise ValueError("No suggestion found")
    suggestion_cohorte_id = row[0]

    engine = PostgresConnectionManager().engine

    # NOTE(review): the id is interpolated into the SQL text; it comes from the
    # cohort row read just above, not from caller-supplied input.
    df_sql = pd.read_sql_query(
        f"""
        SELECT * FROM data_suggestionunitaire
        WHERE suggestion_cohorte_id = '{suggestion_cohorte_id}'
        """,
        engine,
    )

    df_acteur_to_create = df_sql[
        df_sql["type_action"] == constants.SUGGESTION_SOURCE_AJOUT
    ]
    # NOTE(review): this filter is identical to the creation filter above, so
    # the "update" branch below can never be reached on its own. Presumably a
    # dedicated update constant was intended — confirm against shared_constants.
    df_acteur_to_update = df_sql[
        df_sql["type_action"] == constants.SUGGESTION_SOURCE_AJOUT
    ]
    df_acteur_to_delete = df_sql[
        df_sql["type_action"] == constants.SUGGESTION_SOURCE_SUPRESSION
    ]

    if not df_acteur_to_create.empty:
        df_acteur = _flatten_suggestions(df_acteur_to_create)
        return normalize_acteur_update_for_db(df_acteur, suggestion_cohorte_id, engine)
    if not df_acteur_to_update.empty:
        df_acteur = _flatten_suggestions(df_acteur_to_update)
        return normalize_acteur_update_for_db(df_acteur, suggestion_cohorte_id, engine)
    if not df_acteur_to_delete.empty:
        df_acteur = _flatten_suggestions(df_acteur_to_delete)
        log.preview("df_acteur_to_delete", df_acteur)
        return normalize_acteur_delete_for_db(df_acteur, suggestion_cohorte_id)

    raise ValueError("No suggestion found")


def _flatten_suggestions(df_suggestions):
    """Expand every ``suggestion`` payload into rows of one DataFrame."""
    normalized_dfs = df_suggestions["suggestion"].apply(pd.json_normalize)
    return pd.concat(normalized_dfs.tolist(), ignore_index=True)
|
||
|
||
def normalize_acteur_update_for_db(df_actors, dag_run_id, engine):
    """Split an actors DataFrame into the per-table frames to write.

    Args:
        df_actors: DataFrame of actors; its ``proposition_services`` column is
            flattened with ``pd.json_normalize``, and ``labels`` /
            ``acteur_services`` are flattened when present.
        dag_run_id: identifier copied as-is into the returned payload.
        engine: connection passed to ``pd.read_sql_query`` to read the current
            highest ``qfdmo_propositionservice`` id.

    Returns:
        dict with the keys ``actors``, ``pds``, ``pds_sous_categories``,
        ``dag_run_id``, ``labels``, ``acteur_services`` and ``change_type``.
    """
    df_labels = process_many2many_df(df_actors, "labels")
    df_acteur_services = process_many2many_df(
        df_actors, "acteur_services", df_columns=["acteur_id", "acteurservice_id"]
    )

    max_id_pds = pd.read_sql_query(
        "SELECT max(id) FROM qfdmo_propositionservice", engine
    )["max"][0]
    # max(id) is NULL when the table is empty; start numbering at 1 then.
    max_id_pds = 0 if pd.isna(max_id_pds) else int(max_id_pds)

    normalized_pds_dfs = df_actors["proposition_services"].apply(pd.json_normalize)
    df_pds = pd.concat(normalized_pds_dfs.tolist(), ignore_index=True)

    # NOTE(review): ids are reserved from a plain max(id) read, without a lock
    # or sequence — a concurrent writer could pick the same range; confirm.
    ids_range = range(max_id_pds + 1, max_id_pds + 1 + len(df_pds))

    df_pds["id"] = ids_range
    # Stamp each sub-category with the id just assigned to its parent row.
    df_pds["pds_sous_categories"] = df_pds.apply(
        lambda row: [
            {**d, "propositionservice_id": row["id"]}
            for d in row["pds_sous_categories"]
        ],
        axis=1,
    )

    normalized_pdssc_dfs = df_pds["pds_sous_categories"].apply(pd.json_normalize)
    df_pdssc = pd.concat(normalized_pdssc_dfs.tolist(), ignore_index=True)

    return {
        "actors": df_actors,
        "pds": df_pds[["id", "action_id", "acteur_id"]],
        "pds_sous_categories": df_pdssc[
            ["propositionservice_id", "souscategorieobjet_id"]
        ],
        "dag_run_id": dag_run_id,
        "labels": df_labels[["acteur_id", "labelqualite_id"]],
        "acteur_services": df_acteur_services[["acteur_id", "acteurservice_id"]],
        "change_type": constants.SUGGESTION_SOURCE,
    }
|
||
|
||
def process_many2many_df(df, column_name, df_columns=("acteur_id", "labelqualite_id")):
    """Flatten a column of nested records into a single DataFrame.

    Args:
        df: source DataFrame.
        column_name: name of the column holding the nested records; missing
            values in it are ignored.
        df_columns: column names of the empty DataFrame returned when there is
            nothing to flatten.

    Returns:
        pd.DataFrame: the concatenation of every flattened value, or an empty
        DataFrame with ``df_columns`` when the column is absent, holds only
        missing values, or holds only empty collections.
    """
    empty_columns = list(df_columns)

    try:
        values = df[column_name].dropna()
    except KeyError:
        # The column does not exist in this DataFrame.
        return pd.DataFrame(columns=empty_columns)

    frames = [pd.json_normalize(value) for value in values]
    # Frames without rows (e.g. from ``[]``) add nothing and, if they were the
    # only ones, would yield a result without the expected columns.
    frames = [frame for frame in frames if len(frame)]
    if not frames:
        return pd.DataFrame(columns=empty_columns)

    return pd.concat(frames, ignore_index=True)
|
||
|
||
def normalize_acteur_delete_for_db(df_actors, dag_run_id):
    """Wrap the actors to delete into the payload expected downstream.

    Returns a dict holding the actors frame, the given ``dag_run_id`` and the
    ``SUGGESTION_SOURCE_SUPRESSION`` change type.
    """
    payload = dict(
        actors=df_actors,
        dag_run_id=dag_run_id,
        change_type=constants.SUGGESTION_SOURCE_SUPRESSION,
    )
    return payload
18 changes: 18 additions & 0 deletions
18
dags/suggestions/business_logic/db_read_suggestiontoprocess.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,18 @@ | ||
from airflow.providers.postgres.hooks.postgres import PostgresHook | ||
from sources.config import shared_constants as constants | ||
|
||
|
||
def get_first_suggetsioncohorte_to_insert():
    """Fetch one ``data_suggestioncohorte`` row whose status is "to process".

    The query runs through the ``qfdmo_django_db`` Postgres connection and the
    result of ``get_first`` is returned unchanged.
    """
    db_hook = PostgresHook(postgres_conn_id="qfdmo_django_db")
    query = f"""
        SELECT * FROM data_suggestioncohorte
        WHERE statut = '{constants.SUGGESTION_ATRAITER}'
        LIMIT 1
        """
    return db_hook.get_first(query)
|
||
|
||
def db_read_suggestiontoprocess(**kwargs):
    """Tell whether a suggestion cohort is waiting to be processed.

    Returns ``True`` when ``get_first_suggetsioncohorte_to_insert`` yields a
    truthy row, ``False`` otherwise. The keyword arguments are not used.
    """
    pending_row = get_first_suggetsioncohorte_to_insert()
    return True if pending_row else False
Oops, something went wrong.