From 3718088ca856142fdc46f655f9ec51301e15bf9f Mon Sep 17 00:00:00 2001
From: Michael Bunsen <notbot@gmail.com>
Date: Mon, 20 Jan 2025 22:18:51 -0800
Subject: [PATCH] fix: selection of existing pipelines & algorithms

---
 ami/ml/models/pipeline.py           | 12 ++++++++++--
 ami/ml/models/processing_service.py | 21 +++++++++++++--------
 2 files changed, 23 insertions(+), 10 deletions(-)

diff --git a/ami/ml/models/pipeline.py b/ami/ml/models/pipeline.py
index 209f048c..6c9cc0c6 100644
--- a/ami/ml/models/pipeline.py
+++ b/ami/ml/models/pipeline.py
@@ -514,7 +514,14 @@ def create_classification(
     ), f"No classification algorithm was specified for classification {classification_resp}"
     logger.debug(f"Processing classification {classification_resp}")
 
-    classification_algo = algorithms_used[classification_resp.algorithm.key]
+    try:
+        classification_algo = algorithms_used[classification_resp.algorithm.key]
+    except KeyError:
+        raise ValueError(
+            f"Classification algorithm {classification_resp.algorithm.key} is not a known algorithm. "
+            "The processing service must declare it in the /info endpoint. "
+            f"Known algorithms: {list(algorithms_used.keys())}"
+        )
 
     if not classification_algo.category_map:
         logger.warning(
@@ -766,7 +773,8 @@ def save_results(
     # however they are also currently available in each pipeline results response as well.
     # @TODO review if we should only use the algorithms from the pre-registered pipeline config instead of the results
     algorithms_used = {
-        algorithm.key: get_or_create_algorithm_and_category_map(algorithm) for algorithm in pipeline.algorithms.all()
+        algo_key: get_or_create_algorithm_and_category_map(algo_config, logger=job_logger)
+        for algo_key, algo_config in results.algorithms.items()
     }
 
     detections = create_detections(
diff --git a/ami/ml/models/processing_service.py b/ami/ml/models/processing_service.py
index 4c8031b4..3d66143d 100644
--- a/ami/ml/models/processing_service.py
+++ b/ami/ml/models/processing_service.py
@@ -44,14 +44,19 @@ def create_pipelines(self):
         algorithms_created = []
 
         for pipeline_data in pipelines_to_add:
-            pipeline, created = Pipeline.objects.get_or_create(
-                slug=pipeline_data.slug,
-                version=pipeline_data.version,
-                defaults={
-                    "name": pipeline_data.name,
-                    "description": pipeline_data.description or "",
-                },
-            )
+            pipeline = Pipeline.objects.filter(
+                models.Q(slug=pipeline_data.slug) | models.Q(name=pipeline_data.name, version=pipeline_data.version)
+            ).first()
+            created = False
+            if not pipeline:
+                pipeline = Pipeline.objects.create(
+                    slug=pipeline_data.slug,
+                    name=pipeline_data.name,
+                    version=pipeline_data.version,
+                    description=pipeline_data.description or "",
+                )
+                created = True
+
             pipeline.projects.add(*self.projects.all())
             self.pipelines.add(pipeline)