Merge branch 'main' into demo/jan29
mihow authored Feb 3, 2024
2 parents 6651741 + db6fcc4 commit 62c9464
Showing 12 changed files with 271 additions and 96 deletions.
26 changes: 13 additions & 13 deletions .github/workflows/test.backend.yml
@@ -34,20 +34,20 @@ jobs:
uses: pre-commit/[email protected]

# With no caching at all the entire ci process takes 4m 30s to complete!
test:
runs-on: ubuntu-latest
steps:
- name: Checkout Code Repository
uses: actions/checkout@v4
# test:
# runs-on: ubuntu-latest
# steps:
# - name: Checkout Code Repository
# uses: actions/checkout@v4

- name: Build the Stack
run: docker-compose -f local.yml build
# - name: Build the Stack
# run: docker-compose -f local.yml build

- name: Run DB Migrations
run: docker-compose -f local.yml run --rm django python manage.py migrate
# - name: Run DB Migrations
# run: docker-compose -f local.yml run --rm django python manage.py migrate

- name: Run Django Tests
run: docker-compose -f local.yml run django pytest
# - name: Run Django Tests
# run: docker-compose -f local.yml run --rm django python manage.py test

- name: Tear down the Stack
run: docker-compose -f local.yml down
# - name: Tear down the Stack
# run: docker-compose -f local.yml down
72 changes: 68 additions & 4 deletions ami/main/api/serializers.py
@@ -83,10 +83,42 @@ class Meta:
]


class DeviceNestedSerializer(DefaultSerializer):
class Meta:
model = Device
fields = [
"id",
"name",
"details",
]


class SiteNestedSerializer(DefaultSerializer):
class Meta:
model = Site
fields = [
"id",
"name",
"details",
]


class StorageSourceNestedSerializer(DefaultSerializer):
class Meta:
model = S3StorageSource
fields = [
"id",
"name",
"details",
]


class DeploymentListSerializer(DefaultSerializer):
events = serializers.SerializerMethodField()
occurrences = serializers.SerializerMethodField()
project = ProjectNestedSerializer(read_only=True)
device = DeviceNestedSerializer(read_only=True)
research_site = SiteNestedSerializer(read_only=True)

class Meta:
model = Deployment
@@ -108,6 +140,8 @@ class Meta:
"longitude",
"first_date",
"last_date",
"device",
"research_site",
]

def get_events(self, obj):
@@ -285,24 +319,51 @@ class DeploymentSerializer(DeploymentListSerializer):
events = DeploymentEventNestedSerializer(many=True, read_only=True)
occurrences = serializers.SerializerMethodField()
example_captures = DeploymentCaptureNestedSerializer(many=True, read_only=True)
data_source = serializers.SerializerMethodField(read_only=True)
project_id = serializers.PrimaryKeyRelatedField(
write_only=True,
queryset=Project.objects.all(),
source="project",
)
device_id = serializers.PrimaryKeyRelatedField(
write_only=True,
queryset=Device.objects.all(),
source="device",
)
research_site_id = serializers.PrimaryKeyRelatedField(
write_only=True,
queryset=Site.objects.all(),
source="research_site",
)
data_source = serializers.SerializerMethodField()
data_source_id = serializers.PrimaryKeyRelatedField(
write_only=True,
queryset=S3StorageSource.objects.all(),
source="data_source",
)

class Meta(DeploymentListSerializer.Meta):
fields = DeploymentListSerializer.Meta.fields + [
"project_id",
"description",
"device_id",
"research_site_id",
"data_source",
"data_source_id",
"description",
"example_captures",
# "capture_images",
]

def get_data_source(self, obj):
return obj.data_source_uri()
"""
Add uri to nested serializer of the data source
The data source is defined by both the StorageSource model
and the extra configuration in the Deployment model.
"""

data = StorageSourceNestedSerializer(obj.data_source, context=self.context).data
data["uri"] = obj.data_source_uri()
return data

def get_occurrences(self, obj):
"""
@@ -507,7 +568,10 @@ def get_page_offset(self, obj) -> int:
# @TODO this may not be correct. Test or remove if unnecessary.
# the Occurrence to Session navigation in the UI will be using
# another method.
return obj.event.captures.filter(timestamp__lt=obj.timestamp).count()
if not obj or not obj.event:
return 0
else:
return obj.event.captures.filter(timestamp__lt=obj.timestamp).count()


class TaxonOccurrenceNestedSerializer(DefaultSerializer):
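Note (not part of the diff): the new write-only "*_id" fields pair with the read-only nested serializers, a common DRF pattern for accepting primary keys on write while returning nested objects on read. A minimal sketch of how the updated DeploymentSerializer might be exercised from a view or Django shell with the app loaded; the field values, the "name" field, and the request context are assumed for illustration only:

    # Assumed IDs and request context; shown only to illustrate the write/read split.
    payload = {
        "name": "Example deployment",  # assumed field, not shown in this hunk
        "project_id": 1,               # write-only, maps to Deployment.project
        "device_id": 2,                # write-only, maps to Deployment.device
        "research_site_id": 3,         # write-only, maps to Deployment.research_site
        "data_source_id": 4,           # write-only, maps to Deployment.data_source
    }
    serializer = DeploymentSerializer(data=payload, context={"request": request})  # request from the surrounding view
    serializer.is_valid(raise_exception=True)
    deployment = serializer.save()

    # On read, "device", "research_site", and "data_source" come back as nested
    # objects, and get_data_source() adds the resolved "uri" key to data_source.
    DeploymentSerializer(deployment, context={"request": request}).data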
28 changes: 24 additions & 4 deletions ami/main/api/views.py
@@ -121,7 +121,7 @@ class DeploymentViewSet(DefaultViewSet):
for the list and detail views.
"""

queryset = Deployment.objects.select_related("project")
queryset = Deployment.objects.select_related("project", "device", "research_site")
filterset_fields = ["project"]
ordering_fields = [
"created_at",
@@ -435,6 +435,7 @@ class OccurrenceViewSet(DefaultViewSet):

queryset = (
Occurrence.objects.exclude(detections=None)
.exclude(event=None) # These must be independent exclude calls
.annotate(
detections_count=models.Count("detections", distinct=True),
duration=models.Max("detections__timestamp") - models.Min("detections__timestamp"),
@@ -447,7 +448,7 @@
)
.prefetch_related("detections")
.order_by("-determination_score")
.all()
.exclude(first_appearance_time=None) # This must come after annotations
)
serializer_class = OccurrenceSerializer
filterset_fields = ["event", "deployment", "determination", "project"]
@@ -573,17 +574,33 @@ def filter_by_occurrence(self, queryset: QuerySet) -> tuple[QuerySet, bool]:
event = Event.objects.get(id=event_id)
queryset = super().get_queryset().filter(occurrences__event=event)

return queryset, filter_active

def filter_by_classification_threshold(self, queryset: QuerySet) -> QuerySet:
"""
Filter taxa by their best determination score in occurrences.
This is only applicable to list queries that are not filtered by occurrence, project, deployment, or event.
"""
# Look for a query param to filter by score
classification_threshold = self.request.query_params.get("classification_threshold")

if classification_threshold is not None:
classification_threshold = FloatField(required=False).clean(classification_threshold)
else:
classification_threshold = DEFAULT_CONFIDENCE_THRESHOLD

queryset = (
queryset.annotate(best_determination_score=models.Max("occurrences__determination_score"))
.filter(best_determination_score__gte=DEFAULT_CONFIDENCE_THRESHOLD)
.filter(best_determination_score__gte=classification_threshold)
.distinct()
)

# If ordering is not specified, order by best determination score
if not self.request.query_params.get("ordering"):
queryset = queryset.order_by("-best_determination_score")

return queryset, filter_active
return queryset

def get_queryset(self) -> QuerySet:
qs = super().get_queryset()
@@ -593,9 +610,12 @@ def get_queryset(self) -> QuerySet:
from rest_framework.exceptions import NotFound

raise NotFound(detail=str(e))

qs = qs.select_related("parent", "parent__parent")

if filter_active:
qs = self.filter_by_classification_threshold(qs)

qs = qs.prefetch_related("occurrences")
qs = qs.annotate(
occurrences_count=models.Count("occurrences", distinct=True),
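Note (not part of the diff): the new classification_threshold query parameter filters listed taxa by their best occurrence determination score, falling back to DEFAULT_CONFIDENCE_THRESHOLD when omitted. A rough client-side sketch; the host and endpoint path are assumed for illustration:

    import requests

    # Endpoint URL is assumed; adjust to the deployed API root.
    resp = requests.get(
        "https://example.org/api/v2/taxa/",
        params={"classification_threshold": 0.8},  # keep taxa whose best score >= 0.8
    )
    resp.raise_for_status()
    taxa = resp.json()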
9 changes: 7 additions & 2 deletions ami/main/charts.py
@@ -175,6 +175,7 @@ def detections_per_hour(project_pk: int):
.values("source_image__timestamp__hour")
.annotate(num_detections=models.Count("id"))
.order_by("source_image__timestamp__hour")
.exclude(source_image__timestamp=None)
)

# hours, counts = list(zip(*detections_per_hour))
@@ -204,6 +205,9 @@ def occurrences_accumulated(project_pk: int):
occurrences_per_day = (
Occurrence.objects.filter(project=project_pk)
.values_list("event__start")
.exclude(event=None)
.exclude(event__start=None)
.exclude(detections=None)
.annotate(num_occurrences=models.Count("id"))
.order_by("event__start")
)
@@ -213,8 +217,8 @@
# Accumulate the counts
counts = list(itertools.accumulate(counts))
# tickvals = [f"{d:%b %d}" for d in days]
tickvals = [f"{days[0]:%b %d}", f"{days[-1]:%b %d}"]
days = [f"{d:%b %d}" for d in days]
tickvals = [f"{days[0]:%b %d, %Y}", f"{days[-1]:%b %d, %Y}"]
days = [f"{d:%b %d, %Y}" for d in days]
else:
days, counts = [], []
tickvals = []
@@ -234,6 +238,7 @@ def event_detections_per_hour(event_pk: int):
.values("source_image__timestamp__hour")
.annotate(num_detections=models.Count("id"))
.order_by("source_image__timestamp__hour")
.exclude(source_image__timestamp=None)
)

# hours, counts = list(zip(*detections_per_hour))
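Note (not part of the diff): the tick-label change adds the year to the chart's first and last labels. A quick self-contained illustration of the two strftime formats used above:

    import datetime

    d = datetime.date(2024, 2, 3)
    print(f"{d:%b %d}")       # "Feb 03"        (old label, ambiguous across years)
    print(f"{d:%b %d, %Y}")   # "Feb 03, 2024"  (new label)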
7 changes: 4 additions & 3 deletions ami/main/migrations/0025_update_deployment_aggregates.py
@@ -8,12 +8,13 @@

# Save all Deployment objects to update their calculated fields.
def update_deployment_aggregates(apps, schema_editor):
# Deployment = apps.get_model("main", "Deployment")
from ami.main.models import Deployment
Deployment = apps.get_model("main", "Deployment")
# from ami.main.models import Deployment

for deployment in Deployment.objects.all():
logger.info(f"Updating deployment {deployment}")
deployment.save(update_calculated_fields=True)
# deployment.save(update_calculated_fields=True)
deployment.save()


class Migration(migrations.Migration):
4 changes: 2 additions & 2 deletions ami/main/migrations/0027_update_occurrence_scores.py
@@ -5,8 +5,8 @@

# Call save on all occurrences to update their scores
def update_occurrence_scores(apps, schema_editor):
# Occurrence = apps.get_model("main", "Occurrence")
from ami.main.models import Occurrence
Occurrence = apps.get_model("main", "Occurrence")
# from ami.main.models import Occurrence

for occurrence in Occurrence.objects.all():
occurrence.save()
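Note (not part of the diff): both data migrations switch from importing the live model to apps.get_model(). In Django data migrations, apps.get_model() returns the historical model state, which does not carry custom methods or custom save() keyword arguments, which is consistent with dropping update_calculated_fields=True in 0025. A generic sketch of the pattern; names are illustrative, not taken from the commit:

    # Generic Django data-migration pattern.
    def forwards(apps, schema_editor):
        Deployment = apps.get_model("main", "Deployment")  # historical model, not ami.main.models.Deployment
        for deployment in Deployment.objects.all():
            deployment.save()  # only the stock Model.save() signature is available here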
31 changes: 24 additions & 7 deletions ami/main/models.py
@@ -217,6 +217,7 @@ def _insert_or_update_batch_for_sync(
total_files: int,
total_size: int,
sql_batch_size=500,
regroup_events_per_batch=False,
):
logger.info(f"Bulk inserting or updating batch of {len(source_images)} SourceImages")
try:
@@ -236,9 +237,10 @@
deployment.data_source_total_size = total_size
deployment.data_source_last_checked = datetime.datetime.now()

events = group_images_into_events(deployment)
for event in events:
set_dimensions_for_collection(event)
if regroup_events_per_batch:
events = group_images_into_events(deployment)
for event in events:
set_dimensions_for_collection(event)

deployment.save(update_calculated_fields=False)

@@ -354,7 +356,7 @@ def data_source_uri(self) -> str | None:
uri = None
return uri

def sync_captures(self, batch_size=1000) -> int:
def sync_captures(self, batch_size=1000, regroup_events_per_batch=False) -> int:
"""Import images from the deployment's data source"""

deployment = self
@@ -379,17 +381,22 @@ def sync_captures(self, batch_size=1000) -> int:
source_images.append(source_image)

if len(source_images) >= django_batch_size:
_insert_or_update_batch_for_sync(deployment, source_images, total_files, total_size, sql_batch_size)
_insert_or_update_batch_for_sync(
deployment, source_images, total_files, total_size, sql_batch_size, regroup_events_per_batch
)
source_images = []

if source_images:
# Insert/update the last batch
_insert_or_update_batch_for_sync(deployment, source_images, total_files, total_size, sql_batch_size)
_insert_or_update_batch_for_sync(
deployment, source_images, total_files, total_size, sql_batch_size, regroup_events_per_batch
)

_compare_totals_for_sync(deployment, total_files)

# @TODO decide if we should delete SourceImages that are no longer in the data source
self.save()

return total_files

def update_children(self):
@@ -1323,6 +1330,7 @@ class Detection(BaseModel):
# @TODO use structured data for bbox
bbox = models.JSONField(null=True, blank=True)

# @TODO shouldn't this be automatically set by the source image?
timestamp = models.DateTimeField(null=True, blank=True)

# file = (
@@ -1577,7 +1585,10 @@ def save(self, update_determination=True, *args, **kwargs):
# This may happen for legacy occurrences that were created
# before the determination_score field was added
self.determination_score = self.get_determination_score()
self.save(update_determination=False)
if not self.determination_score:
logger.warning(f"Could not determine score for {self}")
else:
self.save(update_determination=False)

class Meta:
ordering = ["-determination_score"]
@@ -1601,6 +1612,12 @@ def update_occurrence_determination(
"""
needs_update = False

# Invalidate the cached properties so they will be re-calculated
if hasattr(occurrence, "best_identification"):
del occurrence.best_identification
if hasattr(occurrence, "best_prediction"):
del occurrence.best_prediction

current_determination = (
current_determination
or Occurrence.objects.select_related("determination")
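Note (not part of the diff): the hasattr/del guard added in update_occurrence_determination is the standard way to invalidate functools.cached_property values, since deleting the attribute when it was never accessed raises AttributeError; this assumes best_identification and best_prediction are cached properties, as the del pattern suggests. A self-contained sketch of the pattern using a toy class, not the real Occurrence model:

    from functools import cached_property

    class Example:
        @cached_property
        def best_prediction(self):
            print("computing...")
            return 42  # stands in for an expensive query

    obj = Example()
    obj.best_prediction            # computes and caches
    if hasattr(obj, "best_prediction"):
        del obj.best_prediction    # drop the cached value
    obj.best_prediction            # recomputes on next access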