Merge branch 'main' into demo/jan29
mihow authored Feb 3, 2024
2 parents 6651741 + db6fcc4 commit 62c9464
Showing 12 changed files with 271 additions and 96 deletions.
26 changes: 13 additions & 13 deletions .github/workflows/test.backend.yml
@@ -34,20 +34,20 @@ jobs:
uses: pre-commit/[email protected]

# With no caching at all the entire ci process takes 4m 30s to complete!
test:
runs-on: ubuntu-latest
steps:
- name: Checkout Code Repository
uses: actions/checkout@v4
# test:
# runs-on: ubuntu-latest
# steps:
# - name: Checkout Code Repository
# uses: actions/checkout@v4

- name: Build the Stack
run: docker-compose -f local.yml build
# - name: Build the Stack
# run: docker-compose -f local.yml build

- name: Run DB Migrations
run: docker-compose -f local.yml run --rm django python manage.py migrate
# - name: Run DB Migrations
# run: docker-compose -f local.yml run --rm django python manage.py migrate

- name: Run Django Tests
run: docker-compose -f local.yml run django pytest
# - name: Run Django Tests
# run: docker-compose -f local.yml run --rm django python manage.py test

- name: Tear down the Stack
run: docker-compose -f local.yml down
# - name: Tear down the Stack
# run: docker-compose -f local.yml down
72 changes: 68 additions & 4 deletions ami/main/api/serializers.py
@@ -83,10 +83,42 @@ class Meta:
]


class DeviceNestedSerializer(DefaultSerializer):
class Meta:
model = Device
fields = [
"id",
"name",
"details",
]


class SiteNestedSerializer(DefaultSerializer):
class Meta:
model = Site
fields = [
"id",
"name",
"details",
]


class StorageSourceNestedSerializer(DefaultSerializer):
class Meta:
model = S3StorageSource
fields = [
"id",
"name",
"details",
]


class DeploymentListSerializer(DefaultSerializer):
events = serializers.SerializerMethodField()
occurrences = serializers.SerializerMethodField()
project = ProjectNestedSerializer(read_only=True)
device = DeviceNestedSerializer(read_only=True)
research_site = SiteNestedSerializer(read_only=True)

class Meta:
model = Deployment
@@ -108,6 +140,8 @@ class Meta:
"longitude",
"first_date",
"last_date",
"device",
"research_site",
]

def get_events(self, obj):
@@ -285,24 +319,51 @@ class DeploymentSerializer(DeploymentListSerializer):
events = DeploymentEventNestedSerializer(many=True, read_only=True)
occurrences = serializers.SerializerMethodField()
example_captures = DeploymentCaptureNestedSerializer(many=True, read_only=True)
data_source = serializers.SerializerMethodField(read_only=True)
project_id = serializers.PrimaryKeyRelatedField(
write_only=True,
queryset=Project.objects.all(),
source="project",
)
device_id = serializers.PrimaryKeyRelatedField(
write_only=True,
queryset=Device.objects.all(),
source="device",
)
research_site_id = serializers.PrimaryKeyRelatedField(
write_only=True,
queryset=Site.objects.all(),
source="research_site",
)
data_source = serializers.SerializerMethodField()
data_source_id = serializers.PrimaryKeyRelatedField(
write_only=True,
queryset=S3StorageSource.objects.all(),
source="data_source",
)

class Meta(DeploymentListSerializer.Meta):
fields = DeploymentListSerializer.Meta.fields + [
"project_id",
"description",
"device_id",
"research_site_id",
"data_source",
"data_source_id",
"description",
"example_captures",
# "capture_images",
]

def get_data_source(self, obj):
return obj.data_source_uri()
"""
Add uri to nested serializer of the data source
The data source is defined by both the StorageSource model
and the extra configuration in the Deployment model.
"""

data = StorageSourceNestedSerializer(obj.data_source, context=self.context).data
data["uri"] = obj.data_source_uri()
return data

def get_occurrences(self, obj):
"""
@@ -507,7 +568,10 @@ def get_page_offset(self, obj) -> int:
# @TODO this may not be correct. Test or remove if unnecessary.
# the Occurrence to Session navigation in the UI will be using
# another method.
return obj.event.captures.filter(timestamp__lt=obj.timestamp).count()
if not obj or not obj.event:
return 0
else:
return obj.event.captures.filter(timestamp__lt=obj.timestamp).count()


class TaxonOccurrenceNestedSerializer(DefaultSerializer):
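Note (not part of the diff): the new write-only "*_id" fields pair with the read-only nested serializers, a common DRF pattern for accepting primary keys on write while returning nested objects on read. A minimal sketch of how the updated DeploymentSerializer might be exercised from a view or Django shell with the app loaded; the field values, the "name" field, and the request context are assumed for illustration only:

    # Assumed IDs and request context; shown only to illustrate the write/read split.
    payload = {
        "name": "Example deployment",  # assumed field, not shown in this hunk
        "project_id": 1,               # write-only, maps to Deployment.project
        "device_id": 2,                # write-only, maps to Deployment.device
        "research_site_id": 3,         # write-only, maps to Deployment.research_site
        "data_source_id": 4,           # write-only, maps to Deployment.data_source
    }
    serializer = DeploymentSerializer(data=payload, context={"request": request})  # request from the surrounding view
    serializer.is_valid(raise_exception=True)
    deployment = serializer.save()

    # On read, "device", "research_site", and "data_source" come back as nested
    # objects, and get_data_source() adds the resolved "uri" key to data_source.
    DeploymentSerializer(deployment, context={"request": request}).data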
28 changes: 24 additions & 4 deletions ami/main/api/views.py
@@ -121,7 +121,7 @@ class DeploymentViewSet(DefaultViewSet):
for the list and detail views.
"""

queryset = Deployment.objects.select_related("project")
queryset = Deployment.objects.select_related("project", "device", "research_site")
filterset_fields = ["project"]
ordering_fields = [
"created_at",
@@ -435,6 +435,7 @@ class OccurrenceViewSet(DefaultViewSet):

queryset = (
Occurrence.objects.exclude(detections=None)
.exclude(event=None) # These must be independent exclude calls
.annotate(
detections_count=models.Count("detections", distinct=True),
duration=models.Max("detections__timestamp") - models.Min("detections__timestamp"),
@@ -447,7 +448,7 @@
)
.prefetch_related("detections")
.order_by("-determination_score")
.all()
.exclude(first_appearance_time=None) # This must come after annotations
)
serializer_class = OccurrenceSerializer
filterset_fields = ["event", "deployment", "determination", "project"]
@@ -573,17 +574,33 @@ def filter_by_occurrence(self, queryset: QuerySet) -> tuple[QuerySet, bool]:
event = Event.objects.get(id=event_id)
queryset = super().get_queryset().filter(occurrences__event=event)

return queryset, filter_active

def filter_by_classification_threshold(self, queryset: QuerySet) -> QuerySet:
"""
Filter taxa by their best determination score in occurrences.
This is only applicable to list queries that are not filtered by occurrence, project, deployment, or event.
"""
# Look for a query param to filter by score
classification_threshold = self.request.query_params.get("classification_threshold")

if classification_threshold is not None:
classification_threshold = FloatField(required=False).clean(classification_threshold)
else:
classification_threshold = DEFAULT_CONFIDENCE_THRESHOLD

queryset = (
queryset.annotate(best_determination_score=models.Max("occurrences__determination_score"))
.filter(best_determination_score__gte=DEFAULT_CONFIDENCE_THRESHOLD)
.filter(best_determination_score__gte=classification_threshold)
.distinct()
)

# If ordering is not specified, order by best determination score
if not self.request.query_params.get("ordering"):
queryset = queryset.order_by("-best_determination_score")

return queryset, filter_active
return queryset

def get_queryset(self) -> QuerySet:
qs = super().get_queryset()
@@ -593,9 +610,12 @@ def get_queryset(self) -> QuerySet:
from rest_framework.exceptions import NotFound

raise NotFound(detail=str(e))

qs = qs.select_related("parent", "parent__parent")

if filter_active:
qs = self.filter_by_classification_threshold(qs)

qs = qs.prefetch_related("occurrences")
qs = qs.annotate(
occurrences_count=models.Count("occurrences", distinct=True),
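Note (not part of the diff): the new classification_threshold query parameter filters listed taxa by their best occurrence determination score, falling back to DEFAULT_CONFIDENCE_THRESHOLD when omitted. A rough client-side sketch; the host and endpoint path are assumed for illustration:

    import requests

    # Endpoint URL is assumed; adjust to the deployed API root.
    resp = requests.get(
        "https://example.org/api/v2/taxa/",
        params={"classification_threshold": 0.8},  # keep taxa whose best score >= 0.8
    )
    resp.raise_for_status()
    taxa = resp.json()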
9 changes: 7 additions & 2 deletions ami/main/charts.py
@@ -175,6 +175,7 @@ def detections_per_hour(project_pk: int):
.values("source_image__timestamp__hour")
.annotate(num_detections=models.Count("id"))
.order_by("source_image__timestamp__hour")
.exclude(source_image__timestamp=None)
)

# hours, counts = list(zip(*detections_per_hour))
@@ -204,6 +205,9 @@ def occurrences_accumulated(project_pk: int):
occurrences_per_day = (
Occurrence.objects.filter(project=project_pk)
.values_list("event__start")
.exclude(event=None)
.exclude(event__start=None)
.exclude(detections=None)
.annotate(num_occurrences=models.Count("id"))
.order_by("event__start")
)
@@ -213,8 +217,8 @@
# Accumulate the counts
counts = list(itertools.accumulate(counts))
# tickvals = [f"{d:%b %d}" for d in days]
tickvals = [f"{days[0]:%b %d}", f"{days[-1]:%b %d}"]
days = [f"{d:%b %d}" for d in days]
tickvals = [f"{days[0]:%b %d, %Y}", f"{days[-1]:%b %d, %Y}"]
days = [f"{d:%b %d, %Y}" for d in days]
else:
days, counts = [], []
tickvals = []
@@ -234,6 +238,7 @@ def event_detections_per_hour(event_pk: int):
.values("source_image__timestamp__hour")
.annotate(num_detections=models.Count("id"))
.order_by("source_image__timestamp__hour")
.exclude(source_image__timestamp=None)
)

# hours, counts = list(zip(*detections_per_hour))
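Note (not part of the diff): the tick-label change adds the year to the chart's first and last labels. A quick self-contained illustration of the two strftime formats used above:

    import datetime

    d = datetime.date(2024, 2, 3)
    print(f"{d:%b %d}")       # "Feb 03"        (old label, ambiguous across years)
    print(f"{d:%b %d, %Y}")   # "Feb 03, 2024"  (new label)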
7 changes: 4 additions & 3 deletions ami/main/migrations/0025_update_deployment_aggregates.py
@@ -8,12 +8,13 @@

# Save all Deployment objects to update their calculated fields.
def update_deployment_aggregates(apps, schema_editor):
# Deployment = apps.get_model("main", "Deployment")
from ami.main.models import Deployment
Deployment = apps.get_model("main", "Deployment")
# from ami.main.models import Deployment

for deployment in Deployment.objects.all():
logger.info(f"Updating deployment {deployment}")
deployment.save(update_calculated_fields=True)
# deployment.save(update_calculated_fields=True)
deployment.save()


class Migration(migrations.Migration):
4 changes: 2 additions & 2 deletions ami/main/migrations/0027_update_occurrence_scores.py
@@ -5,8 +5,8 @@

# Call save on all occurrences to update their scores
def update_occurrence_scores(apps, schema_editor):
# Occurrence = apps.get_model("main", "Occurrence")
from ami.main.models import Occurrence
Occurrence = apps.get_model("main", "Occurrence")
# from ami.main.models import Occurrence

for occurrence in Occurrence.objects.all():
occurrence.save()
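Note (not part of the diff): both data migrations switch from importing the live model to apps.get_model(). In Django data migrations, apps.get_model() returns the historical model state, which does not carry custom methods or custom save() keyword arguments, which is consistent with dropping update_calculated_fields=True in 0025. A generic sketch of the pattern; names are illustrative, not taken from the commit:

    # Generic Django data-migration pattern.
    def forwards(apps, schema_editor):
        Deployment = apps.get_model("main", "Deployment")  # historical model, not ami.main.models.Deployment
        for deployment in Deployment.objects.all():
            deployment.save()  # only the stock Model.save() signature is available here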
31 changes: 24 additions & 7 deletions ami/main/models.py
@@ -217,6 +217,7 @@ def _insert_or_update_batch_for_sync(
total_files: int,
total_size: int,
sql_batch_size=500,
regroup_events_per_batch=False,
):
logger.info(f"Bulk inserting or updating batch of {len(source_images)} SourceImages")
try:
@@ -236,9 +237,10 @@
deployment.data_source_total_size = total_size
deployment.data_source_last_checked = datetime.datetime.now()

events = group_images_into_events(deployment)
for event in events:
set_dimensions_for_collection(event)
if regroup_events_per_batch:
events = group_images_into_events(deployment)
for event in events:
set_dimensions_for_collection(event)

deployment.save(update_calculated_fields=False)

@@ -354,7 +356,7 @@ def data_source_uri(self) -> str | None:
uri = None
return uri

def sync_captures(self, batch_size=1000) -> int:
def sync_captures(self, batch_size=1000, regroup_events_per_batch=False) -> int:
"""Import images from the deployment's data source"""

deployment = self
@@ -379,17 +381,22 @@ def sync_captures(self, batch_size=1000) -> int:
source_images.append(source_image)

if len(source_images) >= django_batch_size:
_insert_or_update_batch_for_sync(deployment, source_images, total_files, total_size, sql_batch_size)
_insert_or_update_batch_for_sync(
deployment, source_images, total_files, total_size, sql_batch_size, regroup_events_per_batch
)
source_images = []

if source_images:
# Insert/update the last batch
_insert_or_update_batch_for_sync(deployment, source_images, total_files, total_size, sql_batch_size)
_insert_or_update_batch_for_sync(
deployment, source_images, total_files, total_size, sql_batch_size, regroup_events_per_batch
)

_compare_totals_for_sync(deployment, total_files)

# @TODO decide if we should delete SourceImages that are no longer in the data source
self.save()

return total_files

def update_children(self):
@@ -1323,6 +1330,7 @@ class Detection(BaseModel):
# @TODO use structured data for bbox
bbox = models.JSONField(null=True, blank=True)

# @TODO shouldn't this be automatically set by the source image?
timestamp = models.DateTimeField(null=True, blank=True)

# file = (
@@ -1577,7 +1585,10 @@ def save(self, update_determination=True, *args, **kwargs):
# This may happen for legacy occurrences that were created
# before the determination_score field was added
self.determination_score = self.get_determination_score()
self.save(update_determination=False)
if not self.determination_score:
logger.warning(f"Could not determine score for {self}")
else:
self.save(update_determination=False)

class Meta:
ordering = ["-determination_score"]
@@ -1601,6 +1612,12 @@ def update_occurrence_determination(
"""
needs_update = False

# Invalidate the cached properties so they will be re-calculated
if hasattr(occurrence, "best_identification"):
del occurrence.best_identification
if hasattr(occurrence, "best_prediction"):
del occurrence.best_prediction

current_determination = (
current_determination
or Occurrence.objects.select_related("determination")
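Note (not part of the diff): the hasattr/del guard added in update_occurrence_determination is the standard way to invalidate functools.cached_property values, since deleting the attribute when it was never accessed raises AttributeError; this assumes best_identification and best_prediction are cached properties, as the del pattern suggests. A self-contained sketch of the pattern using a toy class, not the real Occurrence model:

    from functools import cached_property

    class Example:
        @cached_property
        def best_prediction(self):
            print("computing...")
            return 42  # stands in for an expensive query

    obj = Example()
    obj.best_prediction            # computes and caches
    if hasattr(obj, "best_prediction"):
        del obj.best_prediction    # drop the cached value
    obj.best_prediction            # recomputes on next access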