Skip to content

Commit

Permalink
Don't regroup events every batch during image sync
Browse files Browse the repository at this point in the history
  • Loading branch information
mihow authored and Debian committed Feb 3, 2024
1 parent 67b7bd0 commit b330e74
Showing 1 changed file with 13 additions and 6 deletions.
19 changes: 13 additions & 6 deletions ami/main/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -217,6 +217,7 @@ def _insert_or_update_batch_for_sync(
total_files: int,
total_size: int,
sql_batch_size=500,
+ regroup_events_per_batch=False,
):
logger.info(f"Bulk inserting or updating batch of {len(source_images)} SourceImages")
try:
Expand All @@ -236,9 +237,10 @@ def _insert_or_update_batch_for_sync(
deployment.data_source_total_size = total_size
deployment.data_source_last_checked = datetime.datetime.now()

- events = group_images_into_events(deployment)
- for event in events:
-     set_dimensions_for_collection(event)
+ if regroup_events_per_batch:
+     events = group_images_into_events(deployment)
+     for event in events:
+         set_dimensions_for_collection(event)

deployment.save(update_calculated_fields=False)

Expand Down Expand Up @@ -354,7 +356,7 @@ def data_source_uri(self) -> str | None:
uri = None
return uri

- def sync_captures(self, batch_size=1000) -> int:
+ def sync_captures(self, batch_size=1000, regroup_events_per_batch=False) -> int:
"""Import images from the deployment's data source"""

deployment = self
Expand All @@ -379,17 +381,22 @@ def sync_captures(self, batch_size=1000) -> int:
source_images.append(source_image)

if len(source_images) >= django_batch_size:
- _insert_or_update_batch_for_sync(deployment, source_images, total_files, total_size, sql_batch_size)
+ _insert_or_update_batch_for_sync(
+     deployment, source_images, total_files, total_size, sql_batch_size, regroup_events_per_batch
+ )
source_images = []

if source_images:
# Insert/update the last batch
- _insert_or_update_batch_for_sync(deployment, source_images, total_files, total_size, sql_batch_size)
+ _insert_or_update_batch_for_sync(
+     deployment, source_images, total_files, total_size, sql_batch_size, regroup_events_per_batch
+ )

_compare_totals_for_sync(deployment, total_files)

# @TODO decide if we should delete SourceImages that are no longer in the data source
self.save()

return total_files

def update_children(self):
Expand Down

0 comments on commit b330e74

Please sign in to comment.