Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement endpoint for deleting attachments by entity ID #105

Open
wants to merge 8 commits into
base: develop
Choose a base branch
from
19 changes: 16 additions & 3 deletions object_storage_api/repositories/attachment.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,8 +72,8 @@ def list(self, entity_id: Optional[str], session: Optional[ClientSession] = None
"""
Retrieve attachments from a MongoDB database.

:param session: PyMongo ClientSession to use for database operations.
:param entity_id: The ID of the entity to filter attachments by.
:param session: PyMongo ClientSession to use for database operations.
:return: List of attachments or an empty list if no attachments are retrieved.
"""

Expand Down Expand Up @@ -101,7 +101,7 @@ def delete(self, attachment_id: str, session: Optional[ClientSession] = None) ->
Delete an attachment by its ID from a MongoDB database.

:param attachment_id: The ID of the attachment to delete.
:param session: PyMongo ClientSession to use for database operations
:param session: PyMongo ClientSession to use for database operations.
:raises MissingRecordError: If the supplied `attachment_id` is non-existent.
:raises InvalidObjectIdError: If the supplied `attachment_id` is invalid.
"""
Expand All @@ -114,4 +114,17 @@ def delete(self, attachment_id: str, session: Optional[ClientSession] = None) ->
raise exc
response = self._attachments_collection.delete_one(filter={"_id": attachment_id}, session=session)
if response.deleted_count == 0:
raise MissingRecordError(f"No attachment found with ID: {attachment_id}", "attachment")
raise MissingRecordError(f"No attachment found with ID: {attachment_id}", entity_name="attachment")

def delete_by_entity_id(self, entity_id: str, session: Optional[ClientSession] = None) -> None:
"""
Delete attachments by entity ID.

:param entity_id: The entity ID of the attachments to delete.
:param session: PyMongo ClientSession to use for database operations.
:return:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
:return:

"""
logger.info("Deleting attachments with entity ID: %s from the database", entity_id)
entity_id = CustomObjectId(entity_id)
# Given it is deleting multiple, we are not raising an exception if no attachments were found to be deleted
self._attachments_collection.delete_many(filter={"entity_id": entity_id}, session=session)
2 changes: 1 addition & 1 deletion object_storage_api/repositories/image.py
Original file line number Diff line number Diff line change
Expand Up @@ -150,4 +150,4 @@ def delete(self, image_id: str, session: Optional[ClientSession] = None) -> None
raise exc
response = self._images_collection.delete_one(filter={"_id": image_id}, session=session)
if response.deleted_count == 0:
raise MissingRecordError(f"No image found with ID: {image_id}", "image")
raise MissingRecordError(f"No image found with ID: {image_id}", entity_name="image")
21 changes: 21 additions & 0 deletions object_storage_api/routers/attachment.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@

from fastapi import APIRouter, Depends, Path, Query, status

from object_storage_api.core.exceptions import InvalidObjectIdError
from object_storage_api.schemas.attachment import (
AttachmentMetadataSchema,
AttachmentPostResponseSchema,
Expand Down Expand Up @@ -82,3 +83,23 @@ def delete_attachment(
# pylint: disable=missing-function-docstring
logger.info("Deleting attachment with ID: %s", attachment_id)
attachment_service.delete(attachment_id)


@router.delete(
path="",
summary="Delete attachments by entity ID",
response_description="Attachments deleted successfully",
status_code=status.HTTP_204_NO_CONTENT,
)
def delete_attachments_by_entity_id(
entity_id: Annotated[str, Query(description="The entity ID of the attachments to delete")],
attachment_service: AttachmentServiceDep,
) -> None:
# pylint: disable=missing-function-docstring
logger.info("Deleting attachments with entity ID: %s", entity_id)
try:
attachment_service.delete_by_entity_id(entity_id)
except InvalidObjectIdError:
# As this endpoint takes in a query parameter to delete multiple attachments, and to hide the database
# behaviour, we treat any invalid entity_id the same as a valid one that has no attachments associated to it.
pass
Comment on lines +100 to +105
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Whilst it is fine to catch the exception in the router layer, I think it would be better if it was in the repo layer to match what we are doing in the rest of the repo.

14 changes: 14 additions & 0 deletions object_storage_api/services/attachment.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,3 +101,17 @@ def delete(self, attachment_id: str) -> None:
# Deletes attachment from object store first to prevent unreferenced objects in storage
self._attachment_store.delete(stored_attachment.object_key)
self._attachment_repository.delete(attachment_id)

def delete_by_entity_id(self, entity_id: str) -> None:
"""
Delete attachments by entity ID.

:param entity_id: The entity ID of the attachments to delete.
"""
stored_attachments = self._attachment_repository.list(entity_id)
if stored_attachments:
# Deletes attachments from object store first to prevent unreferenced objects in storage
self._attachment_store.delete_many(
[stored_attachment.object_key for stored_attachment in stored_attachments]
)
self._attachment_repository.delete_by_entity_id(entity_id)
19 changes: 19 additions & 0 deletions object_storage_api/stores/attachment.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,3 +88,22 @@ def delete(self, object_key: str) -> None:
Bucket=object_storage_config.bucket_name.get_secret_value(),
Key=object_key,
)

def delete_many(self, object_keys: list[str]) -> None:
"""
Deletes given attachments from object storage by object keys.

It does this in batches due to the `delete_objects` request only allowing a list of up to 1000 keys.

:param object_keys: Keys of the attachments to delete.
"""
logger.info("Deleting attachment files with object keys: %s from the object store", object_keys)

batch_size = 1000
# Loop through the list of object keys in steps of `batch_size`
for i in range(0, len(object_keys), batch_size):
batch = object_keys[i : i + batch_size]
s3_client.delete_objects(
Bucket=object_storage_config.bucket_name.get_secret_value(),
Delete={"Objects": [{"Key": key for key in batch}]},
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
Delete={"Objects": [{"Key": key for key in batch}]},
Delete={"Objects": [{"Key": key } for key in batch]},

The current implementation works, but I'm not sure why it does?

The docs for the function requires the parameter to be structured as so:
image

If I modify the code to compare the 2 outputs of both options this is the result.
image
image

Again both implementation work, but I don't understand why yours works?

)
45 changes: 45 additions & 0 deletions test/e2e/test_attachment.py
Original file line number Diff line number Diff line change
Expand Up @@ -360,3 +360,48 @@ def test_delete_with_invalid_id(self):
"""Test deleting an attachment with an invalid ID."""
self.delete_attachment("invalid_id")
self.check_delete_attachment_failed_with_detail(404, "Attachment not found")


class DeleteByEntityIdDSL(ListDSL):
"""Base class for delete tests."""
Copy link
Member

@rowan04 rowan04 Jan 30, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
"""Base class for delete tests."""
"""Base class for delete_by_entity_id tests."""


_delete_response_attachments: Response

def delete_attachments_by_entity_id(self, entity_id: str) -> None:
"""
Deletes attachments with the given entity ID.

:param entity_id: Entity ID of the attachments to be deleted.
"""
self._delete_response_attachments = self.test_client.delete("/attachments", params={"entity_id": entity_id})

def check_delete_attachments_by_entity_id_success(self) -> None:
"""
Checks that a prior call to `delete_attachments_by_entity_id` gave a successful response with the expected code.
"""
assert self._delete_response_attachments.status_code == 204


class TestDeleteByEntityId(DeleteByEntityIdDSL):
"""Tests for deleting attachments by entity ID."""

def test_delete_attachments_by_entity_id(self):
"""Test deleting attachments."""
attachments = self.post_test_attachments()
entity_id = attachments[0]["entity_id"]

self.delete_attachments_by_entity_id(entity_id)
self.check_delete_attachments_by_entity_id_success()

self.get_attachments(filters={"entity_id": entity_id})
self.check_get_attachments_success([])

def test_delete_attachments_by_entity_id_with_non_existent_id(self):
"""Test deleting attachments with a non-existent entity ID."""
self.delete_attachments_by_entity_id(str(ObjectId()))
self.check_delete_attachments_by_entity_id_success()

def test_delete_attachments_by_entity_id_with_invalid_id(self):
Comment on lines +388 to +404
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

is attachments needed in each function name? could they just be e.g. test_delete_by_entity_id

"""Test deleting attachments with an invalid entity ID."""
self.delete_attachments_by_entity_id("invalid_id")
self.check_delete_attachments_by_entity_id_success()
14 changes: 14 additions & 0 deletions test/unit/repositories/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,3 +91,17 @@ def mock_delete_one(collection_mock: Mock, deleted_count: int) -> None:
delete_result_mock = Mock(DeleteResult)
delete_result_mock.deleted_count = deleted_count
collection_mock.delete_one.return_value = delete_result_mock

@staticmethod
def mock_delete_many(collection_mock: Mock, deleted_count: int) -> None:
"""
Mock the `delete_many` method of the MongoDB database collection mock to return a `DeleteResult` object. The
passed `deleted_count` value is returned as the `deleted_count` attribute of the `DeleteResult` object, enabling
for the code that relies on the `deleted_count` value to work.

:param collection_mock: Mocked MongoDB database collection instance.
:param deleted_count: The value to be assigned to the `deleted_count` attribute of the `DeleteResult` object
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
:param deleted_count: The value to be assigned to the `deleted_count` attribute of the `DeleteResult` object
:param deleted_count: The value to be assigned to the `deleted_count` attribute of the `DeleteResult` object.

"""
delete_result_mock = Mock(DeleteResult)
delete_result_mock.deleted_count = deleted_count
collection_mock.delete_many.return_value = delete_result_mock
77 changes: 73 additions & 4 deletions test/unit/repositories/test_attachment.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,7 @@ class GetDSL(AttachmentRepoDSL):
_obtained_attachment_out: AttachmentOut
_get_exception: pytest.ExceptionInfo

def mock_get(self, attachment_id: str, attachment_in_data: dict) -> None:
def mock_get(self, attachment_id: str, attachment_in_data: Optional[dict] = None) -> None:
"""
Mocks database methods appropriately to test the `get` repo method.

Expand Down Expand Up @@ -155,7 +155,7 @@ def check_get_failed_with_exception(self, message: str, assert_find: bool = Fals
Checks that a prior call to `call_get_expecting_error` worked as expected, raising an exception
with the correct message.

:param attachment_id: ID of the expected attachment to appear in the exception detail.
:param message: Expected message of the raised exception.
:param assert_find: If `True` it asserts whether a `find_one` call was made,
else it asserts that no call was made.
"""
Expand Down Expand Up @@ -186,15 +186,15 @@ def test_get_with_non_existent_id(self):

attachment_id = str(ObjectId())

self.mock_get(attachment_id, None)
self.mock_get(attachment_id)
self.call_get_expecting_error(attachment_id, MissingRecordError)
self.check_get_failed_with_exception(f"No attachment found with ID: {attachment_id}", True)

def test_get_with_invalid_id(self):
"""Test getting an attachment with an invalid attachment ID."""
attachment_id = "invalid-id"

self.mock_get(attachment_id, None)
self.mock_get(attachment_id)
self.call_get_expecting_error(attachment_id, InvalidObjectIdError)
self.check_get_failed_with_exception(f"Invalid ObjectId value '{attachment_id}'")

Expand Down Expand Up @@ -350,3 +350,72 @@ def test_delete_invalid_id(self):


# pylint: enable=duplicate-code


class DeleteByEntityIdDSL(AttachmentRepoDSL):
"""Base class for `delete_by_entity_id` tests."""

_delete_entity_id: str
_delete_by_entity_id_exception: pytest.ExceptionInfo

def mock_delete_by_entity_id(self, deleted_count: int) -> None:
"""
Mocks database methods appropriately to test the `delete_by_entity_id` repo method.

:param deleted_count: Number of documents deleted successfully.
"""
RepositoryTestHelpers.mock_delete_many(self.attachments_collection, deleted_count)

def call_delete_by_entity_id(self, entity_id: str) -> None:
"""
Calls the `AttachmentRepo` `delete_by_entity_id` method.

:param entity_id: The entity ID of the attachments to be deleted.
"""
self._delete_entity_id = entity_id
self.attachment_repository.delete_by_entity_id(entity_id, session=self.mock_session)

def call_delete_by_entity_id_expecting_error(self, entity_id: str, error_type: type[BaseException]) -> None:
"""
Calls the `AttachmentRepo` `delete_by_entity_id` method while expecting an error to be raised.

:param entity_id: The entity ID of the attachments to be deleted.
:param error_type: Expected exception to be raised.
"""
self._delete_entity_id = entity_id
with pytest.raises(error_type) as exc:
self.attachment_repository.delete_by_entity_id(entity_id, session=self.mock_session)
self._delete_by_entity_id_exception = exc

def check_delete_by_entity_id_success(self) -> None:
"""Checks that a prior call to `call_delete_by_entity_id` worked as expected."""
self.attachments_collection.delete_many.assert_called_once_with(
filter={"entity_id": ObjectId(self._delete_entity_id)}, session=self.mock_session
)

def check_delete_by_entity_id_failed_with_exception(self, message: str) -> None:
"""
Checks that a prior call to `call_delete_by_entity_id_expecting_error` worked as expected, raising an exception
with the correct message.

:param message: Expected message of the raised exception.
"""
self.attachments_collection.delete_many.assert_not_called()
assert str(self._delete_by_entity_id_exception.value) == message


class TestDeleteByEntityId(DeleteByEntityIdDSL):
"""Tests for deleting attachments by entity ID."""

def test_delete_by_entity_id(self):
"""Test deleting attachments."""
self.mock_delete_by_entity_id(3)
self.call_delete_by_entity_id(str(ObjectId()))
self.check_delete_by_entity_id_success()

def test_delete_by_entity_id_invalid_id(self):
"""Test deleting attachments with an invalid entity ID."""
entity_id = "invalid-id"

self.call_delete_by_entity_id_expecting_error(entity_id, InvalidObjectIdError)
self.check_delete_by_entity_id_failed_with_exception(f"Invalid ObjectId value '{entity_id}'")
61 changes: 61 additions & 0 deletions test/unit/services/test_attachment.py
Original file line number Diff line number Diff line change
Expand Up @@ -280,3 +280,64 @@ def test_delete(self):
self.mock_delete(ATTACHMENT_IN_DATA_ALL_VALUES)
self.call_delete()
self.check_delete_success()


class DeleteByEntityIdDSL(AttachmentServiceDSL):
"""Base class for `delete_by_entity_id` tests."""

_expected_attachments_out: list[AttachmentOut]
_delete_entity_id: str
_delete_attachment_object_keys: list[str]

def mock_delete_by_entity_id(self, attachments_in_data: list[dict]) -> None:
"""
Mocks repo methods appropriately to test the `delete_by_entity_id` service method.

:param attachments_in_data: List of dictionaries containing the attachment data as would be required for an
`AttachmentIn` database model (i.e. no created and modified times required).
"""
self._expected_attachments_out = [
AttachmentOut(**AttachmentIn(**attachment_in_data).model_dump())
for attachment_in_data in attachments_in_data
]
self.mock_attachment_repository.list.return_value = self._expected_attachments_out
self._delete_entity_id = (
self._expected_attachments_out[0].id if self._expected_attachments_out else str(ObjectId())
)
self._delete_attachment_object_keys = [
expected_attachment_out.object_key for expected_attachment_out in self._expected_attachments_out
]

def call_delete_by_entity_id(self) -> None:
"""Calls the `AttachmentService` `delete_by_entity_id` method."""
self.attachment_service.delete_by_entity_id(self._delete_entity_id)

def check_delete_by_entity_id_success(self, assert_delete: bool = True) -> None:
"""
Checks that a prior call to `call_delete_by_entity_id` worked as expected.

:param assert_delete: Whether the `delete_many` store method and `delete_by_entity_id` repo method are expected
to be called or not.
"""
if assert_delete:
self.mock_attachment_store.delete_many.assert_called_once_with(self._delete_attachment_object_keys)
self.mock_attachment_repository.delete_by_entity_id.assert_called_once_with(self._delete_entity_id)
else:
self.mock_attachment_store.delete_many.assert_not_called()
self.mock_attachment_repository.delete_by_entity_id.assert_not_called()


class TestDeleteByEntityId(DeleteByEntityIdDSL):
"""Tests for deleting attachments by entity ID."""

def test_delete_by_entity_id(self):
"""Test deleting attachments."""
self.mock_delete_by_entity_id([ATTACHMENT_IN_DATA_ALL_VALUES])
self.call_delete_by_entity_id()
self.check_delete_by_entity_id_success()

def test_delete_by_entity_id_non_existent_id(self):
"""Test deleting attachments with a non-existent entity ID."""
self.mock_delete_by_entity_id([])
self.call_delete_by_entity_id()
self.check_delete_by_entity_id_success(False)
31 changes: 31 additions & 0 deletions test/unit/stores/test_attachment.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,37 @@ def test_delete(self):
# pylint: enable=duplicate-code


class DeleteManyDSL(AttachmentStoreDSL):
"""Base class for `delete_many` tests."""

_delete_many_object_keys: list[str]

def call_delete_many(self, object_keys: list[str]) -> None:
"""
Calls the `AttachmentStore` `delete_many` method.

:param object_keys: Keys of the attachment to delete.
"""
self._delete_many_object_keys = object_keys
self.attachment_store.delete_many(object_keys)

def check_delete_many_success(self) -> None:
"""Checks that a prior call to `call_delete_many` worked as expected."""
self.mock_s3_client.delete_objects.assert_called_once_with(
Bucket=object_storage_config.bucket_name.get_secret_value(),
Delete={"Objects": [{"Key": key for key in self._delete_many_object_keys}]},
)


class TestDeleteMany(DeleteManyDSL):
"""Tests for deleting attachments from object storage by object keys."""

def test_delete_many(self):
"""Test deleting attachments from object storage."""
self.call_delete_many(["object-key"])
self.check_delete_many_success()


class CreatePresignedPostDSL(AttachmentStoreDSL):
"""Base class for `create_presigned_post` tests."""

Expand Down