Skip to content

Commit

Permalink
Add telemetry for the file uploader
Browse files Browse the repository at this point in the history
The old_api file upload method now raises a FileUploadError for
any errors it encounters. We let the do_upload_task raise exceptions,
and catch them in the main command so that we can log them as well
as send any errors up to honeycomb.
  • Loading branch information
rebkwok committed Feb 10, 2025
1 parent 2969c2a commit 5af1fb5
Show file tree
Hide file tree
Showing 4 changed files with 74 additions and 43 deletions.
72 changes: 39 additions & 33 deletions airlock/management/commands/run_file_uploader.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,16 @@
import logging
import os
import time

from django.conf import settings
from django.core.management.base import BaseCommand
from opentelemetry import trace

import old_api
from airlock.business_logic import bll
from airlock.enums import RequestStatus
from airlock.users import User
from services.tracing import instrument


logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -69,56 +72,59 @@ def handle(self, *args, **options):
file_for_upload = bll.register_file_upload_attempt(
approved_request, file_for_upload.relpath
)
try:
do_upload_task(file_for_upload, approved_request, workspace)
except Exception as error:
# Catch any unexpected exception here so we don't stop the task runner
# from running
logger.error(
"Upload for %s - %s failed (attempt %d of %d): %s",
approved_request.id,
file_for_upload.relpath,
file_for_upload.upload_attempts,
settings.UPLOAD_MAX_ATTEMPTS,
error,
)

tracer = trace.get_tracer(
os.environ.get("OTEL_SERVICE_NAME", "airlock")
)
with tracer.start_as_current_span(
"file_uploader",
attributes={
"release_request": approved_request.id,
"workspace": approved_request.workspace,
"file": str(file_for_upload.relpath),
},
) as span:
try:
do_upload_task(file_for_upload, approved_request, workspace)
except Exception as error:
# The most likely error here is old_api.FileUploadError, however
# we catch any unexpected exception here so we don't stop the task runner
# from running
span.record_exception(error)
logger.error(
"Upload for %s - %s failed (attempt %d of %d): %s",
approved_request.id,
file_for_upload.relpath,
file_for_upload.upload_attempts,
settings.UPLOAD_MAX_ATTEMPTS,
str(error),
)

# After we've tried to upload all files for this request, check if
# there are any still pending and set the request status now, so it's
# done as soon as possible and doesn't have to wait on the next loop
get_upload_files_and_update_request_status(approved_request)


@instrument
def do_upload_task(file_for_upload, release_request, workspace):
"""
Perform an upload task.
"""
uploaded, error = old_api.upload_file(
old_api.upload_file(
release_request.id,
release_request.workspace,
file_for_upload.relpath,
workspace.abspath(file_for_upload.relpath),
file_for_upload.released_by,
)
if uploaded:
# mark the request file as uploaded and set the task completed time
# we use the released_by user for this, for consistency with the
# user who initiated the release
bll.register_file_upload(
release_request, file_for_upload.relpath, get_user_for_file(file_for_upload)
)
logger.info(
"File uploaded: %s - %s", release_request.id, file_for_upload.relpath
)
else:
logger.error(
"Upload for %s - %s failed (attempt %d of %d): %s",
release_request.id,
file_for_upload.relpath,
file_for_upload.upload_attempts,
settings.UPLOAD_MAX_ATTEMPTS,
error,
)
# mark the request file as uploaded and set the task completed time
# we use the released_by user for this, for consistency with the
# user who initiated the release
bll.register_file_upload(
release_request, file_for_upload.relpath, get_user_for_file(file_for_upload)
)
logger.info("File uploaded: %s - %s", release_request.id, file_for_upload.relpath)


def get_upload_files_and_update_request_status(release_request):
Expand Down
7 changes: 4 additions & 3 deletions old_api/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,9 @@
logger = logging.getLogger(__name__)


class FileUploadError(Exception): ...


def create_filelist(paths, release_request):
files = []

Expand Down Expand Up @@ -86,7 +89,6 @@ def upload_file(release_id, workspace, relpath, abspath, username):
logger.info(
"File already uploaded - %s - %s - %s", workspace, relpath, release_id
)
return True, error
else:
logger.error(
"%s Error uploading file - %s - %s - %s",
Expand All @@ -95,8 +97,7 @@ def upload_file(release_id, workspace, relpath, abspath, username):
release_id,
response_content,
)
return False, error
return True, ""
raise FileUploadError(error)


def modified_time(path: Path) -> str:
Expand Down
29 changes: 27 additions & 2 deletions tests/integration/management/commands/test_run_file_uploader.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,9 @@
from airlock.enums import AuditEventType, RequestStatus
from airlock.management.commands.run_file_uploader import do_upload_task
from airlock.types import UrlPath
from old_api import FileUploadError
from tests import factories
from tests.conftest import get_trace


pytestmark = pytest.mark.django_db
Expand Down Expand Up @@ -81,7 +83,9 @@ def test_do_upload_task_api_error(upload_files_stubber, bll, freezer):
)
relpath = UrlPath("test/file.txt")
request_file = release_request.get_request_file_from_output_path(relpath)
do_upload_task(request_file, release_request, workspace)

with pytest.raises(FileUploadError):
do_upload_task(request_file, release_request, workspace)

request_file = refresh_request_file(release_request, relpath)
assert not request_file.uploaded
Expand Down Expand Up @@ -118,6 +122,14 @@ def test_run_file_uploader_command(upload_files_stubber, bll):
assert audit_log[0].type == AuditEventType.REQUEST_RELEASE
assert {log.type for log in audit_log[1:3]} == {AuditEventType.REQUEST_FILE_UPLOAD}

traces = get_trace()
last_trace = traces[-1]
assert last_trace.attributes == {
"release_request": release_request.id,
"workspace": "workspace",
"file": "test/file2.txt",
}


@patch("airlock.management.commands.run_file_uploader.time.sleep")
def test_run_file_uploader_command_no_tasks(mock_sleep):
Expand Down Expand Up @@ -212,7 +224,7 @@ def test_run_file_uploader_command_unexpected_error(upload_files_stubber, bll):

with patch(
"airlock.management.commands.run_file_uploader.do_upload_task",
side_effect=Exception(),
side_effect=Exception("an unknown exception"),
):
call_command("run_file_uploader", run_fn=run_fn)

Expand All @@ -224,6 +236,19 @@ def test_run_file_uploader_command_unexpected_error(upload_files_stubber, bll):
assert not request_file.uploaded
request_file.upload_attempts == 2

traces = get_trace()
last_trace = traces[-1]

assert last_trace.attributes == {
"release_request": release_request.id,
"workspace": "workspace",
"file": "test/file2.txt",
}
last_trace_event = last_trace.events[0]
assert last_trace_event.name == "exception"
assert last_trace_event.attributes["exception.type"] == "Exception"
assert last_trace_event.attributes["exception.message"] == "an unknown exception"


def test_run_file_uploader_command_exceeds_attempts(upload_files_stubber, bll):
# Mock status responses for file uploads; there are 3 files in total -
Expand Down
9 changes: 4 additions & 5 deletions tests/integration/test_old_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,15 +81,14 @@ def test_old_api_upload_file_error(responses, caplog):
status=400,
json={"detail": "job-server error"},
)
uploaded, error = old_api.upload_file(
"release-id", "workspace", relpath, abspath, "testuser"
)
assert not uploaded
with pytest.raises(old_api.FileUploadError, match="job-server error"):
old_api.upload_file(
"release-id", "workspace", relpath, abspath, "testuser"
)
assert len(caplog.messages) == 1
log = caplog.messages[0]
assert "Error uploading file" in log
assert "job-server error" in log
assert error == "job-server error"


def test_old_api_upload_file_already_uploaded_error(responses, caplog):
Expand Down

0 comments on commit 5af1fb5

Please sign in to comment.