From ecd0ab1813c218691d55c709730970686058469c Mon Sep 17 00:00:00 2001 From: Aleksandr Pasechnik Date: Fri, 15 Nov 2024 13:24:11 -0500 Subject: [PATCH] fix(botocore): [SVLS-5973] less noisy span pointers (#11353) Span pointer issues should be debug messages rather than warnings since they are not really actionable from the perspective of our users. We'd also like to have some instrumentation telemetry to see how the logic is doing. ## Checklist - [x] PR author has checked that all the criteria below are met - The PR description includes an overview of the change - The PR description articulates the motivation for the change - The change includes tests OR the PR description describes a testing strategy - The PR description notes risks associated with the change, if any - Newly-added code is easy to change - The change follows the [library release note guidelines](https://ddtrace.readthedocs.io/en/stable/releasenotes.html) - The change includes or references documentation updates if necessary - Backport labels are set (if [applicable](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting)) ## Reviewer Checklist - [x] Reviewer has checked that all the criteria below are met - Title is accurate - All changes are related to the pull request's stated goal - Avoids breaking [API](https://ddtrace.readthedocs.io/en/stable/versioning.html#interfaces) changes - Testing strategy adequately addresses listed risks - Newly-added code is easy to change - Release note makes sense to a user of the library - If necessary, author has acknowledged and discussed the performance implications of this PR as reported in the benchmarks PR comment - Backport labels are set in a manner that is consistent with the [release branch maintenance policy](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting) --- ddtrace/_trace/_span_pointer.py | 27 +- ddtrace/_trace/telemetry.py | 52 ++ .../utils_botocore/span_pointers/__init__.py | 30 +- .../utils_botocore/span_pointers/dynamodb.py | 465 +++++++++++++----- .../_trace/utils_botocore/span_pointers/s3.py | 86 ++-- .../utils_botocore/span_pointers/telemetry.py | 18 + ...e-span-pointer-noise-e1cf01ec581a409c.yaml | 4 + tests/tracer/test_telemetry.py | 67 +++ .../utils_botocore/test_span_pointers.py | 183 ++++--- 9 files changed, 706 insertions(+), 226 deletions(-) create mode 100644 ddtrace/_trace/telemetry.py create mode 100644 ddtrace/_trace/utils_botocore/span_pointers/telemetry.py create mode 100644 releasenotes/notes/reduce-span-pointer-noise-e1cf01ec581a409c.yaml create mode 100644 tests/tracer/test_telemetry.py diff --git a/ddtrace/_trace/_span_pointer.py b/ddtrace/_trace/_span_pointer.py index 9b2c661047e..015aac7fc12 100644 --- a/ddtrace/_trace/_span_pointer.py +++ b/ddtrace/_trace/_span_pointer.py @@ -8,6 +8,7 @@ from ddtrace._trace._span_link import SpanLink from ddtrace._trace._span_link import SpanLinkKind +from ddtrace._trace.telemetry import record_span_pointer_calculation_issue from ddtrace.internal.logger import get_logger @@ -67,20 +68,28 @@ def __post_init__(self): def _standard_hashing_function(*elements: bytes) -> str: try: if not elements: - raise ValueError("elements must not be empty") + return _standard_hashing_function_failure("elements must not be empty") # Please see the tests for more details about this logic. return sha256(b"|".join(elements)).hexdigest()[:32] except Exception as e: - log.warning( - "failed to generate standard hash for span pointer: %s", - str(e), - ) - return _add_random_suffix( - prefix=_STANDARD_HASHING_FUNCTION_FAILURE_PREFIX, - minimum_length=32, - ) + return _standard_hashing_function_failure(str(e)) + + +def _standard_hashing_function_failure(reason: str) -> str: + log.debug( + "failed to generate standard hash for span pointer: %s", + reason, + ) + record_span_pointer_calculation_issue( + context="standard_hashing_function", + ) + + return _add_random_suffix( + prefix=_STANDARD_HASHING_FUNCTION_FAILURE_PREFIX, + minimum_length=32, + ) def _add_random_suffix(*, prefix: str, minimum_length: int) -> str: diff --git a/ddtrace/_trace/telemetry.py b/ddtrace/_trace/telemetry.py new file mode 100644 index 00000000000..4611cedba51 --- /dev/null +++ b/ddtrace/_trace/telemetry.py @@ -0,0 +1,52 @@ +from typing import Optional +from typing import Tuple + +from ddtrace.internal.telemetry import telemetry_writer + + +def record_span_pointer_calculation(context: str, span_pointer_count: int) -> None: + telemetry_writer.add_count_metric( + namespace="tracer", + name="span_pointer_calculation", + value=1, + tags=(("context", context), ("count", _span_pointer_count_to_tag(span_pointer_count))), + ) + + +def _span_pointer_count_to_tag(span_pointer_count: int) -> str: + if span_pointer_count < 0: + # this shouldn't be possible, but let's make sure + return "negative" + + elif span_pointer_count <= 5: + return str(span_pointer_count) + + elif span_pointer_count <= 10: + return "6-10" + + elif span_pointer_count <= 20: + return "11-20" + + elif span_pointer_count <= 50: + return "21-50" + + elif span_pointer_count <= 100: + return "51-100" + + else: + return "101+" + + +def record_span_pointer_calculation_issue( + context: str, additional_tags: Optional[Tuple[Tuple[str, str], ...]] = None +) -> None: + tags: Tuple[Tuple[str, str], ...] = (("context", context),) + if additional_tags: + tags += additional_tags + + telemetry_writer.add_count_metric( + namespace="tracer", + name="span_pointer_calculation.issue", + value=1, + tags=tags, + ) diff --git a/ddtrace/_trace/utils_botocore/span_pointers/__init__.py b/ddtrace/_trace/utils_botocore/span_pointers/__init__.py index c9765b30cfb..d4d724c38cb 100644 --- a/ddtrace/_trace/utils_botocore/span_pointers/__init__.py +++ b/ddtrace/_trace/utils_botocore/span_pointers/__init__.py @@ -14,6 +14,12 @@ # import from here as well. from ddtrace._trace.utils_botocore.span_pointers.s3 import _aws_s3_object_span_pointer_description # noqa: F401 from ddtrace._trace.utils_botocore.span_pointers.s3 import _extract_span_pointers_for_s3_response +from ddtrace._trace.utils_botocore.span_pointers.telemetry import record_span_pointer_calculation +from ddtrace._trace.utils_botocore.span_pointers.telemetry import record_span_pointer_calculation_issue +from ddtrace.internal.logger import get_logger + + +log = get_logger(__name__) def extract_span_pointers_from_successful_botocore_response( @@ -23,12 +29,22 @@ def extract_span_pointers_from_successful_botocore_response( request_parameters: Dict[str, Any], response: Dict[str, Any], ) -> List[_SpanPointerDescription]: - if endpoint_name == "s3": - return _extract_span_pointers_for_s3_response(operation_name, request_parameters, response) + result = [] + + try: + if endpoint_name == "s3": + result = _extract_span_pointers_for_s3_response(operation_name, request_parameters, response) + + elif endpoint_name == "dynamodb": + result = _extract_span_pointers_for_dynamodb_response( + dynamodb_primary_key_names_for_tables, operation_name, request_parameters, response + ) + + except Exception as e: + # Catch-all in case we miss something in the helpers + log.debug("Error extracting span pointers from botocore response: %s", e) + record_span_pointer_calculation_issue("extractor_root", "unexpected_error") - if endpoint_name == "dynamodb": - return _extract_span_pointers_for_dynamodb_response( - dynamodb_primary_key_names_for_tables, operation_name, request_parameters, response - ) + record_span_pointer_calculation(span_pointer_count=len(result)) - return [] + return result diff --git a/ddtrace/_trace/utils_botocore/span_pointers/dynamodb.py b/ddtrace/_trace/utils_botocore/span_pointers/dynamodb.py index 3b7d99cb0e4..89cc8c6f6e8 100644 --- a/ddtrace/_trace/utils_botocore/span_pointers/dynamodb.py +++ b/ddtrace/_trace/utils_botocore/span_pointers/dynamodb.py @@ -1,9 +1,11 @@ from copy import deepcopy +from enum import Enum import itertools import sys from typing import Any from typing import Dict from typing import List +from typing import Optional from typing import Set from typing import Union from typing import cast @@ -11,6 +13,7 @@ from ddtrace._trace._span_pointer import _SpanPointerDescription from ddtrace._trace._span_pointer import _SpanPointerDirection from ddtrace._trace._span_pointer import _standard_hashing_function +from ddtrace._trace.utils_botocore.span_pointers.telemetry import record_span_pointer_calculation_issue from ddtrace.internal.logger import get_logger @@ -22,6 +25,14 @@ log = get_logger(__name__) +class _TelemetryIssueTags(Enum): + REQUEST_PARAMETERS = "request_parameters" + HASHING_FAILURE = "hashing_failure" + MISSING_TABLE_INFO = "missing_table_info" + PROCESSED_ITEMS_CALCULATION = "processed_items_calculation" + PRIMARY_KEY_ISSUE = "primary_key_issue" + + _DynamoDBTableName = str _DynamoDBItemFieldName = str _DynamoDBItemTypeTag = str @@ -101,6 +112,9 @@ class _DynamoDBTransactUpdateItem(TypedDict): ] +_OPERATION_BASE = "DynamoDB." + + def _extract_span_pointers_for_dynamodb_response( dynamodb_primary_key_names_for_tables: Dict[_DynamoDBTableName, Set[_DynamoDBItemFieldName]], operation_name: str, @@ -139,50 +153,118 @@ def _extract_span_pointers_for_dynamodb_putitem_response( dynamodb_primary_key_names_for_tables: Dict[_DynamoDBTableName, Set[_DynamoDBItemFieldName]], request_parameters: Dict[str, Any], ) -> List[_SpanPointerDescription]: + operation = _OPERATION_BASE + "PutItem" + try: table_name = request_parameters["TableName"] item = request_parameters["Item"] + except KeyError as e: + log.debug( + "failed to extract %s span pointer: missing key %s", + operation, + e, + ) + record_span_pointer_calculation_issue( + operation=operation, issue_tag=_TelemetryIssueTags.REQUEST_PARAMETERS.value + ) + return [] - return [ - _aws_dynamodb_item_span_pointer_description( - pointer_direction=_SpanPointerDirection.DOWNSTREAM, - table_name=table_name, - primary_key=_aws_dynamodb_item_primary_key_from_item( - dynamodb_primary_key_names_for_tables[table_name], item - ), - ) - ] + primary_key_names = _extract_primary_key_names_from_configuration( + operation=operation, + dynamodb_primary_key_names_for_tables=dynamodb_primary_key_names_for_tables, + table_name=table_name, + ) + if primary_key_names is None: + return [] + + primary_key = _aws_dynamodb_item_primary_key_from_item( + operation=operation, + primary_key_field_names=primary_key_names, + item=item, + ) + if primary_key is None: + return [] + + try: + span_pointer_description = _aws_dynamodb_item_span_pointer_description( + operation=operation, + pointer_direction=_SpanPointerDirection.DOWNSTREAM, + table_name=table_name, + primary_key=primary_key, + ) + if span_pointer_description is None: + return [] + + return [span_pointer_description] except Exception as e: - log.warning( - "failed to generate DynamoDB.PutItem span pointer: %s", - str(e), + log.debug( + "failed to generate %s span pointer: %s", + operation, + e, ) + record_span_pointer_calculation_issue(operation=operation, issue_tag=_TelemetryIssueTags.HASHING_FAILURE.value) return [] +def _extract_primary_key_names_from_configuration( + operation: str, + dynamodb_primary_key_names_for_tables: Dict[_DynamoDBTableName, Set[_DynamoDBItemFieldName]], + table_name: _DynamoDBTableName, +) -> Optional[Set[_DynamoDBItemFieldName]]: + try: + return dynamodb_primary_key_names_for_tables[table_name] + except KeyError as e: + log.warning( + "failed to extract %s span pointer: table %s not found in primary key names", + operation, + e, + ) + record_span_pointer_calculation_issue( + operation=operation, issue_tag=_TelemetryIssueTags.MISSING_TABLE_INFO.value + ) + return None + + def _extract_span_pointers_for_dynamodb_keyed_operation_response( operation_name: str, request_parmeters: Dict[str, Any], ) -> List[_SpanPointerDescription]: + operation = _OPERATION_BASE + operation_name + try: table_name = request_parmeters["TableName"] key = request_parmeters["Key"] + except KeyError as e: + log.debug( + "failed to extract %s span pointer: missing key %s", + operation, + e, + ) + record_span_pointer_calculation_issue( + operation=operation, issue_tag=_TelemetryIssueTags.REQUEST_PARAMETERS.value + ) + return [] - return [ - _aws_dynamodb_item_span_pointer_description( - pointer_direction=_SpanPointerDirection.DOWNSTREAM, - table_name=table_name, - primary_key=key, - ) - ] + try: + span_pointer_description = _aws_dynamodb_item_span_pointer_description( + operation=operation, + pointer_direction=_SpanPointerDirection.DOWNSTREAM, + table_name=table_name, + primary_key=key, + ) + if span_pointer_description is None: + return [] + + return [span_pointer_description] except Exception as e: - log.warning( - "failed to generate DynamoDB.%s span pointer: %s", - operation_name, - str(e), + log.debug( + "failed to generate %s span pointer: %s", + operation, + e, ) + record_span_pointer_calculation_issue(operation=operation, issue_tag=_TelemetryIssueTags.HASHING_FAILURE.value) return [] @@ -191,33 +273,59 @@ def _extract_span_pointers_for_dynamodb_batchwriteitem_response( request_parameters: Dict[str, Any], response: Dict[str, Any], ) -> List[_SpanPointerDescription]: + operation = _OPERATION_BASE + "BatchWriteItem" + try: requested_items = request_parameters["RequestItems"] unprocessed_items = response.get("UnprocessedItems", {}) processed_items = _identify_dynamodb_batch_write_item_processed_items(requested_items, unprocessed_items) + if processed_items is None: + return [] - return list( - itertools.chain.from_iterable( - [ - _aws_dynamodb_item_span_pointer_description( - pointer_direction=_SpanPointerDirection.DOWNSTREAM, - table_name=table_name, - primary_key=_aws_dynamodb_item_primary_key_from_write_request( - dynamodb_primary_key_names_for_tables, table_name, write_request - ), - ) - for write_request in processed_items_for_table - ] - for table_name, processed_items_for_table in processed_items.items() - ) + except Exception as e: + log.debug( + "failed to extract %s span pointers: %s", + operation, + e, + ) + record_span_pointer_calculation_issue( + operation=operation, issue_tag=_TelemetryIssueTags.REQUEST_PARAMETERS.value ) + return [] + + try: + result = [] + for table_name, processed_items_for_table in processed_items.items(): + for write_request in processed_items_for_table: + primary_key = _aws_dynamodb_item_primary_key_from_write_request( + dynamodb_primary_key_names_for_tables=dynamodb_primary_key_names_for_tables, + table_name=table_name, + write_request=write_request, + ) + if primary_key is None: + return [] + + span_pointer_description = _aws_dynamodb_item_span_pointer_description( + operation=operation, + pointer_direction=_SpanPointerDirection.DOWNSTREAM, + table_name=table_name, + primary_key=primary_key, + ) + if span_pointer_description is None: + return [] + + result.append(span_pointer_description) + + return result except Exception as e: - log.warning( - "failed to generate DynamoDB.BatchWriteItem span pointer: %s", - str(e), + log.debug( + "failed to generate %s span pointer: %s", + operation, + e, ) + record_span_pointer_calculation_issue(operation=operation, issue_tag=_TelemetryIssueTags.HASHING_FAILURE.value) return [] @@ -225,6 +333,7 @@ def _extract_span_pointers_for_dynamodb_transactwriteitems_response( dynamodb_primary_key_names_for_tables: Dict[_DynamoDBTableName, Set[_DynamoDBItemFieldName]], request_parameters: Dict[str, Any], ) -> List[_SpanPointerDescription]: + operation = _OPERATION_BASE + "TransactWriteItems" try: return list( itertools.chain.from_iterable( @@ -237,21 +346,29 @@ def _extract_span_pointers_for_dynamodb_transactwriteitems_response( ) except Exception as e: - log.warning( - "failed to generate DynamoDB.TransactWriteItems span pointer: %s", - str(e), + log.debug( + "failed to generate %s span pointer: %s", + operation, + e, ) + record_span_pointer_calculation_issue(operation=operation, issue_tag=_TelemetryIssueTags.HASHING_FAILURE.value) return [] def _identify_dynamodb_batch_write_item_processed_items( requested_items: Dict[_DynamoDBTableName, List[_DynamoDBWriteRequest]], unprocessed_items: Dict[_DynamoDBTableName, List[_DynamoDBWriteRequest]], -) -> Dict[_DynamoDBTableName, List[_DynamoDBWriteRequest]]: +) -> Optional[Dict[_DynamoDBTableName, List[_DynamoDBWriteRequest]]]: + operation = _OPERATION_BASE + "BatchWriteItem" + processed_items = {} if not all(table_name in requested_items for table_name in unprocessed_items): - raise ValueError("unprocessed items include tables not in the requested items") + log.debug("%s unprocessed items include tables not in the requested items", operation) + record_span_pointer_calculation_issue( + operation=operation, issue_tag=_TelemetryIssueTags.PROCESSED_ITEMS_CALCULATION.value + ) + return None for table_name, requested_write_requests in requested_items.items(): if table_name not in unprocessed_items: @@ -262,7 +379,14 @@ def _identify_dynamodb_batch_write_item_processed_items( unprocessed_write_request in requested_write_requests for unprocessed_write_request in unprocessed_items[table_name] ): - raise ValueError("unprocessed write requests include items not in the requested write requests") + log.debug( + "%s unprocessed write requests include items not in the requested write requests", + operation, + ) + record_span_pointer_calculation_issue( + operation=operation, issue_tag=_TelemetryIssueTags.PROCESSED_ITEMS_CALCULATION.value + ) + return None these_processed_items = [ deepcopy(processed_write_request) @@ -277,29 +401,45 @@ def _identify_dynamodb_batch_write_item_processed_items( def _aws_dynamodb_item_primary_key_from_item( + operation: str, primary_key_field_names: Set[_DynamoDBItemFieldName], item: _DynamoDBItem, -) -> _DynamoDBItemPrimaryKey: +) -> Optional[_DynamoDBItemPrimaryKey]: if len(primary_key_field_names) not in (1, 2): - raise ValueError(f"unexpected number of primary key fields: {len(primary_key_field_names)}") + log.debug("unexpected number of primary key fields: %d", len(primary_key_field_names)) + record_span_pointer_calculation_issue( + operation=operation, issue_tag=_TelemetryIssueTags.PRIMARY_KEY_ISSUE.value + ) + return None - return { - primary_key_field_name: _aws_dynamodb_extract_and_verify_primary_key_field_value_item( - item, primary_key_field_name + result = {} + for primary_key_field_name in primary_key_field_names: + primary_key_field_value = _aws_dynamodb_extract_and_verify_primary_key_field_value_item( + operation, item, primary_key_field_name ) - for primary_key_field_name in primary_key_field_names - } + if primary_key_field_value is None: + return None + + result[primary_key_field_name] = primary_key_field_value + + return result def _aws_dynamodb_item_primary_key_from_write_request( dynamodb_primary_key_names_for_tables: Dict[_DynamoDBTableName, Set[_DynamoDBItemFieldName]], table_name: _DynamoDBTableName, write_request: _DynamoDBWriteRequest, -) -> _DynamoDBItemPrimaryKey: +) -> Optional[_DynamoDBItemPrimaryKey]: # https://docs.aws.amazon.com/amazondynamodb/latest/APIReference/API_WriteRequest.html + operation = _OPERATION_BASE + "BatchWriteItem" + if len(write_request) != 1: - raise ValueError(f"unexpected number of write request fields: {len(write_request)}") + log.debug("unexpected number of write request fields: %d", len(write_request)) + record_span_pointer_calculation_issue( + operation=operation, issue_tag=_TelemetryIssueTags.REQUEST_PARAMETERS.value + ) + return None if "PutRequest" in write_request: # Unfortunately mypy doesn't properly see the if statement above as a @@ -307,9 +447,18 @@ def _aws_dynamodb_item_primary_key_from_write_request( # _DynamoDBPutRequestWriteRequest, so we help it out ourselves. write_request = cast(_DynamoDBPutRequestWriteRequest, write_request) + primary_key_field_names = _extract_primary_key_names_from_configuration( + operation=operation, + dynamodb_primary_key_names_for_tables=dynamodb_primary_key_names_for_tables, + table_name=table_name, + ) + if primary_key_field_names is None: + return None + return _aws_dynamodb_item_primary_key_from_item( - dynamodb_primary_key_names_for_tables[table_name], - write_request["PutRequest"]["Item"], + operation=operation, + primary_key_field_names=primary_key_field_names, + item=write_request["PutRequest"]["Item"], ) elif "DeleteRequest" in write_request: @@ -321,15 +470,25 @@ def _aws_dynamodb_item_primary_key_from_write_request( return write_request["DeleteRequest"]["Key"] else: - raise ValueError(f"unexpected write request structure: {''.join(sorted(write_request.keys()))}") + log.debug("unexpected write request structure: %s", "".join(sorted(write_request.keys()))) + record_span_pointer_calculation_issue( + operation=operation, issue_tag=_TelemetryIssueTags.REQUEST_PARAMETERS.value + ) + return None def _aws_dynamodb_item_span_pointer_description_for_transactwrite_request( dynamodb_primary_key_names_for_tables: Dict[_DynamoDBTableName, Set[_DynamoDBItemFieldName]], transact_write_request: _DynamoDBTransactWriteItem, ) -> List[_SpanPointerDescription]: + operation = _OPERATION_BASE + "TransactWriteItems" + if len(transact_write_request) != 1: - raise ValueError(f"unexpected number of transact write request fields: {len(transact_write_request)}") + log.debug("unexpected number of transact write request fields: %d", len(transact_write_request)) + record_span_pointer_calculation_issue( + operation=operation, issue_tag=_TelemetryIssueTags.REQUEST_PARAMETERS.value + ) + return [] if "ConditionCheck" in transact_write_request: # ConditionCheck requests don't actually modify anything, so we don't @@ -354,10 +513,23 @@ def _aws_dynamodb_item_span_pointer_description_for_transactwrite_request( transact_write_request = cast(_DynamoDBTransactPutItem, transact_write_request) table_name = transact_write_request["Put"]["TableName"] - key = _aws_dynamodb_item_primary_key_from_item( - dynamodb_primary_key_names_for_tables[table_name], - transact_write_request["Put"]["Item"], + + primary_key_field_names = _extract_primary_key_names_from_configuration( + operation=operation, + dynamodb_primary_key_names_for_tables=dynamodb_primary_key_names_for_tables, + table_name=table_name, + ) + if primary_key_field_names is None: + return [] + + primary_key = _aws_dynamodb_item_primary_key_from_item( + operation=operation, + primary_key_field_names=primary_key_field_names, + item=transact_write_request["Put"]["Item"], ) + if primary_key is None: + return [] + key = primary_key elif "Update" in transact_write_request: # Unfortunately mypy does not properly see the if statement above as a @@ -370,94 +542,163 @@ def _aws_dynamodb_item_span_pointer_description_for_transactwrite_request( key = transact_write_request["Update"]["Key"] else: - raise ValueError( - f"unexpected transact write request structure: {''.join(sorted(transact_write_request.keys()))}" + log.debug("unexpected transact write request structure: %s", "".join(sorted(transact_write_request.keys()))) + record_span_pointer_calculation_issue( + operation=operation, issue_tag=_TelemetryIssueTags.REQUEST_PARAMETERS.value ) + return [] - return [ - _aws_dynamodb_item_span_pointer_description( - pointer_direction=_SpanPointerDirection.DOWNSTREAM, - table_name=table_name, - primary_key=key, - ) - ] + span_pointer_description = _aws_dynamodb_item_span_pointer_description( + operation=operation, + pointer_direction=_SpanPointerDirection.DOWNSTREAM, + table_name=table_name, + primary_key=key, + ) + if span_pointer_description is None: + return [] + + return [span_pointer_description] def _aws_dynamodb_item_span_pointer_description( + operation: str, pointer_direction: _SpanPointerDirection, table_name: _DynamoDBTableName, primary_key: _DynamoDBItemPrimaryKey, -) -> _SpanPointerDescription: +) -> Optional[_SpanPointerDescription]: + pointer_hash = _aws_dynamodb_item_span_pointer_hash(operation, table_name, primary_key) + if pointer_hash is None: + return None + return _SpanPointerDescription( pointer_kind="aws.dynamodb.item", pointer_direction=pointer_direction, - pointer_hash=_aws_dynamodb_item_span_pointer_hash(table_name, primary_key), + pointer_hash=pointer_hash, extra_attributes={}, ) def _aws_dynamodb_extract_and_verify_primary_key_field_value_item( + operation: str, item: _DynamoDBItem, primary_key_field_name: _DynamoDBItemFieldName, -) -> _DynamoDBItemPrimaryKeyValue: +) -> Optional[_DynamoDBItemPrimaryKeyValue]: if primary_key_field_name not in item: - raise ValueError(f"missing primary key field: {primary_key_field_name}") + log.debug("missing primary key field: %s", primary_key_field_name) + record_span_pointer_calculation_issue( + operation=operation, issue_tag=_TelemetryIssueTags.PRIMARY_KEY_ISSUE.value + ) + return None value_object = item[primary_key_field_name] if len(value_object) != 1: - raise ValueError(f"primary key field {primary_key_field_name} must have exactly one value: {len(value_object)}") + log.debug("primary key field %s must have exactly one value: %d", primary_key_field_name, len(value_object)) + record_span_pointer_calculation_issue( + operation=operation, issue_tag=_TelemetryIssueTags.PRIMARY_KEY_ISSUE.value + ) + return None value_type, value_data = next(iter(value_object.items())) if value_type not in ("S", "N", "B"): - raise ValueError(f"unexpected primary key field {primary_key_field_name} value type: {value_type}") + log.debug("unexpected primary key field %s value type: %s", primary_key_field_name, value_type) + record_span_pointer_calculation_issue( + operation=operation, issue_tag=_TelemetryIssueTags.PRIMARY_KEY_ISSUE.value + ) + return None if not isinstance(value_data, str): - raise ValueError(f"unexpected primary key field {primary_key_field_name} value data type: {type(value_data)}") + log.debug("unexpected primary key field %s value data type: %s", primary_key_field_name, type(value_data)) + record_span_pointer_calculation_issue( + operation=operation, issue_tag=_TelemetryIssueTags.PRIMARY_KEY_ISSUE.value + ) + return None return {value_type: value_data} -def _aws_dynamodb_item_span_pointer_hash(table_name: _DynamoDBTableName, primary_key: _DynamoDBItemPrimaryKey) -> str: - if len(primary_key) == 1: - key, value_object = next(iter(primary_key.items())) - encoded_key_1 = key.encode("utf-8") - encoded_value_1 = _aws_dynamodb_item_encode_primary_key_value(value_object) - encoded_key_2 = b"" - encoded_value_2 = b"" +def _aws_dynamodb_item_span_pointer_hash( + operation: str, table_name: _DynamoDBTableName, primary_key: _DynamoDBItemPrimaryKey +) -> Optional[str]: + try: + if len(primary_key) == 1: + key, value_object = next(iter(primary_key.items())) + encoded_key_1 = key.encode("utf-8") + + encoded_value_1 = _aws_dynamodb_item_encode_primary_key_value(operation, value_object) + if encoded_value_1 is None: + return None + + encoded_key_2 = b"" + encoded_value_2 = b"" + + elif len(primary_key) == 2: + (key_1, value_object_1), (key_2, value_object_2) = sorted( + primary_key.items(), key=lambda x: x[0].encode("utf-8") + ) + encoded_key_1 = key_1.encode("utf-8") + + encoded_value_1 = _aws_dynamodb_item_encode_primary_key_value(operation, value_object_1) + if encoded_value_1 is None: + return None + + encoded_key_2 = key_2.encode("utf-8") - elif len(primary_key) == 2: - (key_1, value_object_1), (key_2, value_object_2) = sorted( - primary_key.items(), key=lambda x: x[0].encode("utf-8") + maybe_encoded_value_2 = _aws_dynamodb_item_encode_primary_key_value(operation, value_object_2) + if maybe_encoded_value_2 is None: + return None + encoded_value_2 = maybe_encoded_value_2 + + else: + log.debug("unexpected number of primary key fields: %d", len(primary_key)) + record_span_pointer_calculation_issue( + operation=operation, issue_tag=_TelemetryIssueTags.PRIMARY_KEY_ISSUE.value + ) + return None + + return _standard_hashing_function( + table_name.encode("utf-8"), + encoded_key_1, + encoded_value_1, + encoded_key_2, + encoded_value_2, ) - encoded_key_1 = key_1.encode("utf-8") - encoded_value_1 = _aws_dynamodb_item_encode_primary_key_value(value_object_1) - encoded_key_2 = key_2.encode("utf-8") - encoded_value_2 = _aws_dynamodb_item_encode_primary_key_value(value_object_2) - else: - raise ValueError(f"unexpected number of primary key fields: {len(primary_key)}") - - return _standard_hashing_function( - table_name.encode("utf-8"), - encoded_key_1, - encoded_value_1, - encoded_key_2, - encoded_value_2, - ) + except Exception as e: + log.debug("failed to generate %s span pointer hash: %s", operation, e) + record_span_pointer_calculation_issue(operation=operation, issue_tag=_TelemetryIssueTags.HASHING_FAILURE.value) + return None -def _aws_dynamodb_item_encode_primary_key_value(value_object: _DynamoDBItemPrimaryKeyValue) -> bytes: - if len(value_object) != 1: - raise ValueError(f"primary key value object must have exactly one field: {len(value_object)}") +def _aws_dynamodb_item_encode_primary_key_value( + operation: str, value_object: _DynamoDBItemPrimaryKeyValue +) -> Optional[bytes]: + try: + if len(value_object) != 1: + log.debug("primary key value object must have exactly one field: %d", len(value_object)) + record_span_pointer_calculation_issue( + operation=operation, issue_tag=_TelemetryIssueTags.PRIMARY_KEY_ISSUE.value + ) + return None + + value_type, value = next(iter(value_object.items())) - value_type, value = next(iter(value_object.items())) + if value_type == "S": + return value.encode("utf-8") - if value_type == "S": - return value.encode("utf-8") + if value_type in ("N", "B"): + # these should already be here as ASCII strings + return value.encode("ascii") - if value_type in ("N", "B"): - # these should already be here as ASCII strings - return value.encode("ascii") + log.debug("unexpected primary key value type: %s", value_type) + record_span_pointer_calculation_issue( + operation=operation, issue_tag=_TelemetryIssueTags.PRIMARY_KEY_ISSUE.value + ) + return None - raise ValueError(f"unknown primary key value type: {value_type}") + except Exception as e: + log.debug("failed to encode primary key value for %s: %s", operation, e) + record_span_pointer_calculation_issue( + operation=operation, issue_tag=_TelemetryIssueTags.PRIMARY_KEY_ISSUE.value + ) + return None diff --git a/ddtrace/_trace/utils_botocore/span_pointers/s3.py b/ddtrace/_trace/utils_botocore/span_pointers/s3.py index dbd56279a7b..46625292da1 100644 --- a/ddtrace/_trace/utils_botocore/span_pointers/s3.py +++ b/ddtrace/_trace/utils_botocore/span_pointers/s3.py @@ -1,18 +1,27 @@ +from enum import Enum from typing import Any from typing import Callable from typing import Dict from typing import List from typing import NamedTuple +from typing import Optional from ddtrace._trace._span_pointer import _SpanPointerDescription from ddtrace._trace._span_pointer import _SpanPointerDirection from ddtrace._trace._span_pointer import _standard_hashing_function +from ddtrace._trace.utils_botocore.span_pointers.telemetry import record_span_pointer_calculation_issue from ddtrace.internal.logger import get_logger log = get_logger(__name__) +class _TelemetryIssueTags(Enum): + REQUEST_PARAMETERS = "request_parameters" + ETAG_QUOTES = "etag_quotes" + HASHING_FAILURE = "hashing_failure" + + def _extract_span_pointers_for_s3_response( operation_name: str, request_parameters: Dict[str, Any], @@ -74,6 +83,8 @@ def _extract_span_pointers_for_s3_response_with_helper( request_parameters: Dict[str, Any], response: Dict[str, Any], ) -> List[_SpanPointerDescription]: + operation = f"S3.{operation_name}" + try: hashing_properties = extractor(request_parameters, response) bucket = hashing_properties.bucket @@ -84,54 +95,71 @@ def _extract_span_pointers_for_s3_response_with_helper( if etag.startswith('"') and etag.endswith('"'): etag = etag[1:-1] - except KeyError as e: - log.warning( - "missing a parameter or response field required to make span pointer for S3.%s: %s", - operation_name, - str(e), + except Exception as e: + log.debug( + "problem with parameters for %s span pointer: %s", + operation, + e, + ) + record_span_pointer_calculation_issue( + operation=operation, issue_tag=_TelemetryIssueTags.REQUEST_PARAMETERS.value ) return [] - try: - return [ - _aws_s3_object_span_pointer_description( - pointer_direction=_SpanPointerDirection.DOWNSTREAM, - bucket=bucket, - key=key, - etag=etag, - ) - ] - except Exception as e: - log.warning( - "failed to generate S3.%s span pointer: %s", - operation_name, - str(e), - ) + span_pointer_description = _aws_s3_object_span_pointer_description( + operation=operation, + pointer_direction=_SpanPointerDirection.DOWNSTREAM, + bucket=bucket, + key=key, + etag=etag, + ) + if span_pointer_description is None: return [] + return [span_pointer_description] + def _aws_s3_object_span_pointer_description( + operation: str, pointer_direction: _SpanPointerDirection, bucket: str, key: str, etag: str, -) -> _SpanPointerDescription: +) -> Optional[_SpanPointerDescription]: + pointer_hash = _aws_s3_object_span_pointer_hash(operation, bucket, key, etag) + if pointer_hash is None: + return None + return _SpanPointerDescription( pointer_kind="aws.s3.object", pointer_direction=pointer_direction, - pointer_hash=_aws_s3_object_span_pointer_hash(bucket, key, etag), + pointer_hash=pointer_hash, extra_attributes={}, ) -def _aws_s3_object_span_pointer_hash(bucket: str, key: str, etag: str) -> str: +def _aws_s3_object_span_pointer_hash(operation: str, bucket: str, key: str, etag: str) -> Optional[str]: if '"' in etag: # Some AWS API endpoints put the ETag in double quotes. We expect the # calling code to have correctly fixed this already. - raise ValueError(f"ETag should not have double quotes: {etag}") + log.debug( + "ETag should not have double quotes: %s", + etag, + ) + record_span_pointer_calculation_issue(operation=operation, issue_tag=_TelemetryIssueTags.ETAG_QUOTES.value) + return None - return _standard_hashing_function( - bucket.encode("ascii"), - key.encode("utf-8"), - etag.encode("ascii"), - ) + try: + return _standard_hashing_function( + bucket.encode("ascii"), + key.encode("utf-8"), + etag.encode("ascii"), + ) + + except Exception as e: + log.debug( + "failed to hash S3 object span pointer: %s", + e, + ) + record_span_pointer_calculation_issue(operation=operation, issue_tag=_TelemetryIssueTags.HASHING_FAILURE.value) + return None diff --git a/ddtrace/_trace/utils_botocore/span_pointers/telemetry.py b/ddtrace/_trace/utils_botocore/span_pointers/telemetry.py new file mode 100644 index 00000000000..ccb0dce9a25 --- /dev/null +++ b/ddtrace/_trace/utils_botocore/span_pointers/telemetry.py @@ -0,0 +1,18 @@ +from ddtrace._trace.telemetry import record_span_pointer_calculation as base_record_span_pointer_calculation +from ddtrace._trace.telemetry import record_span_pointer_calculation_issue as base_record_span_pointer_calculation_issue + + +_CONTEXT = "botocore" + + +def record_span_pointer_calculation(span_pointer_count: int) -> None: + base_record_span_pointer_calculation( + context=_CONTEXT, + span_pointer_count=span_pointer_count, + ) + + +def record_span_pointer_calculation_issue(operation: str, issue_tag: str) -> None: + base_record_span_pointer_calculation_issue( + context=_CONTEXT, additional_tags=(("operation", operation), ("issue", issue_tag)) + ) diff --git a/releasenotes/notes/reduce-span-pointer-noise-e1cf01ec581a409c.yaml b/releasenotes/notes/reduce-span-pointer-noise-e1cf01ec581a409c.yaml new file mode 100644 index 00000000000..180b3e13e75 --- /dev/null +++ b/releasenotes/notes/reduce-span-pointer-noise-e1cf01ec581a409c.yaml @@ -0,0 +1,4 @@ +--- +fixes: + - | + botocore: This fix resolves an issue where our span pointer calculation code added recently logged unactionable messages. diff --git a/tests/tracer/test_telemetry.py b/tests/tracer/test_telemetry.py new file mode 100644 index 00000000000..d400966108a --- /dev/null +++ b/tests/tracer/test_telemetry.py @@ -0,0 +1,67 @@ +from typing import NamedTuple + +import pytest + +from ddtrace._trace.telemetry import _span_pointer_count_to_tag + + +def test_span_pointer_count_to_tag_returns_strings() -> None: + unique_tags = set() + + for count in range(-10, 500): + tag = _span_pointer_count_to_tag(count) + + assert isinstance(tag, str) + assert tag != "" + + unique_tags.add(tag) + + reasonable_cadinality_limit = 15 + assert len(unique_tags) <= reasonable_cadinality_limit + + +class SpanPointerTagCase(NamedTuple): + count: int + expected: str + + +@pytest.mark.parametrize( + "test_case", + [ + SpanPointerTagCase( + count=-1, + expected="negative", + ), + SpanPointerTagCase( + count=0, + expected="0", + ), + SpanPointerTagCase( + count=1, + expected="1", + ), + SpanPointerTagCase( + count=5, + expected="5", + ), + SpanPointerTagCase( + count=15, + expected="11-20", + ), + SpanPointerTagCase( + count=25, + expected="21-50", + ), + SpanPointerTagCase( + count=95, + expected="51-100", + ), + SpanPointerTagCase( + count=1000, + expected="101+", + ), + ], + ids=lambda test_case: f"count={test_case.count}", +) +def test_span_pointer_count_to_tag(test_case: SpanPointerTagCase) -> None: + assert _span_pointer_count_to_tag(test_case.count) == test_case.expected diff --git a/tests/tracer/utils_botocore/test_span_pointers.py b/tests/tracer/utils_botocore/test_span_pointers.py index 49209c4fc47..775ad23d6bd 100644 --- a/tests/tracer/utils_botocore/test_span_pointers.py +++ b/tests/tracer/utils_botocore/test_span_pointers.py @@ -58,6 +58,7 @@ class HashingCase(NamedTuple): def test_hashing(self, hashing_case: HashingCase) -> None: assert ( _aws_s3_object_span_pointer_hash( + operation="SomeOperation", bucket=hashing_case.bucket, key=hashing_case.key, etag=hashing_case.etag, @@ -118,6 +119,7 @@ class HashingCase(NamedTuple): def test_hashing(self, hashing_case: HashingCase) -> None: assert ( _aws_dynamodb_item_span_pointer_hash( + operation="SomeOperation", table_name=hashing_case.table_name, primary_key=hashing_case.primary_key, ) @@ -133,7 +135,8 @@ class PointersCase(NamedTuple): request_parameters: dict response: dict expected_pointers: List[_SpanPointerDescription] - expected_warning_regex: Optional[str] + expected_logger_regex: Optional[str] + logger_level: str @pytest.mark.parametrize( "pointers_case", @@ -145,7 +148,8 @@ class PointersCase(NamedTuple): request_parameters={}, response={}, expected_pointers=[], - expected_warning_regex=None, + expected_logger_regex=None, + logger_level="debug", ), PointersCase( name="unknown s3 operation", @@ -154,7 +158,8 @@ class PointersCase(NamedTuple): request_parameters={}, response={}, expected_pointers=[], - expected_warning_regex=None, + expected_logger_regex=None, + logger_level="debug", ), PointersCase( name="malformed s3.PutObject, missing bucket", @@ -167,7 +172,8 @@ class PointersCase(NamedTuple): "ETag": "ab12ef34", }, expected_pointers=[], - expected_warning_regex=r"missing a parameter or response field .*: 'Bucket'", + expected_logger_regex=r"problem with parameters for S3.PutObject .*: 'Bucket'", + logger_level="debug", ), PointersCase( name="malformed s3.PutObject, missing key", @@ -180,7 +186,8 @@ class PointersCase(NamedTuple): "ETag": "ab12ef34", }, expected_pointers=[], - expected_warning_regex=r"missing a parameter or response field .*: 'Key'", + expected_logger_regex=r"problem with parameters for S3.PutObject .*: 'Key'", + logger_level="debug", ), PointersCase( name="malformed s3.PutObject, missing etag", @@ -192,7 +199,8 @@ class PointersCase(NamedTuple): }, response={}, expected_pointers=[], - expected_warning_regex=r"missing a parameter or response field .*: 'ETag'", + expected_logger_regex=r"problem with parameters for S3.PutObject .*: 'ETag'", + logger_level="debug", ), PointersCase( name="malformed s3.PutObject, impossible non-ascii bucket", @@ -206,7 +214,8 @@ class PointersCase(NamedTuple): "ETag": "ab12ef34", }, expected_pointers=[], - expected_warning_regex=r".*'ascii' codec can't encode characters.*", + expected_logger_regex=r".*'ascii' codec can't encode characters.*", + logger_level="debug", ), PointersCase( name="s3.PutObject", @@ -227,7 +236,8 @@ class PointersCase(NamedTuple): extra_attributes={}, ), ], - expected_warning_regex=None, + expected_logger_regex=None, + logger_level="debug", ), PointersCase( name="s3.PutObject with double quoted ETag", @@ -249,7 +259,8 @@ class PointersCase(NamedTuple): extra_attributes={}, ), ], - expected_warning_regex=None, + expected_logger_regex=None, + logger_level="debug", ), PointersCase( name="s3.CopyObject", @@ -272,7 +283,8 @@ class PointersCase(NamedTuple): extra_attributes={}, ), ], - expected_warning_regex=None, + expected_logger_regex=None, + logger_level="debug", ), PointersCase( name="s3.CopyObject with double quoted ETag", @@ -295,7 +307,8 @@ class PointersCase(NamedTuple): extra_attributes={}, ), ], - expected_warning_regex=None, + expected_logger_regex=None, + logger_level="debug", ), PointersCase( name="s3.CompleteMultipartUpload", @@ -316,7 +329,8 @@ class PointersCase(NamedTuple): extra_attributes={}, ), ], - expected_warning_regex=None, + expected_logger_regex=None, + logger_level="debug", ), PointersCase( name="s3.CompleteMultipartUpload with double quoted ETag", @@ -338,7 +352,8 @@ class PointersCase(NamedTuple): extra_attributes={}, ), ], - expected_warning_regex=None, + expected_logger_regex=None, + logger_level="debug", ), PointersCase( name="dynamodb.PutItem", @@ -361,7 +376,8 @@ class PointersCase(NamedTuple): extra_attributes={}, ), ], - expected_warning_regex=None, + expected_logger_regex=None, + logger_level="debug", ), PointersCase( name="dynamodb.PutItem with extra data", @@ -385,7 +401,8 @@ class PointersCase(NamedTuple): extra_attributes={}, ), ], - expected_warning_regex=None, + expected_logger_regex=None, + logger_level="debug", ), PointersCase( name="dynamodb.PutItem unknown table", @@ -401,7 +418,8 @@ class PointersCase(NamedTuple): # things we do not care about }, expected_pointers=[], - expected_warning_regex=".*unknown-table.*", + expected_logger_regex=".*unknown-table.*", + logger_level="warning", ), PointersCase( name="dynamodb.PutItem missing primary key", @@ -417,7 +435,8 @@ class PointersCase(NamedTuple): # things we do not care about }, expected_pointers=[], - expected_warning_regex=".*missing primary key field: some-key", + expected_logger_regex=".*missing primary key field: some-key", + logger_level="debug", ), PointersCase( name="dynamodb.UpdateItem", @@ -440,7 +459,8 @@ class PointersCase(NamedTuple): extra_attributes={}, ), ], - expected_warning_regex=None, + expected_logger_regex=None, + logger_level="debug", ), PointersCase( name="dynamodb.UpdateItem table does not need to be known", @@ -463,7 +483,8 @@ class PointersCase(NamedTuple): extra_attributes={}, ), ], - expected_warning_regex=None, + expected_logger_regex=None, + logger_level="debug", ), PointersCase( name="dynamodb.UpdateItem with two key attributes", @@ -487,7 +508,8 @@ class PointersCase(NamedTuple): extra_attributes={}, ), ], - expected_warning_regex=None, + expected_logger_regex=None, + logger_level="debug", ), PointersCase( name="dynamodb.UpdateItem with three keys, impossibly", @@ -505,7 +527,8 @@ class PointersCase(NamedTuple): # things we do not care about }, expected_pointers=[], - expected_warning_regex=".*unexpected number of primary key fields: 3", + expected_logger_regex=".*unexpected number of primary key fields: 3", + logger_level="debug", ), PointersCase( name="dynamodb.UpdateItem missing the key", @@ -518,7 +541,8 @@ class PointersCase(NamedTuple): # things we do not care about }, expected_pointers=[], - expected_warning_regex=".*'Key'.*", + expected_logger_regex=".*'Key'.*", + logger_level="debug", ), PointersCase( name="dynamodb.DeleteItem", @@ -541,7 +565,8 @@ class PointersCase(NamedTuple): extra_attributes={}, ), ], - expected_warning_regex=None, + expected_logger_regex=None, + logger_level="debug", ), PointersCase( name="dynamodb.DeleteItem table does not need to be known", @@ -564,7 +589,8 @@ class PointersCase(NamedTuple): extra_attributes={}, ), ], - expected_warning_regex=None, + expected_logger_regex=None, + logger_level="debug", ), PointersCase( name="dynamodb.DeleteItem with two key attributes", @@ -588,7 +614,8 @@ class PointersCase(NamedTuple): extra_attributes={}, ), ], - expected_warning_regex=None, + expected_logger_regex=None, + logger_level="debug", ), PointersCase( name="dynamodb.DeleteItem with three keys, impossibly", @@ -606,7 +633,8 @@ class PointersCase(NamedTuple): # things we do not care about }, expected_pointers=[], - expected_warning_regex=".*unexpected number of primary key fields: 3", + expected_logger_regex=".*unexpected number of primary key fields: 3", + logger_level="debug", ), PointersCase( name="dynamodb.DeleteItem missing the key", @@ -619,7 +647,8 @@ class PointersCase(NamedTuple): # things we do not care about }, expected_pointers=[], - expected_warning_regex=".*'Key'.*", + expected_logger_regex=".*'Key'.*", + logger_level="debug", ), PointersCase( name="dynamodb.BatchWriteItem works with multiple items and tables", @@ -697,7 +726,8 @@ class PointersCase(NamedTuple): extra_attributes={}, ), ], - expected_warning_regex=None, + expected_logger_regex=None, + logger_level="debug", ), PointersCase( name="dynamodb.BatchWriteItem still needs the mapping sometimes", @@ -718,7 +748,8 @@ class PointersCase(NamedTuple): }, response={}, expected_pointers=[], - expected_warning_regex=".*unknown-table.*", + expected_logger_regex=".*unknown-table.*", + logger_level="warning", ), PointersCase( name="dynamodb.TransactWriteItems basic case", @@ -787,7 +818,8 @@ class PointersCase(NamedTuple): extra_attributes={}, ), ], - expected_warning_regex=None, + expected_logger_regex=None, + logger_level="debug", ), PointersCase( name="dynamodb.TransactWriteItems still needs the mapping sometimes", @@ -807,7 +839,8 @@ class PointersCase(NamedTuple): }, response={}, expected_pointers=[], - expected_warning_regex=".*unknown-table.*", + expected_logger_regex=".*unknown-table.*", + logger_level="warning", ), ], ids=lambda case: case.name, @@ -816,7 +849,7 @@ def test_pointers(self, pointers_case: PointersCase) -> None: # We might like to use caplog here but it resulted in inconsistent test # behavior, so we have to go a bit deeper. - with mock.patch.object(logging.Logger, "warning") as mock_logger: + with mock.patch.object(logging.Logger, pointers_case.logger_level) as mock_logger: assert sorted( extract_span_pointers_from_successful_botocore_response( dynamodb_primary_key_names_for_tables={ @@ -830,7 +863,7 @@ def test_pointers(self, pointers_case: PointersCase) -> None: key=lambda pointer: pointer.pointer_hash, ) == sorted(pointers_case.expected_pointers, key=lambda pointer: pointer.pointer_hash) - if pointers_case.expected_warning_regex is None: + if pointers_case.expected_logger_regex is None: mock_logger.assert_not_called() else: @@ -840,7 +873,7 @@ def test_pointers(self, pointers_case: PointersCase) -> None: assert not kwargs fmt, *other_args = args assert re.match( - pointers_case.expected_warning_regex, + pointers_case.expected_logger_regex, fmt % tuple(other_args), ) @@ -851,7 +884,7 @@ class WriteRequestPrimaryKeyCase(NamedTuple): table_name: str write_request: _DynamoDBWriteRequest primary_key: Optional[Dict[str, Dict[str, str]]] - expected_exception_regex: Optional[str] + expected_logger_regex: Optional[str] @pytest.mark.parametrize( "test_case", @@ -868,7 +901,7 @@ class WriteRequestPrimaryKeyCase(NamedTuple): }, }, primary_key={"some-key": {"S": "some-value"}}, - expected_exception_regex=None, + expected_logger_regex=None, ), WriteRequestPrimaryKeyCase( name="delete request", @@ -881,7 +914,7 @@ class WriteRequestPrimaryKeyCase(NamedTuple): }, }, primary_key={"some-key": {"S": "some-value"}}, - expected_exception_regex=None, + expected_logger_regex=None, ), WriteRequestPrimaryKeyCase( name="impossible combined request", @@ -900,7 +933,7 @@ class WriteRequestPrimaryKeyCase(NamedTuple): }, }, primary_key=None, - expected_exception_regex="unexpected number of write request fields", + expected_logger_regex="unexpected number of write request fields", ), WriteRequestPrimaryKeyCase( name="unknown request kind", @@ -914,23 +947,13 @@ class WriteRequestPrimaryKeyCase(NamedTuple): }, }, primary_key=None, - expected_exception_regex="unexpected write request structure: SomeRequest", + expected_logger_regex="unexpected write request structure: SomeRequest", ), ], ids=lambda test_case: test_case.name, ) def test_aws_dynamodb_item_primary_key_from_write_request(self, test_case: WriteRequestPrimaryKeyCase) -> None: - if test_case.expected_exception_regex is not None: - with pytest.raises(ValueError, match=test_case.expected_exception_regex): - _aws_dynamodb_item_primary_key_from_write_request( - dynamodb_primary_key_names_for_tables={ - "some-table": {"some-key"}, - }, - table_name=test_case.table_name, - write_request=test_case.write_request, - ) - - else: + with mock.patch.object(logging.Logger, "debug") as mock_logger: assert ( _aws_dynamodb_item_primary_key_from_write_request( dynamodb_primary_key_names_for_tables={ @@ -942,12 +965,26 @@ def test_aws_dynamodb_item_primary_key_from_write_request(self, test_case: Write == test_case.primary_key ) + if test_case.expected_logger_regex is None: + mock_logger.assert_not_called() + + else: + mock_logger.assert_called_once() + + (args, kwargs) = mock_logger.call_args + assert not kwargs + fmt, *other_args = args + assert re.match( + test_case.expected_logger_regex, + fmt % tuple(other_args), + ) + class ProcessedWriteRequestCase(NamedTuple): name: str requested_items: Dict[_DynamoDBTableName, List[_DynamoDBWriteRequest]] unprocessed_items: Dict[_DynamoDBTableName, List[_DynamoDBWriteRequest]] expected_processed_items: Optional[Dict[_DynamoDBTableName, List[_DynamoDBWriteRequest]]] - expected_exception_regex: Optional[str] + expected_logger_regex: Optional[str] @pytest.mark.parametrize( "test_case", @@ -977,7 +1014,7 @@ class ProcessedWriteRequestCase(NamedTuple): }, ], }, - expected_exception_regex=None, + expected_logger_regex=None, ), ProcessedWriteRequestCase( name="all unprocessed", @@ -1004,7 +1041,7 @@ class ProcessedWriteRequestCase(NamedTuple): ], }, expected_processed_items={}, - expected_exception_regex=None, + expected_logger_regex=None, ), ProcessedWriteRequestCase( name="some unprocessed", @@ -1050,7 +1087,7 @@ class ProcessedWriteRequestCase(NamedTuple): }, ], }, - expected_exception_regex=None, + expected_logger_regex=None, ), ProcessedWriteRequestCase( name="nothing unprocessed", @@ -1077,7 +1114,7 @@ class ProcessedWriteRequestCase(NamedTuple): }, ], }, - expected_exception_regex=None, + expected_logger_regex=None, ), ProcessedWriteRequestCase( name="extra unprocessed tables", @@ -1094,7 +1131,7 @@ class ProcessedWriteRequestCase(NamedTuple): ], }, expected_processed_items=None, - expected_exception_regex="unprocessed items include tables not in the requested items", + expected_logger_regex=".*unprocessed items include tables not in the requested items", ), ProcessedWriteRequestCase( name="extra unprocessed items", @@ -1128,26 +1165,34 @@ class ProcessedWriteRequestCase(NamedTuple): ], }, expected_processed_items=None, - expected_exception_regex="unprocessed write requests include items not in the requested write requests", + expected_logger_regex=".*unprocessed write requests include items not in the requested write requests", ), ], ids=lambda test_case: test_case.name, ) def test_identify_dynamodb_batch_write_item_processed_items(self, test_case: ProcessedWriteRequestCase) -> None: - if test_case.expected_exception_regex is not None: - with pytest.raises(Exception, match=test_case.expected_exception_regex): - _identify_dynamodb_batch_write_item_processed_items( - requested_items=test_case.requested_items, - unprocessed_items=test_case.unprocessed_items, - ) + with mock.patch.object(logging.Logger, "debug") as mock_logger: + processed_items = _identify_dynamodb_batch_write_item_processed_items( + requested_items=test_case.requested_items, + unprocessed_items=test_case.unprocessed_items, + ) + assert processed_items == test_case.expected_processed_items - return + if test_case.expected_logger_regex is None: + mock_logger.assert_not_called() - processed_items = _identify_dynamodb_batch_write_item_processed_items( - requested_items=test_case.requested_items, - unprocessed_items=test_case.unprocessed_items, - ) - assert processed_items == test_case.expected_processed_items + else: + mock_logger.assert_called_once() + + (args, kwargs) = mock_logger.call_args + assert not kwargs + fmt, *other_args = args + assert re.match( + test_case.expected_logger_regex, + fmt % tuple(other_args), + ) + + return def collect_all_ids(thing: object, accumulator: Set[int]) -> None: if isinstance(thing, dict):