diff --git a/python/semantic_kernel/utils/telemetry/model_diagnostics/decorators.py b/python/semantic_kernel/utils/telemetry/model_diagnostics/decorators.py index 236a94e5d435..11ff9279faa7 100644 --- a/python/semantic_kernel/utils/telemetry/model_diagnostics/decorators.py +++ b/python/semantic_kernel/utils/telemetry/model_diagnostics/decorators.py @@ -2,9 +2,10 @@ import functools import json +import logging from collections.abc import AsyncGenerator, Callable from functools import reduce -from typing import TYPE_CHECKING, Any +from typing import TYPE_CHECKING, Any, ClassVar from opentelemetry.trace import Span, StatusCode, get_tracer, use_span @@ -37,9 +38,36 @@ TEXT_COMPLETION_OPERATION = "text.completions" TEXT_STREAMING_COMPLETION_OPERATION = "text.streaming_completions" + +# We're recording multiple events for the chat history, some of which are emitted within (hundreds of) +# nanoseconds of each other. The default timestamp resolution is not high enough to guarantee unique +# timestamps for each message. Azure Monitor also truncates timestamps to microsecond resolution, and some +# other backends truncate to milliseconds. +# +# But we need to give users a way to restore chat message order, so we're incrementing the timestamp +# by 1 microsecond for each message. +# +# This is a workaround; we'll find a generic and better solution - see +# https://github.com/open-telemetry/semantic-conventions/issues/1701 +class ChatHistoryMessageTimestampFilter(logging.Filter): + """A filter to increment the timestamp of INFO logs by 1 microsecond per message index.""" + + INDEX_KEY: ClassVar[str] = "CHAT_MESSAGE_INDEX" + + def filter(self, record: logging.LogRecord) -> bool: + """Increment the timestamp of INFO logs by 1 microsecond per message index.""" + if hasattr(record, self.INDEX_KEY): + idx = getattr(record, self.INDEX_KEY) + record.created += idx * 1e-6 + return True + + # Creates a tracer from the global tracer provider tracer = get_tracer(__name__) +logger = logging.getLogger(__name__) +logger.addFilter(ChatHistoryMessageTimestampFilter()) + @experimental_function def are_model_diagnostics_enabled() -> bool: @@ -87,19 +115,19 @@ async def wrapper_decorator(*args: Any, **kwargs: Any) -> list[ChatMessageConten settings: "PromptExecutionSettings" = kwargs.get("settings") or args[2] # type: ignore with use_span( - _start_completion_activity( + _get_completion_span( CHAT_COMPLETION_OPERATION, completion_service.ai_model_id, model_provider, completion_service.service_url(), - chat_history, settings, ), end_on_exit=True, ) as current_span: + _set_completion_input(model_provider, chat_history) try: completions: list[ChatMessageContent] = await completion_func(*args, **kwargs) - _set_completion_response(current_span, completions) + _set_completion_response(current_span, completions, model_provider) return completions except Exception as exception: _set_completion_error(current_span, exception) @@ -144,16 +172,16 @@ async def wrapper_decorator( all_messages: dict[int, list[StreamingChatMessageContent]] = {} with use_span( - _start_completion_activity( + _get_completion_span( CHAT_STREAMING_COMPLETION_OPERATION, completion_service.ai_model_id, model_provider, completion_service.service_url(), - chat_history, settings, ), end_on_exit=True, ) as current_span: + _set_completion_input(model_provider, chat_history) try: async for streaming_chat_message_contents in completion_func(*args, **kwargs): for streaming_chat_message_content in streaming_chat_message_contents: @@ -166,7 +194,7 @@ async def wrapper_decorator( all_messages_flattened = [ 
reduce(lambda x, y: x + y, messages) for messages in all_messages.values() ] - _set_completion_response(current_span, all_messages_flattened) + _set_completion_response(current_span, all_messages_flattened, model_provider) except Exception as exception: _set_completion_error(current_span, exception) raise @@ -203,19 +231,19 @@ async def wrapper_decorator(*args: Any, **kwargs: Any) -> list[TextContent]: settings: "PromptExecutionSettings" = kwargs["settings"] if kwargs.get("settings") is not None else args[2] with use_span( - _start_completion_activity( + _get_completion_span( TEXT_COMPLETION_OPERATION, completion_service.ai_model_id, model_provider, completion_service.service_url(), - prompt, settings, ), end_on_exit=True, ) as current_span: + _set_completion_input(model_provider, prompt) try: completions: list[TextContent] = await completion_func(*args, **kwargs) - _set_completion_response(current_span, completions) + _set_completion_response(current_span, completions, model_provider) return completions except Exception as exception: _set_completion_error(current_span, exception) @@ -258,16 +286,16 @@ async def wrapper_decorator(*args: Any, **kwargs: Any) -> AsyncGenerator[list["S all_text_contents: dict[int, list["StreamingTextContent"]] = {} with use_span( - _start_completion_activity( + _get_completion_span( TEXT_STREAMING_COMPLETION_OPERATION, completion_service.ai_model_id, model_provider, completion_service.service_url(), - prompt, settings, ), end_on_exit=True, ) as current_span: + _set_completion_input(model_provider, prompt) try: async for streaming_text_contents in completion_func(*args, **kwargs): for streaming_text_content in streaming_text_contents: @@ -280,7 +308,7 @@ async def wrapper_decorator(*args: Any, **kwargs: Any) -> AsyncGenerator[list["S all_text_contents_flattened = [ reduce(lambda x, y: x + y, messages) for messages in all_text_contents.values() ] - _set_completion_response(current_span, all_text_contents_flattened) + _set_completion_response(current_span, all_text_contents_flattened, model_provider) except Exception as exception: _set_completion_error(current_span, exception) raise @@ -292,15 +320,18 @@ async def wrapper_decorator(*args: Any, **kwargs: Any) -> AsyncGenerator[list["S return inner_trace_streaming_text_completion -def _start_completion_activity( +def _get_completion_span( operation_name: str, model_name: str, model_provider: str, service_url: str | None, - prompt: str | ChatHistory, execution_settings: "PromptExecutionSettings | None", ) -> Span: - """Start a text or chat completion activity for a given model.""" + """Start a text or chat completion span for a given model. + + Note that `start_span` doesn't make the span the current span. + Use `use_span` to make it the current span as a context manager. 
+ """ span = tracer.start_span(f"{operation_name} {model_name}") # Set attributes on the span @@ -316,24 +347,53 @@ def _start_completion_activity( # TODO(@glahaye): we'll need to have a way to get these attributes from model # providers other than OpenAI (for example if the attributes are named differently) if execution_settings: - attribute = execution_settings.extension_data.get("max_tokens") - if attribute: - span.set_attribute(gen_ai_attributes.MAX_TOKENS, attribute) + attribute_name_map = { + "seed": gen_ai_attributes.SEED, + "encoding_formats": gen_ai_attributes.ENCODING_FORMATS, + "frequency_penalty": gen_ai_attributes.FREQUENCY_PENALTY, + "max_tokens": gen_ai_attributes.MAX_TOKENS, + "stop_sequences": gen_ai_attributes.STOP_SEQUENCES, + "temperature": gen_ai_attributes.TEMPERATURE, + "top_k": gen_ai_attributes.TOP_K, + "top_p": gen_ai_attributes.TOP_P, + } + for attribute_name, attribute_key in attribute_name_map.items(): + attribute = execution_settings.extension_data.get(attribute_name) + if attribute: + span.set_attribute(attribute_key, attribute) + + return span - attribute = execution_settings.extension_data.get("temperature") - if attribute: - span.set_attribute(gen_ai_attributes.TEMPERATURE, attribute) - attribute = execution_settings.extension_data.get("top_p") - if attribute: - span.set_attribute(gen_ai_attributes.TOP_P, attribute) +def _set_completion_input( + model_provider: str, + prompt: str | ChatHistory, +) -> None: + """Set the input for a text or chat completion. + The logs will be associated to the current span. + """ if are_sensitive_events_enabled(): if isinstance(prompt, ChatHistory): - prompt = _messages_to_openai_format(prompt.messages) - span.add_event(gen_ai_attributes.PROMPT_EVENT, {gen_ai_attributes.PROMPT_EVENT_PROMPT: prompt}) - - return span + for idx, message in enumerate(prompt.messages): + event_name = gen_ai_attributes.ROLE_EVENT_MAP.get(message.role) + if event_name: + logger.info( + json.dumps(message.to_dict()), + extra={ + gen_ai_attributes.EVENT_NAME: event_name, + gen_ai_attributes.SYSTEM: model_provider, + ChatHistoryMessageTimestampFilter.INDEX_KEY: idx, + }, + ) + else: + logger.info( + prompt, + extra={ + gen_ai_attributes.EVENT_NAME: gen_ai_attributes.PROMPT, + gen_ai_attributes.SYSTEM: model_provider, + }, + ) def _set_completion_response( @@ -342,8 +402,9 @@ def _set_completion_response( | list[TextContent] | list[StreamingChatMessageContent] | list[StreamingTextContent], + model_provider: str, ) -> None: - """Set the a text or chat completion response for a given activity.""" + """Set the a text or chat completion response for a given span.""" first_completion = completions[0] # Set the response ID @@ -362,33 +423,32 @@ def _set_completion_response( usage = first_completion.metadata.get("usage", None) if isinstance(usage, CompletionUsage): if usage.prompt_tokens: - current_span.set_attribute(gen_ai_attributes.PROMPT_TOKENS, usage.prompt_tokens) + current_span.set_attribute(gen_ai_attributes.INPUT_TOKENS, usage.prompt_tokens) if usage.completion_tokens: - current_span.set_attribute(gen_ai_attributes.COMPLETION_TOKENS, usage.completion_tokens) + current_span.set_attribute(gen_ai_attributes.OUTPUT_TOKENS, usage.completion_tokens) # Set the completion event if are_sensitive_events_enabled(): - completion_text: str = _messages_to_openai_format(completions) - current_span.add_event( - gen_ai_attributes.COMPLETION_EVENT, {gen_ai_attributes.COMPLETION_EVENT_COMPLETION: completion_text} - ) + for completion in completions: + full_response: 
dict[str, Any] = { + "message": completion.to_dict(), + } + + if hasattr(completion, "finish_reason"): + full_response["finish_reason"] = completion.finish_reason + if hasattr(completion, "choice_index"): + full_response["index"] = completion.choice_index + + logger.info( + json.dumps(full_response), + extra={ + gen_ai_attributes.EVENT_NAME: gen_ai_attributes.CHOICE, + gen_ai_attributes.SYSTEM: model_provider, + }, + ) def _set_completion_error(span: Span, error: Exception) -> None: """Set an error for a text or chat completion .""" span.set_attribute(gen_ai_attributes.ERROR_TYPE, str(type(error))) span.set_status(StatusCode.ERROR, repr(error)) - - -def _messages_to_openai_format( - messages: list[ChatMessageContent] - | list[StreamingChatMessageContent] - | list[TextContent] - | list[StreamingTextContent], -) -> str: - """Convert a list of ChatMessageContent to a string in the OpenAI format. - - OpenTelemetry recommends formatting the messages in the OpenAI format - regardless of the actual model being used. - """ - return json.dumps([message.to_dict() for message in messages]) diff --git a/python/semantic_kernel/utils/telemetry/model_diagnostics/gen_ai_attributes.py b/python/semantic_kernel/utils/telemetry/model_diagnostics/gen_ai_attributes.py index 461500a998cb..76d388378f23 100644 --- a/python/semantic_kernel/utils/telemetry/model_diagnostics/gen_ai_attributes.py +++ b/python/semantic_kernel/utils/telemetry/model_diagnostics/gen_ai_attributes.py @@ -1,32 +1,48 @@ # Copyright (c) Microsoft. All rights reserved. +from semantic_kernel.contents.utils.author_role import AuthorRole + # Constants for tracing activities with semantic conventions. # Ideally, we should use the attributes from the semcov package. # However, many of the attributes are not yet available in the package, # so we define them here for now. 
# Activity tags -SYSTEM = "gen_ai.system" OPERATION = "gen_ai.operation.name" +SYSTEM = "gen_ai.system" +ERROR_TYPE = "error.type" MODEL = "gen_ai.request.model" -MAX_TOKENS = "gen_ai.request.max_tokens" # nosec +SEED = "gen_ai.request.seed" +PORT = "server.port" +ENCODING_FORMATS = "gen_ai.request.encoding_formats" +FREQUENCY_PENALTY = "gen_ai.request.frequency_penalty" +MAX_TOKENS = "gen_ai.request.max_tokens" +STOP_SEQUENCES = "gen_ai.request.stop_sequences" TEMPERATURE = "gen_ai.request.temperature" +TOP_K = "gen_ai.request.top_k" TOP_P = "gen_ai.request.top_p" -RESPONSE_ID = "gen_ai.response.id" FINISH_REASON = "gen_ai.response.finish_reason" -PROMPT_TOKENS = "gen_ai.response.prompt_tokens" # nosec -COMPLETION_TOKENS = "gen_ai.response.completion_tokens" # nosec +RESPONSE_ID = "gen_ai.response.id" +INPUT_TOKENS = "gen_ai.usage.input_tokens" +OUTPUT_TOKENS = "gen_ai.usage.output_tokens" ADDRESS = "server.address" -PORT = "server.port" -ERROR_TYPE = "error.type" # Activity events -PROMPT_EVENT = "gen_ai.content.prompt" -COMPLETION_EVENT = "gen_ai.content.completion" - -# Activity event attributes -PROMPT_EVENT_PROMPT = "gen_ai.prompt" -COMPLETION_EVENT_COMPLETION = "gen_ai.completion" +EVENT_NAME = "event.name" +SYSTEM_MESSAGE = "gen_ai.system.message" +USER_MESSAGE = "gen_ai.user.message" +ASSISTANT_MESSAGE = "gen_ai.assistant.message" +TOOL_MESSAGE = "gen_ai.tool.message" +CHOICE = "gen_ai.choice" +PROMPT = "gen_ai.prompt" # Kernel specific attributes AVAILABLE_FUNCTIONS = "sk.available_functions" + + +ROLE_EVENT_MAP = { + AuthorRole.SYSTEM: SYSTEM_MESSAGE, + AuthorRole.USER: USER_MESSAGE, + AuthorRole.ASSISTANT: ASSISTANT_MESSAGE, + AuthorRole.TOOL: TOOL_MESSAGE, +} diff --git a/python/tests/unit/utils/model_diagnostics/test_trace_chat_completion.py b/python/tests/unit/utils/model_diagnostics/test_trace_chat_completion.py index 4d03fd3543c3..302abb01235b 100644 --- a/python/tests/unit/utils/model_diagnostics/test_trace_chat_completion.py +++ b/python/tests/unit/utils/model_diagnostics/test_trace_chat_completion.py @@ -1,6 +1,7 @@ # Copyright (c) Microsoft. All rights reserved. 
-from unittest.mock import patch +import json +from unittest.mock import call, patch import pytest from opentelemetry.trace import StatusCode @@ -14,7 +15,7 @@ from semantic_kernel.utils.telemetry.model_diagnostics import gen_ai_attributes from semantic_kernel.utils.telemetry.model_diagnostics.decorators import ( CHAT_COMPLETION_OPERATION, - _messages_to_openai_format, + ChatHistoryMessageTimestampFilter, trace_chat_completion, ) from tests.unit.utils.model_diagnostics.conftest import MockChatCompletion @@ -80,9 +81,11 @@ ) +@patch("semantic_kernel.utils.telemetry.model_diagnostics.decorators.logger") @patch("opentelemetry.trace.INVALID_SPAN") # When no tracer provider is available, the span will be an INVALID_SPAN async def test_trace_chat_completion( mock_span, + mock_logger, execution_settings, mock_response, chat_history, @@ -124,9 +127,18 @@ async def test_trace_chat_completion( if execution_settings.extension_data.get("top_p") is not None: mock_span.set_attribute.assert_any_call(gen_ai_attributes.TOP_P, execution_settings.extension_data["top_p"]) - mock_span.add_event.assert_any_call( - gen_ai_attributes.PROMPT_EVENT, - {gen_ai_attributes.PROMPT_EVENT_PROMPT: _messages_to_openai_format(chat_history)}, + mock_logger.info.assert_has_calls( + [ + call( + json.dumps(message.to_dict()), + extra={ + gen_ai_attributes.EVENT_NAME: gen_ai_attributes.ROLE_EVENT_MAP.get(message.role), + gen_ai_attributes.SYSTEM: MockChatCompletion.MODEL_PROVIDER_NAME, + ChatHistoryMessageTimestampFilter.INDEX_KEY: idx, + }, + ) + for idx, message in enumerate(chat_history) + ] ) # After the call to the model @@ -139,15 +151,20 @@ async def test_trace_chat_completion( ",".join([str(completion.finish_reason) for completion in mock_response]), ) - mock_span.add_event.assert_any_call( - gen_ai_attributes.COMPLETION_EVENT, - {gen_ai_attributes.COMPLETION_EVENT_COMPLETION: _messages_to_openai_format(mock_response)}, + mock_logger.info.assert_any_call( + json.dumps({"message": results[0].to_dict(), "finish_reason": results[0].finish_reason}), + extra={ + gen_ai_attributes.EVENT_NAME: gen_ai_attributes.CHOICE, + gen_ai_attributes.SYSTEM: MockChatCompletion.MODEL_PROVIDER_NAME, + }, ) +@patch("semantic_kernel.utils.telemetry.model_diagnostics.decorators.logger") @patch("opentelemetry.trace.INVALID_SPAN") # When no tracer provider is available, the span will be an INVALID_SPAN async def test_trace_chat_completion_exception( mock_span, + mock_logger, execution_settings, mock_response, chat_history, @@ -170,3 +187,17 @@ async def test_trace_chat_completion_exception( mock_span.set_status.assert_any_call(StatusCode.ERROR, repr(exception)) mock_span.end.assert_any_call() + + mock_logger.info.assert_has_calls( + [ + call( + json.dumps(message.to_dict()), + extra={ + gen_ai_attributes.EVENT_NAME: gen_ai_attributes.ROLE_EVENT_MAP.get(message.role), + gen_ai_attributes.SYSTEM: MockChatCompletion.MODEL_PROVIDER_NAME, + ChatHistoryMessageTimestampFilter.INDEX_KEY: idx, + }, + ) + for idx, message in enumerate(chat_history) + ] + ) diff --git a/python/tests/unit/utils/model_diagnostics/test_trace_streaming_chat_completion.py b/python/tests/unit/utils/model_diagnostics/test_trace_streaming_chat_completion.py index 486a7df4b115..2b5632ac5d6d 100644 --- a/python/tests/unit/utils/model_diagnostics/test_trace_streaming_chat_completion.py +++ b/python/tests/unit/utils/model_diagnostics/test_trace_streaming_chat_completion.py @@ -1,8 +1,9 @@ # Copyright (c) Microsoft. All rights reserved. 
+import json from collections.abc import AsyncGenerator from functools import reduce -from unittest.mock import MagicMock, patch +from unittest.mock import MagicMock, call, patch import pytest from opentelemetry.trace import StatusCode @@ -16,7 +17,7 @@ from semantic_kernel.utils.telemetry.model_diagnostics import gen_ai_attributes from semantic_kernel.utils.telemetry.model_diagnostics.decorators import ( CHAT_STREAMING_COMPLETION_OPERATION, - _messages_to_openai_format, + ChatHistoryMessageTimestampFilter, trace_streaming_chat_completion, ) from tests.unit.utils.model_diagnostics.conftest import MockChatCompletion @@ -86,9 +87,11 @@ ) +@patch("semantic_kernel.utils.telemetry.model_diagnostics.decorators.logger") @patch("opentelemetry.trace.INVALID_SPAN") # When no tracer provider is available, the span will be an INVALID_SPAN async def test_trace_streaming_chat_completion( mock_span, + mock_logger, execution_settings, mock_response, chat_history, @@ -135,9 +138,18 @@ async def test_trace_streaming_chat_completion( if execution_settings.extension_data.get("top_p") is not None: mock_span.set_attribute.assert_any_call(gen_ai_attributes.TOP_P, execution_settings.extension_data["top_p"]) - mock_span.add_event.assert_any_call( - gen_ai_attributes.PROMPT_EVENT, - {gen_ai_attributes.PROMPT_EVENT_PROMPT: _messages_to_openai_format(chat_history)}, + mock_logger.info.assert_has_calls( + [ + call( + json.dumps(message.to_dict()), + extra={ + gen_ai_attributes.EVENT_NAME: gen_ai_attributes.ROLE_EVENT_MAP.get(message.role), + gen_ai_attributes.SYSTEM: MockChatCompletion.MODEL_PROVIDER_NAME, + ChatHistoryMessageTimestampFilter.INDEX_KEY: idx, + }, + ) + for idx, message in enumerate(chat_history) + ] ) # After the call to the model @@ -150,15 +162,24 @@ async def test_trace_streaming_chat_completion( ",".join([str(completion.finish_reason) for completion in mock_response]), ) - mock_span.add_event.assert_any_call( - gen_ai_attributes.COMPLETION_EVENT, - {gen_ai_attributes.COMPLETION_EVENT_COMPLETION: _messages_to_openai_format(mock_response)}, + mock_logger.info.assert_any_call( + json.dumps({ + "message": updates_flatten[0].to_dict(), + "finish_reason": updates_flatten[0].finish_reason, + "index": 0, + }), + extra={ + gen_ai_attributes.EVENT_NAME: gen_ai_attributes.CHOICE, + gen_ai_attributes.SYSTEM: MockChatCompletion.MODEL_PROVIDER_NAME, + }, ) +@patch("semantic_kernel.utils.telemetry.model_diagnostics.decorators.logger") @patch("opentelemetry.trace.INVALID_SPAN") # When no tracer provider is available, the span will be an INVALID_SPAN async def test_trace_streaming_chat_completion_exception( mock_span, + mock_logger, execution_settings, mock_response, chat_history, @@ -186,3 +207,17 @@ async def test_trace_streaming_chat_completion_exception( mock_span.set_status.assert_any_call(StatusCode.ERROR, repr(exception)) mock_span.end.assert_any_call() + + mock_logger.info.assert_has_calls( + [ + call( + json.dumps(message.to_dict()), + extra={ + gen_ai_attributes.EVENT_NAME: gen_ai_attributes.ROLE_EVENT_MAP.get(message.role), + gen_ai_attributes.SYSTEM: MockChatCompletion.MODEL_PROVIDER_NAME, + ChatHistoryMessageTimestampFilter.INDEX_KEY: idx, + }, + ) + for idx, message in enumerate(chat_history) + ] + ) diff --git a/python/tests/unit/utils/model_diagnostics/test_trace_streaming_text_completion.py b/python/tests/unit/utils/model_diagnostics/test_trace_streaming_text_completion.py index 54321cc16d2d..27c31376aa42 100644 --- a/python/tests/unit/utils/model_diagnostics/test_trace_streaming_text_completion.py 
+++ b/python/tests/unit/utils/model_diagnostics/test_trace_streaming_text_completion.py @@ -1,5 +1,6 @@ # Copyright (c) Microsoft. All rights reserved. +import json from collections.abc import AsyncGenerator from functools import reduce from unittest.mock import ANY, MagicMock, patch @@ -14,7 +15,6 @@ from semantic_kernel.utils.telemetry.model_diagnostics import gen_ai_attributes from semantic_kernel.utils.telemetry.model_diagnostics.decorators import ( TEXT_STREAMING_COMPLETION_OPERATION, - _messages_to_openai_format, trace_streaming_text_completion, ) from tests.unit.utils.model_diagnostics.conftest import MockTextCompletion @@ -68,9 +68,11 @@ ) +@patch("semantic_kernel.utils.telemetry.model_diagnostics.decorators.logger") @patch("opentelemetry.trace.INVALID_SPAN") # When no tracer provider is available, the span will be an INVALID_SPAN async def test_trace_streaming_text_completion( mock_span, + mock_logger, execution_settings, mock_response, prompt, @@ -117,8 +119,12 @@ async def test_trace_streaming_text_completion( if execution_settings.extension_data.get("top_p") is not None: mock_span.set_attribute.assert_any_call(gen_ai_attributes.TOP_P, execution_settings.extension_data["top_p"]) - mock_span.add_event.assert_any_call( - gen_ai_attributes.PROMPT_EVENT, {gen_ai_attributes.PROMPT_EVENT_PROMPT: prompt} + mock_logger.info.assert_any_call( + prompt, + extra={ + gen_ai_attributes.EVENT_NAME: gen_ai_attributes.PROMPT, + gen_ai_attributes.SYSTEM: MockTextCompletion.MODEL_PROVIDER_NAME, + }, ) # After the call to the model @@ -126,15 +132,20 @@ async def test_trace_streaming_text_completion( if mock_response[0].metadata.get("id") is not None: mock_span.set_attribute.assert_any_call(gen_ai_attributes.RESPONSE_ID, mock_response[0].metadata["id"]) - mock_span.add_event.assert_any_call( - gen_ai_attributes.COMPLETION_EVENT, - {gen_ai_attributes.COMPLETION_EVENT_COMPLETION: _messages_to_openai_format(mock_response)}, + mock_logger.info.assert_any_call( + json.dumps({"message": updates_flatten[0].to_dict(), "index": 0}), + extra={ + gen_ai_attributes.EVENT_NAME: gen_ai_attributes.CHOICE, + gen_ai_attributes.SYSTEM: MockTextCompletion.MODEL_PROVIDER_NAME, + }, ) +@patch("semantic_kernel.utils.telemetry.model_diagnostics.decorators.logger") @patch("opentelemetry.trace.INVALID_SPAN") # When no tracer provider is available, the span will be an INVALID_SPAN async def test_trace_streaming_text_completion_exception( mock_span, + mock_logger, execution_settings, mock_response, prompt, @@ -158,3 +169,11 @@ async def test_trace_streaming_text_completion_exception( mock_span.set_status.assert_any_call(StatusCode.ERROR, repr(exception)) mock_span.end.assert_any_call() + + mock_logger.info.assert_any_call( + prompt, + extra={ + gen_ai_attributes.EVENT_NAME: gen_ai_attributes.PROMPT, + gen_ai_attributes.SYSTEM: MockTextCompletion.MODEL_PROVIDER_NAME, + }, + ) diff --git a/python/tests/unit/utils/model_diagnostics/test_trace_text_completion.py b/python/tests/unit/utils/model_diagnostics/test_trace_text_completion.py index cf7160d5c31c..a5215d122a50 100644 --- a/python/tests/unit/utils/model_diagnostics/test_trace_text_completion.py +++ b/python/tests/unit/utils/model_diagnostics/test_trace_text_completion.py @@ -1,5 +1,6 @@ # Copyright (c) Microsoft. All rights reserved. 
+import json from unittest.mock import ANY, patch import pytest @@ -13,7 +14,6 @@ from semantic_kernel.utils.telemetry.model_diagnostics import gen_ai_attributes from semantic_kernel.utils.telemetry.model_diagnostics.decorators import ( TEXT_COMPLETION_OPERATION, - _messages_to_openai_format, trace_text_completion, ) from tests.unit.utils.model_diagnostics.conftest import MockTextCompletion @@ -64,9 +64,11 @@ ) +@patch("semantic_kernel.utils.telemetry.model_diagnostics.decorators.logger") @patch("opentelemetry.trace.INVALID_SPAN") # When no tracer provider is available, the span will be an INVALID_SPAN async def test_trace_text_completion( mock_span, + mock_logger, execution_settings, mock_response, prompt, @@ -110,8 +112,12 @@ async def test_trace_text_completion( if execution_settings.extension_data.get("top_p") is not None: mock_span.set_attribute.assert_any_call(gen_ai_attributes.TOP_P, execution_settings.extension_data["top_p"]) - mock_span.add_event.assert_any_call( - gen_ai_attributes.PROMPT_EVENT, {gen_ai_attributes.PROMPT_EVENT_PROMPT: prompt} + mock_logger.info.assert_any_call( + prompt, + extra={ + gen_ai_attributes.EVENT_NAME: gen_ai_attributes.PROMPT, + gen_ai_attributes.SYSTEM: MockTextCompletion.MODEL_PROVIDER_NAME, + }, ) # After the call to the model @@ -119,15 +125,20 @@ async def test_trace_text_completion( if mock_response[0].metadata.get("id") is not None: mock_span.set_attribute.assert_any_call(gen_ai_attributes.RESPONSE_ID, mock_response[0].metadata["id"]) - mock_span.add_event.assert_any_call( - gen_ai_attributes.COMPLETION_EVENT, - {gen_ai_attributes.COMPLETION_EVENT_COMPLETION: _messages_to_openai_format(mock_response)}, + mock_logger.info.assert_any_call( + json.dumps({"message": results[0].to_dict()}), + extra={ + gen_ai_attributes.EVENT_NAME: gen_ai_attributes.CHOICE, + gen_ai_attributes.SYSTEM: MockTextCompletion.MODEL_PROVIDER_NAME, + }, ) +@patch("semantic_kernel.utils.telemetry.model_diagnostics.decorators.logger") @patch("opentelemetry.trace.INVALID_SPAN") # When no tracer provider is available, the span will be an INVALID_SPAN async def test_trace_text_completion_exception( mock_span, + mock_logger, execution_settings, mock_response, prompt, @@ -150,3 +161,11 @@ async def test_trace_text_completion_exception( mock_span.set_status.assert_any_call(StatusCode.ERROR, repr(exception)) mock_span.end.assert_any_call() + + mock_logger.info.assert_any_call( + prompt, + extra={ + gen_ai_attributes.EVENT_NAME: gen_ai_attributes.PROMPT, + gen_ai_attributes.SYSTEM: MockTextCompletion.MODEL_PROVIDER_NAME, + }, + )