Skip to content

Commit

Permalink
[promptflow][internal] Fix traces missing attributes collection bug (#…
Browse files Browse the repository at this point in the history
…2055)

# Description

We have observed scenario that some 3p traces do not have field
`attributes`, which will lead to a `KeyError`.

This PR targets to fix that with using `dict.get`, plus adding a unit
test to guard this.

# All Promptflow Contribution checklist:
- [x] **The pull request does not introduce [breaking changes].**
- [ ] **CHANGELOG is updated for new features, bug fixes or other
significant changes.**
- [x] **I have read the [contribution guidelines](../CONTRIBUTING.md).**
- [ ] **Create an issue and link to the pull request to get dedicated
review from promptflow team. Learn more: [suggested
workflow](../CONTRIBUTING.md#suggested-workflow).**

## General Guidelines and Best Practices
- [x] Title of the pull request is clear and informative.
- [x] There are a small number of commits, each of which have an
informative message. This means that previously merged commits do not
appear in the history of the PR. For more information on cleaning up the
commits in your PR, [see this
page](https://github.com/Azure/azure-powershell/blob/master/documentation/development-docs/cleaning-up-commits.md).

### Testing Guidelines
- [x] Pull request includes test coverage for the included changes.
  • Loading branch information
zhengfeiwang authored Feb 20, 2024
1 parent ea9753f commit e398924
Show file tree
Hide file tree
Showing 2 changed files with 39 additions and 5 deletions.
7 changes: 4 additions & 3 deletions src/promptflow/promptflow/_sdk/entities/_trace.py
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,7 @@ def _to_orm_object(self) -> ORMSpan:

@staticmethod
def _from_protobuf_object(obj: PBSpan, resource: typing.Dict) -> "Span":
span_dict = json.loads(MessageToJson(obj))
span_dict: dict = json.loads(MessageToJson(obj))
span_id = obj.span_id.hex()
trace_id = obj.trace_id.hex()
context = {
Expand All @@ -132,14 +132,15 @@ def _from_protobuf_object(obj: PBSpan, resource: typing.Dict) -> "Span":
status = {
SpanStatusFieldName.STATUS_CODE: parse_otel_span_status_code(obj.status.code),
}
attributes = flatten_pb_attributes(span_dict[SpanFieldName.ATTRIBUTES])
# we have observed in some scenarios, there is not `attributes` field
attributes = flatten_pb_attributes(span_dict.get(SpanFieldName.ATTRIBUTES, dict()))
# `span_type` are not standard fields in OpenTelemetry attributes
# for example, LangChain instrumentation, as we do not inject this;
# so we need to get it with default value to avoid KeyError
span_type = attributes.get(SpanAttributeFieldName.SPAN_TYPE, DEFAULT_SPAN_TYPE)

# parse from resource.attributes: session id, experiment
resource_attributes = resource[SpanResourceFieldName.ATTRIBUTES]
resource_attributes: dict = resource[SpanResourceFieldName.ATTRIBUTES]
session_id = resource_attributes[SpanResourceAttributesFieldName.SESSION_ID]
experiment = resource_attributes.get(SpanResourceAttributesFieldName.EXPERIMENT_NAME, None)

Expand Down
37 changes: 35 additions & 2 deletions src/promptflow/tests/sdk_cli_test/unittests/test_trace.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,16 +2,20 @@
# Copyright (c) Microsoft Corporation. All rights reserved.
# ---------------------------------------------------------

import base64
import os
import uuid
from typing import Dict
from unittest.mock import patch

import pytest
from opentelemetry import trace
from opentelemetry.proto.trace.v1.trace_pb2 import Span as PBSpan
from opentelemetry.sdk.environment_variables import OTEL_EXPORTER_OTLP_ENDPOINT
from opentelemetry.sdk.trace import TracerProvider

from promptflow._constants import SpanResourceAttributesFieldName, TraceEnvironmentVariableName
from promptflow._constants import SpanResourceAttributesFieldName, SpanResourceFieldName, TraceEnvironmentVariableName
from promptflow._sdk.entities._trace import Span
from promptflow._trace._start_trace import (
_create_resource,
_is_tracer_provider_configured,
Expand All @@ -21,7 +25,7 @@


@pytest.fixture
def reset_tracer_provider() -> None:
def reset_tracer_provider():
from opentelemetry.util._once import Once

with patch("opentelemetry.trace._TRACER_PROVIDER_SET_ONCE", Once()), patch(
Expand All @@ -30,6 +34,17 @@ def reset_tracer_provider() -> None:
yield


@pytest.fixture
def mock_resource() -> Dict:
return {
SpanResourceFieldName.ATTRIBUTES: {
SpanResourceAttributesFieldName.SERVICE_NAME: "promptflow",
SpanResourceAttributesFieldName.SESSION_ID: str(uuid.uuid4()),
},
SpanResourceFieldName.SCHEMA_URL: "",
}


@pytest.mark.sdk_test
@pytest.mark.unittest
class TestStartTrace:
Expand Down Expand Up @@ -100,3 +115,21 @@ def test_provision_session_id(self) -> None:
# specified, but still honor the configured one
session_id = _provision_session_id(specified_session_id=str(uuid.uuid4()))
assert configured_session_id == session_id

def test_trace_without_attributes_collection(self, mock_resource: Dict) -> None:
# generate a span without attributes
# below magic numbers come from a real case from `azure-search-documents`
pb_span = PBSpan()
pb_span.trace_id = base64.b64decode("4WIgbhNyYmYKOWeAxbRm4g==")
pb_span.span_id = base64.b64decode("lvxVSnvNhWo=")
pb_span.name = "DocumentsOperations.search_post"
pb_span.start_time_unix_nano = 1708420657948895100
pb_span.end_time_unix_nano = 1708420659479925700
pb_span.parent_span_id = base64.b64decode("C+++WS+OuxI=")
pb_span.kind = PBSpan.SpanKind.SPAN_KIND_INTERNAL
# below line should execute successfully
span = Span._from_protobuf_object(pb_span, resource=mock_resource)
# as the above span do not have any attributes, so the parsed span should not have any attributes
attributes = span._content["attributes"]
assert isinstance(attributes, dict)
assert len(attributes) == 0

0 comments on commit e398924

Please sign in to comment.