Add util method to summarize trace telemetry. (#3074)

# Description Add a util method to summarize trace count telemetry. For long-term telemetry, we will need the span count. The simplest solution is to create a custom event for each trace id and include the span count in a custom dimension. But that may generate so many custom events that it affects all telemetry. So we only record the trace count first, and decide how to add the span count later based on the trace count telemetry. Maybe we just decide on a reasonable sampling rate. # All Promptflow Contribution checklist: - [ ] **The pull request does not introduce [breaking changes].** - [ ] **CHANGELOG is updated for new features, bug fixes or other significant changes.** - [ ] **I have read the [contribution guidelines](../CONTRIBUTING.md).** - [ ] **Create an issue and link to the pull request to get dedicated review from promptflow team. Learn more: [suggested workflow](../CONTRIBUTING.md#suggested-workflow).** ## General Guidelines and Best Practices - [ ] Title of the pull request is clear and informative. - [ ] There are a small number of commits, each of which has an informative message. This means that previously merged commits do not appear in the history of the PR. For more information on cleaning up the commits in your PR, [see this page](https://github.com/Azure/azure-powershell/blob/master/documentation/development-docs/cleaning-up-commits.md). ### Testing Guidelines - [ ] Pull request includes test coverage for the included changes. --------- Co-authored-by: robbenwang <[email protected]>
microsoft · May 6, 2024 · e53bf7f · e53bf7f
1 parent 57c632a
commit e53bf7f
Show file tree

Hide file tree

Showing 5 changed files with 149 additions and 1 deletion.
diff --git a/src/promptflow-core/promptflow/_constants.py b/src/promptflow-core/promptflow/_constants.py
@@ -171,6 +171,9 @@ class SpanAttributeFieldName:
     COMPLETION_TOKEN_COUNT = "__computed__.cumulative_token_count.completion"
     PROMPT_TOKEN_COUNT = "__computed__.cumulative_token_count.prompt"
     TOTAL_TOKEN_COUNT = "__computed__.cumulative_token_count.total"
+    # Execution target, e.g. prompty, flex, dag, code.
+    # We may need another field to indicate the language, e.g. python, csharp.
+    EXECUTION_TARGET = "execution_target"
 
     SESSION_ID = "session_id"
 

diff --git a/src/promptflow-devkit/promptflow/_internal/__init__.py b/src/promptflow-devkit/promptflow/_internal/__init__.py
@@ -52,6 +52,7 @@
 from promptflow._sdk._service.apis.collector import trace_collector
 from promptflow._sdk._tracing import process_otlp_trace_request
 from promptflow._sdk._utilities.general_utils import resolve_flow_language
+from promptflow._sdk._utilities.tracing_utils import aggregate_trace_count
 from promptflow._sdk._version import VERSION
 from promptflow._utils.context_utils import _change_working_dir, inject_sys_path
 from promptflow._utils.credential_scrubber import CredentialScrubber

diff --git a/src/promptflow-devkit/promptflow/_sdk/_tracing.py b/src/promptflow-devkit/promptflow/_sdk/_tracing.py
@@ -615,7 +615,7 @@ def process_otlp_trace_request(
         args=(all_spans, get_created_by_info_with_cache, logger, get_credential, cloud_trace_only),
     ).start()
 
-    return
+    return all_spans
 
 
 def _try_write_trace_to_cosmosdb(

diff --git a/src/promptflow-devkit/promptflow/_sdk/_utilities/tracing_utils.py b/src/promptflow-devkit/promptflow/_sdk/_utilities/tracing_utils.py
@@ -6,6 +6,7 @@
 import json
 import logging
 import typing
+from collections import namedtuple
 from dataclasses import dataclass
 from pathlib import Path
 
@@ -15,10 +16,13 @@
 from opentelemetry.trace.span import format_trace_id as otel_format_trace_id
 
 from promptflow._constants import (
+    SpanAttributeFieldName,
     SpanContextFieldName,
     SpanEventFieldName,
     SpanFieldName,
     SpanLinkFieldName,
+    SpanResourceAttributesFieldName,
+    SpanResourceFieldName,
     SpanStatusFieldName,
 )
 from promptflow._sdk._constants import HOME_PROMPT_FLOW_DIR, AzureMLWorkspaceTriad
@@ -284,3 +288,42 @@ def append_conditions(
         expression += f" and session_id == '{session_id}'"
     logger.debug("final search expression: %s", expression)
     return expression
+
+
+# SCENARIO: trace count telemetry
+# Aggregation key: one counter is kept per distinct combination of these five fields.
+# The typename matches the bound name so that repr() and pickling refer to the same identifier.
+TraceCountKey = namedtuple(
+    "TraceCountKey", ["subscription_id", "resource_group", "workspace_name", "scenario", "execution_target"]
+)
+
+
+def aggregate_trace_count(all_spans: typing.List[Span]) -> typing.Dict[TraceCountKey, int]:
+    """
+    Aggregate the trace count based on workspace info, scenario, and execution target.
+
+    Only root spans (``parent_id is None``) are counted; every other span is skipped.
+
+    The scenario is derived from the span attributes:
+
+    - ``"batch"`` when the batch run id attribute is present,
+    - ``"test"`` when the line run id attribute is present (and the batch run id is not),
+    - ``"script"`` otherwise.
+
+    The execution target is read from the span attributes and defaults to ``"code"``.
+    Workspace fields that are absent from the resource attributes are recorded as ``None``.
+
+    :param all_spans: Spans to summarize. ``None`` or an empty list yields an empty dict.
+    :return: Mapping from ``TraceCountKey`` to the number of root spans sharing that key.
+    """
+    trace_count_summary: typing.Dict[TraceCountKey, int] = {}
+
+    for span in all_spans or []:
+        # Only count for root span, ignore span count telemetry for now.
+        if span.parent_id is not None:
+            continue
+
+        # Treat a missing (None) resource/attributes as empty so that summarizing
+        # telemetry never raises on a sparsely populated span.
+        resource_attributes = (span.resource or {}).get(SpanResourceFieldName.ATTRIBUTES) or {}
+        span_attributes = span.attributes or {}
+
+        # We may need another field to indicate the language in the future, e.g. python, csharp.
+        execution_target = span_attributes.get(SpanAttributeFieldName.EXECUTION_TARGET, "code")
+
+        # Batch run id takes precedence over line run id when both are present.
+        if SpanAttributeFieldName.BATCH_RUN_ID in span_attributes:
+            scenario = "batch"
+        elif SpanAttributeFieldName.LINE_RUN_ID in span_attributes:
+            scenario = "test"
+        else:
+            scenario = "script"
+
+        key = TraceCountKey(
+            subscription_id=resource_attributes.get(SpanResourceAttributesFieldName.SUBSCRIPTION_ID),
+            resource_group=resource_attributes.get(SpanResourceAttributesFieldName.RESOURCE_GROUP_NAME),
+            workspace_name=resource_attributes.get(SpanResourceAttributesFieldName.WORKSPACE_NAME),
+            scenario=scenario,
+            execution_target=execution_target,
+        )
+        trace_count_summary[key] = trace_count_summary.get(key, 0) + 1
+
+    return trace_count_summary
diff --git a/src/promptflow-devkit/tests/unittests/_sdk/_utilities/test_tracing_utils.py b/src/promptflow-devkit/tests/unittests/_sdk/_utilities/test_tracing_utils.py
@@ -0,0 +1,101 @@
+import pytest
+from pydash import partial
+
+from promptflow._constants import SpanAttributeFieldName, SpanResourceAttributesFieldName, SpanResourceFieldName
+from promptflow._sdk._utilities.tracing_utils import aggregate_trace_count
+from promptflow._sdk.entities._trace import Span
+
+# These tests use the real Span, SpanResourceFieldName, SpanResourceAttributesFieldName, and
+# SpanAttributeFieldName implementations imported above; no mock definitions are required.
+
+
+@pytest.mark.unittest
+class TestTraceTelemetry:
+    """Unit tests for ``aggregate_trace_count``."""
+
+    def test_empty_span_list(self):
+        """An empty list of spans aggregates to an empty summary."""
+        assert aggregate_trace_count([]) == {}
+
+    def test_single_root_span(self):
+        """Root spans are counted per (workspace, scenario, execution target); non-root spans are ignored."""
+        target_key = SpanAttributeFieldName.EXECUTION_TARGET
+        workspace_resource = {
+            SpanResourceFieldName.ATTRIBUTES: {
+                SpanResourceAttributesFieldName.SUBSCRIPTION_ID: "sub",
+                SpanResourceAttributesFieldName.RESOURCE_GROUP_NAME: "rg",
+                SpanResourceAttributesFieldName.WORKSPACE_NAME: "ws",
+            }
+        }
+        # Span factory: by default builds a root span (parent_id=None) carrying the
+        # workspace resource above; individual calls override fields as needed.
+        make_span = partial(
+            Span,
+            trace_id=None,
+            span_id=None,
+            name=None,
+            context=None,
+            kind=None,
+            start_time=None,
+            end_time=None,
+            status=None,
+            parent_id=None,
+            resource=workspace_resource,
+        )
+
+        spans = [
+            # Root span carrying a batch run id.
+            make_span(attributes={target_key: "code", SpanAttributeFieldName.BATCH_RUN_ID: "batch_run_id"}),
+            # Root span carrying a line run id.
+            make_span(attributes={target_key: "code", SpanAttributeFieldName.LINE_RUN_ID: "line_run_id"}),
+            # Root spans with neither run id, one per execution target.
+            make_span(attributes={target_key: "code"}),
+            make_span(attributes={target_key: "flex"}),
+            make_span(attributes={target_key: "prompty"}),
+            # Span with a parent id: expected to be left out of the summary.
+            make_span(parent_id=1, attributes={target_key: "code"}),
+            # Root span whose resource carries no workspace information.
+            make_span(resource={}, attributes={target_key: "prompty"}),
+        ]
+
+        workspace = ("sub", "rg", "ws")
+        expected = {
+            workspace + ("batch", "code"): 1,
+            workspace + ("script", "code"): 1,
+            workspace + ("script", "flex"): 1,
+            workspace + ("script", "prompty"): 1,
+            workspace + ("test", "code"): 1,
+            (None, None, None, "script", "prompty"): 1,
+        }
+        assert aggregate_trace_count(spans) == expected