diff --git a/docs/_static/llama-stack-spec.html b/docs/_static/llama-stack-spec.html
index ae599563dd..d6e3d8fe36 100644
--- a/docs/_static/llama-stack-spec.html
+++ b/docs/_static/llama-stack-spec.html
@@ -765,6 +765,79 @@
]
}
},
+ "/v1/telemetry/metrics/{metric_name}": {
+ "get": {
+ "responses": {
+ "200": {
+ "description": "OK",
+ "content": {
+ "application/json": {
+ "schema": {
+ "$ref": "#/components/schemas/GetMetricsResponse"
+ }
+ }
+ }
+ }
+ },
+ "tags": [
+ "Telemetry"
+ ],
+ "description": "",
+ "parameters": [
+ {
+ "name": "metric_name",
+ "in": "path",
+ "required": true,
+ "schema": {
+ "type": "string"
+ }
+ },
+ {
+ "name": "start_time",
+ "in": "query",
+ "required": true,
+ "schema": {
+ "type": "integer"
+ }
+ },
+ {
+ "name": "end_time",
+ "in": "query",
+ "required": false,
+ "schema": {
+ "type": "integer"
+ }
+ },
+ {
+ "name": "step",
+ "in": "query",
+ "required": false,
+ "schema": {
+ "type": "string"
+ }
+ },
+ {
+ "name": "query_type",
+ "in": "query",
+ "required": true,
+ "schema": {
+ "$ref": "#/components/schemas/MetricQueryType"
+ }
+ },
+ {
+ "name": "label_matchers",
+ "in": "query",
+ "required": false,
+ "schema": {
+ "type": "array",
+ "items": {
+ "$ref": "#/components/schemas/MetricLabelMatcher"
+ }
+ }
+ }
+ ]
+ }
+ },
"/v1/models/{model_id}": {
"get": {
"responses": {
@@ -3106,6 +3179,12 @@
"ChatCompletionResponse": {
"type": "object",
"properties": {
+ "metrics": {
+ "type": "array",
+ "items": {
+ "$ref": "#/components/schemas/Metric"
+ }
+ },
"completion_message": {
"$ref": "#/components/schemas/CompletionMessage",
"description": "The complete response message"
@@ -3124,6 +3203,32 @@
],
"description": "Response from a chat completion request."
},
+ "Metric": {
+ "type": "object",
+ "properties": {
+ "type": {
+ "type": "string",
+ "const": "token_usage",
+ "default": "token_usage"
+ },
+ "prompt_tokens": {
+ "type": "integer"
+ },
+ "completion_tokens": {
+ "type": "integer"
+ },
+ "total_tokens": {
+ "type": "integer"
+ }
+ },
+ "additionalProperties": false,
+ "required": [
+ "type",
+ "prompt_tokens",
+ "completion_tokens",
+ "total_tokens"
+ ]
+ },
"TokenLogProbs": {
"type": "object",
"properties": {
@@ -3195,6 +3300,12 @@
"CompletionResponse": {
"type": "object",
"properties": {
+ "metrics": {
+ "type": "array",
+ "items": {
+ "$ref": "#/components/schemas/Metric"
+ }
+ },
"content": {
"type": "string",
"description": "The generated completion text"
@@ -3388,6 +3499,12 @@
"ChatCompletionResponseStreamChunk": {
"type": "object",
"properties": {
+ "metrics": {
+ "type": "array",
+ "items": {
+ "$ref": "#/components/schemas/Metric"
+ }
+ },
"event": {
"$ref": "#/components/schemas/ChatCompletionResponseEvent",
"description": "The event containing the new content"
@@ -3537,6 +3654,12 @@
"CompletionResponseStreamChunk": {
"type": "object",
"properties": {
+ "metrics": {
+ "type": "array",
+ "items": {
+ "$ref": "#/components/schemas/Metric"
+ }
+ },
"delta": {
"type": "string",
"description": "New content generated since last chunk. This can be one or more tokens."
@@ -4480,6 +4603,12 @@
"EmbeddingsResponse": {
"type": "object",
"properties": {
+ "metrics": {
+ "type": "array",
+ "items": {
+ "$ref": "#/components/schemas/Metric"
+ }
+ },
"embeddings": {
"type": "array",
"items": {
@@ -5265,6 +5394,101 @@
"metadata"
]
},
+ "MetricQueryType": {
+ "type": "string",
+ "enum": [
+ "range",
+ "instant"
+ ]
+ },
+ "MetricLabelMatcher": {
+ "type": "object",
+ "properties": {
+ "name": {
+ "type": "string"
+ },
+ "value": {
+ "type": "string"
+ },
+ "operator": {
+ "$ref": "#/components/schemas/MetricLabelOperator",
+ "default": "="
+ }
+ },
+ "additionalProperties": false,
+ "required": [
+ "name",
+ "value",
+ "operator"
+ ]
+ },
+ "MetricLabelOperator": {
+ "type": "string",
+ "enum": [
+ "=",
+ "!=",
+ "=~",
+ "!~"
+ ]
+ },
+ "GetMetricsResponse": {
+ "type": "object",
+ "properties": {
+ "data": {
+ "type": "array",
+ "items": {
+ "$ref": "#/components/schemas/MetricSeries"
+ }
+ }
+ },
+ "additionalProperties": false,
+ "required": [
+ "data"
+ ]
+ },
+ "MetricDataPoint": {
+ "type": "object",
+ "properties": {
+ "timestamp": {
+ "type": "string",
+ "format": "date-time"
+ },
+ "value": {
+ "type": "number"
+ }
+ },
+ "additionalProperties": false,
+ "required": [
+ "timestamp",
+ "value"
+ ]
+ },
+ "MetricSeries": {
+ "type": "object",
+ "properties": {
+ "metric": {
+ "type": "string"
+ },
+ "labels": {
+ "type": "object",
+ "additionalProperties": {
+ "type": "string"
+ }
+ },
+ "values": {
+ "type": "array",
+ "items": {
+ "$ref": "#/components/schemas/MetricDataPoint"
+ }
+ }
+ },
+ "additionalProperties": false,
+ "required": [
+ "metric",
+ "labels",
+ "values"
+ ]
+ },
"Model": {
"type": "object",
"properties": {
diff --git a/docs/_static/llama-stack-spec.yaml b/docs/_static/llama-stack-spec.yaml
index 2953f1b69a..8b56fb1bcd 100644
--- a/docs/_static/llama-stack-spec.yaml
+++ b/docs/_static/llama-stack-spec.yaml
@@ -460,6 +460,51 @@ paths:
required: true
schema:
type: string
+ /v1/telemetry/metrics/{metric_name}:
+ get:
+ responses:
+ '200':
+ description: OK
+ content:
+ application/json:
+ schema:
+ $ref: '#/components/schemas/GetMetricsResponse'
+ tags:
+ - Telemetry
+ description: ''
+ parameters:
+ - name: metric_name
+ in: path
+ required: true
+ schema:
+ type: string
+ - name: start_time
+ in: query
+ required: true
+ schema:
+ type: integer
+ - name: end_time
+ in: query
+ required: false
+ schema:
+ type: integer
+ - name: step
+ in: query
+ required: false
+ schema:
+ type: string
+ - name: query_type
+ in: query
+ required: true
+ schema:
+ $ref: '#/components/schemas/MetricQueryType'
+ - name: label_matchers
+ in: query
+ required: false
+ schema:
+ type: array
+ items:
+ $ref: '#/components/schemas/MetricLabelMatcher'
/v1/models/{model_id}:
get:
responses:
@@ -1925,6 +1970,10 @@ components:
ChatCompletionResponse:
type: object
properties:
+ metrics:
+ type: array
+ items:
+ $ref: '#/components/schemas/Metric'
completion_message:
$ref: '#/components/schemas/CompletionMessage'
description: The complete response message
@@ -1938,6 +1987,25 @@ components:
required:
- completion_message
description: Response from a chat completion request.
+ Metric:
+ type: object
+ properties:
+ type:
+ type: string
+ const: token_usage
+ default: token_usage
+ prompt_tokens:
+ type: integer
+ completion_tokens:
+ type: integer
+ total_tokens:
+ type: integer
+ additionalProperties: false
+ required:
+ - type
+ - prompt_tokens
+ - completion_tokens
+ - total_tokens
TokenLogProbs:
type: object
properties:
@@ -1990,6 +2058,10 @@ components:
CompletionResponse:
type: object
properties:
+ metrics:
+ type: array
+ items:
+ $ref: '#/components/schemas/Metric'
content:
type: string
description: The generated completion text
@@ -2173,6 +2245,10 @@ components:
ChatCompletionResponseStreamChunk:
type: object
properties:
+ metrics:
+ type: array
+ items:
+ $ref: '#/components/schemas/Metric'
event:
$ref: '#/components/schemas/ChatCompletionResponseEvent'
description: The event containing the new content
@@ -2285,6 +2361,10 @@ components:
CompletionResponseStreamChunk:
type: object
properties:
+ metrics:
+ type: array
+ items:
+ $ref: '#/components/schemas/Metric'
delta:
type: string
description: >-
@@ -2896,6 +2976,10 @@ components:
EmbeddingsResponse:
type: object
properties:
+ metrics:
+ type: array
+ items:
+ $ref: '#/components/schemas/Metric'
embeddings:
type: array
items:
@@ -3388,6 +3472,73 @@ components:
- dataset_id
- scoring_functions
- metadata
+ MetricQueryType:
+ type: string
+ enum:
+ - range
+ - instant
+ MetricLabelMatcher:
+ type: object
+ properties:
+ name:
+ type: string
+ value:
+ type: string
+ operator:
+ $ref: '#/components/schemas/MetricLabelOperator'
+ default: '='
+ additionalProperties: false
+ required:
+ - name
+ - value
+ - operator
+ MetricLabelOperator:
+ type: string
+ enum:
+ - '='
+ - '!='
+ - =~
+ - '!~'
+ GetMetricsResponse:
+ type: object
+ properties:
+ data:
+ type: array
+ items:
+ $ref: '#/components/schemas/MetricSeries'
+ additionalProperties: false
+ required:
+ - data
+ MetricDataPoint:
+ type: object
+ properties:
+ timestamp:
+ type: string
+ format: date-time
+ value:
+ type: number
+ additionalProperties: false
+ required:
+ - timestamp
+ - value
+ MetricSeries:
+ type: object
+ properties:
+ metric:
+ type: string
+ labels:
+ type: object
+ additionalProperties:
+ type: string
+ values:
+ type: array
+ items:
+ $ref: '#/components/schemas/MetricDataPoint'
+ additionalProperties: false
+ required:
+ - metric
+ - labels
+ - values
Model:
type: object
properties:
diff --git a/llama_stack/apis/telemetry/telemetry.py b/llama_stack/apis/telemetry/telemetry.py
index e37afd1bb4..7e11b40843 100644
--- a/llama_stack/apis/telemetry/telemetry.py
+++ b/llama_stack/apis/telemetry/telemetry.py
@@ -209,15 +209,23 @@ class MetricsMixin(BaseModel):
@json_schema_type
class MetricQueryType(Enum):
- RANGE = "range" # Returns data points over time range
- INSTANT = "instant" # Returns single data point
+ RANGE = "range"  # Returns data points over a time range
+ INSTANT = "instant"  # Returns a single data point
+
+
+@json_schema_type
+class MetricLabelOperator(Enum):  # Prometheus-style label matcher operators
+ EQUALS = "="  # equality
+ NOT_EQUALS = "!="  # inequality
+ REGEX_MATCH = "=~"  # regex match
+ REGEX_NOT_MATCH = "!~"  # negated regex match
@json_schema_type
class MetricLabelMatcher(BaseModel):
name: str
value: str
- operator: Literal["=", "!=", "=~", "!~"] = "=" # Prometheus-style operators
+ operator: MetricLabelOperator = MetricLabelOperator.EQUALS  # Prometheus-style operator; defaults to "="
@json_schema_type
@@ -287,9 +295,9 @@ async def save_spans_to_dataset(
async def get_metrics(
self,
metric_name: str,
- start_time: int, # Unix timestamp in seconds
- end_time: Optional[int] = None, # Unix timestamp in seconds
- step: Optional[str] = "1m", # Prometheus-style duration: 1m, 5m, 1h, etc.
+ start_time: int,  # Unix timestamp in seconds
+ end_time: Optional[int] = None,  # Unix timestamp in seconds
+ step: Optional[str] = "1m",  # Prometheus-style duration: 1m, 5m, 1h, etc.
query_type: MetricQueryType = MetricQueryType.RANGE,
label_matchers: Optional[List[MetricLabelMatcher]] = None,
) -> GetMetricsResponse: ...