diff --git a/docs/_static/llama-stack-spec.html b/docs/_static/llama-stack-spec.html index ae599563dd..d6e3d8fe36 100644 --- a/docs/_static/llama-stack-spec.html +++ b/docs/_static/llama-stack-spec.html @@ -765,6 +765,79 @@ ] } }, + "/v1/telemetry/metrics/{metric_name}": { + "get": { + "responses": { + "200": { + "description": "OK", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/GetMetricsResponse" + } + } + } + } + }, + "tags": [ + "Telemetry" + ], + "description": "", + "parameters": [ + { + "name": "metric_name", + "in": "path", + "required": true, + "schema": { + "type": "string" + } + }, + { + "name": "start_time", + "in": "query", + "required": true, + "schema": { + "type": "integer" + } + }, + { + "name": "end_time", + "in": "query", + "required": false, + "schema": { + "type": "integer" + } + }, + { + "name": "step", + "in": "query", + "required": false, + "schema": { + "type": "string" + } + }, + { + "name": "query_type", + "in": "query", + "required": true, + "schema": { + "$ref": "#/components/schemas/MetricQueryType" + } + }, + { + "name": "label_matchers", + "in": "query", + "required": false, + "schema": { + "type": "array", + "items": { + "$ref": "#/components/schemas/MetricLabelMatcher" + } + } + } + ] + } + }, "/v1/models/{model_id}": { "get": { "responses": { @@ -3106,6 +3179,12 @@ "ChatCompletionResponse": { "type": "object", "properties": { + "metrics": { + "type": "array", + "items": { + "$ref": "#/components/schemas/Metric" + } + }, "completion_message": { "$ref": "#/components/schemas/CompletionMessage", "description": "The complete response message" @@ -3124,6 +3203,32 @@ ], "description": "Response from a chat completion request." }, + "Metric": { + "type": "object", + "properties": { + "type": { + "type": "string", + "const": "token_usage", + "default": "token_usage" + }, + "prompt_tokens": { + "type": "integer" + }, + "completion_tokens": { + "type": "integer" + }, + "total_tokens": { + "type": "integer" + } + }, + "additionalProperties": false, + "required": [ + "type", + "prompt_tokens", + "completion_tokens", + "total_tokens" + ] + }, "TokenLogProbs": { "type": "object", "properties": { @@ -3195,6 +3300,12 @@ "CompletionResponse": { "type": "object", "properties": { + "metrics": { + "type": "array", + "items": { + "$ref": "#/components/schemas/Metric" + } + }, "content": { "type": "string", "description": "The generated completion text" @@ -3388,6 +3499,12 @@ "ChatCompletionResponseStreamChunk": { "type": "object", "properties": { + "metrics": { + "type": "array", + "items": { + "$ref": "#/components/schemas/Metric" + } + }, "event": { "$ref": "#/components/schemas/ChatCompletionResponseEvent", "description": "The event containing the new content" @@ -3537,6 +3654,12 @@ "CompletionResponseStreamChunk": { "type": "object", "properties": { + "metrics": { + "type": "array", + "items": { + "$ref": "#/components/schemas/Metric" + } + }, "delta": { "type": "string", "description": "New content generated since last chunk. This can be one or more tokens." @@ -4480,6 +4603,12 @@ "EmbeddingsResponse": { "type": "object", "properties": { + "metrics": { + "type": "array", + "items": { + "$ref": "#/components/schemas/Metric" + } + }, "embeddings": { "type": "array", "items": { @@ -5265,6 +5394,101 @@ "metadata" ] }, + "MetricQueryType": { + "type": "string", + "enum": [ + "range", + "instant" + ] + }, + "MetricLabelMatcher": { + "type": "object", + "properties": { + "name": { + "type": "string" + }, + "value": { + "type": "string" + }, + "operator": { + "$ref": "#/components/schemas/MetricLabelOperator", + "default": "=" + } + }, + "additionalProperties": false, + "required": [ + "name", + "value", + "operator" + ] + }, + "MetricLabelOperator": { + "type": "string", + "enum": [ + "=", + "!=", + "=~", + "!~" + ] + }, + "GetMetricsResponse": { + "type": "object", + "properties": { + "data": { + "type": "array", + "items": { + "$ref": "#/components/schemas/MetricSeries" + } + } + }, + "additionalProperties": false, + "required": [ + "data" + ] + }, + "MetricDataPoint": { + "type": "object", + "properties": { + "timestamp": { + "type": "string", + "format": "date-time" + }, + "value": { + "type": "number" + } + }, + "additionalProperties": false, + "required": [ + "timestamp", + "value" + ] + }, + "MetricSeries": { + "type": "object", + "properties": { + "metric": { + "type": "string" + }, + "labels": { + "type": "object", + "additionalProperties": { + "type": "string" + } + }, + "values": { + "type": "array", + "items": { + "$ref": "#/components/schemas/MetricDataPoint" + } + } + }, + "additionalProperties": false, + "required": [ + "metric", + "labels", + "values" + ] + }, "Model": { "type": "object", "properties": { diff --git a/docs/_static/llama-stack-spec.yaml b/docs/_static/llama-stack-spec.yaml index 2953f1b69a..8b56fb1bcd 100644 --- a/docs/_static/llama-stack-spec.yaml +++ b/docs/_static/llama-stack-spec.yaml @@ -460,6 +460,51 @@ paths: required: true schema: type: string + /v1/telemetry/metrics/{metric_name}: + get: + responses: + '200': + description: OK + content: + application/json: + schema: + $ref: '#/components/schemas/GetMetricsResponse' + tags: + - Telemetry + description: '' + parameters: + - name: metric_name + in: path + required: true + schema: + type: string + - name: start_time + in: query + required: true + schema: + type: integer + - name: end_time + in: query + required: false + schema: + type: integer + - name: step + in: query + required: false + schema: + type: string + - name: query_type + in: query + required: true + schema: + $ref: '#/components/schemas/MetricQueryType' + - name: label_matchers + in: query + required: false + schema: + type: array + items: + $ref: '#/components/schemas/MetricLabelMatcher' /v1/models/{model_id}: get: responses: @@ -1925,6 +1970,10 @@ components: ChatCompletionResponse: type: object properties: + metrics: + type: array + items: + $ref: '#/components/schemas/Metric' completion_message: $ref: '#/components/schemas/CompletionMessage' description: The complete response message @@ -1938,6 +1987,25 @@ components: required: - completion_message description: Response from a chat completion request. + Metric: + type: object + properties: + type: + type: string + const: token_usage + default: token_usage + prompt_tokens: + type: integer + completion_tokens: + type: integer + total_tokens: + type: integer + additionalProperties: false + required: + - type + - prompt_tokens + - completion_tokens + - total_tokens TokenLogProbs: type: object properties: @@ -1990,6 +2058,10 @@ components: CompletionResponse: type: object properties: + metrics: + type: array + items: + $ref: '#/components/schemas/Metric' content: type: string description: The generated completion text @@ -2173,6 +2245,10 @@ components: ChatCompletionResponseStreamChunk: type: object properties: + metrics: + type: array + items: + $ref: '#/components/schemas/Metric' event: $ref: '#/components/schemas/ChatCompletionResponseEvent' description: The event containing the new content @@ -2285,6 +2361,10 @@ components: CompletionResponseStreamChunk: type: object properties: + metrics: + type: array + items: + $ref: '#/components/schemas/Metric' delta: type: string description: >- @@ -2896,6 +2976,10 @@ components: EmbeddingsResponse: type: object properties: + metrics: + type: array + items: + $ref: '#/components/schemas/Metric' embeddings: type: array items: @@ -3388,6 +3472,73 @@ components: - dataset_id - scoring_functions - metadata + MetricQueryType: + type: string + enum: + - range + - instant + MetricLabelMatcher: + type: object + properties: + name: + type: string + value: + type: string + operator: + $ref: '#/components/schemas/MetricLabelOperator' + default: '=' + additionalProperties: false + required: + - name + - value + - operator + MetricLabelOperator: + type: string + enum: + - '=' + - '!=' + - =~ + - '!~' + GetMetricsResponse: + type: object + properties: + data: + type: array + items: + $ref: '#/components/schemas/MetricSeries' + additionalProperties: false + required: + - data + MetricDataPoint: + type: object + properties: + timestamp: + type: string + format: date-time + value: + type: number + additionalProperties: false + required: + - timestamp + - value + MetricSeries: + type: object + properties: + metric: + type: string + labels: + type: object + additionalProperties: + type: string + values: + type: array + items: + $ref: '#/components/schemas/MetricDataPoint' + additionalProperties: false + required: + - metric + - labels + - values Model: type: object properties: diff --git a/llama_stack/apis/telemetry/telemetry.py b/llama_stack/apis/telemetry/telemetry.py index e37afd1bb4..7e11b40843 100644 --- a/llama_stack/apis/telemetry/telemetry.py +++ b/llama_stack/apis/telemetry/telemetry.py @@ -209,15 +209,23 @@ class MetricsMixin(BaseModel): @json_schema_type class MetricQueryType(Enum): - RANGE = "range" # Returns data points over time range - INSTANT = "instant" # Returns single data point + RANGE = "range" + INSTANT = "instant" + + +@json_schema_type +class MetricLabelOperator(Enum): + EQUALS = "=" + NOT_EQUALS = "!=" + REGEX_MATCH = "=~" + REGEX_NOT_MATCH = "!~" @json_schema_type class MetricLabelMatcher(BaseModel): name: str value: str - operator: Literal["=", "!=", "=~", "!~"] = "=" # Prometheus-style operators + operator: MetricLabelOperator = MetricLabelOperator.EQUALS @json_schema_type @@ -287,9 +295,9 @@ async def save_spans_to_dataset( async def get_metrics( self, metric_name: str, - start_time: int, # Unix timestamp in seconds - end_time: Optional[int] = None, # Unix timestamp in seconds - step: Optional[str] = "1m", # Prometheus-style duration: 1m, 5m, 1h, etc. + start_time: int, + end_time: Optional[int] = None, + step: Optional[str] = "1m", query_type: MetricQueryType = MetricQueryType.RANGE, label_matchers: Optional[List[MetricLabelMatcher]] = None, ) -> GetMetricsResponse: ...