Skip to content

Commit

Permalink
[Metrics] Add --show-hidden-metrics-for-version CLI arg
Browse files Browse the repository at this point in the history
Add some infrastructure to help us deprecate and remove metrics
in a less user-hostile way.

Our deprecation process will now be:

1) Deprecate the metric in 0.N.0 - document the deprecation in
   release notes, user-facing docs, and the help text in `/metrics`
2) Hide the metric in 0.N+1.0 - users can still re-enable the
   metrics using `--show-hidden-metrics-for-version=0.N.0` as an
   escape hatch
3) Remove the metric completely in 0.N+2.0

`--show-hidden-metrics-for-version` takes a version string argument
so that users cannot fall into the habit of always enabling all
deprecated metrics, which would defeat the purpose.

This approach is copied directly from kubernetes/kubernetes#85270

Signed-off-by: Mark McLoughlin <[email protected]>
  • Loading branch information
markmc committed Feb 14, 2025
1 parent c9e2d64 commit 52c20bf
Show file tree
Hide file tree
Showing 8 changed files with 98 additions and 6 deletions.
8 changes: 8 additions & 0 deletions docs/source/serving/metrics.md
Original file line number Diff line number Diff line change
Expand Up @@ -36,3 +36,11 @@ The following metrics are exposed:
:language: python
:start-after: begin-metrics-definitions
:::

The following metrics are deprecated and due to be removed in a future version:

- *(No metrics are currently deprecated)*

Note: when metrics are deprecated in version `X.Y`, they are hidden in version `X.Y+1`
but can be re-enabled using the `--show-hidden-metrics-for-version=X.Y` escape hatch,
and are then removed in version `X.Y+2`.
36 changes: 36 additions & 0 deletions tests/test_version.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
# SPDX-License-Identifier: Apache-2.0

from unittest.mock import patch

import pytest

from vllm import version


def test_version_is_defined():
    """The version module must expose a non-None ``__version__``."""
    current_version = version.__version__
    assert current_version is not None


def test_version_tuple():
    """``__version_tuple__`` must have three, four, or five components."""
    component_count = len(version.__version_tuple__)
    assert component_count in (3, 4, 5)


@pytest.mark.parametrize(
    "version_tuple, version_str, expected",
    [
        # A dev tree (version tuple starting with 0.0) matches any string.
        ((0, 0, "dev"), "0.0", True),
        ((0, 0, "dev"), "foobar", True),
        # Only the immediately preceding minor version matches.
        ((0, 7, 4), "0.6", True),
        ((0, 7, 4), "0.5", False),
        ((0, 7, 4), "0.7", False),
        ((1, 2, 3), "1.1", True),
        ((1, 2, 3), "1.0", False),
        ((1, 2, 3), "1.2", False),
        # This won't work as expected: at an X.0 release the computed
        # "previous minor version" is "X.-1".
        ((1, 0, 0), "1.-1", True),
        ((1, 0, 0), "0.9", False),
        ((1, 0, 0), "0.17", False),
    ],
)
def test_prev_minor_version_was(version_tuple, version_str, expected):
    """Check ``_prev_minor_version_was`` against a patched version tuple."""
    with patch("vllm.version.__version_tuple__", version_tuple):
        matched = version._prev_minor_version_was(version_str)
    assert matched == expected
4 changes: 3 additions & 1 deletion vllm/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -2556,7 +2556,9 @@ def __post_init__(self):

@dataclass
class ObservabilityConfig:
"""Configuration for observability."""
"""Configuration for observability - metrics and tracing."""
show_hidden_metrics: bool = False

otlp_traces_endpoint: Optional[str] = None

# Collecting detailed timing information for each request can be expensive.
Expand Down
20 changes: 20 additions & 0 deletions vllm/engine/arg_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
import torch

import vllm.envs as envs
from vllm import version
from vllm.config import (CacheConfig, CompilationConfig, ConfigFormat,
DecodingConfig, DeviceConfig, HfOverrides,
KVTransferConfig, LoadConfig, LoadFormat, LoRAConfig,
Expand Down Expand Up @@ -185,6 +186,7 @@ class EngineArgs:
qlora_adapter_name_or_path: Optional[str] = None
disable_logprobs_during_spec_decoding: Optional[bool] = None

show_hidden_metrics_for_version: Optional[str] = None
otlp_traces_endpoint: Optional[str] = None
collect_detailed_traces: Optional[str] = None
disable_async_output_proc: bool = False
Expand Down Expand Up @@ -877,6 +879,18 @@ def add_cli_args(parser: FlexibleArgumentParser) -> FlexibleArgumentParser:
default=None,
help='Name or path of the QLoRA adapter.')

parser.add_argument('--show-hidden-metrics-for-version',
type=str,
default=None,
help='Enable deprecated Prometheus metrics that '
'have been hidden since the specified version. '
'For example, if a previously deprecated metric '
'has been hidden since the v0.7.0 release, you '
'use --show-hidden-metrics-for-version=0.7 as a '
'temporary escape hatch while you migrate to new '
'metrics. The metric is likely to be removed '
'completely in an upcoming release.')

parser.add_argument(
'--otlp-traces-endpoint',
type=str,
Expand Down Expand Up @@ -1273,6 +1287,11 @@ def create_engine_config(self,
decoding_config = DecodingConfig(
guided_decoding_backend=self.guided_decoding_backend)

show_hidden_metrics = False
if self.show_hidden_metrics_for_version is not None:
show_hidden_metrics = version._prev_minor_version_was(
self.show_hidden_metrics_for_version)

detailed_trace_modules = []
if self.collect_detailed_traces is not None:
detailed_trace_modules = self.collect_detailed_traces.split(",")
Expand All @@ -1282,6 +1301,7 @@ def create_engine_config(self,
f"Invalid module {m} in collect_detailed_traces. "
f"Valid modules are {ALLOWED_DETAILED_TRACE_MODULES}")
observability_config = ObservabilityConfig(
show_hidden_metrics=show_hidden_metrics,
otlp_traces_endpoint=self.otlp_traces_endpoint,
collect_model_forward_time="model" in detailed_trace_modules
or "all" in detailed_trace_modules,
Expand Down
4 changes: 4 additions & 0 deletions vllm/engine/metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -517,6 +517,10 @@ def __init__(self, local_interval: float, labels: Dict[str, str],
self.metrics = self._metrics_cls(labelnames=list(labels.keys()),
vllm_config=vllm_config)

# Use this flag to hide metrics that were deprecated in
# a previous release and which will be removed in a future release
self.show_hidden_metrics = vllm_config.show_hidden_metrics

def _log_gauge(self, gauge, data: Union[int, float]) -> None:
# Convenience function for logging to gauge.
gauge.labels(**self.labels).set(data)
Expand Down
2 changes: 1 addition & 1 deletion vllm/v1/engine/async_llm.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ def __init__(
if self.log_stats:
self.stat_loggers.extend([
LoggingStatLogger(),
PrometheusStatLogger(vllm_config.model_config),
PrometheusStatLogger(vllm_config),
])

# Tokenizer (+ ensure liveness if running in another process).
Expand Down
12 changes: 8 additions & 4 deletions vllm/v1/metrics/loggers.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
import numpy as np
import prometheus_client

from vllm.config import ModelConfig
from vllm.config import VllmConfig
from vllm.logger import init_logger
from vllm.v1.core.kv_cache_utils import PrefixCachingMetrics
from vllm.v1.engine import FinishReason
Expand Down Expand Up @@ -92,13 +92,17 @@ def log(self, scheduler_stats: SchedulerStats,

class PrometheusStatLogger(StatLoggerBase):

def __init__(self, model_config: ModelConfig):
def __init__(self, vllm_config: VllmConfig):
self._unregister_vllm_metrics()

# Use this flag to hide metrics that were deprecated in
# a previous release and which will be removed in a future release
self.show_hidden_metrics = vllm_config.show_hidden_metrics

labelnames = ["model_name"]
labelvalues = [model_config.served_model_name]
labelvalues = [vllm_config.model_config.served_model_name]

max_model_len = model_config.max_model_len
max_model_len = vllm_config.model_config.max_model_len

self.gauge_scheduler_running = prometheus_client.Gauge(
name="vllm:num_requests_running",
Expand Down
18 changes: 18 additions & 0 deletions vllm/version.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,3 +11,21 @@

__version__ = "dev"
__version_tuple__ = (0, 0, __version__)


def _prev_minor_version_was(version_str):
    """Report whether ``version_str`` names the previous minor version.

    For example, if the current version is 0.7.4, this returns True for
    '0.6' and False for '0.5' or '0.7'. In a dev tree (version tuple
    starting with ``(0, 0)``) every ``version_str`` is accepted.

    Used for --show-hidden-metrics-for-version.
    """
    # A dev tree has no meaningful previous release, so match anything.
    if __version_tuple__[:2] == (0, 0):
        return True

    major = __version_tuple__[0]
    minor = __version_tuple__[1]

    # Note - this won't do the right thing when we release 1.0!
    # (for an X.0 release the string built here is "X.-1").
    previous_minor = f"{major}.{minor - 1}"
    return version_str == previous_minor

0 comments on commit 52c20bf

Please sign in to comment.