diff --git a/comps/cores/mega/orchestrator.py b/comps/cores/mega/orchestrator.py
index 803965f6e1..95bc24d5f7 100644
--- a/comps/cores/mega/orchestrator.py
+++ b/comps/cores/mega/orchestrator.py
@@ -25,20 +25,34 @@ class OrchestratorMetrics:
-    # Because:
+    # Metrics use "megaservice_" name prefix, and are class variables because:
     # - CI creates several orchestrator instances
     # - Prometheus requires metrics to be singletons
-    # - Oorchestror instances are not provided their own names
-    # Metrics are class members with "megaservice" name prefix
-    first_token_latency = Histogram("megaservice_first_token_latency", "First token latency (histogram)")
-    inter_token_latency = Histogram("megaservice_inter_token_latency", "Inter-token latency (histogram)")
-    request_latency = Histogram("megaservice_request_latency", "Whole request/reply latency (histogram)")
+    # - Orchestrator instances do not have their own names
     request_pending = Gauge("megaservice_request_pending", "Count of currently pending requests (gauge)")
 
+    # Metrics related to token processing are created on demand,
+    # to avoid bogus ones for services that never handle tokens.
+    first_token_latency = None
+    inter_token_latency = None
+    request_latency = None
+
     def __init__(self) -> None:
-        pass
-
-    def token_update(self, token_start: float, is_first: bool) -> float:
+        # Initially directed to metric creation
+        self.token_update = self._token_update_create
+        self.request_update = self._request_update_create
+
+    def _token_update_create(self, token_start: float, is_first: bool) -> float:
+        self.first_token_latency = Histogram("megaservice_first_token_latency", "First token latency (histogram)")
+        self.inter_token_latency = Histogram("megaservice_inter_token_latency", "Inter-token latency (histogram)")
+        self.token_update = self._token_update_real
+        return self.token_update(token_start, is_first)
+
+    def _request_update_create(self, req_start: float) -> None:
+        self.request_latency = Histogram("megaservice_request_latency", "Whole LLM request/reply latency (histogram)")
+        self.request_update = self._request_update_real
+        self.request_update(req_start)
+
+    def _token_update_real(self, token_start: float, is_first: bool) -> float:
         now = time.time()
         if is_first:
             self.first_token_latency.observe(now - token_start)
@@ -46,7 +60,7 @@ def token_update(self, token_start: float, is_first: bool) -> float:
             self.inter_token_latency.observe(now - token_start)
         return now
 
-    def request_update(self, req_start: float) -> None:
+    def _request_update_real(self, req_start: float) -> None:
         self.request_latency.observe(time.time() - req_start)
 
     def pending_update(self, increase: bool) -> None:
diff --git a/comps/cores/telemetry/README.md b/comps/cores/telemetry/README.md
index 35a3710744..80d7174a43 100644
--- a/comps/cores/telemetry/README.md
+++ b/comps/cores/telemetry/README.md
@@ -48,7 +48,7 @@ Applications' megaservice `ServiceOrchectrator` provides following metrics:
 - `megaservice_first_token_latency`: time to first token (TTFT)
 - `megaservice_inter_token_latency`: inter-token latency (ITL ~ TPOT)
 - `megaservice_request_latency`: whole request E2E latency = TTFT + ITL \* tokens
-- `megaservice_request_pending`: how many LLM requests are still in progress
+- `megaservice_request_pending`: how many requests are still in progress
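
The change above defers metric registration to the first update call by rebinding bound methods on the instance: `token_update`/`request_update` start out pointing at a "create" variant that registers the histograms, swaps itself out for the "real" variant, and forwards the call. Below is a minimal standalone sketch of that pattern, assuming `prometheus_client` (which the `Histogram`/`Gauge` calls in the diff appear to use); the `DemoMetrics`, `_update_create`, and `_update_real` names are illustrative only, not part of the orchestrator API:

```python
# Sketch of on-demand metric creation via method rebinding (illustrative names).
import time

from prometheus_client import Histogram


class DemoMetrics:
    # Created lazily, so a process that never records latency
    # does not register (and export) an always-empty metric.
    latency = None

    def __init__(self) -> None:
        # First call goes to the creation wrapper.
        self.update = self._update_create

    def _update_create(self, start: float) -> float:
        # Register the metric once, rebind to the real implementation,
        # then forward this first call to it.
        self.latency = Histogram("demo_latency", "Operation latency (histogram)")
        self.update = self._update_real
        return self.update(start)

    def _update_real(self, start: float) -> float:
        now = time.time()
        self.latency.observe(now - start)
        return now


metrics = DemoMetrics()
metrics.update(time.time())  # registers the histogram, then records
metrics.update(time.time())  # dispatches straight to _update_real
```

Compared to an `if self.latency is None:` guard inside every update, the rebinding keeps the steady-state path branch-free: after the first call, `self.update` is simply the real method.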