diff --git a/config/advanced-install/namespaced-numaflow-server.yaml b/config/advanced-install/namespaced-numaflow-server.yaml index 2804c4124..8dd8b588d 100644 --- a/config/advanced-install/namespaced-numaflow-server.yaml +++ b/config/advanced-install/namespaced-numaflow-server.yaml @@ -137,12 +137,39 @@ metadata: --- apiVersion: v1 data: - config.yaml: |- + config.yaml: | # url is a required field, it should be the url of the service to which the metrics proxy will connect # url: service_name + "." + service_namespace + ".svc.cluster.local" + ":" + port # example for local prometheus service # url: http://prometheus-operated.monitoring.svc.cluster.local:9090 patterns: + - name: mono_vertex_gauge + object: mono-vertex + title: Pending Messages Lag + description: This query is the total number of pending messages for the mono vertex + expr: | + $metric_name{$filters} + params: + - name: start_time + required: false + - name: end_time + required: false + metrics: + - metric_name: monovtx_pending + required_filters: + - namespace + - mvtx_name + dimensions: + - name: pod + filters: + - name: pod + required: false + - name: period + required: false + - name: mono-vertex + filters: + - name: period + required: false - name: mono_vertex_histogram object: mono-vertex title: Processing Time Latency diff --git a/config/advanced-install/numaflow-server.yaml b/config/advanced-install/numaflow-server.yaml index 6a7444d39..b136cca99 100644 --- a/config/advanced-install/numaflow-server.yaml +++ b/config/advanced-install/numaflow-server.yaml @@ -144,12 +144,39 @@ metadata: --- apiVersion: v1 data: - config.yaml: |- + config.yaml: | # url is a required field, it should be the url of the service to which the metrics proxy will connect # url: service_name + "." + service_namespace + ".svc.cluster.local" + ":" + port # example for local prometheus service # url: http://prometheus-operated.monitoring.svc.cluster.local:9090 patterns: + - name: mono_vertex_gauge + object: mono-vertex + title: Pending Messages Lag + description: This query is the total number of pending messages for the mono vertex + expr: | + $metric_name{$filters} + params: + - name: start_time + required: false + - name: end_time + required: false + metrics: + - metric_name: monovtx_pending + required_filters: + - namespace + - mvtx_name + dimensions: + - name: pod + filters: + - name: pod + required: false + - name: period + required: false + - name: mono-vertex + filters: + - name: period + required: false - name: mono_vertex_histogram object: mono-vertex title: Processing Time Latency diff --git a/config/base/numaflow-server/numaflow-server-metrics-proxy-config.yaml b/config/base/numaflow-server/numaflow-server-metrics-proxy-config.yaml index 7824bae01..273727433 100644 --- a/config/base/numaflow-server/numaflow-server-metrics-proxy-config.yaml +++ b/config/base/numaflow-server/numaflow-server-metrics-proxy-config.yaml @@ -9,6 +9,33 @@ data: # example for local prometheus service # url: http://prometheus-operated.monitoring.svc.cluster.local:9090 patterns: + - name: mono_vertex_gauge + object: mono-vertex + title: Pending Messages Lag + description: This query is the total number of pending messages for the mono vertex + expr: | + $metric_name{$filters} + params: + - name: start_time + required: false + - name: end_time + required: false + metrics: + - metric_name: monovtx_pending + required_filters: + - namespace + - mvtx_name + dimensions: + - name: pod + filters: + - name: pod + required: false + - name: period + required: false + - name: mono-vertex + filters: + - name: period + required: false - name: mono_vertex_histogram object: mono-vertex title: Processing Time Latency @@ -77,4 +104,4 @@ data: - name: pod filters: - name: pod - required: false \ No newline at end of file + required: false diff --git a/config/install.yaml b/config/install.yaml index 60bbd6091..a11795176 100644 --- a/config/install.yaml +++ b/config/install.yaml @@ -28557,12 +28557,39 @@ metadata: --- apiVersion: v1 data: - config.yaml: |- + config.yaml: | # url is a required field, it should be the url of the service to which the metrics proxy will connect # url: service_name + "." + service_namespace + ".svc.cluster.local" + ":" + port # example for local prometheus service # url: http://prometheus-operated.monitoring.svc.cluster.local:9090 patterns: + - name: mono_vertex_gauge + object: mono-vertex + title: Pending Messages Lag + description: This query is the total number of pending messages for the mono vertex + expr: | + $metric_name{$filters} + params: + - name: start_time + required: false + - name: end_time + required: false + metrics: + - metric_name: monovtx_pending + required_filters: + - namespace + - mvtx_name + dimensions: + - name: pod + filters: + - name: pod + required: false + - name: period + required: false + - name: mono-vertex + filters: + - name: period + required: false - name: mono_vertex_histogram object: mono-vertex title: Processing Time Latency diff --git a/config/namespace-install.yaml b/config/namespace-install.yaml index 58c769f7f..e384b39e1 100644 --- a/config/namespace-install.yaml +++ b/config/namespace-install.yaml @@ -28445,12 +28445,39 @@ metadata: --- apiVersion: v1 data: - config.yaml: |- + config.yaml: | # url is a required field, it should be the url of the service to which the metrics proxy will connect # url: service_name + "." + service_namespace + ".svc.cluster.local" + ":" + port # example for local prometheus service # url: http://prometheus-operated.monitoring.svc.cluster.local:9090 patterns: + - name: mono_vertex_gauge + object: mono-vertex + title: Pending Messages Lag + description: This query is the total number of pending messages for the mono vertex + expr: | + $metric_name{$filters} + params: + - name: start_time + required: false + - name: end_time + required: false + metrics: + - metric_name: monovtx_pending + required_filters: + - namespace + - mvtx_name + dimensions: + - name: pod + filters: + - name: pod + required: false + - name: period + required: false + - name: mono-vertex + filters: + - name: period + required: false - name: mono_vertex_histogram object: mono-vertex title: Processing Time Latency diff --git a/server/apis/v1/promql_service_test.go b/server/apis/v1/promql_service_test.go index 733476ad2..3d923bb85 100644 --- a/server/apis/v1/promql_service_test.go +++ b/server/apis/v1/promql_service_test.go @@ -273,6 +273,70 @@ func Test_PromQueryBuilder(t *testing.T) { } }) } + + // tests for gauge metrics + var gauge_service = &PromQlService{ + PlaceHolders: map[string]map[string][]string{ + "monovtx_pending": { + "mono-vertex": {"$dimension", "$metric_name", "$filters"}, + }, + }, + Expression: map[string]map[string]string{ + "monovtx_pending": { + "mono-vertex": "$metric_name{$filters}", + }, + }, + } + + gauge_metrics_tests := []struct { + name string + requestBody MetricsRequestBody + expectedQuery string + expectError bool + }{ + { + name: "Successful gauge metrics template substitution", + requestBody: MetricsRequestBody{ + MetricName: "monovtx_pending", + Dimension: "mono-vertex", + Filters: map[string]string{ + "namespace": "test_namespace", + "mvtx_name": "test_mvtx", + "period": "5m", + }, + }, + expectedQuery: `monovtx_pending{namespace= "test_namespace", mvtx_name= "test_mvtx", period= "5m"}`, + }, + { + name: "Missing metric name in service config", + requestBody: MetricsRequestBody{ + MetricName: "non_existent_metric", + Dimension: "mono-vertex", + Filters: map[string]string{ + "namespace": "test_namespace", + "mvtx_name": "test_mvtx", + "period": "5m", + }, + }, + expectError: true, + }, + } + + for _, tt := range gauge_metrics_tests { + t.Run(tt.name, func(t *testing.T) { + actualQuery, err := gauge_service.BuildQuery(tt.requestBody) + if tt.expectError { + assert.Error(t, err) + } else { + assert.NoError(t, err) + if !comparePrometheusQueries(tt.expectedQuery, actualQuery) { + t.Errorf("Prometheus queries do not match.\nExpected: %s\nGot: %s", tt.expectedQuery, actualQuery) + } else { + t.Log("Prometheus queries match!") + } + } + }) + } } func Test_QueryPrometheus(t *testing.T) { @@ -322,6 +386,29 @@ func Test_QueryPrometheus(t *testing.T) { assert.Equal(t, 1, matrix.Len()) }) + t.Run("Successful gauge query", func(t *testing.T) { + mockAPI := &MockPrometheusAPI{} + promQlService := &PromQlService{ + PrometheusClient: &Prometheus{ + Api: mockAPI, + }, + } + query := `monovtx_pending{namespace="default", mvtx_name="test-mvtx", pending="5m"}` + startTime := time.Now().Add(-30 * time.Minute) + endTime := time.Now() + + ctx := context.Background() + result, err := promQlService.QueryPrometheus(ctx, query, startTime, endTime) + + assert.NoError(t, err) + assert.NotNil(t, result) + + // for query range , response should be a matrix + matrix, ok := result.(model.Matrix) + assert.True(t, ok) + assert.Equal(t, 1, matrix.Len()) + }) + t.Run("Prometheus client is nil", func(t *testing.T) { service := &PromQlService{ PrometheusClient: nil, diff --git a/ui/src/components/pages/Pipeline/partials/Graph/partials/NodeInfo/partials/Pods/partials/PodDetails/partials/Metrics/partials/LineChart/index.tsx b/ui/src/components/pages/Pipeline/partials/Graph/partials/NodeInfo/partials/Pods/partials/PodDetails/partials/Metrics/partials/LineChart/index.tsx index b7171a0c4..2415a39d1 100644 --- a/ui/src/components/pages/Pipeline/partials/Graph/partials/NodeInfo/partials/Pods/partials/PodDetails/partials/Metrics/partials/LineChart/index.tsx +++ b/ui/src/components/pages/Pipeline/partials/Graph/partials/NodeInfo/partials/Pods/partials/PodDetails/partials/Metrics/partials/LineChart/index.tsx @@ -18,7 +18,13 @@ import EmptyChart from "../EmptyChart"; import { useMetricsFetch } from "../../../../../../../../../../../../../../../utils/fetchWrappers/metricsFetch"; // TODO have a check for metricReq against metric object to ensure required fields are passed -const LineChartComponent = ({ namespaceId, pipelineId, type, metric, vertexId }: any) => { +const LineChartComponent = ({ + namespaceId, + pipelineId, + type, + metric, + vertexId, +}: any) => { const [transformedData, setTransformedData] = useState([]); const [chartLabels, setChartLabels] = useState([]); const [metricsReq, setMetricsReq] = useState({ @@ -28,7 +34,9 @@ const LineChartComponent = ({ namespaceId, pipelineId, type, metric, vertexId }: // store all filters for each selected dimension const [filtersList, setFiltersList] = useState([]); const [filters, setFilters] = useState({}); - const [previousDimension, setPreviousDimension] = useState(metricsReq?.dimension); + const [previousDimension, setPreviousDimension] = useState( + metricsReq?.dimension + ); const getRandomColor = useCallback((index: number) => { const hue = (index * 137.508) % 360; @@ -96,7 +104,7 @@ const LineChartComponent = ({ namespaceId, pipelineId, type, metric, vertexId }: name: param?.Name, required: param?.Required, })) || []; - + setParamsList([...initParams, ...newParams]); }, [metric, setParamsList]); @@ -110,7 +118,12 @@ const LineChartComponent = ({ namespaceId, pipelineId, type, metric, vertexId }: filters, }); - const groupByLabel = useCallback((dimension: string) => { + const groupByLabel = useCallback((dimension: string, metricName: string) => { + switch (metricName) { + case "monovtx_pending": + return "period"; + } + switch (dimension) { case "mono-vertex": return "mvtx_name"; @@ -123,7 +136,10 @@ const LineChartComponent = ({ namespaceId, pipelineId, type, metric, vertexId }: if (chartData) { const labels: any[] = []; const transformedData: any[] = []; - const label = groupByLabel(metricsReq?.dimension); + const label = groupByLabel( + metricsReq?.dimension, + metricsReq?.metric_name + ); chartData?.forEach((item) => { const labelVal = item?.metric?.[label]; labels.push(labelVal); diff --git a/ui/src/components/pages/Pipeline/partials/Graph/partials/NodeInfo/partials/Pods/partials/PodDetails/partials/Metrics/partials/common/FiltersDropdown/index.tsx b/ui/src/components/pages/Pipeline/partials/Graph/partials/NodeInfo/partials/Pods/partials/PodDetails/partials/Metrics/partials/common/FiltersDropdown/index.tsx index 193f2ef85..360fdbd92 100644 --- a/ui/src/components/pages/Pipeline/partials/Graph/partials/NodeInfo/partials/Pods/partials/PodDetails/partials/Metrics/partials/common/FiltersDropdown/index.tsx +++ b/ui/src/components/pages/Pipeline/partials/Graph/partials/NodeInfo/partials/Pods/partials/PodDetails/partials/Metrics/partials/common/FiltersDropdown/index.tsx @@ -29,6 +29,13 @@ export interface FiltersDropdownProps { setFilters: any; } +const periodData = [ + { name: "default" }, + { name: "1m" }, + { name: "5m" }, + { name: "15m" }, +]; + const FiltersDropdown = ({ items, namespaceId, @@ -64,7 +71,9 @@ const FiltersDropdown = ({ try { const response = await fetch( `${host}${getBaseHref()}/api/v1/namespaces/${namespaceId}/${ - type === "monoVertex" ? `mono-vertices/${pipelineId}/pods` : `pipelines/${pipelineId}/vertices/${vertexId}/pods` + type === "monoVertex" + ? `mono-vertices/${pipelineId}/pods` + : `pipelines/${pipelineId}/vertices/${vertexId}/pods` }` ); if (!response.ok) { @@ -101,6 +110,8 @@ const FiltersDropdown = ({ switch (filterName) { case "pod": return podsData; + case "period": + return periodData; default: return null; } diff --git a/ui/src/components/pages/Pipeline/partials/Graph/partials/NodeInfo/partials/Pods/partials/PodDetails/partials/Metrics/utils/constants.ts b/ui/src/components/pages/Pipeline/partials/Graph/partials/NodeInfo/partials/Pods/partials/PodDetails/partials/Metrics/utils/constants.ts index 925d8acea..6c367c427 100644 --- a/ui/src/components/pages/Pipeline/partials/Graph/partials/NodeInfo/partials/Pods/partials/PodDetails/partials/Metrics/utils/constants.ts +++ b/ui/src/components/pages/Pipeline/partials/Graph/partials/NodeInfo/partials/Pods/partials/PodDetails/partials/Metrics/utils/constants.ts @@ -19,7 +19,7 @@ export const dimensionMap: { [p: string]: string } = { "mono-vertex": "MonoVertex", pod: "Pod", pipeline: "Pipeline", - vertex: "Vertex" + vertex: "Vertex", }; export const dimensionReverseMap: { [p: string]: string } = { @@ -38,6 +38,6 @@ export const metricNameMap: { [p: string]: string } = { "Mono Vertex Processing Time Latency (in micro seconds)", monovtx_sink_time_bucket: "Mono Vertex Sink Write Time Latency (in micro seconds)", - forwarder_data_read_total: - "Vertex Read Processing Rate" + forwarder_data_read_total: "Vertex Read Processing Rate", + monovtx_pending: "Mono Vertex Pending", };