diff --git a/app/controllers/internal/metrics_controller.rb b/app/controllers/internal/metrics_controller.rb index 64790eb95ef..b9c9ac4e730 100644 --- a/app/controllers/internal/metrics_controller.rb +++ b/app/controllers/internal/metrics_controller.rb @@ -1,6 +1,5 @@ require 'prometheus/client' require 'prometheus/client/formats/text' -require 'cloud_controller/metrics/prometheus_updater' module VCAP::CloudController module Internal @@ -9,16 +8,7 @@ class MetricsController < RestController::BaseController get '/internal/v4/metrics', :index def index - periodic_updater = VCAP::CloudController::Metrics::PeriodicUpdater.new( - Time.now.utc, - Steno::Sink::Counter.new, - Steno.logger('cc.api'), - [ - VCAP::CloudController::Metrics::StatsdUpdater.new, - VCAP::CloudController::Metrics::PrometheusUpdater.new - ] - ) - periodic_updater.update! + CloudController::DependencyLocator.instance.periodic_updater.update! [200, Prometheus::Client::Formats::Text.marshal(Prometheus::Client.registry)] end end diff --git a/app/controllers/internal/staging_completion_controller.rb b/app/controllers/internal/staging_completion_controller.rb index 5f897e53fe1..7694b8b130f 100644 --- a/app/controllers/internal/staging_completion_controller.rb +++ b/app/controllers/internal/staging_completion_controller.rb @@ -115,7 +115,7 @@ def statsd_updater end def prometheus_updater - @prometheus_updater ||= VCAP::CloudController::Metrics::PrometheusUpdater.new # this should be using singleton + CloudController::DependencyLocator.instance.prometheus_updater end attr_reader :stagers diff --git a/app/jobs/diego/sync.rb b/app/jobs/diego/sync.rb index 96defdbe72e..629ee2ca9c2 100644 --- a/app/jobs/diego/sync.rb +++ b/app/jobs/diego/sync.rb @@ -6,9 +6,8 @@ module VCAP::CloudController module Jobs module Diego class Sync < VCAP::CloudController::Jobs::CCJob - def initialize(statsd=Statsd.new, prometheus_updater=VCAP::CloudController::Metrics::PrometheusUpdater.new) + def initialize(statsd=Statsd.new) 
@statsd = statsd - @prometheus_updater = prometheus_updater end def perform @@ -27,7 +26,6 @@ def perform elapsed_ms = ((finish - start) * 1000).round @statsd.timing('cc.diego_sync.duration', elapsed_ms) - @prometheus_updater.report_diego_cell_sync_duration(elapsed_ms) end end diff --git a/lib/cloud_controller/dependency_locator.rb b/lib/cloud_controller/dependency_locator.rb index 5184458d8ad..6bb42db847a 100644 --- a/lib/cloud_controller/dependency_locator.rb +++ b/lib/cloud_controller/dependency_locator.rb @@ -59,9 +59,28 @@ def runners @dependencies[:runners] || register(:runners, VCAP::CloudController::Runners.new(config)) end + def periodic_updater + @dependencies[:periodic_updater] || + register(:periodic_updater, + VCAP::CloudController::Metrics::PeriodicUpdater.new( + Time.now.utc, + log_counter, + Steno.logger('cc.api'), + statsd_updater, + prometheus_updater + )) + end + def prometheus_updater - register(:prometheus_updater, VCAP::CloudController::Metrics::PrometheusUpdater.new) unless @dependencies[:prometheus_updater] - @dependencies[:prometheus_updater] + @dependencies[:prometheus_updater] || register(:prometheus_updater, VCAP::CloudController::Metrics::PrometheusUpdater.new) + end + + def statsd_updater + @dependencies[:statsd_updater] || register(:statsd_updater, VCAP::CloudController::Metrics::StatsdUpdater.new(statsd_client)) + end + + def log_counter + @dependencies[:log_counter] || register(:log_counter, Steno::Sink::Counter.new) end def stagers @@ -328,8 +347,12 @@ def registry_buddy_client end def statsd_client - @dependencies[:statsd_client] || + if @dependencies[:statsd_client] + @dependencies[:statsd_client] + else + Statsd.logger = Steno.logger('statsd.client') register(:statsd_client, Statsd.new(config.get(:statsd_host), config.get(:statsd_port))) + end end private diff --git a/lib/cloud_controller/deployment_updater/scheduler.rb b/lib/cloud_controller/deployment_updater/scheduler.rb index 8090e31858f..c6b2b5a1a5e 100644 --- 
a/lib/cloud_controller/deployment_updater/scheduler.rb +++ b/lib/cloud_controller/deployment_updater/scheduler.rb @@ -10,13 +10,11 @@ def start with_error_logging('cc.deployment_updater') do config = CloudController::DependencyLocator.instance.config statsd_client = CloudController::DependencyLocator.instance.statsd_client - prometheus_updater = CloudController::DependencyLocator.instance.prometheus_updater update_step = proc { update( update_frequency: config.get(:deployment_updater, :update_frequency_in_seconds), - statsd_client: statsd_client, - prometheus_updater: prometheus_updater + statsd_client: statsd_client ) } @@ -42,7 +40,7 @@ def start private - def update(update_frequency:, statsd_client:, prometheus_updater:) + def update(update_frequency:, statsd_client:) logger = Steno.logger('cc.deployment_updater.scheduler') update_start_time = Time.now @@ -54,7 +52,6 @@ def update(update_frequency:, statsd_client:, prometheus_updater:) ## so feed in the entire value! update_duration_ms = update_duration * 1000 statsd_client.timing('cc.deployments.update.duration', update_duration_ms) - prometheus_updater.report_deployment_duration(update_duration_ms) logger.info("Update loop took #{update_duration}s") diff --git a/lib/cloud_controller/diego/messenger.rb b/lib/cloud_controller/diego/messenger.rb index 35d19289662..576790aa54d 100644 --- a/lib/cloud_controller/diego/messenger.rb +++ b/lib/cloud_controller/diego/messenger.rb @@ -4,8 +4,9 @@ module VCAP::CloudController module Diego class Messenger - def initialize(statsd_updater=VCAP::CloudController::Metrics::StatsdUpdater.new) + def initialize(statsd_updater=VCAP::CloudController::Metrics::StatsdUpdater.new, prometheus_updater=CloudController::DependencyLocator.instance.prometheus_updater) @statsd_updater = statsd_updater + @prometheus_updater = prometheus_updater end def send_stage_request(_config, staging_details) @@ -15,6 +16,7 @@ def send_stage_request(_config, staging_details) 
bbs_stager_client.stage(staging_guid, staging_details) @statsd_updater.start_staging_request_received + @prometheus_updater.start_staging_request_received end def send_stop_staging_request(staging_guid) diff --git a/lib/cloud_controller/metrics/periodic_updater.rb b/lib/cloud_controller/metrics/periodic_updater.rb index 49ee0f59988..b1780d79c3d 100644 --- a/lib/cloud_controller/metrics/periodic_updater.rb +++ b/lib/cloud_controller/metrics/periodic_updater.rb @@ -3,9 +3,10 @@ module VCAP::CloudController::Metrics class PeriodicUpdater - def initialize(start_time, log_counter, logger=Steno.logger, updaters=[StatsdUpdater.new, PrometheusUpdater.new]) + def initialize(start_time, log_counter, logger, statsd_updater, prometheus_updater) @start_time = start_time - @updaters = updaters + @statsd_updater = statsd_updater + @prometheus_updater = prometheus_updater @log_counter = log_counter @logger = logger @known_job_queues = { @@ -45,9 +46,9 @@ def catch_error def update_task_stats running_tasks = VCAP::CloudController::TaskModel.where(state: VCAP::CloudController::TaskModel::RUNNING_STATE) running_task_count = running_tasks.count - running_task_memory = running_tasks.sum(:memory_in_mb) - running_task_memory = 0 if running_task_memory.nil? 
- @updaters.each { |u| u.update_task_stats(running_task_count, running_task_memory) } + running_task_memory = running_tasks.sum(:memory_in_mb) || 0 + @statsd_updater.update_task_stats(running_task_count, running_task_memory) + @prometheus_updater.update_task_stats(running_task_count, running_task_memory * 1024 * 1024) end def update_log_counts @@ -58,19 +59,19 @@ def update_log_counts hash[level_name] = counts.fetch(level_name.to_s, 0) end - @updaters.each { |u| u.update_log_counts(hash) } + @statsd_updater.update_log_counts(hash) end def update_deploying_count deploying_count = VCAP::CloudController::DeploymentModel.deploying_count - @updaters.each { |u| u.update_deploying_count(deploying_count) } + [@statsd_updater, @prometheus_updater].each { |u| u.update_deploying_count(deploying_count) } end def update_user_count user_count = VCAP::CloudController::User.count - @updaters.each { |u| u.update_user_count(user_count) } + [@statsd_updater, @prometheus_updater].each { |u| u.update_user_count(user_count) } end def update_job_queue_length @@ -84,13 +85,14 @@ def update_job_queue_length end pending_job_count_by_queue.reverse_merge!(@known_job_queues) - @updaters.each { |u| u.update_job_queue_length(pending_job_count_by_queue, total) } + @statsd_updater.update_job_queue_length(pending_job_count_by_queue, total) + @prometheus_updater.update_job_queue_length(pending_job_count_by_queue) end def update_thread_info local_thread_info = thread_info - @updaters.each { |u| u.update_thread_info(local_thread_info) } + [@statsd_updater, @prometheus_updater].each { |u| u.update_thread_info(local_thread_info) } end def update_failed_job_count @@ -104,7 +106,8 @@ def update_failed_job_count end failed_jobs_by_queue.reverse_merge!(@known_job_queues) - @updaters.each { |u| u.update_failed_job_count(failed_jobs_by_queue, total) } + @statsd_updater.update_failed_job_count(failed_jobs_by_queue, total) + @prometheus_updater.update_failed_job_count(failed_jobs_by_queue) end def update_vitals 
@@ -120,7 +123,13 @@ def update_vitals num_cores: VCAP::HostSystem.new.num_cores } - @updaters.each { |u| u.update_vitals(vitals) } + @statsd_updater.update_vitals(vitals) + + prom_vitals = vitals.clone + prom_vitals.delete(:uptime) + prom_vitals.delete(:cpu) + prom_vitals[:started_at] = @start_time.to_i + @prometheus_updater.update_vitals(prom_vitals) end def thread_info diff --git a/lib/cloud_controller/metrics/prometheus_updater.rb b/lib/cloud_controller/metrics/prometheus_updater.rb index 34678fca9b4..447bd09e948 100644 --- a/lib/cloud_controller/metrics/prometheus_updater.rb +++ b/lib/cloud_controller/metrics/prometheus_updater.rb @@ -2,130 +2,137 @@ module VCAP::CloudController::Metrics class PrometheusUpdater + DURATION_BUCKETS = [5, 10, 30, 60, 300, 600, 890].freeze + + METRICS = [ + { type: :gauge, name: :cc_job_queues_length_total, docstring: 'Job queues length of worker processes', labels: [:queue] }, + { type: :gauge, name: :cc_failed_jobs_total, docstring: 'Number of failed jobs of worker processes', labels: [:queue] }, + { type: :counter, name: :cc_staging_requests_total, docstring: 'Number of staging requests' }, + { type: :histogram, name: :cc_staging_succeeded_duration_seconds, docstring: 'Durations of successful staging events', buckets: DURATION_BUCKETS }, + { type: :histogram, name: :cc_staging_failed_duration_seconds, docstring: 'Durations of failed staging events', buckets: DURATION_BUCKETS }, + { type: :gauge, name: :cc_requests_outstanding_total, docstring: 'Requests outstanding' }, + { type: :counter, name: :cc_requests_completed_total, docstring: 'Requests completed' }, + { type: :gauge, name: :cc_thread_info_thread_count, docstring: 'Thread count' }, + { type: :gauge, name: :cc_thread_info_event_machine_connection_count, docstring: 'EventMachine connection count' }, + { type: :gauge, name: :cc_thread_info_event_machine_threadqueue_size, docstring: 'EventMachine thread queue size' }, + { type: :gauge, name: 
:cc_thread_info_event_machine_threadqueue_num_waiting, docstring: 'EventMachine num waiting in thread' }, + { type: :gauge, name: :cc_thread_info_event_machine_resultqueue_size, docstring: 'EventMachine queue size' }, + { type: :gauge, name: :cc_thread_info_event_machine_resultqueue_num_waiting, docstring: 'EventMachine requests waiting in queue' }, + { type: :gauge, name: :cc_vitals_started_at, docstring: 'CloudController Vitals: started_at' }, + { type: :gauge, name: :cc_vitals_mem_bytes, docstring: 'CloudController Vitals: mem_bytes' }, + { type: :gauge, name: :cc_vitals_cpu_load_avg, docstring: 'CloudController Vitals: cpu_load_avg' }, + { type: :gauge, name: :cc_vitals_mem_used_bytes, docstring: 'CloudController Vitals: mem_used_bytes' }, + { type: :gauge, name: :cc_vitals_mem_free_bytes, docstring: 'CloudController Vitals: mem_free_bytes' }, + { type: :gauge, name: :cc_vitals_num_cores, docstring: 'CloudController Vitals: num_cores' }, + { type: :gauge, name: :cc_running_tasks_total, docstring: 'Total running tasks' }, + { type: :gauge, name: :cc_running_tasks_memory_bytes, docstring: 'Total memory consumed by running tasks' }, + { type: :gauge, name: :cc_users_total, docstring: 'Number of users' }, + { type: :gauge, name: :cc_deployments_in_progress_total, docstring: 'Number of in progress deployments' } + ].freeze + def initialize(registry=Prometheus::Client.registry) @registry = registry + + # Register all metrics, to initialize them for discoverability + METRICS.each do |metric| + register_metric(metric[:type], metric[:name], metric[:docstring], **metric.slice(:labels, :buckets)) unless @registry.exist?(metric[:name]) + end + end + + def register_metric(type, name, message, labels: [], buckets: Prometheus::Client::Histogram::DEFAULT_BUCKETS) + case type + when :gauge + @registry.gauge(name, docstring: message, labels: labels) + when :counter + @registry.counter(name, docstring: message, labels: labels) + when :histogram + @registry.histogram(name, docstring: message, 
labels: labels, buckets: buckets) + else + raise ArgumentError, "Metric type #{type} does not exist." + end end - def update_gauge_metric(metric, value, message) - @registry.gauge(metric, docstring: message) unless @registry.exist?(metric) - @registry.get(metric).set(value) + def update_gauge_metric(metric, value, labels: {}) + @registry.get(metric).set(value, labels:) end - def increment_gauge_metric(metric, message) - @registry.gauge(metric, docstring: message) unless @registry.exist?(metric) + def increment_gauge_metric(metric) @registry.get(metric).increment end - def decrement_gauge_metric(metric, message) - @registry.gauge(metric, docstring: message) unless @registry.exist?(metric) + def decrement_gauge_metric(metric) @registry.get(metric).decrement end - def increment_counter_metric(metric, message) - @registry.counter(metric, docstring: message) unless @registry.exist?(metric) + def increment_counter_metric(metric) @registry.get(metric).increment end - def update_histogram_metric(metric, value, message, buckets) - @registry.histogram(metric, buckets: buckets, docstring: message) unless @registry.exist?(metric) + def update_histogram_metric(metric, value) @registry.get(metric).observe(value) end - def update_summary_metric(metric, value, message) - @registry.summary(metric, docstring: message) unless @registry.exist?(metric) + def update_summary_metric(metric, value) @registry.get(metric).observe(value) end def update_deploying_count(deploying_count) - update_gauge_metric(:cc_deployments_deploying, deploying_count, 'Number of in progress deployments') + update_gauge_metric(:cc_deployments_in_progress_total, deploying_count) end def update_user_count(user_count) - update_gauge_metric(:cc_total_users, user_count, 'Number of users') + update_gauge_metric(:cc_users_total, user_count) end - def update_job_queue_length(pending_job_count_by_queue, total) + def update_job_queue_length(pending_job_count_by_queue) pending_job_count_by_queue.each do |key, value| - 
metric_key = :"cc_job_queue_length_#{key.to_s.underscore}" - update_gauge_metric(metric_key, value, docstring: "Job queue length for worker #{key}") + update_gauge_metric(:cc_job_queues_length_total, value, labels: { queue: key.to_s.underscore }) end - - update_gauge_metric(:cc_job_queue_length_total, total, 'Total job queue length') end def update_thread_info(thread_info) - update_gauge_metric(:cc_thread_info_thread_count, thread_info[:thread_count], 'Thread count') - update_gauge_metric(:cc_thread_info_event_machine_connection_count, thread_info[:event_machine][:connection_count], 'Event Machine connection count') - update_gauge_metric(:cc_thread_info_event_machine_threadqueue_size, thread_info[:event_machine][:threadqueue][:size], 'EventMachine thread queue size') - update_gauge_metric(:cc_thread_info_event_machine_threadqueue_num_waiting, thread_info[:event_machine][:threadqueue][:num_waiting], 'EventMachine num waiting in thread') - update_gauge_metric(:cc_thread_info_event_machine_resultqueue_size, thread_info[:event_machine][:resultqueue][:size], 'EventMachine queue size') - update_gauge_metric(:cc_thread_info_event_machine_resultqueue_num_waiting, thread_info[:event_machine][:resultqueue][:num_waiting], 'EventMachine requests waiting in queue') + update_gauge_metric(:cc_thread_info_thread_count, thread_info[:thread_count]) + update_gauge_metric(:cc_thread_info_event_machine_connection_count, thread_info[:event_machine][:connection_count]) + update_gauge_metric(:cc_thread_info_event_machine_threadqueue_size, thread_info[:event_machine][:threadqueue][:size]) + update_gauge_metric(:cc_thread_info_event_machine_threadqueue_num_waiting, thread_info[:event_machine][:threadqueue][:num_waiting]) + update_gauge_metric(:cc_thread_info_event_machine_resultqueue_size, thread_info[:event_machine][:resultqueue][:size]) + update_gauge_metric(:cc_thread_info_event_machine_resultqueue_num_waiting, thread_info[:event_machine][:resultqueue][:num_waiting]) end - def 
update_failed_job_count(failed_jobs_by_queue, total) + def update_failed_job_count(failed_jobs_by_queue) failed_jobs_by_queue.each do |key, value| - metric_key = :"cc_failed_job_count_#{key.to_s.underscore}" - update_gauge_metric(metric_key, value, "Failed jobs for worker #{key}") + update_gauge_metric(:cc_failed_jobs_total, value, labels: { queue: key.to_s.underscore }) end - - update_gauge_metric(:cc_failed_job_count_total, total, 'Total failed jobs') end def update_vitals(vitals) vitals.each do |key, value| metric_key = :"cc_vitals_#{key.to_s.underscore}" - update_gauge_metric(metric_key, value, "CloudController Vitals: #{key}") + update_gauge_metric(metric_key, value) end end - def update_log_counts(counts) - counts.each do |key, value| - metric_key = :"cc_log_count_#{key.to_s.underscore}" - update_gauge_metric(metric_key, value, "Log count for log level '#{key}'") - end - end - - def update_task_stats(total_running_tasks, total_memory_in_mb) - update_gauge_metric(:cc_tasks_running_count, total_running_tasks, 'Total running tasks') - update_gauge_metric(:cc_tasks_running_memory_in_mb, total_memory_in_mb, 'Total memory consumed by running tasks') - end - - def update_synced_invalid_lrps(lrp_count) - update_gauge_metric(:cc_diego_sync_invalid_desired_lrps, lrp_count, 'Invalid Desired LRPs') + def update_task_stats(total_running_tasks, total_memory_in_bytes) + update_gauge_metric(:cc_running_tasks_total, total_running_tasks) + update_gauge_metric(:cc_running_tasks_memory_bytes, total_memory_in_bytes) end def start_staging_request_received - increment_counter_metric(:cc_staging_requested, 'Number of staging requests') + increment_counter_metric(:cc_staging_requests_total) end def report_staging_success_metrics(duration_ns) - increment_counter_metric(:cc_staging_succeeded, 'Number of successful staging events') - update_histogram_metric(:cc_staging_succeeded_duration, nanoseconds_to_milliseconds(duration_ns), 'Durations of successful staging events', 
duration_buckets) + update_histogram_metric(:cc_staging_succeeded_duration_seconds, nanoseconds_to_seconds(duration_ns)) end def report_staging_failure_metrics(duration_ns) - increment_counter_metric(:cc_staging_failed, 'Number of failed staging events') - update_histogram_metric(:cc_staging_failed_duration, nanoseconds_to_milliseconds(duration_ns), 'Durations of failed staging events', duration_buckets) - end - - def report_diego_cell_sync_duration(duration_ms) - update_summary_metric(:cc_diego_sync_duration, duration_ms, 'Diego cell sync duration') - update_gauge_metric(:cc_diego_sync_duration_gauge, duration_ms, 'Diego cell sync duration (gauge metric)') - end - - def report_deployment_duration(duration_ms) - update_summary_metric(:cc_deployments_update_duration, duration_ms, 'Deployment duration') - update_gauge_metric(:cc_deployments_update_duration_gauge, duration_ms, 'Deployment duration (gauge metric)') + update_histogram_metric(:cc_staging_failed_duration_seconds, nanoseconds_to_seconds(duration_ns)) end private - def duration_buckets - Prometheus::Client::Histogram.linear_buckets(start: 10_000, width: 5000, count: 5) - end - - def nanoseconds_to_milliseconds(time_ns) - (time_ns / 1e6).to_i + def nanoseconds_to_seconds(time_ns) + (time_ns / 1e9).to_f end end end diff --git a/lib/cloud_controller/metrics/request_metrics.rb b/lib/cloud_controller/metrics/request_metrics.rb index 83a0122112e..186cf9f5379 100644 --- a/lib/cloud_controller/metrics/request_metrics.rb +++ b/lib/cloud_controller/metrics/request_metrics.rb @@ -3,7 +3,7 @@ module VCAP::CloudController module Metrics class RequestMetrics - def initialize(statsd=Statsd.new, prometheus_updater=PrometheusUpdater.new) + def initialize(statsd=Statsd.new, prometheus_updater=CloudController::DependencyLocator.instance.prometheus_updater) @counter = 0 @statsd = statsd @prometheus_updater = prometheus_updater @@ -14,8 +14,7 @@ def start_request @statsd.gauge('cc.requests.outstanding.gauge', @counter) 
@statsd.increment 'cc.requests.outstanding' - @prometheus_updater.update_gauge_metric(:cc_requests_outstanding_gauge, @counter, 'Requests Outstanding Gauge') - @prometheus_updater.increment_gauge_metric(:cc_requests_outstanding, 'Requests Outstanding') + @prometheus_updater.increment_gauge_metric(:cc_requests_outstanding_total) end def complete_request(status) @@ -29,10 +28,8 @@ def complete_request(status) batch.increment http_status_metric end - @prometheus_updater.update_gauge_metric(:cc_requests_outstanding_gauge, @counter, 'Requests Outstanding Gauge') - @prometheus_updater.decrement_gauge_metric(:cc_requests_outstanding, 'Requests Outstanding') - @prometheus_updater.increment_gauge_metric(:cc_requests_completed, 'Requests Completed') - @prometheus_updater.increment_gauge_metric(http_status_metric.gsub('.', '_').to_sym, "Times HTTP status #{http_status_code} have been received") + @prometheus_updater.decrement_gauge_metric(:cc_requests_outstanding_total) + @prometheus_updater.increment_counter_metric(:cc_requests_completed_total) end end end diff --git a/lib/cloud_controller/runner.rb b/lib/cloud_controller/runner.rb index ff7567b64c2..c8e1bf72ebe 100644 --- a/lib/cloud_controller/runner.rb +++ b/lib/cloud_controller/runner.rb @@ -28,12 +28,12 @@ def initialize(argv) secrets_hash = parse_secrets parse_config(secrets_hash) - @log_counter = Steno::Sink::Counter.new setup_cloud_controller request_logs = VCAP::CloudController::Logs::RequestLogs.new(Steno.logger('cc.api')) - request_metrics = VCAP::CloudController::Metrics::RequestMetrics.new(statsd_client) + request_metrics = VCAP::CloudController::Metrics::RequestMetrics.new(CloudController::DependencyLocator.instance.statsd_client, + CloudController::DependencyLocator.instance.prometheus_updater) builder = RackAppBuilder.new app = builder.build(@config, request_metrics, request_logs) @@ -118,7 +118,7 @@ def setup_logging @setup_logging = true StenoConfigurer.new(@config.get(:logging)).configure do 
|steno_config_hash| - steno_config_hash[:sinks] << @log_counter + steno_config_hash[:sinks] << CloudController::DependencyLocator.instance.log_counter end end @@ -163,22 +163,7 @@ def fluent_emitter end def periodic_updater - @periodic_updater ||= VCAP::CloudController::Metrics::PeriodicUpdater.new( - Time.now.utc, - @log_counter, - Steno.logger('cc.api'), - [ - VCAP::CloudController::Metrics::StatsdUpdater.new(statsd_client) - ] - ) - end - - def statsd_client - return @statsd_client if @statsd_client - - logger.info("configuring statsd server at #{@config.get(:statsd_host)}:#{@config.get(:statsd_port)}") - Statsd.logger = Steno.logger('statsd.client') - @statsd_client = Statsd.new(@config.get(:statsd_host), @config.get(:statsd_port)) + CloudController::DependencyLocator.instance.periodic_updater end end end diff --git a/spec/request/internal/metrics_spec.rb b/spec/request/internal/metrics_spec.rb index 31fb37679d1..7aeca580d5d 100644 --- a/spec/request/internal/metrics_spec.rb +++ b/spec/request/internal/metrics_spec.rb @@ -7,8 +7,6 @@ let(:resultqueue) { double(EventMachine::Queue, size: 0, num_waiting: 1) } before do - # locator = CloudController::DependencyLocator.instance - # allow(locator).to receive(:prometheus_updater).and_return(VCAP::CloudController::Metrics::PrometheusUpdater.new(Prometheus::Client::Registry.new)) allow(EventMachine).to receive(:connection_count).and_return(123) allow(EventMachine).to receive(:instance_variable_get) do |instance_var| @@ -23,7 +21,7 @@ end end - it 'succeeds' do + it 'can be called several times' do get '/internal/v4/metrics', nil expect(last_response.status).to eq 200 @@ -40,7 +38,6 @@ cc_total_users = Prometheus::Client.registry.get(:cc_total_users) cc_total_users.set(0) unless cc_total_users.nil? 
- # Prometheus::Client::Config::data_store = nil 10.times do VCAP::CloudController::User.make end @@ -51,7 +48,7 @@ expect(last_response.status).to eq 200 - expect(last_response.body).to include('cc_total_users 10.0') + expect(last_response.body).to include('cc_users_total 10.0') end end @@ -60,8 +57,7 @@ get '/internal/v4/metrics', nil expect(last_response.body).to match(/cc_vitals_num_cores [1-9][0-9]*\.\d+/) - expect(last_response.body).to match(/cc_vitals_uptime [0-9][0-9]*\.\d+/) - expect(last_response.body).to match(/cc_vitals_cpu [1-9][0-9]*\.\d+/) + expect(last_response.body).to match(/cc_vitals_started_at [0-9][0-9]*\.\d+/) expect(last_response.body).to match(/cc_vitals_mem_bytes [1-9][0-9]*\.\d+/) expect(last_response.body).to match(/cc_vitals_cpu_load_avg [0-9][0-9]*\.\d+/) expect(last_response.body).to match(/cc_vitals_mem_used_bytes [1-9][0-9]*\.\d+/) @@ -70,17 +66,20 @@ end context 'cc_job_queue_length' do - # NOTE: Because there is no easy way to enqueue a job that will - # stick around for long enough to appear in the metrics, - # we're only testing that the metric is emitted in the output. - # If you can figure out how to actually get a job enqueued that - # will show up in the "job queue length" metric, please do update - # the test! 
- it 'includes job queue length metric in output' do + before do + Delayed::Job.enqueue(VCAP::CloudController::Jobs::Runtime::EventsCleanup.new(1), { queue: 'cc_api_0', run_at: Time.now + 1.day }) + Delayed::Job.enqueue(VCAP::CloudController::Jobs::Runtime::EventsCleanup.new(1), { queue: 'cc_generic', run_at: Time.now + 1.day }) + end + + after do + Delayed::Job.dataset.delete + end + + it 'includes job queue length metric labelled for each queue' do get '/internal/v4/metrics', nil - expect(last_response.body).to match(/cc_job_queue_length_cc_api_0 [0-9][0-9]*\.\d+/) - expect(last_response.body).to match(/cc_job_queue_length_total [0-9][0-9]*\.\d+/) + expect(last_response.body).to match(/cc_job_queues_length_total{queue="cc_api_0"} 1\.0/) + expect(last_response.body).to match(/cc_job_queues_length_total{queue="cc_generic"} 1\.0/) end end @@ -98,26 +97,22 @@ end context 'cc_failed_job_count' do - it 'reports failed job count' do - get '/internal/v4/metrics', nil + before do + Delayed::Job.enqueue(VCAP::CloudController::Jobs::Runtime::EventsCleanup.new(1), { queue: 'cc_api_0', run_at: Time.now + 1.day }) + Delayed::Job.enqueue(VCAP::CloudController::Jobs::Runtime::EventsCleanup.new(1), { queue: 'cc_generic', run_at: Time.now + 1.day }) - expect(last_response.body).to match(/cc_failed_job_count_cc_api_0 [0-9][0-9]*\.\d+/) - expect(last_response.body).to match(/cc_failed_job_count_total [0-9][0-9]*\.\d+/) + Delayed::Job.dataset.update(failed_at: Time.now.utc) end - end - context 'cc_log_count' do - it 'reports log counts' do + after do + Delayed::Job.dataset.delete + end + + it 'reports failed job count' do get '/internal/v4/metrics', nil - expect(last_response.body).to match(/cc_log_count_off [0-9][0-9]*\.\d+/) - expect(last_response.body).to match(/cc_log_count_fatal [0-9][0-9]*\.\d+/) - expect(last_response.body).to match(/cc_log_count_error [0-9][0-9]*\.\d+/) - expect(last_response.body).to match(/cc_log_count_warn [0-9][0-9]*\.\d+/) - expect(last_response.body).to 
match(/cc_log_count_info [0-9][0-9]*\.\d+/) - expect(last_response.body).to match(/cc_log_count_debug1 [0-9][0-9]*\.\d+/) - expect(last_response.body).to match(/cc_log_count_debug2 [0-9][0-9]*\.\d+/) - expect(last_response.body).to match(/cc_log_count_all [0-9][0-9]*\.\d+/) + expect(last_response.body).to match(/cc_failed_jobs_total{queue="cc_api_0"} 1\.0/) + expect(last_response.body).to match(/cc_failed_jobs_total{queue="cc_generic"} 1\.0/) end end @@ -125,8 +120,8 @@ it 'reports task stats' do get '/internal/v4/metrics', nil - expect(last_response.body).to match(/cc_tasks_running_count [0-9][0-9]*\.\d+/) - expect(last_response.body).to match(/cc_tasks_running_memory_in_mb [0-9][0-9]*\.\d+/) + expect(last_response.body).to match(/cc_running_tasks_total [0-9][0-9]*\.\d+/) + expect(last_response.body).to match(/cc_running_tasks_memory_bytes [0-9][0-9]*\.\d+/) end end @@ -134,7 +129,69 @@ it 'reports deploying_count' do get '/internal/v4/metrics', nil - expect(last_response.body).to match(/cc_deployments_deploying [0-9][0-9]*\.\d+/) + expect(last_response.body).to match(/cc_deployments_in_progress_total [0-9][0-9]*\.\d+/) + end + end + + context 'cc_staging_requests_total' do + it 'reports cc_staging_requests_total' do + get '/internal/v4/metrics', nil + + expect(last_response.body).to match(/cc_staging_requests_total [0-9][0-9]*\.\d+/) + end + end + + context 'cc_staging_succeeded_duration_seconds' do + it 'reports cc_staging_succeeded_duration_seconds' do + get '/internal/v4/metrics', nil + + expect(last_response.body).to match(/cc_staging_succeeded_duration_seconds_bucket{le="5"} [0-9][0-9]*\.\d+/) + expect(last_response.body).to match(/cc_staging_succeeded_duration_seconds_bucket{le="5"} [0-9][0-9]*\.\d+/) + expect(last_response.body).to match(/cc_staging_succeeded_duration_seconds_bucket{le="10"} [0-9][0-9]*\.\d+/) + expect(last_response.body).to match(/cc_staging_succeeded_duration_seconds_bucket{le="30"} [0-9][0-9]*\.\d+/) + expect(last_response.body).to 
match(/cc_staging_succeeded_duration_seconds_bucket{le="60"} [0-9][0-9]*\.\d+/) + expect(last_response.body).to match(/cc_staging_succeeded_duration_seconds_bucket{le="300"} [0-9][0-9]*\.\d+/) + expect(last_response.body).to match(/cc_staging_succeeded_duration_seconds_bucket{le="600"} [0-9][0-9]*\.\d+/) + expect(last_response.body).to match(/cc_staging_succeeded_duration_seconds_bucket{le="890"} [0-9][0-9]*\.\d+/) + expect(last_response.body).to match(/cc_staging_succeeded_duration_seconds_bucket{le="\+Inf"} [0-9][0-9]*\.\d+/) + + expect(last_response.body).to match(/cc_staging_succeeded_duration_seconds_sum [0-9][0-9]*\.\d+/) + expect(last_response.body).to match(/cc_staging_succeeded_duration_seconds_count [0-9][0-9]*\.\d+/) + end + end + + context 'cc_staging_failed_duration_seconds' do + it 'reports cc_staging_failed_duration_seconds' do + get '/internal/v4/metrics', nil + + expect(last_response.body).to match(/cc_staging_failed_duration_seconds_bucket{le="5"} [0-9][0-9]*\.\d+/) + expect(last_response.body).to match(/cc_staging_failed_duration_seconds_bucket{le="5"} [0-9][0-9]*\.\d+/) + expect(last_response.body).to match(/cc_staging_failed_duration_seconds_bucket{le="10"} [0-9][0-9]*\.\d+/) + expect(last_response.body).to match(/cc_staging_failed_duration_seconds_bucket{le="30"} [0-9][0-9]*\.\d+/) + expect(last_response.body).to match(/cc_staging_failed_duration_seconds_bucket{le="60"} [0-9][0-9]*\.\d+/) + expect(last_response.body).to match(/cc_staging_failed_duration_seconds_bucket{le="300"} [0-9][0-9]*\.\d+/) + expect(last_response.body).to match(/cc_staging_failed_duration_seconds_bucket{le="600"} [0-9][0-9]*\.\d+/) + expect(last_response.body).to match(/cc_staging_failed_duration_seconds_bucket{le="890"} [0-9][0-9]*\.\d+/) + expect(last_response.body).to match(/cc_staging_failed_duration_seconds_bucket{le="\+Inf"} [0-9][0-9]*\.\d+/) + + expect(last_response.body).to match(/cc_staging_failed_duration_seconds_sum [0-9][0-9]*\.\d+/) + 
expect(last_response.body).to match(/cc_staging_failed_duration_seconds_count [0-9][0-9]*\.\d+/) + end + end + + context 'cc_requests_completed_total' do + it 'reports cc_requests_completed_total' do + get '/internal/v4/metrics', nil + + expect(last_response.body).to match(/cc_requests_completed_total [0-9][0-9]*\.\d+/) + end + end + + context 'cc_requests_outstanding_total' do + it 'reports cc_requests_outstanding_total' do + get '/internal/v4/metrics', nil + + expect(last_response.body).to match(/cc_requests_outstanding_total [0-9][0-9]*\.\d+/) end end end diff --git a/spec/unit/jobs/diego/sync_spec.rb b/spec/unit/jobs/diego/sync_spec.rb index e834eb3ea98..67e2c9ab004 100644 --- a/spec/unit/jobs/diego/sync_spec.rb +++ b/spec/unit/jobs/diego/sync_spec.rb @@ -36,7 +36,6 @@ module Jobs::Diego expect(tasks_sync).to receive(:sync) expect(Time).to receive(:now).twice # Ensure that we get two time measurements. _Hopefully_ they get turned into an elapsed time and passed in where they need to be! 
expect_any_instance_of(Statsd).to receive(:timing).with('cc.diego_sync.duration', kind_of(Numeric)) - expect_any_instance_of(VCAP::CloudController::Metrics::PrometheusUpdater).to receive(:report_diego_cell_sync_duration).with(kind_of(Numeric)) job.perform end diff --git a/spec/unit/lib/cloud_controller/deployment_updater/scheduler_spec.rb b/spec/unit/lib/cloud_controller/deployment_updater/scheduler_spec.rb index 9b3232e057d..8f3dcafebf3 100644 --- a/spec/unit/lib/cloud_controller/deployment_updater/scheduler_spec.rb +++ b/spec/unit/lib/cloud_controller/deployment_updater/scheduler_spec.rb @@ -28,7 +28,6 @@ module VCAP::CloudController let(:lock_worker) { instance_double(Locket::LockWorker) } let(:logger) { instance_double(Steno::Logger, info: nil, debug: nil, error: nil) } let(:statsd_client) { instance_double(Statsd) } - let(:prometheus_updater) { instance_double(VCAP::CloudController::Metrics::PrometheusUpdater) } before do allow(Locket::LockRunner).to receive(:new).and_return(lock_runner) @@ -38,9 +37,8 @@ module VCAP::CloudController allow(lock_worker).to receive(:acquire_lock_and_repeatedly_call).and_yield allow(DeploymentUpdater::Scheduler).to receive(:sleep) allow(DeploymentUpdater::Dispatcher).to receive(:dispatch) - allow(CloudController::DependencyLocator.instance).to receive_messages(statsd_client:, prometheus_updater:) + allow(CloudController::DependencyLocator.instance).to receive_messages(statsd_client:) allow(statsd_client).to receive(:timing) - allow(prometheus_updater).to receive(:report_deployment_duration) end it 'correctly configures a LockRunner and uses it to initialize a LockWorker' do @@ -132,7 +130,6 @@ module VCAP::CloudController it 'records the deployment update duration' do expect(DeploymentUpdater::Dispatcher).to receive(:dispatch) expect(statsd_client).to receive(:timing).with('cc.deployments.update.duration', kind_of(Numeric)) - expect(prometheus_updater).to receive(:report_deployment_duration).with(kind_of(Numeric)) 
DeploymentUpdater::Scheduler.start end diff --git a/spec/unit/lib/cloud_controller/diego/messenger_spec.rb b/spec/unit/lib/cloud_controller/diego/messenger_spec.rb index a1e107c99dc..fa0ad30fd4d 100644 --- a/spec/unit/lib/cloud_controller/diego/messenger_spec.rb +++ b/spec/unit/lib/cloud_controller/diego/messenger_spec.rb @@ -4,8 +4,9 @@ module VCAP::CloudController module Diego RSpec.describe Messenger do - subject(:messenger) { Messenger.new(statsd_updater) } + subject(:messenger) { Messenger.new(statsd_updater, prometheus_updater) } let(:statsd_updater) { instance_double(VCAP::CloudController::Metrics::StatsdUpdater) } + let(:prometheus_updater) { instance_double(VCAP::CloudController::Metrics::PrometheusUpdater) } let(:bbs_stager_client) { instance_double(BbsStagerClient) } let(:config) { TestConfig.config_instance } @@ -33,13 +34,19 @@ module Diego staging_details.lifecycle = instance_double(BuildpackLifecycle, type: Lifecycles::BUILDPACK) allow(bbs_stager_client).to receive(:stage) allow(statsd_updater).to receive(:start_staging_request_received) + allow(prometheus_updater).to receive(:start_staging_request_received) end - it 'emits the `cc.staging.requested` metric' do + it 'emits the `cc.staging.requested` metric via statsd' do expect(statsd_updater).to receive(:start_staging_request_received) messenger.send_stage_request(config, staging_details) end + it 'emits the `cc.staging.requested` metric via prometheus' do + expect(prometheus_updater).to receive(:start_staging_request_received) + messenger.send_stage_request(config, staging_details) + end + it 'sends the staging message to the bbs' do messenger.send_stage_request(config, staging_details) diff --git a/spec/unit/lib/cloud_controller/metrics/periodic_updater_spec.rb b/spec/unit/lib/cloud_controller/metrics/periodic_updater_spec.rb index 6ff7a49fede..6482e27c3df 100644 --- a/spec/unit/lib/cloud_controller/metrics/periodic_updater_spec.rb +++ 
b/spec/unit/lib/cloud_controller/metrics/periodic_updater_spec.rb @@ -3,9 +3,9 @@ module VCAP::CloudController::Metrics RSpec.describe PeriodicUpdater do - let(:periodic_updater) { PeriodicUpdater.new(start_time, log_counter, logger, [updater1, updater2]) } - let(:updater1) { double(:updater1) } - let(:updater2) { double(:updater2) } + let(:periodic_updater) { PeriodicUpdater.new(start_time, log_counter, logger, statsd_updater, prometheus_updater) } + let(:statsd_updater) { double(:statsd_updater) } + let(:prometheus_updater) { double(:prometheus_updater) } let(:threadqueue) { double(EventMachine::Queue, size: 20, num_waiting: 0) } let(:resultqueue) { double(EventMachine::Queue, size: 0, num_waiting: 1) } let(:start_time) { Time.now.utc - 90 } @@ -29,8 +29,8 @@ module VCAP::CloudController::Metrics describe 'task stats' do before do - allow(updater1).to receive(:update_task_stats) - allow(updater2).to receive(:update_task_stats) + allow(statsd_updater).to receive(:update_task_stats) + allow(prometheus_updater).to receive(:update_task_stats) end describe 'number of tasks' do @@ -42,8 +42,8 @@ module VCAP::CloudController::Metrics periodic_updater.update_task_stats - expect(updater1).to have_received(:update_task_stats).with(2, anything) - expect(updater2).to have_received(:update_task_stats).with(2, anything) + expect(statsd_updater).to have_received(:update_task_stats).with(2, anything) + expect(prometheus_updater).to have_received(:update_task_stats).with(2, anything) end end @@ -55,38 +55,38 @@ module VCAP::CloudController::Metrics periodic_updater.update_task_stats - expect(updater1).to have_received(:update_task_stats).with(anything, 513) - expect(updater2).to have_received(:update_task_stats).with(anything, 513) + expect(statsd_updater).to have_received(:update_task_stats).with(anything, 513) + expect(prometheus_updater).to have_received(:update_task_stats).with(anything, 537_919_488) end context 'when there are no running tasks' do it 'properly reports 0' do 
periodic_updater.update_task_stats - expect(updater1).to have_received(:update_task_stats).with(0, 0) - expect(updater2).to have_received(:update_task_stats).with(0, 0) + expect(statsd_updater).to have_received(:update_task_stats).with(0, 0) + expect(prometheus_updater).to have_received(:update_task_stats).with(0, 0) end end end describe '#setup_updates' do before do - allow(updater1).to receive(:update_user_count) - allow(updater1).to receive(:update_job_queue_length) - allow(updater1).to receive(:update_thread_info) - allow(updater1).to receive(:update_failed_job_count) - allow(updater1).to receive(:update_vitals) - allow(updater1).to receive(:update_log_counts) - allow(updater1).to receive(:update_task_stats) - allow(updater1).to receive(:update_deploying_count) - - allow(updater2).to receive(:update_user_count) - allow(updater2).to receive(:update_job_queue_length) - allow(updater2).to receive(:update_thread_info) - allow(updater2).to receive(:update_failed_job_count) - allow(updater2).to receive(:update_vitals) - allow(updater2).to receive(:update_log_counts) - allow(updater2).to receive(:update_task_stats) - allow(updater2).to receive(:update_deploying_count) + allow(statsd_updater).to receive(:update_user_count) + allow(statsd_updater).to receive(:update_job_queue_length) + allow(statsd_updater).to receive(:update_thread_info) + allow(statsd_updater).to receive(:update_failed_job_count) + allow(statsd_updater).to receive(:update_vitals) + allow(statsd_updater).to receive(:update_log_counts) + allow(statsd_updater).to receive(:update_task_stats) + allow(statsd_updater).to receive(:update_deploying_count) + + allow(prometheus_updater).to receive(:update_user_count) + allow(prometheus_updater).to receive(:update_job_queue_length) + allow(prometheus_updater).to receive(:update_thread_info) + allow(prometheus_updater).to receive(:update_failed_job_count) + allow(prometheus_updater).to receive(:update_vitals) + allow(prometheus_updater).to 
receive(:update_log_counts) + allow(prometheus_updater).to receive(:update_task_stats) + allow(prometheus_updater).to receive(:update_deploying_count) allow(EventMachine).to receive(:add_periodic_timer) end @@ -208,22 +208,22 @@ module VCAP::CloudController::Metrics before do allow(VCAP::CloudController::DeploymentModel).to receive(:deploying_count).and_return(deploying_count) - allow(updater1).to receive(:update_deploying_count) - allow(updater2).to receive(:update_deploying_count) + allow(statsd_updater).to receive(:update_deploying_count) + allow(prometheus_updater).to receive(:update_deploying_count) end it 'sends the number of deploying deployments' do periodic_updater.update_deploying_count - expect(updater1).to have_received(:update_deploying_count).with(deploying_count) - expect(updater2).to have_received(:update_deploying_count).with(deploying_count) + expect(statsd_updater).to have_received(:update_deploying_count).with(deploying_count) + expect(prometheus_updater).to have_received(:update_deploying_count).with(deploying_count) end end describe '#update_user_count' do before do - allow(updater1).to receive(:update_user_count) - allow(updater2).to receive(:update_user_count) + allow(statsd_updater).to receive(:update_user_count) + allow(prometheus_updater).to receive(:update_user_count) end it 'includes the number of users' do @@ -231,15 +231,15 @@ module VCAP::CloudController::Metrics periodic_updater.update_user_count - expect(updater1).to have_received(:update_user_count).with(VCAP::CloudController::User.count) - expect(updater2).to have_received(:update_user_count).with(VCAP::CloudController::User.count) + expect(statsd_updater).to have_received(:update_user_count).with(VCAP::CloudController::User.count) + expect(prometheus_updater).to have_received(:update_user_count).with(VCAP::CloudController::User.count) end end describe '#update_job_queue_length' do before do - allow(updater1).to receive(:update_job_queue_length) - allow(updater2).to 
receive(:update_job_queue_length) + allow(statsd_updater).to receive(:update_job_queue_length) + allow(prometheus_updater).to receive(:update_job_queue_length) end context 'when local queue has pending jobs' do @@ -255,8 +255,8 @@ module VCAP::CloudController::Metrics } expected_total = 1 - expect(updater1).to have_received(:update_job_queue_length).with(expected_pending_job_count_by_queue, expected_total) - expect(updater2).to have_received(:update_job_queue_length).with(expected_pending_job_count_by_queue, expected_total) + expect(statsd_updater).to have_received(:update_job_queue_length).with(expected_pending_job_count_by_queue, expected_total) + expect(prometheus_updater).to have_received(:update_job_queue_length).with(expected_pending_job_count_by_queue) end end @@ -269,8 +269,8 @@ module VCAP::CloudController::Metrics } expected_total = 0 - expect(updater1).to have_received(:update_job_queue_length).with(expected_pending_job_count_by_queue, expected_total) - expect(updater2).to have_received(:update_job_queue_length).with(expected_pending_job_count_by_queue, expected_total) + expect(statsd_updater).to have_received(:update_job_queue_length).with(expected_pending_job_count_by_queue, expected_total) + expect(prometheus_updater).to have_received(:update_job_queue_length).with(expected_pending_job_count_by_queue) end end @@ -288,8 +288,8 @@ module VCAP::CloudController::Metrics } expected_total = 3 - expect(updater1).to have_received(:update_job_queue_length).with(expected_pending_job_count_by_queue, expected_total) - expect(updater2).to have_received(:update_job_queue_length).with(expected_pending_job_count_by_queue, expected_total) + expect(statsd_updater).to have_received(:update_job_queue_length).with(expected_pending_job_count_by_queue, expected_total) + expect(prometheus_updater).to have_received(:update_job_queue_length).with(expected_pending_job_count_by_queue) end it 'finds jobs which have not been attempted yet' do @@ -305,8 +305,8 @@ module 
VCAP::CloudController::Metrics } expected_total = 2 - expect(updater1).to have_received(:update_job_queue_length).with(expected_pending_job_count_by_queue, expected_total) - expect(updater2).to have_received(:update_job_queue_length).with(expected_pending_job_count_by_queue, expected_total) + expect(statsd_updater).to have_received(:update_job_queue_length).with(expected_pending_job_count_by_queue, expected_total) + expect(prometheus_updater).to have_received(:update_job_queue_length).with(expected_pending_job_count_by_queue) end it 'ignores jobs that have already been attempted' do @@ -320,8 +320,8 @@ module VCAP::CloudController::Metrics } expected_total = 0 - expect(updater1).to have_received(:update_job_queue_length).with(expected_pending_job_count_by_queue, expected_total) - expect(updater2).to have_received(:update_job_queue_length).with(expected_pending_job_count_by_queue, expected_total) + expect(statsd_updater).to have_received(:update_job_queue_length).with(expected_pending_job_count_by_queue, expected_total) + expect(prometheus_updater).to have_received(:update_job_queue_length).with(expected_pending_job_count_by_queue) end it '"resets" pending job count to 0 after they have been emitted' do @@ -336,8 +336,8 @@ module VCAP::CloudController::Metrics } expected_total = 2 - expect(updater1).to have_received(:update_job_queue_length).with(expected_pending_job_count_by_queue, expected_total) - expect(updater2).to have_received(:update_job_queue_length).with(expected_pending_job_count_by_queue, expected_total) + expect(statsd_updater).to have_received(:update_job_queue_length).with(expected_pending_job_count_by_queue, expected_total) + expect(prometheus_updater).to have_received(:update_job_queue_length).with(expected_pending_job_count_by_queue) Delayed::Job.dataset.delete periodic_updater.update_job_queue_length @@ -348,15 +348,15 @@ module VCAP::CloudController::Metrics } expected_total = 0 - expect(updater1).to 
have_received(:update_job_queue_length).with(expected_pending_job_count_by_queue, expected_total) - expect(updater2).to have_received(:update_job_queue_length).with(expected_pending_job_count_by_queue, expected_total) + expect(statsd_updater).to have_received(:update_job_queue_length).with(expected_pending_job_count_by_queue, expected_total) + expect(prometheus_updater).to have_received(:update_job_queue_length).with(expected_pending_job_count_by_queue) end end describe '#update_failed_job_count' do before do - allow(updater1).to receive(:update_failed_job_count) - allow(updater2).to receive(:update_failed_job_count) + allow(statsd_updater).to receive(:update_failed_job_count) + allow(prometheus_updater).to receive(:update_failed_job_count) end context 'when local queue has failed jobs' do @@ -373,8 +373,8 @@ module VCAP::CloudController::Metrics } expected_total = 1 - expect(updater1).to have_received(:update_failed_job_count).with(expected_failed_jobs_by_queue, expected_total) - expect(updater2).to have_received(:update_failed_job_count).with(expected_failed_jobs_by_queue, expected_total) + expect(statsd_updater).to have_received(:update_failed_job_count).with(expected_failed_jobs_by_queue, expected_total) + expect(prometheus_updater).to have_received(:update_failed_job_count).with(expected_failed_jobs_by_queue) end end @@ -391,8 +391,8 @@ module VCAP::CloudController::Metrics } expected_total = 0 - expect(updater1).to have_received(:update_failed_job_count).with(expected_failed_jobs_by_queue, expected_total) - expect(updater2).to have_received(:update_failed_job_count).with(expected_failed_jobs_by_queue, expected_total) + expect(statsd_updater).to have_received(:update_failed_job_count).with(expected_failed_jobs_by_queue, expected_total) + expect(prometheus_updater).to have_received(:update_failed_job_count).with(expected_failed_jobs_by_queue) end end @@ -413,8 +413,8 @@ module VCAP::CloudController::Metrics } expected_total = 3 - expect(updater1).to 
have_received(:update_failed_job_count).with(expected_failed_jobs_by_queue, expected_total) - expect(updater2).to have_received(:update_failed_job_count).with(expected_failed_jobs_by_queue, expected_total) + expect(statsd_updater).to have_received(:update_failed_job_count).with(expected_failed_jobs_by_queue, expected_total) + expect(prometheus_updater).to have_received(:update_failed_job_count).with(expected_failed_jobs_by_queue) end it '"resets" failed job count to 0 after they have been emitted' do @@ -430,8 +430,8 @@ module VCAP::CloudController::Metrics } expected_total = 2 - expect(updater1).to have_received(:update_failed_job_count).with(expected_failed_jobs_by_queue, expected_total) - expect(updater2).to have_received(:update_failed_job_count).with(expected_failed_jobs_by_queue, expected_total) + expect(statsd_updater).to have_received(:update_failed_job_count).with(expected_failed_jobs_by_queue, expected_total) + expect(prometheus_updater).to have_received(:update_failed_job_count).with(expected_failed_jobs_by_queue) Delayed::Job.dataset.delete periodic_updater.update_failed_job_count @@ -442,15 +442,15 @@ module VCAP::CloudController::Metrics } expected_total = 0 - expect(updater1).to have_received(:update_failed_job_count).with(expected_failed_jobs_by_queue, expected_total) - expect(updater2).to have_received(:update_failed_job_count).with(expected_failed_jobs_by_queue, expected_total) + expect(statsd_updater).to have_received(:update_failed_job_count).with(expected_failed_jobs_by_queue, expected_total) + expect(prometheus_updater).to have_received(:update_failed_job_count).with(expected_failed_jobs_by_queue) end end describe '#update_thread_info' do before do - allow(updater1).to receive(:update_thread_info) - allow(updater2).to receive(:update_thread_info) + allow(statsd_updater).to receive(:update_thread_info) + allow(prometheus_updater).to receive(:update_thread_info) periodic_updater.update_thread_info end @@ -471,8 +471,8 @@ module 
VCAP::CloudController::Metrics } } - expect(updater1).to have_received(:update_thread_info).with(expected_thread_info) - expect(updater2).to have_received(:update_thread_info).with(expected_thread_info) + expect(statsd_updater).to have_received(:update_thread_info).with(expected_thread_info) + expect(prometheus_updater).to have_received(:update_thread_info).with(expected_thread_info) end context 'when resultqueue and/or threadqueue is not a queue' do @@ -495,16 +495,15 @@ module VCAP::CloudController::Metrics } } - expect(updater1).to have_received(:update_thread_info).with(expected_thread_info) - expect(updater2).to have_received(:update_thread_info).with(expected_thread_info) + expect(statsd_updater).to have_received(:update_thread_info).with(expected_thread_info) end end end describe '#update_vitals' do before do - allow(updater1).to receive(:update_vitals) - allow(updater2).to receive(:update_vitals) + allow(statsd_updater).to receive(:update_vitals) + allow(prometheus_updater).to receive(:update_vitals) allow(VCAP::Stats).to receive_messages(process_memory_bytes_and_cpu: [1.1, 2], cpu_load_average: 0.5, memory_used_bytes: 542, memory_free_bytes: 927) allow_any_instance_of(VCAP::HostSystem).to receive(:num_cores).and_return(4) @@ -513,7 +512,7 @@ module VCAP::CloudController::Metrics it 'update the vitals on all updaters' do periodic_updater.update_vitals - expect(updater1).to have_received(:update_vitals) do |expected_vitals| + expect(statsd_updater).to have_received(:update_vitals) do |expected_vitals| expect(expected_vitals[:uptime]).to be_within(1).of(Time.now.to_i - start_time.to_i) expect(expected_vitals[:cpu_load_avg]).to eq(0.5) expect(expected_vitals[:mem_used_bytes]).to eq(542) @@ -523,13 +522,12 @@ module VCAP::CloudController::Metrics expect(expected_vitals[:num_cores]).to eq(4) end - expect(updater2).to have_received(:update_vitals) do |expected_vitals| - expect(expected_vitals[:uptime]).to be_within(1).of(Time.now.to_i - start_time.to_i) + 
expect(prometheus_updater).to have_received(:update_vitals) do |expected_vitals| + expect(expected_vitals[:started_at]).to eq(start_time.to_i) expect(expected_vitals[:cpu_load_avg]).to eq(0.5) expect(expected_vitals[:mem_used_bytes]).to eq(542) expect(expected_vitals[:mem_free_bytes]).to eq(927) expect(expected_vitals[:mem_bytes]).to eq(1.1.to_i) - expect(expected_vitals[:cpu]).to eq(2.to_f) expect(expected_vitals[:num_cores]).to eq(4) end end @@ -565,8 +563,7 @@ module VCAP::CloudController::Metrics end before do - allow(updater1).to receive(:update_log_counts) - allow(updater2).to receive(:update_log_counts) + allow(statsd_updater).to receive(:update_log_counts) allow(log_counter).to receive(:counts).and_return(count) end @@ -574,8 +571,7 @@ module VCAP::CloudController::Metrics it 'update the log counts on all updaters' do periodic_updater.update_log_counts - expect(updater1).to have_received(:update_log_counts).with(expected) - expect(updater2).to have_received(:update_log_counts).with(expected) + expect(statsd_updater).to have_received(:update_log_counts).with(expected) end it 'fills in zeros for levels without counts' do @@ -584,32 +580,11 @@ module VCAP::CloudController::Metrics periodic_updater.update_log_counts - expect(updater1).to have_received(:update_log_counts).with(expected) - expect(updater2).to have_received(:update_log_counts).with(expected) + expect(statsd_updater).to have_received(:update_log_counts).with(expected) end end describe '#update!' 
do - before do - allow(updater1).to receive(:update_user_count) - allow(updater1).to receive(:update_job_queue_length) - allow(updater1).to receive(:update_thread_info) - allow(updater1).to receive(:update_failed_job_count) - allow(updater1).to receive(:update_vitals) - allow(updater1).to receive(:update_log_counts) - allow(updater1).to receive(:update_task_stats) - allow(updater1).to receive(:update_deploying_count) - - allow(updater2).to receive(:update_user_count) - allow(updater2).to receive(:update_job_queue_length) - allow(updater2).to receive(:update_thread_info) - allow(updater2).to receive(:update_failed_job_count) - allow(updater2).to receive(:update_vitals) - allow(updater2).to receive(:update_log_counts) - allow(updater2).to receive(:update_task_stats) - allow(updater2).to receive(:update_deploying_count) - end - it 'calls all update methods' do expect(periodic_updater).to receive(:update_user_count).once expect(periodic_updater).to receive(:update_job_queue_length).once @@ -619,6 +594,7 @@ module VCAP::CloudController::Metrics expect(periodic_updater).to receive(:update_log_counts).once expect(periodic_updater).to receive(:update_task_stats).once expect(periodic_updater).to receive(:update_deploying_count).once + periodic_updater.update! end end diff --git a/spec/unit/lib/cloud_controller/metrics/prometheus_updater_spec.rb b/spec/unit/lib/cloud_controller/metrics/prometheus_updater_spec.rb index 1bf38d9865c..c2bac1460c9 100644 --- a/spec/unit/lib/cloud_controller/metrics/prometheus_updater_spec.rb +++ b/spec/unit/lib/cloud_controller/metrics/prometheus_updater_spec.rb @@ -1,46 +1,12 @@ require 'spec_helper' require 'cloud_controller/metrics/prometheus_updater' -# There are a few things to NOTE here: -# 1) We're adding this function because the Prometheus Client Registry -# is interpreter-global state, so changes that happen in one test will -# absolutely affect other tests, unless the test author has taken pains -# to carefully clean up their changes. 
-# 2) In our CI, tests are run in parallel to speed up execution. At the time -# of this writing (2022-05-03), we use the `grosser/parallel_tests` gem to -# run tests in parallel. This gem uses the `grosser/parallel` gem to distribute -# work which runs workloads in separate subprocesses, rather than threads -# in the same interpreter process. -# You will notice that we call this function that we created in a top-level -# `before` block a little bit later in this file, and _also_ that we mention -# above that the Registry is interpeter-global state. Because our -# test-parallelizer uses subprocesses to distribute its work, and also does -# not interleave tests from one file with tests from another, the registry -# recreation that we're doing here will be restricted to the tests in _this_ -# file and will not leak out into other files. -# 3) So, if you see weird or unreliable behavior out of the Registry when running -# specs (this would probably be stuff like "metric values are sometimes not what -# they should be"), check to see if our test parallelizer is still using -# subprocesses, or if it has switched to threads and runs everything in one -# interpreter. -module Prometheus - module Client - def self.recreate_registry - @registry = Prometheus::Client::Registry.new - end - end -end - module VCAP::CloudController::Metrics RSpec.describe PrometheusUpdater do let(:updater) { PrometheusUpdater.new(prom_client) } - let(:prom_client) { Prometheus::Client.registry } + let(:prom_client) { Prometheus::Client::Registry.new } - before do - Prometheus::Client.recreate_registry - end - - describe 'Promethus creation guards work correctly' do + describe 'Prometheus creation guards work correctly' do # This might look to be a duplicate of 'records the current number of deployments that are DEPLOYING' # below, but it tests that at least one of the metric updating functions can be called multiple times # without failures. 
Because we are re-creating the Prometheus Client Registry before every test, we @@ -56,12 +22,12 @@ module VCAP::CloudController::Metrics expected_deploying_count = 7 updater.update_deploying_count(expected_deploying_count) - metric = prom_client.metrics.find { |m| m.name == :cc_deployments_deploying } + metric = prom_client.metrics.find { |m| m.name == :cc_deployments_in_progress_total } expect(metric).to be_present expect(metric.get).to eq 7 updater.update_deploying_count(expected_deploying_count) - metric = prom_client.metrics.find { |m| m.name == :cc_deployments_deploying } + metric = prom_client.metrics.find { |m| m.name == :cc_deployments_in_progress_total } expect(metric).to be_present expect(metric.get).to eq 7 end @@ -72,7 +38,7 @@ module VCAP::CloudController::Metrics expected_deploying_count = 7 updater.update_deploying_count(expected_deploying_count) - metric = prom_client.metrics.find { |m| m.name == :cc_deployments_deploying } + metric = prom_client.metrics.find { |m| m.name == :cc_deployments_in_progress_total } expect(metric).to be_present expect(metric.get).to eq 7 end @@ -84,7 +50,7 @@ module VCAP::CloudController::Metrics updater.update_user_count(expected_user_count) - metric = prom_client.metrics.find { |m| m.name == :cc_total_users } + metric = prom_client.metrics.find { |m| m.name == :cc_users_total } expect(metric).to be_present expect(metric.get).to eq 5 end @@ -94,23 +60,17 @@ module VCAP::CloudController::Metrics it 'records the length of the delayed job queues and total' do expected_local_length = 5 expected_generic_length = 6 - total = expected_local_length + expected_generic_length pending_job_count_by_queue = { cc_local: expected_local_length, cc_generic: expected_generic_length } - updater.update_job_queue_length(pending_job_count_by_queue, total) + updater.update_job_queue_length(pending_job_count_by_queue) - metric = prom_client.metrics.find { |m| m.name == :cc_job_queue_length_cc_local } - expect(metric.get).to eq 5 - - metric = 
prom_client.metrics.find { |m| m.name == :cc_job_queue_length_cc_generic } - expect(metric.get).to eq 6 - - metric = prom_client.metrics.find { |m| m.name == :cc_job_queue_length_total } - expect(metric.get).to eq 11 + metric = prom_client.get :cc_job_queues_length_total + expect(metric.get(labels: { queue: 'cc_local' })).to eq 5 + expect(metric.get(labels: { queue: 'cc_generic' })).to eq 6 end end @@ -118,23 +78,17 @@ module VCAP::CloudController::Metrics it 'records the number of failed jobs in the delayed job queue and the total to statsd' do expected_local_length = 5 expected_generic_length = 6 - total = expected_local_length + expected_generic_length failed_jobs_by_queue = { cc_local: expected_local_length, cc_generic: expected_generic_length } - updater.update_failed_job_count(failed_jobs_by_queue, total) - - metric = prom_client.metrics.find { |m| m.name == :cc_failed_job_count_cc_local } - expect(metric.get).to eq 5 - - metric = prom_client.metrics.find { |m| m.name == :cc_failed_job_count_cc_generic } - expect(metric.get).to eq 6 + updater.update_failed_job_count(failed_jobs_by_queue) - metric = prom_client.metrics.find { |m| m.name == :cc_failed_job_count_total } - expect(metric.get).to eq 11 + metric = prom_client.get :cc_failed_jobs_total + expect(metric.get(labels: { queue: 'cc_local' })).to eq 5 + expect(metric.get(labels: { queue: 'cc_generic' })).to eq 6 end end @@ -180,19 +134,18 @@ module VCAP::CloudController::Metrics describe '#update_vitals' do it 'updates vitals' do vitals = { - uptime: 33, + started_at: 1_699_522_477.0, cpu_load_avg: 0.5, mem_used_bytes: 542, mem_free_bytes: 927, mem_bytes: 1, - cpu: 2.0, num_cores: 4 } updater.update_vitals(vitals) - metric = prom_client.metrics.find { |m| m.name == :cc_vitals_uptime } - expect(metric.get).to eq 33 + metric = prom_client.metrics.find { |m| m.name == :cc_vitals_started_at } + expect(metric.get).to eq 1_699_522_477.0 metric = prom_client.metrics.find { |m| m.name == :cc_vitals_cpu_load_avg } 
expect(metric.get).to eq 0.5 @@ -206,144 +159,58 @@ module VCAP::CloudController::Metrics metric = prom_client.metrics.find { |m| m.name == :cc_vitals_mem_bytes } expect(metric.get).to eq 1 - metric = prom_client.metrics.find { |m| m.name == :cc_vitals_cpu } - expect(metric.get).to eq 2.0 - metric = prom_client.metrics.find { |m| m.name == :cc_vitals_num_cores } expect(metric.get).to eq 4 end end - describe '#update_log_counts' do - it 'updates log counts' do - counts = { - off: 1, - fatal: 2, - error: 3, - warn: 4, - info: 5, - debug: 6, - debug1: 7, - debug2: 8, - all: 9 - } - - updater.update_log_counts(counts) - - metric = prom_client.metrics.find { |m| m.name == :cc_log_count_off } - expect(metric.get).to eq 1 - - metric = prom_client.metrics.find { |m| m.name == :cc_log_count_fatal } - expect(metric.get).to eq 2 - - metric = prom_client.metrics.find { |m| m.name == :cc_log_count_error } - expect(metric.get).to eq 3 - - metric = prom_client.metrics.find { |m| m.name == :cc_log_count_warn } - expect(metric.get).to eq 4 - - metric = prom_client.metrics.find { |m| m.name == :cc_log_count_info } - expect(metric.get).to eq 5 - - metric = prom_client.metrics.find { |m| m.name == :cc_log_count_debug } - expect(metric.get).to eq 6 - - metric = prom_client.metrics.find { |m| m.name == :cc_log_count_debug1 } - expect(metric.get).to eq 7 - - metric = prom_client.metrics.find { |m| m.name == :cc_log_count_debug2 } - expect(metric.get).to eq 8 - - metric = prom_client.metrics.find { |m| m.name == :cc_log_count_all } - expect(metric.get).to eq 9 - end - end - describe '#update_task_stats' do it 'records the number of running tasks and task memory' do updater.update_task_stats(5, 512) - metric = prom_client.metrics.find { |m| m.name == :cc_tasks_running_count } + metric = prom_client.metrics.find { |m| m.name == :cc_running_tasks_total } expect(metric.get).to eq 5 - metric = prom_client.metrics.find { |m| m.name == :cc_tasks_running_memory_in_mb } + metric = 
prom_client.metrics.find { |m| m.name == :cc_running_tasks_memory_bytes } expect(metric.get).to eq 512 end end - describe '#update_synced_invalid_lrps' do - it 'records number of running tasks and task memory to statsd' do - updater.update_synced_invalid_lrps(5) - metric = prom_client.metrics.find { |m| m.name == :cc_diego_sync_invalid_desired_lrps } - expect(metric.get).to eq 5 - end - end - describe '#start_staging_request_received' do - it 'increments "cc_staging_requested"' do + it 'increments "cc_staging_requests_total"' do updater.start_staging_request_received - metric = prom_client.metrics.find { |m| m.name == :cc_staging_requested } + metric = prom_client.metrics.find { |m| m.name == :cc_staging_requests_total } expect(metric.get).to eq 1 updater.start_staging_request_received - metric = prom_client.metrics.find { |m| m.name == :cc_staging_requested } + metric = prom_client.metrics.find { |m| m.name == :cc_staging_requests_total } expect(metric.get).to eq 2 end end describe '#report_staging_success_metrics' do it 'records staging success metrics' do + # 20 seconds duration_ns = 20 * 1e9 updater.report_staging_success_metrics(duration_ns) - metric = prom_client.metrics.find { |m| m.name == :cc_staging_succeeded } - expect(metric.get).to eq 1 - metric = prom_client.metrics.find { |m| m.name == :cc_staging_succeeded_duration } - # expected buckets for duration, in millis : 10000, 15000, 20000, 25000, 30000 - expect(metric.get).to eq({ '10000.0' => 0, '15000.0' => 0, '20000.0' => 1, '25000.0' => 1, '30000.0' => 1, 'sum' => 20_000, '+Inf' => 1 }) + metric = prom_client.get :cc_staging_succeeded_duration_seconds + expect(metric.get).to eq({ '5' => 0.0, '10' => 0.0, '30' => 1.0, '60' => 1.0, '300' => 1.0, '600' => 1.0, '890' => 1.0, 'sum' => 20.0, '+Inf' => 1.0 }) end end describe '#report_staging_failure_metrics' do it 'emits staging failure metrics' do - duration_ns = 20 * 1e9 + # 900 seconds + duration_ns = 900 * 1e9 
updater.report_staging_failure_metrics(duration_ns) - metric = prom_client.metrics.find { |m| m.name == :cc_staging_failed } - expect(metric.get).to eq 1 - - metric = prom_client.metrics.find { |m| m.name == :cc_staging_failed_duration } - # expected buckets for duration, in millis : 10000, 15000, 20000, 25000, 30000 - expect(metric.get).to eq({ '10000.0' => 0, '15000.0' => 0, '20000.0' => 1, '25000.0' => 1, '30000.0' => 1, 'sum' => 20_000, '+Inf' => 1 }) - end - end - - describe '#report_diego_cell_sync_duration' do - it 'reports diego cell sync duration' do - duration_ns = 20 * 1e9 - - updater.report_diego_cell_sync_duration(duration_ns) - metric = prom_client.metrics.find { |m| m.name == :cc_diego_sync_duration } - expect(metric.get).to eq({ 'count' => 1.0, 'sum' => 20_000_000_000.0 }) - - metric = prom_client.metrics.find { |m| m.name == :cc_diego_sync_duration_gauge } - expect(metric.get).to eq duration_ns - end - end - - describe '#report_deployment_duration' do - it 'reports deployments update duration' do - duration_ns = 20 * 1e9 - - updater.report_deployment_duration(duration_ns) - metric = prom_client.metrics.find { |m| m.name == :cc_deployments_update_duration } - expect(metric.get).to eq({ 'count' => 1.0, 'sum' => 20_000_000_000.0 }) - metric = prom_client.metrics.find { |m| m.name == :cc_deployments_update_duration_gauge } - expect(metric.get).to eq duration_ns + metric = prom_client.get :cc_staging_failed_duration_seconds + expect(metric.get).to eq({ '5' => 0.0, '10' => 0.0, '30' => 0.0, '60' => 0.0, '300' => 0.0, '600' => 0.0, '890' => 0.0, 'sum' => 900.0, '+Inf' => 1.0 }) end end end diff --git a/spec/unit/lib/cloud_controller/metrics/request_metrics_spec.rb b/spec/unit/lib/cloud_controller/metrics/request_metrics_spec.rb index 7b95241b9c5..8d5bf7dd220 100644 --- a/spec/unit/lib/cloud_controller/metrics/request_metrics_spec.rb +++ b/spec/unit/lib/cloud_controller/metrics/request_metrics_spec.rb @@ -11,6 +11,7 @@ module VCAP::CloudController::Metrics 
allow(prometheus_client).to receive(:update_gauge_metric) allow(prometheus_client).to receive(:decrement_gauge_metric) allow(prometheus_client).to receive(:increment_gauge_metric) + allow(prometheus_client).to receive(:increment_counter_metric) end describe '#start_request' do @@ -24,8 +25,12 @@ module VCAP::CloudController::Metrics expect(statsd_client).to have_received(:gauge).with('cc.requests.outstanding.gauge', 1) expect(statsd_client).to have_received(:increment).with('cc.requests.outstanding') - expect(prometheus_client).to have_received(:update_gauge_metric).with(:cc_requests_outstanding_gauge, 1, kind_of(String)) - expect(prometheus_client).to have_received(:increment_gauge_metric).with(:cc_requests_outstanding, kind_of(String)) + end + + it 'increments outstanding requests for prometheus' do + request_metrics.start_request + + expect(prometheus_client).to have_received(:increment_gauge_metric).with(:cc_requests_outstanding_total) end end @@ -47,25 +52,24 @@ module VCAP::CloudController::Metrics expect(batch).to have_received(:decrement).with('cc.requests.outstanding') expect(batch).to have_received(:increment).with('cc.requests.completed') expect(batch).to have_received(:increment).with('cc.http_status.2XX') + end + + it 'increments completed and decrements outstanding for prometheus' do + request_metrics.complete_request(status) - expect(prometheus_client).to have_received(:update_gauge_metric).with(:cc_requests_outstanding_gauge, -1, kind_of(String)) - expect(prometheus_client).to have_received(:decrement_gauge_metric).with(:cc_requests_outstanding, kind_of(String)) - expect(prometheus_client).to have_received(:increment_gauge_metric).with(:cc_requests_completed, kind_of(String)) - expect(prometheus_client).to have_received(:increment_gauge_metric).with(:cc_http_status_2XX, kind_of(String)) + expect(prometheus_client).to have_received(:decrement_gauge_metric).with(:cc_requests_outstanding_total) + expect(prometheus_client).to 
have_received(:increment_counter_metric).with(:cc_requests_completed_total) end it 'normalizes http status codes in statsd' do request_metrics.complete_request(200) expect(batch).to have_received(:increment).with('cc.http_status.2XX') - expect(prometheus_client).to have_received(:increment_gauge_metric).with(:cc_http_status_2XX, kind_of(String)) request_metrics.complete_request(300) expect(batch).to have_received(:increment).with('cc.http_status.3XX') - expect(prometheus_client).to have_received(:increment_gauge_metric).with(:cc_http_status_3XX, kind_of(String)) request_metrics.complete_request(400) expect(batch).to have_received(:increment).with('cc.http_status.4XX') - expect(prometheus_client).to have_received(:increment_gauge_metric).with(:cc_http_status_4XX, kind_of(String)) end end end