Skip to content

Commit

Permalink
test(pd): check for emitted metrics
Browse files Browse the repository at this point in the history
Tacks on a new integration test to catch metrics regressions,
rather than waiting until post-deploy. The test is straightforward: GET
the metrics endpoint, confirm a few matches. More subtle is the
reordering of the smoke test suite: the pd tests come last now, since
the metrics will be empty on pd start, if no work has been performed.

Refs #3780, #5004.
  • Loading branch information
conorsch committed Jan 27, 2025
1 parent bcdfc23 commit e3c417c
Show file tree
Hide file tree
Showing 3 changed files with 59 additions and 14 deletions.
1 change: 1 addition & 0 deletions crates/bin/pd/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -126,3 +126,4 @@ penumbra-sdk-proof-params = { workspace = true, features = [
assert_cmd = { workspace = true }
predicates = "2.1"
prost-reflect = "0.14.3"
regex = { workspace = true }
37 changes: 37 additions & 0 deletions crates/bin/pd/tests/network_integration.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,45 @@ use http::StatusCode;
use penumbra_sdk_proto::FILE_DESCRIPTOR_SET;
use predicates::prelude::*;
use prost_reflect::{DescriptorPool, ServiceDescriptor};
use regex::Regex;
use url::Url;

/// Specific patterns for spot-checking the metrics emitted by pd.
/// It's a smattering of metrics from the various components, including
/// some from outside the workspace, e.g. `cnidarium`.
///
/// Each entry is a regular expression anchored with `^` that matches a
/// metric name followed by a single space and at least one digit.
/// `confirm_metrics_emission` prepends `(?m)` to every pattern, so `^`
/// matches at the start of any line of the metrics response body rather
/// than only at the start of the whole body.
const PD_METRICS_PATTERNS: &[&str] = &[
r"^cnidarium_get_raw_duration_seconds_count_seconds \d+",
r"^cnidarium_nonverifiable_get_raw_duration_seconds_count_seconds \d+",
r"^pd_async_sleep_drift_microseconds \d+",
r"^penumbra_funding_streams_total_processing_time_milliseconds_count_milliseconds \d+",
r"^penumbra_dex_path_search_duration_seconds_count_seconds \d+",
];

#[ignore]
#[tokio::test]
/// Confirm that prometheus metrics are being exported for scraping.
/// Several times while bumping related crates we've missed a breakage
/// to metrics, and only noticed when we checked the grafana boards
/// for the preview environment post-deploy.
///
/// The metrics endpoint is read from the `PENUMBRA_NODE_PD_METRICS_URL`
/// environment variable, falling back to `http://localhost:9000/metrics`
/// when the variable is unset or not valid unicode.
///
/// # Errors
///
/// Returns an error if the HTTP request fails, the response body cannot be
/// read as text, or one of the patterns fails to compile as a regex.
///
/// # Panics
///
/// Panics (failing the test) if the response status is not `200 OK`, or if
/// any pattern in `PD_METRICS_PATTERNS` matches no line of the response body.
async fn confirm_metrics_emission() -> anyhow::Result<()> {
    let client = reqwest::Client::new();
    // Build the default URL lazily, so nothing is allocated when the
    // environment variable is set.
    let metrics_url = std::env::var("PENUMBRA_NODE_PD_METRICS_URL")
        .unwrap_or_else(|_| "http://localhost:9000/metrics".to_string());
    let r = client.get(metrics_url).send().await?;
    // Capture the status before `text()` consumes the response.
    let status = r.status();
    let body = r.text().await?;
    // Assert 200
    assert_eq!(status, StatusCode::OK);

    // Check specific metrics in the combined output
    for pattern in PD_METRICS_PATTERNS {
        // Enable multi-line support in the regex matching, so that the `^`
        // anchor in each pattern matches at the start of every line.
        let re = Regex::new(&format!(r"(?m){}", pattern))?;
        assert!(re.is_match(&body), "pd metric missing: {}", pattern);
    }
    Ok(())
}

#[ignore]
#[tokio::test]
/// Confirm that permissive CORS headers are returned in HTTP responses
Expand Down
35 changes: 21 additions & 14 deletions deployments/compose/process-compose-smoke-test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -65,18 +65,12 @@ processes:
pd:
condition: process_healthy

# Run `pd` integration tests.
test-pd:
command: >-
cargo test --release --package pd -- --ignored --test-threads 1 --nocapture
depends_on:
pd:
condition: process_healthy
cometbft:
condition: process_started
availability:
restart: exit_on_failure

# The order of the integration tests is (unfortunately) important:
#
# 1. First up are the pclientd tests, because
# 2. The pcli tests assume the pclientd tests have been run first
# 3. Finally, we run the pd tests, which need work to have been performed for metrics to be emitted.
#
# Run `pclientd` integration tests.
test-pclientd:
command: >-
Expand All @@ -88,8 +82,6 @@ processes:
condition: process_healthy
cometbft:
condition: process_started
test-pd:
condition: process_completed
availability:
restart: exit_on_failure

Expand All @@ -109,6 +101,21 @@ processes:
availability:
restart: exit_on_failure

# Run `pd` integration tests. These run last, as the metrics checks
# will fail unless actual work has been performed: `pd` emits no metrics until then.
test-pd:
command: >-
cargo test --release --package pd -- --ignored --test-threads 1 --nocapture
depends_on:
pd:
condition: process_healthy
cometbft:
condition: process_started
test-pcli:
condition: process_completed
availability:
restart: exit_on_failure

# Finalizer task, which will wait until all test suites have finished.
# This allows us to ensure that.
summary:
Expand Down

0 comments on commit e3c417c

Please sign in to comment.