From d17ee31679ab333fa98247a1561e5c1c3ba2c7da Mon Sep 17 00:00:00 2001 From: IDX GitLab Automation Date: Tue, 4 Feb 2025 19:10:47 +0000 Subject: [PATCH] chore: add metrics --- Cargo.lock | 2 + rs/boundary_node/salt_sharing/Cargo.toml | 2 + rs/boundary_node/salt_sharing/api/src/lib.rs | 11 +- .../salt_sharing/canister/canister.rs | 150 +++--------------- .../salt_sharing/canister/helpers.rs | 132 +++++++++++++++ rs/boundary_node/salt_sharing/canister/lib.rs | 5 + .../salt_sharing/canister/logs.rs | 51 +++++- .../salt_sharing/canister/metrics.rs | 115 ++++++++++++++ .../salt_sharing/canister/storage.rs | 6 +- 9 files changed, 341 insertions(+), 133 deletions(-) create mode 100644 rs/boundary_node/salt_sharing/canister/helpers.rs create mode 100644 rs/boundary_node/salt_sharing/canister/metrics.rs diff --git a/Cargo.lock b/Cargo.lock index 0b61cc4752dc..c7037ab91e04 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -19853,7 +19853,9 @@ dependencies = [ "ic-cdk 0.16.0", "ic-cdk-macros 0.9.0", "ic-cdk-timers", + "ic-nns-constants", "ic-stable-structures", + "prometheus", "salt-api", "serde", "serde_cbor", diff --git a/rs/boundary_node/salt_sharing/Cargo.toml b/rs/boundary_node/salt_sharing/Cargo.toml index ba9f5b17d5da..97c683b34005 100644 --- a/rs/boundary_node/salt_sharing/Cargo.toml +++ b/rs/boundary_node/salt_sharing/Cargo.toml @@ -13,7 +13,9 @@ ic-canister-log = { path = "../../rust_canisters/canister_log" } ic-cdk = { workspace = true } ic-cdk-macros = { workspace = true } ic-cdk-timers = { workspace = true } +ic-nns-constants = { path = "../../nns/constants" } ic-stable-structures = { workspace = true } +prometheus = {workspace = true } salt-api = { path = "./api" } serde = { workspace = true } serde_cbor = { workspace = true } diff --git a/rs/boundary_node/salt_sharing/api/src/lib.rs b/rs/boundary_node/salt_sharing/api/src/lib.rs index 1dfd01f4ff4f..7b20483ff23d 100644 --- a/rs/boundary_node/salt_sharing/api/src/lib.rs +++ b/rs/boundary_node/salt_sharing/api/src/lib.rs @@ -1,4 +1,5 @@ -use candid::{CandidType, Deserialize}; +use candid::{CandidType, Principal}; +use serde::{Deserialize, Serialize}; pub type GetSaltResponse = Result; @@ -26,3 +27,11 @@ pub enum GetSaltError { Unauthorized, Internal(String), } + +#[derive(CandidType, Serialize, Deserialize, Clone, PartialEq, Debug, Eq)] +pub struct ApiBoundaryNodeIdRecord { + pub id: Option, +} + +#[derive(CandidType, Deserialize, Clone, Copy, PartialEq, Eq)] +pub struct GetApiBoundaryNodeIdsRequest {} diff --git a/rs/boundary_node/salt_sharing/canister/canister.rs b/rs/boundary_node/salt_sharing/canister/canister.rs index d7ab9de74390..6c13775cfb3d 100644 --- a/rs/boundary_node/salt_sharing/canister/canister.rs +++ b/rs/boundary_node/salt_sharing/canister/canister.rs @@ -1,16 +1,13 @@ -use std::time::Duration; - -use crate::logs::{self, Log, LogEntry, Priority, P0}; -use crate::storage::{StorableSalt, SALT, SALT_SIZE}; -use crate::time::delay_till_next_month; -use candid::Principal; -use ic_canister_log::{export as export_logs, log}; +use crate::helpers::init_async; +use crate::logs::export_logs_as_http_response; +use crate::metrics::{export_metrics_as_http_response, METRICS}; +use crate::storage::SALT; use ic_canisters_http_types::{HttpRequest, HttpResponse, HttpResponseBuilder}; use ic_cdk::{api::time, spawn}; use ic_cdk_macros::{init, post_upgrade, query}; use ic_cdk_timers::set_timer; -use salt_api::{GetSaltError, GetSaltResponse, InitArg, SaltGenerationStrategy, SaltResponse}; -use std::str::FromStr; +use salt_api::{GetSaltError, GetSaltResponse, InitArg, SaltResponse}; +use std::time::Duration; // Runs when canister is first installed #[init] @@ -18,6 +15,13 @@ fn init(init_arg: InitArg) { set_timer(Duration::ZERO, || { spawn(async { init_async(init_arg).await }); }); + // Update metric. + let current_time = time() as i64; + METRICS.with(|cell| { + cell.borrow_mut() + .last_canister_change_time + .set(current_time); + }); } // Runs on every canister upgrade @@ -29,95 +33,6 @@ fn post_upgrade(init_arg: InitArg) { #[query] fn get_salt() -> GetSaltResponse { - get_salt_response() -} - -#[query(decoding_quota = 10000)] -fn http_request(request: HttpRequest) -> HttpResponse { - match request.path() { - "/logs" => { - use serde_json; - - let max_skip_timestamp = match request.raw_query_param("time") { - Some(arg) => match u64::from_str(arg) { - Ok(value) => value, - Err(_) => { - return HttpResponseBuilder::bad_request() - .with_body_and_content_length("failed to parse the 'time' parameter") - .build() - } - }, - None => 0, - }; - - let mut entries: Log = Default::default(); - - for entry in export_logs(&logs::P0) { - entries.entries.push(LogEntry { - timestamp: entry.timestamp, - counter: entry.counter, - priority: Priority::P0, - file: entry.file.to_string(), - line: entry.line, - message: entry.message, - }); - } - - for entry in export_logs(&logs::P1) { - entries.entries.push(LogEntry { - timestamp: entry.timestamp, - counter: entry.counter, - priority: Priority::P1, - file: entry.file.to_string(), - line: entry.line, - message: entry.message, - }); - } - - entries - .entries - .retain(|entry| entry.timestamp >= max_skip_timestamp); - - HttpResponseBuilder::ok() - .header("Content-Type", "application/json; charset=utf-8") - .with_body_and_content_length(serde_json::to_string(&entries).unwrap_or_default()) - .build() - } - _ => HttpResponseBuilder::not_found().build(), - } -} - -async fn init_async(init_arg: InitArg) { - if !is_salt_init() || init_arg.regenerate_now { - if let Err(err) = try_regenerate_salt().await { - log!(P0, "[init_regenerate_salt_failed]: {err}"); - } - } - // Start salt generation schedule based on the argument. - match init_arg.salt_generation_strategy { - SaltGenerationStrategy::StartOfMonth => schedule_monthly_salt_generation(), - } -} - -// Sets an execution timer (delayed future task) and returns immediately. -fn schedule_monthly_salt_generation() { - let delay = delay_till_next_month(time()); - set_timer(delay, || { - spawn(async { - if let Err(err) = try_regenerate_salt().await { - log!(P0, "[scheduled_regenerate_salt_failed]: {err}"); - } - // Function is called recursively to schedule next execution - schedule_monthly_salt_generation(); - }); - }); -} - -fn is_salt_init() -> bool { - SALT.with(|cell| cell.borrow().get(&())).is_some() -} - -fn get_salt_response() -> Result { let stored_salt = SALT .with(|cell| cell.borrow().get(&())) .ok_or(GetSaltError::SaltNotInitialized)?; @@ -128,36 +43,11 @@ fn get_salt_response() -> Result { }) } -// Regenerate salt and store it in the stable memory -// Can only fail, if the calls to management canister fail. -async fn try_regenerate_salt() -> Result<(), String> { - // Closure for getting random bytes from the IC. - let rnd_call = |attempt: u32| async move { - ic_cdk::call(Principal::management_canister(), "raw_rand", ()) - .await - .map_err(|err| { - format!( - "Call {attempt} to raw_rand failed: code={:?}, err={}", - err.0, err.1 - ) - }) - }; - - let (rnd_bytes_1,): ([u8; 32],) = rnd_call(1).await?; - let (rnd_bytes_2,): ([u8; 32],) = rnd_call(2).await?; - - // Concatenate arrays to form an array of 64 random bytes. - let mut salt = [rnd_bytes_1, rnd_bytes_2].concat(); - salt.truncate(SALT_SIZE); - - let stored_salt = StorableSalt { - salt, - salt_id: time(), - }; - - SALT.with(|cell| { - cell.borrow_mut().insert((), stored_salt); - }); - - Ok(()) +#[query(decoding_quota = 10000)] +fn http_request(request: HttpRequest) -> HttpResponse { + match request.path() { + "/metrics" => export_metrics_as_http_response(), + "/logs" => export_logs_as_http_response(request), + _ => HttpResponseBuilder::not_found().build(), + } } diff --git a/rs/boundary_node/salt_sharing/canister/helpers.rs b/rs/boundary_node/salt_sharing/canister/helpers.rs new file mode 100644 index 000000000000..eead27d1c4d0 --- /dev/null +++ b/rs/boundary_node/salt_sharing/canister/helpers.rs @@ -0,0 +1,132 @@ +use std::{collections::HashSet, time::Duration}; + +use crate::{ + logs::P0, + metrics::METRICS, + storage::{StorableSalt, API_BOUNDARY_NODE_PRINCIPALS, SALT, SALT_SIZE}, + time::delay_till_next_month, +}; +use candid::Principal; +use ic_canister_log::log; +use ic_cdk::{api::time, call, spawn}; +use ic_cdk_timers::{set_timer, set_timer_interval}; +use ic_nns_constants::REGISTRY_CANISTER_ID; +use salt_api::{ + ApiBoundaryNodeIdRecord, GetApiBoundaryNodeIdsRequest, InitArg, SaltGenerationStrategy, +}; + +const REGISTRY_CANISTER_METHOD: &str = "get_api_boundary_node_ids"; + +pub async fn init_async(init_arg: InitArg) { + if !is_salt_init() || init_arg.regenerate_now { + if let Err(err) = try_regenerate_salt().await { + log!(P0, "[init_regenerate_salt_failed]: {err}"); + } + } + // Start salt generation schedule based on the argument. + match init_arg.salt_generation_strategy { + SaltGenerationStrategy::StartOfMonth => schedule_monthly_salt_generation(), + } + // Periodically poll API boundary nodes + let period = Duration::from_secs(init_arg.registry_polling_interval_secs); + set_timer_interval(period, || spawn(poll_api_boundary_nodes())); +} + +// Sets an execution timer (delayed future task) and returns immediately. +pub fn schedule_monthly_salt_generation() { + let delay = delay_till_next_month(time()); + set_timer(delay, || { + spawn(async { + if let Err(err) = try_regenerate_salt().await { + log!(P0, "[scheduled_regenerate_salt_failed]: {err}"); + } + // Function is called recursively to schedule next execution + schedule_monthly_salt_generation(); + }); + }); +} + +pub fn is_salt_init() -> bool { + SALT.with(|cell| cell.borrow().get(&())).is_some() +} + +// Regenerate salt and store it in the stable memory +// Can only fail, if the calls to management canister fail. +pub async fn try_regenerate_salt() -> Result<(), String> { + // Closure for getting random bytes from the IC. + let rnd_call = |attempt: u32| async move { + ic_cdk::call(Principal::management_canister(), "raw_rand", ()) + .await + .map_err(|err| { + format!( + "Call {attempt} to raw_rand failed: code={:?}, err={}", + err.0, err.1 + ) + }) + }; + + let (rnd_bytes_1,): ([u8; 32],) = rnd_call(1).await?; + let (rnd_bytes_2,): ([u8; 32],) = rnd_call(2).await?; + + // Concatenate arrays to form an array of 64 random bytes. + let mut salt = [rnd_bytes_1, rnd_bytes_2].concat(); + salt.truncate(SALT_SIZE); + + let stored_salt = StorableSalt { + salt, + salt_id: time(), + }; + + SALT.with(|cell| { + cell.borrow_mut().insert((), stored_salt); + }); + + Ok(()) +} + +pub async fn poll_api_boundary_nodes() { + let canister_id = Principal::from(REGISTRY_CANISTER_ID); + + let (call_status, message) = match call::<_, (Result, String>,)>( + canister_id, + REGISTRY_CANISTER_METHOD, + (&GetApiBoundaryNodeIdsRequest {},), + ) + .await + { + Ok((Ok(api_bn_records),)) => { + // Set authorized readers of salt. + let principals: HashSet<_> = api_bn_records.into_iter().filter_map(|n| n.id).collect(); + API_BOUNDARY_NODE_PRINCIPALS.with(|cell| *cell.borrow_mut() = principals); + // Update metric. + let current_time = time() as i64; + METRICS.with(|cell| { + cell.borrow_mut() + .last_successful_registry_poll_time + .set(current_time); + }); + ("success", "") + } + Ok((Err(err),)) => { + log!( + P0, + "[poll_api_boundary_nodes]: failed to fetch nodes from registry {err:?}", + ); + ("failure", "calling_canister_method_failed") + } + Err(err) => { + log!( + P0, + "[poll_api_boundary_nodes]: failed to fetch nodes from registry {err:?}", + ); + ("failure", "canister_call_rejected") + } + }; + // Update metric. + METRICS.with(|cell| { + cell.borrow_mut() + .registry_poll_calls + .with_label_values(&[call_status, message]) + .inc(); + }); +} diff --git a/rs/boundary_node/salt_sharing/canister/lib.rs b/rs/boundary_node/salt_sharing/canister/lib.rs index 7e3936e1e105..93a84ba7d891 100644 --- a/rs/boundary_node/salt_sharing/canister/lib.rs +++ b/rs/boundary_node/salt_sharing/canister/lib.rs @@ -1,7 +1,12 @@ #[cfg(any(target_family = "wasm", test))] mod canister; +#[allow(dead_code)] +mod helpers; +#[allow(dead_code)] mod logs; #[allow(dead_code)] +mod metrics; +#[allow(dead_code)] mod storage; #[allow(dead_code)] mod time; diff --git a/rs/boundary_node/salt_sharing/canister/logs.rs b/rs/boundary_node/salt_sharing/canister/logs.rs index a470a72c57c0..ff375832733c 100644 --- a/rs/boundary_node/salt_sharing/canister/logs.rs +++ b/rs/boundary_node/salt_sharing/canister/logs.rs @@ -1,5 +1,7 @@ use candid::Deserialize; -use ic_canister_log::declare_log_buffer; +use ic_canister_log::{declare_log_buffer, export as export_logs}; +use ic_canisters_http_types::{HttpRequest, HttpResponse, HttpResponseBuilder}; +use std::str::FromStr; // High-priority messages. declare_log_buffer!(name = P0, capacity = 1000); @@ -27,3 +29,50 @@ pub enum Priority { P0, P1, } + +pub fn export_logs_as_http_response(request: HttpRequest) -> HttpResponse { + let max_skip_timestamp = match request.raw_query_param("time") { + Some(arg) => match u64::from_str(arg) { + Ok(value) => value, + Err(_) => { + return HttpResponseBuilder::bad_request() + .with_body_and_content_length("failed to parse the 'time' parameter") + .build() + } + }, + None => 0, + }; + + let mut entries: Log = Default::default(); + + for entry in export_logs(&P0) { + entries.entries.push(LogEntry { + timestamp: entry.timestamp, + counter: entry.counter, + priority: Priority::P0, + file: entry.file.to_string(), + line: entry.line, + message: entry.message, + }); + } + + for entry in export_logs(&P1) { + entries.entries.push(LogEntry { + timestamp: entry.timestamp, + counter: entry.counter, + priority: Priority::P1, + file: entry.file.to_string(), + line: entry.line, + message: entry.message, + }); + } + + entries + .entries + .retain(|entry| entry.timestamp >= max_skip_timestamp); + + HttpResponseBuilder::ok() + .header("Content-Type", "application/json; charset=utf-8") + .with_body_and_content_length(serde_json::to_string(&entries).unwrap_or_default()) + .build() +} diff --git a/rs/boundary_node/salt_sharing/canister/metrics.rs b/rs/boundary_node/salt_sharing/canister/metrics.rs new file mode 100644 index 000000000000..9ee5fd98a2ef --- /dev/null +++ b/rs/boundary_node/salt_sharing/canister/metrics.rs @@ -0,0 +1,115 @@ +use ic_canisters_http_types::{HttpResponse, HttpResponseBuilder}; +use ic_cdk::api::stable::WASM_PAGE_SIZE_IN_BYTES; +use prometheus::{ + CounterVec, Encoder, Gauge, IntGauge, Opts, Registry, Result as PrometheusResult, TextEncoder, +}; +use std::{borrow::BorrowMut, cell::RefCell}; + +use crate::storage::{API_BOUNDARY_NODE_PRINCIPALS, SALT}; + +thread_local! { + pub static METRICS: RefCell = RefCell::new(CanisterMetrics::new().expect("failed to create Prometheus metrics")); +} + +/// Represents all metrics collected in the canister +pub struct CanisterMetrics { + pub registry: Registry, // Prometheus registry + pub latest_salt_id: IntGauge, + pub api_boundary_nodes_count: IntGauge, + pub last_canister_change_time: IntGauge, + pub last_successful_registry_poll_time: IntGauge, + pub registry_poll_calls: CounterVec, + pub stable_memory_size: Gauge, +} + +impl CanisterMetrics { + pub fn new() -> PrometheusResult { + let registry = Registry::new(); + + let latest_salt_id = IntGauge::new("latest_salt_id", "ID of the latest salt")?; + + let api_boundary_nodes_count = IntGauge::new( + "api_boundary_nodes_count", + "Number of API boundary nodes with read access to salt.", + )?; + + let last_canister_change_time = IntGauge::new( + "last_successful_canister_upgrade", + "The Unix timestamp of the last successful canister upgrade", + )?; + + let last_successful_registry_poll_time = IntGauge::new( + "last_successful_registry_poll", + "The Unix timestamp of the last successful poll of the API boundary nodes from registry canister", + )?; + + let registry_poll_calls = CounterVec::new( + Opts::new( + "registry_poll_calls", + "Number of registry polling calls with the status and message (in case of error)", + ), + &["status", "message"], + )?; + + let stable_memory_size = Gauge::new( + "stable_memory_bytes", + "Size of the stable memory allocated by this canister in bytes.", + )?; + + // Register all metrics in the registry + registry.register(Box::new(latest_salt_id.clone()))?; + registry.register(Box::new(api_boundary_nodes_count.clone()))?; + registry.register(Box::new(last_canister_change_time.clone()))?; + registry.register(Box::new(last_successful_registry_poll_time.clone()))?; + registry.register(Box::new(registry_poll_calls.clone()))?; + registry.register(Box::new(stable_memory_size.clone()))?; + + Ok(Self { + registry, + latest_salt_id, + api_boundary_nodes_count, + last_canister_change_time, + last_successful_registry_poll_time, + registry_poll_calls, + stable_memory_size, + }) + } +} + +pub fn export_metrics_as_http_response() -> HttpResponse { + // Certain metrics need to be recomputed + recompute_metrics(); + + let mut buffer = vec![]; + let encoder = TextEncoder::new(); + let registry = METRICS.with(|cell| cell.borrow().registry.clone()); + let metrics_family = registry.gather(); + + match encoder.encode(&metrics_family, &mut buffer) { + Ok(()) => HttpResponseBuilder::ok() + .header("Content-Type", "text/plain") + .with_body_and_content_length(buffer) + .build(), + Err(err) => { + // Return an HTTP 500 error with detailed error information + HttpResponseBuilder::server_error(format!("Failed to encode metrics: {:?}", err)) + .build() + } + } +} + +pub fn recompute_metrics() { + METRICS.with(|cell| { + let mut cell = cell.borrow_mut(); + + let memory = (ic_cdk::api::stable::stable_size() * WASM_PAGE_SIZE_IN_BYTES) as f64; + cell.stable_memory_size.borrow_mut().set(memory); + + let api_bns_count = API_BOUNDARY_NODE_PRINCIPALS.with(|cell| cell.borrow().len()); + cell.api_boundary_nodes_count.set(api_bns_count as i64); + + if let Some(stored_salt) = SALT.with(|cell| cell.borrow().get(&())) { + cell.latest_salt_id.set(stored_salt.salt_id as i64); + } + }); +} diff --git a/rs/boundary_node/salt_sharing/canister/storage.rs b/rs/boundary_node/salt_sharing/canister/storage.rs index 728442dff4c2..10619395b5fb 100644 --- a/rs/boundary_node/salt_sharing/canister/storage.rs +++ b/rs/boundary_node/salt_sharing/canister/storage.rs @@ -1,3 +1,4 @@ +use candid::Principal; use ic_stable_structures::{ memory_manager::{MemoryId, MemoryManager, VirtualMemory}, storable::Bound, @@ -5,7 +6,7 @@ use ic_stable_structures::{ }; use serde::{Deserialize, Serialize}; use serde_cbor::{from_slice, to_vec}; -use std::{borrow::Cow, cell::RefCell}; +use std::{borrow::Cow, cell::RefCell, collections::HashSet}; pub type Timestamp = u64; pub const SALT_SIZE: usize = 64; @@ -46,4 +47,7 @@ thread_local! { pub static SALT: RefCell> = RefCell::new( StableMap::init(MEMORY_MANAGER.with(|m| m.borrow().get(MEMORY_ID_SALT))) ); + + // Authorized principals allowed to retrieve the salt from the canister. + pub static API_BOUNDARY_NODE_PRINCIPALS: RefCell> = RefCell::new(HashSet::new()); }