From b5e3e854a8a5536fee919b2d4b698b373a8c2d63 Mon Sep 17 00:00:00 2001 From: Sreekanth Date: Tue, 7 Jan 2025 13:19:30 +0530 Subject: [PATCH 01/27] Tested callbacks with monovertex Signed-off-by: Sreekanth --- rust/numaflow-core/src/config/monovertex.rs | 24 ++++ rust/numaflow-core/src/config/pipeline.rs | 34 ++++- rust/numaflow-core/src/mapper/map.rs | 9 ++ .../src/mapper/map/user_defined.rs | 6 + rust/numaflow-core/src/message.rs | 9 ++ rust/numaflow-core/src/monovertex.rs | 23 +++- .../numaflow-core/src/monovertex/forwarder.rs | 16 ++- rust/numaflow-core/src/pipeline.rs | 38 ++++++ .../src/pipeline/forwarder/sink_forwarder.rs | 12 +- .../pipeline/forwarder/source_forwarder.rs | 1 + .../src/pipeline/isb/jetstream/reader.rs | 4 + .../src/pipeline/isb/jetstream/writer.rs | 30 +++++ rust/numaflow-core/src/sink.rs | 23 +++- rust/numaflow-core/src/source/generator.rs | 1 + rust/numaflow-core/src/source/pulsar.rs | 1 + rust/numaflow-core/src/source/serving.rs | 5 +- rust/numaflow-core/src/source/user_defined.rs | 1 + rust/numaflow-core/src/transformer.rs | 3 + .../src/transformer/user_defined.rs | 3 + rust/serving/Cargo.toml | 1 + rust/serving/src/app.rs | 18 ++- rust/serving/src/callback.rs | 126 ++++++++++++++++++ rust/serving/src/config.rs | 1 + rust/serving/src/lib.rs | 2 + rust/serving/src/source.rs | 14 +- 25 files changed, 389 insertions(+), 16 deletions(-) create mode 100644 rust/serving/src/callback.rs diff --git a/rust/numaflow-core/src/config/monovertex.rs b/rust/numaflow-core/src/config/monovertex.rs index c6f18e3c8..a9931c785 100644 --- a/rust/numaflow-core/src/config/monovertex.rs +++ b/rust/numaflow-core/src/config/monovertex.rs @@ -18,10 +18,16 @@ use crate::config::monovertex::sink::SinkType; use crate::error::Error; use crate::Result; +use super::pipeline::ServingCallbackConfig; + const DEFAULT_BATCH_SIZE: u64 = 500; const DEFAULT_TIMEOUT_IN_MS: u32 = 1000; const DEFAULT_LOOKBACK_WINDOW_IN_SECS: u16 = 120; +const ENV_CALLBACK_ENABLED: &str = 
"NUMAFLOW_CALLBACK_ENABLED"; //FIXME: duplicates +const ENV_CALLBACK_CONCURRENCY: &str = "NUMAFLOW_CALLBACK_CONCURRENCY"; +const DEFAULT_CALLBACK_CONCURRENCY: usize = 100; + #[derive(Debug, Clone, PartialEq)] pub(crate) struct MonovertexConfig { pub(crate) name: String, @@ -33,6 +39,7 @@ pub(crate) struct MonovertexConfig { pub(crate) transformer_config: Option, pub(crate) fb_sink_config: Option, pub(crate) metrics_config: MetricsConfig, + pub(crate) callback_config: Option, } impl Default for MonovertexConfig { @@ -53,6 +60,7 @@ impl Default for MonovertexConfig { transformer_config: None, fb_sink_config: None, metrics_config: MetricsConfig::default(), + callback_config: None, } } } @@ -143,6 +151,21 @@ impl MonovertexConfig { .and_then(|scale| scale.lookback_seconds.map(|x| x as u16)) .unwrap_or(DEFAULT_LOOKBACK_WINDOW_IN_SECS); + let mut callback_config = None; + if let Ok(_) = env::var(ENV_CALLBACK_ENABLED) { + let callback_concurrency: usize = env::var(ENV_CALLBACK_CONCURRENCY) + .unwrap_or_else(|_| format!("{DEFAULT_CALLBACK_CONCURRENCY}")) + .parse() + .map_err(|e| { + Error::Config(format!( + "Parsing value of {ENV_CALLBACK_CONCURRENCY}: {e:?}" + )) + })?; + callback_config = Some(ServingCallbackConfig { + callback_concurrency, + }); + } + Ok(MonovertexConfig { name: mono_vertex_name, replica: *get_vertex_replica(), @@ -153,6 +176,7 @@ impl MonovertexConfig { sink_config, transformer_config, fb_sink_config, + callback_config, }) } } diff --git a/rust/numaflow-core/src/config/pipeline.rs b/rust/numaflow-core/src/config/pipeline.rs index 1368b0b32..255da3286 100644 --- a/rust/numaflow-core/src/config/pipeline.rs +++ b/rust/numaflow-core/src/config/pipeline.rs @@ -25,6 +25,9 @@ const DEFAULT_LOOKBACK_WINDOW_IN_SECS: u16 = 120; const ENV_NUMAFLOW_SERVING_JETSTREAM_URL: &str = "NUMAFLOW_ISBSVC_JETSTREAM_URL"; const ENV_NUMAFLOW_SERVING_JETSTREAM_USER: &str = "NUMAFLOW_ISBSVC_JETSTREAM_USER"; const ENV_NUMAFLOW_SERVING_JETSTREAM_PASSWORD: &str = 
"NUMAFLOW_ISBSVC_JETSTREAM_PASSWORD"; +const ENV_CALLBACK_ENABLED: &str = "NUMAFLOW_CALLBACK_ENABLED"; +const ENV_CALLBACK_CONCURRENCY: &str = "NUMAFLOW_CALLBACK_CONCURRENCY"; +const DEFAULT_CALLBACK_CONCURRENCY: usize = 100; const DEFAULT_GRPC_MAX_MESSAGE_SIZE: usize = 64 * 1024 * 1024; // 64 MB const DEFAULT_MAP_SOCKET: &str = "/var/run/numaflow/map.sock"; pub(crate) const DEFAULT_BATCH_MAP_SOCKET: &str = "/var/run/numaflow/batchmap.sock"; @@ -47,6 +50,12 @@ pub(crate) struct PipelineConfig { pub(crate) to_vertex_config: Vec, pub(crate) vertex_config: VertexType, pub(crate) metrics_config: MetricsConfig, + pub(crate) callback_config: Option, +} + +#[derive(Debug, Clone, PartialEq)] +pub(crate) struct ServingCallbackConfig { + pub(crate) callback_concurrency: usize, } impl Default for PipelineConfig { @@ -66,6 +75,7 @@ impl Default for PipelineConfig { transformer_config: None, }), metrics_config: Default::default(), + callback_config: None, } } } @@ -373,6 +383,21 @@ impl PipelineConfig { .and_then(|scale| scale.lookback_seconds.map(|x| x as u16)) .unwrap_or(DEFAULT_LOOKBACK_WINDOW_IN_SECS); + let mut callback_config = None; + if let Ok(_) = get_var(ENV_CALLBACK_ENABLED) { + let callback_concurrency: usize = get_var(ENV_CALLBACK_CONCURRENCY) + .unwrap_or_else(|_| format!("{DEFAULT_CALLBACK_CONCURRENCY}")) + .parse() + .map_err(|e| { + Error::Config(format!( + "Parsing value of {ENV_CALLBACK_CONCURRENCY}: {e:?}" + )) + })?; + callback_config = Some(ServingCallbackConfig { + callback_concurrency, + }); + } + Ok(PipelineConfig { batch_size: batch_size as usize, paf_concurrency: env::var("PAF_BATCH_SIZE") @@ -388,6 +413,7 @@ impl PipelineConfig { to_vertex_config, vertex_config: vertex, metrics_config: MetricsConfig::with_lookback_window_in_secs(look_back_window), + callback_config, }) } } @@ -419,6 +445,7 @@ mod tests { transformer_config: None, }), metrics_config: Default::default(), + callback_config: None, }; let config = PipelineConfig::default(); @@ -485,6 
+512,7 @@ mod tests { lag_refresh_interval_in_secs: 3, lookback_window_in_secs: 120, }, + ..Default::default() }; assert_eq!(pipeline_config, expected); } @@ -536,7 +564,7 @@ mod tests { }, transformer_config: None, }), - metrics_config: Default::default(), + ..Default::default() }; assert_eq!(pipeline_config, expected); @@ -588,7 +616,7 @@ mod tests { }, transformer_config: None, }), - metrics_config: Default::default(), + ..Default::default() }; assert_eq!(pipeline_config, expected); @@ -704,7 +732,7 @@ mod tests { }), map_mode: MapMode::Unary, }), - metrics_config: MetricsConfig::default(), + ..Default::default() }; assert_eq!(pipeline_config, expected); diff --git a/rust/numaflow-core/src/mapper/map.rs b/rust/numaflow-core/src/mapper/map.rs index 8c279376a..c2241bf7f 100644 --- a/rust/numaflow-core/src/mapper/map.rs +++ b/rust/numaflow-core/src/mapper/map.rs @@ -554,6 +554,7 @@ mod tests { index: 0, }, headers: Default::default(), + metadata: None, }; let (output_tx, mut output_rx) = mpsc::channel(10); @@ -646,6 +647,7 @@ mod tests { index: i, }, headers: Default::default(), + metadata: None, }; input_tx.send(message).await.unwrap(); } @@ -735,6 +737,7 @@ mod tests { index: 0, }, headers: Default::default(), + metadata: None, }; input_tx.send(message).await.unwrap(); @@ -829,6 +832,7 @@ mod tests { index: 0, }, headers: Default::default(), + metadata: None, }, Message { keys: Arc::from(vec!["second".into()]), @@ -842,6 +846,7 @@ mod tests { index: 1, }, headers: Default::default(), + metadata: None, }, ]; @@ -939,6 +944,7 @@ mod tests { index: 0, }, headers: Default::default(), + metadata: None, }, Message { keys: Arc::from(vec!["second".into()]), @@ -952,6 +958,7 @@ mod tests { index: 1, }, headers: Default::default(), + metadata: None, }, ]; @@ -1049,6 +1056,7 @@ mod tests { index: 0, }, headers: Default::default(), + metadata: None, }; let (input_tx, input_rx) = mpsc::channel(10); @@ -1145,6 +1153,7 @@ mod tests { index: 0, }, headers: Default::default(), + 
metadata: None, }; let (input_tx, input_rx) = mpsc::channel(10); diff --git a/rust/numaflow-core/src/mapper/map/user_defined.rs b/rust/numaflow-core/src/mapper/map/user_defined.rs index 0799eb654..b123c6a3f 100644 --- a/rust/numaflow-core/src/mapper/map/user_defined.rs +++ b/rust/numaflow-core/src/mapper/map/user_defined.rs @@ -261,6 +261,7 @@ async fn process_response(sender_map: &ResponseSenderMap, resp: MapResponse) { offset: Some(msg_info.offset.clone()), event_time: msg_info.event_time, headers: msg_info.headers.clone(), + metadata: None, }; response_messages.push(message); } @@ -387,6 +388,7 @@ impl UserDefinedStreamMap { offset: None, event_time: message_info.event_time, headers: message_info.headers.clone(), + metadata: None, }; response_sender .send(Ok(message)) @@ -496,6 +498,7 @@ mod tests { index: 0, }, headers: Default::default(), + metadata: None, }; let (tx, rx) = tokio::sync::oneshot::channel(); @@ -586,6 +589,7 @@ mod tests { index: 0, }, headers: Default::default(), + metadata: None, }, crate::message::Message { keys: Arc::from(vec!["second".into()]), @@ -602,6 +606,7 @@ mod tests { index: 1, }, headers: Default::default(), + metadata: None, }, ]; @@ -701,6 +706,7 @@ mod tests { index: 0, }, headers: Default::default(), + metadata: None, }; let (tx, mut rx) = tokio::sync::mpsc::channel(3); diff --git a/rust/numaflow-core/src/message.rs b/rust/numaflow-core/src/message.rs index fe20613da..7f6e1c685 100644 --- a/rust/numaflow-core/src/message.rs +++ b/rust/numaflow-core/src/message.rs @@ -34,6 +34,13 @@ pub(crate) struct Message { pub(crate) id: MessageID, /// headers of the message pub(crate) headers: HashMap, + pub(crate) metadata: Option, +} + +#[derive(Debug, Clone)] +pub(crate) struct Metadata { + // name of the previous vertex. + pub(crate) previous_vertex: String, } /// Offset of the message which will be used to acknowledge the message. 
@@ -212,6 +219,7 @@ impl TryFrom for Message { event_time: utc_from_timestamp(message_info.event_time), id: id.into(), headers: header.headers, + metadata: None, }) } } @@ -263,6 +271,7 @@ mod tests { index: 0, }, headers: HashMap::new(), + metadata: None, }; let result: Result = message.clone().try_into(); diff --git a/rust/numaflow-core/src/monovertex.rs b/rust/numaflow-core/src/monovertex.rs index 1518a3c9f..67a597ac6 100644 --- a/rust/numaflow-core/src/monovertex.rs +++ b/rust/numaflow-core/src/monovertex.rs @@ -1,4 +1,5 @@ use forwarder::ForwarderBuilder; +use serving::callback::CallbackHandler; use tokio_util::sync::CancellationToken; use tracing::info; @@ -68,7 +69,24 @@ pub(crate) async fn start_forwarder( // FIXME: what to do with the handle shared::metrics::start_metrics_server(config.metrics_config.clone(), metrics_state).await; - start(config.clone(), source, sink_writer, transformer, cln_token).await?; + let callback_handler = match config.callback_config { + Some(ref cb_cfg) => Some(CallbackHandler::new( + config.name.clone(), + config.name.clone(), + cb_cfg.callback_concurrency, + )), + None => None, + }; + + start( + config.clone(), + source, + sink_writer, + transformer, + cln_token, + callback_handler, + ) + .await?; Ok(()) } @@ -79,13 +97,14 @@ async fn start( sink: SinkWriter, transformer: Option, cln_token: CancellationToken, + callback_handler: Option, ) -> error::Result<()> { // start the pending reader to publish pending metrics let pending_reader = shared::metrics::create_pending_reader(&mvtx_config.metrics_config, source.clone()).await; let _pending_reader_handle = pending_reader.start(is_mono_vertex()).await; - let mut forwarder_builder = ForwarderBuilder::new(source, sink, cln_token); + let mut forwarder_builder = ForwarderBuilder::new(source, sink, cln_token, callback_handler); // add transformer if exists if let Some(transformer_client) = transformer { diff --git a/rust/numaflow-core/src/monovertex/forwarder.rs 
b/rust/numaflow-core/src/monovertex/forwarder.rs index 51851e4ee..ea14a662b 100644 --- a/rust/numaflow-core/src/monovertex/forwarder.rs +++ b/rust/numaflow-core/src/monovertex/forwarder.rs @@ -28,6 +28,7 @@ //! [Stream]: https://docs.rs/tokio-stream/latest/tokio_stream/wrappers/struct.ReceiverStream.html //! [Actor Pattern]: https://ryhl.io/blog/actors-with-tokio/ +use serving::callback::CallbackHandler; use tokio_util::sync::CancellationToken; use crate::error; @@ -44,6 +45,7 @@ pub(crate) struct Forwarder { transformer: Option, sink_writer: SinkWriter, cln_token: CancellationToken, + callback_handler: Option, } pub(crate) struct ForwarderBuilder { @@ -51,6 +53,7 @@ pub(crate) struct ForwarderBuilder { sink_writer: SinkWriter, cln_token: CancellationToken, transformer: Option, + callback_handler: Option, } impl ForwarderBuilder { @@ -59,12 +62,14 @@ impl ForwarderBuilder { streaming_source: Source, streaming_sink: SinkWriter, cln_token: CancellationToken, + callback_handler: Option, ) -> Self { Self { source: streaming_source, sink_writer: streaming_sink, cln_token, transformer: None, + callback_handler, } } @@ -82,6 +87,7 @@ impl ForwarderBuilder { sink_writer: self.sink_writer, transformer: self.transformer, cln_token: self.cln_token, + callback_handler: self.callback_handler, } } } @@ -102,7 +108,11 @@ impl Forwarder { let sink_writer_handle = self .sink_writer - .streaming_write(transformed_messages_stream, self.cln_token.clone()) + .streaming_write( + transformed_messages_stream, + self.cln_token.clone(), + self.callback_handler.clone(), + ) .await?; match tokio::try_join!( @@ -307,7 +317,7 @@ mod tests { .unwrap(); // create the forwarder with the source, transformer, and writer - let forwarder = ForwarderBuilder::new(source.clone(), sink_writer, cln_token.clone()) + let forwarder = ForwarderBuilder::new(source.clone(), sink_writer, cln_token.clone(), None) .transformer(transformer) .build(); @@ -437,7 +447,7 @@ mod tests { .unwrap(); // create the forwarder 
with the source, transformer, and writer - let forwarder = ForwarderBuilder::new(source.clone(), sink_writer, cln_token.clone()) + let forwarder = ForwarderBuilder::new(source.clone(), sink_writer, cln_token.clone(), None) .transformer(transformer) .build(); diff --git a/rust/numaflow-core/src/pipeline.rs b/rust/numaflow-core/src/pipeline.rs index d2cb77091..af6f755e0 100644 --- a/rust/numaflow-core/src/pipeline.rs +++ b/rust/numaflow-core/src/pipeline.rs @@ -3,6 +3,7 @@ use std::time::Duration; use async_nats::jetstream::Context; use async_nats::{jetstream, ConnectOptions}; use futures::future::try_join_all; +use serving::callback::CallbackHandler; use tokio_util::sync::CancellationToken; use tracing::info; @@ -53,11 +54,21 @@ async fn start_source_forwarder( let tracker_handle = TrackerHandle::new(); let js_context = create_js_context(config.js_client_config.clone()).await?; + let callback_handler = match config.callback_config { + Some(ref cb_cfg) => Some(CallbackHandler::new( + config.pipeline_name.clone(), + config.vertex_name.clone(), + cb_cfg.callback_concurrency, + )), + None => None, + }; + let buffer_writer = create_buffer_writer( &config, js_context.clone(), tracker_handle.clone(), cln_token.clone(), + callback_handler, ) .await; @@ -142,11 +153,21 @@ async fn start_map_forwarder( mapper_grpc_client = Some(mapper_rpc_client); } + let callback_handler = match config.callback_config { + Some(ref cb_cfg) => Some(CallbackHandler::new( + config.pipeline_name.clone(), + config.vertex_name.clone(), + cb_cfg.callback_concurrency, + )), + None => None, + }; + let buffer_writer = create_buffer_writer( &config, js_context.clone(), tracker_handle.clone(), cln_token.clone(), + callback_handler, ) .await; forwarder_components.push((buffer_reader, buffer_writer, mapper)); @@ -240,6 +261,15 @@ async fn start_sink_forwarder( .await; } + let callback_handler = match config.callback_config { + Some(ref cb_cfg) => Some(CallbackHandler::new( + config.pipeline_name.clone(), + 
config.vertex_name.clone(), + cb_cfg.callback_concurrency, + )), + None => None, + }; + // Start a new forwarder for each buffer reader let mut forwarder_tasks = Vec::new(); for (buffer_reader, (sink_writer, _, _)) in buffer_readers.into_iter().zip(sink_writers) { @@ -248,6 +278,7 @@ async fn start_sink_forwarder( buffer_reader, sink_writer, cln_token.clone(), + callback_handler.clone(), ) .await; @@ -273,6 +304,7 @@ async fn create_buffer_writer( js_context: Context, tracker_handle: TrackerHandle, cln_token: CancellationToken, + callback_handler: Option, ) -> JetstreamWriter { JetstreamWriter::new( config.to_vertex_config.clone(), @@ -280,6 +312,7 @@ async fn create_buffer_writer( config.paf_concurrency, tracker_handle, cln_token, + callback_handler, ) } @@ -450,6 +483,7 @@ mod tests { lag_refresh_interval_in_secs: 3, lookback_window_in_secs: 120, }, + callback_config: None, }; let cancellation_token = CancellationToken::new(); @@ -542,6 +576,7 @@ mod tests { index: 0, }, headers: HashMap::new(), + metadata: None, }; let message: bytes::BytesMut = message.try_into().unwrap(); @@ -607,6 +642,7 @@ mod tests { lag_refresh_interval_in_secs: 3, lookback_window_in_secs: 120, }, + callback_config: None, }; let cancellation_token = CancellationToken::new(); @@ -738,6 +774,7 @@ mod tests { index: 0, }, headers: HashMap::new(), + metadata: None, }; let message: bytes::BytesMut = message.try_into().unwrap(); @@ -850,6 +887,7 @@ mod tests { lag_refresh_interval_in_secs: 3, lookback_window_in_secs: 120, }, + callback_config: None, }; let cancellation_token = CancellationToken::new(); diff --git a/rust/numaflow-core/src/pipeline/forwarder/sink_forwarder.rs b/rust/numaflow-core/src/pipeline/forwarder/sink_forwarder.rs index 1d560e94e..4610235b4 100644 --- a/rust/numaflow-core/src/pipeline/forwarder/sink_forwarder.rs +++ b/rust/numaflow-core/src/pipeline/forwarder/sink_forwarder.rs @@ -1,3 +1,4 @@ +use serving::callback::CallbackHandler; use tokio_util::sync::CancellationToken; 
use crate::error::Error; @@ -11,6 +12,7 @@ pub(crate) struct SinkForwarder { jetstream_reader: JetstreamReader, sink_writer: SinkWriter, cln_token: CancellationToken, + callback_handler: Option, } impl SinkForwarder { @@ -18,11 +20,13 @@ impl SinkForwarder { jetstream_reader: JetstreamReader, sink_writer: SinkWriter, cln_token: CancellationToken, + callback_handler: Option, ) -> Self { Self { jetstream_reader, sink_writer, cln_token, + callback_handler, } } @@ -34,9 +38,15 @@ impl SinkForwarder { .streaming_read(reader_cancellation_token.clone()) .await?; + let callback_handler = self.callback_handler.clone(); + let sink_writer_handle = self .sink_writer - .streaming_write(read_messages_stream, self.cln_token.clone()) + .streaming_write( + read_messages_stream, + self.cln_token.clone(), + callback_handler, + ) .await?; // Join the reader and sink writer diff --git a/rust/numaflow-core/src/pipeline/forwarder/source_forwarder.rs b/rust/numaflow-core/src/pipeline/forwarder/source_forwarder.rs index b81ddaf80..ec717b8c8 100644 --- a/rust/numaflow-core/src/pipeline/forwarder/source_forwarder.rs +++ b/rust/numaflow-core/src/pipeline/forwarder/source_forwarder.rs @@ -292,6 +292,7 @@ mod tests { 100, tracker_handle.clone(), cln_token.clone(), + None, ); // create a transformer diff --git a/rust/numaflow-core/src/pipeline/isb/jetstream/reader.rs b/rust/numaflow-core/src/pipeline/isb/jetstream/reader.rs index 79b8572ef..bf4faa2da 100644 --- a/rust/numaflow-core/src/pipeline/isb/jetstream/reader.rs +++ b/rust/numaflow-core/src/pipeline/isb/jetstream/reader.rs @@ -346,6 +346,7 @@ mod tests { index: i, }, headers: HashMap::new(), + metadata: None, }; let message_bytes: BytesMut = message.try_into().unwrap(); context @@ -377,6 +378,8 @@ mod tests { #[cfg(feature = "nats-tests")] #[tokio::test] async fn test_jetstream_ack() { + use async_nats::jetstream::stream::No; + let js_url = "localhost:4222"; // Create JetStream context let client = 
async_nats::connect(js_url).await.unwrap(); @@ -445,6 +448,7 @@ mod tests { index: i, }, headers: HashMap::new(), + metadata: None, }; offsets.push(message.id.offset.clone()); let message_bytes: BytesMut = message.try_into().unwrap(); diff --git a/rust/numaflow-core/src/pipeline/isb/jetstream/writer.rs b/rust/numaflow-core/src/pipeline/isb/jetstream/writer.rs index e71335a57..dfc28fb5a 100644 --- a/rust/numaflow-core/src/pipeline/isb/jetstream/writer.rs +++ b/rust/numaflow-core/src/pipeline/isb/jetstream/writer.rs @@ -10,6 +10,7 @@ use async_nats::jetstream::publish::PublishAck; use async_nats::jetstream::stream::RetentionPolicy::Limits; use async_nats::jetstream::Context; use bytes::{Bytes, BytesMut}; +use serving::callback::CallbackHandler; use tokio::sync::Semaphore; use tokio::task::JoinHandle; use tokio::time; @@ -44,6 +45,7 @@ pub(crate) struct JetstreamWriter { cancel_token: CancellationToken, tracker_handle: TrackerHandle, sem: Arc, + callback_handler: Option, } impl JetstreamWriter { @@ -55,6 +57,7 @@ impl JetstreamWriter { paf_concurrency: usize, tracker_handle: TrackerHandle, cancel_token: CancellationToken, + callback_handler: Option, ) -> Self { let streams = config .iter() @@ -73,6 +76,7 @@ impl JetstreamWriter { cancel_token, tracker_handle, sem: Arc::new(Semaphore::new(paf_concurrency)), + callback_handler, }; // spawn a task for checking whether buffer is_full @@ -246,6 +250,18 @@ impl JetstreamWriter { }) .await?; + if let Some(ref callback_handler) = this.callback_handler { + let metadata = message.metadata.ok_or_else(|| { + Error::Source(format!( + "Message does not contain previous vertex name in the metadata" + )) + })?; + callback_handler + .callback(&message.headers, &message.tags, metadata.previous_vertex) + .await + .unwrap(); // FIXME: + }; + processed_msgs_count += 1; if last_logged_at.elapsed().as_secs() >= 1 { info!( @@ -538,6 +554,7 @@ mod tests { 100, tracker_handle, cln_token.clone(), + None, ); let message = Message { @@ -552,6 
+569,7 @@ mod tests { index: 0, }, headers: HashMap::new(), + metadata: None, }; let paf = writer @@ -611,6 +629,7 @@ mod tests { index: 0, }, headers: HashMap::new(), + metadata: None, }; let message_bytes: BytesMut = message.try_into().unwrap(); @@ -678,6 +697,7 @@ mod tests { 100, tracker_handle, cancel_token.clone(), + None, ); let mut result_receivers = Vec::new(); @@ -695,6 +715,7 @@ mod tests { index: i, }, headers: HashMap::new(), + metadata: None, }; let paf = writer .write( @@ -720,6 +741,7 @@ mod tests { index: 11, }, headers: HashMap::new(), + metadata: None, }; let paf = writer .write( @@ -879,6 +901,7 @@ mod tests { 100, tracker_handle, cancel_token.clone(), + None, ); let mut js_writer = writer.clone(); @@ -969,6 +992,7 @@ mod tests { 100, tracker_handle.clone(), cln_token.clone(), + None, ); let (messages_tx, messages_rx) = tokio::sync::mpsc::channel(500); @@ -987,6 +1011,7 @@ mod tests { index: i, }, headers: HashMap::new(), + metadata: None, }; let (ack_tx, ack_rx) = tokio::sync::oneshot::channel(); tracker_handle @@ -1057,6 +1082,7 @@ mod tests { 100, tracker_handle.clone(), cancel_token.clone(), + None, ); let (tx, rx) = tokio::sync::mpsc::channel(500); @@ -1075,6 +1101,7 @@ mod tests { index: i, }, headers: HashMap::new(), + metadata: None, }; let (ack_tx, ack_rx) = tokio::sync::oneshot::channel(); tracker_handle @@ -1102,6 +1129,7 @@ mod tests { index: 101, }, headers: HashMap::new(), + metadata: None, }; let (ack_tx, ack_rx) = tokio::sync::oneshot::channel(); tracker_handle @@ -1198,6 +1226,7 @@ mod tests { 100, tracker_handle.clone(), cln_token.clone(), + None, ); let (messages_tx, messages_rx) = tokio::sync::mpsc::channel(500); @@ -1215,6 +1244,7 @@ mod tests { index: i, }, headers: HashMap::new(), + metadata: None, }; let (ack_tx, ack_rx) = tokio::sync::oneshot::channel(); tracker_handle diff --git a/rust/numaflow-core/src/sink.rs b/rust/numaflow-core/src/sink.rs index c60c57bd3..040bcf700 100644 --- a/rust/numaflow-core/src/sink.rs +++ 
b/rust/numaflow-core/src/sink.rs @@ -4,6 +4,7 @@ use std::time::Duration; use numaflow_pb::clients::sink::sink_client::SinkClient; use numaflow_pb::clients::sink::sink_response; use numaflow_pb::clients::sink::Status::{Failure, Fallback, Success}; +use serving::callback::CallbackHandler; use tokio::sync::mpsc::Receiver; use tokio::sync::{mpsc, oneshot}; use tokio::task::JoinHandle; @@ -255,6 +256,7 @@ impl SinkWriter { &self, messages_stream: ReceiverStream, cancellation_token: CancellationToken, + callback_handler: Option, ) -> Result>> { let handle: JoinHandle> = tokio::spawn({ let mut this = self.clone(); @@ -293,7 +295,7 @@ impl SinkWriter { let sink_start = time::Instant::now(); let total_valid_msgs = batch.len(); - match this.write(batch, cancellation_token.clone()).await { + match this.write(batch.clone(), cancellation_token.clone()).await { Ok(_) => { for offset in offsets { // Delete the message from the tracker @@ -309,6 +311,20 @@ impl SinkWriter { } } + if let Some(ref callback_handler) = callback_handler { + for message in batch { + let metadata = message.metadata.ok_or_else(|| { + Error::Source(format!( + "Message does not contain previous vertex name in the metadata" + )) + })?; + callback_handler + .callback(&message.headers, &message.tags, metadata.previous_vertex) + .await + .unwrap(); // FIXME: + } + }; + // publish sink metrics if is_mono_vertex() { monovertex_metrics() @@ -769,6 +785,7 @@ mod tests { index: i, }, headers: HashMap::new(), + metadata: None, }) .collect(); @@ -804,6 +821,7 @@ mod tests { index: i, }, headers: HashMap::new(), + metadata: None, }) .collect(); @@ -882,6 +900,7 @@ mod tests { index: i, }, headers: HashMap::new(), + metadata: None, }) .collect(); @@ -969,6 +988,7 @@ mod tests { index: i, }, headers: HashMap::new(), + metadata: None, }) .collect(); @@ -1016,6 +1036,7 @@ mod tests { index: 0, }, headers: HashMap::new(), + metadata: None, }; let request: SinkRequest = message.into(); diff --git 
a/rust/numaflow-core/src/source/generator.rs b/rust/numaflow-core/src/source/generator.rs index 855030f73..b1ab1694d 100644 --- a/rust/numaflow-core/src/source/generator.rs +++ b/rust/numaflow-core/src/source/generator.rs @@ -178,6 +178,7 @@ mod stream_generator { index: Default::default(), }, headers: Default::default(), + metadata: None, } } diff --git a/rust/numaflow-core/src/source/pulsar.rs b/rust/numaflow-core/src/source/pulsar.rs index 6a2c7162b..5e02db3da 100644 --- a/rust/numaflow-core/src/source/pulsar.rs +++ b/rust/numaflow-core/src/source/pulsar.rs @@ -26,6 +26,7 @@ impl TryFrom for Message { index: 0, }, headers: message.headers, + metadata: None, }) } } diff --git a/rust/numaflow-core/src/source/serving.rs b/rust/numaflow-core/src/source/serving.rs index b9fb6c72e..5b4a7986f 100644 --- a/rust/numaflow-core/src/source/serving.rs +++ b/rust/numaflow-core/src/source/serving.rs @@ -3,7 +3,7 @@ use std::sync::Arc; pub(crate) use serving::ServingSource; use crate::config::get_vertex_replica; -use crate::message::{MessageID, StringOffset}; +use crate::message::{MessageID, Metadata, StringOffset}; use crate::Error; use crate::Result; @@ -28,6 +28,9 @@ impl TryFrom for Message { index: 0, }, headers: message.headers, + metadata: Some(Metadata { + previous_vertex: get_vertex_name().to_string(), + }), }) } } diff --git a/rust/numaflow-core/src/source/user_defined.rs b/rust/numaflow-core/src/source/user_defined.rs index 5f274119b..aeebfa81a 100644 --- a/rust/numaflow-core/src/source/user_defined.rs +++ b/rust/numaflow-core/src/source/user_defined.rs @@ -128,6 +128,7 @@ impl TryFrom for Message { index: 0, }, headers: result.headers, + metadata: None, }) } } diff --git a/rust/numaflow-core/src/transformer.rs b/rust/numaflow-core/src/transformer.rs index 6f9298b7c..a44e838c0 100644 --- a/rust/numaflow-core/src/transformer.rs +++ b/rust/numaflow-core/src/transformer.rs @@ -295,6 +295,7 @@ mod tests { index: 0, }, headers: Default::default(), + metadata: None, }; let 
(output_tx, mut output_rx) = mpsc::channel(10); @@ -374,6 +375,7 @@ mod tests { index: i, }, headers: Default::default(), + metadata: None, }; input_tx.send(message).await.unwrap(); } @@ -458,6 +460,7 @@ mod tests { index: 0, }, headers: Default::default(), + metadata: None, }; input_tx.send(message).await.unwrap(); diff --git a/rust/numaflow-core/src/transformer/user_defined.rs b/rust/numaflow-core/src/transformer/user_defined.rs index 78518e4c0..6409ed8c6 100644 --- a/rust/numaflow-core/src/transformer/user_defined.rs +++ b/rust/numaflow-core/src/transformer/user_defined.rs @@ -143,6 +143,7 @@ impl UserDefinedTransformer { offset: Some(msg_info.offset.clone()), event_time: utc_from_timestamp(result.event_time), headers: msg_info.headers.clone(), + metadata: None, }; response_messages.push(message); } @@ -251,6 +252,7 @@ mod tests { index: 0, }, headers: Default::default(), + metadata: None, }; let (tx, rx) = tokio::sync::oneshot::channel(); @@ -295,6 +297,7 @@ mod tests { index: 0, }, headers: HashMap::new(), + metadata: None, }; let request: SourceTransformRequest = message.into(); diff --git a/rust/serving/Cargo.toml b/rust/serving/Cargo.toml index 857d69db7..673bf6046 100644 --- a/rust/serving/Cargo.toml +++ b/rust/serving/Cargo.toml @@ -38,6 +38,7 @@ rcgen = "0.13.1" parking_lot = "0.12.3" prometheus-client = "0.22.3" thiserror = "1.0.63" +reqwest = { workspace = true, features = ["rustls-tls", "json"] } [dev-dependencies] reqwest = { workspace = true, features = ["json"] } diff --git a/rust/serving/src/app.rs b/rust/serving/src/app.rs index 82ef1ef62..0f33311e8 100644 --- a/rust/serving/src/app.rs +++ b/rust/serving/src/app.rs @@ -12,9 +12,10 @@ use hyper_util::client::legacy::connect::HttpConnector; use hyper_util::rt::TokioExecutor; use tokio::signal; use tower::ServiceBuilder; +use tower_http::classify::ServerErrorsFailureClass; use tower_http::timeout::TimeoutLayer; -use tower_http::trace::{DefaultOnResponse, TraceLayer}; -use tracing::{info, info_span, 
Level}; +use tower_http::trace::TraceLayer; +use tracing::{info, info_span, Span}; use uuid::Uuid; use self::{ @@ -93,9 +94,18 @@ where .get::() .map(MatchedPath::as_str); - info_span!("request", tid, method=?req.method(), matched_path) + info_span!("request", tid, method=?req.method(), path=req.uri().path(), matched_path) }) - .on_response(DefaultOnResponse::new().level(Level::INFO)), + .on_response( + |response: &Response, latency: Duration, _span: &Span| { + tracing::info!(status=?response.status(), ?latency) + }, + ) + .on_failure( + |error: ServerErrorsFailureClass, latency: Duration, _span: &Span| { + tracing::error!(?error, ?latency, "Server error"); + }, + ), ) // capture metrics for all requests .layer(middleware::from_fn(capture_metrics)) diff --git a/rust/serving/src/callback.rs b/rust/serving/src/callback.rs new file mode 100644 index 000000000..ea9af4036 --- /dev/null +++ b/rust/serving/src/callback.rs @@ -0,0 +1,126 @@ +use std::{ + collections::HashMap, + sync::Arc, + time::{Duration, SystemTime, UNIX_EPOCH}, +}; + +use reqwest::Client; +use tokio::sync::Semaphore; + +use crate::Error; + +const ID_HEADER: &str = "X-Numaflow-Id"; +const CALLBACK_URL_HEADER_KEY: &str = "X-Numaflow-Callback-Url"; + +/// The data to be sent in the POST request +#[derive(serde::Serialize)] +struct CallbackPayload { + /// Name of the vertex + vertex: String, + /// Name of the pipeline + pipeline: String, + /// Unique identifier of the message + id: String, + /// Time when the callback was made + cb_time: u64, + /// List of tags associated with the message + tags: Vec, + /// Name of the vertex from which the message was sent + from_vertex: String, +} + +#[derive(Clone)] +pub struct CallbackHandler { + client: Client, + pipeline_name: String, + vertex_name: String, + semaphore: Arc, +} + +impl CallbackHandler { + pub fn new(pipeline_name: String, vertex_name: String, concurrency_limit: usize) -> Self { + // if env::var(ENV_CALLBACK_ENABLED).is_err() { + // return Ok(None); + 
// }; + + // let Ok(callback_url) = env::var(ENV_CALLBACK_URL) else { + // return Err(Error::Source(format!("Environment variable {ENV_CALLBACK_ENABLED} is set, but {ENV_CALLBACK_URL} is not set"))); + // }; + + let client = Client::builder() + .danger_accept_invalid_certs(true) + .timeout(Duration::from_secs(1)) + .build() + .expect("Creating callback client for Serving source"); + + let semaphore = Arc::new(tokio::sync::Semaphore::new(concurrency_limit)); + + Self { + client, + pipeline_name, + vertex_name, + semaphore, + } + } + + pub async fn callback( + &self, + message_headers: &HashMap, + message_tags: &Option>, + previous_vertex: String, + ) -> crate::Result<()> { + let callback_url = message_headers + .get(CALLBACK_URL_HEADER_KEY) + .ok_or_else(|| { + Error::Source(format!( + "{CALLBACK_URL_HEADER_KEY} header is not present in the message headers", + )) + })? + .to_owned(); + let uuid = message_headers.get(ID_HEADER).ok_or_else(|| { + Error::Source(format!("{ID_HEADER} is not found in message headers",)) + })?; + let cb_time = SystemTime::now() + .duration_since(UNIX_EPOCH) + .expect("System time is older than Unix epoch time") + .as_millis() as u64; + + let mut msg_tags = vec![]; + if let Some(tags) = message_tags { + msg_tags = tags.iter().cloned().collect(); + }; + + let callback_payload = CallbackPayload { + vertex: self.vertex_name.clone(), + pipeline: self.pipeline_name.clone(), + id: uuid.to_owned(), + cb_time, + tags: msg_tags, + from_vertex: previous_vertex, + }; + + let permit = Arc::clone(&self.semaphore).acquire_owned().await.unwrap(); + let client = self.client.clone(); + tokio::spawn(async move { + let _permit = permit; + let resp = client + .post(callback_url) + .json(&callback_payload) + .send() + .await + .map_err(|e| Error::Source(format!("Sending callback request: {e:?}"))) + .unwrap(); //FIXME: + + if !resp.status().is_success() { + // return Err(Error::Source(format!( + // "Received non-OK status for callback request. 
Status={}, Body: {}", + // resp.status(), + // resp.text().await.unwrap_or_default() + // ))); + tracing::error!("Received non-OK status for callback request"); //FIXME: what to do with errors + } + }); + + Ok(()) + } +} diff --git a/rust/serving/src/config.rs b/rust/serving/src/config.rs index 16c2ee125..3ef798129 100644 --- a/rust/serving/src/config.rs +++ b/rust/serving/src/config.rs @@ -237,6 +237,7 @@ mod tests { conditions: None, }], }, + ..Default::default() }; assert_eq!(settings, expected_config); } diff --git a/rust/serving/src/lib.rs b/rust/serving/src/lib.rs index 001065ddf..edfed4d91 100644 --- a/rust/serving/src/lib.rs +++ b/rust/serving/src/lib.rs @@ -26,6 +26,8 @@ pub mod source; use crate::source::MessageWrapper; pub use source::{Message, ServingSource}; +pub mod callback; + #[derive(Clone)] pub(crate) struct AppState { pub(crate) message: mpsc::Sender, diff --git a/rust/serving/src/source.rs b/rust/serving/src/source.rs index d03817967..4db7e7d3a 100644 --- a/rust/serving/src/source.rs +++ b/rust/serving/src/source.rs @@ -50,6 +50,7 @@ struct ServingSourceActor { /// has been successfully processed. 
tracker: HashMap>, vertex_replica_id: u16, + callback_url: String, } impl ServingSourceActor { @@ -72,12 +73,17 @@ impl ServingSourceActor { })?; let callback_state = CallbackState::new(msg_graph, redis_store).await?; + let callback_url = format!( + "https://{}:{}/v1/process/callback", + &settings.host_ip, &settings.app_listen_port + ); tokio::spawn(async move { let mut serving_actor = ServingSourceActor { messages: messages_rx, handler_rx, tracker: HashMap::new(), vertex_replica_id, + callback_url, }; serving_actor.run().await; }); @@ -140,10 +146,16 @@ impl ServingSourceActor { }; let MessageWrapper { confirm_save, - message, + mut message, } = message; self.tracker.insert(message.id.clone(), confirm_save); + message + .headers + .insert("X-Numaflow-Callback-Url".into(), self.callback_url.clone()); + message + .headers + .insert("X-Numaflow-Id".into(), message.id.clone()); messages.push(message); } Ok(messages) From 94ad530a69a5929c19dcfdfd9a3fb6a3f1bdc283 Mon Sep 17 00:00:00 2001 From: Sreekanth Date: Thu, 9 Jan 2025 15:58:03 +0530 Subject: [PATCH 02/27] Callbacks are working with pipeline Signed-off-by: Sreekanth --- rust/numaflow-core/src/config.rs | 4 +-- rust/numaflow-core/src/config/pipeline.rs | 15 +++++++--- rust/numaflow-core/src/metrics.rs | 3 ++ .../src/pipeline/isb/jetstream/reader.rs | 6 ++++ rust/numaflow-core/src/sink.rs | 2 +- rust/numaflow/src/main.rs | 10 +++++-- rust/serving/src/app.rs | 11 ++++++- rust/serving/src/callback.rs | 29 ++++++++++--------- 8 files changed, 56 insertions(+), 24 deletions(-) diff --git a/rust/numaflow-core/src/config.rs b/rust/numaflow-core/src/config.rs index 167c2f1cd..6355f0d14 100644 --- a/rust/numaflow-core/src/config.rs +++ b/rust/numaflow-core/src/config.rs @@ -97,7 +97,7 @@ pub(crate) struct Settings { impl Settings { /// load based on the CRD type, either a pipeline or a monovertex. /// Settings are populated through reading the env vars set via the controller. The main - /// CRD is the base64 spec of the CR. 
+ /// CRD is the base64 spec of the CR. fn load() -> Result { if let Ok(obj) = env::var(ENV_MONO_VERTEX_OBJ) { let cfg = MonovertexConfig::load(obj)?; @@ -112,7 +112,7 @@ impl Settings { custom_resource_type: CustomResourceType::Pipeline(cfg), }); } - Err(Error::Config("No configuration found".to_string())) + Err(Error::Config("No configuration found - env variable {ENV_MONO_VERTEX_OBJ} or {ENV_VERTEX_OBJ} is not set".to_string())) } } diff --git a/rust/numaflow-core/src/config/pipeline.rs b/rust/numaflow-core/src/config/pipeline.rs index 255da3286..0f40017aa 100644 --- a/rust/numaflow-core/src/config/pipeline.rs +++ b/rust/numaflow-core/src/config/pipeline.rs @@ -25,6 +25,7 @@ const DEFAULT_LOOKBACK_WINDOW_IN_SECS: u16 = 120; const ENV_NUMAFLOW_SERVING_JETSTREAM_URL: &str = "NUMAFLOW_ISBSVC_JETSTREAM_URL"; const ENV_NUMAFLOW_SERVING_JETSTREAM_USER: &str = "NUMAFLOW_ISBSVC_JETSTREAM_USER"; const ENV_NUMAFLOW_SERVING_JETSTREAM_PASSWORD: &str = "NUMAFLOW_ISBSVC_JETSTREAM_PASSWORD"; +const ENV_PAF_BATCH_SIZE: &str = "PAF_BATCH_SIZE"; const ENV_CALLBACK_ENABLED: &str = "NUMAFLOW_CALLBACK_ENABLED"; const ENV_CALLBACK_CONCURRENCY: &str = "NUMAFLOW_CALLBACK_CONCURRENCY"; const DEFAULT_CALLBACK_CONCURRENCY: usize = 100; @@ -296,9 +297,15 @@ impl PipelineConfig { .map(|(key, val)| (key.into(), val.into())) .filter(|(key, _val)| { // FIXME(cr): this filter is non-exhaustive, should we invert? 
- key == ENV_NUMAFLOW_SERVING_JETSTREAM_URL - || key == ENV_NUMAFLOW_SERVING_JETSTREAM_USER - || key == ENV_NUMAFLOW_SERVING_JETSTREAM_PASSWORD + [ + ENV_NUMAFLOW_SERVING_JETSTREAM_URL, + ENV_NUMAFLOW_SERVING_JETSTREAM_USER, + ENV_NUMAFLOW_SERVING_JETSTREAM_PASSWORD, + ENV_PAF_BATCH_SIZE, + ENV_CALLBACK_ENABLED, + ENV_CALLBACK_CONCURRENCY, + ] + .contains(&key.as_str()) }) .collect(); @@ -400,7 +407,7 @@ impl PipelineConfig { Ok(PipelineConfig { batch_size: batch_size as usize, - paf_concurrency: env::var("PAF_BATCH_SIZE") + paf_concurrency: get_var(ENV_PAF_BATCH_SIZE) .unwrap_or((DEFAULT_BATCH_SIZE * 2).to_string()) .parse() .unwrap(), diff --git a/rust/numaflow-core/src/metrics.rs b/rust/numaflow-core/src/metrics.rs index 2a672ec31..0ec903aa0 100644 --- a/rust/numaflow-core/src/metrics.rs +++ b/rust/numaflow-core/src/metrics.rs @@ -600,6 +600,9 @@ pub(crate) async fn start_metrics_https_server( addr: SocketAddr, metrics_state: UserDefinedContainerState, ) -> crate::Result<()> { + // Setup the CryptoProvider (controls core cryptography used by rustls) for the process + let _ = rustls::crypto::aws_lc_rs::default_provider().install_default(); + // Generate a self-signed certificate let CertifiedKey { cert, key_pair } = generate_simple_self_signed(vec!["localhost".into()]) .map_err(|e| Error::Metrics(format!("Generating self-signed certificate: {}", e)))?; diff --git a/rust/numaflow-core/src/pipeline/isb/jetstream/reader.rs b/rust/numaflow-core/src/pipeline/isb/jetstream/reader.rs index bf4faa2da..d09ffb32c 100644 --- a/rust/numaflow-core/src/pipeline/isb/jetstream/reader.rs +++ b/rust/numaflow-core/src/pipeline/isb/jetstream/reader.rs @@ -155,6 +155,12 @@ impl JetstreamReader { index: 0, }; + let metadata = crate::message::Metadata{ + // Copy previous vertex name from message id + previous_vertex: String::from_utf8_lossy(&message.id.vertex_name).into(), + }; + message.metadata = Some(metadata); + message.offset = Some(offset.clone()); message.id = 
message_id.clone(); diff --git a/rust/numaflow-core/src/sink.rs b/rust/numaflow-core/src/sink.rs index 040bcf700..9d64b1844 100644 --- a/rust/numaflow-core/src/sink.rs +++ b/rust/numaflow-core/src/sink.rs @@ -315,7 +315,7 @@ impl SinkWriter { for message in batch { let metadata = message.metadata.ok_or_else(|| { Error::Source(format!( - "Message does not contain previous vertex name in the metadata" + "Writing to Sink: message does not contain previous vertex name in the metadata" )) })?; callback_handler diff --git a/rust/numaflow/src/main.rs b/rust/numaflow/src/main.rs index 9a5ab6fe8..7af5a0b97 100644 --- a/rust/numaflow/src/main.rs +++ b/rust/numaflow/src/main.rs @@ -36,7 +36,13 @@ async fn run() -> Result<(), Box> { } else if args.contains(&"--rust".to_string()) { numaflow_core::run() .await - .map_err(|e| format!("Error running rust binary: {e:?}"))? + .map_err(|e| format!("Error running rust binary: {e:?}"))?; + } else { + return Err(format!( + "Invalid argument. Use --servesink, or --rust. Current args = {:?}", + args + ) + .into()); } - Err("Invalid argument. 
Use --servesink, or --rust".into()) + Ok(()) } diff --git a/rust/serving/src/app.rs b/rust/serving/src/app.rs index 0f33311e8..b73ac4743 100644 --- a/rust/serving/src/app.rs +++ b/rust/serving/src/app.rs @@ -82,6 +82,11 @@ where .layer( TraceLayer::new_for_http() .make_span_with(move |req: &Request| { + let req_path = req.uri().path(); + if ["/metrics", "/readyz", "/livez", "/sidecar-livez"].contains(&req_path) { + // We don't need request ID for these endpoints + return info_span!("request", method=?req.method(), path=req_path); + } let tid = req .headers() .get(&tid_header) @@ -94,10 +99,14 @@ where .get::() .map(MatchedPath::as_str); - info_span!("request", tid, method=?req.method(), path=req.uri().path(), matched_path) + info_span!("request", tid, method=?req.method(), path=req_path, matched_path) }) .on_response( |response: &Response, latency: Duration, _span: &Span| { + if response.status().is_server_error() { + // 5xx responses will be captured in on_failure at and logged at 'error' level + return; + } tracing::info!(status=?response.status(), ?latency) }, ) diff --git a/rust/serving/src/callback.rs b/rust/serving/src/callback.rs index ea9af4036..191d753bb 100644 --- a/rust/serving/src/callback.rs +++ b/rust/serving/src/callback.rs @@ -15,18 +15,16 @@ const CALLBACK_URL_HEADER_KEY: &str = "X-Numaflow-Callback-Url"; /// The data to be sent in the POST request #[derive(serde::Serialize)] struct CallbackPayload { - /// Name of the vertex - vertex: String, - /// Name of the pipeline - pipeline: String, /// Unique identifier of the message id: String, + /// Name of the vertex + vertex: String, /// Time when the callback was made cb_time: u64, - /// List of tags associated with the message - tags: Vec, /// Name of the vertex from which the message was sent from_vertex: String, + /// List of tags associated with the message + tags: Option>, } #[derive(Clone)] @@ -77,23 +75,25 @@ impl CallbackHandler { )) })? 
.to_owned(); - let uuid = message_headers.get(ID_HEADER).ok_or_else(|| { - Error::Source(format!("{ID_HEADER} is not found in message headers",)) - })?; + let uuid = message_headers + .get(ID_HEADER) + .ok_or_else(|| Error::Source(format!("{ID_HEADER} is not found in message headers",)))? + .to_owned(); let cb_time = SystemTime::now() .duration_since(UNIX_EPOCH) .expect("System time is older than Unix epoch time") .as_millis() as u64; - let mut msg_tags = vec![]; + let mut msg_tags = None; if let Some(tags) = message_tags { - msg_tags = tags.iter().cloned().collect(); + if !tags.is_empty() { + msg_tags = Some(tags.iter().cloned().collect()); + } }; let callback_payload = CallbackPayload { vertex: self.vertex_name.clone(), - pipeline: self.pipeline_name.clone(), - id: uuid.to_owned(), + id: uuid.clone(), cb_time, tags: msg_tags, from_vertex: previous_vertex, @@ -105,7 +105,8 @@ impl CallbackHandler { let _permit = permit; let resp = client .post(callback_url) - .json(&callback_payload) + .header(ID_HEADER, uuid.clone()) + .json(&[callback_payload]) .send() .await .map_err(|e| Error::Source(format!("Sending callback request: {e:?}"))) From b8dbb6d5e02c86c5b793f4df2620c42506b6053b Mon Sep 17 00:00:00 2001 From: Sreekanth Date: Mon, 13 Jan 2025 10:14:13 +0530 Subject: [PATCH 03/27] Retries when callback fails Signed-off-by: Sreekanth --- rust/numaflow-core/src/config.rs | 6 +- rust/numaflow-core/src/config/monovertex.rs | 6 +- rust/numaflow-core/src/config/pipeline.rs | 7 +- rust/numaflow-core/src/mapper/map.rs | 1 + rust/numaflow-core/src/monovertex.rs | 1 - rust/numaflow-core/src/pipeline.rs | 3 - .../src/pipeline/isb/jetstream/writer.rs | 8 +- rust/numaflow-core/src/sink.rs | 12 +-- rust/numaflow-core/src/sink/blackhole.rs | 2 + rust/numaflow-core/src/sink/log.rs | 2 + rust/numaflow-core/src/sink/user_defined.rs | 2 + rust/serving/src/callback.rs | 86 +++++++++++++++---- 12 files changed, 97 insertions(+), 39 deletions(-) diff --git 
a/rust/numaflow-core/src/config.rs b/rust/numaflow-core/src/config.rs index 6355f0d14..686136c78 100644 --- a/rust/numaflow-core/src/config.rs +++ b/rust/numaflow-core/src/config.rs @@ -10,6 +10,10 @@ use crate::Result; const ENV_MONO_VERTEX_OBJ: &str = "NUMAFLOW_MONO_VERTEX_OBJECT"; const ENV_VERTEX_OBJ: &str = "NUMAFLOW_VERTEX_OBJECT"; +const ENV_CALLBACK_ENABLED: &str = "NUMAFLOW_CALLBACK_ENABLED"; +const ENV_CALLBACK_CONCURRENCY: &str = "NUMAFLOW_CALLBACK_CONCURRENCY"; +const DEFAULT_CALLBACK_CONCURRENCY: usize = 100; + /// Building blocks (Source, Sink, Transformer, FallBack, Metrics, etc.) to build a Pipeline or a /// MonoVertex. pub(crate) mod components; @@ -112,7 +116,7 @@ impl Settings { custom_resource_type: CustomResourceType::Pipeline(cfg), }); } - Err(Error::Config("No configuration found - env variable {ENV_MONO_VERTEX_OBJ} or {ENV_VERTEX_OBJ} is not set".to_string())) + Err(Error::Config("No configuration found - environment variable {ENV_MONO_VERTEX_OBJ} or {ENV_VERTEX_OBJ} is not set".to_string())) } } diff --git a/rust/numaflow-core/src/config/monovertex.rs b/rust/numaflow-core/src/config/monovertex.rs index a9931c785..4fdbc95fe 100644 --- a/rust/numaflow-core/src/config/monovertex.rs +++ b/rust/numaflow-core/src/config/monovertex.rs @@ -20,14 +20,12 @@ use crate::Result; use super::pipeline::ServingCallbackConfig; +use super::{DEFAULT_CALLBACK_CONCURRENCY, ENV_CALLBACK_CONCURRENCY, ENV_CALLBACK_ENABLED}; + const DEFAULT_BATCH_SIZE: u64 = 500; const DEFAULT_TIMEOUT_IN_MS: u32 = 1000; const DEFAULT_LOOKBACK_WINDOW_IN_SECS: u16 = 120; -const ENV_CALLBACK_ENABLED: &str = "NUMAFLOW_CALLBACK_ENABLED"; //FIXME: duplicates -const ENV_CALLBACK_CONCURRENCY: &str = "NUMAFLOW_CALLBACK_CONCURRENCY"; -const DEFAULT_CALLBACK_CONCURRENCY: usize = 100; - #[derive(Debug, Clone, PartialEq)] pub(crate) struct MonovertexConfig { pub(crate) name: String, diff --git a/rust/numaflow-core/src/config/pipeline.rs b/rust/numaflow-core/src/config/pipeline.rs index 
0f40017aa..549fff556 100644 --- a/rust/numaflow-core/src/config/pipeline.rs +++ b/rust/numaflow-core/src/config/pipeline.rs @@ -19,6 +19,8 @@ use crate::config::pipeline::map::MapVtxConfig; use crate::error::Error; use crate::Result; +use super::{DEFAULT_CALLBACK_CONCURRENCY, ENV_CALLBACK_CONCURRENCY, ENV_CALLBACK_ENABLED}; + const DEFAULT_BATCH_SIZE: u64 = 500; const DEFAULT_TIMEOUT_IN_MS: u32 = 1000; const DEFAULT_LOOKBACK_WINDOW_IN_SECS: u16 = 120; @@ -26,9 +28,6 @@ const ENV_NUMAFLOW_SERVING_JETSTREAM_URL: &str = "NUMAFLOW_ISBSVC_JETSTREAM_URL" const ENV_NUMAFLOW_SERVING_JETSTREAM_USER: &str = "NUMAFLOW_ISBSVC_JETSTREAM_USER"; const ENV_NUMAFLOW_SERVING_JETSTREAM_PASSWORD: &str = "NUMAFLOW_ISBSVC_JETSTREAM_PASSWORD"; const ENV_PAF_BATCH_SIZE: &str = "PAF_BATCH_SIZE"; -const ENV_CALLBACK_ENABLED: &str = "NUMAFLOW_CALLBACK_ENABLED"; -const ENV_CALLBACK_CONCURRENCY: &str = "NUMAFLOW_CALLBACK_CONCURRENCY"; -const DEFAULT_CALLBACK_CONCURRENCY: usize = 100; const DEFAULT_GRPC_MAX_MESSAGE_SIZE: usize = 64 * 1024 * 1024; // 64 MB const DEFAULT_MAP_SOCKET: &str = "/var/run/numaflow/map.sock"; pub(crate) const DEFAULT_BATCH_MAP_SOCKET: &str = "/var/run/numaflow/batchmap.sock"; @@ -408,7 +407,7 @@ impl PipelineConfig { Ok(PipelineConfig { batch_size: batch_size as usize, paf_concurrency: get_var(ENV_PAF_BATCH_SIZE) - .unwrap_or((DEFAULT_BATCH_SIZE * 2).to_string()) + .unwrap_or_else(|_| (DEFAULT_BATCH_SIZE * 2).to_string()) .parse() .unwrap(), read_timeout: Duration::from_millis(timeout_in_ms as u64), diff --git a/rust/numaflow-core/src/mapper/map.rs b/rust/numaflow-core/src/mapper/map.rs index c2241bf7f..29411b60b 100644 --- a/rust/numaflow-core/src/mapper/map.rs +++ b/rust/numaflow-core/src/mapper/map.rs @@ -302,6 +302,7 @@ impl MapHandle { Ok(()) }); + tracing::info!("Returning output_rx stream"); Ok((ReceiverStream::new(output_rx), handle)) } diff --git a/rust/numaflow-core/src/monovertex.rs b/rust/numaflow-core/src/monovertex.rs index 67a597ac6..411179b38 100644 --- 
a/rust/numaflow-core/src/monovertex.rs +++ b/rust/numaflow-core/src/monovertex.rs @@ -71,7 +71,6 @@ pub(crate) async fn start_forwarder( let callback_handler = match config.callback_config { Some(ref cb_cfg) => Some(CallbackHandler::new( - config.name.clone(), config.name.clone(), cb_cfg.callback_concurrency, )), diff --git a/rust/numaflow-core/src/pipeline.rs b/rust/numaflow-core/src/pipeline.rs index af6f755e0..8011ae625 100644 --- a/rust/numaflow-core/src/pipeline.rs +++ b/rust/numaflow-core/src/pipeline.rs @@ -56,7 +56,6 @@ async fn start_source_forwarder( let callback_handler = match config.callback_config { Some(ref cb_cfg) => Some(CallbackHandler::new( - config.pipeline_name.clone(), config.vertex_name.clone(), cb_cfg.callback_concurrency, )), @@ -155,7 +154,6 @@ async fn start_map_forwarder( let callback_handler = match config.callback_config { Some(ref cb_cfg) => Some(CallbackHandler::new( - config.pipeline_name.clone(), config.vertex_name.clone(), cb_cfg.callback_concurrency, )), @@ -263,7 +261,6 @@ async fn start_sink_forwarder( let callback_handler = match config.callback_config { Some(ref cb_cfg) => Some(CallbackHandler::new( - config.pipeline_name.clone(), config.vertex_name.clone(), cb_cfg.callback_concurrency, )), diff --git a/rust/numaflow-core/src/pipeline/isb/jetstream/writer.rs b/rust/numaflow-core/src/pipeline/isb/jetstream/writer.rs index dfc28fb5a..2a7818df7 100644 --- a/rust/numaflow-core/src/pipeline/isb/jetstream/writer.rs +++ b/rust/numaflow-core/src/pipeline/isb/jetstream/writer.rs @@ -185,6 +185,7 @@ impl JetstreamWriter { let this = self.clone(); let handle: JoinHandle> = tokio::spawn(async move { + tracing::info!("Starting streaming Jetstream writer"); let mut messages_stream = messages_stream; let mut hash = DefaultHasher::new(); @@ -256,10 +257,12 @@ impl JetstreamWriter { "Message does not contain previous vertex name in the metadata" )) })?; - callback_handler + if let Err(e) = callback_handler .callback(&message.headers, 
&message.tags, metadata.previous_vertex) .await - .unwrap(); // FIXME: + { + tracing::error!(?e, "Failed to send callback for message"); + } }; processed_msgs_count += 1; @@ -273,6 +276,7 @@ impl JetstreamWriter { last_logged_at = Instant::now(); } } + tracing::info!("Streaming jetstream writer finished"); Ok(()) }); Ok(handle) diff --git a/rust/numaflow-core/src/sink.rs b/rust/numaflow-core/src/sink.rs index 9d64b1844..e6f44cb15 100644 --- a/rust/numaflow-core/src/sink.rs +++ b/rust/numaflow-core/src/sink.rs @@ -318,10 +318,12 @@ impl SinkWriter { "Writing to Sink: message does not contain previous vertex name in the metadata" )) })?; - callback_handler + if let Err(e) = callback_handler .callback(&message.headers, &message.tags, metadata.previous_vertex) .await - .unwrap(); // FIXME: + { + tracing::error!(?e, "Failed to send callback for message"); + } } }; @@ -839,7 +841,7 @@ mod tests { drop(tx); let handle = sink_writer - .streaming_write(ReceiverStream::new(rx), CancellationToken::new()) + .streaming_write(ReceiverStream::new(rx), CancellationToken::new(), None) .await .unwrap(); @@ -918,7 +920,7 @@ mod tests { drop(tx); let cln_token = CancellationToken::new(); let handle = sink_writer - .streaming_write(ReceiverStream::new(rx), cln_token.clone()) + .streaming_write(ReceiverStream::new(rx), cln_token.clone(), None) .await .unwrap(); @@ -1006,7 +1008,7 @@ mod tests { drop(tx); let cln_token = CancellationToken::new(); let handle = sink_writer - .streaming_write(ReceiverStream::new(rx), cln_token.clone()) + .streaming_write(ReceiverStream::new(rx), cln_token.clone(), None) .await .unwrap(); diff --git a/rust/numaflow-core/src/sink/blackhole.rs b/rust/numaflow-core/src/sink/blackhole.rs index 308a59e35..328731b91 100644 --- a/rust/numaflow-core/src/sink/blackhole.rs +++ b/rust/numaflow-core/src/sink/blackhole.rs @@ -44,6 +44,7 @@ mod tests { offset: "1".to_string().into(), index: 0, }, + metadata: None, }, Message { keys: Arc::from(vec![]), @@ -57,6 +58,7 @@ 
mod tests { offset: "2".to_string().into(), index: 1, }, + metadata: None, }, ]; diff --git a/rust/numaflow-core/src/sink/log.rs b/rust/numaflow-core/src/sink/log.rs index 71bb37374..c70a1d253 100644 --- a/rust/numaflow-core/src/sink/log.rs +++ b/rust/numaflow-core/src/sink/log.rs @@ -57,6 +57,7 @@ mod tests { offset: "1".to_string().into(), index: 0, }, + metadata: None, }, Message { keys: Arc::from(vec![]), @@ -70,6 +71,7 @@ mod tests { offset: "2".to_string().into(), index: 1, }, + metadata: None, }, ]; diff --git a/rust/numaflow-core/src/sink/user_defined.rs b/rust/numaflow-core/src/sink/user_defined.rs index 0bcb4c685..76e0ba6ad 100644 --- a/rust/numaflow-core/src/sink/user_defined.rs +++ b/rust/numaflow-core/src/sink/user_defined.rs @@ -212,6 +212,7 @@ mod tests { offset: "1".to_string().into(), index: 0, }, + metadata: None, }, Message { keys: Arc::from(vec![]), @@ -225,6 +226,7 @@ mod tests { offset: "2".to_string().into(), index: 1, }, + metadata: None, }, ]; diff --git a/rust/serving/src/callback.rs b/rust/serving/src/callback.rs index 191d753bb..a1b0d89e3 100644 --- a/rust/serving/src/callback.rs +++ b/rust/serving/src/callback.rs @@ -30,13 +30,12 @@ struct CallbackPayload { #[derive(Clone)] pub struct CallbackHandler { client: Client, - pipeline_name: String, vertex_name: String, semaphore: Arc, } impl CallbackHandler { - pub fn new(pipeline_name: String, vertex_name: String, concurrency_limit: usize) -> Self { + pub fn new(vertex_name: String, concurrency_limit: usize) -> Self { // if env::var(ENV_CALLBACK_ENABLED).is_err() { // return Ok(None); // }; @@ -55,7 +54,6 @@ impl CallbackHandler { Self { client, - pipeline_name, vertex_name, semaphore, } @@ -103,22 +101,72 @@ impl CallbackHandler { let client = self.client.clone(); tokio::spawn(async move { let _permit = permit; - let resp = client - .post(callback_url) - .header(ID_HEADER, uuid.clone()) - .json(&[callback_payload]) - .send() - .await - .map_err(|e| Error::Source(format!("Sending callback 
request: {e:?}"))) - .unwrap(); //FIXME: - - if !resp.status().is_success() { - // return Err(Error::Source(format!( - // "Received non-OK status for callback request. Status={}, Body: {}", - // resp.status(), - // resp.text().await.unwrap_or_default() - // ))); - tracing::error!("Received non-OK status for callback request"); //FIXME: what to do with errors + // Retry incase of failure in making request. + // When there is a failure, we retry after wait_secs. This value is doubled after each retry attempt. + // Then longest wait time will be 64 seconds. + let mut wait_secs = 1; + const TOTAL_ATTEMPTS: usize = 7; + for i in 1..=TOTAL_ATTEMPTS { + let resp = client + .post(&callback_url) + .header(ID_HEADER, uuid.clone()) + .json(&[&callback_payload]) + .send() + .await; + let resp = match resp { + Ok(resp) => resp, + Err(e) => { + if i < TOTAL_ATTEMPTS { + tracing::warn!( + ?e, + "Sending callback request failed. Will retry after a delay" + ); + tokio::time::sleep(Duration::from_secs(wait_secs)).await; + wait_secs *= 2; + } else { + tracing::error!(?e, "Sending callback request failed"); + } + continue; + } + }; + + if resp.status().is_success() { + break; + } + + if resp.status().is_client_error() { + // TODO: When the source serving pod restarts, the callbacks will fail with 4xx status + // since the request ID won't be available in it's in-memory tracker. + // No point in retrying such cases + // 4xx can also happen if payload is wrong (due to bugs in the code). We should differentiate + // between what can be retried and not. + let status_code = resp.status(); + let response_body = resp.text().await; + tracing::error!( + ?status_code, + ?response_body, + "Received client error while making callback. Callback will not be retried" + ); + break; + } + + let status_code = resp.status(); + let response_body = resp.text().await; + if i < TOTAL_ATTEMPTS { + tracing::warn!( + ?status_code, + ?response_body, + "Received non-OK status for callback request. 
Will retry after a delay" + ); + tokio::time::sleep(Duration::from_secs(wait_secs)).await; + wait_secs *= 2; + } else { + tracing::error!( + ?status_code, + ?response_body, + "Received non-OK status for callback request" + ); + } } }); From d86e41d2768db00a8aa91db4f9be6645b1194910 Mon Sep 17 00:00:00 2001 From: Sreekanth Date: Mon, 13 Jan 2025 15:52:59 +0530 Subject: [PATCH 04/27] Unit test for callbacks Signed-off-by: Sreekanth --- .../src/app/callback/store/memstore.rs | 2 +- rust/serving/src/app/jetstream_proxy.rs | 3 +- rust/serving/src/callback.rs | 130 +++++++++++++++--- rust/serving/src/config.rs | 8 +- rust/serving/src/source.rs | 5 +- 5 files changed, 124 insertions(+), 24 deletions(-) diff --git a/rust/serving/src/app/callback/store/memstore.rs b/rust/serving/src/app/callback/store/memstore.rs index a9cbaea31..898f7b9f6 100644 --- a/rust/serving/src/app/callback/store/memstore.rs +++ b/rust/serving/src/app/callback/store/memstore.rs @@ -12,7 +12,7 @@ use crate::Error; pub(crate) struct InMemoryStore { /// The data field is a `HashMap` where the key is a `String` and the value is a `Vec>`. /// It is wrapped in an `Arc>` to allow shared ownership and thread safety. 
- data: Arc>>>>, + pub(crate) data: Arc>>>>, } impl InMemoryStore { diff --git a/rust/serving/src/app/jetstream_proxy.rs b/rust/serving/src/app/jetstream_proxy.rs index 6f61a0530..34c3d4343 100644 --- a/rust/serving/src/app/jetstream_proxy.rs +++ b/rust/serving/src/app/jetstream_proxy.rs @@ -273,6 +273,7 @@ mod tests { use crate::app::tracker::MessageGraph; use crate::pipeline::PipelineDCG; use crate::{Error, Settings}; + use crate::config::DEFAULT_ID_HEADER; const PIPELINE_SPEC_ENCODED: &str = "eyJ2ZXJ0aWNlcyI6W3sibmFtZSI6ImluIiwic291cmNlIjp7InNlcnZpbmciOnsiYXV0aCI6bnVsbCwic2VydmljZSI6dHJ1ZSwibXNnSURIZWFkZXJLZXkiOiJYLU51bWFmbG93LUlkIiwic3RvcmUiOnsidXJsIjoicmVkaXM6Ly9yZWRpczo2Mzc5In19fSwiY29udGFpbmVyVGVtcGxhdGUiOnsicmVzb3VyY2VzIjp7fSwiaW1hZ2VQdWxsUG9saWN5IjoiTmV2ZXIiLCJlbnYiOlt7Im5hbWUiOiJSVVNUX0xPRyIsInZhbHVlIjoiZGVidWcifV19LCJzY2FsZSI6eyJtaW4iOjF9LCJ1cGRhdGVTdHJhdGVneSI6eyJ0eXBlIjoiUm9sbGluZ1VwZGF0ZSIsInJvbGxpbmdVcGRhdGUiOnsibWF4VW5hdmFpbGFibGUiOiIyNSUifX19LHsibmFtZSI6InBsYW5uZXIiLCJ1ZGYiOnsiY29udGFpbmVyIjp7ImltYWdlIjoiYXNjaWk6MC4xIiwiYXJncyI6WyJwbGFubmVyIl0sInJlc291cmNlcyI6e30sImltYWdlUHVsbFBvbGljeSI6Ik5ldmVyIn0sImJ1aWx0aW4iOm51bGwsImdyb3VwQnkiOm51bGx9LCJjb250YWluZXJUZW1wbGF0ZSI6eyJyZXNvdXJjZXMiOnt9LCJpbWFnZVB1bGxQb2xpY3kiOiJOZXZlciJ9LCJzY2FsZSI6eyJtaW4iOjF9LCJ1cGRhdGVTdHJhdGVneSI6eyJ0eXBlIjoiUm9sbGluZ1VwZGF0ZSIsInJvbGxpbmdVcGRhdGUiOnsibWF4VW5hdmFpbGFibGUiOiIyNSUifX19LHsibmFtZSI6InRpZ2VyIiwidWRmIjp7ImNvbnRhaW5lciI6eyJpbWFnZSI6ImFzY2lpOjAuMSIsImFyZ3MiOlsidGlnZXIiXSwicmVzb3VyY2VzIjp7fSwiaW1hZ2VQdWxsUG9saWN5IjoiTmV2ZXIifSwiYnVpbHRpbiI6bnVsbCwiZ3JvdXBCeSI6bnVsbH0sImNvbnRhaW5lclRlbXBsYXRlIjp7InJlc291cmNlcyI6e30sImltYWdlUHVsbFBvbGljeSI6Ik5ldmVyIn0sInNjYWxlIjp7Im1pbiI6MX0sInVwZGF0ZVN0cmF0ZWd5Ijp7InR5cGUiOiJSb2xsaW5nVXBkYXRlIiwicm9sbGluZ1VwZGF0ZSI6eyJtYXhVbmF2YWlsYWJsZSI6IjI1JSJ9fX0seyJuYW1lIjoiZG9nIiwidWRmIjp7ImNvbnRhaW5lciI6eyJpbWFnZSI6ImFzY2lpOjAuMSIsImFyZ3MiOlsiZG9nIl0sInJlc291cmNlcyI6e30sImltYWdlUHVsbFBvbGljeSI6Ik5ldmVyIn0sImJ1aWx0aW4iOm51bGwsImdyb3VwQnkiOm51bGx9LCJ
jb250YWluZXJUZW1wbGF0ZSI6eyJyZXNvdXJjZXMiOnt9LCJpbWFnZVB1bGxQb2xpY3kiOiJOZXZlciJ9LCJzY2FsZSI6eyJtaW4iOjF9LCJ1cGRhdGVTdHJhdGVneSI6eyJ0eXBlIjoiUm9sbGluZ1VwZGF0ZSIsInJvbGxpbmdVcGRhdGUiOnsibWF4VW5hdmFpbGFibGUiOiIyNSUifX19LHsibmFtZSI6ImVsZXBoYW50IiwidWRmIjp7ImNvbnRhaW5lciI6eyJpbWFnZSI6ImFzY2lpOjAuMSIsImFyZ3MiOlsiZWxlcGhhbnQiXSwicmVzb3VyY2VzIjp7fSwiaW1hZ2VQdWxsUG9saWN5IjoiTmV2ZXIifSwiYnVpbHRpbiI6bnVsbCwiZ3JvdXBCeSI6bnVsbH0sImNvbnRhaW5lclRlbXBsYXRlIjp7InJlc291cmNlcyI6e30sImltYWdlUHVsbFBvbGljeSI6Ik5ldmVyIn0sInNjYWxlIjp7Im1pbiI6MX0sInVwZGF0ZVN0cmF0ZWd5Ijp7InR5cGUiOiJSb2xsaW5nVXBkYXRlIiwicm9sbGluZ1VwZGF0ZSI6eyJtYXhVbmF2YWlsYWJsZSI6IjI1JSJ9fX0seyJuYW1lIjoiYXNjaWlhcnQiLCJ1ZGYiOnsiY29udGFpbmVyIjp7ImltYWdlIjoiYXNjaWk6MC4xIiwiYXJncyI6WyJhc2NpaWFydCJdLCJyZXNvdXJjZXMiOnt9LCJpbWFnZVB1bGxQb2xpY3kiOiJOZXZlciJ9LCJidWlsdGluIjpudWxsLCJncm91cEJ5IjpudWxsfSwiY29udGFpbmVyVGVtcGxhdGUiOnsicmVzb3VyY2VzIjp7fSwiaW1hZ2VQdWxsUG9saWN5IjoiTmV2ZXIifSwic2NhbGUiOnsibWluIjoxfSwidXBkYXRlU3RyYXRlZ3kiOnsidHlwZSI6IlJvbGxpbmdVcGRhdGUiLCJyb2xsaW5nVXBkYXRlIjp7Im1heFVuYXZhaWxhYmxlIjoiMjUlIn19fSx7Im5hbWUiOiJzZXJ2ZS1zaW5rIiwic2luayI6eyJ1ZHNpbmsiOnsiY29udGFpbmVyIjp7ImltYWdlIjoic2VydmVzaW5rOjAuMSIsImVudiI6W3sibmFtZSI6Ik5VTUFGTE9XX0NBTExCQUNLX1VSTF9LRVkiLCJ2YWx1ZSI6IlgtTnVtYWZsb3ctQ2FsbGJhY2stVXJsIn0seyJuYW1lIjoiTlVNQUZMT1dfTVNHX0lEX0hFQURFUl9LRVkiLCJ2YWx1ZSI6IlgtTnVtYWZsb3ctSWQifV0sInJlc291cmNlcyI6e30sImltYWdlUHVsbFBvbGljeSI6Ik5ldmVyIn19LCJyZXRyeVN0cmF0ZWd5Ijp7fX0sImNvbnRhaW5lclRlbXBsYXRlIjp7InJlc291cmNlcyI6e30sImltYWdlUHVsbFBvbGljeSI6Ik5ldmVyIn0sInNjYWxlIjp7Im1pbiI6MX0sInVwZGF0ZVN0cmF0ZWd5Ijp7InR5cGUiOiJSb2xsaW5nVXBkYXRlIiwicm9sbGluZ1VwZGF0ZSI6eyJtYXhVbmF2YWlsYWJsZSI6IjI1JSJ9fX0seyJuYW1lIjoiZXJyb3Itc2luayIsInNpbmsiOnsidWRzaW5rIjp7ImNvbnRhaW5lciI6eyJpbWFnZSI6InNlcnZlc2luazowLjEiLCJlbnYiOlt7Im5hbWUiOiJOVU1BRkxPV19DQUxMQkFDS19VUkxfS0VZIiwidmFsdWUiOiJYLU51bWFmbG93LUNhbGxiYWNrLVVybCJ9LHsibmFtZSI6Ik5VTUFGTE9XX01TR19JRF9IRUFERVJfS0VZIiwidmFsdWUiOiJYLU51bWFmbG93LUlkIn1dLCJyZXNvdXJjZXMiOnt9LCJpbWFnZVB1bGxQb2xpY3kiOiJOZXZlciJ
9fSwicmV0cnlTdHJhdGVneSI6e319LCJjb250YWluZXJUZW1wbGF0ZSI6eyJyZXNvdXJjZXMiOnt9LCJpbWFnZVB1bGxQb2xpY3kiOiJOZXZlciJ9LCJzY2FsZSI6eyJtaW4iOjF9LCJ1cGRhdGVTdHJhdGVneSI6eyJ0eXBlIjoiUm9sbGluZ1VwZGF0ZSIsInJvbGxpbmdVcGRhdGUiOnsibWF4VW5hdmFpbGFibGUiOiIyNSUifX19XSwiZWRnZXMiOlt7ImZyb20iOiJpbiIsInRvIjoicGxhbm5lciIsImNvbmRpdGlvbnMiOm51bGx9LHsiZnJvbSI6InBsYW5uZXIiLCJ0byI6ImFzY2lpYXJ0IiwiY29uZGl0aW9ucyI6eyJ0YWdzIjp7Im9wZXJhdG9yIjoib3IiLCJ2YWx1ZXMiOlsiYXNjaWlhcnQiXX19fSx7ImZyb20iOiJwbGFubmVyIiwidG8iOiJ0aWdlciIsImNvbmRpdGlvbnMiOnsidGFncyI6eyJvcGVyYXRvciI6Im9yIiwidmFsdWVzIjpbInRpZ2VyIl19fX0seyJmcm9tIjoicGxhbm5lciIsInRvIjoiZG9nIiwiY29uZGl0aW9ucyI6eyJ0YWdzIjp7Im9wZXJhdG9yIjoib3IiLCJ2YWx1ZXMiOlsiZG9nIl19fX0seyJmcm9tIjoicGxhbm5lciIsInRvIjoiZWxlcGhhbnQiLCJjb25kaXRpb25zIjp7InRhZ3MiOnsib3BlcmF0b3IiOiJvciIsInZhbHVlcyI6WyJlbGVwaGFudCJdfX19LHsiZnJvbSI6InRpZ2VyIiwidG8iOiJzZXJ2ZS1zaW5rIiwiY29uZGl0aW9ucyI6bnVsbH0seyJmcm9tIjoiZG9nIiwidG8iOiJzZXJ2ZS1zaW5rIiwiY29uZGl0aW9ucyI6bnVsbH0seyJmcm9tIjoiZWxlcGhhbnQiLCJ0byI6InNlcnZlLXNpbmsiLCJjb25kaXRpb25zIjpudWxsfSx7ImZyb20iOiJhc2NpaWFydCIsInRvIjoic2VydmUtc2luayIsImNvbmRpdGlvbnMiOm51bGx9LHsiZnJvbSI6InBsYW5uZXIiLCJ0byI6ImVycm9yLXNpbmsiLCJjb25kaXRpb25zIjp7InRhZ3MiOnsib3BlcmF0b3IiOiJvciIsInZhbHVlcyI6WyJlcnJvciJdfX19XSwibGlmZWN5Y2xlIjp7fSwid2F0ZXJtYXJrIjp7fX0="; @@ -534,7 +535,7 @@ mod tests { .method("POST") .uri("/sync_serve") .header("Content-Type", "text/plain") - .header("ID", ID_VALUE) + .header(DEFAULT_ID_HEADER, ID_VALUE) .body(Body::from("Test Message")) .unwrap(); diff --git a/rust/serving/src/callback.rs b/rust/serving/src/callback.rs index a1b0d89e3..d696c51e7 100644 --- a/rust/serving/src/callback.rs +++ b/rust/serving/src/callback.rs @@ -7,11 +7,10 @@ use std::{ use reqwest::Client; use tokio::sync::Semaphore; +use crate::config::DEFAULT_CALLBACK_URL_HEADER_KEY; +use crate::config::DEFAULT_ID_HEADER; use crate::Error; -const ID_HEADER: &str = "X-Numaflow-Id"; -const CALLBACK_URL_HEADER_KEY: &str = "X-Numaflow-Callback-Url"; - /// The data to be sent 
in the POST request #[derive(serde::Serialize)] struct CallbackPayload { @@ -36,21 +35,13 @@ pub struct CallbackHandler { impl CallbackHandler { pub fn new(vertex_name: String, concurrency_limit: usize) -> Self { - // if env::var(ENV_CALLBACK_ENABLED).is_err() { - // return Ok(None); - // }; - - // let Ok(callback_url) = env::var(ENV_CALLBACK_URL) else { - // return Err(Error::Source(format!("Environment variable {ENV_CALLBACK_ENABLED} is set, but {ENV_CALLBACK_URL} is not set"))); - // }; - let client = Client::builder() .danger_accept_invalid_certs(true) .timeout(Duration::from_secs(1)) .build() .expect("Creating callback client for Serving source"); - let semaphore = Arc::new(tokio::sync::Semaphore::new(concurrency_limit)); + let semaphore = Arc::new(Semaphore::new(concurrency_limit)); Self { client, @@ -66,16 +57,20 @@ impl CallbackHandler { previous_vertex: String, ) -> crate::Result<()> { let callback_url = message_headers - .get(CALLBACK_URL_HEADER_KEY) + .get(DEFAULT_CALLBACK_URL_HEADER_KEY) .ok_or_else(|| { Error::Source(format!( - "{CALLBACK_URL_HEADER_KEY} header is not present in the message headers", + "{DEFAULT_CALLBACK_URL_HEADER_KEY} header is not present in the message headers", )) })? .to_owned(); let uuid = message_headers - .get(ID_HEADER) - .ok_or_else(|| Error::Source(format!("{ID_HEADER} is not found in message headers",)))? + .get(DEFAULT_ID_HEADER) + .ok_or_else(|| { + Error::Source(format!( + "{DEFAULT_ID_HEADER} is not found in message headers", + )) + })? .to_owned(); let cb_time = SystemTime::now() .duration_since(UNIX_EPOCH) @@ -101,7 +96,7 @@ impl CallbackHandler { let client = self.client.clone(); tokio::spawn(async move { let _permit = permit; - // Retry incase of failure in making request. + // Retry in case of failure in making request. // When there is a failure, we retry after wait_secs. This value is doubled after each retry attempt. // Then longest wait time will be 64 seconds. 
let mut wait_secs = 1; @@ -109,7 +104,7 @@ impl CallbackHandler { for i in 1..=TOTAL_ATTEMPTS { let resp = client .post(&callback_url) - .header(ID_HEADER, uuid.clone()) + .header(DEFAULT_ID_HEADER, uuid.clone()) .json(&[&callback_payload]) .send() .await; @@ -173,3 +168,102 @@ impl CallbackHandler { Ok(()) } } + +#[cfg(test)] +mod tests { + use crate::app::callback::state::State as CallbackState; + use crate::app::callback::store::memstore::InMemoryStore; + use crate::app::start_main_server; + use crate::app::tracker::MessageGraph; + use crate::callback::{CallbackHandler, DEFAULT_CALLBACK_URL_HEADER_KEY, DEFAULT_ID_HEADER}; + use crate::config::generate_certs; + use crate::pipeline::PipelineDCG; + use crate::{AppState, Settings}; + use axum_server::tls_rustls::RustlsConfig; + use std::collections::HashMap; + use std::sync::Arc; + use std::time::Duration; + use tokio::sync::mpsc; + + type Result = std::result::Result>; + #[tokio::test] + async fn test_callback() -> Result<()> { + // Set up the CryptoProvider (controls core cryptography used by rustls) for the process + let _ = rustls::crypto::aws_lc_rs::default_provider().install_default(); + + let (cert, key) = generate_certs()?; + + let tls_config = RustlsConfig::from_pem(cert.pem().into(), key.serialize_pem().into()) + .await + .map_err(|e| format!("Failed to create tls config {:?}", e))?; + + let settings = Settings { + app_listen_port: 3003, + ..Default::default() + }; + // We start the 'Serving' https server with an in-memory store + // When the server receives callback request, the in-memory store will be populated. + // This is verified at the end of the test. 
+ let store = InMemoryStore::new(); + let message_graph = MessageGraph::from_pipeline(&PipelineDCG::default())?; + let (tx, _) = mpsc::channel(10); + + let mut app_state = AppState { + message: tx, + settings: Arc::new(settings), + callback_state: CallbackState::new(message_graph, store.clone()).await?, + }; + + // Reg + let _callback_notify_rx = app_state.callback_state.register("1234".into()); + + let server_handle = tokio::spawn(start_main_server(app_state, tls_config)); + + let client = reqwest::Client::builder() + .timeout(Duration::from_secs(2)) + .danger_accept_invalid_certs(true) + .build()?; + + // Wait for the server to be ready + let mut server_ready = false; + for _ in 0..10 { + let resp = client.get("https://localhost:3003/livez").send().await?; + if resp.status().is_success() { + server_ready = true; + break; + } + tokio::time::sleep(Duration::from_millis(5)).await; + } + assert!(server_ready, "Server is not ready"); + + let callback_handler = CallbackHandler::new("test".into(), 10); + let message_headers: HashMap = [ + ( + DEFAULT_CALLBACK_URL_HEADER_KEY, + "https://localhost:3003/v1/process/callback", + ), + (DEFAULT_ID_HEADER, "1234"), + ] + .into_iter() + .map(|(k, v)| (k.into(), v.into())) + .collect(); + callback_handler + .callback(&message_headers, &None, "in".into()) + .await?; + tokio::time::sleep(Duration::from_secs(2)).await; + let mut data = None; + for _ in 0..10 { + tokio::time::sleep(Duration::from_millis(2)).await; + data = { + let guard = store.data.lock().unwrap(); + guard.get("1234").cloned() + }; + if data.is_some() { + break; + } + } + assert!(data.is_some(), "Callback data not found in store"); + server_handle.abort(); + Ok(()) + } +} diff --git a/rust/serving/src/config.rs b/rust/serving/src/config.rs index 3ef798129..0b010afcb 100644 --- a/rust/serving/src/config.rs +++ b/rust/serving/src/config.rs @@ -18,6 +18,10 @@ const ENV_NUMAFLOW_SERVING_APP_PORT: &str = "NUMAFLOW_SERVING_APP_LISTEN_PORT"; const 
ENV_NUMAFLOW_SERVING_AUTH_TOKEN: &str = "NUMAFLOW_SERVING_AUTH_TOKEN"; const ENV_MIN_PIPELINE_SPEC: &str = "NUMAFLOW_SERVING_MIN_PIPELINE_SPEC"; +pub(crate) const DEFAULT_ID_HEADER: &str = "X-Numaflow-Id"; +pub(crate) const DEFAULT_CALLBACK_URL_HEADER_KEY: &str = "X-Numaflow-Callback-Url"; + + pub fn generate_certs() -> std::result::Result<(Certificate, KeyPair), String> { let CertifiedKey { cert, key_pair } = generate_simple_self_signed(vec!["localhost".into()]) .map_err(|e| format!("Failed to generate cert {:?}", e))?; @@ -63,7 +67,7 @@ pub struct Settings { impl Default for Settings { fn default() -> Self { Self { - tid_header: "ID".to_owned(), + tid_header: DEFAULT_ID_HEADER.to_owned(), app_listen_port: 3000, metrics_server_listen_port: 3001, upstream_addr: "localhost:8888".to_owned(), @@ -176,7 +180,7 @@ mod tests { fn test_default_config() { let settings = Settings::default(); - assert_eq!(settings.tid_header, "ID"); + assert_eq!(settings.tid_header, "X-Numaflow-Id"); assert_eq!(settings.app_listen_port, 3000); assert_eq!(settings.metrics_server_listen_port, 3001); assert_eq!(settings.upstream_addr, "localhost:8888"); diff --git a/rust/serving/src/source.rs b/rust/serving/src/source.rs index 4db7e7d3a..ebcdae612 100644 --- a/rust/serving/src/source.rs +++ b/rust/serving/src/source.rs @@ -11,6 +11,7 @@ use crate::app::callback::store::redisstore::RedisConnection; use crate::app::tracker::MessageGraph; use crate::Settings; use crate::{Error, Result}; +use crate::config::{DEFAULT_CALLBACK_URL_HEADER_KEY, DEFAULT_ID_HEADER}; /// [Message] with a oneshot for notifying when the message has been completed processed. 
pub(crate) struct MessageWrapper { @@ -152,10 +153,10 @@ impl ServingSourceActor { self.tracker.insert(message.id.clone(), confirm_save); message .headers - .insert("X-Numaflow-Callback-Url".into(), self.callback_url.clone()); + .insert(DEFAULT_CALLBACK_URL_HEADER_KEY.into(), self.callback_url.clone()); message .headers - .insert("X-Numaflow-Id".into(), message.id.clone()); + .insert(DEFAULT_ID_HEADER.into(), message.id.clone()); messages.push(message); } Ok(messages) From b65f928f5eed31cdab6da9b3c235ec569335a13f Mon Sep 17 00:00:00 2001 From: Sreekanth Date: Mon, 13 Jan 2025 16:05:27 +0530 Subject: [PATCH 05/27] Formatting Signed-off-by: Sreekanth --- rust/serving/src/app/jetstream_proxy.rs | 2 +- rust/serving/src/callback.rs | 16 +++++++++++----- rust/serving/src/config.rs | 1 - rust/serving/src/source.rs | 9 +++++---- 4 files changed, 17 insertions(+), 11 deletions(-) diff --git a/rust/serving/src/app/jetstream_proxy.rs b/rust/serving/src/app/jetstream_proxy.rs index 34c3d4343..dc33a6733 100644 --- a/rust/serving/src/app/jetstream_proxy.rs +++ b/rust/serving/src/app/jetstream_proxy.rs @@ -271,9 +271,9 @@ mod tests { use crate::app::callback::store::PayloadToSave; use crate::app::callback::CallbackRequest; use crate::app::tracker::MessageGraph; + use crate::config::DEFAULT_ID_HEADER; use crate::pipeline::PipelineDCG; use crate::{Error, Settings}; - use crate::config::DEFAULT_ID_HEADER; const PIPELINE_SPEC_ENCODED: &str = 
"eyJ2ZXJ0aWNlcyI6W3sibmFtZSI6ImluIiwic291cmNlIjp7InNlcnZpbmciOnsiYXV0aCI6bnVsbCwic2VydmljZSI6dHJ1ZSwibXNnSURIZWFkZXJLZXkiOiJYLU51bWFmbG93LUlkIiwic3RvcmUiOnsidXJsIjoicmVkaXM6Ly9yZWRpczo2Mzc5In19fSwiY29udGFpbmVyVGVtcGxhdGUiOnsicmVzb3VyY2VzIjp7fSwiaW1hZ2VQdWxsUG9saWN5IjoiTmV2ZXIiLCJlbnYiOlt7Im5hbWUiOiJSVVNUX0xPRyIsInZhbHVlIjoiZGVidWcifV19LCJzY2FsZSI6eyJtaW4iOjF9LCJ1cGRhdGVTdHJhdGVneSI6eyJ0eXBlIjoiUm9sbGluZ1VwZGF0ZSIsInJvbGxpbmdVcGRhdGUiOnsibWF4VW5hdmFpbGFibGUiOiIyNSUifX19LHsibmFtZSI6InBsYW5uZXIiLCJ1ZGYiOnsiY29udGFpbmVyIjp7ImltYWdlIjoiYXNjaWk6MC4xIiwiYXJncyI6WyJwbGFubmVyIl0sInJlc291cmNlcyI6e30sImltYWdlUHVsbFBvbGljeSI6Ik5ldmVyIn0sImJ1aWx0aW4iOm51bGwsImdyb3VwQnkiOm51bGx9LCJjb250YWluZXJUZW1wbGF0ZSI6eyJyZXNvdXJjZXMiOnt9LCJpbWFnZVB1bGxQb2xpY3kiOiJOZXZlciJ9LCJzY2FsZSI6eyJtaW4iOjF9LCJ1cGRhdGVTdHJhdGVneSI6eyJ0eXBlIjoiUm9sbGluZ1VwZGF0ZSIsInJvbGxpbmdVcGRhdGUiOnsibWF4VW5hdmFpbGFibGUiOiIyNSUifX19LHsibmFtZSI6InRpZ2VyIiwidWRmIjp7ImNvbnRhaW5lciI6eyJpbWFnZSI6ImFzY2lpOjAuMSIsImFyZ3MiOlsidGlnZXIiXSwicmVzb3VyY2VzIjp7fSwiaW1hZ2VQdWxsUG9saWN5IjoiTmV2ZXIifSwiYnVpbHRpbiI6bnVsbCwiZ3JvdXBCeSI6bnVsbH0sImNvbnRhaW5lclRlbXBsYXRlIjp7InJlc291cmNlcyI6e30sImltYWdlUHVsbFBvbGljeSI6Ik5ldmVyIn0sInNjYWxlIjp7Im1pbiI6MX0sInVwZGF0ZVN0cmF0ZWd5Ijp7InR5cGUiOiJSb2xsaW5nVXBkYXRlIiwicm9sbGluZ1VwZGF0ZSI6eyJtYXhVbmF2YWlsYWJsZSI6IjI1JSJ9fX0seyJuYW1lIjoiZG9nIiwidWRmIjp7ImNvbnRhaW5lciI6eyJpbWFnZSI6ImFzY2lpOjAuMSIsImFyZ3MiOlsiZG9nIl0sInJlc291cmNlcyI6e30sImltYWdlUHVsbFBvbGljeSI6Ik5ldmVyIn0sImJ1aWx0aW4iOm51bGwsImdyb3VwQnkiOm51bGx9LCJjb250YWluZXJUZW1wbGF0ZSI6eyJyZXNvdXJjZXMiOnt9LCJpbWFnZVB1bGxQb2xpY3kiOiJOZXZlciJ9LCJzY2FsZSI6eyJtaW4iOjF9LCJ1cGRhdGVTdHJhdGVneSI6eyJ0eXBlIjoiUm9sbGluZ1VwZGF0ZSIsInJvbGxpbmdVcGRhdGUiOnsibWF4VW5hdmFpbGFibGUiOiIyNSUifX19LHsibmFtZSI6ImVsZXBoYW50IiwidWRmIjp7ImNvbnRhaW5lciI6eyJpbWFnZSI6ImFzY2lpOjAuMSIsImFyZ3MiOlsiZWxlcGhhbnQiXSwicmVzb3VyY2VzIjp7fSwiaW1hZ2VQdWxsUG9saWN5IjoiTmV2ZXIifSwiYnVpbHRpbiI6bnVsbCwiZ3JvdXBCeSI6bnVsbH0sImNvbnRhaW5lclRlbXBsYXRlIjp7InJlc291cmNlcyI6e30sImltYWdlUHVsbFBvbGljeSI6Ik5ldmV
yIn0sInNjYWxlIjp7Im1pbiI6MX0sInVwZGF0ZVN0cmF0ZWd5Ijp7InR5cGUiOiJSb2xsaW5nVXBkYXRlIiwicm9sbGluZ1VwZGF0ZSI6eyJtYXhVbmF2YWlsYWJsZSI6IjI1JSJ9fX0seyJuYW1lIjoiYXNjaWlhcnQiLCJ1ZGYiOnsiY29udGFpbmVyIjp7ImltYWdlIjoiYXNjaWk6MC4xIiwiYXJncyI6WyJhc2NpaWFydCJdLCJyZXNvdXJjZXMiOnt9LCJpbWFnZVB1bGxQb2xpY3kiOiJOZXZlciJ9LCJidWlsdGluIjpudWxsLCJncm91cEJ5IjpudWxsfSwiY29udGFpbmVyVGVtcGxhdGUiOnsicmVzb3VyY2VzIjp7fSwiaW1hZ2VQdWxsUG9saWN5IjoiTmV2ZXIifSwic2NhbGUiOnsibWluIjoxfSwidXBkYXRlU3RyYXRlZ3kiOnsidHlwZSI6IlJvbGxpbmdVcGRhdGUiLCJyb2xsaW5nVXBkYXRlIjp7Im1heFVuYXZhaWxhYmxlIjoiMjUlIn19fSx7Im5hbWUiOiJzZXJ2ZS1zaW5rIiwic2luayI6eyJ1ZHNpbmsiOnsiY29udGFpbmVyIjp7ImltYWdlIjoic2VydmVzaW5rOjAuMSIsImVudiI6W3sibmFtZSI6Ik5VTUFGTE9XX0NBTExCQUNLX1VSTF9LRVkiLCJ2YWx1ZSI6IlgtTnVtYWZsb3ctQ2FsbGJhY2stVXJsIn0seyJuYW1lIjoiTlVNQUZMT1dfTVNHX0lEX0hFQURFUl9LRVkiLCJ2YWx1ZSI6IlgtTnVtYWZsb3ctSWQifV0sInJlc291cmNlcyI6e30sImltYWdlUHVsbFBvbGljeSI6Ik5ldmVyIn19LCJyZXRyeVN0cmF0ZWd5Ijp7fX0sImNvbnRhaW5lclRlbXBsYXRlIjp7InJlc291cmNlcyI6e30sImltYWdlUHVsbFBvbGljeSI6Ik5ldmVyIn0sInNjYWxlIjp7Im1pbiI6MX0sInVwZGF0ZVN0cmF0ZWd5Ijp7InR5cGUiOiJSb2xsaW5nVXBkYXRlIiwicm9sbGluZ1VwZGF0ZSI6eyJtYXhVbmF2YWlsYWJsZSI6IjI1JSJ9fX0seyJuYW1lIjoiZXJyb3Itc2luayIsInNpbmsiOnsidWRzaW5rIjp7ImNvbnRhaW5lciI6eyJpbWFnZSI6InNlcnZlc2luazowLjEiLCJlbnYiOlt7Im5hbWUiOiJOVU1BRkxPV19DQUxMQkFDS19VUkxfS0VZIiwidmFsdWUiOiJYLU51bWFmbG93LUNhbGxiYWNrLVVybCJ9LHsibmFtZSI6Ik5VTUFGTE9XX01TR19JRF9IRUFERVJfS0VZIiwidmFsdWUiOiJYLU51bWFmbG93LUlkIn1dLCJyZXNvdXJjZXMiOnt9LCJpbWFnZVB1bGxQb2xpY3kiOiJOZXZlciJ9fSwicmV0cnlTdHJhdGVneSI6e319LCJjb250YWluZXJUZW1wbGF0ZSI6eyJyZXNvdXJjZXMiOnt9LCJpbWFnZVB1bGxQb2xpY3kiOiJOZXZlciJ9LCJzY2FsZSI6eyJtaW4iOjF9LCJ1cGRhdGVTdHJhdGVneSI6eyJ0eXBlIjoiUm9sbGluZ1VwZGF0ZSIsInJvbGxpbmdVcGRhdGUiOnsibWF4VW5hdmFpbGFibGUiOiIyNSUifX19XSwiZWRnZXMiOlt7ImZyb20iOiJpbiIsInRvIjoicGxhbm5lciIsImNvbmRpdGlvbnMiOm51bGx9LHsiZnJvbSI6InBsYW5uZXIiLCJ0byI6ImFzY2lpYXJ0IiwiY29uZGl0aW9ucyI6eyJ0YWdzIjp7Im9wZXJhdG9yIjoib3IiLCJ2YWx1ZXMiOlsiYXNjaWlhcnQiXX19fSx7ImZyb20iOiJwbGFubmVyIiwidG8iOiJ0aWdlciIsImN
vbmRpdGlvbnMiOnsidGFncyI6eyJvcGVyYXRvciI6Im9yIiwidmFsdWVzIjpbInRpZ2VyIl19fX0seyJmcm9tIjoicGxhbm5lciIsInRvIjoiZG9nIiwiY29uZGl0aW9ucyI6eyJ0YWdzIjp7Im9wZXJhdG9yIjoib3IiLCJ2YWx1ZXMiOlsiZG9nIl19fX0seyJmcm9tIjoicGxhbm5lciIsInRvIjoiZWxlcGhhbnQiLCJjb25kaXRpb25zIjp7InRhZ3MiOnsib3BlcmF0b3IiOiJvciIsInZhbHVlcyI6WyJlbGVwaGFudCJdfX19LHsiZnJvbSI6InRpZ2VyIiwidG8iOiJzZXJ2ZS1zaW5rIiwiY29uZGl0aW9ucyI6bnVsbH0seyJmcm9tIjoiZG9nIiwidG8iOiJzZXJ2ZS1zaW5rIiwiY29uZGl0aW9ucyI6bnVsbH0seyJmcm9tIjoiZWxlcGhhbnQiLCJ0byI6InNlcnZlLXNpbmsiLCJjb25kaXRpb25zIjpudWxsfSx7ImZyb20iOiJhc2NpaWFydCIsInRvIjoic2VydmUtc2luayIsImNvbmRpdGlvbnMiOm51bGx9LHsiZnJvbSI6InBsYW5uZXIiLCJ0byI6ImVycm9yLXNpbmsiLCJjb25kaXRpb25zIjp7InRhZ3MiOnsib3BlcmF0b3IiOiJvciIsInZhbHVlcyI6WyJlcnJvciJdfX19XSwibGlmZWN5Y2xlIjp7fSwid2F0ZXJtYXJrIjp7fX0="; diff --git a/rust/serving/src/callback.rs b/rust/serving/src/callback.rs index d696c51e7..b46641cb1 100644 --- a/rust/serving/src/callback.rs +++ b/rust/serving/src/callback.rs @@ -214,8 +214,12 @@ mod tests { callback_state: CallbackState::new(message_graph, store.clone()).await?, }; - // Reg - let _callback_notify_rx = app_state.callback_state.register("1234".into()); + // We use this value as the request id of the callback request + const ID_VALUE: &str = "1234"; + + // Register the request id in the store. This normally happens when the Serving source + // receives a request from the client. The callbacks for this request must only happen after this. 
+ let _callback_notify_rx = app_state.callback_state.register(ID_VALUE.into()); let server_handle = tokio::spawn(start_main_server(app_state, tls_config)); @@ -242,21 +246,23 @@ mod tests { DEFAULT_CALLBACK_URL_HEADER_KEY, "https://localhost:3003/v1/process/callback", ), - (DEFAULT_ID_HEADER, "1234"), + (DEFAULT_ID_HEADER, ID_VALUE), ] .into_iter() .map(|(k, v)| (k.into(), v.into())) .collect(); + + // On the server, this fails with SubGraphInvalidInput("Invalid callback: 1234, vertex: in") + // We get 200 OK response from the server, since we already registered this request ID in the store. callback_handler .callback(&message_headers, &None, "in".into()) .await?; - tokio::time::sleep(Duration::from_secs(2)).await; let mut data = None; for _ in 0..10 { tokio::time::sleep(Duration::from_millis(2)).await; data = { let guard = store.data.lock().unwrap(); - guard.get("1234").cloned() + guard.get(ID_VALUE).cloned() }; if data.is_some() { break; diff --git a/rust/serving/src/config.rs b/rust/serving/src/config.rs index 0b010afcb..df2a740dc 100644 --- a/rust/serving/src/config.rs +++ b/rust/serving/src/config.rs @@ -21,7 +21,6 @@ const ENV_MIN_PIPELINE_SPEC: &str = "NUMAFLOW_SERVING_MIN_PIPELINE_SPEC"; pub(crate) const DEFAULT_ID_HEADER: &str = "X-Numaflow-Id"; pub(crate) const DEFAULT_CALLBACK_URL_HEADER_KEY: &str = "X-Numaflow-Callback-Url"; - pub fn generate_certs() -> std::result::Result<(Certificate, KeyPair), String> { let CertifiedKey { cert, key_pair } = generate_simple_self_signed(vec!["localhost".into()]) .map_err(|e| format!("Failed to generate cert {:?}", e))?; diff --git a/rust/serving/src/source.rs b/rust/serving/src/source.rs index ebcdae612..3723efb32 100644 --- a/rust/serving/src/source.rs +++ b/rust/serving/src/source.rs @@ -9,9 +9,9 @@ use tokio::time::Instant; use crate::app::callback::state::State as CallbackState; use crate::app::callback::store::redisstore::RedisConnection; use crate::app::tracker::MessageGraph; +use 
crate::config::{DEFAULT_CALLBACK_URL_HEADER_KEY, DEFAULT_ID_HEADER}; use crate::Settings; use crate::{Error, Result}; -use crate::config::{DEFAULT_CALLBACK_URL_HEADER_KEY, DEFAULT_ID_HEADER}; /// [Message] with a oneshot for notifying when the message has been completed processed. pub(crate) struct MessageWrapper { @@ -151,9 +151,10 @@ impl ServingSourceActor { } = message; self.tracker.insert(message.id.clone(), confirm_save); - message - .headers - .insert(DEFAULT_CALLBACK_URL_HEADER_KEY.into(), self.callback_url.clone()); + message.headers.insert( + DEFAULT_CALLBACK_URL_HEADER_KEY.into(), + self.callback_url.clone(), + ); message .headers .insert(DEFAULT_ID_HEADER.into(), message.id.clone()); From d68abf96d682f459bc5fdecb267fcb3b448c3072 Mon Sep 17 00:00:00 2001 From: Sreekanth Date: Mon, 13 Jan 2025 19:19:46 +0530 Subject: [PATCH 06/27] Unit tests callback failures Signed-off-by: Sreekanth --- .../src/pipeline/isb/jetstream/writer.rs | 5 ++-- rust/serving/src/callback.rs | 27 +++++++++++++++++-- 2 files changed, 28 insertions(+), 4 deletions(-) diff --git a/rust/numaflow-core/src/pipeline/isb/jetstream/writer.rs b/rust/numaflow-core/src/pipeline/isb/jetstream/writer.rs index 2a7818df7..2605c625f 100644 --- a/rust/numaflow-core/src/pipeline/isb/jetstream/writer.rs +++ b/rust/numaflow-core/src/pipeline/isb/jetstream/writer.rs @@ -253,9 +253,10 @@ impl JetstreamWriter { if let Some(ref callback_handler) = this.callback_handler { let metadata = message.metadata.ok_or_else(|| { - Error::Source(format!( + Error::Source( "Message does not contain previous vertex name in the metadata" - )) + .to_owned(), + ) })?; if let Err(e) = callback_handler .callback(&message.headers, &message.tags, metadata.previous_vertex) diff --git a/rust/serving/src/callback.rs b/rust/serving/src/callback.rs index b46641cb1..52c0a5754 100644 --- a/rust/serving/src/callback.rs +++ b/rust/serving/src/callback.rs @@ -178,7 +178,7 @@ mod tests { use crate::callback::{CallbackHandler, 
DEFAULT_CALLBACK_URL_HEADER_KEY, DEFAULT_ID_HEADER}; use crate::config::generate_certs; use crate::pipeline::PipelineDCG; - use crate::{AppState, Settings}; + use crate::{AppState, Error, Settings}; use axum_server::tls_rustls::RustlsConfig; use std::collections::HashMap; use std::sync::Arc; @@ -252,10 +252,11 @@ mod tests { .map(|(k, v)| (k.into(), v.into())) .collect(); + let tags = Arc::from(vec!["tag1".to_owned()]); // On the server, this fails with SubGraphInvalidInput("Invalid callback: 1234, vertex: in") // We get 200 OK response from the server, since we already registered this request ID in the store. callback_handler - .callback(&message_headers, &None, "in".into()) + .callback(&message_headers, &Some(tags), "in".into()) .await?; let mut data = None; for _ in 0..10 { @@ -272,4 +273,26 @@ mod tests { server_handle.abort(); Ok(()) } + + #[tokio::test] + async fn test_callback_missing_headers() -> Result<()> { + let callback_handler = CallbackHandler::new("test".into(), 10); + let message_headers: HashMap = HashMap::new(); + let result = callback_handler + .callback(&message_headers, &None, "in".into()) + .await; + assert!(result.is_err()); + + let mut message_headers: HashMap = HashMap::new(); + message_headers.insert( + DEFAULT_CALLBACK_URL_HEADER_KEY.into(), + "https://localhost:3003/v1/process/callback".into(), + ); + let result = callback_handler + .callback(&message_headers, &None, "in".into()) + .await; + assert!(result.is_err()); + + Ok(()) + } } From 011a64939bd820e46bd4d6ddfc86ebb82914b4bc Mon Sep 17 00:00:00 2001 From: Sreekanth Date: Wed, 15 Jan 2025 10:41:44 +0530 Subject: [PATCH 07/27] Callback after resolving PAFs Signed-off-by: Sreekanth --- .../src/pipeline/isb/jetstream/reader.rs | 2 - .../src/pipeline/isb/jetstream/writer.rs | 80 +++++++++---------- rust/serving/src/callback.rs | 2 +- 3 files changed, 41 insertions(+), 43 deletions(-) diff --git a/rust/numaflow-core/src/pipeline/isb/jetstream/reader.rs 
b/rust/numaflow-core/src/pipeline/isb/jetstream/reader.rs index d09ffb32c..c46838007 100644 --- a/rust/numaflow-core/src/pipeline/isb/jetstream/reader.rs +++ b/rust/numaflow-core/src/pipeline/isb/jetstream/reader.rs @@ -384,8 +384,6 @@ mod tests { #[cfg(feature = "nats-tests")] #[tokio::test] async fn test_jetstream_ack() { - use async_nats::jetstream::stream::No; - let js_url = "localhost:4222"; // Create JetStream context let client = async_nats::connect(js_url).await.unwrap(); diff --git a/rust/numaflow-core/src/pipeline/isb/jetstream/writer.rs b/rust/numaflow-core/src/pipeline/isb/jetstream/writer.rs index 2605c625f..7baba7d11 100644 --- a/rust/numaflow-core/src/pipeline/isb/jetstream/writer.rs +++ b/rust/numaflow-core/src/pipeline/isb/jetstream/writer.rs @@ -201,6 +201,7 @@ impl JetstreamWriter { continue; } + // List of PAFs(one message can be written to multiple streams) let mut pafs = vec![]; for vertex in &*this.config { // check whether we need to write to this downstream vertex @@ -244,27 +245,8 @@ impl JetstreamWriter { continue; } - this.resolve_pafs(ResolveAndPublishResult { - pafs, - payload: message.value.clone().into(), - offset: message.id.offset, - }) - .await?; - - if let Some(ref callback_handler) = this.callback_handler { - let metadata = message.metadata.ok_or_else(|| { - Error::Source( - "Message does not contain previous vertex name in the metadata" - .to_owned(), - ) - })?; - if let Err(e) = callback_handler - .callback(&message.headers, &message.tags, metadata.previous_vertex) - .await - { - tracing::error!(?e, "Failed to send callback for message"); - } - }; + this.resolve_pafs(pafs, message, this.callback_handler.clone()) + .await?; processed_msgs_count += 1; if last_logged_at.elapsed().as_secs() >= 1 { @@ -357,7 +339,12 @@ impl JetstreamWriter { /// asynchronously, if it fails it will do a blocking write to resolve the PAFs. 
/// At any point in time, we will only have X PAF resolvers running, this will help us create a /// natural backpressure. - pub(super) async fn resolve_pafs(&self, result: ResolveAndPublishResult) -> Result<()> { + pub(super) async fn resolve_pafs( + &self, + pafs: Vec<((String, u16), PublishAckFuture)>, + message: Message, + callback_handler: Option, + ) -> Result<()> { let start_time = Instant::now(); let permit = Arc::clone(&self.sem) .acquire_owned() @@ -371,7 +358,7 @@ impl JetstreamWriter { tokio::spawn(async move { let _permit = permit; - for (stream, paf) in result.pafs { + for (stream, paf) in pafs { match paf.await { Ok(ack) => { if ack.duplicate { @@ -385,9 +372,12 @@ impl JetstreamWriter { Offset::Int(IntOffset::new(ack.sequence, stream.1)), )); tracker_handle - .delete(result.offset.clone()) + .delete(message.id.offset.clone()) .await .expect("Failed to delete offset from tracker"); + if let Err(e) = do_callback(callback_handler.as_ref(), &message).await { + tracing::error!(?e, "Failed to send callback request"); + } } Err(e) => { error!( @@ -397,7 +387,7 @@ impl JetstreamWriter { ); match JetstreamWriter::blocking_write( stream.clone(), - result.payload.clone(), + message.value.clone(), js_ctx.clone(), cancel_token.clone(), ) @@ -414,12 +404,17 @@ impl JetstreamWriter { stream.clone(), Offset::Int(IntOffset::new(ack.sequence, stream.1)), )); + if let Err(e) = + do_callback(callback_handler.as_ref(), &message).await + { + tracing::error!(?e, "Failed to send callback request"); + } } Err(e) => { error!(?e, "Blocking write failed for stream {}", stream.0); // Since we failed to write to the stream, we need to send a NAK to the reader tracker_handle - .discard(result.offset.clone()) + .discard(message.id.offset.clone()) .await .expect("Failed to discard offset from the tracker"); return; @@ -442,17 +437,14 @@ impl JetstreamWriter { /// an error it means it is fatal non-retryable error. 
async fn blocking_write( stream: Stream, - payload: Vec, + payload: Bytes, js_ctx: Context, cln_token: CancellationToken, ) -> Result { let start_time = Instant::now(); info!("Blocking write for stream {}", stream.0); loop { - match js_ctx - .publish(stream.0.clone(), Bytes::from(payload.clone())) - .await - { + match js_ctx.publish(stream.0.clone(), payload.clone()).await { Ok(paf) => match paf.await { Ok(ack) => { if ack.duplicate { @@ -484,15 +476,23 @@ impl JetstreamWriter { } } -/// ResolveAndPublishResult resolves the result of the write PAF operation. -/// It contains the list of pafs(one message can be written to multiple streams) -/// and the payload that was written. Once the PAFs for all the streams have been -/// resolved, the information is published to callee_tx. -#[derive(Debug)] -pub(crate) struct ResolveAndPublishResult { - pub(crate) pafs: Vec<(Stream, PublishAckFuture)>, - pub(crate) payload: Vec, - pub(crate) offset: Bytes, +async fn do_callback(callback_handler: Option<&CallbackHandler>, message: &Message) -> Result<()> { + let Some(callback_handler) = callback_handler else { + return Ok(()); + }; + + let metadata = message.metadata.as_ref().ok_or_else(|| { + Error::Source("Message does not contain previous vertex name in the metadata".to_owned()) + })?; + + callback_handler + .callback( + &message.headers, + &message.tags, + metadata.previous_vertex.clone(), + ) + .await + .map_err(|e| Error::Source(format!("Failed to send callback for message: {e:?}"))) } #[cfg(test)] diff --git a/rust/serving/src/callback.rs b/rust/serving/src/callback.rs index 52c0a5754..75d5522b8 100644 --- a/rust/serving/src/callback.rs +++ b/rust/serving/src/callback.rs @@ -178,7 +178,7 @@ mod tests { use crate::callback::{CallbackHandler, DEFAULT_CALLBACK_URL_HEADER_KEY, DEFAULT_ID_HEADER}; use crate::config::generate_certs; use crate::pipeline::PipelineDCG; - use crate::{AppState, Error, Settings}; + use crate::{AppState, Settings}; use 
axum_server::tls_rustls::RustlsConfig; use std::collections::HashMap; use std::sync::Arc; From abf5b89f06267ff56a942366e3e8c81df165506c Mon Sep 17 00:00:00 2001 From: Sreekanth Date: Thu, 16 Jan 2025 09:00:54 +0530 Subject: [PATCH 08/27] Avoid Bytes to String conversion in tracker Signed-off-by: Sreekanth --- rust/numaflow-core/src/tracker.rs | 41 +++++++++++-------------------- 1 file changed, 15 insertions(+), 26 deletions(-) diff --git a/rust/numaflow-core/src/tracker.rs b/rust/numaflow-core/src/tracker.rs index a8ccaca54..6a568ea91 100644 --- a/rust/numaflow-core/src/tracker.rs +++ b/rust/numaflow-core/src/tracker.rs @@ -1,4 +1,4 @@ -//! Tracker is added because when do data forwarding in [MonoVertex](crate::monovertex::forwarder) or +//! Tracker is added because when we do data forwarding in [MonoVertex](crate::monovertex::forwarder) or //! in [Pipeline](crate::pipeline::forwarder), immaterial whether we are in source, UDF, or Sink, we //! have to track whether the message has completely moved to the next vertex (N+1)th before we can //! mark that message as done in the Nth vertex. We use Tracker to let Read know that it can mark the @@ -28,19 +28,19 @@ struct TrackerEntry { /// ActorMessage represents the messages that can be sent to the Tracker actor. enum ActorMessage { Insert { - offset: String, + offset: Bytes, ack_send: oneshot::Sender, }, Update { - offset: String, + offset: Bytes, count: u32, eof: bool, }, Delete { - offset: String, + offset: Bytes, }, Discard { - offset: String, + offset: Bytes, }, DiscardAll, // New variant for discarding all messages #[cfg(test)] @@ -52,7 +52,7 @@ enum ActorMessage { /// Tracker is responsible for managing the state of messages being processed. /// It keeps track of message offsets and their completeness, and sends acknowledgments. 
struct Tracker { - entries: HashMap, + entries: HashMap, receiver: mpsc::Receiver, } @@ -114,7 +114,7 @@ impl Tracker { } /// Inserts a new entry into the tracker with the given offset and ack sender. - fn handle_insert(&mut self, offset: String, respond_to: oneshot::Sender) { + fn handle_insert(&mut self, offset: Bytes, respond_to: oneshot::Sender) { self.entries.insert( offset, TrackerEntry { @@ -126,7 +126,7 @@ impl Tracker { } /// Updates an existing entry in the tracker with the number of expected messages and EOF status. - fn handle_update(&mut self, offset: String, count: u32, eof: bool) { + fn handle_update(&mut self, offset: Bytes, count: u32, eof: bool) { if let Some(entry) = self.entries.get_mut(&offset) { entry.count += count; entry.eof = eof; @@ -144,8 +144,8 @@ impl Tracker { } /// Removes an entry from the tracker and sends an acknowledgment if the count is zero - /// or the entry is marked as EOF. - fn handle_delete(&mut self, offset: String) { + /// and the entry is marked as EOF. + fn handle_delete(&mut self, offset: Bytes) { if let Some(mut entry) = self.entries.remove(&offset) { if entry.count > 0 { entry.count -= 1; @@ -162,7 +162,7 @@ impl Tracker { } /// Discards an entry from the tracker and sends a nak. - fn handle_discard(&mut self, offset: String) { + fn handle_discard(&mut self, offset: Bytes) { if let Some(entry) = self.entries.remove(&offset) { entry .ack_send @@ -204,10 +204,7 @@ impl TrackerHandle { offset: Bytes, ack_send: oneshot::Sender, ) -> Result<()> { - let message = ActorMessage::Insert { - offset: String::from_utf8_lossy(&offset).to_string(), - ack_send, - }; + let message = ActorMessage::Insert { offset, ack_send }; self.sender .send(message) .await @@ -217,11 +214,7 @@ impl TrackerHandle { /// Updates an existing message in the Tracker with the given offset, count, and EOF status. 
pub(crate) async fn update(&self, offset: Bytes, count: u32, eof: bool) -> Result<()> { - let message = ActorMessage::Update { - offset: String::from_utf8_lossy(&offset).to_string(), - count, - eof, - }; + let message = ActorMessage::Update { offset, count, eof }; self.sender .send(message) .await @@ -231,9 +224,7 @@ impl TrackerHandle { /// Deletes a message from the Tracker with the given offset. pub(crate) async fn delete(&self, offset: Bytes) -> Result<()> { - let message = ActorMessage::Delete { - offset: String::from_utf8_lossy(&offset).to_string(), - }; + let message = ActorMessage::Delete { offset }; self.sender .send(message) .await @@ -243,9 +234,7 @@ impl TrackerHandle { /// Discards a message from the Tracker with the given offset. pub(crate) async fn discard(&self, offset: Bytes) -> Result<()> { - let message = ActorMessage::Discard { - offset: String::from_utf8_lossy(&offset).to_string(), - }; + let message = ActorMessage::Discard { offset }; self.sender .send(message) .await From a1b52dc4bd8db4733cd1421cc6770d9f9e0201ff Mon Sep 17 00:00:00 2001 From: Sreekanth Date: Thu, 16 Jan 2025 15:50:29 +0530 Subject: [PATCH 09/27] Move callback handler to tracker Signed-off-by: Sreekanth --- rust/numaflow-core/src/mapper/map.rs | 33 +- rust/numaflow-core/src/monovertex.rs | 27 +- .../numaflow-core/src/monovertex/forwarder.rs | 20 +- rust/numaflow-core/src/pipeline.rs | 46 +-- .../src/pipeline/forwarder/sink_forwarder.rs | 12 +- .../pipeline/forwarder/source_forwarder.rs | 3 +- .../src/pipeline/isb/jetstream/reader.rs | 6 +- .../src/pipeline/isb/jetstream/writer.rs | 73 +--- rust/numaflow-core/src/sink.rs | 47 +-- rust/numaflow-core/src/source.rs | 6 +- rust/numaflow-core/src/tracker.rs | 353 +++++++++++++++--- rust/numaflow-core/src/transformer.rs | 12 +- rust/serving/src/app/callback.rs | 23 +- rust/serving/src/app/callback/state.rs | 2 +- .../src/app/callback/store/memstore.rs | 2 +- .../src/app/callback/store/redisstore.rs | 2 +- 
rust/serving/src/app/jetstream_proxy.rs | 9 +- rust/serving/src/app/tracker.rs | 4 +- rust/serving/src/callback.rs | 118 ++---- rust/serving/src/config.rs | 4 +- rust/serving/src/lib.rs | 2 +- rust/serving/src/source.rs | 3 + 22 files changed, 424 insertions(+), 383 deletions(-) diff --git a/rust/numaflow-core/src/mapper/map.rs b/rust/numaflow-core/src/mapper/map.rs index 6a9b40d56..c9682a48b 100644 --- a/rust/numaflow-core/src/mapper/map.rs +++ b/rust/numaflow-core/src/mapper/map.rs @@ -344,14 +344,7 @@ impl MapHandle { match receiver.await { Ok(Ok(mut mapped_messages)) => { // update the tracker with the number of messages sent and send the mapped messages - if let Err(e) = tracker_handle - .update( - read_msg.id.offset.clone(), - mapped_messages.len() as u32, - true, - ) - .await - { + if let Err(e) = tracker_handle.update_many(&mapped_messages, true).await { error_tx.send(e).await.expect("failed to send error"); return; } @@ -398,10 +391,7 @@ impl MapHandle { for receiver in receivers { match receiver.await { Ok(Ok(mut mapped_messages)) => { - let offset = mapped_messages.first().unwrap().id.offset.clone(); - tracker_handle - .update(offset.clone(), mapped_messages.len() as u32, true) - .await?; + tracker_handle.update_many(&mapped_messages, true).await?; for mapped_message in mapped_messages.drain(..) 
{ output_tx .send(mapped_message) @@ -455,8 +445,7 @@ impl MapHandle { while let Some(result) = receiver.recv().await { match result { Ok(mapped_message) => { - let offset = mapped_message.id.offset.clone(); - if let Err(e) = tracker_handle.update(offset.clone(), 1, false).await { + if let Err(e) = tracker_handle.update(&mapped_message).await { error_tx.send(e).await.expect("failed to send error"); return; } @@ -475,7 +464,7 @@ impl MapHandle { } } - if let Err(e) = tracker_handle.update(read_msg.id.offset, 0, true).await { + if let Err(e) = tracker_handle.update_eof(read_msg.id.offset).await { error_tx.send(e).await.expect("failed to send error"); } }); @@ -530,7 +519,7 @@ mod tests { // wait for the server to start tokio::time::sleep(Duration::from_millis(100)).await; - let tracker_handle = TrackerHandle::new(); + let tracker_handle = TrackerHandle::new(None); let client = MapClient::new(create_rpc_channel(sock_file).await?); let mapper = MapHandle::new( @@ -623,7 +612,7 @@ mod tests { // wait for the server to start tokio::time::sleep(Duration::from_millis(100)).await; - let tracker_handle = TrackerHandle::new(); + let tracker_handle = TrackerHandle::new(None); let client = MapClient::new(create_rpc_channel(sock_file).await?); let mapper = MapHandle::new( MapMode::Unary, @@ -714,7 +703,7 @@ mod tests { // wait for the server to start tokio::time::sleep(Duration::from_millis(100)).await; - let tracker_handle = TrackerHandle::new(); + let tracker_handle = TrackerHandle::new(None); let client = MapClient::new(create_rpc_channel(sock_file).await?); let mapper = MapHandle::new( MapMode::Unary, @@ -810,7 +799,7 @@ mod tests { // wait for the server to start tokio::time::sleep(Duration::from_millis(100)).await; - let tracker_handle = TrackerHandle::new(); + let tracker_handle = TrackerHandle::new(None); let client = MapClient::new(create_rpc_channel(sock_file).await?); let mapper = MapHandle::new( @@ -923,7 +912,7 @@ mod tests { // wait for the server to start 
tokio::time::sleep(Duration::from_millis(100)).await; - let tracker_handle = TrackerHandle::new(); + let tracker_handle = TrackerHandle::new(None); let client = MapClient::new(create_rpc_channel(sock_file).await?); let mapper = MapHandle::new( MapMode::Batch, @@ -1035,7 +1024,7 @@ mod tests { // wait for the server to start tokio::time::sleep(Duration::from_millis(100)).await; - let tracker_handle = TrackerHandle::new(); + let tracker_handle = TrackerHandle::new(None); let client = MapClient::new(create_rpc_channel(sock_file).await?); let mapper = MapHandle::new( @@ -1134,7 +1123,7 @@ mod tests { tokio::time::sleep(Duration::from_millis(100)).await; let client = MapClient::new(create_rpc_channel(sock_file).await?); - let tracker_handle = TrackerHandle::new(); + let tracker_handle = TrackerHandle::new(None); let mapper = MapHandle::new( MapMode::Stream, 500, diff --git a/rust/numaflow-core/src/monovertex.rs b/rust/numaflow-core/src/monovertex.rs index 6c9cca7b6..937963fea 100644 --- a/rust/numaflow-core/src/monovertex.rs +++ b/rust/numaflow-core/src/monovertex.rs @@ -26,7 +26,11 @@ pub(crate) async fn start_forwarder( cln_token: CancellationToken, config: &MonovertexConfig, ) -> error::Result<()> { - let tracker_handle = TrackerHandle::new(); + let callback_handler = config + .callback_config + .as_ref() + .map(|cb_cfg| CallbackHandler::new(config.name.clone(), cb_cfg.callback_concurrency)); + let tracker_handle = TrackerHandle::new(callback_handler); let (source, source_grpc_client) = create_components::create_source( config.batch_size, config.read_timeout, @@ -70,23 +74,7 @@ pub(crate) async fn start_forwarder( // FIXME: what to do with the handle shared::metrics::start_metrics_server(config.metrics_config.clone(), metrics_state).await; - let callback_handler = match config.callback_config { - Some(ref cb_cfg) => Some(CallbackHandler::new( - config.name.clone(), - cb_cfg.callback_concurrency, - )), - None => None, - }; - - start( - config.clone(), - source, - 
sink_writer, - transformer, - cln_token, - callback_handler, - ) - .await?; + start(config.clone(), source, sink_writer, transformer, cln_token).await?; Ok(()) } @@ -97,7 +85,6 @@ async fn start( sink: SinkWriter, transformer: Option, cln_token: CancellationToken, - callback_handler: Option, ) -> error::Result<()> { // start the pending reader to publish pending metrics let pending_reader = shared::metrics::create_pending_reader( @@ -107,7 +94,7 @@ async fn start( .await; let _pending_reader_handle = pending_reader.start(is_mono_vertex()).await; - let mut forwarder_builder = ForwarderBuilder::new(source, sink, cln_token, callback_handler); + let mut forwarder_builder = ForwarderBuilder::new(source, sink, cln_token); // add transformer if exists if let Some(transformer_client) = transformer { diff --git a/rust/numaflow-core/src/monovertex/forwarder.rs b/rust/numaflow-core/src/monovertex/forwarder.rs index ea14a662b..fb0be9b3e 100644 --- a/rust/numaflow-core/src/monovertex/forwarder.rs +++ b/rust/numaflow-core/src/monovertex/forwarder.rs @@ -28,7 +28,6 @@ //! [Stream]: https://docs.rs/tokio-stream/latest/tokio_stream/wrappers/struct.ReceiverStream.html //! 
[Actor Pattern]: https://ryhl.io/blog/actors-with-tokio/ -use serving::callback::CallbackHandler; use tokio_util::sync::CancellationToken; use crate::error; @@ -45,7 +44,6 @@ pub(crate) struct Forwarder { transformer: Option, sink_writer: SinkWriter, cln_token: CancellationToken, - callback_handler: Option, } pub(crate) struct ForwarderBuilder { @@ -53,7 +51,6 @@ pub(crate) struct ForwarderBuilder { sink_writer: SinkWriter, cln_token: CancellationToken, transformer: Option, - callback_handler: Option, } impl ForwarderBuilder { @@ -62,14 +59,12 @@ impl ForwarderBuilder { streaming_source: Source, streaming_sink: SinkWriter, cln_token: CancellationToken, - callback_handler: Option, ) -> Self { Self { source: streaming_source, sink_writer: streaming_sink, cln_token, transformer: None, - callback_handler, } } @@ -87,7 +82,6 @@ impl ForwarderBuilder { sink_writer: self.sink_writer, transformer: self.transformer, cln_token: self.cln_token, - callback_handler: self.callback_handler, } } } @@ -108,11 +102,7 @@ impl Forwarder { let sink_writer_handle = self .sink_writer - .streaming_write( - transformed_messages_stream, - self.cln_token.clone(), - self.callback_handler.clone(), - ) + .streaming_write(transformed_messages_stream, self.cln_token.clone()) .await?; match tokio::try_join!( @@ -273,7 +263,7 @@ mod tests { .await .map_err(|e| panic!("failed to create source reader: {:?}", e)) .unwrap(); - let tracker_handle = TrackerHandle::new(); + let tracker_handle = TrackerHandle::new(None); let source = Source::new( 5, SourceType::UserDefinedSource(src_read, src_ack, lag_reader), @@ -317,7 +307,7 @@ mod tests { .unwrap(); // create the forwarder with the source, transformer, and writer - let forwarder = ForwarderBuilder::new(source.clone(), sink_writer, cln_token.clone(), None) + let forwarder = ForwarderBuilder::new(source.clone(), sink_writer, cln_token.clone()) .transformer(transformer) .build(); @@ -372,7 +362,7 @@ mod tests { #[tokio::test] async fn 
test_flatmap_operation() { - let tracker_handle = TrackerHandle::new(); + let tracker_handle = TrackerHandle::new(None); // create the source which produces x number of messages let cln_token = CancellationToken::new(); @@ -447,7 +437,7 @@ mod tests { .unwrap(); // create the forwarder with the source, transformer, and writer - let forwarder = ForwarderBuilder::new(source.clone(), sink_writer, cln_token.clone(), None) + let forwarder = ForwarderBuilder::new(source.clone(), sink_writer, cln_token.clone()) .transformer(transformer) .build(); diff --git a/rust/numaflow-core/src/pipeline.rs b/rust/numaflow-core/src/pipeline.rs index b83864dd8..9de727a0d 100644 --- a/rust/numaflow-core/src/pipeline.rs +++ b/rust/numaflow-core/src/pipeline.rs @@ -51,23 +51,17 @@ async fn start_source_forwarder( config: PipelineConfig, source_config: SourceVtxConfig, ) -> Result<()> { - let tracker_handle = TrackerHandle::new(); + let callback_handler = config.callback_config.as_ref().map(|cb_cfg| { + CallbackHandler::new(config.vertex_name.clone(), cb_cfg.callback_concurrency) + }); + let tracker_handle = TrackerHandle::new(callback_handler); let js_context = create_js_context(config.js_client_config.clone()).await?; - let callback_handler = match config.callback_config { - Some(ref cb_cfg) => Some(CallbackHandler::new( - config.vertex_name.clone(), - cb_cfg.callback_concurrency, - )), - None => None, - }; - let buffer_writer = create_buffer_writer( &config, js_context.clone(), tracker_handle.clone(), cln_token.clone(), - callback_handler, ) .await; @@ -136,8 +130,12 @@ async fn start_map_forwarder( let mut mapper_grpc_client = None; let mut isb_lag_readers = vec![]; + let callback_handler = config.callback_config.as_ref().map(|cb_cfg| { + CallbackHandler::new(config.vertex_name.clone(), cb_cfg.callback_concurrency) + }); + for stream in reader_config.streams.clone() { - let tracker_handle = TrackerHandle::new(); + let tracker_handle = TrackerHandle::new(callback_handler.clone()); let 
buffer_reader = create_buffer_reader( stream, @@ -163,20 +161,11 @@ async fn start_map_forwarder( mapper_grpc_client = Some(mapper_rpc_client); } - let callback_handler = match config.callback_config { - Some(ref cb_cfg) => Some(CallbackHandler::new( - config.vertex_name.clone(), - cb_cfg.callback_concurrency, - )), - None => None, - }; - let buffer_writer = create_buffer_writer( &config, js_context.clone(), tracker_handle.clone(), cln_token.clone(), - callback_handler, ) .await; forwarder_components.push((buffer_reader, buffer_writer, mapper)); @@ -237,11 +226,15 @@ async fn start_sink_forwarder( .ok_or_else(|| error::Error::Config("No from vertex config found".to_string()))? .reader_config; + let callback_handler = config.callback_config.as_ref().map(|cb_cfg| { + CallbackHandler::new(config.vertex_name.clone(), cb_cfg.callback_concurrency) + }); + // Create sink writers and buffer readers for each stream let mut sink_writers = vec![]; let mut buffer_readers = vec![]; for stream in reader_config.streams.clone() { - let tracker_handle = TrackerHandle::new(); + let tracker_handle = TrackerHandle::new(callback_handler.clone()); let buffer_reader = create_buffer_reader( stream, @@ -284,14 +277,6 @@ async fn start_sink_forwarder( .await; } - let callback_handler = match config.callback_config { - Some(ref cb_cfg) => Some(CallbackHandler::new( - config.vertex_name.clone(), - cb_cfg.callback_concurrency, - )), - None => None, - }; - // Start a new forwarder for each buffer reader let mut forwarder_tasks = Vec::new(); for (buffer_reader, (sink_writer, _, _)) in buffer_readers.into_iter().zip(sink_writers) { @@ -300,7 +285,6 @@ async fn start_sink_forwarder( buffer_reader, sink_writer, cln_token.clone(), - callback_handler.clone(), ) .await; @@ -326,7 +310,6 @@ async fn create_buffer_writer( js_context: Context, tracker_handle: TrackerHandle, cln_token: CancellationToken, - callback_handler: Option, ) -> JetstreamWriter { JetstreamWriter::new( 
config.to_vertex_config.clone(), @@ -334,7 +317,6 @@ async fn create_buffer_writer( config.paf_concurrency, tracker_handle, cln_token, - callback_handler, ) } diff --git a/rust/numaflow-core/src/pipeline/forwarder/sink_forwarder.rs b/rust/numaflow-core/src/pipeline/forwarder/sink_forwarder.rs index 4610235b4..1d560e94e 100644 --- a/rust/numaflow-core/src/pipeline/forwarder/sink_forwarder.rs +++ b/rust/numaflow-core/src/pipeline/forwarder/sink_forwarder.rs @@ -1,4 +1,3 @@ -use serving::callback::CallbackHandler; use tokio_util::sync::CancellationToken; use crate::error::Error; @@ -12,7 +11,6 @@ pub(crate) struct SinkForwarder { jetstream_reader: JetstreamReader, sink_writer: SinkWriter, cln_token: CancellationToken, - callback_handler: Option, } impl SinkForwarder { @@ -20,13 +18,11 @@ impl SinkForwarder { jetstream_reader: JetstreamReader, sink_writer: SinkWriter, cln_token: CancellationToken, - callback_handler: Option, ) -> Self { Self { jetstream_reader, sink_writer, cln_token, - callback_handler, } } @@ -38,15 +34,9 @@ impl SinkForwarder { .streaming_read(reader_cancellation_token.clone()) .await?; - let callback_handler = self.callback_handler.clone(); - let sink_writer_handle = self .sink_writer - .streaming_write( - read_messages_stream, - self.cln_token.clone(), - callback_handler, - ) + .streaming_write(read_messages_stream, self.cln_token.clone()) .await?; // Join the reader and sink writer diff --git a/rust/numaflow-core/src/pipeline/forwarder/source_forwarder.rs b/rust/numaflow-core/src/pipeline/forwarder/source_forwarder.rs index ec717b8c8..c164dafc8 100644 --- a/rust/numaflow-core/src/pipeline/forwarder/source_forwarder.rs +++ b/rust/numaflow-core/src/pipeline/forwarder/source_forwarder.rs @@ -208,7 +208,7 @@ mod tests { #[cfg(feature = "nats-tests")] #[tokio::test] async fn test_source_forwarder() { - let tracker_handle = TrackerHandle::new(); + let tracker_handle = TrackerHandle::new(None); // create the source which produces x number of messages 
let cln_token = CancellationToken::new(); @@ -292,7 +292,6 @@ mod tests { 100, tracker_handle.clone(), cln_token.clone(), - None, ); // create a transformer diff --git a/rust/numaflow-core/src/pipeline/isb/jetstream/reader.rs b/rust/numaflow-core/src/pipeline/isb/jetstream/reader.rs index ca4c9e831..97aa5f986 100644 --- a/rust/numaflow-core/src/pipeline/isb/jetstream/reader.rs +++ b/rust/numaflow-core/src/pipeline/isb/jetstream/reader.rs @@ -166,7 +166,7 @@ impl JetstreamReader { // Insert the message into the tracker and wait for the ack to be sent back. let (ack_tx, ack_rx) = oneshot::channel(); - tracker_handle.insert(message_id.offset.clone(), ack_tx).await?; + tracker_handle.insert(&message, ack_tx).await?; tokio::spawn(Self::start_work_in_progress( jetstream_message, @@ -341,7 +341,7 @@ mod tests { 0, context.clone(), buf_reader_config, - TrackerHandle::new(), + TrackerHandle::new(None), 500, ) .await @@ -402,7 +402,7 @@ mod tests { // Create JetStream context let client = async_nats::connect(js_url).await.unwrap(); let context = jetstream::new(client); - let tracker_handle = TrackerHandle::new(); + let tracker_handle = TrackerHandle::new(None); let stream_name = "test_ack"; // Delete stream if it exists diff --git a/rust/numaflow-core/src/pipeline/isb/jetstream/writer.rs b/rust/numaflow-core/src/pipeline/isb/jetstream/writer.rs index 7baba7d11..063af6391 100644 --- a/rust/numaflow-core/src/pipeline/isb/jetstream/writer.rs +++ b/rust/numaflow-core/src/pipeline/isb/jetstream/writer.rs @@ -10,7 +10,6 @@ use async_nats::jetstream::publish::PublishAck; use async_nats::jetstream::stream::RetentionPolicy::Limits; use async_nats::jetstream::Context; use bytes::{Bytes, BytesMut}; -use serving::callback::CallbackHandler; use tokio::sync::Semaphore; use tokio::task::JoinHandle; use tokio::time; @@ -45,7 +44,6 @@ pub(crate) struct JetstreamWriter { cancel_token: CancellationToken, tracker_handle: TrackerHandle, sem: Arc, - callback_handler: Option, } impl 
JetstreamWriter { @@ -57,7 +55,6 @@ impl JetstreamWriter { paf_concurrency: usize, tracker_handle: TrackerHandle, cancel_token: CancellationToken, - callback_handler: Option, ) -> Self { let streams = config .iter() @@ -76,7 +73,6 @@ impl JetstreamWriter { cancel_token, tracker_handle, sem: Arc::new(Semaphore::new(paf_concurrency)), - callback_handler, }; // spawn a task for checking whether buffer is_full @@ -245,8 +241,7 @@ impl JetstreamWriter { continue; } - this.resolve_pafs(pafs, message, this.callback_handler.clone()) - .await?; + this.resolve_pafs(pafs, message).await?; processed_msgs_count += 1; if last_logged_at.elapsed().as_secs() >= 1 { @@ -343,7 +338,6 @@ impl JetstreamWriter { &self, pafs: Vec<((String, u16), PublishAckFuture)>, message: Message, - callback_handler: Option, ) -> Result<()> { let start_time = Instant::now(); let permit = Arc::clone(&self.sem) @@ -375,9 +369,6 @@ impl JetstreamWriter { .delete(message.id.offset.clone()) .await .expect("Failed to delete offset from tracker"); - if let Err(e) = do_callback(callback_handler.as_ref(), &message).await { - tracing::error!(?e, "Failed to send callback request"); - } } Err(e) => { error!( @@ -404,11 +395,6 @@ impl JetstreamWriter { stream.clone(), Offset::Int(IntOffset::new(ack.sequence, stream.1)), )); - if let Err(e) = - do_callback(callback_handler.as_ref(), &message).await - { - tracing::error!(?e, "Failed to send callback request"); - } } Err(e) => { error!(?e, "Blocking write failed for stream {}", stream.0); @@ -476,25 +462,6 @@ impl JetstreamWriter { } } -async fn do_callback(callback_handler: Option<&CallbackHandler>, message: &Message) -> Result<()> { - let Some(callback_handler) = callback_handler else { - return Ok(()); - }; - - let metadata = message.metadata.as_ref().ok_or_else(|| { - Error::Source("Message does not contain previous vertex name in the metadata".to_owned()) - })?; - - callback_handler - .callback( - &message.headers, - &message.tags, - 
metadata.previous_vertex.clone(), - ) - .await - .map_err(|e| Error::Source(format!("Failed to send callback for message: {e:?}"))) -} - #[cfg(test)] mod tests { use std::collections::HashMap; @@ -515,7 +482,7 @@ mod tests { #[cfg(feature = "nats-tests")] #[tokio::test] async fn test_async_write() { - let tracker_handle = TrackerHandle::new(); + let tracker_handle = TrackerHandle::new(None); let cln_token = CancellationToken::new(); let js_url = "localhost:4222"; // Create JetStream context @@ -559,7 +526,6 @@ mod tests { 100, tracker_handle, cln_token.clone(), - None, ); let message = Message { @@ -656,7 +622,7 @@ mod tests { #[cfg(feature = "nats-tests")] #[tokio::test] async fn test_write_with_cancellation() { - let tracker_handle = TrackerHandle::new(); + let tracker_handle = TrackerHandle::new(None); let js_url = "localhost:4222"; // Create JetStream context let client = async_nats::connect(js_url).await.unwrap(); @@ -702,7 +668,6 @@ mod tests { 100, tracker_handle, cancel_token.clone(), - None, ); let mut result_receivers = Vec::new(); @@ -857,7 +822,7 @@ mod tests { #[cfg(feature = "nats-tests")] #[tokio::test] async fn test_check_stream_status() { - let tracker_handle = TrackerHandle::new(); + let tracker_handle = TrackerHandle::new(None); let js_url = "localhost:4222"; // Create JetStream context let client = async_nats::connect(js_url).await.unwrap(); @@ -906,7 +871,6 @@ mod tests { 100, tracker_handle, cancel_token.clone(), - None, ); let mut js_writer = writer.clone(); @@ -956,7 +920,7 @@ mod tests { // Create JetStream context let client = async_nats::connect(js_url).await.unwrap(); let context = jetstream::new(client); - let tracker_handle = TrackerHandle::new(); + let tracker_handle = TrackerHandle::new(None); let stream_name = "test_publish_messages"; // Delete stream if it exists @@ -997,7 +961,6 @@ mod tests { 100, tracker_handle.clone(), cln_token.clone(), - None, ); let (messages_tx, messages_rx) = tokio::sync::mpsc::channel(500); @@ -1019,10 
+982,7 @@ mod tests { metadata: None, }; let (ack_tx, ack_rx) = tokio::sync::oneshot::channel(); - tracker_handle - .insert(message.id.offset.clone(), ack_tx) - .await - .unwrap(); + tracker_handle.insert(&message, ack_tx).await.unwrap(); ack_rxs.push(ack_rx); messages_tx.send(message).await.unwrap(); } @@ -1046,7 +1006,7 @@ mod tests { // Create JetStream context let client = async_nats::connect(js_url).await.unwrap(); let context = jetstream::new(client); - let tracker_handle = TrackerHandle::new(); + let tracker_handle = TrackerHandle::new(None); let stream_name = "test_publish_cancellation"; // Delete stream if it exists @@ -1087,7 +1047,6 @@ mod tests { 100, tracker_handle.clone(), cancel_token.clone(), - None, ); let (tx, rx) = tokio::sync::mpsc::channel(500); @@ -1109,10 +1068,7 @@ mod tests { metadata: None, }; let (ack_tx, ack_rx) = tokio::sync::oneshot::channel(); - tracker_handle - .insert(message.id.offset.clone(), ack_tx) - .await - .unwrap(); + tracker_handle.insert(&message, ack_tx).await.unwrap(); ack_rxs.push(ack_rx); tx.send(message).await.unwrap(); } @@ -1137,10 +1093,7 @@ mod tests { metadata: None, }; let (ack_tx, ack_rx) = tokio::sync::oneshot::channel(); - tracker_handle - .insert("offset_101".to_string().into(), ack_tx) - .await - .unwrap(); + tracker_handle.insert(&message, ack_tx).await.unwrap(); ack_rxs.push(ack_rx); tx.send(message).await.unwrap(); drop(tx); @@ -1168,7 +1121,7 @@ mod tests { let js_url = "localhost:4222"; let client = async_nats::connect(js_url).await.unwrap(); let context = jetstream::new(client); - let tracker_handle = TrackerHandle::new(); + let tracker_handle = TrackerHandle::new(None); let cln_token = CancellationToken::new(); let vertex1_streams = vec!["vertex1-0", "vertex1-1"]; @@ -1231,7 +1184,6 @@ mod tests { 100, tracker_handle.clone(), cln_token.clone(), - None, ); let (messages_tx, messages_rx) = tokio::sync::mpsc::channel(500); @@ -1252,10 +1204,7 @@ mod tests { metadata: None, }; let (ack_tx, ack_rx) = 
tokio::sync::oneshot::channel(); - tracker_handle - .insert(message.id.offset.clone(), ack_tx) - .await - .unwrap(); + tracker_handle.insert(&message, ack_tx).await.unwrap(); ack_rxs.push(ack_rx); messages_tx.send(message).await.unwrap(); } diff --git a/rust/numaflow-core/src/sink.rs b/rust/numaflow-core/src/sink.rs index 027f42608..fdbfbe21c 100644 --- a/rust/numaflow-core/src/sink.rs +++ b/rust/numaflow-core/src/sink.rs @@ -4,7 +4,6 @@ use std::time::Duration; use numaflow_pb::clients::sink::sink_client::SinkClient; use numaflow_pb::clients::sink::sink_response; use numaflow_pb::clients::sink::Status::{Failure, Fallback, Success}; -use serving::callback::CallbackHandler; use tokio::sync::mpsc::Receiver; use tokio::sync::{mpsc, oneshot}; use tokio::task::JoinHandle; @@ -256,7 +255,6 @@ impl SinkWriter { &self, messages_stream: ReceiverStream, cancellation_token: CancellationToken, - callback_handler: Option, ) -> Result>> { let handle: JoinHandle> = tokio::spawn({ let mut this = self.clone(); @@ -311,22 +309,6 @@ impl SinkWriter { } } - if let Some(ref callback_handler) = callback_handler { - for message in batch { - let metadata = message.metadata.ok_or_else(|| { - Error::Source(format!( - "Writing to Sink: message does not contain previous vertex name in the metadata" - )) - })?; - if let Err(e) = callback_handler - .callback(&message.headers, &message.tags, metadata.previous_vertex) - .await - { - tracing::error!(?e, "Failed to send callback for message"); - } - } - }; - // publish sink metrics if is_mono_vertex() { monovertex_metrics() @@ -769,7 +751,7 @@ mod tests { 10, Duration::from_secs(1), SinkClientType::Log, - TrackerHandle::new(), + TrackerHandle::new(None), ) .build() .await @@ -800,7 +782,7 @@ mod tests { #[tokio::test] async fn test_streaming_write() { - let tracker_handle = TrackerHandle::new(); + let tracker_handle = TrackerHandle::new(None); let sink_writer = SinkWriterBuilder::new( 10, Duration::from_millis(100), @@ -833,16 +815,13 @@ mod tests 
{ for msg in messages { let (ack_tx, ack_rx) = oneshot::channel(); ack_rxs.push(ack_rx); - tracker_handle - .insert(msg.id.offset.clone(), ack_tx) - .await - .unwrap(); + tracker_handle.insert(&msg, ack_tx).await.unwrap(); let _ = tx.send(msg).await; } drop(tx); let handle = sink_writer - .streaming_write(ReceiverStream::new(rx), CancellationToken::new(), None) + .streaming_write(ReceiverStream::new(rx), CancellationToken::new()) .await .unwrap(); @@ -856,7 +835,7 @@ mod tests { #[tokio::test] async fn test_streaming_write_error() { - let tracker_handle = TrackerHandle::new(); + let tracker_handle = TrackerHandle::new(None); // start the server let (_shutdown_tx, shutdown_rx) = oneshot::channel(); let tmp_dir = tempfile::TempDir::new().unwrap(); @@ -912,16 +891,13 @@ mod tests { for msg in messages { let (ack_tx, ack_rx) = oneshot::channel(); ack_rxs.push(ack_rx); - tracker_handle - .insert(msg.id.offset.clone(), ack_tx) - .await - .unwrap(); + tracker_handle.insert(&msg, ack_tx).await.unwrap(); let _ = tx.send(msg).await; } drop(tx); let cln_token = CancellationToken::new(); let handle = sink_writer - .streaming_write(ReceiverStream::new(rx), cln_token.clone(), None) + .streaming_write(ReceiverStream::new(rx), cln_token.clone()) .await .unwrap(); @@ -942,7 +918,7 @@ mod tests { #[tokio::test] async fn test_fallback_write() { - let tracker_handle = TrackerHandle::new(); + let tracker_handle = TrackerHandle::new(None); // start the server let (_shutdown_tx, shutdown_rx) = oneshot::channel(); @@ -999,17 +975,14 @@ mod tests { let mut ack_rxs = vec![]; for msg in messages { let (ack_tx, ack_rx) = oneshot::channel(); - tracker_handle - .insert(msg.id.offset.clone(), ack_tx) - .await - .unwrap(); + tracker_handle.insert(&msg, ack_tx).await.unwrap(); ack_rxs.push(ack_rx); let _ = tx.send(msg).await; } drop(tx); let cln_token = CancellationToken::new(); let handle = sink_writer - .streaming_write(ReceiverStream::new(rx), cln_token.clone(), None) + 
.streaming_write(ReceiverStream::new(rx), cln_token.clone()) .await .unwrap(); diff --git a/rust/numaflow-core/src/source.rs b/rust/numaflow-core/src/source.rs index d48c37ab6..54ca9f90c 100644 --- a/rust/numaflow-core/src/source.rs +++ b/rust/numaflow-core/src/source.rs @@ -321,9 +321,7 @@ impl Source { let offset = message.offset.clone().expect("offset can never be none"); // insert the offset and the ack one shot in the tracker. - tracker_handle - .insert(message.id.offset.clone(), resp_ack_tx) - .await?; + tracker_handle.insert(&message, resp_ack_tx).await?; // store the ack one shot in the batch to invoke ack later. ack_batch.push((offset, resp_ack_rx)); @@ -546,7 +544,7 @@ mod tests { let source = Source::new( 5, SourceType::UserDefinedSource(src_read, src_ack, lag_reader), - TrackerHandle::new(), + TrackerHandle::new(None), true, ); diff --git a/rust/numaflow-core/src/tracker.rs b/rust/numaflow-core/src/tracker.rs index 6a568ea91..875bb6a1a 100644 --- a/rust/numaflow-core/src/tracker.rs +++ b/rust/numaflow-core/src/tracker.rs @@ -11,10 +11,12 @@ use std::collections::HashMap; use bytes::Bytes; +use serving::callback::CallbackHandler; +use serving::{DEFAULT_CALLBACK_URL_HEADER_KEY, DEFAULT_ID_HEADER}; use tokio::sync::{mpsc, oneshot}; use crate::error::Error; -use crate::message::ReadAck; +use crate::message::{Message, ReadAck}; use crate::Result; /// TrackerEntry represents the state of a tracked message. @@ -23,6 +25,7 @@ struct TrackerEntry { ack_send: oneshot::Sender, count: u32, eof: bool, + callback_info: Option, } /// ActorMessage represents the messages that can be sent to the Tracker actor. 
@@ -30,12 +33,20 @@ enum ActorMessage { Insert { offset: Bytes, ack_send: oneshot::Sender, + callback_info: Option, }, Update { offset: Bytes, - count: u32, + responses: Option>, + }, + UpdateMany { + offset: Bytes, + responses: Vec>>, eof: bool, }, + UpdateEOF { + offset: Bytes, + }, Delete { offset: Bytes, }, @@ -54,6 +65,61 @@ enum ActorMessage { struct Tracker { entries: HashMap, receiver: mpsc::Receiver, + callback_handler: Option, +} + +#[derive(Debug)] +struct CallbackInfo { + id: String, + callback_url: String, + from_vertex: String, + responses: Vec>>, +} + +impl TryFrom<&Message> for CallbackInfo { + type Error = Error; + + fn try_from(message: &Message) -> std::result::Result { + let callback_url = message + .headers + .get(DEFAULT_CALLBACK_URL_HEADER_KEY) + .ok_or_else(|| { + Error::Source(format!( + "{DEFAULT_CALLBACK_URL_HEADER_KEY} header is not present in the message headers", + )) + })? + .to_owned(); + let uuid = message + .headers + .get(DEFAULT_ID_HEADER) + .ok_or_else(|| { + Error::Source(format!( + "{DEFAULT_ID_HEADER} is not found in message headers", + )) + })? + .to_owned(); + + let from_vertex = message + .metadata + .as_ref() + .ok_or_else(|| Error::Source("Metadata field is empty in the message".into()))? + .previous_vertex + .clone(); + + // FIXME: empty message tags + let mut msg_tags = None; + if let Some(ref tags) = message.tags { + if !tags.is_empty() { + msg_tags = Some(tags.iter().cloned().collect()); + } + }; + Ok(CallbackInfo { + id: uuid, + callback_url, + from_vertex, + responses: vec![msg_tags], + }) + } } impl Drop for Tracker { @@ -70,10 +136,14 @@ impl Drop for Tracker { impl Tracker { /// Creates a new Tracker instance with the given receiver for actor messages. 
- fn new(receiver: mpsc::Receiver) -> Self { + fn new( + receiver: mpsc::Receiver, + callback_handler: impl Into>, + ) -> Self { Self { entries: HashMap::new(), receiver, + callback_handler: callback_handler.into(), } } @@ -90,11 +160,22 @@ impl Tracker { ActorMessage::Insert { offset, ack_send: respond_to, + callback_info, + } => { + self.handle_insert(offset, callback_info, respond_to); + } + ActorMessage::Update { offset, responses } => { + self.handle_update(offset, responses); + } + ActorMessage::UpdateMany { + offset, + responses, + eof, } => { - self.handle_insert(offset, respond_to); + self.handle_update_many(offset, responses, eof); } - ActorMessage::Update { offset, count, eof } => { - self.handle_update(offset, count, eof); + ActorMessage::UpdateEOF { offset } => { + self.handle_update_eof(offset); } ActorMessage::Delete { offset } => { self.handle_delete(offset); @@ -114,22 +195,73 @@ impl Tracker { } /// Inserts a new entry into the tracker with the given offset and ack sender. - fn handle_insert(&mut self, offset: Bytes, respond_to: oneshot::Sender) { + fn handle_insert( + &mut self, + offset: Bytes, + callback_info: Option, + respond_to: oneshot::Sender, + ) { self.entries.insert( offset, TrackerEntry { ack_send: respond_to, count: 0, eof: true, + callback_info, }, ); } /// Updates an existing entry in the tracker with the number of expected messages and EOF status. - fn handle_update(&mut self, offset: Bytes, count: u32, eof: bool) { + fn handle_update(&mut self, offset: Bytes, responses: Option>) { + if let Some(entry) = self.entries.get_mut(&offset) { + entry.count += 1; + entry + .callback_info + .as_mut() + .map(|cb| cb.responses.push(responses)); + // if the count is zero, we can send an ack immediately + // this is case where map stream will send eof true after + // receiving all the messages. 
+ if entry.count == 0 { + let entry = self.entries.remove(&offset).unwrap(); + entry + .ack_send + .send(ReadAck::Ack) + .expect("Failed to send ack"); + } + } + } + + fn handle_update_eof(&mut self, offset: Bytes) { + if let Some(entry) = self.entries.get_mut(&offset) { + entry.eof = true; + // if the count is zero, we can send an ack immediately + // this is case where map stream will send eof true after + // receiving all the messages. + if entry.count == 0 { + let entry = self.entries.remove(&offset).unwrap(); + entry + .ack_send + .send(ReadAck::Ack) + .expect("Failed to send ack"); + } + } + } + + fn handle_update_many( + &mut self, + offset: Bytes, + responses: Vec>>, + eof: bool, + ) { if let Some(entry) = self.entries.get_mut(&offset) { - entry.count += count; + entry.count += responses.len() as u32; entry.eof = eof; + entry + .callback_info + .as_mut() + .map(|cb| cb.responses.extend(responses)); // if the count is zero, we can send an ack immediately // this is case where map stream will send eof true after // receiving all the messages. @@ -187,24 +319,68 @@ impl Tracker { #[derive(Clone)] pub(crate) struct TrackerHandle { sender: mpsc::Sender, + enable_callbacks: bool, } impl TrackerHandle { /// Creates a new TrackerHandle instance and spawns the Tracker. - pub(crate) fn new() -> Self { + pub(crate) fn new(callback_handler: Option) -> Self { + let enable_callbacks = callback_handler.is_some(); let (sender, receiver) = mpsc::channel(100); - let tracker = Tracker::new(receiver); + let tracker = Tracker::new(receiver, callback_handler); tokio::spawn(tracker.run()); - Self { sender } + Self { + sender, + enable_callbacks, + } } /// Inserts a new message into the Tracker with the given offset and acknowledgment sender. 
pub(crate) async fn insert( &self, - offset: Bytes, + message: &Message, ack_send: oneshot::Sender, ) -> Result<()> { - let message = ActorMessage::Insert { offset, ack_send }; + let offset = message.id.offset.clone(); + let mut callback_info = None; + if self.enable_callbacks { + callback_info = Some(message.try_into()?); + } + let message = ActorMessage::Insert { + offset, + ack_send, + callback_info, + }; + self.sender + .send(message) + .await + .map_err(|e| Error::Tracker(format!("{:?}", e)))?; + Ok(()) + } + + /// Updates an existing message in the Tracker with the given offset, count, and EOF status. + pub(crate) async fn update(&self, message: &Message) -> Result<()> { + let offset = message.id.offset.clone(); + let mut responses: Option> = None; + if self.enable_callbacks { + // FIXME: empty message tags + if let Some(ref tags) = message.tags { + if !tags.is_empty() { + responses = Some(tags.iter().cloned().collect()); + } + }; + } + let message = ActorMessage::Update { offset, responses }; + self.sender + .send(message) + .await + .map_err(|e| Error::Tracker(format!("{:?}", e)))?; + Ok(()) + } + + /// Updates an existing message in the Tracker with the given offset, count, and EOF status. + pub(crate) async fn update_eof(&self, offset: Bytes) -> Result<()> { + let message = ActorMessage::UpdateEOF { offset }; self.sender .send(message) .await @@ -213,8 +389,29 @@ impl TrackerHandle { } /// Updates an existing message in the Tracker with the given offset, count, and EOF status. 
- pub(crate) async fn update(&self, offset: Bytes, count: u32, eof: bool) -> Result<()> { - let message = ActorMessage::Update { offset, count, eof }; + pub(crate) async fn update_many(&self, messages: &[Message], eof: bool) -> Result<()> { + if messages.is_empty() { + return Ok(()); + } + let offset = messages.first().unwrap().id.offset.clone(); + let mut responses: Vec>> = vec![]; + // if self.enable_callbacks { + // FIXME: empty message tags + for message in messages { + let mut response: Option> = None; + if let Some(ref tags) = message.tags { + if !tags.is_empty() { + response = Some(tags.iter().cloned().collect()); + } + }; + responses.push(response); + } + // } + let message = ActorMessage::UpdateMany { + offset, + responses, + eof, + }; self.sender .send(message) .await @@ -268,27 +465,42 @@ impl TrackerHandle { #[cfg(test)] mod tests { + use std::sync::Arc; + use tokio::sync::oneshot; use tokio::time::{timeout, Duration}; + use crate::message::MessageID; + use super::*; #[tokio::test] async fn test_insert_update_delete() { - let handle = TrackerHandle::new(); + let handle = TrackerHandle::new(None); let (ack_send, ack_recv) = oneshot::channel(); + let offset = Bytes::from_static(b"offset1"); + let message = Message { + keys: Arc::from([]), + tags: None, + value: Bytes::from_static(b"test"), + offset: None, + event_time: Default::default(), + id: MessageID { + vertex_name: "in".into(), + offset: offset.clone(), + index: 1, + }, + headers: HashMap::new(), + metadata: None, + }; + // Insert a new message - handle - .insert("offset1".to_string().into(), ack_send) - .await - .unwrap(); + handle.insert(&message, ack_send).await.unwrap(); // Update the message - handle - .update("offset1".to_string().into(), 1, true) - .await - .unwrap(); + handle.update(&message).await.unwrap(); + handle.update_eof(offset).await.unwrap(); // Delete the message handle.delete("offset1".to_string().into()).await.unwrap(); @@ -302,25 +514,36 @@ mod tests { #[tokio::test] async fn 
test_update_with_multiple_deletes() { - let handle = TrackerHandle::new(); + let handle = TrackerHandle::new(None); let (ack_send, ack_recv) = oneshot::channel(); + let offset = Bytes::from_static(b"offset1"); + let message = Message { + keys: Arc::from([]), + tags: None, + value: Bytes::from_static(b"test"), + offset: None, + event_time: Default::default(), + id: MessageID { + vertex_name: "in".into(), + offset: offset.clone(), + index: 1, + }, + headers: HashMap::new(), + metadata: None, + }; + // Insert a new message - handle - .insert("offset1".to_string().into(), ack_send) - .await - .unwrap(); + handle.insert(&message, ack_send).await.unwrap(); + let messages: Vec = std::iter::repeat(message).take(3).collect(); // Update the message with a count of 3 - handle - .update("offset1".to_string().into(), 3, true) - .await - .unwrap(); + handle.update_many(&messages, true).await.unwrap(); // Delete the message three times - handle.delete("offset1".to_string().into()).await.unwrap(); - handle.delete("offset1".to_string().into()).await.unwrap(); - handle.delete("offset1".to_string().into()).await.unwrap(); + handle.delete(offset.clone()).await.unwrap(); + handle.delete(offset.clone()).await.unwrap(); + handle.delete(offset).await.unwrap(); // Verify that the message was deleted and ack was received after the third delete let result = timeout(Duration::from_secs(1), ack_recv).await.unwrap(); @@ -331,17 +554,30 @@ mod tests { #[tokio::test] async fn test_discard() { - let handle = TrackerHandle::new(); + let handle = TrackerHandle::new(None); let (ack_send, ack_recv) = oneshot::channel(); + let offset = Bytes::from_static(b"offset1"); + let message = Message { + keys: Arc::from([]), + tags: None, + value: Bytes::from_static(b"test"), + offset: None, + event_time: Default::default(), + id: MessageID { + vertex_name: "in".into(), + offset: offset.clone(), + index: 1, + }, + headers: HashMap::new(), + metadata: None, + }; + // Insert a new message - handle - 
.insert("offset1".to_string().into(), ack_send) - .await - .unwrap(); + handle.insert(&message, ack_send).await.unwrap(); // Discard the message - handle.discard("offset1".to_string().into()).await.unwrap(); + handle.discard(offset).await.unwrap(); // Verify that the message was discarded and nak was received let result = timeout(Duration::from_secs(1), ack_recv).await.unwrap(); @@ -352,23 +588,34 @@ mod tests { #[tokio::test] async fn test_discard_after_update_with_higher_count() { - let handle = TrackerHandle::new(); + let handle = TrackerHandle::new(None); let (ack_send, ack_recv) = oneshot::channel(); + let offset = Bytes::from_static(b"offset1"); + let message = Message { + keys: Arc::from([]), + tags: None, + value: Bytes::from_static(b"test"), + offset: None, + event_time: Default::default(), + id: MessageID { + vertex_name: "in".into(), + offset: offset.clone(), + index: 1, + }, + headers: HashMap::new(), + metadata: None, + }; + // Insert a new message - handle - .insert("offset1".to_string().into(), ack_send) - .await - .unwrap(); + handle.insert(&message, ack_send).await.unwrap(); + let messages: Vec = std::iter::repeat(message).take(3).collect(); // Update the message with a count of 3 - handle - .update("offset1".to_string().into(), 3, false) - .await - .unwrap(); + handle.update_many(&messages, false).await.unwrap(); // Discard the message - handle.discard("offset1".to_string().into()).await.unwrap(); + handle.discard(offset).await.unwrap(); // Verify that the message was discarded and nak was received let result = timeout(Duration::from_secs(1), ack_recv).await.unwrap(); diff --git a/rust/numaflow-core/src/transformer.rs b/rust/numaflow-core/src/transformer.rs index a44e838c0..21bdc19c5 100644 --- a/rust/numaflow-core/src/transformer.rs +++ b/rust/numaflow-core/src/transformer.rs @@ -138,11 +138,7 @@ impl Transformer { match receiver.await { Ok(Ok(mut transformed_messages)) => { if let Err(e) = tracker_handle - .update( - read_msg.id.offset.clone(), 
- transformed_messages.len() as u32, - true, - ) + .update_many(&transformed_messages, true) .await { let _ = error_tx.send(e).await; @@ -278,7 +274,7 @@ mod tests { // wait for the server to start tokio::time::sleep(Duration::from_millis(100)).await; - let tracker_handle = TrackerHandle::new(); + let tracker_handle = TrackerHandle::new(None); let client = SourceTransformClient::new(create_rpc_channel(sock_file).await?); let transformer = Transformer::new(500, 10, client, tracker_handle.clone()).await?; @@ -355,7 +351,7 @@ mod tests { // wait for the server to start tokio::time::sleep(Duration::from_millis(100)).await; - let tracker_handle = TrackerHandle::new(); + let tracker_handle = TrackerHandle::new(None); let client = SourceTransformClient::new(create_rpc_channel(sock_file).await?); let transformer = Transformer::new(500, 10, client, tracker_handle.clone()).await?; @@ -441,7 +437,7 @@ mod tests { // wait for the server to start tokio::time::sleep(Duration::from_millis(100)).await; - let tracker_handle = TrackerHandle::new(); + let tracker_handle = TrackerHandle::new(None); let client = SourceTransformClient::new(create_rpc_channel(sock_file).await?); let transformer = Transformer::new(500, 10, client, tracker_handle.clone()).await?; diff --git a/rust/serving/src/app/callback.rs b/rust/serving/src/app/callback.rs index d5708a4e6..7d8de8815 100644 --- a/rust/serving/src/app/callback.rs +++ b/rust/serving/src/app/callback.rs @@ -1,9 +1,9 @@ use axum::{body::Bytes, extract::State, http::HeaderMap, routing, Json, Router}; -use serde::{Deserialize, Serialize}; use tracing::error; use self::store::Store; use crate::app::response::ApiError; +use crate::callback::Callback; /// in-memory state store including connection tracking pub(crate) mod state; @@ -12,26 +12,6 @@ use state::State as CallbackState; /// store for storing the state pub(crate) mod store; -/// As message passes through each component (map, transformer, sink, etc.). 
it emits a beacon via callback -/// to inform that message has been processed by this component. -#[derive(Debug, Serialize, Deserialize)] -pub(crate) struct Callback { - pub(crate) id: String, - pub(crate) vertex: String, - pub(crate) cb_time: u64, - pub(crate) from_vertex: String, - /// Due to flat-map operation, we can have 0 or more responses. - pub(crate) responses: Vec, -} - -/// It contains details about the `To` vertex via tags (conditional forwarding). -#[derive(Debug, Serialize, Deserialize)] -pub(crate) struct Response { - /// If tags is None, the message is forwarded to all vertices, if len(Vec) == 0, it means that - /// the message has been dropped. - pub(crate) tags: Option>, -} - #[derive(Clone)] struct CallbackAppState { tid_header: String, @@ -106,6 +86,7 @@ mod tests { use crate::app::callback::state::State as CallbackState; use crate::app::callback::store::memstore::InMemoryStore; use crate::app::tracker::MessageGraph; + use crate::callback::Response; use crate::pipeline::PipelineDCG; const PIPELINE_SPEC_ENCODED: &str = 
"eyJ2ZXJ0aWNlcyI6W3sibmFtZSI6ImluIiwic291cmNlIjp7InNlcnZpbmciOnsiYXV0aCI6bnVsbCwic2VydmljZSI6dHJ1ZSwibXNnSURIZWFkZXJLZXkiOiJYLU51bWFmbG93LUlkIiwic3RvcmUiOnsidXJsIjoicmVkaXM6Ly9yZWRpczo2Mzc5In19fSwiY29udGFpbmVyVGVtcGxhdGUiOnsicmVzb3VyY2VzIjp7fSwiaW1hZ2VQdWxsUG9saWN5IjoiTmV2ZXIiLCJlbnYiOlt7Im5hbWUiOiJSVVNUX0xPRyIsInZhbHVlIjoiZGVidWcifV19LCJzY2FsZSI6eyJtaW4iOjF9LCJ1cGRhdGVTdHJhdGVneSI6eyJ0eXBlIjoiUm9sbGluZ1VwZGF0ZSIsInJvbGxpbmdVcGRhdGUiOnsibWF4VW5hdmFpbGFibGUiOiIyNSUifX19LHsibmFtZSI6InBsYW5uZXIiLCJ1ZGYiOnsiY29udGFpbmVyIjp7ImltYWdlIjoiYXNjaWk6MC4xIiwiYXJncyI6WyJwbGFubmVyIl0sInJlc291cmNlcyI6e30sImltYWdlUHVsbFBvbGljeSI6Ik5ldmVyIn0sImJ1aWx0aW4iOm51bGwsImdyb3VwQnkiOm51bGx9LCJjb250YWluZXJUZW1wbGF0ZSI6eyJyZXNvdXJjZXMiOnt9LCJpbWFnZVB1bGxQb2xpY3kiOiJOZXZlciJ9LCJzY2FsZSI6eyJtaW4iOjF9LCJ1cGRhdGVTdHJhdGVneSI6eyJ0eXBlIjoiUm9sbGluZ1VwZGF0ZSIsInJvbGxpbmdVcGRhdGUiOnsibWF4VW5hdmFpbGFibGUiOiIyNSUifX19LHsibmFtZSI6InRpZ2VyIiwidWRmIjp7ImNvbnRhaW5lciI6eyJpbWFnZSI6ImFzY2lpOjAuMSIsImFyZ3MiOlsidGlnZXIiXSwicmVzb3VyY2VzIjp7fSwiaW1hZ2VQdWxsUG9saWN5IjoiTmV2ZXIifSwiYnVpbHRpbiI6bnVsbCwiZ3JvdXBCeSI6bnVsbH0sImNvbnRhaW5lclRlbXBsYXRlIjp7InJlc291cmNlcyI6e30sImltYWdlUHVsbFBvbGljeSI6Ik5ldmVyIn0sInNjYWxlIjp7Im1pbiI6MX0sInVwZGF0ZVN0cmF0ZWd5Ijp7InR5cGUiOiJSb2xsaW5nVXBkYXRlIiwicm9sbGluZ1VwZGF0ZSI6eyJtYXhVbmF2YWlsYWJsZSI6IjI1JSJ9fX0seyJuYW1lIjoiZG9nIiwidWRmIjp7ImNvbnRhaW5lciI6eyJpbWFnZSI6ImFzY2lpOjAuMSIsImFyZ3MiOlsiZG9nIl0sInJlc291cmNlcyI6e30sImltYWdlUHVsbFBvbGljeSI6Ik5ldmVyIn0sImJ1aWx0aW4iOm51bGwsImdyb3VwQnkiOm51bGx9LCJjb250YWluZXJUZW1wbGF0ZSI6eyJyZXNvdXJjZXMiOnt9LCJpbWFnZVB1bGxQb2xpY3kiOiJOZXZlciJ9LCJzY2FsZSI6eyJtaW4iOjF9LCJ1cGRhdGVTdHJhdGVneSI6eyJ0eXBlIjoiUm9sbGluZ1VwZGF0ZSIsInJvbGxpbmdVcGRhdGUiOnsibWF4VW5hdmFpbGFibGUiOiIyNSUifX19LHsibmFtZSI6ImVsZXBoYW50IiwidWRmIjp7ImNvbnRhaW5lciI6eyJpbWFnZSI6ImFzY2lpOjAuMSIsImFyZ3MiOlsiZWxlcGhhbnQiXSwicmVzb3VyY2VzIjp7fSwiaW1hZ2VQdWxsUG9saWN5IjoiTmV2ZXIifSwiYnVpbHRpbiI6bnVsbCwiZ3JvdXBCeSI6bnVsbH0sImNvbnRhaW5lclRlbXBsYXRlIjp7InJlc291cmNlcyI6e30sImltYWdlUHVsbFBvbGljeSI6Ik5ldmV
yIn0sInNjYWxlIjp7Im1pbiI6MX0sInVwZGF0ZVN0cmF0ZWd5Ijp7InR5cGUiOiJSb2xsaW5nVXBkYXRlIiwicm9sbGluZ1VwZGF0ZSI6eyJtYXhVbmF2YWlsYWJsZSI6IjI1JSJ9fX0seyJuYW1lIjoiYXNjaWlhcnQiLCJ1ZGYiOnsiY29udGFpbmVyIjp7ImltYWdlIjoiYXNjaWk6MC4xIiwiYXJncyI6WyJhc2NpaWFydCJdLCJyZXNvdXJjZXMiOnt9LCJpbWFnZVB1bGxQb2xpY3kiOiJOZXZlciJ9LCJidWlsdGluIjpudWxsLCJncm91cEJ5IjpudWxsfSwiY29udGFpbmVyVGVtcGxhdGUiOnsicmVzb3VyY2VzIjp7fSwiaW1hZ2VQdWxsUG9saWN5IjoiTmV2ZXIifSwic2NhbGUiOnsibWluIjoxfSwidXBkYXRlU3RyYXRlZ3kiOnsidHlwZSI6IlJvbGxpbmdVcGRhdGUiLCJyb2xsaW5nVXBkYXRlIjp7Im1heFVuYXZhaWxhYmxlIjoiMjUlIn19fSx7Im5hbWUiOiJzZXJ2ZS1zaW5rIiwic2luayI6eyJ1ZHNpbmsiOnsiY29udGFpbmVyIjp7ImltYWdlIjoic2VydmVzaW5rOjAuMSIsImVudiI6W3sibmFtZSI6Ik5VTUFGTE9XX0NBTExCQUNLX1VSTF9LRVkiLCJ2YWx1ZSI6IlgtTnVtYWZsb3ctQ2FsbGJhY2stVXJsIn0seyJuYW1lIjoiTlVNQUZMT1dfTVNHX0lEX0hFQURFUl9LRVkiLCJ2YWx1ZSI6IlgtTnVtYWZsb3ctSWQifV0sInJlc291cmNlcyI6e30sImltYWdlUHVsbFBvbGljeSI6Ik5ldmVyIn19LCJyZXRyeVN0cmF0ZWd5Ijp7fX0sImNvbnRhaW5lclRlbXBsYXRlIjp7InJlc291cmNlcyI6e30sImltYWdlUHVsbFBvbGljeSI6Ik5ldmVyIn0sInNjYWxlIjp7Im1pbiI6MX0sInVwZGF0ZVN0cmF0ZWd5Ijp7InR5cGUiOiJSb2xsaW5nVXBkYXRlIiwicm9sbGluZ1VwZGF0ZSI6eyJtYXhVbmF2YWlsYWJsZSI6IjI1JSJ9fX0seyJuYW1lIjoiZXJyb3Itc2luayIsInNpbmsiOnsidWRzaW5rIjp7ImNvbnRhaW5lciI6eyJpbWFnZSI6InNlcnZlc2luazowLjEiLCJlbnYiOlt7Im5hbWUiOiJOVU1BRkxPV19DQUxMQkFDS19VUkxfS0VZIiwidmFsdWUiOiJYLU51bWFmbG93LUNhbGxiYWNrLVVybCJ9LHsibmFtZSI6Ik5VTUFGTE9XX01TR19JRF9IRUFERVJfS0VZIiwidmFsdWUiOiJYLU51bWFmbG93LUlkIn1dLCJyZXNvdXJjZXMiOnt9LCJpbWFnZVB1bGxQb2xpY3kiOiJOZXZlciJ9fSwicmV0cnlTdHJhdGVneSI6e319LCJjb250YWluZXJUZW1wbGF0ZSI6eyJyZXNvdXJjZXMiOnt9LCJpbWFnZVB1bGxQb2xpY3kiOiJOZXZlciJ9LCJzY2FsZSI6eyJtaW4iOjF9LCJ1cGRhdGVTdHJhdGVneSI6eyJ0eXBlIjoiUm9sbGluZ1VwZGF0ZSIsInJvbGxpbmdVcGRhdGUiOnsibWF4VW5hdmFpbGFibGUiOiIyNSUifX19XSwiZWRnZXMiOlt7ImZyb20iOiJpbiIsInRvIjoicGxhbm5lciIsImNvbmRpdGlvbnMiOm51bGx9LHsiZnJvbSI6InBsYW5uZXIiLCJ0byI6ImFzY2lpYXJ0IiwiY29uZGl0aW9ucyI6eyJ0YWdzIjp7Im9wZXJhdG9yIjoib3IiLCJ2YWx1ZXMiOlsiYXNjaWlhcnQiXX19fSx7ImZyb20iOiJwbGFubmVyIiwidG8iOiJ0aWdlciIsImN
vbmRpdGlvbnMiOnsidGFncyI6eyJvcGVyYXRvciI6Im9yIiwidmFsdWVzIjpbInRpZ2VyIl19fX0seyJmcm9tIjoicGxhbm5lciIsInRvIjoiZG9nIiwiY29uZGl0aW9ucyI6eyJ0YWdzIjp7Im9wZXJhdG9yIjoib3IiLCJ2YWx1ZXMiOlsiZG9nIl19fX0seyJmcm9tIjoicGxhbm5lciIsInRvIjoiZWxlcGhhbnQiLCJjb25kaXRpb25zIjp7InRhZ3MiOnsib3BlcmF0b3IiOiJvciIsInZhbHVlcyI6WyJlbGVwaGFudCJdfX19LHsiZnJvbSI6InRpZ2VyIiwidG8iOiJzZXJ2ZS1zaW5rIiwiY29uZGl0aW9ucyI6bnVsbH0seyJmcm9tIjoiZG9nIiwidG8iOiJzZXJ2ZS1zaW5rIiwiY29uZGl0aW9ucyI6bnVsbH0seyJmcm9tIjoiZWxlcGhhbnQiLCJ0byI6InNlcnZlLXNpbmsiLCJjb25kaXRpb25zIjpudWxsfSx7ImZyb20iOiJhc2NpaWFydCIsInRvIjoic2VydmUtc2luayIsImNvbmRpdGlvbnMiOm51bGx9LHsiZnJvbSI6InBsYW5uZXIiLCJ0byI6ImVycm9yLXNpbmsiLCJjb25kaXRpb25zIjp7InRhZ3MiOnsib3BlcmF0b3IiOiJvciIsInZhbHVlcyI6WyJlcnJvciJdfX19XSwibGlmZWN5Y2xlIjp7fSwid2F0ZXJtYXJrIjp7fX0="; diff --git a/rust/serving/src/app/callback/state.rs b/rust/serving/src/app/callback/state.rs index 419a40f60..2354899aa 100644 --- a/rust/serving/src/app/callback/state.rs +++ b/rust/serving/src/app/callback/state.rs @@ -228,7 +228,7 @@ where mod tests { use super::*; use crate::app::callback::store::memstore::InMemoryStore; - use crate::app::callback::Response; + use crate::callback::Response; use crate::pipeline::PipelineDCG; use axum::body::Bytes; diff --git a/rust/serving/src/app/callback/store/memstore.rs b/rust/serving/src/app/callback/store/memstore.rs index 9e470a7c4..0e7135b8e 100644 --- a/rust/serving/src/app/callback/store/memstore.rs +++ b/rust/serving/src/app/callback/store/memstore.rs @@ -98,7 +98,7 @@ mod tests { use super::*; use crate::app::callback::store::{PayloadToSave, Store}; - use crate::app::callback::{Callback, Response}; + use crate::callback::{Callback, Response}; #[tokio::test] async fn test_save_and_retrieve_callbacks() { diff --git a/rust/serving/src/app/callback/store/redisstore.rs b/rust/serving/src/app/callback/store/redisstore.rs index d4fe501cd..490b7be94 100644 --- a/rust/serving/src/app/callback/store/redisstore.rs +++ 
b/rust/serving/src/app/callback/store/redisstore.rs @@ -203,7 +203,7 @@ impl super::Store for RedisConnection { mod tests { use super::*; use crate::app::callback::store::LocalStore; - use crate::app::callback::Response; + use crate::callback::Response; use axum::body::Bytes; use redis::AsyncCommands; diff --git a/rust/serving/src/app/jetstream_proxy.rs b/rust/serving/src/app/jetstream_proxy.rs index d4302a393..ad4db52ca 100644 --- a/rust/serving/src/app/jetstream_proxy.rs +++ b/rust/serving/src/app/jetstream_proxy.rs @@ -260,12 +260,11 @@ mod tests { use tower::ServiceExt; use super::*; - use crate::app::callback; use crate::app::callback::state::State as CallbackState; use crate::app::callback::store::memstore::InMemoryStore; use crate::app::callback::store::PayloadToSave; - use crate::app::callback::Callback; use crate::app::tracker::MessageGraph; + use crate::callback::{Callback, Response}; use crate::config::DEFAULT_ID_HEADER; use crate::pipeline::PipelineDCG; use crate::{Error, Settings}; @@ -361,21 +360,21 @@ mod tests { vertex: "in".to_string(), cb_time: 12345, from_vertex: "in".to_string(), - responses: vec![callback::Response { tags: None }], + responses: vec![Response { tags: None }], }, Callback { id: id.to_string(), vertex: "cat".to_string(), cb_time: 12345, from_vertex: "in".to_string(), - responses: vec![callback::Response { tags: None }], + responses: vec![Response { tags: None }], }, Callback { id: id.to_string(), vertex: "out".to_string(), cb_time: 12345, from_vertex: "cat".to_string(), - responses: vec![callback::Response { tags: None }], + responses: vec![Response { tags: None }], }, ] } diff --git a/rust/serving/src/app/tracker.rs b/rust/serving/src/app/tracker.rs index 5f3b24db7..e1b23d35d 100644 --- a/rust/serving/src/app/tracker.rs +++ b/rust/serving/src/app/tracker.rs @@ -4,7 +4,7 @@ use std::sync::Arc; use serde::{Deserialize, Serialize}; -use crate::app::callback::Callback; +use crate::callback::Callback; use crate::pipeline::{Edge, 
OperatorType, PipelineDCG}; use crate::Error; @@ -250,7 +250,7 @@ impl MessageGraph { #[cfg(test)] mod tests { use super::*; - use crate::app::callback::Response; + use crate::callback::Response; use crate::pipeline::{Conditions, Tag, Vertex}; #[test] diff --git a/rust/serving/src/callback.rs b/rust/serving/src/callback.rs index 75d5522b8..4c6db654c 100644 --- a/rust/serving/src/callback.rs +++ b/rust/serving/src/callback.rs @@ -1,29 +1,32 @@ use std::{ - collections::HashMap, sync::Arc, time::{Duration, SystemTime, UNIX_EPOCH}, }; use reqwest::Client; +use serde::{Deserialize, Serialize}; use tokio::sync::Semaphore; -use crate::config::DEFAULT_CALLBACK_URL_HEADER_KEY; use crate::config::DEFAULT_ID_HEADER; -use crate::Error; -/// The data to be sent in the POST request -#[derive(serde::Serialize)] -struct CallbackPayload { - /// Unique identifier of the message - id: String, - /// Name of the vertex - vertex: String, - /// Time when the callback was made - cb_time: u64, - /// Name of the vertex from which the message was sent - from_vertex: String, - /// List of tags associated with the message - tags: Option>, +/// As message passes through each component (map, transformer, sink, etc.). it emits a beacon via callback +/// to inform that message has been processed by this component. +#[derive(Debug, Serialize, Deserialize)] +pub(crate) struct Callback { + pub(crate) id: String, + pub(crate) vertex: String, + pub(crate) cb_time: u64, + pub(crate) from_vertex: String, + /// Due to flat-map operation, we can have 0 or more responses. + pub(crate) responses: Vec, +} + +/// It contains details about the `To` vertex via tags (conditional forwarding). +#[derive(Debug, Serialize, Deserialize)] +pub(crate) struct Response { + /// If tags is None, the message is forwarded to all vertices, if len(Vec) == 0, it means that + /// the message has been dropped. 
+ pub(crate) tags: Option>, } #[derive(Clone)] @@ -52,43 +55,26 @@ impl CallbackHandler { pub async fn callback( &self, - message_headers: &HashMap, - message_tags: &Option>, + id: String, + callback_url: String, previous_vertex: String, + responses: Vec>>, ) -> crate::Result<()> { - let callback_url = message_headers - .get(DEFAULT_CALLBACK_URL_HEADER_KEY) - .ok_or_else(|| { - Error::Source(format!( - "{DEFAULT_CALLBACK_URL_HEADER_KEY} header is not present in the message headers", - )) - })? - .to_owned(); - let uuid = message_headers - .get(DEFAULT_ID_HEADER) - .ok_or_else(|| { - Error::Source(format!( - "{DEFAULT_ID_HEADER} is not found in message headers", - )) - })? - .to_owned(); let cb_time = SystemTime::now() .duration_since(UNIX_EPOCH) .expect("System time is older than Unix epoch time") .as_millis() as u64; - let mut msg_tags = None; - if let Some(tags) = message_tags { - if !tags.is_empty() { - msg_tags = Some(tags.iter().cloned().collect()); - } - }; + let responses = responses + .into_iter() + .map(|tags| Response { tags }) + .collect(); - let callback_payload = CallbackPayload { + let callback_payload = Callback { vertex: self.vertex_name.clone(), - id: uuid.clone(), + id: id.clone(), cb_time, - tags: msg_tags, + responses, from_vertex: previous_vertex, }; @@ -104,7 +90,7 @@ impl CallbackHandler { for i in 1..=TOTAL_ATTEMPTS { let resp = client .post(&callback_url) - .header(DEFAULT_ID_HEADER, uuid.clone()) + .header(DEFAULT_ID_HEADER, id.clone()) .json(&[&callback_payload]) .send() .await; @@ -175,17 +161,17 @@ mod tests { use crate::app::callback::store::memstore::InMemoryStore; use crate::app::start_main_server; use crate::app::tracker::MessageGraph; - use crate::callback::{CallbackHandler, DEFAULT_CALLBACK_URL_HEADER_KEY, DEFAULT_ID_HEADER}; + use crate::callback::CallbackHandler; use crate::config::generate_certs; use crate::pipeline::PipelineDCG; use crate::{AppState, Settings}; use axum_server::tls_rustls::RustlsConfig; - use 
std::collections::HashMap; use std::sync::Arc; use std::time::Duration; use tokio::sync::mpsc; type Result = std::result::Result>; + #[tokio::test] async fn test_callback() -> Result<()> { // Set up the CryptoProvider (controls core cryptography used by rustls) for the process @@ -241,22 +227,16 @@ mod tests { assert!(server_ready, "Server is not ready"); let callback_handler = CallbackHandler::new("test".into(), 10); - let message_headers: HashMap = [ - ( - DEFAULT_CALLBACK_URL_HEADER_KEY, - "https://localhost:3003/v1/process/callback", - ), - (DEFAULT_ID_HEADER, ID_VALUE), - ] - .into_iter() - .map(|(k, v)| (k.into(), v.into())) - .collect(); - let tags = Arc::from(vec!["tag1".to_owned()]); // On the server, this fails with SubGraphInvalidInput("Invalid callback: 1234, vertex: in") // We get 200 OK response from the server, since we already registered this request ID in the store. callback_handler - .callback(&message_headers, &Some(tags), "in".into()) + .callback( + ID_VALUE.into(), + "https://localhost:3003/v1/process/callback".into(), + "in".into(), + vec![], + ) .await?; let mut data = None; for _ in 0..10 { @@ -273,26 +253,4 @@ mod tests { server_handle.abort(); Ok(()) } - - #[tokio::test] - async fn test_callback_missing_headers() -> Result<()> { - let callback_handler = CallbackHandler::new("test".into(), 10); - let message_headers: HashMap = HashMap::new(); - let result = callback_handler - .callback(&message_headers, &None, "in".into()) - .await; - assert!(result.is_err()); - - let mut message_headers: HashMap = HashMap::new(); - message_headers.insert( - DEFAULT_CALLBACK_URL_HEADER_KEY.into(), - "https://localhost:3003/v1/process/callback".into(), - ); - let result = callback_handler - .callback(&message_headers, &None, "in".into()) - .await; - assert!(result.is_err()); - - Ok(()) - } } diff --git a/rust/serving/src/config.rs b/rust/serving/src/config.rs index 116b7c257..d5e93ab6f 100644 --- a/rust/serving/src/config.rs +++ b/rust/serving/src/config.rs 
@@ -18,8 +18,8 @@ const ENV_NUMAFLOW_SERVING_APP_PORT: &str = "NUMAFLOW_SERVING_APP_LISTEN_PORT"; const ENV_NUMAFLOW_SERVING_AUTH_TOKEN: &str = "NUMAFLOW_SERVING_AUTH_TOKEN"; const ENV_MIN_PIPELINE_SPEC: &str = "NUMAFLOW_SERVING_MIN_PIPELINE_SPEC"; -pub(crate) const DEFAULT_ID_HEADER: &str = "X-Numaflow-Id"; -pub(crate) const DEFAULT_CALLBACK_URL_HEADER_KEY: &str = "X-Numaflow-Callback-Url"; +pub const DEFAULT_ID_HEADER: &str = "X-Numaflow-Id"; +pub const DEFAULT_CALLBACK_URL_HEADER_KEY: &str = "X-Numaflow-Callback-Url"; pub fn generate_certs() -> std::result::Result<(Certificate, KeyPair), String> { let CertifiedKey { cert, key_pair } = generate_simple_self_signed(vec!["localhost".into()]) diff --git a/rust/serving/src/lib.rs b/rust/serving/src/lib.rs index 1292fe64e..f49d0c916 100644 --- a/rust/serving/src/lib.rs +++ b/rust/serving/src/lib.rs @@ -15,7 +15,7 @@ use crate::metrics::start_https_metrics_server; mod app; mod config; -pub use config::Settings; +pub use {config::Settings, config::DEFAULT_CALLBACK_URL_HEADER_KEY, config::DEFAULT_ID_HEADER}; mod consts; mod error; diff --git a/rust/serving/src/source.rs b/rust/serving/src/source.rs index 25d36c9eb..4c1d7ae35 100644 --- a/rust/serving/src/source.rs +++ b/rust/serving/src/source.rs @@ -247,6 +247,9 @@ mod tests { type Result = std::result::Result>; #[tokio::test] async fn test_serving_source() -> Result<()> { + // Setup the CryptoProvider (controls core cryptography used by rustls) for the process + let _ = rustls::crypto::aws_lc_rs::default_provider().install_default(); + let settings = Arc::new(Settings::default()); let serving_source = ServingSource::new(Arc::clone(&settings), 10, Duration::from_millis(1), 0).await?; From 30e8a1187b1a8fa25e22cfc6f20f848b62b3a5b0 Mon Sep 17 00:00:00 2001 From: Sreekanth Date: Thu, 16 Jan 2025 18:09:13 +0530 Subject: [PATCH 10/27] Use let-else pattern Signed-off-by: Sreekanth --- rust/numaflow-core/src/tracker.rs | 131 ++++++++++++++++-------------- 1 file changed, 70 
insertions(+), 61 deletions(-) diff --git a/rust/numaflow-core/src/tracker.rs b/rust/numaflow-core/src/tracker.rs index 875bb6a1a..2c23cef99 100644 --- a/rust/numaflow-core/src/tracker.rs +++ b/rust/numaflow-core/src/tracker.rs @@ -214,38 +214,42 @@ impl Tracker { /// Updates an existing entry in the tracker with the number of expected messages and EOF status. fn handle_update(&mut self, offset: Bytes, responses: Option>) { - if let Some(entry) = self.entries.get_mut(&offset) { - entry.count += 1; + let Some(entry) = self.entries.get_mut(&offset) else { + return; + }; + + entry.count += 1; + entry + .callback_info + .as_mut() + .map(|cb| cb.responses.push(responses)); + + // if the count is zero, we can send an ack immediately + // this is case where map stream will send eof true after + // receiving all the messages. + if entry.count == 0 { + let entry = self.entries.remove(&offset).unwrap(); entry - .callback_info - .as_mut() - .map(|cb| cb.responses.push(responses)); - // if the count is zero, we can send an ack immediately - // this is case where map stream will send eof true after - // receiving all the messages. - if entry.count == 0 { - let entry = self.entries.remove(&offset).unwrap(); - entry - .ack_send - .send(ReadAck::Ack) - .expect("Failed to send ack"); - } + .ack_send + .send(ReadAck::Ack) + .expect("Failed to send ack"); } } fn handle_update_eof(&mut self, offset: Bytes) { - if let Some(entry) = self.entries.get_mut(&offset) { - entry.eof = true; - // if the count is zero, we can send an ack immediately - // this is case where map stream will send eof true after - // receiving all the messages. 
- if entry.count == 0 { - let entry = self.entries.remove(&offset).unwrap(); - entry - .ack_send - .send(ReadAck::Ack) - .expect("Failed to send ack"); - } + let Some(entry) = self.entries.get_mut(&offset) else { + return; + }; + entry.eof = true; + // if the count is zero, we can send an ack immediately + // this is case where map stream will send eof true after + // receiving all the messages. + if entry.count == 0 { + let entry = self.entries.remove(&offset).unwrap(); + entry + .ack_send + .send(ReadAck::Ack) + .expect("Failed to send ack"); } } @@ -255,52 +259,57 @@ impl Tracker { responses: Vec>>, eof: bool, ) { - if let Some(entry) = self.entries.get_mut(&offset) { - entry.count += responses.len() as u32; - entry.eof = eof; + let Some(entry) = self.entries.get_mut(&offset) else { + return; + }; + + entry.count += responses.len() as u32; + entry.eof = eof; + entry + .callback_info + .as_mut() + .map(|cb| cb.responses.extend(responses)); + + // if the count is zero, we can send an ack immediately + // this is case where map stream will send eof true after + // receiving all the messages. + if entry.count == 0 { + let entry = self.entries.remove(&offset).unwrap(); entry - .callback_info - .as_mut() - .map(|cb| cb.responses.extend(responses)); - // if the count is zero, we can send an ack immediately - // this is case where map stream will send eof true after - // receiving all the messages. - if entry.count == 0 { - let entry = self.entries.remove(&offset).unwrap(); - entry - .ack_send - .send(ReadAck::Ack) - .expect("Failed to send ack"); - } + .ack_send + .send(ReadAck::Ack) + .expect("Failed to send ack"); } } /// Removes an entry from the tracker and sends an acknowledgment if the count is zero /// and the entry is marked as EOF. 
fn handle_delete(&mut self, offset: Bytes) { - if let Some(mut entry) = self.entries.remove(&offset) { - if entry.count > 0 { - entry.count -= 1; - } - if entry.count == 0 && entry.eof { - entry - .ack_send - .send(ReadAck::Ack) - .expect("Failed to send ack"); - } else { - self.entries.insert(offset, entry); - } + let Some(mut entry) = self.entries.remove(&offset) else { + return; + }; + if entry.count > 0 { + entry.count -= 1; + } + if entry.count == 0 && entry.eof { + entry + .ack_send + .send(ReadAck::Ack) + .expect("Failed to send ack"); + } else { + self.entries.insert(offset, entry); } } /// Discards an entry from the tracker and sends a nak. fn handle_discard(&mut self, offset: Bytes) { - if let Some(entry) = self.entries.remove(&offset) { - entry - .ack_send - .send(ReadAck::Nak) - .expect("Failed to send nak"); - } + let Some(entry) = self.entries.remove(&offset) else { + return; + }; + entry + .ack_send + .send(ReadAck::Nak) + .expect("Failed to send nak"); } /// Discards all entries from the tracker and sends a nak for each. 
From d6b40e7f9e4b999857ff8e8570bdd0b4fe914591 Mon Sep 17 00:00:00 2001 From: Sreekanth Date: Thu, 16 Jan 2025 19:55:16 +0530 Subject: [PATCH 11/27] Make callback from tracker Signed-off-by: Sreekanth --- rust/numaflow-core/src/tracker.rs | 81 +++++++++++++++++-------------- 1 file changed, 44 insertions(+), 37 deletions(-) diff --git a/rust/numaflow-core/src/tracker.rs b/rust/numaflow-core/src/tracker.rs index 2c23cef99..6f5a08937 100644 --- a/rust/numaflow-core/src/tracker.rs +++ b/rust/numaflow-core/src/tracker.rs @@ -23,7 +23,7 @@ use crate::Result; #[derive(Debug)] struct TrackerEntry { ack_send: oneshot::Sender, - count: u32, + count: usize, eof: bool, callback_info: Option, } @@ -42,6 +42,7 @@ enum ActorMessage { UpdateMany { offset: Bytes, responses: Vec>>, + count: usize, eof: bool, }, UpdateEOF { @@ -170,15 +171,16 @@ impl Tracker { ActorMessage::UpdateMany { offset, responses, + count, eof, } => { - self.handle_update_many(offset, responses, eof); + self.handle_update_many(offset, responses, count, eof); } ActorMessage::UpdateEOF { offset } => { - self.handle_update_eof(offset); + self.handle_update_eof(offset).await; } ActorMessage::Delete { offset } => { - self.handle_delete(offset); + self.handle_delete(offset).await; } ActorMessage::Discard { offset } => { self.handle_discard(offset); @@ -223,20 +225,9 @@ impl Tracker { .callback_info .as_mut() .map(|cb| cb.responses.push(responses)); - - // if the count is zero, we can send an ack immediately - // this is case where map stream will send eof true after - // receiving all the messages. - if entry.count == 0 { - let entry = self.entries.remove(&offset).unwrap(); - entry - .ack_send - .send(ReadAck::Ack) - .expect("Failed to send ack"); - } } - fn handle_update_eof(&mut self, offset: Bytes) { + async fn handle_update_eof(&mut self, offset: Bytes) { let Some(entry) = self.entries.get_mut(&offset) else { return; }; @@ -246,10 +237,7 @@ impl Tracker { // receiving all the messages. 
if entry.count == 0 { let entry = self.entries.remove(&offset).unwrap(); - entry - .ack_send - .send(ReadAck::Ack) - .expect("Failed to send ack"); + self.ack_message(entry).await; } } @@ -257,34 +245,24 @@ impl Tracker { &mut self, offset: Bytes, responses: Vec>>, + count: usize, eof: bool, ) { let Some(entry) = self.entries.get_mut(&offset) else { return; }; - entry.count += responses.len() as u32; + entry.count += count; entry.eof = eof; entry .callback_info .as_mut() .map(|cb| cb.responses.extend(responses)); - - // if the count is zero, we can send an ack immediately - // this is case where map stream will send eof true after - // receiving all the messages. - if entry.count == 0 { - let entry = self.entries.remove(&offset).unwrap(); - entry - .ack_send - .send(ReadAck::Ack) - .expect("Failed to send ack"); - } } /// Removes an entry from the tracker and sends an acknowledgment if the count is zero /// and the entry is marked as EOF. - fn handle_delete(&mut self, offset: Bytes) { + async fn handle_delete(&mut self, offset: Bytes) { let Some(mut entry) = self.entries.remove(&offset) else { return; }; @@ -292,10 +270,7 @@ impl Tracker { entry.count -= 1; } if entry.count == 0 && entry.eof { - entry - .ack_send - .send(ReadAck::Ack) - .expect("Failed to send ack"); + self.ack_message(entry).await; } else { self.entries.insert(offset, entry); } @@ -321,6 +296,37 @@ impl Tracker { .expect("Failed to send nak"); } } + + async fn ack_message(&self, entry: TrackerEntry) { + let TrackerEntry { + ack_send, + callback_info, + .. 
+ } = entry; + + ack_send.send(ReadAck::Ack).expect("Failed to send ack"); + + let Some(ref callback_handler) = self.callback_handler else { + return; + }; + let Some(callback_info) = callback_info else { + tracing::error!("Callback is enabled, but Tracker doesn't contain callback info"); + return; + }; + + let id = callback_info.id.clone(); + let result = callback_handler + .callback( + callback_info.id, + callback_info.callback_url, + callback_info.from_vertex, + callback_info.responses, + ) + .await; + if let Err(e) = result { + tracing::error!(?e, id, "Failed to send callback"); + } + } } /// TrackerHandle provides an interface to interact with the Tracker. @@ -419,6 +425,7 @@ impl TrackerHandle { let message = ActorMessage::UpdateMany { offset, responses, + count: messages.len(), eof, }; self.sender From 45dfcd35e35e5227a770c8c393c7159e7fac789f Mon Sep 17 00:00:00 2001 From: Sreekanth Date: Fri, 17 Jan 2025 11:58:47 +0530 Subject: [PATCH 12/27] Single update method for Tracker handle Signed-off-by: Sreekanth --- rust/numaflow-core/src/config/monovertex.rs | 2 +- rust/numaflow-core/src/config/pipeline.rs | 2 +- rust/numaflow-core/src/mapper/map.rs | 34 +++++- rust/numaflow-core/src/metrics.rs | 9 +- rust/numaflow-core/src/tracker.rs | 113 ++++++-------------- rust/numaflow-core/src/transformer.rs | 15 ++- rust/serving/src/app.rs | 2 +- 7 files changed, 81 insertions(+), 96 deletions(-) diff --git a/rust/numaflow-core/src/config/monovertex.rs b/rust/numaflow-core/src/config/monovertex.rs index 4fdbc95fe..4bd7ac565 100644 --- a/rust/numaflow-core/src/config/monovertex.rs +++ b/rust/numaflow-core/src/config/monovertex.rs @@ -150,7 +150,7 @@ impl MonovertexConfig { .unwrap_or(DEFAULT_LOOKBACK_WINDOW_IN_SECS); let mut callback_config = None; - if let Ok(_) = env::var(ENV_CALLBACK_ENABLED) { + if env::var(ENV_CALLBACK_ENABLED).is_ok() { let callback_concurrency: usize = env::var(ENV_CALLBACK_CONCURRENCY) .unwrap_or_else(|_| format!("{DEFAULT_CALLBACK_CONCURRENCY}")) 
.parse() diff --git a/rust/numaflow-core/src/config/pipeline.rs b/rust/numaflow-core/src/config/pipeline.rs index 549fff556..7e7fdf84f 100644 --- a/rust/numaflow-core/src/config/pipeline.rs +++ b/rust/numaflow-core/src/config/pipeline.rs @@ -390,7 +390,7 @@ impl PipelineConfig { .unwrap_or(DEFAULT_LOOKBACK_WINDOW_IN_SECS); let mut callback_config = None; - if let Ok(_) = get_var(ENV_CALLBACK_ENABLED) { + if get_var(ENV_CALLBACK_ENABLED).is_ok() { let callback_concurrency: usize = get_var(ENV_CALLBACK_CONCURRENCY) .unwrap_or_else(|_| format!("{DEFAULT_CALLBACK_CONCURRENCY}")) .parse() diff --git a/rust/numaflow-core/src/mapper/map.rs b/rust/numaflow-core/src/mapper/map.rs index c9682a48b..b1e85a8ba 100644 --- a/rust/numaflow-core/src/mapper/map.rs +++ b/rust/numaflow-core/src/mapper/map.rs @@ -1,6 +1,7 @@ use std::sync::Arc; use std::time::Duration; +use bytes::Bytes; use numaflow_pb::clients::map::map_client::MapClient; use tokio::sync::{mpsc, oneshot, OwnedSemaphorePermit, Semaphore}; use tokio::task::JoinHandle; @@ -328,6 +329,7 @@ impl MapHandle { tokio::spawn(async move { let _permit = permit; + let offset = read_msg.id.offset.clone(); let (sender, receiver) = oneshot::channel(); let msg = UnaryActorMessage { message: read_msg.clone(), @@ -344,7 +346,16 @@ impl MapHandle { match receiver.await { Ok(Ok(mut mapped_messages)) => { // update the tracker with the number of messages sent and send the mapped messages - if let Err(e) = tracker_handle.update_many(&mapped_messages, true).await { + for message in mapped_messages.iter() { + if let Err(e) = tracker_handle + .update(offset.clone(), message.tags.clone()) + .await + { + error_tx.send(e).await.expect("failed to send error"); + return; + } + } + if let Err(e) = tracker_handle.update_eof(offset).await { error_tx.send(e).await.expect("failed to send error"); return; } @@ -391,7 +402,18 @@ impl MapHandle { for receiver in receivers { match receiver.await { Ok(Ok(mut mapped_messages)) => { - 
tracker_handle.update_many(&mapped_messages, true).await?; + let mut offset: Option = None; + for message in mapped_messages.iter() { + if offset.is_none() { + offset = Some(message.id.offset.clone()); + } + tracker_handle + .update(message.id.offset.clone(), message.tags.clone()) + .await?; + } + if let Some(offset) = offset { + tracker_handle.update_eof(offset).await?; + } for mapped_message in mapped_messages.drain(..) { output_tx .send(mapped_message) @@ -445,7 +467,13 @@ impl MapHandle { while let Some(result) = receiver.recv().await { match result { Ok(mapped_message) => { - if let Err(e) = tracker_handle.update(&mapped_message).await { + if let Err(e) = tracker_handle + .update( + mapped_message.id.offset.clone(), + mapped_message.tags.clone(), + ) + .await + { error_tx.send(e).await.expect("failed to send error"); return; } diff --git a/rust/numaflow-core/src/metrics.rs b/rust/numaflow-core/src/metrics.rs index ef285f57d..e0eb0cc1e 100644 --- a/rust/numaflow-core/src/metrics.rs +++ b/rust/numaflow-core/src/metrics.rs @@ -721,6 +721,7 @@ struct TimestampedPending { #[derive(Clone)] pub(crate) enum LagReader { Source(Source), + #[allow(clippy::upper_case_acronyms)] ISB(Vec), // multiple partitions } @@ -862,7 +863,7 @@ async fn build_pending_info( match &mut lag_reader { LagReader::Source(source) => { - match fetch_source_pending(&source).await { + match fetch_source_pending(source).await { Ok(pending) => { if pending != -1 { let mut stats = pending_stats.lock().await; @@ -887,8 +888,8 @@ async fn build_pending_info( } LagReader::ISB(readers) => { - for mut reader in readers { - match fetch_isb_pending(&mut reader).await { + for reader in readers { + match fetch_isb_pending(reader).await { Ok(pending) => { if pending != -1 { let mut stats = pending_stats.lock().await; @@ -984,7 +985,7 @@ async fn expose_pending_metrics( } /// Calculate the average pending messages over the last `seconds` seconds. 
-async fn calculate_pending(seconds: i64, pending_stats: &Vec) -> i64 { +async fn calculate_pending(seconds: i64, pending_stats: &[TimestampedPending]) -> i64 { let mut result = -1; let mut total = 0; let mut num = 0; diff --git a/rust/numaflow-core/src/tracker.rs b/rust/numaflow-core/src/tracker.rs index 6f5a08937..158882012 100644 --- a/rust/numaflow-core/src/tracker.rs +++ b/rust/numaflow-core/src/tracker.rs @@ -9,6 +9,7 @@ //! In the future Watermark will also be propagated based on this. use std::collections::HashMap; +use std::sync::Arc; use bytes::Bytes; use serving::callback::CallbackHandler; @@ -39,12 +40,6 @@ enum ActorMessage { offset: Bytes, responses: Option>, }, - UpdateMany { - offset: Bytes, - responses: Vec>>, - count: usize, - eof: bool, - }, UpdateEOF { offset: Bytes, }, @@ -107,7 +102,6 @@ impl TryFrom<&Message> for CallbackInfo { .previous_vertex .clone(); - // FIXME: empty message tags let mut msg_tags = None; if let Some(ref tags) = message.tags { if !tags.is_empty() { @@ -139,12 +133,12 @@ impl Tracker { /// Creates a new Tracker instance with the given receiver for actor messages. 
fn new( receiver: mpsc::Receiver, - callback_handler: impl Into>, + callback_handler: Option, ) -> Self { Self { entries: HashMap::new(), receiver, - callback_handler: callback_handler.into(), + callback_handler, } } @@ -168,14 +162,6 @@ impl Tracker { ActorMessage::Update { offset, responses } => { self.handle_update(offset, responses); } - ActorMessage::UpdateMany { - offset, - responses, - count, - eof, - } => { - self.handle_update_many(offset, responses, count, eof); - } ActorMessage::UpdateEOF { offset } => { self.handle_update_eof(offset).await; } @@ -221,10 +207,9 @@ impl Tracker { }; entry.count += 1; - entry - .callback_info - .as_mut() - .map(|cb| cb.responses.push(responses)); + if let Some(cb) = entry.callback_info.as_mut() { + cb.responses.push(responses); + } } async fn handle_update_eof(&mut self, offset: Bytes) { @@ -241,25 +226,6 @@ impl Tracker { } } - fn handle_update_many( - &mut self, - offset: Bytes, - responses: Vec>>, - count: usize, - eof: bool, - ) { - let Some(entry) = self.entries.get_mut(&offset) else { - return; - }; - - entry.count += count; - entry.eof = eof; - entry - .callback_info - .as_mut() - .map(|cb| cb.responses.extend(responses)); - } - /// Removes an entry from the tracker and sends an acknowledgment if the count is zero /// and the entry is marked as EOF. async fn handle_delete(&mut self, offset: Bytes) { @@ -373,13 +339,16 @@ impl TrackerHandle { Ok(()) } - /// Updates an existing message in the Tracker with the given offset, count, and EOF status. - pub(crate) async fn update(&self, message: &Message) -> Result<()> { - let offset = message.id.offset.clone(); + /// Informs the tracker that a new message has been generated. The tracker should contain + /// an entry for this message's offset. 
+ pub(crate) async fn update( + &self, + offset: Bytes, + message_tags: Option>, + ) -> Result<()> { let mut responses: Option> = None; if self.enable_callbacks { - // FIXME: empty message tags - if let Some(ref tags) = message.tags { + if let Some(tags) = message_tags { if !tags.is_empty() { responses = Some(tags.iter().cloned().collect()); } @@ -393,7 +362,7 @@ impl TrackerHandle { Ok(()) } - /// Updates an existing message in the Tracker with the given offset, count, and EOF status. + /// Updates the EOF status for an offset in the Tracker pub(crate) async fn update_eof(&self, offset: Bytes) -> Result<()> { let message = ActorMessage::UpdateEOF { offset }; self.sender @@ -403,38 +372,6 @@ impl TrackerHandle { Ok(()) } - /// Updates an existing message in the Tracker with the given offset, count, and EOF status. - pub(crate) async fn update_many(&self, messages: &[Message], eof: bool) -> Result<()> { - if messages.is_empty() { - return Ok(()); - } - let offset = messages.first().unwrap().id.offset.clone(); - let mut responses: Vec>> = vec![]; - // if self.enable_callbacks { - // FIXME: empty message tags - for message in messages { - let mut response: Option> = None; - if let Some(ref tags) = message.tags { - if !tags.is_empty() { - response = Some(tags.iter().cloned().collect()); - } - }; - responses.push(response); - } - // } - let message = ActorMessage::UpdateMany { - offset, - responses, - count: messages.len(), - eof, - }; - self.sender - .send(message) - .await - .map_err(|e| Error::Tracker(format!("{:?}", e)))?; - Ok(()) - } - /// Deletes a message from the Tracker with the given offset. 
pub(crate) async fn delete(&self, offset: Bytes) -> Result<()> { let message = ActorMessage::Delete { offset }; @@ -515,7 +452,10 @@ mod tests { handle.insert(&message, ack_send).await.unwrap(); // Update the message - handle.update(&message).await.unwrap(); + handle + .update(offset.clone(), message.tags.clone()) + .await + .unwrap(); handle.update_eof(offset).await.unwrap(); // Delete the message @@ -554,7 +494,12 @@ mod tests { let messages: Vec = std::iter::repeat(message).take(3).collect(); // Update the message with a count of 3 - handle.update_many(&messages, true).await.unwrap(); + for message in messages { + handle + .update(offset.clone(), message.tags.clone()) + .await + .unwrap(); + } // Delete the message three times handle.delete(offset.clone()).await.unwrap(); @@ -627,8 +572,12 @@ mod tests { handle.insert(&message, ack_send).await.unwrap(); let messages: Vec = std::iter::repeat(message).take(3).collect(); - // Update the message with a count of 3 - handle.update_many(&messages, false).await.unwrap(); + for message in messages { + handle + .update(offset.clone(), message.tags.clone()) + .await + .unwrap(); + } // Discard the message handle.discard(offset).await.unwrap(); diff --git a/rust/numaflow-core/src/transformer.rs b/rust/numaflow-core/src/transformer.rs index 21bdc19c5..a474d6338 100644 --- a/rust/numaflow-core/src/transformer.rs +++ b/rust/numaflow-core/src/transformer.rs @@ -120,6 +120,7 @@ impl Transformer { let start_time = tokio::time::Instant::now(); let _permit = permit; + let offset = read_msg.id.offset.clone(); let (sender, receiver) = oneshot::channel(); let msg = ActorMessage::Transform { message: read_msg.clone(), @@ -137,10 +138,16 @@ impl Transformer { // wait for one-shot match receiver.await { Ok(Ok(mut transformed_messages)) => { - if let Err(e) = tracker_handle - .update_many(&transformed_messages, true) - .await - { + for message in transformed_messages.iter() { + if let Err(e) = tracker_handle + .update(offset.clone(), 
message.tags.clone()) + .await + { + let _ = error_tx.send(e).await; + return; + } + } + if let Err(e) = tracker_handle.update_eof(offset).await { let _ = error_tx.send(e).await; return; } diff --git a/rust/serving/src/app.rs b/rust/serving/src/app.rs index 7e6e7edb1..721376855 100644 --- a/rust/serving/src/app.rs +++ b/rust/serving/src/app.rs @@ -104,7 +104,7 @@ where .on_response( |response: &Response, latency: Duration, _span: &Span| { if response.status().is_server_error() { - // 5xx responses will be captured in on_failure at and logged at 'error' level + // 5xx responses will be logged at 'error' level in `on_failure` return; } tracing::info!(status=?response.status(), ?latency) From e6467a220f4cc37bdbba49ad54274b9a97c93081 Mon Sep 17 00:00:00 2001 From: Sreekanth Date: Fri, 17 Jan 2025 12:25:53 +0530 Subject: [PATCH 13/27] Refactoring Signed-off-by: Sreekanth --- rust/numaflow-core/src/tracker.rs | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/rust/numaflow-core/src/tracker.rs b/rust/numaflow-core/src/tracker.rs index 158882012..919ab5d26 100644 --- a/rust/numaflow-core/src/tracker.rs +++ b/rust/numaflow-core/src/tracker.rs @@ -346,14 +346,16 @@ impl TrackerHandle { offset: Bytes, message_tags: Option>, ) -> Result<()> { - let mut responses: Option> = None; - if self.enable_callbacks { - if let Some(tags) = message_tags { + let responses: Option> = match (self.enable_callbacks, message_tags) { + (true, Some(tags)) => { if !tags.is_empty() { - responses = Some(tags.iter().cloned().collect()); + Some(tags.iter().cloned().collect::>()) + } else { + None } - }; - } + } + _ => None, + }; let message = ActorMessage::Update { offset, responses }; self.sender .send(message) From 5e7fc1ef2b92eeb0f90aac9b460b99f033964918 Mon Sep 17 00:00:00 2001 From: Sreekanth Date: Fri, 17 Jan 2025 17:20:22 +0530 Subject: [PATCH 14/27] Debugging test failure Signed-off-by: Sreekanth --- test/api-e2e/api_test.go | 3 --- test/fixtures/when.go | 2 +- 2 
files changed, 1 insertion(+), 4 deletions(-) diff --git a/test/api-e2e/api_test.go b/test/api-e2e/api_test.go index e1800ea67..cc4557836 100644 --- a/test/api-e2e/api_test.go +++ b/test/api-e2e/api_test.go @@ -311,9 +311,6 @@ func (s *APISuite) TestAPIsForMetricsAndWatermarkAndPodsForPipeline() { } func (s *APISuite) TestMetricsAPIsForMonoVertex() { - _, cancel := context.WithTimeout(context.Background(), time.Minute) - defer cancel() - w := s.Given().MonoVertex("@testdata/mono-vertex.yaml"). When(). CreateMonoVertexAndWait() diff --git a/test/fixtures/when.go b/test/fixtures/when.go index 154c7cfda..7bbeac2b4 100644 --- a/test/fixtures/when.go +++ b/test/fixtures/when.go @@ -117,7 +117,7 @@ func (w *When) CreateMonoVertexAndWait() *When { w.monoVertex = i } // wait - if err := WaitForMonoVertexRunning(ctx, w.monoVertexClient, w.monoVertex.Name, defaultTimeout); err != nil { + if err := WaitForMonoVertexRunning(ctx, w.monoVertexClient, w.monoVertex.Name, 5*time.Minute); err != nil { w.t.Fatal(err) } return w From 22a2a8a2ed23e7dc8562a2e776c48378e99389ad Mon Sep 17 00:00:00 2001 From: Sreekanth Date: Fri, 17 Jan 2025 17:58:44 +0530 Subject: [PATCH 15/27] Debugging test failure - increase timeout Signed-off-by: Sreekanth --- test/fixtures/expect.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/fixtures/expect.go b/test/fixtures/expect.go index 996d762bd..9398fa4fc 100644 --- a/test/fixtures/expect.go +++ b/test/fixtures/expect.go @@ -120,7 +120,7 @@ func (t *Expect) VertexPodsRunning() *Expect { func (t *Expect) MonoVertexPodsRunning() *Expect { t.t.Helper() - if err := WaitForMonoVertexPodRunning(t.kubeClient, t.monoVertexClient, Namespace, t.monoVertex.Name, 2*time.Minute); err != nil { + if err := WaitForMonoVertexPodRunning(t.kubeClient, t.monoVertexClient, Namespace, t.monoVertex.Name, 5*time.Minute); err != nil { t.t.Fatalf("Expected mono vertex %q pod running: %v", t.monoVertex.Name, err) } return t From 
53fb2966a9d29ea391b389c9d03632013d959433 Mon Sep 17 00:00:00 2001 From: Sreekanth Date: Mon, 20 Jan 2025 08:49:09 +0530 Subject: [PATCH 16/27] Debugging test failure - print kubectl logs Signed-off-by: Sreekanth --- Makefile | 14 ++++++++++++++ test/api-e2e/api_test.go | 2 +- test/fixtures/when.go | 2 +- 3 files changed, 16 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index aab918c93..dea153743 100644 --- a/Makefile +++ b/Makefile @@ -136,6 +136,7 @@ endif cat test/manifests/e2e-api-pod.yaml | sed 's@quay.io/numaproj/@$(IMAGE_NAMESPACE)/@' | sed 's/:latest/:$(VERSION)/' | kubectl -n numaflow-system apply -f - go generate $(shell find ./test/$* -name '*.go') go test -v -timeout 15m -count 1 --tags test -p 1 ./test/$* + $(MAKE) show-logs $(MAKE) cleanup-e2e image-restart: @@ -156,6 +157,19 @@ cleanup-e2e: kubectl -n numaflow-system delete secret -lnumaflow-e2e=true --ignore-not-found=true kubectl -n numaflow-system delete po -lnumaflow-e2e=true --ignore-not-found=true +show-logs: + kubectl get mvtx && \ + kubectl get po -l 'app.kubernetes.io/component=mono-vertex' && \ + kubectl describe po -l 'app.kubernetes.io/component=mono-vertex' && \ + echo "UDSource Logs--" && \ + kubectl logs -l 'app.kubernetes.io/component=mono-vertex' -c udsource && \ + echo "Transformer Logs--" && \ + kubectl logs -l 'app.kubernetes.io/component=mono-vertex' -c transformer && \ + echo "Udsink Logs--" && \ + kubectl logs -l 'app.kubernetes.io/component=mono-vertex' -c udsink && \ + echo "Numa Logs--" && \ + kubectl logs -l 'app.kubernetes.io/component=mono-vertex' -c numa + # To run just one of the e2e tests by name (i.e. 'make TestCreateSimplePipeline'): Test%: $(MAKE) cleanup-e2e diff --git a/test/api-e2e/api_test.go b/test/api-e2e/api_test.go index cc4557836..b31db2aa0 100644 --- a/test/api-e2e/api_test.go +++ b/test/api-e2e/api_test.go @@ -314,7 +314,7 @@ func (s *APISuite) TestMetricsAPIsForMonoVertex() { w := s.Given().MonoVertex("@testdata/mono-vertex.yaml"). 
When(). CreateMonoVertexAndWait() - defer w.DeleteMonoVertexAndWait() + // defer w.DeleteMonoVertexAndWait() monoVertexName := "mono-vertex" diff --git a/test/fixtures/when.go b/test/fixtures/when.go index 7bbeac2b4..154c7cfda 100644 --- a/test/fixtures/when.go +++ b/test/fixtures/when.go @@ -117,7 +117,7 @@ func (w *When) CreateMonoVertexAndWait() *When { w.monoVertex = i } // wait - if err := WaitForMonoVertexRunning(ctx, w.monoVertexClient, w.monoVertex.Name, 5*time.Minute); err != nil { + if err := WaitForMonoVertexRunning(ctx, w.monoVertexClient, w.monoVertex.Name, defaultTimeout); err != nil { w.t.Fatal(err) } return w From ee1374f281cff86a7368341ed1dcc355e1408ba4 Mon Sep 17 00:00:00 2001 From: Sreekanth Date: Mon, 20 Jan 2025 09:59:01 +0530 Subject: [PATCH 17/27] Debugging test failure - mark test failure step in Makefile as success Signed-off-by: Sreekanth --- Makefile | 2 +- test/fixtures/expect.go | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index dea153743..8f25e6764 100644 --- a/Makefile +++ b/Makefile @@ -135,7 +135,7 @@ endif $(MAKE) restart-control-plane-components cat test/manifests/e2e-api-pod.yaml | sed 's@quay.io/numaproj/@$(IMAGE_NAMESPACE)/@' | sed 's/:latest/:$(VERSION)/' | kubectl -n numaflow-system apply -f - go generate $(shell find ./test/$* -name '*.go') - go test -v -timeout 15m -count 1 --tags test -p 1 ./test/$* + go test -v -timeout 15m -count 1 --tags test -p 1 ./test/$* || true $(MAKE) show-logs $(MAKE) cleanup-e2e diff --git a/test/fixtures/expect.go b/test/fixtures/expect.go index 9398fa4fc..31fcf643e 100644 --- a/test/fixtures/expect.go +++ b/test/fixtures/expect.go @@ -120,7 +120,7 @@ func (t *Expect) VertexPodsRunning() *Expect { func (t *Expect) MonoVertexPodsRunning() *Expect { t.t.Helper() - if err := WaitForMonoVertexPodRunning(t.kubeClient, t.monoVertexClient, Namespace, t.monoVertex.Name, 5*time.Minute); err != nil { + if err := WaitForMonoVertexPodRunning(t.kubeClient, 
t.monoVertexClient, Namespace, t.monoVertex.Name, defaultTimeout); err != nil { t.t.Fatalf("Expected mono vertex %q pod running: %v", t.monoVertex.Name, err) } return t From 141cd00c0c4f676b95e12777b3b593c37f9076e7 Mon Sep 17 00:00:00 2001 From: Sreekanth Date: Mon, 20 Jan 2025 10:17:52 +0530 Subject: [PATCH 18/27] Debugging test failure - List pods from numaflow-system namespace Signed-off-by: Sreekanth --- Makefile | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/Makefile b/Makefile index 8f25e6764..ff9ce276e 100644 --- a/Makefile +++ b/Makefile @@ -135,8 +135,8 @@ endif $(MAKE) restart-control-plane-components cat test/manifests/e2e-api-pod.yaml | sed 's@quay.io/numaproj/@$(IMAGE_NAMESPACE)/@' | sed 's/:latest/:$(VERSION)/' | kubectl -n numaflow-system apply -f - go generate $(shell find ./test/$* -name '*.go') - go test -v -timeout 15m -count 1 --tags test -p 1 ./test/$* || true - $(MAKE) show-logs + go test -v -timeout 5m -count 1 --tags test -p 1 ./test/$* || true + $(MAKE) show-logs && false $(MAKE) cleanup-e2e image-restart: @@ -158,17 +158,17 @@ cleanup-e2e: kubectl -n numaflow-system delete po -lnumaflow-e2e=true --ignore-not-found=true show-logs: - kubectl get mvtx && \ - kubectl get po -l 'app.kubernetes.io/component=mono-vertex' && \ - kubectl describe po -l 'app.kubernetes.io/component=mono-vertex' && \ - echo "UDSource Logs--" && \ - kubectl logs -l 'app.kubernetes.io/component=mono-vertex' -c udsource && \ + kubectl -n numaflow-system get mvtx && \ + kubectl -n numaflow-system get po -l 'app.kubernetes.io/component=mono-vertex' && \ + kubectl -n numaflow-system describe po -l 'app.kubernetes.io/component=mono-vertex' && \ + echo -n numaflow-system "UDSource Logs--" && \ + kubectl -n numaflow-system logs -l 'app.kubernetes.io/component=mono-vertex' -c udsource && \ echo "Transformer Logs--" && \ - kubectl logs -l 'app.kubernetes.io/component=mono-vertex' -c transformer && \ + kubectl -n numaflow-system logs -l 
'app.kubernetes.io/component=mono-vertex' -c transformer && \ echo "Udsink Logs--" && \ - kubectl logs -l 'app.kubernetes.io/component=mono-vertex' -c udsink && \ + kubectl -n numaflow-system logs -l 'app.kubernetes.io/component=mono-vertex' -c udsink && \ echo "Numa Logs--" && \ - kubectl logs -l 'app.kubernetes.io/component=mono-vertex' -c numa + kubectl -n numaflow-system logs -l 'app.kubernetes.io/component=mono-vertex' -c numa # To run just one of the e2e tests by name (i.e. 'make TestCreateSimplePipeline'): Test%: From 0400edd693a6cb716264b9038cbb0d6468d23cb1 Mon Sep 17 00:00:00 2001 From: Sreekanth Date: Mon, 20 Jan 2025 11:04:49 +0530 Subject: [PATCH 19/27] Debugging test failure - print daemon pod logs Signed-off-by: Sreekanth --- Makefile | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/Makefile b/Makefile index ff9ce276e..257fa59c0 100644 --- a/Makefile +++ b/Makefile @@ -135,7 +135,7 @@ endif $(MAKE) restart-control-plane-components cat test/manifests/e2e-api-pod.yaml | sed 's@quay.io/numaproj/@$(IMAGE_NAMESPACE)/@' | sed 's/:latest/:$(VERSION)/' | kubectl -n numaflow-system apply -f - go generate $(shell find ./test/$* -name '*.go') - go test -v -timeout 5m -count 1 --tags test -p 1 ./test/$* || true + go test -v -timeout 15m -count 1 --tags test -p 1 ./test/$* || true $(MAKE) show-logs && false $(MAKE) cleanup-e2e @@ -159,7 +159,11 @@ cleanup-e2e: show-logs: kubectl -n numaflow-system get mvtx && \ + kubectl -n numaflow-system get po -l 'app.kubernetes.io/component=mono-vertex-daemon' && \ kubectl -n numaflow-system get po -l 'app.kubernetes.io/component=mono-vertex' && \ + echo "Monovertex Daemon pod----" && \ + kubectl -n numaflow-system describe po -l 'app.kubernetes.io/component=mono-vertex-daemon' && \ + kubectl -n numaflow-system logs -l 'app.kubernetes.io/component=mono-vertex-daemon' kubectl -n numaflow-system describe po -l 'app.kubernetes.io/component=mono-vertex' && \ echo -n numaflow-system "UDSource Logs--" && \ 
kubectl -n numaflow-system logs -l 'app.kubernetes.io/component=mono-vertex' -c udsource && \ @@ -169,6 +173,7 @@ show-logs: kubectl -n numaflow-system logs -l 'app.kubernetes.io/component=mono-vertex' -c udsink && \ echo "Numa Logs--" && \ kubectl -n numaflow-system logs -l 'app.kubernetes.io/component=mono-vertex' -c numa + kubectl -n numaflow-system logs -l 'app.kubernetes.io/component=mono-vertex' # To run just one of the e2e tests by name (i.e. 'make TestCreateSimplePipeline'): Test%: From 75382e86dfbd3808e0c0fb8ee6a953f8fe9d24ae Mon Sep 17 00:00:00 2001 From: Sreekanth Date: Mon, 20 Jan 2025 11:49:48 +0530 Subject: [PATCH 20/27] Debugging test failure - Disable successful tests Signed-off-by: Sreekanth --- .github/workflows/ci.yaml | 26 +++++++++++++------------- Makefile | 18 ++++-------------- 2 files changed, 17 insertions(+), 27 deletions(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index b7f59c231..c100d3da0 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -80,7 +80,7 @@ jobs: steps: - name: Start Pulsar standalone container - run: docker run -d -p 6650:6650 -p 8080:8080 apachepulsar/pulsar:4.0.0 bin/pulsar standalone + run: docker run -d -p 6650:6650 -p 8080:8080 apachepulsar/pulsar:4.0.0 bin/pulsar standalone - name: Set up Go 1.x uses: actions/setup-go@v5 @@ -204,19 +204,19 @@ jobs: driver: [jetstream] case: [ - e2e, - diamond-e2e, - transformer-e2e, - kafka-e2e, - map-e2e, - reduce-one-e2e, - reduce-two-e2e, - udsource-e2e, - api-e2e, - sideinputs-e2e, - idle-source-e2e, + # e2e, + # diamond-e2e, + # transformer-e2e, + # kafka-e2e, + # map-e2e, + # reduce-one-e2e, + # reduce-two-e2e, + # udsource-e2e, + # api-e2e, + # sideinputs-e2e, + # idle-source-e2e, monovertex-e2e, - builtin-source-e2e, + # builtin-source-e2e, ] include: - driver: redis diff --git a/Makefile b/Makefile index 257fa59c0..f42256ead 100644 --- a/Makefile +++ b/Makefile @@ -158,20 +158,10 @@ cleanup-e2e: kubectl -n numaflow-system 
delete po -lnumaflow-e2e=true --ignore-not-found=true show-logs: - kubectl -n numaflow-system get mvtx && \ - kubectl -n numaflow-system get po -l 'app.kubernetes.io/component=mono-vertex-daemon' && \ - kubectl -n numaflow-system get po -l 'app.kubernetes.io/component=mono-vertex' && \ - echo "Monovertex Daemon pod----" && \ - kubectl -n numaflow-system describe po -l 'app.kubernetes.io/component=mono-vertex-daemon' && \ - kubectl -n numaflow-system logs -l 'app.kubernetes.io/component=mono-vertex-daemon' - kubectl -n numaflow-system describe po -l 'app.kubernetes.io/component=mono-vertex' && \ - echo -n numaflow-system "UDSource Logs--" && \ - kubectl -n numaflow-system logs -l 'app.kubernetes.io/component=mono-vertex' -c udsource && \ - echo "Transformer Logs--" && \ - kubectl -n numaflow-system logs -l 'app.kubernetes.io/component=mono-vertex' -c transformer && \ - echo "Udsink Logs--" && \ - kubectl -n numaflow-system logs -l 'app.kubernetes.io/component=mono-vertex' -c udsink && \ - echo "Numa Logs--" && \ + kubectl -n numaflow-system get mvtx + kubectl -n numaflow-system get po -l 'app.kubernetes.io/component=mono-vertex' + kubectl -n numaflow-system describe po -l 'app.kubernetes.io/component=mono-vertex' + echo "Numa Logs--" kubectl -n numaflow-system logs -l 'app.kubernetes.io/component=mono-vertex' -c numa kubectl -n numaflow-system logs -l 'app.kubernetes.io/component=mono-vertex' From 9902d8b8da1eeccd60bc16333544b4140c274245 Mon Sep 17 00:00:00 2001 From: Sreekanth Date: Mon, 20 Jan 2025 12:06:41 +0530 Subject: [PATCH 21/27] Debugging test failure - tail -f logs Signed-off-by: Sreekanth --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index f42256ead..e672c8bb1 100644 --- a/Makefile +++ b/Makefile @@ -163,7 +163,7 @@ show-logs: kubectl -n numaflow-system describe po -l 'app.kubernetes.io/component=mono-vertex' echo "Numa Logs--" kubectl -n numaflow-system logs -l 'app.kubernetes.io/component=mono-vertex' 
-c numa - kubectl -n numaflow-system logs -l 'app.kubernetes.io/component=mono-vertex' + kubectl -n numaflow-system logs -f -l 'app.kubernetes.io/component=mono-vertex' # To run just one of the e2e tests by name (i.e. 'make TestCreateSimplePipeline'): Test%: From fe373bef997f8e88404f25a925eaffb13238abd3 Mon Sep 17 00:00:00 2001 From: Sreekanth Date: Mon, 20 Jan 2025 12:54:45 +0530 Subject: [PATCH 22/27] Debugging test failure - print previous termination state Signed-off-by: Sreekanth --- Makefile | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/Makefile b/Makefile index e672c8bb1..1abf68874 100644 --- a/Makefile +++ b/Makefile @@ -159,11 +159,12 @@ cleanup-e2e: show-logs: kubectl -n numaflow-system get mvtx - kubectl -n numaflow-system get po -l 'app.kubernetes.io/component=mono-vertex' - kubectl -n numaflow-system describe po -l 'app.kubernetes.io/component=mono-vertex' + kubectl -n numaflow-system get po -l 'app.kubernetes.io/name=transformer-mono-vertex' + kubectl -n numaflow-system describe po -l 'app.kubernetes.io/name=transformer-mono-vertex' echo "Numa Logs--" - kubectl -n numaflow-system logs -l 'app.kubernetes.io/component=mono-vertex' -c numa - kubectl -n numaflow-system logs -f -l 'app.kubernetes.io/component=mono-vertex' + kubectl -n numaflow-system logs -l 'app.kubernetes.io/name=transformer-mono-vertex' + echo "Previous Logs--" + kubectl -n numaflow-system logs -p -l 'app.kubernetes.io/name=transformer-mono-vertex' # To run just one of the e2e tests by name (i.e. 
'make TestCreateSimplePipeline'): Test%: From 99b1cf557079b5017840b0f47631164497285d6f Mon Sep 17 00:00:00 2001 From: Sreekanth Date: Mon, 20 Jan 2025 13:40:31 +0530 Subject: [PATCH 23/27] Debugging test failure - Avoid pod deletion on test failure Signed-off-by: Sreekanth --- test/monovertex-e2e/monovertex_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/monovertex-e2e/monovertex_test.go b/test/monovertex-e2e/monovertex_test.go index ba0c64052..105c6d74b 100644 --- a/test/monovertex-e2e/monovertex_test.go +++ b/test/monovertex-e2e/monovertex_test.go @@ -34,7 +34,7 @@ type MonoVertexSuite struct { func (s *MonoVertexSuite) TestMonoVertexWithTransformer() { w := s.Given().MonoVertex("@testdata/mono-vertex-with-transformer.yaml"). When().CreateMonoVertexAndWait() - defer w.DeleteMonoVertexAndWait() + // defer w.DeleteMonoVertexAndWait() w.Expect().MonoVertexPodsRunning().MvtxDaemonPodsRunning() From 772c4ea1e38eba021b0fc14ee7bd55f2af07ecc4 Mon Sep 17 00:00:00 2001 From: Sreekanth Date: Mon, 20 Jan 2025 15:06:34 +0530 Subject: [PATCH 24/27] Debugging test failure - tail logs with Go code Signed-off-by: Sreekanth --- test/fixtures/when.go | 21 +++++++++++++++++++++ test/monovertex-e2e/monovertex_test.go | 4 ++-- 2 files changed, 23 insertions(+), 2 deletions(-) diff --git a/test/fixtures/when.go b/test/fixtures/when.go index 154c7cfda..d9257e08c 100644 --- a/test/fixtures/when.go +++ b/test/fixtures/when.go @@ -320,6 +320,27 @@ func (w *When) TerminateAllPodPortForwards() *When { return w } +func (w *When) StreamMonovertexPodLogs(vertexName, containerName string) *When { + w.t.Helper() + ctx := context.Background() + labelSelector := fmt.Sprintf("%s=%s,%s=%s", dfv1.KeyMonoVertexName, w.monoVertex.Name, dfv1.KeyComponent, vertexName) + w.t.Logf("Streaming logs for POD with label selector: %s", labelSelector) + podList, err := w.kubeClient.CoreV1().Pods(Namespace).List(ctx, metav1.ListOptions{LabelSelector: labelSelector, FieldSelector: 
"status.phase=Running"}) + if err != nil { + w.t.Fatalf("Error getting vertex pods: %v", err) + } + for _, pod := range podList.Items { + w.t.Logf("Streaming logs for POD: %s", pod.Name) + stopCh := make(chan struct{}, 1) + streamPodLogs(ctx, w.kubeClient, Namespace, pod.Name, containerName, stopCh) + if w.streamLogsStopChannels == nil { + w.streamLogsStopChannels = make(map[string]chan struct{}) + } + w.streamLogsStopChannels[pod.Name+":"+containerName] = stopCh + } + return w +} + func (w *When) StreamVertexPodLogs(vertexName, containerName string) *When { w.t.Helper() ctx := context.Background() diff --git a/test/monovertex-e2e/monovertex_test.go b/test/monovertex-e2e/monovertex_test.go index 105c6d74b..8c99efa4a 100644 --- a/test/monovertex-e2e/monovertex_test.go +++ b/test/monovertex-e2e/monovertex_test.go @@ -33,8 +33,8 @@ type MonoVertexSuite struct { func (s *MonoVertexSuite) TestMonoVertexWithTransformer() { w := s.Given().MonoVertex("@testdata/mono-vertex-with-transformer.yaml"). 
- When().CreateMonoVertexAndWait() - // defer w.DeleteMonoVertexAndWait() + When().CreateMonoVertexAndWait().StreamMonovertexPodLogs("mono-vertex", "numa") + defer w.DeleteMonoVertexAndWait() w.Expect().MonoVertexPodsRunning().MvtxDaemonPodsRunning() From ac571e61c28b6cf89b122706b297a7e629ae7e71 Mon Sep 17 00:00:00 2001 From: Sreekanth Date: Mon, 20 Jan 2025 17:59:50 +0530 Subject: [PATCH 25/27] Debugging test failure - Increase liveness periods Signed-off-by: Sreekanth --- Makefile | 4 ++-- test/fixtures/expect.go | 2 +- test/fixtures/when.go | 21 ------------------- test/monovertex-e2e/monovertex_test.go | 4 ++-- .../mono-vertex-with-transformer.yaml | 9 +++++++- 5 files changed, 13 insertions(+), 27 deletions(-) diff --git a/Makefile b/Makefile index 1abf68874..a1ce2df5b 100644 --- a/Makefile +++ b/Makefile @@ -162,9 +162,9 @@ show-logs: kubectl -n numaflow-system get po -l 'app.kubernetes.io/name=transformer-mono-vertex' kubectl -n numaflow-system describe po -l 'app.kubernetes.io/name=transformer-mono-vertex' echo "Numa Logs--" - kubectl -n numaflow-system logs -l 'app.kubernetes.io/name=transformer-mono-vertex' + kubectl -n numaflow-system logs -l 'app.kubernetes.io/name=transformer-mono-vertex' -c numa echo "Previous Logs--" - kubectl -n numaflow-system logs -p -l 'app.kubernetes.io/name=transformer-mono-vertex' + kubectl -n numaflow-system logs -p -l 'app.kubernetes.io/name=transformer-mono-vertex' -c numa # To run just one of the e2e tests by name (i.e. 
'make TestCreateSimplePipeline'): Test%: diff --git a/test/fixtures/expect.go b/test/fixtures/expect.go index 31fcf643e..996d762bd 100644 --- a/test/fixtures/expect.go +++ b/test/fixtures/expect.go @@ -120,7 +120,7 @@ func (t *Expect) VertexPodsRunning() *Expect { func (t *Expect) MonoVertexPodsRunning() *Expect { t.t.Helper() - if err := WaitForMonoVertexPodRunning(t.kubeClient, t.monoVertexClient, Namespace, t.monoVertex.Name, defaultTimeout); err != nil { + if err := WaitForMonoVertexPodRunning(t.kubeClient, t.monoVertexClient, Namespace, t.monoVertex.Name, 2*time.Minute); err != nil { t.t.Fatalf("Expected mono vertex %q pod running: %v", t.monoVertex.Name, err) } return t diff --git a/test/fixtures/when.go b/test/fixtures/when.go index d9257e08c..154c7cfda 100644 --- a/test/fixtures/when.go +++ b/test/fixtures/when.go @@ -320,27 +320,6 @@ func (w *When) TerminateAllPodPortForwards() *When { return w } -func (w *When) StreamMonovertexPodLogs(vertexName, containerName string) *When { - w.t.Helper() - ctx := context.Background() - labelSelector := fmt.Sprintf("%s=%s,%s=%s", dfv1.KeyMonoVertexName, w.monoVertex.Name, dfv1.KeyComponent, vertexName) - w.t.Logf("Streaming logs for POD with label selector: %s", labelSelector) - podList, err := w.kubeClient.CoreV1().Pods(Namespace).List(ctx, metav1.ListOptions{LabelSelector: labelSelector, FieldSelector: "status.phase=Running"}) - if err != nil { - w.t.Fatalf("Error getting vertex pods: %v", err) - } - for _, pod := range podList.Items { - w.t.Logf("Streaming logs for POD: %s", pod.Name) - stopCh := make(chan struct{}, 1) - streamPodLogs(ctx, w.kubeClient, Namespace, pod.Name, containerName, stopCh) - if w.streamLogsStopChannels == nil { - w.streamLogsStopChannels = make(map[string]chan struct{}) - } - w.streamLogsStopChannels[pod.Name+":"+containerName] = stopCh - } - return w -} - func (w *When) StreamVertexPodLogs(vertexName, containerName string) *When { w.t.Helper() ctx := context.Background() diff --git 
a/test/monovertex-e2e/monovertex_test.go b/test/monovertex-e2e/monovertex_test.go index 8c99efa4a..105c6d74b 100644 --- a/test/monovertex-e2e/monovertex_test.go +++ b/test/monovertex-e2e/monovertex_test.go @@ -33,8 +33,8 @@ type MonoVertexSuite struct { func (s *MonoVertexSuite) TestMonoVertexWithTransformer() { w := s.Given().MonoVertex("@testdata/mono-vertex-with-transformer.yaml"). - When().CreateMonoVertexAndWait().StreamMonovertexPodLogs("mono-vertex", "numa") - defer w.DeleteMonoVertexAndWait() + When().CreateMonoVertexAndWait() + // defer w.DeleteMonoVertexAndWait() w.Expect().MonoVertexPodsRunning().MvtxDaemonPodsRunning() diff --git a/test/monovertex-e2e/testdata/mono-vertex-with-transformer.yaml b/test/monovertex-e2e/testdata/mono-vertex-with-transformer.yaml index e49144850..c797234f3 100644 --- a/test/monovertex-e2e/testdata/mono-vertex-with-transformer.yaml +++ b/test/monovertex-e2e/testdata/mono-vertex-with-transformer.yaml @@ -3,6 +3,13 @@ kind: MonoVertex metadata: name: transformer-mono-vertex spec: + containerTemplate: + env: + - name: RUST_LOG + value: "debug" + livenessProbe: + periodSeconds: 300 + initialDelaySeconds: 300 scale: min: 1 source: @@ -23,4 +30,4 @@ spec: env: - name: SINK_HASH_KEY # Use the name of the mono vertex as the key - value: "transformer-mono-vertex" \ No newline at end of file + value: "transformer-mono-vertex" From 9a1b7ca340f5c55965a57603db94632ba8311bc1 Mon Sep 17 00:00:00 2001 From: Sreekanth Date: Mon, 20 Jan 2025 18:22:18 +0530 Subject: [PATCH 26/27] Debugging test failure - Sleep if error occurs Signed-off-by: Sreekanth --- rust/numaflow/src/main.rs | 3 +++ test/monovertex-e2e/testdata/mono-vertex-with-transformer.yaml | 2 ++ 2 files changed, 5 insertions(+) diff --git a/rust/numaflow/src/main.rs b/rust/numaflow/src/main.rs index 9a933b103..56f40ca48 100644 --- a/rust/numaflow/src/main.rs +++ b/rust/numaflow/src/main.rs @@ -1,5 +1,6 @@ use std::env; use std::error::Error; +use std::time::Duration; use 
tracing::error; use tracing_subscriber::layer::SubscriberExt; @@ -27,6 +28,8 @@ async fn main() -> Result<(), Box> { if let Err(e) = run().await { error!("{e:?}"); + tracing::warn!("Sleeping after error"); + tokio::time::sleep(Duration::from_secs(300)).await; return Err(e); } Ok(()) diff --git a/test/monovertex-e2e/testdata/mono-vertex-with-transformer.yaml b/test/monovertex-e2e/testdata/mono-vertex-with-transformer.yaml index c797234f3..c74d5583a 100644 --- a/test/monovertex-e2e/testdata/mono-vertex-with-transformer.yaml +++ b/test/monovertex-e2e/testdata/mono-vertex-with-transformer.yaml @@ -7,6 +7,8 @@ spec: env: - name: RUST_LOG value: "debug" + - name: RUST_BACKTRACE + value: "1" livenessProbe: periodSeconds: 300 initialDelaySeconds: 300 From ea5d6abbc4b6e82de84ef7143e192a550dd0ad7f Mon Sep 17 00:00:00 2001 From: Sreekanth Date: Mon, 20 Jan 2025 18:54:05 +0530 Subject: [PATCH 27/27] Debugging test failure - Run docker image directly Signed-off-by: Sreekanth --- Makefile | 1 + 1 file changed, 1 insertion(+) diff --git a/Makefile b/Makefile index a1ce2df5b..fbb66aa8c 100644 --- a/Makefile +++ b/Makefile @@ -198,6 +198,7 @@ endif ifdef IMAGE_IMPORT_CMD $(IMAGE_IMPORT_CMD) $(IMAGE_NAMESPACE)/$(BINARY_NAME):$(VERSION) endif + docker run --entrypoint /bin/numaflow-rs $(IMAGE_NAMESPACE)/$(BINARY_NAME):$(VERSION) --rust || true .PHONY: build-rust-in-docker build-rust-in-docker: