From f486d4915956371103cb27d531dfa9122b10aab9 Mon Sep 17 00:00:00 2001 From: Yashash H L Date: Tue, 10 Sep 2024 12:08:08 +0530 Subject: [PATCH 01/40] bidi source Signed-off-by: Yashash H L --- go.mod | 27 ++- go.sum | 58 +++--- pkg/apis/proto/source/v1/source.proto | 42 +++- .../pbq/wal/unaligned/fs/compactor_test.go | 3 - pkg/sdkclient/options.go | 34 ++++ pkg/sdkclient/reducer/client.go | 1 + pkg/sdkclient/source/client.go | 84 +++++--- pkg/sdkclient/source/client_test.go | 74 ++++--- pkg/sources/source.go | 2 +- pkg/sources/udsource/grpc_udsource.go | 18 +- pkg/sources/udsource/grpc_udsource_test.go | 93 +++++---- pkg/sources/udsource/user_defined_source.go | 4 - rust/monovertex/proto/source.proto | 42 +++- rust/monovertex/src/forwarder.rs | 186 ++++++++--------- rust/monovertex/src/lib.rs | 181 ++++++++--------- rust/monovertex/src/message.rs | 5 +- rust/monovertex/src/metrics.rs | 45 +++-- rust/monovertex/src/shared.rs | 20 +- rust/monovertex/src/sink.rs | 100 ++------- rust/monovertex/src/source.rs | 191 +++++++----------- rust/monovertex/src/transformer.rs | 87 ++------ rust/serving/src/app/tracker.rs | 2 +- 22 files changed, 643 insertions(+), 656 deletions(-) diff --git a/go.mod b/go.mod index f0dd236bd7..b8776ed24d 100644 --- a/go.mod +++ b/go.mod @@ -32,7 +32,7 @@ require ( github.com/montanaflynn/stats v0.0.0-20171201202039-1bf9dbcd8cbe github.com/nats-io/nats-server/v2 v2.10.17 github.com/nats-io/nats.go v1.36.0 - github.com/numaproj/numaflow-go v0.8.0 + github.com/numaproj/numaflow-go v0.8.1-0.20240909093557-c9946b0e8b33 github.com/prometheus/client_golang v1.18.0 github.com/prometheus/client_model v0.5.0 github.com/prometheus/common v0.45.0 @@ -48,13 +48,13 @@ require ( go.uber.org/goleak v1.3.0 go.uber.org/multierr v1.11.0 go.uber.org/zap v1.26.0 - golang.org/x/crypto v0.24.0 - golang.org/x/net v0.25.0 - golang.org/x/oauth2 v0.20.0 - golang.org/x/sync v0.7.0 + golang.org/x/crypto v0.26.0 + golang.org/x/net v0.28.0 + golang.org/x/oauth2 v0.21.0 + 
golang.org/x/sync v0.8.0 golang.org/x/tools v0.21.1-0.20240508182429-e35e4ccd0d2d - google.golang.org/genproto/googleapis/api v0.0.0-20231106174013-bbf56f31fb17 - google.golang.org/grpc v1.59.0 + google.golang.org/genproto/googleapis/api v0.0.0-20240604185151-ef581f913117 + google.golang.org/grpc v1.66.0 google.golang.org/grpc/cmd/protoc-gen-go-grpc v1.3.0 google.golang.org/protobuf v1.34.2 k8s.io/api v0.29.2 @@ -80,7 +80,7 @@ require ( github.com/asaskevich/govalidator v0.0.0-20230301143203-a9d515a09cc2 // indirect github.com/beorn7/perks v1.0.1 // indirect github.com/bytedance/sonic v1.11.3 // indirect - github.com/cespare/xxhash/v2 v2.2.0 // indirect + github.com/cespare/xxhash/v2 v2.3.0 // indirect github.com/chenzhuoyu/base64x v0.0.0-20230717121745-296ad89f973d // indirect github.com/chenzhuoyu/iasm v0.9.1 // indirect github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc // indirect @@ -113,7 +113,7 @@ require ( github.com/go-playground/validator/v10 v10.19.0 // indirect github.com/gobuffalo/flect v0.2.3 // indirect github.com/gobwas/glob v0.2.3 // indirect - github.com/golang/glog v1.1.2 // indirect + github.com/golang/glog v1.2.1 // indirect github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da // indirect github.com/golang/protobuf v1.5.4 // indirect github.com/golang/snappy v0.0.4 // indirect @@ -201,14 +201,13 @@ require ( golang.org/x/arch v0.7.0 // indirect golang.org/x/exp v0.0.0-20240531132922-fd00a4e0eefc // indirect golang.org/x/mod v0.17.0 // indirect - golang.org/x/sys v0.21.0 // indirect - golang.org/x/term v0.21.0 // indirect - golang.org/x/text v0.16.0 // indirect + golang.org/x/sys v0.25.0 // indirect + golang.org/x/term v0.23.0 // indirect + golang.org/x/text v0.18.0 // indirect golang.org/x/time v0.5.0 // indirect golang.org/x/xerrors v0.0.0-20220907171357-04be3eba64a2 // indirect gomodules.xyz/jsonpatch/v2 v2.4.0 // indirect - google.golang.org/genproto v0.0.0-20231106174013-bbf56f31fb17 // indirect - 
google.golang.org/genproto/googleapis/rpc v0.0.0-20231120223509-83a465c0220f // indirect + google.golang.org/genproto/googleapis/rpc v0.0.0-20240903143218-8af14fe29dc1 // indirect gopkg.in/inf.v0 v0.9.1 // indirect gopkg.in/ini.v1 v1.67.0 // indirect gopkg.in/yaml.v2 v2.4.0 // indirect diff --git a/go.sum b/go.sum index 1e46c54600..fc6202a07a 100644 --- a/go.sum +++ b/go.sum @@ -94,8 +94,8 @@ github.com/bytedance/sonic v1.11.3/go.mod h1:iZcSUejdk5aukTND/Eu/ivjQuEL0Cu9/rf5 github.com/casbin/casbin/v2 v2.77.2 h1:yQinn/w9x8AswiwqwtrXz93VU48R1aYTXdHEx4RI3jM= github.com/casbin/casbin/v2 v2.77.2/go.mod h1:mzGx0hYW9/ksOSpw3wNjk3NRAroq5VMFYUQ6G43iGPk= github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU= -github.com/cespare/xxhash/v2 v2.2.0 h1:DC2CZ1Ep5Y4k3ZQ899DldepgrayRUGE6BBZ/cd9Cj44= -github.com/cespare/xxhash/v2 v2.2.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= +github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs= +github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= github.com/chenzhuoyu/base64x v0.0.0-20211019084208-fb5309c8db06/go.mod h1:DH46F32mSOjUmXrMHnKwZdA8wcEefY7UVqBKYGjpdQY= github.com/chenzhuoyu/base64x v0.0.0-20221115062448-fe3a3abad311/go.mod h1:b583jCggY9gE99b6G5LEC39OIiVsWj+R97kbl5odCEk= github.com/chenzhuoyu/base64x v0.0.0-20230717121745-296ad89f973d h1:77cEq6EriyTZ0g/qfRdp61a3Uu/AWrgIq2s0ClJV1g0= @@ -249,8 +249,8 @@ github.com/gogo/protobuf v1.3.2/go.mod h1:P1XiOD3dCwIKUDQYPy72D8LYyHL2YPYrpS2s69 github.com/golang-jwt/jwt/v5 v5.2.1 h1:OuVbFODueb089Lh128TAcimifWaLhJwVflnrgM17wHk= github.com/golang-jwt/jwt/v5 v5.2.1/go.mod h1:pqrtFR0X4osieyHYxtmOUWsAWrfe1Q5UVIyoH402zdk= github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b/go.mod h1:SBH7ygxi8pfUlaOkMMuAQtPIUF8ecWP5IEl/CR7VP2Q= -github.com/golang/glog v1.1.2 h1:DVjP2PbBOzHyzA+dn3WhHIq4NdVu3Q+pvivFICf/7fo= -github.com/golang/glog v1.1.2/go.mod 
h1:zR+okUeTbrL6EL3xHUDxZuEtGv04p5shwip1+mL/rLQ= +github.com/golang/glog v1.2.1 h1:OptwRhECazUx5ix5TTWC3EZhsZEHWcYWY4FQHTIubm4= +github.com/golang/glog v1.2.1/go.mod h1:6AhwSGph0fcJtXVM/PEHPqZlFeoLxhs7/t5UDAwmO+w= github.com/golang/groupcache v0.0.0-20190702054246-869f871628b6/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= github.com/golang/groupcache v0.0.0-20191227052852-215e87163ea7/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= github.com/golang/groupcache v0.0.0-20200121045136-8c9f03a8e57e/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= @@ -485,8 +485,12 @@ github.com/nats-io/nkeys v0.4.7/go.mod h1:kqXRgRDPlGy7nGaEDMuYzmiJCIAAWDK0IMBtDm github.com/nats-io/nuid v1.0.1 h1:5iA8DT8V7q8WK2EScv2padNa/rTESc1KdnPw4TC2paw= github.com/nats-io/nuid v1.0.1/go.mod h1:19wcPz3Ph3q0Jbyiqsd0kePYG7A95tJPxeL+1OSON2c= github.com/niemeyer/pretty v0.0.0-20200227124842-a10e7caefd8e/go.mod h1:zD1mROLANZcx1PVRCS0qkT7pwLkGfwJo4zjcN/Tysno= -github.com/numaproj/numaflow-go v0.8.0 h1:1Pp0AMLXkmUPlvFjKeY3a9X+OLU8oN1OQWxD9jLg8Uo= -github.com/numaproj/numaflow-go v0.8.0/go.mod h1:WoMt31+h3up202zTRI8c/qe42B8UbvwLe2mJH0MAlhI= +github.com/numaproj/numaflow-go v0.8.1-0.20240906054808-44a0a178c835 h1:sMucUReYuLPSchDtzjNzZHE0UefDAgH9Hl6pULzNKj8= +github.com/numaproj/numaflow-go v0.8.1-0.20240906054808-44a0a178c835/go.mod h1:GWXSR8ZLKv1yjzTStVWiqu6HuGSjtS+wpvP2xLhqX+A= +github.com/numaproj/numaflow-go v0.8.1-0.20240908024139-2506e0d7639d h1:4NiJDowEBSeR/ptanr66vN9wUH6kM0B+M10sb69Lysw= +github.com/numaproj/numaflow-go v0.8.1-0.20240908024139-2506e0d7639d/go.mod h1:jOCtHiDv5hvrrSOt3/swCd8FpmEP1w/RNZydqJCsB58= +github.com/numaproj/numaflow-go v0.8.1-0.20240909093557-c9946b0e8b33 h1:lrSD4qibegQp896k3wGRNwQSb2f533JJsG4gixFiv5k= +github.com/numaproj/numaflow-go v0.8.1-0.20240909093557-c9946b0e8b33/go.mod h1:jOCtHiDv5hvrrSOt3/swCd8FpmEP1w/RNZydqJCsB58= github.com/nxadm/tail v1.4.4/go.mod h1:kenIhsEOeOJmVchQTgglprH7qJGnHDVpk1VPCcaMI8A= github.com/nxadm/tail v1.4.8 
h1:nPr65rt6Y5JFSKQO7qToXr7pePgD6Gwiw05lkbyAQTE= github.com/nxadm/tail v1.4.8/go.mod h1:+ncqLTQzXmGhMZNUePPaPqPvBxHAIsmXswZKocGu+AU= @@ -687,8 +691,8 @@ golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5y golang.org/x/crypto v0.0.0-20220214200702-86341886e292/go.mod h1:IxCIyHEi3zRg3s0A5j5BB6A9Jmi73HwBIUl50j+osU4= golang.org/x/crypto v0.3.0/go.mod h1:hebNnKkNXi2UzZN1eVRvBB7co0a+JxK6XbPiWVs/3J4= golang.org/x/crypto v0.6.0/go.mod h1:OFC/31mSvZgRz0V1QTNCzfAI1aIRzbiufJtkMIlEp58= -golang.org/x/crypto v0.24.0 h1:mnl8DM0o513X8fdIkmyFE/5hTYxbwYOjDS/+rK6qpRI= -golang.org/x/crypto v0.24.0/go.mod h1:Z1PMYSOR5nyMcyAVAIQSKCDwalqy85Aqn1x3Ws4L5DM= +golang.org/x/crypto v0.26.0 h1:RrRspgV4mU+YwB4FYnuBoKsUapNIL5cohGAmSH3azsw= +golang.org/x/crypto v0.26.0/go.mod h1:GY7jblb9wI+FOo5y8/S2oY4zWP07AkOJ4+jxCqdqn54= golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= golang.org/x/exp v0.0.0-20190306152737-a1d7652674e8/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= golang.org/x/exp v0.0.0-20190510132918-efd6b22b2522/go.mod h1:ZjyILWgesfNpC6sMxTJOJm9Kp84zZh5NQWvqDGG3Qr8= @@ -774,8 +778,8 @@ golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug golang.org/x/net v0.2.0/go.mod h1:KqCZLdyyvdV855qA2rE3GC2aiw5xGR5TEjj8smXukLY= golang.org/x/net v0.6.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs= golang.org/x/net v0.7.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs= -golang.org/x/net v0.25.0 h1:d/OCCoBEUq33pjydKrGQhw7IlUPI2Oylr+8qLx49kac= -golang.org/x/net v0.25.0/go.mod h1:JkAGAh7GEvH74S6FOH42FLoXpXbE/aqXSrIQjXgsiwM= +golang.org/x/net v0.28.0 h1:a9JDOJc5GMUJ0+UDqmLT86WiEy7iWyIhz8gz8E4e5hE= +golang.org/x/net v0.28.0/go.mod h1:yqtgsTWOOnlGLG9GFRrK3++bGOUEkNBoHZc8MEDWPNg= golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U= golang.org/x/oauth2 v0.0.0-20190226205417-e64efc72b421/go.mod 
h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= golang.org/x/oauth2 v0.0.0-20190604053449-0f29369cfe45/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= @@ -788,8 +792,8 @@ golang.org/x/oauth2 v0.0.0-20210218202405-ba52d332ba99/go.mod h1:KelEdhl1UZF7XfJ golang.org/x/oauth2 v0.0.0-20210220000619-9bb904979d93/go.mod h1:KelEdhl1UZF7XfJ4dDtk6s++YSgaE7mD/BuKKDLBl4A= golang.org/x/oauth2 v0.0.0-20210313182246-cd4f82c27b84/go.mod h1:KelEdhl1UZF7XfJ4dDtk6s++YSgaE7mD/BuKKDLBl4A= golang.org/x/oauth2 v0.0.0-20210819190943-2bc19b11175f/go.mod h1:KelEdhl1UZF7XfJ4dDtk6s++YSgaE7mD/BuKKDLBl4A= -golang.org/x/oauth2 v0.20.0 h1:4mQdhULixXKP1rwYBW0vAijoXnkTG0BLCDRzfe1idMo= -golang.org/x/oauth2 v0.20.0/go.mod h1:XYTD2NtWslqkgxebSiOHnXEap4TF09sJSc7H1sXbhtI= +golang.org/x/oauth2 v0.21.0 h1:tsimM75w1tF/uws5rbeHzIWxEqElMehnc+iW793zsZs= +golang.org/x/oauth2 v0.21.0/go.mod h1:XYTD2NtWslqkgxebSiOHnXEap4TF09sJSc7H1sXbhtI= golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20181108010431-42b317875d0f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20181221193216-37e7f081c4d4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= @@ -802,8 +806,8 @@ golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJ golang.org/x/sync v0.0.0-20201207232520-09787c993a3a/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20210220032951-036812b2e83c/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sync v0.7.0 h1:YsImfSBoP9QPYL0xyKJPq0gcaJdG3rInoqxTWbfQu9M= -golang.org/x/sync v0.7.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= +golang.org/x/sync v0.8.0 h1:3NFvSEYkUoMifnESzZl15y791HH1qU2xm6eCJU5ZPXQ= +golang.org/x/sync v0.8.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= golang.org/x/sys 
v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20180909124046-d0be0721c37e/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190130150945-aca44879d564/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= @@ -863,15 +867,15 @@ golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBc golang.org/x/sys v0.2.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.21.0 h1:rF+pYz3DAGSQAxAu1CbC7catZg4ebC4UIeIhKxBZvws= -golang.org/x/sys v0.21.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +golang.org/x/sys v0.25.0 h1:r+8e+loiHxRqhXVl6ML1nO3l1+oFoWbnlu2Ehimmi34= +golang.org/x/sys v0.25.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= golang.org/x/term v0.0.0-20210615171337-6886f2dfbf5b/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= golang.org/x/term v0.2.0/go.mod h1:TVmDHMZPmdnySmBfhjOoOdhjzdE1h4u1VwSiw2l1Nuc= golang.org/x/term v0.5.0/go.mod h1:jMB1sMXY+tzblOD4FWmEbocvup2/aLOaQEp7JmGp78k= -golang.org/x/term v0.21.0 h1:WVXCp+/EBEHOj53Rvu+7KiT/iElMrO8ACK16SMZ3jaA= -golang.org/x/term v0.21.0/go.mod h1:ooXLefLobQVslOqselCNF4SxFAaoS6KujMbsGzSDmX0= +golang.org/x/term v0.23.0 h1:F6D4vR+EHoL9/sWAWgAR1H2DcHr4PareCbAaCo1RpuU= +golang.org/x/term v0.23.0/go.mod h1:DgV24QBUrK6jhZXl+20l6UWznPlwAHm1Q1mGHtydmSk= golang.org/x/text v0.0.0-20170915032832-14c0d48ead0c/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text 
v0.3.1-0.20180807135948-17ff2d5776d2/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= @@ -884,8 +888,8 @@ golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ= golang.org/x/text v0.3.8/go.mod h1:E6s5w1FMmriuDzIBO73fBruAKo1PCIq6d2Q6DHfQ8WQ= golang.org/x/text v0.4.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8= golang.org/x/text v0.7.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8= -golang.org/x/text v0.16.0 h1:a94ExnEXNtEwYLGJSIUxnWoxoRz/ZcCsV63ROupILh4= -golang.org/x/text v0.16.0/go.mod h1:GhwF1Be+LQoKShO3cGOHzqOgRrGaYc9AvblQOmPVHnI= +golang.org/x/text v0.18.0 h1:XvMDiNzPAl0jr17s6W9lcaIhGUfUORdGCNsuLmPG224= +golang.org/x/text v0.18.0/go.mod h1:BuEKDfySbSR4drPmRPG/7iBdf8hvFMuRexcpahXilzY= golang.org/x/time v0.0.0-20181108054448-85acf8d2951c/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= golang.org/x/time v0.0.0-20190308202827-9d24e82272b4/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= golang.org/x/time v0.0.0-20191024005414-555d28b269f0/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= @@ -1025,12 +1029,10 @@ google.golang.org/genproto v0.0.0-20210303154014-9728d6b83eeb/go.mod h1:FWY/as6D google.golang.org/genproto v0.0.0-20210310155132-4ce2db91004e/go.mod h1:FWY/as6DDZQgahTzZj3fqbO1CbirC29ZNUFHwi0/+no= google.golang.org/genproto v0.0.0-20210319143718-93e7006c17a6/go.mod h1:FWY/as6DDZQgahTzZj3fqbO1CbirC29ZNUFHwi0/+no= google.golang.org/genproto v0.0.0-20210402141018-6c239bbf2bb1/go.mod h1:9lPAdzaEmUacj36I+k7YKbEc5CXzPIeORRgDAUOu28A= -google.golang.org/genproto v0.0.0-20231106174013-bbf56f31fb17 h1:wpZ8pe2x1Q3f2KyT5f8oP/fa9rHAKgFPr/HZdNuS+PQ= -google.golang.org/genproto v0.0.0-20231106174013-bbf56f31fb17/go.mod h1:J7XzRzVy1+IPwWHZUzoD0IccYZIrXILAQpc+Qy9CMhY= -google.golang.org/genproto/googleapis/api v0.0.0-20231106174013-bbf56f31fb17 h1:JpwMPBpFN3uKhdaekDpiNlImDdkUAyiJ6ez/uxGaUSo= -google.golang.org/genproto/googleapis/api v0.0.0-20231106174013-bbf56f31fb17/go.mod 
h1:0xJLfVdJqpAPl8tDg1ujOCGzx6LFLttXT5NhllGOXY4= -google.golang.org/genproto/googleapis/rpc v0.0.0-20231120223509-83a465c0220f h1:ultW7fxlIvee4HYrtnaRPon9HpEgFk5zYpmfMgtKB5I= -google.golang.org/genproto/googleapis/rpc v0.0.0-20231120223509-83a465c0220f/go.mod h1:L9KNLi232K1/xB6f7AlSX692koaRnKaWSR0stBki0Yc= +google.golang.org/genproto/googleapis/api v0.0.0-20240604185151-ef581f913117 h1:+rdxYoE3E5htTEWIe15GlN6IfvbURM//Jt0mmkmm6ZU= +google.golang.org/genproto/googleapis/api v0.0.0-20240604185151-ef581f913117/go.mod h1:OimBR/bc1wPO9iV4NC2bpyjy3VnAwZh5EBPQdtaE5oo= +google.golang.org/genproto/googleapis/rpc v0.0.0-20240903143218-8af14fe29dc1 h1:pPJltXNxVzT4pK9yD8vR9X75DaWYYmLGMsEvBfFQZzQ= +google.golang.org/genproto/googleapis/rpc v0.0.0-20240903143218-8af14fe29dc1/go.mod h1:UqMtugtsSgubUsoxbuAoiCXvqvErP7Gf0so0mK9tHxU= google.golang.org/grpc v1.19.0/go.mod h1:mqu4LbDTu4XGKhr4mRzUsmM4RtVoemTSY81AxZiDr8c= google.golang.org/grpc v1.20.1/go.mod h1:10oTOabMzJvdu6/UiuZezV6QK5dSlG84ov/aaiqXj38= google.golang.org/grpc v1.21.1/go.mod h1:oYelfM1adQP15Ek0mdvEgi9Df8B9CZIaU1084ijfRaM= @@ -1049,8 +1051,8 @@ google.golang.org/grpc v1.34.0/go.mod h1:WotjhfgOW/POjDeRt8vscBtXq+2VjORFy659qA5 google.golang.org/grpc v1.35.0/go.mod h1:qjiiYl8FncCW8feJPdyg3v6XW24KsRHe+dy9BAGRRjU= google.golang.org/grpc v1.36.0/go.mod h1:qjiiYl8FncCW8feJPdyg3v6XW24KsRHe+dy9BAGRRjU= google.golang.org/grpc v1.36.1/go.mod h1:qjiiYl8FncCW8feJPdyg3v6XW24KsRHe+dy9BAGRRjU= -google.golang.org/grpc v1.59.0 h1:Z5Iec2pjwb+LEOqzpB2MR12/eKFhDPhuqW91O+4bwUk= -google.golang.org/grpc v1.59.0/go.mod h1:aUPDwccQo6OTjy7Hct4AfBPD1GptF4fyUjIkQ9YtF98= +google.golang.org/grpc v1.66.0 h1:DibZuoBznOxbDQxRINckZcUvnCEvrW9pcWIE2yF9r1c= +google.golang.org/grpc v1.66.0/go.mod h1:s3/l6xSSCURdVfAnL+TqCNMyTDAGN6+lZeVxnZR128Y= google.golang.org/grpc/cmd/protoc-gen-go-grpc v1.3.0 h1:rNBFJjBCOgVr9pWD7rs/knKL4FRTKgpZmsRfV214zcA= google.golang.org/grpc/cmd/protoc-gen-go-grpc v1.3.0/go.mod h1:Dk1tviKTvMCz5tvh7t+fh94dhmQVHuCt2OzJB3CTW9Y= 
google.golang.org/protobuf v0.0.0-20200109180630-ec00e32a8dfd/go.mod h1:DFci5gLYBciE7Vtevhsrf46CRTquxDuWsQurQQe4oz8= diff --git a/pkg/apis/proto/source/v1/source.proto b/pkg/apis/proto/source/v1/source.proto index 0fd0bdbb37..0a70646209 100644 --- a/pkg/apis/proto/source/v1/source.proto +++ b/pkg/apis/proto/source/v1/source.proto @@ -26,16 +26,17 @@ package source.v1; service Source { // Read returns a stream of datum responses. - // The size of the returned ReadResponse is less than or equal to the num_records specified in ReadRequest. - // If the request timeout is reached on server side, the returned ReadResponse will contain all the datum that have been read (which could be an empty list). - rpc ReadFn(ReadRequest) returns (stream ReadResponse); + // The size of the returned ReadResponse is less than or equal to the num_records specified in each ReadRequest. + // If the request timeout is reached on the server side, the returned ReadResponse will contain all the datum that have been read (which could be an empty list). + // The server will continue to read and respond to subsequent ReadRequests until the client closes the stream. + rpc ReadFn(stream ReadRequest) returns (stream ReadResponse); - // AckFn acknowledges a list of datum offsets. + // AckFn acknowledges a stream of datum offsets. // When AckFn is called, it implicitly indicates that the datum stream has been processed by the source vertex. // The caller (numa) expects the AckFn to be successful, and it does not expect any errors. // If there are some irrecoverable errors when the callee (UDSource) is processing the AckFn request, - // then it is best to crash because there are no other retry mechanisms possible. - rpc AckFn(AckRequest) returns (AckResponse); + // then it is best to crash because there are no other retry mechanisms possible. + rpc AckFn(stream AckRequest) returns (AckResponse); // PendingFn returns the number of pending records at the user defined source. 
rpc PendingFn(google.protobuf.Empty) returns (PendingResponse); @@ -84,8 +85,30 @@ message ReadResponse { // e.g. Kafka and Redis Stream message usually include information about the headers. map headers = 5; } + message Status { + // Code to indicate the status of the response. + enum Code { + SUCCESS = 0; + FAILURE = 1; + } + + // Error to indicate the error type. If the code is FAILURE, then the error field will be populated. + enum Error { + UNACKED = 0; + OTHER = 1; + } + + // End of transmission flag. + bool eot = 1; + Code code = 2; + Error error = 3; + optional string msg = 4; + } // Required field holding the result. Result result = 1; + // Status of the response. Holds the end of transmission flag and the status code. + // + Status status = 2; } /* @@ -94,11 +117,8 @@ message ReadResponse { */ message AckRequest { message Request { - // Required field holding a list of offsets to be acknowledged. - // The offsets must be strictly corresponding to the previously read batch, - // meaning the offsets must be in the same order as the datum responses in the ReadResponse. - // By enforcing ordering, we can save deserialization effort on the server side, assuming the server keeps a local copy of the raw/un-serialized offsets. - repeated Offset offsets = 1; + // Required field holding the offset to be acked. + Offset offset = 1; } // Required field holding the request. The list will be ordered and will have the same order as the original Read response. 
Request request = 1; diff --git a/pkg/reduce/pbq/wal/unaligned/fs/compactor_test.go b/pkg/reduce/pbq/wal/unaligned/fs/compactor_test.go index 1c451507cb..a811921efd 100644 --- a/pkg/reduce/pbq/wal/unaligned/fs/compactor_test.go +++ b/pkg/reduce/pbq/wal/unaligned/fs/compactor_test.go @@ -392,9 +392,6 @@ func TestCompactor_ContextClose(t *testing.T) { } time.Sleep(3 * time.Second) err = c.Stop() - if err != nil { - println(err.Error()) - } assert.NoError(t, err) } diff --git a/pkg/sdkclient/options.go b/pkg/sdkclient/options.go index e46e9c8869..3c7f30d24f 100644 --- a/pkg/sdkclient/options.go +++ b/pkg/sdkclient/options.go @@ -16,9 +16,17 @@ limitations under the License. package sdkclient +import ( + "time" + + "github.com/numaproj/numaflow/pkg/apis/numaflow/v1alpha1" +) + type Options struct { udsSockAddr string maxMessageSize int + readBatchSize int + readTimeout time.Duration } // UdsSockAddr returns the UDS sock addr. @@ -31,11 +39,23 @@ func (o *Options) MaxMessageSize() int { return o.maxMessageSize } +// ReadBatchSize returns the read batch size. +func (o *Options) ReadBatchSize() int { + return o.readBatchSize +} + +// ReadTimeout returns the read timeout. +func (o *Options) ReadTimeout() time.Duration { + return o.readTimeout +} + // DefaultOptions returns the default options. func DefaultOptions(address string) *Options { return &Options{ maxMessageSize: DefaultGRPCMaxMessageSize, udsSockAddr: address, + readBatchSize: v1alpha1.DefaultReadBatchSize, + readTimeout: v1alpha1.DefaultReadTimeout, } } @@ -55,3 +75,17 @@ func WithMaxMessageSize(size int) Option { opts.maxMessageSize = size } } + +// WithReadBatchSize sets the read batch size. +func WithReadBatchSize(size int) Option { + return func(opts *Options) { + opts.readBatchSize = size + } +} + +// WithReadTimeout sets the read timeout. 
+func WithReadTimeout(timeout time.Duration) Option { + return func(opts *Options) { + opts.readTimeout = timeout + } +} diff --git a/pkg/sdkclient/reducer/client.go b/pkg/sdkclient/reducer/client.go index 64580c831d..89950e3962 100644 --- a/pkg/sdkclient/reducer/client.go +++ b/pkg/sdkclient/reducer/client.go @@ -26,6 +26,7 @@ import ( reducepb "github.com/numaproj/numaflow-go/pkg/apis/proto/reduce/v1" "github.com/numaproj/numaflow-go/pkg/info" + "github.com/numaproj/numaflow/pkg/sdkclient" sdkerr "github.com/numaproj/numaflow/pkg/sdkclient/error" grpcutil "github.com/numaproj/numaflow/pkg/sdkclient/grpc" diff --git a/pkg/sdkclient/source/client.go b/pkg/sdkclient/source/client.go index c5275b3c77..13f15e1435 100644 --- a/pkg/sdkclient/source/client.go +++ b/pkg/sdkclient/source/client.go @@ -18,27 +18,31 @@ package source import ( "context" + "errors" "fmt" - "io" "google.golang.org/grpc" "google.golang.org/protobuf/types/known/emptypb" sourcepb "github.com/numaproj/numaflow-go/pkg/apis/proto/source/v1" "github.com/numaproj/numaflow-go/pkg/info" + "github.com/numaproj/numaflow/pkg/sdkclient" grpcutil "github.com/numaproj/numaflow/pkg/sdkclient/grpc" ) // client contains the grpc connection and the grpc client. 
type client struct { - conn *grpc.ClientConn - grpcClt sourcepb.SourceClient + conn *grpc.ClientConn + grpcClt sourcepb.SourceClient + readStream sourcepb.Source_ReadFnClient + ackStream sourcepb.Source_AckFnClient + datumCh chan *sourcepb.ReadResponse } var _ Client = (*client)(nil) -func New(serverInfo *info.ServerInfo, inputOptions ...sdkclient.Option) (Client, error) { +func New(ctx context.Context, serverInfo *info.ServerInfo, inputOptions ...sdkclient.Option) (Client, error) { var opts = sdkclient.DefaultOptions(sdkclient.SourceAddr) for _, inputOption := range inputOptions { @@ -54,16 +58,47 @@ func New(serverInfo *info.ServerInfo, inputOptions ...sdkclient.Option) (Client, c.conn = conn c.grpcClt = sourcepb.NewSourceClient(conn) + + c.readStream, err = c.grpcClt.ReadFn(ctx) + if err != nil { + return nil, err + } + + c.ackStream, err = c.grpcClt.AckFn(ctx) + if err != nil { + return nil, err + } + return c, nil } // NewFromClient creates a new client object from the grpc client. This is used for testing. -func NewFromClient(c sourcepb.SourceClient) (Client, error) { - return &client{grpcClt: c}, nil +func NewFromClient(ctx context.Context, srcClient sourcepb.SourceClient, inputOptions ...sdkclient.Option) (Client, error) { + var opts = sdkclient.DefaultOptions(sdkclient.SourceAddr) + + for _, inputOption := range inputOptions { + inputOption(opts) + } + + c := new(client) + c.grpcClt = srcClient + + c.readStream, _ = c.grpcClt.ReadFn(ctx) + c.ackStream, _ = c.grpcClt.AckFn(ctx) + + return c, nil } // CloseConn closes the grpc client connection. func (c *client) CloseConn(ctx context.Context) error { + err := c.readStream.CloseSend() + if err != nil { + return err + } + err = c.ackStream.CloseSend() + if err != nil { + return err + } return c.conn.Close() } @@ -76,33 +111,36 @@ func (c *client) IsReady(ctx context.Context, in *emptypb.Empty) (bool, error) { return resp.GetReady(), nil } -// ReadFn reads data from the source. 
func (c *client) ReadFn(ctx context.Context, req *sourcepb.ReadRequest, datumCh chan<- *sourcepb.ReadResponse) error { - stream, err := c.grpcClt.ReadFn(ctx, req) + err := c.readStream.Send(req) if err != nil { - return fmt.Errorf("failed to execute c.grpcClt.ReadFn(): %w", err) + return fmt.Errorf("failed to send read request: %v", err) } + for { - select { - case <-ctx.Done(): - return ctx.Err() - default: - var resp *sourcepb.ReadResponse - resp, err = stream.Recv() - if err == io.EOF { - return nil - } - if err != nil { - return err - } - datumCh <- resp + resp, err := c.readStream.Recv() + // we don't need an EOF check because we never close the stream. + if errors.Is(err, context.Canceled) { + break + } + if err != nil { + return fmt.Errorf("failed to receive read response: %v", err) + } + if resp.GetStatus().GetEot() { + break } + datumCh <- resp } + return nil } // AckFn acknowledges the data from the source. func (c *client) AckFn(ctx context.Context, req *sourcepb.AckRequest) (*sourcepb.AckResponse, error) { - return c.grpcClt.AckFn(ctx, req) + err := c.ackStream.Send(req) + if err != nil { + return nil, err + } + return &sourcepb.AckResponse{}, nil } // PendingFn returns the number of pending data from the source. 
diff --git a/pkg/sdkclient/source/client_test.go b/pkg/sdkclient/source/client_test.go index 450394b877..1c65d4cb80 100644 --- a/pkg/sdkclient/source/client_test.go +++ b/pkg/sdkclient/source/client_test.go @@ -19,9 +19,7 @@ package source import ( "context" "fmt" - "io" "reflect" - "sync" "testing" "time" @@ -61,11 +59,7 @@ func TestIsReady(t *testing.T) { mockClient.EXPECT().IsReady(gomock.Any(), gomock.Any()).Return(&sourcepb.ReadyResponse{Ready: true}, nil) mockClient.EXPECT().IsReady(gomock.Any(), gomock.Any()).Return(&sourcepb.ReadyResponse{Ready: false}, fmt.Errorf("mock connection refused")) - testClient, err := NewFromClient(mockClient) - assert.NoError(t, err) - reflect.DeepEqual(testClient, &client{ - grpcClt: mockClient, - }) + testClient := client{grpcClt: mockClient} ready, err := testClient.IsReady(ctx, &emptypb.Empty{}) assert.True(t, ready) @@ -100,16 +94,28 @@ func TestReadFn(t *testing.T) { for i := 0; i < numRecords; i++ { mockStreamClient.EXPECT().Recv().Return(expectedResp, nil) } - mockStreamClient.EXPECT().Recv().Return(expectedResp, io.EOF) + + eotResponse := &sourcepb.ReadResponse{ + Status: &sourcepb.ReadResponse_Status{ + Eot: true, + Code: 0, + }, + } + mockStreamClient.EXPECT().Recv().Return(eotResponse, nil) mockStreamClient.EXPECT().CloseSend().Return(nil).AnyTimes() - mockClient.EXPECT().ReadFn(gomock.Any(), gomock.Any()).Return(mockStreamClient, nil) - testClient, err := NewFromClient(mockClient) - assert.NoError(t, err) - assert.True(t, reflect.DeepEqual(testClient, &client{ - grpcClt: mockClient, - })) + request := &sourcepb.ReadRequest{ + Request: &sourcepb.ReadRequest_Request{ + NumRecords: uint64(numRecords), + }, + } + mockStreamClient.EXPECT().Send(request).Return(nil) + + testClient := &client{ + grpcClt: mockClient, + readStream: mockStreamClient, + } responseCh := make(chan *sourcepb.ReadResponse) @@ -127,18 +133,12 @@ func TestReadFn(t *testing.T) { } }() - var wg sync.WaitGroup - wg.Add(1) - go func() { - defer 
wg.Done() - err = testClient.ReadFn(ctx, &sourcepb.ReadRequest{ - Request: &sourcepb.ReadRequest_Request{ - NumRecords: uint64(numRecords), - }, - }, responseCh) - assert.NoError(t, err) - }() - wg.Wait() + err := testClient.ReadFn(ctx, &sourcepb.ReadRequest{ + Request: &sourcepb.ReadRequest_Request{ + NumRecords: uint64(numRecords), + }, + }, responseCh) + assert.NoError(t, err) close(responseCh) } @@ -150,14 +150,15 @@ func TestAckFn(t *testing.T) { defer ctrl.Finish() mockClient := sourcemock.NewMockSourceClient(ctrl) - mockClient.EXPECT().AckFn(gomock.Any(), gomock.Any()).Return(&sourcepb.AckResponse{}, nil) - mockClient.EXPECT().AckFn(gomock.Any(), gomock.Any()).Return(&sourcepb.AckResponse{}, fmt.Errorf("mock connection refused")) - testClient, err := NewFromClient(mockClient) - assert.NoError(t, err) - reflect.DeepEqual(testClient, &client{ - grpcClt: mockClient, - }) + mockStream := sourcemock.NewMockSource_AckFnClient(ctrl) + mockStream.EXPECT().Send(gomock.Any()).Return(nil) + mockStream.EXPECT().Send(gomock.Any()).Return(fmt.Errorf("mock connection refused")) + + testClient := client{ + grpcClt: mockClient, + ackStream: mockStream, + } ack, err := testClient.AckFn(ctx, &sourcepb.AckRequest{}) assert.NoError(t, err) @@ -165,7 +166,6 @@ func TestAckFn(t *testing.T) { ack, err = testClient.AckFn(ctx, &sourcepb.AckRequest{}) assert.EqualError(t, err, "mock connection refused") - assert.Equal(t, &sourcepb.AckResponse{}, ack) } func TestPendingFn(t *testing.T) { @@ -183,11 +183,9 @@ func TestPendingFn(t *testing.T) { }, nil) mockClient.EXPECT().PendingFn(gomock.Any(), gomock.Any()).Return(&sourcepb.PendingResponse{}, fmt.Errorf("mock connection refused")) - testClient, err := NewFromClient(mockClient) - assert.NoError(t, err) - reflect.DeepEqual(testClient, &client{ + testClient := client{ grpcClt: mockClient, - }) + } pending, err := testClient.PendingFn(ctx, &emptypb.Empty{}) assert.NoError(t, err) diff --git a/pkg/sources/source.go b/pkg/sources/source.go 
index 8a8e64ffd6..d4ebfe0253 100644 --- a/pkg/sources/source.go +++ b/pkg/sources/source.go @@ -201,7 +201,7 @@ func (sp *SourceProcessor) Start(ctx context.Context) error { return err } - srcClient, err := sourceclient.New(serverInfo, sdkclient.WithMaxMessageSize(maxMessageSize)) + srcClient, err := sourceclient.New(ctx, serverInfo, sdkclient.WithMaxMessageSize(maxMessageSize)) if err != nil { return fmt.Errorf("failed to create a new gRPC client: %w", err) } diff --git a/pkg/sources/udsource/grpc_udsource.go b/pkg/sources/udsource/grpc_udsource.go index 525326651f..2fdbb7cef0 100644 --- a/pkg/sources/udsource/grpc_udsource.go +++ b/pkg/sources/udsource/grpc_udsource.go @@ -170,18 +170,24 @@ func (u *GRPCBasedUDSource) ApplyReadFn(ctx context.Context, count int64, timeou } // ApplyAckFn acknowledges messages in the source. +// TODO should we make this accept a single offset? func (u *GRPCBasedUDSource) ApplyAckFn(ctx context.Context, offsets []isb.Offset) error { rOffsets := make([]*sourcepb.Offset, len(offsets)) for i, offset := range offsets { rOffsets[i] = ConvertToUserDefinedSourceOffset(offset) } - var r = &sourcepb.AckRequest{ - Request: &sourcepb.AckRequest_Request{ - Offsets: rOffsets, - }, + for _, offset := range rOffsets { + var r = &sourcepb.AckRequest{ + Request: &sourcepb.AckRequest_Request{ + Offset: offset, + }, + } + _, err := u.client.AckFn(ctx, r) + if err != nil { + return err + } } - _, err := u.client.AckFn(ctx, r) - return err + return nil } // ApplyPartitionFn returns the partitions associated with the source. 
diff --git a/pkg/sources/udsource/grpc_udsource_test.go b/pkg/sources/udsource/grpc_udsource_test.go index bf7a486fad..ed326ba742 100644 --- a/pkg/sources/udsource/grpc_udsource_test.go +++ b/pkg/sources/udsource/grpc_udsource_test.go @@ -20,7 +20,6 @@ import ( "context" "errors" "fmt" - "io" "testing" "time" @@ -32,7 +31,6 @@ import ( "google.golang.org/grpc/codes" "google.golang.org/grpc/status" "google.golang.org/protobuf/proto" - "google.golang.org/protobuf/types/known/emptypb" "google.golang.org/protobuf/types/known/timestamppb" "github.com/numaproj/numaflow/pkg/isb" @@ -59,8 +57,8 @@ func (r *rpcMsg) String() string { return fmt.Sprintf("is %s", r.msg) } -func NewMockUDSgRPCBasedUDSource(mockClient *sourcemock.MockSourceClient) *GRPCBasedUDSource { - c, _ := sourceclient.NewFromClient(mockClient) +func NewMockUDSgRPCBasedUDSource(ctx context.Context, mockClient *sourcemock.MockSourceClient) *GRPCBasedUDSource { + c, _ := sourceclient.NewFromClient(ctx, mockClient) return &GRPCBasedUDSource{ vertexName: "testVertex", pipelineName: "testPipeline", @@ -75,6 +73,8 @@ func Test_gRPCBasedUDSource_WaitUntilReadyWithMockClient(t *testing.T) { mockClient := sourcemock.NewMockSourceClient(ctrl) mockClient.EXPECT().IsReady(gomock.Any(), gomock.Any()).Return(&sourcepb.ReadyResponse{Ready: true}, nil) + mockClient.EXPECT().ReadFn(gomock.Any(), gomock.Any()).Return(nil, nil) + mockClient.EXPECT().AckFn(gomock.Any(), gomock.Any()).Return(nil, nil) ctx, cancel := context.WithTimeout(context.Background(), time.Second) defer cancel() @@ -85,7 +85,7 @@ func Test_gRPCBasedUDSource_WaitUntilReadyWithMockClient(t *testing.T) { } }() - u := NewMockUDSgRPCBasedUDSource(mockClient) + u := NewMockUDSgRPCBasedUDSource(ctx, mockClient) err := u.WaitUntilReady(ctx) assert.NoError(t, err) } @@ -103,6 +103,8 @@ func Test_gRPCBasedUDSource_ApplyPendingWithMockClient(t *testing.T) { mockSourceClient := sourcemock.NewMockSourceClient(ctrl) mockSourceClient.EXPECT().PendingFn(gomock.Any(), 
gomock.Any()).Return(testResponse, nil).AnyTimes() + mockSourceClient.EXPECT().ReadFn(gomock.Any(), gomock.Any()).Return(nil, nil) + mockSourceClient.EXPECT().AckFn(gomock.Any(), gomock.Any()).Return(nil, nil) ctx, cancel := context.WithTimeout(context.Background(), time.Second) defer cancel() @@ -113,7 +115,7 @@ func Test_gRPCBasedUDSource_ApplyPendingWithMockClient(t *testing.T) { } }() - u := NewMockUDSgRPCBasedUDSource(mockSourceClient) + u := NewMockUDSgRPCBasedUDSource(ctx, mockSourceClient) count, err := u.ApplyPendingFn(ctx) assert.NoError(t, err) assert.Equal(t, int64(123), count) @@ -131,6 +133,8 @@ func Test_gRPCBasedUDSource_ApplyPendingWithMockClient(t *testing.T) { mockSourceClient := sourcemock.NewMockSourceClient(ctrl) mockSourceClient.EXPECT().PendingFn(gomock.Any(), gomock.Any()).Return(testResponse, nil).AnyTimes() + mockSourceClient.EXPECT().ReadFn(gomock.Any(), gomock.Any()).Return(nil, nil) + mockSourceClient.EXPECT().AckFn(gomock.Any(), gomock.Any()).Return(nil, nil) ctx, cancel := context.WithTimeout(context.Background(), time.Second) defer cancel() @@ -141,7 +145,7 @@ func Test_gRPCBasedUDSource_ApplyPendingWithMockClient(t *testing.T) { } }() - u := NewMockUDSgRPCBasedUDSource(mockSourceClient) + u := NewMockUDSgRPCBasedUDSource(ctx, mockSourceClient) count, err := u.ApplyPendingFn(ctx) assert.NoError(t, err) assert.Equal(t, isb.PendingNotAvailable, count) @@ -159,6 +163,8 @@ func Test_gRPCBasedUDSource_ApplyPendingWithMockClient(t *testing.T) { mockSourceErrClient := sourcemock.NewMockSourceClient(ctrl) mockSourceErrClient.EXPECT().PendingFn(gomock.Any(), gomock.Any()).Return(testResponse, fmt.Errorf("mock udsource pending error")).AnyTimes() + mockSourceErrClient.EXPECT().ReadFn(gomock.Any(), gomock.Any()).Return(nil, nil) + mockSourceErrClient.EXPECT().AckFn(gomock.Any(), gomock.Any()).Return(nil, nil) ctx, cancel := context.WithTimeout(context.Background(), time.Second) defer cancel() @@ -169,7 +175,7 @@ func 
Test_gRPCBasedUDSource_ApplyPendingWithMockClient(t *testing.T) { } }() - u := NewMockUDSgRPCBasedUDSource(mockSourceErrClient) + u := NewMockUDSgRPCBasedUDSource(ctx, mockSourceErrClient) count, err := u.ApplyPendingFn(ctx) assert.Equal(t, isb.PendingNotAvailable, count) @@ -183,13 +189,8 @@ func Test_gRPCBasedUDSource_ApplyReadWithMockClient(t *testing.T) { defer ctrl.Finish() mockClient := sourcemock.NewMockSourceClient(ctrl) mockReadClient := sourcemock.NewMockSource_ReadFnClient(ctrl) - - req := &sourcepb.ReadRequest{ - Request: &sourcepb.ReadRequest_Request{ - NumRecords: 1, - TimeoutInMs: 1000, - }, - } + mockClient.EXPECT().ReadFn(gomock.Any(), gomock.Any()).Return(mockReadClient, nil) + mockClient.EXPECT().AckFn(gomock.Any(), gomock.Any()).Return(nil, nil) offset := &sourcepb.Offset{Offset: []byte(`test_offset`), PartitionId: 0} @@ -202,10 +203,18 @@ func Test_gRPCBasedUDSource_ApplyReadWithMockClient(t *testing.T) { Keys: []string{"test_key"}, }, } - mockReadClient.EXPECT().Recv().Return(expectedResponse, nil).Times(1) - mockReadClient.EXPECT().Recv().Return(nil, io.EOF).Times(1) - mockClient.EXPECT().ReadFn(gomock.Any(), &rpcMsg{msg: req}).Return(mockReadClient, nil) + + eotResponse := &sourcepb.ReadResponse{Status: &sourcepb.ReadResponse_Status{Eot: true}} + mockReadClient.EXPECT().Recv().Return(eotResponse, nil).Times(1) + + req := &sourcepb.ReadRequest{ + Request: &sourcepb.ReadRequest_Request{ + NumRecords: 1, + TimeoutInMs: 1000, + }, + } + mockReadClient.EXPECT().Send(req).Return(nil).Times(1) ctx, cancel := context.WithTimeout(context.Background(), time.Second) defer cancel() @@ -216,7 +225,7 @@ func Test_gRPCBasedUDSource_ApplyReadWithMockClient(t *testing.T) { } }() - u := NewMockUDSgRPCBasedUDSource(mockClient) + u := NewMockUDSgRPCBasedUDSource(ctx, mockClient) readMessages, err := u.ApplyReadFn(ctx, 1, time.Millisecond*1000) assert.NoError(t, err) assert.Equal(t, 1, len(readMessages)) @@ -232,6 +241,8 @@ func 
Test_gRPCBasedUDSource_ApplyReadWithMockClient(t *testing.T) { mockClient := sourcemock.NewMockSourceClient(ctrl) mockReadClient := sourcemock.NewMockSource_ReadFnClient(ctrl) + mockClient.EXPECT().ReadFn(gomock.Any(), gomock.Any()).Return(mockReadClient, nil) + mockClient.EXPECT().AckFn(gomock.Any(), gomock.Any()).Return(nil, nil) req := &sourcepb.ReadRequest{ Request: &sourcepb.ReadRequest_Request{ @@ -239,6 +250,7 @@ func Test_gRPCBasedUDSource_ApplyReadWithMockClient(t *testing.T) { TimeoutInMs: 1000, }, } + mockReadClient.EXPECT().Send(req).Return(nil).Times(1) var TestEventTime = time.Unix(1661169600, 0).UTC() expectedResponse := &sourcepb.ReadResponse{ @@ -249,10 +261,7 @@ func Test_gRPCBasedUDSource_ApplyReadWithMockClient(t *testing.T) { Keys: []string{"test_key"}, }, } - mockReadClient.EXPECT().Recv().Return(expectedResponse, errors.New("mock error for read")).AnyTimes() - mockClient.EXPECT().ReadFn(gomock.Any(), &rpcMsg{msg: req}).Return(mockReadClient, nil) - ctx, cancel := context.WithTimeout(context.Background(), time.Second) defer cancel() go func() { @@ -262,7 +271,7 @@ func Test_gRPCBasedUDSource_ApplyReadWithMockClient(t *testing.T) { } }() - u := NewMockUDSgRPCBasedUDSource(mockClient) + u := NewMockUDSgRPCBasedUDSource(ctx, mockClient) readMessages, err := u.ApplyReadFn(ctx, 1, time.Millisecond*1000) assert.Error(t, err) assert.Equal(t, 0, len(readMessages)) @@ -278,16 +287,24 @@ func Test_gRPCBasedUDSource_ApplyAckWithMockClient(t *testing.T) { offset2 := &sourcepb.Offset{Offset: []byte("test-offset-2"), PartitionId: 0} mockClient := sourcemock.NewMockSourceClient(ctrl) - req := &sourcepb.AckRequest{ + mockAckClient := sourcemock.NewMockSource_AckFnClient(ctrl) + mockClient.EXPECT().ReadFn(gomock.Any(), gomock.Any()).Return(nil, nil) + mockClient.EXPECT().AckFn(gomock.Any(), gomock.Any()).Return(mockAckClient, nil) + + req1 := &sourcepb.AckRequest{ Request: &sourcepb.AckRequest_Request{ - Offsets: []*sourcepb.Offset{ - offset1, - offset2, - }, 
+ Offset: offset1, }, } - mockClient.EXPECT().AckFn(gomock.Any(), &rpcMsg{msg: req}).Return(&sourcepb.AckResponse{Result: &sourcepb.AckResponse_Result{Success: &emptypb.Empty{}}}, nil).AnyTimes() + req2 := &sourcepb.AckRequest{ + Request: &sourcepb.AckRequest_Request{ + Offset: offset2, + }, + } + + mockAckClient.EXPECT().Send(req1).Return(nil).Times(1) + mockAckClient.EXPECT().Send(req2).Return(nil).Times(1) ctx, cancel := context.WithTimeout(context.Background(), time.Second) defer cancel() @@ -298,7 +315,7 @@ func Test_gRPCBasedUDSource_ApplyAckWithMockClient(t *testing.T) { } }() - u := NewMockUDSgRPCBasedUDSource(mockClient) + u := NewMockUDSgRPCBasedUDSource(ctx, mockClient) err := u.ApplyAckFn(ctx, []isb.Offset{ NewUserDefinedSourceOffset(offset1), NewUserDefinedSourceOffset(offset2), @@ -314,16 +331,18 @@ func Test_gRPCBasedUDSource_ApplyAckWithMockClient(t *testing.T) { offset2 := &sourcepb.Offset{Offset: []byte("test-offset-2"), PartitionId: 0} mockClient := sourcemock.NewMockSourceClient(ctrl) - req := &sourcepb.AckRequest{ + mockAckClient := sourcemock.NewMockSource_AckFnClient(ctrl) + mockClient.EXPECT().ReadFn(gomock.Any(), gomock.Any()).Return(nil, nil) + mockClient.EXPECT().AckFn(gomock.Any(), gomock.Any()).Return(mockAckClient, nil) + + req1 := &sourcepb.AckRequest{ Request: &sourcepb.AckRequest_Request{ - Offsets: []*sourcepb.Offset{ - offset1, - offset2, - }, + Offset: offset1, }, } - mockClient.EXPECT().AckFn(gomock.Any(), &rpcMsg{msg: req}).Return(nil, status.New(codes.DeadlineExceeded, "mock test err").Err()) + mockAckClient.EXPECT().Send(req1).Return(status.New(codes.DeadlineExceeded, "mock test err").Err()).Times(1) + ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) defer cancel() go func() { @@ -333,7 +352,7 @@ func Test_gRPCBasedUDSource_ApplyAckWithMockClient(t *testing.T) { } }() - u := NewMockUDSgRPCBasedUDSource(mockClient) + u := NewMockUDSgRPCBasedUDSource(ctx, mockClient) err := u.ApplyAckFn(ctx, 
[]isb.Offset{ NewUserDefinedSourceOffset(offset1), NewUserDefinedSourceOffset(offset2), diff --git a/pkg/sources/udsource/user_defined_source.go b/pkg/sources/udsource/user_defined_source.go index ac1b2debb5..5ba77019a1 100644 --- a/pkg/sources/udsource/user_defined_source.go +++ b/pkg/sources/udsource/user_defined_source.go @@ -62,10 +62,6 @@ func NewUserDefinedSource(ctx context.Context, vertexInstance *dfv1.VertexInstan } } - if err != nil { - u.logger.Errorw("Error instantiating the forwarder", zap.Error(err)) - return nil, err - } return u, nil } diff --git a/rust/monovertex/proto/source.proto b/rust/monovertex/proto/source.proto index 131cc36d30..3ea56aeba5 100644 --- a/rust/monovertex/proto/source.proto +++ b/rust/monovertex/proto/source.proto @@ -7,16 +7,17 @@ package source.v1; service Source { // Read returns a stream of datum responses. - // The size of the returned ReadResponse is less than or equal to the num_records specified in ReadRequest. - // If the request timeout is reached on server side, the returned ReadResponse will contain all the datum that have been read (which could be an empty list). - rpc ReadFn(ReadRequest) returns (stream ReadResponse); + // The size of the returned ReadResponse is less than or equal to the num_records specified in each ReadRequest. + // If the request timeout is reached on the server side, the returned ReadResponse will contain all the datum that have been read (which could be an empty list). + // The server will continue to read and respond to subsequent ReadRequests until the client closes the stream. + rpc ReadFn(stream ReadRequest) returns (stream ReadResponse); - // AckFn acknowledges a list of datum offsets. + // AckFn acknowledges a stream of datum offsets. // When AckFn is called, it implicitly indicates that the datum stream has been processed by the source vertex. // The caller (numa) expects the AckFn to be successful, and it does not expect any errors. 
// If there are some irrecoverable errors when the callee (UDSource) is processing the AckFn request, - // then it is best to crash because there are no other retry mechanisms possible. - rpc AckFn(AckRequest) returns (AckResponse); + // then it is best to crash because there are no other retry mechanisms possible. + rpc AckFn(stream AckRequest) returns (AckResponse); // PendingFn returns the number of pending records at the user defined source. rpc PendingFn(google.protobuf.Empty) returns (PendingResponse); @@ -65,8 +66,30 @@ message ReadResponse { // e.g. Kafka and Redis Stream message usually include information about the headers. map headers = 5; } + message Status { + // Code to indicate the status of the response. + enum Code { + SUCCESS = 0; + FAILURE = 1; + } + + // Error to indicate the error type. If the code is FAILURE, then the error field will be populated. + enum Error { + UNACKED = 0; + OTHER = 1; + } + + // End of transmission flag. + bool eot = 1; + Code code = 2; + Error error = 3; + optional string msg = 4; + } // Required field holding the result. Result result = 1; + // Status of the response. Holds the end of transmission flag and the status code. + // + Status status = 2; } /* @@ -75,11 +98,8 @@ message ReadResponse { */ message AckRequest { message Request { - // Required field holding a list of offsets to be acknowledged. - // The offsets must be strictly corresponding to the previously read batch, - // meaning the offsets must be in the same order as the datum responses in the ReadResponse. - // By enforcing ordering, we can save deserialization effort on the server side, assuming the server keeps a local copy of the raw/un-serialized offsets. - repeated Offset offsets = 1; + // Required field holding the offset to be acked + Offset offset = 1; } // Required field holding the request. The list will be ordered and will have the same order as the original Read response. 
Request request = 1; diff --git a/rust/monovertex/src/forwarder.rs b/rust/monovertex/src/forwarder.rs index 1ba928123c..d37ebbe9b4 100644 --- a/rust/monovertex/src/forwarder.rs +++ b/rust/monovertex/src/forwarder.rs @@ -3,11 +3,11 @@ use std::collections::HashMap; use crate::config::{config, OnFailureStrategy}; use crate::error::{Error, Result}; use crate::message::{Message, Offset}; -use crate::metrics; use crate::metrics::forward_metrics; -use crate::sink::{proto, SinkClient}; -use crate::source::SourceClient; -use crate::transformer::TransformerClient; +use crate::sink::SinkWriter; +use crate::source::SourceReader; +use crate::transformer::SourceTransformer; +use crate::{metrics, proto}; use chrono::Utc; use tokio::task::JoinSet; use tokio::time::sleep; @@ -19,48 +19,48 @@ use tracing::{debug, info}; /// transformer is present, writing the messages to the sink, and then acknowledging the messages /// back to the source. pub(crate) struct Forwarder { - source_client: SourceClient, - sink_client: SinkClient, - transformer_client: Option, - fallback_client: Option, + source_reader: SourceReader, + sink_writer: SinkWriter, + source_transformer: Option, + fb_sink_writer: Option, cln_token: CancellationToken, common_labels: Vec<(String, String)>, } /// ForwarderBuilder is used to build a Forwarder instance with optional fields. 
pub(crate) struct ForwarderBuilder { - source_client: SourceClient, - sink_client: SinkClient, + source_reader: SourceReader, + sink_writer: SinkWriter, cln_token: CancellationToken, - transformer_client: Option, - fb_sink_client: Option, + source_transformer: Option, + fb_sink_writer: Option, } impl ForwarderBuilder { /// Create a new builder with mandatory fields pub(crate) fn new( - source_client: SourceClient, - sink_client: SinkClient, + source_client: SourceReader, + sink_client: SinkWriter, cln_token: CancellationToken, ) -> Self { Self { - source_client, - sink_client, + source_reader: source_client, + sink_writer: sink_client, cln_token, - transformer_client: None, - fb_sink_client: None, + source_transformer: None, + fb_sink_writer: None, } } /// Set the optional transformer client - pub(crate) fn transformer_client(mut self, transformer_client: TransformerClient) -> Self { - self.transformer_client = Some(transformer_client); + pub(crate) fn source_transformer(mut self, transformer_client: SourceTransformer) -> Self { + self.source_transformer = Some(transformer_client); self } /// Set the optional fallback client - pub(crate) fn fb_sink_client(mut self, fallback_client: SinkClient) -> Self { - self.fb_sink_client = Some(fallback_client); + pub(crate) fn fallback_sink_writer(mut self, fallback_client: SinkWriter) -> Self { + self.fb_sink_writer = Some(fallback_client); self } @@ -69,10 +69,10 @@ impl ForwarderBuilder { pub(crate) fn build(self) -> Forwarder { let common_labels = metrics::forward_metrics_labels().clone(); Forwarder { - source_client: self.source_client, - sink_client: self.sink_client, - transformer_client: self.transformer_client, - fallback_client: self.fb_sink_client, + source_reader: self.source_reader, + sink_writer: self.sink_writer, + source_transformer: self.source_transformer, + fb_sink_writer: self.fb_sink_writer, cln_token: self.cln_token, common_labels, } @@ -120,7 +120,7 @@ impl Forwarder { async fn 
read_and_process_messages(&mut self) -> Result { let start_time = tokio::time::Instant::now(); let messages = self - .source_client + .source_reader .read_fn(config().batch_size, config().timeout_in_ms) .await?; debug!( @@ -173,7 +173,7 @@ impl Forwarder { // Applies transformation to the messages if transformer is present // we concurrently apply transformation to all the messages. async fn apply_transformer(&self, messages: Vec) -> Result> { - let Some(transformer_client) = &self.transformer_client else { + let Some(transformer_client) = &self.source_transformer else { // return early if there is no transformer return Ok(messages); }; @@ -342,7 +342,7 @@ impl Forwarder { messages_to_send: &mut Vec, ) -> Result { let start_time = tokio::time::Instant::now(); - match self.sink_client.sink_fn(messages_to_send.clone()).await { + match self.sink_writer.sink_fn(messages_to_send.clone()).await { Ok(response) => { debug!("Sink latency - {}ms", start_time.elapsed().as_millis()); @@ -384,22 +384,22 @@ impl Forwarder { .await; // we need to retry - return Ok(false); + Ok(false) } - Err(e) => return Err(e), + Err(e) => Err(e), } } // Writes the fallback messages to the fallback sink async fn handle_fallback_messages(&mut self, fallback_msgs: Vec) -> Result<()> { - if self.fallback_client.is_none() { + if self.fb_sink_writer.is_none() { return Err(Error::SinkError( "Response contains fallback messages but no fallback sink is configured" .to_string(), )); } - let fallback_client = self.fallback_client.as_mut().unwrap(); + let fallback_client = self.fb_sink_writer.as_mut().unwrap(); let mut attempts = 0; let mut fallback_error_map = HashMap::new(); // start with the original set of message to be sent. 
@@ -497,7 +497,7 @@ impl Forwarder { let n = offsets.len(); let start_time = tokio::time::Instant::now(); - self.source_client.ack_fn(offsets).await?; + self.source_reader.ack_fn(offsets).await?; debug!("Ack latency - {}ms", start_time.elapsed().as_millis()); @@ -526,9 +526,13 @@ mod tests { use crate::error::Result; use crate::forwarder::ForwarderBuilder; - use crate::sink::{SinkClient, SinkConfig}; - use crate::source::{SourceClient, SourceConfig}; - use crate::transformer::{TransformerClient, TransformerConfig}; + use crate::proto::sink_client::SinkClient; + use crate::proto::source_client::SourceClient; + use crate::proto::source_transform_client::SourceTransformClient; + use crate::shared::create_rpc_channel; + use crate::sink::SinkWriter; + use crate::source::SourceReader; + use crate::transformer::SourceTransformer; struct SimpleSource { yet_to_be_acked: std::sync::RwLock>, @@ -672,11 +676,6 @@ mod tests { .await .unwrap(); }); - let source_config = SourceConfig { - socket_path: source_sock_file.to_str().unwrap().to_string(), - server_info_file: server_info_file.to_str().unwrap().to_string(), - max_message_size: 4 * 1024 * 1024, - }; // Start the sink server let (sink_shutdown_tx, sink_shutdown_rx) = tokio::sync::oneshot::channel(); @@ -694,11 +693,6 @@ mod tests { .await .unwrap(); }); - let sink_config = SinkConfig { - socket_path: sink_sock_file.to_str().unwrap().to_string(), - server_info_file: server_info_file.to_str().unwrap().to_string(), - max_message_size: 4 * 1024 * 1024, - }; // Start the transformer server let (transformer_shutdown_tx, transformer_shutdown_rx) = tokio::sync::oneshot::channel(); @@ -716,31 +710,32 @@ mod tests { .await .unwrap(); }); - let transformer_config = TransformerConfig { - socket_path: transformer_sock_file.to_str().unwrap().to_string(), - server_info_file: server_info_file.to_str().unwrap().to_string(), - max_message_size: 4 * 1024 * 1024, - }; // Wait for the servers to start 
tokio::time::sleep(tokio::time::Duration::from_millis(100)).await; let cln_token = CancellationToken::new(); - let source_client = SourceClient::connect(source_config) - .await - .expect("failed to connect to source server"); + let source_client = SourceReader::new(SourceClient::new( + create_rpc_channel(source_sock_file).await.unwrap(), + )) + .await + .expect("failed to connect to source server"); - let sink_client = SinkClient::connect(sink_config) - .await - .expect("failed to connect to sink server"); + let sink_client = SinkWriter::new(SinkClient::new( + create_rpc_channel(sink_sock_file).await.unwrap(), + )) + .await + .expect("failed to connect to sink server"); - let transformer_client = TransformerClient::connect(transformer_config) - .await - .expect("failed to connect to transformer server"); + let transformer_client = SourceTransformer::new(SourceTransformClient::new( + create_rpc_channel(transformer_sock_file).await.unwrap(), + )) + .await + .expect("failed to connect to transformer server"); let mut forwarder = ForwarderBuilder::new(source_client, sink_client, cln_token.clone()) - .transformer_client(transformer_client) + .source_transformer(transformer_client) .build(); let forwarder_handle = tokio::spawn(async move { @@ -821,11 +816,6 @@ mod tests { .await .unwrap(); }); - let source_config = SourceConfig { - socket_path: source_sock_file.to_str().unwrap().to_string(), - server_info_file: server_info_file.to_str().unwrap().to_string(), - max_message_size: 4 * 1024 * 1024, - }; // Start the sink server let (sink_shutdown_tx, sink_shutdown_rx) = tokio::sync::oneshot::channel(); @@ -843,24 +833,23 @@ mod tests { .await .unwrap(); }); - let sink_config = SinkConfig { - socket_path: sink_sock_file.to_str().unwrap().to_string(), - server_info_file: server_info_file.to_str().unwrap().to_string(), - max_message_size: 4 * 1024 * 1024, - }; // Wait for the servers to start tokio::time::sleep(tokio::time::Duration::from_millis(100)).await; let cln_token = 
CancellationToken::new(); - let source_client = SourceClient::connect(source_config) - .await - .expect("failed to connect to source server"); + let source_client = SourceReader::new(SourceClient::new( + create_rpc_channel(source_sock_file).await.unwrap(), + )) + .await + .expect("failed to connect to source server"); - let sink_client = SinkClient::connect(sink_config) - .await - .expect("failed to connect to sink server"); + let sink_client = SinkWriter::new(SinkClient::new( + create_rpc_channel(sink_sock_file).await.unwrap(), + )) + .await + .expect("failed to connect to sink server"); let mut forwarder = ForwarderBuilder::new(source_client, sink_client, cln_token.clone()).build(); @@ -929,11 +918,6 @@ mod tests { .await .unwrap(); }); - let source_config = SourceConfig { - socket_path: source_sock_file.to_str().unwrap().to_string(), - server_info_file: server_info_file.to_str().unwrap().to_string(), - max_message_size: 4 * 1024 * 1024, - }; // Start the primary sink server (which returns status fallback) let (sink_shutdown_tx, sink_shutdown_rx) = tokio::sync::oneshot::channel(); @@ -951,11 +935,6 @@ mod tests { .await .unwrap(); }); - let sink_config = SinkConfig { - socket_path: sink_sock_file.to_str().unwrap().to_string(), - server_info_file: server_info_file.to_str().unwrap().to_string(), - max_message_size: 4 * 1024 * 1024, - }; // Start the fb sink server let (fb_sink_shutdown_tx, fb_sink_shutdown_rx) = tokio::sync::oneshot::channel(); @@ -973,31 +952,32 @@ mod tests { .await .unwrap(); }); - let fb_sink_config = SinkConfig { - socket_path: fb_sink_sock_file.to_str().unwrap().to_string(), - server_info_file: server_info_file.to_str().unwrap().to_string(), - max_message_size: 4 * 1024 * 1024, - }; // Wait for the servers to start tokio::time::sleep(tokio::time::Duration::from_millis(100)).await; let cln_token = CancellationToken::new(); - let source_client = SourceClient::connect(source_config) - .await - .expect("failed to connect to source server"); + let 
source_client = SourceReader::new(SourceClient::new( + create_rpc_channel(source_sock_file).await.unwrap(), + )) + .await + .expect("failed to connect to source server"); - let sink_client = SinkClient::connect(sink_config) - .await - .expect("failed to connect to sink server"); + let sink_client = SinkWriter::new(SinkClient::new( + create_rpc_channel(sink_sock_file).await.unwrap(), + )) + .await + .expect("failed to connect to sink server"); - let fb_sink_client = SinkClient::connect(fb_sink_config) - .await - .expect("failed to connect to fb sink server"); + let fb_sink_client = SinkWriter::new(SinkClient::new( + create_rpc_channel(fb_sink_sock_file).await.unwrap(), + )) + .await + .expect("failed to connect to fb sink server"); let mut forwarder = ForwarderBuilder::new(source_client, sink_client, cln_token.clone()) - .fb_sink_client(fb_sink_client) + .fallback_sink_writer(fb_sink_client) .build(); let forwarder_handle = tokio::spawn(async move { diff --git a/rust/monovertex/src/lib.rs b/rust/monovertex/src/lib.rs index c1d172adf9..d59691c155 100644 --- a/rust/monovertex/src/lib.rs +++ b/rust/monovertex/src/lib.rs @@ -3,15 +3,23 @@ use crate::config::config; pub(crate) use crate::error::Error; use crate::forwarder::ForwarderBuilder; use crate::metrics::{start_metrics_https_server, LagReaderBuilder, MetricsState}; -use crate::sink::{SinkClient, SinkConfig}; -use crate::source::{SourceClient, SourceConfig}; -use crate::transformer::{TransformerClient, TransformerConfig}; +use crate::proto::sink_client::SinkClient; +use crate::proto::source_client::SourceClient; +use crate::proto::source_transform_client::SourceTransformClient; +use crate::shared::create_rpc_channel; +use crate::sink::{ + SinkWriter, FB_SINK_SERVER_INFO_FILE, FB_SINK_SOCKET, SINK_SERVER_INFO_FILE, SINK_SOCKET, +}; +use crate::source::{SourceReader, SOURCE_SERVER_INFO_FILE, SOURCE_SOCKET}; +use crate::transformer::{SourceTransformer, TRANSFORMER_SERVER_INFO_FILE, TRANSFORMER_SOCKET}; use 
std::net::SocketAddr; use std::time::Duration; use tokio::signal; use tokio::task::JoinHandle; use tokio::time::sleep; use tokio_util::sync::CancellationToken; +use tonic::transport::Channel; +use tonic::Request; use tracing::{error, info, warn}; /// SourcerSinker orchestrates data movement from the Source to the Sink via the optional SourceTransformer. @@ -36,38 +44,17 @@ pub(crate) mod message; pub(crate) mod shared; +pub(crate) mod proto { + tonic::include_proto!("source.v1"); + tonic::include_proto!("sink.v1"); + tonic::include_proto!("sourcetransformer.v1"); +} + mod server_info; mod metrics; -pub async fn mono_vertex() { - // Initialize the source, sink and transformer configurations - // We are using the default configurations for now. - let source_config = SourceConfig { - max_message_size: config().grpc_max_message_size, - ..Default::default() - }; - - let sink_config = SinkConfig { - max_message_size: config().grpc_max_message_size, - ..Default::default() - }; - - let transformer_config = if config().is_transformer_enabled { - Some(TransformerConfig { - max_message_size: config().grpc_max_message_size, - ..Default::default() - }) - } else { - None - }; - - let fb_sink_config = if config().is_fallback_enabled { - Some(SinkConfig::fallback_default()) - } else { - None - }; - +pub async fn mono_vertex() -> Result<()> { let cln_token = CancellationToken::new(); let shutdown_cln_token = cln_token.clone(); @@ -79,15 +66,7 @@ pub async fn mono_vertex() { }); // Run the forwarder with cancellation token. 
- if let Err(e) = init( - source_config, - sink_config, - transformer_config, - fb_sink_config, - cln_token, - ) - .await - { + if let Err(e) = init(cln_token).await { error!("Application error: {:?}", e); // abort the signal handler task since we have an error and we are shutting down @@ -97,6 +76,7 @@ pub async fn mono_vertex() { } info!("Gracefully Exiting..."); + Ok(()) } async fn shutdown_signal() { @@ -121,62 +101,81 @@ async fn shutdown_signal() { } } -/// forwards a chunk of data from the source to the sink via an optional transformer. -/// It takes an optional custom_shutdown_rx for shutting down the forwarder, useful for testing. -pub async fn init( - source_config: SourceConfig, - sink_config: SinkConfig, - transformer_config: Option, - fb_sink_config: Option, - cln_token: CancellationToken, -) -> Result<()> { - server_info::check_for_server_compatibility(&source_config.server_info_file, cln_token.clone()) +pub async fn init(cln_token: CancellationToken) -> Result<()> { + server_info::check_for_server_compatibility(SOURCE_SERVER_INFO_FILE, cln_token.clone()) .await .map_err(|e| { warn!("Error waiting for source server info file: {:?}", e); Error::ForwarderError("Error waiting for server info file".to_string()) })?; - let mut source_client = SourceClient::connect(source_config).await?; - server_info::check_for_server_compatibility(&sink_config.server_info_file, cln_token.clone()) + let mut source_grpc_client = SourceClient::new(create_rpc_channel(SOURCE_SOCKET.into()).await?) 
+ .max_encoding_message_size(config().grpc_max_message_size) + .max_encoding_message_size(config().grpc_max_message_size); + + let source_reader = SourceReader::new(source_grpc_client.clone()).await?; + + server_info::check_for_server_compatibility(SINK_SERVER_INFO_FILE, cln_token.clone()) .await .map_err(|e| { - warn!("Error waiting for sink server info file: {:?}", e); + error!("Error waiting for sink server info file: {:?}", e); Error::ForwarderError("Error waiting for server info file".to_string()) })?; - let mut sink_client = SinkClient::connect(sink_config).await?; + let mut sink_grpc_client = SinkClient::new(create_rpc_channel(SINK_SOCKET.into()).await?) + .max_encoding_message_size(config().grpc_max_message_size) + .max_encoding_message_size(config().grpc_max_message_size); - let mut transformer_client = if let Some(config) = transformer_config { - server_info::check_for_server_compatibility(&config.server_info_file, cln_token.clone()) - .await - .map_err(|e| { - warn!("Error waiting for transformer server info file: {:?}", e); - Error::ForwarderError("Error waiting for server info file".to_string()) - })?; - Some(TransformerClient::connect(config).await?) + let mut sink_writer = SinkWriter::new(sink_grpc_client.clone()).await?; + + let (mut transformer_grpc_client, mut transformer) = if config().is_transformer_enabled { + server_info::check_for_server_compatibility( + TRANSFORMER_SERVER_INFO_FILE, + cln_token.clone(), + ) + .await + .map_err(|e| { + error!("Error waiting for transformer server info file: {:?}", e); + Error::ForwarderError("Error waiting for server info file".to_string()) + })?; + let transformer_grpc_client = + SourceTransformClient::new(create_rpc_channel(TRANSFORMER_SOCKET.into()).await?) 
+ .max_encoding_message_size(config().grpc_max_message_size) + .max_encoding_message_size(config().grpc_max_message_size); + + ( + Some(transformer_grpc_client), + Some(SourceTransformer::new(transformer_grpc_client.clone()).await?), + ) } else { - None + (None, None) }; - let mut fb_sink_client = if let Some(config) = fb_sink_config { - server_info::check_for_server_compatibility(&config.server_info_file, cln_token.clone()) + let (mut fb_sink_grpc_client, mut fallback_writer) = if config().is_fallback_enabled { + server_info::check_for_server_compatibility(FB_SINK_SERVER_INFO_FILE, cln_token.clone()) .await .map_err(|e| { warn!("Error waiting for fallback sink server info file: {:?}", e); Error::ForwarderError("Error waiting for server info file".to_string()) })?; - Some(SinkClient::connect(config).await?) + let fb_sink_grpc_client = SinkClient::new(create_rpc_channel(FB_SINK_SOCKET.into()).await?) + .max_encoding_message_size(config().grpc_max_message_size) + .max_encoding_message_size(config().grpc_max_message_size); + + ( + Some(fb_sink_grpc_client), + Some(SinkWriter::new(fb_sink_grpc_client.clone()).await?), + ) } else { - None + (None, None) }; // readiness check for all the ud containers wait_until_ready( - &mut source_client, - &mut sink_client, - &mut transformer_client, - &mut fb_sink_client, + &mut source_grpc_client, + &mut sink_grpc_client, + &mut transformer_grpc_client, + &mut fb_sink_grpc_client, ) .await?; @@ -189,11 +188,12 @@ pub async fn init( // This should be running throughout the lifetime of the application, hence the handle is not // joined. 
let metrics_state = MetricsState { - source_client: source_client.clone(), - sink_client: sink_client.clone(), - transformer_client: transformer_client.clone(), - fb_sink_client: fb_sink_client.clone(), + source_client: source_grpc_client.clone(), + sink_client: sink_grpc_client.clone(), + transformer_client: transformer_grpc_client.clone(), + fb_sink_client: fb_sink_grpc_client.clone(), }; + tokio::spawn(async move { if let Err(e) = start_metrics_https_server(metrics_addr, metrics_state).await { error!("Metrics server error: {:?}", e); @@ -201,7 +201,7 @@ pub async fn init( }); // start the lag reader to publish lag metrics - let mut lag_reader = LagReaderBuilder::new(source_client.clone()) + let mut lag_reader = LagReaderBuilder::new(source_grpc_client.clone()) .lag_checking_interval(Duration::from_secs( config().lag_check_interval_in_secs.into(), )) @@ -212,14 +212,14 @@ pub async fn init( lag_reader.start().await; // build the forwarder - let mut forwarder_builder = ForwarderBuilder::new(source_client, sink_client, cln_token); + let mut forwarder_builder = ForwarderBuilder::new(source_reader, sink_writer, cln_token); // add transformer if exists - if let Some(transformer_client) = transformer_client { - forwarder_builder = forwarder_builder.transformer_client(transformer_client); + if let Some(transformer) = transformer { + forwarder_builder = forwarder_builder.source_transformer(transformer); } // add fallback sink if exists - if let Some(fb_sink_client) = fb_sink_client { - forwarder_builder = forwarder_builder.fb_sink_client(fb_sink_client); + if let Some(fallback_writer) = fallback_writer { + forwarder_builder = forwarder_builder.fallback_sink_writer(fallback_writer); } // build the final forwarder let mut forwarder = forwarder_builder.build(); @@ -232,24 +232,24 @@ pub async fn init( } async fn wait_until_ready( - source_client: &mut SourceClient, - sink_client: &mut SinkClient, - transformer_client: &mut Option, - fb_sink_client: &mut Option, + 
source_client: &mut SourceClient, + sink_client: &mut SinkClient, + transformer_client: &mut Option>, + fb_sink_client: &mut Option>, ) -> Result<()> { loop { - let source_ready = source_client.is_ready().await; + let source_ready = source_client.is_ready(Request::new(())).await.is_ok(); if !source_ready { info!("UDSource is not ready, waiting..."); } - let sink_ready = sink_client.is_ready().await; + let sink_ready = sink_client.is_ready(Request::new(())).await.is_ok(); if !sink_ready { info!("UDSink is not ready, waiting..."); } let transformer_ready = if let Some(client) = transformer_client { - let ready = client.is_ready().await; + let ready = client.is_ready(Request::new(())).await.is_ok(); if !ready { info!("UDTransformer is not ready, waiting..."); } @@ -259,7 +259,7 @@ async fn wait_until_ready( }; let fb_sink_ready = if let Some(client) = fb_sink_client { - let ready = client.is_ready().await; + let ready = client.is_ready(Request::new(())).await.is_ok(); if !ready { info!("Fallback Sink is not ready, waiting..."); } @@ -287,9 +287,6 @@ mod tests { use tokio::sync::mpsc::Sender; use tokio_util::sync::CancellationToken; - use crate::sink::SinkConfig; - use crate::source::SourceConfig; - struct SimpleSource; #[tonic::async_trait] impl source::Sourcer for SimpleSource { diff --git a/rust/monovertex/src/message.rs b/rust/monovertex/src/message.rs index 6df0874948..6f0a9981a3 100644 --- a/rust/monovertex/src/message.rs +++ b/rust/monovertex/src/message.rs @@ -5,10 +5,9 @@ use base64::Engine; use chrono::{DateTime, Utc}; use crate::error::Error; +use crate::proto; +use crate::proto::{read_response, SourceTransformRequest}; use crate::shared::{prost_timestamp_from_utc, utc_from_timestamp}; -use crate::sink::proto; -use crate::source::proto::read_response; -use crate::transformer::proto::SourceTransformRequest; /// A message that is sent from the source to the sink. 
#[derive(Debug, Clone)] diff --git a/rust/monovertex/src/metrics.rs b/rust/monovertex/src/metrics.rs index f6f5519765..c2dc7511c9 100644 --- a/rust/monovertex/src/metrics.rs +++ b/rust/monovertex/src/metrics.rs @@ -17,15 +17,15 @@ use tracing::{debug, error, info}; use crate::config::config; use crate::error::Error; -use crate::sink::SinkClient; -use crate::source::SourceClient; -use crate::transformer::TransformerClient; +use crate::proto; use prometheus_client::encoding::text::encode; use prometheus_client::metrics::counter::Counter; use prometheus_client::metrics::family::Family; use prometheus_client::metrics::gauge::Gauge; use prometheus_client::metrics::histogram::{exponential_buckets, Histogram}; use prometheus_client::registry::Registry; +use tonic::transport::Channel; +use tonic::Request; // Define the labels for the metrics // Note: Please keep consistent with the definitions in MonoVertex daemon @@ -59,10 +59,10 @@ const SINK_TIME: &str = "monovtx_sink_time"; #[derive(Clone)] pub(crate) struct MetricsState { - pub source_client: SourceClient, - pub sink_client: SinkClient, - pub transformer_client: Option, - pub fb_sink_client: Option, + pub source_client: proto::source_client::SourceClient, + pub sink_client: proto::sink_client::SinkClient, + pub transformer_client: Option>, + pub fb_sink_client: Option>, } /// The global register of all metrics. 
@@ -323,22 +323,22 @@ async fn livez() -> impl IntoResponse { } async fn sidecar_livez(State(mut state): State) -> impl IntoResponse { - if !state.source_client.is_ready().await { + if !state.source_client.is_ready(Request::new(())).await.is_ok() { error!("Source client is not available"); return StatusCode::SERVICE_UNAVAILABLE; } - if !state.sink_client.is_ready().await { + if !state.sink_client.is_ready(Request::new(())).await.is_ok() { error!("Sink client is not available"); return StatusCode::SERVICE_UNAVAILABLE; } if let Some(mut transformer_client) = state.transformer_client { - if !transformer_client.is_ready().await { + if !transformer_client.is_ready(Request::new(())).await.is_ok() { error!("Transformer client is not available"); return StatusCode::SERVICE_UNAVAILABLE; } } if let Some(mut fb_sink_client) = state.fb_sink_client { - if !fb_sink_client.is_ready().await { + if !fb_sink_client.is_ready(Request::new(())).await.is_ok() { error!("Fallback sink client is not available"); return StatusCode::SERVICE_UNAVAILABLE; } @@ -358,7 +358,7 @@ struct TimestampedPending { /// and exposing the metrics. It maintains a list of pending stats and ensures that /// only the most recent entries are kept. pub(crate) struct LagReader { - source_client: SourceClient, + source_client: proto::source_client::SourceClient, lag_checking_interval: Duration, refresh_interval: Duration, buildup_handle: Option>, @@ -368,13 +368,13 @@ pub(crate) struct LagReader { /// LagReaderBuilder is used to build a `LagReader` instance. 
pub(crate) struct LagReaderBuilder { - source_client: SourceClient, + source_client: proto::source_client::SourceClient, lag_checking_interval: Option, refresh_interval: Option, } impl LagReaderBuilder { - pub(crate) fn new(source_client: SourceClient) -> Self { + pub(crate) fn new(source_client: proto::source_client::SourceClient) -> Self { Self { source_client, lag_checking_interval: None, @@ -447,14 +447,14 @@ impl Drop for LagReader { /// Periodically checks the pending messages from the source client and build the pending stats. async fn build_pending_info( - mut source_client: SourceClient, + mut source_client: proto::source_client::SourceClient, lag_checking_interval: Duration, pending_stats: Arc>>, ) { let mut ticker = time::interval(lag_checking_interval); loop { ticker.tick().await; - match source_client.pending_fn().await { + match fetch_pending(&mut source_client).await { Ok(pending) => { if pending != -1 { let mut stats = pending_stats.lock().await; @@ -476,6 +476,19 @@ async fn build_pending_info( } } +async fn fetch_pending( + source_client: &mut proto::source_client::SourceClient, +) -> crate::error::Result { + let request = Request::new(()); + let response = source_client + .pending_fn(request) + .await? + .into_inner() + .result + .map_or(-1, |r| r.count); // default to -1(unavailable) + Ok(response) +} + // Periodically exposes the pending metrics by calculating the average pending messages over different intervals. 
async fn expose_pending_metrics( refresh_interval: Duration, diff --git a/rust/monovertex/src/shared.rs b/rust/monovertex/src/shared.rs index 2c63244647..2ce22ba803 100644 --- a/rust/monovertex/src/shared.rs +++ b/rust/monovertex/src/shared.rs @@ -1,13 +1,14 @@ use std::path::PathBuf; +use crate::error::Error; +use backoff::retry::Retry; +use backoff::strategy::fixed; use chrono::{DateTime, TimeZone, Timelike, Utc}; use prost_types::Timestamp; use tokio::net::UnixStream; use tonic::transport::{Channel, Endpoint, Uri}; use tower::service_fn; -use crate::error::Error; - pub(crate) fn utc_from_timestamp(t: Option) -> DateTime { t.map_or(Utc.timestamp_nanos(-1), |t| { DateTime::from_timestamp(t.seconds, t.nanos as u32).unwrap_or(Utc.timestamp_nanos(-1)) @@ -21,6 +22,21 @@ pub(crate) fn prost_timestamp_from_utc(t: DateTime) -> Option { }) } +pub(crate) async fn create_rpc_channel(socket_path: PathBuf) -> crate::error::Result { + const RECONNECT_INTERVAL: u64 = 1000; + const MAX_RECONNECT_ATTEMPTS: usize = 5; + + let interval = fixed::Interval::from_millis(RECONNECT_INTERVAL).take(MAX_RECONNECT_ATTEMPTS); + + let channel = Retry::retry( + interval, + || async { connect_with_uds(socket_path.clone()).await }, + |_: &Error| true, + ) + .await?; + Ok(channel) +} + pub(crate) async fn connect_with_uds(uds_path: PathBuf) -> Result { let channel = Endpoint::try_from("http://[::]:50051") .map_err(|e| Error::ConnectionError(format!("Failed to create endpoint: {:?}", e)))? 
diff --git a/rust/monovertex/src/sink.rs b/rust/monovertex/src/sink.rs index fb82273fb6..487dc7874a 100644 --- a/rust/monovertex/src/sink.rs +++ b/rust/monovertex/src/sink.rs @@ -1,74 +1,23 @@ -use crate::config::config; -use crate::error::{Error, Result}; +use crate::error::Result; use crate::message::Message; -use crate::shared::connect_with_uds; -use backoff::retry::Retry; -use backoff::strategy::fixed; +use crate::proto; +use crate::proto::sink_client::SinkClient; use tonic::transport::Channel; -use tonic::Request; -pub mod proto { - tonic::include_proto!("sink.v1"); -} - -const RECONNECT_INTERVAL: u64 = 1000; -const MAX_RECONNECT_ATTEMPTS: usize = 5; -const SINK_SOCKET: &str = "/var/run/numaflow/sink.sock"; -const FB_SINK_SOCKET: &str = "/var/run/numaflow/fb-sink.sock"; - -const SINK_SERVER_INFO_FILE: &str = "/var/run/numaflow/sinker-server-info"; -const FB_SINK_SERVER_INFO_FILE: &str = "/var/run/numaflow/fb-sinker-server-info"; - -/// SinkConfig is the configuration for the sink server. 
-#[derive(Debug, Clone)] -pub struct SinkConfig { - pub socket_path: String, - pub server_info_file: String, - pub max_message_size: usize, -} +pub(crate) const SINK_SOCKET: &str = "/var/run/numaflow/sink.sock"; +pub(crate) const FB_SINK_SOCKET: &str = "/var/run/numaflow/fb-sink.sock"; -impl Default for SinkConfig { - fn default() -> Self { - SinkConfig { - socket_path: SINK_SOCKET.to_string(), - server_info_file: SINK_SERVER_INFO_FILE.to_string(), - max_message_size: config().grpc_max_message_size, - } - } -} - -impl SinkConfig { - /// default config for fallback sink - pub(crate) fn fallback_default() -> Self { - SinkConfig { - max_message_size: config().grpc_max_message_size, - socket_path: FB_SINK_SOCKET.to_string(), - server_info_file: FB_SINK_SERVER_INFO_FILE.to_string(), - } - } -} +pub(crate) const SINK_SERVER_INFO_FILE: &str = "/var/run/numaflow/sinker-server-info"; +pub(crate) const FB_SINK_SERVER_INFO_FILE: &str = "/var/run/numaflow/fb-sinker-server-info"; #[derive(Clone)] /// SinkClient is a client to interact with the sink server. 
-pub struct SinkClient { - client: proto::sink_client::SinkClient, +pub struct SinkWriter { + client: SinkClient, } -impl SinkClient { - pub(crate) async fn connect(config: SinkConfig) -> Result { - let interval = - fixed::Interval::from_millis(RECONNECT_INTERVAL).take(MAX_RECONNECT_ATTEMPTS); - - let channel = Retry::retry( - interval, - || async { connect_with_uds(config.socket_path.clone().into()).await }, - |_: &Error| true, - ) - .await?; - - let client = proto::sink_client::SinkClient::new(channel) - .max_decoding_message_size(config.max_message_size) - .max_encoding_message_size(config.max_message_size); +impl SinkWriter { + pub(crate) async fn new(client: SinkClient) -> Result { Ok(Self { client }) } @@ -99,10 +48,6 @@ impl SinkClient { Ok(response) } - - pub(crate) async fn is_ready(&mut self) -> bool { - self.client.is_ready(Request::new(())).await.is_ok() - } } #[cfg(test)] @@ -111,9 +56,9 @@ mod tests { use numaflow::sink; use tracing::info; - use crate::message::Offset; - use super::*; + use crate::message::Offset; + use crate::shared::create_rpc_channel; struct Logger; #[tonic::async_trait] @@ -139,7 +84,7 @@ mod tests { } } #[tokio::test] - async fn sink_operations() { + async fn sink_operations() -> Result<()> { // start the server let (shutdown_tx, shutdown_rx) = tokio::sync::oneshot::channel(); let tmp_dir = tempfile::TempDir::new().unwrap(); @@ -160,13 +105,10 @@ mod tests { // wait for the server to start tokio::time::sleep(tokio::time::Duration::from_millis(100)).await; - let mut sink_client = SinkClient::connect(SinkConfig { - socket_path: sock_file.to_str().unwrap().to_string(), - server_info_file: server_info_file.to_str().unwrap().to_string(), - max_message_size: 4 * 1024 * 1024, - }) - .await - .expect("failed to connect to sink server"); + let mut sink_client = + SinkWriter::new(SinkClient::new(create_rpc_channel(sock_file).await?)) + .await + .expect("failed to connect to sink server"); let messages = vec![ Message { @@ -193,15 +135,13 
@@ mod tests { }, ]; - let ready_response = sink_client.is_ready().await; - assert!(ready_response); - - let response = sink_client.sink_fn(messages).await.unwrap(); + let response = sink_client.sink_fn(messages).await?; assert_eq!(response.results.len(), 2); shutdown_tx .send(()) .expect("failed to send shutdown signal"); server_handle.await.expect("failed to join server task"); + Ok(()) } } diff --git a/rust/monovertex/src/source.rs b/rust/monovertex/src/source.rs index 681b0beb58..da6a79b0c4 100644 --- a/rust/monovertex/src/source.rs +++ b/rust/monovertex/src/source.rs @@ -1,63 +1,49 @@ -use crate::error::{Error, Result}; +use crate::error::Error::SourceError; +use crate::error::Result; use crate::message::{Message, Offset}; -use crate::shared::connect_with_uds; -use backoff::retry::Retry; -use backoff::strategy::fixed; +use crate::proto; +use crate::source::proto::AckResponse; use base64::prelude::BASE64_STANDARD; use base64::Engine; -use tokio_stream::StreamExt; +use tokio::sync::mpsc; +use tokio_stream::wrappers::ReceiverStream; use tonic::transport::Channel; -use tonic::Request; +use tonic::{Request, Streaming}; -pub mod proto { - tonic::include_proto!("source.v1"); -} -const RECONNECT_INTERVAL: u64 = 1000; -const MAX_RECONNECT_ATTEMPTS: usize = 5; -const SOURCE_SOCKET: &str = "/var/run/numaflow/source.sock"; -const SOURCE_SERVER_INFO_FILE: &str = "/var/run/numaflow/sourcer-server-info"; - -/// SourceConfig is the configuration for the source server. 
-#[derive(Debug, Clone)] -pub struct SourceConfig { - pub socket_path: String, - pub server_info_file: String, - pub max_message_size: usize, -} - -impl Default for SourceConfig { - fn default() -> Self { - SourceConfig { - socket_path: SOURCE_SOCKET.to_string(), - server_info_file: SOURCE_SERVER_INFO_FILE.to_string(), - max_message_size: 64 * 1024 * 1024, // 64 MB - } - } -} +pub(crate) const SOURCE_SOCKET: &str = "/var/run/numaflow/source.sock"; +pub(crate) const SOURCE_SERVER_INFO_FILE: &str = "/var/run/numaflow/sourcer-server-info"; /// SourceClient is a client to interact with the source server. -#[derive(Debug, Clone)] -pub(crate) struct SourceClient { +#[derive(Debug)] +pub(crate) struct SourceReader { + read_tx: mpsc::Sender, + resp_stream: Streaming, + ack_tx: mpsc::Sender, client: proto::source_client::SourceClient, } -impl SourceClient { - pub(crate) async fn connect(config: SourceConfig) -> Result { - let interval = - fixed::Interval::from_millis(RECONNECT_INTERVAL).take(MAX_RECONNECT_ATTEMPTS); - - let channel = Retry::retry( - interval, - || async { connect_with_uds(config.socket_path.clone().into()).await }, - |_: &Error| true, - ) - .await?; - - let client = proto::source_client::SourceClient::new(channel) - .max_encoding_message_size(config.max_message_size) - .max_decoding_message_size(config.max_message_size); +impl SourceReader { + pub(crate) async fn new( + mut client: proto::source_client::SourceClient, + ) -> Result { + let (read_tx, read_rx) = mpsc::channel(500); - Ok(Self { client }) + let resp_stream = client + .read_fn(Request::new(ReceiverStream::new(read_rx))) + .await? 
+ .into_inner(); + + let (ack_tx, ack_rx) = mpsc::channel(500); + let _ = client + .ack_fn(Request::new(ReceiverStream::new(ack_rx))) + .await?; + + Ok(Self { + client, + read_tx, + resp_stream, + ack_tx, + }) } pub(crate) async fn read_fn( @@ -65,20 +51,28 @@ impl SourceClient { num_records: u64, timeout_in_ms: u32, ) -> Result> { - let request = Request::new(proto::ReadRequest { + let request = proto::ReadRequest { request: Some(proto::read_request::Request { num_records, timeout_in_ms, }), - }); + }; + + self.read_tx + .send(request) + .await + .map_err(|e| SourceError(e.to_string()))?; - let mut stream = self.client.read_fn(request).await?.into_inner(); let mut messages = Vec::with_capacity(num_records as usize); - while let Some(response) = stream.next().await { - let result = response? + while let Some(response) = self.resp_stream.message().await? { + if response.status.as_ref().map_or(false, |status| status.eot) { + break; + } + + let result = response .result - .ok_or_else(|| Error::SourceError("Empty message".to_string()))?; + .ok_or_else(|| SourceError("Empty message".to_string()))?; messages.push(result.try_into()?); } @@ -86,46 +80,24 @@ impl SourceClient { Ok(messages) } - pub(crate) async fn ack_fn(&mut self, offsets: Vec) -> Result { - let offsets = offsets - .into_iter() - .map(|offset| proto::Offset { - offset: BASE64_STANDARD - .decode(offset.offset) - .expect("we control the encoding, so this should never fail"), - partition_id: offset.partition_id, - }) - .collect(); - - let request = Request::new(proto::AckRequest { - request: Some(proto::ack_request::Request { offsets }), - }); - - Ok(self.client.ack_fn(request).await?.into_inner()) - } - - pub(crate) async fn pending_fn(&mut self) -> Result { - let request = Request::new(()); - let response = self - .client - .pending_fn(request) - .await? 
- .into_inner() - .result - .map_or(-1, |r| r.count); // default to -1(unavailable) - Ok(response) - } - - #[allow(dead_code)] - // TODO: remove dead_code - pub(crate) async fn partitions_fn(&mut self) -> Result> { - let request = Request::new(()); - let response = self.client.partitions_fn(request).await?.into_inner(); - Ok(response.result.map_or(vec![], |r| r.partitions)) - } - - pub(crate) async fn is_ready(&mut self) -> bool { - self.client.is_ready(Request::new(())).await.is_ok() + pub(crate) async fn ack_fn(&mut self, offsets: Vec) -> Result { + for offset in offsets { + let request = proto::AckRequest { + request: Some(proto::ack_request::Request { + offset: Some(proto::Offset { + offset: BASE64_STANDARD + .decode(offset.offset) + .expect("we control the encoding, so this should never fail"), + partition_id: offset.partition_id, + }), + }), + }; + self.ack_tx + .send(request) + .await + .map_err(|e| SourceError(e.to_string()))?; + } + Ok(AckResponse::default()) } } @@ -134,13 +106,14 @@ mod tests { use std::collections::HashSet; use std::error::Error; + use crate::proto::source_client::SourceClient; + use crate::shared::create_rpc_channel; + use crate::source::SourceReader; use chrono::Utc; use numaflow::source; use numaflow::source::{Message, Offset, SourceReadRequest}; use tokio::sync::mpsc::Sender; - use crate::source::{SourceClient, SourceConfig}; - struct SimpleSource { num: usize, yet_to_ack: std::sync::RwLock>, @@ -213,24 +186,16 @@ mod tests { .with_socket_file(server_socket) .with_server_info_file(server_info) .start_with_shutdown(shutdown_rx) - .await - .unwrap(); + .await?; + Ok(()) }); // wait for the server to start // TODO: flaky tokio::time::sleep(tokio::time::Duration::from_millis(100)).await; - let mut source_client = SourceClient::connect(SourceConfig { - socket_path: sock_file.to_str().unwrap().to_string(), - server_info_file: server_info_file.to_str().unwrap().to_string(), - max_message_size: 4 * 1024 * 1024, - }) - .await - 
.expect("failed to connect to source server"); - - let response = source_client.is_ready().await; - assert!(response); + let mut source_client = + SourceReader::new(SourceClient::new(create_rpc_channel(sock_file).await?)).await?; let messages = source_client.read_fn(5, 1000).await.unwrap(); assert_eq!(messages.len(), 5); @@ -241,16 +206,10 @@ mod tests { .unwrap(); assert!(response.result.unwrap().success.is_some()); - let pending = source_client.pending_fn().await.unwrap(); - assert_eq!(pending, 0); - - let partitions = source_client.partitions_fn().await.unwrap(); - assert_eq!(partitions, vec![2]); - shutdown_tx .send(()) .expect("failed to send shutdown signal"); - server_handle.await.expect("failed to join server task"); + server_handle.await.expect("failed to join server task")?; Ok(()) } } diff --git a/rust/monovertex/src/transformer.rs b/rust/monovertex/src/transformer.rs index f891a851fc..a5761dc01d 100644 --- a/rust/monovertex/src/transformer.rs +++ b/rust/monovertex/src/transformer.rs @@ -1,61 +1,27 @@ -use crate::error::{Error, Result}; +use crate::error::Result; use crate::message::Message; -use crate::shared::{connect_with_uds, utc_from_timestamp}; +use crate::proto; +use crate::shared::utc_from_timestamp; use crate::transformer::proto::SourceTransformRequest; -use backoff::retry::Retry; -use backoff::strategy::fixed; use tonic::transport::Channel; -use tonic::Request; - -pub mod proto { - tonic::include_proto!("sourcetransformer.v1"); -} const DROP: &str = "U+005C__DROP__"; const RECONNECT_INTERVAL: u64 = 1000; const MAX_RECONNECT_ATTEMPTS: usize = 5; -const TRANSFORMER_SOCKET: &str = "/var/run/numaflow/sourcetransform.sock"; -const TRANSFORMER_SERVER_INFO_FILE: &str = "/var/run/numaflow/sourcetransformer-server-info"; - -/// TransformerConfig is the configuration for the transformer server. 
-#[derive(Debug, Clone)] -pub struct TransformerConfig { - pub socket_path: String, - pub server_info_file: String, - pub max_message_size: usize, -} - -impl Default for TransformerConfig { - fn default() -> Self { - TransformerConfig { - socket_path: TRANSFORMER_SOCKET.to_string(), - server_info_file: TRANSFORMER_SERVER_INFO_FILE.to_string(), - max_message_size: 64 * 1024 * 1024, // 64 MB - } - } -} +pub(crate) const TRANSFORMER_SOCKET: &str = "/var/run/numaflow/sourcetransform.sock"; +pub(crate) const TRANSFORMER_SERVER_INFO_FILE: &str = + "/var/run/numaflow/sourcetransformer-server-info"; /// TransformerClient is a client to interact with the transformer server. #[derive(Clone)] -pub struct TransformerClient { +pub struct SourceTransformer { client: proto::source_transform_client::SourceTransformClient, } -impl TransformerClient { - pub(crate) async fn connect(config: TransformerConfig) -> Result { - let interval = - fixed::Interval::from_millis(RECONNECT_INTERVAL).take(MAX_RECONNECT_ATTEMPTS); - - let channel = Retry::retry( - interval, - || async { connect_with_uds(config.socket_path.clone().into()).await }, - |_: &Error| true, - ) - .await?; - - let client = proto::source_transform_client::SourceTransformClient::new(channel) - .max_decoding_message_size(config.max_message_size) - .max_encoding_message_size(config.max_message_size); +impl SourceTransformer { + pub(crate) async fn new( + client: proto::source_transform_client::SourceTransformClient, + ) -> Result { Ok(Self { client }) } @@ -92,21 +58,18 @@ impl TransformerClient { Ok(Some(messages)) } - - pub(crate) async fn is_ready(&mut self) -> bool { - self.client.is_ready(Request::new(())).await.is_ok() - } } #[cfg(test)] mod tests { use std::error::Error; + use crate::proto::source_transform_client::SourceTransformClient; + use crate::shared::create_rpc_channel; + use crate::transformer::SourceTransformer; use numaflow::sourcetransform; use tempfile::TempDir; - use crate::transformer::{TransformerClient, 
TransformerConfig}; - struct NowCat; #[tonic::async_trait] @@ -143,11 +106,9 @@ mod tests { // wait for the server to start tokio::time::sleep(tokio::time::Duration::from_millis(100)).await; - let mut client = TransformerClient::connect(TransformerConfig { - socket_path: sock_file.to_str().unwrap().to_string(), - server_info_file: server_info_file.to_str().unwrap().to_string(), - max_message_size: 4 * 1024 * 1024, - }) + let mut client = SourceTransformer::new(SourceTransformClient::new( + create_rpc_channel(sock_file).await?, + )) .await?; let message = crate::message::Message { @@ -162,9 +123,6 @@ mod tests { headers: Default::default(), }; - let resp = client.is_ready().await; - assert!(resp); - let resp = client.transform_fn(message).await?; assert!(resp.is_some()); assert_eq!(resp.unwrap().len(), 1); @@ -212,11 +170,9 @@ mod tests { // wait for the server to start tokio::time::sleep(tokio::time::Duration::from_millis(100)).await; - let mut client = TransformerClient::connect(TransformerConfig { - socket_path: sock_file.to_str().unwrap().to_string(), - server_info_file: server_info_file.to_str().unwrap().to_string(), - max_message_size: 4 * 1024 * 1024, - }) + let mut client = SourceTransformer::new(SourceTransformClient::new( + create_rpc_channel(sock_file).await?, + )) .await?; let message = crate::message::Message { @@ -231,9 +187,6 @@ mod tests { headers: Default::default(), }; - let resp = client.is_ready().await; - assert!(resp); - let resp = client.transform_fn(message).await?; assert!(resp.is_none()); diff --git a/rust/serving/src/app/tracker.rs b/rust/serving/src/app/tracker.rs index 85d3c2b76d..12420f948c 100644 --- a/rust/serving/src/app/tracker.rs +++ b/rust/serving/src/app/tracker.rs @@ -5,7 +5,7 @@ use std::sync::Arc; use serde::{Deserialize, Serialize}; use crate::app::callback::CallbackRequest; -use crate::pipeline::{Edge, PipelineDCG, OperatorType}; +use crate::pipeline::{Edge, OperatorType, PipelineDCG}; use crate::Error; fn 
compare_slice(operator: &OperatorType, a: &[String], b: &[String]) -> bool { From 5ce20aa3648bcd311f36f8b56f4b7a674c13799d Mon Sep 17 00:00:00 2001 From: Yashash H L Date: Tue, 10 Sep 2024 19:13:18 +0530 Subject: [PATCH 02/40] fix tests Signed-off-by: Yashash H L --- rust/monovertex/src/forwarder.rs | 17 ++++++----- rust/monovertex/src/lib.rs | 48 ++++++++++++++---------------- rust/monovertex/src/message.rs | 7 +++-- rust/monovertex/src/metrics.rs | 24 +++++++-------- rust/monovertex/src/sink.rs | 8 ++--- rust/monovertex/src/source.rs | 48 +++++++++++++++--------------- rust/monovertex/src/transformer.rs | 14 ++++----- rust/src/bin/main.rs | 4 ++- 8 files changed, 84 insertions(+), 86 deletions(-) diff --git a/rust/monovertex/src/forwarder.rs b/rust/monovertex/src/forwarder.rs index d37ebbe9b4..f6afe05d82 100644 --- a/rust/monovertex/src/forwarder.rs +++ b/rust/monovertex/src/forwarder.rs @@ -3,11 +3,12 @@ use std::collections::HashMap; use crate::config::{config, OnFailureStrategy}; use crate::error::{Error, Result}; use crate::message::{Message, Offset}; +use crate::metrics; use crate::metrics::forward_metrics; use crate::sink::SinkWriter; +use crate::sinkpb::Status::{Failure, Fallback, Success}; use crate::source::SourceReader; use crate::transformer::SourceTransformer; -use crate::{metrics, proto}; use chrono::Utc; use tokio::task::JoinSet; use tokio::time::sleep; @@ -360,9 +361,9 @@ impl Forwarder { // construct the error map for the failed messages messages_to_send.retain(|msg| { if let Some(result) = result_map.get(&msg.id) { - return if result.status == proto::Status::Success as i32 { + return if result.status == Success as i32 { false - } else if result.status == proto::Status::Fallback as i32 { + } else if result.status == Fallback as i32 { fallback_msgs.push(msg.clone()); // add to fallback messages false } else { @@ -440,12 +441,12 @@ impl Forwarder { // construct the error map for the failed messages messages_to_send.retain(|msg| { if let Some(result) 
= result_map.get(&msg.id) { - if result.status == proto::Status::Failure as i32 { + if result.status == Failure as i32 { *fallback_error_map .entry(result.err_msg.clone()) .or_insert(0) += 1; true - } else if result.status == proto::Status::Fallback as i32 { + } else if result.status == Fallback as i32 { contains_fallback_status = true; false } else { @@ -526,12 +527,12 @@ mod tests { use crate::error::Result; use crate::forwarder::ForwarderBuilder; - use crate::proto::sink_client::SinkClient; - use crate::proto::source_client::SourceClient; - use crate::proto::source_transform_client::SourceTransformClient; use crate::shared::create_rpc_channel; use crate::sink::SinkWriter; + use crate::sinkpb::sink_client::SinkClient; use crate::source::SourceReader; + use crate::sourcepb::source_client::SourceClient; + use crate::sourcetransformpb::source_transform_client::SourceTransformClient; use crate::transformer::SourceTransformer; struct SimpleSource { diff --git a/rust/monovertex/src/lib.rs b/rust/monovertex/src/lib.rs index d59691c155..d39c7708bd 100644 --- a/rust/monovertex/src/lib.rs +++ b/rust/monovertex/src/lib.rs @@ -1,16 +1,14 @@ -pub(crate) use self::error::Result; use crate::config::config; -pub(crate) use crate::error::Error; use crate::forwarder::ForwarderBuilder; use crate::metrics::{start_metrics_https_server, LagReaderBuilder, MetricsState}; -use crate::proto::sink_client::SinkClient; -use crate::proto::source_client::SourceClient; -use crate::proto::source_transform_client::SourceTransformClient; use crate::shared::create_rpc_channel; use crate::sink::{ SinkWriter, FB_SINK_SERVER_INFO_FILE, FB_SINK_SOCKET, SINK_SERVER_INFO_FILE, SINK_SOCKET, }; +use crate::sinkpb::sink_client::SinkClient; use crate::source::{SourceReader, SOURCE_SERVER_INFO_FILE, SOURCE_SOCKET}; +use crate::sourcepb::source_client::SourceClient; +use crate::sourcetransformpb::source_transform_client::SourceTransformClient; use crate::transformer::{SourceTransformer, 
TRANSFORMER_SERVER_INFO_FILE, TRANSFORMER_SOCKET}; use std::net::SocketAddr; use std::time::Duration; @@ -22,6 +20,10 @@ use tonic::transport::Channel; use tonic::Request; use tracing::{error, info, warn}; +pub(crate) use self::error::Result; + +pub(crate) use crate::error::Error; + /// SourcerSinker orchestrates data movement from the Source to the Sink via the optional SourceTransformer. /// The forward-a-chunk executes the following in an infinite loop till a shutdown signal is received: /// - Read X messages from the source @@ -44,9 +46,15 @@ pub(crate) mod message; pub(crate) mod shared; -pub(crate) mod proto { +pub(crate) mod sourcepb { tonic::include_proto!("source.v1"); +} + +pub(crate) mod sinkpb { tonic::include_proto!("sink.v1"); +} + +pub(crate) mod sourcetransformpb { tonic::include_proto!("sourcetransformer.v1"); } @@ -126,9 +134,9 @@ pub async fn init(cln_token: CancellationToken) -> Result<()> { .max_encoding_message_size(config().grpc_max_message_size) .max_encoding_message_size(config().grpc_max_message_size); - let mut sink_writer = SinkWriter::new(sink_grpc_client.clone()).await?; + let sink_writer = SinkWriter::new(sink_grpc_client.clone()).await?; - let (mut transformer_grpc_client, mut transformer) = if config().is_transformer_enabled { + let (mut transformer_grpc_client, transformer) = if config().is_transformer_enabled { server_info::check_for_server_compatibility( TRANSFORMER_SERVER_INFO_FILE, cln_token.clone(), @@ -144,14 +152,14 @@ pub async fn init(cln_token: CancellationToken) -> Result<()> { .max_encoding_message_size(config().grpc_max_message_size); ( - Some(transformer_grpc_client), - Some(SourceTransformer::new(transformer_grpc_client.clone()).await?), + Some(transformer_grpc_client.clone()), + Some(SourceTransformer::new(transformer_grpc_client).await?), ) } else { (None, None) }; - let (mut fb_sink_grpc_client, mut fallback_writer) = if config().is_fallback_enabled { + let (mut fb_sink_grpc_client, fallback_writer) = if 
config().is_fallback_enabled { server_info::check_for_server_compatibility(FB_SINK_SERVER_INFO_FILE, cln_token.clone()) .await .map_err(|e| { @@ -163,8 +171,8 @@ pub async fn init(cln_token: CancellationToken) -> Result<()> { .max_encoding_message_size(config().grpc_max_message_size); ( - Some(fb_sink_grpc_client), - Some(SinkWriter::new(fb_sink_grpc_client.clone()).await?), + Some(fb_sink_grpc_client.clone()), + Some(SinkWriter::new(fb_sink_grpc_client).await?), ) } else { (None, None) @@ -282,6 +290,7 @@ async fn wait_until_ready( mod tests { use std::env; + use crate::init; use numaflow::source::{Message, Offset, SourceReadRequest}; use numaflow::{sink, source}; use tokio::sync::mpsc::Sender; @@ -331,11 +340,6 @@ mod tests { .await .unwrap(); }); - let source_config = SourceConfig { - socket_path: src_sock_file.to_str().unwrap().to_string(), - server_info_file: src_info_file.to_str().unwrap().to_string(), - max_message_size: 100, - }; let (sink_shutdown_tx, sink_shutdown_rx) = tokio::sync::oneshot::channel(); let tmp_dir = tempfile::TempDir::new().unwrap(); @@ -352,11 +356,6 @@ mod tests { .await .unwrap(); }); - let sink_config = SinkConfig { - socket_path: sink_sock_file.to_str().unwrap().to_string(), - server_info_file: sink_server_info.to_str().unwrap().to_string(), - max_message_size: 100, - }; // wait for the servers to start // FIXME: we need to have a better way, this is flaky @@ -371,8 +370,7 @@ mod tests { let forwarder_cln_token = cln_token.clone(); let forwarder_handle = tokio::spawn(async move { - let result = - super::init(source_config, sink_config, None, None, forwarder_cln_token).await; + let result = init(forwarder_cln_token).await; assert!(result.is_ok()); }); diff --git a/rust/monovertex/src/message.rs b/rust/monovertex/src/message.rs index 6f0a9981a3..4b4b2ee9be 100644 --- a/rust/monovertex/src/message.rs +++ b/rust/monovertex/src/message.rs @@ -5,9 +5,10 @@ use base64::Engine; use chrono::{DateTime, Utc}; use crate::error::Error; -use 
crate::proto; -use crate::proto::{read_response, SourceTransformRequest}; use crate::shared::{prost_timestamp_from_utc, utc_from_timestamp}; +use crate::sinkpb::SinkRequest; +use crate::sourcepb::read_response; +use crate::sourcetransformpb::SourceTransformRequest; /// A message that is sent from the source to the sink. #[derive(Debug, Clone)] @@ -73,7 +74,7 @@ impl TryFrom for Message { } /// Convert [`Message`] to [`proto::SinkRequest`] -impl From for proto::SinkRequest { +impl From for SinkRequest { fn from(message: Message) -> Self { Self { keys: message.keys, diff --git a/rust/monovertex/src/metrics.rs b/rust/monovertex/src/metrics.rs index a37e7bf05a..573d89bebc 100644 --- a/rust/monovertex/src/metrics.rs +++ b/rust/monovertex/src/metrics.rs @@ -18,7 +18,9 @@ use tracing::{debug, error, info}; use crate::config::config; use crate::error::Error; -use crate::proto; +use crate::sinkpb::sink_client::SinkClient; +use crate::sourcepb::source_client::SourceClient; +use crate::sourcetransformpb::source_transform_client::SourceTransformClient; use prometheus_client::encoding::text::encode; use prometheus_client::metrics::counter::Counter; use prometheus_client::metrics::family::Family; @@ -60,10 +62,10 @@ const SINK_TIME: &str = "monovtx_sink_time"; #[derive(Clone)] pub(crate) struct MetricsState { - pub source_client: proto::source_client::SourceClient, - pub sink_client: proto::sink_client::SinkClient, - pub transformer_client: Option>, - pub fb_sink_client: Option>, + pub source_client: SourceClient, + pub sink_client: SinkClient, + pub transformer_client: Option>, + pub fb_sink_client: Option>, } /// The global register of all metrics. @@ -359,7 +361,7 @@ struct TimestampedPending { /// and exposing the metrics. It maintains a list of pending stats and ensures that /// only the most recent entries are kept. 
pub(crate) struct LagReader { - source_client: proto::source_client::SourceClient, + source_client: SourceClient, lag_checking_interval: Duration, refresh_interval: Duration, buildup_handle: Option>, @@ -369,13 +371,13 @@ pub(crate) struct LagReader { /// LagReaderBuilder is used to build a `LagReader` instance. pub(crate) struct LagReaderBuilder { - source_client: proto::source_client::SourceClient, + source_client: SourceClient, lag_checking_interval: Option, refresh_interval: Option, } impl LagReaderBuilder { - pub(crate) fn new(source_client: proto::source_client::SourceClient) -> Self { + pub(crate) fn new(source_client: SourceClient) -> Self { Self { source_client, lag_checking_interval: None, @@ -448,7 +450,7 @@ impl Drop for LagReader { /// Periodically checks the pending messages from the source client and build the pending stats. async fn build_pending_info( - mut source_client: proto::source_client::SourceClient, + mut source_client: SourceClient, lag_checking_interval: Duration, pending_stats: Arc>>, ) { @@ -477,9 +479,7 @@ async fn build_pending_info( } } -async fn fetch_pending( - source_client: &mut proto::source_client::SourceClient, -) -> crate::error::Result { +async fn fetch_pending(source_client: &mut SourceClient) -> crate::error::Result { let request = Request::new(()); let response = source_client .pending_fn(request) diff --git a/rust/monovertex/src/sink.rs b/rust/monovertex/src/sink.rs index 487dc7874a..15356b48cc 100644 --- a/rust/monovertex/src/sink.rs +++ b/rust/monovertex/src/sink.rs @@ -1,7 +1,7 @@ use crate::error::Result; use crate::message::Message; -use crate::proto; -use crate::proto::sink_client::SinkClient; +use crate::sinkpb::sink_client::SinkClient; +use crate::sinkpb::{SinkRequest, SinkResponse}; use tonic::transport::Channel; pub(crate) const SINK_SOCKET: &str = "/var/run/numaflow/sink.sock"; @@ -21,7 +21,7 @@ impl SinkWriter { Ok(Self { client }) } - pub(crate) async fn sink_fn(&mut self, messages: Vec) -> Result { + 
pub(crate) async fn sink_fn(&mut self, messages: Vec) -> Result { // create a channel with at least size let (tx, rx) = tokio::sync::mpsc::channel(if messages.is_empty() { 1 @@ -29,7 +29,7 @@ impl SinkWriter { messages.len() }); - let requests: Vec = + let requests: Vec = messages.into_iter().map(|message| message.into()).collect(); tokio::spawn(async move { diff --git a/rust/monovertex/src/source.rs b/rust/monovertex/src/source.rs index da6a79b0c4..1300d760aa 100644 --- a/rust/monovertex/src/source.rs +++ b/rust/monovertex/src/source.rs @@ -1,8 +1,11 @@ use crate::error::Error::SourceError; use crate::error::Result; use crate::message::{Message, Offset}; -use crate::proto; -use crate::source::proto::AckResponse; +use crate::sourcepb; +use crate::sourcepb::source_client::SourceClient; +use crate::sourcepb::{ + ack_request, read_request, AckRequest, AckResponse, ReadRequest, ReadResponse, +}; use base64::prelude::BASE64_STANDARD; use base64::Engine; use tokio::sync::mpsc; @@ -16,16 +19,13 @@ pub(crate) const SOURCE_SERVER_INFO_FILE: &str = "/var/run/numaflow/sourcer-serv /// SourceClient is a client to interact with the source server. 
#[derive(Debug)] pub(crate) struct SourceReader { - read_tx: mpsc::Sender, - resp_stream: Streaming, - ack_tx: mpsc::Sender, - client: proto::source_client::SourceClient, + read_tx: mpsc::Sender, + resp_stream: Streaming, + ack_tx: mpsc::Sender, } impl SourceReader { - pub(crate) async fn new( - mut client: proto::source_client::SourceClient, - ) -> Result { + pub(crate) async fn new(mut client: SourceClient) -> Result { let (read_tx, read_rx) = mpsc::channel(500); let resp_stream = client @@ -39,7 +39,6 @@ impl SourceReader { .await?; Ok(Self { - client, read_tx, resp_stream, ack_tx, @@ -51,8 +50,8 @@ impl SourceReader { num_records: u64, timeout_in_ms: u32, ) -> Result> { - let request = proto::ReadRequest { - request: Some(proto::read_request::Request { + let request = ReadRequest { + request: Some(read_request::Request { num_records, timeout_in_ms, }), @@ -82,9 +81,9 @@ impl SourceReader { pub(crate) async fn ack_fn(&mut self, offsets: Vec) -> Result { for offset in offsets { - let request = proto::AckRequest { - request: Some(proto::ack_request::Request { - offset: Some(proto::Offset { + let request = AckRequest { + request: Some(ack_request::Request { + offset: Some(sourcepb::Offset { offset: BASE64_STANDARD .decode(offset.offset) .expect("we control the encoding, so this should never fail"), @@ -104,11 +103,10 @@ impl SourceReader { #[cfg(test)] mod tests { use std::collections::HashSet; - use std::error::Error; - use crate::proto::source_client::SourceClient; use crate::shared::create_rpc_channel; use crate::source::SourceReader; + use crate::sourcepb::source_client::SourceClient; use chrono::Utc; use numaflow::source; use numaflow::source::{Message, Offset, SourceReadRequest}; @@ -172,7 +170,7 @@ mod tests { } #[tokio::test] - async fn source_operations() -> Result<(), Box> { + async fn source_operations() { // start the server let (shutdown_tx, shutdown_rx) = tokio::sync::oneshot::channel(); let tmp_dir = tempfile::TempDir::new().unwrap(); @@ -186,16 
+184,19 @@ mod tests { .with_socket_file(server_socket) .with_server_info_file(server_info) .start_with_shutdown(shutdown_rx) - .await?; - Ok(()) + .await + .unwrap() }); // wait for the server to start // TODO: flaky tokio::time::sleep(tokio::time::Duration::from_millis(100)).await; - let mut source_client = - SourceReader::new(SourceClient::new(create_rpc_channel(sock_file).await?)).await?; + let mut source_client = SourceReader::new(SourceClient::new( + create_rpc_channel(sock_file).await.unwrap(), + )) + .await + .unwrap(); let messages = source_client.read_fn(5, 1000).await.unwrap(); assert_eq!(messages.len(), 5); @@ -209,7 +210,6 @@ mod tests { shutdown_tx .send(()) .expect("failed to send shutdown signal"); - server_handle.await.expect("failed to join server task")?; - Ok(()) + server_handle.await.expect("failed to join server task"); } } diff --git a/rust/monovertex/src/transformer.rs b/rust/monovertex/src/transformer.rs index a5761dc01d..5a2d89854f 100644 --- a/rust/monovertex/src/transformer.rs +++ b/rust/monovertex/src/transformer.rs @@ -1,13 +1,11 @@ use crate::error::Result; use crate::message::Message; -use crate::proto; use crate::shared::utc_from_timestamp; -use crate::transformer::proto::SourceTransformRequest; +use crate::sourcetransformpb::source_transform_client::SourceTransformClient; +use crate::sourcetransformpb::SourceTransformRequest; use tonic::transport::Channel; const DROP: &str = "U+005C__DROP__"; -const RECONNECT_INTERVAL: u64 = 1000; -const MAX_RECONNECT_ATTEMPTS: usize = 5; pub(crate) const TRANSFORMER_SOCKET: &str = "/var/run/numaflow/sourcetransform.sock"; pub(crate) const TRANSFORMER_SERVER_INFO_FILE: &str = "/var/run/numaflow/sourcetransformer-server-info"; @@ -15,13 +13,11 @@ pub(crate) const TRANSFORMER_SERVER_INFO_FILE: &str = /// TransformerClient is a client to interact with the transformer server. 
#[derive(Clone)] pub struct SourceTransformer { - client: proto::source_transform_client::SourceTransformClient, + client: SourceTransformClient, } impl SourceTransformer { - pub(crate) async fn new( - client: proto::source_transform_client::SourceTransformClient, - ) -> Result { + pub(crate) async fn new(client: SourceTransformClient) -> Result { Ok(Self { client }) } @@ -64,8 +60,8 @@ impl SourceTransformer { mod tests { use std::error::Error; - use crate::proto::source_transform_client::SourceTransformClient; use crate::shared::create_rpc_channel; + use crate::sourcetransformpb::source_transform_client::SourceTransformClient; use crate::transformer::SourceTransformer; use numaflow::sourcetransform; use tempfile::TempDir; diff --git a/rust/src/bin/main.rs b/rust/src/bin/main.rs index fdbd58a6a0..0b000dc032 100644 --- a/rust/src/bin/main.rs +++ b/rust/src/bin/main.rs @@ -30,7 +30,9 @@ async fn main() { info!("Error running servesink: {}", e); } } else if args.contains(&"--monovertex".to_string()) { - monovertex::mono_vertex().await; + if let Err(e) = monovertex::mono_vertex().await { + error!("Error running monovertex: {}", e); + } } else { error!("Invalid argument. 
Use --serve, --servesink, or --monovertex."); } From 3eece70b94de0cb9112ae5a0bdca52f8548e8c3b Mon Sep 17 00:00:00 2001 From: Yashash H L Date: Wed, 11 Sep 2024 15:27:10 +0530 Subject: [PATCH 03/40] fix tests Signed-off-by: Yashash H L --- rust/Cargo.lock | 4 +-- rust/monovertex/Cargo.toml | 2 +- rust/monovertex/src/forwarder.rs | 60 +++++++++++++------------------- rust/monovertex/src/lib.rs | 18 ++++------ 4 files changed, 35 insertions(+), 49 deletions(-) diff --git a/rust/Cargo.lock b/rust/Cargo.lock index 21d6a28a7d..fe69ca96d5 100644 --- a/rust/Cargo.lock +++ b/rust/Cargo.lock @@ -2905,9 +2905,9 @@ dependencies = [ [[package]] name = "tonic" -version = "0.12.1" +version = "0.12.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "38659f4a91aba8598d27821589f5db7dddd94601e7a01b1e485a50e5484c7401" +checksum = "c6f6ba989e4b2c58ae83d862d3a3e27690b6e3ae630d0deb59f3697f32aa88ad" dependencies = [ "async-stream", "async-trait", diff --git a/rust/monovertex/Cargo.toml b/rust/monovertex/Cargo.toml index 01eb5afafd..3df9759cc1 100644 --- a/rust/monovertex/Cargo.toml +++ b/rust/monovertex/Cargo.toml @@ -6,7 +6,7 @@ edition = "2021" [dependencies] axum = "0.7.5" axum-server = { version = "0.7.1", features = ["tls-rustls"] } -tonic = "0.12.1" +tonic = "0.12.2" bytes = "1.7.1" thiserror = "1.0.63" tokio = { version = "1.39.3", features = ["full"] } diff --git a/rust/monovertex/src/forwarder.rs b/rust/monovertex/src/forwarder.rs index f6afe05d82..6f16985010 100644 --- a/rust/monovertex/src/forwarder.rs +++ b/rust/monovertex/src/forwarder.rs @@ -525,7 +525,6 @@ mod tests { use tokio::sync::mpsc::Sender; use tokio_util::sync::CancellationToken; - use crate::error::Result; use crate::forwarder::ForwarderBuilder; use crate::shared::create_rpc_channel; use crate::sink::SinkWriter; @@ -739,23 +738,21 @@ mod tests { .source_transformer(transformer_client) .build(); - let forwarder_handle = tokio::spawn(async move { - forwarder.start().await.unwrap(); + // 
Assert the received message in a different task + let assert_handle = tokio::spawn(async move { + let received_message = sink_rx.recv().await.unwrap(); + assert_eq!(received_message.value, "test-message".as_bytes()); + assert_eq!( + received_message.keys, + vec!["test-key-transformed".to_string()] + ); + cln_token.cancel(); }); - // Receive messages from the sink - let received_message = sink_rx.recv().await.unwrap(); - assert_eq!(received_message.value, "test-message".as_bytes()); - assert_eq!( - received_message.keys, - vec!["test-key-transformed".to_string()] - ); + forwarder.start().await.unwrap(); - // stop the forwarder - cln_token.cancel(); - forwarder_handle - .await - .expect("failed to join forwarder task"); + // Wait for the assertion task to complete + assert_handle.await.unwrap(); // stop the servers source_shutdown_tx @@ -855,16 +852,14 @@ mod tests { let mut forwarder = ForwarderBuilder::new(source_client, sink_client, cln_token.clone()).build(); - let forwarder_handle = tokio::spawn(async move { - forwarder.start().await?; - Result::<()>::Ok(()) + let cancel_handle = tokio::spawn(async move { + tokio::time::sleep(tokio::time::Duration::from_millis(100)).await; + cln_token.cancel(); }); - // Set a timeout for the forwarder - let timeout_duration = tokio::time::Duration::from_secs(1); - // The future should not complete as we should be retrying - let result = tokio::time::timeout(timeout_duration, forwarder_handle).await; - assert!(result.is_err()); + let forwarder_result = forwarder.start().await; + assert!(forwarder_result.is_err()); + cancel_handle.await.unwrap(); // stop the servers source_shutdown_tx @@ -981,21 +976,16 @@ mod tests { .fallback_sink_writer(fb_sink_client) .build(); - let forwarder_handle = tokio::spawn(async move { - forwarder.start().await.unwrap(); + let assert_handle = tokio::spawn(async move { + let received_message = sink_rx.recv().await.unwrap(); + assert_eq!(received_message.value, "test-message".as_bytes()); + 
assert_eq!(received_message.keys, vec!["test-key".to_string()]); + cln_token.cancel(); }); - // We should receive the message in the fallback sink, since the primary sink returns status fallback - let received_message = sink_rx.recv().await.unwrap(); - assert_eq!(received_message.value, "test-message".as_bytes()); - assert_eq!(received_message.keys, vec!["test-key".to_string()]); - - // stop the forwarder - cln_token.cancel(); - forwarder_handle - .await - .expect("failed to join forwarder task"); + forwarder.start().await.unwrap(); + assert_handle.await.unwrap(); // stop the servers source_shutdown_tx .send(()) diff --git a/rust/monovertex/src/lib.rs b/rust/monovertex/src/lib.rs index d39c7708bd..9a07d5a90f 100644 --- a/rust/monovertex/src/lib.rs +++ b/rust/monovertex/src/lib.rs @@ -368,19 +368,15 @@ mod tests { let cln_token = CancellationToken::new(); - let forwarder_cln_token = cln_token.clone(); - let forwarder_handle = tokio::spawn(async move { - let result = init(forwarder_cln_token).await; - assert!(result.is_ok()); + let token_clone = cln_token.clone(); + tokio::spawn(async move { + // FIXME: we need to have a better way, this is flaky + tokio::time::sleep(tokio::time::Duration::from_millis(100)).await; + token_clone.cancel(); }); - // wait for the forwarder to start - // FIXME: we need to have a better way, this is flaky - tokio::time::sleep(tokio::time::Duration::from_millis(100)).await; - - // stop the forwarder - cln_token.cancel(); - forwarder_handle.await.unwrap(); + let result = init(cln_token.clone()).await; + assert!(result.is_err()); // stop the source and sink servers src_shutdown_tx.send(()).unwrap(); From 9a10b7ac4001b9411cdeaeff038e842b8f074f86 Mon Sep 17 00:00:00 2001 From: Yashash H L Date: Wed, 11 Sep 2024 21:22:16 +0530 Subject: [PATCH 04/40] fix changes Signed-off-by: Yashash H L --- rust/Cargo.lock | 1 + rust/monovertex/Cargo.toml | 1 + rust/monovertex/src/sink.rs | 2 +- rust/monovertex/src/source.rs | 19 +++++++++++++++---- 4 files 
changed, 18 insertions(+), 5 deletions(-) diff --git a/rust/Cargo.lock b/rust/Cargo.lock index fe69ca96d5..569d1c3996 100644 --- a/rust/Cargo.lock +++ b/rust/Cargo.lock @@ -1475,6 +1475,7 @@ dependencies = [ "chrono", "hyper-util", "kube", + "log", "numaflow 0.1.0 (git+https://github.com/numaproj/numaflow-rs.git?branch=main)", "numaflow-models", "once_cell", diff --git a/rust/monovertex/Cargo.toml b/rust/monovertex/Cargo.toml index 3df9759cc1..2b0cb2bae7 100644 --- a/rust/monovertex/Cargo.toml +++ b/rust/monovertex/Cargo.toml @@ -34,6 +34,7 @@ backoff = { path = "../backoff" } parking_lot = "0.12.3" prometheus-client = "0.22.3" kube = "0.94.0" +log = "0.4.22" [dev-dependencies] tempfile = "3.11.0" diff --git a/rust/monovertex/src/sink.rs b/rust/monovertex/src/sink.rs index 15356b48cc..e3fabea6c6 100644 --- a/rust/monovertex/src/sink.rs +++ b/rust/monovertex/src/sink.rs @@ -10,8 +10,8 @@ pub(crate) const FB_SINK_SOCKET: &str = "/var/run/numaflow/fb-sink.sock"; pub(crate) const SINK_SERVER_INFO_FILE: &str = "/var/run/numaflow/sinker-server-info"; pub(crate) const FB_SINK_SERVER_INFO_FILE: &str = "/var/run/numaflow/fb-sinker-server-info"; +/// SinkWriter writes messages to a sink. #[derive(Clone)] -/// SinkClient is a client to interact with the sink server. pub struct SinkWriter { client: SinkClient, } diff --git a/rust/monovertex/src/source.rs b/rust/monovertex/src/source.rs index 1300d760aa..2f24a299e7 100644 --- a/rust/monovertex/src/source.rs +++ b/rust/monovertex/src/source.rs @@ -12,11 +12,12 @@ use tokio::sync::mpsc; use tokio_stream::wrappers::ReceiverStream; use tonic::transport::Channel; use tonic::{Request, Streaming}; +use tracing::info; pub(crate) const SOURCE_SOCKET: &str = "/var/run/numaflow/source.sock"; pub(crate) const SOURCE_SERVER_INFO_FILE: &str = "/var/run/numaflow/sourcer-server-info"; -/// SourceClient is a client to interact with the source server. +/// SourceReader reads messages from a source and acks them. 
#[derive(Debug)] pub(crate) struct SourceReader { read_tx: mpsc::Sender, @@ -28,15 +29,25 @@ impl SourceReader { pub(crate) async fn new(mut client: SourceClient) -> Result { let (read_tx, read_rx) = mpsc::channel(500); + info!("Creating server stream"); let resp_stream = client .read_fn(Request::new(ReceiverStream::new(read_rx))) .await? .into_inner(); + info!("Created server stream"); let (ack_tx, ack_rx) = mpsc::channel(500); - let _ = client - .ack_fn(Request::new(ReceiverStream::new(ack_rx))) - .await?; + + info!("Creating ack stream"); + let mut ack_client = client.clone(); + + tokio::spawn(async move { + let ack_response = ack_client + .ack_fn(Request::new(ReceiverStream::new(ack_rx))) + .await + .unwrap(); + info!("Created ack stream {:?}", ack_response); + }); Ok(Self { read_tx, From ba4f413def20bea3cdf0a3931853a64c53003f9b Mon Sep 17 00:00:00 2001 From: Vigith Maurice Date: Wed, 11 Sep 2024 09:36:25 -0700 Subject: [PATCH 05/40] chore: minor refactor Signed-off-by: Vigith Maurice --- rust/monovertex/src/forwarder.rs | 12 ++++---- rust/monovertex/src/lib.rs | 30 ++++++++++---------- rust/monovertex/src/message.rs | 6 ++-- rust/monovertex/src/metrics.rs | 6 ++-- rust/monovertex/src/sink.rs | 4 +-- rust/monovertex/src/source.rs | 45 +++++++++++++++++++----------- rust/monovertex/src/transformer.rs | 6 ++-- 7 files changed, 60 insertions(+), 49 deletions(-) diff --git a/rust/monovertex/src/forwarder.rs b/rust/monovertex/src/forwarder.rs index 6f16985010..a15bd3fa45 100644 --- a/rust/monovertex/src/forwarder.rs +++ b/rust/monovertex/src/forwarder.rs @@ -6,7 +6,7 @@ use crate::message::{Message, Offset}; use crate::metrics; use crate::metrics::forward_metrics; use crate::sink::SinkWriter; -use crate::sinkpb::Status::{Failure, Fallback, Success}; +use crate::sink_pb::Status::{Failure, Fallback, Success}; use crate::source::SourceReader; use crate::transformer::SourceTransformer; use chrono::Utc; @@ -122,7 +122,7 @@ impl Forwarder { let start_time = 
tokio::time::Instant::now(); let messages = self .source_reader - .read_fn(config().batch_size, config().timeout_in_ms) + .read(config().batch_size, config().timeout_in_ms) .await?; debug!( "Read batch size: {} and latency - {}ms", @@ -498,7 +498,7 @@ impl Forwarder { let n = offsets.len(); let start_time = tokio::time::Instant::now(); - self.source_reader.ack_fn(offsets).await?; + self.source_reader.ack(offsets).await?; debug!("Ack latency - {}ms", start_time.elapsed().as_millis()); @@ -528,10 +528,10 @@ mod tests { use crate::forwarder::ForwarderBuilder; use crate::shared::create_rpc_channel; use crate::sink::SinkWriter; - use crate::sinkpb::sink_client::SinkClient; + use crate::sink_pb::sink_client::SinkClient; use crate::source::SourceReader; - use crate::sourcepb::source_client::SourceClient; - use crate::sourcetransformpb::source_transform_client::SourceTransformClient; + use crate::source_pb::source_client::SourceClient; + use crate::sourcetransform_pb::source_transform_client::SourceTransformClient; use crate::transformer::SourceTransformer; struct SimpleSource { diff --git a/rust/monovertex/src/lib.rs b/rust/monovertex/src/lib.rs index 9a07d5a90f..6fef24a6aa 100644 --- a/rust/monovertex/src/lib.rs +++ b/rust/monovertex/src/lib.rs @@ -5,10 +5,10 @@ use crate::shared::create_rpc_channel; use crate::sink::{ SinkWriter, FB_SINK_SERVER_INFO_FILE, FB_SINK_SOCKET, SINK_SERVER_INFO_FILE, SINK_SOCKET, }; -use crate::sinkpb::sink_client::SinkClient; +use crate::sink_pb::sink_client::SinkClient; use crate::source::{SourceReader, SOURCE_SERVER_INFO_FILE, SOURCE_SOCKET}; -use crate::sourcepb::source_client::SourceClient; -use crate::sourcetransformpb::source_transform_client::SourceTransformClient; +use crate::source_pb::source_client::SourceClient; +use crate::sourcetransform_pb::source_transform_client::SourceTransformClient; use crate::transformer::{SourceTransformer, TRANSFORMER_SERVER_INFO_FILE, TRANSFORMER_SOCKET}; use std::net::SocketAddr; use 
std::time::Duration; @@ -46,15 +46,15 @@ pub(crate) mod message; pub(crate) mod shared; -pub(crate) mod sourcepb { +pub(crate) mod source_pb { tonic::include_proto!("source.v1"); } -pub(crate) mod sinkpb { +pub(crate) mod sink_pb { tonic::include_proto!("sink.v1"); } -pub(crate) mod sourcetransformpb { +pub(crate) mod sourcetransform_pb { tonic::include_proto!("sourcetransformer.v1"); } @@ -74,7 +74,7 @@ pub async fn mono_vertex() -> Result<()> { }); // Run the forwarder with cancellation token. - if let Err(e) = init(cln_token).await { + if let Err(e) = start_forwarder(cln_token).await { error!("Application error: {:?}", e); // abort the signal handler task since we have an error and we are shutting down @@ -109,7 +109,7 @@ async fn shutdown_signal() { } } -pub async fn init(cln_token: CancellationToken) -> Result<()> { +pub async fn start_forwarder(cln_token: CancellationToken) -> Result<()> { server_info::check_for_server_compatibility(SOURCE_SERVER_INFO_FILE, cln_token.clone()) .await .map_err(|e| { @@ -187,11 +187,6 @@ pub async fn init(cln_token: CancellationToken) -> Result<()> { ) .await?; - // Start the metrics server, which server the prometheus metrics. - let metrics_addr: SocketAddr = format!("0.0.0.0:{}", &config().metrics_server_listen_port) - .parse() - .expect("Invalid address"); - // Start the metrics server in a separate background async spawn, // This should be running throughout the lifetime of the application, hence the handle is not // joined. @@ -203,6 +198,11 @@ pub async fn init(cln_token: CancellationToken) -> Result<()> { }; tokio::spawn(async move { + // Start the metrics server, which server the prometheus metrics. 
+ let metrics_addr: SocketAddr = format!("0.0.0.0:{}", &config().metrics_server_listen_port) + .parse() + .expect("Invalid address"); + if let Err(e) = start_metrics_https_server(metrics_addr, metrics_state).await { error!("Metrics server error: {:?}", e); } @@ -290,7 +290,7 @@ async fn wait_until_ready( mod tests { use std::env; - use crate::init; + use crate::start_forwarder; use numaflow::source::{Message, Offset, SourceReadRequest}; use numaflow::{sink, source}; use tokio::sync::mpsc::Sender; @@ -375,7 +375,7 @@ mod tests { token_clone.cancel(); }); - let result = init(cln_token.clone()).await; + let result = start_forwarder(cln_token.clone()).await; assert!(result.is_err()); // stop the source and sink servers diff --git a/rust/monovertex/src/message.rs b/rust/monovertex/src/message.rs index 4b4b2ee9be..54f0859e3b 100644 --- a/rust/monovertex/src/message.rs +++ b/rust/monovertex/src/message.rs @@ -6,9 +6,9 @@ use chrono::{DateTime, Utc}; use crate::error::Error; use crate::shared::{prost_timestamp_from_utc, utc_from_timestamp}; -use crate::sinkpb::SinkRequest; -use crate::sourcepb::read_response; -use crate::sourcetransformpb::SourceTransformRequest; +use crate::sink_pb::SinkRequest; +use crate::source_pb::read_response; +use crate::sourcetransform_pb::SourceTransformRequest; /// A message that is sent from the source to the sink. 
#[derive(Debug, Clone)] diff --git a/rust/monovertex/src/metrics.rs b/rust/monovertex/src/metrics.rs index 573d89bebc..30b3b85ce2 100644 --- a/rust/monovertex/src/metrics.rs +++ b/rust/monovertex/src/metrics.rs @@ -18,9 +18,9 @@ use tracing::{debug, error, info}; use crate::config::config; use crate::error::Error; -use crate::sinkpb::sink_client::SinkClient; -use crate::sourcepb::source_client::SourceClient; -use crate::sourcetransformpb::source_transform_client::SourceTransformClient; +use crate::sink_pb::sink_client::SinkClient; +use crate::source_pb::source_client::SourceClient; +use crate::sourcetransform_pb::source_transform_client::SourceTransformClient; use prometheus_client::encoding::text::encode; use prometheus_client::metrics::counter::Counter; use prometheus_client::metrics::family::Family; diff --git a/rust/monovertex/src/sink.rs b/rust/monovertex/src/sink.rs index e3fabea6c6..6e30d7f58e 100644 --- a/rust/monovertex/src/sink.rs +++ b/rust/monovertex/src/sink.rs @@ -1,7 +1,7 @@ use crate::error::Result; use crate::message::Message; -use crate::sinkpb::sink_client::SinkClient; -use crate::sinkpb::{SinkRequest, SinkResponse}; +use crate::sink_pb::sink_client::SinkClient; +use crate::sink_pb::{SinkRequest, SinkResponse}; use tonic::transport::Channel; pub(crate) const SINK_SOCKET: &str = "/var/run/numaflow/sink.sock"; diff --git a/rust/monovertex/src/source.rs b/rust/monovertex/src/source.rs index 2f24a299e7..88e311cb0d 100644 --- a/rust/monovertex/src/source.rs +++ b/rust/monovertex/src/source.rs @@ -1,18 +1,19 @@ use crate::error::Error::SourceError; use crate::error::Result; use crate::message::{Message, Offset}; -use crate::sourcepb; -use crate::sourcepb::source_client::SourceClient; -use crate::sourcepb::{ +use crate::source_pb; +use crate::source_pb::source_client::SourceClient; +use crate::source_pb::{ ack_request, read_request, AckRequest, AckResponse, ReadRequest, ReadResponse, }; use base64::prelude::BASE64_STANDARD; use base64::Engine; use 
tokio::sync::mpsc; +use tokio::task::JoinHandle; use tokio_stream::wrappers::ReceiverStream; use tonic::transport::Channel; use tonic::{Request, Streaming}; -use tracing::info; +use tracing::{debug, info, warn}; pub(crate) const SOURCE_SOCKET: &str = "/var/run/numaflow/source.sock"; pub(crate) const SOURCE_SERVER_INFO_FILE: &str = "/var/run/numaflow/sourcer-server-info"; @@ -23,40 +24,50 @@ pub(crate) struct SourceReader { read_tx: mpsc::Sender, resp_stream: Streaming, ack_tx: mpsc::Sender, + ack_handle: JoinHandle<()>, +} + +impl Drop for SourceReader { + fn drop(&mut self) { + // in a happy path scenario, the ack task would have already been finished. + if !self.ack_handle.is_finished() { + warn!("aborting ack task"); + self.ack_handle.abort(); + } + } } impl SourceReader { pub(crate) async fn new(mut client: SourceClient) -> Result { let (read_tx, read_rx) = mpsc::channel(500); - info!("Creating server stream"); let resp_stream = client .read_fn(Request::new(ReceiverStream::new(read_rx))) .await? 
.into_inner(); - info!("Created server stream"); + debug!("Created server stream"); let (ack_tx, ack_rx) = mpsc::channel(500); - info!("Creating ack stream"); let mut ack_client = client.clone(); - - tokio::spawn(async move { + // FIXME: we need to keep the handle for abort + let ack_handle = tokio::spawn(async move { let ack_response = ack_client .ack_fn(Request::new(ReceiverStream::new(ack_rx))) .await - .unwrap(); - info!("Created ack stream {:?}", ack_response); + .expect("ack should not have failed"); + info!("Closing ack stream {:?}", ack_response); }); Ok(Self { read_tx, resp_stream, ack_tx, + ack_handle, }) } - pub(crate) async fn read_fn( + pub(crate) async fn read( &mut self, num_records: u64, timeout_in_ms: u32, @@ -90,11 +101,11 @@ impl SourceReader { Ok(messages) } - pub(crate) async fn ack_fn(&mut self, offsets: Vec) -> Result { + pub(crate) async fn ack(&mut self, offsets: Vec) -> Result { for offset in offsets { let request = AckRequest { request: Some(ack_request::Request { - offset: Some(sourcepb::Offset { + offset: Some(source_pb::Offset { offset: BASE64_STANDARD .decode(offset.offset) .expect("we control the encoding, so this should never fail"), @@ -117,7 +128,7 @@ mod tests { use crate::shared::create_rpc_channel; use crate::source::SourceReader; - use crate::sourcepb::source_client::SourceClient; + use crate::source_pb::source_client::SourceClient; use chrono::Utc; use numaflow::source; use numaflow::source::{Message, Offset, SourceReadRequest}; @@ -209,11 +220,11 @@ mod tests { .await .unwrap(); - let messages = source_client.read_fn(5, 1000).await.unwrap(); + let messages = source_client.read(5, 1000).await.unwrap(); assert_eq!(messages.len(), 5); let response = source_client - .ack_fn(messages.iter().map(|m| m.offset.clone()).collect()) + .ack(messages.iter().map(|m| m.offset.clone()).collect()) .await .unwrap(); assert!(response.result.unwrap().success.is_some()); diff --git a/rust/monovertex/src/transformer.rs 
b/rust/monovertex/src/transformer.rs index 5a2d89854f..95c9380f2f 100644 --- a/rust/monovertex/src/transformer.rs +++ b/rust/monovertex/src/transformer.rs @@ -1,8 +1,8 @@ use crate::error::Result; use crate::message::Message; use crate::shared::utc_from_timestamp; -use crate::sourcetransformpb::source_transform_client::SourceTransformClient; -use crate::sourcetransformpb::SourceTransformRequest; +use crate::sourcetransform_pb::source_transform_client::SourceTransformClient; +use crate::sourcetransform_pb::SourceTransformRequest; use tonic::transport::Channel; const DROP: &str = "U+005C__DROP__"; @@ -61,7 +61,7 @@ mod tests { use std::error::Error; use crate::shared::create_rpc_channel; - use crate::sourcetransformpb::source_transform_client::SourceTransformClient; + use crate::sourcetransform_pb::source_transform_client::SourceTransformClient; use crate::transformer::SourceTransformer; use numaflow::sourcetransform; use tempfile::TempDir; From ab4fc8e86f32bf08a094e642ce2ef5dba11c5730 Mon Sep 17 00:00:00 2001 From: Vigith Maurice Date: Wed, 11 Sep 2024 10:32:06 -0700 Subject: [PATCH 06/40] feat: introduce startup for initialization and validation Signed-off-by: Vigith Maurice --- rust/monovertex/src/lib.rs | 70 +++++++++++----------------------- rust/monovertex/src/startup.rs | 46 ++++++++++++++++++++++ 2 files changed, 68 insertions(+), 48 deletions(-) create mode 100644 rust/monovertex/src/startup.rs diff --git a/rust/monovertex/src/lib.rs b/rust/monovertex/src/lib.rs index 6fef24a6aa..811992c753 100644 --- a/rust/monovertex/src/lib.rs +++ b/rust/monovertex/src/lib.rs @@ -1,28 +1,27 @@ -use crate::config::config; -use crate::forwarder::ForwarderBuilder; -use crate::metrics::{start_metrics_https_server, LagReaderBuilder, MetricsState}; -use crate::shared::create_rpc_channel; -use crate::sink::{ - SinkWriter, FB_SINK_SERVER_INFO_FILE, FB_SINK_SOCKET, SINK_SERVER_INFO_FILE, SINK_SOCKET, -}; -use crate::sink_pb::sink_client::SinkClient; -use 
crate::source::{SourceReader, SOURCE_SERVER_INFO_FILE, SOURCE_SOCKET}; -use crate::source_pb::source_client::SourceClient; -use crate::sourcetransform_pb::source_transform_client::SourceTransformClient; -use crate::transformer::{SourceTransformer, TRANSFORMER_SERVER_INFO_FILE, TRANSFORMER_SOCKET}; use std::net::SocketAddr; use std::time::Duration; + use tokio::signal; use tokio::task::JoinHandle; use tokio::time::sleep; use tokio_util::sync::CancellationToken; use tonic::transport::Channel; use tonic::Request; -use tracing::{error, info, warn}; - -pub(crate) use self::error::Result; +use tracing::{error, info}; +use crate::config::config; pub(crate) use crate::error::Error; +use crate::forwarder::ForwarderBuilder; +use crate::metrics::{start_metrics_https_server, LagReaderBuilder, MetricsState}; +use crate::shared::create_rpc_channel; +use crate::sink::{SinkWriter, FB_SINK_SOCKET, SINK_SOCKET}; +use crate::sink_pb::sink_client::SinkClient; +use crate::source::{SourceReader, SOURCE_SOCKET}; +use crate::source_pb::source_client::SourceClient; +use crate::sourcetransform_pb::source_transform_client::SourceTransformClient; +use crate::transformer::{SourceTransformer, TRANSFORMER_SOCKET}; + +pub(crate) use self::error::Result; /// SourcerSinker orchestrates data movement from the Source to the Sink via the optional SourceTransformer. 
/// The forward-a-chunk executes the following in an infinite loop till a shutdown signal is received: @@ -46,6 +45,10 @@ pub(crate) mod message; pub(crate) mod shared; +mod metrics; +mod server_info; +mod startup; + pub(crate) mod source_pb { tonic::include_proto!("source.v1"); } @@ -58,10 +61,6 @@ pub(crate) mod sourcetransform_pb { tonic::include_proto!("sourcetransformer.v1"); } -mod server_info; - -mod metrics; - pub async fn mono_vertex() -> Result<()> { let cln_token = CancellationToken::new(); let shutdown_cln_token = cln_token.clone(); @@ -110,12 +109,8 @@ async fn shutdown_signal() { } pub async fn start_forwarder(cln_token: CancellationToken) -> Result<()> { - server_info::check_for_server_compatibility(SOURCE_SERVER_INFO_FILE, cln_token.clone()) - .await - .map_err(|e| { - warn!("Error waiting for source server info file: {:?}", e); - Error::ForwarderError("Error waiting for server info file".to_string()) - })?; + // make sure that we have compatibility with the server + startup::check_compatibility(&cln_token).await?; let mut source_grpc_client = SourceClient::new(create_rpc_channel(SOURCE_SOCKET.into()).await?) .max_encoding_message_size(config().grpc_max_message_size) @@ -123,13 +118,6 @@ pub async fn start_forwarder(cln_token: CancellationToken) -> Result<()> { let source_reader = SourceReader::new(source_grpc_client.clone()).await?; - server_info::check_for_server_compatibility(SINK_SERVER_INFO_FILE, cln_token.clone()) - .await - .map_err(|e| { - error!("Error waiting for sink server info file: {:?}", e); - Error::ForwarderError("Error waiting for server info file".to_string()) - })?; - let mut sink_grpc_client = SinkClient::new(create_rpc_channel(SINK_SOCKET.into()).await?) 
.max_encoding_message_size(config().grpc_max_message_size) .max_encoding_message_size(config().grpc_max_message_size); @@ -137,15 +125,6 @@ pub async fn start_forwarder(cln_token: CancellationToken) -> Result<()> { let sink_writer = SinkWriter::new(sink_grpc_client.clone()).await?; let (mut transformer_grpc_client, transformer) = if config().is_transformer_enabled { - server_info::check_for_server_compatibility( - TRANSFORMER_SERVER_INFO_FILE, - cln_token.clone(), - ) - .await - .map_err(|e| { - error!("Error waiting for transformer server info file: {:?}", e); - Error::ForwarderError("Error waiting for server info file".to_string()) - })?; let transformer_grpc_client = SourceTransformClient::new(create_rpc_channel(TRANSFORMER_SOCKET.into()).await?) .max_encoding_message_size(config().grpc_max_message_size) @@ -160,12 +139,6 @@ pub async fn start_forwarder(cln_token: CancellationToken) -> Result<()> { }; let (mut fb_sink_grpc_client, fallback_writer) = if config().is_fallback_enabled { - server_info::check_for_server_compatibility(FB_SINK_SERVER_INFO_FILE, cln_token.clone()) - .await - .map_err(|e| { - warn!("Error waiting for fallback sink server info file: {:?}", e); - Error::ForwarderError("Error waiting for server info file".to_string()) - })?; let fb_sink_grpc_client = SinkClient::new(create_rpc_channel(FB_SINK_SOCKET.into()).await?) 
.max_encoding_message_size(config().grpc_max_message_size) .max_encoding_message_size(config().grpc_max_message_size); @@ -290,12 +263,13 @@ async fn wait_until_ready( mod tests { use std::env; - use crate::start_forwarder; use numaflow::source::{Message, Offset, SourceReadRequest}; use numaflow::{sink, source}; use tokio::sync::mpsc::Sender; use tokio_util::sync::CancellationToken; + use crate::start_forwarder; + struct SimpleSource; #[tonic::async_trait] impl source::Sourcer for SimpleSource { diff --git a/rust/monovertex/src/startup.rs b/rust/monovertex/src/startup.rs new file mode 100644 index 0000000000..74a8bc752a --- /dev/null +++ b/rust/monovertex/src/startup.rs @@ -0,0 +1,46 @@ +use crate::config::config; +use crate::error::Error; +use crate::sink::{FB_SINK_SERVER_INFO_FILE, SINK_SERVER_INFO_FILE}; +use crate::source::SOURCE_SERVER_INFO_FILE; +use crate::transformer::TRANSFORMER_SERVER_INFO_FILE; +use crate::{error, server_info}; +use tokio_util::sync::CancellationToken; +use tracing::warn; + +pub async fn check_compatibility(cln_token: &CancellationToken) -> error::Result<()> { + server_info::check_for_server_compatibility(SOURCE_SERVER_INFO_FILE, cln_token.clone()) + .await + .map_err(|e| { + warn!("Error waiting for source server info file: {:?}", e); + Error::ForwarderError("Error waiting for server info file".to_string()) + })?; + + server_info::check_for_server_compatibility(SINK_SERVER_INFO_FILE, cln_token.clone()) + .await + .map_err(|e| { + error!("Error waiting for sink server info file: {:?}", e); + Error::ForwarderError("Error waiting for server info file".to_string()) + })?; + + if config().is_transformer_enabled { + server_info::check_for_server_compatibility( + TRANSFORMER_SERVER_INFO_FILE, + cln_token.clone(), + ) + .await + .map_err(|e| { + error!("Error waiting for transformer server info file: {:?}", e); + Error::ForwarderError("Error waiting for server info file".to_string()) + })?; + } + + if config().is_fallback_enabled { + 
server_info::check_for_server_compatibility(FB_SINK_SERVER_INFO_FILE, cln_token.clone()) + .await + .map_err(|e| { + warn!("Error waiting for fallback sink server info file: {:?}", e); + Error::ForwarderError("Error waiting for server info file".to_string()) + })?; + } + Ok(()) +} From a9cdbc7b3d5ae98702563df533564d8265b385d7 Mon Sep 17 00:00:00 2001 From: Vigith Maurice Date: Wed, 11 Sep 2024 11:26:54 -0700 Subject: [PATCH 07/40] chore: sync before i create a new branch Signed-off-by: Vigith Maurice --- rust/monovertex/src/lib.rs | 1 + rust/monovertex/src/source.rs | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/rust/monovertex/src/lib.rs b/rust/monovertex/src/lib.rs index 811992c753..fe9957565f 100644 --- a/rust/monovertex/src/lib.rs +++ b/rust/monovertex/src/lib.rs @@ -170,6 +170,7 @@ pub async fn start_forwarder(cln_token: CancellationToken) -> Result<()> { fb_sink_client: fb_sink_grpc_client.clone(), }; + // FIXME: track the join handle tokio::spawn(async move { // Start the metrics server, which server the prometheus metrics. let metrics_addr: SocketAddr = format!("0.0.0.0:{}", &config().metrics_server_listen_port) diff --git a/rust/monovertex/src/source.rs b/rust/monovertex/src/source.rs index 88e311cb0d..ce5489e030 100644 --- a/rust/monovertex/src/source.rs +++ b/rust/monovertex/src/source.rs @@ -50,7 +50,7 @@ impl SourceReader { let (ack_tx, ack_rx) = mpsc::channel(500); let mut ack_client = client.clone(); - // FIXME: we need to keep the handle for abort + // spawn a task to handle acks. 
let ack_handle = tokio::spawn(async move { let ack_response = ack_client .ack_fn(Request::new(ReceiverStream::new(ack_rx))) From a294b6ba4a0537855827a45ec0fe1621c58b0acc Mon Sep 17 00:00:00 2001 From: Vigith Maurice Date: Wed, 11 Sep 2024 15:54:09 -0700 Subject: [PATCH 08/40] chore: code rearrange Signed-off-by: Vigith Maurice --- rust/monovertex/src/forwarder.rs | 1 + rust/monovertex/src/lib.rs | 121 +++++++------------------------ rust/monovertex/src/startup.rs | 86 +++++++++++++++++++++- 3 files changed, 111 insertions(+), 97 deletions(-) diff --git a/rust/monovertex/src/forwarder.rs b/rust/monovertex/src/forwarder.rs index a15bd3fa45..981bb3bde8 100644 --- a/rust/monovertex/src/forwarder.rs +++ b/rust/monovertex/src/forwarder.rs @@ -87,6 +87,7 @@ impl Forwarder { pub(crate) async fn start(&mut self) -> Result<()> { let mut processed_msgs_count: usize = 0; let mut last_forwarded_at = std::time::Instant::now(); + info!("Forwarder has started"); loop { let start_time = tokio::time::Instant::now(); if self.cln_token.is_cancelled() { diff --git a/rust/monovertex/src/lib.rs b/rust/monovertex/src/lib.rs index fe9957565f..1257c494aa 100644 --- a/rust/monovertex/src/lib.rs +++ b/rust/monovertex/src/lib.rs @@ -1,22 +1,16 @@ -use std::net::SocketAddr; -use std::time::Duration; - use tokio::signal; use tokio::task::JoinHandle; -use tokio::time::sleep; use tokio_util::sync::CancellationToken; -use tonic::transport::Channel; -use tonic::Request; use tracing::{error, info}; use crate::config::config; pub(crate) use crate::error::Error; use crate::forwarder::ForwarderBuilder; -use crate::metrics::{start_metrics_https_server, LagReaderBuilder, MetricsState}; +use crate::metrics::MetricsState; use crate::shared::create_rpc_channel; -use crate::sink::{SinkWriter, FB_SINK_SOCKET, SINK_SOCKET}; +use crate::sink::{FB_SINK_SOCKET, SINK_SOCKET, SinkWriter}; use crate::sink_pb::sink_client::SinkClient; -use crate::source::{SourceReader, SOURCE_SOCKET}; +use 
crate::source::{SOURCE_SOCKET, SourceReader}; use crate::source_pb::source_client::SourceClient; use crate::sourcetransform_pb::source_transform_client::SourceTransformClient; use crate::transformer::{SourceTransformer, TRANSFORMER_SOCKET}; @@ -116,43 +110,36 @@ pub async fn start_forwarder(cln_token: CancellationToken) -> Result<()> { .max_encoding_message_size(config().grpc_max_message_size) .max_encoding_message_size(config().grpc_max_message_size); - let source_reader = SourceReader::new(source_grpc_client.clone()).await?; - let mut sink_grpc_client = SinkClient::new(create_rpc_channel(SINK_SOCKET.into()).await?) .max_encoding_message_size(config().grpc_max_message_size) .max_encoding_message_size(config().grpc_max_message_size); - let sink_writer = SinkWriter::new(sink_grpc_client.clone()).await?; - - let (mut transformer_grpc_client, transformer) = if config().is_transformer_enabled { + let mut transformer_grpc_client = if config().is_transformer_enabled { let transformer_grpc_client = SourceTransformClient::new(create_rpc_channel(TRANSFORMER_SOCKET.into()).await?) .max_encoding_message_size(config().grpc_max_message_size) .max_encoding_message_size(config().grpc_max_message_size); - ( - Some(transformer_grpc_client.clone()), - Some(SourceTransformer::new(transformer_grpc_client).await?), - ) + Some(transformer_grpc_client.clone()) + } else { - (None, None) + None }; - let (mut fb_sink_grpc_client, fallback_writer) = if config().is_fallback_enabled { + let mut fb_sink_grpc_client = if config().is_fallback_enabled { let fb_sink_grpc_client = SinkClient::new(create_rpc_channel(FB_SINK_SOCKET.into()).await?) 
.max_encoding_message_size(config().grpc_max_message_size) .max_encoding_message_size(config().grpc_max_message_size); - ( - Some(fb_sink_grpc_client.clone()), - Some(SinkWriter::new(fb_sink_grpc_client).await?), - ) + + Some(fb_sink_grpc_client.clone()) + } else { - (None, None) + None }; // readiness check for all the ud containers - wait_until_ready( + startup::wait_until_ready( &mut source_grpc_client, &mut sink_grpc_client, &mut transformer_grpc_client, @@ -170,37 +157,28 @@ pub async fn start_forwarder(cln_token: CancellationToken) -> Result<()> { fb_sink_client: fb_sink_grpc_client.clone(), }; - // FIXME: track the join handle - tokio::spawn(async move { - // Start the metrics server, which server the prometheus metrics. - let metrics_addr: SocketAddr = format!("0.0.0.0:{}", &config().metrics_server_listen_port) - .parse() - .expect("Invalid address"); - - if let Err(e) = start_metrics_https_server(metrics_addr, metrics_state).await { - error!("Metrics server error: {:?}", e); - } - }); + // start the metrics server + // FIXME: what to do with the handle + let _ = startup::start_metrics_server(metrics_state); // start the lag reader to publish lag metrics - let mut lag_reader = LagReaderBuilder::new(source_grpc_client.clone()) - .lag_checking_interval(Duration::from_secs( - config().lag_check_interval_in_secs.into(), - )) - .refresh_interval(Duration::from_secs( - config().lag_refresh_interval_in_secs.into(), - )) - .build(); - lag_reader.start().await; + startup::start_lag_reader(source_grpc_client.clone()).await; // build the forwarder + let source_reader = SourceReader::new(source_grpc_client.clone()).await?; + let sink_writer = SinkWriter::new(sink_grpc_client.clone()).await?; + let mut forwarder_builder = ForwarderBuilder::new(source_reader, sink_writer, cln_token); + // add transformer if exists - if let Some(transformer) = transformer { + if let Some(transformer_grpc_client) = transformer_grpc_client { + let transformer = 
SourceTransformer::new(transformer_grpc_client).await?; forwarder_builder = forwarder_builder.source_transformer(transformer); } + // add fallback sink if exists - if let Some(fallback_writer) = fallback_writer { + if let Some(fb_sink_grpc_client) = fb_sink_grpc_client { + let fallback_writer = SinkWriter::new(fb_sink_grpc_client).await?; forwarder_builder = forwarder_builder.fallback_sink_writer(fallback_writer); } // build the final forwarder @@ -213,53 +191,6 @@ pub async fn start_forwarder(cln_token: CancellationToken) -> Result<()> { Ok(()) } -async fn wait_until_ready( - source_client: &mut SourceClient, - sink_client: &mut SinkClient, - transformer_client: &mut Option>, - fb_sink_client: &mut Option>, -) -> Result<()> { - loop { - let source_ready = source_client.is_ready(Request::new(())).await.is_ok(); - if !source_ready { - info!("UDSource is not ready, waiting..."); - } - - let sink_ready = sink_client.is_ready(Request::new(())).await.is_ok(); - if !sink_ready { - info!("UDSink is not ready, waiting..."); - } - - let transformer_ready = if let Some(client) = transformer_client { - let ready = client.is_ready(Request::new(())).await.is_ok(); - if !ready { - info!("UDTransformer is not ready, waiting..."); - } - ready - } else { - true - }; - - let fb_sink_ready = if let Some(client) = fb_sink_client { - let ready = client.is_ready(Request::new(())).await.is_ok(); - if !ready { - info!("Fallback Sink is not ready, waiting..."); - } - ready - } else { - true - }; - - if source_ready && sink_ready && transformer_ready && fb_sink_ready { - break; - } - - sleep(Duration::from_secs(1)).await; - } - - Ok(()) -} - #[cfg(test)] mod tests { use std::env; diff --git a/rust/monovertex/src/startup.rs b/rust/monovertex/src/startup.rs index 74a8bc752a..8f70f79a03 100644 --- a/rust/monovertex/src/startup.rs +++ b/rust/monovertex/src/startup.rs @@ -1,13 +1,23 @@ use crate::config::config; use crate::error::Error; +use crate::metrics::{LagReaderBuilder, MetricsState, 
start_metrics_https_server}; use crate::sink::{FB_SINK_SERVER_INFO_FILE, SINK_SERVER_INFO_FILE}; use crate::source::SOURCE_SERVER_INFO_FILE; +use crate::source_pb::source_client::SourceClient; use crate::transformer::TRANSFORMER_SERVER_INFO_FILE; use crate::{error, server_info}; +use std::net::SocketAddr; +use std::time::Duration; +use tokio::task::JoinHandle; use tokio_util::sync::CancellationToken; -use tracing::warn; +use tonic::transport::Channel; +use tracing::{info, warn}; +use tonic::Request; +use tokio::time::sleep; +use crate::sink_pb::sink_client::SinkClient; +use crate::sourcetransform_pb::source_transform_client::SourceTransformClient; -pub async fn check_compatibility(cln_token: &CancellationToken) -> error::Result<()> { +pub(crate) async fn check_compatibility(cln_token: &CancellationToken) -> error::Result<()> { server_info::check_for_server_compatibility(SOURCE_SERVER_INFO_FILE, cln_token.clone()) .await .map_err(|e| { @@ -44,3 +54,75 @@ pub async fn check_compatibility(cln_token: &CancellationToken) -> error::Result } Ok(()) } + +pub(crate) async fn start_metrics_server(metrics_state: MetricsState) -> JoinHandle<()> { + tokio::spawn(async { + // Start the metrics server, which server the prometheus metrics. 
+ let metrics_addr: SocketAddr = format!("0.0.0.0:{}", &config().metrics_server_listen_port) + .parse() + .expect("Invalid address"); + + if let Err(e) = start_metrics_https_server(metrics_addr, metrics_state).await { + error!("Metrics server error: {:?}", e); + } + }) +} + +pub(crate) async fn start_lag_reader(lag_reader_grpc_client: SourceClient) { + let mut lag_reader = LagReaderBuilder::new(lag_reader_grpc_client) + .lag_checking_interval(Duration::from_secs( + config().lag_check_interval_in_secs.into(), + )) + .refresh_interval(Duration::from_secs( + config().lag_refresh_interval_in_secs.into(), + )) + .build(); + lag_reader.start().await; +} + +pub(crate) async fn wait_until_ready( + source_client: &mut SourceClient, + sink_client: &mut SinkClient, + transformer_client: &mut Option>, + fb_sink_client: &mut Option>, +) -> error::Result<()> { + loop { + let source_ready = source_client.is_ready(Request::new(())).await.is_ok(); + if !source_ready { + info!("UDSource is not ready, waiting..."); + } + + let sink_ready = sink_client.is_ready(Request::new(())).await.is_ok(); + if !sink_ready { + info!("UDSink is not ready, waiting..."); + } + + let transformer_ready = if let Some(client) = transformer_client { + let ready = client.is_ready(Request::new(())).await.is_ok(); + if !ready { + info!("UDTransformer is not ready, waiting..."); + } + ready + } else { + true + }; + + let fb_sink_ready = if let Some(client) = fb_sink_client { + let ready = client.is_ready(Request::new(())).await.is_ok(); + if !ready { + info!("Fallback Sink is not ready, waiting..."); + } + ready + } else { + true + }; + + if source_ready && sink_ready && transformer_ready && fb_sink_ready { + break; + } + + sleep(Duration::from_secs(1)).await; + } + + Ok(()) +} \ No newline at end of file From 8ce9f5e8ebd46e93275aa2252df3e1196568a4cc Mon Sep 17 00:00:00 2001 From: Vigith Maurice Date: Wed, 11 Sep 2024 19:07:01 -0700 Subject: [PATCH 09/40] chore: fmt Signed-off-by: Vigith Maurice --- 
rust/monovertex/src/lib.rs | 17 +++++++---------- rust/monovertex/src/startup.rs | 18 ++++++++++-------- 2 files changed, 17 insertions(+), 18 deletions(-) diff --git a/rust/monovertex/src/lib.rs b/rust/monovertex/src/lib.rs index 1257c494aa..219e929dca 100644 --- a/rust/monovertex/src/lib.rs +++ b/rust/monovertex/src/lib.rs @@ -8,9 +8,9 @@ pub(crate) use crate::error::Error; use crate::forwarder::ForwarderBuilder; use crate::metrics::MetricsState; use crate::shared::create_rpc_channel; -use crate::sink::{FB_SINK_SOCKET, SINK_SOCKET, SinkWriter}; +use crate::sink::{SinkWriter, FB_SINK_SOCKET, SINK_SOCKET}; use crate::sink_pb::sink_client::SinkClient; -use crate::source::{SOURCE_SOCKET, SourceReader}; +use crate::source::{SourceReader, SOURCE_SOCKET}; use crate::source_pb::source_client::SourceClient; use crate::sourcetransform_pb::source_transform_client::SourceTransformClient; use crate::transformer::{SourceTransformer, TRANSFORMER_SOCKET}; @@ -102,7 +102,7 @@ async fn shutdown_signal() { } } -pub async fn start_forwarder(cln_token: CancellationToken) -> Result<()> { +async fn start_forwarder(cln_token: CancellationToken) -> Result<()> { // make sure that we have compatibility with the server startup::check_compatibility(&cln_token).await?; @@ -120,8 +120,7 @@ pub async fn start_forwarder(cln_token: CancellationToken) -> Result<()> { .max_encoding_message_size(config().grpc_max_message_size) .max_encoding_message_size(config().grpc_max_message_size); - Some(transformer_grpc_client.clone()) - + Some(transformer_grpc_client.clone()) } else { None }; @@ -131,11 +130,9 @@ pub async fn start_forwarder(cln_token: CancellationToken) -> Result<()> { .max_encoding_message_size(config().grpc_max_message_size) .max_encoding_message_size(config().grpc_max_message_size); - - Some(fb_sink_grpc_client.clone()) - + Some(fb_sink_grpc_client.clone()) } else { - None + None }; // readiness check for all the ud containers @@ -172,7 +169,7 @@ pub async fn start_forwarder(cln_token: 
CancellationToken) -> Result<()> { // add transformer if exists if let Some(transformer_grpc_client) = transformer_grpc_client { - let transformer = SourceTransformer::new(transformer_grpc_client).await?; + let transformer = SourceTransformer::new(transformer_grpc_client).await?; forwarder_builder = forwarder_builder.source_transformer(transformer); } diff --git a/rust/monovertex/src/startup.rs b/rust/monovertex/src/startup.rs index 8f70f79a03..b63605d356 100644 --- a/rust/monovertex/src/startup.rs +++ b/rust/monovertex/src/startup.rs @@ -1,21 +1,23 @@ +use std::net::SocketAddr; +use std::time::Duration; + use crate::config::config; use crate::error::Error; -use crate::metrics::{LagReaderBuilder, MetricsState, start_metrics_https_server}; +use crate::metrics::{start_metrics_https_server, LagReaderBuilder, MetricsState}; use crate::sink::{FB_SINK_SERVER_INFO_FILE, SINK_SERVER_INFO_FILE}; +use crate::sink_pb::sink_client::SinkClient; use crate::source::SOURCE_SERVER_INFO_FILE; use crate::source_pb::source_client::SourceClient; +use crate::sourcetransform_pb::source_transform_client::SourceTransformClient; use crate::transformer::TRANSFORMER_SERVER_INFO_FILE; use crate::{error, server_info}; -use std::net::SocketAddr; -use std::time::Duration; + use tokio::task::JoinHandle; +use tokio::time::sleep; use tokio_util::sync::CancellationToken; use tonic::transport::Channel; -use tracing::{info, warn}; use tonic::Request; -use tokio::time::sleep; -use crate::sink_pb::sink_client::SinkClient; -use crate::sourcetransform_pb::source_transform_client::SourceTransformClient; +use tracing::{info, warn}; pub(crate) async fn check_compatibility(cln_token: &CancellationToken) -> error::Result<()> { server_info::check_for_server_compatibility(SOURCE_SERVER_INFO_FILE, cln_token.clone()) @@ -125,4 +127,4 @@ pub(crate) async fn wait_until_ready( } Ok(()) -} \ No newline at end of file +} From 81edc44d83c2e497243642a1ea8ecd56f2dc56a2 Mon Sep 17 00:00:00 2001 From: Vigith Maurice Date: 
Wed, 11 Sep 2024 19:22:46 -0700 Subject: [PATCH 10/40] chore: reduce visibility Signed-off-by: Vigith Maurice --- rust/monovertex/src/lib.rs | 26 ++++++++++---------------- rust/monovertex/src/server_info.rs | 2 +- 2 files changed, 11 insertions(+), 17 deletions(-) diff --git a/rust/monovertex/src/lib.rs b/rust/monovertex/src/lib.rs index 219e929dca..1c640a9c71 100644 --- a/rust/monovertex/src/lib.rs +++ b/rust/monovertex/src/lib.rs @@ -4,7 +4,7 @@ use tokio_util::sync::CancellationToken; use tracing::{error, info}; use crate::config::config; -pub(crate) use crate::error::Error; + use crate::forwarder::ForwarderBuilder; use crate::metrics::MetricsState; use crate::shared::create_rpc_channel; @@ -23,22 +23,16 @@ pub(crate) use self::error::Result; /// - Invokes the SourceTransformer concurrently /// - Calls the Sinker to write the batch to the Sink /// - Send Acknowledgement back to the Source -pub mod error; - -pub(crate) mod source; - -pub(crate) mod sink; - -pub(crate) mod transformer; - -pub(crate) mod forwarder; - -pub(crate) mod config; - -pub(crate) mod message; - -pub(crate) mod shared; +mod error; +pub(crate) use crate::error::Error; +mod source; +mod sink; +mod transformer; +mod forwarder; +mod config; +mod message; +mod shared; mod metrics; mod server_info; mod startup; diff --git a/rust/monovertex/src/server_info.rs b/rust/monovertex/src/server_info.rs index 7412b2ca9d..763af44c84 100644 --- a/rust/monovertex/src/server_info.rs +++ b/rust/monovertex/src/server_info.rs @@ -34,7 +34,7 @@ pub(crate) struct ServerInfo { /// check_for_server_compatibility waits until the server info file is ready and check whether the /// server is compatible with Numaflow. 
-pub async fn check_for_server_compatibility( +pub(crate) async fn check_for_server_compatibility( file_path: &str, cln_token: CancellationToken, ) -> error::Result<()> { From 5baeef7b85fe6aa79d33dacad60b30bf4803a7c8 Mon Sep 17 00:00:00 2001 From: Yashash H L Date: Thu, 12 Sep 2024 20:50:20 +0530 Subject: [PATCH 11/40] fix tests Signed-off-by: Yashash H L --- rust/Cargo.lock | 195 ++++++++++++++++++++++++++----- rust/monovertex/Cargo.toml | 4 +- rust/monovertex/src/forwarder.rs | 21 ++-- rust/monovertex/src/lib.rs | 17 +-- rust/monovertex/src/metrics.rs | 13 ++- rust/monovertex/src/source.rs | 19 +-- rust/monovertex/src/startup.rs | 9 +- rust/servesink/Cargo.toml | 4 +- 8 files changed, 215 insertions(+), 67 deletions(-) diff --git a/rust/Cargo.lock b/rust/Cargo.lock index 569d1c3996..5eecd28618 100644 --- a/rust/Cargo.lock +++ b/rust/Cargo.lock @@ -307,7 +307,7 @@ dependencies = [ "proc-macro2", "quote", "regex", - "rustc-hash", + "rustc-hash 1.1.0", "shlex", "syn", "which", @@ -1097,6 +1097,7 @@ dependencies = [ "tokio", "tokio-rustls 0.26.0", "tower-service", + "webpki-roots 0.26.3", ] [[package]] @@ -1114,9 +1115,9 @@ dependencies = [ [[package]] name = "hyper-util" -version = "0.1.6" +version = "0.1.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3ab92f4f49ee4fb4f997c784b7a2e0fa70050211e0b6a287f898c3c9785ca956" +checksum = "da62f120a8a37763efb0cf8fdf264b884c7b8b9ac8660b900c8661030c00e6ba" dependencies = [ "bytes", "futures-channel", @@ -1476,7 +1477,7 @@ dependencies = [ "hyper-util", "kube", "log", - "numaflow 0.1.0 (git+https://github.com/numaproj/numaflow-rs.git?branch=main)", + "numaflow 0.1.1", "numaflow-models", "once_cell", "parking_lot", @@ -1602,8 +1603,8 @@ dependencies = [ [[package]] name = "numaflow" -version = "0.1.0" -source = "git+https://github.com/numaproj/numaflow-rs.git?branch=main#f265a615716ab3ec3adf85e8c24413cc076cd695" +version = "0.1.1" +source = 
"git+https://github.com/numaproj/numaflow-rs.git?branch=source-streaming#dcbb26834153b84853d9757e25395d92a1314d4a" dependencies = [ "chrono", "futures-util", @@ -1628,7 +1629,7 @@ version = "0.0.0-pre" dependencies = [ "k8s-openapi", "kube", - "reqwest", + "reqwest 0.11.27", "serde", "serde_derive", "serde_json", @@ -1915,9 +1916,9 @@ dependencies = [ [[package]] name = "prost" -version = "0.13.1" +version = "0.13.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e13db3d3fde688c61e2446b4d843bc27a7e8af269a69440c0308021dc92333cc" +checksum = "3b2ecbe40f08db5c006b5764a2645f7f3f141ce756412ac9e1dd6087e6d32995" dependencies = [ "bytes", "prost-derive", @@ -1946,9 +1947,9 @@ dependencies = [ [[package]] name = "prost-derive" -version = "0.13.1" +version = "0.13.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "18bec9b0adc4eba778b33684b7ba3e7137789434769ee3ce3930463ef904cfca" +checksum = "acf0c195eebb4af52c752bec4f52f645da98b6e92077a04110c7f349477ae5ac" dependencies = [ "anyhow", "itertools 0.13.0", @@ -1959,13 +1960,61 @@ dependencies = [ [[package]] name = "prost-types" -version = "0.13.1" +version = "0.13.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cee5168b05f49d4b0ca581206eb14a7b22fafd963efe729ac48eb03266e25cc2" +checksum = "60caa6738c7369b940c3d49246a8d1749323674c65cb13010134f5c9bad5b519" dependencies = [ "prost", ] +[[package]] +name = "quinn" +version = "0.11.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8c7c5fdde3cdae7203427dc4f0a68fe0ed09833edc525a03456b153b79828684" +dependencies = [ + "bytes", + "pin-project-lite", + "quinn-proto", + "quinn-udp", + "rustc-hash 2.0.0", + "rustls 0.23.12", + "socket2", + "thiserror", + "tokio", + "tracing", +] + +[[package]] +name = "quinn-proto" +version = "0.11.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"fadfaed2cd7f389d0161bb73eeb07b7b78f8691047a6f3e73caaeae55310a4a6" +dependencies = [ + "bytes", + "rand", + "ring", + "rustc-hash 2.0.0", + "rustls 0.23.12", + "slab", + "thiserror", + "tinyvec", + "tracing", +] + +[[package]] +name = "quinn-udp" +version = "0.5.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8bffec3605b73c6f1754535084a85229fa8a30f86014e6c81aeec4abb68b0285" +dependencies = [ + "libc", + "once_cell", + "socket2", + "tracing", + "windows-sys 0.52.0", +] + [[package]] name = "quote" version = "1.0.36" @@ -2134,10 +2183,52 @@ dependencies = [ "wasm-bindgen", "wasm-bindgen-futures", "web-sys", - "webpki-roots", + "webpki-roots 0.25.4", "winreg", ] +[[package]] +name = "reqwest" +version = "0.12.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f8f4955649ef5c38cc7f9e8aa41761d48fb9677197daea9984dc54f56aad5e63" +dependencies = [ + "base64 0.22.1", + "bytes", + "futures-core", + "futures-util", + "http 1.1.0", + "http-body 1.0.1", + "http-body-util", + "hyper 1.4.1", + "hyper-rustls 0.27.2", + "hyper-util", + "ipnet", + "js-sys", + "log", + "mime", + "once_cell", + "percent-encoding", + "pin-project-lite", + "quinn", + "rustls 0.23.12", + "rustls-pemfile 2.1.3", + "rustls-pki-types", + "serde", + "serde_json", + "serde_urlencoded", + "sync_wrapper 1.0.1", + "tokio", + "tokio-rustls 0.26.0", + "tower-service", + "url", + "wasm-bindgen", + "wasm-bindgen-futures", + "web-sys", + "webpki-roots 0.26.3", + "windows-registry", +] + [[package]] name = "ring" version = "0.17.8" @@ -2187,6 +2278,12 @@ version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2" +[[package]] +name = "rustc-hash" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "583034fd73374156e66797ed8e5b0d5690409c9226b22d87cb7f19821c05d152" + [[package]] name = "rustc_version" version = 
"0.4.0" @@ -2375,9 +2472,9 @@ checksum = "61697e0a1c7e512e84a621326239844a24d8207b4669b41bc18b32ea5cbf988b" [[package]] name = "serde" -version = "1.0.204" +version = "1.0.210" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bc76f558e0cbb2a839d37354c575f1dc3fdc6546b5be373ba43d95f231bf7c12" +checksum = "c8e3592472072e6e22e0a54d5904d9febf8508f65fb8552499a1abc7d1078c3a" dependencies = [ "serde_derive", ] @@ -2394,9 +2491,9 @@ dependencies = [ [[package]] name = "serde_derive" -version = "1.0.204" +version = "1.0.210" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e0cd7e117be63d3c3678776753929474f3b04a43a080c744d6b0ae2a8c28e222" +checksum = "243902eda00fad750862fc144cea25caca5e20d615af0a81bee94ca738f1df1f" dependencies = [ "proc-macro2", "quote", @@ -2405,9 +2502,9 @@ dependencies = [ [[package]] name = "serde_json" -version = "1.0.122" +version = "1.0.128" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "784b6203951c57ff748476b126ccb5e8e2959a5c19e5c617ab1956be3dbc68da" +checksum = "6ff5456707a1de34e7e37f2a6fd3d3f808c318259cbd01ab6377795054b483d8" dependencies = [ "itoa", "memchr", @@ -2483,8 +2580,8 @@ dependencies = [ name = "servesink" version = "0.1.0" dependencies = [ - "numaflow 0.1.0 (git+https://github.com/numaproj/numaflow-rs.git?branch=main)", - "reqwest", + "numaflow 0.1.1", + "reqwest 0.12.7", "tokio", "tonic", "tracing", @@ -2665,6 +2762,9 @@ name = "sync_wrapper" version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a7065abeca94b6a8a577f9bd45aa0867a2238b74e8eb67cf10d492bc39351394" +dependencies = [ + "futures-core", +] [[package]] name = "system-configuration" @@ -2848,9 +2948,9 @@ dependencies = [ [[package]] name = "tokio-stream" -version = "0.1.15" +version = "0.1.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "267ac89e0bec6e691e5813911606935d77c476ff49024f98abcea3e7b15e37af" +checksum = 
"4f4e6ce100d0eb49a2734f8c0812bcd324cf357d21810932c5df6b96ef2b86f1" dependencies = [ "futures-core", "pin-project-lite", @@ -2859,9 +2959,9 @@ dependencies = [ [[package]] name = "tokio-util" -version = "0.7.11" +version = "0.7.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9cf6b47b3771c49ac75ad09a6162f53ad4b8088b76ac60e8ec1455b31a189fe1" +checksum = "61e7c3654c13bcd040d4a03abee2c75b1d14a37b423cf5a813ceae1cc903ec6a" dependencies = [ "bytes", "futures-core", @@ -2936,9 +3036,9 @@ dependencies = [ [[package]] name = "tonic-build" -version = "0.12.1" +version = "0.12.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "568392c5a2bd0020723e3f387891176aabafe36fd9fcd074ad309dfa0c8eb964" +checksum = "fe4ee8877250136bd7e3d2331632810a4df4ea5e004656990d8d66d2f5ee8a67" dependencies = [ "prettyplease", "proc-macro2", @@ -3291,6 +3391,15 @@ version = "0.25.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5f20c57d8d7db6d3b86154206ae5d8fba62dd39573114de97c2cb0578251f8e1" +[[package]] +name = "webpki-roots" +version = "0.26.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bd7c23921eeb1713a4e851530e9b9756e4fb0e89978582942612524cf09f01cd" +dependencies = [ + "rustls-pki-types", +] + [[package]] name = "which" version = "4.4.2" @@ -3334,6 +3443,36 @@ dependencies = [ "windows-targets 0.52.6", ] +[[package]] +name = "windows-registry" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e400001bb720a623c1c69032f8e3e4cf09984deec740f007dd2b03ec864804b0" +dependencies = [ + "windows-result", + "windows-strings", + "windows-targets 0.52.6", +] + +[[package]] +name = "windows-result" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1d1043d8214f791817bab27572aaa8af63732e11bf84aa21a45a78d6c317ae0e" +dependencies = [ + "windows-targets 0.52.6", +] + +[[package]] +name = 
"windows-strings" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4cd9b125c486025df0eabcb585e62173c6c9eddcec5d117d3b6e8c30e2ee4d10" +dependencies = [ + "windows-result", + "windows-targets 0.52.6", +] + [[package]] name = "windows-sys" version = "0.48.0" diff --git a/rust/monovertex/Cargo.toml b/rust/monovertex/Cargo.toml index 2b0cb2bae7..fdefe40a93 100644 --- a/rust/monovertex/Cargo.toml +++ b/rust/monovertex/Cargo.toml @@ -13,7 +13,7 @@ tokio = { version = "1.39.3", features = ["full"] } tracing = "0.1.40" tokio-util = "0.7.11" tokio-stream = "0.1.15" -prost = "0.13.1" +prost = "0.13.2" prost-types = "0.13.1" chrono = "0.4.31" base64 = "0.22.1" @@ -38,7 +38,7 @@ log = "0.4.22" [dev-dependencies] tempfile = "3.11.0" -numaflow = { git = "https://github.com/numaproj/numaflow-rs.git", branch = "main" } +numaflow = { git = "https://github.com/numaproj/numaflow-rs.git", branch = "source-streaming" } [build-dependencies] tonic-build = "0.12.1" diff --git a/rust/monovertex/src/forwarder.rs b/rust/monovertex/src/forwarder.rs index 981bb3bde8..277664f1e5 100644 --- a/rust/monovertex/src/forwarder.rs +++ b/rust/monovertex/src/forwarder.rs @@ -523,6 +523,7 @@ mod tests { use chrono::Utc; use numaflow::source::{Message, Offset, SourceReadRequest}; use numaflow::{sink, source, sourcetransform}; + use tokio::sync::mpsc; use tokio::sync::mpsc::Sender; use tokio_util::sync::CancellationToken; @@ -575,13 +576,11 @@ mod tests { .extend(message_offsets) } - async fn ack(&self, offsets: Vec) { - for offset in offsets { - self.yet_to_be_acked - .write() - .unwrap() - .remove(&String::from_utf8(offset.offset).unwrap()); - } + async fn ack(&self, offset: Offset) { + self.yet_to_be_acked + .write() + .unwrap() + .remove(&String::from_utf8(offset.offset).unwrap()); } async fn pending(&self) -> usize { @@ -659,7 +658,7 @@ mod tests { #[tokio::test] async fn test_forwarder_source_sink() { - let (sink_tx, mut sink_rx) = 
tokio::sync::mpsc::channel(10); + let (sink_tx, mut sink_rx) = mpsc::channel(10); // Start the source server let (source_shutdown_tx, source_shutdown_rx) = tokio::sync::oneshot::channel(); @@ -755,6 +754,7 @@ mod tests { // Wait for the assertion task to complete assert_handle.await.unwrap(); + drop(forwarder); // stop the servers source_shutdown_tx .send(()) @@ -863,6 +863,7 @@ mod tests { cancel_handle.await.unwrap(); // stop the servers + drop(forwarder); source_shutdown_tx .send(()) .expect("failed to send shutdown signal"); @@ -872,7 +873,7 @@ mod tests { sink_shutdown_tx .send(()) - .expect("failed to send shutdown signal"); + .expect("failed to send sink shutdown signal"); sink_server_handle .await .expect("failed to join sink server task"); @@ -987,6 +988,8 @@ mod tests { forwarder.start().await.unwrap(); assert_handle.await.unwrap(); + + drop(forwarder); // stop the servers source_shutdown_tx .send(()) diff --git a/rust/monovertex/src/lib.rs b/rust/monovertex/src/lib.rs index 1c640a9c71..6e3e37180d 100644 --- a/rust/monovertex/src/lib.rs +++ b/rust/monovertex/src/lib.rs @@ -26,16 +26,16 @@ pub(crate) use self::error::Result; mod error; pub(crate) use crate::error::Error; -mod source; -mod sink; -mod transformer; -mod forwarder; mod config; +mod forwarder; mod message; -mod shared; mod metrics; mod server_info; +mod shared; +mod sink; +mod source; mod startup; +mod transformer; pub(crate) mod source_pb { tonic::include_proto!("source.v1"); @@ -150,10 +150,11 @@ async fn start_forwarder(cln_token: CancellationToken) -> Result<()> { // start the metrics server // FIXME: what to do with the handle - let _ = startup::start_metrics_server(metrics_state); + startup::start_metrics_server(metrics_state).await; // start the lag reader to publish lag metrics - startup::start_lag_reader(source_grpc_client.clone()).await; + let mut lag_reader = startup::create_lag_reader(source_grpc_client.clone()).await; + lag_reader.start().await; // build the forwarder let 
source_reader = SourceReader::new(source_grpc_client.clone()).await?; @@ -198,7 +199,7 @@ mod tests { impl source::Sourcer for SimpleSource { async fn read(&self, _: SourceReadRequest, _: Sender) {} - async fn ack(&self, _: Vec) {} + async fn ack(&self, _: Offset) {} async fn pending(&self) -> usize { 0 diff --git a/rust/monovertex/src/metrics.rs b/rust/monovertex/src/metrics.rs index 30b3b85ce2..f12a027804 100644 --- a/rust/monovertex/src/metrics.rs +++ b/rust/monovertex/src/metrics.rs @@ -326,22 +326,27 @@ async fn livez() -> impl IntoResponse { } async fn sidecar_livez(State(mut state): State) -> impl IntoResponse { - if !state.source_client.is_ready(Request::new(())).await.is_ok() { + if state + .source_client + .is_ready(Request::new(())) + .await + .is_err() + { error!("Source client is not available"); return StatusCode::SERVICE_UNAVAILABLE; } - if !state.sink_client.is_ready(Request::new(())).await.is_ok() { + if state.sink_client.is_ready(Request::new(())).await.is_err() { error!("Sink client is not available"); return StatusCode::SERVICE_UNAVAILABLE; } if let Some(mut transformer_client) = state.transformer_client { - if !transformer_client.is_ready(Request::new(())).await.is_ok() { + if transformer_client.is_ready(Request::new(())).await.is_err() { error!("Transformer client is not available"); return StatusCode::SERVICE_UNAVAILABLE; } } if let Some(mut fb_sink_client) = state.fb_sink_client { - if !fb_sink_client.is_ready(Request::new(())).await.is_ok() { + if fb_sink_client.is_ready(Request::new(())).await.is_err() { error!("Fallback sink client is not available"); return StatusCode::SERVICE_UNAVAILABLE; } diff --git a/rust/monovertex/src/source.rs b/rust/monovertex/src/source.rs index ce5489e030..ed6f23e92a 100644 --- a/rust/monovertex/src/source.rs +++ b/rust/monovertex/src/source.rs @@ -4,7 +4,7 @@ use crate::message::{Message, Offset}; use crate::source_pb; use crate::source_pb::source_client::SourceClient; use crate::source_pb::{ - ack_request, 
read_request, AckRequest, AckResponse, ReadRequest, ReadResponse, + ack_request, ack_response, read_request, AckRequest, AckResponse, ReadRequest, ReadResponse, }; use base64::prelude::BASE64_STANDARD; use base64::Engine; @@ -118,7 +118,9 @@ impl SourceReader { .await .map_err(|e| SourceError(e.to_string()))?; } - Ok(AckResponse::default()) + Ok(AckResponse { + result: Some(ack_response::Result { success: Some(()) }), + }) } } @@ -173,13 +175,11 @@ mod tests { self.yet_to_ack.write().unwrap().extend(message_offsets) } - async fn ack(&self, offsets: Vec) { - for offset in offsets { - self.yet_to_ack - .write() - .unwrap() - .remove(&String::from_utf8(offset.offset).unwrap()); - } + async fn ack(&self, offset: Offset) { + self.yet_to_ack + .write() + .unwrap() + .remove(&String::from_utf8(offset.offset).unwrap()); } async fn pending(&self) -> usize { @@ -229,6 +229,7 @@ mod tests { .unwrap(); assert!(response.result.unwrap().success.is_some()); + drop(source_client); shutdown_tx .send(()) .expect("failed to send shutdown signal"); diff --git a/rust/monovertex/src/startup.rs b/rust/monovertex/src/startup.rs index b63605d356..44df14b8d8 100644 --- a/rust/monovertex/src/startup.rs +++ b/rust/monovertex/src/startup.rs @@ -3,7 +3,7 @@ use std::time::Duration; use crate::config::config; use crate::error::Error; -use crate::metrics::{start_metrics_https_server, LagReaderBuilder, MetricsState}; +use crate::metrics::{start_metrics_https_server, LagReader, LagReaderBuilder, MetricsState}; use crate::sink::{FB_SINK_SERVER_INFO_FILE, SINK_SERVER_INFO_FILE}; use crate::sink_pb::sink_client::SinkClient; use crate::source::SOURCE_SERVER_INFO_FILE; @@ -70,16 +70,15 @@ pub(crate) async fn start_metrics_server(metrics_state: MetricsState) -> JoinHan }) } -pub(crate) async fn start_lag_reader(lag_reader_grpc_client: SourceClient) { - let mut lag_reader = LagReaderBuilder::new(lag_reader_grpc_client) +pub(crate) async fn create_lag_reader(lag_reader_grpc_client: SourceClient) -> 
LagReader { + LagReaderBuilder::new(lag_reader_grpc_client) .lag_checking_interval(Duration::from_secs( config().lag_check_interval_in_secs.into(), )) .refresh_interval(Duration::from_secs( config().lag_refresh_interval_in_secs.into(), )) - .build(); - lag_reader.start().await; + .build() } pub(crate) async fn wait_until_ready( diff --git a/rust/servesink/Cargo.toml b/rust/servesink/Cargo.toml index e820030494..6d79dc2b7e 100644 --- a/rust/servesink/Cargo.toml +++ b/rust/servesink/Cargo.toml @@ -6,11 +6,11 @@ edition = "2021" [dependencies] tonic = "0.12.0" tokio = { version = "1.0", features = ["macros", "rt-multi-thread"] } -numaflow = { git = "https://github.com/numaproj/numaflow-rs.git", branch = "main" } +numaflow = { git = "https://github.com/numaproj/numaflow-rs.git", branch = "source-streaming" } tracing = "0.1.40" tracing-subscriber = { version = "0.3.18", features = ["env-filter"] } [dependencies.reqwest] -version = "^0.11" +version = "0.12.7" default-features = false features = ["rustls-tls"] \ No newline at end of file From 948de1ceb59aa6655c9b3f6e9da23fb414e7bcf8 Mon Sep 17 00:00:00 2001 From: Yashash H L Date: Thu, 12 Sep 2024 21:19:27 +0530 Subject: [PATCH 12/40] fix tests Signed-off-by: Yashash H L --- rust/monovertex/src/forwarder.rs | 1 + rust/monovertex/src/source.rs | 5 ++++- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/rust/monovertex/src/forwarder.rs b/rust/monovertex/src/forwarder.rs index 277664f1e5..c1c2e85b2e 100644 --- a/rust/monovertex/src/forwarder.rs +++ b/rust/monovertex/src/forwarder.rs @@ -798,6 +798,7 @@ mod tests { } #[tokio::test] + #[ignore] // FIXME async fn test_forwarder_sink_error() { // Start the source server let (source_shutdown_tx, source_shutdown_rx) = tokio::sync::oneshot::channel(); diff --git a/rust/monovertex/src/source.rs b/rust/monovertex/src/source.rs index ed6f23e92a..521e33c6c1 100644 --- a/rust/monovertex/src/source.rs +++ b/rust/monovertex/src/source.rs @@ -88,6 +88,7 @@ impl SourceReader { 
while let Some(response) = self.resp_stream.message().await? { if response.status.as_ref().map_or(false, |status| status.eot) { + println!("breaking"); break; } @@ -97,7 +98,7 @@ impl SourceReader { messages.push(result.try_into()?); } - + println!("messages {:?}", messages); Ok(messages) } @@ -229,6 +230,8 @@ mod tests { .unwrap(); assert!(response.result.unwrap().success.is_some()); + // we need to drop the client, because if there are any in-flight requests + // server fails to shut down. https://github.com/numaproj/numaflow-rs/issues/85 drop(source_client); shutdown_tx .send(()) From 2a27add472ffee20b71d4630d652ae99fa0b03e0 Mon Sep 17 00:00:00 2001 From: Yashash H L Date: Thu, 12 Sep 2024 21:33:14 +0530 Subject: [PATCH 13/40] lint and codegen Signed-off-by: Yashash H L --- go.sum | 4 ---- pkg/daemon/client/grpc_daemon_client.go | 2 +- pkg/sdkclient/grpc/grpc_utils.go | 2 +- pkg/sdkclient/source/client_test.go | 2 +- pkg/sources/udsource/grpc_udsource_test.go | 17 ----------------- rust/monovertex/build.rs | 1 + 6 files changed, 4 insertions(+), 24 deletions(-) diff --git a/go.sum b/go.sum index fc6202a07a..969aab1b96 100644 --- a/go.sum +++ b/go.sum @@ -485,10 +485,6 @@ github.com/nats-io/nkeys v0.4.7/go.mod h1:kqXRgRDPlGy7nGaEDMuYzmiJCIAAWDK0IMBtDm github.com/nats-io/nuid v1.0.1 h1:5iA8DT8V7q8WK2EScv2padNa/rTESc1KdnPw4TC2paw= github.com/nats-io/nuid v1.0.1/go.mod h1:19wcPz3Ph3q0Jbyiqsd0kePYG7A95tJPxeL+1OSON2c= github.com/niemeyer/pretty v0.0.0-20200227124842-a10e7caefd8e/go.mod h1:zD1mROLANZcx1PVRCS0qkT7pwLkGfwJo4zjcN/Tysno= -github.com/numaproj/numaflow-go v0.8.1-0.20240906054808-44a0a178c835 h1:sMucUReYuLPSchDtzjNzZHE0UefDAgH9Hl6pULzNKj8= -github.com/numaproj/numaflow-go v0.8.1-0.20240906054808-44a0a178c835/go.mod h1:GWXSR8ZLKv1yjzTStVWiqu6HuGSjtS+wpvP2xLhqX+A= -github.com/numaproj/numaflow-go v0.8.1-0.20240908024139-2506e0d7639d h1:4NiJDowEBSeR/ptanr66vN9wUH6kM0B+M10sb69Lysw= -github.com/numaproj/numaflow-go v0.8.1-0.20240908024139-2506e0d7639d/go.mod 
h1:jOCtHiDv5hvrrSOt3/swCd8FpmEP1w/RNZydqJCsB58= github.com/numaproj/numaflow-go v0.8.1-0.20240909093557-c9946b0e8b33 h1:lrSD4qibegQp896k3wGRNwQSb2f533JJsG4gixFiv5k= github.com/numaproj/numaflow-go v0.8.1-0.20240909093557-c9946b0e8b33/go.mod h1:jOCtHiDv5hvrrSOt3/swCd8FpmEP1w/RNZydqJCsB58= github.com/nxadm/tail v1.4.4/go.mod h1:kenIhsEOeOJmVchQTgglprH7qJGnHDVpk1VPCcaMI8A= diff --git a/pkg/daemon/client/grpc_daemon_client.go b/pkg/daemon/client/grpc_daemon_client.go index fa30ec0c4e..6b1d0040db 100644 --- a/pkg/daemon/client/grpc_daemon_client.go +++ b/pkg/daemon/client/grpc_daemon_client.go @@ -37,7 +37,7 @@ func NewGRPCDaemonServiceClient(address string) (DaemonClient, error) { config := &tls.Config{ InsecureSkipVerify: true, } - conn, err := grpc.Dial(address, grpc.WithTransportCredentials(credentials.NewTLS(config))) + conn, err := grpc.NewClient(address, grpc.WithTransportCredentials(credentials.NewTLS(config))) if err != nil { return nil, err } diff --git a/pkg/sdkclient/grpc/grpc_utils.go b/pkg/sdkclient/grpc/grpc_utils.go index 6d3574a290..e65d9d5e56 100644 --- a/pkg/sdkclient/grpc/grpc_utils.go +++ b/pkg/sdkclient/grpc/grpc_utils.go @@ -59,7 +59,7 @@ func ConnectToServer(udsSockAddr string, serverInfo *info.ServerInfo, maxMessage sockAddr = getUdsSockAddr(udsSockAddr) log.Println("UDS Client:", sockAddr) - conn, err = grpc.Dial(sockAddr, grpc.WithTransportCredentials(insecure.NewCredentials()), + conn, err = grpc.NewClient(sockAddr, grpc.WithTransportCredentials(insecure.NewCredentials()), grpc.WithDefaultCallOptions(grpc.MaxCallRecvMsgSize(maxMessageSize), grpc.MaxCallSendMsgSize(maxMessageSize))) } diff --git a/pkg/sdkclient/source/client_test.go b/pkg/sdkclient/source/client_test.go index 1c65d4cb80..f1ae7d80f4 100644 --- a/pkg/sdkclient/source/client_test.go +++ b/pkg/sdkclient/source/client_test.go @@ -164,7 +164,7 @@ func TestAckFn(t *testing.T) { assert.NoError(t, err) assert.Equal(t, &sourcepb.AckResponse{}, ack) - ack, err = testClient.AckFn(ctx, 
&sourcepb.AckRequest{}) + _, err = testClient.AckFn(ctx, &sourcepb.AckRequest{}) assert.EqualError(t, err, "mock connection refused") } diff --git a/pkg/sources/udsource/grpc_udsource_test.go b/pkg/sources/udsource/grpc_udsource_test.go index ed326ba742..459f9ae39f 100644 --- a/pkg/sources/udsource/grpc_udsource_test.go +++ b/pkg/sources/udsource/grpc_udsource_test.go @@ -30,7 +30,6 @@ import ( "go.uber.org/goleak" "google.golang.org/grpc/codes" "google.golang.org/grpc/status" - "google.golang.org/protobuf/proto" "google.golang.org/protobuf/types/known/timestamppb" "github.com/numaproj/numaflow/pkg/isb" @@ -41,22 +40,6 @@ func TestMain(m *testing.M) { goleak.VerifyTestMain(m) } -type rpcMsg struct { - msg proto.Message -} - -func (r *rpcMsg) Matches(msg interface{}) bool { - m, ok := msg.(proto.Message) - if !ok { - return false - } - return proto.Equal(m, r.msg) -} - -func (r *rpcMsg) String() string { - return fmt.Sprintf("is %s", r.msg) -} - func NewMockUDSgRPCBasedUDSource(ctx context.Context, mockClient *sourcemock.MockSourceClient) *GRPCBasedUDSource { c, _ := sourceclient.NewFromClient(ctx, mockClient) return &GRPCBasedUDSource{ diff --git a/rust/monovertex/build.rs b/rust/monovertex/build.rs index fc30e6b678..87b2453c75 100644 --- a/rust/monovertex/build.rs +++ b/rust/monovertex/build.rs @@ -1,6 +1,7 @@ fn main() { tonic_build::configure() .build_server(true) + .protoc_arg("--experimental_allow_proto3_optional") .compile( &[ "proto/source.proto", From e0ae8ca55dfb4c0d85c819af90332d7086b37f03 Mon Sep 17 00:00:00 2001 From: Yashash H L Date: Fri, 13 Sep 2024 09:08:24 +0530 Subject: [PATCH 14/40] stop retrying while shutting down Signed-off-by: Yashash H L --- rust/monovertex/src/forwarder.rs | 7 ++++++- rust/monovertex/src/source.rs | 2 -- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/rust/monovertex/src/forwarder.rs b/rust/monovertex/src/forwarder.rs index c1c2e85b2e..89acb22e3c 100644 --- a/rust/monovertex/src/forwarder.rs +++ 
b/rust/monovertex/src/forwarder.rs @@ -243,6 +243,12 @@ impl Forwarder { } Err(e) => Err(e)?, } + + if self.cln_token.is_cancelled() { + return Err(Error::SinkError( + "Cancellation token triggered during retry".to_string(), + )); + } } // If after the retries we still have messages to process, handle the post retry failures @@ -798,7 +804,6 @@ mod tests { } #[tokio::test] - #[ignore] // FIXME async fn test_forwarder_sink_error() { // Start the source server let (source_shutdown_tx, source_shutdown_rx) = tokio::sync::oneshot::channel(); diff --git a/rust/monovertex/src/source.rs b/rust/monovertex/src/source.rs index 521e33c6c1..6343b8a29b 100644 --- a/rust/monovertex/src/source.rs +++ b/rust/monovertex/src/source.rs @@ -88,7 +88,6 @@ impl SourceReader { while let Some(response) = self.resp_stream.message().await? { if response.status.as_ref().map_or(false, |status| status.eot) { - println!("breaking"); break; } @@ -98,7 +97,6 @@ impl SourceReader { messages.push(result.try_into()?); } - println!("messages {:?}", messages); Ok(messages) } From 10a650876f64e658be98275bb9918c89ac9e1095 Mon Sep 17 00:00:00 2001 From: Yashash H L Date: Fri, 13 Sep 2024 13:28:01 +0530 Subject: [PATCH 15/40] update ubuntu Signed-off-by: Yashash H L --- .github/workflows/ci.yaml | 2 +- rust/monovertex/src/forwarder.rs | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 9eaa4de89b..50a7baf29c 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -150,7 +150,7 @@ jobs: - run: git diff --exit-code build-rust-amd64: - runs-on: ubuntu-22.04 + runs-on: ubuntu-24.04 defaults: run: working-directory: ./rust diff --git a/rust/monovertex/src/forwarder.rs b/rust/monovertex/src/forwarder.rs index 89acb22e3c..3c3d34fa83 100644 --- a/rust/monovertex/src/forwarder.rs +++ b/rust/monovertex/src/forwarder.rs @@ -244,6 +244,7 @@ impl Forwarder { Err(e) => Err(e)?, } + // if we are shutting down, stop the retry if 
self.cln_token.is_cancelled() { return Err(Error::SinkError( "Cancellation token triggered during retry".to_string(), From 7e86c5f964a78d78ba2b602bf89547684ee19425 Mon Sep 17 00:00:00 2001 From: Yashash H L Date: Fri, 13 Sep 2024 17:43:05 +0530 Subject: [PATCH 16/40] lint Signed-off-by: Yashash H L --- pkg/daemon/client/grpc_daemon_client.go | 2 +- pkg/daemon/client/grpc_daemon_client_test.go | 7 ------- pkg/mvtxdaemon/client/grpc_client.go | 2 +- pkg/sdkclient/grpc/grpc_utils.go | 2 +- 4 files changed, 3 insertions(+), 10 deletions(-) diff --git a/pkg/daemon/client/grpc_daemon_client.go b/pkg/daemon/client/grpc_daemon_client.go index 6b1d0040db..fa30ec0c4e 100644 --- a/pkg/daemon/client/grpc_daemon_client.go +++ b/pkg/daemon/client/grpc_daemon_client.go @@ -37,7 +37,7 @@ func NewGRPCDaemonServiceClient(address string) (DaemonClient, error) { config := &tls.Config{ InsecureSkipVerify: true, } - conn, err := grpc.NewClient(address, grpc.WithTransportCredentials(credentials.NewTLS(config))) + conn, err := grpc.Dial(address, grpc.WithTransportCredentials(credentials.NewTLS(config))) if err != nil { return nil, err } diff --git a/pkg/daemon/client/grpc_daemon_client_test.go b/pkg/daemon/client/grpc_daemon_client_test.go index d1bca6e18a..905d963822 100644 --- a/pkg/daemon/client/grpc_daemon_client_test.go +++ b/pkg/daemon/client/grpc_daemon_client_test.go @@ -467,11 +467,4 @@ func TestNewGRPCDaemonServiceClient(t *testing.T) { err = client.Close() assert.NoError(t, err) }) - - t.Run("empty address", func(t *testing.T) { - address := "" - client, err := NewGRPCDaemonServiceClient(address) - assert.Error(t, err) - assert.Nil(t, client) - }) } diff --git a/pkg/mvtxdaemon/client/grpc_client.go b/pkg/mvtxdaemon/client/grpc_client.go index cd6fdbb455..c398b138c2 100644 --- a/pkg/mvtxdaemon/client/grpc_client.go +++ b/pkg/mvtxdaemon/client/grpc_client.go @@ -38,7 +38,7 @@ func NewGRPCClient(address string) (MonoVertexDaemonClient, error) { config := &tls.Config{ 
InsecureSkipVerify: true, } - conn, err := grpc.Dial(address, grpc.WithTransportCredentials(credentials.NewTLS(config))) + conn, err := grpc.NewClient(address, grpc.WithTransportCredentials(credentials.NewTLS(config))) if err != nil { return nil, err } diff --git a/pkg/sdkclient/grpc/grpc_utils.go b/pkg/sdkclient/grpc/grpc_utils.go index e65d9d5e56..bf5d95ae8d 100644 --- a/pkg/sdkclient/grpc/grpc_utils.go +++ b/pkg/sdkclient/grpc/grpc_utils.go @@ -49,7 +49,7 @@ func ConnectToServer(udsSockAddr string, serverInfo *info.ServerInfo, maxMessage return nil, fmt.Errorf("failed to start Multiproc Client: %w", err) } - conn, err = grpc.Dial( + conn, err = grpc.NewClient( fmt.Sprintf("%s:///%s", resolver.CustScheme, resolver.CustServiceName), grpc.WithDefaultServiceConfig(`{"loadBalancingConfig": [{"round_robin":{}}]}`), grpc.WithTransportCredentials(insecure.NewCredentials()), From c272caa6665340653d128f4109c17a356cdfdedf Mon Sep 17 00:00:00 2001 From: Yashash H L Date: Fri, 13 Sep 2024 19:26:33 +0530 Subject: [PATCH 17/40] use ubuntu latest Signed-off-by: Yashash H L --- .github/workflows/ci.yaml | 10 +++++----- .github/workflows/nightly-build.yml | 4 ++-- .github/workflows/release.yml | 6 +++--- pkg/daemon/client/grpc_daemon_client.go | 2 +- 4 files changed, 11 insertions(+), 11 deletions(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 50a7baf29c..1feddb7647 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -9,7 +9,7 @@ on: jobs: ui: name: UI - runs-on: ubuntu-latest + runs-on: ubuntu-24.04 timeout-minutes: 6 steps: - name: Checkout code @@ -30,7 +30,7 @@ jobs: run: git diff --exit-code codegen: name: Codegen - runs-on: ubuntu-latest + runs-on: ubuntu-24.04 timeout-minutes: 5 steps: - name: Checkout code @@ -59,7 +59,7 @@ jobs: unit-tests: name: Unit Tests - runs-on: ubuntu-latest + runs-on: ubuntu-24.04 timeout-minutes: 10 services: redis: @@ -131,7 +131,7 @@ jobs: lint: name: Lint - runs-on: ubuntu-latest + runs-on: 
ubuntu-24.04 timeout-minutes: 10 env: GOPATH: /home/runner/go @@ -184,7 +184,7 @@ jobs: e2e-tests: name: E2E Tests - runs-on: ubuntu-latest + runs-on: ubuntu-24.04 needs: [ build-rust-amd64 ] timeout-minutes: 20 strategy: diff --git a/.github/workflows/nightly-build.yml b/.github/workflows/nightly-build.yml index 51bdb6bfbc..5f5179fb37 100644 --- a/.github/workflows/nightly-build.yml +++ b/.github/workflows/nightly-build.yml @@ -41,7 +41,7 @@ jobs: path: dist build-rust-amd64: - runs-on: ubuntu-22.04 + runs-on: ubuntu-24.04 defaults: run: working-directory: ./rust @@ -71,7 +71,7 @@ jobs: path: rust/numaflow-rs-linux-amd64 build-rust-arm64: - runs-on: ubuntu-22.04 + runs-on: ubuntu-24.04 defaults: run: working-directory: ./rust diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index dcab109e0e..23a05eacb3 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -39,7 +39,7 @@ jobs: path: dist build-rust-amd64: - runs-on: ubuntu-22.04 + runs-on: ubuntu-24.04 defaults: run: working-directory: ./rust @@ -62,7 +62,7 @@ jobs: path: rust/numaflow-rs-linux-amd64 build-rust-arm64: - runs-on: ubuntu-22.04 + runs-on: ubuntu-24.04 defaults: run: working-directory: ./rust @@ -90,7 +90,7 @@ jobs: build-push-linux-multi: name: Build & push linux/amd64 and linux/arm64 needs: [ build-go-binaries, build-rust-amd64, build-rust-arm64] - runs-on: ubuntu-20.04 + runs-on: ubuntu-24.04 if: github.repository == 'numaproj/numaflow' strategy: matrix: diff --git a/pkg/daemon/client/grpc_daemon_client.go b/pkg/daemon/client/grpc_daemon_client.go index fa30ec0c4e..6b1d0040db 100644 --- a/pkg/daemon/client/grpc_daemon_client.go +++ b/pkg/daemon/client/grpc_daemon_client.go @@ -37,7 +37,7 @@ func NewGRPCDaemonServiceClient(address string) (DaemonClient, error) { config := &tls.Config{ InsecureSkipVerify: true, } - conn, err := grpc.Dial(address, grpc.WithTransportCredentials(credentials.NewTLS(config))) + conn, err := grpc.NewClient(address, 
grpc.WithTransportCredentials(credentials.NewTLS(config))) if err != nil { return nil, err } From 2fb827fe5111ccfee2d51f44ab25cb4da8bd5037 Mon Sep 17 00:00:00 2001 From: Yashash H L Date: Fri, 13 Sep 2024 19:46:06 +0530 Subject: [PATCH 18/40] lint Signed-off-by: Yashash H L --- pkg/sdkclient/source/client.go | 1 - 1 file changed, 1 deletion(-) diff --git a/pkg/sdkclient/source/client.go b/pkg/sdkclient/source/client.go index 13f15e1435..885a187901 100644 --- a/pkg/sdkclient/source/client.go +++ b/pkg/sdkclient/source/client.go @@ -37,7 +37,6 @@ type client struct { grpcClt sourcepb.SourceClient readStream sourcepb.Source_ReadFnClient ackStream sourcepb.Source_AckFnClient - datumCh chan *sourcepb.ReadResponse } var _ Client = (*client)(nil) From 02fca5d9b7d26ab0967afa32f523ea15c62bd3d1 Mon Sep 17 00:00:00 2001 From: Yashash H L Date: Fri, 13 Sep 2024 21:53:01 +0530 Subject: [PATCH 19/40] use ubuntu latest Signed-off-by: Yashash H L --- .github/workflows/ci.yaml | 6 +++--- pkg/sdkclient/options.go | 34 ---------------------------------- rust/monovertex/build.rs | 1 - test/e2e/functional_test.go | 3 ++- 4 files changed, 5 insertions(+), 39 deletions(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 1feddb7647..6a5ea73c54 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -9,7 +9,7 @@ on: jobs: ui: name: UI - runs-on: ubuntu-24.04 + runs-on: ubuntu-latest timeout-minutes: 6 steps: - name: Checkout code @@ -131,7 +131,7 @@ jobs: lint: name: Lint - runs-on: ubuntu-24.04 + runs-on: ubuntu-latest timeout-minutes: 10 env: GOPATH: /home/runner/go @@ -184,7 +184,7 @@ jobs: e2e-tests: name: E2E Tests - runs-on: ubuntu-24.04 + runs-on: ubuntu-latest needs: [ build-rust-amd64 ] timeout-minutes: 20 strategy: diff --git a/pkg/sdkclient/options.go b/pkg/sdkclient/options.go index 3c7f30d24f..e46e9c8869 100644 --- a/pkg/sdkclient/options.go +++ b/pkg/sdkclient/options.go @@ -16,17 +16,9 @@ limitations under the License. 
package sdkclient -import ( - "time" - - "github.com/numaproj/numaflow/pkg/apis/numaflow/v1alpha1" -) - type Options struct { udsSockAddr string maxMessageSize int - readBatchSize int - readTimeout time.Duration } // UdsSockAddr returns the UDS sock addr. @@ -39,23 +31,11 @@ func (o *Options) MaxMessageSize() int { return o.maxMessageSize } -// ReadBatchSize returns the read batch size. -func (o *Options) ReadBatchSize() int { - return o.readBatchSize -} - -// ReadTimeout returns the read timeout. -func (o *Options) ReadTimeout() time.Duration { - return o.readTimeout -} - // DefaultOptions returns the default options. func DefaultOptions(address string) *Options { return &Options{ maxMessageSize: DefaultGRPCMaxMessageSize, udsSockAddr: address, - readBatchSize: v1alpha1.DefaultReadBatchSize, - readTimeout: v1alpha1.DefaultReadTimeout, } } @@ -75,17 +55,3 @@ func WithMaxMessageSize(size int) Option { opts.maxMessageSize = size } } - -// WithReadBatchSize sets the read batch size. -func WithReadBatchSize(size int) Option { - return func(opts *Options) { - opts.readBatchSize = size - } -} - -// WithReadTimeout sets the read timeout. -func WithReadTimeout(timeout time.Duration) Option { - return func(opts *Options) { - opts.readTimeout = timeout - } -} diff --git a/rust/monovertex/build.rs b/rust/monovertex/build.rs index 87b2453c75..fc30e6b678 100644 --- a/rust/monovertex/build.rs +++ b/rust/monovertex/build.rs @@ -1,7 +1,6 @@ fn main() { tonic_build::configure() .build_server(true) - .protoc_arg("--experimental_allow_proto3_optional") .compile( &[ "proto/source.proto", diff --git a/test/e2e/functional_test.go b/test/e2e/functional_test.go index 2d0989ac7e..38a978c51f 100644 --- a/test/e2e/functional_test.go +++ b/test/e2e/functional_test.go @@ -41,7 +41,7 @@ func (s *FunctionalSuite) TestCreateSimplePipeline() { w := s.Given().Pipeline("@testdata/simple-pipeline.yaml"). When(). 
CreatePipelineAndWait() - defer w.DeletePipelineAndWait() + //defer w.DeletePipelineAndWait() pipelineName := "simple-pipeline" w.Expect(). @@ -146,6 +146,7 @@ func (s *FunctionalSuite) TestCreateSimplePipeline() { assert.Fail(s.T(), "timed out waiting for rate to be calculated") } timer.Stop() + time.Sleep(300 * time.Second) } func (s *FunctionalSuite) TestUDFFiltering() { From 56b589c8de7e68feabe6952dcdeb7f7789fdae20 Mon Sep 17 00:00:00 2001 From: Yashash H L Date: Sat, 14 Sep 2024 21:57:18 +0530 Subject: [PATCH 20/40] update ubuntu version in e2e Signed-off-by: Yashash H L --- .github/workflows/ci.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 6a5ea73c54..072def3059 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -184,7 +184,7 @@ jobs: e2e-tests: name: E2E Tests - runs-on: ubuntu-latest + runs-on: ubuntu-24.04 needs: [ build-rust-amd64 ] timeout-minutes: 20 strategy: From 15ad19a673ec213d760c04eb82ae70b1f53c15a4 Mon Sep 17 00:00:00 2001 From: Yashash H L Date: Sun, 15 Sep 2024 07:51:18 +0530 Subject: [PATCH 21/40] install protoc Signed-off-by: Yashash H L --- .github/workflows/ci.yaml | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 072def3059..5cb636e8d7 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -30,7 +30,7 @@ jobs: run: git diff --exit-code codegen: name: Codegen - runs-on: ubuntu-24.04 + runs-on: ubuntu-latest timeout-minutes: 5 steps: - name: Checkout code @@ -59,7 +59,7 @@ jobs: unit-tests: name: Unit Tests - runs-on: ubuntu-24.04 + runs-on: ubuntu-latest timeout-minutes: 10 services: redis: @@ -113,8 +113,8 @@ jobs: with: tool: grcov - - name: Install Protobuf Compiler - run: sudo apt-get install -y protobuf-compiler + - name: Install Protoc + uses: arduino/setup-protoc@v3 - name: Test Rust working-directory: ./rust @@ -150,7 
+150,7 @@ jobs: - run: git diff --exit-code build-rust-amd64: - runs-on: ubuntu-24.04 + runs-on: ubuntu-22.04 defaults: run: working-directory: ./rust @@ -167,8 +167,8 @@ jobs: echo "SCCACHE_GHA_ENABLED=true" >> $GITHUB_ENV - name: Run sccache-cache uses: mozilla-actions/sccache-action@v0.0.5 - - name: Install dependencies - run: sudo apt-get install -y protobuf-compiler + - name: Install Protoc + uses: arduino/setup-protoc@v3 - name: Build binary run: RUSTFLAGS='-C target-feature=+crt-static' cargo build --release --target x86_64-unknown-linux-gnu - name: Rename binary From 529a7e90a114814ad7d3a7739dcdf0eee90d80c9 Mon Sep 17 00:00:00 2001 From: Yashash H L Date: Sun, 15 Sep 2024 07:56:44 +0530 Subject: [PATCH 22/40] install latest protoc Signed-off-by: Yashash H L --- .github/workflows/ci.yaml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 5cb636e8d7..eec57b1a3b 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -169,6 +169,8 @@ jobs: uses: mozilla-actions/sccache-action@v0.0.5 - name: Install Protoc uses: arduino/setup-protoc@v3 + - name: Print Protoc version + run: protoc --version - name: Build binary run: RUSTFLAGS='-C target-feature=+crt-static' cargo build --release --target x86_64-unknown-linux-gnu - name: Rename binary @@ -184,7 +186,7 @@ jobs: e2e-tests: name: E2E Tests - runs-on: ubuntu-24.04 + runs-on: ubuntu-latest needs: [ build-rust-amd64 ] timeout-minutes: 20 strategy: From 3d96542577e81533abf58265827e050c1b2bf016 Mon Sep 17 00:00:00 2001 From: Yashash H L Date: Sun, 15 Sep 2024 17:41:00 +0530 Subject: [PATCH 23/40] only update ubuntu version for building rust image Signed-off-by: Yashash H L --- .github/workflows/ci.yaml | 6 +++--- go.mod | 4 ++-- go.sum | 8 ++++---- pkg/sdkclient/grpc/grpc_utils.go | 2 +- test/e2e/functional_test.go | 1 - 5 files changed, 10 insertions(+), 11 deletions(-) diff --git a/.github/workflows/ci.yaml 
b/.github/workflows/ci.yaml index eec57b1a3b..e9861ca408 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -150,7 +150,7 @@ jobs: - run: git diff --exit-code build-rust-amd64: - runs-on: ubuntu-22.04 + runs-on: ubuntu-24.04 defaults: run: working-directory: ./rust @@ -167,8 +167,8 @@ jobs: echo "SCCACHE_GHA_ENABLED=true" >> $GITHUB_ENV - name: Run sccache-cache uses: mozilla-actions/sccache-action@v0.0.5 - - name: Install Protoc - uses: arduino/setup-protoc@v3 + - name: Install dependencies + run: sudo apt-get install -y protobuf-compiler - name: Print Protoc version run: protoc --version - name: Build binary diff --git a/go.mod b/go.mod index b8776ed24d..2e0707ab7b 100644 --- a/go.mod +++ b/go.mod @@ -32,7 +32,7 @@ require ( github.com/montanaflynn/stats v0.0.0-20171201202039-1bf9dbcd8cbe github.com/nats-io/nats-server/v2 v2.10.17 github.com/nats-io/nats.go v1.36.0 - github.com/numaproj/numaflow-go v0.8.1-0.20240909093557-c9946b0e8b33 + github.com/numaproj/numaflow-go v0.8.2-0.20240915124432-84b9f75a87ef github.com/prometheus/client_golang v1.18.0 github.com/prometheus/client_model v0.5.0 github.com/prometheus/common v0.45.0 @@ -54,7 +54,7 @@ require ( golang.org/x/sync v0.8.0 golang.org/x/tools v0.21.1-0.20240508182429-e35e4ccd0d2d google.golang.org/genproto/googleapis/api v0.0.0-20240604185151-ef581f913117 - google.golang.org/grpc v1.66.0 + google.golang.org/grpc v1.64.0 google.golang.org/grpc/cmd/protoc-gen-go-grpc v1.3.0 google.golang.org/protobuf v1.34.2 k8s.io/api v0.29.2 diff --git a/go.sum b/go.sum index 969aab1b96..786dba1e6a 100644 --- a/go.sum +++ b/go.sum @@ -485,8 +485,8 @@ github.com/nats-io/nkeys v0.4.7/go.mod h1:kqXRgRDPlGy7nGaEDMuYzmiJCIAAWDK0IMBtDm github.com/nats-io/nuid v1.0.1 h1:5iA8DT8V7q8WK2EScv2padNa/rTESc1KdnPw4TC2paw= github.com/nats-io/nuid v1.0.1/go.mod h1:19wcPz3Ph3q0Jbyiqsd0kePYG7A95tJPxeL+1OSON2c= github.com/niemeyer/pretty v0.0.0-20200227124842-a10e7caefd8e/go.mod 
h1:zD1mROLANZcx1PVRCS0qkT7pwLkGfwJo4zjcN/Tysno= -github.com/numaproj/numaflow-go v0.8.1-0.20240909093557-c9946b0e8b33 h1:lrSD4qibegQp896k3wGRNwQSb2f533JJsG4gixFiv5k= -github.com/numaproj/numaflow-go v0.8.1-0.20240909093557-c9946b0e8b33/go.mod h1:jOCtHiDv5hvrrSOt3/swCd8FpmEP1w/RNZydqJCsB58= +github.com/numaproj/numaflow-go v0.8.2-0.20240915124432-84b9f75a87ef h1:3OtB1sPWlY9WiPdkyrWGRiOODTB5B/+CwYIbFHk/jAE= +github.com/numaproj/numaflow-go v0.8.2-0.20240915124432-84b9f75a87ef/go.mod h1:eod1ANE+bYHv7Cwy86erURCB1kAXzLQm7RrWfKMQszo= github.com/nxadm/tail v1.4.4/go.mod h1:kenIhsEOeOJmVchQTgglprH7qJGnHDVpk1VPCcaMI8A= github.com/nxadm/tail v1.4.8 h1:nPr65rt6Y5JFSKQO7qToXr7pePgD6Gwiw05lkbyAQTE= github.com/nxadm/tail v1.4.8/go.mod h1:+ncqLTQzXmGhMZNUePPaPqPvBxHAIsmXswZKocGu+AU= @@ -1047,8 +1047,8 @@ google.golang.org/grpc v1.34.0/go.mod h1:WotjhfgOW/POjDeRt8vscBtXq+2VjORFy659qA5 google.golang.org/grpc v1.35.0/go.mod h1:qjiiYl8FncCW8feJPdyg3v6XW24KsRHe+dy9BAGRRjU= google.golang.org/grpc v1.36.0/go.mod h1:qjiiYl8FncCW8feJPdyg3v6XW24KsRHe+dy9BAGRRjU= google.golang.org/grpc v1.36.1/go.mod h1:qjiiYl8FncCW8feJPdyg3v6XW24KsRHe+dy9BAGRRjU= -google.golang.org/grpc v1.66.0 h1:DibZuoBznOxbDQxRINckZcUvnCEvrW9pcWIE2yF9r1c= -google.golang.org/grpc v1.66.0/go.mod h1:s3/l6xSSCURdVfAnL+TqCNMyTDAGN6+lZeVxnZR128Y= +google.golang.org/grpc v1.64.0 h1:KH3VH9y/MgNQg1dE7b3XfVK0GsPSIzJwdF617gUSbvY= +google.golang.org/grpc v1.64.0/go.mod h1:oxjF8E3FBnjp+/gVFYdWacaLDx9na1aqy9oovLpxQYg= google.golang.org/grpc/cmd/protoc-gen-go-grpc v1.3.0 h1:rNBFJjBCOgVr9pWD7rs/knKL4FRTKgpZmsRfV214zcA= google.golang.org/grpc/cmd/protoc-gen-go-grpc v1.3.0/go.mod h1:Dk1tviKTvMCz5tvh7t+fh94dhmQVHuCt2OzJB3CTW9Y= google.golang.org/protobuf v0.0.0-20200109180630-ec00e32a8dfd/go.mod h1:DFci5gLYBciE7Vtevhsrf46CRTquxDuWsQurQQe4oz8= diff --git a/pkg/sdkclient/grpc/grpc_utils.go b/pkg/sdkclient/grpc/grpc_utils.go index 35d272e587..293ba8e8d7 100644 --- a/pkg/sdkclient/grpc/grpc_utils.go +++ b/pkg/sdkclient/grpc/grpc_utils.go @@ 
-63,7 +63,7 @@ func ConnectToServer(udsSockAddr string, serverInfo *serverinfo.ServerInfo, maxM } if err != nil { - return nil, fmt.Errorf("failed to execute grpc.Dial(%q): %w", sockAddr, err) + return nil, fmt.Errorf("failed to execute grpc.NewClient(%q): %w", sockAddr, err) } return conn, nil diff --git a/test/e2e/functional_test.go b/test/e2e/functional_test.go index 38a978c51f..7bea38e277 100644 --- a/test/e2e/functional_test.go +++ b/test/e2e/functional_test.go @@ -146,7 +146,6 @@ func (s *FunctionalSuite) TestCreateSimplePipeline() { assert.Fail(s.T(), "timed out waiting for rate to be calculated") } timer.Stop() - time.Sleep(300 * time.Second) } func (s *FunctionalSuite) TestUDFFiltering() { From 0d2230fd4372bbac23061b0df858e990699c3db8 Mon Sep 17 00:00:00 2001 From: Yashash H L Date: Sun, 15 Sep 2024 21:35:50 +0530 Subject: [PATCH 24/40] print logs Signed-off-by: Yashash H L --- .github/workflows/ci.yaml | 2 +- go.mod | 4 ++-- go.sum | 8 ++++---- test/e2e/functional_test.go | 15 +++++++++------ 4 files changed, 16 insertions(+), 13 deletions(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index e9861ca408..c7fea60713 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -186,7 +186,7 @@ jobs: e2e-tests: name: E2E Tests - runs-on: ubuntu-latest + runs-on: ubuntu-24.04 needs: [ build-rust-amd64 ] timeout-minutes: 20 strategy: diff --git a/go.mod b/go.mod index 2e0707ab7b..d42af24c01 100644 --- a/go.mod +++ b/go.mod @@ -32,7 +32,7 @@ require ( github.com/montanaflynn/stats v0.0.0-20171201202039-1bf9dbcd8cbe github.com/nats-io/nats-server/v2 v2.10.17 github.com/nats-io/nats.go v1.36.0 - github.com/numaproj/numaflow-go v0.8.2-0.20240915124432-84b9f75a87ef + github.com/numaproj/numaflow-go v0.8.2-0.20240913163521-4910018031a7 github.com/prometheus/client_golang v1.18.0 github.com/prometheus/client_model v0.5.0 github.com/prometheus/common v0.45.0 @@ -54,7 +54,7 @@ require ( golang.org/x/sync v0.8.0 golang.org/x/tools 
v0.21.1-0.20240508182429-e35e4ccd0d2d google.golang.org/genproto/googleapis/api v0.0.0-20240604185151-ef581f913117 - google.golang.org/grpc v1.64.0 + google.golang.org/grpc v1.66.0 google.golang.org/grpc/cmd/protoc-gen-go-grpc v1.3.0 google.golang.org/protobuf v1.34.2 k8s.io/api v0.29.2 diff --git a/go.sum b/go.sum index 786dba1e6a..74916c2ad6 100644 --- a/go.sum +++ b/go.sum @@ -485,8 +485,8 @@ github.com/nats-io/nkeys v0.4.7/go.mod h1:kqXRgRDPlGy7nGaEDMuYzmiJCIAAWDK0IMBtDm github.com/nats-io/nuid v1.0.1 h1:5iA8DT8V7q8WK2EScv2padNa/rTESc1KdnPw4TC2paw= github.com/nats-io/nuid v1.0.1/go.mod h1:19wcPz3Ph3q0Jbyiqsd0kePYG7A95tJPxeL+1OSON2c= github.com/niemeyer/pretty v0.0.0-20200227124842-a10e7caefd8e/go.mod h1:zD1mROLANZcx1PVRCS0qkT7pwLkGfwJo4zjcN/Tysno= -github.com/numaproj/numaflow-go v0.8.2-0.20240915124432-84b9f75a87ef h1:3OtB1sPWlY9WiPdkyrWGRiOODTB5B/+CwYIbFHk/jAE= -github.com/numaproj/numaflow-go v0.8.2-0.20240915124432-84b9f75a87ef/go.mod h1:eod1ANE+bYHv7Cwy86erURCB1kAXzLQm7RrWfKMQszo= +github.com/numaproj/numaflow-go v0.8.2-0.20240913163521-4910018031a7 h1:mC8BjEqnBa4u66FZSx/td/eq/gguKM3fAU6IoljxCng= +github.com/numaproj/numaflow-go v0.8.2-0.20240913163521-4910018031a7/go.mod h1:jOCtHiDv5hvrrSOt3/swCd8FpmEP1w/RNZydqJCsB58= github.com/nxadm/tail v1.4.4/go.mod h1:kenIhsEOeOJmVchQTgglprH7qJGnHDVpk1VPCcaMI8A= github.com/nxadm/tail v1.4.8 h1:nPr65rt6Y5JFSKQO7qToXr7pePgD6Gwiw05lkbyAQTE= github.com/nxadm/tail v1.4.8/go.mod h1:+ncqLTQzXmGhMZNUePPaPqPvBxHAIsmXswZKocGu+AU= @@ -1047,8 +1047,8 @@ google.golang.org/grpc v1.34.0/go.mod h1:WotjhfgOW/POjDeRt8vscBtXq+2VjORFy659qA5 google.golang.org/grpc v1.35.0/go.mod h1:qjiiYl8FncCW8feJPdyg3v6XW24KsRHe+dy9BAGRRjU= google.golang.org/grpc v1.36.0/go.mod h1:qjiiYl8FncCW8feJPdyg3v6XW24KsRHe+dy9BAGRRjU= google.golang.org/grpc v1.36.1/go.mod h1:qjiiYl8FncCW8feJPdyg3v6XW24KsRHe+dy9BAGRRjU= -google.golang.org/grpc v1.64.0 h1:KH3VH9y/MgNQg1dE7b3XfVK0GsPSIzJwdF617gUSbvY= -google.golang.org/grpc v1.64.0/go.mod 
h1:oxjF8E3FBnjp+/gVFYdWacaLDx9na1aqy9oovLpxQYg= +google.golang.org/grpc v1.66.0 h1:DibZuoBznOxbDQxRINckZcUvnCEvrW9pcWIE2yF9r1c= +google.golang.org/grpc v1.66.0/go.mod h1:s3/l6xSSCURdVfAnL+TqCNMyTDAGN6+lZeVxnZR128Y= google.golang.org/grpc/cmd/protoc-gen-go-grpc v1.3.0 h1:rNBFJjBCOgVr9pWD7rs/knKL4FRTKgpZmsRfV214zcA= google.golang.org/grpc/cmd/protoc-gen-go-grpc v1.3.0/go.mod h1:Dk1tviKTvMCz5tvh7t+fh94dhmQVHuCt2OzJB3CTW9Y= google.golang.org/protobuf v0.0.0-20200109180630-ec00e32a8dfd/go.mod h1:DFci5gLYBciE7Vtevhsrf46CRTquxDuWsQurQQe4oz8= diff --git a/test/e2e/functional_test.go b/test/e2e/functional_test.go index 7bea38e277..d76f0f1779 100644 --- a/test/e2e/functional_test.go +++ b/test/e2e/functional_test.go @@ -45,12 +45,7 @@ func (s *FunctionalSuite) TestCreateSimplePipeline() { pipelineName := "simple-pipeline" w.Expect(). - VertexPodsRunning().DaemonPodsRunning(). - VertexPodLogContains("input", LogSourceVertexStarted). - VertexPodLogContains("p1", LogUDFVertexStarted, PodLogCheckOptionWithContainer("numa")). - VertexPodLogContains("output", SinkVertexStarted). - DaemonPodLogContains(pipelineName, LogDaemonStarted). - VertexPodLogContains("output", `"Data":.*,"Createdts":.*`) + VertexPodsRunning().DaemonPodsRunning() defer w.VertexPodPortForward("input", 8001, dfv1.VertexMetricsPort). VertexPodPortForward("p1", 8002, dfv1.VertexMetricsPort). @@ -58,6 +53,14 @@ func (s *FunctionalSuite) TestCreateSimplePipeline() { DaemonPodPortForward(pipelineName, 1234, dfv1.DaemonServicePort). TerminateAllPodPortForwards() + w.StreamVertexPodLogs("input", "numa"). + StreamVertexPodLogs("p1", "numa"). + StreamVertexPodLogs("output", "numa"). + StreamISBLogs(). + StreamControllerLogs() + + defer w.TerminateAllPodLogs() + // Check vertex pod metrics endpoints HTTPExpect(s.T(), "https://localhost:8001").GET("/metrics"). Expect(). 
From 0fa9d5277fcff2a2e693775bff8765f77e4d913e Mon Sep 17 00:00:00 2001 From: Yashash H L Date: Mon, 16 Sep 2024 09:59:16 +0530 Subject: [PATCH 25/40] update version check Signed-off-by: Yashash H L --- go.mod | 2 +- go.sum | 4 ++-- pkg/sdkclient/serverinfo/types.go | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/go.mod b/go.mod index d42af24c01..675a76e99d 100644 --- a/go.mod +++ b/go.mod @@ -4,7 +4,7 @@ go 1.22 require ( github.com/IBM/sarama v1.43.2 - github.com/Masterminds/semver/v3 v3.2.1 + github.com/Masterminds/semver/v3 v3.3.0 github.com/Masterminds/sprig/v3 v3.2.3 github.com/ahmetb/gen-crd-api-reference-docs v0.3.0 github.com/antonmedv/expr v1.9.0 diff --git a/go.sum b/go.sum index 74916c2ad6..014ee727ed 100644 --- a/go.sum +++ b/go.sum @@ -53,8 +53,8 @@ github.com/Knetic/govaluate v3.0.1-0.20171022003610-9aa49832a739+incompatible/go github.com/Masterminds/goutils v1.1.1 h1:5nUrii3FMTL5diU80unEVvNevw1nH4+ZV4DSLVJLSYI= github.com/Masterminds/goutils v1.1.1/go.mod h1:8cTjp+g8YejhMuvIA5y2vz3BpJxksy863GQaJW2MFNU= github.com/Masterminds/semver/v3 v3.2.0/go.mod h1:qvl/7zhW3nngYb5+80sSMF+FG2BjYrf8m9wsX0PNOMQ= -github.com/Masterminds/semver/v3 v3.2.1 h1:RN9w6+7QoMeJVGyfmbcgs28Br8cvmnucEXnY0rYXWg0= -github.com/Masterminds/semver/v3 v3.2.1/go.mod h1:qvl/7zhW3nngYb5+80sSMF+FG2BjYrf8m9wsX0PNOMQ= +github.com/Masterminds/semver/v3 v3.3.0 h1:B8LGeaivUe71a5qox1ICM/JLl0NqZSW5CHyL+hmvYS0= +github.com/Masterminds/semver/v3 v3.3.0/go.mod h1:4V+yj/TJE1HU9XfppCwVMZq3I84lprf4nC11bSS5beM= github.com/Masterminds/sprig/v3 v3.2.3 h1:eL2fZNezLomi0uOLqjQoN6BfsDD+fyLtgbJMAj9n6YA= github.com/Masterminds/sprig/v3 v3.2.3/go.mod h1:rXcFaZ2zZbLRJv/xSysmlgIM1u11eBaRMhvYXJNkGuM= github.com/NYTimes/gziphandler v0.0.0-20170623195520-56545f4a5d46/go.mod h1:3wb06e3pkSAbeQ52E9H9iFoQsEEwGN64994WTCIhntQ= diff --git a/pkg/sdkclient/serverinfo/types.go b/pkg/sdkclient/serverinfo/types.go index fc8fdd9b81..334a9f4a8e 100644 --- a/pkg/sdkclient/serverinfo/types.go +++ 
b/pkg/sdkclient/serverinfo/types.go @@ -28,7 +28,7 @@ const ( type sdkConstraints map[Language]string var minimumSupportedSDKVersions = sdkConstraints{ - Go: "0.8.0", + Go: "0.8.0-0", Python: "0.8.0", Java: "0.8.0", Rust: "0.1.0", From bd95156606715efd7c218c69a537308dfd1596d9 Mon Sep 17 00:00:00 2001 From: Yashash H L Date: Mon, 16 Sep 2024 12:29:35 +0530 Subject: [PATCH 26/40] update e2e tests Signed-off-by: Yashash H L --- .github/workflows/ci.yaml | 2 +- .github/workflows/nightly-build.yml | 1 - .github/workflows/release.yml | 2 +- go.mod | 8 ++--- go.sum | 16 +++++----- pkg/sdkclient/source/client.go | 25 ++++++++++++++-- test/e2e/functional_test.go | 15 ++++------ .../mono-vertex-with-transformer.yaml | 4 ++- .../testdata/simple-source-go.yaml | 2 +- .../testdata/simple-source-java.yaml | 8 +++-- .../testdata/simple-source-python.yaml | 2 ++ .../testdata/simple-source-rs.yaml | 29 +++++++++++++++++++ test/udsource-e2e/udsource_test.go | 12 ++++++-- 13 files changed, 93 insertions(+), 33 deletions(-) create mode 100644 test/udsource-e2e/testdata/simple-source-rs.yaml diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index c7fea60713..e9861ca408 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -186,7 +186,7 @@ jobs: e2e-tests: name: E2E Tests - runs-on: ubuntu-24.04 + runs-on: ubuntu-latest needs: [ build-rust-amd64 ] timeout-minutes: 20 strategy: diff --git a/.github/workflows/nightly-build.yml b/.github/workflows/nightly-build.yml index 5f5179fb37..ed6a898b83 100644 --- a/.github/workflows/nightly-build.yml +++ b/.github/workflows/nightly-build.yml @@ -160,4 +160,3 @@ jobs: - name: Container build and push with arm64/amd64 run: | IMAGE_NAMESPACE=${{ secrets.QUAYIO_ORG }} VERSION=${{ steps.version.outputs.VERSION }} DOCKER_PUSH=true DOCKER_BUILD_ARGS="--label \"quay.expires-after=30d\"" make image-multi - diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 23a05eacb3..84cd8f533d 100644 --- 
a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -90,7 +90,7 @@ jobs: build-push-linux-multi: name: Build & push linux/amd64 and linux/arm64 needs: [ build-go-binaries, build-rust-amd64, build-rust-arm64] - runs-on: ubuntu-24.04 + runs-on: ubuntu-20.04 if: github.repository == 'numaproj/numaflow' strategy: matrix: diff --git a/go.mod b/go.mod index 675a76e99d..aa75a3a85e 100644 --- a/go.mod +++ b/go.mod @@ -32,7 +32,7 @@ require ( github.com/montanaflynn/stats v0.0.0-20171201202039-1bf9dbcd8cbe github.com/nats-io/nats-server/v2 v2.10.17 github.com/nats-io/nats.go v1.36.0 - github.com/numaproj/numaflow-go v0.8.2-0.20240913163521-4910018031a7 + github.com/numaproj/numaflow-go v0.8.2-0.20240916060625-c2c5d1798c2e github.com/prometheus/client_golang v1.18.0 github.com/prometheus/client_model v0.5.0 github.com/prometheus/common v0.45.0 @@ -48,8 +48,8 @@ require ( go.uber.org/goleak v1.3.0 go.uber.org/multierr v1.11.0 go.uber.org/zap v1.26.0 - golang.org/x/crypto v0.26.0 - golang.org/x/net v0.28.0 + golang.org/x/crypto v0.27.0 + golang.org/x/net v0.29.0 golang.org/x/oauth2 v0.21.0 golang.org/x/sync v0.8.0 golang.org/x/tools v0.21.1-0.20240508182429-e35e4ccd0d2d @@ -202,7 +202,7 @@ require ( golang.org/x/exp v0.0.0-20240531132922-fd00a4e0eefc // indirect golang.org/x/mod v0.17.0 // indirect golang.org/x/sys v0.25.0 // indirect - golang.org/x/term v0.23.0 // indirect + golang.org/x/term v0.24.0 // indirect golang.org/x/text v0.18.0 // indirect golang.org/x/time v0.5.0 // indirect golang.org/x/xerrors v0.0.0-20220907171357-04be3eba64a2 // indirect diff --git a/go.sum b/go.sum index 014ee727ed..87103f3790 100644 --- a/go.sum +++ b/go.sum @@ -485,8 +485,8 @@ github.com/nats-io/nkeys v0.4.7/go.mod h1:kqXRgRDPlGy7nGaEDMuYzmiJCIAAWDK0IMBtDm github.com/nats-io/nuid v1.0.1 h1:5iA8DT8V7q8WK2EScv2padNa/rTESc1KdnPw4TC2paw= github.com/nats-io/nuid v1.0.1/go.mod h1:19wcPz3Ph3q0Jbyiqsd0kePYG7A95tJPxeL+1OSON2c= github.com/niemeyer/pretty 
v0.0.0-20200227124842-a10e7caefd8e/go.mod h1:zD1mROLANZcx1PVRCS0qkT7pwLkGfwJo4zjcN/Tysno= -github.com/numaproj/numaflow-go v0.8.2-0.20240913163521-4910018031a7 h1:mC8BjEqnBa4u66FZSx/td/eq/gguKM3fAU6IoljxCng= -github.com/numaproj/numaflow-go v0.8.2-0.20240913163521-4910018031a7/go.mod h1:jOCtHiDv5hvrrSOt3/swCd8FpmEP1w/RNZydqJCsB58= +github.com/numaproj/numaflow-go v0.8.2-0.20240916060625-c2c5d1798c2e h1:hHugY5YX3IoguvCzFw4MopRgXklJ7wq1Rgclw3LCdxQ= +github.com/numaproj/numaflow-go v0.8.2-0.20240916060625-c2c5d1798c2e/go.mod h1:g4JZOyUPhjfhv+kR0sX5d8taw/dasgKPXLvQBi39mJ4= github.com/nxadm/tail v1.4.4/go.mod h1:kenIhsEOeOJmVchQTgglprH7qJGnHDVpk1VPCcaMI8A= github.com/nxadm/tail v1.4.8 h1:nPr65rt6Y5JFSKQO7qToXr7pePgD6Gwiw05lkbyAQTE= github.com/nxadm/tail v1.4.8/go.mod h1:+ncqLTQzXmGhMZNUePPaPqPvBxHAIsmXswZKocGu+AU= @@ -687,8 +687,8 @@ golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5y golang.org/x/crypto v0.0.0-20220214200702-86341886e292/go.mod h1:IxCIyHEi3zRg3s0A5j5BB6A9Jmi73HwBIUl50j+osU4= golang.org/x/crypto v0.3.0/go.mod h1:hebNnKkNXi2UzZN1eVRvBB7co0a+JxK6XbPiWVs/3J4= golang.org/x/crypto v0.6.0/go.mod h1:OFC/31mSvZgRz0V1QTNCzfAI1aIRzbiufJtkMIlEp58= -golang.org/x/crypto v0.26.0 h1:RrRspgV4mU+YwB4FYnuBoKsUapNIL5cohGAmSH3azsw= -golang.org/x/crypto v0.26.0/go.mod h1:GY7jblb9wI+FOo5y8/S2oY4zWP07AkOJ4+jxCqdqn54= +golang.org/x/crypto v0.27.0 h1:GXm2NjJrPaiv/h1tb2UH8QfgC/hOf/+z0p6PT8o1w7A= +golang.org/x/crypto v0.27.0/go.mod h1:1Xngt8kV6Dvbssa53Ziq6Eqn0HqbZi5Z6R0ZpwQzt70= golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= golang.org/x/exp v0.0.0-20190306152737-a1d7652674e8/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= golang.org/x/exp v0.0.0-20190510132918-efd6b22b2522/go.mod h1:ZjyILWgesfNpC6sMxTJOJm9Kp84zZh5NQWvqDGG3Qr8= @@ -774,8 +774,8 @@ golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug golang.org/x/net v0.2.0/go.mod 
h1:KqCZLdyyvdV855qA2rE3GC2aiw5xGR5TEjj8smXukLY= golang.org/x/net v0.6.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs= golang.org/x/net v0.7.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs= -golang.org/x/net v0.28.0 h1:a9JDOJc5GMUJ0+UDqmLT86WiEy7iWyIhz8gz8E4e5hE= -golang.org/x/net v0.28.0/go.mod h1:yqtgsTWOOnlGLG9GFRrK3++bGOUEkNBoHZc8MEDWPNg= +golang.org/x/net v0.29.0 h1:5ORfpBpCs4HzDYoodCDBbwHzdR5UrLBZ3sOnUJmFoHo= +golang.org/x/net v0.29.0/go.mod h1:gLkgy8jTGERgjzMic6DS9+SP0ajcu6Xu3Orq/SpETg0= golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U= golang.org/x/oauth2 v0.0.0-20190226205417-e64efc72b421/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= golang.org/x/oauth2 v0.0.0-20190604053449-0f29369cfe45/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= @@ -870,8 +870,8 @@ golang.org/x/term v0.0.0-20210615171337-6886f2dfbf5b/go.mod h1:jbD1KX2456YbFQfuX golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= golang.org/x/term v0.2.0/go.mod h1:TVmDHMZPmdnySmBfhjOoOdhjzdE1h4u1VwSiw2l1Nuc= golang.org/x/term v0.5.0/go.mod h1:jMB1sMXY+tzblOD4FWmEbocvup2/aLOaQEp7JmGp78k= -golang.org/x/term v0.23.0 h1:F6D4vR+EHoL9/sWAWgAR1H2DcHr4PareCbAaCo1RpuU= -golang.org/x/term v0.23.0/go.mod h1:DgV24QBUrK6jhZXl+20l6UWznPlwAHm1Q1mGHtydmSk= +golang.org/x/term v0.24.0 h1:Mh5cbb+Zk2hqqXNO7S1iTjEphVL+jb8ZWaqh/g+JWkM= +golang.org/x/term v0.24.0/go.mod h1:lOBK/LVxemqiMij05LGJ0tzNr8xlmwBRJ81PX6wVLH8= golang.org/x/text v0.0.0-20170915032832-14c0d48ead0c/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.1-0.20180807135948-17ff2d5776d2/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= diff --git a/pkg/sdkclient/source/client.go b/pkg/sdkclient/source/client.go index 1a9fc98861..69e8c241c8 100644 --- a/pkg/sdkclient/source/client.go +++ 
b/pkg/sdkclient/source/client.go @@ -20,6 +20,7 @@ import ( "context" "errors" "fmt" + "time" sourcepb "github.com/numaproj/numaflow-go/pkg/apis/proto/source/v1" "google.golang.org/grpc" @@ -28,6 +29,7 @@ import ( "github.com/numaproj/numaflow/pkg/sdkclient" grpcutil "github.com/numaproj/numaflow/pkg/sdkclient/grpc" "github.com/numaproj/numaflow/pkg/sdkclient/serverinfo" + "github.com/numaproj/numaflow/pkg/shared/logging" ) // client contains the grpc connection and the grpc client. @@ -42,7 +44,7 @@ var _ Client = (*client)(nil) func New(ctx context.Context, serverInfo *serverinfo.ServerInfo, inputOptions ...sdkclient.Option) (Client, error) { var opts = sdkclient.DefaultOptions(sdkclient.SourceAddr) - + var logger = logging.FromContext(ctx) for _, inputOption := range inputOptions { inputOption(opts) } @@ -57,14 +59,31 @@ func New(ctx context.Context, serverInfo *serverinfo.ServerInfo, inputOptions .. c.conn = conn c.grpcClt = sourcepb.NewSourceClient(conn) + // wait until the server is ready +waitUntilReady: + for { + select { + case <-ctx.Done(): + return nil, fmt.Errorf("failed to connect to the server: %v", ctx.Err()) + default: + ready, _ := c.IsReady(ctx, &emptypb.Empty{}) + if ready { + break waitUntilReady + } else { + logger.Warnw("source client is not ready") + time.Sleep(100 * time.Millisecond) + } + } + } + c.readStream, err = c.grpcClt.ReadFn(ctx) if err != nil { - return nil, err + return nil, fmt.Errorf("failed to create read stream: %v", err) } c.ackStream, err = c.grpcClt.AckFn(ctx) if err != nil { - return nil, err + return nil, fmt.Errorf("failed to create ack stream: %v", err) } return c, nil diff --git a/test/e2e/functional_test.go b/test/e2e/functional_test.go index d76f0f1779..7bea38e277 100644 --- a/test/e2e/functional_test.go +++ b/test/e2e/functional_test.go @@ -45,7 +45,12 @@ func (s *FunctionalSuite) TestCreateSimplePipeline() { pipelineName := "simple-pipeline" w.Expect(). 
- VertexPodsRunning().DaemonPodsRunning() + VertexPodsRunning().DaemonPodsRunning(). + VertexPodLogContains("input", LogSourceVertexStarted). + VertexPodLogContains("p1", LogUDFVertexStarted, PodLogCheckOptionWithContainer("numa")). + VertexPodLogContains("output", SinkVertexStarted). + DaemonPodLogContains(pipelineName, LogDaemonStarted). + VertexPodLogContains("output", `"Data":.*,"Createdts":.*`) defer w.VertexPodPortForward("input", 8001, dfv1.VertexMetricsPort). VertexPodPortForward("p1", 8002, dfv1.VertexMetricsPort). @@ -53,14 +58,6 @@ func (s *FunctionalSuite) TestCreateSimplePipeline() { DaemonPodPortForward(pipelineName, 1234, dfv1.DaemonServicePort). TerminateAllPodPortForwards() - w.StreamVertexPodLogs("input", "numa"). - StreamVertexPodLogs("p1", "numa"). - StreamVertexPodLogs("output", "numa"). - StreamISBLogs(). - StreamControllerLogs() - - defer w.TerminateAllPodLogs() - // Check vertex pod metrics endpoints HTTPExpect(s.T(), "https://localhost:8001").GET("/metrics"). Expect(). 
diff --git a/test/monovertex-e2e/testdata/mono-vertex-with-transformer.yaml b/test/monovertex-e2e/testdata/mono-vertex-with-transformer.yaml index acdb0b29f6..d8f1e0b0dd 100644 --- a/test/monovertex-e2e/testdata/mono-vertex-with-transformer.yaml +++ b/test/monovertex-e2e/testdata/mono-vertex-with-transformer.yaml @@ -3,10 +3,12 @@ kind: MonoVertex metadata: name: transformer-mono-vertex spec: + scale: + min: 1 source: udsource: container: - image: quay.io/numaio/numaflow-go/source-simple-source:stable + image: quay.io/yhl25/numaflow-go/source-simple-source:stable imagePullPolicy: Always transformer: container: diff --git a/test/udsource-e2e/testdata/simple-source-go.yaml b/test/udsource-e2e/testdata/simple-source-go.yaml index 28e27af1f6..84515c7007 100644 --- a/test/udsource-e2e/testdata/simple-source-go.yaml +++ b/test/udsource-e2e/testdata/simple-source-go.yaml @@ -10,7 +10,7 @@ spec: container: # A simple user-defined source for e2e testing # See https://github.com/numaproj/numaflow-go/tree/main/pkg/sourcer/examples/simple_source - image: quay.io/numaio/numaflow-go/source-simple-source:stable + image: quay.io/yhl25/numaflow-go/source-simple-source:stable imagePullPolicy: Always limits: readBatchSize: 500 diff --git a/test/udsource-e2e/testdata/simple-source-java.yaml b/test/udsource-e2e/testdata/simple-source-java.yaml index 2d030bc06b..c53ecf9b03 100644 --- a/test/udsource-e2e/testdata/simple-source-java.yaml +++ b/test/udsource-e2e/testdata/simple-source-java.yaml @@ -5,16 +5,20 @@ metadata: spec: vertices: - name: in + scale: + min: 1 source: udsource: container: # A simple user-defined source for e2e testing # See https://github.com/numaproj/numaflow-java/tree/main/examples/src/main/java/io/numaproj/numaflow/examples/source/simple - image: quay.io/numaio/numaflow-java/source-simple-source:stable - imagePullPolicy: Always + image: quay.io/yhl25/numaflow-java/source-simple-source:stable + imagePullPolicy: IfNotPresent limits: readBatchSize: 500 - name: out + 
scale: + min: 1 sink: log: {} edges: diff --git a/test/udsource-e2e/testdata/simple-source-python.yaml b/test/udsource-e2e/testdata/simple-source-python.yaml index 47bc7a175b..a64960e9fe 100644 --- a/test/udsource-e2e/testdata/simple-source-python.yaml +++ b/test/udsource-e2e/testdata/simple-source-python.yaml @@ -5,6 +5,8 @@ metadata: spec: vertices: - name: in + scale: + min: 1 source: udsource: container: diff --git a/test/udsource-e2e/testdata/simple-source-rs.yaml b/test/udsource-e2e/testdata/simple-source-rs.yaml new file mode 100644 index 0000000000..dabf34df2f --- /dev/null +++ b/test/udsource-e2e/testdata/simple-source-rs.yaml @@ -0,0 +1,29 @@ +apiVersion: numaflow.numaproj.io/v1alpha1 +kind: Pipeline +metadata: + name: simple-source-rust +spec: + vertices: + - name: in + source: + udsource: + container: + # A simple user-defined source for e2e testing + # See https://github.com/numaproj/numaflow-go/tree/main/pkg/sourcer/examples/simple_source + image: quay.io/numaio/numaflow-rs/simple-source:stable + imagePullPolicy: Always + limits: + readBatchSize: 500 + scale: + # set it as two pods to be different from the sink such that we can use this pipeline + # to test processing rate is consistent across vertices when they have different replica counts. 
+ min: 1 + - name: out + sink: + log: {} + scale: + min: 1 + max: 1 + edges: + - from: in + to: out diff --git a/test/udsource-e2e/udsource_test.go b/test/udsource-e2e/udsource_test.go index ef5a9ffacf..8d3de49c74 100644 --- a/test/udsource-e2e/udsource_test.go +++ b/test/udsource-e2e/udsource_test.go @@ -50,12 +50,16 @@ func (s *UserDefinedSourceSuite) testSimpleSourcePython() { s.testSimpleSource("python", false) } +func (s *UserDefinedSourceSuite) testSimpleSourceRust() { + s.testSimpleSource("rust", false) +} + func (s *UserDefinedSourceSuite) TestUDSource() { var wg sync.WaitGroup - wg.Add(3) + wg.Add(4) go func() { defer wg.Done() - s.testSimpleSourcePython() + // s.testSimpleSourcePython() // FIXME: python udsource }() go func() { defer wg.Done() @@ -65,6 +69,10 @@ func (s *UserDefinedSourceSuite) TestUDSource() { defer wg.Done() s.testSimpleSourceGo() }() + go func() { + defer wg.Done() + s.testSimpleSourceRust() + }() wg.Wait() } From 19326f82fbcb57abb821849ea9d2da7dae23c609 Mon Sep 17 00:00:00 2001 From: Yashash H L Date: Tue, 17 Sep 2024 15:24:56 +0530 Subject: [PATCH 27/40] introduce handshake Signed-off-by: Yashash H L --- go.mod | 2 +- go.sum | 4 +- pkg/apis/proto/source/v1/source.proto | 168 ++++++++++++++------------ pkg/sdkclient/source/client.go | 19 +++ rust/Cargo.lock | 26 +++- rust/monovertex/Cargo.toml | 2 +- rust/monovertex/proto/source.proto | 13 +- rust/monovertex/src/forwarder.rs | 27 ++++- rust/monovertex/src/lib.rs | 1 + rust/monovertex/src/source.rs | 36 +++++- rust/monovertex/src/startup.rs | 6 + 11 files changed, 207 insertions(+), 97 deletions(-) diff --git a/go.mod b/go.mod index aa75a3a85e..84ad2e952f 100644 --- a/go.mod +++ b/go.mod @@ -32,7 +32,7 @@ require ( github.com/montanaflynn/stats v0.0.0-20171201202039-1bf9dbcd8cbe github.com/nats-io/nats-server/v2 v2.10.17 github.com/nats-io/nats.go v1.36.0 - github.com/numaproj/numaflow-go v0.8.2-0.20240916060625-c2c5d1798c2e + github.com/numaproj/numaflow-go 
v0.8.2-0.20240917052911-ee2f3086d64e github.com/prometheus/client_golang v1.18.0 github.com/prometheus/client_model v0.5.0 github.com/prometheus/common v0.45.0 diff --git a/go.sum b/go.sum index 87103f3790..236809e8c4 100644 --- a/go.sum +++ b/go.sum @@ -485,8 +485,8 @@ github.com/nats-io/nkeys v0.4.7/go.mod h1:kqXRgRDPlGy7nGaEDMuYzmiJCIAAWDK0IMBtDm github.com/nats-io/nuid v1.0.1 h1:5iA8DT8V7q8WK2EScv2padNa/rTESc1KdnPw4TC2paw= github.com/nats-io/nuid v1.0.1/go.mod h1:19wcPz3Ph3q0Jbyiqsd0kePYG7A95tJPxeL+1OSON2c= github.com/niemeyer/pretty v0.0.0-20200227124842-a10e7caefd8e/go.mod h1:zD1mROLANZcx1PVRCS0qkT7pwLkGfwJo4zjcN/Tysno= -github.com/numaproj/numaflow-go v0.8.2-0.20240916060625-c2c5d1798c2e h1:hHugY5YX3IoguvCzFw4MopRgXklJ7wq1Rgclw3LCdxQ= -github.com/numaproj/numaflow-go v0.8.2-0.20240916060625-c2c5d1798c2e/go.mod h1:g4JZOyUPhjfhv+kR0sX5d8taw/dasgKPXLvQBi39mJ4= +github.com/numaproj/numaflow-go v0.8.2-0.20240917052911-ee2f3086d64e h1:F3iujbel8y5X20bVMY0Am6XDyL5eDOC/6kxyI8uxfpg= +github.com/numaproj/numaflow-go v0.8.2-0.20240917052911-ee2f3086d64e/go.mod h1:g4JZOyUPhjfhv+kR0sX5d8taw/dasgKPXLvQBi39mJ4= github.com/nxadm/tail v1.4.4/go.mod h1:kenIhsEOeOJmVchQTgglprH7qJGnHDVpk1VPCcaMI8A= github.com/nxadm/tail v1.4.8 h1:nPr65rt6Y5JFSKQO7qToXr7pePgD6Gwiw05lkbyAQTE= github.com/nxadm/tail v1.4.8/go.mod h1:+ncqLTQzXmGhMZNUePPaPqPvBxHAIsmXswZKocGu+AU= diff --git a/pkg/apis/proto/source/v1/source.proto b/pkg/apis/proto/source/v1/source.proto index 0a70646209..1a2be2b2db 100644 --- a/pkg/apis/proto/source/v1/source.proto +++ b/pkg/apis/proto/source/v1/source.proto @@ -24,92 +24,102 @@ import "google/protobuf/empty.proto"; package source.v1; -service Source { - // Read returns a stream of datum responses. - // The size of the returned ReadResponse is less than or equal to the num_records specified in each ReadRequest. 
- // If the request timeout is reached on the server side, the returned ReadResponse will contain all the datum that have been read (which could be an empty list). - // The server will continue to read and respond to subsequent ReadRequests until the client closes the stream. - rpc ReadFn(stream ReadRequest) returns (stream ReadResponse); - - // AckFn acknowledges a stream of datum offsets. - // When AckFn is called, it implicitly indicates that the datum stream has been processed by the source vertex. - // The caller (numa) expects the AckFn to be successful, and it does not expect any errors. - // If there are some irrecoverable errors when the callee (UDSource) is processing the AckFn request, - // then it is best to crash because there are no other retry mechanisms possible. - rpc AckFn(stream AckRequest) returns (AckResponse); - - // PendingFn returns the number of pending records at the user defined source. - rpc PendingFn(google.protobuf.Empty) returns (PendingResponse); - - // PartitionsFn returns the list of partitions for the user defined source. - rpc PartitionsFn(google.protobuf.Empty) returns (PartitionsResponse); - - // IsReady is the heartbeat endpoint for user defined source gRPC. - rpc IsReady(google.protobuf.Empty) returns (ReadyResponse); -} - -/* - * ReadRequest is the request for reading datum stream from user defined source. - */ -message ReadRequest { - message Request { - // Required field indicating the number of records to read. - uint64 num_records = 1; - // Required field indicating the request timeout in milliseconds. - // uint32 can represent 2^32 milliseconds, which is about 49 days. - // We don't use uint64 because time.Duration takes int64 as nano seconds. Using uint64 for milli will cause overflow. - uint32 timeout_in_ms = 2; + service Source { + // Read returns a stream of datum responses. + // The size of the returned ReadResponse is less than or equal to the num_records specified in each ReadRequest. 
+ // If the request timeout is reached on the server side, the returned ReadResponse will contain all the datum that have been read (which could be an empty list). + // The server will continue to read and respond to subsequent ReadRequests until the client closes the stream. + rpc ReadFn(stream ReadRequest) returns (stream ReadResponse); + + // AckFn acknowledges a stream of datum offsets. + // When AckFn is called, it implicitly indicates that the datum stream has been processed by the source vertex. + // The caller (numa) expects the AckFn to be successful, and it does not expect any errors. + // If there are some irrecoverable errors when the callee (UDSource) is processing the AckFn request, + // then it is best to crash because there are no other retry mechanisms possible. + rpc AckFn(stream AckRequest) returns (AckResponse); + + // PendingFn returns the number of pending records at the user defined source. + rpc PendingFn(google.protobuf.Empty) returns (PendingResponse); + + // PartitionsFn returns the list of partitions for the user defined source. + rpc PartitionsFn(google.protobuf.Empty) returns (PartitionsResponse); + + // IsReady is the heartbeat endpoint for user defined source gRPC. + rpc IsReady(google.protobuf.Empty) returns (ReadyResponse); } - // Required field indicating the request. - Request request = 1; -} -/* - * ReadResponse is the response for reading datum stream from user defined source. - */ -message ReadResponse { - message Result { - // Required field holding the payload of the datum. - bytes payload = 1; - // Required field indicating the offset information of the datum. - Offset offset = 2; - // Required field representing the time associated with each datum. It is used for watermarking. - google.protobuf.Timestamp event_time = 3; - // Optional list of keys associated with the datum. - // Key is the "key" attribute in (key,value) as in the map-reduce paradigm. 
- // We add this optional field to support the use case where the user defined source can provide keys for the datum. - // e.g. Kafka and Redis Stream message usually include information about the keys. - repeated string keys = 4; - // Optional list of headers associated with the datum. - // Headers are the metadata associated with the datum. - // e.g. Kafka and Redis Stream message usually include information about the headers. - map headers = 5; + /* + * Handshake message between client and server to indicate the start of transmission. + */ + message Handshake { + // Required field indicating the start of transmission. + bool sot = 1; } - message Status { - // Code to indicate the status of the response. - enum Code { - SUCCESS = 0; - FAILURE = 1; - } - // Error to indicate the error type. If the code is FAILURE, then the error field will be populated. - enum Error { - UNACKED = 0; - OTHER = 1; + /* + * ReadRequest is the request for reading datum stream from user defined source. + */ + message ReadRequest { + message Request { + // Required field indicating the number of records to read. + uint64 num_records = 1; + // Required field indicating the request timeout in milliseconds. + // uint32 can represent 2^32 milliseconds, which is about 49 days. + // We don't use uint64 because time.Duration takes int64 as nano seconds. Using uint64 for milli will cause overflow. + uint32 timeout_in_ms = 2; } + // Required field indicating the request. + Request request = 1; + optional Handshake handshake = 2; + } - // End of transmission flag. - bool eot = 1; - Code code = 2; - Error error = 3; - optional string msg = 4; + /* + * ReadResponse is the response for reading datum stream from user defined source. + */ + message ReadResponse { + message Result { + // Required field holding the payload of the datum. + bytes payload = 1; + // Required field indicating the offset information of the datum. 
+ Offset offset = 2; + // Required field representing the time associated with each datum. It is used for watermarking. + google.protobuf.Timestamp event_time = 3; + // Optional list of keys associated with the datum. + // Key is the "key" attribute in (key,value) as in the map-reduce paradigm. + // We add this optional field to support the use case where the user defined source can provide keys for the datum. + // e.g. Kafka and Redis Stream message usually include information about the keys. + repeated string keys = 4; + // Optional list of headers associated with the datum. + // Headers are the metadata associated with the datum. + // e.g. Kafka and Redis Stream message usually include information about the headers. + map headers = 5; + } + message Status { + // Code to indicate the status of the response. + enum Code { + SUCCESS = 0; + FAILURE = 1; + } + + // Error to indicate the error type. If the code is FAILURE, then the error field will be populated. + enum Error { + UNACKED = 0; + OTHER = 1; + } + + // End of transmission flag. + bool eot = 1; + Code code = 2; + optional Error error = 3; + optional string msg = 4; + } + // Required field holding the result. + Result result = 1; + // Status of the response. Holds the end of transmission flag and the status code. + // + Status status = 2; + optional Handshake handshake = 3; } - // Required field holding the result. - Result result = 1; - // Status of the response. Holds the end of transmission flag and the status code. - // - Status status = 2; -} /* * AckRequest is the request for acknowledging datum. 
diff --git a/pkg/sdkclient/source/client.go b/pkg/sdkclient/source/client.go index 69e8c241c8..15be4c9317 100644 --- a/pkg/sdkclient/source/client.go +++ b/pkg/sdkclient/source/client.go @@ -86,6 +86,25 @@ waitUntilReady: return nil, fmt.Errorf("failed to create ack stream: %v", err) } + // Send handshake request + handshakeRequest := &sourcepb.ReadRequest{ + Handshake: &sourcepb.Handshake{ + Sot: true, + }, + } + if err := c.readStream.Send(handshakeRequest); err != nil { + return nil, fmt.Errorf("failed to send handshake request: %v", err) + } + + // Wait for handshake response + handshakeResponse, err := c.readStream.Recv() + if err != nil { + return nil, fmt.Errorf("failed to receive handshake response: %v", err) + } + if handshakeResponse.GetHandshake() == nil || !handshakeResponse.GetHandshake().GetSot() { + return nil, fmt.Errorf("invalid handshake response") + } + return c, nil } diff --git a/rust/Cargo.lock b/rust/Cargo.lock index e6e9678f47..10e16e60ee 100644 --- a/rust/Cargo.lock +++ b/rust/Cargo.lock @@ -1478,7 +1478,7 @@ dependencies = [ "hyper-util", "kube", "log", - "numaflow 0.1.1", + "numaflow 0.1.1 (git+https://github.com/numaproj/numaflow-rs.git?branch=handshake)", "numaflow-models", "once_cell", "parking_lot", @@ -1602,6 +1602,28 @@ dependencies = [ "tracing-subscriber", ] +[[package]] +name = "numaflow" +version = "0.1.1" +source = "git+https://github.com/numaproj/numaflow-rs.git?branch=handshake#f3061b039c877e9828c50fcd0424391727e4920d" +dependencies = [ + "chrono", + "futures-util", + "hyper-util", + "prost", + "prost-types", + "serde", + "serde_json", + "thiserror", + "tokio", + "tokio-stream", + "tokio-util", + "tonic", + "tonic-build", + "tracing", + "uuid", +] + [[package]] name = "numaflow" version = "0.1.1" @@ -2594,7 +2616,7 @@ dependencies = [ name = "servesink" version = "0.1.0" dependencies = [ - "numaflow 0.1.1", + "numaflow 0.1.1 (git+https://github.com/numaproj/numaflow-rs.git?branch=source-streaming)", "reqwest 0.12.7", "tokio", 
"tonic", diff --git a/rust/monovertex/Cargo.toml b/rust/monovertex/Cargo.toml index fdefe40a93..da75a4c8c8 100644 --- a/rust/monovertex/Cargo.toml +++ b/rust/monovertex/Cargo.toml @@ -38,7 +38,7 @@ log = "0.4.22" [dev-dependencies] tempfile = "3.11.0" -numaflow = { git = "https://github.com/numaproj/numaflow-rs.git", branch = "source-streaming" } +numaflow = { git = "https://github.com/numaproj/numaflow-rs.git", branch = "handshake" } [build-dependencies] tonic-build = "0.12.1" diff --git a/rust/monovertex/proto/source.proto b/rust/monovertex/proto/source.proto index 3ea56aeba5..31a762ac59 100644 --- a/rust/monovertex/proto/source.proto +++ b/rust/monovertex/proto/source.proto @@ -29,6 +29,14 @@ service Source { rpc IsReady(google.protobuf.Empty) returns (ReadyResponse); } +/* + * Handshake message between client and server to indicate the start of transmission. + */ +message Handshake { + // Required field indicating the start of transmission. + bool sot = 1; +} + /* * ReadRequest is the request for reading datum stream from user defined source. */ @@ -43,6 +51,7 @@ message ReadRequest { } // Required field indicating the request. Request request = 1; + optional Handshake handshake = 2; } /* @@ -82,14 +91,14 @@ message ReadResponse { // End of transmission flag. bool eot = 1; Code code = 2; - Error error = 3; + optional Error error = 3; optional string msg = 4; } // Required field holding the result. Result result = 1; // Status of the response. Holds the end of transmission flag and the status code. 
- // Status status = 2; + optional Handshake handshake = 3; } /* diff --git a/rust/monovertex/src/forwarder.rs b/rust/monovertex/src/forwarder.rs index 3c3d34fa83..0b41cd3569 100644 --- a/rust/monovertex/src/forwarder.rs +++ b/rust/monovertex/src/forwarder.rs @@ -124,12 +124,17 @@ impl Forwarder { let messages = self .source_reader .read(config().batch_size, config().timeout_in_ms) - .await?; + .await + .map_err(|e| { + Error::ForwarderError(format!("Failed to read messages from source {:?}", e)) + })?; + debug!( "Read batch size: {} and latency - {}ms", messages.len(), start_time.elapsed().as_millis() ); + forward_metrics() .read_time .get_or_create(&self.common_labels) @@ -161,13 +166,27 @@ impl Forwarder { .inc_by(bytes_count); // Apply transformation if transformer is present - let transformed_messages = self.apply_transformer(messages).await?; + let transformed_messages = self.apply_transformer(messages).await.map_err(|e| { + Error::ForwarderError(format!( + "Failed to apply transformation to messages {:?}", + e + )) + })?; // Write the messages to the sink - self.write_to_sink(transformed_messages).await?; + self.write_to_sink(transformed_messages) + .await + .map_err(|e| { + Error::ForwarderError(format!("Failed to write messages to sink {:?}", e)) + })?; // Acknowledge the messages back to the source - self.acknowledge_messages(offsets).await?; + self.acknowledge_messages(offsets).await.map_err(|e| { + Error::ForwarderError(format!( + "Failed to acknowledge messages back to source {:?}", + e + )) + })?; Ok(msg_count as usize) } diff --git a/rust/monovertex/src/lib.rs b/rust/monovertex/src/lib.rs index 6e3e37180d..f7b267cbff 100644 --- a/rust/monovertex/src/lib.rs +++ b/rust/monovertex/src/lib.rs @@ -131,6 +131,7 @@ async fn start_forwarder(cln_token: CancellationToken) -> Result<()> { // readiness check for all the ud containers startup::wait_until_ready( + cln_token.clone(), &mut source_grpc_client, &mut sink_grpc_client, &mut transformer_grpc_client, diff 
--git a/rust/monovertex/src/source.rs b/rust/monovertex/src/source.rs index 6343b8a29b..f00a1d6beb 100644 --- a/rust/monovertex/src/source.rs +++ b/rust/monovertex/src/source.rs @@ -8,12 +8,13 @@ use crate::source_pb::{ }; use base64::prelude::BASE64_STANDARD; use base64::Engine; +use std::thread::sleep; use tokio::sync::mpsc; use tokio::task::JoinHandle; use tokio_stream::wrappers::ReceiverStream; use tonic::transport::Channel; use tonic::{Request, Streaming}; -use tracing::{debug, info, warn}; +use tracing::{info, warn}; pub(crate) const SOURCE_SOCKET: &str = "/var/run/numaflow/source.sock"; pub(crate) const SOURCE_SERVER_INFO_FILE: &str = "/var/run/numaflow/sourcer-server-info"; @@ -29,6 +30,9 @@ pub(crate) struct SourceReader { impl Drop for SourceReader { fn drop(&mut self) { + // wait for the ack task to flush all the acks. + // TODO: hacky way to wait for the ack task to finish. We should have a better way to handle this. + sleep(std::time::Duration::from_secs(5)); // in a happy path scenario, the ack task would have already been finished. if !self.ack_handle.is_finished() { warn!("aborting ack task"); @@ -40,17 +44,36 @@ impl Drop for SourceReader { impl SourceReader { pub(crate) async fn new(mut client: SourceClient) -> Result { let (read_tx, read_rx) = mpsc::channel(500); + let (ack_tx, ack_rx) = mpsc::channel(500); + + let read_stream = ReceiverStream::new(read_rx); + + // do a handshake with the server before we start sending read requests + let handshake_request = ReadRequest { + request: None, + handshake: Some(source_pb::Handshake { sot: true }), + }; + read_tx + .send(handshake_request) + .await + .map_err(|e| SourceError(format!("failed to send handshake request: {}", e)))?; - let resp_stream = client - .read_fn(Request::new(ReceiverStream::new(read_rx))) + let mut resp_stream = client + .read_fn(Request::new(read_stream)) .await? 
.into_inner(); - debug!("Created server stream"); - let (ack_tx, ack_rx) = mpsc::channel(500); + // first response from the server will be the handshake response. We need to check if the + // server has accepted the handshake. + let handshake_response = resp_stream.message().await?.ok_or(SourceError( + "failed to receive handshake response".to_string(), + ))?; + if handshake_response.handshake.map_or(true, |h| !h.sot) { + return Err(SourceError("invalid handshake response".to_string())); + } - let mut ack_client = client.clone(); // spawn a task to handle acks. + let mut ack_client = client.clone(); let ack_handle = tokio::spawn(async move { let ack_response = ack_client .ack_fn(Request::new(ReceiverStream::new(ack_rx))) @@ -77,6 +100,7 @@ impl SourceReader { num_records, timeout_in_ms, }), + handshake: None, }; self.read_tx diff --git a/rust/monovertex/src/startup.rs b/rust/monovertex/src/startup.rs index 44df14b8d8..abf0cc616a 100644 --- a/rust/monovertex/src/startup.rs +++ b/rust/monovertex/src/startup.rs @@ -82,12 +82,18 @@ pub(crate) async fn create_lag_reader(lag_reader_grpc_client: SourceClient, sink_client: &mut SinkClient, transformer_client: &mut Option>, fb_sink_client: &mut Option>, ) -> error::Result<()> { loop { + if cln_token.is_cancelled() { + return Err(Error::ForwarderError( + "Cancellation token is cancelled".to_string(), + )); + } let source_ready = source_client.is_ready(Request::new(())).await.is_ok(); if !source_ready { info!("UDSource is not ready, waiting..."); From d0e31fa1521dead0a00b7fdb91ec3603dfeeff2d Mon Sep 17 00:00:00 2001 From: Yashash H L Date: Tue, 17 Sep 2024 21:59:53 +0530 Subject: [PATCH 28/40] add fixme Signed-off-by: Yashash H L --- rust/monovertex/src/source.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/rust/monovertex/src/source.rs b/rust/monovertex/src/source.rs index f00a1d6beb..580271e465 100644 --- a/rust/monovertex/src/source.rs +++ b/rust/monovertex/src/source.rs @@ -31,8 +31,8 @@ 
pub(crate) struct SourceReader { impl Drop for SourceReader { fn drop(&mut self) { // wait for the ack task to flush all the acks. - // TODO: hacky way to wait for the ack task to finish. We should have a better way to handle this. - sleep(std::time::Duration::from_secs(5)); + // FIXME: hacky way to wait for the ack task to finish. We should have a better way to handle this. + sleep(std::time::Duration::from_secs(30)); // in a happy path scenario, the ack task would have already been finished. if !self.ack_handle.is_finished() { warn!("aborting ack task"); From fdad5f44820025baf63c8491f601141767024f1d Mon Sep 17 00:00:00 2001 From: Yashash H L Date: Wed, 18 Sep 2024 19:06:36 +0530 Subject: [PATCH 29/40] make ack bidirectional, separate source reader and acker Signed-off-by: Yashash H L --- go.mod | 2 +- go.sum | 4 +- pkg/sdkclient/source/client.go | 49 ++++++-- pkg/sdkclient/source/client_test.go | 35 +++++- pkg/sources/udsource/grpc_udsource_test.go | 3 +- rust/Cargo.lock | 66 ++++------- rust/monovertex/proto/source.proto | 4 +- rust/monovertex/src/forwarder.rs | 80 ++++++++----- rust/monovertex/src/lib.rs | 6 +- rust/monovertex/src/source.rs | 106 +++++++++++------- rust/servesink/Cargo.toml | 2 +- .../testdata/simple-source-go.yaml | 1 - .../testdata/simple-source-java.yaml | 1 - .../testdata/simple-source-python.yaml | 1 - .../testdata/simple-source-rs.yaml | 3 +- 15 files changed, 220 insertions(+), 143 deletions(-) diff --git a/go.mod b/go.mod index 84ad2e952f..94ae56ddce 100644 --- a/go.mod +++ b/go.mod @@ -32,7 +32,7 @@ require ( github.com/montanaflynn/stats v0.0.0-20171201202039-1bf9dbcd8cbe github.com/nats-io/nats-server/v2 v2.10.17 github.com/nats-io/nats.go v1.36.0 - github.com/numaproj/numaflow-go v0.8.2-0.20240917052911-ee2f3086d64e + github.com/numaproj/numaflow-go v0.8.2-0.20240918054944-0fd13d430793 github.com/prometheus/client_golang v1.18.0 github.com/prometheus/client_model v0.5.0 github.com/prometheus/common v0.45.0 diff --git a/go.sum 
b/go.sum index 236809e8c4..9fa9b85a66 100644 --- a/go.sum +++ b/go.sum @@ -485,8 +485,8 @@ github.com/nats-io/nkeys v0.4.7/go.mod h1:kqXRgRDPlGy7nGaEDMuYzmiJCIAAWDK0IMBtDm github.com/nats-io/nuid v1.0.1 h1:5iA8DT8V7q8WK2EScv2padNa/rTESc1KdnPw4TC2paw= github.com/nats-io/nuid v1.0.1/go.mod h1:19wcPz3Ph3q0Jbyiqsd0kePYG7A95tJPxeL+1OSON2c= github.com/niemeyer/pretty v0.0.0-20200227124842-a10e7caefd8e/go.mod h1:zD1mROLANZcx1PVRCS0qkT7pwLkGfwJo4zjcN/Tysno= -github.com/numaproj/numaflow-go v0.8.2-0.20240917052911-ee2f3086d64e h1:F3iujbel8y5X20bVMY0Am6XDyL5eDOC/6kxyI8uxfpg= -github.com/numaproj/numaflow-go v0.8.2-0.20240917052911-ee2f3086d64e/go.mod h1:g4JZOyUPhjfhv+kR0sX5d8taw/dasgKPXLvQBi39mJ4= +github.com/numaproj/numaflow-go v0.8.2-0.20240918054944-0fd13d430793 h1:kUQw1LsUvmTjqFfcia6DZOxy8qCQwvdY0TpOnR8w3Xg= +github.com/numaproj/numaflow-go v0.8.2-0.20240918054944-0fd13d430793/go.mod h1:g4JZOyUPhjfhv+kR0sX5d8taw/dasgKPXLvQBi39mJ4= github.com/nxadm/tail v1.4.4/go.mod h1:kenIhsEOeOJmVchQTgglprH7qJGnHDVpk1VPCcaMI8A= github.com/nxadm/tail v1.4.8 h1:nPr65rt6Y5JFSKQO7qToXr7pePgD6Gwiw05lkbyAQTE= github.com/nxadm/tail v1.4.8/go.mod h1:+ncqLTQzXmGhMZNUePPaPqPvBxHAIsmXswZKocGu+AU= diff --git a/pkg/sdkclient/source/client.go b/pkg/sdkclient/source/client.go index 15be4c9317..305271d95e 100644 --- a/pkg/sdkclient/source/client.go +++ b/pkg/sdkclient/source/client.go @@ -86,23 +86,42 @@ waitUntilReady: return nil, fmt.Errorf("failed to create ack stream: %v", err) } - // Send handshake request - handshakeRequest := &sourcepb.ReadRequest{ + // Send handshake request for read stream + readHandshakeRequest := &sourcepb.ReadRequest{ Handshake: &sourcepb.Handshake{ Sot: true, }, } - if err := c.readStream.Send(handshakeRequest); err != nil { - return nil, fmt.Errorf("failed to send handshake request: %v", err) + if err := c.readStream.Send(readHandshakeRequest); err != nil { + return nil, fmt.Errorf("failed to send read handshake request: %v", err) } - // Wait for handshake response - 
handshakeResponse, err := c.readStream.Recv() + // Wait for handshake response for read stream + readHandshakeResponse, err := c.readStream.Recv() if err != nil { - return nil, fmt.Errorf("failed to receive handshake response: %v", err) + return nil, fmt.Errorf("failed to receive read handshake response: %v", err) } - if handshakeResponse.GetHandshake() == nil || !handshakeResponse.GetHandshake().GetSot() { - return nil, fmt.Errorf("invalid handshake response") + if readHandshakeResponse.GetHandshake() == nil || !readHandshakeResponse.GetHandshake().GetSot() { + return nil, fmt.Errorf("invalid read handshake response") + } + + // Send handshake request for ack stream + ackHandshakeRequest := &sourcepb.AckRequest{ + Handshake: &sourcepb.Handshake{ + Sot: true, + }, + } + if err := c.ackStream.Send(ackHandshakeRequest); err != nil { + return nil, fmt.Errorf("failed to send ack handshake request: %v", err) + } + + // Wait for handshake response for ack stream + ackHandshakeResponse, err := c.ackStream.Recv() + if err != nil { + return nil, fmt.Errorf("failed to receive ack handshake response: %v", err) + } + if ackHandshakeResponse.GetHandshake() == nil || !ackHandshakeResponse.GetHandshake().GetSot() { + return nil, fmt.Errorf("invalid ack handshake response") } return c, nil @@ -172,11 +191,19 @@ func (c *client) ReadFn(_ context.Context, req *sourcepb.ReadRequest, datumCh ch // AckFn acknowledges the data from the source. func (c *client) AckFn(_ context.Context, req *sourcepb.AckRequest) (*sourcepb.AckResponse, error) { + // Send the ack request err := c.ackStream.Send(req) if err != nil { - return nil, err + return nil, fmt.Errorf("failed to send ack request: %v", err) } - return &sourcepb.AckResponse{}, nil + + // Wait for the ack response + resp, err := c.ackStream.Recv() + if err != nil { + return nil, fmt.Errorf("failed to receive ack response: %v", err) + } + + return resp, nil } // PendingFn returns the number of pending data from the source. 
diff --git a/pkg/sdkclient/source/client_test.go b/pkg/sdkclient/source/client_test.go index f1ae7d80f4..818c3c3430 100644 --- a/pkg/sdkclient/source/client_test.go +++ b/pkg/sdkclient/source/client_test.go @@ -150,22 +150,47 @@ func TestAckFn(t *testing.T) { defer ctrl.Finish() mockClient := sourcemock.NewMockSourceClient(ctrl) - mockStream := sourcemock.NewMockSource_AckFnClient(ctrl) + + // Handshake request and response + mockStream.EXPECT().Send(&sourcepb.AckRequest{ + Handshake: &sourcepb.Handshake{ + Sot: true, + }, + }).Return(nil) + mockStream.EXPECT().Recv().Return(&sourcepb.AckResponse{ + Handshake: &sourcepb.Handshake{ + Sot: true, + }, + }, nil) + + // Ack request and response mockStream.EXPECT().Send(gomock.Any()).Return(nil) - mockStream.EXPECT().Send(gomock.Any()).Return(fmt.Errorf("mock connection refused")) + mockStream.EXPECT().Recv().Return(&sourcepb.AckResponse{}, nil) testClient := client{ grpcClt: mockClient, ackStream: mockStream, } + // Perform handshake + ackHandshakeRequest := &sourcepb.AckRequest{ + Handshake: &sourcepb.Handshake{ + Sot: true, + }, + } + err := testClient.ackStream.Send(ackHandshakeRequest) + assert.NoError(t, err) + + ackHandshakeResponse, err := testClient.ackStream.Recv() + assert.NoError(t, err) + assert.NotNil(t, ackHandshakeResponse.GetHandshake()) + assert.True(t, ackHandshakeResponse.GetHandshake().GetSot()) + + // Test AckFn ack, err := testClient.AckFn(ctx, &sourcepb.AckRequest{}) assert.NoError(t, err) assert.Equal(t, &sourcepb.AckResponse{}, ack) - - _, err = testClient.AckFn(ctx, &sourcepb.AckRequest{}) - assert.EqualError(t, err, "mock connection refused") } func TestPendingFn(t *testing.T) { diff --git a/pkg/sources/udsource/grpc_udsource_test.go b/pkg/sources/udsource/grpc_udsource_test.go index 459f9ae39f..e0a0ab4ca5 100644 --- a/pkg/sources/udsource/grpc_udsource_test.go +++ b/pkg/sources/udsource/grpc_udsource_test.go @@ -288,6 +288,7 @@ func Test_gRPCBasedUDSource_ApplyAckWithMockClient(t *testing.T) 
{ mockAckClient.EXPECT().Send(req1).Return(nil).Times(1) mockAckClient.EXPECT().Send(req2).Return(nil).Times(1) + mockAckClient.EXPECT().Recv().Return(&sourcepb.AckResponse{}, nil).Times(2) ctx, cancel := context.WithTimeout(context.Background(), time.Second) defer cancel() @@ -340,6 +341,6 @@ func Test_gRPCBasedUDSource_ApplyAckWithMockClient(t *testing.T) { NewUserDefinedSourceOffset(offset1), NewUserDefinedSourceOffset(offset2), }) - assert.ErrorIs(t, err, status.New(codes.DeadlineExceeded, "mock test err").Err()) + assert.Equal(t, err.Error(), fmt.Sprintf("failed to send ack request: %s", status.New(codes.DeadlineExceeded, "mock test err").Err())) }) } diff --git a/rust/Cargo.lock b/rust/Cargo.lock index 10e16e60ee..624d5f14a8 100644 --- a/rust/Cargo.lock +++ b/rust/Cargo.lock @@ -43,9 +43,9 @@ dependencies = [ [[package]] name = "anyhow" -version = "1.0.88" +version = "1.0.89" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4e1496f8fb1fbf272686b8d37f523dab3e4a7443300055e74cdaa449f3114356" +checksum = "86fdf8605db99b54d3cd748a44c6d04df638eb5dafb219b135d0149bd0db01f6" [[package]] name = "arc-swap" @@ -351,18 +351,18 @@ checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" [[package]] name = "bytes" -version = "1.7.1" +version = "1.7.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8318a53db07bb3f8dca91a600466bdb3f2eaadeedfdbcf02e1accbad9271ba50" +checksum = "428d9aa8fbc0670b7b8d6030a7fadd0f86151cae55e4dbbece15f3780a3dfaf3" dependencies = [ "serde", ] [[package]] name = "cc" -version = "1.1.18" +version = "1.1.21" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b62ac837cdb5cb22e10a256099b4fc502b1dfe560cb282963a974d7abd80e476" +checksum = "07b1695e2c7e8fc85310cde85aeaab7e3097f593c91d209d3f9df76c928100f0" dependencies = [ "jobserver", "libc", @@ -1098,7 +1098,7 @@ dependencies = [ "tokio", "tokio-rustls 0.26.0", "tower-service", - "webpki-roots 
0.26.3", + "webpki-roots 0.26.5", ] [[package]] @@ -1136,9 +1136,9 @@ dependencies = [ [[package]] name = "iana-time-zone" -version = "0.1.60" +version = "0.1.61" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e7ffbb5a1b541ea2561f8c41c087286cc091e21e556a4f09a8f6cbf17b69b141" +checksum = "235e081f3925a06703c2d0117ea8b91f042756fd6e7a6e5d901e8ca1a996b220" dependencies = [ "android_system_properties", "core-foundation-sys", @@ -1478,7 +1478,7 @@ dependencies = [ "hyper-util", "kube", "log", - "numaflow 0.1.1 (git+https://github.com/numaproj/numaflow-rs.git?branch=handshake)", + "numaflow 0.1.1", "numaflow-models", "once_cell", "parking_lot", @@ -1605,29 +1605,7 @@ dependencies = [ [[package]] name = "numaflow" version = "0.1.1" -source = "git+https://github.com/numaproj/numaflow-rs.git?branch=handshake#f3061b039c877e9828c50fcd0424391727e4920d" -dependencies = [ - "chrono", - "futures-util", - "hyper-util", - "prost", - "prost-types", - "serde", - "serde_json", - "thiserror", - "tokio", - "tokio-stream", - "tokio-util", - "tonic", - "tonic-build", - "tracing", - "uuid", -] - -[[package]] -name = "numaflow" -version = "0.1.1" -source = "git+https://github.com/numaproj/numaflow-rs.git?branch=source-streaming#dcbb26834153b84853d9757e25395d92a1314d4a" +source = "git+https://github.com/numaproj/numaflow-rs.git?branch=handshake#baecc88456f317b08bc869f82596e2b746cf798b" dependencies = [ "chrono", "futures-util", @@ -2027,15 +2005,15 @@ dependencies = [ [[package]] name = "quinn-udp" -version = "0.5.4" +version = "0.5.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8bffec3605b73c6f1754535084a85229fa8a30f86014e6c81aeec4abb68b0285" +checksum = "4fe68c2e9e1a1234e218683dbdf9f9dfcb094113c5ac2b938dfcb9bab4c4140b" dependencies = [ "libc", "once_cell", "socket2", "tracing", - "windows-sys 0.52.0", + "windows-sys 0.59.0", ] [[package]] @@ -2248,7 +2226,7 @@ dependencies = [ "wasm-bindgen", "wasm-bindgen-futures", "web-sys", 
- "webpki-roots 0.26.3", + "webpki-roots 0.26.5", "windows-registry", ] @@ -2616,7 +2594,7 @@ dependencies = [ name = "servesink" version = "0.1.0" dependencies = [ - "numaflow 0.1.1 (git+https://github.com/numaproj/numaflow-rs.git?branch=source-streaming)", + "numaflow 0.1.1", "reqwest 0.12.7", "tokio", "tonic", @@ -3029,9 +3007,9 @@ dependencies = [ [[package]] name = "toml_edit" -version = "0.22.20" +version = "0.22.21" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "583c44c02ad26b0c3f3066fe629275e50627026c51ac2e595cca4c230ce1ce1d" +checksum = "3b072cee73c449a636ffd6f32bd8de3a9f7119139aff882f44943ce2986dc5cf" dependencies = [ "indexmap 2.5.0", "serde", @@ -3260,9 +3238,9 @@ checksum = "e91b56cd4cadaeb79bbf1a5645f6b4f8dc5bde8834ad5894a8db35fda9efa1fe" [[package]] name = "unicode-normalization" -version = "0.1.23" +version = "0.1.24" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a56d1686db2308d901306f92a263857ef59ea39678a5458e7cb17f01415101f5" +checksum = "5033c97c4262335cded6d6fc3e5c18ab755e1a3dc96376350f3d8e9f009ad956" dependencies = [ "tinyvec", ] @@ -3430,9 +3408,9 @@ checksum = "5f20c57d8d7db6d3b86154206ae5d8fba62dd39573114de97c2cb0578251f8e1" [[package]] name = "webpki-roots" -version = "0.26.3" +version = "0.26.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bd7c23921eeb1713a4e851530e9b9756e4fb0e89978582942612524cf09f01cd" +checksum = "0bd24728e5af82c6c4ec1b66ac4844bdf8156257fccda846ec58b42cd0cdbe6a" dependencies = [ "rustls-pki-types", ] diff --git a/rust/monovertex/proto/source.proto b/rust/monovertex/proto/source.proto index 31a762ac59..93c5f19278 100644 --- a/rust/monovertex/proto/source.proto +++ b/rust/monovertex/proto/source.proto @@ -17,7 +17,7 @@ service Source { // The caller (numa) expects the AckFn to be successful, and it does not expect any errors. 
// If there are some irrecoverable errors when the callee (UDSource) is processing the AckFn request, // then it is best to crash because there are no other retry mechanisms possible. - rpc AckFn(stream AckRequest) returns (AckResponse); + rpc AckFn(stream AckRequest) returns (stream AckResponse); // PendingFn returns the number of pending records at the user defined source. rpc PendingFn(google.protobuf.Empty) returns (PendingResponse); @@ -112,6 +112,7 @@ message AckRequest { } // Required field holding the request. The list will be ordered and will have the same order as the original Read response. Request request = 1; + optional Handshake handshake = 2; } /* @@ -131,6 +132,7 @@ message AckResponse { } // Required field holding the result. Result result = 1; + optional Handshake handshake = 2; } /* diff --git a/rust/monovertex/src/forwarder.rs b/rust/monovertex/src/forwarder.rs index 0b41cd3569..d60644b338 100644 --- a/rust/monovertex/src/forwarder.rs +++ b/rust/monovertex/src/forwarder.rs @@ -7,7 +7,7 @@ use crate::metrics; use crate::metrics::forward_metrics; use crate::sink::SinkWriter; use crate::sink_pb::Status::{Failure, Fallback, Success}; -use crate::source::SourceReader; +use crate::source::{SourceAcker, SourceReader}; use crate::transformer::SourceTransformer; use chrono::Utc; use tokio::task::JoinSet; @@ -21,6 +21,7 @@ use tracing::{debug, info}; /// back to the source. pub(crate) struct Forwarder { source_reader: SourceReader, + source_acker: SourceAcker, sink_writer: SinkWriter, source_transformer: Option, fb_sink_writer: Option, @@ -31,6 +32,7 @@ pub(crate) struct Forwarder { /// ForwarderBuilder is used to build a Forwarder instance with optional fields. 
pub(crate) struct ForwarderBuilder { source_reader: SourceReader, + source_acker: SourceAcker, sink_writer: SinkWriter, cln_token: CancellationToken, source_transformer: Option, @@ -40,13 +42,15 @@ pub(crate) struct ForwarderBuilder { impl ForwarderBuilder { /// Create a new builder with mandatory fields pub(crate) fn new( - source_client: SourceReader, - sink_client: SinkWriter, + source_reader: SourceReader, + source_acker: SourceAcker, + sink_writer: SinkWriter, cln_token: CancellationToken, ) -> Self { Self { - source_reader: source_client, - sink_writer: sink_client, + source_reader, + source_acker, + sink_writer, cln_token, source_transformer: None, fb_sink_writer: None, @@ -71,6 +75,7 @@ impl ForwarderBuilder { let common_labels = metrics::forward_metrics_labels().clone(); Forwarder { source_reader: self.source_reader, + source_acker: self.source_acker, sink_writer: self.sink_writer, source_transformer: self.source_transformer, fb_sink_writer: self.fb_sink_writer, @@ -525,7 +530,7 @@ impl Forwarder { let n = offsets.len(); let start_time = tokio::time::Instant::now(); - self.source_reader.ack(offsets).await?; + self.source_acker.ack(offsets).await?; debug!("Ack latency - {}ms", start_time.elapsed().as_millis()); @@ -557,7 +562,7 @@ mod tests { use crate::shared::create_rpc_channel; use crate::sink::SinkWriter; use crate::sink_pb::sink_client::SinkClient; - use crate::source::SourceReader; + use crate::source::{SourceAcker, SourceReader}; use crate::source_pb::source_client::SourceClient; use crate::sourcetransform_pb::source_transform_client::SourceTransformClient; use crate::transformer::SourceTransformer; @@ -649,10 +654,7 @@ mod tests { #[tonic::async_trait] impl sink::Sinker for InMemorySink { - async fn sink( - &self, - mut input: tokio::sync::mpsc::Receiver, - ) -> Vec { + async fn sink(&self, mut input: mpsc::Receiver) -> Vec { let mut responses: Vec = Vec::new(); while let Some(datum) = input.recv().await { let response = match 
std::str::from_utf8(&datum.value) { @@ -742,13 +744,19 @@ mod tests { let cln_token = CancellationToken::new(); - let source_client = SourceReader::new(SourceClient::new( + let source_reader = SourceReader::new(SourceClient::new( + create_rpc_channel(source_sock_file.clone()).await.unwrap(), + )) + .await + .expect("failed to connect to source server"); + + let source_acker = SourceAcker::new(SourceClient::new( create_rpc_channel(source_sock_file).await.unwrap(), )) .await .expect("failed to connect to source server"); - let sink_client = SinkWriter::new(SinkClient::new( + let sink_writer = SinkWriter::new(SinkClient::new( create_rpc_channel(sink_sock_file).await.unwrap(), )) .await @@ -760,9 +768,10 @@ mod tests { .await .expect("failed to connect to transformer server"); - let mut forwarder = ForwarderBuilder::new(source_client, sink_client, cln_token.clone()) - .source_transformer(transformer_client) - .build(); + let mut forwarder = + ForwarderBuilder::new(source_reader, source_acker, sink_writer, cln_token.clone()) + .source_transformer(transformer_client) + .build(); // Assert the received message in a different task let assert_handle = tokio::spawn(async move { @@ -864,20 +873,27 @@ mod tests { let cln_token = CancellationToken::new(); - let source_client = SourceReader::new(SourceClient::new( + let source_reader = SourceReader::new(SourceClient::new( + create_rpc_channel(source_sock_file.clone()).await.unwrap(), + )) + .await + .expect("failed to connect to source server"); + + let source_acker = SourceAcker::new(SourceClient::new( create_rpc_channel(source_sock_file).await.unwrap(), )) .await .expect("failed to connect to source server"); - let sink_client = SinkWriter::new(SinkClient::new( + let sink_writer = SinkWriter::new(SinkClient::new( create_rpc_channel(sink_sock_file).await.unwrap(), )) .await .expect("failed to connect to sink server"); let mut forwarder = - ForwarderBuilder::new(source_client, sink_client, cln_token.clone()).build(); + 
ForwarderBuilder::new(source_reader, source_acker, sink_writer, cln_token.clone()) + .build(); let cancel_handle = tokio::spawn(async move { tokio::time::sleep(tokio::time::Duration::from_millis(100)).await; @@ -910,10 +926,7 @@ mod tests { #[tonic::async_trait] impl sink::Sinker for FallbackSender { - async fn sink( - &self, - mut input: tokio::sync::mpsc::Receiver, - ) -> Vec { + async fn sink(&self, mut input: mpsc::Receiver) -> Vec { let mut responses = vec![]; while let Some(datum) = input.recv().await { responses.append(&mut vec![sink::Response::fallback(datum.id)]); @@ -924,7 +937,7 @@ mod tests { #[tokio::test] async fn test_fb_sink() { - let (sink_tx, mut sink_rx) = tokio::sync::mpsc::channel(10); + let (sink_tx, mut sink_rx) = mpsc::channel(10); // Start the source server let (source_shutdown_tx, source_shutdown_rx) = tokio::sync::oneshot::channel(); @@ -982,27 +995,34 @@ mod tests { let cln_token = CancellationToken::new(); - let source_client = SourceReader::new(SourceClient::new( + let source_reader = SourceReader::new(SourceClient::new( + create_rpc_channel(source_sock_file.clone()).await.unwrap(), + )) + .await + .expect("failed to connect to source server"); + + let source_acker = SourceAcker::new(SourceClient::new( create_rpc_channel(source_sock_file).await.unwrap(), )) .await .expect("failed to connect to source server"); - let sink_client = SinkWriter::new(SinkClient::new( + let sink_writer = SinkWriter::new(SinkClient::new( create_rpc_channel(sink_sock_file).await.unwrap(), )) .await .expect("failed to connect to sink server"); - let fb_sink_client = SinkWriter::new(SinkClient::new( + let fb_sink_writer = SinkWriter::new(SinkClient::new( create_rpc_channel(fb_sink_sock_file).await.unwrap(), )) .await .expect("failed to connect to fb sink server"); - let mut forwarder = ForwarderBuilder::new(source_client, sink_client, cln_token.clone()) - .fallback_sink_writer(fb_sink_client) - .build(); + let mut forwarder = + 
ForwarderBuilder::new(source_reader, source_acker, sink_writer, cln_token.clone()) + .fallback_sink_writer(fb_sink_writer) + .build(); let assert_handle = tokio::spawn(async move { let received_message = sink_rx.recv().await.unwrap(); diff --git a/rust/monovertex/src/lib.rs b/rust/monovertex/src/lib.rs index f7b267cbff..d3d612c2db 100644 --- a/rust/monovertex/src/lib.rs +++ b/rust/monovertex/src/lib.rs @@ -10,7 +10,7 @@ use crate::metrics::MetricsState; use crate::shared::create_rpc_channel; use crate::sink::{SinkWriter, FB_SINK_SOCKET, SINK_SOCKET}; use crate::sink_pb::sink_client::SinkClient; -use crate::source::{SourceReader, SOURCE_SOCKET}; +use crate::source::{SourceAcker, SourceReader, SOURCE_SOCKET}; use crate::source_pb::source_client::SourceClient; use crate::sourcetransform_pb::source_transform_client::SourceTransformClient; use crate::transformer::{SourceTransformer, TRANSFORMER_SOCKET}; @@ -159,9 +159,11 @@ async fn start_forwarder(cln_token: CancellationToken) -> Result<()> { // build the forwarder let source_reader = SourceReader::new(source_grpc_client.clone()).await?; + let source_acker = SourceAcker::new(source_grpc_client.clone()).await?; let sink_writer = SinkWriter::new(sink_grpc_client.clone()).await?; - let mut forwarder_builder = ForwarderBuilder::new(source_reader, sink_writer, cln_token); + let mut forwarder_builder = + ForwarderBuilder::new(source_reader, source_acker, sink_writer, cln_token); // add transformer if exists if let Some(transformer_grpc_client) = transformer_grpc_client { diff --git a/rust/monovertex/src/source.rs b/rust/monovertex/src/source.rs index 580271e465..eaafb3ae15 100644 --- a/rust/monovertex/src/source.rs +++ b/rust/monovertex/src/source.rs @@ -1,3 +1,4 @@ +use crate::config::config; use crate::error::Error::SourceError; use crate::error::Result; use crate::message::{Message, Offset}; @@ -8,47 +9,27 @@ use crate::source_pb::{ }; use base64::prelude::BASE64_STANDARD; use base64::Engine; -use std::thread::sleep; use 
tokio::sync::mpsc; -use tokio::task::JoinHandle; use tokio_stream::wrappers::ReceiverStream; use tonic::transport::Channel; use tonic::{Request, Streaming}; -use tracing::{info, warn}; pub(crate) const SOURCE_SOCKET: &str = "/var/run/numaflow/source.sock"; pub(crate) const SOURCE_SERVER_INFO_FILE: &str = "/var/run/numaflow/sourcer-server-info"; -/// SourceReader reads messages from a source and acks them. +/// SourceReader reads messages from a source. #[derive(Debug)] pub(crate) struct SourceReader { read_tx: mpsc::Sender, resp_stream: Streaming, - ack_tx: mpsc::Sender, - ack_handle: JoinHandle<()>, -} - -impl Drop for SourceReader { - fn drop(&mut self) { - // wait for the ack task to flush all the acks. - // FIXME: hacky way to wait for the ack task to finish. We should have a better way to handle this. - sleep(std::time::Duration::from_secs(30)); - // in a happy path scenario, the ack task would have already been finished. - if !self.ack_handle.is_finished() { - warn!("aborting ack task"); - self.ack_handle.abort(); - } - } } impl SourceReader { pub(crate) async fn new(mut client: SourceClient) -> Result { - let (read_tx, read_rx) = mpsc::channel(500); - let (ack_tx, ack_rx) = mpsc::channel(500); - + let (read_tx, read_rx) = mpsc::channel(config().batch_size as usize); let read_stream = ReceiverStream::new(read_rx); - // do a handshake with the server before we start sending read requests + // do a handshake for read with the server before we start sending read requests let handshake_request = ReadRequest { request: None, handshake: Some(source_pb::Handshake { sot: true }), @@ -72,21 +53,9 @@ impl SourceReader { return Err(SourceError("invalid handshake response".to_string())); } - // spawn a task to handle acks. 
- let mut ack_client = client.clone(); - let ack_handle = tokio::spawn(async move { - let ack_response = ack_client - .ack_fn(Request::new(ReceiverStream::new(ack_rx))) - .await - .expect("ack should not have failed"); - info!("Closing ack stream {:?}", ack_response); - }); - Ok(Self { read_tx, resp_stream, - ack_tx, - ack_handle, }) } @@ -123,6 +92,46 @@ impl SourceReader { } Ok(messages) } +} + +/// SourceAcker acks the messages from a source. +#[derive(Debug)] +pub(crate) struct SourceAcker { + ack_tx: mpsc::Sender, + ack_resp_stream: Streaming, +} + +impl SourceAcker { + pub(crate) async fn new(mut client: SourceClient) -> Result { + let (ack_tx, ack_rx) = mpsc::channel(config().batch_size as usize); + let ack_stream = ReceiverStream::new(ack_rx); + + // do a handshake for ack with the server before we start sending ack requests + let ack_handshake_request = AckRequest { + request: None, + handshake: Some(source_pb::Handshake { sot: true }), + }; + ack_tx + .send(ack_handshake_request) + .await + .map_err(|e| SourceError(format!("failed to send ack handshake request: {}", e)))?; + + let mut ack_resp_stream = client.ack_fn(Request::new(ack_stream)).await?.into_inner(); + + // first response from the server will be the handshake response. We need to check if the + // server has accepted the handshake. 
+ let ack_handshake_response = ack_resp_stream.message().await?.ok_or(SourceError( + "failed to receive ack handshake response".to_string(), + ))?; + if ack_handshake_response.handshake.map_or(true, |h| !h.sot) { + return Err(SourceError("invalid ack handshake response".to_string())); + } + + Ok(Self { + ack_tx, + ack_resp_stream, + }) + } pub(crate) async fn ack(&mut self, offsets: Vec) -> Result { for offset in offsets { @@ -135,14 +144,22 @@ impl SourceReader { partition_id: offset.partition_id, }), }), + handshake: None, }; self.ack_tx .send(request) .await .map_err(|e| SourceError(e.to_string()))?; + + // wait for the ack response for each ack request + self.ack_resp_stream + .message() + .await? + .ok_or(SourceError("failed to receive ack response".to_string()))?; } Ok(AckResponse { result: Some(ack_response::Result { success: Some(()) }), + handshake: None, }) } } @@ -152,7 +169,7 @@ mod tests { use std::collections::HashSet; use crate::shared::create_rpc_channel; - use crate::source::SourceReader; + use crate::source::{SourceAcker, SourceReader}; use crate::source_pb::source_client::SourceClient; use chrono::Utc; use numaflow::source; @@ -237,16 +254,24 @@ mod tests { // TODO: flaky tokio::time::sleep(tokio::time::Duration::from_millis(100)).await; - let mut source_client = SourceReader::new(SourceClient::new( + let mut source_reader = SourceReader::new(SourceClient::new( + create_rpc_channel(sock_file.clone()).await.unwrap(), + )) + .await + .map_err(|e| panic!("failed to create source reader: {:?}", e)) + .unwrap(); + + let mut source_acker = SourceAcker::new(SourceClient::new( create_rpc_channel(sock_file).await.unwrap(), )) .await + .map_err(|e| panic!("failed to create source acker: {:?}", e)) .unwrap(); - let messages = source_client.read(5, 1000).await.unwrap(); + let messages = source_reader.read(5, 1000).await.unwrap(); assert_eq!(messages.len(), 5); - let response = source_client + let response = source_acker .ack(messages.iter().map(|m| 
m.offset.clone()).collect()) .await .unwrap(); @@ -254,7 +279,8 @@ mod tests { // we need to drop the client, because if there are any in-flight requests // server fails to shut down. https://github.com/numaproj/numaflow-rs/issues/85 - drop(source_client); + drop(source_reader); + drop(source_acker); shutdown_tx .send(()) .expect("failed to send shutdown signal"); diff --git a/rust/servesink/Cargo.toml b/rust/servesink/Cargo.toml index 6d79dc2b7e..90a7c44696 100644 --- a/rust/servesink/Cargo.toml +++ b/rust/servesink/Cargo.toml @@ -6,7 +6,7 @@ edition = "2021" [dependencies] tonic = "0.12.0" tokio = { version = "1.0", features = ["macros", "rt-multi-thread"] } -numaflow = { git = "https://github.com/numaproj/numaflow-rs.git", branch = "source-streaming" } +numaflow = { git = "https://github.com/numaproj/numaflow-rs.git", branch = "handshake" } tracing = "0.1.40" tracing-subscriber = { version = "0.3.18", features = ["env-filter"] } diff --git a/test/udsource-e2e/testdata/simple-source-go.yaml b/test/udsource-e2e/testdata/simple-source-go.yaml index 84515c7007..5d0943bd03 100644 --- a/test/udsource-e2e/testdata/simple-source-go.yaml +++ b/test/udsource-e2e/testdata/simple-source-go.yaml @@ -11,7 +11,6 @@ spec: # A simple user-defined source for e2e testing # See https://github.com/numaproj/numaflow-go/tree/main/pkg/sourcer/examples/simple_source image: quay.io/yhl25/numaflow-go/source-simple-source:stable - imagePullPolicy: Always limits: readBatchSize: 500 scale: diff --git a/test/udsource-e2e/testdata/simple-source-java.yaml b/test/udsource-e2e/testdata/simple-source-java.yaml index c53ecf9b03..b85745ebf9 100644 --- a/test/udsource-e2e/testdata/simple-source-java.yaml +++ b/test/udsource-e2e/testdata/simple-source-java.yaml @@ -13,7 +13,6 @@ spec: # A simple user-defined source for e2e testing # See https://github.com/numaproj/numaflow-java/tree/main/examples/src/main/java/io/numaproj/numaflow/examples/source/simple image: 
quay.io/yhl25/numaflow-java/source-simple-source:stable - imagePullPolicy: IfNotPresent limits: readBatchSize: 500 - name: out diff --git a/test/udsource-e2e/testdata/simple-source-python.yaml b/test/udsource-e2e/testdata/simple-source-python.yaml index a64960e9fe..9862b63bb6 100644 --- a/test/udsource-e2e/testdata/simple-source-python.yaml +++ b/test/udsource-e2e/testdata/simple-source-python.yaml @@ -13,7 +13,6 @@ spec: # A simple user-defined source for e2e testing # See https://github.com/numaproj/numaflow-python/tree/main/examples/source/simple_source image: quay.io/numaio/numaflow-python/simple-source:stable - imagePullPolicy: Always limits: readBatchSize: 500 - name: out diff --git a/test/udsource-e2e/testdata/simple-source-rs.yaml b/test/udsource-e2e/testdata/simple-source-rs.yaml index dabf34df2f..0cff657496 100644 --- a/test/udsource-e2e/testdata/simple-source-rs.yaml +++ b/test/udsource-e2e/testdata/simple-source-rs.yaml @@ -10,8 +10,7 @@ spec: container: # A simple user-defined source for e2e testing # See https://github.com/numaproj/numaflow-go/tree/main/pkg/sourcer/examples/simple_source - image: quay.io/numaio/numaflow-rs/simple-source:stable - imagePullPolicy: Always + image: quay.io/yhl25/numaflow-rs/simple-source:stable limits: readBatchSize: 500 scale: From 7ebd128acc644c5b9d3bd499804743f8d9453002 Mon Sep 17 00:00:00 2001 From: Yashash H L Date: Thu, 19 Sep 2024 22:29:33 +0530 Subject: [PATCH 30/40] adding tests Signed-off-by: Yashash H L --- rust/monovertex/src/config.rs | 66 ++++++-- rust/monovertex/src/lib.rs | 116 +++++++++---- rust/monovertex/src/server_info.rs | 31 ++-- rust/monovertex/src/sink.rs | 6 - rust/monovertex/src/source.rs | 14 +- rust/monovertex/src/startup.rs | 250 ++++++++++++++++++++++++++--- rust/monovertex/src/transformer.rs | 3 - 7 files changed, 391 insertions(+), 95 deletions(-) diff --git a/rust/monovertex/src/config.rs b/rust/monovertex/src/config.rs index 81b115422f..5d245ed397 100644 --- 
a/rust/monovertex/src/config.rs +++ b/rust/monovertex/src/config.rs @@ -1,13 +1,20 @@ -use std::env; -use std::sync::OnceLock; - +use crate::error::Error; use base64::prelude::BASE64_STANDARD; use base64::Engine; - use numaflow_models::models::{Backoff, MonoVertex, RetryStrategy}; +use std::env; +use std::sync::OnceLock; -use crate::error::Error; +const DEFAULT_SOURCE_SOCKET: &str = "/var/run/numaflow/source.sock"; +const DEFAULT_SOURCE_SERVER_INFO_FILE: &str = "/var/run/numaflow/sourcer-server-info"; +const DEFAULT_SINK_SOCKET: &str = "/var/run/numaflow/sink.sock"; +const DEFAULT_FB_SINK_SOCKET: &str = "/var/run/numaflow/fb-sink.sock"; +const DEFAULT_SINK_SERVER_INFO_FILE: &str = "/var/run/numaflow/sinker-server-info"; +const DEFAULT_FB_SINK_SERVER_INFO_FILE: &str = "/var/run/numaflow/fb-sinker-server-info"; +const DEFAULT_TRANSFORMER_SOCKET: &str = "/var/run/numaflow/sourcetransform.sock"; +const DEFAULT_TRANSFORMER_SERVER_INFO_FILE: &str = + "/var/run/numaflow/sourcetransformer-server-info"; const ENV_MONO_VERTEX_OBJ: &str = "NUMAFLOW_MONO_VERTEX_OBJECT"; const ENV_GRPC_MAX_MESSAGE_SIZE: &str = "NUMAFLOW_GRPC_MAX_MESSAGE_SIZE"; const ENV_POD_REPLICA: &str = "NUMAFLOW_REPLICA"; @@ -77,15 +84,46 @@ pub struct Settings { pub batch_size: u64, pub timeout_in_ms: u32, pub metrics_server_listen_port: u16, - pub grpc_max_message_size: usize, - pub is_transformer_enabled: bool, - pub is_fallback_enabled: bool, pub lag_check_interval_in_secs: u16, pub lag_refresh_interval_in_secs: u16, pub sink_max_retry_attempts: u16, pub sink_retry_interval_in_ms: u32, pub sink_retry_on_fail_strategy: OnFailureStrategy, pub sink_default_retry_strategy: RetryStrategy, + pub sdk_config: SDKConfig, +} + +#[derive(Debug, Clone)] +pub struct SDKConfig { + pub grpc_max_message_size: usize, + pub is_transformer_enabled: bool, + pub is_fallback_enabled: bool, + pub source_socket_path: String, + pub sink_socket_path: String, + pub transformer_socket_path: String, + pub fallback_socket_path: 
String, + pub source_server_info_path: String, + pub sink_server_info_path: String, + pub transformer_server_info_path: String, + pub fallback_server_info_path: String, +} + +impl Default for SDKConfig { + fn default() -> Self { + Self { + grpc_max_message_size: DEFAULT_GRPC_MAX_MESSAGE_SIZE, + is_transformer_enabled: false, + is_fallback_enabled: false, + source_socket_path: DEFAULT_SOURCE_SOCKET.to_string(), + sink_socket_path: DEFAULT_SINK_SOCKET.to_string(), + transformer_socket_path: DEFAULT_TRANSFORMER_SOCKET.to_string(), + fallback_socket_path: DEFAULT_FB_SINK_SOCKET.to_string(), + source_server_info_path: DEFAULT_SOURCE_SERVER_INFO_FILE.to_string(), + sink_server_info_path: DEFAULT_SINK_SERVER_INFO_FILE.to_string(), + transformer_server_info_path: DEFAULT_TRANSFORMER_SERVER_INFO_FILE.to_string(), + fallback_server_info_path: DEFAULT_FB_SINK_SERVER_INFO_FILE.to_string(), + } + } } impl Default for Settings { @@ -106,15 +144,13 @@ impl Default for Settings { batch_size: DEFAULT_BATCH_SIZE, timeout_in_ms: DEFAULT_TIMEOUT_IN_MS, metrics_server_listen_port: DEFAULT_METRICS_PORT, - grpc_max_message_size: DEFAULT_GRPC_MAX_MESSAGE_SIZE, - is_transformer_enabled: false, - is_fallback_enabled: false, lag_check_interval_in_secs: DEFAULT_LAG_CHECK_INTERVAL_IN_SECS, lag_refresh_interval_in_secs: DEFAULT_LAG_REFRESH_INTERVAL_IN_SECS, sink_max_retry_attempts: DEFAULT_MAX_SINK_RETRY_ATTEMPTS, sink_retry_interval_in_ms: DEFAULT_SINK_RETRY_INTERVAL_IN_MS, sink_retry_on_fail_strategy: DEFAULT_SINK_RETRY_ON_FAIL_STRATEGY, sink_default_retry_strategy: default_retry_strategy, + sdk_config: Default::default(), } } } @@ -158,14 +194,14 @@ impl Settings { .and_then(|metadata| metadata.name) .ok_or_else(|| Error::ConfigError("Mono vertex name not found".to_string()))?; - settings.is_transformer_enabled = mono_vertex_obj + settings.sdk_config.is_transformer_enabled = mono_vertex_obj .spec .source .ok_or(Error::ConfigError("Source not found".to_string()))? 
.transformer .is_some(); - settings.is_fallback_enabled = mono_vertex_obj + settings.sdk_config.is_fallback_enabled = mono_vertex_obj .spec .sink .as_deref() @@ -211,7 +247,7 @@ impl Settings { // check if the sink retry strategy is set to fallback and there is no fallback sink configured // then we should return an error if settings.sink_retry_on_fail_strategy == OnFailureStrategy::Fallback - && !settings.is_fallback_enabled + && !settings.sdk_config.is_fallback_enabled { return Err(Error::ConfigError( "Retry Strategy given as fallback but Fallback sink not configured" @@ -221,7 +257,7 @@ impl Settings { } } - settings.grpc_max_message_size = env::var(ENV_GRPC_MAX_MESSAGE_SIZE) + settings.sdk_config.grpc_max_message_size = env::var(ENV_GRPC_MAX_MESSAGE_SIZE) .unwrap_or_else(|_| DEFAULT_GRPC_MAX_MESSAGE_SIZE.to_string()) .parse() .map_err(|e| { diff --git a/rust/monovertex/src/lib.rs b/rust/monovertex/src/lib.rs index d3d612c2db..01b07498a2 100644 --- a/rust/monovertex/src/lib.rs +++ b/rust/monovertex/src/lib.rs @@ -1,19 +1,21 @@ +extern crate core; + use tokio::signal; use tokio::task::JoinHandle; use tokio_util::sync::CancellationToken; use tracing::{error, info}; -use crate::config::config; +use crate::config::{config, SDKConfig}; use crate::forwarder::ForwarderBuilder; use crate::metrics::MetricsState; use crate::shared::create_rpc_channel; -use crate::sink::{SinkWriter, FB_SINK_SOCKET, SINK_SOCKET}; +use crate::sink::SinkWriter; use crate::sink_pb::sink_client::SinkClient; -use crate::source::{SourceAcker, SourceReader, SOURCE_SOCKET}; +use crate::source::{SourceAcker, SourceReader}; use crate::source_pb::source_client::SourceClient; use crate::sourcetransform_pb::source_transform_client::SourceTransformClient; -use crate::transformer::{SourceTransformer, TRANSFORMER_SOCKET}; +use crate::transformer::SourceTransformer; pub(crate) use self::error::Result; @@ -61,7 +63,7 @@ pub async fn mono_vertex() -> Result<()> { }); // Run the forwarder with cancellation 
token. - if let Err(e) = start_forwarder(cln_token).await { + if let Err(e) = start_forwarder(cln_token, config().sdk_config.clone()).await { error!("Application error: {:?}", e); // abort the signal handler task since we have an error and we are shutting down @@ -96,33 +98,52 @@ async fn shutdown_signal() { } } -async fn start_forwarder(cln_token: CancellationToken) -> Result<()> { +async fn start_forwarder(cln_token: CancellationToken, sdk_config: SDKConfig) -> Result<()> { // make sure that we have compatibility with the server - startup::check_compatibility(&cln_token).await?; + startup::check_compatibility( + &cln_token, + sdk_config.source_server_info_path.into(), + sdk_config.sink_server_info_path.into(), + if sdk_config.is_transformer_enabled { + Some(sdk_config.transformer_server_info_path.into()) + } else { + None + }, + if sdk_config.is_fallback_enabled { + Some(sdk_config.fallback_server_info_path.into()) + } else { + None + }, + ) + .await?; - let mut source_grpc_client = SourceClient::new(create_rpc_channel(SOURCE_SOCKET.into()).await?) - .max_encoding_message_size(config().grpc_max_message_size) - .max_encoding_message_size(config().grpc_max_message_size); + let mut source_grpc_client = + SourceClient::new(create_rpc_channel(sdk_config.source_socket_path.into()).await?) + .max_encoding_message_size(sdk_config.grpc_max_message_size) + .max_encoding_message_size(sdk_config.grpc_max_message_size); - let mut sink_grpc_client = SinkClient::new(create_rpc_channel(SINK_SOCKET.into()).await?) - .max_encoding_message_size(config().grpc_max_message_size) - .max_encoding_message_size(config().grpc_max_message_size); + let mut sink_grpc_client = + SinkClient::new(create_rpc_channel(sdk_config.sink_socket_path.into()).await?) 
+ .max_encoding_message_size(sdk_config.grpc_max_message_size) + .max_encoding_message_size(sdk_config.grpc_max_message_size); - let mut transformer_grpc_client = if config().is_transformer_enabled { - let transformer_grpc_client = - SourceTransformClient::new(create_rpc_channel(TRANSFORMER_SOCKET.into()).await?) - .max_encoding_message_size(config().grpc_max_message_size) - .max_encoding_message_size(config().grpc_max_message_size); + let mut transformer_grpc_client = if sdk_config.is_transformer_enabled { + let transformer_grpc_client = SourceTransformClient::new( + create_rpc_channel(sdk_config.transformer_socket_path.into()).await?, + ) + .max_encoding_message_size(sdk_config.grpc_max_message_size) + .max_encoding_message_size(sdk_config.grpc_max_message_size); Some(transformer_grpc_client.clone()) } else { None }; - let mut fb_sink_grpc_client = if config().is_fallback_enabled { - let fb_sink_grpc_client = SinkClient::new(create_rpc_channel(FB_SINK_SOCKET.into()).await?) - .max_encoding_message_size(config().grpc_max_message_size) - .max_encoding_message_size(config().grpc_max_message_size); + let mut fb_sink_grpc_client = if sdk_config.is_fallback_enabled { + let fb_sink_grpc_client = + SinkClient::new(create_rpc_channel(sdk_config.fallback_socket_path.into()).await?) 
+ .max_encoding_message_size(sdk_config.grpc_max_message_size) + .max_encoding_message_size(sdk_config.grpc_max_message_size); Some(fb_sink_grpc_client.clone()) } else { @@ -188,15 +209,16 @@ async fn start_forwarder(cln_token: CancellationToken) -> Result<()> { #[cfg(test)] mod tests { - use std::env; - + use crate::config::SDKConfig; + use crate::server_info::ServerInfo; + use crate::{error, start_forwarder}; use numaflow::source::{Message, Offset, SourceReadRequest}; use numaflow::{sink, source}; + use std::fs::File; + use std::io::Write; use tokio::sync::mpsc::Sender; use tokio_util::sync::CancellationToken; - use crate::start_forwarder; - struct SimpleSource; #[tonic::async_trait] impl source::Sourcer for SimpleSource { @@ -224,12 +246,32 @@ mod tests { vec![] } } + + async fn write_server_info(file_path: &str, server_info: &ServerInfo) -> error::Result<()> { + let serialized = serde_json::to_string(server_info).unwrap(); + let mut file = File::create(file_path).unwrap(); + file.write_all(serialized.as_bytes()).unwrap(); + file.write_all(b"U+005C__END__").unwrap(); + Ok(()) + } + #[tokio::test] async fn run_forwarder() { let (src_shutdown_tx, src_shutdown_rx) = tokio::sync::oneshot::channel(); let tmp_dir = tempfile::TempDir::new().unwrap(); let src_sock_file = tmp_dir.path().join("source.sock"); let src_info_file = tmp_dir.path().join("source-server-info"); + let server_info_obj = ServerInfo { + protocol: "uds".to_string(), + language: "rust".to_string(), + minimum_numaflow_version: "0.1.0".to_string(), + version: "0.1.0".to_string(), + metadata: None, + }; + + write_server_info(src_info_file.to_str().unwrap(), &server_info_obj) + .await + .unwrap(); let server_info = src_info_file.clone(); let server_socket = src_sock_file.clone(); @@ -247,6 +289,10 @@ mod tests { let sink_sock_file = tmp_dir.path().join("sink.sock"); let sink_server_info = tmp_dir.path().join("sink-server-info"); + write_server_info(sink_server_info.to_str().unwrap(), &server_info_obj) + 
.await + .unwrap(); + let server_socket = sink_sock_file.clone(); let server_info = sink_server_info.clone(); let sink_server_handle = tokio::spawn(async move { @@ -262,11 +308,6 @@ mod tests { // FIXME: we need to have a better way, this is flaky tokio::time::sleep(tokio::time::Duration::from_millis(100)).await; - unsafe { - env::set_var("SOURCE_SOCKET", src_sock_file.to_str().unwrap()); - env::set_var("SINK_SOCKET", sink_sock_file.to_str().unwrap()); - } - let cln_token = CancellationToken::new(); let token_clone = cln_token.clone(); @@ -276,8 +317,17 @@ mod tests { token_clone.cancel(); }); - let result = start_forwarder(cln_token.clone()).await; - assert!(result.is_err()); + let sdk_config = SDKConfig { + source_socket_path: src_sock_file.to_str().unwrap().to_string(), + sink_socket_path: sink_sock_file.to_str().unwrap().to_string(), + source_server_info_path: src_info_file.to_str().unwrap().to_string(), + sink_server_info_path: sink_server_info.to_str().unwrap().to_string(), + grpc_max_message_size: 1024, + ..Default::default() + }; + + let result = start_forwarder(cln_token.clone(), sdk_config).await; + assert!(result.is_ok()); // stop the source and sink servers src_shutdown_tx.send(()).unwrap(); diff --git a/rust/monovertex/src/server_info.rs b/rust/monovertex/src/server_info.rs index 4f74164267..203761be45 100644 --- a/rust/monovertex/src/server_info.rs +++ b/rust/monovertex/src/server_info.rs @@ -1,5 +1,6 @@ use std::collections::HashMap; use std::fs; +use std::path::PathBuf; use std::str::FromStr; use std::time::Duration; @@ -21,21 +22,21 @@ const END: &str = "U+005C__END__"; #[derive(Serialize, Deserialize, Debug)] pub(crate) struct ServerInfo { #[serde(default)] - protocol: String, + pub(crate) protocol: String, #[serde(default)] - language: String, + pub(crate) language: String, #[serde(default)] - minimum_numaflow_version: String, + pub(crate) minimum_numaflow_version: String, #[serde(default)] - version: String, + pub(crate) version: String, 
#[serde(default)] - metadata: Option>, // Metadata is optional + pub(crate) metadata: Option>, // Metadata is optional } /// check_for_server_compatibility waits until the server info file is ready and check whether the /// server is compatible with Numaflow. pub(crate) async fn check_for_server_compatibility( - file_path: &str, + file_path: PathBuf, cln_token: CancellationToken, ) -> error::Result<()> { // Read the server info file @@ -180,7 +181,7 @@ fn check_sdk_compatibility( /// The cancellation token is used to stop ready-check of server_info file in case it is missing. /// This cancellation token is closed via the global shutdown handler. async fn read_server_info( - file_path: &str, + file_path: PathBuf, cln_token: CancellationToken, ) -> error::Result { // Infinite loop to keep checking until the file is ready @@ -190,14 +191,14 @@ async fn read_server_info( } // Check if the file exists and has content - if let Ok(metadata) = fs::metadata(file_path) { + if let Ok(metadata) = fs::metadata(file_path.as_path()) { if metadata.len() > 0 { // Break out of the loop if the file is ready (has content) break; } } // Log message indicating the file is not ready and sleep for 1 second before checking again - info!("Server info file {} is not ready, waiting...", file_path); + info!("Server info file {:?} is not ready, waiting...", file_path); sleep(Duration::from_secs(1)).await; } @@ -206,7 +207,7 @@ async fn read_server_info( let contents; loop { // Attempt to read the file - match fs::read_to_string(file_path) { + match fs::read_to_string(file_path.as_path()) { Ok(data) => { if data.ends_with(END) { // If the file ends with the END marker, trim it and break out of the loop @@ -398,7 +399,7 @@ mod tests { } // Helper function to create a SdkConstraints struct - fn create_sdk_constraints() -> version::SdkConstraints { + fn create_sdk_constraints() -> SdkConstraints { let mut constraints = HashMap::new(); constraints.insert("python".to_string(), "1.2.0".to_string()); 
constraints.insert("java".to_string(), "2.0.0".to_string()); @@ -591,7 +592,7 @@ mod tests { #[tokio::test] async fn test_read_server_info_success() { // Create a temporary directory - let dir = tempfile::tempdir().unwrap(); + let dir = tempdir().unwrap(); let file_path = dir.path().join("server_info.txt"); let cln_token = CancellationToken::new(); @@ -614,7 +615,7 @@ mod tests { let _ = write_server_info(&server_info, file_path.to_str().unwrap()).await; // Call the read_server_info function - let result = read_server_info(file_path.to_str().unwrap(), cln_token).await; + let result = read_server_info(file_path, cln_token).await; assert!(result.is_ok(), "Expected Ok, got {:?}", result); let server_info = result.unwrap(); @@ -632,7 +633,7 @@ mod tests { #[tokio::test] async fn test_read_server_info_retry_limit() { // Create a temporary directory - let dir = tempfile::tempdir().unwrap(); + let dir = tempdir().unwrap(); let file_path = dir.path().join("server_info.txt"); // Write a partial test file not ending with END marker @@ -643,7 +644,7 @@ mod tests { let _drop_guard = cln_token.clone().drop_guard(); // Call the read_server_info function - let result = read_server_info(file_path.to_str().unwrap(), cln_token).await; + let result = read_server_info(file_path, cln_token).await; assert!(result.is_err(), "Expected Err, got {:?}", result); let error = result.unwrap_err(); diff --git a/rust/monovertex/src/sink.rs b/rust/monovertex/src/sink.rs index 6e30d7f58e..a2088a8c2f 100644 --- a/rust/monovertex/src/sink.rs +++ b/rust/monovertex/src/sink.rs @@ -4,12 +4,6 @@ use crate::sink_pb::sink_client::SinkClient; use crate::sink_pb::{SinkRequest, SinkResponse}; use tonic::transport::Channel; -pub(crate) const SINK_SOCKET: &str = "/var/run/numaflow/sink.sock"; -pub(crate) const FB_SINK_SOCKET: &str = "/var/run/numaflow/fb-sink.sock"; - -pub(crate) const SINK_SERVER_INFO_FILE: &str = "/var/run/numaflow/sinker-server-info"; -pub(crate) const FB_SINK_SERVER_INFO_FILE: &str = 
"/var/run/numaflow/fb-sinker-server-info"; - /// SinkWriter writes messages to a sink. #[derive(Clone)] pub struct SinkWriter { diff --git a/rust/monovertex/src/source.rs b/rust/monovertex/src/source.rs index eaafb3ae15..cad78f7bb8 100644 --- a/rust/monovertex/src/source.rs +++ b/rust/monovertex/src/source.rs @@ -9,14 +9,12 @@ use crate::source_pb::{ }; use base64::prelude::BASE64_STANDARD; use base64::Engine; +use log::info; use tokio::sync::mpsc; use tokio_stream::wrappers::ReceiverStream; use tonic::transport::Channel; use tonic::{Request, Streaming}; -pub(crate) const SOURCE_SOCKET: &str = "/var/run/numaflow/source.sock"; -pub(crate) const SOURCE_SERVER_INFO_FILE: &str = "/var/run/numaflow/sourcer-server-info"; - /// SourceReader reads messages from a source. #[derive(Debug)] pub(crate) struct SourceReader { @@ -134,6 +132,8 @@ impl SourceAcker { } pub(crate) async fn ack(&mut self, offsets: Vec) -> Result { + let start = std::time::Instant::now(); + let n = offsets.len(); for offset in offsets { let request = AckRequest { request: Some(ack_request::Request { @@ -150,13 +150,17 @@ impl SourceAcker { .send(request) .await .map_err(|e| SourceError(e.to_string()))?; + } - // wait for the ack response for each ack request - self.ack_resp_stream + for _ in 0..n { + let _ = self + .ack_resp_stream .message() .await? 
.ok_or(SourceError("failed to receive ack response".to_string()))?; } + + info!("acked {} messages in {:?}", n, start.elapsed().as_millis()); Ok(AckResponse { result: Some(ack_response::Result { success: Some(()) }), handshake: None, diff --git a/rust/monovertex/src/startup.rs b/rust/monovertex/src/startup.rs index abf0cc616a..2614d045b7 100644 --- a/rust/monovertex/src/startup.rs +++ b/rust/monovertex/src/startup.rs @@ -1,15 +1,13 @@ use std::net::SocketAddr; +use std::path::PathBuf; use std::time::Duration; use crate::config::config; use crate::error::Error; use crate::metrics::{start_metrics_https_server, LagReader, LagReaderBuilder, MetricsState}; -use crate::sink::{FB_SINK_SERVER_INFO_FILE, SINK_SERVER_INFO_FILE}; use crate::sink_pb::sink_client::SinkClient; -use crate::source::SOURCE_SERVER_INFO_FILE; use crate::source_pb::source_client::SourceClient; use crate::sourcetransform_pb::source_transform_client::SourceTransformClient; -use crate::transformer::TRANSFORMER_SERVER_INFO_FILE; use crate::{error, server_info}; use tokio::task::JoinHandle; @@ -19,35 +17,38 @@ use tonic::transport::Channel; use tonic::Request; use tracing::{info, warn}; -pub(crate) async fn check_compatibility(cln_token: &CancellationToken) -> error::Result<()> { - server_info::check_for_server_compatibility(SOURCE_SERVER_INFO_FILE, cln_token.clone()) +pub(crate) async fn check_compatibility( + cln_token: &CancellationToken, + source_file_path: PathBuf, + sink_file_path: PathBuf, + transformer_file_path: Option, + fb_sink_file_path: Option, +) -> error::Result<()> { + server_info::check_for_server_compatibility(source_file_path, cln_token.clone()) .await .map_err(|e| { warn!("Error waiting for source server info file: {:?}", e); Error::ForwarderError("Error waiting for server info file".to_string()) })?; - server_info::check_for_server_compatibility(SINK_SERVER_INFO_FILE, cln_token.clone()) + server_info::check_for_server_compatibility(sink_file_path, cln_token.clone()) .await .map_err(|e| 
{ error!("Error waiting for sink server info file: {:?}", e); Error::ForwarderError("Error waiting for server info file".to_string()) })?; - if config().is_transformer_enabled { - server_info::check_for_server_compatibility( - TRANSFORMER_SERVER_INFO_FILE, - cln_token.clone(), - ) - .await - .map_err(|e| { - error!("Error waiting for transformer server info file: {:?}", e); - Error::ForwarderError("Error waiting for server info file".to_string()) - })?; + if let Some(transformer_path) = transformer_file_path { + server_info::check_for_server_compatibility(transformer_path, cln_token.clone()) + .await + .map_err(|e| { + error!("Error waiting for transformer server info file: {:?}", e); + Error::ForwarderError("Error waiting for server info file".to_string()) + })?; } - if config().is_fallback_enabled { - server_info::check_for_server_compatibility(FB_SINK_SERVER_INFO_FILE, cln_token.clone()) + if let Some(fb_sink_path) = fb_sink_file_path { + server_info::check_for_server_compatibility(fb_sink_path, cln_token.clone()) .await .map_err(|e| { warn!("Error waiting for fallback sink server info file: {:?}", e); @@ -133,3 +134,216 @@ pub(crate) async fn wait_until_ready( Ok(()) } + +#[cfg(test)] +mod tests { + use super::*; + use crate::server_info::ServerInfo; + use crate::shared::create_rpc_channel; + use numaflow::source::{Message, Offset, SourceReadRequest}; + use numaflow::{sink, source, sourcetransform}; + use std::fs::File; + use std::io::Write; + use tempfile::tempdir; + use tokio::sync::mpsc; + use tokio::sync::mpsc::Sender; + use tokio_util::sync::CancellationToken; + + async fn write_server_info(file_path: &str, server_info: &ServerInfo) -> error::Result<()> { + let serialized = serde_json::to_string(server_info).unwrap(); + let mut file = File::create(file_path).unwrap(); + file.write_all(serialized.as_bytes()).unwrap(); + file.write_all(b"U+005C__END__").unwrap(); + Ok(()) + } + + #[tokio::test] + async fn test_check_compatibility_success() { + let dir = 
tempdir().unwrap(); + let source_file_path = dir.path().join("source_server_info.json"); + let sink_file_path = dir.path().join("sink_server_info.json"); + let transformer_file_path = dir.path().join("transformer_server_info.json"); + let fb_sink_file_path = dir.path().join("fb_sink_server_info.json"); + + let server_info = ServerInfo { + protocol: "uds".to_string(), + language: "rust".to_string(), + minimum_numaflow_version: "0.1.0".to_string(), + version: "0.1.0".to_string(), + metadata: None, + }; + + write_server_info(source_file_path.to_str().unwrap(), &server_info) + .await + .unwrap(); + write_server_info(sink_file_path.to_str().unwrap(), &server_info) + .await + .unwrap(); + write_server_info(transformer_file_path.to_str().unwrap(), &server_info) + .await + .unwrap(); + write_server_info(fb_sink_file_path.to_str().unwrap(), &server_info) + .await + .unwrap(); + + let cln_token = CancellationToken::new(); + let result = + check_compatibility(&cln_token, source_file_path, sink_file_path, None, None).await; + + assert!(result.is_ok()); + } + + #[tokio::test] + async fn test_check_compatibility_failure() { + let cln_token = CancellationToken::new(); + let dir = tempdir().unwrap(); + let source_file_path = dir.path().join("source_server_info.json"); + let sink_file_path = dir.path().join("sink_server_info.json"); + let transformer_file_path = dir.path().join("transformer_server_info.json"); + let fb_sink_file_path = dir.path().join("fb_sink_server_info.json"); + + // do not write server info files to simulate failure + // cancel the token after 100ms to simulate cancellation + let token = cln_token.clone(); + let handle = tokio::spawn(async move { + sleep(Duration::from_millis(100)).await; + token.cancel(); + }); + let result = check_compatibility( + &cln_token, + source_file_path, + sink_file_path, + Some(transformer_file_path), + Some(fb_sink_file_path), + ) + .await; + + assert!(result.is_err()); + handle.await.unwrap(); + } + + struct SimpleSource {} + + 
#[tonic::async_trait] + impl source::Sourcer for SimpleSource { + async fn read(&self, _request: SourceReadRequest, _transmitter: Sender) {} + + async fn ack(&self, _offset: Offset) {} + + async fn pending(&self) -> usize { + 0 + } + + async fn partitions(&self) -> Option> { + Some(vec![0]) + } + } + + struct SimpleTransformer; + #[tonic::async_trait] + impl sourcetransform::SourceTransformer for SimpleTransformer { + async fn transform( + &self, + _input: sourcetransform::SourceTransformRequest, + ) -> Vec { + vec![] + } + } + + struct InMemorySink {} + + #[tonic::async_trait] + impl sink::Sinker for InMemorySink { + async fn sink(&self, mut _input: mpsc::Receiver) -> Vec { + vec![] + } + } + + #[tokio::test] + async fn test_wait_until_ready() { + // Start the source server + let (source_shutdown_tx, source_shutdown_rx) = tokio::sync::oneshot::channel(); + let tmp_dir = tempfile::TempDir::new().unwrap(); + let source_sock_file = tmp_dir.path().join("source.sock"); + let server_info_file = tmp_dir.path().join("source-server-info"); + + let server_info = server_info_file.clone(); + let source_socket = source_sock_file.clone(); + let source_server_handle = tokio::spawn(async move { + source::Server::new(SimpleSource {}) + .with_socket_file(source_socket) + .with_server_info_file(server_info) + .start_with_shutdown(source_shutdown_rx) + .await + .unwrap(); + }); + + // Start the sink server + let (sink_shutdown_tx, sink_shutdown_rx) = tokio::sync::oneshot::channel(); + let sink_tmp_dir = tempfile::TempDir::new().unwrap(); + let sink_sock_file = sink_tmp_dir.path().join("sink.sock"); + let server_info_file = sink_tmp_dir.path().join("sink-server-info"); + + let server_info = server_info_file.clone(); + let sink_socket = sink_sock_file.clone(); + let sink_server_handle = tokio::spawn(async move { + sink::Server::new(InMemorySink {}) + .with_socket_file(sink_socket) + .with_server_info_file(server_info) + .start_with_shutdown(sink_shutdown_rx) + .await + .unwrap(); + }); 
+ + // Start the transformer server + let (transformer_shutdown_tx, transformer_shutdown_rx) = tokio::sync::oneshot::channel(); + let tmp_dir = tempfile::TempDir::new().unwrap(); + let transformer_sock_file = tmp_dir.path().join("transformer.sock"); + let server_info_file = tmp_dir.path().join("transformer-server-info"); + + let server_info = server_info_file.clone(); + let transformer_socket = transformer_sock_file.clone(); + let transformer_server_handle = tokio::spawn(async move { + sourcetransform::Server::new(SimpleTransformer {}) + .with_socket_file(transformer_socket) + .with_server_info_file(server_info) + .start_with_shutdown(transformer_shutdown_rx) + .await + .unwrap(); + }); + + // Wait for the servers to start + sleep(Duration::from_millis(100)).await; + + let mut source_grpc_client = + SourceClient::new(create_rpc_channel(source_sock_file.clone()).await.unwrap()); + let mut sink_grpc_client = + SinkClient::new(create_rpc_channel(sink_sock_file.clone()).await.unwrap()); + let mut transformer_grpc_client = Some(SourceTransformClient::new( + create_rpc_channel(transformer_sock_file.clone()) + .await + .unwrap(), + )); + + let mut fb_sink_grpc_client = None; + + let cln_token = CancellationToken::new(); + let result = wait_until_ready( + cln_token, + &mut source_grpc_client, + &mut sink_grpc_client, + &mut transformer_grpc_client, + &mut fb_sink_grpc_client, + ) + .await; + assert!(result.is_ok()); + + source_shutdown_tx.send(()).unwrap(); + sink_shutdown_tx.send(()).unwrap(); + transformer_shutdown_tx.send(()).unwrap(); + + source_server_handle.await.unwrap(); + sink_server_handle.await.unwrap(); + transformer_server_handle.await.unwrap(); + } +} diff --git a/rust/monovertex/src/transformer.rs b/rust/monovertex/src/transformer.rs index 95c9380f2f..f7797b5d7d 100644 --- a/rust/monovertex/src/transformer.rs +++ b/rust/monovertex/src/transformer.rs @@ -6,9 +6,6 @@ use crate::sourcetransform_pb::SourceTransformRequest; use tonic::transport::Channel; const 
DROP: &str = "U+005C__DROP__"; -pub(crate) const TRANSFORMER_SOCKET: &str = "/var/run/numaflow/sourcetransform.sock"; -pub(crate) const TRANSFORMER_SERVER_INFO_FILE: &str = - "/var/run/numaflow/sourcetransformer-server-info"; /// TransformerClient is a client to interact with the transformer server. #[derive(Clone)] From 2e35591fad5ac2d814b80278d9c17cbf110694cc Mon Sep 17 00:00:00 2001 From: Yashash H L Date: Fri, 20 Sep 2024 18:49:02 +0530 Subject: [PATCH 31/40] minor changes, adding tests Signed-off-by: Yashash H L --- pkg/sdkclient/source/client.go | 13 +- pkg/sdkclient/source/client_test.go | 2 +- pkg/sdkclient/source/interface.go | 2 +- pkg/sources/udsource/grpc_udsource.go | 11 +- rust/monovertex/src/metrics.rs | 160 ++++++++++++++++++ ...source-rs.yaml => simple-source-rust.yaml} | 0 6 files changed, 176 insertions(+), 12 deletions(-) rename test/udsource-e2e/testdata/{simple-source-rs.yaml => simple-source-rust.yaml} (100%) diff --git a/pkg/sdkclient/source/client.go b/pkg/sdkclient/source/client.go index 305271d95e..c7c2fbc6b6 100644 --- a/pkg/sdkclient/source/client.go +++ b/pkg/sdkclient/source/client.go @@ -174,7 +174,7 @@ func (c *client) ReadFn(_ context.Context, req *sourcepb.ReadRequest, datumCh ch for { resp, err := c.readStream.Recv() - // we don't need an EOF check because we never close the stream. + // we don't need an EOF check because we only close the stream during shutdown. if errors.Is(err, context.Canceled) { break } @@ -190,15 +190,18 @@ func (c *client) ReadFn(_ context.Context, req *sourcepb.ReadRequest, datumCh ch } // AckFn acknowledges the data from the source. 
-func (c *client) AckFn(_ context.Context, req *sourcepb.AckRequest) (*sourcepb.AckResponse, error) { +func (c *client) AckFn(_ context.Context, reqs []*sourcepb.AckRequest) (*sourcepb.AckResponse, error) { // Send the ack request - err := c.ackStream.Send(req) - if err != nil { - return nil, fmt.Errorf("failed to send ack request: %v", err) + for _, req := range reqs { + err := c.ackStream.Send(req) + if err != nil { + return nil, fmt.Errorf("failed to send ack request: %v", err) + } } // Wait for the ack response resp, err := c.ackStream.Recv() + // we don't need an EOF check because we only close the stream during shutdown. if err != nil { return nil, fmt.Errorf("failed to receive ack response: %v", err) } diff --git a/pkg/sdkclient/source/client_test.go b/pkg/sdkclient/source/client_test.go index 818c3c3430..70b859b1fe 100644 --- a/pkg/sdkclient/source/client_test.go +++ b/pkg/sdkclient/source/client_test.go @@ -188,7 +188,7 @@ func TestAckFn(t *testing.T) { assert.True(t, ackHandshakeResponse.GetHandshake().GetSot()) // Test AckFn - ack, err := testClient.AckFn(ctx, &sourcepb.AckRequest{}) + ack, err := testClient.AckFn(ctx, []*sourcepb.AckRequest{{}}) assert.NoError(t, err) assert.Equal(t, &sourcepb.AckResponse{}, ack) } diff --git a/pkg/sdkclient/source/interface.go b/pkg/sdkclient/source/interface.go index ea897b8207..4ee75a23c4 100644 --- a/pkg/sdkclient/source/interface.go +++ b/pkg/sdkclient/source/interface.go @@ -32,7 +32,7 @@ type Client interface { // ReadFn reads messages from the udsource. ReadFn(ctx context.Context, req *sourcepb.ReadRequest, datumCh chan<- *sourcepb.ReadResponse) error // AckFn acknowledges messages from the udsource. - AckFn(ctx context.Context, req *sourcepb.AckRequest) (*sourcepb.AckResponse, error) + AckFn(ctx context.Context, req []*sourcepb.AckRequest) (*sourcepb.AckResponse, error) // PendingFn returns the number of pending messages from the udsource. 
PendingFn(ctx context.Context, req *emptypb.Empty) (*sourcepb.PendingResponse, error) // PartitionsFn returns the list of partitions from the udsource. diff --git a/pkg/sources/udsource/grpc_udsource.go b/pkg/sources/udsource/grpc_udsource.go index 2fdbb7cef0..7cc85fdf6a 100644 --- a/pkg/sources/udsource/grpc_udsource.go +++ b/pkg/sources/udsource/grpc_udsource.go @@ -170,22 +170,23 @@ func (u *GRPCBasedUDSource) ApplyReadFn(ctx context.Context, count int64, timeou } // ApplyAckFn acknowledges messages in the source. -// TODO should we make this accept a single offset? func (u *GRPCBasedUDSource) ApplyAckFn(ctx context.Context, offsets []isb.Offset) error { rOffsets := make([]*sourcepb.Offset, len(offsets)) for i, offset := range offsets { rOffsets[i] = ConvertToUserDefinedSourceOffset(offset) } + ackRequests := make([]*sourcepb.AckRequest, len(rOffsets)) for _, offset := range rOffsets { var r = &sourcepb.AckRequest{ Request: &sourcepb.AckRequest_Request{ Offset: offset, }, } - _, err := u.client.AckFn(ctx, r) - if err != nil { - return err - } + ackRequests = append(ackRequests, r) + } + _, err := u.client.AckFn(ctx, ackRequests) + if err != nil { + return err } return nil } diff --git a/rust/monovertex/src/metrics.rs b/rust/monovertex/src/metrics.rs index f12a027804..fc6ab7a0b0 100644 --- a/rust/monovertex/src/metrics.rs +++ b/rust/monovertex/src/metrics.rs @@ -557,3 +557,163 @@ async fn calculate_pending( } // TODO add tests + +#[cfg(test)] +mod tests { + use super::*; + use crate::metrics::MetricsState; + use crate::shared::create_rpc_channel; + use numaflow::source::{Message, Offset, SourceReadRequest}; + use numaflow::{sink, source, sourcetransform}; + use std::net::SocketAddr; + use tokio::sync::mpsc::Sender; + + struct SimpleSource; + #[tonic::async_trait] + impl source::Sourcer for SimpleSource { + async fn read(&self, _: SourceReadRequest, _: Sender) {} + + async fn ack(&self, _: Offset) {} + + async fn pending(&self) -> usize { + 0 + } + + async fn 
partitions(&self) -> Option> { + None + } + } + + struct SimpleSink; + + #[tonic::async_trait] + impl sink::Sinker for SimpleSink { + async fn sink( + &self, + _input: tokio::sync::mpsc::Receiver, + ) -> Vec { + vec![] + } + } + + struct NowCat; + + #[tonic::async_trait] + impl sourcetransform::SourceTransformer for NowCat { + async fn transform( + &self, + _input: sourcetransform::SourceTransformRequest, + ) -> Vec { + vec![] + } + } + + #[tokio::test] + async fn test_start_metrics_https_server() { + let (src_shutdown_tx, src_shutdown_rx) = tokio::sync::oneshot::channel(); + let tmp_dir = tempfile::TempDir::new().unwrap(); + let src_sock_file = tmp_dir.path().join("source.sock"); + let src_info_file = tmp_dir.path().join("source-server-info"); + + let server_info = src_info_file.clone(); + let server_socket = src_sock_file.clone(); + let src_server_handle = tokio::spawn(async move { + source::Server::new(SimpleSource) + .with_socket_file(server_socket) + .with_server_info_file(server_info) + .start_with_shutdown(src_shutdown_rx) + .await + .unwrap(); + }); + + let (sink_shutdown_tx, sink_shutdown_rx) = tokio::sync::oneshot::channel(); + let (fb_sink_shutdown_tx, fb_sink_shutdown_rx) = tokio::sync::oneshot::channel(); + let tmp_dir = tempfile::TempDir::new().unwrap(); + let sink_sock_file = tmp_dir.path().join("sink.sock"); + let sink_server_info = tmp_dir.path().join("sink-server-info"); + let fb_sink_sock_file = tmp_dir.path().join("fallback-sink.sock"); + let fb_sink_server_info = tmp_dir.path().join("fallback-sink-server-info"); + + let server_socket = sink_sock_file.clone(); + let server_info = sink_server_info.clone(); + let sink_server_handle = tokio::spawn(async move { + sink::Server::new(SimpleSink) + .with_socket_file(server_socket) + .with_server_info_file(server_info) + .start_with_shutdown(sink_shutdown_rx) + .await + .unwrap(); + }); + let fb_server_socket = fb_sink_sock_file.clone(); + let fb_server_info = fb_sink_server_info.clone(); + let 
fb_sink_server_handle = tokio::spawn(async move { + sink::Server::new(SimpleSink) + .with_socket_file(fb_server_socket) + .with_server_info_file(fb_server_info) + .start_with_shutdown(fb_sink_shutdown_rx) + .await + .unwrap(); + }); + + // start the transformer server + let (transformer_shutdown_tx, transformer_shutdown_rx) = tokio::sync::oneshot::channel(); + let sock_file = tmp_dir.path().join("sourcetransform.sock"); + let server_info_file = tmp_dir.path().join("sourcetransformer-server-info"); + + let server_info = server_info_file.clone(); + let server_socket = sock_file.clone(); + let transformer_handle = tokio::spawn(async move { + sourcetransform::Server::new(NowCat) + .with_socket_file(server_socket) + .with_server_info_file(server_info) + .start_with_shutdown(transformer_shutdown_rx) + .await + .expect("server failed"); + }); + + // wait for the servers to start + // FIXME: we need to have a better way, this is flaky + tokio::time::sleep(tokio::time::Duration::from_millis(100)).await; + let metrics_state = MetricsState { + source_client: SourceClient::new(create_rpc_channel(src_sock_file).await.unwrap()), + sink_client: SinkClient::new(create_rpc_channel(sink_sock_file).await.unwrap()), + transformer_client: Some(SourceTransformClient::new( + create_rpc_channel(sock_file).await.unwrap(), + )), + fb_sink_client: Some(SinkClient::new( + create_rpc_channel(fb_sink_sock_file).await.unwrap(), + )), + }; + + let addr: SocketAddr = "127.0.0.1:9091".parse().unwrap(); + let metrics_state_clone = metrics_state.clone(); + let server_handle = tokio::spawn(async move { + start_metrics_https_server(addr, metrics_state_clone) + .await + .unwrap(); + }); + + // invoke the sidecar-livez endpoint + let response = sidecar_livez(State(metrics_state)).await; + assert_eq!(response.into_response().status(), StatusCode::NO_CONTENT); + + // invoke the livez endpoint + let response = livez().await; + assert_eq!(response.into_response().status(), StatusCode::NO_CONTENT); + + // 
invoke the metrics endpoint + let response = metrics_handler().await; + assert_eq!(response.into_response().status(), StatusCode::OK); + + // Stop the servers + server_handle.abort(); + src_shutdown_tx.send(()).unwrap(); + sink_shutdown_tx.send(()).unwrap(); + fb_sink_shutdown_tx.send(()).unwrap(); + transformer_shutdown_tx.send(()).unwrap(); + src_server_handle.await.unwrap(); + sink_server_handle.await.unwrap(); + fb_sink_server_handle.await.unwrap(); + transformer_handle.await.unwrap(); + } +} diff --git a/test/udsource-e2e/testdata/simple-source-rs.yaml b/test/udsource-e2e/testdata/simple-source-rust.yaml similarity index 100% rename from test/udsource-e2e/testdata/simple-source-rs.yaml rename to test/udsource-e2e/testdata/simple-source-rust.yaml From 4c58042f808d7793d4b27fc60366ffb370599784 Mon Sep 17 00:00:00 2001 From: Yashash H L Date: Fri, 20 Sep 2024 19:17:26 +0530 Subject: [PATCH 32/40] fix tests Signed-off-by: Yashash H L --- pkg/sdkclient/source/client.go | 18 +++++++++++------- pkg/sdkclient/source/client_test.go | 2 +- pkg/sdkclient/source/interface.go | 2 +- pkg/sources/udsource/grpc_udsource.go | 4 ++-- 4 files changed, 15 insertions(+), 11 deletions(-) diff --git a/pkg/sdkclient/source/client.go b/pkg/sdkclient/source/client.go index c7c2fbc6b6..550c888d66 100644 --- a/pkg/sdkclient/source/client.go +++ b/pkg/sdkclient/source/client.go @@ -190,7 +190,7 @@ func (c *client) ReadFn(_ context.Context, req *sourcepb.ReadRequest, datumCh ch } // AckFn acknowledges the data from the source. 
-func (c *client) AckFn(_ context.Context, reqs []*sourcepb.AckRequest) (*sourcepb.AckResponse, error) { +func (c *client) AckFn(_ context.Context, reqs []*sourcepb.AckRequest) ([]*sourcepb.AckResponse, error) { // Send the ack request for _, req := range reqs { err := c.ackStream.Send(req) @@ -199,14 +199,18 @@ func (c *client) AckFn(_ context.Context, reqs []*sourcepb.AckRequest) (*sourcep } } - // Wait for the ack response - resp, err := c.ackStream.Recv() - // we don't need an EOF check because we only close the stream during shutdown. - if err != nil { - return nil, fmt.Errorf("failed to receive ack response: %v", err) + responses := make([]*sourcepb.AckResponse, len(reqs)) + for i := 0; i < len(reqs); i++ { + // Wait for the ack response + resp, err := c.ackStream.Recv() + // we don't need an EOF check because we only close the stream during shutdown. + if err != nil { + return nil, fmt.Errorf("failed to receive ack response: %v", err) + } + responses[i] = resp } - return resp, nil + return responses, nil } // PendingFn returns the number of pending data from the source. diff --git a/pkg/sdkclient/source/client_test.go b/pkg/sdkclient/source/client_test.go index 70b859b1fe..d19e3e8737 100644 --- a/pkg/sdkclient/source/client_test.go +++ b/pkg/sdkclient/source/client_test.go @@ -190,7 +190,7 @@ func TestAckFn(t *testing.T) { // Test AckFn ack, err := testClient.AckFn(ctx, []*sourcepb.AckRequest{{}}) assert.NoError(t, err) - assert.Equal(t, &sourcepb.AckResponse{}, ack) + assert.Equal(t, []*sourcepb.AckResponse{{}}, ack) } func TestPendingFn(t *testing.T) { diff --git a/pkg/sdkclient/source/interface.go b/pkg/sdkclient/source/interface.go index 4ee75a23c4..cc26f2cd95 100644 --- a/pkg/sdkclient/source/interface.go +++ b/pkg/sdkclient/source/interface.go @@ -32,7 +32,7 @@ type Client interface { // ReadFn reads messages from the udsource. 
ReadFn(ctx context.Context, req *sourcepb.ReadRequest, datumCh chan<- *sourcepb.ReadResponse) error // AckFn acknowledges messages from the udsource. - AckFn(ctx context.Context, req []*sourcepb.AckRequest) (*sourcepb.AckResponse, error) + AckFn(ctx context.Context, req []*sourcepb.AckRequest) ([]*sourcepb.AckResponse, error) // PendingFn returns the number of pending messages from the udsource. PendingFn(ctx context.Context, req *emptypb.Empty) (*sourcepb.PendingResponse, error) // PartitionsFn returns the list of partitions from the udsource. diff --git a/pkg/sources/udsource/grpc_udsource.go b/pkg/sources/udsource/grpc_udsource.go index 7cc85fdf6a..2ce62a3e56 100644 --- a/pkg/sources/udsource/grpc_udsource.go +++ b/pkg/sources/udsource/grpc_udsource.go @@ -176,13 +176,13 @@ func (u *GRPCBasedUDSource) ApplyAckFn(ctx context.Context, offsets []isb.Offset rOffsets[i] = ConvertToUserDefinedSourceOffset(offset) } ackRequests := make([]*sourcepb.AckRequest, len(rOffsets)) - for _, offset := range rOffsets { + for i, offset := range rOffsets { var r = &sourcepb.AckRequest{ Request: &sourcepb.AckRequest_Request{ Offset: offset, }, } - ackRequests = append(ackRequests, r) + ackRequests[i] = r } _, err := u.client.AckFn(ctx, ackRequests) if err != nil { From b4515564d6b3721e965d79c0868d7b4b6429ce30 Mon Sep 17 00:00:00 2001 From: Vigith Maurice Date: Fri, 20 Sep 2024 08:45:04 -0700 Subject: [PATCH 33/40] chore: syncing source.proto from numaflow-rs Signed-off-by: Vigith Maurice --- pkg/apis/proto/source/v1/source.proto | 187 +++++++++++++------------- 1 file changed, 96 insertions(+), 91 deletions(-) diff --git a/pkg/apis/proto/source/v1/source.proto b/pkg/apis/proto/source/v1/source.proto index 1a2be2b2db..7dc1a67412 100644 --- a/pkg/apis/proto/source/v1/source.proto +++ b/pkg/apis/proto/source/v1/source.proto @@ -24,102 +24,104 @@ import "google/protobuf/empty.proto"; package source.v1; - service Source { - // Read returns a stream of datum responses. 
- // The size of the returned ReadResponse is less than or equal to the num_records specified in each ReadRequest. - // If the request timeout is reached on the server side, the returned ReadResponse will contain all the datum that have been read (which could be an empty list). - // The server will continue to read and respond to subsequent ReadRequests until the client closes the stream. - rpc ReadFn(stream ReadRequest) returns (stream ReadResponse); - - // AckFn acknowledges a stream of datum offsets. - // When AckFn is called, it implicitly indicates that the datum stream has been processed by the source vertex. - // The caller (numa) expects the AckFn to be successful, and it does not expect any errors. - // If there are some irrecoverable errors when the callee (UDSource) is processing the AckFn request, - // then it is best to crash because there are no other retry mechanisms possible. - rpc AckFn(stream AckRequest) returns (AckResponse); - - // PendingFn returns the number of pending records at the user defined source. - rpc PendingFn(google.protobuf.Empty) returns (PendingResponse); - - // PartitionsFn returns the list of partitions for the user defined source. - rpc PartitionsFn(google.protobuf.Empty) returns (PartitionsResponse); - - // IsReady is the heartbeat endpoint for user defined source gRPC. - rpc IsReady(google.protobuf.Empty) returns (ReadyResponse); - } +service Source { + // Read returns a stream of datum responses. + // The size of the returned responses is less than or equal to the num_records specified in each ReadRequest. + // If the request timeout is reached on the server side, the returned responses will contain all the datum that have been read (which could be an empty list). + // The server will continue to read and respond to subsequent ReadRequests until the client closes the stream. + // Once it has sent all the datum, the server will send a ReadResponse with the end of transmission flag set to true. 
+ rpc ReadFn(stream ReadRequest) returns (stream ReadResponse); + + // AckFn acknowledges a stream of datum offsets. + // When AckFn is called, it implicitly indicates that the datum stream has been processed by the source vertex. + // The caller (numa) expects the AckFn to be successful, and it does not expect any errors. + // If there are some irrecoverable errors when the callee (UDSource) is processing the AckFn request, + // then it is best to crash because there are no other retry mechanisms possible. + // The client sends n requests and expects n responses. + rpc AckFn(stream AckRequest) returns (stream AckResponse); + + // PendingFn returns the number of pending records at the user defined source. + rpc PendingFn(google.protobuf.Empty) returns (PendingResponse); + + // PartitionsFn returns the list of partitions for the user defined source. + rpc PartitionsFn(google.protobuf.Empty) returns (PartitionsResponse); + + // IsReady is the heartbeat endpoint for user defined source gRPC. + rpc IsReady(google.protobuf.Empty) returns (ReadyResponse); +} - /* - * Handshake message between client and server to indicate the start of transmission. - */ - message Handshake { - // Required field indicating the start of transmission. - bool sot = 1; - } +/* + * Handshake message between client and server to indicate the start of transmission. + */ +message Handshake { + // Required field indicating the start of transmission. + bool sot = 1; +} - /* - * ReadRequest is the request for reading datum stream from user defined source. - */ - message ReadRequest { - message Request { - // Required field indicating the number of records to read. - uint64 num_records = 1; - // Required field indicating the request timeout in milliseconds. - // uint32 can represent 2^32 milliseconds, which is about 49 days. - // We don't use uint64 because time.Duration takes int64 as nano seconds. Using uint64 for milli will cause overflow.
- uint32 timeout_in_ms = 2; - } - // Required field indicating the request. - Request request = 1; - optional Handshake handshake = 2; +/* + * ReadRequest is the request for reading datum stream from user defined source. + */ +message ReadRequest { + message Request { + // Required field indicating the number of records to read. + uint64 num_records = 1; + // Required field indicating the request timeout in milliseconds. + // uint32 can represent 2^32 milliseconds, which is about 49 days. + // We don't use uint64 because time.Duration takes int64 as nano seconds. Using uint64 for milli will cause overflow. + uint32 timeout_in_ms = 2; } + // Required field indicating the request. + Request request = 1; + optional Handshake handshake = 2; +} - /* - * ReadResponse is the response for reading datum stream from user defined source. - */ - message ReadResponse { - message Result { - // Required field holding the payload of the datum. - bytes payload = 1; - // Required field indicating the offset information of the datum. - Offset offset = 2; - // Required field representing the time associated with each datum. It is used for watermarking. - google.protobuf.Timestamp event_time = 3; - // Optional list of keys associated with the datum. - // Key is the "key" attribute in (key,value) as in the map-reduce paradigm. - // We add this optional field to support the use case where the user defined source can provide keys for the datum. - // e.g. Kafka and Redis Stream message usually include information about the keys. - repeated string keys = 4; - // Optional list of headers associated with the datum. - // Headers are the metadata associated with the datum. - // e.g. Kafka and Redis Stream message usually include information about the headers. - map<string, string> headers = 5; +/* + * ReadResponse is the response for reading datum stream from user defined source. + */ +message ReadResponse { + message Result { + // Required field holding the payload of the datum.
+ bytes payload = 1; + // Required field indicating the offset information of the datum. + Offset offset = 2; + // Required field representing the time associated with each datum. It is used for watermarking. + google.protobuf.Timestamp event_time = 3; + // Optional list of keys associated with the datum. + // Key is the "key" attribute in (key,value) as in the map-reduce paradigm. + // We add this optional field to support the use case where the user defined source can provide keys for the datum. + // e.g. Kafka and Redis Stream message usually include information about the keys. + repeated string keys = 4; + // Optional list of headers associated with the datum. + // Headers are the metadata associated with the datum. + // e.g. Kafka and Redis Stream message usually include information about the headers. + map<string, string> headers = 5; + } + message Status { + // Code to indicate the status of the response. + enum Code { + SUCCESS = 0; + FAILURE = 1; } - message Status { - // Code to indicate the status of the response. - enum Code { - SUCCESS = 0; - FAILURE = 1; - } - - // Error to indicate the error type. If the code is FAILURE, then the error field will be populated. - enum Error { - UNACKED = 0; - OTHER = 1; - } - - // End of transmission flag. - bool eot = 1; - Code code = 2; - optional Error error = 3; - optional string msg = 4; + + // Error to indicate the error type. If the code is FAILURE, then the error field will be populated. + enum Error { + UNACKED = 0; + OTHER = 1; } - // Required field holding the result. - Result result = 1; - // Status of the response. Holds the end of transmission flag and the status code. - // - Status status = 2; - optional Handshake handshake = 3; + + // End of transmission flag. + bool eot = 1; + Code code = 2; + optional Error error = 3; + optional string msg = 4; } + // Required field holding the result. + Result result = 1; + // Status of the response. Holds the end of transmission flag and the status code.
+ Status status = 2; + // Handshake message between client and server to indicate the start of transmission. + optional Handshake handshake = 3; +} /* * AckRequest is the request for acknowledging datum. @@ -127,11 +129,12 @@ package source.v1; */ message AckRequest { message Request { - // Required field holding the offset to be acked. + // Required field holding the offset to be acked Offset offset = 1; } // Required field holding the request. The list will be ordered and will have the same order as the original Read response. Request request = 1; + optional Handshake handshake = 2; } /* @@ -151,6 +154,8 @@ message AckResponse { } // Required field holding the result. Result result = 1; + // Handshake message between client and server to indicate the start of transmission. + optional Handshake handshake = 2; } /* @@ -199,4 +204,4 @@ message Offset { // It is useful for sources that have multiple partitions. e.g. Kafka. // If the partition_id is not specified, it is assumed that the source has a single partition. int32 partition_id = 2; -} +} \ No newline at end of file From f5ee859586ce2f0c7642a3afa1a508bb5e222e99 Mon Sep 17 00:00:00 2001 From: Vigith Maurice Date: Fri, 20 Sep 2024 09:13:44 -0700 Subject: [PATCH 34/40] chore: update monovertex proto (we need to stop this) Signed-off-by: Vigith Maurice --- rust/monovertex/proto/source.proto | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/rust/monovertex/proto/source.proto b/rust/monovertex/proto/source.proto index 93c5f19278..69ff154127 100644 --- a/rust/monovertex/proto/source.proto +++ b/rust/monovertex/proto/source.proto @@ -7,9 +7,10 @@ package source.v1; service Source { // Read returns a stream of datum responses. - // The size of the returned ReadResponse is less than or equal to the num_records specified in each ReadRequest. 
- // If the request timeout is reached on the server side, the returned ReadResponse will contain all the datum that have been read (which could be an empty list). + // The size of the returned responses is less than or equal to the num_records specified in each ReadRequest. + // If the request timeout is reached on the server side, the returned responses will contain all the datum that have been read (which could be an empty list). // The server will continue to read and respond to subsequent ReadRequests until the client closes the stream. + // Once it has sent all the datum, the server will send a ReadResponse with the end of transmission flag set to true. rpc ReadFn(stream ReadRequest) returns (stream ReadResponse); // AckFn acknowledges a stream of datum offsets. @@ -17,6 +18,7 @@ service Source { // The caller (numa) expects the AckFn to be successful, and it does not expect any errors. // If there are some irrecoverable errors when the callee (UDSource) is processing the AckFn request, // then it is best to crash because there are no other retry mechanisms possible. + // The client sends n requests and expects n responses. rpc AckFn(stream AckRequest) returns (stream AckResponse); // PendingFn returns the number of pending records at the user defined source. @@ -98,6 +100,7 @@ message ReadResponse { Result result = 1; // Status of the response. Holds the end of transmission flag and the status code. Status status = 2; + // Handshake message between client and server to indicate the start of transmission. optional Handshake handshake = 3; } @@ -132,6 +135,7 @@ message AckResponse { } // Required field holding the result. Result result = 1; + // Handshake message between client and server to indicate the start of transmission. optional Handshake handshake = 2; } @@ -181,4 +185,4 @@ message Offset { // It is useful for sources that have multiple partitions. e.g. Kafka. // If the partition_id is not specified, it is assumed that the source has a single partition.
int32 partition_id = 2; -} \ No newline at end of file +} From 499b33d5fba49de451dc954aa89ebc375b1c8122 Mon Sep 17 00:00:00 2001 From: Vigith Maurice Date: Fri, 20 Sep 2024 13:17:09 -0700 Subject: [PATCH 35/40] chore: minor Signed-off-by: Vigith Maurice --- pkg/sources/udsource/grpc_udsource.go | 5 +---- test/udsource-e2e/testdata/simple-source-rust.yaml | 2 -- 2 files changed, 1 insertion(+), 6 deletions(-) diff --git a/pkg/sources/udsource/grpc_udsource.go b/pkg/sources/udsource/grpc_udsource.go index 2ce62a3e56..8d0389a2ee 100644 --- a/pkg/sources/udsource/grpc_udsource.go +++ b/pkg/sources/udsource/grpc_udsource.go @@ -185,10 +185,7 @@ func (u *GRPCBasedUDSource) ApplyAckFn(ctx context.Context, offsets []isb.Offset ackRequests[i] = r } _, err := u.client.AckFn(ctx, ackRequests) - if err != nil { - return err - } - return nil + return err } // ApplyPartitionFn returns the partitions associated with the source. diff --git a/test/udsource-e2e/testdata/simple-source-rust.yaml b/test/udsource-e2e/testdata/simple-source-rust.yaml index 0cff657496..eddb8ca2f6 100644 --- a/test/udsource-e2e/testdata/simple-source-rust.yaml +++ b/test/udsource-e2e/testdata/simple-source-rust.yaml @@ -14,8 +14,6 @@ spec: limits: readBatchSize: 500 scale: - # set it as two pods to be different from the sink such that we can use this pipeline - # to test processing rate is consistent across vertices when they have different replica counts. 
min: 1 - name: out sink: From 6f6a48d5ac06b78e808811eda6189683910920f4 Mon Sep 17 00:00:00 2001 From: Vigith Maurice Date: Fri, 20 Sep 2024 19:15:12 -0700 Subject: [PATCH 36/40] chore: From trait for AckRequest Signed-off-by: Vigith Maurice --- rust/monovertex/src/message.rs | 19 ++++++++++++++++++- rust/monovertex/src/source.rs | 27 +++++++++++---------------- 2 files changed, 29 insertions(+), 17 deletions(-) diff --git a/rust/monovertex/src/message.rs b/rust/monovertex/src/message.rs index 54f0859e3b..403c377ec4 100644 --- a/rust/monovertex/src/message.rs +++ b/rust/monovertex/src/message.rs @@ -7,7 +7,8 @@ use chrono::{DateTime, Utc}; use crate::error::Error; use crate::shared::{prost_timestamp_from_utc, utc_from_timestamp}; use crate::sink_pb::SinkRequest; -use crate::source_pb::read_response; +use crate::source_pb; +use crate::source_pb::{AckRequest, read_response}; use crate::sourcetransform_pb::SourceTransformRequest; /// A message that is sent from the source to the sink. @@ -36,6 +37,22 @@ pub(crate) struct Offset { pub(crate) partition_id: i32, } +impl From for AckRequest { + fn from(offset: Offset) -> Self { + Self { + request: Some(source_pb::ack_request::Request { + offset: Some(source_pb::Offset { + offset: BASE64_STANDARD + .decode(offset.offset) + .expect("we control the encoding, so this should never fail"), + partition_id: offset.partition_id, + }), + }), + handshake: None, + } + } +} + /// Convert the [`Message`] to [`SourceTransformRequest`] impl From for SourceTransformRequest { fn from(message: Message) -> Self { diff --git a/rust/monovertex/src/source.rs b/rust/monovertex/src/source.rs index cad78f7bb8..a86a9c61fa 100644 --- a/rust/monovertex/src/source.rs +++ b/rust/monovertex/src/source.rs @@ -5,10 +5,8 @@ use crate::message::{Message, Offset}; use crate::source_pb; use crate::source_pb::source_client::SourceClient; use crate::source_pb::{ - ack_request, ack_response, read_request, AckRequest, AckResponse, ReadRequest, ReadResponse, + 
ack_response, read_request, AckRequest, AckResponse, ReadRequest, ReadResponse, }; -use base64::prelude::BASE64_STANDARD; -use base64::Engine; use log::info; use tokio::sync::mpsc; use tokio_stream::wrappers::ReceiverStream; @@ -47,6 +45,7 @@ impl SourceReader { let handshake_response = resp_stream.message().await?.ok_or(SourceError( "failed to receive handshake response".to_string(), ))?; + // TODO(explain): why will this be None and why is None okay? if handshake_response.handshake.map_or(true, |h| !h.sot) { return Err(SourceError("invalid handshake response".to_string())); } @@ -78,7 +77,7 @@ impl SourceReader { let mut messages = Vec::with_capacity(num_records as usize); while let Some(response) = self.resp_stream.message().await? { - if response.status.as_ref().map_or(false, |status| status.eot) { + if response.status.map_or(false, |status| status.eot) { break; } @@ -121,6 +120,7 @@ impl SourceAcker { let ack_handshake_response = ack_resp_stream.message().await?.ok_or(SourceError( "failed to receive ack handshake response".to_string(), ))?; + // TODO(explain): why will this be None and why is None okay? if ack_handshake_response.handshake.map_or(true, |h| !h.sot) { return Err(SourceError("invalid ack handshake response".to_string())); } @@ -132,26 +132,19 @@ impl SourceAcker { } pub(crate) async fn ack(&mut self, offsets: Vec) -> Result { - let start = std::time::Instant::now(); + let start = tokio::time::Instant::now(); let n = offsets.len(); + + // send n ack requests for offset in offsets { - let request = AckRequest { - request: Some(ack_request::Request { - offset: Some(source_pb::Offset { - offset: BASE64_STANDARD - .decode(offset.offset) - .expect("we control the encoding, so this should never fail"), - partition_id: offset.partition_id, - }), - }), - handshake: None, - }; + let request = offset.into(); self.ack_tx .send(request) .await .map_err(|e| SourceError(e.to_string()))?; } + // make sure we get n responses for the n requests. 
for _ in 0..n { let _ = self .ack_resp_stream @@ -160,7 +153,9 @@ impl SourceAcker { .ok_or(SourceError("failed to receive ack response".to_string()))?; } + // TODO: emit latency metrics. info!("acked {} messages in {:?}", n, start.elapsed().as_millis()); + Ok(AckResponse { result: Some(ack_response::Result { success: Some(()) }), handshake: None, From 34920d228c2a9f63ca961878175bc23677bb2859 Mon Sep 17 00:00:00 2001 From: Yashash H L Date: Sat, 21 Sep 2024 08:18:47 +0530 Subject: [PATCH 37/40] remove log Signed-off-by: Yashash H L --- rust/monovertex/src/source.rs | 4 ---- 1 file changed, 4 deletions(-) diff --git a/rust/monovertex/src/source.rs b/rust/monovertex/src/source.rs index a86a9c61fa..9380f847e6 100644 --- a/rust/monovertex/src/source.rs +++ b/rust/monovertex/src/source.rs @@ -132,7 +132,6 @@ impl SourceAcker { } pub(crate) async fn ack(&mut self, offsets: Vec) -> Result { - let start = tokio::time::Instant::now(); let n = offsets.len(); // send n ack requests @@ -153,9 +152,6 @@ impl SourceAcker { .ok_or(SourceError("failed to receive ack response".to_string()))?; } - // TODO: emit latency metrics. 
- info!("acked {} messages in {:?}", n, start.elapsed().as_millis()); - Ok(AckResponse { result: Some(ack_response::Result { success: Some(()) }), handshake: None, From def94f2d06fff5bcc7c743ae28208ec4c215f682 Mon Sep 17 00:00:00 2001 From: Yashash H L Date: Sat, 21 Sep 2024 08:38:20 +0530 Subject: [PATCH 38/40] revert back the images Signed-off-by: Yashash H L --- .../monovertex-e2e/testdata/mono-vertex-with-transformer.yaml | 2 +- test/udsource-e2e/testdata/simple-source-go.yaml | 2 +- test/udsource-e2e/testdata/simple-source-java.yaml | 2 +- test/udsource-e2e/testdata/simple-source-rust.yaml | 4 ++-- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/test/monovertex-e2e/testdata/mono-vertex-with-transformer.yaml b/test/monovertex-e2e/testdata/mono-vertex-with-transformer.yaml index d8f1e0b0dd..e491448505 100644 --- a/test/monovertex-e2e/testdata/mono-vertex-with-transformer.yaml +++ b/test/monovertex-e2e/testdata/mono-vertex-with-transformer.yaml @@ -8,7 +8,7 @@ spec: source: udsource: container: - image: quay.io/yhl25/numaflow-go/source-simple-source:stable + image: quay.io/numaio/numaflow-go/source-simple-source:stable imagePullPolicy: Always transformer: container: diff --git a/test/udsource-e2e/testdata/simple-source-go.yaml b/test/udsource-e2e/testdata/simple-source-go.yaml index 5d0943bd03..65c6472479 100644 --- a/test/udsource-e2e/testdata/simple-source-go.yaml +++ b/test/udsource-e2e/testdata/simple-source-go.yaml @@ -10,7 +10,7 @@ spec: container: # A simple user-defined source for e2e testing # See https://github.com/numaproj/numaflow-go/tree/main/pkg/sourcer/examples/simple_source - image: quay.io/yhl25/numaflow-go/source-simple-source:stable + image: quay.io/numaio/numaflow-go/source-simple-source:stable limits: readBatchSize: 500 scale: diff --git a/test/udsource-e2e/testdata/simple-source-java.yaml b/test/udsource-e2e/testdata/simple-source-java.yaml index b85745ebf9..4c883b94b3 100644 --- 
a/test/udsource-e2e/testdata/simple-source-java.yaml +++ b/test/udsource-e2e/testdata/simple-source-java.yaml @@ -12,7 +12,7 @@ spec: container: # A simple user-defined source for e2e testing # See https://github.com/numaproj/numaflow-java/tree/main/examples/src/main/java/io/numaproj/numaflow/examples/source/simple - image: quay.io/yhl25/numaflow-java/source-simple-source:stable + image: quay.io/numaio/numaflow-java/source-simple-source:stable limits: readBatchSize: 500 - name: out diff --git a/test/udsource-e2e/testdata/simple-source-rust.yaml b/test/udsource-e2e/testdata/simple-source-rust.yaml index eddb8ca2f6..5a2d670710 100644 --- a/test/udsource-e2e/testdata/simple-source-rust.yaml +++ b/test/udsource-e2e/testdata/simple-source-rust.yaml @@ -9,8 +9,8 @@ spec: udsource: container: # A simple user-defined source for e2e testing - # See https://github.com/numaproj/numaflow-go/tree/main/pkg/sourcer/examples/simple_source - image: quay.io/yhl25/numaflow-rs/simple-source:stable + # https://github.com/numaproj/numaflow-rs/tree/main/examples/simple-source + image: quay.io/numaio/numaflow-rs/simple-source:stable limits: readBatchSize: 500 scale: From 4e92bc677823387c3570daf203b0ac3fc84527bb Mon Sep 17 00:00:00 2001 From: Yashash H L Date: Sat, 21 Sep 2024 14:55:55 +0530 Subject: [PATCH 39/40] address review comments Signed-off-by: Yashash H L --- test/e2e/functional_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/e2e/functional_test.go b/test/e2e/functional_test.go index 815874b6ad..1c72af9229 100644 --- a/test/e2e/functional_test.go +++ b/test/e2e/functional_test.go @@ -41,7 +41,7 @@ func (s *FunctionalSuite) TestCreateSimplePipeline() { w := s.Given().Pipeline("@testdata/simple-pipeline.yaml"). When(). CreatePipelineAndWait() - //defer w.DeletePipelineAndWait() + defer w.DeletePipelineAndWait() pipelineName := "simple-pipeline" w.Expect(). 
From 1d389040d76afbbea7515c54f2b86211d8b1fc58 Mon Sep 17 00:00:00 2001 From: Vigith Maurice Date: Sun, 22 Sep 2024 19:42:15 -0700 Subject: [PATCH 40/40] chore: fix comment Signed-off-by: Vigith Maurice --- rust/monovertex/src/source.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/rust/monovertex/src/source.rs b/rust/monovertex/src/source.rs index 9380f847e6..fdfde1b6d4 100644 --- a/rust/monovertex/src/source.rs +++ b/rust/monovertex/src/source.rs @@ -45,7 +45,7 @@ impl SourceReader { let handshake_response = resp_stream.message().await?.ok_or(SourceError( "failed to receive handshake response".to_string(), ))?; - // TODO(explain): why will this be None and why is None okay? + // handshake cannot be None during the initial phase and it has to set `sot` to true. if handshake_response.handshake.map_or(true, |h| !h.sot) { return Err(SourceError("invalid handshake response".to_string())); } @@ -120,7 +120,7 @@ impl SourceAcker { let ack_handshake_response = ack_resp_stream.message().await?.ok_or(SourceError( "failed to receive ack handshake response".to_string(), ))?; - // TODO(explain): why will this be None and why is None okay? + // handshake cannot be None during the initial phase and it has to set `sot` to true. if ack_handshake_response.handshake.map_or(true, |h| !h.sot) { return Err(SourceError("invalid ack handshake response".to_string())); }