From 564927d86843da21e38832052385b2aa7defdb3a Mon Sep 17 00:00:00 2001 From: Heemank Verma Date: Sat, 8 Mar 2025 12:10:57 +0530 Subject: [PATCH] update: orchestrator merge --- .github/workflows/e2e-test.yml | 20 +- .github/workflows/orchestrator-coverage.yml | 35 +- Cargo.lock | 43 +- Cargo.toml | 2 +- cairo/Scarb.lock | 100 +++ e2e-tests/src/anvil.rs | 2 +- e2e-tests/tests.rs | 90 ++- orchestrator/CHANGELOG.md | 9 + .../crates/da-clients/ethereum/Cargo.toml | 4 +- orchestrator/crates/orchestrator/Cargo.toml | 20 +- .../crates/orchestrator/src/cli/mod.rs | 23 +- .../crates/orchestrator/src/cli/service.rs | 13 +- .../crates/orchestrator/src/config.rs | 16 + .../orchestrator/src/cron/event_bridge.rs | 3 +- .../crates/orchestrator/src/cron/mod.rs | 4 + .../crates/orchestrator/src/database/mod.rs | 1 - .../orchestrator/src/database/mongodb/mod.rs | 24 +- .../crates/orchestrator/src/helpers.rs | 66 ++ .../crates/orchestrator/src/jobs/constants.rs | 11 - .../orchestrator/src/jobs/da_job/mod.rs | 82 ++- .../orchestrator/src/jobs/metadata/common.rs | 38 ++ .../orchestrator/src/jobs/metadata/da.rs | 21 + .../orchestrator/src/jobs/metadata/mod.rs | 73 +++ .../orchestrator/src/jobs/metadata/proving.rs | 31 + .../orchestrator/src/jobs/metadata/snos.rs | 27 + .../src/jobs/metadata/state_update.rs | 27 + .../crates/orchestrator/src/jobs/mod.rs | 572 ++++++++++++++++-- .../orchestrator/src/jobs/proving_job/mod.rs | 131 +++- .../src/jobs/register_proof_job/mod.rs | 12 +- .../orchestrator/src/jobs/snos_job/mod.rs | 171 ++++-- .../src/jobs/state_update_job/mod.rs | 349 ++++++----- .../src/jobs/state_update_job/utils.rs | 92 ++- .../crates/orchestrator/src/jobs/types.rs | 13 +- orchestrator/crates/orchestrator/src/lib.rs | 1 + orchestrator/crates/orchestrator/src/main.rs | 3 + .../crates/orchestrator/src/queue/mod.rs | 2 +- .../orchestrator/src/routes/app_routes.rs | 45 ++ .../crates/orchestrator/src/routes/error.rs | 118 ++++ .../orchestrator/src/routes/job_routes.rs | 174 ++++-- 
.../crates/orchestrator/src/routes/mod.rs | 106 ++-- .../crates/orchestrator/src/routes/types.rs | 100 +++ .../crates/orchestrator/src/setup/mod.rs | 2 +- .../orchestrator/src/tests/common/mod.rs | 17 +- .../crates/orchestrator/src/tests/config.rs | 19 +- .../orchestrator/src/tests/database/mod.rs | 45 +- .../orchestrator/src/tests/jobs/da_job/mod.rs | 48 +- .../crates/orchestrator/src/tests/jobs/mod.rs | 335 ++++++---- .../src/tests/jobs/proving_job/mod.rs | 43 +- .../src/tests/jobs/snos_job/mod.rs | 47 +- .../src/tests/jobs/state_update_job/mod.rs | 171 ++++-- .../crates/orchestrator/src/tests/mod.rs | 1 + .../src/tests/server/job_routes.rs | 170 ++++-- .../crates/orchestrator/src/tests/utils.rs | 69 +++ .../src/tests/workers/proving/mod.rs | 78 +-- .../src/tests/workers/snos/mod.rs | 2 +- .../src/tests/workers/update_state/mod.rs | 141 +++-- .../src/tests/workers/utils/mod.rs | 188 +++++- .../src/workers/data_submission_worker.rs | 42 +- .../orchestrator/src/workers/proving.rs | 44 +- .../crates/orchestrator/src/workers/snos.rs | 19 +- .../orchestrator/src/workers/update_state.rs | 105 +++- .../atlantic-service/Cargo.toml | 4 +- .../atlantic-service/src/lib.rs | 29 +- .../gps-fact-checker/Cargo.toml | 2 +- .../prover-client-interface/Cargo.toml | 4 +- .../prover-client-interface/src/lib.rs | 7 +- .../prover-clients/sharp-service/Cargo.toml | 4 +- .../prover-clients/sharp-service/src/lib.rs | 51 +- .../prover-clients/sharp-service/tests/lib.rs | 5 +- .../settlement-clients/ethereum/Cargo.toml | 4 +- .../settlement-clients/ethereum/src/lib.rs | 2 +- .../settlement-client-interface/Cargo.toml | 2 +- .../settlement-clients/starknet/Cargo.toml | 4 +- orchestrator/crates/utils/src/http_client.rs | 2 +- 74 files changed, 3407 insertions(+), 973 deletions(-) create mode 100644 cairo/Scarb.lock create mode 100644 orchestrator/crates/orchestrator/src/helpers.rs delete mode 100644 orchestrator/crates/orchestrator/src/jobs/constants.rs create mode 100644 
orchestrator/crates/orchestrator/src/jobs/metadata/common.rs create mode 100644 orchestrator/crates/orchestrator/src/jobs/metadata/da.rs create mode 100644 orchestrator/crates/orchestrator/src/jobs/metadata/mod.rs create mode 100644 orchestrator/crates/orchestrator/src/jobs/metadata/proving.rs create mode 100644 orchestrator/crates/orchestrator/src/jobs/metadata/snos.rs create mode 100644 orchestrator/crates/orchestrator/src/jobs/metadata/state_update.rs create mode 100644 orchestrator/crates/orchestrator/src/routes/error.rs create mode 100644 orchestrator/crates/orchestrator/src/routes/types.rs create mode 100644 orchestrator/crates/orchestrator/src/tests/utils.rs diff --git a/.github/workflows/e2e-test.yml b/.github/workflows/e2e-test.yml index 8403b6788..8198a607c 100644 --- a/.github/workflows/e2e-test.yml +++ b/.github/workflows/e2e-test.yml @@ -33,15 +33,18 @@ jobs: sudo apt update sudo apt-get install -y clang llvm libudev-dev protobuf-compiler gcc g++ build-essential libssl-dev pkg-config curl wget git libclang-dev - - name: Install Rust toolchain - uses: actions-rs/toolchain@v1 - with: - profile: minimal - toolchain: nightly + - name: Install Rust toolchain using rustup + run: | + curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y + source "$HOME/.cargo/env" + rustup toolchain install nightly-2024-09-05 --profile minimal + rustup default nightly-2024-09-05 + rustup override set nightly-2024-09-05 + rustup show - name: Rust Cache uses: Swatinem/rust-cache@v2 - + - uses: software-mansion/setup-scarb@v1 with: scarb-version: "2.8.2" @@ -92,11 +95,12 @@ jobs: - name: Rustup show run: rustup show - - name: Run Orchestrator e2e test + - name: Run e2e test env: MADARA_ORCHESTRATOR_ETHEREUM_SETTLEMENT_RPC_URL: ${{ secrets.ETHEREUM_SEPOLIA_BLAST_RPC }} MADARA_ORCHESTRATOR_RPC_FOR_SNOS: ${{ secrets.RPC_FOR_SNOS }} # the self hosted runner has a different region so we override it here AWS_REGION: us-east-1 run: | - RUST_LOG=info cargo 
+nightly-2024-09-05 test --features testing --workspace test_orchestrator_workflow -- --nocapture + source "$HOME/.cargo/env" + RUST_LOG=info cargo +nightly-2024-09-05 test --features testing test_orchestrator_workflow -- --nocapture diff --git a/.github/workflows/orchestrator-coverage.yml b/.github/workflows/orchestrator-coverage.yml index 57651f00a..cd1008c24 100644 --- a/.github/workflows/orchestrator-coverage.yml +++ b/.github/workflows/orchestrator-coverage.yml @@ -35,11 +35,14 @@ jobs: sudo apt update sudo apt-get install -y clang llvm libudev-dev protobuf-compiler gcc g++ build-essential libssl-dev pkg-config curl wget git libclang-dev - - name: Install Rust toolchain - uses: actions-rs/toolchain@v1 - with: - profile: minimal - toolchain: nightly + - name: Install Rust toolchain using rustup + run: | + curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y + source "$HOME/.cargo/env" + rustup toolchain install nightly-2024-09-05 --profile minimal + rustup default nightly-2024-09-05 + rustup override set nightly-2024-09-05 + rustup show - name: Rust Cache uses: Swatinem/rust-cache@v2 @@ -107,6 +110,8 @@ jobs: - name: Build Madara (Orchestrator) working-directory: orchestrator/madara run: | + source "$HOME/.cargo/env" + rustup toolchain install 1.81-x86_64-unknown-linux-gnu --profile minimal cargo build mv target/debug/madara ../madara-binary cd ../..
@@ -126,7 +131,9 @@ jobs: make snos - name: Check rust version - run: rustup show + run: | + source "$HOME/.cargo/env" + rustup show - name: Run llvm-cov tests env: @@ -135,17 +142,19 @@ jobs: # the self hosted runner has a different region so we override it here AWS_REGION: us-east-1 run: | + source "$HOME/.cargo/env" RUST_LOG=debug RUST_BACKTRACE=1 cargo llvm-cov nextest \ - --release \ - --features testing \ - --lcov \ - --output-path lcov.info \ - --test-threads=1 \ - --package "orchestrator-*" \ - --no-fail-fast + --release \ + --features testing \ + --lcov \ + --output-path lcov.info \ + --test-threads=1 \ + --package "orchestrator-*" \ + --no-fail-fast - name: Coveralls uses: coverallsapp/github-action@v2 with: parallel-finished: true files: lcov.info + debug: true diff --git a/Cargo.lock b/Cargo.lock index ea46f211b..3fa4ee9e3 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -7897,6 +7897,26 @@ version = "1.0.11" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "49f1f14873335454500d59611f1cf4a4b0f786f9ac11f4312a78e4cf2566695b" +[[package]] +name = "jemalloc-sys" +version = "0.5.4+5.3.0-patched" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac6c1946e1cea1788cbfde01c993b52a10e2da07f4bac608228d1bed20bfebf2" +dependencies = [ + "cc", + "libc", +] + +[[package]] +name = "jemallocator" +version = "0.5.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a0de374a9f8e63150e6f5e8a60cc14c668226d7a347d8aee1a45766e3c4dd3bc" +dependencies = [ + "jemalloc-sys", + "libc", +] + [[package]] name = "jobserver" version = "0.1.32" @@ -9884,7 +9904,7 @@ checksum = "04744f49eae99ab78e0d5c0b603ab218f515ea8cfe5a456d7629ad883a3b6e7d" [[package]] name = "orchestrator-atlantic-service" -version = "0.7.0" +version = "0.8.0" dependencies = [ "alloy 0.8.3", "async-trait", @@ -9927,7 +9947,7 @@ dependencies = [ [[package]] name = "orchestrator-core" -version = "0.7.0" +version = "0.8.0" dependencies = [ 
"alloy 0.2.1", "assert_matches", @@ -9956,6 +9976,7 @@ dependencies = [ "httpmock 0.8.0-alpha.1", "hyper 0.14.31", "itertools 0.13.0", + "jemallocator", "lazy_static", "log", "majin-blob-core", @@ -10005,7 +10026,7 @@ dependencies = [ [[package]] name = "orchestrator-da-client-interface" -version = "0.7.0" +version = "0.8.0" dependencies = [ "async-trait", "axum 0.7.7", @@ -10016,7 +10037,7 @@ dependencies = [ [[package]] name = "orchestrator-ethereum-da-client" -version = "0.7.0" +version = "0.8.0" dependencies = [ "alloy 0.1.0", "async-trait", @@ -10046,7 +10067,7 @@ dependencies = [ [[package]] name = "orchestrator-ethereum-settlement-client" -version = "0.7.0" +version = "0.8.0" dependencies = [ "alloy 0.2.1", "alloy-primitives 0.7.7", @@ -10082,7 +10103,7 @@ dependencies = [ [[package]] name = "orchestrator-gps-fact-checker" -version = "0.7.0" +version = "0.8.0" dependencies = [ "alloy 0.8.3", "async-trait", @@ -10113,7 +10134,7 @@ dependencies = [ [[package]] name = "orchestrator-prover-client-interface" -version = "0.7.0" +version = "0.8.0" dependencies = [ "async-trait", "cairo-vm", @@ -10126,7 +10147,7 @@ dependencies = [ [[package]] name = "orchestrator-settlement-client-interface" -version = "0.7.0" +version = "0.8.0" dependencies = [ "async-trait", "axum 0.7.7", @@ -10139,7 +10160,7 @@ dependencies = [ [[package]] name = "orchestrator-sharp-service" -version = "0.7.0" +version = "0.8.0" dependencies = [ "alloy 0.8.3", "async-trait", @@ -10176,7 +10197,7 @@ dependencies = [ [[package]] name = "orchestrator-starknet-settlement-client" -version = "0.7.0" +version = "0.8.0" dependencies = [ "alloy 0.8.3", "alloy-primitives 0.8.18", @@ -10214,7 +10235,7 @@ dependencies = [ [[package]] name = "orchestrator-utils" -version = "0.7.0" +version = "0.8.0" dependencies = [ "base64 0.22.1", "color-eyre", diff --git a/Cargo.toml b/Cargo.toml index 560c4dacb..14a53f377 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -323,7 +323,7 @@ aws-sdk-sqs = "1.36.0" axum = { version 
= "0.7.4" } axum-macros = "0.4.1" color-eyre = "0.6.2" -chrono = "0.4.0" +chrono = { version = "0.4", features = ["serde"] } c-kzg = "1.0.3" dotenvy = "0.15.7" log = "0.4.21" diff --git a/cairo/Scarb.lock b/cairo/Scarb.lock new file mode 100644 index 000000000..673ccad4d --- /dev/null +++ b/cairo/Scarb.lock @@ -0,0 +1,100 @@ +# Code generated by scarb DO NOT EDIT. +version = 1 + +[[package]] +name = "madara_contracts" +version = "0.1.0" +dependencies = [ + "openzeppelin", + "snforge_std", +] + +[[package]] +name = "openzeppelin" +version = "0.15.1" +source = "git+https://github.com/OpenZeppelin/cairo-contracts.git?tag=v0.15.1#2f8a93d762858714095a1d391afffa9e21df6983" +dependencies = [ + "openzeppelin_access", + "openzeppelin_account", + "openzeppelin_governance", + "openzeppelin_introspection", + "openzeppelin_presets", + "openzeppelin_security", + "openzeppelin_token", + "openzeppelin_upgrades", + "openzeppelin_utils", +] + +[[package]] +name = "openzeppelin_access" +version = "0.15.1" +source = "git+https://github.com/OpenZeppelin/cairo-contracts.git?tag=v0.15.1#2f8a93d762858714095a1d391afffa9e21df6983" +dependencies = [ + "openzeppelin_introspection", + "openzeppelin_utils", +] + +[[package]] +name = "openzeppelin_account" +version = "0.15.1" +source = "git+https://github.com/OpenZeppelin/cairo-contracts.git?tag=v0.15.1#2f8a93d762858714095a1d391afffa9e21df6983" +dependencies = [ + "openzeppelin_introspection", + "openzeppelin_utils", +] + +[[package]] +name = "openzeppelin_governance" +version = "0.15.1" +source = "git+https://github.com/OpenZeppelin/cairo-contracts.git?tag=v0.15.1#2f8a93d762858714095a1d391afffa9e21df6983" +dependencies = [ + "openzeppelin_access", + "openzeppelin_introspection", +] + +[[package]] +name = "openzeppelin_introspection" +version = "0.15.1" +source = "git+https://github.com/OpenZeppelin/cairo-contracts.git?tag=v0.15.1#2f8a93d762858714095a1d391afffa9e21df6983" + +[[package]] +name = "openzeppelin_presets" +version = "0.15.1" +source 
= "git+https://github.com/OpenZeppelin/cairo-contracts.git?tag=v0.15.1#2f8a93d762858714095a1d391afffa9e21df6983" +dependencies = [ + "openzeppelin_access", + "openzeppelin_account", + "openzeppelin_introspection", + "openzeppelin_token", + "openzeppelin_upgrades", +] + +[[package]] +name = "openzeppelin_security" +version = "0.15.1" +source = "git+https://github.com/OpenZeppelin/cairo-contracts.git?tag=v0.15.1#2f8a93d762858714095a1d391afffa9e21df6983" + +[[package]] +name = "openzeppelin_token" +version = "0.15.1" +source = "git+https://github.com/OpenZeppelin/cairo-contracts.git?tag=v0.15.1#2f8a93d762858714095a1d391afffa9e21df6983" +dependencies = [ + "openzeppelin_account", + "openzeppelin_governance", + "openzeppelin_introspection", +] + +[[package]] +name = "openzeppelin_upgrades" +version = "0.15.1" +source = "git+https://github.com/OpenZeppelin/cairo-contracts.git?tag=v0.15.1#2f8a93d762858714095a1d391afffa9e21df6983" + +[[package]] +name = "openzeppelin_utils" +version = "0.15.1" +source = "git+https://github.com/OpenZeppelin/cairo-contracts.git?tag=v0.15.1#2f8a93d762858714095a1d391afffa9e21df6983" + +[[package]] +name = "snforge_std" +version = "0.27.0" +source = "git+https://github.com/foundry-rs/starknet-foundry?tag=v0.27.0#2d99b7c00678ef0363881ee0273550c44a9263de" diff --git a/e2e-tests/src/anvil.rs b/e2e-tests/src/anvil.rs index d56052ea5..609c541c0 100644 --- a/e2e-tests/src/anvil.rs +++ b/e2e-tests/src/anvil.rs @@ -58,7 +58,7 @@ impl AnvilSetup { let provider = ProviderBuilder::new().with_recommended_fillers().wallet(wallet).on_http(self.rpc_url.clone()); let starknet_core_contract_client = StarknetCoreContract::deploy(&provider).await.unwrap(); - log::debug!("📦 Deployed starknet_core_contract at address: {}", starknet_core_contract_client.address()); + log::info!("📦 Deployed starknet_core_contract at address: {}", starknet_core_contract_client.address()); let verifier_client = GPSVerifier::deploy(&provider).await.unwrap(); // This is the fact hash 
calculated from get_fact_info() or mongodb job metadata diff --git a/e2e-tests/tests.rs b/e2e-tests/tests.rs index c9c5f03a0..1afbd1f5e 100644 --- a/e2e-tests/tests.rs +++ b/e2e-tests/tests.rs @@ -13,9 +13,14 @@ use e2e_tests::utils::{get_mongo_db_client, read_state_update_from_file, vec_u8_ use e2e_tests::{MongoDbServer, Orchestrator}; use mongodb::bson::doc; use orchestrator::cli::database::DatabaseValidatedArgs; +use orchestrator::constants::{ + BLOB_DATA_FILE_NAME, CAIRO_PIE_FILE_NAME, PROGRAM_OUTPUT_FILE_NAME, SNOS_OUTPUT_FILE_NAME, +}; use orchestrator::data_storage::DataStorage; use orchestrator::database::mongodb::MongoDBValidatedArgs; -use orchestrator::jobs::constants::{JOB_METADATA_SNOS_BLOCK, JOB_METADATA_STATE_UPDATE_BLOCKS_TO_SETTLE_KEY}; +use orchestrator::jobs::metadata::{ + CommonMetadata, DaMetadata, JobMetadata, JobSpecificMetadata, ProvingMetadata, SnosMetadata, StateUpdateMetadata, +}; use orchestrator::jobs::types::{ExternalId, JobItem, JobStatus, JobType}; use orchestrator::queue::job_queue::JobQueueMessage; use orchestrator::queue::sqs::AWSSQSValidatedArgs; @@ -181,10 +186,10 @@ async fn test_orchestrator_workflow(#[case] l2_block_number: String) { internal_id: l2_block_number.clone(), job_type: JobType::ProofCreation, job_status: JobStatus::Completed, - version: 3, + version: 4, }; let test_result = wait_for_db_state( - Duration::from_secs(1500), + Duration::from_secs(900), l2_block_number.clone(), setup_config.mongo_db_instance(), expected_state_after_proving_job, @@ -197,7 +202,7 @@ async fn test_orchestrator_workflow(#[case] l2_block_number: String) { internal_id: l2_block_number.clone(), job_type: JobType::DataSubmission, job_status: JobStatus::Completed, - version: 3, + version: 4, }; let test_result = wait_for_db_state( Duration::from_secs(300), @@ -213,7 +218,7 @@ async fn test_orchestrator_workflow(#[case] l2_block_number: String) { internal_id: l2_block_number.clone(), job_type: JobType::StateTransition, job_status: 
JobStatus::Completed, - version: 3, + version: 4, }; let test_result = wait_for_db_state( Duration::from_secs(300), @@ -276,13 +281,29 @@ async fn get_database_state( /// Puts after SNOS job state into the database pub async fn put_job_data_in_db_snos(mongo_db: &MongoDbServer, l2_block_number: String) -> Uuid { + // Create the SNOS-specific metadata + let snos_metadata = SnosMetadata { + block_number: l2_block_number.parse().expect("Invalid block number"), + full_output: false, + cairo_pie_path: Some(format!("{}/{}", l2_block_number.clone(), CAIRO_PIE_FILE_NAME)), + snos_output_path: Some(format!("{}/{}", l2_block_number.clone(), SNOS_OUTPUT_FILE_NAME)), + program_output_path: Some(format!("{}/{}", l2_block_number.clone(), PROGRAM_OUTPUT_FILE_NAME)), + snos_fact: None, + }; + + // Create the common metadata with default values + let common_metadata = CommonMetadata::default(); + + // Combine into JobMetadata + let metadata = JobMetadata { common: common_metadata, specific: JobSpecificMetadata::Snos(snos_metadata) }; + let job_item = JobItem { id: Uuid::new_v4(), internal_id: l2_block_number.clone(), job_type: JobType::SnosRun, status: JobStatus::Created, external_id: ExternalId::Number(0), - metadata: HashMap::from([(JOB_METADATA_SNOS_BLOCK.to_string(), l2_block_number)]), + metadata, version: 0, created_at: Utc::now().round_subsecs(0), updated_at: Utc::now().round_subsecs(0), @@ -357,13 +378,26 @@ pub async fn mock_proving_job_endpoint_output(sharp_client: &mut SharpClient) { /// Puts after SNOS job state into the database pub async fn put_job_data_in_db_da(mongo_db: &MongoDbServer, l2_block_number: String) { + // Create the DA-specific metadata + let da_metadata = DaMetadata { + block_number: l2_block_number.parse::<u64>().unwrap() - 1, + blob_data_path: Some(format!("{}/{}", l2_block_number.clone(), BLOB_DATA_FILE_NAME)), + tx_hash: None, + }; + + // Create the common metadata with default values + let common_metadata = CommonMetadata::default(); + + // Combine into
JobMetadata + let metadata = JobMetadata { common: common_metadata, specific: JobSpecificMetadata::Da(da_metadata) }; + let job_item = JobItem { id: Uuid::new_v4(), internal_id: (l2_block_number.parse::<u64>().unwrap() - 1).to_string(), job_type: JobType::DataSubmission, status: JobStatus::Completed, external_id: ExternalId::Number(0), - metadata: HashMap::new(), + metadata, version: 0, created_at: Utc::now().round_subsecs(0), updated_at: Utc::now().round_subsecs(0), @@ -433,16 +467,32 @@ pub async fn mock_starknet_get_latest_block(starknet_client: &mut StarknetClient /// Puts after SNOS job state into the database pub async fn put_job_data_in_db_update_state(mongo_db: &MongoDbServer, l2_block_number: String) { + let block_number = l2_block_number.parse::<u64>().unwrap() - 1; + + // Create the StateUpdate-specific metadata + let state_update_metadata = StateUpdateMetadata { + blocks_to_settle: vec![block_number], + snos_output_paths: vec![format!("{}/{}", block_number, SNOS_OUTPUT_FILE_NAME)], + program_output_paths: vec![format!("{}/{}", block_number, PROGRAM_OUTPUT_FILE_NAME)], + blob_data_paths: vec![format!("{}/{}", block_number, BLOB_DATA_FILE_NAME)], + last_failed_block_no: None, + tx_hashes: Vec::new(), + }; + + // Create the common metadata with default values + let common_metadata = CommonMetadata::default(); + + // Combine into JobMetadata + let metadata = + JobMetadata { common: common_metadata, specific: JobSpecificMetadata::StateUpdate(state_update_metadata) }; + let job_item = JobItem { id: Uuid::new_v4(), - internal_id: (l2_block_number.parse::<u64>().unwrap() - 1).to_string(), + internal_id: block_number.to_string(), job_type: JobType::StateTransition, status: JobStatus::Completed, external_id: ExternalId::Number(0), - metadata: HashMap::from([( - JOB_METADATA_STATE_UPDATE_BLOCKS_TO_SETTLE_KEY.to_string(), - (l2_block_number.parse::<u64>().unwrap() - 1).to_string(), - )]), + metadata, version: 0, created_at: Utc::now().round_subsecs(0), updated_at:
Utc::now().round_subsecs(0), @@ -454,13 +504,25 @@ pub async fn put_job_data_in_db_update_state(mongo_db: &MongoDbServer, l2_block_ /// Puts after SNOS job state into the database pub async fn put_job_data_in_db_proving(mongo_db: &MongoDbServer, l2_block_number: String) { + let block_number = l2_block_number.parse::<u64>().unwrap() - 1; + + // Create the Proving-specific metadata + let proving_metadata = + ProvingMetadata { block_number, input_path: None, ensure_on_chain_registration: None, download_proof: None }; + + // Create the common metadata with default values + let common_metadata = CommonMetadata::default(); + + // Combine into JobMetadata + let metadata = JobMetadata { common: common_metadata, specific: JobSpecificMetadata::Proving(proving_metadata) }; + let job_item = JobItem { id: Uuid::new_v4(), - internal_id: (l2_block_number.parse::<u64>().unwrap() - 1).to_string(), + internal_id: block_number.to_string(), job_type: JobType::ProofCreation, status: JobStatus::Completed, external_id: ExternalId::Number(0), - metadata: HashMap::new(), + metadata, version: 0, created_at: Utc::now().round_subsecs(0), updated_at: Utc::now().round_subsecs(0), diff --git a/orchestrator/CHANGELOG.md b/orchestrator/CHANGELOG.md index 30b98dd1d..a00f15ffc 100644 --- a/orchestrator/CHANGELOG.md +++ b/orchestrator/CHANGELOG.md @@ -6,6 +6,10 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/). ## Added +- added metadata serialization and deserialization +- Limits on SNOS job concurrency +- Added JOB_METADATA_PROCESSING_STARTED_AT +- Added retry job endpoint for failed jobs - ci: linters added - readme: setup instructions added - Added : Grafana dashboard @@ -51,6 +55,9 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/).
## Changed +- refactor: job isolation added, each job will have needed information from it's worker +- Increased interval time for spawn_consumer +- verify_job now handles VerificationTimeout status - refactor: expect removed and added error wraps - refactor: Readme and .env.example - refactor: http_mock version updated @@ -85,6 +92,8 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/). ## Fixed +- fixed JOB_METADATA_PROCESSING_FINISHED_AT usage +- add jemallocator to fix memory leak - refactor: instrumentation - `is_worker_enabled` status check moved from `VerificationFailed` to `Failed` - refactor: static attributes for telemetry diff --git a/orchestrator/crates/da-clients/ethereum/Cargo.toml b/orchestrator/crates/da-clients/ethereum/Cargo.toml index cfb8debbf..6dc2e21e3 100644 --- a/orchestrator/crates/da-clients/ethereum/Cargo.toml +++ b/orchestrator/crates/da-clients/ethereum/Cargo.toml @@ -18,16 +18,16 @@ alloy = { git = "https://github.com/alloy-rs/alloy", rev = "68952c0", features = async-trait = { workspace = true } c-kzg = { workspace = true } color-eyre = { workspace = true } +orchestrator-da-client-interface = { workspace = true } dotenvy.workspace = true mockall = { workspace = true } -orchestrator-da-client-interface = { workspace = true } -orchestrator-utils = { workspace = true } reqwest = { workspace = true } rstest = { workspace = true } serde = { workspace = true, features = ["derive"] } starknet = { workspace = true } tokio = { workspace = true } url = { workspace = true } +orchestrator-utils = { workspace = true } #Instrumentation opentelemetry = { workspace = true, features = ["metrics", "logs"] } diff --git a/orchestrator/crates/orchestrator/Cargo.toml b/orchestrator/crates/orchestrator/Cargo.toml index 6cf405293..0a144d320 100644 --- a/orchestrator/crates/orchestrator/Cargo.toml +++ b/orchestrator/crates/orchestrator/Cargo.toml @@ -23,6 +23,7 @@ alloy = { version = "0.2.1", features = [ assert_matches = { 
workspace = true } async-std = { workspace = true } async-trait = { workspace = true } +orchestrator-atlantic-service = { workspace = true } aws-config = { workspace = true, features = ["behavior-version-latest"] } aws-credential-types = { workspace = true, features = [ "hardcoded-credentials", @@ -40,7 +41,10 @@ cairo-vm = { workspace = true } chrono = { workspace = true } clap = { workspace = true } color-eyre = { workspace = true } +orchestrator-da-client-interface = { workspace = true } dotenvy = { workspace = true } +orchestrator-ethereum-da-client = { workspace = true, optional = true } +orchestrator-ethereum-settlement-client = { workspace = true } futures = { workspace = true } hex = { workspace = true } itertools = { workspace = true } @@ -56,29 +60,25 @@ num-bigint = { workspace = true } num-traits = { workspace = true } omniqueue = { workspace = true, optional = true } once_cell = { workspace = true } -orchestrator-atlantic-service = { workspace = true } -orchestrator-da-client-interface = { workspace = true } -orchestrator-ethereum-da-client = { workspace = true, optional = true } -orchestrator-ethereum-settlement-client = { workspace = true } -orchestrator-prover-client-interface = { workspace = true } -orchestrator-settlement-client-interface = { workspace = true } -orchestrator-sharp-service = { workspace = true } -orchestrator-starknet-settlement-client = { workspace = true } prove_block = { workspace = true } +orchestrator-prover-client-interface = { workspace = true } rstest = { workspace = true } serde = { workspace = true } serde_json = { workspace = true } +orchestrator-settlement-client-interface = { workspace = true } +orchestrator-sharp-service = { workspace = true } starknet = { workspace = true } starknet-core = { workspace = true } starknet-os = { workspace = true } +orchestrator-starknet-settlement-client = { workspace = true } strum = { workspace = true } strum_macros = { workspace = true } tempfile = { workspace = true } thiserror = { 
workspace = true } tokio = { workspace = true, features = ["sync", "macros", "rt-multi-thread"] } -orchestrator-utils = { workspace = true } url = { workspace = true } +orchestrator-utils = { workspace = true } uuid = { workspace = true, features = ["v4", "serde"] } aws-sdk-iam = { workspace = true } @@ -100,6 +100,8 @@ tracing-core = { workspace = true, default-features = false } tracing-opentelemetry = { workspace = true } tracing-subscriber = { workspace = true, features = ["env-filter"] } +jemallocator = "0.5.4" + [features] default = ["ethereum", "with_mongodb", "with_sqs"] ethereum = ["orchestrator-ethereum-da-client"] diff --git a/orchestrator/crates/orchestrator/src/cli/mod.rs b/orchestrator/crates/orchestrator/src/cli/mod.rs index b86341f53..fa9188a6f 100644 --- a/orchestrator/crates/orchestrator/src/cli/mod.rs +++ b/orchestrator/crates/orchestrator/src/cli/mod.rs @@ -606,20 +606,9 @@ pub mod validate_params { pub(crate) fn validate_service_params(service_args: &ServiceCliArgs) -> Result { Ok(ServiceParams { // return None if the value is empty string - max_block_to_process: service_args.max_block_to_process.clone().and_then(|s| { - if s.is_empty() { - None - } else { - Some(s.parse::().expect("Failed to parse max block to process")) - } - }), - min_block_to_process: service_args.min_block_to_process.clone().and_then(|s| { - if s.is_empty() { - None - } else { - Some(s.parse::().expect("Failed to parse min block to process")) - } - }), + max_block_to_process: service_args.max_block_to_process, + min_block_to_process: service_args.min_block_to_process, + max_concurrent_snos_jobs: service_args.max_concurrent_snos_jobs, }) } @@ -898,14 +887,16 @@ pub mod validate_params { #[rstest] fn test_validate_service_params() { let service_args: ServiceCliArgs = ServiceCliArgs { - max_block_to_process: Some("66645".to_string()), - min_block_to_process: Some("100".to_string()), + max_block_to_process: Some(66645), + min_block_to_process: Some(100), + 
max_concurrent_snos_jobs: Some(10), }; let service_params = validate_service_params(&service_args); assert!(service_params.is_ok()); let service_params = service_params.unwrap(); assert_eq!(service_params.max_block_to_process, Some(66645)); assert_eq!(service_params.min_block_to_process, Some(100)); + assert_eq!(service_params.max_concurrent_snos_jobs, Some(10)); } } } diff --git a/orchestrator/crates/orchestrator/src/cli/service.rs b/orchestrator/crates/orchestrator/src/cli/service.rs index 1512eb978..7bba661d2 100644 --- a/orchestrator/crates/orchestrator/src/cli/service.rs +++ b/orchestrator/crates/orchestrator/src/cli/service.rs @@ -3,10 +3,15 @@ use clap::Args; #[derive(Debug, Clone, Args)] pub struct ServiceCliArgs { /// The maximum block to process. - #[arg(env = "MADARA_ORCHESTRATOR_MAX_BLOCK_NO_TO_PROCESS", long)] - pub max_block_to_process: Option<String>, + /// The default value is u64::MAX. + #[arg(env = "MADARA_ORCHESTRATOR_MAX_BLOCK_NO_TO_PROCESS", long, default_value = Some("18446744073709551615"))] + pub max_block_to_process: Option<u64>, /// The minimum block to process. - #[arg(env = "MADARA_ORCHESTRATOR_MIN_BLOCK_NO_TO_PROCESS", long)] - pub min_block_to_process: Option<String>, + #[arg(env = "MADARA_ORCHESTRATOR_MIN_BLOCK_NO_TO_PROCESS", long, default_value = Some("0"))] + pub min_block_to_process: Option<u64>, + + /// The maximum number of SNOS jobs to process concurrently.
+ #[arg(env = "MADARA_ORCHESTRATOR_MAX_CONCURRENT_SNOS_JOBS", long, default_value = Some("1"))] + pub max_concurrent_snos_jobs: Option, } diff --git a/orchestrator/crates/orchestrator/src/config.rs b/orchestrator/crates/orchestrator/src/config.rs index 126d75201..7c6e7dda7 100644 --- a/orchestrator/crates/orchestrator/src/config.rs +++ b/orchestrator/crates/orchestrator/src/config.rs @@ -35,6 +35,7 @@ use crate::data_storage::aws_s3::AWSS3; use crate::data_storage::DataStorage; use crate::database::mongodb::MongoDb; use crate::database::Database; +use crate::helpers::{JobProcessingState, ProcessingLocks}; use crate::queue::sqs::SqsQueue; use crate::queue::QueueProvider; use crate::routes::ServerParams; @@ -60,12 +61,15 @@ pub struct Config { storage: Box, /// Alerts client alerts: Box, + /// Locks + processing_locks: ProcessingLocks, } #[derive(Debug, Clone)] pub struct ServiceParams { pub max_block_to_process: Option, pub min_block_to_process: Option, + pub max_concurrent_snos_jobs: Option, } pub struct OrchestratorParams { @@ -162,6 +166,10 @@ pub async fn init_config(run_cmd: &RunCmd) -> color_eyre::Result> { let queue_params = run_cmd.validate_queue_params().map_err(|e| eyre!("Failed to validate queue params: {e}"))?; let queue = build_queue_client(&queue_params, provider_config.clone()).await; + let snos_processing_lock = + JobProcessingState::new(orchestrator_params.service_config.max_concurrent_snos_jobs.unwrap_or(1)); + let processing_locks = ProcessingLocks { snos_job_processing_lock: Arc::new(snos_processing_lock) }; + Ok(Arc::new(Config::new( orchestrator_params, Arc::new(rpc_client), @@ -172,6 +180,7 @@ pub async fn init_config(run_cmd: &RunCmd) -> color_eyre::Result> { queue, storage_client, alerts_client, + processing_locks, ))) } @@ -188,6 +197,7 @@ impl Config { queue: Box, storage: Box, alerts: Box, + processing_locks: ProcessingLocks, ) -> Self { Self { orchestrator_params, @@ -199,6 +209,7 @@ impl Config { queue, storage, alerts, + 
processing_locks, } } @@ -271,6 +282,11 @@ impl Config { pub fn prover_layout_name(&self) -> &LayoutName { &self.orchestrator_params.prover_layout_name } + + /// Returns the processing locks + pub fn processing_locks(&self) -> &ProcessingLocks { + &self.processing_locks + } } /// Builds the provider config diff --git a/orchestrator/crates/orchestrator/src/cron/event_bridge.rs b/orchestrator/crates/orchestrator/src/cron/event_bridge.rs index bd62f1211..2db2da0a8 100644 --- a/orchestrator/crates/orchestrator/src/cron/event_bridge.rs +++ b/orchestrator/crates/orchestrator/src/cron/event_bridge.rs @@ -89,7 +89,7 @@ impl Cron for AWSEventBridge { "Statement": [{ "Effect": "Allow", "Principal": { - "Service": "scheduler.amazonaws.com" + "Service": ["scheduler.amazonaws.com", "events.amazonaws.com"] }, "Action": "sts:AssumeRole" }] @@ -143,7 +143,6 @@ impl Cron for AWSEventBridge { ) -> color_eyre::Result<()> { let message = get_worker_trigger_message(trigger_type.clone())?; let trigger_name = format!("{}-{}", self.trigger_rule_name, trigger_type); - println!("trigger_nametrigger_nametrigger_name {}", trigger_name); match self.client.clone() { EventBridgeClient::Rule(client) => { diff --git a/orchestrator/crates/orchestrator/src/cron/mod.rs b/orchestrator/crates/orchestrator/src/cron/mod.rs index 4673cea4a..5973993d1 100644 --- a/orchestrator/crates/orchestrator/src/cron/mod.rs +++ b/orchestrator/crates/orchestrator/src/cron/mod.rs @@ -1,5 +1,8 @@ +use std::time::Duration; + use async_trait::async_trait; use lazy_static::lazy_static; +use tokio::time::sleep; use crate::queue::job_queue::WorkerTriggerType; @@ -29,6 +32,7 @@ pub trait Cron { ) -> color_eyre::Result<()>; async fn setup(&self) -> color_eyre::Result<()> { let trigger_arns = self.create_cron().await?; + sleep(Duration::from_secs(15)).await; for trigger in WORKER_TRIGGERS.iter() { self.add_cron_target_queue(trigger, &trigger_arns).await?; } diff --git a/orchestrator/crates/orchestrator/src/database/mod.rs 
b/orchestrator/crates/orchestrator/src/database/mod.rs index b2ff7554c..2ca17d734 100644 --- a/orchestrator/crates/orchestrator/src/database/mod.rs +++ b/orchestrator/crates/orchestrator/src/database/mod.rs @@ -47,6 +47,5 @@ pub trait Database: Send + Sync { internal_id: String, ) -> Result>; - // TODO: can be extendible to support multiple status. async fn get_jobs_by_statuses(&self, status: Vec, limit: Option) -> Result>; } diff --git a/orchestrator/crates/orchestrator/src/database/mongodb/mod.rs b/orchestrator/crates/orchestrator/src/database/mongodb/mod.rs index 3fd0daa07..795e14cc8 100644 --- a/orchestrator/crates/orchestrator/src/database/mongodb/mod.rs +++ b/orchestrator/crates/orchestrator/src/database/mongodb/mod.rs @@ -216,14 +216,26 @@ impl Database for MongoDb { tracing::debug!(job_type = ?job_type, category = "db_call", "Fetching latest job by type"); - // Get the first (and only) result if it exists match cursor.try_next().await? { Some(doc) => { - let job: JobItem = mongodb::bson::from_document(doc)?; - let attributes = [KeyValue::new("db_operation_name", "get_latest_job_by_type")]; - let duration = start.elapsed(); - ORCHESTRATOR_METRICS.db_calls_response_time.record(duration.as_secs_f64(), &attributes); - Ok(Some(job)) + // Try to deserialize and log any errors + match mongodb::bson::from_document::(doc.clone()) { + Ok(job) => { + tracing::debug!(deserialized_job = ?job, "Successfully deserialized job"); + let attributes = [KeyValue::new("db_operation_name", "get_latest_job_by_type")]; + let duration = start.elapsed(); + ORCHESTRATOR_METRICS.db_calls_response_time.record(duration.as_secs_f64(), &attributes); + Ok(Some(job)) + } + Err(e) => { + tracing::error!( + error = %e, + document = ?doc, + "Failed to deserialize document into JobItem" + ); + Err(eyre!("Failed to deserialize document: {}", e)) + } + } } None => Ok(None), } diff --git a/orchestrator/crates/orchestrator/src/helpers.rs b/orchestrator/crates/orchestrator/src/helpers.rs new file 
mode 100644 index 000000000..d69334d0f --- /dev/null +++ b/orchestrator/crates/orchestrator/src/helpers.rs @@ -0,0 +1,66 @@ +use std::collections::HashSet; +use std::sync::Arc; +use std::time::Duration; + +use tokio::sync::{Mutex, Semaphore, SemaphorePermit}; +use uuid::Uuid; + +use crate::config::Config; +use crate::jobs::types::JobItem; +use crate::jobs::JobError; +use crate::queue::job_queue::add_job_to_process_queue; + +pub struct ProcessingLocks { + pub snos_job_processing_lock: Arc, +} + +pub struct JobProcessingState { + pub semaphore: Semaphore, + pub active_jobs: Mutex>, +} +impl JobProcessingState { + pub fn new(max_parallel_jobs: usize) -> Self { + JobProcessingState { semaphore: Semaphore::new(max_parallel_jobs), active_jobs: Mutex::new(HashSet::new()) } + } + + pub async fn get_active_jobs(&self) -> HashSet { + self.active_jobs.lock().await.clone() + } + + pub fn get_available_permits(&self) -> usize { + self.semaphore.available_permits() + } + + pub async fn try_acquire_lock<'a>( + &'a self, + job: &JobItem, + config: Arc, + ) -> Result, JobError> { + // Trying to acquire permit with a timeout. 
+ match tokio::time::timeout(Duration::from_millis(100), self.semaphore.acquire()).await { + Ok(Ok(permit)) => { + { + let mut active_jobs = self.active_jobs.lock().await; + active_jobs.insert(job.id); + drop(active_jobs); + } + tracing::info!(job_id = %job.id, "Job {} acquired lock", job.id); + Ok(permit) + } + Err(_) => { + tracing::error!(job_id = %job.id, "Job {} waiting - at max capacity ({} available permits)", job.id, self.get_available_permits()); + add_job_to_process_queue(job.id, &job.job_type, config.clone()).await?; + Err(JobError::MaxCapacityReached) + } + Ok(Err(e)) => Err(JobError::LockError(e.to_string())), + } + } + + pub async fn try_release_lock<'a>(&'a self, permit: SemaphorePermit<'a>, job_id: &Uuid) -> Result<(), JobError> { + let mut active_jobs = self.active_jobs.lock().await; + active_jobs.remove(job_id); + drop(active_jobs); // Explicitly drop the lock (optional but clear) + drop(permit); // Explicitly drop the permit (optional but clear) + Ok(()) + } +} diff --git a/orchestrator/crates/orchestrator/src/jobs/constants.rs b/orchestrator/crates/orchestrator/src/jobs/constants.rs deleted file mode 100644 index 07992401f..000000000 --- a/orchestrator/crates/orchestrator/src/jobs/constants.rs +++ /dev/null @@ -1,11 +0,0 @@ -pub const JOB_PROCESS_ATTEMPT_METADATA_KEY: &str = "process_attempt_no"; -pub const JOB_VERIFICATION_ATTEMPT_METADATA_KEY: &str = "verification_attempt_no"; -pub const JOB_METADATA_STATE_UPDATE_BLOCKS_TO_SETTLE_KEY: &str = "blocks_number_to_settle"; -pub const JOB_METADATA_STATE_UPDATE_FETCH_FROM_TESTS: &str = "fetch_from_test_data"; -pub const JOB_METADATA_STATE_UPDATE_ATTEMPT_PREFIX: &str = "attempt_tx_hashes_"; -pub const JOB_METADATA_STATE_UPDATE_LAST_FAILED_BLOCK_NO: &str = "last_failed_block_no"; -pub const JOB_METADATA_SNOS_BLOCK: &str = "block_number_to_run"; -pub const JOB_METADATA_SNOS_FACT: &str = "snos_fact"; -pub const JOB_METADATA_FAILURE_REASON: &str = "failure_reason"; -pub const JOB_METADATA_ERROR: &str = 
"error"; -pub const JOB_METADATA_PROCESSING_COMPLETED_AT: &str = "processing_completed_at"; diff --git a/orchestrator/crates/orchestrator/src/jobs/da_job/mod.rs b/orchestrator/crates/orchestrator/src/jobs/da_job/mod.rs index f3c113da6..4e4b3c1a6 100644 --- a/orchestrator/crates/orchestrator/src/jobs/da_job/mod.rs +++ b/orchestrator/crates/orchestrator/src/jobs/da_job/mod.rs @@ -5,7 +5,7 @@ use std::sync::Arc; use async_trait::async_trait; use chrono::{SubsecRound, Utc}; -use color_eyre::eyre::WrapErr; +use color_eyre::eyre::{eyre, WrapErr}; use lazy_static::lazy_static; use num_bigint::{BigUint, ToBigUint}; use num_traits::{Num, Zero}; @@ -19,7 +19,8 @@ use uuid::Uuid; use super::types::{JobItem, JobStatus, JobType, JobVerificationStatus}; use super::{Job, JobError, OtherError}; use crate::config::Config; -use crate::constants::BLOB_DATA_FILE_NAME; +use crate::helpers; +use crate::jobs::metadata::{DaMetadata, JobMetadata, JobSpecificMetadata}; use crate::jobs::state_update_job::utils::biguint_vec_to_u8_vec; lazy_static! { @@ -69,7 +70,7 @@ impl Job for DaJob { &self, _config: Arc, internal_id: String, - metadata: HashMap, + metadata: JobMetadata, ) -> Result { let job_id = Uuid::new_v4(); tracing::info!(log_type = "starting", category = "da", function_type = "create_job", block_no = %internal_id, "DA job creation started."); @@ -91,7 +92,21 @@ impl Job for DaJob { #[tracing::instrument(fields(category = "da"), skip(self, config), ret, err)] async fn process_job(&self, config: Arc, job: &mut JobItem) -> Result { let internal_id = job.internal_id.clone(); - tracing::info!(log_type = "starting", category = "da", function_type = "process_job", job_id = ?job.id, block_no = %internal_id, "DA job processing started."); + tracing::info!( + log_type = "starting", + category = "da", + function_type = "process_job", + job_id = ?job.id, + block_no = %internal_id, + "DA job processing started." 
+ ); + + // Get DA-specific metadata + let mut da_metadata: DaMetadata = job.metadata.specific.clone().try_into().map_err(|e| { + tracing::error!(job_id = ?job.id, error = ?e, "Invalid metadata type for DA job"); + JobError::Other(OtherError(e)) + })?; + let block_no = job.internal_id.parse::().wrap_err("Failed to parse u64".to_string()).map_err(|e| { tracing::error!(job_id = ?job.id, error = ?e, "Failed to parse block number"); JobError::Other(OtherError(e)) @@ -115,13 +130,13 @@ impl Job for DaJob { MaybePendingStateUpdate::Update(state_update) => state_update, }; tracing::debug!(job_id = ?job.id, "Retrieved state update"); + // constructing the data from the rpc let blob_data = state_update_to_blob_data(block_no, state_update, config.clone()).await.map_err(|e| { tracing::error!(job_id = ?job.id, error = ?e, "Failed to convert state update to blob data"); JobError::Other(OtherError(e)) })?; // transforming the data so that we can apply FFT on this. - // @note: we can skip this step if in the above step we return vec directly let blob_data_biguint = convert_to_biguint(blob_data.clone()); tracing::trace!(job_id = ?job.id, "Converted blob data to BigUint"); @@ -130,16 +145,26 @@ impl Job for DaJob { tracing::error!(job_id = ?job.id, error = ?e, "Failed to apply FFT transformation"); JobError::Other(OtherError(e)) })?; - // data transformation on the data tracing::trace!(job_id = ?job.id, "Applied FFT transformation"); - store_blob_data(transformed_data.clone(), block_no, config.clone()).await?; + // Get blob data path from metadata + let blob_data_path = da_metadata.blob_data_path.as_ref().ok_or_else(|| { + tracing::error!(job_id = ?job.id, "Blob data path not found in metadata"); + JobError::Other(OtherError(eyre!("Blob data path not found in metadata"))) + })?; + + // Store the transformed data + store_blob_data(transformed_data.clone(), blob_data_path, config.clone()).await?; tracing::debug!(job_id = ?job.id, "Stored blob data"); let max_bytes_per_blob = 
config.da_client().max_bytes_per_blob().await; let max_blob_per_txn = config.da_client().max_blob_per_txn().await; - tracing::trace!(job_id = ?job.id, max_bytes_per_blob = max_bytes_per_blob, max_blob_per_txn = max_blob_per_txn, "Retrieved DA client configuration"); - // converting BigUints to Vec, one Vec represents one blob data + tracing::trace!( + job_id = ?job.id, + max_bytes_per_blob = max_bytes_per_blob, + max_blob_per_txn = max_blob_per_txn, + "Retrieved DA client configuration" + ); let blob_array = data_to_blobs(max_bytes_per_blob, transformed_data)?; let current_blob_length: u64 = blob_array @@ -152,9 +177,14 @@ impl Job for DaJob { })?; tracing::debug!(job_id = ?job.id, blob_count = current_blob_length, "Converted data to blobs"); - // there is a limit on number of blobs per txn, checking that here + // Check blob limit if current_blob_length > max_blob_per_txn { - tracing::warn!(job_id = ?job.id, current_blob_length = current_blob_length, max_blob_per_txn = max_blob_per_txn, "Exceeded maximum number of blobs per transaction"); + tracing::error!( + job_id = ?job.id, + current_blob_length = current_blob_length, + max_blob_per_txn = max_blob_per_txn, + "Exceeded maximum number of blobs per transaction" + ); Err(DaError::MaxBlobsLimitExceeded { max_blob_per_txn, current_blob_length, @@ -163,13 +193,24 @@ impl Job for DaJob { })? 
} - // making the txn to the DA layer + // Publish to DA layer let external_id = config.da_client().publish_state_diff(blob_array, &[0; 32]).await.map_err(|e| { tracing::error!(job_id = ?job.id, error = ?e, "Failed to publish state diff to DA layer"); JobError::Other(OtherError(e)) })?; - tracing::info!(log_type = "completed", category = "da", function_type = "process_job", job_id = ?job.id, block_no = %internal_id, external_id = ?external_id, "Successfully published state diff to DA layer."); + da_metadata.tx_hash = Some(external_id.clone()); + job.metadata.specific = JobSpecificMetadata::Da(da_metadata); + + tracing::info!( + log_type = "completed", + category = "da", + function_type = "process_job", + job_id = ?job.id, + block_no = %internal_id, + external_id = ?external_id, + "Successfully published state diff to DA layer." + ); Ok(external_id) } @@ -205,6 +246,13 @@ impl Job for DaJob { fn verification_polling_delay_seconds(&self) -> u64 { 60 } + + fn job_processing_lock( + &self, + _config: Arc, + ) -> std::option::Option> { + None + } } #[tracing::instrument(skip(elements))] @@ -344,14 +392,16 @@ pub async fn state_update_to_blob_data( } /// To store the blob data using the storage client with path /blob_data.txt -async fn store_blob_data(blob_data: Vec, block_number: u64, config: Arc) -> Result<(), JobError> { +async fn store_blob_data(blob_data: Vec, blob_data_path: &str, config: Arc) -> Result<(), JobError> { let storage_client = config.storage(); - let key = block_number.to_string() + "/" + BLOB_DATA_FILE_NAME; let blob_data_vec_u8 = biguint_vec_to_u8_vec(blob_data.as_slice()); if !blob_data_vec_u8.is_empty() { - storage_client.put_data(blob_data_vec_u8.into(), &key).await.map_err(|e| JobError::Other(OtherError(e)))?; + storage_client + .put_data(blob_data_vec_u8.into(), blob_data_path) + .await + .map_err(|e| JobError::Other(OtherError(e)))?; } Ok(()) diff --git a/orchestrator/crates/orchestrator/src/jobs/metadata/common.rs 
b/orchestrator/crates/orchestrator/src/jobs/metadata/common.rs new file mode 100644 index 000000000..a2505c608 --- /dev/null +++ b/orchestrator/crates/orchestrator/src/jobs/metadata/common.rs @@ -0,0 +1,38 @@ +//! Common metadata shared across all job types. + +use chrono::{DateTime, Utc}; +use serde::{Deserialize, Serialize}; + +/// Common metadata fields shared across all job types. +/// +/// # Field Management +/// These fields are automatically managed by the job processing system and should not +/// be modified directly by workers or jobs. The system uses these fields to: +/// - Track processing and verification attempts +/// - Record completion timestamps +/// - Store failure information +#[derive(Debug, Clone, Serialize, Deserialize, Default, PartialEq, Eq)] +pub struct CommonMetadata { + /// Number of times the job has been processed + pub process_attempt_no: u64, + /// Number of times the job has been retried after processing failures + pub process_retry_attempt_no: u64, + /// Number of times the job has been verified + pub verification_attempt_no: u64, + /// Number of times the job has been retried after verification failures + pub verification_retry_attempt_no: u64, + /// Timestamp when job processing started + #[serde(with = "chrono::serde::ts_seconds_option")] + pub process_started_at: Option>, + /// Timestamp when job processing completed + #[serde(with = "chrono::serde::ts_seconds_option")] + pub process_completed_at: Option>, + /// Timestamp when job verification started + #[serde(with = "chrono::serde::ts_seconds_option")] + pub verification_started_at: Option>, + /// Timestamp when job verification completed + #[serde(with = "chrono::serde::ts_seconds_option")] + pub verification_completed_at: Option>, + /// Reason for job failure if any + pub failure_reason: Option, +} diff --git a/orchestrator/crates/orchestrator/src/jobs/metadata/da.rs b/orchestrator/crates/orchestrator/src/jobs/metadata/da.rs new file mode 100644 index 000000000..620238a95 --- 
/dev/null +++ b/orchestrator/crates/orchestrator/src/jobs/metadata/da.rs @@ -0,0 +1,21 @@ +//! Metadata for data availability (DA) jobs. + +use serde::{Deserialize, Serialize}; + +/// Metadata specific to data availability (DA) jobs. +/// +/// # Field Management +/// - Worker-initialized fields: block_number and blob_data_path +/// - Job-populated fields: tx_hash (during processing) +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)] +pub struct DaMetadata { + // Worker-initialized fields + /// Block number for data availability + pub block_number: u64, + /// Path to the blob data file + pub blob_data_path: Option, + + // Job-populated fields + /// Transaction hash after data submission + pub tx_hash: Option, +} diff --git a/orchestrator/crates/orchestrator/src/jobs/metadata/mod.rs b/orchestrator/crates/orchestrator/src/jobs/metadata/mod.rs new file mode 100644 index 000000000..1ace16ca0 --- /dev/null +++ b/orchestrator/crates/orchestrator/src/jobs/metadata/mod.rs @@ -0,0 +1,73 @@ +//! Job metadata types and their management. +//! +//! This module defines the metadata structures used by different job types in the system. +//! Each job type has its specific metadata requirements, and the fields are managed either +//! by workers during job creation or by jobs during processing. + +mod common; +mod da; +mod proving; +mod snos; +mod state_update; + +// Re-export everything for backward compatibility +use color_eyre::eyre; +use color_eyre::eyre::eyre; +pub use common::*; +pub use da::*; +pub use proving::*; +use serde::{Deserialize, Serialize}; +pub use snos::*; +pub use state_update::*; + +/// Enum containing all possible job-specific metadata types. +/// +/// This enum is used to provide type-safe access to job-specific metadata +/// while maintaining a common interface for job processing. 
+#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)] +#[serde(tag = "type")] +pub enum JobSpecificMetadata { + /// SNOS job metadata + Snos(SnosMetadata), + /// State update job metadata + StateUpdate(StateUpdateMetadata), + /// Proving job metadata + Proving(ProvingMetadata), + /// Data availability job metadata + Da(DaMetadata), +} + +/// Macro to implement TryInto for JobSpecificMetadata variants +macro_rules! impl_try_into_metadata { + ($variant:ident, $type:ident) => { + impl TryInto<$type> for JobSpecificMetadata { + type Error = eyre::Error; + + fn try_into(self) -> Result<$type, Self::Error> { + match self { + JobSpecificMetadata::$variant(metadata) => Ok(metadata), + _ => Err(eyre!(concat!("Invalid metadata type: expected ", stringify!($variant), " metadata"))), + } + } + } + }; +} + +// Implement TryInto for all metadata types +impl_try_into_metadata!(Snos, SnosMetadata); +impl_try_into_metadata!(Proving, ProvingMetadata); +impl_try_into_metadata!(Da, DaMetadata); +impl_try_into_metadata!(StateUpdate, StateUpdateMetadata); + +/// Complete job metadata containing both common and job-specific fields. +/// +/// # Field Management +/// - `common`: Managed automatically by the job processing system +/// - `specific`: Contains job-type specific fields managed by workers and jobs +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)] +pub struct JobMetadata { + /// Common metadata fields shared across all job types + pub common: CommonMetadata, + /// Job-specific metadata fields + pub specific: JobSpecificMetadata, +} diff --git a/orchestrator/crates/orchestrator/src/jobs/metadata/proving.rs b/orchestrator/crates/orchestrator/src/jobs/metadata/proving.rs new file mode 100644 index 000000000..cda24991a --- /dev/null +++ b/orchestrator/crates/orchestrator/src/jobs/metadata/proving.rs @@ -0,0 +1,31 @@ +//! Metadata for proving jobs. + +use serde::{Deserialize, Serialize}; + +/// Input type specification for proving jobs. 
+#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)] +pub enum ProvingInputType { + /// Path to an existing proof + Proof(String), + /// Path to a Cairo PIE file + CairoPie(String), +} + +/// Metadata specific to proving jobs. +/// +/// # Field Management +/// All fields are initialized by the worker during job creation. +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)] +pub struct ProvingMetadata { + /// Block number to prove + pub block_number: u64, + /// Path to the input file (proof or Cairo PIE) + pub input_path: Option, + /// SNOS fact to check for on-chain registration. If `None`, no on-chain check is performed. If + /// `Some(value)`, it checks for `value` on the chain. + pub ensure_on_chain_registration: Option, + /// Path where the generated proof should be downloaded. If `None`, the proof will not be + /// downloaded. If `Some(value)`, the proof will be downloaded and stored to the specified path + /// in the provided storage. + pub download_proof: Option, +} diff --git a/orchestrator/crates/orchestrator/src/jobs/metadata/snos.rs b/orchestrator/crates/orchestrator/src/jobs/metadata/snos.rs new file mode 100644 index 000000000..817fef718 --- /dev/null +++ b/orchestrator/crates/orchestrator/src/jobs/metadata/snos.rs @@ -0,0 +1,27 @@ +//! Metadata for SNOS (Starknet OS) jobs. + +use serde::{Deserialize, Serialize}; + +/// Metadata specific to SNOS (Starknet OS) jobs. 
+/// +/// # Field Management +/// - Worker-initialized fields: block_number, full_output, and path configurations +/// - Job-populated fields: snos_fact (during processing) +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)] +pub struct SnosMetadata { + // Worker-initialized fields + /// Block number to process + pub block_number: u64, + /// Whether to generate full SNOS output + pub full_output: bool, + /// Path to the Cairo PIE file + pub cairo_pie_path: Option, + /// Path to the SNOS output file + pub snos_output_path: Option, + /// Path to the program output file + pub program_output_path: Option, + + // Job-populated fields + /// SNOS fact generated during processing + pub snos_fact: Option, +} diff --git a/orchestrator/crates/orchestrator/src/jobs/metadata/state_update.rs b/orchestrator/crates/orchestrator/src/jobs/metadata/state_update.rs new file mode 100644 index 000000000..214a12372 --- /dev/null +++ b/orchestrator/crates/orchestrator/src/jobs/metadata/state_update.rs @@ -0,0 +1,27 @@ +//! Metadata for state update jobs. + +use serde::{Deserialize, Serialize}; + +/// Metadata specific to state update jobs. 
+/// +/// # Field Management +/// - Worker-initialized fields: blocks and paths configurations +/// - Job-populated fields: last_failed_block_no and tx_hashes (during processing) +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)] +pub struct StateUpdateMetadata { + // Worker-initialized fields + /// Block numbers that need to be settled + pub blocks_to_settle: Vec, + /// Paths to SNOS output files for each block + pub snos_output_paths: Vec, + /// Paths to program output files for each block + pub program_output_paths: Vec, + /// Paths to blob data files for each block + pub blob_data_paths: Vec, + + // Job-populated fields + /// Last block number that failed processing + pub last_failed_block_no: Option, + /// Transaction hashes for processed blocks + pub tx_hashes: Vec, +} diff --git a/orchestrator/crates/orchestrator/src/jobs/mod.rs b/orchestrator/crates/orchestrator/src/jobs/mod.rs index 2b880830a..036bc5237 100644 --- a/orchestrator/crates/orchestrator/src/jobs/mod.rs +++ b/orchestrator/crates/orchestrator/src/jobs/mod.rs @@ -7,7 +7,6 @@ use std::time::{Duration, Instant}; use async_trait::async_trait; use chrono::Utc; use color_eyre::eyre::{eyre, Context}; -use constants::{JOB_METADATA_ERROR, JOB_METADATA_FAILURE_REASON, JOB_METADATA_PROCESSING_COMPLETED_AT}; use conversion::parse_string; use da_job::DaError; use futures::FutureExt; @@ -22,17 +21,18 @@ use types::{ExternalId, JobItemUpdates}; use uuid::Uuid; use crate::config::Config; -use crate::jobs::constants::{JOB_PROCESS_ATTEMPT_METADATA_KEY, JOB_VERIFICATION_ATTEMPT_METADATA_KEY}; +use crate::helpers::JobProcessingState; #[double] use crate::jobs::job_handler_factory::factory; +use crate::jobs::metadata::JobMetadata; use crate::jobs::types::{JobItem, JobStatus, JobType, JobVerificationStatus}; use crate::metrics::ORCHESTRATOR_METRICS; use crate::queue::job_queue::{add_job_to_process_queue, add_job_to_verification_queue, ConsumptionError}; -pub mod constants; pub mod conversion; pub mod 
da_job; pub mod job_handler_factory; +pub mod metadata; pub mod proving_job; pub mod register_proof_job; pub mod snos_job; @@ -40,47 +40,70 @@ pub mod state_update_job; pub mod types; use thiserror::Error; +/// Error types for job-related operations in the orchestrator #[derive(Error, Debug, PartialEq)] pub enum JobError { + /// Indicates an invalid job ID was provided #[error("Job id {id:?} is invalid.")] InvalidId { id: String }, + /// Indicates an attempt to create a duplicate job #[error("Job already exists for internal_id {internal_id:?} and job_type {job_type:?}. Skipping!")] JobAlreadyExists { internal_id: String, job_type: JobType }, + /// Indicates the job is in an invalid status for the requested operation #[error("Invalid status {job_status:?} for job with id {id:?}. Cannot process.")] InvalidStatus { id: Uuid, job_status: JobStatus }, + /// Indicates the requested job could not be found #[error("Failed to find job with id {id:?}")] JobNotFound { id: Uuid }, + /// Indicates a metadata counter would overflow if incremented #[error("Incrementing key {} in metadata would exceed u64::MAX", key)] KeyOutOfBounds { key: String }, + /// Wraps errors from DA layer operations #[error("DA Error: {0}")] DaJobError(#[from] DaError), + /// Wraps errors from proving operations #[error("Proving Error: {0}")] ProvingJobError(#[from] ProvingError), + /// Wraps errors from state update operations #[error("Proving Error: {0}")] StateUpdateJobError(#[from] StateUpdateError), + /// Wraps errors from SNOS operations #[error("Snos Error: {0}")] SnosJobError(#[from] SnosError), + /// Wraps errors from queue handling operations #[error("Queue Handling Error: {0}")] ConsumptionError(#[from] ConsumptionError), + /// Wraps errors from fact operations #[error("Fact Error: {0}")] FactError(#[from] FactError), + /// Wraps general errors that don't fit other categories #[error("Other error: {0}")] Other(#[from] OtherError), + + /// Indicates that the maximum capacity of jobs currently 
being processed has been reached + #[error("Max Capacity Reached, Already processing")] + MaxCapacityReached, + + /// Indicates an error occurred while extracting the processing lock + #[error("Error extracting processing lock: {0}")] + LockError(String), } -// ==================================================== /// Wrapper Type for Other(<>) job type +/// +/// Provides a generic error type for cases that don't fit into specific error categories +/// while maintaining error chain context. #[derive(Debug)] pub struct OtherError(color_eyre::eyre::Error); @@ -109,51 +132,113 @@ impl From for OtherError { OtherError(eyre!(error_string)) } } -// ==================================================== + +impl From for JobError { + fn from(err: color_eyre::Report) -> Self { + JobError::Other(OtherError(err)) + } +} /// Job Trait /// /// The Job trait is used to define the methods that a job /// should implement to be used as a job for the orchestrator. The orchestrator automatically /// handles queueing and processing of jobs as long as they implement the trait. +/// +/// # Implementation Requirements +/// Implementors must be both `Send` and `Sync` to work with the async processing system. #[automock] #[async_trait] pub trait Job: Send + Sync { /// Should build a new job item and return it + /// + /// # Arguments + /// * `config` - Shared configuration for the job + /// * `internal_id` - Unique identifier for internal tracking + /// * `metadata` - Additional key-value pairs associated with the job + /// + /// # Returns + /// * `Result` - The created job item or an error async fn create_job( &self, config: Arc, internal_id: String, - metadata: HashMap, + metadata: JobMetadata, ) -> Result; + /// Should process the job and return the external_id which can be used to /// track the status of the job. For example, a DA job will submit the state diff /// to the DA layer and return the txn hash. 
+ /// + /// # Arguments + /// * `config` - Shared configuration for the job + /// * `job` - Mutable reference to the job being processed + /// + /// # Returns + /// * `Result` - External tracking ID or an error async fn process_job(&self, config: Arc, job: &mut JobItem) -> Result; + /// Should verify the job and return the status of the verification. For example, /// a DA job will verify the inclusion of the state diff in the DA layer and return /// the status of the verification. + /// + /// # Arguments + /// * `config` - Shared configuration for the job + /// * `job` - Mutable reference to the job being verified + /// + /// # Returns + /// * `Result` - Current verification status or an error async fn verify_job(&self, config: Arc, job: &mut JobItem) -> Result; + /// Should return the maximum number of attempts to process the job. A new attempt is made /// every time the verification returns `JobVerificationStatus::Rejected` fn max_process_attempts(&self) -> u64; + /// Should return the maximum number of attempts to verify the job. 
A new attempt is made /// every few seconds depending on the result `verification_polling_delay_seconds` fn max_verification_attempts(&self) -> u64; + /// Should return the number of seconds to wait before polling for verification fn verification_polling_delay_seconds(&self) -> u64; + + fn job_processing_lock(&self, config: Arc) -> Option>; } /// Creates the job in the DB in the created state and adds it to the process queue +/// +/// # Arguments +/// * `job_type` - Type of job to create +/// * `internal_id` - Unique identifier for internal tracking +/// * `metadata` - Additional key-value pairs for the job +/// * `config` - Shared configuration +/// +/// # Returns +/// * `Result<(), JobError>` - Success or an error +/// +/// # Metrics +/// * Records block gauge +/// * Updates successful job operations count +/// * Records job response time +/// +/// # Notes +/// * Skips creation if job already exists with same internal_id and job_type +/// * Automatically adds the job to the process queue upon successful creation #[tracing::instrument(fields(category = "general"), skip(config), ret, err)] pub async fn create_job( job_type: JobType, internal_id: String, - metadata: HashMap, + metadata: JobMetadata, config: Arc, ) -> Result<(), JobError> { let start = Instant::now(); - tracing::info!(log_type = "starting", category = "general", function_type = "create_job", job_type = ?job_type, block_no = %internal_id, "General create job started for block"); + tracing::info!( + log_type = "starting", + category = "general", + function_type = "create_job", + job_type = ?job_type, + block_no = %internal_id, + "General create job started for block" + ); tracing::debug!( job_type = ?job_type, @@ -161,13 +246,13 @@ pub async fn create_job( metadata = ?metadata, "Job creation details" ); + let existing_job = config .database() .get_job_by_internal_id_and_type(internal_id.as_str(), &job_type) .await .map_err(|e| JobError::Other(OtherError(e)))?; - // this is technically a redundant 
check, we've another check inside `create_job` if existing_job.is_some() { tracing::warn!("{}", JobError::JobAlreadyExists { internal_id, job_type }); return Ok(()); @@ -176,7 +261,7 @@ pub async fn create_job( let job_handler = factory::get_job_handler(&job_type).await; let job_item = job_handler.create_job(config.clone(), internal_id.clone(), metadata).await?; config.database().create_job(job_item.clone()).await?; - + println!("Job item inside the create job function: {:?}", job_item); add_job_to_process_queue(job_item.id, &job_type, config.clone()) .await .map_err(|e| JobError::Other(OtherError(e)))?; @@ -184,7 +269,14 @@ pub async fn create_job( let attributes = [KeyValue::new("operation_job_type", format!("{:?}", job_type)), KeyValue::new("operation_type", "create_job")]; - tracing::info!(log_type = "completed", category = "general", function_type = "create_job", block_no = %internal_id, "General create job completed for block"); + tracing::info!( + log_type = "completed", + category = "general", + function_type = "create_job", + block_no = %internal_id, + "General create job completed for block" + ); + let duration = start.elapsed(); ORCHESTRATOR_METRICS.block_gauge.record(parse_string(&internal_id)?, &attributes); ORCHESTRATOR_METRICS.successful_job_operations.add(1.0, &attributes); @@ -194,12 +286,41 @@ pub async fn create_job( /// Processes the job, increments the process attempt count and updates the status of the job in the /// DB. It then adds the job to the verification queue. 
+/// +/// # Arguments +/// * `id` - UUID of the job to process +/// * `config` - Shared configuration +/// +/// # Returns +/// * `Result<(), JobError>` - Success or an error +/// +/// # State Transitions +/// * `Created` -> `LockedForProcessing` -> `PendingVerification` +/// * `VerificationFailed` -> `LockedForProcessing` -> `PendingVerification` +/// * `PendingRetry` -> `LockedForProcessing` -> `PendingVerification` +/// +/// # Metrics +/// * Updates block gauge +/// * Records successful job operations +/// * Tracks job response time +/// +/// # Notes +/// * Only processes jobs in Created, VerificationFailed, or PendingRetry status +/// * Updates job version to prevent concurrent processing +/// * Adds processing completion timestamp to metadata +/// * Automatically adds job to verification queue upon successful processing #[tracing::instrument(skip(config), fields(category = "general", job, job_type, internal_id), ret, err)] pub async fn process_job(id: Uuid, config: Arc) -> Result<(), JobError> { let start = Instant::now(); - let job = get_job(id, config.clone()).await?; + let mut job = get_job(id, config.clone()).await?; let internal_id = job.internal_id.clone(); - tracing::info!(log_type = "starting", category = "general", function_type = "process_job", block_no = %internal_id, "General process job started for block"); + tracing::info!( + log_type = "starting", + category = "general", + function_type = "process_job", + block_no = %internal_id, + "General process job started for block" + ); tracing::Span::current().record("job", format!("{:?}", job.clone())); tracing::Span::current().record("job_type", format!("{:?}", job.job_type)); @@ -207,23 +328,41 @@ pub async fn process_job(id: Uuid, config: Arc) -> Result<(), JobError> tracing::debug!(job_id = ?id, status = ?job.status, "Current job status"); match job.status { - // we only want to process jobs that are in the created or verification failed state. 
- // verification failed state means that the previous processing failed and we want to retry - JobStatus::Created | JobStatus::VerificationFailed => { - tracing::info!(job_id = ?id, status = ?job.status, "Job status is Created or VerificationFailed, proceeding with processing"); + // we only want to process jobs that are in the created or verification failed state or if it's been called from + // the retry endpoint (in this case it would be PendingRetry status) verification failed state means + // that the previous processing failed and we want to retry + JobStatus::Created | JobStatus::VerificationFailed | JobStatus::PendingRetry => { + tracing::info!(job_id = ?id, status = ?job.status, "Processing job"); } _ => { - tracing::warn!(job_id = ?id, status = ?job.status, "Job status is Invalid. Cannot process."); + tracing::warn!(job_id = ?id, status = ?job.status, "Cannot process job with current status"); return Err(JobError::InvalidStatus { id, job_status: job.status }); } } + + let job_handler = factory::get_job_handler(&job.job_type).await; + let job_processing_locks = job_handler.job_processing_lock(config.clone()); + + let permit = if let Some(ref processing_locks) = job_processing_locks { + Some(processing_locks.try_acquire_lock(&job, config.clone()).await?) + } else { + None + }; + // this updates the version of the job. 
this ensures that if another thread was about to process // the same job, it would fail to update the job in the database because the version would be // outdated tracing::debug!(job_id = ?id, "Updating job status to LockedForProcessing"); + job.metadata.common.process_started_at = Some(Utc::now()); let mut job = config .database() - .update_job(&job, JobItemUpdates::new().update_status(JobStatus::LockedForProcessing).build()) + .update_job( + &job, + JobItemUpdates::new() + .update_status(JobStatus::LockedForProcessing) + .update_metadata(job.metadata.clone()) + .build(), + ) .await .map_err(|e| { tracing::error!(job_id = ?id, error = ?e, "Failed to update job status"); @@ -231,13 +370,12 @@ pub async fn process_job(id: Uuid, config: Arc) -> Result<(), JobError> })?; tracing::debug!(job_id = ?id, job_type = ?job.job_type, "Getting job handler"); - let job_handler = factory::get_job_handler(&job.job_type).await; let external_id = match AssertUnwindSafe(job_handler.process_job(config.clone(), &mut job)).catch_unwind().await { Ok(Ok(external_id)) => { tracing::debug!(job_id = ?id, "Successfully processed job"); // Add the time of processing to the metadata. 
- job.metadata - .insert(JOB_METADATA_PROCESSING_COMPLETED_AT.to_string(), Utc::now().timestamp_millis().to_string()); + job.metadata.common.process_completed_at = Some(Utc::now()); + external_id } Ok(Err(e)) => { @@ -263,10 +401,11 @@ pub async fn process_job(id: Uuid, config: Arc) -> Result<(), JobError> .await; } }; - tracing::debug!(job_id = ?id, "Incrementing process attempt count in metadata"); - let metadata = increment_key_in_metadata(&job.metadata, JOB_PROCESS_ATTEMPT_METADATA_KEY)?; - // Fetching the job again because update status above will update the job version + // Increment process attempt counter + job.metadata.common.process_attempt_no += 1; + + // Update job status and metadata tracing::debug!(job_id = ?id, "Updating job status to PendingVerification"); config .database() @@ -274,7 +413,7 @@ pub async fn process_job(id: Uuid, config: Arc) -> Result<(), JobError> &job, JobItemUpdates::new() .update_status(JobStatus::PendingVerification) - .update_metadata(metadata) + .update_metadata(job.metadata.clone()) .update_external_id(external_id.clone().into()) .build(), ) @@ -284,6 +423,7 @@ pub async fn process_job(id: Uuid, config: Arc) -> Result<(), JobError> JobError::Other(OtherError(e)) })?; + // Add to verification queue tracing::debug!(job_id = ?id, "Adding job to verification queue"); add_job_to_verification_queue( job.id, @@ -302,12 +442,25 @@ pub async fn process_job(id: Uuid, config: Arc) -> Result<(), JobError> KeyValue::new("operation_type", "process_job"), ]; - tracing::info!(log_type = "completed", category = "general", function_type = "process_job", block_no = %internal_id, "General process job completed for block"); + tracing::info!( + log_type = "completed", + category = "general", + function_type = "process_job", + block_no = %internal_id, + "General process job completed for block" + ); + let duration = start.elapsed(); ORCHESTRATOR_METRICS.successful_job_operations.add(1.0, &attributes); 
ORCHESTRATOR_METRICS.jobs_response_time.record(duration.as_secs_f64(), &attributes); - // job_type, internal_id, external_id register_block_gauge(job.job_type, &job.internal_id, external_id.into(), &attributes)?; + + if let Some(permit) = permit { + if let Some(ref processing_locks) = job_processing_locks { + processing_locks.try_release_lock(permit, &job.id).await?; + } + } + Ok(()) } @@ -317,6 +470,28 @@ pub async fn process_job(id: Uuid, config: Arc) -> Result<(), JobError> /// retries processing the job if the max attempts have not been exceeded. If the max attempts have /// been exceeded, it marks the job as timed out. If the verification is still pending, it pushes /// the job back to the queue. +/// +/// # Arguments +/// * `id` - UUID of the job to verify +/// * `config` - Shared configuration +/// +/// # Returns +/// * `Result<(), JobError>` - Success or an error +/// +/// # State Transitions +/// * `PendingVerification` -> `Completed` (on successful verification) +/// * `PendingVerification` -> `VerificationFailed` (on verification rejection) +/// * `PendingVerification` -> `VerificationTimeout` (max attempts reached) +/// +/// # Metrics +/// * Records verification time if processing completion timestamp exists +/// * Updates block gauge and job operation metrics +/// * Tracks successful operations and response time +/// +/// # Notes +/// * Only jobs in `PendingVerification` or `VerificationTimeout` status can be verified +/// * Automatically retries processing if verification fails and max attempts not reached +/// * Removes processing_finished_at from metadata upon successful verification #[tracing::instrument( skip(config), fields(category = "general", job, job_type, internal_id, verification_status), @@ -334,8 +509,9 @@ pub async fn verify_job(id: Uuid, config: Arc) -> Result<(), JobError> { tracing::Span::current().record("internal_id", job.internal_id.clone()); match job.status { - JobStatus::PendingVerification => { - tracing::debug!(job_id = ?id, 
"Job status is PendingVerification, proceeding with verification"); + // Jobs with `VerificationTimeout` will be retired manually after resetting verification attempt number to 0. + JobStatus::PendingVerification | JobStatus::VerificationTimeout => { + tracing::info!(job_id = ?id, status = ?job.status, "Proceeding with verification"); } _ => { tracing::error!(job_id = ?id, status = ?job.status, "Invalid job status for verification"); @@ -345,6 +521,17 @@ pub async fn verify_job(id: Uuid, config: Arc) -> Result<(), JobError> { let job_handler = factory::get_job_handler(&job.job_type).await; tracing::debug!(job_id = ?id, "Verifying job with handler"); + + job.metadata.common.verification_started_at = Some(Utc::now()); + let mut job = config + .database() + .update_job(&job, JobItemUpdates::new().update_metadata(job.metadata.clone()).build()) + .await + .map_err(|e| { + tracing::error!(job_id = ?id, error = ?e, "Failed to update job status"); + JobError::Other(OtherError(e)) + })?; + let verification_status = job_handler.verify_job(config.clone(), &mut job).await?; tracing::Span::current().record("verification_status", format!("{:?}", &verification_status)); @@ -358,24 +545,26 @@ pub async fn verify_job(id: Uuid, config: Arc) -> Result<(), JobError> { match verification_status { JobVerificationStatus::Verified => { tracing::info!(job_id = ?id, "Job verified successfully"); - match job - .metadata - .get(JOB_METADATA_PROCESSING_COMPLETED_AT) - .and_then(|time| time.parse::().ok()) - .map(|start| Utc::now().timestamp_millis() - start) - { - Some(time_taken) => ORCHESTRATOR_METRICS + // Calculate verification time if processing completion timestamp exists + if let Some(verification_time) = job.metadata.common.verification_started_at { + let time_taken = (Utc::now() - verification_time).num_milliseconds(); + ORCHESTRATOR_METRICS .verification_time - .record(time_taken as f64, &[KeyValue::new("operation_job_type", format!("{:?}", job.job_type))]), - None => 
tracing::warn!("Failed to calculate verification time: Invalid or missing processing time"), + .record(time_taken as f64, &[KeyValue::new("operation_job_type", format!("{:?}", job.job_type))]); + } else { + tracing::warn!("Failed to calculate verification time: Missing processing completion timestamp"); } - let mut metadata = job.metadata.clone(); - metadata.remove("processing_completed_at"); + + // Update verification completed timestamp and update status + job.metadata.common.verification_completed_at = Some(Utc::now()); config .database() .update_job( &job, - JobItemUpdates::new().update_metadata(metadata).update_status(JobStatus::Completed).build(), + JobItemUpdates::new() + .update_metadata(job.metadata.clone()) + .update_status(JobStatus::Completed) + .build(), ) .await .map_err(|e| { @@ -385,17 +574,16 @@ pub async fn verify_job(id: Uuid, config: Arc) -> Result<(), JobError> { operation_job_status = Some(JobStatus::Completed); } JobVerificationStatus::Rejected(e) => { - tracing::warn!(job_id = ?id, error = ?e, "Job verification rejected"); - let mut new_job_metadata = job.metadata.clone(); - new_job_metadata.insert(JOB_METADATA_ERROR.to_string(), e); + tracing::error!(job_id = ?id, error = ?e, "Job verification rejected"); + + // Update metadata with error information + job.metadata.common.failure_reason = Some(e.clone()); operation_job_status = Some(JobStatus::VerificationFailed); - let process_attempts = get_u64_from_metadata(&job.metadata, JOB_PROCESS_ATTEMPT_METADATA_KEY) - .map_err(|e| JobError::Other(OtherError(e)))?; - if process_attempts < job_handler.max_process_attempts() { + if job.metadata.common.process_attempt_no < job_handler.max_process_attempts() { tracing::info!( job_id = ?id, - attempt = process_attempts + 1, + attempt = job.metadata.common.process_attempt_no + 1, "Verification failed. 
Retrying job processing" ); @@ -405,7 +593,7 @@ pub async fn verify_job(id: Uuid, config: Arc) -> Result<(), JobError> { &job, JobItemUpdates::new() .update_status(JobStatus::VerificationFailed) - .update_metadata(new_job_metadata) + .update_metadata(job.metadata.clone()) .build(), ) .await @@ -421,16 +609,18 @@ pub async fn verify_job(id: Uuid, config: Arc) -> Result<(), JobError> { return move_job_to_failed( &job, config.clone(), - format!("Verification rejected. Max process attempts reached: {}", process_attempts), + format!( + "Verification rejected. Max process attempts reached: {}", + job.metadata.common.process_attempt_no + ), ) .await; } } JobVerificationStatus::Pending => { tracing::debug!(job_id = ?id, "Job verification still pending"); - let verify_attempts = get_u64_from_metadata(&job.metadata, JOB_VERIFICATION_ATTEMPT_METADATA_KEY) - .map_err(|e| JobError::Other(OtherError(e)))?; - if verify_attempts >= job_handler.max_verification_attempts() { + + if job.metadata.common.verification_attempt_no >= job_handler.max_verification_attempts() { tracing::warn!(job_id = ?id, "Max verification attempts reached. 
Marking job as timed out"); config .database() @@ -442,11 +632,12 @@ pub async fn verify_job(id: Uuid, config: Arc) -> Result<(), JobError> { })?; operation_job_status = Some(JobStatus::VerificationTimeout); } else { - let metadata = increment_key_in_metadata(&job.metadata, JOB_VERIFICATION_ATTEMPT_METADATA_KEY)?; + // Increment verification attempts + job.metadata.common.verification_attempt_no += 1; config .database() - .update_job(&job, JobItemUpdates::new().update_metadata(metadata).build()) + .update_job(&job, JobItemUpdates::new().update_metadata(job.metadata.clone()).build()) .await .map_err(|e| { tracing::error!(job_id = ?id, error = ?e, "Failed to update job metadata"); @@ -477,13 +668,112 @@ pub async fn verify_job(id: Uuid, config: Arc) -> Result<(), JobError> { let duration = start.elapsed(); ORCHESTRATOR_METRICS.successful_job_operations.add(1.0, &attributes); ORCHESTRATOR_METRICS.jobs_response_time.record(duration.as_secs_f64(), &attributes); - // job_type, internal_id, external_id register_block_gauge(job.job_type, &job.internal_id, job.external_id, &attributes)?; Ok(()) } +/// Retries a failed job by reprocessing it. +/// Only jobs with Failed status can be retried. 
+/// +/// # Arguments +/// * `id` - UUID of the job to retry +/// * `config` - Shared configuration +/// +/// # Returns +/// * `Result<(), JobError>` - Success or an error +/// +/// # State Transitions +/// * `Failed` -> `PendingRetry` -> (normal processing flow) +/// +/// # Notes +/// * Only jobs in Failed status can be retried +/// * Transitions through PendingRetry status before normal processing +/// * Uses standard process_job function after status update +#[tracing::instrument(skip(config), fields(category = "general"), ret, err)] +pub async fn retry_job(id: Uuid, config: Arc) -> Result<(), JobError> { + let mut job = get_job(id, config.clone()).await?; + let internal_id = job.internal_id.clone(); + + tracing::info!( + log_type = "starting", + category = "general", + function_type = "retry_job", + block_no = %internal_id, + "General retry job started for block" + ); + + if job.status != JobStatus::Failed { + tracing::error!( + job_id = ?id, + status = ?job.status, + "Cannot retry job: invalid status" + ); + return Err(JobError::InvalidStatus { id, job_status: job.status }); + } + + // Increment the retry counter in common metadata + job.metadata.common.process_retry_attempt_no += 1; + job.metadata.common.process_attempt_no = 0; + + tracing::debug!( + job_id = ?id, + retry_count = job.metadata.common.process_retry_attempt_no, + "Incrementing process retry attempt counter" + ); + + // Update job status and metadata to PendingRetry before processing + config + .database() + .update_job( + &job, + JobItemUpdates::new().update_status(JobStatus::PendingRetry).update_metadata(job.metadata.clone()).build(), + ) + .await + .map_err(|e| { + tracing::error!( + job_id = ?id, + error = ?e, + "Failed to update job status to PendingRetry" + ); + JobError::Other(OtherError(e)) + })?; + + add_job_to_process_queue(job.id, &job.job_type, config.clone()).await.map_err(|e| { + tracing::error!( + log_type = "error", + category = "general", + function_type = "retry_job", + block_no 
= %internal_id, + error = %e, + "Failed to add job to process queue" + ); + JobError::Other(OtherError(e)) + })?; + + tracing::info!( + log_type = "completed", + category = "general", + function_type = "retry_job", + block_no = %internal_id, + "Successfully queued job for retry" + ); + + Ok(()) +} + /// Terminates the job and updates the status of the job in the DB. -/// Logs error if the job status `Completed` is existing on DL queue. +/// +/// # Arguments +/// * `id` - UUID of the job to handle failure for +/// * `config` - Shared configuration +/// +/// # Returns +/// * `Result<(), JobError>` - Success or an error +/// +/// # Notes +/// * Logs error if the job status `Completed` is existing on DL queue +/// * Updates job status to Failed and records failure reason in metadata +/// * Updates metrics for failed jobs #[tracing::instrument(skip(config), fields(job_status, job_type), ret, err)] pub async fn handle_job_failure(id: Uuid, config: Arc) -> Result<(), JobError> { let job = get_job(id, config.clone()).await?.clone(); @@ -519,6 +809,20 @@ fn register_block_gauge( Ok(()) } +/// Moves a job to the Failed state with the provided reason +/// +/// # Arguments +/// * `job` - Reference to the job to mark as failed +/// * `config` - Shared configuration +/// * `reason` - Failure reason to record in metadata +/// +/// # Returns +/// * `Result<(), JobError>` - Success or an error +/// +/// # Notes +/// * Skips processing if job is already in Failed status +/// * Records failure reason in job metadata +/// * Updates metrics for failed jobs async fn move_job_to_failed(job: &JobItem, config: Arc, reason: String) -> Result<(), JobError> { if job.status == JobStatus::Completed { tracing::error!(job_id = ?job.id, job_status = ?job.status, "Invalid state exists on DL queue"); @@ -531,30 +835,53 @@ async fn move_job_to_failed(job: &JobItem, config: Arc, reason: String) return Ok(()); } - let mut metadata = job.metadata.clone(); + let mut job_metadata = job.metadata.clone(); 
let internal_id = job.internal_id.clone(); - metadata.insert(JOB_METADATA_FAILURE_REASON.to_string(), reason); tracing::debug!(job_id = ?job.id, "Updating job status to Failed in database"); + // Update failure information in common metadata + job_metadata.common.failure_reason = Some(reason); + match config .database() - .update_job(job, JobItemUpdates::new().update_status(JobStatus::Failed).update_metadata(metadata).build()) + .update_job(job, JobItemUpdates::new().update_status(JobStatus::Failed).update_metadata(job_metadata).build()) .await { Ok(_) => { - tracing::info!(log_type = "completed", category = "general", function_type = "handle_job_failure", block_no = %internal_id, "General handle job failure completed for block"); + tracing::info!( + log_type = "completed", + category = "general", + function_type = "handle_job_failure", + block_no = %internal_id, + "General handle job failure completed for block" + ); ORCHESTRATOR_METRICS .failed_jobs .add(1.0, &[KeyValue::new("operation_job_type", format!("{:?}", job.job_type))]); Ok(()) } Err(e) => { - tracing::error!(log_type = "error", category = "general", function_type = "handle_job_failure", block_no = %internal_id, error = %e, "General handle job failure failed for block"); + tracing::error!( + log_type = "error", + category = "general", + function_type = "handle_job_failure", + block_no = %internal_id, + error = %e, + "General handle job failure failed for block" + ); Err(JobError::Other(OtherError(e))) } } } +/// Retrieves a job by its ID from the database +/// +/// # Arguments +/// * `id` - UUID of the job to retrieve +/// * `config` - Shared configuration +/// +/// # Returns +/// * `Result` - The job if found, or JobNotFound error async fn get_job(id: Uuid, config: Arc) -> Result { let job = config.database().get_job_by_id(id).await.map_err(|e| JobError::Other(OtherError(e)))?; match job { @@ -563,6 +890,18 @@ async fn get_job(id: Uuid, config: Arc) -> Result { } } +/// Increments a numeric value in the 
job metadata +/// +/// # Arguments +/// * `metadata` - Current metadata map +/// * `key` - Key to increment +/// +/// # Returns +/// * `Result, JobError>` - Updated metadata or an error +/// +/// # Errors +/// * Returns KeyOutOfBounds if incrementing would exceed u64::MAX +/// * Returns error if value cannot be parsed as u64 pub fn increment_key_in_metadata( metadata: &HashMap, key: &str, @@ -578,7 +917,19 @@ pub fn increment_key_in_metadata( Ok(new_metadata) } -fn get_u64_from_metadata(metadata: &HashMap, key: &str) -> color_eyre::Result { +/// Retrieves a u64 value from the metadata map +/// +/// # Arguments +/// * `metadata` - Metadata map to search +/// * `key` - Key to retrieve +/// +/// # Returns +/// * `color_eyre::Result` - The parsed value or an error +/// +/// # Notes +/// * Returns 0 if the key doesn't exist in the metadata +/// * Wraps parsing errors with additional context +pub fn get_u64_from_metadata(metadata: &HashMap, key: &str) -> color_eyre::Result { metadata .get(key) .unwrap_or(&"0".to_string()) @@ -586,14 +937,100 @@ fn get_u64_from_metadata(metadata: &HashMap, key: &str) -> color .wrap_err(format!("Failed to parse u64 from metadata key '{}'", key)) } +/// Queues a job for processing by adding it to the process queue +/// +/// # Arguments +/// * `id` - UUID of the job to process +/// * `config` - Shared configuration +/// +/// # Returns +/// * `Result<(), JobError>` - Success or an error +/// +/// # State Transitions +/// * Any valid state -> PendingProcess +#[tracing::instrument(skip(config), fields(category = "general"), ret, err)] +pub async fn queue_job_for_processing(id: Uuid, config: Arc) -> Result<(), JobError> { + let job = get_job(id, config.clone()).await?; + + // Add to process queue directly + add_job_to_process_queue(id, &job.job_type, config).await.map_err(|e| { + tracing::error!(job_id = ?id, error = ?e, "Failed to add job to process queue"); + JobError::Other(OtherError(e)) + })?; + + Ok(()) +} + +/// Queues a job for 
verification by adding it to the verification queue +/// +/// # Arguments +/// * `id` - UUID of the job to verify +/// * `config` - Shared configuration +/// +/// # Returns +/// * `Result<(), JobError>` - Success or an error +/// +/// # Notes +/// * Resets verification attempt count to 0 +/// * Sets appropriate delay for verification polling +#[tracing::instrument(skip(config), fields(category = "general"), ret, err)] +pub async fn queue_job_for_verification(id: Uuid, config: Arc) -> Result<(), JobError> { + let mut job = get_job(id, config.clone()).await?; + let job_handler = factory::get_job_handler(&job.job_type).await; + + // Reset verification attempts and increment retry counter in common metadata + job.metadata.common.verification_attempt_no = 0; + job.metadata.common.verification_retry_attempt_no += 1; + + tracing::debug!( + job_id = ?id, + retry_count = job.metadata.common.verification_retry_attempt_no, + "Incrementing verification retry attempt counter" + ); + + // Update job status and metadata + config + .database() + .update_job( + &job, + JobItemUpdates::new() + .update_status(JobStatus::PendingVerification) + .update_metadata(job.metadata.clone()) + .build(), + ) + .await + .map_err(|e| JobError::Other(OtherError(e)))?; + + // Add to verification queue with appropriate delay + add_job_to_verification_queue( + id, + &job.job_type, + Duration::from_secs(job_handler.verification_polling_delay_seconds()), + config, + ) + .await + .map_err(|e| { + tracing::error!( + job_id = ?id, + error = ?e, + "Failed to add job to verification queue" + ); + JobError::Other(OtherError(e)) + })?; + + Ok(()) +} + #[cfg(test)] mod tests { use super::*; + /// Tests for increment_key_in_metadata function mod test_increment_key_in_metadata { use super::*; #[test] + /// Tests incrementing a non-existent key (should start at 0) fn key_does_not_exist() { let metadata = HashMap::new(); let key = "test_key"; @@ -602,6 +1039,7 @@ mod tests { } #[test] + /// Tests incrementing an 
existing numeric value fn key_exists_with_numeric_value() { let mut metadata = HashMap::new(); metadata.insert("test_key".to_string(), "41".to_string()); @@ -611,6 +1049,7 @@ mod tests { } #[test] + /// Tests handling of non-numeric values fn key_exists_with_non_numeric_value() { let mut metadata = HashMap::new(); metadata.insert("test_key".to_string(), "not_a_number".to_string()); @@ -620,6 +1059,7 @@ mod tests { } #[test] + /// Tests overflow handling at u64::MAX fn key_exists_with_max_u64_value() { let mut metadata = HashMap::new(); metadata.insert("test_key".to_string(), u64::MAX.to_string()); @@ -629,10 +1069,12 @@ mod tests { } } + /// Tests for get_u64_from_metadata function mod test_get_u64_from_metadata { use super::*; #[test] + /// Tests retrieving a valid u64 value fn key_exists_with_valid_u64_value() { let mut metadata = HashMap::new(); metadata.insert("key1".to_string(), "12345".to_string()); @@ -641,6 +1083,7 @@ mod tests { } #[test] + /// Tests handling of invalid numeric strings fn key_exists_with_invalid_value() { let mut metadata = HashMap::new(); metadata.insert("key2".to_string(), "not_a_number".to_string()); @@ -649,6 +1092,7 @@ mod tests { } #[test] + /// Tests default behavior when key doesn't exist fn key_does_not_exist() { let metadata = HashMap::::new(); let result = get_u64_from_metadata(&metadata, "key3").unwrap(); diff --git a/orchestrator/crates/orchestrator/src/jobs/proving_job/mod.rs b/orchestrator/crates/orchestrator/src/jobs/proving_job/mod.rs index 1bdadfef4..3d9e16e9c 100644 --- a/orchestrator/crates/orchestrator/src/jobs/proving_job/mod.rs +++ b/orchestrator/crates/orchestrator/src/jobs/proving_job/mod.rs @@ -1,4 +1,3 @@ -use std::collections::HashMap; use std::sync::Arc; use async_trait::async_trait; @@ -12,8 +11,8 @@ use uuid::Uuid; use super::types::{JobItem, JobStatus, JobType, JobVerificationStatus}; use super::{Job, JobError, OtherError}; use crate::config::Config; -use crate::constants::CAIRO_PIE_FILE_NAME; -use 
crate::jobs::constants::JOB_METADATA_SNOS_FACT; +use crate::helpers; +use crate::jobs::metadata::{JobMetadata, ProvingInputType, ProvingMetadata}; #[derive(Error, Debug, PartialEq)] pub enum ProvingError { @@ -39,7 +38,7 @@ impl Job for ProvingJob { &self, _config: Arc, internal_id: String, - metadata: HashMap, + metadata: JobMetadata, ) -> Result { tracing::info!(log_type = "starting", category = "proving", function_type = "create_job", block_no = %internal_id, "Proving job creation started."); let job_item = JobItem { @@ -60,17 +59,38 @@ impl Job for ProvingJob { #[tracing::instrument(fields(category = "proving"), skip(self, config), ret, err)] async fn process_job(&self, config: Arc, job: &mut JobItem) -> Result { let internal_id = job.internal_id.clone(); - tracing::info!(log_type = "starting", category = "proving", function_type = "process_job", job_id = ?job.id, block_no = %internal_id, "Proving job processing started."); + tracing::info!( + log_type = "starting", + category = "proving", + function_type = "process_job", + job_id = ?job.id, + block_no = %internal_id, + "Proving job processing started." 
+ ); + + // Get proving metadata + let proving_metadata: ProvingMetadata = job.metadata.specific.clone().try_into().map_err(|e| { + tracing::error!(job_id = %job.internal_id, error = %e, "Invalid metadata type for proving job"); + JobError::Other(OtherError(e)) + })?; + + // Get input path from metadata + let input_path = match proving_metadata.input_path { + Some(ProvingInputType::CairoPie(path)) => path, + Some(ProvingInputType::Proof(_)) => { + return Err(JobError::Other(OtherError(eyre!("Expected CairoPie input, got Proof")))); + } + None => return Err(JobError::Other(OtherError(eyre!("Input path not found in job metadata")))), + }; - // Cairo Pie path in s3 storage client - let block_number: String = job.internal_id.to_string(); - let cairo_pie_path = block_number + "/" + CAIRO_PIE_FILE_NAME; - tracing::debug!(job_id = %job.internal_id, %cairo_pie_path, "Fetching Cairo PIE file"); + tracing::debug!(job_id = %job.internal_id, %input_path, "Fetching Cairo PIE file"); - let cairo_pie_file = config.storage().get_data(&cairo_pie_path).await.map_err(|e| { + // Fetch and parse Cairo PIE + let cairo_pie_file = config.storage().get_data(&input_path).await.map_err(|e| { tracing::error!(job_id = %job.internal_id, error = %e, "Failed to fetch Cairo PIE file"); ProvingError::CairoPIEFileFetchFailed(e.to_string()) })?; + tracing::debug!(job_id = %job.internal_id, "Parsing Cairo PIE file"); let cairo_pie = Box::new(CairoPie::from_bytes(cairo_pie_file.to_vec().as_slice()).map_err(|e| { tracing::error!(job_id = %job.internal_id, error = %e, "Failed to parse Cairo PIE file"); @@ -88,15 +108,28 @@ impl Job for ProvingJob { JobError::Other(OtherError(e)) })?; - tracing::info!(log_type = "completed", category = "proving", function_type = "process_job", job_id = ?job.id, block_no = %internal_id, %external_id, "Proving job processed successfully."); Ok(external_id) } #[tracing::instrument(fields(category = "proving"), skip(self, config), ret, err)] async fn verify_job(&self, config: 
Arc, job: &mut JobItem) -> Result { let internal_id = job.internal_id.clone(); - tracing::info!(log_type = "starting", category = "proving", function_type = "verify_job", job_id = ?job.id, block_no = %internal_id, "Proving job verification started."); + tracing::info!( + log_type = "starting", + category = "proving", + function_type = "verify_job", + job_id = ?job.id, + block_no = %internal_id, + "Proving job verification started." + ); + + // Get proving metadata + let proving_metadata: ProvingMetadata = job.metadata.specific.clone().try_into().map_err(|e| { + tracing::error!(job_id = %job.internal_id, error = %e, "Invalid metadata type for proving job"); + JobError::Other(OtherError(e)) + })?; + // Get task ID from external_id let task_id: String = job .external_id .unwrap_string() @@ -106,36 +139,75 @@ impl Job for ProvingJob { })? .into(); - let fact = job.metadata.get(JOB_METADATA_SNOS_FACT).ok_or_else(|| { - tracing::error!(job_id = %job.internal_id, "Fact not available in job metadata"); - OtherError(eyre!("Fact not available in job")) - })?; + // Determine if we need on-chain verification + let (cross_verify, fact) = match &proving_metadata.ensure_on_chain_registration { + Some(fact_str) => (true, Some(fact_str.clone())), + None => (false, None), + }; + + tracing::debug!( + job_id = %job.internal_id, + %task_id, + cross_verify, + "Getting task status from prover client" + ); - tracing::debug!(job_id = %job.internal_id, %task_id, "Getting task status from prover client"); let task_status = config .prover_client() - .get_task_status(&task_id, fact) + .get_task_status(&task_id, fact, cross_verify) .await .wrap_err("Prover Client Error".to_string()) .map_err(|e| { - tracing::error!(job_id = %job.internal_id, error = %e, "Failed to get task status from prover client"); + tracing::error!( + job_id = %job.internal_id, + error = %e, + "Failed to get task status from prover client" + ); JobError::Other(OtherError(e)) })?; match task_status { TaskStatus::Processing 
=> { - tracing::info!(log_type = "pending", category = "proving", function_type = "verify_job", job_id = ?job.id, block_no = %internal_id, "Proving job verification pending."); + tracing::info!( + log_type = "pending", + category = "proving", + function_type = "verify_job", + job_id = ?job.id, + block_no = %internal_id, + "Proving job verification pending." + ); Ok(JobVerificationStatus::Pending) } TaskStatus::Succeeded => { - // TODO: call isValid on the contract over here to cross-verify whether the proof was registered on - // chain or not - - tracing::info!(log_type = "completed", category = "proving", function_type = "verify_job", job_id = ?job.id, block_no = %internal_id, "Proving job verification completed."); + // If proof download path is specified, store the proof + if let Some(download_path) = proving_metadata.download_proof { + tracing::debug!( + job_id = %job.internal_id, + "Downloading and storing proof to path: {}", + download_path + ); + // TODO: Implement proof download and storage + } + + tracing::info!( + log_type = "completed", + category = "proving", + function_type = "verify_job", + job_id = ?job.id, + block_no = %internal_id, + "Proving job verification completed." + ); Ok(JobVerificationStatus::Verified) } TaskStatus::Failed(err) => { - tracing::info!(log_type = "failed", category = "proving", function_type = "verify_job", job_id = ?job.id, block_no = %internal_id, "Proving job verification failed."); + tracing::info!( + log_type = "failed", + category = "proving", + function_type = "verify_job", + job_id = ?job.id, + block_no = %internal_id, + "Proving job verification failed." 
+ ); Ok(JobVerificationStatus::Rejected(format!( "Prover job #{} failed with error: {}", job.internal_id, err @@ -153,6 +225,13 @@ impl Job for ProvingJob { } fn verification_polling_delay_seconds(&self) -> u64 { - 300 + 30 + } + + fn job_processing_lock( + &self, + _config: Arc, + ) -> std::option::Option> { + None } } diff --git a/orchestrator/crates/orchestrator/src/jobs/register_proof_job/mod.rs b/orchestrator/crates/orchestrator/src/jobs/register_proof_job/mod.rs index 042467440..1539c711c 100644 --- a/orchestrator/crates/orchestrator/src/jobs/register_proof_job/mod.rs +++ b/orchestrator/crates/orchestrator/src/jobs/register_proof_job/mod.rs @@ -1,4 +1,3 @@ -use std::collections::HashMap; use std::sync::Arc; use async_trait::async_trait; @@ -8,6 +7,8 @@ use uuid::Uuid; use super::JobError; use crate::config::Config; +use crate::helpers; +use crate::jobs::metadata::JobMetadata; use crate::jobs::types::{JobItem, JobStatus, JobType, JobVerificationStatus}; use crate::jobs::Job; @@ -20,7 +21,7 @@ impl Job for RegisterProofJob { &self, _config: Arc, internal_id: String, - metadata: HashMap, + metadata: JobMetadata, ) -> Result { tracing::info!(log_type = "starting", category = "proof_registry", function_type = "create_job", block_no = %internal_id, "Proof registration job creation started."); let job_item = JobItem { @@ -68,4 +69,11 @@ impl Job for RegisterProofJob { fn verification_polling_delay_seconds(&self) -> u64 { todo!() } + + fn job_processing_lock( + &self, + _config: Arc, + ) -> std::option::Option> { + None + } } diff --git a/orchestrator/crates/orchestrator/src/jobs/snos_job/mod.rs b/orchestrator/crates/orchestrator/src/jobs/snos_job/mod.rs index c82d48c38..066341c54 100644 --- a/orchestrator/crates/orchestrator/src/jobs/snos_job/mod.rs +++ b/orchestrator/crates/orchestrator/src/jobs/snos_job/mod.rs @@ -1,4 +1,3 @@ -use std::collections::HashMap; use std::io::Read; use std::sync::Arc; @@ -8,6 +7,7 @@ use cairo_vm::types::layout_name::LayoutName; use 
cairo_vm::vm::runners::cairo_pie::CairoPie; use cairo_vm::Felt252; use chrono::{SubsecRound, Utc}; +use color_eyre::eyre::eyre; use color_eyre::Result; use prove_block::prove_block; use starknet_os::io::output::StarknetOsOutput; @@ -15,11 +15,11 @@ use tempfile::NamedTempFile; use thiserror::Error; use uuid::Uuid; -use super::constants::{JOB_METADATA_SNOS_BLOCK, JOB_METADATA_SNOS_FACT}; use super::{JobError, OtherError}; use crate::config::Config; -use crate::constants::{CAIRO_PIE_FILE_NAME, PROGRAM_OUTPUT_FILE_NAME, SNOS_OUTPUT_FILE_NAME}; use crate::data_storage::DataStorage; +use crate::helpers; +use crate::jobs::metadata::{JobMetadata, JobSpecificMetadata, SnosMetadata}; use crate::jobs::snos_job::error::FactError; use crate::jobs::snos_job::fact_info::get_fact_info; use crate::jobs::types::{JobItem, JobStatus, JobType, JobVerificationStatus}; @@ -76,11 +76,16 @@ impl Job for SnosJob { &self, _config: Arc, internal_id: String, - metadata: HashMap, + metadata: JobMetadata, ) -> Result { - tracing::info!(log_type = "starting", category = "snos", function_type = "create_job", block_no = %internal_id, "SNOS job creation started."); - let mut metadata = metadata; - metadata.insert(JOB_METADATA_SNOS_BLOCK.to_string(), internal_id.clone()); + tracing::info!( + log_type = "starting", + category = "snos", + function_type = "create_job", + block_no = %internal_id, + "SNOS job creation started." + ); + let job_item = JobItem { id: Uuid::new_v4(), internal_id: internal_id.clone(), @@ -92,25 +97,50 @@ impl Job for SnosJob { created_at: Utc::now().round_subsecs(0), updated_at: Utc::now().round_subsecs(0), }; - tracing::info!(log_type = "completed", category = "snos", function_type = "create_job", block_no = %internal_id, "SNOS job creation completed."); + + tracing::info!( + log_type = "completed", + category = "snos", + function_type = "create_job", + block_no = %internal_id, + "SNOS job creation completed." 
+ ); Ok(job_item) } #[tracing::instrument(fields(category = "snos"), skip(self, config), ret, err)] async fn process_job(&self, config: Arc, job: &mut JobItem) -> Result { let internal_id = job.internal_id.clone(); - tracing::info!(log_type = "starting", category = "snos", function_type = "process_job", job_id = ?job.id, block_no = %internal_id, "SNOS job processing started."); - let block_number = self.get_block_number_from_metadata(job)?; + tracing::info!( + log_type = "starting", + category = "snos", + function_type = "process_job", + job_id = ?job.id, + block_no = %internal_id, + "SNOS job processing started." + ); + + // Get SNOS metadata + let snos_metadata: SnosMetadata = job.metadata.specific.clone().try_into().map_err(|e| { + tracing::error!(job_id = %job.internal_id, error = %e, "Invalid metadata type for SNOS job"); + JobError::Other(OtherError(e)) + })?; + + // Get block number from metadata + let block_number = snos_metadata.block_number; tracing::debug!(job_id = %job.internal_id, block_number = %block_number, "Retrieved block number from metadata"); let snos_url = config.snos_config().rpc_for_snos.to_string(); let snos_url = snos_url.trim_end_matches('/'); tracing::debug!(job_id = %job.internal_id, "Calling prove_block function"); + let (cairo_pie, snos_output) = - prove_block(COMPILED_OS, block_number, snos_url, LayoutName::all_cairo, false).await.map_err(|e| { - tracing::error!(job_id = %job.internal_id, error = %e, "SNOS execution failed"); - SnosError::SnosExecutionError { internal_id: job.internal_id.clone(), message: e.to_string() } - })?; + prove_block(COMPILED_OS, block_number, snos_url, LayoutName::all_cairo, snos_metadata.full_output) + .await + .map_err(|e| { + tracing::error!(job_id = %job.internal_id, error = %e, "SNOS execution failed"); + SnosError::SnosExecutionError { internal_id: job.internal_id.clone(), message: e.to_string() } + })?; tracing::debug!(job_id = %job.internal_id, "prove_block function completed successfully"); let 
fact_info = get_fact_info(&cairo_pie, None)?; @@ -118,10 +148,22 @@ impl Job for SnosJob { tracing::debug!(job_id = %job.internal_id, "Fact info calculated successfully"); tracing::debug!(job_id = %job.internal_id, "Storing SNOS outputs"); - self.store(config.storage(), &job.internal_id, block_number, cairo_pie, snos_output, program_output).await?; + self.store(internal_id.clone(), config.storage(), &snos_metadata, cairo_pie, snos_output, program_output) + .await?; + + // Update the metadata with new paths and fact info + if let JobSpecificMetadata::Snos(metadata) = &mut job.metadata.specific { + metadata.snos_fact = Some(fact_info.fact.to_string()); + } - job.metadata.insert(JOB_METADATA_SNOS_FACT.into(), fact_info.fact.to_string()); - tracing::info!(log_type = "completed", category = "snos", function_type = "process_job", job_id = ?job.id, block_no = %internal_id, "SNOS job processed successfully."); + tracing::info!( + log_type = "completed", + category = "snos", + function_type = "process_job", + job_id = ?job.id, + block_no = %block_number, + "SNOS job processed successfully." + ); Ok(block_number.to_string()) } @@ -147,64 +189,72 @@ impl Job for SnosJob { fn verification_polling_delay_seconds(&self) -> u64 { 1 } -} - -impl SnosJob { - /// Get the block number that needs to be run with SNOS for the current - /// job. - fn get_block_number_from_metadata(&self, job: &JobItem) -> Result { - let block_number: u64 = job - .metadata - .get(JOB_METADATA_SNOS_BLOCK) - .ok_or(SnosError::UnspecifiedBlockNumber { internal_id: job.internal_id.clone() })? - .parse() - .map_err(|_| SnosError::InvalidBlockNumber { - internal_id: job.internal_id.clone(), - block_number: job.metadata[JOB_METADATA_SNOS_BLOCK].clone(), - })?; - Ok(block_number) + fn job_processing_lock(&self, config: Arc) -> std::option::Option> { + Some(config.processing_locks().snos_job_processing_lock.clone()) } +} +impl SnosJob { /// Stores the [CairoPie] and the [StarknetOsOutput] in the Data Storage. 
/// The paths will be: /// - [block_number]/cairo_pie.zip /// - [block_number]/snos_output.json async fn store( &self, + internal_id: String, data_storage: &dyn DataStorage, - internal_id: &str, - block_number: u64, + snos_metadata: &SnosMetadata, cairo_pie: CairoPie, snos_output: StarknetOsOutput, program_output: Vec, ) -> Result<(), SnosError> { - let cairo_pie_key = format!("{block_number}/{CAIRO_PIE_FILE_NAME}"); - let cairo_pie_zip_bytes = self.cairo_pie_to_zip_bytes(cairo_pie).await.map_err(|e| { - SnosError::CairoPieUnserializable { internal_id: internal_id.to_string(), message: e.to_string() } - })?; - data_storage.put_data(cairo_pie_zip_bytes, &cairo_pie_key).await.map_err(|e| { - SnosError::CairoPieUnstorable { internal_id: internal_id.to_string(), message: e.to_string() } - })?; + // Get storage paths from metadata + let cairo_pie_key = snos_metadata + .cairo_pie_path + .as_ref() + .ok_or_else(|| SnosError::Other(OtherError(eyre!("Cairo Pie path not found in metadata"))))?; - let snos_output_key = format!("{block_number}/{SNOS_OUTPUT_FILE_NAME}"); - let snos_output_json = serde_json::to_vec(&snos_output).map_err(|e| SnosError::SnosOutputUnserializable { - internal_id: internal_id.to_string(), - message: e.to_string(), - })?; - data_storage.put_data(snos_output_json.into(), &snos_output_key).await.map_err(|e| { - SnosError::SnosOutputUnstorable { internal_id: internal_id.to_string(), message: e.to_string() } - })?; + let snos_output_key = snos_metadata + .snos_output_path + .as_ref() + .ok_or_else(|| SnosError::Other(OtherError(eyre!("SNOS output path not found in metadata"))))?; - let program_output: Vec<[u8; 32]> = program_output.iter().map(|f| f.to_bytes_be()).collect(); - let encoded_data = bincode::serialize(&program_output).map_err(|e| SnosError::ProgramOutputUnserializable { - internal_id: internal_id.to_string(), - message: e.to_string(), - })?; - let program_output_key = format!("{block_number}/{PROGRAM_OUTPUT_FILE_NAME}"); - 
data_storage.put_data(encoded_data.into(), &program_output_key).await.map_err(|e| { - SnosError::ProgramOutputUnstorable { internal_id: internal_id.to_string(), message: e.to_string() } - })?; + let program_output_key = snos_metadata + .program_output_path + .as_ref() + .ok_or_else(|| SnosError::Other(OtherError(eyre!("Program output path not found in metadata"))))?; + + // Store Cairo Pie + { + let cairo_pie_zip_bytes = self.cairo_pie_to_zip_bytes(cairo_pie).await.map_err(|e| { + SnosError::CairoPieUnserializable { internal_id: internal_id.clone(), message: e.to_string() } + })?; + data_storage.put_data(cairo_pie_zip_bytes, cairo_pie_key).await.map_err(|e| { + SnosError::CairoPieUnstorable { internal_id: internal_id.clone(), message: e.to_string() } + })?; + } + + // Store SNOS Output + { + let snos_output_json = serde_json::to_vec(&snos_output).map_err(|e| { + SnosError::SnosOutputUnserializable { internal_id: internal_id.clone(), message: e.to_string() } + })?; + data_storage.put_data(snos_output_json.into(), snos_output_key).await.map_err(|e| { + SnosError::SnosOutputUnstorable { internal_id: internal_id.clone(), message: e.to_string() } + })?; + } + + // Store Program Output + { + let program_output: Vec<[u8; 32]> = program_output.iter().map(|f| f.to_bytes_be()).collect(); + let encoded_data = bincode::serialize(&program_output).map_err(|e| { + SnosError::ProgramOutputUnserializable { internal_id: internal_id.clone(), message: e.to_string() } + })?; + data_storage.put_data(encoded_data.into(), program_output_key).await.map_err(|e| { + SnosError::ProgramOutputUnstorable { internal_id: internal_id.clone(), message: e.to_string() } + })?; + } Ok(()) } @@ -213,12 +263,15 @@ impl SnosJob { async fn cairo_pie_to_zip_bytes(&self, cairo_pie: CairoPie) -> Result { let mut cairo_pie_zipfile = NamedTempFile::new()?; cairo_pie.write_zip_file(cairo_pie_zipfile.path())?; + drop(cairo_pie); // Drop cairo_pie to release the memory let cairo_pie_zip_bytes = 
self.tempfile_to_bytes(&mut cairo_pie_zipfile)?; cairo_pie_zipfile.close()?; Ok(cairo_pie_zip_bytes) } /// Converts a [NamedTempFile] to [Bytes]. + /// This function reads the file in chunks and appends them to the buffer. + /// This is useful when the file is too large to be read in one go. fn tempfile_to_bytes(&self, tmp_file: &mut NamedTempFile) -> Result { let mut buffer = Vec::new(); tmp_file.as_file_mut().read_to_end(&mut buffer)?; diff --git a/orchestrator/crates/orchestrator/src/jobs/state_update_job/mod.rs b/orchestrator/crates/orchestrator/src/jobs/state_update_job/mod.rs index 326f40c73..e7ea9a1f0 100644 --- a/orchestrator/crates/orchestrator/src/jobs/state_update_job/mod.rs +++ b/orchestrator/crates/orchestrator/src/jobs/state_update_job/mod.rs @@ -1,27 +1,24 @@ pub mod utils; -use std::collections::HashMap; use std::sync::Arc; -use ::orchestrator_utils::collections::{has_dup, is_sorted}; use async_trait::async_trait; use cairo_vm::Felt252; use chrono::{SubsecRound, Utc}; use color_eyre::eyre::eyre; use orchestrator_settlement_client_interface::SettlementVerificationStatus; +use orchestrator_utils::collections::{has_dup, is_sorted}; use starknet_os::io::output::StarknetOsOutput; use thiserror::Error; use uuid::Uuid; -use super::constants::{ - JOB_METADATA_STATE_UPDATE_ATTEMPT_PREFIX, JOB_METADATA_STATE_UPDATE_LAST_FAILED_BLOCK_NO, - JOB_PROCESS_ATTEMPT_METADATA_KEY, -}; use super::{JobError, OtherError}; use crate::config::Config; -use crate::constants::{PROGRAM_OUTPUT_FILE_NAME, SNOS_OUTPUT_FILE_NAME}; -use crate::jobs::constants::JOB_METADATA_STATE_UPDATE_BLOCKS_TO_SETTLE_KEY; -use crate::jobs::state_update_job::utils::fetch_blob_data_for_block; +use crate::helpers; +use crate::jobs::metadata::{JobMetadata, JobSpecificMetadata, StateUpdateMetadata}; +use crate::jobs::state_update_job::utils::{ + fetch_blob_data_for_block, fetch_program_output_for_block, fetch_snos_for_block, +}; use crate::jobs::types::{JobItem, JobStatus, JobType, 
JobVerificationStatus}; use crate::jobs::Job; @@ -78,78 +75,133 @@ impl Job for StateUpdateJob { &self, _config: Arc, internal_id: String, - metadata: HashMap, + metadata: JobMetadata, ) -> Result { - tracing::info!(log_type = "starting", category = "state_update", function_type = "create_job", block_no = %internal_id, "State update job creation started."); - // Inserting the metadata (If it doesn't exist) - let mut metadata = metadata.clone(); - if !metadata.contains_key(JOB_PROCESS_ATTEMPT_METADATA_KEY) { - tracing::debug!(job_id = %internal_id, "Inserting initial process attempt metadata"); - metadata.insert(JOB_PROCESS_ATTEMPT_METADATA_KEY.to_string(), "0".to_string()); + tracing::info!( + log_type = "starting", + category = "state_update", + function_type = "create_job", + block_no = %internal_id, + "State update job creation started." + ); + + // Extract state transition metadata + let state_metadata: StateUpdateMetadata = metadata.specific.clone().try_into().map_err(|e| { + tracing::error!(job_id = %internal_id, error = %e, "Invalid metadata type for state update job"); + JobError::Other(OtherError(e)) + })?; + + // Validate required paths + if state_metadata.snos_output_paths.is_empty() + || state_metadata.program_output_paths.is_empty() + || state_metadata.blob_data_paths.is_empty() + { + tracing::error!(job_id = %internal_id, "Missing required paths in metadata"); + return Err(JobError::Other(OtherError(eyre!("Missing required paths in metadata")))); } + // Create job with initialized metadata let job_item = JobItem { id: Uuid::new_v4(), internal_id: internal_id.clone(), job_type: JobType::StateTransition, status: JobStatus::Created, external_id: String::new().into(), - // metadata must contain the blocks for which state update will be performed - // we don't do one job per state update as that makes nonce management complicated - metadata, + metadata: metadata.clone(), version: 0, created_at: Utc::now().round_subsecs(0), updated_at: 
Utc::now().round_subsecs(0), }; - tracing::info!(log_type = "completed", category = "state_update", function_type = "create_job", block_no = %internal_id, "State update job created."); + + tracing::info!( + log_type = "completed", + category = "state_update", + function_type = "create_job", + block_no = %internal_id, + blocks_to_settle = ?state_metadata.blocks_to_settle, + "State update job created." + ); + Ok(job_item) } #[tracing::instrument(fields(category = "state_update"), skip(self, config), ret, err)] async fn process_job(&self, config: Arc, job: &mut JobItem) -> Result { let internal_id = job.internal_id.clone(); - tracing::info!(log_type = "starting", category = "state_update", function_type = "process_job", job_id = %job.id, block_no = %internal_id, "State update job processing started."); - let attempt_no = job - .metadata - .get(JOB_PROCESS_ATTEMPT_METADATA_KEY) - .ok_or_else(|| StateUpdateError::AttemptNumberNotFound)? - .clone(); - - // Read the metadata to get the blocks for which state update will be performed. - // We assume that blocks nbrs are formatted as follow: "2,3,4,5,6". - let mut block_numbers = self.get_block_numbers_from_metadata(job)?; - self.validate_block_numbers(config.clone(), &block_numbers).await?; - - if let Some(last_failed_block) = job.metadata.get(JOB_METADATA_STATE_UPDATE_LAST_FAILED_BLOCK_NO) { - let last_failed_block = - last_failed_block.parse().map_err(|_| StateUpdateError::LastFailedBlockNonPositive)?; - block_numbers = block_numbers.into_iter().filter(|&block| block >= last_failed_block).collect::>(); - } + tracing::info!( + log_type = "starting", + category = "state_update", + function_type = "process_job", + job_id = %job.id, + block_no = %internal_id, + "State update job processing started." 
+ ); + + let mut state_metadata: StateUpdateMetadata = job.metadata.specific.clone().try_into().map_err(|e| { + tracing::error!(job_id = %internal_id, error = %e, "Invalid metadata type for state update job"); + JobError::Other(OtherError(e)) + })?; + + self.validate_block_numbers(config.clone(), &state_metadata.blocks_to_settle).await?; + + // Filter block numbers if there was a previous failure + let last_failed_block = state_metadata.last_failed_block_no.unwrap_or(0); + let filtered_indices: Vec = state_metadata + .blocks_to_settle + .iter() + .enumerate() + .filter(|(_, &block)| block >= last_failed_block) + .map(|(i, _)| i) + .collect(); + + let snos_output_paths = state_metadata.snos_output_paths.clone(); + let program_output_paths = state_metadata.program_output_paths.clone(); + let blob_data_paths = state_metadata.blob_data_paths.clone(); let mut nonce = config.settlement_client().get_nonce().await.map_err(|e| JobError::Other(OtherError(e)))?; - let mut sent_tx_hashes: Vec = Vec::with_capacity(block_numbers.len()); - for block_no in block_numbers.iter() { - tracing::debug!(job_id = %job.internal_id, block_no = %block_no, "Processing block"); - let snos = self.fetch_snos_for_block(*block_no, config.clone()).await?; - let txn_hash = self - .update_state_for_block(config.clone(), *block_no, snos, nonce) + let mut sent_tx_hashes: Vec = Vec::with_capacity(filtered_indices.len()); + + for &i in &filtered_indices { + let block_no = state_metadata.blocks_to_settle[i]; + tracing::debug!(job_id = %job.internal_id, block_no = %block_no, "Processing block"); + let snos = fetch_snos_for_block(internal_id.clone(), i, config.clone(), &snos_output_paths).await?; + let program_output = fetch_program_output_for_block(i, config.clone(), &program_output_paths).await?; + let blob_data = fetch_blob_data_for_block(i, config.clone(), &blob_data_paths).await?; + let txn_hash = match self + .update_state_for_block(config.clone(), block_no, snos, nonce, program_output, blob_data) 
.await - .map_err(|e| { + { + Ok(hash) => hash, + Err(e) => { tracing::error!(job_id = %job.internal_id, block_no = %block_no, error = %e, "Error updating state for block"); - job.metadata.insert(JOB_METADATA_STATE_UPDATE_LAST_FAILED_BLOCK_NO.into(), block_no.to_string()); - self.insert_attempts_into_metadata(job, &attempt_no, &sent_tx_hashes); - OtherError(eyre!("Block #{block_no} - Error occurred during the state update: {e}")); - }) - .unwrap(); + state_metadata.last_failed_block_no = Some(block_no); + state_metadata.tx_hashes = sent_tx_hashes.clone(); + job.metadata.specific = JobSpecificMetadata::StateUpdate(state_metadata.clone()); + + return Err(JobError::Other(OtherError(eyre!( + "Block #{block_no} - Error occurred during the state update: {e}" + )))); + } + }; + sent_tx_hashes.push(txn_hash); + state_metadata.tx_hashes = sent_tx_hashes.clone(); + job.metadata.specific = JobSpecificMetadata::StateUpdate(state_metadata.clone()); nonce += 1; } - self.insert_attempts_into_metadata(job, &attempt_no, &sent_tx_hashes); + let val = state_metadata.blocks_to_settle.last().ok_or_else(|| StateUpdateError::LastNumberReturnedError)?; - let val = block_numbers.last().ok_or_else(|| StateUpdateError::LastNumberReturnedError)?; - tracing::info!(log_type = "completed", category = "state_update", function_type = "process_job", job_id = %job.id, block_no = %internal_id, last_settled_block = %val, "State update job processed successfully."); + tracing::info!( + log_type = "completed", + category = "state_update", + function_type = "process_job", + job_id = %job.id, + block_no = %internal_id, + last_settled_block = %val, + "State update job processed successfully." 
+ ); Ok(val.to_string()) } @@ -162,84 +214,132 @@ impl Job for StateUpdateJob { #[tracing::instrument(fields(category = "state_update"), skip(self, config), ret, err)] async fn verify_job(&self, config: Arc, job: &mut JobItem) -> Result { let internal_id = job.internal_id.clone(); - tracing::info!(log_type = "starting", category = "state_update", function_type = "verify_job", job_id = %job.id, block_no = %internal_id, "State update job verification started."); - let attempt_no = job - .metadata - .get(JOB_PROCESS_ATTEMPT_METADATA_KEY) - .ok_or_else(|| StateUpdateError::AttemptNumberNotFound)?; - tracing::debug!(job_id = %job.internal_id, attempt_no = %attempt_no, "Retrieved attempt number"); - - // We are doing attempt_no - 1 because the attempt number is increased in the - // global process job function and the transaction hash is stored with attempt - // number : 0 - let metadata_tx_hashes = job - .metadata - .get(&format!( - "{}{}", - JOB_METADATA_STATE_UPDATE_ATTEMPT_PREFIX, - attempt_no.parse::().map_err(|e| JobError::Other(OtherError(eyre!(e))))? - 1 - )) - .ok_or_else(|| StateUpdateError::TxnHashMetadataNotFound)? - .clone() - .replace(' ', ""); - - let tx_hashes: Vec<&str> = metadata_tx_hashes.split(',').collect(); - let block_numbers = self.get_block_numbers_from_metadata(job)?; + tracing::info!( + log_type = "starting", + category = "state_update", + function_type = "verify_job", + job_id = %job.id, + block_no = %internal_id, + "State update job verification started." 
+ ); + + // Get state update metadata + let mut state_metadata: StateUpdateMetadata = job.metadata.specific.clone().try_into().map_err(|e| { + tracing::error!(job_id = ?job.id, error = ?e, "Invalid metadata type for state update job"); + JobError::Other(OtherError(e)) + })?; + // Get transaction hashes + let tx_hashes = state_metadata.tx_hashes; + + let block_numbers = state_metadata.blocks_to_settle; tracing::debug!(job_id = %job.internal_id, "Retrieved block numbers from metadata"); let settlement_client = config.settlement_client(); for (tx_hash, block_no) in tx_hashes.iter().zip(block_numbers.iter()) { - tracing::trace!(job_id = %job.internal_id, tx_hash = %tx_hash, block_no = %block_no, "Verifying transaction inclusion"); + tracing::trace!( + job_id = %job.internal_id, + tx_hash = %tx_hash, + block_no = %block_no, + "Verifying transaction inclusion" + ); + let tx_inclusion_status = settlement_client.verify_tx_inclusion(tx_hash).await.map_err(|e| JobError::Other(OtherError(e)))?; + match tx_inclusion_status { SettlementVerificationStatus::Rejected(_) => { - tracing::warn!(job_id = %job.internal_id, tx_hash = %tx_hash, block_no = %block_no, "Transaction rejected"); - job.metadata.insert(JOB_METADATA_STATE_UPDATE_LAST_FAILED_BLOCK_NO.into(), block_no.to_string()); + tracing::warn!( + job_id = %job.internal_id, + tx_hash = %tx_hash, + block_no = %block_no, + "Transaction rejected" + ); + state_metadata.last_failed_block_no = Some(*block_no); return Ok(tx_inclusion_status.into()); } // If the tx is still pending, we wait for it to be finalized and check again the status. 
SettlementVerificationStatus::Pending => { - tracing::debug!(job_id = %job.internal_id, tx_hash = %tx_hash, "Transaction pending, waiting for finality"); + tracing::debug!( + job_id = %job.internal_id, + tx_hash = %tx_hash, + "Transaction pending, waiting for finality" + ); settlement_client .wait_for_tx_finality(tx_hash) .await .map_err(|e| JobError::Other(OtherError(e)))?; + let new_status = settlement_client .verify_tx_inclusion(tx_hash) .await .map_err(|e| JobError::Other(OtherError(e)))?; + match new_status { SettlementVerificationStatus::Rejected(_) => { - tracing::warn!(job_id = %job.internal_id, tx_hash = %tx_hash, block_no = %block_no, "Transaction rejected after finality"); - job.metadata - .insert(JOB_METADATA_STATE_UPDATE_LAST_FAILED_BLOCK_NO.into(), block_no.to_string()); + tracing::warn!( + job_id = %job.internal_id, + tx_hash = %tx_hash, + block_no = %block_no, + "Transaction rejected after finality" + ); + state_metadata.last_failed_block_no = Some(*block_no); return Ok(new_status.into()); } SettlementVerificationStatus::Pending => { - tracing::error!(job_id = %job.internal_id, tx_hash = %tx_hash, "Transaction still pending after finality check"); + tracing::error!( + job_id = %job.internal_id, + tx_hash = %tx_hash, + "Transaction still pending after finality check" + ); Err(StateUpdateError::TxnShouldNotBePending { tx_hash: tx_hash.to_string() })? 
} SettlementVerificationStatus::Verified => { - tracing::debug!(job_id = %job.internal_id, tx_hash = %tx_hash, "Transaction verified after finality"); + tracing::debug!( + job_id = %job.internal_id, + tx_hash = %tx_hash, + "Transaction verified after finality" + ); } } } SettlementVerificationStatus::Verified => { - tracing::debug!(job_id = %job.internal_id, tx_hash = %tx_hash, "Transaction verified"); + tracing::debug!( + job_id = %job.internal_id, + tx_hash = %tx_hash, + "Transaction verified" + ); } } } + // verify that the last settled block is indeed the one we expect to be let expected_last_block_number = block_numbers.last().ok_or_else(|| StateUpdateError::EmptyBlockNumberList)?; let out_last_block_number = settlement_client.get_last_settled_block().await.map_err(|e| JobError::Other(OtherError(e)))?; + let block_status = if out_last_block_number == *expected_last_block_number { - tracing::info!(log_type = "completed", category = "state_update", function_type = "verify_job", job_id = %job.id, block_no = %internal_id, last_settled_block = %out_last_block_number, "Last settled block verified."); + tracing::info!( + log_type = "completed", + category = "state_update", + function_type = "verify_job", + job_id = %job.id, + block_no = %internal_id, + last_settled_block = %out_last_block_number, + "Last settled block verified." + ); SettlementVerificationStatus::Verified } else { - tracing::warn!(log_type = "failed/rejected", category = "state_update", function_type = "verify_job", job_id = %job.id, block_no = %internal_id, expected = %expected_last_block_number, actual = %out_last_block_number, "Last settled block mismatch."); + tracing::warn!( + log_type = "failed/rejected", + category = "state_update", + function_type = "verify_job", + job_id = %job.id, + block_no = %internal_id, + expected = %expected_last_block_number, + actual = %out_last_block_number, + "Last settled block mismatch." 
+ ); SettlementVerificationStatus::Rejected(format!( "Last settle bock expected was {} but found {}", expected_last_block_number, out_last_block_number @@ -259,31 +359,16 @@ impl Job for StateUpdateJob { fn verification_polling_delay_seconds(&self) -> u64 { 60 } -} - -impl StateUpdateJob { - /// Read the metadata and parse the block numbers - fn get_block_numbers_from_metadata(&self, job: &JobItem) -> Result, JobError> { - let blocks_to_settle = job - .metadata - .get(JOB_METADATA_STATE_UPDATE_BLOCKS_TO_SETTLE_KEY) - .ok_or_else(|| StateUpdateError::UnspecifiedBlockNumber { internal_id: job.internal_id.clone() })?; - - self.parse_block_numbers(blocks_to_settle) - } - /// Parse a list of blocks comma separated - fn parse_block_numbers(&self, blocks_to_settle: &str) -> Result, JobError> { - let sanitized_blocks = blocks_to_settle.replace(' ', ""); - let block_numbers: Vec = sanitized_blocks - .split(',') - .map(|block_no| block_no.parse::()) - .collect::, _>>() - .map_err(|e| eyre!("Block numbers to settle list is not correctly formatted: {e}")) - .map_err(|e| JobError::Other(OtherError(e)))?; - Ok(block_numbers) + fn job_processing_lock( + &self, + _config: Arc, + ) -> std::option::Option> { + None } +} +impl StateUpdateJob { /// Validate that the list of block numbers to process is valid. async fn validate_block_numbers(&self, config: Arc, block_numbers: &[u64]) -> Result<(), JobError> { if block_numbers.is_empty() { @@ -305,26 +390,20 @@ impl StateUpdateJob { } /// Update the state for the corresponding block using the settlement layer. 
+ #[allow(clippy::too_many_arguments)] async fn update_state_for_block( &self, config: Arc, block_no: u64, snos: StarknetOsOutput, nonce: u64, + program_output: Vec<[u8; 32]>, + blob_data: Vec>, ) -> Result { let settlement_client = config.settlement_client(); let last_tx_hash_executed = if snos.use_kzg_da == Felt252::ZERO { unimplemented!("update_state_for_block not implemented as of now for calldata DA.") } else if snos.use_kzg_da == Felt252::ONE { - let blob_data = fetch_blob_data_for_block(block_no, config.clone()) - .await - .map_err(|e| JobError::Other(OtherError(e)))?; - - let program_output = self.fetch_program_output_for_block(block_no, config.clone()).await?; - - // TODO : - // Fetching nonce before the transaction is run - // Sending update_state transaction from the settlement client settlement_client .update_state_with_blobs(program_output, blob_data, nonce) .await @@ -334,38 +413,4 @@ impl StateUpdateJob { }; Ok(last_tx_hash_executed) } - - /// Retrieves the SNOS output for the corresponding block. 
- async fn fetch_snos_for_block(&self, block_no: u64, config: Arc) -> Result { - let storage_client = config.storage(); - let key = block_no.to_string() + "/" + SNOS_OUTPUT_FILE_NAME; - - let snos_output_bytes = storage_client.get_data(&key).await.map_err(|e| JobError::Other(OtherError(e)))?; - - serde_json::from_slice(snos_output_bytes.iter().as_slice()).map_err(|e| { - JobError::Other(OtherError(eyre!("Failed to deserialize SNOS output for block {}: {}", block_no, e))) - }) - } - - async fn fetch_program_output_for_block( - &self, - block_number: u64, - config: Arc, - ) -> Result, JobError> { - let storage_client = config.storage(); - let key = block_number.to_string() + "/" + PROGRAM_OUTPUT_FILE_NAME; - - let program_output = storage_client.get_data(&key).await.map_err(|e| JobError::Other(OtherError(e)))?; - - bincode::deserialize(&program_output).map_err(|e| { - JobError::Other(OtherError(eyre!("Failed to deserialize program output for block {}: {}", block_number, e))) - }) - } - - /// Insert the tx hashes into the the metadata for the attempt number - will be used later by - /// verify_job to make sure that all tx are successful. 
- fn insert_attempts_into_metadata(&self, job: &mut JobItem, attempt_no: &str, tx_hashes: &[String]) { - let new_attempt_metadata_key = format!("{}{}", JOB_METADATA_STATE_UPDATE_ATTEMPT_PREFIX, attempt_no); - job.metadata.insert(new_attempt_metadata_key, tx_hashes.join(",")); - } } diff --git a/orchestrator/crates/orchestrator/src/jobs/state_update_job/utils.rs b/orchestrator/crates/orchestrator/src/jobs/state_update_job/utils.rs index a6acfe4ee..ba9d46985 100644 --- a/orchestrator/crates/orchestrator/src/jobs/state_update_job/utils.rs +++ b/orchestrator/crates/orchestrator/src/jobs/state_update_job/utils.rs @@ -6,25 +6,95 @@ use std::sync::Arc; use alloy::primitives::U256; use color_eyre::eyre::eyre; use num_bigint::BigUint; +use starknet_os::io::output::StarknetOsOutput; +use super::{JobError, OtherError}; use crate::config::Config; -use crate::constants::{BLOB_DATA_FILE_NAME, PROGRAM_OUTPUT_FILE_NAME}; - /// Fetching the blob data (stored in remote storage during DA job) for a particular block -pub async fn fetch_blob_data_for_block(block_number: u64, config: Arc) -> color_eyre::Result>> { +pub async fn fetch_blob_data_for_block( + block_index: usize, + config: Arc, + blob_data_paths: &[String], +) -> Result>, JobError> { + tracing::debug!("Fetching blob data for block index {}", block_index); + let storage_client = config.storage(); - let key = block_number.to_string() + "/" + BLOB_DATA_FILE_NAME; - let blob_data = storage_client.get_data(&key).await?; + + // Get the path for this block + let path = blob_data_paths.get(block_index).ok_or_else(|| { + tracing::error!("Blob data path not found for index {}", block_index); + JobError::Other(OtherError(eyre!("Blob data path not found for index {}", block_index))) + })?; + + tracing::debug!("Retrieving blob data from path: {}", path); + let blob_data = storage_client.get_data(path).await.map_err(|e| { + tracing::error!("Failed to retrieve blob data from path {}: {}", path, e); + JobError::Other(OtherError(e)) + })?; 
+ + tracing::debug!("Successfully retrieved blob data for block index {}", block_index); Ok(vec![blob_data.to_vec()]) } -/// Fetching the blob data (stored in remote storage during DA job) for a particular block -pub async fn fetch_program_data_for_block(block_number: u64, config: Arc) -> color_eyre::Result> { +/// Retrieves the SNOS output for the corresponding block. +pub async fn fetch_snos_for_block( + internal_id: String, + index: usize, + config: Arc, + snos_output_paths: &[String], +) -> Result { + tracing::debug!(job_id = %internal_id, "Fetching SNOS output for block index {}", index); + let storage_client = config.storage(); - let key = block_number.to_string() + "/" + PROGRAM_OUTPUT_FILE_NAME; - let blob_data = storage_client.get_data(&key).await?; - let transformed_blob_vec_u8 = bytes_to_vec_u8(blob_data.as_ref())?; - Ok(transformed_blob_vec_u8) + + let snos_path = snos_output_paths.get(index).ok_or_else(|| { + tracing::error!(job_id = %internal_id, "SNOS path not found for index {}", index); + JobError::Other(OtherError(eyre!("Failed to get the SNOS path for job ID {}", internal_id))) + })?; + + tracing::debug!(job_id = %internal_id, "Retrieving SNOS output from path: {}", snos_path); + let snos_output_bytes = storage_client.get_data(snos_path).await.map_err(|e| { + tracing::error!(job_id = %internal_id, "Failed to retrieve SNOS data from path {}: {}", snos_path, e); + JobError::Other(OtherError(e)) + })?; + + tracing::debug!(job_id = %internal_id, "Deserializing SNOS output from path: {}", snos_path); + serde_json::from_slice(snos_output_bytes.iter().as_slice()).map_err(|e| { + tracing::error!( + job_id = %internal_id, + "Failed to deserialize SNOS output from path {}: {}", + snos_path, e + ); + JobError::Other(OtherError(eyre!("Failed to deserialize SNOS output from path {}: {}", snos_path, e))) + }) +} + +pub async fn fetch_program_output_for_block( + block_index: usize, + config: Arc, + program_output_paths: &[String], +) -> Result, JobError> { + 
tracing::debug!("Fetching program output for block index {}", block_index); + + let storage_client = config.storage(); + + // Get the path for this block + let path = program_output_paths.get(block_index).ok_or_else(|| { + tracing::error!("Program output path not found for index {}", block_index); + JobError::Other(OtherError(eyre!("Program output path not found for index {}", block_index))) + })?; + + tracing::debug!("Retrieving program output from path: {}", path); + let program_output = storage_client.get_data(path).await.map_err(|e| { + tracing::error!("Failed to retrieve program output from path {}: {}", path, e); + JobError::Other(OtherError(e)) + })?; + + tracing::debug!("Deserializing program output from path: {}", path); + bincode::deserialize(&program_output).map_err(|e| { + tracing::error!("Failed to deserialize program output from path {}: {}", path, e); + JobError::Other(OtherError(eyre!("Failed to deserialize program output from path {}: {}", path, e))) + }) } // Util Functions diff --git a/orchestrator/crates/orchestrator/src/jobs/types.rs b/orchestrator/crates/orchestrator/src/jobs/types.rs index 7e1f147a7..fd3160b6f 100644 --- a/orchestrator/crates/orchestrator/src/jobs/types.rs +++ b/orchestrator/crates/orchestrator/src/jobs/types.rs @@ -1,6 +1,3 @@ -use std::collections::HashMap; - -// TODO: job types shouldn't depend on mongodb use chrono::{DateTime, Utc}; use color_eyre::eyre::eyre; use color_eyre::Result; @@ -11,6 +8,8 @@ use orchestrator_settlement_client_interface::SettlementVerificationStatus; use serde::{Deserialize, Serialize}; use uuid::Uuid; +use crate::jobs::metadata::JobMetadata; + /// An external id. 
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)] #[serde(untagged)] @@ -103,6 +102,8 @@ pub enum JobStatus { VerificationFailed, /// The job failed completing Failed, + /// The job is being retried + PendingRetry, } #[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)] @@ -120,7 +121,7 @@ pub struct JobItem { /// or job_id from SHARP pub external_id: ExternalId, /// additional field to store values related to the job - pub metadata: HashMap, + pub metadata: JobMetadata, /// helps to keep track of the version of the item for optimistic locking pub version: i32, /// timestamp when the job was created @@ -140,7 +141,7 @@ pub struct JobItemUpdates { pub job_type: Option, pub status: Option, pub external_id: Option, - pub metadata: Option>, + pub metadata: Option, } /// implements only needed singular changes @@ -173,7 +174,7 @@ impl JobItemUpdates { self.external_id = Some(external_id); self } - pub fn update_metadata(mut self, metadata: HashMap) -> JobItemUpdates { + pub fn update_metadata(mut self, metadata: JobMetadata) -> JobItemUpdates { self.metadata = Some(metadata); self } diff --git a/orchestrator/crates/orchestrator/src/lib.rs b/orchestrator/crates/orchestrator/src/lib.rs index 9a0ee5239..875da675f 100644 --- a/orchestrator/crates/orchestrator/src/lib.rs +++ b/orchestrator/crates/orchestrator/src/lib.rs @@ -11,6 +11,7 @@ pub mod cron; pub mod data_storage; /// Contains the trait that all database clients must implement pub mod database; +pub mod helpers; /// Contains the trait that all jobs must implement. 
Also /// contains the root level functions for which detect the job /// type and call the corresponding job diff --git a/orchestrator/crates/orchestrator/src/main.rs b/orchestrator/crates/orchestrator/src/main.rs index 90461de6a..ee8f5db15 100644 --- a/orchestrator/crates/orchestrator/src/main.rs +++ b/orchestrator/crates/orchestrator/src/main.rs @@ -7,6 +7,9 @@ use orchestrator::routes::setup_server; use orchestrator::setup::setup_cloud; use orchestrator::telemetry::{setup_analytics, shutdown_analytics}; +#[global_allocator] +static A: jemallocator::Jemalloc = jemallocator::Jemalloc; + /// Start the server #[tokio::main] // not sure why clippy gives this error on the latest rust diff --git a/orchestrator/crates/orchestrator/src/queue/mod.rs b/orchestrator/crates/orchestrator/src/queue/mod.rs index ebaac7668..98ce3e668 100644 --- a/orchestrator/crates/orchestrator/src/queue/mod.rs +++ b/orchestrator/crates/orchestrator/src/queue/mod.rs @@ -112,7 +112,7 @@ lazy_static! { #[async_trait] pub trait QueueProvider: Send + Sync { async fn send_message_to_queue(&self, queue: QueueType, payload: String, delay: Option) - -> EyreResult<()>; + -> EyreResult<()>; async fn consume_message_from_queue(&self, queue: QueueType) -> std::result::Result; async fn create_queue(&self, queue_config: &QueueConfig) -> EyreResult<()>; async fn setup(&self) -> EyreResult<()> { diff --git a/orchestrator/crates/orchestrator/src/routes/app_routes.rs b/orchestrator/crates/orchestrator/src/routes/app_routes.rs index 205c87907..b6e901768 100644 --- a/orchestrator/crates/orchestrator/src/routes/app_routes.rs +++ b/orchestrator/crates/orchestrator/src/routes/app_routes.rs @@ -3,18 +3,63 @@ use axum::response::IntoResponse; use axum::routing::get; use axum::Router; +/// Creates the main application router with basic health check and development routes. 
+/// +/// This router provides fundamental application endpoints including: +/// - Health check endpoint at `/health` +/// - Development routes under `/v1/dev` +/// +/// # Returns +/// * `Router` - Configured application router with health and dev routes +/// +/// pub fn app_router() -> Router { Router::new().route("/health", get(root)).nest("/v1/dev", dev_routes()) } +/// Health check endpoint handler. +/// +/// Returns a simple "UP" response to indicate the service is running. +/// This endpoint is commonly used by load balancers and monitoring systems +/// to verify service availability. +/// +/// # Returns +/// * `&'static str` - Always returns "UP" +/// +/// async fn root() -> &'static str { "UP" } +/// Handles 404 Not Found responses for the application. +/// +/// This handler is used as a fallback when no other routes match the request. +/// It provides a consistent error response format across the application. +/// +/// # Returns +/// * `impl IntoResponse` - Returns a 404 status code with a descriptive message +/// +/// # Examples +/// ``` +/// // When accessing an undefined route: +/// // GET /undefined -> 404 Not Found +/// // Response: "The requested resource was not found" +/// ``` pub async fn handler_404() -> impl IntoResponse { (StatusCode::NOT_FOUND, "The requested resource was not found") } +/// Creates a router for development-only endpoints. +/// +/// This router is nested under `/v1/dev` and is intended for +/// development and testing purposes. Currently empty but provides +/// a location for adding development-specific endpoints. +/// +/// # Returns +/// * `Router` - Empty router for development endpoints +/// +/// # Security +/// These routes should be disabled or properly secured in production environments.
fn dev_routes() -> Router { Router::new() } diff --git a/orchestrator/crates/orchestrator/src/routes/error.rs b/orchestrator/crates/orchestrator/src/routes/error.rs new file mode 100644 index 000000000..fb7a1cd6d --- /dev/null +++ b/orchestrator/crates/orchestrator/src/routes/error.rs @@ -0,0 +1,118 @@ +use axum::http::StatusCode; +use axum::response::{IntoResponse, Response}; +use axum::Json; + +use super::types::ApiResponse; + +/// Represents errors that can occur during job route handling operations. +/// +/// This enum implements both `Debug` and the custom `Error` trait from thiserror, +/// providing formatted error messages for each variant. +/// +/// # Error Variants +/// Each variant maps to a specific HTTP status code when converted to a response: +/// * `InvalidId` - 400 Bad Request +/// * `NotFound` - 404 Not Found +/// * `ProcessingError` - 400 Bad Request +/// * `InvalidJobState` - 409 Conflict +/// * `DatabaseError` - 500 Internal Server Error +/// * `InvalidStatus` - 400 Bad Request +/// +/// # Examples +/// ``` +/// use crate::routes::error::JobRouteError; +/// +/// // Creating an invalid ID error +/// let error = JobRouteError::InvalidId("123-invalid".to_string()); +/// +/// // Creating a processing error +/// let error = JobRouteError::ProcessingError("Failed to process job".to_string()); +/// ``` +#[derive(Debug, thiserror::Error)] +pub enum JobRouteError { + /// Indicates that the provided job ID is not valid (e.g., not a valid UUID) + #[error("Invalid job ID: {0}")] + InvalidId(String), + + /// Indicates that the requested job could not be found in the system + #[error("Job not found: {0}")] + NotFound(String), + + /// Represents errors that occur during job processing + #[error("Job processing error: {0}")] + ProcessingError(String), + + /// Indicates that the job is in an invalid state for the requested operation + #[error("Invalid job state: {0}")] + InvalidJobState(String), + + /// Represents errors from database operations + 
#[error("Database error")] + DatabaseError, + + /// Indicates that the job status is invalid for the requested operation + /// Contains both the job ID and the current status + #[error("Invalid status: {id}: {job_status}")] + InvalidStatus { id: String, job_status: String }, +} + +/// Implementation of axum's `IntoResponse` trait for converting errors into HTTP responses. +/// +/// This implementation ensures that each error variant is mapped to an appropriate +/// HTTP status code and formatted response body. +/// +/// # Response Format +/// All responses are returned as JSON with the following structure: +/// ```json +/// { +/// "success": false, +/// "message": "Error message here" +/// } +/// ``` +/// +/// # Status Code Mapping +/// * `InvalidId` -> 400 Bad Request +/// * `NotFound` -> 404 Not Found +/// * `ProcessingError` -> 400 Bad Request +/// * `InvalidJobState` -> 409 Conflict +/// * `DatabaseError` -> 500 Internal Server Error +/// * `InvalidStatus` -> 400 Bad Request +/// +/// # Examples +/// This implementation is used automatically when returning errors from route handlers: +/// ```rust +/// async fn handle_job(id: String) -> Result { +/// if !is_valid_id(&id) { +/// return Err(JobRouteError::InvalidId(id)); +/// } +/// // ... 
rest of handler +/// } +/// ``` +impl IntoResponse for JobRouteError { + fn into_response(self) -> Response { + match self { + JobRouteError::InvalidId(id) => { + (StatusCode::BAD_REQUEST, Json(ApiResponse::error(format!("Invalid job ID: {}", id)))).into_response() + } + JobRouteError::NotFound(id) => { + (StatusCode::NOT_FOUND, Json(ApiResponse::error(format!("Job not found: {}", id)))).into_response() + } + JobRouteError::ProcessingError(msg) => { + (StatusCode::BAD_REQUEST, Json(ApiResponse::error(format!("Processing error: {}", msg)))) + .into_response() + } + JobRouteError::InvalidJobState(msg) => { + (StatusCode::CONFLICT, Json(ApiResponse::error(format!("Invalid job state: {}", msg)))).into_response() + } + JobRouteError::DatabaseError => { + (StatusCode::INTERNAL_SERVER_ERROR, Json(ApiResponse::error("Database error occurred".to_string()))) + .into_response() + } + JobRouteError::InvalidStatus { id, job_status } => ( + StatusCode::BAD_REQUEST, + Json(ApiResponse::error(format!("Cannot retry job {id}: invalid status {job_status}"))), + ) + .into_response(), + } + } +} diff --git a/orchestrator/crates/orchestrator/src/routes/job_routes.rs b/orchestrator/crates/orchestrator/src/routes/job_routes.rs index a10730af6..f3517a249 100644 --- a/orchestrator/crates/orchestrator/src/routes/job_routes.rs +++ b/orchestrator/crates/orchestrator/src/routes/job_routes.rs @@ -3,83 +3,173 @@ use std::sync::Arc; use axum::extract::{Path, State}; use axum::response::IntoResponse; use axum::routing::get; -use axum::Router; +use axum::{Json, Router}; use opentelemetry::KeyValue; -use serde::{Deserialize, Serialize}; +use tracing::{error, info, instrument}; use uuid::Uuid; -use super::ApiResponse; +use super::error::JobRouteError; +use super::types::{ApiResponse, JobId, JobRouteResult}; use crate::config::Config; -use crate::jobs::{process_job, verify_job, JobError}; +use crate::jobs::{queue_job_for_processing, queue_job_for_verification, retry_job}; use 
crate::metrics::ORCHESTRATOR_METRICS; -#[derive(Deserialize)] -struct JobId { - id: String, -} - -#[derive(Serialize)] -struct JobApiResponse { - job_id: String, - status: String, -} - +/// Handles HTTP requests to process a job. +/// +/// This endpoint initiates the processing of a job identified by its UUID. It performs the +/// following: +/// 1. Validates and parses the job ID from the URL path parameter +/// 2. Calls the job processing logic +/// 3. Records metrics for successful/failed operations +/// 4. Returns an appropriate API response +/// +/// # Arguments +/// * `Path(JobId { id })` - The job ID extracted from the URL path +/// * `State(config)` - Shared application configuration +/// +/// # Returns +/// * `JobRouteResult` - Success response or error details +/// +/// # Errors +/// * `JobRouteError::InvalidId` - If the provided ID is not a valid UUID +/// * `JobRouteError::ProcessingError` - If job processing fails +#[instrument(skip(config), fields(job_id = %id))] async fn handle_process_job_request( Path(JobId { id }): Path, State(config): State>, -) -> impl IntoResponse { - // Parse UUID - let job_id = match Uuid::parse_str(&id) { - Ok(id) => id, - Err(_) => { - return ApiResponse::::error((JobError::InvalidId { id }).to_string()).into_response(); - } - }; +) -> JobRouteResult { + let job_id = Uuid::parse_str(&id).map_err(|_| JobRouteError::InvalidId(id.clone()))?; - // Process job - match process_job(job_id, config).await { + match queue_job_for_processing(job_id, config).await { Ok(_) => { - let response = JobApiResponse { job_id: job_id.to_string(), status: "completed".to_string() }; - ApiResponse::success(response).into_response() + info!("Job queued for processing successfully"); + ORCHESTRATOR_METRICS + .successful_job_operations + .add(1.0, &[KeyValue::new("operation_type", "queue_process")]); + Ok(Json(ApiResponse::success(Some(format!("Job with id {} queued for processing", id)))).into_response()) } Err(e) => { - 
ORCHESTRATOR_METRICS.failed_job_operations.add(1.0, &[KeyValue::new("operation_type", "process_job")]); - ApiResponse::::error(e.to_string()).into_response() + error!(error = %e, "Failed to queue job for processing"); + ORCHESTRATOR_METRICS.failed_job_operations.add(1.0, &[KeyValue::new("operation_type", "queue_process")]); + Err(JobRouteError::ProcessingError(e.to_string())) } } } +/// Handles HTTP requests to verify a job's status. +/// +/// This endpoint queues the job for verification by: +/// 1. Validates and parses the job ID +/// 2. Adds the job to the verification queue +/// 3. Resets verification attempt counter +/// 4. Records metrics for the queue operation +/// 5. Returns immediate response +/// +/// # Arguments +/// * `Path(JobId { id })` - The job ID extracted from the URL path +/// * `State(config)` - Shared application configuration +/// +/// # Returns +/// * `JobRouteResult` - Success response or error details +/// +/// # Errors +/// * `JobRouteError::InvalidId` - If the provided ID is not a valid UUID +/// * `JobRouteError::ProcessingError` - If queueing for verification fails +#[instrument(skip(config), fields(job_id = %id))] async fn handle_verify_job_request( Path(JobId { id }): Path, State(config): State>, -) -> impl IntoResponse { - // Parse UUID - let job_id = match Uuid::parse_str(&id) { - Ok(id) => id, - Err(_) => { - return ApiResponse::::error((JobError::InvalidId { id }).to_string()).into_response(); +) -> JobRouteResult { + let job_id = Uuid::parse_str(&id).map_err(|_| JobRouteError::InvalidId(id.clone()))?; + + match queue_job_for_verification(job_id, config).await { + Ok(_) => { + info!("Job queued for verification successfully"); + ORCHESTRATOR_METRICS.successful_job_operations.add(1.0, &[KeyValue::new("operation_type", "queue_verify")]); + Ok(Json(ApiResponse::success(Some(format!("Job with id {} queued for verification", id)))).into_response()) + } + Err(e) => { + error!(error = %e, "Failed to queue job for verification"); + 
ORCHESTRATOR_METRICS.failed_job_operations.add(1.0, &[KeyValue::new("operation_type", "queue_verify")]); + Err(JobRouteError::ProcessingError(e.to_string())) } - }; + } +} + +/// Handles HTTP requests to retry a failed job. +/// +/// This endpoint attempts to retry a previously failed job. It: +/// 1. Validates and parses the job ID +/// 2. Initiates the retry process +/// 3. Records metrics with additional retry context +/// 4. Returns the retry attempt result +/// +/// # Arguments +/// * `Path(JobId { id })` - The job ID extracted from the URL path +/// * `State(config)` - Shared application configuration +/// +/// # Returns +/// * `JobRouteResult` - Success response or error details +/// +/// # Errors +/// * `JobRouteError::InvalidId` - If the provided ID is not a valid UUID +/// * `JobRouteError::ProcessingError` - If retry attempt fails +#[instrument(skip(config), fields(job_id = %id))] +async fn handle_retry_job_request( + Path(JobId { id }): Path, + State(config): State>, +) -> JobRouteResult { + let job_id = Uuid::parse_str(&id).map_err(|_| JobRouteError::InvalidId(id.clone()))?; - // Verify job - match verify_job(job_id, config).await { + match retry_job(job_id, config).await { Ok(_) => { - let response = JobApiResponse { job_id: job_id.to_string(), status: "verified".to_string() }; - ApiResponse::success(response).into_response() + info!("Job retry initiated successfully"); + ORCHESTRATOR_METRICS.successful_job_operations.add( + 1.0, + &[KeyValue::new("operation_type", "process_job"), KeyValue::new("operation_info", "retry_job")], + ); + + Ok(Json(ApiResponse::success(Some(format!("Job with id {} retry initiated", id)))).into_response()) } Err(e) => { - ORCHESTRATOR_METRICS.failed_job_operations.add(1.0, &[KeyValue::new("operation_type", "verify_job")]); - ApiResponse::::error(e.to_string()).into_response() + error!(error = %e, "Failed to retry job"); + ORCHESTRATOR_METRICS.failed_job_operations.add( + 1.0, + &[KeyValue::new("operation_type", 
"process_job"), KeyValue::new("operation_info", "retry_job")], + ); + Err(JobRouteError::ProcessingError(e.to_string())) } } } + +/// Creates a router for job-related endpoints. +/// +/// This function sets up the main router for all job-related operations, +/// nesting the specific job trigger endpoints under the "/jobs" path. +/// +/// # Arguments +/// * `config` - Shared application configuration +/// +/// # Returns +/// * `Router` - Configured router with all job endpoints pub fn job_router(config: Arc) -> Router { Router::new().nest("/jobs", trigger_router(config.clone())) } +/// Creates the nested router for job trigger endpoints. +/// +/// Sets up specific routes for processing, verifying, and retrying jobs. +/// All endpoints are configured as GET requests and share the application config. +/// +/// # Arguments +/// * `config` - Shared application configuration +/// +/// # Returns +/// * `Router` - Configured router with trigger endpoints fn trigger_router(config: Arc) -> Router { Router::new() .route("/:id/process", get(handle_process_job_request)) .route("/:id/verify", get(handle_verify_job_request)) + .route("/:id/retry", get(handle_retry_job_request)) .with_state(config) } diff --git a/orchestrator/crates/orchestrator/src/routes/mod.rs b/orchestrator/crates/orchestrator/src/routes/mod.rs index b43b6a7e4..b2fd2036f 100644 --- a/orchestrator/crates/orchestrator/src/routes/mod.rs +++ b/orchestrator/crates/orchestrator/src/routes/mod.rs @@ -2,58 +2,65 @@ use std::net::SocketAddr; use std::sync::Arc; use app_routes::{app_router, handler_404}; -use axum::http::StatusCode; -use axum::response::{IntoResponse, Response}; -use axum::{Json, Router}; +use axum::Router; use job_routes::job_router; -use serde::Serialize; use crate::config::Config; +/// Routes module for the orchestrator service. 
+/// +/// This module provides the core routing and server setup functionality, organizing +/// different route handlers into submodules: +/// - `app_routes`: General application routes (e.g., health checks) +/// - `job_routes`: Job processing and management routes +/// - `error`: Error handling and HTTP response mapping +/// - `types`: Shared type definitions for route handlers pub mod app_routes; +pub mod error; pub mod job_routes; - +pub mod types; + +pub use error::JobRouteError; + +/// Configuration parameters for the HTTP server. +/// +/// Contains the necessary information to bind and start the server. +/// +/// # Examples +/// ``` +/// let params = ServerParams { host: "127.0.0.1".to_string(), port: 8080 }; +/// ``` #[derive(Debug, Clone)] pub struct ServerParams { + /// The host address to bind to (e.g., "127.0.0.1", "0.0.0.0") pub host: String, + /// The port number to listen on pub port: u16, } -#[derive(Debug, Serialize)] -struct ApiResponse -where - T: Serialize, -{ - data: Option, - error: Option, -} - -impl ApiResponse -where - T: Serialize, -{ - pub fn success(data: T) -> Self { - Self { data: Some(data), error: None } - } - - pub fn error(message: impl Into) -> Self { - Self { data: None, error: Some(message.into()) } - } -} - -impl IntoResponse for ApiResponse -where - T: Serialize, -{ - fn into_response(self) -> Response { - let status = if self.error.is_some() { StatusCode::INTERNAL_SERVER_ERROR } else { StatusCode::OK }; - - let json = Json(self); - - (status, json).into_response() - } -} - +/// Sets up and starts the HTTP server with configured routes. +/// +/// This function: +/// 1. Initializes the server with the provided configuration +/// 2. Sets up all route handlers (both app and job routes) +/// 3. 
Starts the server in a separate tokio task +/// +/// # Arguments +/// * `config` - Shared application configuration +/// +/// # Returns +/// * `SocketAddr` - The bound address of the server +/// +/// # Panics +/// * If the server fails to start +/// * If the address cannot be bound +/// +/// # Examples +/// ``` +/// let config = Arc::new(Config::new()); +/// let addr = setup_server(config).await; +/// println!("Server listening on {}", addr); +/// ``` pub async fn setup_server(config: Arc) -> SocketAddr { let (api_server_url, listener) = get_server_url(config.server_config()).await; @@ -68,6 +75,25 @@ pub async fn setup_server(config: Arc) -> SocketAddr { api_server_url } +/// Creates a TCP listener and returns its address. +/// +/// This function handles the low-level socket binding and address resolution. +/// +/// # Arguments +/// * `server_params` - Configuration for the server binding +/// +/// # Returns +/// * `(SocketAddr, TcpListener)` - The bound address and the TCP listener +/// +/// # Panics +/// * If binding to the specified address fails +/// * If the listener cannot be created +/// +/// # Examples +/// ``` +/// let params = ServerParams { host: "127.0.0.1".to_string(), port: 8080 }; +/// let (addr, listener) = get_server_url(¶ms).await; +/// ``` pub async fn get_server_url(server_params: &ServerParams) -> (SocketAddr, tokio::net::TcpListener) { let address = format!("{}:{}", server_params.host, server_params.port); let listener = tokio::net::TcpListener::bind(address.clone()).await.expect("Failed to get listener"); diff --git a/orchestrator/crates/orchestrator/src/routes/types.rs b/orchestrator/crates/orchestrator/src/routes/types.rs new file mode 100644 index 000000000..11679d53a --- /dev/null +++ b/orchestrator/crates/orchestrator/src/routes/types.rs @@ -0,0 +1,100 @@ +use axum::response::Response; +use serde::{Deserialize, Serialize}; + +use super::error::JobRouteError; + +/// Represents a job identifier in API requests. 
+/// +/// This struct is used to deserialize job IDs from incoming HTTP requests, +/// particularly in path parameters. +/// +/// # Examples +/// ``` +/// let job_id = JobId { id: "123e4567-e89b-12d3-a456-426614174000".to_string() }; +/// ``` +#[derive(Deserialize)] +pub struct JobId { + /// The string representation of the job's UUID + pub id: String, +} + +/// Represents a standardized API response structure. +/// +/// This struct provides a consistent format for all API responses, including +/// both successful operations and errors. It implements serialization for +/// converting responses to JSON. +/// +/// # Fields +/// * `success` - Indicates whether the operation was successful +/// * `message` - Optional message providing additional details (typically used for errors) +/// +/// # Examples +/// ``` +/// // Success response +/// let response = ApiResponse::success(None); +/// assert_eq!(response.success, true); +/// assert_eq!(response.message, None); +/// +/// // Error response +/// let response = ApiResponse::error("Invalid job ID".to_string()); +/// assert_eq!(response.success, false); +/// assert_eq!(response.message, Some("Invalid job ID".to_string())); +/// ``` +#[derive(Serialize, Deserialize)] +pub struct ApiResponse { + /// Indicates if the operation was successful + pub success: bool, + /// Optional message, typically used for error details + pub message: Option, +} + +impl ApiResponse { + /// Creates a successful response with an optional message. + /// + /// # Returns + /// Returns an `ApiResponse` with `success` set to `true` and the given message. + /// + /// # Examples + /// ``` + /// let response = ApiResponse::success(None); + /// assert_eq!(response.success, true); + /// ``` + pub fn success(message: Option) -> Self { + Self { success: true, message } + } + + /// Creates an error response with the specified message.
+ /// + /// # Arguments + /// * `message` - The error message to include in the response + /// + /// # Returns + /// Returns an `ApiResponse` with `success` set to `false` and the provided message. + /// + /// # Examples + /// ``` + /// let response = ApiResponse::error("Operation failed".to_string()); + /// assert_eq!(response.success, false); + /// assert_eq!(response.message, Some("Operation failed".to_string())); + /// ``` + pub fn error(message: String) -> Self { + Self { success: false, message: Some(message) } + } +} + +/// Type alias for the result type used in job route handlers. +/// +/// This type combines axum's `Response` type with our custom `JobRouteError`, +/// providing a consistent error handling pattern across all job-related routes. +/// +/// # Examples +/// ``` +/// async fn handle_job() -> JobRouteResult { +/// // Success case +/// Ok(Json(ApiResponse::success(None)).into_response()) +/// +/// // Error case +/// Err(JobRouteError::NotFound("123".to_string())) +/// } +/// ``` +pub type JobRouteResult = Result; diff --git a/orchestrator/crates/orchestrator/src/setup/mod.rs b/orchestrator/crates/orchestrator/src/setup/mod.rs index e14f00fc9..37527d755 100644 --- a/orchestrator/crates/orchestrator/src/setup/mod.rs +++ b/orchestrator/crates/orchestrator/src/setup/mod.rs @@ -62,7 +62,7 @@ pub async fn setup_cloud(setup_cmd: &SetupCmd) -> color_eyre::Result<()> { // Cron println!("Setting up cron.
⏳"); // Sleeping for few seconds to let AWS index the newly created queues to be used for setting up cron - sleep(Duration::from_secs(100)).await; + sleep(Duration::from_secs(60)).await; let cron_params = setup_cmd.validate_cron_params().expect("Failed to validate cron params"); match cron_params { CronValidatedArgs::AWSEventBridge(aws_event_bridge_params) => { diff --git a/orchestrator/crates/orchestrator/src/tests/common/mod.rs b/orchestrator/crates/orchestrator/src/tests/common/mod.rs index 63065c69b..0bf59f7f1 100644 --- a/orchestrator/crates/orchestrator/src/tests/common/mod.rs +++ b/orchestrator/crates/orchestrator/src/tests/common/mod.rs @@ -1,6 +1,5 @@ pub mod constants; -use std::collections::HashMap; use std::sync::Arc; use ::uuid::Uuid; @@ -22,6 +21,7 @@ use crate::config::ProviderConfig; use crate::data_storage::aws_s3::{AWSS3ValidatedArgs, AWSS3}; use crate::data_storage::DataStorage; use crate::database::mongodb::MongoDb; +use crate::jobs::metadata::{CommonMetadata, DaMetadata, JobMetadata, JobSpecificMetadata}; use crate::jobs::types::JobStatus::Created; use crate::jobs::types::JobType::DataSubmission; use crate::jobs::types::{ExternalId, JobItem}; @@ -35,7 +35,10 @@ pub fn default_job_item() -> JobItem { job_type: DataSubmission, status: Created, external_id: ExternalId::String("0".to_string().into_boxed_str()), - metadata: HashMap::new(), + metadata: JobMetadata { + common: CommonMetadata::default(), + specific: JobSpecificMetadata::Da(DaMetadata { block_number: 0, blob_data_path: None, tx_hash: None }), + }, version: 0, created_at: Utc::now().round_subsecs(0), updated_at: Utc::now().round_subsecs(0), @@ -45,7 +48,14 @@ pub fn default_job_item() -> JobItem { #[fixture] pub fn custom_job_item(default_job_item: JobItem, #[default(String::from("0"))] internal_id: String) -> JobItem { let mut job_item = default_job_item; - job_item.internal_id = internal_id; + job_item.internal_id = internal_id.clone(); + + // Update block number in metadata to match 
internal_id if possible + if let Ok(block_number) = internal_id.parse::() { + if let JobSpecificMetadata::Da(ref mut da_metadata) = job_item.metadata.specific { + da_metadata.block_number = block_number; + } + } job_item } @@ -91,7 +101,6 @@ pub async fn delete_storage( // this is necessary for it to work with localstack in test cases s3_config_builder.set_force_path_style(Some(true)); let client = S3Client::from_conf(s3_config_builder.build()); - // Check if bucket exists match client.head_bucket().bucket(&bucket_name).send().await { Ok(_) => { diff --git a/orchestrator/crates/orchestrator/src/tests/config.rs b/orchestrator/crates/orchestrator/src/tests/config.rs index 7aa7e4ee1..44e8e3df1 100644 --- a/orchestrator/crates/orchestrator/src/tests/config.rs +++ b/orchestrator/crates/orchestrator/src/tests/config.rs @@ -34,6 +34,7 @@ use crate::data_storage::aws_s3::AWSS3ValidatedArgs; use crate::data_storage::{DataStorage, MockDataStorage}; use crate::database::mongodb::MongoDBValidatedArgs; use crate::database::{Database, MockDatabase}; +use crate::helpers::{JobProcessingState, ProcessingLocks}; use crate::queue::sqs::AWSSQSValidatedArgs; use crate::queue::{MockQueueProvider, QueueProvider}; use crate::routes::{get_server_url, setup_server, ServerParams}; @@ -230,20 +231,15 @@ impl TestConfigBuilder { implement_client::init_settlement_client(settlement_client_type, ¶ms.settlement_params).await; let prover_client = implement_client::init_prover_client(prover_client_type, ¶ms.prover_params).await; - // Delete the Storage before use delete_storage(provider_config.clone(), ¶ms.storage_params).await.expect("Could not delete storage"); - // External Dependencies let storage = implement_client::init_storage_client(storage_type, ¶ms.storage_params, provider_config.clone()).await; - let database = implement_client::init_database(database_type, ¶ms.db_params).await; - let queue = implement_client::init_queue_client(queue_type, params.queue_params.clone(), 
provider_config.clone()).await; // Deleting and Creating the queues in sqs. - create_queues(provider_config.clone(), ¶ms.queue_params) .await .expect("Not able to delete and create the queues."); @@ -252,6 +248,10 @@ impl TestConfigBuilder { // Creating the SNS ARN create_sns_arn(provider_config.clone(), ¶ms.alert_params).await.expect("Unable to create the sns arn"); + let snos_processing_lock = + JobProcessingState::new(params.orchestrator_params.service_config.max_concurrent_snos_jobs.unwrap_or(1)); + let processing_locks = ProcessingLocks { snos_job_processing_lock: Arc::new(snos_processing_lock) }; + let config = Arc::new(Config::new( params.orchestrator_params, starknet_client, @@ -262,6 +262,7 @@ impl TestConfigBuilder { queue, storage, alerts, + processing_locks, )); let api_server_address = implement_api_server(api_server_type, config.clone()).await; @@ -544,7 +545,13 @@ fn get_env_params() -> EnvParams { let env = get_env_var_optional("MADARA_ORCHESTRATOR_MIN_BLOCK_NO_TO_PROCESS").expect("Couldn't get min block"); let min_block: Option = env.and_then(|s| if s.is_empty() { None } else { Some(s.parse::().unwrap()) }); - let service_config = ServiceParams { max_block_to_process: max_block, min_block_to_process: min_block }; + let env = get_env_var_optional("MADARA_ORCHESTRATOR_MAX_CONCURRENT_SNOS_JOBS") + .expect("Couldn't get max concurrent snos jobs"); + let max_concurrent_snos_jobs: Option = + env.and_then(|s| if s.is_empty() { None } else { Some(s.parse::().unwrap()) }); + + let service_config = + ServiceParams { max_block_to_process: max_block, min_block_to_process: min_block, max_concurrent_snos_jobs }; let server_config = ServerParams { host: get_env_var_or_panic("MADARA_ORCHESTRATOR_HOST"), diff --git a/orchestrator/crates/orchestrator/src/tests/database/mod.rs b/orchestrator/crates/orchestrator/src/tests/database/mod.rs index 77c64f887..be15d9c34 100644 --- a/orchestrator/crates/orchestrator/src/tests/database/mod.rs +++ 
b/orchestrator/crates/orchestrator/src/tests/database/mod.rs @@ -1,12 +1,10 @@ -use std::collections::HashMap; - -use chrono::{SubsecRound, Utc}; use rstest::*; -use uuid::Uuid; -use crate::jobs::types::{ExternalId, JobItem, JobItemUpdates, JobStatus, JobType}; -use crate::jobs::{increment_key_in_metadata, JobError}; +use crate::jobs::metadata::JobSpecificMetadata; +use crate::jobs::types::{JobItemUpdates, JobStatus, JobType}; +use crate::jobs::JobError; use crate::tests::config::{ConfigType, TestConfigBuilder}; +use crate::tests::utils::build_job_item; #[rstest] #[tokio::test] @@ -203,9 +201,12 @@ async fn database_test_update_job() { let job_id = job.id; - let metadata = HashMap::new(); - let key = "test_key"; - let updated_metadata = increment_key_in_metadata(&metadata, key).unwrap(); + // Create updated metadata with the new structure + let mut updated_job_metadata = job.metadata.clone(); + if let JobSpecificMetadata::Da(ref mut da_metadata) = updated_job_metadata.specific { + da_metadata.block_number = 456; + da_metadata.tx_hash = Some("test_key".to_string()); + } let job_cloned = job.clone(); let updated_job = database_client @@ -213,7 +214,7 @@ async fn database_test_update_job() { &job_cloned, JobItemUpdates::new() .update_status(JobStatus::LockedForProcessing) - .update_metadata(updated_metadata) + .update_metadata(updated_job_metadata) .build(), ) .await; @@ -225,6 +226,13 @@ async fn database_test_update_job() { assert_eq!(1, job_after_updates_db.version); assert_eq!(456.to_string(), job_after_updates_db.internal_id); + // Check metadata was updated correctly + if let JobSpecificMetadata::Da(da_metadata) = &job_after_updates_db.metadata.specific { + assert_eq!(Some("test_key".to_string()), da_metadata.tx_hash); + } else { + panic!("Wrong metadata type"); + } + // check if value returned by `update_job` is the correct one // and matches the one in database assert_eq!(updated_job.unwrap(), job_after_updates_db); @@ -232,20 +240,3 @@ async fn 
database_test_update_job() { panic!("Job not found in Database.") } } - -// Test Util Functions -// ========================================== - -pub fn build_job_item(job_type: JobType, job_status: JobStatus, internal_id: u64) -> JobItem { - JobItem { - id: Uuid::new_v4(), - internal_id: internal_id.to_string(), - job_type, - status: job_status, - external_id: ExternalId::Number(0), - metadata: Default::default(), - version: 0, - created_at: Utc::now().round_subsecs(0), - updated_at: Utc::now().round_subsecs(0), - } -} diff --git a/orchestrator/crates/orchestrator/src/tests/jobs/da_job/mod.rs b/orchestrator/crates/orchestrator/src/tests/jobs/da_job/mod.rs index 8a9308334..aed7f296f 100644 --- a/orchestrator/crates/orchestrator/src/tests/jobs/da_job/mod.rs +++ b/orchestrator/crates/orchestrator/src/tests/jobs/da_job/mod.rs @@ -1,5 +1,3 @@ -use std::collections::HashMap; - use assert_matches::assert_matches; use chrono::{SubsecRound, Utc}; use color_eyre::eyre::eyre; @@ -10,12 +8,13 @@ use serde_json::json; use starknet::core::types::{Felt, MaybePendingStateUpdate, PendingStateUpdate, StateDiff}; use uuid::Uuid; +use crate::constants::BLOB_DATA_FILE_NAME; use crate::jobs::da_job::test::{get_nonce_attached, read_state_update_from_file}; use crate::jobs::da_job::{DaError, DaJob}; +use crate::jobs::metadata::{CommonMetadata, DaMetadata, JobMetadata, JobSpecificMetadata}; use crate::jobs::types::{ExternalId, JobItem, JobStatus, JobType}; use crate::jobs::{Job, JobError}; use crate::tests::config::{ConfigType, TestConfigBuilder}; - /// Tests the DA Job's handling of a blob length exceeding the supported size. /// It mocks the DA client to simulate the environment and expects an error on job processing. /// Validates the error message for exceeding blob limits against the expected output. 
@@ -39,16 +38,13 @@ async fn test_da_job_process_job_failure_on_small_blob_size( // dummy state will have more than 1200 bytes da_client.expect_max_blob_per_txn().with().returning(|| 1); da_client.expect_max_bytes_per_blob().with().returning(|| 1200); - let services = TestConfigBuilder::new() .configure_starknet_client(ConfigType::Actual) .configure_storage_client(ConfigType::Actual) .configure_da_client(da_client.into()) .build() .await; - let state_update = read_state_update_from_file(state_update_file.as_str()).expect("issue while reading"); - let state_update = MaybePendingStateUpdate::Update(state_update); let state_update = serde_json::to_value(&state_update).unwrap(); let response = json!({ "id": 640641,"jsonrpc":"2.0","result": state_update }); @@ -62,6 +58,17 @@ async fn test_da_job_process_job_failure_on_small_blob_size( let max_blob_per_txn = services.config.da_client().max_blob_per_txn().await; + // Create proper metadata structure + let block_number = internal_id.parse::().unwrap_or(0); + let metadata = JobMetadata { + common: CommonMetadata::default(), + specific: JobSpecificMetadata::Da(DaMetadata { + block_number, + blob_data_path: Some(format!("{}/{}", block_number, BLOB_DATA_FILE_NAME)), + tx_hash: None, + }), + }; + let response = DaJob .process_job( services.config, @@ -71,14 +78,13 @@ async fn test_da_job_process_job_failure_on_small_blob_size( job_type: JobType::DataSubmission, status: JobStatus::Created, external_id: ExternalId::String(internal_id.to_string().into_boxed_str()), - metadata: HashMap::default(), + metadata, version: 0, created_at: Utc::now().round_subsecs(0), updated_at: Utc::now().round_subsecs(0), }, ) .await; - assert_matches!(response, Err(e) => { let err = DaError::MaxBlobsLimitExceeded { max_blob_per_txn, current_blob_length, block_no: internal_id.to_string(), job_id: Uuid::default() }; @@ -126,6 +132,17 @@ async fn test_da_job_process_job_failure_on_pending_block() { 
then.status(200).body(serde_json::to_vec(&response).unwrap()); }); + // Create proper metadata structure + let block_number = internal_id.parse::().unwrap_or(0); + let metadata = JobMetadata { + common: CommonMetadata::default(), + specific: JobSpecificMetadata::Da(DaMetadata { + block_number, + blob_data_path: Some(format!("{}/{}", block_number, BLOB_DATA_FILE_NAME)), + tx_hash: None, + }), + }; + let response = DaJob .process_job( services.config, @@ -135,7 +152,7 @@ async fn test_da_job_process_job_failure_on_pending_block() { job_type: JobType::DataSubmission, status: JobStatus::Created, external_id: ExternalId::String("1".to_string().into_boxed_str()), - metadata: HashMap::default(), + metadata, version: 0, created_at: Utc::now().round_subsecs(0), updated_at: Utc::now().round_subsecs(0), @@ -209,6 +226,17 @@ async fn test_da_job_process_job_success( then.status(200).body(serde_json::to_vec(&response).unwrap()); }); + // Create proper metadata structure + let block_number = internal_id.parse::().unwrap_or(0); + let metadata = JobMetadata { + common: CommonMetadata::default(), + specific: JobSpecificMetadata::Da(DaMetadata { + block_number, + blob_data_path: Some(format!("{}/{}", block_number, BLOB_DATA_FILE_NAME)), + tx_hash: None, + }), + }; + let response = DaJob .process_job( services.config, @@ -218,7 +246,7 @@ async fn test_da_job_process_job_success( job_type: JobType::DataSubmission, status: JobStatus::Created, external_id: ExternalId::String(internal_id.to_string().into_boxed_str()), - metadata: HashMap::default(), + metadata, version: 0, created_at: Utc::now().round_subsecs(0), updated_at: Utc::now().round_subsecs(0), diff --git a/orchestrator/crates/orchestrator/src/tests/jobs/mod.rs b/orchestrator/crates/orchestrator/src/tests/jobs/mod.rs index 29aed76a0..5245b2c32 100644 --- a/orchestrator/crates/orchestrator/src/tests/jobs/mod.rs +++ b/orchestrator/crates/orchestrator/src/tests/jobs/mod.rs @@ -1,4 +1,3 @@ -use std::collections::HashMap; use 
std::sync::Arc; use std::time::Duration; @@ -7,21 +6,19 @@ use mongodb::bson::doc; use omniqueue::QueueError; use rstest::rstest; use tokio::time::sleep; -use uuid::Uuid; -use super::database::build_job_item; -use crate::jobs::constants::{ - JOB_METADATA_FAILURE_REASON, JOB_PROCESS_ATTEMPT_METADATA_KEY, JOB_VERIFICATION_ATTEMPT_METADATA_KEY, -}; +use crate::constants::CAIRO_PIE_FILE_NAME; use crate::jobs::job_handler_factory::mock_factory; -use crate::jobs::types::{ExternalId, JobItem, JobStatus, JobType, JobVerificationStatus}; -use crate::jobs::{ - create_job, handle_job_failure, increment_key_in_metadata, process_job, verify_job, Job, JobError, MockJob, +use crate::jobs::metadata::{ + CommonMetadata, JobMetadata, JobSpecificMetadata, ProvingInputType, ProvingMetadata, SnosMetadata, }; +use crate::jobs::types::{ExternalId, JobStatus, JobType, JobVerificationStatus}; +use crate::jobs::{create_job, handle_job_failure, process_job, retry_job, verify_job, Job, JobError, MockJob}; use crate::queue::job_queue::QueueNameForJobType; use crate::queue::QueueType; use crate::tests::common::MessagePayloadType; use crate::tests::config::{ConfigType, TestConfigBuilder}; +use crate::tests::utils::build_job_item; #[cfg(test)] pub mod da_job; @@ -36,13 +33,12 @@ pub mod state_update_job; pub mod snos_job; use assert_matches::assert_matches; -use chrono::{SubsecRound, Utc}; /// Tests `create_job` function when job is not existing in the db. #[rstest] #[tokio::test] async fn create_job_job_does_not_exists_in_db_works() { - let job_item = build_job_item_by_type_and_status(JobType::SnosRun, JobStatus::Created, "0".to_string()); + let job_item = build_job_item(JobType::SnosRun, JobStatus::Created, 0); let mut job_handler = MockJob::new(); // Adding expectation for creation of new job. 
@@ -60,17 +56,26 @@ async fn create_job_job_does_not_exists_in_db_works() { let ctx = mock_factory::get_job_handler_context(); ctx.expect().times(1).with(eq(JobType::SnosRun)).return_once(move |_| Arc::clone(&job_handler)); - assert!(create_job(JobType::SnosRun, "0".to_string(), HashMap::new(), services.config.clone()).await.is_ok()); - - let mut hashmap: HashMap = HashMap::new(); - hashmap.insert(JOB_PROCESS_ATTEMPT_METADATA_KEY.to_string(), "0".to_string()); - hashmap.insert(JOB_VERIFICATION_ATTEMPT_METADATA_KEY.to_string(), "0".to_string()); + // Create a proper JobMetadata for the test + let metadata = JobMetadata { + common: CommonMetadata::default(), + specific: JobSpecificMetadata::Snos(SnosMetadata { + block_number: 0, + full_output: false, + cairo_pie_path: None, + snos_output_path: None, + program_output_path: None, + snos_fact: None, + }), + }; + + assert!(create_job(JobType::SnosRun, "0".to_string(), metadata, services.config.clone()).await.is_ok()); // Db checks. let job_in_db = services.config.database().get_job_by_id(job_item.id).await.unwrap().unwrap(); assert_eq!(job_in_db.id, job_item.id); assert_eq!(job_in_db.internal_id, job_item.internal_id); - assert_eq!(job_in_db.metadata, hashmap); + assert_eq!(job_in_db.metadata, job_item.metadata); // Waiting for 5 secs for message to be passed into the queue sleep(Duration::from_secs(5)).await; @@ -86,7 +91,7 @@ async fn create_job_job_does_not_exists_in_db_works() { #[rstest] #[tokio::test] async fn create_job_job_exists_in_db_works() { - let job_item = build_job_item_by_type_and_status(JobType::ProofCreation, JobStatus::Created, "0".to_string()); + let job_item = build_job_item(JobType::ProofCreation, JobStatus::Created, 0); let services = TestConfigBuilder::new() .configure_database(ConfigType::Actual) @@ -97,7 +102,19 @@ async fn create_job_job_exists_in_db_works() { let database_client = services.config.database(); database_client.create_job(job_item.clone()).await.unwrap(); - 
assert!(create_job(JobType::ProofCreation, "0".to_string(), HashMap::new(), services.config.clone()).await.is_ok()); + // Create a proper JobMetadata for the test + let metadata = JobMetadata { + common: CommonMetadata::default(), + specific: JobSpecificMetadata::Proving(ProvingMetadata { + block_number: 0, + input_path: Some(ProvingInputType::CairoPie(format!("{}/{}", "0", CAIRO_PIE_FILE_NAME))), + ensure_on_chain_registration: None, + download_proof: None, + }), + }; + + assert!(create_job(JobType::ProofCreation, "0".to_string(), metadata, services.config.clone()).await.is_ok()); + // There should be only 1 job in the db let jobs_in_db = database_client.get_jobs_by_statuses(vec![JobStatus::Created], None).await.unwrap(); assert_eq!(jobs_in_db.len(), 1); @@ -129,7 +146,18 @@ async fn create_job_job_handler_is_not_implemented_panics() { let job_type = JobType::ProofCreation; - assert!(create_job(job_type.clone(), "0".to_string(), HashMap::new(), services.config.clone()).await.is_err()); + // Create a proper JobMetadata for the test + let metadata = JobMetadata { + common: CommonMetadata::default(), + specific: JobSpecificMetadata::Proving(ProvingMetadata { + block_number: 0, + input_path: Some(ProvingInputType::CairoPie(format!("{}/{}", "0", CAIRO_PIE_FILE_NAME))), + ensure_on_chain_registration: None, + download_proof: None, + }), + }; + + assert!(create_job(job_type.clone(), "0".to_string(), metadata, services.config.clone()).await.is_err()); // Waiting for 5 secs for message to be passed into the queue sleep(Duration::from_secs(5)).await; @@ -150,8 +178,6 @@ async fn process_job_with_job_exists_in_db_and_valid_job_processing_status_works #[case] job_type: JobType, #[case] job_status: JobStatus, ) { - let job_item = build_job_item_by_type_and_status(job_type.clone(), job_status.clone(), "1".to_string()); - // Building config let services = TestConfigBuilder::new() .configure_database(ConfigType::Actual) @@ -160,6 +186,9 @@ async fn 
process_job_with_job_exists_in_db_and_valid_job_processing_status_works .await; let database_client = services.config.database(); + // Create a job with proper metadata structure + let job_item = build_job_item(job_type.clone(), job_status.clone(), 1); + let mut job_handler = MockJob::new(); // Creating job in database @@ -167,6 +196,7 @@ async fn process_job_with_job_exists_in_db_and_valid_job_processing_status_works // Expecting process job function in job processor to return the external ID. job_handler.expect_process_job().times(1).returning(move |_, _| Ok("0xbeef".to_string())); job_handler.expect_verification_polling_delay_seconds().return_const(1u64); + job_handler.expect_job_processing_lock().return_const(None); // Mocking the `get_job_handler` call in create_job function. let job_handler: Arc> = Arc::new(Box::new(job_handler)); @@ -179,7 +209,9 @@ async fn process_job_with_job_exists_in_db_and_valid_job_processing_status_works // checking if job_status is updated in db assert_eq!(updated_job.status, JobStatus::PendingVerification); assert_eq!(updated_job.external_id, ExternalId::String(Box::from("0xbeef"))); - assert_eq!(updated_job.metadata.get(JOB_PROCESS_ATTEMPT_METADATA_KEY).unwrap(), "1"); + + // Check that process attempt is recorded in common metadata + assert_eq!(updated_job.metadata.common.process_attempt_no, 1); // Waiting for 5 secs for message to be passed into the queue sleep(Duration::from_secs(5)).await; @@ -199,8 +231,6 @@ async fn process_job_with_job_exists_in_db_and_valid_job_processing_status_works #[rstest] #[tokio::test] async fn process_job_handles_panic() { - let job_item = build_job_item_by_type_and_status(JobType::SnosRun, JobStatus::Created, "1".to_string()); - // Building config let services = TestConfigBuilder::new() .configure_database(ConfigType::Actual) @@ -209,6 +239,10 @@ async fn process_job_handles_panic() { .await; let database_client = services.config.database(); + + // Create a job with proper metadata structure + let 
job_item = build_job_item(JobType::SnosRun, JobStatus::Created, 1); + // Creating job in database database_client.create_job(job_item.clone()).await.unwrap(); @@ -218,6 +252,7 @@ async fn process_job_handles_panic() { .expect_process_job() .times(1) .returning(|_, _| -> Result { panic!("Simulated panic in process_job") }); + job_handler.expect_job_processing_lock().return_const(None); // Mocking the `get_job_handler` call in process_job function let job_handler: Arc> = Arc::new(Box::new(job_handler)); @@ -229,11 +264,17 @@ async fn process_job_handles_panic() { // DB checks - verify the job was moved to failed state let job_in_db = database_client.get_job_by_id(job_item.id).await.unwrap().unwrap(); assert_eq!(job_in_db.status, JobStatus::Failed); - assert!(job_in_db - .metadata - .get(JOB_METADATA_FAILURE_REASON) - .unwrap() - .contains("Job handler panicked with message: Simulated panic in process_job")); + + // Check that failure reason is recorded in common metadata + assert!( + job_in_db + .metadata + .common + .failure_reason + .as_ref() + .unwrap() + .contains("Job handler panicked with message: Simulated panic in process_job") + ); } /// Tests `process_job` function when job is already existing in the db and job status is not @@ -242,7 +283,7 @@ async fn process_job_handles_panic() { #[tokio::test] async fn process_job_with_job_exists_in_db_with_invalid_job_processing_status_errors() { // Creating a job with Completed status which is invalid processing. - let job_item = build_job_item_by_type_and_status(JobType::SnosRun, JobStatus::Completed, "1".to_string()); + let job_item = build_job_item(JobType::SnosRun, JobStatus::Completed, 1); // building config let services = TestConfigBuilder::new() @@ -276,7 +317,7 @@ async fn process_job_with_job_exists_in_db_with_invalid_job_processing_status_er #[tokio::test] async fn process_job_job_does_not_exists_in_db_works() { // Creating a valid job which is not existing in the db. 
- let job_item = build_job_item_by_type_and_status(JobType::SnosRun, JobStatus::Created, "1".to_string()); + let job_item = build_job_item(JobType::SnosRun, JobStatus::Created, 1); // building config let services = TestConfigBuilder::new() @@ -307,6 +348,7 @@ async fn process_job_two_workers_process_same_job_works() { // Expecting process job function in job processor to return the external ID. job_handler.expect_process_job().times(1).returning(move |_, _| Ok("0xbeef".to_string())); job_handler.expect_verification_polling_delay_seconds().return_const(1u64); + job_handler.expect_job_processing_lock().return_const(None); // Mocking the `get_job_handler` call in create_job function. let job_handler: Arc> = Arc::new(Box::new(job_handler)); @@ -321,7 +363,7 @@ async fn process_job_two_workers_process_same_job_works() { .await; let db_client = services.config.database(); - let job_item = build_job_item_by_type_and_status(JobType::SnosRun, JobStatus::Created, "1".to_string()); + let job_item = build_job_item(JobType::SnosRun, JobStatus::Created, 1); // Creating the job in the db db_client.create_job(job_item.clone()).await.unwrap(); @@ -362,6 +404,7 @@ async fn process_job_job_handler_returns_error_works() { .times(1) .returning(move |_, _| Err(JobError::Other(failure_reason.to_string().into()))); job_handler.expect_verification_polling_delay_seconds().return_const(1u64); + job_handler.expect_job_processing_lock().return_const(None); // Mocking the `get_job_handler` call in create_job function. 
let job_handler: Arc> = Arc::new(Box::new(job_handler)); @@ -376,7 +419,7 @@ async fn process_job_job_handler_returns_error_works() { .await; let db_client = services.config.database(); - let job_item = build_job_item_by_type_and_status(JobType::SnosRun, JobStatus::Created, "1".to_string()); + let job_item = build_job_item(JobType::SnosRun, JobStatus::Created, 1); // Creating the job in the db db_client.create_job(job_item.clone()).await.unwrap(); @@ -385,7 +428,7 @@ async fn process_job_job_handler_returns_error_works() { let final_job_in_db = db_client.get_job_by_id(job_item.id).await.unwrap().unwrap(); assert_eq!(final_job_in_db.status, JobStatus::Failed); - assert!(final_job_in_db.metadata.get(JOB_METADATA_FAILURE_REASON).unwrap().to_string().contains(failure_reason)); + assert!(final_job_in_db.metadata.common.failure_reason.as_ref().unwrap().contains(failure_reason)); } /// Tests `verify_job` function when job is having expected status @@ -393,8 +436,7 @@ async fn process_job_job_handler_returns_error_works() { #[rstest] #[tokio::test] async fn verify_job_with_verified_status_works() { - let job_item = - build_job_item_by_type_and_status(JobType::DataSubmission, JobStatus::PendingVerification, "1".to_string()); + let job_item = build_job_item(JobType::DataSubmission, JobStatus::PendingVerification, 1); // building config let services = TestConfigBuilder::new() @@ -440,8 +482,7 @@ async fn verify_job_with_verified_status_works() { #[rstest] #[tokio::test] async fn verify_job_with_rejected_status_adds_to_queue_works() { - let job_item = - build_job_item_by_type_and_status(JobType::DataSubmission, JobStatus::PendingVerification, "1".to_string()); + let job_item = build_job_item(JobType::DataSubmission, JobStatus::PendingVerification, 1); // building config let services = TestConfigBuilder::new() @@ -485,14 +526,7 @@ async fn verify_job_with_rejected_status_adds_to_queue_works() { #[rstest] #[tokio::test] async fn verify_job_with_rejected_status_works() { - let 
mut job_item = - build_job_item_by_type_and_status(JobType::DataSubmission, JobStatus::PendingVerification, "1".to_string()); - - // increasing JOB_VERIFICATION_ATTEMPT_METADATA_KEY to simulate max. attempts reached. - let metadata = increment_key_in_metadata(&job_item.metadata, JOB_PROCESS_ATTEMPT_METADATA_KEY).unwrap(); - job_item.metadata = metadata; - - // building config + // Building config let services = TestConfigBuilder::new() .configure_database(ConfigType::Actual) .configure_queue_client(ConfigType::Actual) @@ -500,30 +534,39 @@ async fn verify_job_with_rejected_status_works() { .await; let database_client = services.config.database(); - let mut job_handler = MockJob::new(); - // creating job in database + // Create a job with proper metadata structure + let mut job_item = build_job_item(JobType::DataSubmission, JobStatus::PendingVerification, 1); + + // Set process_attempt_no to 1 to simulate max attempts reached + job_item.metadata.common.process_attempt_no = 1; + + // Creating job in database database_client.create_job(job_item.clone()).await.unwrap(); - // expecting process job function in job processor to return the external ID + + let mut job_handler = MockJob::new(); + // Expecting verify_job function to return Rejected status job_handler.expect_verify_job().times(1).returning(move |_, _| Ok(JobVerificationStatus::Rejected("".to_string()))); job_handler.expect_max_process_attempts().returning(move || 1u64); + // Mocking the `get_job_handler` call let job_handler: Arc> = Arc::new(Box::new(job_handler)); let ctx = mock_factory::get_job_handler_context(); - // Mocking the `get_job_handler` call in create_job function. ctx.expect().times(1).with(eq(JobType::DataSubmission)).returning(move |_| Arc::clone(&job_handler)); assert!(verify_job(job_item.id, services.config.clone()).await.is_ok()); - // DB checks. 
+ // DB checks - verify the job was moved to failed state let updated_job = database_client.get_job_by_id(job_item.id).await.unwrap().unwrap(); assert_eq!(updated_job.status, JobStatus::Failed); - assert_eq!(updated_job.metadata.get(JOB_PROCESS_ATTEMPT_METADATA_KEY).unwrap(), "1"); + + // Check that process attempt is recorded in common metadata + assert_eq!(updated_job.metadata.common.process_attempt_no, 1); // Waiting for 5 secs for message to be passed into the queue sleep(Duration::from_secs(5)).await; - // Queue checks. + // Queue checks - verify no message was added to the process queue let consumed_messages_processing_queue = services.config.queue().consume_message_from_queue(job_item.job_type.process_queue_name()).await.unwrap_err(); assert_matches!(consumed_messages_processing_queue, QueueError::NoData); @@ -534,10 +577,7 @@ async fn verify_job_with_rejected_status_works() { #[rstest] #[tokio::test] async fn verify_job_with_pending_status_adds_to_queue_works() { - let job_item = - build_job_item_by_type_and_status(JobType::DataSubmission, JobStatus::PendingVerification, "1".to_string()); - - // building config + // Building config let services = TestConfigBuilder::new() .configure_database(ConfigType::Actual) .configure_queue_client(ConfigType::Actual) @@ -545,31 +585,38 @@ async fn verify_job_with_pending_status_adds_to_queue_works() { .await; let database_client = services.config.database(); - let mut job_handler = MockJob::new(); - // creating job in database + // Create a job with proper metadata structure + let job_item = build_job_item(JobType::DataSubmission, JobStatus::PendingVerification, 1); + + // Creating job in database database_client.create_job(job_item.clone()).await.unwrap(); - // expecting process job function in job processor to return the external ID + + let mut job_handler = MockJob::new(); + // Expecting verify_job function to return Pending status job_handler.expect_verify_job().times(1).returning(move |_, _| 
Ok(JobVerificationStatus::Pending)); job_handler.expect_max_verification_attempts().returning(move || 2u64); job_handler.expect_verification_polling_delay_seconds().returning(move || 2u64); + // Mocking the `get_job_handler` call let job_handler: Arc> = Arc::new(Box::new(job_handler)); let ctx = mock_factory::get_job_handler_context(); - // Mocking the `get_job_handler` call in create_job function. ctx.expect().times(1).with(eq(JobType::DataSubmission)).returning(move |_| Arc::clone(&job_handler)); assert!(verify_job(job_item.id, services.config.clone()).await.is_ok()); - // DB checks. + // DB checks - verify the job status remains PendingVerification and verification attempt is + // incremented let updated_job = database_client.get_job_by_id(job_item.id).await.unwrap().unwrap(); - assert_eq!(updated_job.metadata.get(JOB_VERIFICATION_ATTEMPT_METADATA_KEY).unwrap(), "1"); assert_eq!(updated_job.status, JobStatus::PendingVerification); + // Check that verification attempt is recorded in common metadata + assert_eq!(updated_job.metadata.common.verification_attempt_no, 1); + // Waiting for 5 secs for message to be passed into the queue sleep(Duration::from_secs(5)).await; - // Queue checks + // Queue checks - verify a message was added to the verification queue let consumed_messages = services.config.queue().consume_message_from_queue(job_item.job_type.verify_queue_name()).await.unwrap(); let consumed_message_payload: MessagePayloadType = consumed_messages.payload_serde_json().unwrap().unwrap(); @@ -582,14 +629,7 @@ async fn verify_job_with_pending_status_adds_to_queue_works() { #[rstest] #[tokio::test] async fn verify_job_with_pending_status_works() { - let mut job_item = - build_job_item_by_type_and_status(JobType::DataSubmission, JobStatus::PendingVerification, "1".to_string()); - - // increasing JOB_VERIFICATION_ATTEMPT_METADATA_KEY to simulate max. attempts reached. 
- let metadata = increment_key_in_metadata(&job_item.metadata, JOB_VERIFICATION_ATTEMPT_METADATA_KEY).unwrap(); - job_item.metadata = metadata; - - // building config + // Building config let services = TestConfigBuilder::new() .configure_database(ConfigType::Actual) .configure_queue_client(ConfigType::Actual) @@ -597,58 +637,51 @@ async fn verify_job_with_pending_status_works() { .await; let database_client = services.config.database(); - let mut job_handler = MockJob::new(); - // creating job in database + // Create a job with proper metadata structure + let mut job_item = build_job_item(JobType::DataSubmission, JobStatus::PendingVerification, 1); + + // Set verification_attempt_no to 1 to simulate max attempts reached + job_item.metadata.common.verification_attempt_no = 1; + + // Creating job in database database_client.create_job(job_item.clone()).await.unwrap(); - // expecting process job function in job processor to return the external ID + + let mut job_handler = MockJob::new(); + // Expecting verify_job function to return Pending status job_handler.expect_verify_job().times(1).returning(move |_, _| Ok(JobVerificationStatus::Pending)); job_handler.expect_max_verification_attempts().returning(move || 1u64); job_handler.expect_verification_polling_delay_seconds().returning(move || 2u64); + // Mocking the `get_job_handler` call let job_handler: Arc> = Arc::new(Box::new(job_handler)); let ctx = mock_factory::get_job_handler_context(); - // Mocking the `get_job_handler` call in create_job function. ctx.expect().times(1).with(eq(JobType::DataSubmission)).returning(move |_| Arc::clone(&job_handler)); assert!(verify_job(job_item.id, services.config.clone()).await.is_ok()); - // DB checks. 
+ // DB checks - verify the job status is changed to VerificationTimeout let updated_job = database_client.get_job_by_id(job_item.id).await.unwrap().unwrap(); assert_eq!(updated_job.status, JobStatus::VerificationTimeout); - assert_eq!(updated_job.metadata.get(JOB_VERIFICATION_ATTEMPT_METADATA_KEY).unwrap(), "1"); + + // Check that verification attempt is still recorded in common metadata + assert_eq!(updated_job.metadata.common.verification_attempt_no, 1); // Waiting for 5 secs for message to be passed into the queue sleep(Duration::from_secs(5)).await; - // Queue checks. + // Queue checks - verify no message was added to the verification queue let consumed_messages_verification_queue = services.config.queue().consume_message_from_queue(job_item.job_type.verify_queue_name()).await.unwrap_err(); assert_matches!(consumed_messages_verification_queue, QueueError::NoData); } -fn build_job_item_by_type_and_status(job_type: JobType, job_status: JobStatus, internal_id: String) -> JobItem { - let mut hashmap: HashMap = HashMap::new(); - hashmap.insert(JOB_PROCESS_ATTEMPT_METADATA_KEY.to_string(), "0".to_string()); - hashmap.insert(JOB_VERIFICATION_ATTEMPT_METADATA_KEY.to_string(), "0".to_string()); - JobItem { - id: Uuid::new_v4(), - internal_id, - job_type, - status: job_status, - external_id: ExternalId::Number(0), - metadata: hashmap, - version: 0, - created_at: Utc::now().round_subsecs(0), - updated_at: Utc::now().round_subsecs(0), - } -} - #[rstest] #[case(JobType::DataSubmission, JobStatus::Completed)] // code should panic here, how can completed move to dl queue ? 
#[case(JobType::SnosRun, JobStatus::PendingVerification)] #[case(JobType::ProofCreation, JobStatus::LockedForProcessing)] -#[case(JobType::ProofRegistration, JobStatus::Created)] +// #[case(JobType::ProofRegistration, JobStatus::Created)] TODO: add this case when we have the metadata for proof +// registration #[case(JobType::StateTransition, JobStatus::Completed)] #[case(JobType::ProofCreation, JobStatus::VerificationTimeout)] #[case(JobType::DataSubmission, JobStatus::VerificationFailed)] @@ -663,20 +696,21 @@ async fn handle_job_failure_with_failed_job_status_works(#[case] job_type: JobTy let database_client = services.config.database(); let internal_id = 1; - // create a job, with already available "last_job_status" + // Create a job with Failed status let mut job_expected = build_job_item(job_type.clone(), JobStatus::Failed, internal_id); - let mut job_metadata = job_expected.metadata.clone(); - job_metadata.insert("last_job_status".to_string(), job_status.to_string()); - job_expected.metadata.clone_from(&job_metadata); + + // Store the previous job status in the common metadata + job_expected.metadata.common.failure_reason = Some(format!("last_job_status: {}", job_status)); let job_id = job_expected.id; - // feeding the job to DB + // Feeding the job to DB database_client.create_job(job_expected.clone()).await.unwrap(); - // calling handle_job_failure + // Calling handle_job_failure handle_job_failure(job_id, services.config.clone()).await.expect("handle_job_failure failed to run"); + // Fetch the job from DB and verify it's unchanged (since it's already in Failed status) let job_fetched = services.config.database().get_job_by_id(job_id).await.expect("Unable to fetch Job Data").unwrap(); @@ -697,30 +731,28 @@ async fn handle_job_failure_with_correct_job_status_works(#[case] job_type: JobT let database_client = services.config.database(); let internal_id = 1; - // create a job + // Create a job let job = build_job_item(job_type.clone(), job_status.clone(), 
internal_id); let job_id = job.id; - // feeding the job to DB + // Feeding the job to DB database_client.create_job(job.clone()).await.unwrap(); - // calling handle_job_failure + // Calling handle_job_failure handle_job_failure(job_id, services.config.clone()).await.expect("handle_job_failure failed to run"); let job_fetched = services.config.database().get_job_by_id(job_id).await.expect("Unable to fetch Job Data").unwrap(); - // creating expected output + // Creating expected output let mut job_expected = job.clone(); - let mut job_metadata = job_expected.metadata.clone(); - job_metadata.insert( - JOB_METADATA_FAILURE_REASON.to_string(), - format!("Received failure queue message for job with status: {}", job_status), - ); - job_expected.metadata.clone_from(&job_metadata); job_expected.status = JobStatus::Failed; job_expected.version = 1; + // Set the failure reason in common metadata + job_expected.metadata.common.failure_reason = + Some(format!("Received failure queue message for job with status: {}", job_status)); + assert_eq!(job_fetched, job_expected); } @@ -739,14 +771,14 @@ async fn handle_job_failure_job_status_completed_works(#[case] job_type: JobType let database_client = services.config.database(); let internal_id = 1; - // create a job + // Create a job let job_expected = build_job_item(job_type.clone(), job_status.clone(), internal_id); let job_id = job_expected.id; - // feeding the job to DB + // Feeding the job to DB database_client.create_job(job_expected.clone()).await.unwrap(); - // calling handle_job_failure + // Calling handle_job_failure handle_job_failure(job_id, services.config.clone()) .await .expect("Test call to handle_job_failure should have passed."); @@ -757,3 +789,72 @@ async fn handle_job_failure_job_status_completed_works(#[case] job_type: JobType assert_eq!(job_fetched, job_expected); } + +#[rstest] +#[tokio::test] +async fn test_retry_job_adds_to_process_queue() { + let services = TestConfigBuilder::new() + 
.configure_database(ConfigType::Actual) + .configure_queue_client(ConfigType::Actual) + .build() + .await; + + // Create a failed job + let job_item = build_job_item(JobType::DataSubmission, JobStatus::Failed, 1); + services.config.database().create_job(job_item.clone()).await.unwrap(); + let job_id = job_item.id; + + // Retry the job + assert!(retry_job(job_id, services.config.clone()).await.is_ok()); + + // Verify job status was updated to PendingRetry + let updated_job = services.config.database().get_job_by_id(job_id).await.unwrap().unwrap(); + assert_eq!(updated_job.status, JobStatus::PendingRetry); + + // Wait for message to be processed + tokio::time::sleep(Duration::from_secs(5)).await; + + // Verify message was added to process queue + let consumed_messages = + services.config.queue().consume_message_from_queue(job_item.job_type.process_queue_name()).await.unwrap(); + + let consumed_message_payload: MessagePayloadType = consumed_messages.payload_serde_json().unwrap().unwrap(); + assert_eq!(consumed_message_payload.id, job_id); +} + +#[rstest] +#[case::pending_verification(JobStatus::PendingVerification)] +#[case::completed(JobStatus::Completed)] +#[case::created(JobStatus::Created)] +#[tokio::test] +async fn test_retry_job_invalid_status(#[case] initial_status: JobStatus) { + let services = TestConfigBuilder::new() + .configure_database(ConfigType::Actual) + .configure_queue_client(ConfigType::Actual) + .build() + .await; + + // Create a job with non-Failed status + let job_item = build_job_item(JobType::DataSubmission, initial_status.clone(), 1); + services.config.database().create_job(job_item.clone()).await.unwrap(); + let job_id = job_item.id; + + // Attempt to retry the job + let result = retry_job(job_id, services.config.clone()).await; + assert!(result.is_err()); + + if let Err(error) = result { + assert_matches!(error, JobError::InvalidStatus { .. 
}); + } + + // Verify job status was not changed + let job = services.config.database().get_job_by_id(job_id).await.unwrap().unwrap(); + assert_eq!(job.status, initial_status); + + // Wait briefly to ensure no messages were added + tokio::time::sleep(Duration::from_secs(5)).await; + + // Verify no message was added to process queue + let queue_result = services.config.queue().consume_message_from_queue(job_item.job_type.process_queue_name()).await; + assert_matches!(queue_result, Err(QueueError::NoData)); +} diff --git a/orchestrator/crates/orchestrator/src/tests/jobs/proving_job/mod.rs b/orchestrator/crates/orchestrator/src/tests/jobs/proving_job/mod.rs index 930eed5fc..85e2a7ea3 100644 --- a/orchestrator/crates/orchestrator/src/tests/jobs/proving_job/mod.rs +++ b/orchestrator/crates/orchestrator/src/tests/jobs/proving_job/mod.rs @@ -1,4 +1,3 @@ -use std::collections::HashMap; use std::fs::File; use std::io::Read; use std::path::Path; @@ -17,7 +16,7 @@ use uuid::Uuid; use super::super::common::default_job_item; use crate::constants::CAIRO_PIE_FILE_NAME; use crate::data_storage::MockDataStorage; -use crate::jobs::constants::JOB_METADATA_SNOS_FACT; +use crate::jobs::metadata::{CommonMetadata, JobMetadata, JobSpecificMetadata, ProvingInputType, ProvingMetadata}; use crate::jobs::proving_job::ProvingJob; use crate::jobs::types::{JobItem, JobStatus, JobType}; use crate::jobs::Job; @@ -28,7 +27,17 @@ use crate::tests::config::TestConfigBuilder; async fn test_create_job() { let services = TestConfigBuilder::new().build().await; - let job = ProvingJob.create_job(services.config.clone(), String::from("0"), HashMap::new()).await; + let metadata = JobMetadata { + common: CommonMetadata::default(), + specific: JobSpecificMetadata::Proving(ProvingMetadata { + block_number: 0, + input_path: None, + ensure_on_chain_registration: None, + download_proof: None, + }), + }; + + let job = ProvingJob.create_job(services.config.clone(), String::from("0"), metadata).await; 
assert!(job.is_ok()); let job = job.unwrap(); @@ -45,11 +54,17 @@ async fn test_create_job() { #[tokio::test] async fn test_verify_job(#[from(default_job_item)] mut job_item: JobItem) { let mut prover_client = MockProverClient::new(); - prover_client.expect_get_task_status().times(1).returning(|_, _| Ok(TaskStatus::Succeeded)); + prover_client.expect_get_task_status().times(1).returning(|_, _, _| Ok(TaskStatus::Succeeded)); let services = TestConfigBuilder::new().configure_prover_client(prover_client.into()).build().await; - job_item.metadata.insert(JOB_METADATA_SNOS_FACT.into(), "fact".to_string()); + job_item.metadata.specific = JobSpecificMetadata::Proving(ProvingMetadata { + block_number: 0, + input_path: None, + ensure_on_chain_registration: Some("fact".to_string()), + download_proof: None, + }); + assert!(ProvingJob.verify_job(services.config, &mut job_item).await.is_ok()); } @@ -71,10 +86,8 @@ async fn test_process_job() { let mut storage = MockDataStorage::new(); let buffer_bytes = Bytes::from(buffer); - storage - .expect_get_data() - .with(eq(format!("{}/{}", "0", CAIRO_PIE_FILE_NAME))) - .return_once(move |_| Ok(buffer_bytes)); + let cairo_pie_path = format!("0/{}", CAIRO_PIE_FILE_NAME); + storage.expect_get_data().with(eq(cairo_pie_path.clone())).return_once(move |_| Ok(buffer_bytes)); let services = TestConfigBuilder::new() .configure_starknet_client(provider.into()) @@ -83,6 +96,16 @@ async fn test_process_job() { .build() .await; + let metadata = JobMetadata { + common: CommonMetadata::default(), + specific: JobSpecificMetadata::Proving(ProvingMetadata { + block_number: 0, + input_path: Some(ProvingInputType::CairoPie(cairo_pie_path)), + ensure_on_chain_registration: Some("fact".to_string()), + download_proof: None, + }), + }; + assert_eq!( ProvingJob .process_job( @@ -93,7 +116,7 @@ async fn test_process_job() { job_type: JobType::ProofCreation, status: JobStatus::Created, external_id: String::new().into(), - metadata: HashMap::new(), + metadata, 
version: 0, created_at: Utc::now().round_subsecs(0), updated_at: Utc::now().round_subsecs(0) diff --git a/orchestrator/crates/orchestrator/src/tests/jobs/snos_job/mod.rs b/orchestrator/crates/orchestrator/src/tests/jobs/snos_job/mod.rs index 8a48f86ec..ab063e237 100644 --- a/orchestrator/crates/orchestrator/src/tests/jobs/snos_job/mod.rs +++ b/orchestrator/crates/orchestrator/src/tests/jobs/snos_job/mod.rs @@ -1,4 +1,3 @@ -use std::collections::HashMap; use std::sync::Arc; use cairo_vm::vm::runners::cairo_pie::CairoPie; @@ -8,8 +7,8 @@ use starknet_os::io::output::StarknetOsOutput; use url::Url; use uuid::Uuid; -use crate::constants::{CAIRO_PIE_FILE_NAME, SNOS_OUTPUT_FILE_NAME}; -use crate::jobs::constants::JOB_METADATA_SNOS_BLOCK; +use crate::constants::{CAIRO_PIE_FILE_NAME, PROGRAM_OUTPUT_FILE_NAME, SNOS_OUTPUT_FILE_NAME}; +use crate::jobs::metadata::{CommonMetadata, JobMetadata, JobSpecificMetadata, SnosMetadata}; use crate::jobs::snos_job::SnosJob; use crate::jobs::types::{JobItem, JobStatus, JobType, JobVerificationStatus}; use crate::jobs::Job; @@ -22,7 +21,20 @@ use crate::tests::jobs::ConfigType; async fn test_create_job() { let services = TestConfigBuilder::new().build().await; - let job = SnosJob.create_job(services.config.clone(), String::from("0"), HashMap::new()).await; + // Create proper metadata structure + let metadata = JobMetadata { + common: CommonMetadata::default(), + specific: JobSpecificMetadata::Snos(SnosMetadata { + block_number: 0, + full_output: false, + cairo_pie_path: None, + snos_output_path: None, + program_output_path: None, + snos_fact: None, + }), + }; + + let job = SnosJob.create_job(services.config.clone(), String::from("0"), metadata).await; assert!(job.is_ok()); let job = job.unwrap(); @@ -39,6 +51,17 @@ async fn test_create_job() { #[tokio::test] async fn test_verify_job(#[from(default_job_item)] mut job_item: JobItem) { let services = TestConfigBuilder::new().build().await; + + // Update job_item to use the proper metadata 
structure for SNOS jobs + job_item.metadata.specific = JobSpecificMetadata::Snos(SnosMetadata { + block_number: 0, + full_output: false, + cairo_pie_path: None, + snos_output_path: None, + program_output_path: None, + snos_fact: None, + }); + let job_status = SnosJob.verify_job(services.config.clone(), &mut job_item).await; // Should always be [Verified] for the moment. @@ -69,13 +92,27 @@ async fn test_process_job() -> color_eyre::Result<()> { let storage_client = services.config.storage(); + // Create proper metadata structure + let block_number = 76793; + let metadata = JobMetadata { + common: CommonMetadata::default(), + specific: JobSpecificMetadata::Snos(SnosMetadata { + block_number, + full_output: false, + cairo_pie_path: Some(format!("{}/{}", block_number, CAIRO_PIE_FILE_NAME)), + snos_output_path: Some(format!("{}/{}", block_number, SNOS_OUTPUT_FILE_NAME)), + program_output_path: Some(format!("{}/{}", block_number, PROGRAM_OUTPUT_FILE_NAME)), + snos_fact: None, + }), + }; + let mut job_item = JobItem { id: Uuid::new_v4(), internal_id: "1".into(), job_type: JobType::SnosRun, status: JobStatus::Created, external_id: String::new().into(), - metadata: HashMap::from([(JOB_METADATA_SNOS_BLOCK.to_string(), "76793".to_string())]), + metadata, version: 0, created_at: Utc::now().round_subsecs(0), updated_at: Utc::now().round_subsecs(0), diff --git a/orchestrator/crates/orchestrator/src/tests/jobs/state_update_job/mod.rs b/orchestrator/crates/orchestrator/src/tests/jobs/state_update_job/mod.rs index 7f2dfe36a..999329b88 100644 --- a/orchestrator/crates/orchestrator/src/tests/jobs/state_update_job/mod.rs +++ b/orchestrator/crates/orchestrator/src/tests/jobs/state_update_job/mod.rs @@ -1,4 +1,3 @@ -use std::collections::HashMap; use std::fs; use std::fs::read_to_string; use std::path::PathBuf; @@ -10,18 +9,15 @@ use httpmock::prelude::*; use lazy_static::lazy_static; use mockall::predicate::{always, eq}; use num_bigint::BigUint; -use 
orchestrator_settlement_client_interface::MockSettlementClient; use rstest::*; +use orchestrator_settlement_client_interface::MockSettlementClient; use starknet::providers::jsonrpc::HttpTransport; use starknet::providers::JsonRpcClient; use url::Url; use crate::constants::{BLOB_DATA_FILE_NAME, PROGRAM_OUTPUT_FILE_NAME, SNOS_OUTPUT_FILE_NAME}; use crate::data_storage::MockDataStorage; -use crate::jobs::constants::{ - JOB_METADATA_STATE_UPDATE_BLOCKS_TO_SETTLE_KEY, JOB_METADATA_STATE_UPDATE_FETCH_FROM_TESTS, - JOB_METADATA_STATE_UPDATE_LAST_FAILED_BLOCK_NO, JOB_PROCESS_ATTEMPT_METADATA_KEY, -}; +use crate::jobs::metadata::{CommonMetadata, JobMetadata, JobSpecificMetadata, StateUpdateMetadata}; use crate::jobs::state_update_job::utils::hex_string_to_u8_vec; use crate::jobs::state_update_job::{StateUpdateError, StateUpdateJob}; use crate::jobs::types::{JobStatus, JobType}; @@ -44,9 +40,19 @@ async fn test_process_job_attempt_not_present_fails() { let mut job = default_job_item(); + // Update job metadata to use the proper structure + job.metadata.specific = JobSpecificMetadata::StateUpdate(StateUpdateMetadata { + blocks_to_settle: vec![], + snos_output_paths: vec![], + program_output_paths: vec![], + blob_data_paths: vec![], + last_failed_block_no: None, + tx_hashes: vec![], + }); + let state_update_job = StateUpdateJob {}; let res = state_update_job.process_job(services.config, &mut job).await.unwrap_err(); - assert_eq!(res, JobError::StateUpdateJobError(StateUpdateError::AttemptNumberNotFound)); + assert_eq!(res, JobError::StateUpdateJobError(StateUpdateError::BlockNumberNotFound)); } // TODO : make this test work @@ -54,7 +60,6 @@ async fn test_process_job_attempt_not_present_fails() { #[case(None, String::from("651053,651054,651055"), 0)] #[case(Some(651054), String::from("651053,651054,651055"), 1)] #[tokio::test] -// #[ignore] async fn test_process_job_works( #[case] failed_block_number: Option, #[case] blocks_to_process: String, @@ -109,9 +114,14 @@ async fn 
test_process_job_works( let storage_client = services.config.storage(); - for block in block_numbers { + // Prepare vectors to collect paths for metadata + let mut snos_output_paths = Vec::new(); + let mut program_output_paths = Vec::new(); + let mut blob_data_paths = Vec::new(); + + for block in block_numbers.iter() { // Getting the blob data from file. - let blob_data_key = block.to_owned().to_string() + "/" + BLOB_DATA_FILE_NAME; + let blob_data_key = block.to_string() + "/" + BLOB_DATA_FILE_NAME; let blob_data = fs::read_to_string( CURRENT_PATH.join(format!("src/tests/jobs/state_update_job/test_data/{}/{}", block, BLOB_DATA_FILE_NAME)), ) @@ -119,14 +129,14 @@ async fn test_process_job_works( let blob_data_vec = hex_string_to_u8_vec(&blob_data).unwrap(); // Getting the snos data from file. - let snos_output_key = block.to_owned().to_string() + "/" + SNOS_OUTPUT_FILE_NAME; + let snos_output_key = block.to_string() + "/" + SNOS_OUTPUT_FILE_NAME; let snos_output_data = fs::read_to_string( CURRENT_PATH.join(format!("src/tests/jobs/state_update_job/test_data/{}/{}", block, SNOS_OUTPUT_FILE_NAME)), ) .unwrap(); // Getting the program output data from file. - let program_output_key = block.to_owned().to_string() + "/" + PROGRAM_OUTPUT_FILE_NAME; + let program_output_key = block.to_string() + "/" + PROGRAM_OUTPUT_FILE_NAME; let program_output_data = read_file_to_vec_u8_32( CURRENT_PATH .join(format!("src/tests/jobs/state_update_job/test_data/{}/{}", block, PROGRAM_OUTPUT_FILE_NAME)) @@ -139,17 +149,28 @@ async fn test_process_job_works( storage_client.put_data(Bytes::from(snos_output_data), &snos_output_key).await.unwrap(); storage_client.put_data(Bytes::from(blob_data_vec), &blob_data_key).await.unwrap(); storage_client.put_data(Bytes::from(program_output_data_serialized), &program_output_key).await.unwrap(); - } - // setting last failed block number as 651053. - // setting blocks yet to process as 651054 and 651055. - // here total blocks to process will be 3. 
- let mut metadata: HashMap = HashMap::new(); - metadata.insert(JOB_PROCESS_ATTEMPT_METADATA_KEY.to_string(), "0".to_string()); - if let Some(block_number) = failed_block_number { - metadata.insert(JOB_METADATA_STATE_UPDATE_LAST_FAILED_BLOCK_NO.to_string(), block_number.to_string()); + // Add paths to our vectors for metadata + snos_output_paths.push(snos_output_key); + program_output_paths.push(program_output_key); + blob_data_paths.push(blob_data_key); } - metadata.insert(JOB_METADATA_STATE_UPDATE_BLOCKS_TO_SETTLE_KEY.to_string(), blocks_to_process); + + // Create proper metadata structure with the collected paths + let mut metadata = JobMetadata { + common: CommonMetadata::default(), + specific: JobSpecificMetadata::StateUpdate(StateUpdateMetadata { + blocks_to_settle: block_numbers.clone(), + snos_output_paths, + program_output_paths, + blob_data_paths, + last_failed_block_no: failed_block_number, + tx_hashes: Vec::new(), // Start with empty tx_hashes, they'll be populated during processing + }), + }; + + // Add process attempt to common metadata + metadata.common.process_attempt_no = 0; // creating a `JobItem` let mut job = default_job_item(); @@ -168,7 +189,20 @@ async fn test_process_job_works( async fn create_job_works() { let services = TestConfigBuilder::new().build().await; - let job = StateUpdateJob.create_job(services.config, String::from("0"), HashMap::default()).await; + // Create proper metadata structure + let metadata = JobMetadata { + common: CommonMetadata::default(), + specific: JobSpecificMetadata::StateUpdate(StateUpdateMetadata { + blocks_to_settle: vec![1], + snos_output_paths: vec![format!("1/{}", SNOS_OUTPUT_FILE_NAME)], + program_output_paths: vec![format!("1/{}", PROGRAM_OUTPUT_FILE_NAME)], + blob_data_paths: vec![format!("1/{}", BLOB_DATA_FILE_NAME)], + last_failed_block_no: None, + tx_hashes: vec![], + }), + }; + + let job = StateUpdateJob.create_job(services.config, String::from("0"), metadata).await; assert!(job.is_ok()); let job = 
job.unwrap(); @@ -203,7 +237,7 @@ async fn process_job_works_unit_test() { .expect("Failed to read the snos output data json file"); storage_client .expect_get_data() - .with(eq(snos_output_key)) + .with(eq(snos_output_key.clone())) .returning(move |_| Ok(Bytes::from(snos_output_data.clone()))); let blob_data_key = block_no.to_owned() + "/" + BLOB_DATA_FILE_NAME; @@ -216,7 +250,7 @@ async fn process_job_works_unit_test() { let blob_data_vec_clone = blob_data_vec.clone(); storage_client .expect_get_data() - .with(eq(blob_data_key)) + .with(eq(blob_data_key.clone())) .returning(move |_| Ok(Bytes::from(blob_data_vec.clone()))); let x_0_key = block_no.to_owned() + "/" + X_0_FILE_NAME; @@ -237,11 +271,9 @@ async fn process_job_works_unit_test() { let program_output_clone = program_output.clone(); storage_client .expect_get_data() - .with(eq(program_output_key)) + .with(eq(program_output_key.clone())) .returning(move |_| Ok(Bytes::from(bincode::serialize(&program_output).unwrap()))); - // let nonce = settlement_client.get_nonce().await.expect("Unable to fetch nonce for settlement - // client."); settlement_client.expect_get_nonce().returning(|| Ok(1)); let deserialized_program_output: Vec<[u8; 32]> = @@ -259,10 +291,27 @@ async fn process_job_works_unit_test() { .build() .await; - let mut metadata: HashMap = HashMap::new(); - metadata.insert(String::from(JOB_METADATA_STATE_UPDATE_FETCH_FROM_TESTS), String::from("TRUE")); - metadata.insert(String::from(JOB_METADATA_STATE_UPDATE_BLOCKS_TO_SETTLE_KEY), block_numbers.join(",")); - metadata.insert(String::from(JOB_PROCESS_ATTEMPT_METADATA_KEY), String::from("0")); + // Create proper metadata structure + let mut metadata = JobMetadata { + common: CommonMetadata::default(), + specific: JobSpecificMetadata::StateUpdate(StateUpdateMetadata { + blocks_to_settle: block_numbers.iter().map(|b| b.parse::().unwrap()).collect(), + snos_output_paths: block_numbers + .iter() + .map(|block| format!("{}/{}", block, SNOS_OUTPUT_FILE_NAME)) + 
.collect(), + program_output_paths: block_numbers + .iter() + .map(|block| format!("{}/{}", block, PROGRAM_OUTPUT_FILE_NAME)) + .collect(), + blob_data_paths: block_numbers.iter().map(|block| format!("{}/{}", block, BLOB_DATA_FILE_NAME)).collect(), + last_failed_block_no: None, + tx_hashes: vec![], + }), + }; + + // Add process attempt to common metadata + metadata.common.process_attempt_no = 0; let mut job = StateUpdateJob.create_job(services.config.clone(), String::from("internal_id"), metadata).await.unwrap(); @@ -270,13 +319,11 @@ async fn process_job_works_unit_test() { } #[rstest] -#[case(String::from("651052, 651054, 651051, 651056"), "numbers aren't sorted in increasing order")] -#[case(String::from("651052, 651052, 651052, 651052"), "Duplicated block numbers")] -#[case(String::from("a, 651054, b, 651056"), "settle list is not correctly formatted")] -#[case(String::from("651052, 651052, 651053, 651053"), "Duplicated block numbers")] -#[case(String::from(""), "settle list is not correctly formatted")] +#[case(vec![651052, 651054, 651051, 651056], "numbers aren't sorted in increasing order")] +#[case(vec![651052, 651052, 651052, 651052], "Duplicated block numbers")] +#[case(vec![651052, 651052, 651053, 651053], "Duplicated block numbers")] #[tokio::test] -async fn process_job_invalid_inputs_errors(#[case] block_numbers_to_settle: String, #[case] expected_error: &str) { +async fn process_job_invalid_inputs_errors(#[case] block_numbers: Vec, #[case] expected_error: &str) { let server = MockServer::start(); let settlement_client = MockSettlementClient::new(); @@ -290,9 +337,26 @@ async fn process_job_invalid_inputs_errors(#[case] block_numbers_to_settle: Stri .build() .await; - let mut metadata: HashMap = HashMap::new(); - metadata.insert(String::from(JOB_METADATA_STATE_UPDATE_BLOCKS_TO_SETTLE_KEY), block_numbers_to_settle); - metadata.insert(String::from(JOB_PROCESS_ATTEMPT_METADATA_KEY), String::from("0")); + // Create paths for each block number + let 
snos_output_paths = block_numbers.iter().map(|block| format!("{}/{}", block, SNOS_OUTPUT_FILE_NAME)).collect(); + + let program_output_paths = + block_numbers.iter().map(|block| format!("{}/{}", block, PROGRAM_OUTPUT_FILE_NAME)).collect(); + + let blob_data_paths = block_numbers.iter().map(|block| format!("{}/{}", block, BLOB_DATA_FILE_NAME)).collect(); + + // Create proper metadata structure with invalid block numbers but valid paths + let metadata = JobMetadata { + common: CommonMetadata { process_attempt_no: 0, ..CommonMetadata::default() }, + specific: JobSpecificMetadata::StateUpdate(StateUpdateMetadata { + blocks_to_settle: block_numbers, + snos_output_paths, + program_output_paths, + blob_data_paths, + last_failed_block_no: None, + tx_hashes: vec![], + }), + }; let mut job = StateUpdateJob.create_job(services.config.clone(), String::from("internal_id"), metadata).await.unwrap(); @@ -327,9 +391,30 @@ async fn process_job_invalid_input_gap_panics() { .build() .await; - let mut metadata: HashMap = HashMap::new(); - metadata.insert(String::from(JOB_METADATA_STATE_UPDATE_BLOCKS_TO_SETTLE_KEY), String::from("6, 7, 8")); - metadata.insert(String::from(JOB_PROCESS_ATTEMPT_METADATA_KEY), String::from("0")); + // Create proper metadata structure with valid paths + let metadata = JobMetadata { + common: CommonMetadata { process_attempt_no: 0, ..CommonMetadata::default() }, + specific: JobSpecificMetadata::StateUpdate(StateUpdateMetadata { + blocks_to_settle: vec![6, 7, 8], // Gap between 4 and 6 + snos_output_paths: vec![ + format!("{}/{}", 6, SNOS_OUTPUT_FILE_NAME), + format!("{}/{}", 7, SNOS_OUTPUT_FILE_NAME), + format!("{}/{}", 8, SNOS_OUTPUT_FILE_NAME), + ], + program_output_paths: vec![ + format!("{}/{}", 6, PROGRAM_OUTPUT_FILE_NAME), + format!("{}/{}", 7, PROGRAM_OUTPUT_FILE_NAME), + format!("{}/{}", 8, PROGRAM_OUTPUT_FILE_NAME), + ], + blob_data_paths: vec![ + format!("{}/{}", 6, BLOB_DATA_FILE_NAME), + format!("{}/{}", 7, BLOB_DATA_FILE_NAME), + 
format!("{}/{}", 8, BLOB_DATA_FILE_NAME), + ], + last_failed_block_no: None, + tx_hashes: vec![], + }), + }; let mut job = StateUpdateJob.create_job(services.config.clone(), String::from("internal_id"), metadata).await.unwrap(); diff --git a/orchestrator/crates/orchestrator/src/tests/mod.rs b/orchestrator/crates/orchestrator/src/tests/mod.rs index 4f264304c..ca197a24d 100644 --- a/orchestrator/crates/orchestrator/src/tests/mod.rs +++ b/orchestrator/crates/orchestrator/src/tests/mod.rs @@ -10,4 +10,5 @@ pub mod queue; pub mod alerts; pub mod common; mod data_storage; +pub mod utils; pub mod workers; diff --git a/orchestrator/crates/orchestrator/src/tests/server/job_routes.rs b/orchestrator/crates/orchestrator/src/tests/server/job_routes.rs index 21f0719d1..6696e25b4 100644 --- a/orchestrator/crates/orchestrator/src/tests/server/job_routes.rs +++ b/orchestrator/crates/orchestrator/src/tests/server/job_routes.rs @@ -1,8 +1,8 @@ use core::panic; use std::net::SocketAddr; use std::sync::Arc; +use std::time::Duration; -use chrono::{SubsecRound as _, Utc}; use hyper::{Body, Request}; use mockall::predicate::eq; use orchestrator_utils::env_utils::get_env_var_or_panic; @@ -10,14 +10,16 @@ use rstest::*; use starknet::providers::jsonrpc::HttpTransport; use starknet::providers::JsonRpcClient; use url::Url; -use uuid::Uuid; use crate::config::Config; use crate::jobs::job_handler_factory::mock_factory; -use crate::jobs::types::{ExternalId, JobItem, JobStatus, JobType, JobVerificationStatus}; +use crate::jobs::types::{JobStatus, JobType}; use crate::jobs::{Job, MockJob}; use crate::queue::init_consumers; +use crate::queue::job_queue::{JobQueueMessage, QueueNameForJobType}; +use crate::routes::types::ApiResponse; use crate::tests::config::{ConfigType, TestConfigBuilder}; +use crate::tests::utils::build_job_item; #[fixture] async fn setup_trigger() -> (SocketAddr, Arc) { @@ -45,23 +47,12 @@ async fn setup_trigger() -> (SocketAddr, Arc) { #[rstest] async fn 
test_trigger_process_job(#[future] setup_trigger: (SocketAddr, Arc)) { let (addr, config) = setup_trigger.await; - let job_type = JobType::DataSubmission; let job_item = build_job_item(job_type.clone(), JobStatus::Created, 1); - let mut job_handler = MockJob::new(); - - job_handler.expect_process_job().times(1).returning(move |_, _| Ok("0xbeef".to_string())); - config.database().create_job(job_item.clone()).await.unwrap(); let job_id = job_item.clone().id; - job_handler.expect_verification_polling_delay_seconds().return_const(1u64); - - let job_handler: Arc> = Arc::new(Box::new(job_handler)); - let ctx = mock_factory::get_job_handler_context(); - ctx.expect().times(1).with(eq(job_type)).returning(move |_| Arc::clone(&job_handler)); - let client = hyper::Client::new(); let response = client .request( @@ -70,12 +61,22 @@ async fn test_trigger_process_job(#[future] setup_trigger: (SocketAddr, Arc)) { let (addr, config) = setup_trigger.await; - let job_type = JobType::DataSubmission; - let job_item = build_job_item(job_type.clone(), JobStatus::PendingVerification, 1); - let mut job_handler = MockJob::new(); + // Create a job with initial metadata + let mut job_item = build_job_item(job_type.clone(), JobStatus::PendingVerification, 1); - job_handler.expect_verify_job().times(1).returning(move |_, _| Ok(JobVerificationStatus::Verified)); + // Set verification counters in common metadata + job_item.metadata.common.verification_retry_attempt_no = 0; + job_item.metadata.common.verification_attempt_no = 10; config.database().create_job(job_item.clone()).await.unwrap(); let job_id = job_item.clone().id; + // Set up mock job handler + let mut job_handler = MockJob::new(); job_handler.expect_verification_polling_delay_seconds().return_const(1u64); - let job_handler: Arc> = Arc::new(Box::new(job_handler)); + let ctx = mock_factory::get_job_handler_context(); - ctx.expect().times(1).with(eq(job_type)).returning(move |_| Arc::clone(&job_handler)); + 
ctx.expect().with(eq(job_type.clone())).times(1).returning(move |_| Arc::clone(&job_handler)); let client = hyper::Client::new(); let response = client @@ -108,15 +112,98 @@ async fn test_trigger_verify_job(#[future] setup_trigger: (SocketAddr, Arc)) { + let (addr, config) = setup_trigger.await; + let job_type = JobType::DataSubmission; + + let job_item = build_job_item(job_type.clone(), JobStatus::Failed, 1); + config.database().create_job(job_item.clone()).await.unwrap(); + let job_id = job_item.clone().id; + + let client = hyper::Client::new(); + let response = client + .request(Request::builder().uri(format!("http://{}/jobs/{}/retry", addr, job_id)).body(Body::empty()).unwrap()) + .await + .unwrap(); + + assert_eq!(response.status(), 200); + let body_bytes = hyper::body::to_bytes(response.into_body()).await.unwrap(); + let response: ApiResponse = serde_json::from_slice(&body_bytes).unwrap(); + assert!(response.success); + assert_eq!(response.message, Some(format!("Job with id {} retry initiated", job_id))); + + // Verify job was added to process queue + let queue_message = config.queue().consume_message_from_queue(job_type.process_queue_name()).await.unwrap(); + + let message_payload: JobQueueMessage = queue_message.payload_serde_json().unwrap().unwrap(); + assert_eq!(message_payload.id, job_id); + + // Verify job status changed to PendingRetry + let job_fetched = config.database().get_job_by_id(job_id).await.unwrap().expect("Could not get job from database"); + assert_eq!(job_fetched.id, job_item.id); + assert_eq!(job_fetched.metadata.common.process_retry_attempt_no, 1); + assert_eq!(job_fetched.status, JobStatus::PendingRetry); +} + +#[rstest] +#[case::pending_verification_job(JobStatus::PendingVerification)] +#[case::completed_job(JobStatus::Completed)] +#[case::created_job(JobStatus::Created)] +#[tokio::test] +async fn test_trigger_retry_job_not_allowed( + #[future] setup_trigger: (SocketAddr, Arc), + #[case] initial_status: JobStatus, +) { + let (addr, 
config) = setup_trigger.await; + let job_type = JobType::DataSubmission; + + let job_item = build_job_item(job_type.clone(), initial_status.clone(), 1); + config.database().create_job(job_item.clone()).await.unwrap(); + let job_id = job_item.clone().id; + + let client = hyper::Client::new(); + let response = client + .request(Request::builder().uri(format!("http://{}/jobs/{}/retry", addr, job_id)).body(Body::empty()).unwrap()) + .await + .unwrap(); + + // Verify request was rejected + assert_eq!(response.status(), 400); + + // Verify job status hasn't changed + let job_fetched = config.database().get_job_by_id(job_id).await.unwrap().expect("Could not get job from database"); + assert_eq!(job_fetched.status, initial_status); + + // Verify no message was added to the queue + let queue_result = config.queue().consume_message_from_queue(job_type.process_queue_name()).await; + assert!(queue_result.is_err(), "Queue should be empty - no message should be added for non-Failed jobs"); } #[rstest] @@ -125,20 +212,3 @@ async fn test_init_consumer() { let services = TestConfigBuilder::new().build().await; assert!(init_consumers(services.config).await.is_ok()); } - -// Test Util Functions -// ========================================== - -pub fn build_job_item(job_type: JobType, job_status: JobStatus, internal_id: u64) -> JobItem { - JobItem { - id: Uuid::new_v4(), - internal_id: internal_id.to_string(), - job_type, - status: job_status, - external_id: ExternalId::Number(0), - metadata: Default::default(), - version: 0, - created_at: Utc::now().round_subsecs(0), - updated_at: Utc::now().round_subsecs(0), - } -} diff --git a/orchestrator/crates/orchestrator/src/tests/utils.rs b/orchestrator/crates/orchestrator/src/tests/utils.rs new file mode 100644 index 000000000..6cd04c0c8 --- /dev/null +++ b/orchestrator/crates/orchestrator/src/tests/utils.rs @@ -0,0 +1,69 @@ +use chrono::{SubsecRound, Utc}; +use uuid::Uuid; + +use crate::constants::{BLOB_DATA_FILE_NAME, CAIRO_PIE_FILE_NAME, 
PROGRAM_OUTPUT_FILE_NAME, SNOS_OUTPUT_FILE_NAME}; +use crate::jobs::metadata::{ + CommonMetadata, DaMetadata, JobMetadata, JobSpecificMetadata, ProvingInputType, ProvingMetadata, SnosMetadata, + StateUpdateMetadata, +}; +use crate::jobs::types::{ExternalId, JobItem, JobStatus, JobType}; + +// Test Util Functions +// ========================================== + +pub fn build_job_item(job_type: JobType, job_status: JobStatus, internal_id: u64) -> JobItem { + let metadata = match job_type { + JobType::StateTransition => JobMetadata { + common: CommonMetadata::default(), + specific: JobSpecificMetadata::StateUpdate(StateUpdateMetadata { + blocks_to_settle: vec![internal_id], + snos_output_paths: vec![format!("{}/{}", internal_id, SNOS_OUTPUT_FILE_NAME)], + program_output_paths: vec![format!("{}/{}", internal_id, PROGRAM_OUTPUT_FILE_NAME)], + blob_data_paths: vec![format!("{}/{}", internal_id, BLOB_DATA_FILE_NAME)], + last_failed_block_no: None, + tx_hashes: Vec::new(), + }), + }, + JobType::SnosRun => JobMetadata { + common: CommonMetadata::default(), + specific: JobSpecificMetadata::Snos(SnosMetadata { + block_number: internal_id, + full_output: false, + cairo_pie_path: Some(format!("{}/{}", internal_id, CAIRO_PIE_FILE_NAME)), + snos_output_path: Some(format!("{}/{}", internal_id, SNOS_OUTPUT_FILE_NAME)), + program_output_path: Some(format!("{}/{}", internal_id, PROGRAM_OUTPUT_FILE_NAME)), + snos_fact: None, + }), + }, + JobType::ProofCreation => JobMetadata { + common: CommonMetadata::default(), + specific: JobSpecificMetadata::Proving(ProvingMetadata { + block_number: internal_id, + input_path: Some(ProvingInputType::CairoPie(format!("{}/{}", internal_id, CAIRO_PIE_FILE_NAME))), + ensure_on_chain_registration: None, + download_proof: None, + }), + }, + JobType::DataSubmission => JobMetadata { + common: CommonMetadata::default(), + specific: JobSpecificMetadata::Da(DaMetadata { + block_number: internal_id, + blob_data_path: Some(format!("{}/{}", internal_id, 
BLOB_DATA_FILE_NAME)), + tx_hash: None, + }), + }, + _ => panic!("Invalid job type"), + }; + + JobItem { + id: Uuid::new_v4(), + internal_id: internal_id.to_string(), + job_type, + status: job_status, + external_id: ExternalId::Number(0), + metadata, + version: 0, + created_at: Utc::now().round_subsecs(0), + updated_at: Utc::now().round_subsecs(0), + } +} diff --git a/orchestrator/crates/orchestrator/src/tests/workers/proving/mod.rs b/orchestrator/crates/orchestrator/src/tests/workers/proving/mod.rs index b097941a0..e16d2899c 100644 --- a/orchestrator/crates/orchestrator/src/tests/workers/proving/mod.rs +++ b/orchestrator/crates/orchestrator/src/tests/workers/proving/mod.rs @@ -1,19 +1,19 @@ use std::error::Error; use std::sync::Arc; +use orchestrator_da_client_interface::MockDaClient; use httpmock::MockServer; use mockall::predicate::eq; -use orchestrator_da_client_interface::MockDaClient; use orchestrator_prover_client_interface::MockProverClient; -use orchestrator_settlement_client_interface::MockSettlementClient; use rstest::rstest; +use orchestrator_settlement_client_interface::MockSettlementClient; use starknet::providers::jsonrpc::HttpTransport; use starknet::providers::JsonRpcClient; use url::Url; use crate::database::MockDatabase; use crate::jobs::job_handler_factory::mock_factory; -use crate::jobs::types::{JobItem, JobStatus, JobType}; +use crate::jobs::types::{JobStatus, JobType}; use crate::jobs::{Job, MockJob}; use crate::queue::MockQueueProvider; use crate::tests::config::TestConfigBuilder; @@ -34,55 +34,62 @@ async fn test_proving_worker(#[case] incomplete_runs: bool) -> Result<(), Box = get_job_by_mock_id_vector(JobType::ProofCreation, JobStatus::Created, 5, 1) - .into_iter() - .filter(|val| val.internal_id != "3") - .collect(); - // Mocking db call for getting successful snos jobs - db.expect_get_jobs_without_successor() - .times(1) - .withf(|_, _, _| true) - .returning(move |_, _, _| Ok(jobs_vec_temp.clone())); - - let num_vec: Vec = vec![1, 2, 4, 
5]; - - for i in num_vec { - db_checks_proving_worker(i, &mut db, &mut job_handler); + // Create mock SNOS jobs with snos_fact field set + let mut snos_jobs = Vec::new(); + let num_jobs = 5; + + for i in 1..=num_jobs { + // Skip job with ID 3 if incomplete_runs is true + if incomplete_runs && i == 3 { + continue; } - // Queue function call simulations + // Create a SNOS job with snos_fact field set + let mut job = get_job_by_mock_id_vector(JobType::SnosRun, JobStatus::Completed, 1, i)[0].clone(); + + // Ensure the SNOS job has a snos_fact field + if let crate::jobs::metadata::JobSpecificMetadata::Snos(ref mut snos_metadata) = job.metadata.specific { + snos_metadata.snos_fact = Some(format!("0x{:064x}", i)); + } + + snos_jobs.push(job); + } + + // Mock db call for getting successful SNOS jobs without successor + db.expect_get_jobs_without_successor() + .times(1) + .withf(|job_type, job_status, successor_type| { + *job_type == JobType::SnosRun + && *job_status == JobStatus::Completed + && *successor_type == JobType::ProofCreation + }) + .returning(move |_, _, _| Ok(snos_jobs.clone())); + + // Set up expectations for each job + for i in 1..=num_jobs { + if incomplete_runs && i == 3 { + continue; + } + db_checks_proving_worker(i as i32, &mut db, &mut job_handler); + } + + // Queue function call simulations + if incomplete_runs { queue .expect_send_message_to_queue() .times(4) .returning(|_, _, _| Ok(())) .withf(|queue, _payload, _delay| *queue == QueueType::ProvingJobProcessing); } else { - for i in 1..5 + 1 { - db_checks_proving_worker(i, &mut db, &mut job_handler); - } - - // Mocking db call for getting successful snos jobs - db.expect_get_jobs_without_successor() - .times(1) - .withf(|_, _, _| true) - .returning(move |_, _, _| Ok(get_job_by_mock_id_vector(JobType::ProofCreation, JobStatus::Created, 5, 1))); - - // Queue function call simulations queue .expect_send_message_to_queue() .times(5) .returning(|_, _, _| Ok(())) .withf(|queue, _payload, _delay| *queue == 
QueueType::ProvingJobProcessing); } - let provider = JsonRpcClient::new(HttpTransport::new( Url::parse(format!("http://localhost:{}", server.port()).as_str()).expect("Failed to parse URL"), )); @@ -99,6 +106,7 @@ async fn test_proving_worker(#[case] incomplete_runs: bool) -> Result<(), Box> = Arc::new(Box::new(job_handler)); let ctx = mock_factory::get_job_handler_context(); + // Mocking the `get_job_handler` call in create_job function. if incomplete_runs { ctx.expect().times(4).with(eq(JobType::ProofCreation)).returning(move |_| Arc::clone(&job_handler)); diff --git a/orchestrator/crates/orchestrator/src/tests/workers/snos/mod.rs b/orchestrator/crates/orchestrator/src/tests/workers/snos/mod.rs index bc563f7f8..2f582dc12 100644 --- a/orchestrator/crates/orchestrator/src/tests/workers/snos/mod.rs +++ b/orchestrator/crates/orchestrator/src/tests/workers/snos/mod.rs @@ -1,9 +1,9 @@ use std::error::Error; use std::sync::Arc; +use orchestrator_da_client_interface::MockDaClient; use httpmock::MockServer; use mockall::predicate::eq; -use orchestrator_da_client_interface::MockDaClient; use rstest::rstest; use serde_json::json; use starknet::providers::jsonrpc::HttpTransport; diff --git a/orchestrator/crates/orchestrator/src/tests/workers/update_state/mod.rs b/orchestrator/crates/orchestrator/src/tests/workers/update_state/mod.rs index c9cc47810..25de55525 100644 --- a/orchestrator/crates/orchestrator/src/tests/workers/update_state/mod.rs +++ b/orchestrator/crates/orchestrator/src/tests/workers/update_state/mod.rs @@ -1,16 +1,16 @@ -use std::collections::HashMap; use std::sync::Arc; use mockall::predicate::eq; use rstest::*; use uuid::Uuid; -use crate::jobs::constants::JOB_METADATA_STATE_UPDATE_BLOCKS_TO_SETTLE_KEY; +use crate::constants::{BLOB_DATA_FILE_NAME, PROGRAM_OUTPUT_FILE_NAME, SNOS_OUTPUT_FILE_NAME}; use crate::jobs::job_handler_factory::mock_factory; +use crate::jobs::metadata::{CommonMetadata, JobMetadata, JobSpecificMetadata, StateUpdateMetadata}; use 
crate::jobs::state_update_job::StateUpdateJob; use crate::jobs::types::{JobStatus, JobType}; use crate::tests::config::{ConfigType, TestConfigBuilder}; -use crate::tests::workers::utils::get_job_item_mock_by_id; +use crate::tests::workers::utils::{create_and_store_prerequisite_jobs, get_job_item_mock_by_id}; use crate::workers::update_state::UpdateStateWorker; use crate::workers::Worker; @@ -48,11 +48,8 @@ async fn update_state_worker_first_block() { .build() .await; - let unique_id = Uuid::new_v4(); - let mut job_item = get_job_item_mock_by_id("0".to_string(), unique_id); - job_item.status = JobStatus::Completed; - job_item.job_type = JobType::DataSubmission; - services.config.database().create_job(job_item).await.unwrap(); + // Create both SNOS and DA jobs for block 0 with Completed status + let (_, _) = create_and_store_prerequisite_jobs(services.config.clone(), 0, JobStatus::Completed).await.unwrap(); let ctx = mock_factory::get_job_handler_context(); ctx.expect().with(eq(JobType::StateTransition)).returning(move |_| Arc::new(Box::new(StateUpdateJob))); @@ -64,7 +61,10 @@ async fn update_state_worker_first_block() { services.config.database().get_latest_job_by_type(JobType::StateTransition).await.unwrap().unwrap(); assert_eq!(latest_job.status, JobStatus::Created); assert_eq!(latest_job.job_type, JobType::StateTransition); - assert_eq!(latest_job.metadata.get(JOB_METADATA_STATE_UPDATE_BLOCKS_TO_SETTLE_KEY).unwrap(), "0"); + + // Get the blocks to settle from the StateUpdateMetadata + let state_metadata: StateUpdateMetadata = latest_job.metadata.specific.clone().try_into().unwrap(); + assert_eq!(state_metadata.blocks_to_settle, vec![0]); } #[rstest] @@ -76,11 +76,9 @@ async fn update_state_worker_first_block_missing() { .build() .await; - // skip first block from DA completion - let mut job_item = get_job_item_mock_by_id("2".to_string(), Uuid::new_v4()); - job_item.status = JobStatus::Completed; - job_item.job_type = JobType::DataSubmission; - 
services.config.database().create_job(job_item).await.unwrap(); + // Create both SNOS and DA jobs for block 2 with Completed status + // Note: Block 0 and 1 are missing, so the worker should not create a job + let (_, _) = create_and_store_prerequisite_jobs(services.config.clone(), 2, JobStatus::Completed).await.unwrap(); let ctx = mock_factory::get_job_handler_context(); ctx.expect().with(eq(JobType::StateTransition)).returning(move |_| Arc::new(Box::new(StateUpdateJob))); @@ -101,21 +99,10 @@ async fn update_state_worker_selects_consective_blocks() { .build() .await; - let mut job_item_one = get_job_item_mock_by_id("0".to_string(), Uuid::new_v4()); - job_item_one.status = JobStatus::Completed; - job_item_one.job_type = JobType::DataSubmission; - services.config.database().create_job(job_item_one).await.unwrap(); - - let mut job_item_two = get_job_item_mock_by_id("1".to_string(), Uuid::new_v4()); - job_item_two.status = JobStatus::Completed; - job_item_two.job_type = JobType::DataSubmission; - services.config.database().create_job(job_item_two).await.unwrap(); - - // skip block 3 - let mut job_item_three = get_job_item_mock_by_id("3".to_string(), Uuid::new_v4()); - job_item_three.status = JobStatus::Completed; - job_item_three.job_type = JobType::DataSubmission; - services.config.database().create_job(job_item_three).await.unwrap(); + // Create both SNOS and DA jobs for blocks 0, 1, and 3 with Completed status + let (_, _) = create_and_store_prerequisite_jobs(services.config.clone(), 0, JobStatus::Completed).await.unwrap(); + let (_, _) = create_and_store_prerequisite_jobs(services.config.clone(), 1, JobStatus::Completed).await.unwrap(); + let (_, _) = create_and_store_prerequisite_jobs(services.config.clone(), 3, JobStatus::Completed).await.unwrap(); let ctx = mock_factory::get_job_handler_context(); ctx.expect().with(eq(JobType::StateTransition)).returning(move |_| Arc::new(Box::new(StateUpdateJob))); @@ -128,7 +115,10 @@ async fn 
update_state_worker_selects_consective_blocks() { // update state worker should not create any job assert_eq!(latest_job.status, JobStatus::Created); assert_eq!(latest_job.job_type, JobType::StateTransition); - assert_eq!(latest_job.metadata.get(JOB_METADATA_STATE_UPDATE_BLOCKS_TO_SETTLE_KEY).unwrap(), "0,1"); + + // Get the blocks to settle from the StateUpdateMetadata + let state_metadata: StateUpdateMetadata = latest_job.metadata.specific.clone().try_into().unwrap(); + assert_eq!(state_metadata.blocks_to_settle, vec![0, 1]); } #[rstest] @@ -140,19 +130,45 @@ async fn update_state_worker_continues_from_previous_state_update() { .build() .await; - // add DA completion job for block 5 - let mut job_item = get_job_item_mock_by_id("5".to_string(), Uuid::new_v4()); - job_item.status = JobStatus::Completed; - job_item.job_type = JobType::DataSubmission; - services.config.database().create_job(job_item).await.unwrap(); + // Create both SNOS and DA jobs for block 5 with Completed status + let (_, _) = create_and_store_prerequisite_jobs(services.config.clone(), 5, JobStatus::Completed).await.unwrap(); // add state transition job for blocks 0-4 let mut job_item = get_job_item_mock_by_id("0".to_string(), Uuid::new_v4()); job_item.status = JobStatus::Completed; job_item.job_type = JobType::StateTransition; - let mut metadata = HashMap::new(); - metadata.insert(JOB_METADATA_STATE_UPDATE_BLOCKS_TO_SETTLE_KEY.to_string(), "0,1,2,3,4".to_string()); - job_item.metadata = metadata; + + // Create proper StateUpdateMetadata with blocks 0-4 + let state_metadata = StateUpdateMetadata { + blocks_to_settle: vec![0, 1, 2, 3, 4], + snos_output_paths: vec![ + format!("{}/{}", 0, SNOS_OUTPUT_FILE_NAME), + format!("{}/{}", 1, SNOS_OUTPUT_FILE_NAME), + format!("{}/{}", 2, SNOS_OUTPUT_FILE_NAME), + format!("{}/{}", 3, SNOS_OUTPUT_FILE_NAME), + format!("{}/{}", 4, SNOS_OUTPUT_FILE_NAME), + ], + program_output_paths: vec![ + format!("{}/{}", 0, PROGRAM_OUTPUT_FILE_NAME), + format!("{}/{}", 1, 
PROGRAM_OUTPUT_FILE_NAME), + format!("{}/{}", 2, PROGRAM_OUTPUT_FILE_NAME), + format!("{}/{}", 3, PROGRAM_OUTPUT_FILE_NAME), + format!("{}/{}", 4, PROGRAM_OUTPUT_FILE_NAME), + ], + blob_data_paths: vec![ + format!("{}/{}", 0, BLOB_DATA_FILE_NAME), + format!("{}/{}", 1, BLOB_DATA_FILE_NAME), + format!("{}/{}", 2, BLOB_DATA_FILE_NAME), + format!("{}/{}", 3, BLOB_DATA_FILE_NAME), + format!("{}/{}", 4, BLOB_DATA_FILE_NAME), + ], + last_failed_block_no: None, + tx_hashes: Vec::new(), + }; + + job_item.metadata = + JobMetadata { common: CommonMetadata::default(), specific: JobSpecificMetadata::StateUpdate(state_metadata) }; + services.config.database().create_job(job_item).await.unwrap(); let ctx = mock_factory::get_job_handler_context(); @@ -163,11 +179,13 @@ async fn update_state_worker_continues_from_previous_state_update() { let latest_job = services.config.database().get_latest_job_by_type(JobType::StateTransition).await.unwrap().unwrap(); - println!("latest job item {:?}", latest_job); // update state worker should not create any job assert_eq!(latest_job.status, JobStatus::Created); assert_eq!(latest_job.job_type, JobType::StateTransition); - assert_eq!(latest_job.metadata.get(JOB_METADATA_STATE_UPDATE_BLOCKS_TO_SETTLE_KEY).unwrap(), "5"); + + // Get the blocks to settle from the StateUpdateMetadata + let state_metadata: StateUpdateMetadata = latest_job.metadata.specific.clone().try_into().unwrap(); + assert_eq!(state_metadata.blocks_to_settle, vec![5]); } #[rstest] @@ -179,20 +197,47 @@ async fn update_state_worker_next_block_missing() { .build() .await; - // add DA completion job for block 5 - let mut job_item = get_job_item_mock_by_id("6".to_string(), Uuid::new_v4()); - job_item.status = JobStatus::Completed; - job_item.job_type = JobType::DataSubmission; - services.config.database().create_job(job_item).await.unwrap(); + // Create both SNOS and DA jobs for block 6 with Completed status + // Note: Block 5 is missing, so the worker should not create a job + let 
(_, _) = create_and_store_prerequisite_jobs(services.config.clone(), 6, JobStatus::Completed).await.unwrap(); // add state transition job for blocks 0-4 let unique_id = Uuid::new_v4(); let mut job_item = get_job_item_mock_by_id("0".to_string(), unique_id); job_item.status = JobStatus::Completed; job_item.job_type = JobType::StateTransition; - let mut metadata = HashMap::new(); - metadata.insert(JOB_METADATA_STATE_UPDATE_BLOCKS_TO_SETTLE_KEY.to_string(), "0,1,2,3,4".to_string()); - job_item.metadata = metadata; + + // Create proper StateUpdateMetadata with blocks 0-4 + let state_metadata = StateUpdateMetadata { + blocks_to_settle: vec![0, 1, 2, 3, 4], + snos_output_paths: vec![ + format!("{}/{}", 0, SNOS_OUTPUT_FILE_NAME), + format!("{}/{}", 1, SNOS_OUTPUT_FILE_NAME), + format!("{}/{}", 2, SNOS_OUTPUT_FILE_NAME), + format!("{}/{}", 3, SNOS_OUTPUT_FILE_NAME), + format!("{}/{}", 4, SNOS_OUTPUT_FILE_NAME), + ], + program_output_paths: vec![ + format!("{}/{}", 0, PROGRAM_OUTPUT_FILE_NAME), + format!("{}/{}", 1, PROGRAM_OUTPUT_FILE_NAME), + format!("{}/{}", 2, PROGRAM_OUTPUT_FILE_NAME), + format!("{}/{}", 3, PROGRAM_OUTPUT_FILE_NAME), + format!("{}/{}", 4, PROGRAM_OUTPUT_FILE_NAME), + ], + blob_data_paths: vec![ + format!("{}/{}", 0, BLOB_DATA_FILE_NAME), + format!("{}/{}", 1, BLOB_DATA_FILE_NAME), + format!("{}/{}", 2, BLOB_DATA_FILE_NAME), + format!("{}/{}", 3, BLOB_DATA_FILE_NAME), + format!("{}/{}", 4, BLOB_DATA_FILE_NAME), + ], + last_failed_block_no: None, + tx_hashes: Vec::new(), + }; + + job_item.metadata = + JobMetadata { common: CommonMetadata::default(), specific: JobSpecificMetadata::StateUpdate(state_metadata) }; + services.config.database().create_job(job_item).await.unwrap(); let ctx = mock_factory::get_job_handler_context(); diff --git a/orchestrator/crates/orchestrator/src/tests/workers/utils/mod.rs b/orchestrator/crates/orchestrator/src/tests/workers/utils/mod.rs index ac4b1b6bc..7534268b1 100644 --- 
a/orchestrator/crates/orchestrator/src/tests/workers/utils/mod.rs +++ b/orchestrator/crates/orchestrator/src/tests/workers/utils/mod.rs @@ -1,21 +1,43 @@ -use std::collections::HashMap; +use std::sync::Arc; use chrono::{SubsecRound, Utc}; use mockall::predicate::eq; use uuid::Uuid; +use crate::config::Config; +use crate::constants::{BLOB_DATA_FILE_NAME, CAIRO_PIE_FILE_NAME, PROGRAM_OUTPUT_FILE_NAME, SNOS_OUTPUT_FILE_NAME}; use crate::database::MockDatabase; +use crate::jobs::metadata::{ + CommonMetadata, DaMetadata, JobMetadata, JobSpecificMetadata, ProvingInputType, ProvingMetadata, SnosMetadata, + StateUpdateMetadata, +}; use crate::jobs::types::{ExternalId, JobItem, JobStatus, JobType}; use crate::jobs::MockJob; pub fn get_job_item_mock_by_id(id: String, uuid: Uuid) -> JobItem { + // Parse the ID as a u64 for use in metadata + let block_number = id.parse::().unwrap_or(0); + + // Create appropriate metadata for SnosRun job type + let metadata = JobMetadata { + common: CommonMetadata::default(), + specific: JobSpecificMetadata::Snos(SnosMetadata { + block_number, + full_output: false, + cairo_pie_path: Some(format!("{}/{}", block_number, CAIRO_PIE_FILE_NAME)), + snos_output_path: Some(format!("{}/{}", block_number, SNOS_OUTPUT_FILE_NAME)), + program_output_path: Some(format!("{}/{}", block_number, PROGRAM_OUTPUT_FILE_NAME)), + snos_fact: None, + }), + }; + JobItem { id: uuid, internal_id: id.clone(), job_type: JobType::SnosRun, status: JobStatus::Created, external_id: ExternalId::Number(0), - metadata: HashMap::new(), + metadata, version: 0, created_at: Utc::now().round_subsecs(0), updated_at: Utc::now().round_subsecs(0), @@ -43,13 +65,17 @@ pub fn get_job_by_mock_id_vector( for i in start_index..number_of_jobs + start_index { let uuid = Uuid::new_v4(); + + // Create appropriate metadata based on job type + let metadata = create_metadata_for_job_type(job_type.clone(), i); + jobs_vec.push(JobItem { id: uuid, internal_id: i.to_string(), job_type: job_type.clone(), 
status: job_status.clone(), external_id: ExternalId::Number(0), - metadata: HashMap::new(), + metadata, version: 0, created_at: Utc::now().round_subsecs(0), updated_at: Utc::now().round_subsecs(0), @@ -59,32 +85,154 @@ pub fn get_job_by_mock_id_vector( jobs_vec } -pub fn db_checks_proving_worker(id: i32, db: &mut MockDatabase, mock_job: &mut MockJob) { - fn get_job_item_mock_by_id(id: i32) -> JobItem { - let uuid = Uuid::new_v4(); - JobItem { - id: uuid, - internal_id: id.to_string(), - job_type: JobType::ProofCreation, - status: JobStatus::Created, - external_id: ExternalId::Number(0), - metadata: HashMap::new(), - version: 0, - created_at: Utc::now().round_subsecs(0), - updated_at: Utc::now().round_subsecs(0), - } +/// Helper function to create appropriate metadata based on job type +fn create_metadata_for_job_type(job_type: JobType, block_number: u64) -> JobMetadata { + match job_type { + JobType::SnosRun => JobMetadata { + common: CommonMetadata::default(), + specific: JobSpecificMetadata::Snos(SnosMetadata { + block_number, + full_output: false, + cairo_pie_path: Some(format!("{}/{}", block_number, CAIRO_PIE_FILE_NAME)), + snos_output_path: Some(format!("{}/{}", block_number, SNOS_OUTPUT_FILE_NAME)), + program_output_path: Some(format!("{}/{}", block_number, PROGRAM_OUTPUT_FILE_NAME)), + snos_fact: Some(String::from("0xdeadbeef")), + }), + }, + JobType::DataSubmission => JobMetadata { + common: CommonMetadata::default(), + specific: JobSpecificMetadata::Da(DaMetadata { + block_number, + blob_data_path: Some(format!("{}/{}", block_number, BLOB_DATA_FILE_NAME)), + tx_hash: None, + }), + }, + JobType::ProofCreation => JobMetadata { + common: CommonMetadata::default(), + specific: JobSpecificMetadata::Proving(ProvingMetadata { + block_number, + input_path: Some(ProvingInputType::CairoPie(format!("{}/{}", block_number, CAIRO_PIE_FILE_NAME))), + ensure_on_chain_registration: None, + download_proof: None, + }), + }, + JobType::StateTransition => JobMetadata { + 
common: CommonMetadata::default(), + specific: JobSpecificMetadata::StateUpdate(StateUpdateMetadata { + blocks_to_settle: vec![block_number], + snos_output_paths: vec![format!("{}/{}", block_number, SNOS_OUTPUT_FILE_NAME)], + program_output_paths: vec![format!("{}/{}", block_number, PROGRAM_OUTPUT_FILE_NAME)], + blob_data_paths: vec![format!("{}/{}", block_number, BLOB_DATA_FILE_NAME)], + last_failed_block_no: None, + tx_hashes: Vec::new(), + }), + }, + // For any other job types, use a default metadata structure + _ => JobMetadata { + common: CommonMetadata::default(), + specific: JobSpecificMetadata::Snos(SnosMetadata { + block_number, + full_output: false, + cairo_pie_path: None, + snos_output_path: None, + program_output_path: None, + snos_fact: None, + }), + }, } +} + +/// Creates and stores both SNOS and DA jobs for a given block number +/// This ensures that the update state worker can find the required jobs +/// +/// Arguments: +/// +/// `config` - The configuration containing the database client +/// `block_number` - The block number for which to create jobs +/// `job_status` - The status to set for the created jobs +/// +/// Returns: +/// A tuple of (SNOS job UUID, DA job UUID) +pub async fn create_and_store_prerequisite_jobs( + config: Arc, + block_number: u64, + job_status: JobStatus, +) -> color_eyre::Result<(Uuid, Uuid)> { + // Create SNOS job + let snos_uuid = Uuid::new_v4(); + let snos_job = JobItem { + id: snos_uuid, + internal_id: block_number.to_string(), + job_type: JobType::SnosRun, + status: job_status.clone(), + external_id: ExternalId::Number(0), + metadata: create_metadata_for_job_type(JobType::SnosRun, block_number), + version: 0, + created_at: Utc::now().round_subsecs(0), + updated_at: Utc::now().round_subsecs(0), + }; + // Create DA job + let da_uuid = Uuid::new_v4(); + let da_job = JobItem { + id: da_uuid, + internal_id: block_number.to_string(), + job_type: JobType::DataSubmission, + status: job_status, + external_id: 
ExternalId::Number(0), + metadata: create_metadata_for_job_type(JobType::DataSubmission, block_number), + version: 0, + created_at: Utc::now().round_subsecs(0), + updated_at: Utc::now().round_subsecs(0), + }; + + // Store jobs in database + config.database().create_job(snos_job).await?; + config.database().create_job(da_job).await?; + + Ok((snos_uuid, da_uuid)) +} + +pub fn db_checks_proving_worker(id: i32, db: &mut MockDatabase, mock_job: &mut MockJob) { + // Create a job item with proper metadata for ProofCreation job type + let uuid = Uuid::new_v4(); + let block_number = id as u64; + + // Create proving metadata with the SNOS fact + let metadata = JobMetadata { + common: CommonMetadata::default(), + specific: JobSpecificMetadata::Proving(ProvingMetadata { + block_number, + input_path: Some(ProvingInputType::CairoPie(format!("{}/{}", block_number, CAIRO_PIE_FILE_NAME))), + ensure_on_chain_registration: Some(format!("0x{:064x}", block_number)), // Add the SNOS fact + download_proof: None, + }), + }; + + let job_item = JobItem { + id: uuid, + internal_id: id.to_string(), + job_type: JobType::ProofCreation, + status: JobStatus::Created, + external_id: ExternalId::Number(0), + metadata, + version: 0, + created_at: Utc::now().round_subsecs(0), + updated_at: Utc::now().round_subsecs(0), + }; + + let job_item_cloned = job_item.clone(); + + // Check if a proving job already exists for this SNOS job db.expect_get_job_by_internal_id_and_type() .times(1) .with(eq(id.clone().to_string()), eq(JobType::ProofCreation)) .returning(|_, _| Ok(None)); - let job_item = get_job_item_mock_by_id(id); - let job_item_cloned = job_item.clone(); - + // Create the proving job mock_job.expect_create_job().times(1).returning(move |_, _, _| Ok(job_item.clone())); + // Store the job in the database db.expect_create_job() .times(1) .withf(move |item| item.internal_id == id.clone().to_string()) diff --git a/orchestrator/crates/orchestrator/src/workers/data_submission_worker.rs 
b/orchestrator/crates/orchestrator/src/workers/data_submission_worker.rs index 24585435d..b3eb1e87e 100644 --- a/orchestrator/crates/orchestrator/src/workers/data_submission_worker.rs +++ b/orchestrator/crates/orchestrator/src/workers/data_submission_worker.rs @@ -1,11 +1,12 @@ -use std::collections::HashMap; use std::sync::Arc; use async_trait::async_trait; use opentelemetry::KeyValue; use crate::config::Config; +use crate::constants::BLOB_DATA_FILE_NAME; use crate::jobs::create_job; +use crate::jobs::metadata::{CommonMetadata, DaMetadata, JobMetadata, JobSpecificMetadata, ProvingMetadata}; use crate::jobs::types::{JobStatus, JobType}; use crate::metrics::ORCHESTRATOR_METRICS; use crate::workers::Worker; @@ -25,11 +26,42 @@ impl Worker for DataSubmissionWorker { .get_jobs_without_successor(JobType::ProofCreation, JobStatus::Completed, JobType::DataSubmission) .await?; - for job in successful_proving_jobs { - match create_job(JobType::DataSubmission, job.internal_id.clone(), HashMap::new(), config.clone()).await { - Ok(_) => tracing::info!(block_id = %job.internal_id, "Successfully created new data submission job"), + for proving_job in successful_proving_jobs { + // Extract proving metadata + let proving_metadata: ProvingMetadata = proving_job.metadata.specific.try_into().map_err(|e| { + tracing::error!( + job_id = %proving_job.internal_id, + error = %e, + "Invalid metadata type for proving job" + ); + e + })?; + + // Create DA metadata + let da_metadata = JobMetadata { + common: CommonMetadata::default(), + specific: JobSpecificMetadata::Da(DaMetadata { + block_number: proving_metadata.block_number, + // Set the blob data path using block number + blob_data_path: Some(format!("{}/{BLOB_DATA_FILE_NAME}", proving_metadata.block_number)), + // These will be populated during processing + tx_hash: None, + }), + }; + + match create_job(JobType::DataSubmission, proving_job.internal_id.clone(), da_metadata, config.clone()) + .await + { + Ok(_) => tracing::info!( + 
block_id = %proving_job.internal_id, + "Successfully created new data submission job" + ), Err(e) => { - tracing::warn!(block_id = %job.internal_id, error = %e, "Failed to create new data submission job"); + tracing::warn!( + block_id = %proving_job.internal_id, + error = %e, + "Failed to create new data submission job" + ); let attributes = [ KeyValue::new("operation_job_type", format!("{:?}", JobType::DataSubmission)), KeyValue::new("operation_type", format!("{:?}", "create_job")), diff --git a/orchestrator/crates/orchestrator/src/workers/proving.rs b/orchestrator/crates/orchestrator/src/workers/proving.rs index 66c52dfe0..5a4f3d1fb 100644 --- a/orchestrator/crates/orchestrator/src/workers/proving.rs +++ b/orchestrator/crates/orchestrator/src/workers/proving.rs @@ -5,6 +5,9 @@ use opentelemetry::KeyValue; use crate::config::Config; use crate::jobs::create_job; +use crate::jobs::metadata::{ + CommonMetadata, JobMetadata, JobSpecificMetadata, ProvingInputType, ProvingMetadata, SnosMetadata, +}; use crate::jobs::types::{JobStatus, JobType}; use crate::metrics::ORCHESTRATOR_METRICS; use crate::workers::Worker; @@ -25,12 +28,43 @@ impl Worker for ProvingWorker { tracing::debug!("Found {} successful SNOS jobs without proving jobs", successful_snos_jobs.len()); - for job in successful_snos_jobs { - tracing::debug!(job_id = %job.internal_id, "Creating proof creation job for SNOS job"); - match create_job(JobType::ProofCreation, job.internal_id.to_string(), job.metadata, config.clone()).await { - Ok(_) => tracing::info!(block_id = %job.internal_id, "Successfully created new proving job"), + for snos_job in successful_snos_jobs { + // Extract SNOS metadata + let snos_metadata: SnosMetadata = snos_job.metadata.specific.try_into().map_err(|e| { + tracing::error!(job_id = %snos_job.internal_id, error = %e, "Invalid metadata type for SNOS job"); + e + })?; + + // Get SNOS fact early to handle the error case + let snos_fact = match &snos_metadata.snos_fact { + Some(fact) => 
fact.clone(), + None => { + tracing::error!(job_id = %snos_job.internal_id, "SNOS fact not found in metadata"); + continue; + } + }; + + // Create proving job metadata + let proving_metadata = JobMetadata { + common: CommonMetadata::default(), + specific: JobSpecificMetadata::Proving(ProvingMetadata { + block_number: snos_metadata.block_number, + // Set input path as CairoPie type + input_path: snos_metadata.cairo_pie_path.map(ProvingInputType::CairoPie), + // Set download path if needed + download_proof: None, + // Set SNOS fact for on-chain verification + ensure_on_chain_registration: Some(snos_fact), + }), + }; + + tracing::debug!(job_id = %snos_job.internal_id, "Creating proof creation job for SNOS job"); + match create_job(JobType::ProofCreation, snos_job.internal_id.clone(), proving_metadata, config.clone()) + .await + { + Ok(_) => tracing::info!(block_id = %snos_job.internal_id, "Successfully created new proving job"), Err(e) => { - tracing::warn!(job_id = %job.internal_id, error = %e, "Failed to create new state transition job"); + tracing::warn!(job_id = %snos_job.internal_id, error = %e, "Failed to create new proving job"); let attributes = [ KeyValue::new("operation_job_type", format!("{:?}", JobType::ProofCreation)), KeyValue::new("operation_type", format!("{:?}", "create_job")), diff --git a/orchestrator/crates/orchestrator/src/workers/snos.rs b/orchestrator/crates/orchestrator/src/workers/snos.rs index 0abaf2a1c..5be9a9f6a 100644 --- a/orchestrator/crates/orchestrator/src/workers/snos.rs +++ b/orchestrator/crates/orchestrator/src/workers/snos.rs @@ -1,5 +1,4 @@ use std::cmp::{max, min}; -use std::collections::HashMap; use std::sync::Arc; use async_trait::async_trait; @@ -8,7 +7,9 @@ use opentelemetry::KeyValue; use starknet::providers::Provider; use crate::config::Config; +use crate::constants::{CAIRO_PIE_FILE_NAME, PROGRAM_OUTPUT_FILE_NAME, SNOS_OUTPUT_FILE_NAME}; use crate::jobs::create_job; +use crate::jobs::metadata::{CommonMetadata, JobMetadata, 
JobSpecificMetadata, SnosMetadata}; use crate::jobs::types::JobType; use crate::metrics::ORCHESTRATOR_METRICS; use crate::workers::Worker; @@ -52,7 +53,21 @@ impl Worker for SnosWorker { }; for block_num in block_start..latest_block_number + 1 { - match create_job(JobType::SnosRun, block_num.to_string(), HashMap::new(), config.clone()).await { + // Create typed metadata structure with predefined paths + let metadata = JobMetadata { + common: CommonMetadata::default(), + specific: JobSpecificMetadata::Snos(SnosMetadata { + block_number: block_num, + full_output: false, + // Set the storage paths using block number + cairo_pie_path: Some(format!("{}/{}", block_num, CAIRO_PIE_FILE_NAME)), + snos_output_path: Some(format!("{}/{}", block_num, SNOS_OUTPUT_FILE_NAME)), + program_output_path: Some(format!("{}/{}", block_num, PROGRAM_OUTPUT_FILE_NAME)), + snos_fact: None, + }), + }; + + match create_job(JobType::SnosRun, block_num.to_string(), metadata, config.clone()).await { Ok(_) => tracing::info!(block_id = %block_num, "Successfully created new Snos job"), Err(e) => { tracing::warn!(block_id = %block_num, error = %e, "Failed to create new Snos job"); diff --git a/orchestrator/crates/orchestrator/src/workers/update_state.rs b/orchestrator/crates/orchestrator/src/workers/update_state.rs index a6b7da2d1..d91388d46 100644 --- a/orchestrator/crates/orchestrator/src/workers/update_state.rs +++ b/orchestrator/crates/orchestrator/src/workers/update_state.rs @@ -1,12 +1,14 @@ -use std::collections::HashMap; use std::sync::Arc; use async_trait::async_trait; +use color_eyre::eyre::eyre; use opentelemetry::KeyValue; use crate::config::Config; -use crate::jobs::constants::JOB_METADATA_STATE_UPDATE_BLOCKS_TO_SETTLE_KEY; use crate::jobs::create_job; +use crate::jobs::metadata::{ + CommonMetadata, DaMetadata, JobMetadata, JobSpecificMetadata, SnosMetadata, StateUpdateMetadata, +}; use crate::jobs::types::{JobStatus, JobType}; use crate::metrics::ORCHESTRATOR_METRICS; use 
crate::workers::Worker; @@ -19,7 +21,6 @@ impl Worker for UpdateStateWorker { tracing::trace!(log_type = "starting", category = "UpdateStateWorker", "UpdateStateWorker started."); let latest_job = config.database().get_latest_job_by_type(JobType::StateTransition).await?; - let (completed_da_jobs, last_block_processed_in_last_job) = match latest_job { Some(job) => { if job.status != JobStatus::Completed { @@ -30,19 +31,20 @@ impl Worker for UpdateStateWorker { return Ok(()); } - let mut blocks_processed_in_last_job: Vec = job - .metadata - .get(JOB_METADATA_STATE_UPDATE_BLOCKS_TO_SETTLE_KEY) - .unwrap() - .split(',') - .filter_map(|s| s.parse().ok()) - .collect(); + // Extract blocks from state transition metadata + let state_metadata: StateUpdateMetadata = job.metadata.specific + .try_into() + .map_err(|e| { + tracing::error!(job_id = %job.internal_id, error = %e, "Invalid metadata type for state transition job"); + e + })?; - // ideally it's already sorted, but just to be safe - blocks_processed_in_last_job.sort(); + let mut blocks_processed = state_metadata.blocks_to_settle.clone(); + blocks_processed.sort(); - let last_block_processed_in_last_job = - blocks_processed_in_last_job[blocks_processed_in_last_job.len() - 1]; + let last_block_processed = *blocks_processed + .last() + .ok_or_else(|| eyre!("No blocks found in previous state transition job"))?; ( config @@ -50,10 +52,10 @@ impl Worker for UpdateStateWorker { .get_jobs_after_internal_id_by_job_type( JobType::DataSubmission, JobStatus::Completed, - last_block_processed_in_last_job.to_string(), + last_block_processed.to_string(), ) .await?, - Some(last_block_processed_in_last_job), + Some(last_block_processed), ) } None => { @@ -83,11 +85,10 @@ impl Worker for UpdateStateWorker { return Ok(()); } + // Verify block continuity match last_block_processed_in_last_job { - Some(last_block_processed_in_last_job) => { - // DA job for the block just after the last settled block - // is not yet completed - if 
blocks_to_process[0] != last_block_processed_in_last_job + 1 { + Some(last_block) => { + if blocks_to_process[0] != last_block + 1 { log::warn!( "DA job for the block just after the last settled block is not yet completed. Returning \ safely..." @@ -96,26 +97,72 @@ impl Worker for UpdateStateWorker { } } None => { - if blocks_to_process[0] != 0 { + let min_block_to_process = config.service_config().min_block_to_process.unwrap_or(0); + if blocks_to_process[0] != min_block_to_process { log::warn!("DA job for the first block is not yet completed. Returning safely..."); return Ok(()); } } } - let mut blocks_to_process: Vec = find_successive_blocks_in_vector(blocks_to_process); - + let mut blocks_to_process = find_successive_blocks_in_vector(blocks_to_process); if blocks_to_process.len() > 10 { blocks_to_process = blocks_to_process.into_iter().take(10).collect(); } - let mut metadata = HashMap::new(); - metadata.insert( - JOB_METADATA_STATE_UPDATE_BLOCKS_TO_SETTLE_KEY.to_string(), - blocks_to_process.iter().map(|ele| ele.to_string()).collect::>().join(","), - ); + // Prepare state transition metadata + let mut state_metadata = StateUpdateMetadata { + blocks_to_settle: blocks_to_process.clone(), + snos_output_paths: Vec::new(), + program_output_paths: Vec::new(), + blob_data_paths: Vec::new(), + last_failed_block_no: None, + tx_hashes: Vec::new(), + }; + + // Collect paths from SNOS and DA jobs + for block_number in &blocks_to_process { + // Get SNOS job paths + let snos_job = config + .database() + .get_job_by_internal_id_and_type(&block_number.to_string(), &JobType::SnosRun) + .await? 
+ .ok_or_else(|| eyre!("SNOS job not found for block {}", block_number))?; + let snos_metadata: SnosMetadata = snos_job.metadata.specific.try_into().map_err(|e| { + tracing::error!(job_id = %snos_job.internal_id, error = %e, "Invalid metadata type for SNOS job"); + e + })?; + + if let Some(snos_path) = &snos_metadata.snos_output_path { + state_metadata.snos_output_paths.push(snos_path.clone()); + } + if let Some(program_path) = &snos_metadata.program_output_path { + state_metadata.program_output_paths.push(program_path.clone()); + } + + // Get DA job blob path + let da_job = config + .database() + .get_job_by_internal_id_and_type(&block_number.to_string(), &JobType::DataSubmission) + .await? + .ok_or_else(|| eyre!("DA job not found for block {}", block_number))?; + + let da_metadata: DaMetadata = da_job.metadata.specific.try_into().map_err(|e| { + tracing::error!(job_id = %da_job.internal_id, error = %e, "Invalid metadata type for DA job"); + e + })?; + + if let Some(blob_path) = &da_metadata.blob_data_path { + state_metadata.blob_data_paths.push(blob_path.clone()); + } + } + // Create job metadata + let metadata = JobMetadata { + common: CommonMetadata::default(), + specific: JobSpecificMetadata::StateUpdate(state_metadata), + }; - // Creating a single job for all the pending blocks. 
+ // Create the state transition job let new_job_id = blocks_to_process[0].to_string(); match create_job(JobType::StateTransition, new_job_id.clone(), metadata, config.clone()).await { Ok(_) => tracing::info!(block_id = %new_job_id, "Successfully created new state transition job"), diff --git a/orchestrator/crates/prover-clients/atlantic-service/Cargo.toml b/orchestrator/crates/prover-clients/atlantic-service/Cargo.toml index e036be171..6d7dc527b 100644 --- a/orchestrator/crates/prover-clients/atlantic-service/Cargo.toml +++ b/orchestrator/crates/prover-clients/atlantic-service/Cargo.toml @@ -12,13 +12,12 @@ chrono.workspace = true color-eyre.workspace = true dotenvy.workspace = true env_logger.workspace = true +orchestrator-gps-fact-checker.workspace = true hex.workspace = true httpmock = { version = "0.8.0-alpha.1", features = ["proxy", "remote"] } lazy_static.workspace = true log.workspace = true -orchestrator-gps-fact-checker.workspace = true orchestrator-prover-client-interface.workspace = true -orchestrator-utils.workspace = true reqwest.workspace = true rstest.workspace = true serde.workspace = true @@ -31,6 +30,7 @@ thiserror.workspace = true tokio.workspace = true tokio-util = { version = "0.7.12", features = ["codec"] } url.workspace = true +orchestrator-utils.workspace = true uuid.workspace = true #Instrumentation diff --git a/orchestrator/crates/prover-clients/atlantic-service/src/lib.rs b/orchestrator/crates/prover-clients/atlantic-service/src/lib.rs index 95788db0c..55d042914 100644 --- a/orchestrator/crates/prover-clients/atlantic-service/src/lib.rs +++ b/orchestrator/crates/prover-clients/atlantic-service/src/lib.rs @@ -67,18 +67,43 @@ impl ProverClient for AtlanticProverService { } #[tracing::instrument(skip(self))] - async fn get_task_status(&self, job_key: &str, fact: &str) -> Result { + async fn get_task_status( + &self, + job_key: &str, + fact: Option, + cross_verify: bool, + ) -> Result { let res = 
self.atlantic_client.get_job_status(job_key).await?; + match res.atlantic_query.status { AtlanticQueryStatus::InProgress => Ok(TaskStatus::Processing), + AtlanticQueryStatus::Done => { - let fact = B256::from_str(fact).map_err(|e| ProverClientError::FailedToConvertFact(e.to_string()))?; + if !cross_verify { + tracing::debug!("Skipping cross-verification as it's disabled"); + return Ok(TaskStatus::Succeeded); + } + + // Cross verification is enabled + let fact_str = match fact { + Some(f) => f, + None => { + return Ok(TaskStatus::Failed("Cross verification enabled but no fact provided".to_string())); + } + }; + + let fact = + B256::from_str(&fact_str).map_err(|e| ProverClientError::FailedToConvertFact(e.to_string()))?; + + tracing::debug!(fact = %hex::encode(fact), "Cross-verifying fact on chain"); + if self.fact_checker.is_valid(&fact).await? { Ok(TaskStatus::Succeeded) } else { Ok(TaskStatus::Failed(format!("Fact {} is not valid or not registered", hex::encode(fact)))) } } + AtlanticQueryStatus::Failed => { Ok(TaskStatus::Failed("Task failed while processing on Atlantic side".to_string())) } diff --git a/orchestrator/crates/prover-clients/gps-fact-checker/Cargo.toml b/orchestrator/crates/prover-clients/gps-fact-checker/Cargo.toml index 2cf93e40d..1ee03553f 100644 --- a/orchestrator/crates/prover-clients/gps-fact-checker/Cargo.toml +++ b/orchestrator/crates/prover-clients/gps-fact-checker/Cargo.toml @@ -22,12 +22,12 @@ dotenvy.workspace = true itertools.workspace = true log.workspace = true num-bigint.workspace = true -orchestrator-utils.workspace = true serde.workspace = true starknet.workspace = true thiserror.workspace = true tokio.workspace = true url.workspace = true +orchestrator-utils.workspace = true #Instrumentation opentelemetry = { workspace = true, features = ["metrics", "logs"] } diff --git a/orchestrator/crates/prover-clients/prover-client-interface/Cargo.toml b/orchestrator/crates/prover-clients/prover-client-interface/Cargo.toml index 
dd93ab512..1ecd6bc42 100644 --- a/orchestrator/crates/prover-clients/prover-client-interface/Cargo.toml +++ b/orchestrator/crates/prover-clients/prover-client-interface/Cargo.toml @@ -6,8 +6,8 @@ edition.workspace = true [dependencies] async-trait.workspace = true cairo-vm.workspace = true -mockall.workspace = true orchestrator-gps-fact-checker.workspace = true -orchestrator-utils.workspace = true +mockall.workspace = true starknet-os.workspace = true thiserror.workspace = true +orchestrator-utils.workspace = true diff --git a/orchestrator/crates/prover-clients/prover-client-interface/src/lib.rs b/orchestrator/crates/prover-clients/prover-client-interface/src/lib.rs index 75c020d7b..0158bd65d 100644 --- a/orchestrator/crates/prover-clients/prover-client-interface/src/lib.rs +++ b/orchestrator/crates/prover-clients/prover-client-interface/src/lib.rs @@ -16,7 +16,12 @@ use orchestrator_gps_fact_checker::FactCheckerError; #[async_trait] pub trait ProverClient: Send + Sync { async fn submit_task(&self, task: Task, proof_layout: LayoutName) -> Result; - async fn get_task_status(&self, task_id: &str, fact: &str) -> Result; + async fn get_task_status( + &self, + task_id: &str, + fact: Option, + cross_verify: bool, + ) -> Result; } pub enum Task { diff --git a/orchestrator/crates/prover-clients/sharp-service/Cargo.toml b/orchestrator/crates/prover-clients/sharp-service/Cargo.toml index c76ed13da..2acf24ce7 100644 --- a/orchestrator/crates/prover-clients/sharp-service/Cargo.toml +++ b/orchestrator/crates/prover-clients/sharp-service/Cargo.toml @@ -10,13 +10,12 @@ base64.workspace = true cairo-vm.workspace = true color-eyre.workspace = true dotenvy.workspace = true +orchestrator-gps-fact-checker.workspace = true hex.workspace = true httpmock = { version = "0.8.0-alpha.1", features = ["proxy", "remote"] } lazy_static.workspace = true log.workspace = true -orchestrator-gps-fact-checker.workspace = true orchestrator-prover-client-interface.workspace = true 
-orchestrator-utils.workspace = true reqwest.workspace = true rstest.workspace = true serde.workspace = true @@ -25,6 +24,7 @@ starknet-os.workspace = true thiserror.workspace = true tokio.workspace = true url.workspace = true +orchestrator-utils.workspace = true uuid.workspace = true #Instrumentation diff --git a/orchestrator/crates/prover-clients/sharp-service/src/lib.rs b/orchestrator/crates/prover-clients/sharp-service/src/lib.rs index efcabdef6..ee28ba936 100644 --- a/orchestrator/crates/prover-clients/sharp-service/src/lib.rs +++ b/orchestrator/crates/prover-clients/sharp-service/src/lib.rs @@ -63,7 +63,12 @@ impl ProverClient for SharpProverService { } #[tracing::instrument(skip(self), ret, err)] - async fn get_task_status(&self, job_key: &str, fact: &str) -> Result { + async fn get_task_status( + &self, + job_key: &str, + fact: Option, + _cross_verify: bool, + ) -> Result { tracing::info!( log_type = "starting", category = "get_task_status", @@ -117,26 +122,34 @@ impl ProverClient for SharpProverService { ); Ok(TaskStatus::Processing) } - CairoJobStatus::ONCHAIN => { - let fact = B256::from_str(fact).map_err(|e| ProverClientError::FailedToConvertFact(e.to_string()))?; - if self.fact_checker.is_valid(&fact).await? { - tracing::info!( - log_type = "onchain", - category = "get_task_status", - function_type = "cairo_pie", - "Cairo PIE task status: ONCHAIN and fact is valid." - ); + CairoJobStatus::ONCHAIN => match fact { + Some(fact_str) => { + let fact = + B256::from_str(&fact_str).map_err(|e| ProverClientError::FailedToConvertFact(e.to_string()))?; + + if self.fact_checker.is_valid(&fact).await? { + tracing::info!( + log_type = "onchain", + category = "get_task_status", + function_type = "cairo_pie", + "Cairo PIE task status: ONCHAIN and fact is valid." 
+ ); + Ok(TaskStatus::Succeeded) + } else { + tracing::error!( + log_type = "onchain_failed", + category = "get_task_status", + function_type = "cairo_pie", + "Cairo PIE task status: ONCHAIN and fact is not valid." + ); + Ok(TaskStatus::Failed(format!("Fact {} is not valid or not registered", hex::encode(fact)))) + } + } + None => { + tracing::debug!("No fact provided for verification, considering job successful"); Ok(TaskStatus::Succeeded) - } else { - tracing::error!( - log_type = "onchain_failed", - category = "get_task_status", - function_type = "cairo_pie", - "Cairo PIE task status: ONCHAIN and fact is not valid." - ); - Ok(TaskStatus::Failed(format!("Fact {} is not valid or not registered", hex::encode(fact)))) } - } + }, } } } diff --git a/orchestrator/crates/prover-clients/sharp-service/tests/lib.rs b/orchestrator/crates/prover-clients/sharp-service/tests/lib.rs index 02c2bc749..e26b80528 100644 --- a/orchestrator/crates/prover-clients/sharp-service/tests/lib.rs +++ b/orchestrator/crates/prover-clients/sharp-service/tests/lib.rs @@ -87,7 +87,10 @@ async fn prover_client_get_task_status_works(#[case] cairo_job_status: CairoJobS then.status(200).body(serde_json::to_vec(&get_task_status_sharp_response(&cairo_job_status)).unwrap()); }); - let task_status = sharp_service.get_task_status("c31381bf-4739-4667-b5b8-b08af1c6b1c7", TEST_FACT).await.unwrap(); + let task_status = sharp_service + .get_task_status("c31381bf-4739-4667-b5b8-b08af1c6b1c7", Some(TEST_FACT.to_string()), false) + .await + .unwrap(); assert_eq!(task_status, get_task_status_expectation(&cairo_job_status), "Cairo Job Status assertion failed"); sharp_add_job_call.assert(); diff --git a/orchestrator/crates/settlement-clients/ethereum/Cargo.toml b/orchestrator/crates/settlement-clients/ethereum/Cargo.toml index 3e7409085..246380e76 100644 --- a/orchestrator/crates/settlement-clients/ethereum/Cargo.toml +++ b/orchestrator/crates/settlement-clients/ethereum/Cargo.toml @@ -22,14 +22,14 @@ dotenvy = { 
workspace = true } lazy_static = { workspace = true } log.workspace = true mockall = { workspace = true } -orchestrator-settlement-client-interface = { workspace = true } -orchestrator-utils = { workspace = true } reqwest = { workspace = true } rstest = { workspace = true } serde = { workspace = true, features = ["derive"] } +orchestrator-settlement-client-interface = { workspace = true } starknet-os = { workspace = true } tokio = { workspace = true } url = { workspace = true } +orchestrator-utils = { workspace = true } #Instrumentation opentelemetry = { workspace = true, features = ["metrics", "logs"] } diff --git a/orchestrator/crates/settlement-clients/ethereum/src/lib.rs b/orchestrator/crates/settlement-clients/ethereum/src/lib.rs index ec7640508..f306b0c0d 100644 --- a/orchestrator/crates/settlement-clients/ethereum/src/lib.rs +++ b/orchestrator/crates/settlement-clients/ethereum/src/lib.rs @@ -23,9 +23,9 @@ use color_eyre::eyre::{bail, Ok}; use color_eyre::Result; use conversion::{get_input_data_for_eip_4844, prepare_sidecar}; use orchestrator_settlement_client_interface::{SettlementClient, SettlementVerificationStatus}; +use url::Url; #[cfg(feature = "testing")] use orchestrator_utils::env_utils::get_env_var_or_panic; -use url::Url; use crate::clients::interfaces::validity_interface::StarknetValidityContractTrait; use crate::clients::StarknetValidityContractClient; diff --git a/orchestrator/crates/settlement-clients/settlement-client-interface/Cargo.toml b/orchestrator/crates/settlement-clients/settlement-client-interface/Cargo.toml index 1892247d9..64b89a085 100644 --- a/orchestrator/crates/settlement-clients/settlement-client-interface/Cargo.toml +++ b/orchestrator/crates/settlement-clients/settlement-client-interface/Cargo.toml @@ -12,5 +12,5 @@ axum = { workspace = true } c-kzg = { workspace = true } color-eyre = { workspace = true } mockall = { workspace = true } -orchestrator-utils = { workspace = true } starknet = { workspace = true } 
+orchestrator-utils = { workspace = true } diff --git a/orchestrator/crates/settlement-clients/starknet/Cargo.toml b/orchestrator/crates/settlement-clients/starknet/Cargo.toml index e48ed6b70..5215752b4 100644 --- a/orchestrator/crates/settlement-clients/starknet/Cargo.toml +++ b/orchestrator/crates/settlement-clients/starknet/Cargo.toml @@ -15,15 +15,15 @@ dotenvy.workspace = true lazy_static = { workspace = true } log = { workspace = true } mockall = { workspace = true } -orchestrator-settlement-client-interface = { workspace = true } -orchestrator-utils = { workspace = true } reqwest = { workspace = true } rstest = { workspace = true } serde = { workspace = true } +orchestrator-settlement-client-interface = { workspace = true } starknet = { workspace = true } tempfile.workspace = true tokio = { workspace = true } url = { workspace = true } +orchestrator-utils = { workspace = true } #Instrumentation opentelemetry = { workspace = true, features = ["metrics", "logs"] } diff --git a/orchestrator/crates/utils/src/http_client.rs b/orchestrator/crates/utils/src/http_client.rs index 2b4c862ab..be7264c43 100644 --- a/orchestrator/crates/utils/src/http_client.rs +++ b/orchestrator/crates/utils/src/http_client.rs @@ -15,7 +15,7 @@ //! //! # Examples //! ``` -//! use utils::http_client::HttpClient; +//! use orchestrator_utils::http_client::HttpClient; //! //! let client = HttpClient::builder("https://api.example.com") //! .default_header("Authorization", "Bearer token")