From 98e025ce85a3dd72ee3dcd5d0bf2c27b4e16b312 Mon Sep 17 00:00:00 2001 From: Gabriele Oliaro Date: Wed, 27 Nov 2024 17:49:04 +0000 Subject: [PATCH] fix --- benchmarking/debug.sh | 5 +++-- deps/legion | 2 +- include/flexflow/request_manager.h | 2 +- src/runtime/model.cc | 4 ++-- src/runtime/request_manager.cc | 4 ++-- 5 files changed, 9 insertions(+), 8 deletions(-) diff --git a/benchmarking/debug.sh b/benchmarking/debug.sh index 86c7d2d902..abcc40d773 100755 --- a/benchmarking/debug.sh +++ b/benchmarking/debug.sh @@ -26,14 +26,15 @@ export FF_DEBG_NO_WEIGHTS=1 gdb -ex run --args ./inference/incr_decoding/incr_decoding \ -ll:cpu $NCPUS -ll:gpu $NGPUS -ll:util $NCPUS \ -ll:fsize 20000 -ll:zsize 10000 \ - --verbose -lg:prof 1 -lg:prof_logfile prof_%.gz \ - -llm-model $MODEL_NAME \ + -llm-model $MODEL_NAME --verbose \ -prompt $PROMPT \ -tensor-parallelism-degree $NGPUS \ -log-file ../inference/output/test.out \ -output-file ../inference/output/test.json \ --max-requests-per-batch 1 --max-tokens-per-batch 3000 --max-sequence-length 3000 +#--verbose -lg:prof 1 -lg:prof_logfile prof_%.gz \ + # ./inference/peft/peft \ # -ll:cpu 4 -ll:gpu $NGPUS -ll:util 2 \ # -ll:fsize 10000 -ll:zsize 10000 \ diff --git a/deps/legion b/deps/legion index 0d32b35542..c0caf62ffd 160000 --- a/deps/legion +++ b/deps/legion @@ -1 +1 @@ -Subproject commit 0d32b35542bc0e9aba5950e485b8fc3413ae664b +Subproject commit c0caf62ffd1502185c1dbbb2057616ff8dafe4ec diff --git a/include/flexflow/request_manager.h b/include/flexflow/request_manager.h index a920436d54..aaae6cfc5a 100644 --- a/include/flexflow/request_manager.h +++ b/include/flexflow/request_manager.h @@ -341,7 +341,7 @@ class RequestManager { // std::vector<Legion::PhysicalRegion> const &regions, // Legion::Context ctx, // Legion::Runtime *runtime); - static void process_work_from_old_batches_task( + static bool process_work_from_old_batches_task( Legion::Task const *task, std::vector<Legion::PhysicalRegion> const &regions, Legion::Context ctx, diff --git a/src/runtime/model.cc 
b/src/runtime/model.cc index 5e2a188410..f8bbda3593 100644 --- a/src/runtime/model.cc +++ b/src/runtime/model.cc @@ -4690,14 +4690,14 @@ void register_flexflow_internal_tasks(Runtime *runtime, registrar.add_constraint(ProcessorConstraint(Processor::LOC_PROC)); registrar.set_leaf(); if (pre_register) { - Runtime::preregister_task_variant< + Runtime::preregister_task_variant<bool, RequestManager::process_work_from_old_batches_task>( registrar, "RequestManager Process Work from Old Batches Task"); } else { if (enable_control_replication) { registrar.global_registration = false; } - runtime->register_task_variant< + runtime->register_task_variant<bool, RequestManager::process_work_from_old_batches_task>(registrar); } } diff --git a/src/runtime/request_manager.cc b/src/runtime/request_manager.cc index 6ad77652ea..b5799db046 100644 --- a/src/runtime/request_manager.cc +++ b/src/runtime/request_manager.cc @@ -672,7 +672,6 @@ std::pair launcher3.add_future(std::get<3>(batch_pipeline_entry)); launcher3.add_future(pwfobf); BatchConfigFuture bcbf = runtime->execute_task(ctx, launcher3); - // return pair of batch futures return std::make_pair(bcff, bcbf); } @@ -680,7 +679,7 @@ std::pair // future[1]: old_bwd_bc // future[2]: inference result // future[3]: wait for bwd to finish -void RequestManager::process_work_from_old_batches_task( +bool RequestManager::process_work_from_old_batches_task( Task const *task, std::vector<PhysicalRegion> const &regions, Context ctx, @@ -693,6 +692,7 @@ void RequestManager::process_work_from_old_batches_task( Future(task->futures[2]).get_result(); Future(task->futures[3]).get_void_result(); // wait until bwd is done rm->process_work_from_old_batches(*old_fwd_bc, *old_bwd_bc, result); + return true; } // future[0]: old_fwd_bc