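fix: move INVALID_GUID into BatchConfig, access PEFT fine-tuning fields via peft_finetuning_info, and return BatchConfigPairFuture from prepare_next_batch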

flexflow · Nov 25, 2024 · 9c0d827 · 9c0d827
1 parent 09aa5af
commit 9c0d827
Show file tree

Hide file tree

Showing 6 changed files with 96 additions and 84 deletions.
diff --git a/include/flexflow/batch_config.h b/include/flexflow/batch_config.h
@@ -38,6 +38,7 @@ using BeamSearchBatchConfigFuture = Legion::Future;
 using TreeVerifyBatchConfigFuture = Legion::Future;
 using BeamInferenceResultFuture = Legion::Future;
 using FinetuningBwdFuture = Legion::Future;
+using BatchConfigPairFuture = Legion::Future;
 
 struct OptimizerTasks {
   bool compute_gradients = true;
@@ -54,6 +55,7 @@ void set_optimizer_tasks(OptimizerTasks &tasks,
 class BatchConfig {
 public:
   using RequestGuid = size_t;
+  static const RequestGuid INVALID_GUID = 0;
   using TokenId = int;
   BatchConfig();
   int num_active_requests() const;

diff --git a/include/flexflow/request_manager.h b/include/flexflow/request_manager.h
@@ -29,6 +29,7 @@ class FFModel;
 class BeamTree;
 class RequestManager;
 using tokenizers::Tokenizer;
+using RequestGuid = BatchConfig::RequestGuid;
 
 class InferenceManager {
 public:
@@ -85,7 +86,7 @@ struct Request {
     // std::vector<int> finetuning_tokens_per_batch;
   };
   RequestType req_type = REQ_INFERENCE;
-  BatchConfig::RequestGuid guid;
+  RequestGuid guid = BatchConfig::INVALID_GUID;
   int max_length = -1;
   int max_new_tokens = -1;
   int benchmarking_tokens = -1;
@@ -139,16 +140,14 @@ class RequestManager {
     SERVING = 1002,
     TERMINATED = 1003,
   };
-  using RequestGuid = BatchConfig::RequestGuid;
   using TokenId = BatchConfig::TokenId;
 
-  static const RequestGuid INVALID_GUID = 0;
   RequestManager();
   static RequestManager *get_request_manager();
   size_t get_num_processed_requests();
   size_t get_num_ssms();
 
-  void load_request_token_ids(Request &request);
+  bool load_request_token_ids(Request &request);
 
   void set_max_requests_per_batch(int max_num_requests);
   int get_max_requests_per_batch();
@@ -196,6 +195,7 @@ class RequestManager {
   void serve_incr_decoding(FFModel *model);
   void serve_spec_infer(FFModel *model);
   GenerationResult get_generation_result(RequestGuid const &guid);
+  RequestGuid assign_next_guid();
   RequestGuid register_new_request(Request const &request_);
   RequestGuid register_new_peft_request(Request const &request_);
 
@@ -230,9 +230,9 @@ class RequestManager {
   void process_work_from_old_batches(BatchConfig const &old_fwd_bc, BatchConfig const &old_bwd_bc, InferenceResult const &result);
   BatchConfig prepare_next_bwd_batch();
   BatchConfig prepare_next_fwd_batch(BatchConfig const &old_fwd_bc, InferenceResult const &result);
-  std::pair<BatchConfigFuture, BatchConfigFuture> prepare_next_batch(std::tuple<BatchConfigFuture, BatchConfigFuture, InferenceResultFuture, FinetuningBwdFuture> &batch_pipeline_entry,
-                                                                    Context ctx,
-                                                                    Runtime *runtime);
+  BatchConfigPairFuture prepare_next_batch(std::tuple<BatchConfigFuture, BatchConfigFuture, InferenceResultFuture, FinetuningBwdFuture> &batch_pipeline_entry,
+                                          Context ctx,
+                                          Runtime *runtime);
   // BatchConfig prepare_next_batch(BatchConfig const &bc,
   //                                InferenceResult const &result);
   // BatchConfigFuture prepare_next_batch(BatchConfigFuture const &bc,

diff --git a/inference/peft/peft.cc b/inference/peft/peft.cc
@@ -375,8 +375,8 @@ void FlexFlow::top_level_task(Task const *task,
       fine_tuning_req.peft_model_id = (peft_model_id_finetuning != nullptr)
                                           ? *peft_model_id_finetuning
                                           : PEFTModelID::NO_ID;
-      fine_tuning_req.dataset_filepath = file_paths.dataset_file_path;
-      fine_tuning_req.max_training_steps = 2;
+      fine_tuning_req.peft_finetuning_info.dataset_filepath = file_paths.dataset_file_path;
+      fine_tuning_req.peft_finetuning_info.max_training_steps = 2;
       requests.push_back(fine_tuning_req);
     }
     std::vector<GenerationResult> result = model.generate(requests);

diff --git a/src/c/flexflow_c.cc b/src/c/flexflow_c.cc
@@ -1746,8 +1746,8 @@ void flexflow_model_generate(flexflow_model_t handle_,
         fine_tuning_req.peft_model_id = *peft_model_id;
       }
       std::string const dataset_fp(dataset_filepaths[i]);
-      fine_tuning_req.dataset_filepath = dataset_fp;
-      fine_tuning_req.max_training_steps = training_steps[i];
+      fine_tuning_req.peft_finetuning_info.dataset_filepath = dataset_fp;
+      fine_tuning_req.peft_finetuning_info.max_training_steps = training_steps[i];
       requests.push_back(fine_tuning_req);
       DEBUG_PRINT("[Model] finetune[%d] %p %s %i %i %i %i",
                   i,

diff --git a/src/ops/softmax.cc b/src/ops/softmax.cc
@@ -414,7 +414,7 @@ FutureMap Softmax::inference(FFModel const &ff,
         RegionRequirement(batch_outputs[0]->part_grad,
                           0 /*projection id*/,
                           WRITE_ONLY,
-                          EXCLUSIVE,
+                          SIMULTANEOUS,
                           batch_outputs[0]->region_grad));
     launcher.add_field(2, FID_DATA);
   }