
Commit

update
goliaro committed Nov 30, 2024
1 parent 03498c1 commit 9fcf6a3
Showing 2 changed files with 16 additions and 21 deletions.
benchmarking/debug.sh: 26 changes (7 additions, 19 deletions)
@@ -10,7 +10,7 @@ cd "${BASH_SOURCE[0]%/*}/../build"
PROMPT="/usr/FlexFlow/inference/prompt/peft.json"
MODEL_NAME="JackFram/llama-160m"
PEFT_MODEL_NAME="goliaro/llama-160m-lora"
- NGPUS=1
+ NGPUS=4
NCPUS=4

reset
@@ -27,31 +27,19 @@ mkdir -p ../inference/output


# export LEGION_BACKTRACE=1
- export FF_DEBG_NO_WEIGHTS=1
- export CUDA_VISIBLE_DEVICES=1,2,3,4
-
- # gdb -ex run --args ./inference/incr_decoding/incr_decoding \
- # -ll:cpu $NCPUS -ll:gpu $NGPUS -ll:util $NCPUS \
- # -ll:fsize 20000 -ll:zsize 10000 \
- # -llm-model $MODEL_NAME --verbose \
- # -prompt $PROMPT \
- # -tensor-parallelism-degree $NGPUS \
- # -log-file ../inference/output/test.out \
- # -output-file ../inference/output/test.json \
- # --max-requests-per-batch 1 --max-tokens-per-batch 3000 --max-sequence-length 3000
-
- #--verbose -lg:prof 1 -lg:prof_logfile prof_%.gz \
+ # export FF_DEBG_NO_WEIGHTS=1
+ # export CUDA_VISIBLE_DEVICES=1,2,3,4

./inference/peft/peft \
-ll:cpu 4 -ll:gpu $NGPUS -ll:util 4 \
-lg:prof 1 -lg:prof_logfile prof_%.gz \
-ll:fsize 20000 -ll:zsize 10000 \
- -llm-model $MODEL_NAME --fusion \
+ -llm-model $MODEL_NAME \
-enable-peft -peft-model $PEFT_MODEL_NAME \
- -prompt $PROMPT \
+ -finetuning-dataset /usr/FlexFlow/inference/prompt/peft_dataset.json \
+ -prompt /usr/FlexFlow/inference/prompt/peft.json \
-tensor-parallelism-degree $NGPUS \
-output-file ../inference/output/test.json \
--max-requests-per-batch 1 --max-tokens-per-batch 3000 --max-sequence-length 3000

- # -lg:prof 1 -lg:prof_logfile prof_%.gz --verbose --inference-debugging \
- ## -prompt /usr/FlexFlow/inference/prompt/peft.json \
+ # -lg:prof 1 -lg:prof_logfile prof_%.gz --verbose --inference-debugging \
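
Note: the rewritten script now runs the PEFT binary directly with the Legion profiler enabled (-lg:prof 1 -lg:prof_logfile prof_%.gz), passes a finetuning dataset alongside the inference prompt, and drops --fusion. A minimal sketch of how the resulting profiler logs could be inspected, assuming a Legion source checkout is available; the tool path and log filenames below are assumptions, not part of this commit:

# Assumption: prof_%.gz expands to one compressed log per process (prof_0.gz, ...).
# Legion's legacy profiler script renders the logs into a browsable HTML
# timeline under ./legion_prof/.
python3 legion/tools/legion_prof.py prof_*.gz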
inference/peft/peft.cc: 11 changes (9 additions, 2 deletions)
@@ -51,6 +51,7 @@ void parse_input_args(char **argv,
int &max_requests_per_batch,
int &max_tokens_per_batch,
int &max_sequence_length,
+ int &max_training_steps,
int &num_layers_per_finetuning_step) {
for (int i = 1; i < argc; i++) {
// llm model type
@@ -125,6 +126,10 @@ void parse_input_args(char **argv,
max_sequence_length = std::stoi(argv[++i]);
continue;
}
+ if (!strcmp(argv[i], "--max-training-steps")) {
+ max_training_steps = std::stoi(argv[++i]);
+ continue;
+ }
if (!strcmp(argv[i], "--num-layers-per-finetuning-step")) {
num_layers_per_finetuning_step = std::stoi(argv[++i]);
continue;
@@ -161,6 +166,7 @@ void FlexFlow::top_level_task(Task const *task,
int max_requests_per_batch = 1;
int max_tokens_per_batch = 128;
int max_sequence_length = 256;
+ int max_training_steps = 2;
bool enable_peft_finetuning = true;
int num_layers_per_finetuning_step = -1;

@@ -181,6 +187,7 @@ void FlexFlow::top_level_task(Task const *task,
max_requests_per_batch,
max_tokens_per_batch,
max_sequence_length,
+ max_training_steps,
num_layers_per_finetuning_step);
assert(ffconfig.data_parallelism_degree * ffconfig.tensor_parallelism_degree *
ffconfig.pipeline_parallelism_degree ==
@@ -372,7 +379,7 @@ void FlexFlow::top_level_task(Task const *task,
printf("Inference prompt[%d]: %s\n", total_num_requests, text.c_str());
Request inference_req;
inference_req.prompt = text;
- inference_req.max_new_tokens = 128;
+ inference_req.max_new_tokens = 4;
inference_req.peft_model_id =
(peft_model_id != nullptr) ? *peft_model_id : PEFTModelID::NO_ID;
requests.push_back(inference_req);
@@ -393,7 +400,7 @@ void FlexFlow::top_level_task(Task const *task,
: PEFTModelID::NO_ID;
fine_tuning_req.peft_finetuning_info.dataset_filepath =
file_paths.dataset_file_path;
- fine_tuning_req.peft_finetuning_info.max_training_steps = 2;
+ fine_tuning_req.peft_finetuning_info.max_training_steps = max_training_steps;
requests.push_back(fine_tuning_req);
}
std::vector<GenerationResult> result = model.generate(requests);
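Note: with this change, the number of finetuning steps becomes a command-line option (defaulting to 2, the previously hardcoded value). A hedged invocation sketch combining the flags from benchmarking/debug.sh with the new option; the step count of 8 is purely illustrative:

# Sketch: flags mirror benchmarking/debug.sh; --max-training-steps is the
# option added by this commit (8 is an arbitrary example value).
./inference/peft/peft \
    -ll:cpu 4 -ll:gpu 4 -ll:util 4 \
    -ll:fsize 20000 -ll:zsize 10000 \
    -llm-model JackFram/llama-160m \
    -enable-peft -peft-model goliaro/llama-160m-lora \
    -prompt /usr/FlexFlow/inference/prompt/peft.json \
    -finetuning-dataset /usr/FlexFlow/inference/prompt/peft_dataset.json \
    -tensor-parallelism-degree 4 \
    -output-file ../inference/output/test.json \
    --max-requests-per-batch 1 --max-tokens-per-batch 3000 \
    --max-sequence-length 3000 --max-training-steps 8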
