
Commit

update
goliaro committed Nov 30, 2024
1 parent 03498c1 commit 9fcf6a3
Showing 2 changed files with 16 additions and 21 deletions.
benchmarking/debug.sh: 26 changes (7 additions, 19 deletions)
@@ -10,7 +10,7 @@ cd "${BASH_SOURCE[0]%/*}/../build"
PROMPT="/usr/FlexFlow/inference/prompt/peft.json"
MODEL_NAME="JackFram/llama-160m"
PEFT_MODEL_NAME="goliaro/llama-160m-lora"
- NGPUS=1
+ NGPUS=4
NCPUS=4

reset
@@ -27,31 +27,19 @@ mkdir -p ../inference/output


# export LEGION_BACKTRACE=1
- export FF_DEBG_NO_WEIGHTS=1
- export CUDA_VISIBLE_DEVICES=1,2,3,4
-
- # gdb -ex run --args ./inference/incr_decoding/incr_decoding \
- # -ll:cpu $NCPUS -ll:gpu $NGPUS -ll:util $NCPUS \
- # -ll:fsize 20000 -ll:zsize 10000 \
- # -llm-model $MODEL_NAME --verbose \
- # -prompt $PROMPT \
- # -tensor-parallelism-degree $NGPUS \
- # -log-file ../inference/output/test.out \
- # -output-file ../inference/output/test.json \
- # --max-requests-per-batch 1 --max-tokens-per-batch 3000 --max-sequence-length 3000
-
- #--verbose -lg:prof 1 -lg:prof_logfile prof_%.gz \
+ # export FF_DEBG_NO_WEIGHTS=1
+ # export CUDA_VISIBLE_DEVICES=1,2,3,4

./inference/peft/peft \
-ll:cpu 4 -ll:gpu $NGPUS -ll:util 4 \
-lg:prof 1 -lg:prof_logfile prof_%.gz \
-ll:fsize 20000 -ll:zsize 10000 \
- -llm-model $MODEL_NAME --fusion \
+ -llm-model $MODEL_NAME \
-enable-peft -peft-model $PEFT_MODEL_NAME \
- -prompt $PROMPT \
+ -finetuning-dataset /usr/FlexFlow/inference/prompt/peft_dataset.json \
+ -prompt /usr/FlexFlow/inference/prompt/peft.json \
-tensor-parallelism-degree $NGPUS \
-output-file ../inference/output/test.json \
--max-requests-per-batch 1 --max-tokens-per-batch 3000 --max-sequence-length 3000

- # -lg:prof 1 -lg:prof_logfile prof_%.gz --verbose --inference-debugging \
- ## -prompt /usr/FlexFlow/inference/prompt/peft.json \
+ # -lg:prof 1 -lg:prof_logfile prof_%.gz --verbose --inference-debugging \
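
Note: the rewritten script now runs the PEFT binary directly with the Legion profiler enabled (-lg:prof 1 -lg:prof_logfile prof_%.gz), passes a finetuning dataset alongside the inference prompt, and drops --fusion. A minimal sketch of how the resulting profiler logs could be inspected, assuming a Legion source checkout is available; the tool path and log filenames below are assumptions, not part of this commit:

# Assumption: prof_%.gz expands to one compressed log per process (prof_0.gz, ...).
# Legion's legacy profiler script renders the logs into a browsable HTML
# timeline under ./legion_prof/.
python3 legion/tools/legion_prof.py prof_*.gz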
inference/peft/peft.cc: 11 changes (9 additions, 2 deletions)
@@ -51,6 +51,7 @@ void parse_input_args(char **argv,
int &max_requests_per_batch,
int &max_tokens_per_batch,
int &max_sequence_length,
+ int &max_training_steps,
int &num_layers_per_finetuning_step) {
for (int i = 1; i < argc; i++) {
// llm model type
@@ -125,6 +126,10 @@ void parse_input_args(char **argv,
max_sequence_length = std::stoi(argv[++i]);
continue;
}
+ if (!strcmp(argv[i], "--max-training-steps")) {
+ max_training_steps = std::stoi(argv[++i]);
+ continue;
+ }
if (!strcmp(argv[i], "--num-layers-per-finetuning-step")) {
num_layers_per_finetuning_step = std::stoi(argv[++i]);
continue;
@@ -161,6 +166,7 @@ void FlexFlow::top_level_task(Task const *task,
int max_requests_per_batch = 1;
int max_tokens_per_batch = 128;
int max_sequence_length = 256;
+ int max_training_steps = 2;
bool enable_peft_finetuning = true;
int num_layers_per_finetuning_step = -1;

@@ -181,6 +187,7 @@ void FlexFlow::top_level_task(Task const *task,
max_requests_per_batch,
max_tokens_per_batch,
max_sequence_length,
+ max_training_steps,
num_layers_per_finetuning_step);
assert(ffconfig.data_parallelism_degree * ffconfig.tensor_parallelism_degree *
ffconfig.pipeline_parallelism_degree ==
@@ -372,7 +379,7 @@ void FlexFlow::top_level_task(Task const *task,
printf("Inference prompt[%d]: %s\n", total_num_requests, text.c_str());
Request inference_req;
inference_req.prompt = text;
- inference_req.max_new_tokens = 128;
+ inference_req.max_new_tokens = 4;
inference_req.peft_model_id =
(peft_model_id != nullptr) ? *peft_model_id : PEFTModelID::NO_ID;
requests.push_back(inference_req);
@@ -393,7 +400,7 @@ void FlexFlow::top_level_task(Task const *task,
: PEFTModelID::NO_ID;
fine_tuning_req.peft_finetuning_info.dataset_filepath =
file_paths.dataset_file_path;
- fine_tuning_req.peft_finetuning_info.max_training_steps = 2;
+ fine_tuning_req.peft_finetuning_info.max_training_steps = max_training_steps;
requests.push_back(fine_tuning_req);
}
std::vector<GenerationResult> result = model.generate(requests);
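Note: with this change, the number of finetuning steps becomes a command-line option (defaulting to 2, the previously hardcoded value). A hedged invocation sketch combining the flags from benchmarking/debug.sh with the new option; the step count of 8 is purely illustrative:

# Sketch: flags mirror benchmarking/debug.sh; --max-training-steps is the
# option added by this commit (8 is an arbitrary example value).
./inference/peft/peft \
    -ll:cpu 4 -ll:gpu 4 -ll:util 4 \
    -ll:fsize 20000 -ll:zsize 10000 \
    -llm-model JackFram/llama-160m \
    -enable-peft -peft-model goliaro/llama-160m-lora \
    -prompt /usr/FlexFlow/inference/prompt/peft.json \
    -finetuning-dataset /usr/FlexFlow/inference/prompt/peft_dataset.json \
    -tensor-parallelism-degree 4 \
    -output-file ../inference/output/test.json \
    --max-requests-per-batch 1 --max-tokens-per-batch 3000 \
    --max-sequence-length 3000 --max-training-steps 8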
