Skip to content

Commit

Permalink
add overhead test
Browse files Browse the repository at this point in the history
  • Loading branch information
goliaro committed Dec 6, 2024
1 parent 04c6857 commit ec500d2
Show file tree
Hide file tree
Showing 6 changed files with 590 additions and 6 deletions.
64 changes: 64 additions & 0 deletions benchmarking/overhead_test.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
#! /usr/bin/env bash
#
# Builds and installs FlexFlow from ../build, downloads the base and PEFT
# models, then runs the inference/peft/overhead_test binary, mirroring its
# combined stdout/stderr into ${LOG_FILE}.
#
# To benchmark a different model, swap the active configuration block below
# for one of the commented-out ones.

set -x
set -e
# Without pipefail the exit status of the final `... | tee` pipeline is tee's,
# so a crash in overhead_test would be reported as success.
set -o pipefail

# Cd into the build directory that sits next to this script's directory.
# dirname (rather than ${BASH_SOURCE[0]%/*}) also works when the script is
# invoked by bare file name, e.g. `bash overhead_test.sh`.
cd "$(dirname "${BASH_SOURCE[0]}")/../build"


# MODEL_NAME="meta-llama/Llama-2-70b-hf"
# MODEL_NAME="meta-llama/Llama-3.1-70B-Instruct"
# PEFT_MODEL_NAME="goliaro/llama-2-70b-hf-lora"
# NGPUS=8
# NCPUS=16
# FSIZE=38000
# ZSIZE=200000

# MODEL_NAME="meta-llama/Meta-Llama-3-8B"
# PEFT_MODEL_NAME="goliaro/llama-3-8b-lora-dolly"
# NGPUS=8
# NCPUS=16
# FSIZE=30000
# ZSIZE=30000

MODEL_NAME="JackFram/llama-160m"
PEFT_MODEL_NAME="goliaro/llama-160m-lora"
NGPUS=4
NCPUS=16
FSIZE=30000
ZSIZE=20000

OUTPUT_FOLDER="/usr/FlexFlow/inference/output/overhead_test"
LOG_FILE="${OUTPUT_FOLDER}/test.log"
MAX_SEQ_LEN=2900
MAX_TOKENS_PER_BATCH=512
BATCH_SIZE=8

mkdir -p "$OUTPUT_FOLDER"

# Clearing the terminal is cosmetic; do not let a failure here abort the run
# under `set -e`.
reset || true
make -j install

python -c "from huggingface_hub import snapshot_download; \
snapshot_download(repo_id=\"${MODEL_NAME}\", allow_patterns=\"*.safetensors\", max_workers=30)"
python ../inference/utils/download_hf_model.py "$MODEL_NAME"
python ../inference/utils/download_peft_model.py "$PEFT_MODEL_NAME"

# export NCCL_DEBUG=INFO
# export NCCL_DEBUG_FILE=/usr/FlexFlow/inference/output/nccl2.log
# export LEGION_BACKTRACE=1
# NOTE(review): the list must expose at least NGPUS devices; device 0 is
# skipped here - presumably on purpose, confirm for the target machine.
export CUDA_VISIBLE_DEVICES=1,2,3,4

# Start from a fresh log. (The original also passed an undefined $OUTPUT_FILE
# to rm, which would have deleted whatever an exported OUTPUT_FILE pointed at.)
rm -f "$LOG_FILE"

./inference/peft/overhead_test \
    -ll:cpu "$NCPUS" -ll:gpu "$NGPUS" -ll:util "$NCPUS" \
    -ll:fsize "$FSIZE" -ll:zsize "$ZSIZE" \
    -llm-model "$MODEL_NAME" --fusion \
    -enable-peft -peft-model "$PEFT_MODEL_NAME" \
    -tensor-parallelism-degree "$NGPUS" \
    -output-folder "$OUTPUT_FOLDER" \
    --max-requests-per-batch "$BATCH_SIZE" \
    --max-tokens-per-batch "$MAX_TOKENS_PER_BATCH" \
    --max-sequence-length "$MAX_SEQ_LEN" \
    2>&1 | tee "$LOG_FILE"
3 changes: 3 additions & 0 deletions include/flexflow/request_manager.h
Original file line number Diff line number Diff line change
Expand Up @@ -178,6 +178,8 @@ class RequestManager {
int get_max_requests_per_batch();
void set_max_tokens_per_batch(int max_num_tokens);
int get_max_tokens_per_batch();
// Setter/getter pair added by this commit. Presumably they write/read the
// max_fwd_finetuning_tokens_per_batch member declared further down in this
// class; the definitions are not part of this header - TODO confirm.
void set_max_fwd_finetuning_tokens_per_batch(int max_num_tokens);
int get_max_fwd_finetuning_tokens_per_batch();
void set_max_spec_tree_token_num(int max_num_tokens);
int get_max_spec_tree_token_num();
int get_max_verify_tokens_per_batch();
Expand Down Expand Up @@ -381,6 +383,7 @@ class RequestManager {
// configuration parameters
int max_requests_per_batch;
int max_tokens_per_batch;
// Added by this commit alongside the
// set_/get_max_fwd_finetuning_tokens_per_batch() declarations; presumably a
// per-batch token limit for the forward pass of finetuning requests - TODO
// confirm against the implementation. No default value is given here.
int max_fwd_finetuning_tokens_per_batch;
int max_spec_tree_token_num;
int max_sequence_length;
int max_finetuning_sequence_length;
Expand Down
35 changes: 35 additions & 0 deletions inference/peft/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -137,3 +137,38 @@ target_include_directories(${project_target4} PRIVATE ${CMAKE_SOURCE_DIR}/infere
target_link_libraries(${project_target4} -Wl,--whole-archive flexflow -Wl,--no-whole-archive ${FLEXFLOW_EXT_LIBRARIES})
set(BIN_DEST "bin")
install(TARGETS ${project_target4} DESTINATION ${BIN_DEST})


# Overhead peft test
#
# Builds the `overhead_test` executable from overhead_test.cc plus the model
# sources listed below, links it against flexflow, and installs it under
# ${BIN_DEST}. The executable is created with the helper that matches
# FF_GPU_BACKEND; any backend other than cuda / hip_cuda / hip_rocm is a
# configure-time error.
set(project_target5 overhead_test)
set(CPU_SRC5
  ${FLEXFLOW_CPP_DRV_SRC}
  overhead_test.cc
  ../models/llama.cc
  ../models/opt.cc
  ../models/falcon.cc
  ../models/starcoder.cc
  ../models/mpt.cc)

if (FF_GPU_BACKEND STREQUAL "cuda" OR FF_GPU_BACKEND STREQUAL "hip_cuda")
  cuda_add_executable(${project_target5} ${CPU_SRC5})
  if (FF_GPU_BACKEND STREQUAL "hip_cuda")
    target_compile_definitions(${project_target5} PRIVATE __HIP_PLATFORM_NVIDIA__)
  endif()
elseif(FF_GPU_BACKEND STREQUAL "hip_rocm")
  # Validate the architecture before creating the target. The expansion is
  # quoted so that an *undefined* FF_HIP_ARCH is caught as well: the unquoted
  # form compares the literal text "FF_HIP_ARCH" when the variable is not
  # set, and therefore never matches "".
  if ("${FF_HIP_ARCH}" STREQUAL "")
    message(FATAL_ERROR "FF_HIP_ARCH is empty!")
  endif()
  set_source_files_properties(${CPU_SRC5} PROPERTIES LANGUAGE HIP)
  hip_add_executable(${project_target5} ${CPU_SRC5})
  set_property(TARGET ${project_target5} PROPERTY HIP_ARCHITECTURES "${FF_HIP_ARCH}")
  target_compile_definitions(${project_target5} PRIVATE __HIP_PLATFORM_AMD__)
else()
  message(FATAL_ERROR "Compilation of ${project_target5} for ${FF_GPU_BACKEND} backend not yet supported")
endif()

target_include_directories(${project_target5} PRIVATE ${FLEXFLOW_INCLUDE_DIRS} ${CMAKE_INSTALL_INCLUDEDIR})
target_include_directories(${project_target5} PRIVATE ${CMAKE_SOURCE_DIR}/inference)
# The keyword-less signature is kept deliberately: cuda_add_executable() links
# the CUDA libraries with the plain signature unless
# CUDA_LINK_LIBRARIES_KEYWORD is set, and CMake rejects mixing plain and
# PRIVATE/PUBLIC signatures on one target.
# --whole-archive forces every object of the flexflow archive into the link.
target_link_libraries(${project_target5} -Wl,--whole-archive flexflow -Wl,--no-whole-archive ${FLEXFLOW_EXT_LIBRARIES})
set(BIN_DEST "bin")
install(TARGETS ${project_target5} DESTINATION ${BIN_DEST})
Loading

0 comments on commit ec500d2

Please sign in to comment.