diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index 8408b3a2bf18..e623541f8dcb 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -1,4 +1,18 @@ +#### Before submitting + +- [ ] Lint code. If there are lint issues, please format the code first. + +```shell +# Install and register `pre-commit` in the project folder +pip install pre-commit && pre-commit install + +# Process previous code files separately +pre-commit run --file XXXX.py +``` + +- [ ] Add test cases to the `tests` folder. If there are codecov issues, please add test cases first. + ### PR types diff --git a/README.md b/README.md index e10bf27d3106..7cf797492ffb 100644 --- a/README.md +++ b/README.md @@ -204,7 +204,8 @@ mkdir -p llm/data && cd llm/data wget https://bj.bcebos.com/paddlenlp/models/transformers/llama/data/llama_openwebtext_100k.bin wget https://bj.bcebos.com/paddlenlp/models/transformers/llama/data/llama_openwebtext_100k.idx cd .. # change folder to PaddleNLP/llm -python -u -m paddle.distributed.launch --gpus "0,1,2,3,4,5,6,7" run_pretrain.py ./config/llama/pretrain_argument.json +# 如需使用use_fused_rms_norm=true,需要前往slm/model_zoo/gpt-3/external_ops安装fused_ln +python -u -m paddle.distributed.launch --gpus "0,1,2,3,4,5,6,7" run_pretrain.py ./config/llama/pretrain_argument.json --use_fused_rms_norm false ``` ### 大模型 SFT 精调 diff --git a/docs/llm/devices/dcu/llama/README.md b/docs/llm/devices/dcu/llama/README.md new file mode 120000 index 000000000000..924cb8226622 --- /dev/null +++ b/docs/llm/devices/dcu/llama/README.md @@ -0,0 +1 @@ +../../../../../llm/devices/dcu/llama/README.md \ No newline at end of file diff --git a/docs/llm/devices/gcu/llama/README.md b/docs/llm/devices/gcu/llama/README.md new file mode 120000 index 000000000000..43fb9df89fd6 --- /dev/null +++ b/docs/llm/devices/gcu/llama/README.md @@ -0,0 +1 @@ +../../../../../llm/devices/gcu/llama/README.md \ No newline at end of file diff --git
a/docs/llm/devices/iluvatar/llama/README.md b/docs/llm/devices/iluvatar/llama/README.md new file mode 120000 index 000000000000..d0b71f0cc63d --- /dev/null +++ b/docs/llm/devices/iluvatar/llama/README.md @@ -0,0 +1 @@ +../../../../../llm/devices/iluvatar/llama/README.md \ No newline at end of file diff --git a/docs/llm/devices/intel_hpu/llama/README.md b/docs/llm/devices/intel_hpu/llama/README.md new file mode 120000 index 000000000000..e02d2da52ba7 --- /dev/null +++ b/docs/llm/devices/intel_hpu/llama/README.md @@ -0,0 +1 @@ +../../../../../llm/devices/intel_hpu/llama/README.md \ No newline at end of file diff --git a/docs/llm/devices/metax/llama/README.md b/docs/llm/devices/metax/llama/README.md new file mode 120000 index 000000000000..9b46ca066bb5 --- /dev/null +++ b/docs/llm/devices/metax/llama/README.md @@ -0,0 +1 @@ +../../../../../llm/devices/metax/llama/README.md \ No newline at end of file diff --git a/docs/llm/devices/mlu/llama/README.md b/docs/llm/devices/mlu/llama/README.md new file mode 120000 index 000000000000..6d569c8bc3e5 --- /dev/null +++ b/docs/llm/devices/mlu/llama/README.md @@ -0,0 +1 @@ +../../../../../llm/devices/mlu/llama/README.md \ No newline at end of file diff --git a/docs/llm/devices/mthreads/llama/README.md b/docs/llm/devices/mthreads/llama/README.md new file mode 120000 index 000000000000..3a8d59067a12 --- /dev/null +++ b/docs/llm/devices/mthreads/llama/README.md @@ -0,0 +1 @@ +../../../../../llm/devices/mthreads/llama/README.md \ No newline at end of file diff --git a/docs/llm/devices/npu/llama/README.md b/docs/llm/devices/npu/llama/README.md new file mode 120000 index 000000000000..529925f5f78d --- /dev/null +++ b/docs/llm/devices/npu/llama/README.md @@ -0,0 +1 @@ +../../../../../llm/devices/npu/llama/README.md \ No newline at end of file diff --git a/docs/llm/devices/sdaa/llama/README.md b/docs/llm/devices/sdaa/llama/README.md new file mode 120000 index 000000000000..2c31dd6c6141 --- /dev/null +++ 
b/docs/llm/devices/sdaa/llama/README.md @@ -0,0 +1 @@ +../../../../../llm/devices/sdaa/llama/README.md \ No newline at end of file diff --git a/docs/llm/devices/xpu/llama/README.md b/docs/llm/devices/xpu/llama/README.md new file mode 120000 index 000000000000..6cbe36607ef7 --- /dev/null +++ b/docs/llm/devices/xpu/llama/README.md @@ -0,0 +1 @@ +../../../../../llm/devices/xpu/llama/README.md \ No newline at end of file diff --git a/docs/llm/gcu/llama/README.md b/docs/llm/gcu/llama/README.md deleted file mode 120000 index 0dd2cc9df593..000000000000 --- a/docs/llm/gcu/llama/README.md +++ /dev/null @@ -1 +0,0 @@ -../../../../llm/devices/gcu/llama/README.md \ No newline at end of file diff --git a/docs/llm/intel_hpu/llama/README.md b/docs/llm/intel_hpu/llama/README.md deleted file mode 120000 index f1d81074e1c4..000000000000 --- a/docs/llm/intel_hpu/llama/README.md +++ /dev/null @@ -1 +0,0 @@ -../../../../llm/devices/intel_hpu/llama/README.md \ No newline at end of file diff --git a/docs/llm/metax/llama/README.md b/docs/llm/metax/llama/README.md deleted file mode 120000 index dd91090b6f76..000000000000 --- a/docs/llm/metax/llama/README.md +++ /dev/null @@ -1 +0,0 @@ -../../../../llm/devices/metax/llama/README.md \ No newline at end of file diff --git a/docs/llm/npu/llama/README.md b/docs/llm/npu/llama/README.md deleted file mode 120000 index 4a98f06ca26a..000000000000 --- a/docs/llm/npu/llama/README.md +++ /dev/null @@ -1 +0,0 @@ -../../../../llm/devices/npu/llama/README.md \ No newline at end of file diff --git a/docs/llm/sdaa/llama/README.md b/docs/llm/sdaa/llama/README.md deleted file mode 120000 index f96ebf326c62..000000000000 --- a/docs/llm/sdaa/llama/README.md +++ /dev/null @@ -1 +0,0 @@ -../../../../llm/devices/sdaa/llama/README.md \ No newline at end of file diff --git a/docs/llm/server/README.md b/docs/llm/server/README.md new file mode 120000 index 000000000000..caec81418031 --- /dev/null +++ b/docs/llm/server/README.md @@ -0,0 +1 @@ 
+../../../llm/server/README.md \ No newline at end of file diff --git a/docs/llm/server/docs/deploy_usage_tutorial.md b/docs/llm/server/docs/deploy_usage_tutorial.md new file mode 120000 index 000000000000..02df787be5be --- /dev/null +++ b/docs/llm/server/docs/deploy_usage_tutorial.md @@ -0,0 +1 @@ +../../../../llm/server/docs/deploy_usage_tutorial.md \ No newline at end of file diff --git a/docs/llm/xpu/llama/README.md b/docs/llm/xpu/llama/README.md deleted file mode 120000 index 2b268981dcc2..000000000000 --- a/docs/llm/xpu/llama/README.md +++ /dev/null @@ -1 +0,0 @@ -../../../../llm/devices/xpu/llama/README.md \ No newline at end of file diff --git a/llm/docs/predict/inference.md b/llm/docs/predict/inference.md index 9a332b663cc2..9c3439682573 100644 --- a/llm/docs/predict/inference.md +++ b/llm/docs/predict/inference.md @@ -17,7 +17,7 @@ PaddleNLP 大模型推理提供压缩、推理、服务全流程体验 : - 提供多种 PTQ 技术,提供 WAC(权重/激活/缓存)灵活可配的量化能力,支持 INT8、FP8、4Bit 量化能力 -- 支持多硬件大模型推理,包括[昆仑 XPU](../../xpu/llama/README.md)、[昇腾 NPU](../../npu/llama/README.md)、[海光 K100](../dcu_install.md)、[燧原 GCU](../../gcu/llama/README.md)、[X86 CPU](../cpu_install.md)等 +- 支持多硬件大模型推理,包括[昆仑 XPU](../../devices/xpu/llama/README.md)、[昇腾 NPU](../../devices/npu/llama/README.md)、[海光 K100](../dcu_install.md)、[燧原 GCU](../../devices/gcu/llama/README.md)、[X86 CPU](../cpu_install.md)等 - 提供面向服务器场景的部署服务,支持连续批处理(continuous batching)、流式输出等功能,HTTP 协议的服务接口 @@ -192,11 +192,11 @@ python ./predict/predictor.py --model_name_or_path meta-llama/Llama-2-7b-chat -- 更多硬件大模型推理教程: -- [昆仑 XPU](../../xpu/llama/README.md) -- [昇腾 NPU](../../npu/llama/README.md) +- [昆仑 XPU](../../devices/xpu/llama/README.md) +- [昇腾 NPU](../../devices/npu/llama/README.md) - [海光 K100](../dcu_install.md) -- [燧原 GCU](../../gcu/llama/README.md) -- [太初 SDAA](../../sdaa/llama/README.md) +- [燧原 GCU](../../devices/gcu/llama/README.md) +- [太初 SDAA](../../devices/sdaa/llama/README.md) - [X86 CPU](../cpu_install.md) ## 致谢 diff --git a/scripts/codestyle/check_dead_links.py 
def create_symlinks(root_dir, src_dir, tgt_dir, file_extension=".md"):
    """
    Mirror the folder structure of the src directory inside the tgt directory and
    create a relative-path symlink in tgt for every src file of a specific type.
    Files that already exist in tgt but have no counterpart in src are reported
    with a warning (they are not deleted).

    :param root_dir: Base directory that ``src_dir`` and ``tgt_dir`` are joined onto
    :param src_dir: Path to the source directory, relative to ``root_dir``
    :param tgt_dir: Path to the target directory, relative to ``root_dir``
    :param file_extension: File extension for which symlinks need to be created, default is ".md"
    :return: Number of symlinks that were newly created or re-pointed
    :raises SystemExit: With exit code 1 when a target path is occupied by something
        that is not a symlink (the offending path is printed first)
    """
    tgt_dir = os.path.join(root_dir, tgt_dir)
    src_dir = os.path.join(root_dir, src_dir)

    # Snapshot every file currently under tgt (relative to tgt) *before* creating
    # anything, so that leftovers without a src counterpart can be reported later.
    existing_tgt_files = {
        os.path.relpath(os.path.join(root, name), tgt_dir)
        for root, _dirs, files in os.walk(tgt_dir)
        for name in files
    }

    # Ensure the target directory exists
    os.makedirs(tgt_dir, exist_ok=True)

    count = 0

    for root, _dirs, files in os.walk(src_dir):
        # Directory in tgt that corresponds to the src directory being visited
        relative_path = os.path.relpath(root, src_dir)
        tgt_path = os.path.join(tgt_dir, relative_path)

        for name in files:
            if not name.endswith(file_extension):
                continue

            src_file_path = os.path.join(root, name)
            # The link text is relative to the directory that holds the link
            relative_src_file_path = os.path.relpath(src_file_path, tgt_path)
            tgt_file_path = os.path.join(tgt_path, name)

            # Target folders are only created when they will actually hold a link
            os.makedirs(tgt_path, exist_ok=True)

            # islink() must be checked first: os.path.exists() follows symlinks and
            # reports False for a dangling link, which would send a broken link into
            # the "create" branch and make os.symlink() fail with FileExistsError.
            if os.path.islink(tgt_file_path):
                if os.readlink(tgt_file_path) != relative_src_file_path:
                    # Stale or dangling link: re-point it at the current src file
                    os.unlink(tgt_file_path)
                    os.symlink(relative_src_file_path, tgt_file_path)
                    count += 1
            elif not os.path.lexists(tgt_file_path):
                os.symlink(relative_src_file_path, tgt_file_path)
                count += 1
            else:
                # A real file/directory occupies the path; refuse to overwrite it
                print(f"File already exists: {tgt_file_path}. Please remove it from {tgt_dir} and try again.")
                sys.exit(1)

            # This tgt entry has a src counterpart, so it is not a leftover
            existing_tgt_files.discard(os.path.relpath(tgt_file_path, tgt_dir))

    # Whatever remains exists in tgt but was not found in src; sorted for stable output
    for name in sorted(existing_tgt_files):
        print(f"Warning: File exists in {tgt_dir} but not found in {src_dir}: {name}")

    return count