[Inference] fix paddle cpu docs. (#9299)
bukejiyu authored Oct 29, 2024
1 parent ce3a1ce commit 975d5c7
Showing 6 changed files with 16 additions and 11 deletions.
5 changes: 4 additions & 1 deletion csrc/cpu/README.md
@@ -1,7 +1,6 @@
# cpu-custom-ops

## Quick Start

### 1. Environment Setup
```shell
# Check whether the machine supports avx512 instructions
@@ -12,3 +11,7 @@ lscpu | grep avx512*
```shell
# Installing the third-party libraries with gcc 9.4.0 is recommended
bash setup.sh
```
**Note:**

A tutorial for large-model inference on CPU machines with AVX instructions: [X86 CPU](../cpu_install.md)
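The `lscpu | grep avx512*` probe above can be mirrored in a short Python helper; the function name and sample flag strings below are illustrative, not part of the repository.

```python
# Hypothetical helper mirroring the README's 'lscpu | grep avx512*' probe:
# scan the CPU flags reported by /proc/cpuinfo for avx512 features.
def avx512_features(cpuinfo_text):
    feats = set()
    for line in cpuinfo_text.splitlines():
        if line.startswith("flags"):
            for flag in line.split(":", 1)[1].split():
                if flag.startswith("avx512"):
                    feats.add(flag)
    return sorted(feats)
```

On Linux you would pass `open("/proc/cpuinfo").read()`; an empty result means no AVX-512 support was detected.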
6 changes: 3 additions & 3 deletions csrc/cpu/setup.sh
Original file line number Diff line number Diff line change
@@ -17,7 +17,7 @@
# apt-get install numactl

# 1. download XFT
-if [ ! -d xFasterTransformer]; then
+if [ ! -d xFasterTransformer ]; then
git clone https://github.com/intel/xFasterTransformer.git
fi
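The fix in this hunk adds the space before `]` that the POSIX `test` command requires. The same idempotent-clone guard can be sketched in Python (the function and its URL handling are illustrative):

```python
import os
import subprocess

def clone_if_missing(repo_url, dest):
    # Mirrors the fixed shell guard: only clone when the target
    # directory does not already exist.
    if not os.path.isdir(dest):
        subprocess.run(["git", "clone", repo_url, dest], check=True)
        return True
    return False
```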

@@ -55,12 +55,12 @@ rm -rf build
mkdir build && cd build
cmake ..
make -j
cd ..

#xft
export XFT_HEADER_DIR=$PWD
export XFT_LIB_DIR=$XFT_HEADER_DIR/build
export LD_LIBRARY_PATH=$XFT_LIB_DIR:$LD_LIBRARY_PATH

#setup cpu paddle_nlp ops
cd ..
-python ./src/setup_cpu.py install
+python ./src/setup_cpu.py install --user
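The exports above extend the dynamic-linker search path so the freshly built XFT libraries are found first; a Python sketch of the same prepend logic (names are illustrative):

```python
def prepend_ld_library_path(xft_lib_dir, env):
    # Mirrors 'export LD_LIBRARY_PATH=$XFT_LIB_DIR:$LD_LIBRARY_PATH':
    # put the XFT build directory first so its libraries win lookup.
    old = env.get("LD_LIBRARY_PATH", "")
    env["LD_LIBRARY_PATH"] = xft_lib_dir + (":" + old if old else "")
    return env
```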
4 changes: 2 additions & 2 deletions csrc/cpu/src/setup_cpu.py
@@ -112,7 +112,7 @@ def check_avx512_bf16__support():

custom_kernel_dot_module = CppExtension(
    sources=[
-        "../generation/save_with_output.cc",
+        "../gpu/save_with_output.cc",
        "./src/token_penalty_multi_scores.cc",
        "./src/stop_generation_multi_ends.cc",
        "./src/set_value_by_flags.cc",
@@ -129,6 +129,6 @@ def check_avx512_bf16__support():
setup(
    name="paddlenlp_ops",
    version="1.0",
-    description="custom kernel fot compiling",
+    description="custom kernel for compiling",
    ext_modules=[custom_kernel_dot_module],
)
6 changes: 3 additions & 3 deletions llm/docs/cpu_install.md
@@ -3,9 +3,9 @@ PaddleNLP has deeply adapted the llama family of models on CPUs that support AVX instructions

### Check the hardware:

-| Chip type | GCC version |
-| --- | --- |
-| Intel(R) Xeon(R) Platinum 8463B | 9.4.0 |
+| Chip type | GCC version | cmake version |
+| --- | --- | --- |
+| Intel(R) Xeon(R) Platinum 8463B | 9.4.0 | >=3.18 |

**Note: to verify whether your machine supports AVX instructions, run the check command in your system environment and see whether it produces any output.**
@@ -41,7 +41,10 @@
"The paddlenlp_ops package is not installed. you can read the docs and install it by hand, "
"you can refer to: https://github.com/PaddlePaddle/PaddleNLP/blob/develop/csrc/README.md"
)
-from paddlenlp_ops import rebuild_padding_v2
+if (
+    paddle.device.get_all_custom_device_type() is not None and len(paddle.device.get_all_custom_device_type()) > 0
+) or core.is_compiled_with_cuda():
+    from paddlenlp_ops import rebuild_padding_v2
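The condition added in this hunk can be factored into a small predicate; the function name below is hypothetical, and only the two paddle checks shown in the diff are assumed.

```python
def should_import_custom_ops(custom_device_types, compiled_with_cuda):
    # Import paddlenlp_ops only when a custom device type is registered
    # or the Paddle build includes CUDA, as the diff's guard does.
    has_custom_device = custom_device_types is not None and len(custom_device_types) > 0
    return has_custom_device or compiled_with_cuda
```

This keeps a pure-CPU Paddle build from failing at import time on an op that is only compiled for custom devices or CUDA.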


def use_cutlass_fp8_gemm():
1 change: 0 additions & 1 deletion paddlenlp/experimental/transformers/llama/modeling.py
@@ -291,7 +291,6 @@ def forward(
    @paddle.no_grad()
    # avx
    def set_state_dict(self, state_dict):
-        self.transformer_block.init_weight()
        unfused_state_dict = {}
        head_size = self.hidden_size // self.num_attention_heads
        split_fn = split_param_func()
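The `head_size` computation in the context above divides the hidden dimension evenly across attention heads; a tiny illustrative helper (name and numbers hypothetical):

```python
def head_dim(hidden_size, num_attention_heads):
    # Each attention head gets an equal slice of the hidden dimension,
    # so the split only makes sense when it divides evenly.
    assert hidden_size % num_attention_heads == 0
    return hidden_size // num_attention_heads
```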
