PaddlePaddle · decade-afk · Jan 25, 2025 · Jan 25, 2025 · Jan 25, 2025 · Jan 25, 2025
diff --git a/paddle/phi/backends/dynload/cusolver.h b/paddle/phi/backends/dynload/cusolver.h
@@ -65,6 +65,8 @@ CUSOLVER_ROUTINE_EACH(DECLARE_DYNAMIC_LOAD_CUSOLVER_WRAP);
 
 #if CUDA_VERSION >= 9020
 #define CUSOLVER_ROUTINE_EACH_R1(__macro) \
+  __macro(cusolverDnSgetrs);              \
+  __macro(cusolverDnDgetrs);              \
   __macro(cusolverDnSpotrfBatched);       \
   __macro(cusolverDnDpotrfBatched);       \
   __macro(cusolverDnSpotrsBatched);       \

diff --git a/paddle/phi/backends/dynload/lapack.h b/paddle/phi/backends/dynload/lapack.h
@@ -29,6 +29,26 @@ extern "C" void dgetrf_(
 extern "C" void sgetrf_(
     int *m, int *n, float *a, int *lda, int *ipiv, int *info);
 
+// getrs_: LAPACK solve of op(A) * X = B from LU factors and pivot indices
+extern "C" void sgetrs_(char *trans,  // 'N', 'T' or 'C'
+                        int *n,       // order of the factored matrix
+                        int *nrhs,    // number of right-hand-side columns
+                        float *a,     // LU factors
+                        int *lda,     // leading dimension of a
+                        int *ipiv,    // pivot indices
+                        float *b,     // right-hand sides in, solution out
+                        int *ldb,     // leading dimension of b
+                        int *info);   // 0 on success
+extern "C" void dgetrs_(char *trans,  // double-precision getrs; 'N'/'T'/'C'
+                        int *n,       // order of the factored matrix
+                        int *nrhs,    // number of right-hand-side columns
+                        double *a,    // LU factors
+                        int *lda,     // leading dimension of a
+                        int *ipiv,    // pivot indices
+                        double *b,    // right-hand sides in, solution out
+                        int *ldb,     // leading dimension of b
+                        int *info);   // 0 on success
+
 // evd
 extern "C" void zheevd_(char *jobz,
                         char *uplo,
@@ -339,6 +359,8 @@ extern void *lapack_dso_handle;
 #define LAPACK_ROUTINE_EACH(__macro) \
   __macro(dgetrf_);                  \
   __macro(sgetrf_);                  \
+  __macro(sgetrs_);                  \
+  __macro(dgetrs_);                  \
   __macro(zheevd_);                  \
   __macro(cheevd_);                  \
   __macro(dsyevd_);                  \

diff --git a/paddle/phi/kernels/cpu/lu_solve_grad_kernel.cc b/paddle/phi/kernels/cpu/lu_solve_grad_kernel.cc
@@ -0,0 +1,42 @@
+// Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "paddle/phi/backends/cpu/cpu_context.h"
+#include "paddle/phi/core/kernel_registry.h"
+
+#include "paddle/phi/kernels/lu_solve_grad_kernel.h"
+#include "paddle/phi/kernels/lu_solve_kernel.h"
+
+namespace phi {
+
+template <typename T, typename Context>  // lu_solve backward w.r.t. x
+void LuSolveGradKernel(const Context& dev_ctx,
+                       const DenseTensor& x,  // forward RHS, not read here
+                       const DenseTensor& lu,
+                       const DenseTensor& pivots,
+                       const DenseTensor& out_grad,
+                       const std::string& trans,  // forward op(A) selector
+                       DenseTensor* x_grad) {     // receives d(loss)/d(x)
+  dev_ctx.template Alloc<T>(x_grad);
+  // out = op(A)^-1 * x  =>  x_grad = op(A)^-T * out_grad, so solve with the
+  // flag flipped ("C" acts like "T" for the registered real dtypes).
+  const std::string grad_trans = (trans == "N") ? "T" : "N";
+  LuSolveKernel<T, Context>(dev_ctx, out_grad, lu, pivots, grad_trans, x_grad);
+}
+
+}  // namespace phi
+
+// Register lu_solve_grad on the CPU backend for float and double inputs.
+PD_REGISTER_KERNEL(
+    lu_solve_grad, CPU, ALL_LAYOUT, phi::LuSolveGradKernel, float, double) {}
diff --git a/paddle/phi/kernels/cpu/lu_solve_kernel.cc b/paddle/phi/kernels/cpu/lu_solve_kernel.cc
@@ -0,0 +1,85 @@
+// Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "paddle/phi/backends/cpu/cpu_context.h"
+#include "paddle/phi/core/kernel_registry.h"
+#include "paddle/phi/kernels/funcs/lapack/lapack_function.h"
+
+#include "paddle/phi/core/dense_tensor.h"
+#include "paddle/phi/core/enforce.h"
+#include "paddle/phi/kernels/lu_solve_kernel.h"
+#include "paddle/phi/kernels/impl/lu_kernel_impl.h"
+
+namespace phi {
+
+// Solves op(A) * out = x for every batch item from A's LU factors and pivots.
+// `trans` picks op(A): "N" -> 'N', "T" -> 'T', any other value -> 'C'.
+// Raises PreconditionNotMet when lapackLuSolve reports a non-zero `info`.
+template <typename T, typename Context>
+void LuSolveKernel(const Context& dev_ctx,
+                   const DenseTensor& x,
+                   const DenseTensor& lu,
+                   const DenseTensor& pivots,
+                   const std::string& trans,
+                   DenseTensor* out) {
+  const auto lu_dims = lu.dims();
+  const auto x_dims = x.dims();
+  dev_ctx.template Alloc<T>(out);
+  // LAPACK works on column-major matrices. NOTE(review): Transpose2DTo6D is
+  // assumed to swap the last two dims of a row-major tensor - confirm in
+  // lu_kernel_impl.h. The RHS and the LU factors both need that conversion;
+  // the solver then overwrites the RHS buffer (kept in `out`) in place.
+  *out = Transpose2DTo6D<Context, T>(dev_ctx, x);
+  DenseTensor lu_col_major = Transpose2DTo6D<Context, T>(dev_ctx, lu);
+
+  const char trans_char = (trans == "N") ? 'N' : ((trans == "T") ? 'T' : 'C');
+  const int n_int = static_cast<int>(lu_dims[lu_dims.size() - 1]);
+  const int nrhs_int = static_cast<int>(x_dims[x_dims.size() - 1]);
+  const int lda = std::max(1, n_int);  // Leading dimension of A (LU matrix)
+  const int ldb = std::max(1, n_int);  // Leading dimension of B (RHS/solution)
+  int info = 0;
+
+  const auto outdims = out->dims();
+  const auto outrank = outdims.size();
+  const int batchsize = product(common::slice_ddim(outdims, 0, outrank - 2));
+  T* out_data = out->data<T>();
+  T* lu_data = lu_col_major.data<T>();
+  int* pivots_data = const_cast<int*>(pivots.data<int>());  // non-const API
+  for (int i = 0; i < batchsize; i++) {
+    T* out_data_item = &out_data[i * ldb * nrhs_int];
+    T* lu_data_item = &lu_data[i * lda * n_int];
+    int* pivots_data_item = &pivots_data[i * n_int];
+    phi::funcs::lapackLuSolve<T>(trans_char,
+                                 n_int,
+                                 nrhs_int,
+                                 lu_data_item,
+                                 lda,
+                                 pivots_data_item,
+                                 out_data_item,
+                                 ldb,
+                                 &info);
+    PADDLE_ENFORCE_EQ(
+        info,
+        0,
+        phi::errors::PreconditionNotMet(
+            "LU solve failed with error code %d. Check if matrix is singular.",
+            info));
+  }
+  // Swap the last two dims back so `out` is row-major again.
+  *out = Transpose2DTo6D<Context, T>(dev_ctx, *out);
+}
+}  // namespace phi
+
+PD_REGISTER_KERNEL(  // lu_solve forward kernel: CPU backend, float/double
+    lu_solve, CPU, ALL_LAYOUT, phi::LuSolveKernel, float, double) {}
diff --git a/paddle/phi/kernels/funcs/lapack/lapack_function.cc b/paddle/phi/kernels/funcs/lapack/lapack_function.cc
@@ -30,6 +30,33 @@ void lapackLu<float>(int m, int n, float *a, int lda, int *ipiv, int *info) {
   dynload::sgetrf_(&m, &n, a, &lda, ipiv, info);
 }
 
+// lapackLuSolve<double>: forwards to the dynamically loaded LAPACK dgetrs_.
+template <>
+void lapackLuSolve<double>(char trans,  // scalars are forwarded by address
+                           int n,
+                           int nrhs,
+                           double* a,
+                           int lda,
+                           int* ipiv,
+                           double* b,
+                           int ldb,
+                           int* info) {  // forwarded to dgetrs_ unchanged
+  dynload::dgetrs_(&trans, &n, &nrhs, a, &lda, ipiv, b, &ldb, info);
+}
+
+template <>  // float: forwards to the dynamically loaded LAPACK sgetrs_
+void lapackLuSolve<float>(char trans,  // scalars are forwarded by address
+                          int n,
+                          int nrhs,
+                          float* a,
+                          int lda,
+                          int* ipiv,
+                          float* b,
+                          int ldb,
+                          int* info) {  // forwarded to sgetrs_ unchanged
+  dynload::sgetrs_(&trans, &n, &nrhs, a, &lda, ipiv, b, &ldb, info);
+}
+
 // eigh
 template <>
 void lapackEigh<float>(char jobz,

diff --git a/paddle/phi/kernels/funcs/lapack/lapack_function.h b/paddle/phi/kernels/funcs/lapack/lapack_function.h
@@ -21,6 +21,18 @@ namespace funcs {
 template <typename T>
 void lapackLu(int m, int n, T *a, int lda, int *ipiv, int *info);
 
+// Lu_solve: solves a linear system from LU factors via LAPACK ?getrs_
+template <typename T>
+void lapackLuSolve(char trans,  // 'N', 'T' or 'C'
+                   int n,       // order of the factored matrix
+                   int nrhs,    // number of right-hand-side columns
+                   T *a,        // LU factors
+                   int lda,     // leading dimension of a
+                   int *ipiv,   // pivot indices
+                   T *b,        // right-hand sides in, solution out
+                   int ldb,     // leading dimension of b
+                   int *info);  // set to 0 on success
+
 // Eigh
 template <typename T, typename ValueType = T>
 void lapackEigh(char jobz,

diff --git a/paddle/phi/kernels/gpu/lu_solve_grad_kernel.cu b/paddle/phi/kernels/gpu/lu_solve_grad_kernel.cu
@@ -0,0 +1,42 @@
+// Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+
+#include "paddle/phi/backends/gpu/gpu_context.h"
+#include "paddle/phi/core/kernel_registry.h"
+
+#include "paddle/phi/kernels/lu_solve_grad_kernel.h"
+#include "paddle/phi/kernels/lu_solve_kernel.h"
+
+namespace phi {
+
+template <typename T, typename Context>  // lu_solve backward w.r.t. x
+void LuSolveGradKernel(const Context& dev_ctx,
+                       const DenseTensor& x,  // forward RHS, not read here
+                       const DenseTensor& lu,
+                       const DenseTensor& pivots,
+                       const DenseTensor& out_grad,
+                       const std::string& trans,  // forward op(A) selector
+                       DenseTensor* x_grad) {     // receives d(loss)/d(x)
+  dev_ctx.template Alloc<T>(x_grad);
+  // out = op(A)^-1 * x  =>  x_grad = op(A)^-T * out_grad, so solve with the
+  // flag flipped ("C" acts like "T" for the registered real dtypes).
+  const std::string grad_trans = (trans == "N") ? "T" : "N";
+  LuSolveKernel<T, Context>(dev_ctx, out_grad, lu, pivots, grad_trans, x_grad);
+}
+
+}  // namespace phi
+
+PD_REGISTER_KERNEL(  // lu_solve backward kernel: GPU backend, float/double
+    lu_solve_grad, GPU, ALL_LAYOUT, phi::LuSolveGradKernel, float, double) {}