From b3c4d41ce862a03ea0ee6a102437d12a2d22cedf Mon Sep 17 00:00:00 2001 From: David Cortes Date: Tue, 12 Nov 2024 19:29:04 +0100 Subject: [PATCH 01/17] add spectral decomposition fallback on GPU --- .../dal/algo/linear_regression/test/batch.cpp | 3 +- .../algo/linear_regression/test/fixture.hpp | 78 +++---- .../backend/primitives/lapack/solve_dpc.cpp | 191 +++++++++++++++++- 3 files changed, 229 insertions(+), 43 deletions(-) diff --git a/cpp/oneapi/dal/algo/linear_regression/test/batch.cpp b/cpp/oneapi/dal/algo/linear_regression/test/batch.cpp index a5585202c31..33f690d2110 100644 --- a/cpp/oneapi/dal/algo/linear_regression/test/batch.cpp +++ b/cpp/oneapi/dal/algo/linear_regression/test/batch.cpp @@ -51,9 +51,10 @@ TEMPLATE_LIST_TEST_M(lr_batch_test, "LR common flow", "[lr][batch]", lr_types) { } TEMPLATE_LIST_TEST_M(lr_batch_test, "LR with non-PSD matrix", "[lr][batch-nonpsd]", lr_types) { - SKIP_IF(this->non_psd_system_not_supported_on_device()); + SKIP_IF(this->not_float64_friendly()); this->generate(777); + this->run_and_check_linear_indefinite(); this->run_and_check_linear_indefinite_multioutput(); } diff --git a/cpp/oneapi/dal/algo/linear_regression/test/fixture.hpp b/cpp/oneapi/dal/algo/linear_regression/test/fixture.hpp index e1e54092552..bfba2abf9e5 100644 --- a/cpp/oneapi/dal/algo/linear_regression/test/fixture.hpp +++ b/cpp/oneapi/dal/algo/linear_regression/test/fixture.hpp @@ -65,10 +65,6 @@ class lr_test : public te::crtp_algo_fixture { return static_cast(this); } - bool non_psd_system_not_supported_on_device() { - return this->get_policy().is_gpu(); - } - table compute_responses(const table& beta, const table& bias, const table& data) const { const auto s_count = data.get_row_count(); @@ -299,18 +295,20 @@ class lr_test : public te::crtp_algo_fixture { here is not positive-definite, thus it has an infinite number of possible solutions. The solution here is the one with minimum norm, which is typically more desirable. */ void run_and_check_linear_indefinite(double tol = 1e-3) { - const double X[] = { -0.98912135, -0.36778665, 1.28792526, 0.19397442, 0.9202309, - 0.57710379, -0.63646365, 0.54195222, -0.31659545, -0.32238912, - 0.09716732, -1.52593041, 1.1921661, -0.67108968, 1.00026942 }; - const double y[] = { 0.13632112, 1.53203308, -0.65996941 }; + const auto dtype = + std::is_same::value ? data_type::float64 : data_type::float32; + const float_t X[] = { -0.98912135, -0.36778665, 1.28792526, 0.19397442, 0.9202309, + 0.57710379, -0.63646365, 0.54195222, -0.31659545, -0.32238912, + 0.09716732, -1.52593041, 1.1921661, -0.67108968, 1.00026942 }; + const float_t y[] = { 0.13632112, 1.53203308, -0.65996941 }; auto X_tbl = oneapi::dal::detail::homogen_table_builder() - .set_data_type(data_type::float64) + .set_data_type(dtype) .set_layout(data_layout::row_major) .allocate(3, 5) .copy_data(X, 3, 5) .build(); auto y_tbl = oneapi::dal::detail::homogen_table_builder() - .set_data_type(data_type::float64) + .set_data_type(dtype) .set_layout(data_layout::row_major) .allocate(3, 1) .copy_data(y, 3, 1) @@ -321,20 +319,20 @@ class lr_test : public te::crtp_algo_fixture { const auto coefs = train_res.get_coefficients(); if (desc.get_result_options().test(result_options::intercept)) { - const double expected_beta[] = { 0.27785494, - 0.53011669, - 0.34352259, - 0.40506216, - -1.26026447 }; - const double expected_intercept[] = { 1.24485441 }; + const float_t expected_beta[] = { 0.27785494, + 0.53011669, + 0.34352259, + 0.40506216, + -1.26026447 }; + const float_t expected_intercept[] = { 1.24485441 }; const auto expected_beta_tbl = oneapi::dal::detail::homogen_table_builder() - .set_data_type(data_type::float64) + .set_data_type(dtype) .set_layout(data_layout::row_major) .allocate(1, 5) .copy_data(expected_beta, 1, 5) .build(); const auto expected_intercept_tbl = oneapi::dal::detail::homogen_table_builder() - .set_data_type(data_type::float64) + .set_data_type(dtype) .set_layout(data_layout::row_major) .allocate(1, 1) .copy_data(expected_intercept, 1, 1) @@ -351,13 +349,13 @@ class lr_test : public te::crtp_algo_fixture { } else { - const double expected_beta[] = { 0.38217445, - 0.2732197, - 1.87135517, - 0.63458468, - -2.08473134 }; + const float_t expected_beta[] = { 0.38217445, + 0.2732197, + 1.87135517, + 0.63458468, + -2.08473134 }; const auto expected_beta_tbl = oneapi::dal::detail::homogen_table_builder() - .set_data_type(data_type::float64) + .set_data_type(dtype) .set_layout(data_layout::row_major) .allocate(1, 5) .copy_data(expected_beta, 1, 5) @@ -369,19 +367,21 @@ class lr_test : public te::crtp_algo_fixture { } void run_and_check_linear_indefinite_multioutput(double tol = 1e-3) { - const double X[] = { -0.98912135, -0.36778665, 1.28792526, 0.19397442, 0.9202309, - 0.57710379, -0.63646365, 0.54195222, -0.31659545, -0.32238912, - 0.09716732, -1.52593041, 1.1921661, -0.67108968, 1.00026942 }; - const double y[] = { 0.13632112, 1.53203308, -0.65996941, - -0.31179486, 0.33776913, -2.2074711 }; + const auto dtype = + std::is_same::value ? data_type::float64 : data_type::float32; + const float_t X[] = { -0.98912135, -0.36778665, 1.28792526, 0.19397442, 0.9202309, + 0.57710379, -0.63646365, 0.54195222, -0.31659545, -0.32238912, + 0.09716732, -1.52593041, 1.1921661, -0.67108968, 1.00026942 }; + const float_t y[] = { 0.13632112, 1.53203308, -0.65996941, + -0.31179486, 0.33776913, -2.2074711 }; auto X_tbl = oneapi::dal::detail::homogen_table_builder() - .set_data_type(data_type::float64) + .set_data_type(dtype) .set_layout(data_layout::row_major) .allocate(3, 5) .copy_data(X, 3, 5) .build(); auto y_tbl = oneapi::dal::detail::homogen_table_builder() - .set_data_type(data_type::float64) + .set_data_type(dtype) .set_layout(data_layout::row_major) .allocate(3, 2) .copy_data(y, 3, 2) @@ -392,19 +392,19 @@ class lr_test : public te::crtp_algo_fixture { const auto coefs = train_res.get_coefficients(); if (desc.get_result_options().test(result_options::intercept)) { - const double expected_beta[] = { + const float_t expected_beta[] = { -0.18692112, -0.20034801, -0.09590892, -0.13672683, 0.56229012, -0.97006008, 1.39413595, 0.49238012, 1.11041239, -0.79213452, }; - const double expected_intercept[] = { -0.48964358, 0.96467681 }; + const float_t expected_intercept[] = { -0.48964358, 0.96467681 }; const auto expected_beta_tbl = oneapi::dal::detail::homogen_table_builder() - .set_data_type(data_type::float64) + .set_data_type(dtype) .set_layout(data_layout::row_major) .allocate(2, 5) .copy_data(expected_beta, 2, 5) .build(); const auto expected_intercept_tbl = oneapi::dal::detail::homogen_table_builder() - .set_data_type(data_type::float64) + .set_data_type(dtype) .set_layout(data_layout::row_major) .allocate(1, 2) .copy_data(expected_intercept, 1, 2) @@ -421,11 +421,11 @@ class lr_test : public te::crtp_algo_fixture { } else { - const double expected_beta[] = { -0.22795353, -0.09930168, -0.69685744, -0.22700585, - 0.88658098, -0.88921961, 1.19505839, 1.67634561, - 1.2882766, -1.43103981 }; + const float_t expected_beta[] = { -0.22795353, -0.09930168, -0.69685744, -0.22700585, + 0.88658098, -0.88921961, 1.19505839, 1.67634561, + 1.2882766, -1.43103981 }; const auto expected_beta_tbl = oneapi::dal::detail::homogen_table_builder() - .set_data_type(data_type::float64) + .set_data_type(dtype) .set_layout(data_layout::row_major) .allocate(2, 5) .copy_data(expected_beta, 2, 5) diff --git a/cpp/oneapi/dal/backend/primitives/lapack/solve_dpc.cpp b/cpp/oneapi/dal/backend/primitives/lapack/solve_dpc.cpp index 223f6753f69..77ffd3e2540 100644 --- a/cpp/oneapi/dal/backend/primitives/lapack/solve_dpc.cpp +++ b/cpp/oneapi/dal/backend/primitives/lapack/solve_dpc.cpp @@ -14,6 +14,8 @@ * limitations under the License. *******************************************************************************/ +#include + #include "oneapi/dal/backend/primitives/lapack.hpp" #include "oneapi/dal/backend/primitives/ndarray.hpp" #include "oneapi/dal/backend/primitives/ndindexer.hpp" @@ -59,6 +61,167 @@ inline sycl::event beta_copy_transform(sycl::queue& queue, }); } +/* +This is an adaptation of the CPU version, which can be found in this file: +cpp/daal/src/algorithms/service_kernel_math.h + +It solves a linear system A*x = b +where 'b' might be a matrix (multiple RHS) + +It is intended as a fallback for solving linear regression, where these +matrices are obtained as follows: + A = t(X)*X + b = t(X)*y + +It can handle systems that are not positive semi-definite, so it's used +as a fallback when Cholesky fails or when it involves too small numbers +(which tends to happen when floating point error results in a slightly +positive matrix when it should have zero determinant in theory). +*/ +template +sycl::event solve_spectral_decomposition( + sycl::queue& queue, + ndview& A, // t(X)*X, LHS, gets overwritten + sycl::event& event_A, + ndview& b, // t(X)*y, RHS, solution is outputted here + sycl::event& event_b, + const std::int64_t dim_A, + const std::int64_t nrhs) { + constexpr auto alloc = sycl::usm::alloc::device; + + /* Decompose: A = Q * diag(l) * t(Q) */ + /* Note: for NRHS>1, this will overallocate in order to reuse the memory as buffer later on */ + auto eigenvalues = array::empty(queue, dim_A * nrhs, alloc); + auto eigenvalues_view = ndview::wrap(eigenvalues); + sycl::event syevd_event = + syevd(queue, dim_A, A, dim_A, eigenvalues_view, { event_A }); + const Float eps = std::numeric_limits::epsilon(); + + /* Discard too small singular values */ + std::int64_t num_discarded; + { + /* This is placed inside a block because the array created here is not used any further */ + auto eigenvalues_cpu = eigenvalues_view.to_host(queue, { syevd_event }); + const Float* eigenvalues_cpu_ptr = eigenvalues_cpu.get_data(); + const Float largest_ev = eigenvalues_cpu_ptr[dim_A - 1]; + if (largest_ev <= eps) { + throw std::runtime_error( + "Could not solve linear system. Problem has too small singular values."); + } + const Float component_threshold = eps * largest_ev; + for (num_discarded = 0; num_discarded < dim_A - 1; num_discarded++) { + if (eigenvalues_cpu_ptr[num_discarded] > component_threshold) + break; + } + } + + /* Create the square root of the inverse: Qis = Q * diag(1 / sqrt(l)) */ + std::int64_t num_taken = dim_A - num_discarded; + auto ev_mutable = eigenvalues.get_mutable_data(); + sycl::event inv_sqrt_eigenvalues_event = queue.submit([&](sycl::handler& h) { + h.depends_on(syevd_event); + h.parallel_for(num_taken, [=](const auto& i) { + const std::size_t ix = i + num_discarded; + ev_mutable[ix] = sycl::sqrt(Float(1) / ev_mutable[ix]); + }); + }); + + auto Q_mutable = A.get_mutable_data(); + sycl::event inv_sqrt_eigenvectors_event = queue.submit([&](sycl::handler& h) { + const auto range = oneapi::dal::backend::make_range_2d(num_taken, dim_A); + h.depends_on(inv_sqrt_eigenvalues_event); + h.parallel_for(range, [=](sycl::id<2> id) { + const std::size_t col = id[0] + num_discarded; + const std::size_t row = id[1]; + Q_mutable[row + col * dim_A] *= ev_mutable[col]; + }); + }); + + /* Now calculate the actual solution: Qis * Qis' * B */ + const std::int64_t eigenvectors_offset = num_discarded * dim_A; + if (nrhs == 1) { + sycl::event gemv_right_event = + mkl::blas::column_major::gemv(queue, + mkl::transpose::trans, + dim_A, + num_taken, + Float(1), + Q_mutable + eigenvectors_offset, + dim_A, + b.get_data(), + 1, + Float(0), + ev_mutable, + 1, + { inv_sqrt_eigenvectors_event, event_b }); + return mkl::blas::column_major::gemv(queue, + mkl::transpose::nontrans, + dim_A, + num_taken, + Float(1), + Q_mutable + eigenvectors_offset, + dim_A, + ev_mutable, + 1, + Float(0), + b.get_mutable_data(), + 1, + { gemv_right_event }); + } + + else { + sycl::event gemm_right_event = + mkl::blas::column_major::gemm(queue, + mkl::transpose::trans, + mkl::transpose::nontrans, + num_taken, + nrhs, + dim_A, + Float(1), + Q_mutable + eigenvectors_offset, + dim_A, + b.get_data(), + dim_A, + Float(0), + ev_mutable, + num_taken, + { inv_sqrt_eigenvectors_event, event_b }); + return mkl::blas::column_major::gemm(queue, + mkl::transpose::nontrans, + mkl::transpose::nontrans, + dim_A, + nrhs, + num_taken, + Float(1), + Q_mutable + eigenvectors_offset, + dim_A, + ev_mutable, + num_taken, + Float(0), + b.get_mutable_data(), + dim_A, + { gemm_right_event }); + } +} + +/* Returns the minimum value among entries in the diagonal of a square matrix */ +template +Float diagonal_minimum(sycl::queue& queue, + const Float* Matrix, + const std::int64_t dim_matrix, + sycl::event& event_Matrix) { + constexpr auto alloc = sycl::usm::alloc::device; + auto diag_min_holder = array::empty(queue, 1, alloc); + sycl::event diag_min_event = queue.submit([&](sycl::handler& h) { + auto min_reduction = sycl::reduction(diag_min_holder.get_mutable_data(), sycl::minimum<>()); + h.depends_on(event_Matrix); + h.parallel_for(dim_matrix, min_reduction, [=](const auto& i, auto& min_obj) { + min_obj.combine(Matrix[i * (dim_matrix + 1)]); + }); + }); + return ndview::wrap(diag_min_holder).at_device(queue, 0, { diag_min_event }); +} + template sycl::event solve_system(sycl::queue& queue, const ndview& xtx, @@ -70,12 +233,34 @@ sycl::event solve_system(sycl::queue& queue, auto [nxty, xty_event] = copy(queue, xty, dependencies); auto [nxtx, xtx_event] = copy(queue, xtx, dependencies); + const std::int64_t dim_xtx = xtx.get_dimension(0); + + sycl::event solution_event; opt_array dummy{}; - auto potrf_event = potrf_factorization(queue, nxtx, dummy, { xtx_event }); - auto potrs_event = potrs_solution(queue, nxtx, nxty, dummy, { potrf_event, xty_event }); + try { + sycl::event potrf_event = potrf_factorization(queue, nxtx, dummy, { xtx_event }); + const Float diag_min = diagonal_minimum(queue, nxtx.get_data(), dim_xtx, potrf_event); + if (diag_min <= 1e-6) + throw mkl::exception(""); + solution_event = potrs_solution(queue, nxtx, nxty, dummy, { potrf_event, xty_event }); + } + catch (mkl::exception& ex) { + const std::int64_t nrhs = nxty.get_dimension(0); + /* Note: this templated version of 'copy' reuses the layout that was specified in the previous copy */ + sycl::event xtx_event_new = copy(queue, nxtx, xtx, dependencies); + sycl::event xty_event_new = copy(queue, nxty, xty, dependencies); + + solution_event = solve_spectral_decomposition(queue, + nxtx, + xtx_event_new, + nxty, + xty_event_new, + dim_xtx, + nrhs); + } - return beta_copy_transform(queue, nxty, final_xty, { potrs_event }); + return beta_copy_transform(queue, nxty, final_xty, { solution_event }); } #define INSTANTIATE(U, B, F, XL, YL) \ From d528fab78c415ba29aa81aaa49428835f021de4f Mon Sep 17 00:00:00 2001 From: David Cortes Date: Thu, 14 Nov 2024 17:51:06 +0100 Subject: [PATCH 02/17] fix failing min diagonal --- .../backend/primitives/lapack/solve_dpc.cpp | 21 +++++++++++-------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/cpp/oneapi/dal/backend/primitives/lapack/solve_dpc.cpp b/cpp/oneapi/dal/backend/primitives/lapack/solve_dpc.cpp index 77ffd3e2540..a0f42ecadbe 100644 --- a/cpp/oneapi/dal/backend/primitives/lapack/solve_dpc.cpp +++ b/cpp/oneapi/dal/backend/primitives/lapack/solve_dpc.cpp @@ -211,15 +211,18 @@ Float diagonal_minimum(sycl::queue& queue, const std::int64_t dim_matrix, sycl::event& event_Matrix) { constexpr auto alloc = sycl::usm::alloc::device; - auto diag_min_holder = array::empty(queue, 1, alloc); - sycl::event diag_min_event = queue.submit([&](sycl::handler& h) { - auto min_reduction = sycl::reduction(diag_min_holder.get_mutable_data(), sycl::minimum<>()); - h.depends_on(event_Matrix); - h.parallel_for(dim_matrix, min_reduction, [=](const auto& i, auto& min_obj) { - min_obj.combine(Matrix[i * (dim_matrix + 1)]); - }); - }); - return ndview::wrap(diag_min_holder).at_device(queue, 0, { diag_min_event }); + auto idx_min_holder = array::empty(queue, 1, alloc); + sycl::event diag_min_event = mkl::blas::column_major::iamin( + queue, + dim_matrix, + Matrix, + dim_matrix + 1, + idx_min_holder.get_mutable_data(), + mkl::index_base::zero, + { event_Matrix } + ); + const std::int64_t idx_min = ndview::wrap(idx_min_holder).at_device(queue, 0, { diag_min_event }); + return ndview::wrap(Matrix, dim_matrix * dim_matrix).at_device(queue, idx_min * (dim_matrix + 1), { event_Matrix }); } template From 5255e648bdd0d9e605da55eee9270cb9da5ceeae Mon Sep 17 00:00:00 2001 From: David Cortes Date: Fri, 15 Nov 2024 08:54:58 +0100 Subject: [PATCH 03/17] switch back to sycl reduction for diag min --- .../backend/primitives/lapack/solve_dpc.cpp | 31 ++++++++++--------- 1 file changed, 17 insertions(+), 14 deletions(-) diff --git a/cpp/oneapi/dal/backend/primitives/lapack/solve_dpc.cpp b/cpp/oneapi/dal/backend/primitives/lapack/solve_dpc.cpp index a0f42ecadbe..305f74a4a67 100644 --- a/cpp/oneapi/dal/backend/primitives/lapack/solve_dpc.cpp +++ b/cpp/oneapi/dal/backend/primitives/lapack/solve_dpc.cpp @@ -211,18 +211,21 @@ Float diagonal_minimum(sycl::queue& queue, const std::int64_t dim_matrix, sycl::event& event_Matrix) { constexpr auto alloc = sycl::usm::alloc::device; - auto idx_min_holder = array::empty(queue, 1, alloc); - sycl::event diag_min_event = mkl::blas::column_major::iamin( - queue, - dim_matrix, - Matrix, - dim_matrix + 1, - idx_min_holder.get_mutable_data(), - mkl::index_base::zero, - { event_Matrix } - ); - const std::int64_t idx_min = ndview::wrap(idx_min_holder).at_device(queue, 0, { diag_min_event }); - return ndview::wrap(Matrix, dim_matrix * dim_matrix).at_device(queue, idx_min * (dim_matrix + 1), { event_Matrix }); + auto diag_min_holder = array::empty(queue, 1, alloc); + sycl::event diag_min_holder_init = queue.submit([&](sycl::handler& h) { + Float* diag_min_ptr = diag_min_holder.get_mutable_data(); + h.parallel_for(1, [=](const auto& i) { + diag_min_ptr[i] = std::numeric_limits::infinity(); + }); + }); + sycl::event diag_min_event = queue.submit([&](sycl::handler& h) { + auto min_reduction = sycl::reduction(diag_min_holder.get_mutable_data(), sycl::minimum<>()); + h.depends_on({ diag_min_holder_init, event_Matrix }); + h.parallel_for(dim_matrix, min_reduction, [=](const auto& i, auto& min_obj) { + min_obj.combine(Matrix[i * (dim_matrix + 1)]); + }); + }); + return ndview::wrap(diag_min_holder).at_device(queue, 0, { diag_min_event }); } template @@ -245,10 +248,10 @@ sycl::event solve_system(sycl::queue& queue, sycl::event potrf_event = potrf_factorization(queue, nxtx, dummy, { xtx_event }); const Float diag_min = diagonal_minimum(queue, nxtx.get_data(), dim_xtx, potrf_event); if (diag_min <= 1e-6) - throw mkl::exception(""); + throw mkl::lapack::computation_error("", "", 0); solution_event = potrs_solution(queue, nxtx, nxty, dummy, { potrf_event, xty_event }); } - catch (mkl::exception& ex) { + catch (mkl::lapack::computation_error& ex) { const std::int64_t nrhs = nxty.get_dimension(0); /* Note: this templated version of 'copy' reuses the layout that was specified in the previous copy */ sycl::event xtx_event_new = copy(queue, nxtx, xtx, dependencies); From 141b65b4c2e42aac0d0d27ac4117b3eb9e555b6c Mon Sep 17 00:00:00 2001 From: David Cortes Date: Fri, 15 Nov 2024 09:03:49 +0100 Subject: [PATCH 04/17] try another way --- .../dal/backend/primitives/lapack/solve_dpc.cpp | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/cpp/oneapi/dal/backend/primitives/lapack/solve_dpc.cpp b/cpp/oneapi/dal/backend/primitives/lapack/solve_dpc.cpp index 305f74a4a67..5dc20bfd768 100644 --- a/cpp/oneapi/dal/backend/primitives/lapack/solve_dpc.cpp +++ b/cpp/oneapi/dal/backend/primitives/lapack/solve_dpc.cpp @@ -212,15 +212,11 @@ Float diagonal_minimum(sycl::queue& queue, sycl::event& event_Matrix) { constexpr auto alloc = sycl::usm::alloc::device; auto diag_min_holder = array::empty(queue, 1, alloc); - sycl::event diag_min_holder_init = queue.submit([&](sycl::handler& h) { - Float* diag_min_ptr = diag_min_holder.get_mutable_data(); - h.parallel_for(1, [=](const auto& i) { - diag_min_ptr[i] = std::numeric_limits::infinity(); - }); - }); sycl::event diag_min_event = queue.submit([&](sycl::handler& h) { - auto min_reduction = sycl::reduction(diag_min_holder.get_mutable_data(), sycl::minimum<>()); - h.depends_on({ diag_min_holder_init, event_Matrix }); + auto min_reduction = sycl::reduction(diag_min_holder.get_mutable_data(), + sycl::minimum<>(), + sycl::property::reduction::initialize_to_identity()); + h.depends_on({ event_Matrix }); h.parallel_for(dim_matrix, min_reduction, [=](const auto& i, auto& min_obj) { min_obj.combine(Matrix[i * (dim_matrix + 1)]); }); From e88ed4717a84281210202343b67952fbdf93cc44 Mon Sep 17 00:00:00 2001 From: David Cortes Date: Fri, 15 Nov 2024 13:09:25 +0100 Subject: [PATCH 05/17] use ndarray instead of array for allocation --- cpp/oneapi/dal/backend/primitives/lapack/solve_dpc.cpp | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/cpp/oneapi/dal/backend/primitives/lapack/solve_dpc.cpp b/cpp/oneapi/dal/backend/primitives/lapack/solve_dpc.cpp index 5dc20bfd768..401a304cfbc 100644 --- a/cpp/oneapi/dal/backend/primitives/lapack/solve_dpc.cpp +++ b/cpp/oneapi/dal/backend/primitives/lapack/solve_dpc.cpp @@ -91,17 +91,16 @@ sycl::event solve_spectral_decomposition( /* Decompose: A = Q * diag(l) * t(Q) */ /* Note: for NRHS>1, this will overallocate in order to reuse the memory as buffer later on */ - auto eigenvalues = array::empty(queue, dim_A * nrhs, alloc); - auto eigenvalues_view = ndview::wrap(eigenvalues); + auto eigenvalues = ndarray::empty(queue, dim_A * nrhs, alloc); sycl::event syevd_event = - syevd(queue, dim_A, A, dim_A, eigenvalues_view, { event_A }); + syevd(queue, dim_A, A, dim_A, eigenvalues, { event_A }); const Float eps = std::numeric_limits::epsilon(); /* Discard too small singular values */ std::int64_t num_discarded; { /* This is placed inside a block because the array created here is not used any further */ - auto eigenvalues_cpu = eigenvalues_view.to_host(queue, { syevd_event }); + auto eigenvalues_cpu = eigenvalues.to_host(queue, { syevd_event }); const Float* eigenvalues_cpu_ptr = eigenvalues_cpu.get_data(); const Float largest_ev = eigenvalues_cpu_ptr[dim_A - 1]; if (largest_ev <= eps) { From a3254527c2d4c5e242823f8f96ae6537cbc0fa27 Mon Sep 17 00:00:00 2001 From: David Cortes Date: Fri, 15 Nov 2024 16:52:21 +0100 Subject: [PATCH 06/17] avoid copying unused entries in array --- cpp/oneapi/dal/backend/primitives/lapack/solve_dpc.cpp | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/cpp/oneapi/dal/backend/primitives/lapack/solve_dpc.cpp b/cpp/oneapi/dal/backend/primitives/lapack/solve_dpc.cpp index 401a304cfbc..efa497e73fe 100644 --- a/cpp/oneapi/dal/backend/primitives/lapack/solve_dpc.cpp +++ b/cpp/oneapi/dal/backend/primitives/lapack/solve_dpc.cpp @@ -100,7 +100,10 @@ sycl::event solve_spectral_decomposition( std::int64_t num_discarded; { /* This is placed inside a block because the array created here is not used any further */ - auto eigenvalues_cpu = eigenvalues.to_host(queue, { syevd_event }); + /* Note: the array for 'eigenvalues' is allocated with size [dimA, nrhs], + but by this point, only 'dimA' elements will have been written to it. */ + auto eigenvalues_cpu = + ndview::wrap(eigenvalues.get_data(), dim_A).to_host(queue, { syevd_event }); const Float* eigenvalues_cpu_ptr = eigenvalues_cpu.get_data(); const Float largest_ev = eigenvalues_cpu_ptr[dim_A - 1]; if (largest_ev <= eps) { From 6addc0355e0d7f61cbd591f282b414eafb7fdd7b Mon Sep 17 00:00:00 2001 From: david-cortes-intel Date: Mon, 18 Nov 2024 15:07:26 +0100 Subject: [PATCH 07/17] Update cpp/oneapi/dal/backend/primitives/lapack/solve_dpc.cpp Co-authored-by: Victoriya Fedotova --- cpp/oneapi/dal/backend/primitives/lapack/solve_dpc.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/oneapi/dal/backend/primitives/lapack/solve_dpc.cpp b/cpp/oneapi/dal/backend/primitives/lapack/solve_dpc.cpp index efa497e73fe..1942c2cb1df 100644 --- a/cpp/oneapi/dal/backend/primitives/lapack/solve_dpc.cpp +++ b/cpp/oneapi/dal/backend/primitives/lapack/solve_dpc.cpp @@ -124,7 +124,7 @@ sycl::event solve_spectral_decomposition( h.depends_on(syevd_event); h.parallel_for(num_taken, [=](const auto& i) { const std::size_t ix = i + num_discarded; - ev_mutable[ix] = sycl::sqrt(Float(1) / ev_mutable[ix]); + ev_mutable[ix] = sycl::rsqrt(ev_mutable[ix]); }); }); From 8cc9ca187851a5cc4add852117a1ef30fcc67c75 Mon Sep 17 00:00:00 2001 From: David Cortes Date: Mon, 18 Nov 2024 15:06:43 +0100 Subject: [PATCH 08/17] follow internal logic for error messages --- cpp/oneapi/dal/backend/primitives/lapack/solve_dpc.cpp | 3 +-- cpp/oneapi/dal/detail/error_messages.cpp | 2 ++ cpp/oneapi/dal/detail/error_messages.hpp | 1 + 3 files changed, 4 insertions(+), 2 deletions(-) diff --git a/cpp/oneapi/dal/backend/primitives/lapack/solve_dpc.cpp b/cpp/oneapi/dal/backend/primitives/lapack/solve_dpc.cpp index 1942c2cb1df..6cf3d51c7e1 100644 --- a/cpp/oneapi/dal/backend/primitives/lapack/solve_dpc.cpp +++ b/cpp/oneapi/dal/backend/primitives/lapack/solve_dpc.cpp @@ -107,8 +107,7 @@ sycl::event solve_spectral_decomposition( const Float* eigenvalues_cpu_ptr = eigenvalues_cpu.get_data(); const Float largest_ev = eigenvalues_cpu_ptr[dim_A - 1]; if (largest_ev <= eps) { - throw std::runtime_error( - "Could not solve linear system. Problem has too small singular values."); + throw internal_error{ dal::detail::error_messages::too_small_singular_values() }; } const Float component_threshold = eps * largest_ev; for (num_discarded = 0; num_discarded < dim_A - 1; num_discarded++) { diff --git a/cpp/oneapi/dal/detail/error_messages.cpp b/cpp/oneapi/dal/detail/error_messages.cpp index 20ce68e0ef2..85cbb969099 100644 --- a/cpp/oneapi/dal/detail/error_messages.cpp +++ b/cpp/oneapi/dal/detail/error_messages.cpp @@ -303,6 +303,8 @@ MSG(input_y_is_empty, "Input y is empty") /* Linear Regression */ MSG(intercept_result_option_requires_intercept_flag, "Intercept result option requires intercept flag") +MSG(too_small_singular_values, + "Linear system has too small singular values, cannot calculate a solution") /* Logistic Regression */ MSG(class_count_neq_two, diff --git a/cpp/oneapi/dal/detail/error_messages.hpp b/cpp/oneapi/dal/detail/error_messages.hpp index dbb9c8ba6ec..926e89321b0 100644 --- a/cpp/oneapi/dal/detail/error_messages.hpp +++ b/cpp/oneapi/dal/detail/error_messages.hpp @@ -242,6 +242,7 @@ class ONEDAL_EXPORT error_messages { /* Linear Regression */ MSG(intercept_result_option_requires_intercept_flag); + MSG(too_small_singular_values); /* Logistic Regression */ MSG(class_count_neq_two); From e39f2ac33744d32c9328cee3b4e94188fd7d0365 Mon Sep 17 00:00:00 2001 From: David Cortes Date: Mon, 18 Nov 2024 15:11:51 +0100 Subject: [PATCH 09/17] add comment about the try-catch block --- cpp/oneapi/dal/backend/primitives/lapack/solve_dpc.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/cpp/oneapi/dal/backend/primitives/lapack/solve_dpc.cpp b/cpp/oneapi/dal/backend/primitives/lapack/solve_dpc.cpp index 6cf3d51c7e1..78bd7d21960 100644 --- a/cpp/oneapi/dal/backend/primitives/lapack/solve_dpc.cpp +++ b/cpp/oneapi/dal/backend/primitives/lapack/solve_dpc.cpp @@ -241,6 +241,10 @@ sycl::event solve_system(sycl::queue& queue, sycl::event solution_event; opt_array dummy{}; + /// Note: this is wrapped in a try-catch block in order to catch a potential exception + /// thrown by oneMKL when the Cholesky factorization ('potrs') fails due to the matrix + /// not being positive-definite. In such case, it will then fall back to spectral + /// decomposition-based inversion, which is able to handle such problems. try { sycl::event potrf_event = potrf_factorization(queue, nxtx, dummy, { xtx_event }); const Float diag_min = diagonal_minimum(queue, nxtx.get_data(), dim_xtx, potrf_event); From 9a4d13eef18ceb9c6be0d69e12f059621363468c Mon Sep 17 00:00:00 2001 From: David Cortes Date: Mon, 18 Nov 2024 15:13:34 +0100 Subject: [PATCH 10/17] add manual wait-and-throw call where appropriate --- cpp/oneapi/dal/backend/primitives/lapack/solve_dpc.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/cpp/oneapi/dal/backend/primitives/lapack/solve_dpc.cpp b/cpp/oneapi/dal/backend/primitives/lapack/solve_dpc.cpp index 78bd7d21960..f5b372001a2 100644 --- a/cpp/oneapi/dal/backend/primitives/lapack/solve_dpc.cpp +++ b/cpp/oneapi/dal/backend/primitives/lapack/solve_dpc.cpp @@ -247,6 +247,7 @@ sycl::event solve_system(sycl::queue& queue, /// decomposition-based inversion, which is able to handle such problems. try { sycl::event potrf_event = potrf_factorization(queue, nxtx, dummy, { xtx_event }); + queue.wait_and_throw(); const Float diag_min = diagonal_minimum(queue, nxtx.get_data(), dim_xtx, potrf_event); if (diag_min <= 1e-6) throw mkl::lapack::computation_error("", "", 0); From 9a1f48d2fc78116fb63ebd3226263f8c2b5e1258 Mon Sep 17 00:00:00 2001 From: David Cortes Date: Mon, 18 Nov 2024 16:17:50 +0100 Subject: [PATCH 11/17] use internal wrappers for blas calls --- .../backend/primitives/lapack/solve_dpc.cpp | 97 ++++++++----------- 1 file changed, 41 insertions(+), 56 deletions(-) diff --git a/cpp/oneapi/dal/backend/primitives/lapack/solve_dpc.cpp b/cpp/oneapi/dal/backend/primitives/lapack/solve_dpc.cpp index f5b372001a2..e7576b9cf32 100644 --- a/cpp/oneapi/dal/backend/primitives/lapack/solve_dpc.cpp +++ b/cpp/oneapi/dal/backend/primitives/lapack/solve_dpc.cpp @@ -16,9 +16,12 @@ #include +#include "oneapi/dal/backend/primitives/blas/gemm.hpp" +#include "oneapi/dal/backend/primitives/blas/gemv.hpp" #include "oneapi/dal/backend/primitives/lapack.hpp" #include "oneapi/dal/backend/primitives/ndarray.hpp" #include "oneapi/dal/backend/primitives/ndindexer.hpp" +#include "oneapi/dal/detail/error_messages.hpp" namespace oneapi::dal::backend::primitives { @@ -141,67 +144,49 @@ sycl::event solve_spectral_decomposition( /* Now calculate the actual solution: Qis * Qis' * B */ const std::int64_t eigenvectors_offset = num_discarded * dim_A; if (nrhs == 1) { + auto ev_mutable_vec_view = ndview::wrap(ev_mutable, num_taken); + auto b_mutable_vec_view = ndview::wrap(b.get_mutable_data(), dim_A); sycl::event gemv_right_event = - mkl::blas::column_major::gemv(queue, - mkl::transpose::trans, - dim_A, - num_taken, - Float(1), - Q_mutable + eigenvectors_offset, - dim_A, - b.get_data(), - 1, - Float(0), - ev_mutable, - 1, - { inv_sqrt_eigenvectors_event, event_b }); - return mkl::blas::column_major::gemv(queue, - mkl::transpose::nontrans, - dim_A, - num_taken, - Float(1), - Q_mutable + eigenvectors_offset, - dim_A, - ev_mutable, - 1, - Float(0), - b.get_mutable_data(), - 1, - { gemv_right_event }); + gemv(queue, + ndview::wrap(Q_mutable + eigenvectors_offset, + ndshape<2>(num_taken, dim_A)), + b_mutable_vec_view, + ev_mutable_vec_view, + Float(1), + Float(0), + { inv_sqrt_eigenvectors_event, event_b }); + return gemv(queue, + ndview::wrap(Q_mutable + eigenvectors_offset, + ndshape<2>(dim_A, num_taken)), + ev_mutable_vec_view, + b_mutable_vec_view, + Float(1), + Float(0), + { gemv_right_event }); } else { + auto ev_mutable_mat_view = + ndview::wrap(ev_mutable, ndshape<2>(nrhs, num_taken)); + auto b_mutable_mat_view = + ndview::wrap(b.get_mutable_data(), ndshape<2>(nrhs, dim_A)); sycl::event gemm_right_event = - mkl::blas::column_major::gemm(queue, - mkl::transpose::trans, - mkl::transpose::nontrans, - num_taken, - nrhs, - dim_A, - Float(1), - Q_mutable + eigenvectors_offset, - dim_A, - b.get_data(), - dim_A, - Float(0), - ev_mutable, - num_taken, - { inv_sqrt_eigenvectors_event, event_b }); - return mkl::blas::column_major::gemm(queue, - mkl::transpose::nontrans, - mkl::transpose::nontrans, - dim_A, - nrhs, - num_taken, - Float(1), - Q_mutable + eigenvectors_offset, - dim_A, - ev_mutable, - num_taken, - Float(0), - b.get_mutable_data(), - dim_A, - { gemm_right_event }); + gemm(queue, + b_mutable_mat_view, + ndview::wrap(Q_mutable + eigenvectors_offset, + ndshape<2>(dim_A, num_taken)), + ev_mutable_mat_view, + Float(1), + Float(0), + { inv_sqrt_eigenvectors_event, event_b }); + return gemm(queue, + ev_mutable_mat_view, + ndview::wrap(Q_mutable + eigenvectors_offset, + ndshape<2>(num_taken, dim_A)), + b_mutable_mat_view, + Float(1), + Float(0), + { gemm_right_event }); } } From 8b3d1c1392a09ce58e2e42c66b7f96ff52c6bc75 Mon Sep 17 00:00:00 2001 From: David Cortes Date: Mon, 18 Nov 2024 16:19:27 +0100 Subject: [PATCH 12/17] add comment about dimensions of inputs --- cpp/oneapi/dal/backend/primitives/lapack/solve_dpc.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/cpp/oneapi/dal/backend/primitives/lapack/solve_dpc.cpp b/cpp/oneapi/dal/backend/primitives/lapack/solve_dpc.cpp index e7576b9cf32..9d335198a53 100644 --- a/cpp/oneapi/dal/backend/primitives/lapack/solve_dpc.cpp +++ b/cpp/oneapi/dal/backend/primitives/lapack/solve_dpc.cpp @@ -84,9 +84,10 @@ positive matrix when it should have zero determinant in theory). template sycl::event solve_spectral_decomposition( sycl::queue& queue, - ndview& A, // t(X)*X, LHS, gets overwritten + ndview& A, // t(X)*X, LHS, gets overwritten, dim=[dim_A, dim_A] (symmetric) sycl::event& event_A, - ndview& b, // t(X)*y, RHS, solution is outputted here + ndview& + b, // t(X)*y, RHS, solution is outputted here, dim=[dim_A, nrhs] in colmajor order sycl::event& event_b, const std::int64_t dim_A, const std::int64_t nrhs) { From e86b5bde7929e0af69d2a4933048a242e21e880c Mon Sep 17 00:00:00 2001 From: David Cortes Date: Mon, 18 Nov 2024 16:21:27 +0100 Subject: [PATCH 13/17] change format of comments --- .../backend/primitives/lapack/solve_dpc.cpp | 56 +++++++++---------- 1 file changed, 27 insertions(+), 29 deletions(-) diff --git a/cpp/oneapi/dal/backend/primitives/lapack/solve_dpc.cpp b/cpp/oneapi/dal/backend/primitives/lapack/solve_dpc.cpp index 9d335198a53..173c92f8ffe 100644 --- a/cpp/oneapi/dal/backend/primitives/lapack/solve_dpc.cpp +++ b/cpp/oneapi/dal/backend/primitives/lapack/solve_dpc.cpp @@ -64,48 +64,46 @@ inline sycl::event beta_copy_transform(sycl::queue& queue, }); } -/* -This is an adaptation of the CPU version, which can be found in this file: -cpp/daal/src/algorithms/service_kernel_math.h - -It solves a linear system A*x = b -where 'b' might be a matrix (multiple RHS) - -It is intended as a fallback for solving linear regression, where these -matrices are obtained as follows: - A = t(X)*X - b = t(X)*y - -It can handle systems that are not positive semi-definite, so it's used -as a fallback when Cholesky fails or when it involves too small numbers -(which tends to happen when floating point error results in a slightly -positive matrix when it should have zero determinant in theory). -*/ +/// This is an adaptation of the CPU version, which can be found in this file: +/// cpp/daal/src/algorithms/service_kernel_math.h +/// +/// It solves a linear system A*x = b +/// where 'b' might be a matrix (multiple RHS) +/// +/// It is intended as a fallback for solving linear regression, where these +/// matrices are obtained as follows: +/// A = t(X)*X +/// b = t(X)*y +/// +/// It can handle systems that are not positive semi-definite, so it's used +/// as a fallback when Cholesky fails or when it involves too small numbers +/// (which tends to happen when floating point error results in a slightly +/// positive matrix when it should have zero determinant in theory). template sycl::event solve_spectral_decomposition( sycl::queue& queue, - ndview& A, // t(X)*X, LHS, gets overwritten, dim=[dim_A, dim_A] (symmetric) + ndview& A, /// t(X)*X, LHS, gets overwritten, dim=[dim_A, dim_A] (symmetric) sycl::event& event_A, ndview& - b, // t(X)*y, RHS, solution is outputted here, dim=[dim_A, nrhs] in colmajor order + b, /// t(X)*y, RHS, solution is outputted here, dim=[dim_A, nrhs] in colmajor order sycl::event& event_b, const std::int64_t dim_A, const std::int64_t nrhs) { constexpr auto alloc = sycl::usm::alloc::device; - /* Decompose: A = Q * diag(l) * t(Q) */ - /* Note: for NRHS>1, this will overallocate in order to reuse the memory as buffer later on */ + /// Decompose: A = Q * diag(l) * t(Q) + /// Note: for NRHS>1, this will overallocate in order to reuse the memory as buffer later on auto eigenvalues = ndarray::empty(queue, dim_A * nrhs, alloc); sycl::event syevd_event = syevd(queue, dim_A, A, dim_A, eigenvalues, { event_A }); const Float eps = std::numeric_limits::epsilon(); - /* Discard too small singular values */ + /// Discard too small singular values std::int64_t num_discarded; { - /* This is placed inside a block because the array created here is not used any further */ - /* Note: the array for 'eigenvalues' is allocated with size [dimA, nrhs], - but by this point, only 'dimA' elements will have been written to it. */ + /// This is placed inside a block because the array created here is not used any further + /// Note: the array for 'eigenvalues' is allocated with size [dimA, nrhs], + /// but by this point, only 'dimA' elements will have been written to it. auto eigenvalues_cpu = ndview::wrap(eigenvalues.get_data(), dim_A).to_host(queue, { syevd_event }); const Float* eigenvalues_cpu_ptr = eigenvalues_cpu.get_data(); @@ -120,7 +118,7 @@ sycl::event solve_spectral_decomposition( } } - /* Create the square root of the inverse: Qis = Q * diag(1 / sqrt(l)) */ + /// Create the square root of the inverse: Qis = Q * diag(1 / sqrt(l)) std::int64_t num_taken = dim_A - num_discarded; auto ev_mutable = eigenvalues.get_mutable_data(); sycl::event inv_sqrt_eigenvalues_event = queue.submit([&](sycl::handler& h) { @@ -142,7 +140,7 @@ sycl::event solve_spectral_decomposition( }); }); - /* Now calculate the actual solution: Qis * Qis' * B */ + /// Now calculate the actual solution: Qis * Qis' * B const std::int64_t eigenvectors_offset = num_discarded * dim_A; if (nrhs == 1) { auto ev_mutable_vec_view = ndview::wrap(ev_mutable, num_taken); @@ -191,7 +189,7 @@ sycl::event solve_spectral_decomposition( } } -/* Returns the minimum value among entries in the diagonal of a square matrix */ +/// Returns the minimum value among entries in the diagonal of a square matrix template Float diagonal_minimum(sycl::queue& queue, const Float* Matrix, @@ -241,7 +239,7 @@ sycl::event solve_system(sycl::queue& queue, } catch (mkl::lapack::computation_error& ex) { const std::int64_t nrhs = nxty.get_dimension(0); - /* Note: this templated version of 'copy' reuses the layout that was specified in the previous copy */ + /// Note: this templated version of 'copy' reuses the layout that was specified in the previous copy sycl::event xtx_event_new = copy(queue, nxtx, xtx, dependencies); sycl::event xty_event_new = copy(queue, nxty, xty, dependencies); From 15b686d38224129c79bfd3be9cd6f877f5b54eb0 Mon Sep 17 00:00:00 2001 From: David Cortes Date: Tue, 19 Nov 2024 13:41:20 +0100 Subject: [PATCH 14/17] add comment about threshold for diagonal minimum --- cpp/oneapi/dal/backend/primitives/lapack/solve_dpc.cpp | 8 +++++++- oneDAL.code-workspace | 7 +++++++ 2 files changed, 14 insertions(+), 1 deletion(-) create mode 100644 oneDAL.code-workspace diff --git a/cpp/oneapi/dal/backend/primitives/lapack/solve_dpc.cpp b/cpp/oneapi/dal/backend/primitives/lapack/solve_dpc.cpp index 173c92f8ffe..1fd7883c5d1 100644 --- a/cpp/oneapi/dal/backend/primitives/lapack/solve_dpc.cpp +++ b/cpp/oneapi/dal/backend/primitives/lapack/solve_dpc.cpp @@ -233,7 +233,13 @@ sycl::event solve_system(sycl::queue& queue, sycl::event potrf_event = potrf_factorization(queue, nxtx, dummy, { xtx_event }); queue.wait_and_throw(); const Float diag_min = diagonal_minimum(queue, nxtx.get_data(), dim_xtx, potrf_event); - if (diag_min <= 1e-6) + /// Note: this threshold was chosen as a guess for when Cholesky factorization might + /// succeed despite the matrix in theory not being positive-definite, or succeed but + /// having too large numerical inaccuracies. In such cases, there should be too small + /// singular values that the fallback will discard, but there's no guaranteed match + /// between singular values and entries in the Cholesky diagonal. This is just a guess. + const Float threshold_diagonal_min = 1e-6f; + if (diag_min <= threshold_diagonal_min) throw mkl::lapack::computation_error("", "", 0); solution_event = potrs_solution(queue, nxtx, nxty, dummy, { potrf_event, xty_event }); } diff --git a/oneDAL.code-workspace b/oneDAL.code-workspace new file mode 100644 index 00000000000..362d7c25bb4 --- /dev/null +++ b/oneDAL.code-workspace @@ -0,0 +1,7 @@ +{ + "folders": [ + { + "path": "." + } + ] +} \ No newline at end of file From 4cff7b9328c7aa5bd2ab36ccd8676d34d8d92e9f Mon Sep 17 00:00:00 2001 From: David Cortes Date: Tue, 19 Nov 2024 13:45:56 +0100 Subject: [PATCH 15/17] avoid manually throwing MKL error --- cpp/oneapi/dal/backend/primitives/lapack/solve_dpc.cpp | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/cpp/oneapi/dal/backend/primitives/lapack/solve_dpc.cpp b/cpp/oneapi/dal/backend/primitives/lapack/solve_dpc.cpp index 1fd7883c5d1..ef49ea497e1 100644 --- a/cpp/oneapi/dal/backend/primitives/lapack/solve_dpc.cpp +++ b/cpp/oneapi/dal/backend/primitives/lapack/solve_dpc.cpp @@ -238,12 +238,17 @@ sycl::event solve_system(sycl::queue& queue, /// having too large numerical inaccuracies. In such cases, there should be too small /// singular values that the fallback will discard, but there's no guaranteed match /// between singular values and entries in the Cholesky diagonal. This is just a guess. - const Float threshold_diagonal_min = 1e-6f; + const Float threshold_diagonal_min = 1e-6; if (diag_min <= threshold_diagonal_min) - throw mkl::lapack::computation_error("", "", 0); + goto fallback_solver; solution_event = potrs_solution(queue, nxtx, nxty, dummy, { potrf_event, xty_event }); } catch (mkl::lapack::computation_error& ex) { + goto fallback_solver; + } + /// Note: this block is structured so that it will only be entered into through a 'goto' + if (false) { + fallback_solver: const std::int64_t nrhs = nxty.get_dimension(0); /// Note: this templated version of 'copy' reuses the layout that was specified in the previous copy sycl::event xtx_event_new = copy(queue, nxtx, xtx, dependencies); From 58ebb83893b3ea5d0095d4cb37a3e948c55617b9 Mon Sep 17 00:00:00 2001 From: David Cortes Date: Tue, 19 Nov 2024 13:49:18 +0100 Subject: [PATCH 16/17] remove accidentally commited file --- oneDAL.code-workspace | 7 ------- 1 file changed, 7 deletions(-) delete mode 100644 oneDAL.code-workspace diff --git a/oneDAL.code-workspace b/oneDAL.code-workspace deleted file mode 100644 index 362d7c25bb4..00000000000 --- a/oneDAL.code-workspace +++ /dev/null @@ -1,7 +0,0 @@ -{ - "folders": [ - { - "path": "." - } - ] -} \ No newline at end of file From 4e241dcfa8c92b063afc944e3510bdc61fab3a6f Mon Sep 17 00:00:00 2001 From: David Cortes Date: Tue, 19 Nov 2024 15:08:59 +0100 Subject: [PATCH 17/17] wrap procedure in a function --- .../backend/primitives/lapack/solve_dpc.cpp | 52 ++++++++++++------- 1 file changed, 32 insertions(+), 20 deletions(-) diff --git a/cpp/oneapi/dal/backend/primitives/lapack/solve_dpc.cpp b/cpp/oneapi/dal/backend/primitives/lapack/solve_dpc.cpp index ef49ea497e1..12eb654bbe4 100644 --- a/cpp/oneapi/dal/backend/primitives/lapack/solve_dpc.cpp +++ b/cpp/oneapi/dal/backend/primitives/lapack/solve_dpc.cpp @@ -209,6 +209,30 @@ Float diagonal_minimum(sycl::queue& queue, return ndview::wrap(diag_min_holder).at_device(queue, 0, { diag_min_event }); } +template +sycl::event solve_with_fallback( + sycl::queue& queue, + const ndview& xtx, + const ndview& xty, + ndview& nxtx, + ndview& nxty, /// solution will be written here + const event_vector& dependencies) { + const std::int64_t dim_xtx = xtx.get_dimension(0); + const std::int64_t nrhs = nxty.get_dimension(0); + /// Note: this templated version of 'copy' reuses a layout that should have been + /// specified in a previous copy before the fallback. + sycl::event xtx_event_new = copy(queue, nxtx, xtx, dependencies); + sycl::event xty_event_new = copy(queue, nxty, xty, dependencies); + + return solve_spectral_decomposition(queue, + nxtx, + xtx_event_new, + nxty, + xty_event_new, + dim_xtx, + nrhs); +} + template sycl::event solve_system(sycl::queue& queue, const ndview& xtx, @@ -239,28 +263,16 @@ sycl::event solve_system(sycl::queue& queue, /// singular values that the fallback will discard, but there's no guaranteed match /// between singular values and entries in the Cholesky diagonal. This is just a guess. const Float threshold_diagonal_min = 1e-6; - if (diag_min <= threshold_diagonal_min) - goto fallback_solver; - solution_event = potrs_solution(queue, nxtx, nxty, dummy, { potrf_event, xty_event }); + if (diag_min > threshold_diagonal_min) { + solution_event = + potrs_solution(queue, nxtx, nxty, dummy, { potrf_event, xty_event }); + } + else { + solution_event = solve_with_fallback(queue, xtx, xty, nxtx, nxty, dependencies); + } } catch (mkl::lapack::computation_error& ex) { - goto fallback_solver; - } - /// Note: this block is structured so that it will only be entered into through a 'goto' - if (false) { - fallback_solver: - const std::int64_t nrhs = nxty.get_dimension(0); - /// Note: this templated version of 'copy' reuses the layout that was specified in the previous copy - sycl::event xtx_event_new = copy(queue, nxtx, xtx, dependencies); - sycl::event xty_event_new = copy(queue, nxty, xty, dependencies); - - solution_event = solve_spectral_decomposition(queue, - nxtx, - xtx_event_new, - nxty, - xty_event_new, - dim_xtx, - nrhs); + solution_event = solve_with_fallback(queue, xtx, xty, nxtx, nxty, dependencies); } return beta_copy_transform(queue, nxty, final_xty, { solution_event });