diff --git a/common/cuda_hip/factorization/cholesky_kernels.cpp b/common/cuda_hip/factorization/cholesky_kernels.cpp
index ce4dc823b98..51671d1386d 100644
--- a/common/cuda_hip/factorization/cholesky_kernels.cpp
+++ b/common/cuda_hip/factorization/cholesky_kernels.cpp
@@ -82,7 +82,8 @@ template <int subwarp_size, typename IndexType>
 __global__ __launch_bounds__(default_block_size) void symbolic_count(
     IndexType num_rows, const IndexType* row_ptrs, const IndexType* lower_ends,
     const IndexType* inv_postorder, const IndexType* postorder_cols,
-    const IndexType* postorder_parent, IndexType* row_nnz)
+    const IndexType* postorder_parent, IndexType* row_nnz,
+    IndexType* path_lengths)
 {
     const auto row = thread::get_subwarp_id_flat<subwarp_size, IndexType>();
     if (row >= num_rows) {
@@ -99,13 +100,54 @@ __global__ __launch_bounds__(default_block_size) void symbolic_count(
     const auto lane = subwarp.thread_rank();
     IndexType count{};
     for (auto nz = row_begin + lane; nz < lower_end; nz += subwarp_size) {
+        IndexType local_count{};
         auto node = postorder_cols[nz];
         const auto next_node =
             nz < lower_end - 1 ? postorder_cols[nz + 1] : diag_postorder;
         while (node < next_node) {
-            count++;
+            local_count++;
             node = postorder_parent[node];
         }
+        path_lengths[nz] = count;
+        count += local_count;
+    }
+    // lower entries plus diagonal
+    count = reduce(subwarp, count, thrust::plus<IndexType>{}) + 1;
+    if (lane == 0) {
+        row_nnz[row] = count;
+    }
+}
+
+
+template <int subwarp_size, typename IndexType>
+__global__ __launch_bounds__(default_block_size) void symbolic_count_lca(
+    IndexType num_rows, const IndexType* row_ptrs, const IndexType* lower_ends,
+    const IndexType* inv_postorder, const IndexType* postorder_cols,
+    const IndexType* levels, const IndexType* euler_first,
+    const range_minimum_query<IndexType> lca_rmq, IndexType* row_nnz,
+    IndexType* path_lengths)
+{
+    const auto row = thread::get_subwarp_id_flat<subwarp_size, IndexType>();
+    if (row >= num_rows) {
+        return;
+    }
+    const auto row_begin = row_ptrs[row];
+    const auto level = lca_rmq.min(euler_first[row]);
+    const auto lower_end = lower_ends[row];
+    const auto subwarp =
+        group::tiled_partition<subwarp_size>(group::this_thread_block());
+    const auto lane = subwarp.thread_rank();
+    IndexType count{};
+    for (auto nz = row_begin + lane; nz < lower_end; nz += subwarp_size) {
+        const auto node = postorder_cols[nz];
+        const auto euler_pos = euler_first[node];
+        const auto next_node =
+            nz < lower_end - 1 ? postorder_cols[nz + 1] : diag_postorder;
+        const auto next_euler_pos = euler_first[next_node];
+        const auto lca_level = lca_rmq.query(euler_pos, next_euler_pos).min;
+        const auto path_length = level - lca_level;
+        path_lengths[nz] = path_length;
+        count += path_length;
     }
     // lower entries plus diagonal
     count = reduce(subwarp, count, thrust::plus<IndexType>{}) + 1;
diff --git a/core/factorization/cholesky_kernels.hpp b/core/factorization/cholesky_kernels.hpp
index fa87edd7fe3..7f19e842a82 100644
--- a/core/factorization/cholesky_kernels.hpp
+++ b/core/factorization/cholesky_kernels.hpp
@@ -28,6 +28,15 @@ namespace kernels {
         IndexType* row_nnz, array<IndexType>& tmp_storage)
 
 
+#define GKO_DECLARE_CHOLESKY_SYMBOLIC_COUNT_LCA(ValueType, IndexType)      \
+    void symbolic_count_lca(                                               \
+        std::shared_ptr<const DefaultExecutor> exec,                       \
+        const matrix::Csr<ValueType, IndexType>* mtx,                      \
+        const gko::factorization::elimination_forest<IndexType>& forest,   \
+        const range_minimum_query<IndexType>& lca_rmq, IndexType* row_nnz, \
+        array<IndexType>& tmp_storage)
+
+
 #define GKO_DECLARE_CHOLESKY_SYMBOLIC_FACTORIZE(ValueType, IndexType)    \
     void symbolic_factorize(                                             \
         std::shared_ptr<const DefaultExecutor> exec,                     \
@@ -62,6 +71,8 @@ namespace kernels {
     template <typename ValueType, typename IndexType>              \
     GKO_DECLARE_CHOLESKY_SYMBOLIC_COUNT(ValueType, IndexType);     \
     template <typename ValueType, typename IndexType>              \
+    GKO_DECLARE_CHOLESKY_SYMBOLIC_COUNT_LCA(ValueType, IndexType); \
+    template <typename ValueType, typename IndexType>              \
     GKO_DECLARE_CHOLESKY_SYMBOLIC_FACTORIZE(ValueType, IndexType); \
     template <typename ValueType, typename IndexType>              \
     GKO_DECLARE_CHOLESKY_INITIALIZE(ValueType, IndexType);         \