From c8864c906b14b0875e755b7ba1bba5c8b51fea50 Mon Sep 17 00:00:00 2001 From: Samuel Ayala Date: Tue, 7 May 2024 18:07:47 -0400 Subject: [PATCH] parallel wake rollup & fix taskflow --- packages/taskflow.lua | 2 - tests/test_uvlm_theodorsen.cpp | 6 +-- vlm/backends/cpu/include/vlm_backend_cpu.hpp | 1 + vlm/backends/cpu/src/vlm_backend_cpu.cpp | 54 +++++++++++++------ .../cpu/src/vlm_backend_cpu_kernels.ispc | 11 ++-- vlm/include/vlm_backend.hpp | 1 + vlm/include/vlm_types.hpp | 6 +-- 7 files changed, 51 insertions(+), 30 deletions(-) diff --git a/packages/taskflow.lua b/packages/taskflow.lua index 17f043c..fc70e46 100644 --- a/packages/taskflow.lua +++ b/packages/taskflow.lua @@ -1,7 +1,5 @@ package("taskflow_custom") set_urls("https://github.com/samayala22/taskflow.git") - add_urls("https://github.com/samayala22/taskflow/archive/$(version).tar.gz") - add_versions("v3.6.2", "0a1d306f90e8e17cb98b95eaae9e8b9455beeeca0f0a72afee7719b27706c68c") if is_plat("linux") then add_syslinks("pthread") diff --git a/tests/test_uvlm_theodorsen.cpp b/tests/test_uvlm_theodorsen.cpp index 021b917..c70dd0c 100644 --- a/tests/test_uvlm_theodorsen.cpp +++ b/tests/test_uvlm_theodorsen.cpp @@ -182,7 +182,7 @@ int main() { mesh->resize_wake(vec_t.size()-1); // +1 for the initial pose const std::unique_ptr backend = create_backend(backend_name, *mesh); // create after mesh has been resized - + // Precompute the LHS since wing geometry is constant backend->compute_lhs(); backend->lu_factor(); @@ -219,8 +219,8 @@ int main() { velocities.z[idx] = local_velocity.z; if (idx == 0) { - f32 analytical_vel = - amplitude * omega * std::cos(omega * t); - f32 rel_error = 100.0f * std::abs((analytical_vel - local_velocity.z) / analytical_vel); + const f32 analytical_vel = - amplitude * omega * std::cos(omega * t); + const f32 rel_error = 100.0f * std::abs((analytical_vel - local_velocity.z) / analytical_vel); std::cout << "vel error:" << rel_error << "%\n"; avg_vel_error += rel_error; } diff --git a/vlm/backends/cpu/include/vlm_backend_cpu.hpp b/vlm/backends/cpu/include/vlm_backend_cpu.hpp index 057d1f4..7bcf6e1 100644 --- a/vlm/backends/cpu/include/vlm_backend_cpu.hpp +++ b/vlm/backends/cpu/include/vlm_backend_cpu.hpp @@ -14,6 +14,7 @@ class BackendCPU : public Backend { std::vector panel_uw; std::vector panel_vw; std::vector panel_ww; + SoA_3D_t rollup_vertices; std::vector wake_buffer; // (ns*nc) * ns std::vector rhs; std::vector ipiv; diff --git a/vlm/backends/cpu/src/vlm_backend_cpu.cpp b/vlm/backends/cpu/src/vlm_backend_cpu.cpp index fcffaa4..50d2a15 100644 --- a/vlm/backends/cpu/src/vlm_backend_cpu.cpp +++ b/vlm/backends/cpu/src/vlm_backend_cpu.cpp @@ -39,6 +39,7 @@ BackendCPU::BackendCPU(Mesh& mesh) : Backend(mesh) { lhs.resize(mesh.nb_panels_wing() * mesh.nb_panels_wing()); wake_buffer.resize(mesh.nb_panels_wing() * mesh.ns); + rollup_vertices.resize(mesh.nb_vertices_total()); uw.resize(mesh.nb_panels_wing() * mesh.ns); vw.resize(mesh.nb_panels_wing() * mesh.ns); ww.resize(mesh.nb_panels_wing() * mesh.ns); @@ -131,7 +132,6 @@ void BackendCPU::compute_rhs(const FlowData& flow) { kernel_cpu_rhs(m.nb_panels_wing(), m.normal.x.data(), m.normal.y.data(), m.normal.z.data(), flow.freestream.x, flow.freestream.y, flow.freestream.z, rhs.data()); } -// TODO: consider changing FlowData to SolverData void BackendCPU::add_wake_influence() { // const tiny::ScopedTimer timer("Wake Influence"); @@ -214,10 +214,30 @@ void BackendCPU::wake_rollup(float dt) { const u64 wake_vertices_begin = (mesh.nc + mesh.nw - mesh.current_nw + 1) * (mesh.ns+1); const u64 wake_vertices_end = (mesh.nc + mesh.nw + 1) * (mesh.ns + 1); - // parallel for - for (u64 vidx = wake_vertices_begin; vidx < wake_vertices_end; vidx++) { - ispc::kernel_induced_vel(mesh_view, vidx, dt, gamma.data(), sigma_vatistas); - } + tf::Taskflow taskflow; + + auto init = taskflow.placeholder(); + auto rollup = taskflow.for_each_index(wake_vertices_begin, wake_vertices_end, [&] (u64 vidx) { + ispc::kernel_induced_vel(mesh_view, dt, rollup_vertices.x.data(), rollup_vertices.y.data(), rollup_vertices.z.data(), vidx, gamma.data(), sigma_vatistas); + }); + auto copy = taskflow.emplace([&]{ + std::copy(rollup_vertices.x.data() + wake_vertices_begin, rollup_vertices.x.data() + wake_vertices_end, mesh.v.x.data() + wake_vertices_begin); + std::copy(rollup_vertices.y.data() + wake_vertices_begin, rollup_vertices.y.data() + wake_vertices_end, mesh.v.y.data() + wake_vertices_begin); + std::copy(rollup_vertices.z.data() + wake_vertices_begin, rollup_vertices.z.data() + wake_vertices_end, mesh.v.z.data() + wake_vertices_begin); + }); + auto sync = taskflow.placeholder(); + init.precede(rollup); + rollup.precede(copy); + copy.precede(sync); + + Executor::get().run(taskflow).wait(); + + // for (u64 vidx = wake_vertices_begin; vidx < wake_vertices_end; vidx++) { + // ispc::kernel_induced_vel(mesh_view, dt, rollup_vertices.x.data(), rollup_vertices.y.data(), rollup_vertices.z.data(), vidx, gamma.data(), sigma_vatistas); + // } + // std::copy(rollup_vertices.x.data() + wake_vertices_begin, rollup_vertices.x.data() + rollup_vertices.size, mesh.v.x.data() + wake_vertices_begin); + // std::copy(rollup_vertices.y.data() + wake_vertices_begin, rollup_vertices.y.data() + rollup_vertices.size, mesh.v.y.data() + wake_vertices_begin); + // std::copy(rollup_vertices.z.data() + wake_vertices_begin, rollup_vertices.z.data() + rollup_vertices.size, mesh.v.z.data() + wake_vertices_begin); } void BackendCPU::shed_gamma() { @@ -298,9 +318,9 @@ f32 BackendCPU::compute_coefficient_unsteady_cl(const SoA_3D_t& vel, f32 dt linalg::alias::float3 freestream{vel.x[li], vel.y[li], vel.z[li]}; //std::cout << "Without: " << freestream << "\n"; - freestream.x += panel_uw[li]; - freestream.y += panel_vw[li]; - freestream.z += panel_ww[li]; + // freestream.x += panel_uw[li]; + // freestream.y += panel_vw[li]; + // freestream.z += panel_ww[li]; // std::cout << "With: " << freestream << "\n"; const linalg::alias::float3 lift_axis = compute_lift_axis(freestream); @@ -315,17 +335,17 @@ f32 BackendCPU::compute_coefficient_unsteady_cl(const SoA_3D_t& vel, f32 dt const f32 gamma_dt = (gamma[li] - gamma_prev[li]) / dt; // backward difference // Joukowski method - // force += rho * delta_gamma[li] * linalg::cross(freestream, v1 - v0); - // force += rho * gamma_dt * mesh.area[li] * normal; + force += rho * delta_gamma[li] * linalg::cross(freestream, v1 - v0); + force += rho * gamma_dt * mesh.area[li] * normal; // Katz Plotkin method - linalg::alias::float3 delta_p = {0.0f, 0.0f, 0.0f}; - const f32 delta_gamma_i = (u == 0) ? gamma[li] : gamma[li] - gamma[(u-1) * mesh.ns + v]; - const f32 delta_gamma_j = (v == 0) ? gamma[li] : gamma[li] - gamma[u * mesh.ns + v - 1]; - delta_p += rho * linalg::dot(freestream, linalg::normalize(v1 - v0)) * delta_gamma_j / mesh.panel_width_y(u, v); - delta_p += rho * linalg::dot(freestream, linalg::normalize(v3 - v0)) * delta_gamma_i / mesh.panel_length(u, v); - delta_p += gamma_dt; - force = (delta_p * mesh.area[li]) * normal; + // linalg::alias::float3 delta_p = {0.0f, 0.0f, 0.0f}; + // const f32 delta_gamma_i = (u == 0) ? gamma[li] : gamma[li] - gamma[(u-1) * mesh.ns + v]; + // const f32 delta_gamma_j = (v == 0) ? gamma[li] : gamma[li] - gamma[u * mesh.ns + v - 1]; + // delta_p += rho * linalg::dot(freestream, linalg::normalize(v1 - v0)) * delta_gamma_j / mesh.panel_width_y(u, v); + // delta_p += rho * linalg::dot(freestream, linalg::normalize(v3 - v0)) * delta_gamma_i / mesh.panel_length(u, v); + // delta_p += gamma_dt; + // force = (delta_p * mesh.area[li]) * normal; // force /= linalg::length2(freestream); diff --git a/vlm/backends/cpu/src/vlm_backend_cpu_kernels.ispc b/vlm/backends/cpu/src/vlm_backend_cpu_kernels.ispc index 87744c0..c0e41ad 100644 --- a/vlm/backends/cpu/src/vlm_backend_cpu_kernels.ispc +++ b/vlm/backends/cpu/src/vlm_backend_cpu_kernels.ispc @@ -181,8 +181,9 @@ export void kernel_influence2( } export void kernel_induced_vel( - uniform const MeshView& m, uniform uint64 vidx, uniform float dt, - uniform float* uniform gamma, uniform float sigma + uniform MeshView& m, uniform float dt, + uniform float* uniform rollup_vx, uniform float* uniform rollup_vy, uniform float* uniform rollup_vz, + uniform uint64 vidx, uniform float* uniform gamma, uniform float sigma ) { const uniform float3 vertex_influenced = {m.v.x[vidx], m.v.y[vidx], m.v.z[vidx]}; @@ -237,9 +238,9 @@ export void kernel_induced_vel( } } - m.v.x[vidx] += reduce_add(induced_vel.x) * dt; - m.v.y[vidx] += reduce_add(induced_vel.y) * dt; - m.v.z[vidx] += reduce_add(induced_vel.z) * dt; + rollup_vx[vidx] = m.v.x[vidx] + reduce_add(induced_vel.x) * dt; + rollup_vy[vidx] = m.v.y[vidx] + reduce_add(induced_vel.y) * dt; + rollup_vz[vidx] = m.v.z[vidx] + reduce_add(induced_vel.z) * dt; } export uniform float kernel_trefftz_cd( diff --git a/vlm/include/vlm_backend.hpp b/vlm/include/vlm_backend.hpp index 602e181..8b03cc7 100644 --- a/vlm/include/vlm_backend.hpp +++ b/vlm/include/vlm_backend.hpp @@ -2,6 +2,7 @@ #include "vlm_fwd.hpp" #include "vlm_types.hpp" +#include "vlm_mesh.hpp" namespace vlm { diff --git a/vlm/include/vlm_types.hpp b/vlm/include/vlm_types.hpp index e90698e..4cbfda0 100644 --- a/vlm/include/vlm_types.hpp +++ b/vlm/include/vlm_types.hpp @@ -54,9 +54,9 @@ struct SoA_3D_t { void resize(u64 size_) { size = size_; - x.resize(size); - y.resize(size); - z.resize(size); + x.resize(size, 0.f); + y.resize(size, 0.f); + z.resize(size, 0.f); } void reserve(u64 size_) {