Skip to content

Commit

Permalink
Refactoring and new features
Browse files Browse the repository at this point in the history
## New features and APIs 
- Any grid operation (halo update and computation) is now wrapped into a Container
- New Naming convention for grids: Grid, Span, Field, Idx, Partition 
- Introduction of partition and span tables 
- 1D partitioner used by bGrid and eGrid 
- Grid can now generate both host and device Containers 
- Vec3D objects are tagged as constant expressions 
- New set of unit tests (a full refactoring of the unit tests is in progress)

## Refactoring 
- new dGrid implementation 
- new eGrid implementation 
- new dGrid implementation to support multi-GPU (with tray support at compile time)
- removed deprecated classes 

## Temporarily dropped features
This is a list of features and tests that have been temporarily dropped. They will be reintroduced in future PRs.
- Reduction operations
- Sub grids 
- Staggered grids
- mGrid 
- Tutorials

## Out of sync features
- The website tutorial documentation still refers to the old API and should be updated
  • Loading branch information
massimim authored Jun 5, 2023
1 parent 3b98084 commit a6b7b73
Show file tree
Hide file tree
Showing 487 changed files with 19,206 additions and 21,131 deletions.
148 changes: 148 additions & 0 deletions .clang-tidy
Original file line number Diff line number Diff line change
@@ -0,0 +1,148 @@
# Generated from CLion Inspection settings
#
# clang-tidy configuration.
# The Checks value is a single comma-separated list. It starts with '-*',
# which turns every check off, and then names the individual checks to
# enable. Entries are grouped by prefix: bugprone, cert, cppcoreguidelines,
# google, hicpp, misc, modernize, mpi, openmp, performance, portability,
# readability.
---
Checks: '-*,
bugprone-argument-comment,
bugprone-assert-side-effect,
bugprone-bad-signal-to-kill-thread,
bugprone-branch-clone,
bugprone-copy-constructor-init,
bugprone-dangling-handle,
bugprone-dynamic-static-initializers,
bugprone-fold-init-type,
bugprone-forward-declaration-namespace,
bugprone-forwarding-reference-overload,
bugprone-inaccurate-erase,
bugprone-incorrect-roundings,
bugprone-integer-division,
bugprone-lambda-function-name,
bugprone-macro-parentheses,
bugprone-macro-repeated-side-effects,
bugprone-misplaced-operator-in-strlen-in-alloc,
bugprone-misplaced-pointer-arithmetic-in-alloc,
bugprone-misplaced-widening-cast,
bugprone-move-forwarding-reference,
bugprone-multiple-statement-macro,
bugprone-no-escape,
bugprone-not-null-terminated-result,
bugprone-parent-virtual-call,
bugprone-posix-return,
bugprone-reserved-identifier,
bugprone-sizeof-container,
bugprone-sizeof-expression,
bugprone-spuriously-wake-up-functions,
bugprone-string-constructor,
bugprone-string-integer-assignment,
bugprone-string-literal-with-embedded-nul,
bugprone-suspicious-enum-usage,
bugprone-suspicious-include,
bugprone-suspicious-memset-usage,
bugprone-suspicious-missing-comma,
bugprone-suspicious-semicolon,
bugprone-suspicious-string-compare,
bugprone-suspicious-memory-comparison,
bugprone-suspicious-realloc-usage,
bugprone-swapped-arguments,
bugprone-terminating-continue,
bugprone-throw-keyword-missing,
bugprone-too-small-loop-variable,
bugprone-undefined-memory-manipulation,
bugprone-undelegated-constructor,
bugprone-unhandled-self-assignment,
bugprone-unused-raii,
bugprone-unused-return-value,
bugprone-use-after-move,
bugprone-virtual-near-miss,
cert-dcl21-cpp,
cert-dcl58-cpp,
cert-err34-c,
cert-err52-cpp,
cert-err60-cpp,
cert-flp30-c,
cert-msc50-cpp,
cert-msc51-cpp,
cert-str34-c,
cppcoreguidelines-interfaces-global-init,
cppcoreguidelines-narrowing-conversions,
cppcoreguidelines-pro-type-member-init,
cppcoreguidelines-pro-type-static-cast-downcast,
cppcoreguidelines-slicing,
google-default-arguments,
google-explicit-constructor,
google-runtime-operator,
hicpp-exception-baseclass,
hicpp-multiway-paths-covered,
misc-misplaced-const,
misc-new-delete-overloads,
misc-no-recursion,
misc-non-copyable-objects,
misc-throw-by-value-catch-by-reference,
misc-unconventional-assign-operator,
misc-uniqueptr-reset-release,
modernize-avoid-bind,
modernize-concat-nested-namespaces,
modernize-deprecated-headers,
modernize-deprecated-ios-base-aliases,
modernize-loop-convert,
modernize-make-shared,
modernize-make-unique,
modernize-pass-by-value,
modernize-raw-string-literal,
modernize-redundant-void-arg,
modernize-replace-auto-ptr,
modernize-replace-disallow-copy-and-assign-macro,
modernize-replace-random-shuffle,
modernize-return-braced-init-list,
modernize-shrink-to-fit,
modernize-unary-static-assert,
modernize-use-auto,
modernize-use-bool-literals,
modernize-use-emplace,
modernize-use-equals-default,
modernize-use-equals-delete,
modernize-use-nodiscard,
modernize-use-noexcept,
modernize-use-nullptr,
modernize-use-override,
modernize-use-transparent-functors,
modernize-use-uncaught-exceptions,
mpi-buffer-deref,
mpi-type-mismatch,
openmp-use-default-none,
performance-faster-string-find,
performance-for-range-copy,
performance-implicit-conversion-in-loop,
performance-inefficient-algorithm,
performance-inefficient-string-concatenation,
performance-inefficient-vector-operation,
performance-move-const-arg,
performance-move-constructor-init,
performance-no-automatic-move,
performance-noexcept-move-constructor,
performance-trivially-destructible,
performance-type-promotion-in-math-fn,
performance-unnecessary-copy-initialization,
performance-unnecessary-value-param,
portability-simd-intrinsics,
readability-avoid-const-params-in-decls,
readability-const-return-type,
readability-container-size-empty,
readability-convert-member-functions-to-static,
readability-delete-null-pointer,
readability-deleted-default,
readability-inconsistent-declaration-parameter-name,
readability-make-member-function-const,
readability-misleading-indentation,
readability-misplaced-array-index,
readability-non-const-parameter,
readability-redundant-control-flow,
readability-redundant-declaration,
readability-redundant-function-ptr-dereference,
readability-redundant-smartptr-get,
readability-redundant-string-cstr,
readability-redundant-string-init,
readability-simplify-subscript-expr,
readability-static-accessed-through-instance,
readability-static-definition-in-anonymous-namespace,
readability-string-compare,
readability-uniqueptr-delete-release,
readability-use-anyofallof'
12 changes: 9 additions & 3 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -78,9 +78,9 @@ add_subdirectory("libNeonSys")
add_subdirectory("libNeonSet")
add_subdirectory("libNeonDomain")
add_subdirectory("libNeonSkeleton")
add_subdirectory("libNeonSolver")
add_subdirectory("tutorials")
add_subdirectory("apps")
#add_subdirectory("libNeonSolver")
#add_subdirectory("tutorials")
#add_subdirectory("apps")
add_subdirectory("benchmarks")


Expand Down Expand Up @@ -111,3 +111,9 @@ message("|| CUDA ARCHS : ${CMAKE_CUDA_ARCHITECTURES}")
#message("|| NeonCXXFlags : ${NeonCXXFlags}")
#message("|| NeonCUDAFlags : ${NeonCUDAFlags}")
message("\\===================================================")
# Useful for debugging CMake:
# https://jeremimucha.com/2021/01/cmake-fundamentals-part3/
# get_target_property(interfaceLibs NeonDeveloperLib INTERFACE_COMPILE_OPTIONS)
# set(info ${interfaceLibs}[CXX_COMPILER_ID])
# message(STATUS "NeonDeveloperLib INTERFACE_LINK_LIBRARIES: ${info}")

4 changes: 2 additions & 2 deletions apps/fractal/fractal.cu
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ int main(int argc, char** argv)

Neon::Backend backend(gpu_ids, runtime);

using Grid = Neon::domain::dGrid;
using Grid = Neon::dGrid;
Grid grid(
backend, dim,
[](const Neon::index_3d& idx) -> bool { return true; },
Expand All @@ -89,7 +89,7 @@ int main(int argc, char** argv)
for (time = 0; time < 1000; ++time) {
skeleton.run();

pixels.updateIO(0);
pixels.updateHostData(0);
//draw_pixels(time, pixels);
}
}
Expand Down
8 changes: 4 additions & 4 deletions apps/gameOfLife/gameOfLife.cu
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ inline void exportVTI(FieldT& voxel_1, FieldT& voxel_2, int frame_id)
auto io = [&](int f, FieldT& voxel) {
printf("\n Exporting Frame =%d", f);
int precision = 4;
voxel.updateIO(0);
voxel.updateHostData(0);
std::ostringstream oss;
oss << std::setw(precision) << std::setfill('0') << f;
std::string fname = "gameOfLife_" + oss.str();
Expand Down Expand Up @@ -117,7 +117,7 @@ int main(int argc, char** argv)
const size_t num_frames = 500;

using T = float;
using Grid = Neon::domain::dGrid;
using Grid = Neon::dGrid;
Grid grid(
backend, grid_dim,
[](const Neon::index_3d& idx) -> bool { return true; },
Expand All @@ -139,8 +139,8 @@ int main(int argc, char** argv)
[](const Neon::index_3d&, const int&, T& val) { val = 0; });


voxel_1.updateCompute(0);
voxel_2.updateCompute(0);
voxel_1.updateDeviceData(0);
voxel_2.updateDeviceData(0);


std::vector<Neon::set::Container> containers;
Expand Down
20 changes: 10 additions & 10 deletions apps/lbm/lbm.cu
Original file line number Diff line number Diff line change
Expand Up @@ -624,14 +624,14 @@ inline void setup(const FlowType flow_type,
velocity_1.forEachActiveCell([](const Neon::index_3d&, const int& c, T& val) { val = 0; });
velocity_2.forEachActiveCell([](const Neon::index_3d&, const int& c, T& val) { val = 0; });

lattice_1.updateCompute(0);
lattice_2.updateCompute(0);
rho_1.updateCompute(0);
rho_2.updateCompute(0);
boundary_mask.updateCompute(0);
center_mask.updateCompute(0);
velocity_1.updateCompute(0);
velocity_2.updateCompute(0);
lattice_1.updateDeviceData(0);
lattice_2.updateDeviceData(0);
rho_1.updateDeviceData(0);
rho_2.updateDeviceData(0);
boundary_mask.updateDeviceData(0);
center_mask.updateDeviceData(0);
velocity_1.updateDeviceData(0);
velocity_2.updateDeviceData(0);
}


Expand Down Expand Up @@ -680,7 +680,7 @@ inline void run(const int num_frames,

if (f % t == 0) {
backend.syncAll();
velocity_1.updateIO(0);
velocity_1.updateHostData(0);
exportVTI(save_id, velocity_1);
printf("\n frame %d exported", f);
save_id++;
Expand Down Expand Up @@ -722,7 +722,7 @@ int main(int argc, char** argv)
const Neon::index_3d grid_dim(dim_x, dim_y, dim_z);
const size_t num_frames = (DIM == 2) ? 60000 : 2000;

using Grid = Neon::domain::dGrid;
using Grid = Neon::dGrid;
using dataT = float;

Grid grid(
Expand Down
10 changes: 5 additions & 5 deletions apps/poisson/poisson.cu
Original file line number Diff line number Diff line change
Expand Up @@ -106,9 +106,9 @@ void testPoisson(const Neon::Backend& backend,


// Move data to GPU
u.updateCompute(0);
rhs.updateCompute(0);
bd.updateCompute(0);
u.updateDeviceData(0);
rhs.updateDeviceData(0);
bd.updateDeviceData(0);


// Laplacian matvec operation
Expand Down Expand Up @@ -139,7 +139,7 @@ void testPoisson(const Neon::Backend& backend,

// Plotting
printf("Updating and exporting to VTI...\n");
u.updateIO(0);
u.updateHostData(0);
u.ioToVtk("poisson", "u");
}

Expand All @@ -161,7 +161,7 @@ int main(int argc, char** agrv)
size_t max_iterations = 1000;
double tolerance = 1e-10;

testPoisson<Neon::domain::dGrid, double, 1>(backend, domain_size, bdZMin, bdZMax, max_iterations, tolerance);
testPoisson<Neon::dGrid, double, 1>(backend, domain_size, bdZMin, bdZMax, max_iterations, tolerance);
}

return 0;
Expand Down
1 change: 1 addition & 0 deletions benchmarks/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
cmake_minimum_required(VERSION 3.19 FATAL_ERROR)

# Benchmark sub-projects that are part of the build.
add_subdirectory(lbm-lid-driven-cavity-flow)

# Currently excluded from the build (kept commented out):
# add_subdirectory("lbm-flow-over-sphere")
20 changes: 20 additions & 0 deletions benchmarks/lbm-flow-over-sphere/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
cmake_minimum_required(VERSION 3.19 FATAL_ERROR)

# Name of the executable target defined by this file.
set(APP "lbm-flow-over-sphere")

# Collect every file under src/. CONFIGURE_DEPENDS asks the build system to
# re-evaluate the glob at build time, so added or removed files are noticed
# without a manual re-configure.
file(GLOB_RECURSE SrcFiles CONFIGURE_DEPENDS src/*.*)

add_executable(${APP} ${SrcFiles})

# Nothing links against an executable, so its dependencies are PRIVATE.
target_link_libraries(${APP}
        PRIVATE libNeonDomain
        PRIVATE libNeonSkeleton)

set_target_properties(${APP} PROPERTIES
        CUDA_SEPARABLE_COMPILATION ON
        CUDA_RESOLVE_DEVICE_SYMBOLS ON)

# PRIVATE (not INTERFACE): INTERFACE options only populate
# INTERFACE_COMPILE_OPTIONS for consumers and are not used when compiling the
# target itself, so the Neon flags would never reach this executable.
# The generator expressions are quoted so that a multi-flag list stays inside
# a single expression.
target_compile_options(${APP} PRIVATE
        "$<$<COMPILE_LANGUAGE:CXX>:${NeonCXXFlags}>"
        "$<$<COMPILE_LANGUAGE:CUDA>:${NeonCUDAFlags}>"
        )
28 changes: 28 additions & 0 deletions benchmarks/lbm-flow-over-sphere/run.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
#!/usr/bin/env bash
# Parameter sweep for the lbm-flow-over-sphere benchmark.
#
# Loops over every combination of domain size, storage precision and compute
# precision, skipping the (storage=double, compute=float) pair, and prints the
# benchmark command line for each combination.
#
# NOTE(review): the command is only echoed, not executed — presumably a dry
# run; drop the leading `echo` to actually launch the benchmark. Confirm.
set -x

# Executable name; matches the CMake target defined for this benchmark.
APP="lbm-flow-over-sphere"

DOMAIN_SIZE_LIST="128 192 256 320 384 448 512"
GRID="dGrid"
STORAGE_FP_LIST="double float"
COMPUTE_FP_LIST="double float"
OCC="nOCC"

for DOMAIN_SIZE in ${DOMAIN_SIZE_LIST}; do
  for STORAGE_FP in ${STORAGE_FP_LIST}; do
    for COMPUTE_FP in ${COMPUTE_FP_LIST}; do

      # This storage/compute combination is excluded from the sweep.
      if [ "${STORAGE_FP}_${COMPUTE_FP}" = "double_float" ]; then
        continue
      fi

      echo "./${APP}" \
        --deviceType gpu --deviceIds 0 \
        --grid "${GRID}" \
        --domain-size "${DOMAIN_SIZE}" \
        --warmup-iter 10 --max-iter 100 --repetitions 5 \
        --report-filename "${APP}_${DOMAIN_SIZE}_${GRID}_STORAGE_${STORAGE_FP}_COMPUTE_${COMPUTE_FP}" \
        --computeFP "${COMPUTE_FP}" \
        --storageFP "${STORAGE_FP}" \
        "--${OCC}" --benchmark
    done
  done
done
Loading

0 comments on commit a6b7b73

Please sign in to comment.