Skip to content

Commit

Permalink
Remove leaf_count
Browse files Browse the repository at this point in the history
Start one thread per tree node and have every thread whose node is not a full
leaf (a leaf holding a body) return immediately, instead of maintaining a per-node leaf count to steer threads from the root to the leaves.
  • Loading branch information
gonzalobg committed Jun 20, 2024
1 parent 2ee610a commit 5bd7e65
Show file tree
Hide file tree
Showing 2 changed files with 5 additions and 19 deletions.
4 changes: 0 additions & 4 deletions src/atomic_quad_tree.h
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,6 @@ class AtomicQuadTree {
vec<T, 2>* centre_masses;

// used for mass calc
atomic<int32_t>* leaf_count; // stores total number of sub leaves a node has
atomic<Index_t>* child_mass_complete; // stores number of children that have correct mass

void clear(Index_t i) {
Expand All @@ -49,7 +48,6 @@ class AtomicQuadTree {
node_status[i].store(NodeStatus::EmptyLeaf, memory_order_relaxed);
total_masses[i] = T(0);
centre_masses[i] = vec<T, 2>::splat(0);
leaf_count[i].store(0, memory_order_relaxed);
child_mass_complete[i].store(0, memory_order_relaxed);
}

Expand All @@ -65,7 +63,6 @@ class AtomicQuadTree {
qt.total_masses = new T[size];
qt.centre_masses = new vec<T, 2>[size];

qt.leaf_count = new atomic<int32_t>[size];
qt.child_mass_complete = new atomic<Index_t>[size];
return qt;
}
Expand All @@ -77,7 +74,6 @@ class AtomicQuadTree {
delete qt->node_status[];
delete qt->total_masses[];
delete qt->centre_masses[];
delete qt->leaf_count[];
delete qt->child_mass_complete[];
delete qt->bump_allocator;
}
Expand Down
20 changes: 5 additions & 15 deletions src/kernels.h
Original file line number Diff line number Diff line change
Expand Up @@ -6,17 +6,9 @@
// raw kernels
template<typename T, typename Index_t>
void atomic_calc_mass(AtomicQuadTree<T, Index_t> tree, Index_t tree_index) {
tree_index = 0;

// navigate to leaf
while (tree.node_status[tree_index].load(memory_order_acquire) != NodeStatus::FullLeaf) {
// work out which child to go to ...
auto child_index = tree.first_child[tree_index];
if (tree.leaf_count[child_index + 0].fetch_sub(1, memory_order_relaxed) > 0) tree_index = child_index + 0;
else if (tree.leaf_count[child_index + 1].fetch_sub(1, memory_order_relaxed) > 0) tree_index = child_index + 1;
else if (tree.leaf_count[child_index + 2].fetch_sub(1, memory_order_relaxed) > 0) tree_index = child_index + 2;
else if (tree.leaf_count[child_index + 3].fetch_sub(1, memory_order_relaxed) > 0) tree_index = child_index + 3;
}
// If this node is not a leaf node with a body, we are done:
if (tree.node_status[tree_index].load(memory_order_relaxed) != NodeStatus::FullLeaf)
return;

// Accumulate masses up to the root
do {
Expand Down Expand Up @@ -86,7 +78,6 @@ void atomic_insert(T mass, vec<T, 2> pos, AtomicQuadTree<T, Index_t> tree) {
}
tree_index = tree.first_child[tree_index] + child_pos;

tree.leaf_count[tree_index].fetch_add(1, memory_order_relaxed); // count needed for mass traversal
side_length /= static_cast<T>(2);
} else if (local_node_status == NodeStatus::EmptyLeaf && tree.node_status[tree_index].compare_exchange_weak(local_node_status, NodeStatus::Locked, memory_order_acquire, memory_order_relaxed)) {
tree.total_masses[tree_index] = mass;
Expand Down Expand Up @@ -119,7 +110,6 @@ void atomic_insert(T mass, vec<T, 2> pos, AtomicQuadTree<T, Index_t> tree) {

// release node and continue to try to insert body
tree.node_status[tree_index].store(NodeStatus::NotLeaf, memory_order_release);
tree.leaf_count[evicted_index].fetch_add(1, memory_order_relaxed);
}
}
}
Expand Down Expand Up @@ -172,9 +162,9 @@ auto build_atomic_tree(System<T>& system, AtomicQuadTree<T, Index_t> tree) {
template<typename T, typename Index_t>
auto calc_mass_atomic_tree(System<T>& system, AtomicQuadTree<T, Index_t> tree) {
auto r = system.body_indices();
std::for_each(
std::for_each_n(
std::execution::par,
r.begin(), r.end(),
r.begin(), tree.capacity,
[tree] (auto i) {
atomic_calc_mass(tree, i);
}
Expand Down

0 comments on commit 5bd7e65

Please sign in to comment.