Skip to content

Commit

Permalink
[release] Merge branch 'master' of https://github.com/zenustech/zeno
Browse files Browse the repository at this point in the history
  • Loading branch information
archibate committed Sep 27, 2022
2 parents 4ef9434 + 03a504c commit c1e7afb
Show file tree
Hide file tree
Showing 15 changed files with 1,036 additions and 116 deletions.
1 change: 1 addition & 0 deletions projects/CUDA/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ set_target_properties(zeno
# wrangler
target_sources(zeno PRIVATE
utils/IndexBuckets.cu
utils/Primitives.cu
wrangle/PW.cu
wrangle/P2W.cu
wrangle/PNW.cu
Expand Down
130 changes: 130 additions & 0 deletions projects/CUDA/utils/Primitives.cu
Original file line number Diff line number Diff line change
@@ -0,0 +1,130 @@
#include "Structures.hpp"
#include "zensim/cuda/execution/ExecutionPolicy.cuh"
#include <cassert>
#include <cstdlib>
#include <cstring>
#include <zeno/types/NumericObject.h>
#include <zeno/types/PrimitiveObject.h>
#include <zeno/utils/parallel_reduce.h>
#include <zeno/utils/vec.h>
#include <zeno/zeno.h>

namespace zeno {

/// utilities
constexpr std::size_t count_warps(std::size_t n) noexcept {
return (n + 31) / 32;
}
constexpr int warp_index(int n) noexcept {
return n / 32;
}
constexpr auto warp_mask(int i, int n) noexcept {
int k = n % 32;
const int tail = n - k;
if (i < tail)
return zs::make_tuple(0xFFFFFFFFu, 32);
return zs::make_tuple(((unsigned)(1ull << k) - 1), k);
}

template <typename T, typename Op> __forceinline__ __device__ void reduce_to(int i, int n, T val, T &dst, Op op) {
auto [mask, numValid] = warp_mask(i, n);
__syncwarp(mask);
auto locid = threadIdx.x & 31;
for (int stride = 1; stride < 32; stride <<= 1) {
auto tmp = __shfl_down_sync(mask, val, stride);
if (locid + stride < numValid)
val = op(val, tmp);
}
if (locid == 0)
dst = val;
}

template <typename TransOp, typename ReduceOp>
static float prim_reduce(typename ZenoParticles::particles_t &verts, float e, TransOp top, ReduceOp rop,
std::string attrToReduce) {
using namespace zs;
constexpr auto space = execspace_e::cuda;
using T = typename ZenoParticles::particles_t::value_type;
auto nchn = verts.getPropertySize(attrToReduce);
auto offset = verts.getPropertyOffset(attrToReduce);
const auto nwarps = count_warps(verts.size());

auto cudaPol = cuda_exec().device(0);

Vector<float> res{verts.get_allocator(), nwarps};
// cudaPol(res, [e] ZS_LAMBDA(auto &v) { v = e; });
cudaPol(range(verts.size()), [res = proxy<space>(res), verts = proxy<space>({}, verts), offset, nwarps, nchn, top,
rop] ZS_LAMBDA(int i) mutable {
auto [mask, numValid] = warp_mask(i, nwarps);
auto locid = threadIdx.x & 31;
float v = top(verts(offset, i));
while (--nchn) {
v = rop(top(verts(offset++, i)), v);
}
reduce_to(i, nwarps, v, res[i / 32], rop);
});

Vector<float> ret{res.get_allocator(), 1};
zs::reduce(cudaPol, std::begin(res), std::end(res), std::begin(ret), e, rop);
return ret.getVal();
}

struct ZSPrimitiveReduction : zeno::INode {
struct pass_on {
constexpr auto operator()(auto v) const noexcept {
return v;
}
};
struct getabs {
constexpr auto operator()(auto v) const noexcept {
return zs::abs(v);
}
};
virtual void apply() override {
using namespace zs;
constexpr auto space = execspace_e::cuda;
auto prim = get_input<ZenoParticles>("ZSParticles");
auto &verts = prim->getParticles();
auto attrToReduce = get_input2<std::string>("attr");
if (attrToReduce == "pos")
attrToReduce = "x";
if (attrToReduce == "vel")
attrToReduce = "v";

if (!verts.hasProperty(attrToReduce))
throw std::runtime_error(fmt::format("verts do not have property [{}]\n", attrToReduce));

auto opStr = get_input2<std::string>("op");
zeno::NumericValue result;
if (opStr == "avg") {
result = prim_reduce(verts, 0, pass_on{}, std::plus<float>{}, attrToReduce) / verts.size();
} else if (opStr == "max") {
result = prim_reduce(verts, limits<float>::lowest(), pass_on{}, getmax<float>{}, attrToReduce);
} else if (opStr == "min") {
result = prim_reduce(verts, limits<float>::max(), pass_on{}, getmin<float>{}, attrToReduce);
} else if (opStr == "absmax") {
result = prim_reduce(verts, 0, getabs{}, getmax<float>{}, attrToReduce);
}

auto out = std::make_shared<zeno::NumericObject>();
out->set(result);
set_output("result", std::move(out));
}
};
ZENDEFNODE(ZSPrimitiveReduction, {/* inputs: */ {
"ZSParticles",
{"string", "attr", "pos"},
{"enum avg max min absmax", "op", "avg"},
},
/* outputs: */
{
"result",
},
/* params: */
{},
/* category: */
{
"primitive",
}});

} // namespace zeno
26 changes: 16 additions & 10 deletions projects/CUDA/wrangle/P2W.cu
Original file line number Diff line number Diff line change
Expand Up @@ -153,15 +153,19 @@ struct ZSParticlesTwoWrangler : zeno::INode {
auto key = name.substr(1);
if (!checkDuplication(key))
newChns.push_back(PropertyTag{key, dim});
if (dim != 3 && dim != 1)
err_printf("ERROR: bad attribute dimension for primitive: %d\n", dim);
}
if (newChns.size() > 0)
pars.append_channels(cudaPol, newChns);
props.insert(std::end(props), std::begin(newChns), std::end(newChns));

if (_cuModule == nullptr) {
auto wrangleKernelPtxs = cudri::load_all_ptx_files_at();
void *state;
cuLinkCreate(0, nullptr, nullptr, (CUlinkState *)&state);
// fmt::print(">>>>>>>>>>>>>>>>>>>");
// fmt::print("{}\n", jitCode);
// fmt::print("<<<<<<<<<<<<<<<<<<<");

auto jitSrc = cudri::compile_cuda_source_to_ptx(jitCode);
cuLinkAddData((CUlinkState)state, CU_JIT_INPUT_PTX, (void *)jitSrc.data(), (size_t)jitSrc.size(),
Expand All @@ -188,27 +192,29 @@ struct ZSParticlesTwoWrangler : zeno::INode {
/// symbols
for (int i = 0; i < prog->symbols.size(); i++) {
auto [name, dimid] = prog->symbols[i];
#if 0
fmt::print("channel {}: {}.{}. chn offset: {} (of {})\n", i,
name.c_str(), dimid, pars.getChannelOffset(name.substr(1)),
pars.numChannels());
#endif
void *parsPtr = nullptr;
if (name[1] == '@') {

unsigned short aux = name[1] == '@' ? 1 : 0;
float *parsPtr = nullptr;
if (aux) {
aux = 1;
parsPtr = pars2.data();
name = name.substr(2);
} else {
parsPtr = pars.data();
name = name.substr(1);
}
const auto &curPars = name[1] == '@' ? pars2 : pars;
const auto &curPars = aux ? pars2 : pars;
haccessors[i] = zs::AccessorAoSoA{zs::aosoa_c,
parsPtr,
(unsigned short)unitBytes,
(unsigned short)tileSize,
(unsigned short)curPars.numChannels(),
(unsigned short)(curPars.getChannelOffset(name) + dimid),
(unsigned short)0};
aux};
#if 0
fmt::print("channel {}: {}.{}. chn offset: {} (of {})\n", i, name.c_str(), dimid,
curPars.getChannelOffset(name), curPars.numChannels());
#endif
}
auto daccessors = haccessors.clone({zs::memsrc_e::device, 0});

Expand Down
1 change: 0 additions & 1 deletion projects/CUDA/wrangle/PNBVHW.cu
Original file line number Diff line number Diff line change
Expand Up @@ -180,7 +180,6 @@ struct ZSParticleNeighborBvhWrangler : INode {
}
if (newChns.size() > 0)
pars.append_channels(cudaPol, newChns);
props.insert(std::end(props), std::begin(newChns), std::end(newChns));

if (_cuModule == nullptr) {
/// execute on the current particle object
Expand Down
1 change: 0 additions & 1 deletion projects/CUDA/wrangle/PNW.cu
Original file line number Diff line number Diff line change
Expand Up @@ -184,7 +184,6 @@ struct ZSParticleNeighborWrangler : INode {
}
if (newChns.size() > 0)
pars.append_channels(cudaPol, newChns);
props.insert(std::end(props), std::begin(newChns), std::end(newChns));

if (_cuModule == nullptr) {
/// execute on the current particle object
Expand Down
1 change: 0 additions & 1 deletion projects/CUDA/wrangle/PPW.cu
Original file line number Diff line number Diff line change
Expand Up @@ -171,7 +171,6 @@ struct ZSParticleParticleWrangler : INode {
}
if (newChns.size() > 0)
pars.append_channels(cudaPol, newChns);
props.insert(std::end(props), std::begin(newChns), std::end(newChns));

if (_cuModule == nullptr) {
/// execute on the current particle object
Expand Down
1 change: 0 additions & 1 deletion projects/CUDA/wrangle/PW.cu
Original file line number Diff line number Diff line change
Expand Up @@ -144,7 +144,6 @@ struct ZSParticlesWrangler : zeno::INode {
}
if (newChns.size() > 0)
pars.append_channels(cudaPol, newChns);
props.insert(std::end(props), std::begin(newChns), std::end(newChns));

if (_cuModule == nullptr) {
auto wrangleKernelPtxs = cudri::load_all_ptx_files_at();
Expand Down
69 changes: 67 additions & 2 deletions projects/CuLagrange/fem/Solver.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -3,23 +3,68 @@
#include "Structures.hpp"
#include "zensim/container/Bvh.hpp"
#include "zensim/container/Bvs.hpp"
#include "zensim/container/Bvtt.hpp"
#include "zensim/container/HashTable.hpp"
#include "zensim/container/Vector.hpp"
#include "zensim/cuda/Cuda.h"
#include "zensim/cuda/execution/ExecutionPolicy.cuh"
#include "zensim/math/Vec.h"
#include <zeno/types/PrimitiveObject.h>
#include <zeno/utils/logger.h>
#include <zeno/utils/log.h>
#include <zeno/zeno.h>

namespace zeno {

template <int n = 1> struct HessianPiece {
using HessT = zs::vec<float, n * 3, n * 3>;
using IndsT = zs::vec<int, n>;
zs::Vector<HessT> hess;
zs::Vector<IndsT> inds;
zs::Vector<int> cnt;
using allocator_t = typename zs::Vector<int>::allocator_type;
void init(const allocator_t &allocator, std::size_t size = 0) {
hess = zs::Vector<HessT>{allocator, size};
inds = zs::Vector<IndsT>{allocator, size};
cnt = zs::Vector<int>{allocator, 1};
}
int count() const {
return cnt.getVal();
}
int increaseCount(int inc) {
int v = cnt.getVal();
cnt.setVal(v + inc);
hess.resize((std::size_t)(v + inc));
inds.resize((std::size_t)(v + inc));
return v;
}
void reset(bool setZero = true, std::size_t count = 0) {
if (setZero)
hess.reset(0);
cnt.setVal(count);
}
};
template <typename HessianPieceT> struct HessianView {
static constexpr bool is_const_structure = std::is_const_v<HessianPieceT>;
using HT = typename HessianPieceT::HessT;
using IT = typename HessianPieceT::IndsT;
zs::conditional_t<is_const_structure, const HT *, HT *> hess;
zs::conditional_t<is_const_structure, const IT *, IT *> inds;
zs::conditional_t<is_const_structure, const int *, int *> cnt;
};
template <zs::execspace_e space, int n> HessianView<HessianPiece<n>> proxy(HessianPiece<n> &hp) {
return HessianView<HessianPiece<n>>{hp.hess.data(), hp.inds.data(), hp.cnt.data()};
}
template <zs::execspace_e space, int n> HessianView<const HessianPiece<n>> proxy(const HessianPiece<n> &hp) {
return HessianView<const HessianPiece<n>>{hp.hess.data(), hp.inds.data(), hp.cnt.data()};
}

struct IPCSystem : IObject {
using T = double;
using Ti = zs::conditional_t<zs::is_same_v<T, double>, zs::i64, zs::i32>;
using dtiles_t = zs::TileVector<T, 32>;
using tiles_t = typename ZenoParticles::particles_t;
using vec3 = zs::vec<T, 3>;
using vec3f = zs::vec<float, 3>;
using ivec3 = zs::vec<int, 3>;
using ivec2 = zs::vec<int, 2>;
using mat2 = zs::vec<T, 2, 2>;
Expand All @@ -32,6 +77,7 @@ struct IPCSystem : IObject {
using dpair4_t = zs::vec<Ti, 4>;
// using bvh_t = zeno::ZenoLBvh<3, 32, int, T>;
using bvh_t = zs::LBvh<3, int, T>;
using bvfront_t = zs::BvttFront<int, int>;
using bv_t = zs::AABBBox<3, T>;

inline static const char s_meanMassTag[] = "MeanMass";
Expand Down Expand Up @@ -161,6 +207,9 @@ struct IPCSystem : IObject {
return zs::make_tuple(nPP.getVal(), nPE.getVal(), nPT.getVal(), nEE.getVal(), nPPM.getVal(), nPEM.getVal(),
nEEM.getVal(), ncsPT.getVal(), ncsEE.getVal());
}
auto getCollisionCnts() const {
return zs::make_tuple(ncsPT.getVal(), ncsEE.getVal());
}
void findCollisionConstraints(zs::CudaExecutionPolicy &pol, T dHat, T xi = 0);
void findCollisionConstraintsImpl(zs::CudaExecutionPolicy &pol, T dHat, T xi, bool withBoundary = false);
void precomputeFrictions(zs::CudaExecutionPolicy &pol, T dHat, T xi = 0);
Expand All @@ -178,8 +227,14 @@ struct IPCSystem : IObject {
void computeFrictionBarrierGradientAndHessian(zs::CudaExecutionPolicy &pol, const zs::SmallString &gTag,
bool includeHessian = true);
// krylov solver
void convertHessian(zs::CudaExecutionPolicy &pol);
void project(zs::CudaExecutionPolicy &pol, const zs::SmallString tag);
void precondition(zs::CudaExecutionPolicy &pol, std::true_type, const zs::SmallString srcTag,
const zs::SmallString dstTag);
void precondition(zs::CudaExecutionPolicy &pol, const zs::SmallString srcTag, const zs::SmallString dstTag);

void multiply(zs::CudaExecutionPolicy &pol, std::true_type, const zs::SmallString dxTag,
const zs::SmallString bTag);
void multiply(zs::CudaExecutionPolicy &pol, const zs::SmallString dxTag, const zs::SmallString bTag);
void cgsolve(zs::CudaExecutionPolicy &cudaPol);
void groundIntersectionFreeStepsize(zs::CudaExecutionPolicy &pol, T &stepSize);
Expand Down Expand Up @@ -225,7 +280,7 @@ struct IPCSystem : IObject {
//
std::vector<PrimitiveHandle> prims;
std::vector<PrimitiveHandle> auxPrims;
std::size_t coOffset, numDofs;
std::size_t coOffset, numDofs, numBouDofs;
std::size_t sfOffset, seOffset, svOffset;

// (scripted) collision objects
Expand Down Expand Up @@ -277,6 +332,13 @@ struct IPCSystem : IObject {
zs::Vector<pair4_t> csPT, csEE;
zs::Vector<int> ncsPT, ncsEE;

// for faster linear system solve
HessianPiece<1> hess1;
HessianPiece<2> hess2;
HessianPiece<3> hess3;
HessianPiece<4> hess4;
tiles_t cgtemp;

// boundary contacts
// auxiliary data (spatial acceleration)
tiles_t stInds, seInds, svInds;
Expand All @@ -285,6 +347,9 @@ struct IPCSystem : IObject {
bvs_t stBvs, seBvs; // STQ
bvh_t bouStBvh, bouSeBvh; // for collision objects
bvs_t bouStBvs, bouSeBvs; // STQ
bvfront_t selfStFront, boundaryStFront;
bvfront_t selfSeFront, boundarySeFront;
bool frontManageRequired;
T dt, framedt, curRatio;
};

Expand Down
Loading

0 comments on commit c1e7afb

Please sign in to comment.