Skip to content

Commit

Permalink
use exclusive scan direct sums for the remaining cases
Browse files Browse the repository at this point in the history
  • Loading branch information
ryichando committed Jan 12, 2025
1 parent 128d407 commit 8df4262
Show file tree
Hide file tree
Showing 3 changed files with 27 additions and 24 deletions.
3 changes: 3 additions & 0 deletions frontend/_app_.py
Original file line number Diff line number Diff line change
Expand Up @@ -179,4 +179,7 @@ def clear_cache(self) -> "App":
shutil.rmtree(item_path)
else:
os.remove(item_path)
open3d_data_path = os.path.expanduser(os.path.join("~", "open3d_data"))
if os.path.exists(os.path.expanduser(open3d_data_path)):
shutil.rmtree(open3d_data_path)
return self
28 changes: 22 additions & 6 deletions frontend/_mesh_.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import numpy as np
from typing import Optional
import os
import time


class MeshManager:
Expand Down Expand Up @@ -426,12 +427,27 @@ def preset(self, name: str) -> "TriMesh":
import open3d as o3d

mesh = None
if name == "armadillo":
mesh = o3d.data.ArmadilloMesh()
elif name == "knot":
mesh = o3d.data.KnotMesh()
elif name == "bunny":
mesh = o3d.data.BunnyMesh()
num_try, max_try, success, wait_time = 0, 5, False, 3
while num_try < max_try:
try:
if name == "armadillo":
mesh = o3d.data.ArmadilloMesh()
elif name == "knot":
mesh = o3d.data.KnotMesh()
elif name == "bunny":
mesh = o3d.data.BunnyMesh()
success = True
break
except Exception as e:
num_try += 1
print(
f"Mesh {name} could not be downloaded: {e}. Retrying... in {wait_time} seconds"
)
time.sleep(wait_time)

if not success:
raise Exception(f"Mesh {name} could not be downloaded")

if mesh is not None:
mesh = o3d.io.read_triangle_mesh(mesh.path)
vert = np.asarray(mesh.vertices)
Expand Down
20 changes: 2 additions & 18 deletions src/cpp/csrmat/csrmat.cu
Original file line number Diff line number Diff line change
Expand Up @@ -308,23 +308,14 @@ void DynCSRMat::finalize() {
DISPATCH_START(rows.size)
[rows] __device__(unsigned i) mutable { rows[i].finalize(); } DISPATCH_END;
assert(check());
unsigned num_fixed_nnz = 0;

Vec<unsigned> fixed_row_offsets = this->fixed_row_offsets;
DISPATCH_START(nrow)
[fixed_row_offsets, rows] __device__(unsigned i) mutable {
fixed_row_offsets[i] = rows[i].head;
} DISPATCH_END;

unsigned tmp0, tmp1;
CUDA_HANDLE_ERROR(cudaMemcpy(&tmp0, fixed_row_offsets.data + nrow - 1,
sizeof(unsigned), cudaMemcpyDeviceToHost));

exclusive_scan(fixed_row_offsets.data, nrow);
CUDA_HANDLE_ERROR(cudaMemcpy(&tmp1, fixed_row_offsets.data + nrow - 1,
sizeof(unsigned), cudaMemcpyDeviceToHost));

num_fixed_nnz = tmp0 + tmp1;
unsigned num_fixed_nnz = exclusive_scan(fixed_row_offsets.data, nrow);
if (num_fixed_nnz > max_nnz) {
printf("num_fixed_nnz: %u, max_nnz: %u\n", num_fixed_nnz, max_nnz);
assert(false);
Expand All @@ -348,14 +339,7 @@ void DynCSRMat::finalize() {
}
} DISPATCH_END;

CUDA_HANDLE_ERROR(cudaMemcpy(&tmp0, ref_index_offsets.data + nrow - 1,
sizeof(unsigned), cudaMemcpyDeviceToHost));

exclusive_scan(ref_index_offsets.data, nrow);
CUDA_HANDLE_ERROR(cudaMemcpy(&tmp1, ref_index_offsets.data + nrow - 1,
sizeof(unsigned), cudaMemcpyDeviceToHost));

unsigned num_nnz = tmp0 + tmp1;
unsigned num_nnz = exclusive_scan(ref_index_offsets.data, nrow);
if (num_nnz >= max_nnz) {
printf("transpose num_nnz %u, max_nnz: %u\n", num_nnz, max_nnz);
assert(false);
Expand Down

0 comments on commit 8df4262

Please sign in to comment.