diff --git a/.github/workflows/getting-started-vast.yml b/.github/workflows/getting-started-vast.yml
deleted file mode 100644
index 5d5eb75..0000000
--- a/.github/workflows/getting-started-vast.yml
+++ /dev/null
@@ -1,40 +0,0 @@
-# File: getting-started-vast.yml
-# Author: Ryoichi Ando (ryoichi.ando@zozo.com)
-# License: Apache v2.0
-
-name: vast.ai
-
-# To setup a vast instance as a self-hosted runner
-# set this before running config.py
-# export RUNNER_ALLOW_RUNASROOT="1"
-
-on:
-  workflow_dispatch:
-
-jobs:
-  headless:
-    runs-on: vast-ci # replace with your runner name
-    timeout-minutes: 15
-
-    steps:
-
-      - name: check out repo
-        uses: actions/checkout@v3
-
-      - name: print info
-        run: bash .github/workflows/print-info.sh
-
-      - name: setup 
-        run: |
-          nvidia-smi
-          apt update
-          apt install -y git python3
-
-      - name: warmup
-        run: python3 warmup.py
-
-      - name: build
-        run: /root/.cargo/bin/cargo build --release
-
-      - name: run headless
-        run: python3 examples/headless.py
diff --git a/.gitignore b/.gitignore
index 6ae7e8c..c737ef1 100644
--- a/.gitignore
+++ b/.gitignore
@@ -17,10 +17,14 @@ npm*
 *.tmp
 __pycache__
 .clangd.swp
+.vscode
 eigsys/eigen2
 eigsys/eigen3
 eigsys/bench3
 examples/asset
 examples/data
 examples/**/.*
+examples/export
+examples/_*
 _skipped.node
+private/vast
\ No newline at end of file
diff --git a/src/cpp/math/vec.hpp b/src/cpp/math/vec.hpp
deleted file mode 100644
index 2ccbf5f..0000000
--- a/src/cpp/math/vec.hpp
+++ /dev/null
@@ -1,190 +0,0 @@
-// File: vec.hpp
-// Author: Ryoichi Ando (ryoichi.ando@zozo.com)
-// License: Apache v2.0
-
-#ifndef VEC_HPP
-#define VEC_HPP
-
-#include "../common.hpp"
-#include "../main/cuda_utils.hpp"
-#include <cmath>
-#include <iostream>
-#include <thrust/device_vector.h>
-#include <thrust/execution_policy.h>
-#include <thrust/inner_product.h>
-#include <thrust/transform.h>
-
-template <class T> struct VecVec {
-
-    T *data{nullptr};
-    unsigned *offset{nullptr};
-    unsigned size{0};
-    unsigned nnz{0};
-    unsigned nnz_allocated{0};
-    unsigned offset_allocated{0};
-
-    static VecVec<T> alloc(unsigned nrow, unsigned max_nnz) {
-        VecVec<T> result;
-        result.size = nrow;
-        result.nnz = 0;
-        result.nnz_allocated = max_nnz;
-        result.offset_allocated = nrow + 1;
-        CUDA_HANDLE_ERROR(
-            cudaMalloc(&result.offset, result.offset_allocated * sizeof(T)));
-        CUDA_HANDLE_ERROR(
-            cudaMalloc(&result.data, result.nnz_allocated * sizeof(T)));
-        return result;
-    }
-    __host__ __device__ T &operator()(unsigned i, unsigned j) {
-#if DEBUG_MODE
-        if (i >= size) {
-            printf("VecVec: operator() i = %u, size = %u\n", i, size);
-            assert(false);
-        }
-#endif
-        unsigned k = offset[i] + j;
-#if DEBUG_MODE
-        if (k >= offset[i + 1]) {
-            printf("VecVec: k >= offset[i + 1] failed\n");
-            assert(false);
-        }
-#endif
-        return data[k];
-    }
-    __host__ __device__ const T &operator()(unsigned i, unsigned j) const {
-#if DEBUG_MODE
-        if (i >= size) {
-            printf("VecVec: const T &operator() i = %u, size = %u\n", i, size);
-            assert(false);
-        }
-#endif
-        unsigned k = offset[i] + j;
-#if DEBUG_MODE
-        if (k >= offset[i + 1]) {
-            printf("VecVec: k >= offset[i + 1] failed\n");
-            assert(false);
-        }
-#endif
-        return data[k];
-    }
-    __host__ __device__ unsigned count(unsigned i) const {
-        if (size == 0) {
-            return 0;
-        }
-#if DEBUG_MODE
-        if (i >= size) {
-            printf("VecVec: count() i = %u, size = %u\n", i, size);
-            assert(false);
-        }
-#endif
-        return offset[i + 1] - offset[i];
-    }
-    __host__ __device__ unsigned count() const {
-        if (size == 0) {
-            return 0;
-        }
-        return offset[size];
-    }
-};
-
-template <class T> struct Vec {
-
-    T *data{nullptr};
-    unsigned size{0};
-    unsigned allocated{0};
-
-    __device__ T &operator[](unsigned i) {
-#if DEBUG_MODE
-        if (i >= size) {
-            printf("Vec: operator[] i = %u, size = %u\n", i, size);
-            assert(false);
-        }
-#endif
-        return data[i];
-    }
-    __device__ const T &operator[](unsigned i) const {
-#if DEBUG_MODE
-        if (i >= size) {
-            printf("Vec: const T &operator[] i = %u, size = %u\n", i, size);
-            assert(false);
-        }
-#endif
-        return data[i];
-    }
-    template <class A> Vec<A> flatten() {
-        Vec<A> result;
-        result.data = (A *)data;
-        result.size = sizeof(T) / sizeof(A) * size;
-        result.allocated = sizeof(T) / sizeof(A) * allocated;
-        return result;
-    }
-    void resize(unsigned size) {
-        if (size < this->allocated) {
-            this->size = size;
-        }
-    }
-    static Vec<T> alloc(unsigned n, unsigned alloc_factor = 1) {
-        Vec<T> result;
-        if (n > 0) {
-            result.allocated = alloc_factor * n;
-            CUDA_HANDLE_ERROR(
-                cudaMalloc(&result.data, result.allocated * sizeof(T)));
-            result.size = n;
-        }
-        return result;
-    }
-    bool free() {
-        if (data) {
-            CUDA_HANDLE_ERROR(cudaFree((void *)data));
-            data = nullptr;
-            return true;
-        }
-        return false;
-    }
-    Vec<T> clear(const T val = T()) {
-        if (data && size > 0) {
-            thrust::device_ptr<T> data_dev(data);
-            thrust::fill(data_dev, data_dev + size, val);
-        }
-        return *this;
-    }
-    __device__ void atomic_add(unsigned i, const T &val) {
-        assert(i < size);
-        if (val) {
-            atomicAdd(&data[i], val);
-        }
-    }
-    void copy(const Vec<T> &src) const {
-        assert(src.size == size);
-        thrust::device_ptr<T> src_dev(src.data);
-        thrust::device_ptr<T> dst_dev(this->data);
-        thrust::copy(src_dev, src_dev + src.size, dst_dev);
-    }
-    void add_scaled(Vec<T> &b, float c) const {
-        assert(b.size == size);
-        thrust::device_ptr<T> a_dev(this->data);
-        thrust::device_ptr<T> b_dev(b.data);
-        thrust::transform(
-            a_dev, a_dev + size, b_dev, a_dev,
-            [c] __device__(T a_val, T b_val) { return a_val + c * b_val; });
-    }
-    void combine(Vec<T> &a, Vec<T> &b, float c, float d) const {
-        assert(a.size == size);
-        assert(b.size == size);
-        thrust::device_ptr<T> dest_dev(this->data);
-        thrust::device_ptr<T> a_dev(a.data);
-        thrust::device_ptr<T> b_dev(b.data);
-        thrust::transform(a_dev, a_dev + size, b_dev, dest_dev,
-                          [c, d] __device__(T a_val, T b_val) {
-                              return c * a_val + d * b_val;
-                          });
-    }
-    float inner_product(const Vec<float> &b) const {
-        assert(b.size == size);
-        thrust::device_ptr<float> a_dev(data);
-        thrust::device_ptr<float> b_dev(b.data);
-        return thrust::inner_product(a_dev, a_dev + b.size, b_dev, 0.0f);
-    }
-};
-
-#endif
diff --git a/src/cpp/solver/cg.hpp b/src/cpp/solver/cg.hpp
deleted file mode 100644
index 9bb331d..0000000
--- a/src/cpp/solver/cg.hpp
+++ /dev/null
@@ -1,55 +0,0 @@
-// File: cg.hpp
-// Author: Ryoichi Ando (ryoichi.ando@zozo.com)
-// License: Apache v2.0
-
-#include "../data.hpp"
-#include "operator.hpp"
-#include <cmath>
-#include <string>
-#include <tuple>
-
-namespace cg {
-
-std::tuple<bool, unsigned, float> solve(const Operators &op, Vec<float> &r,
-                                        Vec<float> &x, unsigned max_iter,
-                                        float tol) {
-    static Vec<float> tmp = Vec<float>::alloc(x.size);
-    static Vec<float> z = Vec<float>::alloc(x.size);
-    static Vec<float> p = Vec<float>::alloc(x.size);
-    static Vec<float> r0 = Vec<float>::alloc(x.size);
-
-    op.apply(x, tmp);
-    r.add_scaled(tmp, -1.0f);
-    r0.copy(r);
-    op.precond(r, z);
-    p.copy(z);
-
-    unsigned iter = 1;
-    double rz0 = r.inner_product(z);
-    double err0 = op.norm(r, tmp);
-    if (!err0) {
-        return {true, iter, 0.0f};
-    } else {
-        while (true) {
-            op.apply(p, tmp);
-            double alpha = rz0 / (double)p.inner_product(tmp);
-            x.add_scaled(p, alpha);
-            r.add_scaled(tmp, -alpha);
-            double err = op.norm(r, tmp);
-            double reresid = err / err0;
-            if (reresid < tol) {
-                return {true, iter, reresid};
-            } else if (iter >= max_iter) {
-                return {false, iter, reresid};
-            }
-            op.precond(r, z);
-            double rz1 = r.inner_product(z);
-            double beta = rz1 / rz0;
-            p.combine(z, p, 1.0f, beta);
-            rz0 = rz1;
-            iter++;
-        }
-    }
-}
-
-} // namespace cg
diff --git a/src/cpp/solver/operator.hpp b/src/cpp/solver/operator.hpp
deleted file mode 100644
index 8974a03..0000000
--- a/src/cpp/solver/operator.hpp
+++ /dev/null
@@ -1,25 +0,0 @@
-// File: operator.hpp
-// Author: Ryoichi Ando (ryoichi.ando@zozo.com)
-// License: Apache v2.0
-
-#ifndef SOLVER_HPP
-#define SOLVER_HPP
-
-#include "../math/vec.hpp"
-#include "../utility/dispatcher.hpp"
-#include "../utility/utility.hpp"
-
-class Operators {
-  public:
-    virtual void apply(const Vec<float> &, Vec<float> &) const = 0;
-    virtual void precond(const Vec<float> &, Vec<float> &) const = 0;
-    virtual float norm(const Vec<float> &r, Vec<float> &tmp) const {
-        DISPATCH_START(r.size)
-        [r, tmp] __device__(unsigned i) mutable {
-            tmp[i] = fabs(r[i]);
-        } DISPATCH_END;
-        return utility::sum_array(tmp, r.size);
-    }
-};
-
-#endif