diff --git a/README.md b/README.md
index e7e36d9..a717382 100644
--- a/README.md
+++ b/README.md
@@ -62,4 +62,28 @@ You should see something similar to the following output:
 The first thing you might want to do is see how many NVIDIA cards you have installed and check their properties.
 
 
+#### QUDA
+Notes:
+The installation is a little janky.  You have to install libcutensor1 first; otherwise the
+cuQuantum installer will complain that you don't have an installable version.  This gets you
+unstuck, but to actually run you then need to install libcutensor2; otherwise ld will complain
+that it can't find libcutensor.so.2.
+
+Running hello_quda:
+
+    cuTensorNet version: 20400
+    ===== device info ======
+    GPU-name:NVIDIA GeForce RTX 3060 Ti
+    GPU-clock:1695000
+    GPU-memoryClock:7001000
+    GPU-nSM:38
+    GPU-major:8
+    GPU-minor:6
+    ========================
+    Included headers and defined data types
+
+Bob's your uncle.
+
+
+
 
diff --git a/cmake_files/executable.cmake b/cmake_files/executable.cmake
index dd0b436..c7b56a6 100644
--- a/cmake_files/executable.cmake
+++ b/cmake_files/executable.cmake
@@ -1,3 +1,12 @@
 
 add_executable(hello_device examples/hello_device.cpp)
 target_link_libraries(hello_device cudapp)
+
+# hello_quda links cuTENSOR, cuTensorNet and cuStateVec; the former absolute paths are kept as search hints.
+add_executable(hello_quda examples/hello_quda.cu)
+find_library(CUTENSOR_LIBRARY cutensor HINTS /usr/lib/x86_64-linux-gnu/libcutensor/12)
+find_library(CUTENSORNET_LIBRARY cutensornet HINTS /usr/lib/x86_64-linux-gnu/libcuquantum/12)
+find_library(CUSTATEVEC_LIBRARY custatevec HINTS /usr/lib/x86_64-linux-gnu/libcuquantum/12)
+target_link_libraries(hello_quda cudapp ${CUTENSOR_LIBRARY} ${CUTENSORNET_LIBRARY} ${CUSTATEVEC_LIBRARY})
+# A false CUDA_ARCHITECTURES value makes CMake pass no architecture flags to the compiler.
+set_property(TARGET hello_quda PROPERTY CUDA_ARCHITECTURES OFF)
\ No newline at end of file
diff --git a/examples/hello_quda.cu b/examples/hello_quda.cu
new file mode 100644
index 0000000..76cac72
--- /dev/null
+++ b/examples/hello_quda.cu
@@ -0,0 +1,101 @@
+//
+// Created by olivas on 4/15/24.
+//
+#include <stdlib.h>
+#include <stdio.h>
+
+#include <unordered_map>
+#include <vector>
+#include <cassert>
+
+#include <cuda_runtime.h>
+#include <cutensornet.h>
+
+
+// Logs a failed cuTensorNet call (status string + line) and continues; do/while(0) makes it one statement.
+#define HANDLE_ERROR(x)                                           \
+do { const auto err = (x);                                        \
+  if( err != CUTENSORNET_STATUS_SUCCESS )                         \
+  { printf("Error: %s in line %d\n", cutensornetGetErrorString(err), __LINE__); \
+    fflush(stdout); }                                             \
+} while(0)
+
+// Logs a failed CUDA runtime call (error string + line) and continues; do/while(0) makes it one statement.
+#define HANDLE_CUDA_ERROR(x)                                      \
+do { const auto err = (x);                                        \
+  if( err != cudaSuccess )                                        \
+  { printf("CUDA Error: %s in line %d\n", cudaGetErrorString(err), __LINE__); \
+    fflush(stdout); }                                             \
+} while(0)
+
+
+// Times GPU work on one stream with a pair of CUDA events: start(), enqueue work, then seconds().
+struct GPUTimer
+{
+    // Creates the two events that are later recorded on `stream`.
+    GPUTimer(cudaStream_t stream): stream_(stream)
+    {
+        HANDLE_CUDA_ERROR(cudaEventCreate(&start_));
+        HANDLE_CUDA_ERROR(cudaEventCreate(&stop_));
+    }
+    // The events are owned by this object; a copy would destroy them a second time.
+    GPUTimer(const GPUTimer&) = delete;
+    GPUTimer& operator=(const GPUTimer&) = delete;
+    ~GPUTimer()
+    {
+        HANDLE_CUDA_ERROR(cudaEventDestroy(start_));
+        HANDLE_CUDA_ERROR(cudaEventDestroy(stop_));
+    }
+
+    void start() { HANDLE_CUDA_ERROR(cudaEventRecord(start_, stream_)); }  // records the start event
+    // Records the stop event, waits for it, and returns the seconds elapsed since start().
+    float seconds()
+    {
+        HANDLE_CUDA_ERROR(cudaEventRecord(stop_, stream_));
+        HANDLE_CUDA_ERROR(cudaEventSynchronize(stop_));
+        float elapsedMs = 0.0f;  // stays 0 if the query below fails (the macro only logs)
+        HANDLE_CUDA_ERROR(cudaEventElapsedTime(&elapsedMs, start_, stop_));
+        return static_cast<float>(elapsedMs * 1e-3);  // cudaEventElapsedTime reports milliseconds
+    }
+private:
+    cudaEvent_t start_ = nullptr, stop_ = nullptr;  // null until cudaEventCreate succeeds
+    cudaStream_t stream_;
+};
+
+
+// Entry point: prints the cuTensorNet version and device 0's properties; fails if no GPU is reported.
+int main() {
+    static_assert(sizeof(size_t) == sizeof(int64_t), "Please build this sample on a 64-bit architecture!");
+    bool verbose = true;
+    // Check cuTensorNet version
+    const size_t cuTensornetVersion = cutensornetGetVersion();
+    if (verbose) printf("cuTensorNet version: %zu\n", cuTensornetVersion);  // %zu matches size_t
+
+    // Set GPU device; give up early when the runtime reports no device
+    int numDevices{0};
+    HANDLE_CUDA_ERROR(cudaGetDeviceCount(&numDevices));
+    if (numDevices < 1) {
+        printf("No CUDA device found\n");
+        return EXIT_FAILURE;
+    }
+    const int deviceId = 0;
+    HANDLE_CUDA_ERROR(cudaSetDevice(deviceId));
+    cudaDeviceProp prop{};  // value-initialised so a failed query cannot print indeterminate fields
+    HANDLE_CUDA_ERROR(cudaGetDeviceProperties(&prop, deviceId));
+
+    if (verbose) {
+        printf("===== device info ======\n");
+        printf("GPU-name:%s\n", prop.name);
+        printf("GPU-clock:%d\n", prop.clockRate);
+        printf("GPU-memoryClock:%d\n", prop.memoryClockRate);
+        printf("GPU-nSM:%d\n", prop.multiProcessorCount);
+        printf("GPU-major:%d\n", prop.major);
+        printf("GPU-minor:%d\n", prop.minor);
+        printf("========================\n");
+    }
+
+    typedef float floatType;  // these three type selections are not used yet in this sample
+    cudaDataType_t typeData = CUDA_R_32F;
+    cutensornetComputeType_t typeCompute = CUTENSORNET_COMPUTE_32F;
+    if (verbose) printf("Included headers and defined data types\n");
+}
\ No newline at end of file