From e68b9f89d734a241cc9c26c2f7db283e46d2b28b Mon Sep 17 00:00:00 2001
From: Ivet Galabova <galabovaa@gmail.com>
Date: Fri, 15 Nov 2024 15:15:54 +0200
Subject: [PATCH] test OK

---
 check/CMakeLists.txt          | 10 ++++
 check/cublas_example.cpp      | 74 +++++++++++++++++++++++++++++
 check/cublas_gpu_start.cpp    | 89 +++++++++++++++++++++++++++++++++++
 src/pdlp/cupdlp/cupdlp_defs.h |  2 +-
 4 files changed, 174 insertions(+), 1 deletion(-)
 create mode 100644 check/cublas_example.cpp
 create mode 100644 check/cublas_gpu_start.cpp

diff --git a/check/CMakeLists.txt b/check/CMakeLists.txt
index 859b2788db..6441edd040 100644
--- a/check/CMakeLists.txt
+++ b/check/CMakeLists.txt
@@ -475,6 +475,16 @@ if (BUILD_EXTRA_UNIT_TESTS AND BUILD_EXTRA_UNIT_ONLY)
     set_tests_properties(unit_tests_extra PROPERTIES TIMEOUT 10000) 
 
     if (CUPDLP_GPU)
+        set_target_properties(unit_tests_extra PROPERTIES CUDA_SEPARABLE_COMPILATION ON)
         target_link_libraries(unit_tests_extra ${CUDA_LIBRARY})
+        # Stand-alone cuBLAS matrix example built from cublas_example.cpp.
+        add_executable(cublas_example cublas_example.cpp)
+        set_target_properties(cublas_example PROPERTIES CUDA_SEPARABLE_COMPILATION ON)
+        target_link_libraries(cublas_example ${CUDA_LIBRARY})
+        # Stand-alone cuBLAS vector-norm check built from cublas_gpu_start.cpp.
+        add_executable(cublas_gpu_start cublas_gpu_start.cpp)
+        set_target_properties(cublas_gpu_start PROPERTIES CUDA_SEPARABLE_COMPILATION ON)
+        target_link_libraries(cublas_gpu_start ${CUDA_LIBRARY})
     endif()
+
 endif()
\ No newline at end of file
diff --git a/check/cublas_example.cpp b/check/cublas_example.cpp
new file mode 100644
index 0000000000..96fb3ed938
--- /dev/null
+++ b/check/cublas_example.cpp
@@ -0,0 +1,74 @@
+//Example 2. Application Using C and cuBLAS: 0-based indexing
+//-----------------------------------------------------------
+#include <stdio.h>
+#include <stdlib.h>
+#include <math.h>
+#include <cuda_runtime.h>
+#include "cublas_v2.h"
+#define M 6
+#define N 5
+#define IDX2C(i,j,ld) (((j)*(ld))+(i))
+
+static __inline__ void modify (cublasHandle_t handle, float *m, int ldm, int n, int p, int q, float alpha, float beta){
+    cublasSscal (handle, n - q, &alpha, m + IDX2C(p,q,ldm), ldm); /* n-q elements from (p,q), stride ldm: along row p */
+    cublasSscal (handle, ldm - p, &beta, m + IDX2C(p,q,ldm), 1);  /* ldm-p elements from (p,q), stride 1: down column q */
+}
+
+int main (void){
+    /* Host copy of the M x N matrix, addressed column-major through IDX2C. */
+    float* hostA = (float *)malloc (M * N * sizeof (*hostA));
+    if (hostA == NULL) {
+        printf ("host memory allocation failed");
+        return EXIT_FAILURE;
+    }
+    /* Element (row, col) starts out as row * N + col + 1. */
+    for (int col = 0; col < N; ++col) {
+        for (int row = 0; row < M; ++row) {
+            hostA[IDX2C(row,col,M)] = (float)(row * N + col + 1);
+        }
+    }
+    /* Device buffer of the same byte size as the host matrix. */
+    float* devPtrA;
+    cudaError_t allocStatus = cudaMalloc ((void**)&devPtrA, M*N*sizeof(*hostA));
+    if (allocStatus != cudaSuccess) {
+        printf ("device memory allocation failed");
+        free (hostA);
+        return EXIT_FAILURE;
+    }
+    cublasHandle_t handle;
+    cublasStatus_t blasStatus = cublasCreate(&handle);
+    if (blasStatus != CUBLAS_STATUS_SUCCESS) {
+        printf ("CUBLAS initialization failed\n");
+        free (hostA);
+        cudaFree (devPtrA);
+        return EXIT_FAILURE;
+    }
+    blasStatus = cublasSetMatrix (M, N, sizeof(*hostA), hostA, M, devPtrA, M);
+    if (blasStatus != CUBLAS_STATUS_SUCCESS) {
+        printf ("data download failed");
+        free (hostA);
+        cudaFree (devPtrA);
+        cublasDestroy(handle);
+        return EXIT_FAILURE;
+    }
+    modify (handle, devPtrA, M, N, 1, 2, 16.0f, 12.0f);
+    blasStatus = cublasGetMatrix (M, N, sizeof(*hostA), devPtrA, M, hostA, M);
+    if (blasStatus != CUBLAS_STATUS_SUCCESS) {
+        printf ("data upload failed");
+        free (hostA);
+        cudaFree (devPtrA);
+        cublasDestroy(handle);
+        return EXIT_FAILURE;
+    }
+    /* The result is back in hostA, so the device buffer and handle can be released. */
+    cudaFree (devPtrA);
+    cublasDestroy(handle);
+    for (int col = 0; col < N; ++col) {
+        for (int row = 0; row < M; ++row) {
+            printf ("%7.0f", hostA[IDX2C(row,col,M)]);
+        }
+        printf ("\n");
+    }
+    free(hostA);
+    return EXIT_SUCCESS;
+}
\ No newline at end of file
diff --git a/check/cublas_gpu_start.cpp b/check/cublas_gpu_start.cpp
new file mode 100644
index 0000000000..8b3717ac7d
--- /dev/null
+++ b/check/cublas_gpu_start.cpp
@@ -0,0 +1,89 @@
+//cuBLAS start-up check: copy a vector to the GPU, compute its 2-norm, copy it back
+//---------------------------------------------------------------------------------
+#include <stdio.h>
+#include <stdlib.h>
+#include <math.h>
+#include <iostream>
+#include <cuda_runtime.h>
+#include "cublas_v2.h"
+#define N 5
+
+
+int main (void){ /* cuBLAS check: upload a vector, take its 2-norm, download it again */
+    cudaError_t cudaStat;
+    cublasStatus_t stat;
+    cublasHandle_t handle;
+    int i;
+    float* devPtrA;
+    /* Host input vector, filled below with a[i] = i * N. */
+    float* a = (float *)malloc (N * sizeof (*a));
+    if (!a) {
+        printf ("host memory allocation failed");
+        return EXIT_FAILURE;
+    }
+    for (i = 0; i < N; i++)
+        a[i] = (float)(i * N);
+    for (i = 0; i < N; i++)
+        printf ("%7.0f", a[(i)]);
+    std::cout << std::endl;
+    cudaStat = cudaMalloc ((void**)&devPtrA, N*sizeof(*a));
+    if (cudaStat != cudaSuccess) {
+        printf ("device memory allocation failed");
+        free (a);
+        return EXIT_FAILURE;
+    }
+    stat = cublasCreate(&handle);
+    if (stat != CUBLAS_STATUS_SUCCESS) {
+        printf ("CUBLAS initialization failed\n");
+        free (a);
+        cudaFree (devPtrA);
+        return EXIT_FAILURE;
+    }
+    /* Host -> device copy of the input vector. */
+    stat = cublasSetVector(N, sizeof(*a), a, 1, devPtrA, 1);
+    if (stat != CUBLAS_STATUS_SUCCESS) {
+        printf ("data download failed");
+        free (a);
+        cudaFree (devPtrA);
+        cublasDestroy(handle);
+        return EXIT_FAILURE;
+    }
+    /* Keep the Snrm2 status in stat so the check below tests this call, not the earlier copy. */
+    float r;
+    stat = cublasSnrm2(handle, N, devPtrA, 1, &r);
+    if (stat != CUBLAS_STATUS_SUCCESS) {
+        printf ("error at dnorm");
+        free (a);
+        cudaFree (devPtrA);
+        cublasDestroy(handle);
+        return EXIT_FAILURE;
+    }
+    std::cout << "Snorm: " << r << std::endl << std::endl;
+    /* Separate host buffer for reading the vector back; checked here and freed on every later exit. */
+    float* b = (float *)malloc (N * sizeof (*b));
+    if (!b) {
+        printf ("host memory allocation failed");
+        free (a);
+        cudaFree (devPtrA);
+        cublasDestroy(handle);
+        return EXIT_FAILURE;
+    }
+    stat = cublasGetVector(N, sizeof(*a), devPtrA, 1, b, 1);
+    if (stat != CUBLAS_STATUS_SUCCESS) {
+        printf ("data upload failed");
+        free (a);
+        free (b);
+        cudaFree (devPtrA);
+        cublasDestroy(handle);
+        return EXIT_FAILURE;
+    }
+    cudaFree (devPtrA);
+    cublasDestroy(handle);
+    for (i = 0; i < N; i++) {
+        printf ("%7.0f", b[(i)]);
+    }
+    printf ("\n");
+    free(a);
+    free(b);
+    return EXIT_SUCCESS;
+}
\ No newline at end of file
diff --git a/src/pdlp/cupdlp/cupdlp_defs.h b/src/pdlp/cupdlp/cupdlp_defs.h
index 14f558cf51..b8a0b65023 100644
--- a/src/pdlp/cupdlp/cupdlp_defs.h
+++ b/src/pdlp/cupdlp/cupdlp_defs.h
@@ -2,7 +2,7 @@
 #define CUPDLP_H_GUARD
 
 // #define CUPDLP_CPU
-#define CUPDLP_DEBUG (0)
+// #define CUPDLP_DEBUG (0)
 #define CUPDLP_TIMER
 
 #include "HConfig.h"