From 04ac6b120f2b51368b89be37a984a28442472793 Mon Sep 17 00:00:00 2001 From: Alex Olivas Date: Mon, 15 Apr 2024 19:46:06 -0600 Subject: [PATCH] implemented the very basics of the first three classes. --- cmake_files/library.cmake | 4 +- cmake_files/tests.cmake | 4 + include/cudapp/cuda_device_prop.hpp | 89 +++++++++ include/cudapp/device.hpp | 26 +-- include/cudapp/device_manager.hpp | 31 +-- include/cudapp/device_properties.hpp | 28 +++ src/device.cu | 4 +- src/device_manager.cu | 27 +-- src/device_properties.cu | 274 +++++++++++++++++++++++++++ tests/test_device.cpp | 2 + tests/test_device_manager.cpp | 14 +- tests/test_device_properties.cpp | 21 ++ 12 files changed, 483 insertions(+), 41 deletions(-) create mode 100644 include/cudapp/cuda_device_prop.hpp create mode 100644 include/cudapp/device_properties.hpp create mode 100644 src/device_properties.cu create mode 100644 tests/test_device_properties.cpp diff --git a/cmake_files/library.cmake b/cmake_files/library.cmake index fb60a47..1404670 100644 --- a/cmake_files/library.cmake +++ b/cmake_files/library.cmake @@ -1,7 +1,9 @@ + add_library(cudapp SHARED src/device.cu + src/device_properties.cu src/device_manager.cu + ../include/cudapp/cuda_device_prop.hpp ) set_property(TARGET cudapp PROPERTY CUDA_ARCHITECTURES OFF) - diff --git a/cmake_files/tests.cmake b/cmake_files/tests.cmake index 7736aa5..1a6be9c 100644 --- a/cmake_files/tests.cmake +++ b/cmake_files/tests.cmake @@ -8,3 +8,7 @@ add_test(test_device bin/test_device) add_executable(test_device_manager tests/test_device_manager.cpp) target_link_libraries(test_device_manager cudapp) add_test(test_device_manager bin/test_device_manager) + +add_executable(test_device_properties tests/test_device_properties.cpp) +target_link_libraries(test_device_properties cudapp) +add_test(test_device_properties bin/test_device_properties) diff --git a/include/cudapp/cuda_device_prop.hpp b/include/cudapp/cuda_device_prop.hpp new file mode 100644 index 0000000..2327857 --- /dev/null +++ b/include/cudapp/cuda_device_prop.hpp @@ -0,0 +1,89 @@ +// +// Created by olivas on 4/15/24. +// + +#ifndef CUDAPP_CUDA_DEVICE_PROP_HPP +#define CUDAPP_CUDA_DEVICE_PROP_HPP + +#include +#include + +struct cuda_device_prop { + std::string name; + std::string uuid; // 16 byte unique identifier + size_t totalGlobalMem; + size_t sharedMemPerBlock; + unsigned regsPerBlock; + unsigned warpSize; + size_t memPitch; + unsigned maxThreadsPerBlock; + std::array maxThreadsDim; + std::array maxGridSize; + unsigned clockRate; + size_t totalConstMem; + unsigned major; + unsigned minor; + size_t textureAlignment; + size_t texturePitchAlignment; + unsigned deviceOverlap; + unsigned multiProcessorCount; + unsigned kernelExecTimeoutEnabled; + unsigned integrated; + unsigned canMapHostMemory; + unsigned computeMode; + unsigned maxTexture1D; + unsigned maxTexture1DMipmap; + unsigned maxTexture1DLinear; + std::array maxTexture2D; + std::array maxTexture2DMipmap; + std::array maxTexture2DLinear; + std::array maxTexture2DGather; + std::array maxTexture3D; + std::array maxTexture3DAlt; + unsigned maxTextureCubemap; + std::array maxTexture1DLayered; + std::array maxTexture2DLayered; + std::array maxTextureCubemapLayered; + unsigned maxSurface1D; + std::array maxSurface2D; + std::array maxSurface3D; + std::array maxSurface1DLayered; + std::array maxSurface2DLayered; + unsigned maxSurfaceCubemap; + std::array maxSurfaceCubemapLayered; + size_t surfaceAlignment; + unsigned concurrentKernels; + unsigned ECCEnabled; + unsigned pciBusID; + unsigned pciDeviceID; + unsigned pciDomainID; + unsigned tccDriver; + unsigned asyncEngineCount; + unsigned unifiedAddressing; + unsigned memoryClockRate; + unsigned memoryBusWidth; + unsigned l2CacheSize; + unsigned persistingL2CacheMaxSize; + unsigned maxThreadsPerMultiProcessor; + unsigned streamPrioritiesSupported; + unsigned globalL1CacheSupported; + unsigned localL1CacheSupported; + size_t sharedMemPerMultiprocessor; + unsigned regsPerMultiprocessor; + unsigned managedMemory; + unsigned isMultiGpuBoard; + unsigned multiGpuBoardGroupID; + unsigned singleToDoublePrecisionPerfRatio; + unsigned pageableMemoryAccess; + unsigned concurrentManagedAccess; + unsigned computePreemptionSupported; + unsigned canUseHostPointerForRegisteredMem; + unsigned cooperativeLaunch; + unsigned cooperativeMultiDeviceLaunch; + unsigned pageableMemoryAccessUsesHostPageTables; + unsigned directManagedMemAccessFromHost; + unsigned accessPolicyMaxWindowSize; +}; + + +#endif //CUDAPP_CUDA_DEVICE_PROP_HPP diff --git a/include/cudapp/device.hpp b/include/cudapp/device.hpp index 45fd4ee..5f37fcf 100644 --- a/include/cudapp/device.hpp +++ b/include/cudapp/device.hpp @@ -1,15 +1,17 @@ #pragma once -class Device{ -public: - Device(int device_number); - ~Device(); +namespace cudapp{ + class Device{ + public: + Device(int device_number); + ~Device(); - int multi_processor_count() const { return multi_processor_count_; } - size_t total_global_mem() const { return total_global_mem_; } - -private: - int device_number_; - int multi_processor_count_; - size_t total_global_mem_; -}; + int multi_processor_count() const { return multi_processor_count_; } + size_t total_global_mem() const { return total_global_mem_; } + + private: + int device_number_; + int multi_processor_count_; + size_t total_global_mem_; + }; +} diff --git a/include/cudapp/device_manager.hpp b/include/cudapp/device_manager.hpp index 594bb0b..d18fb6a 100644 --- a/include/cudapp/device_manager.hpp +++ b/include/cudapp/device_manager.hpp @@ -1,15 +1,20 @@ #pragma once -class DeviceManager{ -public: - DeviceManager(); - ~DeviceManager(); - - int multi_processor_count() const { return multi_processor_count_; } - size_t total_global_mem() const { return total_global_mem_; } - -private: - int device_number_; - int multi_processor_count_; - size_t total_global_mem_; -}; +#include + +namespace cudapp{ + class DeviceManager { + public: + DeviceManager(); + + ~DeviceManager(); + + [[nodiscard("this method's job is to return a value.")]] + std::optional device_count() const; + + [[nodiscard("this method's job is to return a value.")]] + std::optional current_device() const; + + }; + +} \ No newline at end of file diff --git a/include/cudapp/device_properties.hpp b/include/cudapp/device_properties.hpp new file mode 100644 index 0000000..55fa2d7 --- /dev/null +++ b/include/cudapp/device_properties.hpp @@ -0,0 +1,28 @@ +#pragma once + +#include + +#include + +namespace cudapp{ + class DeviceProperties { + public: + + DeviceProperties(unsigned device_number); + + ~DeviceProperties(); + + void change_device_number(unsigned device_number); + + cuda_device_prop device_properties; + + void pretty_print(); + + private: + + unsigned device_number_; + + void set_device_properties(const unsigned device_number); + }; +} + diff --git a/src/device.cu b/src/device.cu index f2373c0..a997267 100644 --- a/src/device.cu +++ b/src/device.cu @@ -1,7 +1,7 @@ #include #include -Device::Device(int device_number): +cudapp::Device::Device(int device_number): device_number_(device_number) { CHECK_ERROR(cudaSetDeviceFlags(cudaDeviceBlockingSync)); @@ -14,7 +14,7 @@ Device::Device(int device_number): multi_processor_count_ = device_properties.multiProcessorCount; } -Device::~Device() +cudapp::Device::~Device() { CHECK_ERROR(cudaDeviceReset()); } diff --git a/src/device_manager.cu b/src/device_manager.cu index ee3dc19..da11622 100644 --- a/src/device_manager.cu +++ b/src/device_manager.cu @@ -1,20 +1,23 @@ #include #include -DeviceManager::DeviceManager() -{ - CHECK_ERROR(cudaSetDeviceFlags(cudaDeviceBlockingSync)); - CHECK_ERROR(cudaSetDevice(device_number_)); - std::cerr<<"device_number_ = "< +cudapp::DeviceManager::device_count() const { + int device_count{0}; + CHECK_ERROR(cudaGetDeviceCount(&device_count)); + return static_cast(device_count); } -DeviceManager::~DeviceManager() -{ - CHECK_ERROR(cudaDeviceReset()); +optional +cudapp::DeviceManager::current_device() const { + int device{0}; + CHECK_ERROR(cudaGetDevice(&device)); + return static_cast(device); } +cudapp::DeviceManager::~DeviceManager() {} + diff --git a/src/device_properties.cu b/src/device_properties.cu new file mode 100644 index 0000000..0f62d0f --- /dev/null +++ b/src/device_properties.cu @@ -0,0 +1,274 @@ +#include +#include +#include + +#include +#include + +#include +#include + +using std::optional; +using std::string; +using std::array; +using std::cout; +using std::endl; + +cudapp::DeviceProperties::DeviceProperties(unsigned device_number) : + device_number_{device_number} { + // check that it's a valid number + set_device_properties(device_number_); +} + +void +cudapp::DeviceProperties::change_device_number(unsigned int device_number) { + device_number_ = device_number; + set_device_properties(device_number_); +} + +void +cudapp::DeviceProperties::set_device_properties(unsigned int device_number) { + cudaDeviceProp cudevprops; + CHECK_ERROR(cudaGetDeviceProperties(&cudevprops, device_number)); + + // now cast and translate + device_properties.name = string(cudevprops.name); + //device_properties.uuid; // 16 byte unique identifier + device_properties.totalGlobalMem = static_cast(cudevprops.totalGlobalMem); + device_properties.sharedMemPerBlock = static_cast(cudevprops.sharedMemPerBlock); + device_properties.regsPerBlock = static_cast(cudevprops.regsPerBlock); + device_properties.warpSize = static_cast(cudevprops.warpSize); + device_properties.memPitch = static_cast(cudevprops.memPitch); + device_properties.maxThreadsPerBlock = static_cast(cudevprops.maxThreadsPerBlock); + device_properties.maxThreadsDim[0] = static_cast(cudevprops.maxThreadsDim[0]); + device_properties.maxThreadsDim[1] = static_cast(cudevprops.maxThreadsDim[1]); + device_properties.maxGridSize[0] = static_cast(cudevprops.maxGridSize[0]); + device_properties.maxGridSize[1] = static_cast(cudevprops.maxGridSize[1]); + device_properties.clockRate = static_cast(cudevprops.clockRate); + device_properties.totalConstMem = static_cast(cudevprops.totalConstMem); + device_properties.major = static_cast(cudevprops.major); + device_properties.minor = static_cast(cudevprops.minor); + device_properties.textureAlignment = static_cast(cudevprops.textureAlignment); + device_properties.texturePitchAlignment = static_cast(cudevprops.texturePitchAlignment); + device_properties.deviceOverlap = static_cast(cudevprops.deviceOverlap); + device_properties.multiProcessorCount = static_cast(cudevprops.multiProcessorCount); + device_properties.kernelExecTimeoutEnabled = static_cast(cudevprops.kernelExecTimeoutEnabled); + device_properties.integrated = static_cast(cudevprops.integrated); + device_properties.canMapHostMemory = static_cast(cudevprops.canMapHostMemory); + device_properties.computeMode = static_cast(cudevprops.computeMode); + device_properties.maxTexture1D = static_cast(cudevprops.maxTexture1D); + device_properties.maxTexture1DMipmap = static_cast(cudevprops.maxTexture1DMipmap); + device_properties.maxTexture1DLinear = static_cast(cudevprops.maxTexture1DLinear); + device_properties.maxTexture2D[0] = static_cast(cudevprops.maxTexture2D[0]); + device_properties.maxTexture2D[1] = static_cast(cudevprops.maxTexture2D[1]); + device_properties.maxTexture2DMipmap[0] = static_cast(cudevprops.maxTexture2DMipmap[0]); + device_properties.maxTexture2DMipmap[1] = static_cast(cudevprops.maxTexture2DMipmap[1]); + device_properties.maxTexture2DLinear[0] = static_cast(cudevprops.maxTexture2DLinear[0]); + device_properties.maxTexture2DLinear[1] = static_cast(cudevprops.maxTexture2DLinear[1]); + device_properties.maxTexture2DLinear[2] = static_cast(cudevprops.maxTexture2DLinear[2]); + device_properties.maxTexture2DGather[0] = static_cast(cudevprops.maxTexture2DGather[0]); + device_properties.maxTexture2DGather[1] = static_cast(cudevprops.maxTexture2DGather[1]); + device_properties.maxTexture3D[0] = static_cast(cudevprops.maxTexture3D[0]); + device_properties.maxTexture3D[1] = static_cast(cudevprops.maxTexture3D[1]); + device_properties.maxTexture3D[2] = static_cast(cudevprops.maxTexture3D[2]); + device_properties.maxTexture3DAlt[0] = static_cast(cudevprops.maxTexture3DAlt[0]); + device_properties.maxTexture3DAlt[1] = static_cast(cudevprops.maxTexture3DAlt[1]); + device_properties.maxTexture3DAlt[2] = static_cast(cudevprops.maxTexture3DAlt[2]); + device_properties.maxTextureCubemap = static_cast(cudevprops.maxTextureCubemap); + device_properties.maxTexture1DLayered[0] = static_cast(cudevprops.maxTexture1DLayered[0]); + device_properties.maxTexture1DLayered[1] = static_cast(cudevprops.maxTexture1DLayered[1]); + device_properties.maxTexture2DLayered[0] = static_cast(cudevprops.maxTexture2DLayered[0]); + device_properties.maxTexture2DLayered[1] = static_cast(cudevprops.maxTexture2DLayered[1]); + device_properties.maxTexture2DLayered[2] = static_cast(cudevprops.maxTexture2DLayered[2]); + device_properties.maxTextureCubemapLayered[0] = static_cast(cudevprops.maxTextureCubemapLayered[0]); + device_properties.maxTextureCubemapLayered[1] = static_cast(cudevprops.maxTextureCubemapLayered[1]); + device_properties.maxSurface1D = static_cast(cudevprops.maxSurface1D); + device_properties.maxSurface2D[0] = static_cast(cudevprops.maxSurface2D[0]); + device_properties.maxSurface2D[1] = static_cast(cudevprops.maxSurface2D[1]); + device_properties.maxSurface3D[0] = static_cast(cudevprops.maxSurface3D[0]); + device_properties.maxSurface3D[1] = static_cast(cudevprops.maxSurface3D[1]); + device_properties.maxSurface3D[2] = static_cast(cudevprops.maxSurface3D[2]); + device_properties.maxSurface1DLayered[0] = static_cast(cudevprops.maxSurface1DLayered[0]); + device_properties.maxSurface1DLayered[1] = static_cast(cudevprops.maxSurface1DLayered[1]); + device_properties.maxSurface2DLayered[0] = static_cast(cudevprops.maxSurface2DLayered[0]); + device_properties.maxSurface2DLayered[1] = static_cast(cudevprops.maxSurface2DLayered[1]); + device_properties.maxSurface2DLayered[2] = static_cast(cudevprops.maxSurface2DLayered[2]); + device_properties.maxSurfaceCubemap = static_cast(cudevprops.maxSurfaceCubemap); + device_properties.maxSurfaceCubemapLayered[0] = static_cast(cudevprops.maxSurfaceCubemapLayered[0]); + device_properties.maxSurfaceCubemapLayered[1] = static_cast(cudevprops.maxSurfaceCubemapLayered[1]); + device_properties.surfaceAlignment = static_cast(cudevprops.surfaceAlignment); + device_properties.concurrentKernels = static_cast(cudevprops.concurrentKernels); + device_properties.ECCEnabled = static_cast(cudevprops.ECCEnabled); + device_properties.pciBusID = static_cast(cudevprops.pciBusID); + device_properties.pciDeviceID = static_cast(cudevprops.pciDeviceID); + device_properties.pciDomainID = static_cast(cudevprops.pciDomainID); + device_properties.tccDriver = static_cast(cudevprops.tccDriver); + device_properties.asyncEngineCount = static_cast(cudevprops.asyncEngineCount); + device_properties.unifiedAddressing = static_cast(cudevprops.unifiedAddressing); + device_properties.memoryClockRate = static_cast(cudevprops.memoryClockRate); + device_properties.memoryBusWidth = static_cast(cudevprops.memoryBusWidth); + device_properties.l2CacheSize = static_cast(cudevprops.l2CacheSize); + device_properties.persistingL2CacheMaxSize = static_cast(cudevprops.persistingL2CacheMaxSize); + device_properties.maxThreadsPerMultiProcessor = static_cast(cudevprops.maxThreadsPerMultiProcessor); + device_properties.streamPrioritiesSupported = static_cast(cudevprops.streamPrioritiesSupported); + device_properties.globalL1CacheSupported = static_cast(cudevprops.globalL1CacheSupported); + device_properties.localL1CacheSupported = static_cast(cudevprops.localL1CacheSupported); + device_properties.sharedMemPerMultiprocessor = static_cast(cudevprops.sharedMemPerMultiprocessor); + device_properties.regsPerMultiprocessor = static_cast(cudevprops.regsPerMultiprocessor); + device_properties.managedMemory = static_cast(cudevprops.managedMemory); + device_properties.isMultiGpuBoard = static_cast(cudevprops.isMultiGpuBoard); + device_properties.multiGpuBoardGroupID = static_cast(cudevprops.multiGpuBoardGroupID); + device_properties.singleToDoublePrecisionPerfRatio = static_cast(cudevprops.singleToDoublePrecisionPerfRatio); + device_properties.pageableMemoryAccess = static_cast(cudevprops.pageableMemoryAccess); + device_properties.concurrentManagedAccess = static_cast(cudevprops.concurrentManagedAccess); + device_properties.computePreemptionSupported = static_cast(cudevprops.computePreemptionSupported); + device_properties.canUseHostPointerForRegisteredMem = static_cast(cudevprops.canUseHostPointerForRegisteredMem); + device_properties.cooperativeLaunch = static_cast(cudevprops.cooperativeLaunch); + device_properties.cooperativeMultiDeviceLaunch = static_cast(cudevprops.cooperativeMultiDeviceLaunch); + device_properties.pageableMemoryAccessUsesHostPageTables = static_cast(cudevprops.pageableMemoryAccessUsesHostPageTables); + device_properties.directManagedMemAccessFromHost = static_cast(cudevprops.directManagedMemAccessFromHost); + device_properties.accessPolicyMaxWindowSize = static_cast(cudevprops.accessPolicyMaxWindowSize); +} + +void +cudapp::DeviceProperties::pretty_print() { + // now cast and translate + cout << "name = " << device_properties.name << endl; + //cout<