-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmemoryManagmentHelper.cuh
143 lines (113 loc) · 4.39 KB
/
memoryManagmentHelper.cuh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
#ifndef MEMORY_MANAGMENT
#define MEMORY_MANAGMENT
#include "cuda.h"
#include <builtin_types.h>
#include "common.cuh"
#define PRINT_PHISICAL_ALLOCATION 0
typedef int ShareableHandle;
// Maps a CUDA driver API result code to a printable name.
//
// error: the CUresult value to describe.
// Returns the string that cuGetErrorName stored for `error`, or the literal
// "<unknown>" when no string was stored (the pointer is still NULL after the call).
static const char *_cudaGetErrorEnum(CUresult error) {
    static char fallbackName[] = "<unknown>";
    const char *name = NULL;

    // The status returned by cuGetErrorName is not inspected; only the pointer
    // it leaves in `name` decides which string is returned.
    cuGetErrorName(error, &name);

    if (name == NULL) {
        return fallbackName;
    }
    return name;
}
// Bookkeeping for one mapped device-memory range, as produced by
// importAndMapMemory / allocateSharableMemory and consumed by cleanMemoryMaping.
typedef struct
{
// Start of the virtual address range obtained from cuMemAddressReserve;
// the allocation is mapped at this address.
CUdeviceptr ptr;
// Size in bytes passed to the reserve / map / unmap / address-free calls.
size_t alloc_size;
// Not set to a meaningful context by the helpers in this file: the
// cuCtxCreate / cuCtxDestroy calls that would use it are commented out.
CUcontext cuda_context;
// Generic allocation handle. cleanMemoryMaping releases it only when it is
// non-zero; importAndMapMemory stores 0 here because it releases its
// imported handle itself right after mapping.
CUmemGenericAllocationHandle mem_handle;
} memoryProperties;
// Exits the process unless the current device reports support for the
// virtual address management API.
//
// The device index comes from cudaGetDevice and is queried for
// CU_DEVICE_ATTRIBUTE_VIRTUAL_ADDRESS_MANAGEMENT_SUPPORTED. A non-zero
// attribute value makes the function return normally; otherwise a message is
// printed through dbg_printf and the process ends with exit(-1).
//
// The attribute query is routed through CUDA_RESULT_CHECK, like the other
// driver calls in this file, so a failed query is reported instead of being
// silently ignored (how the failure is reported is up to that macro, which is
// defined outside this file).
__host__ void validateDeviceIsSupported()
{
    int device = 0;
    CUDA_CHECK(cudaGetDevice(&device));

    // Start from "unsupported": if the query fails and leaves the output
    // untouched, the value must never be mistaken for a positive answer.
    int deviceSupportsVmm = 0;
    CUDA_RESULT_CHECK(cuDeviceGetAttribute(&deviceSupportsVmm,
                                           CU_DEVICE_ATTRIBUTE_VIRTUAL_ADDRESS_MANAGEMENT_SUPPORTED,
                                           device));

    if (deviceSupportsVmm != 0) {
        return;
    }

    dbg_printf("Virtual memory API is unsupported by device");
    exit(-1);
}
// Undoes a mapping described by `prop`.
//
// prop: descriptor of the range to tear down; `ptr` and `alloc_size` are
//       passed to cuMemUnmap and then to cuMemAddressFree.
//
// The allocation handle is released only when `prop.mem_handle` is non-zero.
// The CUDA context stored in `prop` is not destroyed here.
__host__ void cleanMemoryMaping(memoryProperties prop)
{
    // Unmap first, then give the reserved virtual address range back.
    CUDA_RESULT_CHECK(cuMemUnmap(prop.ptr, prop.alloc_size));
    CUDA_RESULT_CHECK(cuMemAddressFree(prop.ptr, prop.alloc_size));

    // A zero handle means there is nothing to release.
    const bool hasHandle = (prop.mem_handle != 0);
    if (!hasHandle) {
        return;
    }
    CUDA_RESULT_CHECK(cuMemRelease(prop.mem_handle));

    // Context teardown is disabled:
    // CUDA_RESULT_CHECK(cuCtxDestroy(prop.cuda_context));
}
// Closes the descriptor held in a shareable handle.
//
// shHandle: value passed straight to close().
// The result of close() is discarded.
__host__ void closeSharableHandle(ShareableHandle shHandle)
{
    (void)close(shHandle);
}
// Imports an allocation from a POSIX file-descriptor handle and maps it into
// a freshly reserved virtual address range with read/write access for
// device 0.
//
// ipc_handle:  file descriptor identifying the shared allocation. It is
//              closed by this function before returning.
// buffer_size: number of bytes to reserve, map and grant access to. It is
//              passed to the driver unchanged.
//              NOTE(review): presumably this must equal the (granularity
//              rounded) size chosen by the exporting side - confirm against
//              the caller.
//
// Returns a descriptor whose `ptr` is the mapped address, `alloc_size` is
// `buffer_size`, and `mem_handle` is 0 (the imported handle is released here
// right after mapping, so cleanMemoryMaping must not release it again).
// `cuda_context` is zero: no context is created by this function.
static memoryProperties importAndMapMemory(ShareableHandle ipc_handle, size_t buffer_size)
{
    const int cuda_dindex = 0;
    CUmemGenericAllocationHandle mem_handle;

    // Zero the whole descriptor up front. Previously cuda_context was never
    // assigned, so the returned struct carried an indeterminate value.
    memoryProperties memProperties = {};
    memProperties.alloc_size = buffer_size;
    memProperties.mem_handle = 0;

    CUDA_RESULT_CHECK(cuInit(0));

    // Explicit context creation is disabled:
    // CUdevice cuda_device;
    // CUDA_RESULT_CHECK(cuDeviceGet(&cuda_device, cuda_dindex));
    // CUDA_RESULT_CHECK(cuCtxCreate(&(memProperties.cuda_context), CU_CTX_SCHED_AUTO, cuda_device));

    dbg_printf("Sharable_handle in producer: %d\n", ipc_handle);

    // Import the shared allocation; the descriptor is passed by value inside
    // a pointer-sized integer.
    CUDA_RESULT_CHECK(cuMemImportFromShareableHandle(&mem_handle,
                                                     (void *)(uintptr_t)(ipc_handle),
                                                     CU_MEM_HANDLE_TYPE_POSIX_FILE_DESCRIPTOR));

    // Reserve virtual address space.
    CUDA_RESULT_CHECK(cuMemAddressReserve(&(memProperties.ptr), memProperties.alloc_size, 0, 0UL, 0));

    // Map the imported allocation, then drop the local handle.
    CUDA_RESULT_CHECK(cuMemMap(memProperties.ptr, memProperties.alloc_size, 0, mem_handle, 0));
    CUDA_RESULT_CHECK(cuMemRelease(mem_handle));

    // Grant read/write access on the mapped range to the device.
    CUmemAccessDesc access;
    access.location.id = cuda_dindex;
    access.location.type = CU_MEM_LOCATION_TYPE_DEVICE;
    access.flags = CU_MEM_ACCESS_FLAGS_PROT_READWRITE;
    CUDA_RESULT_CHECK(cuMemSetAccess(memProperties.ptr, buffer_size, &access, 1));

    dbg_printf("Successfully import memory\n");

    close(ipc_handle);
    return memProperties;
}
// Creates a pinned device allocation on device 0 that can be exported as a
// POSIX file descriptor, maps it with read/write access, and exports it.
//
// memory_size: requested size in bytes; rounded up with ROUND_UP to the
//              recommended allocation granularity reported by the driver.
// shHandle:    output; receives the exported shareable handle.
//
// Returns a descriptor whose `ptr` is the mapped address, `alloc_size` is the
// rounded size, and `mem_handle` is the handle from cuMemCreate (still held,
// so cleanMemoryMaping will release it). `cuda_context` is zero: no context
// is created by this function.
static memoryProperties allocateSharableMemory(size_t memory_size, ShareableHandle *shHandle)
{
    const int cuda_dindex = 0;
    size_t granularity;
    CUmemAllocationProp prop;
    CUmemAccessDesc access;

    // Zero the whole descriptor up front. Previously cuda_context was never
    // assigned, so the returned struct carried an indeterminate value.
    memoryProperties memProperties = {};

    CUDA_RESULT_CHECK(cuInit(0));

    // Explicit context creation is disabled:
    // CUdevice cuda_device;
    // CUDA_RESULT_CHECK(cuDeviceGet(&cuda_device, cuda_dindex));
    // CUDA_RESULT_CHECK(cuCtxCreate(&(memProperties.cuda_context), CU_CTX_SCHED_AUTO, cuda_device));

    // Describe the allocation: pinned device memory, exportable as a POSIX fd.
    memset(&prop, 0, sizeof(CUmemAllocationProp));
    prop.location.type = CU_MEM_LOCATION_TYPE_DEVICE;
    prop.location.id = cuda_dindex;
    prop.requestedHandleTypes = CU_MEM_HANDLE_TYPE_POSIX_FILE_DESCRIPTOR;
    prop.type = CU_MEM_ALLOCATION_TYPE_PINNED;

    // Check the allocation granularity.
    CUDA_RESULT_CHECK(cuMemGetAllocationGranularity(&granularity, &prop,
                                                    CU_MEM_ALLOC_GRANULARITY_RECOMMENDED));

    // Round the requested size up by the granularity.
    memProperties.alloc_size = ROUND_UP(memory_size, granularity);
    CUDA_RESULT_CHECK(cuMemCreate(&(memProperties.mem_handle), memProperties.alloc_size, &prop, 0));

#if PRINT_PHISICAL_ALLOCATION
    // Confirm physical memory consumption.
    system("nvidia-smi");
#endif

    // Reserve address space, map the allocation into it, and open it for
    // read/write from the same location the allocation lives on.
    CUDA_RESULT_CHECK(cuMemAddressReserve(&(memProperties.ptr), memProperties.alloc_size, 0, 0UL, 0));
    CUDA_RESULT_CHECK(cuMemMap(memProperties.ptr, memProperties.alloc_size, 0, memProperties.mem_handle, 0));
    access.location = prop.location;
    access.flags = CU_MEM_ACCESS_FLAGS_PROT_READWRITE;
    CUDA_RESULT_CHECK(cuMemSetAccess(memProperties.ptr, memProperties.alloc_size, &access, 1));

    // Export the allocation to a shareable handle.
    CUDA_RESULT_CHECK(cuMemExportToShareableHandle(shHandle, memProperties.mem_handle,
                                                   CU_MEM_HANDLE_TYPE_POSIX_FILE_DESCRIPTOR,
                                                   0));
    dbg_printf("Sharable_handle '%d' was created\n", *shHandle);

    return memProperties;
}
#endif