-
Notifications
You must be signed in to change notification settings - Fork 8
/
Copy pathshm.h
338 lines (298 loc) · 10.9 KB
/
shm.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
#pragma once
#include <pthread.h>
#include <sys/xattr.h>

#include <atomic>
#include <cassert>
#include <cerrno>
#include <cstdio>
#include <cstring>
#include <ostream>

#include "const.h"
#include "idx.h"
#include "posix.h"
#include "utils/logging.h"
#include "utils/utils.h"
namespace madfs::dram {
/**
 * One per-thread slot. A slot moves between three states:
 *
 *   UNINITIALIZED --try_init()--> PENDING --> INITIALIZED
 *   INITIALIZED   --reset()-----> PENDING --> UNINITIALIZED
 *
 * The state transitions are done with compare-and-swap so that at most one
 * caller wins the right to initialize or tear down a slot.
 *
 * NOTE: the member order below determines the object layout, which is pinned
 * to SHM_PER_THREAD_SIZE by the static_assert after the class. Do not reorder
 * or add members without checking that assertion.
 */
class alignas(SHM_PER_THREAD_SIZE) PerThreadData {
  enum class State : uint8_t {
    UNINITIALIZED,
    PENDING,  // the state is inconsistent (initializing or destroying)
    INITIALIZED,
  };

  std::atomic<enum State> state;

  // mutex used to indicate the liveness of the thread
  // shall only be read when state == INITIALIZED
  pthread_mutex_t mutex;

  // the index within the shared memory region
  size_t index;

  // each thread will pin a tx block so that the garbage collector will not
  // reclaim this block and blocks after it
  std::atomic<LogicalBlockIdx> tx_block_idx;

 public:
  /**
   * @return true if there are some data stored, regardless of whether the
   * thread is alive or not
   */
  [[nodiscard]] bool has_data() const { return state != State::UNINITIALIZED; }

  /**
   * @return true if this PerThreadData contains valid data (i.e. the state
   * is initialized and the thread is not dead).
   */
  [[nodiscard]] bool is_data_valid() {
    if (state != State::INITIALIZED) return false;
    return is_thread_alive();
  }

  /**
   * Try to initialize the per-thread data. There should be only one thread
   * calling this function at a time.
   *
   * @param i the index of this per-thread data
   * @return true if initialization succeeded; false if the slot was not in
   * the UNINITIALIZED state
   */
  bool try_init(size_t i) {
    State expected = State::UNINITIALIZED;
    if (!state.compare_exchange_strong(expected, State::PENDING,
                                       std::memory_order_acq_rel,
                                       std::memory_order_acquire)) {
      // The slot is not UNINITIALIZED: it is either already INITIALIZED or
      // currently PENDING (being set up or torn down). Either way it cannot
      // be claimed by this caller.
      return false;
    }

    index = i;
    tx_block_idx.store(0, std::memory_order_relaxed);
    init_robust_mutex(&mutex);
    // TODO: uncomment this
    // pthread_mutex_lock(&mutex);

    // Publish the fully set-up slot; pairs with acquire loads of `state`.
    state.store(State::INITIALIZED, std::memory_order_release);
    // `i` is a size_t, so it must be formatted with %zu.
    LOG_DEBUG("PerThreadData %zu initialized by tid %d", i, tid);
    return true;
  }

  /**
   * Destroy the per-thread data. Does nothing unless the slot is currently
   * INITIALIZED.
   */
  void reset() {
    State expected = State::INITIALIZED;
    if (!state.compare_exchange_strong(expected, State::PENDING,
                                       std::memory_order_acq_rel,
                                       std::memory_order_acquire)) {
      // if the old state is already PENDING or UNINITIALIZED, then we
      // don't need to do anything
      return;
    }

    // `index` is a size_t, so it must be formatted with %zu.
    LOG_DEBUG("PerThreadData %zu to be reset by tid %d", index, tid);
    // TODO: uncomment this
    // if (is_thread_alive()) pthread_mutex_unlock(&mutex);
    index = 0;
    tx_block_idx.store(0, std::memory_order_relaxed);
    pthread_mutex_destroy(&mutex);
    state.store(State::UNINITIALIZED, std::memory_order_release);
  }

  /** Record the tx block pinned by this thread. */
  void set_tx_block_idx(LogicalBlockIdx idx) {
    tx_block_idx.store(idx, std::memory_order_relaxed);
  }

  /** @return the tx block currently pinned by this thread */
  [[nodiscard]] LogicalBlockIdx get_tx_block_idx() const {
    return tx_block_idx.load(std::memory_order_relaxed);
  }

 private:
  /**
   * Check the robust mutex to see if the thread is alive.
   *
   * This function can only be called when state == INITIALIZED, since trying
   * to lock an uninitialized mutex will cause undefined behavior.
   *
   * NOTE: the liveness check is currently stubbed out: the function returns
   * true unconditionally and everything after the first return is
   * unreachable until the TODOs in try_init()/reset() are resolved.
   *
   * @return true if the thread is alive
   */
  bool is_thread_alive() {
    return true;  // TODO: fix me

    assert(state.load(std::memory_order_acquire) == State::INITIALIZED);
    int rc = pthread_mutex_trylock(&mutex);
    if (rc == 0) {
      // if we can lock the mutex, then the thread is dead
      pthread_mutex_unlock(&mutex);
      return false;
    } else if (rc == EBUSY) {
      // if the mutex is already locked, then the thread is alive
      return true;
    } else if (rc == EOWNERDEAD) {
      // detected that the owner of the mutex is dead
      pthread_mutex_consistent(&mutex);
      pthread_mutex_unlock(&mutex);
      return false;
    } else {
      PANIC("pthread_mutex_trylock failed: %s", strerror(rc));
    }
  }

 public:
  /**
   * Print a human-readable summary of the slot: its state, the liveness of
   * the owning thread (only when INITIALIZED), and the pinned tx block.
   */
  friend std::ostream& operator<<(std::ostream& os, PerThreadData& data) {
    static constexpr const char* state_names[] = {"UNINITIALIZED", "PENDING",
                                                  "INITIALIZED"};
    constexpr size_t num_states = sizeof(state_names) / sizeof(state_names[0]);

    const State curr_state = data.state;
    const auto state_idx = static_cast<size_t>(curr_state);
    // Guard the table lookup: the state byte is read from memory, so do not
    // index out of bounds if it holds an unexpected value.
    os << "PerThreadData{state="
       << (state_idx < num_states ? state_names[state_idx] : "UNKNOWN");
    if (curr_state == State::INITIALIZED) {
      os << ", is_thread_alive=" << data.is_thread_alive();
    }
    os << ", tx_block_idx=" << data.tx_block_idx << "}";
    return os;
  }
};

// Each slot must occupy exactly SHM_PER_THREAD_SIZE bytes.
static_assert(sizeof(PerThreadData) == SHM_PER_THREAD_SIZE);
class ShmMgr {
pmem::MetaBlock* meta;
int fd = -1;
void* addr = nullptr;
char path[SHM_PATH_LEN]{};
public:
/**
* Open and memory map the shared memory. If the shared memory does not exist,
* create it.
*
* @param file_fd the file descriptor of the file that uses this shared memory
* @param stat the stat of the file that uses this shared memory
*/
ShmMgr(int file_fd, const struct stat& stat, pmem::MetaBlock* meta)
: meta(meta) {
// get or set the path of the shared memory
{
ssize_t rc = fgetxattr(file_fd, SHM_XATTR_NAME, path, SHM_PATH_LEN);
if (rc == -1 && errno == ENODATA) { // no shm_path attribute, create one
sprintf(path, "/dev/shm/madfs_%016lx_%013lx", stat.st_ino,
(stat.st_ctim.tv_sec * 1000000000 + stat.st_ctim.tv_nsec) >> 3);
rc = fsetxattr(file_fd, SHM_XATTR_NAME, path, SHM_PATH_LEN, 0);
PANIC_IF(rc == -1, "failed to set shm_path attribute");
} else if (rc == -1) {
PANIC("failed to get shm_path attribute");
}
}
// use posix::open instead of shm_open since shm_open calls open, which is
// overloaded by madfs
fd = posix::open(path, O_RDWR | O_NOFOLLOW | O_CLOEXEC, S_IRUSR | S_IWUSR);
if (fd < 0) {
fd = create(path, stat.st_mode, stat.st_uid, stat.st_gid);
}
LOG_DEBUG("posix::open(%s) = %d", path, fd);
addr = posix::mmap(nullptr, SHM_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED,
fd, 0);
if (addr == MAP_FAILED) {
posix::close(fd);
PANIC("mmap shared memory failed");
}
}
~ShmMgr() {
if (fd >= 0) posix::close(fd);
if (addr != nullptr) posix::munmap(addr, SHM_SIZE);
}
[[nodiscard]] void* get_bitmap_addr() const { return addr; }
/**
* Get the address of the per-thread data of the current thread.
* Shall only be called by the garbage collector.
*
* @param idx the index of the per-thread data
* @return the address of the per-thread data
*/
[[nodiscard]] PerThreadData* get_per_thread_data(size_t idx) const {
assert(idx < MAX_NUM_THREADS);
char* starting_addr = static_cast<char*>(addr) + TOTAL_NUM_BITMAP_BYTES;
return reinterpret_cast<PerThreadData*>(starting_addr) + idx;
}
/**
* Allocate a new per-thread data for the current thread.
* @return the address of the per-thread data
*/
[[nodiscard]] PerThreadData* alloc_per_thread_data() const {
// TODO: make sure that only one thread can allocate a per-thread data at a
// time. Otherwise, a thread crashes during PerThreadData::try_init will
// result in a leak (e.g., state == PENDING but the thread is dead).
// meta->lock();
for (size_t i = 0; i < MAX_NUM_THREADS; i++) {
PerThreadData* per_thread_data = get_per_thread_data(i);
bool success = per_thread_data->try_init(i);
if (success) return per_thread_data;
}
// meta->unlock();
PANIC("No empty per-thread data");
}
/**
* Remove the shared memory object associated.
*/
void unlink() const { unlink_by_shm_path(path); }
/**
* Create a shared memory object.
*
* @param shm_path the path of the shared memory object
* @param mode the mode of the shared memory object
* @param uid the uid of the shared memory object
* @param gid the gid of the shared memory object
* @return the file descriptor of the shared memory object
*/
static int create(const char* shm_path, mode_t mode, uid_t uid, gid_t gid) {
// We create a temporary file first, and then use `linkat` to put the file
// into the directory `/dev/shm`. This ensures the atomicity of the creating
// the shared memory file and setting its permission.
int shm_fd =
posix::open("/dev/shm", O_TMPFILE | O_RDWR | O_NOFOLLOW | O_CLOEXEC,
S_IRUSR | S_IWUSR);
if (unlikely(shm_fd < 0)) {
PANIC("create the temporary file failed");
}
// change permission and ownership of the new shared memory
if (fchmod(shm_fd, mode) < 0) {
posix::close(shm_fd);
PANIC("fchmod on shared memory failed");
}
if (fchown(shm_fd, uid, gid) < 0) {
posix::close(shm_fd);
PANIC("fchown on shared memory failed");
}
if (posix::fallocate(shm_fd, 0, 0, static_cast<off_t>(SHM_SIZE)) < 0) {
posix::close(shm_fd);
PANIC("fallocate on shared memory failed");
}
// publish the created tmpfile.
char tmpfile_path[PATH_MAX];
sprintf(tmpfile_path, "/proc/self/fd/%d", shm_fd);
int rc =
linkat(AT_FDCWD, tmpfile_path, AT_FDCWD, shm_path, AT_SYMLINK_FOLLOW);
if (rc < 0) {
// Another process may have created a new shared memory before us. Retry
// opening.
posix::close(shm_fd);
shm_fd = posix::open(shm_path, O_RDWR | O_NOFOLLOW | O_CLOEXEC,
S_IRUSR | S_IWUSR);
if (shm_fd < 0) {
PANIC("cannot open or create the shared memory object %s", shm_path);
}
}
return shm_fd;
}
/**
* Remove the shared memory object given its path.
* @param shm_path the path of the shared memory object
*/
static void unlink_by_shm_path(const char* shm_path) {
int ret = posix::unlink(shm_path);
LOG_TRACE("posix::unlink(%s) = %d", shm_path, ret);
if (unlikely(ret < 0))
LOG_WARN("Could not unlink shm file \"%s\": %m", shm_path);
}
/**
* Remove the shared memory object given the path of the file that uses it.
* @param filepath the path of the file that uses the shared memory object
*/
static void unlink_by_file_path(const char* filepath) {
char shm_path[SHM_PATH_LEN];
if (getxattr(filepath, SHM_XATTR_NAME, shm_path, SHM_PATH_LEN) <= 0) return;
unlink_by_shm_path(shm_path);
}
friend std::ostream& operator<<(std::ostream& os, const ShmMgr& mgr) {
__msan_scoped_disable_interceptor_checks();
os << "ShmMgr:\n"
<< "\tfd = " << mgr.fd << "\n"
<< "\taddr = " << mgr.addr << "\n"
<< "\tpath = " << mgr.path << "\n";
for (size_t i = 0; i < MAX_NUM_THREADS; ++i) {
PerThreadData* per_thread_data = mgr.get_per_thread_data(i);
if (!per_thread_data->has_data()) continue;
os << "\t" << i << ": " << *mgr.get_per_thread_data(i) << "\n";
}
__msan_scoped_enable_interceptor_checks();
return os;
}
};
} // namespace madfs::dram