From b047e7a793421e3ff1f5dca2b27c72751a1f4db4 Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Fri, 3 Feb 2023 14:11:50 -0500 Subject: [PATCH] Fix: auto-resize hash table destroy deadlock Fix a deadlock for auto-resize hash tables when cds_lfht_destroy is called with RCU read-side lock held. Example stack track of a hang: Thread 2 (Thread 0x7f21ba876700 (LWP 26114)): #0 syscall () at ../sysdeps/unix/sysv/linux/x86_64/syscall.S:38 #1 0x00007f21beba7aa0 in futex (val3=0, uaddr2=0x0, timeout=0x0, val=-1, op=0, uaddr=0x7f21bedac308 ) at ../include/urcu/futex.h:81 #2 futex_noasync (timeout=0x0, uaddr2=0x0, val3=0, val=-1, op=0, uaddr=0x7f21bedac308 ) at ../include/urcu/futex.h:90 #3 wait_gp () at urcu.c:265 #4 wait_for_readers (input_readers=input_readers@entry=0x7f21ba8751b0, cur_snap_readers=cur_snap_readers@entry=0x0, qsreaders=qsreaders@entry=0x7f21ba8751c0) at urcu.c:357 #5 0x00007f21beba8339 in urcu_memb_synchronize_rcu () at urcu.c:498 #6 0x00007f21be99f93f in fini_table (last_order=, first_order=13, ht=0x5651cec75400) at rculfhash.c:1489 #7 _do_cds_lfht_shrink (new_size=, old_size=, ht=0x5651cec75400) at rculfhash.c:2001 #8 _do_cds_lfht_resize (ht=ht@entry=0x5651cec75400) at rculfhash.c:2023 #9 0x00007f21be99fa26 in do_resize_cb (work=0x5651e20621a0) at rculfhash.c:2063 #10 0x00007f21be99dbfd in workqueue_thread (arg=0x5651cec74a00) at workqueue.c:234 #11 0x00007f21bd7c06db in start_thread (arg=0x7f21ba876700) at pthread_create.c:463 #12 0x00007f21bd4e961f in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:95 Thread 1 (Thread 0x7f21bf285300 (LWP 26098)): #0 syscall () at ../sysdeps/unix/sysv/linux/x86_64/syscall.S:38 #1 0x00007f21be99d8b7 in futex (val3=0, uaddr2=0x0, timeout=0x0, val=-1, op=0, uaddr=0x5651d8b38584) at ../include/urcu/futex.h:81 #2 futex_async (timeout=0x0, uaddr2=0x0, val3=0, val=-1, op=0, uaddr=0x5651d8b38584) at ../include/urcu/futex.h:113 #3 futex_wait (futex=futex@entry=0x5651d8b38584) at workqueue.c:135 #4 0x00007f21be99e2c8 in urcu_workqueue_wait_completion (completion=completion@entry=0x5651d8b38580) at workqueue.c:423 #5 0x00007f21be99e3f9 in urcu_workqueue_flush_queued_work (workqueue=0x5651cec74a00) at workqueue.c:452 #6 0x00007f21be9a0c83 in cds_lfht_destroy (ht=0x5651d8b2fcf0, attr=attr@entry=0x0) at rculfhash.c:1906 This deadlock is easy to reproduce when rapidly adding a large number of entries in the cds_lfht, removing them, and calling cds_lfht_destroy(). The deadlock will occur if the call to cds_lfht_destroy() takes place while a resize of the hash table is ongoing. Fix this by moving the teardown of the lfht worker thread to libcds library destructor, so it does not have to wait on synchronize_rcu from a resize callback from within a read-side critical section. As a consequence, the atfork callbacks are left registered within each urcu flavor for which a resizeable hash table is created until the end of the executable lifetime. The other part of the fix is to move the hash table destruction to the worker thread for auto-resize hash tables. This prevents having to wait for resize callbacks from RCU read-side critical section. This is guaranteed by the fact that the worker thread serializes previously queued resize callbacks before the destroy callback. Signed-off-by: Mathieu Desnoyers Change-Id: If8b1c3c8063dc7b9846dc5c3fc452efd917eab4d --- include/urcu/rculfhash.h | 8 +-- src/rculfhash-internal.h | 8 ++- src/rculfhash.c | 105 +++++++++++++++++++++++++++++++-------- src/urcu-call-rcu-impl.h | 21 ++++---- 4 files changed, 103 insertions(+), 39 deletions(-) diff --git a/include/urcu/rculfhash.h b/include/urcu/rculfhash.h index 8586e1da..fbd33cc3 100644 --- a/include/urcu/rculfhash.h +++ b/include/urcu/rculfhash.h @@ -247,16 +247,16 @@ struct cds_lfht *cds_lfht_new(unsigned long init_size, * * Return 0 on success, negative error value on error. + * Threads calling this API need to be registered RCU read-side threads. + * * Prior to liburcu 0.10: - * - Threads calling this API need to be registered RCU read-side - * threads. * - cds_lfht_destroy should *not* be called from a RCU read-side * critical section. It should *not* be called from a call_rcu thread * context neither. * * Starting from liburcu 0.10, rculfhash implements its own worker - * thread to handle resize operations, which removes RCU requirements on - * cds_lfht_destroy. + * thread to handle resize operations, which removes the above RCU + * read-side critical section requirement on cds_lfht_destroy. */ extern int cds_lfht_destroy(struct cds_lfht *ht, pthread_attr_t **attr); diff --git a/src/rculfhash-internal.h b/src/rculfhash-internal.h index e17210b8..325e55be 100644 --- a/src/rculfhash-internal.h +++ b/src/rculfhash-internal.h @@ -29,6 +29,8 @@ #include #include +#include "workqueue.h" + #ifdef DEBUG #define dbg_printf(fmt, args...) printf("[debug rculfhash] " fmt, ## args) #else @@ -82,11 +84,13 @@ struct cds_lfht { * therefore cause grace-period deadlock if we hold off RCU G.P. * completion. */ - pthread_mutex_t resize_mutex; /* resize mutex: add/del mutex */ - pthread_attr_t *resize_attr; /* Resize threads attributes */ + pthread_mutex_t resize_mutex; /* resize mutex: add/del mutex */ + pthread_attr_t *caller_resize_attr; /* resize threads attributes from lfht_new caller */ + pthread_attr_t resize_attr; unsigned int in_progress_destroy; unsigned long resize_target; int resize_initiated; + struct urcu_work destroy_work; /* * Variables needed for add and remove fast-paths. diff --git a/src/rculfhash.c b/src/rculfhash.c index 5391d28e..37fbc1b7 100644 --- a/src/rculfhash.c +++ b/src/rculfhash.c @@ -363,7 +363,6 @@ struct partition_resize_work { }; static struct urcu_workqueue *cds_lfht_workqueue; -static unsigned long cds_lfht_workqueue_user_count; /* * Mutex ensuring mutual exclusion between workqueue initialization and @@ -380,8 +379,8 @@ static struct urcu_atfork cds_lfht_atfork; */ static int cds_lfht_workqueue_atfork_nesting; +static void __attribute__((destructor)) cds_lfht_exit(void); static void cds_lfht_init_worker(const struct rcu_flavor_struct *flavor); -static void cds_lfht_fini_worker(const struct rcu_flavor_struct *flavor); #ifdef CONFIG_CDS_LFHT_ITER_DEBUG @@ -1286,7 +1285,8 @@ void partition_resize_helper(struct cds_lfht *ht, unsigned long i, work[thread].len = partition_len; work[thread].start = thread * partition_len; work[thread].fct = fct; - ret = pthread_create(&(work[thread].thread_id), ht->resize_attr, + ret = pthread_create(&(work[thread].thread_id), + ht->caller_resize_attr ? &ht->resize_attr : NULL, partition_resize_thread, &work[thread]); if (ret == EAGAIN) { /* @@ -1643,7 +1643,9 @@ struct cds_lfht *_cds_lfht_new(unsigned long init_size, ht->flags = flags; ht->flavor = flavor; - ht->resize_attr = attr; + ht->caller_resize_attr = attr; + if (attr) + ht->resize_attr = *attr; alloc_split_items_count(ht); /* this mutex should not nest in read-side C.S. */ pthread_mutex_init(&ht->resize_mutex, NULL); @@ -1857,6 +1859,35 @@ int cds_lfht_is_node_deleted(const struct cds_lfht_node *node) return is_removed(CMM_LOAD_SHARED(node->next)); } +static +bool cds_lfht_is_empty(struct cds_lfht *ht) +{ + struct cds_lfht_node *node, *next; + bool empty = true; + bool was_online; + + was_online = ht->flavor->read_ongoing(); + if (!was_online) { + ht->flavor->thread_online(); + ht->flavor->read_lock(); + } + /* Check that the table is empty */ + node = bucket_at(ht, 0); + do { + next = rcu_dereference(node->next); + if (!is_bucket(next)) { + empty = false; + break; + } + node = clear_flag(next); + } while (!is_end(node)); + if (!was_online) { + ht->flavor->read_unlock(); + ht->flavor->thread_offline(); + } + return empty; +} + static int cds_lfht_delete_bucket(struct cds_lfht *ht) { @@ -1891,6 +1922,24 @@ int cds_lfht_delete_bucket(struct cds_lfht *ht) return 0; } +static +void do_auto_resize_destroy_cb(struct urcu_work *work) +{ + struct cds_lfht *ht = caa_container_of(work, struct cds_lfht, destroy_work); + int ret; + + ht->flavor->register_thread(); + ret = cds_lfht_delete_bucket(ht); + if (ret) + urcu_die(ret); + free_split_items_count(ht); + ret = pthread_mutex_destroy(&ht->resize_mutex); + if (ret) + urcu_die(ret); + ht->flavor->unregister_thread(); + poison_free(ht); +} + /* * Should only be called when no more concurrent readers nor writers can * possibly access the table. @@ -1900,22 +1949,38 @@ int cds_lfht_destroy(struct cds_lfht *ht, pthread_attr_t **attr) int ret; if (ht->flags & CDS_LFHT_AUTO_RESIZE) { + /* + * Perform error-checking for emptiness before queuing + * work, so we can return error to the caller. This runs + * concurrently with ongoing resize. + */ + if (!cds_lfht_is_empty(ht)) + return -EPERM; /* Cancel ongoing resize operations. */ _CMM_STORE_SHARED(ht->in_progress_destroy, 1); - /* Wait for in-flight resize operations to complete */ - urcu_workqueue_flush_queued_work(cds_lfht_workqueue); + if (attr) { + *attr = ht->caller_resize_attr; + ht->caller_resize_attr = NULL; + } + /* + * Queue destroy work after prior queued resize + * operations. Given there are no concurrent writers + * accessing the hash table at this point, no resize + * operations can be queued after this destroy work. + */ + urcu_workqueue_queue_work(cds_lfht_workqueue, + &ht->destroy_work, do_auto_resize_destroy_cb); + return 0; } ret = cds_lfht_delete_bucket(ht); if (ret) return ret; free_split_items_count(ht); if (attr) - *attr = ht->resize_attr; + *attr = ht->caller_resize_attr; ret = pthread_mutex_destroy(&ht->resize_mutex); if (ret) ret = -EBUSY; - if (ht->flags & CDS_LFHT_AUTO_RESIZE) - cds_lfht_fini_worker(ht->flavor); poison_free(ht); return ret; } @@ -2179,23 +2244,19 @@ static void cds_lfht_init_worker(const struct rcu_flavor_struct *flavor) flavor->register_rculfhash_atfork(&cds_lfht_atfork); mutex_lock(&cds_lfht_fork_mutex); - if (cds_lfht_workqueue_user_count++) - goto end; - cds_lfht_workqueue = urcu_workqueue_create(0, -1, NULL, - NULL, NULL, NULL, NULL, NULL, NULL, NULL); -end: + if (!cds_lfht_workqueue) + cds_lfht_workqueue = urcu_workqueue_create(0, -1, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, NULL); mutex_unlock(&cds_lfht_fork_mutex); } -static void cds_lfht_fini_worker(const struct rcu_flavor_struct *flavor) +static void cds_lfht_exit(void) { mutex_lock(&cds_lfht_fork_mutex); - if (--cds_lfht_workqueue_user_count) - goto end; - urcu_workqueue_destroy(cds_lfht_workqueue); - cds_lfht_workqueue = NULL; -end: + if (cds_lfht_workqueue) { + urcu_workqueue_flush_queued_work(cds_lfht_workqueue); + urcu_workqueue_destroy(cds_lfht_workqueue); + cds_lfht_workqueue = NULL; + } mutex_unlock(&cds_lfht_fork_mutex); - - flavor->unregister_rculfhash_atfork(&cds_lfht_atfork); } diff --git a/src/urcu-call-rcu-impl.h b/src/urcu-call-rcu-impl.h index 9f85d55b..1aba04a9 100644 --- a/src/urcu-call-rcu-impl.h +++ b/src/urcu-call-rcu-impl.h @@ -102,7 +102,6 @@ static pthread_mutex_t call_rcu_mutex = PTHREAD_MUTEX_INITIALIZER; static struct call_rcu_data *default_call_rcu_data; static struct urcu_atfork *registered_rculfhash_atfork; -static unsigned long registered_rculfhash_atfork_refcount; /* * If the sched_getcpu() and sysconf(_SC_NPROCESSORS_CONF) calls are @@ -1028,20 +1027,20 @@ void call_rcu_after_fork_child(void) void urcu_register_rculfhash_atfork(struct urcu_atfork *atfork) { + if (CMM_LOAD_SHARED(registered_rculfhash_atfork)) + return; call_rcu_lock(&call_rcu_mutex); - if (registered_rculfhash_atfork_refcount++) - goto end; - registered_rculfhash_atfork = atfork; -end: + if (!registered_rculfhash_atfork) + registered_rculfhash_atfork = atfork; call_rcu_unlock(&call_rcu_mutex); } +/* + * This unregistration function is deprecated, meant only for internal + * use by rculfhash. + */ +__attribute__((noreturn)) void urcu_unregister_rculfhash_atfork(struct urcu_atfork *atfork __attribute__((unused))) { - call_rcu_lock(&call_rcu_mutex); - if (--registered_rculfhash_atfork_refcount) - goto end; - registered_rculfhash_atfork = NULL; -end: - call_rcu_unlock(&call_rcu_mutex); + urcu_die(EPERM); }