Skip to content

Commit

Permalink
Make threads faster and more reliable
Browse files Browse the repository at this point in the history
This change doubles the performance of thread spawning. That's thanks to
our new stack manager, which allows us to avoid zeroing stacks. It gives
us 15µs spawns rather than 30µs spawns on Linux. Also, pthread_exit() is
faster now, since it doesn't need to acquire the pthread GIL. On NetBSD,
that helps us avoid allocating too many semaphores. Even when that does
happen, we're now able to survive running out of semaphores, and even
running out of memory, when allocating *NSYNC waiter objects. I found a
lot more rare bugs in the POSIX threads runtime that could cause things
to crash if you've got dozens of threads all spawning and joining dozens
of threads. I want cosmo to be world-class and production-worthy for
2025, so happy holidays all!
  • Loading branch information
jart committed Dec 22, 2024
1 parent 906bd06 commit 6245732
Show file tree
Hide file tree
Showing 51 changed files with 1,005 additions and 320 deletions.
6 changes: 3 additions & 3 deletions libc/calls/clock_nanosleep-openbsd.c
Original file line number Diff line number Diff line change
Expand Up @@ -23,9 +23,9 @@
#include "libc/sysv/consts/clock.h"
#include "libc/sysv/errfuns.h"

int sys_clock_nanosleep_openbsd(int clock, int flags,
const struct timespec *req,
struct timespec *rem) {
relegated int sys_clock_nanosleep_openbsd(int clock, int flags,
const struct timespec *req,
struct timespec *rem) {
int res;
struct timespec start, relative, remainder;
if (!flags) {
Expand Down
23 changes: 9 additions & 14 deletions libc/calls/time.c
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,9 @@
│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │
│ PERFORMANCE OF THIS SOFTWARE. │
╚─────────────────────────────────────────────────────────────────────────────*/
#include "libc/time.h"
#include "libc/calls/struct/timeval.h"
#include "libc/dce.h"
#include "libc/sysv/errfuns.h"
#include "libc/calls/calls.h"
#include "libc/calls/struct/timespec.h"
#include "libc/sysv/consts/clock.h"

/**
* Returns time as seconds from UNIX epoch.
Expand All @@ -29,15 +28,11 @@
* @asyncsignalsafe
*/
int64_t time(int64_t *opt_out_ret) {
int64_t secs;
struct timeval tv;
if (gettimeofday(&tv, 0) != -1) {
secs = tv.tv_sec;
if (opt_out_ret) {
*opt_out_ret = secs;
}
} else {
secs = -1;
}
int64_t secs = -1;
struct timespec ts;
if (!clock_gettime(CLOCK_REALTIME, &ts))
secs = ts.tv_sec;
if (opt_out_ret)
*opt_out_ret = secs;
return secs;
}
13 changes: 11 additions & 2 deletions libc/cosmo.h
Original file line number Diff line number Diff line change
Expand Up @@ -13,15 +13,24 @@ errno_t cosmo_once(_COSMO_ATOMIC(unsigned) *, void (*)(void)) libcesque;
int systemvpe(const char *, char *const[], char *const[]) libcesque;
char *GetProgramExecutableName(void) libcesque;
void unleaf(void) libcesque;
bool32 IsLinuxModern(void) libcesque;

int __demangle(char *, const char *, size_t) libcesque;
int __is_mangled(const char *) libcesque;
bool32 IsLinuxModern(void) libcesque;
int LoadZipArgs(int *, char ***) libcesque;

int cosmo_args(const char *, char ***) libcesque;
int LoadZipArgs(int *, char ***) libcesque;

int cosmo_futex_wake(_COSMO_ATOMIC(int) *, int, char);
int cosmo_futex_wait(_COSMO_ATOMIC(int) *, int, char, int,
const struct timespec *);

errno_t cosmo_stack_alloc(size_t *, size_t *, void **) libcesque;
errno_t cosmo_stack_free(void *, size_t, size_t) libcesque;
void cosmo_stack_clear(void) libcesque;
void cosmo_stack_setmaxstacks(int) libcesque;
int cosmo_stack_getmaxstacks(void) libcesque;

int __deadlock_check(void *, int) libcesque;
int __deadlock_tracked(void *) libcesque;
void __deadlock_record(void *, int) libcesque;
Expand Down
26 changes: 26 additions & 0 deletions libc/intrin/count.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
│ vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi │
╞══════════════════════════════════════════════════════════════════════════════╡
│ Copyright 2024 Justine Alexandra Roberts Tunney │
│ │
│ Permission to use, copy, modify, and/or distribute this software for │
│ any purpose with or without fee is hereby granted, provided that the │
│ above copyright notice and this permission notice appear in all copies. │
│ │
│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │
│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │
│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │
│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │
│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │
│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │
│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │
│ PERFORMANCE OF THIS SOFTWARE. │
╚─────────────────────────────────────────────────────────────────────────────*/
#include "libc/atomic.h"
#include "libc/stdalign.h"
#include "libc/thread/thread.h"

// this counter is important because pthread_exit() needs to know if
// it's an orphan thread, without needing to acquire _pthread_lock()
// which causes contention and a file descriptor explosion on netbsd
//
// the counter starts at 1 and is aligned to a 64-byte boundary.
// NOTE(review): the initial 1 presumably accounts for the main thread,
// and alignas(64) presumably keeps the counter on its own cache line
// to avoid false sharing; confirm against the code that updates it
alignas(64) atomic_uint _pthread_count = 1;
42 changes: 42 additions & 0 deletions libc/intrin/itimer.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
│ vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi │
╞══════════════════════════════════════════════════════════════════════════════╡
│ Copyright 2024 Justine Alexandra Roberts Tunney │
│ │
│ Permission to use, copy, modify, and/or distribute this software for │
│ any purpose with or without fee is hereby granted, provided that the │
│ above copyright notice and this permission notice appear in all copies. │
│ │
│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │
│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │
│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │
│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │
│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │
│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │
│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │
│ PERFORMANCE OF THIS SOFTWARE. │
╚─────────────────────────────────────────────────────────────────────────────*/
#include "libc/thread/itimer.h"
#include "libc/str/str.h"

// Global interval timer state. Only the mutex and the condition variable
// receive explicit static initializers; every member not named here is
// zero-initialized, as C requires for objects with static storage.
struct IntervalTimer __itimer = {
.lock = PTHREAD_MUTEX_INITIALIZER,
.cond = PTHREAD_COND_INITIALIZER,
};

// Acquires __itimer.lock; pair each call with __itimer_unlock().
textwindows void __itimer_lock(void) {
pthread_mutex_lock(&__itimer.lock);
}

// Releases __itimer.lock, which __itimer_lock() acquires.
textwindows void __itimer_unlock(void) {
pthread_mutex_unlock(&__itimer.lock);
}

// Puts __itimer back into a pristine state: zeroes the timer value,
// wipes the mutex, re-initializes the condition variable with default
// attributes, and clears the thread and once members.
// NOTE(review): presumably invoked in the child process after fork,
// judging by the comment below; confirm against the caller
textwindows void __itimer_wipe_and_reset(void) {
// timers aren't inherited by forked subprocesses
bzero(&__itimer.it, sizeof(__itimer.it));
// the lock is wiped rather than unlocked, so its prior state is discarded
pthread_mutex_wipe_np(&__itimer.lock);
pthread_cond_init(&__itimer.cond, 0);
// forget the previous thread handle and once state
__itimer.thread = 0;
__itimer.once = 0;
}
36 changes: 36 additions & 0 deletions libc/intrin/kisdangerous.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
│ vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi │
╞══════════════════════════════════════════════════════════════════════════════╡
│ Copyright 2024 Justine Alexandra Roberts Tunney │
│ │
│ Permission to use, copy, modify, and/or distribute this software for │
│ any purpose with or without fee is hereby granted, provided that the │
│ above copyright notice and this permission notice appear in all copies. │
│ │
│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │
│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │
│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │
│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │
│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │
│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │
│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │
│ PERFORMANCE OF THIS SOFTWARE. │
╚─────────────────────────────────────────────────────────────────────────────*/
#include "libc/intrin/kprintf.h"
#include "libc/intrin/maps.h"

/**
 * Reports whether `addr` should be treated as unsafe to dereference.
 *
 * The answer is false when no mappings are tracked at all (`__maps.maps`
 * is null), or when `addr` lies within the half-open byte range
 * `[addr, addr + size)` of the mapping returned by `__maps_floor(addr)`.
 * In every other case the answer is true. The maps lock is held for
 * the duration of the lookup.
 *
 * @param addr is the pointer to classify
 * @return true if `addr` is not covered by the looked-up mapping
 */
privileged optimizesize bool32 kisdangerous(const void *addr) {
  const char *p = (const char *)addr;
  bool32 dangerous;
  __maps_lock();
  if (!__maps.maps) {
    // nothing is tracked, so there's no basis for rejecting the address
    dangerous = false;
  } else {
    struct Map *m = __maps_floor(addr);
    bool32 covered = m && p >= m->addr && p < m->addr + m->size;
    dangerous = !covered;
  }
  __maps_unlock();
  return dangerous;
}
16 changes: 0 additions & 16 deletions libc/intrin/kprintf.greg.c
Original file line number Diff line number Diff line change
Expand Up @@ -160,22 +160,6 @@ __funline bool kischarmisaligned(const char *p, signed char t) {
return false;
}

// Reports whether addr should be treated as unsafe to dereference.
// Returns false when no mappings are tracked (__maps.maps is null) or
// when addr lies inside the mapping returned by __maps_floor(addr);
// returns true otherwise. The maps lock is held during the lookup.
ABI bool32 kisdangerous(const void *addr) {
// assume dangerous until a covering mapping is found
bool32 res = true;
__maps_lock();
if (__maps.maps) {
struct Map *map;
if ((map = __maps_floor(addr)))
// addr must fall within the half-open range [map->addr, map->addr + map->size)
if ((const char *)addr >= map->addr &&
(const char *)addr < map->addr + map->size)
res = false;
} else {
// nothing is tracked, so the address can't be rejected
res = false;
}
__maps_unlock();
return res;
}

ABI static void klogclose(long fd) {
#ifdef __x86_64__
long ax = __NR_close;
Expand Down
File renamed without changes.
3 changes: 3 additions & 0 deletions libc/intrin/pthread_orphan_np.c
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@
│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │
│ PERFORMANCE OF THIS SOFTWARE. │
╚─────────────────────────────────────────────────────────────────────────────*/
#include "libc/assert.h"
#include "libc/intrin/atomic.h"
#include "libc/thread/posixthread.internal.h"
#include "libc/thread/thread.h"

Expand All @@ -28,5 +30,6 @@ int pthread_orphan_np(void) {
res = _pthread_list == _pthread_list->prev &&
_pthread_list == _pthread_list->next;
_pthread_unlock();
unassert(!res || atomic_load(&_pthread_count) <= 1);
return res;
}
3 changes: 2 additions & 1 deletion libc/intrin/pthreadlock.c
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,10 @@
│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │
│ PERFORMANCE OF THIS SOFTWARE. │
╚─────────────────────────────────────────────────────────────────────────────*/
#include "libc/stdalign.h"
#include "libc/thread/posixthread.internal.h"

pthread_mutex_t __pthread_lock_obj = PTHREAD_MUTEX_INITIALIZER;
alignas(64) pthread_mutex_t __pthread_lock_obj = PTHREAD_MUTEX_INITIALIZER;

void _pthread_lock(void) {
pthread_mutex_lock(&__pthread_lock_obj);
Expand Down
55 changes: 30 additions & 25 deletions libc/intrin/sig.c
Original file line number Diff line number Diff line change
Expand Up @@ -696,35 +696,40 @@ textwindows dontinstrument static uint32_t __sig_worker(void *arg) {
}

// unblock stalled asynchronous signals in threads
_pthread_lock();
for (struct Dll *e = dll_first(_pthread_list); e;
e = dll_next(_pthread_list, e)) {
struct PosixThread *pt = POSIXTHREAD_CONTAINER(e);
if (atomic_load_explicit(&pt->pt_status, memory_order_acquire) >=
kPosixThreadTerminated) {
struct PosixThread *mark;
for (;;) {
sigset_t pending, mask;
mark = 0;
_pthread_lock();
for (struct Dll *e = dll_first(_pthread_list); e;
e = dll_next(_pthread_list, e)) {
struct PosixThread *pt = POSIXTHREAD_CONTAINER(e);
if (atomic_load_explicit(&pt->pt_status, memory_order_acquire) >=
kPosixThreadTerminated)
break;
pending = atomic_load_explicit(&pt->tib->tib_sigpending,
memory_order_acquire);
mask =
atomic_load_explicit(&pt->tib->tib_sigmask, memory_order_acquire);
if (pending & ~mask) {
_pthread_ref(pt);
mark = pt;
break;
}
}
_pthread_unlock();
if (!mark)
break;
while (!atomic_compare_exchange_weak_explicit(
&mark->tib->tib_sigpending, &pending, pending & ~mask,
memory_order_acq_rel, memory_order_relaxed)) {
}
sigset_t pending =
atomic_load_explicit(&pt->tib->tib_sigpending, memory_order_acquire);
sigset_t mask =
atomic_load_explicit(&pt->tib->tib_sigmask, memory_order_acquire);
if (pending & ~mask) {
_pthread_ref(pt);
_pthread_unlock();
while (!atomic_compare_exchange_weak_explicit(
&pt->tib->tib_sigpending, &pending, pending & ~mask,
memory_order_acq_rel, memory_order_relaxed)) {
}
while ((pending = pending & ~mask)) {
int sig = bsfl(pending) + 1;
pending &= ~(1ull << (sig - 1));
__sig_killer(pt, sig, SI_KERNEL);
}
_pthread_lock();
_pthread_unref(pt);
while ((pending = pending & ~mask)) {
int sig = bsfl(pending) + 1;
pending &= ~(1ull << (sig - 1));
__sig_killer(mark, sig, SI_KERNEL);
}
}
_pthread_unlock();

// wait until next scheduler quantum
pthread_mutex_unlock(&__sig_worker_lock);
Expand Down
Loading

0 comments on commit 6245732

Please sign in to comment.