libexpr: Use GC_set_sp_corrector instead of patch

Manually tested by printing to stderr in both branches (sp in os
stack, or not), and triggering a GC in a filterSource function,
e.g.:

    let
      generateTree = n: if n == 0 then "ha" else { left = generateTree (n - 1); right = generateTree (n - 1); };
    in
      builtins.deepSeq (generateTree 18) ...

Note that the darwin still uses the strategy of disabling GC, despite
having an implementation that compiles. The proper solution will be
enabled and tested later.
This commit is contained in:
Robert Hensing 2024-02-27 19:42:41 +01:00
parent 2edcdf8508
commit 2477e4e3b8
5 changed files with 62 additions and 111 deletions

View file

@ -366,7 +366,7 @@ fi
AC_ARG_ENABLE(gc, AS_HELP_STRING([--enable-gc],[enable garbage collection in the Nix expression evaluator (requires Boehm GC) [default=yes]]),
gc=$enableval, gc=yes)
if test "$gc" = yes; then
PKG_CHECK_MODULES([BDW_GC], [bdw-gc])
PKG_CHECK_MODULES([BDW_GC], [bdw-gc >= 8.2.4])
CXXFLAGS="$BDW_GC_CFLAGS $CXXFLAGS"
AC_DEFINE(HAVE_BOEHMGC, 1, [Whether to use the Boehm garbage collector.])
fi

View file

@ -1,99 +0,0 @@
diff --git a/darwin_stop_world.c b/darwin_stop_world.c
index 0468aaec..b348d869 100644
--- a/darwin_stop_world.c
+++ b/darwin_stop_world.c
@@ -356,6 +356,7 @@ GC_INNER void GC_push_all_stacks(void)
int nthreads = 0;
word total_size = 0;
mach_msg_type_number_t listcount = (mach_msg_type_number_t)THREAD_TABLE_SZ;
+ size_t stack_limit;
if (!EXPECT(GC_thr_initialized, TRUE))
GC_thr_init();
@@ -411,6 +412,19 @@ GC_INNER void GC_push_all_stacks(void)
GC_push_all_stack_sections(lo, hi, p->traced_stack_sect);
}
if (altstack_lo) {
+ // When a thread goes into a coroutine, we lose its original sp until
+ // control flow returns to the thread.
+ // While in the coroutine, the sp points outside the thread stack,
+ // so we can detect this and push the entire thread stack instead,
+ // as an approximation.
+ // We assume that the coroutine has similarly added its entire stack.
+ // This could be made accurate by cooperating with the application
+ // via new functions and/or callbacks.
+ stack_limit = pthread_get_stacksize_np(p->id);
+ if (altstack_lo >= altstack_hi || altstack_lo < altstack_hi - stack_limit) { // sp outside stack
+ altstack_lo = altstack_hi - stack_limit;
+ }
+
total_size += altstack_hi - altstack_lo;
GC_push_all_stack(altstack_lo, altstack_hi);
}
diff --git a/include/gc.h b/include/gc.h
index edab6c22..f2c61282 100644
--- a/include/gc.h
+++ b/include/gc.h
@@ -2172,6 +2172,11 @@ GC_API void GC_CALL GC_win32_free_heap(void);
(*GC_amiga_allocwrapper_do)(a,GC_malloc_atomic_ignore_off_page)
#endif /* _AMIGA && !GC_AMIGA_MAKINGLIB */
+#if !__APPLE__
+/* Patch doesn't work on apple */
+#define NIX_BOEHM_PATCH_VERSION 1
+#endif
+
#ifdef __cplusplus
} /* extern "C" */
#endif
diff --git a/pthread_stop_world.c b/pthread_stop_world.c
index b5d71e62..aed7b0bf 100644
--- a/pthread_stop_world.c
+++ b/pthread_stop_world.c
@@ -768,6 +768,8 @@ STATIC void GC_restart_handler(int sig)
/* world is stopped. Should not fail if it isn't. */
GC_INNER void GC_push_all_stacks(void)
{
+ size_t stack_limit;
+ pthread_attr_t pattr;
GC_bool found_me = FALSE;
size_t nthreads = 0;
int i;
@@ -851,6 +853,37 @@ GC_INNER void GC_push_all_stacks(void)
hi = p->altstack + p->altstack_size;
/* FIXME: Need to scan the normal stack too, but how ? */
/* FIXME: Assume stack grows down */
+ } else {
+#ifdef HAVE_PTHREAD_ATTR_GET_NP
+ if (!pthread_attr_init(&pattr)
+ || !pthread_attr_get_np(p->id, &pattr))
+#else /* HAVE_PTHREAD_GETATTR_NP */
+ if (pthread_getattr_np(p->id, &pattr))
+#endif
+ {
+ ABORT("GC_push_all_stacks: pthread_getattr_np failed!");
+ }
+ if (pthread_attr_getstacksize(&pattr, &stack_limit)) {
+ ABORT("GC_push_all_stacks: pthread_attr_getstacksize failed!");
+ }
+ if (pthread_attr_destroy(&pattr)) {
+ ABORT("GC_push_all_stacks: pthread_attr_destroy failed!");
+ }
+ // When a thread goes into a coroutine, we lose its original sp until
+ // control flow returns to the thread.
+ // While in the coroutine, the sp points outside the thread stack,
+ // so we can detect this and push the entire thread stack instead,
+ // as an approximation.
+ // We assume that the coroutine has similarly added its entire stack.
+ // This could be made accurate by cooperating with the application
+ // via new functions and/or callbacks.
+ #ifndef STACK_GROWS_UP
+ if (lo >= hi || lo < hi - stack_limit) { // sp outside stack
+ lo = hi - stack_limit;
+ }
+ #else
+ #error "STACK_GROWS_UP not supported in boost_coroutine2 (as of june 2021), so we don't support it in Nix."
+ #endif
}
GC_push_all_stack_sections(lo, hi, traced_stack_sect);
# ifdef STACK_GROWS_UP

View file

@ -150,8 +150,6 @@
enableLargeConfig = true;
}).overrideAttrs(o: {
patches = (o.patches or []) ++ [
./dep-patches/boehmgc-coroutine-sp-fallback.diff
# https://github.com/ivmai/bdwgc/pull/586
./dep-patches/boehmgc-traceable_allocator-public.diff
];

View file

@ -31,6 +31,7 @@
#include <sstream>
#include <cstring>
#include <optional>
#include <pthread.h>
#include <unistd.h>
#include <sys/time.h>
#include <fstream>
@ -250,6 +251,50 @@ class BoehmGCStackAllocator : public StackAllocator {
static BoehmGCStackAllocator boehmGCStackAllocator;
/**
* When a thread goes into a coroutine, we lose its original sp until
* control flow returns to the thread.
* While in the coroutine, the sp points outside the thread stack,
* so we can detect this and push the entire thread stack instead,
* as an approximation.
* The coroutine's stack is covered by `BoehmGCStackAllocator`.
* This is not an optimal solution, because the garbage is scanned when a
* coroutine is active, for both the coroutine and the original thread stack.
* However, the implementation is quite lean, and usually we don't have active
* coroutines during evaluation, so this is acceptable.
*/
void fixupBoehmStackPointer(void ** sp_ptr, void * pthread_id) {
void *& sp = *sp_ptr;
pthread_attr_t pattr;
size_t osStackSize;
void * osStackLow;
void * osStackBase;
#ifdef __APPLE__
osStackSize = pthread_get_stacksize_np((pthread_t)pthread_id);
osStackLow = pthread_get_stackaddr_np((pthread_t)pthread_id);
#else
if (pthread_attr_init(&pattr)) {
throw Error("fixupBoehmStackPointer: pthread_attr_init failed");
}
if (pthread_getattr_np((pthread_t)pthread_id, &pattr)) {
throw Error("fixupBoehmStackPointer: pthread_getattr_np failed");
}
if (pthread_attr_getstack(&pattr, &osStackLow, &osStackSize)) {
throw Error("fixupBoehmStackPointer: pthread_attr_getstack failed");
}
if (pthread_attr_destroy(&pattr)) {
throw Error("fixupBoehmStackPointer: pthread_attr_destroy failed");
}
#endif
osStackBase = (char *)osStackLow + osStackSize;
// NOTE: We assume the stack grows down, as it does on all architectures we support.
// Architectures that grow the stack up are rare.
if (sp >= osStackBase || sp < osStackLow) { // lo is outside the os stack
sp = osStackBase;
}
}
#endif
@ -303,16 +348,21 @@ void initGC()
GC_set_oom_fn(oomHandler);
// TODO: Comment suggests an implementation that works on darwin and windows
// https://github.com/ivmai/bdwgc/issues/362#issuecomment-1936672196
#ifndef __APPLE__
GC_set_sp_corrector(&fixupBoehmStackPointer);
#endif
StackAllocator::defaultAllocator = &boehmGCStackAllocator;
#if NIX_BOEHM_PATCH_VERSION != 1
printTalkative("Unpatched BoehmGC, disabling GC inside coroutines");
/* Used to disable GC when entering coroutines on macOS */
create_coro_gc_hook = []() -> std::shared_ptr<void> {
return std::make_shared<BoehmDisableGC>();
};
#endif
if (!GC_get_sp_corrector()) {
printTalkative("BoehmGC on this platform does not support sp_corrector; will disable GC inside coroutines");
/* Used to disable GC when entering coroutines on macOS */
create_coro_gc_hook = []() -> std::shared_ptr<void> {
return std::make_shared<BoehmDisableGC>();
};
}
/* Set the initial heap size to something fairly big (25% of
physical RAM, up to a maximum of 384 MiB) so that in most cases

View file

@ -15,7 +15,9 @@ libexpr_SOURCES := \
INCLUDE_libexpr := -I $(d)
libexpr_CXXFLAGS += $(INCLUDE_libutil) $(INCLUDE_libstore) $(INCLUDE_libfetchers) $(INCLUDE_libmain) $(INCLUDE_libexpr)
libexpr_CXXFLAGS += \
$(INCLUDE_libutil) $(INCLUDE_libstore) $(INCLUDE_libfetchers) $(INCLUDE_libmain) $(INCLUDE_libexpr) \
-DGC_THREADS
libexpr_LIBS = libutil libstore libfetchers