Factor out GC initialization code

This is not really part of the evaluator: it is just an integration between Boehm GC and Boost coroutines usable for any purpose. The evaluator (merely) optionally uses it.
2024-11-24 14:56:15 +02:00 · 2024-06-12 10:17:39 -04:00 · 2024-06-12 10:17:39 -04:00 · 5b53d8fec3
commit 5b53d8fec3
parent 7c2981fc55
4 changed files with 249 additions and 210 deletions
--- a/src/libexpr/eval-gc.cc
+++ b/src/libexpr/eval-gc.cc
@ -0,0 +1,226 @@
 #include "error.hh"
 #include "environment-variables.hh"
 #include "serialise.hh"
 #include "eval-gc.hh"
 #if HAVE_BOEHMGC
 #  include <pthread.h>
 #  if __FreeBSD__
 #    include <pthread_np.h>
 #  endif
 #  include <gc/gc.h>
 #  include <gc/gc_cpp.h>
 #  include <gc/gc_allocator.h>
 #  include <boost/coroutine2/coroutine.hpp>
 #  include <boost/coroutine2/protected_fixedsize_stack.hpp>
 #  include <boost/context/stack_context.hpp>
 #endif
 namespace nix {
 #if HAVE_BOEHMGC
 /* Called when the Boehm GC runs out of memory. */
 static void * oomHandler(size_t requested)
 {
    /* Convert this to a proper C++ exception. */
    throw std::bad_alloc();
 }
 class BoehmGCStackAllocator : public StackAllocator
 {
    boost::coroutines2::protected_fixedsize_stack stack{
        // We allocate 8 MB, the default max stack size on NixOS.
        // A smaller stack might be quicker to allocate but reduces the stack
        // depth available for source filter expressions etc.
        std::max(boost::context::stack_traits::default_size(), static_cast<std::size_t>(8 * 1024 * 1024))};
    // This is specific to boost::coroutines2::protected_fixedsize_stack.
    // The stack protection page is included in sctx.size, so we have to
    // subtract one page size from the stack size.
    std::size_t pfss_usable_stack_size(boost::context::stack_context & sctx)
    {
        return sctx.size - boost::context::stack_traits::page_size();
    }
 public:
    boost::context::stack_context allocate() override
    {
        auto sctx = stack.allocate();
        // Stacks generally start at a high address and grow to lower addresses.
        // Architectures that do the opposite are rare; in fact so rare that
        // boost_routine does not implement it.
        // So we subtract the stack size.
        GC_add_roots(static_cast<char *>(sctx.sp) - pfss_usable_stack_size(sctx), sctx.sp);
        return sctx;
    }
    void deallocate(boost::context::stack_context sctx) override
    {
        GC_remove_roots(static_cast<char *>(sctx.sp) - pfss_usable_stack_size(sctx), sctx.sp);
        stack.deallocate(sctx);
    }
 };
 static BoehmGCStackAllocator boehmGCStackAllocator;
 /**
 * When a thread goes into a coroutine, we lose its original sp until
 * control flow returns to the thread.
 * While in the coroutine, the sp points outside the thread stack,
 * so we can detect this and push the entire thread stack instead,
 * as an approximation.
 * The coroutine's stack is covered by `BoehmGCStackAllocator`.
 * This is not an optimal solution, because the garbage is scanned when a
 * coroutine is active, for both the coroutine and the original thread stack.
 * However, the implementation is quite lean, and usually we don't have active
 * coroutines during evaluation, so this is acceptable.
 */
 void fixupBoehmStackPointer(void ** sp_ptr, void * _pthread_id)
 {
    void *& sp = *sp_ptr;
    auto pthread_id = reinterpret_cast<pthread_t>(_pthread_id);
    pthread_attr_t pattr;
    size_t osStackSize;
    void * osStackLow;
    void * osStackBase;
 #  ifdef __APPLE__
    osStackSize = pthread_get_stacksize_np(pthread_id);
    osStackLow = pthread_get_stackaddr_np(pthread_id);
 #  else
    if (pthread_attr_init(&pattr)) {
        throw Error("fixupBoehmStackPointer: pthread_attr_init failed");
    }
 #    ifdef HAVE_PTHREAD_GETATTR_NP
    if (pthread_getattr_np(pthread_id, &pattr)) {
        throw Error("fixupBoehmStackPointer: pthread_getattr_np failed");
    }
 #    elif HAVE_PTHREAD_ATTR_GET_NP
    if (!pthread_attr_init(&pattr)) {
        throw Error("fixupBoehmStackPointer: pthread_attr_init failed");
    }
    if (!pthread_attr_get_np(pthread_id, &pattr)) {
        throw Error("fixupBoehmStackPointer: pthread_attr_get_np failed");
    }
 #    else
 #      error "Need one of `pthread_attr_get_np` or `pthread_getattr_np`"
 #    endif
    if (pthread_attr_getstack(&pattr, &osStackLow, &osStackSize)) {
        throw Error("fixupBoehmStackPointer: pthread_attr_getstack failed");
    }
    if (pthread_attr_destroy(&pattr)) {
        throw Error("fixupBoehmStackPointer: pthread_attr_destroy failed");
    }
 #  endif
    osStackBase = (char *) osStackLow + osStackSize;
    // NOTE: We assume the stack grows down, as it does on all architectures we support.
    //       Architectures that grow the stack up are rare.
    if (sp >= osStackBase || sp < osStackLow) { // lo is outside the os stack
        sp = osStackBase;
    }
 }
 /* Disable GC while this object lives. Used by CoroutineContext.
 *
 * Boehm keeps a count of GC_disable() and GC_enable() calls,
 * and only enables GC when the count matches.
 */
 class BoehmDisableGC
 {
 public:
    BoehmDisableGC()
    {
        GC_disable();
    };
    ~BoehmDisableGC()
    {
        GC_enable();
    };
 };
 static inline void initGCReal()
 {
    /* Initialise the Boehm garbage collector. */
    /* Don't look for interior pointers. This reduces the odds of
       misdetection a bit. */
    GC_set_all_interior_pointers(0);
    /* We don't have any roots in data segments, so don't scan from
       there. */
    GC_set_no_dls(1);
    GC_INIT();
    GC_set_oom_fn(oomHandler);
    StackAllocator::defaultAllocator = &boehmGCStackAllocator;
 // TODO: Remove __APPLE__ condition.
 //       Comment suggests an implementation that works on darwin and windows
 //       https://github.com/ivmai/bdwgc/issues/362#issuecomment-1936672196
 #  if GC_VERSION_MAJOR >= 8 && GC_VERSION_MINOR >= 2 && GC_VERSION_MICRO >= 4 && !defined(__APPLE__)
    GC_set_sp_corrector(&fixupBoehmStackPointer);
    if (!GC_get_sp_corrector()) {
        printTalkative("BoehmGC on this platform does not support sp_corrector; will disable GC inside coroutines");
        /* Used to disable GC when entering coroutines on macOS */
        create_coro_gc_hook = []() -> std::shared_ptr<void> { return std::make_shared<BoehmDisableGC>(); };
    }
 #  else
 #    warning \
        "BoehmGC version does not support GC while coroutine exists. GC will be disabled inside coroutines. Consider updating bdw-gc to 8.2.4 or later."
 #  endif
    /* Set the initial heap size to something fairly big (25% of
       physical RAM, up to a maximum of 384 MiB) so that in most cases
       we don't need to garbage collect at all.  (Collection has a
       fairly significant overhead.)  The heap size can be overridden
       through libgc's GC_INITIAL_HEAP_SIZE environment variable.  We
       should probably also provide a nix.conf setting for this.  Note
       that GC_expand_hp() causes a lot of virtual, but not physical
       (resident) memory to be allocated.  This might be a problem on
       systems that don't overcommit. */
    if (!getEnv("GC_INITIAL_HEAP_SIZE")) {
        size_t size = 32 * 1024 * 1024;
 #  if HAVE_SYSCONF && defined(_SC_PAGESIZE) && defined(_SC_PHYS_PAGES)
        size_t maxSize = 384 * 1024 * 1024;
        long pageSize = sysconf(_SC_PAGESIZE);
        long pages = sysconf(_SC_PHYS_PAGES);
        if (pageSize != -1)
            size = (pageSize * pages) / 4; // 25% of RAM
        if (size > maxSize)
            size = maxSize;
 #  endif
        debug("setting initial heap size to %1% bytes", size);
        GC_expand_hp(size);
    }
 }
 #endif
 static bool gcInitialised = false;
 void initGC()
 {
    if (gcInitialised)
        return;
 #if HAVE_BOEHMGC
    initGCReal();
 #endif
    gcInitialised = true;
 }
 void assertGCInitialized()
 {
    assert(gcInitialised);
 }
 }
--- a/src/libexpr/eval-gc.hh
+++ b/src/libexpr/eval-gc.hh
@ -0,0 +1,16 @@
 #pragma once
 ///@file
 namespace nix {
 /**
 * Initialise the Boehm GC, if applicable.
 */
 void initGC();
 /**
 * Make sure `initGC` has already been called.
 */
 void assertGCInitialized();
 }
--- a/src/libexpr/eval.cc
+++ b/src/libexpr/eval.cc
@ -45,20 +45,11 @@
 #if HAVE_BOEHMGC
-#define GC_INCLUDE_NEW
+#  define GC_INCLUDE_NEW
-#include <pthread.h>
+#  include <gc/gc.h>
-#if __FreeBSD__
+#  include <gc/gc_cpp.h>
-#  include <pthread_np.h>
+#  include <gc/gc_allocator.h>
 #endif
 #include <gc/gc.h>
 #include <gc/gc_cpp.h>
 #include <gc/gc_allocator.h>
 #include <boost/coroutine2/coroutine.hpp>
 #include <boost/coroutine2/protected_fixedsize_stack.hpp>
 #include <boost/context/stack_context.hpp>
 #endif
@ -211,109 +202,6 @@ bool Value::isTrivial() const
 }
 #if HAVE_BOEHMGC
 /* Called when the Boehm GC runs out of memory. */
 static void * oomHandler(size_t requested)
 {
    /* Convert this to a proper C++ exception. */
    throw std::bad_alloc();
 }
 class BoehmGCStackAllocator : public StackAllocator {
    boost::coroutines2::protected_fixedsize_stack stack {
        // We allocate 8 MB, the default max stack size on NixOS.
        // A smaller stack might be quicker to allocate but reduces the stack
        // depth available for source filter expressions etc.
        std::max(boost::context::stack_traits::default_size(), static_cast<std::size_t>(8 * 1024 * 1024))
    };
    // This is specific to boost::coroutines2::protected_fixedsize_stack.
    // The stack protection page is included in sctx.size, so we have to
    // subtract one page size from the stack size.
    std::size_t pfss_usable_stack_size(boost::context::stack_context &sctx) {
        return sctx.size - boost::context::stack_traits::page_size();
    }
  public:
    boost::context::stack_context allocate() override {
        auto sctx = stack.allocate();
        // Stacks generally start at a high address and grow to lower addresses.
        // Architectures that do the opposite are rare; in fact so rare that
        // boost_routine does not implement it.
        // So we subtract the stack size.
        GC_add_roots(static_cast<char *>(sctx.sp) - pfss_usable_stack_size(sctx), sctx.sp);
        return sctx;
    }
    void deallocate(boost::context::stack_context sctx) override {
        GC_remove_roots(static_cast<char *>(sctx.sp) - pfss_usable_stack_size(sctx), sctx.sp);
        stack.deallocate(sctx);
    }
 };
 static BoehmGCStackAllocator boehmGCStackAllocator;
 /**
 * When a thread goes into a coroutine, we lose its original sp until
 * control flow returns to the thread.
 * While in the coroutine, the sp points outside the thread stack,
 * so we can detect this and push the entire thread stack instead,
 * as an approximation.
 * The coroutine's stack is covered by `BoehmGCStackAllocator`.
 * This is not an optimal solution, because the garbage is scanned when a
 * coroutine is active, for both the coroutine and the original thread stack.
 * However, the implementation is quite lean, and usually we don't have active
 * coroutines during evaluation, so this is acceptable.
 */
 void fixupBoehmStackPointer(void ** sp_ptr, void * _pthread_id) {
    void *& sp = *sp_ptr;
    auto pthread_id = reinterpret_cast<pthread_t>(_pthread_id);
    pthread_attr_t pattr;
    size_t osStackSize;
    void * osStackLow;
    void * osStackBase;
 #  ifdef __APPLE__
    osStackSize = pthread_get_stacksize_np(pthread_id);
    osStackLow = pthread_get_stackaddr_np(pthread_id);
 #  else
    if (pthread_attr_init(&pattr)) {
        throw Error("fixupBoehmStackPointer: pthread_attr_init failed");
    }
 #    ifdef HAVE_PTHREAD_GETATTR_NP
    if (pthread_getattr_np(pthread_id, &pattr)) {
        throw Error("fixupBoehmStackPointer: pthread_getattr_np failed");
    }
 #    elif HAVE_PTHREAD_ATTR_GET_NP
    if (!pthread_attr_init(&pattr)) {
        throw Error("fixupBoehmStackPointer: pthread_attr_init failed");
    }
    if (!pthread_attr_get_np(pthread_id, &pattr)) {
        throw Error("fixupBoehmStackPointer: pthread_attr_get_np failed");
    }
 #    else
 #      error "Need one of `pthread_attr_get_np` or `pthread_getattr_np`"
 #    endif
    if (pthread_attr_getstack(&pattr, &osStackLow, &osStackSize)) {
        throw Error("fixupBoehmStackPointer: pthread_attr_getstack failed");
    }
    if (pthread_attr_destroy(&pattr)) {
        throw Error("fixupBoehmStackPointer: pthread_attr_destroy failed");
    }
 #  endif
    osStackBase = (char *)osStackLow + osStackSize;
    // NOTE: We assume the stack grows down, as it does on all architectures we support.
    //       Architectures that grow the stack up are rare.
    if (sp >= osStackBase || sp < osStackLow) { // lo is outside the os stack
        sp = osStackBase;
    }
 }
 #endif
 static Symbol getName(const AttrName & name, EvalState & state, Env & env)
 {
    if (name.symbol) {
@ -326,92 +214,6 @@ static Symbol getName(const AttrName & name, EvalState & state, Env & env)
    }
 }
 #if HAVE_BOEHMGC
 /* Disable GC while this object lives. Used by CoroutineContext.
 *
 * Boehm keeps a count of GC_disable() and GC_enable() calls,
 * and only enables GC when the count matches.
 */
 class BoehmDisableGC {
 public:
    BoehmDisableGC() {
        GC_disable();
    };
    ~BoehmDisableGC() {
        GC_enable();
    };
 };
 #endif
 static bool gcInitialised = false;
 void initGC()
 {
    if (gcInitialised) return;
 #if HAVE_BOEHMGC
    /* Initialise the Boehm garbage collector. */
    /* Don't look for interior pointers. This reduces the odds of
       misdetection a bit. */
    GC_set_all_interior_pointers(0);
    /* We don't have any roots in data segments, so don't scan from
       there. */
    GC_set_no_dls(1);
    GC_INIT();
    GC_set_oom_fn(oomHandler);
    StackAllocator::defaultAllocator = &boehmGCStackAllocator;
    // TODO: Remove __APPLE__ condition.
    //       Comment suggests an implementation that works on darwin and windows
    //       https://github.com/ivmai/bdwgc/issues/362#issuecomment-1936672196
    #if GC_VERSION_MAJOR >= 8 && GC_VERSION_MINOR >= 2 && GC_VERSION_MICRO >= 4 && !defined(__APPLE__)
    GC_set_sp_corrector(&fixupBoehmStackPointer);
    if (!GC_get_sp_corrector()) {
        printTalkative("BoehmGC on this platform does not support sp_corrector; will disable GC inside coroutines");
        /* Used to disable GC when entering coroutines on macOS */
        create_coro_gc_hook = []() -> std::shared_ptr<void> {
            return std::make_shared<BoehmDisableGC>();
        };
    }
    #else
    #warning "BoehmGC version does not support GC while coroutine exists. GC will be disabled inside coroutines. Consider updating bdw-gc to 8.2.4 or later."
    #endif
    /* Set the initial heap size to something fairly big (25% of
       physical RAM, up to a maximum of 384 MiB) so that in most cases
       we don't need to garbage collect at all.  (Collection has a
       fairly significant overhead.)  The heap size can be overridden
       through libgc's GC_INITIAL_HEAP_SIZE environment variable.  We
       should probably also provide a nix.conf setting for this.  Note
       that GC_expand_hp() causes a lot of virtual, but not physical
       (resident) memory to be allocated.  This might be a problem on
       systems that don't overcommit. */
    if (!getEnv("GC_INITIAL_HEAP_SIZE")) {
        size_t size = 32 * 1024 * 1024;
 #if HAVE_SYSCONF && defined(_SC_PAGESIZE) && defined(_SC_PHYS_PAGES)
        size_t maxSize = 384 * 1024 * 1024;
        long pageSize = sysconf(_SC_PAGESIZE);
        long pages = sysconf(_SC_PHYS_PAGES);
        if (pageSize != -1)
            size = (pageSize * pages) / 4; // 25% of RAM
        if (size > maxSize) size = maxSize;
 #endif
        debug("setting initial heap size to %1% bytes", size);
        GC_expand_hp(size);
    }
 #endif
    gcInitialised = true;
 }
 static constexpr size_t BASE_ENV_SIZE = 128;
 EvalState::EvalState(
@ -508,7 +310,7 @@ EvalState::EvalState(
    countCalls = getEnv("NIX_COUNT_CALLS").value_or("0") != "0";
-    assert(gcInitialised);
+    assertGCInitialized();
    static_assert(sizeof(Env) <= 16, "environment must be <= 16 bytes");
--- a/src/libexpr/eval.hh
+++ b/src/libexpr/eval.hh
@ -3,6 +3,7 @@
 #include "attr-set.hh"
 #include "eval-error.hh"
 #include "eval-gc.hh"
 #include "types.hh"
 #include "value.hh"
 #include "nixexpr.hh"
@ -146,12 +147,6 @@ std::string printValue(EvalState & state, Value & v);
 std::ostream & operator << (std::ostream & os, const ValueType t);
 /**
 * Initialise the Boehm GC, if applicable.
 */
 void initGC();
 struct RegexCache;
 std::shared_ptr<RegexCache> makeRegexCache();