From 5b53d8fec3149937d44b2302d51967aa95253d7a Mon Sep 17 00:00:00 2001 From: John Ericson Date: Wed, 12 Jun 2024 10:17:39 -0400 Subject: [PATCH] Factor out GC initialization code This is not really part of the evaluator: it is just an integration between Boehm GC and Boost coroutines usable for any purpose. The evaluator (merely) optionally uses it. --- src/libexpr/eval-gc.cc | 226 +++++++++++++++++++++++++++++++++++++++++ src/libexpr/eval-gc.hh | 16 +++ src/libexpr/eval.cc | 210 ++------------------------------------ src/libexpr/eval.hh | 7 +- 4 files changed, 249 insertions(+), 210 deletions(-) create mode 100644 src/libexpr/eval-gc.cc create mode 100644 src/libexpr/eval-gc.hh diff --git a/src/libexpr/eval-gc.cc b/src/libexpr/eval-gc.cc new file mode 100644 index 000000000..baf9df332 --- /dev/null +++ b/src/libexpr/eval-gc.cc @@ -0,0 +1,226 @@ +#include "error.hh" +#include "environment-variables.hh" +#include "serialise.hh" +#include "eval-gc.hh" + +#if HAVE_BOEHMGC + +# include +# if __FreeBSD__ +# include +# endif + +# include +# include +# include + +# include +# include +# include + +#endif + +namespace nix { + +#if HAVE_BOEHMGC +/* Called when the Boehm GC runs out of memory. */ +static void * oomHandler(size_t requested) +{ + /* Convert this to a proper C++ exception. */ + throw std::bad_alloc(); +} + +class BoehmGCStackAllocator : public StackAllocator +{ + boost::coroutines2::protected_fixedsize_stack stack{ + // We allocate 8 MB, the default max stack size on NixOS. + // A smaller stack might be quicker to allocate but reduces the stack + // depth available for source filter expressions etc. + std::max(boost::context::stack_traits::default_size(), static_cast(8 * 1024 * 1024))}; + + // This is specific to boost::coroutines2::protected_fixedsize_stack. + // The stack protection page is included in sctx.size, so we have to + // subtract one page size from the stack size. + std::size_t pfss_usable_stack_size(boost::context::stack_context & sctx) + { + return sctx.size - boost::context::stack_traits::page_size(); + } + +public: + boost::context::stack_context allocate() override + { + auto sctx = stack.allocate(); + + // Stacks generally start at a high address and grow to lower addresses. + // Architectures that do the opposite are rare; in fact so rare that + // boost_routine does not implement it. + // So we subtract the stack size. + GC_add_roots(static_cast(sctx.sp) - pfss_usable_stack_size(sctx), sctx.sp); + return sctx; + } + + void deallocate(boost::context::stack_context sctx) override + { + GC_remove_roots(static_cast(sctx.sp) - pfss_usable_stack_size(sctx), sctx.sp); + stack.deallocate(sctx); + } +}; + +static BoehmGCStackAllocator boehmGCStackAllocator; + +/** + * When a thread goes into a coroutine, we lose its original sp until + * control flow returns to the thread. + * While in the coroutine, the sp points outside the thread stack, + * so we can detect this and push the entire thread stack instead, + * as an approximation. + * The coroutine's stack is covered by `BoehmGCStackAllocator`. + * This is not an optimal solution, because the garbage is scanned when a + * coroutine is active, for both the coroutine and the original thread stack. + * However, the implementation is quite lean, and usually we don't have active + * coroutines during evaluation, so this is acceptable. + */ +void fixupBoehmStackPointer(void ** sp_ptr, void * _pthread_id) +{ + void *& sp = *sp_ptr; + auto pthread_id = reinterpret_cast(_pthread_id); + pthread_attr_t pattr; + size_t osStackSize; + void * osStackLow; + void * osStackBase; + +# ifdef __APPLE__ + osStackSize = pthread_get_stacksize_np(pthread_id); + osStackLow = pthread_get_stackaddr_np(pthread_id); +# else + if (pthread_attr_init(&pattr)) { + throw Error("fixupBoehmStackPointer: pthread_attr_init failed"); + } +# ifdef HAVE_PTHREAD_GETATTR_NP + if (pthread_getattr_np(pthread_id, &pattr)) { + throw Error("fixupBoehmStackPointer: pthread_getattr_np failed"); + } +# elif HAVE_PTHREAD_ATTR_GET_NP + if (!pthread_attr_init(&pattr)) { + throw Error("fixupBoehmStackPointer: pthread_attr_init failed"); + } + if (!pthread_attr_get_np(pthread_id, &pattr)) { + throw Error("fixupBoehmStackPointer: pthread_attr_get_np failed"); + } +# else +# error "Need one of `pthread_attr_get_np` or `pthread_getattr_np`" +# endif + if (pthread_attr_getstack(&pattr, &osStackLow, &osStackSize)) { + throw Error("fixupBoehmStackPointer: pthread_attr_getstack failed"); + } + if (pthread_attr_destroy(&pattr)) { + throw Error("fixupBoehmStackPointer: pthread_attr_destroy failed"); + } +# endif + osStackBase = (char *) osStackLow + osStackSize; + // NOTE: We assume the stack grows down, as it does on all architectures we support. + // Architectures that grow the stack up are rare. + if (sp >= osStackBase || sp < osStackLow) { // lo is outside the os stack + sp = osStackBase; + } +} + +/* Disable GC while this object lives. Used by CoroutineContext. + * + * Boehm keeps a count of GC_disable() and GC_enable() calls, + * and only enables GC when the count matches. + */ +class BoehmDisableGC +{ +public: + BoehmDisableGC() + { + GC_disable(); + }; + ~BoehmDisableGC() + { + GC_enable(); + }; +}; + +static inline void initGCReal() +{ + /* Initialise the Boehm garbage collector. */ + + /* Don't look for interior pointers. This reduces the odds of + misdetection a bit. */ + GC_set_all_interior_pointers(0); + + /* We don't have any roots in data segments, so don't scan from + there. */ + GC_set_no_dls(1); + + GC_INIT(); + + GC_set_oom_fn(oomHandler); + + StackAllocator::defaultAllocator = &boehmGCStackAllocator; + +// TODO: Remove __APPLE__ condition. +// Comment suggests an implementation that works on darwin and windows +// https://github.com/ivmai/bdwgc/issues/362#issuecomment-1936672196 +# if GC_VERSION_MAJOR >= 8 && GC_VERSION_MINOR >= 2 && GC_VERSION_MICRO >= 4 && !defined(__APPLE__) + GC_set_sp_corrector(&fixupBoehmStackPointer); + + if (!GC_get_sp_corrector()) { + printTalkative("BoehmGC on this platform does not support sp_corrector; will disable GC inside coroutines"); + /* Used to disable GC when entering coroutines on macOS */ + create_coro_gc_hook = []() -> std::shared_ptr { return std::make_shared(); }; + } +# else +# warning \ + "BoehmGC version does not support GC while coroutine exists. GC will be disabled inside coroutines. Consider updating bdw-gc to 8.2.4 or later." +# endif + + /* Set the initial heap size to something fairly big (25% of + physical RAM, up to a maximum of 384 MiB) so that in most cases + we don't need to garbage collect at all. (Collection has a + fairly significant overhead.) The heap size can be overridden + through libgc's GC_INITIAL_HEAP_SIZE environment variable. We + should probably also provide a nix.conf setting for this. Note + that GC_expand_hp() causes a lot of virtual, but not physical + (resident) memory to be allocated. This might be a problem on + systems that don't overcommit. */ + if (!getEnv("GC_INITIAL_HEAP_SIZE")) { + size_t size = 32 * 1024 * 1024; +# if HAVE_SYSCONF && defined(_SC_PAGESIZE) && defined(_SC_PHYS_PAGES) + size_t maxSize = 384 * 1024 * 1024; + long pageSize = sysconf(_SC_PAGESIZE); + long pages = sysconf(_SC_PHYS_PAGES); + if (pageSize != -1) + size = (pageSize * pages) / 4; // 25% of RAM + if (size > maxSize) + size = maxSize; +# endif + debug("setting initial heap size to %1% bytes", size); + GC_expand_hp(size); + } +} + +#endif + +static bool gcInitialised = false; + +void initGC() +{ + if (gcInitialised) + return; + +#if HAVE_BOEHMGC + initGCReal(); +#endif + + gcInitialised = true; +} + +void assertGCInitialized() +{ + assert(gcInitialised); +} + +} diff --git a/src/libexpr/eval-gc.hh b/src/libexpr/eval-gc.hh new file mode 100644 index 000000000..cd4ea914d --- /dev/null +++ b/src/libexpr/eval-gc.hh @@ -0,0 +1,16 @@ +#pragma once +///@file + +namespace nix { + +/** + * Initialise the Boehm GC, if applicable. + */ +void initGC(); + +/** + * Make sure `initGC` has already been called. + */ +void assertGCInitialized(); + +} diff --git a/src/libexpr/eval.cc b/src/libexpr/eval.cc index 42b26834c..301b81a62 100644 --- a/src/libexpr/eval.cc +++ b/src/libexpr/eval.cc @@ -40,25 +40,16 @@ #include #ifndef _WIN32 // TODO use portable implementation -# include +# include #endif #if HAVE_BOEHMGC -#define GC_INCLUDE_NEW +# define GC_INCLUDE_NEW -#include -#if __FreeBSD__ -# include -#endif - -#include -#include -#include - -#include -#include -#include +# include +# include +# include #endif @@ -211,109 +202,6 @@ bool Value::isTrivial() const } -#if HAVE_BOEHMGC -/* Called when the Boehm GC runs out of memory. */ -static void * oomHandler(size_t requested) -{ - /* Convert this to a proper C++ exception. */ - throw std::bad_alloc(); -} - -class BoehmGCStackAllocator : public StackAllocator { - boost::coroutines2::protected_fixedsize_stack stack { - // We allocate 8 MB, the default max stack size on NixOS. - // A smaller stack might be quicker to allocate but reduces the stack - // depth available for source filter expressions etc. - std::max(boost::context::stack_traits::default_size(), static_cast(8 * 1024 * 1024)) - }; - - // This is specific to boost::coroutines2::protected_fixedsize_stack. - // The stack protection page is included in sctx.size, so we have to - // subtract one page size from the stack size. - std::size_t pfss_usable_stack_size(boost::context::stack_context &sctx) { - return sctx.size - boost::context::stack_traits::page_size(); - } - - public: - boost::context::stack_context allocate() override { - auto sctx = stack.allocate(); - - // Stacks generally start at a high address and grow to lower addresses. - // Architectures that do the opposite are rare; in fact so rare that - // boost_routine does not implement it. - // So we subtract the stack size. - GC_add_roots(static_cast(sctx.sp) - pfss_usable_stack_size(sctx), sctx.sp); - return sctx; - } - - void deallocate(boost::context::stack_context sctx) override { - GC_remove_roots(static_cast(sctx.sp) - pfss_usable_stack_size(sctx), sctx.sp); - stack.deallocate(sctx); - } - -}; - -static BoehmGCStackAllocator boehmGCStackAllocator; - -/** - * When a thread goes into a coroutine, we lose its original sp until - * control flow returns to the thread. - * While in the coroutine, the sp points outside the thread stack, - * so we can detect this and push the entire thread stack instead, - * as an approximation. - * The coroutine's stack is covered by `BoehmGCStackAllocator`. - * This is not an optimal solution, because the garbage is scanned when a - * coroutine is active, for both the coroutine and the original thread stack. - * However, the implementation is quite lean, and usually we don't have active - * coroutines during evaluation, so this is acceptable. - */ -void fixupBoehmStackPointer(void ** sp_ptr, void * _pthread_id) { - void *& sp = *sp_ptr; - auto pthread_id = reinterpret_cast(_pthread_id); - pthread_attr_t pattr; - size_t osStackSize; - void * osStackLow; - void * osStackBase; - -# ifdef __APPLE__ - osStackSize = pthread_get_stacksize_np(pthread_id); - osStackLow = pthread_get_stackaddr_np(pthread_id); -# else - if (pthread_attr_init(&pattr)) { - throw Error("fixupBoehmStackPointer: pthread_attr_init failed"); - } -# ifdef HAVE_PTHREAD_GETATTR_NP - if (pthread_getattr_np(pthread_id, &pattr)) { - throw Error("fixupBoehmStackPointer: pthread_getattr_np failed"); - } -# elif HAVE_PTHREAD_ATTR_GET_NP - if (!pthread_attr_init(&pattr)) { - throw Error("fixupBoehmStackPointer: pthread_attr_init failed"); - } - if (!pthread_attr_get_np(pthread_id, &pattr)) { - throw Error("fixupBoehmStackPointer: pthread_attr_get_np failed"); - } -# else -# error "Need one of `pthread_attr_get_np` or `pthread_getattr_np`" -# endif - if (pthread_attr_getstack(&pattr, &osStackLow, &osStackSize)) { - throw Error("fixupBoehmStackPointer: pthread_attr_getstack failed"); - } - if (pthread_attr_destroy(&pattr)) { - throw Error("fixupBoehmStackPointer: pthread_attr_destroy failed"); - } -# endif - osStackBase = (char *)osStackLow + osStackSize; - // NOTE: We assume the stack grows down, as it does on all architectures we support. - // Architectures that grow the stack up are rare. - if (sp >= osStackBase || sp < osStackLow) { // lo is outside the os stack - sp = osStackBase; - } -} - -#endif - - static Symbol getName(const AttrName & name, EvalState & state, Env & env) { if (name.symbol) { @@ -326,92 +214,6 @@ static Symbol getName(const AttrName & name, EvalState & state, Env & env) } } -#if HAVE_BOEHMGC -/* Disable GC while this object lives. Used by CoroutineContext. - * - * Boehm keeps a count of GC_disable() and GC_enable() calls, - * and only enables GC when the count matches. - */ -class BoehmDisableGC { -public: - BoehmDisableGC() { - GC_disable(); - }; - ~BoehmDisableGC() { - GC_enable(); - }; -}; -#endif - -static bool gcInitialised = false; - -void initGC() -{ - if (gcInitialised) return; - -#if HAVE_BOEHMGC - /* Initialise the Boehm garbage collector. */ - - /* Don't look for interior pointers. This reduces the odds of - misdetection a bit. */ - GC_set_all_interior_pointers(0); - - /* We don't have any roots in data segments, so don't scan from - there. */ - GC_set_no_dls(1); - - GC_INIT(); - - GC_set_oom_fn(oomHandler); - - StackAllocator::defaultAllocator = &boehmGCStackAllocator; - - // TODO: Remove __APPLE__ condition. - // Comment suggests an implementation that works on darwin and windows - // https://github.com/ivmai/bdwgc/issues/362#issuecomment-1936672196 - #if GC_VERSION_MAJOR >= 8 && GC_VERSION_MINOR >= 2 && GC_VERSION_MICRO >= 4 && !defined(__APPLE__) - GC_set_sp_corrector(&fixupBoehmStackPointer); - - if (!GC_get_sp_corrector()) { - printTalkative("BoehmGC on this platform does not support sp_corrector; will disable GC inside coroutines"); - /* Used to disable GC when entering coroutines on macOS */ - create_coro_gc_hook = []() -> std::shared_ptr { - return std::make_shared(); - }; - } - #else - #warning "BoehmGC version does not support GC while coroutine exists. GC will be disabled inside coroutines. Consider updating bdw-gc to 8.2.4 or later." - #endif - - - /* Set the initial heap size to something fairly big (25% of - physical RAM, up to a maximum of 384 MiB) so that in most cases - we don't need to garbage collect at all. (Collection has a - fairly significant overhead.) The heap size can be overridden - through libgc's GC_INITIAL_HEAP_SIZE environment variable. We - should probably also provide a nix.conf setting for this. Note - that GC_expand_hp() causes a lot of virtual, but not physical - (resident) memory to be allocated. This might be a problem on - systems that don't overcommit. */ - if (!getEnv("GC_INITIAL_HEAP_SIZE")) { - size_t size = 32 * 1024 * 1024; -#if HAVE_SYSCONF && defined(_SC_PAGESIZE) && defined(_SC_PHYS_PAGES) - size_t maxSize = 384 * 1024 * 1024; - long pageSize = sysconf(_SC_PAGESIZE); - long pages = sysconf(_SC_PHYS_PAGES); - if (pageSize != -1) - size = (pageSize * pages) / 4; // 25% of RAM - if (size > maxSize) size = maxSize; -#endif - debug("setting initial heap size to %1% bytes", size); - GC_expand_hp(size); - } - -#endif - - gcInitialised = true; -} - static constexpr size_t BASE_ENV_SIZE = 128; EvalState::EvalState( @@ -508,7 +310,7 @@ EvalState::EvalState( countCalls = getEnv("NIX_COUNT_CALLS").value_or("0") != "0"; - assert(gcInitialised); + assertGCInitialized(); static_assert(sizeof(Env) <= 16, "environment must be <= 16 bytes"); diff --git a/src/libexpr/eval.hh b/src/libexpr/eval.hh index dac763268..0e5e0433e 100644 --- a/src/libexpr/eval.hh +++ b/src/libexpr/eval.hh @@ -3,6 +3,7 @@ #include "attr-set.hh" #include "eval-error.hh" +#include "eval-gc.hh" #include "types.hh" #include "value.hh" #include "nixexpr.hh" @@ -146,12 +147,6 @@ std::string printValue(EvalState & state, Value & v); std::ostream & operator << (std::ostream & os, const ValueType t); -/** - * Initialise the Boehm GC, if applicable. - */ -void initGC(); - - struct RegexCache; std::shared_ptr makeRegexCache();