From fb2f7f5dcc6b37a4f39f59d9f477d3fa57d79095 Mon Sep 17 00:00:00 2001 From: Eelco Dolstra Date: Wed, 25 Jan 2023 17:31:27 +0100 Subject: [PATCH] Fix auto-uid-allocation in Docker containers This didn't work because sandboxing doesn't work in Docker. However, the sandboxing check is done lazily - after clone(CLONE_NEWNS) fails, we retry with sandboxing disabled. But at that point, we've already done UID allocation under the assumption that user namespaces are enabled. So let's get rid of the "goto fallback" logic and just detect early whether user / mount namespaces are enabled. This commit also gets rid of a compatibility hack for some ancient Linux kernels (<2.13). --- src/libstore/build/local-derivation-goal.cc | 82 +++++---------------- src/libutil/namespaces.cc | 63 ++++++++++++++++ src/libutil/namespaces.hh | 9 +++ 3 files changed, 91 insertions(+), 63 deletions(-) create mode 100644 src/libutil/namespaces.cc create mode 100644 src/libutil/namespaces.hh diff --git a/src/libstore/build/local-derivation-goal.cc b/src/libstore/build/local-derivation-goal.cc index 8ff83f748..16955e326 100644 --- a/src/libstore/build/local-derivation-goal.cc +++ b/src/libstore/build/local-derivation-goal.cc @@ -16,6 +16,7 @@ #include "json-utils.hh" #include "cgroup.hh" #include "personality.hh" +#include "namespaces.hh" #include #include @@ -167,7 +168,8 @@ void LocalDerivationGoal::killSandbox(bool getStats) } -void LocalDerivationGoal::tryLocalBuild() { +void LocalDerivationGoal::tryLocalBuild() +{ unsigned int curBuilds = worker.getNrLocalBuilds(); if (curBuilds >= settings.maxBuildJobs) { state = &DerivationGoal::tryToBuild; @@ -205,6 +207,17 @@ void LocalDerivationGoal::tryLocalBuild() { #endif } + #if __linux__ + if (useChroot) { + if (!mountNamespacesSupported()) { + if (!settings.sandboxFallback) + throw Error("this system does not support mount namespaces, which are required for sandboxing"); + debug("auto-disabling sandboxing because mount namespaces are not available"); + useChroot = false; + } + } + #endif + if (useBuildUsers()) { if (!buildUser) buildUser = acquireUserLock(parsedDrv->useUidRange() ? 65536 : 1, useChroot); @@ -888,12 +901,7 @@ void LocalDerivationGoal::startBuilder() userNamespaceSync.create(); - Path maxUserNamespaces = "/proc/sys/user/max_user_namespaces"; - static bool userNamespacesEnabled = - pathExists(maxUserNamespaces) - && trim(readFile(maxUserNamespaces)) != "0"; - - usingUserNamespace = userNamespacesEnabled; + usingUserNamespace = userNamespacesSupported(); Pid helper = startProcess([&]() { @@ -920,64 +928,15 @@ void LocalDerivationGoal::startBuilder() flags |= CLONE_NEWUSER; pid_t child = clone(childEntry, stack + stackSize, flags, this); - if (child == -1 && errno == EINVAL) { - /* Fallback for Linux < 2.13 where CLONE_NEWPID and - CLONE_PARENT are not allowed together. */ - flags &= ~CLONE_NEWPID; - child = clone(childEntry, stack + stackSize, flags, this); - } - if (usingUserNamespace && child == -1 && (errno == EPERM || errno == EINVAL)) { - /* Some distros patch Linux to not allow unprivileged - * user namespaces. If we get EPERM or EINVAL, try - * without CLONE_NEWUSER and see if that works. - * Details: https://salsa.debian.org/kernel-team/linux/-/commit/d98e00eda6bea437e39b9e80444eee84a32438a6 - */ - usingUserNamespace = false; - flags &= ~CLONE_NEWUSER; - child = clone(childEntry, stack + stackSize, flags, this); - } - if (child == -1) { - switch(errno) { - case EPERM: - case EINVAL: { - int errno_ = errno; - if (!userNamespacesEnabled && errno==EPERM) - notice("user namespaces appear to be disabled; they are required for sandboxing; check /proc/sys/user/max_user_namespaces"); - if (userNamespacesEnabled) { - Path procSysKernelUnprivilegedUsernsClone = "/proc/sys/kernel/unprivileged_userns_clone"; - if (pathExists(procSysKernelUnprivilegedUsernsClone) - && trim(readFile(procSysKernelUnprivilegedUsernsClone)) == "0") { - notice("user namespaces appear to be disabled; they are required for sandboxing; check /proc/sys/kernel/unprivileged_userns_clone"); - } - } - Path procSelfNsUser = "/proc/self/ns/user"; - if (!pathExists(procSelfNsUser)) - notice("/proc/self/ns/user does not exist; your kernel was likely built without CONFIG_USER_NS=y, which is required for sandboxing"); - /* Otherwise exit with EPERM so we can handle this in the - parent. This is only done when sandbox-fallback is set - to true (the default). */ - if (settings.sandboxFallback) - _exit(1); - /* Mention sandbox-fallback in the error message so the user - knows that having it disabled contributed to the - unrecoverability of this failure */ - throw SysError(errno_, "creating sandboxed builder process using clone(), without sandbox-fallback"); - } - default: - throw SysError("creating sandboxed builder process using clone()"); - } - } + + if (child == -1) + throw SysError("creating sandboxed builder process using clone()"); writeFull(builderOut.writeSide.get(), fmt("%d %d\n", usingUserNamespace, child)); _exit(0); }); - int res = helper.wait(); - if (res != 0 && settings.sandboxFallback) { - useChroot = false; - initTmpDir(); - goto fallback; - } else if (res != 0) + if (helper.wait() != 0) throw Error("unable to start build process"); userNamespaceSync.readSide = -1; @@ -1045,9 +1004,6 @@ void LocalDerivationGoal::startBuilder() } else #endif { -#if __linux__ - fallback: -#endif pid = startProcess([&]() { runChild(); }); diff --git a/src/libutil/namespaces.cc b/src/libutil/namespaces.cc new file mode 100644 index 000000000..0c3c3cbdd --- /dev/null +++ b/src/libutil/namespaces.cc @@ -0,0 +1,63 @@ +#include "namespaces.hh" +#include "util.hh" + +#if __linux__ + +namespace nix { + +bool userNamespacesSupported() +{ + static bool res = [&]() -> bool + { + if (!pathExists("/proc/self/ns/user")) { + notice("'/proc/self/ns/user' does not exist; your kernel was likely built without CONFIG_USER_NS=y, which is required for sandboxing"); + return false; + } + + Path maxUserNamespaces = "/proc/sys/user/max_user_namespaces"; + if (!pathExists(maxUserNamespaces) || + trim(readFile(maxUserNamespaces)) == "0") + { + notice("user namespaces appear to be disabled; they are required for sandboxing; check '/proc/sys/user/max_user_namespaces'"); + return false; + } + + Path procSysKernelUnprivilegedUsernsClone = "/proc/sys/kernel/unprivileged_userns_clone"; + if (pathExists(procSysKernelUnprivilegedUsernsClone) + && trim(readFile(procSysKernelUnprivilegedUsernsClone)) == "0") + { + notice("user namespaces appear to be disabled; they are required for sandboxing; check '/proc/sys/kernel/unprivileged_userns_clone'"); + return false; + } + + Pid pid = startProcess([&]() + { + auto res = unshare(CLONE_NEWUSER); + _exit(res ? 1 : 0); + }); + + return pid.wait() == 0; + }(); + return res; +} + +bool mountNamespacesSupported() +{ + static bool res = [&]() -> bool + { + bool useUserNamespace = userNamespacesSupported(); + + Pid pid = startProcess([&]() + { + auto res = unshare(CLONE_NEWNS | (useUserNamespace ? CLONE_NEWUSER : 0)); + _exit(res ? 1 : 0); + }); + + return pid.wait() == 0; + }(); + return res; +} + +} + +#endif diff --git a/src/libutil/namespaces.hh b/src/libutil/namespaces.hh new file mode 100644 index 000000000..4ed6cb683 --- /dev/null +++ b/src/libutil/namespaces.hh @@ -0,0 +1,9 @@ +#pragma once + +namespace nix { + +bool userNamespacesSupported(); + +bool mountNamespacesSupported(); + +}