Merge pull request #7692 from edolstra/fix-docker-auto-uid-allocation

Fix auto-uid-allocation in Docker containers
This commit is contained in:
Eelco Dolstra 2023-02-07 23:35:39 +01:00 committed by GitHub
commit ae6de012ee
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
3 changed files with 134 additions and 63 deletions

View file

@ -16,6 +16,7 @@
#include "json-utils.hh" #include "json-utils.hh"
#include "cgroup.hh" #include "cgroup.hh"
#include "personality.hh" #include "personality.hh"
#include "namespaces.hh"
#include <regex> #include <regex>
#include <queue> #include <queue>
@ -167,7 +168,8 @@ void LocalDerivationGoal::killSandbox(bool getStats)
} }
void LocalDerivationGoal::tryLocalBuild() { void LocalDerivationGoal::tryLocalBuild()
{
unsigned int curBuilds = worker.getNrLocalBuilds(); unsigned int curBuilds = worker.getNrLocalBuilds();
if (curBuilds >= settings.maxBuildJobs) { if (curBuilds >= settings.maxBuildJobs) {
state = &DerivationGoal::tryToBuild; state = &DerivationGoal::tryToBuild;
@ -205,6 +207,17 @@ void LocalDerivationGoal::tryLocalBuild() {
#endif #endif
} }
#if __linux__
if (useChroot) {
if (!mountNamespacesSupported() || !pidNamespacesSupported()) {
if (!settings.sandboxFallback)
throw Error("this system does not support the kernel namespaces that are required for sandboxing; use '--no-sandbox' to disable sandboxing");
debug("auto-disabling sandboxing because the prerequisite namespaces are not available");
useChroot = false;
}
}
#endif
if (useBuildUsers()) { if (useBuildUsers()) {
if (!buildUser) if (!buildUser)
buildUser = acquireUserLock(parsedDrv->useUidRange() ? 65536 : 1, useChroot); buildUser = acquireUserLock(parsedDrv->useUidRange() ? 65536 : 1, useChroot);
@ -888,12 +901,7 @@ void LocalDerivationGoal::startBuilder()
userNamespaceSync.create(); userNamespaceSync.create();
Path maxUserNamespaces = "/proc/sys/user/max_user_namespaces"; usingUserNamespace = userNamespacesSupported();
static bool userNamespacesEnabled =
pathExists(maxUserNamespaces)
&& trim(readFile(maxUserNamespaces)) != "0";
usingUserNamespace = userNamespacesEnabled;
Pid helper = startProcess([&]() { Pid helper = startProcess([&]() {
@ -920,64 +928,15 @@ void LocalDerivationGoal::startBuilder()
flags |= CLONE_NEWUSER; flags |= CLONE_NEWUSER;
pid_t child = clone(childEntry, stack + stackSize, flags, this); pid_t child = clone(childEntry, stack + stackSize, flags, this);
if (child == -1 && errno == EINVAL) {
/* Fallback for Linux < 2.13 where CLONE_NEWPID and if (child == -1)
CLONE_PARENT are not allowed together. */ throw SysError("creating sandboxed builder process using clone()");
flags &= ~CLONE_NEWPID;
child = clone(childEntry, stack + stackSize, flags, this);
}
if (usingUserNamespace && child == -1 && (errno == EPERM || errno == EINVAL)) {
/* Some distros patch Linux to not allow unprivileged
* user namespaces. If we get EPERM or EINVAL, try
* without CLONE_NEWUSER and see if that works.
* Details: https://salsa.debian.org/kernel-team/linux/-/commit/d98e00eda6bea437e39b9e80444eee84a32438a6
*/
usingUserNamespace = false;
flags &= ~CLONE_NEWUSER;
child = clone(childEntry, stack + stackSize, flags, this);
}
if (child == -1) {
switch(errno) {
case EPERM:
case EINVAL: {
int errno_ = errno;
if (!userNamespacesEnabled && errno==EPERM)
notice("user namespaces appear to be disabled; they are required for sandboxing; check /proc/sys/user/max_user_namespaces");
if (userNamespacesEnabled) {
Path procSysKernelUnprivilegedUsernsClone = "/proc/sys/kernel/unprivileged_userns_clone";
if (pathExists(procSysKernelUnprivilegedUsernsClone)
&& trim(readFile(procSysKernelUnprivilegedUsernsClone)) == "0") {
notice("user namespaces appear to be disabled; they are required for sandboxing; check /proc/sys/kernel/unprivileged_userns_clone");
}
}
Path procSelfNsUser = "/proc/self/ns/user";
if (!pathExists(procSelfNsUser))
notice("/proc/self/ns/user does not exist; your kernel was likely built without CONFIG_USER_NS=y, which is required for sandboxing");
/* Otherwise exit with EPERM so we can handle this in the
parent. This is only done when sandbox-fallback is set
to true (the default). */
if (settings.sandboxFallback)
_exit(1);
/* Mention sandbox-fallback in the error message so the user
knows that having it disabled contributed to the
unrecoverability of this failure */
throw SysError(errno_, "creating sandboxed builder process using clone(), without sandbox-fallback");
}
default:
throw SysError("creating sandboxed builder process using clone()");
}
}
writeFull(builderOut.writeSide.get(), writeFull(builderOut.writeSide.get(),
fmt("%d %d\n", usingUserNamespace, child)); fmt("%d %d\n", usingUserNamespace, child));
_exit(0); _exit(0);
}); });
int res = helper.wait(); if (helper.wait() != 0)
if (res != 0 && settings.sandboxFallback) {
useChroot = false;
initTmpDir();
goto fallback;
} else if (res != 0)
throw Error("unable to start build process"); throw Error("unable to start build process");
userNamespaceSync.readSide = -1; userNamespaceSync.readSide = -1;
@ -1045,9 +1004,6 @@ void LocalDerivationGoal::startBuilder()
} else } else
#endif #endif
{ {
#if __linux__
fallback:
#endif
pid = startProcess([&]() { pid = startProcess([&]() {
runChild(); runChild();
}); });

100
src/libutil/namespaces.cc Normal file
View file

@ -0,0 +1,100 @@
#if __linux__
#include "namespaces.hh"
#include "util.hh"
#include "finally.hh"
#include <mntent.h>
namespace nix {
bool userNamespacesSupported()
{
static auto res = [&]() -> bool
{
if (!pathExists("/proc/self/ns/user")) {
debug("'/proc/self/ns/user' does not exist; your kernel was likely built without CONFIG_USER_NS=y");
return false;
}
Path maxUserNamespaces = "/proc/sys/user/max_user_namespaces";
if (!pathExists(maxUserNamespaces) ||
trim(readFile(maxUserNamespaces)) == "0")
{
debug("user namespaces appear to be disabled; check '/proc/sys/user/max_user_namespaces'");
return false;
}
Path procSysKernelUnprivilegedUsernsClone = "/proc/sys/kernel/unprivileged_userns_clone";
if (pathExists(procSysKernelUnprivilegedUsernsClone)
&& trim(readFile(procSysKernelUnprivilegedUsernsClone)) == "0")
{
debug("user namespaces appear to be disabled; check '/proc/sys/kernel/unprivileged_userns_clone'");
return false;
}
Pid pid = startProcess([&]()
{
auto res = unshare(CLONE_NEWUSER);
_exit(res ? 1 : 0);
});
bool supported = pid.wait() == 0;
if (!supported)
debug("user namespaces do not work on this system");
return supported;
}();
return res;
}
bool mountNamespacesSupported()
{
static auto res = [&]() -> bool
{
bool useUserNamespace = userNamespacesSupported();
Pid pid = startProcess([&]()
{
auto res = unshare(CLONE_NEWNS | (useUserNamespace ? CLONE_NEWUSER : 0));
_exit(res ? 1 : 0);
});
bool supported = pid.wait() == 0;
if (!supported)
debug("mount namespaces do not work on this system");
return supported;
}();
return res;
}
bool pidNamespacesSupported()
{
static auto res = [&]() -> bool
{
/* Check whether /proc is fully visible, i.e. there are no
filesystems mounted on top of files inside /proc. If this
is not the case, then we cannot mount a new /proc inside
the sandbox that matches the sandbox's PID namespace.
See https://lore.kernel.org/lkml/87tvsrjai0.fsf@xmission.com/T/. */
auto fp = fopen("/proc/mounts", "r");
if (!fp) return false;
Finally delFP = [&]() { fclose(fp); };
while (auto ent = getmntent(fp))
if (hasPrefix(std::string_view(ent->mnt_dir), "/proc/")) {
debug("PID namespaces do not work because /proc is not fully visible; disabling sandboxing");
return false;
}
return true;
}();
return res;
}
}
#endif

15
src/libutil/namespaces.hh Normal file
View file

@ -0,0 +1,15 @@
#pragma once
namespace nix {
#if __linux__
bool userNamespacesSupported();
bool mountNamespacesSupported();
bool pidNamespacesSupported();
#endif
}