mirror of
https://github.com/privatevoid-net/nix-super.git
synced 2025-01-19 09:36:47 +02:00
Split auto UID allocation from cgroups
Cgroups are now only used for derivations that require the uid-range range feature. This allows auto UID allocation even on systems that don't have cgroups (like macOS). Also, make things work on modern systems that use cgroups v2 (where there is a single hierarchy and no "systemd" controller).
This commit is contained in:
parent
40911d7dec
commit
2fde7e0108
9 changed files with 122 additions and 96 deletions
|
@ -160,7 +160,7 @@ void LocalDerivationGoal::tryLocalBuild() {
|
|||
|
||||
if (useBuildUsers()) {
|
||||
if (!buildUser)
|
||||
buildUser = acquireUserLock();
|
||||
buildUser = acquireUserLock(parsedDrv->useUidRange() ? 65536 : 1);
|
||||
|
||||
if (!buildUser) {
|
||||
if (!actLock)
|
||||
|
@ -495,8 +495,8 @@ void LocalDerivationGoal::startBuilder()
|
|||
}
|
||||
}
|
||||
|
||||
useUidRange = parsedDrv->getRequiredSystemFeatures().count("uid-range");
|
||||
useSystemdCgroup = parsedDrv->getRequiredSystemFeatures().count("Systemd-cgroup");
|
||||
assert(!useSystemdCgroup);
|
||||
|
||||
if (useChroot) {
|
||||
|
||||
|
@ -576,7 +576,8 @@ void LocalDerivationGoal::startBuilder()
|
|||
|
||||
printMsg(lvlChatty, format("setting up chroot environment in '%1%'") % chrootRootDir);
|
||||
|
||||
if (mkdir(chrootRootDir.c_str(), useUidRange ? 0755 : 0750) == -1)
|
||||
// FIXME: make this 0700
|
||||
if (mkdir(chrootRootDir.c_str(), buildUser && buildUser->getUIDCount() != 1 ? 0755 : 0750) == -1)
|
||||
throw SysError("cannot create '%1%'", chrootRootDir);
|
||||
|
||||
// FIXME: only make root writable for user namespace builds.
|
||||
|
@ -596,8 +597,8 @@ void LocalDerivationGoal::startBuilder()
|
|||
createDirs(chrootRootDir + "/etc");
|
||||
chownToBuilder(chrootRootDir + "/etc");
|
||||
|
||||
if (useUidRange && (!buildUser || buildUser->getUIDCount() < 65536))
|
||||
throw Error("feature 'uid-range' requires '%s' to be enabled", settings.autoAllocateUids.name);
|
||||
if (parsedDrv->useUidRange() && (!buildUser || buildUser->getUIDCount() < 65536))
|
||||
throw Error("feature 'uid-range' requires the setting '%s' to be enabled", settings.autoAllocateUids.name);
|
||||
|
||||
/* Declare the build user's group so that programs get a consistent
|
||||
view of the system (e.g., "id -gn"). */
|
||||
|
@ -670,7 +671,7 @@ void LocalDerivationGoal::startBuilder()
|
|||
#endif
|
||||
#endif
|
||||
} else {
|
||||
if (useUidRange)
|
||||
if (parsedDrv->useUidRange())
|
||||
throw Error("feature 'uid-range' is only supported in sandboxed builds");
|
||||
if (useSystemdCgroup)
|
||||
throw Error("feature 'systemd-cgroup' is only supported in sandboxed builds");
|
||||
|
@ -934,12 +935,12 @@ void LocalDerivationGoal::startBuilder()
|
|||
the calling user (if build users are disabled). */
|
||||
uid_t hostUid = buildUser ? buildUser->getUID() : getuid();
|
||||
uid_t hostGid = buildUser ? buildUser->getGID() : getgid();
|
||||
uint32_t nrIds = buildUser && useUidRange ? buildUser->getUIDCount() : 1;
|
||||
uid_t nrIds = buildUser ? buildUser->getUIDCount() : 1;
|
||||
|
||||
writeFile("/proc/" + std::to_string(pid) + "/uid_map",
|
||||
fmt("%d %d %d", sandboxUid(), hostUid, nrIds));
|
||||
|
||||
if (!useUidRange)
|
||||
if (!buildUser || buildUser->getUIDCount() == 1)
|
||||
writeFile("/proc/" + std::to_string(pid) + "/setgroups", "deny");
|
||||
|
||||
writeFile("/proc/" + std::to_string(pid) + "/gid_map",
|
||||
|
@ -1793,7 +1794,7 @@ void LocalDerivationGoal::runChild()
|
|||
throw SysError("mounting /proc");
|
||||
|
||||
/* Mount sysfs on /sys. */
|
||||
if (useUidRange) {
|
||||
if (buildUser && buildUser->getUIDCount() != 1) {
|
||||
createDirs(chrootRootDir + "/sys");
|
||||
if (mount("none", (chrootRootDir + "/sys").c_str(), "sysfs", 0, 0) == -1)
|
||||
throw SysError("mounting /sys");
|
||||
|
|
|
@ -41,9 +41,6 @@ struct LocalDerivationGoal : public DerivationGoal
|
|||
|
||||
Path chrootRootDir;
|
||||
|
||||
/* Whether to give the build more than 1 UID. */
|
||||
bool useUidRange = false;
|
||||
|
||||
/* Whether to make the 'systemd' cgroup controller available to
|
||||
the build. */
|
||||
bool useSystemdCgroup = false;
|
||||
|
@ -99,8 +96,8 @@ struct LocalDerivationGoal : public DerivationGoal
|
|||
result. */
|
||||
std::map<Path, ValidPathInfo> prevInfos;
|
||||
|
||||
uid_t sandboxUid() { return usingUserNamespace ? (useUidRange ? 0 : 1000) : buildUser->getUID(); }
|
||||
gid_t sandboxGid() { return usingUserNamespace ? (useUidRange ? 0 : 100) : buildUser->getGID(); }
|
||||
uid_t sandboxUid() { return usingUserNamespace ? (buildUser->getUIDCount() == 1 ? 1000 : 0) : buildUser->getUID(); }
|
||||
gid_t sandboxGid() { return usingUserNamespace ? (buildUser->getUIDCount() == 1 ? 100 : 0) : buildUser->getGID(); }
|
||||
|
||||
const static Path homeDir;
|
||||
|
||||
|
|
|
@ -13,6 +13,7 @@
|
|||
|
||||
namespace nix {
|
||||
|
||||
// FIXME: obsolete, check for cgroup2
|
||||
std::map<std::string, std::string> getCgroups(const Path & cgroupFile)
|
||||
{
|
||||
std::map<std::string, std::string> cgroups;
|
||||
|
|
|
@ -130,6 +130,10 @@ StringSet Settings::getDefaultSystemFeatures()
|
|||
actually require anything special on the machines. */
|
||||
StringSet features{"nixos-test", "benchmark", "big-parallel"};
|
||||
|
||||
#if __linux__
|
||||
features.insert("uid-range");
|
||||
#endif
|
||||
|
||||
#if __linux__
|
||||
if (access("/dev/kvm", R_OK | W_OK) == 0)
|
||||
features.insert("kvm");
|
||||
|
|
|
@ -46,6 +46,8 @@ struct PluginFilesSetting : public BaseSetting<Paths>
|
|||
void set(const std::string & str, bool append = false) override;
|
||||
};
|
||||
|
||||
const uint32_t maxIdsPerBuild = 1 << 16;
|
||||
|
||||
class Settings : public Config {
|
||||
|
||||
unsigned int getDefaultCores();
|
||||
|
@ -279,12 +281,10 @@ public:
|
|||
Setting<bool> autoAllocateUids{this, false, "auto-allocate-uids",
|
||||
"Whether to allocate UIDs for builders automatically."};
|
||||
|
||||
const uint32_t idsPerBuild = 1 << 16;
|
||||
|
||||
Setting<uint32_t> startId{this, 872415232, "start-id",
|
||||
"The first UID and GID to use for dynamic ID allocation."};
|
||||
|
||||
Setting<uint32_t> uidCount{this, idsPerBuild * 128, "id-count",
|
||||
Setting<uint32_t> uidCount{this, maxIdsPerBuild * 128, "id-count",
|
||||
"The number of UIDs/GIDs to use for dynamic ID allocation."};
|
||||
#endif
|
||||
|
||||
|
|
|
@ -20,12 +20,8 @@ struct SimpleUserLock : UserLock
|
|||
killUser(uid);
|
||||
}
|
||||
|
||||
std::pair<uid_t, uid_t> getUIDRange() override
|
||||
{
|
||||
assert(uid);
|
||||
return {uid, uid};
|
||||
}
|
||||
|
||||
uid_t getUID() override { assert(uid); return uid; }
|
||||
uid_t getUIDCount() override { return 1; }
|
||||
gid_t getGID() override { assert(gid); return gid; }
|
||||
|
||||
std::vector<gid_t> getSupplementaryGIDs() override { return supplementaryGIDs; }
|
||||
|
@ -115,48 +111,65 @@ struct SimpleUserLock : UserLock
|
|||
}
|
||||
};
|
||||
|
||||
#if __linux__
|
||||
struct CgroupUserLock : UserLock
|
||||
struct AutoUserLock : UserLock
|
||||
{
|
||||
AutoCloseFD fdUserLock;
|
||||
uid_t uid;
|
||||
uid_t firstUid = 0;
|
||||
uid_t nrIds = 1;
|
||||
#if __linux__
|
||||
std::optional<Path> cgroup;
|
||||
#endif
|
||||
|
||||
~AutoUserLock()
|
||||
{
|
||||
// Get rid of our cgroup, ignoring errors.
|
||||
if (cgroup) rmdir(cgroup->c_str());
|
||||
}
|
||||
|
||||
void kill() override
|
||||
{
|
||||
#if __linux__
|
||||
if (cgroup) {
|
||||
printError("KILL CGROUP %s", *cgroup);
|
||||
destroyCgroup(*cgroup);
|
||||
cgroup.reset();
|
||||
if (mkdir(cgroup->c_str(), 0755) == -1)
|
||||
throw SysError("creating cgroup '%s'", *cgroup);
|
||||
} else
|
||||
#endif
|
||||
{
|
||||
assert(firstUid);
|
||||
printError("KILL USER %d", firstUid);
|
||||
killUser(firstUid);
|
||||
}
|
||||
}
|
||||
|
||||
std::pair<uid_t, uid_t> getUIDRange() override
|
||||
{
|
||||
assert(uid);
|
||||
return {uid, uid + settings.idsPerBuild - 1};
|
||||
}
|
||||
uid_t getUID() override { assert(firstUid); return firstUid; }
|
||||
|
||||
gid_t getUIDCount() override { return nrIds; }
|
||||
|
||||
gid_t getGID() override
|
||||
{
|
||||
// We use the same GID ranges as for the UIDs.
|
||||
assert(uid);
|
||||
return uid;
|
||||
assert(firstUid);
|
||||
return firstUid;
|
||||
}
|
||||
|
||||
std::vector<gid_t> getSupplementaryGIDs() override { return {}; }
|
||||
|
||||
static std::unique_ptr<UserLock> acquire()
|
||||
static std::unique_ptr<UserLock> acquire(uid_t nrIds)
|
||||
{
|
||||
settings.requireExperimentalFeature(Xp::AutoAllocateUids);
|
||||
assert(settings.startId > 0);
|
||||
assert(settings.startId % settings.idsPerBuild == 0);
|
||||
assert(settings.uidCount % settings.idsPerBuild == 0);
|
||||
assert(settings.startId % maxIdsPerBuild == 0);
|
||||
assert(settings.uidCount % maxIdsPerBuild == 0);
|
||||
assert((uint64_t) settings.startId + (uint64_t) settings.uidCount <= std::numeric_limits<uid_t>::max());
|
||||
assert(nrIds <= maxIdsPerBuild);
|
||||
|
||||
// FIXME: check whether the id range overlaps any known users
|
||||
|
||||
createDirs(settings.nixStateDir + "/userpool2");
|
||||
|
||||
size_t nrSlots = settings.uidCount / settings.idsPerBuild;
|
||||
size_t nrSlots = settings.uidCount / maxIdsPerBuild;
|
||||
|
||||
for (size_t i = 0; i < nrSlots; i++) {
|
||||
debug("trying user slot '%d'", i);
|
||||
|
@ -170,11 +183,47 @@ struct CgroupUserLock : UserLock
|
|||
throw SysError("opening user lock '%s'", fnUserLock);
|
||||
|
||||
if (lockFile(fd.get(), ltWrite, false)) {
|
||||
auto lock = std::make_unique<CgroupUserLock>();
|
||||
auto s = drainFD(fd.get());
|
||||
|
||||
#if __linux__
|
||||
if (s != "") {
|
||||
/* Kill the old cgroup, to ensure there are no
|
||||
processes left over from an interrupted build. */
|
||||
destroyCgroup(s);
|
||||
}
|
||||
#endif
|
||||
|
||||
if (ftruncate(fd.get(), 0) == -1)
|
||||
throw Error("truncating user lock");
|
||||
|
||||
auto lock = std::make_unique<AutoUserLock>();
|
||||
lock->fdUserLock = std::move(fd);
|
||||
lock->uid = settings.startId + i * settings.idsPerBuild;
|
||||
auto s = drainFD(lock->fdUserLock.get());
|
||||
if (s != "") lock->cgroup = s;
|
||||
lock->firstUid = settings.startId + i * maxIdsPerBuild;
|
||||
lock->nrIds = nrIds;
|
||||
|
||||
if (nrIds > 1) {
|
||||
auto ourCgroups = getCgroups("/proc/self/cgroup");
|
||||
auto ourCgroup = ourCgroups[""];
|
||||
if (ourCgroup == "")
|
||||
throw Error("cannot determine cgroup name from /proc/self/cgroup");
|
||||
|
||||
auto ourCgroupPath = canonPath("/sys/fs/cgroup/" + ourCgroup);
|
||||
|
||||
printError("PARENT CGROUP = %s", ourCgroupPath);
|
||||
|
||||
if (!pathExists(ourCgroupPath))
|
||||
throw Error("expected cgroup directory '%s'", ourCgroupPath);
|
||||
|
||||
lock->cgroup = fmt("%s/nix-build-%d", ourCgroupPath, lock->firstUid);
|
||||
|
||||
printError("CHILD CGROUP = %s", *lock->cgroup);
|
||||
|
||||
/* Record the cgroup in the lock file. This ensures that
|
||||
if we subsequently get executed under a different parent
|
||||
cgroup, we kill the previous cgroup first. */
|
||||
writeFull(lock->fdUserLock.get(), *lock->cgroup);
|
||||
}
|
||||
|
||||
return lock;
|
||||
}
|
||||
}
|
||||
|
@ -182,50 +231,16 @@ struct CgroupUserLock : UserLock
|
|||
return nullptr;
|
||||
}
|
||||
|
||||
std::optional<Path> cgroup;
|
||||
|
||||
std::optional<Path> getCgroup() override
|
||||
{
|
||||
if (!cgroup) {
|
||||
/* Create a systemd cgroup since that's the minimum
|
||||
required by systemd-nspawn. */
|
||||
auto ourCgroups = getCgroups("/proc/self/cgroup");
|
||||
auto systemdCgroup = ourCgroups["systemd"];
|
||||
if (systemdCgroup == "")
|
||||
throw Error("'systemd' cgroup does not exist");
|
||||
|
||||
auto hostCgroup = canonPath("/sys/fs/cgroup/systemd/" + systemdCgroup);
|
||||
|
||||
if (!pathExists(hostCgroup))
|
||||
throw Error("expected cgroup directory '%s'", hostCgroup);
|
||||
|
||||
cgroup = fmt("%s/nix-%d", hostCgroup, uid);
|
||||
|
||||
destroyCgroup(*cgroup);
|
||||
|
||||
if (mkdir(cgroup->c_str(), 0755) == -1)
|
||||
throw SysError("creating cgroup '%s'", *cgroup);
|
||||
|
||||
/* Record the cgroup in the lock file. This ensures that
|
||||
if we subsequently get executed under a different parent
|
||||
cgroup, we kill the previous cgroup first. */
|
||||
if (ftruncate(fdUserLock.get(), 0) == -1)
|
||||
throw Error("truncating user lock");
|
||||
writeFull(fdUserLock.get(), *cgroup);
|
||||
}
|
||||
|
||||
return cgroup;
|
||||
};
|
||||
};
|
||||
#endif
|
||||
|
||||
std::unique_ptr<UserLock> acquireUserLock()
|
||||
{
|
||||
#if __linux__
|
||||
if (settings.autoAllocateUids)
|
||||
return CgroupUserLock::acquire();
|
||||
else
|
||||
std::optional<Path> getCgroup() override { return cgroup; }
|
||||
#endif
|
||||
};
|
||||
|
||||
std::unique_ptr<UserLock> acquireUserLock(uid_t nrIds)
|
||||
{
|
||||
if (settings.autoAllocateUids)
|
||||
return AutoUserLock::acquire(nrIds);
|
||||
else
|
||||
return SimpleUserLock::acquire();
|
||||
}
|
||||
|
||||
|
|
|
@ -11,18 +11,16 @@ struct UserLock
|
|||
virtual ~UserLock() { }
|
||||
|
||||
/* Get the first and last UID. */
|
||||
virtual std::pair<uid_t, uid_t> getUIDRange() = 0;
|
||||
std::pair<uid_t, uid_t> getUIDRange()
|
||||
{
|
||||
auto first = getUID();
|
||||
return {first, first + getUIDCount() - 1};
|
||||
}
|
||||
|
||||
/* Get the first UID. */
|
||||
uid_t getUID()
|
||||
{
|
||||
return getUIDRange().first;
|
||||
}
|
||||
virtual uid_t getUID() = 0;
|
||||
|
||||
uid_t getUIDCount()
|
||||
{
|
||||
return getUIDRange().second - getUIDRange().first + 1;
|
||||
}
|
||||
virtual uid_t getUIDCount() = 0;
|
||||
|
||||
virtual gid_t getGID() = 0;
|
||||
|
||||
|
@ -31,12 +29,14 @@ struct UserLock
|
|||
/* Kill any processes currently executing as this user. */
|
||||
virtual void kill() = 0;
|
||||
|
||||
#if __linux__
|
||||
virtual std::optional<Path> getCgroup() { return {}; };
|
||||
#endif
|
||||
};
|
||||
|
||||
/* Acquire a user lock. Note that this may return nullptr if no user
|
||||
is available. */
|
||||
std::unique_ptr<UserLock> acquireUserLock();
|
||||
/* Acquire a user lock for a UID range of size `nrIds`. Note that this
|
||||
may return nullptr if no user is available. */
|
||||
std::unique_ptr<UserLock> acquireUserLock(uid_t nrIds);
|
||||
|
||||
bool useBuildUsers();
|
||||
|
||||
|
|
|
@ -90,6 +90,7 @@ std::optional<Strings> ParsedDerivation::getStringsAttr(const std::string & name
|
|||
|
||||
StringSet ParsedDerivation::getRequiredSystemFeatures() const
|
||||
{
|
||||
// FIXME: cache this?
|
||||
StringSet res;
|
||||
for (auto & i : getStringsAttr("requiredSystemFeatures").value_or(Strings()))
|
||||
res.insert(i);
|
||||
|
@ -125,6 +126,11 @@ bool ParsedDerivation::substitutesAllowed() const
|
|||
return getBoolAttr("allowSubstitutes", true);
|
||||
}
|
||||
|
||||
bool ParsedDerivation::useUidRange() const
|
||||
{
|
||||
return getRequiredSystemFeatures().count("uid-range");
|
||||
}
|
||||
|
||||
static std::regex shVarName("[A-Za-z_][A-Za-z0-9_]*");
|
||||
|
||||
std::optional<nlohmann::json> ParsedDerivation::prepareStructuredAttrs(Store & store, const StorePathSet & inputPaths)
|
||||
|
|
|
@ -38,6 +38,8 @@ public:
|
|||
|
||||
bool substitutesAllowed() const;
|
||||
|
||||
bool useUidRange() const;
|
||||
|
||||
std::optional<nlohmann::json> prepareStructuredAttrs(Store & store, const StorePathSet & inputPaths);
|
||||
};
|
||||
|
||||
|
|
Loading…
Reference in a new issue