Restore parent mount namespace in restoreProcessContext

This ensures any started processes can't write to /nix/store (except
during builds). This partially reverts 01d07b1e, which happened because
of #2646.

The problem was only happening after nix downloads anything, causing
me to suspect the download thread. The problem turns out to be:
"A  process  can't  join a new mount namespace if it is sharing
filesystem-related attributes with another process", in this case this
process is the curl thread.

Ideally, we might kill it before spawning the shell process, but it's
inside a static variable in the getFileTransfer() function. So
instead, stop it from sharing FS state using unshare(). A strategy
such as the one from #5057 (single-threaded chroot helper binary) is
also very much on the table.

Fixes #4337.
This commit is contained in:
Yorick van Pelt 2021-10-15 16:25:49 +02:00
parent 130284b850
commit fcb8af550f
No known key found for this signature in database
GPG key ID: D8D3CC6D951384DE
4 changed files with 44 additions and 3 deletions

View file

@ -544,6 +544,14 @@ struct curlFileTransfer : public FileTransfer
stopWorkerThread();
});
#ifdef __linux__
/* Cause this thread to not share any FS attributes with the main thread,
because this causes setns() in restoreMountNamespace() to fail.
Ideally, this would happen in the std::thread() constructor. */
if (unshare(CLONE_FS) != 0)
throw SysError("unsharing filesystem state in download thread");
#endif
std::map<CURL *, std::shared_ptr<TransferItem>> items;
bool quit = false;

View file

@ -495,6 +495,7 @@ void LocalStore::makeStoreWritable()
throw SysError("getting info about the Nix store mount point");
if (stat.f_flag & ST_RDONLY) {
saveMountNamespace();
if (unshare(CLONE_NEWNS) == -1)
throw SysError("setting up a private mount namespace");

View file

@ -1629,10 +1629,34 @@ void setStackSize(size_t stackSize)
}
#endif
}
static AutoCloseFD fdSavedMountNamespace;
void restoreProcessContext()
void saveMountNamespace()
{
#if __linux__
static std::once_flag done;
std::call_once(done, []() {
fdSavedMountNamespace = open("/proc/self/ns/mnt", O_RDONLY);
if (!fdSavedMountNamespace)
throw SysError("saving parent mount namespace");
});
#endif
}
void restoreMountNamespace()
{
#if __linux__
if (fdSavedMountNamespace && setns(fdSavedMountNamespace.get(), CLONE_NEWNS) == -1)
throw SysError("restoring parent mount namespace");
#endif
}
void restoreProcessContext(bool restoreMounts)
{
restoreSignals();
if (restoreMounts) {
restoreMountNamespace();
}
restoreAffinity();
@ -1766,7 +1790,7 @@ void commonChildInit(Pipe & logPipe)
logger = makeSimpleLogger();
const static string pathNullDevice = "/dev/null";
restoreProcessContext();
restoreProcessContext(false);
/* Put the child in a separate session (and thus a separate
process group) so that it has no controlling terminal (meaning

View file

@ -300,7 +300,15 @@ void setStackSize(size_t stackSize);
/* Restore the original inherited Unix process context (such as signal
masks, stack size, CPU affinity). */
void restoreProcessContext();
void restoreProcessContext(bool restoreMounts = true);
/* Save the current mount namespace. Ignored if called more than
once. */
void saveMountNamespace();
/* Restore the mount namespace saved by saveMountNamespace(). Ignored
if saveMountNamespace() was never called. */
void restoreMountNamespace();
class ExecError : public Error