build-remote.pl: Use ‘nix-store --serve’ on the remote side

This makes things more efficient (we don't need to use an SSH master
connection, and we only start a single remote process) and gets rid of
locking issues (the remote nix-store process will keep inputs and
outputs locked as long as they're needed).

It also makes it more or less secure to connect directly to the root
account on the build machine, using a forced command
(e.g. ‘command="nix-store --serve --write"’). This bypasses the Nix
daemon and is therefore more efficient.

Also, don't call nix-store to import the output paths.
This commit is contained in:
Eelco Dolstra 2014-07-11 16:02:19 +02:00
parent b8f24f2535
commit a5c6347ff0
9 changed files with 185 additions and 115 deletions

View file

@ -3,76 +3,27 @@ package Nix::CopyClosure;
use strict; use strict;
use Nix::Config; use Nix::Config;
use Nix::Store; use Nix::Store;
use Nix::SSH;
use List::Util qw(sum); use List::Util qw(sum);
use IPC::Open2; use IPC::Open2;
sub readN { sub copyToOpen {
my ($bytes, $from) = @_; my ($from, $to, $sshHost, $storePaths, $compressor, $decompressor,
my $res = "";
while ($bytes > 0) {
my $s;
my $n = sysread($from, $s, $bytes);
die "I/O error reading from remote side\n" if !defined $n;
die "got EOF while expecting $bytes bytes from remote side\n" if !$n;
$bytes -= $n;
$res .= $s;
}
return $res;
}
sub readInt {
my ($from) = @_;
return unpack("L<x4", readN(8, $from));
}
sub writeString {
my ($s, $to) = @_;
my $len = length $s;
my $req .= pack("L<x4", $len);
$req .= $s;
$req .= "\000" x (8 - $len % 8) if $len % 8;
syswrite($to, $req) or die;
}
sub copyTo {
my ($sshHost, $sshOpts, $storePaths, $compressor, $decompressor,
$includeOutputs, $dryRun, $sign, $progressViewer, $useSubstitutes) = @_; $includeOutputs, $dryRun, $sign, $progressViewer, $useSubstitutes) = @_;
$useSubstitutes = 0 if $dryRun; $useSubstitutes = 0 if $dryRun || !defined $useSubstitutes;
# Get the closure of this path. # Get the closure of this path.
my @closure = reverse(topoSortPaths(computeFSClosure(0, $includeOutputs, my @closure = reverse(topoSortPaths(computeFSClosure(0, $includeOutputs,
map { followLinksToStorePath $_ } @{$storePaths}))); map { followLinksToStorePath $_ } @{$storePaths})));
# Start nix-store --serve on the remote host.
my ($from, $to);
my $pid = open2($from, $to, "ssh $sshHost @{$sshOpts} nix-store --serve --write");
# Do the handshake.
eval {
my $SERVE_MAGIC_1 = 0x390c9deb; # FIXME
my $clientVersion = 0x200;
syswrite($to, pack("L<x4L<x4", $SERVE_MAGIC_1, $clientVersion)) or die;
die "did not get valid handshake from remote host\n" if readInt($from) != 0x5452eecb;
my $serverVersion = readInt($from);
die "unsupported server version\n" if $serverVersion < 0x200 || $serverVersion >= 0x300;
};
if ($@) {
chomp $@;
warn "$@; falling back to old closure copying method\n";
return oldCopyTo(\@closure, @_);
}
# Send the "query valid paths" command with the "lock" option # Send the "query valid paths" command with the "lock" option
# enabled. This prevents a race where the remote host # enabled. This prevents a race where the remote host
# garbage-collect paths that are already there. Optionally, ask # garbage-collect paths that are already there. Optionally, ask
# the remote host to substitute missing paths. # the remote host to substitute missing paths.
syswrite($to, pack("L<x4L<x4L<x4L<x4", 1, 1, $useSubstitutes, scalar @closure)) or die; syswrite($to, pack("L<x4L<x4L<x4", 1, 1, $useSubstitutes)) or die;
writeString($_, $to) foreach @closure; writeStrings(\@closure, $to);
# Get back the set of paths that are already valid on the remote host. # Get back the set of paths that are already valid on the remote host.
my %present; my %present;
@ -115,22 +66,47 @@ sub copyTo {
} else { } else {
exportPaths(fileno($to), $sign, @missing); exportPaths(fileno($to), $sign, @missing);
close $to;
} }
readInt($from) == 1 or die "remote machine \`$sshHost' failed to import closure\n"; readInt($from) == 1 or die "remote machine \`$sshHost' failed to import closure\n";
} }
# Copy the closure of $storePaths to $sshHost.  First tries to reach a
# remote `nix-store --serve' via connectToRemoteNix; if that fails for
# any reason, a warning is printed and the work is delegated to
# oldCopyTo with the unmodified argument list.  Otherwise the transfer
# is done by copyToOpen over the freshly opened pipes, after which the
# write side is closed.
sub copyTo {
    my ($sshHost, $sshOpts, $storePaths, $compressor, $decompressor,
        $includeOutputs, $dryRun, $sign, $progressViewer, $useSubstitutes) = @_;

    # Connect to the remote host.  Only the first two values returned
    # by connectToRemoteNix (read handle, write handle) are kept.
    my ($from, $to) = eval { connectToRemoteNix($sshHost, $sshOpts) };

    unless ($@) {
        copyToOpen($from, $to, $sshHost, $storePaths, $compressor, $decompressor,
            $includeOutputs, $dryRun, $sign, $progressViewer, $useSubstitutes);
        return close $to;
    }

    # The connection or handshake failed: use the legacy method.
    chomp $@;
    warn "$@; falling back to old closure copying method\n";
    return oldCopyTo(@_);
}
# For backwards compatibility with Nix <= 1.7. Will be removed # For backwards compatibility with Nix <= 1.7. Will be removed
# eventually. # eventually.
sub oldCopyTo { sub oldCopyTo {
my ($closure, $sshHost, $sshOpts, $storePaths, $compressor, $decompressor, my ($sshHost, $sshOpts, $storePaths, $compressor, $decompressor,
$includeOutputs, $dryRun, $sign, $progressViewer, $useSubstitutes) = @_; $includeOutputs, $dryRun, $sign, $progressViewer, $useSubstitutes) = @_;
# Get the closure of this path.
my @closure = reverse(topoSortPaths(computeFSClosure(0, $includeOutputs,
map { followLinksToStorePath $_ } @{$storePaths})));
# Optionally use substitutes on the remote host. # Optionally use substitutes on the remote host.
if (!$dryRun && $useSubstitutes) { if (!$dryRun && $useSubstitutes) {
system "ssh $sshHost @{$sshOpts} nix-store -r --ignore-unknown @$closure"; system "ssh $sshHost @{$sshOpts} nix-store -r --ignore-unknown @closure";
# Ignore exit status because this is just an optimisation. # Ignore exit status because this is just an optimisation.
} }
@ -140,8 +116,8 @@ sub oldCopyTo {
# target having this option yet. # target having this option yet.
my @missing; my @missing;
my $missingSize = 0; my $missingSize = 0;
while (scalar(@$closure) > 0) { while (scalar(@closure) > 0) {
my @ps = splice(@$closure, 0, 1500); my @ps = splice(@closure, 0, 1500);
open(READ, "set -f; ssh $sshHost @{$sshOpts} nix-store --check-validity --print-invalid @ps|"); open(READ, "set -f; ssh $sshHost @{$sshOpts} nix-store --check-validity --print-invalid @ps|");
while (<READ>) { while (<READ>) {
chomp; chomp;

View file

@ -1,5 +1,16 @@
package Nix::SSH;
use strict; use strict;
use File::Temp qw(tempdir); use File::Temp qw(tempdir);
use IPC::Open2;
our @ISA = qw(Exporter);
our @EXPORT = qw(
sshOpts openSSHConnection closeSSHConnection
readN readInt writeInt writeString writeStrings
connectToRemoteNix
);
our @sshOpts = split ' ', ($ENV{"NIX_SSHOPTS"} or ""); our @sshOpts = split ' ', ($ENV{"NIX_SSHOPTS"} or "");
@ -8,6 +19,7 @@ push @sshOpts, "-x";
my $sshStarted = 0; my $sshStarted = 0;
my $sshHost; my $sshHost;
# Open a master SSH connection to `host', unless there already is a # Open a master SSH connection to `host', unless there already is a
# running master connection (as determined by `-O check'). # running master connection (as determined by `-O check').
sub openSSHConnection { sub openSSHConnection {
@ -18,7 +30,7 @@ sub openSSHConnection {
my $tmpDir = tempdir("nix-ssh.XXXXXX", CLEANUP => 1, TMPDIR => 1) my $tmpDir = tempdir("nix-ssh.XXXXXX", CLEANUP => 1, TMPDIR => 1)
or die "cannot create a temporary directory"; or die "cannot create a temporary directory";
push @sshOpts, "-S", "$tmpDir/control"; push @sshOpts, "-S", "$tmpDir/control";
# Start the master. We can't use the `-f' flag (fork into # Start the master. We can't use the `-f' flag (fork into
@ -39,6 +51,7 @@ sub openSSHConnection {
return 0; return 0;
} }
# Tell the master SSH client to exit. # Tell the master SSH client to exit.
sub closeSSHConnection { sub closeSSHConnection {
if ($sshStarted) { if ($sshStarted) {
@ -48,6 +61,70 @@ sub closeSSHConnection {
} }
} }
# Read exactly $bytes bytes from the file handle $from using sysread,
# looping over short reads, and return them as one string.  Dies if
# sysread reports an error or if end-of-file is hit before the
# requested amount has arrived.  A request for zero (or fewer) bytes
# returns the empty string without touching the handle.
sub readN {
    my ($bytes, $from) = @_;
    my $buffer = "";
    until ($bytes <= 0) {
        # Append straight onto the end of what has been read so far.
        my $got = sysread($from, $buffer, $bytes, length($buffer));
        die "I/O error reading from remote side\n" unless defined $got;
        die "got EOF while expecting $bytes bytes from remote side\n" if $got == 0;
        $bytes -= $got;
    }
    return $buffer;
}
# Read one integer from $from: eight bytes are consumed via readN, of
# which the first four are decoded as a little-endian unsigned 32-bit
# value and the remaining four are skipped.
sub readInt {
    my ($from) = @_;
    my $raw = readN(8, $from);
    return unpack("L<x4", $raw);
}
# Write the integer $n to the handle $to as eight bytes: a
# little-endian unsigned 32-bit value followed by four zero bytes.
# Dies if syswrite fails; otherwise returns syswrite's result, so
# callers may test the return value for truth.
sub writeInt {
    my ($n, $to) = @_;
    my $wire = pack("L<x4", $n);
    my $written = syswrite($to, $wire) or die;
    return $written;
}
# Write the string $s to the handle $to in the framing used by the
# other helpers here: an eight-byte length field (little-endian
# unsigned 32-bit length plus four zero bytes), the string itself, and
# zero padding up to the next multiple of eight bytes.
#
# syswrite may write fewer bytes than requested, so the request is
# re-issued for the unwritten tail until everything has been sent;
# stopping after a short write would silently corrupt the stream.
# Dies with a descriptive message if a write fails.  Returns the total
# number of bytes sent (always at least 8, hence true).
sub writeString {
    my ($s, $to) = @_;
    my $len = length $s;
    my $req = pack("L<x4", $len) . $s;
    $req .= "\000" x (8 - $len % 8) if $len % 8;

    my $total = length $req;
    my $offset = 0;
    while ($offset < $total) {
        my $n = syswrite($to, $req, $total - $offset, $offset);
        die "I/O error writing to remote side: $!\n" unless defined $n;
        # Guard against spinning forever if nothing can be written.
        die "could not write to remote side\n" unless $n > 0;
        $offset += $n;
    }
    return $total;
}
# Write the list of strings referenced by $ss to the handle $to: first
# the number of elements (via writeInt), then each element in order
# (via writeString).
sub writeStrings {
    my ($ss, $to) = @_;
    my $count = scalar(@{$ss});
    writeInt($count, $to);
    for my $item (@{$ss}) {
        writeString($item, $to);
    }
}
# Start `nix-store --serve --write' on $sshHost through ssh (with the
# extra ssh options in the array reference $sshOpts) and perform the
# protocol handshake.
#
# Returns the list ($from, $to, $pid): the handle to read replies
# from, the handle to write requests to, and the process id of the
# local ssh child.
#
# Dies if the handshake cannot be sent, if the remote side does not
# answer with the expected magic number, or if it reports a protocol
# version outside the 0x200..0x2ff range.  On such a failure both
# handles are closed and the ssh child is terminated and reaped
# before the error is re-thrown, so that callers which retry (e.g.
# with another machine) do not accumulate stray processes and open
# descriptors.
sub connectToRemoteNix {
    my ($sshHost, $sshOpts) = @_;

    # Start nix-store --serve on the remote host.
    my ($from, $to);
    my $pid = open2($from, $to, "ssh $sshHost @{$sshOpts} nix-store --serve --write");

    # Do the handshake.
    eval {
        my $SERVE_MAGIC_1 = 0x390c9deb; # FIXME
        my $SERVE_MAGIC_2 = 0x5452eecb; # value the remote side must answer with
        my $clientVersion = 0x200;
        syswrite($to, pack("L<x4L<x4", $SERVE_MAGIC_1, $clientVersion))
            or die "cannot send handshake to remote host: $!\n";
        die "did not get valid handshake from remote host\n" if readInt($from) != $SERVE_MAGIC_2;
        my $serverVersion = readInt($from);
        die "unsupported server version\n" if $serverVersion < 0x200 || $serverVersion >= 0x300;
    };

    if ($@) {
        my $err = $@;
        # Keep the cleanup from disturbing the caller's view of $? and $!.
        local ($?, $!);
        close $to;
        close $from;
        kill 'TERM', $pid;
        waitpid($pid, 0);
        die $err;
    }

    return ($from, $to, $pid);
}
END { my $saved = $?; closeSSHConnection; $? = $saved; } END { my $saved = $?; closeSSHConnection; $? = $saved; }
return 1; 1;

View file

@ -15,7 +15,7 @@ our @EXPORT_OK = ( @{ $EXPORT_TAGS{'all'} } );
our @EXPORT = qw( our @EXPORT = qw(
isValidPath queryReferences queryPathInfo queryDeriver queryPathHash isValidPath queryReferences queryPathInfo queryDeriver queryPathHash
queryPathFromHashPart queryPathFromHashPart
topoSortPaths computeFSClosure followLinksToStorePath exportPaths topoSortPaths computeFSClosure followLinksToStorePath exportPaths importPaths
hashPath hashFile hashString hashPath hashFile hashString
addToStore makeFixedOutputPath addToStore makeFixedOutputPath
derivationFromPath derivationFromPath

View file

@ -179,6 +179,17 @@ void exportPaths(int fd, int sign, ...)
} }
void importPaths(int fd)
    PPCODE:
        try {
            doInit();
            /* Read the serialised paths from the given file descriptor
               and import them into the store.
               NOTE(review): the `false' argument presumably means
               "do not require a signature" -- confirm against the
               declaration of importPaths in the store API. */
            FdSource source(fd);
            store->importPaths(false, source);
        } catch (Error & e) {
            /* croak() takes a printf-style format.  Pass the message
               as an argument rather than as the format itself, so that
               a `%' in the error text (e.g. in a path) cannot be
               misinterpreted as a conversion specifier. */
            croak("%s", e.what());
        }
SV * hashPath(char * algo, int base32, char * path) SV * hashPath(char * algo, int base32, char * path)
PPCODE: PPCODE:
try { try {

View file

@ -27,7 +27,7 @@ ifeq ($(perlbindings), yes)
Store_CXXFLAGS = \ Store_CXXFLAGS = \
-I$(shell $(perl) -e 'use Config; print $$Config{archlibexp};')/CORE \ -I$(shell $(perl) -e 'use Config; print $$Config{archlibexp};')/CORE \
-D_FILE_OFFSET_BITS=64 -D_FILE_OFFSET_BITS=64 -Wno-unused-variable -Wno-literal-suffix
Store_ALLOW_UNDEFINED = 1 Store_ALLOW_UNDEFINED = 1

View file

@ -4,7 +4,7 @@ use Fcntl qw(:DEFAULT :flock);
use English '-no_match_vars'; use English '-no_match_vars';
use IO::Handle; use IO::Handle;
use Nix::Config; use Nix::Config;
use Nix::SSH qw/sshOpts openSSHConnection/; use Nix::SSH;
use Nix::CopyClosure; use Nix::CopyClosure;
use Nix::Store; use Nix::Store;
no warnings('once'); no warnings('once');
@ -90,6 +90,7 @@ if (defined $conf && -e $conf) {
# Wait for the calling process to ask us whether we can build some derivation. # Wait for the calling process to ask us whether we can build some derivation.
my ($drvPath, $hostName, $slotLock); my ($drvPath, $hostName, $slotLock);
my ($from, $to);
REQ: while (1) { REQ: while (1) {
$_ = <STDIN> || exit 0; $_ = <STDIN> || exit 0;
@ -195,13 +196,15 @@ REQ: while (1) {
# Connect to the selected machine. # Connect to the selected machine.
@sshOpts = ("-i", $machine->{sshKeys}, "-x"); @sshOpts = ("-i", $machine->{sshKeys}, "-x");
$hostName = $machine->{hostName}; $hostName = $machine->{hostName};
if (openSSHConnection($hostName)) { eval {
last REQ if system("ssh $hostName @sshOpts nix-builds-inhibited < /dev/null > /dev/null 2>&1") != 0; ($from, $to) = connectToRemoteNix($hostName, \@sshOpts);
warn "machine `$hostName' is refusing builds, trying other available machines...\n"; # FIXME: check if builds are inhibited.
closeSSHConnection; };
} else { last REQ unless $@;
warn "unable to open SSH connection to `$hostName', trying other available machines...\n"; print STDERR "$@";
} warn "unable to open SSH connection to `$hostName', trying other available machines...\n";
$from = undef;
$to = undef;
$machine->{enabled} = 0; $machine->{enabled} = 0;
} }
} }
@ -220,18 +223,6 @@ my $maybeSign = "";
$maybeSign = "--sign" if -e "$Nix::Config::confDir/signing-key.sec"; $maybeSign = "--sign" if -e "$Nix::Config::confDir/signing-key.sec";
# Register the derivation as a temporary GC root. Note that $PPID is
# the PID of the remote SSH process, which, due to the use of a
# persistent SSH connection, should be the same across all remote
# command invocations for this session.
my $rootsDir = "@localstatedir@/nix/gcroots/tmp";
system("ssh $hostName @sshOpts 'mkdir -m 1777 -p $rootsDir; ln -sfn $drvPath $rootsDir/\$PPID.drv'");
sub removeRoots {
system("ssh $hostName @sshOpts 'rm -f $rootsDir/\$PPID.drv $rootsDir/\$PPID.out'");
}
# Copy the derivation and its dependencies to the build machine. This # Copy the derivation and its dependencies to the build machine. This
# is guarded by an exclusive lock per machine to prevent multiple # is guarded by an exclusive lock per machine to prevent multiple
# build-remote instances from copying to a machine simultaneously. # build-remote instances from copying to a machine simultaneously.
@ -255,48 +246,33 @@ if ($@) {
print STDERR "somebody is hogging $uploadLock, continuing...\n"; print STDERR "somebody is hogging $uploadLock, continuing...\n";
unlink $uploadLock; unlink $uploadLock;
} }
Nix::CopyClosure::copyTo($hostName, [ @sshOpts ], [ $drvPath, @inputs ], "", "", 0, 0, $maybeSign ne "", ""); Nix::CopyClosure::copyToOpen($from, $to, $hostName, [ $drvPath, @inputs ], "", "", 0, 0, $maybeSign ne "", "");
close UPLOADLOCK; close UPLOADLOCK;
# Perform the build. # Perform the build.
my $buildFlags =
"--max-silent-time $maxSilentTime --option build-timeout $buildTimeout"
. " --fallback --add-root $rootsDir/\$PPID.out --quiet"
. " --option build-keep-log false --option build-use-substitutes false";
# We let the remote side kill its process group when the connection is
# closed unexpectedly. This is necessary to ensure that no processes
# are left running on the remote system if the local Nix process is
# killed. (SSH itself doesn't kill child processes if the connection
# is interrupted unless the `-tt' flag is used to force a pseudo-tty,
# in which case every child receives SIGHUP; however, `-tt' doesn't
# work on some platforms when connection sharing is used.)
print STDERR "building `$drvPath' on `$hostName'\n"; print STDERR "building `$drvPath' on `$hostName'\n";
pipe STDIN, DUMMY; # make sure we have a readable STDIN writeInt(6, $to) or die; # == cmdBuildPaths
if (system("exec ssh $hostName @sshOpts '(read; kill -INT -\$\$) <&0 & exec nix-store -r $drvPath $buildFlags > /dev/null' 2>&4") != 0) { writeStrings([$drvPath], $to);
writeInt($maxSilentTime, $to);
writeInt($buildTimeout, $to);
my $res = readInt($from);
if ($res != 0) {
# Note that if we get exit code 100 from `nix-store -r', it # Note that if we get exit code 100 from `nix-store -r', it
# denotes a permanent build failure (as opposed to an SSH problem # denotes a permanent build failure (as opposed to an SSH problem
# or a temporary Nix problem). We propagate this to the caller to # or a temporary Nix problem). We propagate this to the caller to
# allow it to distinguish between transient and permanent # allow it to distinguish between transient and permanent
# failures. # failures.
my $res = $? >> 8;
print STDERR "build of `$drvPath' on `$hostName' failed with exit code $res\n"; print STDERR "build of `$drvPath' on `$hostName' failed with exit code $res\n";
removeRoots;
exit $res; exit $res;
} }
#print "build of `$drvPath' on `$hostName' succeeded\n";
# Copy the output from the build machine. # Copy the output from the build machine.
my @outputs2 = grep { !isValidPath($_) } @outputs; my @outputs2 = grep { !isValidPath($_) } @outputs;
if (scalar @outputs2 > 0) { if (scalar @outputs2 > 0) {
system("exec ssh $hostName @sshOpts 'nix-store --export @outputs2'" . writeInt(5, $to) or die; # == cmdExportPaths
"| NIX_HELD_LOCKS='@outputs2' @bindir@/nix-store --import > /dev/null") == 0 writeStrings(\@outputs2, $to);
or die("cannot copy paths " . join(", ", @outputs) . " from `$hostName': $?"); $ENV{'NIX_HELD_LOCKS'} = "@outputs2"; # FIXME: ugly
importPaths(fileno($from));
} }
# Get rid of the temporary GC roots.
removeRoots;

View file

@ -35,6 +35,7 @@ template<class T> T readStorePaths(Source & from)
} }
template PathSet readStorePaths(Source & from); template PathSet readStorePaths(Source & from);
template Paths readStorePaths(Source & from);
RemoteStore::RemoteStore() RemoteStore::RemoteStore()

View file

@ -928,7 +928,6 @@ static void opServe(Strings opFlags, Strings opArgs)
} }
writeStrings(store->queryValidPaths(paths), out); writeStrings(store->queryValidPaths(paths), out);
out.flush();
break; break;
} }
@ -947,17 +946,15 @@ static void opServe(Strings opFlags, Strings opArgs)
writeLongLong(info.narSize, out); writeLongLong(info.narSize, out);
} }
writeString("", out); writeString("", out);
out.flush();
break; break;
} }
case cmdDumpStorePath: case cmdDumpStorePath:
dumpPath(readStorePath(in), out); dumpPath(readStorePath(in), out);
out.flush();
break; break;
case cmdImportPaths: { case cmdImportPaths: {
if (!writeAllowed) throw Error("importing paths not allowed"); if (!writeAllowed) throw Error("importing paths is not allowed");
string compression = readString(in); string compression = readString(in);
if (compression != "") { if (compression != "") {
@ -986,7 +983,6 @@ static void opServe(Strings opFlags, Strings opArgs)
store->importPaths(false, in); store->importPaths(false, in);
writeInt(1, out); // indicate success writeInt(1, out); // indicate success
out.flush();
/* The decompressor will have left stdin in an /* The decompressor will have left stdin in an
undefined state, so we can't continue. */ undefined state, so we can't continue. */
@ -995,9 +991,40 @@ static void opServe(Strings opFlags, Strings opArgs)
break; break;
} }
case cmdExportPaths: {
exportPaths(*store, readStorePaths<Paths>(in), false, out);
break;
}
case cmdBuildPaths: {
/* Used by build-remote.pl. */
if (!writeAllowed) throw Error("building paths is not allowed");
PathSet paths = readStorePaths<PathSet>(in);
// FIXME: changing options here doesn't work if we're
// building through the daemon.
verbosity = lvlError;
settings.keepLog = false;
settings.useSubstitutes = false;
settings.maxSilentTime = readInt(in);
settings.buildTimeout = readInt(in);
int res = 0;
try {
store->buildPaths(paths);
} catch (Error & e) {
printMsg(lvlError, format("error: %1%") % e.msg());
res = e.status;
}
writeInt(res, out);
break;
}
default: default:
throw Error(format("unknown serve command %1%") % cmd); throw Error(format("unknown serve command %1%") % cmd);
} }
out.flush();
} }
} }

View file

@ -14,6 +14,8 @@ typedef enum {
cmdQueryPathInfos = 2, cmdQueryPathInfos = 2,
cmdDumpStorePath = 3, cmdDumpStorePath = 3,
cmdImportPaths = 4, cmdImportPaths = 4,
cmdExportPaths = 5,
cmdBuildPaths = 6,
} ServeCommand; } ServeCommand;
} }