2011-10-11 14:45:36 +03:00
|
|
|
|
#! @perl@ -w @perlFlags@
|
2007-11-15 16:28:08 +02:00
|
|
|
|
|
2014-08-29 18:48:25 +03:00
|
|
|
|
use utf8;
|
2011-12-21 21:11:58 +02:00
|
|
|
|
use Fcntl qw(:DEFAULT :flock);
|
2007-11-15 16:28:08 +02:00
|
|
|
|
use English '-no_match_vars';
|
2009-03-29 21:40:44 +03:00
|
|
|
|
use IO::Handle;
|
2011-11-23 14:21:35 +02:00
|
|
|
|
use Nix::Config;
|
2014-07-11 17:02:19 +03:00
|
|
|
|
use Nix::SSH;
|
2011-11-23 17:13:37 +02:00
|
|
|
|
use Nix::CopyClosure;
|
2012-09-11 23:59:59 +03:00
|
|
|
|
use Nix::Store;
|
2014-09-05 12:53:11 +03:00
|
|
|
|
use Encode;
|
2010-10-04 15:30:46 +03:00
|
|
|
|
no warnings('once');
|
2010-02-03 22:12:18 +02:00
|
|
|
|
|
2014-09-05 12:43:14 +03:00
|
|
|
|
# Emit diagnostics unbuffered and encoded as UTF-8, since some of the
# messages printed below contain non-ASCII quotation marks.
STDERR->autoflush(1);
binmode(STDERR, ":encoding(utf8)");

# Extra diagnostics are printed when NIX_DEBUG_HOOK is defined in the
# environment (its value is not inspected).
my $debug = defined($ENV{"NIX_DEBUG_HOOK"});
|
|
|
|
|
|
2007-11-15 16:28:08 +02:00
|
|
|
|
|
|
|
|
|
# General operation:
|
|
|
|
|
#
|
|
|
|
|
# Try to find a free machine of type $neededSystem. We do this as
|
|
|
|
|
# follows:
|
|
|
|
|
# - We acquire an exclusive lock on $currentLoad/main-lock.
|
|
|
|
|
# - For each machine $machine of type $neededSystem and for each $slot
|
|
|
|
|
# less than the maximum load for that machine, we try to get an
|
|
|
|
|
# exclusive lock on $currentLoad/$machine-$slot (without blocking).
|
|
|
|
|
# If we get such a lock, we send "accept" to the caller. Otherwise,
|
2012-09-11 23:59:59 +03:00
|
|
|
|
#   we send "postpone" or "decline" and wait for the next request.
|
2007-11-15 16:28:08 +02:00
|
|
|
|
# - We release the exclusive lock on $currentLoad/main-lock.
|
|
|
|
|
# - We perform the build on $machine.
|
|
|
|
|
# - We release the exclusive lock on $currentLoad/$machine-$slot.
|
|
|
|
|
#
|
|
|
|
|
# The nice thing about this scheme is that if we die prematurely, the
|
|
|
|
|
# locks are released automatically.
|
|
|
|
|
|
2010-02-03 22:12:18 +02:00
|
|
|
|
|
|
|
|
|
# Blank out the variables SSH consults for graphical prompts, so that a
# passphrase or host key question can never pop up: any such problem
# must make the connection fail instead of turning interactive.
$ENV{$_} = "" foreach ("DISPLAY", "SSH_ASKPASS");
|
|
|
|
|
|
|
|
|
|
|
2007-11-15 16:28:08 +02:00
|
|
|
|
# Send a one-line reply to the calling process.  Replies are written to
# standard error as "# <reply>" followed by a newline.
sub sendReply {
    my ($answer) = @_;
    return print STDERR "# " . $answer . "\n";
}
|
|
|
|
|
|
2010-08-27 16:18:13 +03:00
|
|
|
|
# Return 1 if every argument is true (vacuously so for an empty list),
# and 0 as soon as a false argument is encountered.
sub all {
    foreach my $value (@_) {
        return 0 unless $value;
    }
    return 1;
}
|
|
|
|
|
|
2007-11-15 16:28:08 +02:00
|
|
|
|
|
2010-08-25 23:44:28 +03:00
|
|
|
|
# Initialisation.
my $loadIncreased = 0;

# Positional arguments passed by the caller, in this order.
my $localSystem   = $ARGV[0];
my $maxSilentTime = $ARGV[1];
my $buildTimeout  = $ARGV[2];

# Directory in which the lock files are kept; overridable through
# NIX_CURRENT_LOAD.
my $currentLoad = defined $ENV{"NIX_CURRENT_LOAD"}
    ? $ENV{"NIX_CURRENT_LOAD"}
    : "/run/nix/current-load";

# File listing the remote build machines; overridable through
# NIX_REMOTE_SYSTEMS.
my $conf = defined $ENV{"NIX_REMOTE_SYSTEMS"}
    ? $ENV{"NIX_REMOTE_SYSTEMS"}
    : "@sysconfdir@/nix/machines";
|
2007-11-15 16:28:08 +02:00
|
|
|
|
|
2010-08-25 23:44:28 +03:00
|
|
|
|
|
|
|
|
|
# Open (creating it if necessary) the lock file that represents one
# build slot of a machine, and return the open handle.  The file is NOT
# locked here; callers flock() the returned handle themselves.
#
#   $machine - hash ref with at least `systemTypes' (array ref) and
#              `hostName'
#   $slot    - slot number
#
# The lock file lives in $currentLoad and is named
# "<systemTypes joined by '+'>-<hostName>-<slot>".  Dies with the file
# name and the system error if the file cannot be opened.
sub openSlotLock {
    my ($machine, $slot) = @_;
    my $slotLockFn = "$currentLoad/" . (join '+', @{$machine->{systemTypes}}) . "-" . $machine->{hostName} . "-$slot";
    my $slotLock = IO::Handle->new;
    sysopen($slotLock, $slotLockFn, O_RDWR|O_CREAT, 0600)
        or die "cannot open slot lock file '$slotLockFn': $!";
    return $slotLock;
}
|
2007-11-15 16:28:08 +02:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Read the list of machines.
#
# Each non-empty line of $conf (after stripping `#' comments) describes
# one machine as whitespace-separated fields:
#
#   hostName systemTypes sshKey maxJobs [speedFactor [supportedFeatures [mandatoryFeatures]]]
#
# where systemTypes and the two feature fields are comma-separated
# lists.  Mandatory features are also counted as supported features.
my @machines;
if (defined $conf && -e $conf) {
    # Three-argument open, so that unusual characters in the file name
    # can never be interpreted as an open mode.
    open(my $confFh, '<', $conf) or die "cannot open machines file '$conf': $!";
    while (my $line = <$confFh>) {
        chomp $line;
        $line =~ s/\#.*$//g;
        next if $line =~ /^\s*$/;
        # Split on runs of whitespace and ignore leading whitespace;
        # splitting on single whitespace characters would produce empty
        # fields and shift every column for lines that are indented or
        # use more than one separator character.
        my @tokens = split ' ', $line;
        my @supportedFeatures = split(/,/, $tokens[5] || "");
        my @mandatoryFeatures = split(/,/, $tokens[6] || "");
        push @machines,
            { hostName => $tokens[0]
            , systemTypes => [ split(/,/, $tokens[1]) ]
            , sshKey => $tokens[2]
            , maxJobs => int($tokens[3])
            , speedFactor => 1.0 * (defined $tokens[4] ? int($tokens[4]) : 1)
            , supportedFeatures => [ @supportedFeatures, @mandatoryFeatures ]
            , mandatoryFeatures => [ @mandatoryFeatures ]
            , enabled => 1
            };
    }
    close $confFh;
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2010-08-25 23:44:28 +03:00
|
|
|
|
# Wait for the calling process to ask us whether we can build some derivation.
#
# On leaving the REQ loop, $drvPath is the derivation to build, $hostName
# the selected machine, $slotLock the (locked) slot on that machine and
# ($from, $to) the handles of the connection to it.
my ($drvPath, $hostName, $slotLock);
my ($from, $to);

# Sort key used to prioritise machines: load divided by speed factor,
# rounded to the nearest integer.
sub lf { my $x = shift; return int($x->{load} / $x->{machine}->{speedFactor} + 0.4999); }

REQ: while (1) {
    # A request is a single line of whitespace-separated fields:
    # amWilling, neededSystem, drvPath, requiredFeatures.  Exit quietly
    # once standard input is exhausted.
    defined(my $request = <STDIN>) or exit 0;
    (my $amWilling, my $neededSystem, $drvPath, my $requiredFeatures) = split ' ', $request;
    # Guard against a request that carries no feature field.
    my @requiredFeatures = split /,/, ($requiredFeatures // "");

    my $canBuildLocally = $amWilling && ($localSystem eq $neededSystem);

    if (!defined $currentLoad) {
        sendReply("decline");
        next REQ;
    }

    while (1) {
        # Acquire the exclusive lock on $currentLoad/main-lock.  This
        # happens on every pass of this loop, because the lock is
        # released before connecting to a machine; when the connection
        # fails and we retry, slots must again be probed and locked
        # under the main lock.
        if (!-d $currentLoad && !mkdir($currentLoad, 0777)) {
            my $mkdirError = $!;
            # Another process may have created the directory meanwhile.
            die "cannot create directory '$currentLoad': $mkdirError" unless -d $currentLoad;
        }
        my $mainLock = "$currentLoad/main-lock";
        sysopen(my $mainLockFh, $mainLock, O_RDWR|O_CREAT, 0600)
            or die "cannot open main lock file '$mainLock': $!";
        flock($mainLockFh, LOCK_EX) or die "cannot lock '$mainLock': $!";

        # Find all machines that can execute this build, i.e., that
        # support builds for the given platform and features, and are
        # not at their job limit.
        my $rightType = 0;
        my @available = ();
        foreach my $cur (@machines) {
            if ($cur->{enabled}
                && (grep { $neededSystem eq $_ } @{$cur->{systemTypes}})
                # Every required feature must be supported by the machine...
                && all(map { my $f = $_; 0 != grep { $f eq $_ } @{$cur->{supportedFeatures}} } @requiredFeatures)
                # ...and every mandatory feature of the machine must be required.
                && all(map { my $f = $_; 0 != grep { $f eq $_ } @requiredFeatures } @{$cur->{mandatoryFeatures}})
                )
            {
                $rightType = 1;

                # We have a machine of the right type.  Determine the load on
                # the machine by probing each of its slot locks: a slot we
                # cannot lock is in use.  Remember the first free slot.
                my $slot = 0;
                my $load = 0;
                my $free;
                while ($slot < $cur->{maxJobs}) {
                    my $probe = openSlotLock($cur, $slot);
                    if (flock($probe, LOCK_EX | LOCK_NB)) {
                        $free = $slot unless defined $free;
                        flock($probe, LOCK_UN) or die "cannot unlock slot $slot of '$cur->{hostName}': $!";
                    } else {
                        $load++;
                    }
                    close $probe;
                    $slot++;
                }

                push @available, { machine => $cur, load => $load, free => $free }
                    if $load < $cur->{maxJobs};
            }
        }

        if ($debug) {
            print STDERR "load on " . $_->{machine}->{hostName} . " = " . $_->{load} . "\n"
                foreach @available;
        }

        # Didn't find any available machine?  Then decline or postpone.
        if (scalar @available == 0) {
            # Postpone if we have a machine of the right type, except
            # if the local system can and wants to do the build.
            if ($rightType && !$canBuildLocally) {
                sendReply("postpone");
            } else {
                sendReply("decline");
            }
            close $mainLockFh;
            next REQ;
        }

        # Prioritise the available machines as follows:
        # - First by load divided by speed factor, rounded to the nearest
        #   integer.  This causes fast machines to be preferred over slow
        #   machines with similar loads.
        # - Then by speed factor.
        # - Finally by load.
        @available = sort
            { lf($a) <=> lf($b)
                  || $b->{machine}->{speedFactor} <=> $a->{machine}->{speedFactor}
                  || $a->{load} <=> $b->{load}
            } @available;

        # Select the best available machine and lock a free slot.
        my $selected = $available[0];
        my $machine = $selected->{machine};

        $slotLock = openSlotLock($machine, $selected->{free});
        flock($slotLock, LOCK_EX | LOCK_NB)
            or die "cannot lock slot $selected->{free} of '$machine->{hostName}': $!";
        utime undef, undef, $slotLock;

        close $mainLockFh;

        # Connect to the selected machine.
        my @sshOpts = ("-i", $machine->{sshKey});
        $hostName = $machine->{hostName};
        eval {
            ($from, $to) = connectToRemoteNix($hostName, \@sshOpts, "2>&4");
            # FIXME: check if builds are inhibited.
        };
        last REQ unless $@;
        print STDERR "$@";
        warn "unable to open SSH connection to ‘$hostName’, trying other available machines...\n";
        $from = undef;
        $to = undef;
        # Don't consider this machine again, and give its slot back so
        # that we don't keep it reserved for a build that will not happen.
        $machine->{enabled} = 0;
        close $slotLock;
        $slotLock = undef;
    }
}
|
2009-09-17 18:48:17 +03:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Tell Nix we've accepted the build.  It answers with two lines, each a
# whitespace-separated list: first the inputs, then the outputs.
sendReply("accept");
my @inputs  = split(/\s/, readline(STDIN));
my @outputs = split(/\s/, readline(STDIN));
|
2007-11-15 16:28:08 +02:00
|
|
|
|
|
|
|
|
|
|
2011-12-21 20:59:25 +02:00
|
|
|
|
# Copy the derivation and its dependencies to the build machine.  This
# is guarded by an exclusive lock per machine to prevent multiple
# build-remote instances from copying to a machine simultaneously.
# That's undesirable because we may end up with N instances uploading
# the same missing path simultaneously, causing the effective network
# bandwidth and target disk speed to be divided by N.
my $uploadLock = "$currentLoad/$hostName.upload-lock";
sysopen(my $uploadLockFh, $uploadLock, O_RDWR|O_CREAT, 0600)
    or die "cannot open upload lock file '$uploadLock': $!";
eval {
    local $SIG{ALRM} = sub { die "alarm\n" };
    # Don't wait forever, so that a process that gets stuck while
    # holding the lock doesn't block everybody else indefinitely.
    # It's safe to continue after a timeout, just (potentially)
    # inefficient.
    alarm 15 * 60;
    flock($uploadLockFh, LOCK_EX);
    alarm 0;
};
if ($@) {
    # Anything other than our own timeout is a real error.
    die unless $@ eq "alarm\n";
    print STDERR "somebody is hogging $uploadLock, continuing...\n";
    # Remove the contended lock file; we proceed without the lock.
    unlink $uploadLock;
}
Nix::CopyClosure::copyToOpen($from, $to, $hostName, [ $drvPath, @inputs ], 0, 0);
close $uploadLockFh;
|
2007-11-15 16:28:08 +02:00
|
|
|
|
|
|
|
|
|
|
2010-08-25 14:54:11 +03:00
|
|
|
|
# Perform the build: send the build command for $drvPath together with
# the two timeouts over the connection, then read back the result code.
print STDERR "building ‘$drvPath’ on ‘$hostName’\n";
die unless writeInt(6, $to); # == cmdBuildPaths
my @buildTargets = ($drvPath);
writeStrings(\@buildTargets, $to);
writeInt($maxSilentTime, $to);
writeInt($buildTimeout, $to);
my $res = readInt($from);
unless ($res == 0) {
    # A non-zero result is followed by a UTF-8 encoded error message;
    # report it and exit with the result as our own status.
    my $remoteError = decode("utf-8", readString($from));
    print STDERR "error: $remoteError on ‘$hostName’\n";
    exit $res;
}
|
2007-11-15 16:28:08 +02:00
|
|
|
|
|
2010-08-25 14:54:11 +03:00
|
|
|
|
|
|
|
|
|
# Copy the output from the build machine.  Only outputs for which
# isValidPath() is false are requested.
my @missingOutputs = grep { !isValidPath($_) } @outputs;
if (@missingOutputs) {
    writeInt(5, $to); # == cmdExportPaths
    writeInt(0, $to); # don't sign
    writeStrings(\@missingOutputs, $to);
    $ENV{'NIX_HELD_LOCKS'} = join(" ", @missingOutputs); # FIXME: ugly
    importPaths(fileno($from), 1);
}
|