nix/scripts/build-remote.pl.in

#! @perl@ -w

use strict;
use Fcntl ':flock';
use English '-no_match_vars';

# General operation:
#
# Try to find a free machine of type $neededSystem.  We do this as
# follows:
# - We acquire an exclusive lock on $currentLoad/main-lock.
# - For each machine $machine of type $neededSystem and for each $slot
#   less than the maximum load for that machine, we try to get an
#   exclusive lock on $currentLoad/$systemType-$hostName-$slot (without
#   blocking).  If we get such a lock, we send "accept" to the caller.
#   Otherwise, we send "postpone" and exit (or "decline" if no machine
#   of the right type is configured).
# - We release the exclusive lock on $currentLoad/main-lock.
# - We perform the build on the selected machine.
# - We release the exclusive lock on the slot lock file.
#
# The nice thing about this scheme is that if we die prematurely, the
# locks are released automatically.

# Not referenced anywhere else in the visible script.
my $loadIncreased = 0;

# Positional arguments:
#   $amWilling, $localSystem, $neededSystem - together decide whether we
#       decline because the local system can do the build itself;
#   $drvPath - the derivation that is copied to and built on the
#       remote machine;
#   $mustRun - optional; when true, the slot search is allowed to go
#       past a machine's maximum number of jobs.
my ($amWilling, $localSystem, $neededSystem, $drvPath, $mustRun) = @ARGV;
defined $mustRun or $mustRun = 0;

# Send a one-line reply to the caller over file descriptor 3.
#
# Parameters:
#   $reply - the reply text; a newline is appended.
#
# Dies with a diagnostic if the descriptor cannot be duplicated or the
# reply cannot be written.
sub sendReply {
    my $reply = shift;
    # Duplicate fd 3 rather than adopting it, so that closing our
    # handle leaves the descriptor itself open for later replies.
    open(my $out, ">&3") or die "cannot open file descriptor 3 for writing: $!\n";
    print $out "$reply\n" or die "cannot write reply `$reply': $!\n";
    # Buffered write errors only surface at close time.
    close($out) or die "cannot flush reply `$reply': $!\n";
}

# Tell the caller that we will not handle this build, then terminate
# successfully.  Never returns.
sub decline {
    sendReply("decline");
    exit(0);
}

# Directory holding the lock files that track the current load.
my $currentLoad = $ENV{"NIX_CURRENT_LOAD"};
decline() unless defined $currentLoad;

# Several hooks may start at the same time, so another process can
# create the directory between our -d test and our mkdir.  Only give
# up if the directory still does not exist after mkdir failed.
if (!-d $currentLoad && !mkdir($currentLoad, 0777)) {
    my $mkdirError = $!;
    die "cannot create directory `$currentLoad': $mkdirError\n"
        unless -d $currentLoad;
}

# File listing the remote machines; without it we cannot do anything.
my $conf = $ENV{"NIX_REMOTE_SYSTEMS"};
decline() if !defined $conf || ! -e $conf;

# Decline if the local system can do the build.
decline() if $amWilling && ($localSystem eq $neededSystem);


# Otherwise find a willing remote machine.
my @machines;
my %curJobs;    # not referenced anywhere else in the visible script


# Read the list of machines.  Each non-empty line has four
# whitespace-separated fields:
#
#   <host name> <system type> <ssh key file> <maximum number of jobs>
#
# Everything from a `#' to the end of the line is a comment.
# The three-argument open keeps special characters in the file name
# from being interpreted as part of the open mode.
open(my $confFile, "<", $conf) or die "cannot open `$conf': $!\n";

while (my $line = <$confFile>) {
    chomp $line;
    $line =~ s/\#.*$//;
    next if $line =~ /^\s*$/;
    my ($hostName, $systemType, $sshKeys, $maxJobs) =
        $line =~ /^\s*(\S+)\s+(\S+)\s+(\S+)\s+(\d+)\s*$/
        or die "$conf:$.: malformed machine specification `$line'\n";
    push @machines,
        { hostName => $hostName
        , systemType => $systemType
        , sshKeys => $sshKeys
        , maxJobs => $maxJobs
        };
}

close $confFile;


# Take the global lock so that only one hook at a time is choosing a
# slot.  This call blocks until the lock is available.
my $mainLock = "$currentLoad/main-lock";
open MAINLOCK, ">>$mainLock" or die;
flock(MAINLOCK, LOCK_EX) or die;


# Walk the machine list looking for a free slot on a machine of the
# needed type.  $rightType records whether any such machine exists at
# all; $machine stays undefined if no slot could be locked.
my $rightType = 0;
my $machine;
LOOP: foreach my $cur (@machines) {
    next LOOP unless $neededSystem eq $cur->{systemType};
    $rightType = 1;

    # One lock file per slot.  When $mustRun is set we keep going past
    # maxJobs until some slot can be locked.
    my $lockPrefix = "$currentLoad/" . $cur->{systemType} . "-" . $cur->{hostName};
    for (my $slot = 0; $mustRun || $slot < $cur->{maxJobs}; $slot++) {
        my $slotLock = "$lockPrefix-$slot";
        open SLOTLOCK, ">>$slotLock" or die;

        # Slot is taken by somebody else: release the handle and try
        # the next one.
        unless (flock(SLOTLOCK, LOCK_EX | LOCK_NB)) {
            close SLOTLOCK;
            next;
        }

        if ($slot >= $cur->{maxJobs}) {
            print STDERR "warning: exceeding maximum load on " . $cur->{systemType} . "\n";
        }

        # SLOTLOCK is deliberately left open: the lock is held for as
        # long as this process keeps the handle.
        $machine = $cur;
        last LOOP;
    }
}

close MAINLOCK;


# No slot obtained.  If no configured machine has the needed type we
# decline (which exits); otherwise all suitable machines are busy and
# we ask the caller to postpone.
unless (defined $machine) {
    decline unless $rightType;
    sendReply "postpone";
    exit 0;
}

# Yes we did, accept.  Then read the caller's answer from file
# descriptor 4; anything other than "okay" means we should not build.
sendReply "accept";
open(my $in, "<&4") or die "cannot open file descriptor 4 for reading: $!\n";
my $x = <$in>;
close $in;

# At end-of-file the read yields undef; treat that like any other
# non-"okay" answer instead of passing undef to chomp and `ne'.
exit 0 unless defined $x;
chomp $x;
#print "got $x\n";

if ($x ne "okay") {
    exit 0;
}


# Do the actual job.
my $hostName = $machine->{hostName};
print "BUILDING REMOTE: $drvPath on $hostName\n";

# Make sure that we don't get any SSH passphrase or host key popups -
# if there is any problem it should fail, not do something
# interactive.
#
# The variable names must not contain a trailing `=': with it, the
# assignments touched differently named %ENV entries and left the
# intended variables untouched.
$ENV{"DISPLAY"} = "";
$ENV{"SSH_PASSWORD_FILE"} = "";
$ENV{"SSH_ASKPASS"} = "";

# Options that are placed after `ssh' on every command line below:
# use the machine's key file and disable X11 forwarding.
my $sshOpts = "-i " . $machine->{sshKeys} . " -x";

# Hack to support Cygwin: if we login without a password, we don't
# have exactly the same rights as when we do.  This causes the
# Microsoft C compiler to fail with certain flags:
#
#   http://connect.microsoft.com/VisualStudio/feedback/ViewFeedback.aspx?FeedbackID=99676
#
# So as a workaround, we pass a verbatim password.  ssh tries to make
# this very hard; the trick is to make it call SSH_ASKPASS to get the
# password.  (It only calls this command when there is no controlling
# terminal, but Nix ensures that this is the case.  When doing this
# manually, use setsid(1).)
#
# A key field of the form `password:FILE' selects this mode.
if ($machine->{sshKeys} =~ /^password:/) {
    my $passwordFile = $machine->{sshKeys};
    $passwordFile =~ s/^password://;

    # $sshOpts is always interpolated right after the `ssh' command
    # name, so it must contain options only.  A leading "ssh" here
    # would be taken as the name of the host to connect to.
    $sshOpts = "-x";
    $ENV{"SSH_PASSWORD_FILE"} = $passwordFile;
    $ENV{"SSH_ASKPASS"} = "/tmp/writepass";

    # NOTE(review): this is a fixed, predictable path in a
    # world-writable directory, and it is shared by all concurrently
    # running hooks.  Consider a private temporary file instead.
    open(my $writepass, ">", "/tmp/writepass")
        or die "cannot create `/tmp/writepass': $!\n";
    print $writepass "#! /bin/sh\ncat \"\$SSH_PASSWORD_FILE\"";
    close($writepass) or die "cannot write `/tmp/writepass': $!\n";
    chmod 0755, "/tmp/writepass" or die "cannot make `/tmp/writepass' executable: $!\n";
}

# Slurp the files `inputs' and `outputs' from the current directory
# and turn every newline into a space, so that the contents can be
# pasted onto a shell command line.
# (presumably one store path per line; verify against the caller)
my $inputs = qx(cat inputs);
die if $? != 0;
$inputs =~ tr/\n/ /;

my $outputs = qx(cat outputs);
die if $? != 0;
$outputs =~ tr/\n/ /;

print "COPYING INPUTS...\n";

# Pass --sign to nix-copy-closure only when a signing key is present.
my $maybeSign = (-e "/nix/etc/nix/signing-key.sec") ? "--sign" : "";

# Copy the derivation and its inputs to the remote machine; the ssh
# options are handed over through NIX_SSHOPTS.
my $copyCommand = "NIX_SSHOPTS=\"$sshOpts\" nix-copy-closure $hostName $maybeSign $drvPath $inputs";
if (system($copyCommand) != 0) {
    die "cannot copy inputs to $hostName: $?";
}

print "BUILDING...\n";

# `-tt' forces allocation of a pseudo-terminal.  This is required to
# make the remote nix-store process receive a signal when the
# connection dies.  Without it, the remote process might continue to
# run indefinitely (that is, until it next tries to write to
# stdout/stderr).
my $buildCommand = "ssh -tt $sshOpts $hostName 'nix-store -rvvK $drvPath'";
if (system($buildCommand) != 0) {
    die "remote build on $hostName failed: $?";
}

print "REMOTE BUILD DONE: $drvPath on $hostName\n";

# Whether the remote side should sign the export depends only on the
# local user, so decide once rather than on every iteration.
my $maybeSignRemote = $UID != 0 ? "--sign" : "";

# Fetch every output path from the remote machine and unpack it
# locally.  $outputs has had its newlines replaced by spaces, so it
# must be split on whitespace: splitting on "\n" would yield one
# element holding all paths and they would never be handled one by
# one.  split ' ' also ignores leading and trailing whitespace.
foreach my $output (split ' ', $outputs) {
    system("ssh $sshOpts $hostName 'nix-store --export $maybeSignRemote $output' > dump") == 0
        or die "cannot copy $output from $hostName: $?";

    # This doesn't work yet, since the caller has a lock on the output
    # path.  We should move towards lock-free invocation of build
    # hooks and substitutes.
    #system("nix-store --import < dump") == 0
    #    or die "cannot import $output: $?";

    # Hack: skip the first 8 bytes (the nix-store --export next
    # archive marker).  The archive follows.
    system("(dd bs=1 count=8 of=/dev/null && cat) < dump | nix-store --restore $output") == 0
        or die "cannot restore $output: $?";
}
* Add build-remote.pl to the Nix distribution. 2007-11-15 14:28:08 +00:00			`#! @perl@ -w`

			`use strict;`
			`use Fcntl ':flock';`
			`use English '-no_match_vars';`

			`# General operation:`
			`#`
			`# Try to find a free machine of type $neededSystem. We do this as`
			`# follows:`
			`# - We acquire an exclusive lock on $currentLoad/main-lock.`
			`# - For each machine $machine of type $neededSystem and for each $slot`
			`# less than the maximum load for that machine, we try to get an`
			`# exclusive lock on $currentLoad/$machine-$slot (without blocking).`
			`# If we get such a lock, we send "accept" to the caller. Otherwise,`
			`# we send "postpone" and exit.`
			`# - We release the exclusive lock on $currentLoad/main-lock.`
			`# - We perform the build on $neededSystem.`
			`# - We release the exclusive lock on $currentLoad/$machine-$slot.`
			`#`
			`# The nice thing about this scheme is that if we die prematurely, the`
			`# locks are released automatically.`

			`my $loadIncreased = 0;`

* When using build hooks, for any nix-store -r build operation, it is necessary that at least one build hook doesn't return "postpone", otherwise nix-store will barf ("waiting for a build slot, yet there are no running children"). So inform the build hook when this is the case, so that it can start a build even when that would exceed the maximum load on a machine. 2008-12-04 14:29:41 +00:00			`my ($amWilling, $localSystem, $neededSystem, $drvPath, $mustRun) = @ARGV;`
			`$mustRun = 0 unless defined $mustRun;`
* Add build-remote.pl to the Nix distribution. 2007-11-15 14:28:08 +00:00
			`sub sendReply {`
			`my $reply = shift;`
			`open OUT, ">&3" or die;`
			`print OUT "$reply\n";`
			`close OUT;`
			`}`

			`sub decline {`
			`sendReply "decline";`
			`exit 0;`
			`}`

			`my $currentLoad = $ENV{"NIX_CURRENT_LOAD"};`
			`decline unless defined $currentLoad;`
			`mkdir $currentLoad, 0777 or die unless -d $currentLoad;`

			`my $conf = $ENV{"NIX_REMOTE_SYSTEMS"};`
			`decline if !defined $conf || ! -e $conf;`

			`# Decline if the local system can do the build.`
			`decline if $amWilling && ($localSystem eq $neededSystem);`


			`# Otherwise find a willing remote machine.`
* Support multiple system types per remote machine, e.g. a machine list like root@example.org x86_64-linux /root/.ssh/id_buildfarm 1 root@example.org i686-darwin /root/.ssh/id_buildfarm 1 This is possible when the Nix installation on example.org itself has remote builds enabled. 2008-12-04 12:20:06 +00:00			`my @machines;`
* Add build-remote.pl to the Nix distribution. 2007-11-15 14:28:08 +00:00			`my %curJobs;`


			`# Read the list of machines.`
			`open CONF, "< $conf" or die;`

			`while (<CONF>) {`
			`chomp;`
			`s/\#.*$//g;`
			`next if /^\s*$/;`
			`/^\s*(\S+)\s+(\S+)\s+(\S+)\s+(\d+)\s*$/ or die;`
* Support multiple system types per remote machine, e.g. a machine list like root@example.org x86_64-linux /root/.ssh/id_buildfarm 1 root@example.org i686-darwin /root/.ssh/id_buildfarm 1 This is possible when the Nix installation on example.org itself has remote builds enabled. 2008-12-04 12:20:06 +00:00			`push @machines,`
			`{ hostName => $1`
			`, systemType => $2`
			`, sshKeys => $3`
			`, maxJobs => $4`
			`};`
* Add build-remote.pl to the Nix distribution. 2007-11-15 14:28:08 +00:00			`}`

			`close CONF;`


			`# Acquire the exclusive lock on $currentLoad/main-lock.`
			`my $mainLock = "$currentLoad/main-lock";`
			`open MAINLOCK, ">>$mainLock" or die;`
			`flock(MAINLOCK, LOCK_EX) or die;`


			`# Find a suitable system.`
			`my $rightType = 0;`
			`my $machine;`
* Support multiple system types per remote machine, e.g. a machine list like root@example.org x86_64-linux /root/.ssh/id_buildfarm 1 root@example.org i686-darwin /root/.ssh/id_buildfarm 1 This is possible when the Nix installation on example.org itself has remote builds enabled. 2008-12-04 12:20:06 +00:00			`LOOP: foreach my $cur (@machines) {`
			`if ($neededSystem eq $cur->{systemType}) {`
* Add build-remote.pl to the Nix distribution. 2007-11-15 14:28:08 +00:00			`$rightType = 1;`

			`# We have a machine of the right type. Try to get a lock on`
			`# one of the machine's lock files.`
			`my $slot = 0;`
* When using build hooks, for any nix-store -r build operation, it is necessary that at least one build hook doesn't return "postpone", otherwise nix-store will barf ("waiting for a build slot, yet there are no running children"). So inform the build hook when this is the case, so that it can start a build even when that would exceed the maximum load on a machine. 2008-12-04 14:29:41 +00:00			`while ($slot < $cur->{maxJobs} || $mustRun) {`
* Support multiple system types per remote machine, e.g. a machine list like root@example.org x86_64-linux /root/.ssh/id_buildfarm 1 root@example.org i686-darwin /root/.ssh/id_buildfarm 1 This is possible when the Nix installation on example.org itself has remote builds enabled. 2008-12-04 12:20:06 +00:00			`my $slotLock = "$currentLoad/" . $cur->{systemType} . "-" . $cur->{hostName} . "-$slot";`
* Add build-remote.pl to the Nix distribution. 2007-11-15 14:28:08 +00:00			`open SLOTLOCK, ">>$slotLock" or die;`
			`if (flock(SLOTLOCK, LOCK_EX | LOCK_NB)) {`
* When using build hooks, for any nix-store -r build operation, it is necessary that at least one build hook doesn't return "postpone", otherwise nix-store will barf ("waiting for a build slot, yet there are no running children"). So inform the build hook when this is the case, so that it can start a build even when that would exceed the maximum load on a machine. 2008-12-04 14:29:41 +00:00			`print STDERR "warning: exceeding maximum load on " . $cur->{systemType} . "\n"`
			`if $slot >= $cur->{maxJobs};`
* Add build-remote.pl to the Nix distribution. 2007-11-15 14:28:08 +00:00			`$machine = $cur;`
			`last LOOP;`
* When using build hooks, for any nix-store -r build operation, it is necessary that at least one build hook doesn't return "postpone", otherwise nix-store will barf ("waiting for a build slot, yet there are no running children"). So inform the build hook when this is the case, so that it can start a build even when that would exceed the maximum load on a machine. 2008-12-04 14:29:41 +00:00			`}`
* Add build-remote.pl to the Nix distribution. 2007-11-15 14:28:08 +00:00			`close SLOTLOCK;`
			`$slot++;`
			`}`
			`}`
			`}`

			`close MAINLOCK;`


			`# Didn't find one?`
			`if (!defined $machine) {`
			`if ($rightType) {`
			`sendReply "postpone";`
			`exit 0;`
			`} else {`
			`decline;`
			`}`
			`}`

			`# Yes we did, accept.`
			`sendReply "accept";`
			`open IN, "<&4" or die;`
			`my $x = <IN>;`
			`chomp $x;`
			`#print "got $x\n";`
			`close IN;`

			`if ($x ne "okay") {`
			`exit 0;`
			`}`


			`# Do the actual job.`
* Support multiple system types per remote machine, e.g. a machine list like root@example.org x86_64-linux /root/.ssh/id_buildfarm 1 root@example.org i686-darwin /root/.ssh/id_buildfarm 1 This is possible when the Nix installation on example.org itself has remote builds enabled. 2008-12-04 12:20:06 +00:00			`my $hostName = $machine->{hostName};`
			`print "BUILDING REMOTE: $drvPath on $hostName\n";`
* Add build-remote.pl to the Nix distribution. 2007-11-15 14:28:08 +00:00
			`# Make sure that we don't get any SSH passphrase or host key popups -`
			`# if there is any problem it should fail, not do something`
			`# interactive.`
			`$ENV{"DISPLAY"} = "";`
			`$ENV{"SSH_PASSWORD_FILE="} = "";`
			`$ENV{"SSH_ASKPASS="} = "";`

* Support multiple system types per remote machine, e.g. a machine list like root@example.org x86_64-linux /root/.ssh/id_buildfarm 1 root@example.org i686-darwin /root/.ssh/id_buildfarm 1 This is possible when the Nix installation on example.org itself has remote builds enabled. 2008-12-04 12:20:06 +00:00			`my $sshOpts = "-i " . $machine->{sshKeys} . " -x";`
* Add build-remote.pl to the Nix distribution. 2007-11-15 14:28:08 +00:00
			`# Hack to support Cygwin: if we login without a password, we don't`
* Support multiple system types per remote machine, e.g. a machine list like root@example.org x86_64-linux /root/.ssh/id_buildfarm 1 root@example.org i686-darwin /root/.ssh/id_buildfarm 1 This is possible when the Nix installation on example.org itself has remote builds enabled. 2008-12-04 12:20:06 +00:00			`# have exactly the same rights as when we do. This causes the`
* Add build-remote.pl to the Nix distribution. 2007-11-15 14:28:08 +00:00			`# Microsoft C compiler to fail with certain flags:`
			`#`
			`# http://connect.microsoft.com/VisualStudio/feedback/ViewFeedback.aspx?FeedbackID=99676`
			`#`
			`# So as a workaround, we pass a verbatim password. ssh tries to makes`
			`# this very hard; the trick is to make it call SSH_ASKPASS to get the`
			`# password. (It only calls this command when there is no controlling`
			`# terminal, but Nix ensures that is is the case. When doing this`
			`# manually, use setsid(1).)`
* Support multiple system types per remote machine, e.g. a machine list like root@example.org x86_64-linux /root/.ssh/id_buildfarm 1 root@example.org i686-darwin /root/.ssh/id_buildfarm 1 This is possible when the Nix installation on example.org itself has remote builds enabled. 2008-12-04 12:20:06 +00:00			`if ($machine->{sshKeys} =~ /^password:/) {`
			`my $passwordFile = $machine->{sshKeys};`
* Add build-remote.pl to the Nix distribution. 2007-11-15 14:28:08 +00:00			`$passwordFile =~ s/^password://;`
			`$sshOpts = "ssh -x";`
			`$ENV{"SSH_PASSWORD_FILE"} = $passwordFile;`
			`$ENV{"SSH_ASKPASS"} = "/tmp/writepass";`

			`open WRITEPASS, ">/tmp/writepass" or die;`
			`print WRITEPASS "#! /bin/sh\ncat \"\$SSH_PASSWORD_FILE\"";`
			`close WRITEPASS;`
			`chmod 0755, "/tmp/writepass" or die;`
			`}`

			my $inputs = `cat inputs`; die if ($? != 0);
			`$inputs =~ s/\n/ /g;`

			my $outputs = `cat outputs`; die if ($? != 0);
			`$outputs =~ s/\n/ /g;`

			`print "COPYING INPUTS...\n";`

			`my $maybeSign = "";`
			`$maybeSign = "--sign" if -e "/nix/etc/nix/signing-key.sec";`

* Support multiple system types per remote machine, e.g. a machine list like root@example.org x86_64-linux /root/.ssh/id_buildfarm 1 root@example.org i686-darwin /root/.ssh/id_buildfarm 1 This is possible when the Nix installation on example.org itself has remote builds enabled. 2008-12-04 12:20:06 +00:00			`system("NIX_SSHOPTS=\"$sshOpts\" nix-copy-closure $hostName $maybeSign $drvPath $inputs") == 0`
			`or die "cannot copy inputs to $hostName: $?";`
* Add build-remote.pl to the Nix distribution. 2007-11-15 14:28:08 +00:00
			`print "BUILDING...\n";`

* Force allocation of a pseudo-terminal to clean up the remote nix-store process when the connection is interrupted. 2008-12-04 13:36:52 +00:00			# `-tt' forces allocation of a pseudo-terminal. This is required to
			`# make the remote nix-store process receive a signal when the`
			`# connection dies. Without it, the remote process might continue to`
			`# run indefinitely (that is, until it next tries to write to`
			`# stdout/stderr).`
			`system("ssh -tt $sshOpts $hostName 'nix-store -rvvK $drvPath'") == 0`
* Support multiple system types per remote machine, e.g. a machine list like root@example.org x86_64-linux /root/.ssh/id_buildfarm 1 root@example.org i686-darwin /root/.ssh/id_buildfarm 1 This is possible when the Nix installation on example.org itself has remote builds enabled. 2008-12-04 12:20:06 +00:00			`or die "remote build on $hostName failed: $?";`
* Add build-remote.pl to the Nix distribution. 2007-11-15 14:28:08 +00:00
* Support multiple system types per remote machine, e.g. a machine list like root@example.org x86_64-linux /root/.ssh/id_buildfarm 1 root@example.org i686-darwin /root/.ssh/id_buildfarm 1 This is possible when the Nix installation on example.org itself has remote builds enabled. 2008-12-04 12:20:06 +00:00			`print "REMOTE BUILD DONE: $drvPath on $hostName\n";`
* Add build-remote.pl to the Nix distribution. 2007-11-15 14:28:08 +00:00
			`foreach my $output (split '\n', $outputs) {`
			`my $maybeSignRemote = "";`
			`$maybeSignRemote = "--sign" if $UID != 0;`

* Support multiple system types per remote machine, e.g. a machine list like root@example.org x86_64-linux /root/.ssh/id_buildfarm 1 root@example.org i686-darwin /root/.ssh/id_buildfarm 1 This is possible when the Nix installation on example.org itself has remote builds enabled. 2008-12-04 12:20:06 +00:00			`system("ssh $sshOpts $hostName 'nix-store --export $maybeSignRemote $output' > dump") == 0`
			`or die "cannot copy $output from $hostName: $?";`
* Add build-remote.pl to the Nix distribution. 2007-11-15 14:28:08 +00:00
			`# This doesn't work yet, since the caller has a lock on the output`
			`# path. We should move towards lock-free invocation of build`
			`# hooks and substitutes.`
			`#system("nix-store --import < dump") == 0`
			`# or die "cannot import $output: $?";`

			`# Hack: skip the first 8 bytes (the nix-store --export next`
			`# archive marker). The archive follows.`
			`system("(dd bs=1 count=8 of=/dev/null && cat) < dump | nix-store --restore $output") == 0`
			`or die "cannot restore $output: $?";`
			`}`