nixos/slurm: convert test from perl to python

Markus Kowalewski 2019-11-10 23:07:08 +01:00
parent 472e165b56
commit 9b28dbd36a

@@ -1,4 +1,4 @@
-import ./make-test.nix ({ lib, ... }:
+import ./make-test-python.nix ({ lib, ... }:
 
 let
     mungekey = "mungeverryweakkeybuteasytointegratoinatest";
@@ -85,63 +85,57 @@ in {
   testScript =
   ''
-  startAll;
+  start_all()
 
   # Set up authentication across the cluster
-  foreach my $node (($submit,$control,$dbd,$node1,$node2,$node3))
-  {
-    $node->waitForUnit("default.target");
-    $node->succeed("mkdir /etc/munge");
-    $node->succeed("echo '${mungekey}' > /etc/munge/munge.key");
-    $node->succeed("chmod 0400 /etc/munge/munge.key");
-    $node->succeed("chown munge:munge /etc/munge/munge.key");
-    $node->succeed("systemctl restart munged");
-    $node->waitForUnit("munged");
-  };
+  for node in [submit, control, dbd, node1, node2, node3]:
+      node.wait_for_unit("default.target")
+      node.succeed("mkdir /etc/munge")
+      node.succeed(
+          "echo '${mungekey}' > /etc/munge/munge.key"
+      )
+      node.succeed("chmod 0400 /etc/munge/munge.key")
+      node.succeed("chown munge:munge /etc/munge/munge.key")
+      node.succeed("systemctl restart munged")
+      node.wait_for_unit("munged")
 
   # Restart the services since they have probably failed due to the munge init
   # failure
-  subtest "can_start_slurmdbd", sub {
-    $dbd->succeed("systemctl restart slurmdbd");
-    $dbd->waitForUnit("slurmdbd.service");
-    $dbd->waitForOpenPort(6819);
-  };
+  with subtest("can_start_slurmdbd"):
+      dbd.succeed("systemctl restart slurmdbd")
+      dbd.wait_for_unit("slurmdbd.service")
+      dbd.wait_for_open_port(6819)
 
   # there needs to be an entry for the current
   # cluster in the database before slurmctld is restarted
-  subtest "add_account", sub {
-    $control->succeed("sacctmgr -i add cluster default");
-    # check for cluster entry
-    $control->succeed("sacctmgr list cluster | awk '{ print \$1 }' | grep default");
-  };
+  with subtest("add_account"):
+      control.succeed("sacctmgr -i add cluster default")
+      # check for cluster entry
+      control.succeed("sacctmgr list cluster | awk '{ print $1 }' | grep default")
 
-  subtest "can_start_slurmctld", sub {
-    $control->succeed("systemctl restart slurmctld");
-    $control->waitForUnit("slurmctld.service");
-  };
+  with subtest("can_start_slurmctld"):
+      control.succeed("systemctl restart slurmctld")
+      control.wait_for_unit("slurmctld.service")
 
-  subtest "can_start_slurmd", sub {
-    foreach my $node (($node1,$node2,$node3))
-    {
-      $node->succeed("systemctl restart slurmd.service");
-      $node->waitForUnit("slurmd");
-    }
-  };
+  with subtest("can_start_slurmd"):
+      for node in [node1, node2, node3]:
+          node.succeed("systemctl restart slurmd.service")
+          node.wait_for_unit("slurmd")
 
   # Test that the cluster works and can distribute jobs;
 
-  subtest "run_distributed_command", sub {
-    # Run `hostname` on 3 nodes of the partition (so on all the 3 nodes).
-    # The output must contain the 3 different names
-    $submit->succeed("srun -N 3 hostname | sort | uniq | wc -l | xargs test 3 -eq");
-  };
+  with subtest("run_distributed_command"):
+      # Run `hostname` on 3 nodes of the partition (so on all the 3 nodes).
+      # The output must contain the 3 different names
+      submit.succeed("srun -N 3 hostname | sort | uniq | wc -l | xargs test 3 -eq")
 
-  subtest "check_slurm_dbd", sub {
-    # find the srun job from above in the database
-    sleep 5;
-    $control->succeed("sacct | grep hostname");
-  };
+  with subtest("check_slurm_dbd"):
+      # find the srun job from above in the database
+      control.succeed("sleep 5")
+      control.succeed("sacct | grep hostname")
   '';
 })
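
The conversion above is a mostly mechanical mapping between the two test drivers as it appears in the diff: startAll; becomes start_all(), $node->waitForUnit(...) becomes node.wait_for_unit(...), $node->succeed(...) becomes node.succeed(...), and subtest "name", sub { ... }; becomes with subtest("name"):. The sketch below is not the NixOS test driver itself; FakeMachine and the subtest context manager are made-up stand-ins that only illustrate why the converted Python script is shaped the way it is (the real objects are injected by make-test-python.nix).

from contextlib import contextmanager


@contextmanager
def subtest(name):
    # Stand-in for the driver's subtest(): label the block and surface failures.
    print(f"subtest: {name}")
    try:
        yield
    except Exception as e:
        print(f"subtest {name} failed: {e}")
        raise


class FakeMachine:
    """Hypothetical stand-in for the driver's Machine object (snake_case API)."""

    def __init__(self, name):
        self.name = name

    def succeed(self, command):
        # The real driver runs `command` in the VM and raises if it exits non-zero.
        print(f"[{self.name}] $ {command}")

    def wait_for_unit(self, unit):
        print(f"[{self.name}] waiting for systemd unit {unit}")

    def wait_for_open_port(self, port):
        print(f"[{self.name}] waiting for TCP port {port}")


# Usage mirroring one of the converted subtests above:
control = FakeMachine("control")
with subtest("can_start_slurmctld"):
    control.succeed("systemctl restart slurmctld")
    control.wait_for_unit("slurmctld.service")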