Merge pull request #73179 from markuskowa/fix-slurm
nixos/slurm: fix test and X11 options
This commit is contained in:
commit
6928cb22e9
@ -18,7 +18,7 @@ let
|
|||||||
${optionalString (cfg.controlAddr != null) ''controlAddr=${cfg.controlAddr}''}
|
${optionalString (cfg.controlAddr != null) ''controlAddr=${cfg.controlAddr}''}
|
||||||
${toString (map (x: "NodeName=${x}\n") cfg.nodeName)}
|
${toString (map (x: "NodeName=${x}\n") cfg.nodeName)}
|
||||||
${toString (map (x: "PartitionName=${x}\n") cfg.partitionName)}
|
${toString (map (x: "PartitionName=${x}\n") cfg.partitionName)}
|
||||||
PlugStackConfig=${plugStackConfig}
|
PlugStackConfig=${plugStackConfig}/plugstack.conf
|
||||||
ProctrackType=${cfg.procTrackType}
|
ProctrackType=${cfg.procTrackType}
|
||||||
${cfg.extraConfig}
|
${cfg.extraConfig}
|
||||||
'';
|
'';
|
||||||
@ -39,6 +39,8 @@ let
|
|||||||
DbdHost=${cfg.dbdserver.dbdHost}
|
DbdHost=${cfg.dbdserver.dbdHost}
|
||||||
SlurmUser=${cfg.user}
|
SlurmUser=${cfg.user}
|
||||||
StorageType=accounting_storage/mysql
|
StorageType=accounting_storage/mysql
|
||||||
|
StorageUser=${cfg.dbdserver.storageUser}
|
||||||
|
${optionalString (cfg.dbdserver.storagePass != null) "StoragePass=${cfg.dbdserver.storagePass}"}
|
||||||
${cfg.dbdserver.extraConfig}
|
${cfg.dbdserver.extraConfig}
|
||||||
'';
|
'';
|
||||||
|
|
||||||
@ -48,7 +50,6 @@ let
|
|||||||
name = "etc-slurm";
|
name = "etc-slurm";
|
||||||
paths = [ configFile cgroupConfig plugStackConfig ] ++ cfg.extraConfigPaths;
|
paths = [ configFile cgroupConfig plugStackConfig ] ++ cfg.extraConfigPaths;
|
||||||
};
|
};
|
||||||
|
|
||||||
in
|
in
|
||||||
|
|
||||||
{
|
{
|
||||||
@ -86,6 +87,37 @@ in
|
|||||||
'';
|
'';
|
||||||
};
|
};
|
||||||
|
|
||||||
|
storageUser = mkOption {
|
||||||
|
type = types.str;
|
||||||
|
default = cfg.user;
|
||||||
|
description = ''
|
||||||
|
Database user name.
|
||||||
|
'';
|
||||||
|
};
|
||||||
|
|
||||||
|
storagePass = mkOption {
|
||||||
|
type = types.nullOr types.str;
|
||||||
|
default = null;
|
||||||
|
description = ''
|
||||||
|
Database password. Note that this password will be publicly
|
||||||
|
readable in the nix store. Use <option>configFile</option>
|
||||||
|
to store the config file and password outside the nix store.
|
||||||
|
'';
|
||||||
|
};
|
||||||
|
|
||||||
|
configFile = mkOption {
|
||||||
|
type = types.nullOr types.str;
|
||||||
|
default = null;
|
||||||
|
description = ''
|
||||||
|
Path to <literal>slurmdbd.conf</literal>. The password for the database connection
|
||||||
|
is stored in the config file. Use this option to specify a path
|
||||||
|
outside the nix store. If this option is unset a configuration file
|
||||||
|
will be generated. See also:
|
||||||
|
<citerefentry><refentrytitle>slurmdbd.conf</refentrytitle>
|
||||||
|
<manvolnum>8</manvolnum></citerefentry>.
|
||||||
|
'';
|
||||||
|
};
|
||||||
|
|
||||||
extraConfig = mkOption {
|
extraConfig = mkOption {
|
||||||
type = types.lines;
|
type = types.lines;
|
||||||
default = "";
|
default = "";
|
||||||
@ -112,7 +144,7 @@ in
|
|||||||
|
|
||||||
package = mkOption {
|
package = mkOption {
|
||||||
type = types.package;
|
type = types.package;
|
||||||
default = pkgs.slurm;
|
default = pkgs.slurm.override { enableX11 = ! cfg.enableSrunX11; };
|
||||||
defaultText = "pkgs.slurm";
|
defaultText = "pkgs.slurm";
|
||||||
example = literalExample "pkgs.slurm-full";
|
example = literalExample "pkgs.slurm-full";
|
||||||
description = ''
|
description = ''
|
||||||
@ -178,9 +210,14 @@ in
|
|||||||
If enabled srun will accept the option "--x11" to allow for X11 forwarding
|
If enabled srun will accept the option "--x11" to allow for X11 forwarding
|
||||||
from within an interactive session or a batch job. This activates the
|
from within an interactive session or a batch job. This activates the
|
||||||
slurm-spank-x11 module. Note that this option also enables
|
slurm-spank-x11 module. Note that this option also enables
|
||||||
'services.openssh.forwardX11' on the client.
|
<option>services.openssh.forwardX11</option> on the client.
|
||||||
|
|
||||||
This option requires slurm to be compiled without native X11 support.
|
This option requires slurm to be compiled without native X11 support.
|
||||||
|
The default behavior is to re-compile the slurm package with native X11
|
||||||
|
support disabled if this option is set to true.
|
||||||
|
|
||||||
|
To use the native X11 support add <literal>PrologFlags=X11</literal> in <option>extraConfig</option>.
|
||||||
|
Note that this method will only work with RSA SSH host keys.
|
||||||
'';
|
'';
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -356,7 +393,11 @@ in
|
|||||||
requires = [ "munged.service" "mysql.service" ];
|
requires = [ "munged.service" "mysql.service" ];
|
||||||
|
|
||||||
# slurm strips the last component off the path
|
# slurm strips the last component off the path
|
||||||
environment.SLURM_CONF = "${slurmdbdConf}/slurm.conf";
|
environment.SLURM_CONF =
|
||||||
|
if (cfg.dbdserver.configFile == null) then
|
||||||
|
"${slurmdbdConf}/slurm.conf"
|
||||||
|
else
|
||||||
|
cfg.dbdserver.configFile;
|
||||||
|
|
||||||
serviceConfig = {
|
serviceConfig = {
|
||||||
Type = "forking";
|
Type = "forking";
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
import ./make-test.nix ({ lib, ... }:
|
import ./make-test-python.nix ({ lib, ... }:
|
||||||
let
|
let
|
||||||
mungekey = "mungeverryweakkeybuteasytointegratoinatest";
|
mungekey = "mungeverryweakkeybuteasytointegratoinatest";
|
||||||
|
|
||||||
@ -54,10 +54,15 @@ in {
|
|||||||
networking.firewall.enable = false;
|
networking.firewall.enable = false;
|
||||||
services.slurm.dbdserver = {
|
services.slurm.dbdserver = {
|
||||||
enable = true;
|
enable = true;
|
||||||
|
storagePass = "password123";
|
||||||
};
|
};
|
||||||
services.mysql = {
|
services.mysql = {
|
||||||
enable = true;
|
enable = true;
|
||||||
package = pkgs.mysql;
|
package = pkgs.mariadb;
|
||||||
|
initialScript = pkgs.writeText "mysql-init.sql" ''
|
||||||
|
CREATE USER 'slurm'@'localhost' IDENTIFIED BY 'password123';
|
||||||
|
GRANT ALL PRIVILEGES ON slurm_acct_db.* TO 'slurm'@'localhost';
|
||||||
|
'';
|
||||||
ensureDatabases = [ "slurm_acct_db" ];
|
ensureDatabases = [ "slurm_acct_db" ];
|
||||||
ensureUsers = [{
|
ensureUsers = [{
|
||||||
ensurePermissions = { "slurm_acct_db.*" = "ALL PRIVILEGES"; };
|
ensurePermissions = { "slurm_acct_db.*" = "ALL PRIVILEGES"; };
|
||||||
@ -80,63 +85,57 @@ in {
|
|||||||
|
|
||||||
testScript =
|
testScript =
|
||||||
''
|
''
|
||||||
startAll;
|
start_all()
|
||||||
|
|
||||||
# Set up authentication across the cluster
|
# Set up authentication across the cluster
|
||||||
foreach my $node (($submit,$control,$dbd,$node1,$node2,$node3))
|
for node in [submit, control, dbd, node1, node2, node3]:
|
||||||
{
|
|
||||||
$node->waitForUnit("default.target");
|
|
||||||
|
|
||||||
$node->succeed("mkdir /etc/munge");
|
node.wait_for_unit("default.target")
|
||||||
$node->succeed("echo '${mungekey}' > /etc/munge/munge.key");
|
|
||||||
$node->succeed("chmod 0400 /etc/munge/munge.key");
|
node.succeed("mkdir /etc/munge")
|
||||||
$node->succeed("chown munge:munge /etc/munge/munge.key");
|
node.succeed(
|
||||||
$node->succeed("systemctl restart munged");
|
"echo '${mungekey}' > /etc/munge/munge.key"
|
||||||
|
)
|
||||||
|
node.succeed("chmod 0400 /etc/munge/munge.key")
|
||||||
|
node.succeed("chown munge:munge /etc/munge/munge.key")
|
||||||
|
node.succeed("systemctl restart munged")
|
||||||
|
|
||||||
|
node.wait_for_unit("munged")
|
||||||
|
|
||||||
$node->waitForUnit("munged");
|
|
||||||
};
|
|
||||||
|
|
||||||
# Restart the services since they have probably failed due to the munge init
|
# Restart the services since they have probably failed due to the munge init
|
||||||
# failure
|
# failure
|
||||||
subtest "can_start_slurmdbd", sub {
|
with subtest("can_start_slurmdbd"):
|
||||||
$dbd->succeed("systemctl restart slurmdbd");
|
dbd.succeed("systemctl restart slurmdbd")
|
||||||
$dbd->waitForUnit("slurmdbd.service");
|
dbd.wait_for_unit("slurmdbd.service")
|
||||||
$dbd->waitForOpenPort(6819);
|
dbd.wait_for_open_port(6819)
|
||||||
};
|
|
||||||
|
|
||||||
# there needs to be an entry for the current
|
# there needs to be an entry for the current
|
||||||
# cluster in the database before slurmctld is restarted
|
# cluster in the database before slurmctld is restarted
|
||||||
subtest "add_account", sub {
|
with subtest("add_account"):
|
||||||
$control->succeed("sacctmgr -i add cluster default");
|
control.succeed("sacctmgr -i add cluster default")
|
||||||
# check for cluster entry
|
# check for cluster entry
|
||||||
$control->succeed("sacctmgr list cluster | awk '{ print \$1 }' | grep default");
|
control.succeed("sacctmgr list cluster | awk '{ print $1 }' | grep default")
|
||||||
};
|
|
||||||
|
|
||||||
subtest "can_start_slurmctld", sub {
|
with subtest("can_start_slurmctld"):
|
||||||
$control->succeed("systemctl restart slurmctld");
|
control.succeed("systemctl restart slurmctld")
|
||||||
$control->waitForUnit("slurmctld.service");
|
# Use the snake_case name, as the other calls in this script do; waitForUnit is the Perl driver's spelling.
control.wait_for_unit("slurmctld.service")
|
||||||
};
|
|
||||||
|
|
||||||
subtest "can_start_slurmd", sub {
|
with subtest("can_start_slurmd"):
|
||||||
foreach my $node (($node1,$node2,$node3))
|
for node in [node1, node2, node3]:
|
||||||
{
|
node.succeed("systemctl restart slurmd.service")
|
||||||
$node->succeed("systemctl restart slurmd.service");
|
node.wait_for_unit("slurmd")
|
||||||
$node->waitForUnit("slurmd");
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
# Test that the cluster works and can distribute jobs;
|
# Test that the cluster works and can distribute jobs;
|
||||||
|
|
||||||
subtest "run_distributed_command", sub {
|
with subtest("run_distributed_command"):
|
||||||
# Run `hostname` on 3 nodes of the partition (so on all the 3 nodes).
|
# Run `hostname` on 3 nodes of the partition (so on all the 3 nodes).
|
||||||
# The output must contain the 3 different names
|
# The output must contain the 3 different names
|
||||||
$submit->succeed("srun -N 3 hostname | sort | uniq | wc -l | xargs test 3 -eq");
|
submit.succeed("srun -N 3 hostname | sort | uniq | wc -l | xargs test 3 -eq")
|
||||||
};
|
|
||||||
|
|
||||||
subtest "check_slurm_dbd", sub {
|
with subtest("check_slurm_dbd"):
|
||||||
# find the srun job from above in the database
|
# find the srun job from above in the database
|
||||||
sleep 5;
|
control.succeed("sleep 5")
|
||||||
$control->succeed("sacct | grep hostname");
|
control.succeed("sacct | grep hostname")
|
||||||
};
|
|
||||||
'';
|
'';
|
||||||
})
|
})
|
||||||
|
@ -48,7 +48,8 @@ stdenv.mkDerivation rec {
|
|||||||
"--with-zlib=${zlib}"
|
"--with-zlib=${zlib}"
|
||||||
"--sysconfdir=/etc/slurm"
|
"--sysconfdir=/etc/slurm"
|
||||||
] ++ (optional (gtk2 == null) "--disable-gtktest")
|
] ++ (optional (gtk2 == null) "--disable-gtktest")
|
||||||
++ (optional enableX11 "--with-libssh2=${libssh2.dev}");
|
++ (optional enableX11 "--with-libssh2=${libssh2.dev}")
|
||||||
|
++ (optional (!enableX11) "--disable-x11");
|
||||||
|
|
||||||
|
|
||||||
preConfigure = ''
|
preConfigure = ''
|
||||||
|
Loading…
x
Reference in New Issue
Block a user