Merge pull request #73179 from markuskowa/fix-slurm

nixos/slurm: fix test and X11 options
markuskowa 2019-11-12 21:49:29 +01:00 committed by GitHub
commit 6928cb22e9
3 changed files with 91 additions and 50 deletions

View File

@@ -18,7 +18,7 @@ let
   ${optionalString (cfg.controlAddr != null) ''controlAddr=${cfg.controlAddr}''}
   ${toString (map (x: "NodeName=${x}\n") cfg.nodeName)}
   ${toString (map (x: "PartitionName=${x}\n") cfg.partitionName)}
-  PlugStackConfig=${plugStackConfig}
+  PlugStackConfig=${plugStackConfig}/plugstack.conf
   ProctrackType=${cfg.procTrackType}
   ${cfg.extraConfig}
 '';
@@ -39,6 +39,8 @@ let
   DbdHost=${cfg.dbdserver.dbdHost}
   SlurmUser=${cfg.user}
   StorageType=accounting_storage/mysql
+  StorageUser=${cfg.dbdserver.storageUser}
+  ${optionalString (cfg.dbdserver.storagePass != null) "StoragePass=${cfg.dbdserver.storagePass}"}
   ${cfg.dbdserver.extraConfig}
 '';
@@ -48,7 +50,6 @@ let
     name = "etc-slurm";
     paths = [ configFile cgroupConfig plugStackConfig ] ++ cfg.extraConfigPaths;
   };
 in
 {
@@ -86,6 +87,37 @@ in
        '';
      };
+
+      storageUser = mkOption {
+        type = types.str;
+        default = cfg.user;
+        description = ''
+          Database user name.
+        '';
+      };
+
+      storagePass = mkOption {
+        type = types.nullOr types.str;
+        default = null;
+        description = ''
+          Database password. Note that this password will be publicly
+          readable in the nix store. Use <option>configFile</option>
+          to store the config file and password outside the nix store.
+        '';
+      };
+
+      configFile = mkOption {
+        type = types.nullOr types.str;
+        default = null;
+        description = ''
+          Path to <literal>slurmdbd.conf</literal>. The password for the database connection
+          is stored in the config file. Use this option to specify a path
+          outside the nix store. If this option is unset, a configuration file
+          will be generated. See also:
+          <citerefentry><refentrytitle>slurmdbd.conf</refentrytitle>
+          <manvolnum>8</manvolnum></citerefentry>.
+        '';
+      };
 
      extraConfig = mkOption {
        type = types.lines;
        default = "";
@@ -112,7 +144,7 @@ in
     package = mkOption {
       type = types.package;
-      default = pkgs.slurm;
+      default = pkgs.slurm.override { enableX11 = ! cfg.enableSrunX11; };
       defaultText = "pkgs.slurm";
       example = literalExample "pkgs.slurm-full";
       description = ''
@@ -178,9 +210,14 @@ in
         If enabled srun will accept the option "--x11" to allow for X11 forwarding
         from within an interactive session or a batch job. This activates the
         slurm-spank-x11 module. Note that this option also enables
-        'services.openssh.forwardX11' on the client.
+        <option>services.openssh.forwardX11</option> on the client.
         This option requires slurm to be compiled without native X11 support.
+        The default behavior is to re-compile the slurm package with native X11
+        support disabled if this option is set to true.
+        To use the native X11 support, add <literal>PrologFlags=X11</literal> in <option>extraConfig</option>.
+        Note that this method will only work with RSA SSH host keys.
       '';
     };
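
As a sketch of the two X11 paths the description above distinguishes (option names come from this module; nothing below is added by the diff itself):

    # Path 1: spank-x11 forwarding; the module now rebuilds slurm with enableX11 = false.
    services.slurm.enableSrunX11 = true;

    # Path 2: slurm's native X11 support; keep the default package and set the flag in slurm.conf.
    services.slurm.extraConfig = ''
      PrologFlags=X11
    '';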
@@ -356,7 +393,11 @@ in
       requires = [ "munged.service" "mysql.service" ];
 
       # slurm strips the last component off the path
-      environment.SLURM_CONF = "${slurmdbdConf}/slurm.conf";
+      environment.SLURM_CONF =
+        if (cfg.dbdserver.configFile == null) then
+          "${slurmdbdConf}/slurm.conf"
+        else
+          cfg.dbdserver.configFile;
 
       serviceConfig = {
         Type = "forking";

View File

@@ -1,4 +1,4 @@
-import ./make-test.nix ({ lib, ... }:
+import ./make-test-python.nix ({ lib, ... }:
 
 let
   mungekey = "mungeverryweakkeybuteasytointegratoinatest";
@@ -54,10 +54,15 @@ in {
       networking.firewall.enable = false;
       services.slurm.dbdserver = {
         enable = true;
+        storagePass = "password123";
       };
       services.mysql = {
         enable = true;
-        package = pkgs.mysql;
+        package = pkgs.mariadb;
+        initialScript = pkgs.writeText "mysql-init.sql" ''
+          CREATE USER 'slurm'@'localhost' IDENTIFIED BY 'password123';
+          GRANT ALL PRIVILEGES ON slurm_acct_db.* TO 'slurm'@'localhost';
+        '';
         ensureDatabases = [ "slurm_acct_db" ];
         ensureUsers = [{
           ensurePermissions = { "slurm_acct_db.*" = "ALL PRIVILEGES"; };
@@ -80,63 +85,57 @@ in {
   testScript =
     ''
-      startAll;
+      start_all()
 
       # Set up authentification across the cluster
-      foreach my $node (($submit,$control,$dbd,$node1,$node2,$node3))
-      {
-        $node->waitForUnit("default.target");
-        $node->succeed("mkdir /etc/munge");
-        $node->succeed("echo '${mungekey}' > /etc/munge/munge.key");
-        $node->succeed("chmod 0400 /etc/munge/munge.key");
-        $node->succeed("chown munge:munge /etc/munge/munge.key");
-        $node->succeed("systemctl restart munged");
-        $node->waitForUnit("munged");
-      };
+      for node in [submit, control, dbd, node1, node2, node3]:
+          node.wait_for_unit("default.target")
+          node.succeed("mkdir /etc/munge")
+          node.succeed(
+              "echo '${mungekey}' > /etc/munge/munge.key"
+          )
+          node.succeed("chmod 0400 /etc/munge/munge.key")
+          node.succeed("chown munge:munge /etc/munge/munge.key")
+          node.succeed("systemctl restart munged")
+          node.wait_for_unit("munged")
 
       # Restart the services since they have probably failed due to the munge init
       # failure
-      subtest "can_start_slurmdbd", sub {
-        $dbd->succeed("systemctl restart slurmdbd");
-        $dbd->waitForUnit("slurmdbd.service");
-        $dbd->waitForOpenPort(6819);
-      };
+      with subtest("can_start_slurmdbd"):
+          dbd.succeed("systemctl restart slurmdbd")
+          dbd.wait_for_unit("slurmdbd.service")
+          dbd.wait_for_open_port(6819)
 
       # there needs to be an entry for the current
       # cluster in the database before slurmctld is restarted
-      subtest "add_account", sub {
-        $control->succeed("sacctmgr -i add cluster default");
-        # check for cluster entry
-        $control->succeed("sacctmgr list cluster | awk '{ print \$1 }' | grep default");
-      };
+      with subtest("add_account"):
+          control.succeed("sacctmgr -i add cluster default")
+          # check for cluster entry
+          control.succeed("sacctmgr list cluster | awk '{ print $1 }' | grep default")
 
-      subtest "can_start_slurmctld", sub {
-        $control->succeed("systemctl restart slurmctld");
-        $control->waitForUnit("slurmctld.service");
-      };
+      with subtest("can_start_slurmctld"):
+          control.succeed("systemctl restart slurmctld")
+          control.wait_for_unit("slurmctld.service")
 
-      subtest "can_start_slurmd", sub {
-        foreach my $node (($node1,$node2,$node3))
-        {
-          $node->succeed("systemctl restart slurmd.service");
-          $node->waitForUnit("slurmd");
-        }
-      };
+      with subtest("can_start_slurmd"):
+          for node in [node1, node2, node3]:
+              node.succeed("systemctl restart slurmd.service")
+              node.wait_for_unit("slurmd")
 
       # Test that the cluster works and can distribute jobs;
-      subtest "run_distributed_command", sub {
-        # Run `hostname` on 3 nodes of the partition (so on all the 3 nodes).
-        # The output must contain the 3 different names
-        $submit->succeed("srun -N 3 hostname | sort | uniq | wc -l | xargs test 3 -eq");
-      };
+      with subtest("run_distributed_command"):
+          # Run `hostname` on 3 nodes of the partition (so on all the 3 nodes).
+          # The output must contain the 3 different names
+          submit.succeed("srun -N 3 hostname | sort | uniq | wc -l | xargs test 3 -eq")
 
-      subtest "check_slurm_dbd", sub {
-        # find the srun job from above in the database
-        sleep 5;
-        $control->succeed("sacct | grep hostname");
-      };
+      with subtest("check_slurm_dbd"):
+          # find the srun job from above in the database
+          control.succeed("sleep 5")
+          control.succeed("sacct | grep hostname")
     '';
 })

View File

@@ -48,7 +48,8 @@ stdenv.mkDerivation rec {
     "--with-zlib=${zlib}"
     "--sysconfdir=/etc/slurm"
   ] ++ (optional (gtk2 == null) "--disable-gtktest")
-    ++ (optional enableX11 "--with-libssh2=${libssh2.dev}");
+    ++ (optional enableX11 "--with-libssh2=${libssh2.dev}")
+    ++ (optional (!enableX11) "--disable-x11");
 
   preConfigure = ''
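
The module's new default (shown earlier) relies on this flag through an override; a hedged example of requesting the same build by hand:

    # Hypothetical: explicitly use a slurm build without native X11 support,
    # which now configures with --disable-x11.
    services.slurm.package = pkgs.slurm.override { enableX11 = false; };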