Merge pull request #41377 from markuskowa/slurm-ext-pr
nixos/slurm: Improve slurm configuration options and features.
This commit is contained in:
commit
c30bd1c131
@ -6,7 +6,7 @@ let
|
|||||||
|
|
||||||
cfg = config.services.slurm;
|
cfg = config.services.slurm;
|
||||||
# configuration file can be generated by http://slurm.schedmd.com/configurator.html
|
# configuration file can be generated by http://slurm.schedmd.com/configurator.html
|
||||||
configFile = pkgs.writeText "slurm.conf"
|
configFile = pkgs.writeTextDir "slurm.conf"
|
||||||
''
|
''
|
||||||
${optionalString (cfg.controlMachine != null) ''controlMachine=${cfg.controlMachine}''}
|
${optionalString (cfg.controlMachine != null) ''controlMachine=${cfg.controlMachine}''}
|
||||||
${optionalString (cfg.controlAddr != null) ''controlAddr=${cfg.controlAddr}''}
|
${optionalString (cfg.controlAddr != null) ''controlAddr=${cfg.controlAddr}''}
|
||||||
@ -17,10 +17,25 @@ let
|
|||||||
${cfg.extraConfig}
|
${cfg.extraConfig}
|
||||||
'';
|
'';
|
||||||
|
|
||||||
plugStackConfig = pkgs.writeText "plugstack.conf"
|
plugStackConfig = pkgs.writeTextDir "plugstack.conf"
|
||||||
''
|
''
|
||||||
${optionalString cfg.enableSrunX11 ''optional ${pkgs.slurm-spank-x11}/lib/x11.so''}
|
${optionalString cfg.enableSrunX11 ''optional ${pkgs.slurm-spank-x11}/lib/x11.so''}
|
||||||
|
${cfg.extraPlugstackConfig}
|
||||||
'';
|
'';
|
||||||
|
|
||||||
|
|
||||||
|
cgroupConfig = pkgs.writeTextDir "cgroup.conf"
|
||||||
|
''
|
||||||
|
${cfg.extraCgroupConfig}
|
||||||
|
'';
|
||||||
|
|
||||||
|
# slurm expects some additional config files to be
|
||||||
|
# in the same directory as slurm.conf
|
||||||
|
etcSlurm = pkgs.symlinkJoin {
|
||||||
|
name = "etc-slurm";
|
||||||
|
paths = [ configFile cgroupConfig plugStackConfig ];
|
||||||
|
};
|
||||||
|
|
||||||
in
|
in
|
||||||
|
|
||||||
{
|
{
|
||||||
@ -46,7 +61,17 @@ in
|
|||||||
|
|
||||||
client = {
|
client = {
|
||||||
enable = mkEnableOption "slurm client daemon";
|
enable = mkEnableOption "slurm client daemon";
|
||||||
|
};
|
||||||
|
|
||||||
|
enableStools = mkOption {
|
||||||
|
type = types.bool;
|
||||||
|
default = false;
|
||||||
|
description = ''
|
||||||
|
Whether to provide a slurm.conf file.
|
||||||
|
Enable this option if you do not run a slurm daemon on this host
|
||||||
|
(i.e. <literal>server.enable</literal> and <literal>client.enable</literal> are <literal>false</literal>)
|
||||||
|
but you still want to run slurm commands from this host.
|
||||||
|
'';
|
||||||
};
|
};
|
||||||
|
|
||||||
package = mkOption {
|
package = mkOption {
|
||||||
@ -97,7 +122,7 @@ in
|
|||||||
example = "debug Nodes=linux[1-32] Default=YES MaxTime=INFINITE State=UP";
|
example = "debug Nodes=linux[1-32] Default=YES MaxTime=INFINITE State=UP";
|
||||||
description = ''
|
description = ''
|
||||||
Name by which the partition may be referenced. Note that now you have
|
Name by which the partition may be referenced. Note that now you have
|
||||||
to write patrition's parameters after the name.
|
to write the partition's parameters after the name.
|
||||||
'';
|
'';
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -107,8 +132,10 @@ in
|
|||||||
description = ''
|
description = ''
|
||||||
If enabled srun will accept the option "--x11" to allow for X11 forwarding
|
If enabled srun will accept the option "--x11" to allow for X11 forwarding
|
||||||
from within an interactive session or a batch job. This activates the
|
from within an interactive session or a batch job. This activates the
|
||||||
slurm-spank-x11 module. Note that this requires 'services.openssh.forwardX11'
|
slurm-spank-x11 module. Note that this option also enables
|
||||||
to be enabled on the compute nodes.
|
'services.openssh.forwardX11' on the client.
|
||||||
|
|
||||||
|
This option requires slurm to be compiled without native X11 support.
|
||||||
'';
|
'';
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -130,6 +157,23 @@ in
|
|||||||
the end of the slurm configuration file.
|
the end of the slurm configuration file.
|
||||||
'';
|
'';
|
||||||
};
|
};
|
||||||
|
|
||||||
|
extraPlugstackConfig = mkOption {
|
||||||
|
default = "";
|
||||||
|
type = types.lines;
|
||||||
|
description = ''
|
||||||
|
Extra configuration that will be added to the end of <literal>plugstack.conf</literal>.
|
||||||
|
'';
|
||||||
|
};
|
||||||
|
|
||||||
|
extraCgroupConfig = mkOption {
|
||||||
|
default = "";
|
||||||
|
type = types.lines;
|
||||||
|
description = ''
|
||||||
|
Extra configuration for <literal>cgroup.conf</literal>. This file is
|
||||||
|
used when <literal>procTrackType=proctrack/cgroup</literal>.
|
||||||
|
'';
|
||||||
|
};
|
||||||
};
|
};
|
||||||
|
|
||||||
};
|
};
|
||||||
@ -142,8 +186,6 @@ in
|
|||||||
wrappedSlurm = pkgs.stdenv.mkDerivation {
|
wrappedSlurm = pkgs.stdenv.mkDerivation {
|
||||||
name = "wrappedSlurm";
|
name = "wrappedSlurm";
|
||||||
|
|
||||||
propagatedBuildInputs = [ cfg.package configFile ];
|
|
||||||
|
|
||||||
builder = pkgs.writeText "builder.sh" ''
|
builder = pkgs.writeText "builder.sh" ''
|
||||||
source $stdenv/setup
|
source $stdenv/setup
|
||||||
mkdir -p $out/bin
|
mkdir -p $out/bin
|
||||||
@ -155,17 +197,20 @@ in
|
|||||||
#!/bin/sh
|
#!/bin/sh
|
||||||
if [ -z "$SLURM_CONF" ]
|
if [ -z "$SLURM_CONF" ]
|
||||||
then
|
then
|
||||||
SLURM_CONF="${configFile}" "$EXE" "\$@"
|
SLURM_CONF="${etcSlurm}/slurm.conf" "$EXE" "\$@"
|
||||||
else
|
else
|
||||||
"$EXE" "\$0"
|
"$EXE" "\$0"
|
||||||
fi
|
fi
|
||||||
EOT
|
EOT
|
||||||
chmod +x "$wrappername"
|
chmod +x "$wrappername"
|
||||||
done
|
done
|
||||||
|
|
||||||
|
mkdir -p $out/share
|
||||||
|
ln -s ${getBin cfg.package}/share/man $out/share/man
|
||||||
'';
|
'';
|
||||||
};
|
};
|
||||||
|
|
||||||
in mkIf (cfg.client.enable || cfg.server.enable) {
|
in mkIf (cfg.enableStools || cfg.client.enable || cfg.server.enable) {
|
||||||
|
|
||||||
environment.systemPackages = [ wrappedSlurm ];
|
environment.systemPackages = [ wrappedSlurm ];
|
||||||
|
|
||||||
@ -190,6 +235,8 @@ in
|
|||||||
'';
|
'';
|
||||||
};
|
};
|
||||||
|
|
||||||
|
services.openssh.forwardX11 = mkIf cfg.client.enable (mkDefault true);
|
||||||
|
|
||||||
systemd.services.slurmctld = mkIf (cfg.server.enable) {
|
systemd.services.slurmctld = mkIf (cfg.server.enable) {
|
||||||
path = with pkgs; [ wrappedSlurm munge coreutils ]
|
path = with pkgs; [ wrappedSlurm munge coreutils ]
|
||||||
++ lib.optional cfg.enableSrunX11 slurm-spank-x11;
|
++ lib.optional cfg.enableSrunX11 slurm-spank-x11;
|
||||||
|
@ -1,7 +1,6 @@
|
|||||||
import ./make-test.nix ({ pkgs, ... }:
|
import ./make-test.nix ({ pkgs, ... }:
|
||||||
let mungekey = "mungeverryweakkeybuteasytointegratoinatest";
|
let mungekey = "mungeverryweakkeybuteasytointegratoinatest";
|
||||||
slurmconfig = {
|
slurmconfig = {
|
||||||
client.enable = true;
|
|
||||||
controlMachine = "control";
|
controlMachine = "control";
|
||||||
nodeName = ''
|
nodeName = ''
|
||||||
control
|
control
|
||||||
@ -20,9 +19,12 @@ in {
|
|||||||
# TODO slurmd port and slurmctld port should be configurations and
|
# TODO slurmd port and slurmctld port should be configurations and
|
||||||
# automatically allowed by the firewall.
|
# automatically allowed by the firewall.
|
||||||
networking.firewall.enable = false;
|
networking.firewall.enable = false;
|
||||||
services.slurm = slurmconfig;
|
services.slurm = {
|
||||||
|
client.enable = true;
|
||||||
|
} // slurmconfig;
|
||||||
};
|
};
|
||||||
in {
|
in {
|
||||||
|
|
||||||
control =
|
control =
|
||||||
{ config, pkgs, ...}:
|
{ config, pkgs, ...}:
|
||||||
{
|
{
|
||||||
@ -31,17 +33,28 @@ in {
|
|||||||
server.enable = true;
|
server.enable = true;
|
||||||
} // slurmconfig;
|
} // slurmconfig;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
submit =
|
||||||
|
{ config, pkgs, ...}:
|
||||||
|
{
|
||||||
|
networking.firewall.enable = false;
|
||||||
|
services.slurm = {
|
||||||
|
enableStools = true;
|
||||||
|
} // slurmconfig;
|
||||||
|
};
|
||||||
|
|
||||||
node1 = computeNode;
|
node1 = computeNode;
|
||||||
node2 = computeNode;
|
node2 = computeNode;
|
||||||
node3 = computeNode;
|
node3 = computeNode;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
testScript =
|
testScript =
|
||||||
''
|
''
|
||||||
startAll;
|
startAll;
|
||||||
|
|
||||||
# Set up authentication across the cluster
|
# Set up authentication across the cluster
|
||||||
foreach my $node (($control,$node1,$node2,$node3))
|
foreach my $node (($submit,$control,$node1,$node2,$node3))
|
||||||
{
|
{
|
||||||
$node->waitForUnit("default.target");
|
$node->waitForUnit("default.target");
|
||||||
|
|
||||||
@ -60,7 +73,7 @@ in {
|
|||||||
};
|
};
|
||||||
|
|
||||||
subtest "can_start_slurmd", sub {
|
subtest "can_start_slurmd", sub {
|
||||||
foreach my $node (($control,$node1,$node2,$node3))
|
foreach my $node (($node1,$node2,$node3))
|
||||||
{
|
{
|
||||||
$node->succeed("systemctl restart slurmd.service");
|
$node->succeed("systemctl restart slurmd.service");
|
||||||
$node->waitForUnit("slurmd");
|
$node->waitForUnit("slurmd");
|
||||||
@ -72,7 +85,7 @@ in {
|
|||||||
subtest "run_distributed_command", sub {
|
subtest "run_distributed_command", sub {
|
||||||
# Run `hostname` on 3 nodes of the partition (so on all the 3 nodes).
|
# Run `hostname` on 3 nodes of the partition (so on all the 3 nodes).
|
||||||
# The output must contain the 3 different names
|
# The output must contain the 3 different names
|
||||||
$control->succeed("srun -N 3 hostname | sort | uniq | wc -l | xargs test 3 -eq");
|
$submit->succeed("srun -N 3 hostname | sort | uniq | wc -l | xargs test 3 -eq");
|
||||||
};
|
};
|
||||||
'';
|
'';
|
||||||
})
|
})
|
||||||
|
@ -1,5 +1,9 @@
|
|||||||
{ stdenv, fetchurl, pkgconfig, libtool, curl, python, munge, perl, pam, openssl
|
{ stdenv, fetchurl, pkgconfig, libtool, curl
|
||||||
|
, python, munge, perl, pam, openssl
|
||||||
, ncurses, mysql, gtk2, lua, hwloc, numactl
|
, ncurses, mysql, gtk2, lua, hwloc, numactl
|
||||||
|
, readline, freeipmi, libssh2, xorg
|
||||||
|
# enable internal X11 support via libssh2
|
||||||
|
, enableX11 ? true
|
||||||
}:
|
}:
|
||||||
|
|
||||||
stdenv.mkDerivation rec {
|
stdenv.mkDerivation rec {
|
||||||
@ -13,6 +17,11 @@ stdenv.mkDerivation rec {
|
|||||||
|
|
||||||
outputs = [ "out" "dev" ];
|
outputs = [ "out" "dev" ];
|
||||||
|
|
||||||
|
prePatch = stdenv.lib.optional enableX11 ''
|
||||||
|
substituteInPlace src/common/x11_util.c \
|
||||||
|
--replace '"/usr/bin/xauth"' '"${xorg.xauth}/bin/xauth"'
|
||||||
|
'';
|
||||||
|
|
||||||
# nixos test fails to start slurmd with 'undefined symbol: slurm_job_preempt_mode'
|
# nixos test fails to start slurmd with 'undefined symbol: slurm_job_preempt_mode'
|
||||||
# https://groups.google.com/forum/#!topic/slurm-devel/QHOajQ84_Es
|
# https://groups.google.com/forum/#!topic/slurm-devel/QHOajQ84_Es
|
||||||
# this doesn't fix the tests completely, but at least it lets slurmd launch
|
# this doesn't fix the tests completely, but at least it lets slurmd launch
|
||||||
@ -20,14 +29,20 @@ stdenv.mkDerivation rec {
|
|||||||
|
|
||||||
nativeBuildInputs = [ pkgconfig libtool ];
|
nativeBuildInputs = [ pkgconfig libtool ];
|
||||||
buildInputs = [
|
buildInputs = [
|
||||||
curl python munge perl pam openssl mysql.connector-c ncurses gtk2 lua hwloc numactl
|
curl python munge perl pam openssl
|
||||||
];
|
mysql.connector-c ncurses gtk2
|
||||||
|
lua hwloc numactl readline freeipmi
|
||||||
|
] ++ stdenv.lib.optionals enableX11 [ libssh2 xorg.xauth ];
|
||||||
|
|
||||||
configureFlags =
|
configureFlags = with stdenv.lib;
|
||||||
[ "--with-munge=${munge}"
|
[ "--with-munge=${munge}"
|
||||||
"--with-ssl=${openssl.dev}"
|
"--with-ssl=${openssl.dev}"
|
||||||
|
"--with-hwloc=${hwloc.dev}"
|
||||||
|
"--with-freeipmi=${freeipmi}"
|
||||||
"--sysconfdir=/etc/slurm"
|
"--sysconfdir=/etc/slurm"
|
||||||
] ++ stdenv.lib.optional (gtk2 == null) "--disable-gtktest";
|
] ++ (optional (gtk2 == null) "--disable-gtktest")
|
||||||
|
++ (optional enableX11 "--with-libssh2=${libssh2.dev}");
|
||||||
|
|
||||||
|
|
||||||
preConfigure = ''
|
preConfigure = ''
|
||||||
patchShebangs ./doc/html/shtml2html.py
|
patchShebangs ./doc/html/shtml2html.py
|
||||||
@ -45,6 +60,6 @@ stdenv.mkDerivation rec {
|
|||||||
description = "Simple Linux Utility for Resource Management";
|
description = "Simple Linux Utility for Resource Management";
|
||||||
platforms = platforms.linux;
|
platforms = platforms.linux;
|
||||||
license = licenses.gpl2;
|
license = licenses.gpl2;
|
||||||
maintainers = [ maintainers.jagajaga ];
|
maintainers = with maintainers; [ jagajaga markuskowa ];
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user