Merge pull request #41377 from markuskowa/slurm-ext-pr
nixos/slurm: Improve slurm configuration options and features.
This commit is contained in:
commit
c30bd1c131
@ -6,7 +6,7 @@ let
|
||||
|
||||
cfg = config.services.slurm;
|
||||
# configuration file can be generated by http://slurm.schedmd.com/configurator.html
|
||||
configFile = pkgs.writeText "slurm.conf"
|
||||
configFile = pkgs.writeTextDir "slurm.conf"
|
||||
''
|
||||
${optionalString (cfg.controlMachine != null) ''controlMachine=${cfg.controlMachine}''}
|
||||
${optionalString (cfg.controlAddr != null) ''controlAddr=${cfg.controlAddr}''}
|
||||
@ -17,10 +17,25 @@ let
|
||||
${cfg.extraConfig}
|
||||
'';
|
||||
|
||||
plugStackConfig = pkgs.writeText "plugstack.conf"
|
||||
plugStackConfig = pkgs.writeTextDir "plugstack.conf"
|
||||
''
|
||||
${optionalString cfg.enableSrunX11 ''optional ${pkgs.slurm-spank-x11}/lib/x11.so''}
|
||||
${cfg.extraPlugstackConfig}
|
||||
'';
|
||||
|
||||
|
||||
cgroupConfig = pkgs.writeTextDir "cgroup.conf"
|
||||
''
|
||||
${cfg.extraCgroupConfig}
|
||||
'';
|
||||
|
||||
# slurm expects some additional config files to be
|
||||
# in the same directory as slurm.conf
|
||||
etcSlurm = pkgs.symlinkJoin {
|
||||
name = "etc-slurm";
|
||||
paths = [ configFile cgroupConfig plugStackConfig ];
|
||||
};
|
||||
|
||||
in
|
||||
|
||||
{
|
||||
@ -46,7 +61,17 @@ in
|
||||
|
||||
client = {
|
||||
enable = mkEnableOption "slurm client daemon";
|
||||
};
|
||||
|
||||
enableStools = mkOption {
|
||||
type = types.bool;
|
||||
default = false;
|
||||
description = ''
|
||||
Whether to provide a slurm.conf file.
|
||||
Enable this option if you do not run a slurm daemon on this host
|
||||
(i.e. <literal>server.enable</literal> and <literal>client.enable</literal> are <literal>false</literal>)
|
||||
but you still want to run slurm commands from this host.
|
||||
'';
|
||||
};
|
||||
|
||||
package = mkOption {
|
||||
@ -97,7 +122,7 @@ in
|
||||
example = "debug Nodes=linux[1-32] Default=YES MaxTime=INFINITE State=UP";
|
||||
description = ''
|
||||
Name by which the partition may be referenced. Note that now you have
|
||||
to write patrition's parameters after the name.
|
||||
to write the partition's parameters after the name.
|
||||
'';
|
||||
};
|
||||
|
||||
@ -107,8 +132,10 @@ in
|
||||
description = ''
|
||||
If enabled srun will accept the option "--x11" to allow for X11 forwarding
|
||||
from within an interactive session or a batch job. This activates the
|
||||
slurm-spank-x11 module. Note that this requires 'services.openssh.forwardX11'
|
||||
to be enabled on the compute nodes.
|
||||
slurm-spank-x11 module. Note that this option also enables
|
||||
'services.openssh.forwardX11' on the client.
|
||||
|
||||
This option requires slurm to be compiled without native X11 support.
|
||||
'';
|
||||
};
|
||||
|
||||
@ -130,6 +157,23 @@ in
|
||||
the end of the slurm configuration file.
|
||||
'';
|
||||
};
|
||||
|
||||
extraPlugstackConfig = mkOption {
|
||||
default = "";
|
||||
type = types.lines;
|
||||
description = ''
|
||||
Extra configuration that will be added to the end of <literal>plugstack.conf</literal>.
|
||||
'';
|
||||
};
|
||||
|
||||
extraCgroupConfig = mkOption {
|
||||
default = "";
|
||||
type = types.lines;
|
||||
description = ''
|
||||
Extra configuration for <literal>cgroup.conf</literal>. This file is
|
||||
used when <literal>procTrackType=proctrack/cgroup</literal>.
|
||||
'';
|
||||
};
|
||||
};
|
||||
|
||||
};
|
||||
@ -142,8 +186,6 @@ in
|
||||
wrappedSlurm = pkgs.stdenv.mkDerivation {
|
||||
name = "wrappedSlurm";
|
||||
|
||||
propagatedBuildInputs = [ cfg.package configFile ];
|
||||
|
||||
builder = pkgs.writeText "builder.sh" ''
|
||||
source $stdenv/setup
|
||||
mkdir -p $out/bin
|
||||
@ -155,17 +197,20 @@ in
|
||||
#!/bin/sh
|
||||
# Forward all caller arguments in both branches; only the config path differs.
if [ -z "$SLURM_CONF" ]
|
||||
then
|
||||
SLURM_CONF="${configFile}" "$EXE" "\$@"
|
||||
SLURM_CONF="${etcSlurm}/slurm.conf" "$EXE" "\$@"
|
||||
else
|
||||
"$EXE" "\$@"
|
||||
fi
|
||||
EOT
|
||||
chmod +x "$wrappername"
|
||||
done
|
||||
|
||||
mkdir -p $out/share
|
||||
ln -s ${getBin cfg.package}/share/man $out/share/man
|
||||
'';
|
||||
};
|
||||
|
||||
in mkIf (cfg.client.enable || cfg.server.enable) {
|
||||
in mkIf (cfg.enableStools || cfg.client.enable || cfg.server.enable) {
|
||||
|
||||
environment.systemPackages = [ wrappedSlurm ];
|
||||
|
||||
@ -190,6 +235,8 @@ in
|
||||
'';
|
||||
};
|
||||
|
||||
services.openssh.forwardX11 = mkIf cfg.client.enable (mkDefault true);
|
||||
|
||||
systemd.services.slurmctld = mkIf (cfg.server.enable) {
|
||||
path = with pkgs; [ wrappedSlurm munge coreutils ]
|
||||
++ lib.optional cfg.enableSrunX11 slurm-spank-x11;
|
||||
|
@ -1,7 +1,6 @@
|
||||
import ./make-test.nix ({ pkgs, ... }:
|
||||
let mungekey = "mungeverryweakkeybuteasytointegratoinatest";
|
||||
slurmconfig = {
|
||||
client.enable = true;
|
||||
controlMachine = "control";
|
||||
nodeName = ''
|
||||
control
|
||||
@ -20,9 +19,12 @@ in {
|
||||
# TODO slurmd port and slurmctld port should be configurations and
|
||||
# automatically allowed by the firewall.
|
||||
networking.firewall.enable = false;
|
||||
services.slurm = slurmconfig;
|
||||
services.slurm = {
|
||||
client.enable = true;
|
||||
} // slurmconfig;
|
||||
};
|
||||
in {
|
||||
|
||||
control =
|
||||
{ config, pkgs, ...}:
|
||||
{
|
||||
@ -31,17 +33,28 @@ in {
|
||||
server.enable = true;
|
||||
} // slurmconfig;
|
||||
};
|
||||
|
||||
submit =
|
||||
{ config, pkgs, ...}:
|
||||
{
|
||||
networking.firewall.enable = false;
|
||||
services.slurm = {
|
||||
enableStools = true;
|
||||
} // slurmconfig;
|
||||
};
|
||||
|
||||
node1 = computeNode;
|
||||
node2 = computeNode;
|
||||
node3 = computeNode;
|
||||
};
|
||||
|
||||
|
||||
testScript =
|
||||
''
|
||||
startAll;
|
||||
|
||||
# Set up authentication across the cluster
|
||||
foreach my $node (($control,$node1,$node2,$node3))
|
||||
foreach my $node (($submit,$control,$node1,$node2,$node3))
|
||||
{
|
||||
$node->waitForUnit("default.target");
|
||||
|
||||
@ -60,7 +73,7 @@ in {
|
||||
};
|
||||
|
||||
subtest "can_start_slurmd", sub {
|
||||
foreach my $node (($control,$node1,$node2,$node3))
|
||||
foreach my $node (($node1,$node2,$node3))
|
||||
{
|
||||
$node->succeed("systemctl restart slurmd.service");
|
||||
$node->waitForUnit("slurmd");
|
||||
@ -72,7 +85,7 @@ in {
|
||||
subtest "run_distributed_command", sub {
|
||||
# Run `hostname` on 3 nodes of the partition (so on all the 3 nodes).
|
||||
# The output must contain the 3 different names
|
||||
$control->succeed("srun -N 3 hostname | sort | uniq | wc -l | xargs test 3 -eq");
|
||||
$submit->succeed("srun -N 3 hostname | sort | uniq | wc -l | xargs test 3 -eq");
|
||||
};
|
||||
'';
|
||||
})
|
||||
|
@ -1,5 +1,9 @@
|
||||
{ stdenv, fetchurl, pkgconfig, libtool, curl, python, munge, perl, pam, openssl
|
||||
{ stdenv, fetchurl, pkgconfig, libtool, curl
|
||||
, python, munge, perl, pam, openssl
|
||||
, ncurses, mysql, gtk2, lua, hwloc, numactl
|
||||
, readline, freeipmi, libssh2, xorg
|
||||
# enable internal X11 support via libssh2
|
||||
, enableX11 ? true
|
||||
}:
|
||||
|
||||
stdenv.mkDerivation rec {
|
||||
@ -13,6 +17,11 @@ stdenv.mkDerivation rec {
|
||||
|
||||
outputs = [ "out" "dev" ];
|
||||
|
||||
prePatch = stdenv.lib.optional enableX11 ''
|
||||
substituteInPlace src/common/x11_util.c \
|
||||
--replace '"/usr/bin/xauth"' '"${xorg.xauth}/bin/xauth"'
|
||||
'';
|
||||
|
||||
# nixos test fails to start slurmd with 'undefined symbol: slurm_job_preempt_mode'
|
||||
# https://groups.google.com/forum/#!topic/slurm-devel/QHOajQ84_Es
|
||||
# this doesn't fix the tests completely, but at least it lets slurmd launch
|
||||
@ -20,14 +29,20 @@ stdenv.mkDerivation rec {
|
||||
|
||||
nativeBuildInputs = [ pkgconfig libtool ];
|
||||
buildInputs = [
|
||||
curl python munge perl pam openssl mysql.connector-c ncurses gtk2 lua hwloc numactl
|
||||
];
|
||||
curl python munge perl pam openssl
|
||||
mysql.connector-c ncurses gtk2
|
||||
lua hwloc numactl readline freeipmi
|
||||
] ++ stdenv.lib.optionals enableX11 [ libssh2 xorg.xauth ];
|
||||
|
||||
configureFlags =
|
||||
configureFlags = with stdenv.lib;
|
||||
[ "--with-munge=${munge}"
|
||||
"--with-ssl=${openssl.dev}"
|
||||
"--with-hwloc=${hwloc.dev}"
|
||||
"--with-freeipmi=${freeipmi}"
|
||||
"--sysconfdir=/etc/slurm"
|
||||
] ++ stdenv.lib.optional (gtk2 == null) "--disable-gtktest";
|
||||
] ++ (optional (gtk2 == null) "--disable-gtktest")
|
||||
++ (optional enableX11 "--with-libssh2=${libssh2.dev}");
|
||||
|
||||
|
||||
preConfigure = ''
|
||||
patchShebangs ./doc/html/shtml2html.py
|
||||
@ -45,6 +60,6 @@ stdenv.mkDerivation rec {
|
||||
description = "Simple Linux Utility for Resource Management";
|
||||
platforms = platforms.linux;
|
||||
license = licenses.gpl2;
|
||||
maintainers = [ maintainers.jagajaga ];
|
||||
maintainers = with maintainers; [ jagajaga markuskowa ];
|
||||
};
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user