Merge pull request #41377 from markuskowa/slurm-ext-pr

nixos/slurm: Improve slurm configuration options and features.
This commit is contained in:
Joachim F 2018-06-02 12:17:02 +00:00 committed by GitHub
commit c30bd1c131
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 95 additions and 20 deletions

View File

@ -6,7 +6,7 @@ let
cfg = config.services.slurm;
# configuration file can be generated by http://slurm.schedmd.com/configurator.html
configFile = pkgs.writeText "slurm.conf"
configFile = pkgs.writeTextDir "slurm.conf"
''
${optionalString (cfg.controlMachine != null) ''controlMachine=${cfg.controlMachine}''}
${optionalString (cfg.controlAddr != null) ''controlAddr=${cfg.controlAddr}''}
@ -17,10 +17,25 @@ let
${cfg.extraConfig}
'';
plugStackConfig = pkgs.writeText "plugstack.conf"
plugStackConfig = pkgs.writeTextDir "plugstack.conf"
''
${optionalString cfg.enableSrunX11 ''optional ${pkgs.slurm-spank-x11}/lib/x11.so''}
${cfg.extraPlugstackConfig}
'';
cgroupConfig = pkgs.writeTextDir "cgroup.conf"
''
${cfg.extraCgroupConfig}
'';
# slurm looks for its additional config files next to slurm.conf,
# so the generated files are joined into a single directory.
etcSlurm = pkgs.symlinkJoin {
  paths = [ configFile cgroupConfig plugStackConfig ];
  name = "etc-slurm";
};
in
{
@ -46,7 +61,17 @@ in
client = {
enable = mkEnableOption "slurm client daemon";
};
# Lets a host that runs neither slurmd nor slurmctld still get the wrapped
# slurm commands together with the generated slurm.conf.
enableStools = mkOption {
  type = types.bool;
  default = false;
  description = ''
    Whether to provide a slurm.conf file.
    Enable this option if you do not run a slurm daemon on this host
    (i.e. <literal>server.enable</literal> and <literal>client.enable</literal> are <literal>false</literal>)
    but you still want to run slurm commands from this host.
  '';
};
package = mkOption {
@ -97,7 +122,7 @@ in
example = "debug Nodes=linux[1-32] Default=YES MaxTime=INFINITE State=UP";
description = ''
Name by which the partition may be referenced. Note that now you have
to write patrition's parameters after the name.
to write the partition's parameters after the name.
'';
};
@ -107,8 +132,10 @@ in
description = ''
If enabled srun will accept the option "--x11" to allow for X11 forwarding
from within an interactive session or a batch job. This activates the
slurm-spank-x11 module. Note that this requires 'services.openssh.forwardX11'
to be enabled on the compute nodes.
slurm-spank-x11 module. Note that this option also enables
'services.openssh.forwardX11' on the client.
This option requires slurm to be compiled without native X11 support.
'';
};
@ -130,6 +157,23 @@ in
the end of the slurm configuration file.
'';
};
extraPlugstackConfig = mkOption {
default = "";
type = types.lines;
description = ''
Extra configuration that will be added to the end of <literal>plugstack.conf</literal>.
'';
};
extraCgroupConfig = mkOption {
default = "";
type = types.lines;
description = ''
Extra configuration for <literal>cgroup.conf</literal>. This file is
used when <literal>procTrackType=proctrack/cgroup</literal>.
'';
};
};
};
@ -142,8 +186,6 @@ in
wrappedSlurm = pkgs.stdenv.mkDerivation {
name = "wrappedSlurm";
propagatedBuildInputs = [ cfg.package configFile ];
builder = pkgs.writeText "builder.sh" ''
source $stdenv/setup
mkdir -p $out/bin
@ -155,17 +197,20 @@ in
#!/bin/sh
if [ -z "$SLURM_CONF" ]
then
SLURM_CONF="${configFile}" "$EXE" "\$@"
SLURM_CONF="${etcSlurm}/slurm.conf" "$EXE" "\$@"
else
# SLURM_CONF was supplied by the caller: keep it and forward all arguments
"$EXE" "\$@"
fi
EOT
chmod +x "$wrappername"
done
mkdir -p $out/share
ln -s ${getBin cfg.package}/share/man $out/share/man
'';
};
in mkIf (cfg.client.enable || cfg.server.enable) {
in mkIf (cfg.enableStools || cfg.client.enable || cfg.server.enable) {
environment.systemPackages = [ wrappedSlurm ];
@ -190,6 +235,8 @@ in
'';
};
services.openssh.forwardX11 = mkIf cfg.client.enable (mkDefault true);
systemd.services.slurmctld = mkIf (cfg.server.enable) {
path = with pkgs; [ wrappedSlurm munge coreutils ]
++ lib.optional cfg.enableSrunX11 slurm-spank-x11;

View File

@ -1,7 +1,6 @@
import ./make-test.nix ({ pkgs, ... }:
let mungekey = "mungeverryweakkeybuteasytointegratoinatest";
slurmconfig = {
client.enable = true;
controlMachine = "control";
nodeName = ''
control
@ -20,9 +19,12 @@ in {
# TODO slurmd port and slurmctld port should be configuration options and
# automatically allowed by the firewall.
networking.firewall.enable = false;
services.slurm = slurmconfig;
services.slurm = {
client.enable = true;
} // slurmconfig;
};
in {
control =
{ config, pkgs, ...}:
{
@ -31,17 +33,28 @@ in {
server.enable = true;
} // slurmconfig;
};
submit =
{ config, pkgs, ...}:
{
networking.firewall.enable = false;
services.slurm = {
enableStools = true;
} // slurmconfig;
};
node1 = computeNode;
node2 = computeNode;
node3 = computeNode;
};
testScript =
''
startAll;
# Set up authentication across the cluster
foreach my $node (($control,$node1,$node2,$node3))
foreach my $node (($submit,$control,$node1,$node2,$node3))
{
$node->waitForUnit("default.target");
@ -60,7 +73,7 @@ in {
};
subtest "can_start_slurmd", sub {
foreach my $node (($control,$node1,$node2,$node3))
foreach my $node (($node1,$node2,$node3))
{
$node->succeed("systemctl restart slurmd.service");
$node->waitForUnit("slurmd");
@ -72,7 +85,7 @@ in {
subtest "run_distributed_command", sub {
# Run `hostname` on 3 nodes of the partition (so on all the 3 nodes).
# The output must contain the 3 different names
$control->succeed("srun -N 3 hostname | sort | uniq | wc -l | xargs test 3 -eq");
$submit->succeed("srun -N 3 hostname | sort | uniq | wc -l | xargs test 3 -eq");
};
'';
})

View File

@ -1,5 +1,9 @@
{ stdenv, fetchurl, pkgconfig, libtool, curl, python, munge, perl, pam, openssl
{ stdenv, fetchurl, pkgconfig, libtool, curl
, python, munge, perl, pam, openssl
, ncurses, mysql, gtk2, lua, hwloc, numactl
, readline, freeipmi, libssh2, xorg
# enable internal X11 support via libssh2
, enableX11 ? true
}:
stdenv.mkDerivation rec {
@ -13,6 +17,11 @@ stdenv.mkDerivation rec {
outputs = [ "out" "dev" ];
# When internal X11 support is enabled, point slurm at the xauth binary in
# the Nix store instead of the hard-coded /usr/bin/xauth.
# optionalString (rather than optional) keeps prePatch a plain string
# instead of a one-element list.
prePatch = stdenv.lib.optionalString enableX11 ''
  substituteInPlace src/common/x11_util.c \
    --replace '"/usr/bin/xauth"' '"${xorg.xauth}/bin/xauth"'
'';
# nixos test fails to start slurmd with 'undefined symbol: slurm_job_preempt_mode'
# https://groups.google.com/forum/#!topic/slurm-devel/QHOajQ84_Es
# this doesn't fix the tests completely, but at least lets slurmd launch
@ -20,14 +29,20 @@ stdenv.mkDerivation rec {
nativeBuildInputs = [ pkgconfig libtool ];
buildInputs = [
curl python munge perl pam openssl mysql.connector-c ncurses gtk2 lua hwloc numactl
];
curl python munge perl pam openssl
mysql.connector-c ncurses gtk2
lua hwloc numactl readline freeipmi
] ++ stdenv.lib.optionals enableX11 [ libssh2 xorg.xauth ];
configureFlags =
configureFlags = with stdenv.lib;
[ "--with-munge=${munge}"
"--with-ssl=${openssl.dev}"
"--with-hwloc=${hwloc.dev}"
"--with-freeipmi=${freeipmi}"
"--sysconfdir=/etc/slurm"
] ++ stdenv.lib.optional (gtk2 == null) "--disable-gtktest";
] ++ (optional (gtk2 == null) "--disable-gtktest")
++ (optional enableX11 "--with-libssh2=${libssh2.dev}");
preConfigure = ''
patchShebangs ./doc/html/shtml2html.py
@ -45,6 +60,6 @@ stdenv.mkDerivation rec {
description = "Simple Linux Utility for Resource Management";
platforms = platforms.linux;
license = licenses.gpl2;
maintainers = [ maintainers.jagajaga ];
maintainers = with maintainers; [ jagajaga markuskowa ];
};
}