From 995d2ec92889bf6c4fb9c5a742ac5e96cee0e677 Mon Sep 17 00:00:00 2001 From: Markus Kowalewski Date: Thu, 24 May 2018 20:16:01 +0200 Subject: [PATCH 1/6] nixos/slurm: Extend configuration options * Updated SrunX11 option * Added extraPlugstackConfig parameter * Added option enableStools * Add cgroup.conf to module * Fix some typos --- .../services/computing/slurm/slurm.nix | 60 ++++++++++++++++--- 1 file changed, 52 insertions(+), 8 deletions(-) diff --git a/nixos/modules/services/computing/slurm/slurm.nix b/nixos/modules/services/computing/slurm/slurm.nix index 3e513ab1571..82cf429936e 100644 --- a/nixos/modules/services/computing/slurm/slurm.nix +++ b/nixos/modules/services/computing/slurm/slurm.nix @@ -6,7 +6,7 @@ let cfg = config.services.slurm; # configuration file can be generated by http://slurm.schedmd.com/configurator.html - configFile = pkgs.writeText "slurm.conf" + configFile = pkgs.writeTextDir "slurm.conf" '' ${optionalString (cfg.controlMachine != null) ''controlMachine=${cfg.controlMachine}''} ${optionalString (cfg.controlAddr != null) ''controlAddr=${cfg.controlAddr}''} @@ -17,10 +17,25 @@ let ${cfg.extraConfig} ''; - plugStackConfig = pkgs.writeText "plugstack.conf" + plugStackConfig = pkgs.writeTextDir "plugstack.conf" '' ${optionalString cfg.enableSrunX11 ''optional ${pkgs.slurm-spank-x11}/lib/x11.so''} + ${cfg.extraPlugstackConfig} ''; + + + cgroupConfig = pkgs.writeTextDir "cgroup.conf" + '' + ${cfg.extraCgroupConfig} + ''; + + # slurm expects some additional config files to be + # in the same directory as slurm.conf + etcSlurm = pkgs.symlinkJoin { + name = "etc-slurm"; + paths = [ configFile cgroupConfig plugStackConfig ]; + }; + in { @@ -46,7 +61,17 @@ in client = { enable = mkEnableOption "slurm client daemon"; + }; + enableStools = mkOption { + type = types.bool; + default = false; + description = '' + Wether to provide a slurm.conf file. + Enable this option if you do not run a slurm daemon on this host + (i.e. 
server.enable and client.enable are false) + but you still want to run slurm commands from this host. + ''; }; package = mkOption { @@ -97,7 +122,7 @@ in example = "debug Nodes=linux[1-32] Default=YES MaxTime=INFINITE State=UP"; description = '' Name by which the partition may be referenced. Note that now you have - to write patrition's parameters after the name. + to write the partition's parameters after the name. ''; }; @@ -107,8 +132,8 @@ in description = '' If enabled srun will accept the option "--x11" to allow for X11 forwarding from within an interactive session or a batch job. This activates the - slurm-spank-x11 module. Note that this requires 'services.openssh.forwardX11' - to be enabled on the compute nodes. + slurm-spank-x11 module. Note that this option also enables + 'services.openssh.forwardX11' on the client. ''; }; @@ -130,6 +155,23 @@ in the end of the slurm configuration file. ''; }; + + extraPlugstackConfig = mkOption { + default = ""; + type = types.lines; + description = '' + Extra configuration that will be added to the end of plugstack.conf. + ''; + }; + + extraCgroupConfig = mkOption { + default = ""; + type = types.lines; + description = '' + Extra configuration for cgroup.conf. This file is + used when procTrackType=proctrack/cgroup. 
+ ''; + }; }; }; @@ -142,7 +184,7 @@ in wrappedSlurm = pkgs.stdenv.mkDerivation { name = "wrappedSlurm"; - propagatedBuildInputs = [ cfg.package configFile ]; + propagatedBuildInputs = [ cfg.package etcSlurm ]; builder = pkgs.writeText "builder.sh" '' source $stdenv/setup @@ -155,7 +197,7 @@ in #!/bin/sh if [ -z "$SLURM_CONF" ] then - SLURM_CONF="${configFile}" "$EXE" "\$@" + SLURM_CONF="${etcSlurm}/slurm.conf" "$EXE" "\$@" else "$EXE" "\$0" fi @@ -165,7 +207,7 @@ in ''; }; - in mkIf (cfg.client.enable || cfg.server.enable) { + in mkIf (cfg.enableStools || cfg.client.enable || cfg.server.enable) { environment.systemPackages = [ wrappedSlurm ]; @@ -190,6 +232,8 @@ in ''; }; + services.openssh.forwardX11 = mkIf cfg.client.enable (mkDefault true); + systemd.services.slurmctld = mkIf (cfg.server.enable) { path = with pkgs; [ wrappedSlurm munge coreutils ] ++ lib.optional cfg.enableSrunX11 slurm-spank-x11; From b7e91f096a46c5e3d3b699ff54bbb1b0bcd5e847 Mon Sep 17 00:00:00 2001 From: Markus Kowalewski Date: Fri, 1 Jun 2018 22:02:16 +0200 Subject: [PATCH 2/6] nixos/slurm: Add man pages to wrappedSlurm The nixos module adds a new derivation to systemPackages to make sure that the binaries get the generated config file. This derivation did not contain the man pages so far. Activating the module now makes the man pages available in the system environment. 
--- nixos/modules/services/computing/slurm/slurm.nix | 3 +++ 1 file changed, 3 insertions(+) diff --git a/nixos/modules/services/computing/slurm/slurm.nix b/nixos/modules/services/computing/slurm/slurm.nix index 82cf429936e..02c0aabe0a7 100644 --- a/nixos/modules/services/computing/slurm/slurm.nix +++ b/nixos/modules/services/computing/slurm/slurm.nix @@ -204,6 +204,9 @@ in EOT chmod +x "$wrappername" done + + mkdir -p $out/share + ln -s ${getBin cfg.package}/share/man $out/share/man ''; }; From 8026127e47398df660cc818d6266a552ecca8c1c Mon Sep 17 00:00:00 2001 From: Markus Kowalewski Date: Sat, 26 May 2018 11:51:45 +0200 Subject: [PATCH 3/6] slurm: add freeipmi, readline, libssh2, fix hwloc * add freeipmi to get power meter readings * readline support for scontrol * libssh2 support for X11 support * Add note to enableSrunX11 in module * fix hwloc support (was detected by configure) --- .../services/computing/slurm/slurm.nix | 2 ++ pkgs/servers/computing/slurm/default.nix | 25 +++++++++++++++---- 2 files changed, 22 insertions(+), 5 deletions(-) diff --git a/nixos/modules/services/computing/slurm/slurm.nix b/nixos/modules/services/computing/slurm/slurm.nix index 02c0aabe0a7..cf7fc8c75c1 100644 --- a/nixos/modules/services/computing/slurm/slurm.nix +++ b/nixos/modules/services/computing/slurm/slurm.nix @@ -134,6 +134,8 @@ in from within an interactive session or a batch job. This activates the slurm-spank-x11 module. Note that this option also enables 'services.openssh.forwardX11' on the client. + + This option requires slurm to be compiled without native X11 support. 
''; }; diff --git a/pkgs/servers/computing/slurm/default.nix b/pkgs/servers/computing/slurm/default.nix index b0460f21bfa..1603abac8dc 100644 --- a/pkgs/servers/computing/slurm/default.nix +++ b/pkgs/servers/computing/slurm/default.nix @@ -1,5 +1,9 @@ -{ stdenv, fetchurl, pkgconfig, libtool, curl, python, munge, perl, pam, openssl +{ stdenv, fetchurl, pkgconfig, libtool, curl +, python, munge, perl, pam, openssl , ncurses, mysql, gtk2, lua, hwloc, numactl +, readline, freeipmi, libssh2, xorg +# enable internal X11 support via libssh2 +, enableX11 ? true }: stdenv.mkDerivation rec { @@ -13,6 +17,11 @@ stdenv.mkDerivation rec { outputs = [ "out" "dev" ]; + prePatch = stdenv.lib.optional enableX11 '' + substituteInPlace src/common/x11_util.c \ + --replace '"/usr/bin/xauth"' '"${xorg.xauth}/bin/xauth"' + ''; + # nixos test fails to start slurmd with 'undefined symbol: slurm_job_preempt_mode' # https://groups.google.com/forum/#!topic/slurm-devel/QHOajQ84_Es # this doesn't fix tests completely at least makes slurmd to launch @@ -20,14 +29,20 @@ stdenv.mkDerivation rec { nativeBuildInputs = [ pkgconfig libtool ]; buildInputs = [ - curl python munge perl pam openssl mysql.connector-c ncurses gtk2 lua hwloc numactl - ]; + curl python munge perl pam openssl + mysql.connector-c ncurses gtk2 + lua hwloc numactl readline freeipmi + ] ++ stdenv.lib.optionals enableX11 [ libssh2 xorg.xauth ]; - configureFlags = + configureFlags = with stdenv.lib; [ "--with-munge=${munge}" "--with-ssl=${openssl.dev}" + "--with-hwloc=${hwloc.dev}" + "--with-freeipmi=${freeipmi}" "--sysconfdir=/etc/slurm" - ] ++ stdenv.lib.optional (gtk2 == null) "--disable-gtktest"; + ] ++ (optional (gtk2 == null) "--disable-gtktest") + ++ (optional enableX11 "--with-libssh2=${libssh2.dev}"); + preConfigure = '' patchShebangs ./doc/html/shtml2html.py From c8faa482fa3eae065f011505c4d244d5511171c0 Mon Sep 17 00:00:00 2001 From: Markus Kowalewski Date: Fri, 1 Jun 2018 23:42:21 +0200 Subject: [PATCH 4/6] nixos/slurm: 
update test, add test for enableStools * Add pure submit host to test 'enableStools' * Disable client.enable on control machine --- nixos/tests/slurm.nix | 23 ++++++++++++++++++----- 1 file changed, 18 insertions(+), 5 deletions(-) diff --git a/nixos/tests/slurm.nix b/nixos/tests/slurm.nix index dc4f62af564..c23d85e4002 100644 --- a/nixos/tests/slurm.nix +++ b/nixos/tests/slurm.nix @@ -1,7 +1,6 @@ import ./make-test.nix ({ pkgs, ... }: let mungekey = "mungeverryweakkeybuteasytointegratoinatest"; slurmconfig = { - client.enable = true; controlMachine = "control"; nodeName = '' control @@ -20,9 +19,12 @@ in { # TODO slrumd port and slurmctld port should be configurations and # automatically allowed by the firewall. networking.firewall.enable = false; - services.slurm = slurmconfig; + services.slurm = { + client.enable = true; + } // slurmconfig; }; in { + control = { config, pkgs, ...}: { @@ -31,17 +33,28 @@ in { server.enable = true; } // slurmconfig; }; + + submit = + { config, pkgs, ...}: + { + networking.firewall.enable = false; + services.slurm = { + enableStools = true; + } // slurmconfig; + }; + node1 = computeNode; node2 = computeNode; node3 = computeNode; }; + testScript = '' startAll; # Set up authentification across the cluster - foreach my $node (($control,$node1,$node2,$node3)) + foreach my $node (($submit,$control,$node1,$node2,$node3)) { $node->waitForUnit("default.target"); @@ -60,7 +73,7 @@ in { }; subtest "can_start_slurmd", sub { - foreach my $node (($control,$node1,$node2,$node3)) + foreach my $node (($node1,$node2,$node3)) { $node->succeed("systemctl restart slurmd.service"); $node->waitForUnit("slurmd"); @@ -72,7 +85,7 @@ in { subtest "run_distributed_command", sub { # Run `hostname` on 3 nodes of the partition (so on all the 3 nodes). 
# The output must contain the 3 different names - $control->succeed("srun -N 3 hostname | sort | uniq | wc -l | xargs test 3 -eq"); + $submit->succeed("srun -N 3 hostname | sort | uniq | wc -l | xargs test 3 -eq"); }; ''; }) From f07ddeaa6708cd6ceab295d329b722a523c8edee Mon Sep 17 00:00:00 2001 From: Markus Kowalewski Date: Sat, 2 Jun 2018 00:37:54 +0200 Subject: [PATCH 5/6] slurm: add maintainer --- pkgs/servers/computing/slurm/default.nix | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkgs/servers/computing/slurm/default.nix b/pkgs/servers/computing/slurm/default.nix index 1603abac8dc..7a03bf75802 100644 --- a/pkgs/servers/computing/slurm/default.nix +++ b/pkgs/servers/computing/slurm/default.nix @@ -60,6 +60,6 @@ stdenv.mkDerivation rec { description = "Simple Linux Utility for Resource Management"; platforms = platforms.linux; license = licenses.gpl2; - maintainers = [ maintainers.jagajaga ]; + maintainers = with maintainers; [ jagajaga markuskowa ]; }; } From d7412d0b1615260fc84d346010bcfe6ed955d4aa Mon Sep 17 00:00:00 2001 From: Markus Kowalewski Date: Sat, 2 Jun 2018 14:01:24 +0200 Subject: [PATCH 6/6] nixos/slurm: remove propagatedBuildInputs from wrappedSlurm propagatedBuildInputs = [ cfg.package etcSlurm ]; had no effect --- nixos/modules/services/computing/slurm/slurm.nix | 2 -- 1 file changed, 2 deletions(-) diff --git a/nixos/modules/services/computing/slurm/slurm.nix b/nixos/modules/services/computing/slurm/slurm.nix index cf7fc8c75c1..1e1c5bc9f03 100644 --- a/nixos/modules/services/computing/slurm/slurm.nix +++ b/nixos/modules/services/computing/slurm/slurm.nix @@ -186,8 +186,6 @@ in wrappedSlurm = pkgs.stdenv.mkDerivation { name = "wrappedSlurm"; - propagatedBuildInputs = [ cfg.package etcSlurm ]; - builder = pkgs.writeText "builder.sh" '' source $stdenv/setup mkdir -p $out/bin