81 lines
		
	
	
		
			2.1 KiB
		
	
	
	
		
			Nix
		
	
	
	
	
	
			
		
		
	
	
			81 lines
		
	
	
		
			2.1 KiB
		
	
	
	
		
			Nix
		
	
	
	
	
	
import ./make-test.nix ({ pkgs, ... }:
 | 
						||
let
  # Pre-shared MUNGE key; testScript writes it to /etc/munge/munge.key on
  # every machine. Deliberately weak: it only has to work inside the test VMs.
  mungekey = "mungeverryweakkeybuteasytointegratoinatest";

  # services.slurm settings used by every machine of the test cluster.
  slurmconfig = {
    controlMachine = "control";
    client = { enable = true; };
    # Two-line value: the first line names the control host, the second
    # carries a NodeName= entry covering node1..node3.
    # NOTE(review): presumably the slurm module prefixes this value with
    # "NodeName=" when generating slurm.conf; confirm against the module.
    nodeName = ''
      control
      NodeName=node[1-3] CPUs=1 State=UNKNOWN
    '';
    partitionName = "debug Nodes=node[1-3] Default=YES MaxTime=INFINITE State=UP";
  };
 | 
						||
in {
 | 
						||
  name = "slurm";
 | 
						||
 | 
						||
  nodes =
    let
      # NixOS module shared by the three worker machines (node1..node3).
      workerNode = { config, pkgs, ... }: {
        # TODO: the slurmd and slurmctld ports should be configuration
        # options that the firewall allows automatically; until then the
        # firewall is simply switched off.
        networking.firewall.enable = false;

        services = {
          munge.enable = true;
          slurm = slurmconfig;
        };
      };
    in
    {
      # Controller machine: same base settings as the workers, plus the
      # Slurm server.
      control = { config, pkgs, ... }: {
        networking.firewall.enable = false;

        services = {
          munge.enable = true;
          # `//` is a shallow merge in which the right-hand side wins;
          # slurmconfig defines no `server` attribute, so server.enable
          # is kept in the result.
          slurm = { server = { enable = true; }; } // slurmconfig;
        };
      };

      node1 = workerNode;
      node2 = workerNode;
      node3 = workerNode;
    };
 | 
						||
 | 
						||
  # Perl test script for the NixOS test driver. It installs the shared MUNGE
  # key on every machine, restarts the Slurm daemons, and then checks that a
  # job submitted from the controller is spread over all three workers.
  testScript =
  ''
  startAll;

  # Every machine of the cluster. The controller is included because it
  # also gets the munge key and runs slurmd.
  my @cluster = ($control, $node1, $node2, $node3);

  # Set up authentication across the cluster
  foreach my $node (@cluster)
  {
    $node->waitForUnit("default.target");

    # -p keeps this step from failing when /etc/munge already exists.
    $node->succeed("mkdir -p /etc/munge");
    $node->succeed("echo '${mungekey}' > /etc/munge/munge.key");
    $node->succeed("chmod 0400 /etc/munge/munge.key");
    $node->succeed("systemctl restart munged");
  }

  # Restart the services since they have probably failed due to the munge
  # init failure.

  subtest "can_start_slurmctld", sub {
    $control->succeed("systemctl restart slurmctld");
    $control->waitForUnit("slurmctld.service");
  };

  subtest "can_start_slurmd", sub {
    foreach my $node (@cluster)
    {
      $node->succeed("systemctl restart slurmd.service");
      $node->waitForUnit("slurmd");
    }
  };

  # Test that the cluster works and can distribute jobs.

  subtest "run_distributed_command", sub {
    # Run `hostname` on 3 nodes of the partition (so on all the 3 nodes).
    # The output must contain the 3 different names.
    $control->succeed("srun -N 3 hostname | sort | uniq | wc -l | xargs test 3 -eq");
  };
  '';
 | 
						||
})
 |