81 lines
		
	
	
		
			2.1 KiB
		
	
	
	
		
			Nix
		
	
	
	
	
	
			
		
		
	
	
			81 lines
		
	
	
		
			2.1 KiB
		
	
	
	
		
			Nix
		
	
	
	
	
	
# NixOS VM test for the Slurm workload manager.
#
# Boots one `control` machine (Slurm server and client enabled) and three
# compute machines (`node1`..`node3`, client only), installs a shared munge
# key on every machine, restarts the Slurm daemons, and finally checks that
# `srun` can run a command on three distinct nodes.
import ./make-test.nix ({ pkgs, ... }:
let
  # Shared secret that the test script writes to /etc/munge/munge.key on every
  # machine. It is a fixed test value; the string is runtime data and must be
  # identical on all machines.
  mungekey = "mungeverryweakkeybuteasytointegratoinatest";

  # Slurm settings common to the controller and the compute nodes.
  slurmconfig = {
    client.enable = true;
    controlMachine = "control";
    # NOTE(review): presumably the slurm module prefixes this value with
    # `NodeName=`, so the first line registers `control` and the second line
    # registers node1..node3 -- confirm against the module.
    nodeName = ''
      control
      NodeName=node[1-3] CPUs=1 State=UNKNOWN
    '';
    partitionName = "debug Nodes=node[1-3] Default=YES MaxTime=INFINITE State=UP";
  };
in {
  name = "slurm";

  nodes =
    let
      # Machine definition shared by node1, node2 and node3.
      computeNode =
        { config, pkgs, ...}:
        {
          # TODO slurmd port and slurmctld port should be configuration
          # options and automatically allowed by the firewall.
          networking.firewall.enable = false;
          services.munge.enable = true;
          services.slurm = slurmconfig;
        };
    in {
      # The controller: the shared settings plus the Slurm server.
      control =
        { config, pkgs, ...}:
        {
          networking.firewall.enable = false;
          services.munge.enable = true;
          services.slurm = {
            server.enable = true;
          } // slurmconfig;
        };
      node1 = computeNode;
      node2 = computeNode;
      node3 = computeNode;
    };

  testScript =
  ''
  startAll;

  # Set up authentication across the cluster
  foreach my $node (($control,$node1,$node2,$node3))
  {
    $node->waitForUnit("default.target");

    # -p so this step does not fail when the directory already exists
    $node->succeed("mkdir -p /etc/munge");
    $node->succeed("echo '${mungekey}' > /etc/munge/munge.key");
    $node->succeed("chmod 0400 /etc/munge/munge.key");
    $node->succeed("systemctl restart munged");
  }

  # Restart the services since they have probably failed due to the munge init
  # failure

  subtest "can_start_slurmctld", sub {
    $control->succeed("systemctl restart slurmctld");
    $control->waitForUnit("slurmctld.service");
  };

  subtest "can_start_slurmd", sub {
    foreach my $node (($control,$node1,$node2,$node3))
    {
      $node->succeed("systemctl restart slurmd.service");
      $node->waitForUnit("slurmd");
    }
  };

  # Test that the cluster works and can distribute jobs.

  subtest "run_distributed_command", sub {
    # Run `hostname` on 3 nodes of the partition (so on all the 3 nodes).
    # The output must contain the 3 different names
    $control->succeed("srun -N 3 hostname | sort | uniq | wc -l | xargs test 3 -eq");
  };
  '';
})
