| 
									
										
										
										
											2020-07-02 14:27:43 +02:00
										 |  |  |  | import ./make-test-python.nix ({ lib, pkgs, ... }: | 
					
						
							| 
									
										
										
										
											2018-09-15 13:09:36 +02:00
										 |  |  |  | let | 
					
						
							| 
									
										
										
										
											2015-12-25 15:55:07 +01:00
										 |  |  |  |     slurmconfig = { | 
					
						
							| 
									
										
										
										
											2020-07-02 14:27:43 +02:00
										 |  |  |  |       services.slurm = { | 
					
						
							|  |  |  |  |         controlMachine = "control"; | 
					
						
							|  |  |  |  |         nodeName = [ "node[1-3] CPUs=1 State=UNKNOWN" ]; | 
					
						
							|  |  |  |  |         partitionName = [ "debug Nodes=node[1-3] Default=YES MaxTime=INFINITE State=UP" ]; | 
					
						
							|  |  |  |  |         extraConfig = ''
 | 
					
						
							|  |  |  |  |           AccountingStorageHost=dbd | 
					
						
							|  |  |  |  |           AccountingStorageType=accounting_storage/slurmdbd | 
					
						
							|  |  |  |  |         '';
 | 
					
						
							|  |  |  |  |       }; | 
					
						
							|  |  |  |  |       environment.systemPackages = [ mpitest ]; | 
					
						
							|  |  |  |  |       networking.firewall.enable = false; | 
					
						
							|  |  |  |  |       systemd.tmpfiles.rules = [ | 
					
						
							|  |  |  |  |         "f /etc/munge/munge.key 0400 munge munge - mungeverryweakkeybuteasytointegratoinatest" | 
					
						
							|  |  |  |  |       ]; | 
					
						
							| 
									
										
										
										
											2015-12-25 15:55:07 +01:00
										 |  |  |  |     }; | 
					
						
							| 
									
										
										
										
											2020-07-02 14:27:43 +02:00
										 |  |  |  | 
 | 
					
						
							|  |  |  |  |     mpitest = let | 
					
						
							|  |  |  |  |       mpitestC = pkgs.writeText "mpitest.c" ''
 | 
					
						
							|  |  |  |  |         #include <stdio.h> | 
					
						
							|  |  |  |  |         #include <stdlib.h> | 
					
						
							|  |  |  |  |         #include <mpi.h> | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  |         int | 
					
						
							|  |  |  |  |         main (int argc, char *argv[]) | 
					
						
							|  |  |  |  |         { | 
					
						
							|  |  |  |  |           int rank, size, length; | 
					
						
							|  |  |  |  |           char name[512]; | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  |           MPI_Init (&argc, &argv); | 
					
						
							|  |  |  |  |           MPI_Comm_rank (MPI_COMM_WORLD, &rank); | 
					
						
							|  |  |  |  |           MPI_Comm_size (MPI_COMM_WORLD, &size); | 
					
						
							|  |  |  |  |           MPI_Get_processor_name (name, &length); | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  |           if ( rank == 0 ) printf("size=%d\n", size); | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  |           printf ("%s: hello world from process %d of %d\n", name, rank, size); | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  |           MPI_Finalize (); | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  |           return EXIT_SUCCESS; | 
					
						
							|  |  |  |  |         } | 
					
						
							|  |  |  |  |       '';
 | 
					
						
							|  |  |  |  |     in pkgs.runCommandNoCC "mpitest" {} ''
 | 
					
						
							|  |  |  |  |       mkdir -p $out/bin | 
					
						
							|  |  |  |  |       ${pkgs.openmpi}/bin/mpicc ${mpitestC} -o $out/bin/mpitest | 
					
						
							|  |  |  |  |     '';
 | 
					
						
							| 
									
										
										
										
											2015-12-25 15:55:07 +01:00
										 |  |  |  | in { | 
					
						
							|  |  |  |  |   name = "slurm"; | 
					
						
							|  |  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2018-10-25 21:03:23 +02:00
										 |  |  |  |   meta.maintainers = [ lib.maintainers.markuskowa ]; | 
					
						
							|  |  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-12-25 15:55:07 +01:00
										 |  |  |  |   nodes = | 
					
						
							|  |  |  |  |     let | 
					
						
							|  |  |  |  |     computeNode = | 
					
						
							| 
									
										
										
										
											2018-07-20 20:56:59 +00:00
										 |  |  |  |       { ...}: | 
					
						
							| 
									
										
										
										
											2015-12-25 15:55:07 +01:00
										 |  |  |  |       { | 
					
						
							| 
									
										
										
										
											2020-07-02 14:27:43 +02:00
										 |  |  |  |         imports = [ slurmconfig ]; | 
					
						
							| 
									
										
										
										
											2018-09-15 13:09:36 +02:00
										 |  |  |  |         # TODO slurmd port and slurmctld port should be configurations and | 
					
						
							| 
									
										
										
										
											2015-12-25 15:55:07 +01:00
										 |  |  |  |         # automatically allowed by the  firewall. | 
					
						
							| 
									
										
										
										
											2018-06-01 23:42:21 +02:00
										 |  |  |  |         services.slurm = { | 
					
						
							|  |  |  |  |           client.enable = true; | 
					
						
							| 
									
										
										
										
											2020-07-02 14:27:43 +02:00
										 |  |  |  |         }; | 
					
						
							| 
									
										
										
										
											2015-12-25 15:55:07 +01:00
										 |  |  |  |       }; | 
					
						
							|  |  |  |  |     in { | 
					
						
							| 
									
										
										
										
											2018-06-01 23:42:21 +02:00
										 |  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-12-25 15:55:07 +01:00
										 |  |  |  |     control = | 
					
						
							| 
									
										
										
										
											2018-07-20 20:56:59 +00:00
										 |  |  |  |       { ...}: | 
					
						
							| 
									
										
										
										
											2015-12-25 15:55:07 +01:00
										 |  |  |  |       { | 
					
						
							| 
									
										
										
										
											2020-07-02 14:27:43 +02:00
										 |  |  |  |         imports = [ slurmconfig ]; | 
					
						
							| 
									
										
										
										
											2015-12-25 15:55:07 +01:00
										 |  |  |  |         services.slurm = { | 
					
						
							|  |  |  |  |           server.enable = true; | 
					
						
							| 
									
										
										
										
											2020-07-02 14:27:43 +02:00
										 |  |  |  |         }; | 
					
						
							| 
									
										
										
										
											2015-12-25 15:55:07 +01:00
										 |  |  |  |       }; | 
					
						
							| 
									
										
										
										
											2018-06-01 23:42:21 +02:00
										 |  |  |  | 
 | 
					
						
							|  |  |  |  |     submit = | 
					
						
							| 
									
										
										
										
											2018-07-20 20:56:59 +00:00
										 |  |  |  |       { ...}: | 
					
						
							| 
									
										
										
										
											2018-06-01 23:42:21 +02:00
										 |  |  |  |       { | 
					
						
							| 
									
										
										
										
											2020-07-02 14:27:43 +02:00
										 |  |  |  |         imports = [ slurmconfig ]; | 
					
						
							| 
									
										
										
										
											2018-06-01 23:42:21 +02:00
										 |  |  |  |         services.slurm = { | 
					
						
							|  |  |  |  |           enableStools = true; | 
					
						
							| 
									
										
										
										
											2020-07-02 14:27:43 +02:00
										 |  |  |  |         }; | 
					
						
							| 
									
										
										
										
											2018-06-01 23:42:21 +02:00
										 |  |  |  |       }; | 
					
						
							|  |  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2018-09-15 13:09:36 +02:00
										 |  |  |  |     dbd = | 
					
						
							|  |  |  |  |       { pkgs, ... } : | 
					
						
							|  |  |  |  |       { | 
					
						
							|  |  |  |  |         networking.firewall.enable = false; | 
					
						
							| 
									
										
										
										
											2020-07-02 14:27:43 +02:00
										 |  |  |  |         systemd.tmpfiles.rules = [ | 
					
						
							|  |  |  |  |           "f /etc/munge/munge.key 0400 munge munge - mungeverryweakkeybuteasytointegratoinatest" | 
					
						
							|  |  |  |  |         ]; | 
					
						
							| 
									
										
										
										
											2018-09-15 13:09:36 +02:00
										 |  |  |  |         services.slurm.dbdserver = { | 
					
						
							|  |  |  |  |           enable = true; | 
					
						
							| 
									
										
										
										
											2019-11-10 21:28:09 +01:00
										 |  |  |  |           storagePass = "password123"; | 
					
						
							| 
									
										
										
										
											2018-09-15 13:09:36 +02:00
										 |  |  |  |         }; | 
					
						
							|  |  |  |  |         services.mysql = { | 
					
						
							|  |  |  |  |           enable = true; | 
					
						
							| 
									
										
										
										
											2019-11-10 21:28:09 +01:00
										 |  |  |  |           package = pkgs.mariadb; | 
					
						
							|  |  |  |  |           initialScript = pkgs.writeText "mysql-init.sql" ''
 | 
					
						
							|  |  |  |  |             CREATE USER 'slurm'@'localhost' IDENTIFIED BY 'password123'; | 
					
						
							|  |  |  |  |             GRANT ALL PRIVILEGES ON slurm_acct_db.* TO 'slurm'@'localhost'; | 
					
						
							|  |  |  |  |           '';
 | 
					
						
							| 
									
										
										
										
											2018-09-15 13:09:36 +02:00
										 |  |  |  |           ensureDatabases = [ "slurm_acct_db" ]; | 
					
						
							|  |  |  |  |           ensureUsers = [{ | 
					
						
							|  |  |  |  |             ensurePermissions = { "slurm_acct_db.*" = "ALL PRIVILEGES"; }; | 
					
						
							|  |  |  |  |             name = "slurm"; | 
					
						
							|  |  |  |  |           }]; | 
					
						
							| 
									
										
										
										
											2018-11-22 13:21:37 +01:00
										 |  |  |  |           extraOptions = ''
 | 
					
						
							|  |  |  |  |             # recommendations from: https://slurm.schedmd.com/accounting.html#mysql-configuration | 
					
						
							|  |  |  |  |             innodb_buffer_pool_size=1024M | 
					
						
							|  |  |  |  |             innodb_log_file_size=64M | 
					
						
							|  |  |  |  |             innodb_lock_wait_timeout=900 | 
					
						
							|  |  |  |  |           '';
 | 
					
						
							| 
									
										
										
										
											2018-09-15 13:09:36 +02:00
										 |  |  |  |         }; | 
					
						
							|  |  |  |  |       }; | 
					
						
							|  |  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-12-25 15:55:07 +01:00
										 |  |  |  |     node1 = computeNode; | 
					
						
							|  |  |  |  |     node2 = computeNode; | 
					
						
							|  |  |  |  |     node3 = computeNode; | 
					
						
							|  |  |  |  |   }; | 
					
						
							|  |  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2018-06-01 23:42:21 +02:00
										 |  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-12-25 15:55:07 +01:00
										 |  |  |  |   testScript = | 
					
						
							|  |  |  |  |   ''
 | 
					
						
							| 
									
										
										
										
											2019-11-10 23:07:08 +01:00
										 |  |  |  |   start_all() | 
					
						
							| 
									
										
										
										
											2015-12-25 15:55:07 +01:00
										 |  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-07-02 14:27:43 +02:00
										 |  |  |  |   # Make sure DBD is up after DB initialzation | 
					
						
							| 
									
										
										
										
											2019-11-10 23:07:08 +01:00
										 |  |  |  |   with subtest("can_start_slurmdbd"): | 
					
						
							|  |  |  |  |       dbd.succeed("systemctl restart slurmdbd") | 
					
						
							|  |  |  |  |       dbd.wait_for_unit("slurmdbd.service") | 
					
						
							|  |  |  |  |       dbd.wait_for_open_port(6819) | 
					
						
							| 
									
										
										
										
											2018-09-15 13:09:36 +02:00
										 |  |  |  | 
 | 
					
						
							|  |  |  |  |   # there needs to be an entry for the current | 
					
						
							|  |  |  |  |   # cluster in the database before slurmctld is restarted | 
					
						
							| 
									
										
										
										
											2019-11-10 23:07:08 +01:00
										 |  |  |  |   with subtest("add_account"): | 
					
						
							|  |  |  |  |       control.succeed("sacctmgr -i add cluster default") | 
					
						
							|  |  |  |  |       # check for cluster entry | 
					
						
							|  |  |  |  |       control.succeed("sacctmgr list cluster | awk '{ print $1 }' | grep default") | 
					
						
							| 
									
										
										
										
											2015-12-25 15:55:07 +01:00
										 |  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2019-11-10 23:07:08 +01:00
										 |  |  |  |   with subtest("can_start_slurmctld"): | 
					
						
							|  |  |  |  |       control.succeed("systemctl restart slurmctld") | 
					
						
							| 
									
										
										
										
											2019-12-24 19:51:18 +01:00
										 |  |  |  |       control.wait_for_unit("slurmctld.service") | 
					
						
							| 
									
										
										
										
											2015-12-25 15:55:07 +01:00
										 |  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2019-11-10 23:07:08 +01:00
										 |  |  |  |   with subtest("can_start_slurmd"): | 
					
						
							|  |  |  |  |       for node in [node1, node2, node3]: | 
					
						
							|  |  |  |  |           node.succeed("systemctl restart slurmd.service") | 
					
						
							|  |  |  |  |           node.wait_for_unit("slurmd") | 
					
						
							| 
									
										
										
										
											2015-12-25 15:55:07 +01:00
										 |  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2018-09-15 13:09:36 +02:00
										 |  |  |  |   # Test that the cluster works and can distribute jobs; | 
					
						
							| 
									
										
										
										
											2015-12-25 15:55:07 +01:00
										 |  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2019-11-10 23:07:08 +01:00
										 |  |  |  |   with subtest("run_distributed_command"): | 
					
						
							|  |  |  |  |       # Run `hostname` on 3 nodes of the partition (so on all the 3 nodes). | 
					
						
							|  |  |  |  |       # The output must contain the 3 different names | 
					
						
							|  |  |  |  |       submit.succeed("srun -N 3 hostname | sort | uniq | wc -l | xargs test 3 -eq") | 
					
						
							| 
									
										
										
										
											2018-09-15 13:09:36 +02:00
										 |  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2019-11-10 23:07:08 +01:00
										 |  |  |  |       with subtest("check_slurm_dbd"): | 
					
						
							|  |  |  |  |           # find the srun job from above in the database | 
					
						
							|  |  |  |  |           control.succeed("sleep 5") | 
					
						
							|  |  |  |  |           control.succeed("sacct | grep hostname") | 
					
						
							| 
									
										
										
										
											2020-07-02 14:27:43 +02:00
										 |  |  |  | 
 | 
					
						
							|  |  |  |  |   with subtest("run_PMIx_mpitest"): | 
					
						
							|  |  |  |  |       submit.succeed("srun -N 3 --mpi=pmix mpitest | grep size=3") | 
					
						
							| 
									
										
										
										
											2015-12-25 15:55:07 +01:00
										 |  |  |  |   '';
 | 
					
						
							|  |  |  |  | }) |