diff --git a/system/ids.nix b/system/ids.nix
index 8c8d44cddf4..abbd5c58c6e 100644
--- a/system/ids.nix
+++ b/system/ids.nix
@@ -7,6 +7,7 @@
     ntp = 3;
     messagebus = 4; # D-Bus
     haldaemon = 5;
+    nagios = 6;
     nixbld = 30000; # start of range of uids
     nobody = 65534;
   };
diff --git a/system/options.nix b/system/options.nix
index 8cbd1171db7..bfad4e37305 100644
--- a/system/options.nix
+++ b/system/options.nix
@@ -1180,6 +1180,36 @@
 
     };
 
+    nagios = {
+
+      enable = mkOption {
+        default = false;
+        description = "
+          Whether to use Nagios to monitor
+          your system or network.
+        ";
+      };
+
+      objectDefs = mkOption {
+        description = "
+          A list of Nagios object configuration files that must define
+          the hosts, host groups, services and contacts for the
+          network that you want Nagios to monitor.
+        ";
+      };
+
+      plugins = mkOption {
+        default = [pkgs.nagiosPluginsOfficial pkgs.ssmtp];
+        description = "
+          Packages to be added to the Nagios PATH.
+          Typically used to add plugins, but can be anything.
+        ";
+      };
+
+    };
+
   };
 
 
diff --git a/upstart-jobs/default.nix b/upstart-jobs/default.nix
index 06131fab725..040119ecc73 100644
--- a/upstart-jobs/default.nix
+++ b/upstart-jobs/default.nix
@@ -226,6 +226,12 @@
     import ../upstart-jobs/gather.nix {
       inherit (pkgs) stdenv hal;
     })
+  # Nagios system/network monitoring daemon.
+  ++ optional config.services.nagios.enable
+    (import ../upstart-jobs/nagios {
+      inherit config pkgs;
+    })
+
   # Handles the reboot/halt events.
   ++ (map
     (event: makeJob (import ../upstart-jobs/halt.nix {
diff --git a/upstart-jobs/make-job.nix b/upstart-jobs/make-job.nix
index e6c92bb4d3a..14f48ae04d6 100644
--- a/upstart-jobs/make-job.nix
+++ b/upstart-jobs/make-job.nix
@@ -5,8 +5,12 @@
       job.jobDrv
     else
       (
-        runCommand ("upstart-" + job.name) {inherit (job) job; jobName = job.name;}
-          "ensureDir $out/etc/event.d; echo \"$job\" > $out/etc/event.d/$jobName"
+        runCommand ("upstart-" + job.name)
+          { inherit (job) job;
+            jobName = job.name;
+            buildHook = if job ? buildHook then job.buildHook else "true";
+          }
+          "eval \"$buildHook\"; ensureDir $out/etc/event.d; echo \"$job\" > $out/etc/event.d/$jobName"
       )
 )
 
diff --git a/upstart-jobs/nagios/commands.cfg b/upstart-jobs/nagios/commands.cfg
new file mode 100644
index 00000000000..feeb0155932
--- /dev/null
+++ b/upstart-jobs/nagios/commands.cfg
@@ -0,0 +1,34 @@
+define command {
+    command_name host-notify-by-email
+    command_line printf "%b" "Subject: [Nagios] Host $HOSTSTATE$ alert for $HOSTNAME$\n\n***** Nagios *****\n\nNotification Type: $NOTIFICATIONTYPE$\nHost: $HOSTNAME$\nState: $HOSTSTATE$\nAddress: $HOSTADDRESS$\nInfo: $HOSTOUTPUT$\n\nDate/Time: $LONGDATETIME$\n" | sendmail $CONTACTEMAIL$
+}
+
+
+define command{
+    command_name notify-by-email
+    command_line printf "%b" "Subject: [Nagios] $NOTIFICATIONTYPE$ alert - $HOSTALIAS$/$SERVICEDESC$ is $SERVICESTATE$\n\n***** Nagios *****\n\nNotification Type: $NOTIFICATIONTYPE$\nService: $SERVICEDESC$\nHost: $HOSTALIAS$\nAddress: $HOSTADDRESS$\nState: $SERVICESTATE$\n\nDate/Time: $LONGDATETIME$\n\nAdditional Info:\n\n$SERVICEOUTPUT$" | sendmail $CONTACTEMAIL$
+}
+
+
+define command {
+    command_name dummy-ok
+    command_line true
+}
+
+
+define command {
+    command_name check-host-alive
+    command_line check_ping -H $HOSTADDRESS$ -w 3000.0,80% -c 5000.0,100% -p 1
+}
+
+
+define command {
+    command_name check_local_disk
+    command_line check_disk -w $ARG1$ -c $ARG2$ -p $ARG3$
+}
+
+
+define command {
+    command_name check_ssh
+    command_line check_ssh $HOSTADDRESS$
+}
diff --git a/upstart-jobs/nagios/default.nix b/upstart-jobs/nagios/default.nix
new file mode 100644
index 00000000000..0dc31feaf61
--- /dev/null
+++ b/upstart-jobs/nagios/default.nix
@@ -0,0 +1,98 @@
+# Upstart job for the Nagios system/network monitoring daemon.
+#
+# Generates a nagios.cfg and an object definition directory from
+# config.services.nagios, and returns an attribute set with the
+# job's name, users, buildHook and Upstart job text.
+{config, pkgs}:
+
+let
+
+  nagiosUser = "nagios";
+  nagiosGroup = "nogroup";
+
+  nagiosState = "/var/lib/nagios";
+  nagiosLogDir = "/var/log/nagios";
+
+  nagiosObjectDefs = [
+    ./timeperiods.cfg
+    ./host-templates.cfg
+    ./service-templates.cfg
+    ./commands.cfg
+  ] ++ config.services.nagios.objectDefs;
+
+  # Pass the list as a derivation attribute rather than splicing
+  # `toString' output into the command: `toString' leaves local paths
+  # pointing into the source tree, whereas attributes are copied to
+  # the store and become inputs of the derivation.
+  nagiosObjectDefsDir = pkgs.runCommand "nagios-objects" {inherit nagiosObjectDefs;}
+    "ensureDir $out; ln -s $nagiosObjectDefs $out/";
+
+  nagiosCfgFile = pkgs.writeText "nagios.cfg" "
+
+    # Paths for state and logs.
+    log_file=${nagiosLogDir}/current
+    log_archive_path=${nagiosLogDir}/archive
+    status_file=${nagiosState}/status.dat
+    object_cache_file=${nagiosState}/objects.cache
+    comment_file=${nagiosState}/comment.dat
+    downtime_file=${nagiosState}/downtime.dat
+    temp_file=${nagiosState}/nagios.tmp
+    # The lock file is probably not used (Nagios is not started with -d).
+    # NB: comments must be on their own line; a trailing one ends up in the value.
+    lock_file=/var/run/nagios.lock
+    state_retention_file=${nagiosState}/retention.dat
+
+    # Configuration files.
+    #resource_file=resource.cfg
+    cfg_dir=${nagiosObjectDefsDir}
+
+    # Uid/gid that the daemon runs under.
+    nagios_user=${nagiosUser}
+    nagios_group=${nagiosGroup}
+
+    # Misc. options.
+    illegal_macro_output_chars=`~$&|'\"<>
+    retain_state_information=1
+
+  ";
+
+in
+
+{
+  name = "nagios";
+
+  users = [
+    { name = nagiosUser;
+      uid = (import ../../system/ids.nix).uids.nagios;
+      description = "Nagios monitoring daemon";
+      home = nagiosState;
+    }
+  ];
+
+  # Run `nagios -v' to check the validity of the configuration file so
+  # that a nixos-rebuild fails *before* we kill the running Nagios
+  # daemon.
+  buildHook = "${pkgs.nagios}/bin/nagios -v ${nagiosCfgFile}";
+
+  job = "
+    description \"Nagios monitoring daemon\"
+
+    start on network-interfaces/started
+    stop on network-interfaces/stop
+
+    start script
+      mkdir -m 0755 -p ${nagiosState} ${nagiosLogDir}
+      chown ${nagiosUser} ${nagiosState} ${nagiosLogDir}
+    end script
+
+    respawn
+
+    script
+      for i in ${toString config.services.nagios.plugins}; do
+        export PATH=$i/bin:$i/sbin:$i/libexec:$PATH
+      done
+      exec ${pkgs.nagios}/bin/nagios ${nagiosCfgFile}
+    end script
+  ";
+
+}
diff --git a/upstart-jobs/nagios/host-templates.cfg b/upstart-jobs/nagios/host-templates.cfg
new file mode 100644
index 00000000000..3a4c269e257
--- /dev/null
+++ b/upstart-jobs/nagios/host-templates.cfg
@@ -0,0 +1,27 @@
+define host {
+    name generic-host
+    notifications_enabled 1
+    event_handler_enabled 1
+    flap_detection_enabled 1
+    failure_prediction_enabled 1
+    process_perf_data 1
+    retain_status_information 1
+    retain_nonstatus_information 1
+    notification_period 24x7
+    register 0
+}
+
+
+define host {
+    name generic-server
+    use generic-host
+    check_period 24x7
+    max_check_attempts 10
+    check_command check-host-alive
+    notification_period 24x7
+    notification_interval 120
+    notification_options d,u,r
+    contact_groups admins
+    register 0
+    #check_interval 1
+}
diff --git a/upstart-jobs/nagios/service-templates.cfg b/upstart-jobs/nagios/service-templates.cfg
new file mode 100644
index 00000000000..e729ea77675
--- /dev/null
+++ b/upstart-jobs/nagios/service-templates.cfg
@@ -0,0 +1,32 @@
+define service {
+    name generic-service
+    active_checks_enabled 1
+    passive_checks_enabled 1
+    parallelize_check 1
+    obsess_over_service 1
+    check_freshness 0
+    notifications_enabled 1
+    event_handler_enabled 1
+    flap_detection_enabled 1
+    failure_prediction_enabled 1
+    process_perf_data 1
+    retain_status_information 1
+    retain_nonstatus_information 1
+    is_volatile 0
+    register 0
+}
+
+
+define service {
+    name local-service
+    use generic-service
+    check_period 24x7
+    max_check_attempts 4
+    normal_check_interval 5
+    retry_check_interval 1
+    contact_groups admins
+    notification_options w,u,c,r
+    notification_interval 0 ; notify only once
+    notification_period 24x7
+    register 0
+}
diff --git a/upstart-jobs/nagios/timeperiods.cfg b/upstart-jobs/nagios/timeperiods.cfg
new file mode 100644
index 00000000000..2669be54d3d
--- /dev/null
+++ b/upstart-jobs/nagios/timeperiods.cfg
@@ -0,0 +1,11 @@
+define timeperiod {
+    timeperiod_name 24x7
+    alias 24 Hours A Day, 7 Days A Week
+    sunday 00:00-24:00
+    monday 00:00-24:00
+    tuesday 00:00-24:00
+    wednesday 00:00-24:00
+    thursday 00:00-24:00
+    friday 00:00-24:00
+    saturday 00:00-24:00
+}