From 0c10b2baa6bf61c8ddaed7cdb6c2f2dbaab42662 Mon Sep 17 00:00:00 2001
From: Allan Espinosa
Date: Sun, 20 May 2018 21:09:31 -0400
Subject: [PATCH] nixos/hadoop: add hadoop module (hdfs, yarn)

Add a services.hadoop NixOS module:

* coreSite, hdfsSite, mapredSite and yarnSite attribute sets are rendered
  into core-site.xml, hdfs-site.xml, mapred-site.xml and yarn-site.xml in
  a generated configuration directory that is handed to the daemons with
  --config.
* hdfs.namenode.enabled and hdfs.datanode.enabled start the hdfs-namenode
  and hdfs-datanode systemd units as the "hdfs" user.
* yarn.resourcemanager.enabled and yarn.nodemanager.enabled start the
  yarn-resourcemanager and yarn-nodemanager systemd units as the "yarn"
  user.
* Static uids/gids 294-297 are reserved for yarn, hdfs, mapred and hadoop;
  the hdfs and yarn users belong to the "hadoop" group.

NixOS tests for HDFS and YARN are added under nixos/tests/hadoop and are
wired into release.nix for x86_64-linux.
---
 nixos/modules/misc/ids.nix                    |  8 ++
 nixos/modules/module-list.nix                 |  1 +
 .../modules/services/cluster/hadoop/conf.nix  | 31 ++++++++
 .../services/cluster/hadoop/default.nix       | 64 ++++++++++++++++
 .../modules/services/cluster/hadoop/hdfs.nix  | 73 ++++++++++++++++++
 .../modules/services/cluster/hadoop/yarn.nix  | 74 +++++++++++++++++++
 nixos/release.nix                             |  2 +
 nixos/tests/hadoop/hdfs.nix                   | 54 ++++++++++++++
 nixos/tests/hadoop/yarn.nix                   | 46 ++++++++++++
 9 files changed, 353 insertions(+)
 create mode 100644 nixos/modules/services/cluster/hadoop/conf.nix
 create mode 100644 nixos/modules/services/cluster/hadoop/default.nix
 create mode 100644 nixos/modules/services/cluster/hadoop/hdfs.nix
 create mode 100644 nixos/modules/services/cluster/hadoop/yarn.nix
 create mode 100644 nixos/tests/hadoop/hdfs.nix
 create mode 100644 nixos/tests/hadoop/yarn.nix

diff --git a/nixos/modules/misc/ids.nix b/nixos/modules/misc/ids.nix
index 73231edf077..aac86087f9e 100644
--- a/nixos/modules/misc/ids.nix
+++ b/nixos/modules/misc/ids.nix
@@ -317,6 +317,10 @@
       restic = 291;
       openvpn = 292;
       meguca = 293;
+      yarn = 294;
+      hdfs = 295;
+      mapred = 296;
+      hadoop = 297;
 
       # When adding a uid, make sure it doesn't match an existing gid. And don't use uids above 399!
 
@@ -594,6 +598,10 @@
       restic = 291;
       openvpn = 292;
       meguca = 293;
+      yarn = 294;
+      hdfs = 295;
+      mapred = 296;
+      hadoop = 297;
 
       # When adding a gid, make sure it doesn't match an existing
       # uid. Users and groups with the same name should have equal
diff --git a/nixos/modules/module-list.nix b/nixos/modules/module-list.nix
index c425f3c6507..5ed01a8da40 100644
--- a/nixos/modules/module-list.nix
+++ b/nixos/modules/module-list.nix
@@ -178,6 +178,7 @@
   ./services/backup/rsnapshot.nix
   ./services/backup/tarsnap.nix
   ./services/backup/znapzend.nix
+  ./services/cluster/hadoop/default.nix
   ./services/cluster/kubernetes/default.nix
   ./services/cluster/kubernetes/dns.nix
   ./services/cluster/kubernetes/dashboard.nix
diff --git a/nixos/modules/services/cluster/hadoop/conf.nix b/nixos/modules/services/cluster/hadoop/conf.nix
new file mode 100644
index 00000000000..38db10406b9
--- /dev/null
+++ b/nixos/modules/services/cluster/hadoop/conf.nix
@@ -0,0 +1,31 @@
+{ hadoop, pkgs }:
+let
+  propertyXml = name: value: ''
+    <property>
+      <name>${name}</name>
+      <value>${builtins.toString value}</value>
+    </property>
+  '';
+  siteXml = fileName: properties: pkgs.writeTextDir fileName ''
+    <?xml version="1.0" encoding="UTF-8" standalone="no"?>
+    <!-- generated by NixOS -->
+    <configuration>
+      ${builtins.concatStringsSep "\n" (pkgs.lib.mapAttrsToList propertyXml properties)}
+    </configuration>
+  '';
+  userFunctions = ''
+    hadoop_verify_logdir() {
+      echo Skipping verification of log directory
+    }
+  '';
+in
+pkgs.buildEnv {
+  name = "hadoop-conf";
+  paths = [
+    (siteXml "core-site.xml" hadoop.coreSite)
+    (siteXml "hdfs-site.xml" hadoop.hdfsSite)
+    (siteXml "mapred-site.xml" hadoop.mapredSite)
+    (siteXml "yarn-site.xml" hadoop.yarnSite)
+    (pkgs.writeTextDir "hadoop-user-functions.sh" userFunctions)
+  ];
+}
diff --git a/nixos/modules/services/cluster/hadoop/default.nix b/nixos/modules/services/cluster/hadoop/default.nix
new file mode 100644
index 00000000000..53c13fd0603
--- /dev/null
+++ b/nixos/modules/services/cluster/hadoop/default.nix
@@ -0,0 +1,64 @@
+{ config, lib, pkgs, ...}:
+let
+  cfg = config.services.hadoop;
+  hadoopConf = import ./conf.nix { hadoop = cfg; pkgs = pkgs; };
+in
+with lib;
+{
+  imports = [ ./yarn.nix ./hdfs.nix ];
+
+  options.services.hadoop = {
+    coreSite = mkOption {
+      default = {};
+      example = {
+        "fs.defaultFS" = "hdfs://localhost";
+      };
+      description = "Hadoop core-site.xml definition";
+    };
+
+    hdfsSite = mkOption {
+      default = {};
+      example = {
+        "dfs.nameservices" = "namenode1";
+      };
+      description = "Hadoop hdfs-site.xml definition";
+    };
+
+    mapredSite = mkOption {
+      default = {};
+      example = {
+        "mapreduce.map.cpu.vcores" = "1";
+      };
+      description = "Hadoop mapred-site.xml definition";
+    };
+
+    yarnSite = mkOption {
+      default = {};
+      example = {
+        "yarn.resourcemanager.ha.id" = "resourcemanager1";
+      };
+      description = "Hadoop yarn-site.xml definition";
+    };
+
+    package = mkOption {
+      type = types.package;
+      default = pkgs.hadoop;
+      defaultText = "pkgs.hadoop";
+      example = literalExample "pkgs.hadoop";
+      description = ''
+        Hadoop package that provides the hdfs and yarn commands run by the HDFS and YARN services.
+      '';
+    };
+  };
+
+
+  config = mkMerge [
+    (mkIf (builtins.hasAttr "yarn" config.users.extraUsers ||
+           builtins.hasAttr "hdfs" config.users.extraUsers ) {
+      users.extraGroups.hadoop = {
+        gid = config.ids.gids.hadoop;
+      };
+    })
+
+  ];
+}
diff --git a/nixos/modules/services/cluster/hadoop/hdfs.nix b/nixos/modules/services/cluster/hadoop/hdfs.nix
new file mode 100644
index 00000000000..48020e6139c
--- /dev/null
+++ b/nixos/modules/services/cluster/hadoop/hdfs.nix
@@ -0,0 +1,73 @@
+{ config, lib, pkgs, ...}:
+let
+  cfg = config.services.hadoop;
+  hadoopConf = import ./conf.nix { hadoop = cfg; pkgs = pkgs; };
+in
+with lib;
+{
+  options.services.hadoop.hdfs = {
+    namenode.enabled = mkOption {
+      type = types.bool;
+      default = false;
+      description = ''
+        Whether to run the Hadoop HDFS NameNode
+      '';
+    };
+    datanode.enabled = mkOption {
+      type = types.bool;
+      default = false;
+      description = ''
+        Whether to run the Hadoop HDFS DataNode
+      '';
+    };
+  };
+
+  config = mkMerge [
+    (mkIf cfg.hdfs.namenode.enabled {
+      systemd.services."hdfs-namenode" = {
+        description = "Hadoop HDFS NameNode";
+        wantedBy = [ "multi-user.target" ];
+
+        environment = {
+          HADOOP_HOME = "${cfg.package}";
+        };
+
+        preStart = ''
+          ${cfg.package}/bin/hdfs --config ${hadoopConf} namenode -format -nonInteractive || true
+        '';
+
+        serviceConfig = {
+          User = "hdfs";
+          SyslogIdentifier = "hdfs-namenode";
+          ExecStart = "${cfg.package}/bin/hdfs --config ${hadoopConf} namenode";
+        };
+      };
+    })
+    (mkIf cfg.hdfs.datanode.enabled {
+      systemd.services."hdfs-datanode" = {
+        description = "Hadoop HDFS DataNode";
+        wantedBy = [ "multi-user.target" ];
+
+        environment = {
+          HADOOP_HOME = "${cfg.package}";
+        };
+
+        serviceConfig = {
+          User = "hdfs";
+          SyslogIdentifier = "hdfs-datanode";
+          ExecStart = "${cfg.package}/bin/hdfs --config ${hadoopConf} datanode";
+        };
+      };
+    })
+    (mkIf (
+        cfg.hdfs.namenode.enabled || cfg.hdfs.datanode.enabled
+    ) {
+      users.extraUsers.hdfs = {
+        description = "Hadoop HDFS user";
+        group = "hadoop";
+        uid = config.ids.uids.hdfs;
+      };
+    })
+
+  ];
+}
diff --git a/nixos/modules/services/cluster/hadoop/yarn.nix b/nixos/modules/services/cluster/hadoop/yarn.nix
new file mode 100644
index 00000000000..ce5b04a331c
--- /dev/null
+++ b/nixos/modules/services/cluster/hadoop/yarn.nix
@@ -0,0 +1,74 @@
+{ config, lib, pkgs, ...}:
+let
+  cfg = config.services.hadoop;
+  hadoopConf = import ./conf.nix { hadoop = cfg; pkgs = pkgs; };
+in
+with lib;
+{
+  options.services.hadoop.yarn = {
+    resourcemanager.enabled = mkOption {
+      type = types.bool;
+      default = false;
+      description = ''
+        Whether to run the Hadoop YARN ResourceManager
+      '';
+    };
+    nodemanager.enabled = mkOption {
+      type = types.bool;
+      default = false;
+      description = ''
+        Whether to run the Hadoop YARN NodeManager
+      '';
+    };
+  };
+
+  config = mkMerge [
+    (mkIf (
+        cfg.yarn.resourcemanager.enabled || cfg.yarn.nodemanager.enabled
+    ) {
+
+      users.extraUsers.yarn = {
+        description = "Hadoop YARN user";
+        group = "hadoop";
+        uid = config.ids.uids.yarn;
+      };
+    })
+
+    (mkIf cfg.yarn.resourcemanager.enabled {
+      systemd.services."yarn-resourcemanager" = {
+        description = "Hadoop YARN ResourceManager";
+        wantedBy = [ "multi-user.target" ];
+
+        environment = {
+          HADOOP_HOME = "${cfg.package}";
+        };
+
+        serviceConfig = {
+          User = "yarn";
+          SyslogIdentifier = "yarn-resourcemanager";
+          ExecStart = "${cfg.package}/bin/yarn --config ${hadoopConf} " +
+                      " resourcemanager";
+        };
+      };
+    })
+
+    (mkIf cfg.yarn.nodemanager.enabled {
+      systemd.services."yarn-nodemanager" = {
+        description = "Hadoop YARN NodeManager";
+        wantedBy = [ "multi-user.target" ];
+
+        environment = {
+          HADOOP_HOME = "${cfg.package}";
+        };
+
+        serviceConfig = {
+          User = "yarn";
+          SyslogIdentifier = "yarn-nodemanager";
+          ExecStart = "${cfg.package}/bin/yarn --config ${hadoopConf} " +
+                      " nodemanager";
+        };
+      };
+    })
+
+  ];
+}
diff --git a/nixos/release.nix b/nixos/release.nix
index e494fa35029..93566762b3a 100644
--- a/nixos/release.nix
+++ b/nixos/release.nix
@@ -299,6 +299,8 @@ in rec {
   tests.gnome3-gdm = callTest tests/gnome3-gdm.nix {};
   tests.grafana = callTest tests/grafana.nix {};
   tests.graphite = callTest tests/graphite.nix {};
+  tests.hadoop.hdfs = callTestOnMatchingSystems [ "x86_64-linux" ] tests/hadoop/hdfs.nix {};
+  tests.hadoop.yarn = callTestOnMatchingSystems [ "x86_64-linux" ] tests/hadoop/yarn.nix {};
   tests.hardened = callTest tests/hardened.nix { };
   tests.haproxy = callTest tests/haproxy.nix {};
   tests.hibernate = callTest tests/hibernate.nix {};
diff --git a/nixos/tests/hadoop/hdfs.nix b/nixos/tests/hadoop/hdfs.nix
new file mode 100644
index 00000000000..4206c940c1a
--- /dev/null
+++ b/nixos/tests/hadoop/hdfs.nix
@@ -0,0 +1,54 @@
+import ../make-test.nix ({pkgs, ...}: {
+  nodes = {
+    namenode = {pkgs, config, ...}: {
+      services.hadoop = {
+        package = pkgs.hadoop_3_1;
+        hdfs.namenode.enabled = true;
+        coreSite = {
+          "fs.defaultFS" = "hdfs://namenode:8020";
+        };
+        hdfsSite = {
+          "dfs.replication" = 1;
+          "dfs.namenode.rpc-bind-host" = "0.0.0.0";
+          "dfs.namenode.http-bind-host" = "0.0.0.0";
+        };
+      };
+      networking.firewall.allowedTCPPorts = [
+        9870 # namenode.http-address
+        8020 # namenode.rpc-address
+      ];
+    };
+    datanode = {pkgs, config, ...}: {
+      services.hadoop = {
+        package = pkgs.hadoop_3_1;
+        hdfs.datanode.enabled = true;
+        coreSite = {
+          "fs.defaultFS" = "hdfs://namenode:8020";
+        };
+      };
+      networking.firewall.allowedTCPPorts = [
+        9864 # datanode.http.address
+        9866 # datanode.address
+        9867 # datanode.ipc.address
+      ];
+    };
+  };
+
+  testScript = ''
+    startAll;
+
+    $namenode->waitForUnit("hdfs-namenode");
+    $namenode->waitForUnit("network.target");
+    $namenode->waitForOpenPort(8020);
+    $namenode->waitForOpenPort(9870);
+
+    $datanode->waitForUnit("hdfs-datanode");
+    $datanode->waitForUnit("network.target");
+    $datanode->waitForOpenPort(9864);
+    $datanode->waitForOpenPort(9866);
+    $datanode->waitForOpenPort(9867);
+
+    $namenode->succeed("curl http://namenode:9870");
+    $datanode->succeed("curl http://datanode:9864");
+  '';
+})
diff --git a/nixos/tests/hadoop/yarn.nix b/nixos/tests/hadoop/yarn.nix
new file mode 100644
index 00000000000..e97cc1acc90
--- /dev/null
+++ b/nixos/tests/hadoop/yarn.nix
@@ -0,0 +1,46 @@
+import ../make-test.nix ({pkgs, ...}: {
+  nodes = {
+    resourcemanager = {pkgs, config, ...}: {
+      services.hadoop.package = pkgs.hadoop_3_1;
+      services.hadoop.yarn.resourcemanager.enabled = true;
+      services.hadoop.yarnSite = {
+        "yarn.resourcemanager.scheduler.class" = "org.apache.hadoop.yarn.server.resourcemanager.scheduler.fifo.FifoScheduler";
+      };
+      networking.firewall.allowedTCPPorts = [
+        8088 # resourcemanager.webapp.address
+        8031 # resourcemanager.resource-tracker.address
+      ];
+    };
+    nodemanager = {pkgs, config, ...}: {
+      services.hadoop.package = pkgs.hadoop_3_1;
+      services.hadoop.yarn.nodemanager.enabled = true;
+      services.hadoop.yarnSite = {
+        "yarn.resourcemanager.hostname" = "resourcemanager";
+        "yarn.nodemanager.log-dirs" = "/tmp/userlogs";
+        "yarn.nodemanager.address" = "0.0.0.0:8041";
+      };
+      networking.firewall.allowedTCPPorts = [
+        8042 # nodemanager.webapp.address
+        8041 # nodemanager.address
+      ];
+    };
+
+  };
+
+  testScript = ''
+    startAll;
+
+    $resourcemanager->waitForUnit("yarn-resourcemanager");
+    $resourcemanager->waitForUnit("network.target");
+    $resourcemanager->waitForOpenPort(8031);
+    $resourcemanager->waitForOpenPort(8088);
+
+    $nodemanager->waitForUnit("yarn-nodemanager");
+    $nodemanager->waitForUnit("network.target");
+    $nodemanager->waitForOpenPort(8042);
+    $nodemanager->waitForOpenPort(8041);
+
+    $resourcemanager->succeed("curl http://localhost:8088");
+    $nodemanager->succeed("curl http://localhost:8042");
+  '';
+})