Merge pull request #41381 from aespinosa/hadoop-modules

nixos/hadoop: add hadoop module (hdfs, yarn)
This commit is contained in:
Matthew Bauer 2018-06-30 18:33:03 -04:00 committed by GitHub
commit 060a98e9f4
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
9 changed files with 352 additions and 0 deletions

View File

@ -317,6 +317,10 @@
restic = 291; restic = 291;
openvpn = 292; openvpn = 292;
meguca = 293; meguca = 293;
yarn = 294;
hdfs = 295;
mapred = 296;
hadoop = 297;
# When adding a uid, make sure it doesn't match an existing gid. And don't use uids above 399! # When adding a uid, make sure it doesn't match an existing gid. And don't use uids above 399!
@ -594,6 +598,10 @@
restic = 291; restic = 291;
openvpn = 292; openvpn = 292;
meguca = 293; meguca = 293;
yarn = 294;
hdfs = 295;
mapred = 296;
hadoop = 297;
# When adding a gid, make sure it doesn't match an existing # When adding a gid, make sure it doesn't match an existing
# uid. Users and groups with the same name should have equal # uid. Users and groups with the same name should have equal

View File

@ -178,6 +178,7 @@
./services/backup/rsnapshot.nix ./services/backup/rsnapshot.nix
./services/backup/tarsnap.nix ./services/backup/tarsnap.nix
./services/backup/znapzend.nix ./services/backup/znapzend.nix
./services/cluster/hadoop/default.nix
./services/cluster/kubernetes/default.nix ./services/cluster/kubernetes/default.nix
./services/cluster/kubernetes/dns.nix ./services/cluster/kubernetes/dns.nix
./services/cluster/kubernetes/dashboard.nix ./services/cluster/kubernetes/dashboard.nix

View File

@ -0,0 +1,31 @@
# Builds a buildEnv named "hadoop-conf" that merges the four Hadoop
# *-site.xml files and hadoop-user-functions.sh into one directory.
#
# Arguments:
#   hadoop - attribute set providing coreSite, hdfsSite, mapredSite and
#            yarnSite; each one maps property names to property values.
#   pkgs   - package set supplying writeTextDir, buildEnv and lib.
{ hadoop, pkgs }:
let
  # Escape the characters that are markup inside XML text content, so a
  # property containing '&', '<' or '>' cannot yield a malformed file.
  escapeXml = builtins.replaceStrings
    [ "&" "<" ">" ]
    [ "&amp;" "&lt;" "&gt;" ];

  # Render a property value as text.  Booleans are spelled out explicitly
  # because builtins.toString turns true into "1" and false into "".
  valueToString = value:
    if builtins.isBool value
    then (if value then "true" else "false")
    else builtins.toString value;

  # One <property> element for a single name/value pair.
  propertyXml = name: value: ''
    <property>
    <name>${escapeXml name}</name>
    <value>${escapeXml (valueToString value)}</value>
    </property>
  '';

  # A directory containing `fileName`, whose <configuration> element holds
  # one <property> per attribute of `properties`.
  siteXml = fileName: properties: pkgs.writeTextDir fileName ''
    <?xml version="1.0" encoding="UTF-8" standalone="no"?>
    <!-- generated by NixOS -->
    <configuration>
    ${builtins.concatStringsSep "\n" (pkgs.lib.mapAttrsToList propertyXml properties)}
    </configuration>
  '';

  # Shell snippet defining hadoop_verify_logdir as a function that only
  # prints a message.
  # NOTE(review): presumably this overrides Hadoop's own log directory
  # check of the same name - confirm against the Hadoop shell scripts.
  userFunctions = ''
    hadoop_verify_logdir() {
    echo Skipping verification of log directory
    }
  '';
in
pkgs.buildEnv {
  name = "hadoop-conf";
  paths = [
    (siteXml "core-site.xml" hadoop.coreSite)
    (siteXml "hdfs-site.xml" hadoop.hdfsSite)
    (siteXml "mapred-site.xml" hadoop.mapredSite)
    (siteXml "yarn-site.xml" hadoop.yarnSite)
    (pkgs.writeTextDir "hadoop-user-functions.sh" userFunctions)
  ];
}

View File

@ -0,0 +1,63 @@
# Base NixOS module for Hadoop: declares the options shared by all Hadoop
# services under services.hadoop and imports the YARN and HDFS modules.
{ config, lib, pkgs, ...}:
with lib;
{
  imports = [ ./yarn.nix ./hdfs.nix ];

  options.services.hadoop = {
    # Each *Site option is an attribute set of property name -> value.
    coreSite = mkOption {
      default = {};
      example = {
        "fs.defaultFS" = "hdfs://localhost";
      };
      description = "Hadoop core-site.xml definition";
    };

    hdfsSite = mkOption {
      default = {};
      example = {
        "dfs.nameservices" = "namenode1";
      };
      description = "Hadoop hdfs-site.xml definition";
    };

    mapredSite = mkOption {
      default = {};
      example = {
        "mapreduce.map.cpu.vcores" = "1";
      };
      description = "Hadoop mapred-site.xml definition";
    };

    yarnSite = mkOption {
      default = {};
      example = {
        "yarn.resourcemanager.ha.id" = "resourcemanager1";
      };
      description = "Hadoop yarn-site.xml definition";
    };

    package = mkOption {
      type = types.package;
      default = pkgs.hadoop;
      defaultText = "pkgs.hadoop";
      example = literalExample "pkgs.hadoop";
      description = ''
        The Hadoop package to use.
      '';
    };
  };

  config = mkMerge [
    # Declare the "hadoop" group whenever a "yarn" or "hdfs" user exists in
    # the configuration.
    (mkIf (builtins.hasAttr "yarn" config.users.extraUsers ||
           builtins.hasAttr "hdfs" config.users.extraUsers ) {
      users.extraGroups.hadoop = {
        gid = config.ids.gids.hadoop;
      };
    })
  ];
}

View File

@ -0,0 +1,73 @@
# NixOS module for the Hadoop HDFS daemons (NameNode and DataNode).
{ config, lib, pkgs, ...}:
let
  cfg = config.services.hadoop;
  # Configuration directory built from the services.hadoop options; passed
  # to the hdfs command through --config.
  hadoopConf = import ./conf.nix { hadoop = cfg; pkgs = pkgs; };
in
with lib;
{
  options.services.hadoop.hdfs = {
    namenode.enabled = mkOption {
      type = types.bool;
      default = false;
      description = ''
        Whether to run the Hadoop HDFS NameNode
      '';
    };
    datanode.enabled = mkOption {
      type = types.bool;
      default = false;
      description = ''
        Whether to run the Hadoop HDFS DataNode
      '';
    };
  };

  config = mkMerge [
    (mkIf cfg.hdfs.namenode.enabled {
      systemd.services."hdfs-namenode" = {
        description = "Hadoop HDFS NameNode";
        wantedBy = [ "multi-user.target" ];

        environment = {
          HADOOP_HOME = "${cfg.package}";
        };

        # Attempt a non-interactive format before every start; a failing
        # format is ignored because of "|| true".
        # NOTE(review): presumably this keeps an already formatted
        # NameNode from blocking startup - confirm.
        preStart = ''
          ${cfg.package}/bin/hdfs --config ${hadoopConf} namenode -format -nonInteractive || true
        '';

        serviceConfig = {
          User = "hdfs";
          SyslogIdentifier = "hdfs-namenode";
          ExecStart = "${cfg.package}/bin/hdfs --config ${hadoopConf} namenode";
        };
      };
    })

    (mkIf cfg.hdfs.datanode.enabled {
      systemd.services."hdfs-datanode" = {
        description = "Hadoop HDFS DataNode";
        wantedBy = [ "multi-user.target" ];

        environment = {
          HADOOP_HOME = "${cfg.package}";
        };

        serviceConfig = {
          User = "hdfs";
          SyslogIdentifier = "hdfs-datanode";
          ExecStart = "${cfg.package}/bin/hdfs --config ${hadoopConf} datanode";
        };
      };
    })

    # Both daemons run as the "hdfs" user, so declare it when either one
    # is enabled.
    (mkIf (
        cfg.hdfs.namenode.enabled || cfg.hdfs.datanode.enabled
    ) {
      users.extraUsers.hdfs = {
        description = "Hadoop HDFS user";
        group = "hadoop";
        uid = config.ids.uids.hdfs;
      };
    })
  ];
}

View File

@ -0,0 +1,74 @@
# NixOS module for the Hadoop YARN daemons (ResourceManager, NodeManager).
{ config, lib, pkgs, ...}:
let
  cfg = config.services.hadoop;
  yarnCfg = cfg.yarn;

  # Configuration directory built from the services.hadoop options.
  hadoopConf = import ./conf.nix { hadoop = cfg; inherit pkgs; };

  # Command prefix shared by both daemons.  It ends in a space and the
  # ExecStart lines add another one before the sub-command.
  yarnCommand = "${cfg.package}/bin/yarn --config ${hadoopConf} ";

  runResourceManager = yarnCfg.resourcemanager.enabled;
  runNodeManager = yarnCfg.nodemanager.enabled;
in
with lib;
{
  options.services.hadoop.yarn = {
    resourcemanager.enabled = mkOption {
      description = ''
        Whether to run the Hadoop YARN ResourceManager
      '';
      default = false;
      type = types.bool;
    };

    nodemanager.enabled = mkOption {
      description = ''
        Whether to run the Hadoop YARN NodeManager
      '';
      default = false;
      type = types.bool;
    };
  };

  config = mkMerge [
    # Both daemons run as the "yarn" user, so declare it when either one
    # is enabled.
    (mkIf (runResourceManager || runNodeManager) {
      users.extraUsers.yarn = {
        uid = config.ids.uids.yarn;
        group = "hadoop";
        description = "Hadoop YARN user";
      };
    })

    (mkIf runResourceManager {
      systemd.services."yarn-resourcemanager" = {
        description = "Hadoop YARN ResourceManager";
        wantedBy = [ "multi-user.target" ];
        environment.HADOOP_HOME = "${cfg.package}";
        serviceConfig = {
          User = "yarn";
          SyslogIdentifier = "yarn-resourcemanager";
          ExecStart = "${yarnCommand} resourcemanager";
        };
      };
    })

    (mkIf runNodeManager {
      systemd.services."yarn-nodemanager" = {
        description = "Hadoop YARN NodeManager";
        wantedBy = [ "multi-user.target" ];
        environment.HADOOP_HOME = "${cfg.package}";
        serviceConfig = {
          User = "yarn";
          SyslogIdentifier = "yarn-nodemanager";
          ExecStart = "${yarnCommand} nodemanager";
        };
      };
    })
  ];
}

View File

@ -299,6 +299,8 @@ in rec {
tests.gnome3-gdm = callTest tests/gnome3-gdm.nix {}; tests.gnome3-gdm = callTest tests/gnome3-gdm.nix {};
tests.grafana = callTest tests/grafana.nix {}; tests.grafana = callTest tests/grafana.nix {};
tests.graphite = callTest tests/graphite.nix {}; tests.graphite = callTest tests/graphite.nix {};
tests.hadoop.hdfs = callTestOnMatchingSystems [ "x86_64-linux" ] tests/hadoop/hdfs.nix {};
tests.hadoop.yarn = callTestOnMatchingSystems [ "x86_64-linux" ] tests/hadoop/yarn.nix {};
tests.hardened = callTest tests/hardened.nix { }; tests.hardened = callTest tests/hardened.nix { };
tests.haproxy = callTest tests/haproxy.nix {}; tests.haproxy = callTest tests/haproxy.nix {};
tests.hibernate = callTest tests/hibernate.nix {}; tests.hibernate = callTest tests/hibernate.nix {};

View File

@ -0,0 +1,54 @@
# NixOS VM test: one NameNode machine and one DataNode machine; checks that
# both services come up, open their ports and answer on their HTTP ports.
import ../make-test.nix ({pkgs, ...}: {
  nodes = {
    namenode = {pkgs, config, ...}: {
      services.hadoop = {
        package = pkgs.hadoop_3_1;
        hdfs.namenode.enabled = true;
        coreSite = {
          "fs.defaultFS" = "hdfs://namenode:8020";
        };
        hdfsSite = {
          "dfs.replication" = 1;
          "dfs.namenode.rpc-bind-host" = "0.0.0.0";
          "dfs.namenode.http-bind-host" = "0.0.0.0";
        };
      };
      networking.firewall.allowedTCPPorts = [
        9870 # namenode.http-address
        8020 # namenode.rpc-address
      ];
    };

    datanode = {pkgs, config, ...}: {
      services.hadoop = {
        package = pkgs.hadoop_3_1;
        hdfs.datanode.enabled = true;
        coreSite = {
          "fs.defaultFS" = "hdfs://namenode:8020";
        };
      };
      networking.firewall.allowedTCPPorts = [
        9864 # datanode.http.address
        9866 # datanode.address
        9867 # datanode.ipc.address
      ];
    };
  };

  # The statement terminator after startAll is required: without it Perl
  # reads the following line as the argument list of startAll.
  # curl runs with -f so that an HTTP error response fails the test.
  testScript = ''
    startAll;
    $namenode->waitForUnit("hdfs-namenode");
    $namenode->waitForUnit("network.target");
    $namenode->waitForOpenPort(8020);
    $namenode->waitForOpenPort(9870);
    $datanode->waitForUnit("hdfs-datanode");
    $datanode->waitForUnit("network.target");
    $datanode->waitForOpenPort(9864);
    $datanode->waitForOpenPort(9866);
    $datanode->waitForOpenPort(9867);
    $namenode->succeed("curl -f http://namenode:9870");
    $datanode->succeed("curl -f http://datanode:9864");
  '';
})

View File

@ -0,0 +1,46 @@
# NixOS VM test: one ResourceManager machine and one NodeManager machine;
# checks that both services come up, open their ports and answer on their
# web ports.
import ../make-test.nix ({pkgs, ...}: {
  nodes = {
    resourcemanager = {pkgs, config, ...}: {
      services.hadoop.package = pkgs.hadoop_3_1;
      services.hadoop.yarn.resourcemanager.enabled = true;
      services.hadoop.yarnSite = {
        "yarn.resourcemanager.scheduler.class" = "org.apache.hadoop.yarn.server.resourcemanager.scheduler.fifo.FifoScheduler";
      };
      networking.firewall.allowedTCPPorts = [
        8088 # resourcemanager.webapp.address
        8031 # resourcemanager.resource-tracker.address
      ];
    };

    nodemanager = {pkgs, config, ...}: {
      services.hadoop.package = pkgs.hadoop_3_1;
      services.hadoop.yarn.nodemanager.enabled = true;
      services.hadoop.yarnSite = {
        "yarn.resourcemanager.hostname" = "resourcemanager";
        "yarn.nodemanager.log-dirs" = "/tmp/userlogs";
        "yarn.nodemanager.address" = "0.0.0.0:8041";
      };
      networking.firewall.allowedTCPPorts = [
        8042 # nodemanager.webapp.address
        8041 # nodemanager.address
      ];
    };
  };

  # curl runs with -f so that an HTTP error response (status >= 400) makes
  # the command, and therefore the test, fail instead of passing silently.
  testScript = ''
    startAll;
    $resourcemanager->waitForUnit("yarn-resourcemanager");
    $resourcemanager->waitForUnit("network.target");
    $resourcemanager->waitForOpenPort(8031);
    $resourcemanager->waitForOpenPort(8088);
    $nodemanager->waitForUnit("yarn-nodemanager");
    $nodemanager->waitForUnit("network.target");
    $nodemanager->waitForOpenPort(8042);
    $nodemanager->waitForOpenPort(8041);
    $resourcemanager->succeed("curl -f http://localhost:8088");
    $nodemanager->succeed("curl -f http://localhost:8042");
  '';
})