nixos/hadoop: add module options for commonly used service configs

This commit is contained in:
illustris 2022-03-02 12:50:01 +05:30
parent bef71d7c53
commit e1017adb32
6 changed files with 194 additions and 49 deletions

View File

@ -1,6 +1,6 @@
{ cfg, pkgs, lib }:
let
propertyXml = name: value: ''
propertyXml = name: value: lib.optionalString (value != null) ''
<property>
<name>${name}</name>
<value>${builtins.toString value}</value>
@ -29,16 +29,16 @@ let
export HADOOP_LOG_DIR=/tmp/hadoop/$USER
'';
in
pkgs.runCommand "hadoop-conf" {} ''
pkgs.runCommand "hadoop-conf" {} (with cfg; ''
mkdir -p $out/
cp ${siteXml "core-site.xml" cfg.coreSite}/* $out/
cp ${siteXml "hdfs-site.xml" cfg.hdfsSite}/* $out/
cp ${siteXml "mapred-site.xml" cfg.mapredSite}/* $out/
cp ${siteXml "yarn-site.xml" cfg.yarnSite}/* $out/
cp ${siteXml "httpfs-site.xml" cfg.httpfsSite}/* $out/
cp ${cfgFile "container-executor.cfg" cfg.containerExecutorCfg}/* $out/
cp ${siteXml "core-site.xml" (coreSite // coreSiteInternal)}/* $out/
cp ${siteXml "hdfs-site.xml" (hdfsSiteDefault // hdfsSite // hdfsSiteInternal)}/* $out/
cp ${siteXml "mapred-site.xml" (mapredSiteDefault // mapredSite)}/* $out/
cp ${siteXml "yarn-site.xml" (yarnSiteDefault // yarnSite // yarnSiteInternal)}/* $out/
cp ${siteXml "httpfs-site.xml" httpfsSite}/* $out/
cp ${cfgFile "container-executor.cfg" containerExecutorCfg}/* $out/
cp ${pkgs.writeTextDir "hadoop-user-functions.sh" userFunctions}/* $out/
cp ${pkgs.writeTextDir "hadoop-env.sh" hadoopEnv}/* $out/
cp ${cfg.log4jProperties} $out/log4j.properties
${lib.concatMapStringsSep "\n" (dir: "cp -r ${dir}/* $out/") cfg.extraConfDirs}
''
cp ${log4jProperties} $out/log4j.properties
${lib.concatMapStringsSep "\n" (dir: "cp -r ${dir}/* $out/") extraConfDirs}
'')

View File

@ -21,25 +21,50 @@ with lib;
<link xlink:href="https://hadoop.apache.org/docs/current/hadoop-project-dist/hadoop-common/core-default.xml"/>
'';
};
coreSiteInternal = mkOption {
default = {};
type = types.attrsOf types.anything;
internal = true;
description = ''
Internal option to add configs to core-site.xml based on module options
'';
};
hdfsSite = mkOption {
hdfsSiteDefault = mkOption {
default = {
"dfs.namenode.rpc-bind-host" = "0.0.0.0";
"dfs.namenode.http-address" = "0.0.0.0:9870";
"dfs.namenode.servicerpc-bind-host" = "0.0.0.0";
"dfs.namenode.http-bind-host" = "0.0.0.0";
};
type = types.attrsOf types.anything;
description = ''
Default options for hdfs-site.xml
'';
};
hdfsSite = mkOption {
default = {};
type = types.attrsOf types.anything;
example = literalExpression ''
{
"dfs.nameservices" = "namenode1";
}
'';
description = ''
Hadoop hdfs-site.xml definition
Additional options and overrides for hdfs-site.xml
<link xlink:href="https://hadoop.apache.org/docs/current/hadoop-project-dist/hadoop-hdfs/hdfs-default.xml"/>
'';
};
hdfsSiteInternal = mkOption {
default = {};
type = types.attrsOf types.anything;
internal = true;
description = ''
Internal option to add configs to hdfs-site.xml based on module options
'';
};
mapredSite = mkOption {
mapredSiteDefault = mkOption {
default = {
"mapreduce.framework.name" = "yarn";
"yarn.app.mapreduce.am.env" = "HADOOP_MAPRED_HOME=${cfg.package}/lib/${cfg.package.untarDir}";
@ -55,18 +80,25 @@ with lib;
}
'';
type = types.attrsOf types.anything;
description = ''
Default options for mapred-site.xml
'';
};
mapredSite = mkOption {
default = {};
type = types.attrsOf types.anything;
example = literalExpression ''
options.services.hadoop.mapredSite.default // {
{
"mapreduce.map.java.opts" = "-Xmx900m -XX:+UseParallelGC";
}
'';
description = ''
Hadoop mapred-site.xml definition
Additional options and overrides for mapred-site.xml
<link xlink:href="https://hadoop.apache.org/docs/current/hadoop-mapreduce-client/hadoop-mapreduce-client-core/mapred-default.xml"/>
'';
};
yarnSite = mkOption {
yarnSiteDefault = mkOption {
default = {
"yarn.nodemanager.admin-env" = "PATH=$PATH";
"yarn.nodemanager.aux-services" = "mapreduce_shuffle";
@ -79,22 +111,33 @@ with lib;
"yarn.nodemanager.log-dirs" = "/var/log/hadoop/yarn/nodemanager";
"yarn.resourcemanager.bind-host" = "0.0.0.0";
"yarn.resourcemanager.scheduler.class" = "org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.FairScheduler";
"yarn.nodemanager.linux-container-executor.cgroups.hierarchy" = "/hadoop-yarn";
"yarn.nodemanager.linux-container-executor.resources-handler.class" = "org.apache.hadoop.yarn.server.nodemanager.util.CgroupsLCEResourcesHandler";
"yarn.nodemanager.linux-container-executor.cgroups.mount" = "true";
"yarn.nodemanager.linux-container-executor.cgroups.mount-path" = "/run/wrappers/yarn-nodemanager/cgroup";
};
type = types.attrsOf types.anything;
description = ''
Default options for yarn-site.xml
'';
};
yarnSite = mkOption {
default = {};
type = types.attrsOf types.anything;
example = literalExpression ''
options.services.hadoop.yarnSite.default // {
{
"yarn.resourcemanager.hostname" = "''${config.networking.hostName}";
}
'';
description = ''
Hadoop yarn-site.xml definition
Additional options and overrides for yarn-site.xml
<link xlink:href="https://hadoop.apache.org/docs/current/hadoop-yarn/hadoop-yarn-common/yarn-default.xml"/>
'';
};
yarnSiteInternal = mkOption {
default = {};
type = types.attrsOf types.anything;
internal = true;
description = ''
Internal option to add configs to yarn-site.xml based on module options
'';
};
httpfsSite = mkOption {
default = { };

View File

@ -7,7 +7,7 @@ let
hadoopConf = "${import ./conf.nix { inherit cfg pkgs lib; }}/";
# Generator for HDFS service options
hadoopServiceOption = { serviceName, firewallOption ? true }: {
hadoopServiceOption = { serviceName, firewallOption ? true, extraOpts ? null }: {
enable = mkEnableOption serviceName;
restartIfChanged = mkOption {
type = types.bool;
@ -19,13 +19,27 @@ let
'';
default = false;
};
extraFlags = mkOption{
type = with types; listOf str;
default = [];
description = "Extra command line flags to pass to ${serviceName}";
example = [
"-Dcom.sun.management.jmxremote"
"-Dcom.sun.management.jmxremote.port=8010"
];
};
extraEnv = mkOption{
type = with types; attrsOf str;
default = {};
description = "Extra environment variables for ${serviceName}";
};
} // (optionalAttrs firewallOption {
openFirewall = mkOption {
type = types.bool;
default = false;
description = "Open firewall ports for ${serviceName}.";
};
});
}) // (optionalAttrs (extraOpts != null) extraOpts);
# Generator for HDFS service configs
hadoopServiceConfig =
@ -36,17 +50,19 @@ let
, allowedTCPPorts ? [ ]
, preStart ? ""
, environment ? { }
, extraConfig ? { }
}: (
mkIf serviceOptions.enable {
mkIf serviceOptions.enable ( mkMerge [{
systemd.services."hdfs-${toLower name}" = {
inherit description preStart environment;
inherit description preStart;
environment = environment // serviceOptions.extraEnv;
wantedBy = [ "multi-user.target" ];
inherit (serviceOptions) restartIfChanged;
serviceConfig = {
inherit User;
SyslogIdentifier = "hdfs-${toLower name}";
ExecStart = "${cfg.package}/bin/hdfs --config ${hadoopConf} ${toLower name}";
ExecStart = "${cfg.package}/bin/hdfs --config ${hadoopConf} ${toLower name} ${escapeShellArgs serviceOptions.extraFlags}";
Restart = "always";
};
};
@ -56,7 +72,7 @@ let
networking.firewall.allowedTCPPorts = mkIf
((builtins.hasAttr "openFirewall" serviceOptions) && serviceOptions.openFirewall)
allowedTCPPorts;
}
} extraConfig])
);
in
@ -77,7 +93,27 @@ in
};
};
datanode = hadoopServiceOption { serviceName = "HDFS DataNode"; };
datanode = hadoopServiceOption { serviceName = "HDFS DataNode"; } // {
dataDirs = mkOption {
default = null;
description = "Tier and path definitions for datanode storage.";
type = with types; nullOr (listOf (submodule {
options = {
type = mkOption {
type = enum [ "SSD" "DISK" "ARCHIVE" "RAM_DISK" ];
description = ''
Storage types ([SSD]/[DISK]/[ARCHIVE]/[RAM_DISK]) for HDFS storage policies.
'';
};
path = mkOption {
type = path;
example = [ "/var/lib/hadoop/hdfs/dn" ];
description = "Determines where on the local filesystem a data node should store its blocks.";
};
};
}));
};
};
journalnode = hadoopServiceOption { serviceName = "HDFS JournalNode"; };
@ -122,6 +158,8 @@ in
50010 # datanode.address
50020 # datanode.ipc.address
];
extraConfig.services.hadoop.hdfsSiteInternal."dfs.datanode.data.dir" = let d = cfg.hdfs.datanode.dataDirs; in
if (d!= null) then (concatMapStringsSep "," (x: "["+x.type+"]file://"+x.path) cfg.hdfs.datanode.dataDirs) else d;
})
(hadoopServiceConfig {

View File

@ -13,12 +13,27 @@ let
'';
default = false;
};
extraFlags = mkOption{
type = with types; listOf str;
default = [];
description = "Extra command line flags to pass to the service";
example = [
"-Dcom.sun.management.jmxremote"
"-Dcom.sun.management.jmxremote.port=8010"
];
};
extraEnv = mkOption{
type = with types; attrsOf str;
default = {};
description = "Extra environment variables";
};
in
{
options.services.hadoop.yarn = {
resourcemanager = {
enable = mkEnableOption "Hadoop YARN ResourceManager";
inherit restartIfChanged;
inherit restartIfChanged extraFlags extraEnv;
openFirewall = mkOption {
type = types.bool;
default = false;
@ -29,7 +44,46 @@ in
};
nodemanager = {
enable = mkEnableOption "Hadoop YARN NodeManager";
inherit restartIfChanged;
inherit restartIfChanged extraFlags extraEnv;
resource = {
cpuVCores = mkOption {
description = "Number of vcores that can be allocated for containers.";
type = with types; nullOr ints.positive;
default = null;
};
maximumAllocationVCores = mkOption {
description = "The maximum virtual CPU cores any container can be allocated.";
type = with types; nullOr ints.positive;
default = null;
};
memoryMB = mkOption {
description = "Amount of physical memory, in MB, that can be allocated for containers.";
type = with types; nullOr ints.positive;
default = null;
};
maximumAllocationMB = mkOption {
description = "The maximum physical memory any container can be allocated.";
type = with types; nullOr ints.positive;
default = null;
};
};
useCGroups = mkOption {
type = types.bool;
default = true;
description = ''
Use cgroups to enforce resource limits on containers
'';
};
localDir = mkOption {
description = "List of directories to store localized files in.";
type = with types; nullOr (listOf path);
example = [ "/var/lib/hadoop/yarn/nm" ];
default = null;
};
addBinBash = mkOption {
type = types.bool;
default = true;
@ -62,12 +116,13 @@ in
description = "Hadoop YARN ResourceManager";
wantedBy = [ "multi-user.target" ];
inherit (cfg.yarn.resourcemanager) restartIfChanged;
environment = cfg.yarn.resourcemanager.extraEnv;
serviceConfig = {
User = "yarn";
SyslogIdentifier = "yarn-resourcemanager";
ExecStart = "${cfg.package}/bin/yarn --config ${hadoopConf} " +
" resourcemanager";
" resourcemanager ${escapeShellArgs cfg.yarn.resourcemanager.extraFlags}";
Restart = "always";
};
};
@ -94,6 +149,7 @@ in
description = "Hadoop YARN NodeManager";
wantedBy = [ "multi-user.target" ];
inherit (cfg.yarn.nodemanager) restartIfChanged;
environment = cfg.yarn.nodemanager.extraEnv;
preStart = ''
# create log dir
@ -115,13 +171,26 @@ in
SyslogIdentifier = "yarn-nodemanager";
PermissionsStartOnly = true;
ExecStart = "${cfg.package}/bin/yarn --config ${hadoopConf} " +
" nodemanager";
" nodemanager ${escapeShellArgs cfg.yarn.nodemanager.extraFlags}";
Restart = "always";
};
};
services.hadoop.gatewayRole.enable = true;
services.hadoop.yarnSiteInternal = with cfg.yarn.nodemanager; {
"yarn.nodemanager.local-dirs" = localDir;
"yarn.scheduler.maximum-allocation-vcores" = resource.maximumAllocationVCores;
"yarn.scheduler.maximum-allocation-mb" = resource.maximumAllocationMB;
"yarn.nodemanager.resource.cpu-vcores" = resource.cpuVCores;
"yarn.nodemanager.resource.memory-mb" = resource.memoryMB;
} // mkIf useCGroups {
"yarn.nodemanager.linux-container-executor.cgroups.hierarchy" = "/hadoop-yarn";
"yarn.nodemanager.linux-container-executor.resources-handler.class" = "org.apache.hadoop.yarn.server.nodemanager.util.CgroupsLCEResourcesHandler";
"yarn.nodemanager.linux-container-executor.cgroups.mount" = "true";
"yarn.nodemanager.linux-container-executor.cgroups.mount-path" = "/run/wrappers/yarn-nodemanager/cgroup";
};
networking.firewall.allowedTCPPortRanges = [
(mkIf (cfg.yarn.nodemanager.openFirewall) {from = 1024; to = 65535;})
];

View File

@ -10,15 +10,10 @@ import ../make-test-python.nix ({ package, ... }: {
"fs.defaultFS" = "hdfs://ns1";
};
hdfsSite = {
"dfs.namenode.rpc-bind-host" = "0.0.0.0";
"dfs.namenode.http-bind-host" = "0.0.0.0";
"dfs.namenode.servicerpc-bind-host" = "0.0.0.0";
# HA Quorum Journal Manager configuration
"dfs.nameservices" = "ns1";
"dfs.ha.namenodes.ns1" = "nn1,nn2";
"dfs.namenode.shared.edits.dir.ns1.nn1" = "qjournal://jn1:8485;jn2:8485;jn3:8485/ns1";
"dfs.namenode.shared.edits.dir.ns1.nn2" = "qjournal://jn1:8485;jn2:8485;jn3:8485/ns1";
"dfs.namenode.shared.edits.dir.ns1" = "qjournal://jn1:8485;jn2:8485;jn3:8485/ns1";
"dfs.namenode.rpc-address.ns1.nn1" = "nn1:8020";
"dfs.namenode.rpc-address.ns1.nn2" = "nn2:8020";
"dfs.namenode.servicerpc-address.ns1.nn1" = "nn1:8022";
@ -32,7 +27,7 @@ import ../make-test-python.nix ({ package, ... }: {
"dfs.ha.fencing.methods" = "shell(true)";
"ha.zookeeper.quorum" = "zk1:2181";
};
yarnSiteHA = {
yarnSite = {
"yarn.resourcemanager.zk-address" = "zk1:2181";
"yarn.resourcemanager.ha.enabled" = "true";
"yarn.resourcemanager.ha.rm-ids" = "rm1,rm2";
@ -116,8 +111,7 @@ import ../make-test-python.nix ({ package, ... }: {
# YARN cluster
rm1 = { options, ... }: {
services.hadoop = {
inherit package coreSite hdfsSite;
yarnSite = options.services.hadoop.yarnSite.default // yarnSiteHA;
inherit package coreSite hdfsSite yarnSite;
yarn.resourcemanager = {
enable = true;
openFirewall = true;
@ -126,8 +120,7 @@ import ../make-test-python.nix ({ package, ... }: {
};
rm2 = { options, ... }: {
services.hadoop = {
inherit package coreSite hdfsSite;
yarnSite = options.services.hadoop.yarnSite.default // yarnSiteHA;
inherit package coreSite hdfsSite yarnSite;
yarn.resourcemanager = {
enable = true;
openFirewall = true;
@ -137,8 +130,7 @@ import ../make-test-python.nix ({ package, ... }: {
nm1 = { options, ... }: {
virtualisation.memorySize = 2048;
services.hadoop = {
inherit package coreSite hdfsSite;
yarnSite = options.services.hadoop.yarnSite.default // yarnSiteHA;
inherit package coreSite hdfsSite yarnSite;
yarn.nodemanager = {
enable = true;
openFirewall = true;
@ -148,8 +140,7 @@ import ../make-test-python.nix ({ package, ... }: {
client = { options, ... }: {
services.hadoop = {
gatewayRole.enable = true;
inherit package coreSite hdfsSite;
yarnSite = options.services.hadoop.yarnSite.default // yarnSiteHA;
inherit package coreSite hdfsSite yarnSite;
};
};
};

View File

@ -35,6 +35,10 @@ with lib;
hdfs.datanode = {
enable = true;
openFirewall = true;
dataDirs = [{
type = "DISK";
path = "/tmp/dn1";
}];
};
inherit coreSite;
};