2022-02-01 17:45:12 +00:00
|
|
|
{ lib
|
|
|
|
, stdenv
|
|
|
|
, fetchurl
|
|
|
|
, makeWrapper
|
|
|
|
, autoPatchelfHook
|
|
|
|
, jdk8_headless
|
|
|
|
, jdk11_headless
|
|
|
|
, bash
|
|
|
|
, coreutils
|
|
|
|
, which
|
|
|
|
, bzip2
|
|
|
|
, cyrus_sasl
|
|
|
|
, protobuf3_7
|
|
|
|
, snappy
|
|
|
|
, zlib
|
|
|
|
, zstd
|
2021-10-20 19:37:49 +00:00
|
|
|
, openssl
|
2022-01-26 09:06:08 +00:00
|
|
|
, glibc
|
2022-01-08 13:08:34 +00:00
|
|
|
, nixosTests
|
2022-03-28 17:48:11 +00:00
|
|
|
, sparkSupport ? true
|
|
|
|
, spark
|
2019-08-20 19:06:08 +00:00
|
|
|
}:
|
2012-08-22 12:13:32 +00:00
|
|
|
|
2021-10-20 19:37:49 +00:00
|
|
|
with lib;
|
2017-12-20 13:22:00 +00:00
|
|
|
|
2022-02-14 14:21:28 +00:00
|
|
|
assert elem stdenv.system [ "x86_64-linux" "x86_64-darwin" "aarch64-linux" "aarch64-darwin" ];
|
|
|
|
|
2021-10-20 19:37:49 +00:00
|
|
|
let
|
2022-07-14 01:56:13 +00:00
|
|
|
# Shared builder for every packaged Hadoop release.
#
# Arguments:
#   pname         - package name; also the prefix of the default `untarDir`.
#   platformAttrs - attrset keyed by `stdenv.system`; each entry supplies `version`
#                   and `hash`, plus an optional `meta` attrset that is merged over
#                   the common meta below.
#   untarDir      - name of the directory under $out/lib that receives the unpacked tree.
#   jdk           - JDK whose `home` becomes the wrappers' default JAVA_HOME.
#   openssl       - optional; placed in buildInputs (and so in JAVA_LIBRARY_PATH).
#   nativeLibs    - additional libraries appended to buildInputs.
#   libPatches    - shell snippet appended to the end of installPhase.
#   tests         - exposed as `passthru.tests`.
#
# The `untarDir` default is computed from `platformAttrs` on purpose: `version` is
# only defined inside the `rec` set below and is not visible to formal defaults, so
# a bare `version` here would resolve to `lib.version` through `with lib;`.
common =
  { pname
  , platformAttrs
  , untarDir ? "${pname}-${platformAttrs.${stdenv.system}.version or (throw "Unsupported system: ${stdenv.system}")}"
  , jdk
  , openssl ? null
  , nativeLibs ? [ ]
  , libPatches ? ""
  , tests
  }:
  stdenv.mkDerivation rec {
    inherit pname jdk libPatches untarDir openssl;
    version = platformAttrs.${stdenv.system}.version or (throw "Unsupported system: ${stdenv.system}");
    src = fetchurl {
      # aarch64 systems fetch the tarball carrying the "-aarch64" suffix.
      url = "mirror://apache/hadoop/common/hadoop-${version}/hadoop-${version}" + optionalString stdenv.isAarch64 "-aarch64" + ".tar.gz";
      inherit (platformAttrs.${stdenv.system}) hash;
    };
    doCheck = true;

    # autoPatchelfHook is only pulled in on Linux when native libraries or
    # library patches are requested. `optionals` (not `optional`) is required
    # because the payload is already a list.
    nativeBuildInputs = [ makeWrapper ]
      ++ optionals (stdenv.isLinux && (nativeLibs != [ ] || libPatches != "")) [ autoPatchelfHook ];
    buildInputs = [ openssl ] ++ nativeLibs;

    # Install steps, in order:
    #   1. move the unpacked tree to $out/lib/${untarDir};
    #   2. (Linux) point container-executor at the dynamic loader of the build
    #      toolchain, read from $NIX_CC so the path matches the target architecture;
    #   3. wrap every extension-less file in the tree's bin/ into $out/bin;
    #   4. (sparkSupport) copy the Spark YARN shuffle jar into the YARN share dir;
    #   5. run the caller-supplied libPatches.
    installPhase = ''
      mkdir -p $out/{lib/${untarDir}/conf,bin,lib}
      mv * $out/lib/${untarDir}
    '' + optionalString stdenv.isLinux ''
      # All versions need container-executor, but some versions can't use autoPatchelf because of broken SSL versions
      patchelf --set-interpreter "$(cat $NIX_CC/nix-support/dynamic-linker)" $out/lib/${untarDir}/bin/container-executor
    '' + ''
      for n in $(find $out/lib/${untarDir}/bin -type f ! -name "*.*"); do
        makeWrapper "$n" "$out/bin/$(basename $n)"\
          --set-default JAVA_HOME ${jdk.home}\
          --set-default HADOOP_HOME $out/lib/${untarDir}\
          --run "test -d /etc/hadoop-conf && export HADOOP_CONF_DIR=\''${HADOOP_CONF_DIR-'/etc/hadoop-conf/'}"\
          --set-default HADOOP_CONF_DIR $out/lib/${untarDir}/etc/hadoop/\
          --prefix PATH : "${makeBinPath [ bash coreutils which]}"\
          --prefix JAVA_LIBRARY_PATH : "${makeLibraryPath buildInputs}"
      done
    '' + optionalString sparkSupport ''
      # Add the spark shuffle service jar to YARN
      cp ${spark.src}/yarn/spark-${spark.version}-yarn-shuffle.jar $out/lib/${untarDir}/share/hadoop/yarn/
    '' + libPatches;

    passthru = { inherit tests; };

    # Common meta, with any per-platform `meta` from platformAttrs merged on top.
    meta = recursiveUpdate {
      homepage = "https://hadoop.apache.org/";
      description = "Framework for distributed processing of large data sets across clusters of computers";
      license = licenses.asl20;
      sourceProvenance = with sourceTypes; [ binaryBytecode ];

      longDescription = ''
        The Apache Hadoop software library is a framework that allows for
        the distributed processing of large data sets across clusters of
        computers using a simple programming model. It is designed to
        scale up from single servers to thousands of machines, each
        offering local computation and storage. Rather than rely on
        hardware to deliver high-availability, the library itself is
        designed to detect and handle failures at the application layer,
        so delivering a highly-available service on top of a cluster of
        computers, each of which may be prone to failures.
      '';
      maintainers = with maintainers; [ illustris ];
      platforms = attrNames platformAttrs;
    } (attrByPath [ stdenv.system "meta" ] {} platformAttrs);
  };
|
2021-10-20 19:37:49 +00:00
|
|
|
in
|
|
|
|
{
|
|
|
|
# Different versions of Hadoop support different Java runtime versions
|
|
|
|
# https://cwiki.apache.org/confluence/display/HADOOP/Hadoop+Java+Versions
|
2022-01-08 13:08:34 +00:00
|
|
|
# Hadoop 3.3.x, run on JDK 11, with native libraries enabled.
hadoop_3_3 =
  let
    pname = "hadoop";
    jdk = jdk11_headless;

    # Release pinned for both x86_64 systems.
    x86_64Release = {
      version = "3.3.3";
      hash = "sha256-+nHGG7qkJxKa7wn+wCizTdVCxlrZD9zOxefvk9g7h2Q=";
    };

    # Release pinned for both aarch64 systems; its meta lists known CVEs.
    aarch64Release = {
      version = "3.3.1";
      hash = "sha256-v1Om2pk0wsgKBghRD2wgTSHJoKd3jkm1wPKAeDcKlgI=";
      meta.knownVulnerabilities = [ "CVE-2021-37404" "CVE-2021-33036" ];
    };

    platformAttrs = {
      x86_64-linux = x86_64Release;
      x86_64-darwin = x86_64Release;
      aarch64-linux = aarch64Release;
      aarch64-darwin = aarch64Release;
    };

    # Install directory name follows the version selected for the current system.
    untarDir = "${pname}-${platformAttrs.${stdenv.system}.version}";
  in
  common {
    inherit pname jdk platformAttrs untarDir openssl;
    tests = nixosTests.hadoop;

    # TODO: Package and add Intel Storage Acceleration Library
    nativeLibs = [ stdenv.cc.cc.lib protobuf3_7 zlib snappy ];

    # Symlink shared libraries into the tree's lib/native directory, then (Linux
    # only) add the JDK's libjvm.so directories to libnativetask's rpath.
    libPatches = ''
      ln -s ${getLib cyrus_sasl}/lib/libsasl2.so $out/lib/${untarDir}/lib/native/libsasl2.so.2
      ln -s ${getLib openssl}/lib/libcrypto.so $out/lib/${untarDir}/lib/native/
      ln -s ${getLib zlib}/lib/libz.so.1 $out/lib/${untarDir}/lib/native/
      ln -s ${getLib zstd}/lib/libzstd.so.1 $out/lib/${untarDir}/lib/native/
      ln -s ${getLib bzip2}/lib/libbz2.so.1 $out/lib/${untarDir}/lib/native/
    '' + optionalString stdenv.isLinux ''
      # libjvm.so for Java >=11
      patchelf --add-rpath ${jdk.home}/lib/server $out/lib/${untarDir}/lib/native/libnativetask.so.1.0.0
      # Java 8 has libjvm.so at a different path
      patchelf --add-rpath ${jdk.home}/jre/lib/amd64/server $out/lib/${untarDir}/lib/native/libnativetask.so.1.0.0
    '';
  };
|
2021-10-20 19:37:49 +00:00
|
|
|
# Hadoop 3.2.x, run on JDK 8; only an x86_64-linux release is pinned.
hadoop_3_2 = common {
  pname = "hadoop";
  jdk = jdk8_headless;
  tests = nixosTests.hadoop_3_2;

  platformAttrs = {
    x86_64-linux = {
      hash = "sha256-Q2/a1LcKutpJoGySB0qlCcYE2bvC/HoG/dp9nBikuNU=";
      version = "3.2.3";
    };
  };

  # Native libs are not used here because of the broken openssl_1_0_2
  # dependency; this can be manually overridden.
};
|
2021-10-20 19:37:49 +00:00
|
|
|
# Hadoop 2.10.x, run on JDK 8; only an x86_64-linux release is pinned.
hadoop2 = common {
  pname = "hadoop";
  jdk = jdk8_headless;
  tests = nixosTests.hadoop2;

  platformAttrs = {
    x86_64-linux = {
      hash = "sha256-xhA4zxqIRGNhIeBnJO9dLKf/gx/Bq+uIyyZwsIafEyo=";
      version = "2.10.2";
    };
  };
};
|
|
|
|
}
|