2022-02-08 13:59:23 +00:00
|
|
|
{ lib
|
|
|
|
, stdenv
|
|
|
|
, fetchzip
|
|
|
|
, makeWrapper
|
|
|
|
, jdk8
|
|
|
|
, python3Packages
|
|
|
|
, extraPythonPackages ? [ ]
|
|
|
|
, coreutils
|
2022-03-28 17:48:11 +00:00
|
|
|
, hadoopSupport ? true
|
2022-02-08 13:59:23 +00:00
|
|
|
, hadoop
|
|
|
|
, RSupport ? true
|
|
|
|
, R
|
2014-12-04 16:27:01 +00:00
|
|
|
}:
|
|
|
|
|
2021-01-15 05:42:41 +00:00
|
|
|
with lib;
|
2014-02-20 11:54:04 +00:00
|
|
|
|
2021-09-17 15:19:21 +00:00
|
|
|
let
|
2022-04-01 06:14:43 +00:00
|
|
|
# Builds one Apache Spark release from the upstream "bin-without-hadoop"
# binary tarball.
#
# Arguments:
#   pname, version - form the derivation name, the download URL and the name
#                    of the unpacked tree under $out/lib.
#   sha256         - hash handed to fetchzip for the unpacked tarball.
#   extraMeta      - attributes merged over the default `meta`; on conflict
#                    the value from extraMeta wins.
spark = { pname, version, sha256, extraMeta ? {} }:
  stdenv.mkDerivation rec {
    inherit pname version;

    # Java used at runtime: the JDK exposed by the hadoop package when
    # hadoopSupport is on, jdk8 otherwise.
    jdk = if hadoopSupport then hadoop.jdk else jdk8;

    src = fetchzip {
      url = "mirror://apache/spark/${pname}-${version}/${pname}-${version}-bin-without-hadoop.tgz";
      inherit sha256;
    };

    nativeBuildInputs = [ makeWrapper ];
    buildInputs = [ jdk python3Packages.python ]
      ++ extraPythonPackages
      ++ lib.optional RSupport R;

    # Directory name under $out/lib that receives the whole Spark tree.
    untarDir = "${pname}-${version}";

    installPhase = ''
      mkdir -p $out/{lib/${untarDir}/conf,bin,share/java}
      mv * $out/lib/${untarDir}

      cp $out/lib/${untarDir}/conf/log4j.properties{.template,}

      # The heredoc delimiter is unquoted, so the shell expands $out,
      # $PYTHONPATH and command substitutions while the file is written;
      # names written with a leading backslash stay unexpanded in
      # spark-env.sh and are resolved when that file is sourced.
      cat > $out/lib/${untarDir}/conf/spark-env.sh <<- EOF
      export JAVA_HOME="${jdk}"
      export SPARK_HOME="$out/lib/${untarDir}"
    '' + lib.optionalString hadoopSupport ''
      export SPARK_DIST_CLASSPATH=$(${hadoop}/bin/hadoop classpath)
    '' + ''
      export PYSPARK_PYTHON="${python3Packages.python}/bin/${python3Packages.python.executable}"
      export PYTHONPATH="\$PYTHONPATH:$PYTHONPATH"
      ${lib.optionalString RSupport ''
        export SPARKR_R_SHELL="${R}/bin/R"
        export PATH="\$PATH:${R}/bin"''}
      EOF

      # Expose every extension-less launcher from bin/ through $out/bin and
      # pin the dirname the launchers call to the coreutils store path.
      for n in $(find $out/lib/${untarDir}/bin -type f ! -name "*.*"); do
        makeWrapper "$n" "$out/bin/$(basename "$n")"
        substituteInPlace "$n" --replace dirname ${coreutils.out}/bin/dirname
      done

      for n in $(find $out/lib/${untarDir}/sbin -type f); do
        # Spark deprecated scripts with "slave" in the name.
        # This adds forward compatibility with the nixos spark module for
        # older versions of spark that don't have the new "worker" scripts.
        target=$(echo "$n" | sed -r 's/slave(s?).sh$/worker\1.sh/g')
        # Only create the alias when the name really changed and the release
        # does not already ship a script under the new name.
        if [ "$n" != "$target" ] && [ ! -e "$target" ] && [ ! -L "$target" ]; then
          ln -s "$n" "$target"
        fi
      done

      # Link assembly jars only when the release actually contains them; an
      # unmatched glob would otherwise produce a dangling symlink whose name
      # is the literal pattern.
      for jar in $out/lib/${untarDir}/lib/spark-assembly-*.jar; do
        if [ -e "$jar" ]; then
          ln -s "$jar" $out/share/java
        fi
      done
    '';

    meta = {
      description = "Apache Spark is a fast and general engine for large-scale data processing";
      homepage = "https://spark.apache.org/";
      sourceProvenance = with lib.sourceTypes; [ binaryBytecode ];
      license = lib.licenses.asl20;
      platforms = lib.platforms.all;
      maintainers = with lib.maintainers; [ thoughtpolice offline kamilchm illustris ];
    } // extraMeta;
  };
|
2022-02-08 13:59:23 +00:00
|
|
|
in
|
|
|
|
let
  # Every release shares the package name; callers supply only what differs
  # (version, hash and, optionally, extra meta attributes).
  mkSpark = release: spark ({ pname = "spark"; } // release);
in
{
  spark_3_2 = mkSpark {
    version = "3.2.2";
    sha256 = "sha256-yKoTyD/IqvsJQs0jB67h1zqwYaLuikdoa5fYIXtvhz0=";
  };

  spark_3_1 = mkSpark {
    version = "3.1.3";
    sha256 = "sha256-RIQyN5YjxFLfNIrETR3Vv99zsHxt77rhOXHIThCI2Y8=";
  };

  spark_2_4 = mkSpark {
    version = "2.4.8";
    sha256 = "1mkyq0gz9fiav25vr0dba5ivp0wh0mh7kswwnx8pvsmb6wbwyfxv";
    # Marks this release as insecure through meta.knownVulnerabilities.
    extraMeta = {
      knownVulnerabilities = [ "CVE-2021-38296" ];
    };
  };
}
|