From 5f4bdbe6c387bf740025581d94bbfba9a887c76f Mon Sep 17 00:00:00 2001 From: Someone Serge Date: Mon, 27 Feb 2023 16:28:07 +0200 Subject: [PATCH] python3Packages.tensorflow: fix `GLIBCXX_3.4.30' not found Make tensorflow (and a bunch of other things) use CUDA-compatible toolchain. Introduces cudaPackages.backendStdenv --- .../compilers/cudatoolkit/common.nix | 54 +++++++------------ .../compilers/cudatoolkit/extension.nix | 19 +++++-- .../redist/build-cuda-redist-package.nix | 11 ++-- .../cudatoolkit/redist/overrides.nix | 3 +- .../libraries/science/math/cudnn/generic.nix | 8 +-- .../science/math/tensorrt/generic.nix | 8 +-- .../python-modules/tensorflow/default.nix | 46 +++++++++++----- .../cuda/cuda-library-samples/generic.nix | 8 +-- 8 files changed, 88 insertions(+), 69 deletions(-) diff --git a/pkgs/development/compilers/cudatoolkit/common.nix b/pkgs/development/compilers/cudatoolkit/common.nix index a94f6fbdaf73..e6d7cbc377cf 100644 --- a/pkgs/development/compilers/cudatoolkit/common.nix +++ b/pkgs/development/compilers/cudatoolkit/common.nix @@ -11,7 +11,7 @@ args@ , fetchurl , fontconfig , freetype -, gcc +, gcc # :: String , gdk-pixbuf , glib , glibc @@ -22,13 +22,13 @@ args@ , perl , python3 , requireFile -, stdenv +, backendStdenv # E.g. gcc11Stdenv, set in extension.nix , unixODBC , xorg , zlib }: -stdenv.mkDerivation rec { +backendStdenv.mkDerivation rec { pname = "cudatoolkit"; inherit version runPatches; @@ -146,37 +146,24 @@ stdenv.mkDerivation rec { # Fix builds with newer glibc version sed -i "1 i#define _BITS_FLOATN_H" "$out/include/host_defines.h" - - # Ensure that cmake can find CUDA. + '' + + # Point NVCC at a compatible compiler + # FIXME: redist cuda_nvcc copy-pastes this code + # Refer to comments in the overrides for cuda_nvcc for explanation + # CUDA_TOOLKIT_ROOT_DIR is legacy, + # Cf.
https://cmake.org/cmake/help/latest/module/FindCUDA.html#input-variables + '' mkdir -p $out/nix-support - echo "cmakeFlags+=' -DCUDA_TOOLKIT_ROOT_DIR=$out'" >> $out/nix-support/setup-hook - - # Set the host compiler to be used by nvcc. - # FIXME: redist cuda_nvcc copy-pastes this code - - # For CMake-based projects: - # https://cmake.org/cmake/help/latest/module/FindCUDA.html#input-variables - # https://cmake.org/cmake/help/latest/envvar/CUDAHOSTCXX.html - # https://cmake.org/cmake/help/latest/variable/CMAKE_CUDA_HOST_COMPILER.html - - # For non-CMake projects: - # FIXME: results in "incompatible redefinition" warnings ...but we keep - # both this and cmake variables until we come up with a more general - # solution - # https://docs.nvidia.com/cuda/cuda-compiler-driver-nvcc/index.html#compiler-bindir-directory-ccbin cat <<EOF >> $out/nix-support/setup-hook - - cmakeFlags+=' -DCUDA_HOST_COMPILER=${gcc}/bin' - cmakeFlags+=' -DCMAKE_CUDA_HOST_COMPILER=${gcc}/bin' + cmakeFlags+=' -DCUDA_TOOLKIT_ROOT_DIR=$out' + cmakeFlags+=' -DCUDA_HOST_COMPILER=${backendStdenv.cc}/bin' + cmakeFlags+=' -DCMAKE_CUDA_HOST_COMPILER=${backendStdenv.cc}/bin' if [ -z "\''${CUDAHOSTCXX-}" ]; then - export CUDAHOSTCXX=${gcc}/bin; + export CUDAHOSTCXX=${backendStdenv.cc}/bin; fi - - export NVCC_PREPEND_FLAGS+=' --compiler-bindir=${gcc}/bin' + export NVCC_PREPEND_FLAGS+=' --compiler-bindir=${backendStdenv.cc}/bin' EOF - # Move some libraries to the lib output so that programs that # depend on them don't pull in this entire monstrosity. mkdir -p $lib/lib @@ -212,11 +199,10 @@ stdenv.mkDerivation rec { # The path to libstdc++ and such # - # NB: - # 1. "gcc" (gcc-wrapper) here is what's exposed as cudaPackages.cudatoolkit.cc - # 2. "gcc.cc" is the unwrapped gcc - # 3.
"gcc.cc.lib" is one of its outputs - "${gcc.cc.lib}/lib64" + # `backendStdenv` is the cuda-compatible toolchain that we pick in + # extension.nix; we hand it to NVCC to use as a back-end, and we link + # cudatoolkit's binaries against its libstdc++ + "${backendStdenv.cc.cc.lib}/lib64" "$out/jre/lib/amd64/jli" "$out/lib64" @@ -286,7 +272,7 @@ stdenv.mkDerivation rec { popd ''; passthru = { - cc = gcc; + cc = backendStdenv.cc; majorMinorVersion = lib.versions.majorMinor version; majorVersion = lib.versions.majorMinor version; }; diff --git a/pkgs/development/compilers/cudatoolkit/extension.nix b/pkgs/development/compilers/cudatoolkit/extension.nix index c11f12b118a2..72cab97f8ffc 100644 --- a/pkgs/development/compilers/cudatoolkit/extension.nix +++ b/pkgs/development/compilers/cudatoolkit/extension.nix @@ -7,11 +7,24 @@ final: prev: let # Version info for the classic cudatoolkit packages that contain everything that is in redist. cudatoolkitVersions = final.lib.importTOML ./versions.toml; + finalVersion = cudatoolkitVersions.${final.cudaVersion}; + + # Exposed as cudaPackages.backendStdenv. + # We don't call it just "stdenv" to avoid confusion: e.g. this toolchain doesn't contain nvcc. + # Instead, it's the back-end toolchain for nvcc to use. + # We also use this to link a compatible libstdc++ (backendStdenv.cc.cc.lib) + # Cf. 
https://github.com/NixOS/nixpkgs/pull/218265 for context + backendStdenv = prev.pkgs."${finalVersion.gcc}Stdenv"; + ### Add classic cudatoolkit package - cudatoolkit = buildCudaToolkitPackage ((attrs: attrs // { gcc = prev.pkgs.${attrs.gcc}; }) cudatoolkitVersions.${final.cudaVersion}); + cudatoolkit = buildCudaToolkitPackage (finalVersion // { inherit backendStdenv; }); cudaFlags = final.callPackage ./flags.nix {}; -in { - inherit cudatoolkit cudaFlags; +in +{ + inherit + backendStdenv + cudatoolkit + cudaFlags; } diff --git a/pkgs/development/compilers/cudatoolkit/redist/build-cuda-redist-package.nix b/pkgs/development/compilers/cudatoolkit/redist/build-cuda-redist-package.nix index 3bf9184eefab..1b216ee625a8 100644 --- a/pkgs/development/compilers/cudatoolkit/redist/build-cuda-redist-package.nix +++ b/pkgs/development/compilers/cudatoolkit/redist/build-cuda-redist-package.nix @@ -1,5 +1,5 @@ { lib -, stdenv +, backendStdenv , fetchurl , autoPatchelfHook , autoAddOpenGLRunpathHook @@ -11,7 +11,7 @@ attrs: let arch = "linux-x86_64"; in -stdenv.mkDerivation { +backendStdenv.mkDerivation { inherit pname; inherit (attrs) version; @@ -33,11 +33,8 @@ stdenv.mkDerivation { # autoPatchelfHook will search for a libstdc++ and we're giving it a # "compatible" libstdc++ from the same toolchain that NVCC uses. # - # E.g. it might happen that stdenv=gcc12Stdenv, but we build against cuda11 - # that only "supports" gcc11. 
Linking against gcc12's libraries we might - # sometimes actually sometimes encounter dynamic linkage errors at runtime # NB: We don't actually know if this is the right thing to do - cudatoolkit.cc.cc.lib + backendStdenv.cc.cc.lib ]; dontBuild = true; @@ -51,7 +48,7 @@ stdenv.mkDerivation { runHook postInstall ''; - passthru.stdenv = stdenv; + passthru.stdenv = backendStdenv; meta = { description = attrs.name; diff --git a/pkgs/development/compilers/cudatoolkit/redist/overrides.nix b/pkgs/development/compilers/cudatoolkit/redist/overrides.nix index 663af1db7632..96b782d8c990 100644 --- a/pkgs/development/compilers/cudatoolkit/redist/overrides.nix +++ b/pkgs/development/compilers/cudatoolkit/redist/overrides.nix @@ -24,7 +24,7 @@ in cuda_nvcc = prev.cuda_nvcc.overrideAttrs (oldAttrs: let - inherit (prev.cudatoolkit) cc; + inherit (prev.backendStdenv) cc; in { # Point NVCC at a compatible compiler @@ -44,7 +44,6 @@ in postInstall = (oldAttrs.postInstall or "") + '' mkdir -p $out/nix-support cat <<EOF >> $out/nix-support/setup-hook - cmakeFlags+=' -DCUDA_TOOLKIT_ROOT_DIR=$out' cmakeFlags+=' -DCUDA_HOST_COMPILER=${cc}/bin' cmakeFlags+=' -DCMAKE_CUDA_HOST_COMPILER=${cc}/bin' if [ -z "\''${CUDAHOSTCXX-}" ]; then diff --git a/pkgs/development/libraries/science/math/cudnn/generic.nix b/pkgs/development/libraries/science/math/cudnn/generic.nix index d4e1f641a956..b2844ae6b074 100644 --- a/pkgs/development/libraries/science/math/cudnn/generic.nix +++ b/pkgs/development/libraries/science/math/cudnn/generic.nix @@ -1,11 +1,11 @@ { - stdenv, + backendStdenv, lib, zlib, useCudatoolkitRunfile ? false, cudaVersion, cudaMajorVersion, - cudatoolkit, # if cuda>=11: only used for .cc + cudatoolkit, # For cuda < 11 libcublas ?
null, # cuda <11 doesn't ship redist packages autoPatchelfHook, autoAddOpenGLRunpathHook, @@ -26,7 +26,7 @@ maxCudaVersion, }: assert useCudatoolkitRunfile || (libcublas != null); let - inherit (cudatoolkit) cc; + inherit (backendStdenv) cc; inherit (lib) lists strings trivial versions; # majorMinorPatch :: String -> String @@ -46,7 +46,7 @@ assert useCudatoolkitRunfile || (libcublas != null); let then cudatoolkit else libcublas; in - stdenv.mkDerivation { + backendStdenv.mkDerivation { pname = "cudatoolkit-${cudaMajorVersion}-cudnn"; version = versionTriple; diff --git a/pkgs/development/libraries/science/math/tensorrt/generic.nix b/pkgs/development/libraries/science/math/tensorrt/generic.nix index 3447087051f1..31090f715c22 100644 --- a/pkgs/development/libraries/science/math/tensorrt/generic.nix +++ b/pkgs/development/libraries/science/math/tensorrt/generic.nix @@ -1,5 +1,5 @@ { lib -, stdenv +, backendStdenv , requireFile , autoPatchelfHook , autoAddOpenGLRunpathHook @@ -18,7 +18,7 @@ assert lib.assertMsg (lib.strings.versionAtLeast cudnn.version fileVersionCudnn) "This version of TensorRT requires at least cuDNN ${fileVersionCudnn} (current version is ${cudnn.version})"; -stdenv.mkDerivation rec { +backendStdenv.mkDerivation rec { pname = "cudatoolkit-${cudatoolkit.majorVersion}-tensorrt"; version = fullVersion; src = requireFile rec { @@ -45,7 +45,7 @@ stdenv.mkDerivation rec { # Used by autoPatchelfHook buildInputs = [ - cudatoolkit.cc.cc.lib # libstdc++ + backendStdenv.cc.cc.lib # libstdc++ cudatoolkit cudnn ]; @@ -74,6 +74,8 @@ stdenv.mkDerivation rec { "$out/lib/libnvinfer_builder_resource.so.${mostOfVersion}" ''; + passthru.stdenv = backendStdenv; + meta = with lib; { # Check that the cudatoolkit version satisfies our min/max constraints (both # inclusive). 
We mark the package as broken if it fails to satisfies the diff --git a/pkgs/development/python-modules/tensorflow/default.nix b/pkgs/development/python-modules/tensorflow/default.nix index f18a924c31fa..adc7b1c1e0b3 100644 --- a/pkgs/development/python-modules/tensorflow/default.nix +++ b/pkgs/development/python-modules/tensorflow/default.nix @@ -32,6 +32,26 @@ }: let + originalStdenv = stdenv; +in +let + # Tensorflow looks at many toolchain-related variables which may diverge. + # + # Toolchain for cuda-enabled builds. + # We want to achieve two things: + # 1. NVCC should use a compatible back-end (e.g. gcc11 for cuda11) + # 2. Normal C++ files should be compiled with the same toolchain, + # to avoid potential weird dynamic linkage errors at runtime. + # This may not be necessary though + # + # Toolchain for Darwin: + # clang 7 fails to emit a symbol for + # __ZN4llvm11SmallPtrSetIPKNS_10AllocaInstELj8EED1Ev in any of the + # translation units, so the build fails at link time + stdenv = + if cudaSupport then cudaPackages.backendStdenv + else if originalStdenv.isDarwin then llvmPackages_11.stdenv + else originalStdenv; inherit (cudaPackages) cudatoolkit cudnn nccl; in @@ -44,6 +64,7 @@ assert ! (stdenv.isDarwin && cudaSupport); let withTensorboard = (pythonOlder "3.6") || tensorboardSupport; + # FIXME: migrate to redist cudaPackages cudatoolkit_joined = symlinkJoin { name = "${cudatoolkit.name}-merged"; paths = [ @@ -56,10 +77,13 @@ let ]; }; + # Tensorflow expects bintools at hard-coded paths, e.g. 
/usr/bin/ar + # The only way to overcome that is to set GCC_HOST_COMPILER_PREFIX, + # but that path must contain cc as well, so we merge them cudatoolkit_cc_joined = symlinkJoin { - name = "${cudatoolkit.cc.name}-merged"; + name = "${stdenv.cc.name}-merged"; paths = [ - cudatoolkit.cc + stdenv.cc binutils.bintools # for ar, dwp, nm, objcopy, objdump, strip ]; }; @@ -175,12 +199,7 @@ let ''; }) else _bazel-build; - _bazel-build = (buildBazelPackage.override (lib.optionalAttrs stdenv.isDarwin { - # clang 7 fails to emit a symbol for - # __ZN4llvm11SmallPtrSetIPKNS_10AllocaInstELj8EED1Ev in any of the - # translation units, so the build fails at link time - stdenv = llvmPackages_11.stdenv; - })) { + _bazel-build = buildBazelPackage.override { inherit stdenv; } { name = "${pname}-${version}"; bazel = bazel_5; @@ -211,12 +230,13 @@ let flatbuffers-core giflib grpc - icu + # Necessary to fix the "`GLIBCXX_3.4.30' not found" error + (icu.override { inherit stdenv; }) jsoncpp libjpeg_turbo libpng lmdb-core - pybind11 + (pybind11.overridePythonAttrs (_: { inherit stdenv; })) snappy sqlite ] ++ lib.optionals cudaSupport [ @@ -301,10 +321,12 @@ let TF_NEED_CUDA = tfFeature cudaSupport; TF_CUDA_PATHS = lib.optionalString cudaSupport "${cudatoolkit_joined},${cudnn},${nccl}"; - GCC_HOST_COMPILER_PREFIX = lib.optionalString cudaSupport "${cudatoolkit_cc_joined}/bin"; - GCC_HOST_COMPILER_PATH = lib.optionalString cudaSupport "${cudatoolkit_cc_joined}/bin/gcc"; TF_CUDA_COMPUTE_CAPABILITIES = lib.concatStringsSep "," cudaCapabilities; + # Needed even when we override stdenv: e.g. 
for ar + GCC_HOST_COMPILER_PREFIX = lib.optionalString cudaSupport "${cudatoolkit_cc_joined}/bin"; + GCC_HOST_COMPILER_PATH = lib.optionalString cudaSupport "${cudatoolkit_cc_joined}/bin/cc"; + postPatch = '' # bazel 3.3 should work just as well as bazel 3.1 rm -f .bazelversion diff --git a/pkgs/test/cuda/cuda-library-samples/generic.nix b/pkgs/test/cuda/cuda-library-samples/generic.nix index e01664bab319..e9a481c94a7a 100644 --- a/pkgs/test/cuda/cuda-library-samples/generic.nix +++ b/pkgs/test/cuda/cuda-library-samples/generic.nix @@ -1,4 +1,4 @@ -{ lib, stdenv, fetchFromGitHub +{ lib, backendStdenv, fetchFromGitHub , cmake, addOpenGLRunpath , cudatoolkit , cutensor @@ -35,13 +35,13 @@ let in { - cublas = stdenv.mkDerivation (commonAttrs // { + cublas = backendStdenv.mkDerivation (commonAttrs // { pname = "cuda-library-samples-cublas"; src = "${src}/cuBLASLt"; }); - cusolver = stdenv.mkDerivation (commonAttrs // { + cusolver = backendStdenv.mkDerivation (commonAttrs // { pname = "cuda-library-samples-cusolver"; src = "${src}/cuSOLVER"; @@ -49,7 +49,7 @@ in sourceRoot = "cuSOLVER/gesv"; }); - cutensor = stdenv.mkDerivation (commonAttrs // { + cutensor = backendStdenv.mkDerivation (commonAttrs // { pname = "cuda-library-samples-cutensor"; src = "${src}/cuTENSOR";