mirror of
https://github.com/NixOS/nixpkgs.git
synced 2024-11-24 07:53:19 +00:00
Merge pull request #218265 from SomeoneSerge/hotfix-nvcc-gcc-incompatibility
cudaPackages: point nvcc at a compatible -ccbin
This commit is contained in:
commit
e1fbe85b0a
@ -50,7 +50,7 @@ stdenv.mkDerivation rec {
|
||||
"-DUSE_OLDCMAKECUDA=ON" # see https://github.com/apache/incubator-mxnet/issues/10743
|
||||
"-DCUDA_ARCH_NAME=All"
|
||||
"-DCUDA_HOST_COMPILER=${cudatoolkit.cc}/bin/cc"
|
||||
"-DMXNET_CUDA_ARCH=${builtins.concatStringsSep ";" cudaFlags.cudaRealArches}"
|
||||
"-DMXNET_CUDA_ARCH=${builtins.concatStringsSep ";" cudaFlags.realArches}"
|
||||
] else [ "-DUSE_CUDA=OFF" ])
|
||||
++ lib.optional (!cudnnSupport) "-DUSE_CUDNN=OFF";
|
||||
|
||||
|
@ -11,7 +11,6 @@ args@
|
||||
, fetchurl
|
||||
, fontconfig
|
||||
, freetype
|
||||
, gcc
|
||||
, gdk-pixbuf
|
||||
, glib
|
||||
, glibc
|
||||
@ -22,13 +21,13 @@ args@
|
||||
, perl
|
||||
, python3
|
||||
, requireFile
|
||||
, stdenv
|
||||
, backendStdenv # E.g. gcc11Stdenv, set in extension.nix
|
||||
, unixODBC
|
||||
, xorg
|
||||
, zlib
|
||||
}:
|
||||
|
||||
stdenv.mkDerivation rec {
|
||||
backendStdenv.mkDerivation rec {
|
||||
pname = "cudatoolkit";
|
||||
inherit version runPatches;
|
||||
|
||||
@ -146,14 +145,23 @@ stdenv.mkDerivation rec {
|
||||
|
||||
# Fix builds with newer glibc version
|
||||
sed -i "1 i#define _BITS_FLOATN_H" "$out/include/host_defines.h"
|
||||
|
||||
# Ensure that cmake can find CUDA.
|
||||
'' +
|
||||
# Point NVCC at a compatible compiler
|
||||
# FIXME: redist cuda_nvcc copy-pastes this code
|
||||
# Refer to comments in the overrides for cuda_nvcc for explanation
|
||||
# CUDA_TOOLKIT_ROOT_DIR is legacy,
|
||||
# Cf. https://cmake.org/cmake/help/latest/module/FindCUDA.html#input-variables
|
||||
''
|
||||
mkdir -p $out/nix-support
|
||||
echo "cmakeFlags+=' -DCUDA_TOOLKIT_ROOT_DIR=$out'" >> $out/nix-support/setup-hook
|
||||
|
||||
# Set the host compiler to be used by nvcc for CMake-based projects:
|
||||
# https://cmake.org/cmake/help/latest/module/FindCUDA.html#input-variables
|
||||
echo "cmakeFlags+=' -DCUDA_HOST_COMPILER=${gcc}/bin'" >> $out/nix-support/setup-hook
|
||||
cat <<EOF >> $out/nix-support/setup-hook
|
||||
cmakeFlags+=' -DCUDA_TOOLKIT_ROOT_DIR=$out'
|
||||
cmakeFlags+=' -DCUDA_HOST_COMPILER=${backendStdenv.cc}/bin'
|
||||
cmakeFlags+=' -DCMAKE_CUDA_HOST_COMPILER=${backendStdenv.cc}/bin'
|
||||
if [ -z "\''${CUDAHOSTCXX-}" ]; then
|
||||
export CUDAHOSTCXX=${backendStdenv.cc}/bin;
|
||||
fi
|
||||
export NVCC_PREPEND_FLAGS+=' --compiler-bindir=${backendStdenv.cc}/bin'
|
||||
EOF
|
||||
|
||||
# Move some libraries to the lib output so that programs that
|
||||
# depend on them don't pull in this entire monstrosity.
|
||||
@ -167,10 +175,6 @@ stdenv.mkDerivation rec {
|
||||
mv $out/extras/CUPTI/lib64/libcupti* $out/lib
|
||||
''}
|
||||
|
||||
# Set compiler for NVCC.
|
||||
wrapProgram $out/bin/nvcc \
|
||||
--prefix PATH : ${gcc}/bin
|
||||
|
||||
# nvprof does not find any program to profile if LD_LIBRARY_PATH is not set
|
||||
wrapProgram $out/bin/nvprof \
|
||||
--prefix LD_LIBRARY_PATH : $out/lib
|
||||
@ -191,7 +195,14 @@ stdenv.mkDerivation rec {
|
||||
preFixup =
|
||||
let rpath = lib.concatStringsSep ":" [
|
||||
(lib.makeLibraryPath (runtimeDependencies ++ [ "$lib" "$out" "$out/nvvm" ]))
|
||||
"${stdenv.cc.cc.lib}/lib64"
|
||||
|
||||
# The path to libstdc++ and such
|
||||
#
|
||||
# `backendStdenv` is the cuda-compatible toolchain that we pick in
|
||||
# extension.nix; we hand it to NVCC to use as a back-end, and we link
|
||||
# cudatoolkit's binaries against its libstdc++
|
||||
"${backendStdenv.cc.cc.lib}/lib64"
|
||||
|
||||
"$out/jre/lib/amd64/jli"
|
||||
"$out/lib64"
|
||||
"$out/nvvm/lib64"
|
||||
@ -260,7 +271,7 @@ stdenv.mkDerivation rec {
|
||||
popd
|
||||
'';
|
||||
passthru = {
|
||||
cc = gcc;
|
||||
inherit (backendStdenv) cc;
|
||||
majorMinorVersion = lib.versions.majorMinor version;
|
||||
majorVersion = lib.versions.majorMinor version;
|
||||
};
|
||||
|
@ -7,11 +7,29 @@ final: prev: let
|
||||
# Version info for the classic cudatoolkit packages that contain everything that is in redist.
|
||||
cudatoolkitVersions = final.lib.importTOML ./versions.toml;
|
||||
|
||||
finalVersion = cudatoolkitVersions.${final.cudaVersion};
|
||||
|
||||
# Exposed as cudaPackages.backendStdenv.
|
||||
# We don't call it just "stdenv" to avoid confusion: e.g. this toolchain doesn't contain nvcc.
|
||||
# Instead, it's the back-end toolchain for nvcc to use.
|
||||
# We also use this to link a compatible libstdc++ (backendStdenv.cc.cc.lib)
|
||||
# Cf. https://github.com/NixOS/nixpkgs/pull/218265 for context
|
||||
backendStdenv = prev.pkgs."${finalVersion.gcc}Stdenv";
|
||||
|
||||
### Add classic cudatoolkit package
|
||||
cudatoolkit = buildCudaToolkitPackage ((attrs: attrs // { gcc = prev.pkgs.${attrs.gcc}; }) cudatoolkitVersions.${final.cudaVersion});
|
||||
cudatoolkit =
|
||||
let
|
||||
attrs = builtins.removeAttrs finalVersion [ "gcc" ];
|
||||
attrs' = attrs // { inherit backendStdenv; };
|
||||
in
|
||||
buildCudaToolkitPackage attrs';
|
||||
|
||||
cudaFlags = final.callPackage ./flags.nix {};
|
||||
|
||||
in {
|
||||
inherit cudatoolkit cudaFlags;
|
||||
in
|
||||
{
|
||||
inherit
|
||||
backendStdenv
|
||||
cudatoolkit
|
||||
cudaFlags;
|
||||
}
|
||||
|
@ -1,6 +1,6 @@
|
||||
{ config
|
||||
, lib
|
||||
, cudatoolkit
|
||||
, cudaVersion
|
||||
}:
|
||||
|
||||
# Type aliases
|
||||
@ -13,14 +13,21 @@
|
||||
|
||||
let
|
||||
inherit (lib) attrsets lists strings trivial versions;
|
||||
cudaVersion = cudatoolkit.version;
|
||||
|
||||
# Flags are determined based on your CUDA toolkit by default. You may benefit
|
||||
# from improved performance, reduced file size, or greater hardware support by
|
||||
# passing a configuration based on your specific GPU environment.
|
||||
#
|
||||
# config.cudaCapabilities: list of hardware generations to support (e.g., "8.0")
|
||||
# config.cudaForwardCompat: bool for compatibility with future GPU generations
|
||||
# config.cudaCapabilities :: List Capability
|
||||
# List of hardware generations to build.
|
||||
# E.g. [ "8.0" ]
|
||||
# Currently, the last item is considered the optional forward-compatibility arch,
|
||||
# but this may change in the future.
|
||||
#
|
||||
# config.cudaForwardCompat :: Bool
|
||||
# Whether to include the forward compatibility gencode (+PTX)
|
||||
# to support future GPU generations.
|
||||
# E.g. true
|
||||
#
|
||||
# Please see the accompanying documentation or https://github.com/NixOS/nixpkgs/pull/205351
|
||||
|
||||
@ -40,6 +47,9 @@ let
|
||||
# GPUs which are supported by the provided CUDA version.
|
||||
supportedGpus = builtins.filter isSupported gpus;
|
||||
|
||||
# supportedCapabilities :: List Capability
|
||||
supportedCapabilities = lists.map (gpu: gpu.computeCapability) supportedGpus;
|
||||
|
||||
# cudaArchNameToVersions :: AttrSet String (List String)
|
||||
# Maps the name of a GPU architecture to different versions of that architecture.
|
||||
# For example, "Ampere" maps to [ "8.0" "8.6" "8.7" ].
|
||||
@ -50,12 +60,6 @@ let
|
||||
(gpu: gpu.archName)
|
||||
supportedGpus;
|
||||
|
||||
# cudaArchNames :: List String
|
||||
# NOTE: It's important that we don't rely on builtins.attrNames cudaArchNameToVersions here;
|
||||
# otherwise, we'll get the names sorted in alphabetical order. The JSON array we read them
|
||||
# from is already sorted, so we'll preserve that order here.
|
||||
cudaArchNames = lists.unique (lists.map (gpu: gpu.archName) supportedGpus);
|
||||
|
||||
# cudaComputeCapabilityToName :: AttrSet String String
|
||||
# Maps the version of a GPU architecture to the name of that architecture.
|
||||
# For example, "8.0" maps to "Ampere".
|
||||
@ -68,23 +72,6 @@ let
|
||||
supportedGpus
|
||||
);
|
||||
|
||||
# cudaComputeCapabilities :: List String
|
||||
# NOTE: It's important that we don't rely on builtins.attrNames cudaComputeCapabilityToName here;
|
||||
# otherwise, we'll get the versions sorted in alphabetical order. The JSON array we read them
|
||||
# from is already sorted, so we'll preserve that order here.
|
||||
# Use the user-provided list of CUDA capabilities if it's provided.
|
||||
cudaComputeCapabilities = config.cudaCapabilities
|
||||
or (lists.map (gpu: gpu.computeCapability) supportedGpus);
|
||||
|
||||
# cudaForwardComputeCapability :: String
|
||||
cudaForwardComputeCapability = (lists.last cudaComputeCapabilities) + "+PTX";
|
||||
|
||||
# cudaComputeCapabilitiesAndForward :: List String
|
||||
# The list of supported CUDA architectures, including the forward compatibility architecture.
|
||||
# If forward compatibility is disabled, this will be the same as cudaComputeCapabilities.
|
||||
cudaComputeCapabilitiesAndForward = cudaComputeCapabilities
|
||||
++ lists.optional (config.cudaForwardCompat or true) cudaForwardComputeCapability;
|
||||
|
||||
# dropDot :: String -> String
|
||||
dropDot = ver: builtins.replaceStrings [ "." ] [ "" ] ver;
|
||||
|
||||
@ -102,38 +89,68 @@ let
|
||||
"-gencode=arch=compute_${dropDot computeCapability},code=${feat}_${dropDot computeCapability}"
|
||||
);
|
||||
|
||||
# cudaRealArches :: List String
|
||||
# The real architectures are physical architectures supported by the CUDA version.
|
||||
# For example, "sm_80".
|
||||
cudaRealArches = archMapper "sm" cudaComputeCapabilities;
|
||||
formatCapabilities = { cudaCapabilities, enableForwardCompat ? true }: rec {
|
||||
inherit cudaCapabilities enableForwardCompat;
|
||||
|
||||
# cudaVirtualArches :: List String
|
||||
# The virtual architectures are typically used for forward compatibility, when trying to support
|
||||
# an architecture newer than the CUDA version allows.
|
||||
# For example, "compute_80".
|
||||
cudaVirtualArches = archMapper "compute" cudaComputeCapabilities;
|
||||
# archNames :: List String
|
||||
# E.g. [ "Turing" "Ampere" ]
|
||||
archNames = lists.unique (builtins.map (cap: cudaComputeCapabilityToName.${cap}) cudaCapabilities);
|
||||
|
||||
# cudaArches :: List String
|
||||
# By default, build for all supported architectures and forward compatibility via a virtual
|
||||
# architecture for the newest supported architecture.
|
||||
cudaArches = cudaRealArches ++
|
||||
lists.optional (config.cudaForwardCompat or true) (lists.last cudaVirtualArches);
|
||||
# realArches :: List String
|
||||
# The real architectures are physical architectures supported by the CUDA version.
|
||||
# E.g. [ "sm_75" "sm_86" ]
|
||||
realArches = archMapper "sm" cudaCapabilities;
|
||||
|
||||
# cudaGencode :: List String
|
||||
# A list of CUDA gencode arguments to pass to NVCC.
|
||||
cudaGencode =
|
||||
let
|
||||
base = gencodeMapper "sm" cudaComputeCapabilities;
|
||||
forwardCompat = gencodeMapper "compute" [ (lists.last cudaComputeCapabilities) ];
|
||||
in
|
||||
base ++ lists.optionals (config.cudaForwardCompat or true) forwardCompat;
|
||||
# virtualArches :: List String
|
||||
# The virtual architectures are typically used for forward compatibility, when trying to support
|
||||
# an architecture newer than the CUDA version allows.
|
||||
# E.g. [ "compute_75" "compute_86" ]
|
||||
virtualArches = archMapper "compute" cudaCapabilities;
|
||||
|
||||
# arches :: List String
|
||||
# By default, build for all supported architectures and forward compatibility via a virtual
|
||||
# architecture for the newest supported architecture.
|
||||
# E.g. [ "sm_75" "sm_86" "compute_86" ]
|
||||
arches = realArches ++
|
||||
lists.optional enableForwardCompat (lists.last virtualArches);
|
||||
|
||||
# gencode :: List String
|
||||
# A list of CUDA gencode arguments to pass to NVCC.
|
||||
# E.g. [ "-gencode=arch=compute_75,code=sm_75" ... "-gencode=arch=compute_86,code=compute_86" ]
|
||||
gencode =
|
||||
let
|
||||
base = gencodeMapper "sm" cudaCapabilities;
|
||||
forward = gencodeMapper "compute" [ (lists.last cudaCapabilities) ];
|
||||
in
|
||||
base ++ lib.optionals enableForwardCompat forward;
|
||||
};
|
||||
|
||||
in
|
||||
# When changing names or formats: pause, validate, and update the assert
|
||||
assert (formatCapabilities { cudaCapabilities = [ "7.5" "8.6" ]; }) == {
|
||||
cudaCapabilities = [ "7.5" "8.6" ];
|
||||
enableForwardCompat = true;
|
||||
|
||||
archNames = [ "Turing" "Ampere" ];
|
||||
realArches = [ "sm_75" "sm_86" ];
|
||||
virtualArches = [ "compute_75" "compute_86" ];
|
||||
arches = [ "sm_75" "sm_86" "compute_86" ];
|
||||
|
||||
gencode = [ "-gencode=arch=compute_75,code=sm_75" "-gencode=arch=compute_86,code=sm_86" "-gencode=arch=compute_86,code=compute_86" ];
|
||||
};
|
||||
{
|
||||
inherit
|
||||
cudaArchNames
|
||||
cudaArchNameToVersions cudaComputeCapabilityToName
|
||||
cudaRealArches cudaVirtualArches cudaArches
|
||||
cudaGencode;
|
||||
cudaCapabilities = cudaComputeCapabilitiesAndForward;
|
||||
# formatCapabilities :: { cudaCapabilities: List Capability, enableForwardCompat: Boolean } -> { ... }
|
||||
inherit formatCapabilities;
|
||||
|
||||
# cudaArchNameToVersions :: String => List String
|
||||
inherit cudaArchNameToVersions;
|
||||
|
||||
# cudaComputeCapabilityToName :: String => String
|
||||
inherit cudaComputeCapabilityToName;
|
||||
|
||||
# dropDot :: String -> String
|
||||
inherit dropDot;
|
||||
} // formatCapabilities {
|
||||
cudaCapabilities = config.cudaCapabilities or supportedCapabilities;
|
||||
enableForwardCompat = config.cudaForwardCompat or true;
|
||||
}
|
||||
|
@ -1,5 +1,5 @@
|
||||
{ lib
|
||||
, stdenv
|
||||
, backendStdenv
|
||||
, fetchurl
|
||||
, autoPatchelfHook
|
||||
, autoAddOpenGLRunpathHook
|
||||
@ -10,7 +10,8 @@ attrs:
|
||||
|
||||
let
|
||||
arch = "linux-x86_64";
|
||||
in stdenv.mkDerivation {
|
||||
in
|
||||
backendStdenv.mkDerivation {
|
||||
inherit pname;
|
||||
inherit (attrs) version;
|
||||
|
||||
@ -29,7 +30,11 @@ in stdenv.mkDerivation {
|
||||
];
|
||||
|
||||
buildInputs = [
|
||||
stdenv.cc.cc.lib
|
||||
# autoPatchelfHook will search for a libstdc++ and we're giving it a
|
||||
# "compatible" libstdc++ from the same toolchain that NVCC uses.
|
||||
#
|
||||
# NB: We don't actually know if this is the right thing to do
|
||||
backendStdenv.cc.cc.lib
|
||||
];
|
||||
|
||||
dontBuild = true;
|
||||
@ -43,6 +48,8 @@ in stdenv.mkDerivation {
|
||||
runHook postInstall
|
||||
'';
|
||||
|
||||
passthru.stdenv = backendStdenv;
|
||||
|
||||
meta = {
|
||||
description = attrs.name;
|
||||
license = lib.licenses.unfree;
|
||||
|
@ -1,6 +1,8 @@
|
||||
final: prev: let
|
||||
final: prev:
|
||||
let
|
||||
inherit (prev) lib pkgs;
|
||||
in (lib.filterAttrs (attr: _: (prev ? "${attr}")) {
|
||||
in
|
||||
(lib.filterAttrs (attr: _: (prev ? "${attr}")) {
|
||||
### Overrides to fix the components of cudatoolkit-redist
|
||||
|
||||
# Attributes that don't exist in the previous set are removed.
|
||||
@ -20,6 +22,38 @@ in (lib.filterAttrs (attr: _: (prev ? "${attr}")) {
|
||||
prev.libcublas
|
||||
];
|
||||
|
||||
cuda_nvcc = prev.cuda_nvcc.overrideAttrs (oldAttrs:
|
||||
let
|
||||
inherit (prev.backendStdenv) cc;
|
||||
in
|
||||
{
|
||||
# Point NVCC at a compatible compiler
|
||||
# FIXME: non-redist cudatoolkit copy-pastes this code
|
||||
|
||||
# For CMake-based projects:
|
||||
# https://cmake.org/cmake/help/latest/module/FindCUDA.html#input-variables
|
||||
# https://cmake.org/cmake/help/latest/envvar/CUDAHOSTCXX.html
|
||||
# https://cmake.org/cmake/help/latest/variable/CMAKE_CUDA_HOST_COMPILER.html
|
||||
|
||||
# For non-CMake projects:
|
||||
# We prepend --compiler-bindir to nvcc flags.
|
||||
# Downstream packages can override these, because NVCC
|
||||
# uses the last --compiler-bindir it gets on the command line.
|
||||
# FIXME: this results in "incompatible redefinition" warnings.
|
||||
# https://docs.nvidia.com/cuda/cuda-compiler-driver-nvcc/index.html#compiler-bindir-directory-ccbin
|
||||
postInstall = (oldAttrs.postInstall or "") + ''
|
||||
mkdir -p $out/nix-support
|
||||
cat <<EOF >> $out/nix-support/setup-hook
|
||||
cmakeFlags+=' -DCUDA_HOST_COMPILER=${cc}/bin'
|
||||
cmakeFlags+=' -DCMAKE_CUDA_HOST_COMPILER=${cc}/bin'
|
||||
if [ -z "\''${CUDAHOSTCXX-}" ]; then
|
||||
export CUDAHOSTCXX=${cc}/bin;
|
||||
fi
|
||||
export NVCC_PREPEND_FLAGS+=' --compiler-bindir=${cc}/bin'
|
||||
EOF
|
||||
'';
|
||||
});
|
||||
|
||||
cuda_nvprof = prev.cuda_nvprof.overrideAttrs (oldAttrs: {
|
||||
nativeBuildInputs = oldAttrs.nativeBuildInputs ++ [ pkgs.addOpenGLRunpath ];
|
||||
buildInputs = oldAttrs.buildInputs ++ [ prev.cuda_cupti ];
|
||||
|
@ -76,8 +76,4 @@ gcc = "gcc11"
|
||||
version = "12.0.1"
|
||||
url = "https://developer.download.nvidia.com/compute/cuda/12.0.1/local_installers/cuda_12.0.1_525.85.12_linux.run"
|
||||
sha256 = "sha256-GyBaBicvFGP0dydv2rkD8/ZmkXwGjlIHOAAeacehh1s="
|
||||
# CUDA 12 is compatible with gcc12, but nixpkgs default gcc is still on gcc11 as
|
||||
# of 2023-01-08. See https://docs.nvidia.com/cuda/cuda-installation-guide-linux/index.html#system-requirements.
|
||||
# This should be upgraded to gcc12 once nixpkgs default gcc is upgraded. Other
|
||||
# CUDA versions should likely have their gcc versions upgraded as well.
|
||||
gcc = "gcc11"
|
||||
gcc = "gcc12"
|
||||
|
@ -15,8 +15,8 @@
|
||||
, enableContrib ? true
|
||||
|
||||
, enableCuda ? (config.cudaSupport or false) &&
|
||||
stdenv.hostPlatform.isx86_64, cudatoolkit
|
||||
|
||||
stdenv.hostPlatform.isx86_64
|
||||
, cudaPackages ? { }
|
||||
, enableUnfree ? false
|
||||
, enableIpp ? false
|
||||
, enablePython ? false, pythonPackages ? null
|
||||
@ -40,6 +40,9 @@ assert blas.implementation == "openblas" && lapack.implementation == "openblas";
|
||||
assert enablePython -> pythonPackages != null;
|
||||
|
||||
let
|
||||
inherit (cudaPackages) cudatoolkit;
|
||||
inherit (cudaPackages.cudaFlags) cudaCapabilities;
|
||||
|
||||
version = "3.4.18";
|
||||
|
||||
src = fetchFromGitHub {
|
||||
@ -242,6 +245,8 @@ stdenv.mkDerivation {
|
||||
"-DCUDA_FAST_MATH=ON"
|
||||
"-DCUDA_HOST_COMPILER=${cudatoolkit.cc}/bin/cc"
|
||||
"-DCUDA_NVCC_FLAGS=--expt-relaxed-constexpr"
|
||||
"-DCUDA_ARCH_BIN=${lib.concatStringsSep ";" cudaCapabilities}"
|
||||
"-DCUDA_ARCH_PTX=${lib.last cudaCapabilities}"
|
||||
] ++ lib.optionals stdenv.isDarwin [
|
||||
"-DWITH_OPENCL=OFF"
|
||||
"-DWITH_LAPACK=OFF"
|
||||
|
@ -37,7 +37,7 @@
|
||||
, enableContrib ? true
|
||||
|
||||
, enableCuda ? (config.cudaSupport or false) && stdenv.hostPlatform.isx86_64
|
||||
, cudatoolkit
|
||||
, cudaPackages ? { }
|
||||
, nvidia-optical-flow-sdk
|
||||
|
||||
, enableUnfree ? false
|
||||
@ -79,6 +79,9 @@
|
||||
}:
|
||||
|
||||
let
|
||||
inherit (cudaPackages) cudatoolkit;
|
||||
inherit (cudaPackages.cudaFlags) cudaCapabilities;
|
||||
|
||||
version = "4.7.0";
|
||||
|
||||
src = fetchFromGitHub {
|
||||
@ -342,6 +345,14 @@ stdenv.mkDerivation {
|
||||
"-DCUDA_FAST_MATH=ON"
|
||||
"-DCUDA_HOST_COMPILER=${cudatoolkit.cc}/bin/cc"
|
||||
"-DCUDA_NVCC_FLAGS=--expt-relaxed-constexpr"
|
||||
|
||||
# OpenCV respects at least three variables:
|
||||
# -DCUDA_GENERATION takes a single arch name, e.g. Volta
|
||||
# -DCUDA_ARCH_BIN takes a semi-colon separated list of real arches, e.g. "8.0;8.6"
|
||||
# -DCUDA_ARCH_PTX takes the virtual arch, e.g. "8.6"
|
||||
"-DCUDA_ARCH_BIN=${lib.concatStringsSep ";" cudaCapabilities}"
|
||||
"-DCUDA_ARCH_PTX=${lib.last cudaCapabilities}"
|
||||
|
||||
"-DNVIDIA_OPTICAL_FLOW_2_0_HEADERS_PATH=${nvidia-optical-flow-sdk}"
|
||||
] ++ lib.optionals stdenv.isDarwin [
|
||||
"-DWITH_OPENCL=OFF"
|
||||
|
@ -1,11 +1,11 @@
|
||||
{
|
||||
stdenv,
|
||||
backendStdenv,
|
||||
lib,
|
||||
zlib,
|
||||
useCudatoolkitRunfile ? false,
|
||||
cudaVersion,
|
||||
cudaMajorVersion,
|
||||
cudatoolkit, # if cuda>=11: only used for .cc
|
||||
cudatoolkit, # For cuda < 11
|
||||
libcublas ? null, # cuda <11 doesn't ship redist packages
|
||||
autoPatchelfHook,
|
||||
autoAddOpenGLRunpathHook,
|
||||
@ -26,7 +26,7 @@
|
||||
maxCudaVersion,
|
||||
}:
|
||||
assert useCudatoolkitRunfile || (libcublas != null); let
|
||||
inherit (cudatoolkit) cc;
|
||||
inherit (backendStdenv) cc;
|
||||
inherit (lib) lists strings trivial versions;
|
||||
|
||||
# majorMinorPatch :: String -> String
|
||||
@ -46,7 +46,7 @@ assert useCudatoolkitRunfile || (libcublas != null); let
|
||||
then cudatoolkit
|
||||
else libcublas;
|
||||
in
|
||||
stdenv.mkDerivation {
|
||||
backendStdenv.mkDerivation {
|
||||
pname = "cudatoolkit-${cudaMajorVersion}-cudnn";
|
||||
version = versionTriple;
|
||||
|
||||
|
@ -8,10 +8,16 @@
|
||||
{ blas
|
||||
, cmake
|
||||
, cudaPackages
|
||||
# FIXME: cuda being unfree means ofborg won't eval "magma".
|
||||
# respecting config.cudaSupport -> false by default
|
||||
# -> ofborg eval -> throws "no GPU targets specified".
|
||||
# Probably should delete everything but "magma-cuda" and "magma-hip"
|
||||
# from all-packages.nix
|
||||
, cudaSupport ? true
|
||||
, fetchurl
|
||||
, gfortran
|
||||
, gpuTargets ? [ ]
|
||||
, cudaCapabilities ? cudaPackages.cudaFlags.cudaCapabilities
|
||||
, gpuTargets ? [ ] # Non-CUDA targets, that is HIP
|
||||
, hip
|
||||
, hipblas
|
||||
, hipsparse
|
||||
@ -36,14 +42,8 @@ let
|
||||
# of the first list *from* the second list. That means:
|
||||
# lists.subtractLists a b = b - a
|
||||
|
||||
# For CUDA
|
||||
supportedCudaSmArches = lists.intersectLists cudaFlags.cudaRealArches supportedGpuTargets;
|
||||
# Subtract the supported SM architectures from the real SM architectures to get the unsupported
|
||||
# SM architectures.
|
||||
unsupportedCudaSmArches = lists.subtractLists supportedCudaSmArches cudaFlags.cudaRealArches;
|
||||
|
||||
# For ROCm
|
||||
# NOTE: The hip.gpuTargets are prefixed with "gfx" instead of "sm" like cudaFlags.cudaRealArches.
|
||||
# NOTE: The hip.gpuTargets are prefixed with "gfx" instead of "sm" like cudaFlags.realArches.
|
||||
# For some reason, Magma's CMakeLists.txt file does not handle the "gfx" prefix, so we must
|
||||
# remove it.
|
||||
rocmArches = lists.map (x: strings.removePrefix "gfx" x) hip.gpuTargets;
|
||||
@ -62,19 +62,32 @@ let
|
||||
)
|
||||
supported;
|
||||
|
||||
# Create the gpuTargetString.
|
||||
gpuTargetString = strings.concatStringsSep "," (
|
||||
if gpuTargets != [ ] then
|
||||
# If gpuTargets is specified, it always takes priority.
|
||||
gpuArchWarner supportedCustomGpuTargets unsupportedCustomGpuTargets
|
||||
else if cudaSupport then
|
||||
gpuArchWarner supportedCudaSmArches unsupportedCudaSmArches
|
||||
else if rocmSupport then
|
||||
gpuArchWarner supportedRocmArches unsupportedRocmArches
|
||||
else if cudaSupport then
|
||||
[ ] # It's important we pass explicit -DGPU_TARGET to reset magma's defaults
|
||||
else
|
||||
throw "No GPU targets specified"
|
||||
);
|
||||
|
||||
# E.g. [ "80" "86" "90" ]
|
||||
cudaArchitectures = (builtins.map cudaFlags.dropDot cudaCapabilities);
|
||||
|
||||
cudaArchitecturesString = strings.concatStringsSep ";" cudaArchitectures;
|
||||
minArch =
|
||||
let
|
||||
minArch' = builtins.head (builtins.sort builtins.lessThan cudaArchitectures);
|
||||
in
|
||||
# If this fails some day, something must've changed and we should re-validate our assumptions
|
||||
assert builtins.stringLength minArch' == 2;
|
||||
# "75" -> "750" Cf. https://bitbucket.org/icl/magma/src/f4ec79e2c13a2347eff8a77a3be6f83bc2daec20/CMakeLists.txt#lines-273
|
||||
"${minArch'}0";
|
||||
|
||||
|
||||
cuda_joined = symlinkJoin {
|
||||
name = "cuda-redist-${cudaVersion}";
|
||||
paths = with cudaPackages; [
|
||||
@ -87,6 +100,8 @@ let
|
||||
};
|
||||
in
|
||||
|
||||
assert (builtins.match "[^[:space:]]*" gpuTargetString) != null;
|
||||
|
||||
stdenv.mkDerivation {
|
||||
pname = "magma";
|
||||
inherit version;
|
||||
@ -116,7 +131,11 @@ stdenv.mkDerivation {
|
||||
openmp
|
||||
];
|
||||
|
||||
cmakeFlags = lists.optionals cudaSupport [
|
||||
cmakeFlags = [
|
||||
"-DGPU_TARGET=${gpuTargetString}"
|
||||
] ++ lists.optionals cudaSupport [
|
||||
"-DCMAKE_CUDA_ARCHITECTURES=${cudaArchitecturesString}"
|
||||
"-DMIN_ARCH=${minArch}" # Disarms magma's asserts
|
||||
"-DCMAKE_C_COMPILER=${cudatoolkit.cc}/bin/cc"
|
||||
"-DCMAKE_CXX_COMPILER=${cudatoolkit.cc}/bin/c++"
|
||||
"-DMAGMA_ENABLE_CUDA=ON"
|
||||
@ -126,14 +145,10 @@ stdenv.mkDerivation {
|
||||
"-DMAGMA_ENABLE_HIP=ON"
|
||||
];
|
||||
|
||||
# NOTE: We must set GPU_TARGET in preConfigure in this way because it may contain spaces.
|
||||
preConfigure = ''
|
||||
cmakeFlagsArray+=("-DGPU_TARGET=${gpuTargetString}")
|
||||
''
|
||||
# NOTE: The stdenv's CXX is used when compiling the CMake test to determine the version of
|
||||
# CUDA available. This isn't necessarily the same as cudatoolkit.cc, so we must set
|
||||
# CUDAHOSTCXX.
|
||||
+ strings.optionalString cudaSupport ''
|
||||
preConfigure = strings.optionalString cudaSupport ''
|
||||
export CUDAHOSTCXX=${cudatoolkit.cc}/bin/c++
|
||||
'';
|
||||
|
||||
|
@ -1,27 +1,13 @@
|
||||
# NOTE: Order matters! Put the oldest version first, and the newest version last.
|
||||
# NOTE: Make sure the supportedGpuTargets are in order of oldest to newest.
|
||||
# You can update the supportedGpuTargets by looking at the CMakeLists.txt file.
|
||||
# CUDA starts here: https://bitbucket.org/icl/magma/src/f4ec79e2c13a2347eff8a77a3be6f83bc2daec20/CMakeLists.txt#lines-175
|
||||
# HIP is here: https://bitbucket.org/icl/magma/src/f4ec79e2c13a2347eff8a77a3be6f83bc2daec20/CMakeLists.txt#lines-386
|
||||
# CUDA works around magma's wrappers and uses FindCUDAToolkit directly
|
||||
[
|
||||
{
|
||||
version = "2.6.2";
|
||||
hash = "sha256-dbVU2rAJA+LRC5cskT5Q5/iMvGLzrkMrWghsfk7aCnE=";
|
||||
supportedGpuTargets = [
|
||||
"sm_20"
|
||||
"sm_30"
|
||||
"sm_35"
|
||||
"sm_37"
|
||||
"sm_50"
|
||||
"sm_52"
|
||||
"sm_53"
|
||||
"sm_60"
|
||||
"sm_61"
|
||||
"sm_62"
|
||||
"sm_70"
|
||||
"sm_71"
|
||||
"sm_75"
|
||||
"sm_80"
|
||||
"700"
|
||||
"701"
|
||||
"702"
|
||||
@ -53,21 +39,6 @@
|
||||
version = "2.7.1";
|
||||
hash = "sha256-2chxHAR6OMrhbv3nS+4uszMyF/0nEeHpuGBsu7SuGlA=";
|
||||
supportedGpuTargets = [
|
||||
"sm_20"
|
||||
"sm_30"
|
||||
"sm_35"
|
||||
"sm_37"
|
||||
"sm_50"
|
||||
"sm_52"
|
||||
"sm_53"
|
||||
"sm_60"
|
||||
"sm_61"
|
||||
"sm_62"
|
||||
"sm_70"
|
||||
"sm_71"
|
||||
"sm_75"
|
||||
"sm_80"
|
||||
"sm_90"
|
||||
"700"
|
||||
"701"
|
||||
"702"
|
||||
|
@ -1,11 +1,19 @@
|
||||
{ lib, stdenv, fetchFromGitHub, which, cudaPackages, addOpenGLRunpath }:
|
||||
{ lib
|
||||
, backendStdenv
|
||||
, fetchFromGitHub
|
||||
, which
|
||||
, cudaPackages ? { }
|
||||
, addOpenGLRunpath
|
||||
}:
|
||||
|
||||
with cudaPackages;
|
||||
|
||||
let
|
||||
inherit (cudaPackages) cudatoolkit;
|
||||
# Output looks like "-gencode=arch=compute_86,code=sm_86 -gencode=arch=compute_86,code=compute_86"
|
||||
gencode = lib.concatStringsSep " " cudaFlags.gencode;
|
||||
in
|
||||
|
||||
stdenv.mkDerivation rec {
|
||||
name = "nccl-${version}-cuda-${cudatoolkit.majorVersion}";
|
||||
backendStdenv.mkDerivation rec {
|
||||
name = "nccl-${version}-cuda-${cudaPackages.cudaMajorVersion}";
|
||||
version = "2.16.5-1";
|
||||
|
||||
src = fetchFromGitHub {
|
||||
@ -17,16 +25,29 @@ stdenv.mkDerivation rec {
|
||||
|
||||
outputs = [ "out" "dev" ];
|
||||
|
||||
nativeBuildInputs = [ which addOpenGLRunpath ];
|
||||
nativeBuildInputs = [
|
||||
which
|
||||
addOpenGLRunpath
|
||||
cuda_nvcc
|
||||
];
|
||||
|
||||
buildInputs = [ cudatoolkit ];
|
||||
buildInputs = [
|
||||
cuda_cudart
|
||||
] ++ lib.optionals (lib.versionAtLeast cudaVersion "12.0.0") [
|
||||
cuda_cccl
|
||||
];
|
||||
|
||||
preConfigure = ''
|
||||
patchShebangs src/collectives/device/gen_rules.sh
|
||||
makeFlagsArray+=(
|
||||
"NVCC_GENCODE=${gencode}"
|
||||
)
|
||||
'';
|
||||
|
||||
makeFlags = [
|
||||
"CUDA_HOME=${cudatoolkit}"
|
||||
"CUDA_HOME=${cuda_nvcc}"
|
||||
"CUDA_LIB=${cuda_cudart}/lib64"
|
||||
"CUDA_INC=${cuda_cudart}/include"
|
||||
"PREFIX=$(out)"
|
||||
];
|
||||
|
||||
|
@ -1,5 +1,5 @@
|
||||
{ lib
|
||||
, stdenv
|
||||
, backendStdenv
|
||||
, requireFile
|
||||
, autoPatchelfHook
|
||||
, autoAddOpenGLRunpathHook
|
||||
@ -18,7 +18,7 @@
|
||||
assert lib.assertMsg (lib.strings.versionAtLeast cudnn.version fileVersionCudnn)
|
||||
"This version of TensorRT requires at least cuDNN ${fileVersionCudnn} (current version is ${cudnn.version})";
|
||||
|
||||
stdenv.mkDerivation rec {
|
||||
backendStdenv.mkDerivation rec {
|
||||
pname = "cudatoolkit-${cudatoolkit.majorVersion}-tensorrt";
|
||||
version = fullVersion;
|
||||
src = requireFile rec {
|
||||
@ -45,7 +45,7 @@ stdenv.mkDerivation rec {
|
||||
|
||||
# Used by autoPatchelfHook
|
||||
buildInputs = [
|
||||
cudatoolkit.cc.cc.lib # libstdc++
|
||||
backendStdenv.cc.cc.lib # libstdc++
|
||||
cudatoolkit
|
||||
cudnn
|
||||
];
|
||||
@ -74,6 +74,8 @@ stdenv.mkDerivation rec {
|
||||
"$out/lib/libnvinfer_builder_resource.so.${mostOfVersion}"
|
||||
'';
|
||||
|
||||
passthru.stdenv = backendStdenv;
|
||||
|
||||
meta = with lib; {
|
||||
# Check that the cudatoolkit version satisfies our min/max constraints (both
|
||||
# inclusive). We mark the package as broken if it fails to satisfy the
|
||||
|
@ -164,7 +164,7 @@ let
|
||||
build --action_env TF_CUDA_PATHS="${cudatoolkit_joined},${cudnn},${nccl}"
|
||||
build --action_env TF_CUDA_VERSION="${lib.versions.majorMinor cudatoolkit.version}"
|
||||
build --action_env TF_CUDNN_VERSION="${lib.versions.major cudnn.version}"
|
||||
build:cuda --action_env TF_CUDA_COMPUTE_CAPABILITIES="${builtins.concatStringsSep "," cudaFlags.cudaRealArches}"
|
||||
build:cuda --action_env TF_CUDA_COMPUTE_CAPABILITIES="${builtins.concatStringsSep "," cudaFlags.realArches}"
|
||||
'' + ''
|
||||
CFG
|
||||
'';
|
||||
|
@ -17,7 +17,9 @@
|
||||
# that in nix as well. It would make some things easier and less confusing, but
|
||||
# it would also make the default tensorflow package unfree. See
|
||||
# https://groups.google.com/a/tensorflow.org/forum/#!topic/developers/iRCt5m4qUz0
|
||||
, cudaSupport ? false, cudaPackages ? {}
|
||||
, cudaSupport ? false
|
||||
, cudaPackages ? { }
|
||||
, cudaCapabilities ? cudaPackages.cudaFlags.cudaCapabilities
|
||||
, mklSupport ? false, mkl
|
||||
, tensorboardSupport ? true
|
||||
# XLA without CUDA is broken
|
||||
@ -30,7 +32,27 @@
|
||||
}:
|
||||
|
||||
let
|
||||
inherit (cudaPackages) cudatoolkit cudaFlags cudnn nccl;
|
||||
originalStdenv = stdenv;
|
||||
in
|
||||
let
|
||||
# Tensorflow looks at many toolchain-related variables which may diverge.
|
||||
#
|
||||
# Toolchain for cuda-enabled builds.
|
||||
# We want to achieve two things:
|
||||
# 1. NVCC should use a compatible back-end (e.g. gcc11 for cuda11)
|
||||
# 2. Normal C++ files should be compiled with the same toolchain,
|
||||
# to avoid potential weird dynamic linkage errors at runtime.
|
||||
# This may not be necessary though
|
||||
#
|
||||
# Toolchain for Darwin:
|
||||
# clang 7 fails to emit a symbol for
|
||||
# __ZN4llvm11SmallPtrSetIPKNS_10AllocaInstELj8EED1Ev in any of the
|
||||
# translation units, so the build fails at link time
|
||||
stdenv =
|
||||
if cudaSupport then cudaPackages.backendStdenv
|
||||
else if originalStdenv.isDarwin then llvmPackages_11.stdenv
|
||||
else originalStdenv;
|
||||
inherit (cudaPackages) cudatoolkit cudnn nccl;
|
||||
in
|
||||
|
||||
assert cudaSupport -> cudatoolkit != null
|
||||
@ -42,6 +64,7 @@ assert ! (stdenv.isDarwin && cudaSupport);
|
||||
let
|
||||
withTensorboard = (pythonOlder "3.6") || tensorboardSupport;
|
||||
|
||||
# FIXME: migrate to redist cudaPackages
|
||||
cudatoolkit_joined = symlinkJoin {
|
||||
name = "${cudatoolkit.name}-merged";
|
||||
paths = [
|
||||
@ -54,10 +77,13 @@ let
|
||||
];
|
||||
};
|
||||
|
||||
# Tensorflow expects bintools at hard-coded paths, e.g. /usr/bin/ar
|
||||
# The only way to overcome that is to set GCC_HOST_COMPILER_PREFIX,
|
||||
# but that path must contain cc as well, so we merge them
|
||||
cudatoolkit_cc_joined = symlinkJoin {
|
||||
name = "${cudatoolkit.cc.name}-merged";
|
||||
name = "${stdenv.cc.name}-merged";
|
||||
paths = [
|
||||
cudatoolkit.cc
|
||||
stdenv.cc
|
||||
binutils.bintools # for ar, dwp, nm, objcopy, objdump, strip
|
||||
];
|
||||
};
|
||||
@ -173,12 +199,7 @@ let
|
||||
'';
|
||||
}) else _bazel-build;
|
||||
|
||||
_bazel-build = (buildBazelPackage.override (lib.optionalAttrs stdenv.isDarwin {
|
||||
# clang 7 fails to emit a symbol for
|
||||
# __ZN4llvm11SmallPtrSetIPKNS_10AllocaInstELj8EED1Ev in any of the
|
||||
# translation units, so the build fails at link time
|
||||
stdenv = llvmPackages_11.stdenv;
|
||||
})) {
|
||||
_bazel-build = buildBazelPackage.override { inherit stdenv; } {
|
||||
name = "${pname}-${version}";
|
||||
bazel = bazel_5;
|
||||
|
||||
@ -209,12 +230,13 @@ let
|
||||
flatbuffers-core
|
||||
giflib
|
||||
grpc
|
||||
icu
|
||||
# Necessary to fix the "`GLIBCXX_3.4.30' not found" error
|
||||
(icu.override { inherit stdenv; })
|
||||
jsoncpp
|
||||
libjpeg_turbo
|
||||
libpng
|
||||
lmdb-core
|
||||
pybind11
|
||||
(pybind11.overridePythonAttrs (_: { inherit stdenv; }))
|
||||
snappy
|
||||
sqlite
|
||||
] ++ lib.optionals cudaSupport [
|
||||
@ -299,9 +321,11 @@ let
|
||||
|
||||
TF_NEED_CUDA = tfFeature cudaSupport;
|
||||
TF_CUDA_PATHS = lib.optionalString cudaSupport "${cudatoolkit_joined},${cudnn},${nccl}";
|
||||
TF_CUDA_COMPUTE_CAPABILITIES = lib.concatStringsSep "," cudaCapabilities;
|
||||
|
||||
# Needed even when we override stdenv: e.g. for ar
|
||||
GCC_HOST_COMPILER_PREFIX = lib.optionalString cudaSupport "${cudatoolkit_cc_joined}/bin";
|
||||
GCC_HOST_COMPILER_PATH = lib.optionalString cudaSupport "${cudatoolkit_cc_joined}/bin/gcc";
|
||||
TF_CUDA_COMPUTE_CAPABILITIES = builtins.concatStringsSep "," cudaFlags.cudaRealArches;
|
||||
GCC_HOST_COMPILER_PATH = lib.optionalString cudaSupport "${cudatoolkit_cc_joined}/bin/cc";
|
||||
|
||||
postPatch = ''
|
||||
# bazel 3.3 should work just as well as bazel 3.1
|
||||
|
@ -1,4 +1,4 @@
|
||||
{ lib, stdenv, fetchFromGitHub
|
||||
{ lib, backendStdenv, fetchFromGitHub
|
||||
, cmake, addOpenGLRunpath
|
||||
, cudatoolkit
|
||||
, cutensor
|
||||
@ -35,13 +35,13 @@ let
|
||||
in
|
||||
|
||||
{
|
||||
cublas = stdenv.mkDerivation (commonAttrs // {
|
||||
cublas = backendStdenv.mkDerivation (commonAttrs // {
|
||||
pname = "cuda-library-samples-cublas";
|
||||
|
||||
src = "${src}/cuBLASLt";
|
||||
});
|
||||
|
||||
cusolver = stdenv.mkDerivation (commonAttrs // {
|
||||
cusolver = backendStdenv.mkDerivation (commonAttrs // {
|
||||
pname = "cuda-library-samples-cusolver";
|
||||
|
||||
src = "${src}/cuSOLVER";
|
||||
@ -49,7 +49,7 @@ in
|
||||
sourceRoot = "cuSOLVER/gesv";
|
||||
});
|
||||
|
||||
cutensor = stdenv.mkDerivation (commonAttrs // {
|
||||
cutensor = backendStdenv.mkDerivation (commonAttrs // {
|
||||
pname = "cuda-library-samples-cutensor";
|
||||
|
||||
src = "${src}/cuTENSOR";
|
||||
|
Loading…
Reference in New Issue
Block a user