opencv4: respect config.cudaCapabilities

This is needed for faster builds when debugging the opencv derivation,
and it's more consistent with other cuda-enabled packages

-DCUDA_GENERATION seems to expect architecture names, so we refactor
cudaFlags to make it easier to extract the configured arch names.
This commit is contained in:
Someone Serge 2023-02-27 14:58:14 +02:00
parent e305011223
commit d378cc6fb2
No known key found for this signature in database
GPG Key ID: 7B0E3B1390D61DA4
6 changed files with 99 additions and 60 deletions

View File

@ -50,7 +50,7 @@ stdenv.mkDerivation rec {
"-DUSE_OLDCMAKECUDA=ON" # see https://github.com/apache/incubator-mxnet/issues/10743
"-DCUDA_ARCH_NAME=All"
"-DCUDA_HOST_COMPILER=${cudatoolkit.cc}/bin/cc"
"-DMXNET_CUDA_ARCH=${builtins.concatStringsSep ";" cudaFlags.cudaRealArches}"
"-DMXNET_CUDA_ARCH=${builtins.concatStringsSep ";" cudaFlags.realArches}"
] else [ "-DUSE_CUDA=OFF" ])
++ lib.optional (!cudnnSupport) "-DUSE_CUDNN=OFF";

View File

@ -18,8 +18,15 @@ let
# from improved performance, reduced file size, or greater hardware support by
# passing a configuration based on your specific GPU environment.
#
# config.cudaCapabilities: list of hardware generations to support (e.g., "8.0")
# config.cudaForwardCompat: bool for compatibility with future GPU generations
# config.cudaCapabilities :: List Capability
# List of hardware generations to build
# Last item is considered the optional forward-compatibility arch
# E.g. [ "8.0" ]
#
# config.cudaForwardCompat :: Bool
# Whether to include the forward compatibility gencode (+PTX)
# to support future GPU generations
# E.g. true
#
# Please see the accompanying documentation or https://github.com/NixOS/nixpkgs/pull/205351
@ -39,6 +46,9 @@ let
# GPUs which are supported by the provided CUDA version.
supportedGpus = builtins.filter isSupported gpus;
# supportedCapabilities :: List Capability
supportedCapabilities = lists.map (gpu: gpu.computeCapability) supportedGpus;
# cudaArchNameToVersions :: AttrSet String (List String)
# Maps the name of a GPU architecture to different versions of that architecture.
# For example, "Ampere" maps to [ "8.0" "8.6" "8.7" ].
@ -49,12 +59,6 @@ let
(gpu: gpu.archName)
supportedGpus;
# cudaArchNames :: List String
# NOTE: It's important that we don't rely on builtins.attrNames cudaArchNameToVersions here;
# otherwise, we'll get the names sorted in alphabetical order. The JSON array we read them
# from is already sorted, so we'll preserve that order here.
cudaArchNames = lists.unique (lists.map (gpu: gpu.archName) supportedGpus);
# cudaComputeCapabilityToName :: AttrSet String String
# Maps the version of a GPU architecture to the name of that architecture.
# For example, "8.0" maps to "Ampere".
@ -67,23 +71,6 @@ let
supportedGpus
);
# cudaComputeCapabilities :: List String
# NOTE: It's important that we don't rely on builtins.attrNames cudaComputeCapabilityToName here;
# otherwise, we'll get the versions sorted in alphabetical order. The JSON array we read them
# from is already sorted, so we'll preserve that order here.
# Use the user-provided list of CUDA capabilities if it's provided.
cudaComputeCapabilities = config.cudaCapabilities
or (lists.map (gpu: gpu.computeCapability) supportedGpus);
# cudaForwardComputeCapability :: String
cudaForwardComputeCapability = (lists.last cudaComputeCapabilities) + "+PTX";
# cudaComputeCapabilitiesAndForward :: List String
# The list of supported CUDA architectures, including the forward compatibility architecture.
# If forward compatibility is disabled, this will be the same as cudaComputeCapabilities.
cudaComputeCapabilitiesAndForward = cudaComputeCapabilities
++ lists.optional (config.cudaForwardCompat or true) cudaForwardComputeCapability;
# dropDot :: String -> String
dropDot = ver: builtins.replaceStrings [ "." ] [ "" ] ver;
@ -101,38 +88,79 @@ let
"-gencode=arch=compute_${dropDot computeCapability},code=${feat}_${dropDot computeCapability}"
);
# cudaRealArches :: List String
# The real architectures are physical architectures supported by the CUDA version.
# For example, "sm_80".
cudaRealArches = archMapper "sm" cudaComputeCapabilities;
formatCapabilities = { cudaCapabilities, enableForwardCompat ? true }: rec {
inherit cudaCapabilities enableForwardCompat;
# cudaVirtualArches :: List String
# The virtual architectures are typically used for forward compatibility, when trying to support
# an architecture newer than the CUDA version allows.
# For example, "compute_80".
cudaVirtualArches = archMapper "compute" cudaComputeCapabilities;
# forwardCapability :: String
# Forward "compute" capability, a.k.a PTX
# E.g. "8.6+PTX"
forwardCapability = (lists.last cudaCapabilities) + "+PTX";
# cudaArches :: List String
# By default, build for all supported architectures and forward compatibility via a virtual
# architecture for the newest supported architecture.
cudaArches = cudaRealArches ++
lists.optional (config.cudaForwardCompat or true) (lists.last cudaVirtualArches);
# capabilitiesAndForward :: List String
# The list of supported CUDA architectures, including the forward compatibility architecture.
# If forward compatibility is disabled, this will be the same as cudaCapabilities.
# E.g. [ "7.5" "8.6" "8.6+PTX" ]
capabilitiesAndForward = cudaCapabilities ++ lists.optionals enableForwardCompat [ forwardCapability ];
# cudaGencode :: List String
# A list of CUDA gencode arguments to pass to NVCC.
cudaGencode =
let
base = gencodeMapper "sm" cudaComputeCapabilities;
forwardCompat = gencodeMapper "compute" [ (lists.last cudaComputeCapabilities) ];
in
base ++ lists.optionals (config.cudaForwardCompat or true) forwardCompat;
# archNames :: List String
# E.g. [ "Turing" "Ampere" ]
archNames = lists.unique (builtins.map (cap: cudaComputeCapabilityToName.${cap}) cudaCapabilities);
# realArches :: List String
# The real architectures are physical architectures supported by the CUDA version.
# E.g. [ "sm_75" "sm_86" ]
realArches = archMapper "sm" cudaCapabilities;
# virtualArches :: List String
# The virtual architectures are typically used for forward compatibility, when trying to support
# an architecture newer than the CUDA version allows.
# E.g. [ "compute_75" "compute_86" ]
virtualArches = archMapper "compute" cudaCapabilities;
# arches :: List String
# By default, build for all supported architectures and forward compatibility via a virtual
# architecture for the newest supported architecture.
# E.g. [ "sm_75" "sm_86" "compute_86" ]
arches = realArches ++
lists.optional enableForwardCompat (lists.last virtualArches);
# gencode :: List String
# A list of CUDA gencode arguments to pass to NVCC.
# E.g. [ "-gencode=arch=compute_75,code=sm_75" ... "-gencode=arch=compute_86,code=compute_86" ]
gencode =
let
base = gencodeMapper "sm" cudaCapabilities;
forward = gencodeMapper "compute" [ (lists.last cudaCapabilities) ];
in
base ++ lib.optionals enableForwardCompat forward;
};
in
# When changing names or formats: pause, validate, and update the assert
assert (formatCapabilities { cudaCapabilities = [ "7.5" "8.6" ]; }) == {
cudaCapabilities = [ "7.5" "8.6" ];
enableForwardCompat = true;
capabilitiesAndForward = [ "7.5" "8.6" "8.6+PTX" ];
forwardCapability = "8.6+PTX";
archNames = [ "Turing" "Ampere" ];
realArches = [ "sm_75" "sm_86" ];
virtualArches = [ "compute_75" "compute_86" ];
arches = [ "sm_75" "sm_86" "compute_86" ];
gencode = [ "-gencode=arch=compute_75,code=sm_75" "-gencode=arch=compute_86,code=sm_86" "-gencode=arch=compute_86,code=compute_86" ];
};
{
inherit
cudaArchNames
cudaArchNameToVersions cudaComputeCapabilityToName
cudaRealArches cudaVirtualArches cudaArches
cudaGencode;
cudaCapabilities = cudaComputeCapabilitiesAndForward;
# formatCapabilities :: { cudaCapabilities: List Capability, enableForwardCompat: Boolean } -> { ... }
inherit formatCapabilities;
# cudaArchNameToVersions :: AttrSet String (List String)
inherit cudaArchNameToVersions;
# cudaComputeCapabilityToName :: AttrSet String String
inherit cudaComputeCapabilityToName;
} // formatCapabilities {
cudaCapabilities = config.cudaCapabilities or supportedCapabilities;
enableForwardCompat = config.cudaForwardCompat or true;
}

View File

@ -37,7 +37,7 @@
, enableContrib ? true
, enableCuda ? (config.cudaSupport or false) && stdenv.hostPlatform.isx86_64
, cudatoolkit
, cudaPackages ? { }
, nvidia-optical-flow-sdk
, enableUnfree ? false
@ -79,6 +79,9 @@
}:
let
inherit (cudaPackages) cudatoolkit;
inherit (cudaPackages.cudaFlags) cudaCapabilities;
version = "4.7.0";
src = fetchFromGitHub {
@ -342,6 +345,14 @@ stdenv.mkDerivation {
"-DCUDA_FAST_MATH=ON"
"-DCUDA_HOST_COMPILER=${cudatoolkit.cc}/bin/cc"
"-DCUDA_NVCC_FLAGS=--expt-relaxed-constexpr"
# OpenCV respects at least three variables:
# -DCUDA_GENERATION takes a single arch name, e.g. Volta
# -DCUDA_ARCH_BIN takes a semi-colon separated list of real arches, e.g. "8.0;8.6"
# -DCUDA_ARCH_PTX takes the virtual arch, e.g. "8.6"
"-DCUDA_ARCH_BIN=${lib.concatStringsSep ";" cudaCapabilities}"
"-DCUDA_ARCH_PTX=${lib.last cudaCapabilities}"
"-DNVIDIA_OPTICAL_FLOW_2_0_HEADERS_PATH=${nvidia-optical-flow-sdk}"
] ++ lib.optionals stdenv.isDarwin [
"-DWITH_OPENCL=OFF"

View File

@ -37,13 +37,13 @@ let
# lists.subtractLists a b = b - a
# For CUDA
supportedCudaSmArches = lists.intersectLists cudaFlags.cudaRealArches supportedGpuTargets;
supportedCudaSmArches = lists.intersectLists cudaFlags.realArches supportedGpuTargets;
# Subtract the supported SM architectures from the real SM architectures to get the unsupported
# SM architectures.
unsupportedCudaSmArches = lists.subtractLists supportedCudaSmArches cudaFlags.cudaRealArches;
unsupportedCudaSmArches = lists.subtractLists supportedCudaSmArches cudaFlags.realArches;
# For ROCm
# NOTE: The hip.gpuTargets are prefixed with "gfx" instead of "sm" like cudaFlags.cudaRealArches.
# NOTE: The hip.gpuTargets are prefixed with "gfx" instead of "sm" like cudaFlags.realArches.
# For some reason, Magma's CMakeLists.txt file does not handle the "gfx" prefix, so we must
# remove it.
rocmArches = lists.map (x: strings.removePrefix "gfx" x) hip.gpuTargets;

View File

@ -10,7 +10,7 @@ with cudaPackages;
let
# Output looks like "-gencode=arch=compute_86,code=sm_86 -gencode=arch=compute_86,code=compute_86"
gencode = lib.concatStringsSep " " cudaFlags.cudaGencode;
gencode = lib.concatStringsSep " " cudaFlags.gencode;
in
backendStdenv.mkDerivation rec {
name = "nccl-${version}-cuda-${cudaPackages.cudaMajorVersion}";

View File

@ -164,7 +164,7 @@ let
build --action_env TF_CUDA_PATHS="${cudatoolkit_joined},${cudnn},${nccl}"
build --action_env TF_CUDA_VERSION="${lib.versions.majorMinor cudatoolkit.version}"
build --action_env TF_CUDNN_VERSION="${lib.versions.major cudnn.version}"
build:cuda --action_env TF_CUDA_COMPUTE_CAPABILITIES="${builtins.concatStringsSep "," cudaFlags.cudaRealArches}"
build:cuda --action_env TF_CUDA_COMPUTE_CAPABILITIES="${builtins.concatStringsSep "," cudaFlags.realArches}"
'' + ''
CFG
'';