Merge master into staging-next

2025-01-18 19:03:28 +00:00 · 2024-02-18 12:01:09 +00:00 · 2024-02-18 12:01:09 +00:00 · b81c130216
commit b81c130216
parent a3a1ec5010 6ecec40792
25 changed files with 4371 additions and 1373 deletions
--- a/maintainers/maintainer-list.nix
+++ b/maintainers/maintainer-list.nix
@ -14920,6 +14920,12 @@
      fingerprint = "3AC6 F170 F011 33CE 393B  CD94 BE94 8AFD 7E78 73BE";
    }];
  };
+  phijor = {
+    name = "Philipp Joram";
+    email = "nixpkgs@phijor.me";
+    github = "phijor";
+    githubId = 10487782;
+  };
  philandstuff = {
    email = "philip.g.potter@gmail.com";
    github = "philandstuff";
--- a/pkgs/applications/audio/waylyrics/Cargo.lock
+++ b/pkgs/applications/audio/waylyrics/Cargo.lock
--- a/pkgs/applications/audio/waylyrics/default.nix
+++ b/pkgs/applications/audio/waylyrics/default.nix
@ -1,59 +1,61 @@
-{ lib, fetchFromGitHub, rustPlatform, gtk4, pkg-config, openssl, dbus, wrapGAppsHook4, glib, makeDesktopItem, copyDesktopItems }:
+{ lib
+, rustPlatform
+, fetchFromGitHub
+, pkg-config
+, wrapGAppsHook4
+, openssl
+, dbus
+}:

 rustPlatform.buildRustPackage rec {
  pname = "waylyrics";
-  version = "unstable-2023-05-14";
+  version = "0.2.4";

  src = fetchFromGitHub {
    owner = "poly000";
-    repo = pname;
-    rev = "7e8bd99e1748a5448c1a5c49f0664bd96fbf965e";
-    hash = "sha256-vSYtLsLvRHCCHxomPSHifXFZKjkFrlskNp7IlFflrUU=";
+    repo = "waylyrics";
+    rev = "v${version}";
+    hash = "sha256-Tpsk1KL+QSiv8aWl8N5hextKnhMulI3YWtQvB6IIdmQ=";
  };

-  cargoHash = "sha256-dpJa0T6xapCBPM5fWbSDEhBlZ55c3Sr5oTnu58B/voM=";
+  cargoLock = {
+    lockFile = ./Cargo.lock;
+    outputHashes = {
+      "ncmapi-0.1.13" = "sha256-wh9RsyuS1L7rnz1jh2A27s6wUvyH8cNgUywPORIimmg=";
+      "qqmusic-rs-0.1.0" = "sha256-woLsO0n+m3EBUI+PRLio7iLp0UPQSliWK0djCSZEaZc=";
+    };
+  };

-  nativeBuildInputs = [ pkg-config wrapGAppsHook4 copyDesktopItems ];
-  buildInputs = [ gtk4 openssl dbus glib ];
+  postPatch = ''
+    cp ${./Cargo.lock} Cargo.lock
+  '';

-  RUSTC_BOOTSTRAP = 1;
+  nativeBuildInputs = [ pkg-config wrapGAppsHook4 ];
+  buildInputs = [ openssl dbus ];

-  doCheck = false; # No tests defined in the project.
+  doCheck = false; # Requires network access

-  WAYLYRICS_DEFAULT_CONFIG = "${placeholder "out"}/share/waylyrics/config.toml";
  WAYLYRICS_THEME_PRESETS_DIR = "${placeholder "out"}/share/waylyrics/themes";

-  desktopItems = [
-    (makeDesktopItem {
-      name = "io.poly000.waylyrics";
-      exec = "waylyrics";
-      comment = "Simple on screen lyrics for MPRIS-friendly players";
-      type = "Application";
-      icon = "io.poly000.waylyrics";
-      desktopName = "Waylyrics";
-      terminal = false;
-      categories = [ "Audio" "AudioVideo" ];
-    })
-  ];
-
  postInstall = ''
-    $out/bin/gen_config_example
-    mkdir -p $out/share/waylyrics
-    install -Dm644 config.toml $WAYLYRICS_DEFAULT_CONFIG
-    cp -vr themes $out/share/waylyrics/
-    rm $out/bin/gen_config_example # Unnecessary for end users
+    # Install themes
+    install -d $WAYLYRICS_THEME_PRESETS_DIR
+    cp -vr themes/* $WAYLYRICS_THEME_PRESETS_DIR
+    # Install desktop entry
+    install -Dm644 io.poly000.waylyrics.desktop -t $out/share/applications
    # Install schema
    install -Dm644 io.poly000.waylyrics.gschema.xml -t $out/share/gsettings-schemas/$name/glib-2.0/schemas
    glib-compile-schemas $out/share/gsettings-schemas/$name/glib-2.0/schemas/
    # Install icons
-    cp -vr res/icons $out/share/
+    install -d $out/share/icons
+    cp -vr res/icons/hicolor $out/share/icons/hicolor
  '';

  meta = with lib; {
-    description = "On screen lyrics for Wayland with NetEase Music source";
+    description = "Desktop lyrics with QQ and NetEase Music source";
    homepage = "https://github.com/poly000/waylyrics";
-    license = licenses.mit;
-    maintainers = [ maintainers.shadowrz ];
+    license = with licenses; [ mit cc-by-40 ];
+    maintainers = with maintainers; [ shadowrz aleksana ];
    platforms = platforms.linux;
  };
 }
--- a/pkgs/applications/graphics/hugin/default.nix
+++ b/pkgs/applications/graphics/hugin/default.nix
@ -2,7 +2,6 @@
 , stdenv
 , cmake
 , fetchurl
-, fetchpatch
 , gnumake
 , makeWrapper
 , pkg-config
@ -38,21 +37,13 @@

 stdenv.mkDerivation rec {
  pname = "hugin";
-  version = "2022.0.0";
+  version = "2023.0.0";

  src = fetchurl {
    url = "mirror://sourceforge/hugin/hugin-${version}.tar.bz2";
-    hash = "sha256-l8hWKgupp0PguVWkPf3gSLHGDNnl8u4rad4agWRuBac=";
+    hash = "sha256-BKOfzMYBfgVExjm9IjCUcsV001s0Vcut4fw4cOYxYys=";
  };

-  patches = [
-    (fetchpatch {
-      name = "hugin-2022.0.0-exiv2-0.28.patch";
-      url = "https://gitweb.gentoo.org/repo/gentoo.git/plain/media-gfx/hugin/files/hugin-2022.0.0-exiv2-0.28.patch?id=d18335caa756f5e5c1478d5fe3ba17f011a78c80";
-      hash = "sha256-Y+79bFb926GW5oLOL0e5y7kLhqU/vZcry+kLL4H2fUE=";
-    })
-  ];
-
  buildInputs = [
    boost
    cairo
--- a/pkgs/applications/networking/cluster/nomad/default.nix
+++ b/pkgs/applications/networking/cluster/nomad/default.nix
@ -57,9 +57,9 @@ rec {

  nomad_1_5 = generic {
    buildGoModule = buildGo121Module;
-    version = "1.5.13";
-    sha256 = "sha256-SFPjcr3W6Sj1n+1ooi1HDMQEapgGapVy4HtqxSIVi9U=";
-    vendorHash = "sha256-F9lzO3jMVbDq8sA4rBo81vmIoOhK2N8d4HXX58HOw18=";
+    version = "1.5.15";
+    sha256 = "sha256-OFmGOU+ObA0+BS48y0ZyyxR+VI5DYL39peVKcyVHgGI=";
+    vendorHash = "sha256-Ds94lB43cyMNyRJZti0mZDWGTtSdwY31dDijfAUxR0I=";
    license = lib.licenses.mpl20;
    passthru.tests.nomad = nixosTests.nomad;
    preCheck = ''
@ -69,9 +69,9 @@ rec {

  nomad_1_6 = generic {
    buildGoModule = buildGo121Module;
-    version = "1.6.6";
-    sha256 = "sha256-E7HLBABOtDO/BUc2+4mD4yJ/sfy85gy67ZylRTZI3Cg=";
-    vendorHash = "sha256-6jq00RsukuP8OSkXhqYqQxpXtp/jm/GChEwEJTVyO10=";
+    version = "1.6.8";
+    sha256 = "sha256-lc/HZgyzqWZNW2WHOFZ43gCeL5Y2hwK4lXPgWGboPOY=";
+    vendorHash = "sha256-ecLhq4OHDhA1Bd/97NMpfePqtuCtVje3BdvCzcwWzas=";
    license = lib.licenses.mpl20;
    passthru.tests.nomad = nixosTests.nomad;
    preCheck = ''
--- a/pkgs/applications/networking/headlines/default.nix
+++ b/pkgs/applications/networking/headlines/default.nix
@ -14,7 +14,7 @@
 , gtkmm4
 , libsecret
 , fetchFromGitLab
-, makeWrapper
+, wrapGAppsHook4
 , xdg-utils
 , youtube-dl
 , ffmpeg
@ -34,7 +34,7 @@ stdenv.mkDerivation rec {
  nativeBuildInputs = [
    cmake
    pkg-config
-    makeWrapper
+    wrapGAppsHook4
  ];

  buildInputs = [
@ -56,10 +56,10 @@ stdenv.mkDerivation rec {
    gst-plugins-bad
  ]);

-  postFixup = ''
-    wrapProgram "$out/bin/headlines" \
-      --prefix PATH : "${lib.makeBinPath [ xdg-utils youtube-dl ffmpeg ]}" \
-      --prefix GST_PLUGIN_SYSTEM_PATH_1_0 : "$GST_PLUGIN_SYSTEM_PATH_1_0"
+  preFixup = ''
+    gappsWrapperArgs+=(
+      --prefix PATH : "${lib.makeBinPath [ xdg-utils youtube-dl ffmpeg ]}"
+    )
  '';

  meta = with lib; {
--- a/pkgs/applications/version-management/gh/default.nix
+++ b/pkgs/applications/version-management/gh/default.nix
@ -2,13 +2,13 @@

 buildGoModule rec {
  pname = "gh";
-  version = "2.44.0";
+  version = "2.44.1";

  src = fetchFromGitHub {
    owner = "cli";
    repo = "cli";
    rev = "v${version}";
-    hash = "sha256-5UESwrEDQpwQSGCKE6WjAGAQvJXACxIt2lw3fZvhAb4=";
+    hash = "sha256-ZcJY9XNkp1Glo0sQ0O9iadsvW4eterkogjlJmQeP+M4=";
  };

  vendorHash = "sha256-r1zcwBz/mJOv1RU4Ilgg73yH37xu7a/BmqgAkiODq0I=";
--- a/pkgs/by-name/op/openscad-unstable/package.nix
+++ b/pkgs/by-name/op/openscad-unstable/package.nix
@ -78,12 +78,12 @@ in
 # clang consume much less RAM than GCC
 clangStdenv.mkDerivation rec {
  pname = "openscad-unstable";
-  version = "2024-01-22";
+  version = "2024-02-18";
  src = fetchFromGitHub {
    owner = "openscad";
    repo = "openscad";
-    rev = "88d244aed3c40a76194ff537ed84bd65bc0e1aeb";
-    hash = "sha256-qkQNbYhmOxF14zm+eCcwe9asLOEciYBANefUb8+KNEI=";
+    rev = "f5688998760d6b85d7b280300388448c162edc42";
+    hash = "sha256-rQnih7Am7NvlrTwIGAN4QbZCcziFm6YOOT27wmjcY8A=";
    fetchSubmodules = true;
  };
  nativeBuildInputs = [
--- a/pkgs/by-name/pd/pdepend/composer.lock
+++ b/pkgs/by-name/pd/pdepend/composer.lock
--- a/pkgs/by-name/pd/pdepend/package.nix
+++ b/pkgs/by-name/pd/pdepend/package.nix
@ -1,20 +1,24 @@
-{ php, fetchFromGitHub, lib }:
+{ php
+, fetchFromGitHub
+, lib
+}:

 php.buildComposerProject (finalAttrs: {
  pname = "pdepend";
-  version = "2.15.1";
+  version = "2.16.2";

  src = fetchFromGitHub {
    owner = "pdepend";
    repo = "pdepend";
    rev = finalAttrs.version;
-    hash = "sha256-tVWOR0rKMnQDeHk3MHhEVOjn+dSpoMx+Ln+AwFRMwYs=";
+    hash = "sha256-2Ruubcm9IWZYu2LGeGeKm1tmHca0P5xlKYkuBCCV9ag=";
  };

  composerLock = ./composer.lock;
-  vendorHash = "sha256-MWm8urRB9IujqrIl22x+JFFCRR+nINLQqnHUywT2pi0=";
+  vendorHash = "sha256-Rvvy6MI0q+T2W7xzf2UqWIbsqgrWhgqVnzhphQ3iw9g=";

  meta = {
+    changelog = "https://github.com/pdepend/pdepend/releases/tag/${finalAttrs.version}";
    description = "An adaptation of JDepend for PHP";
    homepage = "https://github.com/pdepend/pdepend";
    license = lib.licenses.bsd3;
@ -24,7 +28,7 @@ php.buildComposerProject (finalAttrs: {
      of your design in terms of extensibility, reusability and
      maintainability.
    ";
+    mainProgram = "pdepend";
    maintainers = lib.teams.php.members;
-    platforms = lib.platforms.all;
  };
 })
--- a/pkgs/by-name/si/sink-rotate/package.nix
+++ b/pkgs/by-name/si/sink-rotate/package.nix
@ -0,0 +1,41 @@
+{ lib
+, rustPlatform
+, fetchFromGitHub
+, pipewire
+, wireplumber
+, makeWrapper
+}:
+let
+  version = "1.0.4";
+in
+rustPlatform.buildRustPackage {
+  pname = "sink-rotate";
+  inherit version;
+
+  src = fetchFromGitHub {
+    owner = "mightyiam";
+    repo = "sink-rotate";
+    rev = "v${version}";
+    hash = "sha256-q20uUr+7yLJlZc5YgEkY125YrZ2cuJrPv5IgWXaYRlo=";
+  };
+
+  cargoHash = "sha256-MPeyPTkxpi6iw/BT5m4S7jVBD0c2zG2rsv+UZWQxpUU=";
+
+  nativeBuildInputs = [ makeWrapper ]; # build-time hook that provides wrapProgram
+
+  # PATH entries must be directories, so prefix the bin/ dirs that hold pw-dump and wpctl.
+  postFixup = ''
+    wrapProgram $out/bin/sink-rotate \
+      --prefix PATH : ${lib.makeBinPath [ pipewire wireplumber ]}
+  '';
+
+  meta = with lib; {
+    description = "Command that rotates default between two PipeWire audio sinks";
+    homepage = "https://github.com/mightyiam/sink-rotate";
+    license = licenses.mit;
+    maintainers = with maintainers; [ mightyiam ];
+    mainProgram = "sink-rotate";
+    platforms = platforms.linux;
+  };
+}
+
--- a/pkgs/development/libraries/agda/cubical/default.nix
+++ b/pkgs/development/libraries/agda/cubical/default.nix
@ -2,13 +2,13 @@

 mkDerivation rec {
  pname = "cubical";
-  version = "0.6";
+  version = "0.7";

  src = fetchFromGitHub {
    repo = pname;
    owner = "agda";
    rev = "v${version}";
-    hash = "sha256-2quAZ/j7kQaFkh9W5Bj1y7YQj9BT7FwHqVWyj8T4AH8=";
+    hash = "sha256-oLpKRWfQqb6CIscC2XM0ia9HJ8edJFHoPeql3kfvyrA=";
  };

  # The cubical library has several `Everything.agda` files, which are
@ -26,6 +26,6 @@ mkDerivation rec {
    homepage = src.meta.homepage;
    license = licenses.mit;
    platforms = platforms.unix;
-    maintainers = with maintainers; [ alexarice ryanorendorff ncfavier ];
+    maintainers = with maintainers; [ alexarice ryanorendorff ncfavier phijor ];
  };
 }
--- a/pkgs/development/libraries/libcef/default.nix
+++ b/pkgs/development/libraries/libcef/default.nix
@ -70,16 +70,16 @@ let
      projectArch = "x86_64";
    };
  };
-  platforms."aarch64-linux".sha256 = "12sp58nxa3nv800badv62vpvc30hyb0ykywdaxgv9y8pswp9lq0z";
-  platforms."x86_64-linux".sha256 = "0vzzwq1k6bv9d209yg3samvfnfwj7s58y9r3p3pd98wxa9iyzf4j";
+  platforms."aarch64-linux".sha256 = "16sbfk599h96wcsmpbxlwsvq0n1pssmm8dpwmjsqfrn1464dvs68";
+  platforms."x86_64-linux".sha256 = "1wa4nv28saz96kar9svdarfz6c4rnbcqz0rqxzl9zclnhfzhqdiw";

  platformInfo = builtins.getAttr stdenv.hostPlatform.system platforms;
 in
 stdenv.mkDerivation rec {
  pname = "cef-binary";
-  version = "117.2.4";
-  gitRevision = "5053a95";
-  chromiumVersion = "117.0.5938.150";
+  version = "121.3.13";
+  gitRevision = "5c4a81b";
+  chromiumVersion = "121.0.6167.184";

  src = fetchurl {
    url = "https://cef-builds.spotifycdn.com/cef_binary_${version}+g${gitRevision}+chromium-${chromiumVersion}_${platformInfo.platformStr}_minimal.tar.bz2";
--- a/pkgs/development/python-modules/pip-api/default.nix
+++ b/pkgs/development/python-modules/pip-api/default.nix
@ -11,7 +11,7 @@

 buildPythonPackage rec {
  pname = "pip-api";
-  version = "0.0.31";
+  version = "0.0.33";
  pyproject = true;

  disabled = pythonOlder "3.7";
@ -20,7 +20,7 @@ buildPythonPackage rec {
    owner = "di";
    repo = "pip-api";
    rev = "refs/tags/${version}";
-    hash = "sha256-WFyrEEfrGwsITYzQaukwmz5ml+I6zlMddINTkGeNUTM=";
+    hash = "sha256-bDM31YpVB0pZMqeGTCbnINSmJc03N0HuU8hcc8nnHgw=";
  };

  nativeBuildInputs = [
--- a/pkgs/development/python-modules/vllm/default.nix
+++ b/pkgs/development/python-modules/vllm/default.nix
@ -0,0 +1,139 @@
+{ lib
+, buildPythonPackage
+, fetchFromGitHub
+, fetchpatch
+, which
+, ninja
+, packaging
+, setuptools
+, torch
+, wheel
+, psutil
+, ray
+, pandas
+, pyarrow
+, sentencepiece
+, numpy
+, transformers
+, xformers
+, fastapi
+, uvicorn
+, pydantic
+, aioprometheus
+, pynvml
+, cupy
+, writeShellScript
+
+, config
+
+, cudaSupport ? config.cudaSupport
+, cudaPackages ? {} # only dereferenced when cudaSupport is true
+
+, rocmSupport ? config.rocmSupport
+, rocmPackages ? {} # only dereferenced when rocmSupport is true
+, gpuTargets ? [] # NOTE(review): not referenced anywhere in this expression; kept for interface compatibility
+}:
+
+buildPythonPackage rec {
+  pname = "vllm";
+  version = "0.3.1";
+  format = "pyproject";
+
+  src = fetchFromGitHub {
+    owner = "vllm-project";
+    repo = pname;
+    rev = "v${version}";
+    hash = "sha256-hfd4ScU0mkZ7z4+w08BUA1K9bPXSiFThfiO+Ll2MTtg=";
+  };
+
+  # Otherwise it tries to enumerate host supported ROCM gfx archs, and that is not possible due to sandboxing.
+  PYTORCH_ROCM_ARCH = lib.optionalString rocmSupport (lib.strings.concatStringsSep ";" rocmPackages.clr.gpuTargets);
+
+  # xformers 0.0.23.post1 github release specifies its version as 0.0.24
+  #
+  # cupy-cuda12x is the same wheel as cupy, but built with cuda dependencies, we already have it set up
+  # like that in nixpkgs. Version upgrade is due to upstream shenanigans
+  # https://github.com/vllm-project/vllm/pull/2845/commits/34a0ad7f9bb7880c0daa2992d700df3e01e91363
+  #
+  # hipcc --version works badly on NixOS due to unresolved paths.
+  postPatch = ''
+    substituteInPlace requirements.txt \
+      --replace "xformers == 0.0.23.post1" "xformers == 0.0.24"
+    substituteInPlace requirements.txt \
+      --replace "cupy-cuda12x == 12.1.0" "cupy == 12.3.0"
+    substituteInPlace requirements-build.txt \
+      --replace "torch==2.1.2" "torch == 2.2.0"
+    substituteInPlace pyproject.toml \
+      --replace "torch == 2.1.2" "torch == 2.2.0"
+    substituteInPlace requirements.txt \
+      --replace "torch == 2.1.2" "torch == 2.2.0"
+  '' + lib.optionalString rocmSupport ''
+    substituteInPlace setup.py \
+      --replace "'hipcc', '--version'" "'${writeShellScript "hipcc-version-stub" "echo HIP version: 0.0"}'"
+  '';
+
+  preBuild = lib.optionalString cudaSupport ''
+    export CUDA_HOME=${cudaPackages.cuda_nvcc}
+  ''
+  + lib.optionalString rocmSupport ''
+    export ROCM_HOME=${rocmPackages.clr}
+    export PATH=$PATH:${rocmPackages.hipcc}/bin
+  ''; # PATH entries must be directories: append hipcc's bin/ dir, not the store path root
+
+  nativeBuildInputs = [
+    ninja
+    packaging
+    setuptools
+    torch
+    wheel
+    which
+  ] ++ lib.optionals rocmSupport [
+    rocmPackages.hipcc
+  ];
+
+  buildInputs = (lib.optionals cudaSupport (with cudaPackages; [
+    cuda_cudart # cuda_runtime.h, -lcudart
+    cuda_cccl.dev # <thrust/*>
+    libcusparse.dev # cusparse.h
+    libcublas.dev # cublas_v2.h
+    libcusolver # cusolverDn.h
+  ])) ++ (lib.optionals rocmSupport (with rocmPackages; [
+    clr
+    rocthrust
+    rocprim
+    hipsparse
+    hipblas
+  ]));
+
+  propagatedBuildInputs = [
+    psutil
+    ray
+    pandas
+    pyarrow
+    sentencepiece
+    numpy
+    torch
+    transformers
+    xformers
+    fastapi
+    uvicorn
+    pydantic
+    aioprometheus
+  ] ++ uvicorn.optional-dependencies.standard
+    ++ aioprometheus.optional-dependencies.starlette
+    ++ lib.optionals cudaSupport [
+      pynvml
+      cupy
+    ];
+
+  pythonImportsCheck = [ "vllm" ];
+
+  meta = with lib; {
+    description = "A high-throughput and memory-efficient inference and serving engine for LLMs";
+    changelog = "https://github.com/vllm-project/vllm/releases/tag/v${version}";
+    homepage = "https://github.com/vllm-project/vllm";
+    license = licenses.asl20;
+    maintainers = with maintainers; [ happysalada lach ];
+    broken = !cudaSupport && !rocmSupport; # marked broken unless CUDA or ROCm support is enabled
+  };
+}
--- a/pkgs/development/rocm-modules/5/clr/add-missing-operators.patch
+++ b/pkgs/development/rocm-modules/5/clr/add-missing-operators.patch
@ -0,0 +1,979 @@
+From 86bd518981b364c138f9901b28a529899d8654f3 Mon Sep 17 00:00:00 2001
+From: Jatin Chaudhary <JatinJaikishan.Chaudhary@amd.com>
+Date: Wed, 11 Oct 2023 23:19:29 +0100
+Subject: [PATCH] SWDEV-367537 - Add missing operators to __hip_bfloat16
+ implementation
+
+Add __host__ and __device__ to bunch of operator/function matching CUDA
+Fix some bugs seen in __hisinf
+
+Change-Id: I9e67e3e3eb2083b463158f3e250e5221c89b2896
+---
+ hipamd/include/hip/amd_detail/amd_hip_bf16.h | 533 ++++++++++++++++---
+ 1 file changed, 446 insertions(+), 87 deletions(-)
+
+diff --git a/hipamd/include/hip/amd_detail/amd_hip_bf16.h b/hipamd/include/hip/amd_detail/amd_hip_bf16.h
+index 757cb7ada..b15ea3b65 100644
+--- a/hipamd/include/hip/amd_detail/amd_hip_bf16.h
+++ b/hipamd/include/hip/amd_detail/amd_hip_bf16.h
+@@ -96,10 +96,20 @@
+ #if defined(__HIPCC_RTC__)
+ #define __HOST_DEVICE__ __device__
+ #else
+#include <algorithm>
+ #include <climits>
+-#define __HOST_DEVICE__ __host__ __device__
+#include <cmath>
+#define __HOST_DEVICE__ __host__ __device__ inline
+ #endif
+ 
+#define HIPRT_ONE_BF16 __float2bfloat16(1.0f)
+#define HIPRT_ZERO_BF16 __float2bfloat16(0.0f)
+#define HIPRT_INF_BF16 __ushort_as_bfloat16((unsigned short)0x7F80U)
+#define HIPRT_MAX_NORMAL_BF16 __ushort_as_bfloat16((unsigned short)0x7F7FU)
+#define HIPRT_MIN_DENORM_BF16 __ushort_as_bfloat16((unsigned short)0x0001U)
+#define HIPRT_NAN_BF16 __ushort_as_bfloat16((unsigned short)0x7FFFU)
+#define HIPRT_NEG_ZERO_BF16 __ushort_as_bfloat16((unsigned short)0x8000U)
+
+ // Since we are using unsigned short to represent data in bfloat16, it can be of different sizes on
+ // different machines. These naive checks should prevent some undefined behavior on systems which
+ // have different sizes for basic types.
+@@ -189,7 +199,7 @@ __HOST_DEVICE__ float2 __bfloat1622float2(const __hip_bfloat162 a) {
+  * \ingroup HIP_INTRINSIC_BFLOAT162_CONV
+  * \brief Moves bfloat16 value to bfloat162
+  */
+-__device__ __hip_bfloat162 __bfloat162bfloat162(const __hip_bfloat16 a) {
+__HOST_DEVICE__ __hip_bfloat162 __bfloat162bfloat162(const __hip_bfloat16 a) {
+   return __hip_bfloat162{a, a};
+ }
+ 
+@@ -197,13 +207,13 @@ __device__ __hip_bfloat162 __bfloat162bfloat162(const __hip_bfloat16 a) {
+  * \ingroup HIP_INTRINSIC_BFLOAT162_CONV
+  * \brief Reinterprets bits in a __hip_bfloat16 as a signed short integer
+  */
+-__device__ short int __bfloat16_as_short(const __hip_bfloat16 h) { return (short)h.data; }
+__HOST_DEVICE__ short int __bfloat16_as_short(const __hip_bfloat16 h) { return (short)h.data; }
+ 
+ /**
+  * \ingroup HIP_INTRINSIC_BFLOAT162_CONV
+  * \brief Reinterprets bits in a __hip_bfloat16 as an unsigned signed short integer
+  */
+-__device__ unsigned short int __bfloat16_as_ushort(const __hip_bfloat16 h) { return h.data; }
+__HOST_DEVICE__ unsigned short int __bfloat16_as_ushort(const __hip_bfloat16 h) { return h.data; }
+ 
+ /**
+  * \ingroup HIP_INTRINSIC_BFLOAT162_CONV
+@@ -225,7 +235,7 @@ __HOST_DEVICE__ __hip_bfloat162 __float22bfloat162_rn(const float2 a) {
+  * \ingroup HIP_INTRINSIC_BFLOAT162_CONV
+  * \brief Combine two __hip_bfloat16 to __hip_bfloat162
+  */
+-__device__ __hip_bfloat162 __halves2bfloat162(const __hip_bfloat16 a, const __hip_bfloat16 b) {
+__HOST_DEVICE__ __hip_bfloat162 __halves2bfloat162(const __hip_bfloat16 a, const __hip_bfloat16 b) {
+   return __hip_bfloat162{a, b};
+ }
+ 
+@@ -233,13 +243,13 @@ __device__ __hip_bfloat162 __halves2bfloat162(const __hip_bfloat16 a, const __hi
+  * \ingroup HIP_INTRINSIC_BFLOAT162_CONV
+  * \brief Returns high 16 bits of __hip_bfloat162
+  */
+-__device__ __hip_bfloat16 __high2bfloat16(const __hip_bfloat162 a) { return a.y; }
+__HOST_DEVICE__ __hip_bfloat16 __high2bfloat16(const __hip_bfloat162 a) { return a.y; }
+ 
+ /**
+  * \ingroup HIP_INTRINSIC_BFLOAT162_CONV
+  * \brief Returns high 16 bits of __hip_bfloat162
+  */
+-__device__ __hip_bfloat162 __high2bfloat162(const __hip_bfloat162 a) {
+__HOST_DEVICE__ __hip_bfloat162 __high2bfloat162(const __hip_bfloat162 a) {
+   return __hip_bfloat162{a.y, a.y};
+ }
+ 
+@@ -253,7 +263,8 @@ __HOST_DEVICE__ float __high2float(const __hip_bfloat162 a) { return __bfloat162
+  * \ingroup HIP_INTRINSIC_BFLOAT162_CONV
+  * \brief Extracts high 16 bits from each and combines them
+  */
+-__device__ __hip_bfloat162 __highs2bfloat162(const __hip_bfloat162 a, const __hip_bfloat162 b) {
+__HOST_DEVICE__ __hip_bfloat162 __highs2bfloat162(const __hip_bfloat162 a,
+                                                  const __hip_bfloat162 b) {
+   return __hip_bfloat162{a.y, b.y};
+ }
+ 
+@@ -261,13 +272,13 @@ __device__ __hip_bfloat162 __highs2bfloat162(const __hip_bfloat162 a, const __hi
+  * \ingroup HIP_INTRINSIC_BFLOAT162_CONV
+  * \brief Returns low 16 bits of __hip_bfloat162
+  */
+-__device__ __hip_bfloat16 __low2bfloat16(const __hip_bfloat162 a) { return a.x; }
+__HOST_DEVICE__ __hip_bfloat16 __low2bfloat16(const __hip_bfloat162 a) { return a.x; }
+ 
+ /**
+  * \ingroup HIP_INTRINSIC_BFLOAT162_CONV
+  * \brief Returns low 16 bits of __hip_bfloat162
+  */
+-__device__ __hip_bfloat162 __low2bfloat162(const __hip_bfloat162 a) {
+__HOST_DEVICE__ __hip_bfloat162 __low2bfloat162(const __hip_bfloat162 a) {
+   return __hip_bfloat162{a.x, a.x};
+ }
+ 
+@@ -281,7 +292,7 @@ __HOST_DEVICE__ float __low2float(const __hip_bfloat162 a) { return __bfloat162f
+  * \ingroup HIP_INTRINSIC_BFLOAT162_CONV
+  * \brief Swaps both halves
+  */
+-__device__ __hip_bfloat162 __lowhigh2highlow(const __hip_bfloat162 a) {
+__HOST_DEVICE__ __hip_bfloat162 __lowhigh2highlow(const __hip_bfloat162 a) {
+   return __hip_bfloat162{a.y, a.x};
+ }
+ 
+@@ -289,7 +300,7 @@ __device__ __hip_bfloat162 __lowhigh2highlow(const __hip_bfloat162 a) {
+  * \ingroup HIP_INTRINSIC_BFLOAT162_CONV
+  * \brief Extracts low 16 bits from each and combines them
+  */
+-__device__ __hip_bfloat162 __lows2bfloat162(const __hip_bfloat162 a, const __hip_bfloat162 b) {
+__HOST_DEVICE__ __hip_bfloat162 __lows2bfloat162(const __hip_bfloat162 a, const __hip_bfloat162 b) {
+   return __hip_bfloat162{a.x, b.x};
+ }
+ 
+@@ -297,7 +308,7 @@ __device__ __hip_bfloat162 __lows2bfloat162(const __hip_bfloat162 a, const __hip
+  * \ingroup HIP_INTRINSIC_BFLOAT162_CONV
+  * \brief Reinterprets short int into a bfloat16
+  */
+-__device__ __hip_bfloat16 __short_as_bfloat16(const short int a) {
+__HOST_DEVICE__ __hip_bfloat16 __short_as_bfloat16(const short int a) {
+   return __hip_bfloat16{(unsigned short)a};
+ }
+ 
+@@ -305,7 +316,7 @@ __device__ __hip_bfloat16 __short_as_bfloat16(const short int a) {
+  * \ingroup HIP_INTRINSIC_BFLOAT162_CONV
+  * \brief Reinterprets unsigned short int into a bfloat16
+  */
+-__device__ __hip_bfloat16 __ushort_as_bfloat16(const unsigned short int a) {
+__HOST_DEVICE__ __hip_bfloat16 __ushort_as_bfloat16(const unsigned short int a) {
+   return __hip_bfloat16{a};
+ }
+ 
+@@ -314,7 +325,7 @@ __device__ __hip_bfloat16 __ushort_as_bfloat16(const unsigned short int a) {
+  * \ingroup HIP_INTRINSIC_BFLOAT16_ARITH
+  * \brief Adds two bfloat16 values
+  */
+-__device__ __hip_bfloat16 __hadd(const __hip_bfloat16 a, const __hip_bfloat16 b) {
+__HOST_DEVICE__ __hip_bfloat16 __hadd(const __hip_bfloat16 a, const __hip_bfloat16 b) {
+   return __float2bfloat16(__bfloat162float(a) + __bfloat162float(b));
+ }
+ 
+@@ -322,7 +333,7 @@ __device__ __hip_bfloat16 __hadd(const __hip_bfloat16 a, const __hip_bfloat16 b)
+  * \ingroup HIP_INTRINSIC_BFLOAT16_ARITH
+  * \brief Subtracts two bfloat16 values
+  */
+-__device__ __hip_bfloat16 __hsub(const __hip_bfloat16 a, const __hip_bfloat16 b) {
+__HOST_DEVICE__ __hip_bfloat16 __hsub(const __hip_bfloat16 a, const __hip_bfloat16 b) {
+   return __float2bfloat16(__bfloat162float(a) - __bfloat162float(b));
+ }
+ 
+@@ -330,7 +341,7 @@ __device__ __hip_bfloat16 __hsub(const __hip_bfloat16 a, const __hip_bfloat16 b)
+  * \ingroup HIP_INTRINSIC_BFLOAT16_ARITH
+  * \brief Divides two bfloat16 values
+  */
+-__device__ __hip_bfloat16 __hdiv(const __hip_bfloat16 a, const __hip_bfloat16 b) {
+__HOST_DEVICE__ __hip_bfloat16 __hdiv(const __hip_bfloat16 a, const __hip_bfloat16 b) {
+   return __float2bfloat16(__bfloat162float(a) / __bfloat162float(b));
+ }
+ 
+@@ -348,7 +359,7 @@ __device__ __hip_bfloat16 __hfma(const __hip_bfloat16 a, const __hip_bfloat16 b,
+  * \ingroup HIP_INTRINSIC_BFLOAT16_ARITH
+  * \brief Multiplies two bfloat16 values
+  */
+-__device__ __hip_bfloat16 __hmul(const __hip_bfloat16 a, const __hip_bfloat16 b) {
+__HOST_DEVICE__ __hip_bfloat16 __hmul(const __hip_bfloat16 a, const __hip_bfloat16 b) {
+   return __float2bfloat16(__bfloat162float(a) * __bfloat162float(b));
+ }
+ 
+@@ -356,7 +367,7 @@ __device__ __hip_bfloat16 __hmul(const __hip_bfloat16 a, const __hip_bfloat16 b)
+  * \ingroup HIP_INTRINSIC_BFLOAT16_ARITH
+  * \brief Negate a bfloat16 value
+  */
+-__device__ __hip_bfloat16 __hneg(const __hip_bfloat16 a) {
+__HOST_DEVICE__ __hip_bfloat16 __hneg(const __hip_bfloat16 a) {
+   auto ret = a;
+   ret.data ^= 0x8000;
+   return ret;
+@@ -366,7 +377,7 @@ __device__ __hip_bfloat16 __hneg(const __hip_bfloat16 a) {
+  * \ingroup HIP_INTRINSIC_BFLOAT16_ARITH
+  * \brief Returns absolute of a bfloat16
+  */
+-__device__ __hip_bfloat16 __habs(const __hip_bfloat16 a) {
+__HOST_DEVICE__ __hip_bfloat16 __habs(const __hip_bfloat16 a) {
+   auto ret = a;
+   ret.data &= 0x7FFF;
+   return ret;
+@@ -376,7 +387,7 @@ __device__ __hip_bfloat16 __habs(const __hip_bfloat16 a) {
+  * \ingroup HIP_INTRINSIC_BFLOAT162_ARITH
+  * \brief Divides bfloat162 values
+  */
+-__device__ __hip_bfloat162 __h2div(const __hip_bfloat162 a, const __hip_bfloat162 b) {
+__HOST_DEVICE__ __hip_bfloat162 __h2div(const __hip_bfloat162 a, const __hip_bfloat162 b) {
+   return __hip_bfloat162{__float2bfloat16(__bfloat162float(a.x) / __bfloat162float(b.x)),
+                          __float2bfloat16(__bfloat162float(a.y) / __bfloat162float(b.y))};
+ }
+@@ -385,7 +396,7 @@ __device__ __hip_bfloat162 __h2div(const __hip_bfloat162 a, const __hip_bfloat16
+  * \ingroup HIP_INTRINSIC_BFLOAT162_ARITH
+  * \brief Returns absolute of a bfloat162
+  */
+-__device__ __hip_bfloat162 __habs2(const __hip_bfloat162 a) {
+__HOST_DEVICE__ __hip_bfloat162 __habs2(const __hip_bfloat162 a) {
+   return __hip_bfloat162{__habs(a.x), __habs(a.y)};
+ }
+ 
+@@ -393,7 +404,7 @@ __device__ __hip_bfloat162 __habs2(const __hip_bfloat162 a) {
+  * \ingroup HIP_INTRINSIC_BFLOAT162_ARITH
+  * \brief Adds two bfloat162 values
+  */
+-__device__ __hip_bfloat162 __hadd2(const __hip_bfloat162 a, const __hip_bfloat162 b) {
+__HOST_DEVICE__ __hip_bfloat162 __hadd2(const __hip_bfloat162 a, const __hip_bfloat162 b) {
+   return __hip_bfloat162{__hadd(a.x, b.x), __hadd(a.y, b.y)};
+ }
+ 
+@@ -410,7 +421,7 @@ __device__ __hip_bfloat162 __hfma2(const __hip_bfloat162 a, const __hip_bfloat16
+  * \ingroup HIP_INTRINSIC_BFLOAT162_ARITH
+  * \brief Multiplies two bfloat162 values
+  */
+-__device__ __hip_bfloat162 __hmul2(const __hip_bfloat162 a, const __hip_bfloat162 b) {
+__HOST_DEVICE__ __hip_bfloat162 __hmul2(const __hip_bfloat162 a, const __hip_bfloat162 b) {
+   return __hip_bfloat162{__hmul(a.x, b.x), __hmul(a.y, b.y)};
+ }
+ 
+@@ -418,7 +429,7 @@ __device__ __hip_bfloat162 __hmul2(const __hip_bfloat162 a, const __hip_bfloat16
+  * \ingroup HIP_INTRINSIC_BFLOAT162_ARITH
+  * \brief Converts a bfloat162 into negative
+  */
+-__device__ __hip_bfloat162 __hneg2(const __hip_bfloat162 a) {
+__HOST_DEVICE__ __hip_bfloat162 __hneg2(const __hip_bfloat162 a) {
+   return __hip_bfloat162{__hneg(a.x), __hneg(a.y)};
+ }
+ 
+@@ -426,15 +437,251 @@ __device__ __hip_bfloat162 __hneg2(const __hip_bfloat162 a) {
+  * \ingroup HIP_INTRINSIC_BFLOAT162_ARITH
+  * \brief Subtracts two bfloat162 values
+  */
+-__device__ __hip_bfloat162 __hsub2(const __hip_bfloat162 a, const __hip_bfloat162 b) {
+__HOST_DEVICE__ __hip_bfloat162 __hsub2(const __hip_bfloat162 a, const __hip_bfloat162 b) {
+   return __hip_bfloat162{__hsub(a.x, b.x), __hsub(a.y, b.y)};
+ }
+ 
+/**
+ * \ingroup HIP_INTRINSIC_BFLOAT16_ARITH
+ * \brief Operator to multiply two __hip_bfloat16 numbers
+ */
+__HOST_DEVICE__ __hip_bfloat16 operator*(const __hip_bfloat16& l, const __hip_bfloat16& r) {
+  return __hmul(l, r);
+}
+
+/**
+ * \ingroup HIP_INTRINSIC_BFLOAT16_ARITH
+ * \brief Operator to multiply-assign two __hip_bfloat16 numbers
+ */
+__HOST_DEVICE__ __hip_bfloat16 operator*=(__hip_bfloat16& l, const __hip_bfloat16& r) {
+  l = __hmul(l, r);
+  return l;
+}
+
+/**
+ * \ingroup HIP_INTRINSIC_BFLOAT16_ARITH
+ * \brief Operator to unary+ on a __hip_bfloat16 number
+ */
+__HOST_DEVICE__ __hip_bfloat16 operator+(const __hip_bfloat16& l) { return l; }
+
+/**
+ * \ingroup HIP_INTRINSIC_BFLOAT16_ARITH
+ * \brief Operator to add two __hip_bfloat16 numbers
+ */
+__HOST_DEVICE__ __hip_bfloat16 operator+(const __hip_bfloat16& l, const __hip_bfloat16& r) {
+  return __hadd(l, r);
+}
+
+/**
+ * \ingroup HIP_INTRINSIC_BFLOAT16_ARITH
+ * \brief Operator to negate a __hip_bfloat16 number
+ */
+__HOST_DEVICE__ __hip_bfloat16 operator-(const __hip_bfloat16& l) { return __hneg(l); }
+
+/**
+ * \ingroup HIP_INTRINSIC_BFLOAT16_ARITH
+ * \brief Operator to subtract two __hip_bfloat16 numbers
+ */
+__HOST_DEVICE__ __hip_bfloat16 operator-(const __hip_bfloat16& l, const __hip_bfloat16& r) {
+  return __hsub(l, r);
+}
+
+/**
+ * \ingroup HIP_INTRINSIC_BFLOAT16_ARITH
+ * \brief Operator to post increment a __hip_bfloat16 number
+ */
+__HOST_DEVICE__ __hip_bfloat16 operator++(__hip_bfloat16& l, const int) {
+  auto ret = l;
+  l = __hadd(l, HIPRT_ONE_BF16);
+  return ret;
+}
+
+/**
+ * \ingroup HIP_INTRINSIC_BFLOAT16_ARITH
+ * \brief Operator to pre increment a __hip_bfloat16 number
+ */
+__HOST_DEVICE__ __hip_bfloat16& operator++(__hip_bfloat16& l) {
+  l = __hadd(l, HIPRT_ONE_BF16);
+  return l;
+}
+
+/**
+ * \ingroup HIP_INTRINSIC_BFLOAT16_ARITH
+ * \brief Operator to post decrement a __hip_bfloat16 number
+ */
+__HOST_DEVICE__ __hip_bfloat16 operator--(__hip_bfloat16& l, const int) {
+  auto ret = l;
+  l = __hsub(l, HIPRT_ONE_BF16);
+  return ret;
+}
+
+/**
+ * \ingroup HIP_INTRINSIC_BFLOAT16_ARITH
+ * \brief Operator to pre decrement a __hip_bfloat16 number
+ */
+__HOST_DEVICE__ __hip_bfloat16& operator--(__hip_bfloat16& l) {
+  l = __hsub(l, HIPRT_ONE_BF16);
+  return l;
+}
+
+/**
+ * \ingroup HIP_INTRINSIC_BFLOAT16_ARITH
+ * \brief Operator to add-assign two __hip_bfloat16 numbers
+ */
+__HOST_DEVICE__ __hip_bfloat16& operator+=(__hip_bfloat16& l, const __hip_bfloat16& r) {
+  l = l + r;
+  return l;
+}
+
+/**
+ * \ingroup HIP_INTRINSIC_BFLOAT16_ARITH
+ * \brief Operator to subtract-assign two __hip_bfloat16 numbers
+ */
+__HOST_DEVICE__ __hip_bfloat16& operator-=(__hip_bfloat16& l, const __hip_bfloat16& r) {
+  l = l - r;
+  return l;
+}
+
+/**
+ * \ingroup HIP_INTRINSIC_BFLOAT16_ARITH
+ * \brief Operator to divide two __hip_bfloat16 numbers
+ */
+__HOST_DEVICE__ __hip_bfloat16 operator/(const __hip_bfloat16& l, const __hip_bfloat16& r) {
+  return __hdiv(l, r);
+}
+
+/**
+ * \ingroup HIP_INTRINSIC_BFLOAT16_ARITH
+ * \brief Operator to divide-assign two __hip_bfloat16 numbers
+ */
+__HOST_DEVICE__ __hip_bfloat16& operator/=(__hip_bfloat16& l, const __hip_bfloat16& r) {
+  l = l / r;
+  return l;
+}
+
+/**
+ * \ingroup HIP_INTRINSIC_BFLOAT162_ARITH
+ * \brief Operator to multiply two __hip_bfloat162 numbers
+ */
+__HOST_DEVICE__ __hip_bfloat162 operator*(const __hip_bfloat162& l, const __hip_bfloat162& r) {
+  return __hmul2(l, r);
+}
+
+/**
+ * \ingroup HIP_INTRINSIC_BFLOAT162_ARITH
+ * \brief Operator to multiply-assign two __hip_bfloat162 numbers, returns a reference to l
+ */
+__HOST_DEVICE__ __hip_bfloat162& operator*=(__hip_bfloat162& l, const __hip_bfloat162& r) {
+  l = __hmul2(l, r);
+  return l;
+}
+
+/**
+ * \ingroup HIP_INTRINSIC_BFLOAT162_ARITH
+ * \brief Operator to unary+ on a __hip_bfloat162 number
+ */
+__HOST_DEVICE__ __hip_bfloat162 operator+(const __hip_bfloat162& l) { return l; }
+
+/**
+ * \ingroup HIP_INTRINSIC_BFLOAT162_ARITH
+ * \brief Operator to add two __hip_bfloat162 numbers
+ */
+__HOST_DEVICE__ __hip_bfloat162 operator+(const __hip_bfloat162& l, const __hip_bfloat162& r) {
+  return __hadd2(l, r);
+}
+
+/**
+ * \ingroup HIP_INTRINSIC_BFLOAT162_ARITH
+ * \brief Operator to negate a __hip_bfloat162 number
+ */
+__HOST_DEVICE__ __hip_bfloat162 operator-(const __hip_bfloat162& l) { return __hneg2(l); }
+
+/**
+ * \ingroup HIP_INTRINSIC_BFLOAT162_ARITH
+ * \brief Operator to subtract two __hip_bfloat162 numbers
+ */
+__HOST_DEVICE__ __hip_bfloat162 operator-(const __hip_bfloat162& l, const __hip_bfloat162& r) {
+  return __hsub2(l, r);
+}
+
+/**
+ * \ingroup HIP_INTRINSIC_BFLOAT162_ARITH
+ * \brief Operator to post increment a __hip_bfloat162 number
+ */
+__HOST_DEVICE__ __hip_bfloat162 operator++(__hip_bfloat162& l, const int) {
+  auto ret = l;
+  l = __hadd2(l, {HIPRT_ONE_BF16, HIPRT_ONE_BF16});
+  return ret;
+}
+
+/**
+ * \ingroup HIP_INTRINSIC_BFLOAT162_ARITH
+ * \brief Operator to pre increment a __hip_bfloat162 number
+ */
+__HOST_DEVICE__ __hip_bfloat162& operator++(__hip_bfloat162& l) {
+  l = __hadd2(l, {HIPRT_ONE_BF16, HIPRT_ONE_BF16});
+  return l;
+}
+
+/**
+ * \ingroup HIP_INTRINSIC_BFLOAT162_ARITH
+ * \brief Operator to post decrement a __hip_bfloat162 number
+ */
+__HOST_DEVICE__ __hip_bfloat162 operator--(__hip_bfloat162& l, const int) {
+  auto ret = l;
+  l = __hsub2(l, {HIPRT_ONE_BF16, HIPRT_ONE_BF16});
+  return ret;
+}
+
+/**
+ * \ingroup HIP_INTRINSIC_BFLOAT162_ARITH
+ * \brief Operator to pre decrement a __hip_bfloat162 number
+ */
+__HOST_DEVICE__ __hip_bfloat162& operator--(__hip_bfloat162& l) {
+  l = __hsub2(l, {HIPRT_ONE_BF16, HIPRT_ONE_BF16});
+  return l;
+}
+
+/**
+ * \ingroup HIP_INTRINSIC_BFLOAT162_ARITH
+ * \brief Operator to add-assign two __hip_bfloat162 numbers
+ */
+__HOST_DEVICE__ __hip_bfloat162& operator+=(__hip_bfloat162& l, const __hip_bfloat162& r) {
+  l = l + r;
+  return l;
+}
+
+/**
+ * \ingroup HIP_INTRINSIC_BFLOAT162_ARITH
+ * \brief Operator to subtract-assign two __hip_bfloat162 numbers
+ */
+__HOST_DEVICE__ __hip_bfloat162& operator-=(__hip_bfloat162& l, const __hip_bfloat162& r) {
+  l = l - r;
+  return l;
+}
+
+/**
+ * \ingroup HIP_INTRINSIC_BFLOAT162_ARITH
+ * \brief Operator to divide two __hip_bfloat162 numbers
+ */
+__HOST_DEVICE__ __hip_bfloat162 operator/(const __hip_bfloat162& l, const __hip_bfloat162& r) {
+  return __h2div(l, r);
+}
+
+/**
+ * \ingroup HIP_INTRINSIC_BFLOAT162_ARITH
+ * \brief Operator to divide-assign two __hip_bfloat162 numbers
+ */
+__HOST_DEVICE__ __hip_bfloat162& operator/=(__hip_bfloat162& l, const __hip_bfloat162& r) {
+  l = l / r;
+  return l;
+}
+
+ /**
+  * \ingroup HIP_INTRINSIC_BFLOAT16_COMP
+  * \brief Compare two bfloat162 values
+  */
+-__device__ bool __heq(const __hip_bfloat16 a, const __hip_bfloat16 b) {
+__HOST_DEVICE__ bool __heq(const __hip_bfloat16 a, const __hip_bfloat16 b) {
+   return __bfloat162float(a) == __bfloat162float(b);
+ }
+ 
+@@ -442,7 +689,7 @@ __device__ bool __heq(const __hip_bfloat16 a, const __hip_bfloat16 b) {
+  * \ingroup HIP_INTRINSIC_BFLOAT16_COMP
+  * \brief Compare two bfloat162 values - unordered equal
+  */
+-__device__ bool __hequ(const __hip_bfloat16 a, const __hip_bfloat16 b) {
+__HOST_DEVICE__ bool __hequ(const __hip_bfloat16 a, const __hip_bfloat16 b) {
+   return !(__bfloat162float(a) < __bfloat162float(b)) &&
+       !(__bfloat162float(a) > __bfloat162float(b));
+ }
+@@ -451,7 +698,7 @@ __device__ bool __hequ(const __hip_bfloat16 a, const __hip_bfloat16 b) {
+  * \ingroup HIP_INTRINSIC_BFLOAT16_COMP
+  * \brief Compare two bfloat162 values - greater than
+  */
+-__device__ bool __hgt(const __hip_bfloat16 a, const __hip_bfloat16 b) {
+__HOST_DEVICE__ bool __hgt(const __hip_bfloat16 a, const __hip_bfloat16 b) {
+   return __bfloat162float(a) > __bfloat162float(b);
+ }
+ 
+@@ -459,7 +706,7 @@ __device__ bool __hgt(const __hip_bfloat16 a, const __hip_bfloat16 b) {
+  * \ingroup HIP_INTRINSIC_BFLOAT16_COMP
+  * \brief Compare two bfloat162 values - unordered greater than
+  */
+-__device__ bool __hgtu(const __hip_bfloat16 a, const __hip_bfloat16 b) {
+__HOST_DEVICE__ bool __hgtu(const __hip_bfloat16 a, const __hip_bfloat16 b) {
+   return !(__bfloat162float(a) <= __bfloat162float(b));
+ }
+ 
+@@ -467,7 +714,7 @@ __device__ bool __hgtu(const __hip_bfloat16 a, const __hip_bfloat16 b) {
+  * \ingroup HIP_INTRINSIC_BFLOAT16_COMP
+  * \brief Compare two bfloat162 values - greater than equal
+  */
+-__device__ bool __hge(const __hip_bfloat16 a, const __hip_bfloat16 b) {
+__HOST_DEVICE__ bool __hge(const __hip_bfloat16 a, const __hip_bfloat16 b) {
+   return __bfloat162float(a) >= __bfloat162float(b);
+ }
+ 
+@@ -475,7 +722,7 @@ __device__ bool __hge(const __hip_bfloat16 a, const __hip_bfloat16 b) {
+  * \ingroup HIP_INTRINSIC_BFLOAT16_COMP
+  * \brief Compare two bfloat162 values - unordered greater than equal
+  */
+-__device__ bool __hgeu(const __hip_bfloat16 a, const __hip_bfloat16 b) {
+__HOST_DEVICE__ bool __hgeu(const __hip_bfloat16 a, const __hip_bfloat16 b) {
+   return !(__bfloat162float(a) < __bfloat162float(b));
+ }
+ 
+@@ -483,7 +730,7 @@ __device__ bool __hgeu(const __hip_bfloat16 a, const __hip_bfloat16 b) {
+  * \ingroup HIP_INTRINSIC_BFLOAT16_COMP
+  * \brief Compare two bfloat162 values - not equal
+  */
+-__device__ bool __hne(const __hip_bfloat16 a, const __hip_bfloat16 b) {
+__HOST_DEVICE__ bool __hne(const __hip_bfloat16 a, const __hip_bfloat16 b) {
+   return __bfloat162float(a) != __bfloat162float(b);
+ }
+ 
+@@ -491,7 +738,7 @@ __device__ bool __hne(const __hip_bfloat16 a, const __hip_bfloat16 b) {
+  * \ingroup HIP_INTRINSIC_BFLOAT16_COMP
+  * \brief Compare two bfloat162 values - unordered not equal
+  */
+-__device__ bool __hneu(const __hip_bfloat16 a, const __hip_bfloat16 b) {
+__HOST_DEVICE__ bool __hneu(const __hip_bfloat16 a, const __hip_bfloat16 b) {
+   return !(__bfloat162float(a) == __bfloat162float(b));
+ }
+ 
+@@ -499,23 +746,31 @@ __device__ bool __hneu(const __hip_bfloat16 a, const __hip_bfloat16 b) {
+  * \ingroup HIP_INTRINSIC_BFLOAT16_COMP
+  * \brief Compare two bfloat162 values - return max
+  */
+-__device__ __hip_bfloat16 __hmax(const __hip_bfloat16 a, const __hip_bfloat16 b) {
+__HOST_DEVICE__ __hip_bfloat16 __hmax(const __hip_bfloat16 a, const __hip_bfloat16 b) {
+#if __HIP_DEVICE_COMPILE__
+   return __float2bfloat16(__ocml_fmax_f32(__bfloat162float(a), __bfloat162float(b)));
+#else  // host: std::fmax returns the non-NaN operand (std::max would not)
+  return __float2bfloat16(std::fmax(__bfloat162float(a), __bfloat162float(b)));
+#endif
+ }
+ 
+ /**
+  * \ingroup HIP_INTRINSIC_BFLOAT16_COMP
+  * \brief Compare two bfloat162 values - return min
+  */
+-__device__ __hip_bfloat16 __hmin(const __hip_bfloat16 a, const __hip_bfloat16 b) {
+__HOST_DEVICE__ __hip_bfloat16 __hmin(const __hip_bfloat16 a, const __hip_bfloat16 b) {
+#if __HIP_DEVICE_COMPILE__
+   return __float2bfloat16(__ocml_fmin_f32(__bfloat162float(a), __bfloat162float(b)));
+#else  // host: std::fmin returns the non-NaN operand (std::min would not)
+  return __float2bfloat16(std::fmin(__bfloat162float(a), __bfloat162float(b)));
+#endif
+ }
+ 
+ /**
+  * \ingroup HIP_INTRINSIC_BFLOAT16_COMP
+  * \brief Compare two bfloat162 values - less than operator
+  */
+-__device__ bool __hlt(const __hip_bfloat16 a, const __hip_bfloat16 b) {
+__HOST_DEVICE__ bool __hlt(const __hip_bfloat16 a, const __hip_bfloat16 b) {
+   return __bfloat162float(a) < __bfloat162float(b);
+ }
+ 
+@@ -523,15 +778,15 @@ __device__ bool __hlt(const __hip_bfloat16 a, const __hip_bfloat16 b) {
+  * \ingroup HIP_INTRINSIC_BFLOAT16_COMP
+  * \brief Compare two bfloat162 values - unordered less than
+  */
+-__device__ bool __hltu(const __hip_bfloat16 a, const __hip_bfloat16 b) {
+__HOST_DEVICE__ bool __hltu(const __hip_bfloat16 a, const __hip_bfloat16 b) {
+   return !(__bfloat162float(a) >= __bfloat162float(b));
+ }
+ 
+ /**
+  * \ingroup HIP_INTRINSIC_BFLOAT16_COMP
+- * \brief Compare two bfloat162 values - less than
+ * \brief Compare two bfloat162 values - less than equal
+  */
+-__device__ bool __hle(const __hip_bfloat16 a, const __hip_bfloat16 b) {
+__HOST_DEVICE__ bool __hle(const __hip_bfloat16 a, const __hip_bfloat16 b) {
+   return __bfloat162float(a) <= __bfloat162float(b);
+ }
+ 
+@@ -539,7 +794,7 @@ __device__ bool __hle(const __hip_bfloat16 a, const __hip_bfloat16 b) {
+  * \ingroup HIP_INTRINSIC_BFLOAT16_COMP
+  * \brief Compare two bfloat162 values - unordered less than equal
+  */
+-__device__ bool __hleu(const __hip_bfloat16 a, const __hip_bfloat16 b) {
+__HOST_DEVICE__ bool __hleu(const __hip_bfloat16 a, const __hip_bfloat16 b) {
+   return !(__bfloat162float(a) > __bfloat162float(b));
+ }
+ 
+@@ -547,19 +802,33 @@ __device__ bool __hleu(const __hip_bfloat16 a, const __hip_bfloat16 b) {
+  * \ingroup HIP_INTRINSIC_BFLOAT16_COMP
+  * \brief Checks if number is inf
+  */
+-__device__ int __hisinf(const __hip_bfloat16 a) { return __ocml_isinf_f32(__bfloat162float(a)); }
+__HOST_DEVICE__ int __hisinf(const __hip_bfloat16 a) {
+  unsigned short sign = a.data & 0x8000U;
+#if __HIP_DEVICE_COMPILE__
+  int res = __ocml_isinf_f32(__bfloat162float(a));
+#else
+  int res = std::isinf(__bfloat162float(a)) ? 1 : 0;
+#endif
+  return (res == 0) ? res : ((sign != 0U) ? -res : res);
+}
+ 
+ /**
+  * \ingroup HIP_INTRINSIC_BFLOAT16_COMP
+  * \brief Checks if number is nan
+  */
+-__device__ bool __hisnan(const __hip_bfloat16 a) { return __ocml_isnan_f32(__bfloat162float(a)); }
+__HOST_DEVICE__ bool __hisnan(const __hip_bfloat16 a) {
+#if __HIP_DEVICE_COMPILE__
+  return __ocml_isnan_f32(__bfloat162float(a));
+#else
+  return std::isnan(__bfloat162float(a));
+#endif
+}
+ 
+ /**
+  * \ingroup HIP_INTRINSIC_BFLOAT162_COMP
+  * \brief Checks if two numbers are equal
+  */
+-__device__ bool __hbeq2(const __hip_bfloat162 a, const __hip_bfloat162 b) {
+__HOST_DEVICE__ bool __hbeq2(const __hip_bfloat162 a, const __hip_bfloat162 b) {
+   return __heq(a.x, b.x) && __heq(a.y, b.y);
+ }
+ 
+@@ -567,7 +836,7 @@ __device__ bool __hbeq2(const __hip_bfloat162 a, const __hip_bfloat162 b) {
+  * \ingroup HIP_INTRINSIC_BFLOAT162_COMP
+  * \brief Checks if two numbers are equal - unordered
+  */
+-__device__ bool __hbequ2(const __hip_bfloat162 a, const __hip_bfloat162 b) {
+__HOST_DEVICE__ bool __hbequ2(const __hip_bfloat162 a, const __hip_bfloat162 b) {
+   return __hequ(a.x, b.x) && __hequ(a.y, b.y);
+ }
+ 
+@@ -575,7 +844,7 @@ __device__ bool __hbequ2(const __hip_bfloat162 a, const __hip_bfloat162 b) {
+  * \ingroup HIP_INTRINSIC_BFLOAT162_COMP
+  * \brief Check for a >= b
+  */
+-__device__ bool __hbge2(const __hip_bfloat162 a, const __hip_bfloat162 b) {
+__HOST_DEVICE__ bool __hbge2(const __hip_bfloat162 a, const __hip_bfloat162 b) {
+   return __hge(a.x, b.x) && __hge(a.y, b.y);
+ }
+ 
+@@ -583,7 +852,7 @@ __device__ bool __hbge2(const __hip_bfloat162 a, const __hip_bfloat162 b) {
+  * \ingroup HIP_INTRINSIC_BFLOAT162_COMP
+  * \brief Check for a >= b - unordered
+  */
+-__device__ bool __hbgeu2(const __hip_bfloat162 a, const __hip_bfloat162 b) {
+__HOST_DEVICE__ bool __hbgeu2(const __hip_bfloat162 a, const __hip_bfloat162 b) {
+   return __hgeu(a.x, b.x) && __hgeu(a.y, b.y);
+ }
+ 
+@@ -591,7 +860,7 @@ __device__ bool __hbgeu2(const __hip_bfloat162 a, const __hip_bfloat162 b) {
+  * \ingroup HIP_INTRINSIC_BFLOAT162_COMP
+  * \brief Check for a > b
+  */
+-__device__ bool __hbgt2(const __hip_bfloat162 a, const __hip_bfloat162 b) {
+__HOST_DEVICE__ bool __hbgt2(const __hip_bfloat162 a, const __hip_bfloat162 b) {
+   return __hgt(a.x, b.x) && __hgt(a.y, b.y);
+ }
+ 
+@@ -599,7 +868,7 @@ __device__ bool __hbgt2(const __hip_bfloat162 a, const __hip_bfloat162 b) {
+  * \ingroup HIP_INTRINSIC_BFLOAT162_COMP
+  * \brief Check for a > b - unordered
+  */
+-__device__ bool __hbgtu2(const __hip_bfloat162 a, const __hip_bfloat162 b) {
+__HOST_DEVICE__ bool __hbgtu2(const __hip_bfloat162 a, const __hip_bfloat162 b) {
+   return __hgtu(a.x, b.x) && __hgtu(a.y, b.y);
+ }
+ 
+@@ -607,7 +876,7 @@ __device__ bool __hbgtu2(const __hip_bfloat162 a, const __hip_bfloat162 b) {
+  * \ingroup HIP_INTRINSIC_BFLOAT162_COMP
+  * \brief Check for a <= b
+  */
+-__device__ bool __hble2(const __hip_bfloat162 a, const __hip_bfloat162 b) {
+__HOST_DEVICE__ bool __hble2(const __hip_bfloat162 a, const __hip_bfloat162 b) {
+   return __hle(a.x, b.x) && __hle(a.y, b.y);
+ }
+ 
+@@ -615,7 +884,7 @@ __device__ bool __hble2(const __hip_bfloat162 a, const __hip_bfloat162 b) {
+  * \ingroup HIP_INTRINSIC_BFLOAT162_COMP
+  * \brief Check for a <= b - unordered
+  */
+-__device__ bool __hbleu2(const __hip_bfloat162 a, const __hip_bfloat162 b) {
+__HOST_DEVICE__ bool __hbleu2(const __hip_bfloat162 a, const __hip_bfloat162 b) {
+   return __hleu(a.x, b.x) && __hleu(a.y, b.y);
+ }
+ 
+@@ -623,7 +892,7 @@ __device__ bool __hbleu2(const __hip_bfloat162 a, const __hip_bfloat162 b) {
+  * \ingroup HIP_INTRINSIC_BFLOAT162_COMP
+  * \brief Check for a < b
+  */
+-__device__ bool __hblt2(const __hip_bfloat162 a, const __hip_bfloat162 b) {
+__HOST_DEVICE__ bool __hblt2(const __hip_bfloat162 a, const __hip_bfloat162 b) {
+   return __hlt(a.x, b.x) && __hlt(a.y, b.y);
+ }
+ 
+@@ -631,7 +900,7 @@ __device__ bool __hblt2(const __hip_bfloat162 a, const __hip_bfloat162 b) {
+  * \ingroup HIP_INTRINSIC_BFLOAT162_COMP
+  * \brief Check for a < b - unordered
+  */
+-__device__ bool __hbltu2(const __hip_bfloat162 a, const __hip_bfloat162 b) {
+__HOST_DEVICE__ bool __hbltu2(const __hip_bfloat162 a, const __hip_bfloat162 b) {
+   return __hltu(a.x, b.x) && __hltu(a.y, b.y);
+ }
+ 
+@@ -639,7 +908,7 @@ __device__ bool __hbltu2(const __hip_bfloat162 a, const __hip_bfloat162 b) {
+  * \ingroup HIP_INTRINSIC_BFLOAT162_COMP
+  * \brief Check for a != b
+  */
+-__device__ bool __hbne2(const __hip_bfloat162 a, const __hip_bfloat162 b) {
+__HOST_DEVICE__ bool __hbne2(const __hip_bfloat162 a, const __hip_bfloat162 b) {
+   return __hne(a.x, b.x) && __hne(a.y, b.y);
+ }
+ 
+@@ -647,7 +916,7 @@ __device__ bool __hbne2(const __hip_bfloat162 a, const __hip_bfloat162 b) {
+  * \ingroup HIP_INTRINSIC_BFLOAT162_COMP
+  * \brief Check for a != b
+  */
+-__device__ bool __hbneu2(const __hip_bfloat162 a, const __hip_bfloat162 b) {
+__HOST_DEVICE__ bool __hbneu2(const __hip_bfloat162 a, const __hip_bfloat162 b) {
+   return __hneu(a.x, b.x) && __hneu(a.y, b.y);
+ }
+ 
+@@ -655,84 +924,175 @@ __device__ bool __hbneu2(const __hip_bfloat162 a, const __hip_bfloat162 b) {
+  * \ingroup HIP_INTRINSIC_BFLOAT162_COMP
+  * \brief Check for a != b, returns 1.0 if equal, otherwise 0.0
+  */
+-__device__ __hip_bfloat162 __heq2(const __hip_bfloat162 a, const __hip_bfloat162 b) {
+-  return __hip_bfloat162{{__heq(a.x, b.x) ? __float2bfloat16(1.0f) : __float2bfloat16(0.0f)},
+-                         {__heq(a.y, b.y) ? __float2bfloat16(1.0f) : __float2bfloat16(0.0f)}};
+__HOST_DEVICE__ __hip_bfloat162 __heq2(const __hip_bfloat162 a, const __hip_bfloat162 b) {
+  return __hip_bfloat162{{__heq(a.x, b.x) ? HIPRT_ONE_BF16 : HIPRT_ZERO_BF16},
+                         {__heq(a.y, b.y) ? HIPRT_ONE_BF16 : HIPRT_ZERO_BF16}};
+ }
+ 
+ /**
+  * \ingroup HIP_INTRINSIC_BFLOAT162_COMP
+  * \brief Check for a >= b, returns 1.0 if greater than equal, otherwise 0.0
+  */
+-__device__ __hip_bfloat162 __hge2(const __hip_bfloat162 a, const __hip_bfloat162 b) {
+-  return __hip_bfloat162{{__hge(a.x, b.x) ? __float2bfloat16(1.0f) : __float2bfloat16(0.0f)},
+-                         {__hge(a.y, b.y) ? __float2bfloat16(1.0f) : __float2bfloat16(0.0f)}};
+__HOST_DEVICE__ __hip_bfloat162 __hge2(const __hip_bfloat162 a, const __hip_bfloat162 b) {
+  return __hip_bfloat162{{__hge(a.x, b.x) ? HIPRT_ONE_BF16 : HIPRT_ZERO_BF16},
+                         {__hge(a.y, b.y) ? HIPRT_ONE_BF16 : HIPRT_ZERO_BF16}};
+ }
+ 
+ /**
+  * \ingroup HIP_INTRINSIC_BFLOAT162_COMP
+  * \brief Check for a > b, returns 1.0 if greater than equal, otherwise 0.0
+  */
+-__device__ __hip_bfloat162 __hgt2(const __hip_bfloat162 a, const __hip_bfloat162 b) {
+-  return __hip_bfloat162{{__hgt(a.x, b.x) ? __float2bfloat16(1.0f) : __float2bfloat16(0.0f)},
+-                         {__hgt(a.y, b.y) ? __float2bfloat16(1.0f) : __float2bfloat16(0.0f)}};
+__HOST_DEVICE__ __hip_bfloat162 __hgt2(const __hip_bfloat162 a, const __hip_bfloat162 b) {
+  return __hip_bfloat162{{__hgt(a.x, b.x) ? HIPRT_ONE_BF16 : HIPRT_ZERO_BF16},
+                         {__hgt(a.y, b.y) ? HIPRT_ONE_BF16 : HIPRT_ZERO_BF16}};
+ }
+ 
+ /**
+  * \ingroup HIP_INTRINSIC_BFLOAT162_COMP
+  * \brief Check for a is NaN, returns 1.0 if NaN, otherwise 0.0
+  */
+-__device__ __hip_bfloat162 __hisnan2(const __hip_bfloat162 a) {
+-  return __hip_bfloat162{
+-      {__ocml_isnan_f32(__bfloat162float(a.x)) ? __float2bfloat16(1.0f) : __float2bfloat16(0.0f)},
+-      {__ocml_isnan_f32(__bfloat162float(a.y)) ? __float2bfloat16(1.0f) : __float2bfloat16(0.0f)}};
+__HOST_DEVICE__ __hip_bfloat162 __hisnan2(const __hip_bfloat162 a) {
+  return __hip_bfloat162{{__hisnan(a.x) ? HIPRT_ONE_BF16 : HIPRT_ZERO_BF16},
+                         {__hisnan(a.y) ? HIPRT_ONE_BF16 : HIPRT_ZERO_BF16}};
+ }
+ 
+ /**
+  * \ingroup HIP_INTRINSIC_BFLOAT162_COMP
+  * \brief Check for a <= b, returns 1.0 if greater than equal, otherwise 0.0
+  */
+-__device__ __hip_bfloat162 __hle2(const __hip_bfloat162 a, const __hip_bfloat162 b) {
+-  return __hip_bfloat162{{__hle(a.x, b.x) ? __float2bfloat16(1.0f) : __float2bfloat16(0.0f)},
+-                         {__hle(a.y, b.y) ? __float2bfloat16(1.0f) : __float2bfloat16(0.0f)}};
+__HOST_DEVICE__ __hip_bfloat162 __hle2(const __hip_bfloat162 a, const __hip_bfloat162 b) {
+  return __hip_bfloat162{{__hle(a.x, b.x) ? HIPRT_ONE_BF16 : HIPRT_ZERO_BF16},
+                         {__hle(a.y, b.y) ? HIPRT_ONE_BF16 : HIPRT_ZERO_BF16}};
+ }
+ 
+ /**
+  * \ingroup HIP_INTRINSIC_BFLOAT162_COMP
+  * \brief Check for a < b, returns 1.0 if greater than equal, otherwise 0.0
+  */
+-__device__ __hip_bfloat162 __hlt2(const __hip_bfloat162 a, const __hip_bfloat162 b) {
+-  return __hip_bfloat162{{__hlt(a.x, b.x) ? __float2bfloat16(1.0f) : __float2bfloat16(0.0f)},
+-                         {__hlt(a.y, b.y) ? __float2bfloat16(1.0f) : __float2bfloat16(0.0f)}};
+__HOST_DEVICE__ __hip_bfloat162 __hlt2(const __hip_bfloat162 a, const __hip_bfloat162 b) {
+  return __hip_bfloat162{{__hlt(a.x, b.x) ? HIPRT_ONE_BF16 : HIPRT_ZERO_BF16},
+                         {__hlt(a.y, b.y) ? HIPRT_ONE_BF16 : HIPRT_ZERO_BF16}};
+ }
+ 
+ /**
+  * \ingroup HIP_INTRINSIC_BFLOAT162_COMP
+  * \brief Returns max of two elements
+  */
+-__device__ __hip_bfloat162 __hmax2(const __hip_bfloat162 a, const __hip_bfloat162 b) {
+-  return __hip_bfloat162{
+-      __float2bfloat16(__ocml_fmax_f32(__bfloat162float(a.x), __bfloat162float(b.x))),
+-      __float2bfloat16(__ocml_fmax_f32(__bfloat162float(a.y), __bfloat162float(b.y)))};
+__HOST_DEVICE__ __hip_bfloat162 __hmax2(const __hip_bfloat162 a, const __hip_bfloat162 b) {
+  return __hip_bfloat162{__hmax(a.x, b.x), __hmax(a.y, b.y)};
+ }
+ 
+ /**
+  * \ingroup HIP_INTRINSIC_BFLOAT162_COMP
+  * \brief Returns min of two elements
+  */
+-__device__ __hip_bfloat162 __hmin2(const __hip_bfloat162 a, const __hip_bfloat162 b) {
+-  return __hip_bfloat162{
+-      __float2bfloat16(__ocml_fmin_f32(__bfloat162float(a.x), __bfloat162float(b.x))),
+-      __float2bfloat16(__ocml_fmin_f32(__bfloat162float(a.y), __bfloat162float(b.y)))};
+__HOST_DEVICE__ __hip_bfloat162 __hmin2(const __hip_bfloat162 a, const __hip_bfloat162 b) {
+  return __hip_bfloat162{__hmin(a.x, b.x), __hmin(a.y, b.y)};
+ }
+ 
+ /**
+  * \ingroup HIP_INTRINSIC_BFLOAT162_COMP
+  * \brief Checks for not equal to
+  */
+-__device__ __hip_bfloat162 __hne2(const __hip_bfloat162 a, const __hip_bfloat162 b) {
+-  return __hip_bfloat162{{__hne(a.x, b.x) ? __float2bfloat16(1.0f) : __float2bfloat16(0.0f)},
+-                         {__hne(a.y, b.y) ? __float2bfloat16(1.0f) : __float2bfloat16(0.0f)}};
+__HOST_DEVICE__ __hip_bfloat162 __hne2(const __hip_bfloat162 a, const __hip_bfloat162 b) {
+  return __hip_bfloat162{{__hne(a.x, b.x) ? HIPRT_ONE_BF16 : HIPRT_ZERO_BF16},
+                         {__hne(a.y, b.y) ? HIPRT_ONE_BF16 : HIPRT_ZERO_BF16}};
+}
+
+/**
+ * \ingroup HIP_INTRINSIC_BFLOAT16_COMP
+ * \brief Operator to perform an equal compare on two __hip_bfloat16 numbers
+ */
+__HOST_DEVICE__ bool operator==(const __hip_bfloat16& l, const __hip_bfloat16& r) {
+  return __heq(l, r);
+}
+
+/**
+ * \ingroup HIP_INTRINSIC_BFLOAT16_COMP
+ * \brief Operator to perform a not equal on two __hip_bfloat16 numbers
+ */
+__HOST_DEVICE__ bool operator!=(const __hip_bfloat16& l, const __hip_bfloat16& r) {
+  return __hne(l, r);
+}
+
+/**
+ * \ingroup HIP_INTRINSIC_BFLOAT16_COMP
+ * \brief Operator to perform a less than on two __hip_bfloat16 numbers
+ */
+__HOST_DEVICE__ bool operator<(const __hip_bfloat16& l, const __hip_bfloat16& r) {
+  return __hlt(l, r);
+}
+
+/**
+ * \ingroup HIP_INTRINSIC_BFLOAT16_COMP
+ * \brief Operator to perform a less than equal on two __hip_bfloat16 numbers
+ */
+__HOST_DEVICE__ bool operator<=(const __hip_bfloat16& l, const __hip_bfloat16& r) {
+  return __hle(l, r);
+}
+
+/**
+ * \ingroup HIP_INTRINSIC_BFLOAT16_COMP
+ * \brief Operator to perform a greater than on two __hip_bfloat16 numbers
+ */
+__HOST_DEVICE__ bool operator>(const __hip_bfloat16& l, const __hip_bfloat16& r) {
+  return __hgt(l, r);
+}
+
+/**
+ * \ingroup HIP_INTRINSIC_BFLOAT16_COMP
+ * \brief Operator to perform a greater than equal on two __hip_bfloat16 numbers
+ */
+__HOST_DEVICE__ bool operator>=(const __hip_bfloat16& l, const __hip_bfloat16& r) {
+  return __hge(l, r);
+}
+
+/**
+ * \ingroup HIP_INTRINSIC_BFLOAT162_COMP
+ * \brief Operator to perform an equal compare on two __hip_bfloat162 numbers
+ */
+__HOST_DEVICE__ bool operator==(const __hip_bfloat162& l, const __hip_bfloat162& r) {
+  return __heq(l.x, r.x) && __heq(l.y, r.y);
+}
+
+/**
+ * \ingroup HIP_INTRINSIC_BFLOAT162_COMP
+ * \brief Operator to perform a not equal on two __hip_bfloat162 numbers
+ */
+__HOST_DEVICE__ bool operator!=(const __hip_bfloat162& l, const __hip_bfloat162& r) {
+  return __hne(l.x, r.x) || __hne(l.y, r.y);
+}
+
+/**
+ * \ingroup HIP_INTRINSIC_BFLOAT162_COMP
+ * \brief Operator to perform a less than on two __hip_bfloat162 numbers
+ */
+__HOST_DEVICE__ bool operator<(const __hip_bfloat162& l, const __hip_bfloat162& r) {
+  return __hlt(l.x, r.x) && __hlt(l.y, r.y);
+}
+
+/**
+ * \ingroup HIP_INTRINSIC_BFLOAT162_COMP
+ * \brief Operator to perform a less than equal on two __hip_bfloat162 numbers
+ */
+__HOST_DEVICE__ bool operator<=(const __hip_bfloat162& l, const __hip_bfloat162& r) {
+  return __hle(l.x, r.x) && __hle(l.y, r.y);
+}
+
+/**
+ * \ingroup HIP_INTRINSIC_BFLOAT162_COMP
+ * \brief Operator to perform a greater than on two __hip_bfloat162 numbers
+ */
+__HOST_DEVICE__ bool operator>(const __hip_bfloat162& l, const __hip_bfloat162& r) {
+  return __hgt(l.x, r.x) && __hgt(l.y, r.y);
+}
+
+/**
+ * \ingroup HIP_INTRINSIC_BFLOAT162_COMP
+ * \brief Operator to perform a greater than equal on two __hip_bfloat162 numbers
+ */
+__HOST_DEVICE__ bool operator>=(const __hip_bfloat162& l, const __hip_bfloat162& r) {
+  return __hge(l.x, r.x) && __hge(l.y, r.y);
+ }
+ 
+ /**
+@@ -974,5 +1334,4 @@ __device__ __hip_bfloat162 h2sqrt(const __hip_bfloat162 h) {
+ __device__ __hip_bfloat162 h2trunc(const __hip_bfloat162 h) {
+   return __hip_bfloat162{htrunc(h.x), htrunc(h.y)};
+ }
+-
+ #endif
--- a/pkgs/development/rocm-modules/5/clr/default.nix
+++ b/pkgs/development/rocm-modules/5/clr/default.nix
@ -88,6 +88,11 @@ in stdenv.mkDerivation (finalAttrs: {
    "-DCMAKE_INSTALL_LIBDIR=lib"
  ];

+  patches = [
+    ./add-missing-operators.patch
+    ./static-functions.patch
+  ];
+
  postPatch = ''
    patchShebangs hipamd/src

--- a/pkgs/development/rocm-modules/5/clr/static-functions.patch
+++ b/pkgs/development/rocm-modules/5/clr/static-functions.patch
@ -0,0 +1,31 @@
+From 77c581a3ebd47b5e2908973b70adea66891159ee Mon Sep 17 00:00:00 2001
+From: Jatin Chaudhary <JatinJaikishan.Chaudhary@amd.com>
+Date: Mon, 4 Dec 2023 17:21:39 +0000
+Subject: [PATCH] SWDEV-435702 - the functions in bf16 header need to be static
+
+If the compiler decides not to inline these functions, we might break ODR (one definition rule) due to this file being included in multiple files and being linked together
+
+Change-Id: Iacbfdabb53f5b4e5db8c690b23f3730ec9af16c0
+---
+ hipamd/include/hip/amd_detail/amd_hip_bf16.h | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/hipamd/include/hip/amd_detail/amd_hip_bf16.h b/hipamd/include/hip/amd_detail/amd_hip_bf16.h
+index 836e090eb..204269a84 100644
+--- a/hipamd/include/hip/amd_detail/amd_hip_bf16.h
+++ b/hipamd/include/hip/amd_detail/amd_hip_bf16.h
+@@ -94,12 +94,12 @@
+ #include "math_fwd.h"              // ocml device functions
+ 
+ #if defined(__HIPCC_RTC__)
+-#define __HOST_DEVICE__ __device__
+#define __HOST_DEVICE__ __device__ static
+ #else
+ #include <algorithm>
+ #include <climits>
+ #include <cmath>
+-#define __HOST_DEVICE__ __host__ __device__ inline
+#define __HOST_DEVICE__ __host__ __device__ static inline
+ #endif
+ 
+ #define HIPRT_ONE_BF16 __float2bfloat16(1.0f)
--- a/pkgs/development/tools/rust/cargo-llvm-cov/default.nix
+++ b/pkgs/development/tools/rust/cargo-llvm-cov/default.nix
@ -26,7 +26,7 @@

 let
  pname = "cargo-llvm-cov";
-  version = "0.6.2";
+  version = "0.6.5";

  owner = "taiki-e";
  homepage = "https://github.com/${owner}/${pname}";
@ -37,7 +37,7 @@ let
  cargoLock = fetchurl {
    name = "Cargo.lock";
    url = "https://crates.io/api/v1/crates/${pname}/${version}/download";
-    sha256 = "sha256-iML16yjSJsyDsr9F3gyp4XTu5Z9petSUQ0jXotU5tmw=";
+    sha256 = "sha256-nx0OwijDVwDoOiA7bEqK2aVo89xxOD9EQcOn5gv65jk=";
    downloadToTemp = true;
    postFetch = ''
      tar xzf $downloadedFile ${pname}-${version}/Cargo.lock
@ -55,7 +55,7 @@ rustPlatform.buildRustPackage {
    inherit owner;
    repo = pname;
    rev = "v${version}";
-    sha256 = "sha256-1VfWs8f4i3YjH69A7X3/1WPxSIwPRF5osQ/1eqOUB8U=";
+    sha256 = "sha256-TYz6fAuWLUndmu6NuK4XcsUN4/IWwKZMV6aPsB70esM=";
    leaveDotGit = true;
  };

@ -64,7 +64,7 @@ rustPlatform.buildRustPackage {
    cp ${cargoLock} source/Cargo.lock
  '';

-  cargoSha256 = "sha256-9pOfhGnktEgyTbfK4roFU7t3qcgx2yRp17hJVKsvNqw=";
+  cargoSha256 = "sha256-KygRkdYlgCgc0UX5wkCfZsaigllOVCW+h4ralv/18g8=";

  # `cargo-llvm-cov` reads these environment variables to find these binaries,
  # which are needed to run the tests
--- a/pkgs/os-specific/linux/checkpolicy/default.nix
+++ b/pkgs/os-specific/linux/checkpolicy/default.nix
@ -2,12 +2,12 @@

 stdenv.mkDerivation rec {
  pname = "checkpolicy";
-  version = "3.5";
+  version = "3.6";
  inherit (libsepol) se_url;

  src = fetchurl {
    url = "${se_url}/${version}/checkpolicy-${version}.tar.gz";
-    sha256 = "sha256-eqSKsiIqC5iBER1tf3DDAU09kziCfZ4C3xBaaMDfXbw=";
+    sha256 = "sha256-GzRrPN1PinihV2J7rWSjs0ecZ7ahnRXm1chpRiDq28E=";
  };

  nativeBuildInputs = [ bison flex ];
--- a/pkgs/tools/archivers/wimlib/default.nix
+++ b/pkgs/tools/archivers/wimlib/default.nix
@ -1,8 +1,9 @@
 { lib, stdenv, fetchurl, makeWrapper
-, pkg-config, fuse3
+, pkg-config
 , cabextract ? null
 , cdrkit ? null
 , mtools ? null
+, fuse3 ? null
 , ntfs3g ? null
 , syslinux ? null
 }:
@ -12,7 +13,7 @@ stdenv.mkDerivation rec {
  pname = "wimlib";

  nativeBuildInputs = [ pkg-config makeWrapper ];
-  buildInputs = [ fuse3 ntfs3g ];
+  buildInputs = [ ntfs3g ] ++ lib.optionals (!stdenv.isDarwin) [ fuse3 ];

  src = fetchurl {
    url = "https://wimlib.net/downloads/${pname}-${version}.tar.gz";
@ -27,7 +28,7 @@ stdenv.mkDerivation rec {
  '';

  postInstall = let
-    path = lib.makeBinPath  ([ cabextract mtools ntfs3g ] ++ lib.optionals (!stdenv.isDarwin) [ cdrkit syslinux ]);
+    path = lib.makeBinPath  ([ cabextract mtools ntfs3g ] ++ lib.optionals (!stdenv.isDarwin) [ cdrkit syslinux fuse3 ]);
  in ''
    for prog in $out/bin/*; do
      wrapProgram $prog --prefix PATH : $out/bin:${path}
--- a/pkgs/tools/misc/bdf2psf/default.nix
+++ b/pkgs/tools/misc/bdf2psf/default.nix
@ -2,11 +2,11 @@

 stdenv.mkDerivation rec {
  pname = "bdf2psf";
-  version = "1.225";
+  version = "1.226";

  src = fetchurl {
    url = "mirror://debian/pool/main/c/console-setup/bdf2psf_${version}_all.deb";
-    sha256 = "sha256-QEu1USgoOrFE2dHWodfg0nu4HM5C3V/pcpBIKIRuZuQ=";
+    sha256 = "sha256-MLNLeCgBzp2awt9ZJM2kaCWQhRnC6sSwm1fHlv3EwHo=";
  };

  nativeBuildInputs = [ dpkg ];
--- a/pkgs/tools/misc/plantuml/default.nix
+++ b/pkgs/tools/misc/plantuml/default.nix
@ -8,11 +8,11 @@

 stdenvNoCC.mkDerivation (finalAttrs: {
  pname = "plantuml";
-  version = "1.2024.2";
+  version = "1.2024.3";

  src = fetchurl {
    url = "https://github.com/plantuml/plantuml/releases/download/v${finalAttrs.version}/plantuml-pdf-${finalAttrs.version}.jar";
-    hash = "sha256-23EKdS1Z7beuyovgab8ELA1rCAn2Zl83YPmSZ83EBdw=";
+    hash = "sha256-zgpqXawlIdNgSxiOjtk7XLOnrVVD09T5qE9K8LD4TtY=";
  };

  nativeBuildInputs = [
--- a/pkgs/tools/networking/minio-client/default.nix
+++ b/pkgs/tools/networking/minio-client/default.nix
@ -2,13 +2,13 @@

 buildGoModule rec {
  pname = "minio-client";
-  version = "2024-02-09T22-18-24Z";
+  version = "2024-02-16T11-05-48Z";

  src = fetchFromGitHub {
    owner = "minio";
    repo = "mc";
    rev = "RELEASE.${version}";
-    sha256 = "sha256-Z4bqbU5ZDVlHLHyJWTNLSjBgE3Fybn/oUyqjod0bUCw=";
+    sha256 = "sha256-Kqv48krXiDi/8QtCEpn0uGvuLS2P6BYAtSnY5sNrCQ0=";
  };

  vendorHash = "sha256-wxFhj+oqj5WV/UkPZlmeJHF2WC4oLlZOql1qgSFs+zU=";
--- a/pkgs/top-level/python-packages.nix
+++ b/pkgs/top-level/python-packages.nix
@ -16032,6 +16032,8 @@ self: super: with self; {

  viv-utils = callPackage ../development/python-modules/viv-utils { };

+  vllm = callPackage ../development/python-modules/vllm { };
+
  vmprof = callPackage ../development/python-modules/vmprof { };

  vncdo = callPackage ../development/python-modules/vncdo { };